| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 50, | |
| "global_step": 2699, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.003705934127020892, | |
| "grad_norm": 215.50672912597656, | |
| "learning_rate": 4.983327158206743e-07, | |
| "logits/chosen": -6.070415496826172, | |
| "logits/rejected": -6.099751949310303, | |
| "logps/chosen": -984.36767578125, | |
| "logps/rejected": -897.9577026367188, | |
| "loss": 0.6962, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": 0.055581189692020416, | |
| "rewards/margins": 0.0006150867557153106, | |
| "rewards/rejected": 0.05496610328555107, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.007411868254041784, | |
| "grad_norm": 203.74668884277344, | |
| "learning_rate": 4.964801778436458e-07, | |
| "logits/chosen": -6.290555477142334, | |
| "logits/rejected": -6.2849812507629395, | |
| "logps/chosen": -932.5144653320312, | |
| "logps/rejected": -823.7965698242188, | |
| "loss": 0.675, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.27587246894836426, | |
| "rewards/margins": 0.05210857465863228, | |
| "rewards/rejected": 0.22376389801502228, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.011117802381062676, | |
| "grad_norm": 201.33848571777344, | |
| "learning_rate": 4.946276398666173e-07, | |
| "logits/chosen": -6.240113735198975, | |
| "logits/rejected": -6.196782112121582, | |
| "logps/chosen": -981.3587646484375, | |
| "logps/rejected": -879.1512451171875, | |
| "loss": 0.67, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 0.4677122235298157, | |
| "rewards/margins": 0.07471133768558502, | |
| "rewards/rejected": 0.39300084114074707, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.014823736508083569, | |
| "grad_norm": 201.69285583496094, | |
| "learning_rate": 4.927751018895887e-07, | |
| "logits/chosen": -6.257163047790527, | |
| "logits/rejected": -6.211418151855469, | |
| "logps/chosen": -998.1868286132812, | |
| "logps/rejected": -941.7491455078125, | |
| "loss": 0.6987, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": 0.5558674931526184, | |
| "rewards/margins": 0.02628883719444275, | |
| "rewards/rejected": 0.5295786261558533, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.01852967063510446, | |
| "grad_norm": 213.06088256835938, | |
| "learning_rate": 4.909225639125602e-07, | |
| "logits/chosen": -6.227558612823486, | |
| "logits/rejected": -6.338425636291504, | |
| "logps/chosen": -1029.257080078125, | |
| "logps/rejected": -952.8382568359375, | |
| "loss": 0.6643, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": 0.59141606092453, | |
| "rewards/margins": 0.09994185715913773, | |
| "rewards/rejected": 0.4914742112159729, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.01852967063510446, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.348860263824463, | |
| "eval_logps/chosen": -1146.9443359375, | |
| "eval_logps/rejected": -1055.7943115234375, | |
| "eval_loss": 0.6688504815101624, | |
| "eval_rewards/accuracies": 0.5936188101768494, | |
| "eval_rewards/chosen": 0.7400967478752136, | |
| "eval_rewards/margins": 0.09535637497901917, | |
| "eval_rewards/rejected": 0.6447404623031616, | |
| "eval_runtime": 173.9141, | |
| "eval_samples_per_second": 6.848, | |
| "eval_steps_per_second": 6.848, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.02223560476212535, | |
| "grad_norm": 173.28237915039062, | |
| "learning_rate": 4.890700259355317e-07, | |
| "logits/chosen": -6.145205974578857, | |
| "logits/rejected": -6.158076763153076, | |
| "logps/chosen": -918.4729614257812, | |
| "logps/rejected": -808.6856079101562, | |
| "loss": 0.6631, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.6317356824874878, | |
| "rewards/margins": 0.10025952756404877, | |
| "rewards/rejected": 0.5314761400222778, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.025941538889146246, | |
| "grad_norm": 201.4814910888672, | |
| "learning_rate": 4.872174879585031e-07, | |
| "logits/chosen": -5.990462779998779, | |
| "logits/rejected": -6.091423988342285, | |
| "logps/chosen": -903.2649536132812, | |
| "logps/rejected": -869.3302001953125, | |
| "loss": 0.6871, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": 0.7280756831169128, | |
| "rewards/margins": 0.06912466883659363, | |
| "rewards/rejected": 0.6589510440826416, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.029647473016167138, | |
| "grad_norm": 247.76434326171875, | |
| "learning_rate": 4.853649499814746e-07, | |
| "logits/chosen": -6.052975177764893, | |
| "logits/rejected": null, | |
| "logps/chosen": -994.2741088867188, | |
| "logps/rejected": -887.9376220703125, | |
| "loss": 0.6728, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": 0.8349438905715942, | |
| "rewards/margins": 0.10054773092269897, | |
| "rewards/rejected": 0.7343961000442505, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.03335340714318803, | |
| "grad_norm": 241.12693786621094, | |
| "learning_rate": 4.835124120044461e-07, | |
| "logits/chosen": -6.244847774505615, | |
| "logits/rejected": -6.195946216583252, | |
| "logps/chosen": -953.9434814453125, | |
| "logps/rejected": -816.5172119140625, | |
| "loss": 0.6708, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": 0.7360488176345825, | |
| "rewards/margins": 0.10302430391311646, | |
| "rewards/rejected": 0.6330245733261108, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.03705934127020892, | |
| "grad_norm": 222.10406494140625, | |
| "learning_rate": 4.816598740274175e-07, | |
| "logits/chosen": -6.177041530609131, | |
| "logits/rejected": -6.0963640213012695, | |
| "logps/chosen": -1006.8380737304688, | |
| "logps/rejected": -819.0447998046875, | |
| "loss": 0.6398, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.8563984036445618, | |
| "rewards/margins": 0.1775047481060028, | |
| "rewards/rejected": 0.6788936853408813, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.03705934127020892, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.329836368560791, | |
| "eval_logps/chosen": -1141.4117431640625, | |
| "eval_logps/rejected": -1050.9219970703125, | |
| "eval_loss": 0.6667112708091736, | |
| "eval_rewards/accuracies": 0.5869017839431763, | |
| "eval_rewards/chosen": 1.2933688163757324, | |
| "eval_rewards/margins": 0.16138586401939392, | |
| "eval_rewards/rejected": 1.1319829225540161, | |
| "eval_runtime": 174.0404, | |
| "eval_samples_per_second": 6.843, | |
| "eval_steps_per_second": 6.843, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.040765275397229815, | |
| "grad_norm": 176.17654418945312, | |
| "learning_rate": 4.79807336050389e-07, | |
| "logits/chosen": null, | |
| "logits/rejected": -6.241061210632324, | |
| "logps/chosen": -934.3814697265625, | |
| "logps/rejected": -809.5549926757812, | |
| "loss": 0.6532, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 1.0543756484985352, | |
| "rewards/margins": 0.2122875154018402, | |
| "rewards/rejected": 0.8420880436897278, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.0444712095242507, | |
| "grad_norm": 205.62350463867188, | |
| "learning_rate": 4.779547980733605e-07, | |
| "logits/chosen": -6.2480058670043945, | |
| "logits/rejected": -6.166928291320801, | |
| "logps/chosen": -996.0416259765625, | |
| "logps/rejected": -834.2034912109375, | |
| "loss": 0.6599, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": 1.0299097299575806, | |
| "rewards/margins": 0.1862904280424118, | |
| "rewards/rejected": 0.8436192274093628, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.0481771436512716, | |
| "grad_norm": 191.88278198242188, | |
| "learning_rate": 4.7610226009633197e-07, | |
| "logits/chosen": -6.192694664001465, | |
| "logits/rejected": -6.168017387390137, | |
| "logps/chosen": -957.9847412109375, | |
| "logps/rejected": -854.0274658203125, | |
| "loss": 0.6462, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.9972726702690125, | |
| "rewards/margins": 0.20573964715003967, | |
| "rewards/rejected": 0.7915329933166504, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.05188307777829249, | |
| "grad_norm": 164.00282287597656, | |
| "learning_rate": 4.742497221193034e-07, | |
| "logits/chosen": -6.139467239379883, | |
| "logits/rejected": -6.123991966247559, | |
| "logps/chosen": -1019.96435546875, | |
| "logps/rejected": -879.1519775390625, | |
| "loss": 0.6461, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 1.0306396484375, | |
| "rewards/margins": 0.21070317924022675, | |
| "rewards/rejected": 0.8199363946914673, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.05558901190531338, | |
| "grad_norm": 192.38653564453125, | |
| "learning_rate": 4.7239718414227493e-07, | |
| "logits/chosen": -6.214751243591309, | |
| "logits/rejected": -6.185935020446777, | |
| "logps/chosen": -984.6964721679688, | |
| "logps/rejected": -895.7721557617188, | |
| "loss": 0.661, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": 0.9771279096603394, | |
| "rewards/margins": 0.13651703298091888, | |
| "rewards/rejected": 0.8406108617782593, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.05558901190531338, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.335906982421875, | |
| "eval_logps/chosen": -1141.2706298828125, | |
| "eval_logps/rejected": -1050.9266357421875, | |
| "eval_loss": 0.6631777882575989, | |
| "eval_rewards/accuracies": 0.5961377024650574, | |
| "eval_rewards/chosen": 1.3074761629104614, | |
| "eval_rewards/margins": 0.17596358060836792, | |
| "eval_rewards/rejected": 1.1315125226974487, | |
| "eval_runtime": 174.258, | |
| "eval_samples_per_second": 6.835, | |
| "eval_steps_per_second": 6.835, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.059294946032334275, | |
| "grad_norm": 174.99664306640625, | |
| "learning_rate": 4.705446461652464e-07, | |
| "logits/chosen": -6.193015098571777, | |
| "logits/rejected": -6.1321492195129395, | |
| "logps/chosen": -1026.88037109375, | |
| "logps/rejected": -896.2667846679688, | |
| "loss": 0.6248, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": 1.183638572692871, | |
| "rewards/margins": 0.23824377357959747, | |
| "rewards/rejected": 0.9453946352005005, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.06300088015935516, | |
| "grad_norm": 192.7080078125, | |
| "learning_rate": 4.6869210818821784e-07, | |
| "logits/chosen": -6.125895023345947, | |
| "logits/rejected": -6.105996131896973, | |
| "logps/chosen": -891.0772705078125, | |
| "logps/rejected": -783.1829223632812, | |
| "loss": 0.6062, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 1.0523326396942139, | |
| "rewards/margins": 0.29890042543411255, | |
| "rewards/rejected": 0.7534322142601013, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.06670681428637606, | |
| "grad_norm": 213.39122009277344, | |
| "learning_rate": 4.668395702111893e-07, | |
| "logits/chosen": -6.171431064605713, | |
| "logits/rejected": -6.141777992248535, | |
| "logps/chosen": -950.2371215820312, | |
| "logps/rejected": -837.3072509765625, | |
| "loss": 0.6254, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 1.1676208972930908, | |
| "rewards/margins": 0.2787976562976837, | |
| "rewards/rejected": 0.8888231515884399, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.07041274841339695, | |
| "grad_norm": 187.03749084472656, | |
| "learning_rate": 4.649870322341608e-07, | |
| "logits/chosen": -6.19333028793335, | |
| "logits/rejected": -6.163342475891113, | |
| "logps/chosen": -958.0759887695312, | |
| "logps/rejected": -845.267578125, | |
| "loss": 0.6568, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 1.1464588642120361, | |
| "rewards/margins": 0.23536348342895508, | |
| "rewards/rejected": 0.9110953211784363, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.07411868254041784, | |
| "grad_norm": 150.2332763671875, | |
| "learning_rate": 4.6313449425713225e-07, | |
| "logits/chosen": -6.098294734954834, | |
| "logits/rejected": -6.079904556274414, | |
| "logps/chosen": -920.4479370117188, | |
| "logps/rejected": -867.9637451171875, | |
| "loss": 0.6275, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 1.1195321083068848, | |
| "rewards/margins": 0.24117548763751984, | |
| "rewards/rejected": 0.8783566355705261, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.07411868254041784, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.339965343475342, | |
| "eval_logps/chosen": -1140.9739990234375, | |
| "eval_logps/rejected": -1050.8843994140625, | |
| "eval_loss": 0.6571991443634033, | |
| "eval_rewards/accuracies": 0.6154491901397705, | |
| "eval_rewards/chosen": 1.3371424674987793, | |
| "eval_rewards/margins": 0.20140083134174347, | |
| "eval_rewards/rejected": 1.1357417106628418, | |
| "eval_runtime": 174.4949, | |
| "eval_samples_per_second": 6.825, | |
| "eval_steps_per_second": 6.825, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.07782461666743874, | |
| "grad_norm": 189.8514862060547, | |
| "learning_rate": 4.6128195628010375e-07, | |
| "logits/chosen": -6.194195747375488, | |
| "logits/rejected": -6.146265983581543, | |
| "logps/chosen": -915.0256958007812, | |
| "logps/rejected": -845.5064697265625, | |
| "loss": 0.6606, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": 0.961922287940979, | |
| "rewards/margins": 0.19818060100078583, | |
| "rewards/rejected": 0.763741672039032, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.08153055079445963, | |
| "grad_norm": 189.69139099121094, | |
| "learning_rate": 4.594294183030752e-07, | |
| "logits/chosen": -6.284877300262451, | |
| "logits/rejected": -6.263852119445801, | |
| "logps/chosen": -1026.146484375, | |
| "logps/rejected": -913.3385009765625, | |
| "loss": 0.6511, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.9875092506408691, | |
| "rewards/margins": 0.21295031905174255, | |
| "rewards/rejected": 0.7745589017868042, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.08523648492148052, | |
| "grad_norm": 264.7326354980469, | |
| "learning_rate": 4.575768803260467e-07, | |
| "logits/chosen": -6.017802715301514, | |
| "logits/rejected": -6.151331424713135, | |
| "logps/chosen": -846.173828125, | |
| "logps/rejected": -828.7672119140625, | |
| "loss": 0.6848, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": 0.7509580254554749, | |
| "rewards/margins": 0.0957236960530281, | |
| "rewards/rejected": 0.6552343368530273, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.0889424190485014, | |
| "grad_norm": 224.5631561279297, | |
| "learning_rate": 4.557243423490181e-07, | |
| "logits/chosen": -6.283064365386963, | |
| "logits/rejected": -6.176108360290527, | |
| "logps/chosen": -940.2021484375, | |
| "logps/rejected": -812.8372802734375, | |
| "loss": 0.6557, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.8100060224533081, | |
| "rewards/margins": 0.16767463088035583, | |
| "rewards/rejected": 0.6423314213752747, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.09264835317552231, | |
| "grad_norm": 176.48463439941406, | |
| "learning_rate": 4.538718043719896e-07, | |
| "logits/chosen": -6.0912933349609375, | |
| "logits/rejected": -6.187921524047852, | |
| "logps/chosen": -1011.1219482421875, | |
| "logps/rejected": -848.7685546875, | |
| "loss": 0.6152, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": 0.857901930809021, | |
| "rewards/margins": 0.25349634885787964, | |
| "rewards/rejected": 0.6044055819511414, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.09264835317552231, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.341000080108643, | |
| "eval_logps/chosen": -1144.0086669921875, | |
| "eval_logps/rejected": -1053.47119140625, | |
| "eval_loss": 0.6642729043960571, | |
| "eval_rewards/accuracies": 0.5801846981048584, | |
| "eval_rewards/chosen": 1.0336804389953613, | |
| "eval_rewards/margins": 0.1566334068775177, | |
| "eval_rewards/rejected": 0.8770471215248108, | |
| "eval_runtime": 174.2528, | |
| "eval_samples_per_second": 6.835, | |
| "eval_steps_per_second": 6.835, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.0963542873025432, | |
| "grad_norm": 194.99929809570312, | |
| "learning_rate": 4.5201926639496107e-07, | |
| "logits/chosen": -6.046469688415527, | |
| "logits/rejected": -6.070072650909424, | |
| "logps/chosen": -825.3790283203125, | |
| "logps/rejected": -784.54052734375, | |
| "loss": 0.6457, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": 0.7765441536903381, | |
| "rewards/margins": 0.17580363154411316, | |
| "rewards/rejected": 0.6007404923439026, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.10006022142956408, | |
| "grad_norm": 232.19290161132812, | |
| "learning_rate": 4.5016672841793257e-07, | |
| "logits/chosen": -6.138308525085449, | |
| "logits/rejected": -6.1265411376953125, | |
| "logps/chosen": -966.5224609375, | |
| "logps/rejected": -880.6701049804688, | |
| "loss": 0.6047, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 1.0221220254898071, | |
| "rewards/margins": 0.32319146394729614, | |
| "rewards/rejected": 0.6989305019378662, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.10376615555658499, | |
| "grad_norm": 199.44570922851562, | |
| "learning_rate": 4.48314190440904e-07, | |
| "logits/chosen": -6.254446983337402, | |
| "logits/rejected": -6.203383922576904, | |
| "logps/chosen": -974.5589599609375, | |
| "logps/rejected": -894.5767822265625, | |
| "loss": 0.6625, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": 0.9916531443595886, | |
| "rewards/margins": 0.18319669365882874, | |
| "rewards/rejected": 0.8084564208984375, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.10747208968360587, | |
| "grad_norm": 195.33485412597656, | |
| "learning_rate": 4.4646165246387553e-07, | |
| "logits/chosen": -6.29571533203125, | |
| "logits/rejected": -6.212879180908203, | |
| "logps/chosen": -883.5402221679688, | |
| "logps/rejected": -783.243408203125, | |
| "loss": 0.6307, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.9049898386001587, | |
| "rewards/margins": 0.2685369849205017, | |
| "rewards/rejected": 0.6364529132843018, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.11117802381062676, | |
| "grad_norm": 214.13623046875, | |
| "learning_rate": 4.44609114486847e-07, | |
| "logits/chosen": null, | |
| "logits/rejected": -6.118219375610352, | |
| "logps/chosen": -953.02001953125, | |
| "logps/rejected": -822.9397583007812, | |
| "loss": 0.6433, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": 0.9801859855651855, | |
| "rewards/margins": 0.20221543312072754, | |
| "rewards/rejected": 0.7779706120491028, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.11117802381062676, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.331676483154297, | |
| "eval_logps/chosen": -1140.791748046875, | |
| "eval_logps/rejected": -1050.74365234375, | |
| "eval_loss": 0.6641189455986023, | |
| "eval_rewards/accuracies": 0.6078925132751465, | |
| "eval_rewards/chosen": 1.355363130569458, | |
| "eval_rewards/margins": 0.20555777847766876, | |
| "eval_rewards/rejected": 1.1498054265975952, | |
| "eval_runtime": 174.1181, | |
| "eval_samples_per_second": 6.84, | |
| "eval_steps_per_second": 6.84, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.11488395793764766, | |
| "grad_norm": 151.80670166015625, | |
| "learning_rate": 4.4275657650981843e-07, | |
| "logits/chosen": -6.154031276702881, | |
| "logits/rejected": -6.207782745361328, | |
| "logps/chosen": -944.8739013671875, | |
| "logps/rejected": -844.357421875, | |
| "loss": 0.6034, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": 1.1336545944213867, | |
| "rewards/margins": 0.3580685555934906, | |
| "rewards/rejected": 0.7755860090255737, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.11858989206466855, | |
| "grad_norm": 199.51202392578125, | |
| "learning_rate": 4.409040385327899e-07, | |
| "logits/chosen": -6.148090839385986, | |
| "logits/rejected": -6.058573246002197, | |
| "logps/chosen": -870.2352294921875, | |
| "logps/rejected": -785.9710693359375, | |
| "loss": 0.6577, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 1.0765498876571655, | |
| "rewards/margins": 0.24500849843025208, | |
| "rewards/rejected": 0.8315415382385254, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.12229582619168944, | |
| "grad_norm": 178.98838806152344, | |
| "learning_rate": 4.390515005557614e-07, | |
| "logits/chosen": -6.161218166351318, | |
| "logits/rejected": -6.021878242492676, | |
| "logps/chosen": -900.61279296875, | |
| "logps/rejected": -716.9248046875, | |
| "loss": 0.5969, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": 1.0982271432876587, | |
| "rewards/margins": 0.3538793623447418, | |
| "rewards/rejected": 0.7443478107452393, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.12600176031871033, | |
| "grad_norm": 193.34494018554688, | |
| "learning_rate": 4.3719896257873284e-07, | |
| "logits/chosen": -6.12771463394165, | |
| "logits/rejected": -6.165745735168457, | |
| "logps/chosen": -938.45068359375, | |
| "logps/rejected": -867.9703369140625, | |
| "loss": 0.673, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 1.1986232995986938, | |
| "rewards/margins": 0.20064759254455566, | |
| "rewards/rejected": 0.9979757070541382, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.12970769444573121, | |
| "grad_norm": 176.0118865966797, | |
| "learning_rate": 4.3534642460170435e-07, | |
| "logits/chosen": -6.143977165222168, | |
| "logits/rejected": -6.176082611083984, | |
| "logps/chosen": -921.1917114257812, | |
| "logps/rejected": -874.5079345703125, | |
| "loss": 0.623, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 1.1385135650634766, | |
| "rewards/margins": 0.2807597219944, | |
| "rewards/rejected": 0.8577538728713989, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.12970769444573121, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.329718589782715, | |
| "eval_logps/chosen": -1140.4703369140625, | |
| "eval_logps/rejected": -1050.4912109375, | |
| "eval_loss": 0.6621597409248352, | |
| "eval_rewards/accuracies": 0.6011754870414734, | |
| "eval_rewards/chosen": 1.3875113725662231, | |
| "eval_rewards/margins": 0.21245607733726501, | |
| "eval_rewards/rejected": 1.1750552654266357, | |
| "eval_runtime": 173.5305, | |
| "eval_samples_per_second": 6.863, | |
| "eval_steps_per_second": 6.863, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.13341362857275213, | |
| "grad_norm": 191.9296875, | |
| "learning_rate": 4.334938866246758e-07, | |
| "logits/chosen": -6.161706447601318, | |
| "logits/rejected": -6.022977828979492, | |
| "logps/chosen": -955.3453979492188, | |
| "logps/rejected": -816.9100341796875, | |
| "loss": 0.6297, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 1.105089545249939, | |
| "rewards/margins": 0.2945402264595032, | |
| "rewards/rejected": 0.8105493783950806, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.13711956269977302, | |
| "grad_norm": 208.82858276367188, | |
| "learning_rate": 4.3164134864764725e-07, | |
| "logits/chosen": -6.0739240646362305, | |
| "logits/rejected": -6.2275519371032715, | |
| "logps/chosen": -819.2732543945312, | |
| "logps/rejected": -865.2078247070312, | |
| "loss": 0.7031, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 0.8856269717216492, | |
| "rewards/margins": 0.11217892169952393, | |
| "rewards/rejected": 0.77344810962677, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.1408254968267939, | |
| "grad_norm": 180.92535400390625, | |
| "learning_rate": 4.297888106706187e-07, | |
| "logits/chosen": -6.172797203063965, | |
| "logits/rejected": -6.124629497528076, | |
| "logps/chosen": -935.8492431640625, | |
| "logps/rejected": -818.2625732421875, | |
| "loss": 0.6061, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": 1.0353577136993408, | |
| "rewards/margins": 0.3515828251838684, | |
| "rewards/rejected": 0.683775007724762, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.1445314309538148, | |
| "grad_norm": 214.0187225341797, | |
| "learning_rate": 4.2793627269359016e-07, | |
| "logits/chosen": -6.215329170227051, | |
| "logits/rejected": -6.21909236907959, | |
| "logps/chosen": -1081.975341796875, | |
| "logps/rejected": -913.9468994140625, | |
| "loss": 0.5974, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 1.190735101699829, | |
| "rewards/margins": 0.33388403058052063, | |
| "rewards/rejected": 0.8568509817123413, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.14823736508083568, | |
| "grad_norm": 176.70887756347656, | |
| "learning_rate": 4.2608373471656166e-07, | |
| "logits/chosen": -6.156098365783691, | |
| "logits/rejected": -6.209042072296143, | |
| "logps/chosen": -976.1398315429688, | |
| "logps/rejected": -873.5931396484375, | |
| "loss": 0.6532, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": 1.1283199787139893, | |
| "rewards/margins": 0.22726468741893768, | |
| "rewards/rejected": 0.9010552167892456, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.14823736508083568, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.327154636383057, | |
| "eval_logps/chosen": -1140.1060791015625, | |
| "eval_logps/rejected": -1050.1492919921875, | |
| "eval_loss": 0.6659889817237854, | |
| "eval_rewards/accuracies": 0.6028547286987305, | |
| "eval_rewards/chosen": 1.4239270687103271, | |
| "eval_rewards/margins": 0.2146778702735901, | |
| "eval_rewards/rejected": 1.2092490196228027, | |
| "eval_runtime": 173.6869, | |
| "eval_samples_per_second": 6.857, | |
| "eval_steps_per_second": 6.857, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.15194329920785657, | |
| "grad_norm": 169.63328552246094, | |
| "learning_rate": 4.242311967395331e-07, | |
| "logits/chosen": -6.142382621765137, | |
| "logits/rejected": -6.167304992675781, | |
| "logps/chosen": -947.6598510742188, | |
| "logps/rejected": -787.0640869140625, | |
| "loss": 0.6019, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 1.2996371984481812, | |
| "rewards/margins": 0.3752737045288086, | |
| "rewards/rejected": 0.9243636131286621, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.15564923333487748, | |
| "grad_norm": 270.06866455078125, | |
| "learning_rate": 4.223786587625046e-07, | |
| "logits/chosen": -6.093822002410889, | |
| "logits/rejected": -6.110901832580566, | |
| "logps/chosen": -937.8591918945312, | |
| "logps/rejected": -855.3360595703125, | |
| "loss": 0.6348, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": 1.346130132675171, | |
| "rewards/margins": 0.3189659118652344, | |
| "rewards/rejected": 1.0271642208099365, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.15935516746189837, | |
| "grad_norm": 124.60543823242188, | |
| "learning_rate": 4.2052612078547607e-07, | |
| "logits/chosen": -6.153736591339111, | |
| "logits/rejected": -6.0936784744262695, | |
| "logps/chosen": -891.9788208007812, | |
| "logps/rejected": -771.3781127929688, | |
| "loss": 0.6269, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 1.1474323272705078, | |
| "rewards/margins": 0.3255314826965332, | |
| "rewards/rejected": 0.8219007253646851, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.16306110158891926, | |
| "grad_norm": 169.39234924316406, | |
| "learning_rate": 4.186735828084476e-07, | |
| "logits/chosen": -6.129828453063965, | |
| "logits/rejected": -6.149449348449707, | |
| "logps/chosen": -891.6807861328125, | |
| "logps/rejected": -785.4395751953125, | |
| "loss": 0.6103, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 1.2507822513580322, | |
| "rewards/margins": 0.34615927934646606, | |
| "rewards/rejected": 0.9046230316162109, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.16676703571594015, | |
| "grad_norm": 182.51864624023438, | |
| "learning_rate": 4.16821044831419e-07, | |
| "logits/chosen": -6.106880187988281, | |
| "logits/rejected": -6.003333568572998, | |
| "logps/chosen": -994.0611572265625, | |
| "logps/rejected": -866.1448974609375, | |
| "loss": 0.6798, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": 1.1819612979888916, | |
| "rewards/margins": 0.22734245657920837, | |
| "rewards/rejected": 0.9546189308166504, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.16676703571594015, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.315321445465088, | |
| "eval_logps/chosen": -1139.016845703125, | |
| "eval_logps/rejected": -1049.19189453125, | |
| "eval_loss": 0.6655510067939758, | |
| "eval_rewards/accuracies": 0.6053736209869385, | |
| "eval_rewards/chosen": 1.5328552722930908, | |
| "eval_rewards/margins": 0.22787250578403473, | |
| "eval_rewards/rejected": 1.3049829006195068, | |
| "eval_runtime": 174.0654, | |
| "eval_samples_per_second": 6.842, | |
| "eval_steps_per_second": 6.842, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.17047296984296104, | |
| "grad_norm": 175.39608764648438, | |
| "learning_rate": 4.149685068543905e-07, | |
| "logits/chosen": -6.059579372406006, | |
| "logits/rejected": -6.104693412780762, | |
| "logps/chosen": -972.4683837890625, | |
| "logps/rejected": -834.6124877929688, | |
| "loss": 0.6116, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": 1.3684813976287842, | |
| "rewards/margins": 0.38127750158309937, | |
| "rewards/rejected": 0.9872040748596191, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.17417890396998192, | |
| "grad_norm": 230.60934448242188, | |
| "learning_rate": 4.1311596887736194e-07, | |
| "logits/chosen": -6.06960916519165, | |
| "logits/rejected": -6.0307111740112305, | |
| "logps/chosen": -868.5339965820312, | |
| "logps/rejected": -845.7874755859375, | |
| "loss": 0.6496, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": 1.1452016830444336, | |
| "rewards/margins": 0.25945180654525757, | |
| "rewards/rejected": 0.8857498168945312, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.1778848380970028, | |
| "grad_norm": 196.1241912841797, | |
| "learning_rate": 4.1126343090033344e-07, | |
| "logits/chosen": -6.163808822631836, | |
| "logits/rejected": -6.103111267089844, | |
| "logps/chosen": -993.4736328125, | |
| "logps/rejected": -810.2939453125, | |
| "loss": 0.5573, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": 1.1358219385147095, | |
| "rewards/margins": 0.4707656502723694, | |
| "rewards/rejected": 0.6650562286376953, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.18159077222402373, | |
| "grad_norm": 225.72552490234375, | |
| "learning_rate": 4.094108929233049e-07, | |
| "logits/chosen": -6.20804500579834, | |
| "logits/rejected": -6.198565483093262, | |
| "logps/chosen": -967.2512817382812, | |
| "logps/rejected": -881.41552734375, | |
| "loss": 0.6359, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": 1.2158067226409912, | |
| "rewards/margins": 0.3084403872489929, | |
| "rewards/rejected": 0.9073662757873535, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.18529670635104462, | |
| "grad_norm": 212.1605224609375, | |
| "learning_rate": 4.075583549462764e-07, | |
| "logits/chosen": -6.132593631744385, | |
| "logits/rejected": -6.051444053649902, | |
| "logps/chosen": -943.2779541015625, | |
| "logps/rejected": -822.4968872070312, | |
| "loss": 0.6209, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 1.2602580785751343, | |
| "rewards/margins": 0.3520536720752716, | |
| "rewards/rejected": 0.9082044363021851, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.18529670635104462, | |
      "eval_logits/chosen": null,
| "eval_logits/rejected": -6.321292400360107, | |
| "eval_logps/chosen": -1139.3175048828125, | |
| "eval_logps/rejected": -1049.5101318359375, | |
| "eval_loss": 0.6620848774909973, | |
| "eval_rewards/accuracies": 0.6011754870414734, | |
| "eval_rewards/chosen": 1.5027841329574585, | |
| "eval_rewards/margins": 0.2296140044927597, | |
| "eval_rewards/rejected": 1.2731702327728271, | |
| "eval_runtime": 174.6102, | |
| "eval_samples_per_second": 6.821, | |
| "eval_steps_per_second": 6.821, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.1890026404780655, | |
| "grad_norm": 153.60992431640625, | |
| "learning_rate": 4.0570581696924785e-07, | |
| "logits/chosen": -5.990109443664551, | |
| "logits/rejected": -5.986026763916016, | |
| "logps/chosen": -898.7940673828125, | |
| "logps/rejected": -799.6437377929688, | |
| "loss": 0.6165, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": 1.1576520204544067, | |
| "rewards/margins": 0.3633851110935211, | |
| "rewards/rejected": 0.794266939163208, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.1927085746050864, | |
| "grad_norm": 172.2601776123047, | |
| "learning_rate": 4.038532789922193e-07, | |
| "logits/chosen": -6.226175308227539, | |
| "logits/rejected": -6.190736293792725, | |
| "logps/chosen": -883.1309814453125, | |
| "logps/rejected": -794.0902099609375, | |
| "loss": 0.6212, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": 1.1897079944610596, | |
| "rewards/margins": 0.35320332646369934, | |
| "rewards/rejected": 0.8365045785903931, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.19641450873210728, | |
| "grad_norm": 192.50827026367188, | |
| "learning_rate": 4.0200074101519076e-07, | |
| "logits/chosen": -6.055853843688965, | |
      "logits/rejected": null,
| "logps/chosen": -956.99267578125, | |
| "logps/rejected": -887.1090087890625, | |
| "loss": 0.6176, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": 1.1858432292938232, | |
| "rewards/margins": 0.29170137643814087, | |
| "rewards/rejected": 0.8941418528556824, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.20012044285912817, | |
| "grad_norm": 207.009521484375, | |
| "learning_rate": 4.0014820303816226e-07, | |
| "logits/chosen": -6.144883155822754, | |
      "logits/rejected": null,
| "logps/chosen": -929.91943359375, | |
| "logps/rejected": -834.5404052734375, | |
| "loss": 0.6305, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": 0.9971332550048828, | |
| "rewards/margins": 0.3494023382663727, | |
| "rewards/rejected": 0.6477310061454773, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.20382637698614908, | |
| "grad_norm": 224.73924255371094, | |
| "learning_rate": 3.982956650611337e-07, | |
| "logits/chosen": -6.141475677490234, | |
| "logits/rejected": -6.262620449066162, | |
| "logps/chosen": -966.7326049804688, | |
| "logps/rejected": -890.4728393554688, | |
| "loss": 0.6286, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.9653292894363403, | |
| "rewards/margins": 0.29025983810424805, | |
| "rewards/rejected": 0.6750694513320923, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.20382637698614908, | |
      "eval_logits/chosen": null,
| "eval_logits/rejected": -6.334640979766846, | |
| "eval_logps/chosen": -1142.1336669921875, | |
| "eval_logps/rejected": -1052.1123046875, | |
| "eval_loss": 0.660417377948761, | |
| "eval_rewards/accuracies": 0.6179680824279785, | |
| "eval_rewards/chosen": 1.2211687564849854, | |
| "eval_rewards/margins": 0.2082298845052719, | |
| "eval_rewards/rejected": 1.0129389762878418, | |
| "eval_runtime": 174.5819, | |
| "eval_samples_per_second": 6.822, | |
| "eval_steps_per_second": 6.822, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.20753231111316997, | |
| "grad_norm": 318.4903869628906, | |
| "learning_rate": 3.964431270841052e-07, | |
| "logits/chosen": -6.1527791023254395, | |
| "logits/rejected": -6.1851677894592285, | |
| "logps/chosen": -932.6305541992188, | |
| "logps/rejected": -875.3505859375, | |
| "loss": 0.7095, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": 1.0057893991470337, | |
| "rewards/margins": 0.13861322402954102, | |
| "rewards/rejected": 0.8671760559082031, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.21123824524019086, | |
| "grad_norm": 176.8316192626953, | |
| "learning_rate": 3.9459058910707667e-07, | |
| "logits/chosen": -6.179142951965332, | |
| "logits/rejected": -6.174668788909912, | |
| "logps/chosen": -1010.54052734375, | |
| "logps/rejected": -875.7761840820312, | |
| "loss": 0.607, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 1.096440076828003, | |
| "rewards/margins": 0.36430811882019043, | |
| "rewards/rejected": 0.7321318984031677, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.21494417936721175, | |
| "grad_norm": 215.4343719482422, | |
| "learning_rate": 3.927380511300482e-07, | |
| "logits/chosen": -6.139018535614014, | |
| "logits/rejected": -6.070583343505859, | |
| "logps/chosen": -961.7306518554688, | |
| "logps/rejected": -831.40380859375, | |
| "loss": 0.6366, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": 1.0454758405685425, | |
| "rewards/margins": 0.31583863496780396, | |
| "rewards/rejected": 0.7296372652053833, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.21865011349423263, | |
| "grad_norm": 202.57000732421875, | |
| "learning_rate": 3.908855131530196e-07, | |
| "logits/chosen": -6.21251106262207, | |
| "logits/rejected": -6.157750606536865, | |
| "logps/chosen": -1023.9791259765625, | |
| "logps/rejected": -936.6031494140625, | |
| "loss": 0.6376, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 1.146539568901062, | |
| "rewards/margins": 0.26862841844558716, | |
| "rewards/rejected": 0.8779112100601196, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.22235604762125352, | |
| "grad_norm": 182.68316650390625, | |
| "learning_rate": 3.890329751759911e-07, | |
| "logits/chosen": -6.045320987701416, | |
| "logits/rejected": -6.124794006347656, | |
| "logps/chosen": -956.4278564453125, | |
| "logps/rejected": -807.8424682617188, | |
| "loss": 0.6274, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 1.1107687950134277, | |
| "rewards/margins": 0.26531320810317993, | |
| "rewards/rejected": 0.845455527305603, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.22235604762125352, | |
      "eval_logits/chosen": null,
| "eval_logits/rejected": -6.337012767791748, | |
| "eval_logps/chosen": -1139.7646484375, | |
| "eval_logps/rejected": -1050.012451171875, | |
| "eval_loss": 0.6612704396247864, | |
| "eval_rewards/accuracies": 0.6179680824279785, | |
| "eval_rewards/chosen": 1.458066701889038, | |
| "eval_rewards/margins": 0.23514851927757263, | |
| "eval_rewards/rejected": 1.2229182720184326, | |
| "eval_runtime": 174.599, | |
| "eval_samples_per_second": 6.821, | |
| "eval_steps_per_second": 6.821, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.22606198174827444, | |
| "grad_norm": 190.41612243652344, | |
| "learning_rate": 3.8718043719896253e-07, | |
| "logits/chosen": -6.1819024085998535, | |
| "logits/rejected": -6.2130818367004395, | |
| "logps/chosen": -982.65869140625, | |
| "logps/rejected": -875.1731567382812, | |
| "loss": 0.6125, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 1.2317478656768799, | |
| "rewards/margins": 0.363824725151062, | |
| "rewards/rejected": 0.8679230809211731, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.22976791587529533, | |
| "grad_norm": 147.1990966796875, | |
| "learning_rate": 3.8532789922193404e-07, | |
| "logits/chosen": -6.232724666595459, | |
| "logits/rejected": -6.308589458465576, | |
| "logps/chosen": -914.0718994140625, | |
| "logps/rejected": -837.0066528320312, | |
| "loss": 0.6067, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": 1.2401645183563232, | |
| "rewards/margins": 0.37453925609588623, | |
| "rewards/rejected": 0.8656252026557922, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.2334738500023162, | |
| "grad_norm": 200.28050231933594, | |
| "learning_rate": 3.834753612449055e-07, | |
| "logits/chosen": -6.240880012512207, | |
| "logits/rejected": -6.253479957580566, | |
| "logps/chosen": -951.05712890625, | |
| "logps/rejected": -853.3023681640625, | |
| "loss": 0.6128, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 1.3768174648284912, | |
| "rewards/margins": 0.3513033986091614, | |
| "rewards/rejected": 1.0255142450332642, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.2371797841293371, | |
| "grad_norm": 223.86203002929688, | |
| "learning_rate": 3.81622823267877e-07, | |
| "logits/chosen": -6.115043640136719, | |
| "logits/rejected": -6.189513206481934, | |
| "logps/chosen": -951.8463745117188, | |
| "logps/rejected": -889.0245971679688, | |
| "loss": 0.706, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": 1.218057632446289, | |
| "rewards/margins": 0.1631278544664383, | |
| "rewards/rejected": 1.0549296140670776, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.240885718256358, | |
| "grad_norm": 211.12872314453125, | |
| "learning_rate": 3.7977028529084845e-07, | |
| "logits/chosen": -6.2889084815979, | |
| "logits/rejected": -6.2076416015625, | |
| "logps/chosen": -1071.9197998046875, | |
| "logps/rejected": -939.8733520507812, | |
| "loss": 0.6382, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": 1.2747665643692017, | |
| "rewards/margins": 0.3331315815448761, | |
| "rewards/rejected": 0.941635012626648, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.240885718256358, | |
      "eval_logits/chosen": null,
| "eval_logits/rejected": -6.333639621734619, | |
| "eval_logps/chosen": -1140.6971435546875, | |
| "eval_logps/rejected": -1050.910888671875, | |
| "eval_loss": 0.6616818904876709, | |
| "eval_rewards/accuracies": 0.6204869747161865, | |
| "eval_rewards/chosen": 1.36481773853302, | |
| "eval_rewards/margins": 0.23173516988754272, | |
| "eval_rewards/rejected": 1.133082628250122, | |
| "eval_runtime": 174.4739, | |
| "eval_samples_per_second": 6.826, | |
| "eval_steps_per_second": 6.826, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.24459165238337888, | |
| "grad_norm": 157.5903778076172, | |
| "learning_rate": 3.779177473138199e-07, | |
| "logits/chosen": -6.22428560256958, | |
| "logits/rejected": -6.160924434661865, | |
| "logps/chosen": -881.8909301757812, | |
| "logps/rejected": -848.2203979492188, | |
| "loss": 0.6146, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 1.0156034231185913, | |
| "rewards/margins": 0.35982877016067505, | |
| "rewards/rejected": 0.6557747721672058, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.24829758651039976, | |
| "grad_norm": 210.9625244140625, | |
| "learning_rate": 3.7606520933679135e-07, | |
| "logits/chosen": -6.125003337860107, | |
| "logits/rejected": -6.0754289627075195, | |
| "logps/chosen": -884.8665771484375, | |
| "logps/rejected": -789.2760620117188, | |
| "loss": 0.651, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": 1.0800001621246338, | |
| "rewards/margins": 0.25831884145736694, | |
| "rewards/rejected": 0.8216812014579773, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.25200352063742065, | |
| "grad_norm": 172.888916015625, | |
| "learning_rate": 3.7421267135976286e-07, | |
| "logits/chosen": -6.232366561889648, | |
| "logits/rejected": -6.1383185386657715, | |
| "logps/chosen": -960.3611450195312, | |
| "logps/rejected": -835.4730224609375, | |
| "loss": 0.6177, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": 1.2130458354949951, | |
| "rewards/margins": 0.35953769087791443, | |
| "rewards/rejected": 0.8535081744194031, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.25570945476444157, | |
| "grad_norm": 186.8795623779297, | |
| "learning_rate": 3.723601333827343e-07, | |
| "logits/chosen": -6.141107082366943, | |
| "logits/rejected": -6.195657253265381, | |
| "logps/chosen": -938.1901245117188, | |
| "logps/rejected": -814.3914184570312, | |
| "loss": 0.6728, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 0.9760942459106445, | |
| "rewards/margins": 0.2641645669937134, | |
| "rewards/rejected": 0.7119296789169312, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.25941538889146243, | |
| "grad_norm": 152.13043212890625, | |
| "learning_rate": 3.705075954057058e-07, | |
| "logits/chosen": -6.24930477142334, | |
| "logits/rejected": -6.1736040115356445, | |
| "logps/chosen": -936.4893798828125, | |
| "logps/rejected": -848.609375, | |
| "loss": 0.5967, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 1.0887714624404907, | |
| "rewards/margins": 0.3751378059387207, | |
| "rewards/rejected": 0.71363365650177, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.25941538889146243, | |
      "eval_logits/chosen": null,
| "eval_logits/rejected": -6.339428901672363, | |
| "eval_logps/chosen": -1141.5142822265625, | |
| "eval_logps/rejected": -1051.634521484375, | |
| "eval_loss": 0.6639354825019836, | |
| "eval_rewards/accuracies": 0.6154491901397705, | |
| "eval_rewards/chosen": 1.2831051349639893, | |
| "eval_rewards/margins": 0.22238638997077942, | |
| "eval_rewards/rejected": 1.0607186555862427, | |
| "eval_runtime": 174.5031, | |
| "eval_samples_per_second": 6.825, | |
| "eval_steps_per_second": 6.825, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.26312132301848334, | |
| "grad_norm": 189.0890655517578, | |
| "learning_rate": 3.6865505742867727e-07, | |
| "logits/chosen": -6.229816436767578, | |
| "logits/rejected": -6.161192893981934, | |
| "logps/chosen": -850.07568359375, | |
| "logps/rejected": -789.8104248046875, | |
| "loss": 0.6791, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 0.9719418287277222, | |
| "rewards/margins": 0.19261571764945984, | |
| "rewards/rejected": 0.7793260812759399, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.26682725714550426, | |
| "grad_norm": 243.8544921875, | |
| "learning_rate": 3.668025194516488e-07, | |
| "logits/chosen": -6.192295551300049, | |
| "logits/rejected": -6.1518754959106445, | |
| "logps/chosen": -950.3997802734375, | |
| "logps/rejected": -804.35009765625, | |
| "loss": 0.623, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 1.1841661930084229, | |
| "rewards/margins": 0.35910895466804504, | |
| "rewards/rejected": 0.8250571489334106, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.2705331912725251, | |
| "grad_norm": 194.04550170898438, | |
| "learning_rate": 3.649499814746202e-07, | |
| "logits/chosen": -6.118433952331543, | |
| "logits/rejected": -6.096287727355957, | |
| "logps/chosen": -998.7361450195312, | |
| "logps/rejected": -866.0007934570312, | |
| "loss": 0.5717, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": 1.341344952583313, | |
| "rewards/margins": 0.4694565236568451, | |
| "rewards/rejected": 0.8718884587287903, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.27423912539954604, | |
| "grad_norm": 199.3968963623047, | |
| "learning_rate": 3.630974434975917e-07, | |
| "logits/chosen": -6.136265754699707, | |
| "logits/rejected": -6.221334457397461, | |
| "logps/chosen": -959.4265747070312, | |
| "logps/rejected": -911.4915771484375, | |
| "loss": 0.6336, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": 1.3660612106323242, | |
| "rewards/margins": 0.3232826888561249, | |
| "rewards/rejected": 1.0427783727645874, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.2779450595265669, | |
| "grad_norm": 184.0609130859375, | |
| "learning_rate": 3.6124490552056313e-07, | |
| "logits/chosen": -6.145341873168945, | |
| "logits/rejected": -6.150424480438232, | |
| "logps/chosen": -933.7684326171875, | |
| "logps/rejected": -828.2578125, | |
| "loss": 0.6562, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 1.300537109375, | |
| "rewards/margins": 0.30143502354621887, | |
| "rewards/rejected": 0.999101996421814, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.2779450595265669, | |
      "eval_logits/chosen": null,
| "eval_logits/rejected": -6.332278251647949, | |
| "eval_logps/chosen": -1140.3084716796875, | |
| "eval_logps/rejected": -1050.5599365234375, | |
| "eval_loss": 0.6633999347686768, | |
| "eval_rewards/accuracies": 0.6171284914016724, | |
| "eval_rewards/chosen": 1.4036915302276611, | |
| "eval_rewards/margins": 0.23550742864608765, | |
| "eval_rewards/rejected": 1.1681841611862183, | |
| "eval_runtime": 174.2841, | |
| "eval_samples_per_second": 6.834, | |
| "eval_steps_per_second": 6.834, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.2816509936535878, | |
| "grad_norm": 187.0355224609375, | |
| "learning_rate": 3.5939236754353464e-07, | |
| "logits/chosen": -6.186856746673584, | |
| "logits/rejected": -6.131129264831543, | |
| "logps/chosen": -941.7112426757812, | |
| "logps/rejected": -817.9475708007812, | |
| "loss": 0.5649, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": 1.3290807008743286, | |
| "rewards/margins": 0.5047949552536011, | |
| "rewards/rejected": 0.8242858052253723, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.2853569277806087, | |
| "grad_norm": 211.35397338867188, | |
| "learning_rate": 3.575398295665061e-07, | |
| "logits/chosen": -6.122169494628906, | |
| "logits/rejected": -6.1151204109191895, | |
| "logps/chosen": -887.787109375, | |
| "logps/rejected": -862.0372314453125, | |
| "loss": 0.6854, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": 1.1365787982940674, | |
| "rewards/margins": 0.2167353630065918, | |
| "rewards/rejected": 0.9198434948921204, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.2890628619076296, | |
| "grad_norm": 198.17198181152344, | |
| "learning_rate": 3.556872915894776e-07, | |
| "logits/chosen": -6.241828918457031, | |
| "logits/rejected": -6.265792369842529, | |
| "logps/chosen": -929.9652099609375, | |
| "logps/rejected": -877.3856201171875, | |
| "loss": 0.614, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": 1.117902398109436, | |
| "rewards/margins": 0.3560691475868225, | |
| "rewards/rejected": 0.7618332505226135, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.2927687960346505, | |
| "grad_norm": 150.22120666503906, | |
| "learning_rate": 3.5383475361244905e-07, | |
| "logits/chosen": -6.19686222076416, | |
| "logits/rejected": -6.209750175476074, | |
| "logps/chosen": -1054.142578125, | |
| "logps/rejected": -874.3284912109375, | |
| "loss": 0.5739, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 1.2843711376190186, | |
| "rewards/margins": 0.4667224884033203, | |
| "rewards/rejected": 0.817648708820343, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.29647473016167136, | |
| "grad_norm": 243.4776611328125, | |
| "learning_rate": 3.519822156354205e-07, | |
| "logits/chosen": -6.137392520904541, | |
| "logits/rejected": -6.096640586853027, | |
| "logps/chosen": -926.0994262695312, | |
| "logps/rejected": -875.486328125, | |
| "loss": 0.6676, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 1.1599671840667725, | |
| "rewards/margins": 0.2768460214138031, | |
| "rewards/rejected": 0.883121132850647, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.29647473016167136, | |
      "eval_logits/chosen": null,
| "eval_logits/rejected": -6.3336334228515625, | |
| "eval_logps/chosen": -1140.1082763671875, | |
| "eval_logps/rejected": -1050.391845703125, | |
| "eval_loss": 0.6643325090408325, | |
| "eval_rewards/accuracies": 0.6272040009498596, | |
| "eval_rewards/chosen": 1.4237107038497925, | |
| "eval_rewards/margins": 0.23872110247612, | |
| "eval_rewards/rejected": 1.1849894523620605, | |
| "eval_runtime": 174.4194, | |
| "eval_samples_per_second": 6.828, | |
| "eval_steps_per_second": 6.828, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.3001806642886923, | |
| "grad_norm": 199.17247009277344, | |
| "learning_rate": 3.5012967765839195e-07, | |
| "logits/chosen": -6.241001129150391, | |
| "logits/rejected": -6.098967552185059, | |
| "logps/chosen": -920.8816528320312, | |
| "logps/rejected": -898.8351440429688, | |
| "loss": 0.7121, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": 1.0589300394058228, | |
| "rewards/margins": 0.1631755530834198, | |
| "rewards/rejected": 0.8957546353340149, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.30388659841571314, | |
| "grad_norm": 250.08518981933594, | |
| "learning_rate": 3.4827713968136346e-07, | |
| "logits/chosen": -6.267752647399902, | |
| "logits/rejected": -6.312867164611816, | |
| "logps/chosen": -1013.2420654296875, | |
| "logps/rejected": -958.6898193359375, | |
| "loss": 0.6355, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": 1.1633565425872803, | |
| "rewards/margins": 0.3349772095680237, | |
| "rewards/rejected": 0.8283793330192566, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.30759253254273405, | |
| "grad_norm": 208.069580078125, | |
| "learning_rate": 3.464246017043349e-07, | |
| "logits/chosen": -6.2897796630859375, | |
| "logits/rejected": -6.228161811828613, | |
| "logps/chosen": -933.79541015625, | |
| "logps/rejected": -842.0236206054688, | |
| "loss": 0.64, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 1.0309927463531494, | |
| "rewards/margins": 0.28416553139686584, | |
| "rewards/rejected": 0.7468270659446716, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.31129846666975497, | |
| "grad_norm": 232.0865478515625, | |
| "learning_rate": 3.445720637273064e-07, | |
| "logits/chosen": -6.142411708831787, | |
| "logits/rejected": -6.164281845092773, | |
| "logps/chosen": -1016.1708984375, | |
| "logps/rejected": -898.3994140625, | |
| "loss": 0.6169, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 1.0414842367172241, | |
| "rewards/margins": 0.35715141892433167, | |
| "rewards/rejected": 0.6843328475952148, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.31500440079677583, | |
| "grad_norm": 193.65232849121094, | |
| "learning_rate": 3.4271952575027787e-07, | |
| "logits/chosen": -6.228142738342285, | |
| "logits/rejected": -6.197975158691406, | |
| "logps/chosen": -931.4905395507812, | |
| "logps/rejected": -856.0863037109375, | |
| "loss": 0.6805, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 1.047786831855774, | |
| "rewards/margins": 0.20453695952892303, | |
| "rewards/rejected": 0.8432496786117554, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.31500440079677583, | |
      "eval_logits/chosen": null,
| "eval_logits/rejected": -6.3389668464660645, | |
| "eval_logps/chosen": -1141.093505859375, | |
| "eval_logps/rejected": -1051.356201171875, | |
| "eval_loss": 0.6609283089637756, | |
| "eval_rewards/accuracies": 0.6120907068252563, | |
| "eval_rewards/chosen": 1.3251850605010986, | |
| "eval_rewards/margins": 0.23663325607776642, | |
| "eval_rewards/rejected": 1.0885517597198486, | |
| "eval_runtime": 174.4195, | |
| "eval_samples_per_second": 6.828, | |
| "eval_steps_per_second": 6.828, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.31871033492379675, | |
| "grad_norm": 191.5203094482422, | |
| "learning_rate": 3.4086698777324937e-07, | |
| "logits/chosen": -6.188792705535889, | |
| "logits/rejected": -6.236809730529785, | |
| "logps/chosen": -977.2009887695312, | |
| "logps/rejected": -865.64306640625, | |
| "loss": 0.5709, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": 1.2319726943969727, | |
| "rewards/margins": 0.48221221566200256, | |
| "rewards/rejected": 0.7497605085372925, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.3224162690508176, | |
| "grad_norm": 179.01022338867188, | |
| "learning_rate": 3.3901444979622077e-07, | |
| "logits/chosen": -6.048168659210205, | |
| "logits/rejected": -6.131080627441406, | |
| "logps/chosen": -959.7575073242188, | |
| "logps/rejected": -822.2799072265625, | |
| "loss": 0.6798, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": 1.2015607357025146, | |
| "rewards/margins": 0.25019967555999756, | |
| "rewards/rejected": 0.9513611793518066, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.3261222031778385, | |
| "grad_norm": 164.24993896484375, | |
| "learning_rate": 3.371619118191923e-07, | |
| "logits/chosen": -6.164304256439209, | |
| "logits/rejected": -6.14687442779541, | |
| "logps/chosen": -984.1392822265625, | |
| "logps/rejected": -887.4852294921875, | |
| "loss": 0.5921, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 1.326623558998108, | |
| "rewards/margins": 0.4440253674983978, | |
| "rewards/rejected": 0.8825982213020325, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.3298281373048594, | |
| "grad_norm": 208.62112426757812, | |
| "learning_rate": 3.3530937384216373e-07, | |
| "logits/chosen": -6.055702209472656, | |
| "logits/rejected": -6.137775421142578, | |
| "logps/chosen": -966.544921875, | |
| "logps/rejected": -852.4841918945312, | |
| "loss": 0.6072, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": 1.298642873764038, | |
| "rewards/margins": 0.36649665236473083, | |
| "rewards/rejected": 0.932146430015564, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.3335340714318803, | |
| "grad_norm": 198.2984619140625, | |
| "learning_rate": 3.3345683586513524e-07, | |
| "logits/chosen": -6.077668190002441, | |
| "logits/rejected": -6.017401218414307, | |
| "logps/chosen": -968.5540771484375, | |
| "logps/rejected": -825.6281127929688, | |
| "loss": 0.5936, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": 1.3333479166030884, | |
| "rewards/margins": 0.48964110016822815, | |
| "rewards/rejected": 0.8437067866325378, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.3335340714318803, | |
      "eval_logits/chosen": null,
| "eval_logits/rejected": -6.333832263946533, | |
| "eval_logps/chosen": -1138.702392578125, | |
| "eval_logps/rejected": -1049.201904296875, | |
| "eval_loss": 0.6653527021408081, | |
| "eval_rewards/accuracies": 0.6322417855262756, | |
| "eval_rewards/chosen": 1.564302682876587, | |
| "eval_rewards/margins": 0.26032954454421997, | |
| "eval_rewards/rejected": 1.3039733171463013, | |
| "eval_runtime": 174.3951, | |
| "eval_samples_per_second": 6.829, | |
| "eval_steps_per_second": 6.829, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.3372400055589012, | |
| "grad_norm": 172.08016967773438, | |
| "learning_rate": 3.316042978881067e-07, | |
| "logits/chosen": -6.152904033660889, | |
| "logits/rejected": -6.063776969909668, | |
| "logps/chosen": -934.8038940429688, | |
| "logps/rejected": -874.7457885742188, | |
| "loss": 0.6239, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 1.3152906894683838, | |
| "rewards/margins": 0.36238130927085876, | |
| "rewards/rejected": 0.9529093503952026, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.3409459396859221, | |
| "grad_norm": 154.69894409179688, | |
| "learning_rate": 3.297517599110782e-07, | |
| "logits/chosen": -6.253002166748047, | |
| "logits/rejected": -6.253316402435303, | |
| "logps/chosen": -1026.7967529296875, | |
| "logps/rejected": -909.5032348632812, | |
| "loss": 0.6319, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 1.4157216548919678, | |
| "rewards/margins": 0.3379477262496948, | |
| "rewards/rejected": 1.077773928642273, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.344651873812943, | |
| "grad_norm": 128.03990173339844, | |
| "learning_rate": 3.278992219340496e-07, | |
| "logits/chosen": -6.163126468658447, | |
| "logits/rejected": -6.272846698760986, | |
| "logps/chosen": -1002.2288208007812, | |
| "logps/rejected": -850.8206787109375, | |
| "loss": 0.5969, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 1.2641438245773315, | |
| "rewards/margins": 0.45238691568374634, | |
| "rewards/rejected": 0.81175696849823, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.34835780793996385, | |
| "grad_norm": 126.5301284790039, | |
| "learning_rate": 3.260466839570211e-07, | |
| "logits/chosen": -6.24020528793335, | |
| "logits/rejected": -6.305496692657471, | |
| "logps/chosen": -831.75244140625, | |
| "logps/rejected": -811.0787353515625, | |
| "loss": 0.6274, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 1.032156229019165, | |
| "rewards/margins": 0.3172861337661743, | |
| "rewards/rejected": 0.7148701548576355, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.35206374206698476, | |
| "grad_norm": 211.2845916748047, | |
| "learning_rate": 3.2419414597999255e-07, | |
| "logits/chosen": -6.139876365661621, | |
| "logits/rejected": -6.111436367034912, | |
| "logps/chosen": -975.7356567382812, | |
| "logps/rejected": -844.0784912109375, | |
| "loss": 0.6325, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": 1.1637299060821533, | |
| "rewards/margins": 0.3424530327320099, | |
| "rewards/rejected": 0.8212767839431763, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.35206374206698476, | |
      "eval_logits/chosen": null,
| "eval_logits/rejected": -6.342590808868408, | |
| "eval_logps/chosen": -1141.1739501953125, | |
| "eval_logps/rejected": -1051.595458984375, | |
| "eval_loss": 0.6582168936729431, | |
| "eval_rewards/accuracies": 0.6213266253471375, | |
| "eval_rewards/chosen": 1.3171454668045044, | |
| "eval_rewards/margins": 0.25251859426498413, | |
| "eval_rewards/rejected": 1.064626932144165, | |
| "eval_runtime": 174.1358, | |
| "eval_samples_per_second": 6.839, | |
| "eval_steps_per_second": 6.839, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.3557696761940056, | |
| "grad_norm": 203.0992431640625, | |
| "learning_rate": 3.2234160800296406e-07, | |
| "logits/chosen": -6.084280967712402, | |
| "logits/rejected": -6.071610450744629, | |
| "logps/chosen": -821.4600830078125, | |
| "logps/rejected": -727.051025390625, | |
| "loss": 0.6376, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": 1.0315455198287964, | |
| "rewards/margins": 0.3190918266773224, | |
| "rewards/rejected": 0.7124537229537964, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.35947561032102654, | |
| "grad_norm": 180.57797241210938, | |
| "learning_rate": 3.204890700259355e-07, | |
| "logits/chosen": -6.084843635559082, | |
| "logits/rejected": -6.00299072265625, | |
| "logps/chosen": -981.2433471679688, | |
| "logps/rejected": -845.26123046875, | |
| "loss": 0.5913, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": 1.1229795217514038, | |
| "rewards/margins": 0.41206812858581543, | |
| "rewards/rejected": 0.7109113931655884, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.36318154444804746, | |
| "grad_norm": 207.84568786621094, | |
| "learning_rate": 3.18636532048907e-07, | |
| "logits/chosen": -6.196352958679199, | |
| "logits/rejected": -6.150284767150879, | |
| "logps/chosen": -915.8230590820312, | |
| "logps/rejected": -826.0857543945312, | |
| "loss": 0.6367, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 1.011979579925537, | |
| "rewards/margins": 0.326684832572937, | |
| "rewards/rejected": 0.6852947473526001, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.3668874785750683, | |
| "grad_norm": 169.62612915039062, | |
| "learning_rate": 3.1678399407187847e-07, | |
| "logits/chosen": -6.155111312866211, | |
| "logits/rejected": -6.252329349517822, | |
| "logps/chosen": -945.7684326171875, | |
| "logps/rejected": -895.0965576171875, | |
| "loss": 0.6591, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": 1.0974936485290527, | |
| "rewards/margins": 0.277460515499115, | |
| "rewards/rejected": 0.820033073425293, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.37059341270208923, | |
| "grad_norm": 164.93836975097656, | |
| "learning_rate": 3.1493145609484997e-07, | |
| "logits/chosen": -6.1941423416137695, | |
| "logits/rejected": -6.119546413421631, | |
| "logps/chosen": -859.4827270507812, | |
| "logps/rejected": -778.8285522460938, | |
| "loss": 0.614, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": 1.124022126197815, | |
| "rewards/margins": 0.3852692246437073, | |
| "rewards/rejected": 0.7387528419494629, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.37059341270208923, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.3517584800720215, | |
| "eval_logps/chosen": -1141.3780517578125, | |
| "eval_logps/rejected": -1051.7454833984375, | |
| "eval_loss": 0.6584185361862183, | |
| "eval_rewards/accuracies": 0.6196473836898804, | |
| "eval_rewards/chosen": 1.296731948852539, | |
| "eval_rewards/margins": 0.24711348116397858, | |
| "eval_rewards/rejected": 1.0496186017990112, | |
| "eval_runtime": 174.4804, | |
| "eval_samples_per_second": 6.826, | |
| "eval_steps_per_second": 6.826, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.3742993468291101, | |
| "grad_norm": 161.15005493164062, | |
| "learning_rate": 3.1307891811782137e-07, | |
| "logits/chosen": -6.150378227233887, | |
| "logits/rejected": -6.203757286071777, | |
| "logps/chosen": -1034.891357421875, | |
| "logps/rejected": -888.6922607421875, | |
| "loss": 0.6111, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": 1.073899507522583, | |
| "rewards/margins": 0.3846450746059418, | |
| "rewards/rejected": 0.6892544031143188, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.378005280956131, | |
| "grad_norm": 167.26861572265625, | |
| "learning_rate": 3.112263801407929e-07, | |
| "logits/chosen": -6.080620765686035, | |
| "logits/rejected": -6.124800682067871, | |
| "logps/chosen": -882.3255615234375, | |
| "logps/rejected": -834.7425537109375, | |
| "loss": 0.6356, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": 1.0985982418060303, | |
| "rewards/margins": 0.2840558588504791, | |
| "rewards/rejected": 0.8145424127578735, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.3817112150831519, | |
| "grad_norm": 143.760009765625, | |
| "learning_rate": 3.0937384216376433e-07, | |
| "logits/chosen": null, | |
| "logits/rejected": -6.1283392906188965, | |
| "logps/chosen": -931.8167114257812, | |
| "logps/rejected": -770.5465087890625, | |
| "loss": 0.5515, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": 1.2119407653808594, | |
| "rewards/margins": 0.5438046455383301, | |
| "rewards/rejected": 0.6681360006332397, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.3854171492101728, | |
| "grad_norm": 213.75035095214844, | |
| "learning_rate": 3.0752130418673583e-07, | |
| "logits/chosen": -6.2023396492004395, | |
| "logits/rejected": -6.126180648803711, | |
| "logps/chosen": -923.2513427734375, | |
| "logps/rejected": -764.4282836914062, | |
| "loss": 0.6242, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": 1.2077882289886475, | |
| "rewards/margins": 0.35475489497184753, | |
| "rewards/rejected": 0.8530333638191223, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.3891230833371937, | |
| "grad_norm": 129.1380157470703, | |
| "learning_rate": 3.056687662097073e-07, | |
| "logits/chosen": -6.151089668273926, | |
| "logits/rejected": -6.196557998657227, | |
| "logps/chosen": -891.5910034179688, | |
| "logps/rejected": -825.3084716796875, | |
| "loss": 0.5819, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": 1.2349982261657715, | |
| "rewards/margins": 0.49733766913414, | |
| "rewards/rejected": 0.7376605272293091, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.3891230833371937, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.354092597961426, | |
| "eval_logps/chosen": -1138.8973388671875, | |
| "eval_logps/rejected": -1049.481689453125, | |
| "eval_loss": 0.6607492566108704, | |
| "eval_rewards/accuracies": 0.6213266253471375, | |
| "eval_rewards/chosen": 1.544799566268921, | |
| "eval_rewards/margins": 0.26879334449768066, | |
| "eval_rewards/rejected": 1.2760061025619507, | |
| "eval_runtime": 174.2371, | |
| "eval_samples_per_second": 6.836, | |
| "eval_steps_per_second": 6.836, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.39282901746421456, | |
| "grad_norm": 164.7945556640625, | |
| "learning_rate": 3.038162282326788e-07, | |
| "logits/chosen": -6.143533229827881, | |
| "logits/rejected": -6.127655982971191, | |
| "logps/chosen": -845.65234375, | |
| "logps/rejected": -816.9622802734375, | |
| "loss": 0.6427, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 1.2775068283081055, | |
| "rewards/margins": 0.3459513187408447, | |
| "rewards/rejected": 0.9315555691719055, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.3965349515912355, | |
| "grad_norm": 149.87388610839844, | |
| "learning_rate": 3.019636902556502e-07, | |
| "logits/chosen": -6.187637805938721, | |
| "logits/rejected": -6.129674434661865, | |
| "logps/chosen": -885.9992065429688, | |
| "logps/rejected": -778.6902465820312, | |
| "loss": 0.608, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": 1.1672834157943726, | |
| "rewards/margins": 0.3892399072647095, | |
| "rewards/rejected": 0.7780434489250183, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.40024088571825633, | |
| "grad_norm": 166.63389587402344, | |
| "learning_rate": 3.001111522786217e-07, | |
| "logits/chosen": -6.271850109100342, | |
| "logits/rejected": -6.208528995513916, | |
| "logps/chosen": -913.72998046875, | |
| "logps/rejected": -773.72412109375, | |
| "loss": 0.6183, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 1.156951904296875, | |
| "rewards/margins": 0.3150864243507385, | |
| "rewards/rejected": 0.8418653607368469, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.40394681984527725, | |
| "grad_norm": 168.648681640625, | |
| "learning_rate": 2.9825861430159315e-07, | |
| "logits/chosen": -6.246833801269531, | |
| "logits/rejected": -6.256269931793213, | |
| "logps/chosen": -948.3607177734375, | |
| "logps/rejected": -898.9035034179688, | |
| "loss": 0.6266, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": 1.2156215906143188, | |
| "rewards/margins": 0.34718313813209534, | |
| "rewards/rejected": 0.8684385418891907, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.40765275397229817, | |
| "grad_norm": 174.19361877441406, | |
| "learning_rate": 2.9640607632456465e-07, | |
| "logits/chosen": -6.159620761871338, | |
| "logits/rejected": -6.141018867492676, | |
| "logps/chosen": -986.7599487304688, | |
| "logps/rejected": -843.1695556640625, | |
| "loss": 0.5832, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 1.3146113157272339, | |
| "rewards/margins": 0.4466518461704254, | |
| "rewards/rejected": 0.8679596185684204, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.40765275397229817, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.353438854217529, | |
| "eval_logps/chosen": -1139.6771240234375, | |
| "eval_logps/rejected": -1050.29736328125, | |
| "eval_loss": 0.6590859293937683, | |
| "eval_rewards/accuracies": 0.6179680824279785, | |
| "eval_rewards/chosen": 1.466820478439331, | |
| "eval_rewards/margins": 0.2723851799964905, | |
| "eval_rewards/rejected": 1.1944352388381958, | |
| "eval_runtime": 174.1419, | |
| "eval_samples_per_second": 6.839, | |
| "eval_steps_per_second": 6.839, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.411358688099319, | |
| "grad_norm": 207.91915893554688, | |
| "learning_rate": 2.945535383475361e-07, | |
| "logits/chosen": -6.101978302001953, | |
| "logits/rejected": -6.107656955718994, | |
| "logps/chosen": -853.7755737304688, | |
| "logps/rejected": -771.1502685546875, | |
| "loss": 0.6212, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 1.1558765172958374, | |
| "rewards/margins": 0.4091036915779114, | |
| "rewards/rejected": 0.7467728853225708, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.41506462222633994, | |
| "grad_norm": 172.02349853515625, | |
| "learning_rate": 2.927010003705076e-07, | |
| "logits/chosen": -6.207159996032715, | |
| "logits/rejected": -6.222277641296387, | |
| "logps/chosen": -924.0470581054688, | |
| "logps/rejected": -801.3013916015625, | |
| "loss": 0.5918, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 1.1804696321487427, | |
| "rewards/margins": 0.443875253200531, | |
| "rewards/rejected": 0.7365943193435669, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.4187705563533608, | |
| "grad_norm": 147.4911346435547, | |
| "learning_rate": 2.9084846239347906e-07, | |
| "logits/chosen": -6.112509727478027, | |
| "logits/rejected": -6.115456581115723, | |
| "logps/chosen": -937.400390625, | |
| "logps/rejected": -851.7019653320312, | |
| "loss": 0.5924, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": 1.4129273891448975, | |
| "rewards/margins": 0.4493323266506195, | |
| "rewards/rejected": 0.9635950326919556, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.4224764904803817, | |
| "grad_norm": 188.7657012939453, | |
| "learning_rate": 2.8899592441645057e-07, | |
| "logits/chosen": -6.175480842590332, | |
| "logits/rejected": -6.202576637268066, | |
| "logps/chosen": -860.0646362304688, | |
| "logps/rejected": -785.8237915039062, | |
| "loss": 0.5818, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": 1.2907373905181885, | |
| "rewards/margins": 0.48255085945129395, | |
| "rewards/rejected": 0.8081865310668945, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.4261824246074026, | |
| "grad_norm": 195.689208984375, | |
| "learning_rate": 2.8714338643942197e-07, | |
| "logits/chosen": -6.1582794189453125, | |
| "logits/rejected": -6.1286234855651855, | |
| "logps/chosen": -882.5172729492188, | |
| "logps/rejected": -781.136962890625, | |
| "loss": 0.6334, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": 1.112781286239624, | |
| "rewards/margins": 0.32989996671676636, | |
| "rewards/rejected": 0.7828812599182129, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.4261824246074026, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.354198932647705, | |
| "eval_logps/chosen": -1139.8521728515625, | |
| "eval_logps/rejected": -1050.4593505859375, | |
| "eval_loss": 0.6620603203773499, | |
| "eval_rewards/accuracies": 0.6230058670043945, | |
| "eval_rewards/chosen": 1.449316382408142, | |
| "eval_rewards/margins": 0.27107417583465576, | |
| "eval_rewards/rejected": 1.1782420873641968, | |
| "eval_runtime": 174.0926, | |
| "eval_samples_per_second": 6.841, | |
| "eval_steps_per_second": 6.841, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.4298883587344235, | |
| "grad_norm": 196.42591857910156, | |
| "learning_rate": 2.852908484623935e-07, | |
| "logits/chosen": -6.155422687530518, | |
| "logits/rejected": -6.14513635635376, | |
| "logps/chosen": -881.4910888671875, | |
| "logps/rejected": -846.4808349609375, | |
| "loss": 0.5939, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": 1.1745377779006958, | |
| "rewards/margins": 0.4163404405117035, | |
| "rewards/rejected": 0.7581971883773804, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.4335942928614444, | |
| "grad_norm": 143.9552001953125, | |
| "learning_rate": 2.834383104853649e-07, | |
| "logits/chosen": -6.306971549987793, | |
| "logits/rejected": -6.230139255523682, | |
| "logps/chosen": -952.1017456054688, | |
| "logps/rejected": -841.35498046875, | |
| "loss": 0.6176, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 1.0589168071746826, | |
| "rewards/margins": 0.36980077624320984, | |
| "rewards/rejected": 0.6891158819198608, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.43730022698846527, | |
| "grad_norm": 160.4393310546875, | |
| "learning_rate": 2.8158577250833643e-07, | |
| "logits/chosen": -6.1785125732421875, | |
| "logits/rejected": -6.113655090332031, | |
| "logps/chosen": -856.7884521484375, | |
| "logps/rejected": -804.0035400390625, | |
| "loss": 0.6001, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": 1.080625295639038, | |
| "rewards/margins": 0.3962119221687317, | |
| "rewards/rejected": 0.6844133138656616, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.4410061611154862, | |
| "grad_norm": 149.27992248535156, | |
| "learning_rate": 2.797332345313079e-07, | |
| "logits/chosen": -6.293272972106934, | |
| "logits/rejected": -6.266045570373535, | |
| "logps/chosen": -1081.994140625, | |
| "logps/rejected": -923.7223510742188, | |
| "loss": 0.5861, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": 1.324501395225525, | |
| "rewards/margins": 0.486712783575058, | |
| "rewards/rejected": 0.8377887606620789, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.44471209524250704, | |
| "grad_norm": 158.3959197998047, | |
| "learning_rate": 2.778806965542794e-07, | |
| "logits/chosen": -6.168979644775391, | |
| "logits/rejected": -6.1329731941223145, | |
| "logps/chosen": -876.14697265625, | |
| "logps/rejected": -876.2384643554688, | |
| "loss": 0.6212, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": 1.1340038776397705, | |
| "rewards/margins": 0.32338947057724, | |
| "rewards/rejected": 0.8106144070625305, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.44471209524250704, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.352243900299072, | |
| "eval_logps/chosen": -1139.7337646484375, | |
| "eval_logps/rejected": -1050.3447265625, | |
| "eval_loss": 0.6622124910354614, | |
| "eval_rewards/accuracies": 0.6272040009498596, | |
| "eval_rewards/chosen": 1.4611579179763794, | |
| "eval_rewards/margins": 0.27146124839782715, | |
| "eval_rewards/rejected": 1.1896967887878418, | |
| "eval_runtime": 174.1161, | |
| "eval_samples_per_second": 6.84, | |
| "eval_steps_per_second": 6.84, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.44841802936952796, | |
| "grad_norm": 159.97509765625, | |
| "learning_rate": 2.760281585772508e-07, | |
| "logits/chosen": -6.155325889587402, | |
| "logits/rejected": -6.213382720947266, | |
| "logps/chosen": -876.2828369140625, | |
| "logps/rejected": -887.4181518554688, | |
| "loss": 0.6392, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 1.1726298332214355, | |
| "rewards/margins": 0.33511778712272644, | |
| "rewards/rejected": 0.8375120162963867, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.4521239634965489, | |
| "grad_norm": 149.97601318359375, | |
| "learning_rate": 2.741756206002223e-07, | |
| "logits/chosen": -6.089978218078613, | |
| "logits/rejected": -6.215594291687012, | |
| "logps/chosen": -1006.5177612304688, | |
| "logps/rejected": -837.3438720703125, | |
| "loss": 0.5759, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": 1.1888597011566162, | |
| "rewards/margins": 0.4549834132194519, | |
| "rewards/rejected": 0.7338763475418091, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.45582989762356974, | |
| "grad_norm": 124.17080688476562, | |
| "learning_rate": 2.7232308262319375e-07, | |
| "logits/chosen": -6.237065315246582, | |
| "logits/rejected": -6.162973403930664, | |
| "logps/chosen": -900.43505859375, | |
| "logps/rejected": -769.6177978515625, | |
| "loss": 0.5828, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 1.1558102369308472, | |
| "rewards/margins": 0.5107904672622681, | |
| "rewards/rejected": 0.6450197100639343, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.45953583175059065, | |
| "grad_norm": 209.32017517089844, | |
| "learning_rate": 2.7047054464616525e-07, | |
| "logits/chosen": -6.1265363693237305, | |
| "logits/rejected": -6.136591911315918, | |
| "logps/chosen": -850.1053466796875, | |
| "logps/rejected": -755.8872680664062, | |
| "loss": 0.6007, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 1.0844950675964355, | |
| "rewards/margins": 0.4297495484352112, | |
| "rewards/rejected": 0.6547454595565796, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.4632417658776115, | |
| "grad_norm": 230.33175659179688, | |
| "learning_rate": 2.686180066691367e-07, | |
| "logits/chosen": -6.183014392852783, | |
| "logits/rejected": -6.165501117706299, | |
| "logps/chosen": -842.3179931640625, | |
| "logps/rejected": -821.1053466796875, | |
| "loss": 0.6189, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 0.9313210248947144, | |
| "rewards/margins": 0.330872505903244, | |
| "rewards/rejected": 0.6004485487937927, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.4632417658776115, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.354373931884766, | |
| "eval_logps/chosen": -1141.2061767578125, | |
| "eval_logps/rejected": -1051.6622314453125, | |
| "eval_loss": 0.6636425852775574, | |
| "eval_rewards/accuracies": 0.6204869747161865, | |
| "eval_rewards/chosen": 1.31391179561615, | |
| "eval_rewards/margins": 0.2559622824192047, | |
| "eval_rewards/rejected": 1.0579496622085571, | |
| "eval_runtime": 174.2497, | |
| "eval_samples_per_second": 6.835, | |
| "eval_steps_per_second": 6.835, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.4669477000046324, | |
| "grad_norm": 212.96624755859375, | |
| "learning_rate": 2.667654686921082e-07, | |
| "logits/chosen": -6.100918292999268, | |
| "logits/rejected": null, | |
| "logps/chosen": -1028.841552734375, | |
| "logps/rejected": -886.3626708984375, | |
| "loss": 0.613, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 1.181762933731079, | |
| "rewards/margins": 0.37267082929611206, | |
| "rewards/rejected": 0.8090922236442566, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.4706536341316533, | |
| "grad_norm": 162.5323944091797, | |
| "learning_rate": 2.6491293071507966e-07, | |
| "logits/chosen": -6.123560428619385, | |
| "logits/rejected": -6.198370933532715, | |
| "logps/chosen": -930.6388549804688, | |
| "logps/rejected": -765.5624389648438, | |
| "loss": 0.6066, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 1.0889371633529663, | |
| "rewards/margins": 0.40622156858444214, | |
| "rewards/rejected": 0.6827155351638794, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.4743595682586742, | |
| "grad_norm": 129.6339874267578, | |
| "learning_rate": 2.630603927380511e-07, | |
| "logits/chosen": -6.171866416931152, | |
| "logits/rejected": -6.194737434387207, | |
| "logps/chosen": -922.0595703125, | |
| "logps/rejected": -880.8753051757812, | |
| "loss": 0.6099, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": 1.1603076457977295, | |
| "rewards/margins": 0.38798120617866516, | |
| "rewards/rejected": 0.7723264694213867, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.4780655023856951, | |
| "grad_norm": 185.04010009765625, | |
| "learning_rate": 2.6120785476102257e-07, | |
| "logits/chosen": -6.216259479522705, | |
| "logits/rejected": -6.08756685256958, | |
| "logps/chosen": -940.3963623046875, | |
| "logps/rejected": -871.3863525390625, | |
| "loss": 0.587, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 1.1315703392028809, | |
| "rewards/margins": 0.41449031233787537, | |
| "rewards/rejected": 0.7170801162719727, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.481771436512716, | |
| "grad_norm": 188.83816528320312, | |
| "learning_rate": 2.5935531678399407e-07, | |
| "logits/chosen": -6.360658645629883, | |
| "logits/rejected": -6.3473029136657715, | |
| "logps/chosen": -941.73876953125, | |
| "logps/rejected": -903.9150390625, | |
| "loss": 0.581, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": 1.2506906986236572, | |
| "rewards/margins": 0.4942537844181061, | |
| "rewards/rejected": 0.7564369440078735, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.481771436512716, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.355961322784424, | |
| "eval_logps/chosen": -1139.5653076171875, | |
| "eval_logps/rejected": -1050.153076171875, | |
| "eval_loss": 0.6662114262580872, | |
| "eval_rewards/accuracies": 0.6288833022117615, | |
| "eval_rewards/chosen": 1.4780066013336182, | |
| "eval_rewards/margins": 0.26914337277412415, | |
| "eval_rewards/rejected": 1.2088632583618164, | |
| "eval_runtime": 174.4932, | |
| "eval_samples_per_second": 6.825, | |
| "eval_steps_per_second": 6.825, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.4854773706397369, | |
| "grad_norm": 176.7130584716797, | |
| "learning_rate": 2.575027788069655e-07, | |
| "logits/chosen": -6.2104811668396, | |
| "logits/rejected": -6.247377395629883, | |
| "logps/chosen": -863.90869140625, | |
| "logps/rejected": -790.2784423828125, | |
| "loss": 0.5685, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": 1.1237311363220215, | |
| "rewards/margins": 0.48923033475875854, | |
| "rewards/rejected": 0.6345008015632629, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.48918330476675775, | |
| "grad_norm": 181.07481384277344, | |
| "learning_rate": 2.5565024082993703e-07, | |
| "logits/chosen": -6.231227397918701, | |
| "logits/rejected": null, | |
| "logps/chosen": -985.6842651367188, | |
| "logps/rejected": -883.3465576171875, | |
| "loss": 0.5585, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": 1.381658911705017, | |
| "rewards/margins": 0.5166347026824951, | |
| "rewards/rejected": 0.8650242686271667, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.49288923889377867, | |
| "grad_norm": 203.50254821777344, | |
| "learning_rate": 2.537977028529085e-07, | |
| "logits/chosen": -6.048904895782471, | |
| "logits/rejected": -6.121670722961426, | |
| "logps/chosen": -907.2009887695312, | |
| "logps/rejected": -842.6783447265625, | |
| "loss": 0.6369, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": 1.19151771068573, | |
| "rewards/margins": 0.38030725717544556, | |
| "rewards/rejected": 0.8112104535102844, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.49659517302079953, | |
| "grad_norm": 154.94403076171875, | |
| "learning_rate": 2.5194516487588e-07, | |
| "logits/chosen": -6.1506195068359375, | |
| "logits/rejected": -6.043631076812744, | |
| "logps/chosen": -921.5447387695312, | |
| "logps/rejected": -728.7830810546875, | |
| "loss": 0.5662, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": 1.1830615997314453, | |
| "rewards/margins": 0.491299569606781, | |
| "rewards/rejected": 0.6917620897293091, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.5003011071478205, | |
| "grad_norm": 227.9466094970703, | |
| "learning_rate": 2.500926268988514e-07, | |
| "logits/chosen": -6.187090873718262, | |
| "logits/rejected": -6.22959041595459, | |
| "logps/chosen": -829.8533935546875, | |
| "logps/rejected": -727.3782348632812, | |
| "loss": 0.5804, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 1.0849952697753906, | |
| "rewards/margins": 0.42699941992759705, | |
| "rewards/rejected": 0.657995879650116, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.5003011071478205, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.349131107330322, | |
| "eval_logps/chosen": -1137.9072265625, | |
| "eval_logps/rejected": -1048.7745361328125, | |
| "eval_loss": 0.6663568019866943, | |
| "eval_rewards/accuracies": 0.6213266253471375, | |
| "eval_rewards/chosen": 1.6438101530075073, | |
| "eval_rewards/margins": 0.2971048057079315, | |
| "eval_rewards/rejected": 1.346705436706543, | |
| "eval_runtime": 174.0406, | |
| "eval_samples_per_second": 6.843, | |
| "eval_steps_per_second": 6.843, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.5040070412748413, | |
| "grad_norm": 193.2044219970703, | |
| "learning_rate": 2.482400889218229e-07, | |
| "logits/chosen": -6.1281938552856445, | |
| "logits/rejected": -6.117993354797363, | |
| "logps/chosen": -1060.016357421875, | |
| "logps/rejected": -954.3433837890625, | |
| "loss": 0.5773, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": 1.4718914031982422, | |
| "rewards/margins": 0.5141991376876831, | |
| "rewards/rejected": 0.9576921463012695, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.5077129754018622, | |
| "grad_norm": 203.0106658935547, | |
| "learning_rate": 2.4638755094479434e-07, | |
| "logits/chosen": -6.16585111618042, | |
| "logits/rejected": -6.127178192138672, | |
| "logps/chosen": -917.00146484375, | |
| "logps/rejected": -872.7060546875, | |
| "loss": 0.6158, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": 1.3071753978729248, | |
| "rewards/margins": 0.41641944646835327, | |
| "rewards/rejected": 0.8907560110092163, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.5114189095288831, | |
| "grad_norm": 135.35690307617188, | |
| "learning_rate": 2.4453501296776585e-07, | |
| "logits/chosen": -6.066123962402344, | |
| "logits/rejected": -6.084324836730957, | |
| "logps/chosen": -859.6808471679688, | |
| "logps/rejected": -741.0250854492188, | |
| "loss": 0.6318, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 1.2575219869613647, | |
| "rewards/margins": 0.3475838005542755, | |
| "rewards/rejected": 0.9099382162094116, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.515124843655904, | |
| "grad_norm": 171.01341247558594, | |
| "learning_rate": 2.426824749907373e-07, | |
| "logits/chosen": -6.1828107833862305, | |
| "logits/rejected": -6.259852886199951, | |
| "logps/chosen": -894.8861083984375, | |
| "logps/rejected": -804.0977783203125, | |
| "loss": 0.5773, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 1.3444898128509521, | |
| "rewards/margins": 0.5704382658004761, | |
| "rewards/rejected": 0.7740517258644104, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.5188307777829249, | |
| "grad_norm": 250.60726928710938, | |
| "learning_rate": 2.4082993701370875e-07, | |
| "logits/chosen": -6.185898780822754, | |
| "logits/rejected": -6.244287014007568, | |
| "logps/chosen": -987.5439453125, | |
| "logps/rejected": -882.2205200195312, | |
| "loss": 0.5984, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": 1.4197866916656494, | |
| "rewards/margins": 0.5091265439987183, | |
| "rewards/rejected": 0.9106601476669312, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.5188307777829249, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.353067398071289, | |
| "eval_logps/chosen": -1137.5255126953125, | |
| "eval_logps/rejected": -1048.356689453125, | |
| "eval_loss": 0.6677223443984985, | |
| "eval_rewards/accuracies": 0.6120907068252563, | |
| "eval_rewards/chosen": 1.6819899082183838, | |
| "eval_rewards/margins": 0.29349878430366516, | |
| "eval_rewards/rejected": 1.388491153717041, | |
| "eval_runtime": 174.2641, | |
| "eval_samples_per_second": 6.834, | |
| "eval_steps_per_second": 6.834, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.5225367119099458, | |
| "grad_norm": 240.53480529785156, | |
| "learning_rate": 2.3897739903668026e-07, | |
| "logits/chosen": -6.275177001953125, | |
| "logits/rejected": -6.181919574737549, | |
| "logps/chosen": -965.3531494140625, | |
| "logps/rejected": -786.497802734375, | |
| "loss": 0.6224, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": 1.2736237049102783, | |
| "rewards/margins": 0.4058550298213959, | |
| "rewards/rejected": 0.8677686452865601, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.5262426460369667, | |
| "grad_norm": 165.14010620117188, | |
| "learning_rate": 2.371248610596517e-07, | |
| "logits/chosen": -6.211658954620361, | |
| "logits/rejected": -6.0974249839782715, | |
| "logps/chosen": -882.7542724609375, | |
| "logps/rejected": -741.9580078125, | |
| "loss": 0.5812, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 1.35499107837677, | |
| "rewards/margins": 0.5230444073677063, | |
| "rewards/rejected": 0.8319465517997742, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.5299485801639876, | |
| "grad_norm": 143.9077606201172, | |
| "learning_rate": 2.352723230826232e-07, | |
| "logits/chosen": -6.10528039932251, | |
| "logits/rejected": -6.128796100616455, | |
| "logps/chosen": -933.9381103515625, | |
| "logps/rejected": -815.3626098632812, | |
| "loss": 0.5783, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": 1.3778315782546997, | |
| "rewards/margins": 0.5523862242698669, | |
| "rewards/rejected": 0.8254453539848328, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.5336545142910085, | |
| "grad_norm": 249.5879669189453, | |
| "learning_rate": 2.3341978510559464e-07, | |
| "logits/chosen": -6.190318584442139, | |
| "logits/rejected": -6.109808921813965, | |
| "logps/chosen": -988.9300537109375, | |
| "logps/rejected": -904.2208862304688, | |
| "loss": 0.6759, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 1.5002410411834717, | |
| "rewards/margins": 0.37281566858291626, | |
| "rewards/rejected": 1.1274254322052002, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.5373604484180293, | |
| "grad_norm": 158.98834228515625, | |
| "learning_rate": 2.3156724712856612e-07, | |
| "logits/chosen": null, | |
| "logits/rejected": -6.147918701171875, | |
| "logps/chosen": -897.3717651367188, | |
| "logps/rejected": -858.4124755859375, | |
| "loss": 0.6545, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": 1.2706643342971802, | |
| "rewards/margins": 0.30182304978370667, | |
| "rewards/rejected": 0.9688412547111511, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.5373604484180293, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.35387659072876, | |
| "eval_logps/chosen": -1137.998046875, | |
| "eval_logps/rejected": -1048.7669677734375, | |
| "eval_loss": 0.6649725437164307, | |
| "eval_rewards/accuracies": 0.6255247592926025, | |
| "eval_rewards/chosen": 1.6347370147705078, | |
| "eval_rewards/margins": 0.2872615456581116, | |
| "eval_rewards/rejected": 1.3474754095077515, | |
| "eval_runtime": 174.8955, | |
| "eval_samples_per_second": 6.81, | |
| "eval_steps_per_second": 6.81, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.5410663825450502, | |
| "grad_norm": 202.708740234375, | |
| "learning_rate": 2.297147091515376e-07, | |
| "logits/chosen": -6.149045944213867, | |
| "logits/rejected": -6.200368881225586, | |
| "logps/chosen": -1000.2081909179688, | |
| "logps/rejected": -880.8834838867188, | |
| "loss": 0.609, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": 1.3515934944152832, | |
| "rewards/margins": 0.4020705223083496, | |
| "rewards/rejected": 0.949522852897644, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.5447723166720712, | |
| "grad_norm": 378.9857482910156, | |
| "learning_rate": 2.2786217117450905e-07, | |
| "logits/chosen": -6.143518447875977, | |
| "logits/rejected": -6.165432929992676, | |
| "logps/chosen": -974.8902587890625, | |
| "logps/rejected": -885.5198974609375, | |
| "loss": 0.6535, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 1.1508817672729492, | |
| "rewards/margins": 0.333347886800766, | |
| "rewards/rejected": 0.8175338506698608, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.5484782507990921, | |
| "grad_norm": 156.70301818847656, | |
| "learning_rate": 2.2600963319748053e-07, | |
| "logits/chosen": -6.1991682052612305, | |
| "logits/rejected": -6.233222007751465, | |
| "logps/chosen": -1007.1458129882812, | |
| "logps/rejected": -901.8226318359375, | |
| "loss": 0.5823, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": 1.3570195436477661, | |
| "rewards/margins": 0.5376918315887451, | |
| "rewards/rejected": 0.819327712059021, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.552184184926113, | |
| "grad_norm": 194.2936248779297, | |
| "learning_rate": 2.24157095220452e-07, | |
| "logits/chosen": -6.096491813659668, | |
| "logits/rejected": -6.074574947357178, | |
| "logps/chosen": -922.29833984375, | |
| "logps/rejected": -841.58251953125, | |
| "loss": 0.6448, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 1.311821699142456, | |
| "rewards/margins": 0.2840858995914459, | |
| "rewards/rejected": 1.027735710144043, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.5558901190531338, | |
| "grad_norm": 138.76791381835938, | |
| "learning_rate": 2.223045572434235e-07, | |
| "logits/chosen": -6.16571569442749, | |
| "logits/rejected": -6.221317768096924, | |
| "logps/chosen": -931.6896362304688, | |
| "logps/rejected": -879.6935424804688, | |
| "loss": 0.6187, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 1.2591129541397095, | |
| "rewards/margins": 0.4034864008426666, | |
| "rewards/rejected": 0.8556264638900757, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.5558901190531338, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.352556228637695, | |
| "eval_logps/chosen": -1138.7109375, | |
| "eval_logps/rejected": -1049.4052734375, | |
| "eval_loss": 0.6670076847076416, | |
| "eval_rewards/accuracies": 0.6196473836898804, | |
| "eval_rewards/chosen": 1.5634312629699707, | |
| "eval_rewards/margins": 0.2797936499118805, | |
| "eval_rewards/rejected": 1.2836376428604126, | |
| "eval_runtime": 174.7794, | |
| "eval_samples_per_second": 6.814, | |
| "eval_steps_per_second": 6.814, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.5595960531801547, | |
| "grad_norm": 196.8204345703125, | |
| "learning_rate": 2.2045201926639494e-07, | |
| "logits/chosen": -6.262406349182129, | |
| "logits/rejected": -6.286099433898926, | |
| "logps/chosen": -926.6876831054688, | |
| "logps/rejected": -719.3727416992188, | |
| "loss": 0.5834, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 1.2394769191741943, | |
| "rewards/margins": 0.5388771891593933, | |
| "rewards/rejected": 0.7005997896194458, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.5633019873071756, | |
| "grad_norm": 181.23269653320312, | |
| "learning_rate": 2.1859948128936642e-07, | |
| "logits/chosen": -6.15579080581665, | |
| "logits/rejected": null, | |
| "logps/chosen": -907.208984375, | |
| "logps/rejected": -789.55615234375, | |
| "loss": 0.6841, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": 1.3312886953353882, | |
| "rewards/margins": 0.29294928908348083, | |
| "rewards/rejected": 1.038339614868164, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.5670079214341965, | |
| "grad_norm": 249.67068481445312, | |
| "learning_rate": 2.167469433123379e-07, | |
| "logits/chosen": -6.171587944030762, | |
| "logits/rejected": -6.20114803314209, | |
| "logps/chosen": -1026.1175537109375, | |
| "logps/rejected": -928.2333984375, | |
| "loss": 0.6224, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": 1.4529210329055786, | |
| "rewards/margins": 0.44856566190719604, | |
| "rewards/rejected": 1.0043553113937378, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.5707138555612175, | |
| "grad_norm": 161.0312042236328, | |
| "learning_rate": 2.1489440533530935e-07, | |
| "logits/chosen": -6.143443584442139, | |
| "logits/rejected": -6.267470836639404, | |
| "logps/chosen": -933.2398681640625, | |
| "logps/rejected": -907.0738525390625, | |
| "loss": 0.6238, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": 1.4245777130126953, | |
| "rewards/margins": 0.4305481016635895, | |
| "rewards/rejected": 0.9940296411514282, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.5744197896882383, | |
| "grad_norm": 156.3391571044922, | |
| "learning_rate": 2.1304186735828083e-07, | |
| "logits/chosen": -6.159350395202637, | |
| "logits/rejected": -6.219527244567871, | |
| "logps/chosen": -970.1463012695312, | |
| "logps/rejected": -861.26416015625, | |
| "loss": 0.6633, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 1.3536326885223389, | |
| "rewards/margins": 0.36367741227149963, | |
| "rewards/rejected": 0.9899552464485168, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.5744197896882383, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.356298923492432, | |
| "eval_logps/chosen": -1139.156005859375, | |
| "eval_logps/rejected": -1049.7542724609375, | |
| "eval_loss": 0.6682325601577759, | |
| "eval_rewards/accuracies": 0.6162888407707214, | |
| "eval_rewards/chosen": 1.51894211769104, | |
| "eval_rewards/margins": 0.27020886540412903, | |
| "eval_rewards/rejected": 1.2487331628799438, | |
| "eval_runtime": 174.6376, | |
| "eval_samples_per_second": 6.82, | |
| "eval_steps_per_second": 6.82, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.5781257238152592, | |
| "grad_norm": 223.8711395263672, | |
| "learning_rate": 2.111893293812523e-07, | |
| "logits/chosen": -6.078129768371582, | |
| "logits/rejected": -6.09130859375, | |
| "logps/chosen": -902.1170043945312, | |
| "logps/rejected": -816.3302001953125, | |
| "loss": 0.6542, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 1.166666030883789, | |
| "rewards/margins": 0.32527679204940796, | |
| "rewards/rejected": 0.8413891792297363, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.5818316579422801, | |
| "grad_norm": 146.7541961669922, | |
| "learning_rate": 2.093367914042238e-07, | |
| "logits/chosen": -6.186091899871826, | |
| "logits/rejected": -6.257566928863525, | |
| "logps/chosen": -953.74853515625, | |
| "logps/rejected": -876.87451171875, | |
| "loss": 0.6237, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 1.2467100620269775, | |
| "rewards/margins": 0.4072350859642029, | |
| "rewards/rejected": 0.8394750356674194, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.585537592069301, | |
| "grad_norm": 201.52931213378906, | |
| "learning_rate": 2.0748425342719524e-07, | |
| "logits/chosen": -6.206198215484619, | |
| "logits/rejected": -6.1201300621032715, | |
| "logps/chosen": -973.3306884765625, | |
| "logps/rejected": -831.2630004882812, | |
| "loss": 0.6483, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": 1.2103111743927002, | |
| "rewards/margins": 0.3353997766971588, | |
| "rewards/rejected": 0.8749113082885742, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.5892435261963218, | |
| "grad_norm": 157.7445526123047, | |
| "learning_rate": 2.0563171545016672e-07, | |
| "logits/chosen": -6.155628681182861, | |
| "logits/rejected": -6.149939060211182, | |
| "logps/chosen": -978.5584716796875, | |
| "logps/rejected": -856.3310546875, | |
| "loss": 0.5719, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 1.2967727184295654, | |
| "rewards/margins": 0.4965516924858093, | |
| "rewards/rejected": 0.8002211451530457, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.5929494603233427, | |
| "grad_norm": 161.9136505126953, | |
| "learning_rate": 2.037791774731382e-07, | |
| "logits/chosen": -6.121860027313232, | |
| "logits/rejected": -6.133907794952393, | |
| "logps/chosen": -928.7197265625, | |
| "logps/rejected": -803.7809448242188, | |
| "loss": 0.6081, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 1.312783122062683, | |
| "rewards/margins": 0.4244639277458191, | |
| "rewards/rejected": 0.8883193135261536, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.5929494603233427, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.356760025024414, | |
| "eval_logps/chosen": -1139.2073974609375, | |
| "eval_logps/rejected": -1049.739990234375, | |
| "eval_loss": 0.6694273352622986, | |
| "eval_rewards/accuracies": 0.6171284914016724, | |
| "eval_rewards/chosen": 1.513792872428894, | |
| "eval_rewards/margins": 0.2636261582374573, | |
| "eval_rewards/rejected": 1.250166654586792, | |
| "eval_runtime": 174.1847, | |
| "eval_samples_per_second": 6.838, | |
| "eval_steps_per_second": 6.838, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.5966553944503636, | |
| "grad_norm": 110.379638671875, | |
| "learning_rate": 2.0192663949610965e-07, | |
| "logits/chosen": -6.110042572021484, | |
| "logits/rejected": -6.157367706298828, | |
| "logps/chosen": -851.4879760742188, | |
| "logps/rejected": -773.2803955078125, | |
| "loss": 0.5877, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": 1.2313387393951416, | |
| "rewards/margins": 0.5156643390655518, | |
| "rewards/rejected": 0.7156744599342346, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.6003613285773846, | |
| "grad_norm": 196.35398864746094, | |
| "learning_rate": 2.0007410151908113e-07, | |
| "logits/chosen": -6.248660087585449, | |
| "logits/rejected": -6.268450736999512, | |
| "logps/chosen": -1027.9803466796875, | |
| "logps/rejected": -942.8600463867188, | |
| "loss": 0.5961, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": 1.4811532497406006, | |
| "rewards/margins": 0.4853101670742035, | |
| "rewards/rejected": 0.9958430528640747, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.6040672627044055, | |
| "grad_norm": 147.44473266601562, | |
| "learning_rate": 1.982215635420526e-07, | |
| "logits/chosen": -6.094088554382324, | |
| "logits/rejected": null, | |
| "logps/chosen": -970.6212158203125, | |
| "logps/rejected": -842.7681884765625, | |
| "loss": 0.668, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": 1.3443838357925415, | |
| "rewards/margins": 0.37295302748680115, | |
| "rewards/rejected": 0.971430778503418, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.6077731968314263, | |
| "grad_norm": 163.9195556640625, | |
| "learning_rate": 1.963690255650241e-07, | |
| "logits/chosen": -6.2662577629089355, | |
| "logits/rejected": -6.079975128173828, | |
| "logps/chosen": -937.974609375, | |
| "logps/rejected": -781.6622314453125, | |
| "loss": 0.5775, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": 1.30463707447052, | |
| "rewards/margins": 0.5367504358291626, | |
| "rewards/rejected": 0.7678866982460022, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.6114791309584472, | |
| "grad_norm": 162.06088256835938, | |
| "learning_rate": 1.9451648758799554e-07, | |
| "logits/chosen": -6.107717990875244, | |
| "logits/rejected": -6.13240909576416, | |
| "logps/chosen": -855.8272705078125, | |
| "logps/rejected": -743.3189086914062, | |
| "loss": 0.6199, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 1.208332896232605, | |
| "rewards/margins": 0.32260221242904663, | |
| "rewards/rejected": 0.8857306241989136, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.6114791309584472, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.351521015167236, | |
| "eval_logps/chosen": -1138.5380859375, | |
| "eval_logps/rejected": -1049.13720703125, | |
| "eval_loss": 0.67048579454422, | |
| "eval_rewards/accuracies": 0.6087321639060974, | |
| "eval_rewards/chosen": 1.580714464187622, | |
| "eval_rewards/margins": 0.2702693045139313, | |
| "eval_rewards/rejected": 1.3104450702667236, | |
| "eval_runtime": 174.7091, | |
| "eval_samples_per_second": 6.817, | |
| "eval_steps_per_second": 6.817, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.6151850650854681, | |
| "grad_norm": 209.0579833984375, | |
| "learning_rate": 1.9266394961096702e-07, | |
| "logits/chosen": -6.237910270690918, | |
| "logits/rejected": -6.252842903137207, | |
| "logps/chosen": -996.7249755859375, | |
| "logps/rejected": -873.1549072265625, | |
| "loss": 0.6163, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 1.4242780208587646, | |
| "rewards/margins": 0.4079625606536865, | |
| "rewards/rejected": 1.0163153409957886, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.618890999212489, | |
| "grad_norm": 152.62171936035156, | |
| "learning_rate": 1.908114116339385e-07, | |
| "logits/chosen": -6.184769630432129, | |
| "logits/rejected": -6.171984672546387, | |
| "logps/chosen": -904.5120849609375, | |
| "logps/rejected": -842.2957153320312, | |
| "loss": 0.6201, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 1.3089262247085571, | |
| "rewards/margins": 0.4640830457210541, | |
| "rewards/rejected": 0.8448432087898254, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.6225969333395099, | |
| "grad_norm": 163.1865997314453, | |
| "learning_rate": 1.8895887365690995e-07, | |
| "logits/chosen": -6.068234443664551, | |
| "logits/rejected": -6.1052398681640625, | |
| "logps/chosen": -892.0081787109375, | |
| "logps/rejected": -850.9469604492188, | |
| "loss": 0.5911, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 1.2593834400177002, | |
| "rewards/margins": 0.4649893641471863, | |
| "rewards/rejected": 0.7943940162658691, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.6263028674665307, | |
| "grad_norm": 194.63597106933594, | |
| "learning_rate": 1.8710633567988143e-07, | |
| "logits/chosen": -6.147702217102051, | |
| "logits/rejected": -6.189964294433594, | |
| "logps/chosen": -944.7171630859375, | |
| "logps/rejected": -848.2835693359375, | |
| "loss": 0.594, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": 1.3190233707427979, | |
| "rewards/margins": 0.39705803990364075, | |
| "rewards/rejected": 0.9219652414321899, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.6300088015935517, | |
| "grad_norm": 196.50355529785156, | |
| "learning_rate": 1.852537977028529e-07, | |
| "logits/chosen": -6.1947102546691895, | |
| "logits/rejected": -6.1808247566223145, | |
| "logps/chosen": -886.1546020507812, | |
| "logps/rejected": -801.2207641601562, | |
| "loss": 0.6283, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 1.2352696657180786, | |
| "rewards/margins": 0.33806803822517395, | |
| "rewards/rejected": 0.8972015380859375, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.6300088015935517, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.353530406951904, | |
| "eval_logps/chosen": -1139.465576171875, | |
| "eval_logps/rejected": -1050.0250244140625, | |
| "eval_loss": 0.6684470772743225, | |
| "eval_rewards/accuracies": 0.6246851682662964, | |
| "eval_rewards/chosen": 1.4879825115203857, | |
| "eval_rewards/margins": 0.26630899310112, | |
| "eval_rewards/rejected": 1.2216734886169434, | |
| "eval_runtime": 174.6538, | |
| "eval_samples_per_second": 6.819, | |
| "eval_steps_per_second": 6.819, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.6337147357205726, | |
| "grad_norm": 148.55259704589844, | |
| "learning_rate": 1.834012597258244e-07, | |
| "logits/chosen": -6.227621078491211, | |
| "logits/rejected": -6.297041893005371, | |
| "logps/chosen": -928.7427978515625, | |
| "logps/rejected": -803.3206176757812, | |
| "loss": 0.5915, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": 1.285189151763916, | |
| "rewards/margins": 0.5148311853408813, | |
| "rewards/rejected": 0.7703579664230347, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.6374206698475935, | |
| "grad_norm": 144.889404296875, | |
| "learning_rate": 1.8154872174879584e-07, | |
| "logits/chosen": -6.107190132141113, | |
| "logits/rejected": -6.062108993530273, | |
| "logps/chosen": -983.3319091796875, | |
| "logps/rejected": -892.3099365234375, | |
| "loss": 0.6625, | |
| "rewards/accuracies": 0.6187499761581421, | |
| "rewards/chosen": 1.1306359767913818, | |
| "rewards/margins": 0.367345929145813, | |
| "rewards/rejected": 0.7632900476455688, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.6411266039746144, | |
| "grad_norm": 196.45310974121094, | |
| "learning_rate": 1.7969618377176732e-07, | |
| "logits/chosen": -6.020756721496582, | |
| "logits/rejected": -6.166621208190918, | |
| "logps/chosen": -843.7468872070312, | |
| "logps/rejected": -768.1483154296875, | |
| "loss": 0.6033, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 1.140453577041626, | |
| "rewards/margins": 0.39432471990585327, | |
| "rewards/rejected": 0.7461288571357727, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.6448325381016352, | |
| "grad_norm": 151.2347412109375, | |
| "learning_rate": 1.778436457947388e-07, | |
| "logits/chosen": -6.10150671005249, | |
| "logits/rejected": -6.154606819152832, | |
| "logps/chosen": -896.15185546875, | |
| "logps/rejected": -831.7677612304688, | |
| "loss": 0.5964, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 1.2138025760650635, | |
| "rewards/margins": 0.4559560716152191, | |
| "rewards/rejected": 0.757846474647522, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.6485384722286561, | |
| "grad_norm": 226.45729064941406, | |
| "learning_rate": 1.7599110781771025e-07, | |
| "logits/chosen": -6.205390453338623, | |
| "logits/rejected": -6.212441444396973, | |
| "logps/chosen": -1027.686767578125, | |
| "logps/rejected": -986.68603515625, | |
| "loss": 0.5979, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": 1.3980613946914673, | |
| "rewards/margins": 0.47088512778282166, | |
| "rewards/rejected": 0.9271761775016785, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.6485384722286561, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.354175090789795, | |
| "eval_logps/chosen": -1139.527587890625, | |
| "eval_logps/rejected": -1050.107666015625, | |
| "eval_loss": 0.665941596031189, | |
| "eval_rewards/accuracies": 0.6246851682662964, | |
| "eval_rewards/chosen": 1.4817659854888916, | |
| "eval_rewards/margins": 0.2683611810207367, | |
| "eval_rewards/rejected": 1.2134050130844116, | |
| "eval_runtime": 174.0278, | |
| "eval_samples_per_second": 6.844, | |
| "eval_steps_per_second": 6.844, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.652244406355677, | |
| "grad_norm": 144.0729217529297, | |
| "learning_rate": 1.7413856984068173e-07, | |
| "logits/chosen": -6.174568176269531, | |
| "logits/rejected": -6.175555229187012, | |
| "logps/chosen": -911.03125, | |
| "logps/rejected": -848.8049926757812, | |
| "loss": 0.5949, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": 1.23678457736969, | |
| "rewards/margins": 0.4119951128959656, | |
| "rewards/rejected": 0.8247894048690796, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.655950340482698, | |
| "grad_norm": 181.58526611328125, | |
| "learning_rate": 1.722860318636532e-07, | |
| "logits/chosen": -6.1877946853637695, | |
| "logits/rejected": -6.193057060241699, | |
| "logps/chosen": -931.5440673828125, | |
| "logps/rejected": -827.9782104492188, | |
| "loss": 0.6147, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 1.2314589023590088, | |
| "rewards/margins": 0.3570247292518616, | |
| "rewards/rejected": 0.8744341135025024, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.6596562746097188, | |
| "grad_norm": 152.58062744140625, | |
| "learning_rate": 1.7043349388662469e-07, | |
| "logits/chosen": -6.187155246734619, | |
| "logits/rejected": -6.14534854888916, | |
| "logps/chosen": -855.6112060546875, | |
| "logps/rejected": -809.7816162109375, | |
| "loss": 0.6337, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 1.152524709701538, | |
| "rewards/margins": 0.3496701121330261, | |
| "rewards/rejected": 0.8028545379638672, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.6633622087367397, | |
| "grad_norm": 172.72300720214844, | |
| "learning_rate": 1.6858095590959614e-07, | |
| "logits/chosen": -6.147979259490967, | |
| "logits/rejected": -6.196808815002441, | |
| "logps/chosen": -1047.6871337890625, | |
| "logps/rejected": -875.8937377929688, | |
| "loss": 0.5804, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 1.3021422624588013, | |
| "rewards/margins": 0.5173267126083374, | |
| "rewards/rejected": 0.7848155498504639, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.6670681428637606, | |
| "grad_norm": 157.03228759765625, | |
| "learning_rate": 1.6672841793256762e-07, | |
| "logits/chosen": -6.194676876068115, | |
| "logits/rejected": -6.176183223724365, | |
| "logps/chosen": -866.8079223632812, | |
| "logps/rejected": -801.6738891601562, | |
| "loss": 0.6229, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 1.2280833721160889, | |
| "rewards/margins": 0.37192708253860474, | |
| "rewards/rejected": 0.8561564683914185, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.6670681428637606, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.353016376495361, | |
| "eval_logps/chosen": -1139.4063720703125, | |
| "eval_logps/rejected": -1050.0079345703125, | |
| "eval_loss": 0.6658960580825806, | |
| "eval_rewards/accuracies": 0.6154491901397705, | |
| "eval_rewards/chosen": 1.4938946962356567, | |
| "eval_rewards/margins": 0.27052515745162964, | |
| "eval_rewards/rejected": 1.2233693599700928, | |
| "eval_runtime": 174.6275, | |
| "eval_samples_per_second": 6.82, | |
| "eval_steps_per_second": 6.82, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.6707740769907815, | |
| "grad_norm": 192.70468139648438, | |
| "learning_rate": 1.648758799555391e-07, | |
| "logits/chosen": -6.1675310134887695, | |
| "logits/rejected": -6.276528835296631, | |
| "logps/chosen": -952.8025512695312, | |
| "logps/rejected": -811.5380859375, | |
| "loss": 0.6054, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 1.2332537174224854, | |
| "rewards/margins": 0.4917047917842865, | |
| "rewards/rejected": 0.7415488958358765, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.6744800111178024, | |
| "grad_norm": 204.32887268066406, | |
| "learning_rate": 1.6302334197851055e-07, | |
| "logits/chosen": -6.1494927406311035, | |
| "logits/rejected": -6.112942695617676, | |
| "logps/chosen": -909.9085693359375, | |
| "logps/rejected": -847.9560546875, | |
| "loss": 0.5969, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 1.291352391242981, | |
| "rewards/margins": 0.5246464014053345, | |
| "rewards/rejected": 0.7667059898376465, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.6781859452448232, | |
| "grad_norm": 129.51596069335938, | |
| "learning_rate": 1.6117080400148203e-07, | |
| "logits/chosen": -6.111174583435059, | |
| "logits/rejected": -6.139338970184326, | |
| "logps/chosen": -1006.2116088867188, | |
| "logps/rejected": -902.53076171875, | |
| "loss": 0.6243, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": 1.3876721858978271, | |
| "rewards/margins": 0.43100985884666443, | |
| "rewards/rejected": 0.9566623568534851, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.6818918793718441, | |
| "grad_norm": 198.752197265625, | |
| "learning_rate": 1.593182660244535e-07, | |
| "logits/chosen": -6.178341388702393, | |
| "logits/rejected": -6.032105922698975, | |
| "logps/chosen": -931.4904174804688, | |
| "logps/rejected": -842.7693481445312, | |
| "loss": 0.6636, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 1.2749412059783936, | |
| "rewards/margins": 0.29882222414016724, | |
| "rewards/rejected": 0.9761190414428711, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.6855978134988651, | |
| "grad_norm": 173.8763885498047, | |
| "learning_rate": 1.5746572804742499e-07, | |
| "logits/chosen": -6.294638156890869, | |
| "logits/rejected": -6.279183387756348, | |
| "logps/chosen": -1006.7406005859375, | |
| "logps/rejected": -953.181640625, | |
| "loss": 0.6777, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 1.4022231101989746, | |
| "rewards/margins": 0.36820927262306213, | |
| "rewards/rejected": 1.0340137481689453, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.6855978134988651, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.3531341552734375, | |
| "eval_logps/chosen": -1139.8416748046875, | |
| "eval_logps/rejected": -1050.22802734375, | |
| "eval_loss": 0.6716598868370056, | |
| "eval_rewards/accuracies": 0.6078925132751465, | |
| "eval_rewards/chosen": 1.4503740072250366, | |
| "eval_rewards/margins": 0.24901418387889862, | |
| "eval_rewards/rejected": 1.2013598680496216, | |
| "eval_runtime": 174.1779, | |
| "eval_samples_per_second": 6.838, | |
| "eval_steps_per_second": 6.838, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.689303747625886, | |
| "grad_norm": 166.79347229003906, | |
| "learning_rate": 1.5561319007039644e-07, | |
| "logits/chosen": -6.272482872009277, | |
| "logits/rejected": -6.234023094177246, | |
| "logps/chosen": -851.1373291015625, | |
| "logps/rejected": -780.5647583007812, | |
| "loss": 0.5844, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": 1.1571626663208008, | |
| "rewards/margins": 0.47672972083091736, | |
| "rewards/rejected": 0.680432915687561, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.6930096817529069, | |
| "grad_norm": 151.9160919189453, | |
| "learning_rate": 1.5376065209336792e-07, | |
| "logits/chosen": -6.101273059844971, | |
| "logits/rejected": -6.033238887786865, | |
| "logps/chosen": -865.0284423828125, | |
| "logps/rejected": -818.7235107421875, | |
| "loss": 0.6109, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": 1.0943622589111328, | |
| "rewards/margins": 0.34644466638565063, | |
| "rewards/rejected": 0.7479175925254822, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.6967156158799277, | |
| "grad_norm": 148.6206817626953, | |
| "learning_rate": 1.519081141163394e-07, | |
| "logits/chosen": -6.229578971862793, | |
| "logits/rejected": -6.284262657165527, | |
| "logps/chosen": -960.0841674804688, | |
| "logps/rejected": -855.2936401367188, | |
| "loss": 0.6012, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 1.3213304281234741, | |
| "rewards/margins": 0.48967212438583374, | |
| "rewards/rejected": 0.8316582441329956, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.7004215500069486, | |
| "grad_norm": 158.1896514892578, | |
| "learning_rate": 1.5005557613931085e-07, | |
| "logits/chosen": -6.0919904708862305, | |
| "logits/rejected": -6.142601013183594, | |
| "logps/chosen": -897.45556640625, | |
| "logps/rejected": -893.6177978515625, | |
| "loss": 0.6336, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 1.1414662599563599, | |
| "rewards/margins": 0.35405582189559937, | |
| "rewards/rejected": 0.7874104380607605, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.7041274841339695, | |
| "grad_norm": 248.24693298339844, | |
| "learning_rate": 1.4820303816228233e-07, | |
| "logits/chosen": -6.12492036819458, | |
| "logits/rejected": -6.182600498199463, | |
| "logps/chosen": -882.8030395507812, | |
| "logps/rejected": -820.3277587890625, | |
| "loss": 0.5957, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": 1.1524447202682495, | |
| "rewards/margins": 0.43523526191711426, | |
| "rewards/rejected": 0.71720951795578, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.7041274841339695, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.356428146362305, | |
| "eval_logps/chosen": -1141.09130859375, | |
| "eval_logps/rejected": -1051.447998046875, | |
| "eval_loss": 0.668496310710907, | |
| "eval_rewards/accuracies": 0.6146095991134644, | |
| "eval_rewards/chosen": 1.3254036903381348, | |
| "eval_rewards/margins": 0.24603614211082458, | |
| "eval_rewards/rejected": 1.0793676376342773, | |
| "eval_runtime": 174.5224, | |
| "eval_samples_per_second": 6.824, | |
| "eval_steps_per_second": 6.824, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.7078334182609904, | |
| "grad_norm": 246.97654724121094, | |
| "learning_rate": 1.463505001852538e-07, | |
| "logits/chosen": -6.261816024780273, | |
| "logits/rejected": -6.296639442443848, | |
| "logps/chosen": -889.0240478515625, | |
| "logps/rejected": -812.9447021484375, | |
| "loss": 0.6209, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": 1.1191017627716064, | |
| "rewards/margins": 0.4275694489479065, | |
| "rewards/rejected": 0.6915323138237, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.7115393523880112, | |
| "grad_norm": 187.32501220703125, | |
| "learning_rate": 1.4449796220822528e-07, | |
| "logits/chosen": -6.17855167388916, | |
| "logits/rejected": -6.182552337646484, | |
| "logps/chosen": -1076.29833984375, | |
| "logps/rejected": -952.2566528320312, | |
| "loss": 0.5959, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 1.351000428199768, | |
| "rewards/margins": 0.5084677338600159, | |
| "rewards/rejected": 0.8425326347351074, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.7152452865150322, | |
| "grad_norm": 189.5558624267578, | |
| "learning_rate": 1.4264542423119674e-07, | |
| "logits/chosen": -6.147101402282715, | |
| "logits/rejected": -6.108138084411621, | |
| "logps/chosen": -975.3698120117188, | |
| "logps/rejected": -849.8056640625, | |
| "loss": 0.6439, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": 1.0261156558990479, | |
| "rewards/margins": 0.3257748484611511, | |
| "rewards/rejected": 0.7003408670425415, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.7189512206420531, | |
| "grad_norm": 188.531494140625, | |
| "learning_rate": 1.4079288625416822e-07, | |
| "logits/chosen": -6.304642200469971, | |
| "logits/rejected": -6.310070991516113, | |
| "logps/chosen": -951.8046875, | |
| "logps/rejected": -888.8732299804688, | |
| "loss": 0.623, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": 1.1070719957351685, | |
| "rewards/margins": 0.3869238495826721, | |
| "rewards/rejected": 0.7201482057571411, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.722657154769074, | |
| "grad_norm": 198.22499084472656, | |
| "learning_rate": 1.389403482771397e-07, | |
| "logits/chosen": -6.263820648193359, | |
| "logits/rejected": null, | |
| "logps/chosen": -928.9371337890625, | |
| "logps/rejected": -846.8762817382812, | |
| "loss": 0.6375, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": 1.0606300830841064, | |
| "rewards/margins": 0.30186527967453003, | |
| "rewards/rejected": 0.7587647438049316, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.722657154769074, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.365767955780029, | |
| "eval_logps/chosen": -1140.9320068359375, | |
| "eval_logps/rejected": -1051.3402099609375, | |
| "eval_loss": 0.6660320162773132, | |
| "eval_rewards/accuracies": 0.6263644099235535, | |
| "eval_rewards/chosen": 1.341342568397522, | |
| "eval_rewards/margins": 0.2511833608150482, | |
| "eval_rewards/rejected": 1.0901591777801514, | |
| "eval_runtime": 174.295, | |
| "eval_samples_per_second": 6.833, | |
| "eval_steps_per_second": 6.833, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.7263630888960949, | |
| "grad_norm": 158.5247039794922, | |
| "learning_rate": 1.3708781030011115e-07, | |
| "logits/chosen": -6.31934118270874, | |
| "logits/rejected": -6.218142986297607, | |
| "logps/chosen": -926.21826171875, | |
| "logps/rejected": -798.2918090820312, | |
| "loss": 0.607, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": 1.078592300415039, | |
| "rewards/margins": 0.38651174306869507, | |
| "rewards/rejected": 0.6920806169509888, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.7300690230231157, | |
| "grad_norm": 178.37115478515625, | |
| "learning_rate": 1.3523527232308263e-07, | |
| "logits/chosen": null, | |
| "logits/rejected": -6.076174736022949, | |
| "logps/chosen": -980.4775390625, | |
| "logps/rejected": -835.7880859375, | |
| "loss": 0.5914, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": 1.232055902481079, | |
| "rewards/margins": 0.5219300389289856, | |
| "rewards/rejected": 0.710125744342804, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.7337749571501366, | |
| "grad_norm": 164.35330200195312, | |
| "learning_rate": 1.333827343460541e-07, | |
| "logits/chosen": -6.220945835113525, | |
| "logits/rejected": -6.0729475021362305, | |
| "logps/chosen": -925.7268676757812, | |
| "logps/rejected": -770.1295776367188, | |
| "loss": 0.6359, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 1.143280267715454, | |
| "rewards/margins": 0.3542799651622772, | |
| "rewards/rejected": 0.7890002131462097, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.7374808912771575, | |
| "grad_norm": 168.5455322265625, | |
| "learning_rate": 1.3153019636902556e-07, | |
| "logits/chosen": -6.159814834594727, | |
| "logits/rejected": -6.193203926086426, | |
| "logps/chosen": -896.9928588867188, | |
| "logps/rejected": -814.4002075195312, | |
| "loss": 0.5956, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": 1.1392806768417358, | |
| "rewards/margins": 0.39641261100769043, | |
| "rewards/rejected": 0.7428680658340454, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.7411868254041785, | |
| "grad_norm": 178.9434814453125, | |
| "learning_rate": 1.2967765839199704e-07, | |
| "logits/chosen": -6.179207801818848, | |
| "logits/rejected": -6.206517219543457, | |
| "logps/chosen": -962.1814575195312, | |
| "logps/rejected": -875.90869140625, | |
| "loss": 0.5861, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": 1.4602222442626953, | |
| "rewards/margins": 0.5008874535560608, | |
| "rewards/rejected": 0.9593348503112793, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.7411868254041785, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.363770008087158, | |
| "eval_logps/chosen": -1139.4501953125, | |
| "eval_logps/rejected": -1049.95703125, | |
| "eval_loss": 0.6680687069892883, | |
| "eval_rewards/accuracies": 0.6154491901397705, | |
| "eval_rewards/chosen": 1.4895213842391968, | |
| "eval_rewards/margins": 0.2610515356063843, | |
| "eval_rewards/rejected": 1.2284698486328125, | |
| "eval_runtime": 174.3568, | |
| "eval_samples_per_second": 6.831, | |
| "eval_steps_per_second": 6.831, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.7448927595311994, | |
| "grad_norm": 236.87335205078125, | |
| "learning_rate": 1.2782512041496851e-07, | |
| "logits/chosen": -6.2562150955200195, | |
| "logits/rejected": -6.181870937347412, | |
| "logps/chosen": -1007.4918823242188, | |
| "logps/rejected": -940.9990234375, | |
| "loss": 0.64, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": 1.4098269939422607, | |
| "rewards/margins": 0.383728563785553, | |
| "rewards/rejected": 1.0260984897613525, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.7485986936582202, | |
| "grad_norm": 180.59913635253906, | |
| "learning_rate": 1.2597258243794e-07, | |
| "logits/chosen": -6.184215068817139, | |
| "logits/rejected": -6.141203880310059, | |
| "logps/chosen": -925.7356567382812, | |
| "logps/rejected": -824.5137939453125, | |
| "loss": 0.5902, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": 1.2621865272521973, | |
| "rewards/margins": 0.42541566491127014, | |
| "rewards/rejected": 0.8367708921432495, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.7523046277852411, | |
| "grad_norm": 192.83140563964844, | |
| "learning_rate": 1.2412004446091145e-07, | |
| "logits/chosen": -6.163074970245361, | |
| "logits/rejected": -6.230958461761475, | |
| "logps/chosen": -957.0022583007812, | |
| "logps/rejected": -863.4744873046875, | |
| "loss": 0.5727, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": 1.3720769882202148, | |
| "rewards/margins": 0.5459089279174805, | |
| "rewards/rejected": 0.8261680603027344, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.756010561912262, | |
| "grad_norm": 128.6780242919922, | |
| "learning_rate": 1.2226750648388292e-07, | |
| "logits/chosen": -6.119555473327637, | |
| "logits/rejected": -6.239079475402832, | |
| "logps/chosen": -927.7706909179688, | |
| "logps/rejected": -826.4373779296875, | |
| "loss": 0.5713, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 1.3195993900299072, | |
| "rewards/margins": 0.4843137264251709, | |
| "rewards/rejected": 0.8352855443954468, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.7597164960392829, | |
| "grad_norm": 244.9542694091797, | |
| "learning_rate": 1.2041496850685438e-07, | |
| "logits/chosen": -6.327083110809326, | |
| "logits/rejected": -6.312042236328125, | |
| "logps/chosen": -1045.3717041015625, | |
| "logps/rejected": -913.8675537109375, | |
| "loss": 0.5965, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 1.329573392868042, | |
| "rewards/margins": 0.427541583776474, | |
| "rewards/rejected": 0.9020318984985352, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.7597164960392829, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.36568546295166, | |
| "eval_logps/chosen": -1140.65673828125, | |
| "eval_logps/rejected": -1051.03271484375, | |
| "eval_loss": 0.6708235740661621, | |
| "eval_rewards/accuracies": 0.6112510561943054, | |
| "eval_rewards/chosen": 1.3688610792160034, | |
| "eval_rewards/margins": 0.24796034395694733, | |
| "eval_rewards/rejected": 1.1209006309509277, | |
| "eval_runtime": 174.13, | |
| "eval_samples_per_second": 6.84, | |
| "eval_steps_per_second": 6.84, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.7634224301663038, | |
| "grad_norm": 154.33078002929688, | |
| "learning_rate": 1.1856243052982586e-07, | |
| "logits/chosen": -6.247130393981934, | |
| "logits/rejected": -6.239518642425537, | |
| "logps/chosen": -976.1388549804688, | |
| "logps/rejected": -855.1339111328125, | |
| "loss": 0.5808, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": 1.2261674404144287, | |
| "rewards/margins": 0.4568137526512146, | |
| "rewards/rejected": 0.7693536877632141, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.7671283642933246, | |
| "grad_norm": 152.96670532226562, | |
| "learning_rate": 1.1670989255279732e-07, | |
| "logits/chosen": -6.12724494934082, | |
| "logits/rejected": -6.189521312713623, | |
| "logps/chosen": -933.6554565429688, | |
| "logps/rejected": -847.5482177734375, | |
| "loss": 0.6256, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": 1.2696754932403564, | |
| "rewards/margins": 0.4418273866176605, | |
| "rewards/rejected": 0.8278481364250183, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.7708342984203456, | |
| "grad_norm": 171.41937255859375, | |
| "learning_rate": 1.148573545757688e-07, | |
| "logits/chosen": -6.24149227142334, | |
| "logits/rejected": -6.186224937438965, | |
| "logps/chosen": -952.6795654296875, | |
| "logps/rejected": -866.2142333984375, | |
| "loss": 0.5913, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": 1.2056645154953003, | |
| "rewards/margins": 0.43794241547584534, | |
| "rewards/rejected": 0.7677222490310669, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.7745402325473665, | |
| "grad_norm": 202.81951904296875, | |
| "learning_rate": 1.1300481659874027e-07, | |
| "logits/chosen": -6.235200881958008, | |
| "logits/rejected": -6.2063446044921875, | |
| "logps/chosen": -846.9156494140625, | |
| "logps/rejected": -822.10205078125, | |
| "loss": 0.6708, | |
| "rewards/accuracies": 0.606249988079071, | |
| "rewards/chosen": 1.0181329250335693, | |
| "rewards/margins": 0.2550104558467865, | |
| "rewards/rejected": 0.7631224393844604, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.7782461666743874, | |
| "grad_norm": 183.90293884277344, | |
| "learning_rate": 1.1115227862171175e-07, | |
| "logits/chosen": -6.047713279724121, | |
| "logits/rejected": -6.1482133865356445, | |
| "logps/chosen": -942.37060546875, | |
| "logps/rejected": -869.7897338867188, | |
| "loss": 0.5837, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 1.1601864099502563, | |
| "rewards/margins": 0.4576262831687927, | |
| "rewards/rejected": 0.7025600671768188, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.7782461666743874, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.365157127380371, | |
| "eval_logps/chosen": -1140.8760986328125, | |
| "eval_logps/rejected": -1051.16064453125, | |
| "eval_loss": 0.6740830540657043, | |
| "eval_rewards/accuracies": 0.5994962453842163, | |
| "eval_rewards/chosen": 1.3469244241714478, | |
| "eval_rewards/margins": 0.23882101476192474, | |
| "eval_rewards/rejected": 1.1081035137176514, | |
| "eval_runtime": 174.0197, | |
| "eval_samples_per_second": 6.844, | |
| "eval_steps_per_second": 6.844, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.7819521008014082, | |
| "grad_norm": 165.31671142578125, | |
| "learning_rate": 1.0929974064468321e-07, | |
| "logits/chosen": -6.240053653717041, | |
| "logits/rejected": -6.137989044189453, | |
| "logps/chosen": -927.2156372070312, | |
| "logps/rejected": -787.1029052734375, | |
| "loss": 0.5752, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": 1.1965583562850952, | |
| "rewards/margins": 0.5078359842300415, | |
| "rewards/rejected": 0.6887223124504089, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.7856580349284291, | |
| "grad_norm": 187.2274932861328, | |
| "learning_rate": 1.0744720266765468e-07, | |
| "logits/chosen": -6.173762321472168, | |
| "logits/rejected": -6.2248053550720215, | |
| "logps/chosen": -946.9639892578125, | |
| "logps/rejected": -851.115234375, | |
| "loss": 0.5905, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": 1.1326261758804321, | |
| "rewards/margins": 0.43694519996643066, | |
| "rewards/rejected": 0.6956809163093567, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.78936396905545, | |
| "grad_norm": 203.85891723632812, | |
| "learning_rate": 1.0559466469062616e-07, | |
| "logits/chosen": -6.200101375579834, | |
| "logits/rejected": -6.217113494873047, | |
| "logps/chosen": -970.5763549804688, | |
| "logps/rejected": -895.93310546875, | |
| "loss": 0.6369, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": 1.2285493612289429, | |
| "rewards/margins": 0.3296011984348297, | |
| "rewards/rejected": 0.8989483118057251, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.793069903182471, | |
| "grad_norm": 204.75799560546875, | |
| "learning_rate": 1.0374212671359762e-07, | |
| "logits/chosen": -6.152576446533203, | |
| "logits/rejected": -6.173853874206543, | |
| "logps/chosen": -1001.7930908203125, | |
| "logps/rejected": -783.9508056640625, | |
| "loss": 0.607, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 1.1725000143051147, | |
| "rewards/margins": 0.419525682926178, | |
| "rewards/rejected": 0.7529743909835815, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.7967758373094919, | |
| "grad_norm": 217.63076782226562, | |
| "learning_rate": 1.018895887365691e-07, | |
| "logits/chosen": -6.167757987976074, | |
| "logits/rejected": -6.10471248626709, | |
| "logps/chosen": -996.0086669921875, | |
| "logps/rejected": -872.7799072265625, | |
| "loss": 0.6336, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 1.3676767349243164, | |
| "rewards/margins": 0.4060121476650238, | |
| "rewards/rejected": 0.9616644978523254, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.7967758373094919, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.359668731689453, | |
| "eval_logps/chosen": -1139.7021484375, | |
| "eval_logps/rejected": -1050.19970703125, | |
| "eval_loss": 0.6704598665237427, | |
| "eval_rewards/accuracies": 0.6137699484825134, | |
| "eval_rewards/chosen": 1.4643239974975586, | |
| "eval_rewards/margins": 0.2601255178451538, | |
| "eval_rewards/rejected": 1.2041983604431152, | |
| "eval_runtime": 174.4958, | |
| "eval_samples_per_second": 6.825, | |
| "eval_steps_per_second": 6.825, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.8004817714365127, | |
| "grad_norm": 203.3338165283203, | |
| "learning_rate": 1.0003705075954057e-07, | |
| "logits/chosen": -6.282981872558594, | |
| "logits/rejected": -6.3550286293029785, | |
| "logps/chosen": -897.6851806640625, | |
| "logps/rejected": -796.6698608398438, | |
| "loss": 0.6354, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 1.2425462007522583, | |
| "rewards/margins": 0.3381389081478119, | |
| "rewards/rejected": 0.9044073224067688, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.8041877055635336, | |
| "grad_norm": 164.21310424804688, | |
| "learning_rate": 9.818451278251204e-08, | |
| "logits/chosen": -6.134262561798096, | |
| "logits/rejected": -6.197000503540039, | |
| "logps/chosen": -919.0861206054688, | |
| "logps/rejected": -864.6526489257812, | |
| "loss": 0.6287, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": 1.2566717863082886, | |
| "rewards/margins": 0.4035015106201172, | |
| "rewards/rejected": 0.8531702160835266, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.8078936396905545, | |
| "grad_norm": 153.4619598388672, | |
| "learning_rate": 9.633197480548351e-08, | |
| "logits/chosen": -6.275097846984863, | |
| "logits/rejected": null, | |
| "logps/chosen": -924.9786376953125, | |
| "logps/rejected": -775.713134765625, | |
| "loss": 0.5827, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 1.353480577468872, | |
| "rewards/margins": 0.5435738563537598, | |
| "rewards/rejected": 0.8099066019058228, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.8115995738175754, | |
| "grad_norm": 150.305419921875, | |
| "learning_rate": 9.447943682845498e-08, | |
| "logits/chosen": -6.12339973449707, | |
| "logits/rejected": -6.132723808288574, | |
| "logps/chosen": -996.9190673828125, | |
| "logps/rejected": -849.7203369140625, | |
| "loss": 0.6283, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": 1.3306301832199097, | |
| "rewards/margins": 0.4646291732788086, | |
| "rewards/rejected": 0.8660010099411011, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.8153055079445963, | |
| "grad_norm": 137.26937866210938, | |
| "learning_rate": 9.262689885142645e-08, | |
| "logits/chosen": -6.194148063659668, | |
| "logits/rejected": -6.139514923095703, | |
| "logps/chosen": -874.9927978515625, | |
| "logps/rejected": -870.7437744140625, | |
| "loss": 0.6213, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 1.2261251211166382, | |
| "rewards/margins": 0.37579071521759033, | |
| "rewards/rejected": 0.8503344655036926, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.8153055079445963, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.357300758361816, | |
| "eval_logps/chosen": -1138.9786376953125, | |
| "eval_logps/rejected": -1049.5445556640625, | |
| "eval_loss": 0.6698047518730164, | |
| "eval_rewards/accuracies": 0.6120907068252563, | |
| "eval_rewards/chosen": 1.5366746187210083, | |
| "eval_rewards/margins": 0.26695773005485535, | |
| "eval_rewards/rejected": 1.269716739654541, | |
| "eval_runtime": 174.7913, | |
| "eval_samples_per_second": 6.814, | |
| "eval_steps_per_second": 6.814, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.8190114420716171, | |
| "grad_norm": 195.36293029785156, | |
| "learning_rate": 9.077436087439792e-08, | |
| "logits/chosen": -6.112738609313965, | |
| "logits/rejected": -6.180370807647705, | |
| "logps/chosen": -958.05810546875, | |
| "logps/rejected": -914.5363159179688, | |
| "loss": 0.6249, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": 1.3648970127105713, | |
| "rewards/margins": 0.38803738355636597, | |
| "rewards/rejected": 0.9768595695495605, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.822717376198638, | |
| "grad_norm": 218.26048278808594, | |
| "learning_rate": 8.89218228973694e-08, | |
| "logits/chosen": -6.269371032714844, | |
| "logits/rejected": -6.25061559677124, | |
| "logps/chosen": -948.0462036132812, | |
| "logps/rejected": -886.0133666992188, | |
| "loss": 0.6872, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 1.214468002319336, | |
| "rewards/margins": 0.23822224140167236, | |
| "rewards/rejected": 0.9762457013130188, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.826423310325659, | |
| "grad_norm": 211.05848693847656, | |
| "learning_rate": 8.706928492034086e-08, | |
| "logits/chosen": -6.225638389587402, | |
| "logits/rejected": -6.232121467590332, | |
| "logps/chosen": -908.8572998046875, | |
| "logps/rejected": -863.0309448242188, | |
| "loss": 0.6504, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": 1.1848371028900146, | |
| "rewards/margins": 0.3589955270290375, | |
| "rewards/rejected": 0.8258415460586548, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.8301292444526799, | |
| "grad_norm": 225.31521606445312, | |
| "learning_rate": 8.521674694331234e-08, | |
| "logits/chosen": -6.275576591491699, | |
| "logits/rejected": -6.208001136779785, | |
| "logps/chosen": -863.1882934570312, | |
| "logps/rejected": -742.2914428710938, | |
| "loss": 0.5924, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": 1.097625970840454, | |
| "rewards/margins": 0.4352286756038666, | |
| "rewards/rejected": 0.6623972058296204, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.8338351785797008, | |
| "grad_norm": 229.04183959960938, | |
| "learning_rate": 8.336420896628381e-08, | |
| "logits/chosen": -6.226241111755371, | |
| "logits/rejected": -6.138689994812012, | |
| "logps/chosen": -1008.5877685546875, | |
| "logps/rejected": -819.7078247070312, | |
| "loss": 0.6272, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": 1.2099117040634155, | |
| "rewards/margins": 0.3881527781486511, | |
| "rewards/rejected": 0.8217589259147644, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.8338351785797008, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.360867500305176, | |
| "eval_logps/chosen": -1140.3447265625, | |
| "eval_logps/rejected": -1050.7974853515625, | |
| "eval_loss": 0.6691888570785522, | |
| "eval_rewards/accuracies": 0.6146095991134644, | |
| "eval_rewards/chosen": 1.4000587463378906, | |
| "eval_rewards/margins": 0.2556445896625519, | |
| "eval_rewards/rejected": 1.1444141864776611, | |
| "eval_runtime": 174.9273, | |
| "eval_samples_per_second": 6.809, | |
| "eval_steps_per_second": 6.809, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.8375411127067216, | |
| "grad_norm": 179.97850036621094, | |
| "learning_rate": 8.151167098925527e-08, | |
| "logits/chosen": -6.222306251525879, | |
| "logits/rejected": -6.101675510406494, | |
| "logps/chosen": -929.2380981445312, | |
| "logps/rejected": -836.9358520507812, | |
| "loss": 0.5694, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 1.3586682081222534, | |
| "rewards/margins": 0.4944392740726471, | |
| "rewards/rejected": 0.8642290234565735, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.8412470468337425, | |
| "grad_norm": 187.8804473876953, | |
| "learning_rate": 7.965913301222675e-08, | |
| "logits/chosen": -6.19569730758667, | |
| "logits/rejected": -6.270641326904297, | |
| "logps/chosen": -982.6009521484375, | |
| "logps/rejected": -859.2730712890625, | |
| "loss": 0.6045, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 1.1823841333389282, | |
| "rewards/margins": 0.3857570290565491, | |
| "rewards/rejected": 0.7966271638870239, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.8449529809607634, | |
| "grad_norm": 250.08534240722656, | |
| "learning_rate": 7.780659503519822e-08, | |
| "logits/chosen": -6.17086935043335, | |
| "logits/rejected": -6.222559452056885, | |
| "logps/chosen": -1034.419677734375, | |
| "logps/rejected": -955.6583251953125, | |
| "loss": 0.659, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 1.3033745288848877, | |
| "rewards/margins": 0.39839601516723633, | |
| "rewards/rejected": 0.9049783945083618, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.8486589150877843, | |
| "grad_norm": 148.72601318359375, | |
| "learning_rate": 7.59540570581697e-08, | |
| "logits/chosen": -6.179747581481934, | |
| "logits/rejected": -6.1192827224731445, | |
| "logps/chosen": -986.9896240234375, | |
| "logps/rejected": -826.3615112304688, | |
| "loss": 0.5721, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": 1.323201298713684, | |
| "rewards/margins": 0.5028241276741028, | |
| "rewards/rejected": 0.8203772306442261, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.8523648492148052, | |
| "grad_norm": 178.256591796875, | |
| "learning_rate": 7.410151908114116e-08, | |
| "logits/chosen": -6.32712459564209, | |
| "logits/rejected": -6.3251752853393555, | |
| "logps/chosen": -896.9401245117188, | |
| "logps/rejected": -792.0689697265625, | |
| "loss": 0.6251, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": 1.158760905265808, | |
| "rewards/margins": 0.38926878571510315, | |
| "rewards/rejected": 0.7694920897483826, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.8523648492148052, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.361525535583496, | |
| "eval_logps/chosen": -1140.1575927734375, | |
| "eval_logps/rejected": -1050.596923828125, | |
| "eval_loss": 0.667841911315918, | |
| "eval_rewards/accuracies": 0.6095718145370483, | |
| "eval_rewards/chosen": 1.418774962425232, | |
| "eval_rewards/margins": 0.25429922342300415, | |
| "eval_rewards/rejected": 1.164475917816162, | |
| "eval_runtime": 174.9606, | |
| "eval_samples_per_second": 6.807, | |
| "eval_steps_per_second": 6.807, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.8560707833418261, | |
| "grad_norm": 289.4090270996094, | |
| "learning_rate": 7.224898110411264e-08, | |
| "logits/chosen": -6.040954113006592, | |
| "logits/rejected": -6.1116743087768555, | |
| "logps/chosen": -868.0579833984375, | |
| "logps/rejected": -742.7337646484375, | |
| "loss": 0.5972, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": 1.136728286743164, | |
| "rewards/margins": 0.48759302496910095, | |
| "rewards/rejected": 0.6491352915763855, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.859776717468847, | |
| "grad_norm": 209.7076416015625, | |
| "learning_rate": 7.039644312708411e-08, | |
| "logits/chosen": -6.146378993988037, | |
| "logits/rejected": -6.205878257751465, | |
| "logps/chosen": -955.7279052734375, | |
| "logps/rejected": -776.606201171875, | |
| "loss": 0.6427, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 1.3216975927352905, | |
| "rewards/margins": 0.4136362671852112, | |
| "rewards/rejected": 0.9080612063407898, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.8634826515958679, | |
| "grad_norm": 157.4209442138672, | |
| "learning_rate": 6.854390515005557e-08, | |
| "logits/chosen": -6.1600141525268555, | |
| "logits/rejected": -6.090916633605957, | |
| "logps/chosen": -1095.3663330078125, | |
| "logps/rejected": -915.3992919921875, | |
| "loss": 0.5802, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": 1.4250476360321045, | |
| "rewards/margins": 0.5543738007545471, | |
| "rewards/rejected": 0.8706739544868469, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.8671885857228888, | |
| "grad_norm": 202.49034118652344, | |
| "learning_rate": 6.669136717302705e-08, | |
| "logits/chosen": -6.110904216766357, | |
| "logits/rejected": -6.1261210441589355, | |
| "logps/chosen": -967.0671997070312, | |
| "logps/rejected": -858.1019287109375, | |
| "loss": 0.6087, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": 1.3673769235610962, | |
| "rewards/margins": 0.5143194198608398, | |
| "rewards/rejected": 0.8530575037002563, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.8708945198499096, | |
| "grad_norm": 103.83200073242188, | |
| "learning_rate": 6.483882919599852e-08, | |
| "logits/chosen": -6.204503059387207, | |
| "logits/rejected": -6.187712669372559, | |
| "logps/chosen": -872.6140747070312, | |
| "logps/rejected": -760.0308227539062, | |
| "loss": 0.6168, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": 1.1666477918624878, | |
| "rewards/margins": 0.4573976993560791, | |
| "rewards/rejected": 0.7092500925064087, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.8708945198499096, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.359028339385986, | |
| "eval_logps/chosen": -1138.91455078125, | |
| "eval_logps/rejected": -1049.516845703125, | |
| "eval_loss": 0.668138325214386, | |
| "eval_rewards/accuracies": 0.6221662759780884, | |
| "eval_rewards/chosen": 1.543074131011963, | |
| "eval_rewards/margins": 0.2705841064453125, | |
| "eval_rewards/rejected": 1.2724900245666504, | |
| "eval_runtime": 174.7239, | |
| "eval_samples_per_second": 6.816, | |
| "eval_steps_per_second": 6.816, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.8746004539769305, | |
| "grad_norm": 226.57864379882812, | |
| "learning_rate": 6.298629121897e-08, | |
| "logits/chosen": null, | |
| "logits/rejected": -6.086965084075928, | |
| "logps/chosen": -982.00390625, | |
| "logps/rejected": -850.6180419921875, | |
| "loss": 0.6142, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": 1.2618882656097412, | |
| "rewards/margins": 0.42980679869651794, | |
| "rewards/rejected": 0.8320814967155457, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.8783063881039515, | |
| "grad_norm": 250.1316680908203, | |
| "learning_rate": 6.113375324194146e-08, | |
| "logits/chosen": -6.260158061981201, | |
| "logits/rejected": -6.332103252410889, | |
| "logps/chosen": -921.3849487304688, | |
| "logps/rejected": -801.4791259765625, | |
| "loss": 0.6029, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": 1.3854167461395264, | |
| "rewards/margins": 0.49748069047927856, | |
| "rewards/rejected": 0.8879362344741821, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.8820123222309724, | |
| "grad_norm": 257.1830749511719, | |
| "learning_rate": 5.928121526491293e-08, | |
| "logits/chosen": -6.317752361297607, | |
| "logits/rejected": null, | |
| "logps/chosen": -978.55126953125, | |
| "logps/rejected": -801.6576538085938, | |
| "loss": 0.5736, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 1.3101433515548706, | |
| "rewards/margins": 0.5170674324035645, | |
| "rewards/rejected": 0.7930759787559509, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.8857182563579933, | |
| "grad_norm": 174.4718475341797, | |
| "learning_rate": 5.74286772878844e-08, | |
| "logits/chosen": -6.182769298553467, | |
| "logits/rejected": null, | |
| "logps/chosen": -841.9371948242188, | |
| "logps/rejected": -779.7135009765625, | |
| "loss": 0.6186, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 1.247337818145752, | |
| "rewards/margins": 0.39551234245300293, | |
| "rewards/rejected": 0.8518252372741699, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.8894241904850141, | |
| "grad_norm": 150.2713165283203, | |
| "learning_rate": 5.557613931085587e-08, | |
| "logits/chosen": null, | |
| "logits/rejected": -6.047909736633301, | |
| "logps/chosen": -949.482421875, | |
| "logps/rejected": -869.8361206054688, | |
| "loss": 0.6183, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": 1.4006798267364502, | |
| "rewards/margins": 0.3790472149848938, | |
| "rewards/rejected": 1.0216325521469116, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.8894241904850141, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.359555244445801, | |
| "eval_logps/chosen": -1138.1962890625, | |
| "eval_logps/rejected": -1048.9058837890625, | |
| "eval_loss": 0.6646097898483276, | |
| "eval_rewards/accuracies": 0.6221662759780884, | |
| "eval_rewards/chosen": 1.6149109601974487, | |
| "eval_rewards/margins": 0.2813268303871155, | |
| "eval_rewards/rejected": 1.333584189414978, | |
| "eval_runtime": 174.9379, | |
| "eval_samples_per_second": 6.808, | |
| "eval_steps_per_second": 6.808, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.893130124612035, | |
| "grad_norm": 142.9062042236328, | |
| "learning_rate": 5.372360133382734e-08, | |
| "logits/chosen": -6.116464138031006, | |
| "logits/rejected": -6.133796691894531, | |
| "logps/chosen": -922.8450317382812, | |
| "logps/rejected": -898.6383666992188, | |
| "loss": 0.633, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": 1.3375623226165771, | |
| "rewards/margins": 0.38691186904907227, | |
| "rewards/rejected": 0.9506505131721497, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.8968360587390559, | |
| "grad_norm": 194.79432678222656, | |
| "learning_rate": 5.187106335679881e-08, | |
| "logits/chosen": -6.197569370269775, | |
| "logits/rejected": -6.240142345428467, | |
| "logps/chosen": -906.27001953125, | |
| "logps/rejected": -837.5906372070312, | |
| "loss": 0.6182, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": 1.3941477537155151, | |
| "rewards/margins": 0.47745227813720703, | |
| "rewards/rejected": 0.9166954159736633, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.9005419928660768, | |
| "grad_norm": 176.5300750732422, | |
| "learning_rate": 5.001852537977028e-08, | |
| "logits/chosen": -6.1893310546875, | |
| "logits/rejected": -6.254434585571289, | |
| "logps/chosen": -949.20654296875, | |
| "logps/rejected": -864.1427001953125, | |
| "loss": 0.6234, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": 1.4380910396575928, | |
| "rewards/margins": 0.3952023983001709, | |
| "rewards/rejected": 1.0428886413574219, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.9042479269930978, | |
| "grad_norm": 204.15846252441406, | |
| "learning_rate": 4.8165987402741755e-08, | |
| "logits/chosen": -6.22286319732666, | |
| "logits/rejected": -6.251989841461182, | |
| "logps/chosen": -1061.7998046875, | |
| "logps/rejected": -907.6027221679688, | |
| "loss": 0.6386, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": 1.4448530673980713, | |
| "rewards/margins": 0.4636596739292145, | |
| "rewards/rejected": 0.9811934232711792, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.9079538611201186, | |
| "grad_norm": 172.95408630371094, | |
| "learning_rate": 4.631344942571323e-08, | |
| "logits/chosen": -6.1700005531311035, | |
| "logits/rejected": -6.175479412078857, | |
| "logps/chosen": -931.7879028320312, | |
| "logps/rejected": -793.9867553710938, | |
| "loss": 0.5927, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 1.3887077569961548, | |
| "rewards/margins": 0.46491020917892456, | |
| "rewards/rejected": 0.9237974882125854, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.9079538611201186, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.360927581787109, | |
| "eval_logps/chosen": -1138.57568359375, | |
| "eval_logps/rejected": -1049.185302734375, | |
| "eval_loss": 0.6698666214942932, | |
| "eval_rewards/accuracies": 0.6179680824279785, | |
| "eval_rewards/chosen": 1.576967477798462, | |
| "eval_rewards/margins": 0.2713308334350586, | |
| "eval_rewards/rejected": 1.3056366443634033, | |
| "eval_runtime": 174.8797, | |
| "eval_samples_per_second": 6.81, | |
| "eval_steps_per_second": 6.81, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.9116597952471395, | |
| "grad_norm": 199.45687866210938, | |
| "learning_rate": 4.44609114486847e-08, | |
| "logits/chosen": -6.102917194366455, | |
| "logits/rejected": -6.159636974334717, | |
| "logps/chosen": -846.6583251953125, | |
| "logps/rejected": -778.142333984375, | |
| "loss": 0.6016, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": 1.2215101718902588, | |
| "rewards/margins": 0.4805546700954437, | |
| "rewards/rejected": 0.7409554719924927, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.9153657293741604, | |
| "grad_norm": 148.47500610351562, | |
| "learning_rate": 4.260837347165617e-08, | |
| "logits/chosen": -6.2448930740356445, | |
| "logits/rejected": -6.158895015716553, | |
| "logps/chosen": -1002.0369262695312, | |
| "logps/rejected": -890.6515502929688, | |
| "loss": 0.5622, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": 1.374596357345581, | |
| "rewards/margins": 0.5444284081459045, | |
| "rewards/rejected": 0.8301678895950317, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.9190716635011813, | |
| "grad_norm": 186.5249481201172, | |
| "learning_rate": 4.075583549462764e-08, | |
| "logits/chosen": -6.199591159820557, | |
| "logits/rejected": -6.20455265045166, | |
| "logps/chosen": -952.9064331054688, | |
| "logps/rejected": -825.30859375, | |
| "loss": 0.6153, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": 1.4213062524795532, | |
| "rewards/margins": 0.4216051697731018, | |
| "rewards/rejected": 0.9997010231018066, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.9227775976282021, | |
| "grad_norm": 175.28465270996094, | |
| "learning_rate": 3.890329751759911e-08, | |
| "logits/chosen": -6.196984767913818, | |
| "logits/rejected": -6.227551460266113, | |
| "logps/chosen": -967.3463745117188, | |
| "logps/rejected": -785.5040283203125, | |
| "loss": 0.5873, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": 1.3924157619476318, | |
| "rewards/margins": 0.5140901803970337, | |
| "rewards/rejected": 0.8783254623413086, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.926483531755223, | |
| "grad_norm": 205.52532958984375, | |
| "learning_rate": 3.705075954057058e-08, | |
| "logits/chosen": -6.220477104187012, | |
| "logits/rejected": -6.22896146774292, | |
| "logps/chosen": -926.861328125, | |
| "logps/rejected": -857.521484375, | |
| "loss": 0.6039, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": 1.3103541135787964, | |
| "rewards/margins": 0.4116063117980957, | |
| "rewards/rejected": 0.8987478017807007, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.926483531755223, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.359811305999756, | |
| "eval_logps/chosen": -1138.34375, | |
| "eval_logps/rejected": -1049.01904296875, | |
| "eval_loss": 0.6664721965789795, | |
| "eval_rewards/accuracies": 0.6263644099235535, | |
| "eval_rewards/chosen": 1.6001617908477783, | |
| "eval_rewards/margins": 0.2778994143009186, | |
| "eval_rewards/rejected": 1.3222622871398926, | |
| "eval_runtime": 175.0068, | |
| "eval_samples_per_second": 6.805, | |
| "eval_steps_per_second": 6.805, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.9301894658822439, | |
| "grad_norm": 126.9505615234375, | |
| "learning_rate": 3.5198221563542054e-08, | |
| "logits/chosen": null, | |
| "logits/rejected": -6.187649726867676, | |
| "logps/chosen": -1037.003173828125, | |
| "logps/rejected": -877.7476806640625, | |
| "loss": 0.5535, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": 1.5431692600250244, | |
| "rewards/margins": 0.6853631734848022, | |
| "rewards/rejected": 0.8578060865402222, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.9338954000092649, | |
| "grad_norm": 217.86968994140625, | |
| "learning_rate": 3.3345683586513526e-08, | |
| "logits/chosen": -6.283205032348633, | |
| "logits/rejected": -6.221907615661621, | |
| "logps/chosen": -924.9992065429688, | |
| "logps/rejected": -855.3511962890625, | |
| "loss": 0.6438, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 1.3765745162963867, | |
| "rewards/margins": 0.3500059247016907, | |
| "rewards/rejected": 1.0265684127807617, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.9376013341362858, | |
| "grad_norm": 170.4639434814453, | |
| "learning_rate": 3.1493145609485e-08, | |
| "logits/chosen": -6.257707595825195, | |
| "logits/rejected": -6.27487850189209, | |
| "logps/chosen": -903.9068603515625, | |
| "logps/rejected": -843.6154174804688, | |
| "loss": 0.6395, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": 1.2748887538909912, | |
| "rewards/margins": 0.3705710768699646, | |
| "rewards/rejected": 0.9043177366256714, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.9413072682633066, | |
| "grad_norm": 150.2277069091797, | |
| "learning_rate": 2.9640607632456464e-08, | |
| "logits/chosen": -6.218627452850342, | |
| "logits/rejected": -6.296253204345703, | |
| "logps/chosen": -1027.5751953125, | |
| "logps/rejected": -868.8278198242188, | |
| "loss": 0.6068, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": 1.4749407768249512, | |
| "rewards/margins": 0.5608575940132141, | |
| "rewards/rejected": 0.9140831232070923, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.9450132023903275, | |
| "grad_norm": 176.552490234375, | |
| "learning_rate": 2.7788069655427936e-08, | |
| "logits/chosen": -6.115626335144043, | |
| "logits/rejected": -6.002920150756836, | |
| "logps/chosen": -835.7297973632812, | |
| "logps/rejected": -763.3617553710938, | |
| "loss": 0.6288, | |
| "rewards/accuracies": 0.6312500238418579, | |
| "rewards/chosen": 1.1424133777618408, | |
| "rewards/margins": 0.3475190997123718, | |
| "rewards/rejected": 0.7948943376541138, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.9450132023903275, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.3612494468688965, | |
| "eval_logps/chosen": -1138.4256591796875, | |
| "eval_logps/rejected": -1049.066162109375, | |
| "eval_loss": 0.6675523519515991, | |
| "eval_rewards/accuracies": 0.6146095991134644, | |
| "eval_rewards/chosen": 1.5919551849365234, | |
| "eval_rewards/margins": 0.2744098901748657, | |
| "eval_rewards/rejected": 1.3175454139709473, | |
| "eval_runtime": 174.7172, | |
| "eval_samples_per_second": 6.817, | |
| "eval_steps_per_second": 6.817, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.9487191365173484, | |
| "grad_norm": 168.2140350341797, | |
| "learning_rate": 2.5935531678399405e-08, | |
| "logits/chosen": -6.1927971839904785, | |
| "logits/rejected": -6.197493553161621, | |
| "logps/chosen": -991.3941650390625, | |
| "logps/rejected": -906.1048583984375, | |
| "loss": 0.6289, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 1.511885404586792, | |
| "rewards/margins": 0.4927326738834381, | |
| "rewards/rejected": 1.0191527605056763, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.9524250706443693, | |
| "grad_norm": 171.1510009765625, | |
| "learning_rate": 2.4082993701370877e-08, | |
| "logits/chosen": -6.209620475769043, | |
| "logits/rejected": -6.170471668243408, | |
| "logps/chosen": -925.6842041015625, | |
| "logps/rejected": -813.4041137695312, | |
| "loss": 0.6095, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": 1.367976188659668, | |
| "rewards/margins": 0.4230473041534424, | |
| "rewards/rejected": 0.9449288249015808, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.9561310047713902, | |
| "grad_norm": 194.6319580078125, | |
| "learning_rate": 2.223045572434235e-08, | |
| "logits/chosen": -6.165289878845215, | |
| "logits/rejected": -6.122786521911621, | |
| "logps/chosen": -965.0526123046875, | |
| "logps/rejected": -807.0406494140625, | |
| "loss": 0.5615, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": 1.4062001705169678, | |
| "rewards/margins": 0.6220154166221619, | |
| "rewards/rejected": 0.7841848134994507, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.959836938898411, | |
| "grad_norm": 187.33181762695312, | |
| "learning_rate": 2.037791774731382e-08, | |
| "logits/chosen": -6.2167792320251465, | |
| "logits/rejected": null, | |
| "logps/chosen": -980.70263671875, | |
| "logps/rejected": -831.4769287109375, | |
| "loss": 0.568, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 1.3932929039001465, | |
| "rewards/margins": 0.5573422908782959, | |
| "rewards/rejected": 0.8359505534172058, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.963542873025432, | |
| "grad_norm": 142.72923278808594, | |
| "learning_rate": 1.852537977028529e-08, | |
| "logits/chosen": -6.158575534820557, | |
| "logits/rejected": -6.203221321105957, | |
| "logps/chosen": -921.6607666015625, | |
| "logps/rejected": -842.1689453125, | |
| "loss": 0.5714, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": 1.325329065322876, | |
| "rewards/margins": 0.510645866394043, | |
| "rewards/rejected": 0.8146833181381226, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.963542873025432, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.361992835998535, | |
| "eval_logps/chosen": -1138.678955078125, | |
| "eval_logps/rejected": -1049.255615234375, | |
| "eval_loss": 0.6706271171569824, | |
| "eval_rewards/accuracies": 0.6120907068252563, | |
| "eval_rewards/chosen": 1.566640853881836, | |
| "eval_rewards/margins": 0.26803797483444214, | |
| "eval_rewards/rejected": 1.2986030578613281, | |
| "eval_runtime": 174.9793, | |
| "eval_samples_per_second": 6.807, | |
| "eval_steps_per_second": 6.807, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.9672488071524529, | |
| "grad_norm": 190.34542846679688, | |
| "learning_rate": 1.6672841793256763e-08, | |
| "logits/chosen": -6.201694488525391, | |
| "logits/rejected": -6.2331223487854, | |
| "logps/chosen": -881.90576171875, | |
| "logps/rejected": -822.5655517578125, | |
| "loss": 0.6516, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": 1.2538020610809326, | |
| "rewards/margins": 0.3467678427696228, | |
| "rewards/rejected": 0.9070342183113098, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.9709547412794738, | |
| "grad_norm": 273.5181884765625, | |
| "learning_rate": 1.4820303816228232e-08, | |
| "logits/chosen": -6.193233489990234, | |
| "logits/rejected": -6.104687690734863, | |
| "logps/chosen": -866.4398193359375, | |
| "logps/rejected": -785.888916015625, | |
| "loss": 0.6333, | |
| "rewards/accuracies": 0.643750011920929, | |
| "rewards/chosen": 1.187961220741272, | |
| "rewards/margins": 0.3987189829349518, | |
| "rewards/rejected": 0.7892423868179321, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.9746606754064947, | |
| "grad_norm": 230.2846221923828, | |
| "learning_rate": 1.2967765839199703e-08, | |
| "logits/chosen": -6.259491920471191, | |
| "logits/rejected": -6.279690265655518, | |
| "logps/chosen": -791.6710815429688, | |
| "logps/rejected": -694.3341674804688, | |
| "loss": 0.6119, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": 1.2341381311416626, | |
| "rewards/margins": 0.4778687059879303, | |
| "rewards/rejected": 0.7562695741653442, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.9783666095335155, | |
| "grad_norm": 219.6723175048828, | |
| "learning_rate": 1.1115227862171175e-08, | |
| "logits/chosen": -6.238982677459717, | |
| "logits/rejected": -6.274487495422363, | |
| "logps/chosen": -911.5924072265625, | |
| "logps/rejected": -826.4364013671875, | |
| "loss": 0.6341, | |
| "rewards/accuracies": 0.6937500238418579, | |
| "rewards/chosen": 1.3268905878067017, | |
| "rewards/margins": 0.3773989975452423, | |
| "rewards/rejected": 0.9494916200637817, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.9820725436605364, | |
| "grad_norm": 228.98863220214844, | |
| "learning_rate": 9.262689885142645e-09, | |
| "logits/chosen": -6.187376976013184, | |
| "logits/rejected": -6.207940578460693, | |
| "logps/chosen": -867.3173828125, | |
| "logps/rejected": -846.06298828125, | |
| "loss": 0.6064, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": 1.1946938037872314, | |
| "rewards/margins": 0.3911550045013428, | |
| "rewards/rejected": 0.8035389184951782, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.9820725436605364, | |
| "eval_logits/chosen": null, | |
| "eval_logits/rejected": -6.362624645233154, | |
| "eval_logps/chosen": -1138.8880615234375, | |
| "eval_logps/rejected": -1049.453125, | |
| "eval_loss": 0.6678369641304016, | |
| "eval_rewards/accuracies": 0.6246851682662964, | |
| "eval_rewards/chosen": 1.5457268953323364, | |
| "eval_rewards/margins": 0.2668676972389221, | |
| "eval_rewards/rejected": 1.2788591384887695, | |
| "eval_runtime": 174.9484, | |
| "eval_samples_per_second": 6.808, | |
| "eval_steps_per_second": 6.808, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.9857784777875573, | |
| "grad_norm": 147.26541137695312, | |
| "learning_rate": 7.410151908114116e-09, | |
| "logits/chosen": -6.185873508453369, | |
| "logits/rejected": -6.177300930023193, | |
| "logps/chosen": -981.9752197265625, | |
| "logps/rejected": -823.0641479492188, | |
| "loss": 0.557, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": 1.3892064094543457, | |
| "rewards/margins": 0.5826338529586792, | |
| "rewards/rejected": 0.8065725564956665, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.9894844119145783, | |
| "grad_norm": 143.0415802001953, | |
| "learning_rate": 5.5576139310855874e-09, | |
| "logits/chosen": -6.007561683654785, | |
| "logits/rejected": -6.002453327178955, | |
| "logps/chosen": -852.1906127929688, | |
| "logps/rejected": -811.2535400390625, | |
| "loss": 0.5599, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": 1.2815325260162354, | |
| "rewards/margins": 0.49190282821655273, | |
| "rewards/rejected": 0.7896297574043274, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.9931903460415991, | |
| "grad_norm": 210.65750122070312, | |
| "learning_rate": 3.705075954057058e-09, | |
| "logits/chosen": -6.069428443908691, | |
| "logits/rejected": -6.068325996398926, | |
| "logps/chosen": -1039.2158203125, | |
| "logps/rejected": -915.1730346679688, | |
| "loss": 0.5495, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": 1.5940990447998047, | |
| "rewards/margins": 0.5972550511360168, | |
| "rewards/rejected": 0.9968441128730774, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.99689628016862, | |
| "grad_norm": 211.33241271972656, | |
| "learning_rate": 1.852537977028529e-09, | |
| "logits/chosen": -6.273778438568115, | |
| "logits/rejected": -6.336636543273926, | |
| "logps/chosen": -950.8792724609375, | |
| "logps/rejected": -863.4602661132812, | |
| "loss": 0.6386, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 1.4326883554458618, | |
| "rewards/margins": 0.3855132758617401, | |
| "rewards/rejected": 1.0471750497817993, | |
| "step": 2690 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 2699, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |