| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.6058631921824107, | |
| "eval_steps": 50, | |
| "global_step": 200, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.013029315960912053, | |
| "grad_norm": 41.75, | |
| "learning_rate": 1.6666666666666667e-06, | |
| "logits/chosen": 0.4338657259941101, | |
| "logits/rejected": 0.4453325867652893, | |
| "logps/chosen": -67.76948547363281, | |
| "logps/rejected": -152.9691162109375, | |
| "loss": 0.6931, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.026058631921824105, | |
| "grad_norm": 36.25, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "logits/chosen": 0.3402215540409088, | |
| "logits/rejected": 0.3878844380378723, | |
| "logps/chosen": -98.9161148071289, | |
| "logps/rejected": -155.82638549804688, | |
| "loss": 0.6931, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.03908794788273615, | |
| "grad_norm": 64.5, | |
| "learning_rate": 5e-06, | |
| "logits/chosen": 0.38514813780784607, | |
| "logits/rejected": 0.36703822016716003, | |
| "logps/chosen": -93.1368408203125, | |
| "logps/rejected": -161.52493286132812, | |
| "loss": 0.6983, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.0311676524579525, | |
| "rewards/margins": -0.002570953220129013, | |
| "rewards/rejected": -0.028596699237823486, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.05211726384364821, | |
| "grad_norm": 29.875, | |
| "learning_rate": 6.666666666666667e-06, | |
| "logits/chosen": 0.4961632192134857, | |
| "logits/rejected": 0.49073392152786255, | |
| "logps/chosen": -94.36677551269531, | |
| "logps/rejected": -176.82952880859375, | |
| "loss": 0.7247, | |
| "rewards/accuracies": 0.40625, | |
| "rewards/chosen": -0.008490505628287792, | |
| "rewards/margins": -0.055457405745983124, | |
| "rewards/rejected": 0.04696689918637276, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.06514657980456026, | |
| "grad_norm": 27.5, | |
| "learning_rate": 8.333333333333334e-06, | |
| "logits/chosen": 0.3893408179283142, | |
| "logits/rejected": 0.41501885652542114, | |
| "logps/chosen": -91.56944274902344, | |
| "logps/rejected": -141.12969970703125, | |
| "loss": 0.6805, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.02342848852276802, | |
| "rewards/margins": 0.03252270072698593, | |
| "rewards/rejected": -0.009094213135540485, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.0781758957654723, | |
| "grad_norm": 28.375, | |
| "learning_rate": 1e-05, | |
| "logits/chosen": 0.4950886070728302, | |
| "logits/rejected": 0.5048765540122986, | |
| "logps/chosen": -79.60177612304688, | |
| "logps/rejected": -174.52386474609375, | |
| "loss": 0.6915, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.0013483259826898575, | |
| "rewards/margins": 0.008842225186526775, | |
| "rewards/rejected": -0.007493901532143354, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.09120521172638436, | |
| "grad_norm": 44.25, | |
| "learning_rate": 1.1666666666666668e-05, | |
| "logits/chosen": 0.3866894543170929, | |
| "logits/rejected": 0.4369007349014282, | |
| "logps/chosen": -73.19027709960938, | |
| "logps/rejected": -144.08810424804688, | |
| "loss": 0.7116, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -0.015920385718345642, | |
| "rewards/margins": -0.026944227516651154, | |
| "rewards/rejected": 0.011023844592273235, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.10423452768729642, | |
| "grad_norm": 63.75, | |
| "learning_rate": 1.3333333333333333e-05, | |
| "logits/chosen": 0.45976200699806213, | |
| "logits/rejected": 0.426272988319397, | |
| "logps/chosen": -71.57977294921875, | |
| "logps/rejected": -137.3433074951172, | |
| "loss": 0.6707, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": 0.01595836505293846, | |
| "rewards/margins": 0.04949212074279785, | |
| "rewards/rejected": -0.03353375196456909, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.11726384364820847, | |
| "grad_norm": 54.75, | |
| "learning_rate": 1.5e-05, | |
| "logits/chosen": 0.49033746123313904, | |
| "logits/rejected": 0.48075181245803833, | |
| "logps/chosen": -91.1353759765625, | |
| "logps/rejected": -167.73594665527344, | |
| "loss": 0.6547, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": 0.04745086282491684, | |
| "rewards/margins": 0.08511507511138916, | |
| "rewards/rejected": -0.03766421973705292, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.13029315960912052, | |
| "grad_norm": 33.5, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "logits/chosen": 0.5154792070388794, | |
| "logits/rejected": 0.4838900566101074, | |
| "logps/chosen": -96.14872741699219, | |
| "logps/rejected": -157.02932739257812, | |
| "loss": 0.6958, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": 0.024153033271431923, | |
| "rewards/margins": 0.006197445094585419, | |
| "rewards/rejected": 0.017955590039491653, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.14332247557003258, | |
| "grad_norm": 35.0, | |
| "learning_rate": 1.8333333333333333e-05, | |
| "logits/chosen": 0.45827457308769226, | |
| "logits/rejected": 0.5124724507331848, | |
| "logps/chosen": -93.97823333740234, | |
| "logps/rejected": -138.24327087402344, | |
| "loss": 0.699, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.01763225719332695, | |
| "rewards/margins": 0.0005271416157484055, | |
| "rewards/rejected": -0.018159402534365654, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.1563517915309446, | |
| "grad_norm": 34.5, | |
| "learning_rate": 2e-05, | |
| "logits/chosen": 0.4826943874359131, | |
| "logits/rejected": 0.43963971734046936, | |
| "logps/chosen": -98.74089050292969, | |
| "logps/rejected": -145.690185546875, | |
| "loss": 0.7101, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -0.030411405488848686, | |
| "rewards/margins": -0.028361458331346512, | |
| "rewards/rejected": -0.0020499457605183125, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.16938110749185667, | |
| "grad_norm": 33.25, | |
| "learning_rate": 2.1666666666666667e-05, | |
| "logits/chosen": 0.384093314409256, | |
| "logits/rejected": 0.4154108166694641, | |
| "logps/chosen": -110.437744140625, | |
| "logps/rejected": -170.55215454101562, | |
| "loss": 0.7018, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.000756765715777874, | |
| "rewards/margins": -0.005527975037693977, | |
| "rewards/rejected": 0.004771207459270954, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.18241042345276873, | |
| "grad_norm": 32.0, | |
| "learning_rate": 2.3333333333333336e-05, | |
| "logits/chosen": 0.3536284565925598, | |
| "logits/rejected": 0.4306492209434509, | |
| "logps/chosen": -87.72677612304688, | |
| "logps/rejected": -135.49493408203125, | |
| "loss": 0.7118, | |
| "rewards/accuracies": 0.375, | |
| "rewards/chosen": -0.030064944177865982, | |
| "rewards/margins": -0.03109516017138958, | |
| "rewards/rejected": 0.001030217856168747, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.19543973941368079, | |
| "grad_norm": 32.5, | |
| "learning_rate": 2.5e-05, | |
| "logits/chosen": 0.4092313051223755, | |
| "logits/rejected": 0.5090660452842712, | |
| "logps/chosen": -95.63008117675781, | |
| "logps/rejected": -135.93472290039062, | |
| "loss": 0.6946, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.0016081184148788452, | |
| "rewards/margins": 0.002313855104148388, | |
| "rewards/rejected": -0.003921976778656244, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.20846905537459284, | |
| "grad_norm": 33.25, | |
| "learning_rate": 2.6666666666666667e-05, | |
| "logits/chosen": 0.4373230040073395, | |
| "logits/rejected": 0.5158215761184692, | |
| "logps/chosen": -115.45347595214844, | |
| "logps/rejected": -160.17929077148438, | |
| "loss": 0.6503, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.015219582244753838, | |
| "rewards/margins": 0.10696868598461151, | |
| "rewards/rejected": -0.09174911677837372, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.22149837133550487, | |
| "grad_norm": 31.25, | |
| "learning_rate": 2.8333333333333335e-05, | |
| "logits/chosen": 0.5184516906738281, | |
| "logits/rejected": 0.5677393674850464, | |
| "logps/chosen": -128.66629028320312, | |
| "logps/rejected": -172.19888305664062, | |
| "loss": 0.635, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -0.0017459085211157799, | |
| "rewards/margins": 0.12914448976516724, | |
| "rewards/rejected": -0.1308903992176056, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.23452768729641693, | |
| "grad_norm": 60.5, | |
| "learning_rate": 3e-05, | |
| "logits/chosen": 0.43745332956314087, | |
| "logits/rejected": 0.4682745337486267, | |
| "logps/chosen": -108.17106628417969, | |
| "logps/rejected": -155.61282348632812, | |
| "loss": 0.6391, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.010563232935965061, | |
| "rewards/margins": 0.1288895308971405, | |
| "rewards/rejected": -0.13945278525352478, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.247557003257329, | |
| "grad_norm": 28.375, | |
| "learning_rate": 3.1666666666666666e-05, | |
| "logits/chosen": 0.4536093473434448, | |
| "logits/rejected": 0.4597874581813812, | |
| "logps/chosen": -80.29083251953125, | |
| "logps/rejected": -146.64483642578125, | |
| "loss": 0.6456, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -0.004713800735771656, | |
| "rewards/margins": 0.10411291569471359, | |
| "rewards/rejected": -0.10882672667503357, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.26058631921824105, | |
| "grad_norm": 50.0, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "logits/chosen": 0.46047478914260864, | |
| "logits/rejected": 0.5494062304496765, | |
| "logps/chosen": -103.00077056884766, | |
| "logps/rejected": -168.70933532714844, | |
| "loss": 0.6503, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.048282139003276825, | |
| "rewards/margins": 0.09549374878406525, | |
| "rewards/rejected": -0.14377588033676147, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.2736156351791531, | |
| "grad_norm": 31.0, | |
| "learning_rate": 3.5e-05, | |
| "logits/chosen": 0.5022985935211182, | |
| "logits/rejected": 0.5251904726028442, | |
| "logps/chosen": -82.43826293945312, | |
| "logps/rejected": -148.17120361328125, | |
| "loss": 0.6247, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.02719825878739357, | |
| "rewards/margins": 0.15447314083576202, | |
| "rewards/rejected": -0.12727488577365875, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.28664495114006516, | |
| "grad_norm": 30.75, | |
| "learning_rate": 3.6666666666666666e-05, | |
| "logits/chosen": 0.4817676544189453, | |
| "logits/rejected": 0.4860598146915436, | |
| "logps/chosen": -101.01628875732422, | |
| "logps/rejected": -146.12977600097656, | |
| "loss": 0.622, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -0.027572251856327057, | |
| "rewards/margins": 0.15721869468688965, | |
| "rewards/rejected": -0.1847909688949585, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.2996742671009772, | |
| "grad_norm": 21.75, | |
| "learning_rate": 3.8333333333333334e-05, | |
| "logits/chosen": 0.48463064432144165, | |
| "logits/rejected": 0.5631467700004578, | |
| "logps/chosen": -81.53482055664062, | |
| "logps/rejected": -135.9483184814453, | |
| "loss": 0.5766, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": 0.021168498322367668, | |
| "rewards/margins": 0.2705130875110626, | |
| "rewards/rejected": -0.2493445873260498, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.3127035830618892, | |
| "grad_norm": 35.25, | |
| "learning_rate": 4e-05, | |
| "logits/chosen": 0.38634905219078064, | |
| "logits/rejected": 0.42648378014564514, | |
| "logps/chosen": -97.1165771484375, | |
| "logps/rejected": -161.6883087158203, | |
| "loss": 0.5806, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -0.008925480768084526, | |
| "rewards/margins": 0.2537250518798828, | |
| "rewards/rejected": -0.2626505196094513, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.3257328990228013, | |
| "grad_norm": 27.25, | |
| "learning_rate": 4.166666666666667e-05, | |
| "logits/chosen": 0.41833925247192383, | |
| "logits/rejected": 0.4584392011165619, | |
| "logps/chosen": -89.66869354248047, | |
| "logps/rejected": -150.55813598632812, | |
| "loss": 0.5952, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": -0.019657809287309647, | |
| "rewards/margins": 0.21433238685131073, | |
| "rewards/rejected": -0.23399019241333008, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.33876221498371334, | |
| "grad_norm": 50.5, | |
| "learning_rate": 4.3333333333333334e-05, | |
| "logits/chosen": 0.46740618348121643, | |
| "logits/rejected": 0.4832380712032318, | |
| "logps/chosen": -62.494773864746094, | |
| "logps/rejected": -146.53067016601562, | |
| "loss": 0.5411, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.0009484302718192339, | |
| "rewards/margins": 0.3466818928718567, | |
| "rewards/rejected": -0.3476303815841675, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.3517915309446254, | |
| "grad_norm": 23.0, | |
| "learning_rate": 4.5e-05, | |
| "logits/chosen": 0.45530009269714355, | |
| "logits/rejected": 0.5172832012176514, | |
| "logps/chosen": -85.00700378417969, | |
| "logps/rejected": -136.05020141601562, | |
| "loss": 0.5479, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": -0.001965973526239395, | |
| "rewards/margins": 0.34270864725112915, | |
| "rewards/rejected": -0.34467458724975586, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.36482084690553745, | |
| "grad_norm": 23.375, | |
| "learning_rate": 4.666666666666667e-05, | |
| "logits/chosen": 0.46558958292007446, | |
| "logits/rejected": 0.5210444331169128, | |
| "logps/chosen": -105.98873901367188, | |
| "logps/rejected": -163.59945678710938, | |
| "loss": 0.519, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.022590279579162598, | |
| "rewards/margins": 0.4247127175331116, | |
| "rewards/rejected": -0.44730299711227417, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.3778501628664495, | |
| "grad_norm": 22.75, | |
| "learning_rate": 4.8333333333333334e-05, | |
| "logits/chosen": 0.4795917868614197, | |
| "logits/rejected": 0.47115039825439453, | |
| "logps/chosen": -107.12705993652344, | |
| "logps/rejected": -142.822509765625, | |
| "loss": 0.5271, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -0.037488676607608795, | |
| "rewards/margins": 0.3942331075668335, | |
| "rewards/rejected": -0.4317218065261841, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.39087947882736157, | |
| "grad_norm": 19.625, | |
| "learning_rate": 5e-05, | |
| "logits/chosen": 0.4289873242378235, | |
| "logits/rejected": 0.5595239996910095, | |
| "logps/chosen": -86.29112243652344, | |
| "logps/rejected": -172.88059997558594, | |
| "loss": 0.459, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": 0.05108689144253731, | |
| "rewards/margins": 0.5967621803283691, | |
| "rewards/rejected": -0.5456752777099609, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.40390879478827363, | |
| "grad_norm": 24.375, | |
| "learning_rate": 4.993150684931507e-05, | |
| "logits/chosen": 0.39370930194854736, | |
| "logits/rejected": 0.42319971323013306, | |
| "logps/chosen": -102.44596862792969, | |
| "logps/rejected": -169.67660522460938, | |
| "loss": 0.4393, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": -0.0332549586892128, | |
| "rewards/margins": 0.642684817314148, | |
| "rewards/rejected": -0.6759397387504578, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.4169381107491857, | |
| "grad_norm": 20.0, | |
| "learning_rate": 4.986301369863014e-05, | |
| "logits/chosen": 0.49218329787254333, | |
| "logits/rejected": 0.5275806784629822, | |
| "logps/chosen": -74.05796813964844, | |
| "logps/rejected": -133.33255004882812, | |
| "loss": 0.4407, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.02741517871618271, | |
| "rewards/margins": 0.6402420997619629, | |
| "rewards/rejected": -0.612826943397522, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.42996742671009774, | |
| "grad_norm": 24.75, | |
| "learning_rate": 4.979452054794521e-05, | |
| "logits/chosen": 0.35451555252075195, | |
| "logits/rejected": 0.40355199575424194, | |
| "logps/chosen": -104.55900573730469, | |
| "logps/rejected": -151.27711486816406, | |
| "loss": 0.4234, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": 0.023018483072519302, | |
| "rewards/margins": 0.6792783737182617, | |
| "rewards/rejected": -0.6562598943710327, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.44299674267100975, | |
| "grad_norm": 17.5, | |
| "learning_rate": 4.972602739726028e-05, | |
| "logits/chosen": 0.40463435649871826, | |
| "logits/rejected": 0.5144488215446472, | |
| "logps/chosen": -72.91780090332031, | |
| "logps/rejected": -145.31849670410156, | |
| "loss": 0.4111, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": 0.02165827713906765, | |
| "rewards/margins": 0.7402617931365967, | |
| "rewards/rejected": -0.7186034917831421, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.4560260586319218, | |
| "grad_norm": 16.5, | |
| "learning_rate": 4.9657534246575346e-05, | |
| "logits/chosen": 0.4734452962875366, | |
| "logits/rejected": 0.5330387353897095, | |
| "logps/chosen": -83.89728546142578, | |
| "logps/rejected": -147.41265869140625, | |
| "loss": 0.3853, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.009855479001998901, | |
| "rewards/margins": 0.8149614930152893, | |
| "rewards/rejected": -0.8248169422149658, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.46905537459283386, | |
| "grad_norm": 24.75, | |
| "learning_rate": 4.958904109589041e-05, | |
| "logits/chosen": 0.3432111144065857, | |
| "logits/rejected": 0.39720407128334045, | |
| "logps/chosen": -84.57624053955078, | |
| "logps/rejected": -131.17434692382812, | |
| "loss": 0.4056, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.0020672655664384365, | |
| "rewards/margins": 0.7789303064346313, | |
| "rewards/rejected": -0.7809975743293762, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.4820846905537459, | |
| "grad_norm": 68.5, | |
| "learning_rate": 4.952054794520548e-05, | |
| "logits/chosen": 0.3694133758544922, | |
| "logits/rejected": 0.42799627780914307, | |
| "logps/chosen": -85.02811431884766, | |
| "logps/rejected": -169.74673461914062, | |
| "loss": 0.3145, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": 0.07135964930057526, | |
| "rewards/margins": 1.2428215742111206, | |
| "rewards/rejected": -1.171462059020996, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.495114006514658, | |
| "grad_norm": 18.0, | |
| "learning_rate": 4.945205479452055e-05, | |
| "logits/chosen": 0.4724690318107605, | |
| "logits/rejected": 0.5161466598510742, | |
| "logps/chosen": -79.45156860351562, | |
| "logps/rejected": -183.5731201171875, | |
| "loss": 0.281, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.08071783930063248, | |
| "rewards/margins": 1.4206629991531372, | |
| "rewards/rejected": -1.3399451971054077, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.50814332247557, | |
| "grad_norm": 10.9375, | |
| "learning_rate": 4.938356164383562e-05, | |
| "logits/chosen": 0.570473313331604, | |
| "logits/rejected": 0.5667930841445923, | |
| "logps/chosen": -67.05783081054688, | |
| "logps/rejected": -160.54501342773438, | |
| "loss": 0.2824, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.012804888188838959, | |
| "rewards/margins": 1.2680517435073853, | |
| "rewards/rejected": -1.255246877670288, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.5211726384364821, | |
| "grad_norm": 15.0, | |
| "learning_rate": 4.9315068493150684e-05, | |
| "logits/chosen": 0.3750945031642914, | |
| "logits/rejected": 0.5399055480957031, | |
| "logps/chosen": -80.3337631225586, | |
| "logps/rejected": -150.540771484375, | |
| "loss": 0.2555, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.09064020216464996, | |
| "rewards/margins": 1.4325942993164062, | |
| "rewards/rejected": -1.3419541120529175, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.5342019543973942, | |
| "grad_norm": 17.625, | |
| "learning_rate": 4.9246575342465756e-05, | |
| "logits/chosen": 0.40898123383522034, | |
| "logits/rejected": 0.3948415219783783, | |
| "logps/chosen": -120.64512634277344, | |
| "logps/rejected": -172.23046875, | |
| "loss": 0.2607, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.03111358918249607, | |
| "rewards/margins": 1.4379582405090332, | |
| "rewards/rejected": -1.4068448543548584, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.5472312703583062, | |
| "grad_norm": 11.0, | |
| "learning_rate": 4.917808219178082e-05, | |
| "logits/chosen": 0.44859111309051514, | |
| "logits/rejected": 0.4527463912963867, | |
| "logps/chosen": -111.03682708740234, | |
| "logps/rejected": -175.25076293945312, | |
| "loss": 0.23, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.07664196938276291, | |
| "rewards/margins": 1.6669435501098633, | |
| "rewards/rejected": -1.590301513671875, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.5602605863192183, | |
| "grad_norm": 15.75, | |
| "learning_rate": 4.9109589041095895e-05, | |
| "logits/chosen": 0.4859389662742615, | |
| "logits/rejected": 0.5201914310455322, | |
| "logps/chosen": -78.25588989257812, | |
| "logps/rejected": -162.362548828125, | |
| "loss": 0.2227, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.0701964795589447, | |
| "rewards/margins": 1.5760339498519897, | |
| "rewards/rejected": -1.5058374404907227, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.5732899022801303, | |
| "grad_norm": 12.125, | |
| "learning_rate": 4.904109589041096e-05, | |
| "logits/chosen": 0.5065852403640747, | |
| "logits/rejected": 0.5527216196060181, | |
| "logps/chosen": -78.39152526855469, | |
| "logps/rejected": -183.5028839111328, | |
| "loss": 0.197, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.0381561741232872, | |
| "rewards/margins": 1.9460369348526, | |
| "rewards/rejected": -1.9078807830810547, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.5863192182410424, | |
| "grad_norm": 15.75, | |
| "learning_rate": 4.8972602739726034e-05, | |
| "logits/chosen": 0.5216741561889648, | |
| "logits/rejected": 0.6273947954177856, | |
| "logps/chosen": -74.12837982177734, | |
| "logps/rejected": -167.24652099609375, | |
| "loss": 0.1831, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.13984212279319763, | |
| "rewards/margins": 2.0655643939971924, | |
| "rewards/rejected": -1.9257222414016724, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.5993485342019544, | |
| "grad_norm": 15.125, | |
| "learning_rate": 4.89041095890411e-05, | |
| "logits/chosen": 0.5224686861038208, | |
| "logits/rejected": 0.5461165308952332, | |
| "logps/chosen": -101.55109405517578, | |
| "logps/rejected": -163.4028778076172, | |
| "loss": 0.1841, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.01885811612010002, | |
| "rewards/margins": 1.9022661447525024, | |
| "rewards/rejected": -1.8834080696105957, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.6123778501628665, | |
| "grad_norm": 10.3125, | |
| "learning_rate": 4.8835616438356167e-05, | |
| "logits/chosen": 0.438764363527298, | |
| "logits/rejected": 0.5729016661643982, | |
| "logps/chosen": -73.1627426147461, | |
| "logps/rejected": -153.8181610107422, | |
| "loss": 0.1734, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.08434567600488663, | |
| "rewards/margins": 2.0392439365386963, | |
| "rewards/rejected": -1.9548982381820679, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.6254071661237784, | |
| "grad_norm": 8.9375, | |
| "learning_rate": 4.876712328767123e-05, | |
| "logits/chosen": 0.40418195724487305, | |
| "logits/rejected": 0.4241870045661926, | |
| "logps/chosen": -143.9720001220703, | |
| "logps/rejected": -195.26536560058594, | |
| "loss": 0.1135, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.0051138997077941895, | |
| "rewards/margins": 2.4568700790405273, | |
| "rewards/rejected": -2.461984157562256, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.6384364820846905, | |
| "grad_norm": 16.5, | |
| "learning_rate": 4.8698630136986305e-05, | |
| "logits/chosen": 0.5531054735183716, | |
| "logits/rejected": 0.5722475051879883, | |
| "logps/chosen": -80.95619201660156, | |
| "logps/rejected": -174.85643005371094, | |
| "loss": 0.1363, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.057745300233364105, | |
| "rewards/margins": 2.4418563842773438, | |
| "rewards/rejected": -2.3841114044189453, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.6514657980456026, | |
| "grad_norm": 6.6875, | |
| "learning_rate": 4.863013698630137e-05, | |
| "logits/chosen": 0.3978479504585266, | |
| "logits/rejected": 0.575504720211029, | |
| "logps/chosen": -111.10527038574219, | |
| "logps/rejected": -194.09478759765625, | |
| "loss": 0.0979, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.024841848760843277, | |
| "rewards/margins": 2.8879756927490234, | |
| "rewards/rejected": -2.9128177165985107, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.6514657980456026, | |
| "eval_logits/chosen": 0.40171119570732117, | |
| "eval_logits/rejected": 0.4472416043281555, | |
| "eval_logps/chosen": -94.96456909179688, | |
| "eval_logps/rejected": -177.69801330566406, | |
| "eval_loss": 0.10980458557605743, | |
| "eval_rewards/accuracies": 0.9985119104385376, | |
| "eval_rewards/chosen": 0.048970796167850494, | |
| "eval_rewards/margins": 2.70963716506958, | |
| "eval_rewards/rejected": -2.6606662273406982, | |
| "eval_runtime": 53.1051, | |
| "eval_samples_per_second": 12.635, | |
| "eval_steps_per_second": 0.791, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.6644951140065146, | |
| "grad_norm": 9.9375, | |
| "learning_rate": 4.856164383561644e-05, | |
| "logits/chosen": 0.5971242189407349, | |
| "logits/rejected": 0.5052528381347656, | |
| "logps/chosen": -100.87618255615234, | |
| "logps/rejected": -183.73324584960938, | |
| "loss": 0.1258, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.029725002124905586, | |
| "rewards/margins": 2.650700330734253, | |
| "rewards/rejected": -2.6209752559661865, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.6775244299674267, | |
| "grad_norm": 9.5, | |
| "learning_rate": 4.849315068493151e-05, | |
| "logits/chosen": 0.46090734004974365, | |
| "logits/rejected": 0.5332375168800354, | |
| "logps/chosen": -83.30604553222656, | |
| "logps/rejected": -198.97483825683594, | |
| "loss": 0.0769, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.07736861705780029, | |
| "rewards/margins": 3.2583978176116943, | |
| "rewards/rejected": -3.1810293197631836, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.6905537459283387, | |
| "grad_norm": 10.5, | |
| "learning_rate": 4.8424657534246577e-05, | |
| "logits/chosen": 0.5082046985626221, | |
| "logits/rejected": 0.5545482635498047, | |
| "logps/chosen": -96.28477478027344, | |
| "logps/rejected": -153.87228393554688, | |
| "loss": 0.1285, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.049273423850536346, | |
| "rewards/margins": 2.481055974960327, | |
| "rewards/rejected": -2.4317827224731445, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.7035830618892508, | |
| "grad_norm": 11.6875, | |
| "learning_rate": 4.835616438356165e-05, | |
| "logits/chosen": 0.4179171621799469, | |
| "logits/rejected": 0.40184441208839417, | |
| "logps/chosen": -138.70870971679688, | |
| "logps/rejected": -198.06478881835938, | |
| "loss": 0.0812, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.07567030191421509, | |
| "rewards/margins": 3.1052422523498535, | |
| "rewards/rejected": -3.0295724868774414, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.7166123778501629, | |
| "grad_norm": 4.34375, | |
| "learning_rate": 4.8287671232876716e-05, | |
| "logits/chosen": 0.5138372182846069, | |
| "logits/rejected": 0.5542392730712891, | |
| "logps/chosen": -93.45801544189453, | |
| "logps/rejected": -196.15989685058594, | |
| "loss": 0.0619, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.008173711597919464, | |
| "rewards/margins": 3.3829448223114014, | |
| "rewards/rejected": -3.3747713565826416, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.7296416938110749, | |
| "grad_norm": 5.9375, | |
| "learning_rate": 4.821917808219178e-05, | |
| "logits/chosen": 0.4723089337348938, | |
| "logits/rejected": 0.5142194032669067, | |
| "logps/chosen": -101.18618774414062, | |
| "logps/rejected": -202.30770874023438, | |
| "loss": 0.0698, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.20023450255393982, | |
| "rewards/margins": 3.583833694458008, | |
| "rewards/rejected": -3.7840681076049805, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.742671009771987, | |
| "grad_norm": 4.84375, | |
| "learning_rate": 4.815068493150685e-05, | |
| "logits/chosen": 0.6098852157592773, | |
| "logits/rejected": 0.5306227207183838, | |
| "logps/chosen": -92.79605102539062, | |
| "logps/rejected": -194.44285583496094, | |
| "loss": 0.0694, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.09434399008750916, | |
| "rewards/margins": 3.6242706775665283, | |
| "rewards/rejected": -3.5299267768859863, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.755700325732899, | |
| "grad_norm": 3.09375, | |
| "learning_rate": 4.808219178082192e-05, | |
| "logits/chosen": 0.596287190914154, | |
| "logits/rejected": 0.5526207685470581, | |
| "logps/chosen": -80.8297348022461, | |
| "logps/rejected": -199.17770385742188, | |
| "loss": 0.0424, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.16401855647563934, | |
| "rewards/margins": 4.359023094177246, | |
| "rewards/rejected": -4.195004463195801, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.7687296416938111, | |
| "grad_norm": 4.78125, | |
| "learning_rate": 4.801369863013699e-05, | |
| "logits/chosen": 0.5375024080276489, | |
| "logits/rejected": 0.5418161153793335, | |
| "logps/chosen": -94.54348754882812, | |
| "logps/rejected": -179.93148803710938, | |
| "loss": 0.065, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.02510090172290802, | |
| "rewards/margins": 3.8914875984191895, | |
| "rewards/rejected": -3.866386890411377, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.7817589576547231, | |
| "grad_norm": 3.953125, | |
| "learning_rate": 4.794520547945205e-05, | |
| "logits/chosen": 0.5458413362503052, | |
| "logits/rejected": 0.5163211226463318, | |
| "logps/chosen": -102.55235290527344, | |
| "logps/rejected": -192.88011169433594, | |
| "loss": 0.0422, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.03571543097496033, | |
| "rewards/margins": 4.136109352111816, | |
| "rewards/rejected": -4.100393772125244, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.7947882736156352, | |
| "grad_norm": 3.390625, | |
| "learning_rate": 4.7876712328767126e-05, | |
| "logits/chosen": 0.44991570711135864, | |
| "logits/rejected": 0.47752076387405396, | |
| "logps/chosen": -71.73591613769531, | |
| "logps/rejected": -166.39166259765625, | |
| "loss": 0.0474, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.09097965061664581, | |
| "rewards/margins": 3.9125423431396484, | |
| "rewards/rejected": -3.8215625286102295, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.8078175895765473, | |
| "grad_norm": 5.4375, | |
| "learning_rate": 4.780821917808219e-05, | |
| "logits/chosen": 0.5184447169303894, | |
| "logits/rejected": 0.49066781997680664, | |
| "logps/chosen": -96.78662109375, | |
| "logps/rejected": -220.57266235351562, | |
| "loss": 0.044, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.07911338657140732, | |
| "rewards/margins": 4.811767101287842, | |
| "rewards/rejected": -4.890880584716797, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.8208469055374593, | |
| "grad_norm": 3.0, | |
| "learning_rate": 4.7739726027397265e-05, | |
| "logits/chosen": 0.5503054857254028, | |
| "logits/rejected": 0.7354578971862793, | |
| "logps/chosen": -76.80421447753906, | |
| "logps/rejected": -210.28140258789062, | |
| "loss": 0.0163, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.024801086634397507, | |
| "rewards/margins": 5.572457790374756, | |
| "rewards/rejected": -5.547656536102295, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.8338762214983714, | |
| "grad_norm": 3.46875, | |
| "learning_rate": 4.767123287671233e-05, | |
| "logits/chosen": 0.5171054005622864, | |
| "logits/rejected": 0.512793242931366, | |
| "logps/chosen": -131.59396362304688, | |
| "logps/rejected": -217.56964111328125, | |
| "loss": 0.0153, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.10994181782007217, | |
| "rewards/margins": 5.379184246063232, | |
| "rewards/rejected": -5.489125728607178, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.8469055374592834, | |
| "grad_norm": 4.03125, | |
| "learning_rate": 4.7602739726027403e-05, | |
| "logits/chosen": 0.44678860902786255, | |
| "logits/rejected": 0.5419712662696838, | |
| "logps/chosen": -104.75637817382812, | |
| "logps/rejected": -201.79806518554688, | |
| "loss": 0.0296, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.1282435804605484, | |
| "rewards/margins": 4.831494331359863, | |
| "rewards/rejected": -4.959737777709961, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.8599348534201955, | |
| "grad_norm": 2.40625, | |
| "learning_rate": 4.753424657534247e-05, | |
| "logits/chosen": 0.582385778427124, | |
| "logits/rejected": 0.6422931551933289, | |
| "logps/chosen": -94.39370727539062, | |
| "logps/rejected": -199.6475830078125, | |
| "loss": 0.0258, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.04259404167532921, | |
| "rewards/margins": 5.368470191955566, | |
| "rewards/rejected": -5.411064147949219, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.8729641693811075, | |
| "grad_norm": 2.6875, | |
| "learning_rate": 4.7465753424657536e-05, | |
| "logits/chosen": 0.5766660571098328, | |
| "logits/rejected": 0.6043537855148315, | |
| "logps/chosen": -102.68363952636719, | |
| "logps/rejected": -214.7265625, | |
| "loss": 0.0178, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.2821919322013855, | |
| "rewards/margins": 5.887226581573486, | |
| "rewards/rejected": -6.169419288635254, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.8859934853420195, | |
| "grad_norm": 1.0859375, | |
| "learning_rate": 4.73972602739726e-05, | |
| "logits/chosen": 0.4715408682823181, | |
| "logits/rejected": 0.5762664079666138, | |
| "logps/chosen": -86.6288070678711, | |
| "logps/rejected": -225.074951171875, | |
| "loss": 0.0129, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.10806949436664581, | |
| "rewards/margins": 6.162431716918945, | |
| "rewards/rejected": -6.270501136779785, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.8990228013029316, | |
| "grad_norm": 1.71875, | |
| "learning_rate": 4.7328767123287675e-05, | |
| "logits/chosen": 0.613117516040802, | |
| "logits/rejected": 0.5737402439117432, | |
| "logps/chosen": -71.23908996582031, | |
| "logps/rejected": -197.6245880126953, | |
| "loss": 0.0132, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.04037155583500862, | |
| "rewards/margins": 5.633227825164795, | |
| "rewards/rejected": -5.6735992431640625, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.9120521172638436, | |
| "grad_norm": 0.98828125, | |
| "learning_rate": 4.726027397260274e-05, | |
| "logits/chosen": 0.6605570912361145, | |
| "logits/rejected": 0.6310275197029114, | |
| "logps/chosen": -123.74465942382812, | |
| "logps/rejected": -249.78793334960938, | |
| "loss": 0.0076, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.3881164789199829, | |
| "rewards/margins": 7.08071231842041, | |
| "rewards/rejected": -7.4688286781311035, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.9250814332247557, | |
| "grad_norm": 23.375, | |
| "learning_rate": 4.719178082191781e-05, | |
| "logits/chosen": 0.5911487936973572, | |
| "logits/rejected": 0.6923888325691223, | |
| "logps/chosen": -161.05184936523438, | |
| "logps/rejected": -264.648193359375, | |
| "loss": 0.058, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": -0.7978758811950684, | |
| "rewards/margins": 8.156850814819336, | |
| "rewards/rejected": -8.954728126525879, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.9381107491856677, | |
| "grad_norm": 4.5625, | |
| "learning_rate": 4.712328767123288e-05, | |
| "logits/chosen": 0.6496680378913879, | |
| "logits/rejected": 0.6733301281929016, | |
| "logps/chosen": -121.81378173828125, | |
| "logps/rejected": -239.56304931640625, | |
| "loss": 0.0129, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.4053517282009125, | |
| "rewards/margins": 7.532309532165527, | |
| "rewards/rejected": -7.937661647796631, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.9511400651465798, | |
| "grad_norm": 17.625, | |
| "learning_rate": 4.7054794520547946e-05, | |
| "logits/chosen": 0.5184324383735657, | |
| "logits/rejected": 0.6415278911590576, | |
| "logps/chosen": -105.58231353759766, | |
| "logps/rejected": -222.8607940673828, | |
| "loss": 0.0298, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.48813995718955994, | |
| "rewards/margins": 7.293839454650879, | |
| "rewards/rejected": -7.781979560852051, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.9641693811074918, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 4.698630136986302e-05, | |
| "logits/chosen": 0.5843162536621094, | |
| "logits/rejected": 0.5905658602714539, | |
| "logps/chosen": -100.66535949707031, | |
| "logps/rejected": -242.2615203857422, | |
| "loss": 0.006, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.5956183075904846, | |
| "rewards/margins": 7.831192493438721, | |
| "rewards/rejected": -8.426811218261719, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.9771986970684039, | |
| "grad_norm": 0.90625, | |
| "learning_rate": 4.6917808219178085e-05, | |
| "logits/chosen": 0.6023251414299011, | |
| "logits/rejected": 0.6175463199615479, | |
| "logps/chosen": -74.83623504638672, | |
| "logps/rejected": -226.2584228515625, | |
| "loss": 0.0081, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.32583388686180115, | |
| "rewards/margins": 7.1260175704956055, | |
| "rewards/rejected": -7.4518513679504395, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.990228013029316, | |
| "grad_norm": 1.5859375, | |
| "learning_rate": 4.684931506849316e-05, | |
| "logits/chosen": 0.549035906791687, | |
| "logits/rejected": 0.5604692697525024, | |
| "logps/chosen": -106.24671936035156, | |
| "logps/rejected": -224.1392059326172, | |
| "loss": 0.0062, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.48925158381462097, | |
| "rewards/margins": 8.133434295654297, | |
| "rewards/rejected": -8.622686386108398, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 1.003257328990228, | |
| "grad_norm": 3.21875, | |
| "learning_rate": 4.6780821917808224e-05, | |
| "logits/chosen": 0.4611436426639557, | |
| "logits/rejected": 0.5326769948005676, | |
| "logps/chosen": -122.00413513183594, | |
| "logps/rejected": -225.345703125, | |
| "loss": 0.0104, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.3694484233856201, | |
| "rewards/margins": 8.14291763305664, | |
| "rewards/rejected": -8.512365341186523, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 1.01628664495114, | |
| "grad_norm": 1.0390625, | |
| "learning_rate": 4.671232876712329e-05, | |
| "logits/chosen": 0.5869070887565613, | |
| "logits/rejected": 0.6033880710601807, | |
| "logps/chosen": -82.62848663330078, | |
| "logps/rejected": -218.4529571533203, | |
| "loss": 0.0067, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.45080143213272095, | |
| "rewards/margins": 7.709619522094727, | |
| "rewards/rejected": -8.160421371459961, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 1.0293159609120521, | |
| "grad_norm": 0.5, | |
| "learning_rate": 4.6643835616438356e-05, | |
| "logits/chosen": 0.6383049488067627, | |
| "logits/rejected": 0.6318773031234741, | |
| "logps/chosen": -85.02655029296875, | |
| "logps/rejected": -236.74661254882812, | |
| "loss": 0.0025, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.6345354318618774, | |
| "rewards/margins": 8.56661319732666, | |
| "rewards/rejected": -9.201148986816406, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 1.0423452768729642, | |
| "grad_norm": 6.03125, | |
| "learning_rate": 4.657534246575342e-05, | |
| "logits/chosen": 0.5868783593177795, | |
| "logits/rejected": 0.6521725654602051, | |
| "logps/chosen": -72.04723358154297, | |
| "logps/rejected": -230.14759826660156, | |
| "loss": 0.0064, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.3693377375602722, | |
| "rewards/margins": 8.969406127929688, | |
| "rewards/rejected": -9.3387451171875, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 1.0553745928338762, | |
| "grad_norm": 3.875, | |
| "learning_rate": 4.6506849315068495e-05, | |
| "logits/chosen": 0.6232761144638062, | |
| "logits/rejected": 0.7092280983924866, | |
| "logps/chosen": -79.42515563964844, | |
| "logps/rejected": -243.50372314453125, | |
| "loss": 0.0114, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.4384864568710327, | |
| "rewards/margins": 9.8868408203125, | |
| "rewards/rejected": -10.325326919555664, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 1.0684039087947883, | |
| "grad_norm": 17.625, | |
| "learning_rate": 4.643835616438356e-05, | |
| "logits/chosen": 0.5587644577026367, | |
| "logits/rejected": 0.507000744342804, | |
| "logps/chosen": -107.61006164550781, | |
| "logps/rejected": -269.83843994140625, | |
| "loss": 0.0298, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": -0.767175555229187, | |
| "rewards/margins": 10.086366653442383, | |
| "rewards/rejected": -10.85354232788086, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 1.0814332247557004, | |
| "grad_norm": 1.765625, | |
| "learning_rate": 4.6369863013698634e-05, | |
| "logits/chosen": 0.7217209339141846, | |
| "logits/rejected": 0.6606077551841736, | |
| "logps/chosen": -112.81648254394531, | |
| "logps/rejected": -288.869384765625, | |
| "loss": 0.0031, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.637757420539856, | |
| "rewards/margins": 11.375957489013672, | |
| "rewards/rejected": -12.013714790344238, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 1.0944625407166124, | |
| "grad_norm": 0.134765625, | |
| "learning_rate": 4.63013698630137e-05, | |
| "logits/chosen": 0.598381757736206, | |
| "logits/rejected": 0.7315313816070557, | |
| "logps/chosen": -107.20101928710938, | |
| "logps/rejected": -281.4562683105469, | |
| "loss": 0.001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.0282131433486938, | |
| "rewards/margins": 10.506587982177734, | |
| "rewards/rejected": -11.534799575805664, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 1.1074918566775245, | |
| "grad_norm": 0.341796875, | |
| "learning_rate": 4.623287671232877e-05, | |
| "logits/chosen": 0.5361148118972778, | |
| "logits/rejected": 0.625439465045929, | |
| "logps/chosen": -94.30006408691406, | |
| "logps/rejected": -247.62734985351562, | |
| "loss": 0.0019, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.6392572522163391, | |
| "rewards/margins": 9.514155387878418, | |
| "rewards/rejected": -10.153412818908691, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 1.1205211726384365, | |
| "grad_norm": 0.1318359375, | |
| "learning_rate": 4.616438356164384e-05, | |
| "logits/chosen": 0.4699576199054718, | |
| "logits/rejected": 0.5327920317649841, | |
| "logps/chosen": -99.83711242675781, | |
| "logps/rejected": -277.1376953125, | |
| "loss": 0.0007, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.7559419870376587, | |
| "rewards/margins": 10.79163932800293, | |
| "rewards/rejected": -11.54758071899414, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 1.1335504885993486, | |
| "grad_norm": 21.625, | |
| "learning_rate": 4.609589041095891e-05, | |
| "logits/chosen": 0.5424385666847229, | |
| "logits/rejected": 0.5994272232055664, | |
| "logps/chosen": -126.75860595703125, | |
| "logps/rejected": -259.98785400390625, | |
| "loss": 0.027, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": -1.0079156160354614, | |
| "rewards/margins": 9.790204048156738, | |
| "rewards/rejected": -10.79811954498291, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 1.1465798045602607, | |
| "grad_norm": 0.1923828125, | |
| "learning_rate": 4.602739726027398e-05, | |
| "logits/chosen": 0.4807354509830475, | |
| "logits/rejected": 0.5597364902496338, | |
| "logps/chosen": -106.52574157714844, | |
| "logps/rejected": -272.2024841308594, | |
| "loss": 0.0016, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.9496315717697144, | |
| "rewards/margins": 10.869487762451172, | |
| "rewards/rejected": -11.81911849975586, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 1.1596091205211727, | |
| "grad_norm": 0.1513671875, | |
| "learning_rate": 4.5958904109589044e-05, | |
| "logits/chosen": 0.4442989230155945, | |
| "logits/rejected": 0.5743086338043213, | |
| "logps/chosen": -126.14883422851562, | |
| "logps/rejected": -257.60479736328125, | |
| "loss": 0.0009, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.766221821308136, | |
| "rewards/margins": 10.424041748046875, | |
| "rewards/rejected": -11.190263748168945, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 1.1726384364820848, | |
| "grad_norm": 1.109375, | |
| "learning_rate": 4.589041095890411e-05, | |
| "logits/chosen": 0.6463179588317871, | |
| "logits/rejected": 0.7357967495918274, | |
| "logps/chosen": -111.60262298583984, | |
| "logps/rejected": -257.9665222167969, | |
| "loss": 0.004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.7475589513778687, | |
| "rewards/margins": 9.678692817687988, | |
| "rewards/rejected": -10.426251411437988, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 1.1856677524429968, | |
| "grad_norm": 6.6875, | |
| "learning_rate": 4.5821917808219176e-05, | |
| "logits/chosen": 0.48268792033195496, | |
| "logits/rejected": 0.5555750131607056, | |
| "logps/chosen": -109.53272247314453, | |
| "logps/rejected": -254.43492126464844, | |
| "loss": 0.021, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.1768321990966797, | |
| "rewards/margins": 10.083324432373047, | |
| "rewards/rejected": -11.260156631469727, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 1.1986970684039089, | |
| "grad_norm": 1.3515625, | |
| "learning_rate": 4.575342465753425e-05, | |
| "logits/chosen": 0.4292120337486267, | |
| "logits/rejected": 0.521615743637085, | |
| "logps/chosen": -95.94520568847656, | |
| "logps/rejected": -253.99993896484375, | |
| "loss": 0.0043, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.871029257774353, | |
| "rewards/margins": 10.545323371887207, | |
| "rewards/rejected": -11.416353225708008, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 1.211726384364821, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 4.5684931506849315e-05, | |
| "logits/chosen": 0.599204421043396, | |
| "logits/rejected": 0.6558493375778198, | |
| "logps/chosen": -81.71524047851562, | |
| "logps/rejected": -286.6025390625, | |
| "loss": 0.002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.6884966492652893, | |
| "rewards/margins": 12.463363647460938, | |
| "rewards/rejected": -13.151860237121582, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 1.224755700325733, | |
| "grad_norm": 18.0, | |
| "learning_rate": 4.561643835616439e-05, | |
| "logits/chosen": 0.5306810140609741, | |
| "logits/rejected": 0.6242883801460266, | |
| "logps/chosen": -123.8375244140625, | |
| "logps/rejected": -301.3340759277344, | |
| "loss": 0.115, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": -1.471944808959961, | |
| "rewards/margins": 11.491494178771973, | |
| "rewards/rejected": -12.963438034057617, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 1.237785016286645, | |
| "grad_norm": 3.6875, | |
| "learning_rate": 4.5547945205479454e-05, | |
| "logits/chosen": 0.307037353515625, | |
| "logits/rejected": 0.4169548749923706, | |
| "logps/chosen": -104.93318176269531, | |
| "logps/rejected": -298.0616455078125, | |
| "loss": 0.0056, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.8200819492340088, | |
| "rewards/margins": 12.302337646484375, | |
| "rewards/rejected": -13.122421264648438, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 1.2508143322475571, | |
| "grad_norm": 0.357421875, | |
| "learning_rate": 4.547945205479453e-05, | |
| "logits/chosen": 0.44628292322158813, | |
| "logits/rejected": 0.5122686624526978, | |
| "logps/chosen": -138.44715881347656, | |
| "logps/rejected": -297.4310302734375, | |
| "loss": 0.0021, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.1169642210006714, | |
| "rewards/margins": 12.18505859375, | |
| "rewards/rejected": -13.302022933959961, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 1.2638436482084692, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 4.541095890410959e-05, | |
| "logits/chosen": 0.4561493992805481, | |
| "logits/rejected": 0.42395809292793274, | |
| "logps/chosen": -97.692626953125, | |
| "logps/rejected": -269.0616149902344, | |
| "loss": 0.0011, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.0600621700286865, | |
| "rewards/margins": 10.464313507080078, | |
| "rewards/rejected": -11.524375915527344, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 1.2768729641693812, | |
| "grad_norm": 0.171875, | |
| "learning_rate": 4.534246575342466e-05, | |
| "logits/chosen": 0.5301443934440613, | |
| "logits/rejected": 0.5689199566841125, | |
| "logps/chosen": -82.25302124023438, | |
| "logps/rejected": -299.8308410644531, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.6450921297073364, | |
| "rewards/margins": 12.625539779663086, | |
| "rewards/rejected": -13.270631790161133, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 1.2899022801302933, | |
| "grad_norm": 0.193359375, | |
| "learning_rate": 4.5273972602739725e-05, | |
| "logits/chosen": 0.5289660096168518, | |
| "logits/rejected": 0.5680521726608276, | |
| "logps/chosen": -114.97647094726562, | |
| "logps/rejected": -289.7352294921875, | |
| "loss": 0.001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.0461821556091309, | |
| "rewards/margins": 11.369194030761719, | |
| "rewards/rejected": -12.415376663208008, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 1.3029315960912053, | |
| "grad_norm": 12.875, | |
| "learning_rate": 4.520547945205479e-05, | |
| "logits/chosen": 0.5327968597412109, | |
| "logits/rejected": 0.5609641075134277, | |
| "logps/chosen": -102.47958374023438, | |
| "logps/rejected": -250.79983520507812, | |
| "loss": 0.0433, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": -1.2846791744232178, | |
| "rewards/margins": 10.277151107788086, | |
| "rewards/rejected": -11.561830520629883, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.3029315960912053, | |
| "eval_logits/chosen": 0.4522504210472107, | |
| "eval_logits/rejected": 0.5126740336418152, | |
| "eval_logps/chosen": -105.14033508300781, | |
| "eval_logps/rejected": -271.7301330566406, | |
| "eval_loss": 0.010936837643384933, | |
| "eval_rewards/accuracies": 0.9955357313156128, | |
| "eval_rewards/chosen": -0.9686061143875122, | |
| "eval_rewards/margins": 11.095270156860352, | |
| "eval_rewards/rejected": -12.06387710571289, | |
| "eval_runtime": 52.2837, | |
| "eval_samples_per_second": 12.834, | |
| "eval_steps_per_second": 0.803, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.3159609120521172, | |
| "grad_norm": 0.3359375, | |
| "learning_rate": 4.5136986301369864e-05, | |
| "logits/chosen": 0.4589378833770752, | |
| "logits/rejected": 0.5487878918647766, | |
| "logps/chosen": -105.76063537597656, | |
| "logps/rejected": -304.374755859375, | |
| "loss": 0.0015, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.6952133774757385, | |
| "rewards/margins": 11.938570022583008, | |
| "rewards/rejected": -12.633784294128418, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 1.3289902280130292, | |
| "grad_norm": 16.875, | |
| "learning_rate": 4.506849315068493e-05, | |
| "logits/chosen": 0.3769131302833557, | |
| "logits/rejected": 0.4298419952392578, | |
| "logps/chosen": -90.59988403320312, | |
| "logps/rejected": -247.70855712890625, | |
| "loss": 0.0366, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": -0.8937082886695862, | |
| "rewards/margins": 10.360432624816895, | |
| "rewards/rejected": -11.254140853881836, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 1.3420195439739413, | |
| "grad_norm": 0.1240234375, | |
| "learning_rate": 4.5e-05, | |
| "logits/chosen": 0.4195227026939392, | |
| "logits/rejected": 0.4982715845108032, | |
| "logps/chosen": -108.6019515991211, | |
| "logps/rejected": -256.0687255859375, | |
| "loss": 0.0009, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.35523366928100586, | |
| "rewards/margins": 10.765069961547852, | |
| "rewards/rejected": -11.120304107666016, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 1.3550488599348534, | |
| "grad_norm": 0.734375, | |
| "learning_rate": 4.493150684931507e-05, | |
| "logits/chosen": 0.512363851070404, | |
| "logits/rejected": 0.576703667640686, | |
| "logps/chosen": -87.09799194335938, | |
| "logps/rejected": -250.88160705566406, | |
| "loss": 0.002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.8132816553115845, | |
| "rewards/margins": 10.275364875793457, | |
| "rewards/rejected": -11.088645935058594, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 1.3680781758957654, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 4.486301369863014e-05, | |
| "logits/chosen": 0.5740979909896851, | |
| "logits/rejected": 0.6141005158424377, | |
| "logps/chosen": -101.0667495727539, | |
| "logps/rejected": -270.2124328613281, | |
| "loss": 0.0019, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.4534332752227783, | |
| "rewards/margins": 11.378608703613281, | |
| "rewards/rejected": -11.832042694091797, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 1.3811074918566775, | |
| "grad_norm": 0.1689453125, | |
| "learning_rate": 4.479452054794521e-05, | |
| "logits/chosen": 0.4920623004436493, | |
| "logits/rejected": 0.5869815945625305, | |
| "logps/chosen": -78.95692443847656, | |
| "logps/rejected": -261.3721923828125, | |
| "loss": 0.001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.5587632656097412, | |
| "rewards/margins": 11.233173370361328, | |
| "rewards/rejected": -11.791936874389648, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 1.3941368078175895, | |
| "grad_norm": 0.251953125, | |
| "learning_rate": 4.472602739726028e-05, | |
| "logits/chosen": 0.570668637752533, | |
| "logits/rejected": 0.6403558850288391, | |
| "logps/chosen": -100.12591552734375, | |
| "logps/rejected": -284.8184814453125, | |
| "loss": 0.0007, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.974056601524353, | |
| "rewards/margins": 11.53510570526123, | |
| "rewards/rejected": -12.509162902832031, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 1.4071661237785016, | |
| "grad_norm": 0.486328125, | |
| "learning_rate": 4.465753424657535e-05, | |
| "logits/chosen": 0.5420557260513306, | |
| "logits/rejected": 0.5884326100349426, | |
| "logps/chosen": -88.60862731933594, | |
| "logps/rejected": -289.9623718261719, | |
| "loss": 0.0006, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.7016857862472534, | |
| "rewards/margins": 12.609970092773438, | |
| "rewards/rejected": -13.31165599822998, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 1.4201954397394136, | |
| "grad_norm": 0.16796875, | |
| "learning_rate": 4.458904109589041e-05, | |
| "logits/chosen": 0.4910571575164795, | |
| "logits/rejected": 0.5071029663085938, | |
| "logps/chosen": -126.79181671142578, | |
| "logps/rejected": -296.6622314453125, | |
| "loss": 0.0009, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.5642073154449463, | |
| "rewards/margins": 11.54067325592041, | |
| "rewards/rejected": -13.104881286621094, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 1.4332247557003257, | |
| "grad_norm": 0.09423828125, | |
| "learning_rate": 4.452054794520548e-05, | |
| "logits/chosen": 0.5247446298599243, | |
| "logits/rejected": 0.47774773836135864, | |
| "logps/chosen": -100.17961883544922, | |
| "logps/rejected": -256.7818908691406, | |
| "loss": 0.0007, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.5451500415802002, | |
| "rewards/margins": 10.933671951293945, | |
| "rewards/rejected": -11.478821754455566, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 1.4462540716612378, | |
| "grad_norm": 0.0615234375, | |
| "learning_rate": 4.4452054794520545e-05, | |
| "logits/chosen": 0.6131365299224854, | |
| "logits/rejected": 0.615870475769043, | |
| "logps/chosen": -91.60357666015625, | |
| "logps/rejected": -277.2375793457031, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.4648512601852417, | |
| "rewards/margins": 11.733713150024414, | |
| "rewards/rejected": -12.198564529418945, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 1.4592833876221498, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 4.438356164383562e-05, | |
| "logits/chosen": 0.7266855239868164, | |
| "logits/rejected": 0.633425235748291, | |
| "logps/chosen": -83.83377075195312, | |
| "logps/rejected": -264.64501953125, | |
| "loss": 0.0011, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.8645696640014648, | |
| "rewards/margins": 11.404350280761719, | |
| "rewards/rejected": -12.268918991088867, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 1.4723127035830619, | |
| "grad_norm": 39.5, | |
| "learning_rate": 4.4315068493150684e-05, | |
| "logits/chosen": 0.6473186016082764, | |
| "logits/rejected": 0.6468358635902405, | |
| "logps/chosen": -145.5900115966797, | |
| "logps/rejected": -300.77301025390625, | |
| "loss": 0.0413, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": -1.2610886096954346, | |
| "rewards/margins": 12.301548957824707, | |
| "rewards/rejected": -13.562638282775879, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 1.485342019543974, | |
| "grad_norm": 0.400390625, | |
| "learning_rate": 4.424657534246576e-05, | |
| "logits/chosen": 0.4430687427520752, | |
| "logits/rejected": 0.5213119983673096, | |
| "logps/chosen": -133.21205139160156, | |
| "logps/rejected": -270.613525390625, | |
| "loss": 0.0011, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.0954465866088867, | |
| "rewards/margins": 11.330828666687012, | |
| "rewards/rejected": -12.426275253295898, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 1.498371335504886, | |
| "grad_norm": 0.94140625, | |
| "learning_rate": 4.417808219178082e-05, | |
| "logits/chosen": 0.5086010694503784, | |
| "logits/rejected": 0.5820840001106262, | |
| "logps/chosen": -123.90394592285156, | |
| "logps/rejected": -257.7217712402344, | |
| "loss": 0.0019, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.11330246925354, | |
| "rewards/margins": 10.829158782958984, | |
| "rewards/rejected": -11.942461013793945, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 1.511400651465798, | |
| "grad_norm": 0.1435546875, | |
| "learning_rate": 4.4109589041095896e-05, | |
| "logits/chosen": 0.6593326330184937, | |
| "logits/rejected": 0.6211075186729431, | |
| "logps/chosen": -75.89244842529297, | |
| "logps/rejected": -266.60357666015625, | |
| "loss": 0.0006, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.7435863614082336, | |
| "rewards/margins": 11.823095321655273, | |
| "rewards/rejected": -12.566681861877441, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 1.52442996742671, | |
| "grad_norm": 0.3125, | |
| "learning_rate": 4.404109589041096e-05, | |
| "logits/chosen": 0.44883668422698975, | |
| "logits/rejected": 0.5639724135398865, | |
| "logps/chosen": -93.89613342285156, | |
| "logps/rejected": -286.56451416015625, | |
| "loss": 0.0022, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.0818122625350952, | |
| "rewards/margins": 12.308505058288574, | |
| "rewards/rejected": -13.390316009521484, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 1.5374592833876222, | |
| "grad_norm": 0.294921875, | |
| "learning_rate": 4.3972602739726035e-05, | |
| "logits/chosen": 0.5254025459289551, | |
| "logits/rejected": 0.5744770765304565, | |
| "logps/chosen": -120.49933624267578, | |
| "logps/rejected": -313.8304443359375, | |
| "loss": 0.0005, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.2532737255096436, | |
| "rewards/margins": 13.612017631530762, | |
| "rewards/rejected": -14.8652925491333, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 1.5504885993485342, | |
| "grad_norm": 0.06005859375, | |
| "learning_rate": 4.39041095890411e-05, | |
| "logits/chosen": 0.4704741835594177, | |
| "logits/rejected": 0.5933064222335815, | |
| "logps/chosen": -101.07899475097656, | |
| "logps/rejected": -312.5476989746094, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.0991394519805908, | |
| "rewards/margins": 13.127508163452148, | |
| "rewards/rejected": -14.226646423339844, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 1.5635179153094463, | |
| "grad_norm": 0.7265625, | |
| "learning_rate": 4.383561643835617e-05, | |
| "logits/chosen": 0.5662128329277039, | |
| "logits/rejected": 0.5538490414619446, | |
| "logps/chosen": -106.43547058105469, | |
| "logps/rejected": -237.27182006835938, | |
| "loss": 0.0017, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.7144113779067993, | |
| "rewards/margins": 9.923102378845215, | |
| "rewards/rejected": -10.637513160705566, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.5765472312703583, | |
| "grad_norm": 0.2734375, | |
| "learning_rate": 4.376712328767123e-05, | |
| "logits/chosen": 0.6062589883804321, | |
| "logits/rejected": 0.6001408100128174, | |
| "logps/chosen": -85.78362274169922, | |
| "logps/rejected": -230.78456115722656, | |
| "loss": 0.0006, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.44176292419433594, | |
| "rewards/margins": 10.009519577026367, | |
| "rewards/rejected": -10.451282501220703, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 1.5895765472312704, | |
| "grad_norm": 0.890625, | |
| "learning_rate": 4.36986301369863e-05, | |
| "logits/chosen": 0.4221673607826233, | |
| "logits/rejected": 0.5758030414581299, | |
| "logps/chosen": -105.4853744506836, | |
| "logps/rejected": -291.33416748046875, | |
| "loss": 0.0012, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.8794921636581421, | |
| "rewards/margins": 12.619811058044434, | |
| "rewards/rejected": -13.499303817749023, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 1.6026058631921825, | |
| "grad_norm": 11.3125, | |
| "learning_rate": 4.363013698630137e-05, | |
| "logits/chosen": 0.5420705676078796, | |
| "logits/rejected": 0.6151952147483826, | |
| "logps/chosen": -100.22688293457031, | |
| "logps/rejected": -243.79376220703125, | |
| "loss": 0.0141, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.1169291734695435, | |
| "rewards/margins": 10.309640884399414, | |
| "rewards/rejected": -11.426569938659668, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 1.6156351791530945, | |
| "grad_norm": 0.1025390625, | |
| "learning_rate": 4.356164383561644e-05, | |
| "logits/chosen": 0.5193166136741638, | |
| "logits/rejected": 0.6056085228919983, | |
| "logps/chosen": -82.8109359741211, | |
| "logps/rejected": -290.5059814453125, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.6234797239303589, | |
| "rewards/margins": 13.424489974975586, | |
| "rewards/rejected": -14.047967910766602, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 1.6286644951140063, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 4.349315068493151e-05, | |
| "logits/chosen": 0.524208664894104, | |
| "logits/rejected": 0.4996390640735626, | |
| "logps/chosen": -99.54425811767578, | |
| "logps/rejected": -269.98858642578125, | |
| "loss": 0.001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.75706547498703, | |
| "rewards/margins": 11.190351486206055, | |
| "rewards/rejected": -11.947418212890625, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 1.6416938110749184, | |
| "grad_norm": 0.263671875, | |
| "learning_rate": 4.342465753424658e-05, | |
| "logits/chosen": 0.6168690323829651, | |
| "logits/rejected": 0.6482622027397156, | |
| "logps/chosen": -85.97930908203125, | |
| "logps/rejected": -270.2721862792969, | |
| "loss": 0.001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.8299384117126465, | |
| "rewards/margins": 11.914796829223633, | |
| "rewards/rejected": -12.744734764099121, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 1.6547231270358305, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 4.335616438356165e-05, | |
| "logits/chosen": 0.4758910536766052, | |
| "logits/rejected": 0.6165511012077332, | |
| "logps/chosen": -120.85889434814453, | |
| "logps/rejected": -330.94580078125, | |
| "loss": 0.0014, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.3049366474151611, | |
| "rewards/margins": 13.541584014892578, | |
| "rewards/rejected": -14.846521377563477, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 1.6677524429967425, | |
| "grad_norm": 6.8125, | |
| "learning_rate": 4.3287671232876716e-05, | |
| "logits/chosen": 0.4912353754043579, | |
| "logits/rejected": 0.5630989074707031, | |
| "logps/chosen": -99.70421600341797, | |
| "logps/rejected": -262.81793212890625, | |
| "loss": 0.0078, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.1004682779312134, | |
| "rewards/margins": 11.206673622131348, | |
| "rewards/rejected": -12.307140350341797, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 1.6807817589576546, | |
| "grad_norm": 0.16015625, | |
| "learning_rate": 4.321917808219178e-05, | |
| "logits/chosen": 0.4782199263572693, | |
| "logits/rejected": 0.525773823261261, | |
| "logps/chosen": -104.79579162597656, | |
| "logps/rejected": -289.299560546875, | |
| "loss": 0.0007, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.9410255551338196, | |
| "rewards/margins": 13.11217212677002, | |
| "rewards/rejected": -14.05319595336914, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 1.6938110749185666, | |
| "grad_norm": 0.21875, | |
| "learning_rate": 4.3150684931506855e-05, | |
| "logits/chosen": 0.4822072684764862, | |
| "logits/rejected": 0.4817202687263489, | |
| "logps/chosen": -86.81942749023438, | |
| "logps/rejected": -299.3095703125, | |
| "loss": 0.0006, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.1278772354125977, | |
| "rewards/margins": 12.366036415100098, | |
| "rewards/rejected": -13.493914604187012, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 1.7068403908794787, | |
| "grad_norm": 0.08642578125, | |
| "learning_rate": 4.308219178082192e-05, | |
| "logits/chosen": 0.5804314613342285, | |
| "logits/rejected": 0.6889848709106445, | |
| "logps/chosen": -91.85730743408203, | |
| "logps/rejected": -298.603515625, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.0585956573486328, | |
| "rewards/margins": 12.788677215576172, | |
| "rewards/rejected": -13.847272872924805, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 1.7198697068403908, | |
| "grad_norm": 0.072265625, | |
| "learning_rate": 4.301369863013699e-05, | |
| "logits/chosen": 0.4860071837902069, | |
| "logits/rejected": 0.6394906044006348, | |
| "logps/chosen": -122.80025482177734, | |
| "logps/rejected": -303.95257568359375, | |
| "loss": 0.0007, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.156942367553711, | |
| "rewards/margins": 13.32013988494873, | |
| "rewards/rejected": -14.477082252502441, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 1.7328990228013028, | |
| "grad_norm": 0.15234375, | |
| "learning_rate": 4.294520547945205e-05, | |
| "logits/chosen": 0.4813675880432129, | |
| "logits/rejected": 0.6056811213493347, | |
| "logps/chosen": -89.08052062988281, | |
| "logps/rejected": -268.1934814453125, | |
| "loss": 0.0005, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.8572717308998108, | |
| "rewards/margins": 12.159040451049805, | |
| "rewards/rejected": -13.016312599182129, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 1.7459283387622149, | |
| "grad_norm": 0.05615234375, | |
| "learning_rate": 4.2876712328767126e-05, | |
| "logits/chosen": 0.43135523796081543, | |
| "logits/rejected": 0.5367728471755981, | |
| "logps/chosen": -104.37152099609375, | |
| "logps/rejected": -309.7494201660156, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.9332711100578308, | |
| "rewards/margins": 13.174678802490234, | |
| "rewards/rejected": -14.107950210571289, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 1.758957654723127, | |
| "grad_norm": 0.1123046875, | |
| "learning_rate": 4.280821917808219e-05, | |
| "logits/chosen": 0.46707215905189514, | |
| "logits/rejected": 0.545040488243103, | |
| "logps/chosen": -141.20016479492188, | |
| "logps/rejected": -337.659423828125, | |
| "loss": 0.0006, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.5533472299575806, | |
| "rewards/margins": 13.37056827545166, | |
| "rewards/rejected": -14.923914909362793, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 1.771986970684039, | |
| "grad_norm": 0.142578125, | |
| "learning_rate": 4.2739726027397265e-05, | |
| "logits/chosen": 0.45749402046203613, | |
| "logits/rejected": 0.5103408098220825, | |
| "logps/chosen": -97.52786254882812, | |
| "logps/rejected": -218.84869384765625, | |
| "loss": 0.0006, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.6832572221755981, | |
| "rewards/margins": 9.930760383605957, | |
| "rewards/rejected": -10.614017486572266, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 1.785016286644951, | |
| "grad_norm": 0.04638671875, | |
| "learning_rate": 4.267123287671233e-05, | |
| "logits/chosen": 0.6288174986839294, | |
| "logits/rejected": 0.6228695511817932, | |
| "logps/chosen": -118.99038696289062, | |
| "logps/rejected": -292.7908020019531, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.2574949264526367, | |
| "rewards/margins": 12.354877471923828, | |
| "rewards/rejected": -13.612371444702148, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 1.798045602605863, | |
| "grad_norm": 0.330078125, | |
| "learning_rate": 4.2602739726027404e-05, | |
| "logits/chosen": 0.4609254002571106, | |
| "logits/rejected": 0.480663537979126, | |
| "logps/chosen": -87.55207824707031, | |
| "logps/rejected": -289.66162109375, | |
| "loss": 0.0011, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.7838760614395142, | |
| "rewards/margins": 12.662479400634766, | |
| "rewards/rejected": -13.446355819702148, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 1.8110749185667752, | |
| "grad_norm": 0.091796875, | |
| "learning_rate": 4.253424657534247e-05, | |
| "logits/chosen": 0.5592811703681946, | |
| "logits/rejected": 0.6325635313987732, | |
| "logps/chosen": -113.62852478027344, | |
| "logps/rejected": -291.84967041015625, | |
| "loss": 0.0006, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.9283789992332458, | |
| "rewards/margins": 12.165189743041992, | |
| "rewards/rejected": -13.093568801879883, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 1.8241042345276872, | |
| "grad_norm": 0.1376953125, | |
| "learning_rate": 4.2465753424657536e-05, | |
| "logits/chosen": 0.5351129174232483, | |
| "logits/rejected": 0.5127934813499451, | |
| "logps/chosen": -173.83511352539062, | |
| "logps/rejected": -313.468994140625, | |
| "loss": 0.0007, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -2.008704662322998, | |
| "rewards/margins": 12.894453048706055, | |
| "rewards/rejected": -14.903158187866211, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.8371335504885993, | |
| "grad_norm": 25.25, | |
| "learning_rate": 4.23972602739726e-05, | |
| "logits/chosen": 0.5461170673370361, | |
| "logits/rejected": 0.5241893529891968, | |
| "logps/chosen": -90.9225082397461, | |
| "logps/rejected": -266.9288635253906, | |
| "loss": 0.0711, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": -1.2498632669448853, | |
| "rewards/margins": 11.258042335510254, | |
| "rewards/rejected": -12.507905960083008, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 1.8501628664495113, | |
| "grad_norm": 0.15625, | |
| "learning_rate": 4.232876712328767e-05, | |
| "logits/chosen": 0.4733356535434723, | |
| "logits/rejected": 0.5178252458572388, | |
| "logps/chosen": -120.46127319335938, | |
| "logps/rejected": -303.619384765625, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.1871830224990845, | |
| "rewards/margins": 12.890132904052734, | |
| "rewards/rejected": -14.077316284179688, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 1.8631921824104234, | |
| "grad_norm": 0.2578125, | |
| "learning_rate": 4.226027397260274e-05, | |
| "logits/chosen": 0.48812466859817505, | |
| "logits/rejected": 0.6284564733505249, | |
| "logps/chosen": -94.5536880493164, | |
| "logps/rejected": -292.870849609375, | |
| "loss": 0.0009, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.950133204460144, | |
| "rewards/margins": 13.727540969848633, | |
| "rewards/rejected": -14.677675247192383, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 1.8762214983713354, | |
| "grad_norm": 0.0859375, | |
| "learning_rate": 4.219178082191781e-05, | |
| "logits/chosen": 0.6320376992225647, | |
| "logits/rejected": 0.6237307786941528, | |
| "logps/chosen": -152.7342529296875, | |
| "logps/rejected": -295.3027648925781, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.494692325592041, | |
| "rewards/margins": 12.356241226196289, | |
| "rewards/rejected": -13.850934028625488, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 1.8892508143322475, | |
| "grad_norm": 0.2265625, | |
| "learning_rate": 4.212328767123288e-05, | |
| "logits/chosen": 0.7280508279800415, | |
| "logits/rejected": 0.6899917125701904, | |
| "logps/chosen": -79.49422454833984, | |
| "logps/rejected": -290.1501770019531, | |
| "loss": 0.0005, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.7727007865905762, | |
| "rewards/margins": 12.538410186767578, | |
| "rewards/rejected": -13.311111450195312, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 1.9022801302931596, | |
| "grad_norm": 0.875, | |
| "learning_rate": 4.2054794520547946e-05, | |
| "logits/chosen": 0.4206058382987976, | |
| "logits/rejected": 0.5227707624435425, | |
| "logps/chosen": -101.57917785644531, | |
| "logps/rejected": -279.0715637207031, | |
| "loss": 0.0023, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.9858956336975098, | |
| "rewards/margins": 12.150425910949707, | |
| "rewards/rejected": -13.136322021484375, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 1.9153094462540716, | |
| "grad_norm": 1.4609375, | |
| "learning_rate": 4.198630136986302e-05, | |
| "logits/chosen": 0.5245968699455261, | |
| "logits/rejected": 0.6121017932891846, | |
| "logps/chosen": -116.4501953125, | |
| "logps/rejected": -281.0984802246094, | |
| "loss": 0.0014, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.5875823497772217, | |
| "rewards/margins": 12.053236961364746, | |
| "rewards/rejected": -13.640819549560547, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 1.9283387622149837, | |
| "grad_norm": 0.431640625, | |
| "learning_rate": 4.1917808219178085e-05, | |
| "logits/chosen": 0.45093053579330444, | |
| "logits/rejected": 0.587200403213501, | |
| "logps/chosen": -104.39015197753906, | |
| "logps/rejected": -293.44232177734375, | |
| "loss": 0.0016, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.8772008419036865, | |
| "rewards/margins": 12.074963569641113, | |
| "rewards/rejected": -12.952163696289062, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 1.9413680781758957, | |
| "grad_norm": 3.4375, | |
| "learning_rate": 4.184931506849315e-05, | |
| "logits/chosen": 0.48234254121780396, | |
| "logits/rejected": 0.5706640481948853, | |
| "logps/chosen": -147.8875732421875, | |
| "logps/rejected": -278.24407958984375, | |
| "loss": 0.005, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.0798665285110474, | |
| "rewards/margins": 11.722947120666504, | |
| "rewards/rejected": -12.802812576293945, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 1.9543973941368078, | |
| "grad_norm": 0.60546875, | |
| "learning_rate": 4.1780821917808224e-05, | |
| "logits/chosen": 0.5278698205947876, | |
| "logits/rejected": 0.635560154914856, | |
| "logps/chosen": -99.79202270507812, | |
| "logps/rejected": -271.11785888671875, | |
| "loss": 0.0013, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.8048182725906372, | |
| "rewards/margins": 12.346576690673828, | |
| "rewards/rejected": -13.151394844055176, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.9543973941368078, | |
| "eval_logits/chosen": 0.4638054668903351, | |
| "eval_logits/rejected": 0.5228009223937988, | |
| "eval_logps/chosen": -108.31918334960938, | |
| "eval_logps/rejected": -286.8623046875, | |
| "eval_loss": 0.007638773415237665, | |
| "eval_rewards/accuracies": 0.9955357313156128, | |
| "eval_rewards/chosen": -1.2864917516708374, | |
| "eval_rewards/margins": 12.290605545043945, | |
| "eval_rewards/rejected": -13.57709789276123, | |
| "eval_runtime": 52.2778, | |
| "eval_samples_per_second": 12.835, | |
| "eval_steps_per_second": 0.803, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.9674267100977199, | |
| "grad_norm": 1.53125, | |
| "learning_rate": 4.171232876712329e-05, | |
| "logits/chosen": 0.5083509683609009, | |
| "logits/rejected": 0.6153576374053955, | |
| "logps/chosen": -86.2269515991211, | |
| "logps/rejected": -281.91888427734375, | |
| "loss": 0.0038, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.8234192132949829, | |
| "rewards/margins": 12.487937927246094, | |
| "rewards/rejected": -13.311358451843262, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 1.980456026058632, | |
| "grad_norm": 0.08740234375, | |
| "learning_rate": 4.1643835616438356e-05, | |
| "logits/chosen": 0.4471871554851532, | |
| "logits/rejected": 0.5222618579864502, | |
| "logps/chosen": -77.19293212890625, | |
| "logps/rejected": -279.3829040527344, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.7928668856620789, | |
| "rewards/margins": 12.986977577209473, | |
| "rewards/rejected": -13.779845237731934, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 1.993485342019544, | |
| "grad_norm": 0.38671875, | |
| "learning_rate": 4.157534246575342e-05, | |
| "logits/chosen": 0.5125950574874878, | |
| "logits/rejected": 0.531832218170166, | |
| "logps/chosen": -89.48603057861328, | |
| "logps/rejected": -292.6934509277344, | |
| "loss": 0.0012, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.9772664308547974, | |
| "rewards/margins": 12.81619644165039, | |
| "rewards/rejected": -13.793462753295898, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 2.006514657980456, | |
| "grad_norm": 0.09326171875, | |
| "learning_rate": 4.1506849315068495e-05, | |
| "logits/chosen": 0.5642431378364563, | |
| "logits/rejected": 0.6921492218971252, | |
| "logps/chosen": -109.61473083496094, | |
| "logps/rejected": -336.2562255859375, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.3835595846176147, | |
| "rewards/margins": 15.08292007446289, | |
| "rewards/rejected": -16.46647834777832, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 2.019543973941368, | |
| "grad_norm": 0.455078125, | |
| "learning_rate": 4.143835616438356e-05, | |
| "logits/chosen": 0.4728472828865051, | |
| "logits/rejected": 0.5778607726097107, | |
| "logps/chosen": -113.82855987548828, | |
| "logps/rejected": -300.3656005859375, | |
| "loss": 0.0007, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.0702670812606812, | |
| "rewards/margins": 13.495317459106445, | |
| "rewards/rejected": -14.565585136413574, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 2.03257328990228, | |
| "grad_norm": 0.1611328125, | |
| "learning_rate": 4.1369863013698634e-05, | |
| "logits/chosen": 0.558509886264801, | |
| "logits/rejected": 0.5765538215637207, | |
| "logps/chosen": -96.08161163330078, | |
| "logps/rejected": -311.4420471191406, | |
| "loss": 0.0005, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.7950121760368347, | |
| "rewards/margins": 14.138938903808594, | |
| "rewards/rejected": -14.933950424194336, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 2.045602605863192, | |
| "grad_norm": 0.23828125, | |
| "learning_rate": 4.13013698630137e-05, | |
| "logits/chosen": 0.5611923933029175, | |
| "logits/rejected": 0.5538697242736816, | |
| "logps/chosen": -118.36637878417969, | |
| "logps/rejected": -269.89837646484375, | |
| "loss": 0.0006, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.5846986770629883, | |
| "rewards/margins": 11.414068222045898, | |
| "rewards/rejected": -12.998766899108887, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 2.0586319218241043, | |
| "grad_norm": 0.240234375, | |
| "learning_rate": 4.123287671232877e-05, | |
| "logits/chosen": 0.5009916424751282, | |
| "logits/rejected": 0.5371646881103516, | |
| "logps/chosen": -100.47499084472656, | |
| "logps/rejected": -283.9187316894531, | |
| "loss": 0.0014, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.0149474143981934, | |
| "rewards/margins": 11.762290000915527, | |
| "rewards/rejected": -12.777236938476562, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 2.0716612377850163, | |
| "grad_norm": 0.1591796875, | |
| "learning_rate": 4.116438356164384e-05, | |
| "logits/chosen": 0.6033108830451965, | |
| "logits/rejected": 0.6458787322044373, | |
| "logps/chosen": -118.35772705078125, | |
| "logps/rejected": -342.5250244140625, | |
| "loss": 0.0005, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.5086966753005981, | |
| "rewards/margins": 15.292023658752441, | |
| "rewards/rejected": -16.80072021484375, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 2.0846905537459284, | |
| "grad_norm": 0.29296875, | |
| "learning_rate": 4.1095890410958905e-05, | |
| "logits/chosen": 0.5724061131477356, | |
| "logits/rejected": 0.6467206478118896, | |
| "logps/chosen": -95.32568359375, | |
| "logps/rejected": -270.0829772949219, | |
| "loss": 0.0014, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.0148924589157104, | |
| "rewards/margins": 12.349864959716797, | |
| "rewards/rejected": -13.364758491516113, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 2.0977198697068404, | |
| "grad_norm": 0.1328125, | |
| "learning_rate": 4.102739726027398e-05, | |
| "logits/chosen": 0.36649227142333984, | |
| "logits/rejected": 0.4759945273399353, | |
| "logps/chosen": -79.16898345947266, | |
| "logps/rejected": -256.05426025390625, | |
| "loss": 0.0013, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.5944907069206238, | |
| "rewards/margins": 11.262916564941406, | |
| "rewards/rejected": -11.85740852355957, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 2.1107491856677525, | |
| "grad_norm": 0.115234375, | |
| "learning_rate": 4.0958904109589044e-05, | |
| "logits/chosen": 0.4255332350730896, | |
| "logits/rejected": 0.5424034595489502, | |
| "logps/chosen": -146.3050079345703, | |
| "logps/rejected": -328.6482849121094, | |
| "loss": 0.0005, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.6175340414047241, | |
| "rewards/margins": 14.242683410644531, | |
| "rewards/rejected": -15.860215187072754, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 2.1237785016286646, | |
| "grad_norm": 6.78125, | |
| "learning_rate": 4.089041095890411e-05, | |
| "logits/chosen": 0.5109447836875916, | |
| "logits/rejected": 0.5712834596633911, | |
| "logps/chosen": -125.36318969726562, | |
| "logps/rejected": -287.7838134765625, | |
| "loss": 0.012, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.4990017414093018, | |
| "rewards/margins": 12.238770484924316, | |
| "rewards/rejected": -13.737771987915039, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 2.1368078175895766, | |
| "grad_norm": 0.054931640625, | |
| "learning_rate": 4.0821917808219176e-05, | |
| "logits/chosen": 0.5000830888748169, | |
| "logits/rejected": 0.5245240926742554, | |
| "logps/chosen": -97.7026596069336, | |
| "logps/rejected": -304.09375, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.1258344650268555, | |
| "rewards/margins": 12.807943344116211, | |
| "rewards/rejected": -13.9337797164917, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 2.1498371335504887, | |
| "grad_norm": 0.1376953125, | |
| "learning_rate": 4.075342465753425e-05, | |
| "logits/chosen": 0.4336688816547394, | |
| "logits/rejected": 0.6021983623504639, | |
| "logps/chosen": -106.12345123291016, | |
| "logps/rejected": -288.62469482421875, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.3127267360687256, | |
| "rewards/margins": 12.904582977294922, | |
| "rewards/rejected": -14.217309951782227, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 2.1628664495114007, | |
| "grad_norm": 0.036865234375, | |
| "learning_rate": 4.0684931506849315e-05, | |
| "logits/chosen": 0.4477992355823517, | |
| "logits/rejected": 0.5709498524665833, | |
| "logps/chosen": -108.85577392578125, | |
| "logps/rejected": -285.3506164550781, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.2774969339370728, | |
| "rewards/margins": 11.801679611206055, | |
| "rewards/rejected": -13.07917594909668, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 2.175895765472313, | |
| "grad_norm": 0.037841796875, | |
| "learning_rate": 4.061643835616439e-05, | |
| "logits/chosen": 0.452865868806839, | |
| "logits/rejected": 0.5479907989501953, | |
| "logps/chosen": -110.41411590576172, | |
| "logps/rejected": -293.05035400390625, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.1979793310165405, | |
| "rewards/margins": 13.181974411010742, | |
| "rewards/rejected": -14.37995433807373, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 2.188925081433225, | |
| "grad_norm": 0.040771484375, | |
| "learning_rate": 4.0547945205479454e-05, | |
| "logits/chosen": 0.4804653823375702, | |
| "logits/rejected": 0.5071645379066467, | |
| "logps/chosen": -93.72543334960938, | |
| "logps/rejected": -326.3215637207031, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.0895832777023315, | |
| "rewards/margins": 15.001323699951172, | |
| "rewards/rejected": -16.090906143188477, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 2.201954397394137, | |
| "grad_norm": 0.1396484375, | |
| "learning_rate": 4.047945205479452e-05, | |
| "logits/chosen": 0.3976234197616577, | |
| "logits/rejected": 0.5127770304679871, | |
| "logps/chosen": -86.84957122802734, | |
| "logps/rejected": -272.968505859375, | |
| "loss": 0.0006, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.7831870913505554, | |
| "rewards/margins": 12.802996635437012, | |
| "rewards/rejected": -13.58618450164795, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 2.214983713355049, | |
| "grad_norm": 0.1044921875, | |
| "learning_rate": 4.041095890410959e-05, | |
| "logits/chosen": 0.4045504927635193, | |
| "logits/rejected": 0.45465028285980225, | |
| "logps/chosen": -105.28460693359375, | |
| "logps/rejected": -309.6754150390625, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.0679643154144287, | |
| "rewards/margins": 14.03729248046875, | |
| "rewards/rejected": -15.105257987976074, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 2.228013029315961, | |
| "grad_norm": 0.03759765625, | |
| "learning_rate": 4.034246575342466e-05, | |
| "logits/chosen": 0.4175838530063629, | |
| "logits/rejected": 0.5390201210975647, | |
| "logps/chosen": -95.82322692871094, | |
| "logps/rejected": -332.502685546875, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.0852179527282715, | |
| "rewards/margins": 14.905830383300781, | |
| "rewards/rejected": -15.991046905517578, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 2.241042345276873, | |
| "grad_norm": 0.06396484375, | |
| "learning_rate": 4.027397260273973e-05, | |
| "logits/chosen": 0.48719215393066406, | |
| "logits/rejected": 0.5657703876495361, | |
| "logps/chosen": -88.64961242675781, | |
| "logps/rejected": -275.7567138671875, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.9338966012001038, | |
| "rewards/margins": 13.001708030700684, | |
| "rewards/rejected": -13.935606002807617, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 2.254071661237785, | |
| "grad_norm": 0.0966796875, | |
| "learning_rate": 4.02054794520548e-05, | |
| "logits/chosen": 0.5867688655853271, | |
| "logits/rejected": 0.6384550333023071, | |
| "logps/chosen": -110.77032470703125, | |
| "logps/rejected": -328.6289367675781, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.4720832109451294, | |
| "rewards/margins": 14.248076438903809, | |
| "rewards/rejected": -15.720159530639648, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 2.267100977198697, | |
| "grad_norm": 0.039794921875, | |
| "learning_rate": 4.0136986301369864e-05, | |
| "logits/chosen": 0.4327799677848816, | |
| "logits/rejected": 0.4705524742603302, | |
| "logps/chosen": -105.45439147949219, | |
| "logps/rejected": -319.2513122558594, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.8988175392150879, | |
| "rewards/margins": 14.017317771911621, | |
| "rewards/rejected": -14.916135787963867, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 2.2801302931596092, | |
| "grad_norm": 0.09619140625, | |
| "learning_rate": 4.006849315068493e-05, | |
| "logits/chosen": 0.5131232738494873, | |
| "logits/rejected": 0.5097309947013855, | |
| "logps/chosen": -120.355712890625, | |
| "logps/rejected": -296.6656494140625, | |
| "loss": 0.0005, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.3266019821166992, | |
| "rewards/margins": 12.798480987548828, | |
| "rewards/rejected": -14.125082969665527, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 2.2931596091205213, | |
| "grad_norm": 0.2255859375, | |
| "learning_rate": 4e-05, | |
| "logits/chosen": 0.4963986575603485, | |
| "logits/rejected": 0.5654538869857788, | |
| "logps/chosen": -119.40376281738281, | |
| "logps/rejected": -269.1568908691406, | |
| "loss": 0.0012, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.111502766609192, | |
| "rewards/margins": 11.538202285766602, | |
| "rewards/rejected": -12.64970588684082, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 2.3061889250814334, | |
| "grad_norm": 0.201171875, | |
| "learning_rate": 3.993150684931507e-05, | |
| "logits/chosen": 0.5080669522285461, | |
| "logits/rejected": 0.4891076385974884, | |
| "logps/chosen": -112.92520141601562, | |
| "logps/rejected": -291.1544189453125, | |
| "loss": 0.0007, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.3876513242721558, | |
| "rewards/margins": 11.71539306640625, | |
| "rewards/rejected": -13.103044509887695, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 2.3192182410423454, | |
| "grad_norm": 0.2138671875, | |
| "learning_rate": 3.9863013698630135e-05, | |
| "logits/chosen": 0.4692964553833008, | |
| "logits/rejected": 0.5622753500938416, | |
| "logps/chosen": -92.26762390136719, | |
| "logps/rejected": -267.98675537109375, | |
| "loss": 0.0016, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.8580554723739624, | |
| "rewards/margins": 12.327470779418945, | |
| "rewards/rejected": -13.185525894165039, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 2.3322475570032575, | |
| "grad_norm": 9.25, | |
| "learning_rate": 3.979452054794521e-05, | |
| "logits/chosen": 0.5638495683670044, | |
| "logits/rejected": 0.5911377668380737, | |
| "logps/chosen": -117.00182342529297, | |
| "logps/rejected": -285.2914123535156, | |
| "loss": 0.0098, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.443804144859314, | |
| "rewards/margins": 11.933603286743164, | |
| "rewards/rejected": -13.377408981323242, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 2.3452768729641695, | |
| "grad_norm": 0.043212890625, | |
| "learning_rate": 3.9726027397260274e-05, | |
| "logits/chosen": 0.4331457316875458, | |
| "logits/rejected": 0.5054813623428345, | |
| "logps/chosen": -114.8367919921875, | |
| "logps/rejected": -263.3021240234375, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.0023431777954102, | |
| "rewards/margins": 11.675691604614258, | |
| "rewards/rejected": -12.678034782409668, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 2.3583061889250816, | |
| "grad_norm": 0.2177734375, | |
| "learning_rate": 3.965753424657535e-05, | |
| "logits/chosen": 0.614739716053009, | |
| "logits/rejected": 0.6245816946029663, | |
| "logps/chosen": -94.85420989990234, | |
| "logps/rejected": -277.0835266113281, | |
| "loss": 0.0009, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.3436460494995117, | |
| "rewards/margins": 11.414693832397461, | |
| "rewards/rejected": -12.758339881896973, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 2.3713355048859937, | |
| "grad_norm": 0.134765625, | |
| "learning_rate": 3.958904109589041e-05, | |
| "logits/chosen": 0.5919771790504456, | |
| "logits/rejected": 0.61507648229599, | |
| "logps/chosen": -69.8411865234375, | |
| "logps/rejected": -272.3177795410156, | |
| "loss": 0.0005, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.573222279548645, | |
| "rewards/margins": 12.539608001708984, | |
| "rewards/rejected": -13.112829208374023, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 2.3843648208469057, | |
| "grad_norm": 0.0966796875, | |
| "learning_rate": 3.952054794520548e-05, | |
| "logits/chosen": 0.48881152272224426, | |
| "logits/rejected": 0.5776315927505493, | |
| "logps/chosen": -89.60847473144531, | |
| "logps/rejected": -293.9697265625, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.0633786916732788, | |
| "rewards/margins": 13.587398529052734, | |
| "rewards/rejected": -14.650779724121094, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 2.3973941368078178, | |
| "grad_norm": 0.07470703125, | |
| "learning_rate": 3.9452054794520546e-05, | |
| "logits/chosen": 0.6034122705459595, | |
| "logits/rejected": 0.5341907739639282, | |
| "logps/chosen": -82.32555389404297, | |
| "logps/rejected": -266.908203125, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.8900930881500244, | |
| "rewards/margins": 12.200946807861328, | |
| "rewards/rejected": -13.091039657592773, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 2.41042345276873, | |
| "grad_norm": 0.039306640625, | |
| "learning_rate": 3.938356164383562e-05, | |
| "logits/chosen": 0.5521727800369263, | |
| "logits/rejected": 0.6301867365837097, | |
| "logps/chosen": -98.17955017089844, | |
| "logps/rejected": -288.569580078125, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.1262962818145752, | |
| "rewards/margins": 11.977638244628906, | |
| "rewards/rejected": -13.103934288024902, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 2.423452768729642, | |
| "grad_norm": 0.0166015625, | |
| "learning_rate": 3.9315068493150684e-05, | |
| "logits/chosen": 0.5002225041389465, | |
| "logits/rejected": 0.595288097858429, | |
| "logps/chosen": -96.44597625732422, | |
| "logps/rejected": -270.15771484375, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.1506391763687134, | |
| "rewards/margins": 12.394613265991211, | |
| "rewards/rejected": -13.545251846313477, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 2.436482084690554, | |
| "grad_norm": 0.1865234375, | |
| "learning_rate": 3.924657534246576e-05, | |
| "logits/chosen": 0.5442834496498108, | |
| "logits/rejected": 0.5952669978141785, | |
| "logps/chosen": -104.47047424316406, | |
| "logps/rejected": -306.7992248535156, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.0476319789886475, | |
| "rewards/margins": 13.874088287353516, | |
| "rewards/rejected": -14.921720504760742, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 2.449511400651466, | |
| "grad_norm": 0.138671875, | |
| "learning_rate": 3.9178082191780823e-05, | |
| "logits/chosen": 0.38490670919418335, | |
| "logits/rejected": 0.6002693176269531, | |
| "logps/chosen": -87.23043823242188, | |
| "logps/rejected": -338.7787170410156, | |
| "loss": 0.0007, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.8123894929885864, | |
| "rewards/margins": 16.088157653808594, | |
| "rewards/rejected": -16.90054702758789, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 2.462540716612378, | |
| "grad_norm": 0.267578125, | |
| "learning_rate": 3.910958904109589e-05, | |
| "logits/chosen": 0.4915946125984192, | |
| "logits/rejected": 0.5476264953613281, | |
| "logps/chosen": -82.17195892333984, | |
| "logps/rejected": -279.19854736328125, | |
| "loss": 0.0006, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.7035698890686035, | |
| "rewards/margins": 12.01541519165039, | |
| "rewards/rejected": -12.718984603881836, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 2.47557003257329, | |
| "grad_norm": 0.078125, | |
| "learning_rate": 3.904109589041096e-05, | |
| "logits/chosen": 0.5442248582839966, | |
| "logits/rejected": 0.5692495107650757, | |
| "logps/chosen": -118.85929870605469, | |
| "logps/rejected": -289.40765380859375, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.2475743293762207, | |
| "rewards/margins": 12.606383323669434, | |
| "rewards/rejected": -13.853957176208496, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 2.488599348534202, | |
| "grad_norm": 0.3046875, | |
| "learning_rate": 3.897260273972603e-05, | |
| "logits/chosen": 0.5258509516716003, | |
| "logits/rejected": 0.6596174240112305, | |
| "logps/chosen": -131.38265991210938, | |
| "logps/rejected": -283.6547546386719, | |
| "loss": 0.0013, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.4643476009368896, | |
| "rewards/margins": 11.995124816894531, | |
| "rewards/rejected": -13.45947265625, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 2.5016286644951142, | |
| "grad_norm": 0.0108642578125, | |
| "learning_rate": 3.89041095890411e-05, | |
| "logits/chosen": 0.4301671087741852, | |
| "logits/rejected": 0.5925787091255188, | |
| "logps/chosen": -98.11710357666016, | |
| "logps/rejected": -325.28521728515625, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.8909515738487244, | |
| "rewards/margins": 15.353211402893066, | |
| "rewards/rejected": -16.24416160583496, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 2.5146579804560263, | |
| "grad_norm": 0.2021484375, | |
| "learning_rate": 3.883561643835617e-05, | |
| "logits/chosen": 0.5148497819900513, | |
| "logits/rejected": 0.5551873445510864, | |
| "logps/chosen": -97.75564575195312, | |
| "logps/rejected": -261.85284423828125, | |
| "loss": 0.0006, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.9065383672714233, | |
| "rewards/margins": 12.213420867919922, | |
| "rewards/rejected": -13.119958877563477, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 2.5276872964169383, | |
| "grad_norm": 0.036865234375, | |
| "learning_rate": 3.8767123287671233e-05, | |
| "logits/chosen": 0.49658170342445374, | |
| "logits/rejected": 0.5841426849365234, | |
| "logps/chosen": -129.8172149658203, | |
| "logps/rejected": -320.80657958984375, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.4361586570739746, | |
| "rewards/margins": 13.985431671142578, | |
| "rewards/rejected": -15.421590805053711, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 2.5407166123778504, | |
| "grad_norm": 0.032470703125, | |
| "learning_rate": 3.86986301369863e-05, | |
| "logits/chosen": 0.40110084414482117, | |
| "logits/rejected": 0.4429419934749603, | |
| "logps/chosen": -110.05766296386719, | |
| "logps/rejected": -279.5133056640625, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.49040687084198, | |
| "rewards/margins": 12.111815452575684, | |
| "rewards/rejected": -13.602222442626953, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 2.5537459283387625, | |
| "grad_norm": 0.047607421875, | |
| "learning_rate": 3.863013698630137e-05, | |
| "logits/chosen": 0.3707536458969116, | |
| "logits/rejected": 0.4637380838394165, | |
| "logps/chosen": -111.06605529785156, | |
| "logps/rejected": -331.0019836425781, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.4405083656311035, | |
| "rewards/margins": 14.104761123657227, | |
| "rewards/rejected": -15.545269966125488, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 2.5667752442996745, | |
| "grad_norm": 0.1787109375, | |
| "learning_rate": 3.856164383561644e-05, | |
| "logits/chosen": 0.4742357134819031, | |
| "logits/rejected": 0.5186038613319397, | |
| "logps/chosen": -102.65884399414062, | |
| "logps/rejected": -286.2248229980469, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.2666797637939453, | |
| "rewards/margins": 12.227950096130371, | |
| "rewards/rejected": -13.494630813598633, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 2.5798045602605866, | |
| "grad_norm": 3.21875, | |
| "learning_rate": 3.8493150684931505e-05, | |
| "logits/chosen": 0.5423088073730469, | |
| "logits/rejected": 0.5629587173461914, | |
| "logps/chosen": -116.73429870605469, | |
| "logps/rejected": -314.3695068359375, | |
| "loss": 0.0048, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.7822774648666382, | |
| "rewards/margins": 12.672046661376953, | |
| "rewards/rejected": -14.454323768615723, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 2.5928338762214986, | |
| "grad_norm": 0.032470703125, | |
| "learning_rate": 3.842465753424658e-05, | |
| "logits/chosen": 0.3940759599208832, | |
| "logits/rejected": 0.5198019742965698, | |
| "logps/chosen": -129.81735229492188, | |
| "logps/rejected": -329.01812744140625, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.2780612707138062, | |
| "rewards/margins": 14.856027603149414, | |
| "rewards/rejected": -16.13408851623535, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 2.6058631921824107, | |
| "grad_norm": 0.0235595703125, | |
| "learning_rate": 3.8356164383561644e-05, | |
| "logits/chosen": 0.4287755489349365, | |
| "logits/rejected": 0.49127259850502014, | |
| "logps/chosen": -88.73255920410156, | |
| "logps/rejected": -294.54254150390625, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.845811128616333, | |
| "rewards/margins": 13.853937149047852, | |
| "rewards/rejected": -14.699748992919922, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 2.6058631921824107, | |
| "eval_logits/chosen": 0.41032981872558594, | |
| "eval_logits/rejected": 0.4839063882827759, | |
| "eval_logps/chosen": -110.02198791503906, | |
| "eval_logps/rejected": -293.51873779296875, | |
| "eval_loss": 0.00859944336116314, | |
| "eval_rewards/accuracies": 0.9955357313156128, | |
| "eval_rewards/chosen": -1.456769585609436, | |
| "eval_rewards/margins": 12.785966873168945, | |
| "eval_rewards/rejected": -14.24273681640625, | |
| "eval_runtime": 52.2735, | |
| "eval_samples_per_second": 12.836, | |
| "eval_steps_per_second": 0.803, | |
| "step": 200 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 760, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 200, | |
| "total_flos": 0.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |