| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 1235, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0, |
| "grad_norm": 14.532158655563514, |
| "learning_rate": 2.6954177897574124e-09, |
| "logits/chosen": 34.72175216674805, |
| "logits/rejected": 20.32191276550293, |
| "logps/chosen": -43.472816467285156, |
| "logps/rejected": -15.394071578979492, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.01, |
| "grad_norm": 13.33639437555034, |
| "learning_rate": 2.6954177897574124e-08, |
| "logits/chosen": 24.2269344329834, |
| "logits/rejected": 11.215648651123047, |
| "logps/chosen": -28.242902755737305, |
| "logps/rejected": -20.951187133789062, |
| "loss": 0.6929, |
| "rewards/accuracies": 0.4305555522441864, |
| "rewards/chosen": 0.0030395330395549536, |
| "rewards/margins": 0.001963509013876319, |
| "rewards/rejected": 0.0010760227451100945, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 14.205098357697945, |
| "learning_rate": 5.390835579514825e-08, |
| "logits/chosen": 18.444320678710938, |
| "logits/rejected": 9.983879089355469, |
| "logps/chosen": -31.849700927734375, |
| "logps/rejected": -23.311790466308594, |
| "loss": 0.6933, |
| "rewards/accuracies": 0.4625000059604645, |
| "rewards/chosen": 0.0012197095202282071, |
| "rewards/margins": 0.0035085126291960478, |
| "rewards/rejected": -0.0022888043895363808, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.02, |
| "grad_norm": 16.673011790453604, |
| "learning_rate": 8.086253369272237e-08, |
| "logits/chosen": 17.74435043334961, |
| "logits/rejected": 8.345118522644043, |
| "logps/chosen": -30.406158447265625, |
| "logps/rejected": -21.020471572875977, |
| "loss": 0.6895, |
| "rewards/accuracies": 0.512499988079071, |
| "rewards/chosen": 0.010070301592350006, |
| "rewards/margins": 0.020921695977449417, |
| "rewards/rejected": -0.010851392522454262, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.03, |
| "grad_norm": 14.573591360793975, |
| "learning_rate": 1.078167115902965e-07, |
| "logits/chosen": 18.73330307006836, |
| "logits/rejected": 9.43653678894043, |
| "logps/chosen": -31.389293670654297, |
| "logps/rejected": -22.603178024291992, |
| "loss": 0.6886, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 0.016900639981031418, |
| "rewards/margins": 0.02598029002547264, |
| "rewards/rejected": -0.009079648181796074, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 15.064895702850436, |
| "learning_rate": 1.347708894878706e-07, |
| "logits/chosen": 21.941606521606445, |
| "logits/rejected": 11.377029418945312, |
| "logps/chosen": -26.427230834960938, |
| "logps/rejected": -22.94554901123047, |
| "loss": 0.682, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": -0.004804253112524748, |
| "rewards/margins": 0.022189300507307053, |
| "rewards/rejected": -0.026993554085493088, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.05, |
| "grad_norm": 11.51348272649836, |
| "learning_rate": 1.6172506738544473e-07, |
| "logits/chosen": 18.3980770111084, |
| "logits/rejected": 8.642730712890625, |
| "logps/chosen": -31.209949493408203, |
| "logps/rejected": -22.193492889404297, |
| "loss": 0.6801, |
| "rewards/accuracies": 0.5249999761581421, |
| "rewards/chosen": -0.004430429544299841, |
| "rewards/margins": 0.025433117523789406, |
| "rewards/rejected": -0.029863541945815086, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 12.59495036610267, |
| "learning_rate": 1.8867924528301886e-07, |
| "logits/chosen": 22.12859535217285, |
| "logits/rejected": 11.408747673034668, |
| "logps/chosen": -33.64688491821289, |
| "logps/rejected": -25.069316864013672, |
| "loss": 0.6677, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": 0.007852818816900253, |
| "rewards/margins": 0.07031778991222382, |
| "rewards/rejected": -0.06246497482061386, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.06, |
| "grad_norm": 13.631521920184003, |
| "learning_rate": 2.15633423180593e-07, |
| "logits/chosen": 19.470523834228516, |
| "logits/rejected": 9.611539840698242, |
| "logps/chosen": -34.38616180419922, |
| "logps/rejected": -24.599369049072266, |
| "loss": 0.6537, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.0013453528517857194, |
| "rewards/margins": 0.07501634210348129, |
| "rewards/rejected": -0.07636170089244843, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.07, |
| "grad_norm": 15.155810829127399, |
| "learning_rate": 2.425876010781671e-07, |
| "logits/chosen": 18.752395629882812, |
| "logits/rejected": 10.284080505371094, |
| "logps/chosen": -30.28765296936035, |
| "logps/rejected": -22.451499938964844, |
| "loss": 0.6389, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": 0.02513519860804081, |
| "rewards/margins": 0.14448794722557068, |
| "rewards/rejected": -0.11935273557901382, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 15.287885727754697, |
| "learning_rate": 2.695417789757412e-07, |
| "logits/chosen": 18.602680206298828, |
| "logits/rejected": 10.510783195495605, |
| "logps/chosen": -27.710529327392578, |
| "logps/rejected": -20.73261260986328, |
| "loss": 0.6084, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": 0.0187116377055645, |
| "rewards/margins": 0.17055347561836243, |
| "rewards/rejected": -0.15184184908866882, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.09, |
| "grad_norm": 14.136752828952837, |
| "learning_rate": 2.9649595687331536e-07, |
| "logits/chosen": 18.222576141357422, |
| "logits/rejected": 8.738725662231445, |
| "logps/chosen": -32.60607147216797, |
| "logps/rejected": -23.04232406616211, |
| "loss": 0.577, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": 0.010421124286949635, |
| "rewards/margins": 0.258650541305542, |
| "rewards/rejected": -0.24822942912578583, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.1, |
| "grad_norm": 10.395501571662873, |
| "learning_rate": 3.2345013477088946e-07, |
| "logits/chosen": 20.114803314208984, |
| "logits/rejected": 10.068379402160645, |
| "logps/chosen": -32.489681243896484, |
| "logps/rejected": -24.69915199279785, |
| "loss": 0.5631, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 0.022254476323723793, |
| "rewards/margins": 0.33683449029922485, |
| "rewards/rejected": -0.3145800232887268, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 13.16014124752717, |
| "learning_rate": 3.504043126684636e-07, |
| "logits/chosen": 20.067184448242188, |
| "logits/rejected": 9.164568901062012, |
| "logps/chosen": -32.826438903808594, |
| "logps/rejected": -23.3800106048584, |
| "loss": 0.5475, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": 0.000534355640411377, |
| "rewards/margins": 0.368183434009552, |
| "rewards/rejected": -0.3676490783691406, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.11, |
| "grad_norm": 9.132463704120546, |
| "learning_rate": 3.773584905660377e-07, |
| "logits/chosen": 17.273075103759766, |
| "logits/rejected": 7.174586296081543, |
| "logps/chosen": -35.4624137878418, |
| "logps/rejected": -26.397430419921875, |
| "loss": 0.5052, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": 0.009195957332849503, |
| "rewards/margins": 0.5245504379272461, |
| "rewards/rejected": -0.515354573726654, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 9.183413111659194, |
| "learning_rate": 4.043126684636118e-07, |
| "logits/chosen": 16.8369197845459, |
| "logits/rejected": 7.7970170974731445, |
| "logps/chosen": -32.14591979980469, |
| "logps/rejected": -30.26938819885254, |
| "loss": 0.4874, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": 0.035380132496356964, |
| "rewards/margins": 0.7585235238075256, |
| "rewards/rejected": -0.723143458366394, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.13, |
| "grad_norm": 7.878919640462125, |
| "learning_rate": 4.31266846361186e-07, |
| "logits/chosen": 19.754840850830078, |
| "logits/rejected": 9.633878707885742, |
| "logps/chosen": -31.04324722290039, |
| "logps/rejected": -27.710851669311523, |
| "loss": 0.4563, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 0.042704738676548004, |
| "rewards/margins": 0.838780403137207, |
| "rewards/rejected": -0.7960756421089172, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.14, |
| "grad_norm": 7.2682115815248185, |
| "learning_rate": 4.582210242587601e-07, |
| "logits/chosen": 17.248498916625977, |
| "logits/rejected": 6.265856742858887, |
| "logps/chosen": -35.737918853759766, |
| "logps/rejected": -33.12517547607422, |
| "loss": 0.4045, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": 0.042743489146232605, |
| "rewards/margins": 1.2479735612869263, |
| "rewards/rejected": -1.2052299976348877, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 6.655107331443258, |
| "learning_rate": 4.851752021563342e-07, |
| "logits/chosen": 15.79400634765625, |
| "logits/rejected": 7.4026994705200195, |
| "logps/chosen": -33.69775390625, |
| "logps/rejected": -33.79288864135742, |
| "loss": 0.415, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": 0.022446583956480026, |
| "rewards/margins": 1.2890950441360474, |
| "rewards/rejected": -1.266648530960083, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.15, |
| "grad_norm": 6.738689797871621, |
| "learning_rate": 5.121293800539083e-07, |
| "logits/chosen": 17.93876075744629, |
| "logits/rejected": 7.969454765319824, |
| "logps/chosen": -39.0764274597168, |
| "logps/rejected": -38.42572784423828, |
| "loss": 0.3938, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.012814655900001526, |
| "rewards/margins": 1.4817765951156616, |
| "rewards/rejected": -1.4945913553237915, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 7.571413646680112, |
| "learning_rate": 5.390835579514824e-07, |
| "logits/chosen": 15.535064697265625, |
| "logits/rejected": 7.618639945983887, |
| "logps/chosen": -29.554052352905273, |
| "logps/rejected": -38.369998931884766, |
| "loss": 0.3967, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": 0.009946437552571297, |
| "rewards/margins": 1.5956742763519287, |
| "rewards/rejected": -1.5857279300689697, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.17, |
| "grad_norm": 8.30487385022327, |
| "learning_rate": 5.660377358490566e-07, |
| "logits/chosen": 17.70827865600586, |
| "logits/rejected": 9.438637733459473, |
| "logps/chosen": -25.7026309967041, |
| "logps/rejected": -35.64281463623047, |
| "loss": 0.4122, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": 0.03218810260295868, |
| "rewards/margins": 1.5774122476577759, |
| "rewards/rejected": -1.5452241897583008, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.18, |
| "grad_norm": 10.394921980828073, |
| "learning_rate": 5.929919137466307e-07, |
| "logits/chosen": 14.426767349243164, |
| "logits/rejected": 6.211413383483887, |
| "logps/chosen": -30.019893646240234, |
| "logps/rejected": -32.835453033447266, |
| "loss": 0.3722, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": 0.03471699357032776, |
| "rewards/margins": 1.463291049003601, |
| "rewards/rejected": -1.4285740852355957, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 6.375611768144224, |
| "learning_rate": 6.199460916442049e-07, |
| "logits/chosen": 13.590364456176758, |
| "logits/rejected": 5.274600028991699, |
| "logps/chosen": -32.46255111694336, |
| "logps/rejected": -35.932701110839844, |
| "loss": 0.3672, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": 0.015460697934031487, |
| "rewards/margins": 1.697251558303833, |
| "rewards/rejected": -1.6817909479141235, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.19, |
| "grad_norm": 7.0998588698532, |
| "learning_rate": 6.469002695417789e-07, |
| "logits/chosen": 15.448835372924805, |
| "logits/rejected": 7.325920104980469, |
| "logps/chosen": -31.609811782836914, |
| "logps/rejected": -36.574485778808594, |
| "loss": 0.3442, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -0.02785242535173893, |
| "rewards/margins": 1.6729406118392944, |
| "rewards/rejected": -1.7007930278778076, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 11.112020896957825, |
| "learning_rate": 6.738544474393531e-07, |
| "logits/chosen": 11.986245155334473, |
| "logits/rejected": 5.750561714172363, |
| "logps/chosen": -25.793066024780273, |
| "logps/rejected": -41.1673469543457, |
| "loss": 0.3791, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -0.025392189621925354, |
| "rewards/margins": 1.8996098041534424, |
| "rewards/rejected": -1.925002098083496, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.21, |
| "grad_norm": 8.28256712940868, |
| "learning_rate": 7.008086253369272e-07, |
| "logits/chosen": 12.781267166137695, |
| "logits/rejected": 5.152982234954834, |
| "logps/chosen": -39.48822021484375, |
| "logps/rejected": -41.072654724121094, |
| "loss": 0.3437, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.0019520943751558661, |
| "rewards/margins": 2.153017520904541, |
| "rewards/rejected": -2.1549696922302246, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.22, |
| "grad_norm": 7.89202268057561, |
| "learning_rate": 7.277628032345014e-07, |
| "logits/chosen": 9.854484558105469, |
| "logits/rejected": 4.801867485046387, |
| "logps/chosen": -28.593700408935547, |
| "logps/rejected": -42.71391677856445, |
| "loss": 0.3319, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.09453903138637543, |
| "rewards/margins": 2.2262840270996094, |
| "rewards/rejected": -2.3208229541778564, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 8.826305045755074, |
| "learning_rate": 7.547169811320754e-07, |
| "logits/chosen": 9.630620956420898, |
| "logits/rejected": 3.555039167404175, |
| "logps/chosen": -33.346229553222656, |
| "logps/rejected": -44.17544937133789, |
| "loss": 0.3312, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.12214686721563339, |
| "rewards/margins": 2.3439300060272217, |
| "rewards/rejected": -2.4660770893096924, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.23, |
| "grad_norm": 8.653546928358537, |
| "learning_rate": 7.816711590296495e-07, |
| "logits/chosen": 10.433551788330078, |
| "logits/rejected": 3.2461979389190674, |
| "logps/chosen": -39.67103576660156, |
| "logps/rejected": -42.812984466552734, |
| "loss": 0.3321, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -0.12133828550577164, |
| "rewards/margins": 2.284976005554199, |
| "rewards/rejected": -2.4063143730163574, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 8.824713073330798, |
| "learning_rate": 8.086253369272237e-07, |
| "logits/chosen": 11.353474617004395, |
| "logits/rejected": 4.719082832336426, |
| "logps/chosen": -35.91315841674805, |
| "logps/rejected": -44.575462341308594, |
| "loss": 0.3245, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -0.12168803066015244, |
| "rewards/margins": 2.267569065093994, |
| "rewards/rejected": -2.3892571926116943, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 9.66741690133772, |
| "learning_rate": 8.355795148247978e-07, |
| "logits/chosen": 10.735493659973145, |
| "logits/rejected": 4.588931560516357, |
| "logps/chosen": -26.46687889099121, |
| "logps/rejected": -49.5245246887207, |
| "loss": 0.2771, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -0.17442038655281067, |
| "rewards/margins": 2.738029956817627, |
| "rewards/rejected": -2.9124507904052734, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.26, |
| "grad_norm": 9.315896892682426, |
| "learning_rate": 8.62533692722372e-07, |
| "logits/chosen": 10.167337417602539, |
| "logits/rejected": 4.709428787231445, |
| "logps/chosen": -27.1495304107666, |
| "logps/rejected": -42.73828125, |
| "loss": 0.3354, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.22090284526348114, |
| "rewards/margins": 2.134673833847046, |
| "rewards/rejected": -2.355576515197754, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.27, |
| "grad_norm": 13.726622644360368, |
| "learning_rate": 8.89487870619946e-07, |
| "logits/chosen": 10.056068420410156, |
| "logits/rejected": 3.6924197673797607, |
| "logps/chosen": -29.271936416625977, |
| "logps/rejected": -52.2385368347168, |
| "loss": 0.2851, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -0.25930890440940857, |
| "rewards/margins": 2.7558753490448, |
| "rewards/rejected": -3.0151844024658203, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 9.719990562865625, |
| "learning_rate": 9.164420485175202e-07, |
| "logits/chosen": 10.418781280517578, |
| "logits/rejected": 3.512908935546875, |
| "logps/chosen": -35.38787841796875, |
| "logps/rejected": -54.7783203125, |
| "loss": 0.3212, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -0.13537803292274475, |
| "rewards/margins": 3.10463285446167, |
| "rewards/rejected": -3.2400107383728027, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 7.713291117111928, |
| "learning_rate": 9.433962264150943e-07, |
| "logits/chosen": 11.484260559082031, |
| "logits/rejected": 2.651977300643921, |
| "logps/chosen": -40.273658752441406, |
| "logps/rejected": -58.427894592285156, |
| "loss": 0.3083, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -0.1973624974489212, |
| "rewards/margins": 3.379941940307617, |
| "rewards/rejected": -3.5773043632507324, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.29, |
| "grad_norm": 8.823882065396496, |
| "learning_rate": 9.703504043126684e-07, |
| "logits/chosen": 9.80009937286377, |
| "logits/rejected": 3.363906145095825, |
| "logps/chosen": -35.430931091308594, |
| "logps/rejected": -53.07218551635742, |
| "loss": 0.2974, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.268148809671402, |
| "rewards/margins": 2.871591567993164, |
| "rewards/rejected": -3.1397411823272705, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.3, |
| "grad_norm": 8.758247083429891, |
| "learning_rate": 9.973045822102425e-07, |
| "logits/chosen": 8.435894966125488, |
| "logits/rejected": 2.97558331489563, |
| "logps/chosen": -30.389368057250977, |
| "logps/rejected": -57.72784423828125, |
| "loss": 0.2906, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -0.27489131689071655, |
| "rewards/margins": 3.4869256019592285, |
| "rewards/rejected": -3.761817216873169, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.31, |
| "grad_norm": 7.631748999946755, |
| "learning_rate": 9.973005398920215e-07, |
| "logits/chosen": 9.810181617736816, |
| "logits/rejected": 2.715953826904297, |
| "logps/chosen": -32.38187789916992, |
| "logps/rejected": -59.24934005737305, |
| "loss": 0.2515, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -0.26635074615478516, |
| "rewards/margins": 3.6957993507385254, |
| "rewards/rejected": -3.9621498584747314, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 9.640014523194331, |
| "learning_rate": 9.943011397720455e-07, |
| "logits/chosen": 6.572465419769287, |
| "logits/rejected": 1.5894476175308228, |
| "logps/chosen": -35.82884216308594, |
| "logps/rejected": -62.335121154785156, |
| "loss": 0.2944, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.3674604296684265, |
| "rewards/margins": 3.553370952606201, |
| "rewards/rejected": -3.9208312034606934, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 8.447688684618234, |
| "learning_rate": 9.913017396520695e-07, |
| "logits/chosen": 8.614189147949219, |
| "logits/rejected": 2.1055407524108887, |
| "logps/chosen": -36.229373931884766, |
| "logps/rejected": -67.00838470458984, |
| "loss": 0.2475, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -0.36652085185050964, |
| "rewards/margins": 4.133388996124268, |
| "rewards/rejected": -4.499909400939941, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.33, |
| "grad_norm": 10.308333518649784, |
| "learning_rate": 9.883023395320934e-07, |
| "logits/chosen": 7.3046979904174805, |
| "logits/rejected": 1.8474470376968384, |
| "logps/chosen": -33.466575622558594, |
| "logps/rejected": -54.38008499145508, |
| "loss": 0.2786, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -0.4685148298740387, |
| "rewards/margins": 2.9815940856933594, |
| "rewards/rejected": -3.450108766555786, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.34, |
| "grad_norm": 11.066433923488011, |
| "learning_rate": 9.853029394121174e-07, |
| "logits/chosen": 8.033042907714844, |
| "logits/rejected": 1.4042919874191284, |
| "logps/chosen": -36.41872024536133, |
| "logps/rejected": -62.8419303894043, |
| "loss": 0.2887, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.3985442519187927, |
| "rewards/margins": 3.7590441703796387, |
| "rewards/rejected": -4.157588481903076, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.35, |
| "grad_norm": 9.436834774341992, |
| "learning_rate": 9.823035392921416e-07, |
| "logits/chosen": 8.418048858642578, |
| "logits/rejected": 2.3021135330200195, |
| "logps/chosen": -32.22361755371094, |
| "logps/rejected": -59.6855583190918, |
| "loss": 0.2838, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.2626524865627289, |
| "rewards/margins": 3.715350389480591, |
| "rewards/rejected": -3.9780030250549316, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 8.228341584616038, |
| "learning_rate": 9.793041391721656e-07, |
| "logits/chosen": 6.120682716369629, |
| "logits/rejected": 1.3484184741973877, |
| "logps/chosen": -33.420013427734375, |
| "logps/rejected": -50.40047836303711, |
| "loss": 0.2841, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -0.2531784176826477, |
| "rewards/margins": 3.000014305114746, |
| "rewards/rejected": -3.253192901611328, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 10.32480072105457, |
| "learning_rate": 9.763047390521895e-07, |
| "logits/chosen": 5.195191383361816, |
| "logits/rejected": 1.293041467666626, |
| "logps/chosen": -32.28490447998047, |
| "logps/rejected": -65.37628173828125, |
| "loss": 0.2243, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.3500588834285736, |
| "rewards/margins": 4.006264686584473, |
| "rewards/rejected": -4.356323719024658, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.37, |
| "grad_norm": 9.684893504642167, |
| "learning_rate": 9.733053389322135e-07, |
| "logits/chosen": 6.817592620849609, |
| "logits/rejected": 1.8375227451324463, |
| "logps/chosen": -32.0588264465332, |
| "logps/rejected": -58.29685592651367, |
| "loss": 0.2894, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.3759368360042572, |
| "rewards/margins": 3.311121702194214, |
| "rewards/rejected": -3.687058687210083, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.38, |
| "grad_norm": 10.551148814526442, |
| "learning_rate": 9.703059388122375e-07, |
| "logits/chosen": 7.666343688964844, |
| "logits/rejected": 0.895540714263916, |
| "logps/chosen": -40.66641616821289, |
| "logps/rejected": -62.7708625793457, |
| "loss": 0.266, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -0.376313179731369, |
| "rewards/margins": 3.9723620414733887, |
| "rewards/rejected": -4.34867525100708, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.39, |
| "grad_norm": 13.955838544627463, |
| "learning_rate": 9.673065386922614e-07, |
| "logits/chosen": 7.45211935043335, |
| "logits/rejected": 1.4658912420272827, |
| "logps/chosen": -37.92814254760742, |
| "logps/rejected": -60.00164031982422, |
| "loss": 0.2695, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -0.5523335337638855, |
| "rewards/margins": 3.546916961669922, |
| "rewards/rejected": -4.099250793457031, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 6.509468723388655, |
| "learning_rate": 9.643071385722856e-07, |
| "logits/chosen": 6.468374729156494, |
| "logits/rejected": 1.589951992034912, |
| "logps/chosen": -31.105602264404297, |
| "logps/rejected": -68.32657623291016, |
| "loss": 0.233, |
| "rewards/accuracies": 0.9624999761581421, |
| "rewards/chosen": -0.3899196684360504, |
| "rewards/margins": 4.078752040863037, |
| "rewards/rejected": -4.4686713218688965, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 8.461512679967555, |
| "learning_rate": 9.613077384523096e-07, |
| "logits/chosen": 5.951247215270996, |
| "logits/rejected": 0.2273063212633133, |
| "logps/chosen": -40.5043830871582, |
| "logps/rejected": -66.83312225341797, |
| "loss": 0.2335, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.46840596199035645, |
| "rewards/margins": 4.177884578704834, |
| "rewards/rejected": -4.6462907791137695, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.41, |
| "grad_norm": 11.967462722409747, |
| "learning_rate": 9.583083383323336e-07, |
| "logits/chosen": 7.466969966888428, |
| "logits/rejected": 2.0265185832977295, |
| "logps/chosen": -36.37960433959961, |
| "logps/rejected": -62.22391891479492, |
| "loss": 0.3024, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -0.5502637624740601, |
| "rewards/margins": 3.5663585662841797, |
| "rewards/rejected": -4.116622447967529, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.42, |
| "grad_norm": 13.48790921542868, |
| "learning_rate": 9.553089382123575e-07, |
| "logits/chosen": 7.438709259033203, |
| "logits/rejected": 2.2369534969329834, |
| "logps/chosen": -31.806324005126953, |
| "logps/rejected": -62.761444091796875, |
| "loss": 0.2708, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -0.3073802590370178, |
| "rewards/margins": 3.9625587463378906, |
| "rewards/rejected": -4.269938945770264, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.43, |
| "grad_norm": 8.131965384825092, |
| "learning_rate": 9.523095380923815e-07, |
| "logits/chosen": 6.172783851623535, |
| "logits/rejected": 1.3336482048034668, |
| "logps/chosen": -38.68606948852539, |
| "logps/rejected": -61.667015075683594, |
| "loss": 0.2684, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -0.5361462831497192, |
| "rewards/margins": 3.6847171783447266, |
| "rewards/rejected": -4.220863342285156, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 9.497892720299374, |
| "learning_rate": 9.493101379724055e-07, |
| "logits/chosen": 6.613364219665527, |
| "logits/rejected": 0.7903220057487488, |
| "logps/chosen": -38.494571685791016, |
| "logps/rejected": -69.2742919921875, |
| "loss": 0.2634, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -0.45714178681373596, |
| "rewards/margins": 4.283186912536621, |
| "rewards/rejected": -4.740328788757324, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 9.301689454835405, |
| "learning_rate": 9.463107378524294e-07, |
| "logits/chosen": 7.8855462074279785, |
| "logits/rejected": 1.7080109119415283, |
| "logps/chosen": -46.07327651977539, |
| "logps/rejected": -69.71966552734375, |
| "loss": 0.2601, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.4959110617637634, |
| "rewards/margins": 4.211344242095947, |
| "rewards/rejected": -4.7072553634643555, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.45, |
| "grad_norm": 8.870511185695392, |
| "learning_rate": 9.433113377324534e-07, |
| "logits/chosen": 4.9687042236328125, |
| "logits/rejected": 0.5882563591003418, |
| "logps/chosen": -35.23982620239258, |
| "logps/rejected": -66.53153228759766, |
| "loss": 0.2627, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -0.6176945567131042, |
| "rewards/margins": 3.988445281982422, |
| "rewards/rejected": -4.606139659881592, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.46, |
| "grad_norm": 9.088680603264963, |
| "learning_rate": 9.403119376124775e-07, |
| "logits/chosen": 4.735517501831055, |
| "logits/rejected": 0.7597037553787231, |
| "logps/chosen": -32.378013610839844, |
| "logps/rejected": -61.3135986328125, |
| "loss": 0.2795, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.517710268497467, |
| "rewards/margins": 3.588815212249756, |
| "rewards/rejected": -4.106525421142578, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.47, |
| "grad_norm": 9.322483216606127, |
| "learning_rate": 9.373125374925015e-07, |
| "logits/chosen": 5.155986309051514, |
| "logits/rejected": 0.859477698802948, |
| "logps/chosen": -34.49011993408203, |
| "logps/rejected": -67.98521423339844, |
| "loss": 0.2787, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.48589468002319336, |
| "rewards/margins": 4.109589576721191, |
| "rewards/rejected": -4.595483779907227, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 8.369998864435798, |
| "learning_rate": 9.343131373725255e-07, |
| "logits/chosen": 6.183840751647949, |
| "logits/rejected": 0.43990451097488403, |
| "logps/chosen": -37.519309997558594, |
| "logps/rejected": -74.74612426757812, |
| "loss": 0.234, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -0.5745238065719604, |
| "rewards/margins": 4.902890205383301, |
| "rewards/rejected": -5.477413654327393, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 9.01122070108579, |
| "learning_rate": 9.313137372525495e-07, |
| "logits/chosen": 6.226532936096191, |
| "logits/rejected": 1.1327306032180786, |
| "logps/chosen": -43.671016693115234, |
| "logps/rejected": -65.06999206542969, |
| "loss": 0.2639, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.5650517344474792, |
| "rewards/margins": 3.8689913749694824, |
| "rewards/rejected": -4.434042930603027, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.49, |
| "grad_norm": 10.53898233882828, |
| "learning_rate": 9.283143371325735e-07, |
| "logits/chosen": 7.082172393798828, |
| "logits/rejected": 1.4899251461029053, |
| "logps/chosen": -39.97394561767578, |
| "logps/rejected": -67.15525817871094, |
| "loss": 0.2816, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -0.5089329481124878, |
| "rewards/margins": 4.089753150939941, |
| "rewards/rejected": -4.598686695098877, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 10.507702524930748, |
| "learning_rate": 9.253149370125974e-07, |
| "logits/chosen": 6.221233367919922, |
| "logits/rejected": 1.4244422912597656, |
| "logps/chosen": -37.602134704589844, |
| "logps/rejected": -63.385986328125, |
| "loss": 0.2623, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.6569489240646362, |
| "rewards/margins": 3.7848668098449707, |
| "rewards/rejected": -4.441815376281738, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.51, |
| "grad_norm": 7.547301709428263, |
| "learning_rate": 9.223155368926214e-07, |
| "logits/chosen": 5.5077009201049805, |
| "logits/rejected": 0.2255195677280426, |
| "logps/chosen": -44.82948684692383, |
| "logps/rejected": -70.71250915527344, |
| "loss": 0.2411, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -0.7187212705612183, |
| "rewards/margins": 4.245903491973877, |
| "rewards/rejected": -4.964625358581543, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 6.997225295696728, |
| "learning_rate": 9.193161367726454e-07, |
| "logits/chosen": 5.982936859130859, |
| "logits/rejected": 0.9599858522415161, |
| "logps/chosen": -37.054931640625, |
| "logps/rejected": -68.97079467773438, |
| "loss": 0.2675, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -0.6195434331893921, |
| "rewards/margins": 4.198363304138184, |
| "rewards/rejected": -4.817906379699707, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 12.60027391679346, |
| "learning_rate": 9.163167366526694e-07, |
| "logits/chosen": 3.3835721015930176, |
| "logits/rejected": -0.19015471637248993, |
| "logps/chosen": -28.28788185119629, |
| "logps/rejected": -66.44264221191406, |
| "loss": 0.2576, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -0.6566665768623352, |
| "rewards/margins": 4.069520473480225, |
| "rewards/rejected": -4.726187229156494, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.53, |
| "grad_norm": 8.897497961790428, |
| "learning_rate": 9.133173365326934e-07, |
| "logits/chosen": 5.087882041931152, |
| "logits/rejected": 0.9116026163101196, |
| "logps/chosen": -33.74671173095703, |
| "logps/rejected": -73.04826354980469, |
| "loss": 0.2648, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -0.6395894289016724, |
| "rewards/margins": 4.4610419273376465, |
| "rewards/rejected": -5.100631237030029, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.54, |
| "grad_norm": 10.358901625732027, |
| "learning_rate": 9.103179364127174e-07, |
| "logits/chosen": 5.677692413330078, |
| "logits/rejected": 0.2521089017391205, |
| "logps/chosen": -39.447689056396484, |
| "logps/rejected": -75.43064880371094, |
| "loss": 0.2183, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.6245348453521729, |
| "rewards/margins": 4.826773643493652, |
| "rewards/rejected": -5.451308250427246, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.55, |
| "grad_norm": 13.919487286435421, |
| "learning_rate": 9.073185362927415e-07, |
| "logits/chosen": 6.0167036056518555, |
| "logits/rejected": 0.1550506204366684, |
| "logps/chosen": -33.495323181152344, |
| "logps/rejected": -73.1063461303711, |
| "loss": 0.2139, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -0.6048420667648315, |
| "rewards/margins": 4.695935249328613, |
| "rewards/rejected": -5.300777435302734, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 9.811099990146761, |
| "learning_rate": 9.043191361727654e-07, |
| "logits/chosen": 5.033354759216309, |
| "logits/rejected": 0.25484520196914673, |
| "logps/chosen": -39.27431106567383, |
| "logps/rejected": -71.24162292480469, |
| "loss": 0.2634, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -0.5098114013671875, |
| "rewards/margins": 4.455938339233398, |
| "rewards/rejected": -4.965749740600586, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 8.017368985065696, |
| "learning_rate": 9.013197360527894e-07, |
| "logits/chosen": 5.161509990692139, |
| "logits/rejected": -0.10750408470630646, |
| "logps/chosen": -33.15007781982422, |
| "logps/rejected": -68.93727111816406, |
| "loss": 0.2397, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -0.6771873235702515, |
| "rewards/margins": 4.225132942199707, |
| "rewards/rejected": -4.90231990814209, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.57, |
| "grad_norm": 9.515143348659938, |
| "learning_rate": 8.983203359328135e-07, |
| "logits/chosen": 6.287454605102539, |
| "logits/rejected": 0.1438266783952713, |
| "logps/chosen": -35.95096206665039, |
| "logps/rejected": -72.1221923828125, |
| "loss": 0.2619, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -0.5426121950149536, |
| "rewards/margins": 4.689238548278809, |
| "rewards/rejected": -5.231850624084473, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.58, |
| "grad_norm": 8.853639892057634, |
| "learning_rate": 8.953209358128374e-07, |
| "logits/chosen": 6.399009704589844, |
| "logits/rejected": 0.3351520895957947, |
| "logps/chosen": -42.617916107177734, |
| "logps/rejected": -77.32844543457031, |
| "loss": 0.2486, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -0.6509742736816406, |
| "rewards/margins": 4.959528923034668, |
| "rewards/rejected": -5.610503673553467, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.59, |
| "grad_norm": 6.236836391613244, |
| "learning_rate": 8.923215356928614e-07, |
| "logits/chosen": 4.475239276885986, |
| "logits/rejected": -0.2508452832698822, |
| "logps/chosen": -37.084800720214844, |
| "logps/rejected": -71.18461608886719, |
| "loss": 0.2146, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.6816585063934326, |
| "rewards/margins": 4.352997779846191, |
| "rewards/rejected": -5.034656047821045, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 11.394264503228657, |
| "learning_rate": 8.893221355728854e-07, |
| "logits/chosen": 2.8366403579711914, |
| "logits/rejected": -0.25868746638298035, |
| "logps/chosen": -30.913869857788086, |
| "logps/rejected": -65.82793426513672, |
| "loss": 0.2565, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -0.47125107049942017, |
| "rewards/margins": 4.218214988708496, |
| "rewards/rejected": -4.6894659996032715, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.61, |
| "grad_norm": 9.63703798333239, |
| "learning_rate": 8.863227354529093e-07, |
| "logits/chosen": 5.555853366851807, |
| "logits/rejected": 0.24771659076213837, |
| "logps/chosen": -42.00291061401367, |
| "logps/rejected": -71.82039642333984, |
| "loss": 0.2199, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -0.6926873922348022, |
| "rewards/margins": 4.374499320983887, |
| "rewards/rejected": -5.0671868324279785, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 12.475179147108308, |
| "learning_rate": 8.833233353329333e-07, |
| "logits/chosen": 4.9750657081604, |
| "logits/rejected": 0.9217138290405273, |
| "logps/chosen": -32.88280487060547, |
| "logps/rejected": -68.7645034790039, |
| "loss": 0.2622, |
| "rewards/accuracies": 0.9624999761581421, |
| "rewards/chosen": -0.6225011944770813, |
| "rewards/margins": 4.266197681427002, |
| "rewards/rejected": -4.888699054718018, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.62, |
| "grad_norm": 13.961604966562444, |
| "learning_rate": 8.803239352129574e-07, |
| "logits/chosen": 4.99851655960083, |
| "logits/rejected": 0.3421913683414459, |
| "logps/chosen": -39.43068313598633, |
| "logps/rejected": -63.01778030395508, |
| "loss": 0.2288, |
| "rewards/accuracies": 0.9624999761581421, |
| "rewards/chosen": -0.5820320844650269, |
| "rewards/margins": 3.9034628868103027, |
| "rewards/rejected": -4.485495090484619, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.63, |
| "grad_norm": 8.898695794199242, |
| "learning_rate": 8.773245350929814e-07, |
| "logits/chosen": 4.911359786987305, |
| "logits/rejected": -0.29061204195022583, |
| "logps/chosen": -38.77482986450195, |
| "logps/rejected": -71.60365295410156, |
| "loss": 0.2317, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.6680704951286316, |
| "rewards/margins": 4.3021674156188965, |
| "rewards/rejected": -4.970237731933594, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 7.082466242287932, |
| "learning_rate": 8.743251349730053e-07, |
| "logits/chosen": 8.117277145385742, |
| "logits/rejected": 0.22241690754890442, |
| "logps/chosen": -43.375308990478516, |
| "logps/rejected": -83.64913177490234, |
| "loss": 0.2283, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -0.4950665533542633, |
| "rewards/margins": 5.797509670257568, |
| "rewards/rejected": -6.292576789855957, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.65, |
| "grad_norm": 11.152027144249061, |
| "learning_rate": 8.713257348530294e-07, |
| "logits/chosen": 3.5433716773986816, |
| "logits/rejected": 0.14973898231983185, |
| "logps/chosen": -37.43271255493164, |
| "logps/rejected": -64.81431579589844, |
| "loss": 0.2465, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.6468411087989807, |
| "rewards/margins": 3.858449935913086, |
| "rewards/rejected": -4.505290508270264, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 10.057327590247457, |
| "learning_rate": 8.683263347330534e-07, |
| "logits/chosen": 4.4199323654174805, |
| "logits/rejected": -0.7671071887016296, |
| "logps/chosen": -41.11504364013672, |
| "logps/rejected": -84.44219970703125, |
| "loss": 0.2022, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -0.7142581343650818, |
| "rewards/margins": 5.4674906730651855, |
| "rewards/rejected": -6.181748390197754, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.66, |
| "grad_norm": 8.438998651633067, |
| "learning_rate": 8.653269346130773e-07, |
| "logits/chosen": 5.182823181152344, |
| "logits/rejected": 0.20817065238952637, |
| "logps/chosen": -36.99726486206055, |
| "logps/rejected": -67.30989074707031, |
| "loss": 0.2377, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -0.5765316486358643, |
| "rewards/margins": 4.2105937004089355, |
| "rewards/rejected": -4.787125587463379, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.67, |
| "grad_norm": 11.4901243014845, |
| "learning_rate": 8.623275344931013e-07, |
| "logits/chosen": 5.666651725769043, |
| "logits/rejected": 0.33456581830978394, |
| "logps/chosen": -39.16795349121094, |
| "logps/rejected": -85.42790222167969, |
| "loss": 0.2227, |
| "rewards/accuracies": 0.9624999761581421, |
| "rewards/chosen": -0.4975081980228424, |
| "rewards/margins": 5.7258501052856445, |
| "rewards/rejected": -6.223358154296875, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 19.82780237798911, |
| "learning_rate": 8.593281343731254e-07, |
| "logits/chosen": 4.063232898712158, |
| "logits/rejected": -0.5040629506111145, |
| "logps/chosen": -34.486968994140625, |
| "logps/rejected": -70.23979187011719, |
| "loss": 0.2577, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.6543490886688232, |
| "rewards/margins": 4.389010429382324, |
| "rewards/rejected": -5.043359279632568, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.69, |
| "grad_norm": 13.135947133773865, |
| "learning_rate": 8.563287342531494e-07, |
| "logits/chosen": 4.684737682342529, |
| "logits/rejected": 0.15397313237190247, |
| "logps/chosen": -44.84247970581055, |
| "logps/rejected": -81.59874725341797, |
| "loss": 0.2415, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -0.934431254863739, |
| "rewards/margins": 4.985320091247559, |
| "rewards/rejected": -5.9197516441345215, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 9.942117794842815, |
| "learning_rate": 8.533293341331733e-07, |
| "logits/chosen": 5.942558765411377, |
| "logits/rejected": -0.4634874761104584, |
| "logps/chosen": -40.689144134521484, |
| "logps/rejected": -85.91549682617188, |
| "loss": 0.2115, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -0.8215259313583374, |
| "rewards/margins": 5.591064453125, |
| "rewards/rejected": -6.412590026855469, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.7, |
| "grad_norm": 10.317389757107083, |
| "learning_rate": 8.503299340131973e-07, |
| "logits/chosen": 6.38779354095459, |
| "logits/rejected": -0.08178432285785675, |
| "logps/chosen": -44.51734161376953, |
| "logps/rejected": -84.4544677734375, |
| "loss": 0.235, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -0.6095073223114014, |
| "rewards/margins": 5.518308639526367, |
| "rewards/rejected": -6.127816200256348, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.71, |
| "grad_norm": 10.455585529285335, |
| "learning_rate": 8.473305338932213e-07, |
| "logits/chosen": 4.026412010192871, |
| "logits/rejected": -0.11150656640529633, |
| "logps/chosen": -41.58991241455078, |
| "logps/rejected": -79.89984130859375, |
| "loss": 0.2029, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.7930614948272705, |
| "rewards/margins": 5.062989234924316, |
| "rewards/rejected": -5.856051445007324, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 12.413727527066243, |
| "learning_rate": 8.443311337732452e-07, |
| "logits/chosen": 3.686732769012451, |
| "logits/rejected": -0.9453102350234985, |
| "logps/chosen": -36.16538619995117, |
| "logps/rejected": -66.45039367675781, |
| "loss": 0.2538, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -0.6018454432487488, |
| "rewards/margins": 4.1831464767456055, |
| "rewards/rejected": -4.784992218017578, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.73, |
| "grad_norm": 5.779220350203222, |
| "learning_rate": 8.413317336532693e-07, |
| "logits/chosen": 3.2173056602478027, |
| "logits/rejected": -0.6864817142486572, |
| "logps/chosen": -45.675899505615234, |
| "logps/rejected": -79.54066467285156, |
| "loss": 0.2205, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -1.37534499168396, |
| "rewards/margins": 4.585756301879883, |
| "rewards/rejected": -5.961101055145264, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 8.005991362378799, |
| "learning_rate": 8.383323335332934e-07, |
| "logits/chosen": 4.786983489990234, |
| "logits/rejected": 0.6237784624099731, |
| "logps/chosen": -35.7918815612793, |
| "logps/rejected": -78.17144012451172, |
| "loss": 0.2198, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.5418484210968018, |
| "rewards/margins": 5.177511215209961, |
| "rewards/rejected": -5.719359397888184, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.74, |
| "grad_norm": 10.754977883833703, |
| "learning_rate": 8.353329334133174e-07, |
| "logits/chosen": 4.775103569030762, |
| "logits/rejected": -0.6356438398361206, |
| "logps/chosen": -38.72078323364258, |
| "logps/rejected": -76.97815704345703, |
| "loss": 0.2345, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -0.5932806730270386, |
| "rewards/margins": 5.025919437408447, |
| "rewards/rejected": -5.619199752807617, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 8.370671264923958, |
| "learning_rate": 8.323335332933413e-07, |
| "logits/chosen": 3.7196967601776123, |
| "logits/rejected": -0.6006534099578857, |
| "logps/chosen": -37.750770568847656, |
| "logps/rejected": -75.3314208984375, |
| "loss": 0.239, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -0.6637129783630371, |
| "rewards/margins": 4.798027992248535, |
| "rewards/rejected": -5.4617414474487305, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 6.856873898439941, |
| "learning_rate": 8.293341331733653e-07, |
| "logits/chosen": 4.082217216491699, |
| "logits/rejected": -0.9965828657150269, |
| "logps/chosen": -37.654178619384766, |
| "logps/rejected": -84.30792236328125, |
| "loss": 0.2356, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -0.9460653066635132, |
| "rewards/margins": 5.248031139373779, |
| "rewards/rejected": -6.194096088409424, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.77, |
| "grad_norm": 12.985186244804439, |
| "learning_rate": 8.263347330533893e-07, |
| "logits/chosen": 3.5539650917053223, |
| "logits/rejected": -1.1321687698364258, |
| "logps/chosen": -38.285552978515625, |
| "logps/rejected": -74.53987121582031, |
| "loss": 0.2505, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -1.0940616130828857, |
| "rewards/margins": 4.375874996185303, |
| "rewards/rejected": -5.469937324523926, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.78, |
| "grad_norm": 10.435750083651422, |
| "learning_rate": 8.233353329334133e-07, |
| "logits/chosen": 5.076735973358154, |
| "logits/rejected": 0.126987487077713, |
| "logps/chosen": -34.09984588623047, |
| "logps/rejected": -71.26011657714844, |
| "loss": 0.2292, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -0.7402558922767639, |
| "rewards/margins": 4.3730621337890625, |
| "rewards/rejected": -5.11331844329834, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 10.13777044080165, |
| "learning_rate": 8.203359328134373e-07, |
| "logits/chosen": 7.634838104248047, |
| "logits/rejected": 1.4603595733642578, |
| "logps/chosen": -40.903377532958984, |
| "logps/rejected": -76.29527282714844, |
| "loss": 0.1999, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.5216394066810608, |
| "rewards/margins": 5.088165283203125, |
| "rewards/rejected": -5.609805107116699, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.79, |
| "grad_norm": 9.326208459350674, |
| "learning_rate": 8.173365326934613e-07, |
| "logits/chosen": 6.336162567138672, |
| "logits/rejected": 0.3762982487678528, |
| "logps/chosen": -33.36209487915039, |
| "logps/rejected": -74.17536926269531, |
| "loss": 0.2128, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -0.5508583784103394, |
| "rewards/margins": 4.918101787567139, |
| "rewards/rejected": -5.468959808349609, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 8.983054286726517, |
| "learning_rate": 8.143371325734852e-07, |
| "logits/chosen": 4.599123001098633, |
| "logits/rejected": -0.9218250513076782, |
| "logps/chosen": -43.528175354003906, |
| "logps/rejected": -74.83673095703125, |
| "loss": 0.196, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -0.7882202863693237, |
| "rewards/margins": 4.805144309997559, |
| "rewards/rejected": -5.593365669250488, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.81, |
| "grad_norm": 7.586393164977657, |
| "learning_rate": 8.113377324535092e-07, |
| "logits/chosen": 5.207022666931152, |
| "logits/rejected": -0.7721199989318848, |
| "logps/chosen": -42.59235382080078, |
| "logps/rejected": -85.05528259277344, |
| "loss": 0.2199, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -0.7499576807022095, |
| "rewards/margins": 5.488531112670898, |
| "rewards/rejected": -6.238488674163818, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.82, |
| "grad_norm": 9.992746900458588, |
| "learning_rate": 8.083383323335332e-07, |
| "logits/chosen": 3.9228744506835938, |
| "logits/rejected": -0.4620266854763031, |
| "logps/chosen": -37.23911666870117, |
| "logps/rejected": -78.92009735107422, |
| "loss": 0.2303, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -0.8569334745407104, |
| "rewards/margins": 4.876101970672607, |
| "rewards/rejected": -5.733035087585449, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 10.673195989249953, |
| "learning_rate": 8.053389322135572e-07, |
| "logits/chosen": 5.194386959075928, |
| "logits/rejected": -0.3306584060192108, |
| "logps/chosen": -37.87696075439453, |
| "logps/rejected": -72.78455352783203, |
| "loss": 0.2187, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.6252318024635315, |
| "rewards/margins": 4.7294392585754395, |
| "rewards/rejected": -5.354671478271484, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.83, |
| "grad_norm": 14.803696255288303, |
| "learning_rate": 8.023395320935813e-07, |
| "logits/chosen": 3.891000270843506, |
| "logits/rejected": -0.9068805575370789, |
| "logps/chosen": -40.6781120300293, |
| "logps/rejected": -81.7871322631836, |
| "loss": 0.1926, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.6426281929016113, |
| "rewards/margins": 5.368206977844238, |
| "rewards/rejected": -6.01083517074585, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 7.25945056094058, |
| "learning_rate": 7.993401319736053e-07, |
| "logits/chosen": 3.9840798377990723, |
| "logits/rejected": -0.24789929389953613, |
| "logps/chosen": -33.29187774658203, |
| "logps/rejected": -69.07560729980469, |
| "loss": 0.2182, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -0.6687902212142944, |
| "rewards/margins": 4.272780418395996, |
| "rewards/rejected": -4.941570281982422, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.85, |
| "grad_norm": 9.78196474122683, |
| "learning_rate": 7.963407318536293e-07, |
| "logits/chosen": 5.05679988861084, |
| "logits/rejected": 0.055974699556827545, |
| "logps/chosen": -37.91151809692383, |
| "logps/rejected": -80.07110595703125, |
| "loss": 0.2448, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -0.7068133354187012, |
| "rewards/margins": 5.139575958251953, |
| "rewards/rejected": -5.8463897705078125, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.86, |
| "grad_norm": 8.672914354007535, |
| "learning_rate": 7.933413317336532e-07, |
| "logits/chosen": 5.2890625, |
| "logits/rejected": 0.1258922517299652, |
| "logps/chosen": -37.52037048339844, |
| "logps/rejected": -84.71490478515625, |
| "loss": 0.1865, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -0.5368348956108093, |
| "rewards/margins": 5.716190814971924, |
| "rewards/rejected": -6.253025531768799, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 9.84113162499674, |
| "learning_rate": 7.903419316136772e-07, |
| "logits/chosen": 6.731717109680176, |
| "logits/rejected": 0.1761070042848587, |
| "logps/chosen": -46.707733154296875, |
| "logps/rejected": -83.32044219970703, |
| "loss": 0.2375, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.8217447996139526, |
| "rewards/margins": 5.439145088195801, |
| "rewards/rejected": -6.260889530181885, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.87, |
| "grad_norm": 10.836630414800007, |
| "learning_rate": 7.873425314937012e-07, |
| "logits/chosen": 5.533178806304932, |
| "logits/rejected": 0.15569272637367249, |
| "logps/chosen": -37.35350799560547, |
| "logps/rejected": -78.94861602783203, |
| "loss": 0.1957, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.545146107673645, |
| "rewards/margins": 5.181343078613281, |
| "rewards/rejected": -5.726489067077637, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 8.602866268180797, |
| "learning_rate": 7.843431313737253e-07, |
| "logits/chosen": 4.896660804748535, |
| "logits/rejected": -0.805033802986145, |
| "logps/chosen": -39.619224548339844, |
| "logps/rejected": -77.04666137695312, |
| "loss": 0.2214, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.47726893424987793, |
| "rewards/margins": 5.242520332336426, |
| "rewards/rejected": -5.719789981842041, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.89, |
| "grad_norm": 6.662307424265464, |
| "learning_rate": 7.813437312537492e-07, |
| "logits/chosen": 4.894705772399902, |
| "logits/rejected": -0.20809988677501678, |
| "logps/chosen": -31.3262996673584, |
| "logps/rejected": -81.197265625, |
| "loss": 0.176, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.6251105666160583, |
| "rewards/margins": 5.416481018066406, |
| "rewards/rejected": -6.041591644287109, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.9, |
| "grad_norm": 13.236914879599565, |
| "learning_rate": 7.783443311337732e-07, |
| "logits/chosen": 5.344097137451172, |
| "logits/rejected": 0.4349094331264496, |
| "logps/chosen": -41.192283630371094, |
| "logps/rejected": -73.71807861328125, |
| "loss": 0.2189, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -0.9201833605766296, |
| "rewards/margins": 4.493067741394043, |
| "rewards/rejected": -5.4132513999938965, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 11.447379083352251, |
| "learning_rate": 7.753449310137972e-07, |
| "logits/chosen": 5.665997505187988, |
| "logits/rejected": -1.3642793893814087, |
| "logps/chosen": -40.07326889038086, |
| "logps/rejected": -86.33983612060547, |
| "loss": 0.1977, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.7224239110946655, |
| "rewards/margins": 5.889608860015869, |
| "rewards/rejected": -6.612032413482666, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.91, |
| "grad_norm": 13.326349881889897, |
| "learning_rate": 7.723455308938211e-07, |
| "logits/chosen": 6.115036487579346, |
| "logits/rejected": -0.4208458960056305, |
| "logps/chosen": -39.56875991821289, |
| "logps/rejected": -86.91368103027344, |
| "loss": 0.195, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -0.669539213180542, |
| "rewards/margins": 5.865464210510254, |
| "rewards/rejected": -6.535002708435059, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 9.16173651681725, |
| "learning_rate": 7.693461307738452e-07, |
| "logits/chosen": 4.2573957443237305, |
| "logits/rejected": -1.075480580329895, |
| "logps/chosen": -40.893592834472656, |
| "logps/rejected": -78.26148223876953, |
| "loss": 0.2122, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.8861662745475769, |
| "rewards/margins": 5.005410671234131, |
| "rewards/rejected": -5.89157772064209, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.93, |
| "grad_norm": 8.806688464717144, |
| "learning_rate": 7.663467306538693e-07, |
| "logits/chosen": 4.599158763885498, |
| "logits/rejected": -0.7346502542495728, |
| "logps/chosen": -39.177040100097656, |
| "logps/rejected": -87.11119842529297, |
| "loss": 0.2062, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -0.8978098034858704, |
| "rewards/margins": 5.6298370361328125, |
| "rewards/rejected": -6.527646541595459, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.94, |
| "grad_norm": 25.10098990447531, |
| "learning_rate": 7.633473305338932e-07, |
| "logits/chosen": 3.0874524116516113, |
| "logits/rejected": -0.8507956266403198, |
| "logps/chosen": -35.36151885986328, |
| "logps/rejected": -74.7900390625, |
| "loss": 0.2189, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -0.6411249041557312, |
| "rewards/margins": 4.888244152069092, |
| "rewards/rejected": -5.529369831085205, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.95, |
| "grad_norm": 10.385014559881746, |
| "learning_rate": 7.603479304139172e-07, |
| "logits/chosen": 3.757722854614258, |
| "logits/rejected": -0.22128507494926453, |
| "logps/chosen": -37.696983337402344, |
| "logps/rejected": -73.14608001708984, |
| "loss": 0.2361, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -0.9848117828369141, |
| "rewards/margins": 4.35174036026001, |
| "rewards/rejected": -5.336552143096924, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 10.380895353757795, |
| "learning_rate": 7.573485302939412e-07, |
| "logits/chosen": 4.578551769256592, |
| "logits/rejected": -0.7743647694587708, |
| "logps/chosen": -39.889625549316406, |
| "logps/rejected": -82.70549774169922, |
| "loss": 0.2109, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -0.8430160284042358, |
| "rewards/margins": 5.3374342918396, |
| "rewards/rejected": -6.180450916290283, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 12.046426752286843, |
| "learning_rate": 7.543491301739652e-07, |
| "logits/chosen": 4.2825775146484375, |
| "logits/rejected": -0.97093665599823, |
| "logps/chosen": -40.860382080078125, |
| "logps/rejected": -89.05497741699219, |
| "loss": 0.211, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -0.7804887890815735, |
| "rewards/margins": 5.945776462554932, |
| "rewards/rejected": -6.726265907287598, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.97, |
| "grad_norm": 6.32612572100064, |
| "learning_rate": 7.513497300539891e-07, |
| "logits/chosen": 4.0176568031311035, |
| "logits/rejected": -0.5003149509429932, |
| "logps/chosen": -38.51813507080078, |
| "logps/rejected": -80.8168716430664, |
| "loss": 0.2054, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -0.5089032649993896, |
| "rewards/margins": 5.293961048126221, |
| "rewards/rejected": -5.802863597869873, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.98, |
| "grad_norm": 5.9075949824072875, |
| "learning_rate": 7.483503299340131e-07, |
| "logits/chosen": 3.3083388805389404, |
| "logits/rejected": -1.7419131994247437, |
| "logps/chosen": -38.396018981933594, |
| "logps/rejected": -82.67097473144531, |
| "loss": 0.2315, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -0.7572061419487, |
| "rewards/margins": 5.389390468597412, |
| "rewards/rejected": -6.146596431732178, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.99, |
| "grad_norm": 10.20062551022738, |
| "learning_rate": 7.453509298140372e-07, |
| "logits/chosen": 4.2033233642578125, |
| "logits/rejected": -0.8881649971008301, |
| "logps/chosen": -37.607303619384766, |
| "logps/rejected": -84.1115951538086, |
| "loss": 0.1816, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.5505583882331848, |
| "rewards/margins": 5.680405616760254, |
| "rewards/rejected": -6.230964183807373, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 7.177796631658208, |
| "learning_rate": 7.423515296940611e-07, |
| "logits/chosen": 6.914002418518066, |
| "logits/rejected": 0.5874744057655334, |
| "logps/chosen": -40.15135955810547, |
| "logps/rejected": -82.51225280761719, |
| "loss": 0.214, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -0.8042081594467163, |
| "rewards/margins": 5.3896894454956055, |
| "rewards/rejected": -6.193896770477295, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_logits/chosen": 3.8774516582489014, |
| "eval_logits/rejected": -1.1117931604385376, |
| "eval_logps/chosen": -28.125341415405273, |
| "eval_logps/rejected": -61.24203872680664, |
| "eval_loss": 0.16555103659629822, |
| "eval_rewards/accuracies": 1.0, |
| "eval_rewards/chosen": -0.05321738123893738, |
| "eval_rewards/margins": 4.279094219207764, |
| "eval_rewards/rejected": -4.332311153411865, |
| "eval_runtime": 19.0346, |
| "eval_samples_per_second": 5.254, |
| "eval_steps_per_second": 0.368, |
| "step": 1235 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 3705, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "total_flos": 0.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|