{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 4479,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.033489618218352314,
      "grad_norm": 78.33113098144531,
      "learning_rate": 2.1875e-07,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -175.2264862060547,
      "logps/rejected": -218.9656982421875,
      "loss": 0.6923,
      "rewards/accuracies": 0.4137499928474426,
      "rewards/chosen": 0.0005424434202723205,
      "rewards/margins": 0.0029623538721352816,
      "rewards/rejected": -0.0024199108593165874,
      "step": 50
    },
    {
      "epoch": 0.06697923643670463,
      "grad_norm": 106.17163848876953,
      "learning_rate": 4.419642857142857e-07,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -179.5259246826172,
      "logps/rejected": -224.7578887939453,
      "loss": 0.6907,
      "rewards/accuracies": 0.42124998569488525,
      "rewards/chosen": -0.005094751715660095,
      "rewards/margins": 0.00641661649569869,
      "rewards/rejected": -0.011511369608342648,
      "step": 100
    },
    {
      "epoch": 0.10046885465505694,
      "grad_norm": 86.04861450195312,
      "learning_rate": 6.651785714285713e-07,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -165.04095458984375,
      "logps/rejected": -219.6518096923828,
      "loss": 0.6756,
      "rewards/accuracies": 0.5112500190734863,
      "rewards/chosen": -0.026584235951304436,
      "rewards/margins": 0.03996539115905762,
      "rewards/rejected": -0.0665496289730072,
      "step": 150
    },
    {
      "epoch": 0.13395847287340926,
      "grad_norm": 82.77224731445312,
      "learning_rate": 8.88392857142857e-07,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -177.75872802734375,
      "logps/rejected": -223.51528930664062,
      "loss": 0.6591,
      "rewards/accuracies": 0.5099999904632568,
      "rewards/chosen": -0.11609632521867752,
      "rewards/margins": 0.10106377303600311,
      "rewards/rejected": -0.21716010570526123,
      "step": 200
    },
    {
      "epoch": 0.16744809109176156,
      "grad_norm": 135.95346069335938,
      "learning_rate": 1.1116071428571427e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -180.67254638671875,
      "logps/rejected": -226.42140197753906,
      "loss": 0.6295,
      "rewards/accuracies": 0.5099999904632568,
      "rewards/chosen": -0.18336135149002075,
      "rewards/margins": 0.20662552118301392,
      "rewards/rejected": -0.38998690247535706,
      "step": 250
    },
    {
      "epoch": 0.20093770931011387,
      "grad_norm": 89.77359771728516,
      "learning_rate": 1.3348214285714285e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -178.88687133789062,
      "logps/rejected": -226.58355712890625,
      "loss": 0.602,
      "rewards/accuracies": 0.5637500286102295,
      "rewards/chosen": -0.29084426164627075,
      "rewards/margins": 0.32938891649246216,
      "rewards/rejected": -0.6202332377433777,
      "step": 300
    },
    {
      "epoch": 0.23442732752846618,
      "grad_norm": 89.93605041503906,
      "learning_rate": 1.558035714285714e-06,
      "logits/chosen": null,
      "logits/rejected": -1.608971118927002,
      "logps/chosen": -176.1905059814453,
      "logps/rejected": -231.0211944580078,
      "loss": 0.5782,
      "rewards/accuracies": 0.5562499761581421,
      "rewards/chosen": -0.4454282522201538,
      "rewards/margins": 0.5162708163261414,
      "rewards/rejected": -0.9616988897323608,
      "step": 350
    },
    {
      "epoch": 0.2679169457468185,
      "grad_norm": 113.58289337158203,
      "learning_rate": 1.7812499999999999e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -176.52401733398438,
      "logps/rejected": -236.76588439941406,
      "loss": 0.5478,
      "rewards/accuracies": 0.6150000095367432,
      "rewards/chosen": -0.5549299120903015,
      "rewards/margins": 0.8102107048034668,
      "rewards/rejected": -1.3651405572891235,
      "step": 400
    },
    {
      "epoch": 0.3014065639651708,
      "grad_norm": 100.28213500976562,
      "learning_rate": 1.999999696300462e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -183.1260223388672,
      "logps/rejected": -235.15631103515625,
      "loss": 0.5635,
      "rewards/accuracies": 0.5799999833106995,
      "rewards/chosen": -0.48344433307647705,
      "rewards/margins": 0.770007848739624,
      "rewards/rejected": -1.253452181816101,
      "step": 450
    },
    {
      "epoch": 0.33489618218352313,
      "grad_norm": 90.32833099365234,
      "learning_rate": 1.999210181452139e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -180.36907958984375,
      "logps/rejected": -232.14285278320312,
      "loss": 0.5376,
      "rewards/accuracies": 0.6087499856948853,
      "rewards/chosen": -0.5261387825012207,
      "rewards/margins": 0.8372372984886169,
      "rewards/rejected": -1.3633761405944824,
      "step": 500
    },
    {
      "epoch": 0.3683858004018754,
      "grad_norm": 72.57466125488281,
      "learning_rate": 1.996903560165487e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -175.88233947753906,
      "logps/rejected": -242.15728759765625,
      "loss": 0.5083,
      "rewards/accuracies": 0.6225000023841858,
      "rewards/chosen": -0.5493210554122925,
      "rewards/margins": 1.0930429697036743,
      "rewards/rejected": -1.6423640251159668,
      "step": 550
    },
    {
      "epoch": 0.40187541862022774,
      "grad_norm": 47.55934143066406,
      "learning_rate": 1.993083334596579e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -184.1678924560547,
      "logps/rejected": -251.43661499023438,
      "loss": 0.5193,
      "rewards/accuracies": 0.6225000023841858,
      "rewards/chosen": -0.7250985503196716,
      "rewards/margins": 1.2086968421936035,
      "rewards/rejected": -1.9337953329086304,
      "step": 600
    },
    {
      "epoch": 0.43536503683858,
      "grad_norm": 90.7481460571289,
      "learning_rate": 1.987755305015383e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -196.693359375,
      "logps/rejected": -247.3010711669922,
      "loss": 0.516,
      "rewards/accuracies": 0.6137499809265137,
      "rewards/chosen": -0.6984607577323914,
      "rewards/margins": 1.173628807067871,
      "rewards/rejected": -1.8720895051956177,
      "step": 650
    },
    {
      "epoch": 0.46885465505693236,
      "grad_norm": 86.08389282226562,
      "learning_rate": 1.980927560999178e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -186.29693603515625,
      "logps/rejected": -245.04824829101562,
      "loss": 0.5057,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": -0.6868166327476501,
      "rewards/margins": 1.367271900177002,
      "rewards/rejected": -2.0540883541107178,
      "step": 700
    },
    {
      "epoch": 0.5023442732752846,
      "grad_norm": 40.12553405761719,
      "learning_rate": 1.9726104691501045e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -179.41378784179688,
      "logps/rejected": -240.62547302246094,
      "loss": 0.5132,
      "rewards/accuracies": 0.5975000262260437,
      "rewards/chosen": -0.5570769309997559,
      "rewards/margins": 1.2463946342468262,
      "rewards/rejected": -1.803471326828003,
      "step": 750
    },
    {
      "epoch": 0.535833891493637,
      "grad_norm": 36.09309005737305,
      "learning_rate": 1.9628166573554945e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -170.22169494628906,
      "logps/rejected": -239.9406280517578,
      "loss": 0.4553,
      "rewards/accuracies": 0.6449999809265137,
      "rewards/chosen": -0.5568282604217529,
      "rewards/margins": 1.5600597858428955,
      "rewards/rejected": -2.1168878078460693,
      "step": 800
    },
    {
      "epoch": 0.5693235097119893,
      "grad_norm": 88.8606185913086,
      "learning_rate": 1.951560995614879e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -175.4136199951172,
      "logps/rejected": -241.44386291503906,
      "loss": 0.4912,
      "rewards/accuracies": 0.6175000071525574,
      "rewards/chosen": -0.6789398193359375,
      "rewards/margins": 1.448940634727478,
      "rewards/rejected": -2.127880573272705,
      "step": 850
    },
    {
      "epoch": 0.6028131279303416,
      "grad_norm": 37.501346588134766,
      "learning_rate": 1.9388605734627843e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -183.4543914794922,
      "logps/rejected": -241.45433044433594,
      "loss": 0.505,
      "rewards/accuracies": 0.6212499737739563,
      "rewards/chosen": -0.719947338104248,
      "rewards/margins": 1.5332283973693848,
      "rewards/rejected": -2.253175735473633,
      "step": 900
    },
    {
      "epoch": 0.6363027461486939,
      "grad_norm": 58.78173065185547,
      "learning_rate": 1.9247346740215936e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -182.4608612060547,
      "logps/rejected": -236.8692169189453,
      "loss": 0.4756,
      "rewards/accuracies": 0.6274999976158142,
      "rewards/chosen": -0.5931037068367004,
      "rewards/margins": 1.6174336671829224,
      "rewards/rejected": -2.2105374336242676,
      "step": 950
    },
    {
      "epoch": 0.6697923643670463,
      "grad_norm": 53.627410888671875,
      "learning_rate": 1.909204744723877e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -169.64356994628906,
      "logps/rejected": -238.07931518554688,
      "loss": 0.4699,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.5164381265640259,
      "rewards/margins": 1.6023368835449219,
      "rewards/rejected": -2.1187753677368164,
      "step": 1000
    },
    {
      "epoch": 0.7032819825853985,
      "grad_norm": 47.64691162109375,
      "learning_rate": 1.8922943647486314e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -174.08212280273438,
      "logps/rejected": -251.6885223388672,
      "loss": 0.4309,
      "rewards/accuracies": 0.668749988079071,
      "rewards/chosen": -0.560505211353302,
      "rewards/margins": 1.9433872699737549,
      "rewards/rejected": -2.503892421722412,
      "step": 1050
    },
    {
      "epoch": 0.7367716008037508,
      "grad_norm": 58.94224166870117,
      "learning_rate": 1.8740292092208816e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -162.09487915039062,
      "logps/rejected": -236.79824829101562,
      "loss": 0.4293,
      "rewards/accuracies": 0.6524999737739563,
      "rewards/chosen": -0.6041057705879211,
      "rewards/margins": 2.0014426708221436,
      "rewards/rejected": -2.60554838180542,
      "step": 1100
    },
    {
      "epoch": 0.7702612190221031,
      "grad_norm": 41.707763671875,
      "learning_rate": 1.8544370102289943e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -177.0761260986328,
      "logps/rejected": -240.7725067138672,
      "loss": 0.4419,
      "rewards/accuracies": 0.6612499952316284,
      "rewards/chosen": -0.6522895097732544,
      "rewards/margins": 1.7689578533172607,
      "rewards/rejected": -2.4212474822998047,
      "step": 1150
    },
    {
      "epoch": 0.8037508372404555,
      "grad_norm": 45.48369216918945,
      "learning_rate": 1.83354751471889e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -184.2169952392578,
      "logps/rejected": -264.9205322265625,
      "loss": 0.4503,
      "rewards/accuracies": 0.6549999713897705,
      "rewards/chosen": -0.49645543098449707,
      "rewards/margins": 2.04986572265625,
      "rewards/rejected": -2.546321392059326,
      "step": 1200
    },
    {
      "epoch": 0.8372404554588078,
      "grad_norm": 51.16058349609375,
      "learning_rate": 1.8113924393290904e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -182.03074645996094,
      "logps/rejected": -249.8163604736328,
      "loss": 0.4319,
      "rewards/accuracies": 0.6612499952316284,
      "rewards/chosen": -0.6471911072731018,
      "rewards/margins": 2.1099319458007812,
      "rewards/rejected": -2.7571229934692383,
      "step": 1250
    },
    {
      "epoch": 0.87073007367716,
      "grad_norm": 64.02259063720703,
      "learning_rate": 1.7880054222351658e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -178.18972778320312,
      "logps/rejected": -237.3641815185547,
      "loss": 0.4155,
      "rewards/accuracies": 0.6725000143051147,
      "rewards/chosen": -0.38780125975608826,
      "rewards/margins": 1.9852185249328613,
      "rewards/rejected": -2.3730199337005615,
      "step": 1300
    },
    {
      "epoch": 0.9042196918955124,
      "grad_norm": 35.12641525268555,
      "learning_rate": 1.763421972076705e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -175.52285766601562,
      "logps/rejected": -247.11244201660156,
      "loss": 0.4359,
      "rewards/accuracies": 0.6512500047683716,
      "rewards/chosen": -0.493091344833374,
      "rewards/margins": 1.8931076526641846,
      "rewards/rejected": -2.3861987590789795,
      "step": 1350
    },
    {
      "epoch": 0.9377093101138647,
      "grad_norm": 64.41110229492188,
      "learning_rate": 1.7376794140443474e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -178.29629516601562,
      "logps/rejected": -234.5249481201172,
      "loss": 0.4512,
      "rewards/accuracies": 0.6549999713897705,
      "rewards/chosen": -0.4724200367927551,
      "rewards/margins": 1.9340243339538574,
      "rewards/rejected": -2.4064440727233887,
      "step": 1400
    },
    {
      "epoch": 0.971198928332217,
      "grad_norm": 26.93653106689453,
      "learning_rate": 1.7108168332087366e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -175.42259216308594,
      "logps/rejected": -243.82032775878906,
      "loss": 0.4343,
      "rewards/accuracies": 0.6512500047683716,
      "rewards/chosen": -0.3961036205291748,
      "rewards/margins": 1.8803616762161255,
      "rewards/rejected": -2.27646541595459,
      "step": 1450
    },
    {
      "epoch": 1.0046885465505693,
      "grad_norm": 74.74053955078125,
      "learning_rate": 1.682875015177438e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -174.56732177734375,
      "logps/rejected": -246.36451721191406,
      "loss": 0.3957,
      "rewards/accuracies": 0.6800000071525574,
      "rewards/chosen": -0.34164169430732727,
      "rewards/margins": 2.248396635055542,
      "rewards/rejected": -2.590038537979126,
      "step": 1500
    },
    {
      "epoch": 1.0381781647689217,
      "grad_norm": 58.65504455566406,
      "learning_rate": 1.6538963841699207e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -176.5469207763672,
      "logps/rejected": -258.92706298828125,
      "loss": 0.2861,
      "rewards/accuracies": 0.7437499761581421,
      "rewards/chosen": -0.2739707827568054,
      "rewards/margins": 3.0113985538482666,
      "rewards/rejected": -3.2853691577911377,
      "step": 1550
    },
    {
      "epoch": 1.0716677829872738,
      "grad_norm": 59.74324417114258,
      "learning_rate": 1.6239249386046274e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -177.00692749023438,
      "logps/rejected": -255.23556518554688,
      "loss": 0.2914,
      "rewards/accuracies": 0.7549999952316284,
      "rewards/chosen": -0.4652925729751587,
      "rewards/margins": 3.098710298538208,
      "rewards/rejected": -3.564002752304077,
      "step": 1600
    },
    {
      "epoch": 1.1051574012056262,
      "grad_norm": 37.80025863647461,
      "learning_rate": 1.593006184295927e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -185.12716674804688,
      "logps/rejected": -254.19509887695312,
      "loss": 0.2798,
      "rewards/accuracies": 0.7524999976158142,
      "rewards/chosen": -0.28863173723220825,
      "rewards/margins": 3.227825880050659,
      "rewards/rejected": -3.516458034515381,
      "step": 1650
    },
    {
      "epoch": 1.1386470194239786,
      "grad_norm": 40.97309875488281,
      "learning_rate": 1.5611870653623825e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -182.1793975830078,
      "logps/rejected": -245.0845184326172,
      "loss": 0.2778,
      "rewards/accuracies": 0.7450000047683716,
      "rewards/chosen": -0.3949226438999176,
      "rewards/margins": 3.3151471614837646,
      "rewards/rejected": -3.7100696563720703,
      "step": 1700
    },
    {
      "epoch": 1.1721366376423308,
      "grad_norm": 61.272247314453125,
      "learning_rate": 1.5285158929512291e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -174.18487548828125,
      "logps/rejected": -247.96957397460938,
      "loss": 0.3048,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": -0.4471362233161926,
      "rewards/margins": 3.481740951538086,
      "rewards/rejected": -3.928877830505371,
      "step": 1750
    },
    {
      "epoch": 1.2056262558606832,
      "grad_norm": 20.384906768798828,
      "learning_rate": 1.4950422718872916e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -177.91143798828125,
      "logps/rejected": -264.8081970214844,
      "loss": 0.2738,
      "rewards/accuracies": 0.7574999928474426,
      "rewards/chosen": -0.4734611213207245,
      "rewards/margins": 3.4893076419830322,
      "rewards/rejected": -3.962768793106079,
      "step": 1800
    },
    {
      "epoch": 1.2391158740790356,
      "grad_norm": 46.84432601928711,
      "learning_rate": 1.4608170253576945e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -171.02236938476562,
      "logps/rejected": -259.7280578613281,
      "loss": 0.2928,
      "rewards/accuracies": 0.7262499928474426,
      "rewards/chosen": -0.6498711109161377,
      "rewards/margins": 3.556124210357666,
      "rewards/rejected": -4.205995082855225,
      "step": 1850
    },
    {
      "epoch": 1.2726054922973877,
      "grad_norm": 40.36602020263672,
      "learning_rate": 1.4258921177467371e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -176.40257263183594,
      "logps/rejected": -251.6402130126953,
      "loss": 0.301,
      "rewards/accuracies": 0.7325000166893005,
      "rewards/chosen": -0.7374945878982544,
      "rewards/margins": 3.618178606033325,
      "rewards/rejected": -4.355673789978027,
      "step": 1900
    },
    {
      "epoch": 1.3060951105157401,
      "grad_norm": 33.35322952270508,
      "learning_rate": 1.3903205757380715e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -177.98854064941406,
      "logps/rejected": -259.6983337402344,
      "loss": 0.2985,
      "rewards/accuracies": 0.7275000214576721,
      "rewards/chosen": -0.7513535022735596,
      "rewards/margins": 3.433237314224243,
      "rewards/rejected": -4.184591293334961,
      "step": 1950
    },
    {
      "epoch": 1.3395847287340925,
      "grad_norm": 31.858760833740234,
      "learning_rate": 1.3541564078039942e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -176.97511291503906,
      "logps/rejected": -267.1122131347656,
      "loss": 0.307,
      "rewards/accuracies": 0.7174999713897705,
      "rewards/chosen": -0.6912581920623779,
      "rewards/margins": 3.4836156368255615,
      "rewards/rejected": -4.1748738288879395,
      "step": 2000
    },
    {
      "epoch": 1.3730743469524447,
      "grad_norm": 40.272186279296875,
      "learning_rate": 1.3174545222040757e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -181.2541046142578,
      "logps/rejected": -267.8948974609375,
      "loss": 0.2764,
      "rewards/accuracies": 0.7612500190734863,
      "rewards/chosen": -0.5613307356834412,
      "rewards/margins": 3.6199841499328613,
      "rewards/rejected": -4.181314468383789,
      "step": 2050
    },
    {
      "epoch": 1.406563965170797,
      "grad_norm": 20.189088821411133,
      "learning_rate": 1.2802706436176447e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -186.3399658203125,
      "logps/rejected": -275.252685546875,
      "loss": 0.2673,
      "rewards/accuracies": 0.7512500286102295,
      "rewards/chosen": -0.49821099638938904,
      "rewards/margins": 3.6726813316345215,
      "rewards/rejected": -4.170892238616943,
      "step": 2100
    },
    {
      "epoch": 1.4400535833891492,
      "grad_norm": 28.09309196472168,
      "learning_rate": 1.2426612285366904e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -180.54571533203125,
      "logps/rejected": -272.14337158203125,
      "loss": 0.2833,
      "rewards/accuracies": 0.7649999856948853,
      "rewards/chosen": -0.5274211168289185,
      "rewards/margins": 3.785543203353882,
      "rewards/rejected": -4.31296443939209,
      "step": 2150
    },
    {
      "epoch": 1.4735432016075016,
      "grad_norm": 5.396151542663574,
      "learning_rate": 1.2046833795476566e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -178.48960876464844,
      "logps/rejected": -268.61944580078125,
      "loss": 0.2594,
      "rewards/accuracies": 0.768750011920929,
      "rewards/chosen": -0.3929290771484375,
      "rewards/margins": 3.8942084312438965,
      "rewards/rejected": -4.287137508392334,
      "step": 2200
    },
    {
      "epoch": 1.507032819825854,
      "grad_norm": 26.636991500854492,
      "learning_rate": 1.16639475863226e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -183.34547424316406,
      "logps/rejected": -259.8311462402344,
      "loss": 0.3026,
      "rewards/accuracies": 0.7200000286102295,
      "rewards/chosen": -0.5500699281692505,
      "rewards/margins": 3.565783739089966,
      "rewards/rejected": -4.115853786468506,
      "step": 2250
    },
    {
      "epoch": 1.5405224380442064,
      "grad_norm": 14.03653335571289,
      "learning_rate": 1.1278534996189831e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -182.8995361328125,
      "logps/rejected": -273.84112548828125,
      "loss": 0.2603,
      "rewards/accuracies": 0.7487499713897705,
      "rewards/chosen": -0.5162584185600281,
      "rewards/margins": 4.0679030418396,
      "rewards/rejected": -4.584161758422852,
      "step": 2300
    },
    {
      "epoch": 1.5740120562625586,
      "grad_norm": 67.45540618896484,
      "learning_rate": 1.0891181199181518e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -176.06849670410156,
      "logps/rejected": -265.9678649902344,
      "loss": 0.272,
      "rewards/accuracies": 0.7475000023841858,
      "rewards/chosen": -0.5778465867042542,
      "rewards/margins": 3.9320404529571533,
      "rewards/rejected": -4.509886264801025,
      "step": 2350
    },
    {
      "epoch": 1.607501674480911,
      "grad_norm": 21.127580642700195,
      "learning_rate": 1.0502474316746242e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -178.6305694580078,
      "logps/rejected": -265.5202331542969,
      "loss": 0.2839,
      "rewards/accuracies": 0.7462499737739563,
      "rewards/chosen": -0.5587973594665527,
      "rewards/margins": 3.9246935844421387,
      "rewards/rejected": -4.48349142074585,
      "step": 2400
    },
    {
      "epoch": 1.6409912926992631,
      "grad_norm": 47.24773025512695,
      "learning_rate": 1.0113004524729797e-06,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -196.45948791503906,
      "logps/rejected": -272.1256408691406,
      "loss": 0.2791,
      "rewards/accuracies": 0.7587500214576721,
      "rewards/chosen": -0.5817875862121582,
      "rewards/margins": 3.766108989715576,
      "rewards/rejected": -4.347896099090576,
      "step": 2450
    },
    {
      "epoch": 1.6744809109176155,
      "grad_norm": 20.178668975830078,
      "learning_rate": 9.723363157307888e-07,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -183.7681427001953,
      "logps/rejected": -268.5182800292969,
      "loss": 0.2744,
      "rewards/accuracies": 0.7475000023841858,
      "rewards/chosen": -0.5075680017471313,
      "rewards/margins": 3.9134867191314697,
      "rewards/rejected": -4.421054840087891,
      "step": 2500
    },
    {
      "epoch": 1.707970529135968,
      "grad_norm": 31.073015213012695,
      "learning_rate": 9.334141809160118e-07,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -178.35658264160156,
      "logps/rejected": -265.6587829589844,
      "loss": 0.2405,
      "rewards/accuracies": 0.7712500095367432,
      "rewards/chosen": -0.6600850820541382,
      "rewards/margins": 4.134018421173096,
      "rewards/rejected": -4.794103622436523,
      "step": 2550
    },
    {
      "epoch": 1.7414601473543203,
      "grad_norm": 36.3228759765625,
      "learning_rate": 8.945931437248468e-07,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -178.47000122070312,
      "logps/rejected": -270.1788635253906,
      "loss": 0.2674,
      "rewards/accuracies": 0.7524999976158142,
      "rewards/chosen": -0.6616349816322327,
      "rewards/margins": 4.066000461578369,
      "rewards/rejected": -4.727634906768799,
      "step": 2600
    },
    {
      "epoch": 1.7749497655726725,
      "grad_norm": 27.108051300048828,
      "learning_rate": 8.559321463564014e-07,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -175.67808532714844,
      "logps/rejected": -261.2061767578125,
      "loss": 0.2494,
      "rewards/accuracies": 0.7549999952316284,
      "rewards/chosen": -0.5604009032249451,
      "rewards/margins": 4.31578254699707,
      "rewards/rejected": -4.876183032989502,
      "step": 2650
    },
    {
      "epoch": 1.8084393837910246,
      "grad_norm": 54.821876525878906,
      "learning_rate": 8.174898880204195e-07,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -178.19236755371094,
      "logps/rejected": -269.1416015625,
      "loss": 0.2817,
      "rewards/accuracies": 0.7400000095367432,
      "rewards/chosen": -0.5425779223442078,
      "rewards/margins": 3.9950203895568848,
      "rewards/rejected": -4.537598133087158,
      "step": 2700
    },
    {
      "epoch": 1.841929002009377,
      "grad_norm": 36.13364791870117,
      "learning_rate": 7.793247358139428e-07,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -179.92677307128906,
      "logps/rejected": -266.75799560546875,
      "loss": 0.2885,
      "rewards/accuracies": 0.7387499809265137,
      "rewards/chosen": -0.5648588538169861,
      "rewards/margins": 3.864666700363159,
      "rewards/rejected": -4.429525852203369,
      "step": 2750
    },
    {
      "epoch": 1.8754186202277294,
      "grad_norm": 24.641510009765625,
      "learning_rate": 7.414946361022179e-07,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -171.00909423828125,
      "logps/rejected": -273.5279541015625,
      "loss": 0.2695,
      "rewards/accuracies": 0.7400000095367432,
      "rewards/chosen": -0.4850202798843384,
      "rewards/margins": 4.063894271850586,
      "rewards/rejected": -4.548914432525635,
      "step": 2800
    },
    {
      "epoch": 1.9089082384460818,
      "grad_norm": 25.44546127319336,
      "learning_rate": 7.040570265384029e-07,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -186.17147827148438,
      "logps/rejected": -272.64111328125,
      "loss": 0.2881,
      "rewards/accuracies": 0.7512500286102295,
      "rewards/chosen": -0.5362930297851562,
      "rewards/margins": 4.026025295257568,
      "rewards/rejected": -4.562318325042725,
      "step": 2850
    },
    {
      "epoch": 1.942397856664434,
      "grad_norm": 62.34092330932617,
      "learning_rate": 6.670687488556586e-07,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -188.8939208984375,
      "logps/rejected": -270.8504943847656,
      "loss": 0.2685,
      "rewards/accuracies": 0.7337499856948853,
      "rewards/chosen": -0.3625078499317169,
      "rewards/margins": 4.072076797485352,
      "rewards/rejected": -4.434584617614746,
      "step": 2900
    },
    {
      "epoch": 1.9758874748827864,
      "grad_norm": 16.188819885253906,
      "learning_rate": 6.305859625640224e-07,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -177.49630737304688,
      "logps/rejected": -280.4139404296875,
      "loss": 0.2755,
      "rewards/accuracies": 0.7475000023841858,
      "rewards/chosen": -0.6155076026916504,
      "rewards/margins": 4.242664337158203,
      "rewards/rejected": -4.8581719398498535,
      "step": 2950
    },
    {
      "epoch": 2.0093770931011385,
      "grad_norm": 35.435707092285156,
      "learning_rate": 5.946640596831101e-07,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -166.32289123535156,
      "logps/rejected": -263.216552734375,
      "loss": 0.2391,
      "rewards/accuracies": 0.7712500095367432,
      "rewards/chosen": -0.6572730541229248,
      "rewards/margins": 4.339555740356445,
      "rewards/rejected": -4.996828556060791,
      "step": 3000
    },
    {
      "epoch": 2.042866711319491,
      "grad_norm": 42.23343276977539,
      "learning_rate": 5.59357580640101e-07,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -179.9312744140625,
      "logps/rejected": -277.5908508300781,
      "loss": 0.213,
      "rewards/accuracies": 0.7850000262260437,
      "rewards/chosen": -0.35315731167793274,
      "rewards/margins": 4.545411586761475,
      "rewards/rejected": -4.898569107055664,
      "step": 3050
    },
    {
      "epoch": 2.0763563295378433,
      "grad_norm": 2.853132486343384,
      "learning_rate": 5.247201314606984e-07,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -180.59486389160156,
      "logps/rejected": -276.373291015625,
      "loss": 0.2047,
      "rewards/accuracies": 0.7950000166893005,
      "rewards/chosen": -0.3648325800895691,
      "rewards/margins": 4.745596885681152,
      "rewards/rejected": -5.110429763793945,
      "step": 3100
    },
    {
      "epoch": 2.1098459477561957,
      "grad_norm": 22.07088851928711,
      "learning_rate": 4.90804302378802e-07,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -177.32708740234375,
      "logps/rejected": -260.5697021484375,
      "loss": 0.2054,
      "rewards/accuracies": 0.7925000190734863,
      "rewards/chosen": -0.48022788763046265,
      "rewards/margins": 4.517958641052246,
      "rewards/rejected": -4.998186111450195,
      "step": 3150
    },
    {
      "epoch": 2.1433355659745477,
      "grad_norm": 50.728519439697266,
      "learning_rate": 4.57661587988459e-07,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -177.0932159423828,
      "logps/rejected": -270.6129150390625,
      "loss": 0.236,
      "rewards/accuracies": 0.7574999928474426,
      "rewards/chosen": -0.4882276654243469,
      "rewards/margins": 4.606672286987305,
      "rewards/rejected": -5.094900131225586,
      "step": 3200
    },
    {
      "epoch": 2.1768251841929,
      "grad_norm": 19.410276412963867,
      "learning_rate": 4.253423090593318e-07,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -185.2410125732422,
      "logps/rejected": -282.7039794921875,
      "loss": 0.2242,
      "rewards/accuracies": 0.7612500190734863,
      "rewards/chosen": -0.5257070064544678,
      "rewards/margins": 4.692570209503174,
      "rewards/rejected": -5.218277454376221,
      "step": 3250
    },
    {
      "epoch": 2.2103148024112524,
      "grad_norm": 45.68756103515625,
      "learning_rate": 3.938955361343912e-07,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -175.8925018310547,
      "logps/rejected": -284.1990966796875,
      "loss": 0.2259,
      "rewards/accuracies": 0.7699999809265137,
      "rewards/chosen": -0.605311930179596,
      "rewards/margins": 4.8395843505859375,
      "rewards/rejected": -5.444896221160889,
      "step": 3300
    },
    {
      "epoch": 2.243804420629605,
      "grad_norm": 51.53227996826172,
      "learning_rate": 3.6336901502583364e-07,
      "logits/chosen": null,
      "logits/rejected": null,
      "logps/chosen": -177.85601806640625,
      "logps/rejected": -275.8158874511719,
      "loss": 0.2048,
      "rewards/accuracies": 0.7875000238418579,
| "rewards/chosen": -0.6794506907463074, | |
| "rewards/margins": 4.734764575958252, | |
| "rewards/rejected": -5.414215087890625, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 2.2772940388479572, | |
| "grad_norm": 3.569408893585205, | |
| "learning_rate": 3.3380909432234807e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -182.00836181640625, | |
| "logps/rejected": -280.286376953125, | |
| "loss": 0.1999, | |
| "rewards/accuracies": 0.7950000166893005, | |
| "rewards/chosen": -0.6098263263702393, | |
| "rewards/margins": 4.961060047149658, | |
| "rewards/rejected": -5.570886611938477, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 2.3107836570663096, | |
| "grad_norm": 27.362163543701172, | |
| "learning_rate": 3.0526065501779184e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -172.97593688964844, | |
| "logps/rejected": -275.5477600097656, | |
| "loss": 0.2184, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -0.6930285096168518, | |
| "rewards/margins": 4.821885585784912, | |
| "rewards/rejected": -5.514913558959961, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 2.3442732752846616, | |
| "grad_norm": 28.243000030517578, | |
| "learning_rate": 2.7776704236812454e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -182.44705200195312, | |
| "logps/rejected": -277.888427734375, | |
| "loss": 0.2128, | |
| "rewards/accuracies": 0.7649999856948853, | |
| "rewards/chosen": -0.6010170578956604, | |
| "rewards/margins": 5.026294708251953, | |
| "rewards/rejected": -5.6273112297058105, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.377762893503014, | |
| "grad_norm": 14.03532886505127, | |
| "learning_rate": 2.5137000008006437e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -182.77134704589844, | |
| "logps/rejected": -279.57769775390625, | |
| "loss": 0.21, | |
| "rewards/accuracies": 0.7799999713897705, | |
| "rewards/chosen": -0.7788973450660706, | |
| "rewards/margins": 5.022655010223389, | |
| "rewards/rejected": -5.801552772521973, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 2.4112525117213663, | |
| "grad_norm": 35.019554138183594, | |
| "learning_rate": 2.261096069313816e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -187.45738220214844, | |
| "logps/rejected": -281.279541015625, | |
| "loss": 0.1887, | |
| "rewards/accuracies": 0.8075000047683716, | |
| "rewards/chosen": -0.7265406847000122, | |
| "rewards/margins": 5.097284317016602, | |
| "rewards/rejected": -5.823824882507324, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 2.4447421299397187, | |
| "grad_norm": 25.041046142578125, | |
| "learning_rate": 2.020242159190646e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -176.86915588378906, | |
| "logps/rejected": -277.746826171875, | |
| "loss": 0.2311, | |
| "rewards/accuracies": 0.7587500214576721, | |
| "rewards/chosen": -0.786669135093689, | |
| "rewards/margins": 4.789151191711426, | |
| "rewards/rejected": -5.575820446014404, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 2.478231748158071, | |
| "grad_norm": 20.99360466003418, | |
| "learning_rate": 1.7915039602775062e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -182.3199462890625, | |
| "logps/rejected": -273.0755920410156, | |
| "loss": 0.2429, | |
| "rewards/accuracies": 0.7737500071525574, | |
| "rewards/chosen": -0.8147923946380615, | |
| "rewards/margins": 4.847590446472168, | |
| "rewards/rejected": -5.66238260269165, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 2.511721366376423, | |
| "grad_norm": 18.44826889038086, | |
| "learning_rate": 1.5752287670682861e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -170.71795654296875, | |
| "logps/rejected": -276.1592102050781, | |
| "loss": 0.2043, | |
| "rewards/accuracies": 0.7862499952316284, | |
| "rewards/chosen": -0.638399064540863, | |
| "rewards/margins": 5.212125301361084, | |
| "rewards/rejected": -5.850524425506592, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 2.5452109845947755, | |
| "grad_norm": 40.779659271240234, | |
| "learning_rate": 1.3717449514052314e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -180.7264404296875, | |
| "logps/rejected": -284.6885986328125, | |
| "loss": 0.2033, | |
| "rewards/accuracies": 0.7962499856948853, | |
| "rewards/chosen": -0.882935106754303, | |
| "rewards/margins": 5.128498554229736, | |
| "rewards/rejected": -6.0114336013793945, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 2.578700602813128, | |
| "grad_norm": 44.556678771972656, | |
| "learning_rate": 1.1813614639101088e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -183.99533081054688, | |
| "logps/rejected": -275.25518798828125, | |
| "loss": 0.2274, | |
| "rewards/accuracies": 0.7774999737739563, | |
| "rewards/chosen": -0.703125, | |
| "rewards/margins": 5.014428615570068, | |
| "rewards/rejected": -5.717553615570068, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 2.6121902210314802, | |
| "grad_norm": 61.39085388183594, | |
| "learning_rate": 1.0043673649027517e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -178.3540802001953, | |
| "logps/rejected": -282.1649475097656, | |
| "loss": 0.2097, | |
| "rewards/accuracies": 0.7662500143051147, | |
| "rewards/chosen": -0.683403730392456, | |
| "rewards/margins": 5.063638687133789, | |
| "rewards/rejected": -5.747043609619141, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 2.6456798392498326, | |
| "grad_norm": 58.0173454284668, | |
| "learning_rate": 8.410313855191464e-08, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -178.94400024414062, | |
| "logps/rejected": -286.5594177246094, | |
| "loss": 0.2042, | |
| "rewards/accuracies": 0.7862499952316284, | |
| "rewards/chosen": -0.8088821172714233, | |
| "rewards/margins": 5.067000865936279, | |
| "rewards/rejected": -5.875882625579834, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 2.679169457468185, | |
| "grad_norm": 16.31562042236328, | |
| "learning_rate": 6.916015196954383e-08, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -185.46673583984375, | |
| "logps/rejected": -288.2527770996094, | |
| "loss": 0.217, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -0.7252050638198853, | |
| "rewards/margins": 5.204960823059082, | |
| "rewards/rejected": -5.930166244506836, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.7126590756865374, | |
| "grad_norm": 20.799222946166992, | |
| "learning_rate": 5.5630464763733787e-08, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -188.50820922851562, | |
| "logps/rejected": -288.9837646484375, | |
| "loss": 0.2258, | |
| "rewards/accuracies": 0.7724999785423279, | |
| "rewards/chosen": -0.7981621026992798, | |
| "rewards/margins": 5.062735557556152, | |
| "rewards/rejected": -5.860898017883301, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 2.7461486939048894, | |
| "grad_norm": 18.682947158813477, | |
| "learning_rate": 4.353461913466405e-08, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -178.44317626953125, | |
| "logps/rejected": -266.35333251953125, | |
| "loss": 0.2426, | |
| "rewards/accuracies": 0.7524999976158142, | |
| "rewards/chosen": -0.6803594827651978, | |
| "rewards/margins": 4.8590497970581055, | |
| "rewards/rejected": -5.539409160614014, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 2.7796383121232418, | |
| "grad_norm": 54.06953048706055, | |
| "learning_rate": 3.2890980272783255e-08, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -180.65658569335938, | |
| "logps/rejected": -280.3162536621094, | |
| "loss": 0.2086, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -0.812857449054718, | |
| "rewards/margins": 5.271449565887451, | |
| "rewards/rejected": -6.0843071937561035, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 2.813127930341594, | |
| "grad_norm": 12.436116218566895, | |
| "learning_rate": 2.371570847483839e-08, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -180.7625732421875, | |
| "logps/rejected": -277.9272766113281, | |
| "loss": 0.2046, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": -0.6954517364501953, | |
| "rewards/margins": 5.145771026611328, | |
| "rewards/rejected": -5.841222763061523, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 2.8466175485599465, | |
| "grad_norm": 66.9225845336914, | |
| "learning_rate": 1.6022734607604393e-08, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -187.79019165039062, | |
| "logps/rejected": -282.13323974609375, | |
| "loss": 0.2096, | |
| "rewards/accuracies": 0.7925000190734863, | |
| "rewards/chosen": -0.8357629179954529, | |
| "rewards/margins": 5.103863716125488, | |
| "rewards/rejected": -5.939626693725586, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 2.8801071667782985, | |
| "grad_norm": 15.983145713806152, | |
| "learning_rate": 9.823738956571182e-09, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -191.03807067871094, | |
| "logps/rejected": -292.3773193359375, | |
| "loss": 0.206, | |
| "rewards/accuracies": 0.7912499904632568, | |
| "rewards/chosen": -0.6932557821273804, | |
| "rewards/margins": 5.146268367767334, | |
| "rewards/rejected": -5.839523792266846, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 2.913596784996651, | |
| "grad_norm": 33.383487701416016, | |
| "learning_rate": 5.128133491700715e-09, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -186.7404327392578, | |
| "logps/rejected": -289.3056945800781, | |
| "loss": 0.1936, | |
| "rewards/accuracies": 0.7975000143051147, | |
| "rewards/chosen": -0.7487243413925171, | |
| "rewards/margins": 5.300227642059326, | |
| "rewards/rejected": -6.048952579498291, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 2.9470864032150033, | |
| "grad_norm": 3.542743682861328, | |
| "learning_rate": 1.9430475771796684e-09, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -190.31752014160156, | |
| "logps/rejected": -268.015380859375, | |
| "loss": 0.2124, | |
| "rewards/accuracies": 0.7862499952316284, | |
| "rewards/chosen": -0.6255255937576294, | |
| "rewards/margins": 4.9648332595825195, | |
| "rewards/rejected": -5.590358257293701, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 2.9805760214333556, | |
| "grad_norm": 19.205642700195312, | |
| "learning_rate": 2.733171468656259e-10, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -177.03684997558594, | |
| "logps/rejected": -277.01495361328125, | |
| "loss": 0.2059, | |
| "rewards/accuracies": 0.7837499976158142, | |
| "rewards/chosen": -0.7730162739753723, | |
| "rewards/margins": 5.190572738647461, | |
| "rewards/rejected": -5.963588714599609, | |
| "step": 4450 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 4479, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |