| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 100, |
| "global_step": 5811, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0, |
| "learning_rate": 8.591065292096219e-10, |
| "logits/chosen": -2.5129990577697754, |
| "logits/rejected": -2.4275057315826416, |
| "logps/chosen": -96.6673583984375, |
| "logps/rejected": -105.15755462646484, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 8.59106529209622e-09, |
| "logits/chosen": -2.988718271255493, |
| "logits/rejected": -2.9780874252319336, |
| "logps/chosen": -302.4128723144531, |
| "logps/rejected": -225.56951904296875, |
| "loss": 0.6947, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.007120599504560232, |
| "rewards/margins": -0.004252635408192873, |
| "rewards/rejected": -0.002867964096367359, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 1.718213058419244e-08, |
| "logits/chosen": -2.8921194076538086, |
| "logits/rejected": -2.7121551036834717, |
| "logps/chosen": -287.7423400878906, |
| "logps/rejected": -217.6292724609375, |
| "loss": 0.6906, |
| "rewards/accuracies": 0.550000011920929, |
| "rewards/chosen": 0.0037767409812659025, |
| "rewards/margins": 0.010759315453469753, |
| "rewards/rejected": -0.006982574705034494, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 2.5773195876288656e-08, |
| "logits/chosen": -3.015655994415283, |
| "logits/rejected": -2.9962334632873535, |
| "logps/chosen": -297.9928283691406, |
| "logps/rejected": -203.88180541992188, |
| "loss": 0.6891, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": 0.025857295840978622, |
| "rewards/margins": 0.025261688977479935, |
| "rewards/rejected": 0.0005956076784059405, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 3.436426116838488e-08, |
| "logits/chosen": -2.8478853702545166, |
| "logits/rejected": -2.9214625358581543, |
| "logps/chosen": -267.7845153808594, |
| "logps/rejected": -250.1910400390625, |
| "loss": 0.6861, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 0.027253543958067894, |
| "rewards/margins": 0.02428315207362175, |
| "rewards/rejected": 0.002970390487462282, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 4.29553264604811e-08, |
| "logits/chosen": -3.0094895362854004, |
| "logits/rejected": -2.9605789184570312, |
| "logps/chosen": -322.73681640625, |
| "logps/rejected": -245.77450561523438, |
| "loss": 0.6839, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": 0.022152891382575035, |
| "rewards/margins": 0.04998321458697319, |
| "rewards/rejected": -0.027830326929688454, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 5.154639175257731e-08, |
| "logits/chosen": -2.9751992225646973, |
| "logits/rejected": -2.9595389366149902, |
| "logps/chosen": -308.54351806640625, |
| "logps/rejected": -224.53707885742188, |
| "loss": 0.6688, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 0.04110954329371452, |
| "rewards/margins": 0.11852701753377914, |
| "rewards/rejected": -0.07741747796535492, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 6.013745704467354e-08, |
| "logits/chosen": -2.9823076725006104, |
| "logits/rejected": -3.0206565856933594, |
| "logps/chosen": -375.16925048828125, |
| "logps/rejected": -224.032958984375, |
| "loss": 0.6642, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": 0.07654228806495667, |
| "rewards/margins": 0.1424637734889984, |
| "rewards/rejected": -0.06592147052288055, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 6.872852233676976e-08, |
| "logits/chosen": -3.0697617530822754, |
| "logits/rejected": -3.036527156829834, |
| "logps/chosen": -353.6755676269531, |
| "logps/rejected": -216.9717559814453, |
| "loss": 0.6418, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": 0.04413590952754021, |
| "rewards/margins": 0.10847017914056778, |
| "rewards/rejected": -0.06433425843715668, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 7.731958762886598e-08, |
| "logits/chosen": -2.9876997470855713, |
| "logits/rejected": -2.9616377353668213, |
| "logps/chosen": -291.57012939453125, |
| "logps/rejected": -193.0994873046875, |
| "loss": 0.6367, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": 0.08899353444576263, |
| "rewards/margins": 0.24147820472717285, |
| "rewards/rejected": -0.1524846851825714, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 8.59106529209622e-08, |
| "logits/chosen": -2.8401777744293213, |
| "logits/rejected": -2.7715401649475098, |
| "logps/chosen": -261.7100524902344, |
| "logps/rejected": -255.4248046875, |
| "loss": 0.6284, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": 0.03916650265455246, |
| "rewards/margins": 0.1420799195766449, |
| "rewards/rejected": -0.10291342437267303, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.05, |
| "eval_logits/chosen": -2.823406934738159, |
| "eval_logits/rejected": -2.797581672668457, |
| "eval_logps/chosen": -253.8098907470703, |
| "eval_logps/rejected": -258.8415832519531, |
| "eval_loss": 0.6098471879959106, |
| "eval_rewards/accuracies": 0.734375, |
| "eval_rewards/chosen": 0.04252301901578903, |
| "eval_rewards/margins": 0.22968964278697968, |
| "eval_rewards/rejected": -0.18716664612293243, |
| "eval_runtime": 58.4622, |
| "eval_samples_per_second": 17.105, |
| "eval_steps_per_second": 0.274, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 9.450171821305841e-08, |
| "logits/chosen": -3.0175564289093018, |
| "logits/rejected": -3.084195137023926, |
| "logps/chosen": -344.5015869140625, |
| "logps/rejected": -294.0466613769531, |
| "loss": 0.6208, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 0.07540851831436157, |
| "rewards/margins": 0.3283361792564392, |
| "rewards/rejected": -0.25292766094207764, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.0309278350515462e-07, |
| "logits/chosen": -2.831212043762207, |
| "logits/rejected": -2.7832601070404053, |
| "logps/chosen": -184.1349639892578, |
| "logps/rejected": -206.84634399414062, |
| "loss": 0.587, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.06816364824771881, |
| "rewards/margins": 0.0881614089012146, |
| "rewards/rejected": -0.1563250720500946, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.1168384879725086e-07, |
| "logits/chosen": -3.0061099529266357, |
| "logits/rejected": -2.8498525619506836, |
| "logps/chosen": -333.06072998046875, |
| "logps/rejected": -189.4818115234375, |
| "loss": 0.5832, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": 0.19702570140361786, |
| "rewards/margins": 0.5247530341148376, |
| "rewards/rejected": -0.3277273178100586, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.202749140893471e-07, |
| "logits/chosen": -2.8609023094177246, |
| "logits/rejected": -2.77339243888855, |
| "logps/chosen": -297.0363464355469, |
| "logps/rejected": -242.37255859375, |
| "loss": 0.5649, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": 0.016815107315778732, |
| "rewards/margins": 0.422064870595932, |
| "rewards/rejected": -0.40524977445602417, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.2886597938144328e-07, |
| "logits/chosen": -3.088327407836914, |
| "logits/rejected": -2.9465346336364746, |
| "logps/chosen": -305.6724548339844, |
| "logps/rejected": -314.7848205566406, |
| "loss": 0.5548, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": 0.12263361364603043, |
| "rewards/margins": 0.6976320147514343, |
| "rewards/rejected": -0.5749984979629517, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.3745704467353952e-07, |
| "logits/chosen": -2.883831024169922, |
| "logits/rejected": -2.8376777172088623, |
| "logps/chosen": -267.89154052734375, |
| "logps/rejected": -199.8636474609375, |
| "loss": 0.5362, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": 0.17647962272167206, |
| "rewards/margins": 0.5636450052261353, |
| "rewards/rejected": -0.387165367603302, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.4604810996563573e-07, |
| "logits/chosen": -2.823948383331299, |
| "logits/rejected": -2.7283661365509033, |
| "logps/chosen": -234.5882568359375, |
| "logps/rejected": -194.86480712890625, |
| "loss": 0.4987, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 0.09966392815113068, |
| "rewards/margins": 0.7896274328231812, |
| "rewards/rejected": -0.6899635791778564, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 1.5463917525773197e-07, |
| "logits/chosen": -2.9629111289978027, |
| "logits/rejected": -2.9428882598876953, |
| "logps/chosen": -232.97244262695312, |
| "logps/rejected": -183.2829132080078, |
| "loss": 0.5185, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": 0.2645714282989502, |
| "rewards/margins": 0.8501029014587402, |
| "rewards/rejected": -0.5855314135551453, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.6323024054982818e-07, |
| "logits/chosen": -2.9642796516418457, |
| "logits/rejected": -2.97268009185791, |
| "logps/chosen": -275.6226501464844, |
| "logps/rejected": -233.35537719726562, |
| "loss": 0.5748, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 0.1616288721561432, |
| "rewards/margins": 0.4936322569847107, |
| "rewards/rejected": -0.3320034146308899, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 1.718213058419244e-07, |
| "logits/chosen": -2.9080729484558105, |
| "logits/rejected": -2.9043314456939697, |
| "logps/chosen": -282.22369384765625, |
| "logps/rejected": -235.44992065429688, |
| "loss": 0.4908, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": 0.13387183845043182, |
| "rewards/margins": 0.7095439434051514, |
| "rewards/rejected": -0.5756720900535583, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.1, |
| "eval_logits/chosen": -2.7959609031677246, |
| "eval_logits/rejected": -2.7718665599823, |
| "eval_logps/chosen": -254.51446533203125, |
| "eval_logps/rejected": -263.8123779296875, |
| "eval_loss": 0.5425560474395752, |
| "eval_rewards/accuracies": 0.75, |
| "eval_rewards/chosen": -0.027933437377214432, |
| "eval_rewards/margins": 0.6563125252723694, |
| "eval_rewards/rejected": -0.6842460036277771, |
| "eval_runtime": 58.0136, |
| "eval_samples_per_second": 17.237, |
| "eval_steps_per_second": 0.276, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.804123711340206e-07, |
| "logits/chosen": -2.664795160293579, |
| "logits/rejected": -2.427393674850464, |
| "logps/chosen": -297.56488037109375, |
| "logps/rejected": -226.8320770263672, |
| "loss": 0.5682, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.22673270106315613, |
| "rewards/margins": 0.919518768787384, |
| "rewards/rejected": -1.1462514400482178, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 1.8900343642611682e-07, |
| "logits/chosen": -2.8164966106414795, |
| "logits/rejected": -2.7533140182495117, |
| "logps/chosen": -316.3358459472656, |
| "logps/rejected": -248.8792724609375, |
| "loss": 0.5418, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.2400936633348465, |
| "rewards/margins": 0.5202454924583435, |
| "rewards/rejected": -0.7603391408920288, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 1.9759450171821303e-07, |
| "logits/chosen": -2.8455495834350586, |
| "logits/rejected": -2.815950870513916, |
| "logps/chosen": -291.536376953125, |
| "logps/rejected": -252.3511199951172, |
| "loss": 0.516, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.05435393005609512, |
| "rewards/margins": 0.677357017993927, |
| "rewards/rejected": -0.7317109107971191, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 2.0618556701030925e-07, |
| "logits/chosen": -2.884962558746338, |
| "logits/rejected": -2.9899585247039795, |
| "logps/chosen": -362.83612060546875, |
| "logps/rejected": -246.82815551757812, |
| "loss": 0.5416, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.4428789019584656, |
| "rewards/margins": 0.2312956303358078, |
| "rewards/rejected": -0.6741746068000793, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 2.1477663230240549e-07, |
| "logits/chosen": -2.979492425918579, |
| "logits/rejected": -2.9899439811706543, |
| "logps/chosen": -232.15756225585938, |
| "logps/rejected": -157.3478240966797, |
| "loss": 0.5141, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": 0.26762503385543823, |
| "rewards/margins": 1.1515061855316162, |
| "rewards/rejected": -0.8838812112808228, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 2.2336769759450173e-07, |
| "logits/chosen": -3.0052077770233154, |
| "logits/rejected": -2.9878716468811035, |
| "logps/chosen": -309.3619689941406, |
| "logps/rejected": -189.45968627929688, |
| "loss": 0.5321, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 0.4304015040397644, |
| "rewards/margins": 1.0413486957550049, |
| "rewards/rejected": -0.6109471917152405, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 2.3195876288659794e-07, |
| "logits/chosen": -2.8794291019439697, |
| "logits/rejected": -2.831512928009033, |
| "logps/chosen": -306.6054992675781, |
| "logps/rejected": -237.39382934570312, |
| "loss": 0.4909, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": 0.18433420360088348, |
| "rewards/margins": 0.8596351742744446, |
| "rewards/rejected": -0.6753008365631104, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 2.405498281786942e-07, |
| "logits/chosen": -2.9824016094207764, |
| "logits/rejected": -2.9367408752441406, |
| "logps/chosen": -350.133056640625, |
| "logps/rejected": -254.4954071044922, |
| "loss": 0.5336, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": 0.20658831298351288, |
| "rewards/margins": 0.7473801374435425, |
| "rewards/rejected": -0.5407918691635132, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 2.4914089347079036e-07, |
| "logits/chosen": -2.7565178871154785, |
| "logits/rejected": -2.944960832595825, |
| "logps/chosen": -242.48397827148438, |
| "logps/rejected": -227.69107055664062, |
| "loss": 0.5056, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": 0.699475109577179, |
| "rewards/margins": 1.3677313327789307, |
| "rewards/rejected": -0.6682561635971069, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 2.5773195876288655e-07, |
| "logits/chosen": -2.779125928878784, |
| "logits/rejected": -2.943162679672241, |
| "logps/chosen": -411.8221130371094, |
| "logps/rejected": -222.3397216796875, |
| "loss": 0.5264, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": 0.34889036417007446, |
| "rewards/margins": 1.2339386940002441, |
| "rewards/rejected": -0.8850483894348145, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.15, |
| "eval_logits/chosen": -2.812185525894165, |
| "eval_logits/rejected": -2.789177656173706, |
| "eval_logps/chosen": -253.82086181640625, |
| "eval_logps/rejected": -266.7626953125, |
| "eval_loss": 0.5323615670204163, |
| "eval_rewards/accuracies": 0.765625, |
| "eval_rewards/chosen": 0.04142449051141739, |
| "eval_rewards/margins": 1.0207018852233887, |
| "eval_rewards/rejected": -0.9792775511741638, |
| "eval_runtime": 59.6543, |
| "eval_samples_per_second": 16.763, |
| "eval_steps_per_second": 0.268, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 2.663230240549828e-07, |
| "logits/chosen": -2.9955785274505615, |
| "logits/rejected": -2.9795451164245605, |
| "logps/chosen": -318.2289123535156, |
| "logps/rejected": -216.7342071533203, |
| "loss": 0.5172, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": 0.10595469176769257, |
| "rewards/margins": 1.0604875087738037, |
| "rewards/rejected": -0.9545329213142395, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 2.7491408934707903e-07, |
| "logits/chosen": -2.9341654777526855, |
| "logits/rejected": -2.9966204166412354, |
| "logps/chosen": -379.42572021484375, |
| "logps/rejected": -282.17291259765625, |
| "loss": 0.4682, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": 0.08291205763816833, |
| "rewards/margins": 1.1605613231658936, |
| "rewards/rejected": -1.0776493549346924, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 2.835051546391752e-07, |
| "logits/chosen": -2.8474197387695312, |
| "logits/rejected": -2.844364643096924, |
| "logps/chosen": -331.90802001953125, |
| "logps/rejected": -223.3518524169922, |
| "loss": 0.4881, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.02964567206799984, |
| "rewards/margins": 1.4632409811019897, |
| "rewards/rejected": -1.4928867816925049, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 2.9209621993127146e-07, |
| "logits/chosen": -2.68745756149292, |
| "logits/rejected": -2.817155361175537, |
| "logps/chosen": -232.6031036376953, |
| "logps/rejected": -246.84768676757812, |
| "loss": 0.5196, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": 0.5186244249343872, |
| "rewards/margins": 1.0158860683441162, |
| "rewards/rejected": -0.49726182222366333, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 3.006872852233677e-07, |
| "logits/chosen": -3.0304269790649414, |
| "logits/rejected": -2.9698691368103027, |
| "logps/chosen": -159.0189208984375, |
| "logps/rejected": -212.7183380126953, |
| "loss": 0.4873, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.310208797454834, |
| "rewards/margins": 0.5256294012069702, |
| "rewards/rejected": -0.8358383178710938, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 3.0927835051546394e-07, |
| "logits/chosen": -3.0391955375671387, |
| "logits/rejected": -3.0694854259490967, |
| "logps/chosen": -381.39715576171875, |
| "logps/rejected": -347.92559814453125, |
| "loss": 0.4607, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": 0.43349432945251465, |
| "rewards/margins": 1.4752476215362549, |
| "rewards/rejected": -1.0417532920837402, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 3.178694158075601e-07, |
| "logits/chosen": -2.985565662384033, |
| "logits/rejected": -2.951699733734131, |
| "logps/chosen": -158.28598022460938, |
| "logps/rejected": -127.53106689453125, |
| "loss": 0.4825, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.025753701105713844, |
| "rewards/margins": 1.3051038980484009, |
| "rewards/rejected": -1.330857515335083, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 3.2646048109965636e-07, |
| "logits/chosen": -2.8883204460144043, |
| "logits/rejected": -2.7797765731811523, |
| "logps/chosen": -300.84283447265625, |
| "logps/rejected": -306.0265197753906, |
| "loss": 0.5606, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": 0.03248428553342819, |
| "rewards/margins": 0.8653362393379211, |
| "rewards/rejected": -0.8328520655632019, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 3.3505154639175255e-07, |
| "logits/chosen": -2.8869693279266357, |
| "logits/rejected": -2.8558154106140137, |
| "logps/chosen": -295.94268798828125, |
| "logps/rejected": -245.67544555664062, |
| "loss": 0.4789, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 0.0472743920981884, |
| "rewards/margins": 1.3127429485321045, |
| "rewards/rejected": -1.2654683589935303, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 3.436426116838488e-07, |
| "logits/chosen": -3.0791659355163574, |
| "logits/rejected": -3.0203123092651367, |
| "logps/chosen": -251.6421661376953, |
| "logps/rejected": -219.4331512451172, |
| "loss": 0.5536, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.3093084990978241, |
| "rewards/margins": 0.4000861644744873, |
| "rewards/rejected": -0.7093946933746338, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.21, |
| "eval_logits/chosen": -2.8764305114746094, |
| "eval_logits/rejected": -2.8541693687438965, |
| "eval_logps/chosen": -254.42034912109375, |
| "eval_logps/rejected": -272.24603271484375, |
| "eval_loss": 0.4957379102706909, |
| "eval_rewards/accuracies": 0.796875, |
| "eval_rewards/chosen": -0.01852385140955448, |
| "eval_rewards/margins": 1.5090851783752441, |
| "eval_rewards/rejected": -1.5276089906692505, |
| "eval_runtime": 56.3835, |
| "eval_samples_per_second": 17.736, |
| "eval_steps_per_second": 0.284, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 3.5223367697594503e-07, |
| "logits/chosen": -2.8821568489074707, |
| "logits/rejected": -2.8333544731140137, |
| "logps/chosen": -320.3736267089844, |
| "logps/rejected": -205.11056518554688, |
| "loss": 0.4277, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": 0.0024402737617492676, |
| "rewards/margins": 1.244533658027649, |
| "rewards/rejected": -1.2420933246612549, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 3.608247422680412e-07, |
| "logits/chosen": -2.9139723777770996, |
| "logits/rejected": -2.8578293323516846, |
| "logps/chosen": -301.2723693847656, |
| "logps/rejected": -248.91744995117188, |
| "loss": 0.5208, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.27292880415916443, |
| "rewards/margins": 0.9970871210098267, |
| "rewards/rejected": -1.2700159549713135, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 3.6941580756013745e-07, |
| "logits/chosen": -2.9251325130462646, |
| "logits/rejected": -2.8964738845825195, |
| "logps/chosen": -210.9687042236328, |
| "logps/rejected": -185.3360137939453, |
| "loss": 0.5229, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.43344053626060486, |
| "rewards/margins": 0.8561422228813171, |
| "rewards/rejected": -1.2895828485488892, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 3.7800687285223364e-07, |
| "logits/chosen": -2.901094436645508, |
| "logits/rejected": -2.8542165756225586, |
| "logps/chosen": -348.6666259765625, |
| "logps/rejected": -329.27294921875, |
| "loss": 0.6022, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": 0.014251199550926685, |
| "rewards/margins": 0.753572940826416, |
| "rewards/rejected": -0.7393215298652649, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 3.865979381443299e-07, |
| "logits/chosen": -2.936382532119751, |
| "logits/rejected": -2.9940216541290283, |
| "logps/chosen": -308.2112731933594, |
| "logps/rejected": -232.1812744140625, |
| "loss": 0.504, |
| "rewards/accuracies": 0.44999998807907104, |
| "rewards/chosen": -0.3824332356452942, |
| "rewards/margins": 0.28821295499801636, |
| "rewards/rejected": -0.6706462502479553, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 3.9518900343642607e-07, |
| "logits/chosen": -2.951936960220337, |
| "logits/rejected": -3.0050208568573, |
| "logps/chosen": -326.07659912109375, |
| "logps/rejected": -301.6195983886719, |
| "loss": 0.5801, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 0.1851659119129181, |
| "rewards/margins": 1.5346710681915283, |
| "rewards/rejected": -1.349505066871643, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 4.037800687285223e-07, |
| "logits/chosen": -2.8154656887054443, |
| "logits/rejected": -2.8765406608581543, |
| "logps/chosen": -320.0531311035156, |
| "logps/rejected": -226.99124145507812, |
| "loss": 0.4564, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.22784185409545898, |
| "rewards/margins": 0.7857998013496399, |
| "rewards/rejected": -1.013641595840454, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.123711340206185e-07, |
| "logits/chosen": -3.0402634143829346, |
| "logits/rejected": -3.021247625350952, |
| "logps/chosen": -284.4671936035156, |
| "logps/rejected": -287.5126647949219, |
| "loss": 0.4915, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.03487253934144974, |
| "rewards/margins": 0.45380640029907227, |
| "rewards/rejected": -0.4886789321899414, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 4.209621993127148e-07, |
| "logits/chosen": -2.9354074001312256, |
| "logits/rejected": -2.920379638671875, |
| "logps/chosen": -311.0786437988281, |
| "logps/rejected": -246.13339233398438, |
| "loss": 0.4588, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.48857221007347107, |
| "rewards/margins": 0.9489312171936035, |
| "rewards/rejected": -1.4375033378601074, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 4.2955326460481097e-07, |
| "logits/chosen": -3.123109817504883, |
| "logits/rejected": -3.0762407779693604, |
| "logps/chosen": -309.3453063964844, |
| "logps/rejected": -281.5166015625, |
| "loss": 0.5362, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 0.25006183981895447, |
| "rewards/margins": 1.4525038003921509, |
| "rewards/rejected": -1.202441930770874, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.26, |
| "eval_logits/chosen": -2.8957557678222656, |
| "eval_logits/rejected": -2.87016224861145, |
| "eval_logps/chosen": -256.86529541015625, |
| "eval_logps/rejected": -272.88690185546875, |
| "eval_loss": 0.503109335899353, |
| "eval_rewards/accuracies": 0.78125, |
| "eval_rewards/chosen": -0.2630198001861572, |
| "eval_rewards/margins": 1.3286765813827515, |
| "eval_rewards/rejected": -1.5916962623596191, |
| "eval_runtime": 55.3853, |
| "eval_samples_per_second": 18.055, |
| "eval_steps_per_second": 0.289, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 4.381443298969072e-07, |
| "logits/chosen": -2.6036550998687744, |
| "logits/rejected": -2.6383635997772217, |
| "logps/chosen": -252.81375122070312, |
| "logps/rejected": -243.0044708251953, |
| "loss": 0.5633, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.6361426115036011, |
| "rewards/margins": 0.3173540532588959, |
| "rewards/rejected": -0.9534965753555298, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 4.4673539518900345e-07, |
| "logits/chosen": -2.96708607673645, |
| "logits/rejected": -3.0084481239318848, |
| "logps/chosen": -186.54592895507812, |
| "logps/rejected": -187.34884643554688, |
| "loss": 0.5443, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.1140596866607666, |
| "rewards/margins": 1.242305874824524, |
| "rewards/rejected": -1.356365442276001, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 4.5532646048109964e-07, |
| "logits/chosen": -2.987997531890869, |
| "logits/rejected": -2.9607906341552734, |
| "logps/chosen": -292.63690185546875, |
| "logps/rejected": -240.1947479248047, |
| "loss": 0.5091, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": 0.1345333755016327, |
| "rewards/margins": 1.3595573902130127, |
| "rewards/rejected": -1.2250239849090576, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 4.639175257731959e-07, |
| "logits/chosen": -2.8647749423980713, |
| "logits/rejected": -2.868330955505371, |
| "logps/chosen": -186.86167907714844, |
| "logps/rejected": -243.17910766601562, |
| "loss": 0.6201, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.7495313882827759, |
| "rewards/margins": 0.6188509464263916, |
| "rewards/rejected": -1.3683823347091675, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 4.7250859106529206e-07, |
| "logits/chosen": -2.9979634284973145, |
| "logits/rejected": -2.9638993740081787, |
| "logps/chosen": -349.7961730957031, |
| "logps/rejected": -288.20062255859375, |
| "loss": 0.6153, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.04092409461736679, |
| "rewards/margins": 1.4131947755813599, |
| "rewards/rejected": -1.4541189670562744, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 4.810996563573884e-07, |
| "logits/chosen": -3.0216901302337646, |
| "logits/rejected": -3.0451061725616455, |
| "logps/chosen": -326.0102844238281, |
| "logps/rejected": -307.83367919921875, |
| "loss": 0.539, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.6230143904685974, |
| "rewards/margins": 0.9213398098945618, |
| "rewards/rejected": -1.5443540811538696, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 4.896907216494845e-07, |
| "logits/chosen": -2.990562677383423, |
| "logits/rejected": -2.9301705360412598, |
| "logps/chosen": -336.96826171875, |
| "logps/rejected": -225.23599243164062, |
| "loss": 0.5141, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.8188888430595398, |
| "rewards/margins": 1.2901289463043213, |
| "rewards/rejected": -2.109017848968506, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 4.982817869415807e-07, |
| "logits/chosen": -2.900038003921509, |
| "logits/rejected": -2.9860446453094482, |
| "logps/chosen": -322.78240966796875, |
| "logps/rejected": -231.99667358398438, |
| "loss": 0.5668, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.6513129472732544, |
| "rewards/margins": 0.49615031480789185, |
| "rewards/rejected": -1.147463321685791, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 4.992350353796136e-07, |
| "logits/chosen": -2.936190605163574, |
| "logits/rejected": -2.8864665031433105, |
| "logps/chosen": -251.67172241210938, |
| "logps/rejected": -241.36318969726562, |
| "loss": 0.4964, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": 0.027965422719717026, |
| "rewards/margins": 1.3970218896865845, |
| "rewards/rejected": -1.3690563440322876, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 4.982788296041308e-07, |
| "logits/chosen": -2.9233040809631348, |
| "logits/rejected": -2.961263656616211, |
| "logps/chosen": -199.49600219726562, |
| "logps/rejected": -255.68612670898438, |
| "loss": 0.5966, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.13642588257789612, |
| "rewards/margins": 1.8362632989883423, |
| "rewards/rejected": -1.9726893901824951, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.31, |
| "eval_logits/chosen": -2.8986048698425293, |
| "eval_logits/rejected": -2.8777544498443604, |
| "eval_logps/chosen": -257.2279357910156, |
| "eval_logps/rejected": -273.46136474609375, |
| "eval_loss": 0.5963188409805298, |
| "eval_rewards/accuracies": 0.78125, |
| "eval_rewards/chosen": -0.29928162693977356, |
| "eval_rewards/margins": 1.349860429763794, |
| "eval_rewards/rejected": -1.6491420269012451, |
| "eval_runtime": 54.6151, |
| "eval_samples_per_second": 18.31, |
| "eval_steps_per_second": 0.293, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 4.973226238286479e-07, |
| "logits/chosen": -2.9391376972198486, |
| "logits/rejected": -2.9422051906585693, |
| "logps/chosen": -302.2151184082031, |
| "logps/rejected": -287.15606689453125, |
| "loss": 0.5368, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.4417892098426819, |
| "rewards/margins": 1.2578237056732178, |
| "rewards/rejected": -1.6996129751205444, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 4.96366418053165e-07, |
| "logits/chosen": -3.0650055408477783, |
| "logits/rejected": -3.0660297870635986, |
| "logps/chosen": -334.0442810058594, |
| "logps/rejected": -252.632080078125, |
| "loss": 0.5857, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 0.07565183192491531, |
| "rewards/margins": 0.7010248899459839, |
| "rewards/rejected": -0.6253730654716492, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 4.954102122776821e-07, |
| "logits/chosen": -2.918349027633667, |
| "logits/rejected": -2.8907716274261475, |
| "logps/chosen": -195.51907348632812, |
| "logps/rejected": -167.48745727539062, |
| "loss": 0.5561, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.20618407428264618, |
| "rewards/margins": 1.305176019668579, |
| "rewards/rejected": -1.5113601684570312, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 4.944540065021993e-07, |
| "logits/chosen": -2.8896799087524414, |
| "logits/rejected": -2.9980359077453613, |
| "logps/chosen": -264.43023681640625, |
| "logps/rejected": -230.43008422851562, |
| "loss": 0.5386, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.18526920676231384, |
| "rewards/margins": 1.8127784729003906, |
| "rewards/rejected": -1.9980475902557373, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 4.934978007267163e-07, |
| "logits/chosen": -2.959494113922119, |
| "logits/rejected": -2.982419967651367, |
| "logps/chosen": -242.4766082763672, |
| "logps/rejected": -274.0234680175781, |
| "loss": 0.5399, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.2960631847381592, |
| "rewards/margins": 1.0227611064910889, |
| "rewards/rejected": -1.318824291229248, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 4.925415949512335e-07, |
| "logits/chosen": -2.9986279010772705, |
| "logits/rejected": -2.966939926147461, |
| "logps/chosen": -328.2708435058594, |
| "logps/rejected": -273.13006591796875, |
| "loss": 0.5061, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.47610530257225037, |
| "rewards/margins": 1.19678795337677, |
| "rewards/rejected": -1.6728931665420532, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 4.915853891757506e-07, |
| "logits/chosen": -2.902583360671997, |
| "logits/rejected": -2.941610336303711, |
| "logps/chosen": -197.52853393554688, |
| "logps/rejected": -190.82029724121094, |
| "loss": 0.6096, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.34461653232574463, |
| "rewards/margins": 0.8742098808288574, |
| "rewards/rejected": -1.2188262939453125, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 4.906291834002677e-07, |
| "logits/chosen": -2.8424625396728516, |
| "logits/rejected": -2.791315793991089, |
| "logps/chosen": -273.73455810546875, |
| "logps/rejected": -229.92031860351562, |
| "loss": 0.5023, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.6406866312026978, |
| "rewards/margins": 0.9865404367446899, |
| "rewards/rejected": -1.6272270679473877, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 4.896729776247848e-07, |
| "logits/chosen": -3.039944648742676, |
| "logits/rejected": -2.9114279747009277, |
| "logps/chosen": -344.2494201660156, |
| "logps/rejected": -179.3026580810547, |
| "loss": 0.4748, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": 0.01536635123193264, |
| "rewards/margins": 1.3766069412231445, |
| "rewards/rejected": -1.3612406253814697, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 4.88716771849302e-07, |
| "logits/chosen": -2.9146389961242676, |
| "logits/rejected": -2.918255090713501, |
| "logps/chosen": -441.6365661621094, |
| "logps/rejected": -344.4063720703125, |
| "loss": 0.5014, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.2016751766204834, |
| "rewards/margins": 1.255171775817871, |
| "rewards/rejected": -1.456847071647644, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.36, |
| "eval_logits/chosen": -2.7868502140045166, |
| "eval_logits/rejected": -2.7659108638763428, |
| "eval_logps/chosen": -257.09423828125, |
| "eval_logps/rejected": -271.72039794921875, |
| "eval_loss": 0.5382026433944702, |
| "eval_rewards/accuracies": 0.75, |
| "eval_rewards/chosen": -0.28591296076774597, |
| "eval_rewards/margins": 1.189131736755371, |
| "eval_rewards/rejected": -1.4750447273254395, |
| "eval_runtime": 57.4875, |
| "eval_samples_per_second": 17.395, |
| "eval_steps_per_second": 0.278, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 4.87760566073819e-07, |
| "logits/chosen": -2.8412322998046875, |
| "logits/rejected": -2.9222323894500732, |
| "logps/chosen": -265.5148010253906, |
| "logps/rejected": -250.9593963623047, |
| "loss": 0.5254, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.7364897131919861, |
| "rewards/margins": 1.0006908178329468, |
| "rewards/rejected": -1.7371807098388672, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 4.868043602983362e-07, |
| "logits/chosen": -2.9580254554748535, |
| "logits/rejected": -2.9545352458953857, |
| "logps/chosen": -275.90625, |
| "logps/rejected": -375.8464660644531, |
| "loss": 0.5059, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.17938612401485443, |
| "rewards/margins": 2.483105421066284, |
| "rewards/rejected": -2.662491798400879, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 4.858481545228533e-07, |
| "logits/chosen": -2.9665563106536865, |
| "logits/rejected": -2.962049722671509, |
| "logps/chosen": -315.9619140625, |
| "logps/rejected": -332.67608642578125, |
| "loss": 0.5005, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.3134005665779114, |
| "rewards/margins": 1.136232614517212, |
| "rewards/rejected": -1.4496333599090576, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 4.848919487473704e-07, |
| "logits/chosen": -2.9164295196533203, |
| "logits/rejected": -2.856682538986206, |
| "logps/chosen": -322.0476379394531, |
| "logps/rejected": -230.6309356689453, |
| "loss": 0.5886, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.5171477198600769, |
| "rewards/margins": 1.454939603805542, |
| "rewards/rejected": -1.9720872640609741, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 4.839357429718875e-07, |
| "logits/chosen": -2.884716510772705, |
| "logits/rejected": -2.9519991874694824, |
| "logps/chosen": -297.78839111328125, |
| "logps/rejected": -239.98959350585938, |
| "loss": 0.5356, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.3978341817855835, |
| "rewards/margins": 1.704395055770874, |
| "rewards/rejected": -2.102229356765747, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 4.829795371964047e-07, |
| "logits/chosen": -2.831848621368408, |
| "logits/rejected": -2.817645311355591, |
| "logps/chosen": -249.10452270507812, |
| "logps/rejected": -241.6534881591797, |
| "loss": 0.8766, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.2761456370353699, |
| "rewards/margins": 1.4581564664840698, |
| "rewards/rejected": -1.7343019247055054, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 4.820233314209217e-07, |
| "logits/chosen": -2.9512200355529785, |
| "logits/rejected": -2.9007842540740967, |
| "logps/chosen": -230.91299438476562, |
| "logps/rejected": -257.34375, |
| "loss": 0.5829, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.4074572026729584, |
| "rewards/margins": 1.2204868793487549, |
| "rewards/rejected": -1.627943992614746, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 4.810671256454389e-07, |
| "logits/chosen": -2.7580349445343018, |
| "logits/rejected": -2.8238117694854736, |
| "logps/chosen": -309.89202880859375, |
| "logps/rejected": -203.035400390625, |
| "loss": 0.4978, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.5273137092590332, |
| "rewards/margins": 1.8291202783584595, |
| "rewards/rejected": -2.3564341068267822, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 4.80110919869956e-07, |
| "logits/chosen": -2.884530544281006, |
| "logits/rejected": -2.9204657077789307, |
| "logps/chosen": -300.7388916015625, |
| "logps/rejected": -258.79180908203125, |
| "loss": 0.5672, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.8420109748840332, |
| "rewards/margins": 1.2984743118286133, |
| "rewards/rejected": -2.1404852867126465, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 4.791547140944731e-07, |
| "logits/chosen": -2.904214859008789, |
| "logits/rejected": -2.9331746101379395, |
| "logps/chosen": -254.6400146484375, |
| "logps/rejected": -260.09088134765625, |
| "loss": 0.5334, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": 0.006157719995826483, |
| "rewards/margins": 1.616097092628479, |
| "rewards/rejected": -1.6099392175674438, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.41, |
| "eval_logits/chosen": -2.7265069484710693, |
| "eval_logits/rejected": -2.705258846282959, |
| "eval_logps/chosen": -258.5242004394531, |
| "eval_logps/rejected": -275.9377746582031, |
| "eval_loss": 0.5677424669265747, |
| "eval_rewards/accuracies": 0.796875, |
| "eval_rewards/chosen": -0.4289108216762543, |
| "eval_rewards/margins": 1.4678754806518555, |
| "eval_rewards/rejected": -1.8967863321304321, |
| "eval_runtime": 55.1088, |
| "eval_samples_per_second": 18.146, |
| "eval_steps_per_second": 0.29, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 4.781985083189902e-07, |
| "logits/chosen": -2.8288321495056152, |
| "logits/rejected": -2.7795658111572266, |
| "logps/chosen": -183.28457641601562, |
| "logps/rejected": -241.27743530273438, |
| "loss": 0.557, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.7061542868614197, |
| "rewards/margins": 1.3368699550628662, |
| "rewards/rejected": -2.0430245399475098, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 4.772423025435074e-07, |
| "logits/chosen": -3.0099616050720215, |
| "logits/rejected": -2.973783016204834, |
| "logps/chosen": -186.28518676757812, |
| "logps/rejected": -266.48236083984375, |
| "loss": 0.6266, |
| "rewards/accuracies": 0.44999998807907104, |
| "rewards/chosen": -0.71001797914505, |
| "rewards/margins": -0.206703782081604, |
| "rewards/rejected": -0.5033141374588013, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 4.762860967680244e-07, |
| "logits/chosen": -2.9518191814422607, |
| "logits/rejected": -3.006854772567749, |
| "logps/chosen": -195.8343048095703, |
| "logps/rejected": -227.02340698242188, |
| "loss": 0.5728, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.15900671482086182, |
| "rewards/margins": 0.9850654602050781, |
| "rewards/rejected": -1.1440720558166504, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 4.7532989099254154e-07, |
| "logits/chosen": -3.009342670440674, |
| "logits/rejected": -3.0587260723114014, |
| "logps/chosen": -300.0588684082031, |
| "logps/rejected": -257.58203125, |
| "loss": 0.6019, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.6498911380767822, |
| "rewards/margins": 0.7764835953712463, |
| "rewards/rejected": -1.4263746738433838, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 4.7437368521705866e-07, |
| "logits/chosen": -2.902837038040161, |
| "logits/rejected": -2.8557207584381104, |
| "logps/chosen": -246.87142944335938, |
| "logps/rejected": -213.7313232421875, |
| "loss": 0.5167, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.49609607458114624, |
| "rewards/margins": 1.5304511785507202, |
| "rewards/rejected": -2.026547431945801, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 4.7341747944157577e-07, |
| "logits/chosen": -2.726759910583496, |
| "logits/rejected": -2.728843927383423, |
| "logps/chosen": -199.48330688476562, |
| "logps/rejected": -235.99014282226562, |
| "loss": 0.5803, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.1567280292510986, |
| "rewards/margins": 1.015815258026123, |
| "rewards/rejected": -2.1725430488586426, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 4.724612736660929e-07, |
| "logits/chosen": -2.89784836769104, |
| "logits/rejected": -2.9295287132263184, |
| "logps/chosen": -265.757080078125, |
| "logps/rejected": -193.9804229736328, |
| "loss": 0.482, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": 0.018481796607375145, |
| "rewards/margins": 2.08237361907959, |
| "rewards/rejected": -2.063891887664795, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 4.7150506789061006e-07, |
| "logits/chosen": -2.8157646656036377, |
| "logits/rejected": -2.831799268722534, |
| "logps/chosen": -235.80184936523438, |
| "logps/rejected": -296.13421630859375, |
| "loss": 0.5127, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.5655059814453125, |
| "rewards/margins": 1.134603500366211, |
| "rewards/rejected": -1.7001097202301025, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 4.7054886211512717e-07, |
| "logits/chosen": -2.9849319458007812, |
| "logits/rejected": -2.9874143600463867, |
| "logps/chosen": -297.6209411621094, |
| "logps/rejected": -262.95428466796875, |
| "loss": 0.5396, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.07225757837295532, |
| "rewards/margins": 0.5653451085090637, |
| "rewards/rejected": -0.6376025676727295, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 4.695926563396443e-07, |
| "logits/chosen": -3.043614149093628, |
| "logits/rejected": -3.0626580715179443, |
| "logps/chosen": -243.42160034179688, |
| "logps/rejected": -207.7015838623047, |
| "loss": 0.5251, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.3596685528755188, |
| "rewards/margins": 1.1817331314086914, |
| "rewards/rejected": -1.5414015054702759, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.46, |
| "eval_logits/chosen": -2.8661580085754395, |
| "eval_logits/rejected": -2.846320629119873, |
| "eval_logps/chosen": -256.3507080078125, |
| "eval_logps/rejected": -270.0767822265625, |
| "eval_loss": 0.5772436261177063, |
| "eval_rewards/accuracies": 0.734375, |
| "eval_rewards/chosen": -0.21155984699726105, |
| "eval_rewards/margins": 1.0991249084472656, |
| "eval_rewards/rejected": -1.3106846809387207, |
| "eval_runtime": 58.614, |
| "eval_samples_per_second": 17.061, |
| "eval_steps_per_second": 0.273, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 4.686364505641614e-07, |
| "logits/chosen": -3.027421474456787, |
| "logits/rejected": -3.1281371116638184, |
| "logps/chosen": -293.70989990234375, |
| "logps/rejected": -189.66464233398438, |
| "loss": 0.4978, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.41769805550575256, |
| "rewards/margins": 1.199561357498169, |
| "rewards/rejected": -1.6172593832015991, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 4.676802447886785e-07, |
| "logits/chosen": -2.8584070205688477, |
| "logits/rejected": -2.882302761077881, |
| "logps/chosen": -256.07684326171875, |
| "logps/rejected": -255.83047485351562, |
| "loss": 0.5412, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.33531659841537476, |
| "rewards/margins": 0.8404865264892578, |
| "rewards/rejected": -1.1758031845092773, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 4.6672403901319564e-07, |
| "logits/chosen": -2.929386854171753, |
| "logits/rejected": -3.0053086280822754, |
| "logps/chosen": -316.0078125, |
| "logps/rejected": -171.36656188964844, |
| "loss": 0.4762, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": 0.023432254791259766, |
| "rewards/margins": 2.0073459148406982, |
| "rewards/rejected": -1.9839136600494385, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 4.6576783323771275e-07, |
| "logits/chosen": -2.8826663494110107, |
| "logits/rejected": -2.8366870880126953, |
| "logps/chosen": -243.7962188720703, |
| "logps/rejected": -187.9961700439453, |
| "loss": 0.503, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.2530770003795624, |
| "rewards/margins": 1.1314551830291748, |
| "rewards/rejected": -1.3845322132110596, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 4.6481162746222987e-07, |
| "logits/chosen": -2.8165650367736816, |
| "logits/rejected": -2.9121110439300537, |
| "logps/chosen": -251.54098510742188, |
| "logps/rejected": -237.3175506591797, |
| "loss": 0.8784, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -0.2420404851436615, |
| "rewards/margins": 2.1679394245147705, |
| "rewards/rejected": -2.409980058670044, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 4.63855421686747e-07, |
| "logits/chosen": -2.813908100128174, |
| "logits/rejected": -2.8820648193359375, |
| "logps/chosen": -274.791748046875, |
| "logps/rejected": -240.8386993408203, |
| "loss": 0.5649, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.04944751411676407, |
| "rewards/margins": 1.3660838603973389, |
| "rewards/rejected": -1.4155313968658447, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 4.628992159112641e-07, |
| "logits/chosen": -2.7781646251678467, |
| "logits/rejected": -2.8930909633636475, |
| "logps/chosen": -328.9050598144531, |
| "logps/rejected": -236.53414916992188, |
| "loss": 0.5675, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.06258317828178406, |
| "rewards/margins": 1.6145604848861694, |
| "rewards/rejected": -1.6771436929702759, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 4.6194301013578116e-07, |
| "logits/chosen": -2.9083309173583984, |
| "logits/rejected": -2.824375629425049, |
| "logps/chosen": -287.6618347167969, |
| "logps/rejected": -230.19393920898438, |
| "loss": 0.5169, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.4656705856323242, |
| "rewards/margins": 1.6011505126953125, |
| "rewards/rejected": -2.0668210983276367, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 4.609868043602983e-07, |
| "logits/chosen": -2.937588691711426, |
| "logits/rejected": -2.896270275115967, |
| "logps/chosen": -275.5927734375, |
| "logps/rejected": -288.66680908203125, |
| "loss": 0.4917, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.19148771464824677, |
| "rewards/margins": 1.2772417068481445, |
| "rewards/rejected": -1.4687296152114868, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 4.600305985848154e-07, |
| "logits/chosen": -2.9878718852996826, |
| "logits/rejected": -3.0833239555358887, |
| "logps/chosen": -213.8026123046875, |
| "logps/rejected": -214.43362426757812, |
| "loss": 0.5205, |
| "rewards/accuracies": 0.550000011920929, |
| "rewards/chosen": -0.5573463439941406, |
| "rewards/margins": 0.5783860087394714, |
| "rewards/rejected": -1.1357324123382568, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.52, |
| "eval_logits/chosen": -2.7978734970092773, |
| "eval_logits/rejected": -2.78934907913208, |
| "eval_logps/chosen": -258.027587890625, |
| "eval_logps/rejected": -275.55523681640625, |
| "eval_loss": 0.5262419581413269, |
| "eval_rewards/accuracies": 0.71875, |
| "eval_rewards/chosen": -0.37924808263778687, |
| "eval_rewards/margins": 1.479280710220337, |
| "eval_rewards/rejected": -1.858528733253479, |
| "eval_runtime": 57.7979, |
| "eval_samples_per_second": 17.302, |
| "eval_steps_per_second": 0.277, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 4.590743928093325e-07, |
| "logits/chosen": -2.8478896617889404, |
| "logits/rejected": -2.786147117614746, |
| "logps/chosen": -383.54327392578125, |
| "logps/rejected": -270.4455261230469, |
| "loss": 0.5498, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.0184290409088135, |
| "rewards/margins": 0.9436414837837219, |
| "rewards/rejected": -1.9620707035064697, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 4.581181870338497e-07, |
| "logits/chosen": -2.8463029861450195, |
| "logits/rejected": -2.900444746017456, |
| "logps/chosen": -327.9524841308594, |
| "logps/rejected": -274.52862548828125, |
| "loss": 0.5847, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.27039843797683716, |
| "rewards/margins": 1.4945565462112427, |
| "rewards/rejected": -1.7649548053741455, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 4.571619812583668e-07, |
| "logits/chosen": -2.793391466140747, |
| "logits/rejected": -2.795802354812622, |
| "logps/chosen": -267.8859558105469, |
| "logps/rejected": -217.9220733642578, |
| "loss": 0.4481, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.3454091250896454, |
| "rewards/margins": 2.0243167877197266, |
| "rewards/rejected": -2.3697259426116943, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 4.562057754828839e-07, |
| "logits/chosen": -2.868319034576416, |
| "logits/rejected": -2.905986785888672, |
| "logps/chosen": -264.89349365234375, |
| "logps/rejected": -310.06231689453125, |
| "loss": 0.5553, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.33068472146987915, |
| "rewards/margins": 2.051600933074951, |
| "rewards/rejected": -2.3822855949401855, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 4.55249569707401e-07, |
| "logits/chosen": -2.834726572036743, |
| "logits/rejected": -2.9254660606384277, |
| "logps/chosen": -293.556884765625, |
| "logps/rejected": -234.21005249023438, |
| "loss": 0.5042, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.1473982334136963, |
| "rewards/margins": 1.1652801036834717, |
| "rewards/rejected": -2.312678337097168, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 4.5429336393191814e-07, |
| "logits/chosen": -2.7575011253356934, |
| "logits/rejected": -2.8820366859436035, |
| "logps/chosen": -322.6793518066406, |
| "logps/rejected": -208.82388305664062, |
| "loss": 0.6648, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.6877565383911133, |
| "rewards/margins": 0.889872670173645, |
| "rewards/rejected": -1.5776290893554688, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 4.5333715815643525e-07, |
| "logits/chosen": -2.8447697162628174, |
| "logits/rejected": -2.8715322017669678, |
| "logps/chosen": -386.28570556640625, |
| "logps/rejected": -268.4273376464844, |
| "loss": 0.5397, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.26172274351119995, |
| "rewards/margins": 1.4582087993621826, |
| "rewards/rejected": -1.7199318408966064, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 4.5238095238095237e-07, |
| "logits/chosen": -2.957181215286255, |
| "logits/rejected": -2.9968810081481934, |
| "logps/chosen": -263.70684814453125, |
| "logps/rejected": -233.9396209716797, |
| "loss": 0.5506, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.8327061533927917, |
| "rewards/margins": 0.7109770774841309, |
| "rewards/rejected": -1.5436832904815674, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 4.514247466054695e-07, |
| "logits/chosen": -2.9863791465759277, |
| "logits/rejected": -2.9521121978759766, |
| "logps/chosen": -245.79244995117188, |
| "logps/rejected": -189.44338989257812, |
| "loss": 0.5105, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.5945242643356323, |
| "rewards/margins": 1.325272560119629, |
| "rewards/rejected": -1.9197969436645508, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 4.504685408299866e-07, |
| "logits/chosen": -2.892086982727051, |
| "logits/rejected": -2.942537784576416, |
| "logps/chosen": -336.52685546875, |
| "logps/rejected": -304.50567626953125, |
| "loss": 0.5094, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.14167115092277527, |
| "rewards/margins": 0.7381815314292908, |
| "rewards/rejected": -0.8798527717590332, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.57, |
| "eval_logits/chosen": -2.753582715988159, |
| "eval_logits/rejected": -2.7452518939971924, |
| "eval_logps/chosen": -260.51361083984375, |
| "eval_logps/rejected": -276.33770751953125, |
| "eval_loss": 0.5432895421981812, |
| "eval_rewards/accuracies": 0.796875, |
| "eval_rewards/chosen": -0.6278508901596069, |
| "eval_rewards/margins": 1.3089274168014526, |
| "eval_rewards/rejected": -1.93677818775177, |
| "eval_runtime": 53.4701, |
| "eval_samples_per_second": 18.702, |
| "eval_steps_per_second": 0.299, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 4.495123350545037e-07, |
| "logits/chosen": -2.9081952571868896, |
| "logits/rejected": -2.9619812965393066, |
| "logps/chosen": -278.28076171875, |
| "logps/rejected": -240.11181640625, |
| "loss": 0.5065, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.323709636926651, |
| "rewards/margins": 0.9985870122909546, |
| "rewards/rejected": -1.3222965002059937, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 4.4855612927902083e-07, |
| "logits/chosen": -2.8727335929870605, |
| "logits/rejected": -2.8023390769958496, |
| "logps/chosen": -325.6292419433594, |
| "logps/rejected": -239.16049194335938, |
| "loss": 0.7095, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.41461220383644104, |
| "rewards/margins": 1.4964089393615723, |
| "rewards/rejected": -1.9110209941864014, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 4.4759992350353795e-07, |
| "logits/chosen": -2.8231780529022217, |
| "logits/rejected": -2.908735513687134, |
| "logps/chosen": -304.6755676269531, |
| "logps/rejected": -277.4778747558594, |
| "loss": 0.4456, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.8432878255844116, |
| "rewards/margins": 1.6819578409194946, |
| "rewards/rejected": -2.5252456665039062, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 4.46643717728055e-07, |
| "logits/chosen": -2.7151193618774414, |
| "logits/rejected": -2.8067574501037598, |
| "logps/chosen": -280.8679504394531, |
| "logps/rejected": -273.8851318359375, |
| "loss": 0.5359, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.0236613750457764, |
| "rewards/margins": 1.5441632270812988, |
| "rewards/rejected": -2.567824125289917, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 4.4568751195257213e-07, |
| "logits/chosen": -2.776689052581787, |
| "logits/rejected": -2.8435564041137695, |
| "logps/chosen": -241.6661376953125, |
| "logps/rejected": -214.4073944091797, |
| "loss": 0.5624, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.6465439796447754, |
| "rewards/margins": 1.3678399324417114, |
| "rewards/rejected": -2.0143837928771973, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 4.447313061770893e-07, |
| "logits/chosen": -2.7138195037841797, |
| "logits/rejected": -2.585179328918457, |
| "logps/chosen": -335.81146240234375, |
| "logps/rejected": -350.88385009765625, |
| "loss": 0.54, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.47935476899147034, |
| "rewards/margins": 2.091104030609131, |
| "rewards/rejected": -2.5704588890075684, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 4.437751004016064e-07, |
| "logits/chosen": -2.958820104598999, |
| "logits/rejected": -2.9421515464782715, |
| "logps/chosen": -305.28839111328125, |
| "logps/rejected": -262.6142883300781, |
| "loss": 0.5226, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.1983194649219513, |
| "rewards/margins": 1.0410559177398682, |
| "rewards/rejected": -1.239375352859497, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 4.4281889462612353e-07, |
| "logits/chosen": -2.9157230854034424, |
| "logits/rejected": -2.9529147148132324, |
| "logps/chosen": -262.28411865234375, |
| "logps/rejected": -205.23324584960938, |
| "loss": 0.4879, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.35369348526000977, |
| "rewards/margins": 1.1171058416366577, |
| "rewards/rejected": -1.4707993268966675, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 4.4186268885064064e-07, |
| "logits/chosen": -3.012878894805908, |
| "logits/rejected": -2.9825873374938965, |
| "logps/chosen": -287.4079895019531, |
| "logps/rejected": -257.97772216796875, |
| "loss": 0.5481, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.8515245318412781, |
| "rewards/margins": 0.8873499035835266, |
| "rewards/rejected": -1.7388744354248047, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 4.4090648307515776e-07, |
| "logits/chosen": -2.9880738258361816, |
| "logits/rejected": -2.9730162620544434, |
| "logps/chosen": -224.86508178710938, |
| "logps/rejected": -165.42733764648438, |
| "loss": 0.5837, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.528744101524353, |
| "rewards/margins": 0.9531749486923218, |
| "rewards/rejected": -1.4819190502166748, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.62, |
| "eval_logits/chosen": -2.775576591491699, |
| "eval_logits/rejected": -2.7643439769744873, |
| "eval_logps/chosen": -258.015380859375, |
| "eval_logps/rejected": -276.55419921875, |
| "eval_loss": 0.5348690152168274, |
| "eval_rewards/accuracies": 0.765625, |
| "eval_rewards/chosen": -0.37802520394325256, |
| "eval_rewards/margins": 1.5804035663604736, |
| "eval_rewards/rejected": -1.9584287405014038, |
| "eval_runtime": 62.7794, |
| "eval_samples_per_second": 15.929, |
| "eval_steps_per_second": 0.255, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 4.399502772996749e-07, |
| "logits/chosen": -3.065775156021118, |
| "logits/rejected": -2.972374677658081, |
| "logps/chosen": -329.2245178222656, |
| "logps/rejected": -304.1506042480469, |
| "loss": 0.4827, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.5635257363319397, |
| "rewards/margins": 1.3332871198654175, |
| "rewards/rejected": -1.8968127965927124, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 4.38994071524192e-07, |
| "logits/chosen": -3.030421733856201, |
| "logits/rejected": -3.05527925491333, |
| "logps/chosen": -258.029541015625, |
| "logps/rejected": -280.21673583984375, |
| "loss": 0.5735, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.8231660723686218, |
| "rewards/margins": 1.493502140045166, |
| "rewards/rejected": -2.3166680335998535, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 4.380378657487091e-07, |
| "logits/chosen": -3.074777364730835, |
| "logits/rejected": -3.0200257301330566, |
| "logps/chosen": -330.8500061035156, |
| "logps/rejected": -371.5386047363281, |
| "loss": 0.5982, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.35177409648895264, |
| "rewards/margins": 1.7098945379257202, |
| "rewards/rejected": -2.061668634414673, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 4.370816599732262e-07, |
| "logits/chosen": -2.999420166015625, |
| "logits/rejected": -2.9334309101104736, |
| "logps/chosen": -296.1866760253906, |
| "logps/rejected": -236.72341918945312, |
| "loss": 0.5337, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.419023871421814, |
| "rewards/margins": 1.311092734336853, |
| "rewards/rejected": -2.730116367340088, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 4.3612545419774334e-07, |
| "logits/chosen": -2.883204460144043, |
| "logits/rejected": -2.8073534965515137, |
| "logps/chosen": -257.63519287109375, |
| "logps/rejected": -301.64227294921875, |
| "loss": 0.5928, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.9009000658988953, |
| "rewards/margins": 1.269942045211792, |
| "rewards/rejected": -2.170842409133911, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 4.3516924842226045e-07, |
| "logits/chosen": -2.920656681060791, |
| "logits/rejected": -2.91890025138855, |
| "logps/chosen": -322.2228698730469, |
| "logps/rejected": -296.6404113769531, |
| "loss": 0.629, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.9084477424621582, |
| "rewards/margins": 1.9311565160751343, |
| "rewards/rejected": -2.839603900909424, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 4.3421304264677757e-07, |
| "logits/chosen": -2.7664384841918945, |
| "logits/rejected": -2.7587597370147705, |
| "logps/chosen": -214.4353485107422, |
| "logps/rejected": -212.84805297851562, |
| "loss": 0.476, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -1.016667127609253, |
| "rewards/margins": 1.7192564010620117, |
| "rewards/rejected": -2.7359237670898438, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 4.332568368712947e-07, |
| "logits/chosen": -2.933134078979492, |
| "logits/rejected": -2.877431869506836, |
| "logps/chosen": -361.19573974609375, |
| "logps/rejected": -279.9434814453125, |
| "loss": 0.5784, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -1.5590813159942627, |
| "rewards/margins": 0.4831056594848633, |
| "rewards/rejected": -2.042186975479126, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 4.323006310958118e-07, |
| "logits/chosen": -2.904773235321045, |
| "logits/rejected": -2.985483169555664, |
| "logps/chosen": -411.599853515625, |
| "logps/rejected": -315.9092712402344, |
| "loss": 0.4975, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.908369243144989, |
| "rewards/margins": 1.5060293674468994, |
| "rewards/rejected": -2.414398670196533, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 4.313444253203289e-07, |
| "logits/chosen": -2.9397823810577393, |
| "logits/rejected": -2.9090006351470947, |
| "logps/chosen": -281.9468078613281, |
| "logps/rejected": -261.4234619140625, |
| "loss": 0.5214, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.9744187593460083, |
| "rewards/margins": 1.7388330698013306, |
| "rewards/rejected": -2.7132515907287598, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.67, |
| "eval_logits/chosen": -2.711259603500366, |
| "eval_logits/rejected": -2.698620080947876, |
| "eval_logps/chosen": -264.2903137207031, |
| "eval_logps/rejected": -279.27606201171875, |
| "eval_loss": 0.5732331871986389, |
| "eval_rewards/accuracies": 0.765625, |
| "eval_rewards/chosen": -1.0055204629898071, |
| "eval_rewards/margins": 1.2250933647155762, |
| "eval_rewards/rejected": -2.2306137084960938, |
| "eval_runtime": 57.0185, |
| "eval_samples_per_second": 17.538, |
| "eval_steps_per_second": 0.281, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 4.3038821954484603e-07, |
| "logits/chosen": -2.8764219284057617, |
| "logits/rejected": -2.7695984840393066, |
| "logps/chosen": -238.80453491210938, |
| "logps/rejected": -252.7805633544922, |
| "loss": 0.4686, |
| "rewards/accuracies": 0.550000011920929, |
| "rewards/chosen": -1.3183587789535522, |
| "rewards/margins": 0.5890123248100281, |
| "rewards/rejected": -1.907371163368225, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 4.2943201376936315e-07, |
| "logits/chosen": -2.8463966846466064, |
| "logits/rejected": -2.850677967071533, |
| "logps/chosen": -291.9525146484375, |
| "logps/rejected": -315.1170349121094, |
| "loss": 0.5393, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.8451521992683411, |
| "rewards/margins": 1.4508628845214844, |
| "rewards/rejected": -2.2960150241851807, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 4.2847580799388026e-07, |
| "logits/chosen": -2.8076231479644775, |
| "logits/rejected": -2.7472128868103027, |
| "logps/chosen": -258.0926513671875, |
| "logps/rejected": -188.78359985351562, |
| "loss": 0.5812, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.4854873418807983, |
| "rewards/margins": 0.9221324920654297, |
| "rewards/rejected": -2.4076199531555176, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 4.275196022183974e-07, |
| "logits/chosen": -2.7642879486083984, |
| "logits/rejected": -2.812042474746704, |
| "logps/chosen": -319.1858825683594, |
| "logps/rejected": -246.2572784423828, |
| "loss": 0.553, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.092181921005249, |
| "rewards/margins": 0.8022899627685547, |
| "rewards/rejected": -1.8944717645645142, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 4.265633964429145e-07, |
| "logits/chosen": -2.812278985977173, |
| "logits/rejected": -2.761359691619873, |
| "logps/chosen": -342.2608337402344, |
| "logps/rejected": -224.8918914794922, |
| "loss": 0.5775, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.914333701133728, |
| "rewards/margins": 1.487335205078125, |
| "rewards/rejected": -2.4016687870025635, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 4.256071906674316e-07, |
| "logits/chosen": -2.9929111003875732, |
| "logits/rejected": -2.94170880317688, |
| "logps/chosen": -379.921875, |
| "logps/rejected": -368.10357666015625, |
| "loss": 0.652, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.6550450325012207, |
| "rewards/margins": 0.9407709240913391, |
| "rewards/rejected": -1.595815896987915, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 4.246509848919487e-07, |
| "logits/chosen": -2.912461757659912, |
| "logits/rejected": -2.9404354095458984, |
| "logps/chosen": -348.6622009277344, |
| "logps/rejected": -248.0426788330078, |
| "loss": 0.5636, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.7293527722358704, |
| "rewards/margins": 1.5709936618804932, |
| "rewards/rejected": -2.3003463745117188, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 4.2369477911646584e-07, |
| "logits/chosen": -2.829761505126953, |
| "logits/rejected": -2.7305688858032227, |
| "logps/chosen": -295.06781005859375, |
| "logps/rejected": -240.6433563232422, |
| "loss": 0.5567, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.69425368309021, |
| "rewards/margins": 1.3369704484939575, |
| "rewards/rejected": -2.031224012374878, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 4.2273857334098296e-07, |
| "logits/chosen": -2.7433788776397705, |
| "logits/rejected": -2.67673921585083, |
| "logps/chosen": -289.397216796875, |
| "logps/rejected": -243.04833984375, |
| "loss": 0.6061, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.7310119867324829, |
| "rewards/margins": 0.585370659828186, |
| "rewards/rejected": -1.316382646560669, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 4.2178236756550007e-07, |
| "logits/chosen": -2.923424243927002, |
| "logits/rejected": -2.912429094314575, |
| "logps/chosen": -198.62017822265625, |
| "logps/rejected": -277.8341979980469, |
| "loss": 0.6914, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.4593328535556793, |
| "rewards/margins": 0.9467372894287109, |
| "rewards/rejected": -1.4060701131820679, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.72, |
| "eval_logits/chosen": -2.727492094039917, |
| "eval_logits/rejected": -2.7166121006011963, |
| "eval_logps/chosen": -261.146728515625, |
| "eval_logps/rejected": -278.7448425292969, |
| "eval_loss": 0.5136687159538269, |
| "eval_rewards/accuracies": 0.796875, |
| "eval_rewards/chosen": -0.6911616921424866, |
| "eval_rewards/margins": 1.4863313436508179, |
| "eval_rewards/rejected": -2.177493095397949, |
| "eval_runtime": 58.5256, |
| "eval_samples_per_second": 17.087, |
| "eval_steps_per_second": 0.273, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 4.208261617900172e-07, |
| "logits/chosen": -2.9004664421081543, |
| "logits/rejected": -2.991079807281494, |
| "logps/chosen": -200.07357788085938, |
| "logps/rejected": -212.9990234375, |
| "loss": 0.4996, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.8260477185249329, |
| "rewards/margins": 1.3000409603118896, |
| "rewards/rejected": -2.1260886192321777, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 4.198699560145343e-07, |
| "logits/chosen": -2.750919818878174, |
| "logits/rejected": -2.563699245452881, |
| "logps/chosen": -229.60348510742188, |
| "logps/rejected": -250.4010467529297, |
| "loss": 0.6298, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.8068568110466003, |
| "rewards/margins": 0.5274587869644165, |
| "rewards/rejected": -1.334315538406372, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 4.189137502390514e-07, |
| "logits/chosen": -2.871040105819702, |
| "logits/rejected": -2.859773635864258, |
| "logps/chosen": -255.4622344970703, |
| "logps/rejected": -367.54644775390625, |
| "loss": 0.6702, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.7843345999717712, |
| "rewards/margins": 1.0513083934783936, |
| "rewards/rejected": -1.8356430530548096, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 4.179575444635686e-07, |
| "logits/chosen": -2.797947406768799, |
| "logits/rejected": -2.768245220184326, |
| "logps/chosen": -297.70465087890625, |
| "logps/rejected": -303.483154296875, |
| "loss": 0.5911, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -1.02579927444458, |
| "rewards/margins": 1.1320759057998657, |
| "rewards/rejected": -2.1578750610351562, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 4.170013386880857e-07, |
| "logits/chosen": -2.842421531677246, |
| "logits/rejected": -2.816070318222046, |
| "logps/chosen": -286.8984069824219, |
| "logps/rejected": -256.8731689453125, |
| "loss": 0.4926, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.8610552549362183, |
| "rewards/margins": 0.9260069727897644, |
| "rewards/rejected": -1.7870622873306274, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 4.1604513291260277e-07, |
| "logits/chosen": -2.814866542816162, |
| "logits/rejected": -2.7706210613250732, |
| "logps/chosen": -273.64111328125, |
| "logps/rejected": -259.51885986328125, |
| "loss": 0.5227, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.5814541578292847, |
| "rewards/margins": 2.4891767501831055, |
| "rewards/rejected": -3.0706310272216797, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 4.150889271371199e-07, |
| "logits/chosen": -2.7491848468780518, |
| "logits/rejected": -2.800107002258301, |
| "logps/chosen": -268.9418029785156, |
| "logps/rejected": -296.1412658691406, |
| "loss": 0.59, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.48690223693847656, |
| "rewards/margins": 1.39999520778656, |
| "rewards/rejected": -1.886897325515747, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 4.14132721361637e-07, |
| "logits/chosen": -2.7790863513946533, |
| "logits/rejected": -2.756493330001831, |
| "logps/chosen": -376.4327392578125, |
| "logps/rejected": -220.66128540039062, |
| "loss": 0.4328, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.6699414849281311, |
| "rewards/margins": 2.2528209686279297, |
| "rewards/rejected": -2.922762632369995, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 4.131765155861541e-07, |
| "logits/chosen": -2.732978343963623, |
| "logits/rejected": -2.712939739227295, |
| "logps/chosen": -216.78231811523438, |
| "logps/rejected": -187.97975158691406, |
| "loss": 0.5317, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.3067319989204407, |
| "rewards/margins": 1.883384346961975, |
| "rewards/rejected": -2.1901164054870605, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 4.1222030981067123e-07, |
| "logits/chosen": -2.7365033626556396, |
| "logits/rejected": -2.709888458251953, |
| "logps/chosen": -284.1839294433594, |
| "logps/rejected": -285.10064697265625, |
| "loss": 0.4655, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -1.2894710302352905, |
| "rewards/margins": 0.8915459513664246, |
| "rewards/rejected": -2.1810169219970703, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.77, |
| "eval_logits/chosen": -2.6837804317474365, |
| "eval_logits/rejected": -2.6651253700256348, |
| "eval_logps/chosen": -262.2220153808594, |
| "eval_logps/rejected": -279.8998718261719, |
| "eval_loss": 0.5090023875236511, |
| "eval_rewards/accuracies": 0.703125, |
| "eval_rewards/chosen": -0.7986923456192017, |
| "eval_rewards/margins": 1.494301438331604, |
| "eval_rewards/rejected": -2.2929937839508057, |
| "eval_runtime": 59.1398, |
| "eval_samples_per_second": 16.909, |
| "eval_steps_per_second": 0.271, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 4.1126410403518835e-07, |
| "logits/chosen": -2.774035930633545, |
| "logits/rejected": -2.5919785499572754, |
| "logps/chosen": -263.84185791015625, |
| "logps/rejected": -284.3238220214844, |
| "loss": 0.506, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.6006399393081665, |
| "rewards/margins": 2.4321160316467285, |
| "rewards/rejected": -3.0327563285827637, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 4.1030789825970546e-07, |
| "logits/chosen": -2.834711790084839, |
| "logits/rejected": -2.8974971771240234, |
| "logps/chosen": -314.38604736328125, |
| "logps/rejected": -352.1858825683594, |
| "loss": 0.5863, |
| "rewards/accuracies": 0.550000011920929, |
| "rewards/chosen": -0.9283245205879211, |
| "rewards/margins": 0.4295298457145691, |
| "rewards/rejected": -1.3578544855117798, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 4.093516924842226e-07, |
| "logits/chosen": -2.6712985038757324, |
| "logits/rejected": -2.6710007190704346, |
| "logps/chosen": -241.9701690673828, |
| "logps/rejected": -220.11502075195312, |
| "loss": 0.4433, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.21889445185661316, |
| "rewards/margins": 1.4537973403930664, |
| "rewards/rejected": -1.6726917028427124, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 4.083954867087397e-07, |
| "logits/chosen": -2.9339497089385986, |
| "logits/rejected": -2.9125781059265137, |
| "logps/chosen": -197.97679138183594, |
| "logps/rejected": -202.1653289794922, |
| "loss": 0.5284, |
| "rewards/accuracies": 0.550000011920929, |
| "rewards/chosen": -1.0227124691009521, |
| "rewards/margins": 0.7052000761032104, |
| "rewards/rejected": -1.7279125452041626, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 4.074392809332568e-07, |
| "logits/chosen": -2.841710329055786, |
| "logits/rejected": -2.8297677040100098, |
| "logps/chosen": -316.0207824707031, |
| "logps/rejected": -229.83837890625, |
| "loss": 0.513, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.8295713663101196, |
| "rewards/margins": 1.6421802043914795, |
| "rewards/rejected": -2.4717514514923096, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 4.064830751577739e-07, |
| "logits/chosen": -2.889648914337158, |
| "logits/rejected": -2.767516613006592, |
| "logps/chosen": -185.7052001953125, |
| "logps/rejected": -177.06546020507812, |
| "loss": 0.4782, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.2799871563911438, |
| "rewards/margins": 1.6918373107910156, |
| "rewards/rejected": -1.971824288368225, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 4.0552686938229104e-07, |
| "logits/chosen": -2.913878917694092, |
| "logits/rejected": -2.7892653942108154, |
| "logps/chosen": -242.6092071533203, |
| "logps/rejected": -218.4198760986328, |
| "loss": 0.4485, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.5770751237869263, |
| "rewards/margins": 1.0650291442871094, |
| "rewards/rejected": -1.6421045064926147, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 4.045706636068082e-07, |
| "logits/chosen": -2.769942045211792, |
| "logits/rejected": -2.8180408477783203, |
| "logps/chosen": -178.39805603027344, |
| "logps/rejected": -233.78713989257812, |
| "loss": 0.4683, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.6862732768058777, |
| "rewards/margins": 1.50155770778656, |
| "rewards/rejected": -2.187831163406372, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 4.036144578313253e-07, |
| "logits/chosen": -2.832733392715454, |
| "logits/rejected": -2.9456982612609863, |
| "logps/chosen": -258.8636779785156, |
| "logps/rejected": -223.64968872070312, |
| "loss": 0.5346, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.5500979423522949, |
| "rewards/margins": 1.3681669235229492, |
| "rewards/rejected": -1.9182647466659546, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 4.0265825205584244e-07, |
| "logits/chosen": -2.820535182952881, |
| "logits/rejected": -2.8729701042175293, |
| "logps/chosen": -261.64056396484375, |
| "logps/rejected": -265.8938293457031, |
| "loss": 0.5731, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.2627449929714203, |
| "rewards/margins": 1.6428673267364502, |
| "rewards/rejected": -1.9056123495101929, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.83, |
| "eval_logits/chosen": -2.6727685928344727, |
| "eval_logits/rejected": -2.654268264770508, |
| "eval_logps/chosen": -262.48760986328125, |
| "eval_logps/rejected": -280.4902038574219, |
| "eval_loss": 0.5312163829803467, |
| "eval_rewards/accuracies": 0.78125, |
| "eval_rewards/chosen": -0.8252508044242859, |
| "eval_rewards/margins": 1.5267785787582397, |
| "eval_rewards/rejected": -2.352029323577881, |
| "eval_runtime": 58.1435, |
| "eval_samples_per_second": 17.199, |
| "eval_steps_per_second": 0.275, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 4.0170204628035956e-07, |
| "logits/chosen": -2.7514543533325195, |
| "logits/rejected": -2.8077378273010254, |
| "logps/chosen": -211.7623748779297, |
| "logps/rejected": -252.843994140625, |
| "loss": 0.494, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.7454391121864319, |
| "rewards/margins": 1.7308275699615479, |
| "rewards/rejected": -2.476266384124756, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 4.007458405048766e-07, |
| "logits/chosen": -2.857224225997925, |
| "logits/rejected": -2.839128017425537, |
| "logps/chosen": -282.80975341796875, |
| "logps/rejected": -277.6203308105469, |
| "loss": 0.4801, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.9825541377067566, |
| "rewards/margins": 1.5490392446517944, |
| "rewards/rejected": -2.5315933227539062, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 3.9978963472939373e-07, |
| "logits/chosen": -2.838963031768799, |
| "logits/rejected": -2.808168411254883, |
| "logps/chosen": -291.43280029296875, |
| "logps/rejected": -260.65203857421875, |
| "loss": 0.48, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.7560056447982788, |
| "rewards/margins": 1.1872189044952393, |
| "rewards/rejected": -1.9432246685028076, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 3.9883342895391085e-07, |
| "logits/chosen": -2.815406084060669, |
| "logits/rejected": -2.7723686695098877, |
| "logps/chosen": -270.00689697265625, |
| "logps/rejected": -229.3192901611328, |
| "loss": 0.546, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.42303770780563354, |
| "rewards/margins": 2.0215001106262207, |
| "rewards/rejected": -2.4445383548736572, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 3.9787722317842796e-07, |
| "logits/chosen": -2.89304256439209, |
| "logits/rejected": -2.849522113800049, |
| "logps/chosen": -351.61968994140625, |
| "logps/rejected": -226.35800170898438, |
| "loss": 0.5355, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": 0.039799273014068604, |
| "rewards/margins": 1.9888496398925781, |
| "rewards/rejected": -1.9490505456924438, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 3.969210174029451e-07, |
| "logits/chosen": -2.7271950244903564, |
| "logits/rejected": -2.7782604694366455, |
| "logps/chosen": -182.22679138183594, |
| "logps/rejected": -258.85784912109375, |
| "loss": 0.5878, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.30569443106651306, |
| "rewards/margins": 2.009462833404541, |
| "rewards/rejected": -2.315157175064087, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 3.959648116274622e-07, |
| "logits/chosen": -2.7675626277923584, |
| "logits/rejected": -2.7918038368225098, |
| "logps/chosen": -258.5990295410156, |
| "logps/rejected": -230.567138671875, |
| "loss": 0.6044, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.6162198781967163, |
| "rewards/margins": 1.2834047079086304, |
| "rewards/rejected": -1.8996245861053467, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 3.950086058519793e-07, |
| "logits/chosen": -2.922581195831299, |
| "logits/rejected": -2.8593482971191406, |
| "logps/chosen": -224.3607940673828, |
| "logps/rejected": -256.91510009765625, |
| "loss": 0.5717, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.4705128073692322, |
| "rewards/margins": 1.2131963968276978, |
| "rewards/rejected": -1.6837093830108643, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 3.9405240007649643e-07, |
| "logits/chosen": -2.9224143028259277, |
| "logits/rejected": -2.942783832550049, |
| "logps/chosen": -216.39816284179688, |
| "logps/rejected": -272.4553527832031, |
| "loss": 0.5066, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.5551620125770569, |
| "rewards/margins": 1.4193694591522217, |
| "rewards/rejected": -1.9745315313339233, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 3.9309619430101354e-07, |
| "logits/chosen": -2.8787567615509033, |
| "logits/rejected": -2.8499011993408203, |
| "logps/chosen": -241.4861297607422, |
| "logps/rejected": -210.9620361328125, |
| "loss": 0.5233, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.7351746559143066, |
| "rewards/margins": 1.017896294593811, |
| "rewards/rejected": -1.7530708312988281, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.88, |
| "eval_logits/chosen": -2.709693431854248, |
| "eval_logits/rejected": -2.686978340148926, |
| "eval_logps/chosen": -258.8084411621094, |
| "eval_logps/rejected": -277.92047119140625, |
| "eval_loss": 0.5205972790718079, |
| "eval_rewards/accuracies": 0.78125, |
| "eval_rewards/chosen": -0.4573337435722351, |
| "eval_rewards/margins": 1.637721300125122, |
| "eval_rewards/rejected": -2.095055103302002, |
| "eval_runtime": 55.0835, |
| "eval_samples_per_second": 18.154, |
| "eval_steps_per_second": 0.29, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 3.9213998852553066e-07, |
| "logits/chosen": -2.900834798812866, |
| "logits/rejected": -2.703029155731201, |
| "logps/chosen": -338.3115539550781, |
| "logps/rejected": -326.6482238769531, |
| "loss": 0.6064, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.00930917263031, |
| "rewards/margins": 1.1334011554718018, |
| "rewards/rejected": -2.1427102088928223, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 3.9118378275004783e-07, |
| "logits/chosen": -2.888641119003296, |
| "logits/rejected": -2.817422389984131, |
| "logps/chosen": -339.2339172363281, |
| "logps/rejected": -273.8936462402344, |
| "loss": 0.5537, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.8813110589981079, |
| "rewards/margins": 1.3287475109100342, |
| "rewards/rejected": -2.2100586891174316, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 3.9022757697456494e-07, |
| "logits/chosen": -2.739957094192505, |
| "logits/rejected": -2.78080677986145, |
| "logps/chosen": -356.21844482421875, |
| "logps/rejected": -331.55096435546875, |
| "loss": 0.5099, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.5665701627731323, |
| "rewards/margins": 1.0891746282577515, |
| "rewards/rejected": -1.6557449102401733, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 3.8927137119908206e-07, |
| "logits/chosen": -2.9097769260406494, |
| "logits/rejected": -2.848907947540283, |
| "logps/chosen": -305.66607666015625, |
| "logps/rejected": -211.9135284423828, |
| "loss": 0.487, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.30798807740211487, |
| "rewards/margins": 2.1856961250305176, |
| "rewards/rejected": -2.4936842918395996, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 3.883151654235992e-07, |
| "logits/chosen": -2.831984281539917, |
| "logits/rejected": -2.8497231006622314, |
| "logps/chosen": -256.02667236328125, |
| "logps/rejected": -253.0034942626953, |
| "loss": 0.5203, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.10425261408090591, |
| "rewards/margins": 1.9909296035766602, |
| "rewards/rejected": -2.095182180404663, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 3.873589596481163e-07, |
| "logits/chosen": -2.765052080154419, |
| "logits/rejected": -2.8637542724609375, |
| "logps/chosen": -267.7926025390625, |
| "logps/rejected": -239.08718872070312, |
| "loss": 0.5506, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.7126263976097107, |
| "rewards/margins": 0.9376744031906128, |
| "rewards/rejected": -1.6503007411956787, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 3.864027538726334e-07, |
| "logits/chosen": -2.8235788345336914, |
| "logits/rejected": -2.7703769207000732, |
| "logps/chosen": -227.66281127929688, |
| "logps/rejected": -227.4187469482422, |
| "loss": 0.5707, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.47516727447509766, |
| "rewards/margins": 1.8380186557769775, |
| "rewards/rejected": -2.313185691833496, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.8544654809715047e-07, |
| "logits/chosen": -2.840291976928711, |
| "logits/rejected": -2.889273166656494, |
| "logps/chosen": -289.7173156738281, |
| "logps/rejected": -289.07391357421875, |
| "loss": 0.5463, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.0754880905151367, |
| "rewards/margins": 1.31435227394104, |
| "rewards/rejected": -2.3898403644561768, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 3.844903423216676e-07, |
| "logits/chosen": -2.862814426422119, |
| "logits/rejected": -2.8419394493103027, |
| "logps/chosen": -256.5997619628906, |
| "logps/rejected": -221.2139434814453, |
| "loss": 0.5288, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.6081517338752747, |
| "rewards/margins": 0.817557156085968, |
| "rewards/rejected": -1.4257088899612427, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 3.835341365461847e-07, |
| "logits/chosen": -2.836991786956787, |
| "logits/rejected": -2.78855562210083, |
| "logps/chosen": -302.5685119628906, |
| "logps/rejected": -237.11209106445312, |
| "loss": 0.5593, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.189387708902359, |
| "rewards/margins": 1.4565317630767822, |
| "rewards/rejected": -1.6459195613861084, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.93, |
| "eval_logits/chosen": -2.651865005493164, |
| "eval_logits/rejected": -2.6221344470977783, |
| "eval_logps/chosen": -259.7433166503906, |
| "eval_logps/rejected": -278.97027587890625, |
| "eval_loss": 0.523062527179718, |
| "eval_rewards/accuracies": 0.796875, |
| "eval_rewards/chosen": -0.5508205890655518, |
| "eval_rewards/margins": 1.649214267730713, |
| "eval_rewards/rejected": -2.2000348567962646, |
| "eval_runtime": 52.7667, |
| "eval_samples_per_second": 18.951, |
| "eval_steps_per_second": 0.303, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 3.825779307707018e-07, |
| "logits/chosen": -2.805868625640869, |
| "logits/rejected": -2.8683719635009766, |
| "logps/chosen": -165.62680053710938, |
| "logps/rejected": -188.5421600341797, |
| "loss": 0.5336, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.4086759090423584, |
| "rewards/margins": 1.5789250135421753, |
| "rewards/rejected": -1.9876010417938232, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 3.8162172499521893e-07, |
| "logits/chosen": -2.823812961578369, |
| "logits/rejected": -2.8563215732574463, |
| "logps/chosen": -220.8478546142578, |
| "logps/rejected": -212.088623046875, |
| "loss": 0.5532, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.7714720368385315, |
| "rewards/margins": 1.1308372020721436, |
| "rewards/rejected": -1.9023091793060303, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 3.8066551921973605e-07, |
| "logits/chosen": -2.772951126098633, |
| "logits/rejected": -2.741703510284424, |
| "logps/chosen": -282.5356750488281, |
| "logps/rejected": -204.34359741210938, |
| "loss": 0.4889, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.4399307668209076, |
| "rewards/margins": 1.7154382467269897, |
| "rewards/rejected": -2.1553690433502197, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 3.7970931344425316e-07, |
| "logits/chosen": -2.822625160217285, |
| "logits/rejected": -2.813814640045166, |
| "logps/chosen": -298.0512390136719, |
| "logps/rejected": -247.30172729492188, |
| "loss": 0.4864, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.5513595938682556, |
| "rewards/margins": 0.6814876198768616, |
| "rewards/rejected": -1.2328474521636963, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 3.787531076687703e-07, |
| "logits/chosen": -2.7317397594451904, |
| "logits/rejected": -2.7448277473449707, |
| "logps/chosen": -269.85760498046875, |
| "logps/rejected": -223.9963836669922, |
| "loss": 0.5273, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.41241344809532166, |
| "rewards/margins": 2.08837890625, |
| "rewards/rejected": -2.5007922649383545, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 3.7779690189328745e-07, |
| "logits/chosen": -2.812678813934326, |
| "logits/rejected": -2.8333404064178467, |
| "logps/chosen": -280.35247802734375, |
| "logps/rejected": -255.755126953125, |
| "loss": 0.4633, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.533105194568634, |
| "rewards/margins": 1.1613143682479858, |
| "rewards/rejected": -1.694419503211975, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 3.7684069611780456e-07, |
| "logits/chosen": -2.824018716812134, |
| "logits/rejected": -2.789066791534424, |
| "logps/chosen": -230.8186798095703, |
| "logps/rejected": -197.3396453857422, |
| "loss": 0.565, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.8623602986335754, |
| "rewards/margins": 0.8971187472343445, |
| "rewards/rejected": -1.7594791650772095, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 3.758844903423217e-07, |
| "logits/chosen": -2.731091260910034, |
| "logits/rejected": -2.8013782501220703, |
| "logps/chosen": -261.1441650390625, |
| "logps/rejected": -285.99298095703125, |
| "loss": 0.5784, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -1.0055458545684814, |
| "rewards/margins": 1.3556736707687378, |
| "rewards/rejected": -2.3612194061279297, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 3.749282845668388e-07, |
| "logits/chosen": -2.979485034942627, |
| "logits/rejected": -2.8463826179504395, |
| "logps/chosen": -279.7747497558594, |
| "logps/rejected": -281.71881103515625, |
| "loss": 0.5474, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.5681439638137817, |
| "rewards/margins": 1.333145022392273, |
| "rewards/rejected": -1.9012889862060547, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 3.739720787913559e-07, |
| "logits/chosen": -2.8072428703308105, |
| "logits/rejected": -2.8444907665252686, |
| "logps/chosen": -318.6991882324219, |
| "logps/rejected": -286.9081726074219, |
| "loss": 0.4967, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.501719057559967, |
| "rewards/margins": 1.7894643545150757, |
| "rewards/rejected": -2.2911829948425293, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.98, |
| "eval_logits/chosen": -2.6878409385681152, |
| "eval_logits/rejected": -2.65635347366333, |
| "eval_logps/chosen": -259.57489013671875, |
| "eval_logps/rejected": -276.53948974609375, |
| "eval_loss": 0.528998613357544, |
| "eval_rewards/accuracies": 0.828125, |
| "eval_rewards/chosen": -0.5339791178703308, |
| "eval_rewards/margins": 1.4229780435562134, |
| "eval_rewards/rejected": -1.9569573402404785, |
| "eval_runtime": 58.1447, |
| "eval_samples_per_second": 17.198, |
| "eval_steps_per_second": 0.275, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 3.73015873015873e-07, |
| "logits/chosen": -2.8797740936279297, |
| "logits/rejected": -2.8204915523529053, |
| "logps/chosen": -275.71417236328125, |
| "logps/rejected": -218.75216674804688, |
| "loss": 0.497, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.2984248995780945, |
| "rewards/margins": 0.8872405886650085, |
| "rewards/rejected": -1.185665488243103, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 3.7205966724039014e-07, |
| "logits/chosen": -2.728538990020752, |
| "logits/rejected": -2.7202653884887695, |
| "logps/chosen": -326.10626220703125, |
| "logps/rejected": -258.46539306640625, |
| "loss": 0.4882, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.4980931878089905, |
| "rewards/margins": 1.183232069015503, |
| "rewards/rejected": -1.6813253164291382, |
| "step": 1920 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 3.711034614649072e-07, |
| "logits/chosen": -2.7934298515319824, |
| "logits/rejected": -2.7393717765808105, |
| "logps/chosen": -288.08892822265625, |
| "logps/rejected": -230.5961151123047, |
| "loss": 0.4772, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.8787330389022827, |
| "rewards/margins": 1.967394232749939, |
| "rewards/rejected": -2.8461270332336426, |
| "step": 1930 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 3.701472556894243e-07, |
| "logits/chosen": -2.742077350616455, |
| "logits/rejected": -2.6756367683410645, |
| "logps/chosen": -230.4021759033203, |
| "logps/rejected": -311.9582214355469, |
| "loss": 0.3813, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.0876777321100235, |
| "rewards/margins": 2.651371479034424, |
| "rewards/rejected": -2.7390494346618652, |
| "step": 1940 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 3.6919104991394144e-07, |
| "logits/chosen": -2.8869168758392334, |
| "logits/rejected": -2.870358943939209, |
| "logps/chosen": -180.28982543945312, |
| "logps/rejected": -222.55810546875, |
| "loss": 0.0952, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 0.38509073853492737, |
| "rewards/margins": 3.711843490600586, |
| "rewards/rejected": -3.3267529010772705, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.01, |
| "learning_rate": 3.6823484413845855e-07, |
| "logits/chosen": -2.7434253692626953, |
| "logits/rejected": -2.826244354248047, |
| "logps/chosen": -290.17999267578125, |
| "logps/rejected": -359.95318603515625, |
| "loss": 0.0795, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.3888338804244995, |
| "rewards/margins": 6.838004112243652, |
| "rewards/rejected": -5.449170112609863, |
| "step": 1960 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 3.6727863836297567e-07, |
| "logits/chosen": -2.761378765106201, |
| "logits/rejected": -2.8128793239593506, |
| "logps/chosen": -196.734619140625, |
| "logps/rejected": -248.90444946289062, |
| "loss": 0.0896, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": 0.7179908752441406, |
| "rewards/margins": 4.64093542098999, |
| "rewards/rejected": -3.9229445457458496, |
| "step": 1970 |
| }, |
| { |
| "epoch": 1.02, |
| "learning_rate": 3.663224325874928e-07, |
| "logits/chosen": -2.7781982421875, |
| "logits/rejected": -2.745850086212158, |
| "logps/chosen": -230.58413696289062, |
| "logps/rejected": -243.18405151367188, |
| "loss": 0.1075, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.2712101638317108, |
| "rewards/margins": 3.9006011486053467, |
| "rewards/rejected": -4.171811103820801, |
| "step": 1980 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 3.653662268120099e-07, |
| "logits/chosen": -2.69258975982666, |
| "logits/rejected": -2.718759775161743, |
| "logps/chosen": -263.413818359375, |
| "logps/rejected": -258.7752990722656, |
| "loss": 0.0765, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.1363146305084229, |
| "rewards/margins": 5.461094856262207, |
| "rewards/rejected": -4.324779987335205, |
| "step": 1990 |
| }, |
| { |
| "epoch": 1.03, |
| "learning_rate": 3.6441002103652707e-07, |
| "logits/chosen": -2.7689287662506104, |
| "logits/rejected": -2.7180933952331543, |
| "logps/chosen": -184.6995391845703, |
| "logps/rejected": -219.4838409423828, |
| "loss": 0.0921, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.8591575622558594, |
| "rewards/margins": 3.6370902061462402, |
| "rewards/rejected": -4.4962477684021, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.03, |
| "eval_logits/chosen": -2.634498119354248, |
| "eval_logits/rejected": -2.6040313243865967, |
| "eval_logps/chosen": -265.61114501953125, |
| "eval_logps/rejected": -288.5853576660156, |
| "eval_loss": 0.5368282198905945, |
| "eval_rewards/accuracies": 0.78125, |
| "eval_rewards/chosen": -1.1376045942306519, |
| "eval_rewards/margins": 2.023937463760376, |
| "eval_rewards/rejected": -3.1615419387817383, |
| "eval_runtime": 57.4706, |
| "eval_samples_per_second": 17.4, |
| "eval_steps_per_second": 0.278, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 3.634538152610442e-07, |
| "logits/chosen": -2.779973268508911, |
| "logits/rejected": -2.83324933052063, |
| "logps/chosen": -228.81320190429688, |
| "logps/rejected": -310.46905517578125, |
| "loss": 0.0672, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 0.3947059214115143, |
| "rewards/margins": 5.533167362213135, |
| "rewards/rejected": -5.138461112976074, |
| "step": 2010 |
| }, |
| { |
| "epoch": 1.04, |
| "learning_rate": 3.624976094855613e-07, |
| "logits/chosen": -2.7965312004089355, |
| "logits/rejected": -2.7170250415802, |
| "logps/chosen": -250.6485137939453, |
| "logps/rejected": -262.75152587890625, |
| "loss": 0.0705, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.7664440870285034, |
| "rewards/margins": 7.152462959289551, |
| "rewards/rejected": -6.386018753051758, |
| "step": 2020 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 3.615414037100784e-07, |
| "logits/chosen": -2.6802003383636475, |
| "logits/rejected": -2.638892650604248, |
| "logps/chosen": -275.2335205078125, |
| "logps/rejected": -222.55178833007812, |
| "loss": 0.099, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.053186558187007904, |
| "rewards/margins": 4.206416606903076, |
| "rewards/rejected": -4.259603023529053, |
| "step": 2030 |
| }, |
| { |
| "epoch": 1.05, |
| "learning_rate": 3.6058519793459553e-07, |
| "logits/chosen": -2.795947790145874, |
| "logits/rejected": -2.8200221061706543, |
| "logps/chosen": -189.23316955566406, |
| "logps/rejected": -218.7393341064453, |
| "loss": 0.0855, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.7773109674453735, |
| "rewards/margins": 5.873230457305908, |
| "rewards/rejected": -5.095919132232666, |
| "step": 2040 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 3.5962899215911265e-07, |
| "logits/chosen": -2.630261182785034, |
| "logits/rejected": -2.57206392288208, |
| "logps/chosen": -217.30972290039062, |
| "logps/rejected": -250.2468719482422, |
| "loss": 0.0676, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.0636889785528183, |
| "rewards/margins": 4.82668399810791, |
| "rewards/rejected": -4.8903727531433105, |
| "step": 2050 |
| }, |
| { |
| "epoch": 1.06, |
| "learning_rate": 3.5867278638362976e-07, |
| "logits/chosen": -2.601591110229492, |
| "logits/rejected": -2.6793808937072754, |
| "logps/chosen": -240.95700073242188, |
| "logps/rejected": -273.2774353027344, |
| "loss": 0.0734, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.05607228726148605, |
| "rewards/margins": 5.447501182556152, |
| "rewards/rejected": -5.391427516937256, |
| "step": 2060 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 3.577165806081469e-07, |
| "logits/chosen": -2.7586188316345215, |
| "logits/rejected": -2.804933547973633, |
| "logps/chosen": -262.1355895996094, |
| "logps/rejected": -302.13507080078125, |
| "loss": 0.095, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.5269735455513, |
| "rewards/margins": 6.823977470397949, |
| "rewards/rejected": -6.297003746032715, |
| "step": 2070 |
| }, |
| { |
| "epoch": 1.07, |
| "learning_rate": 3.56760374832664e-07, |
| "logits/chosen": -2.906247615814209, |
| "logits/rejected": -2.7534871101379395, |
| "logps/chosen": -250.2699737548828, |
| "logps/rejected": -261.11962890625, |
| "loss": 0.0438, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.4384024143218994, |
| "rewards/margins": 6.545752048492432, |
| "rewards/rejected": -5.107348442077637, |
| "step": 2080 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 3.5580416905718106e-07, |
| "logits/chosen": -2.655647039413452, |
| "logits/rejected": -2.770217180252075, |
| "logps/chosen": -294.7089538574219, |
| "logps/rejected": -271.32037353515625, |
| "loss": 0.1017, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 0.16056032478809357, |
| "rewards/margins": 4.826616287231445, |
| "rewards/rejected": -4.666056156158447, |
| "step": 2090 |
| }, |
| { |
| "epoch": 1.08, |
| "learning_rate": 3.5484796328169817e-07, |
| "logits/chosen": -2.8367366790771484, |
| "logits/rejected": -2.7900490760803223, |
| "logps/chosen": -235.642333984375, |
| "logps/rejected": -272.7825012207031, |
| "loss": 0.0733, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.0952982902526855, |
| "rewards/margins": 5.964905261993408, |
| "rewards/rejected": -4.8696064949035645, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.08, |
| "eval_logits/chosen": -2.659477949142456, |
| "eval_logits/rejected": -2.628939151763916, |
| "eval_logps/chosen": -265.2799377441406, |
| "eval_logps/rejected": -291.42083740234375, |
| "eval_loss": 0.5452979803085327, |
| "eval_rewards/accuracies": 0.765625, |
| "eval_rewards/chosen": -1.1044831275939941, |
| "eval_rewards/margins": 2.3406097888946533, |
| "eval_rewards/rejected": -3.4450929164886475, |
| "eval_runtime": 55.5581, |
| "eval_samples_per_second": 17.999, |
| "eval_steps_per_second": 0.288, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 3.538917575062153e-07, |
| "logits/chosen": -2.700606346130371, |
| "logits/rejected": -2.6558995246887207, |
| "logps/chosen": -210.18374633789062, |
| "logps/rejected": -282.9020080566406, |
| "loss": 0.1171, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.15502862632274628, |
| "rewards/margins": 5.092817783355713, |
| "rewards/rejected": -4.937788963317871, |
| "step": 2110 |
| }, |
| { |
| "epoch": 1.09, |
| "learning_rate": 3.529355517307324e-07, |
| "logits/chosen": -2.8686940670013428, |
| "logits/rejected": -2.740063190460205, |
| "logps/chosen": -269.2464904785156, |
| "logps/rejected": -366.86639404296875, |
| "loss": 0.0607, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.0339075326919556, |
| "rewards/margins": 8.14382266998291, |
| "rewards/rejected": -7.109914302825928, |
| "step": 2120 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 3.519793459552495e-07, |
| "logits/chosen": -2.823467969894409, |
| "logits/rejected": -2.833052635192871, |
| "logps/chosen": -280.67706298828125, |
| "logps/rejected": -302.888671875, |
| "loss": 0.0767, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": 0.975002646446228, |
| "rewards/margins": 6.0545806884765625, |
| "rewards/rejected": -5.079577445983887, |
| "step": 2130 |
| }, |
| { |
| "epoch": 1.1, |
| "learning_rate": 3.510231401797667e-07, |
| "logits/chosen": -2.9246203899383545, |
| "logits/rejected": -2.8968777656555176, |
| "logps/chosen": -409.8377380371094, |
| "logps/rejected": -376.01153564453125, |
| "loss": 0.0978, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.6437445878982544, |
| "rewards/margins": 7.857232570648193, |
| "rewards/rejected": -7.2134881019592285, |
| "step": 2140 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 3.500669344042838e-07, |
| "logits/chosen": -2.8815579414367676, |
| "logits/rejected": -2.696906566619873, |
| "logps/chosen": -312.1972961425781, |
| "logps/rejected": -323.14031982421875, |
| "loss": 0.0847, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.4120159149169922, |
| "rewards/margins": 6.041172027587891, |
| "rewards/rejected": -5.629156112670898, |
| "step": 2150 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 3.491107286288009e-07, |
| "logits/chosen": -2.869086265563965, |
| "logits/rejected": -2.794461250305176, |
| "logps/chosen": -230.935302734375, |
| "logps/rejected": -229.4247283935547, |
| "loss": 0.1155, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.5138686895370483, |
| "rewards/margins": 6.901867866516113, |
| "rewards/rejected": -5.387998580932617, |
| "step": 2160 |
| }, |
| { |
| "epoch": 1.12, |
| "learning_rate": 3.4815452285331803e-07, |
| "logits/chosen": -2.906574249267578, |
| "logits/rejected": -2.918184280395508, |
| "logps/chosen": -435.5089416503906, |
| "logps/rejected": -367.09820556640625, |
| "loss": 0.0949, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 0.4653266966342926, |
| "rewards/margins": 4.6506667137146, |
| "rewards/rejected": -4.18533992767334, |
| "step": 2170 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 3.4719831707783515e-07, |
| "logits/chosen": -2.6144165992736816, |
| "logits/rejected": -2.6169540882110596, |
| "logps/chosen": -259.0269470214844, |
| "logps/rejected": -253.59518432617188, |
| "loss": 0.1058, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.8547903895378113, |
| "rewards/margins": 5.843932151794434, |
| "rewards/rejected": -4.989141941070557, |
| "step": 2180 |
| }, |
| { |
| "epoch": 1.13, |
| "learning_rate": 3.4624211130235227e-07, |
| "logits/chosen": -2.905179738998413, |
| "logits/rejected": -2.836651086807251, |
| "logps/chosen": -208.110107421875, |
| "logps/rejected": -241.3552703857422, |
| "loss": 0.086, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.6082350015640259, |
| "rewards/margins": 6.723033905029297, |
| "rewards/rejected": -6.114798545837402, |
| "step": 2190 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 3.452859055268694e-07, |
| "logits/chosen": -2.7746129035949707, |
| "logits/rejected": -2.7575089931488037, |
| "logps/chosen": -204.41021728515625, |
| "logps/rejected": -263.98004150390625, |
| "loss": 0.0972, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -0.20659780502319336, |
| "rewards/margins": 5.700135707855225, |
| "rewards/rejected": -5.90673303604126, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.14, |
| "eval_logits/chosen": -2.670933246612549, |
| "eval_logits/rejected": -2.647088050842285, |
| "eval_logps/chosen": -271.1505126953125, |
| "eval_logps/rejected": -296.7934265136719, |
| "eval_loss": 0.557054877281189, |
| "eval_rewards/accuracies": 0.8125, |
| "eval_rewards/chosen": -1.6915401220321655, |
| "eval_rewards/margins": 2.2908077239990234, |
| "eval_rewards/rejected": -3.9823474884033203, |
| "eval_runtime": 55.8179, |
| "eval_samples_per_second": 17.915, |
| "eval_steps_per_second": 0.287, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.14, |
| "learning_rate": 3.443296997513865e-07, |
| "logits/chosen": -2.8264622688293457, |
| "logits/rejected": -2.802203416824341, |
| "logps/chosen": -317.52960205078125, |
| "logps/rejected": -342.2868957519531, |
| "loss": 0.0822, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": 1.2196967601776123, |
| "rewards/margins": 7.658734321594238, |
| "rewards/rejected": -6.439038276672363, |
| "step": 2210 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 3.433734939759036e-07, |
| "logits/chosen": -2.8357937335968018, |
| "logits/rejected": -2.8530819416046143, |
| "logps/chosen": -235.8030548095703, |
| "logps/rejected": -277.0107116699219, |
| "loss": 0.1058, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 0.7737981677055359, |
| "rewards/margins": 6.4052414894104, |
| "rewards/rejected": -5.631443023681641, |
| "step": 2220 |
| }, |
| { |
| "epoch": 1.15, |
| "learning_rate": 3.4241728820042073e-07, |
| "logits/chosen": -2.7552199363708496, |
| "logits/rejected": -2.7428534030914307, |
| "logps/chosen": -227.3050079345703, |
| "logps/rejected": -270.3177185058594, |
| "loss": 0.121, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.6933164000511169, |
| "rewards/margins": 4.165007591247559, |
| "rewards/rejected": -4.858323574066162, |
| "step": 2230 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 3.4146108242493784e-07, |
| "logits/chosen": -2.7783877849578857, |
| "logits/rejected": -2.8269574642181396, |
| "logps/chosen": -289.098388671875, |
| "logps/rejected": -384.05474853515625, |
| "loss": 0.1735, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.44400936365127563, |
| "rewards/margins": 7.5069899559021, |
| "rewards/rejected": -7.062979698181152, |
| "step": 2240 |
| }, |
| { |
| "epoch": 1.16, |
| "learning_rate": 3.405048766494549e-07, |
| "logits/chosen": -2.576467990875244, |
| "logits/rejected": -2.569551706314087, |
| "logps/chosen": -264.085205078125, |
| "logps/rejected": -242.80126953125, |
| "loss": 0.2203, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 0.19903890788555145, |
| "rewards/margins": 5.752175331115723, |
| "rewards/rejected": -5.553135871887207, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 3.39548670873972e-07, |
| "logits/chosen": -2.780787944793701, |
| "logits/rejected": -2.7364470958709717, |
| "logps/chosen": -327.9325866699219, |
| "logps/rejected": -381.19915771484375, |
| "loss": 0.0916, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.037799786776304245, |
| "rewards/margins": 5.7476677894592285, |
| "rewards/rejected": -5.785468101501465, |
| "step": 2260 |
| }, |
| { |
| "epoch": 1.17, |
| "learning_rate": 3.3859246509848914e-07, |
| "logits/chosen": -2.716096878051758, |
| "logits/rejected": -2.7619426250457764, |
| "logps/chosen": -270.20281982421875, |
| "logps/rejected": -309.69500732421875, |
| "loss": 0.0833, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.1129147857427597, |
| "rewards/margins": 5.1096320152282715, |
| "rewards/rejected": -5.22254753112793, |
| "step": 2270 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 3.376362593230063e-07, |
| "logits/chosen": -2.633354902267456, |
| "logits/rejected": -2.5907938480377197, |
| "logps/chosen": -214.78662109375, |
| "logps/rejected": -303.368408203125, |
| "loss": 0.0682, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 0.21891649067401886, |
| "rewards/margins": 6.782713413238525, |
| "rewards/rejected": -6.5637969970703125, |
| "step": 2280 |
| }, |
| { |
| "epoch": 1.18, |
| "learning_rate": 3.366800535475234e-07, |
| "logits/chosen": -2.788681745529175, |
| "logits/rejected": -2.6701889038085938, |
| "logps/chosen": -367.4220886230469, |
| "logps/rejected": -265.5601501464844, |
| "loss": 0.0699, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.7071707248687744, |
| "rewards/margins": 8.423349380493164, |
| "rewards/rejected": -5.716177940368652, |
| "step": 2290 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 3.3572384777204054e-07, |
| "logits/chosen": -2.6153483390808105, |
| "logits/rejected": -2.575199842453003, |
| "logps/chosen": -398.6623229980469, |
| "logps/rejected": -343.8503112792969, |
| "loss": 0.1058, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.5428920984268188, |
| "rewards/margins": 7.921414852142334, |
| "rewards/rejected": -7.3785223960876465, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.19, |
| "eval_logits/chosen": -2.5798184871673584, |
| "eval_logits/rejected": -2.5527260303497314, |
| "eval_logps/chosen": -264.8562927246094, |
| "eval_logps/rejected": -295.91058349609375, |
| "eval_loss": 0.5789377689361572, |
| "eval_rewards/accuracies": 0.796875, |
| "eval_rewards/chosen": -1.0621176958084106, |
| "eval_rewards/margins": 2.8319482803344727, |
| "eval_rewards/rejected": -3.8940658569335938, |
| "eval_runtime": 58.0073, |
| "eval_samples_per_second": 17.239, |
| "eval_steps_per_second": 0.276, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.19, |
| "learning_rate": 3.3476764199655765e-07, |
| "logits/chosen": -2.513836145401001, |
| "logits/rejected": -2.6243300437927246, |
| "logps/chosen": -219.6814422607422, |
| "logps/rejected": -218.55807495117188, |
| "loss": 0.1111, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 0.24565038084983826, |
| "rewards/margins": 4.5390119552612305, |
| "rewards/rejected": -4.293361663818359, |
| "step": 2310 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 3.3381143622107477e-07, |
| "logits/chosen": -2.8270373344421387, |
| "logits/rejected": -2.7377943992614746, |
| "logps/chosen": -401.356201171875, |
| "logps/rejected": -353.20965576171875, |
| "loss": 0.058, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.6988257169723511, |
| "rewards/margins": 6.860513210296631, |
| "rewards/rejected": -6.161687850952148, |
| "step": 2320 |
| }, |
| { |
| "epoch": 1.2, |
| "learning_rate": 3.328552304455919e-07, |
| "logits/chosen": -2.717745542526245, |
| "logits/rejected": -2.673698902130127, |
| "logps/chosen": -268.2499084472656, |
| "logps/rejected": -293.6933898925781, |
| "loss": 0.0778, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.8076402544975281, |
| "rewards/margins": 7.105103969573975, |
| "rewards/rejected": -6.297463417053223, |
| "step": 2330 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 3.31899024670109e-07, |
| "logits/chosen": -2.759124517440796, |
| "logits/rejected": -2.744246006011963, |
| "logps/chosen": -315.86248779296875, |
| "logps/rejected": -262.46099853515625, |
| "loss": 0.0721, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.9767075777053833, |
| "rewards/margins": 4.7864885330200195, |
| "rewards/rejected": -5.7631964683532715, |
| "step": 2340 |
| }, |
| { |
| "epoch": 1.21, |
| "learning_rate": 3.309428188946261e-07, |
| "logits/chosen": -2.6659247875213623, |
| "logits/rejected": -2.627288341522217, |
| "logps/chosen": -165.9207305908203, |
| "logps/rejected": -166.2641143798828, |
| "loss": 0.1052, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.17963728308677673, |
| "rewards/margins": 4.340859413146973, |
| "rewards/rejected": -4.520496368408203, |
| "step": 2350 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 3.2998661311914323e-07, |
| "logits/chosen": -2.6454150676727295, |
| "logits/rejected": -2.5655908584594727, |
| "logps/chosen": -195.2259979248047, |
| "logps/rejected": -328.01806640625, |
| "loss": 0.0714, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.6261202096939087, |
| "rewards/margins": 8.00461196899414, |
| "rewards/rejected": -7.3784918785095215, |
| "step": 2360 |
| }, |
| { |
| "epoch": 1.22, |
| "learning_rate": 3.2903040734366035e-07, |
| "logits/chosen": -2.7958927154541016, |
| "logits/rejected": -2.7633419036865234, |
| "logps/chosen": -299.79107666015625, |
| "logps/rejected": -378.6677551269531, |
| "loss": 0.0659, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.1539552211761475, |
| "rewards/margins": 7.467595100402832, |
| "rewards/rejected": -6.31364107131958, |
| "step": 2370 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 3.2807420156817746e-07, |
| "logits/chosen": -2.7090749740600586, |
| "logits/rejected": -2.64817476272583, |
| "logps/chosen": -226.72525024414062, |
| "logps/rejected": -330.6046142578125, |
| "loss": 0.1011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.004512411542236805, |
| "rewards/margins": 5.72462272644043, |
| "rewards/rejected": -5.729135990142822, |
| "step": 2380 |
| }, |
| { |
| "epoch": 1.23, |
| "learning_rate": 3.271179957926946e-07, |
| "logits/chosen": -2.447643995285034, |
| "logits/rejected": -2.5474460124969482, |
| "logps/chosen": -325.9497985839844, |
| "logps/rejected": -339.540771484375, |
| "loss": 0.0895, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.07166711986064911, |
| "rewards/margins": 5.96669864654541, |
| "rewards/rejected": -5.895030975341797, |
| "step": 2390 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 3.261617900172117e-07, |
| "logits/chosen": -2.672534465789795, |
| "logits/rejected": -2.7197232246398926, |
| "logps/chosen": -225.26168823242188, |
| "logps/rejected": -283.93212890625, |
| "loss": 0.2423, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": 0.4908114969730377, |
| "rewards/margins": 6.257862567901611, |
| "rewards/rejected": -5.767050743103027, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.24, |
| "eval_logits/chosen": -2.5784032344818115, |
| "eval_logits/rejected": -2.5413780212402344, |
| "eval_logps/chosen": -266.1980895996094, |
| "eval_logps/rejected": -292.55987548828125, |
| "eval_loss": 0.545539379119873, |
| "eval_rewards/accuracies": 0.78125, |
| "eval_rewards/chosen": -1.1962969303131104, |
| "eval_rewards/margins": 2.362699508666992, |
| "eval_rewards/rejected": -3.5589966773986816, |
| "eval_runtime": 57.2058, |
| "eval_samples_per_second": 17.481, |
| "eval_steps_per_second": 0.28, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.24, |
| "learning_rate": 3.2520558424172876e-07, |
| "logits/chosen": -2.705650568008423, |
| "logits/rejected": -2.6275668144226074, |
| "logps/chosen": -204.61109924316406, |
| "logps/rejected": -228.20059204101562, |
| "loss": 0.0638, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.12385628372430801, |
| "rewards/margins": 5.060498237609863, |
| "rewards/rejected": -5.184354782104492, |
| "step": 2410 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 3.242493784662459e-07, |
| "logits/chosen": -2.7630136013031006, |
| "logits/rejected": -2.7599825859069824, |
| "logps/chosen": -271.2209777832031, |
| "logps/rejected": -319.4446716308594, |
| "loss": 0.0914, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": 0.31576937437057495, |
| "rewards/margins": 5.5094099044799805, |
| "rewards/rejected": -5.193641185760498, |
| "step": 2420 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 3.2329317269076304e-07, |
| "logits/chosen": -2.7317633628845215, |
| "logits/rejected": -2.655245780944824, |
| "logps/chosen": -273.3720397949219, |
| "logps/rejected": -324.91253662109375, |
| "loss": 0.0998, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.1897385120391846, |
| "rewards/margins": 7.156263828277588, |
| "rewards/rejected": -5.966525077819824, |
| "step": 2430 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 3.2233696691528016e-07, |
| "logits/chosen": -2.7852416038513184, |
| "logits/rejected": -2.7786805629730225, |
| "logps/chosen": -261.6937255859375, |
| "logps/rejected": -367.239501953125, |
| "loss": 0.0981, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.2177461385726929, |
| "rewards/margins": 8.014082908630371, |
| "rewards/rejected": -6.796337127685547, |
| "step": 2440 |
| }, |
| { |
| "epoch": 1.26, |
| "learning_rate": 3.2138076113979727e-07, |
| "logits/chosen": -2.648454427719116, |
| "logits/rejected": -2.6311562061309814, |
| "logps/chosen": -240.67056274414062, |
| "logps/rejected": -267.9673156738281, |
| "loss": 0.0978, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 0.28647559881210327, |
| "rewards/margins": 6.354050636291504, |
| "rewards/rejected": -6.067575454711914, |
| "step": 2450 |
| }, |
| { |
| "epoch": 1.27, |
| "learning_rate": 3.204245553643144e-07, |
| "logits/chosen": -2.7582898139953613, |
| "logits/rejected": -2.830904722213745, |
| "logps/chosen": -309.43853759765625, |
| "logps/rejected": -350.77618408203125, |
| "loss": 0.098, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.6567636728286743, |
| "rewards/margins": 6.094487190246582, |
| "rewards/rejected": -5.437722682952881, |
| "step": 2460 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 3.194683495888315e-07, |
| "logits/chosen": -2.784090518951416, |
| "logits/rejected": -2.7697300910949707, |
| "logps/chosen": -277.5585021972656, |
| "logps/rejected": -286.3648986816406, |
| "loss": 0.1034, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -0.03639407083392143, |
| "rewards/margins": 6.042351245880127, |
| "rewards/rejected": -6.078745365142822, |
| "step": 2470 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 3.185121438133486e-07, |
| "logits/chosen": -2.758202075958252, |
| "logits/rejected": -2.7667107582092285, |
| "logps/chosen": -282.9512023925781, |
| "logps/rejected": -355.96319580078125, |
| "loss": 0.1263, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": 0.692094087600708, |
| "rewards/margins": 7.221386909484863, |
| "rewards/rejected": -6.529292106628418, |
| "step": 2480 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 3.1755593803786574e-07, |
| "logits/chosen": -2.6361289024353027, |
| "logits/rejected": -2.656646966934204, |
| "logps/chosen": -201.61358642578125, |
| "logps/rejected": -313.6552734375, |
| "loss": 0.0946, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -1.4261380434036255, |
| "rewards/margins": 5.699584484100342, |
| "rewards/rejected": -7.125722408294678, |
| "step": 2490 |
| }, |
| { |
| "epoch": 1.29, |
| "learning_rate": 3.1659973226238285e-07, |
| "logits/chosen": -2.6961874961853027, |
| "logits/rejected": -2.658639669418335, |
| "logps/chosen": -205.56558227539062, |
| "logps/rejected": -260.9989013671875, |
| "loss": 0.1177, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -1.1720048189163208, |
| "rewards/margins": 3.783812999725342, |
| "rewards/rejected": -4.955817222595215, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.29, |
| "eval_logits/chosen": -2.518930435180664, |
| "eval_logits/rejected": -2.480231285095215, |
| "eval_logps/chosen": -272.3760681152344, |
| "eval_logps/rejected": -300.9119567871094, |
| "eval_loss": 0.5888839960098267, |
| "eval_rewards/accuracies": 0.796875, |
| "eval_rewards/chosen": -1.814096450805664, |
| "eval_rewards/margins": 2.580104112625122, |
| "eval_rewards/rejected": -4.394200325012207, |
| "eval_runtime": 58.8794, |
| "eval_samples_per_second": 16.984, |
| "eval_steps_per_second": 0.272, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 3.1564352648689997e-07, |
| "logits/chosen": -2.7483344078063965, |
| "logits/rejected": -2.7376341819763184, |
| "logps/chosen": -269.5032653808594, |
| "logps/rejected": -253.87051391601562, |
| "loss": 0.1018, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": 0.7494356632232666, |
| "rewards/margins": 5.012850761413574, |
| "rewards/rejected": -4.263415336608887, |
| "step": 2510 |
| }, |
| { |
| "epoch": 1.3, |
| "learning_rate": 3.146873207114171e-07, |
| "logits/chosen": -2.7938504219055176, |
| "logits/rejected": -2.7508413791656494, |
| "logps/chosen": -274.398193359375, |
| "logps/rejected": -306.8814697265625, |
| "loss": 0.1026, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.5945212244987488, |
| "rewards/margins": 7.071564674377441, |
| "rewards/rejected": -6.477043151855469, |
| "step": 2520 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 3.137311149359342e-07, |
| "logits/chosen": -2.858582019805908, |
| "logits/rejected": -2.723261833190918, |
| "logps/chosen": -274.79425048828125, |
| "logps/rejected": -379.57501220703125, |
| "loss": 0.0912, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.21533474326133728, |
| "rewards/margins": 5.66866397857666, |
| "rewards/rejected": -5.883998870849609, |
| "step": 2530 |
| }, |
| { |
| "epoch": 1.31, |
| "learning_rate": 3.127749091604513e-07, |
| "logits/chosen": -2.8357625007629395, |
| "logits/rejected": -2.814939260482788, |
| "logps/chosen": -232.6085662841797, |
| "logps/rejected": -294.39849853515625, |
| "loss": 0.0899, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.3493742048740387, |
| "rewards/margins": 6.373709678649902, |
| "rewards/rejected": -6.723084449768066, |
| "step": 2540 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 3.1181870338496843e-07, |
| "logits/chosen": -2.679546594619751, |
| "logits/rejected": -2.7267134189605713, |
| "logps/chosen": -209.9823760986328, |
| "logps/rejected": -287.78826904296875, |
| "loss": 0.091, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 0.18076567351818085, |
| "rewards/margins": 5.487452983856201, |
| "rewards/rejected": -5.306687831878662, |
| "step": 2550 |
| }, |
| { |
| "epoch": 1.32, |
| "learning_rate": 3.108624976094856e-07, |
| "logits/chosen": -2.5259623527526855, |
| "logits/rejected": -2.6586062908172607, |
| "logps/chosen": -365.71258544921875, |
| "logps/rejected": -242.0026397705078, |
| "loss": 0.1047, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.6775692701339722, |
| "rewards/margins": 5.460636615753174, |
| "rewards/rejected": -4.783067226409912, |
| "step": 2560 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 3.0990629183400266e-07, |
| "logits/chosen": -2.5484824180603027, |
| "logits/rejected": -2.580888509750366, |
| "logps/chosen": -270.9229736328125, |
| "logps/rejected": -282.5039367675781, |
| "loss": 0.1062, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 0.9959784746170044, |
| "rewards/margins": 7.229222297668457, |
| "rewards/rejected": -6.233242988586426, |
| "step": 2570 |
| }, |
| { |
| "epoch": 1.33, |
| "learning_rate": 3.089500860585198e-07, |
| "logits/chosen": -2.762617588043213, |
| "logits/rejected": -2.739429235458374, |
| "logps/chosen": -200.62588500976562, |
| "logps/rejected": -308.99127197265625, |
| "loss": 0.1209, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.03968176990747452, |
| "rewards/margins": 7.24197244644165, |
| "rewards/rejected": -7.2022905349731445, |
| "step": 2580 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 3.079938802830369e-07, |
| "logits/chosen": -2.7819771766662598, |
| "logits/rejected": -2.755398750305176, |
| "logps/chosen": -207.9453582763672, |
| "logps/rejected": -235.0533447265625, |
| "loss": 0.086, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6877096891403198, |
| "rewards/margins": 5.369903564453125, |
| "rewards/rejected": -6.057612895965576, |
| "step": 2590 |
| }, |
| { |
| "epoch": 1.34, |
| "learning_rate": 3.07037674507554e-07, |
| "logits/chosen": -2.706509828567505, |
| "logits/rejected": -2.741109848022461, |
| "logps/chosen": -328.0285949707031, |
| "logps/rejected": -378.04339599609375, |
| "loss": 0.1213, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 0.15584062039852142, |
| "rewards/margins": 7.148020267486572, |
| "rewards/rejected": -6.992179870605469, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.34, |
| "eval_logits/chosen": -2.5206711292266846, |
| "eval_logits/rejected": -2.4774041175842285, |
| "eval_logps/chosen": -268.8435974121094, |
| "eval_logps/rejected": -295.39013671875, |
| "eval_loss": 0.5683205723762512, |
| "eval_rewards/accuracies": 0.8125, |
| "eval_rewards/chosen": -1.4608486890792847, |
| "eval_rewards/margins": 2.3811748027801514, |
| "eval_rewards/rejected": -3.8420238494873047, |
| "eval_runtime": 59.9277, |
| "eval_samples_per_second": 16.687, |
| "eval_steps_per_second": 0.267, |
| "step": 2600 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 3.060814687320711e-07, |
| "logits/chosen": -2.6932194232940674, |
| "logits/rejected": -2.6989266872406006, |
| "logps/chosen": -347.0301818847656, |
| "logps/rejected": -320.0285949707031, |
| "loss": 0.0986, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 1.9022884368896484, |
| "rewards/margins": 8.200953483581543, |
| "rewards/rejected": -6.2986650466918945, |
| "step": 2610 |
| }, |
| { |
| "epoch": 1.35, |
| "learning_rate": 3.0512526295658824e-07, |
| "logits/chosen": -2.5327630043029785, |
| "logits/rejected": -2.500969409942627, |
| "logps/chosen": -234.2447509765625, |
| "logps/rejected": -247.3339080810547, |
| "loss": 0.094, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -0.20838475227355957, |
| "rewards/margins": 5.5928425788879395, |
| "rewards/rejected": -5.80122709274292, |
| "step": 2620 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 3.0416905718110536e-07, |
| "logits/chosen": -2.600487232208252, |
| "logits/rejected": -2.682471990585327, |
| "logps/chosen": -246.78024291992188, |
| "logps/rejected": -246.34237670898438, |
| "loss": 0.1632, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 0.0733012929558754, |
| "rewards/margins": 5.1650261878967285, |
| "rewards/rejected": -5.0917253494262695, |
| "step": 2630 |
| }, |
| { |
| "epoch": 1.36, |
| "learning_rate": 3.0321285140562247e-07, |
| "logits/chosen": -2.7498373985290527, |
| "logits/rejected": -2.6727969646453857, |
| "logps/chosen": -197.46665954589844, |
| "logps/rejected": -293.0552062988281, |
| "loss": 0.087, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.277592420578003, |
| "rewards/margins": 4.636383533477783, |
| "rewards/rejected": -5.913976192474365, |
| "step": 2640 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 3.022566456301396e-07, |
| "logits/chosen": -2.568721294403076, |
| "logits/rejected": -2.7006120681762695, |
| "logps/chosen": -296.3185119628906, |
| "logps/rejected": -307.19818115234375, |
| "loss": 0.1191, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.03509577363729477, |
| "rewards/margins": 6.352346897125244, |
| "rewards/rejected": -6.387442111968994, |
| "step": 2650 |
| }, |
| { |
| "epoch": 1.37, |
| "learning_rate": 3.013004398546567e-07, |
| "logits/chosen": -2.590341091156006, |
| "logits/rejected": -2.6296682357788086, |
| "logps/chosen": -299.8781433105469, |
| "logps/rejected": -309.7999572753906, |
| "loss": 0.1044, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.5305342078208923, |
| "rewards/margins": 5.682303428649902, |
| "rewards/rejected": -6.212838172912598, |
| "step": 2660 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 3.003442340791738e-07, |
| "logits/chosen": -2.69757080078125, |
| "logits/rejected": -2.7322795391082764, |
| "logps/chosen": -250.77490234375, |
| "logps/rejected": -287.36785888671875, |
| "loss": 0.0781, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -0.4853235185146332, |
| "rewards/margins": 6.51000452041626, |
| "rewards/rejected": -6.995328426361084, |
| "step": 2670 |
| }, |
| { |
| "epoch": 1.38, |
| "learning_rate": 2.9938802830369093e-07, |
| "logits/chosen": -2.7183175086975098, |
| "logits/rejected": -2.7362000942230225, |
| "logps/chosen": -189.50961303710938, |
| "logps/rejected": -314.11114501953125, |
| "loss": 0.1049, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -0.7947381734848022, |
| "rewards/margins": 6.518137454986572, |
| "rewards/rejected": -7.312876224517822, |
| "step": 2680 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 2.9843182252820805e-07, |
| "logits/chosen": -2.5106515884399414, |
| "logits/rejected": -2.5575432777404785, |
| "logps/chosen": -249.78012084960938, |
| "logps/rejected": -237.69677734375, |
| "loss": 0.0708, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -0.6967580914497375, |
| "rewards/margins": 5.879612922668457, |
| "rewards/rejected": -6.576371192932129, |
| "step": 2690 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 2.974756167527252e-07, |
| "logits/chosen": -2.7255759239196777, |
| "logits/rejected": -2.62638521194458, |
| "logps/chosen": -315.8297424316406, |
| "logps/rejected": -245.0868682861328, |
| "loss": 0.0889, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.14952102303504944, |
| "rewards/margins": 5.201340675354004, |
| "rewards/rejected": -5.3508620262146, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.39, |
| "eval_logits/chosen": -2.452204704284668, |
| "eval_logits/rejected": -2.4123356342315674, |
| "eval_logps/chosen": -270.2423400878906, |
| "eval_logps/rejected": -294.30682373046875, |
| "eval_loss": 0.5890262126922607, |
| "eval_rewards/accuracies": 0.78125, |
| "eval_rewards/chosen": -1.6007238626480103, |
| "eval_rewards/margins": 2.132964849472046, |
| "eval_rewards/rejected": -3.7336881160736084, |
| "eval_runtime": 60.2724, |
| "eval_samples_per_second": 16.591, |
| "eval_steps_per_second": 0.265, |
| "step": 2700 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 2.9651941097724233e-07, |
| "logits/chosen": -2.6822290420532227, |
| "logits/rejected": -2.7052135467529297, |
| "logps/chosen": -278.01422119140625, |
| "logps/rejected": -339.3485412597656, |
| "loss": 0.1087, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.15647803246974945, |
| "rewards/margins": 5.597909450531006, |
| "rewards/rejected": -5.754388332366943, |
| "step": 2710 |
| }, |
| { |
| "epoch": 1.4, |
| "learning_rate": 2.9556320520175945e-07, |
| "logits/chosen": -2.5796897411346436, |
| "logits/rejected": -2.6120152473449707, |
| "logps/chosen": -276.00408935546875, |
| "logps/rejected": -235.9673614501953, |
| "loss": 0.1325, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -1.0651065111160278, |
| "rewards/margins": 4.378058433532715, |
| "rewards/rejected": -5.443163871765137, |
| "step": 2720 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 2.946069994262765e-07, |
| "logits/chosen": -2.682631015777588, |
| "logits/rejected": -2.5832624435424805, |
| "logps/chosen": -325.86883544921875, |
| "logps/rejected": -448.3667907714844, |
| "loss": 0.1033, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.8688300848007202, |
| "rewards/margins": 9.84605598449707, |
| "rewards/rejected": -7.977224826812744, |
| "step": 2730 |
| }, |
| { |
| "epoch": 1.41, |
| "learning_rate": 2.9365079365079363e-07, |
| "logits/chosen": -2.656704902648926, |
| "logits/rejected": -2.6298129558563232, |
| "logps/chosen": -312.0522766113281, |
| "logps/rejected": -264.58172607421875, |
| "loss": 0.0869, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.325814425945282, |
| "rewards/margins": 7.053065299987793, |
| "rewards/rejected": -6.727250099182129, |
| "step": 2740 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 2.9269458787531074e-07, |
| "logits/chosen": -2.7410786151885986, |
| "logits/rejected": -2.60438871383667, |
| "logps/chosen": -338.72125244140625, |
| "logps/rejected": -300.7152099609375, |
| "loss": 0.099, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.48889246582984924, |
| "rewards/margins": 5.851205348968506, |
| "rewards/rejected": -5.3623127937316895, |
| "step": 2750 |
| }, |
| { |
| "epoch": 1.42, |
| "learning_rate": 2.9173838209982786e-07, |
| "logits/chosen": -2.5442605018615723, |
| "logits/rejected": -2.396920680999756, |
| "logps/chosen": -212.8131561279297, |
| "logps/rejected": -245.8048858642578, |
| "loss": 0.0924, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.18661096692085266, |
| "rewards/margins": 5.660151481628418, |
| "rewards/rejected": -5.846762657165527, |
| "step": 2760 |
| }, |
| { |
| "epoch": 1.43, |
| "learning_rate": 2.90782176324345e-07, |
| "logits/chosen": -2.770378589630127, |
| "logits/rejected": -2.652078628540039, |
| "logps/chosen": -284.2225036621094, |
| "logps/rejected": -237.8290557861328, |
| "loss": 0.0933, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5430759787559509, |
| "rewards/margins": 5.237969398498535, |
| "rewards/rejected": -5.781044960021973, |
| "step": 2770 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 2.898259705488621e-07, |
| "logits/chosen": -2.763434410095215, |
| "logits/rejected": -2.824732542037964, |
| "logps/chosen": -295.1107482910156, |
| "logps/rejected": -265.9899597167969, |
| "loss": 0.0896, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.034651029855012894, |
| "rewards/margins": 6.801316261291504, |
| "rewards/rejected": -6.835967063903809, |
| "step": 2780 |
| }, |
| { |
| "epoch": 1.44, |
| "learning_rate": 2.888697647733792e-07, |
| "logits/chosen": -2.626591920852661, |
| "logits/rejected": -2.693389892578125, |
| "logps/chosen": -382.92559814453125, |
| "logps/rejected": -343.2526550292969, |
| "loss": 0.1328, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.3407518267631531, |
| "rewards/margins": 8.370372772216797, |
| "rewards/rejected": -8.029620170593262, |
| "step": 2790 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 2.879135589978963e-07, |
| "logits/chosen": -2.7098212242126465, |
| "logits/rejected": -2.6440200805664062, |
| "logps/chosen": -237.72042846679688, |
| "logps/rejected": -263.23858642578125, |
| "loss": 0.0995, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1682741343975067, |
| "rewards/margins": 6.354408264160156, |
| "rewards/rejected": -6.186134338378906, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.45, |
| "eval_logits/chosen": -2.505031108856201, |
| "eval_logits/rejected": -2.4685418605804443, |
| "eval_logps/chosen": -269.7538146972656, |
| "eval_logps/rejected": -295.3314514160156, |
| "eval_loss": 0.6072700023651123, |
| "eval_rewards/accuracies": 0.828125, |
| "eval_rewards/chosen": -1.5518717765808105, |
| "eval_rewards/margins": 2.2842793464660645, |
| "eval_rewards/rejected": -3.836151123046875, |
| "eval_runtime": 55.9165, |
| "eval_samples_per_second": 17.884, |
| "eval_steps_per_second": 0.286, |
| "step": 2800 |
| }, |
| { |
| "epoch": 1.45, |
| "learning_rate": 2.8695735322241344e-07, |
| "logits/chosen": -2.6192431449890137, |
| "logits/rejected": -2.5992114543914795, |
| "logps/chosen": -243.22531127929688, |
| "logps/rejected": -259.7867431640625, |
| "loss": 0.1528, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.13970229029655457, |
| "rewards/margins": 6.5014328956604, |
| "rewards/rejected": -6.641134738922119, |
| "step": 2810 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 2.8600114744693055e-07, |
| "logits/chosen": -2.5777206420898438, |
| "logits/rejected": -2.595568895339966, |
| "logps/chosen": -315.70513916015625, |
| "logps/rejected": -328.4643859863281, |
| "loss": 0.0978, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 1.2178268432617188, |
| "rewards/margins": 7.306548118591309, |
| "rewards/rejected": -6.08872127532959, |
| "step": 2820 |
| }, |
| { |
| "epoch": 1.46, |
| "learning_rate": 2.8504494167144767e-07, |
| "logits/chosen": -2.5762603282928467, |
| "logits/rejected": -2.6186347007751465, |
| "logps/chosen": -338.2220153808594, |
| "logps/rejected": -372.440673828125, |
| "loss": 0.1064, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.40805625915527344, |
| "rewards/margins": 8.335431098937988, |
| "rewards/rejected": -7.927374362945557, |
| "step": 2830 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 2.8408873589596484e-07, |
| "logits/chosen": -2.6718220710754395, |
| "logits/rejected": -2.6635639667510986, |
| "logps/chosen": -187.11207580566406, |
| "logps/rejected": -261.17236328125, |
| "loss": 0.0828, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -0.6262374520301819, |
| "rewards/margins": 5.5064697265625, |
| "rewards/rejected": -6.132707595825195, |
| "step": 2840 |
| }, |
| { |
| "epoch": 1.47, |
| "learning_rate": 2.8313253012048195e-07, |
| "logits/chosen": -2.549769163131714, |
| "logits/rejected": -2.656653881072998, |
| "logps/chosen": -244.2962188720703, |
| "logps/rejected": -360.3494567871094, |
| "loss": 0.1782, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -1.2172654867172241, |
| "rewards/margins": 5.140130043029785, |
| "rewards/rejected": -6.357396125793457, |
| "step": 2850 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 2.8217632434499907e-07, |
| "logits/chosen": -2.645021915435791, |
| "logits/rejected": -2.6408421993255615, |
| "logps/chosen": -283.7989807128906, |
| "logps/rejected": -308.847900390625, |
| "loss": 0.1242, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.1975909173488617, |
| "rewards/margins": 6.074164867401123, |
| "rewards/rejected": -6.271755695343018, |
| "step": 2860 |
| }, |
| { |
| "epoch": 1.48, |
| "learning_rate": 2.812201185695162e-07, |
| "logits/chosen": -2.6722216606140137, |
| "logits/rejected": -2.614084482192993, |
| "logps/chosen": -170.52699279785156, |
| "logps/rejected": -282.1553039550781, |
| "loss": 0.0736, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.22432120144367218, |
| "rewards/margins": 6.5738677978515625, |
| "rewards/rejected": -6.349545955657959, |
| "step": 2870 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 2.802639127940333e-07, |
| "logits/chosen": -2.706528425216675, |
| "logits/rejected": -2.6337788105010986, |
| "logps/chosen": -313.04913330078125, |
| "logps/rejected": -283.40972900390625, |
| "loss": 0.0995, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": 0.12990444898605347, |
| "rewards/margins": 6.201463222503662, |
| "rewards/rejected": -6.071558475494385, |
| "step": 2880 |
| }, |
| { |
| "epoch": 1.49, |
| "learning_rate": 2.7930770701855036e-07, |
| "logits/chosen": -2.7476582527160645, |
| "logits/rejected": -2.683351755142212, |
| "logps/chosen": -351.7320251464844, |
| "logps/rejected": -329.08026123046875, |
| "loss": 0.1173, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.49377602338790894, |
| "rewards/margins": 8.726078987121582, |
| "rewards/rejected": -8.23230266571045, |
| "step": 2890 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 2.783515012430675e-07, |
| "logits/chosen": -2.5259041786193848, |
| "logits/rejected": -2.5326766967773438, |
| "logps/chosen": -190.27813720703125, |
| "logps/rejected": -241.18991088867188, |
| "loss": 0.1145, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -0.45352381467819214, |
| "rewards/margins": 5.041954517364502, |
| "rewards/rejected": -5.495478630065918, |
| "step": 2900 |
| }, |
| { |
| "epoch": 1.5, |
| "eval_logits/chosen": -2.4674015045166016, |
| "eval_logits/rejected": -2.42722749710083, |
| "eval_logps/chosen": -272.1744384765625, |
| "eval_logps/rejected": -299.8460998535156, |
| "eval_loss": 0.579024076461792, |
| "eval_rewards/accuracies": 0.84375, |
| "eval_rewards/chosen": -1.7939329147338867, |
| "eval_rewards/margins": 2.4936835765838623, |
| "eval_rewards/rejected": -4.28761625289917, |
| "eval_runtime": 57.5798, |
| "eval_samples_per_second": 17.367, |
| "eval_steps_per_second": 0.278, |
| "step": 2900 |
| }, |
| { |
| "epoch": 1.5, |
| "learning_rate": 2.773952954675846e-07, |
| "logits/chosen": -2.6517531871795654, |
| "logits/rejected": -2.611769914627075, |
| "logps/chosen": -357.39666748046875, |
| "logps/rejected": -310.7156677246094, |
| "loss": 0.086, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 1.0798923969268799, |
| "rewards/margins": 8.055770874023438, |
| "rewards/rejected": -6.9758782386779785, |
| "step": 2910 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 2.764390896921017e-07, |
| "logits/chosen": -2.603874921798706, |
| "logits/rejected": -2.5526695251464844, |
| "logps/chosen": -272.8443298339844, |
| "logps/rejected": -329.92401123046875, |
| "loss": 0.0943, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.03761887550354, |
| "rewards/margins": 8.095129013061523, |
| "rewards/rejected": -7.057511329650879, |
| "step": 2920 |
| }, |
| { |
| "epoch": 1.51, |
| "learning_rate": 2.754828839166188e-07, |
| "logits/chosen": -2.6576075553894043, |
| "logits/rejected": -2.6514670848846436, |
| "logps/chosen": -191.14877319335938, |
| "logps/rejected": -301.5423889160156, |
| "loss": 0.0724, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": 0.5387217402458191, |
| "rewards/margins": 7.507475852966309, |
| "rewards/rejected": -6.968753814697266, |
| "step": 2930 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 2.7452667814113594e-07, |
| "logits/chosen": -2.7524516582489014, |
| "logits/rejected": -2.7706661224365234, |
| "logps/chosen": -318.32501220703125, |
| "logps/rejected": -316.9727783203125, |
| "loss": 0.087, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.01138849277049303, |
| "rewards/margins": 5.306549072265625, |
| "rewards/rejected": -5.295160293579102, |
| "step": 2940 |
| }, |
| { |
| "epoch": 1.52, |
| "learning_rate": 2.7357047236565306e-07, |
| "logits/chosen": -2.699721097946167, |
| "logits/rejected": -2.7687458992004395, |
| "logps/chosen": -280.2265319824219, |
| "logps/rejected": -364.947509765625, |
| "loss": 0.0659, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 0.4582802355289459, |
| "rewards/margins": 6.130402088165283, |
| "rewards/rejected": -5.672121047973633, |
| "step": 2950 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 2.7261426659017017e-07, |
| "logits/chosen": -2.7413382530212402, |
| "logits/rejected": -2.7660346031188965, |
| "logps/chosen": -324.0436706542969, |
| "logps/rejected": -243.81802368164062, |
| "loss": 0.0914, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -0.6909480094909668, |
| "rewards/margins": 4.682136535644531, |
| "rewards/rejected": -5.373085021972656, |
| "step": 2960 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 2.716580608146873e-07, |
| "logits/chosen": -2.646226644515991, |
| "logits/rejected": -2.712257146835327, |
| "logps/chosen": -374.819580078125, |
| "logps/rejected": -351.70025634765625, |
| "loss": 0.0677, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.3280370235443115, |
| "rewards/margins": 9.090957641601562, |
| "rewards/rejected": -7.762920379638672, |
| "step": 2970 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 2.7070185503920446e-07, |
| "logits/chosen": -2.639868974685669, |
| "logits/rejected": -2.7557711601257324, |
| "logps/chosen": -273.087646484375, |
| "logps/rejected": -317.41766357421875, |
| "loss": 0.0616, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6701494455337524, |
| "rewards/margins": 7.305191993713379, |
| "rewards/rejected": -7.975341796875, |
| "step": 2980 |
| }, |
| { |
| "epoch": 1.54, |
| "learning_rate": 2.6974564926372157e-07, |
| "logits/chosen": -2.759113311767578, |
| "logits/rejected": -2.7275261878967285, |
| "logps/chosen": -301.2916259765625, |
| "logps/rejected": -324.0910949707031, |
| "loss": 0.0916, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 0.8304478526115417, |
| "rewards/margins": 7.585775852203369, |
| "rewards/rejected": -6.755328178405762, |
| "step": 2990 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 2.687894434882387e-07, |
| "logits/chosen": -2.653514862060547, |
| "logits/rejected": -2.606902599334717, |
| "logps/chosen": -277.50506591796875, |
| "logps/rejected": -264.87481689453125, |
| "loss": 0.0644, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.18126115202903748, |
| "rewards/margins": 4.916709899902344, |
| "rewards/rejected": -5.097971439361572, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.55, |
| "eval_logits/chosen": -2.457401990890503, |
| "eval_logits/rejected": -2.4193201065063477, |
| "eval_logps/chosen": -271.5200500488281, |
| "eval_logps/rejected": -299.0209045410156, |
| "eval_loss": 0.5735086798667908, |
| "eval_rewards/accuracies": 0.8125, |
| "eval_rewards/chosen": -1.7284938097000122, |
| "eval_rewards/margins": 2.476605176925659, |
| "eval_rewards/rejected": -4.205099105834961, |
| "eval_runtime": 58.4864, |
| "eval_samples_per_second": 17.098, |
| "eval_steps_per_second": 0.274, |
| "step": 3000 |
| }, |
| { |
| "epoch": 1.55, |
| "learning_rate": 2.678332377127558e-07, |
| "logits/chosen": -2.693279981613159, |
| "logits/rejected": -2.6434133052825928, |
| "logps/chosen": -257.072509765625, |
| "logps/rejected": -268.16107177734375, |
| "loss": 0.0794, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 0.660453736782074, |
| "rewards/margins": 7.455300807952881, |
| "rewards/rejected": -6.7948479652404785, |
| "step": 3010 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 2.668770319372729e-07, |
| "logits/chosen": -2.476691484451294, |
| "logits/rejected": -2.369554042816162, |
| "logps/chosen": -243.014892578125, |
| "logps/rejected": -179.72573852539062, |
| "loss": 0.0769, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -0.28263527154922485, |
| "rewards/margins": 4.549951076507568, |
| "rewards/rejected": -4.832587242126465, |
| "step": 3020 |
| }, |
| { |
| "epoch": 1.56, |
| "learning_rate": 2.6592082616179004e-07, |
| "logits/chosen": -2.799598217010498, |
| "logits/rejected": -2.6863772869110107, |
| "logps/chosen": -202.5391082763672, |
| "logps/rejected": -217.33743286132812, |
| "loss": 0.0956, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.2968635559082031, |
| "rewards/margins": 5.342751979827881, |
| "rewards/rejected": -5.639615535736084, |
| "step": 3030 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 2.649646203863071e-07, |
| "logits/chosen": -2.78475022315979, |
| "logits/rejected": -2.7314937114715576, |
| "logps/chosen": -341.8492126464844, |
| "logps/rejected": -377.26318359375, |
| "loss": 0.0786, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": 0.6281161308288574, |
| "rewards/margins": 6.323546409606934, |
| "rewards/rejected": -5.695430278778076, |
| "step": 3040 |
| }, |
| { |
| "epoch": 1.57, |
| "learning_rate": 2.640084146108242e-07, |
| "logits/chosen": -2.835313558578491, |
| "logits/rejected": -2.8478219509124756, |
| "logps/chosen": -413.6333923339844, |
| "logps/rejected": -281.8817138671875, |
| "loss": 0.0753, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 0.3795287013053894, |
| "rewards/margins": 6.382612705230713, |
| "rewards/rejected": -6.003084659576416, |
| "step": 3050 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 2.6305220883534133e-07, |
| "logits/chosen": -2.7521536350250244, |
| "logits/rejected": -2.802274227142334, |
| "logps/chosen": -283.74365234375, |
| "logps/rejected": -274.3045654296875, |
| "loss": 0.0746, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 0.19368138909339905, |
| "rewards/margins": 5.350792407989502, |
| "rewards/rejected": -5.157111167907715, |
| "step": 3060 |
| }, |
| { |
| "epoch": 1.58, |
| "learning_rate": 2.6209600305985845e-07, |
| "logits/chosen": -2.6364054679870605, |
| "logits/rejected": -2.6233391761779785, |
| "logps/chosen": -320.7242736816406, |
| "logps/rejected": -418.53863525390625, |
| "loss": 0.1234, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 0.2693870961666107, |
| "rewards/margins": 10.055575370788574, |
| "rewards/rejected": -9.786188125610352, |
| "step": 3070 |
| }, |
| { |
| "epoch": 1.59, |
| "learning_rate": 2.6113979728437556e-07, |
| "logits/chosen": -2.757228374481201, |
| "logits/rejected": -2.746696949005127, |
| "logps/chosen": -278.10736083984375, |
| "logps/rejected": -269.76751708984375, |
| "loss": 0.1059, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.18973210453987122, |
| "rewards/margins": 5.467267036437988, |
| "rewards/rejected": -5.2775349617004395, |
| "step": 3080 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 2.601835915088927e-07, |
| "logits/chosen": -2.6811797618865967, |
| "logits/rejected": -2.694938898086548, |
| "logps/chosen": -268.3731689453125, |
| "logps/rejected": -316.51507568359375, |
| "loss": 0.0925, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.0646179914474487, |
| "rewards/margins": 7.384450435638428, |
| "rewards/rejected": -6.319832801818848, |
| "step": 3090 |
| }, |
| { |
| "epoch": 1.6, |
| "learning_rate": 2.592273857334098e-07, |
| "logits/chosen": -2.8825931549072266, |
| "logits/rejected": -2.838369607925415, |
| "logps/chosen": -272.46466064453125, |
| "logps/rejected": -302.24951171875, |
| "loss": 0.0798, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.7096697092056274, |
| "rewards/margins": 8.205864906311035, |
| "rewards/rejected": -6.496194362640381, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.6, |
| "eval_logits/chosen": -2.569591760635376, |
| "eval_logits/rejected": -2.536669969558716, |
| "eval_logps/chosen": -271.4610290527344, |
| "eval_logps/rejected": -299.8199768066406, |
| "eval_loss": 0.5536529421806335, |
| "eval_rewards/accuracies": 0.84375, |
| "eval_rewards/chosen": -1.7225927114486694, |
| "eval_rewards/margins": 2.5624139308929443, |
| "eval_rewards/rejected": -4.285006523132324, |
| "eval_runtime": 56.0043, |
| "eval_samples_per_second": 17.856, |
| "eval_steps_per_second": 0.286, |
| "step": 3100 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 2.582711799579269e-07, |
| "logits/chosen": -2.8248748779296875, |
| "logits/rejected": -2.778346300125122, |
| "logps/chosen": -299.2210693359375, |
| "logps/rejected": -313.359375, |
| "loss": 0.071, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 0.7604249715805054, |
| "rewards/margins": 7.902795314788818, |
| "rewards/rejected": -7.142370700836182, |
| "step": 3110 |
| }, |
| { |
| "epoch": 1.61, |
| "learning_rate": 2.573149741824441e-07, |
| "logits/chosen": -2.6978442668914795, |
| "logits/rejected": -2.6833174228668213, |
| "logps/chosen": -336.5847473144531, |
| "logps/rejected": -245.4396209716797, |
| "loss": 0.0829, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 0.08581381291151047, |
| "rewards/margins": 5.277584075927734, |
| "rewards/rejected": -5.191770076751709, |
| "step": 3120 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 2.563587684069612e-07, |
| "logits/chosen": -2.8688364028930664, |
| "logits/rejected": -2.7075347900390625, |
| "logps/chosen": -350.5228576660156, |
| "logps/rejected": -273.2851867675781, |
| "loss": 0.0564, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7785223126411438, |
| "rewards/margins": 6.459234714508057, |
| "rewards/rejected": -7.237756252288818, |
| "step": 3130 |
| }, |
| { |
| "epoch": 1.62, |
| "learning_rate": 2.554025626314783e-07, |
| "logits/chosen": -2.727123737335205, |
| "logits/rejected": -2.725803852081299, |
| "logps/chosen": -344.9717712402344, |
| "logps/rejected": -390.8403625488281, |
| "loss": 0.0898, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.9268060922622681, |
| "rewards/margins": 6.525388240814209, |
| "rewards/rejected": -7.4521942138671875, |
| "step": 3140 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 2.544463568559954e-07, |
| "logits/chosen": -2.786041259765625, |
| "logits/rejected": -2.7138454914093018, |
| "logps/chosen": -323.46832275390625, |
| "logps/rejected": -323.85125732421875, |
| "loss": 0.0678, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.7023747563362122, |
| "rewards/margins": 8.29463005065918, |
| "rewards/rejected": -7.592255592346191, |
| "step": 3150 |
| }, |
| { |
| "epoch": 1.63, |
| "learning_rate": 2.5349015108051254e-07, |
| "logits/chosen": -2.623403787612915, |
| "logits/rejected": -2.6345882415771484, |
| "logps/chosen": -251.9879913330078, |
| "logps/rejected": -317.69769287109375, |
| "loss": 0.0833, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.3217948377132416, |
| "rewards/margins": 7.894297122955322, |
| "rewards/rejected": -7.572502136230469, |
| "step": 3160 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 2.5253394530502966e-07, |
| "logits/chosen": -2.483633518218994, |
| "logits/rejected": -2.636124610900879, |
| "logps/chosen": -375.053955078125, |
| "logps/rejected": -289.378662109375, |
| "loss": 0.0582, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.537722647190094, |
| "rewards/margins": 7.012340545654297, |
| "rewards/rejected": -6.474618434906006, |
| "step": 3170 |
| }, |
| { |
| "epoch": 1.64, |
| "learning_rate": 2.5157773952954677e-07, |
| "logits/chosen": -2.746309757232666, |
| "logits/rejected": -2.692573070526123, |
| "logps/chosen": -342.6100158691406, |
| "logps/rejected": -319.15850830078125, |
| "loss": 0.1194, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 0.26776519417762756, |
| "rewards/margins": 7.538763523101807, |
| "rewards/rejected": -7.270998477935791, |
| "step": 3180 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 2.506215337540639e-07, |
| "logits/chosen": -2.842471122741699, |
| "logits/rejected": -2.7346935272216797, |
| "logps/chosen": -357.83837890625, |
| "logps/rejected": -415.0469665527344, |
| "loss": 0.1034, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.20541605353355408, |
| "rewards/margins": 6.793099880218506, |
| "rewards/rejected": -6.587684631347656, |
| "step": 3190 |
| }, |
| { |
| "epoch": 1.65, |
| "learning_rate": 2.4966532797858095e-07, |
| "logits/chosen": -2.6495633125305176, |
| "logits/rejected": -2.666757106781006, |
| "logps/chosen": -266.09454345703125, |
| "logps/rejected": -267.7814636230469, |
| "loss": 0.1013, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.0470559298992157, |
| "rewards/margins": 6.690218925476074, |
| "rewards/rejected": -6.73727560043335, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.65, |
| "eval_logits/chosen": -2.526689291000366, |
| "eval_logits/rejected": -2.492635726928711, |
| "eval_logps/chosen": -269.9497985839844, |
| "eval_logps/rejected": -296.7825012207031, |
| "eval_loss": 0.5574991703033447, |
| "eval_rewards/accuracies": 0.875, |
| "eval_rewards/chosen": -1.5714715719223022, |
| "eval_rewards/margins": 2.4097867012023926, |
| "eval_rewards/rejected": -3.9812583923339844, |
| "eval_runtime": 57.7657, |
| "eval_samples_per_second": 17.311, |
| "eval_steps_per_second": 0.277, |
| "step": 3200 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 2.4870912220309807e-07, |
| "logits/chosen": -2.625276565551758, |
| "logits/rejected": -2.73038649559021, |
| "logps/chosen": -289.6630554199219, |
| "logps/rejected": -282.751953125, |
| "loss": 0.0784, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.39164501428604126, |
| "rewards/margins": 5.46439266204834, |
| "rewards/rejected": -5.072747707366943, |
| "step": 3210 |
| }, |
| { |
| "epoch": 1.66, |
| "learning_rate": 2.477529164276152e-07, |
| "logits/chosen": -2.7541117668151855, |
| "logits/rejected": -2.75673508644104, |
| "logps/chosen": -309.00799560546875, |
| "logps/rejected": -350.14556884765625, |
| "loss": 0.0784, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.0380117893218994, |
| "rewards/margins": 7.454461574554443, |
| "rewards/rejected": -6.416450500488281, |
| "step": 3220 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 2.4679671065213235e-07, |
| "logits/chosen": -2.7203588485717773, |
| "logits/rejected": -2.704502582550049, |
| "logps/chosen": -347.71453857421875, |
| "logps/rejected": -295.966552734375, |
| "loss": 0.1137, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.29694637656211853, |
| "rewards/margins": 5.83956241607666, |
| "rewards/rejected": -5.5426154136657715, |
| "step": 3230 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 2.4584050487664947e-07, |
| "logits/chosen": -2.7662394046783447, |
| "logits/rejected": -2.7026658058166504, |
| "logps/chosen": -346.2272644042969, |
| "logps/rejected": -320.8843078613281, |
| "loss": 0.0958, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.34752047061920166, |
| "rewards/margins": 6.67000675201416, |
| "rewards/rejected": -6.32248592376709, |
| "step": 3240 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 2.448842991011666e-07, |
| "logits/chosen": -2.6715903282165527, |
| "logits/rejected": -2.603444814682007, |
| "logps/chosen": -275.488037109375, |
| "logps/rejected": -369.26861572265625, |
| "loss": 0.1227, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 0.22678561508655548, |
| "rewards/margins": 7.3243255615234375, |
| "rewards/rejected": -7.09753942489624, |
| "step": 3250 |
| }, |
| { |
| "epoch": 1.68, |
| "learning_rate": 2.439280933256837e-07, |
| "logits/chosen": -2.5521583557128906, |
| "logits/rejected": -2.575525999069214, |
| "logps/chosen": -234.0755157470703, |
| "logps/rejected": -253.0180206298828, |
| "loss": 0.0473, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.34158411622047424, |
| "rewards/margins": 7.31561803817749, |
| "rewards/rejected": -6.974034309387207, |
| "step": 3260 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 2.429718875502008e-07, |
| "logits/chosen": -2.246537923812866, |
| "logits/rejected": -2.325873613357544, |
| "logps/chosen": -278.77386474609375, |
| "logps/rejected": -295.7586975097656, |
| "loss": 0.0705, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 0.1356315165758133, |
| "rewards/margins": 5.905457496643066, |
| "rewards/rejected": -5.7698259353637695, |
| "step": 3270 |
| }, |
| { |
| "epoch": 1.69, |
| "learning_rate": 2.420156817747179e-07, |
| "logits/chosen": -2.7891759872436523, |
| "logits/rejected": -2.7579565048217773, |
| "logps/chosen": -356.2643127441406, |
| "logps/rejected": -370.2890319824219, |
| "loss": 0.0637, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.439815878868103, |
| "rewards/margins": 8.942848205566406, |
| "rewards/rejected": -7.503033638000488, |
| "step": 3280 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 2.41059475999235e-07, |
| "logits/chosen": -2.726214647293091, |
| "logits/rejected": -2.5874438285827637, |
| "logps/chosen": -226.1343231201172, |
| "logps/rejected": -244.17489624023438, |
| "loss": 0.0722, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -0.5386222004890442, |
| "rewards/margins": 6.369637489318848, |
| "rewards/rejected": -6.908260345458984, |
| "step": 3290 |
| }, |
| { |
| "epoch": 1.7, |
| "learning_rate": 2.4010327022375216e-07, |
| "logits/chosen": -2.660001516342163, |
| "logits/rejected": -2.6236727237701416, |
| "logps/chosen": -350.3585510253906, |
| "logps/rejected": -423.14605712890625, |
| "loss": 0.1254, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.3113314211368561, |
| "rewards/margins": 8.888386726379395, |
| "rewards/rejected": -8.577055931091309, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.7, |
| "eval_logits/chosen": -2.533999443054199, |
| "eval_logits/rejected": -2.5016584396362305, |
| "eval_logps/chosen": -270.6473083496094, |
| "eval_logps/rejected": -301.6729736328125, |
| "eval_loss": 0.5904735326766968, |
| "eval_rewards/accuracies": 0.859375, |
| "eval_rewards/chosen": -1.6412229537963867, |
| "eval_rewards/margins": 2.8290822505950928, |
| "eval_rewards/rejected": -4.4703049659729, |
| "eval_runtime": 56.7796, |
| "eval_samples_per_second": 17.612, |
| "eval_steps_per_second": 0.282, |
| "step": 3300 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 2.391470644482693e-07, |
| "logits/chosen": -2.5988898277282715, |
| "logits/rejected": -2.633589267730713, |
| "logps/chosen": -299.37860107421875, |
| "logps/rejected": -375.39788818359375, |
| "loss": 0.0757, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.23076781630516052, |
| "rewards/margins": 7.872524261474609, |
| "rewards/rejected": -7.641757011413574, |
| "step": 3310 |
| }, |
| { |
| "epoch": 1.71, |
| "learning_rate": 2.3819085867278636e-07, |
| "logits/chosen": -2.6122288703918457, |
| "logits/rejected": -2.6097447872161865, |
| "logps/chosen": -184.86968994140625, |
| "logps/rejected": -299.4801330566406, |
| "loss": 0.0826, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.3567231297492981, |
| "rewards/margins": 8.226961135864258, |
| "rewards/rejected": -8.583684921264648, |
| "step": 3320 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 2.3723465289730348e-07, |
| "logits/chosen": -2.629166603088379, |
| "logits/rejected": -2.598412036895752, |
| "logps/chosen": -287.3348693847656, |
| "logps/rejected": -273.7483825683594, |
| "loss": 0.1158, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.2611514925956726, |
| "rewards/margins": 5.725651264190674, |
| "rewards/rejected": -5.986802577972412, |
| "step": 3330 |
| }, |
| { |
| "epoch": 1.72, |
| "learning_rate": 2.362784471218206e-07, |
| "logits/chosen": -2.73244571685791, |
| "logits/rejected": -2.7296879291534424, |
| "logps/chosen": -355.73236083984375, |
| "logps/rejected": -323.4547424316406, |
| "loss": 0.0704, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 1.1553064584732056, |
| "rewards/margins": 6.715522766113281, |
| "rewards/rejected": -5.560215950012207, |
| "step": 3340 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 2.353222413463377e-07, |
| "logits/chosen": -2.7055535316467285, |
| "logits/rejected": -2.659834146499634, |
| "logps/chosen": -234.71792602539062, |
| "logps/rejected": -258.9352722167969, |
| "loss": 0.0594, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -0.04846489429473877, |
| "rewards/margins": 5.886017799377441, |
| "rewards/rejected": -5.934482574462891, |
| "step": 3350 |
| }, |
| { |
| "epoch": 1.73, |
| "learning_rate": 2.3436603557085483e-07, |
| "logits/chosen": -2.7515339851379395, |
| "logits/rejected": -2.647671937942505, |
| "logps/chosen": -205.7976837158203, |
| "logps/rejected": -278.0999450683594, |
| "loss": 0.0886, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 0.5387641787528992, |
| "rewards/margins": 6.382545471191406, |
| "rewards/rejected": -5.8437819480896, |
| "step": 3360 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 2.3340982979537197e-07, |
| "logits/chosen": -2.727328300476074, |
| "logits/rejected": -2.7680537700653076, |
| "logps/chosen": -395.30169677734375, |
| "logps/rejected": -329.90234375, |
| "loss": 0.0734, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.9938201904296875, |
| "rewards/margins": 6.250397682189941, |
| "rewards/rejected": -5.256577491760254, |
| "step": 3370 |
| }, |
| { |
| "epoch": 1.74, |
| "learning_rate": 2.3245362401988909e-07, |
| "logits/chosen": -2.7096972465515137, |
| "logits/rejected": -2.605597734451294, |
| "logps/chosen": -308.7266845703125, |
| "logps/rejected": -269.8343811035156, |
| "loss": 0.0906, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 1.0761408805847168, |
| "rewards/margins": 7.6199774742126465, |
| "rewards/rejected": -6.543837547302246, |
| "step": 3380 |
| }, |
| { |
| "epoch": 1.75, |
| "learning_rate": 2.314974182444062e-07, |
| "logits/chosen": -2.7419021129608154, |
| "logits/rejected": -2.797194004058838, |
| "logps/chosen": -308.60302734375, |
| "logps/rejected": -293.39581298828125, |
| "loss": 0.1425, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -1.2203086614608765, |
| "rewards/margins": 5.09138822555542, |
| "rewards/rejected": -6.311697483062744, |
| "step": 3390 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 2.305412124689233e-07, |
| "logits/chosen": -2.521355628967285, |
| "logits/rejected": -2.5652852058410645, |
| "logps/chosen": -271.5760192871094, |
| "logps/rejected": -332.80743408203125, |
| "loss": 0.085, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.12037495523691177, |
| "rewards/margins": 6.457161903381348, |
| "rewards/rejected": -6.33678674697876, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.76, |
| "eval_logits/chosen": -2.495957612991333, |
| "eval_logits/rejected": -2.461439609527588, |
| "eval_logps/chosen": -273.39410400390625, |
| "eval_logps/rejected": -303.7296142578125, |
| "eval_loss": 0.6132888793945312, |
| "eval_rewards/accuracies": 0.84375, |
| "eval_rewards/chosen": -1.9158999919891357, |
| "eval_rewards/margins": 2.760065793991089, |
| "eval_rewards/rejected": -4.675965309143066, |
| "eval_runtime": 57.5942, |
| "eval_samples_per_second": 17.363, |
| "eval_steps_per_second": 0.278, |
| "step": 3400 |
| }, |
| { |
| "epoch": 1.76, |
| "learning_rate": 2.295850066934404e-07, |
| "logits/chosen": -2.6166062355041504, |
| "logits/rejected": -2.540011167526245, |
| "logps/chosen": -279.5812683105469, |
| "logps/rejected": -327.165283203125, |
| "loss": 0.0789, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -1.3240896463394165, |
| "rewards/margins": 6.653228759765625, |
| "rewards/rejected": -7.977317810058594, |
| "step": 3410 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 2.2862880091795752e-07, |
| "logits/chosen": -2.5269017219543457, |
| "logits/rejected": -2.516174077987671, |
| "logps/chosen": -194.35435485839844, |
| "logps/rejected": -307.7319030761719, |
| "loss": 0.0863, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -0.5239468812942505, |
| "rewards/margins": 5.74463415145874, |
| "rewards/rejected": -6.268580436706543, |
| "step": 3420 |
| }, |
| { |
| "epoch": 1.77, |
| "learning_rate": 2.2767259514247464e-07, |
| "logits/chosen": -2.3292429447174072, |
| "logits/rejected": -2.2449238300323486, |
| "logps/chosen": -235.97329711914062, |
| "logps/rejected": -297.90130615234375, |
| "loss": 0.0867, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.7022100687026978, |
| "rewards/margins": 6.978515625, |
| "rewards/rejected": -6.27630615234375, |
| "step": 3430 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 2.2671638936699178e-07, |
| "logits/chosen": -2.413973093032837, |
| "logits/rejected": -2.389719247817993, |
| "logps/chosen": -315.0927734375, |
| "logps/rejected": -248.7796630859375, |
| "loss": 0.0776, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 0.12068784236907959, |
| "rewards/margins": 5.998594760894775, |
| "rewards/rejected": -5.877906799316406, |
| "step": 3440 |
| }, |
| { |
| "epoch": 1.78, |
| "learning_rate": 2.257601835915089e-07, |
| "logits/chosen": -2.582331895828247, |
| "logits/rejected": -2.504185199737549, |
| "logps/chosen": -333.48358154296875, |
| "logps/rejected": -384.9881286621094, |
| "loss": 0.0597, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 0.09340760856866837, |
| "rewards/margins": 7.274853706359863, |
| "rewards/rejected": -7.181446075439453, |
| "step": 3450 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 2.24803977816026e-07, |
| "logits/chosen": -2.3808603286743164, |
| "logits/rejected": -2.5695574283599854, |
| "logps/chosen": -218.94461059570312, |
| "logps/rejected": -255.54013061523438, |
| "loss": 0.1264, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 0.38311663269996643, |
| "rewards/margins": 8.536532402038574, |
| "rewards/rejected": -8.153416633605957, |
| "step": 3460 |
| }, |
| { |
| "epoch": 1.79, |
| "learning_rate": 2.2384777204054313e-07, |
| "logits/chosen": -2.7166881561279297, |
| "logits/rejected": -2.651099920272827, |
| "logps/chosen": -266.19390869140625, |
| "logps/rejected": -290.5924377441406, |
| "loss": 0.0907, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.5935767292976379, |
| "rewards/margins": 7.180167198181152, |
| "rewards/rejected": -6.586589813232422, |
| "step": 3470 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 2.2289156626506022e-07, |
| "logits/chosen": -2.6081440448760986, |
| "logits/rejected": -2.435035228729248, |
| "logps/chosen": -307.9920959472656, |
| "logps/rejected": -380.0340270996094, |
| "loss": 0.081, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 0.4312973916530609, |
| "rewards/margins": 8.036073684692383, |
| "rewards/rejected": -7.604775428771973, |
| "step": 3480 |
| }, |
| { |
| "epoch": 1.8, |
| "learning_rate": 2.2193536048957733e-07, |
| "logits/chosen": -2.6976230144500732, |
| "logits/rejected": -2.664168119430542, |
| "logps/chosen": -258.38092041015625, |
| "logps/rejected": -316.06903076171875, |
| "loss": 0.1032, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.064023457467556, |
| "rewards/margins": 6.762887477874756, |
| "rewards/rejected": -6.698863983154297, |
| "step": 3490 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 2.2097915471409445e-07, |
| "logits/chosen": -2.7331182956695557, |
| "logits/rejected": -2.7065072059631348, |
| "logps/chosen": -242.06661987304688, |
| "logps/rejected": -319.9682312011719, |
| "loss": 0.065, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 0.17091651260852814, |
| "rewards/margins": 7.219882011413574, |
| "rewards/rejected": -7.0489654541015625, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.81, |
| "eval_logits/chosen": -2.5004467964172363, |
| "eval_logits/rejected": -2.4597153663635254, |
| "eval_logps/chosen": -272.472412109375, |
| "eval_logps/rejected": -300.49505615234375, |
| "eval_loss": 0.607377290725708, |
| "eval_rewards/accuracies": 0.859375, |
| "eval_rewards/chosen": -1.8237330913543701, |
| "eval_rewards/margins": 2.528778553009033, |
| "eval_rewards/rejected": -4.352511405944824, |
| "eval_runtime": 58.0784, |
| "eval_samples_per_second": 17.218, |
| "eval_steps_per_second": 0.275, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 2.200229489386116e-07, |
| "logits/chosen": -2.7414891719818115, |
| "logits/rejected": -2.6085870265960693, |
| "logps/chosen": -311.56866455078125, |
| "logps/rejected": -367.4567565917969, |
| "loss": 0.0892, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": 0.9936217069625854, |
| "rewards/margins": 6.746335029602051, |
| "rewards/rejected": -5.752713680267334, |
| "step": 3510 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 2.190667431631287e-07, |
| "logits/chosen": -2.767604112625122, |
| "logits/rejected": -2.6218278408050537, |
| "logps/chosen": -354.67822265625, |
| "logps/rejected": -397.64068603515625, |
| "loss": 0.1602, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 1.2055106163024902, |
| "rewards/margins": 9.150163650512695, |
| "rewards/rejected": -7.944652557373047, |
| "step": 3520 |
| }, |
| { |
| "epoch": 1.82, |
| "learning_rate": 2.1811053738764582e-07, |
| "logits/chosen": -2.4568405151367188, |
| "logits/rejected": -2.4575486183166504, |
| "logps/chosen": -235.7547149658203, |
| "logps/rejected": -265.8733215332031, |
| "loss": 0.0686, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -0.24134965240955353, |
| "rewards/margins": 4.935102939605713, |
| "rewards/rejected": -5.17645263671875, |
| "step": 3530 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 2.1715433161216294e-07, |
| "logits/chosen": -2.7147650718688965, |
| "logits/rejected": -2.6502747535705566, |
| "logps/chosen": -291.44219970703125, |
| "logps/rejected": -360.6312255859375, |
| "loss": 0.0713, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.0583919286727905, |
| "rewards/margins": 7.564295768737793, |
| "rewards/rejected": -6.5059027671813965, |
| "step": 3540 |
| }, |
| { |
| "epoch": 1.83, |
| "learning_rate": 2.1619812583668005e-07, |
| "logits/chosen": -2.5987043380737305, |
| "logits/rejected": -2.607950448989868, |
| "logps/chosen": -299.592529296875, |
| "logps/rejected": -337.1802978515625, |
| "loss": 0.0877, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 0.13366484642028809, |
| "rewards/margins": 6.168055534362793, |
| "rewards/rejected": -6.034390926361084, |
| "step": 3550 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 2.1524192006119714e-07, |
| "logits/chosen": -2.549741744995117, |
| "logits/rejected": -2.519808292388916, |
| "logps/chosen": -290.70684814453125, |
| "logps/rejected": -333.82489013671875, |
| "loss": 0.0653, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6356315612792969, |
| "rewards/margins": 6.749837398529053, |
| "rewards/rejected": -7.38546895980835, |
| "step": 3560 |
| }, |
| { |
| "epoch": 1.84, |
| "learning_rate": 2.1428571428571426e-07, |
| "logits/chosen": -2.5867228507995605, |
| "logits/rejected": -2.5592923164367676, |
| "logps/chosen": -248.76699829101562, |
| "logps/rejected": -269.9541015625, |
| "loss": 0.0585, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.503594696521759, |
| "rewards/margins": 5.626603126525879, |
| "rewards/rejected": -6.130197525024414, |
| "step": 3570 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 2.133295085102314e-07, |
| "logits/chosen": -2.4805967807769775, |
| "logits/rejected": -2.5831592082977295, |
| "logps/chosen": -288.694580078125, |
| "logps/rejected": -289.60638427734375, |
| "loss": 0.0924, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -0.20494358241558075, |
| "rewards/margins": 6.796807289123535, |
| "rewards/rejected": -7.0017499923706055, |
| "step": 3580 |
| }, |
| { |
| "epoch": 1.85, |
| "learning_rate": 2.1237330273474851e-07, |
| "logits/chosen": -2.6966331005096436, |
| "logits/rejected": -2.650146245956421, |
| "logps/chosen": -374.99774169921875, |
| "logps/rejected": -346.72711181640625, |
| "loss": 0.1106, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.8571535348892212, |
| "rewards/margins": 7.465939521789551, |
| "rewards/rejected": -6.608786106109619, |
| "step": 3590 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 2.1141709695926563e-07, |
| "logits/chosen": -2.73488450050354, |
| "logits/rejected": -2.7135844230651855, |
| "logps/chosen": -230.2847137451172, |
| "logps/rejected": -190.71505737304688, |
| "loss": 0.0755, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -0.4617141783237457, |
| "rewards/margins": 5.763091087341309, |
| "rewards/rejected": -6.224804878234863, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.86, |
| "eval_logits/chosen": -2.4716105461120605, |
| "eval_logits/rejected": -2.4327313899993896, |
| "eval_logps/chosen": -273.4872131347656, |
| "eval_logps/rejected": -300.97479248046875, |
| "eval_loss": 0.5835925340652466, |
| "eval_rewards/accuracies": 0.8125, |
| "eval_rewards/chosen": -1.9252128601074219, |
| "eval_rewards/margins": 2.4752719402313232, |
| "eval_rewards/rejected": -4.400485038757324, |
| "eval_runtime": 60.2598, |
| "eval_samples_per_second": 16.595, |
| "eval_steps_per_second": 0.266, |
| "step": 3600 |
| }, |
| { |
| "epoch": 1.86, |
| "learning_rate": 2.1046089118378275e-07, |
| "logits/chosen": -2.7524561882019043, |
| "logits/rejected": -2.7061877250671387, |
| "logps/chosen": -265.36962890625, |
| "logps/rejected": -293.2806396484375, |
| "loss": 0.1317, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.8292659521102905, |
| "rewards/margins": 7.344795227050781, |
| "rewards/rejected": -6.515528678894043, |
| "step": 3610 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 2.0950468540829986e-07, |
| "logits/chosen": -2.6626524925231934, |
| "logits/rejected": -2.640347719192505, |
| "logps/chosen": -207.5610809326172, |
| "logps/rejected": -238.7421417236328, |
| "loss": 0.097, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.3047277331352234, |
| "rewards/margins": 6.0444464683532715, |
| "rewards/rejected": -6.349174499511719, |
| "step": 3620 |
| }, |
| { |
| "epoch": 1.87, |
| "learning_rate": 2.0854847963281698e-07, |
| "logits/chosen": -2.450810194015503, |
| "logits/rejected": -2.3714869022369385, |
| "logps/chosen": -290.0536804199219, |
| "logps/rejected": -285.4010009765625, |
| "loss": 0.076, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -0.6087032556533813, |
| "rewards/margins": 5.092909336090088, |
| "rewards/rejected": -5.70161247253418, |
| "step": 3630 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 2.0759227385733407e-07, |
| "logits/chosen": -2.564415216445923, |
| "logits/rejected": -2.6595184803009033, |
| "logps/chosen": -372.54949951171875, |
| "logps/rejected": -315.68438720703125, |
| "loss": 0.1015, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.47370368242263794, |
| "rewards/margins": 6.991959571838379, |
| "rewards/rejected": -7.465662479400635, |
| "step": 3640 |
| }, |
| { |
| "epoch": 1.88, |
| "learning_rate": 2.066360680818512e-07, |
| "logits/chosen": -2.651179552078247, |
| "logits/rejected": -2.6251769065856934, |
| "logps/chosen": -375.2741394042969, |
| "logps/rejected": -317.2344055175781, |
| "loss": 0.1108, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.11416218429803848, |
| "rewards/margins": 7.2585554122924805, |
| "rewards/rejected": -7.144394874572754, |
| "step": 3650 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 2.0567986230636832e-07, |
| "logits/chosen": -2.5170671939849854, |
| "logits/rejected": -2.639958620071411, |
| "logps/chosen": -219.71676635742188, |
| "logps/rejected": -264.04632568359375, |
| "loss": 0.0927, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.3074165880680084, |
| "rewards/margins": 5.667797565460205, |
| "rewards/rejected": -5.975214004516602, |
| "step": 3660 |
| }, |
| { |
| "epoch": 1.89, |
| "learning_rate": 2.0472365653088544e-07, |
| "logits/chosen": -2.646237850189209, |
| "logits/rejected": -2.712930679321289, |
| "logps/chosen": -297.159423828125, |
| "logps/rejected": -336.8759765625, |
| "loss": 0.0811, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.833897590637207, |
| "rewards/margins": 6.053628444671631, |
| "rewards/rejected": -6.887526035308838, |
| "step": 3670 |
| }, |
| { |
| "epoch": 1.9, |
| "learning_rate": 2.0376745075540256e-07, |
| "logits/chosen": -2.8176498413085938, |
| "logits/rejected": -2.798159122467041, |
| "logps/chosen": -279.7525634765625, |
| "logps/rejected": -284.43316650390625, |
| "loss": 0.0621, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.006855732295662165, |
| "rewards/margins": 6.67547082901001, |
| "rewards/rejected": -6.682325839996338, |
| "step": 3680 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 2.0281124497991967e-07, |
| "logits/chosen": -2.7028536796569824, |
| "logits/rejected": -2.6612937450408936, |
| "logps/chosen": -252.33505249023438, |
| "logps/rejected": -403.2816467285156, |
| "loss": 0.0998, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 0.05786427855491638, |
| "rewards/margins": 7.661648750305176, |
| "rewards/rejected": -7.603785037994385, |
| "step": 3690 |
| }, |
| { |
| "epoch": 1.91, |
| "learning_rate": 2.018550392044368e-07, |
| "logits/chosen": -2.7595086097717285, |
| "logits/rejected": -2.681696653366089, |
| "logps/chosen": -295.5634460449219, |
| "logps/rejected": -494.0884704589844, |
| "loss": 0.0746, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.988511860370636, |
| "rewards/margins": 10.416128158569336, |
| "rewards/rejected": -9.427616119384766, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.91, |
| "eval_logits/chosen": -2.5114712715148926, |
| "eval_logits/rejected": -2.468604564666748, |
| "eval_logps/chosen": -273.5149230957031, |
| "eval_logps/rejected": -301.87615966796875, |
| "eval_loss": 0.5788707137107849, |
| "eval_rewards/accuracies": 0.8125, |
| "eval_rewards/chosen": -1.9279824495315552, |
| "eval_rewards/margins": 2.562638521194458, |
| "eval_rewards/rejected": -4.4906206130981445, |
| "eval_runtime": 56.2772, |
| "eval_samples_per_second": 17.769, |
| "eval_steps_per_second": 0.284, |
| "step": 3700 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 2.0089883342895388e-07, |
| "logits/chosen": -2.7059268951416016, |
| "logits/rejected": -2.753756523132324, |
| "logps/chosen": -202.04066467285156, |
| "logps/rejected": -245.59237670898438, |
| "loss": 0.075, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.1704142987728119, |
| "rewards/margins": 6.434650421142578, |
| "rewards/rejected": -6.605063438415527, |
| "step": 3710 |
| }, |
| { |
| "epoch": 1.92, |
| "learning_rate": 1.9994262765347102e-07, |
| "logits/chosen": -2.642674207687378, |
| "logits/rejected": -2.5932514667510986, |
| "logps/chosen": -399.32305908203125, |
| "logps/rejected": -326.49798583984375, |
| "loss": 0.134, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -0.060870956629514694, |
| "rewards/margins": 6.860370635986328, |
| "rewards/rejected": -6.921241760253906, |
| "step": 3720 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 1.9898642187798813e-07, |
| "logits/chosen": -2.6123080253601074, |
| "logits/rejected": -2.7516627311706543, |
| "logps/chosen": -410.9776306152344, |
| "logps/rejected": -326.8647155761719, |
| "loss": 0.0628, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.07760889828205109, |
| "rewards/margins": 6.036097049713135, |
| "rewards/rejected": -6.113706111907959, |
| "step": 3730 |
| }, |
| { |
| "epoch": 1.93, |
| "learning_rate": 1.9803021610250525e-07, |
| "logits/chosen": -2.641099452972412, |
| "logits/rejected": -2.711040735244751, |
| "logps/chosen": -216.26535034179688, |
| "logps/rejected": -274.23516845703125, |
| "loss": 0.0742, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.5522519946098328, |
| "rewards/margins": 6.619080543518066, |
| "rewards/rejected": -6.06682825088501, |
| "step": 3740 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 1.9707401032702237e-07, |
| "logits/chosen": -2.6930148601531982, |
| "logits/rejected": -2.691132068634033, |
| "logps/chosen": -269.2910461425781, |
| "logps/rejected": -311.1435241699219, |
| "loss": 0.0593, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 0.5175756216049194, |
| "rewards/margins": 6.387923240661621, |
| "rewards/rejected": -5.870347023010254, |
| "step": 3750 |
| }, |
| { |
| "epoch": 1.94, |
| "learning_rate": 1.9611780455153948e-07, |
| "logits/chosen": -2.7549490928649902, |
| "logits/rejected": -2.7406229972839355, |
| "logps/chosen": -292.29833984375, |
| "logps/rejected": -254.7724609375, |
| "loss": 0.1138, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.20989219844341278, |
| "rewards/margins": 5.599099159240723, |
| "rewards/rejected": -5.389206886291504, |
| "step": 3760 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 1.951615987760566e-07, |
| "logits/chosen": -2.5066380500793457, |
| "logits/rejected": -2.4894328117370605, |
| "logps/chosen": -221.491455078125, |
| "logps/rejected": -262.5354309082031, |
| "loss": 0.072, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.48722711205482483, |
| "rewards/margins": 4.931153297424316, |
| "rewards/rejected": -5.418381214141846, |
| "step": 3770 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 1.942053930005737e-07, |
| "logits/chosen": -2.8080992698669434, |
| "logits/rejected": -2.69472074508667, |
| "logps/chosen": -234.15390014648438, |
| "logps/rejected": -300.17291259765625, |
| "loss": 0.0526, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7450860142707825, |
| "rewards/margins": 7.135354518890381, |
| "rewards/rejected": -7.880439758300781, |
| "step": 3780 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 1.9324918722509086e-07, |
| "logits/chosen": -2.763511896133423, |
| "logits/rejected": -2.758317708969116, |
| "logps/chosen": -267.06695556640625, |
| "logps/rejected": -251.7860107421875, |
| "loss": 0.0843, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5050710439682007, |
| "rewards/margins": 6.807704925537109, |
| "rewards/rejected": -7.3127760887146, |
| "step": 3790 |
| }, |
| { |
| "epoch": 1.96, |
| "learning_rate": 1.9229298144960794e-07, |
| "logits/chosen": -2.6397032737731934, |
| "logits/rejected": -2.6277005672454834, |
| "logps/chosen": -230.0516357421875, |
| "logps/rejected": -253.93594360351562, |
| "loss": 0.1348, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.08455387502908707, |
| "rewards/margins": 5.982255458831787, |
| "rewards/rejected": -6.06680965423584, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.96, |
| "eval_logits/chosen": -2.5393259525299072, |
| "eval_logits/rejected": -2.494310140609741, |
| "eval_logps/chosen": -272.8935546875, |
| "eval_logps/rejected": -299.39764404296875, |
| "eval_loss": 0.6015481352806091, |
| "eval_rewards/accuracies": 0.828125, |
| "eval_rewards/chosen": -1.865846872329712, |
| "eval_rewards/margins": 2.376923084259033, |
| "eval_rewards/rejected": -4.242770195007324, |
| "eval_runtime": 57.6051, |
| "eval_samples_per_second": 17.36, |
| "eval_steps_per_second": 0.278, |
| "step": 3800 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 1.9133677567412506e-07, |
| "logits/chosen": -2.790476083755493, |
| "logits/rejected": -2.786289691925049, |
| "logps/chosen": -319.78619384765625, |
| "logps/rejected": -282.0672607421875, |
| "loss": 0.0818, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -0.11253446340560913, |
| "rewards/margins": 6.30682897567749, |
| "rewards/rejected": -6.419363498687744, |
| "step": 3810 |
| }, |
| { |
| "epoch": 1.97, |
| "learning_rate": 1.9038056989864218e-07, |
| "logits/chosen": -2.7111282348632812, |
| "logits/rejected": -2.765439033508301, |
| "logps/chosen": -256.03546142578125, |
| "logps/rejected": -314.55523681640625, |
| "loss": 0.1137, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -0.10970799624919891, |
| "rewards/margins": 6.709108829498291, |
| "rewards/rejected": -6.818817138671875, |
| "step": 3820 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 1.894243641231593e-07, |
| "logits/chosen": -2.5691027641296387, |
| "logits/rejected": -2.4961977005004883, |
| "logps/chosen": -272.5830993652344, |
| "logps/rejected": -250.82357788085938, |
| "loss": 0.103, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.4049804210662842, |
| "rewards/margins": 5.012188911437988, |
| "rewards/rejected": -6.417168617248535, |
| "step": 3830 |
| }, |
| { |
| "epoch": 1.98, |
| "learning_rate": 1.884681583476764e-07, |
| "logits/chosen": -2.8165037631988525, |
| "logits/rejected": -2.76141357421875, |
| "logps/chosen": -229.1115264892578, |
| "logps/rejected": -300.12347412109375, |
| "loss": 0.0621, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -0.30909544229507446, |
| "rewards/margins": 5.961316108703613, |
| "rewards/rejected": -6.270411968231201, |
| "step": 3840 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 1.8751195257219352e-07, |
| "logits/chosen": -2.683171272277832, |
| "logits/rejected": -2.74794602394104, |
| "logps/chosen": -281.92901611328125, |
| "logps/rejected": -404.372314453125, |
| "loss": 0.0803, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.3100479245185852, |
| "rewards/margins": 7.182066440582275, |
| "rewards/rejected": -7.492114067077637, |
| "step": 3850 |
| }, |
| { |
| "epoch": 1.99, |
| "learning_rate": 1.8655574679671067e-07, |
| "logits/chosen": -2.660099506378174, |
| "logits/rejected": -2.69828724861145, |
| "logps/chosen": -241.91787719726562, |
| "logps/rejected": -317.3074951171875, |
| "loss": 0.0951, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -1.469612717628479, |
| "rewards/margins": 7.08514404296875, |
| "rewards/rejected": -8.554756164550781, |
| "step": 3860 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 1.8559954102122778e-07, |
| "logits/chosen": -2.550110340118408, |
| "logits/rejected": -2.5635857582092285, |
| "logps/chosen": -280.5724792480469, |
| "logps/rejected": -239.25119018554688, |
| "loss": 0.1036, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -1.0125322341918945, |
| "rewards/margins": 4.381348609924316, |
| "rewards/rejected": -5.393881320953369, |
| "step": 3870 |
| }, |
| { |
| "epoch": 2.0, |
| "learning_rate": 1.8464333524574487e-07, |
| "logits/chosen": -2.7105534076690674, |
| "logits/rejected": -2.670560598373413, |
| "logps/chosen": -213.32907104492188, |
| "logps/rejected": -330.0856628417969, |
| "loss": 0.0344, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.48191890120506287, |
| "rewards/margins": 7.262728691101074, |
| "rewards/rejected": -7.744647026062012, |
| "step": 3880 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 1.8368712947026199e-07, |
| "logits/chosen": -2.8019955158233643, |
| "logits/rejected": -2.7659356594085693, |
| "logps/chosen": -305.8590393066406, |
| "logps/rejected": -306.02325439453125, |
| "loss": 0.022, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.42963480949401855, |
| "rewards/margins": 6.8882341384887695, |
| "rewards/rejected": -6.458600044250488, |
| "step": 3890 |
| }, |
| { |
| "epoch": 2.01, |
| "learning_rate": 1.827309236947791e-07, |
| "logits/chosen": -2.6406970024108887, |
| "logits/rejected": -2.6530818939208984, |
| "logps/chosen": -155.24813842773438, |
| "logps/rejected": -301.69390869140625, |
| "loss": 0.0217, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -0.34457913041114807, |
| "rewards/margins": 6.758476257324219, |
| "rewards/rejected": -7.103055000305176, |
| "step": 3900 |
| }, |
| { |
| "epoch": 2.01, |
| "eval_logits/chosen": -2.5271873474121094, |
| "eval_logits/rejected": -2.4840664863586426, |
| "eval_logps/chosen": -277.5699157714844, |
| "eval_logps/rejected": -306.1987609863281, |
| "eval_loss": 0.612151563167572, |
| "eval_rewards/accuracies": 0.828125, |
| "eval_rewards/chosen": -2.3334813117980957, |
| "eval_rewards/margins": 2.589404582977295, |
| "eval_rewards/rejected": -4.922885894775391, |
| "eval_runtime": 54.5082, |
| "eval_samples_per_second": 18.346, |
| "eval_steps_per_second": 0.294, |
| "step": 3900 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 1.8177471791929622e-07, |
| "logits/chosen": -2.416943073272705, |
| "logits/rejected": -2.4777729511260986, |
| "logps/chosen": -234.59054565429688, |
| "logps/rejected": -378.48101806640625, |
| "loss": 0.0294, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7428705096244812, |
| "rewards/margins": 10.251193046569824, |
| "rewards/rejected": -10.994062423706055, |
| "step": 3910 |
| }, |
| { |
| "epoch": 2.02, |
| "learning_rate": 1.8081851214381333e-07, |
| "logits/chosen": -2.6043264865875244, |
| "logits/rejected": -2.5203278064727783, |
| "logps/chosen": -263.97882080078125, |
| "logps/rejected": -393.0724182128906, |
| "loss": 0.0121, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.05669177696108818, |
| "rewards/margins": 8.67068099975586, |
| "rewards/rejected": -8.613988876342773, |
| "step": 3920 |
| }, |
| { |
| "epoch": 2.03, |
| "learning_rate": 1.7986230636833047e-07, |
| "logits/chosen": -2.6340689659118652, |
| "logits/rejected": -2.6645379066467285, |
| "logps/chosen": -179.75973510742188, |
| "logps/rejected": -246.31448364257812, |
| "loss": 0.0189, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.3356815576553345, |
| "rewards/margins": 6.356654167175293, |
| "rewards/rejected": -7.692336082458496, |
| "step": 3930 |
| }, |
| { |
| "epoch": 2.03, |
| "learning_rate": 1.789061005928476e-07, |
| "logits/chosen": -2.812453269958496, |
| "logits/rejected": -2.752922534942627, |
| "logps/chosen": -276.16876220703125, |
| "logps/rejected": -295.46429443359375, |
| "loss": 0.0279, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.023592447862029076, |
| "rewards/margins": 7.244173526763916, |
| "rewards/rejected": -7.2205810546875, |
| "step": 3940 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 1.7794989481736468e-07, |
| "logits/chosen": -2.6128292083740234, |
| "logits/rejected": -2.644348382949829, |
| "logps/chosen": -243.8308563232422, |
| "logps/rejected": -270.5189514160156, |
| "loss": 0.0299, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.30936262011528015, |
| "rewards/margins": 8.135075569152832, |
| "rewards/rejected": -8.444437026977539, |
| "step": 3950 |
| }, |
| { |
| "epoch": 2.04, |
| "learning_rate": 1.769936890418818e-07, |
| "logits/chosen": -2.5391926765441895, |
| "logits/rejected": -2.5172486305236816, |
| "logps/chosen": -303.0284729003906, |
| "logps/rejected": -303.38739013671875, |
| "loss": 0.0178, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.04590020328760147, |
| "rewards/margins": 7.713925361633301, |
| "rewards/rejected": -7.668023586273193, |
| "step": 3960 |
| }, |
| { |
| "epoch": 2.05, |
| "learning_rate": 1.760374832663989e-07, |
| "logits/chosen": -2.785437822341919, |
| "logits/rejected": -2.667668104171753, |
| "logps/chosen": -374.7364807128906, |
| "logps/rejected": -371.83050537109375, |
| "loss": 0.0097, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.3061269521713257, |
| "rewards/margins": 10.878585815429688, |
| "rewards/rejected": -9.572458267211914, |
| "step": 3970 |
| }, |
| { |
| "epoch": 2.05, |
| "learning_rate": 1.7508127749091603e-07, |
| "logits/chosen": -2.6636507511138916, |
| "logits/rejected": -2.622056007385254, |
| "logps/chosen": -272.5489196777344, |
| "logps/rejected": -296.45025634765625, |
| "loss": 0.0159, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6961295008659363, |
| "rewards/margins": 7.3043532371521, |
| "rewards/rejected": -8.000483512878418, |
| "step": 3980 |
| }, |
| { |
| "epoch": 2.06, |
| "learning_rate": 1.7412507171543314e-07, |
| "logits/chosen": -2.6111302375793457, |
| "logits/rejected": -2.7141504287719727, |
| "logps/chosen": -306.14471435546875, |
| "logps/rejected": -277.4181213378906, |
| "loss": 0.0378, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.2339712679386139, |
| "rewards/margins": 8.951885223388672, |
| "rewards/rejected": -9.185856819152832, |
| "step": 3990 |
| }, |
| { |
| "epoch": 2.07, |
| "learning_rate": 1.7316886593995028e-07, |
| "logits/chosen": -2.770508289337158, |
| "logits/rejected": -2.7339038848876953, |
| "logps/chosen": -301.5724182128906, |
| "logps/rejected": -351.9184875488281, |
| "loss": 0.0219, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5096687078475952, |
| "rewards/margins": 8.371912956237793, |
| "rewards/rejected": -8.881582260131836, |
| "step": 4000 |
| }, |
| { |
| "epoch": 2.07, |
| "eval_logits/chosen": -2.454496383666992, |
| "eval_logits/rejected": -2.4104785919189453, |
| "eval_logps/chosen": -284.124755859375, |
| "eval_logps/rejected": -317.1334228515625, |
| "eval_loss": 0.6521932482719421, |
| "eval_rewards/accuracies": 0.828125, |
| "eval_rewards/chosen": -2.988966464996338, |
| "eval_rewards/margins": 3.0273852348327637, |
| "eval_rewards/rejected": -6.016351222991943, |
| "eval_runtime": 61.6079, |
| "eval_samples_per_second": 16.232, |
| "eval_steps_per_second": 0.26, |
| "step": 4000 |
| }, |
| { |
| "epoch": 2.07, |
| "learning_rate": 1.722126601644674e-07, |
| "logits/chosen": -2.7440953254699707, |
| "logits/rejected": -2.736643075942993, |
| "logps/chosen": -323.197998046875, |
| "logps/rejected": -265.2000732421875, |
| "loss": 0.0133, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.9906817674636841, |
| "rewards/margins": 7.5294013023376465, |
| "rewards/rejected": -8.520084381103516, |
| "step": 4010 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 1.7125645438898452e-07, |
| "logits/chosen": -2.7173900604248047, |
| "logits/rejected": -2.676675319671631, |
| "logps/chosen": -314.3374938964844, |
| "logps/rejected": -355.52618408203125, |
| "loss": 0.0061, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.3634423315525055, |
| "rewards/margins": 9.33686637878418, |
| "rewards/rejected": -9.700309753417969, |
| "step": 4020 |
| }, |
| { |
| "epoch": 2.08, |
| "learning_rate": 1.703002486135016e-07, |
| "logits/chosen": -2.6162686347961426, |
| "logits/rejected": -2.603562593460083, |
| "logps/chosen": -248.433837890625, |
| "logps/rejected": -305.6585388183594, |
| "loss": 0.0108, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.43873992562294006, |
| "rewards/margins": 10.635756492614746, |
| "rewards/rejected": -11.07449722290039, |
| "step": 4030 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 1.6934404283801872e-07, |
| "logits/chosen": -2.52081036567688, |
| "logits/rejected": -2.3590970039367676, |
| "logps/chosen": -352.1839904785156, |
| "logps/rejected": -366.2679138183594, |
| "loss": 0.0083, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.43957453966140747, |
| "rewards/margins": 11.327077865600586, |
| "rewards/rejected": -10.887503623962402, |
| "step": 4040 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 1.6838783706253584e-07, |
| "logits/chosen": -2.640784502029419, |
| "logits/rejected": -2.524874687194824, |
| "logps/chosen": -167.76235961914062, |
| "logps/rejected": -211.56985473632812, |
| "loss": 0.0156, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 0.1560419499874115, |
| "rewards/margins": 8.205093383789062, |
| "rewards/rejected": -8.049051284790039, |
| "step": 4050 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 1.6743163128705295e-07, |
| "logits/chosen": -2.604750156402588, |
| "logits/rejected": -2.5685653686523438, |
| "logps/chosen": -289.0841064453125, |
| "logps/rejected": -324.72552490234375, |
| "loss": 0.0133, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.2151424884796143, |
| "rewards/margins": 8.238618850708008, |
| "rewards/rejected": -10.453761100769043, |
| "step": 4060 |
| }, |
| { |
| "epoch": 2.1, |
| "learning_rate": 1.664754255115701e-07, |
| "logits/chosen": -2.4264097213745117, |
| "logits/rejected": -2.375046730041504, |
| "logps/chosen": -224.1468505859375, |
| "logps/rejected": -290.3971862792969, |
| "loss": 0.0178, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.14240024983882904, |
| "rewards/margins": 9.841353416442871, |
| "rewards/rejected": -9.698953628540039, |
| "step": 4070 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 1.655192197360872e-07, |
| "logits/chosen": -2.753242254257202, |
| "logits/rejected": -2.6922965049743652, |
| "logps/chosen": -274.47601318359375, |
| "logps/rejected": -324.0868835449219, |
| "loss": 0.0062, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.3948326110839844, |
| "rewards/margins": 8.828141212463379, |
| "rewards/rejected": -9.22297477722168, |
| "step": 4080 |
| }, |
| { |
| "epoch": 2.11, |
| "learning_rate": 1.6456301396060433e-07, |
| "logits/chosen": -2.554525375366211, |
| "logits/rejected": -2.6398258209228516, |
| "logps/chosen": -365.6826477050781, |
| "logps/rejected": -360.66107177734375, |
| "loss": 0.019, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.2733768820762634, |
| "rewards/margins": 8.528050422668457, |
| "rewards/rejected": -8.801426887512207, |
| "step": 4090 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 1.6360680818512144e-07, |
| "logits/chosen": -2.7123589515686035, |
| "logits/rejected": -2.61602783203125, |
| "logps/chosen": -368.64544677734375, |
| "logps/rejected": -432.6624450683594, |
| "loss": 0.0119, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.055361270904541, |
| "rewards/margins": 8.865598678588867, |
| "rewards/rejected": -9.920958518981934, |
| "step": 4100 |
| }, |
| { |
| "epoch": 2.12, |
| "eval_logits/chosen": -2.4698657989501953, |
| "eval_logits/rejected": -2.4272119998931885, |
| "eval_logps/chosen": -289.0121154785156, |
| "eval_logps/rejected": -323.7186584472656, |
| "eval_loss": 0.692164421081543, |
| "eval_rewards/accuracies": 0.796875, |
| "eval_rewards/chosen": -3.477701187133789, |
| "eval_rewards/margins": 3.197173595428467, |
| "eval_rewards/rejected": -6.674875259399414, |
| "eval_runtime": 57.1311, |
| "eval_samples_per_second": 17.504, |
| "eval_steps_per_second": 0.28, |
| "step": 4100 |
| }, |
| { |
| "epoch": 2.12, |
| "learning_rate": 1.6265060240963853e-07, |
| "logits/chosen": -2.6370177268981934, |
| "logits/rejected": -2.5220537185668945, |
| "logps/chosen": -334.99066162109375, |
| "logps/rejected": -290.169189453125, |
| "loss": 0.0078, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.933539867401123, |
| "rewards/margins": 8.522821426391602, |
| "rewards/rejected": -7.589282989501953, |
| "step": 4110 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 1.6169439663415565e-07, |
| "logits/chosen": -2.6449599266052246, |
| "logits/rejected": -2.6207022666931152, |
| "logps/chosen": -269.48529052734375, |
| "logps/rejected": -324.10418701171875, |
| "loss": 0.0086, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.8957691192626953, |
| "rewards/margins": 9.796669960021973, |
| "rewards/rejected": -10.692439079284668, |
| "step": 4120 |
| }, |
| { |
| "epoch": 2.13, |
| "learning_rate": 1.6073819085867276e-07, |
| "logits/chosen": -2.797229290008545, |
| "logits/rejected": -2.7991158962249756, |
| "logps/chosen": -309.7330627441406, |
| "logps/rejected": -439.6482849121094, |
| "loss": 0.0158, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.3518083095550537, |
| "rewards/margins": 11.625936508178711, |
| "rewards/rejected": -10.274128913879395, |
| "step": 4130 |
| }, |
| { |
| "epoch": 2.14, |
| "learning_rate": 1.597819850831899e-07, |
| "logits/chosen": -2.752419948577881, |
| "logits/rejected": -2.6186330318450928, |
| "logps/chosen": -208.90380859375, |
| "logps/rejected": -247.1297149658203, |
| "loss": 0.0054, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.2973028421401978, |
| "rewards/margins": 9.918791770935059, |
| "rewards/rejected": -8.621490478515625, |
| "step": 4140 |
| }, |
| { |
| "epoch": 2.14, |
| "learning_rate": 1.5882577930770702e-07, |
| "logits/chosen": -2.5983939170837402, |
| "logits/rejected": -2.551213502883911, |
| "logps/chosen": -321.56195068359375, |
| "logps/rejected": -328.3628234863281, |
| "loss": 0.0088, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.38840678334236145, |
| "rewards/margins": 10.420036315917969, |
| "rewards/rejected": -10.03162956237793, |
| "step": 4150 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 1.5786957353222414e-07, |
| "logits/chosen": -2.5684826374053955, |
| "logits/rejected": -2.608212471008301, |
| "logps/chosen": -272.9964599609375, |
| "logps/rejected": -265.1176452636719, |
| "loss": 0.0104, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.0111887454986572, |
| "rewards/margins": 7.318711280822754, |
| "rewards/rejected": -8.329900741577148, |
| "step": 4160 |
| }, |
| { |
| "epoch": 2.15, |
| "learning_rate": 1.5691336775674125e-07, |
| "logits/chosen": -2.4619576930999756, |
| "logits/rejected": -2.555619716644287, |
| "logps/chosen": -268.24859619140625, |
| "logps/rejected": -298.4876403808594, |
| "loss": 0.0098, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.17980532348155975, |
| "rewards/margins": 9.836331367492676, |
| "rewards/rejected": -10.01613712310791, |
| "step": 4170 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 1.5595716198125837e-07, |
| "logits/chosen": -2.7515110969543457, |
| "logits/rejected": -2.7178173065185547, |
| "logps/chosen": -400.4342346191406, |
| "logps/rejected": -458.4161682128906, |
| "loss": 0.0081, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.9931119084358215, |
| "rewards/margins": 10.737370491027832, |
| "rewards/rejected": -11.73048210144043, |
| "step": 4180 |
| }, |
| { |
| "epoch": 2.16, |
| "learning_rate": 1.5500095620577546e-07, |
| "logits/chosen": -2.6951303482055664, |
| "logits/rejected": -2.748305559158325, |
| "logps/chosen": -240.6981201171875, |
| "logps/rejected": -257.77752685546875, |
| "loss": 0.0113, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.5856528282165527, |
| "rewards/margins": 7.461671352386475, |
| "rewards/rejected": -9.047324180603027, |
| "step": 4190 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 1.5404475043029257e-07, |
| "logits/chosen": -2.5231451988220215, |
| "logits/rejected": -2.5645296573638916, |
| "logps/chosen": -191.24134826660156, |
| "logps/rejected": -325.54949951171875, |
| "loss": 0.0153, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.5546247959136963, |
| "rewards/margins": 8.656599998474121, |
| "rewards/rejected": -10.211225509643555, |
| "step": 4200 |
| }, |
| { |
| "epoch": 2.17, |
| "eval_logits/chosen": -2.4464974403381348, |
| "eval_logits/rejected": -2.4046523571014404, |
| "eval_logps/chosen": -286.6412658691406, |
| "eval_logps/rejected": -323.7452697753906, |
| "eval_loss": 0.6993398666381836, |
| "eval_rewards/accuracies": 0.796875, |
| "eval_rewards/chosen": -3.2406165599823, |
| "eval_rewards/margins": 3.4369187355041504, |
| "eval_rewards/rejected": -6.677535533905029, |
| "eval_runtime": 54.7971, |
| "eval_samples_per_second": 18.249, |
| "eval_steps_per_second": 0.292, |
| "step": 4200 |
| }, |
| { |
| "epoch": 2.17, |
| "learning_rate": 1.5308854465480971e-07, |
| "logits/chosen": -2.6298282146453857, |
| "logits/rejected": -2.6375985145568848, |
| "logps/chosen": -293.63629150390625, |
| "logps/rejected": -297.9925537109375, |
| "loss": 0.0112, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.0975180864334106, |
| "rewards/margins": 8.3226318359375, |
| "rewards/rejected": -9.420149803161621, |
| "step": 4210 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 1.5213233887932683e-07, |
| "logits/chosen": -2.5850603580474854, |
| "logits/rejected": -2.606503963470459, |
| "logps/chosen": -316.19854736328125, |
| "logps/rejected": -332.8941955566406, |
| "loss": 0.0088, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -0.9063823819160461, |
| "rewards/margins": 7.739400386810303, |
| "rewards/rejected": -8.645783424377441, |
| "step": 4220 |
| }, |
| { |
| "epoch": 2.18, |
| "learning_rate": 1.5117613310384395e-07, |
| "logits/chosen": -2.5701706409454346, |
| "logits/rejected": -2.5911612510681152, |
| "logps/chosen": -269.97894287109375, |
| "logps/rejected": -319.3363342285156, |
| "loss": 0.0079, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6825813055038452, |
| "rewards/margins": 10.70821762084961, |
| "rewards/rejected": -11.390798568725586, |
| "step": 4230 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 1.5021992732836106e-07, |
| "logits/chosen": -2.4653377532958984, |
| "logits/rejected": -2.5559732913970947, |
| "logps/chosen": -279.4239196777344, |
| "logps/rejected": -356.7681884765625, |
| "loss": 0.0143, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.9881469011306763, |
| "rewards/margins": 9.201104164123535, |
| "rewards/rejected": -10.189250946044922, |
| "step": 4240 |
| }, |
| { |
| "epoch": 2.19, |
| "learning_rate": 1.4926372155287818e-07, |
| "logits/chosen": -2.7210116386413574, |
| "logits/rejected": -2.593418836593628, |
| "logps/chosen": -228.53121948242188, |
| "logps/rejected": -271.88787841796875, |
| "loss": 0.0185, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.9970628619194031, |
| "rewards/margins": 8.71868896484375, |
| "rewards/rejected": -9.715751647949219, |
| "step": 4250 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 1.483075157773953e-07, |
| "logits/chosen": -2.5943050384521484, |
| "logits/rejected": -2.673746347427368, |
| "logps/chosen": -251.91336059570312, |
| "logps/rejected": -270.3241271972656, |
| "loss": 0.0095, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.251340627670288, |
| "rewards/margins": 8.234782218933105, |
| "rewards/rejected": -9.486123085021973, |
| "step": 4260 |
| }, |
| { |
| "epoch": 2.2, |
| "learning_rate": 1.4735131000191238e-07, |
| "logits/chosen": -2.6009936332702637, |
| "logits/rejected": -2.607675313949585, |
| "logps/chosen": -309.9886169433594, |
| "logps/rejected": -332.39801025390625, |
| "loss": 0.0121, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.0659501552581787, |
| "rewards/margins": 9.141355514526367, |
| "rewards/rejected": -10.207304954528809, |
| "step": 4270 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 1.4639510422642952e-07, |
| "logits/chosen": -2.7442212104797363, |
| "logits/rejected": -2.6310532093048096, |
| "logps/chosen": -345.13616943359375, |
| "logps/rejected": -405.30755615234375, |
| "loss": 0.01, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7909820079803467, |
| "rewards/margins": 9.60711669921875, |
| "rewards/rejected": -10.398099899291992, |
| "step": 4280 |
| }, |
| { |
| "epoch": 2.21, |
| "learning_rate": 1.4543889845094664e-07, |
| "logits/chosen": -2.4257078170776367, |
| "logits/rejected": -2.461683750152588, |
| "logps/chosen": -375.21478271484375, |
| "logps/rejected": -433.16973876953125, |
| "loss": 0.0485, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.8098801374435425, |
| "rewards/margins": 11.403145790100098, |
| "rewards/rejected": -12.21302604675293, |
| "step": 4290 |
| }, |
| { |
| "epoch": 2.22, |
| "learning_rate": 1.4448269267546376e-07, |
| "logits/chosen": -2.7228384017944336, |
| "logits/rejected": -2.763788938522339, |
| "logps/chosen": -330.9010314941406, |
| "logps/rejected": -367.5445861816406, |
| "loss": 0.011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.8657931089401245, |
| "rewards/margins": 10.422277450561523, |
| "rewards/rejected": -9.55648422241211, |
| "step": 4300 |
| }, |
| { |
| "epoch": 2.22, |
| "eval_logits/chosen": -2.428981304168701, |
| "eval_logits/rejected": -2.3842594623565674, |
| "eval_logps/chosen": -292.2260437011719, |
| "eval_logps/rejected": -331.3666687011719, |
| "eval_loss": 0.7177846431732178, |
| "eval_rewards/accuracies": 0.765625, |
| "eval_rewards/chosen": -3.7990951538085938, |
| "eval_rewards/margins": 3.6405770778656006, |
| "eval_rewards/rejected": -7.439671993255615, |
| "eval_runtime": 57.5668, |
| "eval_samples_per_second": 17.371, |
| "eval_steps_per_second": 0.278, |
| "step": 4300 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 1.4352648689998087e-07, |
| "logits/chosen": -2.6788887977600098, |
| "logits/rejected": -2.659087657928467, |
| "logps/chosen": -255.2762908935547, |
| "logps/rejected": -230.3298797607422, |
| "loss": 0.0081, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5762121081352234, |
| "rewards/margins": 9.638313293457031, |
| "rewards/rejected": -10.21452522277832, |
| "step": 4310 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 1.42570281124498e-07, |
| "logits/chosen": -2.5874216556549072, |
| "logits/rejected": -2.647291898727417, |
| "logps/chosen": -264.53802490234375, |
| "logps/rejected": -368.0313415527344, |
| "loss": 0.0123, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.9759609699249268, |
| "rewards/margins": 9.195481300354004, |
| "rewards/rejected": -11.171442031860352, |
| "step": 4320 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 1.416140753490151e-07, |
| "logits/chosen": -2.6484475135803223, |
| "logits/rejected": -2.7253453731536865, |
| "logps/chosen": -338.7431640625, |
| "logps/rejected": -423.6756896972656, |
| "loss": 0.0099, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.1497945338487625, |
| "rewards/margins": 11.79082202911377, |
| "rewards/rejected": -11.940614700317383, |
| "step": 4330 |
| }, |
| { |
| "epoch": 2.24, |
| "learning_rate": 1.4065786957353222e-07, |
| "logits/chosen": -2.5038111209869385, |
| "logits/rejected": -2.5019071102142334, |
| "logps/chosen": -315.7591552734375, |
| "logps/rejected": -336.18963623046875, |
| "loss": 0.0246, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.016095232218503952, |
| "rewards/margins": 8.441411018371582, |
| "rewards/rejected": -8.425315856933594, |
| "step": 4340 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 1.3970166379804933e-07, |
| "logits/chosen": -2.49928879737854, |
| "logits/rejected": -2.376461982727051, |
| "logps/chosen": -380.0243225097656, |
| "logps/rejected": -395.73077392578125, |
| "loss": 0.0177, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.1554236114025116, |
| "rewards/margins": 9.892860412597656, |
| "rewards/rejected": -10.048284530639648, |
| "step": 4350 |
| }, |
| { |
| "epoch": 2.25, |
| "learning_rate": 1.3874545802256645e-07, |
| "logits/chosen": -2.758044481277466, |
| "logits/rejected": -2.6601271629333496, |
| "logps/chosen": -311.03436279296875, |
| "logps/rejected": -419.60418701171875, |
| "loss": 0.0129, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.6286399364471436, |
| "rewards/margins": 10.120224952697754, |
| "rewards/rejected": -11.748865127563477, |
| "step": 4360 |
| }, |
| { |
| "epoch": 2.26, |
| "learning_rate": 1.3778925224708357e-07, |
| "logits/chosen": -2.516096830368042, |
| "logits/rejected": -2.5368704795837402, |
| "logps/chosen": -253.93722534179688, |
| "logps/rejected": -275.40423583984375, |
| "loss": 0.006, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.05337781831622124, |
| "rewards/margins": 8.323257446289062, |
| "rewards/rejected": -8.376635551452637, |
| "step": 4370 |
| }, |
| { |
| "epoch": 2.26, |
| "learning_rate": 1.3683304647160068e-07, |
| "logits/chosen": -2.6350722312927246, |
| "logits/rejected": -2.5284571647644043, |
| "logps/chosen": -279.087158203125, |
| "logps/rejected": -357.74542236328125, |
| "loss": 0.0119, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.0737884044647217, |
| "rewards/margins": 10.794805526733398, |
| "rewards/rejected": -11.868593215942383, |
| "step": 4380 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 1.358768406961178e-07, |
| "logits/chosen": -2.6859638690948486, |
| "logits/rejected": -2.735161066055298, |
| "logps/chosen": -295.9905700683594, |
| "logps/rejected": -444.8924255371094, |
| "loss": 0.0096, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.7544025778770447, |
| "rewards/margins": 10.69865608215332, |
| "rewards/rejected": -9.944252967834473, |
| "step": 4390 |
| }, |
| { |
| "epoch": 2.27, |
| "learning_rate": 1.349206349206349e-07, |
| "logits/chosen": -2.757059335708618, |
| "logits/rejected": -2.771275520324707, |
| "logps/chosen": -266.63800048828125, |
| "logps/rejected": -281.4782409667969, |
| "loss": 0.0072, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.0992039442062378, |
| "rewards/margins": 8.593305587768555, |
| "rewards/rejected": -9.692508697509766, |
| "step": 4400 |
| }, |
| { |
| "epoch": 2.27, |
| "eval_logits/chosen": -2.4535796642303467, |
| "eval_logits/rejected": -2.4095299243927, |
| "eval_logps/chosen": -287.504150390625, |
| "eval_logps/rejected": -324.9907531738281, |
| "eval_loss": 0.6839932203292847, |
| "eval_rewards/accuracies": 0.8125, |
| "eval_rewards/chosen": -3.326904535293579, |
| "eval_rewards/margins": 3.47517728805542, |
| "eval_rewards/rejected": -6.802082061767578, |
| "eval_runtime": 58.0489, |
| "eval_samples_per_second": 17.227, |
| "eval_steps_per_second": 0.276, |
| "step": 4400 |
| }, |
| { |
| "epoch": 2.28, |
| "learning_rate": 1.3396442914515203e-07, |
| "logits/chosen": -2.330714464187622, |
| "logits/rejected": -2.469642400741577, |
| "logps/chosen": -260.82843017578125, |
| "logps/rejected": -299.21343994140625, |
| "loss": 0.0115, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -1.7311785221099854, |
| "rewards/margins": 8.269608497619629, |
| "rewards/rejected": -10.000787734985352, |
| "step": 4410 |
| }, |
| { |
| "epoch": 2.28, |
| "learning_rate": 1.3300822336966917e-07, |
| "logits/chosen": -2.5285234451293945, |
| "logits/rejected": -2.3487613201141357, |
| "logps/chosen": -334.66229248046875, |
| "logps/rejected": -329.3540954589844, |
| "loss": 0.0094, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.163953959941864, |
| "rewards/margins": 9.816844940185547, |
| "rewards/rejected": -9.652891159057617, |
| "step": 4420 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 1.3205201759418626e-07, |
| "logits/chosen": -2.400176525115967, |
| "logits/rejected": -2.173835277557373, |
| "logps/chosen": -355.26043701171875, |
| "logps/rejected": -349.78851318359375, |
| "loss": 0.0099, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.45656710863113403, |
| "rewards/margins": 10.023509979248047, |
| "rewards/rejected": -9.56694221496582, |
| "step": 4430 |
| }, |
| { |
| "epoch": 2.29, |
| "learning_rate": 1.3109581181870338e-07, |
| "logits/chosen": -2.611816883087158, |
| "logits/rejected": -2.6642374992370605, |
| "logps/chosen": -294.43756103515625, |
| "logps/rejected": -321.86846923828125, |
| "loss": 0.0105, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.08868559449911118, |
| "rewards/margins": 9.15350341796875, |
| "rewards/rejected": -9.064818382263184, |
| "step": 4440 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 1.301396060432205e-07, |
| "logits/chosen": -2.6899092197418213, |
| "logits/rejected": -2.6209728717803955, |
| "logps/chosen": -340.12030029296875, |
| "logps/rejected": -341.85638427734375, |
| "loss": 0.0079, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5528199672698975, |
| "rewards/margins": 10.257894515991211, |
| "rewards/rejected": -10.810712814331055, |
| "step": 4450 |
| }, |
| { |
| "epoch": 2.3, |
| "learning_rate": 1.291834002677376e-07, |
| "logits/chosen": -2.5746819972991943, |
| "logits/rejected": -2.4712207317352295, |
| "logps/chosen": -340.21661376953125, |
| "logps/rejected": -348.29376220703125, |
| "loss": 0.0111, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.839795470237732, |
| "rewards/margins": 9.294793128967285, |
| "rewards/rejected": -11.134590148925781, |
| "step": 4460 |
| }, |
| { |
| "epoch": 2.31, |
| "learning_rate": 1.2822719449225472e-07, |
| "logits/chosen": -2.4497411251068115, |
| "logits/rejected": -2.6023406982421875, |
| "logps/chosen": -258.5740966796875, |
| "logps/rejected": -322.1835021972656, |
| "loss": 0.0067, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.4901916980743408, |
| "rewards/margins": 8.80390739440918, |
| "rewards/rejected": -10.294098854064941, |
| "step": 4470 |
| }, |
| { |
| "epoch": 2.31, |
| "learning_rate": 1.2727098871677184e-07, |
| "logits/chosen": -2.679898262023926, |
| "logits/rejected": -2.6797006130218506, |
| "logps/chosen": -358.4029235839844, |
| "logps/rejected": -349.04119873046875, |
| "loss": 0.004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.31357401609420776, |
| "rewards/margins": 9.342456817626953, |
| "rewards/rejected": -9.656030654907227, |
| "step": 4480 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 1.2631478294128898e-07, |
| "logits/chosen": -2.6513264179229736, |
| "logits/rejected": -2.6451632976531982, |
| "logps/chosen": -398.11871337890625, |
| "logps/rejected": -359.9664611816406, |
| "loss": 0.0102, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.07513128221035004, |
| "rewards/margins": 10.039201736450195, |
| "rewards/rejected": -9.964070320129395, |
| "step": 4490 |
| }, |
| { |
| "epoch": 2.32, |
| "learning_rate": 1.253585771658061e-07, |
| "logits/chosen": -2.587759017944336, |
| "logits/rejected": -2.633078098297119, |
| "logps/chosen": -251.6234588623047, |
| "logps/rejected": -377.10443115234375, |
| "loss": 0.0197, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.1120997667312622, |
| "rewards/margins": 9.92921257019043, |
| "rewards/rejected": -11.041314125061035, |
| "step": 4500 |
| }, |
| { |
| "epoch": 2.32, |
| "eval_logits/chosen": -2.4542932510375977, |
| "eval_logits/rejected": -2.411810874938965, |
| "eval_logps/chosen": -291.1250305175781, |
| "eval_logps/rejected": -329.98406982421875, |
| "eval_loss": 0.7013015151023865, |
| "eval_rewards/accuracies": 0.8125, |
| "eval_rewards/chosen": -3.688992738723755, |
| "eval_rewards/margins": 3.612422466278076, |
| "eval_rewards/rejected": -7.301414966583252, |
| "eval_runtime": 56.7399, |
| "eval_samples_per_second": 17.624, |
| "eval_steps_per_second": 0.282, |
| "step": 4500 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 1.2440237139032319e-07, |
| "logits/chosen": -2.7155184745788574, |
| "logits/rejected": -2.7012360095977783, |
| "logps/chosen": -270.6969909667969, |
| "logps/rejected": -277.15362548828125, |
| "loss": 0.0102, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.8242677450180054, |
| "rewards/margins": 10.182249069213867, |
| "rewards/rejected": -12.006516456604004, |
| "step": 4510 |
| }, |
| { |
| "epoch": 2.33, |
| "learning_rate": 1.234461656148403e-07, |
| "logits/chosen": -2.7778592109680176, |
| "logits/rejected": -2.6845195293426514, |
| "logps/chosen": -406.66497802734375, |
| "logps/rejected": -398.89044189453125, |
| "loss": 0.0065, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.5758999586105347, |
| "rewards/margins": 11.570829391479492, |
| "rewards/rejected": -13.146730422973633, |
| "step": 4520 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 1.2248995983935742e-07, |
| "logits/chosen": -2.7265734672546387, |
| "logits/rejected": -2.6226305961608887, |
| "logps/chosen": -284.33843994140625, |
| "logps/rejected": -360.93121337890625, |
| "loss": 0.0053, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6043723821640015, |
| "rewards/margins": 10.222024917602539, |
| "rewards/rejected": -10.826397895812988, |
| "step": 4530 |
| }, |
| { |
| "epoch": 2.34, |
| "learning_rate": 1.2153375406387456e-07, |
| "logits/chosen": -2.724083185195923, |
| "logits/rejected": -2.75142765045166, |
| "logps/chosen": -355.35504150390625, |
| "logps/rejected": -472.2686462402344, |
| "loss": 0.008, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.3634461164474487, |
| "rewards/margins": 11.717732429504395, |
| "rewards/rejected": -13.081178665161133, |
| "step": 4540 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 1.2057754828839165e-07, |
| "logits/chosen": -2.666905641555786, |
| "logits/rejected": -2.737536907196045, |
| "logps/chosen": -310.6121520996094, |
| "logps/rejected": -350.0155029296875, |
| "loss": 0.0089, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.12724515795707703, |
| "rewards/margins": 9.271829605102539, |
| "rewards/rejected": -9.39907455444336, |
| "step": 4550 |
| }, |
| { |
| "epoch": 2.35, |
| "learning_rate": 1.1962134251290876e-07, |
| "logits/chosen": -2.6463513374328613, |
| "logits/rejected": -2.6242516040802, |
| "logps/chosen": -268.8026123046875, |
| "logps/rejected": -253.2088623046875, |
| "loss": 0.0123, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.7786948680877686, |
| "rewards/margins": 8.877888679504395, |
| "rewards/rejected": -10.656583786010742, |
| "step": 4560 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 1.1866513673742588e-07, |
| "logits/chosen": -2.6526236534118652, |
| "logits/rejected": -2.5933640003204346, |
| "logps/chosen": -244.880615234375, |
| "logps/rejected": -330.068603515625, |
| "loss": 0.0185, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.4325166940689087, |
| "rewards/margins": 8.305582046508789, |
| "rewards/rejected": -9.73809814453125, |
| "step": 4570 |
| }, |
| { |
| "epoch": 2.36, |
| "learning_rate": 1.1770893096194301e-07, |
| "logits/chosen": -2.6464786529541016, |
| "logits/rejected": -2.621084451675415, |
| "logps/chosen": -358.1322326660156, |
| "logps/rejected": -398.2645568847656, |
| "loss": 0.0102, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.9040740728378296, |
| "rewards/margins": 10.22703742980957, |
| "rewards/rejected": -12.131113052368164, |
| "step": 4580 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 1.1675272518646012e-07, |
| "logits/chosen": -2.704784631729126, |
| "logits/rejected": -2.6682817935943604, |
| "logps/chosen": -297.62274169921875, |
| "logps/rejected": -330.6324462890625, |
| "loss": 0.0272, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.19843515753746033, |
| "rewards/margins": 9.420347213745117, |
| "rewards/rejected": -9.618782997131348, |
| "step": 4590 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 1.1579651941097724e-07, |
| "logits/chosen": -2.6055915355682373, |
| "logits/rejected": -2.6153995990753174, |
| "logps/chosen": -305.62933349609375, |
| "logps/rejected": -291.2359924316406, |
| "loss": 0.0182, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -1.7859185934066772, |
| "rewards/margins": 8.813383102416992, |
| "rewards/rejected": -10.599302291870117, |
| "step": 4600 |
| }, |
| { |
| "epoch": 2.37, |
| "eval_logits/chosen": -2.4565374851226807, |
| "eval_logits/rejected": -2.416307210922241, |
| "eval_logps/chosen": -293.22906494140625, |
| "eval_logps/rejected": -332.3355712890625, |
| "eval_loss": 0.7476168870925903, |
| "eval_rewards/accuracies": 0.828125, |
| "eval_rewards/chosen": -3.8993959426879883, |
| "eval_rewards/margins": 3.637169361114502, |
| "eval_rewards/rejected": -7.536564826965332, |
| "eval_runtime": 57.2122, |
| "eval_samples_per_second": 17.479, |
| "eval_steps_per_second": 0.28, |
| "step": 4600 |
| }, |
| { |
| "epoch": 2.38, |
| "learning_rate": 1.1484031363549436e-07, |
| "logits/chosen": -2.5126757621765137, |
| "logits/rejected": -2.449023962020874, |
| "logps/chosen": -327.66717529296875, |
| "logps/rejected": -361.0265808105469, |
| "loss": 0.0268, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -1.0840609073638916, |
| "rewards/margins": 10.741630554199219, |
| "rewards/rejected": -11.825691223144531, |
| "step": 4610 |
| }, |
| { |
| "epoch": 2.39, |
| "learning_rate": 1.1388410786001147e-07, |
| "logits/chosen": -2.6590983867645264, |
| "logits/rejected": -2.688147783279419, |
| "logps/chosen": -304.8904113769531, |
| "logps/rejected": -383.8213195800781, |
| "loss": 0.0162, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -1.8201377391815186, |
| "rewards/margins": 9.453073501586914, |
| "rewards/rejected": -11.273211479187012, |
| "step": 4620 |
| }, |
| { |
| "epoch": 2.39, |
| "learning_rate": 1.1292790208452859e-07, |
| "logits/chosen": -2.6834404468536377, |
| "logits/rejected": -2.6824703216552734, |
| "logps/chosen": -271.0035400390625, |
| "logps/rejected": -379.20989990234375, |
| "loss": 0.0132, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.0902073383331299, |
| "rewards/margins": 11.127284049987793, |
| "rewards/rejected": -12.21749210357666, |
| "step": 4630 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 1.119716963090457e-07, |
| "logits/chosen": -2.3537399768829346, |
| "logits/rejected": -2.4233551025390625, |
| "logps/chosen": -216.2086944580078, |
| "logps/rejected": -297.00640869140625, |
| "loss": 0.0146, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.2917330265045166, |
| "rewards/margins": 7.949918270111084, |
| "rewards/rejected": -10.24165153503418, |
| "step": 4640 |
| }, |
| { |
| "epoch": 2.4, |
| "learning_rate": 1.1101549053356282e-07, |
| "logits/chosen": -2.7646780014038086, |
| "logits/rejected": -2.6880381107330322, |
| "logps/chosen": -306.4629821777344, |
| "logps/rejected": -336.5583190917969, |
| "loss": 0.0227, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.0529628992080688, |
| "rewards/margins": 8.59121036529541, |
| "rewards/rejected": -9.644172668457031, |
| "step": 4650 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 1.1005928475807993e-07, |
| "logits/chosen": -2.550281286239624, |
| "logits/rejected": -2.499551296234131, |
| "logps/chosen": -284.04730224609375, |
| "logps/rejected": -312.99896240234375, |
| "loss": 0.0107, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.5507326126098633, |
| "rewards/margins": 9.462206840515137, |
| "rewards/rejected": -11.012939453125, |
| "step": 4660 |
| }, |
| { |
| "epoch": 2.41, |
| "learning_rate": 1.0910307898259705e-07, |
| "logits/chosen": -2.3352179527282715, |
| "logits/rejected": -2.438673973083496, |
| "logps/chosen": -236.6370849609375, |
| "logps/rejected": -284.3169250488281, |
| "loss": 0.0135, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.611262559890747, |
| "rewards/margins": 8.59797477722168, |
| "rewards/rejected": -10.209238052368164, |
| "step": 4670 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 1.0814687320711418e-07, |
| "logits/chosen": -2.474139928817749, |
| "logits/rejected": -2.377544641494751, |
| "logps/chosen": -238.1358184814453, |
| "logps/rejected": -414.88720703125, |
| "loss": 0.0087, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.9006067514419556, |
| "rewards/margins": 9.972890853881836, |
| "rewards/rejected": -10.873498916625977, |
| "step": 4680 |
| }, |
| { |
| "epoch": 2.42, |
| "learning_rate": 1.0719066743163128e-07, |
| "logits/chosen": -2.493590831756592, |
| "logits/rejected": -2.6044669151306152, |
| "logps/chosen": -323.1622009277344, |
| "logps/rejected": -323.79510498046875, |
| "loss": 0.0178, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.587964415550232, |
| "rewards/margins": 9.504448890686035, |
| "rewards/rejected": -11.092413902282715, |
| "step": 4690 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 1.062344616561484e-07, |
| "logits/chosen": -2.638388156890869, |
| "logits/rejected": -2.634883403778076, |
| "logps/chosen": -368.1080627441406, |
| "logps/rejected": -507.3169860839844, |
| "loss": 0.0125, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.11677348613739014, |
| "rewards/margins": 13.523852348327637, |
| "rewards/rejected": -13.640626907348633, |
| "step": 4700 |
| }, |
| { |
| "epoch": 2.43, |
| "eval_logits/chosen": -2.4100139141082764, |
| "eval_logits/rejected": -2.369899272918701, |
| "eval_logps/chosen": -294.79522705078125, |
| "eval_logps/rejected": -332.7344665527344, |
| "eval_loss": 0.7198817133903503, |
| "eval_rewards/accuracies": 0.84375, |
| "eval_rewards/chosen": -4.056015968322754, |
| "eval_rewards/margins": 3.5204358100891113, |
| "eval_rewards/rejected": -7.576451778411865, |
| "eval_runtime": 55.0706, |
| "eval_samples_per_second": 18.158, |
| "eval_steps_per_second": 0.291, |
| "step": 4700 |
| }, |
| { |
| "epoch": 2.43, |
| "learning_rate": 1.0527825588066551e-07, |
| "logits/chosen": -2.615658760070801, |
| "logits/rejected": -2.48193097114563, |
| "logps/chosen": -350.5819091796875, |
| "logps/rejected": -332.139892578125, |
| "loss": 0.0096, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.4044158458709717, |
| "rewards/margins": 10.358014106750488, |
| "rewards/rejected": -11.762430191040039, |
| "step": 4710 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 1.0432205010518264e-07, |
| "logits/chosen": -2.6633851528167725, |
| "logits/rejected": -2.6755900382995605, |
| "logps/chosen": -244.67703247070312, |
| "logps/rejected": -381.3924865722656, |
| "loss": 0.0063, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5906885266304016, |
| "rewards/margins": 11.672990798950195, |
| "rewards/rejected": -12.263678550720215, |
| "step": 4720 |
| }, |
| { |
| "epoch": 2.44, |
| "learning_rate": 1.0336584432969974e-07, |
| "logits/chosen": -2.4058127403259277, |
| "logits/rejected": -2.398548126220703, |
| "logps/chosen": -268.20660400390625, |
| "logps/rejected": -309.49078369140625, |
| "loss": 0.0141, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": 0.11153459548950195, |
| "rewards/margins": 9.980080604553223, |
| "rewards/rejected": -9.868546485900879, |
| "step": 4730 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 1.0240963855421686e-07, |
| "logits/chosen": -2.542297601699829, |
| "logits/rejected": -2.5843400955200195, |
| "logps/chosen": -385.5765686035156, |
| "logps/rejected": -344.6966552734375, |
| "loss": 0.0071, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.7057178020477295, |
| "rewards/margins": 9.097609519958496, |
| "rewards/rejected": -10.803327560424805, |
| "step": 4740 |
| }, |
| { |
| "epoch": 2.45, |
| "learning_rate": 1.0145343277873399e-07, |
| "logits/chosen": -2.505624771118164, |
| "logits/rejected": -2.4930660724639893, |
| "logps/chosen": -330.05987548828125, |
| "logps/rejected": -383.5957336425781, |
| "loss": 0.0139, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.8719791173934937, |
| "rewards/margins": 10.555585861206055, |
| "rewards/rejected": -11.427566528320312, |
| "step": 4750 |
| }, |
| { |
| "epoch": 2.46, |
| "learning_rate": 1.004972270032511e-07, |
| "logits/chosen": -2.2423624992370605, |
| "logits/rejected": -2.250560760498047, |
| "logps/chosen": -287.89349365234375, |
| "logps/rejected": -298.4164123535156, |
| "loss": 0.0167, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -1.064730167388916, |
| "rewards/margins": 7.943607330322266, |
| "rewards/rejected": -9.00833797454834, |
| "step": 4760 |
| }, |
| { |
| "epoch": 2.46, |
| "learning_rate": 9.95410212277682e-08, |
| "logits/chosen": -2.6729438304901123, |
| "logits/rejected": -2.5839288234710693, |
| "logps/chosen": -304.6081237792969, |
| "logps/rejected": -281.4034423828125, |
| "loss": 0.0149, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.613656759262085, |
| "rewards/margins": 8.870094299316406, |
| "rewards/rejected": -9.483750343322754, |
| "step": 4770 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 9.858481545228532e-08, |
| "logits/chosen": -2.431548833847046, |
| "logits/rejected": -2.5211846828460693, |
| "logps/chosen": -185.5460205078125, |
| "logps/rejected": -266.6904602050781, |
| "loss": 0.0164, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -1.3175272941589355, |
| "rewards/margins": 8.721592903137207, |
| "rewards/rejected": -10.039118766784668, |
| "step": 4780 |
| }, |
| { |
| "epoch": 2.47, |
| "learning_rate": 9.762860967680245e-08, |
| "logits/chosen": -2.5315418243408203, |
| "logits/rejected": -2.6745972633361816, |
| "logps/chosen": -215.60311889648438, |
| "logps/rejected": -252.6163330078125, |
| "loss": 0.0082, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.5650713443756104, |
| "rewards/margins": 8.301115036010742, |
| "rewards/rejected": -9.866186141967773, |
| "step": 4790 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 9.667240390131957e-08, |
| "logits/chosen": -2.6866960525512695, |
| "logits/rejected": -2.6582419872283936, |
| "logps/chosen": -263.9376220703125, |
| "logps/rejected": -329.9527587890625, |
| "loss": 0.0082, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7252375483512878, |
| "rewards/margins": 12.293913841247559, |
| "rewards/rejected": -13.01915168762207, |
| "step": 4800 |
| }, |
| { |
| "epoch": 2.48, |
| "eval_logits/chosen": -2.430349826812744, |
| "eval_logits/rejected": -2.3925321102142334, |
| "eval_logps/chosen": -290.84771728515625, |
| "eval_logps/rejected": -328.32550048828125, |
| "eval_loss": 0.7047879695892334, |
| "eval_rewards/accuracies": 0.875, |
| "eval_rewards/chosen": -3.6612637042999268, |
| "eval_rewards/margins": 3.474294662475586, |
| "eval_rewards/rejected": -7.135558605194092, |
| "eval_runtime": 56.5008, |
| "eval_samples_per_second": 17.699, |
| "eval_steps_per_second": 0.283, |
| "step": 4800 |
| }, |
| { |
| "epoch": 2.48, |
| "learning_rate": 9.571619812583667e-08, |
| "logits/chosen": -2.3522887229919434, |
| "logits/rejected": -2.5020272731781006, |
| "logps/chosen": -404.24993896484375, |
| "logps/rejected": -365.1546936035156, |
| "loss": 0.0103, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5152179598808289, |
| "rewards/margins": 10.364774703979492, |
| "rewards/rejected": -10.879993438720703, |
| "step": 4810 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 9.47599923503538e-08, |
| "logits/chosen": -2.634892225265503, |
| "logits/rejected": -2.660521984100342, |
| "logps/chosen": -330.85308837890625, |
| "logps/rejected": -385.0195617675781, |
| "loss": 0.0189, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -0.17577147483825684, |
| "rewards/margins": 10.900343894958496, |
| "rewards/rejected": -11.076115608215332, |
| "step": 4820 |
| }, |
| { |
| "epoch": 2.49, |
| "learning_rate": 9.380378657487091e-08, |
| "logits/chosen": -2.514988422393799, |
| "logits/rejected": -2.510554790496826, |
| "logps/chosen": -250.59939575195312, |
| "logps/rejected": -327.1246643066406, |
| "loss": 0.0152, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6027127504348755, |
| "rewards/margins": 11.022318840026855, |
| "rewards/rejected": -11.625032424926758, |
| "step": 4830 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 9.284758079938803e-08, |
| "logits/chosen": -2.7160019874572754, |
| "logits/rejected": -2.725782632827759, |
| "logps/chosen": -366.26788330078125, |
| "logps/rejected": -313.48223876953125, |
| "loss": 0.0077, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.3688530921936035, |
| "rewards/margins": 8.293670654296875, |
| "rewards/rejected": -9.66252326965332, |
| "step": 4840 |
| }, |
| { |
| "epoch": 2.5, |
| "learning_rate": 9.189137502390513e-08, |
| "logits/chosen": -2.5986154079437256, |
| "logits/rejected": -2.60760760307312, |
| "logps/chosen": -338.04925537109375, |
| "logps/rejected": -425.7908630371094, |
| "loss": 0.0096, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.2331068515777588, |
| "rewards/margins": 9.989707946777344, |
| "rewards/rejected": -11.222814559936523, |
| "step": 4850 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 9.093516924842226e-08, |
| "logits/chosen": -2.5680298805236816, |
| "logits/rejected": -2.603311061859131, |
| "logps/chosen": -270.52349853515625, |
| "logps/rejected": -418.3185119628906, |
| "loss": 0.0155, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.3546164631843567, |
| "rewards/margins": 11.326202392578125, |
| "rewards/rejected": -11.680818557739258, |
| "step": 4860 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 8.997896347293938e-08, |
| "logits/chosen": -2.529101610183716, |
| "logits/rejected": -2.4874515533447266, |
| "logps/chosen": -205.5690460205078, |
| "logps/rejected": -333.98065185546875, |
| "loss": 0.0151, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.0936403274536133, |
| "rewards/margins": 9.565814018249512, |
| "rewards/rejected": -10.659454345703125, |
| "step": 4870 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 8.902275769745648e-08, |
| "logits/chosen": -2.4107840061187744, |
| "logits/rejected": -2.529804229736328, |
| "logps/chosen": -229.46145629882812, |
| "logps/rejected": -267.4582214355469, |
| "loss": 0.0123, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -1.5835365056991577, |
| "rewards/margins": 7.870436668395996, |
| "rewards/rejected": -9.453973770141602, |
| "step": 4880 |
| }, |
| { |
| "epoch": 2.52, |
| "learning_rate": 8.806655192197361e-08, |
| "logits/chosen": -2.4289088249206543, |
| "logits/rejected": -2.549330949783325, |
| "logps/chosen": -171.3069610595703, |
| "logps/rejected": -321.93853759765625, |
| "loss": 0.0065, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -2.041638135910034, |
| "rewards/margins": 10.020352363586426, |
| "rewards/rejected": -12.061990737915039, |
| "step": 4890 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 8.711034614649072e-08, |
| "logits/chosen": -2.7017006874084473, |
| "logits/rejected": -2.7009201049804688, |
| "logps/chosen": -279.64984130859375, |
| "logps/rejected": -352.21160888671875, |
| "loss": 0.0118, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.1117719411849976, |
| "rewards/margins": 10.062509536743164, |
| "rewards/rejected": -11.17428207397461, |
| "step": 4900 |
| }, |
| { |
| "epoch": 2.53, |
| "eval_logits/chosen": -2.4046812057495117, |
| "eval_logits/rejected": -2.36327862739563, |
| "eval_logps/chosen": -292.14312744140625, |
| "eval_logps/rejected": -330.12237548828125, |
| "eval_loss": 0.6975539326667786, |
| "eval_rewards/accuracies": 0.8125, |
| "eval_rewards/chosen": -3.7908036708831787, |
| "eval_rewards/margins": 3.524440288543701, |
| "eval_rewards/rejected": -7.315243721008301, |
| "eval_runtime": 53.2942, |
| "eval_samples_per_second": 18.764, |
| "eval_steps_per_second": 0.3, |
| "step": 4900 |
| }, |
| { |
| "epoch": 2.53, |
| "learning_rate": 8.615414037100784e-08, |
| "logits/chosen": -2.5984580516815186, |
| "logits/rejected": -2.746319532394409, |
| "logps/chosen": -321.95367431640625, |
| "logps/rejected": -298.1436767578125, |
| "loss": 0.0084, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.020658016204834, |
| "rewards/margins": 8.369918823242188, |
| "rewards/rejected": -10.390576362609863, |
| "step": 4910 |
| }, |
| { |
| "epoch": 2.54, |
| "learning_rate": 8.519793459552494e-08, |
| "logits/chosen": -2.359086513519287, |
| "logits/rejected": -2.3888332843780518, |
| "logps/chosen": -395.9248962402344, |
| "logps/rejected": -374.02069091796875, |
| "loss": 0.014, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.04106631129980087, |
| "rewards/margins": 11.742452621459961, |
| "rewards/rejected": -11.783517837524414, |
| "step": 4920 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 8.424172882004207e-08, |
| "logits/chosen": -2.661177158355713, |
| "logits/rejected": -2.6514670848846436, |
| "logps/chosen": -387.62054443359375, |
| "logps/rejected": -339.0218505859375, |
| "loss": 0.0263, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.0007514476892538369, |
| "rewards/margins": 9.993762016296387, |
| "rewards/rejected": -9.993009567260742, |
| "step": 4930 |
| }, |
| { |
| "epoch": 2.55, |
| "learning_rate": 8.328552304455919e-08, |
| "logits/chosen": -2.335365056991577, |
| "logits/rejected": -2.317937135696411, |
| "logps/chosen": -231.7373504638672, |
| "logps/rejected": -296.01287841796875, |
| "loss": 0.0162, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7403205633163452, |
| "rewards/margins": 9.786886215209961, |
| "rewards/rejected": -10.527207374572754, |
| "step": 4940 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 8.23293172690763e-08, |
| "logits/chosen": -2.5740818977355957, |
| "logits/rejected": -2.612046718597412, |
| "logps/chosen": -265.88116455078125, |
| "logps/rejected": -311.5575256347656, |
| "loss": 0.0043, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.1604253053665161, |
| "rewards/margins": 10.83531665802002, |
| "rewards/rejected": -11.995742797851562, |
| "step": 4950 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 8.137311149359343e-08, |
| "logits/chosen": -2.7012178897857666, |
| "logits/rejected": -2.6206467151641846, |
| "logps/chosen": -434.08843994140625, |
| "logps/rejected": -364.0971984863281, |
| "loss": 0.0085, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.09630658477544785, |
| "rewards/margins": 11.247058868408203, |
| "rewards/rejected": -11.343365669250488, |
| "step": 4960 |
| }, |
| { |
| "epoch": 2.57, |
| "learning_rate": 8.041690571811053e-08, |
| "logits/chosen": -2.614105463027954, |
| "logits/rejected": -2.5202865600585938, |
| "logps/chosen": -219.88876342773438, |
| "logps/rejected": -269.26568603515625, |
| "loss": 0.0173, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.203838348388672, |
| "rewards/margins": 8.655978202819824, |
| "rewards/rejected": -10.859817504882812, |
| "step": 4970 |
| }, |
| { |
| "epoch": 2.57, |
| "learning_rate": 7.946069994262765e-08, |
| "logits/chosen": -2.546452045440674, |
| "logits/rejected": -2.6220192909240723, |
| "logps/chosen": -294.5769958496094, |
| "logps/rejected": -284.33343505859375, |
| "loss": 0.009, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.0741980075836182, |
| "rewards/margins": 9.485953330993652, |
| "rewards/rejected": -10.560152053833008, |
| "step": 4980 |
| }, |
| { |
| "epoch": 2.58, |
| "learning_rate": 7.850449416714476e-08, |
| "logits/chosen": -2.7545557022094727, |
| "logits/rejected": -2.676429033279419, |
| "logps/chosen": -480.96600341796875, |
| "logps/rejected": -401.0008850097656, |
| "loss": 0.0081, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.3216051459312439, |
| "rewards/margins": 9.37825870513916, |
| "rewards/rejected": -9.699864387512207, |
| "step": 4990 |
| }, |
| { |
| "epoch": 2.58, |
| "learning_rate": 7.754828839166188e-08, |
| "logits/chosen": -2.48799467086792, |
| "logits/rejected": -2.4741270542144775, |
| "logps/chosen": -251.6031036376953, |
| "logps/rejected": -321.9014587402344, |
| "loss": 0.0118, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.3492779731750488, |
| "rewards/margins": 8.896702766418457, |
| "rewards/rejected": -10.245981216430664, |
| "step": 5000 |
| }, |
| { |
| "epoch": 2.58, |
| "eval_logits/chosen": -2.4194068908691406, |
| "eval_logits/rejected": -2.3763530254364014, |
| "eval_logps/chosen": -293.284423828125, |
| "eval_logps/rejected": -332.5270690917969, |
| "eval_loss": 0.7198395133018494, |
| "eval_rewards/accuracies": 0.828125, |
| "eval_rewards/chosen": -3.9049317836761475, |
| "eval_rewards/margins": 3.650782823562622, |
| "eval_rewards/rejected": -7.555714130401611, |
| "eval_runtime": 56.8998, |
| "eval_samples_per_second": 17.575, |
| "eval_steps_per_second": 0.281, |
| "step": 5000 |
| }, |
| { |
| "epoch": 2.59, |
| "learning_rate": 7.6592082616179e-08, |
| "logits/chosen": -2.4661271572113037, |
| "logits/rejected": -2.477613687515259, |
| "logps/chosen": -245.18594360351562, |
| "logps/rejected": -335.5259094238281, |
| "loss": 0.0138, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.542055368423462, |
| "rewards/margins": 10.06078052520752, |
| "rewards/rejected": -11.602836608886719, |
| "step": 5010 |
| }, |
| { |
| "epoch": 2.59, |
| "learning_rate": 7.563587684069611e-08, |
| "logits/chosen": -2.5083346366882324, |
| "logits/rejected": -2.643256187438965, |
| "logps/chosen": -207.7921600341797, |
| "logps/rejected": -385.1307678222656, |
| "loss": 0.0049, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.6641393899917603, |
| "rewards/margins": 10.65031909942627, |
| "rewards/rejected": -12.314460754394531, |
| "step": 5020 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 7.467967106521324e-08, |
| "logits/chosen": -2.588287830352783, |
| "logits/rejected": -2.5413451194763184, |
| "logps/chosen": -273.2277526855469, |
| "logps/rejected": -238.3046875, |
| "loss": 0.0103, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6997830867767334, |
| "rewards/margins": 7.600827217102051, |
| "rewards/rejected": -8.300610542297363, |
| "step": 5030 |
| }, |
| { |
| "epoch": 2.6, |
| "learning_rate": 7.372346528973034e-08, |
| "logits/chosen": -2.5987842082977295, |
| "logits/rejected": -2.5648391246795654, |
| "logps/chosen": -217.76416015625, |
| "logps/rejected": -320.9278259277344, |
| "loss": 0.0087, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.9506279230117798, |
| "rewards/margins": 10.49673080444336, |
| "rewards/rejected": -12.447359085083008, |
| "step": 5040 |
| }, |
| { |
| "epoch": 2.61, |
| "learning_rate": 7.276725951424746e-08, |
| "logits/chosen": -2.5334415435791016, |
| "logits/rejected": -2.48858642578125, |
| "logps/chosen": -171.40257263183594, |
| "logps/rejected": -390.48590087890625, |
| "loss": 0.0139, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.9724035263061523, |
| "rewards/margins": 8.248844146728516, |
| "rewards/rejected": -10.2212495803833, |
| "step": 5050 |
| }, |
| { |
| "epoch": 2.61, |
| "learning_rate": 7.181105373876457e-08, |
| "logits/chosen": -2.483840227127075, |
| "logits/rejected": -2.437764883041382, |
| "logps/chosen": -204.07522583007812, |
| "logps/rejected": -299.15594482421875, |
| "loss": 0.0111, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.3759775161743164, |
| "rewards/margins": 9.727631568908691, |
| "rewards/rejected": -11.103609085083008, |
| "step": 5060 |
| }, |
| { |
| "epoch": 2.62, |
| "learning_rate": 7.08548479632817e-08, |
| "logits/chosen": -2.751817226409912, |
| "logits/rejected": -2.6693196296691895, |
| "logps/chosen": -372.95458984375, |
| "logps/rejected": -369.3866271972656, |
| "loss": 0.0131, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.938228964805603, |
| "rewards/margins": 9.311058044433594, |
| "rewards/rejected": -10.249287605285645, |
| "step": 5070 |
| }, |
| { |
| "epoch": 2.62, |
| "learning_rate": 6.98986421877988e-08, |
| "logits/chosen": -2.541592836380005, |
| "logits/rejected": -2.455427646636963, |
| "logps/chosen": -295.2919006347656, |
| "logps/rejected": -412.5565490722656, |
| "loss": 0.0096, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.2343528270721436, |
| "rewards/margins": 10.338408470153809, |
| "rewards/rejected": -11.572762489318848, |
| "step": 5080 |
| }, |
| { |
| "epoch": 2.63, |
| "learning_rate": 6.894243641231592e-08, |
| "logits/chosen": -2.578338623046875, |
| "logits/rejected": -2.542959690093994, |
| "logps/chosen": -227.2720947265625, |
| "logps/rejected": -329.40032958984375, |
| "loss": 0.0778, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6857062578201294, |
| "rewards/margins": 11.191483497619629, |
| "rewards/rejected": -11.877190589904785, |
| "step": 5090 |
| }, |
| { |
| "epoch": 2.63, |
| "learning_rate": 6.798623063683305e-08, |
| "logits/chosen": -2.287254810333252, |
| "logits/rejected": -2.432054281234741, |
| "logps/chosen": -312.0555114746094, |
| "logps/rejected": -452.99169921875, |
| "loss": 0.006, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.9427648782730103, |
| "rewards/margins": 15.484460830688477, |
| "rewards/rejected": -16.42722511291504, |
| "step": 5100 |
| }, |
| { |
| "epoch": 2.63, |
| "eval_logits/chosen": -2.3859879970550537, |
| "eval_logits/rejected": -2.340737819671631, |
| "eval_logps/chosen": -296.35302734375, |
| "eval_logps/rejected": -336.11944580078125, |
| "eval_loss": 0.7505870461463928, |
| "eval_rewards/accuracies": 0.8125, |
| "eval_rewards/chosen": -4.211794376373291, |
| "eval_rewards/margins": 3.703155040740967, |
| "eval_rewards/rejected": -7.914949893951416, |
| "eval_runtime": 56.2566, |
| "eval_samples_per_second": 17.776, |
| "eval_steps_per_second": 0.284, |
| "step": 5100 |
| }, |
| { |
| "epoch": 2.64, |
| "learning_rate": 6.703002486135017e-08, |
| "logits/chosen": -2.3773114681243896, |
| "logits/rejected": -2.5287060737609863, |
| "logps/chosen": -236.22640991210938, |
| "logps/rejected": -360.97784423828125, |
| "loss": 0.0178, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.9979360699653625, |
| "rewards/margins": 10.704690933227539, |
| "rewards/rejected": -11.702627182006836, |
| "step": 5110 |
| }, |
| { |
| "epoch": 2.64, |
| "learning_rate": 6.607381908586727e-08, |
| "logits/chosen": -2.642033338546753, |
| "logits/rejected": -2.6108345985412598, |
| "logps/chosen": -317.5076599121094, |
| "logps/rejected": -348.7528076171875, |
| "loss": 0.0096, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.6177361011505127, |
| "rewards/margins": 9.611312866210938, |
| "rewards/rejected": -11.229048728942871, |
| "step": 5120 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 6.511761331038438e-08, |
| "logits/chosen": -2.6155383586883545, |
| "logits/rejected": -2.6100358963012695, |
| "logps/chosen": -281.2548522949219, |
| "logps/rejected": -298.05865478515625, |
| "loss": 0.0147, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.588085651397705, |
| "rewards/margins": 7.020742893218994, |
| "rewards/rejected": -9.608829498291016, |
| "step": 5130 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 6.416140753490151e-08, |
| "logits/chosen": -2.627002239227295, |
| "logits/rejected": -2.6328094005584717, |
| "logps/chosen": -421.49774169921875, |
| "logps/rejected": -432.20098876953125, |
| "loss": 0.0179, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -1.4822900295257568, |
| "rewards/margins": 8.786565780639648, |
| "rewards/rejected": -10.268855094909668, |
| "step": 5140 |
| }, |
| { |
| "epoch": 2.66, |
| "learning_rate": 6.320520175941863e-08, |
| "logits/chosen": -2.4586381912231445, |
| "logits/rejected": -2.452455997467041, |
| "logps/chosen": -246.73715209960938, |
| "logps/rejected": -327.2841491699219, |
| "loss": 0.0172, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.41286563873291, |
| "rewards/margins": 9.417495727539062, |
| "rewards/rejected": -11.830362319946289, |
| "step": 5150 |
| }, |
| { |
| "epoch": 2.66, |
| "learning_rate": 6.224899598393573e-08, |
| "logits/chosen": -2.557018756866455, |
| "logits/rejected": -2.4926464557647705, |
| "logps/chosen": -285.82635498046875, |
| "logps/rejected": -348.3973693847656, |
| "loss": 0.0111, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.4971519708633423, |
| "rewards/margins": 9.434330940246582, |
| "rewards/rejected": -10.931482315063477, |
| "step": 5160 |
| }, |
| { |
| "epoch": 2.67, |
| "learning_rate": 6.129279020845286e-08, |
| "logits/chosen": -2.4606575965881348, |
| "logits/rejected": -2.5436136722564697, |
| "logps/chosen": -265.47454833984375, |
| "logps/rejected": -310.14862060546875, |
| "loss": 0.0113, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -0.3885912597179413, |
| "rewards/margins": 10.448331832885742, |
| "rewards/rejected": -10.836923599243164, |
| "step": 5170 |
| }, |
| { |
| "epoch": 2.67, |
| "learning_rate": 6.033658443296998e-08, |
| "logits/chosen": -2.5347704887390137, |
| "logits/rejected": -2.484384059906006, |
| "logps/chosen": -266.8102111816406, |
| "logps/rejected": -340.2280578613281, |
| "loss": 0.0178, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.881771445274353, |
| "rewards/margins": 11.114812850952148, |
| "rewards/rejected": -11.99658489227295, |
| "step": 5180 |
| }, |
| { |
| "epoch": 2.68, |
| "learning_rate": 5.9380378657487085e-08, |
| "logits/chosen": -2.5079243183135986, |
| "logits/rejected": -2.5110316276550293, |
| "logps/chosen": -455.3853454589844, |
| "logps/rejected": -375.2730407714844, |
| "loss": 0.0062, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6811244487762451, |
| "rewards/margins": 11.579205513000488, |
| "rewards/rejected": -12.26032829284668, |
| "step": 5190 |
| }, |
| { |
| "epoch": 2.68, |
| "learning_rate": 5.842417288200421e-08, |
| "logits/chosen": -2.665579080581665, |
| "logits/rejected": -2.835705280303955, |
| "logps/chosen": -388.7041320800781, |
| "logps/rejected": -376.1544494628906, |
| "loss": 0.0143, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7552551627159119, |
| "rewards/margins": 9.90630054473877, |
| "rewards/rejected": -10.6615571975708, |
| "step": 5200 |
| }, |
| { |
| "epoch": 2.68, |
| "eval_logits/chosen": -2.394641160964966, |
| "eval_logits/rejected": -2.350865125656128, |
| "eval_logps/chosen": -296.6682434082031, |
| "eval_logps/rejected": -336.7720642089844, |
| "eval_loss": 0.7407526969909668, |
| "eval_rewards/accuracies": 0.8125, |
| "eval_rewards/chosen": -4.243312835693359, |
| "eval_rewards/margins": 3.7369019985198975, |
| "eval_rewards/rejected": -7.980215549468994, |
| "eval_runtime": 55.9932, |
| "eval_samples_per_second": 17.859, |
| "eval_steps_per_second": 0.286, |
| "step": 5200 |
| }, |
| { |
| "epoch": 2.69, |
| "learning_rate": 5.7467967106521317e-08, |
| "logits/chosen": -2.518009901046753, |
| "logits/rejected": -2.5615527629852295, |
| "logps/chosen": -296.2488098144531, |
| "logps/rejected": -408.9478454589844, |
| "loss": 0.0042, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.9118080139160156, |
| "rewards/margins": 10.68850326538086, |
| "rewards/rejected": -12.600311279296875, |
| "step": 5210 |
| }, |
| { |
| "epoch": 2.69, |
| "learning_rate": 5.651176133103844e-08, |
| "logits/chosen": -2.740626096725464, |
| "logits/rejected": -2.676818370819092, |
| "logps/chosen": -306.84588623046875, |
| "logps/rejected": -342.53240966796875, |
| "loss": 0.0142, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.46114450693130493, |
| "rewards/margins": 10.382904052734375, |
| "rewards/rejected": -10.844049453735352, |
| "step": 5220 |
| }, |
| { |
| "epoch": 2.7, |
| "learning_rate": 5.555555555555555e-08, |
| "logits/chosen": -2.5409655570983887, |
| "logits/rejected": -2.4781863689422607, |
| "logps/chosen": -215.8829345703125, |
| "logps/rejected": -301.06756591796875, |
| "loss": 0.0098, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.1217892169952393, |
| "rewards/margins": 8.91219711303711, |
| "rewards/rejected": -10.033987045288086, |
| "step": 5230 |
| }, |
| { |
| "epoch": 2.71, |
| "learning_rate": 5.459934978007267e-08, |
| "logits/chosen": -2.6471657752990723, |
| "logits/rejected": -2.611330509185791, |
| "logps/chosen": -273.4901123046875, |
| "logps/rejected": -403.7444763183594, |
| "loss": 0.0046, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.45464539527893066, |
| "rewards/margins": 11.401620864868164, |
| "rewards/rejected": -11.856266021728516, |
| "step": 5240 |
| }, |
| { |
| "epoch": 2.71, |
| "learning_rate": 5.3643144004589786e-08, |
| "logits/chosen": -2.542269706726074, |
| "logits/rejected": -2.433465003967285, |
| "logps/chosen": -301.2662048339844, |
| "logps/rejected": -399.2783203125, |
| "loss": 0.0122, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.4464475214481354, |
| "rewards/margins": 13.448400497436523, |
| "rewards/rejected": -13.001953125, |
| "step": 5250 |
| }, |
| { |
| "epoch": 2.72, |
| "learning_rate": 5.26869382291069e-08, |
| "logits/chosen": -2.6752572059631348, |
| "logits/rejected": -2.7158637046813965, |
| "logps/chosen": -226.4488983154297, |
| "logps/rejected": -335.8851623535156, |
| "loss": 0.0119, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.019395578652620316, |
| "rewards/margins": 10.634721755981445, |
| "rewards/rejected": -10.654115676879883, |
| "step": 5260 |
| }, |
| { |
| "epoch": 2.72, |
| "learning_rate": 5.173073245362402e-08, |
| "logits/chosen": -2.265803337097168, |
| "logits/rejected": -2.495293617248535, |
| "logps/chosen": -273.8394470214844, |
| "logps/rejected": -262.0378112792969, |
| "loss": 0.0138, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.907080888748169, |
| "rewards/margins": 9.230062484741211, |
| "rewards/rejected": -11.1371431350708, |
| "step": 5270 |
| }, |
| { |
| "epoch": 2.73, |
| "learning_rate": 5.077452667814113e-08, |
| "logits/chosen": -2.645397186279297, |
| "logits/rejected": -2.6353235244750977, |
| "logps/chosen": -234.93240356445312, |
| "logps/rejected": -313.4653015136719, |
| "loss": 0.0084, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.619875192642212, |
| "rewards/margins": 9.85165786743164, |
| "rewards/rejected": -11.47153377532959, |
| "step": 5280 |
| }, |
| { |
| "epoch": 2.73, |
| "learning_rate": 4.981832090265825e-08, |
| "logits/chosen": -2.5697460174560547, |
| "logits/rejected": -2.524587631225586, |
| "logps/chosen": -278.901123046875, |
| "logps/rejected": -400.72540283203125, |
| "loss": 0.0122, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.3195740878582001, |
| "rewards/margins": 12.423995018005371, |
| "rewards/rejected": -12.104421615600586, |
| "step": 5290 |
| }, |
| { |
| "epoch": 2.74, |
| "learning_rate": 4.8862115127175364e-08, |
| "logits/chosen": -2.6613426208496094, |
| "logits/rejected": -2.5382397174835205, |
| "logps/chosen": -298.51617431640625, |
| "logps/rejected": -405.8147277832031, |
| "loss": 0.0057, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.6868101358413696, |
| "rewards/margins": 10.438592910766602, |
| "rewards/rejected": -12.125402450561523, |
| "step": 5300 |
| }, |
| { |
| "epoch": 2.74, |
| "eval_logits/chosen": -2.384242057800293, |
| "eval_logits/rejected": -2.33884596824646, |
| "eval_logps/chosen": -297.62750244140625, |
| "eval_logps/rejected": -337.80126953125, |
| "eval_loss": 0.7552159428596497, |
| "eval_rewards/accuracies": 0.796875, |
| "eval_rewards/chosen": -4.339241981506348, |
| "eval_rewards/margins": 3.743894100189209, |
| "eval_rewards/rejected": -8.083136558532715, |
| "eval_runtime": 59.8742, |
| "eval_samples_per_second": 16.702, |
| "eval_steps_per_second": 0.267, |
| "step": 5300 |
| }, |
| { |
| "epoch": 2.74, |
| "learning_rate": 4.790590935169248e-08, |
| "logits/chosen": -2.6838698387145996, |
| "logits/rejected": -2.574967384338379, |
| "logps/chosen": -274.59368896484375, |
| "logps/rejected": -469.4027404785156, |
| "loss": 0.012, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.4363610744476318, |
| "rewards/margins": 10.517416000366211, |
| "rewards/rejected": -11.953778266906738, |
| "step": 5310 |
| }, |
| { |
| "epoch": 2.75, |
| "learning_rate": 4.69497035762096e-08, |
| "logits/chosen": -2.537161350250244, |
| "logits/rejected": -2.4791531562805176, |
| "logps/chosen": -344.87347412109375, |
| "logps/rejected": -444.57366943359375, |
| "loss": 0.0116, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.297786235809326, |
| "rewards/margins": 12.291933059692383, |
| "rewards/rejected": -14.589719772338867, |
| "step": 5320 |
| }, |
| { |
| "epoch": 2.75, |
| "learning_rate": 4.599349780072671e-08, |
| "logits/chosen": -2.5215706825256348, |
| "logits/rejected": -2.408939838409424, |
| "logps/chosen": -344.95184326171875, |
| "logps/rejected": -272.84417724609375, |
| "loss": 0.0134, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.2328124046325684, |
| "rewards/margins": 9.55348014831543, |
| "rewards/rejected": -11.78629207611084, |
| "step": 5330 |
| }, |
| { |
| "epoch": 2.76, |
| "learning_rate": 4.5037292025243834e-08, |
| "logits/chosen": -2.4096181392669678, |
| "logits/rejected": -2.3585500717163086, |
| "logps/chosen": -231.6038055419922, |
| "logps/rejected": -414.8946838378906, |
| "loss": 0.0113, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.199920892715454, |
| "rewards/margins": 9.544143676757812, |
| "rewards/rejected": -11.744064331054688, |
| "step": 5340 |
| }, |
| { |
| "epoch": 2.76, |
| "learning_rate": 4.408108624976094e-08, |
| "logits/chosen": -2.6306357383728027, |
| "logits/rejected": -2.373485565185547, |
| "logps/chosen": -273.1640625, |
| "logps/rejected": -362.6429138183594, |
| "loss": 0.0084, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7220970392227173, |
| "rewards/margins": 11.861469268798828, |
| "rewards/rejected": -12.583566665649414, |
| "step": 5350 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 4.3124880474278065e-08, |
| "logits/chosen": -2.6821742057800293, |
| "logits/rejected": -2.5935044288635254, |
| "logps/chosen": -330.2795715332031, |
| "logps/rejected": -295.5904541015625, |
| "loss": 0.0196, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4781021177768707, |
| "rewards/margins": 9.99770450592041, |
| "rewards/rejected": -10.475805282592773, |
| "step": 5360 |
| }, |
| { |
| "epoch": 2.77, |
| "learning_rate": 4.2168674698795174e-08, |
| "logits/chosen": -2.668886184692383, |
| "logits/rejected": -2.7140769958496094, |
| "logps/chosen": -188.55136108398438, |
| "logps/rejected": -355.8598327636719, |
| "loss": 0.0061, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.314338207244873, |
| "rewards/margins": 8.853775978088379, |
| "rewards/rejected": -11.168115615844727, |
| "step": 5370 |
| }, |
| { |
| "epoch": 2.78, |
| "learning_rate": 4.1212468923312296e-08, |
| "logits/chosen": -2.5291595458984375, |
| "logits/rejected": -2.4308247566223145, |
| "logps/chosen": -271.3199768066406, |
| "logps/rejected": -382.0475158691406, |
| "loss": 0.0167, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -1.8199418783187866, |
| "rewards/margins": 9.165318489074707, |
| "rewards/rejected": -10.985260009765625, |
| "step": 5380 |
| }, |
| { |
| "epoch": 2.78, |
| "learning_rate": 4.025626314782941e-08, |
| "logits/chosen": -2.578953266143799, |
| "logits/rejected": -2.5158464908599854, |
| "logps/chosen": -277.73052978515625, |
| "logps/rejected": -347.6210632324219, |
| "loss": 0.003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.26607850193977356, |
| "rewards/margins": 11.397039413452148, |
| "rewards/rejected": -11.663119316101074, |
| "step": 5390 |
| }, |
| { |
| "epoch": 2.79, |
| "learning_rate": 3.930005737234653e-08, |
| "logits/chosen": -2.543391704559326, |
| "logits/rejected": -2.6446430683135986, |
| "logps/chosen": -216.94741821289062, |
| "logps/rejected": -307.4268493652344, |
| "loss": 0.0138, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.3559612035751343, |
| "rewards/margins": 9.72540283203125, |
| "rewards/rejected": -11.0813627243042, |
| "step": 5400 |
| }, |
| { |
| "epoch": 2.79, |
| "eval_logits/chosen": -2.3737339973449707, |
| "eval_logits/rejected": -2.3286330699920654, |
| "eval_logps/chosen": -296.6304016113281, |
| "eval_logps/rejected": -336.73223876953125, |
| "eval_loss": 0.7403773069381714, |
| "eval_rewards/accuracies": 0.8125, |
| "eval_rewards/chosen": -4.239532470703125, |
| "eval_rewards/margins": 3.7366957664489746, |
| "eval_rewards/rejected": -7.9762282371521, |
| "eval_runtime": 58.906, |
| "eval_samples_per_second": 16.976, |
| "eval_steps_per_second": 0.272, |
| "step": 5400 |
| }, |
| { |
| "epoch": 2.79, |
| "learning_rate": 3.8343851596863644e-08, |
| "logits/chosen": -2.68801212310791, |
| "logits/rejected": -2.5317561626434326, |
| "logps/chosen": -327.53106689453125, |
| "logps/rejected": -318.7012939453125, |
| "loss": 0.0065, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.34214359521865845, |
| "rewards/margins": 12.371678352355957, |
| "rewards/rejected": -12.713821411132812, |
| "step": 5410 |
| }, |
| { |
| "epoch": 2.8, |
| "learning_rate": 3.738764582138076e-08, |
| "logits/chosen": -2.63051700592041, |
| "logits/rejected": -2.5712480545043945, |
| "logps/chosen": -292.55035400390625, |
| "logps/rejected": -406.0823059082031, |
| "loss": 0.0096, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.4555866718292236, |
| "rewards/margins": 9.967303276062012, |
| "rewards/rejected": -12.422890663146973, |
| "step": 5420 |
| }, |
| { |
| "epoch": 2.8, |
| "learning_rate": 3.6431440045897875e-08, |
| "logits/chosen": -2.558973550796509, |
| "logits/rejected": -2.5760269165039062, |
| "logps/chosen": -288.33062744140625, |
| "logps/rejected": -438.51007080078125, |
| "loss": 0.0067, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.0792489051818848, |
| "rewards/margins": 10.678377151489258, |
| "rewards/rejected": -11.7576265335083, |
| "step": 5430 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 3.547523427041499e-08, |
| "logits/chosen": -2.607342481613159, |
| "logits/rejected": -2.609557628631592, |
| "logps/chosen": -283.79608154296875, |
| "logps/rejected": -293.2716369628906, |
| "loss": 0.0106, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.1565558910369873, |
| "rewards/margins": 10.7040433883667, |
| "rewards/rejected": -11.860601425170898, |
| "step": 5440 |
| }, |
| { |
| "epoch": 2.81, |
| "learning_rate": 3.4519028494932106e-08, |
| "logits/chosen": -2.703679084777832, |
| "logits/rejected": -2.5151591300964355, |
| "logps/chosen": -353.95758056640625, |
| "logps/rejected": -433.54766845703125, |
| "loss": 0.0033, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7000893354415894, |
| "rewards/margins": 11.649955749511719, |
| "rewards/rejected": -12.350044250488281, |
| "step": 5450 |
| }, |
| { |
| "epoch": 2.82, |
| "learning_rate": 3.356282271944923e-08, |
| "logits/chosen": -2.603567600250244, |
| "logits/rejected": -2.502267360687256, |
| "logps/chosen": -226.2731170654297, |
| "logps/rejected": -341.97320556640625, |
| "loss": 0.0146, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -0.9284445643424988, |
| "rewards/margins": 11.389801979064941, |
| "rewards/rejected": -12.318245887756348, |
| "step": 5460 |
| }, |
| { |
| "epoch": 2.82, |
| "learning_rate": 3.260661694396634e-08, |
| "logits/chosen": -2.5252528190612793, |
| "logits/rejected": -2.5249342918395996, |
| "logps/chosen": -263.4516906738281, |
| "logps/rejected": -313.29998779296875, |
| "loss": 0.0053, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.3164348602294922, |
| "rewards/margins": 11.594769477844238, |
| "rewards/rejected": -11.911203384399414, |
| "step": 5470 |
| }, |
| { |
| "epoch": 2.83, |
| "learning_rate": 3.165041116848346e-08, |
| "logits/chosen": -2.660788059234619, |
| "logits/rejected": -2.5421648025512695, |
| "logps/chosen": -253.57839965820312, |
| "logps/rejected": -402.5025329589844, |
| "loss": 0.0055, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.6556440591812134, |
| "rewards/margins": 12.180809020996094, |
| "rewards/rejected": -13.836453437805176, |
| "step": 5480 |
| }, |
| { |
| "epoch": 2.83, |
| "learning_rate": 3.0694205393000576e-08, |
| "logits/chosen": -2.5150065422058105, |
| "logits/rejected": -2.4512484073638916, |
| "logps/chosen": -212.173828125, |
| "logps/rejected": -332.0416564941406, |
| "loss": 0.0046, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.4317614436149597, |
| "rewards/margins": 11.226727485656738, |
| "rewards/rejected": -10.794965744018555, |
| "step": 5490 |
| }, |
| { |
| "epoch": 2.84, |
| "learning_rate": 2.9737999617517688e-08, |
| "logits/chosen": -2.5827393531799316, |
| "logits/rejected": -2.5749595165252686, |
| "logps/chosen": -308.6050109863281, |
| "logps/rejected": -365.627197265625, |
| "loss": 0.0079, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4415368139743805, |
| "rewards/margins": 10.942670822143555, |
| "rewards/rejected": -11.384206771850586, |
| "step": 5500 |
| }, |
| { |
| "epoch": 2.84, |
| "eval_logits/chosen": -2.364140272140503, |
| "eval_logits/rejected": -2.319963216781616, |
| "eval_logps/chosen": -298.70074462890625, |
| "eval_logps/rejected": -339.1661682128906, |
| "eval_loss": 0.7524814605712891, |
| "eval_rewards/accuracies": 0.78125, |
| "eval_rewards/chosen": -4.44656229019165, |
| "eval_rewards/margins": 3.773061752319336, |
| "eval_rewards/rejected": -8.219624519348145, |
| "eval_runtime": 58.809, |
| "eval_samples_per_second": 17.004, |
| "eval_steps_per_second": 0.272, |
| "step": 5500 |
| }, |
| { |
| "epoch": 2.84, |
| "learning_rate": 2.8781793842034804e-08, |
| "logits/chosen": -2.426349401473999, |
| "logits/rejected": -2.384749174118042, |
| "logps/chosen": -259.9743347167969, |
| "logps/rejected": -300.3887634277344, |
| "loss": 0.015, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -2.3269202709198, |
| "rewards/margins": 8.843810081481934, |
| "rewards/rejected": -11.170731544494629, |
| "step": 5510 |
| }, |
| { |
| "epoch": 2.85, |
| "learning_rate": 2.782558806655192e-08, |
| "logits/chosen": -2.5341413021087646, |
| "logits/rejected": -2.5924274921417236, |
| "logps/chosen": -302.21563720703125, |
| "logps/rejected": -409.6150817871094, |
| "loss": 0.0134, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.3284962177276611, |
| "rewards/margins": 9.481134414672852, |
| "rewards/rejected": -10.809629440307617, |
| "step": 5520 |
| }, |
| { |
| "epoch": 2.85, |
| "learning_rate": 2.6869382291069035e-08, |
| "logits/chosen": -2.4547677040100098, |
| "logits/rejected": -2.457869052886963, |
| "logps/chosen": -229.97561645507812, |
| "logps/rejected": -327.1774597167969, |
| "loss": 0.0107, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.2366197109222412, |
| "rewards/margins": 10.809396743774414, |
| "rewards/rejected": -12.04601764678955, |
| "step": 5530 |
| }, |
| { |
| "epoch": 2.86, |
| "learning_rate": 2.591317651558615e-08, |
| "logits/chosen": -2.308411121368408, |
| "logits/rejected": -2.4219300746917725, |
| "logps/chosen": -283.2604675292969, |
| "logps/rejected": -351.8711242675781, |
| "loss": 0.004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.108804225921631, |
| "rewards/margins": 10.290410995483398, |
| "rewards/rejected": -12.399213790893555, |
| "step": 5540 |
| }, |
| { |
| "epoch": 2.87, |
| "learning_rate": 2.4956970740103267e-08, |
| "logits/chosen": -2.5385169982910156, |
| "logits/rejected": -2.4845941066741943, |
| "logps/chosen": -319.16473388671875, |
| "logps/rejected": -403.1438293457031, |
| "loss": 0.0085, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.6357721090316772, |
| "rewards/margins": 10.850339889526367, |
| "rewards/rejected": -12.486112594604492, |
| "step": 5550 |
| }, |
| { |
| "epoch": 2.87, |
| "learning_rate": 2.4000764964620386e-08, |
| "logits/chosen": -2.6393580436706543, |
| "logits/rejected": -2.731678009033203, |
| "logps/chosen": -312.54034423828125, |
| "logps/rejected": -417.93658447265625, |
| "loss": 0.0119, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.7174618244171143, |
| "rewards/margins": 9.234588623046875, |
| "rewards/rejected": -10.952049255371094, |
| "step": 5560 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 2.30445591891375e-08, |
| "logits/chosen": -2.5211081504821777, |
| "logits/rejected": -2.4729580879211426, |
| "logps/chosen": -307.0782775878906, |
| "logps/rejected": -450.36962890625, |
| "loss": 0.0088, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.05847327783703804, |
| "rewards/margins": 13.286686897277832, |
| "rewards/rejected": -13.228212356567383, |
| "step": 5570 |
| }, |
| { |
| "epoch": 2.88, |
| "learning_rate": 2.2088353413654617e-08, |
| "logits/chosen": -2.313760757446289, |
| "logits/rejected": -2.362217664718628, |
| "logps/chosen": -294.3525390625, |
| "logps/rejected": -331.1678771972656, |
| "loss": 0.0149, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.203790307044983, |
| "rewards/margins": 14.620699882507324, |
| "rewards/rejected": -13.416910171508789, |
| "step": 5580 |
| }, |
| { |
| "epoch": 2.89, |
| "learning_rate": 2.1132147638171733e-08, |
| "logits/chosen": -2.0504655838012695, |
| "logits/rejected": -2.1224112510681152, |
| "logps/chosen": -262.3179626464844, |
| "logps/rejected": -329.19732666015625, |
| "loss": 0.0119, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7382757067680359, |
| "rewards/margins": 10.974761962890625, |
| "rewards/rejected": -11.713037490844727, |
| "step": 5590 |
| }, |
| { |
| "epoch": 2.89, |
| "learning_rate": 2.0175941862688848e-08, |
| "logits/chosen": -2.4296658039093018, |
| "logits/rejected": -2.384312391281128, |
| "logps/chosen": -263.25592041015625, |
| "logps/rejected": -385.36688232421875, |
| "loss": 0.0077, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -1.0216162204742432, |
| "rewards/margins": 11.878069877624512, |
| "rewards/rejected": -12.899686813354492, |
| "step": 5600 |
| }, |
| { |
| "epoch": 2.89, |
| "eval_logits/chosen": -2.351677894592285, |
| "eval_logits/rejected": -2.3077552318573, |
| "eval_logps/chosen": -299.8206481933594, |
| "eval_logps/rejected": -340.4544677734375, |
| "eval_loss": 0.7519664168357849, |
| "eval_rewards/accuracies": 0.796875, |
| "eval_rewards/chosen": -4.5585551261901855, |
| "eval_rewards/margins": 3.7899010181427, |
| "eval_rewards/rejected": -8.348456382751465, |
| "eval_runtime": 57.0149, |
| "eval_samples_per_second": 17.539, |
| "eval_steps_per_second": 0.281, |
| "step": 5600 |
| }, |
| { |
| "epoch": 2.9, |
| "learning_rate": 1.9219736087205964e-08, |
| "logits/chosen": -2.4466593265533447, |
| "logits/rejected": -2.5641415119171143, |
| "logps/chosen": -313.849609375, |
| "logps/rejected": -355.71954345703125, |
| "loss": 0.0183, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.058788836002349854, |
| "rewards/margins": 9.841516494750977, |
| "rewards/rejected": -9.782726287841797, |
| "step": 5610 |
| }, |
| { |
| "epoch": 2.9, |
| "learning_rate": 1.826353031172308e-08, |
| "logits/chosen": -2.6215555667877197, |
| "logits/rejected": -2.596318244934082, |
| "logps/chosen": -334.0271911621094, |
| "logps/rejected": -374.85211181640625, |
| "loss": 0.0108, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.0268580913543701, |
| "rewards/margins": 10.430700302124023, |
| "rewards/rejected": -11.45755672454834, |
| "step": 5620 |
| }, |
| { |
| "epoch": 2.91, |
| "learning_rate": 1.73073245362402e-08, |
| "logits/chosen": -2.3654887676239014, |
| "logits/rejected": -2.3259222507476807, |
| "logps/chosen": -418.31524658203125, |
| "logps/rejected": -296.1111145019531, |
| "loss": 0.023, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.1241583824157715, |
| "rewards/margins": 9.49864673614502, |
| "rewards/rejected": -11.622804641723633, |
| "step": 5630 |
| }, |
| { |
| "epoch": 2.91, |
| "learning_rate": 1.6351118760757314e-08, |
| "logits/chosen": -2.4572885036468506, |
| "logits/rejected": -2.4687421321868896, |
| "logps/chosen": -250.23764038085938, |
| "logps/rejected": -250.4460906982422, |
| "loss": 0.0111, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -0.8683540225028992, |
| "rewards/margins": 8.310081481933594, |
| "rewards/rejected": -9.178436279296875, |
| "step": 5640 |
| }, |
| { |
| "epoch": 2.92, |
| "learning_rate": 1.539491298527443e-08, |
| "logits/chosen": -2.6408703327178955, |
| "logits/rejected": -2.641308307647705, |
| "logps/chosen": -283.0168151855469, |
| "logps/rejected": -405.4056701660156, |
| "loss": 0.0036, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7451741099357605, |
| "rewards/margins": 11.91575813293457, |
| "rewards/rejected": -12.660932540893555, |
| "step": 5650 |
| }, |
| { |
| "epoch": 2.92, |
| "learning_rate": 1.4438707209791546e-08, |
| "logits/chosen": -2.531616687774658, |
| "logits/rejected": -2.6468756198883057, |
| "logps/chosen": -414.9684143066406, |
| "logps/rejected": -356.252685546875, |
| "loss": 0.0157, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -0.21153855323791504, |
| "rewards/margins": 10.415987014770508, |
| "rewards/rejected": -10.627525329589844, |
| "step": 5660 |
| }, |
| { |
| "epoch": 2.93, |
| "learning_rate": 1.3482501434308661e-08, |
| "logits/chosen": -2.300788402557373, |
| "logits/rejected": -2.308450937271118, |
| "logps/chosen": -356.91632080078125, |
| "logps/rejected": -350.33892822265625, |
| "loss": 0.015, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.6116657257080078, |
| "rewards/margins": 11.264276504516602, |
| "rewards/rejected": -12.875943183898926, |
| "step": 5670 |
| }, |
| { |
| "epoch": 2.93, |
| "learning_rate": 1.2526295658825777e-08, |
| "logits/chosen": -2.603456497192383, |
| "logits/rejected": -2.6166439056396484, |
| "logps/chosen": -311.9185791015625, |
| "logps/rejected": -450.9242248535156, |
| "loss": 0.0147, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -1.2669516801834106, |
| "rewards/margins": 11.346095085144043, |
| "rewards/rejected": -12.613046646118164, |
| "step": 5680 |
| }, |
| { |
| "epoch": 2.94, |
| "learning_rate": 1.1570089883342895e-08, |
| "logits/chosen": -2.5220370292663574, |
| "logits/rejected": -2.4531850814819336, |
| "logps/chosen": -311.7686462402344, |
| "logps/rejected": -400.8462829589844, |
| "loss": 0.0135, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.0794099569320679, |
| "rewards/margins": 12.467567443847656, |
| "rewards/rejected": -13.546978950500488, |
| "step": 5690 |
| }, |
| { |
| "epoch": 2.94, |
| "learning_rate": 1.061388410786001e-08, |
| "logits/chosen": -2.4289803504943848, |
| "logits/rejected": -2.4906742572784424, |
| "logps/chosen": -285.3009338378906, |
| "logps/rejected": -263.3441467285156, |
| "loss": 0.0094, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.07789945602417, |
| "rewards/margins": 7.724274635314941, |
| "rewards/rejected": -9.80217456817627, |
| "step": 5700 |
| }, |
| { |
| "epoch": 2.94, |
| "eval_logits/chosen": -2.3509910106658936, |
| "eval_logits/rejected": -2.3062477111816406, |
| "eval_logps/chosen": -299.77734375, |
| "eval_logps/rejected": -340.47900390625, |
| "eval_loss": 0.7527089715003967, |
| "eval_rewards/accuracies": 0.78125, |
| "eval_rewards/chosen": -4.5542216300964355, |
| "eval_rewards/margins": 3.7966880798339844, |
| "eval_rewards/rejected": -8.350910186767578, |
| "eval_runtime": 55.9629, |
| "eval_samples_per_second": 17.869, |
| "eval_steps_per_second": 0.286, |
| "step": 5700 |
| }, |
| { |
| "epoch": 2.95, |
| "learning_rate": 9.657678332377126e-09, |
| "logits/chosen": -2.4378364086151123, |
| "logits/rejected": -2.5011210441589355, |
| "logps/chosen": -278.77166748046875, |
| "logps/rejected": -327.8222351074219, |
| "loss": 0.0145, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -1.6567916870117188, |
| "rewards/margins": 9.313983917236328, |
| "rewards/rejected": -10.970773696899414, |
| "step": 5710 |
| }, |
| { |
| "epoch": 2.95, |
| "learning_rate": 8.701472556894243e-09, |
| "logits/chosen": -2.4347808361053467, |
| "logits/rejected": -2.4027464389801025, |
| "logps/chosen": -301.68988037109375, |
| "logps/rejected": -355.3216247558594, |
| "loss": 0.0049, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.6911423206329346, |
| "rewards/margins": 10.72395133972168, |
| "rewards/rejected": -13.415092468261719, |
| "step": 5720 |
| }, |
| { |
| "epoch": 2.96, |
| "learning_rate": 7.745266781411359e-09, |
| "logits/chosen": -2.4534902572631836, |
| "logits/rejected": -2.554394006729126, |
| "logps/chosen": -260.3663635253906, |
| "logps/rejected": -410.23223876953125, |
| "loss": 0.0164, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.0108587741851807, |
| "rewards/margins": 9.392390251159668, |
| "rewards/rejected": -11.403249740600586, |
| "step": 5730 |
| }, |
| { |
| "epoch": 2.96, |
| "learning_rate": 6.7890610059284754e-09, |
| "logits/chosen": -2.566368579864502, |
| "logits/rejected": -2.585576057434082, |
| "logps/chosen": -261.0205993652344, |
| "logps/rejected": -348.109619140625, |
| "loss": 0.0074, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.3029209077358246, |
| "rewards/margins": 10.11386775970459, |
| "rewards/rejected": -10.416789054870605, |
| "step": 5740 |
| }, |
| { |
| "epoch": 2.97, |
| "learning_rate": 5.832855230445592e-09, |
| "logits/chosen": -2.5495338439941406, |
| "logits/rejected": -2.4890074729919434, |
| "logps/chosen": -247.47286987304688, |
| "logps/rejected": -320.06011962890625, |
| "loss": 0.0072, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -0.9899286031723022, |
| "rewards/margins": 10.633166313171387, |
| "rewards/rejected": -11.62309455871582, |
| "step": 5750 |
| }, |
| { |
| "epoch": 2.97, |
| "learning_rate": 4.8766494549627085e-09, |
| "logits/chosen": -2.60798978805542, |
| "logits/rejected": -2.477149486541748, |
| "logps/chosen": -305.1927795410156, |
| "logps/rejected": -318.5039978027344, |
| "loss": 0.017, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.8671627044677734, |
| "rewards/margins": 9.939409255981445, |
| "rewards/rejected": -11.806573867797852, |
| "step": 5760 |
| }, |
| { |
| "epoch": 2.98, |
| "learning_rate": 3.920443679479824e-09, |
| "logits/chosen": -2.545316219329834, |
| "logits/rejected": -2.5275652408599854, |
| "logps/chosen": -292.89263916015625, |
| "logps/rejected": -323.63385009765625, |
| "loss": 0.0161, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.3659417629241943, |
| "rewards/margins": 9.62957763671875, |
| "rewards/rejected": -11.995519638061523, |
| "step": 5770 |
| }, |
| { |
| "epoch": 2.98, |
| "learning_rate": 2.96423790399694e-09, |
| "logits/chosen": -2.48178768157959, |
| "logits/rejected": -2.6639437675476074, |
| "logps/chosen": -219.31777954101562, |
| "logps/rejected": -324.5710754394531, |
| "loss": 0.008, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.672484040260315, |
| "rewards/margins": 8.533833503723145, |
| "rewards/rejected": -10.206315994262695, |
| "step": 5780 |
| }, |
| { |
| "epoch": 2.99, |
| "learning_rate": 2.008032128514056e-09, |
| "logits/chosen": -2.54166841506958, |
| "logits/rejected": -2.5910754203796387, |
| "logps/chosen": -343.8594665527344, |
| "logps/rejected": -451.935791015625, |
| "loss": 0.011, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.7438589334487915, |
| "rewards/margins": 10.097026824951172, |
| "rewards/rejected": -11.840886116027832, |
| "step": 5790 |
| }, |
| { |
| "epoch": 2.99, |
| "learning_rate": 1.0518263530311723e-09, |
| "logits/chosen": -2.5881881713867188, |
| "logits/rejected": -2.5880398750305176, |
| "logps/chosen": -201.51014709472656, |
| "logps/rejected": -326.72650146484375, |
| "loss": 0.0054, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4769667983055115, |
| "rewards/margins": 9.810731887817383, |
| "rewards/rejected": -10.287699699401855, |
| "step": 5800 |
| }, |
| { |
| "epoch": 2.99, |
| "eval_logits/chosen": -2.353024482727051, |
| "eval_logits/rejected": -2.308088779449463, |
| "eval_logps/chosen": -299.4037780761719, |
| "eval_logps/rejected": -340.0493469238281, |
| "eval_loss": 0.7519845962524414, |
| "eval_rewards/accuracies": 0.78125, |
| "eval_rewards/chosen": -4.5168681144714355, |
| "eval_rewards/margins": 3.791072130203247, |
| "eval_rewards/rejected": -8.307940483093262, |
| "eval_runtime": 55.3708, |
| "eval_samples_per_second": 18.06, |
| "eval_steps_per_second": 0.289, |
| "step": 5800 |
| }, |
| { |
| "epoch": 3.0, |
| "learning_rate": 9.562057754828839e-11, |
| "logits/chosen": -2.476783514022827, |
| "logits/rejected": -2.4620718955993652, |
| "logps/chosen": -259.29327392578125, |
| "logps/rejected": -435.9239196777344, |
| "loss": 0.015, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.16803565621376038, |
| "rewards/margins": 8.936319351196289, |
| "rewards/rejected": -8.768282890319824, |
| "step": 5810 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 5811, |
| "total_flos": 0.0, |
| "train_loss": 0.2172969928600547, |
| "train_runtime": 23865.9828, |
| "train_samples_per_second": 7.789, |
| "train_steps_per_second": 0.243 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 5811, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "total_flos": 0.0, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|