| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 500, | |
| "global_step": 2154, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.013927576601671309, | |
| "grad_norm": 4.152076244354248, | |
| "learning_rate": 4.988393686165274e-05, | |
| "logits/chosen": -62.701507568359375, | |
| "logits/rejected": -68.97389221191406, | |
| "logps/chosen": -198.8888702392578, | |
| "logps/rejected": -61.155731201171875, | |
| "loss": 0.4537, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": 0.9171991348266602, | |
| "rewards/margins": 0.8940274119377136, | |
| "rewards/rejected": 0.02317170798778534, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.027855153203342618, | |
| "grad_norm": 1.4107317924499512, | |
| "learning_rate": 4.965181058495822e-05, | |
| "logits/chosen": -62.833290100097656, | |
| "logits/rejected": -69.8056411743164, | |
| "logps/chosen": -184.6682586669922, | |
| "logps/rejected": -70.85790252685547, | |
| "loss": 0.0524, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.683924913406372, | |
| "rewards/margins": 3.4110190868377686, | |
| "rewards/rejected": -0.7270944714546204, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.04178272980501393, | |
| "grad_norm": 0.09278041124343872, | |
| "learning_rate": 4.9419684308263696e-05, | |
| "logits/chosen": -61.66850662231445, | |
| "logits/rejected": -68.24287414550781, | |
| "logps/chosen": -187.5883331298828, | |
| "logps/rejected": -84.70890808105469, | |
| "loss": 0.0225, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.7358405590057373, | |
| "rewards/margins": 4.847321510314941, | |
| "rewards/rejected": -2.111480712890625, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.055710306406685235, | |
| "grad_norm": 0.0695909857749939, | |
| "learning_rate": 4.918755803156918e-05, | |
| "logits/chosen": -60.128639221191406, | |
| "logits/rejected": -69.80757141113281, | |
| "logps/chosen": -201.54000854492188, | |
| "logps/rejected": -101.98795318603516, | |
| "loss": 0.0066, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.8020272254943848, | |
| "rewards/margins": 6.231036186218262, | |
| "rewards/rejected": -3.4290084838867188, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.06963788300835655, | |
| "grad_norm": 0.05616675317287445, | |
| "learning_rate": 4.895543175487465e-05, | |
| "logits/chosen": -62.200286865234375, | |
| "logits/rejected": -70.98320007324219, | |
| "logps/chosen": -196.42550659179688, | |
| "logps/rejected": -113.12223815917969, | |
| "loss": 0.0064, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.6243250370025635, | |
| "rewards/margins": 7.210093021392822, | |
| "rewards/rejected": -4.585768699645996, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.08356545961002786, | |
| "grad_norm": 0.11463262885808945, | |
| "learning_rate": 4.872330547818013e-05, | |
| "logits/chosen": -60.18315505981445, | |
| "logits/rejected": -67.06063842773438, | |
| "logps/chosen": -181.308837890625, | |
| "logps/rejected": -111.84517669677734, | |
| "loss": 0.0029, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.787447929382324, | |
| "rewards/margins": 7.701785087585449, | |
| "rewards/rejected": -4.914338111877441, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.09749303621169916, | |
| "grad_norm": 0.547484815120697, | |
| "learning_rate": 4.849117920148561e-05, | |
| "logits/chosen": -62.386741638183594, | |
| "logits/rejected": -70.57563781738281, | |
| "logps/chosen": -181.2909698486328, | |
| "logps/rejected": -112.4539566040039, | |
| "loss": 0.0112, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.940854787826538, | |
| "rewards/margins": 7.5811638832092285, | |
| "rewards/rejected": -4.6403093338012695, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.11142061281337047, | |
| "grad_norm": 0.0395994707942009, | |
| "learning_rate": 4.825905292479109e-05, | |
| "logits/chosen": -59.95817947387695, | |
| "logits/rejected": -66.5330581665039, | |
| "logps/chosen": -188.38150024414062, | |
| "logps/rejected": -113.49845123291016, | |
| "loss": 0.0032, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.9799869060516357, | |
| "rewards/margins": 8.083974838256836, | |
| "rewards/rejected": -5.103987693786621, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.12534818941504178, | |
| "grad_norm": 0.015164317563176155, | |
| "learning_rate": 4.8026926648096564e-05, | |
| "logits/chosen": -59.706085205078125, | |
| "logits/rejected": -65.60169219970703, | |
| "logps/chosen": -183.0960235595703, | |
| "logps/rejected": -116.09562683105469, | |
| "loss": 0.0022, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.0736746788024902, | |
| "rewards/margins": 8.412053108215332, | |
| "rewards/rejected": -5.338377952575684, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.1392757660167131, | |
| "grad_norm": 0.02766968123614788, | |
| "learning_rate": 4.7794800371402045e-05, | |
| "logits/chosen": -58.617881774902344, | |
| "logits/rejected": -67.40831756591797, | |
| "logps/chosen": -186.41848754882812, | |
| "logps/rejected": -124.04377746582031, | |
| "loss": 0.001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.1033053398132324, | |
| "rewards/margins": 9.324697494506836, | |
| "rewards/rejected": -6.2213921546936035, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.1532033426183844, | |
| "grad_norm": 0.00412454828619957, | |
| "learning_rate": 4.756267409470752e-05, | |
| "logits/chosen": -60.01245880126953, | |
| "logits/rejected": -75.11415100097656, | |
| "logps/chosen": -185.37948608398438, | |
| "logps/rejected": -135.00729370117188, | |
| "loss": 0.0011, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.50886869430542, | |
| "rewards/margins": 9.430502891540527, | |
| "rewards/rejected": -6.921634674072266, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.1671309192200557, | |
| "grad_norm": 0.0065751285292208195, | |
| "learning_rate": 4.7330547818013e-05, | |
| "logits/chosen": -62.41272735595703, | |
| "logits/rejected": -78.67548370361328, | |
| "logps/chosen": -192.0312957763672, | |
| "logps/rejected": -128.97535705566406, | |
| "loss": 0.0237, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.159588575363159, | |
| "rewards/margins": 9.233824729919434, | |
| "rewards/rejected": -7.0742363929748535, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.181058495821727, | |
| "grad_norm": 0.021884003654122353, | |
| "learning_rate": 4.7098421541318476e-05, | |
| "logits/chosen": -58.38056564331055, | |
| "logits/rejected": -73.96531677246094, | |
| "logps/chosen": -190.47996520996094, | |
| "logps/rejected": -132.82919311523438, | |
| "loss": 0.0012, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.546886682510376, | |
| "rewards/margins": 9.201826095581055, | |
| "rewards/rejected": -6.654940128326416, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.19498607242339833, | |
| "grad_norm": 0.019679848104715347, | |
| "learning_rate": 4.686629526462396e-05, | |
| "logits/chosen": -59.04082489013672, | |
| "logits/rejected": -72.82725524902344, | |
| "logps/chosen": -201.3197021484375, | |
| "logps/rejected": -137.27078247070312, | |
| "loss": 0.0012, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.7231216430664062, | |
| "rewards/margins": 9.771955490112305, | |
| "rewards/rejected": -7.048834323883057, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.20891364902506965, | |
| "grad_norm": 0.024676833301782608, | |
| "learning_rate": 4.663416898792944e-05, | |
| "logits/chosen": -57.91033172607422, | |
| "logits/rejected": -75.91812896728516, | |
| "logps/chosen": -193.44387817382812, | |
| "logps/rejected": -137.32614135742188, | |
| "loss": 0.0005, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.3082289695739746, | |
| "rewards/margins": 9.814374923706055, | |
| "rewards/rejected": -7.506146430969238, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.22284122562674094, | |
| "grad_norm": 0.35007965564727783, | |
| "learning_rate": 4.640204271123492e-05, | |
| "logits/chosen": -59.402008056640625, | |
| "logits/rejected": -80.88484191894531, | |
| "logps/chosen": -194.41064453125, | |
| "logps/rejected": -142.43862915039062, | |
| "loss": 0.0007, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.072852373123169, | |
| "rewards/margins": 9.972261428833008, | |
| "rewards/rejected": -7.899407863616943, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.23676880222841226, | |
| "grad_norm": 0.16541853547096252, | |
| "learning_rate": 4.6169916434540394e-05, | |
| "logits/chosen": -57.10036087036133, | |
| "logits/rejected": -77.26717376708984, | |
| "logps/chosen": -188.2787322998047, | |
| "logps/rejected": -140.36428833007812, | |
| "loss": 0.0011, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.387071132659912, | |
| "rewards/margins": 10.67628002166748, | |
| "rewards/rejected": -8.289209365844727, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.25069637883008355, | |
| "grad_norm": 0.03563378006219864, | |
| "learning_rate": 4.5937790157845876e-05, | |
| "logits/chosen": -56.153770446777344, | |
| "logits/rejected": -69.28707885742188, | |
| "logps/chosen": -180.4019775390625, | |
| "logps/rejected": -142.40525817871094, | |
| "loss": 0.002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.8976480960845947, | |
| "rewards/margins": 10.338732719421387, | |
| "rewards/rejected": -7.441084861755371, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.2646239554317549, | |
| "grad_norm": 0.06060578674077988, | |
| "learning_rate": 4.570566388115135e-05, | |
| "logits/chosen": -54.91765213012695, | |
| "logits/rejected": -62.35480499267578, | |
| "logps/chosen": -181.2240753173828, | |
| "logps/rejected": -123.99375915527344, | |
| "loss": 0.003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.1657493114471436, | |
| "rewards/margins": 9.318578720092773, | |
| "rewards/rejected": -6.152829170227051, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.2785515320334262, | |
| "grad_norm": 0.0840856060385704, | |
| "learning_rate": 4.547353760445683e-05, | |
| "logits/chosen": -59.377830505371094, | |
| "logits/rejected": -71.08708190917969, | |
| "logps/chosen": -185.47975158691406, | |
| "logps/rejected": -134.31422424316406, | |
| "loss": 0.0017, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.0341360569000244, | |
| "rewards/margins": 10.54447078704834, | |
| "rewards/rejected": -7.5103349685668945, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.2924791086350975, | |
| "grad_norm": 0.06361512094736099, | |
| "learning_rate": 4.5241411327762306e-05, | |
| "logits/chosen": -63.87712478637695, | |
| "logits/rejected": -73.06085205078125, | |
| "logps/chosen": -213.22708129882812, | |
| "logps/rejected": -147.33645629882812, | |
| "loss": 0.0009, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.229581356048584, | |
| "rewards/margins": 9.58311939239502, | |
| "rewards/rejected": -7.353537559509277, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.3064066852367688, | |
| "grad_norm": 0.046209145337343216, | |
| "learning_rate": 4.500928505106779e-05, | |
| "logits/chosen": -59.61906051635742, | |
| "logits/rejected": -73.25764465332031, | |
| "logps/chosen": -192.64942932128906, | |
| "logps/rejected": -150.16732788085938, | |
| "loss": 0.0006, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.1536495685577393, | |
| "rewards/margins": 10.42214298248291, | |
| "rewards/rejected": -8.268494606018066, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.3203342618384401, | |
| "grad_norm": 0.006761509459465742, | |
| "learning_rate": 4.477715877437326e-05, | |
| "logits/chosen": -58.5309944152832, | |
| "logits/rejected": -68.51580810546875, | |
| "logps/chosen": -193.02699279785156, | |
| "logps/rejected": -150.4070587158203, | |
| "loss": 0.0026, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.504077434539795, | |
| "rewards/margins": 10.579719543457031, | |
| "rewards/rejected": -8.075642585754395, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.3342618384401114, | |
| "grad_norm": 0.007206082809716463, | |
| "learning_rate": 4.4545032497678744e-05, | |
| "logits/chosen": -56.670623779296875, | |
| "logits/rejected": -67.32600402832031, | |
| "logps/chosen": -164.60655212402344, | |
| "logps/rejected": -152.1390838623047, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.570728302001953, | |
| "rewards/margins": 11.153335571289062, | |
| "rewards/rejected": -8.582606315612793, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.34818941504178275, | |
| "grad_norm": 0.0010453248396515846, | |
| "learning_rate": 4.431290622098422e-05, | |
| "logits/chosen": -57.710182189941406, | |
| "logits/rejected": -67.71039581298828, | |
| "logps/chosen": -192.8719940185547, | |
| "logps/rejected": -145.4492950439453, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.7189605236053467, | |
| "rewards/margins": 11.462281227111816, | |
| "rewards/rejected": -8.743319511413574, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.362116991643454, | |
| "grad_norm": 0.0006360734696500003, | |
| "learning_rate": 4.40807799442897e-05, | |
| "logits/chosen": -57.6457633972168, | |
| "logits/rejected": -73.46452331542969, | |
| "logps/chosen": -192.46934509277344, | |
| "logps/rejected": -158.818115234375, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.0252301692962646, | |
| "rewards/margins": 11.26839542388916, | |
| "rewards/rejected": -9.2431640625, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.37604456824512533, | |
| "grad_norm": 0.007503910455852747, | |
| "learning_rate": 4.3848653667595174e-05, | |
| "logits/chosen": -55.48795700073242, | |
| "logits/rejected": -67.17798614501953, | |
| "logps/chosen": -167.91905212402344, | |
| "logps/rejected": -146.4444122314453, | |
| "loss": 0.0206, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 3.000150203704834, | |
| "rewards/margins": 11.605803489685059, | |
| "rewards/rejected": -8.605653762817383, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.38997214484679665, | |
| "grad_norm": 0.04008936882019043, | |
| "learning_rate": 4.3616527390900656e-05, | |
| "logits/chosen": -56.12969207763672, | |
| "logits/rejected": -61.01743698120117, | |
| "logps/chosen": -187.25384521484375, | |
| "logps/rejected": -127.84864807128906, | |
| "loss": 0.0007, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.0087008476257324, | |
| "rewards/margins": 9.433700561523438, | |
| "rewards/rejected": -6.424999237060547, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.403899721448468, | |
| "grad_norm": 0.013929404318332672, | |
| "learning_rate": 4.338440111420613e-05, | |
| "logits/chosen": -55.58070755004883, | |
| "logits/rejected": -61.17802810668945, | |
| "logps/chosen": -188.1905517578125, | |
| "logps/rejected": -136.31631469726562, | |
| "loss": 0.0008, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.6118099689483643, | |
| "rewards/margins": 9.186898231506348, | |
| "rewards/rejected": -6.575087547302246, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.4178272980501393, | |
| "grad_norm": 0.010859617963433266, | |
| "learning_rate": 4.3152274837511605e-05, | |
| "logits/chosen": -57.99853515625, | |
| "logits/rejected": -67.6804428100586, | |
| "logps/chosen": -188.915283203125, | |
| "logps/rejected": -148.61837768554688, | |
| "loss": 0.0007, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.471911907196045, | |
| "rewards/margins": 11.200289726257324, | |
| "rewards/rejected": -8.728376388549805, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.43175487465181056, | |
| "grad_norm": 0.006545162294059992, | |
| "learning_rate": 4.2920148560817086e-05, | |
| "logits/chosen": -58.74571990966797, | |
| "logits/rejected": -69.44742584228516, | |
| "logps/chosen": -204.63160705566406, | |
| "logps/rejected": -162.09996032714844, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.9170233011245728, | |
| "rewards/margins": 11.851190567016602, | |
| "rewards/rejected": -9.934167861938477, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.4456824512534819, | |
| "grad_norm": 0.0008512301137670875, | |
| "learning_rate": 4.268802228412256e-05, | |
| "logits/chosen": -58.301719665527344, | |
| "logits/rejected": -70.69847106933594, | |
| "logps/chosen": -183.65023803710938, | |
| "logps/rejected": -150.41163635253906, | |
| "loss": 0.0006, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.81954288482666, | |
| "rewards/margins": 11.889869689941406, | |
| "rewards/rejected": -9.07032585144043, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.4596100278551532, | |
| "grad_norm": 0.01963173598051071, | |
| "learning_rate": 4.245589600742804e-05, | |
| "logits/chosen": -56.879295349121094, | |
| "logits/rejected": -61.67259979248047, | |
| "logps/chosen": -171.37228393554688, | |
| "logps/rejected": -126.36521911621094, | |
| "loss": 0.0011, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.5972225666046143, | |
| "rewards/margins": 9.433979988098145, | |
| "rewards/rejected": -5.836757659912109, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.4735376044568245, | |
| "grad_norm": 0.08404721319675446, | |
| "learning_rate": 4.222376973073352e-05, | |
| "logits/chosen": -58.5677375793457, | |
| "logits/rejected": -64.17628479003906, | |
| "logps/chosen": -175.2589569091797, | |
| "logps/rejected": -128.47628784179688, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.6719391345977783, | |
| "rewards/margins": 9.350198745727539, | |
| "rewards/rejected": -5.6782612800598145, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.48746518105849584, | |
| "grad_norm": 0.00980888307094574, | |
| "learning_rate": 4.1991643454039e-05, | |
| "logits/chosen": -59.190216064453125, | |
| "logits/rejected": -64.348388671875, | |
| "logps/chosen": -185.35513305664062, | |
| "logps/rejected": -121.76618957519531, | |
| "loss": 0.0006, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.289034366607666, | |
| "rewards/margins": 10.052196502685547, | |
| "rewards/rejected": -6.7631635665893555, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.5013927576601671, | |
| "grad_norm": 0.0020706213545054197, | |
| "learning_rate": 4.175951717734447e-05, | |
| "logits/chosen": -58.2427864074707, | |
| "logits/rejected": -60.856529235839844, | |
| "logps/chosen": -194.89059448242188, | |
| "logps/rejected": -143.33876037597656, | |
| "loss": 0.0005, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.073824405670166, | |
| "rewards/margins": 10.457071304321289, | |
| "rewards/rejected": -7.383247375488281, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.5153203342618384, | |
| "grad_norm": 0.058567263185977936, | |
| "learning_rate": 4.1527390900649954e-05, | |
| "logits/chosen": -57.275421142578125, | |
| "logits/rejected": -59.9005012512207, | |
| "logps/chosen": -190.70034790039062, | |
| "logps/rejected": -146.0443878173828, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.392638683319092, | |
| "rewards/margins": 11.20268440246582, | |
| "rewards/rejected": -7.810046195983887, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.5292479108635098, | |
| "grad_norm": 0.04418317973613739, | |
| "learning_rate": 4.129526462395543e-05, | |
| "logits/chosen": -55.69623565673828, | |
| "logits/rejected": -59.0958366394043, | |
| "logps/chosen": -190.6586151123047, | |
| "logps/rejected": -140.555419921875, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.8878097534179688, | |
| "rewards/margins": 10.44308090209961, | |
| "rewards/rejected": -7.555271148681641, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.5431754874651811, | |
| "grad_norm": 0.03342704474925995, | |
| "learning_rate": 4.106313834726091e-05, | |
| "logits/chosen": -56.76637649536133, | |
| "logits/rejected": -57.29325485229492, | |
| "logps/chosen": -188.15127563476562, | |
| "logps/rejected": -150.76486206054688, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.7517261505126953, | |
| "rewards/margins": 11.091150283813477, | |
| "rewards/rejected": -8.339423179626465, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.5571030640668524, | |
| "grad_norm": 0.0036266562528908253, | |
| "learning_rate": 4.0831012070566385e-05, | |
| "logits/chosen": -56.438453674316406, | |
| "logits/rejected": -58.3618049621582, | |
| "logps/chosen": -189.70164489746094, | |
| "logps/rejected": -153.29364013671875, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.944225788116455, | |
| "rewards/margins": 11.944395065307617, | |
| "rewards/rejected": -9.000168800354004, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.5710306406685237, | |
| "grad_norm": 0.2346808910369873, | |
| "learning_rate": 4.0598885793871866e-05, | |
| "logits/chosen": -55.23307418823242, | |
| "logits/rejected": -58.60841751098633, | |
| "logps/chosen": -192.96762084960938, | |
| "logps/rejected": -159.66629028320312, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.9198691844940186, | |
| "rewards/margins": 12.067418098449707, | |
| "rewards/rejected": -9.14754867553711, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.584958217270195, | |
| "grad_norm": 0.011593617498874664, | |
| "learning_rate": 4.036675951717734e-05, | |
| "logits/chosen": -54.3653564453125, | |
| "logits/rejected": -54.57688522338867, | |
| "logps/chosen": -199.13137817382812, | |
| "logps/rejected": -155.0882568359375, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.6887240409851074, | |
| "rewards/margins": 11.552785873413086, | |
| "rewards/rejected": -8.86406135559082, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.5988857938718662, | |
| "grad_norm": 0.0311275701969862, | |
| "learning_rate": 4.013463324048282e-05, | |
| "logits/chosen": -54.45751190185547, | |
| "logits/rejected": -55.70067596435547, | |
| "logps/chosen": -180.8164825439453, | |
| "logps/rejected": -157.07566833496094, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.0822670459747314, | |
| "rewards/margins": 12.752963066101074, | |
| "rewards/rejected": -9.670696258544922, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.6128133704735376, | |
| "grad_norm": 0.0015138484304770827, | |
| "learning_rate": 3.9902506963788303e-05, | |
| "logits/chosen": -53.217491149902344, | |
| "logits/rejected": -55.91618728637695, | |
| "logps/chosen": -197.72886657714844, | |
| "logps/rejected": -155.79360961914062, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.0949454307556152, | |
| "rewards/margins": 12.46452808380127, | |
| "rewards/rejected": -9.36958122253418, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.6267409470752089, | |
| "grad_norm": 0.0017882110550999641, | |
| "learning_rate": 3.9670380687093785e-05, | |
| "logits/chosen": -53.5261116027832, | |
| "logits/rejected": -53.344627380371094, | |
| "logps/chosen": -178.9260711669922, | |
| "logps/rejected": -149.0911407470703, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.9560890197753906, | |
| "rewards/margins": 12.503755569458008, | |
| "rewards/rejected": -9.5476655960083, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.6406685236768802, | |
| "grad_norm": 0.0048352451995015144, | |
| "learning_rate": 3.943825441039926e-05, | |
| "logits/chosen": -51.4307746887207, | |
| "logits/rejected": -46.89828872680664, | |
| "logps/chosen": -165.9393768310547, | |
| "logps/rejected": -152.39059448242188, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.4784343242645264, | |
| "rewards/margins": 13.470016479492188, | |
| "rewards/rejected": -9.991582870483398, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.6545961002785515, | |
| "grad_norm": 0.00039086639299057424, | |
| "learning_rate": 3.920612813370474e-05, | |
| "logits/chosen": -52.365867614746094, | |
| "logits/rejected": -54.72825241088867, | |
| "logps/chosen": -174.74122619628906, | |
| "logps/rejected": -155.45237731933594, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.434833526611328, | |
| "rewards/margins": 12.323007583618164, | |
| "rewards/rejected": -8.888172149658203, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.6685236768802229, | |
| "grad_norm": 0.003697748063132167, | |
| "learning_rate": 3.8974001857010215e-05, | |
| "logits/chosen": -52.16254425048828, | |
| "logits/rejected": -54.24531936645508, | |
| "logps/chosen": -181.53611755371094, | |
| "logps/rejected": -157.99269104003906, | |
| "loss": 0.0009, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.4443488121032715, | |
| "rewards/margins": 12.453468322753906, | |
| "rewards/rejected": -9.009119033813477, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.6824512534818942, | |
| "grad_norm": 0.002408905653283, | |
| "learning_rate": 3.87418755803157e-05, | |
| "logits/chosen": -51.06204605102539, | |
| "logits/rejected": -50.75390625, | |
| "logps/chosen": -188.5814666748047, | |
| "logps/rejected": -163.3162078857422, | |
| "loss": 0.0008, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.4482767581939697, | |
| "rewards/margins": 11.760849952697754, | |
| "rewards/rejected": -9.312572479248047, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.6963788300835655, | |
| "grad_norm": 0.00516551174223423, | |
| "learning_rate": 3.850974930362117e-05, | |
| "logits/chosen": -50.288543701171875, | |
| "logits/rejected": -50.007869720458984, | |
| "logps/chosen": -195.97866821289062, | |
| "logps/rejected": -161.41064453125, | |
| "loss": 0.0076, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.8668901920318604, | |
| "rewards/margins": 11.639043807983398, | |
| "rewards/rejected": -9.772153854370117, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.7103064066852368, | |
| "grad_norm": 0.05361781641840935, | |
| "learning_rate": 3.827762302692665e-05, | |
| "logits/chosen": -51.25343704223633, | |
| "logits/rejected": -51.83671951293945, | |
| "logps/chosen": -194.06118774414062, | |
| "logps/rejected": -160.32118225097656, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.963975429534912, | |
| "rewards/margins": 11.554098129272461, | |
| "rewards/rejected": -8.590123176574707, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.724233983286908, | |
| "grad_norm": 0.058117084205150604, | |
| "learning_rate": 3.804549675023213e-05, | |
| "logits/chosen": -51.83235549926758, | |
| "logits/rejected": -52.084144592285156, | |
| "logps/chosen": -192.75059509277344, | |
| "logps/rejected": -153.1986846923828, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.5486092567443848, | |
| "rewards/margins": 11.368444442749023, | |
| "rewards/rejected": -8.81983470916748, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.7381615598885793, | |
| "grad_norm": 0.0007053585723042488, | |
| "learning_rate": 3.781337047353761e-05, | |
| "logits/chosen": -51.002742767333984, | |
| "logits/rejected": -50.87931823730469, | |
| "logps/chosen": -194.43157958984375, | |
| "logps/rejected": -167.8184356689453, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.1437911987304688, | |
| "rewards/margins": 12.299717903137207, | |
| "rewards/rejected": -10.155926704406738, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.7520891364902507, | |
| "grad_norm": 0.0001465307577745989, | |
| "learning_rate": 3.758124419684308e-05, | |
| "logits/chosen": -52.29041290283203, | |
| "logits/rejected": -55.17450714111328, | |
| "logps/chosen": -182.52536010742188, | |
| "logps/rejected": -171.62203979492188, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.4032492637634277, | |
| "rewards/margins": 13.419294357299805, | |
| "rewards/rejected": -11.016047477722168, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.766016713091922, | |
| "grad_norm": 0.004270358011126518, | |
| "learning_rate": 3.7349117920148565e-05, | |
| "logits/chosen": -51.10778045654297, | |
| "logits/rejected": -51.92633819580078, | |
| "logps/chosen": -183.72921752929688, | |
| "logps/rejected": -163.09469604492188, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.8001818656921387, | |
| "rewards/margins": 13.584017753601074, | |
| "rewards/rejected": -10.78383731842041, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.7799442896935933, | |
| "grad_norm": 0.0015566727379336953, | |
| "learning_rate": 3.711699164345404e-05, | |
| "logits/chosen": -51.11431884765625, | |
| "logits/rejected": -55.25889205932617, | |
| "logps/chosen": -181.35389709472656, | |
| "logps/rejected": -168.32589721679688, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.060955762863159, | |
| "rewards/margins": 11.915121078491211, | |
| "rewards/rejected": -9.854166030883789, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.7938718662952646, | |
| "grad_norm": 0.0005651906249113381, | |
| "learning_rate": 3.688486536675952e-05, | |
| "logits/chosen": -52.69512176513672, | |
| "logits/rejected": -53.26996994018555, | |
| "logps/chosen": -211.1423797607422, | |
| "logps/rejected": -173.87876892089844, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.2983193397521973, | |
| "rewards/margins": 13.304783821105957, | |
| "rewards/rejected": -11.006464004516602, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.807799442896936, | |
| "grad_norm": 0.0008942225249484181, | |
| "learning_rate": 3.6652739090064995e-05, | |
| "logits/chosen": -51.08681869506836, | |
| "logits/rejected": -53.34214401245117, | |
| "logps/chosen": -198.1741943359375, | |
| "logps/rejected": -172.56463623046875, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.6808273792266846, | |
| "rewards/margins": 13.719111442565918, | |
| "rewards/rejected": -11.03828239440918, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.8217270194986073, | |
| "grad_norm": 0.0006534375716000795, | |
| "learning_rate": 3.642061281337048e-05, | |
| "logits/chosen": -50.69335174560547, | |
| "logits/rejected": -53.41472625732422, | |
| "logps/chosen": -187.2412109375, | |
| "logps/rejected": -174.28054809570312, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.670588254928589, | |
| "rewards/margins": 13.738470077514648, | |
| "rewards/rejected": -11.067883491516113, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.8356545961002786, | |
| "grad_norm": 0.0013752166414633393, | |
| "learning_rate": 3.618848653667595e-05, | |
| "logits/chosen": -52.0291748046875, | |
| "logits/rejected": -54.64427947998047, | |
| "logps/chosen": -201.79702758789062, | |
| "logps/rejected": -173.984130859375, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.147660732269287, | |
| "rewards/margins": 13.210504531860352, | |
| "rewards/rejected": -11.062845230102539, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.8495821727019499, | |
| "grad_norm": 0.006681976839900017, | |
| "learning_rate": 3.595636025998143e-05, | |
| "logits/chosen": -49.95285415649414, | |
| "logits/rejected": -50.84339141845703, | |
| "logps/chosen": -196.45458984375, | |
| "logps/rejected": -179.96713256835938, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.1962101459503174, | |
| "rewards/margins": 13.848483085632324, | |
| "rewards/rejected": -11.652273178100586, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.8635097493036211, | |
| "grad_norm": 0.0005970289348624647, | |
| "learning_rate": 3.572423398328691e-05, | |
| "logits/chosen": -50.802921295166016, | |
| "logits/rejected": -51.33774948120117, | |
| "logps/chosen": -198.7797393798828, | |
| "logps/rejected": -179.11044311523438, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.640532970428467, | |
| "rewards/margins": 14.48454475402832, | |
| "rewards/rejected": -11.844011306762695, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.8774373259052924, | |
| "grad_norm": 0.0030954822432249784, | |
| "learning_rate": 3.549210770659239e-05, | |
| "logits/chosen": -51.285545349121094, | |
| "logits/rejected": -59.092987060546875, | |
| "logps/chosen": -203.8007049560547, | |
| "logps/rejected": -172.9857177734375, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.8215763568878174, | |
| "rewards/margins": 12.862950325012207, | |
| "rewards/rejected": -11.041373252868652, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.8913649025069638, | |
| "grad_norm": 0.000901729566976428, | |
| "learning_rate": 3.525998142989786e-05, | |
| "logits/chosen": -50.075538635253906, | |
| "logits/rejected": -51.51918411254883, | |
| "logps/chosen": -220.5761260986328, | |
| "logps/rejected": -181.4318389892578, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.931749939918518, | |
| "rewards/margins": 13.427146911621094, | |
| "rewards/rejected": -11.495397567749023, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.9052924791086351, | |
| "grad_norm": 0.09048446267843246, | |
| "learning_rate": 3.5027855153203345e-05, | |
| "logits/chosen": -50.31797409057617, | |
| "logits/rejected": -54.93279266357422, | |
| "logps/chosen": -192.12985229492188, | |
| "logps/rejected": -168.02911376953125, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.672158718109131, | |
| "rewards/margins": 12.830169677734375, | |
| "rewards/rejected": -10.158011436462402, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.9192200557103064, | |
| "grad_norm": 0.0008625476621091366, | |
| "learning_rate": 3.479572887650882e-05, | |
| "logits/chosen": -49.658103942871094, | |
| "logits/rejected": -49.33926010131836, | |
| "logps/chosen": -185.3748779296875, | |
| "logps/rejected": -168.5997772216797, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.0287516117095947, | |
| "rewards/margins": 13.385884284973145, | |
| "rewards/rejected": -10.357132911682129, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.9331476323119777, | |
| "grad_norm": 0.0020681144669651985, | |
| "learning_rate": 3.45636025998143e-05, | |
| "logits/chosen": -50.34550094604492, | |
| "logits/rejected": -46.77113342285156, | |
| "logps/chosen": -203.38619995117188, | |
| "logps/rejected": -169.04295349121094, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.0530314445495605, | |
| "rewards/margins": 13.665138244628906, | |
| "rewards/rejected": -10.612106323242188, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.947075208913649, | |
| "grad_norm": 0.0008213380351662636, | |
| "learning_rate": 3.4331476323119775e-05, | |
| "logits/chosen": -50.81815719604492, | |
| "logits/rejected": -48.66960906982422, | |
| "logps/chosen": -195.8907470703125, | |
| "logps/rejected": -169.99732971191406, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.5576915740966797, | |
| "rewards/margins": 13.460963249206543, | |
| "rewards/rejected": -10.903271675109863, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.9610027855153204, | |
| "grad_norm": 0.0015723519027233124, | |
| "learning_rate": 3.4099350046425257e-05, | |
| "logits/chosen": -48.638885498046875, | |
| "logits/rejected": -50.73641586303711, | |
| "logps/chosen": -176.6822967529297, | |
| "logps/rejected": -164.06044006347656, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.8135972023010254, | |
| "rewards/margins": 12.596272468566895, | |
| "rewards/rejected": -9.782674789428711, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.9749303621169917, | |
| "grad_norm": 0.00701997522264719, | |
| "learning_rate": 3.386722376973073e-05, | |
| "logits/chosen": -50.4714469909668, | |
| "logits/rejected": -52.91802978515625, | |
| "logps/chosen": -184.271728515625, | |
| "logps/rejected": -162.01087951660156, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.934034824371338, | |
| "rewards/margins": 12.961067199707031, | |
| "rewards/rejected": -10.027031898498535, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.9888579387186629, | |
| "grad_norm": 0.003028564853593707, | |
| "learning_rate": 3.363509749303621e-05, | |
| "logits/chosen": -49.239967346191406, | |
| "logits/rejected": -47.960716247558594, | |
| "logps/chosen": -182.0518341064453, | |
| "logps/rejected": -172.40980529785156, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.2808187007904053, | |
| "rewards/margins": 12.81373119354248, | |
| "rewards/rejected": -10.53291130065918, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.0027855153203342, | |
| "grad_norm": 0.0019847529474645853, | |
| "learning_rate": 3.3402971216341694e-05, | |
| "logits/chosen": -48.445213317871094, | |
| "logits/rejected": -48.705787658691406, | |
| "logps/chosen": -196.92776489257812, | |
| "logps/rejected": -176.7248077392578, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.7315926551818848, | |
| "rewards/margins": 13.8847017288208, | |
| "rewards/rejected": -11.153108596801758, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.0167130919220055, | |
| "grad_norm": 0.00019169057486578822, | |
| "learning_rate": 3.3170844939647175e-05, | |
| "logits/chosen": -47.656700134277344, | |
| "logits/rejected": -47.6513786315918, | |
| "logps/chosen": -185.78167724609375, | |
| "logps/rejected": -178.4099578857422, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.055850028991699, | |
| "rewards/margins": 14.758455276489258, | |
| "rewards/rejected": -11.702605247497559, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.0306406685236769, | |
| "grad_norm": 0.0018290742300450802, | |
| "learning_rate": 3.293871866295265e-05, | |
| "logits/chosen": -47.585262298583984, | |
| "logits/rejected": -47.93593215942383, | |
| "logps/chosen": -189.79299926757812, | |
| "logps/rejected": -173.1365509033203, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.0570778846740723, | |
| "rewards/margins": 14.200826644897461, | |
| "rewards/rejected": -11.143750190734863, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.0445682451253482, | |
| "grad_norm": 0.002084003994241357, | |
| "learning_rate": 3.270659238625813e-05, | |
| "logits/chosen": -48.902366638183594, | |
| "logits/rejected": -49.37635040283203, | |
| "logps/chosen": -190.6774444580078, | |
| "logps/rejected": -176.73709106445312, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.9810030460357666, | |
| "rewards/margins": 13.916801452636719, | |
| "rewards/rejected": -10.935799598693848, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.0584958217270195, | |
| "grad_norm": 0.00043452094541862607, | |
| "learning_rate": 3.2474466109563606e-05, | |
| "logits/chosen": -47.70875549316406, | |
| "logits/rejected": -47.576881408691406, | |
| "logps/chosen": -174.31796264648438, | |
| "logps/rejected": -172.3538360595703, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.190167188644409, | |
| "rewards/margins": 14.282261848449707, | |
| "rewards/rejected": -11.092094421386719, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.0724233983286908, | |
| "grad_norm": 0.0001380470785079524, | |
| "learning_rate": 3.224233983286909e-05, | |
| "logits/chosen": -44.77172088623047, | |
| "logits/rejected": -48.8241081237793, | |
| "logps/chosen": -185.09288024902344, | |
| "logps/rejected": -175.90911865234375, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.2085177898406982, | |
| "rewards/margins": 14.184524536132812, | |
| "rewards/rejected": -10.976004600524902, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.0863509749303621, | |
| "grad_norm": 0.0007571239257231355, | |
| "learning_rate": 3.201021355617456e-05, | |
| "logits/chosen": -46.60997772216797, | |
| "logits/rejected": -47.1086311340332, | |
| "logps/chosen": -181.51124572753906, | |
| "logps/rejected": -176.6173095703125, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.862274646759033, | |
| "rewards/margins": 14.07470989227295, | |
| "rewards/rejected": -11.212434768676758, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.1002785515320335, | |
| "grad_norm": 0.0007815372664481401, | |
| "learning_rate": 3.177808727948004e-05, | |
| "logits/chosen": -46.209800720214844, | |
| "logits/rejected": -49.99565505981445, | |
| "logps/chosen": -189.17105102539062, | |
| "logps/rejected": -175.38833618164062, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.5530970096588135, | |
| "rewards/margins": 14.227849006652832, | |
| "rewards/rejected": -11.674753189086914, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.1142061281337048, | |
| "grad_norm": 4.730727232526988e-05, | |
| "learning_rate": 3.154596100278552e-05, | |
| "logits/chosen": -45.40021514892578, | |
| "logits/rejected": -46.26294708251953, | |
| "logps/chosen": -192.1022491455078, | |
| "logps/rejected": -183.72848510742188, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.7471506595611572, | |
| "rewards/margins": 14.762069702148438, | |
| "rewards/rejected": -12.014918327331543, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.128133704735376, | |
| "grad_norm": 0.0005188810173422098, | |
| "learning_rate": 3.1313834726091e-05, | |
| "logits/chosen": -47.36610412597656, | |
| "logits/rejected": -46.83913040161133, | |
| "logps/chosen": -181.6043701171875, | |
| "logps/rejected": -183.43173217773438, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.78470778465271, | |
| "rewards/margins": 15.225115776062012, | |
| "rewards/rejected": -12.440409660339355, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.1420612813370474, | |
| "grad_norm": 0.003626539371907711, | |
| "learning_rate": 3.1081708449396474e-05, | |
| "logits/chosen": -47.204830169677734, | |
| "logits/rejected": -49.52333450317383, | |
| "logps/chosen": -180.70919799804688, | |
| "logps/rejected": -169.68026733398438, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.0807857513427734, | |
| "rewards/margins": 14.249502182006836, | |
| "rewards/rejected": -11.168718338012695, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.1559888579387188, | |
| "grad_norm": 0.0012059325817972422, | |
| "learning_rate": 3.0849582172701955e-05, | |
| "logits/chosen": -46.76143264770508, | |
| "logits/rejected": -48.97161102294922, | |
| "logps/chosen": -174.12811279296875, | |
| "logps/rejected": -175.88150024414062, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.8706345558166504, | |
| "rewards/margins": 14.090449333190918, | |
| "rewards/rejected": -11.219817161560059, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.16991643454039, | |
| "grad_norm": 0.0010171913309022784, | |
| "learning_rate": 3.061745589600743e-05, | |
| "logits/chosen": -48.58325958251953, | |
| "logits/rejected": -46.89453125, | |
| "logps/chosen": -173.7610626220703, | |
| "logps/rejected": -183.8321533203125, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.4698550701141357, | |
| "rewards/margins": 14.651331901550293, | |
| "rewards/rejected": -12.181478500366211, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.1838440111420612, | |
| "grad_norm": 0.0009448982309550047, | |
| "learning_rate": 3.0385329619312908e-05, | |
| "logits/chosen": -47.29355239868164, | |
| "logits/rejected": -46.92559051513672, | |
| "logps/chosen": -196.41555786132812, | |
| "logps/rejected": -175.25381469726562, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.1618518829345703, | |
| "rewards/margins": 14.048612594604492, | |
| "rewards/rejected": -10.886759757995605, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.1977715877437327, | |
| "grad_norm": 0.0016251832712441683, | |
| "learning_rate": 3.0153203342618386e-05, | |
| "logits/chosen": -46.79810333251953, | |
| "logits/rejected": -46.351219177246094, | |
| "logps/chosen": -192.8514404296875, | |
| "logps/rejected": -179.13809204101562, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.0230753421783447, | |
| "rewards/margins": 14.267328262329102, | |
| "rewards/rejected": -11.244253158569336, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.2116991643454038, | |
| "grad_norm": 0.0008583049057051539, | |
| "learning_rate": 2.9921077065923864e-05, | |
| "logits/chosen": -46.299705505371094, | |
| "logits/rejected": -52.336578369140625, | |
| "logps/chosen": -183.10208129882812, | |
| "logps/rejected": -179.01170349121094, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.5486302375793457, | |
| "rewards/margins": 13.563204765319824, | |
| "rewards/rejected": -11.014575004577637, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.2256267409470751, | |
| "grad_norm": 0.002296986524015665, | |
| "learning_rate": 2.9688950789229342e-05, | |
| "logits/chosen": -47.59013366699219, | |
| "logits/rejected": -44.00934600830078, | |
| "logps/chosen": -205.0873565673828, | |
| "logps/rejected": -178.86834716796875, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.8372130393981934, | |
| "rewards/margins": 13.294425964355469, | |
| "rewards/rejected": -10.457212448120117, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.2395543175487465, | |
| "grad_norm": 0.000371147325495258, | |
| "learning_rate": 2.945682451253482e-05, | |
| "logits/chosen": -42.788970947265625, | |
| "logits/rejected": -49.1223030090332, | |
| "logps/chosen": -173.95599365234375, | |
| "logps/rejected": -172.49954223632812, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.9040775299072266, | |
| "rewards/margins": 15.029403686523438, | |
| "rewards/rejected": -11.125325202941895, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.2534818941504178, | |
| "grad_norm": 0.0004314547113608569, | |
| "learning_rate": 2.9224698235840298e-05, | |
| "logits/chosen": -45.99776840209961, | |
| "logits/rejected": -46.92440414428711, | |
| "logps/chosen": -194.90078735351562, | |
| "logps/rejected": -178.046875, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.8200998306274414, | |
| "rewards/margins": 14.157859802246094, | |
| "rewards/rejected": -11.337759971618652, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.267409470752089, | |
| "grad_norm": 0.0007565075648017228, | |
| "learning_rate": 2.8992571959145776e-05, | |
| "logits/chosen": -47.412750244140625, | |
| "logits/rejected": -47.732460021972656, | |
| "logps/chosen": -182.77047729492188, | |
| "logps/rejected": -174.72396850585938, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.9069650173187256, | |
| "rewards/margins": 13.822225570678711, | |
| "rewards/rejected": -10.915260314941406, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.2813370473537604, | |
| "grad_norm": 0.00014751723210792989, | |
| "learning_rate": 2.8760445682451254e-05, | |
| "logits/chosen": -45.15221405029297, | |
| "logits/rejected": -45.39213180541992, | |
| "logps/chosen": -180.3527069091797, | |
| "logps/rejected": -173.14462280273438, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.407719135284424, | |
| "rewards/margins": 14.697021484375, | |
| "rewards/rejected": -11.289301872253418, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.2952646239554317, | |
| "grad_norm": 0.00017833193123806268, | |
| "learning_rate": 2.852831940575673e-05, | |
| "logits/chosen": -45.37639617919922, | |
| "logits/rejected": -46.76648712158203, | |
| "logps/chosen": -184.28834533691406, | |
| "logps/rejected": -172.79425048828125, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.130150556564331, | |
| "rewards/margins": 14.1089506149292, | |
| "rewards/rejected": -10.978799819946289, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.309192200557103, | |
| "grad_norm": 2.0110555851715617e-05, | |
| "learning_rate": 2.829619312906221e-05, | |
| "logits/chosen": -43.93904495239258, | |
| "logits/rejected": -43.89077377319336, | |
| "logps/chosen": -195.8922119140625, | |
| "logps/rejected": -177.62008666992188, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.138190507888794, | |
| "rewards/margins": 14.122373580932617, | |
| "rewards/rejected": -10.984184265136719, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.3231197771587744, | |
| "grad_norm": 0.0016257427632808685, | |
| "learning_rate": 2.8064066852367688e-05, | |
| "logits/chosen": -44.54652404785156, | |
| "logits/rejected": -43.146263122558594, | |
| "logps/chosen": -189.8309783935547, | |
| "logps/rejected": -182.90872192382812, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.7086493968963623, | |
| "rewards/margins": 15.096916198730469, | |
| "rewards/rejected": -12.388265609741211, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.3370473537604457, | |
| "grad_norm": 0.00040859784348867834, | |
| "learning_rate": 2.7831940575673166e-05, | |
| "logits/chosen": -44.68381881713867, | |
| "logits/rejected": -46.662906646728516, | |
| "logps/chosen": -190.71554565429688, | |
| "logps/rejected": -173.70538330078125, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.913283586502075, | |
| "rewards/margins": 13.87846565246582, | |
| "rewards/rejected": -10.965181350708008, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.350974930362117, | |
| "grad_norm": 0.0006592085701413453, | |
| "learning_rate": 2.7599814298978644e-05, | |
| "logits/chosen": -42.80352020263672, | |
| "logits/rejected": -43.12960433959961, | |
| "logps/chosen": -187.87155151367188, | |
| "logps/rejected": -174.0194549560547, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.122223138809204, | |
| "rewards/margins": 14.28703784942627, | |
| "rewards/rejected": -11.164815902709961, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.3649025069637883, | |
| "grad_norm": 0.00013744817988481373, | |
| "learning_rate": 2.736768802228412e-05, | |
| "logits/chosen": -41.9451904296875, | |
| "logits/rejected": -47.34199523925781, | |
| "logps/chosen": -172.63613891601562, | |
| "logps/rejected": -174.45248413085938, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.9825220108032227, | |
| "rewards/margins": 14.628707885742188, | |
| "rewards/rejected": -11.646185874938965, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.3788300835654597, | |
| "grad_norm": 0.00018170691328123212, | |
| "learning_rate": 2.71355617455896e-05, | |
| "logits/chosen": -44.152679443359375, | |
| "logits/rejected": -43.70296096801758, | |
| "logps/chosen": -189.96286010742188, | |
| "logps/rejected": -182.75204467773438, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.9260449409484863, | |
| "rewards/margins": 15.165403366088867, | |
| "rewards/rejected": -12.239357948303223, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.392757660167131, | |
| "grad_norm": 0.00020125451555941254, | |
| "learning_rate": 2.6903435468895084e-05, | |
| "logits/chosen": -43.45044708251953, | |
| "logits/rejected": -53.4655647277832, | |
| "logps/chosen": -191.4513397216797, | |
| "logps/rejected": -173.65176391601562, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.9874825477600098, | |
| "rewards/margins": 13.630624771118164, | |
| "rewards/rejected": -10.64314079284668, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.4066852367688023, | |
| "grad_norm": 0.00031613241299055517, | |
| "learning_rate": 2.6671309192200562e-05, | |
| "logits/chosen": -45.708953857421875, | |
| "logits/rejected": -36.509056091308594, | |
| "logps/chosen": -175.71456909179688, | |
| "logps/rejected": -179.5768280029297, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.7891390323638916, | |
| "rewards/margins": 15.765680313110352, | |
| "rewards/rejected": -12.976541519165039, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.4206128133704734, | |
| "grad_norm": 0.0009248528513126075, | |
| "learning_rate": 2.643918291550604e-05, | |
| "logits/chosen": -45.35698318481445, | |
| "logits/rejected": -44.74424362182617, | |
| "logps/chosen": -188.96438598632812, | |
| "logps/rejected": -184.33306884765625, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.623995780944824, | |
| "rewards/margins": 14.462275505065918, | |
| "rewards/rejected": -11.83828067779541, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.434540389972145, | |
| "grad_norm": 0.00033521506702527404, | |
| "learning_rate": 2.620705663881152e-05, | |
| "logits/chosen": -42.90143585205078, | |
| "logits/rejected": -42.27935791015625, | |
| "logps/chosen": -187.33126831054688, | |
| "logps/rejected": -179.48333740234375, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.8840413093566895, | |
| "rewards/margins": 14.168217658996582, | |
| "rewards/rejected": -11.28417682647705, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.448467966573816, | |
| "grad_norm": 3.1652274628868327e-05, | |
| "learning_rate": 2.5974930362116996e-05, | |
| "logits/chosen": -42.856422424316406, | |
| "logits/rejected": -46.94526290893555, | |
| "logps/chosen": -180.2850341796875, | |
| "logps/rejected": -180.52711486816406, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.3675103187561035, | |
| "rewards/margins": 14.869485855102539, | |
| "rewards/rejected": -11.501976013183594, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.4623955431754876, | |
| "grad_norm": 0.0001855907030403614, | |
| "learning_rate": 2.5742804085422474e-05, | |
| "logits/chosen": -43.780208587646484, | |
| "logits/rejected": -45.45014953613281, | |
| "logps/chosen": -189.4515838623047, | |
| "logps/rejected": -171.45359802246094, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.998511791229248, | |
| "rewards/margins": 13.935274124145508, | |
| "rewards/rejected": -10.936761856079102, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.4763231197771587, | |
| "grad_norm": 0.0009718401124700904, | |
| "learning_rate": 2.5510677808727952e-05, | |
| "logits/chosen": -42.94011688232422, | |
| "logits/rejected": -43.77968215942383, | |
| "logps/chosen": -199.50701904296875, | |
| "logps/rejected": -189.60020446777344, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.7382802963256836, | |
| "rewards/margins": 15.149266242980957, | |
| "rewards/rejected": -12.410985946655273, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.49025069637883, | |
| "grad_norm": 0.0005517892423085868, | |
| "learning_rate": 2.527855153203343e-05, | |
| "logits/chosen": -41.704933166503906, | |
| "logits/rejected": -42.4830322265625, | |
| "logps/chosen": -188.4182891845703, | |
| "logps/rejected": -180.1853790283203, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.676875352859497, | |
| "rewards/margins": 15.806604385375977, | |
| "rewards/rejected": -12.129728317260742, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.5041782729805013, | |
| "grad_norm": 0.0015563963679596782, | |
| "learning_rate": 2.504642525533891e-05, | |
| "logits/chosen": -43.79706954956055, | |
| "logits/rejected": -45.16236114501953, | |
| "logps/chosen": -183.44960021972656, | |
| "logps/rejected": -181.75918579101562, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.9543235301971436, | |
| "rewards/margins": 14.605280876159668, | |
| "rewards/rejected": -11.650958061218262, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.5181058495821727, | |
| "grad_norm": 0.0001682774309301749, | |
| "learning_rate": 2.4814298978644386e-05, | |
| "logits/chosen": -43.524757385253906, | |
| "logits/rejected": -47.19926071166992, | |
| "logps/chosen": -179.42062377929688, | |
| "logps/rejected": -173.64984130859375, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.019610643386841, | |
| "rewards/margins": 14.090364456176758, | |
| "rewards/rejected": -11.070755004882812, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.532033426183844, | |
| "grad_norm": 0.0002563179295975715, | |
| "learning_rate": 2.4582172701949864e-05, | |
| "logits/chosen": -41.658203125, | |
| "logits/rejected": -45.531280517578125, | |
| "logps/chosen": -168.8417205810547, | |
| "logps/rejected": -179.17169189453125, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.318143844604492, | |
| "rewards/margins": 14.908279418945312, | |
| "rewards/rejected": -11.59013557434082, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.5459610027855153, | |
| "grad_norm": 0.0005912501364946365, | |
| "learning_rate": 2.4350046425255342e-05, | |
| "logits/chosen": -42.020301818847656, | |
| "logits/rejected": -41.976051330566406, | |
| "logps/chosen": -194.45645141601562, | |
| "logps/rejected": -180.68141174316406, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.85841703414917, | |
| "rewards/margins": 14.995101928710938, | |
| "rewards/rejected": -12.136682510375977, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.5598885793871866, | |
| "grad_norm": 0.002638684120029211, | |
| "learning_rate": 2.411792014856082e-05, | |
| "logits/chosen": -41.923728942871094, | |
| "logits/rejected": -46.06597137451172, | |
| "logps/chosen": -198.2957000732422, | |
| "logps/rejected": -185.3656768798828, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.1566615104675293, | |
| "rewards/margins": 14.20202922821045, | |
| "rewards/rejected": -11.045369148254395, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.573816155988858, | |
| "grad_norm": 2.0041943571413867e-05, | |
| "learning_rate": 2.3885793871866298e-05, | |
| "logits/chosen": -44.468955993652344, | |
| "logits/rejected": -42.48888397216797, | |
| "logps/chosen": -184.76351928710938, | |
| "logps/rejected": -182.19944763183594, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.2618041038513184, | |
| "rewards/margins": 15.966888427734375, | |
| "rewards/rejected": -12.705083847045898, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.5877437325905293, | |
| "grad_norm": 1.645497468416579e-05, | |
| "learning_rate": 2.3653667595171773e-05, | |
| "logits/chosen": -42.27872848510742, | |
| "logits/rejected": -43.35807418823242, | |
| "logps/chosen": -205.8543243408203, | |
| "logps/rejected": -180.86024475097656, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.8403396606445312, | |
| "rewards/margins": 15.042218208312988, | |
| "rewards/rejected": -12.201878547668457, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.6016713091922006, | |
| "grad_norm": 0.0003787026507779956, | |
| "learning_rate": 2.342154131847725e-05, | |
| "logits/chosen": -42.156349182128906, | |
| "logits/rejected": -37.70032501220703, | |
| "logps/chosen": -194.82247924804688, | |
| "logps/rejected": -176.8505096435547, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.513833999633789, | |
| "rewards/margins": 14.492878913879395, | |
| "rewards/rejected": -10.979044914245605, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.615598885793872, | |
| "grad_norm": 7.55380024202168e-05, | |
| "learning_rate": 2.318941504178273e-05, | |
| "logits/chosen": -43.539344787597656, | |
| "logits/rejected": -41.69083023071289, | |
| "logps/chosen": -187.00527954101562, | |
| "logps/rejected": -177.04592895507812, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.1924500465393066, | |
| "rewards/margins": 14.285606384277344, | |
| "rewards/rejected": -11.093156814575195, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.6295264623955432, | |
| "grad_norm": 0.0002465043216943741, | |
| "learning_rate": 2.2957288765088207e-05, | |
| "logits/chosen": -42.10594177246094, | |
| "logits/rejected": -41.667667388916016, | |
| "logps/chosen": -190.01614379882812, | |
| "logps/rejected": -181.31919860839844, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.0133557319641113, | |
| "rewards/margins": 14.723991394042969, | |
| "rewards/rejected": -11.710634231567383, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.6434540389972145, | |
| "grad_norm": 3.898440991179086e-05, | |
| "learning_rate": 2.2725162488393685e-05, | |
| "logits/chosen": -42.041419982910156, | |
| "logits/rejected": -48.19127655029297, | |
| "logps/chosen": -188.48135375976562, | |
| "logps/rejected": -179.7986602783203, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.3684513568878174, | |
| "rewards/margins": 15.300027847290039, | |
| "rewards/rejected": -11.931575775146484, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.6573816155988856, | |
| "grad_norm": 0.00035718828439712524, | |
| "learning_rate": 2.2493036211699163e-05, | |
| "logits/chosen": -42.593849182128906, | |
| "logits/rejected": -49.53551483154297, | |
| "logps/chosen": -189.70465087890625, | |
| "logps/rejected": -187.06527709960938, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.751575469970703, | |
| "rewards/margins": 14.097253799438477, | |
| "rewards/rejected": -11.345675468444824, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.6713091922005572, | |
| "grad_norm": 0.00047763698967173696, | |
| "learning_rate": 2.226090993500464e-05, | |
| "logits/chosen": -42.90016555786133, | |
| "logits/rejected": -48.32063293457031, | |
| "logps/chosen": -173.6857452392578, | |
| "logps/rejected": -173.83023071289062, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.3109641075134277, | |
| "rewards/margins": 13.790491104125977, | |
| "rewards/rejected": -10.47952651977539, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.6852367688022283, | |
| "grad_norm": 0.000792037055362016, | |
| "learning_rate": 2.2028783658310122e-05, | |
| "logits/chosen": -39.64678192138672, | |
| "logits/rejected": -48.17890930175781, | |
| "logps/chosen": -192.51730346679688, | |
| "logps/rejected": -169.37339782714844, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.397125244140625, | |
| "rewards/margins": 13.744363784790039, | |
| "rewards/rejected": -10.347238540649414, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.6991643454038998, | |
| "grad_norm": 0.0005765881505794823, | |
| "learning_rate": 2.17966573816156e-05, | |
| "logits/chosen": -38.50248336791992, | |
| "logits/rejected": -39.00818634033203, | |
| "logps/chosen": -195.1656494140625, | |
| "logps/rejected": -186.5626678466797, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.825869083404541, | |
| "rewards/margins": 14.93237590789795, | |
| "rewards/rejected": -12.10650634765625, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.713091922005571, | |
| "grad_norm": 0.0004956713528372347, | |
| "learning_rate": 2.1564531104921078e-05, | |
| "logits/chosen": -42.719886779785156, | |
| "logits/rejected": -43.95571517944336, | |
| "logps/chosen": -181.20785522460938, | |
| "logps/rejected": -173.41934204101562, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.436485767364502, | |
| "rewards/margins": 14.404312133789062, | |
| "rewards/rejected": -10.967824935913086, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.7270194986072425, | |
| "grad_norm": 0.004964966792613268, | |
| "learning_rate": 2.1332404828226556e-05, | |
| "logits/chosen": -40.15790939331055, | |
| "logits/rejected": -46.36140441894531, | |
| "logps/chosen": -194.02664184570312, | |
| "logps/rejected": -174.8671112060547, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.7149300575256348, | |
| "rewards/margins": 13.3515043258667, | |
| "rewards/rejected": -10.636574745178223, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.7409470752089136, | |
| "grad_norm": 0.0004779207520186901, | |
| "learning_rate": 2.1100278551532034e-05, | |
| "logits/chosen": -38.4859733581543, | |
| "logits/rejected": -43.74262237548828, | |
| "logps/chosen": -169.06556701660156, | |
| "logps/rejected": -180.42384338378906, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.352203369140625, | |
| "rewards/margins": 14.585695266723633, | |
| "rewards/rejected": -11.233491897583008, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.754874651810585, | |
| "grad_norm": 0.0005939751281403005, | |
| "learning_rate": 2.0868152274837512e-05, | |
| "logits/chosen": -40.667049407958984, | |
| "logits/rejected": -40.19790267944336, | |
| "logps/chosen": -196.654052734375, | |
| "logps/rejected": -183.01376342773438, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.437725305557251, | |
| "rewards/margins": 13.927282333374023, | |
| "rewards/rejected": -11.489557266235352, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.7688022284122562, | |
| "grad_norm": 0.0006439912249334157, | |
| "learning_rate": 2.063602599814299e-05, | |
| "logits/chosen": -41.551029205322266, | |
| "logits/rejected": -46.43274688720703, | |
| "logps/chosen": -192.17355346679688, | |
| "logps/rejected": -172.40625, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.9757094383239746, | |
| "rewards/margins": 13.070402145385742, | |
| "rewards/rejected": -10.09469223022461, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.7827298050139275, | |
| "grad_norm": 0.001160840387456119, | |
| "learning_rate": 2.0403899721448468e-05, | |
| "logits/chosen": -42.423248291015625, | |
| "logits/rejected": -43.39242172241211, | |
| "logps/chosen": -188.78314208984375, | |
| "logps/rejected": -178.84017944335938, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.4687724113464355, | |
| "rewards/margins": 13.714757919311523, | |
| "rewards/rejected": -11.245985984802246, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.7966573816155988, | |
| "grad_norm": 0.0008970465278252959, | |
| "learning_rate": 2.0171773444753946e-05, | |
| "logits/chosen": -39.771629333496094, | |
| "logits/rejected": -45.55982208251953, | |
| "logps/chosen": -175.4274444580078, | |
| "logps/rejected": -178.88775634765625, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.9034018516540527, | |
| "rewards/margins": 14.535983085632324, | |
| "rewards/rejected": -11.632580757141113, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.8105849582172702, | |
| "grad_norm": 0.0006431234069168568, | |
| "learning_rate": 1.9939647168059424e-05, | |
| "logits/chosen": -39.90253448486328, | |
| "logits/rejected": -44.82994842529297, | |
| "logps/chosen": -189.65737915039062, | |
| "logps/rejected": -178.84754943847656, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.19281005859375, | |
| "rewards/margins": 14.324234008789062, | |
| "rewards/rejected": -11.131423950195312, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.8245125348189415, | |
| "grad_norm": 0.0008485277649015188, | |
| "learning_rate": 1.9707520891364902e-05, | |
| "logits/chosen": -40.094932556152344, | |
| "logits/rejected": -45.365562438964844, | |
| "logps/chosen": -182.76187133789062, | |
| "logps/rejected": -174.17022705078125, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.948390245437622, | |
| "rewards/margins": 13.181282043457031, | |
| "rewards/rejected": -10.232892990112305, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.8384401114206128, | |
| "grad_norm": 0.0004803981864824891, | |
| "learning_rate": 1.947539461467038e-05, | |
| "logits/chosen": -38.52114486694336, | |
| "logits/rejected": -37.8065299987793, | |
| "logps/chosen": -188.72463989257812, | |
| "logps/rejected": -189.0813446044922, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.8810222148895264, | |
| "rewards/margins": 15.528773307800293, | |
| "rewards/rejected": -12.647750854492188, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.8523676880222841, | |
| "grad_norm": 0.0011859548976644874, | |
| "learning_rate": 1.9243268337975858e-05, | |
| "logits/chosen": -39.07917785644531, | |
| "logits/rejected": -42.37941360473633, | |
| "logps/chosen": -177.0192108154297, | |
| "logps/rejected": -176.3035125732422, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.3045661449432373, | |
| "rewards/margins": 14.152140617370605, | |
| "rewards/rejected": -10.847575187683105, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.8662952646239555, | |
| "grad_norm": 0.00019842004985548556, | |
| "learning_rate": 1.9011142061281336e-05, | |
| "logits/chosen": -38.62647247314453, | |
| "logits/rejected": -41.89341735839844, | |
| "logps/chosen": -195.18069458007812, | |
| "logps/rejected": -179.20639038085938, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.8785204887390137, | |
| "rewards/margins": 13.824996948242188, | |
| "rewards/rejected": -10.946475982666016, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.8802228412256268, | |
| "grad_norm": 2.9733255360042676e-05, | |
| "learning_rate": 1.8779015784586814e-05, | |
| "logits/chosen": -39.85136795043945, | |
| "logits/rejected": -37.52424240112305, | |
| "logps/chosen": -168.53857421875, | |
| "logps/rejected": -179.1663360595703, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.0412697792053223, | |
| "rewards/margins": 15.687167167663574, | |
| "rewards/rejected": -12.645896911621094, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.894150417827298, | |
| "grad_norm": 0.0005429552984423935, | |
| "learning_rate": 1.8546889507892295e-05, | |
| "logits/chosen": -38.35095977783203, | |
| "logits/rejected": -37.555809020996094, | |
| "logps/chosen": -191.65780639648438, | |
| "logps/rejected": -187.13223266601562, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.386960983276367, | |
| "rewards/margins": 15.752670288085938, | |
| "rewards/rejected": -12.365708351135254, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.9080779944289694, | |
| "grad_norm": 0.0004797822330147028, | |
| "learning_rate": 1.8314763231197773e-05, | |
| "logits/chosen": -40.002098083496094, | |
| "logits/rejected": -37.707252502441406, | |
| "logps/chosen": -198.4432373046875, | |
| "logps/rejected": -179.8600616455078, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.40791654586792, | |
| "rewards/margins": 14.185983657836914, | |
| "rewards/rejected": -11.778066635131836, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 1.9220055710306405, | |
| "grad_norm": 0.0002558935957495123, | |
| "learning_rate": 1.808263695450325e-05, | |
| "logits/chosen": -40.22335433959961, | |
| "logits/rejected": -39.680335998535156, | |
| "logps/chosen": -178.06088256835938, | |
| "logps/rejected": -174.2890167236328, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.06481671333313, | |
| "rewards/margins": 14.587986946105957, | |
| "rewards/rejected": -11.523172378540039, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.935933147632312, | |
| "grad_norm": 0.022678282111883163, | |
| "learning_rate": 1.785051067780873e-05, | |
| "logits/chosen": -37.662994384765625, | |
| "logits/rejected": -42.678794860839844, | |
| "logps/chosen": -174.59817504882812, | |
| "logps/rejected": -172.0020751953125, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.9920780658721924, | |
| "rewards/margins": 13.535223007202148, | |
| "rewards/rejected": -10.543145179748535, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 1.9498607242339832, | |
| "grad_norm": 0.002099443692713976, | |
| "learning_rate": 1.7618384401114207e-05, | |
| "logits/chosen": -40.57250213623047, | |
| "logits/rejected": -43.25788497924805, | |
| "logps/chosen": -188.49884033203125, | |
| "logps/rejected": -175.46717834472656, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.0427377223968506, | |
| "rewards/margins": 14.629735946655273, | |
| "rewards/rejected": -11.586997985839844, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.9637883008356547, | |
| "grad_norm": 0.0002445300342515111, | |
| "learning_rate": 1.7386258124419685e-05, | |
| "logits/chosen": -38.27973556518555, | |
| "logits/rejected": -41.077720642089844, | |
| "logps/chosen": -189.12991333007812, | |
| "logps/rejected": -183.447998046875, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.450929641723633, | |
| "rewards/margins": 15.17542552947998, | |
| "rewards/rejected": -11.72449779510498, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 1.9777158774373258, | |
| "grad_norm": 4.384323619888164e-05, | |
| "learning_rate": 1.7154131847725163e-05, | |
| "logits/chosen": -37.973472595214844, | |
| "logits/rejected": -41.2947998046875, | |
| "logps/chosen": -170.81488037109375, | |
| "logps/rejected": -187.4012908935547, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.1459457874298096, | |
| "rewards/margins": 15.257524490356445, | |
| "rewards/rejected": -12.111578941345215, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 1.9916434540389973, | |
| "grad_norm": 0.0007763483445160091, | |
| "learning_rate": 1.692200557103064e-05, | |
| "logits/chosen": -37.70799255371094, | |
| "logits/rejected": -39.826412200927734, | |
| "logps/chosen": -190.74960327148438, | |
| "logps/rejected": -175.0165252685547, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.826359987258911, | |
| "rewards/margins": 14.098872184753418, | |
| "rewards/rejected": -11.27251148223877, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 2.0055710306406684, | |
| "grad_norm": 6.922234024386853e-05, | |
| "learning_rate": 1.668987929433612e-05, | |
| "logits/chosen": -36.95861053466797, | |
| "logits/rejected": -40.93572998046875, | |
| "logps/chosen": -195.0906524658203, | |
| "logps/rejected": -184.3977508544922, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.468242883682251, | |
| "rewards/margins": 14.012043952941895, | |
| "rewards/rejected": -11.543802261352539, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 2.01949860724234, | |
| "grad_norm": 0.0008899418171495199, | |
| "learning_rate": 1.6457753017641597e-05, | |
| "logits/chosen": -37.98674774169922, | |
| "logits/rejected": -37.987403869628906, | |
| "logps/chosen": -181.60769653320312, | |
| "logps/rejected": -180.72677612304688, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.1309444904327393, | |
| "rewards/margins": 15.349719047546387, | |
| "rewards/rejected": -12.218774795532227, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 2.033426183844011, | |
| "grad_norm": 0.0009419086272828281, | |
| "learning_rate": 1.6225626740947075e-05, | |
| "logits/chosen": -37.715850830078125, | |
| "logits/rejected": -41.038299560546875, | |
| "logps/chosen": -188.2119598388672, | |
| "logps/rejected": -174.30699157714844, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.8673110008239746, | |
| "rewards/margins": 14.030998229980469, | |
| "rewards/rejected": -11.163687705993652, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 2.0473537604456826, | |
| "grad_norm": 0.00017140038835350424, | |
| "learning_rate": 1.5993500464252553e-05, | |
| "logits/chosen": -36.50278091430664, | |
| "logits/rejected": -40.35831832885742, | |
| "logps/chosen": -189.39010620117188, | |
| "logps/rejected": -179.73275756835938, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.1329684257507324, | |
| "rewards/margins": 14.958372116088867, | |
| "rewards/rejected": -11.825403213500977, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 2.0612813370473537, | |
| "grad_norm": 2.8825706976931542e-05, | |
| "learning_rate": 1.576137418755803e-05, | |
| "logits/chosen": -37.138877868652344, | |
| "logits/rejected": -35.2958869934082, | |
| "logps/chosen": -196.31619262695312, | |
| "logps/rejected": -182.4630584716797, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.744528293609619, | |
| "rewards/margins": 14.846966743469238, | |
| "rewards/rejected": -12.102435111999512, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 2.0752089136490253, | |
| "grad_norm": 0.00021149200620129704, | |
| "learning_rate": 1.552924791086351e-05, | |
| "logits/chosen": -39.6096305847168, | |
| "logits/rejected": -46.948265075683594, | |
| "logps/chosen": -183.4660186767578, | |
| "logps/rejected": -176.20413208007812, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.1110737323760986, | |
| "rewards/margins": 14.174654006958008, | |
| "rewards/rejected": -11.063581466674805, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 2.0891364902506964, | |
| "grad_norm": 0.0004168855957686901, | |
| "learning_rate": 1.529712163416899e-05, | |
| "logits/chosen": -33.97929763793945, | |
| "logits/rejected": -37.035865783691406, | |
| "logps/chosen": -179.11045837402344, | |
| "logps/rejected": -172.54037475585938, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.1583425998687744, | |
| "rewards/margins": 14.817370414733887, | |
| "rewards/rejected": -11.659029006958008, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.103064066852368, | |
| "grad_norm": 4.1843464714474976e-05, | |
| "learning_rate": 1.5064995357474469e-05, | |
| "logits/chosen": -36.117828369140625, | |
| "logits/rejected": -32.342254638671875, | |
| "logps/chosen": -199.46429443359375, | |
| "logps/rejected": -186.94625854492188, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.9351420402526855, | |
| "rewards/margins": 14.709848403930664, | |
| "rewards/rejected": -11.774707794189453, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 2.116991643454039, | |
| "grad_norm": 0.0004989901790395379, | |
| "learning_rate": 1.4832869080779947e-05, | |
| "logits/chosen": -35.539146423339844, | |
| "logits/rejected": -33.397926330566406, | |
| "logps/chosen": -190.5336456298828, | |
| "logps/rejected": -182.02114868164062, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.323108196258545, | |
| "rewards/margins": 15.390007019042969, | |
| "rewards/rejected": -12.066898345947266, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 2.13091922005571, | |
| "grad_norm": 0.0010163492988795042, | |
| "learning_rate": 1.4600742804085425e-05, | |
| "logits/chosen": -36.64421463012695, | |
| "logits/rejected": -37.93674087524414, | |
| "logps/chosen": -198.0296630859375, | |
| "logps/rejected": -179.7415008544922, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.915405750274658, | |
| "rewards/margins": 14.40515422821045, | |
| "rewards/rejected": -11.489748001098633, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 2.1448467966573816, | |
| "grad_norm": 0.00033078828710131347, | |
| "learning_rate": 1.4368616527390903e-05, | |
| "logits/chosen": -37.70105743408203, | |
| "logits/rejected": -36.71582794189453, | |
| "logps/chosen": -199.81381225585938, | |
| "logps/rejected": -185.13119506835938, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.8086984157562256, | |
| "rewards/margins": 15.142376899719238, | |
| "rewards/rejected": -12.33367919921875, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 2.1587743732590527, | |
| "grad_norm": 0.0038093943148851395, | |
| "learning_rate": 1.413649025069638e-05, | |
| "logits/chosen": -36.39249038696289, | |
| "logits/rejected": -38.92975616455078, | |
| "logps/chosen": -196.83294677734375, | |
| "logps/rejected": -175.97525024414062, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.1542813777923584, | |
| "rewards/margins": 14.027664184570312, | |
| "rewards/rejected": -10.873384475708008, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 2.1727019498607243, | |
| "grad_norm": 0.00011094040382886305, | |
| "learning_rate": 1.3904363974001859e-05, | |
| "logits/chosen": -36.158992767333984, | |
| "logits/rejected": -44.41789627075195, | |
| "logps/chosen": -187.1454620361328, | |
| "logps/rejected": -183.45184326171875, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.974985361099243, | |
| "rewards/margins": 14.125747680664062, | |
| "rewards/rejected": -11.150762557983398, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 2.1866295264623954, | |
| "grad_norm": 0.003225723747164011, | |
| "learning_rate": 1.3672237697307335e-05, | |
| "logits/chosen": -35.47116470336914, | |
| "logits/rejected": -43.47711944580078, | |
| "logps/chosen": -191.4207305908203, | |
| "logps/rejected": -183.61276245117188, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.8648734092712402, | |
| "rewards/margins": 14.461938858032227, | |
| "rewards/rejected": -11.597065925598145, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 2.200557103064067, | |
| "grad_norm": 0.00013535360631067306, | |
| "learning_rate": 1.3440111420612813e-05, | |
| "logits/chosen": -35.062652587890625, | |
| "logits/rejected": -37.315582275390625, | |
| "logps/chosen": -180.5184783935547, | |
| "logps/rejected": -180.47647094726562, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.1524276733398438, | |
| "rewards/margins": 14.701214790344238, | |
| "rewards/rejected": -11.548785209655762, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 2.214484679665738, | |
| "grad_norm": 5.084893018647563e-06, | |
| "learning_rate": 1.3207985143918291e-05, | |
| "logits/chosen": -35.936790466308594, | |
| "logits/rejected": -36.74972152709961, | |
| "logps/chosen": -181.1525421142578, | |
| "logps/rejected": -181.84603881835938, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.186203718185425, | |
| "rewards/margins": 15.35499095916748, | |
| "rewards/rejected": -12.168787002563477, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 2.2284122562674096, | |
| "grad_norm": 0.0004160265962127596, | |
| "learning_rate": 1.2975858867223769e-05, | |
| "logits/chosen": -37.140037536621094, | |
| "logits/rejected": -42.357078552246094, | |
| "logps/chosen": -187.4177703857422, | |
| "logps/rejected": -178.51724243164062, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.9173107147216797, | |
| "rewards/margins": 13.878204345703125, | |
| "rewards/rejected": -10.960893630981445, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 2.2423398328690807, | |
| "grad_norm": 0.00011604127212194726, | |
| "learning_rate": 1.2743732590529247e-05, | |
| "logits/chosen": -34.818607330322266, | |
| "logits/rejected": -37.724525451660156, | |
| "logps/chosen": -183.49917602539062, | |
| "logps/rejected": -178.86361694335938, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.8125436305999756, | |
| "rewards/margins": 14.389010429382324, | |
| "rewards/rejected": -11.576468467712402, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 2.256267409470752, | |
| "grad_norm": 0.0017907076980918646, | |
| "learning_rate": 1.2511606313834725e-05, | |
| "logits/chosen": -37.98354721069336, | |
| "logits/rejected": -49.680633544921875, | |
| "logps/chosen": -182.12060546875, | |
| "logps/rejected": -176.7200927734375, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.886204481124878, | |
| "rewards/margins": 13.741534233093262, | |
| "rewards/rejected": -10.855329513549805, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 2.2701949860724233, | |
| "grad_norm": 0.0012422021245583892, | |
| "learning_rate": 1.2279480037140205e-05, | |
| "logits/chosen": -36.824703216552734, | |
| "logits/rejected": -36.0146369934082, | |
| "logps/chosen": -207.47109985351562, | |
| "logps/rejected": -192.5692596435547, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.312756061553955, | |
| "rewards/margins": 14.709668159484863, | |
| "rewards/rejected": -12.396913528442383, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 2.284122562674095, | |
| "grad_norm": 0.00051548529881984, | |
| "learning_rate": 1.2047353760445683e-05, | |
| "logits/chosen": -37.36172103881836, | |
| "logits/rejected": -35.01740264892578, | |
| "logps/chosen": -201.63711547851562, | |
| "logps/rejected": -179.27565002441406, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.687352180480957, | |
| "rewards/margins": 14.42353630065918, | |
| "rewards/rejected": -11.736186027526855, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 2.298050139275766, | |
| "grad_norm": 0.00029921080567874014, | |
| "learning_rate": 1.181522748375116e-05, | |
| "logits/chosen": -38.72724533081055, | |
| "logits/rejected": -41.40291213989258, | |
| "logps/chosen": -176.58592224121094, | |
| "logps/rejected": -186.08123779296875, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.005880832672119, | |
| "rewards/margins": 14.604331970214844, | |
| "rewards/rejected": -11.598451614379883, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 2.3119777158774375, | |
| "grad_norm": 0.00012926581257488579, | |
| "learning_rate": 1.1583101207056638e-05, | |
| "logits/chosen": -35.43940734863281, | |
| "logits/rejected": -36.595970153808594, | |
| "logps/chosen": -187.47674560546875, | |
| "logps/rejected": -182.973876953125, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.200408458709717, | |
| "rewards/margins": 14.481452941894531, | |
| "rewards/rejected": -11.281042098999023, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 2.3259052924791086, | |
| "grad_norm": 0.0009956905851140618, | |
| "learning_rate": 1.1350974930362116e-05, | |
| "logits/chosen": -33.24986267089844, | |
| "logits/rejected": -35.32088088989258, | |
| "logps/chosen": -190.22396850585938, | |
| "logps/rejected": -186.2030029296875, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.875269889831543, | |
| "rewards/margins": 15.12162971496582, | |
| "rewards/rejected": -12.246358871459961, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 2.33983286908078, | |
| "grad_norm": 0.00024119042791426182, | |
| "learning_rate": 1.1118848653667596e-05, | |
| "logits/chosen": -35.24303436279297, | |
| "logits/rejected": -37.33085632324219, | |
| "logps/chosen": -184.21661376953125, | |
| "logps/rejected": -183.30850219726562, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.1757493019104004, | |
| "rewards/margins": 15.013044357299805, | |
| "rewards/rejected": -11.837295532226562, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 2.3537604456824512, | |
| "grad_norm": 0.0016317879781126976, | |
| "learning_rate": 1.0886722376973074e-05, | |
| "logits/chosen": -36.30957794189453, | |
| "logits/rejected": -43.52277755737305, | |
| "logps/chosen": -192.4365234375, | |
| "logps/rejected": -186.6822052001953, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.168163776397705, | |
| "rewards/margins": 14.91504955291748, | |
| "rewards/rejected": -11.746885299682617, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 2.3676880222841223, | |
| "grad_norm": 0.00019028224050998688, | |
| "learning_rate": 1.0654596100278552e-05, | |
| "logits/chosen": -35.48088073730469, | |
| "logits/rejected": -40.22465133666992, | |
| "logps/chosen": -178.8065948486328, | |
| "logps/rejected": -177.88638305664062, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.1263413429260254, | |
| "rewards/margins": 14.86894416809082, | |
| "rewards/rejected": -11.742603302001953, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 2.381615598885794, | |
| "grad_norm": 0.00013990727893542498, | |
| "learning_rate": 1.042246982358403e-05, | |
| "logits/chosen": -35.26982879638672, | |
| "logits/rejected": -40.66838836669922, | |
| "logps/chosen": -181.98269653320312, | |
| "logps/rejected": -180.34336853027344, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.132636547088623, | |
| "rewards/margins": 15.100153923034668, | |
| "rewards/rejected": -11.96751594543457, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 2.3955431754874654, | |
| "grad_norm": 0.00010347921488573775, | |
| "learning_rate": 1.0190343546889508e-05, | |
| "logits/chosen": -35.530635833740234, | |
| "logits/rejected": -37.326942443847656, | |
| "logps/chosen": -174.54537963867188, | |
| "logps/rejected": -173.74192810058594, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.0651495456695557, | |
| "rewards/margins": 14.588651657104492, | |
| "rewards/rejected": -11.5235013961792, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 2.4094707520891365, | |
| "grad_norm": 0.0007370264502242208, | |
| "learning_rate": 9.958217270194986e-06, | |
| "logits/chosen": -33.94354248046875, | |
| "logits/rejected": -37.71461486816406, | |
| "logps/chosen": -182.42373657226562, | |
| "logps/rejected": -184.89016723632812, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.007021188735962, | |
| "rewards/margins": 14.52685546875, | |
| "rewards/rejected": -11.519835472106934, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 2.4233983286908076, | |
| "grad_norm": 0.0005266707739792764, | |
| "learning_rate": 9.726090993500464e-06, | |
| "logits/chosen": -35.36183547973633, | |
| "logits/rejected": -42.900447845458984, | |
| "logps/chosen": -197.20449829101562, | |
| "logps/rejected": -183.08486938476562, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.7276971340179443, | |
| "rewards/margins": 14.261263847351074, | |
| "rewards/rejected": -11.53356647491455, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 2.437325905292479, | |
| "grad_norm": 0.0010256224777549505, | |
| "learning_rate": 9.493964716805944e-06, | |
| "logits/chosen": -32.54072570800781, | |
| "logits/rejected": -40.63361358642578, | |
| "logps/chosen": -182.46578979492188, | |
| "logps/rejected": -180.01171875, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.2156386375427246, | |
| "rewards/margins": 14.604194641113281, | |
| "rewards/rejected": -11.388555526733398, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 2.4512534818941503, | |
| "grad_norm": 0.0003196709440089762, | |
| "learning_rate": 9.261838440111422e-06, | |
| "logits/chosen": -36.42176818847656, | |
| "logits/rejected": -40.110435485839844, | |
| "logps/chosen": -172.70303344726562, | |
| "logps/rejected": -183.40029907226562, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.9896297454833984, | |
| "rewards/margins": 14.956689834594727, | |
| "rewards/rejected": -11.967057228088379, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 2.465181058495822, | |
| "grad_norm": 0.0006114134448580444, | |
| "learning_rate": 9.0297121634169e-06, | |
| "logits/chosen": -34.053199768066406, | |
| "logits/rejected": -40.59447479248047, | |
| "logps/chosen": -187.02113342285156, | |
| "logps/rejected": -177.52719116210938, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.881593942642212, | |
| "rewards/margins": 14.561999320983887, | |
| "rewards/rejected": -11.680405616760254, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 2.479108635097493, | |
| "grad_norm": 0.006414721254259348, | |
| "learning_rate": 8.797585886722378e-06, | |
| "logits/chosen": -35.063026428222656, | |
| "logits/rejected": -40.642601013183594, | |
| "logps/chosen": -180.2222442626953, | |
| "logps/rejected": -177.395751953125, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.213207721710205, | |
| "rewards/margins": 14.45429801940918, | |
| "rewards/rejected": -11.241090774536133, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 2.4930362116991645, | |
| "grad_norm": 0.00032282338361255825, | |
| "learning_rate": 8.565459610027856e-06, | |
| "logits/chosen": -35.19350051879883, | |
| "logits/rejected": -44.75577926635742, | |
| "logps/chosen": -182.2408447265625, | |
| "logps/rejected": -174.94203186035156, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.2195639610290527, | |
| "rewards/margins": 14.126502990722656, | |
| "rewards/rejected": -10.906938552856445, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 2.5069637883008355, | |
| "grad_norm": 0.00048296854947693646, | |
| "learning_rate": 8.333333333333334e-06, | |
| "logits/chosen": -37.459720611572266, | |
| "logits/rejected": -34.258609771728516, | |
| "logps/chosen": -200.30984497070312, | |
| "logps/rejected": -186.15658569335938, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.408186912536621, | |
| "rewards/margins": 14.674453735351562, | |
| "rewards/rejected": -12.266264915466309, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 2.520891364902507, | |
| "grad_norm": 0.000831536075565964, | |
| "learning_rate": 8.101207056638812e-06, | |
| "logits/chosen": -34.92743682861328, | |
| "logits/rejected": -37.31873321533203, | |
| "logps/chosen": -205.9928741455078, | |
| "logps/rejected": -176.43490600585938, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.477787971496582, | |
| "rewards/margins": 14.188326835632324, | |
| "rewards/rejected": -11.710537910461426, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 2.534818941504178, | |
| "grad_norm": 4.310339136281982e-05, | |
| "learning_rate": 7.869080779944291e-06, | |
| "logits/chosen": -33.059104919433594, | |
| "logits/rejected": -38.825035095214844, | |
| "logps/chosen": -192.43711853027344, | |
| "logps/rejected": -172.77899169921875, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.044785261154175, | |
| "rewards/margins": 14.586946487426758, | |
| "rewards/rejected": -11.54216194152832, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 2.5487465181058497, | |
| "grad_norm": 0.0003261861565988511, | |
| "learning_rate": 7.63695450324977e-06, | |
| "logits/chosen": -32.958534240722656, | |
| "logits/rejected": -36.15265655517578, | |
| "logps/chosen": -184.374755859375, | |
| "logps/rejected": -186.88174438476562, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.408311367034912, | |
| "rewards/margins": 14.616594314575195, | |
| "rewards/rejected": -12.208281517028809, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 2.562674094707521, | |
| "grad_norm": 0.0008768205298110843, | |
| "learning_rate": 7.4048282265552465e-06, | |
| "logits/chosen": -34.02647018432617, | |
| "logits/rejected": -40.55278015136719, | |
| "logps/chosen": -180.45309448242188, | |
| "logps/rejected": -187.4500274658203, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.939589262008667, | |
| "rewards/margins": 15.011263847351074, | |
| "rewards/rejected": -12.071674346923828, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 2.5766016713091924, | |
| "grad_norm": 0.00012635016173589975, | |
| "learning_rate": 7.1727019498607245e-06, | |
| "logits/chosen": -35.22064971923828, | |
| "logits/rejected": -39.80550003051758, | |
| "logps/chosen": -185.4181365966797, | |
| "logps/rejected": -171.8718719482422, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.2081618309020996, | |
| "rewards/margins": 14.463720321655273, | |
| "rewards/rejected": -11.255556106567383, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 2.5905292479108635, | |
| "grad_norm": 0.0006862750160507858, | |
| "learning_rate": 6.9405756731662025e-06, | |
| "logits/chosen": -32.005226135253906, | |
| "logits/rejected": -36.187660217285156, | |
| "logps/chosen": -180.09140014648438, | |
| "logps/rejected": -182.02792358398438, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.8680760860443115, | |
| "rewards/margins": 14.800928115844727, | |
| "rewards/rejected": -11.932851791381836, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 2.6044568245125346, | |
| "grad_norm": 4.4549200538313016e-05, | |
| "learning_rate": 6.7084493964716805e-06, | |
| "logits/chosen": -35.79204559326172, | |
| "logits/rejected": -38.733375549316406, | |
| "logps/chosen": -189.5426483154297, | |
| "logps/rejected": -189.75588989257812, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.612856388092041, | |
| "rewards/margins": 14.540079116821289, | |
| "rewards/rejected": -11.927224159240723, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 2.618384401114206, | |
| "grad_norm": 2.0292129192966968e-05, | |
| "learning_rate": 6.4763231197771585e-06, | |
| "logits/chosen": -34.89278030395508, | |
| "logits/rejected": -33.01803970336914, | |
| "logps/chosen": -180.7728729248047, | |
| "logps/rejected": -184.56167602539062, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.2225723266601562, | |
| "rewards/margins": 16.246957778930664, | |
| "rewards/rejected": -13.024386405944824, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 2.6323119777158777, | |
| "grad_norm": 0.000608162023127079, | |
| "learning_rate": 6.244196843082637e-06, | |
| "logits/chosen": -32.106956481933594, | |
| "logits/rejected": -39.27301788330078, | |
| "logps/chosen": -183.5093994140625, | |
| "logps/rejected": -175.55055236816406, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.5961029529571533, | |
| "rewards/margins": 14.337031364440918, | |
| "rewards/rejected": -10.740928649902344, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 2.6462395543175488, | |
| "grad_norm": 0.00044016874744556844, | |
| "learning_rate": 6.012070566388115e-06, | |
| "logits/chosen": -36.276512145996094, | |
| "logits/rejected": -39.37938690185547, | |
| "logps/chosen": -195.60324096679688, | |
| "logps/rejected": -177.52719116210938, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.660752773284912, | |
| "rewards/margins": 13.905537605285645, | |
| "rewards/rejected": -11.244784355163574, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 2.66016713091922, | |
| "grad_norm": 0.0002324298257008195, | |
| "learning_rate": 5.779944289693594e-06, | |
| "logits/chosen": -36.83045196533203, | |
| "logits/rejected": -40.6081428527832, | |
| "logps/chosen": -180.4891815185547, | |
| "logps/rejected": -181.4812469482422, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.993919849395752, | |
| "rewards/margins": 14.951128005981445, | |
| "rewards/rejected": -11.957206726074219, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 2.6740947075208914, | |
| "grad_norm": 0.0007754198159091175, | |
| "learning_rate": 5.547818012999071e-06, | |
| "logits/chosen": -30.579111099243164, | |
| "logits/rejected": -38.765045166015625, | |
| "logps/chosen": -174.76547241210938, | |
| "logps/rejected": -183.4005126953125, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.667250394821167, | |
| "rewards/margins": 13.938199043273926, | |
| "rewards/rejected": -11.27094841003418, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 2.688022284122563, | |
| "grad_norm": 0.0011436525965109468, | |
| "learning_rate": 5.315691736304549e-06, | |
| "logits/chosen": -34.016395568847656, | |
| "logits/rejected": -36.03986740112305, | |
| "logps/chosen": -186.30514526367188, | |
| "logps/rejected": -179.84713745117188, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.890596389770508, | |
| "rewards/margins": 15.47045612335205, | |
| "rewards/rejected": -12.579858779907227, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 2.701949860724234, | |
| "grad_norm": 0.0023635756224393845, | |
| "learning_rate": 5.083565459610028e-06, | |
| "logits/chosen": -34.19483184814453, | |
| "logits/rejected": -40.62418746948242, | |
| "logps/chosen": -167.63302612304688, | |
| "logps/rejected": -179.10595703125, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.8706557750701904, | |
| "rewards/margins": 14.436419486999512, | |
| "rewards/rejected": -11.565765380859375, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 2.715877437325905, | |
| "grad_norm": 0.0005265133222565055, | |
| "learning_rate": 4.851439182915506e-06, | |
| "logits/chosen": -35.469627380371094, | |
| "logits/rejected": -47.087947845458984, | |
| "logps/chosen": -179.50204467773438, | |
| "logps/rejected": -179.5740966796875, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.908020257949829, | |
| "rewards/margins": 14.18663215637207, | |
| "rewards/rejected": -11.278613090515137, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 2.7298050139275767, | |
| "grad_norm": 0.0008786062244325876, | |
| "learning_rate": 4.619312906220984e-06, | |
| "logits/chosen": -32.846824645996094, | |
| "logits/rejected": -38.901092529296875, | |
| "logps/chosen": -186.23672485351562, | |
| "logps/rejected": -179.1976776123047, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.0682077407836914, | |
| "rewards/margins": 14.65526008605957, | |
| "rewards/rejected": -11.587051391601562, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 2.743732590529248, | |
| "grad_norm": 0.0003964914649259299, | |
| "learning_rate": 4.387186629526462e-06, | |
| "logits/chosen": -33.28449249267578, | |
| "logits/rejected": -37.38798141479492, | |
| "logps/chosen": -170.21182250976562, | |
| "logps/rejected": -183.37669372558594, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.0049500465393066, | |
| "rewards/margins": 14.757115364074707, | |
| "rewards/rejected": -11.752164840698242, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 2.7576601671309193, | |
| "grad_norm": 9.640253119869158e-05, | |
| "learning_rate": 4.155060352831941e-06, | |
| "logits/chosen": -34.633426666259766, | |
| "logits/rejected": -40.41498565673828, | |
| "logps/chosen": -187.4309539794922, | |
| "logps/rejected": -183.026123046875, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.992767095565796, | |
| "rewards/margins": 15.027621269226074, | |
| "rewards/rejected": -12.034854888916016, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 2.7715877437325904, | |
| "grad_norm": 0.003279632655903697, | |
| "learning_rate": 3.922934076137419e-06, | |
| "logits/chosen": -33.140899658203125, | |
| "logits/rejected": -33.699161529541016, | |
| "logps/chosen": -182.78408813476562, | |
| "logps/rejected": -178.6408233642578, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.0828540325164795, | |
| "rewards/margins": 14.902987480163574, | |
| "rewards/rejected": -11.8201322555542, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 2.785515320334262, | |
| "grad_norm": 0.00014628810458816588, | |
| "learning_rate": 3.690807799442897e-06, | |
| "logits/chosen": -32.4156379699707, | |
| "logits/rejected": -35.6164665222168, | |
| "logps/chosen": -176.88565063476562, | |
| "logps/rejected": -180.22218322753906, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.3253417015075684, | |
| "rewards/margins": 15.263575553894043, | |
| "rewards/rejected": -11.938233375549316, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.799442896935933, | |
| "grad_norm": 0.00014062832633499056, | |
| "learning_rate": 3.4586815227483758e-06, | |
| "logits/chosen": -32.96575164794922, | |
| "logits/rejected": -38.11034393310547, | |
| "logps/chosen": -189.6385040283203, | |
| "logps/rejected": -182.67161560058594, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.1147329807281494, | |
| "rewards/margins": 14.853757858276367, | |
| "rewards/rejected": -11.73902702331543, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 2.8133704735376046, | |
| "grad_norm": 0.0013761012814939022, | |
| "learning_rate": 3.2265552460538537e-06, | |
| "logits/chosen": -30.57448387145996, | |
| "logits/rejected": -31.974700927734375, | |
| "logps/chosen": -173.05075073242188, | |
| "logps/rejected": -176.5461883544922, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.327730655670166, | |
| "rewards/margins": 14.546607971191406, | |
| "rewards/rejected": -11.218875885009766, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 2.8272980501392757, | |
| "grad_norm": 0.0010256225941702724, | |
| "learning_rate": 2.9944289693593313e-06, | |
| "logits/chosen": -32.459144592285156, | |
| "logits/rejected": -33.54745101928711, | |
| "logps/chosen": -189.91656494140625, | |
| "logps/rejected": -184.4038848876953, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.7565293312072754, | |
| "rewards/margins": 14.743266105651855, | |
| "rewards/rejected": -11.986737251281738, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 2.841225626740947, | |
| "grad_norm": 0.0001987310970434919, | |
| "learning_rate": 2.7623026926648097e-06, | |
| "logits/chosen": -34.515052795410156, | |
| "logits/rejected": -40.575809478759766, | |
| "logps/chosen": -188.13137817382812, | |
| "logps/rejected": -173.988525390625, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.147059917449951, | |
| "rewards/margins": 13.975947380065918, | |
| "rewards/rejected": -10.828886985778809, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 2.8551532033426184, | |
| "grad_norm": 0.0019440051401033998, | |
| "learning_rate": 2.5301764159702877e-06, | |
| "logits/chosen": -31.311901092529297, | |
| "logits/rejected": -43.11977005004883, | |
| "logps/chosen": -176.417236328125, | |
| "logps/rejected": -179.4446258544922, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.9070076942443848, | |
| "rewards/margins": 14.395605087280273, | |
| "rewards/rejected": -11.48859691619873, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 2.86908077994429, | |
| "grad_norm": 0.0007369028753601015, | |
| "learning_rate": 2.298050139275766e-06, | |
| "logits/chosen": -35.70469665527344, | |
| "logits/rejected": -39.731964111328125, | |
| "logps/chosen": -198.8573760986328, | |
| "logps/rejected": -193.2760009765625, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.670799732208252, | |
| "rewards/margins": 15.742904663085938, | |
| "rewards/rejected": -13.072105407714844, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 2.883008356545961, | |
| "grad_norm": 0.0003779043036047369, | |
| "learning_rate": 2.0659238625812445e-06, | |
| "logits/chosen": -31.949493408203125, | |
| "logits/rejected": -31.089712142944336, | |
| "logps/chosen": -191.99917602539062, | |
| "logps/rejected": -180.83590698242188, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.66987943649292, | |
| "rewards/margins": 14.582662582397461, | |
| "rewards/rejected": -11.912784576416016, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 2.896935933147632, | |
| "grad_norm": 0.00013451039558276534, | |
| "learning_rate": 1.8337975858867223e-06, | |
| "logits/chosen": -33.855201721191406, | |
| "logits/rejected": -40.16669464111328, | |
| "logps/chosen": -194.4259490966797, | |
| "logps/rejected": -189.32534790039062, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.899250030517578, | |
| "rewards/margins": 14.937406539916992, | |
| "rewards/rejected": -12.038156509399414, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 2.9108635097493036, | |
| "grad_norm": 0.0010667495662346482, | |
| "learning_rate": 1.6016713091922007e-06, | |
| "logits/chosen": -32.62724304199219, | |
| "logits/rejected": -32.57086944580078, | |
| "logps/chosen": -188.95858764648438, | |
| "logps/rejected": -182.73516845703125, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.1314823627471924, | |
| "rewards/margins": 15.311136245727539, | |
| "rewards/rejected": -12.179654121398926, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 2.924791086350975, | |
| "grad_norm": 2.006152135436423e-05, | |
| "learning_rate": 1.369545032497679e-06, | |
| "logits/chosen": -32.503414154052734, | |
| "logits/rejected": -39.49541091918945, | |
| "logps/chosen": -190.20492553710938, | |
| "logps/rejected": -180.6317901611328, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.7702364921569824, | |
| "rewards/margins": 14.473835945129395, | |
| "rewards/rejected": -11.70359992980957, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 2.9387186629526463, | |
| "grad_norm": 0.00034252344630658627, | |
| "learning_rate": 1.1374187558031571e-06, | |
| "logits/chosen": -35.25168228149414, | |
| "logits/rejected": -38.91762161254883, | |
| "logps/chosen": -175.8085479736328, | |
| "logps/rejected": -179.20114135742188, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.377187728881836, | |
| "rewards/margins": 14.512290954589844, | |
| "rewards/rejected": -11.135104179382324, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 2.9526462395543174, | |
| "grad_norm": 0.00010172268230235204, | |
| "learning_rate": 9.052924791086352e-07, | |
| "logits/chosen": -33.312416076660156, | |
| "logits/rejected": -40.598228454589844, | |
| "logps/chosen": -190.16940307617188, | |
| "logps/rejected": -175.98265075683594, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.345635414123535, | |
| "rewards/margins": 13.13763427734375, | |
| "rewards/rejected": -10.791997909545898, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 2.966573816155989, | |
| "grad_norm": 0.00028497324092313647, | |
| "learning_rate": 6.731662024141133e-07, | |
| "logits/chosen": -33.10255813598633, | |
| "logits/rejected": -37.320838928222656, | |
| "logps/chosen": -194.23452758789062, | |
| "logps/rejected": -182.88694763183594, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.9284331798553467, | |
| "rewards/margins": 14.861193656921387, | |
| "rewards/rejected": -11.932759284973145, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 2.98050139275766, | |
| "grad_norm": 6.592216959688812e-05, | |
| "learning_rate": 4.4103992571959147e-07, | |
| "logits/chosen": -33.3602180480957, | |
| "logits/rejected": -31.276813507080078, | |
| "logps/chosen": -202.84475708007812, | |
| "logps/rejected": -188.7292938232422, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 2.3148932456970215, | |
| "rewards/margins": 14.879697799682617, | |
| "rewards/rejected": -12.564805030822754, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 2.9944289693593316, | |
| "grad_norm": 0.0003906878991983831, | |
| "learning_rate": 2.0891364902506967e-07, | |
| "logits/chosen": -32.936134338378906, | |
| "logits/rejected": -35.92009735107422, | |
| "logps/chosen": -193.31060791015625, | |
| "logps/rejected": -176.9427490234375, | |
| "loss": 0.0, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 3.182100772857666, | |
| "rewards/margins": 14.334200859069824, | |
| "rewards/rejected": -11.152099609375, | |
| "step": 2150 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 2154, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |