| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.564102564102564, | |
| "eval_steps": 100, | |
| "global_step": 200, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.01282051282051282, | |
| "grad_norm": 27.75, | |
| "learning_rate": 1.6666666666666667e-06, | |
| "logits/chosen": -0.06380753219127655, | |
| "logits/rejected": 0.12772592902183533, | |
| "logps/chosen": -112.26579284667969, | |
| "logps/rejected": -171.0265655517578, | |
| "loss": 0.6931, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.02564102564102564, | |
| "grad_norm": 29.625, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "logits/chosen": -0.06565480679273605, | |
| "logits/rejected": 0.17766284942626953, | |
| "logps/chosen": -88.36125183105469, | |
| "logps/rejected": -151.4352264404297, | |
| "loss": 0.6931, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.038461538461538464, | |
| "grad_norm": 34.75, | |
| "learning_rate": 5e-06, | |
| "logits/chosen": -0.21257327497005463, | |
| "logits/rejected": 0.0220273919403553, | |
| "logps/chosen": -84.14213562011719, | |
| "logps/rejected": -140.61831665039062, | |
| "loss": 0.6903, | |
| "rewards/accuracies": 0.59375, | |
| "rewards/chosen": 0.007791845127940178, | |
| "rewards/margins": 0.01143356692045927, | |
| "rewards/rejected": -0.003641726914793253, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.05128205128205128, | |
| "grad_norm": 52.5, | |
| "learning_rate": 6.666666666666667e-06, | |
| "logits/chosen": -0.19956666231155396, | |
| "logits/rejected": 0.04657585173845291, | |
| "logps/chosen": -83.94677734375, | |
| "logps/rejected": -137.0675048828125, | |
| "loss": 0.7134, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -0.019337624311447144, | |
| "rewards/margins": -0.031028207391500473, | |
| "rewards/rejected": 0.011690582148730755, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.0641025641025641, | |
| "grad_norm": 34.5, | |
| "learning_rate": 8.333333333333334e-06, | |
| "logits/chosen": -0.06831943988800049, | |
| "logits/rejected": 0.2727906405925751, | |
| "logps/chosen": -63.68186950683594, | |
| "logps/rejected": -138.19874572753906, | |
| "loss": 0.7083, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.0014037620276212692, | |
| "rewards/margins": -0.0242691058665514, | |
| "rewards/rejected": 0.02286534383893013, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.07692307692307693, | |
| "grad_norm": 49.5, | |
| "learning_rate": 1e-05, | |
| "logits/chosen": -0.01841258443892002, | |
| "logits/rejected": 0.16046011447906494, | |
| "logps/chosen": -102.26649475097656, | |
| "logps/rejected": -140.34805297851562, | |
| "loss": 0.6849, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": 0.026656517758965492, | |
| "rewards/margins": 0.02228293940424919, | |
| "rewards/rejected": 0.004373575560748577, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.08974358974358974, | |
| "grad_norm": 29.0, | |
| "learning_rate": 1.1666666666666668e-05, | |
| "logits/chosen": -0.14498299360275269, | |
| "logits/rejected": 0.08867734670639038, | |
| "logps/chosen": -82.10832214355469, | |
| "logps/rejected": -141.8557586669922, | |
| "loss": 0.6947, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.00393604626879096, | |
| "rewards/margins": 0.0014155255630612373, | |
| "rewards/rejected": -0.00535157136619091, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.10256410256410256, | |
| "grad_norm": 82.5, | |
| "learning_rate": 1.3333333333333333e-05, | |
| "logits/chosen": -0.16020306944847107, | |
| "logits/rejected": -0.03665738180279732, | |
| "logps/chosen": -103.45797729492188, | |
| "logps/rejected": -142.54185485839844, | |
| "loss": 0.6817, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.008296433836221695, | |
| "rewards/margins": 0.02857138216495514, | |
| "rewards/rejected": -0.036867816001176834, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.11538461538461539, | |
| "grad_norm": 33.0, | |
| "learning_rate": 1.5e-05, | |
| "logits/chosen": -0.22490036487579346, | |
| "logits/rejected": 0.13594487309455872, | |
| "logps/chosen": -58.683170318603516, | |
| "logps/rejected": -146.40663146972656, | |
| "loss": 0.6833, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.017883911728858948, | |
| "rewards/margins": 0.024988900870084763, | |
| "rewards/rejected": -0.007104992866516113, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.1282051282051282, | |
| "grad_norm": 28.0, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "logits/chosen": -0.08521188050508499, | |
| "logits/rejected": 0.1271093636751175, | |
| "logps/chosen": -82.43521118164062, | |
| "logps/rejected": -141.2976837158203, | |
| "loss": 0.6757, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": 0.033490099012851715, | |
| "rewards/margins": 0.040245190262794495, | |
| "rewards/rejected": -0.006755088455975056, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.14102564102564102, | |
| "grad_norm": 41.0, | |
| "learning_rate": 1.8333333333333333e-05, | |
| "logits/chosen": 0.005568627268075943, | |
| "logits/rejected": 0.25887250900268555, | |
| "logps/chosen": -121.749755859375, | |
| "logps/rejected": -172.1695556640625, | |
| "loss": 0.6916, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": 0.0011581219732761383, | |
| "rewards/margins": 0.010924594476819038, | |
| "rewards/rejected": -0.0097664725035429, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.15384615384615385, | |
| "grad_norm": 30.625, | |
| "learning_rate": 2e-05, | |
| "logits/chosen": -0.0973924770951271, | |
| "logits/rejected": 0.1759243905544281, | |
| "logps/chosen": -71.58274841308594, | |
| "logps/rejected": -137.77407836914062, | |
| "loss": 0.6966, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -0.01036906149238348, | |
| "rewards/margins": -0.002420688048005104, | |
| "rewards/rejected": -0.00794837437570095, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.16666666666666666, | |
| "grad_norm": 40.75, | |
| "learning_rate": 2.1666666666666667e-05, | |
| "logits/chosen": -0.005536120384931564, | |
| "logits/rejected": 0.22361864149570465, | |
| "logps/chosen": -84.1854248046875, | |
| "logps/rejected": -136.99334716796875, | |
| "loss": 0.6824, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.007866489700973034, | |
| "rewards/margins": 0.030259691178798676, | |
| "rewards/rejected": -0.038126181811094284, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 0.1794871794871795, | |
| "grad_norm": 30.25, | |
| "learning_rate": 2.3333333333333336e-05, | |
| "logits/chosen": -0.18938826024532318, | |
| "logits/rejected": -0.04171081632375717, | |
| "logps/chosen": -122.1632080078125, | |
| "logps/rejected": -150.66244506835938, | |
| "loss": 0.6728, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": -0.030274342745542526, | |
| "rewards/margins": 0.0520247146487236, | |
| "rewards/rejected": -0.08229905366897583, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.19230769230769232, | |
| "grad_norm": 26.375, | |
| "learning_rate": 2.5e-05, | |
| "logits/chosen": -0.13239961862564087, | |
| "logits/rejected": 0.04957669600844383, | |
| "logps/chosen": -92.12763977050781, | |
| "logps/rejected": -149.10986328125, | |
| "loss": 0.6794, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.03091922402381897, | |
| "rewards/margins": 0.039343856275081635, | |
| "rewards/rejected": -0.008424634113907814, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.20512820512820512, | |
| "grad_norm": 26.375, | |
| "learning_rate": 2.6666666666666667e-05, | |
| "logits/chosen": -0.17461884021759033, | |
| "logits/rejected": 0.1618526726961136, | |
| "logps/chosen": -87.01570129394531, | |
| "logps/rejected": -164.5313720703125, | |
| "loss": 0.6624, | |
| "rewards/accuracies": 0.65625, | |
| "rewards/chosen": -0.0006066989153623581, | |
| "rewards/margins": 0.0701022818684578, | |
| "rewards/rejected": -0.0707089751958847, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.21794871794871795, | |
| "grad_norm": 48.75, | |
| "learning_rate": 2.8333333333333335e-05, | |
| "logits/chosen": -0.0012298859655857086, | |
| "logits/rejected": 0.2805720567703247, | |
| "logps/chosen": -102.5722427368164, | |
| "logps/rejected": -176.0152587890625, | |
| "loss": 0.667, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.005490007810294628, | |
| "rewards/margins": 0.05767596513032913, | |
| "rewards/rejected": -0.06316597759723663, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.23076923076923078, | |
| "grad_norm": 35.75, | |
| "learning_rate": 3e-05, | |
| "logits/chosen": -0.18062862753868103, | |
| "logits/rejected": 0.11536470800638199, | |
| "logps/chosen": -89.42520141601562, | |
| "logps/rejected": -155.97567749023438, | |
| "loss": 0.6566, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.0270236786454916, | |
| "rewards/margins": 0.08378218114376068, | |
| "rewards/rejected": -0.05675850063562393, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.24358974358974358, | |
| "grad_norm": 36.75, | |
| "learning_rate": 3.1666666666666666e-05, | |
| "logits/chosen": -0.11459638178348541, | |
| "logits/rejected": 0.14410018920898438, | |
| "logps/chosen": -79.88298034667969, | |
| "logps/rejected": -171.2315673828125, | |
| "loss": 0.6422, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.004121261648833752, | |
| "rewards/margins": 0.11095136404037476, | |
| "rewards/rejected": -0.10683010518550873, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.2564102564102564, | |
| "grad_norm": 26.875, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "logits/chosen": -0.13926473259925842, | |
| "logits/rejected": 0.14452148973941803, | |
| "logps/chosen": -116.87776947021484, | |
| "logps/rejected": -185.11489868164062, | |
| "loss": 0.6201, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -0.0278608500957489, | |
| "rewards/margins": 0.16453403234481812, | |
| "rewards/rejected": -0.19239488244056702, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.2692307692307692, | |
| "grad_norm": 28.75, | |
| "learning_rate": 3.5e-05, | |
| "logits/chosen": -0.10806109011173248, | |
| "logits/rejected": 0.1241796687245369, | |
| "logps/chosen": -94.49578857421875, | |
| "logps/rejected": -168.69583129882812, | |
| "loss": 0.6505, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.007688253186643124, | |
| "rewards/margins": 0.09761032462120056, | |
| "rewards/rejected": -0.10529857128858566, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.28205128205128205, | |
| "grad_norm": 54.25, | |
| "learning_rate": 3.6666666666666666e-05, | |
| "logits/chosen": -0.1821673959493637, | |
| "logits/rejected": -0.02065378986299038, | |
| "logps/chosen": -87.93394470214844, | |
| "logps/rejected": -148.05177307128906, | |
| "loss": 0.6204, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.006978330202400684, | |
| "rewards/margins": 0.16433021426200867, | |
| "rewards/rejected": -0.17130856215953827, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.2948717948717949, | |
| "grad_norm": 25.375, | |
| "learning_rate": 3.8333333333333334e-05, | |
| "logits/chosen": -0.1627923548221588, | |
| "logits/rejected": 0.1547648012638092, | |
| "logps/chosen": -140.819580078125, | |
| "logps/rejected": -160.78582763671875, | |
| "loss": 0.6131, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": -0.03600157052278519, | |
| "rewards/margins": 0.18165405094623566, | |
| "rewards/rejected": -0.21765561401844025, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.3076923076923077, | |
| "grad_norm": 22.0, | |
| "learning_rate": 4e-05, | |
| "logits/chosen": -0.14879387617111206, | |
| "logits/rejected": 0.09165795892477036, | |
| "logps/chosen": -93.18842315673828, | |
| "logps/rejected": -167.2100067138672, | |
| "loss": 0.569, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": 0.054303426295518875, | |
| "rewards/margins": 0.28615322709083557, | |
| "rewards/rejected": -0.2318498194217682, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.32051282051282054, | |
| "grad_norm": 37.0, | |
| "learning_rate": 4.166666666666667e-05, | |
| "logits/chosen": -0.03993874788284302, | |
| "logits/rejected": 0.15101341903209686, | |
| "logps/chosen": -106.16200256347656, | |
| "logps/rejected": -156.69503784179688, | |
| "loss": 0.5653, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.050836555659770966, | |
| "rewards/margins": 0.3024190068244934, | |
| "rewards/rejected": -0.25158244371414185, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.3333333333333333, | |
| "grad_norm": 39.0, | |
| "learning_rate": 4.3333333333333334e-05, | |
| "logits/chosen": -0.11266515403985977, | |
| "logits/rejected": 0.16806921362876892, | |
| "logps/chosen": -67.06320190429688, | |
| "logps/rejected": -160.43670654296875, | |
| "loss": 0.5179, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.07256568223237991, | |
| "rewards/margins": 0.40622758865356445, | |
| "rewards/rejected": -0.33366188406944275, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.34615384615384615, | |
| "grad_norm": 42.75, | |
| "learning_rate": 4.5e-05, | |
| "logits/chosen": -0.14196887612342834, | |
| "logits/rejected": 0.07888446003198624, | |
| "logps/chosen": -99.10836791992188, | |
| "logps/rejected": -156.85507202148438, | |
| "loss": 0.5386, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -0.006482891738414764, | |
| "rewards/margins": 0.36570924520492554, | |
| "rewards/rejected": -0.3721921443939209, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.358974358974359, | |
| "grad_norm": 20.125, | |
| "learning_rate": 4.666666666666667e-05, | |
| "logits/chosen": -0.09473855048418045, | |
| "logits/rejected": 0.16539070010185242, | |
| "logps/chosen": -85.95926666259766, | |
| "logps/rejected": -158.43234252929688, | |
| "loss": 0.5003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.05999298766255379, | |
| "rewards/margins": 0.45657122135162354, | |
| "rewards/rejected": -0.39657825231552124, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.3717948717948718, | |
| "grad_norm": 35.5, | |
| "learning_rate": 4.8333333333333334e-05, | |
| "logits/chosen": -0.17103618383407593, | |
| "logits/rejected": 0.1040990948677063, | |
| "logps/chosen": -90.04185485839844, | |
| "logps/rejected": -150.4471893310547, | |
| "loss": 0.5019, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": 0.0575297586619854, | |
| "rewards/margins": 0.4724646806716919, | |
| "rewards/rejected": -0.4149349331855774, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.38461538461538464, | |
| "grad_norm": 24.875, | |
| "learning_rate": 5e-05, | |
| "logits/chosen": -0.1406603753566742, | |
| "logits/rejected": 0.15464246273040771, | |
| "logps/chosen": -87.17767333984375, | |
| "logps/rejected": -174.3140869140625, | |
| "loss": 0.4416, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.00285655097104609, | |
| "rewards/margins": 0.6478032469749451, | |
| "rewards/rejected": -0.6449466943740845, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.3974358974358974, | |
| "grad_norm": 26.875, | |
| "learning_rate": 4.996732026143791e-05, | |
| "logits/chosen": -0.17788799107074738, | |
| "logits/rejected": 0.1254492998123169, | |
| "logps/chosen": -84.51072692871094, | |
| "logps/rejected": -142.1890869140625, | |
| "loss": 0.4662, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": 0.044377654790878296, | |
| "rewards/margins": 0.5697494745254517, | |
| "rewards/rejected": -0.5253718495368958, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.41025641025641024, | |
| "grad_norm": 25.375, | |
| "learning_rate": 4.993464052287582e-05, | |
| "logits/chosen": -0.0847737044095993, | |
| "logits/rejected": 0.09796766936779022, | |
| "logps/chosen": -78.6427001953125, | |
| "logps/rejected": -148.26177978515625, | |
| "loss": 0.4458, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": -0.04513291269540787, | |
| "rewards/margins": 0.6427649259567261, | |
| "rewards/rejected": -0.6878978610038757, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.4230769230769231, | |
| "grad_norm": 37.75, | |
| "learning_rate": 4.990196078431373e-05, | |
| "logits/chosen": -0.14097975194454193, | |
| "logits/rejected": 0.03422202542424202, | |
| "logps/chosen": -89.5251235961914, | |
| "logps/rejected": -140.79981994628906, | |
| "loss": 0.4696, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.04720389470458031, | |
| "rewards/margins": 0.5877120494842529, | |
| "rewards/rejected": -0.5405081510543823, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.4358974358974359, | |
| "grad_norm": 23.25, | |
| "learning_rate": 4.986928104575164e-05, | |
| "logits/chosen": -0.06732790172100067, | |
| "logits/rejected": 0.05971694737672806, | |
| "logps/chosen": -101.961181640625, | |
| "logps/rejected": -166.38430786132812, | |
| "loss": 0.4222, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": -0.0005174288526177406, | |
| "rewards/margins": 0.716086745262146, | |
| "rewards/rejected": -0.7166041135787964, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.44871794871794873, | |
| "grad_norm": 30.125, | |
| "learning_rate": 4.983660130718955e-05, | |
| "logits/chosen": -0.07001346349716187, | |
| "logits/rejected": 0.1499863564968109, | |
| "logps/chosen": -117.123291015625, | |
| "logps/rejected": -187.142578125, | |
| "loss": 0.3286, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.06537400931119919, | |
| "rewards/margins": 1.0441173315048218, | |
| "rewards/rejected": -1.1094913482666016, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.46153846153846156, | |
| "grad_norm": 17.75, | |
| "learning_rate": 4.980392156862745e-05, | |
| "logits/chosen": -0.2118874490261078, | |
| "logits/rejected": -0.011951565742492676, | |
| "logps/chosen": -94.68974304199219, | |
| "logps/rejected": -131.18968200683594, | |
| "loss": 0.4172, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": 0.04915960878133774, | |
| "rewards/margins": 0.7715533375740051, | |
| "rewards/rejected": -0.7223937511444092, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.47435897435897434, | |
| "grad_norm": 20.375, | |
| "learning_rate": 4.977124183006536e-05, | |
| "logits/chosen": -0.07903751730918884, | |
| "logits/rejected": 0.11850599199533463, | |
| "logps/chosen": -142.09075927734375, | |
| "logps/rejected": -188.67568969726562, | |
| "loss": 0.3435, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": -0.06267163157463074, | |
| "rewards/margins": 1.0669060945510864, | |
| "rewards/rejected": -1.1295777559280396, | |
| "step": 37 | |
| }, | |
| { | |
| "epoch": 0.48717948717948717, | |
| "grad_norm": 30.875, | |
| "learning_rate": 4.973856209150327e-05, | |
| "logits/chosen": -0.13469821214675903, | |
| "logits/rejected": 0.14652732014656067, | |
| "logps/chosen": -92.22380065917969, | |
| "logps/rejected": -180.69471740722656, | |
| "loss": 0.2816, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.02168530412018299, | |
| "rewards/margins": 1.2966737747192383, | |
| "rewards/rejected": -1.2749884128570557, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 51.25, | |
| "learning_rate": 4.970588235294118e-05, | |
| "logits/chosen": -0.07626571506261826, | |
| "logits/rejected": 0.18709780275821686, | |
| "logps/chosen": -68.80900573730469, | |
| "logps/rejected": -145.2672882080078, | |
| "loss": 0.3523, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -0.014520924538373947, | |
| "rewards/margins": 1.048326849937439, | |
| "rewards/rejected": -1.0628478527069092, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.5128205128205128, | |
| "grad_norm": 18.375, | |
| "learning_rate": 4.967320261437909e-05, | |
| "logits/chosen": -0.07083877921104431, | |
| "logits/rejected": 0.11468646675348282, | |
| "logps/chosen": -88.19615936279297, | |
| "logps/rejected": -157.5552978515625, | |
| "loss": 0.3199, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.025296173989772797, | |
| "rewards/margins": 1.067254900932312, | |
| "rewards/rejected": -1.0925511121749878, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.5256410256410257, | |
| "grad_norm": 25.125, | |
| "learning_rate": 4.9640522875817e-05, | |
| "logits/chosen": -0.19776758551597595, | |
| "logits/rejected": 0.03766755759716034, | |
| "logps/chosen": -105.27020263671875, | |
| "logps/rejected": -203.21847534179688, | |
| "loss": 0.2483, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.11630360037088394, | |
| "rewards/margins": 1.5168688297271729, | |
| "rewards/rejected": -1.6331722736358643, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.5384615384615384, | |
| "grad_norm": 21.375, | |
| "learning_rate": 4.960784313725491e-05, | |
| "logits/chosen": -0.08834187686443329, | |
| "logits/rejected": 0.05226800590753555, | |
| "logps/chosen": -107.19200134277344, | |
| "logps/rejected": -171.2469482421875, | |
| "loss": 0.2645, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.018074864521622658, | |
| "rewards/margins": 1.3436520099639893, | |
| "rewards/rejected": -1.361726999282837, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.5512820512820513, | |
| "grad_norm": 15.5625, | |
| "learning_rate": 4.9575163398692816e-05, | |
| "logits/chosen": -0.15494224429130554, | |
| "logits/rejected": 0.057956140488386154, | |
| "logps/chosen": -93.79485321044922, | |
| "logps/rejected": -157.0441436767578, | |
| "loss": 0.2549, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.06218550354242325, | |
| "rewards/margins": 1.3940272331237793, | |
| "rewards/rejected": -1.4562126398086548, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.5641025641025641, | |
| "grad_norm": 12.5, | |
| "learning_rate": 4.9542483660130725e-05, | |
| "logits/chosen": -0.11792595684528351, | |
| "logits/rejected": 0.05899347737431526, | |
| "logps/chosen": -124.18502807617188, | |
| "logps/rejected": -178.94952392578125, | |
| "loss": 0.2458, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.0970105230808258, | |
| "rewards/margins": 1.4840583801269531, | |
| "rewards/rejected": -1.5810691118240356, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.5769230769230769, | |
| "grad_norm": 11.3125, | |
| "learning_rate": 4.9509803921568634e-05, | |
| "logits/chosen": -0.15335853397846222, | |
| "logits/rejected": 0.05615951120853424, | |
| "logps/chosen": -90.747802734375, | |
| "logps/rejected": -167.22177124023438, | |
| "loss": 0.1952, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.018913907930254936, | |
| "rewards/margins": 1.8678545951843262, | |
| "rewards/rejected": -1.8867684602737427, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.5897435897435898, | |
| "grad_norm": 24.25, | |
| "learning_rate": 4.947712418300654e-05, | |
| "logits/chosen": -0.14522482454776764, | |
| "logits/rejected": 0.07597565650939941, | |
| "logps/chosen": -97.08460235595703, | |
| "logps/rejected": -156.2093505859375, | |
| "loss": 0.2591, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.018786408007144928, | |
| "rewards/margins": 1.3993942737579346, | |
| "rewards/rejected": -1.4181805849075317, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.6025641025641025, | |
| "grad_norm": 13.8125, | |
| "learning_rate": 4.9444444444444446e-05, | |
| "logits/chosen": -0.1702781468629837, | |
| "logits/rejected": 0.05810967832803726, | |
| "logps/chosen": -114.03224182128906, | |
| "logps/rejected": -184.90426635742188, | |
| "loss": 0.1817, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.10201935470104218, | |
| "rewards/margins": 1.997949481010437, | |
| "rewards/rejected": -2.099968671798706, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.6153846153846154, | |
| "grad_norm": 10.125, | |
| "learning_rate": 4.9411764705882355e-05, | |
| "logits/chosen": -0.054596614092588425, | |
| "logits/rejected": 0.19742971658706665, | |
| "logps/chosen": -88.81393432617188, | |
| "logps/rejected": -170.418212890625, | |
| "loss": 0.1467, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.021169569343328476, | |
| "rewards/margins": 2.3421127796173096, | |
| "rewards/rejected": -2.3632824420928955, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.6282051282051282, | |
| "grad_norm": 9.0, | |
| "learning_rate": 4.9379084967320265e-05, | |
| "logits/chosen": -0.0012325868010520935, | |
| "logits/rejected": 0.19580551981925964, | |
| "logps/chosen": -77.67559051513672, | |
| "logps/rejected": -141.58233642578125, | |
| "loss": 0.1625, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.03474044427275658, | |
| "rewards/margins": 2.011512517929077, | |
| "rewards/rejected": -2.046252965927124, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 0.6410256410256411, | |
| "grad_norm": 10.4375, | |
| "learning_rate": 4.9346405228758174e-05, | |
| "logits/chosen": -0.1005917340517044, | |
| "logits/rejected": 0.13480234146118164, | |
| "logps/chosen": -61.89552307128906, | |
| "logps/rejected": -146.83975219726562, | |
| "loss": 0.1594, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.0344095341861248, | |
| "rewards/margins": 2.2168617248535156, | |
| "rewards/rejected": -2.2512712478637695, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.6538461538461539, | |
| "grad_norm": 6.375, | |
| "learning_rate": 4.931372549019608e-05, | |
| "logits/chosen": -0.11498992145061493, | |
| "logits/rejected": 0.1706167608499527, | |
| "logps/chosen": -92.78648376464844, | |
| "logps/rejected": -172.9186248779297, | |
| "loss": 0.1023, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.01331278681755066, | |
| "rewards/margins": 2.5386626720428467, | |
| "rewards/rejected": -2.551975727081299, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.6666666666666666, | |
| "grad_norm": 9.3125, | |
| "learning_rate": 4.928104575163399e-05, | |
| "logits/chosen": -0.10311194509267807, | |
| "logits/rejected": 0.12531223893165588, | |
| "logps/chosen": -89.15770721435547, | |
| "logps/rejected": -171.0924072265625, | |
| "loss": 0.106, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.030243821442127228, | |
| "rewards/margins": 2.667717456817627, | |
| "rewards/rejected": -2.6374735832214355, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.6794871794871795, | |
| "grad_norm": 6.03125, | |
| "learning_rate": 4.92483660130719e-05, | |
| "logits/chosen": -0.07722577452659607, | |
| "logits/rejected": 0.06518508493900299, | |
| "logps/chosen": -84.23039245605469, | |
| "logps/rejected": -136.64599609375, | |
| "loss": 0.1059, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.05306389927864075, | |
| "rewards/margins": 2.5028438568115234, | |
| "rewards/rejected": -2.449779748916626, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.6923076923076923, | |
| "grad_norm": 9.125, | |
| "learning_rate": 4.9215686274509804e-05, | |
| "logits/chosen": -0.03560367599129677, | |
| "logits/rejected": 0.1739499419927597, | |
| "logps/chosen": -126.07595825195312, | |
| "logps/rejected": -185.3727569580078, | |
| "loss": 0.0934, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.08225273340940475, | |
| "rewards/margins": 2.7095654010772705, | |
| "rewards/rejected": -2.7918179035186768, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.7051282051282052, | |
| "grad_norm": 4.5, | |
| "learning_rate": 4.918300653594771e-05, | |
| "logits/chosen": -0.22686247527599335, | |
| "logits/rejected": 0.05876573175191879, | |
| "logps/chosen": -70.68663024902344, | |
| "logps/rejected": -194.93209838867188, | |
| "loss": 0.0743, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.026015952229499817, | |
| "rewards/margins": 3.59175705909729, | |
| "rewards/rejected": -3.6177730560302734, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.717948717948718, | |
| "grad_norm": 4.125, | |
| "learning_rate": 4.915032679738562e-05, | |
| "logits/chosen": -0.20101141929626465, | |
| "logits/rejected": 0.09745941311120987, | |
| "logps/chosen": -94.7490463256836, | |
| "logps/rejected": -214.51974487304688, | |
| "loss": 0.0533, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.03515905141830444, | |
| "rewards/margins": 3.569089651107788, | |
| "rewards/rejected": -3.5339303016662598, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.7307692307692307, | |
| "grad_norm": 7.59375, | |
| "learning_rate": 4.911764705882353e-05, | |
| "logits/chosen": -0.08269526064395905, | |
| "logits/rejected": 0.06301219016313553, | |
| "logps/chosen": -142.38357543945312, | |
| "logps/rejected": -199.48101806640625, | |
| "loss": 0.0763, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.3302525281906128, | |
| "rewards/margins": 3.5644030570983887, | |
| "rewards/rejected": -3.894655227661133, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.7435897435897436, | |
| "grad_norm": 4.90625, | |
| "learning_rate": 4.908496732026144e-05, | |
| "logits/chosen": -0.15892192721366882, | |
| "logits/rejected": 0.12236778438091278, | |
| "logps/chosen": -98.73062896728516, | |
| "logps/rejected": -216.34136962890625, | |
| "loss": 0.0495, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.06345722824335098, | |
| "rewards/margins": 4.57633638381958, | |
| "rewards/rejected": -4.639793395996094, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.7564102564102564, | |
| "grad_norm": 10.1875, | |
| "learning_rate": 4.9052287581699344e-05, | |
| "logits/chosen": -0.0935150608420372, | |
| "logits/rejected": 0.13067664206027985, | |
| "logps/chosen": -98.37684631347656, | |
| "logps/rejected": -201.44674682617188, | |
| "loss": 0.0495, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.004328111186623573, | |
| "rewards/margins": 4.173211097717285, | |
| "rewards/rejected": -4.177538871765137, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.7692307692307693, | |
| "grad_norm": 3.0, | |
| "learning_rate": 4.901960784313725e-05, | |
| "logits/chosen": -0.06250445544719696, | |
| "logits/rejected": 0.1804438680410385, | |
| "logps/chosen": -110.74950408935547, | |
| "logps/rejected": -190.72528076171875, | |
| "loss": 0.0483, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.006370380520820618, | |
| "rewards/margins": 3.865217685699463, | |
| "rewards/rejected": -3.8715879917144775, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.782051282051282, | |
| "grad_norm": 1.96875, | |
| "learning_rate": 4.898692810457516e-05, | |
| "logits/chosen": -0.023088647052645683, | |
| "logits/rejected": 0.17144130170345306, | |
| "logps/chosen": -119.53297424316406, | |
| "logps/rejected": -206.44424438476562, | |
| "loss": 0.0229, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.1655292510986328, | |
| "rewards/margins": 4.698057651519775, | |
| "rewards/rejected": -4.863586902618408, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 0.7948717948717948, | |
| "grad_norm": 1.90625, | |
| "learning_rate": 4.895424836601307e-05, | |
| "logits/chosen": -0.2687144875526428, | |
| "logits/rejected": 0.07433108240365982, | |
| "logps/chosen": -68.940185546875, | |
| "logps/rejected": -195.2063446044922, | |
| "loss": 0.0265, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.010380644351243973, | |
| "rewards/margins": 4.6095967292785645, | |
| "rewards/rejected": -4.6199774742126465, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.8076923076923077, | |
| "grad_norm": 2.40625, | |
| "learning_rate": 4.892156862745098e-05, | |
| "logits/chosen": -0.03314891457557678, | |
| "logits/rejected": 0.18997398018836975, | |
| "logps/chosen": -107.19807434082031, | |
| "logps/rejected": -223.56234741210938, | |
| "loss": 0.0173, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.18736138939857483, | |
| "rewards/margins": 5.349735260009766, | |
| "rewards/rejected": -5.5370965003967285, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.8205128205128205, | |
| "grad_norm": 3.078125, | |
| "learning_rate": 4.888888888888889e-05, | |
| "logits/chosen": -0.1300622671842575, | |
| "logits/rejected": 0.05471666902303696, | |
| "logps/chosen": -93.76850891113281, | |
| "logps/rejected": -175.02157592773438, | |
| "loss": 0.0385, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.19924220442771912, | |
| "rewards/margins": 4.4009809494018555, | |
| "rewards/rejected": -4.600223064422607, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.8333333333333334, | |
| "grad_norm": 4.6875, | |
| "learning_rate": 4.88562091503268e-05, | |
| "logits/chosen": -0.13419753313064575, | |
| "logits/rejected": 0.1885075569152832, | |
| "logps/chosen": -84.98932647705078, | |
| "logps/rejected": -220.42127990722656, | |
| "loss": 0.0384, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": -0.1595815271139145, | |
| "rewards/margins": 5.886142730712891, | |
| "rewards/rejected": -6.045723915100098, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.8461538461538461, | |
| "grad_norm": 3.40625, | |
| "learning_rate": 4.882352941176471e-05, | |
| "logits/chosen": -0.07228720933198929, | |
| "logits/rejected": 0.08259041607379913, | |
| "logps/chosen": -93.90277862548828, | |
| "logps/rejected": -215.33859252929688, | |
| "loss": 0.0284, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.10520735383033752, | |
| "rewards/margins": 5.958078861236572, | |
| "rewards/rejected": -6.063286781311035, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.8589743589743589, | |
| "grad_norm": 1.328125, | |
| "learning_rate": 4.879084967320262e-05, | |
| "logits/chosen": -0.07955673336982727, | |
| "logits/rejected": 0.11226824671030045, | |
| "logps/chosen": -110.10163879394531, | |
| "logps/rejected": -214.9049530029297, | |
| "loss": 0.014, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.27183860540390015, | |
| "rewards/margins": 5.614879131317139, | |
| "rewards/rejected": -5.886717796325684, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.8717948717948718, | |
| "grad_norm": 3.984375, | |
| "learning_rate": 4.875816993464053e-05, | |
| "logits/chosen": -0.1996242254972458, | |
| "logits/rejected": 0.08367177098989487, | |
| "logps/chosen": -79.60945892333984, | |
| "logps/rejected": -217.68849182128906, | |
| "loss": 0.0199, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.0003956109285354614, | |
| "rewards/margins": 6.300136566162109, | |
| "rewards/rejected": -6.299740791320801, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.8846153846153846, | |
| "grad_norm": 1.5390625, | |
| "learning_rate": 4.872549019607843e-05, | |
| "logits/chosen": -0.1404382586479187, | |
| "logits/rejected": 0.11748763918876648, | |
| "logps/chosen": -90.89656066894531, | |
| "logps/rejected": -223.33705139160156, | |
| "loss": 0.0136, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.2140466570854187, | |
| "rewards/margins": 6.7996625900268555, | |
| "rewards/rejected": -7.013710021972656, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.8974358974358975, | |
| "grad_norm": 2.703125, | |
| "learning_rate": 4.869281045751634e-05, | |
| "logits/chosen": -0.15130311250686646, | |
| "logits/rejected": 0.07753509283065796, | |
| "logps/chosen": -72.41594696044922, | |
| "logps/rejected": -197.2650604248047, | |
| "loss": 0.0143, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.11739481985569, | |
| "rewards/margins": 6.414281368255615, | |
| "rewards/rejected": -6.296886444091797, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.9102564102564102, | |
| "grad_norm": 0.99609375, | |
| "learning_rate": 4.866013071895425e-05, | |
| "logits/chosen": -0.07087592780590057, | |
| "logits/rejected": 0.15436890721321106, | |
| "logps/chosen": -103.80513763427734, | |
| "logps/rejected": -227.20993041992188, | |
| "loss": 0.0074, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.2319905012845993, | |
| "rewards/margins": 7.314933776855469, | |
| "rewards/rejected": -7.546924114227295, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.9230769230769231, | |
| "grad_norm": 2.71875, | |
| "learning_rate": 4.862745098039216e-05, | |
| "logits/chosen": -0.09012198448181152, | |
| "logits/rejected": 0.14715133607387543, | |
| "logps/chosen": -93.99620056152344, | |
| "logps/rejected": -216.41314697265625, | |
| "loss": 0.0135, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.37357670068740845, | |
| "rewards/margins": 7.083094120025635, | |
| "rewards/rejected": -7.456670761108398, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.9358974358974359, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 4.8594771241830066e-05, | |
| "logits/chosen": -0.16302238404750824, | |
| "logits/rejected": 0.13868963718414307, | |
| "logps/chosen": -87.77458190917969, | |
| "logps/rejected": -237.41920471191406, | |
| "loss": 0.0032, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.5054303407669067, | |
| "rewards/margins": 8.197410583496094, | |
| "rewards/rejected": -8.702839851379395, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 0.9487179487179487, | |
| "grad_norm": 0.1328125, | |
| "learning_rate": 4.8562091503267976e-05, | |
| "logits/chosen": -0.20026513934135437, | |
| "logits/rejected": 0.06251777708530426, | |
| "logps/chosen": -85.14456176757812, | |
| "logps/rejected": -256.8261413574219, | |
| "loss": 0.0013, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.3448637127876282, | |
| "rewards/margins": 8.562459945678711, | |
| "rewards/rejected": -8.907323837280273, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.9615384615384616, | |
| "grad_norm": 0.81640625, | |
| "learning_rate": 4.8529411764705885e-05, | |
| "logits/chosen": -0.17372927069664001, | |
| "logits/rejected": 0.03617147356271744, | |
| "logps/chosen": -102.04420471191406, | |
| "logps/rejected": -249.6424560546875, | |
| "loss": 0.0038, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.41514554619789124, | |
| "rewards/margins": 9.466489791870117, | |
| "rewards/rejected": -9.881635665893555, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.9743589743589743, | |
| "grad_norm": 7.0, | |
| "learning_rate": 4.8496732026143794e-05, | |
| "logits/chosen": -0.21097320318222046, | |
| "logits/rejected": 0.10003777593374252, | |
| "logps/chosen": -88.14920806884766, | |
| "logps/rejected": -265.88311767578125, | |
| "loss": 0.0277, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.21336621046066284, | |
| "rewards/margins": 9.484343528747559, | |
| "rewards/rejected": -9.697710037231445, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.9871794871794872, | |
| "grad_norm": 0.80859375, | |
| "learning_rate": 4.84640522875817e-05, | |
| "logits/chosen": -0.13699910044670105, | |
| "logits/rejected": 0.0689852312207222, | |
| "logps/chosen": -67.47610473632812, | |
| "logps/rejected": -197.6947784423828, | |
| "loss": 0.0051, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.12284618616104126, | |
| "rewards/margins": 7.643227577209473, | |
| "rewards/rejected": -7.766073703765869, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "grad_norm": 20.0, | |
| "learning_rate": 4.843137254901961e-05, | |
| "logits/chosen": -0.1453484296798706, | |
| "logits/rejected": 0.05191052705049515, | |
| "logps/chosen": -101.6785888671875, | |
| "logps/rejected": -247.76162719726562, | |
| "loss": 0.1067, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": -0.8418331742286682, | |
| "rewards/margins": 9.073858261108398, | |
| "rewards/rejected": -9.915691375732422, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 1.0128205128205128, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 4.839869281045752e-05, | |
| "logits/chosen": -0.08318670094013214, | |
| "logits/rejected": 0.11238709092140198, | |
| "logps/chosen": -115.86375427246094, | |
| "logps/rejected": -245.01834106445312, | |
| "loss": 0.0022, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.736923098564148, | |
| "rewards/margins": 9.396677017211914, | |
| "rewards/rejected": -10.133600234985352, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 1.0256410256410255, | |
| "grad_norm": 0.470703125, | |
| "learning_rate": 4.8366013071895424e-05, | |
| "logits/chosen": -0.29700779914855957, | |
| "logits/rejected": -0.07588706910610199, | |
| "logps/chosen": -117.58323669433594, | |
| "logps/rejected": -260.2636413574219, | |
| "loss": 0.0025, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.7379497289657593, | |
| "rewards/margins": 9.926956176757812, | |
| "rewards/rejected": -10.664905548095703, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 1.0384615384615385, | |
| "grad_norm": 3.34375, | |
| "learning_rate": 4.8333333333333334e-05, | |
| "logits/chosen": -0.07969608902931213, | |
| "logits/rejected": 0.1311008334159851, | |
| "logps/chosen": -116.35662841796875, | |
| "logps/rejected": -275.5772399902344, | |
| "loss": 0.004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.8076063394546509, | |
| "rewards/margins": 10.605157852172852, | |
| "rewards/rejected": -11.412765502929688, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 1.0512820512820513, | |
| "grad_norm": 20.375, | |
| "learning_rate": 4.830065359477124e-05, | |
| "logits/chosen": -0.1454664170742035, | |
| "logits/rejected": 0.053483135998249054, | |
| "logps/chosen": -159.20071411132812, | |
| "logps/rejected": -280.21307373046875, | |
| "loss": 0.0556, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": -1.4751166105270386, | |
| "rewards/margins": 9.850839614868164, | |
| "rewards/rejected": -11.325956344604492, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 1.064102564102564, | |
| "grad_norm": 0.365234375, | |
| "learning_rate": 4.826797385620915e-05, | |
| "logits/chosen": -0.050416022539138794, | |
| "logits/rejected": -0.005251371301710606, | |
| "logps/chosen": -119.74546813964844, | |
| "logps/rejected": -259.7323913574219, | |
| "loss": 0.0013, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.0453357696533203, | |
| "rewards/margins": 10.02558708190918, | |
| "rewards/rejected": -11.0709228515625, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 1.0769230769230769, | |
| "grad_norm": 25.875, | |
| "learning_rate": 4.823529411764706e-05, | |
| "logits/chosen": -0.13358981907367706, | |
| "logits/rejected": 0.02499028481543064, | |
| "logps/chosen": -119.50708770751953, | |
| "logps/rejected": -250.57264709472656, | |
| "loss": 0.1641, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": -1.1635303497314453, | |
| "rewards/margins": 9.870977401733398, | |
| "rewards/rejected": -11.034507751464844, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 1.0897435897435896, | |
| "grad_norm": 0.251953125, | |
| "learning_rate": 4.820261437908497e-05, | |
| "logits/chosen": -0.24879048764705658, | |
| "logits/rejected": -0.004787685349583626, | |
| "logps/chosen": -71.481689453125, | |
| "logps/rejected": -256.2223815917969, | |
| "loss": 0.0012, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.18112725019454956, | |
| "rewards/margins": 11.132868766784668, | |
| "rewards/rejected": -11.313994407653809, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 1.1025641025641026, | |
| "grad_norm": 0.15234375, | |
| "learning_rate": 4.816993464052288e-05, | |
| "logits/chosen": -0.12416582554578781, | |
| "logits/rejected": 0.05255984887480736, | |
| "logps/chosen": -80.43519592285156, | |
| "logps/rejected": -231.00787353515625, | |
| "loss": 0.0009, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.578750491142273, | |
| "rewards/margins": 9.352859497070312, | |
| "rewards/rejected": -9.931610107421875, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 1.1153846153846154, | |
| "grad_norm": 0.6640625, | |
| "learning_rate": 4.813725490196079e-05, | |
| "logits/chosen": -0.15443308651447296, | |
| "logits/rejected": -0.030204713344573975, | |
| "logps/chosen": -128.3735809326172, | |
| "logps/rejected": -286.6634521484375, | |
| "loss": 0.0019, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.4307315349578857, | |
| "rewards/margins": 10.682621955871582, | |
| "rewards/rejected": -12.113353729248047, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 1.1282051282051282, | |
| "grad_norm": 0.63671875, | |
| "learning_rate": 4.81045751633987e-05, | |
| "logits/chosen": -0.14273235201835632, | |
| "logits/rejected": 0.005313074216246605, | |
| "logps/chosen": -110.80179595947266, | |
| "logps/rejected": -264.642333984375, | |
| "loss": 0.0029, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.9311256408691406, | |
| "rewards/margins": 10.301932334899902, | |
| "rewards/rejected": -11.233057975769043, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 1.141025641025641, | |
| "grad_norm": 0.10302734375, | |
| "learning_rate": 4.807189542483661e-05, | |
| "logits/chosen": -0.18348479270935059, | |
| "logits/rejected": 0.01949886418879032, | |
| "logps/chosen": -79.74383544921875, | |
| "logps/rejected": -268.1177978515625, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.6667418479919434, | |
| "rewards/margins": 10.819829940795898, | |
| "rewards/rejected": -11.486571311950684, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 1.1538461538461537, | |
| "grad_norm": 0.1416015625, | |
| "learning_rate": 4.803921568627452e-05, | |
| "logits/chosen": -0.24952609837055206, | |
| "logits/rejected": -0.04354415833950043, | |
| "logps/chosen": -68.38008117675781, | |
| "logps/rejected": -237.9485626220703, | |
| "loss": 0.0007, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.3136591911315918, | |
| "rewards/margins": 10.86776351928711, | |
| "rewards/rejected": -11.181421279907227, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 1.1666666666666667, | |
| "grad_norm": 21.0, | |
| "learning_rate": 4.800653594771242e-05, | |
| "logits/chosen": -0.22795766592025757, | |
| "logits/rejected": -0.1173793226480484, | |
| "logps/chosen": -132.00320434570312, | |
| "logps/rejected": -298.23883056640625, | |
| "loss": 0.1621, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": -1.5926971435546875, | |
| "rewards/margins": 11.332834243774414, | |
| "rewards/rejected": -12.925531387329102, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 1.1794871794871795, | |
| "grad_norm": 0.271484375, | |
| "learning_rate": 4.797385620915033e-05, | |
| "logits/chosen": -0.15937921404838562, | |
| "logits/rejected": -0.03522328659892082, | |
| "logps/chosen": -130.28121948242188, | |
| "logps/rejected": -279.67474365234375, | |
| "loss": 0.0013, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.4885865449905396, | |
| "rewards/margins": 10.48618221282959, | |
| "rewards/rejected": -11.974767684936523, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 1.1923076923076923, | |
| "grad_norm": 29.875, | |
| "learning_rate": 4.794117647058824e-05, | |
| "logits/chosen": -0.24258048832416534, | |
| "logits/rejected": -0.004059216007590294, | |
| "logps/chosen": -92.91792297363281, | |
| "logps/rejected": -269.5362548828125, | |
| "loss": 0.0976, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -1.1794253587722778, | |
| "rewards/margins": 11.304587364196777, | |
| "rewards/rejected": -12.484012603759766, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 1.205128205128205, | |
| "grad_norm": 3.0625, | |
| "learning_rate": 4.790849673202615e-05, | |
| "logits/chosen": -0.1257822960615158, | |
| "logits/rejected": -0.00756160169839859, | |
| "logps/chosen": -121.64588928222656, | |
| "logps/rejected": -260.95367431640625, | |
| "loss": 0.0084, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.58152437210083, | |
| "rewards/margins": 9.530534744262695, | |
| "rewards/rejected": -11.112058639526367, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 1.217948717948718, | |
| "grad_norm": 0.27734375, | |
| "learning_rate": 4.7875816993464056e-05, | |
| "logits/chosen": -0.20265880227088928, | |
| "logits/rejected": 0.009628769010305405, | |
| "logps/chosen": -93.69900512695312, | |
| "logps/rejected": -253.49929809570312, | |
| "loss": 0.0013, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.9097017049789429, | |
| "rewards/margins": 11.15369987487793, | |
| "rewards/rejected": -12.06340217590332, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 1.2307692307692308, | |
| "grad_norm": 0.06787109375, | |
| "learning_rate": 4.7843137254901966e-05, | |
| "logits/chosen": -0.2114959955215454, | |
| "logits/rejected": -0.01585013046860695, | |
| "logps/chosen": -122.66839599609375, | |
| "logps/rejected": -302.6429748535156, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.17991304397583, | |
| "rewards/margins": 12.138936996459961, | |
| "rewards/rejected": -13.318851470947266, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 1.2435897435897436, | |
| "grad_norm": 20.125, | |
| "learning_rate": 4.7810457516339875e-05, | |
| "logits/chosen": -0.23516832292079926, | |
| "logits/rejected": -0.01460610143840313, | |
| "logps/chosen": -109.59274291992188, | |
| "logps/rejected": -277.5960693359375, | |
| "loss": 0.1316, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": -1.4016437530517578, | |
| "rewards/margins": 11.208518981933594, | |
| "rewards/rejected": -12.610161781311035, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 1.2564102564102564, | |
| "grad_norm": 0.275390625, | |
| "learning_rate": 4.7777777777777784e-05, | |
| "logits/chosen": -0.19789156317710876, | |
| "logits/rejected": -0.015858955681324005, | |
| "logps/chosen": -88.6310043334961, | |
| "logps/rejected": -241.5882568359375, | |
| "loss": 0.0006, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.6613929867744446, | |
| "rewards/margins": 9.982341766357422, | |
| "rewards/rejected": -10.6437349319458, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 1.2692307692307692, | |
| "grad_norm": 0.12353515625, | |
| "learning_rate": 4.774509803921569e-05, | |
| "logits/chosen": -0.24105946719646454, | |
| "logits/rejected": -0.025975240394473076, | |
| "logps/chosen": -86.48670196533203, | |
| "logps/rejected": -283.8379821777344, | |
| "loss": 0.0005, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.6856904625892639, | |
| "rewards/margins": 11.958650588989258, | |
| "rewards/rejected": -12.644342422485352, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 1.282051282051282, | |
| "grad_norm": 0.1640625, | |
| "learning_rate": 4.77124183006536e-05, | |
| "logits/chosen": -0.2439402937889099, | |
| "logits/rejected": -0.09340156614780426, | |
| "logps/chosen": -72.89773559570312, | |
| "logps/rejected": -274.8874206542969, | |
| "loss": 0.0008, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.5476179122924805, | |
| "rewards/margins": 12.54997730255127, | |
| "rewards/rejected": -13.097596168518066, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.282051282051282, | |
| "eval_logits/chosen": -0.259898841381073, | |
| "eval_logits/rejected": -0.10980935394763947, | |
| "eval_logps/chosen": -110.17989349365234, | |
| "eval_logps/rejected": -272.600830078125, | |
| "eval_loss": 0.009662091732025146, | |
| "eval_rewards/accuracies": 0.995312511920929, | |
| "eval_rewards/chosen": -1.1208233833312988, | |
| "eval_rewards/margins": 11.10918140411377, | |
| "eval_rewards/rejected": -12.230003356933594, | |
| "eval_runtime": 49.3042, | |
| "eval_samples_per_second": 12.737, | |
| "eval_steps_per_second": 0.811, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.294871794871795, | |
| "grad_norm": 8.3125, | |
| "learning_rate": 4.7679738562091505e-05, | |
| "logits/chosen": -0.21803517639636993, | |
| "logits/rejected": 0.019085543230175972, | |
| "logps/chosen": -100.55758666992188, | |
| "logps/rejected": -279.3001708984375, | |
| "loss": 0.0156, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.8744910955429077, | |
| "rewards/margins": 12.374544143676758, | |
| "rewards/rejected": -13.249034881591797, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 1.3076923076923077, | |
| "grad_norm": 1.390625, | |
| "learning_rate": 4.7647058823529414e-05, | |
| "logits/chosen": -0.2429550290107727, | |
| "logits/rejected": -0.06982121616601944, | |
| "logps/chosen": -122.7874755859375, | |
| "logps/rejected": -283.1558837890625, | |
| "loss": 0.0029, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.3468942642211914, | |
| "rewards/margins": 11.533368110656738, | |
| "rewards/rejected": -12.88026237487793, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 1.3205128205128205, | |
| "grad_norm": 0.051025390625, | |
| "learning_rate": 4.761437908496732e-05, | |
| "logits/chosen": -0.2806547284126282, | |
| "logits/rejected": -0.0724981278181076, | |
| "logps/chosen": -82.58291625976562, | |
| "logps/rejected": -310.4030456542969, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.3749184012413025, | |
| "rewards/margins": 13.757573127746582, | |
| "rewards/rejected": -14.132492065429688, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 1.3333333333333333, | |
| "grad_norm": 1.3515625, | |
| "learning_rate": 4.7581699346405226e-05, | |
| "logits/chosen": -0.25046002864837646, | |
| "logits/rejected": -0.14771123230457306, | |
| "logps/chosen": -102.93738555908203, | |
| "logps/rejected": -282.7419128417969, | |
| "loss": 0.0043, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.0247480869293213, | |
| "rewards/margins": 11.310482025146484, | |
| "rewards/rejected": -12.335229873657227, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 1.3461538461538463, | |
| "grad_norm": 0.09521484375, | |
| "learning_rate": 4.7549019607843135e-05, | |
| "logits/chosen": -0.20055457949638367, | |
| "logits/rejected": -0.06911473721265793, | |
| "logps/chosen": -109.2905502319336, | |
| "logps/rejected": -271.8915100097656, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.8089574575424194, | |
| "rewards/margins": 11.409313201904297, | |
| "rewards/rejected": -12.218271255493164, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 1.358974358974359, | |
| "grad_norm": 0.1142578125, | |
| "learning_rate": 4.7516339869281045e-05, | |
| "logits/chosen": -0.19741995632648468, | |
| "logits/rejected": 0.02603726089000702, | |
| "logps/chosen": -105.29849243164062, | |
| "logps/rejected": -275.34283447265625, | |
| "loss": 0.0015, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.9090204238891602, | |
| "rewards/margins": 11.586647987365723, | |
| "rewards/rejected": -12.495668411254883, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 1.3717948717948718, | |
| "grad_norm": 0.3671875, | |
| "learning_rate": 4.7483660130718954e-05, | |
| "logits/chosen": -0.2011612206697464, | |
| "logits/rejected": 0.04474819451570511, | |
| "logps/chosen": -77.16537475585938, | |
| "logps/rejected": -246.36817932128906, | |
| "loss": 0.0019, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.652474582195282, | |
| "rewards/margins": 10.86870288848877, | |
| "rewards/rejected": -11.521177291870117, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 1.3846153846153846, | |
| "grad_norm": 3.1875, | |
| "learning_rate": 4.745098039215686e-05, | |
| "logits/chosen": -0.2594239115715027, | |
| "logits/rejected": -0.1484495997428894, | |
| "logps/chosen": -142.93325805664062, | |
| "logps/rejected": -312.1004638671875, | |
| "loss": 0.0073, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.3567759990692139, | |
| "rewards/margins": 11.624643325805664, | |
| "rewards/rejected": -12.98141860961914, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 1.3974358974358974, | |
| "grad_norm": 0.1005859375, | |
| "learning_rate": 4.741830065359477e-05, | |
| "logits/chosen": -0.20124056935310364, | |
| "logits/rejected": 0.001663937233388424, | |
| "logps/chosen": -78.78507232666016, | |
| "logps/rejected": -276.4385070800781, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.6467851400375366, | |
| "rewards/margins": 12.096258163452148, | |
| "rewards/rejected": -12.743043899536133, | |
| "step": 109 | |
| }, | |
| { | |
| "epoch": 1.4102564102564101, | |
| "grad_norm": 0.5, | |
| "learning_rate": 4.738562091503268e-05, | |
| "logits/chosen": -0.27084699273109436, | |
| "logits/rejected": -0.04281499981880188, | |
| "logps/chosen": -104.03623962402344, | |
| "logps/rejected": -263.74444580078125, | |
| "loss": 0.0023, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.0747262239456177, | |
| "rewards/margins": 11.967494010925293, | |
| "rewards/rejected": -13.042221069335938, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 1.4230769230769231, | |
| "grad_norm": 0.185546875, | |
| "learning_rate": 4.735294117647059e-05, | |
| "logits/chosen": -0.19713038206100464, | |
| "logits/rejected": -0.016605187207460403, | |
| "logps/chosen": -106.29576110839844, | |
| "logps/rejected": -256.5580139160156, | |
| "loss": 0.0008, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.9818036556243896, | |
| "rewards/margins": 10.725968360900879, | |
| "rewards/rejected": -11.707772254943848, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 1.435897435897436, | |
| "grad_norm": 0.1767578125, | |
| "learning_rate": 4.73202614379085e-05, | |
| "logits/chosen": -0.24797482788562775, | |
| "logits/rejected": -0.025667553767561913, | |
| "logps/chosen": -93.77513122558594, | |
| "logps/rejected": -300.7659912109375, | |
| "loss": 0.0005, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.8895140886306763, | |
| "rewards/margins": 12.620410919189453, | |
| "rewards/rejected": -13.50992488861084, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 1.4487179487179487, | |
| "grad_norm": 0.1884765625, | |
| "learning_rate": 4.728758169934641e-05, | |
| "logits/chosen": -0.1930946409702301, | |
| "logits/rejected": 0.03949951007962227, | |
| "logps/chosen": -83.20973205566406, | |
| "logps/rejected": -306.88189697265625, | |
| "loss": 0.0006, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.8088476657867432, | |
| "rewards/margins": 13.19533920288086, | |
| "rewards/rejected": -14.004186630249023, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 1.4615384615384617, | |
| "grad_norm": 0.15234375, | |
| "learning_rate": 4.725490196078431e-05, | |
| "logits/chosen": -0.25294578075408936, | |
| "logits/rejected": -0.077580027282238, | |
| "logps/chosen": -141.69619750976562, | |
| "logps/rejected": -287.53643798828125, | |
| "loss": 0.0016, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.1367712020874023, | |
| "rewards/margins": 11.081330299377441, | |
| "rewards/rejected": -12.21810245513916, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 1.4743589743589745, | |
| "grad_norm": 3.328125, | |
| "learning_rate": 4.722222222222222e-05, | |
| "logits/chosen": -0.24814940989017487, | |
| "logits/rejected": -0.049164943397045135, | |
| "logps/chosen": -107.56108093261719, | |
| "logps/rejected": -295.115234375, | |
| "loss": 0.007, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.0834684371948242, | |
| "rewards/margins": 12.255739212036133, | |
| "rewards/rejected": -13.339208602905273, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 1.4871794871794872, | |
| "grad_norm": 0.490234375, | |
| "learning_rate": 4.718954248366013e-05, | |
| "logits/chosen": -0.2628023326396942, | |
| "logits/rejected": -0.11186269670724869, | |
| "logps/chosen": -116.96622467041016, | |
| "logps/rejected": -288.20849609375, | |
| "loss": 0.0013, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.3240647315979004, | |
| "rewards/margins": 11.99062442779541, | |
| "rewards/rejected": -13.314689636230469, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "grad_norm": 1.1796875, | |
| "learning_rate": 4.715686274509804e-05, | |
| "logits/chosen": -0.20847730338573456, | |
| "logits/rejected": -0.02169419638812542, | |
| "logps/chosen": -95.97996520996094, | |
| "logps/rejected": -255.66940307617188, | |
| "loss": 0.0022, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.8477219343185425, | |
| "rewards/margins": 11.209989547729492, | |
| "rewards/rejected": -12.057710647583008, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 1.5128205128205128, | |
| "grad_norm": 0.484375, | |
| "learning_rate": 4.712418300653595e-05, | |
| "logits/chosen": -0.2605594992637634, | |
| "logits/rejected": -0.08282825350761414, | |
| "logps/chosen": -114.42742919921875, | |
| "logps/rejected": -283.7960510253906, | |
| "loss": 0.0027, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.3345361948013306, | |
| "rewards/margins": 11.124120712280273, | |
| "rewards/rejected": -12.458658218383789, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 1.5256410256410255, | |
| "grad_norm": 0.042236328125, | |
| "learning_rate": 4.709150326797386e-05, | |
| "logits/chosen": -0.2922889292240143, | |
| "logits/rejected": -0.1332317590713501, | |
| "logps/chosen": -90.85557556152344, | |
| "logps/rejected": -297.7986755371094, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.912261426448822, | |
| "rewards/margins": 12.832252502441406, | |
| "rewards/rejected": -13.744512557983398, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 1.5384615384615383, | |
| "grad_norm": 0.53515625, | |
| "learning_rate": 4.705882352941177e-05, | |
| "logits/chosen": -0.24172773957252502, | |
| "logits/rejected": -0.0021842457354068756, | |
| "logps/chosen": -73.25704956054688, | |
| "logps/rejected": -286.9146423339844, | |
| "loss": 0.0025, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.2910924553871155, | |
| "rewards/margins": 13.212228775024414, | |
| "rewards/rejected": -13.503320693969727, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.5512820512820513, | |
| "grad_norm": 0.71484375, | |
| "learning_rate": 4.702614379084968e-05, | |
| "logits/chosen": -0.22106537222862244, | |
| "logits/rejected": -0.059269629418849945, | |
| "logps/chosen": -103.22166442871094, | |
| "logps/rejected": -258.6286926269531, | |
| "loss": 0.0028, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.1389858722686768, | |
| "rewards/margins": 10.450969696044922, | |
| "rewards/rejected": -11.589956283569336, | |
| "step": 121 | |
| }, | |
| { | |
| "epoch": 1.564102564102564, | |
| "grad_norm": 0.5234375, | |
| "learning_rate": 4.6993464052287586e-05, | |
| "logits/chosen": -0.24546560645103455, | |
| "logits/rejected": -0.0644855722784996, | |
| "logps/chosen": -95.77403259277344, | |
| "logps/rejected": -263.54083251953125, | |
| "loss": 0.0021, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.937038242816925, | |
| "rewards/margins": 11.23678207397461, | |
| "rewards/rejected": -12.173819541931152, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 1.5769230769230769, | |
| "grad_norm": 2.65625, | |
| "learning_rate": 4.6960784313725495e-05, | |
| "logits/chosen": -0.17821358144283295, | |
| "logits/rejected": -0.011757217347621918, | |
| "logps/chosen": -102.22160339355469, | |
| "logps/rejected": -245.85723876953125, | |
| "loss": 0.0014, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.9887152910232544, | |
| "rewards/margins": 10.024918556213379, | |
| "rewards/rejected": -11.013633728027344, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 1.5897435897435899, | |
| "grad_norm": 0.65234375, | |
| "learning_rate": 4.69281045751634e-05, | |
| "logits/chosen": -0.21225012838840485, | |
| "logits/rejected": -0.03775294870138168, | |
| "logps/chosen": -71.15055847167969, | |
| "logps/rejected": -221.15081787109375, | |
| "loss": 0.0021, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.5071994662284851, | |
| "rewards/margins": 9.90135383605957, | |
| "rewards/rejected": -10.408552169799805, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 1.6025641025641026, | |
| "grad_norm": 0.1650390625, | |
| "learning_rate": 4.689542483660131e-05, | |
| "logits/chosen": -0.28111523389816284, | |
| "logits/rejected": -0.10224111378192902, | |
| "logps/chosen": -99.63172912597656, | |
| "logps/rejected": -290.7098388671875, | |
| "loss": 0.0008, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.7577407956123352, | |
| "rewards/margins": 12.49168586730957, | |
| "rewards/rejected": -13.24942684173584, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 1.6153846153846154, | |
| "grad_norm": 0.06396484375, | |
| "learning_rate": 4.6862745098039216e-05, | |
| "logits/chosen": -0.3224430978298187, | |
| "logits/rejected": -0.173972025513649, | |
| "logps/chosen": -86.10284423828125, | |
| "logps/rejected": -265.00311279296875, | |
| "loss": 0.0005, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.8580787777900696, | |
| "rewards/margins": 11.381837844848633, | |
| "rewards/rejected": -12.239917755126953, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 1.6282051282051282, | |
| "grad_norm": 0.302734375, | |
| "learning_rate": 4.6830065359477125e-05, | |
| "logits/chosen": -0.3168780207633972, | |
| "logits/rejected": -0.14501667022705078, | |
| "logps/chosen": -110.96299743652344, | |
| "logps/rejected": -329.3466796875, | |
| "loss": 0.0007, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.992100715637207, | |
| "rewards/margins": 13.694648742675781, | |
| "rewards/rejected": -14.686749458312988, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 1.641025641025641, | |
| "grad_norm": 0.04443359375, | |
| "learning_rate": 4.6797385620915035e-05, | |
| "logits/chosen": -0.26439139246940613, | |
| "logits/rejected": -0.061362866312265396, | |
| "logps/chosen": -147.5707244873047, | |
| "logps/rejected": -286.7435302734375, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.227353811264038, | |
| "rewards/margins": 11.673542022705078, | |
| "rewards/rejected": -12.900895118713379, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 1.6538461538461537, | |
| "grad_norm": 0.69921875, | |
| "learning_rate": 4.6764705882352944e-05, | |
| "logits/chosen": -0.22864043712615967, | |
| "logits/rejected": -0.06696401536464691, | |
| "logps/chosen": -109.353515625, | |
| "logps/rejected": -316.71466064453125, | |
| "loss": 0.0027, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.3228856325149536, | |
| "rewards/margins": 14.110214233398438, | |
| "rewards/rejected": -15.433099746704102, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 1.6666666666666665, | |
| "grad_norm": 2.265625, | |
| "learning_rate": 4.673202614379085e-05, | |
| "logits/chosen": -0.1789395660161972, | |
| "logits/rejected": -0.1031792014837265, | |
| "logps/chosen": -143.99923706054688, | |
| "logps/rejected": -308.4597473144531, | |
| "loss": 0.0015, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.8470748662948608, | |
| "rewards/margins": 12.27413558959961, | |
| "rewards/rejected": -14.121211051940918, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 1.6794871794871795, | |
| "grad_norm": 0.08984375, | |
| "learning_rate": 4.669934640522876e-05, | |
| "logits/chosen": -0.35584574937820435, | |
| "logits/rejected": -0.13938316702842712, | |
| "logps/chosen": -70.41092681884766, | |
| "logps/rejected": -308.68011474609375, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.42667657136917114, | |
| "rewards/margins": 13.290081024169922, | |
| "rewards/rejected": -13.716757774353027, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 1.6923076923076923, | |
| "grad_norm": 0.07373046875, | |
| "learning_rate": 4.666666666666667e-05, | |
| "logits/chosen": -0.3193415403366089, | |
| "logits/rejected": -0.11971499025821686, | |
| "logps/chosen": -101.42462158203125, | |
| "logps/rejected": -326.64837646484375, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.8744809627532959, | |
| "rewards/margins": 14.022336959838867, | |
| "rewards/rejected": -14.896818161010742, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 1.7051282051282053, | |
| "grad_norm": 0.419921875, | |
| "learning_rate": 4.663398692810458e-05, | |
| "logits/chosen": -0.13937196135520935, | |
| "logits/rejected": -0.009476883336901665, | |
| "logps/chosen": -109.2764892578125, | |
| "logps/rejected": -288.0853271484375, | |
| "loss": 0.0008, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.4989168643951416, | |
| "rewards/margins": 11.488309860229492, | |
| "rewards/rejected": -12.987225532531738, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 1.717948717948718, | |
| "grad_norm": 0.046142578125, | |
| "learning_rate": 4.660130718954249e-05, | |
| "logits/chosen": -0.2726636528968811, | |
| "logits/rejected": -0.0510869026184082, | |
| "logps/chosen": -109.837646484375, | |
| "logps/rejected": -280.8477783203125, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.2274763584136963, | |
| "rewards/margins": 11.558832168579102, | |
| "rewards/rejected": -12.786308288574219, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 1.7307692307692308, | |
| "grad_norm": 0.1748046875, | |
| "learning_rate": 4.656862745098039e-05, | |
| "logits/chosen": -0.2647542953491211, | |
| "logits/rejected": -0.14178255200386047, | |
| "logps/chosen": -104.56260681152344, | |
| "logps/rejected": -279.00921630859375, | |
| "loss": 0.0006, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.8853874206542969, | |
| "rewards/margins": 11.830360412597656, | |
| "rewards/rejected": -12.715747833251953, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 1.7435897435897436, | |
| "grad_norm": 0.05322265625, | |
| "learning_rate": 4.65359477124183e-05, | |
| "logits/chosen": -0.3152746558189392, | |
| "logits/rejected": -0.058999575674533844, | |
| "logps/chosen": -91.43777465820312, | |
| "logps/rejected": -273.6168518066406, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.5946141481399536, | |
| "rewards/margins": 12.286003112792969, | |
| "rewards/rejected": -12.880617141723633, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 1.7564102564102564, | |
| "grad_norm": 0.0167236328125, | |
| "learning_rate": 4.650326797385621e-05, | |
| "logits/chosen": -0.2858930826187134, | |
| "logits/rejected": -0.058619871735572815, | |
| "logps/chosen": -81.2271499633789, | |
| "logps/rejected": -308.54339599609375, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.7845070362091064, | |
| "rewards/margins": 13.5476655960083, | |
| "rewards/rejected": -14.332173347473145, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 1.7692307692307692, | |
| "grad_norm": 0.0361328125, | |
| "learning_rate": 4.647058823529412e-05, | |
| "logits/chosen": -0.2746838927268982, | |
| "logits/rejected": -0.11644043028354645, | |
| "logps/chosen": -104.08808898925781, | |
| "logps/rejected": -273.0931091308594, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.2171382904052734, | |
| "rewards/margins": 11.691030502319336, | |
| "rewards/rejected": -12.90816879272461, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 1.782051282051282, | |
| "grad_norm": 0.0615234375, | |
| "learning_rate": 4.643790849673203e-05, | |
| "logits/chosen": -0.28262853622436523, | |
| "logits/rejected": -0.0382668673992157, | |
| "logps/chosen": -84.68949127197266, | |
| "logps/rejected": -324.01300048828125, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.7759197950363159, | |
| "rewards/margins": 14.598560333251953, | |
| "rewards/rejected": -15.374479293823242, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 1.7948717948717947, | |
| "grad_norm": 0.0986328125, | |
| "learning_rate": 4.640522875816994e-05, | |
| "logits/chosen": -0.29283374547958374, | |
| "logits/rejected": -0.08335462212562561, | |
| "logps/chosen": -94.09033203125, | |
| "logps/rejected": -276.757568359375, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.5965969562530518, | |
| "rewards/margins": 12.221990585327148, | |
| "rewards/rejected": -12.818586349487305, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.8076923076923077, | |
| "grad_norm": 0.103515625, | |
| "learning_rate": 4.637254901960785e-05, | |
| "logits/chosen": -0.26228034496307373, | |
| "logits/rejected": -0.05300623178482056, | |
| "logps/chosen": -98.79747772216797, | |
| "logps/rejected": -288.3526611328125, | |
| "loss": 0.0006, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.0880465507507324, | |
| "rewards/margins": 12.731582641601562, | |
| "rewards/rejected": -13.819629669189453, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 1.8205128205128205, | |
| "grad_norm": 27.75, | |
| "learning_rate": 4.633986928104576e-05, | |
| "logits/chosen": -0.27154913544654846, | |
| "logits/rejected": -0.20054474472999573, | |
| "logps/chosen": -149.24365234375, | |
| "logps/rejected": -311.93780517578125, | |
| "loss": 0.0306, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": -1.906026840209961, | |
| "rewards/margins": 12.595197677612305, | |
| "rewards/rejected": -14.50122356414795, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 1.8333333333333335, | |
| "grad_norm": 0.369140625, | |
| "learning_rate": 4.630718954248367e-05, | |
| "logits/chosen": -0.2443104386329651, | |
| "logits/rejected": -0.02478812262415886, | |
| "logps/chosen": -89.20999145507812, | |
| "logps/rejected": -261.52899169921875, | |
| "loss": 0.0015, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.9475430250167847, | |
| "rewards/margins": 11.407354354858398, | |
| "rewards/rejected": -12.354896545410156, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 1.8461538461538463, | |
| "grad_norm": 0.2080078125, | |
| "learning_rate": 4.6274509803921576e-05, | |
| "logits/chosen": -0.1753990799188614, | |
| "logits/rejected": -0.05313686281442642, | |
| "logps/chosen": -103.32756805419922, | |
| "logps/rejected": -288.1191101074219, | |
| "loss": 0.0012, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.9423798322677612, | |
| "rewards/margins": 12.834511756896973, | |
| "rewards/rejected": -13.77688980102539, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 1.858974358974359, | |
| "grad_norm": 0.2578125, | |
| "learning_rate": 4.624183006535948e-05, | |
| "logits/chosen": -0.1979113519191742, | |
| "logits/rejected": -0.12107887864112854, | |
| "logps/chosen": -121.73201751708984, | |
| "logps/rejected": -252.05709838867188, | |
| "loss": 0.001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.3529582023620605, | |
| "rewards/margins": 10.709735870361328, | |
| "rewards/rejected": -12.06269359588623, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 1.8717948717948718, | |
| "grad_norm": 0.2431640625, | |
| "learning_rate": 4.620915032679739e-05, | |
| "logits/chosen": -0.25694841146469116, | |
| "logits/rejected": -0.04195690155029297, | |
| "logps/chosen": -85.82433319091797, | |
| "logps/rejected": -261.2509765625, | |
| "loss": 0.0015, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.0540355443954468, | |
| "rewards/margins": 12.137004852294922, | |
| "rewards/rejected": -13.191040992736816, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 1.8846153846153846, | |
| "grad_norm": 0.1669921875, | |
| "learning_rate": 4.61764705882353e-05, | |
| "logits/chosen": -0.2850918173789978, | |
| "logits/rejected": -0.15326175093650818, | |
| "logps/chosen": -88.37925720214844, | |
| "logps/rejected": -293.49127197265625, | |
| "loss": 0.0006, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.6834101676940918, | |
| "rewards/margins": 13.834033966064453, | |
| "rewards/rejected": -14.517443656921387, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 1.8974358974358974, | |
| "grad_norm": 0.0556640625, | |
| "learning_rate": 4.6143790849673206e-05, | |
| "logits/chosen": -0.2825964093208313, | |
| "logits/rejected": -0.07466967403888702, | |
| "logps/chosen": -107.55108642578125, | |
| "logps/rejected": -337.55242919921875, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.9410918354988098, | |
| "rewards/margins": 15.14223575592041, | |
| "rewards/rejected": -16.08332633972168, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 1.9102564102564101, | |
| "grad_norm": 0.21484375, | |
| "learning_rate": 4.6111111111111115e-05, | |
| "logits/chosen": -0.3447108268737793, | |
| "logits/rejected": -0.22812435030937195, | |
| "logps/chosen": -122.07078552246094, | |
| "logps/rejected": -305.02166748046875, | |
| "loss": 0.0013, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.3601295948028564, | |
| "rewards/margins": 13.087089538574219, | |
| "rewards/rejected": -14.447218894958496, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 1.9230769230769231, | |
| "grad_norm": 0.1142578125, | |
| "learning_rate": 4.607843137254902e-05, | |
| "logits/chosen": -0.27915388345718384, | |
| "logits/rejected": -0.10849837213754654, | |
| "logps/chosen": -92.0287857055664, | |
| "logps/rejected": -309.43890380859375, | |
| "loss": 0.0005, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.157518744468689, | |
| "rewards/margins": 13.15457534790039, | |
| "rewards/rejected": -14.312093734741211, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.935897435897436, | |
| "grad_norm": 0.0179443359375, | |
| "learning_rate": 4.604575163398693e-05, | |
| "logits/chosen": -0.31482794880867004, | |
| "logits/rejected": -0.19109605252742767, | |
| "logps/chosen": -96.60372924804688, | |
| "logps/rejected": -286.2981872558594, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.0948021411895752, | |
| "rewards/margins": 12.643112182617188, | |
| "rewards/rejected": -13.737914085388184, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 1.9487179487179487, | |
| "grad_norm": 0.130859375, | |
| "learning_rate": 4.6013071895424836e-05, | |
| "logits/chosen": -0.2671804130077362, | |
| "logits/rejected": -0.06821894645690918, | |
| "logps/chosen": -147.66506958007812, | |
| "logps/rejected": -301.7723388671875, | |
| "loss": 0.0008, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.8261582851409912, | |
| "rewards/margins": 12.629762649536133, | |
| "rewards/rejected": -14.455921173095703, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 1.9615384615384617, | |
| "grad_norm": 0.349609375, | |
| "learning_rate": 4.5980392156862746e-05, | |
| "logits/chosen": -0.33202871680259705, | |
| "logits/rejected": -0.1599435955286026, | |
| "logps/chosen": -86.12158966064453, | |
| "logps/rejected": -284.26324462890625, | |
| "loss": 0.0013, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.8113446235656738, | |
| "rewards/margins": 12.993163108825684, | |
| "rewards/rejected": -13.8045072555542, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 1.9743589743589745, | |
| "grad_norm": 0.09814453125, | |
| "learning_rate": 4.5947712418300655e-05, | |
| "logits/chosen": -0.3155522048473358, | |
| "logits/rejected": -0.08728814125061035, | |
| "logps/chosen": -79.83859252929688, | |
| "logps/rejected": -278.98388671875, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.6317344903945923, | |
| "rewards/margins": 13.157981872558594, | |
| "rewards/rejected": -13.789715766906738, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 1.9871794871794872, | |
| "grad_norm": 0.44921875, | |
| "learning_rate": 4.5915032679738564e-05, | |
| "logits/chosen": -0.22629907727241516, | |
| "logits/rejected": -0.04202582314610481, | |
| "logps/chosen": -117.17872619628906, | |
| "logps/rejected": -292.93505859375, | |
| "loss": 0.0012, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.5509653091430664, | |
| "rewards/margins": 12.848068237304688, | |
| "rewards/rejected": -14.399032592773438, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 13.0625, | |
| "learning_rate": 4.588235294117647e-05, | |
| "logits/chosen": -0.2263547033071518, | |
| "logits/rejected": -0.0545755997300148, | |
| "logps/chosen": -106.74394989013672, | |
| "logps/rejected": -248.90081787109375, | |
| "loss": 0.0467, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": -1.7626768350601196, | |
| "rewards/margins": 10.839977264404297, | |
| "rewards/rejected": -12.602653503417969, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 2.0128205128205128, | |
| "grad_norm": 0.09716796875, | |
| "learning_rate": 4.584967320261438e-05, | |
| "logits/chosen": -0.2697691321372986, | |
| "logits/rejected": -0.06424982845783234, | |
| "logps/chosen": -103.65620422363281, | |
| "logps/rejected": -305.4455261230469, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.0703643560409546, | |
| "rewards/margins": 13.297143936157227, | |
| "rewards/rejected": -14.367508888244629, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 2.0256410256410255, | |
| "grad_norm": 0.1796875, | |
| "learning_rate": 4.5816993464052285e-05, | |
| "logits/chosen": -0.2249889373779297, | |
| "logits/rejected": -0.09951350837945938, | |
| "logps/chosen": -106.23167419433594, | |
| "logps/rejected": -280.3406982421875, | |
| "loss": 0.0008, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.1637394428253174, | |
| "rewards/margins": 11.740355491638184, | |
| "rewards/rejected": -12.904095649719238, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 2.0384615384615383, | |
| "grad_norm": 0.1669921875, | |
| "learning_rate": 4.5784313725490194e-05, | |
| "logits/chosen": -0.25188344717025757, | |
| "logits/rejected": -0.07627353072166443, | |
| "logps/chosen": -107.0892562866211, | |
| "logps/rejected": -266.4984436035156, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.1322087049484253, | |
| "rewards/margins": 11.575529098510742, | |
| "rewards/rejected": -12.707737922668457, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 2.051282051282051, | |
| "grad_norm": 0.017578125, | |
| "learning_rate": 4.5751633986928104e-05, | |
| "logits/chosen": -0.26001444458961487, | |
| "logits/rejected": -0.08347820490598679, | |
| "logps/chosen": -93.40831756591797, | |
| "logps/rejected": -319.4044189453125, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.0102758407592773, | |
| "rewards/margins": 14.32186508178711, | |
| "rewards/rejected": -15.332140922546387, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 2.064102564102564, | |
| "grad_norm": 0.1044921875, | |
| "learning_rate": 4.571895424836601e-05, | |
| "logits/chosen": -0.20525528490543365, | |
| "logits/rejected": -0.061775218695402145, | |
| "logps/chosen": -122.27955627441406, | |
| "logps/rejected": -298.58941650390625, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.682543158531189, | |
| "rewards/margins": 12.67213249206543, | |
| "rewards/rejected": -14.35467529296875, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 2.076923076923077, | |
| "grad_norm": 0.076171875, | |
| "learning_rate": 4.568627450980392e-05, | |
| "logits/chosen": -0.26059162616729736, | |
| "logits/rejected": -0.1604340374469757, | |
| "logps/chosen": -129.52304077148438, | |
| "logps/rejected": -311.7171630859375, | |
| "loss": 0.0005, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.5141618251800537, | |
| "rewards/margins": 13.54619312286377, | |
| "rewards/rejected": -15.060354232788086, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 2.08974358974359, | |
| "grad_norm": 0.193359375, | |
| "learning_rate": 4.565359477124183e-05, | |
| "logits/chosen": -0.29279133677482605, | |
| "logits/rejected": -0.13754771649837494, | |
| "logps/chosen": -118.63838195800781, | |
| "logps/rejected": -246.6336669921875, | |
| "loss": 0.0008, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.066185474395752, | |
| "rewards/margins": 10.322509765625, | |
| "rewards/rejected": -11.38869571685791, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 2.1025641025641026, | |
| "grad_norm": 0.1494140625, | |
| "learning_rate": 4.562091503267974e-05, | |
| "logits/chosen": -0.2527160048484802, | |
| "logits/rejected": -0.11166957020759583, | |
| "logps/chosen": -94.09567260742188, | |
| "logps/rejected": -285.9129943847656, | |
| "loss": 0.0007, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.9152126312255859, | |
| "rewards/margins": 13.175485610961914, | |
| "rewards/rejected": -14.0906982421875, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 2.1153846153846154, | |
| "grad_norm": 0.026123046875, | |
| "learning_rate": 4.558823529411765e-05, | |
| "logits/chosen": -0.3027876913547516, | |
| "logits/rejected": -0.1073901355266571, | |
| "logps/chosen": -94.38215637207031, | |
| "logps/rejected": -276.5028381347656, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.071329116821289, | |
| "rewards/margins": 12.81164836883545, | |
| "rewards/rejected": -13.882976531982422, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 2.128205128205128, | |
| "grad_norm": 0.024169921875, | |
| "learning_rate": 4.555555555555556e-05, | |
| "logits/chosen": -0.2944399118423462, | |
| "logits/rejected": -0.04996255412697792, | |
| "logps/chosen": -103.53515625, | |
| "logps/rejected": -288.7379150390625, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.2219610214233398, | |
| "rewards/margins": 13.849769592285156, | |
| "rewards/rejected": -15.071731567382812, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 2.141025641025641, | |
| "grad_norm": 0.0908203125, | |
| "learning_rate": 4.552287581699347e-05, | |
| "logits/chosen": -0.2894943952560425, | |
| "logits/rejected": -0.08604772388935089, | |
| "logps/chosen": -151.27647399902344, | |
| "logps/rejected": -352.01983642578125, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.9637279510498047, | |
| "rewards/margins": 15.012813568115234, | |
| "rewards/rejected": -16.97654151916504, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 2.1538461538461537, | |
| "grad_norm": 0.034912109375, | |
| "learning_rate": 4.549019607843137e-05, | |
| "logits/chosen": -0.22282078862190247, | |
| "logits/rejected": -0.03683660924434662, | |
| "logps/chosen": -114.15497589111328, | |
| "logps/rejected": -316.06231689453125, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.3463189601898193, | |
| "rewards/margins": 13.798446655273438, | |
| "rewards/rejected": -15.144765853881836, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 2.1666666666666665, | |
| "grad_norm": 0.49609375, | |
| "learning_rate": 4.545751633986928e-05, | |
| "logits/chosen": -0.30756574869155884, | |
| "logits/rejected": -0.18649938702583313, | |
| "logps/chosen": -107.5008544921875, | |
| "logps/rejected": -318.0722961425781, | |
| "loss": 0.0011, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.2089110612869263, | |
| "rewards/margins": 13.918359756469727, | |
| "rewards/rejected": -15.127269744873047, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 2.1794871794871793, | |
| "grad_norm": 0.2138671875, | |
| "learning_rate": 4.542483660130719e-05, | |
| "logits/chosen": -0.2402784824371338, | |
| "logits/rejected": -0.12995833158493042, | |
| "logps/chosen": -105.42998504638672, | |
| "logps/rejected": -280.7650146484375, | |
| "loss": 0.0007, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.6004787683486938, | |
| "rewards/margins": 12.342819213867188, | |
| "rewards/rejected": -13.94329833984375, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 2.1923076923076925, | |
| "grad_norm": 0.07958984375, | |
| "learning_rate": 4.53921568627451e-05, | |
| "logits/chosen": -0.3008817732334137, | |
| "logits/rejected": -0.17829753458499908, | |
| "logps/chosen": -95.71510314941406, | |
| "logps/rejected": -283.48406982421875, | |
| "loss": 0.0007, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.1105754375457764, | |
| "rewards/margins": 12.865203857421875, | |
| "rewards/rejected": -13.975778579711914, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 2.2051282051282053, | |
| "grad_norm": 0.275390625, | |
| "learning_rate": 4.535947712418301e-05, | |
| "logits/chosen": -0.2339504063129425, | |
| "logits/rejected": -0.061104245483875275, | |
| "logps/chosen": -86.83187103271484, | |
| "logps/rejected": -270.61767578125, | |
| "loss": 0.0005, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.2641196250915527, | |
| "rewards/margins": 12.290773391723633, | |
| "rewards/rejected": -13.554893493652344, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 2.217948717948718, | |
| "grad_norm": 0.056640625, | |
| "learning_rate": 4.532679738562092e-05, | |
| "logits/chosen": -0.30389243364334106, | |
| "logits/rejected": -0.12270551919937134, | |
| "logps/chosen": -102.2293701171875, | |
| "logps/rejected": -304.9759521484375, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.2542763948440552, | |
| "rewards/margins": 13.360130310058594, | |
| "rewards/rejected": -14.614407539367676, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 2.230769230769231, | |
| "grad_norm": 0.09326171875, | |
| "learning_rate": 4.5294117647058826e-05, | |
| "logits/chosen": -0.2497141808271408, | |
| "logits/rejected": -0.06457696110010147, | |
| "logps/chosen": -100.6624984741211, | |
| "logps/rejected": -329.494384765625, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.264633059501648, | |
| "rewards/margins": 15.253053665161133, | |
| "rewards/rejected": -16.51768684387207, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 2.2435897435897436, | |
| "grad_norm": 0.05224609375, | |
| "learning_rate": 4.5261437908496736e-05, | |
| "logits/chosen": -0.3165115714073181, | |
| "logits/rejected": -0.13756012916564941, | |
| "logps/chosen": -105.48709869384766, | |
| "logps/rejected": -304.1381530761719, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.1276494264602661, | |
| "rewards/margins": 13.234853744506836, | |
| "rewards/rejected": -14.362503051757812, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 2.2564102564102564, | |
| "grad_norm": 0.1826171875, | |
| "learning_rate": 4.5228758169934645e-05, | |
| "logits/chosen": -0.24985185265541077, | |
| "logits/rejected": -0.12730102241039276, | |
| "logps/chosen": -107.34295654296875, | |
| "logps/rejected": -298.46728515625, | |
| "loss": 0.0006, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.5559189319610596, | |
| "rewards/margins": 12.924419403076172, | |
| "rewards/rejected": -14.480337142944336, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 2.269230769230769, | |
| "grad_norm": 0.1328125, | |
| "learning_rate": 4.5196078431372554e-05, | |
| "logits/chosen": -0.20896370708942413, | |
| "logits/rejected": -0.023086171597242355, | |
| "logps/chosen": -89.58634948730469, | |
| "logps/rejected": -261.9871520996094, | |
| "loss": 0.0007, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.9289363622665405, | |
| "rewards/margins": 11.460927963256836, | |
| "rewards/rejected": -12.389863967895508, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 2.282051282051282, | |
| "grad_norm": 0.197265625, | |
| "learning_rate": 4.516339869281046e-05, | |
| "logits/chosen": -0.30590391159057617, | |
| "logits/rejected": -0.12203465402126312, | |
| "logps/chosen": -119.63058471679688, | |
| "logps/rejected": -281.2730712890625, | |
| "loss": 0.0009, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.1193912029266357, | |
| "rewards/margins": 12.338713645935059, | |
| "rewards/rejected": -13.458105087280273, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 2.2948717948717947, | |
| "grad_norm": 0.1689453125, | |
| "learning_rate": 4.5130718954248366e-05, | |
| "logits/chosen": -0.2841281294822693, | |
| "logits/rejected": -0.11347918957471848, | |
| "logps/chosen": -93.43059539794922, | |
| "logps/rejected": -288.3245544433594, | |
| "loss": 0.001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.043287754058838, | |
| "rewards/margins": 12.979776382446289, | |
| "rewards/rejected": -14.023063659667969, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 2.3076923076923075, | |
| "grad_norm": 13.875, | |
| "learning_rate": 4.5098039215686275e-05, | |
| "logits/chosen": -0.325821191072464, | |
| "logits/rejected": -0.1742836833000183, | |
| "logps/chosen": -102.51724243164062, | |
| "logps/rejected": -273.74578857421875, | |
| "loss": 0.0436, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": -1.4758192300796509, | |
| "rewards/margins": 12.041901588439941, | |
| "rewards/rejected": -13.517720222473145, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 2.3205128205128207, | |
| "grad_norm": 0.8203125, | |
| "learning_rate": 4.5065359477124184e-05, | |
| "logits/chosen": -0.2550922632217407, | |
| "logits/rejected": -0.15083283185958862, | |
| "logps/chosen": -109.05842590332031, | |
| "logps/rejected": -291.51251220703125, | |
| "loss": 0.0011, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.559402346611023, | |
| "rewards/margins": 13.180217742919922, | |
| "rewards/rejected": -14.739620208740234, | |
| "step": 181 | |
| }, | |
| { | |
| "epoch": 2.3333333333333335, | |
| "grad_norm": 0.08544921875, | |
| "learning_rate": 4.5032679738562094e-05, | |
| "logits/chosen": -0.3414364159107208, | |
| "logits/rejected": -0.14673462510108948, | |
| "logps/chosen": -101.14544677734375, | |
| "logps/rejected": -284.2122802734375, | |
| "loss": 0.0007, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.3198997974395752, | |
| "rewards/margins": 12.26275634765625, | |
| "rewards/rejected": -13.582656860351562, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 2.3461538461538463, | |
| "grad_norm": 0.73828125, | |
| "learning_rate": 4.5e-05, | |
| "logits/chosen": -0.2925676703453064, | |
| "logits/rejected": -0.1411311775445938, | |
| "logps/chosen": -96.9495849609375, | |
| "logps/rejected": -257.125732421875, | |
| "loss": 0.001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.0567059516906738, | |
| "rewards/margins": 11.797374725341797, | |
| "rewards/rejected": -12.854080200195312, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 2.358974358974359, | |
| "grad_norm": 0.087890625, | |
| "learning_rate": 4.496732026143791e-05, | |
| "logits/chosen": -0.36833012104034424, | |
| "logits/rejected": -0.20165221393108368, | |
| "logps/chosen": -130.09317016601562, | |
| "logps/rejected": -359.2932434082031, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.699230670928955, | |
| "rewards/margins": 15.806741714477539, | |
| "rewards/rejected": -17.50597381591797, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 2.371794871794872, | |
| "grad_norm": 0.2197265625, | |
| "learning_rate": 4.493464052287582e-05, | |
| "logits/chosen": -0.285861611366272, | |
| "logits/rejected": -0.08664289116859436, | |
| "logps/chosen": -107.8544921875, | |
| "logps/rejected": -354.9894714355469, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.188364863395691, | |
| "rewards/margins": 16.293102264404297, | |
| "rewards/rejected": -17.481468200683594, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 2.3846153846153846, | |
| "grad_norm": 0.330078125, | |
| "learning_rate": 4.490196078431373e-05, | |
| "logits/chosen": -0.3141424059867859, | |
| "logits/rejected": -0.09589091688394547, | |
| "logps/chosen": -106.24095153808594, | |
| "logps/rejected": -327.93975830078125, | |
| "loss": 0.001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.1157587766647339, | |
| "rewards/margins": 15.024555206298828, | |
| "rewards/rejected": -16.14031219482422, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 2.3974358974358974, | |
| "grad_norm": 0.03173828125, | |
| "learning_rate": 4.486928104575164e-05, | |
| "logits/chosen": -0.2788226902484894, | |
| "logits/rejected": -0.12082622200250626, | |
| "logps/chosen": -125.92074584960938, | |
| "logps/rejected": -346.5249938964844, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.501129150390625, | |
| "rewards/margins": 14.815469741821289, | |
| "rewards/rejected": -16.316598892211914, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 2.41025641025641, | |
| "grad_norm": 2.140625, | |
| "learning_rate": 4.483660130718955e-05, | |
| "logits/chosen": -0.30329498648643494, | |
| "logits/rejected": -0.09551708400249481, | |
| "logps/chosen": -104.24821472167969, | |
| "logps/rejected": -320.36126708984375, | |
| "loss": 0.0009, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.5797302722930908, | |
| "rewards/margins": 13.738924980163574, | |
| "rewards/rejected": -15.318655967712402, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 2.423076923076923, | |
| "grad_norm": 0.201171875, | |
| "learning_rate": 4.480392156862745e-05, | |
| "logits/chosen": -0.32630714774131775, | |
| "logits/rejected": -0.1403510421514511, | |
| "logps/chosen": -117.85655212402344, | |
| "logps/rejected": -309.023193359375, | |
| "loss": 0.0006, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.2067795991897583, | |
| "rewards/margins": 14.257521629333496, | |
| "rewards/rejected": -15.464301109313965, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 2.435897435897436, | |
| "grad_norm": 0.05859375, | |
| "learning_rate": 4.477124183006536e-05, | |
| "logits/chosen": -0.2662752568721771, | |
| "logits/rejected": -0.13621115684509277, | |
| "logps/chosen": -141.50767517089844, | |
| "logps/rejected": -310.569580078125, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.6395208835601807, | |
| "rewards/margins": 13.533281326293945, | |
| "rewards/rejected": -15.172801971435547, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 2.448717948717949, | |
| "grad_norm": 0.01806640625, | |
| "learning_rate": 4.473856209150327e-05, | |
| "logits/chosen": -0.26498672366142273, | |
| "logits/rejected": -0.0718044638633728, | |
| "logps/chosen": -76.05623626708984, | |
| "logps/rejected": -286.27020263671875, | |
| "loss": 0.0001, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.072414755821228, | |
| "rewards/margins": 13.859857559204102, | |
| "rewards/rejected": -14.932271957397461, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 2.4615384615384617, | |
| "grad_norm": 0.109375, | |
| "learning_rate": 4.470588235294118e-05, | |
| "logits/chosen": -0.26316002011299133, | |
| "logits/rejected": -0.07263979315757751, | |
| "logps/chosen": -115.10618591308594, | |
| "logps/rejected": -282.0235595703125, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -0.9896156787872314, | |
| "rewards/margins": 12.763177871704102, | |
| "rewards/rejected": -13.75279426574707, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 2.4743589743589745, | |
| "grad_norm": 0.05078125, | |
| "learning_rate": 4.467320261437909e-05, | |
| "logits/chosen": -0.2852447032928467, | |
| "logits/rejected": -0.11667799949645996, | |
| "logps/chosen": -88.82963562011719, | |
| "logps/rejected": -298.4388732910156, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.1287240982055664, | |
| "rewards/margins": 13.590049743652344, | |
| "rewards/rejected": -14.718772888183594, | |
| "step": 193 | |
| }, | |
| { | |
| "epoch": 2.4871794871794872, | |
| "grad_norm": 0.70703125, | |
| "learning_rate": 4.4640522875817e-05, | |
| "logits/chosen": -0.2135988473892212, | |
| "logits/rejected": -0.10373395681381226, | |
| "logps/chosen": -127.32122039794922, | |
| "logps/rejected": -334.4412536621094, | |
| "loss": 0.0008, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.8167392015457153, | |
| "rewards/margins": 15.495786666870117, | |
| "rewards/rejected": -17.31252670288086, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "grad_norm": 0.1669921875, | |
| "learning_rate": 4.460784313725491e-05, | |
| "logits/chosen": -0.23329344391822815, | |
| "logits/rejected": -0.0719284862279892, | |
| "logps/chosen": -110.10687255859375, | |
| "logps/rejected": -300.25189208984375, | |
| "loss": 0.0005, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.2819175720214844, | |
| "rewards/margins": 13.15202808380127, | |
| "rewards/rejected": -14.43394660949707, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 2.5128205128205128, | |
| "grad_norm": 0.19921875, | |
| "learning_rate": 4.4575163398692816e-05, | |
| "logits/chosen": -0.21618735790252686, | |
| "logits/rejected": -0.03836328908801079, | |
| "logps/chosen": -96.79692077636719, | |
| "logps/rejected": -260.9601135253906, | |
| "loss": 0.0009, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.4426820278167725, | |
| "rewards/margins": 11.765287399291992, | |
| "rewards/rejected": -13.207969665527344, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 2.5256410256410255, | |
| "grad_norm": 0.03515625, | |
| "learning_rate": 4.4542483660130726e-05, | |
| "logits/chosen": -0.2685829997062683, | |
| "logits/rejected": -0.0963975191116333, | |
| "logps/chosen": -93.97406005859375, | |
| "logps/rejected": -292.0045166015625, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.4119703769683838, | |
| "rewards/margins": 13.41724967956543, | |
| "rewards/rejected": -14.82922077178955, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 2.5384615384615383, | |
| "grad_norm": 0.099609375, | |
| "learning_rate": 4.450980392156863e-05, | |
| "logits/chosen": -0.33342769742012024, | |
| "logits/rejected": -0.16594503819942474, | |
| "logps/chosen": -106.32969665527344, | |
| "logps/rejected": -270.3590393066406, | |
| "loss": 0.0003, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.3807129859924316, | |
| "rewards/margins": 12.118289947509766, | |
| "rewards/rejected": -13.499002456665039, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 2.551282051282051, | |
| "grad_norm": 0.3203125, | |
| "learning_rate": 4.447712418300654e-05, | |
| "logits/chosen": -0.2735711336135864, | |
| "logits/rejected": -0.11022089421749115, | |
| "logps/chosen": -107.94451904296875, | |
| "logps/rejected": -298.1263427734375, | |
| "loss": 0.0006, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.7573144435882568, | |
| "rewards/margins": 13.403427124023438, | |
| "rewards/rejected": -15.160740852355957, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 2.564102564102564, | |
| "grad_norm": 0.04248046875, | |
| "learning_rate": 4.4444444444444447e-05, | |
| "logits/chosen": -0.31434327363967896, | |
| "logits/rejected": -0.16673806309700012, | |
| "logps/chosen": -110.20227813720703, | |
| "logps/rejected": -310.43817138671875, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -1.284892201423645, | |
| "rewards/margins": 13.95626449584961, | |
| "rewards/rejected": -15.241157531738281, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 2.564102564102564, | |
| "eval_logits/chosen": -0.3091069757938385, | |
| "eval_logits/rejected": -0.17900259792804718, | |
| "eval_logps/chosen": -115.3447265625, | |
| "eval_logps/rejected": -298.21331787109375, | |
| "eval_loss": 0.006633765529841185, | |
| "eval_rewards/accuracies": 0.995312511920929, | |
| "eval_rewards/chosen": -1.6373059749603271, | |
| "eval_rewards/margins": 13.153945922851562, | |
| "eval_rewards/rejected": -14.791254043579102, | |
| "eval_runtime": 49.0619, | |
| "eval_samples_per_second": 12.8, | |
| "eval_steps_per_second": 0.815, | |
| "step": 200 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 1560, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 20, | |
| "save_steps": 200, | |
| "total_flos": 0.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |