| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.0, | |
| "eval_steps": 294, | |
| "global_step": 2931, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.01023541453428864, | |
| "grad_norm": 43.49986783253336, | |
| "kl": 2.608715057373047, | |
| "learning_rate": 3.0612244897959183e-08, | |
| "logits/chosen": -111473206.85714285, | |
| "logits/rejected": -94626835.6923077, | |
| "logps/chosen": -652.2265625, | |
| "logps/rejected": -566.5637394831731, | |
| "loss": 0.4991, | |
| "rewards/chosen": -0.04828752790178571, | |
| "rewards/margins": -0.045037640811322804, | |
| "rewards/rejected": -0.003249887090462905, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.02047082906857728, | |
| "grad_norm": 45.377383375399816, | |
| "kl": 2.1035537719726562, | |
| "learning_rate": 6.462585034013606e-08, | |
| "logits/chosen": -83623772.44444445, | |
| "logits/rejected": -98599098.18181819, | |
| "logps/chosen": -535.5240885416666, | |
| "logps/rejected": -588.3898703835227, | |
| "loss": 0.5002, | |
| "rewards/chosen": -0.012379965848392911, | |
| "rewards/margins": -0.00647647934730607, | |
| "rewards/rejected": -0.005903486501086842, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.030706243602865915, | |
| "grad_norm": 47.6297142496506, | |
| "kl": 2.7048912048339844, | |
| "learning_rate": 9.863945578231292e-08, | |
| "logits/chosen": -84807603.2, | |
| "logits/rejected": -71051987.2, | |
| "logps/chosen": -515.9376953125, | |
| "logps/rejected": -514.024072265625, | |
| "loss": 0.4975, | |
| "rewards/chosen": -0.024640909830729165, | |
| "rewards/margins": -0.002495519320170083, | |
| "rewards/rejected": -0.022145390510559082, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.04094165813715456, | |
| "grad_norm": 36.8985081490464, | |
| "kl": 5.254791259765625, | |
| "learning_rate": 1.326530612244898e-07, | |
| "logits/chosen": -85094592.0, | |
| "logits/rejected": -87958234.66666667, | |
| "logps/chosen": -537.9602748325893, | |
| "logps/rejected": -530.6398111979166, | |
| "loss": 0.5002, | |
| "rewards/chosen": 0.024391608578818186, | |
| "rewards/margins": 0.03762454007353101, | |
| "rewards/rejected": -0.01323293149471283, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.0511770726714432, | |
| "grad_norm": 48.90887685320883, | |
| "kl": 5.289024353027344, | |
| "learning_rate": 1.6666666666666665e-07, | |
| "logits/chosen": -78688878.54545455, | |
| "logits/rejected": -93538062.22222222, | |
| "logps/chosen": -549.6178089488636, | |
| "logps/rejected": -589.7821180555555, | |
| "loss": 0.5033, | |
| "rewards/chosen": 0.023246071555397728, | |
| "rewards/margins": -0.07650775198984627, | |
| "rewards/rejected": 0.09975382354524401, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.06141248720573183, | |
| "grad_norm": 36.308193875487795, | |
| "kl": 12.417892456054688, | |
| "learning_rate": 2.0068027210884352e-07, | |
| "logits/chosen": -83240362.66666667, | |
| "logits/rejected": -82516282.18181819, | |
| "logps/chosen": -506.6652018229167, | |
| "logps/rejected": -571.6751598011364, | |
| "loss": 0.5017, | |
| "rewards/chosen": 0.02451240022977193, | |
| "rewards/margins": -0.017813264420538242, | |
| "rewards/rejected": 0.04232566465031017, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.07164790174002048, | |
| "grad_norm": 40.69660777082527, | |
| "kl": 13.569828033447266, | |
| "learning_rate": 2.346938775510204e-07, | |
| "logits/chosen": -89204087.46666667, | |
| "logits/rejected": -104327052.8, | |
| "logps/chosen": -570.499609375, | |
| "logps/rejected": -574.176953125, | |
| "loss": 0.491, | |
| "rewards/chosen": 0.10849486986796061, | |
| "rewards/margins": 0.2052929679552714, | |
| "rewards/rejected": -0.0967980980873108, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.08188331627430911, | |
| "grad_norm": 36.95157131931197, | |
| "kl": 23.946407318115234, | |
| "learning_rate": 2.6870748299319727e-07, | |
| "logits/chosen": -109718818.13333334, | |
| "logits/rejected": -87821907.2, | |
| "logps/chosen": -617.5989583333334, | |
| "logps/rejected": -588.38564453125, | |
| "loss": 0.4978, | |
| "rewards/chosen": 0.1503196080525716, | |
| "rewards/margins": -0.09428855578104656, | |
| "rewards/rejected": 0.24460816383361816, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.09211873080859775, | |
| "grad_norm": 46.4163800004006, | |
| "kl": 62.34700012207031, | |
| "learning_rate": 3.027210884353741e-07, | |
| "logits/chosen": -103054848.0, | |
| "logits/rejected": -76245773.71428572, | |
| "logps/chosen": -603.8435246394231, | |
| "logps/rejected": -516.7438616071429, | |
| "loss": 0.4851, | |
| "rewards/chosen": 0.3449526566725511, | |
| "rewards/margins": -0.10476050534091152, | |
| "rewards/rejected": 0.4497131620134626, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.1023541453428864, | |
| "grad_norm": 40.60547971708711, | |
| "kl": 119.17515563964844, | |
| "learning_rate": 3.3673469387755096e-07, | |
| "logits/chosen": -95468101.81818181, | |
| "logits/rejected": -94606791.1111111, | |
| "logps/chosen": -601.3514293323864, | |
| "logps/rejected": -620.5993381076389, | |
| "loss": 0.4698, | |
| "rewards/chosen": 0.7123452099886808, | |
| "rewards/margins": 0.039795061554571576, | |
| "rewards/rejected": 0.6725501484341092, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.11258955987717502, | |
| "grad_norm": 35.710378804036566, | |
| "kl": 290.5523681640625, | |
| "learning_rate": 3.707482993197279e-07, | |
| "logits/chosen": -108210918.4, | |
| "logits/rejected": -91392499.2, | |
| "logps/chosen": -645.35595703125, | |
| "logps/rejected": -562.3923828125, | |
| "loss": 0.4728, | |
| "rewards/chosen": 1.9680000305175782, | |
| "rewards/margins": 0.7570903778076172, | |
| "rewards/rejected": 1.210909652709961, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.12282497441146366, | |
| "grad_norm": 35.17890953445531, | |
| "kl": 451.8232421875, | |
| "learning_rate": 4.0476190476190476e-07, | |
| "logits/chosen": -88438312.0, | |
| "logits/rejected": -109754261.33333333, | |
| "logps/chosen": -518.6763305664062, | |
| "logps/rejected": -589.44970703125, | |
| "loss": 0.466, | |
| "rewards/chosen": 2.5440354347229004, | |
| "rewards/margins": 0.2261904080708823, | |
| "rewards/rejected": 2.317845026652018, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.1330603889457523, | |
| "grad_norm": 31.330141726965156, | |
| "kl": 530.52392578125, | |
| "learning_rate": 4.387755102040816e-07, | |
| "logits/chosen": -100719965.0909091, | |
| "logits/rejected": -100037198.22222222, | |
| "logps/chosen": -594.1492365056819, | |
| "logps/rejected": -589.1796875, | |
| "loss": 0.4351, | |
| "rewards/chosen": 3.291921788995916, | |
| "rewards/margins": 0.4270220477171618, | |
| "rewards/rejected": 2.8648997412787542, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.14329580348004095, | |
| "grad_norm": 23.86281232214143, | |
| "kl": 658.0179443359375, | |
| "learning_rate": 4.727891156462585e-07, | |
| "logits/chosen": -115303606.85714285, | |
| "logits/rejected": -109218453.33333333, | |
| "logps/chosen": -617.2470703125, | |
| "logps/rejected": -546.180908203125, | |
| "loss": 0.4174, | |
| "rewards/chosen": 3.440274919782366, | |
| "rewards/margins": 0.4147660391671315, | |
| "rewards/rejected": 3.0255088806152344, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.1535312180143296, | |
| "grad_norm": 29.55019035252469, | |
| "kl": 849.1378784179688, | |
| "learning_rate": 5.068027210884354e-07, | |
| "logits/chosen": -104089250.9090909, | |
| "logits/rejected": -112913493.33333333, | |
| "logps/chosen": -506.69753196022725, | |
| "logps/rejected": -624.7750651041666, | |
| "loss": 0.4663, | |
| "rewards/chosen": 3.6730131669477983, | |
| "rewards/margins": -0.8608655833234686, | |
| "rewards/rejected": 4.533878750271267, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.16376663254861823, | |
| "grad_norm": 24.964919672196842, | |
| "kl": 875.5402221679688, | |
| "learning_rate": 5.408163265306123e-07, | |
| "logits/chosen": -109994830.76923077, | |
| "logits/rejected": -111798601.14285715, | |
| "logps/chosen": -581.7682542067307, | |
| "logps/rejected": -567.9554268973214, | |
| "loss": 0.4383, | |
| "rewards/chosen": 5.3036322960486775, | |
| "rewards/margins": 1.3674162141569366, | |
| "rewards/rejected": 3.936216081891741, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.17400204708290687, | |
| "grad_norm": 26.396407490554235, | |
| "kl": 900.7223510742188, | |
| "learning_rate": 5.748299319727891e-07, | |
| "logits/chosen": -125637876.36363636, | |
| "logits/rejected": -111796778.66666667, | |
| "logps/chosen": -707.3591974431819, | |
| "logps/rejected": -564.7646484375, | |
| "loss": 0.4225, | |
| "rewards/chosen": 6.323025790127841, | |
| "rewards/margins": 1.465928164395419, | |
| "rewards/rejected": 4.857097625732422, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.1842374616171955, | |
| "grad_norm": 26.688093990242596, | |
| "kl": 968.2679443359375, | |
| "learning_rate": 6.08843537414966e-07, | |
| "logits/chosen": -106460544.0, | |
| "logits/rejected": -99920104.72727273, | |
| "logps/chosen": -548.9823133680555, | |
| "logps/rejected": -486.6614435369318, | |
| "loss": 0.3959, | |
| "rewards/chosen": 5.817420111762153, | |
| "rewards/margins": 2.0305458608299793, | |
| "rewards/rejected": 3.7868742509321733, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.19447287615148415, | |
| "grad_norm": 27.514737576204514, | |
| "kl": 1100.53125, | |
| "learning_rate": 6.428571428571429e-07, | |
| "logits/chosen": -104961415.1111111, | |
| "logits/rejected": -99523467.63636364, | |
| "logps/chosen": -518.9440104166666, | |
| "logps/rejected": -469.3260387073864, | |
| "loss": 0.4233, | |
| "rewards/chosen": 6.698972066243489, | |
| "rewards/margins": 0.8934900688402578, | |
| "rewards/rejected": 5.8054819974032315, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.2047082906857728, | |
| "grad_norm": 28.306342866049548, | |
| "kl": 1226.0400390625, | |
| "learning_rate": 6.768707482993196e-07, | |
| "logits/chosen": -98903750.4, | |
| "logits/rejected": -109158988.8, | |
| "logps/chosen": -489.16083984375, | |
| "logps/rejected": -544.397265625, | |
| "loss": 0.3861, | |
| "rewards/chosen": 7.531886291503906, | |
| "rewards/margins": 1.526143264770508, | |
| "rewards/rejected": 6.005743026733398, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.21494370522006143, | |
| "grad_norm": 22.693614623739084, | |
| "kl": 1270.7314453125, | |
| "learning_rate": 7.108843537414966e-07, | |
| "logits/chosen": -113987630.54545455, | |
| "logits/rejected": -114975089.77777778, | |
| "logps/chosen": -541.0110973011364, | |
| "logps/rejected": -538.138671875, | |
| "loss": 0.4139, | |
| "rewards/chosen": 7.440266002308238, | |
| "rewards/margins": -0.12881392661971347, | |
| "rewards/rejected": 7.569079928927952, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.22517911975435004, | |
| "grad_norm": 23.947122487878186, | |
| "kl": 1415.864013671875, | |
| "learning_rate": 7.448979591836734e-07, | |
| "logits/chosen": -96022882.9090909, | |
| "logits/rejected": -130673493.33333333, | |
| "logps/chosen": -429.89302201704544, | |
| "logps/rejected": -592.8015407986111, | |
| "loss": 0.3978, | |
| "rewards/chosen": 7.7172019264914775, | |
| "rewards/margins": 2.46611855246804, | |
| "rewards/rejected": 5.2510833740234375, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.23541453428863868, | |
| "grad_norm": 24.063075761950447, | |
| "kl": 1236.5908203125, | |
| "learning_rate": 7.789115646258503e-07, | |
| "logits/chosen": -111414570.66666667, | |
| "logits/rejected": -93740104.0, | |
| "logps/chosen": -510.4249674479167, | |
| "logps/rejected": -424.00146484375, | |
| "loss": 0.4325, | |
| "rewards/chosen": 7.313229878743489, | |
| "rewards/margins": 1.4464839299519854, | |
| "rewards/rejected": 5.866745948791504, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.24564994882292732, | |
| "grad_norm": 20.088674283050615, | |
| "kl": 1163.108154296875, | |
| "learning_rate": 8.129251700680271e-07, | |
| "logits/chosen": -109586229.33333333, | |
| "logits/rejected": -133773656.0, | |
| "logps/chosen": -519.625244140625, | |
| "logps/rejected": -560.62890625, | |
| "loss": 0.3586, | |
| "rewards/chosen": 6.922650655110677, | |
| "rewards/margins": 2.392770131429036, | |
| "rewards/rejected": 4.529880523681641, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.25588536335721596, | |
| "grad_norm": 25.835106855876596, | |
| "kl": 1395.358154296875, | |
| "learning_rate": 8.469387755102041e-07, | |
| "logits/chosen": -112743603.2, | |
| "logits/rejected": -99526758.4, | |
| "logps/chosen": -546.212255859375, | |
| "logps/rejected": -472.45087890625, | |
| "loss": 0.3533, | |
| "rewards/chosen": 8.94365997314453, | |
| "rewards/margins": 3.0286102294921866, | |
| "rewards/rejected": 5.915049743652344, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.2661207778915046, | |
| "grad_norm": 28.67885947302261, | |
| "kl": 1351.502197265625, | |
| "learning_rate": 8.809523809523809e-07, | |
| "logits/chosen": -123957152.0, | |
| "logits/rejected": -107065344.0, | |
| "logps/chosen": -517.8416748046875, | |
| "logps/rejected": -519.6959635416666, | |
| "loss": 0.4139, | |
| "rewards/chosen": 8.116106033325195, | |
| "rewards/margins": 0.3912080128987627, | |
| "rewards/rejected": 7.724898020426433, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.27635619242579323, | |
| "grad_norm": 23.569658518425406, | |
| "kl": 1269.64306640625, | |
| "learning_rate": 9.149659863945578e-07, | |
| "logits/chosen": -133106278.4, | |
| "logits/rejected": -113169587.2, | |
| "logps/chosen": -585.38896484375, | |
| "logps/rejected": -489.866357421875, | |
| "loss": 0.3802, | |
| "rewards/chosen": 8.878982543945312, | |
| "rewards/margins": 1.278940582275391, | |
| "rewards/rejected": 7.6000419616699215, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.2865916069600819, | |
| "grad_norm": 24.265479157836495, | |
| "kl": 877.0639038085938, | |
| "learning_rate": 9.489795918367347e-07, | |
| "logits/chosen": -113836704.0, | |
| "logits/rejected": -111877589.33333333, | |
| "logps/chosen": -503.03131103515625, | |
| "logps/rejected": -492.7080891927083, | |
| "loss": 0.3226, | |
| "rewards/chosen": 6.913208484649658, | |
| "rewards/margins": 3.9872498512268066, | |
| "rewards/rejected": 2.9259586334228516, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.2968270214943705, | |
| "grad_norm": 33.06706771664489, | |
| "kl": 492.48126220703125, | |
| "learning_rate": 9.829931972789116e-07, | |
| "logits/chosen": -122592484.57142857, | |
| "logits/rejected": -120002619.07692307, | |
| "logps/chosen": -527.830322265625, | |
| "logps/rejected": -702.9601862980769, | |
| "loss": 0.3762, | |
| "rewards/chosen": 5.435535975864956, | |
| "rewards/margins": 7.473937066046746, | |
| "rewards/rejected": -2.038401090181791, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.300921187308086, | |
| "eval_logits/chosen": -110518608.0, | |
| "eval_logits/rejected": -102539296.0, | |
| "eval_logps/chosen": -475.5417785644531, | |
| "eval_logps/rejected": -533.520751953125, | |
| "eval_loss": 0.5446844696998596, | |
| "eval_rewards/chosen": 2.4218292236328125, | |
| "eval_rewards/margins": -0.5885589122772217, | |
| "eval_rewards/rejected": 3.010388135910034, | |
| "eval_runtime": 2.6473, | |
| "eval_samples_per_second": 3.777, | |
| "eval_steps_per_second": 0.755, | |
| "kl": 0.0, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.3070624360286592, | |
| "grad_norm": 18.659418545479838, | |
| "kl": 6.161457061767578, | |
| "learning_rate": 9.999911292933214e-07, | |
| "logits/chosen": -106471222.85714285, | |
| "logits/rejected": -111757961.84615384, | |
| "logps/chosen": -547.7110421316964, | |
| "logps/rejected": -634.5023287259615, | |
| "loss": 0.447, | |
| "rewards/chosen": 0.06852419035775321, | |
| "rewards/margins": 2.988240658581912, | |
| "rewards/rejected": -2.9197164682241588, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.3172978505629478, | |
| "grad_norm": 18.96738462683121, | |
| "kl": 300.7326965332031, | |
| "learning_rate": 9.999201655284278e-07, | |
| "logits/chosen": -110882067.6923077, | |
| "logits/rejected": -105352630.85714285, | |
| "logps/chosen": -500.43941556490387, | |
| "logps/rejected": -635.9861886160714, | |
| "loss": 0.3072, | |
| "rewards/chosen": 3.4353617154634914, | |
| "rewards/margins": 6.811538549569937, | |
| "rewards/rejected": -3.3761768341064453, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.32753326509723646, | |
| "grad_norm": 11.374422004887663, | |
| "kl": 203.40391540527344, | |
| "learning_rate": 9.99778248070531e-07, | |
| "logits/chosen": -131503383.27272727, | |
| "logits/rejected": -111262620.44444445, | |
| "logps/chosen": -657.5584161931819, | |
| "logps/rejected": -765.3725043402778, | |
| "loss": 0.3116, | |
| "rewards/chosen": 3.251959367231889, | |
| "rewards/margins": 18.295085791385535, | |
| "rewards/rejected": -15.043126424153646, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.33776867963152507, | |
| "grad_norm": 8.880038655785679, | |
| "kl": 295.427490234375, | |
| "learning_rate": 9.995653970619826e-07, | |
| "logits/chosen": -106187031.27272727, | |
| "logits/rejected": -103581155.55555555, | |
| "logps/chosen": -590.5241477272727, | |
| "logps/rejected": -587.0112847222222, | |
| "loss": 0.3363, | |
| "rewards/chosen": 3.493159207430753, | |
| "rewards/margins": 6.228314948804451, | |
| "rewards/rejected": -2.7351557413736978, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.34800409416581374, | |
| "grad_norm": 5.40509067733252, | |
| "kl": 468.18304443359375, | |
| "learning_rate": 9.992816427127367e-07, | |
| "logits/chosen": -99738496.0, | |
| "logits/rejected": -99705779.2, | |
| "logps/chosen": -506.1044921875, | |
| "logps/rejected": -543.95732421875, | |
| "loss": 0.3102, | |
| "rewards/chosen": 5.911396789550781, | |
| "rewards/margins": 5.55134220123291, | |
| "rewards/rejected": 0.36005458831787107, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.35823950870010235, | |
| "grad_norm": 12.913253729889696, | |
| "kl": 725.096923828125, | |
| "learning_rate": 9.989270252960613e-07, | |
| "logits/chosen": -109946088.0, | |
| "logits/rejected": -102954101.33333333, | |
| "logps/chosen": -531.6331787109375, | |
| "logps/rejected": -548.4167073567709, | |
| "loss": 0.2884, | |
| "rewards/chosen": 7.9849534034729, | |
| "rewards/margins": 7.94753082593282, | |
| "rewards/rejected": 0.03742257754007975, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.368474923234391, | |
| "grad_norm": 17.735001258432412, | |
| "kl": 360.2090759277344, | |
| "learning_rate": 9.985015951428235e-07, | |
| "logits/chosen": -96365765.81818181, | |
| "logits/rejected": -106907882.66666667, | |
| "logps/chosen": -506.93399325284093, | |
| "logps/rejected": -564.6844618055555, | |
| "loss": 0.2752, | |
| "rewards/chosen": 6.11878065629439, | |
| "rewards/margins": 4.105176810062293, | |
| "rewards/rejected": 2.013603846232096, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.37871033776867963, | |
| "grad_norm": 18.86741420861504, | |
| "kl": 373.96728515625, | |
| "learning_rate": 9.980054126343455e-07, | |
| "logits/chosen": -112412480.0, | |
| "logits/rejected": -97450692.26666667, | |
| "logps/chosen": -504.6830078125, | |
| "logps/rejected": -691.042578125, | |
| "loss": 0.3076, | |
| "rewards/chosen": 6.367465209960938, | |
| "rewards/margins": 20.507525634765624, | |
| "rewards/rejected": -14.140060424804688, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.3889457523029683, | |
| "grad_norm": 10.094878631245667, | |
| "kl": 196.72659301757812, | |
| "learning_rate": 9.97438548193834e-07, | |
| "logits/chosen": -105118729.14285715, | |
| "logits/rejected": -110734293.33333333, | |
| "logps/chosen": -562.8646065848214, | |
| "logps/rejected": -507.587646484375, | |
| "loss": 0.2493, | |
| "rewards/chosen": 4.5860748291015625, | |
| "rewards/margins": 0.42298221588134766, | |
| "rewards/rejected": 4.163092613220215, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.3991811668372569, | |
| "grad_norm": 14.627865457817679, | |
| "kl": 279.072265625, | |
| "learning_rate": 9.968010822763865e-07, | |
| "logits/chosen": -113412138.66666667, | |
| "logits/rejected": -133443886.54545455, | |
| "logps/chosen": -530.6067708333334, | |
| "logps/rejected": -751.8836115056819, | |
| "loss": 0.3747, | |
| "rewards/chosen": 5.105476379394531, | |
| "rewards/margins": 12.318459944291547, | |
| "rewards/rejected": -7.212983564897017, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.4094165813715456, | |
| "grad_norm": 19.572916091667736, | |
| "kl": 460.6169738769531, | |
| "learning_rate": 9.960931053575709e-07, | |
| "logits/chosen": -110601969.77777778, | |
| "logits/rejected": -108428951.27272727, | |
| "logps/chosen": -532.2218967013889, | |
| "logps/rejected": -531.6844815340909, | |
| "loss": 0.35, | |
| "rewards/chosen": 3.8675689697265625, | |
| "rewards/margins": 5.184099370783025, | |
| "rewards/rejected": -1.3165304010564631, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.4196519959058342, | |
| "grad_norm": 20.088088174480013, | |
| "kl": 221.86827087402344, | |
| "learning_rate": 9.953147179205854e-07, | |
| "logits/chosen": -129331342.22222222, | |
| "logits/rejected": -110869527.27272727, | |
| "logps/chosen": -582.8767361111111, | |
| "logps/rejected": -650.5072798295455, | |
| "loss": 0.3309, | |
| "rewards/chosen": 4.793890211317274, | |
| "rewards/margins": 14.806629913021819, | |
| "rewards/rejected": -10.012739701704545, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.42988741044012285, | |
| "grad_norm": 10.346910287143617, | |
| "kl": 415.3928527832031, | |
| "learning_rate": 9.94466030441996e-07, | |
| "logits/chosen": -110768944.0, | |
| "logits/rejected": -111097237.33333333, | |
| "logps/chosen": -549.23974609375, | |
| "logps/rejected": -712.3189290364584, | |
| "loss": 0.3102, | |
| "rewards/chosen": 6.6238861083984375, | |
| "rewards/margins": 17.687956492106117, | |
| "rewards/rejected": -11.064070383707682, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.44012282497441146, | |
| "grad_norm": 18.314103572328413, | |
| "kl": 576.5167236328125, | |
| "learning_rate": 9.935471633760572e-07, | |
| "logits/chosen": -99995520.0, | |
| "logits/rejected": -107673870.22222222, | |
| "logps/chosen": -487.12202592329544, | |
| "logps/rejected": -492.6575520833333, | |
| "loss": 0.4035, | |
| "rewards/chosen": 6.976482044566762, | |
| "rewards/margins": 4.1658927840415885, | |
| "rewards/rejected": 2.8105892605251737, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.4503582395087001, | |
| "grad_norm": 11.724627840974088, | |
| "kl": 1106.9119873046875, | |
| "learning_rate": 9.925582471376154e-07, | |
| "logits/chosen": -108020854.15384616, | |
| "logits/rejected": -109625874.28571428, | |
| "logps/chosen": -473.66616586538464, | |
| "logps/rejected": -555.1595284598214, | |
| "loss": 0.3728, | |
| "rewards/chosen": 7.7546832744891825, | |
| "rewards/margins": 3.9347364404699303, | |
| "rewards/rejected": 3.819946834019252, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.46059365404298874, | |
| "grad_norm": 15.613591939966279, | |
| "kl": 1362.2752685546875, | |
| "learning_rate": 9.914994220836e-07, | |
| "logits/chosen": -117927030.15384616, | |
| "logits/rejected": -106152228.57142857, | |
| "logps/chosen": -495.19174429086536, | |
| "logps/rejected": -458.72154017857144, | |
| "loss": 0.3465, | |
| "rewards/chosen": 9.081056448129507, | |
| "rewards/margins": 3.9735811673677883, | |
| "rewards/rejected": 5.107475280761719, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.47082906857727735, | |
| "grad_norm": 11.952485164591582, | |
| "kl": 1285.5869140625, | |
| "learning_rate": 9.903708384931013e-07, | |
| "logits/chosen": -120327202.13333334, | |
| "logits/rejected": -109135692.8, | |
| "logps/chosen": -505.6738606770833, | |
| "logps/rejected": -466.63486328125, | |
| "loss": 0.385, | |
| "rewards/chosen": 10.037539672851562, | |
| "rewards/margins": 4.661623382568359, | |
| "rewards/rejected": 5.375916290283203, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.481064483111566, | |
| "grad_norm": 0.12365071345843295, | |
| "kl": 682.0918579101562, | |
| "learning_rate": 9.891726565460422e-07, | |
| "logits/chosen": -112774152.0, | |
| "logits/rejected": -112093888.0, | |
| "logps/chosen": -538.9781494140625, | |
| "logps/rejected": -575.06591796875, | |
| "loss": 0.3106, | |
| "rewards/chosen": 8.146060943603516, | |
| "rewards/margins": 8.388625462849935, | |
| "rewards/rejected": -0.24256451924641928, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.49129989764585463, | |
| "grad_norm": 0.49092307811834135, | |
| "kl": 450.83636474609375, | |
| "learning_rate": 9.87905046300444e-07, | |
| "logits/chosen": -114120941.71428572, | |
| "logits/rejected": -114062867.6923077, | |
| "logps/chosen": -527.6252092633929, | |
| "logps/rejected": -601.4497445913462, | |
| "loss": 0.2638, | |
| "rewards/chosen": 8.766914367675781, | |
| "rewards/margins": 13.61475078876202, | |
| "rewards/rejected": -4.847836421086238, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.5015353121801432, | |
| "grad_norm": 6.1800940127591195, | |
| "kl": 483.6259765625, | |
| "learning_rate": 9.865681876682896e-07, | |
| "logits/chosen": -145298368.0, | |
| "logits/rejected": -108289821.53846154, | |
| "logps/chosen": -661.1333705357143, | |
| "logps/rejected": -580.3282376802885, | |
| "loss": 0.3479, | |
| "rewards/chosen": 6.970576695033482, | |
| "rewards/margins": 2.642127152327652, | |
| "rewards/rejected": 4.32844954270583, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.5117707267144319, | |
| "grad_norm": 7.945808526296434, | |
| "kl": 107.43032836914062, | |
| "learning_rate": 9.851622703899882e-07, | |
| "logits/chosen": -121188147.2, | |
| "logits/rejected": -111221376.0, | |
| "logps/chosen": -597.09716796875, | |
| "logps/rejected": -687.855078125, | |
| "loss": 0.3365, | |
| "rewards/chosen": 3.832713317871094, | |
| "rewards/margins": 14.021282196044922, | |
| "rewards/rejected": -10.188568878173829, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.5220061412487206, | |
| "grad_norm": 11.356989950783646, | |
| "kl": 150.3419189453125, | |
| "learning_rate": 9.836874940074464e-07, | |
| "logits/chosen": -101973013.33333333, | |
| "logits/rejected": -103617608.0, | |
| "logps/chosen": -522.2017415364584, | |
| "logps/rejected": -547.4072265625, | |
| "loss": 0.3154, | |
| "rewards/chosen": 5.501302083333333, | |
| "rewards/margins": 3.550793608029683, | |
| "rewards/rejected": 1.95050847530365, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.5322415557830092, | |
| "grad_norm": 15.04108069791104, | |
| "kl": 167.46121215820312, | |
| "learning_rate": 9.821440678357468e-07, | |
| "logits/chosen": -96939017.14285715, | |
| "logits/rejected": -111715840.0, | |
| "logps/chosen": -514.41259765625, | |
| "logps/rejected": -753.7920673076923, | |
| "loss": 0.3735, | |
| "rewards/chosen": 6.49347904750279, | |
| "rewards/margins": 17.330997592800266, | |
| "rewards/rejected": -10.837518545297476, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.5424769703172978, | |
| "grad_norm": 24.162543374076332, | |
| "kl": 312.23114013671875, | |
| "learning_rate": 9.8053221093344e-07, | |
| "logits/chosen": -116499040.0, | |
| "logits/rejected": -100585819.42857143, | |
| "logps/chosen": -541.9063313802084, | |
| "logps/rejected": -666.6215122767857, | |
| "loss": 0.3326, | |
| "rewards/chosen": 4.888221104939778, | |
| "rewards/margins": 12.125807535080682, | |
| "rewards/rejected": -7.237586430140904, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.5527123848515865, | |
| "grad_norm": 12.602052991862804, | |
| "kl": 267.88934326171875, | |
| "learning_rate": 9.788521520714529e-07, | |
| "logits/chosen": -92977206.85714285, | |
| "logits/rejected": -93523712.0, | |
| "logps/chosen": -519.3472726004464, | |
| "logps/rejected": -672.4755108173077, | |
| "loss": 0.341, | |
| "rewards/chosen": 6.53789302280971, | |
| "rewards/margins": 20.28299847277966, | |
| "rewards/rejected": -13.745105449969952, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.5629477993858751, | |
| "grad_norm": 8.979067508659034, | |
| "kl": 238.48565673828125, | |
| "learning_rate": 9.7710412970062e-07, | |
| "logits/chosen": -93921649.77777778, | |
| "logits/rejected": -94202391.27272727, | |
| "logps/chosen": -538.5509982638889, | |
| "logps/rejected": -825.6182528409091, | |
| "loss": 0.3459, | |
| "rewards/chosen": 5.394065856933594, | |
| "rewards/margins": 30.139568675648082, | |
| "rewards/rejected": -24.74550281871449, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.5731832139201638, | |
| "grad_norm": 13.93301876621068, | |
| "kl": 451.12652587890625, | |
| "learning_rate": 9.752883919178408e-07, | |
| "logits/chosen": -93620721.77777778, | |
| "logits/rejected": -119429352.72727273, | |
| "logps/chosen": -447.1330295138889, | |
| "logps/rejected": -851.3874289772727, | |
| "loss": 0.3262, | |
| "rewards/chosen": 6.1410704718695746, | |
| "rewards/margins": 23.947242235896564, | |
| "rewards/rejected": -17.80617176402699, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.5834186284544524, | |
| "grad_norm": 4.890619149638879, | |
| "kl": 97.303955078125, | |
| "learning_rate": 9.734051964308648e-07, | |
| "logits/chosen": -101164160.0, | |
| "logits/rejected": -95843181.71428572, | |
| "logps/chosen": -528.27490234375, | |
| "logps/rejected": -700.330078125, | |
| "loss": 0.284, | |
| "rewards/chosen": 7.194744990422175, | |
| "rewards/margins": 15.150297772753369, | |
| "rewards/rejected": -7.9555527823311945, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.593654042988741, | |
| "grad_norm": 11.746526426847675, | |
| "kl": 96.44719696044922, | |
| "learning_rate": 9.71454810521718e-07, | |
| "logits/chosen": -100481490.28571428, | |
| "logits/rejected": -102850837.33333333, | |
| "logps/chosen": -567.1158272879464, | |
| "logps/rejected": -688.10302734375, | |
| "loss": 0.327, | |
| "rewards/chosen": 3.4819068908691406, | |
| "rewards/margins": 17.827465057373047, | |
| "rewards/rejected": -14.345558166503906, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.601842374616172, | |
| "eval_logits/chosen": -98040168.0, | |
| "eval_logits/rejected": -91529280.0, | |
| "eval_logps/chosen": -425.20477294921875, | |
| "eval_logps/rejected": -455.2176208496094, | |
| "eval_loss": 0.39909082651138306, | |
| "eval_rewards/chosen": 7.455529689788818, | |
| "eval_rewards/margins": -3.385171413421631, | |
| "eval_rewards/rejected": 10.84070110321045, | |
| "eval_runtime": 2.934, | |
| "eval_samples_per_second": 3.408, | |
| "eval_steps_per_second": 0.682, | |
| "kl": 0.0, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.6038894575230297, | |
| "grad_norm": 11.508747631852, | |
| "kl": 189.71697998046875, | |
| "learning_rate": 9.694375110087653e-07, | |
| "logits/chosen": -105231475.2, | |
| "logits/rejected": -98110668.8, | |
| "logps/chosen": -517.33330078125, | |
| "logps/rejected": -650.870703125, | |
| "loss": 0.3037, | |
| "rewards/chosen": 7.2950927734375, | |
| "rewards/margins": 11.132262039184571, | |
| "rewards/rejected": -3.8371692657470704, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.6141248720573184, | |
| "grad_norm": 8.003411922362675, | |
| "kl": 246.2487030029297, | |
| "learning_rate": 9.673535842074236e-07, | |
| "logits/chosen": -89631707.42857143, | |
| "logits/rejected": -93188706.46153846, | |
| "logps/chosen": -533.9011579241071, | |
| "logps/rejected": -591.8625300480769, | |
| "loss": 0.3143, | |
| "rewards/chosen": 4.597275870186942, | |
| "rewards/margins": 8.995123852740278, | |
| "rewards/rejected": -4.397847982553335, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.6243602865916069, | |
| "grad_norm": 13.51479152454202, | |
| "kl": 251.9799041748047, | |
| "learning_rate": 9.65203325889523e-07, | |
| "logits/chosen": -103712000.0, | |
| "logits/rejected": -102217077.33333333, | |
| "logps/chosen": -554.9147251674107, | |
| "logps/rejected": -660.7069498697916, | |
| "loss": 0.2897, | |
| "rewards/chosen": 3.735581534249442, | |
| "rewards/margins": 9.694771902901785, | |
| "rewards/rejected": -5.959190368652344, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.6345957011258956, | |
| "grad_norm": 6.752419583328531, | |
| "kl": 284.58343505859375, | |
| "learning_rate": 9.6298704124133e-07, | |
| "logits/chosen": -112582016.0, | |
| "logits/rejected": -102245866.66666667, | |
| "logps/chosen": -558.93798828125, | |
| "logps/rejected": -657.9363606770834, | |
| "loss": 0.2812, | |
| "rewards/chosen": 5.288269996643066, | |
| "rewards/margins": 13.41814390818278, | |
| "rewards/rejected": -8.129873911539713, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.6448311156601843, | |
| "grad_norm": 7.266588839401468, | |
| "kl": 580.995361328125, | |
| "learning_rate": 9.607050448202303e-07, | |
| "logits/chosen": -92270584.8888889, | |
| "logits/rejected": -102551435.63636364, | |
| "logps/chosen": -440.3317599826389, | |
| "logps/rejected": -712.0537109375, | |
| "loss": 0.2915, | |
| "rewards/chosen": 8.482827080620659, | |
| "rewards/margins": 19.49976086857343, | |
| "rewards/rejected": -11.01693378795277, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.6550665301944729, | |
| "grad_norm": 7.2744015718609205, | |
| "kl": 340.7578125, | |
| "learning_rate": 9.583576605100849e-07, | |
| "logits/chosen": -101343896.0, | |
| "logits/rejected": -84657386.66666667, | |
| "logps/chosen": -561.446533203125, | |
| "logps/rejected": -587.9672037760416, | |
| "loss": 0.3113, | |
| "rewards/chosen": 6.773608207702637, | |
| "rewards/margins": 9.901070276896158, | |
| "rewards/rejected": -3.127462069193522, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.6653019447287615, | |
| "grad_norm": 8.836525928002317, | |
| "kl": 427.6199645996094, | |
| "learning_rate": 9.559452214752618e-07, | |
| "logits/chosen": -105170837.33333333, | |
| "logits/rejected": -86878112.0, | |
| "logps/chosen": -552.1053059895834, | |
| "logps/rejected": -517.5369873046875, | |
| "loss": 0.2568, | |
| "rewards/chosen": 6.057671864827474, | |
| "rewards/margins": 3.1388653119405108, | |
| "rewards/rejected": 2.918806552886963, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.6755373592630501, | |
| "grad_norm": 13.148384194030669, | |
| "kl": 565.3815307617188, | |
| "learning_rate": 9.53468070113348e-07, | |
| "logits/chosen": -112625361.45454545, | |
| "logits/rejected": -100674417.77777778, | |
| "logps/chosen": -521.5106090198864, | |
| "logps/rejected": -619.4135199652778, | |
| "loss": 0.3215, | |
| "rewards/chosen": 6.586706681685015, | |
| "rewards/margins": 7.943774637549815, | |
| "rewards/rejected": -1.3570679558648004, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.6857727737973388, | |
| "grad_norm": 19.435897057149237, | |
| "kl": 780.356689453125, | |
| "learning_rate": 9.50926558006555e-07, | |
| "logits/chosen": -95798784.0, | |
| "logits/rejected": -105156501.33333333, | |
| "logps/chosen": -470.95458984375, | |
| "logps/rejected": -546.5327555338541, | |
| "loss": 0.3182, | |
| "rewards/chosen": 7.692968368530273, | |
| "rewards/margins": 2.9158140818277998, | |
| "rewards/rejected": 4.777154286702474, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.6960081883316275, | |
| "grad_norm": 6.833848958642979, | |
| "kl": 473.15252685546875, | |
| "learning_rate": 9.483210458718179e-07, | |
| "logits/chosen": -118425624.0, | |
| "logits/rejected": -96726122.66666667, | |
| "logps/chosen": -520.1289672851562, | |
| "logps/rejected": -606.3455403645834, | |
| "loss": 0.3185, | |
| "rewards/chosen": 7.276991844177246, | |
| "rewards/margins": 8.136903285980225, | |
| "rewards/rejected": -0.8599114418029785, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.706243602865916, | |
| "grad_norm": 14.808552507419774, | |
| "kl": 146.22113037109375, | |
| "learning_rate": 9.456519035095981e-07, | |
| "logits/chosen": -107473191.38461539, | |
| "logits/rejected": -101956937.14285715, | |
| "logps/chosen": -526.8605769230769, | |
| "logps/rejected": -668.9007393973214, | |
| "loss": 0.2883, | |
| "rewards/chosen": 7.2536163330078125, | |
| "rewards/margins": 11.410586220877512, | |
| "rewards/rejected": -4.156969887869699, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.7164790174002047, | |
| "grad_norm": 24.1373966304863, | |
| "kl": 37.00183868408203, | |
| "learning_rate": 9.429195097513992e-07, | |
| "logits/chosen": -105830352.0, | |
| "logits/rejected": -106361685.33333333, | |
| "logps/chosen": -576.9627685546875, | |
| "logps/rejected": -804.2159830729166, | |
| "loss": 0.3142, | |
| "rewards/chosen": 2.0549275875091553, | |
| "rewards/margins": 19.464907089869182, | |
| "rewards/rejected": -17.409979502360027, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.7267144319344934, | |
| "grad_norm": 8.881119400084598, | |
| "kl": 153.50912475585938, | |
| "learning_rate": 9.401242524059977e-07, | |
| "logits/chosen": -104918784.0, | |
| "logits/rejected": -106944930.9090909, | |
| "logps/chosen": -621.583984375, | |
| "logps/rejected": -713.2195490056819, | |
| "loss": 0.2933, | |
| "rewards/chosen": 1.9484386444091797, | |
| "rewards/margins": 12.224429910833186, | |
| "rewards/rejected": -10.275991266424006, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.736949846468782, | |
| "grad_norm": 15.249343262390562, | |
| "kl": 181.8312530517578, | |
| "learning_rate": 9.372665282044024e-07, | |
| "logits/chosen": -112600905.14285715, | |
| "logits/rejected": -99067864.61538461, | |
| "logps/chosen": -519.7294921875, | |
| "logps/rejected": -749.6340144230769, | |
| "loss": 0.2071, | |
| "rewards/chosen": 7.2293581281389505, | |
| "rewards/margins": 27.119649782285585, | |
| "rewards/rejected": -19.890291654146633, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.7471852610030706, | |
| "grad_norm": 16.243100773857645, | |
| "kl": 254.5145263671875, | |
| "learning_rate": 9.343467427435461e-07, | |
| "logits/chosen": -88804677.81818181, | |
| "logits/rejected": -105656291.55555555, | |
| "logps/chosen": -501.12753018465907, | |
| "logps/rejected": -622.5223524305555, | |
| "loss": 0.2931, | |
| "rewards/chosen": 2.286927830089222, | |
| "rewards/margins": 9.008850810503718, | |
| "rewards/rejected": -6.721922980414496, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.7574206755373593, | |
| "grad_norm": 13.717272461313408, | |
| "kl": 251.88658142089844, | |
| "learning_rate": 9.313653104287186e-07, | |
| "logits/chosen": -102378560.0, | |
| "logits/rejected": -96828368.0, | |
| "logps/chosen": -588.1607259114584, | |
| "logps/rejected": -709.3660888671875, | |
| "loss": 0.3341, | |
| "rewards/chosen": 2.007448355356852, | |
| "rewards/margins": 10.696151892344156, | |
| "rewards/rejected": -8.688703536987305, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.7676560900716479, | |
| "grad_norm": 11.54276353096192, | |
| "kl": 315.0760192871094, | |
| "learning_rate": 9.283226544147511e-07, | |
| "logits/chosen": -86456040.72727273, | |
| "logits/rejected": -96371100.44444445, | |
| "logps/chosen": -448.26313920454544, | |
| "logps/rejected": -643.1417100694445, | |
| "loss": 0.3983, | |
| "rewards/chosen": 7.560623862526634, | |
| "rewards/margins": 11.595734336159445, | |
| "rewards/rejected": -4.0351104736328125, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.7778915046059366, | |
| "grad_norm": 15.08405881286112, | |
| "kl": 487.782470703125, | |
| "learning_rate": 9.25219206545957e-07, | |
| "logits/chosen": -98477909.33333333, | |
| "logits/rejected": -93243504.0, | |
| "logps/chosen": -528.0136311848959, | |
| "logps/rejected": -646.53369140625, | |
| "loss": 0.2999, | |
| "rewards/chosen": 7.208832422892253, | |
| "rewards/margins": 12.962552229563396, | |
| "rewards/rejected": -5.753719806671143, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.7881269191402251, | |
| "grad_norm": 10.02364890514971, | |
| "kl": 337.864013671875, | |
| "learning_rate": 9.220554072948411e-07, | |
| "logits/chosen": -86742869.33333333, | |
| "logits/rejected": -99716196.57142857, | |
| "logps/chosen": -465.5155436197917, | |
| "logps/rejected": -689.5123465401786, | |
| "loss": 0.3449, | |
| "rewards/chosen": 7.739498138427734, | |
| "rewards/margins": 17.27605492728097, | |
| "rewards/rejected": -9.536556788853236, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.7983623336745138, | |
| "grad_norm": 6.84043498795176, | |
| "kl": 173.30726623535156, | |
| "learning_rate": 9.188317056995821e-07, | |
| "logits/chosen": -97626282.66666667, | |
| "logits/rejected": -92453771.63636364, | |
| "logps/chosen": -546.7721896701389, | |
| "logps/rejected": -680.9408735795455, | |
| "loss": 0.2973, | |
| "rewards/chosen": 5.306654188368055, | |
| "rewards/margins": 12.666858711627999, | |
| "rewards/rejected": -7.360204523259943, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.8085977482088025, | |
| "grad_norm": 13.851012784720913, | |
| "kl": 245.4700927734375, | |
| "learning_rate": 9.155485593003018e-07, | |
| "logits/chosen": -85085882.18181819, | |
| "logits/rejected": -89022186.66666667, | |
| "logps/chosen": -476.38858309659093, | |
| "logps/rejected": -872.1040581597222, | |
| "loss": 0.2803, | |
| "rewards/chosen": 5.146501020951704, | |
| "rewards/margins": 33.767214996646146, | |
| "rewards/rejected": -28.620713975694443, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.8188331627430911, | |
| "grad_norm": 13.213804383427497, | |
| "kl": 134.34815979003906, | |
| "learning_rate": 9.122064340741255e-07, | |
| "logits/chosen": -106269207.27272727, | |
| "logits/rejected": -83881678.22222222, | |
| "logps/chosen": -550.1131480823864, | |
| "logps/rejected": -748.6202256944445, | |
| "loss": 0.2973, | |
| "rewards/chosen": 5.797416687011719, | |
| "rewards/margins": 30.07076432969835, | |
| "rewards/rejected": -24.27334764268663, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.8290685772773797, | |
| "grad_norm": 8.051808004953356, | |
| "kl": 60.33879089355469, | |
| "learning_rate": 9.088058043690465e-07, | |
| "logits/chosen": -116116224.0, | |
| "logits/rejected": -96582818.9090909, | |
| "logps/chosen": -650.5863715277778, | |
| "logps/rejected": -727.5021306818181, | |
| "loss": 0.3047, | |
| "rewards/chosen": 4.034843444824219, | |
| "rewards/margins": 18.15105576948686, | |
| "rewards/rejected": -14.116212324662643, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.8393039918116684, | |
| "grad_norm": 4.835531934200326, | |
| "kl": 320.6475524902344, | |
| "learning_rate": 9.053471528366017e-07, | |
| "logits/chosen": -123069098.66666667, | |
| "logits/rejected": -99107821.71428572, | |
| "logps/chosen": -613.021728515625, | |
| "logps/rejected": -663.7785295758929, | |
| "loss": 0.2865, | |
| "rewards/chosen": 5.482795715332031, | |
| "rewards/margins": 18.49122510637556, | |
| "rewards/rejected": -13.008429391043526, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.849539406345957, | |
| "grad_norm": 6.763920280816717, | |
| "kl": 119.12086486816406, | |
| "learning_rate": 9.01830970363368e-07, | |
| "logits/chosen": -120392115.2, | |
| "logits/rejected": -101945299.2, | |
| "logps/chosen": -546.945947265625, | |
| "logps/rejected": -929.22958984375, | |
| "loss": 0.2935, | |
| "rewards/chosen": 5.795610046386718, | |
| "rewards/margins": 33.776902770996095, | |
| "rewards/rejected": -27.981292724609375, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.8597748208802457, | |
| "grad_norm": 16.695156760816847, | |
| "kl": 396.1942443847656, | |
| "learning_rate": 8.982577560012924e-07, | |
| "logits/chosen": -92570790.4, | |
| "logits/rejected": -96133427.2, | |
| "logps/chosen": -500.14462890625, | |
| "logps/rejected": -747.961962890625, | |
| "loss": 0.3157, | |
| "rewards/chosen": 6.134347915649414, | |
| "rewards/margins": 16.947478103637696, | |
| "rewards/rejected": -10.813130187988282, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.8700102354145343, | |
| "grad_norm": 8.721135056770576, | |
| "kl": 161.04820251464844, | |
| "learning_rate": 8.9462801689686e-07, | |
| "logits/chosen": -111414489.6, | |
| "logits/rejected": -94172275.2, | |
| "logps/chosen": -513.11845703125, | |
| "logps/rejected": -592.3296875, | |
| "loss": 0.2634, | |
| "rewards/chosen": 7.548574066162109, | |
| "rewards/margins": 10.237901115417479, | |
| "rewards/rejected": -2.689327049255371, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.8802456499488229, | |
| "grad_norm": 23.6450296971921, | |
| "kl": 120.66998291015625, | |
| "learning_rate": 8.909422682191157e-07, | |
| "logits/chosen": -108347946.66666667, | |
| "logits/rejected": -96643232.0, | |
| "logps/chosen": -650.9365234375, | |
| "logps/rejected": -971.8366088867188, | |
| "loss": 0.3753, | |
| "rewards/chosen": 1.5685276985168457, | |
| "rewards/margins": 38.76629304885864, | |
| "rewards/rejected": -37.1977653503418, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.8904810644831116, | |
| "grad_norm": 10.184595791078122, | |
| "kl": 55.834754943847656, | |
| "learning_rate": 8.872010330865454e-07, | |
| "logits/chosen": -118076885.33333333, | |
| "logits/rejected": -120020640.0, | |
| "logps/chosen": -600.4386800130209, | |
| "logps/rejected": -1235.656494140625, | |
| "loss": 0.3306, | |
| "rewards/chosen": 3.8476082483927407, | |
| "rewards/margins": 56.71857992808024, | |
| "rewards/rejected": -52.8709716796875, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.9007164790174002, | |
| "grad_norm": 15.000452794185478, | |
| "kl": 166.8241424560547, | |
| "learning_rate": 8.834048424928304e-07, | |
| "logits/chosen": -84017722.18181819, | |
| "logits/rejected": -92456405.33333333, | |
| "logps/chosen": -500.98464133522725, | |
| "logps/rejected": -590.1988389756945, | |
| "loss": 0.3939, | |
| "rewards/chosen": 3.811018857088956, | |
| "rewards/margins": 5.707060091423266, | |
| "rewards/rejected": -1.8960412343343098, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.9027635619242579, | |
| "eval_logits/chosen": -88125560.0, | |
| "eval_logits/rejected": -82095856.0, | |
| "eval_logps/chosen": -429.337646484375, | |
| "eval_logps/rejected": -468.053466796875, | |
| "eval_loss": 0.20316681265830994, | |
| "eval_rewards/chosen": 7.042242527008057, | |
| "eval_rewards/margins": -2.51487398147583, | |
| "eval_rewards/rejected": 9.557116508483887, | |
| "eval_runtime": 2.6369, | |
| "eval_samples_per_second": 3.792, | |
| "eval_steps_per_second": 0.758, | |
| "kl": 0.0, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.9109518935516888, | |
| "grad_norm": 13.183651143904488, | |
| "kl": 134.21334838867188, | |
| "learning_rate": 8.795542352314834e-07, | |
| "logits/chosen": -94884726.15384616, | |
| "logits/rejected": -93538349.71428572, | |
| "logps/chosen": -486.8059645432692, | |
| "logps/rejected": -642.7673688616071, | |
| "loss": 0.31, | |
| "rewards/chosen": 7.5120063194861775, | |
| "rewards/margins": 10.829019651308164, | |
| "rewards/rejected": -3.3170133318219865, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.9211873080859775, | |
| "grad_norm": 12.212132136986225, | |
| "kl": 207.9207763671875, | |
| "learning_rate": 8.756497578193771e-07, | |
| "logits/chosen": -83985936.0, | |
| "logits/rejected": -85384976.0, | |
| "logps/chosen": -493.8150329589844, | |
| "logps/rejected": -741.118408203125, | |
| "loss": 0.3121, | |
| "rewards/chosen": 3.521029472351074, | |
| "rewards/margins": 21.653578758239746, | |
| "rewards/rejected": -18.132549285888672, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.9314227226202662, | |
| "grad_norm": 15.869594800463931, | |
| "kl": 121.78730773925781, | |
| "learning_rate": 8.716919644191773e-07, | |
| "logits/chosen": -95207923.2, | |
| "logits/rejected": -86504256.0, | |
| "logps/chosen": -502.17255859375, | |
| "logps/rejected": -570.52373046875, | |
| "loss": 0.3842, | |
| "rewards/chosen": 4.867946624755859, | |
| "rewards/margins": 8.839313507080078, | |
| "rewards/rejected": -3.9713668823242188, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.9416581371545547, | |
| "grad_norm": 11.625516820508704, | |
| "kl": 26.68120574951172, | |
| "learning_rate": 8.676814167606905e-07, | |
| "logits/chosen": -94190675.2, | |
| "logits/rejected": -74575884.8, | |
| "logps/chosen": -573.2962890625, | |
| "logps/rejected": -981.627734375, | |
| "loss": 0.2716, | |
| "rewards/chosen": 3.0543357849121096, | |
| "rewards/margins": 48.8309928894043, | |
| "rewards/rejected": -45.77665710449219, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.9518935516888434, | |
| "grad_norm": 11.337702446083828, | |
| "kl": 94.01974487304688, | |
| "learning_rate": 8.636186840611379e-07, | |
| "logits/chosen": -75281489.45454545, | |
| "logits/rejected": -80548579.55555555, | |
| "logps/chosen": -506.4801580255682, | |
| "logps/rejected": -948.1449652777778, | |
| "loss": 0.3049, | |
| "rewards/chosen": 3.306485262784091, | |
| "rewards/margins": 38.26877231790562, | |
| "rewards/rejected": -34.96228705512153, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.962128966223132, | |
| "grad_norm": 9.210148269471487, | |
| "kl": 10.692268371582031, | |
| "learning_rate": 8.595043429443657e-07, | |
| "logits/chosen": -81648140.8, | |
| "logits/rejected": -68831852.8, | |
| "logps/chosen": -525.10361328125, | |
| "logps/rejected": -681.321337890625, | |
| "loss": 0.3209, | |
| "rewards/chosen": 4.949716567993164, | |
| "rewards/margins": 18.163093948364256, | |
| "rewards/rejected": -13.213377380371094, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.9723643807574207, | |
| "grad_norm": 15.353110855422312, | |
| "kl": 127.87919616699219, | |
| "learning_rate": 8.553389773590054e-07, | |
| "logits/chosen": -79860096.0, | |
| "logits/rejected": -79998992.0, | |
| "logps/chosen": -505.05023193359375, | |
| "logps/rejected": -634.8770751953125, | |
| "loss": 0.2912, | |
| "rewards/chosen": 3.4712483882904053, | |
| "rewards/margins": 12.555377721786499, | |
| "rewards/rejected": -9.084129333496094, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.9825997952917093, | |
| "grad_norm": 13.869616858822004, | |
| "kl": 238.24578857421875, | |
| "learning_rate": 8.511231784955937e-07, | |
| "logits/chosen": -88956788.36363636, | |
| "logits/rejected": -103397703.1111111, | |
| "logps/chosen": -548.9154829545455, | |
| "logps/rejected": -788.0484483506945, | |
| "loss": 0.3187, | |
| "rewards/chosen": 4.906893643465909, | |
| "rewards/margins": 20.047172777580492, | |
| "rewards/rejected": -15.140279134114584, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.9928352098259979, | |
| "grad_norm": 7.268319539608931, | |
| "kl": 338.30377197265625, | |
| "learning_rate": 8.468575447026651e-07, | |
| "logits/chosen": -101875840.0, | |
| "logits/rejected": -87527862.85714285, | |
| "logps/chosen": -622.6335261418269, | |
| "logps/rejected": -595.9942801339286, | |
| "loss": 0.2935, | |
| "rewards/chosen": 6.616310706505408, | |
| "rewards/margins": 11.137901348072093, | |
| "rewards/rejected": -4.521590641566685, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.0030706243602865, | |
| "grad_norm": 11.76521234747505, | |
| "kl": 199.67869567871094, | |
| "learning_rate": 8.425426814018276e-07, | |
| "logits/chosen": -89589788.44444445, | |
| "logits/rejected": -80169029.81818181, | |
| "logps/chosen": -493.0221896701389, | |
| "logps/rejected": -716.8766424005681, | |
| "loss": 0.3217, | |
| "rewards/chosen": 7.745334201388889, | |
| "rewards/margins": 21.7819885870423, | |
| "rewards/rejected": -14.036654385653408, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.0133060388945752, | |
| "grad_norm": 8.474284377746724, | |
| "kl": 244.9769744873047, | |
| "learning_rate": 8.381792010018361e-07, | |
| "logits/chosen": -94540608.0, | |
| "logits/rejected": -89885610.66666667, | |
| "logps/chosen": -535.6998845880681, | |
| "logps/rejected": -803.9326714409722, | |
| "loss": 0.2194, | |
| "rewards/chosen": 5.931960365988991, | |
| "rewards/margins": 25.050717825841424, | |
| "rewards/rejected": -19.118757459852432, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.0235414534288638, | |
| "grad_norm": 12.815547513288609, | |
| "kl": 179.80715942382812, | |
| "learning_rate": 8.33767722811672e-07, | |
| "logits/chosen": -89090510.22222222, | |
| "logits/rejected": -94376587.63636364, | |
| "logps/chosen": -540.0843641493055, | |
| "logps/rejected": -727.6770241477273, | |
| "loss": 0.2687, | |
| "rewards/chosen": 4.669386969672309, | |
| "rewards/margins": 16.463514000478416, | |
| "rewards/rejected": -11.794127030806107, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.0337768679631525, | |
| "grad_norm": 6.109931782123331, | |
| "kl": 160.75302124023438, | |
| "learning_rate": 8.293088729526465e-07, | |
| "logits/chosen": -82976885.33333333, | |
| "logits/rejected": -86193800.0, | |
| "logps/chosen": -501.4635823567708, | |
| "logps/rejected": -1245.8173828125, | |
| "loss": 0.2226, | |
| "rewards/chosen": 5.3093001047770185, | |
| "rewards/margins": 75.92017046610515, | |
| "rewards/rejected": -70.61087036132812, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.0440122824974412, | |
| "grad_norm": 15.654981917234721, | |
| "kl": 116.26859283447266, | |
| "learning_rate": 8.24803284269533e-07, | |
| "logits/chosen": -81231587.55555555, | |
| "logits/rejected": -93158120.72727273, | |
| "logps/chosen": -519.5434027777778, | |
| "logps/rejected": -1056.223544034091, | |
| "loss": 0.2267, | |
| "rewards/chosen": 4.975582546657986, | |
| "rewards/margins": 50.14659195716935, | |
| "rewards/rejected": -45.17100941051137, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.0542476970317298, | |
| "grad_norm": 6.75360490081935, | |
| "kl": 89.63571166992188, | |
| "learning_rate": 8.202515962407484e-07, | |
| "logits/chosen": -82426867.2, | |
| "logits/rejected": -97263526.4, | |
| "logps/chosen": -495.3529296875, | |
| "logps/rejected": -915.62001953125, | |
| "loss": 0.2569, | |
| "rewards/chosen": 4.025004577636719, | |
| "rewards/margins": 30.261656188964842, | |
| "rewards/rejected": -26.236651611328124, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.0644831115660185, | |
| "grad_norm": 8.435457326936826, | |
| "kl": 65.1689682006836, | |
| "learning_rate": 8.156544548875929e-07, | |
| "logits/chosen": -94793408.0, | |
| "logits/rejected": -82803456.0, | |
| "logps/chosen": -615.216552734375, | |
| "logps/rejected": -890.8118489583334, | |
| "loss": 0.2582, | |
| "rewards/chosen": 4.597088813781738, | |
| "rewards/margins": 39.764737129211426, | |
| "rewards/rejected": -35.16764831542969, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.0747185261003072, | |
| "grad_norm": 10.185636259451332, | |
| "kl": 41.50421142578125, | |
| "learning_rate": 8.110125126825586e-07, | |
| "logits/chosen": -94445606.4, | |
| "logits/rejected": -81221324.8, | |
| "logps/chosen": -530.09130859375, | |
| "logps/rejected": -720.1796875, | |
| "loss": 0.1988, | |
| "rewards/chosen": 2.9237091064453127, | |
| "rewards/margins": 18.941264851888022, | |
| "rewards/rejected": -16.01755574544271, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.0849539406345956, | |
| "grad_norm": 14.367610693803071, | |
| "kl": 40.293880462646484, | |
| "learning_rate": 8.063264284567244e-07, | |
| "logits/chosen": -88462400.0, | |
| "logits/rejected": -84809104.0, | |
| "logps/chosen": -556.0542805989584, | |
| "logps/rejected": -790.8518676757812, | |
| "loss": 0.2299, | |
| "rewards/chosen": 2.747189521789551, | |
| "rewards/margins": 25.718636512756348, | |
| "rewards/rejected": -22.971446990966797, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.0951893551688843, | |
| "grad_norm": 7.867680484103394, | |
| "kl": 84.09693908691406, | |
| "learning_rate": 8.015968673062485e-07, | |
| "logits/chosen": -97079057.45454545, | |
| "logits/rejected": -113056576.0, | |
| "logps/chosen": -652.1316583806819, | |
| "logps/rejected": -871.9325086805555, | |
| "loss": 0.2491, | |
| "rewards/chosen": 3.3613832647150215, | |
| "rewards/margins": 30.61995531332613, | |
| "rewards/rejected": -27.25857204861111, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.105424769703173, | |
| "grad_norm": 18.500289346264474, | |
| "kl": 178.9644317626953, | |
| "learning_rate": 7.968245004979715e-07, | |
| "logits/chosen": -88232459.63636364, | |
| "logits/rejected": -97237361.77777778, | |
| "logps/chosen": -494.20854048295456, | |
| "logps/rejected": -620.2445203993055, | |
| "loss": 0.2429, | |
| "rewards/chosen": 4.08733506636186, | |
| "rewards/margins": 12.414174089528093, | |
| "rewards/rejected": -8.326839023166233, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.1156601842374616, | |
| "grad_norm": 12.84885943054635, | |
| "kl": 122.49982452392578, | |
| "learning_rate": 7.920100053741426e-07, | |
| "logits/chosen": -93443669.33333333, | |
| "logits/rejected": -100065931.63636364, | |
| "logps/chosen": -518.4554036458334, | |
| "logps/rejected": -726.6989524147727, | |
| "loss": 0.2324, | |
| "rewards/chosen": 2.174501207139757, | |
| "rewards/margins": 17.892823961046005, | |
| "rewards/rejected": -15.71832275390625, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.1258955987717503, | |
| "grad_norm": 19.387059089946195, | |
| "kl": 27.290096282958984, | |
| "learning_rate": 7.87154065256285e-07, | |
| "logits/chosen": -92360000.0, | |
| "logits/rejected": -116687132.44444445, | |
| "logps/chosen": -506.4671519886364, | |
| "logps/rejected": -787.19140625, | |
| "loss": 0.2213, | |
| "rewards/chosen": 4.497406352650035, | |
| "rewards/margins": 22.27773743446427, | |
| "rewards/rejected": -17.780331081814236, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.136131013306039, | |
| "grad_norm": 13.175723781566907, | |
| "kl": 73.90142822265625, | |
| "learning_rate": 7.822573693482119e-07, | |
| "logits/chosen": -97808337.45454545, | |
| "logits/rejected": -93331783.1111111, | |
| "logps/chosen": -509.45339133522725, | |
| "logps/rejected": -572.677734375, | |
| "loss": 0.2823, | |
| "rewards/chosen": 4.765384674072266, | |
| "rewards/margins": 7.263773176405165, | |
| "rewards/rejected": -2.498388502332899, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.1463664278403276, | |
| "grad_norm": 13.791443161319787, | |
| "kl": 29.938060760498047, | |
| "learning_rate": 7.773206126382077e-07, | |
| "logits/chosen": -101143435.63636364, | |
| "logits/rejected": -114077838.22222222, | |
| "logps/chosen": -584.0867365056819, | |
| "logps/rejected": -804.4205729166666, | |
| "loss": 0.2454, | |
| "rewards/chosen": 3.9651097384366123, | |
| "rewards/margins": 18.79806329746439, | |
| "rewards/rejected": -14.832953559027779, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.156601842374616, | |
| "grad_norm": 8.034234943256347, | |
| "kl": 116.21412658691406, | |
| "learning_rate": 7.723444958003882e-07, | |
| "logits/chosen": -116664576.0, | |
| "logits/rejected": -97974736.0, | |
| "logps/chosen": -587.37646484375, | |
| "logps/rejected": -664.1363525390625, | |
| "loss": 0.2012, | |
| "rewards/chosen": 5.379718780517578, | |
| "rewards/margins": 13.820740699768066, | |
| "rewards/rejected": -8.441021919250488, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.1668372569089047, | |
| "grad_norm": 22.76694207719624, | |
| "kl": 9.00653076171875, | |
| "learning_rate": 7.673297250952547e-07, | |
| "logits/chosen": -100386534.4, | |
| "logits/rejected": -109167040.0, | |
| "logps/chosen": -535.891455078125, | |
| "logps/rejected": -875.89404296875, | |
| "loss": 0.2078, | |
| "rewards/chosen": 3.430181121826172, | |
| "rewards/margins": 24.129712677001955, | |
| "rewards/rejected": -20.69953155517578, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.1770726714431934, | |
| "grad_norm": 12.60561797855466, | |
| "kl": 11.566696166992188, | |
| "learning_rate": 7.622770122694525e-07, | |
| "logits/chosen": -127401927.1111111, | |
| "logits/rejected": -114918923.63636364, | |
| "logps/chosen": -666.6869574652778, | |
| "logps/rejected": -823.8343394886364, | |
| "loss": 0.2521, | |
| "rewards/chosen": 2.1344752841525607, | |
| "rewards/margins": 23.581607741538924, | |
| "rewards/rejected": -21.447132457386363, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.187308085977482, | |
| "grad_norm": 13.853587264944288, | |
| "kl": 42.4306755065918, | |
| "learning_rate": 7.571870744547551e-07, | |
| "logits/chosen": -109880817.77777778, | |
| "logits/rejected": -106957009.45454545, | |
| "logps/chosen": -593.1612413194445, | |
| "logps/rejected": -905.1737393465909, | |
| "loss": 0.2524, | |
| "rewards/chosen": 2.1737113528781467, | |
| "rewards/margins": 30.0016698933611, | |
| "rewards/rejected": -27.827958540482953, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.1975435005117707, | |
| "grad_norm": 15.543850269979604, | |
| "kl": 110.62580108642578, | |
| "learning_rate": 7.520606340662798e-07, | |
| "logits/chosen": -96752089.6, | |
| "logits/rejected": -93654444.8, | |
| "logps/chosen": -498.780810546875, | |
| "logps/rejected": -706.256103515625, | |
| "loss": 0.2188, | |
| "rewards/chosen": 5.138518905639648, | |
| "rewards/margins": 18.503916549682614, | |
| "rewards/rejected": -13.365397644042968, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.203684749232344, | |
| "eval_logits/chosen": -105911152.0, | |
| "eval_logits/rejected": -95925920.0, | |
| "eval_logps/chosen": -484.4088134765625, | |
| "eval_logps/rejected": -496.7947998046875, | |
| "eval_loss": 0.39017611742019653, | |
| "eval_rewards/chosen": 1.535125732421875, | |
| "eval_rewards/margins": -5.147857666015625, | |
| "eval_rewards/rejected": 6.6829833984375, | |
| "eval_runtime": 2.6237, | |
| "eval_samples_per_second": 3.811, | |
| "eval_steps_per_second": 0.762, | |
| "kl": 0.0, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 1.2077789150460594, | |
| "grad_norm": 10.926577914323278, | |
| "kl": 134.444091796875, | |
| "learning_rate": 7.468984186999565e-07, | |
| "logits/chosen": -93668560.0, | |
| "logits/rejected": -102340970.66666667, | |
| "logps/chosen": -475.44390869140625, | |
| "logps/rejected": -876.3225911458334, | |
| "loss": 0.2985, | |
| "rewards/chosen": 1.7625150680541992, | |
| "rewards/margins": 29.492424329121906, | |
| "rewards/rejected": -27.729909261067707, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.218014329580348, | |
| "grad_norm": 17.09063024418297, | |
| "kl": 101.04781341552734, | |
| "learning_rate": 7.417011610292584e-07, | |
| "logits/chosen": -129169689.6, | |
| "logits/rejected": -107744477.86666666, | |
| "logps/chosen": -637.96298828125, | |
| "logps/rejected": -853.0078776041667, | |
| "loss": 0.1699, | |
| "rewards/chosen": 1.2093938827514648, | |
| "rewards/margins": 29.540536053975423, | |
| "rewards/rejected": -28.33114217122396, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.2282497441146367, | |
| "grad_norm": 10.4739549796137, | |
| "kl": 80.91563415527344, | |
| "learning_rate": 7.364695987012156e-07, | |
| "logits/chosen": -100979484.44444445, | |
| "logits/rejected": -102875240.72727273, | |
| "logps/chosen": -516.0002712673611, | |
| "logps/rejected": -780.2686434659091, | |
| "loss": 0.2704, | |
| "rewards/chosen": 2.6132854885525174, | |
| "rewards/margins": 26.03951617924854, | |
| "rewards/rejected": -23.426230690696023, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.2384851586489254, | |
| "grad_norm": 15.645731487799601, | |
| "kl": 55.83991622924805, | |
| "learning_rate": 7.312044742317196e-07, | |
| "logits/chosen": -97166316.8, | |
| "logits/rejected": -98332864.0, | |
| "logps/chosen": -473.275732421875, | |
| "logps/rejected": -872.339453125, | |
| "loss": 0.2458, | |
| "rewards/chosen": 5.7388427734375, | |
| "rewards/margins": 40.456884765625, | |
| "rewards/rejected": -34.7180419921875, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.2487205731832138, | |
| "grad_norm": 16.544369267846214, | |
| "kl": 26.200450897216797, | |
| "learning_rate": 7.259065349001381e-07, | |
| "logits/chosen": -112301428.36363636, | |
| "logits/rejected": -91687751.1111111, | |
| "logps/chosen": -603.98046875, | |
| "logps/rejected": -697.6657986111111, | |
| "loss": 0.2189, | |
| "rewards/chosen": 4.487701416015625, | |
| "rewards/margins": 19.04924519856771, | |
| "rewards/rejected": -14.561543782552084, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.2589559877175025, | |
| "grad_norm": 12.68572044575919, | |
| "kl": 198.68328857421875, | |
| "learning_rate": 7.205765326432538e-07, | |
| "logits/chosen": -104480836.92307693, | |
| "logits/rejected": -114156653.71428572, | |
| "logps/chosen": -595.9436598557693, | |
| "logps/rejected": -1002.3537946428571, | |
| "loss": 0.296, | |
| "rewards/chosen": 5.6437542255108175, | |
| "rewards/margins": 30.03105716914921, | |
| "rewards/rejected": -24.387302943638392, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.2691914022517912, | |
| "grad_norm": 14.327102372681413, | |
| "kl": 131.150390625, | |
| "learning_rate": 7.152152239485418e-07, | |
| "logits/chosen": -92621104.0, | |
| "logits/rejected": -94707562.66666667, | |
| "logps/chosen": -522.0101318359375, | |
| "logps/rejected": -597.5576171875, | |
| "loss": 0.2498, | |
| "rewards/chosen": 4.805232524871826, | |
| "rewards/margins": 9.411616484324139, | |
| "rewards/rejected": -4.6063839594523115, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.2794268167860798, | |
| "grad_norm": 13.696519258504198, | |
| "kl": 120.63490295410156, | |
| "learning_rate": 7.098233697468019e-07, | |
| "logits/chosen": -91027916.8, | |
| "logits/rejected": -120194508.8, | |
| "logps/chosen": -560.667919921875, | |
| "logps/rejected": -722.74677734375, | |
| "loss": 0.2297, | |
| "rewards/chosen": 4.605225372314453, | |
| "rewards/margins": 18.276374053955077, | |
| "rewards/rejected": -13.671148681640625, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.2896622313203685, | |
| "grad_norm": 19.84542195273479, | |
| "kl": 238.61802673339844, | |
| "learning_rate": 7.044017353041585e-07, | |
| "logits/chosen": -123477056.0, | |
| "logits/rejected": -79627398.4, | |
| "logps/chosen": -695.36806640625, | |
| "logps/rejected": -575.853515625, | |
| "loss": 0.2209, | |
| "rewards/chosen": -6.707785034179688, | |
| "rewards/margins": 1.1494865417480469, | |
| "rewards/rejected": -7.857271575927735, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.2998976458546572, | |
| "grad_norm": 17.108004492145188, | |
| "kl": 22.916343688964844, | |
| "learning_rate": 6.989510901134477e-07, | |
| "logits/chosen": -104629414.4, | |
| "logits/rejected": -99776633.6, | |
| "logps/chosen": -606.61474609375, | |
| "logps/rejected": -840.0265625, | |
| "loss": 0.2224, | |
| "rewards/chosen": 4.991733551025391, | |
| "rewards/margins": 33.285465240478516, | |
| "rewards/rejected": -28.293731689453125, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.3101330603889458, | |
| "grad_norm": 10.36059136074515, | |
| "kl": 116.60319519042969, | |
| "learning_rate": 6.934722077850016e-07, | |
| "logits/chosen": -100436208.0, | |
| "logits/rejected": -115627882.66666667, | |
| "logps/chosen": -515.864501953125, | |
| "logps/rejected": -724.6964518229166, | |
| "loss": 0.2796, | |
| "rewards/chosen": 4.3812665939331055, | |
| "rewards/margins": 12.6353546778361, | |
| "rewards/rejected": -8.254088083902994, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.3203684749232343, | |
| "grad_norm": 15.48356653719983, | |
| "kl": 45.89117431640625, | |
| "learning_rate": 6.879658659368514e-07, | |
| "logits/chosen": -91997213.0909091, | |
| "logits/rejected": -89794709.33333333, | |
| "logps/chosen": -473.80996981534093, | |
| "logps/rejected": -531.3462456597222, | |
| "loss": 0.2376, | |
| "rewards/chosen": 4.85120287808505, | |
| "rewards/margins": 6.749144891295771, | |
| "rewards/rejected": -1.8979420132107205, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.330603889457523, | |
| "grad_norm": 12.868008283397653, | |
| "kl": 18.55701446533203, | |
| "learning_rate": 6.82432846084359e-07, | |
| "logits/chosen": -100941725.53846154, | |
| "logits/rejected": -99222765.71428572, | |
| "logps/chosen": -621.4768254206731, | |
| "logps/rejected": -828.6552734375, | |
| "loss": 0.2153, | |
| "rewards/chosen": 1.7661338219275842, | |
| "rewards/margins": 25.749048337831603, | |
| "rewards/rejected": -23.982914515904017, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.3408393039918116, | |
| "grad_norm": 2.2800239418596466, | |
| "kl": 89.49375915527344, | |
| "learning_rate": 6.768739335292968e-07, | |
| "logits/chosen": -121310284.8, | |
| "logits/rejected": -93642886.4, | |
| "logps/chosen": -556.091015625, | |
| "logps/rejected": -601.05625, | |
| "loss": 0.1707, | |
| "rewards/chosen": 4.573309326171875, | |
| "rewards/margins": 14.642904663085938, | |
| "rewards/rejected": -10.069595336914062, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.3510747185261003, | |
| "grad_norm": 15.366769443574192, | |
| "kl": 293.1350402832031, | |
| "learning_rate": 6.712899172483892e-07, | |
| "logits/chosen": -91668277.33333333, | |
| "logits/rejected": -110515696.0, | |
| "logps/chosen": -507.600341796875, | |
| "logps/rejected": -879.2127685546875, | |
| "loss": 0.2528, | |
| "rewards/chosen": 5.978892644246419, | |
| "rewards/margins": 22.380460103352863, | |
| "rewards/rejected": -16.401567459106445, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.361310133060389, | |
| "grad_norm": 12.823756571517078, | |
| "kl": 88.99737548828125, | |
| "learning_rate": 6.656815897813345e-07, | |
| "logits/chosen": -87199751.1111111, | |
| "logits/rejected": -95049268.36363636, | |
| "logps/chosen": -489.8304036458333, | |
| "logps/rejected": -755.6834161931819, | |
| "loss": 0.1953, | |
| "rewards/chosen": 5.404658423529731, | |
| "rewards/margins": 20.248757988515525, | |
| "rewards/rejected": -14.844099564985795, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.3715455475946776, | |
| "grad_norm": 4.283693497606417, | |
| "kl": 163.53549194335938, | |
| "learning_rate": 6.600497471183179e-07, | |
| "logits/chosen": -101366438.4, | |
| "logits/rejected": -88560857.6, | |
| "logps/chosen": -491.186083984375, | |
| "logps/rejected": -717.26845703125, | |
| "loss": 0.208, | |
| "rewards/chosen": 6.666817474365234, | |
| "rewards/margins": 24.17246780395508, | |
| "rewards/rejected": -17.505650329589844, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.3817809621289663, | |
| "grad_norm": 7.001179043778539, | |
| "kl": 66.92013549804688, | |
| "learning_rate": 6.543951885870382e-07, | |
| "logits/chosen": -95844366.22222222, | |
| "logits/rejected": -96672808.72727273, | |
| "logps/chosen": -510.3117404513889, | |
| "logps/rejected": -747.4904119318181, | |
| "loss": 0.1907, | |
| "rewards/chosen": 6.773768530951606, | |
| "rewards/margins": 23.975265002009845, | |
| "rewards/rejected": -17.20149647105824, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.3920163766632547, | |
| "grad_norm": 14.848116150242822, | |
| "kl": 87.44967651367188, | |
| "learning_rate": 6.48718716739258e-07, | |
| "logits/chosen": -107372951.27272727, | |
| "logits/rejected": -94606008.8888889, | |
| "logps/chosen": -541.4000355113636, | |
| "logps/rejected": -566.193359375, | |
| "loss": 0.2408, | |
| "rewards/chosen": 3.51375337080522, | |
| "rewards/margins": 9.902944892343848, | |
| "rewards/rejected": -6.389191521538629, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 1.4022517911975436, | |
| "grad_norm": 11.300486504343981, | |
| "kl": 102.50377655029297, | |
| "learning_rate": 6.430211372368983e-07, | |
| "logits/chosen": -104613869.71428572, | |
| "logits/rejected": -78341808.0, | |
| "logps/chosen": -588.1082589285714, | |
| "logps/rejected": -561.199462890625, | |
| "loss": 0.2488, | |
| "rewards/chosen": 3.8442611694335938, | |
| "rewards/margins": 6.867713610331217, | |
| "rewards/rejected": -3.0234524408976235, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 1.412487205731832, | |
| "grad_norm": 7.278523044606318, | |
| "kl": 47.79290008544922, | |
| "learning_rate": 6.373032587376903e-07, | |
| "logits/chosen": -98703773.53846154, | |
| "logits/rejected": -88402276.57142857, | |
| "logps/chosen": -518.7367412860577, | |
| "logps/rejected": -640.8177315848214, | |
| "loss": 0.2596, | |
| "rewards/chosen": 3.898407275860126, | |
| "rewards/margins": 13.543180486658118, | |
| "rewards/rejected": -9.644773210797991, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 1.4227226202661207, | |
| "grad_norm": 14.528128586654807, | |
| "kl": 20.99447250366211, | |
| "learning_rate": 6.31565892780403e-07, | |
| "logits/chosen": -113441461.33333333, | |
| "logits/rejected": -117415408.0, | |
| "logps/chosen": -673.6360677083334, | |
| "logps/rejected": -767.6175537109375, | |
| "loss": 0.2178, | |
| "rewards/chosen": 3.181301752726237, | |
| "rewards/margins": 22.52920405069987, | |
| "rewards/rejected": -19.347902297973633, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 1.4329580348004094, | |
| "grad_norm": 13.997484831173727, | |
| "kl": 23.745975494384766, | |
| "learning_rate": 6.258098536696608e-07, | |
| "logits/chosen": -94636196.57142857, | |
| "logits/rejected": -112435381.33333333, | |
| "logps/chosen": -549.872802734375, | |
| "logps/rejected": -694.0374348958334, | |
| "loss": 0.2152, | |
| "rewards/chosen": 1.8581578390938895, | |
| "rewards/margins": 16.185761497134255, | |
| "rewards/rejected": -14.327603658040365, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 1.443193449334698, | |
| "grad_norm": 7.521678155084116, | |
| "kl": 91.66047668457031, | |
| "learning_rate": 6.200359583603702e-07, | |
| "logits/chosen": -91731765.33333333, | |
| "logits/rejected": -93070952.0, | |
| "logps/chosen": -454.1505940755208, | |
| "logps/rejected": -862.4594116210938, | |
| "loss": 0.2282, | |
| "rewards/chosen": 4.978697141011556, | |
| "rewards/margins": 37.34273370107015, | |
| "rewards/rejected": -32.364036560058594, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 1.4534288638689867, | |
| "grad_norm": 9.62091749430687, | |
| "kl": 155.88140869140625, | |
| "learning_rate": 6.142450263417684e-07, | |
| "logits/chosen": -102225635.55555555, | |
| "logits/rejected": -85442594.9090909, | |
| "logps/chosen": -540.4261067708334, | |
| "logps/rejected": -603.1096857244319, | |
| "loss": 0.2332, | |
| "rewards/chosen": 3.9674284193250866, | |
| "rewards/margins": 13.474742927936592, | |
| "rewards/rejected": -9.507314508611506, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 1.4636642784032754, | |
| "grad_norm": 10.495215345293603, | |
| "kl": 126.53300476074219, | |
| "learning_rate": 6.084378795211142e-07, | |
| "logits/chosen": -100052528.0, | |
| "logits/rejected": -94666272.0, | |
| "logps/chosen": -581.60009765625, | |
| "logps/rejected": -689.3089192708334, | |
| "loss": 0.2222, | |
| "rewards/chosen": 2.338420867919922, | |
| "rewards/margins": 16.052043914794922, | |
| "rewards/rejected": -13.713623046875, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 1.473899692937564, | |
| "grad_norm": 3.841823620667789, | |
| "kl": 66.59071350097656, | |
| "learning_rate": 6.026153421070332e-07, | |
| "logits/chosen": -100694074.18181819, | |
| "logits/rejected": -116525688.8888889, | |
| "logps/chosen": -546.9574751420455, | |
| "logps/rejected": -961.9046223958334, | |
| "loss": 0.195, | |
| "rewards/chosen": 4.302017905495384, | |
| "rewards/margins": 37.5053429651742, | |
| "rewards/rejected": -33.20332505967882, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 1.4841351074718525, | |
| "grad_norm": 14.904727186940962, | |
| "kl": 87.73085021972656, | |
| "learning_rate": 5.967782404925392e-07, | |
| "logits/chosen": -98680000.0, | |
| "logits/rejected": -112079114.66666667, | |
| "logps/chosen": -520.26025390625, | |
| "logps/rejected": -843.9371744791666, | |
| "loss": 0.2511, | |
| "rewards/chosen": 3.4578888416290283, | |
| "rewards/margins": 27.866142829259235, | |
| "rewards/rejected": -24.408253987630207, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.4943705220061412, | |
| "grad_norm": 13.06698843977878, | |
| "kl": 151.13763427734375, | |
| "learning_rate": 5.909274031377433e-07, | |
| "logits/chosen": -95404913.77777778, | |
| "logits/rejected": -88787351.27272727, | |
| "logps/chosen": -568.8107096354166, | |
| "logps/rejected": -707.0969460227273, | |
| "loss": 0.2509, | |
| "rewards/chosen": 3.700217776828342, | |
| "rewards/margins": 16.992627500283596, | |
| "rewards/rejected": -13.292409723455256, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 1.5046059365404298, | |
| "grad_norm": 11.196107724803628, | |
| "kl": 162.71450805664062, | |
| "learning_rate": 5.850636604522717e-07, | |
| "logits/chosen": -103681984.0, | |
| "logits/rejected": -115164441.6, | |
| "logps/chosen": -570.78681640625, | |
| "logps/rejected": -804.131298828125, | |
| "loss": 0.2339, | |
| "rewards/chosen": 5.796274948120117, | |
| "rewards/margins": 18.870234298706055, | |
| "rewards/rejected": -13.073959350585938, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 1.5046059365404298, | |
| "eval_logits/chosen": -98059872.0, | |
| "eval_logits/rejected": -89680456.0, | |
| "eval_logps/chosen": -486.9290771484375, | |
| "eval_logps/rejected": -479.80731201171875, | |
| "eval_loss": 0.35584360361099243, | |
| "eval_rewards/chosen": 1.2830994129180908, | |
| "eval_rewards/margins": -7.098632574081421, | |
| "eval_rewards/rejected": 8.381731986999512, | |
| "eval_runtime": 2.6347, | |
| "eval_samples_per_second": 3.795, | |
| "eval_steps_per_second": 0.759, | |
| "kl": 0.0, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 1.5148413510747185, | |
| "grad_norm": 16.203618211283683, | |
| "kl": 92.46534729003906, | |
| "learning_rate": 5.791878446774034e-07, | |
| "logits/chosen": -103741664.0, | |
| "logits/rejected": -82316051.2, | |
| "logps/chosen": -515.66162109375, | |
| "logps/rejected": -602.708349609375, | |
| "loss": 0.2773, | |
| "rewards/chosen": 4.238919830322265, | |
| "rewards/margins": 13.160222625732422, | |
| "rewards/rejected": -8.921302795410156, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 1.5250767656090072, | |
| "grad_norm": 4.554873934008656, | |
| "kl": 167.45574951171875, | |
| "learning_rate": 5.733007897679528e-07, | |
| "logits/chosen": -95108238.22222222, | |
| "logits/rejected": -91586996.36363636, | |
| "logps/chosen": -521.0322265625, | |
| "logps/rejected": -721.2854225852273, | |
| "loss": 0.1973, | |
| "rewards/chosen": 4.9189779493543835, | |
| "rewards/margins": 20.43809859921234, | |
| "rewards/rejected": -15.519120649857955, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 1.5353121801432958, | |
| "grad_norm": 7.6574893969363504, | |
| "kl": 45.464637756347656, | |
| "learning_rate": 5.674033312739047e-07, | |
| "logits/chosen": -136661788.44444445, | |
| "logits/rejected": -121526865.45454545, | |
| "logps/chosen": -624.0289713541666, | |
| "logps/rejected": -737.0136274857955, | |
| "loss": 0.2013, | |
| "rewards/chosen": 4.872588263617621, | |
| "rewards/margins": 18.956630899448587, | |
| "rewards/rejected": -14.084042635830967, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.5455475946775845, | |
| "grad_norm": 8.371922803052616, | |
| "kl": 122.37657165527344, | |
| "learning_rate": 5.614963062218252e-07, | |
| "logits/chosen": -112632277.33333333, | |
| "logits/rejected": -97041344.0, | |
| "logps/chosen": -567.5799967447916, | |
| "logps/rejected": -660.11474609375, | |
| "loss": 0.1998, | |
| "rewards/chosen": 6.09868049621582, | |
| "rewards/margins": 15.676589965820312, | |
| "rewards/rejected": -9.577909469604492, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 1.555783009211873, | |
| "grad_norm": 9.555101175645209, | |
| "kl": 84.63465881347656, | |
| "learning_rate": 5.555805529960626e-07, | |
| "logits/chosen": -107756818.28571428, | |
| "logits/rejected": -104269922.46153846, | |
| "logps/chosen": -569.1482631138393, | |
| "logps/rejected": -676.1059194711538, | |
| "loss": 0.276, | |
| "rewards/chosen": 5.877526419503348, | |
| "rewards/margins": 21.615523621276186, | |
| "rewards/rejected": -15.737997201772837, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 1.5660184237461618, | |
| "grad_norm": 8.285346789381096, | |
| "kl": 17.301956176757812, | |
| "learning_rate": 5.496569112197548e-07, | |
| "logits/chosen": -108150368.0, | |
| "logits/rejected": -117954704.0, | |
| "logps/chosen": -564.1471354166666, | |
| "logps/rejected": -842.083740234375, | |
| "loss": 0.2172, | |
| "rewards/chosen": 6.367968241373698, | |
| "rewards/margins": 25.966650644938152, | |
| "rewards/rejected": -19.598682403564453, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 1.5762538382804503, | |
| "grad_norm": 17.201078473501788, | |
| "kl": 29.990673065185547, | |
| "learning_rate": 5.437262216356628e-07, | |
| "logits/chosen": -99166184.72727273, | |
| "logits/rejected": -96667192.8888889, | |
| "logps/chosen": -560.8868963068181, | |
| "logps/rejected": -690.4375, | |
| "loss": 0.2375, | |
| "rewards/chosen": 2.5369420485063032, | |
| "rewards/margins": 14.643823084205087, | |
| "rewards/rejected": -12.106881035698784, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 1.586489252814739, | |
| "grad_norm": 14.598976158627305, | |
| "kl": 17.094257354736328, | |
| "learning_rate": 5.377893259868427e-07, | |
| "logits/chosen": -93399792.0, | |
| "logits/rejected": -101074688.0, | |
| "logps/chosen": -506.314208984375, | |
| "logps/rejected": -705.162109375, | |
| "loss": 0.1902, | |
| "rewards/chosen": 3.808037519454956, | |
| "rewards/margins": 19.942469994227093, | |
| "rewards/rejected": -16.134432474772137, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.5967246673490276, | |
| "grad_norm": 11.677222021035318, | |
| "kl": 105.08064270019531, | |
| "learning_rate": 5.318470668971779e-07, | |
| "logits/chosen": -113351078.4, | |
| "logits/rejected": -97037670.4, | |
| "logps/chosen": -570.3861328125, | |
| "logps/rejected": -638.00615234375, | |
| "loss": 0.2409, | |
| "rewards/chosen": 3.3090076446533203, | |
| "rewards/margins": 12.0073673248291, | |
| "rewards/rejected": -8.69835968017578, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 1.6069600818833163, | |
| "grad_norm": 16.040864873166022, | |
| "kl": 193.0989227294922, | |
| "learning_rate": 5.259002877517853e-07, | |
| "logits/chosen": -90070867.2, | |
| "logits/rejected": -99008230.4, | |
| "logps/chosen": -465.715625, | |
| "logps/rejected": -680.6184895833334, | |
| "loss": 0.2715, | |
| "rewards/chosen": 4.33167724609375, | |
| "rewards/margins": 15.996807861328126, | |
| "rewards/rejected": -11.665130615234375, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 1.617195496417605, | |
| "grad_norm": 24.30022370778095, | |
| "kl": 89.72409057617188, | |
| "learning_rate": 5.199498325773134e-07, | |
| "logits/chosen": -104927464.72727273, | |
| "logits/rejected": -88201393.77777778, | |
| "logps/chosen": -567.3364701704545, | |
| "logps/rejected": -562.6102430555555, | |
| "loss": 0.2075, | |
| "rewards/chosen": 4.681469310413707, | |
| "rewards/margins": 8.131336558948863, | |
| "rewards/rejected": -3.4498672485351562, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 1.6274309109518934, | |
| "grad_norm": 10.016797523044985, | |
| "kl": 32.81374740600586, | |
| "learning_rate": 5.139965459221495e-07, | |
| "logits/chosen": -107691754.66666667, | |
| "logits/rejected": -97935697.45454545, | |
| "logps/chosen": -572.6688368055555, | |
| "logps/rejected": -638.0316051136364, | |
| "loss": 0.2441, | |
| "rewards/chosen": 5.759796990288629, | |
| "rewards/margins": 18.01395570388948, | |
| "rewards/rejected": -12.254158713600852, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 1.6376663254861823, | |
| "grad_norm": 16.580461098226703, | |
| "kl": 68.80343627929688, | |
| "learning_rate": 5.080412727365535e-07, | |
| "logits/chosen": -124645034.66666667, | |
| "logits/rejected": -97441800.0, | |
| "logps/chosen": -639.878173828125, | |
| "logps/rejected": -744.6334228515625, | |
| "loss": 0.1817, | |
| "rewards/chosen": 5.480181376139323, | |
| "rewards/margins": 26.614111582438152, | |
| "rewards/rejected": -21.133930206298828, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.6479017400204707, | |
| "grad_norm": 13.42103249531687, | |
| "kl": 56.13240432739258, | |
| "learning_rate": 5.020848582527335e-07, | |
| "logits/chosen": -103806021.81818181, | |
| "logits/rejected": -112090880.0, | |
| "logps/chosen": -548.7734375, | |
| "logps/rejected": -757.8369140625, | |
| "loss": 0.2598, | |
| "rewards/chosen": 2.9247055053710938, | |
| "rewards/margins": 20.906888326009113, | |
| "rewards/rejected": -17.98218282063802, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 1.6581371545547596, | |
| "grad_norm": 12.634298219691603, | |
| "kl": 141.90322875976562, | |
| "learning_rate": 4.96128147864882e-07, | |
| "logits/chosen": -89943796.36363636, | |
| "logits/rejected": -102573880.8888889, | |
| "logps/chosen": -538.3907137784091, | |
| "logps/rejected": -753.2516818576389, | |
| "loss": 0.1773, | |
| "rewards/chosen": 5.100575186989524, | |
| "rewards/margins": 21.259171649663134, | |
| "rewards/rejected": -16.15859646267361, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 1.668372569089048, | |
| "grad_norm": 10.28197062624758, | |
| "kl": 107.90435791015625, | |
| "learning_rate": 4.90171987009189e-07, | |
| "logits/chosen": -106620205.71428572, | |
| "logits/rejected": -113546080.0, | |
| "logps/chosen": -552.3481096540179, | |
| "logps/rejected": -906.138427734375, | |
| "loss": 0.1918, | |
| "rewards/chosen": 4.133395603724888, | |
| "rewards/margins": 27.026680719284784, | |
| "rewards/rejected": -22.893285115559895, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 1.6786079836233367, | |
| "grad_norm": 17.96411547258075, | |
| "kl": 112.21823120117188, | |
| "learning_rate": 4.8421722104385e-07, | |
| "logits/chosen": -75008458.66666667, | |
| "logits/rejected": -94131072.0, | |
| "logps/chosen": -432.4429117838542, | |
| "logps/rejected": -667.931640625, | |
| "loss": 0.2239, | |
| "rewards/chosen": 3.447942097981771, | |
| "rewards/margins": 15.51882571265811, | |
| "rewards/rejected": -12.070883614676339, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 1.6888433981576254, | |
| "grad_norm": 10.395637391051638, | |
| "kl": 23.069644927978516, | |
| "learning_rate": 4.78264695129083e-07, | |
| "logits/chosen": -116295520.0, | |
| "logits/rejected": -107159306.66666667, | |
| "logps/chosen": -560.1647338867188, | |
| "logps/rejected": -712.3726399739584, | |
| "loss": 0.2036, | |
| "rewards/chosen": 3.4754228591918945, | |
| "rewards/margins": 13.128763516743978, | |
| "rewards/rejected": -9.653340657552084, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.699078812691914, | |
| "grad_norm": 8.156359676723904, | |
| "kl": 80.28675842285156, | |
| "learning_rate": 4.723152541071761e-07, | |
| "logits/chosen": -128034397.0909091, | |
| "logits/rejected": -115200327.1111111, | |
| "logps/chosen": -657.2834250710227, | |
| "logps/rejected": -728.0647786458334, | |
| "loss": 0.2249, | |
| "rewards/chosen": 4.118690490722656, | |
| "rewards/margins": 21.072269863552517, | |
| "rewards/rejected": -16.95357937282986, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 1.7093142272262027, | |
| "grad_norm": 11.037173163466019, | |
| "kl": 70.9041748046875, | |
| "learning_rate": 4.663697423825777e-07, | |
| "logits/chosen": -111218501.81818181, | |
| "logits/rejected": -110178133.33333333, | |
| "logps/chosen": -608.4298650568181, | |
| "logps/rejected": -630.6374240451389, | |
| "loss": 0.1887, | |
| "rewards/chosen": 1.0514965057373047, | |
| "rewards/margins": 6.370984183417426, | |
| "rewards/rejected": -5.319487677680121, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 1.7195496417604912, | |
| "grad_norm": 10.163513417285465, | |
| "kl": 90.06620788574219, | |
| "learning_rate": 4.604290038020513e-07, | |
| "logits/chosen": -104523801.6, | |
| "logits/rejected": -105088793.6, | |
| "logps/chosen": -601.39541015625, | |
| "logps/rejected": -695.315625, | |
| "loss": 0.2451, | |
| "rewards/chosen": 3.491130065917969, | |
| "rewards/margins": 16.996690368652345, | |
| "rewards/rejected": -13.505560302734375, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 1.72978505629478, | |
| "grad_norm": 8.51268769415796, | |
| "kl": 78.53779602050781, | |
| "learning_rate": 4.5449388153490786e-07, | |
| "logits/chosen": -113766317.71428572, | |
| "logits/rejected": -97993846.15384616, | |
| "logps/chosen": -695.9150390625, | |
| "logps/rejected": -615.8410832331731, | |
| "loss": 0.2208, | |
| "rewards/chosen": -1.227851186479841, | |
| "rewards/margins": 6.885328533885243, | |
| "rewards/rejected": -8.113179720365084, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 1.7400204708290685, | |
| "grad_norm": 16.94149194280564, | |
| "kl": 84.0811767578125, | |
| "learning_rate": 4.485652179533347e-07, | |
| "logits/chosen": -94786432.0, | |
| "logits/rejected": -94003632.0, | |
| "logps/chosen": -496.9593912760417, | |
| "logps/rejected": -667.4026489257812, | |
| "loss": 0.2518, | |
| "rewards/chosen": 4.43801212310791, | |
| "rewards/margins": 15.846646308898926, | |
| "rewards/rejected": -11.408634185791016, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.7502558853633572, | |
| "grad_norm": 11.672933785919794, | |
| "kl": 37.66474533081055, | |
| "learning_rate": 4.426438545128372e-07, | |
| "logits/chosen": -112977585.77777778, | |
| "logits/rejected": -100742376.72727273, | |
| "logps/chosen": -576.189453125, | |
| "logps/rejected": -709.9443359375, | |
| "loss": 0.2097, | |
| "rewards/chosen": 3.0994716220431857, | |
| "rewards/margins": 16.225449571705827, | |
| "rewards/rejected": -13.125977949662643, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 1.7604912998976459, | |
| "grad_norm": 0.8936706403580842, | |
| "kl": 46.1988525390625, | |
| "learning_rate": 4.367306316328121e-07, | |
| "logits/chosen": -108418517.33333333, | |
| "logits/rejected": -93911760.0, | |
| "logps/chosen": -573.855224609375, | |
| "logps/rejected": -722.8265380859375, | |
| "loss": 0.1714, | |
| "rewards/chosen": 4.775790532430013, | |
| "rewards/margins": 19.311710675557453, | |
| "rewards/rejected": -14.535920143127441, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 1.7707267144319345, | |
| "grad_norm": 8.657146619452396, | |
| "kl": 213.92623901367188, | |
| "learning_rate": 4.30826388577265e-07, | |
| "logits/chosen": -104575015.38461539, | |
| "logits/rejected": -98068845.71428572, | |
| "logps/chosen": -534.2808743990385, | |
| "logps/rejected": -600.8302176339286, | |
| "loss": 0.1977, | |
| "rewards/chosen": 5.302577678973858, | |
| "rewards/margins": 9.943926381540823, | |
| "rewards/rejected": -4.641348702566964, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 1.7809621289662232, | |
| "grad_norm": 10.789363563455533, | |
| "kl": 131.159423828125, | |
| "learning_rate": 4.2493196333569584e-07, | |
| "logits/chosen": -97828420.92307693, | |
| "logits/rejected": -83620754.28571428, | |
| "logps/chosen": -487.26893028846155, | |
| "logps/rejected": -598.9041573660714, | |
| "loss": 0.2351, | |
| "rewards/chosen": 5.452203603891226, | |
| "rewards/margins": 12.849205142849094, | |
| "rewards/rejected": -7.397001538957868, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 1.7911975435005116, | |
| "grad_norm": 12.092749709288178, | |
| "kl": 166.184814453125, | |
| "learning_rate": 4.190481925041606e-07, | |
| "logits/chosen": -105054982.4, | |
| "logits/rejected": -86679507.2, | |
| "logps/chosen": -531.07646484375, | |
| "logps/rejected": -626.576318359375, | |
| "loss": 0.2203, | |
| "rewards/chosen": 6.711921691894531, | |
| "rewards/margins": 16.232135009765624, | |
| "rewards/rejected": -9.520213317871093, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 1.8014329580348005, | |
| "grad_norm": 10.247946491106909, | |
| "kl": 195.1206512451172, | |
| "learning_rate": 4.131759111665348e-07, | |
| "logits/chosen": -104919637.33333333, | |
| "logits/rejected": -98690360.0, | |
| "logps/chosen": -521.9753824869791, | |
| "logps/rejected": -756.753173828125, | |
| "loss": 0.2021, | |
| "rewards/chosen": 5.4654890696207685, | |
| "rewards/margins": 21.19386164347331, | |
| "rewards/rejected": -15.728372573852539, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 1.805527123848516, | |
| "eval_logits/chosen": -95303792.0, | |
| "eval_logits/rejected": -87230504.0, | |
| "eval_logps/chosen": -472.82220458984375, | |
| "eval_logps/rejected": -488.859130859375, | |
| "eval_loss": 0.2824169099330902, | |
| "eval_rewards/chosen": 2.69378662109375, | |
| "eval_rewards/margins": -4.782763957977295, | |
| "eval_rewards/rejected": 7.476550579071045, | |
| "eval_runtime": 2.6295, | |
| "eval_samples_per_second": 3.803, | |
| "eval_steps_per_second": 0.761, | |
| "kl": 0.0, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 1.811668372569089, | |
| "grad_norm": 8.176862722781726, | |
| "kl": 152.9454345703125, | |
| "learning_rate": 4.0731595277598986e-07, | |
| "logits/chosen": -95640564.36363636, | |
| "logits/rejected": -105284501.33333333, | |
| "logps/chosen": -486.55069247159093, | |
| "logps/rejected": -683.3498263888889, | |
| "loss": 0.2283, | |
| "rewards/chosen": 6.601251775568182, | |
| "rewards/margins": 14.320289997139362, | |
| "rewards/rejected": -7.71903822157118, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 1.8219037871033776, | |
| "grad_norm": 12.366163049603129, | |
| "kl": 134.93138122558594, | |
| "learning_rate": 4.0146914903669997e-07, | |
| "logits/chosen": -105663914.66666667, | |
| "logits/rejected": -86276560.0, | |
| "logps/chosen": -558.2360026041666, | |
| "logps/rejected": -618.48046875, | |
| "loss": 0.2848, | |
| "rewards/chosen": 6.694384256998698, | |
| "rewards/margins": 12.43185583750407, | |
| "rewards/rejected": -5.737471580505371, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 1.8321392016376663, | |
| "grad_norm": 21.06542323848456, | |
| "kl": 114.84019470214844, | |
| "learning_rate": 3.9563632978579997e-07, | |
| "logits/chosen": -130333696.0, | |
| "logits/rejected": -118268970.66666667, | |
| "logps/chosen": -678.7927024147727, | |
| "logps/rejected": -766.8365885416666, | |
| "loss": 0.1941, | |
| "rewards/chosen": -1.294362328269265, | |
| "rewards/margins": 13.073830556387852, | |
| "rewards/rejected": -14.368192884657118, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 1.842374616171955, | |
| "grad_norm": 7.508684819898513, | |
| "kl": 144.90069580078125, | |
| "learning_rate": 3.898183228756049e-07, | |
| "logits/chosen": -97050137.6, | |
| "logits/rejected": -90996108.8, | |
| "logps/chosen": -500.40439453125, | |
| "logps/rejected": -676.8037109375, | |
| "loss": 0.2811, | |
| "rewards/chosen": 5.051666259765625, | |
| "rewards/margins": 15.36988525390625, | |
| "rewards/rejected": -10.318218994140626, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.8526100307062436, | |
| "grad_norm": 12.437592526611242, | |
| "kl": 114.08500671386719, | |
| "learning_rate": 3.840159540561134e-07, | |
| "logits/chosen": -105590414.22222222, | |
| "logits/rejected": -89462213.81818181, | |
| "logps/chosen": -513.1369357638889, | |
| "logps/rejected": -584.1775568181819, | |
| "loss": 0.2467, | |
| "rewards/chosen": 3.517538070678711, | |
| "rewards/margins": 8.527439637617633, | |
| "rewards/rejected": -5.009901566938921, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 1.862845445240532, | |
| "grad_norm": 15.091845848621832, | |
| "kl": 47.479251861572266, | |
| "learning_rate": 3.782300468578103e-07, | |
| "logits/chosen": -108857937.45454545, | |
| "logits/rejected": -103325866.66666667, | |
| "logps/chosen": -555.7039240056819, | |
| "logps/rejected": -626.8645833333334, | |
| "loss": 0.2541, | |
| "rewards/chosen": 3.7626831748268823, | |
| "rewards/margins": 11.195115561437126, | |
| "rewards/rejected": -7.432432386610243, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 1.873080859774821, | |
| "grad_norm": 19.183770345291794, | |
| "kl": 41.419944763183594, | |
| "learning_rate": 3.7246142247478035e-07, | |
| "logits/chosen": -101665499.42857143, | |
| "logits/rejected": -109967360.0, | |
| "logps/chosen": -559.6848842075893, | |
| "logps/rejected": -773.8717447916666, | |
| "loss": 0.2448, | |
| "rewards/chosen": 1.5084868839808874, | |
| "rewards/margins": 14.50175648643857, | |
| "rewards/rejected": -12.993269602457682, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 1.8833162743091094, | |
| "grad_norm": 9.069536625547933, | |
| "kl": 45.53041076660156, | |
| "learning_rate": 3.6671089964815825e-07, | |
| "logits/chosen": -108608816.0, | |
| "logits/rejected": -89606421.33333333, | |
| "logps/chosen": -513.4603271484375, | |
| "logps/rejected": -684.734375, | |
| "loss": 0.265, | |
| "rewards/chosen": 4.741620063781738, | |
| "rewards/margins": 17.57877826690674, | |
| "rewards/rejected": -12.837158203125, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 1.8935516888433983, | |
| "grad_norm": 19.468688850390492, | |
| "kl": 90.07597351074219, | |
| "learning_rate": 3.6097929454992404e-07, | |
| "logits/chosen": -106665146.18181819, | |
| "logits/rejected": -86712704.0, | |
| "logps/chosen": -654.1851917613636, | |
| "logps/rejected": -702.7716471354166, | |
| "loss": 0.1897, | |
| "rewards/chosen": 4.631488106467507, | |
| "rewards/margins": 19.50170362838591, | |
| "rewards/rejected": -14.870215521918404, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 1.9037871033776868, | |
| "grad_norm": 10.66508500138114, | |
| "kl": 153.10336303710938, | |
| "learning_rate": 3.5526742066706316e-07, | |
| "logits/chosen": -99318946.9090909, | |
| "logits/rejected": -107160832.0, | |
| "logps/chosen": -510.36629971590907, | |
| "logps/rejected": -641.9811197916666, | |
| "loss": 0.2336, | |
| "rewards/chosen": 5.530141657049006, | |
| "rewards/margins": 12.735254538179648, | |
| "rewards/rejected": -7.205112881130642, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 1.9140225179119754, | |
| "grad_norm": 8.059119234031746, | |
| "kl": 44.486595153808594, | |
| "learning_rate": 3.4957608868610927e-07, | |
| "logits/chosen": -114195315.2, | |
| "logits/rejected": -108387635.2, | |
| "logps/chosen": -591.734326171875, | |
| "logps/rejected": -637.55859375, | |
| "loss": 0.2205, | |
| "rewards/chosen": 1.337228012084961, | |
| "rewards/margins": 5.68485221862793, | |
| "rewards/rejected": -4.347624206542969, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 1.924257932446264, | |
| "grad_norm": 10.00472400660884, | |
| "kl": 281.92333984375, | |
| "learning_rate": 3.439061063780826e-07, | |
| "logits/chosen": -91237228.3076923, | |
| "logits/rejected": -98920045.71428572, | |
| "logps/chosen": -496.14002403846155, | |
| "logps/rejected": -737.2635323660714, | |
| "loss": 0.2072, | |
| "rewards/chosen": 5.549809382512019, | |
| "rewards/margins": 21.350414066524294, | |
| "rewards/rejected": -15.800604684012276, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 1.9344933469805528, | |
| "grad_norm": 10.522188816412612, | |
| "kl": 322.6961669921875, | |
| "learning_rate": 3.382582784838428e-07, | |
| "logits/chosen": -121360153.6, | |
| "logits/rejected": -107659980.8, | |
| "logps/chosen": -605.14111328125, | |
| "logps/rejected": -678.30458984375, | |
| "loss": 0.2686, | |
| "rewards/chosen": 4.314989471435547, | |
| "rewards/margins": 10.155291748046874, | |
| "rewards/rejected": -5.840302276611328, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 1.9447287615148414, | |
| "grad_norm": 12.824401757150811, | |
| "kl": 382.4858093261719, | |
| "learning_rate": 3.326334065998737e-07, | |
| "logits/chosen": -101046491.42857143, | |
| "logits/rejected": -115594496.0, | |
| "logps/chosen": -531.6218610491071, | |
| "logps/rejected": -693.0041316105769, | |
| "loss": 0.1756, | |
| "rewards/chosen": 6.1677044459751675, | |
| "rewards/margins": 14.67305441217108, | |
| "rewards/rejected": -8.505349966195913, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.9549641760491299, | |
| "grad_norm": 20.386712706894404, | |
| "kl": 99.92483520507812, | |
| "learning_rate": 3.270322890645105e-07, | |
| "logits/chosen": -101339978.66666667, | |
| "logits/rejected": -103863136.0, | |
| "logps/chosen": -533.7759602864584, | |
| "logps/rejected": -736.4990234375, | |
| "loss": 0.2402, | |
| "rewards/chosen": 5.52944819132487, | |
| "rewards/margins": 17.632124582926433, | |
| "rewards/rejected": -12.102676391601562, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 1.9651995905834188, | |
| "grad_norm": 5.038959800127083, | |
| "kl": 48.24281311035156, | |
| "learning_rate": 3.214557208446327e-07, | |
| "logits/chosen": -100319953.45454545, | |
| "logits/rejected": -92343381.33333333, | |
| "logps/chosen": -546.0839399857955, | |
| "logps/rejected": -560.3200412326389, | |
| "loss": 0.2344, | |
| "rewards/chosen": 4.807866876775568, | |
| "rewards/margins": 3.9413422863892835, | |
| "rewards/rejected": 0.8665245903862847, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 1.9754350051177072, | |
| "grad_norm": 6.680434736938146, | |
| "kl": 124.0519790649414, | |
| "learning_rate": 3.159044934228348e-07, | |
| "logits/chosen": -105253174.85714285, | |
| "logits/rejected": -104486037.33333333, | |
| "logps/chosen": -535.1736886160714, | |
| "logps/rejected": -598.3387044270834, | |
| "loss": 0.2006, | |
| "rewards/chosen": 5.359432765415737, | |
| "rewards/margins": 8.449786413283576, | |
| "rewards/rejected": -3.0903536478678384, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 1.9856704196519959, | |
| "grad_norm": 11.230039906447315, | |
| "kl": 122.17435455322266, | |
| "learning_rate": 3.1037939468509e-07, | |
| "logits/chosen": -97059254.85714285, | |
| "logits/rejected": -96700928.0, | |
| "logps/chosen": -553.3533761160714, | |
| "logps/rejected": -575.9772761418269, | |
| "loss": 0.2392, | |
| "rewards/chosen": 5.7837949480329245, | |
| "rewards/margins": 5.070555718390496, | |
| "rewards/rejected": 0.7132392296424279, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 1.9959058341862845, | |
| "grad_norm": 7.4274799670453575, | |
| "kl": 186.48150634765625, | |
| "learning_rate": 3.0488120880892663e-07, | |
| "logits/chosen": -96918192.0, | |
| "logits/rejected": -95595184.0, | |
| "logps/chosen": -471.45294189453125, | |
| "logps/rejected": -576.8538818359375, | |
| "loss": 0.2082, | |
| "rewards/chosen": 5.839811325073242, | |
| "rewards/margins": 13.376245975494385, | |
| "rewards/rejected": -7.536434650421143, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 2.006141248720573, | |
| "grad_norm": 10.584557120336866, | |
| "kl": 125.15768432617188, | |
| "learning_rate": 2.9941071615212903e-07, | |
| "logits/chosen": -109870899.2, | |
| "logits/rejected": -80665926.4, | |
| "logps/chosen": -596.23798828125, | |
| "logps/rejected": -555.70048828125, | |
| "loss": 0.1868, | |
| "rewards/chosen": 6.416107940673828, | |
| "rewards/margins": 7.968547344207764, | |
| "rewards/rejected": -1.5524394035339355, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 2.016376663254862, | |
| "grad_norm": 8.808290014986497, | |
| "kl": 202.176513671875, | |
| "learning_rate": 2.9396869314198125e-07, | |
| "logits/chosen": -94040345.6, | |
| "logits/rejected": -91740160.0, | |
| "logps/chosen": -494.99248046875, | |
| "logps/rejected": -640.023974609375, | |
| "loss": 0.1113, | |
| "rewards/chosen": 6.723219299316407, | |
| "rewards/margins": 12.369775009155273, | |
| "rewards/rejected": -5.646555709838867, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 2.0266120777891503, | |
| "grad_norm": 7.577887182750462, | |
| "kl": 10.694358825683594, | |
| "learning_rate": 2.8855591216507e-07, | |
| "logits/chosen": -112530150.4, | |
| "logits/rejected": -101618681.6, | |
| "logps/chosen": -533.929443359375, | |
| "logps/rejected": -578.36171875, | |
| "loss": 0.1269, | |
| "rewards/chosen": 5.4773094177246096, | |
| "rewards/margins": 14.136001586914062, | |
| "rewards/rejected": -8.658692169189454, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 2.036847492323439, | |
| "grad_norm": 14.922868038999017, | |
| "kl": 75.09333801269531, | |
| "learning_rate": 2.831731414576576e-07, | |
| "logits/chosen": -107291744.0, | |
| "logits/rejected": -97015648.0, | |
| "logps/chosen": -555.2694091796875, | |
| "logps/rejected": -593.9874674479166, | |
| "loss": 0.1077, | |
| "rewards/chosen": 4.765722274780273, | |
| "rewards/margins": 11.604981740315754, | |
| "rewards/rejected": -6.8392594655354815, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 2.0470829068577276, | |
| "grad_norm": 4.235861378584866, | |
| "kl": 50.54994583129883, | |
| "learning_rate": 2.7782114499664846e-07, | |
| "logits/chosen": -92629866.66666667, | |
| "logits/rejected": -105082845.0909091, | |
| "logps/chosen": -467.57025824652777, | |
| "logps/rejected": -619.3458806818181, | |
| "loss": 0.1028, | |
| "rewards/chosen": 5.671641031901042, | |
| "rewards/margins": 12.007132559111625, | |
| "rewards/rejected": -6.335491527210582, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 2.0573183213920165, | |
| "grad_norm": 5.227781817757105, | |
| "kl": 108.27552795410156, | |
| "learning_rate": 2.725006823911562e-07, | |
| "logits/chosen": -107582277.81818181, | |
| "logits/rejected": -94774528.0, | |
| "logps/chosen": -517.8961292613636, | |
| "logps/rejected": -567.1627604166666, | |
| "loss": 0.1834, | |
| "rewards/chosen": 6.7398598410866475, | |
| "rewards/margins": 13.737200149382003, | |
| "rewards/rejected": -6.997340308295356, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 2.067553735926305, | |
| "grad_norm": 10.398496742073478, | |
| "kl": 116.98248291015625, | |
| "learning_rate": 2.6721250877469243e-07, | |
| "logits/chosen": -134676784.0, | |
| "logits/rejected": -104658389.33333333, | |
| "logps/chosen": -541.9503173828125, | |
| "logps/rejected": -725.39892578125, | |
| "loss": 0.1362, | |
| "rewards/chosen": 6.3118109703063965, | |
| "rewards/margins": 20.48145945866903, | |
| "rewards/rejected": -14.16964848836263, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 2.0777891504605934, | |
| "grad_norm": 5.107311910263906, | |
| "kl": 44.497169494628906, | |
| "learning_rate": 2.6195737469799194e-07, | |
| "logits/chosen": -116298069.33333333, | |
| "logits/rejected": -106568624.0, | |
| "logps/chosen": -578.7025960286459, | |
| "logps/rejected": -693.4928588867188, | |
| "loss": 0.1087, | |
| "rewards/chosen": 5.359598795572917, | |
| "rewards/margins": 11.766344706217449, | |
| "rewards/rejected": -6.406745910644531, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 2.0880245649948823, | |
| "grad_norm": 10.206921524937323, | |
| "kl": 67.50515747070312, | |
| "learning_rate": 2.567360260224855e-07, | |
| "logits/chosen": -113804869.81818181, | |
| "logits/rejected": -130537272.8888889, | |
| "logps/chosen": -587.5666725852273, | |
| "logps/rejected": -769.2184787326389, | |
| "loss": 0.1701, | |
| "rewards/chosen": 3.0162519975142046, | |
| "rewards/margins": 18.496425744258996, | |
| "rewards/rejected": -15.480173746744791, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 2.0982599795291708, | |
| "grad_norm": 11.147368052934937, | |
| "kl": 40.51292037963867, | |
| "learning_rate": 2.5154920381444025e-07, | |
| "logits/chosen": -132331107.55555555, | |
| "logits/rejected": -110937320.72727273, | |
| "logps/chosen": -654.6213650173611, | |
| "logps/rejected": -708.1742720170455, | |
| "loss": 0.1052, | |
| "rewards/chosen": 5.017095353868273, | |
| "rewards/margins": 18.23353156658134, | |
| "rewards/rejected": -13.216436212713068, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 2.1064483111566017, | |
| "eval_logits/chosen": -104192184.0, | |
| "eval_logits/rejected": -93659376.0, | |
| "eval_logps/chosen": -475.7626953125, | |
| "eval_logps/rejected": -484.2992858886719, | |
| "eval_loss": 0.30316784977912903, | |
| "eval_rewards/chosen": 2.399737596511841, | |
| "eval_rewards/margins": -5.53279709815979, | |
| "eval_rewards/rejected": 7.932534694671631, | |
| "eval_runtime": 2.6298, | |
| "eval_samples_per_second": 3.803, | |
| "eval_steps_per_second": 0.761, | |
| "kl": 0.0, | |
| "step": 2058 | |
| }, | |
| { | |
| "epoch": 2.1084953940634596, | |
| "grad_norm": 12.754723605905042, | |
| "kl": 127.5950927734375, | |
| "learning_rate": 2.463976442397802e-07, | |
| "logits/chosen": -112657896.72727273, | |
| "logits/rejected": -103738140.44444445, | |
| "logps/chosen": -540.5254794034091, | |
| "logps/rejected": -752.6629774305555, | |
| "loss": 0.1219, | |
| "rewards/chosen": 6.787003950639204, | |
| "rewards/margins": 20.9433997568458, | |
| "rewards/rejected": -14.156395806206596, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 2.118730808597748, | |
| "grad_norm": 11.192314383278982, | |
| "kl": 104.99942016601562, | |
| "learning_rate": 2.4128207845960206e-07, | |
| "logits/chosen": -100784049.23076923, | |
| "logits/rejected": -123069549.71428572, | |
| "logps/chosen": -486.64881310096155, | |
| "logps/rejected": -850.0779854910714, | |
| "loss": 0.1127, | |
| "rewards/chosen": 6.771697411170373, | |
| "rewards/margins": 32.08223556686234, | |
| "rewards/rejected": -25.310538155691965, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 2.128966223132037, | |
| "grad_norm": 4.699379149769008, | |
| "kl": 65.97349548339844, | |
| "learning_rate": 2.3620323252640205e-07, | |
| "logits/chosen": -107415344.0, | |
| "logits/rejected": -99578250.66666667, | |
| "logps/chosen": -487.5479431152344, | |
| "logps/rejected": -701.464599609375, | |
| "loss": 0.1361, | |
| "rewards/chosen": 5.433587551116943, | |
| "rewards/margins": 23.49019765853882, | |
| "rewards/rejected": -18.056610107421875, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 2.1392016376663254, | |
| "grad_norm": 6.473056947414698, | |
| "kl": 103.8187255859375, | |
| "learning_rate": 2.3116182728102634e-07, | |
| "logits/chosen": -111746880.0, | |
| "logits/rejected": -113652608.0, | |
| "logps/chosen": -527.4331665039062, | |
| "logps/rejected": -751.0619303385416, | |
| "loss": 0.1384, | |
| "rewards/chosen": 6.121653079986572, | |
| "rewards/margins": 20.22429895401001, | |
| "rewards/rejected": -14.102645874023438, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 2.1494370522006143, | |
| "grad_norm": 10.712281475799484, | |
| "kl": 30.688316345214844, | |
| "learning_rate": 2.2615857825036193e-07, | |
| "logits/chosen": -121882752.0, | |
| "logits/rejected": -131416184.0, | |
| "logps/chosen": -552.6856689453125, | |
| "logps/rejected": -909.7028198242188, | |
| "loss": 0.1264, | |
| "rewards/chosen": 4.177041689554851, | |
| "rewards/margins": 28.032979647318523, | |
| "rewards/rejected": -23.855937957763672, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 2.1596724667349028, | |
| "grad_norm": 1.8332271817652683, | |
| "kl": 39.04943084716797, | |
| "learning_rate": 2.2119419554578368e-07, | |
| "logits/chosen": -119259346.28571428, | |
| "logits/rejected": -93369898.66666667, | |
| "logps/chosen": -523.9795270647321, | |
| "logps/rejected": -705.1712239583334, | |
| "loss": 0.0919, | |
| "rewards/chosen": 6.490928104945591, | |
| "rewards/margins": 26.213579268682572, | |
| "rewards/rejected": -19.72265116373698, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 2.169907881269191, | |
| "grad_norm": 8.092134429001227, | |
| "kl": 161.7046661376953, | |
| "learning_rate": 2.1626938376236602e-07, | |
| "logits/chosen": -126143765.33333333, | |
| "logits/rejected": -112001426.28571428, | |
| "logps/chosen": -463.4314371744792, | |
| "logps/rejected": -742.9820731026786, | |
| "loss": 0.1189, | |
| "rewards/chosen": 8.039971669514975, | |
| "rewards/margins": 22.077065785725914, | |
| "rewards/rejected": -14.037094116210938, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 2.18014329580348, | |
| "grad_norm": 8.024062355675337, | |
| "kl": 97.97288513183594, | |
| "learning_rate": 2.1138484187888095e-07, | |
| "logits/chosen": -141612469.33333334, | |
| "logits/rejected": -105401728.0, | |
| "logps/chosen": -591.967529296875, | |
| "logps/rejected": -688.3210100446429, | |
| "loss": 0.1071, | |
| "rewards/chosen": 8.986672719319662, | |
| "rewards/margins": 22.672740572974796, | |
| "rewards/rejected": -13.686067853655134, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 2.1903787103377685, | |
| "grad_norm": 15.25176970177958, | |
| "kl": 106.90888977050781, | |
| "learning_rate": 2.0654126315859162e-07, | |
| "logits/chosen": -106417353.14285715, | |
| "logits/rejected": -109585565.53846154, | |
| "logps/chosen": -549.0357840401786, | |
| "logps/rejected": -735.4391526442307, | |
| "loss": 0.1331, | |
| "rewards/chosen": 4.569193158830915, | |
| "rewards/margins": 20.35442100776421, | |
| "rewards/rejected": -15.785227848933292, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 2.2006141248720574, | |
| "grad_norm": 4.424610131492461, | |
| "kl": 66.78227233886719, | |
| "learning_rate": 2.017393350508572e-07, | |
| "logits/chosen": -106947863.27272727, | |
| "logits/rejected": -103769792.0, | |
| "logps/chosen": -504.95876242897725, | |
| "logps/rejected": -635.9586588541666, | |
| "loss": 0.0994, | |
| "rewards/chosen": 5.044010509144176, | |
| "rewards/margins": 11.438200979521781, | |
| "rewards/rejected": -6.3941904703776045, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 2.210849539406346, | |
| "grad_norm": 12.489708937727526, | |
| "kl": 99.99842834472656, | |
| "learning_rate": 1.969797390935643e-07, | |
| "logits/chosen": -111711104.0, | |
| "logits/rejected": -113755212.8, | |
| "logps/chosen": -533.645703125, | |
| "logps/rejected": -773.666796875, | |
| "loss": 0.1523, | |
| "rewards/chosen": 5.977291870117187, | |
| "rewards/margins": 20.843841552734375, | |
| "rewards/rejected": -14.866549682617187, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 2.2210849539406348, | |
| "grad_norm": 4.670440511594177, | |
| "kl": 149.47491455078125, | |
| "learning_rate": 1.9226315081639417e-07, | |
| "logits/chosen": -125969038.22222222, | |
| "logits/rejected": -112787362.9090909, | |
| "logps/chosen": -551.9454752604166, | |
| "logps/rejected": -647.052734375, | |
| "loss": 0.1559, | |
| "rewards/chosen": 8.319302876790365, | |
| "rewards/margins": 19.287110993356414, | |
| "rewards/rejected": -10.96780811656605, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 2.231320368474923, | |
| "grad_norm": 16.031191242535417, | |
| "kl": 60.552860260009766, | |
| "learning_rate": 1.8759023964494663e-07, | |
| "logits/chosen": -118854707.2, | |
| "logits/rejected": -101221004.8, | |
| "logps/chosen": -557.2357421875, | |
| "logps/rejected": -587.901953125, | |
| "loss": 0.1241, | |
| "rewards/chosen": 5.938985824584961, | |
| "rewards/margins": 11.856063842773438, | |
| "rewards/rejected": -5.917078018188477, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 2.241555783009212, | |
| "grad_norm": 12.757199732588536, | |
| "kl": 122.61748504638672, | |
| "learning_rate": 1.8296166880572805e-07, | |
| "logits/chosen": -121700633.6, | |
| "logits/rejected": -103405388.8, | |
| "logps/chosen": -503.8498046875, | |
| "logps/rejected": -634.86533203125, | |
| "loss": 0.1402, | |
| "rewards/chosen": 5.459800338745117, | |
| "rewards/margins": 11.527409744262695, | |
| "rewards/rejected": -6.067609405517578, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 2.2517911975435005, | |
| "grad_norm": 1.807781957810491, | |
| "kl": 54.06723403930664, | |
| "learning_rate": 1.7837809523201885e-07, | |
| "logits/chosen": -118060302.22222222, | |
| "logits/rejected": -99475618.9090909, | |
| "logps/chosen": -570.5062391493055, | |
| "logps/rejected": -619.9396306818181, | |
| "loss": 0.1579, | |
| "rewards/chosen": 4.121121724446614, | |
| "rewards/margins": 14.215212966456559, | |
| "rewards/rejected": -10.094091242009943, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 2.262026612077789, | |
| "grad_norm": 9.781604616798875, | |
| "kl": 64.63507080078125, | |
| "learning_rate": 1.7384016947063562e-07, | |
| "logits/chosen": -121720960.0, | |
| "logits/rejected": -132511338.66666667, | |
| "logps/chosen": -631.6351841517857, | |
| "logps/rejected": -796.120361328125, | |
| "loss": 0.1219, | |
| "rewards/chosen": 6.886650085449219, | |
| "rewards/margins": 25.14352289835612, | |
| "rewards/rejected": -18.256872812906902, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 2.272262026612078, | |
| "grad_norm": 21.39354262825075, | |
| "kl": 66.86387634277344, | |
| "learning_rate": 1.6934853558959861e-07, | |
| "logits/chosen": -110275956.36363636, | |
| "logits/rejected": -107321664.0, | |
| "logps/chosen": -514.1443093039773, | |
| "logps/rejected": -617.8937717013889, | |
| "loss": 0.159, | |
| "rewards/chosen": 6.370298212224787, | |
| "rewards/margins": 15.696105571708294, | |
| "rewards/rejected": -9.325807359483507, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 2.2824974411463663, | |
| "grad_norm": 22.076841086039263, | |
| "kl": 36.765682220458984, | |
| "learning_rate": 1.6490383108671923e-07, | |
| "logits/chosen": -111057954.9090909, | |
| "logits/rejected": -102580707.55555555, | |
| "logps/chosen": -540.7732599431819, | |
| "logps/rejected": -714.6677517361111, | |
| "loss": 0.1526, | |
| "rewards/chosen": 4.920051574707031, | |
| "rewards/margins": 16.855376349555122, | |
| "rewards/rejected": -11.935324774848091, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 2.292732855680655, | |
| "grad_norm": 17.947879888518305, | |
| "kl": 15.610729217529297, | |
| "learning_rate": 1.605066867991207e-07, | |
| "logits/chosen": -102536045.71428572, | |
| "logits/rejected": -113235712.0, | |
| "logps/chosen": -513.5811941964286, | |
| "logps/rejected": -585.6139322916666, | |
| "loss": 0.1546, | |
| "rewards/chosen": 5.293537139892578, | |
| "rewards/margins": 12.563283284505207, | |
| "rewards/rejected": -7.26974614461263, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 2.3029682702149437, | |
| "grad_norm": 11.405668194591636, | |
| "kl": 12.592327117919922, | |
| "learning_rate": 1.5615772681370154e-07, | |
| "logits/chosen": -121222873.6, | |
| "logits/rejected": -105986884.26666667, | |
| "logps/chosen": -515.24560546875, | |
| "logps/rejected": -681.1936848958334, | |
| "loss": 0.1456, | |
| "rewards/chosen": 4.4692131042480465, | |
| "rewards/margins": 15.419220225016275, | |
| "rewards/rejected": -10.950007120768229, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 2.313203684749232, | |
| "grad_norm": 10.569983905365673, | |
| "kl": 64.39910888671875, | |
| "learning_rate": 1.5185756837856044e-07, | |
| "logits/chosen": -111442368.0, | |
| "logits/rejected": -115095463.38461539, | |
| "logps/chosen": -488.7227260044643, | |
| "logps/rejected": -654.4423076923077, | |
| "loss": 0.135, | |
| "rewards/chosen": 4.371343885149274, | |
| "rewards/margins": 14.086171789483709, | |
| "rewards/rejected": -9.714827904334435, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 2.323439099283521, | |
| "grad_norm": 3.354324936297212, | |
| "kl": 22.279205322265625, | |
| "learning_rate": 1.4760682181539014e-07, | |
| "logits/chosen": -130732770.46153846, | |
| "logits/rejected": -109831277.71428572, | |
| "logps/chosen": -586.5646033653846, | |
| "logps/rejected": -723.4093191964286, | |
| "loss": 0.1786, | |
| "rewards/chosen": 5.04144521859976, | |
| "rewards/margins": 24.284674675910026, | |
| "rewards/rejected": -19.243229457310267, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 2.3336745138178094, | |
| "grad_norm": 25.305933395761674, | |
| "kl": 39.49127960205078, | |
| "learning_rate": 1.4340609043285352e-07, | |
| "logits/chosen": -116073890.9090909, | |
| "logits/rejected": -128906240.0, | |
| "logps/chosen": -541.4201882102273, | |
| "logps/rejected": -702.7337239583334, | |
| "loss": 0.1394, | |
| "rewards/chosen": 4.314708709716797, | |
| "rewards/margins": 15.29558605617947, | |
| "rewards/rejected": -10.980877346462673, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 2.3439099283520983, | |
| "grad_norm": 7.496154801810936, | |
| "kl": 35.06755065917969, | |
| "learning_rate": 1.392559704409565e-07, | |
| "logits/chosen": -106620288.0, | |
| "logits/rejected": -95783952.0, | |
| "logps/chosen": -490.46142578125, | |
| "logps/rejected": -753.1649169921875, | |
| "loss": 0.1545, | |
| "rewards/chosen": 4.967744827270508, | |
| "rewards/margins": 30.458803176879883, | |
| "rewards/rejected": -25.491058349609375, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 2.3541453428863868, | |
| "grad_norm": 10.112974863426174, | |
| "kl": 45.81691360473633, | |
| "learning_rate": 1.351570508664281e-07, | |
| "logits/chosen": -123060982.15384616, | |
| "logits/rejected": -106164790.85714285, | |
| "logps/chosen": -555.3134014423077, | |
| "logps/rejected": -700.9796316964286, | |
| "loss": 0.0849, | |
| "rewards/chosen": 6.32999772291917, | |
| "rewards/margins": 20.283706916557563, | |
| "rewards/rejected": -13.953709193638392, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 2.3643807574206757, | |
| "grad_norm": 12.497011915287993, | |
| "kl": 54.70706558227539, | |
| "learning_rate": 1.3110991346911937e-07, | |
| "logits/chosen": -109132339.2, | |
| "logits/rejected": -124739571.2, | |
| "logps/chosen": -497.4130859375, | |
| "logps/rejected": -851.44716796875, | |
| "loss": 0.0718, | |
| "rewards/chosen": 6.165841293334961, | |
| "rewards/margins": 32.20471458435059, | |
| "rewards/rejected": -26.038873291015626, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 2.374616171954964, | |
| "grad_norm": 14.048872684243365, | |
| "kl": 31.685832977294922, | |
| "learning_rate": 1.271151326594352e-07, | |
| "logits/chosen": -113037340.44444445, | |
| "logits/rejected": -119740986.18181819, | |
| "logps/chosen": -560.6264105902778, | |
| "logps/rejected": -703.0914417613636, | |
| "loss": 0.1018, | |
| "rewards/chosen": 4.181118435329861, | |
| "rewards/margins": 16.667807222616794, | |
| "rewards/rejected": -12.486688787286932, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 2.384851586489253, | |
| "grad_norm": 11.284739170383132, | |
| "kl": 81.66230773925781, | |
| "learning_rate": 1.2317327541680644e-07, | |
| "logits/chosen": -116141714.28571428, | |
| "logits/rejected": -117831989.33333333, | |
| "logps/chosen": -503.6156529017857, | |
| "logps/rejected": -756.2096354166666, | |
| "loss": 0.1345, | |
| "rewards/chosen": 6.625304630824497, | |
| "rewards/margins": 17.272850127447217, | |
| "rewards/rejected": -10.64754549662272, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 2.3950870010235414, | |
| "grad_norm": 13.720827077271856, | |
| "kl": 65.03724670410156, | |
| "learning_rate": 1.1928490120922014e-07, | |
| "logits/chosen": -121709582.22222222, | |
| "logits/rejected": -105234408.72727273, | |
| "logps/chosen": -556.2324761284722, | |
| "logps/rejected": -708.0031516335227, | |
| "loss": 0.1302, | |
| "rewards/chosen": 4.338587866889106, | |
| "rewards/margins": 17.573104357478595, | |
| "rewards/rejected": -13.234516490589488, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 2.40532241555783, | |
| "grad_norm": 5.575592393255063, | |
| "kl": 9.638999938964844, | |
| "learning_rate": 1.1545056191381381e-07, | |
| "logits/chosen": -93364021.33333333, | |
| "logits/rejected": -152067344.0, | |
| "logps/chosen": -450.686279296875, | |
| "logps/rejected": -931.534912109375, | |
| "loss": 0.1371, | |
| "rewards/chosen": 5.051417350769043, | |
| "rewards/margins": 26.019991874694824, | |
| "rewards/rejected": -20.96857452392578, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 2.407369498464688, | |
| "eval_logits/chosen": -111141848.0, | |
| "eval_logits/rejected": -99338928.0, | |
| "eval_logps/chosen": -484.7994079589844, | |
| "eval_logps/rejected": -496.7482604980469, | |
| "eval_loss": 0.28797829151153564, | |
| "eval_rewards/chosen": 1.4960663318634033, | |
| "eval_rewards/margins": -5.191570997238159, | |
| "eval_rewards/rejected": 6.6876373291015625, | |
| "eval_runtime": 2.6674, | |
| "eval_samples_per_second": 3.749, | |
| "eval_steps_per_second": 0.75, | |
| "kl": 0.0, | |
| "step": 2352 | |
| }, | |
| { | |
| "epoch": 2.4155578300921188, | |
| "grad_norm": 10.829951404513368, | |
| "kl": 122.53065490722656, | |
| "learning_rate": 1.1167080173854682e-07, | |
| "logits/chosen": -103706450.28571428, | |
| "logits/rejected": -114846109.53846154, | |
| "logps/chosen": -502.20877511160717, | |
| "logps/rejected": -735.6909555288462, | |
| "loss": 0.1348, | |
| "rewards/chosen": 6.153853280203683, | |
| "rewards/margins": 23.16724580198854, | |
| "rewards/rejected": -17.013392521784855, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 2.425793244626407, | |
| "grad_norm": 15.679831732247864, | |
| "kl": 102.68487548828125, | |
| "learning_rate": 1.0794615714496174e-07, | |
| "logits/chosen": -113436697.6, | |
| "logits/rejected": -126385536.0, | |
| "logps/chosen": -537.893603515625, | |
| "logps/rejected": -781.82001953125, | |
| "loss": 0.1383, | |
| "rewards/chosen": 6.26806869506836, | |
| "rewards/margins": 16.49968566894531, | |
| "rewards/rejected": -10.231616973876953, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 2.436028659160696, | |
| "grad_norm": 14.38159184226354, | |
| "kl": 82.27883911132812, | |
| "learning_rate": 1.042771567720438e-07, | |
| "logits/chosen": -112610340.57142857, | |
| "logits/rejected": -105989385.84615384, | |
| "logps/chosen": -570.0341099330357, | |
| "logps/rejected": -723.4311899038462, | |
| "loss": 0.1201, | |
| "rewards/chosen": 5.978327069963727, | |
| "rewards/margins": 21.12158479795351, | |
| "rewards/rejected": -15.143257727989784, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 2.4462640736949846, | |
| "grad_norm": 0.6965533738726798, | |
| "kl": 99.01249694824219, | |
| "learning_rate": 1.0066432136119124e-07, | |
| "logits/chosen": -110192921.6, | |
| "logits/rejected": -127216371.2, | |
| "logps/chosen": -529.600537109375, | |
| "logps/rejected": -701.580419921875, | |
| "loss": 0.116, | |
| "rewards/chosen": 4.8227394104003904, | |
| "rewards/margins": 16.614155578613282, | |
| "rewards/rejected": -11.79141616821289, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 2.4564994882292734, | |
| "grad_norm": 15.536905841649384, | |
| "kl": 37.292198181152344, | |
| "learning_rate": 9.710816368230718e-08, | |
| "logits/chosen": -124482245.81818181, | |
| "logits/rejected": -94917866.66666667, | |
| "logps/chosen": -550.3764204545455, | |
| "logps/rejected": -566.4765082465278, | |
| "loss": 0.1288, | |
| "rewards/chosen": 6.663654674183238, | |
| "rewards/margins": 15.713337291370738, | |
| "rewards/rejected": -9.0496826171875, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 2.466734902763562, | |
| "grad_norm": 6.068120588196036, | |
| "kl": 35.238346099853516, | |
| "learning_rate": 9.360918846102056e-08, | |
| "logits/chosen": -117002346.66666667, | |
| "logits/rejected": -113970642.28571428, | |
| "logps/chosen": -617.9042154947916, | |
| "logps/rejected": -671.12451171875, | |
| "loss": 0.1702, | |
| "rewards/chosen": 5.355202356974284, | |
| "rewards/margins": 11.924032574608212, | |
| "rewards/rejected": -6.568830217633929, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 2.4769703172978508, | |
| "grad_norm": 16.413330514370553, | |
| "kl": 38.19875717163086, | |
| "learning_rate": 9.016789230705218e-08, | |
| "logits/chosen": -121840032.0, | |
| "logits/rejected": -112829514.66666667, | |
| "logps/chosen": -500.3292541503906, | |
| "logps/rejected": -700.7330729166666, | |
| "loss": 0.127, | |
| "rewards/chosen": 6.467748641967773, | |
| "rewards/margins": 23.09494972229004, | |
| "rewards/rejected": -16.627201080322266, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 2.487205731832139, | |
| "grad_norm": 13.868930669170362, | |
| "kl": 4.333488464355469, | |
| "learning_rate": 8.678476364372967e-08, | |
| "logits/chosen": -133496746.66666667, | |
| "logits/rejected": -99632568.0, | |
| "logps/chosen": -603.1192220052084, | |
| "logps/rejected": -568.6356201171875, | |
| "loss": 0.1184, | |
| "rewards/chosen": 3.938859303792318, | |
| "rewards/margins": 12.045438130696615, | |
| "rewards/rejected": -8.106578826904297, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 2.4974411463664277, | |
| "grad_norm": 12.860085633024877, | |
| "kl": 46.0883903503418, | |
| "learning_rate": 8.346028263866606e-08, | |
| "logits/chosen": -107161856.0, | |
| "logits/rejected": -118059622.4, | |
| "logps/chosen": -491.523388671875, | |
| "logps/rejected": -718.28203125, | |
| "loss": 0.11, | |
| "rewards/chosen": 4.71644172668457, | |
| "rewards/margins": 19.74509162902832, | |
| "rewards/rejected": -15.02864990234375, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 2.5076765609007166, | |
| "grad_norm": 14.911278979967584, | |
| "kl": 10.185958862304688, | |
| "learning_rate": 8.019492113560938e-08, | |
| "logits/chosen": -114439645.86666666, | |
| "logits/rejected": -130004211.2, | |
| "logps/chosen": -514.1682291666667, | |
| "logps/rejected": -731.38642578125, | |
| "loss": 0.1235, | |
| "rewards/chosen": 5.1857854207356775, | |
| "rewards/margins": 20.735327657063802, | |
| "rewards/rejected": -15.549542236328126, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 2.517911975435005, | |
| "grad_norm": 10.345603971630045, | |
| "kl": 43.71831130981445, | |
| "learning_rate": 7.698914258747392e-08, | |
| "logits/chosen": -97093056.0, | |
| "logits/rejected": -104960490.66666667, | |
| "logps/chosen": -479.16485595703125, | |
| "logps/rejected": -591.1146647135416, | |
| "loss": 0.1591, | |
| "rewards/chosen": 5.103046417236328, | |
| "rewards/margins": 6.8243058522542315, | |
| "rewards/rejected": -1.7212594350179036, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 2.528147389969294, | |
| "grad_norm": 8.021925150938095, | |
| "kl": 102.18989562988281, | |
| "learning_rate": 7.384340199056216e-08, | |
| "logits/chosen": -133009488.0, | |
| "logits/rejected": -111899104.0, | |
| "logps/chosen": -588.400146484375, | |
| "logps/rejected": -699.8717447916666, | |
| "loss": 0.1252, | |
| "rewards/chosen": 4.981169700622559, | |
| "rewards/margins": 16.865241050720215, | |
| "rewards/rejected": -11.884071350097656, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 2.5383828045035823, | |
| "grad_norm": 14.884330124927732, | |
| "kl": 105.43118286132812, | |
| "learning_rate": 7.07581458199879e-08, | |
| "logits/chosen": -111587126.85714285, | |
| "logits/rejected": -114068164.92307693, | |
| "logps/chosen": -524.8841378348214, | |
| "logps/rejected": -845.0369591346154, | |
| "loss": 0.0733, | |
| "rewards/chosen": 5.159061431884766, | |
| "rewards/margins": 30.266236818753757, | |
| "rewards/rejected": -25.10717538686899, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 2.548618219037871, | |
| "grad_norm": 9.74651017994561, | |
| "kl": 117.86248779296875, | |
| "learning_rate": 6.773381196630656e-08, | |
| "logits/chosen": -121217838.54545455, | |
| "logits/rejected": -122516280.8888889, | |
| "logps/chosen": -530.4308860085227, | |
| "logps/rejected": -772.5392795138889, | |
| "loss": 0.1062, | |
| "rewards/chosen": 7.943235917524858, | |
| "rewards/margins": 24.32046030988597, | |
| "rewards/rejected": -16.37722439236111, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 2.5588536335721597, | |
| "grad_norm": 11.454867890880834, | |
| "kl": 32.92910385131836, | |
| "learning_rate": 6.477082967336689e-08, | |
| "logits/chosen": -123549474.13333334, | |
| "logits/rejected": -121342182.4, | |
| "logps/chosen": -668.5967447916667, | |
| "logps/rejected": -656.6751953125, | |
| "loss": 0.1401, | |
| "rewards/chosen": 2.778960418701172, | |
| "rewards/margins": 7.843721389770508, | |
| "rewards/rejected": -5.064760971069336, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 2.5690890481064486, | |
| "grad_norm": 11.646385280367278, | |
| "kl": 101.4727783203125, | |
| "learning_rate": 6.186961947738739e-08, | |
| "logits/chosen": -108983193.6, | |
| "logits/rejected": -121287667.2, | |
| "logps/chosen": -504.491748046875, | |
| "logps/rejected": -775.789453125, | |
| "loss": 0.1439, | |
| "rewards/chosen": 5.482083511352539, | |
| "rewards/margins": 27.428909683227538, | |
| "rewards/rejected": -21.946826171875, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 2.579324462640737, | |
| "grad_norm": 7.859714543172971, | |
| "kl": 45.51924133300781, | |
| "learning_rate": 5.903059314726988e-08, | |
| "logits/chosen": -106612451.55555555, | |
| "logits/rejected": -140557684.36363637, | |
| "logps/chosen": -491.76817491319446, | |
| "logps/rejected": -827.0592151988636, | |
| "loss": 0.1053, | |
| "rewards/chosen": 6.706490834554036, | |
| "rewards/margins": 26.80151286269679, | |
| "rewards/rejected": -20.095022028142754, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 2.5895598771750254, | |
| "grad_norm": 4.937490929049706, | |
| "kl": 10.572738647460938, | |
| "learning_rate": 5.625415362615721e-08, | |
| "logits/chosen": -116113109.33333333, | |
| "logits/rejected": -102797296.0, | |
| "logps/chosen": -558.3600260416666, | |
| "logps/rejected": -646.0928955078125, | |
| "loss": 0.1756, | |
| "rewards/chosen": 3.8815214369032116, | |
| "rewards/margins": 7.237435552808973, | |
| "rewards/rejected": -3.3559141159057617, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 2.5997952917093143, | |
| "grad_norm": 14.37678972130025, | |
| "kl": 63.45878601074219, | |
| "learning_rate": 5.354069497424335e-08, | |
| "logits/chosen": -128418880.0, | |
| "logits/rejected": -113965074.28571428, | |
| "logps/chosen": -584.1385498046875, | |
| "logps/rejected": -602.3302176339286, | |
| "loss": 0.1687, | |
| "rewards/chosen": 6.9789072672526045, | |
| "rewards/margins": 13.387665158226376, | |
| "rewards/rejected": -6.408757890973773, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 2.610030706243603, | |
| "grad_norm": 10.459219539605776, | |
| "kl": 67.96239471435547, | |
| "learning_rate": 5.089060231284453e-08, | |
| "logits/chosen": -125863469.71428572, | |
| "logits/rejected": -103395268.92307693, | |
| "logps/chosen": -521.3954380580357, | |
| "logps/rejected": -616.2382061298077, | |
| "loss": 0.1155, | |
| "rewards/chosen": 5.0010577610560825, | |
| "rewards/margins": 12.883576235928377, | |
| "rewards/rejected": -7.882518474872295, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 2.6202661207778917, | |
| "grad_norm": 3.7716822556149108, | |
| "kl": 13.6009521484375, | |
| "learning_rate": 4.830425176973918e-08, | |
| "logits/chosen": -115673611.63636364, | |
| "logits/rejected": -123093077.33333333, | |
| "logps/chosen": -535.6490589488636, | |
| "logps/rejected": -707.8030056423611, | |
| "loss": 0.1083, | |
| "rewards/chosen": 3.2927263433283027, | |
| "rewards/margins": 18.8191172667224, | |
| "rewards/rejected": -15.526390923394096, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 2.63050153531218, | |
| "grad_norm": 11.540542432175807, | |
| "kl": 49.550472259521484, | |
| "learning_rate": 4.578201042578317e-08, | |
| "logits/chosen": -127875677.0909091, | |
| "logits/rejected": -106003847.1111111, | |
| "logps/chosen": -603.5329367897727, | |
| "logps/rejected": -758.7048611111111, | |
| "loss": 0.1564, | |
| "rewards/chosen": 5.201872045343572, | |
| "rewards/margins": 21.771740345039753, | |
| "rewards/rejected": -16.569868299696182, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 2.6407369498464686, | |
| "grad_norm": 15.048238698409907, | |
| "kl": 65.9299087524414, | |
| "learning_rate": 4.3324236262811395e-08, | |
| "logits/chosen": -112790442.66666667, | |
| "logits/rejected": -142388864.0, | |
| "logps/chosen": -601.592041015625, | |
| "logps/rejected": -929.5491333007812, | |
| "loss": 0.1373, | |
| "rewards/chosen": 1.01103679339091, | |
| "rewards/margins": 23.71454707781474, | |
| "rewards/rejected": -22.703510284423828, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 2.6509723643807575, | |
| "grad_norm": 14.536384887849032, | |
| "kl": 79.07972717285156, | |
| "learning_rate": 4.0931278112828203e-08, | |
| "logits/chosen": -124037196.8, | |
| "logits/rejected": -107391513.6, | |
| "logps/chosen": -587.8376953125, | |
| "logps/rejected": -705.137939453125, | |
| "loss": 0.1315, | |
| "rewards/chosen": 4.753457641601562, | |
| "rewards/margins": 18.607870483398436, | |
| "rewards/rejected": -13.854412841796876, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 2.661207778915046, | |
| "grad_norm": 11.239515091047844, | |
| "kl": 61.44083786010742, | |
| "learning_rate": 3.860347560849836e-08, | |
| "logits/chosen": -123588676.92307693, | |
| "logits/rejected": -121233179.42857143, | |
| "logps/chosen": -569.4657451923077, | |
| "logps/rejected": -808.5862165178571, | |
| "loss": 0.0885, | |
| "rewards/chosen": 6.6277606670673075, | |
| "rewards/margins": 27.442394717709046, | |
| "rewards/rejected": -20.81463405064174, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 2.671443193449335, | |
| "grad_norm": 17.077056073007693, | |
| "kl": 24.01074981689453, | |
| "learning_rate": 3.634115913494257e-08, | |
| "logits/chosen": -128678567.38461539, | |
| "logits/rejected": -114697645.71428572, | |
| "logps/chosen": -596.2249474158654, | |
| "logps/rejected": -722.2978515625, | |
| "loss": 0.1256, | |
| "rewards/chosen": 6.063357426570012, | |
| "rewards/margins": 14.008555485652042, | |
| "rewards/rejected": -7.945198059082031, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 2.6816786079836232, | |
| "grad_norm": 7.060231943175802, | |
| "kl": 2.9792327880859375, | |
| "learning_rate": 3.414464978284609e-08, | |
| "logits/chosen": -123749196.8, | |
| "logits/rejected": -113550476.8, | |
| "logps/chosen": -564.08203125, | |
| "logps/rejected": -686.4732421875, | |
| "loss": 0.1022, | |
| "rewards/chosen": 4.978203582763672, | |
| "rewards/margins": 14.521561431884766, | |
| "rewards/rejected": -9.543357849121094, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 2.691914022517912, | |
| "grad_norm": 14.640691296223196, | |
| "kl": 102.84552001953125, | |
| "learning_rate": 3.201425930288648e-08, | |
| "logits/chosen": -104803337.14285715, | |
| "logits/rejected": -97529668.92307693, | |
| "logps/chosen": -538.1909528459821, | |
| "logps/rejected": -575.7629206730769, | |
| "loss": 0.1412, | |
| "rewards/chosen": 5.508868081229074, | |
| "rewards/margins": 13.19678711105179, | |
| "rewards/rejected": -7.687919029822717, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 2.7021494370522006, | |
| "grad_norm": 6.935672879419478, | |
| "kl": 4.456298828125, | |
| "learning_rate": 2.995029006148631e-08, | |
| "logits/chosen": -114078361.6, | |
| "logits/rejected": -108900300.8, | |
| "logps/chosen": -508.643212890625, | |
| "logps/rejected": -690.940185546875, | |
| "loss": 0.1285, | |
| "rewards/chosen": 4.7735595703125, | |
| "rewards/margins": 19.15091552734375, | |
| "rewards/rejected": -14.37735595703125, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 2.7082906857727735, | |
| "eval_logits/chosen": -114141008.0, | |
| "eval_logits/rejected": -101458704.0, | |
| "eval_logps/chosen": -486.99053955078125, | |
| "eval_logps/rejected": -501.12237548828125, | |
| "eval_loss": 0.31646159291267395, | |
| "eval_rewards/chosen": 1.276953101158142, | |
| "eval_rewards/margins": -4.973272919654846, | |
| "eval_rewards/rejected": 6.250226020812988, | |
| "eval_runtime": 2.6383, | |
| "eval_samples_per_second": 3.79, | |
| "eval_steps_per_second": 0.758, | |
| "kl": 0.0, | |
| "step": 2646 | |
| }, | |
| { | |
| "epoch": 2.7123848515864895, | |
| "grad_norm": 10.887928094851356, | |
| "kl": 8.261627197265625, | |
| "learning_rate": 2.795303499789864e-08, | |
| "logits/chosen": -116870853.81818181, | |
| "logits/rejected": -107267861.33333333, | |
| "logps/chosen": -509.50319602272725, | |
| "logps/rejected": -655.4784071180555, | |
| "loss": 0.0932, | |
| "rewards/chosen": 4.065437316894531, | |
| "rewards/margins": 13.481632656521267, | |
| "rewards/rejected": -9.416195339626736, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 2.722620266120778, | |
| "grad_norm": 1.6419687975273198, | |
| "kl": 35.763301849365234, | |
| "learning_rate": 2.6022777582630384e-08, | |
| "logits/chosen": -112244152.8888889, | |
| "logits/rejected": -112315066.18181819, | |
| "logps/chosen": -522.056640625, | |
| "logps/rejected": -778.3561789772727, | |
| "loss": 0.132, | |
| "rewards/chosen": 5.681062486436632, | |
| "rewards/margins": 27.134175001972856, | |
| "rewards/rejected": -21.453112515536223, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 2.7328556806550663, | |
| "grad_norm": 9.109659129909929, | |
| "kl": 69.41864013671875, | |
| "learning_rate": 2.4159791777208728e-08, | |
| "logits/chosen": -113096832.0, | |
| "logits/rejected": -117938201.6, | |
| "logps/chosen": -536.3591796875, | |
| "logps/rejected": -802.1228515625, | |
| "loss": 0.1411, | |
| "rewards/chosen": 5.3816673278808596, | |
| "rewards/margins": 26.51123580932617, | |
| "rewards/rejected": -21.12956848144531, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 2.7430910951893552, | |
| "grad_norm": 2.540946323619725, | |
| "kl": 104.94831848144531, | |
| "learning_rate": 2.236434199529813e-08, | |
| "logits/chosen": -113226342.4, | |
| "logits/rejected": -109490892.8, | |
| "logps/chosen": -558.754833984375, | |
| "logps/rejected": -850.9609375, | |
| "loss": 0.1044, | |
| "rewards/chosen": 4.071189117431641, | |
| "rewards/margins": 33.206589508056645, | |
| "rewards/rejected": -29.135400390625, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 2.7533265097236437, | |
| "grad_norm": 4.993767586326576, | |
| "kl": 71.74156188964844, | |
| "learning_rate": 2.063668306517197e-08, | |
| "logits/chosen": -120991760.0, | |
| "logits/rejected": -126745472.0, | |
| "logps/chosen": -594.7095947265625, | |
| "logps/rejected": -762.1270345052084, | |
| "loss": 0.1218, | |
| "rewards/chosen": 3.4017386436462402, | |
| "rewards/margins": 19.160385290781655, | |
| "rewards/rejected": -15.758646647135416, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 2.7635619242579326, | |
| "grad_norm": 10.093784018688082, | |
| "kl": 109.51588439941406, | |
| "learning_rate": 1.897706019354478e-08, | |
| "logits/chosen": -111541806.54545455, | |
| "logits/rejected": -112799928.8888889, | |
| "logps/chosen": -518.2110262784091, | |
| "logps/rejected": -666.8862847222222, | |
| "loss": 0.1316, | |
| "rewards/chosen": 6.8094329833984375, | |
| "rewards/margins": 24.927536010742188, | |
| "rewards/rejected": -18.11810302734375, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 2.773797338792221, | |
| "grad_norm": 6.280834542580219, | |
| "kl": 58.57270050048828, | |
| "learning_rate": 1.7385708930770294e-08, | |
| "logits/chosen": -116748681.14285715, | |
| "logits/rejected": -133647261.53846154, | |
| "logps/chosen": -565.2002999441964, | |
| "logps/rejected": -752.8988882211538, | |
| "loss": 0.104, | |
| "rewards/chosen": 7.381816319056919, | |
| "rewards/margins": 21.23567702744033, | |
| "rewards/rejected": -13.853860708383413, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 2.7840327533265095, | |
| "grad_norm": 8.201048256790347, | |
| "kl": 107.66532897949219, | |
| "learning_rate": 1.5862855137409203e-08, | |
| "logits/chosen": -115046613.33333333, | |
| "logits/rejected": -128310921.14285715, | |
| "logps/chosen": -652.1544596354166, | |
| "logps/rejected": -716.2571149553571, | |
| "loss": 0.1398, | |
| "rewards/chosen": 0.37717580795288086, | |
| "rewards/margins": 6.230465275900705, | |
| "rewards/rejected": -5.853289467947824, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 2.7942681678607983, | |
| "grad_norm": 13.370009278983177, | |
| "kl": 48.944252014160156, | |
| "learning_rate": 1.4408714952173162e-08, | |
| "logits/chosen": -105869802.66666667, | |
| "logits/rejected": -128118326.85714285, | |
| "logps/chosen": -429.5232747395833, | |
| "logps/rejected": -792.2711356026786, | |
| "loss": 0.1207, | |
| "rewards/chosen": 6.8134206136067705, | |
| "rewards/margins": 18.571839105515252, | |
| "rewards/rejected": -11.758418491908483, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 2.8045035823950872, | |
| "grad_norm": 14.217809187421249, | |
| "kl": 5.711326599121094, | |
| "learning_rate": 1.3023494761248422e-08, | |
| "logits/chosen": -137282560.0, | |
| "logits/rejected": -104739466.66666667, | |
| "logps/chosen": -653.2716064453125, | |
| "logps/rejected": -704.67724609375, | |
| "loss": 0.1089, | |
| "rewards/chosen": 6.01470947265625, | |
| "rewards/margins": 23.953670501708984, | |
| "rewards/rejected": -17.938961029052734, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 2.8147389969293757, | |
| "grad_norm": 19.336026642511733, | |
| "kl": 2.7219467163085938, | |
| "learning_rate": 1.1707391169002767e-08, | |
| "logits/chosen": -134308893.53846154, | |
| "logits/rejected": -110243748.57142857, | |
| "logps/chosen": -623.1481370192307, | |
| "logps/rejected": -802.8217075892857, | |
| "loss": 0.1541, | |
| "rewards/chosen": 2.4775123596191406, | |
| "rewards/margins": 22.423845018659318, | |
| "rewards/rejected": -19.946332659040177, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 2.824974411463664, | |
| "grad_norm": 8.72024018742674, | |
| "kl": 154.34445190429688, | |
| "learning_rate": 1.0460590970082062e-08, | |
| "logits/chosen": -115301104.0, | |
| "logits/rejected": -118230410.66666667, | |
| "logps/chosen": -532.9121704101562, | |
| "logps/rejected": -702.4969075520834, | |
| "loss": 0.1176, | |
| "rewards/chosen": 4.131191730499268, | |
| "rewards/margins": 18.579192320505776, | |
| "rewards/rejected": -14.44800059000651, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 2.835209825997953, | |
| "grad_norm": 20.017577796935008, | |
| "kl": 86.5708999633789, | |
| "learning_rate": 9.283271122898172e-09, | |
| "logits/chosen": -125055464.72727273, | |
| "logits/rejected": -112441578.66666667, | |
| "logps/chosen": -558.66748046875, | |
| "logps/rejected": -690.9308810763889, | |
| "loss": 0.138, | |
| "rewards/chosen": 5.285506855357777, | |
| "rewards/margins": 12.411841151690243, | |
| "rewards/rejected": -7.126334296332465, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 2.8454452405322415, | |
| "grad_norm": 5.245051575737728, | |
| "kl": 35.114925384521484, | |
| "learning_rate": 8.175598724513234e-09, | |
| "logits/chosen": -109534720.0, | |
| "logits/rejected": -102300536.8888889, | |
| "logps/chosen": -508.27436967329544, | |
| "logps/rejected": -593.6958550347222, | |
| "loss": 0.1155, | |
| "rewards/chosen": 6.462843461470171, | |
| "rewards/margins": 12.150792748037011, | |
| "rewards/rejected": -5.68794928656684, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 2.8556806550665303, | |
| "grad_norm": 7.518000103434843, | |
| "kl": 38.176666259765625, | |
| "learning_rate": 7.137730986923829e-09, | |
| "logits/chosen": -124864187.07692307, | |
| "logits/rejected": -117530971.42857143, | |
| "logps/chosen": -513.7694936899038, | |
| "logps/rejected": -756.7769252232143, | |
| "loss": 0.105, | |
| "rewards/chosen": 5.097284170297476, | |
| "rewards/margins": 19.500482412484978, | |
| "rewards/rejected": -14.4031982421875, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 2.865916069600819, | |
| "grad_norm": 7.323346052279645, | |
| "kl": 82.49562072753906, | |
| "learning_rate": 6.1698152147475755e-09, | |
| "logits/chosen": -126298890.66666667, | |
| "logits/rejected": -132990744.0, | |
| "logps/chosen": -573.87841796875, | |
| "logps/rejected": -943.2960205078125, | |
| "loss": 0.1373, | |
| "rewards/chosen": 5.581108729044597, | |
| "rewards/margins": 32.77851931254069, | |
| "rewards/rejected": -27.197410583496094, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 2.8761514841351072, | |
| "grad_norm": 10.36267978448859, | |
| "kl": 61.939781188964844, | |
| "learning_rate": 5.271988784316172e-09, | |
| "logits/chosen": -123898256.0, | |
| "logits/rejected": -113120874.66666667, | |
| "logps/chosen": -540.285888671875, | |
| "logps/rejected": -723.02099609375, | |
| "loss": 0.1174, | |
| "rewards/chosen": 6.676476955413818, | |
| "rewards/margins": 20.52393356959025, | |
| "rewards/rejected": -13.847456614176432, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 2.886386898669396, | |
| "grad_norm": 13.425727534613003, | |
| "kl": 77.56233215332031, | |
| "learning_rate": 4.444379124178055e-09, | |
| "logits/chosen": -122654720.0, | |
| "logits/rejected": -109599568.0, | |
| "logps/chosen": -599.6444905598959, | |
| "logps/rejected": -687.8331298828125, | |
| "loss": 0.168, | |
| "rewards/chosen": 0.1978003184000651, | |
| "rewards/margins": 3.7757269541422525, | |
| "rewards/rejected": -3.5779266357421875, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 2.896622313203685, | |
| "grad_norm": 15.4066210292021, | |
| "kl": 84.69986724853516, | |
| "learning_rate": 3.6871036970116952e-09, | |
| "logits/chosen": -146604400.0, | |
| "logits/rejected": -113729952.0, | |
| "logps/chosen": -643.28515625, | |
| "logps/rejected": -616.5157877604166, | |
| "loss": 0.1298, | |
| "rewards/chosen": 4.322854995727539, | |
| "rewards/margins": 12.591309229532877, | |
| "rewards/rejected": -8.268454233805338, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 2.9068577277379735, | |
| "grad_norm": 9.219603587639986, | |
| "kl": 79.7108154296875, | |
| "learning_rate": 3.000269982954773e-09, | |
| "logits/chosen": -115262836.36363636, | |
| "logits/rejected": -134471665.7777778, | |
| "logps/chosen": -507.20157137784093, | |
| "logps/rejected": -831.7874348958334, | |
| "loss": 0.1479, | |
| "rewards/chosen": 3.993692571466619, | |
| "rewards/margins": 21.894585041084675, | |
| "rewards/rejected": -17.900892469618057, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 2.917093142272262, | |
| "grad_norm": 16.91311490037144, | |
| "kl": 77.60739135742188, | |
| "learning_rate": 2.3839754643491526e-09, | |
| "logits/chosen": -112859029.33333333, | |
| "logits/rejected": -115164448.0, | |
| "logps/chosen": -526.4039306640625, | |
| "logps/rejected": -760.6889038085938, | |
| "loss": 0.1223, | |
| "rewards/chosen": 4.538237889607747, | |
| "rewards/margins": 15.784871419270832, | |
| "rewards/rejected": -11.246633529663086, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 2.927328556806551, | |
| "grad_norm": 8.339427757770848, | |
| "kl": 5.040294647216797, | |
| "learning_rate": 1.838307611905343e-09, | |
| "logits/chosen": -115087931.07692307, | |
| "logits/rejected": -111349010.28571428, | |
| "logps/chosen": -522.8479567307693, | |
| "logps/rejected": -736.3690011160714, | |
| "loss": 0.1385, | |
| "rewards/chosen": 3.7734222412109375, | |
| "rewards/margins": 20.419662475585938, | |
| "rewards/rejected": -16.646240234375, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 2.9375639713408392, | |
| "grad_norm": 2.308264911894697, | |
| "kl": 47.0976676940918, | |
| "learning_rate": 1.3633438722877033e-09, | |
| "logits/chosen": -123765944.0, | |
| "logits/rejected": -125191381.33333333, | |
| "logps/chosen": -528.5211181640625, | |
| "logps/rejected": -757.469482421875, | |
| "loss": 0.1018, | |
| "rewards/chosen": 5.355100631713867, | |
| "rewards/margins": 23.6528263092041, | |
| "rewards/rejected": -18.297725677490234, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 2.947799385875128, | |
| "grad_norm": 10.925389055878155, | |
| "kl": 47.42646408081055, | |
| "learning_rate": 9.5915165712257e-10, | |
| "logits/chosen": -115053260.8, | |
| "logits/rejected": -94018624.0, | |
| "logps/chosen": -509.125390625, | |
| "logps/rejected": -618.67490234375, | |
| "loss": 0.1529, | |
| "rewards/chosen": 5.673241424560547, | |
| "rewards/margins": 16.379257202148438, | |
| "rewards/rejected": -10.70601577758789, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 2.9580348004094166, | |
| "grad_norm": 12.802240055553504, | |
| "kl": 80.9378662109375, | |
| "learning_rate": 6.257883334302994e-10, | |
| "logits/chosen": -112381975.27272727, | |
| "logits/rejected": -109818197.33333333, | |
| "logps/chosen": -535.2524857954545, | |
| "logps/rejected": -599.8187391493055, | |
| "loss": 0.1487, | |
| "rewards/chosen": 5.386872725053267, | |
| "rewards/margins": 6.529768317636817, | |
| "rewards/rejected": -1.1428955925835504, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 2.968270214943705, | |
| "grad_norm": 11.03345507942441, | |
| "kl": 18.87073516845703, | |
| "learning_rate": 3.6330121548344696e-10, | |
| "logits/chosen": -119768729.6, | |
| "logits/rejected": -113732620.8, | |
| "logps/chosen": -533.147265625, | |
| "logps/rejected": -648.48359375, | |
| "loss": 0.1007, | |
| "rewards/chosen": 5.657349395751953, | |
| "rewards/margins": 18.19538269042969, | |
| "rewards/rejected": -12.538033294677735, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 2.978505629477994, | |
| "grad_norm": 5.957476587134599, | |
| "kl": 15.592277526855469, | |
| "learning_rate": 1.7172755809119476e-10, | |
| "logits/chosen": -118812183.27272727, | |
| "logits/rejected": -118075306.66666667, | |
| "logps/chosen": -516.0139382102273, | |
| "logps/rejected": -703.6335720486111, | |
| "loss": 0.1288, | |
| "rewards/chosen": 5.660085851495916, | |
| "rewards/margins": 22.83608072454279, | |
| "rewards/rejected": -17.175994873046875, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 2.9887410440122824, | |
| "grad_norm": 8.59267719952877, | |
| "kl": 68.4888687133789, | |
| "learning_rate": 5.10945513118588e-11, | |
| "logits/chosen": -133802453.33333333, | |
| "logits/rejected": -118769832.0, | |
| "logps/chosen": -579.6878662109375, | |
| "logps/rejected": -798.2313232421875, | |
| "loss": 0.1533, | |
| "rewards/chosen": 4.62050183614095, | |
| "rewards/margins": 28.16325314839681, | |
| "rewards/rejected": -23.54275131225586, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 2.9989764585465712, | |
| "grad_norm": 9.368139533103754, | |
| "kl": 44.17957305908203, | |
| "learning_rate": 1.419316593864739e-12, | |
| "logits/chosen": -123620633.6, | |
| "logits/rejected": -123439654.4, | |
| "logps/chosen": -555.02197265625, | |
| "logps/rejected": -822.5419921875, | |
| "loss": 0.1122, | |
| "rewards/chosen": 5.326744079589844, | |
| "rewards/margins": 26.535008239746094, | |
| "rewards/rejected": -21.20826416015625, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 2931, | |
| "total_flos": 6.959030383253914e+16, | |
| "train_loss": 0.2375620225312888, | |
| "train_runtime": 17678.1972, | |
| "train_samples_per_second": 2.652, | |
| "train_steps_per_second": 0.166 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 2931, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 294, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 6.959030383253914e+16, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |