| { |
| "best_metric": 0.20157214999198914, |
| "best_model_checkpoint": "saves/sycophancy/Llama-8B-3.1-Instruct/kto-1000/checkpoint-900", |
| "epoch": 9.955555555555556, |
| "eval_steps": 50, |
| "global_step": 1120, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.08888888888888889, |
| "grad_norm": 0.6378149390220642, |
| "kl": 3.622833251953125, |
| "learning_rate": 4.4642857142857147e-07, |
| "logits/chosen": -5340750.260869565, |
| "logits/rejected": -7994575.05882353, |
| "logps/chosen": -18.079263438349184, |
| "logps/rejected": -21.049960865693933, |
| "loss": 0.4996, |
| "rewards/chosen": 9.296146099982055e-05, |
| "rewards/margins": 0.004717958452718337, |
| "rewards/rejected": -0.0046249969917185165, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.17777777777777778, |
| "grad_norm": 0.9874787926673889, |
| "kl": 4.1745758056640625, |
| "learning_rate": 8.928571428571429e-07, |
| "logits/chosen": -6424458.126582279, |
| "logits/rejected": -7035510.518518519, |
| "logps/chosen": -16.333378659018987, |
| "logps/rejected": -20.153974368248456, |
| "loss": 0.5004, |
| "rewards/chosen": 0.00033599175984346414, |
| "rewards/margins": -0.0020582731030176983, |
| "rewards/rejected": 0.0023942648628611624, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.26666666666666666, |
| "grad_norm": 0.7464212775230408, |
| "kl": 5.602532386779785, |
| "learning_rate": 1.3392857142857143e-06, |
| "logits/chosen": -6498458.666666667, |
| "logits/rejected": -7804168.585365853, |
| "logps/chosen": -18.974333934294872, |
| "logps/rejected": -19.95519424066311, |
| "loss": 0.4995, |
| "rewards/chosen": 0.004143252204626034, |
| "rewards/margins": 0.0015260380569885042, |
| "rewards/rejected": 0.00261721414763753, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.35555555555555557, |
| "grad_norm": 0.8639384508132935, |
| "kl": 3.769927978515625, |
| "learning_rate": 1.7857142857142859e-06, |
| "logits/chosen": -6949126.481012658, |
| "logits/rejected": -7009534.419753087, |
| "logps/chosen": -18.691378436511076, |
| "logps/rejected": -21.17385223765432, |
| "loss": 0.5005, |
| "rewards/chosen": 4.183029449438747e-05, |
| "rewards/margins": -0.003747381938260167, |
| "rewards/rejected": 0.0037892122327545545, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.4444444444444444, |
| "grad_norm": 0.7081840634346008, |
| "kl": 4.460285186767578, |
| "learning_rate": 2.2321428571428573e-06, |
| "logits/chosen": -6138506.810810811, |
| "logits/rejected": -7474874.790697674, |
| "logps/chosen": -15.936563027871621, |
| "logps/rejected": -19.708515965661338, |
| "loss": 0.4996, |
| "rewards/chosen": 0.006799320916871767, |
| "rewards/margins": 0.0036495141567935143, |
| "rewards/rejected": 0.0031498067600782527, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.4444444444444444, |
| "eval_logits/chosen": -5421844.2105263155, |
| "eval_logits/rejected": -7437597.257142857, |
| "eval_logps/chosen": -15.877472245065789, |
| "eval_logps/rejected": -19.51913364955357, |
| "eval_loss": 0.49955567717552185, |
| "eval_rewards/chosen": 0.004892895723644056, |
| "eval_rewards/margins": 0.0035702324675438097, |
| "eval_rewards/rejected": 0.0013226632561002458, |
| "eval_runtime": 40.2304, |
| "eval_samples_per_second": 4.971, |
| "eval_steps_per_second": 2.486, |
| "kl": 5.540611267089844, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.5333333333333333, |
| "grad_norm": 1.241780161857605, |
| "kl": 4.673937797546387, |
| "learning_rate": 2.6785714285714285e-06, |
| "logits/chosen": -6793330.488888889, |
| "logits/rejected": -7055889.371428572, |
| "logps/chosen": -17.507694498697916, |
| "logps/rejected": -19.42872314453125, |
| "loss": 0.4997, |
| "rewards/chosen": 0.0070105810960133874, |
| "rewards/margins": 0.002384628426460993, |
| "rewards/rejected": 0.0046259526695523944, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.6222222222222222, |
| "grad_norm": 0.9310891032218933, |
| "kl": 5.921775817871094, |
| "learning_rate": 3.125e-06, |
| "logits/chosen": -5830232.094117647, |
| "logits/rejected": -6842688.0, |
| "logps/chosen": -16.648492072610296, |
| "logps/rejected": -19.747470703125, |
| "loss": 0.4992, |
| "rewards/chosen": 0.017277229533475987, |
| "rewards/margins": 0.00601234426685408, |
| "rewards/rejected": 0.011264885266621907, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.7111111111111111, |
| "grad_norm": 0.8196409344673157, |
| "kl": 5.64462947845459, |
| "learning_rate": 3.5714285714285718e-06, |
| "logits/chosen": -5826336.0, |
| "logits/rejected": -7215595.865168539, |
| "logps/chosen": -16.967518981073944, |
| "logps/rejected": -19.847180312938903, |
| "loss": 0.4995, |
| "rewards/chosen": 0.025596736182629223, |
| "rewards/margins": 0.005482420153134156, |
| "rewards/rejected": 0.020114316029495066, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.839858889579773, |
| "kl": 4.220225811004639, |
| "learning_rate": 4.017857142857143e-06, |
| "logits/chosen": -5777798.168674699, |
| "logits/rejected": -7580332.051948052, |
| "logps/chosen": -16.549828219126507, |
| "logps/rejected": -19.9091701755276, |
| "loss": 0.4976, |
| "rewards/chosen": 0.02844639571316271, |
| "rewards/margins": 0.017465983155635093, |
| "rewards/rejected": 0.010980412557527616, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "grad_norm": 1.1537283658981323, |
| "kl": 2.7403106689453125, |
| "learning_rate": 4.464285714285715e-06, |
| "logits/chosen": -7065959.619047619, |
| "logits/rejected": -6761146.947368421, |
| "logps/chosen": -14.906824021112351, |
| "logps/rejected": -19.169194271689967, |
| "loss": 0.4926, |
| "rewards/chosen": 0.05525268827165876, |
| "rewards/margins": 0.05733023875189903, |
| "rewards/rejected": -0.00207755048024027, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "eval_logits/chosen": -5315816.421052632, |
| "eval_logits/rejected": -7413740.4952380955, |
| "eval_logps/chosen": -15.246182411595395, |
| "eval_logps/rejected": -19.441766648065474, |
| "eval_loss": 0.4927283525466919, |
| "eval_rewards/chosen": 0.06802193993016294, |
| "eval_rewards/margins": 0.05896249884053281, |
| "eval_rewards/rejected": 0.009059441089630128, |
| "eval_runtime": 40.4071, |
| "eval_samples_per_second": 4.95, |
| "eval_steps_per_second": 2.475, |
| "kl": 4.745123863220215, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.9777777777777777, |
| "grad_norm": 0.7985321283340454, |
| "kl": 5.093235492706299, |
| "learning_rate": 4.910714285714286e-06, |
| "logits/chosen": -5042543.342465754, |
| "logits/rejected": -7843558.988505747, |
| "logps/chosen": -17.295563998287673, |
| "logps/rejected": -20.055018633261493, |
| "loss": 0.4911, |
| "rewards/chosen": 0.07309432878886184, |
| "rewards/margins": 0.07835055548193849, |
| "rewards/rejected": -0.005256226693076649, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.0666666666666667, |
| "grad_norm": 1.0267765522003174, |
| "kl": 3.9262523651123047, |
| "learning_rate": 4.999222955002041e-06, |
| "logits/chosen": -6111225.6, |
| "logits/rejected": -7384148.8, |
| "logps/chosen": -15.335737609863282, |
| "logps/rejected": -20.553973388671874, |
| "loss": 0.4774, |
| "rewards/chosen": 0.1081552267074585, |
| "rewards/margins": 0.1778375506401062, |
| "rewards/rejected": -0.0696823239326477, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.1555555555555554, |
| "grad_norm": 1.1002306938171387, |
| "kl": 7.534914016723633, |
| "learning_rate": 4.996067037544542e-06, |
| "logits/chosen": -6226998.518518519, |
| "logits/rejected": -6797638.481012658, |
| "logps/chosen": -15.14893783757716, |
| "logps/rejected": -21.460035106803797, |
| "loss": 0.4529, |
| "rewards/chosen": 0.17665848908600984, |
| "rewards/margins": 0.37105785196396424, |
| "rewards/rejected": -0.19439936287795442, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.2444444444444445, |
| "grad_norm": 1.6234118938446045, |
| "kl": 2.820012092590332, |
| "learning_rate": 4.990486745229364e-06, |
| "logits/chosen": -6370023.905882353, |
| "logits/rejected": -6765664.426666667, |
| "logps/chosen": -13.538666130514706, |
| "logps/rejected": -24.607947591145834, |
| "loss": 0.4222, |
| "rewards/chosen": 0.2904420067282284, |
| "rewards/margins": 0.6410638532451555, |
| "rewards/rejected": -0.35062184651692707, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.3333333333333333, |
| "grad_norm": 2.411935806274414, |
| "kl": 5.159808158874512, |
| "learning_rate": 4.982487498071349e-06, |
| "logits/chosen": -6229082.810810811, |
| "logits/rejected": -8071890.604651162, |
| "logps/chosen": -14.392588022592905, |
| "logps/rejected": -25.66743504723837, |
| "loss": 0.3935, |
| "rewards/chosen": 0.3086823643864812, |
| "rewards/margins": 0.8838760356795181, |
| "rewards/rejected": -0.5751936712930369, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.3333333333333333, |
| "eval_logits/chosen": -4366973.978947368, |
| "eval_logits/rejected": -6947738.209523809, |
| "eval_logps/chosen": -12.933160721628289, |
| "eval_logps/rejected": -25.257954334077382, |
| "eval_loss": 0.3965040445327759, |
| "eval_rewards/chosen": 0.2993241360313014, |
| "eval_rewards/margins": 0.8718834563903044, |
| "eval_rewards/rejected": -0.5725593203590029, |
| "eval_runtime": 40.1917, |
| "eval_samples_per_second": 4.976, |
| "eval_steps_per_second": 2.488, |
| "kl": 0.6683712005615234, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.4222222222222223, |
| "grad_norm": 1.8237110376358032, |
| "kl": 0.0, |
| "learning_rate": 4.9720770655628216e-06, |
| "logits/chosen": -6551021.714285715, |
| "logits/rejected": -5174619.7894736845, |
| "logps/chosen": -13.481320335751487, |
| "logps/rejected": -25.97862484580592, |
| "loss": 0.3855, |
| "rewards/chosen": 0.3265551612490699, |
| "rewards/margins": 1.018067168711421, |
| "rewards/rejected": -0.691512007462351, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.511111111111111, |
| "grad_norm": 3.4937143325805664, |
| "kl": 4.106247901916504, |
| "learning_rate": 4.959265559127253e-06, |
| "logits/chosen": -5526704.0, |
| "logits/rejected": -6293590.0, |
| "logps/chosen": -12.759190877278646, |
| "logps/rejected": -31.458213806152344, |
| "loss": 0.3504, |
| "rewards/chosen": 0.4065140088399251, |
| "rewards/margins": 1.4857414563496907, |
| "rewards/rejected": -1.0792274475097656, |
| "step": 170 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 1.698695421218872, |
| "kl": 7.25313663482666, |
| "learning_rate": 4.944065422298262e-06, |
| "logits/chosen": -4328710.716049382, |
| "logits/rejected": -6780950.683544304, |
| "logps/chosen": -11.54164406105324, |
| "logps/rejected": -29.772522745253166, |
| "loss": 0.3289, |
| "rewards/chosen": 0.5871110610020014, |
| "rewards/margins": 1.6339186519212063, |
| "rewards/rejected": -1.046807590919205, |
| "step": 180 |
| }, |
| { |
| "epoch": 1.6888888888888889, |
| "grad_norm": 3.9022204875946045, |
| "kl": 0.0, |
| "learning_rate": 4.9264914186334775e-06, |
| "logits/chosen": -4316673.882352941, |
| "logits/rejected": -6495770.434782608, |
| "logps/chosen": -11.134178610409007, |
| "logps/rejected": -32.336056916610055, |
| "loss": 0.3026, |
| "rewards/chosen": 0.6231782576617073, |
| "rewards/margins": 1.8932676144573086, |
| "rewards/rejected": -1.2700893567956013, |
| "step": 190 |
| }, |
| { |
| "epoch": 1.7777777777777777, |
| "grad_norm": 2.392188787460327, |
| "kl": 0.0, |
| "learning_rate": 4.90656061737503e-06, |
| "logits/chosen": -5518577.230769231, |
| "logits/rejected": -6244784.0, |
| "logps/chosen": -12.836633926782852, |
| "logps/rejected": -38.383896246189025, |
| "loss": 0.288, |
| "rewards/chosen": 0.5627804780617739, |
| "rewards/margins": 2.217969806139137, |
| "rewards/rejected": -1.6551893280773629, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.7777777777777777, |
| "eval_logits/chosen": -3715966.989473684, |
| "eval_logits/rejected": -6637966.628571428, |
| "eval_logps/chosen": -11.326945415296052, |
| "eval_logps/rejected": -38.19833984375, |
| "eval_loss": 0.2868475317955017, |
| "eval_rewards/chosen": 0.4599455582468133, |
| "eval_rewards/margins": 2.3265433423799977, |
| "eval_rewards/rejected": -1.8665977841331844, |
| "eval_runtime": 40.3421, |
| "eval_samples_per_second": 4.958, |
| "eval_steps_per_second": 2.479, |
| "kl": 0.0, |
| "step": 200 |
| }, |
| { |
| "epoch": 1.8666666666666667, |
| "grad_norm": 17.500810623168945, |
| "kl": 2.489029884338379, |
| "learning_rate": 4.884292376870567e-06, |
| "logits/chosen": -4393788.0, |
| "logits/rejected": -5814082.8, |
| "logps/chosen": -12.235552978515624, |
| "logps/rejected": -40.3268310546875, |
| "loss": 0.2701, |
| "rewards/chosen": 0.5962685585021973, |
| "rewards/margins": 2.66733922958374, |
| "rewards/rejected": -2.071070671081543, |
| "step": 210 |
| }, |
| { |
| "epoch": 1.9555555555555557, |
| "grad_norm": 9.270183563232422, |
| "kl": 0.0, |
| "learning_rate": 4.859708325770919e-06, |
| "logits/chosen": -3907669.9178082193, |
| "logits/rejected": -6618544.551724138, |
| "logps/chosen": -11.271523671607449, |
| "logps/rejected": -44.115442034841955, |
| "loss": 0.2588, |
| "rewards/chosen": 0.5157197926142444, |
| "rewards/margins": 2.958967620604546, |
| "rewards/rejected": -2.4432478279903016, |
| "step": 220 |
| }, |
| { |
| "epoch": 2.0444444444444443, |
| "grad_norm": 1.4330343008041382, |
| "kl": 0.0, |
| "learning_rate": 4.832832342022666e-06, |
| "logits/chosen": -4723112.7272727275, |
| "logits/rejected": -6010504.0, |
| "logps/chosen": -11.930933172052557, |
| "logps/rejected": -48.691870795355904, |
| "loss": 0.2795, |
| "rewards/chosen": 0.406935605135831, |
| "rewards/margins": 3.253672936950067, |
| "rewards/rejected": -2.846737331814236, |
| "step": 230 |
| }, |
| { |
| "epoch": 2.1333333333333333, |
| "grad_norm": 5.4253339767456055, |
| "kl": 0.0, |
| "learning_rate": 4.80369052967602e-06, |
| "logits/chosen": -6000682.53164557, |
| "logits/rejected": -5252077.8271604935, |
| "logps/chosen": -14.767633751977849, |
| "logps/rejected": -53.28524064429013, |
| "loss": 0.2758, |
| "rewards/chosen": 0.18598234200779395, |
| "rewards/margins": 3.565630056128164, |
| "rewards/rejected": -3.3796477141203702, |
| "step": 240 |
| }, |
| { |
| "epoch": 2.2222222222222223, |
| "grad_norm": 0.7183493971824646, |
| "kl": 0.0, |
| "learning_rate": 4.7723111935305275e-06, |
| "logits/chosen": -3402531.878787879, |
| "logits/rejected": -6712939.574468086, |
| "logps/chosen": -13.516438802083334, |
| "logps/rejected": -56.948673329454785, |
| "loss": 0.2304, |
| "rewards/chosen": 0.5478392514315519, |
| "rewards/margins": 4.241653143552563, |
| "rewards/rejected": -3.693813892121011, |
| "step": 250 |
| }, |
| { |
| "epoch": 2.2222222222222223, |
| "eval_logits/chosen": -3821936.505263158, |
| "eval_logits/rejected": -6486972.952380952, |
| "eval_logps/chosen": -13.11566868832237, |
| "eval_logps/rejected": -60.788411458333336, |
| "eval_loss": 0.24558097124099731, |
| "eval_rewards/chosen": 0.28107337951660155, |
| "eval_rewards/margins": 4.406677918207078, |
| "eval_rewards/rejected": -4.125604538690476, |
| "eval_runtime": 40.127, |
| "eval_samples_per_second": 4.984, |
| "eval_steps_per_second": 2.492, |
| "kl": 0.0, |
| "step": 250 |
| }, |
| { |
| "epoch": 2.311111111111111, |
| "grad_norm": 1.6400600671768188, |
| "kl": 0.0, |
| "learning_rate": 4.7387248116432524e-06, |
| "logits/chosen": -4895198.784810127, |
| "logits/rejected": -5800785.777777778, |
| "logps/chosen": -9.293088502521757, |
| "logps/rejected": -63.35857325424383, |
| "loss": 0.1984, |
| "rewards/chosen": 0.8992576116248022, |
| "rewards/margins": 5.3043196866392694, |
| "rewards/rejected": -4.405062075014468, |
| "step": 260 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 12.461936950683594, |
| "kl": 0.0, |
| "learning_rate": 4.702964005726106e-06, |
| "logits/chosen": -4234095.08045977, |
| "logits/rejected": -6007622.136986301, |
| "logps/chosen": -9.521214276894757, |
| "logps/rejected": -64.36043182791096, |
| "loss": 0.2428, |
| "rewards/chosen": 0.6787874068336925, |
| "rewards/margins": 4.935348268081552, |
| "rewards/rejected": -4.256560861247859, |
| "step": 270 |
| }, |
| { |
| "epoch": 2.488888888888889, |
| "grad_norm": 0.6239128112792969, |
| "kl": 4.688835144042969, |
| "learning_rate": 4.665063509461098e-06, |
| "logits/chosen": -3335776.7710843375, |
| "logits/rejected": -6998775.688311689, |
| "logps/chosen": -8.463530391095633, |
| "logps/rejected": -64.86012327516234, |
| "loss": 0.1964, |
| "rewards/chosen": 0.7906499655849962, |
| "rewards/margins": 5.239572655443966, |
| "rewards/rejected": -4.4489226898589695, |
| "step": 280 |
| }, |
| { |
| "epoch": 2.5777777777777775, |
| "grad_norm": 2.890619993209839, |
| "kl": 0.0, |
| "learning_rate": 4.625060134764273e-06, |
| "logits/chosen": -4292849.904761905, |
| "logits/rejected": -6233568.0, |
| "logps/chosen": -11.808643159412203, |
| "logps/rejected": -65.01340203536184, |
| "loss": 0.2304, |
| "rewards/chosen": 0.563728514171782, |
| "rewards/margins": 5.269437871182473, |
| "rewards/rejected": -4.705709357010691, |
| "step": 290 |
| }, |
| { |
| "epoch": 2.6666666666666665, |
| "grad_norm": 1.0486770868301392, |
| "kl": 0.0, |
| "learning_rate": 4.5829927360311224e-06, |
| "logits/chosen": -5530610.731707317, |
| "logits/rejected": -7102130.051282051, |
| "logps/chosen": -14.958841463414634, |
| "logps/rejected": -72.83315805288461, |
| "loss": 0.2265, |
| "rewards/chosen": 0.28788845713545635, |
| "rewards/margins": 5.562287652097395, |
| "rewards/rejected": -5.274399194961939, |
| "step": 300 |
| }, |
| { |
| "epoch": 2.6666666666666665, |
| "eval_logits/chosen": -3639481.9368421054, |
| "eval_logits/rejected": -6323365.790476191, |
| "eval_logps/chosen": -13.871395713404604, |
| "eval_logps/rejected": -70.58707217261905, |
| "eval_loss": 0.22765418887138367, |
| "eval_rewards/chosen": 0.205500612760845, |
| "eval_rewards/margins": 5.310971687909655, |
| "eval_rewards/rejected": -5.1054710751488095, |
| "eval_runtime": 40.296, |
| "eval_samples_per_second": 4.963, |
| "eval_steps_per_second": 2.482, |
| "kl": 0.0, |
| "step": 300 |
| }, |
| { |
| "epoch": 2.7555555555555555, |
| "grad_norm": 1.1156599521636963, |
| "kl": 0.0, |
| "learning_rate": 4.538902172398151e-06, |
| "logits/chosen": -4984293.135802469, |
| "logits/rejected": -5892559.392405063, |
| "logps/chosen": -11.82368148992091, |
| "logps/rejected": -77.61156299446202, |
| "loss": 0.1936, |
| "rewards/chosen": 0.48060866932810087, |
| "rewards/margins": 6.2366797358826895, |
| "rewards/rejected": -5.756071066554589, |
| "step": 310 |
| }, |
| { |
| "epoch": 2.8444444444444446, |
| "grad_norm": 0.6188316941261292, |
| "kl": 0.0, |
| "learning_rate": 4.492831268057307e-06, |
| "logits/chosen": -3922454.697674419, |
| "logits/rejected": -6713733.189189189, |
| "logps/chosen": -13.131177325581396, |
| "logps/rejected": -77.20437869510135, |
| "loss": 0.2132, |
| "rewards/chosen": 0.38874027340911155, |
| "rewards/margins": 6.086961840923892, |
| "rewards/rejected": -5.69822156751478, |
| "step": 320 |
| }, |
| { |
| "epoch": 2.9333333333333336, |
| "grad_norm": 2.7531495094299316, |
| "kl": 0.0, |
| "learning_rate": 4.444824770661788e-06, |
| "logits/chosen": -3217461.3333333335, |
| "logits/rejected": -5140263.804878049, |
| "logps/chosen": -11.303620167267628, |
| "logps/rejected": -78.03017220846037, |
| "loss": 0.199, |
| "rewards/chosen": 0.6726197462815505, |
| "rewards/margins": 6.520977156247252, |
| "rewards/rejected": -5.848357409965701, |
| "step": 330 |
| }, |
| { |
| "epoch": 3.022222222222222, |
| "grad_norm": 0.4581988453865051, |
| "kl": 0.0, |
| "learning_rate": 4.394929307863633e-06, |
| "logits/chosen": -5287541.894736842, |
| "logits/rejected": -5331063.238095238, |
| "logps/chosen": -14.192402086759868, |
| "logps/rejected": -81.94573683965774, |
| "loss": 0.1927, |
| "rewards/chosen": 0.4768250615973222, |
| "rewards/margins": 6.490097664950186, |
| "rewards/rejected": -6.013272603352864, |
| "step": 340 |
| }, |
| { |
| "epoch": 3.111111111111111, |
| "grad_norm": 0.4631578326225281, |
| "kl": 0.0, |
| "learning_rate": 4.34319334202531e-06, |
| "logits/chosen": -2600329.5135135134, |
| "logits/rejected": -5682235.534883721, |
| "logps/chosen": -10.595643739442568, |
| "logps/rejected": -84.35163063226744, |
| "loss": 0.1787, |
| "rewards/chosen": 0.6555876860747466, |
| "rewards/margins": 7.0652812044250375, |
| "rewards/rejected": -6.4096935183502906, |
| "step": 350 |
| }, |
| { |
| "epoch": 3.111111111111111, |
| "eval_logits/chosen": -3060385.6842105263, |
| "eval_logits/rejected": -5682171.123809524, |
| "eval_logps/chosen": -15.833179995888157, |
| "eval_logps/rejected": -81.55650111607143, |
| "eval_loss": 0.22521743178367615, |
| "eval_rewards/chosen": 0.009322176481548108, |
| "eval_rewards/margins": 6.211735680945834, |
| "eval_rewards/rejected": -6.202413504464285, |
| "eval_runtime": 40.1275, |
| "eval_samples_per_second": 4.984, |
| "eval_steps_per_second": 2.492, |
| "kl": 0.0, |
| "step": 350 |
| }, |
| { |
| "epoch": 3.2, |
| "grad_norm": 2.467944622039795, |
| "kl": 0.0, |
| "learning_rate": 4.289667123149296e-06, |
| "logits/chosen": -2830946.976744186, |
| "logits/rejected": -5211064.6486486485, |
| "logps/chosen": -12.128145439680232, |
| "logps/rejected": -79.77615603885135, |
| "loss": 0.2274, |
| "rewards/chosen": 0.43278454625329305, |
| "rewards/margins": 6.468923544748859, |
| "rewards/rejected": -6.036138998495566, |
| "step": 360 |
| }, |
| { |
| "epoch": 3.2888888888888888, |
| "grad_norm": 0.5653411746025085, |
| "kl": 0.0, |
| "learning_rate": 4.234402640071355e-06, |
| "logits/chosen": -4379926.292134832, |
| "logits/rejected": -5016103.211267605, |
| "logps/chosen": -8.949755036429073, |
| "logps/rejected": -82.0338633362676, |
| "loss": 0.1937, |
| "rewards/chosen": 0.791152182589756, |
| "rewards/margins": 6.987948868466737, |
| "rewards/rejected": -6.196796685876981, |
| "step": 370 |
| }, |
| { |
| "epoch": 3.3777777777777778, |
| "grad_norm": 3.137716054916382, |
| "kl": 0.0, |
| "learning_rate": 4.177453569964925e-06, |
| "logits/chosen": -5280053.975903614, |
| "logits/rejected": -5897708.467532467, |
| "logps/chosen": -13.431849468185241, |
| "logps/rejected": -85.2514838676948, |
| "loss": 0.2002, |
| "rewards/chosen": 0.43340485354503955, |
| "rewards/margins": 7.019576982197637, |
| "rewards/rejected": -6.586172128652597, |
| "step": 380 |
| }, |
| { |
| "epoch": 3.466666666666667, |
| "grad_norm": 0.13507850468158722, |
| "kl": 0.0, |
| "learning_rate": 4.118875226205677e-06, |
| "logits/chosen": -4954811.858823529, |
| "logits/rejected": -4586689.706666667, |
| "logps/chosen": -10.430465877757353, |
| "logps/rejected": -83.6682421875, |
| "loss": 0.1938, |
| "rewards/chosen": 0.6540487850413603, |
| "rewards/margins": 7.004159462124694, |
| "rewards/rejected": -6.350110677083333, |
| "step": 390 |
| }, |
| { |
| "epoch": 3.5555555555555554, |
| "grad_norm": 2.0981369018554688, |
| "kl": 0.0, |
| "learning_rate": 4.058724504646834e-06, |
| "logits/chosen": -4761438.048780488, |
| "logits/rejected": -5521709.948717949, |
| "logps/chosen": -10.361271555830793, |
| "logps/rejected": -90.31487254607372, |
| "loss": 0.1818, |
| "rewards/chosen": 0.6475761227491426, |
| "rewards/margins": 7.503091822869335, |
| "rewards/rejected": -6.8555157001201925, |
| "step": 400 |
| }, |
| { |
| "epoch": 3.5555555555555554, |
| "eval_logits/chosen": -2924462.484210526, |
| "eval_logits/rejected": -5623589.180952381, |
| "eval_logps/chosen": -15.789686986019737, |
| "eval_logps/rejected": -83.83147321428571, |
| "eval_loss": 0.22848816215991974, |
| "eval_rewards/chosen": 0.013671553762335526, |
| "eval_rewards/margins": 6.443582849335252, |
| "eval_rewards/rejected": -6.429911295572917, |
| "eval_runtime": 40.313, |
| "eval_samples_per_second": 4.961, |
| "eval_steps_per_second": 2.481, |
| "kl": 0.0, |
| "step": 400 |
| }, |
| { |
| "epoch": 3.6444444444444444, |
| "grad_norm": 0.3149232268333435, |
| "kl": 0.0, |
| "learning_rate": 3.997059828357501e-06, |
| "logits/chosen": -3992299.9402985075, |
| "logits/rejected": -5252628.301075269, |
| "logps/chosen": -18.675668289412314, |
| "logps/rejected": -90.46900201612904, |
| "loss": 0.2008, |
| "rewards/chosen": -0.19581353486473882, |
| "rewards/margins": 6.8142979560206784, |
| "rewards/rejected": -7.010111490885417, |
| "step": 410 |
| }, |
| { |
| "epoch": 3.7333333333333334, |
| "grad_norm": 1.5370200872421265, |
| "kl": 0.0, |
| "learning_rate": 3.933941090877615e-06, |
| "logits/chosen": -4428303.802469136, |
| "logits/rejected": -5042584.303797469, |
| "logps/chosen": -14.75340892650463, |
| "logps/rejected": -87.4963966079905, |
| "loss": 0.2099, |
| "rewards/chosen": 0.253694228184076, |
| "rewards/margins": 7.024359650454646, |
| "rewards/rejected": -6.77066542227057, |
| "step": 420 |
| }, |
| { |
| "epoch": 3.822222222222222, |
| "grad_norm": 0.5295763611793518, |
| "kl": 0.0, |
| "learning_rate": 3.869429598044679e-06, |
| "logits/chosen": -3395873.56097561, |
| "logits/rejected": -5533436.717948718, |
| "logps/chosen": -15.093147091749238, |
| "logps/rejected": -80.15256911057692, |
| "loss": 0.2452, |
| "rewards/chosen": 0.13673415998133218, |
| "rewards/margins": 6.296287100936861, |
| "rewards/rejected": -6.159552940955529, |
| "step": 430 |
| }, |
| { |
| "epoch": 3.911111111111111, |
| "grad_norm": 0.8047326803207397, |
| "kl": 0.0, |
| "learning_rate": 3.8035880084487454e-06, |
| "logits/chosen": -3673009.777777778, |
| "logits/rejected": -5163254.278481012, |
| "logps/chosen": -13.116269712094907, |
| "logps/rejected": -92.62794822982595, |
| "loss": 0.1816, |
| "rewards/chosen": 0.5284830258216386, |
| "rewards/margins": 7.8263577662290755, |
| "rewards/rejected": -7.297874740407437, |
| "step": 440 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.6752709746360779, |
| "kl": 0.0, |
| "learning_rate": 3.736480272573519e-06, |
| "logits/chosen": -4347196.493150685, |
| "logits/rejected": -5256166.988505747, |
| "logps/chosen": -18.342484147581334, |
| "logps/rejected": -97.40822557471265, |
| "loss": 0.1921, |
| "rewards/chosen": -0.07928093165567476, |
| "rewards/margins": 7.679832248570618, |
| "rewards/rejected": -7.759113180226293, |
| "step": 450 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_logits/chosen": -3297428.210526316, |
| "eval_logits/rejected": -5958031.847619047, |
| "eval_logps/chosen": -16.006887335526315, |
| "eval_logps/rejected": -88.42149367559524, |
| "eval_loss": 0.2126670479774475, |
| "eval_rewards/chosen": -0.008048589606034127, |
| "eval_rewards/margins": 6.880864519582954, |
| "eval_rewards/rejected": -6.888913109188988, |
| "eval_runtime": 40.1691, |
| "eval_samples_per_second": 4.979, |
| "eval_steps_per_second": 2.489, |
| "kl": 0.0, |
| "step": 450 |
| }, |
| { |
| "epoch": 4.088888888888889, |
| "grad_norm": 0.328436017036438, |
| "kl": 0.0, |
| "learning_rate": 3.6681715706826555e-06, |
| "logits/chosen": -5328077.511111111, |
| "logits/rejected": -5352870.4, |
| "logps/chosen": -16.54457465277778, |
| "logps/rejected": -92.87059849330358, |
| "loss": 0.2191, |
| "rewards/chosen": 0.12854560216267905, |
| "rewards/margins": 7.4038246200198214, |
| "rewards/rejected": -7.275279017857143, |
| "step": 460 |
| }, |
| { |
| "epoch": 4.177777777777778, |
| "grad_norm": 0.13535404205322266, |
| "kl": 0.0, |
| "learning_rate": 3.598728249511613e-06, |
| "logits/chosen": -4407000.746666667, |
| "logits/rejected": -5553773.552941176, |
| "logps/chosen": -10.39356689453125, |
| "logps/rejected": -90.10190142463236, |
| "loss": 0.187, |
| "rewards/chosen": 0.4877380879720052, |
| "rewards/margins": 7.491619923909505, |
| "rewards/rejected": -7.0038818359375, |
| "step": 470 |
| }, |
| { |
| "epoch": 4.266666666666667, |
| "grad_norm": 0.6326517462730408, |
| "kl": 0.0, |
| "learning_rate": 3.5282177578265295e-06, |
| "logits/chosen": -4370845.057471264, |
| "logits/rejected": -6354919.452054795, |
| "logps/chosen": -12.190081716954023, |
| "logps/rejected": -97.62553510273973, |
| "loss": 0.1707, |
| "rewards/chosen": 0.6859642554973734, |
| "rewards/margins": 8.409464964508503, |
| "rewards/rejected": -7.72350070901113, |
| "step": 480 |
| }, |
| { |
| "epoch": 4.355555555555555, |
| "grad_norm": 1.1461330652236938, |
| "kl": 0.0, |
| "learning_rate": 3.4567085809127247e-06, |
| "logits/chosen": -3404656.9411764704, |
| "logits/rejected": -6023053.226666667, |
| "logps/chosen": -16.520122931985295, |
| "logps/rejected": -100.1553515625, |
| "loss": 0.2085, |
| "rewards/chosen": -0.05400382771211512, |
| "rewards/margins": 8.000415931402468, |
| "rewards/rejected": -8.054419759114584, |
| "step": 490 |
| }, |
| { |
| "epoch": 4.444444444444445, |
| "grad_norm": 0.5591081380844116, |
| "kl": 0.0, |
| "learning_rate": 3.384270174056454e-06, |
| "logits/chosen": -2667930.4691358022, |
| "logits/rejected": -6442100.253164557, |
| "logps/chosen": -13.357886043595679, |
| "logps/rejected": -89.76098941851266, |
| "loss": 0.1945, |
| "rewards/chosen": 0.47434229909637826, |
| "rewards/margins": 7.406902314424701, |
| "rewards/rejected": -6.932560015328323, |
| "step": 500 |
| }, |
| { |
| "epoch": 4.444444444444445, |
| "eval_logits/chosen": -3297794.694736842, |
| "eval_logits/rejected": -5972243.5047619045, |
| "eval_logps/chosen": -16.59450555098684, |
| "eval_logps/rejected": -92.23130580357143, |
| "eval_loss": 0.21143874526023865, |
| "eval_rewards/chosen": -0.06681048744603207, |
| "eval_rewards/margins": 7.203084648342658, |
| "eval_rewards/rejected": -7.2698951357886905, |
| "eval_runtime": 40.1679, |
| "eval_samples_per_second": 4.979, |
| "eval_steps_per_second": 2.49, |
| "kl": 0.0, |
| "step": 500 |
| }, |
| { |
| "epoch": 4.533333333333333, |
| "grad_norm": 0.856115460395813, |
| "kl": 0.0, |
| "learning_rate": 3.3109728950845184e-06, |
| "logits/chosen": -4890368.8, |
| "logits/rejected": -6204083.2, |
| "logps/chosen": -15.44854736328125, |
| "logps/rejected": -99.00941162109375, |
| "loss": 0.2007, |
| "rewards/chosen": 0.0889211654663086, |
| "rewards/margins": 7.809566307067871, |
| "rewards/rejected": -7.720645141601563, |
| "step": 510 |
| }, |
| { |
| "epoch": 4.622222222222222, |
| "grad_norm": 0.7520022988319397, |
| "kl": 0.0, |
| "learning_rate": 3.236887936027261e-06, |
| "logits/chosen": -3247513.8313253014, |
| "logits/rejected": -5874468.155844156, |
| "logps/chosen": -13.96297298569277, |
| "logps/rejected": -91.78726156655844, |
| "loss": 0.1923, |
| "rewards/chosen": 0.47854435013001223, |
| "rewards/margins": 7.833355981369866, |
| "rewards/rejected": -7.354811631239854, |
| "step": 520 |
| }, |
| { |
| "epoch": 4.711111111111111, |
| "grad_norm": 2.9641735553741455, |
| "kl": 0.0, |
| "learning_rate": 3.162087253971318e-06, |
| "logits/chosen": -4554967.225806451, |
| "logits/rejected": -5900110.367346939, |
| "logps/chosen": -15.998369770665322, |
| "logps/rejected": -93.55442841198979, |
| "loss": 0.177, |
| "rewards/chosen": 0.23647348342403288, |
| "rewards/margins": 7.601298673754071, |
| "rewards/rejected": -7.364825190330039, |
| "step": 530 |
| }, |
| { |
| "epoch": 4.8, |
| "grad_norm": 0.11354226619005203, |
| "kl": 0.0, |
| "learning_rate": 3.0866435011692884e-06, |
| "logits/chosen": -5795603.626666667, |
| "logits/rejected": -5345560.847058823, |
| "logps/chosen": -14.156272786458333, |
| "logps/rejected": -102.74303768382353, |
| "loss": 0.1752, |
| "rewards/chosen": 0.3158830769856771, |
| "rewards/margins": 8.545466458189722, |
| "rewards/rejected": -8.229583381204044, |
| "step": 540 |
| }, |
| { |
| "epoch": 4.888888888888889, |
| "grad_norm": 0.6861762404441833, |
| "kl": 0.0, |
| "learning_rate": 3.0106299544742013e-06, |
| "logits/chosen": -3000861.827160494, |
| "logits/rejected": -5673123.645569621, |
| "logps/chosen": -10.59613639925733, |
| "logps/rejected": -87.43932950949367, |
| "loss": 0.2105, |
| "rewards/chosen": 0.4492984818823544, |
| "rewards/margins": 7.256159297003003, |
| "rewards/rejected": -6.806860815120649, |
| "step": 550 |
| }, |
| { |
| "epoch": 4.888888888888889, |
| "eval_logits/chosen": -3147055.1578947366, |
| "eval_logits/rejected": -5862926.628571428, |
| "eval_logps/chosen": -16.276572779605264, |
| "eval_logps/rejected": -90.12877604166667, |
| "eval_loss": 0.2067316174507141, |
| "eval_rewards/chosen": -0.035017113936574834, |
| "eval_rewards/margins": 7.024624115602116, |
| "eval_rewards/rejected": -7.05964122953869, |
| "eval_runtime": 40.2198, |
| "eval_samples_per_second": 4.973, |
| "eval_steps_per_second": 2.486, |
| "kl": 0.0, |
| "step": 550 |
| }, |
| { |
| "epoch": 4.977777777777778, |
| "grad_norm": 0.801609992980957, |
| "kl": 0.0, |
| "learning_rate": 2.9341204441673267e-06, |
| "logits/chosen": -3848151.717647059, |
| "logits/rejected": -5394151.253333333, |
| "logps/chosen": -10.85682803883272, |
| "logps/rejected": -90.03494791666667, |
| "loss": 0.1832, |
| "rewards/chosen": 0.6799631006577436, |
| "rewards/margins": 7.6680034652410765, |
| "rewards/rejected": -6.988040364583333, |
| "step": 560 |
| }, |
| { |
| "epoch": 5.066666666666666, |
| "grad_norm": 1.4983981847763062, |
| "kl": 0.0, |
| "learning_rate": 2.8571892822484502e-06, |
| "logits/chosen": -4290816.735632184, |
| "logits/rejected": -6108049.534246575, |
| "logps/chosen": -12.377775345725574, |
| "logps/rejected": -91.37554179152397, |
| "loss": 0.1776, |
| "rewards/chosen": 0.47618024102572737, |
| "rewards/margins": 7.804108757988485, |
| "rewards/rejected": -7.327928516962757, |
| "step": 570 |
| }, |
| { |
| "epoch": 5.155555555555556, |
| "grad_norm": 1.4494401216506958, |
| "kl": 0.0, |
| "learning_rate": 2.7799111902582697e-06, |
| "logits/chosen": -3389414.1052631577, |
| "logits/rejected": -6431971.80952381, |
| "logps/chosen": -16.54691676089638, |
| "logps/rejected": -92.70068359375, |
| "loss": 0.211, |
| "rewards/chosen": 0.06845460439983167, |
| "rewards/margins": 7.234954150995813, |
| "rewards/rejected": -7.166499546595982, |
| "step": 580 |
| }, |
| { |
| "epoch": 5.2444444444444445, |
| "grad_norm": 0.2840491235256195, |
| "kl": 0.0, |
| "learning_rate": 2.7023612267030085e-06, |
| "logits/chosen": -3092382.72, |
| "logits/rejected": -5891583.247058824, |
| "logps/chosen": -15.6631396484375, |
| "logps/rejected": -98.87757352941176, |
| "loss": 0.1861, |
| "rewards/chosen": 0.13971505482991536, |
| "rewards/margins": 7.8902543183869005, |
| "rewards/rejected": -7.750539263556985, |
| "step": 590 |
| }, |
| { |
| "epoch": 5.333333333333333, |
| "grad_norm": 1.7495723962783813, |
| "kl": 0.0, |
| "learning_rate": 2.624614714151743e-06, |
| "logits/chosen": -3774011.487179487, |
| "logits/rejected": -6055470.048780488, |
| "logps/chosen": -12.078990447215546, |
| "logps/rejected": -91.39095846036585, |
| "loss": 0.1921, |
| "rewards/chosen": 0.4090720934745593, |
| "rewards/margins": 7.7116628125580196, |
| "rewards/rejected": -7.302590719083461, |
| "step": 600 |
| }, |
| { |
| "epoch": 5.333333333333333, |
| "eval_logits/chosen": -3241836.4631578946, |
| "eval_logits/rejected": -5997722.819047619, |
| "eval_logps/chosen": -16.496885279605262, |
| "eval_logps/rejected": -91.58647693452382, |
| "eval_loss": 0.2063719928264618, |
| "eval_rewards/chosen": -0.05704839606034128, |
| "eval_rewards/margins": 7.148363387793825, |
| "eval_rewards/rejected": -7.205411783854166, |
| "eval_runtime": 40.1907, |
| "eval_samples_per_second": 4.976, |
| "eval_steps_per_second": 2.488, |
| "kl": 0.0, |
| "step": 600 |
| }, |
| { |
| "epoch": 5.4222222222222225, |
| "grad_norm": 0.3096153736114502, |
| "kl": 4.927865982055664, |
| "learning_rate": 2.5467471660772557e-06, |
| "logits/chosen": -3925314.4935064935, |
| "logits/rejected": -5939865.445783133, |
| "logps/chosen": -10.83917711926745, |
| "logps/rejected": -96.4466891001506, |
| "loss": 0.163, |
| "rewards/chosen": 0.5618379766290839, |
| "rewards/margins": 8.103459541011613, |
| "rewards/rejected": -7.54162156438253, |
| "step": 610 |
| }, |
| { |
| "epoch": 5.511111111111111, |
| "grad_norm": 0.952792763710022, |
| "kl": 0.0, |
| "learning_rate": 2.4688342135114625e-06, |
| "logits/chosen": -3953338.4, |
| "logits/rejected": -5201153.2, |
| "logps/chosen": -12.864892578125, |
| "logps/rejected": -93.82406616210938, |
| "loss": 0.1973, |
| "rewards/chosen": 0.5206992626190186, |
| "rewards/margins": 7.976323652267456, |
| "rewards/rejected": -7.455624389648437, |
| "step": 620 |
| }, |
| { |
| "epoch": 5.6, |
| "grad_norm": 0.38527581095695496, |
| "kl": 0.0, |
| "learning_rate": 2.3909515315866606e-06, |
| "logits/chosen": -4149851.534883721, |
| "logits/rejected": -5401914.378378378, |
| "logps/chosen": -12.361329544422238, |
| "logps/rejected": -101.43082902238176, |
| "loss": 0.1759, |
| "rewards/chosen": 0.5003170190855514, |
| "rewards/margins": 8.561492391235046, |
| "rewards/rejected": -8.061175372149494, |
| "step": 630 |
| }, |
| { |
| "epoch": 5.688888888888889, |
| "grad_norm": 0.044581424444913864, |
| "kl": 0.0, |
| "learning_rate": 2.3131747660339396e-06, |
| "logits/chosen": -3309795.6, |
| "logits/rejected": -5141365.6, |
| "logps/chosen": -10.460143280029296, |
| "logps/rejected": -97.539892578125, |
| "loss": 0.1627, |
| "rewards/chosen": 0.5505589962005615, |
| "rewards/margins": 8.339413976669311, |
| "rewards/rejected": -7.78885498046875, |
| "step": 640 |
| }, |
| { |
| "epoch": 5.777777777777778, |
| "grad_norm": 0.7985220551490784, |
| "kl": 0.0, |
| "learning_rate": 2.235579459710156e-06, |
| "logits/chosen": -5236049.066666666, |
| "logits/rejected": -5756221.74117647, |
| "logps/chosen": -15.822184244791666, |
| "logps/rejected": -104.32300091911765, |
| "loss": 0.1614, |
| "rewards/chosen": 0.269891357421875, |
| "rewards/margins": 8.66156041762408, |
| "rewards/rejected": -8.391669060202206, |
| "step": 650 |
| }, |
| { |
| "epoch": 5.777777777777778, |
| "eval_logits/chosen": -3228708.7157894736, |
| "eval_logits/rejected": -5918022.095238095, |
| "eval_logps/chosen": -17.425839072779606, |
| "eval_logps/rejected": -95.5464099702381, |
| "eval_loss": 0.2070188969373703, |
| "eval_rewards/chosen": -0.149943703099301, |
| "eval_rewards/margins": 7.451461849356056, |
| "eval_rewards/rejected": -7.601405552455357, |
| "eval_runtime": 40.0875, |
| "eval_samples_per_second": 4.989, |
| "eval_steps_per_second": 2.495, |
| "kl": 0.0, |
| "step": 650 |
| }, |
| { |
| "epoch": 5.866666666666667, |
| "grad_norm": 0.05763305351138115, |
| "kl": 0.0, |
| "learning_rate": 2.158240979224817e-06, |
| "logits/chosen": -3888249.0434782607, |
| "logits/rejected": -5484160.94117647, |
| "logps/chosen": -13.06786578634511, |
| "logps/rejected": -101.57473575367646, |
| "loss": 0.1988, |
| "rewards/chosen": 0.35180357228154724, |
| "rewards/margins": 8.539068407414819, |
| "rewards/rejected": -8.187264835133272, |
| "step": 660 |
| }, |
| { |
| "epoch": 5.955555555555556, |
| "grad_norm": 0.10472501069307327, |
| "kl": 0.0, |
| "learning_rate": 2.0812344417381595e-06, |
| "logits/chosen": -4696800.0, |
| "logits/rejected": -5666451.75308642, |
| "logps/chosen": -11.155172999901108, |
| "logps/rejected": -100.65013744212963, |
| "loss": 0.1728, |
| "rewards/chosen": 0.7526525183569027, |
| "rewards/margins": 8.847644320054435, |
| "rewards/rejected": -8.094991801697532, |
| "step": 670 |
| }, |
| { |
| "epoch": 6.044444444444444, |
| "grad_norm": 0.5247897505760193, |
| "kl": 0.0, |
| "learning_rate": 2.004634642001507e-06, |
| "logits/chosen": -4391685.454545454, |
| "logits/rejected": -6017281.777777778, |
| "logps/chosen": -15.404382879083807, |
| "logps/rejected": -93.35451253255208, |
| "loss": 0.2183, |
| "rewards/chosen": 0.15974465283480557, |
| "rewards/margins": 7.519007755048348, |
| "rewards/rejected": -7.359263102213542, |
| "step": 680 |
| }, |
| { |
| "epoch": 6.133333333333334, |
| "grad_norm": 0.2628670632839203, |
| "kl": 6.880197525024414, |
| "learning_rate": 1.9285159797107766e-06, |
| "logits/chosen": -4377976.338028169, |
| "logits/rejected": -5771072.0, |
| "logps/chosen": -14.434786944322184, |
| "logps/rejected": -96.85829968398876, |
| "loss": 0.1794, |
| "rewards/chosen": 0.21448210595359266, |
| "rewards/margins": 7.86581500403997, |
| "rewards/rejected": -7.651332898086377, |
| "step": 690 |
| }, |
| { |
| "epoch": 6.222222222222222, |
| "grad_norm": 0.4303622841835022, |
| "kl": 0.0, |
| "learning_rate": 1.852952387243698e-06, |
| "logits/chosen": -4863565.662921349, |
| "logits/rejected": -5648771.605633803, |
| "logps/chosen": -15.540859265273877, |
| "logps/rejected": -106.32531910211267, |
| "loss": 0.1896, |
| "rewards/chosen": 0.14140075512146683, |
| "rewards/margins": 8.74780480441724, |
| "rewards/rejected": -8.606404049295774, |
| "step": 700 |
| }, |
| { |
| "epoch": 6.222222222222222, |
| "eval_logits/chosen": -3418014.3157894737, |
| "eval_logits/rejected": -6086026.971428571, |
| "eval_logps/chosen": -17.973583984375, |
| "eval_logps/rejected": -96.15703125, |
| "eval_loss": 0.21233326196670532, |
| "eval_rewards/chosen": -0.20471825850637335, |
| "eval_rewards/margins": 7.457748026835889, |
| "eval_rewards/rejected": -7.662466285342262, |
| "eval_runtime": 39.9854, |
| "eval_samples_per_second": 5.002, |
| "eval_steps_per_second": 2.501, |
| "kl": 0.0, |
| "step": 700 |
| }, |
| { |
| "epoch": 6.311111111111111, |
| "grad_norm": 1.0218161344528198, |
| "kl": 0.0, |
| "learning_rate": 1.778017257850926e-06, |
| "logits/chosen": -3331781.0, |
| "logits/rejected": -6754188.8, |
| "logps/chosen": -15.500772094726562, |
| "logps/rejected": -106.769287109375, |
| "loss": 0.1903, |
| "rewards/chosen": 0.11742455959320068, |
| "rewards/margins": 8.76119439601898, |
| "rewards/rejected": -8.643769836425781, |
| "step": 710 |
| }, |
| { |
| "epoch": 6.4, |
| "grad_norm": 1.2540534734725952, |
| "kl": 0.0, |
| "learning_rate": 1.7037833743707892e-06, |
| "logits/chosen": -3863724.5569620254, |
| "logits/rejected": -5971969.975308642, |
| "logps/chosen": -14.746993052808545, |
| "logps/rejected": -100.60282841435185, |
| "loss": 0.1789, |
| "rewards/chosen": 0.2655511687073526, |
| "rewards/margins": 8.338163413113216, |
| "rewards/rejected": -8.072612244405864, |
| "step": 720 |
| }, |
| { |
| "epoch": 6.488888888888889, |
| "grad_norm": 0.38389647006988525, |
| "kl": 0.0, |
| "learning_rate": 1.6303228385369318e-06, |
| "logits/chosen": -4138485.204819277, |
| "logits/rejected": -6034715.428571428, |
| "logps/chosen": -9.766010330384036, |
| "logps/rejected": -98.01102120535714, |
| "loss": 0.1822, |
| "rewards/chosen": 0.598959038056523, |
| "rewards/margins": 8.55015944897373, |
| "rewards/rejected": -7.951200410917208, |
| "step": 730 |
| }, |
| { |
| "epoch": 6.5777777777777775, |
| "grad_norm": 4.275939464569092, |
| "kl": 0.0, |
| "learning_rate": 1.5577070009474872e-06, |
| "logits/chosen": -4224448.0, |
| "logits/rejected": -5103517.128205128, |
| "logps/chosen": -17.509926400533537, |
| "logps/rejected": -98.83905498798077, |
| "loss": 0.1864, |
| "rewards/chosen": 0.08433959542251215, |
| "rewards/margins": 8.074940181360013, |
| "rewards/rejected": -7.9906005859375, |
| "step": 740 |
| }, |
| { |
| "epoch": 6.666666666666667, |
| "grad_norm": 0.08042572438716888, |
| "kl": 0.0, |
| "learning_rate": 1.4860063917638128e-06, |
| "logits/chosen": -4519920.963855422, |
| "logits/rejected": -6225353.558441559, |
| "logps/chosen": -10.121629094503012, |
| "logps/rejected": -105.2835836038961, |
| "loss": 0.1631, |
| "rewards/chosen": 0.6880607145378389, |
| "rewards/margins": 9.006282165938002, |
| "rewards/rejected": -8.318221451400163, |
| "step": 750 |
| }, |
| { |
| "epoch": 6.666666666666667, |
| "eval_logits/chosen": -3385464.252631579, |
| "eval_logits/rejected": -6043348.114285714, |
| "eval_logps/chosen": -17.730515008223684, |
| "eval_logps/rejected": -96.13487723214286, |
| "eval_loss": 0.2076110988855362, |
| "eval_rewards/chosen": -0.1804113689221834, |
| "eval_rewards/margins": 7.479839049604602, |
| "eval_rewards/rejected": -7.660250418526786, |
| "eval_runtime": 39.9728, |
| "eval_samples_per_second": 5.003, |
| "eval_steps_per_second": 2.502, |
| "kl": 0.0, |
| "step": 750 |
| }, |
| { |
| "epoch": 6.7555555555555555, |
| "grad_norm": 1.02430260181427, |
| "kl": 0.0, |
| "learning_rate": 1.415290652206105e-06, |
| "logits/chosen": -3472036.345679012, |
| "logits/rejected": -6653354.126582279, |
| "logps/chosen": -15.811827859760802, |
| "logps/rejected": -99.10262559335443, |
| "loss": 0.1972, |
| "rewards/chosen": 0.11022957460379895, |
| "rewards/margins": 7.983734599574131, |
| "rewards/rejected": -7.873505024970332, |
| "step": 760 |
| }, |
| { |
| "epoch": 6.844444444444444, |
| "grad_norm": 0.08783946186304092, |
| "kl": 0.0, |
| "learning_rate": 1.3456284669124159e-06, |
| "logits/chosen": -4270462.419753087, |
| "logits/rejected": -5304998.075949367, |
| "logps/chosen": -13.503223560474536, |
| "logps/rejected": -100.49641515031645, |
| "loss": 0.1856, |
| "rewards/chosen": 0.4948253867066937, |
| "rewards/margins": 8.575803587112153, |
| "rewards/rejected": -8.08097820040546, |
| "step": 770 |
| }, |
| { |
| "epoch": 6.933333333333334, |
| "grad_norm": 8.593017578125, |
| "kl": 0.0, |
| "learning_rate": 1.2770874972267777e-06, |
| "logits/chosen": -5554544.761904762, |
| "logits/rejected": -4970955.216494845, |
| "logps/chosen": -8.096478659009177, |
| "logps/rejected": -96.65286726804123, |
| "loss": 0.1404, |
| "rewards/chosen": 0.985069093250093, |
| "rewards/margins": 8.613870891333212, |
| "rewards/rejected": -7.628801798083119, |
| "step": 780 |
| }, |
| { |
| "epoch": 7.022222222222222, |
| "grad_norm": 0.024941733106970787, |
| "kl": 0.0, |
| "learning_rate": 1.2097343154812332e-06, |
| "logits/chosen": -3828968.5853658537, |
| "logits/rejected": -5647588.923076923, |
| "logps/chosen": -14.890583317454269, |
| "logps/rejected": -105.80616486378206, |
| "loss": 0.1729, |
| "rewards/chosen": 0.31065517518578506, |
| "rewards/margins": 8.939148264128093, |
| "rewards/rejected": -8.628493088942308, |
| "step": 790 |
| }, |
| { |
| "epoch": 7.111111111111111, |
| "grad_norm": 0.9142467379570007, |
| "kl": 0.0, |
| "learning_rate": 1.1436343403356019e-06, |
| "logits/chosen": -2518241.951219512, |
| "logits/rejected": -7004834.461538462, |
| "logps/chosen": -8.99466835580221, |
| "logps/rejected": -93.29170422676282, |
| "loss": 0.1704, |
| "rewards/chosen": 0.8409453601371951, |
| "rewards/margins": 8.280389877615159, |
| "rewards/rejected": -7.439444517477964, |
| "step": 800 |
| }, |
| { |
| "epoch": 7.111111111111111, |
| "eval_logits/chosen": -3383563.1157894735, |
| "eval_logits/rejected": -6061806.323809524, |
| "eval_logps/chosen": -17.49361636513158, |
| "eval_logps/rejected": -95.88157552083334, |
| "eval_loss": 0.20636223256587982, |
| "eval_rewards/chosen": -0.15672151665938527, |
| "eval_rewards/margins": 7.478199893310853, |
| "eval_rewards/rejected": -7.6349214099702385, |
| "eval_runtime": 39.9635, |
| "eval_samples_per_second": 5.005, |
| "eval_steps_per_second": 2.502, |
| "kl": 0.0, |
| "step": 800 |
| }, |
| { |
| "epoch": 7.2, |
| "grad_norm": 0.5936899185180664, |
| "kl": 0.0, |
| "learning_rate": 1.0788517732377696e-06, |
| "logits/chosen": -4853851.377777778, |
| "logits/rejected": -5750787.657142857, |
| "logps/chosen": -16.244346788194445, |
| "logps/rejected": -99.18863699776786, |
| "loss": 0.1975, |
| "rewards/chosen": 0.1839043935139974, |
| "rewards/margins": 8.11509161449614, |
| "rewards/rejected": -7.931187220982143, |
| "step": 810 |
| }, |
| { |
| "epoch": 7.288888888888889, |
| "grad_norm": 0.6178103685379028, |
| "kl": 0.0, |
| "learning_rate": 1.0154495360662464e-06, |
| "logits/chosen": -3597639.550561798, |
| "logits/rejected": -5250260.281690141, |
| "logps/chosen": -10.889571629213483, |
| "logps/rejected": -104.53252915933099, |
| "loss": 0.1948, |
| "rewards/chosen": 0.6096143401070927, |
| "rewards/margins": 8.769230729851811, |
| "rewards/rejected": -8.159616389744718, |
| "step": 820 |
| }, |
| { |
| "epoch": 7.377777777777778, |
| "grad_norm": 0.5359880328178406, |
| "kl": 0.0, |
| "learning_rate": 9.534892100155296e-07, |
| "logits/chosen": -4431683.764705882, |
| "logits/rejected": -5044657.92, |
| "logps/chosen": -7.5809979607077205, |
| "logps/rejected": -100.82807942708334, |
| "loss": 0.1518, |
| "rewards/chosen": 0.9582724178538603, |
| "rewards/margins": 9.22237153894761, |
| "rewards/rejected": -8.26409912109375, |
| "step": 830 |
| }, |
| { |
| "epoch": 7.466666666666667, |
| "grad_norm": 0.10778788477182388, |
| "kl": 0.0, |
| "learning_rate": 8.930309757836517e-07, |
| "logits/chosen": -3469462.12345679, |
| "logits/rejected": -5908565.063291139, |
| "logps/chosen": -16.790587625385804, |
| "logps/rejected": -101.96766836431962, |
| "loss": 0.2007, |
| "rewards/chosen": -0.026320469232253087, |
| "rewards/margins": 8.076566261879297, |
| "rewards/rejected": -8.10288673111155, |
| "step": 840 |
| }, |
| { |
| "epoch": 7.555555555555555, |
| "grad_norm": 0.5070289969444275, |
| "kl": 0.0, |
| "learning_rate": 8.341335551199903e-07, |
| "logits/chosen": -4980081.92, |
| "logits/rejected": -4904306.447058824, |
| "logps/chosen": -18.339703776041667, |
| "logps/rejected": -99.41567095588235, |
| "loss": 0.1902, |
| "rewards/chosen": -0.1808448282877604, |
| "rewards/margins": 7.7977655807195925, |
| "rewards/rejected": -7.978610409007353, |
| "step": 850 |
| }, |
| { |
| "epoch": 7.555555555555555, |
| "eval_logits/chosen": -3373625.6, |
| "eval_logits/rejected": -6032148.7238095235, |
| "eval_logps/chosen": -17.943980006167763, |
| "eval_logps/rejected": -98.32533482142857, |
| "eval_loss": 0.20289196074008942, |
| "eval_rewards/chosen": -0.2017578928094161, |
| "eval_rewards/margins": 7.677540144764988, |
| "eval_rewards/rejected": -7.879298037574404, |
| "eval_runtime": 39.9829, |
| "eval_samples_per_second": 5.002, |
| "eval_steps_per_second": 2.501, |
| "kl": 0.0, |
| "step": 850 |
| }, |
| { |
| "epoch": 7.644444444444445, |
| "grad_norm": 0.14907072484493256, |
| "kl": 0.0, |
| "learning_rate": 7.768541537901325e-07, |
| "logits/chosen": -3498182.3209876544, |
| "logits/rejected": -5589237.873417721, |
| "logps/chosen": -12.153599115065587, |
| "logps/rejected": -107.72871340981013, |
| "loss": 0.1576, |
| "rewards/chosen": 0.5338122520917727, |
| "rewards/margins": 9.225064755305775, |
| "rewards/rejected": -8.691252503214002, |
| "step": 860 |
| }, |
| { |
| "epoch": 7.733333333333333, |
| "grad_norm": 1.276770830154419, |
| "kl": 0.0, |
| "learning_rate": 7.212484060131753e-07, |
| "logits/chosen": -3928918.054054054, |
| "logits/rejected": -5899938.604651162, |
| "logps/chosen": -11.909211029877534, |
| "logps/rejected": -108.36705123546511, |
| "loss": 0.1614, |
| "rewards/chosen": 0.5402751613307644, |
| "rewards/margins": 9.478402545960126, |
| "rewards/rejected": -8.938127384629361, |
| "step": 870 |
| }, |
| { |
| "epoch": 7.822222222222222, |
| "grad_norm": 0.2422640174627304, |
| "kl": 0.0, |
| "learning_rate": 6.673703204254348e-07, |
| "logits/chosen": -4272293.402597402, |
| "logits/rejected": -5654204.144578313, |
| "logps/chosen": -16.09622945413961, |
| "logps/rejected": -101.58252541415662, |
| "loss": 0.188, |
| "rewards/chosen": 0.17999009962205764, |
| "rewards/margins": 8.395220650732751, |
| "rewards/rejected": -8.215230551110693, |
| "step": 880 |
| }, |
| { |
| "epoch": 7.911111111111111, |
| "grad_norm": 0.10478074848651886, |
| "kl": 0.0, |
| "learning_rate": 6.152722276230506e-07, |
| "logits/chosen": -2755641.8630136987, |
| "logits/rejected": -6228930.206896552, |
| "logps/chosen": -16.47136197024829, |
| "logps/rejected": -101.78394396551724, |
| "loss": 0.1859, |
| "rewards/chosen": 0.009184811213245131, |
| "rewards/margins": 8.1317139397154, |
| "rewards/rejected": -8.122529128502155, |
| "step": 890 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 0.4041171371936798, |
| "kl": 0.0, |
| "learning_rate": 5.650047293344316e-07, |
| "logits/chosen": -3320032.0, |
| "logits/rejected": -6169933.948717949, |
| "logps/chosen": -13.464470095750762, |
| "logps/rejected": -105.82504507211539, |
| "loss": 0.174, |
| "rewards/chosen": 0.35973583779683926, |
| "rewards/margins": 8.903688975316872, |
| "rewards/rejected": -8.543953137520033, |
| "step": 900 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_logits/chosen": -3360374.568421053, |
| "eval_logits/rejected": -5993503.695238095, |
| "eval_logps/chosen": -18.104893092105264, |
| "eval_logps/rejected": -99.21747581845239, |
| "eval_loss": 0.20157214999198914, |
| "eval_rewards/chosen": -0.21784908896998356, |
| "eval_rewards/margins": 7.7506631645642425, |
| "eval_rewards/rejected": -7.968512253534226, |
| "eval_runtime": 39.9676, |
| "eval_samples_per_second": 5.004, |
| "eval_steps_per_second": 2.502, |
| "kl": 0.0, |
| "step": 900 |
| }, |
| { |
| "epoch": 8.088888888888889, |
| "grad_norm": 0.22442945837974548, |
| "kl": 0.2600688934326172, |
| "learning_rate": 5.166166492719124e-07, |
| "logits/chosen": -3581766.6341463416, |
| "logits/rejected": -6007682.051282051, |
| "logps/chosen": -5.289140654773247, |
| "logps/rejected": -99.4656012119391, |
| "loss": 0.149, |
| "rewards/chosen": 1.2142908980206746, |
| "rewards/margins": 9.21364768137404, |
| "rewards/rejected": -7.999356783353365, |
| "step": 910 |
| }, |
| { |
| "epoch": 8.177777777777777, |
| "grad_norm": 1.7692155838012695, |
| "kl": 0.0, |
| "learning_rate": 4.7015498571035877e-07, |
| "logits/chosen": -3715771.4285714286, |
| "logits/rejected": -6512965.397590362, |
| "logps/chosen": -14.221671760856331, |
| "logps/rejected": -112.0968797063253, |
| "loss": 0.1667, |
| "rewards/chosen": 0.3181131536310369, |
| "rewards/margins": 9.539859759193913, |
| "rewards/rejected": -9.221746605562876, |
| "step": 920 |
| }, |
| { |
| "epoch": 8.266666666666667, |
| "grad_norm": 0.39222252368927, |
| "kl": 0.0, |
| "learning_rate": 4.256648658387813e-07, |
| "logits/chosen": -4780249.945945946, |
| "logits/rejected": -5116162.976744186, |
| "logps/chosen": -14.482019372888514, |
| "logps/rejected": -102.86130541424419, |
| "loss": 0.1828, |
| "rewards/chosen": 0.17394746316445842, |
| "rewards/margins": 8.420604014081972, |
| "rewards/rejected": -8.246656550917514, |
| "step": 930 |
| }, |
| { |
| "epoch": 8.355555555555556, |
| "grad_norm": 0.9251185059547424, |
| "kl": 0.0, |
| "learning_rate": 3.831895019292897e-07, |
| "logits/chosen": -4103191.466666667, |
| "logits/rejected": -6261455.811764706, |
| "logps/chosen": -18.9859228515625, |
| "logps/rejected": -111.14751838235294, |
| "loss": 0.1668, |
| "rewards/chosen": -0.3472717793782552, |
| "rewards/margins": 8.64698876175226, |
| "rewards/rejected": -8.994260541130515, |
| "step": 940 |
| }, |
| { |
| "epoch": 8.444444444444445, |
| "grad_norm": 0.3069675862789154, |
| "kl": 0.0, |
| "learning_rate": 3.427701493659674e-07, |
| "logits/chosen": -3051439.0204081633, |
| "logits/rejected": -5417702.193548387, |
| "logps/chosen": -15.728038554288903, |
| "logps/rejected": -94.66737021169355, |
| "loss": 0.2268, |
| "rewards/chosen": 0.18987542755749762, |
| "rewards/margins": 7.890255322183254, |
| "rewards/rejected": -7.7003798946257564, |
| "step": 950 |
| }, |
| { |
| "epoch": 8.444444444444445, |
| "eval_logits/chosen": -3331174.4, |
| "eval_logits/rejected": -5953203.80952381, |
| "eval_logps/chosen": -18.29108244243421, |
| "eval_logps/rejected": -99.80818452380953, |
| "eval_loss": 0.20358878374099731, |
| "eval_rewards/chosen": -0.23646802400287828, |
| "eval_rewards/margins": 7.791113472462895, |
| "eval_rewards/rejected": -8.027581496465773, |
| "eval_runtime": 39.985, |
| "eval_samples_per_second": 5.002, |
| "eval_steps_per_second": 2.501, |
| "kl": 0.0, |
| "step": 950 |
| }, |
| { |
| "epoch": 8.533333333333333, |
| "grad_norm": 0.7467713356018066, |
| "kl": 0.0, |
| "learning_rate": 3.044460665744284e-07, |
| "logits/chosen": -4374587.949367088, |
| "logits/rejected": -5032113.777777778, |
| "logps/chosen": -11.329170323625396, |
| "logps/rejected": -100.85111641589506, |
| "loss": 0.168, |
| "rewards/chosen": 0.6183021641984771, |
| "rewards/margins": 8.623793821220081, |
| "rewards/rejected": -8.005491657021604, |
| "step": 960 |
| }, |
| { |
| "epoch": 8.622222222222222, |
| "grad_norm": 4.250644207000732, |
| "kl": 0.0, |
| "learning_rate": 2.6825447689097174e-07, |
| "logits/chosen": -3936100.512820513, |
| "logits/rejected": -5783889.951219512, |
| "logps/chosen": -16.802386944110577, |
| "logps/rejected": -107.20095750762195, |
| "loss": 0.1843, |
| "rewards/chosen": 0.1256556877723107, |
| "rewards/margins": 8.727746662905695, |
| "rewards/rejected": -8.602090975133384, |
| "step": 970 |
| }, |
| { |
| "epoch": 8.71111111111111, |
| "grad_norm": 0.5136679410934448, |
| "kl": 0.0, |
| "learning_rate": 2.3423053240837518e-07, |
| "logits/chosen": -3032271.5897435895, |
| "logits/rejected": -5597113.365853659, |
| "logps/chosen": -15.48264410556891, |
| "logps/rejected": -109.19771817835365, |
| "loss": 0.1797, |
| "rewards/chosen": 0.2344228304349459, |
| "rewards/margins": 9.17205383272153, |
| "rewards/rejected": -8.937631002286585, |
| "step": 980 |
| }, |
| { |
| "epoch": 8.8, |
| "grad_norm": 0.033152155578136444, |
| "kl": 0.0, |
| "learning_rate": 2.0240727983344837e-07, |
| "logits/chosen": -4437685.073170732, |
| "logits/rejected": -5751509.333333333, |
| "logps/chosen": -11.637835246760671, |
| "logps/rejected": -108.5361328125, |
| "loss": 0.1638, |
| "rewards/chosen": 0.4895447521674924, |
| "rewards/margins": 9.366979898401468, |
| "rewards/rejected": -8.877435146233974, |
| "step": 990 |
| }, |
| { |
| "epoch": 8.88888888888889, |
| "grad_norm": 0.45799577236175537, |
| "kl": 0.0, |
| "learning_rate": 1.7281562838948968e-07, |
| "logits/chosen": -3706790.2337662335, |
| "logits/rejected": -6177359.421686747, |
| "logps/chosen": -8.653350433745942, |
| "logps/rejected": -98.99482304216868, |
| "loss": 0.1646, |
| "rewards/chosen": 0.7714733272403865, |
| "rewards/margins": 8.717424122609362, |
| "rewards/rejected": -7.945950795368976, |
| "step": 1000 |
| }, |
| { |
| "epoch": 8.88888888888889, |
| "eval_logits/chosen": -3326715.957894737, |
| "eval_logits/rejected": -5970805.638095238, |
| "eval_logps/chosen": -18.51261564555921, |
| "eval_logps/rejected": -100.40941220238095, |
| "eval_loss": 0.20378512144088745, |
| "eval_rewards/chosen": -0.25862145674856085, |
| "eval_rewards/margins": 7.829083156346677, |
| "eval_rewards/rejected": -8.087704613095237, |
| "eval_runtime": 39.9785, |
| "eval_samples_per_second": 5.003, |
| "eval_steps_per_second": 2.501, |
| "kl": 0.0, |
| "step": 1000 |
| }, |
| { |
| "epoch": 8.977777777777778, |
| "grad_norm": 0.6202503442764282, |
| "kl": 0.0, |
| "learning_rate": 1.4548431979484134e-07, |
| "logits/chosen": -4080809.6744186045, |
| "logits/rejected": -5047355.675675675, |
| "logps/chosen": -15.184675349745639, |
| "logps/rejected": -103.97293998099663, |
| "loss": 0.1931, |
| "rewards/chosen": 0.372888165850972, |
| "rewards/margins": 8.783699306581546, |
| "rewards/rejected": -8.410811140730575, |
| "step": 1010 |
| }, |
| { |
| "epoch": 9.066666666666666, |
| "grad_norm": 0.09676516056060791, |
| "kl": 0.0, |
| "learning_rate": 1.2043990034669413e-07, |
| "logits/chosen": -4672818.4, |
| "logits/rejected": -6457845.6, |
| "logps/chosen": -12.221817779541016, |
| "logps/rejected": -110.7223876953125, |
| "loss": 0.1534, |
| "rewards/chosen": 0.4222383975982666, |
| "rewards/margins": 9.52428002357483, |
| "rewards/rejected": -9.102041625976563, |
| "step": 1020 |
| }, |
| { |
| "epoch": 9.155555555555555, |
| "grad_norm": 1.8521521091461182, |
| "kl": 0.0, |
| "learning_rate": 9.770669513725128e-08, |
| "logits/chosen": -3108790.564102564, |
| "logits/rejected": -5779450.926829268, |
| "logps/chosen": -12.13914059369992, |
| "logps/rejected": -100.78940786966463, |
| "loss": 0.1975, |
| "rewards/chosen": 0.41713900443835134, |
| "rewards/margins": 8.525855271349554, |
| "rewards/rejected": -8.108716266911204, |
| "step": 1030 |
| }, |
| { |
| "epoch": 9.244444444444444, |
| "grad_norm": 0.0922972559928894, |
| "kl": 0.0, |
| "learning_rate": 7.730678442730539e-08, |
| "logits/chosen": -5806258.162162162, |
| "logits/rejected": -6302255.627906977, |
| "logps/chosen": -14.043008340371621, |
| "logps/rejected": -103.81770076308139, |
| "loss": 0.1782, |
| "rewards/chosen": 0.26274237761626373, |
| "rewards/margins": 8.713173314207832, |
| "rewards/rejected": -8.45043093659157, |
| "step": 1040 |
| }, |
| { |
| "epoch": 9.333333333333334, |
| "grad_norm": 0.8284230828285217, |
| "kl": 0.0, |
| "learning_rate": 5.92599822001666e-08, |
| "logits/chosen": -4424622.02247191, |
| "logits/rejected": -5578106.591549296, |
| "logps/chosen": -15.04966890142205, |
| "logps/rejected": -97.8271484375, |
| "loss": 0.1964, |
| "rewards/chosen": 0.4592494321673104, |
| "rewards/margins": 8.313380566630338, |
| "rewards/rejected": -7.854131134463028, |
| "step": 1050 |
| }, |
| { |
| "epoch": 9.333333333333334, |
| "eval_logits/chosen": -3347635.5368421054, |
| "eval_logits/rejected": -5967138.133333334, |
| "eval_logps/chosen": -18.55568205180921, |
| "eval_logps/rejected": -100.46324404761904, |
| "eval_loss": 0.20376209914684296, |
| "eval_rewards/chosen": -0.2629280491879112, |
| "eval_rewards/margins": 7.830159283401375, |
| "eval_rewards/rejected": -8.093087332589286, |
| "eval_runtime": 40.0214, |
| "eval_samples_per_second": 4.997, |
| "eval_steps_per_second": 2.499, |
| "kl": 0.0, |
| "step": 1050 |
| }, |
| { |
| "epoch": 9.422222222222222, |
| "grad_norm": 1.5310004949569702, |
| "kl": 0.0, |
| "learning_rate": 4.358381691677932e-08, |
| "logits/chosen": -4701114.046511628, |
| "logits/rejected": -5889055.135135135, |
| "logps/chosen": -13.65238633266715, |
| "logps/rejected": -113.2189743454392, |
| "loss": 0.1667, |
| "rewards/chosen": 0.41006483033645985, |
| "rewards/margins": 9.57349553398933, |
| "rewards/rejected": -9.163430703652871, |
| "step": 1060 |
| }, |
| { |
| "epoch": 9.511111111111111, |
| "grad_norm": 0.025284266099333763, |
| "kl": 0.0, |
| "learning_rate": 3.0293514490713216e-08, |
| "logits/chosen": -4475391.58974359, |
| "logits/rejected": -6686905.756097561, |
| "logps/chosen": -9.495955247145433, |
| "logps/rejected": -109.00394197789635, |
| "loss": 0.1453, |
| "rewards/chosen": 0.9194140311999198, |
| "rewards/margins": 9.71330798484893, |
| "rewards/rejected": -8.79389395364901, |
| "step": 1070 |
| }, |
| { |
| "epoch": 9.6, |
| "grad_norm": 0.565949022769928, |
| "kl": 0.0, |
| "learning_rate": 1.9401983499569843e-08, |
| "logits/chosen": -5440440.470588235, |
| "logits/rejected": -4343044.693333333, |
| "logps/chosen": -16.57922794117647, |
| "logps/rejected": -98.1113671875, |
| "loss": 0.2092, |
| "rewards/chosen": 0.01385952444637523, |
| "rewards/margins": 7.818304511425542, |
| "rewards/rejected": -7.804444986979167, |
| "step": 1080 |
| }, |
| { |
| "epoch": 9.688888888888888, |
| "grad_norm": 0.4187465012073517, |
| "kl": 0.0, |
| "learning_rate": 1.0919802647165467e-08, |
| "logits/chosen": -3077447.6619718308, |
| "logits/rejected": -4659458.157303371, |
| "logps/chosen": -18.406172631492076, |
| "logps/rejected": -112.01404494382022, |
| "loss": 0.1503, |
| "rewards/chosen": -0.05281579998177542, |
| "rewards/margins": 9.010594926844067, |
| "rewards/rejected": -9.063410726825843, |
| "step": 1090 |
| }, |
| { |
| "epoch": 9.777777777777779, |
| "grad_norm": 0.11264122277498245, |
| "kl": 0.0, |
| "learning_rate": 4.855210488670381e-09, |
| "logits/chosen": -4569069.714285715, |
| "logits/rejected": -5741534.072289157, |
| "logps/chosen": -6.198184719333401, |
| "logps/rejected": -107.22802146084338, |
| "loss": 0.1483, |
| "rewards/chosen": 0.8745543244597199, |
| "rewards/margins": 9.67981687858246, |
| "rewards/rejected": -8.805262554122741, |
| "step": 1100 |
| }, |
| { |
| "epoch": 9.777777777777779, |
| "eval_logits/chosen": -3328483.0315789473, |
| "eval_logits/rejected": -5965142.552380952, |
| "eval_logps/chosen": -18.615285773026315, |
| "eval_logps/rejected": -100.25174851190476, |
| "eval_loss": 0.2076234370470047, |
| "eval_rewards/chosen": -0.26888851366545025, |
| "eval_rewards/margins": 7.803050427926811, |
| "eval_rewards/rejected": -8.071938941592261, |
| "eval_runtime": 40.0343, |
| "eval_samples_per_second": 4.996, |
| "eval_steps_per_second": 2.498, |
| "kl": 0.0, |
| "step": 1100 |
| }, |
| { |
| "epoch": 9.866666666666667, |
| "grad_norm": 3.3872766494750977, |
| "kl": 0.0, |
| "learning_rate": 1.2140974286808937e-09, |
| "logits/chosen": -3248757.0, |
| "logits/rejected": -4898999.2, |
| "logps/chosen": -19.87044677734375, |
| "logps/rejected": -101.28759155273437, |
| "loss": 0.2023, |
| "rewards/chosen": -0.24896178245544434, |
| "rewards/margins": 8.035884714126587, |
| "rewards/rejected": -8.28484649658203, |
| "step": 1110 |
| }, |
| { |
| "epoch": 9.955555555555556, |
| "grad_norm": 0.8176528811454773, |
| "kl": 0.0, |
| "learning_rate": 0.0, |
| "logits/chosen": -4188007.905882353, |
| "logits/rejected": -4686422.1866666665, |
| "logps/chosen": -11.079340676700367, |
| "logps/rejected": -102.76514973958334, |
| "loss": 0.1815, |
| "rewards/chosen": 0.5793109669404871, |
| "rewards/margins": 8.898303479961319, |
| "rewards/rejected": -8.318992513020833, |
| "step": 1120 |
| }, |
| { |
| "epoch": 9.955555555555556, |
| "step": 1120, |
| "total_flos": 9.766349467641446e+16, |
| "train_loss": 0.23609391812767302, |
| "train_runtime": 6623.7347, |
| "train_samples_per_second": 2.718, |
| "train_steps_per_second": 0.169 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1120, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 9.766349467641446e+16, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|