| { |
| "best_metric": 0.2732886075973511, |
| "best_model_checkpoint": "saves/sycophancy/Llama-8B-3.1-Instruct/kto-500/checkpoint-500", |
| "epoch": 9.955555555555556, |
| "eval_steps": 50, |
| "global_step": 560, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.17777777777777778, |
| "grad_norm": 0.6533107161521912, |
| "kl": 4.067477226257324, |
| "learning_rate": 8.928571428571429e-07, |
| "logits/chosen": -6841018.329113924, |
| "logits/rejected": -7836444.444444444, |
| "logps/chosen": -16.606612193433545, |
| "logps/rejected": -19.352891710069443, |
| "loss": 0.5, |
| "rewards/chosen": 0.0014614655247217492, |
| "rewards/margins": -0.000693329951226255, |
| "rewards/rejected": 0.0021547954759480042, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.35555555555555557, |
| "grad_norm": 0.6705703139305115, |
| "kl": 5.545372486114502, |
| "learning_rate": 1.7857142857142859e-06, |
| "logits/chosen": -6803992.303797469, |
| "logits/rejected": -7634997.728395062, |
| "logps/chosen": -17.628129017503955, |
| "logps/rejected": -19.349335093557098, |
| "loss": 0.5003, |
| "rewards/chosen": -0.001114800006528444, |
| "rewards/margins": -0.002355615283981266, |
| "rewards/rejected": 0.0012408152774528221, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.5333333333333333, |
| "grad_norm": 0.6907099485397339, |
| "kl": 2.6640281677246094, |
| "learning_rate": 2.6785714285714285e-06, |
| "logits/chosen": -6161688.746666667, |
| "logits/rejected": -7775072.376470588, |
| "logps/chosen": -16.01873046875, |
| "logps/rejected": -19.69498650045956, |
| "loss": 0.499, |
| "rewards/chosen": 0.0048561612764994305, |
| "rewards/margins": 0.005783557833409777, |
| "rewards/rejected": -0.0009273965569103465, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.7111111111111111, |
| "grad_norm": 1.0000953674316406, |
| "kl": 3.6405258178710938, |
| "learning_rate": 3.5714285714285718e-06, |
| "logits/chosen": -6069453.76744186, |
| "logits/rejected": -7335607.3513513515, |
| "logps/chosen": -16.918597554051598, |
| "logps/rejected": -19.19496608424831, |
| "loss": 0.4999, |
| "rewards/chosen": 0.0016274835134661475, |
| "rewards/margins": 0.000747067534196759, |
| "rewards/rejected": 0.0008804159792693885, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "grad_norm": 0.7750270962715149, |
| "kl": 4.009792327880859, |
| "learning_rate": 4.464285714285715e-06, |
| "logits/chosen": -5697954.37037037, |
| "logits/rejected": -7671765.873417721, |
| "logps/chosen": -16.696614583333332, |
| "logps/rejected": -19.640096543710442, |
| "loss": 0.499, |
| "rewards/chosen": 0.016511881792986835, |
| "rewards/margins": 0.008323215370830996, |
| "rewards/rejected": 0.008188666422155839, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "eval_logits/chosen": -6797012.48, |
| "eval_logits/rejected": -7105245.44, |
| "eval_logps/chosen": -15.1082421875, |
| "eval_logps/rejected": -19.328565673828123, |
| "eval_loss": 0.49956679344177246, |
| "eval_rewards/chosen": 0.01544377326965332, |
| "eval_rewards/margins": -0.0007611429691314706, |
| "eval_rewards/rejected": 0.01620491623878479, |
| "eval_runtime": 20.7819, |
| "eval_samples_per_second": 4.812, |
| "eval_steps_per_second": 2.406, |
| "kl": 3.8203978538513184, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.0666666666666667, |
| "grad_norm": 0.760673463344574, |
| "kl": 5.259383201599121, |
| "learning_rate": 4.999222955002041e-06, |
| "logits/chosen": -6167793.6, |
| "logits/rejected": -6915556.0, |
| "logps/chosen": -18.555807495117186, |
| "logps/rejected": -20.55987091064453, |
| "loss": 0.4979, |
| "rewards/chosen": 0.030471977591514588, |
| "rewards/margins": 0.017126622796058658, |
| "rewards/rejected": 0.013345354795455932, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.2444444444444445, |
| "grad_norm": 0.8709146976470947, |
| "kl": 5.821277618408203, |
| "learning_rate": 4.990486745229364e-06, |
| "logits/chosen": -5566239.567567567, |
| "logits/rejected": -7954213.209302326, |
| "logps/chosen": -16.395964751372468, |
| "logps/rejected": -19.50782101653343, |
| "loss": 0.4952, |
| "rewards/chosen": 0.06164715096757219, |
| "rewards/margins": 0.048601815042669555, |
| "rewards/rejected": 0.013045335924902628, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.4222222222222223, |
| "grad_norm": 0.9092549681663513, |
| "kl": 7.013403415679932, |
| "learning_rate": 4.9720770655628216e-06, |
| "logits/chosen": -6926604.8, |
| "logits/rejected": -7139911.771428571, |
| "logps/chosen": -16.64070095486111, |
| "logps/rejected": -18.566683523995536, |
| "loss": 0.4886, |
| "rewards/chosen": 0.08301433987087674, |
| "rewards/margins": 0.08200664118168846, |
| "rewards/rejected": 0.0010076986891882761, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 1.182418942451477, |
| "kl": 17.02285385131836, |
| "learning_rate": 4.944065422298262e-06, |
| "logits/chosen": -6318053.6, |
| "logits/rejected": -6837182.4, |
| "logps/chosen": -15.03062744140625, |
| "logps/rejected": -20.429293823242187, |
| "loss": 0.4805, |
| "rewards/chosen": 0.1787983775138855, |
| "rewards/margins": 0.15114764571189881, |
| "rewards/rejected": 0.027650731801986694, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.7777777777777777, |
| "grad_norm": 1.1860688924789429, |
| "kl": 15.505624771118164, |
| "learning_rate": 4.90656061737503e-06, |
| "logits/chosen": -6204396.883116883, |
| "logits/rejected": -6490921.638554217, |
| "logps/chosen": -14.448405476359579, |
| "logps/rejected": -20.421110222138555, |
| "loss": 0.4715, |
| "rewards/chosen": 0.25868542163403, |
| "rewards/margins": 0.23589365106665308, |
| "rewards/rejected": 0.02279177056737693, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.7777777777777777, |
| "eval_logits/chosen": -6498615.68, |
| "eval_logits/rejected": -7022910.72, |
| "eval_logps/chosen": -12.5245361328125, |
| "eval_logps/rejected": -19.137769775390623, |
| "eval_loss": 0.47049248218536377, |
| "eval_rewards/chosen": 0.27381431579589843, |
| "eval_rewards/margins": 0.23852983951568602, |
| "eval_rewards/rejected": 0.035284476280212404, |
| "eval_runtime": 20.7743, |
| "eval_samples_per_second": 4.814, |
| "eval_steps_per_second": 2.407, |
| "kl": 12.469597816467285, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.9555555555555557, |
| "grad_norm": 1.5210201740264893, |
| "kl": 10.133670806884766, |
| "learning_rate": 4.859708325770919e-06, |
| "logits/chosen": -5213267.692307692, |
| "logits/rejected": -8371516.87804878, |
| "logps/chosen": -14.842853252704327, |
| "logps/rejected": -20.327931473894818, |
| "loss": 0.4463, |
| "rewards/chosen": 0.31009825682028747, |
| "rewards/margins": 0.42884164157698046, |
| "rewards/rejected": -0.11874338475669302, |
| "step": 110 |
| }, |
| { |
| "epoch": 2.1333333333333333, |
| "grad_norm": 1.9936622381210327, |
| "kl": 1.3701114654541016, |
| "learning_rate": 4.80369052967602e-06, |
| "logits/chosen": -5974752.0, |
| "logits/rejected": -6628083.368421053, |
| "logps/chosen": -13.366662888299851, |
| "logps/rejected": -21.737105520148027, |
| "loss": 0.4121, |
| "rewards/chosen": 0.37865970248267766, |
| "rewards/margins": 0.7291635236046965, |
| "rewards/rejected": -0.3505038211220189, |
| "step": 120 |
| }, |
| { |
| "epoch": 2.311111111111111, |
| "grad_norm": 1.529518961906433, |
| "kl": 4.181853294372559, |
| "learning_rate": 4.7387248116432524e-06, |
| "logits/chosen": -5894920.421052632, |
| "logits/rejected": -6893739.428571428, |
| "logps/chosen": -13.183826647306743, |
| "logps/rejected": -23.257681710379465, |
| "loss": 0.4071, |
| "rewards/chosen": 0.38488915092066717, |
| "rewards/margins": 0.776886170370537, |
| "rewards/rejected": -0.3919970194498698, |
| "step": 130 |
| }, |
| { |
| "epoch": 2.488888888888889, |
| "grad_norm": 2.2388455867767334, |
| "kl": 11.135213851928711, |
| "learning_rate": 4.665063509461098e-06, |
| "logits/chosen": -4817724.0, |
| "logits/rejected": -7434909.090909091, |
| "logps/chosen": -12.073420206705729, |
| "logps/rejected": -26.122675115411933, |
| "loss": 0.3807, |
| "rewards/chosen": 0.41833003362019855, |
| "rewards/margins": 1.0169767610954517, |
| "rewards/rejected": -0.598646727475253, |
| "step": 140 |
| }, |
| { |
| "epoch": 2.6666666666666665, |
| "grad_norm": 2.9637298583984375, |
| "kl": 0.0, |
| "learning_rate": 4.5829927360311224e-06, |
| "logits/chosen": -4782145.406593407, |
| "logits/rejected": -6822974.144927536, |
| "logps/chosen": -14.056957202953297, |
| "logps/rejected": -32.71091627038044, |
| "loss": 0.3459, |
| "rewards/chosen": 0.36075919015066965, |
| "rewards/margins": 1.566741674829961, |
| "rewards/rejected": -1.2059824846792913, |
| "step": 150 |
| }, |
| { |
| "epoch": 2.6666666666666665, |
| "eval_logits/chosen": -5608189.44, |
| "eval_logits/rejected": -6636171.52, |
| "eval_logps/chosen": -14.359583740234376, |
| "eval_logps/rejected": -30.49203369140625, |
| "eval_loss": 0.3755200207233429, |
| "eval_rewards/chosen": 0.090309476852417, |
| "eval_rewards/margins": 1.1904513835906982, |
| "eval_rewards/rejected": -1.1001419067382812, |
| "eval_runtime": 20.8081, |
| "eval_samples_per_second": 4.806, |
| "eval_steps_per_second": 2.403, |
| "kl": 0.7521286010742188, |
| "step": 150 |
| }, |
| { |
| "epoch": 2.8444444444444446, |
| "grad_norm": 2.555079698562622, |
| "kl": 2.267221450805664, |
| "learning_rate": 4.492831268057307e-06, |
| "logits/chosen": -3827353.7560975607, |
| "logits/rejected": -6784299.487179487, |
| "logps/chosen": -13.25042129144436, |
| "logps/rejected": -33.39762682792468, |
| "loss": 0.3292, |
| "rewards/chosen": 0.3535805678949123, |
| "rewards/margins": 1.744958921698498, |
| "rewards/rejected": -1.3913783538035858, |
| "step": 160 |
| }, |
| { |
| "epoch": 3.022222222222222, |
| "grad_norm": 1.4790682792663574, |
| "kl": 0.0831449031829834, |
| "learning_rate": 4.394929307863633e-06, |
| "logits/chosen": -5203950.702702703, |
| "logits/rejected": -6706759.441860465, |
| "logps/chosen": -15.109988650760135, |
| "logps/rejected": -37.56380870730378, |
| "loss": 0.2896, |
| "rewards/chosen": 0.43377953606682856, |
| "rewards/margins": 2.1837287917218067, |
| "rewards/rejected": -1.7499492556549783, |
| "step": 170 |
| }, |
| { |
| "epoch": 3.2, |
| "grad_norm": 1.453729271888733, |
| "kl": 1.8771247863769531, |
| "learning_rate": 4.289667123149296e-06, |
| "logits/chosen": -4639765.333333333, |
| "logits/rejected": -6930286.6823529415, |
| "logps/chosen": -13.8180029296875, |
| "logps/rejected": -40.942793543198526, |
| "loss": 0.2707, |
| "rewards/chosen": 0.4121772257486979, |
| "rewards/margins": 2.442220165776271, |
| "rewards/rejected": -2.0300429400275735, |
| "step": 180 |
| }, |
| { |
| "epoch": 3.3777777777777778, |
| "grad_norm": 1.3966221809387207, |
| "kl": 9.155083656311035, |
| "learning_rate": 4.177453569964925e-06, |
| "logits/chosen": -5388630.857142857, |
| "logits/rejected": -6755297.684210527, |
| "logps/chosen": -12.648887997581845, |
| "logps/rejected": -39.70372250205592, |
| "loss": 0.2847, |
| "rewards/chosen": 0.5054586501348586, |
| "rewards/margins": 2.584699549471824, |
| "rewards/rejected": -2.0792408993369653, |
| "step": 190 |
| }, |
| { |
| "epoch": 3.5555555555555554, |
| "grad_norm": 6.537291526794434, |
| "kl": 0.0, |
| "learning_rate": 4.058724504646834e-06, |
| "logits/chosen": -4172365.879518072, |
| "logits/rejected": -6079465.558441559, |
| "logps/chosen": -14.219998646931476, |
| "logps/rejected": -46.81524896002435, |
| "loss": 0.2879, |
| "rewards/chosen": 0.2552003975374153, |
| "rewards/margins": 2.85085913333287, |
| "rewards/rejected": -2.5956587357954546, |
| "step": 200 |
| }, |
| { |
| "epoch": 3.5555555555555554, |
| "eval_logits/chosen": -5304341.76, |
| "eval_logits/rejected": -6579046.4, |
| "eval_logps/chosen": -15.011697998046875, |
| "eval_logps/rejected": -42.5760400390625, |
| "eval_loss": 0.3254357576370239, |
| "eval_rewards/chosen": 0.025098147392272948, |
| "eval_rewards/margins": 2.3336407804489134, |
| "eval_rewards/rejected": -2.3085426330566405, |
| "eval_runtime": 20.7983, |
| "eval_samples_per_second": 4.808, |
| "eval_steps_per_second": 2.404, |
| "kl": 0.0, |
| "step": 200 |
| }, |
| { |
| "epoch": 3.7333333333333334, |
| "grad_norm": 2.430555820465088, |
| "kl": 0.0, |
| "learning_rate": 3.933941090877615e-06, |
| "logits/chosen": -4026738.4, |
| "logits/rejected": -6801368.8, |
| "logps/chosen": -11.006294250488281, |
| "logps/rejected": -45.532797241210936, |
| "loss": 0.2517, |
| "rewards/chosen": 0.5302759170532226, |
| "rewards/margins": 3.205550193786621, |
| "rewards/rejected": -2.6752742767333983, |
| "step": 210 |
| }, |
| { |
| "epoch": 3.911111111111111, |
| "grad_norm": 1.6560513973236084, |
| "kl": 3.6237878799438477, |
| "learning_rate": 3.8035880084487454e-06, |
| "logits/chosen": -4789041.230769231, |
| "logits/rejected": -7053110.634146341, |
| "logps/chosen": -10.559817583133013, |
| "logps/rejected": -46.676900724085364, |
| "loss": 0.2392, |
| "rewards/chosen": 0.6918807885585687, |
| "rewards/margins": 3.4286504880274142, |
| "rewards/rejected": -2.7367696994688453, |
| "step": 220 |
| }, |
| { |
| "epoch": 4.088888888888889, |
| "grad_norm": 1.3882936239242554, |
| "kl": 1.455021858215332, |
| "learning_rate": 3.6681715706826555e-06, |
| "logits/chosen": -4748690.823529412, |
| "logits/rejected": -6739801.6, |
| "logps/chosen": -9.264028033088236, |
| "logps/rejected": -50.18566080729167, |
| "loss": 0.2326, |
| "rewards/chosen": 0.8066374834846047, |
| "rewards/margins": 3.8256635658413756, |
| "rewards/rejected": -3.019026082356771, |
| "step": 230 |
| }, |
| { |
| "epoch": 4.266666666666667, |
| "grad_norm": 1.702983021736145, |
| "kl": 0.0, |
| "learning_rate": 3.5282177578265295e-06, |
| "logits/chosen": -4685888.7710843375, |
| "logits/rejected": -6661258.389610389, |
| "logps/chosen": -7.184921816170934, |
| "logps/rejected": -53.11224761566559, |
| "loss": 0.2209, |
| "rewards/chosen": 0.9436166602444936, |
| "rewards/margins": 4.360447540545833, |
| "rewards/rejected": -3.4168308803013394, |
| "step": 240 |
| }, |
| { |
| "epoch": 4.444444444444445, |
| "grad_norm": 0.7544198036193848, |
| "kl": 0.0, |
| "learning_rate": 3.384270174056454e-06, |
| "logits/chosen": -4689330.8, |
| "logits/rejected": -6276662.8, |
| "logps/chosen": -12.63294677734375, |
| "logps/rejected": -61.84871826171875, |
| "loss": 0.2319, |
| "rewards/chosen": 0.31192424297332766, |
| "rewards/margins": 4.568389391899109, |
| "rewards/rejected": -4.256465148925781, |
| "step": 250 |
| }, |
| { |
| "epoch": 4.444444444444445, |
| "eval_logits/chosen": -5472120.32, |
| "eval_logits/rejected": -6862511.36, |
| "eval_logps/chosen": -16.78877197265625, |
| "eval_logps/rejected": -54.95546875, |
| "eval_loss": 0.3014616072177887, |
| "eval_rewards/chosen": -0.1526092529296875, |
| "eval_rewards/margins": 3.393876037597656, |
| "eval_rewards/rejected": -3.5464852905273436, |
| "eval_runtime": 20.809, |
| "eval_samples_per_second": 4.806, |
| "eval_steps_per_second": 2.403, |
| "kl": 3.790660858154297, |
| "step": 250 |
| }, |
| { |
| "epoch": 4.622222222222222, |
| "grad_norm": 1.594618797302246, |
| "kl": 0.0, |
| "learning_rate": 3.236887936027261e-06, |
| "logits/chosen": -5005965.552941176, |
| "logits/rejected": -6077611.52, |
| "logps/chosen": -12.832533892463236, |
| "logps/rejected": -61.87927083333334, |
| "loss": 0.2328, |
| "rewards/chosen": 0.6093355066636029, |
| "rewards/margins": 4.77091916551777, |
| "rewards/rejected": -4.161583658854167, |
| "step": 260 |
| }, |
| { |
| "epoch": 4.8, |
| "grad_norm": 14.283431053161621, |
| "kl": 0.0, |
| "learning_rate": 3.0866435011692884e-06, |
| "logits/chosen": -3974209.969230769, |
| "logits/rejected": -7320299.115789474, |
| "logps/chosen": -12.673212139423077, |
| "logps/rejected": -58.971828741776314, |
| "loss": 0.222, |
| "rewards/chosen": 0.35330341045673075, |
| "rewards/margins": 4.22993337036627, |
| "rewards/rejected": -3.8766299599095393, |
| "step": 270 |
| }, |
| { |
| "epoch": 4.977777777777778, |
| "grad_norm": 6.273312091827393, |
| "kl": 0.0, |
| "learning_rate": 2.9341204441673267e-06, |
| "logits/chosen": -4811573.7011494255, |
| "logits/rejected": -6532414.2465753425, |
| "logps/chosen": -12.03460903825431, |
| "logps/rejected": -72.13428403253425, |
| "loss": 0.2032, |
| "rewards/chosen": 0.4564220384619702, |
| "rewards/margins": 5.712377146686499, |
| "rewards/rejected": -5.255955108224529, |
| "step": 280 |
| }, |
| { |
| "epoch": 5.155555555555556, |
| "grad_norm": 1.409645676612854, |
| "kl": 0.0, |
| "learning_rate": 2.7799111902582697e-06, |
| "logits/chosen": -5135937.641025641, |
| "logits/rejected": -7215049.365853659, |
| "logps/chosen": -13.245896559495192, |
| "logps/rejected": -71.7289979516006, |
| "loss": 0.2097, |
| "rewards/chosen": 0.39485403207632214, |
| "rewards/margins": 5.603232521500865, |
| "rewards/rejected": -5.208378489424542, |
| "step": 290 |
| }, |
| { |
| "epoch": 5.333333333333333, |
| "grad_norm": 1.3076075315475464, |
| "kl": 0.0, |
| "learning_rate": 2.624614714151743e-06, |
| "logits/chosen": -4269975.518072289, |
| "logits/rejected": -6750665.974025974, |
| "logps/chosen": -9.87456392954631, |
| "logps/rejected": -71.41611074472402, |
| "loss": 0.1971, |
| "rewards/chosen": 0.6907072411962303, |
| "rewards/margins": 5.822813477245337, |
| "rewards/rejected": -5.132106236049107, |
| "step": 300 |
| }, |
| { |
| "epoch": 5.333333333333333, |
| "eval_logits/chosen": -5352769.92, |
| "eval_logits/rejected": -6700284.16, |
| "eval_logps/chosen": -20.0192236328125, |
| "eval_logps/rejected": -65.3341015625, |
| "eval_loss": 0.2927246391773224, |
| "eval_rewards/chosen": -0.4756543731689453, |
| "eval_rewards/margins": 4.108694686889649, |
| "eval_rewards/rejected": -4.584349060058594, |
| "eval_runtime": 20.7933, |
| "eval_samples_per_second": 4.809, |
| "eval_steps_per_second": 2.405, |
| "kl": 0.0, |
| "step": 300 |
| }, |
| { |
| "epoch": 5.511111111111111, |
| "grad_norm": 1.6624188423156738, |
| "kl": 0.0, |
| "learning_rate": 2.4688342135114625e-06, |
| "logits/chosen": -4672509.506493507, |
| "logits/rejected": -6401643.951807229, |
| "logps/chosen": -14.955799449573863, |
| "logps/rejected": -73.92682840737952, |
| "loss": 0.2184, |
| "rewards/chosen": 0.1713225996339476, |
| "rewards/margins": 5.420992421499535, |
| "rewards/rejected": -5.249669821865587, |
| "step": 310 |
| }, |
| { |
| "epoch": 5.688888888888889, |
| "grad_norm": 1.1759079694747925, |
| "kl": 0.0, |
| "learning_rate": 2.3131747660339396e-06, |
| "logits/chosen": -3520974.9873417723, |
| "logits/rejected": -6545384.691358024, |
| "logps/chosen": -12.296756792672072, |
| "logps/rejected": -71.31258439429013, |
| "loss": 0.2118, |
| "rewards/chosen": 0.5269823677932159, |
| "rewards/margins": 5.867736370493833, |
| "rewards/rejected": -5.340754002700617, |
| "step": 320 |
| }, |
| { |
| "epoch": 5.866666666666667, |
| "grad_norm": 0.6257539391517639, |
| "kl": 3.781810760498047, |
| "learning_rate": 2.158240979224817e-06, |
| "logits/chosen": -3069954.6329113925, |
| "logits/rejected": -6776339.75308642, |
| "logps/chosen": -10.886953619462025, |
| "logps/rejected": -71.04876181520062, |
| "loss": 0.1733, |
| "rewards/chosen": 0.8049045514456833, |
| "rewards/margins": 5.933409175047149, |
| "rewards/rejected": -5.128504623601466, |
| "step": 330 |
| }, |
| { |
| "epoch": 6.044444444444444, |
| "grad_norm": 0.3074154853820801, |
| "kl": 0.0, |
| "learning_rate": 2.004634642001507e-06, |
| "logits/chosen": -5546165.6, |
| "logits/rejected": -5844707.2, |
| "logps/chosen": -7.493644714355469, |
| "logps/rejected": -72.3456787109375, |
| "loss": 0.189, |
| "rewards/chosen": 1.0212175369262695, |
| "rewards/margins": 6.292597389221192, |
| "rewards/rejected": -5.271379852294922, |
| "step": 340 |
| }, |
| { |
| "epoch": 6.222222222222222, |
| "grad_norm": 0.8704222440719604, |
| "kl": 2.9211883544921875, |
| "learning_rate": 1.852952387243698e-06, |
| "logits/chosen": -3859937.2467532465, |
| "logits/rejected": -6969344.0, |
| "logps/chosen": -13.282477044439934, |
| "logps/rejected": -82.35998682228916, |
| "loss": 0.1825, |
| "rewards/chosen": 0.44339762105570213, |
| "rewards/margins": 6.771900597806455, |
| "rewards/rejected": -6.328502976750753, |
| "step": 350 |
| }, |
| { |
| "epoch": 6.222222222222222, |
| "eval_logits/chosen": -5317811.2, |
| "eval_logits/rejected": -6587809.28, |
| "eval_logps/chosen": -21.9388818359375, |
| "eval_logps/rejected": -70.896962890625, |
| "eval_loss": 0.2855421304702759, |
| "eval_rewards/chosen": -0.6676201629638672, |
| "eval_rewards/margins": 4.473014602661133, |
| "eval_rewards/rejected": -5.140634765625, |
| "eval_runtime": 20.782, |
| "eval_samples_per_second": 4.812, |
| "eval_steps_per_second": 2.406, |
| "kl": 0.0, |
| "step": 350 |
| }, |
| { |
| "epoch": 6.4, |
| "grad_norm": 2.186678886413574, |
| "kl": 0.0, |
| "learning_rate": 1.7037833743707892e-06, |
| "logits/chosen": -3891186.701298701, |
| "logits/rejected": -6218544.192771085, |
| "logps/chosen": -11.341275301846592, |
| "logps/rejected": -76.01284826807229, |
| "loss": 0.2035, |
| "rewards/chosen": 0.5609105097783076, |
| "rewards/margins": 6.146511948737269, |
| "rewards/rejected": -5.585601438958961, |
| "step": 360 |
| }, |
| { |
| "epoch": 6.5777777777777775, |
| "grad_norm": 0.6433426141738892, |
| "kl": 0.0, |
| "learning_rate": 1.5577070009474872e-06, |
| "logits/chosen": -4631077.608247423, |
| "logits/rejected": -5847530.158730159, |
| "logps/chosen": -11.66492328447165, |
| "logps/rejected": -78.18622116815476, |
| "loss": 0.2251, |
| "rewards/chosen": 0.4717446160070675, |
| "rewards/margins": 6.37879097947433, |
| "rewards/rejected": -5.907046363467262, |
| "step": 370 |
| }, |
| { |
| "epoch": 6.7555555555555555, |
| "grad_norm": 1.106202483177185, |
| "kl": 0.0, |
| "learning_rate": 1.415290652206105e-06, |
| "logits/chosen": -4495871.157894737, |
| "logits/rejected": -6737105.523809524, |
| "logps/chosen": -9.170941804584704, |
| "logps/rejected": -74.34019252232143, |
| "loss": 0.2006, |
| "rewards/chosen": 0.8608366313733553, |
| "rewards/margins": 6.370171740539092, |
| "rewards/rejected": -5.509335109165737, |
| "step": 380 |
| }, |
| { |
| "epoch": 6.933333333333334, |
| "grad_norm": 0.3888777494430542, |
| "kl": 2.491642951965332, |
| "learning_rate": 1.2770874972267777e-06, |
| "logits/chosen": -4726923.341772152, |
| "logits/rejected": -6615125.333333333, |
| "logps/chosen": -11.210519525069225, |
| "logps/rejected": -79.0329258294753, |
| "loss": 0.1732, |
| "rewards/chosen": 0.7415898962865902, |
| "rewards/margins": 6.530411646503604, |
| "rewards/rejected": -5.788821750217014, |
| "step": 390 |
| }, |
| { |
| "epoch": 7.111111111111111, |
| "grad_norm": 1.2314908504486084, |
| "kl": 0.0, |
| "learning_rate": 1.1436343403356019e-06, |
| "logits/chosen": -3235399.314285714, |
| "logits/rejected": -6684086.755555555, |
| "logps/chosen": -16.83338623046875, |
| "logps/rejected": -76.9798611111111, |
| "loss": 0.1996, |
| "rewards/chosen": -0.024504613876342774, |
| "rewards/margins": 5.710608911514282, |
| "rewards/rejected": -5.735113525390625, |
| "step": 400 |
| }, |
| { |
| "epoch": 7.111111111111111, |
| "eval_logits/chosen": -5272716.48, |
| "eval_logits/rejected": -6501901.44, |
| "eval_logps/chosen": -22.1139404296875, |
| "eval_logps/rejected": -72.409404296875, |
| "eval_loss": 0.28036418557167053, |
| "eval_rewards/chosen": -0.6851260375976562, |
| "eval_rewards/margins": 4.606752624511719, |
| "eval_rewards/rejected": -5.291878662109375, |
| "eval_runtime": 20.812, |
| "eval_samples_per_second": 4.805, |
| "eval_steps_per_second": 2.402, |
| "kl": 0.0, |
| "step": 400 |
| }, |
| { |
| "epoch": 7.288888888888889, |
| "grad_norm": 11.075384140014648, |
| "kl": 0.0, |
| "learning_rate": 1.0154495360662464e-06, |
| "logits/chosen": -5561997.333333333, |
| "logits/rejected": -4703039.157894737, |
| "logps/chosen": -13.027040027436756, |
| "logps/rejected": -82.51130114103618, |
| "loss": 0.2111, |
| "rewards/chosen": 0.3391632352556501, |
| "rewards/margins": 6.753453010903264, |
| "rewards/rejected": -6.414289775647615, |
| "step": 410 |
| }, |
| { |
| "epoch": 7.466666666666667, |
| "grad_norm": 5.149106502532959, |
| "kl": 0.0, |
| "learning_rate": 8.930309757836517e-07, |
| "logits/chosen": -3194344.6486486485, |
| "logits/rejected": -6930841.302325581, |
| "logps/chosen": -12.075965675147804, |
| "logps/rejected": -80.88395371547965, |
| "loss": 0.1964, |
| "rewards/chosen": 0.4253324560216955, |
| "rewards/margins": 6.625286466741172, |
| "rewards/rejected": -6.199954010719477, |
| "step": 420 |
| }, |
| { |
| "epoch": 7.644444444444445, |
| "grad_norm": 1.6590875387191772, |
| "kl": 0.0, |
| "learning_rate": 7.768541537901325e-07, |
| "logits/chosen": -4468182.4, |
| "logits/rejected": -6386037.942857143, |
| "logps/chosen": -7.90546129014757, |
| "logps/rejected": -78.26188616071428, |
| "loss": 0.1915, |
| "rewards/chosen": 0.8592787848578559, |
| "rewards/margins": 6.713061656649151, |
| "rewards/rejected": -5.853782871791295, |
| "step": 430 |
| }, |
| { |
| "epoch": 7.822222222222222, |
| "grad_norm": 0.5123002529144287, |
| "kl": 11.214415550231934, |
| "learning_rate": 6.673703204254348e-07, |
| "logits/chosen": -1876414.5777777778, |
| "logits/rejected": -7622743.771428571, |
| "logps/chosen": -12.83230251736111, |
| "logps/rejected": -82.74340122767858, |
| "loss": 0.1886, |
| "rewards/chosen": 0.6239681667751736, |
| "rewards/margins": 6.8449258955698165, |
| "rewards/rejected": -6.220957728794643, |
| "step": 440 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 0.4275095760822296, |
| "kl": 0.0, |
| "learning_rate": 5.650047293344316e-07, |
| "logits/chosen": -5781915.701492538, |
| "logits/rejected": -6046539.698924731, |
| "logps/chosen": -13.672110030900187, |
| "logps/rejected": -80.53140225974462, |
| "loss": 0.1776, |
| "rewards/chosen": 0.4122744318264634, |
| "rewards/margins": 6.3341966848506575, |
| "rewards/rejected": -5.921922253024194, |
| "step": 450 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_logits/chosen": -5282150.4, |
| "eval_logits/rejected": -6475059.2, |
| "eval_logps/chosen": -22.7550830078125, |
| "eval_logps/rejected": -74.3277001953125, |
| "eval_loss": 0.27525943517684937, |
| "eval_rewards/chosen": -0.7492402648925781, |
| "eval_rewards/margins": 4.734468231201172, |
| "eval_rewards/rejected": -5.48370849609375, |
| "eval_runtime": 20.8139, |
| "eval_samples_per_second": 4.804, |
| "eval_steps_per_second": 2.402, |
| "kl": 0.0, |
| "step": 450 |
| }, |
| { |
| "epoch": 8.177777777777777, |
| "grad_norm": 6.894736289978027, |
| "kl": 0.0, |
| "learning_rate": 4.7015498571035877e-07, |
| "logits/chosen": -3783783.5061728396, |
| "logits/rejected": -6069178.734177215, |
| "logps/chosen": -5.430916868610146, |
| "logps/rejected": -82.43604751780063, |
| "loss": 0.1634, |
| "rewards/chosen": 0.9972056636103878, |
| "rewards/margins": 7.210469452920119, |
| "rewards/rejected": -6.213263789309731, |
| "step": 460 |
| }, |
| { |
| "epoch": 8.355555555555556, |
| "grad_norm": 0.09748721122741699, |
| "kl": 0.0, |
| "learning_rate": 3.831895019292897e-07, |
| "logits/chosen": -7059043.7402597405, |
| "logits/rejected": -5150067.277108434, |
| "logps/chosen": -17.455629819399352, |
| "logps/rejected": -83.4837749435241, |
| "loss": 0.1944, |
| "rewards/chosen": 0.07330032447715859, |
| "rewards/margins": 6.58620889234237, |
| "rewards/rejected": -6.512908567865211, |
| "step": 470 |
| }, |
| { |
| "epoch": 8.533333333333333, |
| "grad_norm": 37.684791564941406, |
| "kl": 0.0, |
| "learning_rate": 3.044460665744284e-07, |
| "logits/chosen": -5911329.28, |
| "logits/rejected": -5294802.070588236, |
| "logps/chosen": -13.222509765625, |
| "logps/rejected": -76.97578699448529, |
| "loss": 0.2056, |
| "rewards/chosen": 0.3842613474527995, |
| "rewards/margins": 6.094753649842505, |
| "rewards/rejected": -5.710492302389706, |
| "step": 480 |
| }, |
| { |
| "epoch": 8.71111111111111, |
| "grad_norm": 0.48345357179641724, |
| "kl": 0.0, |
| "learning_rate": 2.3423053240837518e-07, |
| "logits/chosen": -3852471.8139534886, |
| "logits/rejected": -6658902.486486486, |
| "logps/chosen": -9.945970402207486, |
| "logps/rejected": -84.33938846072635, |
| "loss": 0.1847, |
| "rewards/chosen": 0.8006199681481649, |
| "rewards/margins": 7.142369004782456, |
| "rewards/rejected": -6.34174903663429, |
| "step": 490 |
| }, |
| { |
| "epoch": 8.88888888888889, |
| "grad_norm": 0.2579200565814972, |
| "kl": 0.0, |
| "learning_rate": 1.7281562838948968e-07, |
| "logits/chosen": -3453992.727272727, |
| "logits/rejected": -7513944.0, |
| "logps/chosen": -10.801272305575283, |
| "logps/rejected": -81.59269205729167, |
| "loss": 0.208, |
| "rewards/chosen": 0.5914750532670454, |
| "rewards/margins": 6.682477623525292, |
| "rewards/rejected": -6.091002570258246, |
| "step": 500 |
| }, |
| { |
| "epoch": 8.88888888888889, |
| "eval_logits/chosen": -5270992.0, |
| "eval_logits/rejected": -6459799.04, |
| "eval_logps/chosen": -22.9960888671875, |
| "eval_logps/rejected": -75.276025390625, |
| "eval_loss": 0.2732886075973511, |
| "eval_rewards/chosen": -0.7733412170410157, |
| "eval_rewards/margins": 4.80520004272461, |
| "eval_rewards/rejected": -5.578541259765625, |
| "eval_runtime": 20.8559, |
| "eval_samples_per_second": 4.795, |
| "eval_steps_per_second": 2.397, |
| "kl": 0.0, |
| "step": 500 |
| }, |
| { |
| "epoch": 9.066666666666666, |
| "grad_norm": 0.3732987344264984, |
| "kl": 0.0, |
| "learning_rate": 1.2043990034669413e-07, |
| "logits/chosen": -4063886.1298701297, |
| "logits/rejected": -6919009.927710843, |
| "logps/chosen": -19.0400485744724, |
| "logps/rejected": -83.49184629141567, |
| "loss": 0.2051, |
| "rewards/chosen": -0.03847099898697494, |
| "rewards/margins": 6.430230467033356, |
| "rewards/rejected": -6.468701466020331, |
| "step": 510 |
| }, |
| { |
| "epoch": 9.244444444444444, |
| "grad_norm": 0.415623277425766, |
| "kl": 0.0, |
| "learning_rate": 7.730678442730539e-08, |
| "logits/chosen": -5067307.52, |
| "logits/rejected": -6418687.247058824, |
| "logps/chosen": -11.464226888020834, |
| "logps/rejected": -86.0582950367647, |
| "loss": 0.1587, |
| "rewards/chosen": 0.5690560913085938, |
| "rewards/margins": 7.230119251924403, |
| "rewards/rejected": -6.661063160615809, |
| "step": 520 |
| }, |
| { |
| "epoch": 9.422222222222222, |
| "grad_norm": 2.298677444458008, |
| "kl": 0.0, |
| "learning_rate": 4.358381691677932e-08, |
| "logits/chosen": -4109652.0, |
| "logits/rejected": -6422994.4, |
| "logps/chosen": -9.813970184326172, |
| "logps/rejected": -85.676953125, |
| "loss": 0.1562, |
| "rewards/chosen": 0.9679355621337891, |
| "rewards/margins": 7.5916393280029295, |
| "rewards/rejected": -6.6237037658691404, |
| "step": 530 |
| }, |
| { |
| "epoch": 9.6, |
| "grad_norm": 0.9799414873123169, |
| "kl": 0.38483619689941406, |
| "learning_rate": 1.9401983499569843e-08, |
| "logits/chosen": -4599832.788732395, |
| "logits/rejected": -5707845.0337078655, |
| "logps/chosen": -11.476158464458626, |
| "logps/rejected": -83.63685042134831, |
| "loss": 0.1776, |
| "rewards/chosen": 0.3927964492582939, |
| "rewards/margins": 6.871875106429994, |
| "rewards/rejected": -6.4790786571717, |
| "step": 540 |
| }, |
| { |
| "epoch": 9.777777777777779, |
| "grad_norm": 0.49998390674591064, |
| "kl": 0.0, |
| "learning_rate": 4.855210488670381e-09, |
| "logits/chosen": -4236334.577777778, |
| "logits/rejected": -4783881.142857143, |
| "logps/chosen": -5.939041476779514, |
| "logps/rejected": -75.12473493303571, |
| "loss": 0.2075, |
| "rewards/chosen": 1.0517437405056425, |
| "rewards/margins": 6.599594867040241, |
| "rewards/rejected": -5.547851126534598, |
| "step": 550 |
| }, |
| { |
| "epoch": 9.777777777777779, |
| "eval_logits/chosen": -5283820.8, |
| "eval_logits/rejected": -6485849.6, |
| "eval_logps/chosen": -23.082109375, |
| "eval_logps/rejected": -74.8675, |
| "eval_loss": 0.27660998702049255, |
| "eval_rewards/chosen": -0.7819430541992187, |
| "eval_rewards/margins": 4.755745544433594, |
| "eval_rewards/rejected": -5.537688598632813, |
| "eval_runtime": 20.8339, |
| "eval_samples_per_second": 4.8, |
| "eval_steps_per_second": 2.4, |
| "kl": 0.0, |
| "step": 550 |
| }, |
| { |
| "epoch": 9.955555555555556, |
| "grad_norm": 0.7372793555259705, |
| "kl": 0.0, |
| "learning_rate": 0.0, |
| "logits/chosen": -3234109.7721518986, |
| "logits/rejected": -6989616.197530864, |
| "logps/chosen": -16.822625655162184, |
| "logps/rejected": -82.41786024305556, |
| "loss": 0.2168, |
| "rewards/chosen": -0.01685871655427957, |
| "rewards/margins": 6.099000326896723, |
| "rewards/rejected": -6.115859043451003, |
| "step": 560 |
| }, |
| { |
| "epoch": 9.955555555555556, |
| "step": 560, |
| "total_flos": 5.004660760510464e+16, |
| "train_loss": 0.2794176772236824, |
| "train_runtime": 3263.9168, |
| "train_samples_per_second": 2.757, |
| "train_steps_per_second": 0.172 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 560, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 5.004660760510464e+16, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|