| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.6666666666666665, |
| "eval_steps": 500, |
| "global_step": 3000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0044444444444444444, |
| "grad_norm": 3.6353344917297363, |
| "learning_rate": 1e-05, |
| "logits/chosen": -0.4628738462924957, |
| "logits/rejected": -0.46038827300071716, |
| "logps/chosen": -305.24371337890625, |
| "logps/rejected": -217.2339324951172, |
| "loss": 0.69, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 0.006762671284377575, |
| "rewards/margins": 0.005093236453831196, |
| "rewards/rejected": 0.0016694354126229882, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.008888888888888889, |
| "grad_norm": 3.840994119644165, |
| "learning_rate": 9.999945685076187e-06, |
| "logits/chosen": -0.4660520553588867, |
| "logits/rejected": -0.4597313404083252, |
| "logps/chosen": -295.14178466796875, |
| "logps/rejected": -215.0008544921875, |
| "loss": 0.6585, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": 0.07166890054941177, |
| "rewards/margins": 0.057903312146663666, |
| "rewards/rejected": 0.013765583746135235, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.013333333333333334, |
| "grad_norm": 3.3536572456359863, |
| "learning_rate": 9.99978274148479e-06, |
| "logits/chosen": -0.46407952904701233, |
| "logits/rejected": -0.46835923194885254, |
| "logps/chosen": -307.1814880371094, |
| "logps/rejected": -220.930908203125, |
| "loss": 0.6155, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": 0.18775026500225067, |
| "rewards/margins": 0.13849034905433655, |
| "rewards/rejected": 0.049259938299655914, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.017777777777777778, |
| "grad_norm": 3.603736400604248, |
| "learning_rate": 9.999511172765917e-06, |
| "logits/chosen": -0.4126955568790436, |
| "logits/rejected": -0.4344128668308258, |
| "logps/chosen": -295.6951599121094, |
| "logps/rejected": -219.573974609375, |
| "loss": 0.572, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": 0.3320372700691223, |
| "rewards/margins": 0.22264714539051056, |
| "rewards/rejected": 0.10939009487628937, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.022222222222222223, |
| "grad_norm": 3.7065272331237793, |
| "learning_rate": 9.999130984819662e-06, |
| "logits/chosen": -0.42767876386642456, |
| "logits/rejected": -0.4458894729614258, |
| "logps/chosen": -331.75592041015625, |
| "logps/rejected": -231.910400390625, |
| "loss": 0.5323, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": 0.5343211889266968, |
| "rewards/margins": 0.32672011852264404, |
| "rewards/rejected": 0.20760111510753632, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.02666666666666667, |
| "grad_norm": 3.656710624694824, |
| "learning_rate": 9.998642185905977e-06, |
| "logits/chosen": -0.44063276052474976, |
| "logits/rejected": -0.4492092728614807, |
| "logps/chosen": -311.40277099609375, |
| "logps/rejected": -233.438720703125, |
| "loss": 0.5154, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": 0.6043025851249695, |
| "rewards/margins": 0.3682531714439392, |
| "rewards/rejected": 0.23604938387870789, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.03111111111111111, |
| "grad_norm": 3.5971853733062744, |
| "learning_rate": 9.998044786644492e-06, |
| "logits/chosen": -0.39475446939468384, |
| "logits/rejected": -0.4055609703063965, |
| "logps/chosen": -298.6465759277344, |
| "logps/rejected": -219.363525390625, |
| "loss": 0.4452, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": 0.8082733154296875, |
| "rewards/margins": 0.5571426153182983, |
| "rewards/rejected": 0.25113070011138916, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.035555555555555556, |
| "grad_norm": 3.67197322845459, |
| "learning_rate": 9.997338800014284e-06, |
| "logits/chosen": -0.41250643134117126, |
| "logits/rejected": -0.4259340167045593, |
| "logps/chosen": -293.3608703613281, |
| "logps/rejected": -224.9442901611328, |
| "loss": 0.4586, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": 0.867678165435791, |
| "rewards/margins": 0.5272501707077026, |
| "rewards/rejected": 0.34042787551879883, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 5.049458026885986, |
| "learning_rate": 9.9965242413536e-06, |
| "logits/chosen": -0.41178879141807556, |
| "logits/rejected": -0.4304323196411133, |
| "logps/chosen": -306.1034851074219, |
| "logps/rejected": -228.5247802734375, |
| "loss": 0.3777, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": 1.235072374343872, |
| "rewards/margins": 0.7781749367713928, |
| "rewards/rejected": 0.4568973183631897, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.044444444444444446, |
| "grad_norm": 3.9216673374176025, |
| "learning_rate": 9.995601128359516e-06, |
| "logits/chosen": -0.40246009826660156, |
| "logits/rejected": -0.3950818181037903, |
| "logps/chosen": -303.0498352050781, |
| "logps/rejected": -226.4988250732422, |
| "loss": 0.3999, |
| "rewards/accuracies": 0.731249988079071, |
| "rewards/chosen": 1.3293551206588745, |
| "rewards/margins": 0.8089650869369507, |
| "rewards/rejected": 0.5203902721405029, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.04888888888888889, |
| "grad_norm": 4.434176921844482, |
| "learning_rate": 9.994569481087552e-06, |
| "logits/chosen": -0.39378249645233154, |
| "logits/rejected": -0.40684300661087036, |
| "logps/chosen": -329.46173095703125, |
| "logps/rejected": -223.7794952392578, |
| "loss": 0.3168, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": 1.6077896356582642, |
| "rewards/margins": 1.0661401748657227, |
| "rewards/rejected": 0.5416494607925415, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.05333333333333334, |
| "grad_norm": 4.062209129333496, |
| "learning_rate": 9.993429321951251e-06, |
| "logits/chosen": -0.34955719113349915, |
| "logits/rejected": -0.3819810748100281, |
| "logps/chosen": -291.4505615234375, |
| "logps/rejected": -212.7031707763672, |
| "loss": 0.2348, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 1.9593639373779297, |
| "rewards/margins": 1.3969981670379639, |
| "rewards/rejected": 0.5623658299446106, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.057777777777777775, |
| "grad_norm": 4.022445201873779, |
| "learning_rate": 9.992180675721671e-06, |
| "logits/chosen": -0.3607024550437927, |
| "logits/rejected": -0.3758237659931183, |
| "logps/chosen": -325.44622802734375, |
| "logps/rejected": -226.538818359375, |
| "loss": 0.1438, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": 2.2623605728149414, |
| "rewards/margins": 1.6984504461288452, |
| "rewards/rejected": 0.5639100670814514, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.06222222222222222, |
| "grad_norm": 4.516530513763428, |
| "learning_rate": 9.990823569526868e-06, |
| "logits/chosen": -0.3758849501609802, |
| "logits/rejected": -0.401409387588501, |
| "logps/chosen": -293.59283447265625, |
| "logps/rejected": -212.1451873779297, |
| "loss": 0.2153, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": 2.2533986568450928, |
| "rewards/margins": 1.599491000175476, |
| "rewards/rejected": 0.6539075374603271, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.06666666666666667, |
| "grad_norm": 4.2673563957214355, |
| "learning_rate": 9.989358032851283e-06, |
| "logits/chosen": -0.38496989011764526, |
| "logits/rejected": -0.4185038208961487, |
| "logps/chosen": -331.06707763671875, |
| "logps/rejected": -238.4701690673828, |
| "loss": 0.2667, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": 2.29125714302063, |
| "rewards/margins": 1.5516706705093384, |
| "rewards/rejected": 0.739586353302002, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.07111111111111111, |
| "grad_norm": 4.720420837402344, |
| "learning_rate": 9.987784097535126e-06, |
| "logits/chosen": -0.36235010623931885, |
| "logits/rejected": -0.3792596757411957, |
| "logps/chosen": -303.70196533203125, |
| "logps/rejected": -226.2982940673828, |
| "loss": 0.1933, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": 2.5473880767822266, |
| "rewards/margins": 1.8545589447021484, |
| "rewards/rejected": 0.6928290128707886, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.07555555555555556, |
| "grad_norm": 7.281383037567139, |
| "learning_rate": 9.986101797773667e-06, |
| "logits/chosen": -0.380900114774704, |
| "logits/rejected": -0.3917911946773529, |
| "logps/chosen": -295.6128234863281, |
| "logps/rejected": -229.4749298095703, |
| "loss": 0.213, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": 2.3351025581359863, |
| "rewards/margins": 1.7777379751205444, |
| "rewards/rejected": 0.5573645830154419, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 4.908195972442627, |
| "learning_rate": 9.984311170116497e-06, |
| "logits/chosen": -0.37983238697052, |
| "logits/rejected": -0.3918471932411194, |
| "logps/chosen": -291.7480163574219, |
| "logps/rejected": -220.09652709960938, |
| "loss": 0.1737, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": 2.4933111667633057, |
| "rewards/margins": 1.936655044555664, |
| "rewards/rejected": 0.5566561222076416, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.08444444444444445, |
| "grad_norm": 5.108543395996094, |
| "learning_rate": 9.98241225346674e-06, |
| "logits/chosen": -0.34952667355537415, |
| "logits/rejected": -0.3850114643573761, |
| "logps/chosen": -302.5187683105469, |
| "logps/rejected": -222.33834838867188, |
| "loss": 0.1069, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": 2.5169692039489746, |
| "rewards/margins": 2.1484999656677246, |
| "rewards/rejected": 0.36846891045570374, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.08888888888888889, |
| "grad_norm": 4.840832233428955, |
| "learning_rate": 9.9804050890802e-06, |
| "logits/chosen": -0.33506280183792114, |
| "logits/rejected": -0.3472011089324951, |
| "logps/chosen": -269.5045471191406, |
| "logps/rejected": -205.8480987548828, |
| "loss": 0.1074, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": 2.4071407318115234, |
| "rewards/margins": 2.1650052070617676, |
| "rewards/rejected": 0.24213531613349915, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.09333333333333334, |
| "grad_norm": 5.977898120880127, |
| "learning_rate": 9.978289720564471e-06, |
| "logits/chosen": -0.33771952986717224, |
| "logits/rejected": -0.3690803050994873, |
| "logps/chosen": -308.5394287109375, |
| "logps/rejected": -226.24813842773438, |
| "loss": 0.2107, |
| "rewards/accuracies": 0.7562500238418579, |
| "rewards/chosen": 2.450155735015869, |
| "rewards/margins": 2.019880771636963, |
| "rewards/rejected": 0.4302748143672943, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.09777777777777778, |
| "grad_norm": 6.031980514526367, |
| "learning_rate": 9.976066193877982e-06, |
| "logits/chosen": -0.347932904958725, |
| "logits/rejected": -0.3845617175102234, |
| "logps/chosen": -293.0547790527344, |
| "logps/rejected": -220.5181427001953, |
| "loss": 0.1324, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": 2.321216106414795, |
| "rewards/margins": 2.2665910720825195, |
| "rewards/rejected": 0.054625045508146286, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.10222222222222223, |
| "grad_norm": 4.3759660720825195, |
| "learning_rate": 9.97373455732901e-06, |
| "logits/chosen": -0.34049180150032043, |
| "logits/rejected": -0.35589173436164856, |
| "logps/chosen": -294.77117919921875, |
| "logps/rejected": -228.1370086669922, |
| "loss": 0.0647, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": 2.3987247943878174, |
| "rewards/margins": 2.2897610664367676, |
| "rewards/rejected": 0.1089634895324707, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.10666666666666667, |
| "grad_norm": 4.7751336097717285, |
| "learning_rate": 9.971294861574617e-06, |
| "logits/chosen": -0.3569382429122925, |
| "logits/rejected": -0.35876479744911194, |
| "logps/chosen": -285.916748046875, |
| "logps/rejected": -219.81753540039062, |
| "loss": 0.0106, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 2.5361573696136475, |
| "rewards/margins": 2.6316070556640625, |
| "rewards/rejected": -0.09544976055622101, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.1111111111111111, |
| "grad_norm": 6.836686134338379, |
| "learning_rate": 9.968747159619556e-06, |
| "logits/chosen": -0.3644478917121887, |
| "logits/rejected": -0.3773222863674164, |
| "logps/chosen": -301.05084228515625, |
| "logps/rejected": -231.2653045654297, |
| "loss": 0.1055, |
| "rewards/accuracies": 0.768750011920929, |
| "rewards/chosen": 2.7265372276306152, |
| "rewards/margins": 2.74798846244812, |
| "rewards/rejected": -0.021450763568282127, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.11555555555555555, |
| "grad_norm": 6.012020587921143, |
| "learning_rate": 9.966091506815128e-06, |
| "logits/chosen": -0.34487825632095337, |
| "logits/rejected": -0.3683899939060211, |
| "logps/chosen": -296.5931701660156, |
| "logps/rejected": -223.12753295898438, |
| "loss": 0.0823, |
| "rewards/accuracies": 0.768750011920929, |
| "rewards/chosen": 2.7003228664398193, |
| "rewards/margins": 2.8483641147613525, |
| "rewards/rejected": -0.14804117381572723, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 5.459484100341797, |
| "learning_rate": 9.963327960857962e-06, |
| "logits/chosen": -0.3142702579498291, |
| "logits/rejected": -0.36442944407463074, |
| "logps/chosen": -310.71368408203125, |
| "logps/rejected": -211.5228729248047, |
| "loss": -0.0914, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": 3.0668811798095703, |
| "rewards/margins": 3.4564356803894043, |
| "rewards/rejected": -0.3895547688007355, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.12444444444444444, |
| "grad_norm": 4.6439080238342285, |
| "learning_rate": 9.960456581788771e-06, |
| "logits/chosen": -0.3213174343109131, |
| "logits/rejected": -0.35702863335609436, |
| "logps/chosen": -295.9752502441406, |
| "logps/rejected": -218.5254364013672, |
| "loss": -0.0509, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": 2.7388923168182373, |
| "rewards/margins": 3.2099480628967285, |
| "rewards/rejected": -0.47105544805526733, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.1288888888888889, |
| "grad_norm": 7.5686140060424805, |
| "learning_rate": 9.957477431991053e-06, |
| "logits/chosen": -0.3489062190055847, |
| "logits/rejected": -0.38331982493400574, |
| "logps/chosen": -301.20574951171875, |
| "logps/rejected": -221.79617309570312, |
| "loss": 0.0026, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": 2.3142552375793457, |
| "rewards/margins": 3.0163302421569824, |
| "rewards/rejected": -0.7020750045776367, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.13333333333333333, |
| "grad_norm": 7.296176910400391, |
| "learning_rate": 9.954390576189726e-06, |
| "logits/chosen": -0.32641178369522095, |
| "logits/rejected": -0.3621976673603058, |
| "logps/chosen": -312.5970153808594, |
| "logps/rejected": -246.51974487304688, |
| "loss": 0.1054, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": 2.6623806953430176, |
| "rewards/margins": 2.9366531372070312, |
| "rewards/rejected": -0.27427244186401367, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.13777777777777778, |
| "grad_norm": 6.089158535003662, |
| "learning_rate": 9.95119608144972e-06, |
| "logits/chosen": -0.34094589948654175, |
| "logits/rejected": -0.36057132482528687, |
| "logps/chosen": -298.9684143066406, |
| "logps/rejected": -232.59097290039062, |
| "loss": 0.0235, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": 2.0118796825408936, |
| "rewards/margins": 2.952807664871216, |
| "rewards/rejected": -0.9409275054931641, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.14222222222222222, |
| "grad_norm": 5.5049662590026855, |
| "learning_rate": 9.947894017174535e-06, |
| "logits/chosen": -0.30161410570144653, |
| "logits/rejected": -0.3480113446712494, |
| "logps/chosen": -306.0929870605469, |
| "logps/rejected": -226.927734375, |
| "loss": -0.0148, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": 2.6642236709594727, |
| "rewards/margins": 3.523815631866455, |
| "rewards/rejected": -0.8595919609069824, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.14666666666666667, |
| "grad_norm": 5.161729335784912, |
| "learning_rate": 9.944484455104716e-06, |
| "logits/chosen": -0.3171108067035675, |
| "logits/rejected": -0.35261866450309753, |
| "logps/chosen": -290.88250732421875, |
| "logps/rejected": -234.3745880126953, |
| "loss": 0.085, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": 1.7442996501922607, |
| "rewards/margins": 2.7870707511901855, |
| "rewards/rejected": -1.0427708625793457, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.1511111111111111, |
| "grad_norm": 5.5866618156433105, |
| "learning_rate": 9.940967469316307e-06, |
| "logits/chosen": -0.3179735541343689, |
| "logits/rejected": -0.3568040728569031, |
| "logps/chosen": -328.8294372558594, |
| "logps/rejected": -225.0006561279297, |
| "loss": -0.2403, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": 3.2372944355010986, |
| "rewards/margins": 4.343171119689941, |
| "rewards/rejected": -1.105877161026001, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.15555555555555556, |
| "grad_norm": 6.542994499206543, |
| "learning_rate": 9.937343136219234e-06, |
| "logits/chosen": -0.2941819429397583, |
| "logits/rejected": -0.3379240930080414, |
| "logps/chosen": -301.7574462890625, |
| "logps/rejected": -217.0867919921875, |
| "loss": -0.1938, |
| "rewards/accuracies": 0.856249988079071, |
| "rewards/chosen": 2.6589813232421875, |
| "rewards/margins": 4.12928581237793, |
| "rewards/rejected": -1.4703044891357422, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 8.16442584991455, |
| "learning_rate": 9.933611534555645e-06, |
| "logits/chosen": -0.3271011710166931, |
| "logits/rejected": -0.3445083200931549, |
| "logps/chosen": -319.7223205566406, |
| "logps/rejected": -247.19967651367188, |
| "loss": -0.0822, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": 2.863480806350708, |
| "rewards/margins": 3.848937511444092, |
| "rewards/rejected": -0.9854568243026733, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.16444444444444445, |
| "grad_norm": 5.072612285614014, |
| "learning_rate": 9.929772745398207e-06, |
| "logits/chosen": -0.3311443328857422, |
| "logits/rejected": -0.34855595231056213, |
| "logps/chosen": -304.24371337890625, |
| "logps/rejected": -228.12423706054688, |
| "loss": -0.0953, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": 2.729543685913086, |
| "rewards/margins": 3.911928653717041, |
| "rewards/rejected": -1.1823843717575073, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.1688888888888889, |
| "grad_norm": 5.6365966796875, |
| "learning_rate": 9.925826852148332e-06, |
| "logits/chosen": -0.37482309341430664, |
| "logits/rejected": -0.37685567140579224, |
| "logps/chosen": -323.057373046875, |
| "logps/rejected": -238.3128204345703, |
| "loss": -0.1351, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 2.4461398124694824, |
| "rewards/margins": 4.063778400421143, |
| "rewards/rejected": -1.6176389455795288, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.17333333333333334, |
| "grad_norm": 7.323070049285889, |
| "learning_rate": 9.921773940534382e-06, |
| "logits/chosen": -0.30995437502861023, |
| "logits/rejected": -0.3514579236507416, |
| "logps/chosen": -283.3837890625, |
| "logps/rejected": -228.30905151367188, |
| "loss": 0.0783, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": 1.8092823028564453, |
| "rewards/margins": 3.168578624725342, |
| "rewards/rejected": -1.3592965602874756, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.17777777777777778, |
| "grad_norm": 5.961920738220215, |
| "learning_rate": 9.917614098609786e-06, |
| "logits/chosen": -0.3327783942222595, |
| "logits/rejected": -0.36006277799606323, |
| "logps/chosen": -307.5805358886719, |
| "logps/rejected": -228.3577880859375, |
| "loss": -0.0971, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": 2.8803937435150146, |
| "rewards/margins": 3.999189853668213, |
| "rewards/rejected": -1.1187958717346191, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.18222222222222223, |
| "grad_norm": 8.006556510925293, |
| "learning_rate": 9.913347416751148e-06, |
| "logits/chosen": -0.290499210357666, |
| "logits/rejected": -0.32564371824264526, |
| "logps/chosen": -310.0920715332031, |
| "logps/rejected": -222.1564178466797, |
| "loss": -0.1483, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 3.187587022781372, |
| "rewards/margins": 4.311644554138184, |
| "rewards/rejected": -1.124057650566101, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.18666666666666668, |
| "grad_norm": 5.825204849243164, |
| "learning_rate": 9.908973987656263e-06, |
| "logits/chosen": -0.3070078492164612, |
| "logits/rejected": -0.3182796239852905, |
| "logps/chosen": -289.46490478515625, |
| "logps/rejected": -223.34725952148438, |
| "loss": -0.2937, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": 3.196002244949341, |
| "rewards/margins": 4.649127006530762, |
| "rewards/rejected": -1.453124761581421, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.19111111111111112, |
| "grad_norm": 8.087427139282227, |
| "learning_rate": 9.904493906342124e-06, |
| "logits/chosen": -0.284060001373291, |
| "logits/rejected": -0.3289189636707306, |
| "logps/chosen": -292.52984619140625, |
| "logps/rejected": -227.1925811767578, |
| "loss": -0.0803, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": 2.883082628250122, |
| "rewards/margins": 4.214940071105957, |
| "rewards/rejected": -1.3318575620651245, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.19555555555555557, |
| "grad_norm": 6.931927680969238, |
| "learning_rate": 9.899907270142835e-06, |
| "logits/chosen": -0.29949700832366943, |
| "logits/rejected": -0.3155062794685364, |
| "logps/chosen": -305.6365661621094, |
| "logps/rejected": -233.4442901611328, |
| "loss": -0.0835, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": 2.9822609424591064, |
| "rewards/margins": 4.396633148193359, |
| "rewards/rejected": -1.4143723249435425, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 9.130791664123535, |
| "learning_rate": 9.895214178707516e-06, |
| "logits/chosen": -0.31096282601356506, |
| "logits/rejected": -0.3551832437515259, |
| "logps/chosen": -313.84320068359375, |
| "logps/rejected": -236.7030029296875, |
| "loss": -0.0991, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": 2.626882553100586, |
| "rewards/margins": 4.163486003875732, |
| "rewards/rejected": -1.536603331565857, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.20444444444444446, |
| "grad_norm": 5.590844631195068, |
| "learning_rate": 9.890414733998131e-06, |
| "logits/chosen": -0.2635526657104492, |
| "logits/rejected": -0.29329806566238403, |
| "logps/chosen": -297.04638671875, |
| "logps/rejected": -236.12271118164062, |
| "loss": 0.0262, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": 2.713446855545044, |
| "rewards/margins": 3.905104875564575, |
| "rewards/rejected": -1.1916577816009521, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.2088888888888889, |
| "grad_norm": 4.7747979164123535, |
| "learning_rate": 9.885509040287267e-06, |
| "logits/chosen": -0.30965957045555115, |
| "logits/rejected": -0.31621426343917847, |
| "logps/chosen": -288.59014892578125, |
| "logps/rejected": -220.62765502929688, |
| "loss": -0.342, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": 3.3641788959503174, |
| "rewards/margins": 5.042544841766357, |
| "rewards/rejected": -1.6783654689788818, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.21333333333333335, |
| "grad_norm": 6.6951680183410645, |
| "learning_rate": 9.880497204155879e-06, |
| "logits/chosen": -0.27586597204208374, |
| "logits/rejected": -0.3355752229690552, |
| "logps/chosen": -316.9598388671875, |
| "logps/rejected": -244.87149047851562, |
| "loss": -0.0482, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": 2.7237601280212402, |
| "rewards/margins": 3.941441774368286, |
| "rewards/rejected": -1.217681646347046, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.21777777777777776, |
| "grad_norm": 5.907822608947754, |
| "learning_rate": 9.875379334490962e-06, |
| "logits/chosen": -0.3292551338672638, |
| "logits/rejected": -0.31635525822639465, |
| "logps/chosen": -291.8619689941406, |
| "logps/rejected": -231.01016235351562, |
| "loss": -0.1414, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": 2.5336103439331055, |
| "rewards/margins": 4.392641067504883, |
| "rewards/rejected": -1.8590309619903564, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.2222222222222222, |
| "grad_norm": 7.046641826629639, |
| "learning_rate": 9.870155542483199e-06, |
| "logits/chosen": -0.3067111372947693, |
| "logits/rejected": -0.35157865285873413, |
| "logps/chosen": -319.73187255859375, |
| "logps/rejected": -236.04483032226562, |
| "loss": -0.3243, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": 3.8645987510681152, |
| "rewards/margins": 5.3452653884887695, |
| "rewards/rejected": -1.4806665182113647, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.22666666666666666, |
| "grad_norm": 9.379409790039062, |
| "learning_rate": 9.864825941624538e-06, |
| "logits/chosen": -0.267128050327301, |
| "logits/rejected": -0.2918349802494049, |
| "logps/chosen": -313.8056335449219, |
| "logps/rejected": -247.0101776123047, |
| "loss": -0.1449, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": 3.3342041969299316, |
| "rewards/margins": 4.6097564697265625, |
| "rewards/rejected": -1.27555251121521, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.2311111111111111, |
| "grad_norm": 7.974717617034912, |
| "learning_rate": 9.85939064770572e-06, |
| "logits/chosen": -0.3181043267250061, |
| "logits/rejected": -0.3094359338283539, |
| "logps/chosen": -317.86505126953125, |
| "logps/rejected": -245.4908905029297, |
| "loss": -0.3156, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": 3.143451452255249, |
| "rewards/margins": 5.211213111877441, |
| "rewards/rejected": -2.0677614212036133, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.23555555555555555, |
| "grad_norm": 6.511713027954102, |
| "learning_rate": 9.853849778813777e-06, |
| "logits/chosen": -0.29388368129730225, |
| "logits/rejected": -0.3029894530773163, |
| "logps/chosen": -297.4751892089844, |
| "logps/rejected": -231.9884796142578, |
| "loss": -0.167, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 3.295167922973633, |
| "rewards/margins": 5.074382305145264, |
| "rewards/rejected": -1.77921462059021, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 6.8637776374816895, |
| "learning_rate": 9.848203455329459e-06, |
| "logits/chosen": -0.31308668851852417, |
| "logits/rejected": -0.3360288441181183, |
| "logps/chosen": -296.82318115234375, |
| "logps/rejected": -236.3435821533203, |
| "loss": -0.2143, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": 2.4768126010894775, |
| "rewards/margins": 4.899205684661865, |
| "rewards/rejected": -2.422393321990967, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.24444444444444444, |
| "grad_norm": 10.344189643859863, |
| "learning_rate": 9.842451799924616e-06, |
| "logits/chosen": -0.2888021171092987, |
| "logits/rejected": -0.3189722001552582, |
| "logps/chosen": -321.9690246582031, |
| "logps/rejected": -239.86520385742188, |
| "loss": -0.327, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": 2.709468364715576, |
| "rewards/margins": 5.345309257507324, |
| "rewards/rejected": -2.635840892791748, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.24888888888888888, |
| "grad_norm": 11.444976806640625, |
| "learning_rate": 9.836594937559541e-06, |
| "logits/chosen": -0.28263232111930847, |
| "logits/rejected": -0.2964705526828766, |
| "logps/chosen": -296.26995849609375, |
| "logps/rejected": -232.54934692382812, |
| "loss": 0.0017, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": 2.52939772605896, |
| "rewards/margins": 4.631046295166016, |
| "rewards/rejected": -2.1016488075256348, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.25333333333333335, |
| "grad_norm": 5.921905040740967, |
| "learning_rate": 9.830632995480243e-06, |
| "logits/chosen": -0.26743844151496887, |
| "logits/rejected": -0.27696385979652405, |
| "logps/chosen": -298.95477294921875, |
| "logps/rejected": -236.3730926513672, |
| "loss": -0.2483, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": 2.9082038402557373, |
| "rewards/margins": 5.20479154586792, |
| "rewards/rejected": -2.2965879440307617, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.2577777777777778, |
| "grad_norm": 6.96235990524292, |
| "learning_rate": 9.824566103215697e-06, |
| "logits/chosen": -0.2472468614578247, |
| "logits/rejected": -0.2931605279445648, |
| "logps/chosen": -298.02581787109375, |
| "logps/rejected": -231.60879516601562, |
| "loss": -0.2804, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 2.8193132877349854, |
| "rewards/margins": 5.249671459197998, |
| "rewards/rejected": -2.430358648300171, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.26222222222222225, |
| "grad_norm": 8.460125923156738, |
| "learning_rate": 9.818394392575018e-06, |
| "logits/chosen": -0.30542343854904175, |
| "logits/rejected": -0.32763975858688354, |
| "logps/chosen": -285.7476501464844, |
| "logps/rejected": -243.5345458984375, |
| "loss": -0.1747, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": 2.6046016216278076, |
| "rewards/margins": 4.996693134307861, |
| "rewards/rejected": -2.3920915126800537, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.26666666666666666, |
| "grad_norm": 7.488274097442627, |
| "learning_rate": 9.812117997644606e-06, |
| "logits/chosen": -0.2731490731239319, |
| "logits/rejected": -0.30121108889579773, |
| "logps/chosen": -284.916259765625, |
| "logps/rejected": -233.185546875, |
| "loss": -0.1565, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": 2.169166088104248, |
| "rewards/margins": 5.066960334777832, |
| "rewards/rejected": -2.897794246673584, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.27111111111111114, |
| "grad_norm": 8.804214477539062, |
| "learning_rate": 9.805737054785223e-06, |
| "logits/chosen": -0.2953334450721741, |
| "logits/rejected": -0.327360063791275, |
| "logps/chosen": -300.7308349609375, |
| "logps/rejected": -236.43685913085938, |
| "loss": -0.0923, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": 2.0204293727874756, |
| "rewards/margins": 4.94085168838501, |
| "rewards/rejected": -2.920422315597534, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.27555555555555555, |
| "grad_norm": 8.035072326660156, |
| "learning_rate": 9.79925170262904e-06, |
| "logits/chosen": -0.26204347610473633, |
| "logits/rejected": -0.31125301122665405, |
| "logps/chosen": -280.49102783203125, |
| "logps/rejected": -226.66110229492188, |
| "loss": -0.2221, |
| "rewards/accuracies": 0.856249988079071, |
| "rewards/chosen": 1.1722501516342163, |
| "rewards/margins": 4.767660140991211, |
| "rewards/rejected": -3.595409870147705, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 6.416834831237793, |
| "learning_rate": 9.792662082076618e-06, |
| "logits/chosen": -0.2821267247200012, |
| "logits/rejected": -0.29524296522140503, |
| "logps/chosen": -313.2020568847656, |
| "logps/rejected": -236.20578002929688, |
| "loss": -0.3875, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": 2.725848913192749, |
| "rewards/margins": 5.90293025970459, |
| "rewards/rejected": -3.17708158493042, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.28444444444444444, |
| "grad_norm": 9.474376678466797, |
| "learning_rate": 9.785968336293859e-06, |
| "logits/chosen": -0.2762632966041565, |
| "logits/rejected": -0.34091368317604065, |
| "logps/chosen": -315.105224609375, |
| "logps/rejected": -250.1154327392578, |
| "loss": -0.2361, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 1.9460265636444092, |
| "rewards/margins": 5.044549465179443, |
| "rewards/rejected": -3.0985231399536133, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.28888888888888886, |
| "grad_norm": 7.876622200012207, |
| "learning_rate": 9.779170610708872e-06, |
| "logits/chosen": -0.26600781083106995, |
| "logits/rejected": -0.2999460697174072, |
| "logps/chosen": -315.525146484375, |
| "logps/rejected": -239.6782989501953, |
| "loss": -0.3024, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": 2.9207816123962402, |
| "rewards/margins": 6.140283107757568, |
| "rewards/rejected": -3.219501495361328, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.29333333333333333, |
| "grad_norm": 9.389948844909668, |
| "learning_rate": 9.772269053008841e-06, |
| "logits/chosen": -0.2716449201107025, |
| "logits/rejected": -0.31395813822746277, |
| "logps/chosen": -293.0248107910156, |
| "logps/rejected": -221.9087371826172, |
| "loss": -0.1898, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 2.0722451210021973, |
| "rewards/margins": 5.335482597351074, |
| "rewards/rejected": -3.263237714767456, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.29777777777777775, |
| "grad_norm": 8.571460723876953, |
| "learning_rate": 9.765263813136796e-06, |
| "logits/chosen": -0.27379176020622253, |
| "logits/rejected": -0.31927746534347534, |
| "logps/chosen": -306.12799072265625, |
| "logps/rejected": -229.9273681640625, |
| "loss": -0.1855, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": 1.6866050958633423, |
| "rewards/margins": 4.964447975158691, |
| "rewards/rejected": -3.2778429985046387, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.3022222222222222, |
| "grad_norm": 7.001428127288818, |
| "learning_rate": 9.758155043288367e-06, |
| "logits/chosen": -0.28565549850463867, |
| "logits/rejected": -0.3229166865348816, |
| "logps/chosen": -297.42449951171875, |
| "logps/rejected": -250.22286987304688, |
| "loss": -0.2147, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": 2.1362271308898926, |
| "rewards/margins": 5.681948661804199, |
| "rewards/rejected": -3.5457210540771484, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.30666666666666664, |
| "grad_norm": 8.346474647521973, |
| "learning_rate": 9.750942897908468e-06, |
| "logits/chosen": -0.24829097092151642, |
| "logits/rejected": -0.2842785120010376, |
| "logps/chosen": -293.74859619140625, |
| "logps/rejected": -236.0409698486328, |
| "loss": -0.4852, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": 2.535234212875366, |
| "rewards/margins": 6.377307891845703, |
| "rewards/rejected": -3.842073917388916, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.3111111111111111, |
| "grad_norm": 6.549100875854492, |
| "learning_rate": 9.743627533687953e-06, |
| "logits/chosen": -0.2822897136211395, |
| "logits/rejected": -0.3249056041240692, |
| "logps/chosen": -297.6363830566406, |
| "logps/rejected": -229.268798828125, |
| "loss": -0.4093, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 2.632903814315796, |
| "rewards/margins": 6.515559196472168, |
| "rewards/rejected": -3.882655620574951, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.31555555555555553, |
| "grad_norm": 6.955848217010498, |
| "learning_rate": 9.736209109560201e-06, |
| "logits/chosen": -0.2583480179309845, |
| "logits/rejected": -0.31130915880203247, |
| "logps/chosen": -286.8586730957031, |
| "logps/rejected": -228.97238159179688, |
| "loss": -0.3784, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": 2.1645522117614746, |
| "rewards/margins": 5.964513301849365, |
| "rewards/rejected": -3.7999610900878906, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 9.709640502929688, |
| "learning_rate": 9.728687786697667e-06, |
| "logits/chosen": -0.2713499069213867, |
| "logits/rejected": -0.3231387734413147, |
| "logps/chosen": -310.96929931640625, |
| "logps/rejected": -238.51025390625, |
| "loss": -0.3302, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": 2.8049497604370117, |
| "rewards/margins": 6.414994239807129, |
| "rewards/rejected": -3.6100432872772217, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.3244444444444444, |
| "grad_norm": 6.710853099822998, |
| "learning_rate": 9.721063728508384e-06, |
| "logits/chosen": -0.28875869512557983, |
| "logits/rejected": -0.32300078868865967, |
| "logps/chosen": -297.21221923828125, |
| "logps/rejected": -246.2225799560547, |
| "loss": -0.3494, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": 2.3010525703430176, |
| "rewards/margins": 6.263821601867676, |
| "rewards/rejected": -3.9627685546875, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.3288888888888889, |
| "grad_norm": 9.562369346618652, |
| "learning_rate": 9.713337100632407e-06, |
| "logits/chosen": -0.23941664397716522, |
| "logits/rejected": -0.2882528305053711, |
| "logps/chosen": -297.01116943359375, |
| "logps/rejected": -246.925048828125, |
| "loss": -0.2107, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": 1.6213099956512451, |
| "rewards/margins": 5.55633020401001, |
| "rewards/rejected": -3.935020923614502, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 8.356274604797363, |
| "learning_rate": 9.705508070938219e-06, |
| "logits/chosen": -0.26807016134262085, |
| "logits/rejected": -0.29893961548805237, |
| "logps/chosen": -310.1183776855469, |
| "logps/rejected": -234.883544921875, |
| "loss": -0.6507, |
| "rewards/accuracies": 0.8812500238418579, |
| "rewards/chosen": 3.1703178882598877, |
| "rewards/margins": 7.669167995452881, |
| "rewards/rejected": -4.498850345611572, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.3377777777777778, |
| "grad_norm": 7.058998107910156, |
| "learning_rate": 9.697576809519079e-06, |
| "logits/chosen": -0.2949567139148712, |
| "logits/rejected": -0.3223188519477844, |
| "logps/chosen": -311.98773193359375, |
| "logps/rejected": -245.71194458007812, |
| "loss": -0.3221, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": 1.6648155450820923, |
| "rewards/margins": 6.265153884887695, |
| "rewards/rejected": -4.600337982177734, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.3422222222222222, |
| "grad_norm": 8.904199600219727, |
| "learning_rate": 9.689543488689332e-06, |
| "logits/chosen": -0.25813308358192444, |
| "logits/rejected": -0.29112708568573, |
| "logps/chosen": -301.86834716796875, |
| "logps/rejected": -247.51974487304688, |
| "loss": -0.3268, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": 0.9261104464530945, |
| "rewards/margins": 5.75935173034668, |
| "rewards/rejected": -4.8332414627075195, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.3466666666666667, |
| "grad_norm": 6.82271671295166, |
| "learning_rate": 9.68140828298066e-06, |
| "logits/chosen": -0.27823004126548767, |
| "logits/rejected": -0.30908042192459106, |
| "logps/chosen": -302.1865539550781, |
| "logps/rejected": -234.9086456298828, |
| "loss": -0.5093, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": 1.626147985458374, |
| "rewards/margins": 6.896539211273193, |
| "rewards/rejected": -5.270391941070557, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.3511111111111111, |
| "grad_norm": 10.339046478271484, |
| "learning_rate": 9.673171369138297e-06, |
| "logits/chosen": -0.2561442255973816, |
| "logits/rejected": -0.2945733666419983, |
| "logps/chosen": -304.0384216308594, |
| "logps/rejected": -242.31137084960938, |
| "loss": -0.3237, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": 1.6995117664337158, |
| "rewards/margins": 6.307187557220459, |
| "rewards/rejected": -4.607676029205322, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.35555555555555557, |
| "grad_norm": 7.32875919342041, |
| "learning_rate": 9.66483292611718e-06, |
| "logits/chosen": -0.2525383234024048, |
| "logits/rejected": -0.28177526593208313, |
| "logps/chosen": -292.54046630859375, |
| "logps/rejected": -235.5553741455078, |
| "loss": -0.4031, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": 1.865870714187622, |
| "rewards/margins": 6.680575370788574, |
| "rewards/rejected": -4.814703941345215, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 12.206645011901855, |
| "learning_rate": 9.656393135078067e-06, |
| "logits/chosen": -0.2548236846923828, |
| "logits/rejected": -0.30014172196388245, |
| "logps/chosen": -300.7771911621094, |
| "logps/rejected": -239.86367797851562, |
| "loss": -0.538, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": 1.971895456314087, |
| "rewards/margins": 7.470471382141113, |
| "rewards/rejected": -5.498574733734131, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.36444444444444446, |
| "grad_norm": 8.910218238830566, |
| "learning_rate": 9.647852179383606e-06, |
| "logits/chosen": -0.27060994505882263, |
| "logits/rejected": -0.3155694603919983, |
| "logps/chosen": -300.47210693359375, |
| "logps/rejected": -234.78250122070312, |
| "loss": -0.5798, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": 1.5166006088256836, |
| "rewards/margins": 7.203047275543213, |
| "rewards/rejected": -5.686446189880371, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.3688888888888889, |
| "grad_norm": 10.572662353515625, |
| "learning_rate": 9.639210244594335e-06, |
| "logits/chosen": -0.2864235043525696, |
| "logits/rejected": -0.30632856488227844, |
| "logps/chosen": -301.4001159667969, |
| "logps/rejected": -254.0180206298828, |
| "loss": -0.4088, |
| "rewards/accuracies": 0.856249988079071, |
| "rewards/chosen": 1.349990963935852, |
| "rewards/margins": 6.885331630706787, |
| "rewards/rejected": -5.535341262817383, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.37333333333333335, |
| "grad_norm": 9.515912055969238, |
| "learning_rate": 9.630467518464666e-06, |
| "logits/chosen": -0.2558160424232483, |
| "logits/rejected": -0.2956928312778473, |
| "logps/chosen": -304.60302734375, |
| "logps/rejected": -240.9836883544922, |
| "loss": -0.2976, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 1.0677791833877563, |
| "rewards/margins": 6.517449378967285, |
| "rewards/rejected": -5.44966983795166, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.37777777777777777, |
| "grad_norm": 7.812021255493164, |
| "learning_rate": 9.621624190938802e-06, |
| "logits/chosen": -0.2350511997938156, |
| "logits/rejected": -0.2532605528831482, |
| "logps/chosen": -320.7872009277344, |
| "logps/rejected": -254.2796173095703, |
| "loss": -0.4306, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": 1.7731481790542603, |
| "rewards/margins": 7.587254524230957, |
| "rewards/rejected": -5.814105987548828, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.38222222222222224, |
| "grad_norm": 7.678309917449951, |
| "learning_rate": 9.612680454146609e-06, |
| "logits/chosen": -0.22189001739025116, |
| "logits/rejected": -0.2614109218120575, |
| "logps/chosen": -325.14239501953125, |
| "logps/rejected": -250.6171875, |
| "loss": -0.6808, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": 2.040872573852539, |
| "rewards/margins": 8.241477012634277, |
| "rewards/rejected": -6.200604438781738, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.38666666666666666, |
| "grad_norm": 10.421648979187012, |
| "learning_rate": 9.603636502399436e-06, |
| "logits/chosen": -0.2654271721839905, |
| "logits/rejected": -0.302105575799942, |
| "logps/chosen": -332.26593017578125, |
| "logps/rejected": -245.6372528076172, |
| "loss": -0.6012, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": 1.5932146310806274, |
| "rewards/margins": 8.200170516967773, |
| "rewards/rejected": -6.606956481933594, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.39111111111111113, |
| "grad_norm": 6.699859142303467, |
| "learning_rate": 9.594492532185909e-06, |
| "logits/chosen": -0.2850594222545624, |
| "logits/rejected": -0.3033252954483032, |
| "logps/chosen": -307.18463134765625, |
| "logps/rejected": -248.4318389892578, |
| "loss": -0.3909, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": 0.7229059934616089, |
| "rewards/margins": 6.767951011657715, |
| "rewards/rejected": -6.045044422149658, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.39555555555555555, |
| "grad_norm": 9.903278350830078, |
| "learning_rate": 9.585248742167638e-06, |
| "logits/chosen": -0.2718963325023651, |
| "logits/rejected": -0.30517634749412537, |
| "logps/chosen": -309.89031982421875, |
| "logps/rejected": -250.9574432373047, |
| "loss": -0.3108, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.4615413546562195, |
| "rewards/margins": 6.469930171966553, |
| "rewards/rejected": -6.9314703941345215, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 5.433151721954346, |
| "learning_rate": 9.57590533317493e-06, |
| "logits/chosen": -0.2626163959503174, |
| "logits/rejected": -0.32341477274894714, |
| "logps/chosen": -312.3274230957031, |
| "logps/rejected": -233.1435546875, |
| "loss": -0.7956, |
| "rewards/accuracies": 0.893750011920929, |
| "rewards/chosen": 0.924404501914978, |
| "rewards/margins": 8.514490127563477, |
| "rewards/rejected": -7.590085029602051, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.40444444444444444, |
| "grad_norm": 8.837213516235352, |
| "learning_rate": 9.566462508202403e-06, |
| "logits/chosen": -0.248914435505867, |
| "logits/rejected": -0.3085024952888489, |
| "logps/chosen": -311.30328369140625, |
| "logps/rejected": -252.2725067138672, |
| "loss": -0.4715, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": 0.2921377718448639, |
| "rewards/margins": 7.126054286956787, |
| "rewards/rejected": -6.833916664123535, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.4088888888888889, |
| "grad_norm": 7.693172454833984, |
| "learning_rate": 9.55692047240458e-06, |
| "logits/chosen": -0.2304973304271698, |
| "logits/rejected": -0.28682953119277954, |
| "logps/chosen": -311.18023681640625, |
| "logps/rejected": -240.9733123779297, |
| "loss": -0.6889, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": 0.7987859845161438, |
| "rewards/margins": 8.30670166015625, |
| "rewards/rejected": -7.507915496826172, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.41333333333333333, |
| "grad_norm": 8.775394439697266, |
| "learning_rate": 9.547279433091446e-06, |
| "logits/chosen": -0.2938714325428009, |
| "logits/rejected": -0.314927875995636, |
| "logps/chosen": -307.7293701171875, |
| "logps/rejected": -249.83523559570312, |
| "loss": -0.5757, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": 0.2602098286151886, |
| "rewards/margins": 7.839123725891113, |
| "rewards/rejected": -7.57891321182251, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.4177777777777778, |
| "grad_norm": 6.385190010070801, |
| "learning_rate": 9.537539599723924e-06, |
| "logits/chosen": -0.2282254993915558, |
| "logits/rejected": -0.29543009400367737, |
| "logps/chosen": -302.2158508300781, |
| "logps/rejected": -243.7989501953125, |
| "loss": -0.7249, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": 1.112415075302124, |
| "rewards/margins": 8.864578247070312, |
| "rewards/rejected": -7.752162933349609, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.4222222222222222, |
| "grad_norm": 9.705090522766113, |
| "learning_rate": 9.527701183909336e-06, |
| "logits/chosen": -0.255817174911499, |
| "logits/rejected": -0.30061060190200806, |
| "logps/chosen": -319.11309814453125, |
| "logps/rejected": -252.5963592529297, |
| "loss": -0.4242, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -0.3803827464580536, |
| "rewards/margins": 7.071600437164307, |
| "rewards/rejected": -7.4519829750061035, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.4266666666666667, |
| "grad_norm": 10.644119262695312, |
| "learning_rate": 9.51776439939681e-06, |
| "logits/chosen": -0.24410729110240936, |
| "logits/rejected": -0.31472498178482056, |
| "logps/chosen": -326.570556640625, |
| "logps/rejected": -254.5313262939453, |
| "loss": -0.6844, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": 0.5738657116889954, |
| "rewards/margins": 9.123512268066406, |
| "rewards/rejected": -8.549646377563477, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.4311111111111111, |
| "grad_norm": 12.36355972290039, |
| "learning_rate": 9.507729462072615e-06, |
| "logits/chosen": -0.24467067420482635, |
| "logits/rejected": -0.3331097364425659, |
| "logps/chosen": -322.2784729003906, |
| "logps/rejected": -260.8439025878906, |
| "loss": -0.5093, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": 0.3209023177623749, |
| "rewards/margins": 8.416958808898926, |
| "rewards/rejected": -8.09605598449707, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.43555555555555553, |
| "grad_norm": 11.505626678466797, |
| "learning_rate": 9.4975965899555e-06, |
| "logits/chosen": -0.28976163268089294, |
| "logits/rejected": -0.3090762794017792, |
| "logps/chosen": -307.57489013671875, |
| "logps/rejected": -246.5322265625, |
| "loss": -0.4303, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -1.3077417612075806, |
| "rewards/margins": 6.817173004150391, |
| "rewards/rejected": -8.124914169311523, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 15.380836486816406, |
| "learning_rate": 9.48736600319193e-06, |
| "logits/chosen": -0.2653730809688568, |
| "logits/rejected": -0.2980864644050598, |
| "logps/chosen": -319.14019775390625, |
| "logps/rejected": -263.46063232421875, |
| "loss": -0.3595, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.1717190742492676, |
| "rewards/margins": 7.247198581695557, |
| "rewards/rejected": -8.418917655944824, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.4444444444444444, |
| "grad_norm": 7.199528694152832, |
| "learning_rate": 9.47703792405133e-06, |
| "logits/chosen": -0.2643812596797943, |
| "logits/rejected": -0.3057587146759033, |
| "logps/chosen": -305.49395751953125, |
| "logps/rejected": -250.5885467529297, |
| "loss": -0.7595, |
| "rewards/accuracies": 0.8812500238418579, |
| "rewards/chosen": -1.0399351119995117, |
| "rewards/margins": 8.203756332397461, |
| "rewards/rejected": -9.243691444396973, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.4444444444444444, |
| "eval_logits/chosen": -0.2629312574863434, |
| "eval_logits/rejected": -0.30713388323783875, |
| "eval_logps/chosen": -313.8876953125, |
| "eval_logps/rejected": -254.3212127685547, |
| "eval_loss": -0.574113667011261, |
| "eval_rewards/accuracies": 0.828249990940094, |
| "eval_rewards/chosen": -1.1603001356124878, |
| "eval_rewards/margins": 8.141514778137207, |
| "eval_rewards/rejected": -9.301814079284668, |
| "eval_runtime": 2192.8697, |
| "eval_samples_per_second": 1.824, |
| "eval_steps_per_second": 0.912, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.4488888888888889, |
| "grad_norm": 11.993717193603516, |
| "learning_rate": 9.466612576921223e-06, |
| "logits/chosen": -0.2699393332004547, |
| "logits/rejected": -0.3285272717475891, |
| "logps/chosen": -319.2097473144531, |
| "logps/rejected": -263.5858154296875, |
| "loss": -0.394, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": -1.1724228858947754, |
| "rewards/margins": 7.439938545227051, |
| "rewards/rejected": -8.612360954284668, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.4533333333333333, |
| "grad_norm": 11.781710624694824, |
| "learning_rate": 9.456090188302389e-06, |
| "logits/chosen": -0.26111698150634766, |
| "logits/rejected": -0.28280287981033325, |
| "logps/chosen": -309.879638671875, |
| "logps/rejected": -263.6683654785156, |
| "loss": -0.6619, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.2497330904006958, |
| "rewards/margins": 8.266626358032227, |
| "rewards/rejected": -9.516359329223633, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.4577777777777778, |
| "grad_norm": 11.226400375366211, |
| "learning_rate": 9.445470986803922e-06, |
| "logits/chosen": -0.2626830041408539, |
| "logits/rejected": -0.3101075291633606, |
| "logps/chosen": -299.5857238769531, |
| "logps/rejected": -248.7251434326172, |
| "loss": -0.6192, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -1.528692603111267, |
| "rewards/margins": 8.301843643188477, |
| "rewards/rejected": -9.830536842346191, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.4622222222222222, |
| "grad_norm": 8.692116737365723, |
| "learning_rate": 9.434755203138269e-06, |
| "logits/chosen": -0.27712422609329224, |
| "logits/rejected": -0.33624228835105896, |
| "logps/chosen": -341.59759521484375, |
| "logps/rejected": -257.9901123046875, |
| "loss": -0.5867, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -1.3974745273590088, |
| "rewards/margins": 8.428323745727539, |
| "rewards/rejected": -9.825799942016602, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.4666666666666667, |
| "grad_norm": 7.316524982452393, |
| "learning_rate": 9.423943070116219e-06, |
| "logits/chosen": -0.3034690320491791, |
| "logits/rejected": -0.3194289803504944, |
| "logps/chosen": -294.5498046875, |
| "logps/rejected": -247.59463500976562, |
| "loss": -0.5768, |
| "rewards/accuracies": 0.856249988079071, |
| "rewards/chosen": -2.136320114135742, |
| "rewards/margins": 7.421705722808838, |
| "rewards/rejected": -9.558026313781738, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.4711111111111111, |
| "grad_norm": 15.097468376159668, |
| "learning_rate": 9.413034822641845e-06, |
| "logits/chosen": -0.29432040452957153, |
| "logits/rejected": -0.3473649322986603, |
| "logps/chosen": -314.51007080078125, |
| "logps/rejected": -255.99453735351562, |
| "loss": -0.3577, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -2.039400815963745, |
| "rewards/margins": 7.780667304992676, |
| "rewards/rejected": -9.820066452026367, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.47555555555555556, |
| "grad_norm": 10.856350898742676, |
| "learning_rate": 9.402030697707398e-06, |
| "logits/chosen": -0.27809661626815796, |
| "logits/rejected": -0.3084755539894104, |
| "logps/chosen": -290.03839111328125, |
| "logps/rejected": -246.6155242919922, |
| "loss": -0.7648, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.642029047012329, |
| "rewards/margins": 8.514683723449707, |
| "rewards/rejected": -10.156713485717773, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 10.466115951538086, |
| "learning_rate": 9.390930934388164e-06, |
| "logits/chosen": -0.25123220682144165, |
| "logits/rejected": -0.28659194707870483, |
| "logps/chosen": -310.38702392578125, |
| "logps/rejected": -264.1778869628906, |
| "loss": -0.6251, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -2.053196430206299, |
| "rewards/margins": 8.635331153869629, |
| "rewards/rejected": -10.68852710723877, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.48444444444444446, |
| "grad_norm": 14.518105506896973, |
| "learning_rate": 9.37973577383726e-06, |
| "logits/chosen": -0.2105627954006195, |
| "logits/rejected": -0.2725834250450134, |
| "logps/chosen": -309.6773681640625, |
| "logps/rejected": -247.98507690429688, |
| "loss": -0.7806, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -1.5902966260910034, |
| "rewards/margins": 9.257573127746582, |
| "rewards/rejected": -10.847868919372559, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.4888888888888889, |
| "grad_norm": 9.231803894042969, |
| "learning_rate": 9.368445459280405e-06, |
| "logits/chosen": -0.26593995094299316, |
| "logits/rejected": -0.28871750831604004, |
| "logps/chosen": -315.306884765625, |
| "logps/rejected": -262.83197021484375, |
| "loss": -0.5635, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": -3.177497148513794, |
| "rewards/margins": 7.756557464599609, |
| "rewards/rejected": -10.934054374694824, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.49333333333333335, |
| "grad_norm": 9.906770706176758, |
| "learning_rate": 9.357060236010626e-06, |
| "logits/chosen": -0.25906693935394287, |
| "logits/rejected": -0.32186049222946167, |
| "logps/chosen": -326.8785095214844, |
| "logps/rejected": -273.9308166503906, |
| "loss": -0.6905, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -2.1445183753967285, |
| "rewards/margins": 9.113534927368164, |
| "rewards/rejected": -11.258054733276367, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.49777777777777776, |
| "grad_norm": 7.245250225067139, |
| "learning_rate": 9.345580351382939e-06, |
| "logits/chosen": -0.2802310585975647, |
| "logits/rejected": -0.2841408848762512, |
| "logps/chosen": -293.36065673828125, |
| "logps/rejected": -265.8360900878906, |
| "loss": -0.7463, |
| "rewards/accuracies": 0.8687499761581421, |
| "rewards/chosen": -3.965707778930664, |
| "rewards/margins": 9.175572395324707, |
| "rewards/rejected": -13.141279220581055, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.5022222222222222, |
| "grad_norm": 11.543607711791992, |
| "learning_rate": 9.334006054808966e-06, |
| "logits/chosen": -0.2962619960308075, |
| "logits/rejected": -0.3181178569793701, |
| "logps/chosen": -326.261962890625, |
| "logps/rejected": -281.9925231933594, |
| "loss": -0.5012, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -4.311570644378662, |
| "rewards/margins": 8.469701766967773, |
| "rewards/rejected": -12.781272888183594, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.5066666666666667, |
| "grad_norm": 18.13285255432129, |
| "learning_rate": 9.322337597751525e-06, |
| "logits/chosen": -0.29192933440208435, |
| "logits/rejected": -0.32068902254104614, |
| "logps/chosen": -320.00146484375, |
| "logps/rejected": -267.2696838378906, |
| "loss": 0.0417, |
| "rewards/accuracies": 0.768750011920929, |
| "rewards/chosen": -5.439437389373779, |
| "rewards/margins": 6.237511157989502, |
| "rewards/rejected": -11.676947593688965, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.5111111111111111, |
| "grad_norm": 15.025782585144043, |
| "learning_rate": 9.310575233719155e-06, |
| "logits/chosen": -0.2575679421424866, |
| "logits/rejected": -0.28944242000579834, |
| "logps/chosen": -312.4665222167969, |
| "logps/rejected": -266.6491394042969, |
| "loss": -0.5652, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -2.7938873767852783, |
| "rewards/margins": 8.282114028930664, |
| "rewards/rejected": -11.07600212097168, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.5155555555555555, |
| "grad_norm": 10.936811447143555, |
| "learning_rate": 9.29871921826062e-06, |
| "logits/chosen": -0.2927904725074768, |
| "logits/rejected": -0.35370174050331116, |
| "logps/chosen": -321.13885498046875, |
| "logps/rejected": -268.1432800292969, |
| "loss": -0.2429, |
| "rewards/accuracies": 0.768750011920929, |
| "rewards/chosen": -4.02617883682251, |
| "rewards/margins": 8.042525291442871, |
| "rewards/rejected": -12.068704605102539, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 12.064530372619629, |
| "learning_rate": 9.28676980895935e-06, |
| "logits/chosen": -0.24123439192771912, |
| "logits/rejected": -0.2889128625392914, |
| "logps/chosen": -308.1340637207031, |
| "logps/rejected": -256.29632568359375, |
| "loss": -0.8401, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -3.284236431121826, |
| "rewards/margins": 10.122550010681152, |
| "rewards/rejected": -13.40678596496582, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.5244444444444445, |
| "grad_norm": 13.778730392456055, |
| "learning_rate": 9.274727265427849e-06, |
| "logits/chosen": -0.2769649922847748, |
| "logits/rejected": -0.31647247076034546, |
| "logps/chosen": -306.0679626464844, |
| "logps/rejected": -254.93179321289062, |
| "loss": -0.7553, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -4.035617828369141, |
| "rewards/margins": 9.203435897827148, |
| "rewards/rejected": -13.239053726196289, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.5288888888888889, |
| "grad_norm": 8.471962928771973, |
| "learning_rate": 9.262591849302049e-06, |
| "logits/chosen": -0.2713521122932434, |
| "logits/rejected": -0.3014729619026184, |
| "logps/chosen": -299.43475341796875, |
| "logps/rejected": -266.22686767578125, |
| "loss": -0.4191, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -4.5517988204956055, |
| "rewards/margins": 8.98070240020752, |
| "rewards/rejected": -13.532503128051758, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.5333333333333333, |
| "grad_norm": 12.954193115234375, |
| "learning_rate": 9.250363824235629e-06, |
| "logits/chosen": -0.2955438494682312, |
| "logits/rejected": -0.3413962721824646, |
| "logps/chosen": -318.26068115234375, |
| "logps/rejected": -256.75689697265625, |
| "loss": -0.5025, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -3.381830930709839, |
| "rewards/margins": 9.101526260375977, |
| "rewards/rejected": -12.483358383178711, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.5377777777777778, |
| "grad_norm": 10.307146072387695, |
| "learning_rate": 9.238043455894294e-06, |
| "logits/chosen": -0.27938082814216614, |
| "logits/rejected": -0.3281027674674988, |
| "logps/chosen": -327.4522399902344, |
| "logps/rejected": -257.4443054199219, |
| "loss": -0.7464, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -4.192465305328369, |
| "rewards/margins": 8.572819709777832, |
| "rewards/rejected": -12.765284538269043, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.5422222222222223, |
| "grad_norm": 8.460762023925781, |
| "learning_rate": 9.225631011949987e-06, |
| "logits/chosen": -0.2906576991081238, |
| "logits/rejected": -0.32649320363998413, |
| "logps/chosen": -341.95928955078125, |
| "logps/rejected": -275.3990783691406, |
| "loss": -0.9454, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -2.693796396255493, |
| "rewards/margins": 10.737831115722656, |
| "rewards/rejected": -13.43162727355957, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.5466666666666666, |
| "grad_norm": 18.260358810424805, |
| "learning_rate": 9.213126762075088e-06, |
| "logits/chosen": -0.3098008632659912, |
| "logits/rejected": -0.3394979238510132, |
| "logps/chosen": -306.67449951171875, |
| "logps/rejected": -263.02349853515625, |
| "loss": -0.5332, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -4.892157554626465, |
| "rewards/margins": 9.199603080749512, |
| "rewards/rejected": -14.091761589050293, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.5511111111111111, |
| "grad_norm": 12.752532005310059, |
| "learning_rate": 9.200530977936551e-06, |
| "logits/chosen": -0.3172837793827057, |
| "logits/rejected": -0.3619407117366791, |
| "logps/chosen": -349.4862976074219, |
| "logps/rejected": -279.5086669921875, |
| "loss": -0.6241, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -4.374999046325684, |
| "rewards/margins": 9.857782363891602, |
| "rewards/rejected": -14.232782363891602, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.5555555555555556, |
| "grad_norm": 15.203842163085938, |
| "learning_rate": 9.187843933189994e-06, |
| "logits/chosen": -0.28893885016441345, |
| "logits/rejected": -0.3426817059516907, |
| "logps/chosen": -328.72979736328125, |
| "logps/rejected": -271.71099853515625, |
| "loss": -0.8634, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -3.5763354301452637, |
| "rewards/margins": 11.408061027526855, |
| "rewards/rejected": -14.984395980834961, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 10.816106796264648, |
| "learning_rate": 9.175065903473769e-06, |
| "logits/chosen": -0.2791399657726288, |
| "logits/rejected": -0.2996821403503418, |
| "logps/chosen": -321.34771728515625, |
| "logps/rejected": -280.1087341308594, |
| "loss": -0.5511, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -5.1469244956970215, |
| "rewards/margins": 9.314436912536621, |
| "rewards/rejected": -14.4613618850708, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.5644444444444444, |
| "grad_norm": 8.36387825012207, |
| "learning_rate": 9.162197166402957e-06, |
| "logits/chosen": -0.29182273149490356, |
| "logits/rejected": -0.32408252358436584, |
| "logps/chosen": -316.68487548828125, |
| "logps/rejected": -270.219482421875, |
| "loss": -1.0203, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -4.753455638885498, |
| "rewards/margins": 10.925695419311523, |
| "rewards/rejected": -15.679153442382812, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.5688888888888889, |
| "grad_norm": 11.839485168457031, |
| "learning_rate": 9.149238001563348e-06, |
| "logits/chosen": -0.30801886320114136, |
| "logits/rejected": -0.329951673746109, |
| "logps/chosen": -312.73577880859375, |
| "logps/rejected": -267.54437255859375, |
| "loss": -0.5904, |
| "rewards/accuracies": 0.856249988079071, |
| "rewards/chosen": -6.468144416809082, |
| "rewards/margins": 8.780177116394043, |
| "rewards/rejected": -15.248323440551758, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.5733333333333334, |
| "grad_norm": 16.650188446044922, |
| "learning_rate": 9.136188690505363e-06, |
| "logits/chosen": -0.2637523412704468, |
| "logits/rejected": -0.31697210669517517, |
| "logps/chosen": -331.0145568847656, |
| "logps/rejected": -271.3872985839844, |
| "loss": -0.5915, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -5.373086452484131, |
| "rewards/margins": 8.960186958312988, |
| "rewards/rejected": -14.333274841308594, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.5777777777777777, |
| "grad_norm": 18.575305938720703, |
| "learning_rate": 9.123049516737936e-06, |
| "logits/chosen": -0.3117810785770416, |
| "logits/rejected": -0.3635488450527191, |
| "logps/chosen": -328.19989013671875, |
| "logps/rejected": -272.4068603515625, |
| "loss": -0.6851, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -5.281620979309082, |
| "rewards/margins": 10.281137466430664, |
| "rewards/rejected": -15.562756538391113, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.5822222222222222, |
| "grad_norm": 16.429004669189453, |
| "learning_rate": 9.109820765722357e-06, |
| "logits/chosen": -0.27543455362319946, |
| "logits/rejected": -0.31441715359687805, |
| "logps/chosen": -335.43609619140625, |
| "logps/rejected": -286.1941833496094, |
| "loss": -0.8026, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -5.2430877685546875, |
| "rewards/margins": 10.86163330078125, |
| "rewards/rejected": -16.10472297668457, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.5866666666666667, |
| "grad_norm": 9.399615287780762, |
| "learning_rate": 9.096502724866067e-06, |
| "logits/chosen": -0.3014602065086365, |
| "logits/rejected": -0.35103824734687805, |
| "logps/chosen": -359.2838439941406, |
| "logps/rejected": -294.8686828613281, |
| "loss": -1.0671, |
| "rewards/accuracies": 0.856249988079071, |
| "rewards/chosen": -4.225809574127197, |
| "rewards/margins": 11.496713638305664, |
| "rewards/rejected": -15.722521781921387, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.5911111111111111, |
| "grad_norm": 14.605314254760742, |
| "learning_rate": 9.083095683516414e-06, |
| "logits/chosen": -0.28259098529815674, |
| "logits/rejected": -0.32325831055641174, |
| "logps/chosen": -350.68878173828125, |
| "logps/rejected": -277.5746154785156, |
| "loss": -0.9032, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -6.993232727050781, |
| "rewards/margins": 10.017694473266602, |
| "rewards/rejected": -17.010927200317383, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.5955555555555555, |
| "grad_norm": 17.158226013183594, |
| "learning_rate": 9.069599932954371e-06, |
| "logits/chosen": -0.29114705324172974, |
| "logits/rejected": -0.3473047912120819, |
| "logps/chosen": -339.568603515625, |
| "logps/rejected": -287.64666748046875, |
| "loss": -0.5308, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -8.217833518981934, |
| "rewards/margins": 8.741876602172852, |
| "rewards/rejected": -16.9597110748291, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 16.861963272094727, |
| "learning_rate": 9.056015766388205e-06, |
| "logits/chosen": -0.3223651945590973, |
| "logits/rejected": -0.36349570751190186, |
| "logps/chosen": -334.6025695800781, |
| "logps/rejected": -285.73828125, |
| "loss": -0.7831, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -7.5147528648376465, |
| "rewards/margins": 10.620495796203613, |
| "rewards/rejected": -18.1352481842041, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.6044444444444445, |
| "grad_norm": 14.257497787475586, |
| "learning_rate": 9.042343478947103e-06, |
| "logits/chosen": -0.3066635727882385, |
| "logits/rejected": -0.32420462369918823, |
| "logps/chosen": -337.81097412109375, |
| "logps/rejected": -286.6188049316406, |
| "loss": -0.9349, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -6.352471351623535, |
| "rewards/margins": 11.51642894744873, |
| "rewards/rejected": -17.868900299072266, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.6088888888888889, |
| "grad_norm": 16.795236587524414, |
| "learning_rate": 9.028583367674767e-06, |
| "logits/chosen": -0.34612902998924255, |
| "logits/rejected": -0.36396104097366333, |
| "logps/chosen": -332.6644287109375, |
| "logps/rejected": -287.1361083984375, |
| "loss": -0.9376, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -5.387200355529785, |
| "rewards/margins": 11.706459999084473, |
| "rewards/rejected": -17.093660354614258, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.6133333333333333, |
| "grad_norm": 11.827564239501953, |
| "learning_rate": 9.014735731522952e-06, |
| "logits/chosen": -0.3044932782649994, |
| "logits/rejected": -0.33501502871513367, |
| "logps/chosen": -331.37835693359375, |
| "logps/rejected": -279.79718017578125, |
| "loss": -1.0226, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -5.231140613555908, |
| "rewards/margins": 12.324139595031738, |
| "rewards/rejected": -17.555278778076172, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.6177777777777778, |
| "grad_norm": 13.102601051330566, |
| "learning_rate": 9.00080087134498e-06, |
| "logits/chosen": -0.31660374999046326, |
| "logits/rejected": -0.3677740693092346, |
| "logps/chosen": -340.77081298828125, |
| "logps/rejected": -275.3927001953125, |
| "loss": -0.8796, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -5.748688220977783, |
| "rewards/margins": 11.431347846984863, |
| "rewards/rejected": -17.180036544799805, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.6222222222222222, |
| "grad_norm": 28.49679946899414, |
| "learning_rate": 8.9867790898892e-06, |
| "logits/chosen": -0.3224649131298065, |
| "logits/rejected": -0.3925584852695465, |
| "logps/chosen": -349.48919677734375, |
| "logps/rejected": -279.79119873046875, |
| "loss": -0.8641, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -6.573420524597168, |
| "rewards/margins": 11.63664436340332, |
| "rewards/rejected": -18.210065841674805, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.6266666666666667, |
| "grad_norm": 17.276578903198242, |
| "learning_rate": 8.972670691792409e-06, |
| "logits/chosen": -0.3031178414821625, |
| "logits/rejected": -0.347816526889801, |
| "logps/chosen": -332.0860900878906, |
| "logps/rejected": -278.52264404296875, |
| "loss": -0.6865, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -7.202242851257324, |
| "rewards/margins": 10.682793617248535, |
| "rewards/rejected": -17.88503646850586, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.6311111111111111, |
| "grad_norm": 13.52579402923584, |
| "learning_rate": 8.958475983573234e-06, |
| "logits/chosen": -0.3044522702693939, |
| "logits/rejected": -0.34488362073898315, |
| "logps/chosen": -337.5535583496094, |
| "logps/rejected": -295.95428466796875, |
| "loss": -0.9102, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -6.68551778793335, |
| "rewards/margins": 11.271787643432617, |
| "rewards/rejected": -17.957305908203125, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.6355555555555555, |
| "grad_norm": 12.732015609741211, |
| "learning_rate": 8.944195273625472e-06, |
| "logits/chosen": -0.2973068356513977, |
| "logits/rejected": -0.35419678688049316, |
| "logps/chosen": -318.517822265625, |
| "logps/rejected": -271.3919677734375, |
| "loss": -0.8689, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -7.6208038330078125, |
| "rewards/margins": 10.748211860656738, |
| "rewards/rejected": -18.369014739990234, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 15.25625991821289, |
| "learning_rate": 8.92982887221139e-06, |
| "logits/chosen": -0.3091279864311218, |
| "logits/rejected": -0.3462229073047638, |
| "logps/chosen": -345.78131103515625, |
| "logps/rejected": -292.4202575683594, |
| "loss": -0.4038, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -9.038877487182617, |
| "rewards/margins": 10.578144073486328, |
| "rewards/rejected": -19.617021560668945, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.6444444444444445, |
| "grad_norm": 8.678261756896973, |
| "learning_rate": 8.915377091454992e-06, |
| "logits/chosen": -0.2622337341308594, |
| "logits/rejected": -0.3454502820968628, |
| "logps/chosen": -330.6527404785156, |
| "logps/rejected": -271.9297790527344, |
| "loss": -0.9941, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -6.34903621673584, |
| "rewards/margins": 11.77011775970459, |
| "rewards/rejected": -18.119152069091797, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.6488888888888888, |
| "grad_norm": 18.099544525146484, |
| "learning_rate": 8.900840245335225e-06, |
| "logits/chosen": -0.29967910051345825, |
| "logits/rejected": -0.3550174832344055, |
| "logps/chosen": -339.3348388671875, |
| "logps/rejected": -284.2086181640625, |
| "loss": -0.6141, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -8.690653800964355, |
| "rewards/margins": 10.278780937194824, |
| "rewards/rejected": -18.969436645507812, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.6533333333333333, |
| "grad_norm": 17.058128356933594, |
| "learning_rate": 8.886218649679162e-06, |
| "logits/chosen": -0.30947160720825195, |
| "logits/rejected": -0.3345088064670563, |
| "logps/chosen": -320.3451232910156, |
| "logps/rejected": -277.7720031738281, |
| "loss": -1.014, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -8.53862190246582, |
| "rewards/margins": 12.284268379211426, |
| "rewards/rejected": -20.822891235351562, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.6577777777777778, |
| "grad_norm": 13.636448860168457, |
| "learning_rate": 8.871512622155147e-06, |
| "logits/chosen": -0.2878524363040924, |
| "logits/rejected": -0.3395880162715912, |
| "logps/chosen": -362.52899169921875, |
| "logps/rejected": -303.67620849609375, |
| "loss": -0.9753, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -8.569745063781738, |
| "rewards/margins": 12.254476547241211, |
| "rewards/rejected": -20.824222564697266, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.6622222222222223, |
| "grad_norm": 13.943758964538574, |
| "learning_rate": 8.856722482265886e-06, |
| "logits/chosen": -0.2777239978313446, |
| "logits/rejected": -0.2970428466796875, |
| "logps/chosen": -317.4947509765625, |
| "logps/rejected": -292.8834533691406, |
| "loss": -0.952, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -9.092373847961426, |
| "rewards/margins": 11.070058822631836, |
| "rewards/rejected": -20.162433624267578, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 10.792975425720215, |
| "learning_rate": 8.841848551341506e-06, |
| "logits/chosen": -0.300568550825119, |
| "logits/rejected": -0.35186997056007385, |
| "logps/chosen": -342.993408203125, |
| "logps/rejected": -283.03594970703125, |
| "loss": -0.9528, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -8.736288070678711, |
| "rewards/margins": 11.417104721069336, |
| "rewards/rejected": -20.153392791748047, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.6711111111111111, |
| "grad_norm": 14.272412300109863, |
| "learning_rate": 8.826891152532579e-06, |
| "logits/chosen": -0.24646346271038055, |
| "logits/rejected": -0.3181930184364319, |
| "logps/chosen": -330.8204040527344, |
| "logps/rejected": -290.7005310058594, |
| "loss": -0.8532, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -8.7890625, |
| "rewards/margins": 12.464083671569824, |
| "rewards/rejected": -21.25314712524414, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.6755555555555556, |
| "grad_norm": 18.93645477294922, |
| "learning_rate": 8.811850610803094e-06, |
| "logits/chosen": -0.2739986181259155, |
| "logits/rejected": -0.3238711357116699, |
| "logps/chosen": -351.69171142578125, |
| "logps/rejected": -305.0896301269531, |
| "loss": -0.9423, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -8.920026779174805, |
| "rewards/margins": 12.025522232055664, |
| "rewards/rejected": -20.945547103881836, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 18.738351821899414, |
| "learning_rate": 8.796727252923403e-06, |
| "logits/chosen": -0.31761056184768677, |
| "logits/rejected": -0.3448847532272339, |
| "logps/chosen": -331.40557861328125, |
| "logps/rejected": -296.1661071777344, |
| "loss": -0.5371, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -10.65619945526123, |
| "rewards/margins": 11.044633865356445, |
| "rewards/rejected": -21.700834274291992, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.6844444444444444, |
| "grad_norm": 17.68587875366211, |
| "learning_rate": 8.781521407463119e-06, |
| "logits/chosen": -0.29655805230140686, |
| "logits/rejected": -0.35701996088027954, |
| "logps/chosen": -353.18145751953125, |
| "logps/rejected": -300.9978942871094, |
| "loss": -1.0413, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -8.621820449829102, |
| "rewards/margins": 13.096748352050781, |
| "rewards/rejected": -21.718570709228516, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.6888888888888889, |
| "grad_norm": 8.765332221984863, |
| "learning_rate": 8.766233404783975e-06, |
| "logits/chosen": -0.33828821778297424, |
| "logits/rejected": -0.35216769576072693, |
| "logps/chosen": -349.8067626953125, |
| "logps/rejected": -313.57818603515625, |
| "loss": -0.856, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -9.512666702270508, |
| "rewards/margins": 12.066935539245605, |
| "rewards/rejected": -21.57960319519043, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.6933333333333334, |
| "grad_norm": 20.219892501831055, |
| "learning_rate": 8.750863577032652e-06, |
| "logits/chosen": -0.3195672929286957, |
| "logits/rejected": -0.3713618218898773, |
| "logps/chosen": -358.82037353515625, |
| "logps/rejected": -303.29571533203125, |
| "loss": -1.2232, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -9.787614822387695, |
| "rewards/margins": 13.660125732421875, |
| "rewards/rejected": -23.447738647460938, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.6977777777777778, |
| "grad_norm": 14.765336036682129, |
| "learning_rate": 8.735412258133562e-06, |
| "logits/chosen": -0.3235880136489868, |
| "logits/rejected": -0.36679068207740784, |
| "logps/chosen": -354.3951416015625, |
| "logps/rejected": -299.1098937988281, |
| "loss": -0.8905, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -10.815263748168945, |
| "rewards/margins": 12.30003547668457, |
| "rewards/rejected": -23.115299224853516, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.7022222222222222, |
| "grad_norm": 13.638387680053711, |
| "learning_rate": 8.719879783781585e-06, |
| "logits/chosen": -0.33130335807800293, |
| "logits/rejected": -0.3633490204811096, |
| "logps/chosen": -351.318603515625, |
| "logps/rejected": -302.3772888183594, |
| "loss": -1.0524, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -9.303329467773438, |
| "rewards/margins": 13.00804615020752, |
| "rewards/rejected": -22.31137466430664, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.7066666666666667, |
| "grad_norm": 10.990659713745117, |
| "learning_rate": 8.704266491434787e-06, |
| "logits/chosen": -0.3289201259613037, |
| "logits/rejected": -0.36471107602119446, |
| "logps/chosen": -330.88763427734375, |
| "logps/rejected": -286.8157958984375, |
| "loss": -0.6598, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -10.67319107055664, |
| "rewards/margins": 11.411112785339355, |
| "rewards/rejected": -22.084304809570312, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.7111111111111111, |
| "grad_norm": 12.232583045959473, |
| "learning_rate": 8.688572720307083e-06, |
| "logits/chosen": -0.3058468997478485, |
| "logits/rejected": -0.3826626241207123, |
| "logps/chosen": -360.0539855957031, |
| "logps/rejected": -302.0265808105469, |
| "loss": -0.9798, |
| "rewards/accuracies": 0.856249988079071, |
| "rewards/chosen": -9.977747917175293, |
| "rewards/margins": 13.34874153137207, |
| "rewards/rejected": -23.326488494873047, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.7155555555555555, |
| "grad_norm": 28.097835540771484, |
| "learning_rate": 8.672798811360863e-06, |
| "logits/chosen": -0.3440350890159607, |
| "logits/rejected": -0.3669665455818176, |
| "logps/chosen": -348.4472351074219, |
| "logps/rejected": -317.9486083984375, |
| "loss": -1.158, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -9.318201065063477, |
| "rewards/margins": 13.85308837890625, |
| "rewards/rejected": -23.17129135131836, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 13.566072463989258, |
| "learning_rate": 8.656945107299598e-06, |
| "logits/chosen": -0.32617539167404175, |
| "logits/rejected": -0.3627128601074219, |
| "logps/chosen": -350.51495361328125, |
| "logps/rejected": -313.4057922363281, |
| "loss": -1.1211, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -9.836647987365723, |
| "rewards/margins": 13.414007186889648, |
| "rewards/rejected": -23.250656127929688, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.7244444444444444, |
| "grad_norm": 10.32918930053711, |
| "learning_rate": 8.641011952560372e-06, |
| "logits/chosen": -0.3140029311180115, |
| "logits/rejected": -0.3582364618778229, |
| "logps/chosen": -333.9091796875, |
| "logps/rejected": -282.69061279296875, |
| "loss": -0.8948, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -10.822344779968262, |
| "rewards/margins": 11.024767875671387, |
| "rewards/rejected": -21.84711265563965, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.7288888888888889, |
| "grad_norm": 14.581253051757812, |
| "learning_rate": 8.624999693306422e-06, |
| "logits/chosen": -0.33729246258735657, |
| "logits/rejected": -0.3753616213798523, |
| "logps/chosen": -342.4247741699219, |
| "logps/rejected": -309.2422790527344, |
| "loss": -0.9419, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -11.228838920593262, |
| "rewards/margins": 13.112091064453125, |
| "rewards/rejected": -24.34092903137207, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.7333333333333333, |
| "grad_norm": 11.523558616638184, |
| "learning_rate": 8.608908677419606e-06, |
| "logits/chosen": -0.37991100549697876, |
| "logits/rejected": -0.40186434984207153, |
| "logps/chosen": -348.12396240234375, |
| "logps/rejected": -309.076171875, |
| "loss": -1.0504, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -12.649127006530762, |
| "rewards/margins": 12.755289077758789, |
| "rewards/rejected": -25.404415130615234, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.7377777777777778, |
| "grad_norm": 17.81552505493164, |
| "learning_rate": 8.592739254492845e-06, |
| "logits/chosen": -0.362493097782135, |
| "logits/rejected": -0.4177095293998718, |
| "logps/chosen": -335.11981201171875, |
| "logps/rejected": -290.92218017578125, |
| "loss": -1.1041, |
| "rewards/accuracies": 0.8687499761581421, |
| "rewards/chosen": -11.677441596984863, |
| "rewards/margins": 13.119203567504883, |
| "rewards/rejected": -24.79664421081543, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.7422222222222222, |
| "grad_norm": 20.329221725463867, |
| "learning_rate": 8.576491775822527e-06, |
| "logits/chosen": -0.33437713980674744, |
| "logits/rejected": -0.39904457330703735, |
| "logps/chosen": -357.16943359375, |
| "logps/rejected": -297.7870178222656, |
| "loss": -0.724, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -11.969806671142578, |
| "rewards/margins": 12.316507339477539, |
| "rewards/rejected": -24.28631591796875, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.7466666666666667, |
| "grad_norm": 19.302101135253906, |
| "learning_rate": 8.560166594400878e-06, |
| "logits/chosen": -0.3832574486732483, |
| "logits/rejected": -0.44351863861083984, |
| "logps/chosen": -352.62115478515625, |
| "logps/rejected": -304.46124267578125, |
| "loss": -0.6363, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": -12.629277229309082, |
| "rewards/margins": 11.651094436645508, |
| "rewards/rejected": -24.280370712280273, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.7511111111111111, |
| "grad_norm": 14.173489570617676, |
| "learning_rate": 8.543764064908295e-06, |
| "logits/chosen": -0.34056347608566284, |
| "logits/rejected": -0.39399194717407227, |
| "logps/chosen": -340.8840026855469, |
| "logps/rejected": -307.18603515625, |
| "loss": -1.2865, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -11.463663101196289, |
| "rewards/margins": 13.927221298217773, |
| "rewards/rejected": -25.390884399414062, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.7555555555555555, |
| "grad_norm": 22.398326873779297, |
| "learning_rate": 8.527284543705631e-06, |
| "logits/chosen": -0.37620821595191956, |
| "logits/rejected": -0.4051085412502289, |
| "logps/chosen": -341.5446472167969, |
| "logps/rejected": -314.3455810546875, |
| "loss": -1.1236, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -11.881677627563477, |
| "rewards/margins": 14.11926555633545, |
| "rewards/rejected": -26.00094223022461, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 20.017797470092773, |
| "learning_rate": 8.510728388826464e-06, |
| "logits/chosen": -0.33530497550964355, |
| "logits/rejected": -0.3962380290031433, |
| "logps/chosen": -341.4028625488281, |
| "logps/rejected": -305.88824462890625, |
| "loss": -1.5163, |
| "rewards/accuracies": 0.8812500238418579, |
| "rewards/chosen": -10.922323226928711, |
| "rewards/margins": 15.069772720336914, |
| "rewards/rejected": -25.992095947265625, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.7644444444444445, |
| "grad_norm": 13.252291679382324, |
| "learning_rate": 8.494095959969309e-06, |
| "logits/chosen": -0.34795650839805603, |
| "logits/rejected": -0.40874728560447693, |
| "logps/chosen": -348.9808654785156, |
| "logps/rejected": -308.14349365234375, |
| "loss": -0.9905, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -13.379107475280762, |
| "rewards/margins": 12.994186401367188, |
| "rewards/rejected": -26.373294830322266, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.7688888888888888, |
| "grad_norm": 18.38790512084961, |
| "learning_rate": 8.477387618489808e-06, |
| "logits/chosen": -0.3455773890018463, |
| "logits/rejected": -0.40834465622901917, |
| "logps/chosen": -343.2769470214844, |
| "logps/rejected": -297.47784423828125, |
| "loss": -1.4511, |
| "rewards/accuracies": 0.8687499761581421, |
| "rewards/chosen": -10.266695022583008, |
| "rewards/margins": 15.304100036621094, |
| "rewards/rejected": -25.5707950592041, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.7733333333333333, |
| "grad_norm": 21.267852783203125, |
| "learning_rate": 8.460603727392877e-06, |
| "logits/chosen": -0.35729557275772095, |
| "logits/rejected": -0.3905247449874878, |
| "logps/chosen": -370.48577880859375, |
| "logps/rejected": -324.64532470703125, |
| "loss": -1.1358, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -12.016322135925293, |
| "rewards/margins": 15.51159381866455, |
| "rewards/rejected": -27.527912139892578, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.7777777777777778, |
| "grad_norm": 20.68170166015625, |
| "learning_rate": 8.443744651324828e-06, |
| "logits/chosen": -0.3603067994117737, |
| "logits/rejected": -0.40933218598365784, |
| "logps/chosen": -356.02154541015625, |
| "logps/rejected": -310.26666259765625, |
| "loss": -1.0198, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -13.34446907043457, |
| "rewards/margins": 13.9938325881958, |
| "rewards/rejected": -27.338302612304688, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.7822222222222223, |
| "grad_norm": 14.795793533325195, |
| "learning_rate": 8.426810756565428e-06, |
| "logits/chosen": -0.3585900664329529, |
| "logits/rejected": -0.42686209082603455, |
| "logps/chosen": -368.9267272949219, |
| "logps/rejected": -311.18023681640625, |
| "loss": -1.5537, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -13.114725112915039, |
| "rewards/margins": 16.6258487701416, |
| "rewards/rejected": -29.74057388305664, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.7866666666666666, |
| "grad_norm": 19.962947845458984, |
| "learning_rate": 8.409802411019962e-06, |
| "logits/chosen": -0.347336083650589, |
| "logits/rejected": -0.4067932665348053, |
| "logps/chosen": -343.19158935546875, |
| "logps/rejected": -304.2242126464844, |
| "loss": -1.3862, |
| "rewards/accuracies": 0.8687499761581421, |
| "rewards/chosen": -13.927999496459961, |
| "rewards/margins": 15.6153564453125, |
| "rewards/rejected": -29.54335594177246, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.7911111111111111, |
| "grad_norm": 16.915666580200195, |
| "learning_rate": 8.392719984211228e-06, |
| "logits/chosen": -0.36178287863731384, |
| "logits/rejected": -0.42369580268859863, |
| "logps/chosen": -363.2778625488281, |
| "logps/rejected": -314.5802001953125, |
| "loss": -1.3641, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -14.133458137512207, |
| "rewards/margins": 13.37634563446045, |
| "rewards/rejected": -27.50980567932129, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.7955555555555556, |
| "grad_norm": 16.34973907470703, |
| "learning_rate": 8.375563847271506e-06, |
| "logits/chosen": -0.3902398645877838, |
| "logits/rejected": -0.4178919792175293, |
| "logps/chosen": -354.6260070800781, |
| "logps/rejected": -320.308837890625, |
| "loss": -1.296, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -14.261367797851562, |
| "rewards/margins": 15.514287948608398, |
| "rewards/rejected": -29.77565574645996, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 20.18850326538086, |
| "learning_rate": 8.35833437293451e-06, |
| "logits/chosen": -0.3586779534816742, |
| "logits/rejected": -0.3966183066368103, |
| "logps/chosen": -353.3863830566406, |
| "logps/rejected": -317.9190979003906, |
| "loss": -1.0465, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -13.865551948547363, |
| "rewards/margins": 14.156455993652344, |
| "rewards/rejected": -28.02200698852539, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.8044444444444444, |
| "grad_norm": 18.74745750427246, |
| "learning_rate": 8.341031935527267e-06, |
| "logits/chosen": -0.35274258255958557, |
| "logits/rejected": -0.4157370626926422, |
| "logps/chosen": -365.7769470214844, |
| "logps/rejected": -320.2703552246094, |
| "loss": -1.0852, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -14.110136032104492, |
| "rewards/margins": 15.161088943481445, |
| "rewards/rejected": -29.271224975585938, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.8088888888888889, |
| "grad_norm": 14.005874633789062, |
| "learning_rate": 8.323656910962011e-06, |
| "logits/chosen": -0.40306010842323303, |
| "logits/rejected": -0.44573473930358887, |
| "logps/chosen": -346.27105712890625, |
| "logps/rejected": -315.6506042480469, |
| "loss": -1.4107, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -13.054827690124512, |
| "rewards/margins": 15.541888236999512, |
| "rewards/rejected": -28.596715927124023, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.8133333333333334, |
| "grad_norm": 25.924898147583008, |
| "learning_rate": 8.306209676727994e-06, |
| "logits/chosen": -0.3658706545829773, |
| "logits/rejected": -0.4349114000797272, |
| "logps/chosen": -358.9135437011719, |
| "logps/rejected": -326.36090087890625, |
| "loss": -1.4081, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -12.446220397949219, |
| "rewards/margins": 17.219030380249023, |
| "rewards/rejected": -29.66524887084961, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.8177777777777778, |
| "grad_norm": 20.495826721191406, |
| "learning_rate": 8.288690611883296e-06, |
| "logits/chosen": -0.39841917157173157, |
| "logits/rejected": -0.4497374892234802, |
| "logps/chosen": -353.8162841796875, |
| "logps/rejected": -313.1631774902344, |
| "loss": -1.556, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -13.111363410949707, |
| "rewards/margins": 17.558149337768555, |
| "rewards/rejected": -30.669513702392578, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.8222222222222222, |
| "grad_norm": 24.87085723876953, |
| "learning_rate": 8.271100097046585e-06, |
| "logits/chosen": -0.3760126233100891, |
| "logits/rejected": -0.42560848593711853, |
| "logps/chosen": -350.9206237792969, |
| "logps/rejected": -320.97637939453125, |
| "loss": -1.4347, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -13.494958877563477, |
| "rewards/margins": 16.944490432739258, |
| "rewards/rejected": -30.439449310302734, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.8266666666666667, |
| "grad_norm": 20.46843910217285, |
| "learning_rate": 8.25343851438885e-06, |
| "logits/chosen": -0.4249737858772278, |
| "logits/rejected": -0.4788896441459656, |
| "logps/chosen": -361.0630187988281, |
| "logps/rejected": -319.73138427734375, |
| "loss": -1.3017, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -14.13292407989502, |
| "rewards/margins": 17.0448055267334, |
| "rewards/rejected": -31.1777286529541, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.8311111111111111, |
| "grad_norm": 22.38373374938965, |
| "learning_rate": 8.235706247625098e-06, |
| "logits/chosen": -0.38224634528160095, |
| "logits/rejected": -0.4391182065010071, |
| "logps/chosen": -361.043701171875, |
| "logps/rejected": -325.9903564453125, |
| "loss": -1.2866, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -13.751859664916992, |
| "rewards/margins": 17.840734481811523, |
| "rewards/rejected": -31.59259605407715, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.8355555555555556, |
| "grad_norm": 21.872596740722656, |
| "learning_rate": 8.217903682006017e-06, |
| "logits/chosen": -0.39942440390586853, |
| "logits/rejected": -0.45849889516830444, |
| "logps/chosen": -373.39013671875, |
| "logps/rejected": -337.3862609863281, |
| "loss": -1.6818, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -15.173101425170898, |
| "rewards/margins": 18.07097053527832, |
| "rewards/rejected": -33.24407196044922, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 21.81854248046875, |
| "learning_rate": 8.200031204309604e-06, |
| "logits/chosen": -0.40619197487831116, |
| "logits/rejected": -0.4568824768066406, |
| "logps/chosen": -342.34356689453125, |
| "logps/rejected": -317.7866516113281, |
| "loss": -1.6939, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -13.93403148651123, |
| "rewards/margins": 17.663101196289062, |
| "rewards/rejected": -31.597131729125977, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.8444444444444444, |
| "grad_norm": 22.453853607177734, |
| "learning_rate": 8.182089202832767e-06, |
| "logits/chosen": -0.3882743716239929, |
| "logits/rejected": -0.4640750288963318, |
| "logps/chosen": -382.75787353515625, |
| "logps/rejected": -337.13995361328125, |
| "loss": -2.0499, |
| "rewards/accuracies": 0.8687499761581421, |
| "rewards/chosen": -14.779397964477539, |
| "rewards/margins": 20.105022430419922, |
| "rewards/rejected": -34.884422302246094, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.8488888888888889, |
| "grad_norm": 18.471513748168945, |
| "learning_rate": 8.16407806738288e-06, |
| "logits/chosen": -0.39945605397224426, |
| "logits/rejected": -0.4585798680782318, |
| "logps/chosen": -383.2483215332031, |
| "logps/rejected": -347.448974609375, |
| "loss": -1.2417, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": -15.864675521850586, |
| "rewards/margins": 16.104869842529297, |
| "rewards/rejected": -31.969547271728516, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.8533333333333334, |
| "grad_norm": 37.47999954223633, |
| "learning_rate": 8.145998189269327e-06, |
| "logits/chosen": -0.4188354015350342, |
| "logits/rejected": -0.4583558142185211, |
| "logps/chosen": -377.2878723144531, |
| "logps/rejected": -350.0664978027344, |
| "loss": -1.6863, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -18.30971336364746, |
| "rewards/margins": 19.272193908691406, |
| "rewards/rejected": -37.5819091796875, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.8577777777777778, |
| "grad_norm": 19.40680503845215, |
| "learning_rate": 8.127849961294984e-06, |
| "logits/chosen": -0.4305190145969391, |
| "logits/rejected": -0.477532297372818, |
| "logps/chosen": -372.54443359375, |
| "logps/rejected": -340.7271728515625, |
| "loss": -1.6979, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -17.950782775878906, |
| "rewards/margins": 18.499141693115234, |
| "rewards/rejected": -36.44992446899414, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.8622222222222222, |
| "grad_norm": 28.603803634643555, |
| "learning_rate": 8.109633777747703e-06, |
| "logits/chosen": -0.42268872261047363, |
| "logits/rejected": -0.4787193834781647, |
| "logps/chosen": -373.359619140625, |
| "logps/rejected": -347.9590148925781, |
| "loss": -1.3852, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -20.33183479309082, |
| "rewards/margins": 17.556093215942383, |
| "rewards/rejected": -37.88792419433594, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.8666666666666667, |
| "grad_norm": 23.769119262695312, |
| "learning_rate": 8.091350034391732e-06, |
| "logits/chosen": -0.40240478515625, |
| "logits/rejected": -0.48416176438331604, |
| "logps/chosen": -384.06976318359375, |
| "logps/rejected": -361.1874694824219, |
| "loss": -1.5434, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -18.98853874206543, |
| "rewards/margins": 18.99036979675293, |
| "rewards/rejected": -37.97890853881836, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.8711111111111111, |
| "grad_norm": 36.155155181884766, |
| "learning_rate": 8.072999128459119e-06, |
| "logits/chosen": -0.41284674406051636, |
| "logits/rejected": -0.4507782459259033, |
| "logps/chosen": -360.9227600097656, |
| "logps/rejected": -339.7681884765625, |
| "loss": -1.3714, |
| "rewards/accuracies": 0.768750011920929, |
| "rewards/chosen": -18.811853408813477, |
| "rewards/margins": 17.97747230529785, |
| "rewards/rejected": -36.78932189941406, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.8755555555555555, |
| "grad_norm": 21.57328224182129, |
| "learning_rate": 8.05458145864109e-06, |
| "logits/chosen": -0.39822930097579956, |
| "logits/rejected": -0.4551811218261719, |
| "logps/chosen": -353.82623291015625, |
| "logps/rejected": -345.6108093261719, |
| "loss": -1.2538, |
| "rewards/accuracies": 0.768750011920929, |
| "rewards/chosen": -17.53032875061035, |
| "rewards/margins": 19.313552856445312, |
| "rewards/rejected": -36.8438835144043, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 39.19557571411133, |
| "learning_rate": 8.036097425079377e-06, |
| "logits/chosen": -0.38101926445961, |
| "logits/rejected": -0.4362686276435852, |
| "logps/chosen": -381.2861328125, |
| "logps/rejected": -345.8175048828125, |
| "loss": -1.2588, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -19.006837844848633, |
| "rewards/margins": 17.608064651489258, |
| "rewards/rejected": -36.61490249633789, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.8844444444444445, |
| "grad_norm": 22.725879669189453, |
| "learning_rate": 8.017547429357532e-06, |
| "logits/chosen": -0.3905089497566223, |
| "logits/rejected": -0.44199681282043457, |
| "logps/chosen": -367.7683410644531, |
| "logps/rejected": -347.76812744140625, |
| "loss": -2.4106, |
| "rewards/accuracies": 0.8687499761581421, |
| "rewards/chosen": -15.675936698913574, |
| "rewards/margins": 22.89352798461914, |
| "rewards/rejected": -38.56946563720703, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "grad_norm": 20.353073120117188, |
| "learning_rate": 7.998931874492192e-06, |
| "logits/chosen": -0.37944620847702026, |
| "logits/rejected": -0.44008979201316833, |
| "logps/chosen": -352.2929382324219, |
| "logps/rejected": -333.12530517578125, |
| "loss": -1.4519, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -15.595646858215332, |
| "rewards/margins": 17.53089141845703, |
| "rewards/rejected": -33.12653732299805, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "eval_logits/chosen": -0.39150622487068176, |
| "eval_logits/rejected": -0.45033136010169983, |
| "eval_logps/chosen": -367.9861755371094, |
| "eval_logps/rejected": -349.3917541503906, |
| "eval_loss": -1.812597393989563, |
| "eval_rewards/accuracies": 0.8402500152587891, |
| "eval_rewards/chosen": -17.389860153198242, |
| "eval_rewards/margins": 20.433107376098633, |
| "eval_rewards/rejected": -37.822967529296875, |
| "eval_runtime": 2196.225, |
| "eval_samples_per_second": 1.821, |
| "eval_steps_per_second": 0.911, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.8933333333333333, |
| "grad_norm": 46.65407943725586, |
| "learning_rate": 7.980251164924342e-06, |
| "logits/chosen": -0.36357760429382324, |
| "logits/rejected": -0.4234141409397125, |
| "logps/chosen": -398.599853515625, |
| "logps/rejected": -381.31634521484375, |
| "loss": -1.5236, |
| "rewards/accuracies": 0.7562500238418579, |
| "rewards/chosen": -18.2071475982666, |
| "rewards/margins": 20.315492630004883, |
| "rewards/rejected": -38.522640228271484, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.8977777777777778, |
| "grad_norm": 30.023534774780273, |
| "learning_rate": 7.9615057065105e-06, |
| "logits/chosen": -0.38881856203079224, |
| "logits/rejected": -0.441250741481781, |
| "logps/chosen": -356.4747619628906, |
| "logps/rejected": -353.72003173828125, |
| "loss": -2.1979, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -16.97440528869629, |
| "rewards/margins": 23.14920997619629, |
| "rewards/rejected": -40.123619079589844, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.9022222222222223, |
| "grad_norm": 37.5120735168457, |
| "learning_rate": 7.94269590651393e-06, |
| "logits/chosen": -0.40530771017074585, |
| "logits/rejected": -0.4659528136253357, |
| "logps/chosen": -374.06915283203125, |
| "logps/rejected": -339.68365478515625, |
| "loss": -1.3756, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": -20.926109313964844, |
| "rewards/margins": 18.391803741455078, |
| "rewards/rejected": -39.31791305541992, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.9066666666666666, |
| "grad_norm": 32.38276672363281, |
| "learning_rate": 7.923822173595773e-06, |
| "logits/chosen": -0.42920392751693726, |
| "logits/rejected": -0.47345709800720215, |
| "logps/chosen": -371.0946350097656, |
| "logps/rejected": -351.13507080078125, |
| "loss": -1.1326, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -19.484638214111328, |
| "rewards/margins": 17.145843505859375, |
| "rewards/rejected": -36.6304817199707, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.9111111111111111, |
| "grad_norm": 27.41214942932129, |
| "learning_rate": 7.904884917806174e-06, |
| "logits/chosen": -0.4001992642879486, |
| "logits/rejected": -0.4714701175689697, |
| "logps/chosen": -381.10882568359375, |
| "logps/rejected": -369.2865905761719, |
| "loss": -1.7117, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -19.89790153503418, |
| "rewards/margins": 20.406051635742188, |
| "rewards/rejected": -40.303955078125, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.9155555555555556, |
| "grad_norm": 28.964096069335938, |
| "learning_rate": 7.885884550575376e-06, |
| "logits/chosen": -0.4225890636444092, |
| "logits/rejected": -0.49147137999534607, |
| "logps/chosen": -391.1488037109375, |
| "logps/rejected": -357.6933288574219, |
| "loss": -2.2471, |
| "rewards/accuracies": 0.8687499761581421, |
| "rewards/chosen": -20.362295150756836, |
| "rewards/margins": 22.882240295410156, |
| "rewards/rejected": -43.244537353515625, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 18.16975212097168, |
| "learning_rate": 7.866821484704777e-06, |
| "logits/chosen": -0.39086705446243286, |
| "logits/rejected": -0.4711666703224182, |
| "logps/chosen": -402.845703125, |
| "logps/rejected": -357.61785888671875, |
| "loss": -1.5986, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -21.25181007385254, |
| "rewards/margins": 19.634967803955078, |
| "rewards/rejected": -40.886775970458984, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.9244444444444444, |
| "grad_norm": 24.85601234436035, |
| "learning_rate": 7.847696134357967e-06, |
| "logits/chosen": -0.39659881591796875, |
| "logits/rejected": -0.4599393308162689, |
| "logps/chosen": -391.4700012207031, |
| "logps/rejected": -379.650634765625, |
| "loss": -2.0057, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -20.11394691467285, |
| "rewards/margins": 23.591705322265625, |
| "rewards/rejected": -43.70565414428711, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.9288888888888889, |
| "grad_norm": 18.584766387939453, |
| "learning_rate": 7.828508915051724e-06, |
| "logits/chosen": -0.406088650226593, |
| "logits/rejected": -0.46071720123291016, |
| "logps/chosen": -375.5560607910156, |
| "logps/rejected": -376.0016784667969, |
| "loss": -2.86, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -19.491628646850586, |
| "rewards/margins": 27.680038452148438, |
| "rewards/rejected": -47.171669006347656, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.9333333333333333, |
| "grad_norm": 28.448862075805664, |
| "learning_rate": 7.80926024364699e-06, |
| "logits/chosen": -0.42899399995803833, |
| "logits/rejected": -0.49288374185562134, |
| "logps/chosen": -403.46722412109375, |
| "logps/rejected": -376.3447570800781, |
| "loss": -1.8305, |
| "rewards/accuracies": 0.8812500238418579, |
| "rewards/chosen": -22.05727767944336, |
| "rewards/margins": 24.375003814697266, |
| "rewards/rejected": -46.43228530883789, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.9377777777777778, |
| "grad_norm": 68.84838104248047, |
| "learning_rate": 7.789950538339813e-06, |
| "logits/chosen": -0.4087978005409241, |
| "logits/rejected": -0.4491947591304779, |
| "logps/chosen": -401.5524597167969, |
| "logps/rejected": -392.5592956542969, |
| "loss": -2.0026, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -21.574420928955078, |
| "rewards/margins": 26.205398559570312, |
| "rewards/rejected": -47.779815673828125, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.9422222222222222, |
| "grad_norm": 45.94411087036133, |
| "learning_rate": 7.770580218652262e-06, |
| "logits/chosen": -0.43011608719825745, |
| "logits/rejected": -0.4682633876800537, |
| "logps/chosen": -372.8076171875, |
| "logps/rejected": -385.06195068359375, |
| "loss": -2.0194, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -22.3854923248291, |
| "rewards/margins": 27.099695205688477, |
| "rewards/rejected": -49.48518753051758, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.9466666666666667, |
| "grad_norm": 52.724761962890625, |
| "learning_rate": 7.751149705423313e-06, |
| "logits/chosen": -0.4273204207420349, |
| "logits/rejected": -0.48607999086380005, |
| "logps/chosen": -369.5546875, |
| "logps/rejected": -360.80035400390625, |
| "loss": -1.6839, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -22.321815490722656, |
| "rewards/margins": 21.776235580444336, |
| "rewards/rejected": -44.098045349121094, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.9511111111111111, |
| "grad_norm": 23.580408096313477, |
| "learning_rate": 7.731659420799704e-06, |
| "logits/chosen": -0.408935010433197, |
| "logits/rejected": -0.4614839553833008, |
| "logps/chosen": -400.769287109375, |
| "logps/rejected": -375.7509765625, |
| "loss": -1.2632, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -23.772607803344727, |
| "rewards/margins": 21.02273178100586, |
| "rewards/rejected": -44.79533767700195, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.9555555555555556, |
| "grad_norm": 26.15434455871582, |
| "learning_rate": 7.712109788226763e-06, |
| "logits/chosen": -0.4153992235660553, |
| "logits/rejected": -0.45802217721939087, |
| "logps/chosen": -383.772216796875, |
| "logps/rejected": -388.574462890625, |
| "loss": -2.3232, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -22.940959930419922, |
| "rewards/margins": 24.761186599731445, |
| "rewards/rejected": -47.702144622802734, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 51.878143310546875, |
| "learning_rate": 7.692501232439214e-06, |
| "logits/chosen": -0.4019390940666199, |
| "logits/rejected": -0.48944348096847534, |
| "logps/chosen": -374.1484680175781, |
| "logps/rejected": -386.2460021972656, |
| "loss": -2.357, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -21.894039154052734, |
| "rewards/margins": 26.68624496459961, |
| "rewards/rejected": -48.58028030395508, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.9644444444444444, |
| "grad_norm": 19.433937072753906, |
| "learning_rate": 7.672834179451943e-06, |
| "logits/chosen": -0.4297551214694977, |
| "logits/rejected": -0.4838961064815521, |
| "logps/chosen": -375.82952880859375, |
| "logps/rejected": -371.78631591796875, |
| "loss": -1.63, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -23.77305030822754, |
| "rewards/margins": 20.549041748046875, |
| "rewards/rejected": -44.32209014892578, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.9688888888888889, |
| "grad_norm": 31.28313636779785, |
| "learning_rate": 7.653109056550741e-06, |
| "logits/chosen": -0.434882253408432, |
| "logits/rejected": -0.5161997079849243, |
| "logps/chosen": -379.7670593261719, |
| "logps/rejected": -393.16558837890625, |
| "loss": -3.1634, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -23.529193878173828, |
| "rewards/margins": 31.59170913696289, |
| "rewards/rejected": -55.12090301513672, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.9733333333333334, |
| "grad_norm": 26.750364303588867, |
| "learning_rate": 7.633326292283028e-06, |
| "logits/chosen": -0.44604843854904175, |
| "logits/rejected": -0.5044312477111816, |
| "logps/chosen": -383.0677185058594, |
| "logps/rejected": -414.618408203125, |
| "loss": -3.4819, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -23.556535720825195, |
| "rewards/margins": 34.202369689941406, |
| "rewards/rejected": -57.75890350341797, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.9777777777777777, |
| "grad_norm": 25.017847061157227, |
| "learning_rate": 7.6134863164485395e-06, |
| "logits/chosen": -0.47239094972610474, |
| "logits/rejected": -0.5211464166641235, |
| "logps/chosen": -429.9480895996094, |
| "logps/rejected": -426.56463623046875, |
| "loss": -2.141, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -29.6816349029541, |
| "rewards/margins": 26.086597442626953, |
| "rewards/rejected": -55.76823043823242, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.9822222222222222, |
| "grad_norm": 26.209083557128906, |
| "learning_rate": 7.593589560089984e-06, |
| "logits/chosen": -0.46421319246292114, |
| "logits/rejected": -0.5433587431907654, |
| "logps/chosen": -405.5035400390625, |
| "logps/rejected": -399.19378662109375, |
| "loss": -2.5744, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -28.879892349243164, |
| "rewards/margins": 26.91481590270996, |
| "rewards/rejected": -55.794708251953125, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.9866666666666667, |
| "grad_norm": 67.32453918457031, |
| "learning_rate": 7.573636455483684e-06, |
| "logits/chosen": -0.4945516586303711, |
| "logits/rejected": -0.5519949197769165, |
| "logps/chosen": -412.07989501953125, |
| "logps/rejected": -467.892822265625, |
| "loss": -2.6987, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -31.901325225830078, |
| "rewards/margins": 39.842533111572266, |
| "rewards/rejected": -71.74385833740234, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.9911111111111112, |
| "grad_norm": 35.05012512207031, |
| "learning_rate": 7.553627436130183e-06, |
| "logits/chosen": -0.4611131548881531, |
| "logits/rejected": -0.5199744701385498, |
| "logps/chosen": -402.3938293457031, |
| "logps/rejected": -429.143798828125, |
| "loss": -2.9302, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -28.55450439453125, |
| "rewards/margins": 32.54138946533203, |
| "rewards/rejected": -61.09589767456055, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.9955555555555555, |
| "grad_norm": 39.04290771484375, |
| "learning_rate": 7.533562936744825e-06, |
| "logits/chosen": -0.4493132531642914, |
| "logits/rejected": -0.49818509817123413, |
| "logps/chosen": -429.8363342285156, |
| "logps/rejected": -478.5335388183594, |
| "loss": -4.5909, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -30.265472412109375, |
| "rewards/margins": 44.172183990478516, |
| "rewards/rejected": -74.43766021728516, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 105.05587768554688, |
| "learning_rate": 7.513443393248312e-06, |
| "logits/chosen": -0.44253572821617126, |
| "logits/rejected": -0.5035872459411621, |
| "logps/chosen": -416.77484130859375, |
| "logps/rejected": -463.5193786621094, |
| "loss": -2.9637, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -30.517858505249023, |
| "rewards/margins": 42.752220153808594, |
| "rewards/rejected": -73.27008056640625, |
| "step": 1125 |
| }, |
| { |
| "epoch": 1.0044444444444445, |
| "grad_norm": 143.1883544921875, |
| "learning_rate": 7.493269242757233e-06, |
| "logits/chosen": -0.4549011290073395, |
| "logits/rejected": -0.5123938918113708, |
| "logps/chosen": -414.7423400878906, |
| "logps/rejected": -436.71807861328125, |
| "loss": -1.5439, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -32.691585540771484, |
| "rewards/margins": 36.47550582885742, |
| "rewards/rejected": -69.1670913696289, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.008888888888889, |
| "grad_norm": 81.42022705078125, |
| "learning_rate": 7.473040923574567e-06, |
| "logits/chosen": -0.4221878945827484, |
| "logits/rejected": -0.48623982071876526, |
| "logps/chosen": -414.17059326171875, |
| "logps/rejected": -472.69464111328125, |
| "loss": -2.7306, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -31.74398422241211, |
| "rewards/margins": 41.37885284423828, |
| "rewards/rejected": -73.12284088134766, |
| "step": 1135 |
| }, |
| { |
| "epoch": 1.0133333333333334, |
| "grad_norm": 35.27888870239258, |
| "learning_rate": 7.4527588751801606e-06, |
| "logits/chosen": -0.4145434498786926, |
| "logits/rejected": -0.47934216260910034, |
| "logps/chosen": -442.28857421875, |
| "logps/rejected": -457.786865234375, |
| "loss": -3.4397, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -31.377155303955078, |
| "rewards/margins": 37.45344543457031, |
| "rewards/rejected": -68.83060455322266, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.0177777777777777, |
| "grad_norm": 33.032012939453125, |
| "learning_rate": 7.432423538221179e-06, |
| "logits/chosen": -0.4252205491065979, |
| "logits/rejected": -0.4945794641971588, |
| "logps/chosen": -399.1712951660156, |
| "logps/rejected": -493.27459716796875, |
| "loss": -5.4394, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -28.82522964477539, |
| "rewards/margins": 51.07146072387695, |
| "rewards/rejected": -79.89668273925781, |
| "step": 1145 |
| }, |
| { |
| "epoch": 1.0222222222222221, |
| "grad_norm": 38.360721588134766, |
| "learning_rate": 7.412035354502532e-06, |
| "logits/chosen": -0.43832993507385254, |
| "logits/rejected": -0.48460859060287476, |
| "logps/chosen": -388.6338806152344, |
| "logps/rejected": -462.28277587890625, |
| "loss": -5.0777, |
| "rewards/accuracies": 0.893750011920929, |
| "rewards/chosen": -29.647830963134766, |
| "rewards/margins": 45.525054931640625, |
| "rewards/rejected": -75.17288208007812, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.0266666666666666, |
| "grad_norm": 50.89879608154297, |
| "learning_rate": 7.391594766977277e-06, |
| "logits/chosen": -0.45310840010643005, |
| "logits/rejected": -0.5277084112167358, |
| "logps/chosen": -419.843017578125, |
| "logps/rejected": -458.0693359375, |
| "loss": -4.3999, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -30.20745849609375, |
| "rewards/margins": 42.24293518066406, |
| "rewards/rejected": -72.45039367675781, |
| "step": 1155 |
| }, |
| { |
| "epoch": 1.031111111111111, |
| "grad_norm": 51.82854080200195, |
| "learning_rate": 7.371102219736999e-06, |
| "logits/chosen": -0.4446256756782532, |
| "logits/rejected": -0.46639928221702576, |
| "logps/chosen": -419.08734130859375, |
| "logps/rejected": -515.2535400390625, |
| "loss": -5.4518, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -33.454734802246094, |
| "rewards/margins": 49.91343307495117, |
| "rewards/rejected": -83.36817932128906, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.0355555555555556, |
| "grad_norm": 51.885841369628906, |
| "learning_rate": 7.350558158002154e-06, |
| "logits/chosen": -0.4529612064361572, |
| "logits/rejected": -0.49919238686561584, |
| "logps/chosen": -415.342529296875, |
| "logps/rejected": -537.9119873046875, |
| "loss": -6.4479, |
| "rewards/accuracies": 0.856249988079071, |
| "rewards/chosen": -35.03794860839844, |
| "rewards/margins": 59.22160720825195, |
| "rewards/rejected": -94.25955963134766, |
| "step": 1165 |
| }, |
| { |
| "epoch": 1.04, |
| "grad_norm": 206.30380249023438, |
| "learning_rate": 7.329963028112399e-06, |
| "logits/chosen": -0.4479581415653229, |
| "logits/rejected": -0.48728424310684204, |
| "logps/chosen": -466.96136474609375, |
| "logps/rejected": -589.3536987304688, |
| "loss": -3.1816, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -49.58006286621094, |
| "rewards/margins": 63.28960418701172, |
| "rewards/rejected": -112.86966705322266, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.0444444444444445, |
| "grad_norm": 54.78931427001953, |
| "learning_rate": 7.3093172775169e-06, |
| "logits/chosen": -0.40078288316726685, |
| "logits/rejected": -0.4713813364505768, |
| "logps/chosen": -478.08856201171875, |
| "logps/rejected": -565.62255859375, |
| "loss": -5.2624, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -42.438636779785156, |
| "rewards/margins": 55.889015197753906, |
| "rewards/rejected": -98.32765197753906, |
| "step": 1175 |
| }, |
| { |
| "epoch": 1.048888888888889, |
| "grad_norm": 33.327232360839844, |
| "learning_rate": 7.288621354764605e-06, |
| "logits/chosen": -0.4297246038913727, |
| "logits/rejected": -0.46544164419174194, |
| "logps/chosen": -448.8904724121094, |
| "logps/rejected": -547.2132568359375, |
| "loss": -5.0996, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -39.51817321777344, |
| "rewards/margins": 55.78776931762695, |
| "rewards/rejected": -95.30594635009766, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.0533333333333332, |
| "grad_norm": 43.984230041503906, |
| "learning_rate": 7.2678757094945e-06, |
| "logits/chosen": -0.359012246131897, |
| "logits/rejected": -0.4188918173313141, |
| "logps/chosen": -435.9148864746094, |
| "logps/rejected": -524.6188354492188, |
| "loss": -5.3232, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -40.167320251464844, |
| "rewards/margins": 50.48529815673828, |
| "rewards/rejected": -90.65261840820312, |
| "step": 1185 |
| }, |
| { |
| "epoch": 1.0577777777777777, |
| "grad_norm": 43.805030822753906, |
| "learning_rate": 7.2470807924258435e-06, |
| "logits/chosen": -0.33509600162506104, |
| "logits/rejected": -0.43126893043518066, |
| "logps/chosen": -472.0062561035156, |
| "logps/rejected": -550.6618041992188, |
| "loss": -4.2012, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -43.6344108581543, |
| "rewards/margins": 54.643653869628906, |
| "rewards/rejected": -98.27806091308594, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.0622222222222222, |
| "grad_norm": 55.845645904541016, |
| "learning_rate": 7.226237055348369e-06, |
| "logits/chosen": -0.3667193651199341, |
| "logits/rejected": -0.411059707403183, |
| "logps/chosen": -445.1033630371094, |
| "logps/rejected": -565.6658325195312, |
| "loss": -5.7241, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -44.05376434326172, |
| "rewards/margins": 60.89931106567383, |
| "rewards/rejected": -104.95307922363281, |
| "step": 1195 |
| }, |
| { |
| "epoch": 1.0666666666666667, |
| "grad_norm": 52.21657943725586, |
| "learning_rate": 7.205344951112474e-06, |
| "logits/chosen": -0.34739190340042114, |
| "logits/rejected": -0.4000583291053772, |
| "logps/chosen": -476.917724609375, |
| "logps/rejected": -596.9302978515625, |
| "loss": -5.2471, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -45.8980827331543, |
| "rewards/margins": 66.90580749511719, |
| "rewards/rejected": -112.80389404296875, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.0711111111111111, |
| "grad_norm": 73.39852905273438, |
| "learning_rate": 7.184404933619377e-06, |
| "logits/chosen": -0.3436613082885742, |
| "logits/rejected": -0.430023193359375, |
| "logps/chosen": -481.1473083496094, |
| "logps/rejected": -586.77783203125, |
| "loss": -3.8824, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -49.4990234375, |
| "rewards/margins": 60.65178680419922, |
| "rewards/rejected": -110.15081787109375, |
| "step": 1205 |
| }, |
| { |
| "epoch": 1.0755555555555556, |
| "grad_norm": 235.89935302734375, |
| "learning_rate": 7.163417457811261e-06, |
| "logits/chosen": -0.335957795381546, |
| "logits/rejected": -0.4028739333152771, |
| "logps/chosen": -488.63800048828125, |
| "logps/rejected": -557.654052734375, |
| "loss": -4.055, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -52.452980041503906, |
| "rewards/margins": 50.59123229980469, |
| "rewards/rejected": -103.0442123413086, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.08, |
| "grad_norm": 53.787052154541016, |
| "learning_rate": 7.142382979661386e-06, |
| "logits/chosen": -0.34447726607322693, |
| "logits/rejected": -0.40175333619117737, |
| "logps/chosen": -459.96429443359375, |
| "logps/rejected": -552.6686401367188, |
| "loss": -4.7845, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -45.343711853027344, |
| "rewards/margins": 51.044227600097656, |
| "rewards/rejected": -96.387939453125, |
| "step": 1215 |
| }, |
| { |
| "epoch": 1.0844444444444445, |
| "grad_norm": 86.77619934082031, |
| "learning_rate": 7.121301956164184e-06, |
| "logits/chosen": -0.29622939229011536, |
| "logits/rejected": -0.3380669057369232, |
| "logps/chosen": -496.318359375, |
| "logps/rejected": -672.7567138671875, |
| "loss": -5.4564, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -50.32990646362305, |
| "rewards/margins": 73.62047576904297, |
| "rewards/rejected": -123.95037841796875, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.0888888888888888, |
| "grad_norm": 92.10472869873047, |
| "learning_rate": 7.100174845325327e-06, |
| "logits/chosen": -0.2991761565208435, |
| "logits/rejected": -0.3294784724712372, |
| "logps/chosen": -509.4859313964844, |
| "logps/rejected": -744.8641357421875, |
| "loss": -9.63, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -58.88629150390625, |
| "rewards/margins": 92.65065002441406, |
| "rewards/rejected": -151.5369415283203, |
| "step": 1225 |
| }, |
| { |
| "epoch": 1.0933333333333333, |
| "grad_norm": 188.78851318359375, |
| "learning_rate": 7.0790021061517825e-06, |
| "logits/chosen": -0.2575603723526001, |
| "logits/rejected": -0.34444746375083923, |
| "logps/chosen": -523.0759887695312, |
| "logps/rejected": -661.7510375976562, |
| "loss": -3.843, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -56.90376663208008, |
| "rewards/margins": 74.44867706298828, |
| "rewards/rejected": -131.35244750976562, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.0977777777777777, |
| "grad_norm": 148.79954528808594, |
| "learning_rate": 7.057784198641835e-06, |
| "logits/chosen": -0.28366950154304504, |
| "logits/rejected": -0.3355741500854492, |
| "logps/chosen": -489.5838928222656, |
| "logps/rejected": -644.3504638671875, |
| "loss": -5.428, |
| "rewards/accuracies": 0.856249988079071, |
| "rewards/chosen": -53.00238800048828, |
| "rewards/margins": 78.2137222290039, |
| "rewards/rejected": -131.2161102294922, |
| "step": 1235 |
| }, |
| { |
| "epoch": 1.1022222222222222, |
| "grad_norm": 160.3306884765625, |
| "learning_rate": 7.036521583775099e-06, |
| "logits/chosen": -0.258393794298172, |
| "logits/rejected": -0.2814292311668396, |
| "logps/chosen": -461.08599853515625, |
| "logps/rejected": -765.3035888671875, |
| "loss": -12.7607, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -53.90653610229492, |
| "rewards/margins": 112.190185546875, |
| "rewards/rejected": -166.0967254638672, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.1066666666666667, |
| "grad_norm": 242.3454132080078, |
| "learning_rate": 7.015214723502496e-06, |
| "logits/chosen": -0.22555121779441833, |
| "logits/rejected": -0.21216616034507751, |
| "logps/chosen": -536.2150268554688, |
| "logps/rejected": -937.1095581054688, |
| "loss": -15.0884, |
| "rewards/accuracies": 0.856249988079071, |
| "rewards/chosen": -73.13267517089844, |
| "rewards/margins": 140.1719207763672, |
| "rewards/rejected": -213.30459594726562, |
| "step": 1245 |
| }, |
| { |
| "epoch": 1.1111111111111112, |
| "grad_norm": 492.08544921875, |
| "learning_rate": 6.993864080736221e-06, |
| "logits/chosen": -0.21394245326519012, |
| "logits/rejected": -0.2340272217988968, |
| "logps/chosen": -685.1243896484375, |
| "logps/rejected": -1000.0198974609375, |
| "loss": -4.4512, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -107.66426849365234, |
| "rewards/margins": 122.04512786865234, |
| "rewards/rejected": -229.7093963623047, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.1155555555555556, |
| "grad_norm": 1650.1431884765625, |
| "learning_rate": 6.972470119339692e-06, |
| "logits/chosen": -0.24737460911273956, |
| "logits/rejected": -0.2552156448364258, |
| "logps/chosen": -635.1632080078125, |
| "logps/rejected": -981.98583984375, |
| "loss": -1.7668, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -99.38597106933594, |
| "rewards/margins": 128.8037109375, |
| "rewards/rejected": -228.1896514892578, |
| "step": 1255 |
| }, |
| { |
| "epoch": 1.12, |
| "grad_norm": 163.82701110839844, |
| "learning_rate": 6.9510333041174595e-06, |
| "logits/chosen": -0.18519486486911774, |
| "logits/rejected": -0.21833041310310364, |
| "logps/chosen": -644.9240112304688, |
| "logps/rejected": -994.1267700195312, |
| "loss": -3.269, |
| "rewards/accuracies": 0.856249988079071, |
| "rewards/chosen": -101.34546661376953, |
| "rewards/margins": 130.02304077148438, |
| "rewards/rejected": -231.36849975585938, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.1244444444444444, |
| "grad_norm": 288.865478515625, |
| "learning_rate": 6.929554100805118e-06, |
| "logits/chosen": -0.15947946906089783, |
| "logits/rejected": -0.15981920063495636, |
| "logps/chosen": -641.01611328125, |
| "logps/rejected": -1218.5843505859375, |
| "loss": -18.7538, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -103.062255859375, |
| "rewards/margins": 199.00765991210938, |
| "rewards/rejected": -302.0699462890625, |
| "step": 1265 |
| }, |
| { |
| "epoch": 1.1288888888888888, |
| "grad_norm": 403.9992980957031, |
| "learning_rate": 6.908032976059184e-06, |
| "logits/chosen": -0.13905613124370575, |
| "logits/rejected": -0.11378375440835953, |
| "logps/chosen": -689.9586791992188, |
| "logps/rejected": -1391.485595703125, |
| "loss": -19.5417, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -115.4753189086914, |
| "rewards/margins": 238.10592651367188, |
| "rewards/rejected": -353.58123779296875, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.1333333333333333, |
| "grad_norm": 415.2798767089844, |
| "learning_rate": 6.886470397446958e-06, |
| "logits/chosen": -0.17582398653030396, |
| "logits/rejected": -0.15832173824310303, |
| "logps/chosen": -593.4633178710938, |
| "logps/rejected": -1337.550048828125, |
| "loss": -27.0786, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -84.53248596191406, |
| "rewards/margins": 245.6161651611328, |
| "rewards/rejected": -330.1486511230469, |
| "step": 1275 |
| }, |
| { |
| "epoch": 1.1377777777777778, |
| "grad_norm": 1071.2152099609375, |
| "learning_rate": 6.864866833436368e-06, |
| "logits/chosen": -0.14370083808898926, |
| "logits/rejected": -0.12866979837417603, |
| "logps/chosen": -737.2032470703125, |
| "logps/rejected": -1335.3382568359375, |
| "loss": -11.4925, |
| "rewards/accuracies": 0.7562500238418579, |
| "rewards/chosen": -133.48977661132812, |
| "rewards/margins": 196.9632568359375, |
| "rewards/rejected": -330.4530029296875, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.1422222222222222, |
| "grad_norm": 789.3484497070312, |
| "learning_rate": 6.843222753385785e-06, |
| "logits/chosen": -0.1296389400959015, |
| "logits/rejected": -0.10764478147029877, |
| "logps/chosen": -651.3052978515625, |
| "logps/rejected": -1448.587646484375, |
| "loss": -23.9463, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -99.7173080444336, |
| "rewards/margins": 265.5214538574219, |
| "rewards/rejected": -365.23876953125, |
| "step": 1285 |
| }, |
| { |
| "epoch": 1.1466666666666667, |
| "grad_norm": 1540.5205078125, |
| "learning_rate": 6.8215386275338335e-06, |
| "logits/chosen": -0.08001247048377991, |
| "logits/rejected": -0.09123299270868301, |
| "logps/chosen": -698.7423095703125, |
| "logps/rejected": -1478.103759765625, |
| "loss": -19.8387, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -116.57110595703125, |
| "rewards/margins": 258.63818359375, |
| "rewards/rejected": -375.20928955078125, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.1511111111111112, |
| "grad_norm": 767.0540771484375, |
| "learning_rate": 6.799814926989171e-06, |
| "logits/chosen": -0.13497574627399445, |
| "logits/rejected": -0.04107438400387764, |
| "logps/chosen": -796.4733276367188, |
| "logps/rejected": -1904.00390625, |
| "loss": -26.4269, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -151.79910278320312, |
| "rewards/margins": 352.4693908691406, |
| "rewards/rejected": -504.2684020996094, |
| "step": 1295 |
| }, |
| { |
| "epoch": 1.1555555555555554, |
| "grad_norm": 695.8399047851562, |
| "learning_rate": 6.778052123720252e-06, |
| "logits/chosen": -0.11790412664413452, |
| "logits/rejected": -0.14769446849822998, |
| "logps/chosen": -748.3878173828125, |
| "logps/rejected": -1471.530029296875, |
| "loss": -12.4276, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -133.96165466308594, |
| "rewards/margins": 242.260986328125, |
| "rewards/rejected": -376.22265625, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.16, |
| "grad_norm": 973.673095703125, |
| "learning_rate": 6.756250690545079e-06, |
| "logits/chosen": -0.08396363258361816, |
| "logits/rejected": -0.05135069414973259, |
| "logps/chosen": -951.2482299804688, |
| "logps/rejected": -1985.371826171875, |
| "loss": -32.127, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -190.14053344726562, |
| "rewards/margins": 336.322998046875, |
| "rewards/rejected": -526.4635620117188, |
| "step": 1305 |
| }, |
| { |
| "epoch": 1.1644444444444444, |
| "grad_norm": 1283.2349853515625, |
| "learning_rate": 6.734411101120925e-06, |
| "logits/chosen": -0.06799821555614471, |
| "logits/rejected": -0.07330864667892456, |
| "logps/chosen": -873.49072265625, |
| "logps/rejected": -1809.4739990234375, |
| "loss": -2.5052, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -171.44715881347656, |
| "rewards/margins": 302.28948974609375, |
| "rewards/rejected": -473.7366638183594, |
| "step": 1310 |
| }, |
| { |
| "epoch": 1.1688888888888889, |
| "grad_norm": 780.3442993164062, |
| "learning_rate": 6.712533829934042e-06, |
| "logits/chosen": -0.043098777532577515, |
| "logits/rejected": -0.024469073861837387, |
| "logps/chosen": -749.4600830078125, |
| "logps/rejected": -2043.115966796875, |
| "loss": -40.6397, |
| "rewards/accuracies": 0.7437499761581421, |
| "rewards/chosen": -130.70285034179688, |
| "rewards/margins": 411.14263916015625, |
| "rewards/rejected": -541.8453979492188, |
| "step": 1315 |
| }, |
| { |
| "epoch": 1.1733333333333333, |
| "grad_norm": 1000.9935913085938, |
| "learning_rate": 6.690619352289359e-06, |
| "logits/chosen": -0.09932423382997513, |
| "logits/rejected": -0.11026652157306671, |
| "logps/chosen": -644.5574951171875, |
| "logps/rejected": -1428.832763671875, |
| "loss": -21.736, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -99.68866729736328, |
| "rewards/margins": 255.21646118164062, |
| "rewards/rejected": -354.9051513671875, |
| "step": 1320 |
| }, |
| { |
| "epoch": 1.1777777777777778, |
| "grad_norm": 2265.180908203125, |
| "learning_rate": 6.6686681443001485e-06, |
| "logits/chosen": -0.05305319279432297, |
| "logits/rejected": -0.033907536417245865, |
| "logps/chosen": -796.5277709960938, |
| "logps/rejected": -2201.7998046875, |
| "loss": -52.5458, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -136.5464630126953, |
| "rewards/margins": 452.1681213378906, |
| "rewards/rejected": -588.7145385742188, |
| "step": 1325 |
| }, |
| { |
| "epoch": 1.1822222222222223, |
| "grad_norm": 9328.162109375, |
| "learning_rate": 6.6466806828776865e-06, |
| "logits/chosen": -0.028526514768600464, |
| "logits/rejected": -0.03504006937146187, |
| "logps/chosen": -988.6253662109375, |
| "logps/rejected": -2289.334228515625, |
| "loss": -18.6908, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -200.4395751953125, |
| "rewards/margins": 416.51495361328125, |
| "rewards/rejected": -616.9544677734375, |
| "step": 1330 |
| }, |
| { |
| "epoch": 1.1866666666666668, |
| "grad_norm": 4739.24951171875, |
| "learning_rate": 6.62465744572089e-06, |
| "logits/chosen": -0.04077509418129921, |
| "logits/rejected": -0.00983515102416277, |
| "logps/chosen": -994.3863525390625, |
| "logps/rejected": -2518.66162109375, |
| "loss": -20.8872, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -204.39039611816406, |
| "rewards/margins": 485.6438903808594, |
| "rewards/rejected": -690.0343017578125, |
| "step": 1335 |
| }, |
| { |
| "epoch": 1.1911111111111112, |
| "grad_norm": 1538.55615234375, |
| "learning_rate": 6.602598911305938e-06, |
| "logits/chosen": -0.06173365190625191, |
| "logits/rejected": -0.006297842599451542, |
| "logps/chosen": -734.0841064453125, |
| "logps/rejected": -2258.87353515625, |
| "loss": -43.0722, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -131.23867797851562, |
| "rewards/margins": 476.96844482421875, |
| "rewards/rejected": -608.2071533203125, |
| "step": 1340 |
| }, |
| { |
| "epoch": 1.1955555555555555, |
| "grad_norm": 587.7572021484375, |
| "learning_rate": 6.580505558875878e-06, |
| "logits/chosen": -0.04299772530794144, |
| "logits/rejected": -0.048394013196229935, |
| "logps/chosen": -753.9576416015625, |
| "logps/rejected": -2047.7181396484375, |
| "loss": -27.9425, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -135.0194091796875, |
| "rewards/margins": 414.18634033203125, |
| "rewards/rejected": -549.2056884765625, |
| "step": 1345 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 451.1556701660156, |
| "learning_rate": 6.558377868430211e-06, |
| "logits/chosen": -0.04246233031153679, |
| "logits/rejected": -0.043165404349565506, |
| "logps/chosen": -545.9451904296875, |
| "logps/rejected": -1733.472900390625, |
| "loss": -42.4667, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -69.52648162841797, |
| "rewards/margins": 382.27105712890625, |
| "rewards/rejected": -451.79754638671875, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.2044444444444444, |
| "grad_norm": 923.8303833007812, |
| "learning_rate": 6.536216320714466e-06, |
| "logits/chosen": -0.013859344646334648, |
| "logits/rejected": -0.03816484287381172, |
| "logps/chosen": -593.7230224609375, |
| "logps/rejected": -1762.0712890625, |
| "loss": -41.333, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -87.8678207397461, |
| "rewards/margins": 374.93817138671875, |
| "rewards/rejected": -462.8059997558594, |
| "step": 1355 |
| }, |
| { |
| "epoch": 1.208888888888889, |
| "grad_norm": 1663.5784912109375, |
| "learning_rate": 6.514021397209751e-06, |
| "logits/chosen": 0.014377089217305183, |
| "logits/rejected": -0.030507531017065048, |
| "logps/chosen": -894.8898315429688, |
| "logps/rejected": -2128.688720703125, |
| "loss": -22.929, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -165.6055450439453, |
| "rewards/margins": 399.2110900878906, |
| "rewards/rejected": -564.816650390625, |
| "step": 1360 |
| }, |
| { |
| "epoch": 1.2133333333333334, |
| "grad_norm": 584.5473022460938, |
| "learning_rate": 6.491793580122301e-06, |
| "logits/chosen": 0.002529005752876401, |
| "logits/rejected": 0.0013079143827781081, |
| "logps/chosen": -935.2101440429688, |
| "logps/rejected": -2530.46533203125, |
| "loss": -29.8881, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -185.2637481689453, |
| "rewards/margins": 504.73175048828125, |
| "rewards/rejected": -689.9954833984375, |
| "step": 1365 |
| }, |
| { |
| "epoch": 1.2177777777777778, |
| "grad_norm": 790.5787963867188, |
| "learning_rate": 6.46953335237299e-06, |
| "logits/chosen": -0.02872173860669136, |
| "logits/rejected": -0.0967845544219017, |
| "logps/chosen": -736.6044921875, |
| "logps/rejected": -1947.3765869140625, |
| "loss": -28.6821, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -127.88818359375, |
| "rewards/margins": 389.64739990234375, |
| "rewards/rejected": -517.5355834960938, |
| "step": 1370 |
| }, |
| { |
| "epoch": 1.2222222222222223, |
| "grad_norm": 565.301513671875, |
| "learning_rate": 6.447241197586847e-06, |
| "logits/chosen": -0.005647065117955208, |
| "logits/rejected": -0.013025308027863503, |
| "logps/chosen": -920.2081298828125, |
| "logps/rejected": -2415.46826171875, |
| "loss": -44.2469, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -190.19842529296875, |
| "rewards/margins": 470.56903076171875, |
| "rewards/rejected": -660.7674560546875, |
| "step": 1375 |
| }, |
| { |
| "epoch": 1.2266666666666666, |
| "grad_norm": 1703.7418212890625, |
| "learning_rate": 6.424917600082552e-06, |
| "logits/chosen": -0.04485129565000534, |
| "logits/rejected": -0.01103221159428358, |
| "logps/chosen": -715.8340454101562, |
| "logps/rejected": -2420.53173828125, |
| "loss": -63.7882, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -120.95291900634766, |
| "rewards/margins": 534.1937255859375, |
| "rewards/rejected": -655.1466674804688, |
| "step": 1380 |
| }, |
| { |
| "epoch": 1.231111111111111, |
| "grad_norm": 551.6255493164062, |
| "learning_rate": 6.402563044861899e-06, |
| "logits/chosen": 0.053200650960206985, |
| "logits/rejected": -0.008127940818667412, |
| "logps/chosen": -1044.637451171875, |
| "logps/rejected": -2480.965087890625, |
| "loss": -13.3416, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -219.8478240966797, |
| "rewards/margins": 458.9497985839844, |
| "rewards/rejected": -678.797607421875, |
| "step": 1385 |
| }, |
| { |
| "epoch": 1.2355555555555555, |
| "grad_norm": 1526.0687255859375, |
| "learning_rate": 6.380178017599276e-06, |
| "logits/chosen": 0.024942180141806602, |
| "logits/rejected": 0.03292980045080185, |
| "logps/chosen": -1266.3096923828125, |
| "logps/rejected": -3159.16064453125, |
| "loss": -19.9606, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -285.90057373046875, |
| "rewards/margins": 595.1817016601562, |
| "rewards/rejected": -881.0822143554688, |
| "step": 1390 |
| }, |
| { |
| "epoch": 1.24, |
| "grad_norm": 861.3545532226562, |
| "learning_rate": 6.357763004631104e-06, |
| "logits/chosen": 0.038592465221881866, |
| "logits/rejected": -0.04844246804714203, |
| "logps/chosen": -1352.316650390625, |
| "logps/rejected": -2715.897705078125, |
| "loss": 27.6993, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -313.6338195800781, |
| "rewards/margins": 437.67071533203125, |
| "rewards/rejected": -751.3045654296875, |
| "step": 1395 |
| }, |
| { |
| "epoch": 1.2444444444444445, |
| "grad_norm": 4720.46435546875, |
| "learning_rate": 6.335318492945271e-06, |
| "logits/chosen": 0.06307001411914825, |
| "logits/rejected": 0.050534725189208984, |
| "logps/chosen": -1334.775390625, |
| "logps/rejected": -2528.11083984375, |
| "loss": 41.9707, |
| "rewards/accuracies": 0.7562500238418579, |
| "rewards/chosen": -308.42840576171875, |
| "rewards/margins": 382.95184326171875, |
| "rewards/rejected": -691.3802490234375, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.248888888888889, |
| "grad_norm": 4182.30126953125, |
| "learning_rate": 6.312844970170551e-06, |
| "logits/chosen": 0.06979052722454071, |
| "logits/rejected": 0.05389819294214249, |
| "logps/chosen": -848.5364379882812, |
| "logps/rejected": -2794.65234375, |
| "loss": -48.1904, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -164.25741577148438, |
| "rewards/margins": 609.0851440429688, |
| "rewards/rejected": -773.3425903320312, |
| "step": 1405 |
| }, |
| { |
| "epoch": 1.2533333333333334, |
| "grad_norm": 634.9495239257812, |
| "learning_rate": 6.29034292456602e-06, |
| "logits/chosen": 0.097917839884758, |
| "logits/rejected": 0.09180790185928345, |
| "logps/chosen": -1049.5567626953125, |
| "logps/rejected": -3027.710693359375, |
| "loss": -14.7226, |
| "rewards/accuracies": 0.768750011920929, |
| "rewards/chosen": -223.29800415039062, |
| "rewards/margins": 616.1754760742188, |
| "rewards/rejected": -839.4735107421875, |
| "step": 1410 |
| }, |
| { |
| "epoch": 1.2577777777777777, |
| "grad_norm": 1667.810791015625, |
| "learning_rate": 6.267812845010431e-06, |
| "logits/chosen": 0.05149533227086067, |
| "logits/rejected": 0.01714668609201908, |
| "logps/chosen": -576.4118041992188, |
| "logps/rejected": -2054.046630859375, |
| "loss": -52.7336, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -82.24992370605469, |
| "rewards/margins": 466.6830139160156, |
| "rewards/rejected": -548.9329223632812, |
| "step": 1415 |
| }, |
| { |
| "epoch": 1.2622222222222224, |
| "grad_norm": 149.2144317626953, |
| "learning_rate": 6.245255220991606e-06, |
| "logits/chosen": 0.027654284611344337, |
| "logits/rejected": -0.004665301647037268, |
| "logps/chosen": -588.321533203125, |
| "logps/rejected": -1803.2874755859375, |
| "loss": -47.1506, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -86.83482360839844, |
| "rewards/margins": 386.0186462402344, |
| "rewards/rejected": -472.853515625, |
| "step": 1420 |
| }, |
| { |
| "epoch": 1.2666666666666666, |
| "grad_norm": 860.606689453125, |
| "learning_rate": 6.2226705425958e-06, |
| "logits/chosen": 0.010288884863257408, |
| "logits/rejected": 0.01715211756527424, |
| "logps/chosen": -876.3726806640625, |
| "logps/rejected": -3463.96728515625, |
| "loss": -97.3343, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": -168.2960205078125, |
| "rewards/margins": 805.5137939453125, |
| "rewards/rejected": -973.8098754882812, |
| "step": 1425 |
| }, |
| { |
| "epoch": 1.271111111111111, |
| "grad_norm": 1589.0054931640625, |
| "learning_rate": 6.200059300497045e-06, |
| "logits/chosen": 0.06689944118261337, |
| "logits/rejected": 0.15388646721839905, |
| "logps/chosen": -1188.5296630859375, |
| "logps/rejected": -5079.46044921875, |
| "loss": -138.2135, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -261.08453369140625, |
| "rewards/margins": 1197.956298828125, |
| "rewards/rejected": -1459.0408935546875, |
| "step": 1430 |
| }, |
| { |
| "epoch": 1.2755555555555556, |
| "grad_norm": 2617.210205078125, |
| "learning_rate": 6.177421985946499e-06, |
| "logits/chosen": 0.04427279904484749, |
| "logits/rejected": 0.08278901129961014, |
| "logps/chosen": -1090.3377685546875, |
| "logps/rejected": -4322.57958984375, |
| "loss": -93.149, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -228.5421600341797, |
| "rewards/margins": 997.9312744140625, |
| "rewards/rejected": -1226.473388671875, |
| "step": 1435 |
| }, |
| { |
| "epoch": 1.28, |
| "grad_norm": 1109.904296875, |
| "learning_rate": 6.1547590907617685e-06, |
| "logits/chosen": 0.08895837515592575, |
| "logits/rejected": 0.11301213502883911, |
| "logps/chosen": -742.1561889648438, |
| "logps/rejected": -3306.692138671875, |
| "loss": -84.8328, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -130.0913543701172, |
| "rewards/margins": 794.3884887695312, |
| "rewards/rejected": -924.4798583984375, |
| "step": 1440 |
| }, |
| { |
| "epoch": 1.2844444444444445, |
| "grad_norm": 2034.2410888671875, |
| "learning_rate": 6.132071107316221e-06, |
| "logits/chosen": 0.05466142296791077, |
| "logits/rejected": 0.020903872326016426, |
| "logps/chosen": -794.6520385742188, |
| "logps/rejected": -2645.4228515625, |
| "loss": -50.6485, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -143.0889129638672, |
| "rewards/margins": 582.6082153320312, |
| "rewards/rejected": -725.6971435546875, |
| "step": 1445 |
| }, |
| { |
| "epoch": 1.2888888888888888, |
| "grad_norm": 3161.018310546875, |
| "learning_rate": 6.109358528528296e-06, |
| "logits/chosen": 0.09831374883651733, |
| "logits/rejected": 0.045040689408779144, |
| "logps/chosen": -898.0750732421875, |
| "logps/rejected": -2734.27490234375, |
| "loss": -50.0723, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -176.62228393554688, |
| "rewards/margins": 578.7171020507812, |
| "rewards/rejected": -755.3394165039062, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.2933333333333334, |
| "grad_norm": 4272.8037109375, |
| "learning_rate": 6.0866218478507875e-06, |
| "logits/chosen": 0.034332215785980225, |
| "logits/rejected": -0.030711542814970016, |
| "logps/chosen": -887.9432373046875, |
| "logps/rejected": -2781.74609375, |
| "loss": -67.6966, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -168.97879028320312, |
| "rewards/margins": 603.0858154296875, |
| "rewards/rejected": -772.0645751953125, |
| "step": 1455 |
| }, |
| { |
| "epoch": 1.2977777777777777, |
| "grad_norm": 2452.99609375, |
| "learning_rate": 6.063861559260127e-06, |
| "logits/chosen": 0.0846308022737503, |
| "logits/rejected": -0.018889425322413445, |
| "logps/chosen": -1020.7131958007812, |
| "logps/rejected": -2657.43115234375, |
| "loss": -18.0709, |
| "rewards/accuracies": 0.768750011920929, |
| "rewards/chosen": -213.4755859375, |
| "rewards/margins": 520.6746215820312, |
| "rewards/rejected": -734.150146484375, |
| "step": 1460 |
| }, |
| { |
| "epoch": 1.3022222222222222, |
| "grad_norm": 212.89353942871094, |
| "learning_rate": 6.041078157245649e-06, |
| "logits/chosen": 0.09322404861450195, |
| "logits/rejected": 0.08232339471578598, |
| "logps/chosen": -1002.1947021484375, |
| "logps/rejected": -3723.547607421875, |
| "loss": -68.3427, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -206.69680786132812, |
| "rewards/margins": 846.6780395507812, |
| "rewards/rejected": -1053.374755859375, |
| "step": 1465 |
| }, |
| { |
| "epoch": 1.3066666666666666, |
| "grad_norm": 2106.11083984375, |
| "learning_rate": 6.018272136798854e-06, |
| "logits/chosen": 0.08394975960254669, |
| "logits/rejected": 0.03536719083786011, |
| "logps/chosen": -936.6207885742188, |
| "logps/rejected": -3006.883056640625, |
| "loss": -47.9597, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -179.7518310546875, |
| "rewards/margins": 658.16796875, |
| "rewards/rejected": -837.9197998046875, |
| "step": 1470 |
| }, |
| { |
| "epoch": 1.3111111111111111, |
| "grad_norm": 187.57144165039062, |
| "learning_rate": 5.995443993402647e-06, |
| "logits/chosen": 0.08657495677471161, |
| "logits/rejected": 0.09708672016859055, |
| "logps/chosen": -720.3692626953125, |
| "logps/rejected": -3342.55029296875, |
| "loss": -75.119, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -126.44435119628906, |
| "rewards/margins": 811.7178955078125, |
| "rewards/rejected": -938.1622314453125, |
| "step": 1475 |
| }, |
| { |
| "epoch": 1.3155555555555556, |
| "grad_norm": 918.5309448242188, |
| "learning_rate": 5.972594223020575e-06, |
| "logits/chosen": 0.1325269639492035, |
| "logits/rejected": 0.12702801823616028, |
| "logps/chosen": -996.6691284179688, |
| "logps/rejected": -3892.973876953125, |
| "loss": -66.2872, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -203.91000366210938, |
| "rewards/margins": 897.1043701171875, |
| "rewards/rejected": -1101.014404296875, |
| "step": 1480 |
| }, |
| { |
| "epoch": 1.32, |
| "grad_norm": 3301.5615234375, |
| "learning_rate": 5.949723322086053e-06, |
| "logits/chosen": 0.11541260778903961, |
| "logits/rejected": 0.024793455377221107, |
| "logps/chosen": -983.22021484375, |
| "logps/rejected": -3393.292236328125, |
| "loss": -70.6341, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -196.8666534423828, |
| "rewards/margins": 754.6265258789062, |
| "rewards/rejected": -951.4931640625, |
| "step": 1485 |
| }, |
| { |
| "epoch": 1.3244444444444445, |
| "grad_norm": 3240.269287109375, |
| "learning_rate": 5.926831787491577e-06, |
| "logits/chosen": 0.09748705476522446, |
| "logits/rejected": 0.07405810058116913, |
| "logps/chosen": -1009.3624877929688, |
| "logps/rejected": -3883.693359375, |
| "loss": -72.7366, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -210.13650512695312, |
| "rewards/margins": 885.2335205078125, |
| "rewards/rejected": -1095.369873046875, |
| "step": 1490 |
| }, |
| { |
| "epoch": 1.3288888888888888, |
| "grad_norm": 1777.92333984375, |
| "learning_rate": 5.9039201165779315e-06, |
| "logits/chosen": 0.15679362416267395, |
| "logits/rejected": 0.06116216257214546, |
| "logps/chosen": -935.0094604492188, |
| "logps/rejected": -3276.86474609375, |
| "loss": -13.9424, |
| "rewards/accuracies": 0.731249988079071, |
| "rewards/chosen": -190.66973876953125, |
| "rewards/margins": 728.3402099609375, |
| "rewards/rejected": -919.0099487304688, |
| "step": 1495 |
| }, |
| { |
| "epoch": 1.3333333333333333, |
| "grad_norm": 2631.911376953125, |
| "learning_rate": 5.880988807123379e-06, |
| "logits/chosen": 0.13361360132694244, |
| "logits/rejected": 0.11118870973587036, |
| "logps/chosen": -867.6259765625, |
| "logps/rejected": -4221.30810546875, |
| "loss": -116.1614, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -168.588623046875, |
| "rewards/margins": 1031.815185546875, |
| "rewards/rejected": -1200.4039306640625, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.3333333333333333, |
| "eval_logits/chosen": 0.1006997674703598, |
| "eval_logits/rejected": 0.056181661784648895, |
| "eval_logps/chosen": -774.4694213867188, |
| "eval_logps/rejected": -3444.830078125, |
| "eval_loss": -85.59445190429688, |
| "eval_rewards/accuracies": 0.7942500114440918, |
| "eval_rewards/chosen": -139.33482360839844, |
| "eval_rewards/margins": 827.11962890625, |
| "eval_rewards/rejected": -966.4544677734375, |
| "eval_runtime": 2193.4235, |
| "eval_samples_per_second": 1.824, |
| "eval_steps_per_second": 0.912, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.3377777777777777, |
| "grad_norm": 955.7211303710938, |
| "learning_rate": 5.858038357332851e-06, |
| "logits/chosen": 0.10563405603170395, |
| "logits/rejected": 0.04411952942609787, |
| "logps/chosen": -717.7381591796875, |
| "logps/rejected": -2960.67724609375, |
| "loss": -68.156, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -117.87982177734375, |
| "rewards/margins": 701.3028564453125, |
| "rewards/rejected": -819.1826782226562, |
| "step": 1505 |
| }, |
| { |
| "epoch": 1.3422222222222222, |
| "grad_norm": 3122.2939453125, |
| "learning_rate": 5.835069265827119e-06, |
| "logits/chosen": 0.0719769075512886, |
| "logits/rejected": 0.030872393399477005, |
| "logps/chosen": -808.4874877929688, |
| "logps/rejected": -3150.03173828125, |
| "loss": -67.1917, |
| "rewards/accuracies": 0.768750011920929, |
| "rewards/chosen": -145.69776916503906, |
| "rewards/margins": 730.8069458007812, |
| "rewards/rejected": -876.5046997070312, |
| "step": 1510 |
| }, |
| { |
| "epoch": 1.3466666666666667, |
| "grad_norm": 1660.158935546875, |
| "learning_rate": 5.812082031631966e-06, |
| "logits/chosen": 0.12388893216848373, |
| "logits/rejected": 0.04534872621297836, |
| "logps/chosen": -628.2339477539062, |
| "logps/rejected": -3061.64697265625, |
| "loss": -91.6737, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -90.30569458007812, |
| "rewards/margins": 763.7083129882812, |
| "rewards/rejected": -854.0139770507812, |
| "step": 1515 |
| }, |
| { |
| "epoch": 1.3511111111111112, |
| "grad_norm": 208.55455017089844, |
| "learning_rate": 5.789077154167342e-06, |
| "logits/chosen": 0.08742909133434296, |
| "logits/rejected": 0.057026900351047516, |
| "logps/chosen": -752.9409790039062, |
| "logps/rejected": -2982.9560546875, |
| "loss": -85.7046, |
| "rewards/accuracies": 0.8687499761581421, |
| "rewards/chosen": -132.0449981689453, |
| "rewards/margins": 694.6397705078125, |
| "rewards/rejected": -826.6846923828125, |
| "step": 1520 |
| }, |
| { |
| "epoch": 1.3555555555555556, |
| "grad_norm": 196.6206817626953, |
| "learning_rate": 5.766055133236513e-06, |
| "logits/chosen": 0.12026973813772202, |
| "logits/rejected": 0.054700933396816254, |
| "logps/chosen": -761.4303588867188, |
| "logps/rejected": -3311.610595703125, |
| "loss": -89.4418, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -136.79627990722656, |
| "rewards/margins": 790.1121215820312, |
| "rewards/rejected": -926.9083862304688, |
| "step": 1525 |
| }, |
| { |
| "epoch": 1.3599999999999999, |
| "grad_norm": 3600.595947265625, |
| "learning_rate": 5.7430164690152045e-06, |
| "logits/chosen": 0.0973397046327591, |
| "logits/rejected": -0.010263195261359215, |
| "logps/chosen": -1011.42236328125, |
| "logps/rejected": -3372.37744140625, |
| "loss": -77.1505, |
| "rewards/accuracies": 0.7562500238418579, |
| "rewards/chosen": -206.7822265625, |
| "rewards/margins": 737.5157470703125, |
| "rewards/rejected": -944.2979736328125, |
| "step": 1530 |
| }, |
| { |
| "epoch": 1.3644444444444446, |
| "grad_norm": 2392.074462890625, |
| "learning_rate": 5.7199616620407325e-06, |
| "logits/chosen": 0.2140587866306305, |
| "logits/rejected": 0.13287585973739624, |
| "logps/chosen": -1339.725341796875, |
| "logps/rejected": -4591.2490234375, |
| "loss": -72.0547, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -304.84576416015625, |
| "rewards/margins": 1005.3800048828125, |
| "rewards/rejected": -1310.225830078125, |
| "step": 1535 |
| }, |
| { |
| "epoch": 1.3688888888888888, |
| "grad_norm": 1979.218017578125, |
| "learning_rate": 5.696891213201134e-06, |
| "logits/chosen": 0.11510731279850006, |
| "logits/rejected": 0.04925750941038132, |
| "logps/chosen": -711.453369140625, |
| "logps/rejected": -3214.048828125, |
| "loss": -65.2588, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -123.6042251586914, |
| "rewards/margins": 775.2196044921875, |
| "rewards/rejected": -898.8238525390625, |
| "step": 1540 |
| }, |
| { |
| "epoch": 1.3733333333333333, |
| "grad_norm": 1114.5130615234375, |
| "learning_rate": 5.673805623724272e-06, |
| "logits/chosen": 0.09844042360782623, |
| "logits/rejected": 0.06581716239452362, |
| "logps/chosen": -904.0095825195312, |
| "logps/rejected": -3654.065673828125, |
| "loss": -103.2758, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -177.9736785888672, |
| "rewards/margins": 850.4739379882812, |
| "rewards/rejected": -1028.447509765625, |
| "step": 1545 |
| }, |
| { |
| "epoch": 1.3777777777777778, |
| "grad_norm": 2146.8916015625, |
| "learning_rate": 5.650705395166965e-06, |
| "logits/chosen": 0.1780976802110672, |
| "logits/rejected": 0.11291356384754181, |
| "logps/chosen": -1122.606201171875, |
| "logps/rejected": -3680.157470703125, |
| "loss": -52.3573, |
| "rewards/accuracies": 0.768750011920929, |
| "rewards/chosen": -240.822509765625, |
| "rewards/margins": 790.123291015625, |
| "rewards/rejected": -1030.94580078125, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.3822222222222222, |
| "grad_norm": 2417.669677734375, |
| "learning_rate": 5.627591029404072e-06, |
| "logits/chosen": 0.12985600531101227, |
| "logits/rejected": 0.0674312636256218, |
| "logps/chosen": -911.4212036132812, |
| "logps/rejected": -2673.277099609375, |
| "loss": -32.7178, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -182.8104248046875, |
| "rewards/margins": 550.80224609375, |
| "rewards/rejected": -733.6126708984375, |
| "step": 1555 |
| }, |
| { |
| "epoch": 1.3866666666666667, |
| "grad_norm": 874.9458618164062, |
| "learning_rate": 5.604463028617598e-06, |
| "logits/chosen": 0.14844806492328644, |
| "logits/rejected": 0.15728525817394257, |
| "logps/chosen": -931.70947265625, |
| "logps/rejected": -5559.9111328125, |
| "loss": -162.2626, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": -186.3575897216797, |
| "rewards/margins": 1415.8370361328125, |
| "rewards/rejected": -1602.194580078125, |
| "step": 1560 |
| }, |
| { |
| "epoch": 1.3911111111111112, |
| "grad_norm": 1034.4825439453125, |
| "learning_rate": 5.581321895285787e-06, |
| "logits/chosen": 0.18663232028484344, |
| "logits/rejected": 0.07420190423727036, |
| "logps/chosen": -999.5822143554688, |
| "logps/rejected": -4148.65234375, |
| "loss": -63.8503, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -205.96835327148438, |
| "rewards/margins": 971.0234375, |
| "rewards/rejected": -1176.9918212890625, |
| "step": 1565 |
| }, |
| { |
| "epoch": 1.3955555555555557, |
| "grad_norm": 5635.2412109375, |
| "learning_rate": 5.558168132172195e-06, |
| "logits/chosen": 0.13406811654567719, |
| "logits/rejected": 0.014459284953773022, |
| "logps/chosen": -930.2107543945312, |
| "logps/rejected": -2864.10986328125, |
| "loss": -40.1761, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -183.5532989501953, |
| "rewards/margins": 603.8217163085938, |
| "rewards/rejected": -787.3750610351562, |
| "step": 1570 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 752.8253173828125, |
| "learning_rate": 5.535002242314772e-06, |
| "logits/chosen": 0.1415996253490448, |
| "logits/rejected": 0.05800303816795349, |
| "logps/chosen": -1115.41796875, |
| "logps/rejected": -4055.927001953125, |
| "loss": -56.2453, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": -241.4073028564453, |
| "rewards/margins": 909.2194213867188, |
| "rewards/rejected": -1150.626708984375, |
| "step": 1575 |
| }, |
| { |
| "epoch": 1.4044444444444444, |
| "grad_norm": 204.28115844726562, |
| "learning_rate": 5.511824729014936e-06, |
| "logits/chosen": 0.150762677192688, |
| "logits/rejected": 0.14514730870723724, |
| "logps/chosen": -741.7395629882812, |
| "logps/rejected": -4206.9638671875, |
| "loss": -117.9318, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -126.77288818359375, |
| "rewards/margins": 1062.399169921875, |
| "rewards/rejected": -1189.172119140625, |
| "step": 1580 |
| }, |
| { |
| "epoch": 1.4088888888888889, |
| "grad_norm": 6281.19091796875, |
| "learning_rate": 5.488636095826636e-06, |
| "logits/chosen": 0.15948662161827087, |
| "logits/rejected": 0.0816819816827774, |
| "logps/chosen": -1141.554931640625, |
| "logps/rejected": -4589.34765625, |
| "loss": -115.1681, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -243.4787139892578, |
| "rewards/margins": 1065.435302734375, |
| "rewards/rejected": -1308.9140625, |
| "step": 1585 |
| }, |
| { |
| "epoch": 1.4133333333333333, |
| "grad_norm": 600.7849731445312, |
| "learning_rate": 5.465436846545407e-06, |
| "logits/chosen": 0.1765061318874359, |
| "logits/rejected": 0.030613476410508156, |
| "logps/chosen": -1068.354736328125, |
| "logps/rejected": -3554.58154296875, |
| "loss": -44.7145, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -224.12246704101562, |
| "rewards/margins": 772.4940185546875, |
| "rewards/rejected": -996.6165771484375, |
| "step": 1590 |
| }, |
| { |
| "epoch": 1.4177777777777778, |
| "grad_norm": 657.72021484375, |
| "learning_rate": 5.4422274851974356e-06, |
| "logits/chosen": 0.18472027778625488, |
| "logits/rejected": 0.11286415904760361, |
| "logps/chosen": -865.9571533203125, |
| "logps/rejected": -4187.04052734375, |
| "loss": -97.4628, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -167.65736389160156, |
| "rewards/margins": 1020.7579956054688, |
| "rewards/rejected": -1188.4154052734375, |
| "step": 1595 |
| }, |
| { |
| "epoch": 1.4222222222222223, |
| "grad_norm": 668.9474487304688, |
| "learning_rate": 5.419008516028597e-06, |
| "logits/chosen": 0.1573035717010498, |
| "logits/rejected": 0.04901648312807083, |
| "logps/chosen": -925.8907470703125, |
| "logps/rejected": -3878.40380859375, |
| "loss": -61.9469, |
| "rewards/accuracies": 0.768750011920929, |
| "rewards/chosen": -186.1890869140625, |
| "rewards/margins": 912.0675048828125, |
| "rewards/rejected": -1098.256591796875, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.4266666666666667, |
| "grad_norm": 3902.2021484375, |
| "learning_rate": 5.395780443493508e-06, |
| "logits/chosen": 0.16088075935840607, |
| "logits/rejected": 0.10695306956768036, |
| "logps/chosen": -1087.218994140625, |
| "logps/rejected": -4154.6220703125, |
| "loss": -104.1147, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -228.36355590820312, |
| "rewards/margins": 948.61474609375, |
| "rewards/rejected": -1176.978271484375, |
| "step": 1605 |
| }, |
| { |
| "epoch": 1.431111111111111, |
| "grad_norm": 2289.0302734375, |
| "learning_rate": 5.372543772244566e-06, |
| "logits/chosen": 0.17162616550922394, |
| "logits/rejected": 0.07870938628911972, |
| "logps/chosen": -1285.9188232421875, |
| "logps/rejected": -4683.6875, |
| "loss": -128.7691, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -292.28375244140625, |
| "rewards/margins": 1045.0806884765625, |
| "rewards/rejected": -1337.3646240234375, |
| "step": 1610 |
| }, |
| { |
| "epoch": 1.4355555555555555, |
| "grad_norm": 3771.407470703125, |
| "learning_rate": 5.34929900712098e-06, |
| "logits/chosen": 0.1444883942604065, |
| "logits/rejected": 0.07830196619033813, |
| "logps/chosen": -1254.024169921875, |
| "logps/rejected": -5026.85693359375, |
| "loss": -123.4905, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -284.7939147949219, |
| "rewards/margins": 1156.435302734375, |
| "rewards/rejected": -1441.229248046875, |
| "step": 1615 |
| }, |
| { |
| "epoch": 1.44, |
| "grad_norm": 4702.58544921875, |
| "learning_rate": 5.326046653137811e-06, |
| "logits/chosen": 0.13368460536003113, |
| "logits/rejected": 0.12430386245250702, |
| "logps/chosen": -1176.692138671875, |
| "logps/rejected": -5584.7294921875, |
| "loss": -166.6501, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": -262.0100402832031, |
| "rewards/margins": 1345.173583984375, |
| "rewards/rejected": -1607.18359375, |
| "step": 1620 |
| }, |
| { |
| "epoch": 1.4444444444444444, |
| "grad_norm": 569.5634155273438, |
| "learning_rate": 5.302787215474992e-06, |
| "logits/chosen": 0.15046411752700806, |
| "logits/rejected": 0.104924775660038, |
| "logps/chosen": -986.0426635742188, |
| "logps/rejected": -5192.7490234375, |
| "loss": -136.9891, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": -200.11141967773438, |
| "rewards/margins": 1287.0059814453125, |
| "rewards/rejected": -1487.117431640625, |
| "step": 1625 |
| }, |
| { |
| "epoch": 1.448888888888889, |
| "grad_norm": 27159.068359375, |
| "learning_rate": 5.279521199466356e-06, |
| "logits/chosen": 0.1597212851047516, |
| "logits/rejected": 0.0540970079600811, |
| "logps/chosen": -1303.693115234375, |
| "logps/rejected": -3794.32958984375, |
| "loss": -35.0208, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -298.74774169921875, |
| "rewards/margins": 771.4027709960938, |
| "rewards/rejected": -1070.150634765625, |
| "step": 1630 |
| }, |
| { |
| "epoch": 1.4533333333333334, |
| "grad_norm": 2882.916015625, |
| "learning_rate": 5.256249110588659e-06, |
| "logits/chosen": 0.18333426117897034, |
| "logits/rejected": 0.11001193523406982, |
| "logps/chosen": -938.8673706054688, |
| "logps/rejected": -4647.5478515625, |
| "loss": -113.7993, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -190.62075805664062, |
| "rewards/margins": 1137.749267578125, |
| "rewards/rejected": -1328.3699951171875, |
| "step": 1635 |
| }, |
| { |
| "epoch": 1.4577777777777778, |
| "grad_norm": 1122.654052734375, |
| "learning_rate": 5.232971454450595e-06, |
| "logits/chosen": 0.18779829144477844, |
| "logits/rejected": 0.10752624273300171, |
| "logps/chosen": -1198.038330078125, |
| "logps/rejected": -4063.475341796875, |
| "loss": -76.5747, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -266.9295959472656, |
| "rewards/margins": 885.2711181640625, |
| "rewards/rejected": -1152.20068359375, |
| "step": 1640 |
| }, |
| { |
| "epoch": 1.462222222222222, |
| "grad_norm": 26724.4765625, |
| "learning_rate": 5.209688736781811e-06, |
| "logits/chosen": 0.2143486738204956, |
| "logits/rejected": 0.08180849254131317, |
| "logps/chosen": -982.2023315429688, |
| "logps/rejected": -5008.22705078125, |
| "loss": -132.0043, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": -193.89479064941406, |
| "rewards/margins": 1239.5858154296875, |
| "rewards/rejected": -1433.480712890625, |
| "step": 1645 |
| }, |
| { |
| "epoch": 1.4666666666666668, |
| "grad_norm": 871.9409790039062, |
| "learning_rate": 5.1864014634219214e-06, |
| "logits/chosen": 0.19577138125896454, |
| "logits/rejected": 0.07801645994186401, |
| "logps/chosen": -1281.193115234375, |
| "logps/rejected": -5583.1650390625, |
| "loss": -93.5913, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -290.54290771484375, |
| "rewards/margins": 1318.56298828125, |
| "rewards/rejected": -1609.1058349609375, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.471111111111111, |
| "grad_norm": 633.2246704101562, |
| "learning_rate": 5.163110140309518e-06, |
| "logits/chosen": 0.16473805904388428, |
| "logits/rejected": 0.06993107497692108, |
| "logps/chosen": -1357.2281494140625, |
| "logps/rejected": -4072.783935546875, |
| "loss": 21.3237, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -320.8450622558594, |
| "rewards/margins": 837.5509643554688, |
| "rewards/rejected": -1158.3958740234375, |
| "step": 1655 |
| }, |
| { |
| "epoch": 1.4755555555555555, |
| "grad_norm": 533.0902709960938, |
| "learning_rate": 5.139815273471177e-06, |
| "logits/chosen": 0.18089079856872559, |
| "logits/rejected": 0.09585729986429214, |
| "logps/chosen": -841.4392700195312, |
| "logps/rejected": -4242.81884765625, |
| "loss": -109.7265, |
| "rewards/accuracies": 0.7562500238418579, |
| "rewards/chosen": -155.72561645507812, |
| "rewards/margins": 1048.8828125, |
| "rewards/rejected": -1204.6085205078125, |
| "step": 1660 |
| }, |
| { |
| "epoch": 1.48, |
| "grad_norm": 52428.46484375, |
| "learning_rate": 5.116517369010467e-06, |
| "logits/chosen": 0.24841204285621643, |
| "logits/rejected": 0.09177226573228836, |
| "logps/chosen": -1188.5052490234375, |
| "logps/rejected": -3917.296875, |
| "loss": 13.6613, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": -261.0191345214844, |
| "rewards/margins": 848.3692626953125, |
| "rewards/rejected": -1109.388427734375, |
| "step": 1665 |
| }, |
| { |
| "epoch": 1.4844444444444445, |
| "grad_norm": 7470.201171875, |
| "learning_rate": 5.0932169330969464e-06, |
| "logits/chosen": 0.18534071743488312, |
| "logits/rejected": 0.12083166837692261, |
| "logps/chosen": -1179.9267578125, |
| "logps/rejected": -5664.2998046875, |
| "loss": -87.2841, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -264.7419128417969, |
| "rewards/margins": 1368.9412841796875, |
| "rewards/rejected": -1633.6832275390625, |
| "step": 1670 |
| }, |
| { |
| "epoch": 1.488888888888889, |
| "grad_norm": 20524.2578125, |
| "learning_rate": 5.069914471955179e-06, |
| "logits/chosen": 0.20060646533966064, |
| "logits/rejected": 0.10489257425069809, |
| "logps/chosen": -981.4915161132812, |
| "logps/rejected": -3824.27685546875, |
| "loss": -37.233, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -197.96775817871094, |
| "rewards/margins": 878.4957275390625, |
| "rewards/rejected": -1076.463623046875, |
| "step": 1675 |
| }, |
| { |
| "epoch": 1.4933333333333334, |
| "grad_norm": 2312.172119140625, |
| "learning_rate": 5.046610491853724e-06, |
| "logits/chosen": 0.20951858162879944, |
| "logits/rejected": 0.07898414134979248, |
| "logps/chosen": -573.9847412109375, |
| "logps/rejected": -3592.535888671875, |
| "loss": -109.885, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -77.85977172851562, |
| "rewards/margins": 932.3436279296875, |
| "rewards/rejected": -1010.2033081054688, |
| "step": 1680 |
| }, |
| { |
| "epoch": 1.4977777777777779, |
| "grad_norm": 1623.2857666015625, |
| "learning_rate": 5.023305499094145e-06, |
| "logits/chosen": 0.22633978724479675, |
| "logits/rejected": 0.08304329216480255, |
| "logps/chosen": -1290.774169921875, |
| "logps/rejected": -4456.76416015625, |
| "loss": -107.5082, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -294.6800537109375, |
| "rewards/margins": 974.953125, |
| "rewards/rejected": -1269.6331787109375, |
| "step": 1685 |
| }, |
| { |
| "epoch": 1.5022222222222221, |
| "grad_norm": 1518.488525390625, |
| "learning_rate": 5e-06, |
| "logits/chosen": 0.16941127181053162, |
| "logits/rejected": 0.09130094945430756, |
| "logps/chosen": -761.7131958007812, |
| "logps/rejected": -4554.498046875, |
| "loss": -142.6045, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -138.38058471679688, |
| "rewards/margins": 1160.93408203125, |
| "rewards/rejected": -1299.314697265625, |
| "step": 1690 |
| }, |
| { |
| "epoch": 1.5066666666666668, |
| "grad_norm": 311.0038757324219, |
| "learning_rate": 4.976694500905858e-06, |
| "logits/chosen": 0.19227799773216248, |
| "logits/rejected": 0.09216197580099106, |
| "logps/chosen": -1680.769775390625, |
| "logps/rejected": -5286.17919921875, |
| "loss": -36.3603, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -417.07342529296875, |
| "rewards/margins": 1102.249755859375, |
| "rewards/rejected": -1519.323486328125, |
| "step": 1695 |
| }, |
| { |
| "epoch": 1.511111111111111, |
| "grad_norm": 341.06793212890625, |
| "learning_rate": 4.953389508146277e-06, |
| "logits/chosen": 0.22450792789459229, |
| "logits/rejected": 0.09371861070394516, |
| "logps/chosen": -1111.1561279296875, |
| "logps/rejected": -6192.85595703125, |
| "loss": -159.6742, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -240.66537475585938, |
| "rewards/margins": 1548.493896484375, |
| "rewards/rejected": -1789.159423828125, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.5155555555555555, |
| "grad_norm": 390.3556213378906, |
| "learning_rate": 4.930085528044823e-06, |
| "logits/chosen": 0.1802193522453308, |
| "logits/rejected": 0.07305122911930084, |
| "logps/chosen": -787.3092041015625, |
| "logps/rejected": -4876.71875, |
| "loss": -154.2313, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -143.25827026367188, |
| "rewards/margins": 1251.798583984375, |
| "rewards/rejected": -1395.056884765625, |
| "step": 1705 |
| }, |
| { |
| "epoch": 1.52, |
| "grad_norm": 2948.445068359375, |
| "learning_rate": 4.906783066903055e-06, |
| "logits/chosen": 0.2890220582485199, |
| "logits/rejected": 0.0888415202498436, |
| "logps/chosen": -826.3502197265625, |
| "logps/rejected": -5362.0126953125, |
| "loss": -154.5983, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -151.1717071533203, |
| "rewards/margins": 1392.552001953125, |
| "rewards/rejected": -1543.7237548828125, |
| "step": 1710 |
| }, |
| { |
| "epoch": 1.5244444444444445, |
| "grad_norm": 2725.235107421875, |
| "learning_rate": 4.883482630989536e-06, |
| "logits/chosen": 0.24592271447181702, |
| "logits/rejected": 0.12680990993976593, |
| "logps/chosen": -1053.497802734375, |
| "logps/rejected": -4971.4873046875, |
| "loss": -44.4038, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -220.80410766601562, |
| "rewards/margins": 1203.4788818359375, |
| "rewards/rejected": -1424.282958984375, |
| "step": 1715 |
| }, |
| { |
| "epoch": 1.528888888888889, |
| "grad_norm": 2223.574951171875, |
| "learning_rate": 4.860184726528824e-06, |
| "logits/chosen": 0.22868971526622772, |
| "logits/rejected": 0.0975094586610794, |
| "logps/chosen": -1032.4659423828125, |
| "logps/rejected": -4410.2880859375, |
| "loss": -112.415, |
| "rewards/accuracies": 0.7562500238418579, |
| "rewards/chosen": -218.31973266601562, |
| "rewards/margins": 1034.145263671875, |
| "rewards/rejected": -1252.46484375, |
| "step": 1720 |
| }, |
| { |
| "epoch": 1.5333333333333332, |
| "grad_norm": 3770.7490234375, |
| "learning_rate": 4.8368898596904834e-06, |
| "logits/chosen": 0.1960953325033188, |
| "logits/rejected": 0.10902541875839233, |
| "logps/chosen": -1435.333251953125, |
| "logps/rejected": -5620.6708984375, |
| "loss": -76.7429, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -335.00872802734375, |
| "rewards/margins": 1281.798583984375, |
| "rewards/rejected": -1616.8072509765625, |
| "step": 1725 |
| }, |
| { |
| "epoch": 1.537777777777778, |
| "grad_norm": 184.910400390625, |
| "learning_rate": 4.81359853657808e-06, |
| "logits/chosen": 0.23400822281837463, |
| "logits/rejected": 0.0940646380186081, |
| "logps/chosen": -829.26171875, |
| "logps/rejected": -3815.18408203125, |
| "loss": -76.5698, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -158.69537353515625, |
| "rewards/margins": 921.3673706054688, |
| "rewards/rejected": -1080.062744140625, |
| "step": 1730 |
| }, |
| { |
| "epoch": 1.5422222222222222, |
| "grad_norm": 1886.403076171875, |
| "learning_rate": 4.790311263218191e-06, |
| "logits/chosen": 0.21838542819023132, |
| "logits/rejected": 0.10093291848897934, |
| "logps/chosen": -858.345703125, |
| "logps/rejected": -4288.09326171875, |
| "loss": -129.9072, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -168.2855682373047, |
| "rewards/margins": 1054.7025146484375, |
| "rewards/rejected": -1222.988037109375, |
| "step": 1735 |
| }, |
| { |
| "epoch": 1.5466666666666666, |
| "grad_norm": 520.177978515625, |
| "learning_rate": 4.767028545549407e-06, |
| "logits/chosen": 0.18090055882930756, |
| "logits/rejected": 0.09364360570907593, |
| "logps/chosen": -1115.906005859375, |
| "logps/rejected": -4257.9716796875, |
| "loss": -91.7768, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -241.649658203125, |
| "rewards/margins": 967.1146240234375, |
| "rewards/rejected": -1208.7642822265625, |
| "step": 1740 |
| }, |
| { |
| "epoch": 1.551111111111111, |
| "grad_norm": 2584.12890625, |
| "learning_rate": 4.743750889411342e-06, |
| "logits/chosen": 0.2136339694261551, |
| "logits/rejected": 0.08267398178577423, |
| "logps/chosen": -702.8377685546875, |
| "logps/rejected": -4060.947265625, |
| "loss": -115.7741, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -118.99308776855469, |
| "rewards/margins": 1030.06689453125, |
| "rewards/rejected": -1149.0599365234375, |
| "step": 1745 |
| }, |
| { |
| "epoch": 1.5555555555555556, |
| "grad_norm": 2166.552978515625, |
| "learning_rate": 4.720478800533647e-06, |
| "logits/chosen": 0.24663996696472168, |
| "logits/rejected": 0.1067671999335289, |
| "logps/chosen": -1155.109375, |
| "logps/rejected": -4144.953125, |
| "loss": -89.86, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -249.94668579101562, |
| "rewards/margins": 923.3084106445312, |
| "rewards/rejected": -1173.255126953125, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.56, |
| "grad_norm": 3155.211669921875, |
| "learning_rate": 4.697212784525009e-06, |
| "logits/chosen": 0.2132018506526947, |
| "logits/rejected": 0.06178309768438339, |
| "logps/chosen": -685.2828369140625, |
| "logps/rejected": -3697.40771484375, |
| "loss": -107.7679, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -112.0811538696289, |
| "rewards/margins": 929.0880737304688, |
| "rewards/rejected": -1041.1693115234375, |
| "step": 1755 |
| }, |
| { |
| "epoch": 1.5644444444444443, |
| "grad_norm": 4135.490234375, |
| "learning_rate": 4.673953346862189e-06, |
| "logits/chosen": 0.173181414604187, |
| "logits/rejected": 0.11724446713924408, |
| "logps/chosen": -847.1492309570312, |
| "logps/rejected": -4463.97900390625, |
| "loss": -133.5609, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -164.16900634765625, |
| "rewards/margins": 1107.0006103515625, |
| "rewards/rejected": -1271.169677734375, |
| "step": 1760 |
| }, |
| { |
| "epoch": 1.568888888888889, |
| "grad_norm": 4123.27490234375, |
| "learning_rate": 4.65070099287902e-06, |
| "logits/chosen": 0.19095957279205322, |
| "logits/rejected": 0.09138090908527374, |
| "logps/chosen": -1100.893798828125, |
| "logps/rejected": -5407.83447265625, |
| "loss": -141.662, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -233.89419555664062, |
| "rewards/margins": 1317.0438232421875, |
| "rewards/rejected": -1550.93798828125, |
| "step": 1765 |
| }, |
| { |
| "epoch": 1.5733333333333333, |
| "grad_norm": 1560.21240234375, |
| "learning_rate": 4.627456227755435e-06, |
| "logits/chosen": 0.1787518560886383, |
| "logits/rejected": 0.035512715578079224, |
| "logps/chosen": -1368.987060546875, |
| "logps/rejected": -4401.994140625, |
| "loss": -1.5697, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -312.0104064941406, |
| "rewards/margins": 941.6595458984375, |
| "rewards/rejected": -1253.669921875, |
| "step": 1770 |
| }, |
| { |
| "epoch": 1.5777777777777777, |
| "grad_norm": 32934.18359375, |
| "learning_rate": 4.604219556506492e-06, |
| "logits/chosen": 0.2877276539802551, |
| "logits/rejected": 0.10175907611846924, |
| "logps/chosen": -1241.767333984375, |
| "logps/rejected": -5513.44580078125, |
| "loss": -76.6882, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -280.1549377441406, |
| "rewards/margins": 1308.2108154296875, |
| "rewards/rejected": -1588.3658447265625, |
| "step": 1775 |
| }, |
| { |
| "epoch": 1.5822222222222222, |
| "grad_norm": 2151.84814453125, |
| "learning_rate": 4.580991483971403e-06, |
| "logits/chosen": 0.1933944970369339, |
| "logits/rejected": 0.06155434995889664, |
| "logps/chosen": -987.50244140625, |
| "logps/rejected": -4560.982421875, |
| "loss": -105.6609, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": -201.29513549804688, |
| "rewards/margins": 1100.52783203125, |
| "rewards/rejected": -1301.822998046875, |
| "step": 1780 |
| }, |
| { |
| "epoch": 1.5866666666666667, |
| "grad_norm": 1171.986083984375, |
| "learning_rate": 4.557772514802564e-06, |
| "logits/chosen": 0.2200632095336914, |
| "logits/rejected": 0.12937499582767487, |
| "logps/chosen": -743.1904907226562, |
| "logps/rejected": -5043.15576171875, |
| "loss": -153.4002, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -131.7304229736328, |
| "rewards/margins": 1311.0921630859375, |
| "rewards/rejected": -1442.822509765625, |
| "step": 1785 |
| }, |
| { |
| "epoch": 1.5911111111111111, |
| "grad_norm": 1308.8167724609375, |
| "learning_rate": 4.5345631534545935e-06, |
| "logits/chosen": 0.29665079712867737, |
| "logits/rejected": 0.10534970462322235, |
| "logps/chosen": -745.3150634765625, |
| "logps/rejected": -5404.919921875, |
| "loss": -168.1946, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -128.2810516357422, |
| "rewards/margins": 1427.3328857421875, |
| "rewards/rejected": -1555.6141357421875, |
| "step": 1790 |
| }, |
| { |
| "epoch": 1.5955555555555554, |
| "grad_norm": 1778.48779296875, |
| "learning_rate": 4.511363904173366e-06, |
| "logits/chosen": 0.23913387954235077, |
| "logits/rejected": 0.07443296164274216, |
| "logps/chosen": -1030.5181884765625, |
| "logps/rejected": -5341.0888671875, |
| "loss": -146.5949, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -212.07064819335938, |
| "rewards/margins": 1324.4898681640625, |
| "rewards/rejected": -1536.560546875, |
| "step": 1795 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 8834.0068359375, |
| "learning_rate": 4.488175270985065e-06, |
| "logits/chosen": 0.1603962481021881, |
| "logits/rejected": 0.030650785192847252, |
| "logps/chosen": -1292.248291015625, |
| "logps/rejected": -4622.3115234375, |
| "loss": -96.2908, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -296.3851318359375, |
| "rewards/margins": 1023.2731323242188, |
| "rewards/rejected": -1319.6583251953125, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.6044444444444443, |
| "grad_norm": 3213.7578125, |
| "learning_rate": 4.46499775768523e-06, |
| "logits/chosen": 0.2049437314271927, |
| "logits/rejected": 0.09727514535188675, |
| "logps/chosen": -1178.759521484375, |
| "logps/rejected": -5381.6337890625, |
| "loss": -151.4572, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -260.3944396972656, |
| "rewards/margins": 1285.650634765625, |
| "rewards/rejected": -1546.045166015625, |
| "step": 1805 |
| }, |
| { |
| "epoch": 1.608888888888889, |
| "grad_norm": 1180.321044921875, |
| "learning_rate": 4.441831867827806e-06, |
| "logits/chosen": 0.19563202559947968, |
| "logits/rejected": 0.028948839753866196, |
| "logps/chosen": -1116.917236328125, |
| "logps/rejected": -4179.2021484375, |
| "loss": -89.8656, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -243.24179077148438, |
| "rewards/margins": 945.7950439453125, |
| "rewards/rejected": -1189.036865234375, |
| "step": 1810 |
| }, |
| { |
| "epoch": 1.6133333333333333, |
| "grad_norm": 32007.529296875, |
| "learning_rate": 4.418678104714214e-06, |
| "logits/chosen": 0.18151655793190002, |
| "logits/rejected": 0.10459226369857788, |
| "logps/chosen": -1238.7088623046875, |
| "logps/rejected": -5372.6796875, |
| "loss": -36.7473, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -275.9144592285156, |
| "rewards/margins": 1263.199951171875, |
| "rewards/rejected": -1539.114501953125, |
| "step": 1815 |
| }, |
| { |
| "epoch": 1.6177777777777778, |
| "grad_norm": 4922.98291015625, |
| "learning_rate": 4.395536971382403e-06, |
| "logits/chosen": 0.25195056200027466, |
| "logits/rejected": 0.06397799402475357, |
| "logps/chosen": -614.5676879882812, |
| "logps/rejected": -4044.288330078125, |
| "loss": -129.0167, |
| "rewards/accuracies": 0.7562500238418579, |
| "rewards/chosen": -88.91123962402344, |
| "rewards/margins": 1059.1080322265625, |
| "rewards/rejected": -1148.0191650390625, |
| "step": 1820 |
| }, |
| { |
| "epoch": 1.6222222222222222, |
| "grad_norm": 18587.265625, |
| "learning_rate": 4.372408970595931e-06, |
| "logits/chosen": 0.2288244515657425, |
| "logits/rejected": 0.12830862402915955, |
| "logps/chosen": -1080.5850830078125, |
| "logps/rejected": -4167.451171875, |
| "loss": -24.3531, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -233.8815460205078, |
| "rewards/margins": 948.3587036132812, |
| "rewards/rejected": -1182.2403564453125, |
| "step": 1825 |
| }, |
| { |
| "epoch": 1.6266666666666667, |
| "grad_norm": 1632.0062255859375, |
| "learning_rate": 4.349294604833037e-06, |
| "logits/chosen": 0.19971409440040588, |
| "logits/rejected": 0.09651723504066467, |
| "logps/chosen": -903.0916748046875, |
| "logps/rejected": -5288.95068359375, |
| "loss": -165.1823, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -180.99258422851562, |
| "rewards/margins": 1341.563232421875, |
| "rewards/rejected": -1522.5560302734375, |
| "step": 1830 |
| }, |
| { |
| "epoch": 1.6311111111111112, |
| "grad_norm": 3647.180908203125, |
| "learning_rate": 4.326194376275729e-06, |
| "logits/chosen": 0.20333686470985413, |
| "logits/rejected": 0.04894101247191429, |
| "logps/chosen": -1200.640380859375, |
| "logps/rejected": -4328.55810546875, |
| "loss": -15.3313, |
| "rewards/accuracies": 0.7124999761581421, |
| "rewards/chosen": -267.91546630859375, |
| "rewards/margins": 959.5001220703125, |
| "rewards/rejected": -1227.4156494140625, |
| "step": 1835 |
| }, |
| { |
| "epoch": 1.6355555555555554, |
| "grad_norm": 369.36090087890625, |
| "learning_rate": 4.303108786798869e-06, |
| "logits/chosen": 0.23952741920948029, |
| "logits/rejected": 0.11030056327581406, |
| "logps/chosen": -868.2476806640625, |
| "logps/rejected": -6594.4208984375, |
| "loss": -203.4027, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": -168.63491821289062, |
| "rewards/margins": 1745.8909912109375, |
| "rewards/rejected": -1914.5257568359375, |
| "step": 1840 |
| }, |
| { |
| "epoch": 1.6400000000000001, |
| "grad_norm": 840.448974609375, |
| "learning_rate": 4.280038337959268e-06, |
| "logits/chosen": 0.22680750489234924, |
| "logits/rejected": 0.04391016438603401, |
| "logps/chosen": -1048.15625, |
| "logps/rejected": -3534.797607421875, |
| "loss": -36.2203, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -226.58468627929688, |
| "rewards/margins": 769.23974609375, |
| "rewards/rejected": -995.8245239257812, |
| "step": 1845 |
| }, |
| { |
| "epoch": 1.6444444444444444, |
| "grad_norm": 2940.37646484375, |
| "learning_rate": 4.256983530984797e-06, |
| "logits/chosen": 0.2374953329563141, |
| "logits/rejected": 0.07543542981147766, |
| "logps/chosen": -1364.133056640625, |
| "logps/rejected": -4755.900390625, |
| "loss": -75.3059, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -317.7399597167969, |
| "rewards/margins": 1042.538818359375, |
| "rewards/rejected": -1360.2789306640625, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.6488888888888888, |
| "grad_norm": 4782.626953125, |
| "learning_rate": 4.2339448667634885e-06, |
| "logits/chosen": 0.21303322911262512, |
| "logits/rejected": 0.08738868683576584, |
| "logps/chosen": -2001.385498046875, |
| "logps/rejected": -4847.5869140625, |
| "loss": -8.6322, |
| "rewards/accuracies": 0.7562500238418579, |
| "rewards/chosen": -505.5797424316406, |
| "rewards/margins": 881.3775634765625, |
| "rewards/rejected": -1386.957275390625, |
| "step": 1855 |
| }, |
| { |
| "epoch": 1.6533333333333333, |
| "grad_norm": 30556.8828125, |
| "learning_rate": 4.21092284583266e-06, |
| "logits/chosen": 0.24975800514221191, |
| "logits/rejected": 0.08625680953264236, |
| "logps/chosen": -1471.2796630859375, |
| "logps/rejected": -5477.4912109375, |
| "loss": -56.1464, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -351.5309753417969, |
| "rewards/margins": 1227.842529296875, |
| "rewards/rejected": -1579.3734130859375, |
| "step": 1860 |
| }, |
| { |
| "epoch": 1.6577777777777778, |
| "grad_norm": 9967.26953125, |
| "learning_rate": 4.187917968368036e-06, |
| "logits/chosen": 0.24700018763542175, |
| "logits/rejected": 0.14024746417999268, |
| "logps/chosen": -1792.168701171875, |
| "logps/rejected": -6281.21484375, |
| "loss": -94.6902, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -446.555419921875, |
| "rewards/margins": 1371.84716796875, |
| "rewards/rejected": -1818.4027099609375, |
| "step": 1865 |
| }, |
| { |
| "epoch": 1.6622222222222223, |
| "grad_norm": 5255.771484375, |
| "learning_rate": 4.164930734172884e-06, |
| "logits/chosen": 0.2706100344657898, |
| "logits/rejected": 0.10564364492893219, |
| "logps/chosen": -855.5506591796875, |
| "logps/rejected": -5278.5625, |
| "loss": -168.4971, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -161.1185760498047, |
| "rewards/margins": 1357.52880859375, |
| "rewards/rejected": -1518.647216796875, |
| "step": 1870 |
| }, |
| { |
| "epoch": 1.6666666666666665, |
| "grad_norm": 1137.33056640625, |
| "learning_rate": 4.141961642667152e-06, |
| "logits/chosen": 0.22544869780540466, |
| "logits/rejected": 0.1046823039650917, |
| "logps/chosen": -853.8209228515625, |
| "logps/rejected": -4098.5380859375, |
| "loss": -98.7491, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -156.70986938476562, |
| "rewards/margins": 1000.6829223632812, |
| "rewards/rejected": -1157.392822265625, |
| "step": 1875 |
| }, |
| { |
| "epoch": 1.6711111111111112, |
| "grad_norm": 3251.376708984375, |
| "learning_rate": 4.119011192876624e-06, |
| "logits/chosen": 0.2323744297027588, |
| "logits/rejected": 0.11708340793848038, |
| "logps/chosen": -1144.6881103515625, |
| "logps/rejected": -5402.4853515625, |
| "loss": -121.1345, |
| "rewards/accuracies": 0.768750011920929, |
| "rewards/chosen": -251.96133422851562, |
| "rewards/margins": 1302.1949462890625, |
| "rewards/rejected": -1554.1563720703125, |
| "step": 1880 |
| }, |
| { |
| "epoch": 1.6755555555555555, |
| "grad_norm": 3250.385986328125, |
| "learning_rate": 4.09607988342207e-06, |
| "logits/chosen": 0.2249506413936615, |
| "logits/rejected": 0.08312972635030746, |
| "logps/chosen": -1434.54541015625, |
| "logps/rejected": -6453.66259765625, |
| "loss": -130.9487, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -339.8567810058594, |
| "rewards/margins": 1531.913818359375, |
| "rewards/rejected": -1871.770751953125, |
| "step": 1885 |
| }, |
| { |
| "epoch": 1.6800000000000002, |
| "grad_norm": 2996.223388671875, |
| "learning_rate": 4.0731682125084244e-06, |
| "logits/chosen": 0.2461550533771515, |
| "logits/rejected": 0.11112775653600693, |
| "logps/chosen": -1187.622802734375, |
| "logps/rejected": -5081.6640625, |
| "loss": -67.5338, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -262.11505126953125, |
| "rewards/margins": 1196.88916015625, |
| "rewards/rejected": -1459.004150390625, |
| "step": 1890 |
| }, |
| { |
| "epoch": 1.6844444444444444, |
| "grad_norm": 788.92431640625, |
| "learning_rate": 4.0502766779139485e-06, |
| "logits/chosen": 0.2763102948665619, |
| "logits/rejected": 0.1312466859817505, |
| "logps/chosen": -644.9693603515625, |
| "logps/rejected": -4451.1240234375, |
| "loss": -144.7744, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -97.20635986328125, |
| "rewards/margins": 1170.815185546875, |
| "rewards/rejected": -1268.0216064453125, |
| "step": 1895 |
| }, |
| { |
| "epoch": 1.6888888888888889, |
| "grad_norm": 24277.05859375, |
| "learning_rate": 4.027405776979426e-06, |
| "logits/chosen": 0.2461940348148346, |
| "logits/rejected": 0.10517482459545135, |
| "logps/chosen": -1614.966064453125, |
| "logps/rejected": -5885.6884765625, |
| "loss": -23.2786, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -385.0234375, |
| "rewards/margins": 1312.0137939453125, |
| "rewards/rejected": -1697.037109375, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.6933333333333334, |
| "grad_norm": 7921.02783203125, |
| "learning_rate": 4.0045560065973535e-06, |
| "logits/chosen": 0.24439683556556702, |
| "logits/rejected": 0.137715682387352, |
| "logps/chosen": -907.2028198242188, |
| "logps/rejected": -6215.12890625, |
| "loss": -171.6943, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -180.00177001953125, |
| "rewards/margins": 1617.5052490234375, |
| "rewards/rejected": -1797.507080078125, |
| "step": 1905 |
| }, |
| { |
| "epoch": 1.6977777777777778, |
| "grad_norm": 500.7940979003906, |
| "learning_rate": 3.981727863201146e-06, |
| "logits/chosen": 0.23465153574943542, |
| "logits/rejected": 0.04516502842307091, |
| "logps/chosen": -952.7020263671875, |
| "logps/rejected": -4535.16357421875, |
| "loss": -93.9728, |
| "rewards/accuracies": 0.856249988079071, |
| "rewards/chosen": -186.6133270263672, |
| "rewards/margins": 1108.33837890625, |
| "rewards/rejected": -1294.95166015625, |
| "step": 1910 |
| }, |
| { |
| "epoch": 1.7022222222222223, |
| "grad_norm": 9174.0546875, |
| "learning_rate": 3.958921842754351e-06, |
| "logits/chosen": 0.27381208539009094, |
| "logits/rejected": 0.11198027431964874, |
| "logps/chosen": -1263.0186767578125, |
| "logps/rejected": -5507.2158203125, |
| "loss": -149.7667, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -291.18353271484375, |
| "rewards/margins": 1298.3118896484375, |
| "rewards/rejected": -1589.495361328125, |
| "step": 1915 |
| }, |
| { |
| "epoch": 1.7066666666666666, |
| "grad_norm": 1700.24072265625, |
| "learning_rate": 3.936138440739875e-06, |
| "logits/chosen": 0.27449166774749756, |
| "logits/rejected": 0.12847240269184113, |
| "logps/chosen": -1872.1488037109375, |
| "logps/rejected": -5714.8662109375, |
| "loss": -67.8528, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -469.8207092285156, |
| "rewards/margins": 1178.263916015625, |
| "rewards/rejected": -1648.0845947265625, |
| "step": 1920 |
| }, |
| { |
| "epoch": 1.7111111111111112, |
| "grad_norm": 700.5916137695312, |
| "learning_rate": 3.913378152149214e-06, |
| "logits/chosen": 0.24738919734954834, |
| "logits/rejected": 0.1326262354850769, |
| "logps/chosen": -1318.026611328125, |
| "logps/rejected": -6299.62890625, |
| "loss": -160.6032, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -301.7664489746094, |
| "rewards/margins": 1518.3792724609375, |
| "rewards/rejected": -1820.145751953125, |
| "step": 1925 |
| }, |
| { |
| "epoch": 1.7155555555555555, |
| "grad_norm": 2220.398193359375, |
| "learning_rate": 3.890641471471706e-06, |
| "logits/chosen": 0.3000715374946594, |
| "logits/rejected": 0.1240081787109375, |
| "logps/chosen": -685.4688110351562, |
| "logps/rejected": -4482.275390625, |
| "loss": -130.5446, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -113.60041809082031, |
| "rewards/margins": 1168.1650390625, |
| "rewards/rejected": -1281.765380859375, |
| "step": 1930 |
| }, |
| { |
| "epoch": 1.72, |
| "grad_norm": 2720.75634765625, |
| "learning_rate": 3.86792889268378e-06, |
| "logits/chosen": 0.2833808958530426, |
| "logits/rejected": 0.1499921977519989, |
| "logps/chosen": -1256.76953125, |
| "logps/rejected": -5304.7353515625, |
| "loss": -38.7009, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -288.4460144042969, |
| "rewards/margins": 1236.643310546875, |
| "rewards/rejected": -1525.089599609375, |
| "step": 1935 |
| }, |
| { |
| "epoch": 1.7244444444444444, |
| "grad_norm": 630.7939453125, |
| "learning_rate": 3.845240909238234e-06, |
| "logits/chosen": 0.25388604402542114, |
| "logits/rejected": 0.11848431825637817, |
| "logps/chosen": -774.7329711914062, |
| "logps/rejected": -5794.5478515625, |
| "loss": -183.0251, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -141.75735473632812, |
| "rewards/margins": 1530.6614990234375, |
| "rewards/rejected": -1672.418701171875, |
| "step": 1940 |
| }, |
| { |
| "epoch": 1.728888888888889, |
| "grad_norm": 2211.608154296875, |
| "learning_rate": 3.8225780140535025e-06, |
| "logits/chosen": 0.2086627185344696, |
| "logits/rejected": 0.11551016569137573, |
| "logps/chosen": -911.6271362304688, |
| "logps/rejected": -5228.11181640625, |
| "loss": -157.3048, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": -186.49317932128906, |
| "rewards/margins": 1314.8564453125, |
| "rewards/rejected": -1501.349609375, |
| "step": 1945 |
| }, |
| { |
| "epoch": 1.7333333333333334, |
| "grad_norm": 2765.096435546875, |
| "learning_rate": 3.7999406995029565e-06, |
| "logits/chosen": 0.20155318081378937, |
| "logits/rejected": 0.07800821959972382, |
| "logps/chosen": -1026.3896484375, |
| "logps/rejected": -5046.630859375, |
| "loss": -112.5577, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": -213.7878875732422, |
| "rewards/margins": 1228.40625, |
| "rewards/rejected": -1442.1942138671875, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.7377777777777776, |
| "grad_norm": 2560.62939453125, |
| "learning_rate": 3.777329457404202e-06, |
| "logits/chosen": 0.25891679525375366, |
| "logits/rejected": 0.13444481790065765, |
| "logps/chosen": -1175.992919921875, |
| "logps/rejected": -7102.63818359375, |
| "loss": -199.6698, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -255.4857177734375, |
| "rewards/margins": 1806.615966796875, |
| "rewards/rejected": -2062.101806640625, |
| "step": 1955 |
| }, |
| { |
| "epoch": 1.7422222222222223, |
| "grad_norm": 707.2987670898438, |
| "learning_rate": 3.754744779008395e-06, |
| "logits/chosen": 0.22196459770202637, |
| "logits/rejected": 0.08021806925535202, |
| "logps/chosen": -1127.932861328125, |
| "logps/rejected": -4943.876953125, |
| "loss": -100.5632, |
| "rewards/accuracies": 0.768750011920929, |
| "rewards/chosen": -240.08834838867188, |
| "rewards/margins": 1175.7769775390625, |
| "rewards/rejected": -1415.865234375, |
| "step": 1960 |
| }, |
| { |
| "epoch": 1.7466666666666666, |
| "grad_norm": 946.744384765625, |
| "learning_rate": 3.7321871549895715e-06, |
| "logits/chosen": 0.27232426404953003, |
| "logits/rejected": 0.10154370963573456, |
| "logps/chosen": -945.1385498046875, |
| "logps/rejected": -4433.72900390625, |
| "loss": -113.8801, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -188.45480346679688, |
| "rewards/margins": 1073.018798828125, |
| "rewards/rejected": -1261.4736328125, |
| "step": 1965 |
| }, |
| { |
| "epoch": 1.751111111111111, |
| "grad_norm": 3697.505126953125, |
| "learning_rate": 3.709657075433982e-06, |
| "logits/chosen": 0.2271924763917923, |
| "logits/rejected": 0.04478111118078232, |
| "logps/chosen": -1110.8345947265625, |
| "logps/rejected": -4532.7265625, |
| "loss": -15.3714, |
| "rewards/accuracies": 0.7562500238418579, |
| "rewards/chosen": -241.8577423095703, |
| "rewards/margins": 1051.829833984375, |
| "rewards/rejected": -1293.687744140625, |
| "step": 1970 |
| }, |
| { |
| "epoch": 1.7555555555555555, |
| "grad_norm": 27715.974609375, |
| "learning_rate": 3.68715502982945e-06, |
| "logits/chosen": 0.24351032078266144, |
| "logits/rejected": 0.14694446325302124, |
| "logps/chosen": -1400.335205078125, |
| "logps/rejected": -7460.48583984375, |
| "loss": -162.933, |
| "rewards/accuracies": 0.7562500238418579, |
| "rewards/chosen": -324.5793151855469, |
| "rewards/margins": 1843.2681884765625, |
| "rewards/rejected": -2167.847412109375, |
| "step": 1975 |
| }, |
| { |
| "epoch": 1.76, |
| "grad_norm": 286.2002868652344, |
| "learning_rate": 3.6646815070547316e-06, |
| "logits/chosen": 0.20532718300819397, |
| "logits/rejected": 0.06882862746715546, |
| "logps/chosen": -878.3626098632812, |
| "logps/rejected": -6145.6318359375, |
| "loss": -198.1174, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -170.4589080810547, |
| "rewards/margins": 1607.0462646484375, |
| "rewards/rejected": -1777.5052490234375, |
| "step": 1980 |
| }, |
| { |
| "epoch": 1.7644444444444445, |
| "grad_norm": 9633.6513671875, |
| "learning_rate": 3.6422369953688973e-06, |
| "logits/chosen": 0.15580406785011292, |
| "logits/rejected": 0.09763354808092117, |
| "logps/chosen": -1395.62158203125, |
| "logps/rejected": -5257.75634765625, |
| "loss": -126.3119, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -325.6045227050781, |
| "rewards/margins": 1179.4747314453125, |
| "rewards/rejected": -1505.0791015625, |
| "step": 1985 |
| }, |
| { |
| "epoch": 1.7688888888888887, |
| "grad_norm": 2226.966552734375, |
| "learning_rate": 3.619821982400725e-06, |
| "logits/chosen": 0.20389437675476074, |
| "logits/rejected": 0.0854041799902916, |
| "logps/chosen": -791.7081298828125, |
| "logps/rejected": -4680.9716796875, |
| "loss": -131.5356, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -140.87710571289062, |
| "rewards/margins": 1196.602783203125, |
| "rewards/rejected": -1337.47998046875, |
| "step": 1990 |
| }, |
| { |
| "epoch": 1.7733333333333334, |
| "grad_norm": 4158.79150390625, |
| "learning_rate": 3.5974369551381023e-06, |
| "logits/chosen": 0.24090011417865753, |
| "logits/rejected": 0.108218252658844, |
| "logps/chosen": -835.3377075195312, |
| "logps/rejected": -4290.2568359375, |
| "loss": -129.5949, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -161.09817504882812, |
| "rewards/margins": 1058.4774169921875, |
| "rewards/rejected": -1219.5758056640625, |
| "step": 1995 |
| }, |
| { |
| "epoch": 1.7777777777777777, |
| "grad_norm": 55221.4765625, |
| "learning_rate": 3.575082399917451e-06, |
| "logits/chosen": 0.2529537081718445, |
| "logits/rejected": 0.13592717051506042, |
| "logps/chosen": -1170.9078369140625, |
| "logps/rejected": -4635.1025390625, |
| "loss": -78.99, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -260.46441650390625, |
| "rewards/margins": 1063.17529296875, |
| "rewards/rejected": -1323.6397705078125, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.7777777777777777, |
| "eval_logits/chosen": 0.25555315613746643, |
| "eval_logits/rejected": 0.1154949739575386, |
| "eval_logps/chosen": -962.0708618164062, |
| "eval_logps/rejected": -5228.919921875, |
| "eval_loss": -118.59148406982422, |
| "eval_rewards/accuracies": 0.7950000166893005, |
| "eval_rewards/chosen": -195.61526489257812, |
| "eval_rewards/margins": 1306.0662841796875, |
| "eval_rewards/rejected": -1501.6815185546875, |
| "eval_runtime": 2189.3694, |
| "eval_samples_per_second": 1.827, |
| "eval_steps_per_second": 0.914, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.7822222222222224, |
| "grad_norm": 6284.64111328125, |
| "learning_rate": 3.5527588024131542e-06, |
| "logits/chosen": 0.24829688668251038, |
| "logits/rejected": 0.07171948999166489, |
| "logps/chosen": -1000.9933471679688, |
| "logps/rejected": -3266.4541015625, |
| "loss": -57.9582, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -211.169189453125, |
| "rewards/margins": 703.8554077148438, |
| "rewards/rejected": -915.0247192382812, |
| "step": 2005 |
| }, |
| { |
| "epoch": 1.7866666666666666, |
| "grad_norm": 1038.489501953125, |
| "learning_rate": 3.5304666476270133e-06, |
| "logits/chosen": 0.2809663414955139, |
| "logits/rejected": 0.14121077954769135, |
| "logps/chosen": -1090.3250732421875, |
| "logps/rejected": -3823.89697265625, |
| "loss": -66.4715, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -242.9805145263672, |
| "rewards/margins": 840.3790283203125, |
| "rewards/rejected": -1083.3594970703125, |
| "step": 2010 |
| }, |
| { |
| "epoch": 1.791111111111111, |
| "grad_norm": 11290.4970703125, |
| "learning_rate": 3.5082064198777e-06, |
| "logits/chosen": 0.27660128474235535, |
| "logits/rejected": 0.13636983931064606, |
| "logps/chosen": -1400.4759521484375, |
| "logps/rejected": -5616.1904296875, |
| "loss": -40.4341, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -328.164794921875, |
| "rewards/margins": 1289.2822265625, |
| "rewards/rejected": -1617.4468994140625, |
| "step": 2015 |
| }, |
| { |
| "epoch": 1.7955555555555556, |
| "grad_norm": 1743.8653564453125, |
| "learning_rate": 3.4859786027902485e-06, |
| "logits/chosen": 0.25726962089538574, |
| "logits/rejected": 0.12484592199325562, |
| "logps/chosen": -954.4866333007812, |
| "logps/rejected": -6539.61767578125, |
| "loss": -204.0919, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -197.6421661376953, |
| "rewards/margins": 1699.316650390625, |
| "rewards/rejected": -1896.9586181640625, |
| "step": 2020 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 4847.9658203125, |
| "learning_rate": 3.463783679285535e-06, |
| "logits/chosen": 0.2333337813615799, |
| "logits/rejected": 0.16198399662971497, |
| "logps/chosen": -1057.621826171875, |
| "logps/rejected": -6812.3671875, |
| "loss": -170.2748, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -225.828125, |
| "rewards/margins": 1750.376708984375, |
| "rewards/rejected": -1976.2047119140625, |
| "step": 2025 |
| }, |
| { |
| "epoch": 1.8044444444444445, |
| "grad_norm": 33725.33203125, |
| "learning_rate": 3.441622131569789e-06, |
| "logits/chosen": 0.2570604085922241, |
| "logits/rejected": 0.14060847461223602, |
| "logps/chosen": -1557.6676025390625, |
| "logps/rejected": -6319.19482421875, |
| "loss": -105.8451, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -375.49542236328125, |
| "rewards/margins": 1451.328369140625, |
| "rewards/rejected": -1826.8238525390625, |
| "step": 2030 |
| }, |
| { |
| "epoch": 1.8088888888888888, |
| "grad_norm": 15339.69921875, |
| "learning_rate": 3.4194944411241213e-06, |
| "logits/chosen": 0.2011386901140213, |
| "logits/rejected": 0.09683053195476532, |
| "logps/chosen": -1450.0079345703125, |
| "logps/rejected": -5528.1240234375, |
| "loss": -50.8625, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -343.0514221191406, |
| "rewards/margins": 1246.124267578125, |
| "rewards/rejected": -1589.175537109375, |
| "step": 2035 |
| }, |
| { |
| "epoch": 1.8133333333333335, |
| "grad_norm": 2727.2666015625, |
| "learning_rate": 3.3974010886940618e-06, |
| "logits/chosen": 0.26924824714660645, |
| "logits/rejected": 0.14655689895153046, |
| "logps/chosen": -1023.5804443359375, |
| "logps/rejected": -5686.3837890625, |
| "loss": -132.3764, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -209.53689575195312, |
| "rewards/margins": 1428.213623046875, |
| "rewards/rejected": -1637.7503662109375, |
| "step": 2040 |
| }, |
| { |
| "epoch": 1.8177777777777777, |
| "grad_norm": 1484.5675048828125, |
| "learning_rate": 3.3753425542791106e-06, |
| "logits/chosen": 0.24760404229164124, |
| "logits/rejected": 0.05067341402173042, |
| "logps/chosen": -1063.3309326171875, |
| "logps/rejected": -5101.98291015625, |
| "loss": -111.0792, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -221.51705932617188, |
| "rewards/margins": 1244.4766845703125, |
| "rewards/rejected": -1465.9937744140625, |
| "step": 2045 |
| }, |
| { |
| "epoch": 1.8222222222222222, |
| "grad_norm": 125.7868423461914, |
| "learning_rate": 3.3533193171223143e-06, |
| "logits/chosen": 0.2911909222602844, |
| "logits/rejected": 0.14763948321342468, |
| "logps/chosen": -875.8429565429688, |
| "logps/rejected": -5039.96630859375, |
| "loss": -100.6206, |
| "rewards/accuracies": 0.7437499761581421, |
| "rewards/chosen": -171.8740234375, |
| "rewards/margins": 1272.8016357421875, |
| "rewards/rejected": -1444.67578125, |
| "step": 2050 |
| }, |
| { |
| "epoch": 1.8266666666666667, |
| "grad_norm": 283.5314636230469, |
| "learning_rate": 3.3313318556998523e-06, |
| "logits/chosen": 0.30172306299209595, |
| "logits/rejected": 0.14604321122169495, |
| "logps/chosen": -1543.921630859375, |
| "logps/rejected": -5745.048828125, |
| "loss": -60.9914, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -370.66815185546875, |
| "rewards/margins": 1284.1890869140625, |
| "rewards/rejected": -1654.857177734375, |
| "step": 2055 |
| }, |
| { |
| "epoch": 1.8311111111111111, |
| "grad_norm": 6189.3994140625, |
| "learning_rate": 3.3093806477106422e-06, |
| "logits/chosen": 0.24961909651756287, |
| "logits/rejected": 0.07614009082317352, |
| "logps/chosen": -959.4791870117188, |
| "logps/rejected": -3959.48046875, |
| "loss": -106.988, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -186.31588745117188, |
| "rewards/margins": 934.6566162109375, |
| "rewards/rejected": -1120.9725341796875, |
| "step": 2060 |
| }, |
| { |
| "epoch": 1.8355555555555556, |
| "grad_norm": 3273.436279296875, |
| "learning_rate": 3.2874661700659586e-06, |
| "logits/chosen": 0.24432864785194397, |
| "logits/rejected": 0.15967050194740295, |
| "logps/chosen": -963.5921630859375, |
| "logps/rejected": -3926.388671875, |
| "loss": -55.2178, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": -201.84323120117188, |
| "rewards/margins": 909.5814208984375, |
| "rewards/rejected": -1111.424560546875, |
| "step": 2065 |
| }, |
| { |
| "epoch": 1.8399999999999999, |
| "grad_norm": 6412.69921875, |
| "learning_rate": 3.2655888988790773e-06, |
| "logits/chosen": 0.2859584391117096, |
| "logits/rejected": 0.14298884570598602, |
| "logps/chosen": -787.3509521484375, |
| "logps/rejected": -3874.66796875, |
| "loss": -117.0411, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -147.5047607421875, |
| "rewards/margins": 949.2532958984375, |
| "rewards/rejected": -1096.758056640625, |
| "step": 2070 |
| }, |
| { |
| "epoch": 1.8444444444444446, |
| "grad_norm": 2904.4208984375, |
| "learning_rate": 3.2437493094549223e-06, |
| "logits/chosen": 0.2775154709815979, |
| "logits/rejected": 0.13756434619426727, |
| "logps/chosen": -1029.6170654296875, |
| "logps/rejected": -6512.67724609375, |
| "loss": -180.2663, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -212.7581024169922, |
| "rewards/margins": 1673.581298828125, |
| "rewards/rejected": -1886.3394775390625, |
| "step": 2075 |
| }, |
| { |
| "epoch": 1.8488888888888888, |
| "grad_norm": 13741.6259765625, |
| "learning_rate": 3.221947876279749e-06, |
| "logits/chosen": 0.2896290719509125, |
| "logits/rejected": 0.14775845408439636, |
| "logps/chosen": -954.1802978515625, |
| "logps/rejected": -5414.69091796875, |
| "loss": -64.0171, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -196.6832733154297, |
| "rewards/margins": 1362.746826171875, |
| "rewards/rejected": -1559.4300537109375, |
| "step": 2080 |
| }, |
| { |
| "epoch": 1.8533333333333335, |
| "grad_norm": 65065.3203125, |
| "learning_rate": 3.200185073010831e-06, |
| "logits/chosen": 0.26258403062820435, |
| "logits/rejected": 0.14745929837226868, |
| "logps/chosen": -1522.0899658203125, |
| "logps/rejected": -5227.2353515625, |
| "loss": -15.5414, |
| "rewards/accuracies": 0.768750011920929, |
| "rewards/chosen": -365.59576416015625, |
| "rewards/margins": 1135.225341796875, |
| "rewards/rejected": -1500.8211669921875, |
| "step": 2085 |
| }, |
| { |
| "epoch": 1.8577777777777778, |
| "grad_norm": 1687.3052978515625, |
| "learning_rate": 3.1784613724661673e-06, |
| "logits/chosen": 0.23484960198402405, |
| "logits/rejected": 0.14270682632923126, |
| "logps/chosen": -1005.099609375, |
| "logps/rejected": -5562.2373046875, |
| "loss": -159.2417, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -207.5850067138672, |
| "rewards/margins": 1393.408935546875, |
| "rewards/rejected": -1600.9937744140625, |
| "step": 2090 |
| }, |
| { |
| "epoch": 1.8622222222222222, |
| "grad_norm": 3218.713134765625, |
| "learning_rate": 3.1567772466142156e-06, |
| "logits/chosen": 0.3124232888221741, |
| "logits/rejected": 0.15169766545295715, |
| "logps/chosen": -892.7361450195312, |
| "logps/rejected": -3794.728515625, |
| "loss": -43.8165, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -180.31503295898438, |
| "rewards/margins": 895.3651123046875, |
| "rewards/rejected": -1075.68017578125, |
| "step": 2095 |
| }, |
| { |
| "epoch": 1.8666666666666667, |
| "grad_norm": 3074.12255859375, |
| "learning_rate": 3.135133166563633e-06, |
| "logits/chosen": 0.3098248541355133, |
| "logits/rejected": 0.18088603019714355, |
| "logps/chosen": -946.4464111328125, |
| "logps/rejected": -6187.61328125, |
| "loss": -159.383, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -195.90956115722656, |
| "rewards/margins": 1596.5634765625, |
| "rewards/rejected": -1792.472900390625, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.871111111111111, |
| "grad_norm": 576.5510864257812, |
| "learning_rate": 3.1135296025530426e-06, |
| "logits/chosen": 0.24984732270240784, |
| "logits/rejected": 0.13234370946884155, |
| "logps/chosen": -798.1278686523438, |
| "logps/rejected": -4656.5185546875, |
| "loss": -116.6982, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -148.08984375, |
| "rewards/margins": 1182.6334228515625, |
| "rewards/rejected": -1330.7232666015625, |
| "step": 2105 |
| }, |
| { |
| "epoch": 1.8755555555555556, |
| "grad_norm": 2194.000244140625, |
| "learning_rate": 3.091967023940818e-06, |
| "logits/chosen": 0.2714166045188904, |
| "logits/rejected": 0.15113946795463562, |
| "logps/chosen": -803.9478149414062, |
| "logps/rejected": -4329.81591796875, |
| "loss": -125.6359, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -147.02865600585938, |
| "rewards/margins": 1084.868896484375, |
| "rewards/rejected": -1231.8975830078125, |
| "step": 2110 |
| }, |
| { |
| "epoch": 1.88, |
| "grad_norm": 2433.168701171875, |
| "learning_rate": 3.070445899194885e-06, |
| "logits/chosen": 0.24431154131889343, |
| "logits/rejected": 0.15248742699623108, |
| "logps/chosen": -626.6971435546875, |
| "logps/rejected": -5019.6328125, |
| "loss": -159.2351, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -91.44898986816406, |
| "rewards/margins": 1342.43310546875, |
| "rewards/rejected": -1433.882080078125, |
| "step": 2115 |
| }, |
| { |
| "epoch": 1.8844444444444446, |
| "grad_norm": 496.083251953125, |
| "learning_rate": 3.0489666958825435e-06, |
| "logits/chosen": 0.25379398465156555, |
| "logits/rejected": 0.12348195165395737, |
| "logps/chosen": -831.2281494140625, |
| "logps/rejected": -4773.64794921875, |
| "loss": -150.0979, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -154.3735809326172, |
| "rewards/margins": 1207.74072265625, |
| "rewards/rejected": -1362.1143798828125, |
| "step": 2120 |
| }, |
| { |
| "epoch": 1.8888888888888888, |
| "grad_norm": 1209.834228515625, |
| "learning_rate": 3.0275298806603102e-06, |
| "logits/chosen": 0.21688711643218994, |
| "logits/rejected": 0.09313462674617767, |
| "logps/chosen": -793.4132690429688, |
| "logps/rejected": -5038.3974609375, |
| "loss": -144.1405, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -147.80435180664062, |
| "rewards/margins": 1298.1739501953125, |
| "rewards/rejected": -1445.978271484375, |
| "step": 2125 |
| }, |
| { |
| "epoch": 1.8933333333333333, |
| "grad_norm": 21135.072265625, |
| "learning_rate": 3.0061359192637807e-06, |
| "logits/chosen": 0.28784361481666565, |
| "logits/rejected": 0.18038244545459747, |
| "logps/chosen": -1571.0714111328125, |
| "logps/rejected": -6410.5361328125, |
| "loss": -117.5507, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -377.8763427734375, |
| "rewards/margins": 1479.3931884765625, |
| "rewards/rejected": -1857.269287109375, |
| "step": 2130 |
| }, |
| { |
| "epoch": 1.8977777777777778, |
| "grad_norm": 12763.6298828125, |
| "learning_rate": 2.984785276497507e-06, |
| "logits/chosen": 0.31823939085006714, |
| "logits/rejected": 0.16759738326072693, |
| "logps/chosen": -2247.3828125, |
| "logps/rejected": -6463.50048828125, |
| "loss": -4.7558, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -583.8851928710938, |
| "rewards/margins": 1291.077392578125, |
| "rewards/rejected": -1874.9625244140625, |
| "step": 2135 |
| }, |
| { |
| "epoch": 1.9022222222222223, |
| "grad_norm": 3317.30126953125, |
| "learning_rate": 2.963478416224903e-06, |
| "logits/chosen": 0.2757074236869812, |
| "logits/rejected": 0.13910232484340668, |
| "logps/chosen": -2493.97119140625, |
| "logps/rejected": -5704.9130859375, |
| "loss": 107.7401, |
| "rewards/accuracies": 0.768750011920929, |
| "rewards/chosen": -649.1580810546875, |
| "rewards/margins": 990.18603515625, |
| "rewards/rejected": -1639.3443603515625, |
| "step": 2140 |
| }, |
| { |
| "epoch": 1.9066666666666667, |
| "grad_norm": 14934.9130859375, |
| "learning_rate": 2.9422158013581658e-06, |
| "logits/chosen": 0.274467796087265, |
| "logits/rejected": 0.10115502774715424, |
| "logps/chosen": -1841.6937255859375, |
| "logps/rejected": -7628.2978515625, |
| "loss": -182.1846, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -456.56787109375, |
| "rewards/margins": 1763.705810546875, |
| "rewards/rejected": -2220.27392578125, |
| "step": 2145 |
| }, |
| { |
| "epoch": 1.911111111111111, |
| "grad_norm": 16433.63671875, |
| "learning_rate": 2.920997893848219e-06, |
| "logits/chosen": 0.24389319121837616, |
| "logits/rejected": 0.13923580944538116, |
| "logps/chosen": -1711.043212890625, |
| "logps/rejected": -4636.3291015625, |
| "loss": 64.9863, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -429.0899353027344, |
| "rewards/margins": 895.8580322265625, |
| "rewards/rejected": -1324.947998046875, |
| "step": 2150 |
| }, |
| { |
| "epoch": 1.9155555555555557, |
| "grad_norm": 14178.9462890625, |
| "learning_rate": 2.899825154674674e-06, |
| "logits/chosen": 0.3027743399143219, |
| "logits/rejected": 0.13782671093940735, |
| "logps/chosen": -755.7228393554688, |
| "logps/rejected": -4505.7099609375, |
| "loss": -104.4201, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -140.9221954345703, |
| "rewards/margins": 1146.5015869140625, |
| "rewards/rejected": -1287.4237060546875, |
| "step": 2155 |
| }, |
| { |
| "epoch": 1.92, |
| "grad_norm": 4904.7080078125, |
| "learning_rate": 2.878698043835817e-06, |
| "logits/chosen": 0.26686787605285645, |
| "logits/rejected": 0.13966426253318787, |
| "logps/chosen": -678.2152099609375, |
| "logps/rejected": -4504.28564453125, |
| "loss": -145.052, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -106.86373138427734, |
| "rewards/margins": 1175.1090087890625, |
| "rewards/rejected": -1281.9727783203125, |
| "step": 2160 |
| }, |
| { |
| "epoch": 1.9244444444444444, |
| "grad_norm": 454.3016052246094, |
| "learning_rate": 2.8576170203386144e-06, |
| "logits/chosen": 0.2569185197353363, |
| "logits/rejected": 0.14983759820461273, |
| "logps/chosen": -849.8656005859375, |
| "logps/rejected": -5053.3447265625, |
| "loss": -139.7136, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -165.0109405517578, |
| "rewards/margins": 1282.168701171875, |
| "rewards/rejected": -1447.1796875, |
| "step": 2165 |
| }, |
| { |
| "epoch": 1.9288888888888889, |
| "grad_norm": 15577.6640625, |
| "learning_rate": 2.8365825421887393e-06, |
| "logits/chosen": 0.29794877767562866, |
| "logits/rejected": 0.13814763724803925, |
| "logps/chosen": -963.2847900390625, |
| "logps/rejected": -5412.55029296875, |
| "loss": -149.6424, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -193.61622619628906, |
| "rewards/margins": 1364.0364990234375, |
| "rewards/rejected": -1557.6527099609375, |
| "step": 2170 |
| }, |
| { |
| "epoch": 1.9333333333333333, |
| "grad_norm": 58780.875, |
| "learning_rate": 2.8155950663806234e-06, |
| "logits/chosen": 0.3018116354942322, |
| "logits/rejected": 0.21214398741722107, |
| "logps/chosen": -1285.2984619140625, |
| "logps/rejected": -6422.83349609375, |
| "loss": -167.4751, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -295.5580749511719, |
| "rewards/margins": 1564.8118896484375, |
| "rewards/rejected": -1860.369873046875, |
| "step": 2175 |
| }, |
| { |
| "epoch": 1.9377777777777778, |
| "grad_norm": 587.9622192382812, |
| "learning_rate": 2.7946550488875276e-06, |
| "logits/chosen": 0.26838162541389465, |
| "logits/rejected": 0.08940029889345169, |
| "logps/chosen": -1345.5733642578125, |
| "logps/rejected": -5254.9560546875, |
| "loss": -123.3595, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -306.68939208984375, |
| "rewards/margins": 1202.3447265625, |
| "rewards/rejected": -1509.0341796875, |
| "step": 2180 |
| }, |
| { |
| "epoch": 1.942222222222222, |
| "grad_norm": 1813.9815673828125, |
| "learning_rate": 2.7737629446516325e-06, |
| "logits/chosen": 0.253492146730423, |
| "logits/rejected": 0.16496218740940094, |
| "logps/chosen": -796.4617309570312, |
| "logps/rejected": -5880.72412109375, |
| "loss": -183.0133, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -150.0802459716797, |
| "rewards/margins": 1550.702880859375, |
| "rewards/rejected": -1700.783203125, |
| "step": 2185 |
| }, |
| { |
| "epoch": 1.9466666666666668, |
| "grad_norm": 3586.96142578125, |
| "learning_rate": 2.7529192075741586e-06, |
| "logits/chosen": 0.2590496838092804, |
| "logits/rejected": 0.16046349704265594, |
| "logps/chosen": -972.6398315429688, |
| "logps/rejected": -4850.56591796875, |
| "loss": -95.2198, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -199.7455596923828, |
| "rewards/margins": 1186.538818359375, |
| "rewards/rejected": -1386.2843017578125, |
| "step": 2190 |
| }, |
| { |
| "epoch": 1.951111111111111, |
| "grad_norm": 3619.4375, |
| "learning_rate": 2.732124290505501e-06, |
| "logits/chosen": 0.2826315760612488, |
| "logits/rejected": 0.10563318431377411, |
| "logps/chosen": -1130.849853515625, |
| "logps/rejected": -5552.994140625, |
| "loss": -143.2646, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": -243.33334350585938, |
| "rewards/margins": 1356.5181884765625, |
| "rewards/rejected": -1599.8515625, |
| "step": 2195 |
| }, |
| { |
| "epoch": 1.9555555555555557, |
| "grad_norm": 8393.4658203125, |
| "learning_rate": 2.7113786452353976e-06, |
| "logits/chosen": 0.2891395688056946, |
| "logits/rejected": 0.1238345354795456, |
| "logps/chosen": -747.4689331054688, |
| "logps/rejected": -4269.8525390625, |
| "loss": -131.3651, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -130.8421173095703, |
| "rewards/margins": 1081.1690673828125, |
| "rewards/rejected": -1212.01123046875, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.96, |
| "grad_norm": 14323.326171875, |
| "learning_rate": 2.6906827224831024e-06, |
| "logits/chosen": 0.28969186544418335, |
| "logits/rejected": 0.15531742572784424, |
| "logps/chosen": -1660.210693359375, |
| "logps/rejected": -5445.890625, |
| "loss": -88.6212, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -405.94293212890625, |
| "rewards/margins": 1156.8634033203125, |
| "rewards/rejected": -1562.806396484375, |
| "step": 2205 |
| }, |
| { |
| "epoch": 1.9644444444444444, |
| "grad_norm": 2783.807861328125, |
| "learning_rate": 2.670036971887603e-06, |
| "logits/chosen": 0.26972195506095886, |
| "logits/rejected": 0.1590013951063156, |
| "logps/chosen": -809.4324951171875, |
| "logps/rejected": -6129.16015625, |
| "loss": -195.0255, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -145.40342712402344, |
| "rewards/margins": 1622.9312744140625, |
| "rewards/rejected": -1768.3343505859375, |
| "step": 2210 |
| }, |
| { |
| "epoch": 1.968888888888889, |
| "grad_norm": 4170.75146484375, |
| "learning_rate": 2.6494418419978485e-06, |
| "logits/chosen": 0.2808416783809662, |
| "logits/rejected": 0.11938568204641342, |
| "logps/chosen": -971.3878784179688, |
| "logps/rejected": -5779.38671875, |
| "loss": -182.4402, |
| "rewards/accuracies": 0.856249988079071, |
| "rewards/chosen": -188.3170166015625, |
| "rewards/margins": 1474.6036376953125, |
| "rewards/rejected": -1662.920654296875, |
| "step": 2215 |
| }, |
| { |
| "epoch": 1.9733333333333334, |
| "grad_norm": 439.20733642578125, |
| "learning_rate": 2.6288977802630024e-06, |
| "logits/chosen": 0.2903082072734833, |
| "logits/rejected": 0.14760538935661316, |
| "logps/chosen": -889.22412109375, |
| "logps/rejected": -5612.66015625, |
| "loss": -105.0156, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -166.32510375976562, |
| "rewards/margins": 1447.6334228515625, |
| "rewards/rejected": -1613.9583740234375, |
| "step": 2220 |
| }, |
| { |
| "epoch": 1.9777777777777779, |
| "grad_norm": 2823.536865234375, |
| "learning_rate": 2.608405233022724e-06, |
| "logits/chosen": 0.28835329413414, |
| "logits/rejected": 0.14759060740470886, |
| "logps/chosen": -690.9716796875, |
| "logps/rejected": -4797.54443359375, |
| "loss": -153.7535, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -110.11827087402344, |
| "rewards/margins": 1261.5618896484375, |
| "rewards/rejected": -1371.6800537109375, |
| "step": 2225 |
| }, |
| { |
| "epoch": 1.982222222222222, |
| "grad_norm": 14636.8349609375, |
| "learning_rate": 2.5879646454974705e-06, |
| "logits/chosen": 0.2774786055088043, |
| "logits/rejected": 0.19184234738349915, |
| "logps/chosen": -1131.55078125, |
| "logps/rejected": -6097.72998046875, |
| "loss": -150.7048, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -244.20458984375, |
| "rewards/margins": 1515.9283447265625, |
| "rewards/rejected": -1760.1328125, |
| "step": 2230 |
| }, |
| { |
| "epoch": 1.9866666666666668, |
| "grad_norm": 2957.12890625, |
| "learning_rate": 2.5675764617788233e-06, |
| "logits/chosen": 0.28359976410865784, |
| "logits/rejected": 0.11363682895898819, |
| "logps/chosen": -949.8064575195312, |
| "logps/rejected": -3837.858154296875, |
| "loss": -35.1198, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -198.0341796875, |
| "rewards/margins": 892.0084838867188, |
| "rewards/rejected": -1090.042724609375, |
| "step": 2235 |
| }, |
| { |
| "epoch": 1.991111111111111, |
| "grad_norm": 1262.95068359375, |
| "learning_rate": 2.5472411248198415e-06, |
| "logits/chosen": 0.2636898458003998, |
| "logits/rejected": 0.11793669313192368, |
| "logps/chosen": -668.3490600585938, |
| "logps/rejected": -4936.6396484375, |
| "loss": -142.7667, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -104.84495544433594, |
| "rewards/margins": 1309.4827880859375, |
| "rewards/rejected": -1414.3277587890625, |
| "step": 2240 |
| }, |
| { |
| "epoch": 1.9955555555555555, |
| "grad_norm": 26299.51953125, |
| "learning_rate": 2.526959076425434e-06, |
| "logits/chosen": 0.3218989372253418, |
| "logits/rejected": 0.14606359601020813, |
| "logps/chosen": -1382.350830078125, |
| "logps/rejected": -6349.71630859375, |
| "loss": -141.7378, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -316.47515869140625, |
| "rewards/margins": 1523.3597412109375, |
| "rewards/rejected": -1839.834716796875, |
| "step": 2245 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 366.8431396484375, |
| "learning_rate": 2.506730757242768e-06, |
| "logits/chosen": 0.2511804699897766, |
| "logits/rejected": 0.12638862431049347, |
| "logps/chosen": -1067.3612060546875, |
| "logps/rejected": -5650.3388671875, |
| "loss": -146.8461, |
| "rewards/accuracies": 0.7562500238418579, |
| "rewards/chosen": -231.1248016357422, |
| "rewards/margins": 1394.5806884765625, |
| "rewards/rejected": -1625.7054443359375, |
| "step": 2250 |
| }, |
| { |
| "epoch": 2.0044444444444443, |
| "grad_norm": 3183.072021484375, |
| "learning_rate": 2.4865566067516896e-06, |
| "logits/chosen": 0.2529202401638031, |
| "logits/rejected": 0.11537809669971466, |
| "logps/chosen": -869.9254150390625, |
| "logps/rejected": -5691.705078125, |
| "loss": -177.6594, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -165.04727172851562, |
| "rewards/margins": 1479.71484375, |
| "rewards/rejected": -1644.7620849609375, |
| "step": 2255 |
| }, |
| { |
| "epoch": 2.008888888888889, |
| "grad_norm": 3311.302734375, |
| "learning_rate": 2.4664370632551764e-06, |
| "logits/chosen": 0.2762852907180786, |
| "logits/rejected": 0.15143956243991852, |
| "logps/chosen": -642.7841186523438, |
| "logps/rejected": -4601.5341796875, |
| "loss": -147.3249, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -102.81626892089844, |
| "rewards/margins": 1210.4541015625, |
| "rewards/rejected": -1313.2705078125, |
| "step": 2260 |
| }, |
| { |
| "epoch": 2.013333333333333, |
| "grad_norm": 621.3425903320312, |
| "learning_rate": 2.4463725638698182e-06, |
| "logits/chosen": 0.22690370678901672, |
| "logits/rejected": 0.10626769065856934, |
| "logps/chosen": -741.8753662109375, |
| "logps/rejected": -5965.8818359375, |
| "loss": -196.9432, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -124.20670318603516, |
| "rewards/margins": 1594.4808349609375, |
| "rewards/rejected": -1718.6875, |
| "step": 2265 |
| }, |
| { |
| "epoch": 2.017777777777778, |
| "grad_norm": 842.56298828125, |
| "learning_rate": 2.426363544516317e-06, |
| "logits/chosen": 0.2934921979904175, |
| "logits/rejected": 0.13362529873847961, |
| "logps/chosen": -939.9967651367188, |
| "logps/rejected": -7108.2431640625, |
| "loss": -219.792, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -183.85302734375, |
| "rewards/margins": 1877.675048828125, |
| "rewards/rejected": -2061.5283203125, |
| "step": 2270 |
| }, |
| { |
| "epoch": 2.022222222222222, |
| "grad_norm": 836.9631958007812, |
| "learning_rate": 2.406410439910017e-06, |
| "logits/chosen": 0.23737819492816925, |
| "logits/rejected": 0.15523849427700043, |
| "logps/chosen": -1270.0848388671875, |
| "logps/rejected": -5911.75341796875, |
| "loss": -163.8763, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -287.78839111328125, |
| "rewards/margins": 1415.1302490234375, |
| "rewards/rejected": -1702.918701171875, |
| "step": 2275 |
| }, |
| { |
| "epoch": 2.026666666666667, |
| "grad_norm": 15218.943359375, |
| "learning_rate": 2.3865136835514618e-06, |
| "logits/chosen": 0.24523913860321045, |
| "logits/rejected": 0.11696295440196991, |
| "logps/chosen": -1308.589111328125, |
| "logps/rejected": -5270.3623046875, |
| "loss": -85.0526, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -303.65472412109375, |
| "rewards/margins": 1212.801513671875, |
| "rewards/rejected": -1516.4560546875, |
| "step": 2280 |
| }, |
| { |
| "epoch": 2.031111111111111, |
| "grad_norm": 1257.7388916015625, |
| "learning_rate": 2.366673707716973e-06, |
| "logits/chosen": 0.28686919808387756, |
| "logits/rejected": 0.10142295062541962, |
| "logps/chosen": -1574.061767578125, |
| "logps/rejected": -5060.2666015625, |
| "loss": -8.5554, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -379.158203125, |
| "rewards/margins": 1073.184814453125, |
| "rewards/rejected": -1452.343017578125, |
| "step": 2285 |
| }, |
| { |
| "epoch": 2.0355555555555553, |
| "grad_norm": 6688.791015625, |
| "learning_rate": 2.3468909434492606e-06, |
| "logits/chosen": 0.27058038115501404, |
| "logits/rejected": 0.1307818591594696, |
| "logps/chosen": -898.3326416015625, |
| "logps/rejected": -6339.33740234375, |
| "loss": -201.7951, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -173.98294067382812, |
| "rewards/margins": 1659.8505859375, |
| "rewards/rejected": -1833.8333740234375, |
| "step": 2290 |
| }, |
| { |
| "epoch": 2.04, |
| "grad_norm": 293.4644775390625, |
| "learning_rate": 2.327165820548059e-06, |
| "logits/chosen": 0.2272816151380539, |
| "logits/rejected": 0.11062890291213989, |
| "logps/chosen": -724.8757934570312, |
| "logps/rejected": -4666.20849609375, |
| "loss": -149.7807, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -126.92144775390625, |
| "rewards/margins": 1207.5185546875, |
| "rewards/rejected": -1334.4400634765625, |
| "step": 2295 |
| }, |
| { |
| "epoch": 2.0444444444444443, |
| "grad_norm": 795.9879760742188, |
| "learning_rate": 2.307498767560787e-06, |
| "logits/chosen": 0.298098623752594, |
| "logits/rejected": 0.1521844118833542, |
| "logps/chosen": -683.4656372070312, |
| "logps/rejected": -5807.5537109375, |
| "loss": -193.3084, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": -109.24932861328125, |
| "rewards/margins": 1563.719970703125, |
| "rewards/rejected": -1672.9693603515625, |
| "step": 2300 |
| }, |
| { |
| "epoch": 2.048888888888889, |
| "grad_norm": 1204.4793701171875, |
| "learning_rate": 2.287890211773238e-06, |
| "logits/chosen": 0.27916765213012695, |
| "logits/rejected": 0.1887357085943222, |
| "logps/chosen": -929.1427001953125, |
| "logps/rejected": -6514.75537109375, |
| "loss": -178.8006, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -192.68038940429688, |
| "rewards/margins": 1696.822265625, |
| "rewards/rejected": -1889.502685546875, |
| "step": 2305 |
| }, |
| { |
| "epoch": 2.0533333333333332, |
| "grad_norm": 785.901611328125, |
| "learning_rate": 2.268340579200299e-06, |
| "logits/chosen": 0.2767581343650818, |
| "logits/rejected": 0.09982942789793015, |
| "logps/chosen": -1018.1090087890625, |
| "logps/rejected": -4958.3857421875, |
| "loss": -150.214, |
| "rewards/accuracies": 0.8812500238418579, |
| "rewards/chosen": -216.730712890625, |
| "rewards/margins": 1208.173583984375, |
| "rewards/rejected": -1424.904296875, |
| "step": 2310 |
| }, |
| { |
| "epoch": 2.057777777777778, |
| "grad_norm": 23179.99609375, |
| "learning_rate": 2.2488502945766893e-06, |
| "logits/chosen": 0.2563682198524475, |
| "logits/rejected": 0.13451406359672546, |
| "logps/chosen": -1650.1959228515625, |
| "logps/rejected": -6506.95068359375, |
| "loss": -50.9706, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -400.352783203125, |
| "rewards/margins": 1482.503173828125, |
| "rewards/rejected": -1882.8560791015625, |
| "step": 2315 |
| }, |
| { |
| "epoch": 2.062222222222222, |
| "grad_norm": 268.4923095703125, |
| "learning_rate": 2.2294197813477403e-06, |
| "logits/chosen": 0.2721272110939026, |
| "logits/rejected": 0.14246222376823425, |
| "logps/chosen": -1548.1038818359375, |
| "logps/rejected": -5455.2294921875, |
| "loss": -27.1433, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -370.7905578613281, |
| "rewards/margins": 1198.22021484375, |
| "rewards/rejected": -1569.0107421875, |
| "step": 2320 |
| }, |
| { |
| "epoch": 2.066666666666667, |
| "grad_norm": 128.77793884277344, |
| "learning_rate": 2.210049461660189e-06, |
| "logits/chosen": 0.24591055512428284, |
| "logits/rejected": 0.12461193650960922, |
| "logps/chosen": -1497.133544921875, |
| "logps/rejected": -5740.95654296875, |
| "loss": -93.5223, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -357.1094055175781, |
| "rewards/margins": 1303.1572265625, |
| "rewards/rejected": -1660.266845703125, |
| "step": 2325 |
| }, |
| { |
| "epoch": 2.071111111111111, |
| "grad_norm": 503.60260009765625, |
| "learning_rate": 2.1907397563530127e-06, |
| "logits/chosen": 0.2611016631126404, |
| "logits/rejected": 0.1542952060699463, |
| "logps/chosen": -719.0428466796875, |
| "logps/rejected": -4828.14697265625, |
| "loss": -153.646, |
| "rewards/accuracies": 0.768750011920929, |
| "rewards/chosen": -124.05521392822266, |
| "rewards/margins": 1255.4459228515625, |
| "rewards/rejected": -1379.5010986328125, |
| "step": 2330 |
| }, |
| { |
| "epoch": 2.0755555555555554, |
| "grad_norm": 2286.80224609375, |
| "learning_rate": 2.1714910849482777e-06, |
| "logits/chosen": 0.27613455057144165, |
| "logits/rejected": 0.10287618637084961, |
| "logps/chosen": -876.4581909179688, |
| "logps/rejected": -4919.50244140625, |
| "loss": -94.8136, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -169.60960388183594, |
| "rewards/margins": 1241.900634765625, |
| "rewards/rejected": -1411.5103759765625, |
| "step": 2335 |
| }, |
| { |
| "epoch": 2.08, |
| "grad_norm": 2044.5191650390625, |
| "learning_rate": 2.152303865642035e-06, |
| "logits/chosen": 0.2598101496696472, |
| "logits/rejected": 0.14950606226921082, |
| "logps/chosen": -958.21240234375, |
| "logps/rejected": -4285.7109375, |
| "loss": -106.7893, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -195.62478637695312, |
| "rewards/margins": 1023.82421875, |
| "rewards/rejected": -1219.4488525390625, |
| "step": 2340 |
| }, |
| { |
| "epoch": 2.0844444444444443, |
| "grad_norm": 648.6554565429688, |
| "learning_rate": 2.1331785152952243e-06, |
| "logits/chosen": 0.31486600637435913, |
| "logits/rejected": 0.15721896290779114, |
| "logps/chosen": -1407.5550537109375, |
| "logps/rejected": -6065.3701171875, |
| "loss": -71.9897, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -328.1251525878906, |
| "rewards/margins": 1424.674072265625, |
| "rewards/rejected": -1752.7991943359375, |
| "step": 2345 |
| }, |
| { |
| "epoch": 2.088888888888889, |
| "grad_norm": 2410.511962890625, |
| "learning_rate": 2.1141154494246263e-06, |
| "logits/chosen": 0.24657824635505676, |
| "logits/rejected": 0.11266227811574936, |
| "logps/chosen": -826.2483520507812, |
| "logps/rejected": -4563.3427734375, |
| "loss": -141.5711, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -157.9169464111328, |
| "rewards/margins": 1143.7630615234375, |
| "rewards/rejected": -1301.679931640625, |
| "step": 2350 |
| }, |
| { |
| "epoch": 2.0933333333333333, |
| "grad_norm": 25792.09375, |
| "learning_rate": 2.0951150821938278e-06, |
| "logits/chosen": 0.29828113317489624, |
| "logits/rejected": 0.13297337293624878, |
| "logps/chosen": -985.1458740234375, |
| "logps/rejected": -5754.3720703125, |
| "loss": -152.3468, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -199.97625732421875, |
| "rewards/margins": 1458.9273681640625, |
| "rewards/rejected": -1658.9036865234375, |
| "step": 2355 |
| }, |
| { |
| "epoch": 2.097777777777778, |
| "grad_norm": 123596.65625, |
| "learning_rate": 2.0761778264042286e-06, |
| "logits/chosen": 0.2687097489833832, |
| "logits/rejected": 0.1304052472114563, |
| "logps/chosen": -2005.7015380859375, |
| "logps/rejected": -5281.50146484375, |
| "loss": -29.7957, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -502.0967712402344, |
| "rewards/margins": 1011.1641845703125, |
| "rewards/rejected": -1513.2607421875, |
| "step": 2360 |
| }, |
| { |
| "epoch": 2.102222222222222, |
| "grad_norm": 3398.30419921875, |
| "learning_rate": 2.0573040934860717e-06, |
| "logits/chosen": 0.26310405135154724, |
| "logits/rejected": 0.14153099060058594, |
| "logps/chosen": -1015.2615356445312, |
| "logps/rejected": -4906.306640625, |
| "loss": -100.0801, |
| "rewards/accuracies": 0.7562500238418579, |
| "rewards/chosen": -214.89389038085938, |
| "rewards/margins": 1187.948974609375, |
| "rewards/rejected": -1402.8427734375, |
| "step": 2365 |
| }, |
| { |
| "epoch": 2.1066666666666665, |
| "grad_norm": 955.353759765625, |
| "learning_rate": 2.038494293489502e-06, |
| "logits/chosen": 0.2678568959236145, |
| "logits/rejected": 0.15257170796394348, |
| "logps/chosen": -591.45361328125, |
| "logps/rejected": -5582.18359375, |
| "loss": -185.903, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -83.00418090820312, |
| "rewards/margins": 1523.2943115234375, |
| "rewards/rejected": -1606.2984619140625, |
| "step": 2370 |
| }, |
| { |
| "epoch": 2.111111111111111, |
| "grad_norm": 13179.9892578125, |
| "learning_rate": 2.0197488350756618e-06, |
| "logits/chosen": 0.2749830186367035, |
| "logits/rejected": 0.12992441654205322, |
| "logps/chosen": -1259.3759765625, |
| "logps/rejected": -4679.1806640625, |
| "loss": -17.5891, |
| "rewards/accuracies": 0.7437499761581421, |
| "rewards/chosen": -285.11517333984375, |
| "rewards/margins": 1053.487060546875, |
| "rewards/rejected": -1338.602294921875, |
| "step": 2375 |
| }, |
| { |
| "epoch": 2.1155555555555554, |
| "grad_norm": 3182.348876953125, |
| "learning_rate": 2.0010681255078086e-06, |
| "logits/chosen": 0.2857711911201477, |
| "logits/rejected": 0.1397833526134491, |
| "logps/chosen": -621.2139892578125, |
| "logps/rejected": -5319.80419921875, |
| "loss": -176.5545, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -89.78614807128906, |
| "rewards/margins": 1435.8160400390625, |
| "rewards/rejected": -1525.602294921875, |
| "step": 2380 |
| }, |
| { |
| "epoch": 2.12, |
| "grad_norm": 28968.890625, |
| "learning_rate": 1.98245257064247e-06, |
| "logits/chosen": 0.2485010176897049, |
| "logits/rejected": 0.10595102608203888, |
| "logps/chosen": -1199.7591552734375, |
| "logps/rejected": -5753.07421875, |
| "loss": -127.3482, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -258.9444885253906, |
| "rewards/margins": 1396.914306640625, |
| "rewards/rejected": -1655.8587646484375, |
| "step": 2385 |
| }, |
| { |
| "epoch": 2.1244444444444444, |
| "grad_norm": 6602.171875, |
| "learning_rate": 1.9639025749206238e-06, |
| "logits/chosen": 0.28412342071533203, |
| "logits/rejected": 0.1559821218252182, |
| "logps/chosen": -727.5496826171875, |
| "logps/rejected": -5962.5419921875, |
| "loss": -188.2974, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": -126.3979721069336, |
| "rewards/margins": 1595.5186767578125, |
| "rewards/rejected": -1721.9166259765625, |
| "step": 2390 |
| }, |
| { |
| "epoch": 2.128888888888889, |
| "grad_norm": 227.03285217285156, |
| "learning_rate": 1.945418541358911e-06, |
| "logits/chosen": 0.3243732452392578, |
| "logits/rejected": 0.14940175414085388, |
| "logps/chosen": -1182.3828125, |
| "logps/rejected": -5785.05322265625, |
| "loss": -147.8244, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -269.4920959472656, |
| "rewards/margins": 1404.4029541015625, |
| "rewards/rejected": -1673.895263671875, |
| "step": 2395 |
| }, |
| { |
| "epoch": 2.1333333333333333, |
| "grad_norm": 1853.3699951171875, |
| "learning_rate": 1.927000871540882e-06, |
| "logits/chosen": 0.29489001631736755, |
| "logits/rejected": 0.17083010077476501, |
| "logps/chosen": -1334.423095703125, |
| "logps/rejected": -6484.08740234375, |
| "loss": -120.2257, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -304.0583801269531, |
| "rewards/margins": 1575.259033203125, |
| "rewards/rejected": -1879.3173828125, |
| "step": 2400 |
| }, |
| { |
| "epoch": 2.137777777777778, |
| "grad_norm": 16868.67578125, |
| "learning_rate": 1.9086499656082685e-06, |
| "logits/chosen": 0.2887013256549835, |
| "logits/rejected": 0.16366983950138092, |
| "logps/chosen": -964.7067260742188, |
| "logps/rejected": -7210.1044921875, |
| "loss": -199.7976, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -200.40370178222656, |
| "rewards/margins": 1897.8489990234375, |
| "rewards/rejected": -2098.252685546875, |
| "step": 2405 |
| }, |
| { |
| "epoch": 2.1422222222222222, |
| "grad_norm": 4798.34521484375, |
| "learning_rate": 1.8903662222522962e-06, |
| "logits/chosen": 0.28090929985046387, |
| "logits/rejected": 0.15366259217262268, |
| "logps/chosen": -1051.0230712890625, |
| "logps/rejected": -5791.501953125, |
| "loss": -146.6108, |
| "rewards/accuracies": 0.768750011920929, |
| "rewards/chosen": -222.67373657226562, |
| "rewards/margins": 1445.8172607421875, |
| "rewards/rejected": -1668.490966796875, |
| "step": 2410 |
| }, |
| { |
| "epoch": 2.1466666666666665, |
| "grad_norm": 8078.548828125, |
| "learning_rate": 1.872150038705015e-06, |
| "logits/chosen": 0.27881601452827454, |
| "logits/rejected": 0.13041305541992188, |
| "logps/chosen": -1052.749755859375, |
| "logps/rejected": -5167.72900390625, |
| "loss": -80.2512, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -226.31826782226562, |
| "rewards/margins": 1255.333251953125, |
| "rewards/rejected": -1481.651611328125, |
| "step": 2415 |
| }, |
| { |
| "epoch": 2.151111111111111, |
| "grad_norm": 519.2947998046875, |
| "learning_rate": 1.8540018107306734e-06, |
| "logits/chosen": 0.2691516578197479, |
| "logits/rejected": 0.12493614852428436, |
| "logps/chosen": -691.4432983398438, |
| "logps/rejected": -4221.6240234375, |
| "loss": -118.5242, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": -120.33553314208984, |
| "rewards/margins": 1081.915283203125, |
| "rewards/rejected": -1202.250732421875, |
| "step": 2420 |
| }, |
| { |
| "epoch": 2.1555555555555554, |
| "grad_norm": 3418.582763671875, |
| "learning_rate": 1.835921932617119e-06, |
| "logits/chosen": 0.2900315523147583, |
| "logits/rejected": 0.14485278725624084, |
| "logps/chosen": -988.5070190429688, |
| "logps/rejected": -5698.2431640625, |
| "loss": -152.5378, |
| "rewards/accuracies": 0.768750011920929, |
| "rewards/chosen": -199.50582885742188, |
| "rewards/margins": 1441.86328125, |
| "rewards/rejected": -1641.369140625, |
| "step": 2425 |
| }, |
| { |
| "epoch": 2.16, |
| "grad_norm": 19606.84765625, |
| "learning_rate": 1.8179107971672332e-06, |
| "logits/chosen": 0.27348119020462036, |
| "logits/rejected": 0.15357454121112823, |
| "logps/chosen": -1183.80029296875, |
| "logps/rejected": -5311.82568359375, |
| "loss": -81.1999, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -264.7737731933594, |
| "rewards/margins": 1263.395751953125, |
| "rewards/rejected": -1528.169677734375, |
| "step": 2430 |
| }, |
| { |
| "epoch": 2.1644444444444444, |
| "grad_norm": 20062.255859375, |
| "learning_rate": 1.7999687956903955e-06, |
| "logits/chosen": 0.3121498227119446, |
| "logits/rejected": 0.20644445717334747, |
| "logps/chosen": -1230.6981201171875, |
| "logps/rejected": -5334.83740234375, |
| "loss": -91.9467, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": -277.8145751953125, |
| "rewards/margins": 1254.2783203125, |
| "rewards/rejected": -1532.093017578125, |
| "step": 2435 |
| }, |
| { |
| "epoch": 2.168888888888889, |
| "grad_norm": 4172.34375, |
| "learning_rate": 1.7820963179939831e-06, |
| "logits/chosen": 0.3299214541912079, |
| "logits/rejected": 0.16972143948078156, |
| "logps/chosen": -1183.5252685546875, |
| "logps/rejected": -4983.53515625, |
| "loss": -87.2905, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -266.27203369140625, |
| "rewards/margins": 1163.3853759765625, |
| "rewards/rejected": -1429.6573486328125, |
| "step": 2440 |
| }, |
| { |
| "epoch": 2.1733333333333333, |
| "grad_norm": 1083.02734375, |
| "learning_rate": 1.7642937523749038e-06, |
| "logits/chosen": 0.2996160686016083, |
| "logits/rejected": 0.1782444417476654, |
| "logps/chosen": -872.85546875, |
| "logps/rejected": -5863.09814453125, |
| "loss": -189.3406, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -165.60020446777344, |
| "rewards/margins": 1527.197998046875, |
| "rewards/rejected": -1692.798095703125, |
| "step": 2445 |
| }, |
| { |
| "epoch": 2.1777777777777776, |
| "grad_norm": 9887.4541015625, |
| "learning_rate": 1.7465614856111524e-06, |
| "logits/chosen": 0.2807261347770691, |
| "logits/rejected": 0.14497342705726624, |
| "logps/chosen": -1245.781494140625, |
| "logps/rejected": -4972.5302734375, |
| "loss": -22.9363, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -285.582275390625, |
| "rewards/margins": 1141.8092041015625, |
| "rewards/rejected": -1427.391357421875, |
| "step": 2450 |
| }, |
| { |
| "epoch": 2.1822222222222223, |
| "grad_norm": 23496.1953125, |
| "learning_rate": 1.7288999029534177e-06, |
| "logits/chosen": 0.2600800395011902, |
| "logits/rejected": 0.1541323959827423, |
| "logps/chosen": -1313.3170166015625, |
| "logps/rejected": -5457.80126953125, |
| "loss": -102.3604, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -300.3144226074219, |
| "rewards/margins": 1266.052001953125, |
| "rewards/rejected": -1566.366455078125, |
| "step": 2455 |
| }, |
| { |
| "epoch": 2.1866666666666665, |
| "grad_norm": 1345.37451171875, |
| "learning_rate": 1.7113093881167065e-06, |
| "logits/chosen": 0.28410571813583374, |
| "logits/rejected": 0.1551993191242218, |
| "logps/chosen": -718.8294067382812, |
| "logps/rejected": -5112.97998046875, |
| "loss": -167.883, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -117.19173431396484, |
| "rewards/margins": 1350.6492919921875, |
| "rewards/rejected": -1467.841064453125, |
| "step": 2460 |
| }, |
| { |
| "epoch": 2.1911111111111112, |
| "grad_norm": 1490.6287841796875, |
| "learning_rate": 1.6937903232720076e-06, |
| "logits/chosen": 0.26901775598526, |
| "logits/rejected": 0.1793210208415985, |
| "logps/chosen": -761.0711669921875, |
| "logps/rejected": -4391.287109375, |
| "loss": -120.9907, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -134.74447631835938, |
| "rewards/margins": 1113.41650390625, |
| "rewards/rejected": -1248.160888671875, |
| "step": 2465 |
| }, |
| { |
| "epoch": 2.1955555555555555, |
| "grad_norm": 7482.61767578125, |
| "learning_rate": 1.6763430890379906e-06, |
| "logits/chosen": 0.3026997745037079, |
| "logits/rejected": 0.18270622193813324, |
| "logps/chosen": -1114.9698486328125, |
| "logps/rejected": -7292.0234375, |
| "loss": -144.5522, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -238.2162322998047, |
| "rewards/margins": 1879.3472900390625, |
| "rewards/rejected": -2117.563232421875, |
| "step": 2470 |
| }, |
| { |
| "epoch": 2.2, |
| "grad_norm": 323.86334228515625, |
| "learning_rate": 1.6589680644727347e-06, |
| "logits/chosen": 0.28739094734191895, |
| "logits/rejected": 0.11439162492752075, |
| "logps/chosen": -1026.1435546875, |
| "logps/rejected": -5118.4853515625, |
| "loss": -86.5258, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -213.24893188476562, |
| "rewards/margins": 1255.3507080078125, |
| "rewards/rejected": -1468.599365234375, |
| "step": 2475 |
| }, |
| { |
| "epoch": 2.2044444444444444, |
| "grad_norm": 593.9039916992188, |
| "learning_rate": 1.6416656270654935e-06, |
| "logits/chosen": 0.284396231174469, |
| "logits/rejected": 0.11476149410009384, |
| "logps/chosen": -842.9694213867188, |
| "logps/rejected": -4736.8408203125, |
| "loss": -120.9049, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -159.19554138183594, |
| "rewards/margins": 1193.8314208984375, |
| "rewards/rejected": -1353.0269775390625, |
| "step": 2480 |
| }, |
| { |
| "epoch": 2.2088888888888887, |
| "grad_norm": 2584.263427734375, |
| "learning_rate": 1.6244361527284953e-06, |
| "logits/chosen": 0.24597282707691193, |
| "logits/rejected": 0.146062970161438, |
| "logps/chosen": -920.9533081054688, |
| "logps/rejected": -4820.09716796875, |
| "loss": -100.4682, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -184.3901824951172, |
| "rewards/margins": 1191.036376953125, |
| "rewards/rejected": -1375.426513671875, |
| "step": 2485 |
| }, |
| { |
| "epoch": 2.2133333333333334, |
| "grad_norm": 3524.037109375, |
| "learning_rate": 1.607280015788774e-06, |
| "logits/chosen": 0.29631665349006653, |
| "logits/rejected": 0.2110665738582611, |
| "logps/chosen": -910.0657348632812, |
| "logps/rejected": -6517.1923828125, |
| "loss": -212.3231, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -173.08511352539062, |
| "rewards/margins": 1711.4779052734375, |
| "rewards/rejected": -1884.5631103515625, |
| "step": 2490 |
| }, |
| { |
| "epoch": 2.2177777777777776, |
| "grad_norm": 864.408447265625, |
| "learning_rate": 1.5901975889800387e-06, |
| "logits/chosen": 0.28587955236434937, |
| "logits/rejected": 0.11620024591684341, |
| "logps/chosen": -993.3572387695312, |
| "logps/rejected": -3700.596435546875, |
| "loss": 9.6764, |
| "rewards/accuracies": 0.768750011920929, |
| "rewards/chosen": -200.581298828125, |
| "rewards/margins": 839.7741088867188, |
| "rewards/rejected": -1040.35546875, |
| "step": 2495 |
| }, |
| { |
| "epoch": 2.2222222222222223, |
| "grad_norm": 5089.59765625, |
| "learning_rate": 1.573189243434573e-06, |
| "logits/chosen": 0.283964067697525, |
| "logits/rejected": 0.14328907430171967, |
| "logps/chosen": -789.642578125, |
| "logps/rejected": -4073.38818359375, |
| "loss": -123.2755, |
| "rewards/accuracies": 0.768750011920929, |
| "rewards/chosen": -142.17788696289062, |
| "rewards/margins": 1010.5198364257812, |
| "rewards/rejected": -1152.6978759765625, |
| "step": 2500 |
| }, |
| { |
| "epoch": 2.2222222222222223, |
| "eval_logits/chosen": 0.2799828350543976, |
| "eval_logits/rejected": 0.15025141835212708, |
| "eval_logps/chosen": -949.5983276367188, |
| "eval_logps/rejected": -5488.9052734375, |
| "eval_loss": -138.4701385498047, |
| "eval_rewards/accuracies": 0.7982500195503235, |
| "eval_rewards/chosen": -191.87351989746094, |
| "eval_rewards/margins": 1387.8035888671875, |
| "eval_rewards/rejected": -1579.6771240234375, |
| "eval_runtime": 2194.2374, |
| "eval_samples_per_second": 1.823, |
| "eval_steps_per_second": 0.911, |
| "step": 2500 |
| }, |
| { |
| "epoch": 2.2266666666666666, |
| "grad_norm": 13272.6533203125, |
| "learning_rate": 1.556255348675174e-06, |
| "logits/chosen": 0.27020254731178284, |
| "logits/rejected": 0.17103810608386993, |
| "logps/chosen": -1132.460205078125, |
| "logps/rejected": -6499.81103515625, |
| "loss": -192.538, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -242.13705444335938, |
| "rewards/margins": 1637.689453125, |
| "rewards/rejected": -1879.8265380859375, |
| "step": 2505 |
| }, |
| { |
| "epoch": 2.2311111111111113, |
| "grad_norm": 1124.8309326171875, |
| "learning_rate": 1.5393962726071238e-06, |
| "logits/chosen": 0.3239533007144928, |
| "logits/rejected": 0.23121027648448944, |
| "logps/chosen": -872.4189453125, |
| "logps/rejected": -6284.74267578125, |
| "loss": -197.2108, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": -170.4202117919922, |
| "rewards/margins": 1646.008056640625, |
| "rewards/rejected": -1816.4283447265625, |
| "step": 2510 |
| }, |
| { |
| "epoch": 2.2355555555555555, |
| "grad_norm": 797.7090454101562, |
| "learning_rate": 1.522612381510195e-06, |
| "logits/chosen": 0.27814406156539917, |
| "logits/rejected": 0.10909430682659149, |
| "logps/chosen": -822.7091064453125, |
| "logps/rejected": -4839.541015625, |
| "loss": -137.9079, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -148.74365234375, |
| "rewards/margins": 1238.419921875, |
| "rewards/rejected": -1387.16357421875, |
| "step": 2515 |
| }, |
| { |
| "epoch": 2.24, |
| "grad_norm": 8007.61669921875, |
| "learning_rate": 1.5059040400306934e-06, |
| "logits/chosen": 0.2747926414012909, |
| "logits/rejected": 0.1866052895784378, |
| "logps/chosen": -1195.6070556640625, |
| "logps/rejected": -5947.76025390625, |
| "loss": -145.4582, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -265.4050598144531, |
| "rewards/margins": 1448.935791015625, |
| "rewards/rejected": -1714.340576171875, |
| "step": 2520 |
| }, |
| { |
| "epoch": 2.2444444444444445, |
| "grad_norm": 3061.326904296875, |
| "learning_rate": 1.489271611173538e-06, |
| "logits/chosen": 0.2585465908050537, |
| "logits/rejected": 0.13917012512683868, |
| "logps/chosen": -1283.451416015625, |
| "logps/rejected": -5492.83740234375, |
| "loss": -112.0707, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -291.5242614746094, |
| "rewards/margins": 1290.2623291015625, |
| "rewards/rejected": -1581.7864990234375, |
| "step": 2525 |
| }, |
| { |
| "epoch": 2.2488888888888887, |
| "grad_norm": 1673.1954345703125, |
| "learning_rate": 1.4727154562943703e-06, |
| "logits/chosen": 0.23232996463775635, |
| "logits/rejected": 0.08956534415483475, |
| "logps/chosen": -1091.0972900390625, |
| "logps/rejected": -5140.0478515625, |
| "loss": -62.3756, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -234.17626953125, |
| "rewards/margins": 1241.8621826171875, |
| "rewards/rejected": -1476.03857421875, |
| "step": 2530 |
| }, |
| { |
| "epoch": 2.2533333333333334, |
| "grad_norm": 1304.2879638671875, |
| "learning_rate": 1.4562359350917054e-06, |
| "logits/chosen": 0.29232141375541687, |
| "logits/rejected": 0.13222011923789978, |
| "logps/chosen": -1121.95947265625, |
| "logps/rejected": -4915.46826171875, |
| "loss": -54.4957, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -246.84402465820312, |
| "rewards/margins": 1160.127685546875, |
| "rewards/rejected": -1406.9716796875, |
| "step": 2535 |
| }, |
| { |
| "epoch": 2.2577777777777777, |
| "grad_norm": 38543.875, |
| "learning_rate": 1.439833405599122e-06, |
| "logits/chosen": 0.3039282560348511, |
| "logits/rejected": 0.18165595829486847, |
| "logps/chosen": -1596.4161376953125, |
| "logps/rejected": -6373.388671875, |
| "loss": -51.0181, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -379.29132080078125, |
| "rewards/margins": 1462.46826171875, |
| "rewards/rejected": -1841.759521484375, |
| "step": 2540 |
| }, |
| { |
| "epoch": 2.2622222222222224, |
| "grad_norm": 5038.9033203125, |
| "learning_rate": 1.423508224177474e-06, |
| "logits/chosen": 0.296329140663147, |
| "logits/rejected": 0.20559100806713104, |
| "logps/chosen": -1279.4678955078125, |
| "logps/rejected": -5379.91552734375, |
| "loss": -109.9517, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -295.9058532714844, |
| "rewards/margins": 1254.336669921875, |
| "rewards/rejected": -1550.242431640625, |
| "step": 2545 |
| }, |
| { |
| "epoch": 2.2666666666666666, |
| "grad_norm": 1271.1041259765625, |
| "learning_rate": 1.4072607455071564e-06, |
| "logits/chosen": 0.3049846291542053, |
| "logits/rejected": 0.16324898600578308, |
| "logps/chosen": -874.4205322265625, |
| "logps/rejected": -5274.2666015625, |
| "loss": -160.3556, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -170.37686157226562, |
| "rewards/margins": 1343.336181640625, |
| "rewards/rejected": -1513.7130126953125, |
| "step": 2550 |
| }, |
| { |
| "epoch": 2.2711111111111113, |
| "grad_norm": 35980.61328125, |
| "learning_rate": 1.3910913225803946e-06, |
| "logits/chosen": 0.2843366265296936, |
| "logits/rejected": 0.16899266839027405, |
| "logps/chosen": -1173.7501220703125, |
| "logps/rejected": -5803.62109375, |
| "loss": -108.6437, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -256.6104736328125, |
| "rewards/margins": 1415.995849609375, |
| "rewards/rejected": -1672.6060791015625, |
| "step": 2555 |
| }, |
| { |
| "epoch": 2.2755555555555556, |
| "grad_norm": 2189.73388671875, |
| "learning_rate": 1.3750003066935785e-06, |
| "logits/chosen": 0.268254816532135, |
| "logits/rejected": 0.12566912174224854, |
| "logps/chosen": -738.77099609375, |
| "logps/rejected": -5212.462890625, |
| "loss": -148.1972, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": -136.58822631835938, |
| "rewards/margins": 1365.056884765625, |
| "rewards/rejected": -1501.6451416015625, |
| "step": 2560 |
| }, |
| { |
| "epoch": 2.2800000000000002, |
| "grad_norm": 976.1454467773438, |
| "learning_rate": 1.35898804743963e-06, |
| "logits/chosen": 0.2227521389722824, |
| "logits/rejected": 0.13083919882774353, |
| "logps/chosen": -759.2448120117188, |
| "logps/rejected": -4507.90283203125, |
| "loss": -140.8261, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -136.7843780517578, |
| "rewards/margins": 1150.536865234375, |
| "rewards/rejected": -1287.3211669921875, |
| "step": 2565 |
| }, |
| { |
| "epoch": 2.2844444444444445, |
| "grad_norm": 8772.4306640625, |
| "learning_rate": 1.3430548927004045e-06, |
| "logits/chosen": 0.28518247604370117, |
| "logits/rejected": 0.16483518481254578, |
| "logps/chosen": -1061.1610107421875, |
| "logps/rejected": -5703.8583984375, |
| "loss": -139.7199, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -225.1636199951172, |
| "rewards/margins": 1419.538330078125, |
| "rewards/rejected": -1644.702392578125, |
| "step": 2570 |
| }, |
| { |
| "epoch": 2.2888888888888888, |
| "grad_norm": 34132.70703125, |
| "learning_rate": 1.3272011886391368e-06, |
| "logits/chosen": 0.33141201734542847, |
| "logits/rejected": 0.17792007327079773, |
| "logps/chosen": -922.2615356445312, |
| "logps/rejected": -5158.650390625, |
| "loss": -111.9785, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -183.021240234375, |
| "rewards/margins": 1301.0078125, |
| "rewards/rejected": -1484.029052734375, |
| "step": 2575 |
| }, |
| { |
| "epoch": 2.2933333333333334, |
| "grad_norm": 1763.7606201171875, |
| "learning_rate": 1.3114272796929179e-06, |
| "logits/chosen": 0.2872436046600342, |
| "logits/rejected": 0.15340930223464966, |
| "logps/chosen": -882.0314331054688, |
| "logps/rejected": -5464.994140625, |
| "loss": -151.7027, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": -172.9815673828125, |
| "rewards/margins": 1398.1480712890625, |
| "rewards/rejected": -1571.1295166015625, |
| "step": 2580 |
| }, |
| { |
| "epoch": 2.2977777777777777, |
| "grad_norm": 1740.0184326171875, |
| "learning_rate": 1.295733508565213e-06, |
| "logits/chosen": 0.2984916567802429, |
| "logits/rejected": 0.14384624361991882, |
| "logps/chosen": -981.6642456054688, |
| "logps/rejected": -6239.05078125, |
| "loss": -130.3876, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -193.13771057128906, |
| "rewards/margins": 1608.839599609375, |
| "rewards/rejected": -1801.9775390625, |
| "step": 2585 |
| }, |
| { |
| "epoch": 2.3022222222222224, |
| "grad_norm": 758.87548828125, |
| "learning_rate": 1.2801202162184156e-06, |
| "logits/chosen": 0.33013027906417847, |
| "logits/rejected": 0.21612338721752167, |
| "logps/chosen": -1064.0704345703125, |
| "logps/rejected": -6316.1181640625, |
| "loss": -189.2421, |
| "rewards/accuracies": 0.7562500238418579, |
| "rewards/chosen": -224.8413848876953, |
| "rewards/margins": 1602.2481689453125, |
| "rewards/rejected": -1827.0894775390625, |
| "step": 2590 |
| }, |
| { |
| "epoch": 2.3066666666666666, |
| "grad_norm": 15071.369140625, |
| "learning_rate": 1.2645877418664394e-06, |
| "logits/chosen": 0.32451096177101135, |
| "logits/rejected": 0.19353589415550232, |
| "logps/chosen": -1068.0587158203125, |
| "logps/rejected": -6766.76318359375, |
| "loss": -187.4031, |
| "rewards/accuracies": 0.7562500238418579, |
| "rewards/chosen": -220.43295288085938, |
| "rewards/margins": 1737.58203125, |
| "rewards/rejected": -1958.0152587890625, |
| "step": 2595 |
| }, |
| { |
| "epoch": 2.311111111111111, |
| "grad_norm": 12381.3037109375, |
| "learning_rate": 1.2491364229673487e-06, |
| "logits/chosen": 0.3370315432548523, |
| "logits/rejected": 0.1649162471294403, |
| "logps/chosen": -717.8569946289062, |
| "logps/rejected": -7134.41796875, |
| "loss": -239.0621, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -109.92384338378906, |
| "rewards/margins": 1961.102783203125, |
| "rewards/rejected": -2071.02685546875, |
| "step": 2600 |
| }, |
| { |
| "epoch": 2.3155555555555556, |
| "grad_norm": 455.9731140136719, |
| "learning_rate": 1.2337665952160266e-06, |
| "logits/chosen": 0.30162930488586426, |
| "logits/rejected": 0.17294518649578094, |
| "logps/chosen": -584.118896484375, |
| "logps/rejected": -5918.54931640625, |
| "loss": -201.4354, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -85.90629577636719, |
| "rewards/margins": 1623.7030029296875, |
| "rewards/rejected": -1709.609130859375, |
| "step": 2605 |
| }, |
| { |
| "epoch": 2.32, |
| "grad_norm": 19863.31640625, |
| "learning_rate": 1.218478592536883e-06, |
| "logits/chosen": 0.2775426506996155, |
| "logits/rejected": 0.16599637269973755, |
| "logps/chosen": -700.1888427734375, |
| "logps/rejected": -6176.19384765625, |
| "loss": -179.3026, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -113.01383209228516, |
| "rewards/margins": 1669.012939453125, |
| "rewards/rejected": -1782.0267333984375, |
| "step": 2610 |
| }, |
| { |
| "epoch": 2.3244444444444445, |
| "grad_norm": 1728.3558349609375, |
| "learning_rate": 1.2032727470765982e-06, |
| "logits/chosen": 0.2696084976196289, |
| "logits/rejected": 0.12217340618371964, |
| "logps/chosen": -736.8240966796875, |
| "logps/rejected": -4522.2509765625, |
| "loss": -144.1016, |
| "rewards/accuracies": 0.856249988079071, |
| "rewards/chosen": -126.67594146728516, |
| "rewards/margins": 1164.253662109375, |
| "rewards/rejected": -1290.9296875, |
| "step": 2615 |
| }, |
| { |
| "epoch": 2.328888888888889, |
| "grad_norm": 41318.84765625, |
| "learning_rate": 1.188149389196907e-06, |
| "logits/chosen": 0.2616775631904602, |
| "logits/rejected": 0.06497828662395477, |
| "logps/chosen": -1061.459716796875, |
| "logps/rejected": -4532.78369140625, |
| "loss": -94.431, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -220.02978515625, |
| "rewards/margins": 1071.6636962890625, |
| "rewards/rejected": -1291.693603515625, |
| "step": 2620 |
| }, |
| { |
| "epoch": 2.3333333333333335, |
| "grad_norm": 3416.536865234375, |
| "learning_rate": 1.1731088474674235e-06, |
| "logits/chosen": 0.2715473473072052, |
| "logits/rejected": 0.17155803740024567, |
| "logps/chosen": -1312.193115234375, |
| "logps/rejected": -5410.1376953125, |
| "loss": -107.787, |
| "rewards/accuracies": 0.768750011920929, |
| "rewards/chosen": -304.3921203613281, |
| "rewards/margins": 1249.8294677734375, |
| "rewards/rejected": -1554.2218017578125, |
| "step": 2625 |
| }, |
| { |
| "epoch": 2.3377777777777777, |
| "grad_norm": 22813.966796875, |
| "learning_rate": 1.158151448658496e-06, |
| "logits/chosen": 0.2805376946926117, |
| "logits/rejected": 0.09144130349159241, |
| "logps/chosen": -864.3079833984375, |
| "logps/rejected": -3818.399658203125, |
| "loss": -65.4111, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -168.9315948486328, |
| "rewards/margins": 911.1812744140625, |
| "rewards/rejected": -1080.1129150390625, |
| "step": 2630 |
| }, |
| { |
| "epoch": 2.3422222222222224, |
| "grad_norm": 261.12493896484375, |
| "learning_rate": 1.1432775177341165e-06, |
| "logits/chosen": 0.2519761919975281, |
| "logits/rejected": 0.17796723544597626, |
| "logps/chosen": -1471.5924072265625, |
| "logps/rejected": -6248.021484375, |
| "loss": -98.2758, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -344.00408935546875, |
| "rewards/margins": 1459.767822265625, |
| "rewards/rejected": -1803.771728515625, |
| "step": 2635 |
| }, |
| { |
| "epoch": 2.3466666666666667, |
| "grad_norm": 2255.53662109375, |
| "learning_rate": 1.1284873778448551e-06, |
| "logits/chosen": 0.3226759731769562, |
| "logits/rejected": 0.18996241688728333, |
| "logps/chosen": -909.2160034179688, |
| "logps/rejected": -5448.169921875, |
| "loss": -143.997, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -182.4736328125, |
| "rewards/margins": 1387.028564453125, |
| "rewards/rejected": -1569.5023193359375, |
| "step": 2640 |
| }, |
| { |
| "epoch": 2.351111111111111, |
| "grad_norm": 8862.724609375, |
| "learning_rate": 1.11378135032084e-06, |
| "logits/chosen": 0.28544437885284424, |
| "logits/rejected": 0.20408236980438232, |
| "logps/chosen": -1109.788330078125, |
| "logps/rejected": -5678.60009765625, |
| "loss": -129.311, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -242.9453582763672, |
| "rewards/margins": 1391.257568359375, |
| "rewards/rejected": -1634.202880859375, |
| "step": 2645 |
| }, |
| { |
| "epoch": 2.3555555555555556, |
| "grad_norm": 1130.7384033203125, |
| "learning_rate": 1.099159754664778e-06, |
| "logits/chosen": 0.26497143507003784, |
| "logits/rejected": 0.15883751213550568, |
| "logps/chosen": -836.6282958984375, |
| "logps/rejected": -5560.3154296875, |
| "loss": -167.006, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -156.88101196289062, |
| "rewards/margins": 1444.151611328125, |
| "rewards/rejected": -1601.032470703125, |
| "step": 2650 |
| }, |
| { |
| "epoch": 2.36, |
| "grad_norm": 539.4586791992188, |
| "learning_rate": 1.08462290854501e-06, |
| "logits/chosen": 0.24443700909614563, |
| "logits/rejected": 0.12075741589069366, |
| "logps/chosen": -725.8864135742188, |
| "logps/rejected": -5279.64599609375, |
| "loss": -173.5427, |
| "rewards/accuracies": 0.856249988079071, |
| "rewards/chosen": -126.3614273071289, |
| "rewards/margins": 1393.6824951171875, |
| "rewards/rejected": -1520.0439453125, |
| "step": 2655 |
| }, |
| { |
| "epoch": 2.3644444444444446, |
| "grad_norm": 2828.679443359375, |
| "learning_rate": 1.0701711277886113e-06, |
| "logits/chosen": 0.29601508378982544, |
| "logits/rejected": 0.11907931417226791, |
| "logps/chosen": -884.7530517578125, |
| "logps/rejected": -5574.60009765625, |
| "loss": -174.7385, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -166.56764221191406, |
| "rewards/margins": 1436.950439453125, |
| "rewards/rejected": -1603.5179443359375, |
| "step": 2660 |
| }, |
| { |
| "epoch": 2.368888888888889, |
| "grad_norm": 3390.908935546875, |
| "learning_rate": 1.0558047263745297e-06, |
| "logits/chosen": 0.2810467779636383, |
| "logits/rejected": 0.19021853804588318, |
| "logps/chosen": -911.3983154296875, |
| "logps/rejected": -5323.45166015625, |
| "loss": -166.5435, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -181.58909606933594, |
| "rewards/margins": 1347.478271484375, |
| "rewards/rejected": -1529.0672607421875, |
| "step": 2665 |
| }, |
| { |
| "epoch": 2.3733333333333335, |
| "grad_norm": 34407.26171875, |
| "learning_rate": 1.041524016426767e-06, |
| "logits/chosen": 0.20854806900024414, |
| "logits/rejected": 0.05795394256711006, |
| "logps/chosen": -712.537109375, |
| "logps/rejected": -3602.32275390625, |
| "loss": -80.607, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -119.67732238769531, |
| "rewards/margins": 895.1790161132812, |
| "rewards/rejected": -1014.8563232421875, |
| "step": 2670 |
| }, |
| { |
| "epoch": 2.3777777777777778, |
| "grad_norm": 9518.83203125, |
| "learning_rate": 1.0273293082075914e-06, |
| "logits/chosen": 0.2578274607658386, |
| "logits/rejected": 0.17585155367851257, |
| "logps/chosen": -1027.3427734375, |
| "logps/rejected": -6738.07275390625, |
| "loss": -215.5135, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": -211.1336669921875, |
| "rewards/margins": 1740.4661865234375, |
| "rewards/rejected": -1951.5999755859375, |
| "step": 2675 |
| }, |
| { |
| "epoch": 2.3822222222222225, |
| "grad_norm": 7502.2529296875, |
| "learning_rate": 1.0132209101107999e-06, |
| "logits/chosen": 0.3141968548297882, |
| "logits/rejected": 0.2107016146183014, |
| "logps/chosen": -1110.4361572265625, |
| "logps/rejected": -5156.166015625, |
| "loss": -122.0484, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": -241.15963745117188, |
| "rewards/margins": 1236.4522705078125, |
| "rewards/rejected": -1477.61181640625, |
| "step": 2680 |
| }, |
| { |
| "epoch": 2.3866666666666667, |
| "grad_norm": 238.7607879638672, |
| "learning_rate": 9.991991286550207e-07, |
| "logits/chosen": 0.23604659736156464, |
| "logits/rejected": 0.11110663414001465, |
| "logps/chosen": -579.2400512695312, |
| "logps/rejected": -4965.0341796875, |
| "loss": -166.4363, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -79.92164611816406, |
| "rewards/margins": 1343.4114990234375, |
| "rewards/rejected": -1423.333251953125, |
| "step": 2685 |
| }, |
| { |
| "epoch": 2.391111111111111, |
| "grad_norm": 5041.09130859375, |
| "learning_rate": 9.852642684770497e-07, |
| "logits/chosen": 0.26261353492736816, |
| "logits/rejected": 0.1222461462020874, |
| "logps/chosen": -826.0100708007812, |
| "logps/rejected": -5429.02294921875, |
| "loss": -170.3606, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -153.9402618408203, |
| "rewards/margins": 1409.7860107421875, |
| "rewards/rejected": -1563.7265625, |
| "step": 2690 |
| }, |
| { |
| "epoch": 2.3955555555555557, |
| "grad_norm": 1266.4052734375, |
| "learning_rate": 9.71416632325235e-07, |
| "logits/chosen": 0.31110304594039917, |
| "logits/rejected": 0.13888294994831085, |
| "logps/chosen": -890.7296752929688, |
| "logps/rejected": -5671.9345703125, |
| "loss": -169.221, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -180.40179443359375, |
| "rewards/margins": 1461.09912109375, |
| "rewards/rejected": -1641.5009765625, |
| "step": 2695 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 1724.4227294921875, |
| "learning_rate": 9.576565210528977e-07, |
| "logits/chosen": 0.2952241897583008, |
| "logits/rejected": 0.2211633175611496, |
| "logps/chosen": -963.3016357421875, |
| "logps/rejected": -6138.5263671875, |
| "loss": -192.7385, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -198.35617065429688, |
| "rewards/margins": 1576.3699951171875, |
| "rewards/rejected": -1774.7261962890625, |
| "step": 2700 |
| }, |
| { |
| "epoch": 2.4044444444444446, |
| "grad_norm": 8043.2890625, |
| "learning_rate": 9.439842336117954e-07, |
| "logits/chosen": 0.25455421209335327, |
| "logits/rejected": 0.18454737961292267, |
| "logps/chosen": -896.5437622070312, |
| "logps/rejected": -6594.96337890625, |
| "loss": -205.7928, |
| "rewards/accuracies": 0.7437499761581421, |
| "rewards/chosen": -174.62222290039062, |
| "rewards/margins": 1733.2877197265625, |
| "rewards/rejected": -1907.9097900390625, |
| "step": 2705 |
| }, |
| { |
| "epoch": 2.408888888888889, |
| "grad_norm": 3391.88623046875, |
| "learning_rate": 9.304000670456287e-07, |
| "logits/chosen": 0.3178189992904663, |
| "logits/rejected": 0.1605030745267868, |
| "logps/chosen": -1149.2420654296875, |
| "logps/rejected": -4620.4638671875, |
| "loss": -74.191, |
| "rewards/accuracies": 0.7437499761581421, |
| "rewards/chosen": -253.33938598632812, |
| "rewards/margins": 1061.9801025390625, |
| "rewards/rejected": -1315.3193359375, |
| "step": 2710 |
| }, |
| { |
| "epoch": 2.413333333333333, |
| "grad_norm": 214.3654022216797, |
| "learning_rate": 9.169043164835867e-07, |
| "logits/chosen": 0.30394309759140015, |
| "logits/rejected": 0.14178232848644257, |
| "logps/chosen": -768.9393310546875, |
| "logps/rejected": -4397.9794921875, |
| "loss": -137.648, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -139.43666076660156, |
| "rewards/margins": 1116.515625, |
| "rewards/rejected": -1255.9522705078125, |
| "step": 2715 |
| }, |
| { |
| "epoch": 2.417777777777778, |
| "grad_norm": 3922.317138671875, |
| "learning_rate": 9.034972751339344e-07, |
| "logits/chosen": 0.25802871584892273, |
| "logits/rejected": 0.17789769172668457, |
| "logps/chosen": -687.6468505859375, |
| "logps/rejected": -4543.0927734375, |
| "loss": -141.2324, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -114.65617370605469, |
| "rewards/margins": 1180.039794921875, |
| "rewards/rejected": -1294.6961669921875, |
| "step": 2720 |
| }, |
| { |
| "epoch": 2.422222222222222, |
| "grad_norm": 254.11538696289062, |
| "learning_rate": 8.901792342776439e-07, |
| "logits/chosen": 0.289792001247406, |
| "logits/rejected": 0.16305933892726898, |
| "logps/chosen": -759.8902587890625, |
| "logps/rejected": -4723.6533203125, |
| "loss": -149.0882, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -137.31631469726562, |
| "rewards/margins": 1211.2047119140625, |
| "rewards/rejected": -1348.52099609375, |
| "step": 2725 |
| }, |
| { |
| "epoch": 2.4266666666666667, |
| "grad_norm": 28783.611328125, |
| "learning_rate": 8.769504832620646e-07, |
| "logits/chosen": 0.24740520119667053, |
| "logits/rejected": 0.15953673422336578, |
| "logps/chosen": -1310.870361328125, |
| "logps/rejected": -6392.5654296875, |
| "loss": -117.6489, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -299.13153076171875, |
| "rewards/margins": 1550.385498046875, |
| "rewards/rejected": -1849.516845703125, |
| "step": 2730 |
| }, |
| { |
| "epoch": 2.431111111111111, |
| "grad_norm": 46932.33203125, |
| "learning_rate": 8.638113094946382e-07, |
| "logits/chosen": 0.2463511973619461, |
| "logits/rejected": 0.18184559047222137, |
| "logps/chosen": -1124.6669921875, |
| "logps/rejected": -5322.58740234375, |
| "loss": -44.6354, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -249.29104614257812, |
| "rewards/margins": 1281.93896484375, |
| "rewards/rejected": -1531.22998046875, |
| "step": 2735 |
| }, |
| { |
| "epoch": 2.4355555555555557, |
| "grad_norm": 25637.5234375, |
| "learning_rate": 8.507619984366533e-07, |
| "logits/chosen": 0.2657385766506195, |
| "logits/rejected": 0.11195264011621475, |
| "logps/chosen": -1541.96142578125, |
| "logps/rejected": -5743.16015625, |
| "loss": -76.6394, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -368.45343017578125, |
| "rewards/margins": 1289.7939453125, |
| "rewards/rejected": -1658.247314453125, |
| "step": 2740 |
| }, |
| { |
| "epoch": 2.44, |
| "grad_norm": 209.9908905029297, |
| "learning_rate": 8.378028335970451e-07, |
| "logits/chosen": 0.28514575958251953, |
| "logits/rejected": 0.17313992977142334, |
| "logps/chosen": -694.4436645507812, |
| "logps/rejected": -4759.2431640625, |
| "loss": -155.0138, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -113.36138916015625, |
| "rewards/margins": 1247.594970703125, |
| "rewards/rejected": -1360.95654296875, |
| "step": 2745 |
| }, |
| { |
| "epoch": 2.4444444444444446, |
| "grad_norm": 6302.7861328125, |
| "learning_rate": 8.249340965262326e-07, |
| "logits/chosen": 0.2982204258441925, |
| "logits/rejected": 0.15927128493785858, |
| "logps/chosen": -1578.1866455078125, |
| "logps/rejected": -5388.40380859375, |
| "loss": -50.8549, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -376.220703125, |
| "rewards/margins": 1173.39599609375, |
| "rewards/rejected": -1549.6168212890625, |
| "step": 2750 |
| }, |
| { |
| "epoch": 2.448888888888889, |
| "grad_norm": 52994.66015625, |
| "learning_rate": 8.121560668100065e-07, |
| "logits/chosen": 0.29376110434532166, |
| "logits/rejected": 0.17090515792369843, |
| "logps/chosen": -940.7385864257812, |
| "logps/rejected": -5072.6376953125, |
| "loss": -99.0853, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -188.0763397216797, |
| "rewards/margins": 1264.488037109375, |
| "rewards/rejected": -1452.5645751953125, |
| "step": 2755 |
| }, |
| { |
| "epoch": 2.453333333333333, |
| "grad_norm": 627.5823364257812, |
| "learning_rate": 7.994690220634505e-07, |
| "logits/chosen": 0.3252302408218384, |
| "logits/rejected": 0.20802000164985657, |
| "logps/chosen": -870.7932739257812, |
| "logps/rejected": -4798.0185546875, |
| "loss": -74.9661, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -176.578125, |
| "rewards/margins": 1199.25, |
| "rewards/rejected": -1375.828125, |
| "step": 2760 |
| }, |
| { |
| "epoch": 2.457777777777778, |
| "grad_norm": 876.1959838867188, |
| "learning_rate": 7.868732379249122e-07, |
| "logits/chosen": 0.3275570273399353, |
| "logits/rejected": 0.2259139120578766, |
| "logps/chosen": -836.3707275390625, |
| "logps/rejected": -5776.85888671875, |
| "loss": -186.1428, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -158.77273559570312, |
| "rewards/margins": 1506.2392578125, |
| "rewards/rejected": -1665.0120849609375, |
| "step": 2765 |
| }, |
| { |
| "epoch": 2.462222222222222, |
| "grad_norm": 981.3470458984375, |
| "learning_rate": 7.743689880500138e-07, |
| "logits/chosen": 0.27497947216033936, |
| "logits/rejected": 0.15269355475902557, |
| "logps/chosen": -1123.970947265625, |
| "logps/rejected": -6852.0, |
| "loss": -183.6466, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -239.05422973632812, |
| "rewards/margins": 1750.989990234375, |
| "rewards/rejected": -1990.044189453125, |
| "step": 2770 |
| }, |
| { |
| "epoch": 2.466666666666667, |
| "grad_norm": 3556.46240234375, |
| "learning_rate": 7.619565441057075e-07, |
| "logits/chosen": 0.28561097383499146, |
| "logits/rejected": 0.17919988930225372, |
| "logps/chosen": -1052.835693359375, |
| "logps/rejected": -4998.1611328125, |
| "loss": -82.4639, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -221.49697875976562, |
| "rewards/margins": 1210.818359375, |
| "rewards/rejected": -1432.3154296875, |
| "step": 2775 |
| }, |
| { |
| "epoch": 2.471111111111111, |
| "grad_norm": 3905.67333984375, |
| "learning_rate": 7.496361757643711e-07, |
| "logits/chosen": 0.26461154222488403, |
| "logits/rejected": 0.1348814219236374, |
| "logps/chosen": -591.9471435546875, |
| "logps/rejected": -4148.04833984375, |
| "loss": -134.6489, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -87.35942077636719, |
| "rewards/margins": 1090.668212890625, |
| "rewards/rejected": -1178.027587890625, |
| "step": 2780 |
| }, |
| { |
| "epoch": 2.4755555555555557, |
| "grad_norm": 1660.606201171875, |
| "learning_rate": 7.37408150697953e-07, |
| "logits/chosen": 0.2802228331565857, |
| "logits/rejected": 0.11637835204601288, |
| "logps/chosen": -1097.4710693359375, |
| "logps/rejected": -5994.0126953125, |
| "loss": -166.3307, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -225.012939453125, |
| "rewards/margins": 1505.164794921875, |
| "rewards/rejected": -1730.177734375, |
| "step": 2785 |
| }, |
| { |
| "epoch": 2.48, |
| "grad_norm": 1663.2200927734375, |
| "learning_rate": 7.252727345721522e-07, |
| "logits/chosen": 0.3185957074165344, |
| "logits/rejected": 0.15198683738708496, |
| "logps/chosen": -947.1329956054688, |
| "logps/rejected": -4434.3203125, |
| "loss": -99.0338, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -190.89508056640625, |
| "rewards/margins": 1071.29296875, |
| "rewards/rejected": -1262.18798828125, |
| "step": 2790 |
| }, |
| { |
| "epoch": 2.4844444444444447, |
| "grad_norm": 208.50205993652344, |
| "learning_rate": 7.132301910406503e-07, |
| "logits/chosen": 0.3126494288444519, |
| "logits/rejected": 0.1392475962638855, |
| "logps/chosen": -939.2322998046875, |
| "logps/rejected": -4408.1259765625, |
| "loss": -102.9164, |
| "rewards/accuracies": 0.856249988079071, |
| "rewards/chosen": -191.371337890625, |
| "rewards/margins": 1065.46240234375, |
| "rewards/rejected": -1256.833740234375, |
| "step": 2795 |
| }, |
| { |
| "epoch": 2.488888888888889, |
| "grad_norm": 1555.61474609375, |
| "learning_rate": 7.012807817393808e-07, |
| "logits/chosen": 0.27860361337661743, |
| "logits/rejected": 0.15866129100322723, |
| "logps/chosen": -720.2628173828125, |
| "logps/rejected": -6280.7958984375, |
| "loss": -207.936, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -111.23835754394531, |
| "rewards/margins": 1701.69140625, |
| "rewards/rejected": -1812.929931640625, |
| "step": 2800 |
| }, |
| { |
| "epoch": 2.493333333333333, |
| "grad_norm": 2379.057373046875, |
| "learning_rate": 6.894247662808456e-07, |
| "logits/chosen": 0.30893489718437195, |
| "logits/rejected": 0.19185426831245422, |
| "logps/chosen": -1076.5465087890625, |
| "logps/rejected": -5069.224609375, |
| "loss": -57.0669, |
| "rewards/accuracies": 0.7437499761581421, |
| "rewards/chosen": -233.0377960205078, |
| "rewards/margins": 1219.6016845703125, |
| "rewards/rejected": -1452.6395263671875, |
| "step": 2805 |
| }, |
| { |
| "epoch": 2.497777777777778, |
| "grad_norm": 3640.01318359375, |
| "learning_rate": 6.776624022484762e-07, |
| "logits/chosen": 0.28318601846694946, |
| "logits/rejected": 0.09088112413883209, |
| "logps/chosen": -1075.1552734375, |
| "logps/rejected": -4032.20849609375, |
| "loss": -48.1432, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -229.81808471679688, |
| "rewards/margins": 915.85009765625, |
| "rewards/rejected": -1145.668212890625, |
| "step": 2810 |
| }, |
| { |
| "epoch": 2.502222222222222, |
| "grad_norm": 4241.67041015625, |
| "learning_rate": 6.659939451910341e-07, |
| "logits/chosen": 0.25709307193756104, |
| "logits/rejected": 0.12720845639705658, |
| "logps/chosen": -1155.10400390625, |
| "logps/rejected": -5932.6298828125, |
| "loss": -158.6419, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -255.63681030273438, |
| "rewards/margins": 1458.3765869140625, |
| "rewards/rejected": -1714.0133056640625, |
| "step": 2815 |
| }, |
| { |
| "epoch": 2.506666666666667, |
| "grad_norm": 7445.6806640625, |
| "learning_rate": 6.544196486170628e-07, |
| "logits/chosen": 0.31700649857521057, |
| "logits/rejected": 0.17533881962299347, |
| "logps/chosen": -1753.020751953125, |
| "logps/rejected": -5132.28955078125, |
| "loss": -31.6605, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -437.10980224609375, |
| "rewards/margins": 1035.9033203125, |
| "rewards/rejected": -1473.0130615234375, |
| "step": 2820 |
| }, |
| { |
| "epoch": 2.511111111111111, |
| "grad_norm": 76.35946655273438, |
| "learning_rate": 6.429397639893758e-07, |
| "logits/chosen": 0.29984182119369507, |
| "logits/rejected": 0.14310705661773682, |
| "logps/chosen": -669.9993896484375, |
| "logps/rejected": -4669.39697265625, |
| "loss": -125.5323, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -108.04997253417969, |
| "rewards/margins": 1224.614013671875, |
| "rewards/rejected": -1332.6640625, |
| "step": 2825 |
| }, |
| { |
| "epoch": 2.5155555555555553, |
| "grad_norm": 1889.5223388671875, |
| "learning_rate": 6.315545407195972e-07, |
| "logits/chosen": 0.30137237906455994, |
| "logits/rejected": 0.17334292829036713, |
| "logps/chosen": -1158.4581298828125, |
| "logps/rejected": -5883.08447265625, |
| "loss": -175.8998, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -254.6534423828125, |
| "rewards/margins": 1442.730712890625, |
| "rewards/rejected": -1697.384033203125, |
| "step": 2830 |
| }, |
| { |
| "epoch": 2.52, |
| "grad_norm": 64488.90625, |
| "learning_rate": 6.202642261627411e-07, |
| "logits/chosen": 0.29571765661239624, |
| "logits/rejected": 0.1689019501209259, |
| "logps/chosen": -1035.979248046875, |
| "logps/rejected": -5526.853515625, |
| "loss": -81.0478, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -219.8235321044922, |
| "rewards/margins": 1370.886962890625, |
| "rewards/rejected": -1590.710693359375, |
| "step": 2835 |
| }, |
| { |
| "epoch": 2.5244444444444447, |
| "grad_norm": 939.4684448242188, |
| "learning_rate": 6.090690656118376e-07, |
| "logits/chosen": 0.24368198215961456, |
| "logits/rejected": 0.13803556561470032, |
| "logps/chosen": -1286.31884765625, |
| "logps/rejected": -5878.44921875, |
| "loss": -127.8098, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -293.3697204589844, |
| "rewards/margins": 1399.4827880859375, |
| "rewards/rejected": -1692.852294921875, |
| "step": 2840 |
| }, |
| { |
| "epoch": 2.528888888888889, |
| "grad_norm": 7556.40869140625, |
| "learning_rate": 5.979693022926025e-07, |
| "logits/chosen": 0.2953812777996063, |
| "logits/rejected": 0.21100744605064392, |
| "logps/chosen": -867.93212890625, |
| "logps/rejected": -6264.2119140625, |
| "loss": -175.4742, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -166.1602783203125, |
| "rewards/margins": 1642.23046875, |
| "rewards/rejected": -1808.390625, |
| "step": 2845 |
| }, |
| { |
| "epoch": 2.533333333333333, |
| "grad_norm": 1970.3321533203125, |
| "learning_rate": 5.869651773581569e-07, |
| "logits/chosen": 0.27132663130760193, |
| "logits/rejected": 0.15004324913024902, |
| "logps/chosen": -817.6067504882812, |
| "logps/rejected": -4236.732421875, |
| "loss": -127.4373, |
| "rewards/accuracies": 0.768750011920929, |
| "rewards/chosen": -150.48648071289062, |
| "rewards/margins": 1048.2225341796875, |
| "rewards/rejected": -1198.709228515625, |
| "step": 2850 |
| }, |
| { |
| "epoch": 2.537777777777778, |
| "grad_norm": 117.9075698852539, |
| "learning_rate": 5.760569298837825e-07, |
| "logits/chosen": 0.268520712852478, |
| "logits/rejected": 0.18351659178733826, |
| "logps/chosen": -942.9318237304688, |
| "logps/rejected": -5472.38427734375, |
| "loss": -129.0516, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -190.59347534179688, |
| "rewards/margins": 1382.4625244140625, |
| "rewards/rejected": -1573.0560302734375, |
| "step": 2855 |
| }, |
| { |
| "epoch": 2.542222222222222, |
| "grad_norm": 478.9919128417969, |
| "learning_rate": 5.652447968617325e-07, |
| "logits/chosen": 0.2659715414047241, |
| "logits/rejected": 0.13578690588474274, |
| "logps/chosen": -970.9541015625, |
| "logps/rejected": -5885.84912109375, |
| "loss": -136.3934, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -195.96975708007812, |
| "rewards/margins": 1503.201904296875, |
| "rewards/rejected": -1699.1715087890625, |
| "step": 2860 |
| }, |
| { |
| "epoch": 2.546666666666667, |
| "grad_norm": 39385.68359375, |
| "learning_rate": 5.54529013196079e-07, |
| "logits/chosen": 0.2462116777896881, |
| "logits/rejected": 0.14219094812870026, |
| "logps/chosen": -894.8280029296875, |
| "logps/rejected": -5302.37841796875, |
| "loss": -137.5195, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -172.64376831054688, |
| "rewards/margins": 1348.2857666015625, |
| "rewards/rejected": -1520.9293212890625, |
| "step": 2865 |
| }, |
| { |
| "epoch": 2.551111111111111, |
| "grad_norm": 2286.886474609375, |
| "learning_rate": 5.43909811697611e-07, |
| "logits/chosen": 0.276773601770401, |
| "logits/rejected": 0.19585570693016052, |
| "logps/chosen": -913.3253173828125, |
| "logps/rejected": -7219.9755859375, |
| "loss": -186.5, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -178.7404327392578, |
| "rewards/margins": 1916.071533203125, |
| "rewards/rejected": -2094.811767578125, |
| "step": 2870 |
| }, |
| { |
| "epoch": 2.5555555555555554, |
| "grad_norm": 5329.54931640625, |
| "learning_rate": 5.333874230787772e-07, |
| "logits/chosen": 0.29106637835502625, |
| "logits/rejected": 0.14704649150371552, |
| "logps/chosen": -770.7490844726562, |
| "logps/rejected": -4565.36083984375, |
| "loss": -133.2276, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -138.66664123535156, |
| "rewards/margins": 1162.058837890625, |
| "rewards/rejected": -1300.7255859375, |
| "step": 2875 |
| }, |
| { |
| "epoch": 2.56, |
| "grad_norm": 262.0441589355469, |
| "learning_rate": 5.22962075948672e-07, |
| "logits/chosen": 0.26344218850135803, |
| "logits/rejected": 0.1744934469461441, |
| "logps/chosen": -961.9212036132812, |
| "logps/rejected": -6654.2138671875, |
| "loss": -161.2493, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -198.50918579101562, |
| "rewards/margins": 1731.0286865234375, |
| "rewards/rejected": -1929.537841796875, |
| "step": 2880 |
| }, |
| { |
| "epoch": 2.5644444444444443, |
| "grad_norm": 4153.9013671875, |
| "learning_rate": 5.126339968080696e-07, |
| "logits/chosen": 0.3136979043483734, |
| "logits/rejected": 0.19271951913833618, |
| "logps/chosen": -1034.75439453125, |
| "logps/rejected": -5065.7763671875, |
| "loss": -151.362, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -216.62698364257812, |
| "rewards/margins": 1232.653564453125, |
| "rewards/rejected": -1449.28076171875, |
| "step": 2885 |
| }, |
| { |
| "epoch": 2.568888888888889, |
| "grad_norm": 1322.776123046875, |
| "learning_rate": 5.024034100445019e-07, |
| "logits/chosen": 0.3199235796928406, |
| "logits/rejected": 0.2028283178806305, |
| "logps/chosen": -1238.3748779296875, |
| "logps/rejected": -7070.80859375, |
| "loss": -130.184, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -279.8954772949219, |
| "rewards/margins": 1772.2200927734375, |
| "rewards/rejected": -2052.11572265625, |
| "step": 2890 |
| }, |
| { |
| "epoch": 2.5733333333333333, |
| "grad_norm": 2449.66259765625, |
| "learning_rate": 4.922705379273862e-07, |
| "logits/chosen": 0.31815481185913086, |
| "logits/rejected": 0.15612837672233582, |
| "logps/chosen": -758.1586303710938, |
| "logps/rejected": -5149.2646484375, |
| "loss": -136.5634, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -132.6713104248047, |
| "rewards/margins": 1347.046142578125, |
| "rewards/rejected": -1479.7174072265625, |
| "step": 2895 |
| }, |
| { |
| "epoch": 2.5777777777777775, |
| "grad_norm": 1349.1029052734375, |
| "learning_rate": 4.822356006031925e-07, |
| "logits/chosen": 0.27785414457321167, |
| "logits/rejected": 0.15942269563674927, |
| "logps/chosen": -931.7042236328125, |
| "logps/rejected": -6512.2880859375, |
| "loss": -176.7551, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -182.6170196533203, |
| "rewards/margins": 1704.859619140625, |
| "rewards/rejected": -1887.476318359375, |
| "step": 2900 |
| }, |
| { |
| "epoch": 2.582222222222222, |
| "grad_norm": 2075.302734375, |
| "learning_rate": 4.7229881609066387e-07, |
| "logits/chosen": 0.2660304605960846, |
| "logits/rejected": 0.20030149817466736, |
| "logps/chosen": -700.0392456054688, |
| "logps/rejected": -4857.5595703125, |
| "loss": -128.6582, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -125.2123794555664, |
| "rewards/margins": 1267.055419921875, |
| "rewards/rejected": -1392.2677001953125, |
| "step": 2905 |
| }, |
| { |
| "epoch": 2.586666666666667, |
| "grad_norm": 4998.267578125, |
| "learning_rate": 4.624604002760769e-07, |
| "logits/chosen": 0.2807886600494385, |
| "logits/rejected": 0.14153829216957092, |
| "logps/chosen": -984.181640625, |
| "logps/rejected": -5608.037109375, |
| "loss": -98.5327, |
| "rewards/accuracies": 0.768750011920929, |
| "rewards/chosen": -202.26271057128906, |
| "rewards/margins": 1414.0897216796875, |
| "rewards/rejected": -1616.3524169921875, |
| "step": 2910 |
| }, |
| { |
| "epoch": 2.591111111111111, |
| "grad_norm": 47020.91796875, |
| "learning_rate": 4.5272056690855494e-07, |
| "logits/chosen": 0.28023654222488403, |
| "logits/rejected": 0.17847904562950134, |
| "logps/chosen": -1161.7044677734375, |
| "logps/rejected": -5072.55322265625, |
| "loss": -108.1601, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -259.67950439453125, |
| "rewards/margins": 1196.2237548828125, |
| "rewards/rejected": -1455.9033203125, |
| "step": 2915 |
| }, |
| { |
| "epoch": 2.5955555555555554, |
| "grad_norm": 507.62255859375, |
| "learning_rate": 4.430795275954203e-07, |
| "logits/chosen": 0.2736111283302307, |
| "logits/rejected": 0.11480126529932022, |
| "logps/chosen": -1178.013427734375, |
| "logps/rejected": -4356.06494140625, |
| "loss": -74.2978, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -263.2037353515625, |
| "rewards/margins": 983.2220458984375, |
| "rewards/rejected": -1246.4256591796875, |
| "step": 2920 |
| }, |
| { |
| "epoch": 2.6, |
| "grad_norm": 2745.565673828125, |
| "learning_rate": 4.335374917975982e-07, |
| "logits/chosen": 0.3306751549243927, |
| "logits/rejected": 0.1977991759777069, |
| "logps/chosen": -887.9862060546875, |
| "logps/rejected": -5503.65380859375, |
| "loss": -160.5708, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -174.6531219482422, |
| "rewards/margins": 1412.6531982421875, |
| "rewards/rejected": -1587.3062744140625, |
| "step": 2925 |
| }, |
| { |
| "epoch": 2.6044444444444443, |
| "grad_norm": 49781.76171875, |
| "learning_rate": 4.240946668250695e-07, |
| "logits/chosen": 0.32286888360977173, |
| "logits/rejected": 0.1941709816455841, |
| "logps/chosen": -1057.018798828125, |
| "logps/rejected": -5550.3740234375, |
| "loss": -117.8244, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -226.92691040039062, |
| "rewards/margins": 1372.856201171875, |
| "rewards/rejected": -1599.7830810546875, |
| "step": 2930 |
| }, |
| { |
| "epoch": 2.608888888888889, |
| "grad_norm": 425.31451416015625, |
| "learning_rate": 4.147512578323615e-07, |
| "logits/chosen": 0.3183066248893738, |
| "logits/rejected": 0.1569916307926178, |
| "logps/chosen": -723.6983642578125, |
| "logps/rejected": -5880.9716796875, |
| "loss": -194.4636, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -126.38167572021484, |
| "rewards/margins": 1574.293212890625, |
| "rewards/rejected": -1700.675048828125, |
| "step": 2935 |
| }, |
| { |
| "epoch": 2.6133333333333333, |
| "grad_norm": 363.6115417480469, |
| "learning_rate": 4.055074678140924e-07, |
| "logits/chosen": 0.27981218695640564, |
| "logits/rejected": 0.14406158030033112, |
| "logps/chosen": -871.66796875, |
| "logps/rejected": -4052.08447265625, |
| "loss": -60.8822, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -175.34841918945312, |
| "rewards/margins": 976.5602416992188, |
| "rewards/rejected": -1151.9085693359375, |
| "step": 2940 |
| }, |
| { |
| "epoch": 2.6177777777777775, |
| "grad_norm": 4091.587890625, |
| "learning_rate": 3.9636349760056427e-07, |
| "logits/chosen": 0.2926829159259796, |
| "logits/rejected": 0.18066337704658508, |
| "logps/chosen": -917.7472534179688, |
| "logps/rejected": -4688.95947265625, |
| "loss": -66.4249, |
| "rewards/accuracies": 0.7437499761581421, |
| "rewards/chosen": -183.13433837890625, |
| "rewards/margins": 1157.160400390625, |
| "rewards/rejected": -1340.294677734375, |
| "step": 2945 |
| }, |
| { |
| "epoch": 2.6222222222222222, |
| "grad_norm": 438.34112548828125, |
| "learning_rate": 3.8731954585339236e-07, |
| "logits/chosen": 0.287516325712204, |
| "logits/rejected": 0.13789844512939453, |
| "logps/chosen": -640.5111083984375, |
| "logps/rejected": -4453.0732421875, |
| "loss": -125.7426, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -101.16863250732422, |
| "rewards/margins": 1169.9163818359375, |
| "rewards/rejected": -1271.085205078125, |
| "step": 2950 |
| }, |
| { |
| "epoch": 2.626666666666667, |
| "grad_norm": 24082.072265625, |
| "learning_rate": 3.783758090611983e-07, |
| "logits/chosen": 0.2945462167263031, |
| "logits/rejected": 0.2195758819580078, |
| "logps/chosen": -1500.5338134765625, |
| "logps/rejected": -6326.08984375, |
| "loss": -94.1291, |
| "rewards/accuracies": 0.768750011920929, |
| "rewards/chosen": -358.7112121582031, |
| "rewards/margins": 1471.235107421875, |
| "rewards/rejected": -1829.946533203125, |
| "step": 2955 |
| }, |
| { |
| "epoch": 2.631111111111111, |
| "grad_norm": 46658.03515625, |
| "learning_rate": 3.6953248153533515e-07, |
| "logits/chosen": 0.29300108551979065, |
| "logits/rejected": 0.14128407835960388, |
| "logps/chosen": -1227.6444091796875, |
| "logps/rejected": -5651.7646484375, |
| "loss": -65.2221, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -268.34100341796875, |
| "rewards/margins": 1360.017822265625, |
| "rewards/rejected": -1628.3587646484375, |
| "step": 2960 |
| }, |
| { |
| "epoch": 2.6355555555555554, |
| "grad_norm": 28998.833984375, |
| "learning_rate": 3.6078975540566716e-07, |
| "logits/chosen": 0.26225921511650085, |
| "logits/rejected": 0.162250354886055, |
| "logps/chosen": -834.6824340820312, |
| "logps/rejected": -4251.00830078125, |
| "loss": -121.6408, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": -151.69883728027344, |
| "rewards/margins": 1052.7584228515625, |
| "rewards/rejected": -1204.4571533203125, |
| "step": 2965 |
| }, |
| { |
| "epoch": 2.64, |
| "grad_norm": 3350.82177734375, |
| "learning_rate": 3.5214782061639585e-07, |
| "logits/chosen": 0.2726621925830841, |
| "logits/rejected": 0.15095624327659607, |
| "logps/chosen": -606.6204833984375, |
| "logps/rejected": -5018.07666015625, |
| "loss": -167.8164, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -83.92247772216797, |
| "rewards/margins": 1354.77001953125, |
| "rewards/rejected": -1438.692626953125, |
| "step": 2970 |
| }, |
| { |
| "epoch": 2.6444444444444444, |
| "grad_norm": 9222.0986328125, |
| "learning_rate": 3.4360686492193263e-07, |
| "logits/chosen": 0.3132045567035675, |
| "logits/rejected": 0.19590581953525543, |
| "logps/chosen": -731.6533813476562, |
| "logps/rejected": -5446.47900390625, |
| "loss": -176.554, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -129.68350219726562, |
| "rewards/margins": 1432.760986328125, |
| "rewards/rejected": -1562.4447021484375, |
| "step": 2975 |
| }, |
| { |
| "epoch": 2.648888888888889, |
| "grad_norm": 938.7496948242188, |
| "learning_rate": 3.351670738828211e-07, |
| "logits/chosen": 0.25676268339157104, |
| "logits/rejected": 0.13642063736915588, |
| "logps/chosen": -693.4869995117188, |
| "logps/rejected": -5993.0966796875, |
| "loss": -200.1757, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -112.75614166259766, |
| "rewards/margins": 1614.5584716796875, |
| "rewards/rejected": -1727.314697265625, |
| "step": 2980 |
| }, |
| { |
| "epoch": 2.6533333333333333, |
| "grad_norm": 6245.14306640625, |
| "learning_rate": 3.268286308617041e-07, |
| "logits/chosen": 0.2964590787887573, |
| "logits/rejected": 0.19171063601970673, |
| "logps/chosen": -826.4782104492188, |
| "logps/rejected": -5780.4306640625, |
| "loss": -174.8108, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -152.87879943847656, |
| "rewards/margins": 1511.440185546875, |
| "rewards/rejected": -1664.319091796875, |
| "step": 2985 |
| }, |
| { |
| "epoch": 2.6577777777777776, |
| "grad_norm": 522.4179077148438, |
| "learning_rate": 3.1859171701934033e-07, |
| "logits/chosen": 0.24954314529895782, |
| "logits/rejected": 0.2331644594669342, |
| "logps/chosen": -800.5831909179688, |
| "logps/rejected": -7081.02197265625, |
| "loss": -223.0746, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -149.42422485351562, |
| "rewards/margins": 1907.770263671875, |
| "rewards/rejected": -2057.1943359375, |
| "step": 2990 |
| }, |
| { |
| "epoch": 2.6622222222222223, |
| "grad_norm": 6589.33935546875, |
| "learning_rate": 3.104565113106689e-07, |
| "logits/chosen": 0.2932339906692505, |
| "logits/rejected": 0.1445777863264084, |
| "logps/chosen": -910.7932739257812, |
| "logps/rejected": -5050.48974609375, |
| "loss": -156.6396, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -178.19857788085938, |
| "rewards/margins": 1272.28076171875, |
| "rewards/rejected": -1450.479248046875, |
| "step": 2995 |
| }, |
| { |
| "epoch": 2.6666666666666665, |
| "grad_norm": 239.47412109375, |
| "learning_rate": 3.024231904809222e-07, |
| "logits/chosen": 0.2685368061065674, |
| "logits/rejected": 0.15247103571891785, |
| "logps/chosen": -1137.364013671875, |
| "logps/rejected": -5778.4267578125, |
| "loss": -152.9642, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -254.4438934326172, |
| "rewards/margins": 1415.1513671875, |
| "rewards/rejected": -1669.5953369140625, |
| "step": 3000 |
| }, |
| { |
| "epoch": 2.6666666666666665, |
| "eval_logits/chosen": 0.28874698281288147, |
| "eval_logits/rejected": 0.16911287605762482, |
| "eval_logps/chosen": -859.7401123046875, |
| "eval_logps/rejected": -5484.81494140625, |
| "eval_loss": -146.18759155273438, |
| "eval_rewards/accuracies": 0.8042500019073486, |
| "eval_rewards/chosen": -164.916015625, |
| "eval_rewards/margins": 1413.53369140625, |
| "eval_rewards/rejected": -1578.449951171875, |
| "eval_runtime": 2194.092, |
| "eval_samples_per_second": 1.823, |
| "eval_steps_per_second": 0.912, |
| "step": 3000 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 3375, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|