| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.8888888888888888, |
| "eval_steps": 500, |
| "global_step": 1000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0044444444444444444, |
| "grad_norm": 3.6353344917297363, |
| "learning_rate": 1e-05, |
| "logits/chosen": -0.4628738462924957, |
| "logits/rejected": -0.46038827300071716, |
| "logps/chosen": -305.24371337890625, |
| "logps/rejected": -217.2339324951172, |
| "loss": 0.69, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 0.006762671284377575, |
| "rewards/margins": 0.005093236453831196, |
| "rewards/rejected": 0.0016694354126229882, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.008888888888888889, |
| "grad_norm": 3.840994119644165, |
| "learning_rate": 9.999945685076187e-06, |
| "logits/chosen": -0.4660520553588867, |
| "logits/rejected": -0.4597313404083252, |
| "logps/chosen": -295.14178466796875, |
| "logps/rejected": -215.0008544921875, |
| "loss": 0.6585, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": 0.07166890054941177, |
| "rewards/margins": 0.057903312146663666, |
| "rewards/rejected": 0.013765583746135235, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.013333333333333334, |
| "grad_norm": 3.3536572456359863, |
| "learning_rate": 9.99978274148479e-06, |
| "logits/chosen": -0.46407952904701233, |
| "logits/rejected": -0.46835923194885254, |
| "logps/chosen": -307.1814880371094, |
| "logps/rejected": -220.930908203125, |
| "loss": 0.6155, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": 0.18775026500225067, |
| "rewards/margins": 0.13849034905433655, |
| "rewards/rejected": 0.049259938299655914, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.017777777777777778, |
| "grad_norm": 3.603736400604248, |
| "learning_rate": 9.999511172765917e-06, |
| "logits/chosen": -0.4126955568790436, |
| "logits/rejected": -0.4344128668308258, |
| "logps/chosen": -295.6951599121094, |
| "logps/rejected": -219.573974609375, |
| "loss": 0.572, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": 0.3320372700691223, |
| "rewards/margins": 0.22264714539051056, |
| "rewards/rejected": 0.10939009487628937, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.022222222222222223, |
| "grad_norm": 3.7065272331237793, |
| "learning_rate": 9.999130984819662e-06, |
| "logits/chosen": -0.42767876386642456, |
| "logits/rejected": -0.4458894729614258, |
| "logps/chosen": -331.75592041015625, |
| "logps/rejected": -231.910400390625, |
| "loss": 0.5323, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": 0.5343211889266968, |
| "rewards/margins": 0.32672011852264404, |
| "rewards/rejected": 0.20760111510753632, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.02666666666666667, |
| "grad_norm": 3.656710624694824, |
| "learning_rate": 9.998642185905977e-06, |
| "logits/chosen": -0.44063276052474976, |
| "logits/rejected": -0.4492092728614807, |
| "logps/chosen": -311.40277099609375, |
| "logps/rejected": -233.438720703125, |
| "loss": 0.5154, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": 0.6043025851249695, |
| "rewards/margins": 0.3682531714439392, |
| "rewards/rejected": 0.23604938387870789, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.03111111111111111, |
| "grad_norm": 3.5971853733062744, |
| "learning_rate": 9.998044786644492e-06, |
| "logits/chosen": -0.39475446939468384, |
| "logits/rejected": -0.4055609703063965, |
| "logps/chosen": -298.6465759277344, |
| "logps/rejected": -219.363525390625, |
| "loss": 0.4452, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": 0.8082733154296875, |
| "rewards/margins": 0.5571426153182983, |
| "rewards/rejected": 0.25113070011138916, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.035555555555555556, |
| "grad_norm": 3.67197322845459, |
| "learning_rate": 9.997338800014284e-06, |
| "logits/chosen": -0.41250643134117126, |
| "logits/rejected": -0.4259340167045593, |
| "logps/chosen": -293.3608703613281, |
| "logps/rejected": -224.9442901611328, |
| "loss": 0.4586, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": 0.867678165435791, |
| "rewards/margins": 0.5272501707077026, |
| "rewards/rejected": 0.34042787551879883, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 5.049458026885986, |
| "learning_rate": 9.9965242413536e-06, |
| "logits/chosen": -0.41178879141807556, |
| "logits/rejected": -0.4304323196411133, |
| "logps/chosen": -306.1034851074219, |
| "logps/rejected": -228.5247802734375, |
| "loss": 0.3777, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": 1.235072374343872, |
| "rewards/margins": 0.7781749367713928, |
| "rewards/rejected": 0.4568973183631897, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.044444444444444446, |
| "grad_norm": 3.9216673374176025, |
| "learning_rate": 9.995601128359516e-06, |
| "logits/chosen": -0.40246009826660156, |
| "logits/rejected": -0.3950818181037903, |
| "logps/chosen": -303.0498352050781, |
| "logps/rejected": -226.4988250732422, |
| "loss": 0.3999, |
| "rewards/accuracies": 0.731249988079071, |
| "rewards/chosen": 1.3293551206588745, |
| "rewards/margins": 0.8089650869369507, |
| "rewards/rejected": 0.5203902721405029, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.04888888888888889, |
| "grad_norm": 4.434176921844482, |
| "learning_rate": 9.994569481087552e-06, |
| "logits/chosen": -0.39378249645233154, |
| "logits/rejected": -0.40684300661087036, |
| "logps/chosen": -329.46173095703125, |
| "logps/rejected": -223.7794952392578, |
| "loss": 0.3168, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": 1.6077896356582642, |
| "rewards/margins": 1.0661401748657227, |
| "rewards/rejected": 0.5416494607925415, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.05333333333333334, |
| "grad_norm": 4.062209129333496, |
| "learning_rate": 9.993429321951251e-06, |
| "logits/chosen": -0.34955719113349915, |
| "logits/rejected": -0.3819810748100281, |
| "logps/chosen": -291.4505615234375, |
| "logps/rejected": -212.7031707763672, |
| "loss": 0.2348, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 1.9593639373779297, |
| "rewards/margins": 1.3969981670379639, |
| "rewards/rejected": 0.5623658299446106, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.057777777777777775, |
| "grad_norm": 4.022445201873779, |
| "learning_rate": 9.992180675721671e-06, |
| "logits/chosen": -0.3607024550437927, |
| "logits/rejected": -0.3758237659931183, |
| "logps/chosen": -325.44622802734375, |
| "logps/rejected": -226.538818359375, |
| "loss": 0.1438, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": 2.2623605728149414, |
| "rewards/margins": 1.6984504461288452, |
| "rewards/rejected": 0.5639100670814514, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.06222222222222222, |
| "grad_norm": 4.516530513763428, |
| "learning_rate": 9.990823569526868e-06, |
| "logits/chosen": -0.3758849501609802, |
| "logits/rejected": -0.401409387588501, |
| "logps/chosen": -293.59283447265625, |
| "logps/rejected": -212.1451873779297, |
| "loss": 0.2153, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": 2.2533986568450928, |
| "rewards/margins": 1.599491000175476, |
| "rewards/rejected": 0.6539075374603271, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.06666666666666667, |
| "grad_norm": 4.2673563957214355, |
| "learning_rate": 9.989358032851283e-06, |
| "logits/chosen": -0.38496989011764526, |
| "logits/rejected": -0.4185038208961487, |
| "logps/chosen": -331.06707763671875, |
| "logps/rejected": -238.4701690673828, |
| "loss": 0.2667, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": 2.29125714302063, |
| "rewards/margins": 1.5516706705093384, |
| "rewards/rejected": 0.739586353302002, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.07111111111111111, |
| "grad_norm": 4.720420837402344, |
| "learning_rate": 9.987784097535126e-06, |
| "logits/chosen": -0.36235010623931885, |
| "logits/rejected": -0.3792596757411957, |
| "logps/chosen": -303.70196533203125, |
| "logps/rejected": -226.2982940673828, |
| "loss": 0.1933, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": 2.5473880767822266, |
| "rewards/margins": 1.8545589447021484, |
| "rewards/rejected": 0.6928290128707886, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.07555555555555556, |
| "grad_norm": 7.281383037567139, |
| "learning_rate": 9.986101797773667e-06, |
| "logits/chosen": -0.380900114774704, |
| "logits/rejected": -0.3917911946773529, |
| "logps/chosen": -295.6128234863281, |
| "logps/rejected": -229.4749298095703, |
| "loss": 0.213, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": 2.3351025581359863, |
| "rewards/margins": 1.7777379751205444, |
| "rewards/rejected": 0.5573645830154419, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 4.908195972442627, |
| "learning_rate": 9.984311170116497e-06, |
| "logits/chosen": -0.37983238697052, |
| "logits/rejected": -0.3918471932411194, |
| "logps/chosen": -291.7480163574219, |
| "logps/rejected": -220.09652709960938, |
| "loss": 0.1737, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": 2.4933111667633057, |
| "rewards/margins": 1.936655044555664, |
| "rewards/rejected": 0.5566561222076416, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.08444444444444445, |
| "grad_norm": 5.108543395996094, |
| "learning_rate": 9.98241225346674e-06, |
| "logits/chosen": -0.34952667355537415, |
| "logits/rejected": -0.3850114643573761, |
| "logps/chosen": -302.5187683105469, |
| "logps/rejected": -222.33834838867188, |
| "loss": 0.1069, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": 2.5169692039489746, |
| "rewards/margins": 2.1484999656677246, |
| "rewards/rejected": 0.36846891045570374, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.08888888888888889, |
| "grad_norm": 4.840832233428955, |
| "learning_rate": 9.9804050890802e-06, |
| "logits/chosen": -0.33506280183792114, |
| "logits/rejected": -0.3472011089324951, |
| "logps/chosen": -269.5045471191406, |
| "logps/rejected": -205.8480987548828, |
| "loss": 0.1074, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": 2.4071407318115234, |
| "rewards/margins": 2.1650052070617676, |
| "rewards/rejected": 0.24213531613349915, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.09333333333333334, |
| "grad_norm": 5.977898120880127, |
| "learning_rate": 9.978289720564471e-06, |
| "logits/chosen": -0.33771952986717224, |
| "logits/rejected": -0.3690803050994873, |
| "logps/chosen": -308.5394287109375, |
| "logps/rejected": -226.24813842773438, |
| "loss": 0.2107, |
| "rewards/accuracies": 0.7562500238418579, |
| "rewards/chosen": 2.450155735015869, |
| "rewards/margins": 2.019880771636963, |
| "rewards/rejected": 0.4302748143672943, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.09777777777777778, |
| "grad_norm": 6.031980514526367, |
| "learning_rate": 9.976066193877982e-06, |
| "logits/chosen": -0.347932904958725, |
| "logits/rejected": -0.3845617175102234, |
| "logps/chosen": -293.0547790527344, |
| "logps/rejected": -220.5181427001953, |
| "loss": 0.1324, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": 2.321216106414795, |
| "rewards/margins": 2.2665910720825195, |
| "rewards/rejected": 0.054625045508146286, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.10222222222222223, |
| "grad_norm": 4.3759660720825195, |
| "learning_rate": 9.97373455732901e-06, |
| "logits/chosen": -0.34049180150032043, |
| "logits/rejected": -0.35589173436164856, |
| "logps/chosen": -294.77117919921875, |
| "logps/rejected": -228.1370086669922, |
| "loss": 0.0647, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": 2.3987247943878174, |
| "rewards/margins": 2.2897610664367676, |
| "rewards/rejected": 0.1089634895324707, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.10666666666666667, |
| "grad_norm": 4.7751336097717285, |
| "learning_rate": 9.971294861574617e-06, |
| "logits/chosen": -0.3569382429122925, |
| "logits/rejected": -0.35876479744911194, |
| "logps/chosen": -285.916748046875, |
| "logps/rejected": -219.81753540039062, |
| "loss": 0.0106, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 2.5361573696136475, |
| "rewards/margins": 2.6316070556640625, |
| "rewards/rejected": -0.09544976055622101, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.1111111111111111, |
| "grad_norm": 6.836686134338379, |
| "learning_rate": 9.968747159619556e-06, |
| "logits/chosen": -0.3644478917121887, |
| "logits/rejected": -0.3773222863674164, |
| "logps/chosen": -301.05084228515625, |
| "logps/rejected": -231.2653045654297, |
| "loss": 0.1055, |
| "rewards/accuracies": 0.768750011920929, |
| "rewards/chosen": 2.7265372276306152, |
| "rewards/margins": 2.74798846244812, |
| "rewards/rejected": -0.021450763568282127, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.11555555555555555, |
| "grad_norm": 6.012020587921143, |
| "learning_rate": 9.966091506815128e-06, |
| "logits/chosen": -0.34487825632095337, |
| "logits/rejected": -0.3683899939060211, |
| "logps/chosen": -296.5931701660156, |
| "logps/rejected": -223.12753295898438, |
| "loss": 0.0823, |
| "rewards/accuracies": 0.768750011920929, |
| "rewards/chosen": 2.7003228664398193, |
| "rewards/margins": 2.8483641147613525, |
| "rewards/rejected": -0.14804117381572723, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 5.459484100341797, |
| "learning_rate": 9.963327960857962e-06, |
| "logits/chosen": -0.3142702579498291, |
| "logits/rejected": -0.36442944407463074, |
| "logps/chosen": -310.71368408203125, |
| "logps/rejected": -211.5228729248047, |
| "loss": -0.0914, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": 3.0668811798095703, |
| "rewards/margins": 3.4564356803894043, |
| "rewards/rejected": -0.3895547688007355, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.12444444444444444, |
| "grad_norm": 4.6439080238342285, |
| "learning_rate": 9.960456581788771e-06, |
| "logits/chosen": -0.3213174343109131, |
| "logits/rejected": -0.35702863335609436, |
| "logps/chosen": -295.9752502441406, |
| "logps/rejected": -218.5254364013672, |
| "loss": -0.0509, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": 2.7388923168182373, |
| "rewards/margins": 3.2099480628967285, |
| "rewards/rejected": -0.47105544805526733, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.1288888888888889, |
| "grad_norm": 7.5686140060424805, |
| "learning_rate": 9.957477431991053e-06, |
| "logits/chosen": -0.3489062190055847, |
| "logits/rejected": -0.38331982493400574, |
| "logps/chosen": -301.20574951171875, |
| "logps/rejected": -221.79617309570312, |
| "loss": 0.0026, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": 2.3142552375793457, |
| "rewards/margins": 3.0163302421569824, |
| "rewards/rejected": -0.7020750045776367, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.13333333333333333, |
| "grad_norm": 7.296176910400391, |
| "learning_rate": 9.954390576189726e-06, |
| "logits/chosen": -0.32641178369522095, |
| "logits/rejected": -0.3621976673603058, |
| "logps/chosen": -312.5970153808594, |
| "logps/rejected": -246.51974487304688, |
| "loss": 0.1054, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": 2.6623806953430176, |
| "rewards/margins": 2.9366531372070312, |
| "rewards/rejected": -0.27427244186401367, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.13777777777777778, |
| "grad_norm": 6.089158535003662, |
| "learning_rate": 9.95119608144972e-06, |
| "logits/chosen": -0.34094589948654175, |
| "logits/rejected": -0.36057132482528687, |
| "logps/chosen": -298.9684143066406, |
| "logps/rejected": -232.59097290039062, |
| "loss": 0.0235, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": 2.0118796825408936, |
| "rewards/margins": 2.952807664871216, |
| "rewards/rejected": -0.9409275054931641, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.14222222222222222, |
| "grad_norm": 5.5049662590026855, |
| "learning_rate": 9.947894017174535e-06, |
| "logits/chosen": -0.30161410570144653, |
| "logits/rejected": -0.3480113446712494, |
| "logps/chosen": -306.0929870605469, |
| "logps/rejected": -226.927734375, |
| "loss": -0.0148, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": 2.6642236709594727, |
| "rewards/margins": 3.523815631866455, |
| "rewards/rejected": -0.8595919609069824, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.14666666666666667, |
| "grad_norm": 5.161729335784912, |
| "learning_rate": 9.944484455104716e-06, |
| "logits/chosen": -0.3171108067035675, |
| "logits/rejected": -0.35261866450309753, |
| "logps/chosen": -290.88250732421875, |
| "logps/rejected": -234.3745880126953, |
| "loss": 0.085, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": 1.7442996501922607, |
| "rewards/margins": 2.7870707511901855, |
| "rewards/rejected": -1.0427708625793457, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.1511111111111111, |
| "grad_norm": 5.5866618156433105, |
| "learning_rate": 9.940967469316307e-06, |
| "logits/chosen": -0.3179735541343689, |
| "logits/rejected": -0.3568040728569031, |
| "logps/chosen": -328.8294372558594, |
| "logps/rejected": -225.0006561279297, |
| "loss": -0.2403, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": 3.2372944355010986, |
| "rewards/margins": 4.343171119689941, |
| "rewards/rejected": -1.105877161026001, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.15555555555555556, |
| "grad_norm": 6.542994499206543, |
| "learning_rate": 9.937343136219234e-06, |
| "logits/chosen": -0.2941819429397583, |
| "logits/rejected": -0.3379240930080414, |
| "logps/chosen": -301.7574462890625, |
| "logps/rejected": -217.0867919921875, |
| "loss": -0.1938, |
| "rewards/accuracies": 0.856249988079071, |
| "rewards/chosen": 2.6589813232421875, |
| "rewards/margins": 4.12928581237793, |
| "rewards/rejected": -1.4703044891357422, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 8.16442584991455, |
| "learning_rate": 9.933611534555645e-06, |
| "logits/chosen": -0.3271011710166931, |
| "logits/rejected": -0.3445083200931549, |
| "logps/chosen": -319.7223205566406, |
| "logps/rejected": -247.19967651367188, |
| "loss": -0.0822, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": 2.863480806350708, |
| "rewards/margins": 3.848937511444092, |
| "rewards/rejected": -0.9854568243026733, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.16444444444444445, |
| "grad_norm": 5.072612285614014, |
| "learning_rate": 9.929772745398207e-06, |
| "logits/chosen": -0.3311443328857422, |
| "logits/rejected": -0.34855595231056213, |
| "logps/chosen": -304.24371337890625, |
| "logps/rejected": -228.12423706054688, |
| "loss": -0.0953, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": 2.729543685913086, |
| "rewards/margins": 3.911928653717041, |
| "rewards/rejected": -1.1823843717575073, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.1688888888888889, |
| "grad_norm": 5.6365966796875, |
| "learning_rate": 9.925826852148332e-06, |
| "logits/chosen": -0.37482309341430664, |
| "logits/rejected": -0.37685567140579224, |
| "logps/chosen": -323.057373046875, |
| "logps/rejected": -238.3128204345703, |
| "loss": -0.1351, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 2.4461398124694824, |
| "rewards/margins": 4.063778400421143, |
| "rewards/rejected": -1.6176389455795288, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.17333333333333334, |
| "grad_norm": 7.323070049285889, |
| "learning_rate": 9.921773940534382e-06, |
| "logits/chosen": -0.30995437502861023, |
| "logits/rejected": -0.3514579236507416, |
| "logps/chosen": -283.3837890625, |
| "logps/rejected": -228.30905151367188, |
| "loss": 0.0783, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": 1.8092823028564453, |
| "rewards/margins": 3.168578624725342, |
| "rewards/rejected": -1.3592965602874756, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.17777777777777778, |
| "grad_norm": 5.961920738220215, |
| "learning_rate": 9.917614098609786e-06, |
| "logits/chosen": -0.3327783942222595, |
| "logits/rejected": -0.36006277799606323, |
| "logps/chosen": -307.5805358886719, |
| "logps/rejected": -228.3577880859375, |
| "loss": -0.0971, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": 2.8803937435150146, |
| "rewards/margins": 3.999189853668213, |
| "rewards/rejected": -1.1187958717346191, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.18222222222222223, |
| "grad_norm": 8.006556510925293, |
| "learning_rate": 9.913347416751148e-06, |
| "logits/chosen": -0.290499210357666, |
| "logits/rejected": -0.32564371824264526, |
| "logps/chosen": -310.0920715332031, |
| "logps/rejected": -222.1564178466797, |
| "loss": -0.1483, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 3.187587022781372, |
| "rewards/margins": 4.311644554138184, |
| "rewards/rejected": -1.124057650566101, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.18666666666666668, |
| "grad_norm": 5.825204849243164, |
| "learning_rate": 9.908973987656263e-06, |
| "logits/chosen": -0.3070078492164612, |
| "logits/rejected": -0.3182796239852905, |
| "logps/chosen": -289.46490478515625, |
| "logps/rejected": -223.34725952148438, |
| "loss": -0.2937, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": 3.196002244949341, |
| "rewards/margins": 4.649127006530762, |
| "rewards/rejected": -1.453124761581421, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.19111111111111112, |
| "grad_norm": 8.087427139282227, |
| "learning_rate": 9.904493906342124e-06, |
| "logits/chosen": -0.284060001373291, |
| "logits/rejected": -0.3289189636707306, |
| "logps/chosen": -292.52984619140625, |
| "logps/rejected": -227.1925811767578, |
| "loss": -0.0803, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": 2.883082628250122, |
| "rewards/margins": 4.214940071105957, |
| "rewards/rejected": -1.3318575620651245, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.19555555555555557, |
| "grad_norm": 6.931927680969238, |
| "learning_rate": 9.899907270142835e-06, |
| "logits/chosen": -0.29949700832366943, |
| "logits/rejected": -0.3155062794685364, |
| "logps/chosen": -305.6365661621094, |
| "logps/rejected": -233.4442901611328, |
| "loss": -0.0835, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": 2.9822609424591064, |
| "rewards/margins": 4.396633148193359, |
| "rewards/rejected": -1.4143723249435425, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 9.130791664123535, |
| "learning_rate": 9.895214178707516e-06, |
| "logits/chosen": -0.31096282601356506, |
| "logits/rejected": -0.3551832437515259, |
| "logps/chosen": -313.84320068359375, |
| "logps/rejected": -236.7030029296875, |
| "loss": -0.0991, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": 2.626882553100586, |
| "rewards/margins": 4.163486003875732, |
| "rewards/rejected": -1.536603331565857, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.20444444444444446, |
| "grad_norm": 5.590844631195068, |
| "learning_rate": 9.890414733998131e-06, |
| "logits/chosen": -0.2635526657104492, |
| "logits/rejected": -0.29329806566238403, |
| "logps/chosen": -297.04638671875, |
| "logps/rejected": -236.12271118164062, |
| "loss": 0.0262, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": 2.713446855545044, |
| "rewards/margins": 3.905104875564575, |
| "rewards/rejected": -1.1916577816009521, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.2088888888888889, |
| "grad_norm": 4.7747979164123535, |
| "learning_rate": 9.885509040287267e-06, |
| "logits/chosen": -0.30965957045555115, |
| "logits/rejected": -0.31621426343917847, |
| "logps/chosen": -288.59014892578125, |
| "logps/rejected": -220.62765502929688, |
| "loss": -0.342, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": 3.3641788959503174, |
| "rewards/margins": 5.042544841766357, |
| "rewards/rejected": -1.6783654689788818, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.21333333333333335, |
| "grad_norm": 6.6951680183410645, |
| "learning_rate": 9.880497204155879e-06, |
| "logits/chosen": -0.27586597204208374, |
| "logits/rejected": -0.3355752229690552, |
| "logps/chosen": -316.9598388671875, |
| "logps/rejected": -244.87149047851562, |
| "loss": -0.0482, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": 2.7237601280212402, |
| "rewards/margins": 3.941441774368286, |
| "rewards/rejected": -1.217681646347046, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.21777777777777776, |
| "grad_norm": 5.907822608947754, |
| "learning_rate": 9.875379334490962e-06, |
| "logits/chosen": -0.3292551338672638, |
| "logits/rejected": -0.31635525822639465, |
| "logps/chosen": -291.8619689941406, |
| "logps/rejected": -231.01016235351562, |
| "loss": -0.1414, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": 2.5336103439331055, |
| "rewards/margins": 4.392641067504883, |
| "rewards/rejected": -1.8590309619903564, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.2222222222222222, |
| "grad_norm": 7.046641826629639, |
| "learning_rate": 9.870155542483199e-06, |
| "logits/chosen": -0.3067111372947693, |
| "logits/rejected": -0.35157865285873413, |
| "logps/chosen": -319.73187255859375, |
| "logps/rejected": -236.04483032226562, |
| "loss": -0.3243, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": 3.8645987510681152, |
| "rewards/margins": 5.3452653884887695, |
| "rewards/rejected": -1.4806665182113647, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.22666666666666666, |
| "grad_norm": 9.379409790039062, |
| "learning_rate": 9.864825941624538e-06, |
| "logits/chosen": -0.267128050327301, |
| "logits/rejected": -0.2918349802494049, |
| "logps/chosen": -313.8056335449219, |
| "logps/rejected": -247.0101776123047, |
| "loss": -0.1449, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": 3.3342041969299316, |
| "rewards/margins": 4.6097564697265625, |
| "rewards/rejected": -1.27555251121521, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.2311111111111111, |
| "grad_norm": 7.974717617034912, |
| "learning_rate": 9.85939064770572e-06, |
| "logits/chosen": -0.3181043267250061, |
| "logits/rejected": -0.3094359338283539, |
| "logps/chosen": -317.86505126953125, |
| "logps/rejected": -245.4908905029297, |
| "loss": -0.3156, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": 3.143451452255249, |
| "rewards/margins": 5.211213111877441, |
| "rewards/rejected": -2.0677614212036133, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.23555555555555555, |
| "grad_norm": 6.511713027954102, |
| "learning_rate": 9.853849778813777e-06, |
| "logits/chosen": -0.29388368129730225, |
| "logits/rejected": -0.3029894530773163, |
| "logps/chosen": -297.4751892089844, |
| "logps/rejected": -231.9884796142578, |
| "loss": -0.167, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 3.295167922973633, |
| "rewards/margins": 5.074382305145264, |
| "rewards/rejected": -1.77921462059021, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 6.8637776374816895, |
| "learning_rate": 9.848203455329459e-06, |
| "logits/chosen": -0.31308668851852417, |
| "logits/rejected": -0.3360288441181183, |
| "logps/chosen": -296.82318115234375, |
| "logps/rejected": -236.3435821533203, |
| "loss": -0.2143, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": 2.4768126010894775, |
| "rewards/margins": 4.899205684661865, |
| "rewards/rejected": -2.422393321990967, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.24444444444444444, |
| "grad_norm": 10.344189643859863, |
| "learning_rate": 9.842451799924616e-06, |
| "logits/chosen": -0.2888021171092987, |
| "logits/rejected": -0.3189722001552582, |
| "logps/chosen": -321.9690246582031, |
| "logps/rejected": -239.86520385742188, |
| "loss": -0.327, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": 2.709468364715576, |
| "rewards/margins": 5.345309257507324, |
| "rewards/rejected": -2.635840892791748, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.24888888888888888, |
| "grad_norm": 11.444976806640625, |
| "learning_rate": 9.836594937559541e-06, |
| "logits/chosen": -0.28263232111930847, |
| "logits/rejected": -0.2964705526828766, |
| "logps/chosen": -296.26995849609375, |
| "logps/rejected": -232.54934692382812, |
| "loss": 0.0017, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": 2.52939772605896, |
| "rewards/margins": 4.631046295166016, |
| "rewards/rejected": -2.1016488075256348, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.25333333333333335, |
| "grad_norm": 5.921905040740967, |
| "learning_rate": 9.830632995480243e-06, |
| "logits/chosen": -0.26743844151496887, |
| "logits/rejected": -0.27696385979652405, |
| "logps/chosen": -298.95477294921875, |
| "logps/rejected": -236.3730926513672, |
| "loss": -0.2483, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": 2.9082038402557373, |
| "rewards/margins": 5.20479154586792, |
| "rewards/rejected": -2.2965879440307617, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.2577777777777778, |
| "grad_norm": 6.96235990524292, |
| "learning_rate": 9.824566103215697e-06, |
| "logits/chosen": -0.2472468614578247, |
| "logits/rejected": -0.2931605279445648, |
| "logps/chosen": -298.02581787109375, |
| "logps/rejected": -231.60879516601562, |
| "loss": -0.2804, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 2.8193132877349854, |
| "rewards/margins": 5.249671459197998, |
| "rewards/rejected": -2.430358648300171, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.26222222222222225, |
| "grad_norm": 8.460125923156738, |
| "learning_rate": 9.818394392575018e-06, |
| "logits/chosen": -0.30542343854904175, |
| "logits/rejected": -0.32763975858688354, |
| "logps/chosen": -285.7476501464844, |
| "logps/rejected": -243.5345458984375, |
| "loss": -0.1747, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": 2.6046016216278076, |
| "rewards/margins": 4.996693134307861, |
| "rewards/rejected": -2.3920915126800537, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.26666666666666666, |
| "grad_norm": 7.488274097442627, |
| "learning_rate": 9.812117997644606e-06, |
| "logits/chosen": -0.2731490731239319, |
| "logits/rejected": -0.30121108889579773, |
| "logps/chosen": -284.916259765625, |
| "logps/rejected": -233.185546875, |
| "loss": -0.1565, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": 2.169166088104248, |
| "rewards/margins": 5.066960334777832, |
| "rewards/rejected": -2.897794246673584, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.27111111111111114, |
| "grad_norm": 8.804214477539062, |
| "learning_rate": 9.805737054785223e-06, |
| "logits/chosen": -0.2953334450721741, |
| "logits/rejected": -0.327360063791275, |
| "logps/chosen": -300.7308349609375, |
| "logps/rejected": -236.43685913085938, |
| "loss": -0.0923, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": 2.0204293727874756, |
| "rewards/margins": 4.94085168838501, |
| "rewards/rejected": -2.920422315597534, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.27555555555555555, |
| "grad_norm": 8.035072326660156, |
| "learning_rate": 9.79925170262904e-06, |
| "logits/chosen": -0.26204347610473633, |
| "logits/rejected": -0.31125301122665405, |
| "logps/chosen": -280.49102783203125, |
| "logps/rejected": -226.66110229492188, |
| "loss": -0.2221, |
| "rewards/accuracies": 0.856249988079071, |
| "rewards/chosen": 1.1722501516342163, |
| "rewards/margins": 4.767660140991211, |
| "rewards/rejected": -3.595409870147705, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 6.416834831237793, |
| "learning_rate": 9.792662082076618e-06, |
| "logits/chosen": -0.2821267247200012, |
| "logits/rejected": -0.29524296522140503, |
| "logps/chosen": -313.2020568847656, |
| "logps/rejected": -236.20578002929688, |
| "loss": -0.3875, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": 2.725848913192749, |
| "rewards/margins": 5.90293025970459, |
| "rewards/rejected": -3.17708158493042, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.28444444444444444, |
| "grad_norm": 9.474376678466797, |
| "learning_rate": 9.785968336293859e-06, |
| "logits/chosen": -0.2762632966041565, |
| "logits/rejected": -0.34091368317604065, |
| "logps/chosen": -315.105224609375, |
| "logps/rejected": -250.1154327392578, |
| "loss": -0.2361, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 1.9460265636444092, |
| "rewards/margins": 5.044549465179443, |
| "rewards/rejected": -3.0985231399536133, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.28888888888888886, |
| "grad_norm": 7.876622200012207, |
| "learning_rate": 9.779170610708872e-06, |
| "logits/chosen": -0.26600781083106995, |
| "logits/rejected": -0.2999460697174072, |
| "logps/chosen": -315.525146484375, |
| "logps/rejected": -239.6782989501953, |
| "loss": -0.3024, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": 2.9207816123962402, |
| "rewards/margins": 6.140283107757568, |
| "rewards/rejected": -3.219501495361328, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.29333333333333333, |
| "grad_norm": 9.389948844909668, |
| "learning_rate": 9.772269053008841e-06, |
| "logits/chosen": -0.2716449201107025, |
| "logits/rejected": -0.31395813822746277, |
| "logps/chosen": -293.0248107910156, |
| "logps/rejected": -221.9087371826172, |
| "loss": -0.1898, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 2.0722451210021973, |
| "rewards/margins": 5.335482597351074, |
| "rewards/rejected": -3.263237714767456, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.29777777777777775, |
| "grad_norm": 8.571460723876953, |
| "learning_rate": 9.765263813136796e-06, |
| "logits/chosen": -0.27379176020622253, |
| "logits/rejected": -0.31927746534347534, |
| "logps/chosen": -306.12799072265625, |
| "logps/rejected": -229.9273681640625, |
| "loss": -0.1855, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": 1.6866050958633423, |
| "rewards/margins": 4.964447975158691, |
| "rewards/rejected": -3.2778429985046387, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.3022222222222222, |
| "grad_norm": 7.001428127288818, |
| "learning_rate": 9.758155043288367e-06, |
| "logits/chosen": -0.28565549850463867, |
| "logits/rejected": -0.3229166865348816, |
| "logps/chosen": -297.42449951171875, |
| "logps/rejected": -250.22286987304688, |
| "loss": -0.2147, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": 2.1362271308898926, |
| "rewards/margins": 5.681948661804199, |
| "rewards/rejected": -3.5457210540771484, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.30666666666666664, |
| "grad_norm": 8.346474647521973, |
| "learning_rate": 9.750942897908468e-06, |
| "logits/chosen": -0.24829097092151642, |
| "logits/rejected": -0.2842785120010376, |
| "logps/chosen": -293.74859619140625, |
| "logps/rejected": -236.0409698486328, |
| "loss": -0.4852, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": 2.535234212875366, |
| "rewards/margins": 6.377307891845703, |
| "rewards/rejected": -3.842073917388916, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.3111111111111111, |
| "grad_norm": 6.549100875854492, |
| "learning_rate": 9.743627533687953e-06, |
| "logits/chosen": -0.2822897136211395, |
| "logits/rejected": -0.3249056041240692, |
| "logps/chosen": -297.6363830566406, |
| "logps/rejected": -229.268798828125, |
| "loss": -0.4093, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 2.632903814315796, |
| "rewards/margins": 6.515559196472168, |
| "rewards/rejected": -3.882655620574951, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.31555555555555553, |
| "grad_norm": 6.955848217010498, |
| "learning_rate": 9.736209109560201e-06, |
| "logits/chosen": -0.2583480179309845, |
| "logits/rejected": -0.31130915880203247, |
| "logps/chosen": -286.8586730957031, |
| "logps/rejected": -228.97238159179688, |
| "loss": -0.3784, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": 2.1645522117614746, |
| "rewards/margins": 5.964513301849365, |
| "rewards/rejected": -3.7999610900878906, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 9.709640502929688, |
| "learning_rate": 9.728687786697667e-06, |
| "logits/chosen": -0.2713499069213867, |
| "logits/rejected": -0.3231387734413147, |
| "logps/chosen": -310.96929931640625, |
| "logps/rejected": -238.51025390625, |
| "loss": -0.3302, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": 2.8049497604370117, |
| "rewards/margins": 6.414994239807129, |
| "rewards/rejected": -3.6100432872772217, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.3244444444444444, |
| "grad_norm": 6.710853099822998, |
| "learning_rate": 9.721063728508384e-06, |
| "logits/chosen": -0.28875869512557983, |
| "logits/rejected": -0.32300078868865967, |
| "logps/chosen": -297.21221923828125, |
| "logps/rejected": -246.2225799560547, |
| "loss": -0.3494, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": 2.3010525703430176, |
| "rewards/margins": 6.263821601867676, |
| "rewards/rejected": -3.9627685546875, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.3288888888888889, |
| "grad_norm": 9.562369346618652, |
| "learning_rate": 9.713337100632407e-06, |
| "logits/chosen": -0.23941664397716522, |
| "logits/rejected": -0.2882528305053711, |
| "logps/chosen": -297.01116943359375, |
| "logps/rejected": -246.925048828125, |
| "loss": -0.2107, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": 1.6213099956512451, |
| "rewards/margins": 5.55633020401001, |
| "rewards/rejected": -3.935020923614502, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 8.356274604797363, |
| "learning_rate": 9.705508070938219e-06, |
| "logits/chosen": -0.26807016134262085, |
| "logits/rejected": -0.29893961548805237, |
| "logps/chosen": -310.1183776855469, |
| "logps/rejected": -234.883544921875, |
| "loss": -0.6507, |
| "rewards/accuracies": 0.8812500238418579, |
| "rewards/chosen": 3.1703178882598877, |
| "rewards/margins": 7.669167995452881, |
| "rewards/rejected": -4.498850345611572, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.3377777777777778, |
| "grad_norm": 7.058998107910156, |
| "learning_rate": 9.697576809519079e-06, |
| "logits/chosen": -0.2949567139148712, |
| "logits/rejected": -0.3223188519477844, |
| "logps/chosen": -311.98773193359375, |
| "logps/rejected": -245.71194458007812, |
| "loss": -0.3221, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": 1.6648155450820923, |
| "rewards/margins": 6.265153884887695, |
| "rewards/rejected": -4.600337982177734, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.3422222222222222, |
| "grad_norm": 8.904199600219727, |
| "learning_rate": 9.689543488689332e-06, |
| "logits/chosen": -0.25813308358192444, |
| "logits/rejected": -0.29112708568573, |
| "logps/chosen": -301.86834716796875, |
| "logps/rejected": -247.51974487304688, |
| "loss": -0.3268, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": 0.9261104464530945, |
| "rewards/margins": 5.75935173034668, |
| "rewards/rejected": -4.8332414627075195, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.3466666666666667, |
| "grad_norm": 6.82271671295166, |
| "learning_rate": 9.68140828298066e-06, |
| "logits/chosen": -0.27823004126548767, |
| "logits/rejected": -0.30908042192459106, |
| "logps/chosen": -302.1865539550781, |
| "logps/rejected": -234.9086456298828, |
| "loss": -0.5093, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": 1.626147985458374, |
| "rewards/margins": 6.896539211273193, |
| "rewards/rejected": -5.270391941070557, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.3511111111111111, |
| "grad_norm": 10.339046478271484, |
| "learning_rate": 9.673171369138297e-06, |
| "logits/chosen": -0.2561442255973816, |
| "logits/rejected": -0.2945733666419983, |
| "logps/chosen": -304.0384216308594, |
| "logps/rejected": -242.31137084960938, |
| "loss": -0.3237, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": 1.6995117664337158, |
| "rewards/margins": 6.307187557220459, |
| "rewards/rejected": -4.607676029205322, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.35555555555555557, |
| "grad_norm": 7.32875919342041, |
| "learning_rate": 9.66483292611718e-06, |
| "logits/chosen": -0.2525383234024048, |
| "logits/rejected": -0.28177526593208313, |
| "logps/chosen": -292.54046630859375, |
| "logps/rejected": -235.5553741455078, |
| "loss": -0.4031, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": 1.865870714187622, |
| "rewards/margins": 6.680575370788574, |
| "rewards/rejected": -4.814703941345215, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 12.206645011901855, |
| "learning_rate": 9.656393135078067e-06, |
| "logits/chosen": -0.2548236846923828, |
| "logits/rejected": -0.30014172196388245, |
| "logps/chosen": -300.7771911621094, |
| "logps/rejected": -239.86367797851562, |
| "loss": -0.538, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": 1.971895456314087, |
| "rewards/margins": 7.470471382141113, |
| "rewards/rejected": -5.498574733734131, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.36444444444444446, |
| "grad_norm": 8.910218238830566, |
| "learning_rate": 9.647852179383606e-06, |
| "logits/chosen": -0.27060994505882263, |
| "logits/rejected": -0.3155694603919983, |
| "logps/chosen": -300.47210693359375, |
| "logps/rejected": -234.78250122070312, |
| "loss": -0.5798, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": 1.5166006088256836, |
| "rewards/margins": 7.203047275543213, |
| "rewards/rejected": -5.686446189880371, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.3688888888888889, |
| "grad_norm": 10.572662353515625, |
| "learning_rate": 9.639210244594335e-06, |
| "logits/chosen": -0.2864235043525696, |
| "logits/rejected": -0.30632856488227844, |
| "logps/chosen": -301.4001159667969, |
| "logps/rejected": -254.0180206298828, |
| "loss": -0.4088, |
| "rewards/accuracies": 0.856249988079071, |
| "rewards/chosen": 1.349990963935852, |
| "rewards/margins": 6.885331630706787, |
| "rewards/rejected": -5.535341262817383, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.37333333333333335, |
| "grad_norm": 9.515912055969238, |
| "learning_rate": 9.630467518464666e-06, |
| "logits/chosen": -0.2558160424232483, |
| "logits/rejected": -0.2956928312778473, |
| "logps/chosen": -304.60302734375, |
| "logps/rejected": -240.9836883544922, |
| "loss": -0.2976, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": 1.0677791833877563, |
| "rewards/margins": 6.517449378967285, |
| "rewards/rejected": -5.44966983795166, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.37777777777777777, |
| "grad_norm": 7.812021255493164, |
| "learning_rate": 9.621624190938802e-06, |
| "logits/chosen": -0.2350511997938156, |
| "logits/rejected": -0.2532605528831482, |
| "logps/chosen": -320.7872009277344, |
| "logps/rejected": -254.2796173095703, |
| "loss": -0.4306, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": 1.7731481790542603, |
| "rewards/margins": 7.587254524230957, |
| "rewards/rejected": -5.814105987548828, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.38222222222222224, |
| "grad_norm": 7.678309917449951, |
| "learning_rate": 9.612680454146609e-06, |
| "logits/chosen": -0.22189001739025116, |
| "logits/rejected": -0.2614109218120575, |
| "logps/chosen": -325.14239501953125, |
| "logps/rejected": -250.6171875, |
| "loss": -0.6808, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": 2.040872573852539, |
| "rewards/margins": 8.241477012634277, |
| "rewards/rejected": -6.200604438781738, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.38666666666666666, |
| "grad_norm": 10.421648979187012, |
| "learning_rate": 9.603636502399436e-06, |
| "logits/chosen": -0.2654271721839905, |
| "logits/rejected": -0.302105575799942, |
| "logps/chosen": -332.26593017578125, |
| "logps/rejected": -245.6372528076172, |
| "loss": -0.6012, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": 1.5932146310806274, |
| "rewards/margins": 8.200170516967773, |
| "rewards/rejected": -6.606956481933594, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.39111111111111113, |
| "grad_norm": 6.699859142303467, |
| "learning_rate": 9.594492532185909e-06, |
| "logits/chosen": -0.2850594222545624, |
| "logits/rejected": -0.3033252954483032, |
| "logps/chosen": -307.18463134765625, |
| "logps/rejected": -248.4318389892578, |
| "loss": -0.3909, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": 0.7229059934616089, |
| "rewards/margins": 6.767951011657715, |
| "rewards/rejected": -6.045044422149658, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.39555555555555555, |
| "grad_norm": 9.903278350830078, |
| "learning_rate": 9.585248742167638e-06, |
| "logits/chosen": -0.2718963325023651, |
| "logits/rejected": -0.30517634749412537, |
| "logps/chosen": -309.89031982421875, |
| "logps/rejected": -250.9574432373047, |
| "loss": -0.3108, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.4615413546562195, |
| "rewards/margins": 6.469930171966553, |
| "rewards/rejected": -6.9314703941345215, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 5.433151721954346, |
| "learning_rate": 9.57590533317493e-06, |
| "logits/chosen": -0.2626163959503174, |
| "logits/rejected": -0.32341477274894714, |
| "logps/chosen": -312.3274230957031, |
| "logps/rejected": -233.1435546875, |
| "loss": -0.7956, |
| "rewards/accuracies": 0.893750011920929, |
| "rewards/chosen": 0.924404501914978, |
| "rewards/margins": 8.514490127563477, |
| "rewards/rejected": -7.590085029602051, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.40444444444444444, |
| "grad_norm": 8.837213516235352, |
| "learning_rate": 9.566462508202403e-06, |
| "logits/chosen": -0.248914435505867, |
| "logits/rejected": -0.3085024952888489, |
| "logps/chosen": -311.30328369140625, |
| "logps/rejected": -252.2725067138672, |
| "loss": -0.4715, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": 0.2921377718448639, |
| "rewards/margins": 7.126054286956787, |
| "rewards/rejected": -6.833916664123535, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.4088888888888889, |
| "grad_norm": 7.693172454833984, |
| "learning_rate": 9.55692047240458e-06, |
| "logits/chosen": -0.2304973304271698, |
| "logits/rejected": -0.28682953119277954, |
| "logps/chosen": -311.18023681640625, |
| "logps/rejected": -240.9733123779297, |
| "loss": -0.6889, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": 0.7987859845161438, |
| "rewards/margins": 8.30670166015625, |
| "rewards/rejected": -7.507915496826172, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.41333333333333333, |
| "grad_norm": 8.775394439697266, |
| "learning_rate": 9.547279433091446e-06, |
| "logits/chosen": -0.2938714325428009, |
| "logits/rejected": -0.314927875995636, |
| "logps/chosen": -307.7293701171875, |
| "logps/rejected": -249.83523559570312, |
| "loss": -0.5757, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": 0.2602098286151886, |
| "rewards/margins": 7.839123725891113, |
| "rewards/rejected": -7.57891321182251, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.4177777777777778, |
| "grad_norm": 6.385190010070801, |
| "learning_rate": 9.537539599723924e-06, |
| "logits/chosen": -0.2282254993915558, |
| "logits/rejected": -0.29543009400367737, |
| "logps/chosen": -302.2158508300781, |
| "logps/rejected": -243.7989501953125, |
| "loss": -0.7249, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": 1.112415075302124, |
| "rewards/margins": 8.864578247070312, |
| "rewards/rejected": -7.752162933349609, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.4222222222222222, |
| "grad_norm": 9.705090522766113, |
| "learning_rate": 9.527701183909336e-06, |
| "logits/chosen": -0.255817174911499, |
| "logits/rejected": -0.30061060190200806, |
| "logps/chosen": -319.11309814453125, |
| "logps/rejected": -252.5963592529297, |
| "loss": -0.4242, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -0.3803827464580536, |
| "rewards/margins": 7.071600437164307, |
| "rewards/rejected": -7.4519829750061035, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.4266666666666667, |
| "grad_norm": 10.644119262695312, |
| "learning_rate": 9.51776439939681e-06, |
| "logits/chosen": -0.24410729110240936, |
| "logits/rejected": -0.31472498178482056, |
| "logps/chosen": -326.570556640625, |
| "logps/rejected": -254.5313262939453, |
| "loss": -0.6844, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": 0.5738657116889954, |
| "rewards/margins": 9.123512268066406, |
| "rewards/rejected": -8.549646377563477, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.4311111111111111, |
| "grad_norm": 12.36355972290039, |
| "learning_rate": 9.507729462072615e-06, |
| "logits/chosen": -0.24467067420482635, |
| "logits/rejected": -0.3331097364425659, |
| "logps/chosen": -322.2784729003906, |
| "logps/rejected": -260.8439025878906, |
| "loss": -0.5093, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": 0.3209023177623749, |
| "rewards/margins": 8.416958808898926, |
| "rewards/rejected": -8.09605598449707, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.43555555555555553, |
| "grad_norm": 11.505626678466797, |
| "learning_rate": 9.4975965899555e-06, |
| "logits/chosen": -0.28976163268089294, |
| "logits/rejected": -0.3090762794017792, |
| "logps/chosen": -307.57489013671875, |
| "logps/rejected": -246.5322265625, |
| "loss": -0.4303, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -1.3077417612075806, |
| "rewards/margins": 6.817173004150391, |
| "rewards/rejected": -8.124914169311523, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 15.380836486816406, |
| "learning_rate": 9.48736600319193e-06, |
| "logits/chosen": -0.2653730809688568, |
| "logits/rejected": -0.2980864644050598, |
| "logps/chosen": -319.14019775390625, |
| "logps/rejected": -263.46063232421875, |
| "loss": -0.3595, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.1717190742492676, |
| "rewards/margins": 7.247198581695557, |
| "rewards/rejected": -8.418917655944824, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.4444444444444444, |
| "grad_norm": 7.199528694152832, |
| "learning_rate": 9.47703792405133e-06, |
| "logits/chosen": -0.2643812596797943, |
| "logits/rejected": -0.3057587146759033, |
| "logps/chosen": -305.49395751953125, |
| "logps/rejected": -250.5885467529297, |
| "loss": -0.7595, |
| "rewards/accuracies": 0.8812500238418579, |
| "rewards/chosen": -1.0399351119995117, |
| "rewards/margins": 8.203756332397461, |
| "rewards/rejected": -9.243691444396973, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.4444444444444444, |
| "eval_logits/chosen": -0.2629312574863434, |
| "eval_logits/rejected": -0.30713388323783875, |
| "eval_logps/chosen": -313.8876953125, |
| "eval_logps/rejected": -254.3212127685547, |
| "eval_loss": -0.574113667011261, |
| "eval_rewards/accuracies": 0.828249990940094, |
| "eval_rewards/chosen": -1.1603001356124878, |
| "eval_rewards/margins": 8.141514778137207, |
| "eval_rewards/rejected": -9.301814079284668, |
| "eval_runtime": 2192.8697, |
| "eval_samples_per_second": 1.824, |
| "eval_steps_per_second": 0.912, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.4488888888888889, |
| "grad_norm": 11.993717193603516, |
| "learning_rate": 9.466612576921223e-06, |
| "logits/chosen": -0.2699393332004547, |
| "logits/rejected": -0.3285272717475891, |
| "logps/chosen": -319.2097473144531, |
| "logps/rejected": -263.5858154296875, |
| "loss": -0.394, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": -1.1724228858947754, |
| "rewards/margins": 7.439938545227051, |
| "rewards/rejected": -8.612360954284668, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.4533333333333333, |
| "grad_norm": 11.781710624694824, |
| "learning_rate": 9.456090188302389e-06, |
| "logits/chosen": -0.26111698150634766, |
| "logits/rejected": -0.28280287981033325, |
| "logps/chosen": -309.879638671875, |
| "logps/rejected": -263.6683654785156, |
| "loss": -0.6619, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.2497330904006958, |
| "rewards/margins": 8.266626358032227, |
| "rewards/rejected": -9.516359329223633, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.4577777777777778, |
| "grad_norm": 11.226400375366211, |
| "learning_rate": 9.445470986803922e-06, |
| "logits/chosen": -0.2626830041408539, |
| "logits/rejected": -0.3101075291633606, |
| "logps/chosen": -299.5857238769531, |
| "logps/rejected": -248.7251434326172, |
| "loss": -0.6192, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -1.528692603111267, |
| "rewards/margins": 8.301843643188477, |
| "rewards/rejected": -9.830536842346191, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.4622222222222222, |
| "grad_norm": 8.692116737365723, |
| "learning_rate": 9.434755203138269e-06, |
| "logits/chosen": -0.27712422609329224, |
| "logits/rejected": -0.33624228835105896, |
| "logps/chosen": -341.59759521484375, |
| "logps/rejected": -257.9901123046875, |
| "loss": -0.5867, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -1.3974745273590088, |
| "rewards/margins": 8.428323745727539, |
| "rewards/rejected": -9.825799942016602, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.4666666666666667, |
| "grad_norm": 7.316524982452393, |
| "learning_rate": 9.423943070116219e-06, |
| "logits/chosen": -0.3034690320491791, |
| "logits/rejected": -0.3194289803504944, |
| "logps/chosen": -294.5498046875, |
| "logps/rejected": -247.59463500976562, |
| "loss": -0.5768, |
| "rewards/accuracies": 0.856249988079071, |
| "rewards/chosen": -2.136320114135742, |
| "rewards/margins": 7.421705722808838, |
| "rewards/rejected": -9.558026313781738, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.4711111111111111, |
| "grad_norm": 15.097468376159668, |
| "learning_rate": 9.413034822641845e-06, |
| "logits/chosen": -0.29432040452957153, |
| "logits/rejected": -0.3473649322986603, |
| "logps/chosen": -314.51007080078125, |
| "logps/rejected": -255.99453735351562, |
| "loss": -0.3577, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -2.039400815963745, |
| "rewards/margins": 7.780667304992676, |
| "rewards/rejected": -9.820066452026367, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.47555555555555556, |
| "grad_norm": 10.856350898742676, |
| "learning_rate": 9.402030697707398e-06, |
| "logits/chosen": -0.27809661626815796, |
| "logits/rejected": -0.3084755539894104, |
| "logps/chosen": -290.03839111328125, |
| "logps/rejected": -246.6155242919922, |
| "loss": -0.7648, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.642029047012329, |
| "rewards/margins": 8.514683723449707, |
| "rewards/rejected": -10.156713485717773, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 10.466115951538086, |
| "learning_rate": 9.390930934388164e-06, |
| "logits/chosen": -0.25123220682144165, |
| "logits/rejected": -0.28659194707870483, |
| "logps/chosen": -310.38702392578125, |
| "logps/rejected": -264.1778869628906, |
| "loss": -0.6251, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -2.053196430206299, |
| "rewards/margins": 8.635331153869629, |
| "rewards/rejected": -10.68852710723877, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.48444444444444446, |
| "grad_norm": 14.518105506896973, |
| "learning_rate": 9.37973577383726e-06, |
| "logits/chosen": -0.2105627954006195, |
| "logits/rejected": -0.2725834250450134, |
| "logps/chosen": -309.6773681640625, |
| "logps/rejected": -247.98507690429688, |
| "loss": -0.7806, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -1.5902966260910034, |
| "rewards/margins": 9.257573127746582, |
| "rewards/rejected": -10.847868919372559, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.4888888888888889, |
| "grad_norm": 9.231803894042969, |
| "learning_rate": 9.368445459280405e-06, |
| "logits/chosen": -0.26593995094299316, |
| "logits/rejected": -0.28871750831604004, |
| "logps/chosen": -315.306884765625, |
| "logps/rejected": -262.83197021484375, |
| "loss": -0.5635, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": -3.177497148513794, |
| "rewards/margins": 7.756557464599609, |
| "rewards/rejected": -10.934054374694824, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.49333333333333335, |
| "grad_norm": 9.906770706176758, |
| "learning_rate": 9.357060236010626e-06, |
| "logits/chosen": -0.25906693935394287, |
| "logits/rejected": -0.32186049222946167, |
| "logps/chosen": -326.8785095214844, |
| "logps/rejected": -273.9308166503906, |
| "loss": -0.6905, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -2.1445183753967285, |
| "rewards/margins": 9.113534927368164, |
| "rewards/rejected": -11.258054733276367, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.49777777777777776, |
| "grad_norm": 7.245250225067139, |
| "learning_rate": 9.345580351382939e-06, |
| "logits/chosen": -0.2802310585975647, |
| "logits/rejected": -0.2841408848762512, |
| "logps/chosen": -293.36065673828125, |
| "logps/rejected": -265.8360900878906, |
| "loss": -0.7463, |
| "rewards/accuracies": 0.8687499761581421, |
| "rewards/chosen": -3.965707778930664, |
| "rewards/margins": 9.175572395324707, |
| "rewards/rejected": -13.141279220581055, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.5022222222222222, |
| "grad_norm": 11.543607711791992, |
| "learning_rate": 9.334006054808966e-06, |
| "logits/chosen": -0.2962619960308075, |
| "logits/rejected": -0.3181178569793701, |
| "logps/chosen": -326.261962890625, |
| "logps/rejected": -281.9925231933594, |
| "loss": -0.5012, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -4.311570644378662, |
| "rewards/margins": 8.469701766967773, |
| "rewards/rejected": -12.781272888183594, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.5066666666666667, |
| "grad_norm": 18.13285255432129, |
| "learning_rate": 9.322337597751525e-06, |
| "logits/chosen": -0.29192933440208435, |
| "logits/rejected": -0.32068902254104614, |
| "logps/chosen": -320.00146484375, |
| "logps/rejected": -267.2696838378906, |
| "loss": 0.0417, |
| "rewards/accuracies": 0.768750011920929, |
| "rewards/chosen": -5.439437389373779, |
| "rewards/margins": 6.237511157989502, |
| "rewards/rejected": -11.676947593688965, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.5111111111111111, |
| "grad_norm": 15.025782585144043, |
| "learning_rate": 9.310575233719155e-06, |
| "logits/chosen": -0.2575679421424866, |
| "logits/rejected": -0.28944242000579834, |
| "logps/chosen": -312.4665222167969, |
| "logps/rejected": -266.6491394042969, |
| "loss": -0.5652, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -2.7938873767852783, |
| "rewards/margins": 8.282114028930664, |
| "rewards/rejected": -11.07600212097168, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.5155555555555555, |
| "grad_norm": 10.936811447143555, |
| "learning_rate": 9.29871921826062e-06, |
| "logits/chosen": -0.2927904725074768, |
| "logits/rejected": -0.35370174050331116, |
| "logps/chosen": -321.13885498046875, |
| "logps/rejected": -268.1432800292969, |
| "loss": -0.2429, |
| "rewards/accuracies": 0.768750011920929, |
| "rewards/chosen": -4.02617883682251, |
| "rewards/margins": 8.042525291442871, |
| "rewards/rejected": -12.068704605102539, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 12.064530372619629, |
| "learning_rate": 9.28676980895935e-06, |
| "logits/chosen": -0.24123439192771912, |
| "logits/rejected": -0.2889128625392914, |
| "logps/chosen": -308.1340637207031, |
| "logps/rejected": -256.29632568359375, |
| "loss": -0.8401, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -3.284236431121826, |
| "rewards/margins": 10.122550010681152, |
| "rewards/rejected": -13.40678596496582, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.5244444444444445, |
| "grad_norm": 13.778730392456055, |
| "learning_rate": 9.274727265427849e-06, |
| "logits/chosen": -0.2769649922847748, |
| "logits/rejected": -0.31647247076034546, |
| "logps/chosen": -306.0679626464844, |
| "logps/rejected": -254.93179321289062, |
| "loss": -0.7553, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -4.035617828369141, |
| "rewards/margins": 9.203435897827148, |
| "rewards/rejected": -13.239053726196289, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.5288888888888889, |
| "grad_norm": 8.471962928771973, |
| "learning_rate": 9.262591849302049e-06, |
| "logits/chosen": -0.2713521122932434, |
| "logits/rejected": -0.3014729619026184, |
| "logps/chosen": -299.43475341796875, |
| "logps/rejected": -266.22686767578125, |
| "loss": -0.4191, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -4.5517988204956055, |
| "rewards/margins": 8.98070240020752, |
| "rewards/rejected": -13.532503128051758, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.5333333333333333, |
| "grad_norm": 12.954193115234375, |
| "learning_rate": 9.250363824235629e-06, |
| "logits/chosen": -0.2955438494682312, |
| "logits/rejected": -0.3413962721824646, |
| "logps/chosen": -318.26068115234375, |
| "logps/rejected": -256.75689697265625, |
| "loss": -0.5025, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -3.381830930709839, |
| "rewards/margins": 9.101526260375977, |
| "rewards/rejected": -12.483358383178711, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.5377777777777778, |
| "grad_norm": 10.307146072387695, |
| "learning_rate": 9.238043455894294e-06, |
| "logits/chosen": -0.27938082814216614, |
| "logits/rejected": -0.3281027674674988, |
| "logps/chosen": -327.4522399902344, |
| "logps/rejected": -257.4443054199219, |
| "loss": -0.7464, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -4.192465305328369, |
| "rewards/margins": 8.572819709777832, |
| "rewards/rejected": -12.765284538269043, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.5422222222222223, |
| "grad_norm": 8.460762023925781, |
| "learning_rate": 9.225631011949987e-06, |
| "logits/chosen": -0.2906576991081238, |
| "logits/rejected": -0.32649320363998413, |
| "logps/chosen": -341.95928955078125, |
| "logps/rejected": -275.3990783691406, |
| "loss": -0.9454, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -2.693796396255493, |
| "rewards/margins": 10.737831115722656, |
| "rewards/rejected": -13.43162727355957, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.5466666666666666, |
| "grad_norm": 18.260358810424805, |
| "learning_rate": 9.213126762075088e-06, |
| "logits/chosen": -0.3098008632659912, |
| "logits/rejected": -0.3394979238510132, |
| "logps/chosen": -306.67449951171875, |
| "logps/rejected": -263.02349853515625, |
| "loss": -0.5332, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -4.892157554626465, |
| "rewards/margins": 9.199603080749512, |
| "rewards/rejected": -14.091761589050293, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.5511111111111111, |
| "grad_norm": 12.752532005310059, |
| "learning_rate": 9.200530977936551e-06, |
| "logits/chosen": -0.3172837793827057, |
| "logits/rejected": -0.3619407117366791, |
| "logps/chosen": -349.4862976074219, |
| "logps/rejected": -279.5086669921875, |
| "loss": -0.6241, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -4.374999046325684, |
| "rewards/margins": 9.857782363891602, |
| "rewards/rejected": -14.232782363891602, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.5555555555555556, |
| "grad_norm": 15.203842163085938, |
| "learning_rate": 9.187843933189994e-06, |
| "logits/chosen": -0.28893885016441345, |
| "logits/rejected": -0.3426817059516907, |
| "logps/chosen": -328.72979736328125, |
| "logps/rejected": -271.71099853515625, |
| "loss": -0.8634, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -3.5763354301452637, |
| "rewards/margins": 11.408061027526855, |
| "rewards/rejected": -14.984395980834961, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 10.816106796264648, |
| "learning_rate": 9.175065903473769e-06, |
| "logits/chosen": -0.2791399657726288, |
| "logits/rejected": -0.2996821403503418, |
| "logps/chosen": -321.34771728515625, |
| "logps/rejected": -280.1087341308594, |
| "loss": -0.5511, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -5.1469244956970215, |
| "rewards/margins": 9.314436912536621, |
| "rewards/rejected": -14.4613618850708, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.5644444444444444, |
| "grad_norm": 8.36387825012207, |
| "learning_rate": 9.162197166402957e-06, |
| "logits/chosen": -0.29182273149490356, |
| "logits/rejected": -0.32408252358436584, |
| "logps/chosen": -316.68487548828125, |
| "logps/rejected": -270.219482421875, |
| "loss": -1.0203, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -4.753455638885498, |
| "rewards/margins": 10.925695419311523, |
| "rewards/rejected": -15.679153442382812, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.5688888888888889, |
| "grad_norm": 11.839485168457031, |
| "learning_rate": 9.149238001563348e-06, |
| "logits/chosen": -0.30801886320114136, |
| "logits/rejected": -0.329951673746109, |
| "logps/chosen": -312.73577880859375, |
| "logps/rejected": -267.54437255859375, |
| "loss": -0.5904, |
| "rewards/accuracies": 0.856249988079071, |
| "rewards/chosen": -6.468144416809082, |
| "rewards/margins": 8.780177116394043, |
| "rewards/rejected": -15.248323440551758, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.5733333333333334, |
| "grad_norm": 16.650188446044922, |
| "learning_rate": 9.136188690505363e-06, |
| "logits/chosen": -0.2637523412704468, |
| "logits/rejected": -0.31697210669517517, |
| "logps/chosen": -331.0145568847656, |
| "logps/rejected": -271.3872985839844, |
| "loss": -0.5915, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -5.373086452484131, |
| "rewards/margins": 8.960186958312988, |
| "rewards/rejected": -14.333274841308594, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.5777777777777777, |
| "grad_norm": 18.575305938720703, |
| "learning_rate": 9.123049516737936e-06, |
| "logits/chosen": -0.3117810785770416, |
| "logits/rejected": -0.3635488450527191, |
| "logps/chosen": -328.19989013671875, |
| "logps/rejected": -272.4068603515625, |
| "loss": -0.6851, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -5.281620979309082, |
| "rewards/margins": 10.281137466430664, |
| "rewards/rejected": -15.562756538391113, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.5822222222222222, |
| "grad_norm": 16.429004669189453, |
| "learning_rate": 9.109820765722357e-06, |
| "logits/chosen": -0.27543455362319946, |
| "logits/rejected": -0.31441715359687805, |
| "logps/chosen": -335.43609619140625, |
| "logps/rejected": -286.1941833496094, |
| "loss": -0.8026, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -5.2430877685546875, |
| "rewards/margins": 10.86163330078125, |
| "rewards/rejected": -16.10472297668457, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.5866666666666667, |
| "grad_norm": 9.399615287780762, |
| "learning_rate": 9.096502724866067e-06, |
| "logits/chosen": -0.3014602065086365, |
| "logits/rejected": -0.35103824734687805, |
| "logps/chosen": -359.2838439941406, |
| "logps/rejected": -294.8686828613281, |
| "loss": -1.0671, |
| "rewards/accuracies": 0.856249988079071, |
| "rewards/chosen": -4.225809574127197, |
| "rewards/margins": 11.496713638305664, |
| "rewards/rejected": -15.722521781921387, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.5911111111111111, |
| "grad_norm": 14.605314254760742, |
| "learning_rate": 9.083095683516414e-06, |
| "logits/chosen": -0.28259098529815674, |
| "logits/rejected": -0.32325831055641174, |
| "logps/chosen": -350.68878173828125, |
| "logps/rejected": -277.5746154785156, |
| "loss": -0.9032, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -6.993232727050781, |
| "rewards/margins": 10.017694473266602, |
| "rewards/rejected": -17.010927200317383, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.5955555555555555, |
| "grad_norm": 17.158226013183594, |
| "learning_rate": 9.069599932954371e-06, |
| "logits/chosen": -0.29114705324172974, |
| "logits/rejected": -0.3473047912120819, |
| "logps/chosen": -339.568603515625, |
| "logps/rejected": -287.64666748046875, |
| "loss": -0.5308, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -8.217833518981934, |
| "rewards/margins": 8.741876602172852, |
| "rewards/rejected": -16.9597110748291, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 16.861963272094727, |
| "learning_rate": 9.056015766388205e-06, |
| "logits/chosen": -0.3223651945590973, |
| "logits/rejected": -0.36349570751190186, |
| "logps/chosen": -334.6025695800781, |
| "logps/rejected": -285.73828125, |
| "loss": -0.7831, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -7.5147528648376465, |
| "rewards/margins": 10.620495796203613, |
| "rewards/rejected": -18.1352481842041, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.6044444444444445, |
| "grad_norm": 14.257497787475586, |
| "learning_rate": 9.042343478947103e-06, |
| "logits/chosen": -0.3066635727882385, |
| "logits/rejected": -0.32420462369918823, |
| "logps/chosen": -337.81097412109375, |
| "logps/rejected": -286.6188049316406, |
| "loss": -0.9349, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -6.352471351623535, |
| "rewards/margins": 11.51642894744873, |
| "rewards/rejected": -17.868900299072266, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.6088888888888889, |
| "grad_norm": 16.795236587524414, |
| "learning_rate": 9.028583367674767e-06, |
| "logits/chosen": -0.34612902998924255, |
| "logits/rejected": -0.36396104097366333, |
| "logps/chosen": -332.6644287109375, |
| "logps/rejected": -287.1361083984375, |
| "loss": -0.9376, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -5.387200355529785, |
| "rewards/margins": 11.706459999084473, |
| "rewards/rejected": -17.093660354614258, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.6133333333333333, |
| "grad_norm": 11.827564239501953, |
| "learning_rate": 9.014735731522952e-06, |
| "logits/chosen": -0.3044932782649994, |
| "logits/rejected": -0.33501502871513367, |
| "logps/chosen": -331.37835693359375, |
| "logps/rejected": -279.79718017578125, |
| "loss": -1.0226, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -5.231140613555908, |
| "rewards/margins": 12.324139595031738, |
| "rewards/rejected": -17.555278778076172, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.6177777777777778, |
| "grad_norm": 13.102601051330566, |
| "learning_rate": 9.00080087134498e-06, |
| "logits/chosen": -0.31660374999046326, |
| "logits/rejected": -0.3677740693092346, |
| "logps/chosen": -340.77081298828125, |
| "logps/rejected": -275.3927001953125, |
| "loss": -0.8796, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -5.748688220977783, |
| "rewards/margins": 11.431347846984863, |
| "rewards/rejected": -17.180036544799805, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.6222222222222222, |
| "grad_norm": 28.49679946899414, |
| "learning_rate": 8.9867790898892e-06, |
| "logits/chosen": -0.3224649131298065, |
| "logits/rejected": -0.3925584852695465, |
| "logps/chosen": -349.48919677734375, |
| "logps/rejected": -279.79119873046875, |
| "loss": -0.8641, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -6.573420524597168, |
| "rewards/margins": 11.63664436340332, |
| "rewards/rejected": -18.210065841674805, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.6266666666666667, |
| "grad_norm": 17.276578903198242, |
| "learning_rate": 8.972670691792409e-06, |
| "logits/chosen": -0.3031178414821625, |
| "logits/rejected": -0.347816526889801, |
| "logps/chosen": -332.0860900878906, |
| "logps/rejected": -278.52264404296875, |
| "loss": -0.6865, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -7.202242851257324, |
| "rewards/margins": 10.682793617248535, |
| "rewards/rejected": -17.88503646850586, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.6311111111111111, |
| "grad_norm": 13.52579402923584, |
| "learning_rate": 8.958475983573234e-06, |
| "logits/chosen": -0.3044522702693939, |
| "logits/rejected": -0.34488362073898315, |
| "logps/chosen": -337.5535583496094, |
| "logps/rejected": -295.95428466796875, |
| "loss": -0.9102, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -6.68551778793335, |
| "rewards/margins": 11.271787643432617, |
| "rewards/rejected": -17.957305908203125, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.6355555555555555, |
| "grad_norm": 12.732015609741211, |
| "learning_rate": 8.944195273625472e-06, |
| "logits/chosen": -0.2973068356513977, |
| "logits/rejected": -0.35419678688049316, |
| "logps/chosen": -318.517822265625, |
| "logps/rejected": -271.3919677734375, |
| "loss": -0.8689, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -7.6208038330078125, |
| "rewards/margins": 10.748211860656738, |
| "rewards/rejected": -18.369014739990234, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 15.25625991821289, |
| "learning_rate": 8.92982887221139e-06, |
| "logits/chosen": -0.3091279864311218, |
| "logits/rejected": -0.3462229073047638, |
| "logps/chosen": -345.78131103515625, |
| "logps/rejected": -292.4202575683594, |
| "loss": -0.4038, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -9.038877487182617, |
| "rewards/margins": 10.578144073486328, |
| "rewards/rejected": -19.617021560668945, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.6444444444444445, |
| "grad_norm": 8.678261756896973, |
| "learning_rate": 8.915377091454992e-06, |
| "logits/chosen": -0.2622337341308594, |
| "logits/rejected": -0.3454502820968628, |
| "logps/chosen": -330.6527404785156, |
| "logps/rejected": -271.9297790527344, |
| "loss": -0.9941, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -6.34903621673584, |
| "rewards/margins": 11.77011775970459, |
| "rewards/rejected": -18.119152069091797, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.6488888888888888, |
| "grad_norm": 18.099544525146484, |
| "learning_rate": 8.900840245335225e-06, |
| "logits/chosen": -0.29967910051345825, |
| "logits/rejected": -0.3550174832344055, |
| "logps/chosen": -339.3348388671875, |
| "logps/rejected": -284.2086181640625, |
| "loss": -0.6141, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -8.690653800964355, |
| "rewards/margins": 10.278780937194824, |
| "rewards/rejected": -18.969436645507812, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.6533333333333333, |
| "grad_norm": 17.058128356933594, |
| "learning_rate": 8.886218649679162e-06, |
| "logits/chosen": -0.30947160720825195, |
| "logits/rejected": -0.3345088064670563, |
| "logps/chosen": -320.3451232910156, |
| "logps/rejected": -277.7720031738281, |
| "loss": -1.014, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -8.53862190246582, |
| "rewards/margins": 12.284268379211426, |
| "rewards/rejected": -20.822891235351562, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.6577777777777778, |
| "grad_norm": 13.636448860168457, |
| "learning_rate": 8.871512622155147e-06, |
| "logits/chosen": -0.2878524363040924, |
| "logits/rejected": -0.3395880162715912, |
| "logps/chosen": -362.52899169921875, |
| "logps/rejected": -303.67620849609375, |
| "loss": -0.9753, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -8.569745063781738, |
| "rewards/margins": 12.254476547241211, |
| "rewards/rejected": -20.824222564697266, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.6622222222222223, |
| "grad_norm": 13.943758964538574, |
| "learning_rate": 8.856722482265886e-06, |
| "logits/chosen": -0.2777239978313446, |
| "logits/rejected": -0.2970428466796875, |
| "logps/chosen": -317.4947509765625, |
| "logps/rejected": -292.8834533691406, |
| "loss": -0.952, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -9.092373847961426, |
| "rewards/margins": 11.070058822631836, |
| "rewards/rejected": -20.162433624267578, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 10.792975425720215, |
| "learning_rate": 8.841848551341506e-06, |
| "logits/chosen": -0.300568550825119, |
| "logits/rejected": -0.35186997056007385, |
| "logps/chosen": -342.993408203125, |
| "logps/rejected": -283.03594970703125, |
| "loss": -0.9528, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -8.736288070678711, |
| "rewards/margins": 11.417104721069336, |
| "rewards/rejected": -20.153392791748047, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.6711111111111111, |
| "grad_norm": 14.272412300109863, |
| "learning_rate": 8.826891152532579e-06, |
| "logits/chosen": -0.24646346271038055, |
| "logits/rejected": -0.3181930184364319, |
| "logps/chosen": -330.8204040527344, |
| "logps/rejected": -290.7005310058594, |
| "loss": -0.8532, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -8.7890625, |
| "rewards/margins": 12.464083671569824, |
| "rewards/rejected": -21.25314712524414, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.6755555555555556, |
| "grad_norm": 18.93645477294922, |
| "learning_rate": 8.811850610803094e-06, |
| "logits/chosen": -0.2739986181259155, |
| "logits/rejected": -0.3238711357116699, |
| "logps/chosen": -351.69171142578125, |
| "logps/rejected": -305.0896301269531, |
| "loss": -0.9423, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -8.920026779174805, |
| "rewards/margins": 12.025522232055664, |
| "rewards/rejected": -20.945547103881836, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 18.738351821899414, |
| "learning_rate": 8.796727252923403e-06, |
| "logits/chosen": -0.31761056184768677, |
| "logits/rejected": -0.3448847532272339, |
| "logps/chosen": -331.40557861328125, |
| "logps/rejected": -296.1661071777344, |
| "loss": -0.5371, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -10.65619945526123, |
| "rewards/margins": 11.044633865356445, |
| "rewards/rejected": -21.700834274291992, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.6844444444444444, |
| "grad_norm": 17.68587875366211, |
| "learning_rate": 8.781521407463119e-06, |
| "logits/chosen": -0.29655805230140686, |
| "logits/rejected": -0.35701996088027954, |
| "logps/chosen": -353.18145751953125, |
| "logps/rejected": -300.9978942871094, |
| "loss": -1.0413, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -8.621820449829102, |
| "rewards/margins": 13.096748352050781, |
| "rewards/rejected": -21.718570709228516, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.6888888888888889, |
| "grad_norm": 8.765332221984863, |
| "learning_rate": 8.766233404783975e-06, |
| "logits/chosen": -0.33828821778297424, |
| "logits/rejected": -0.35216769576072693, |
| "logps/chosen": -349.8067626953125, |
| "logps/rejected": -313.57818603515625, |
| "loss": -0.856, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -9.512666702270508, |
| "rewards/margins": 12.066935539245605, |
| "rewards/rejected": -21.57960319519043, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.6933333333333334, |
| "grad_norm": 20.219892501831055, |
| "learning_rate": 8.750863577032652e-06, |
| "logits/chosen": -0.3195672929286957, |
| "logits/rejected": -0.3713618218898773, |
| "logps/chosen": -358.82037353515625, |
| "logps/rejected": -303.29571533203125, |
| "loss": -1.2232, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -9.787614822387695, |
| "rewards/margins": 13.660125732421875, |
| "rewards/rejected": -23.447738647460938, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.6977777777777778, |
| "grad_norm": 14.765336036682129, |
| "learning_rate": 8.735412258133562e-06, |
| "logits/chosen": -0.3235880136489868, |
| "logits/rejected": -0.36679068207740784, |
| "logps/chosen": -354.3951416015625, |
| "logps/rejected": -299.1098937988281, |
| "loss": -0.8905, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -10.815263748168945, |
| "rewards/margins": 12.30003547668457, |
| "rewards/rejected": -23.115299224853516, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.7022222222222222, |
| "grad_norm": 13.638387680053711, |
| "learning_rate": 8.719879783781585e-06, |
| "logits/chosen": -0.33130335807800293, |
| "logits/rejected": -0.3633490204811096, |
| "logps/chosen": -351.318603515625, |
| "logps/rejected": -302.3772888183594, |
| "loss": -1.0524, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -9.303329467773438, |
| "rewards/margins": 13.00804615020752, |
| "rewards/rejected": -22.31137466430664, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.7066666666666667, |
| "grad_norm": 10.990659713745117, |
| "learning_rate": 8.704266491434787e-06, |
| "logits/chosen": -0.3289201259613037, |
| "logits/rejected": -0.36471107602119446, |
| "logps/chosen": -330.88763427734375, |
| "logps/rejected": -286.8157958984375, |
| "loss": -0.6598, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -10.67319107055664, |
| "rewards/margins": 11.411112785339355, |
| "rewards/rejected": -22.084304809570312, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.7111111111111111, |
| "grad_norm": 12.232583045959473, |
| "learning_rate": 8.688572720307083e-06, |
| "logits/chosen": -0.3058468997478485, |
| "logits/rejected": -0.3826626241207123, |
| "logps/chosen": -360.0539855957031, |
| "logps/rejected": -302.0265808105469, |
| "loss": -0.9798, |
| "rewards/accuracies": 0.856249988079071, |
| "rewards/chosen": -9.977747917175293, |
| "rewards/margins": 13.34874153137207, |
| "rewards/rejected": -23.326488494873047, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.7155555555555555, |
| "grad_norm": 28.097835540771484, |
| "learning_rate": 8.672798811360863e-06, |
| "logits/chosen": -0.3440350890159607, |
| "logits/rejected": -0.3669665455818176, |
| "logps/chosen": -348.4472351074219, |
| "logps/rejected": -317.9486083984375, |
| "loss": -1.158, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -9.318201065063477, |
| "rewards/margins": 13.85308837890625, |
| "rewards/rejected": -23.17129135131836, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 13.566072463989258, |
| "learning_rate": 8.656945107299598e-06, |
| "logits/chosen": -0.32617539167404175, |
| "logits/rejected": -0.3627128601074219, |
| "logps/chosen": -350.51495361328125, |
| "logps/rejected": -313.4057922363281, |
| "loss": -1.1211, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -9.836647987365723, |
| "rewards/margins": 13.414007186889648, |
| "rewards/rejected": -23.250656127929688, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.7244444444444444, |
| "grad_norm": 10.32918930053711, |
| "learning_rate": 8.641011952560372e-06, |
| "logits/chosen": -0.3140029311180115, |
| "logits/rejected": -0.3582364618778229, |
| "logps/chosen": -333.9091796875, |
| "logps/rejected": -282.69061279296875, |
| "loss": -0.8948, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -10.822344779968262, |
| "rewards/margins": 11.024767875671387, |
| "rewards/rejected": -21.84711265563965, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.7288888888888889, |
| "grad_norm": 14.581253051757812, |
| "learning_rate": 8.624999693306422e-06, |
| "logits/chosen": -0.33729246258735657, |
| "logits/rejected": -0.3753616213798523, |
| "logps/chosen": -342.4247741699219, |
| "logps/rejected": -309.2422790527344, |
| "loss": -0.9419, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -11.228838920593262, |
| "rewards/margins": 13.112091064453125, |
| "rewards/rejected": -24.34092903137207, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.7333333333333333, |
| "grad_norm": 11.523558616638184, |
| "learning_rate": 8.608908677419606e-06, |
| "logits/chosen": -0.37991100549697876, |
| "logits/rejected": -0.40186434984207153, |
| "logps/chosen": -348.12396240234375, |
| "logps/rejected": -309.076171875, |
| "loss": -1.0504, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -12.649127006530762, |
| "rewards/margins": 12.755289077758789, |
| "rewards/rejected": -25.404415130615234, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.7377777777777778, |
| "grad_norm": 17.81552505493164, |
| "learning_rate": 8.592739254492845e-06, |
| "logits/chosen": -0.362493097782135, |
| "logits/rejected": -0.4177095293998718, |
| "logps/chosen": -335.11981201171875, |
| "logps/rejected": -290.92218017578125, |
| "loss": -1.1041, |
| "rewards/accuracies": 0.8687499761581421, |
| "rewards/chosen": -11.677441596984863, |
| "rewards/margins": 13.119203567504883, |
| "rewards/rejected": -24.79664421081543, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.7422222222222222, |
| "grad_norm": 20.329221725463867, |
| "learning_rate": 8.576491775822527e-06, |
| "logits/chosen": -0.33437713980674744, |
| "logits/rejected": -0.39904457330703735, |
| "logps/chosen": -357.16943359375, |
| "logps/rejected": -297.7870178222656, |
| "loss": -0.724, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -11.969806671142578, |
| "rewards/margins": 12.316507339477539, |
| "rewards/rejected": -24.28631591796875, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.7466666666666667, |
| "grad_norm": 19.302101135253906, |
| "learning_rate": 8.560166594400878e-06, |
| "logits/chosen": -0.3832574486732483, |
| "logits/rejected": -0.44351863861083984, |
| "logps/chosen": -352.62115478515625, |
| "logps/rejected": -304.46124267578125, |
| "loss": -0.6363, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": -12.629277229309082, |
| "rewards/margins": 11.651094436645508, |
| "rewards/rejected": -24.280370712280273, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.7511111111111111, |
| "grad_norm": 14.173489570617676, |
| "learning_rate": 8.543764064908295e-06, |
| "logits/chosen": -0.34056347608566284, |
| "logits/rejected": -0.39399194717407227, |
| "logps/chosen": -340.8840026855469, |
| "logps/rejected": -307.18603515625, |
| "loss": -1.2865, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -11.463663101196289, |
| "rewards/margins": 13.927221298217773, |
| "rewards/rejected": -25.390884399414062, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.7555555555555555, |
| "grad_norm": 22.398326873779297, |
| "learning_rate": 8.527284543705631e-06, |
| "logits/chosen": -0.37620821595191956, |
| "logits/rejected": -0.4051085412502289, |
| "logps/chosen": -341.5446472167969, |
| "logps/rejected": -314.3455810546875, |
| "loss": -1.1236, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -11.881677627563477, |
| "rewards/margins": 14.11926555633545, |
| "rewards/rejected": -26.00094223022461, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 20.017797470092773, |
| "learning_rate": 8.510728388826464e-06, |
| "logits/chosen": -0.33530497550964355, |
| "logits/rejected": -0.3962380290031433, |
| "logps/chosen": -341.4028625488281, |
| "logps/rejected": -305.88824462890625, |
| "loss": -1.5163, |
| "rewards/accuracies": 0.8812500238418579, |
| "rewards/chosen": -10.922323226928711, |
| "rewards/margins": 15.069772720336914, |
| "rewards/rejected": -25.992095947265625, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.7644444444444445, |
| "grad_norm": 13.252291679382324, |
| "learning_rate": 8.494095959969309e-06, |
| "logits/chosen": -0.34795650839805603, |
| "logits/rejected": -0.40874728560447693, |
| "logps/chosen": -348.9808654785156, |
| "logps/rejected": -308.14349365234375, |
| "loss": -0.9905, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -13.379107475280762, |
| "rewards/margins": 12.994186401367188, |
| "rewards/rejected": -26.373294830322266, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.7688888888888888, |
| "grad_norm": 18.38790512084961, |
| "learning_rate": 8.477387618489808e-06, |
| "logits/chosen": -0.3455773890018463, |
| "logits/rejected": -0.40834465622901917, |
| "logps/chosen": -343.2769470214844, |
| "logps/rejected": -297.47784423828125, |
| "loss": -1.4511, |
| "rewards/accuracies": 0.8687499761581421, |
| "rewards/chosen": -10.266695022583008, |
| "rewards/margins": 15.304100036621094, |
| "rewards/rejected": -25.5707950592041, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.7733333333333333, |
| "grad_norm": 21.267852783203125, |
| "learning_rate": 8.460603727392877e-06, |
| "logits/chosen": -0.35729557275772095, |
| "logits/rejected": -0.3905247449874878, |
| "logps/chosen": -370.48577880859375, |
| "logps/rejected": -324.64532470703125, |
| "loss": -1.1358, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -12.016322135925293, |
| "rewards/margins": 15.51159381866455, |
| "rewards/rejected": -27.527912139892578, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.7777777777777778, |
| "grad_norm": 20.68170166015625, |
| "learning_rate": 8.443744651324828e-06, |
| "logits/chosen": -0.3603067994117737, |
| "logits/rejected": -0.40933218598365784, |
| "logps/chosen": -356.02154541015625, |
| "logps/rejected": -310.26666259765625, |
| "loss": -1.0198, |
| "rewards/accuracies": 0.8187500238418579, |
| "rewards/chosen": -13.34446907043457, |
| "rewards/margins": 13.9938325881958, |
| "rewards/rejected": -27.338302612304688, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.7822222222222223, |
| "grad_norm": 14.795793533325195, |
| "learning_rate": 8.426810756565428e-06, |
| "logits/chosen": -0.3585900664329529, |
| "logits/rejected": -0.42686209082603455, |
| "logps/chosen": -368.9267272949219, |
| "logps/rejected": -311.18023681640625, |
| "loss": -1.5537, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -13.114725112915039, |
| "rewards/margins": 16.6258487701416, |
| "rewards/rejected": -29.74057388305664, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.7866666666666666, |
| "grad_norm": 19.962947845458984, |
| "learning_rate": 8.409802411019962e-06, |
| "logits/chosen": -0.347336083650589, |
| "logits/rejected": -0.4067932665348053, |
| "logps/chosen": -343.19158935546875, |
| "logps/rejected": -304.2242126464844, |
| "loss": -1.3862, |
| "rewards/accuracies": 0.8687499761581421, |
| "rewards/chosen": -13.927999496459961, |
| "rewards/margins": 15.6153564453125, |
| "rewards/rejected": -29.54335594177246, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.7911111111111111, |
| "grad_norm": 16.915666580200195, |
| "learning_rate": 8.392719984211228e-06, |
| "logits/chosen": -0.36178287863731384, |
| "logits/rejected": -0.42369580268859863, |
| "logps/chosen": -363.2778625488281, |
| "logps/rejected": -314.5802001953125, |
| "loss": -1.3641, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -14.133458137512207, |
| "rewards/margins": 13.37634563446045, |
| "rewards/rejected": -27.50980567932129, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.7955555555555556, |
| "grad_norm": 16.34973907470703, |
| "learning_rate": 8.375563847271506e-06, |
| "logits/chosen": -0.3902398645877838, |
| "logits/rejected": -0.4178919792175293, |
| "logps/chosen": -354.6260070800781, |
| "logps/rejected": -320.308837890625, |
| "loss": -1.296, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -14.261367797851562, |
| "rewards/margins": 15.514287948608398, |
| "rewards/rejected": -29.77565574645996, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 20.18850326538086, |
| "learning_rate": 8.35833437293451e-06, |
| "logits/chosen": -0.3586779534816742, |
| "logits/rejected": -0.3966183066368103, |
| "logps/chosen": -353.3863830566406, |
| "logps/rejected": -317.9190979003906, |
| "loss": -1.0465, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -13.865551948547363, |
| "rewards/margins": 14.156455993652344, |
| "rewards/rejected": -28.02200698852539, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.8044444444444444, |
| "grad_norm": 18.74745750427246, |
| "learning_rate": 8.341031935527267e-06, |
| "logits/chosen": -0.35274258255958557, |
| "logits/rejected": -0.4157370626926422, |
| "logps/chosen": -365.7769470214844, |
| "logps/rejected": -320.2703552246094, |
| "loss": -1.0852, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -14.110136032104492, |
| "rewards/margins": 15.161088943481445, |
| "rewards/rejected": -29.271224975585938, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.8088888888888889, |
| "grad_norm": 14.005874633789062, |
| "learning_rate": 8.323656910962011e-06, |
| "logits/chosen": -0.40306010842323303, |
| "logits/rejected": -0.44573473930358887, |
| "logps/chosen": -346.27105712890625, |
| "logps/rejected": -315.6506042480469, |
| "loss": -1.4107, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -13.054827690124512, |
| "rewards/margins": 15.541888236999512, |
| "rewards/rejected": -28.596715927124023, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.8133333333333334, |
| "grad_norm": 25.924898147583008, |
| "learning_rate": 8.306209676727994e-06, |
| "logits/chosen": -0.3658706545829773, |
| "logits/rejected": -0.4349114000797272, |
| "logps/chosen": -358.9135437011719, |
| "logps/rejected": -326.36090087890625, |
| "loss": -1.4081, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -12.446220397949219, |
| "rewards/margins": 17.219030380249023, |
| "rewards/rejected": -29.66524887084961, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.8177777777777778, |
| "grad_norm": 20.495826721191406, |
| "learning_rate": 8.288690611883296e-06, |
| "logits/chosen": -0.39841917157173157, |
| "logits/rejected": -0.4497374892234802, |
| "logps/chosen": -353.8162841796875, |
| "logps/rejected": -313.1631774902344, |
| "loss": -1.556, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -13.111363410949707, |
| "rewards/margins": 17.558149337768555, |
| "rewards/rejected": -30.669513702392578, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.8222222222222222, |
| "grad_norm": 24.87085723876953, |
| "learning_rate": 8.271100097046585e-06, |
| "logits/chosen": -0.3760126233100891, |
| "logits/rejected": -0.42560848593711853, |
| "logps/chosen": -350.9206237792969, |
| "logps/rejected": -320.97637939453125, |
| "loss": -1.4347, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -13.494958877563477, |
| "rewards/margins": 16.944490432739258, |
| "rewards/rejected": -30.439449310302734, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.8266666666666667, |
| "grad_norm": 20.46843910217285, |
| "learning_rate": 8.25343851438885e-06, |
| "logits/chosen": -0.4249737858772278, |
| "logits/rejected": -0.4788896441459656, |
| "logps/chosen": -361.0630187988281, |
| "logps/rejected": -319.73138427734375, |
| "loss": -1.3017, |
| "rewards/accuracies": 0.831250011920929, |
| "rewards/chosen": -14.13292407989502, |
| "rewards/margins": 17.0448055267334, |
| "rewards/rejected": -31.1777286529541, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.8311111111111111, |
| "grad_norm": 22.38373374938965, |
| "learning_rate": 8.235706247625098e-06, |
| "logits/chosen": -0.38224634528160095, |
| "logits/rejected": -0.4391182065010071, |
| "logps/chosen": -361.043701171875, |
| "logps/rejected": -325.9903564453125, |
| "loss": -1.2866, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -13.751859664916992, |
| "rewards/margins": 17.840734481811523, |
| "rewards/rejected": -31.59259605407715, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.8355555555555556, |
| "grad_norm": 21.872596740722656, |
| "learning_rate": 8.217903682006017e-06, |
| "logits/chosen": -0.39942440390586853, |
| "logits/rejected": -0.45849889516830444, |
| "logps/chosen": -373.39013671875, |
| "logps/rejected": -337.3862609863281, |
| "loss": -1.6818, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -15.173101425170898, |
| "rewards/margins": 18.07097053527832, |
| "rewards/rejected": -33.24407196044922, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 21.81854248046875, |
| "learning_rate": 8.200031204309604e-06, |
| "logits/chosen": -0.40619197487831116, |
| "logits/rejected": -0.4568824768066406, |
| "logps/chosen": -342.34356689453125, |
| "logps/rejected": -317.7866516113281, |
| "loss": -1.6939, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -13.93403148651123, |
| "rewards/margins": 17.663101196289062, |
| "rewards/rejected": -31.597131729125977, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.8444444444444444, |
| "grad_norm": 22.453853607177734, |
| "learning_rate": 8.182089202832767e-06, |
| "logits/chosen": -0.3882743716239929, |
| "logits/rejected": -0.4640750288963318, |
| "logps/chosen": -382.75787353515625, |
| "logps/rejected": -337.13995361328125, |
| "loss": -2.0499, |
| "rewards/accuracies": 0.8687499761581421, |
| "rewards/chosen": -14.779397964477539, |
| "rewards/margins": 20.105022430419922, |
| "rewards/rejected": -34.884422302246094, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.8488888888888889, |
| "grad_norm": 18.471513748168945, |
| "learning_rate": 8.16407806738288e-06, |
| "logits/chosen": -0.39945605397224426, |
| "logits/rejected": -0.4585798680782318, |
| "logps/chosen": -383.2483215332031, |
| "logps/rejected": -347.448974609375, |
| "loss": -1.2417, |
| "rewards/accuracies": 0.793749988079071, |
| "rewards/chosen": -15.864675521850586, |
| "rewards/margins": 16.104869842529297, |
| "rewards/rejected": -31.969547271728516, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.8533333333333334, |
| "grad_norm": 37.47999954223633, |
| "learning_rate": 8.145998189269327e-06, |
| "logits/chosen": -0.4188354015350342, |
| "logits/rejected": -0.4583558142185211, |
| "logps/chosen": -377.2878723144531, |
| "logps/rejected": -350.0664978027344, |
| "loss": -1.6863, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -18.30971336364746, |
| "rewards/margins": 19.272193908691406, |
| "rewards/rejected": -37.5819091796875, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.8577777777777778, |
| "grad_norm": 19.40680503845215, |
| "learning_rate": 8.127849961294984e-06, |
| "logits/chosen": -0.4305190145969391, |
| "logits/rejected": -0.477532297372818, |
| "logps/chosen": -372.54443359375, |
| "logps/rejected": -340.7271728515625, |
| "loss": -1.6979, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -17.950782775878906, |
| "rewards/margins": 18.499141693115234, |
| "rewards/rejected": -36.44992446899414, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.8622222222222222, |
| "grad_norm": 28.603803634643555, |
| "learning_rate": 8.109633777747703e-06, |
| "logits/chosen": -0.42268872261047363, |
| "logits/rejected": -0.4787193834781647, |
| "logps/chosen": -373.359619140625, |
| "logps/rejected": -347.9590148925781, |
| "loss": -1.3852, |
| "rewards/accuracies": 0.8062499761581421, |
| "rewards/chosen": -20.33183479309082, |
| "rewards/margins": 17.556093215942383, |
| "rewards/rejected": -37.88792419433594, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.8666666666666667, |
| "grad_norm": 23.769119262695312, |
| "learning_rate": 8.091350034391732e-06, |
| "logits/chosen": -0.40240478515625, |
| "logits/rejected": -0.48416176438331604, |
| "logps/chosen": -384.06976318359375, |
| "logps/rejected": -361.1874694824219, |
| "loss": -1.5434, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -18.98853874206543, |
| "rewards/margins": 18.99036979675293, |
| "rewards/rejected": -37.97890853881836, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.8711111111111111, |
| "grad_norm": 36.155155181884766, |
| "learning_rate": 8.072999128459119e-06, |
| "logits/chosen": -0.41284674406051636, |
| "logits/rejected": -0.4507782459259033, |
| "logps/chosen": -360.9227600097656, |
| "logps/rejected": -339.7681884765625, |
| "loss": -1.3714, |
| "rewards/accuracies": 0.768750011920929, |
| "rewards/chosen": -18.811853408813477, |
| "rewards/margins": 17.97747230529785, |
| "rewards/rejected": -36.78932189941406, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.8755555555555555, |
| "grad_norm": 21.57328224182129, |
| "learning_rate": 8.05458145864109e-06, |
| "logits/chosen": -0.39822930097579956, |
| "logits/rejected": -0.4551811218261719, |
| "logps/chosen": -353.82623291015625, |
| "logps/rejected": -345.6108093261719, |
| "loss": -1.2538, |
| "rewards/accuracies": 0.768750011920929, |
| "rewards/chosen": -17.53032875061035, |
| "rewards/margins": 19.313552856445312, |
| "rewards/rejected": -36.8438835144043, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 39.19557571411133, |
| "learning_rate": 8.036097425079377e-06, |
| "logits/chosen": -0.38101926445961, |
| "logits/rejected": -0.4362686276435852, |
| "logps/chosen": -381.2861328125, |
| "logps/rejected": -345.8175048828125, |
| "loss": -1.2588, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -19.006837844848633, |
| "rewards/margins": 17.608064651489258, |
| "rewards/rejected": -36.61490249633789, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.8844444444444445, |
| "grad_norm": 22.725879669189453, |
| "learning_rate": 8.017547429357532e-06, |
| "logits/chosen": -0.3905089497566223, |
| "logits/rejected": -0.44199681282043457, |
| "logps/chosen": -367.7683410644531, |
| "logps/rejected": -347.76812744140625, |
| "loss": -2.4106, |
| "rewards/accuracies": 0.8687499761581421, |
| "rewards/chosen": -15.675936698913574, |
| "rewards/margins": 22.89352798461914, |
| "rewards/rejected": -38.56946563720703, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "grad_norm": 20.353073120117188, |
| "learning_rate": 7.998931874492192e-06, |
| "logits/chosen": -0.37944620847702026, |
| "logits/rejected": -0.44008979201316833, |
| "logps/chosen": -352.2929382324219, |
| "logps/rejected": -333.12530517578125, |
| "loss": -1.4519, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -15.595646858215332, |
| "rewards/margins": 17.53089141845703, |
| "rewards/rejected": -33.12653732299805, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "eval_logits/chosen": -0.39150622487068176, |
| "eval_logits/rejected": -0.45033136010169983, |
| "eval_logps/chosen": -367.9861755371094, |
| "eval_logps/rejected": -349.3917541503906, |
| "eval_loss": -1.812597393989563, |
| "eval_rewards/accuracies": 0.8402500152587891, |
| "eval_rewards/chosen": -17.389860153198242, |
| "eval_rewards/margins": 20.433107376098633, |
| "eval_rewards/rejected": -37.822967529296875, |
| "eval_runtime": 2196.225, |
| "eval_samples_per_second": 1.821, |
| "eval_steps_per_second": 0.911, |
| "step": 1000 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 3375, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|