VNU-SecAlign / checkpoints /final_checkpoint /trainer_state.json
Jason-42195's picture
Upload folder using huggingface_hub
3098bd3 verified
Invalid JSON:Unexpected token 'N', ..."ejected": NaN, "... is not valid JSON
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9996685449121644,
"eval_steps": 200,
"global_step": 754,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.013258203513423931,
"grad_norm": 27.282764434814453,
"learning_rate": 5.000000000000001e-07,
"logits/chosen": -0.5551050901412964,
"logits/rejected": -0.5903115272521973,
"logps/chosen": -123.05072021484375,
"logps/rejected": -128.62611389160156,
"loss": 1.9744,
"nll_loss": 2.560427188873291,
"rewards/accuracies": 0.38749998807907104,
"rewards/chosen": -0.002333300421014428,
"rewards/margins": -0.002295339945703745,
"rewards/rejected": -3.796028977376409e-05,
"step": 10
},
{
"epoch": 0.026516407026847863,
"grad_norm": 17.47486114501953,
"learning_rate": 1.0000000000000002e-06,
"logits/chosen": -0.5378572940826416,
"logits/rejected": -0.5796166658401489,
"logps/chosen": -125.56513977050781,
"logps/rejected": -122.72200012207031,
"loss": 1.9287,
"nll_loss": 2.4739668369293213,
"rewards/accuracies": 0.534375011920929,
"rewards/chosen": 0.0028954462613910437,
"rewards/margins": 0.004081044811755419,
"rewards/rejected": -0.0011855984339490533,
"step": 20
},
{
"epoch": 0.039774610540271794,
"grad_norm": 28.93717384338379,
"learning_rate": 1.5e-06,
"logits/chosen": -0.5573912262916565,
"logits/rejected": -0.6301255226135254,
"logps/chosen": -120.21688079833984,
"logps/rejected": -120.69698333740234,
"loss": 1.9568,
"nll_loss": 2.5327491760253906,
"rewards/accuracies": 0.628125011920929,
"rewards/chosen": 0.006910824682563543,
"rewards/margins": 0.007299685385078192,
"rewards/rejected": -0.0003888603823725134,
"step": 30
},
{
"epoch": 0.053032814053695726,
"grad_norm": 26.4592227935791,
"learning_rate": 2.0000000000000003e-06,
"logits/chosen": -0.5415462255477905,
"logits/rejected": -0.5897966623306274,
"logps/chosen": -123.216064453125,
"logps/rejected": -116.96390533447266,
"loss": 1.945,
"nll_loss": 2.5195693969726562,
"rewards/accuracies": 0.6968749761581421,
"rewards/chosen": 0.02091406285762787,
"rewards/margins": 0.020817000418901443,
"rewards/rejected": 9.70602995948866e-05,
"step": 40
},
{
"epoch": 0.06629101756711965,
"grad_norm": 17.95655059814453,
"learning_rate": 2.5e-06,
"logits/chosen": -0.5369003415107727,
"logits/rejected": -0.5648149251937866,
"logps/chosen": -112.5962905883789,
"logps/rejected": -106.9513931274414,
"loss": 1.93,
"nll_loss": 2.506865978240967,
"rewards/accuracies": 0.71875,
"rewards/chosen": 0.04093035310506821,
"rewards/margins": 0.0435122512280941,
"rewards/rejected": -0.0025818957947194576,
"step": 50
},
{
"epoch": 0.07954922108054359,
"grad_norm": 18.845481872558594,
"learning_rate": 3e-06,
"logits/chosen": -0.5306503176689148,
"logits/rejected": -0.5870386958122253,
"logps/chosen": -115.35811614990234,
"logps/rejected": -119.94677734375,
"loss": 1.9014,
"nll_loss": 2.470397472381592,
"rewards/accuracies": 0.75,
"rewards/chosen": 0.0722598135471344,
"rewards/margins": 0.07325105369091034,
"rewards/rejected": -0.000991240842267871,
"step": 60
},
{
"epoch": 0.09280742459396751,
"grad_norm": 22.062213897705078,
"learning_rate": 3.5e-06,
"logits/chosen": -0.5302293300628662,
"logits/rejected": -0.5745421648025513,
"logps/chosen": -107.54048156738281,
"logps/rejected": -108.28858947753906,
"loss": 1.9362,
"nll_loss": 2.587956428527832,
"rewards/accuracies": 0.7593749761581421,
"rewards/chosen": 0.1419026404619217,
"rewards/margins": 0.144887775182724,
"rewards/rejected": -0.0029851621948182583,
"step": 70
},
{
"epoch": 0.10606562810739145,
"grad_norm": 22.929567337036133,
"learning_rate": 4.000000000000001e-06,
"logits/chosen": -0.5311123132705688,
"logits/rejected": -0.601387083530426,
"logps/chosen": -106.30036926269531,
"logps/rejected": -114.9739761352539,
"loss": 1.8807,
"nll_loss": 2.5304553508758545,
"rewards/accuracies": 0.815625011920929,
"rewards/chosen": 0.22294898331165314,
"rewards/margins": 0.23250994086265564,
"rewards/rejected": -0.009560950100421906,
"step": 80
},
{
"epoch": 0.11932383162081538,
"grad_norm": 15.968583106994629,
"learning_rate": 4.5e-06,
"logits/chosen": -0.5320655703544617,
"logits/rejected": -0.558965802192688,
"logps/chosen": -113.92137145996094,
"logps/rejected": -106.32939147949219,
"loss": 1.8047,
"nll_loss": 2.424870014190674,
"rewards/accuracies": 0.8125,
"rewards/chosen": 0.31752246618270874,
"rewards/margins": 0.33052030205726624,
"rewards/rejected": -0.012997796759009361,
"step": 90
},
{
"epoch": 0.1325820351342393,
"grad_norm": 16.777925491333008,
"learning_rate": 5e-06,
"logits/chosen": -0.5369315147399902,
"logits/rejected": -0.550090491771698,
"logps/chosen": -115.67036437988281,
"logps/rejected": -113.78245544433594,
"loss": 1.7842,
"nll_loss": 2.421271800994873,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": 0.4060588777065277,
"rewards/margins": 0.42890095710754395,
"rewards/rejected": -0.02284209243953228,
"step": 100
},
{
"epoch": 0.14584023864766324,
"grad_norm": 12.77545166015625,
"learning_rate": 4.923547400611622e-06,
"logits/chosen": -0.4923822283744812,
"logits/rejected": -0.550975501537323,
"logps/chosen": -104.06398010253906,
"logps/rejected": -105.51200103759766,
"loss": 1.7203,
"nll_loss": 2.3695566654205322,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": 0.5946463942527771,
"rewards/margins": 0.6244661211967468,
"rewards/rejected": -0.02981976605951786,
"step": 110
},
{
"epoch": 0.15909844216108718,
"grad_norm": 14.160531997680664,
"learning_rate": 4.847094801223242e-06,
"logits/chosen": -0.5261751413345337,
"logits/rejected": -0.593400239944458,
"logps/chosen": -109.50382995605469,
"logps/rejected": -117.4461669921875,
"loss": 1.6824,
"nll_loss": 2.298811674118042,
"rewards/accuracies": 0.831250011920929,
"rewards/chosen": 0.646867573261261,
"rewards/margins": 0.6841082572937012,
"rewards/rejected": -0.037240687757730484,
"step": 120
},
{
"epoch": 0.17235664567451112,
"grad_norm": 11.232987403869629,
"learning_rate": 4.770642201834863e-06,
"logits/chosen": -0.5387733578681946,
"logits/rejected": -0.5604445338249207,
"logps/chosen": -105.5321044921875,
"logps/rejected": -108.08251953125,
"loss": 1.6447,
"nll_loss": 2.28352689743042,
"rewards/accuracies": 0.846875011920929,
"rewards/chosen": 0.8856587409973145,
"rewards/margins": 0.9316526651382446,
"rewards/rejected": -0.04599405825138092,
"step": 130
},
{
"epoch": 0.18561484918793503,
"grad_norm": 13.136269569396973,
"learning_rate": 4.694189602446483e-06,
"logits/chosen": -0.4762907028198242,
"logits/rejected": -0.5645761489868164,
"logps/chosen": -104.01419830322266,
"logps/rejected": -105.42718505859375,
"loss": 1.5795,
"nll_loss": 2.1962618827819824,
"rewards/accuracies": 0.8687499761581421,
"rewards/chosen": 1.1001434326171875,
"rewards/margins": 1.1506679058074951,
"rewards/rejected": -0.0505245216190815,
"step": 140
},
{
"epoch": 0.19887305270135897,
"grad_norm": 11.437210083007812,
"learning_rate": 4.617737003058104e-06,
"logits/chosen": -0.45225948095321655,
"logits/rejected": -0.5816742181777954,
"logps/chosen": -95.95039367675781,
"logps/rejected": -116.07032775878906,
"loss": 1.5695,
"nll_loss": 2.187495708465576,
"rewards/accuracies": 0.875,
"rewards/chosen": 1.3856970071792603,
"rewards/margins": 1.4338386058807373,
"rewards/rejected": -0.04814162850379944,
"step": 150
},
{
"epoch": 0.2121312562147829,
"grad_norm": 11.217316627502441,
"learning_rate": 4.541284403669725e-06,
"logits/chosen": -0.4223412573337555,
"logits/rejected": -0.5557634234428406,
"logps/chosen": -99.70536804199219,
"logps/rejected": -108.40791320800781,
"loss": 1.5248,
"nll_loss": 2.0865731239318848,
"rewards/accuracies": 0.856249988079071,
"rewards/chosen": 1.4318909645080566,
"rewards/margins": 1.4701259136199951,
"rewards/rejected": -0.03823506087064743,
"step": 160
},
{
"epoch": 0.22538945972820681,
"grad_norm": 11.582230567932129,
"learning_rate": 4.464831804281346e-06,
"logits/chosen": -0.4045068323612213,
"logits/rejected": -0.5808693170547485,
"logps/chosen": -101.94197845458984,
"logps/rejected": -115.78692626953125,
"loss": 1.5259,
"nll_loss": 2.059906482696533,
"rewards/accuracies": 0.871874988079071,
"rewards/chosen": 1.6565885543823242,
"rewards/margins": 1.685520887374878,
"rewards/rejected": -0.028932059183716774,
"step": 170
},
{
"epoch": 0.23864766324163075,
"grad_norm": 9.85542106628418,
"learning_rate": 4.388379204892967e-06,
"logits/chosen": -0.40881863236427307,
"logits/rejected": -0.5515257120132446,
"logps/chosen": -94.77958679199219,
"logps/rejected": -109.2522201538086,
"loss": 1.4906,
"nll_loss": 2.006005048751831,
"rewards/accuracies": 0.8968750238418579,
"rewards/chosen": 1.7907886505126953,
"rewards/margins": 1.8029606342315674,
"rewards/rejected": -0.012172091752290726,
"step": 180
},
{
"epoch": 0.25190586675505466,
"grad_norm": 11.775798797607422,
"learning_rate": 4.311926605504588e-06,
"logits/chosen": -0.40728694200515747,
"logits/rejected": -0.5780837535858154,
"logps/chosen": -102.8525619506836,
"logps/rejected": -113.8287353515625,
"loss": 1.5071,
"nll_loss": 2.015195846557617,
"rewards/accuracies": 0.903124988079071,
"rewards/chosen": 1.94949209690094,
"rewards/margins": 1.9533236026763916,
"rewards/rejected": -0.003831386100500822,
"step": 190
},
{
"epoch": 0.2651640702684786,
"grad_norm": 9.207958221435547,
"learning_rate": 4.235474006116208e-06,
"logits/chosen": -0.3382512331008911,
"logits/rejected": -0.5608124136924744,
"logps/chosen": -95.6505355834961,
"logps/rejected": -115.0374755859375,
"loss": 1.4625,
"nll_loss": 1.900460958480835,
"rewards/accuracies": 0.8812500238418579,
"rewards/chosen": 1.9269275665283203,
"rewards/margins": 1.9018806219100952,
"rewards/rejected": 0.0250468198210001,
"step": 200
},
{
"epoch": 0.2651640702684786,
"eval_logits/chosen": 0.10900751501321793,
"eval_logits/rejected": -0.6459429860115051,
"eval_logps/chosen": -26.658893585205078,
"eval_logps/rejected": -23.843191146850586,
"eval_loss": 1.61775803565979,
"eval_nll_loss": 2.3195407390594482,
"eval_rewards/accuracies": 0.995156466960907,
"eval_rewards/chosen": 1.6962153911590576,
"eval_rewards/margins": 1.591001272201538,
"eval_rewards/rejected": 0.10521402209997177,
"eval_runtime": 126.4924,
"eval_samples_per_second": 21.203,
"eval_steps_per_second": 5.305,
"step": 200
},
{
"epoch": 0.27842227378190254,
"grad_norm": 11.874744415283203,
"learning_rate": 4.1590214067278286e-06,
"logits/chosen": -0.3222394287586212,
"logits/rejected": -0.5524163246154785,
"logps/chosen": -91.2304916381836,
"logps/rejected": -112.20948791503906,
"loss": 1.4373,
"nll_loss": 1.863207221031189,
"rewards/accuracies": 0.8843749761581421,
"rewards/chosen": 2.0715649127960205,
"rewards/margins": 2.029603958129883,
"rewards/rejected": 0.04196098819375038,
"step": 210
},
{
"epoch": 0.2916804772953265,
"grad_norm": 12.305173873901367,
"learning_rate": 4.08256880733945e-06,
"logits/chosen": -0.24730145931243896,
"logits/rejected": -0.5199188590049744,
"logps/chosen": -87.4412841796875,
"logps/rejected": -108.43525695800781,
"loss": 1.4032,
"nll_loss": 1.8019367456436157,
"rewards/accuracies": 0.8968750238418579,
"rewards/chosen": 2.422152280807495,
"rewards/margins": 2.348961114883423,
"rewards/rejected": 0.07319097220897675,
"step": 220
},
{
"epoch": 0.3049386808087504,
"grad_norm": 10.129953384399414,
"learning_rate": 4.00611620795107e-06,
"logits/chosen": -0.31045737862586975,
"logits/rejected": -0.5804657340049744,
"logps/chosen": -91.82988739013672,
"logps/rejected": -122.0359878540039,
"loss": 1.4386,
"nll_loss": 1.850035309791565,
"rewards/accuracies": 0.90625,
"rewards/chosen": 2.229877233505249,
"rewards/margins": 2.152693748474121,
"rewards/rejected": 0.07718367874622345,
"step": 230
},
{
"epoch": 0.31819688432217436,
"grad_norm": 13.068564414978027,
"learning_rate": 3.9296636085626916e-06,
"logits/chosen": -0.2514588534832001,
"logits/rejected": -0.5521794557571411,
"logps/chosen": -87.9677963256836,
"logps/rejected": -109.2548828125,
"loss": 1.4216,
"nll_loss": 1.8036372661590576,
"rewards/accuracies": 0.909375011920929,
"rewards/chosen": 2.569032907485962,
"rewards/margins": 2.454697370529175,
"rewards/rejected": 0.11433545500040054,
"step": 240
},
{
"epoch": 0.3314550878355983,
"grad_norm": 9.738222122192383,
"learning_rate": 3.853211009174313e-06,
"logits/chosen": -0.22102048993110657,
"logits/rejected": -0.5118038654327393,
"logps/chosen": -81.05973815917969,
"logps/rejected": -107.3470458984375,
"loss": 1.3802,
"nll_loss": 1.738581895828247,
"rewards/accuracies": 0.890625,
"rewards/chosen": 2.435701847076416,
"rewards/margins": 2.2943472862243652,
"rewards/rejected": 0.14135441184043884,
"step": 250
},
{
"epoch": 0.34471329134902223,
"grad_norm": 11.227466583251953,
"learning_rate": 3.776758409785933e-06,
"logits/chosen": -0.24970126152038574,
"logits/rejected": -0.5423383116722107,
"logps/chosen": -90.58589172363281,
"logps/rejected": -124.6546859741211,
"loss": 1.4231,
"nll_loss": 1.796224594116211,
"rewards/accuracies": 0.8843749761581421,
"rewards/chosen": 2.3060898780822754,
"rewards/margins": 2.1725523471832275,
"rewards/rejected": 0.13353754580020905,
"step": 260
},
{
"epoch": 0.3579714948624461,
"grad_norm": 10.950806617736816,
"learning_rate": 3.7003058103975537e-06,
"logits/chosen": -0.22132663428783417,
"logits/rejected": -0.5066910982131958,
"logps/chosen": -83.74676513671875,
"logps/rejected": -106.14500427246094,
"loss": 1.3775,
"nll_loss": 1.7145506143569946,
"rewards/accuracies": 0.921875,
"rewards/chosen": 2.4925014972686768,
"rewards/margins": 2.321049690246582,
"rewards/rejected": 0.17145180702209473,
"step": 270
},
{
"epoch": 0.37122969837587005,
"grad_norm": 10.97486686706543,
"learning_rate": 3.6238532110091746e-06,
"logits/chosen": -0.258320152759552,
"logits/rejected": -0.5529795289039612,
"logps/chosen": -86.88526916503906,
"logps/rejected": -111.84498596191406,
"loss": 1.3977,
"nll_loss": 1.7480520009994507,
"rewards/accuracies": 0.8968750238418579,
"rewards/chosen": 2.6251580715179443,
"rewards/margins": 2.444127321243286,
"rewards/rejected": 0.1810309737920761,
"step": 280
},
{
"epoch": 0.384487901889294,
"grad_norm": 9.71605110168457,
"learning_rate": 3.5474006116207954e-06,
"logits/chosen": -0.2799197733402252,
"logits/rejected": -0.5588380098342896,
"logps/chosen": -95.93229675292969,
"logps/rejected": -128.71484375,
"loss": 1.4329,
"nll_loss": 1.8220994472503662,
"rewards/accuracies": 0.940625011920929,
"rewards/chosen": 2.5101046562194824,
"rewards/margins": 2.3628010749816895,
"rewards/rejected": 0.14730362594127655,
"step": 290
},
{
"epoch": 0.39774610540271793,
"grad_norm": 21.213260650634766,
"learning_rate": 3.4709480122324163e-06,
"logits/chosen": -0.18812108039855957,
"logits/rejected": -0.50641930103302,
"logps/chosen": -90.42585754394531,
"logps/rejected": -107.69468688964844,
"loss": 1.39,
"nll_loss": 1.7294094562530518,
"rewards/accuracies": 0.940625011920929,
"rewards/chosen": 2.7429447174072266,
"rewards/margins": 2.536832094192505,
"rewards/rejected": 0.20611290633678436,
"step": 300
},
{
"epoch": 0.41100430891614187,
"grad_norm": 11.334510803222656,
"learning_rate": 3.394495412844037e-06,
"logits/chosen": -0.2053213119506836,
"logits/rejected": -0.5050525665283203,
"logps/chosen": -84.91264343261719,
"logps/rejected": -112.988037109375,
"loss": 1.375,
"nll_loss": 1.7265828847885132,
"rewards/accuracies": 0.921875,
"rewards/chosen": 2.71644926071167,
"rewards/margins": 2.498016834259033,
"rewards/rejected": 0.21843275427818298,
"step": 310
},
{
"epoch": 0.4242625124295658,
"grad_norm": 10.023775100708008,
"learning_rate": 3.318042813455658e-06,
"logits/chosen": -0.20158584415912628,
"logits/rejected": -0.5022256970405579,
"logps/chosen": -90.71162414550781,
"logps/rejected": -111.33503723144531,
"loss": 1.381,
"nll_loss": 1.7314481735229492,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": 2.7540974617004395,
"rewards/margins": 2.4751474857330322,
"rewards/rejected": 0.2789500057697296,
"step": 320
},
{
"epoch": 0.43752071594298975,
"grad_norm": 10.706851959228516,
"learning_rate": 3.2415902140672784e-06,
"logits/chosen": -0.1562536209821701,
"logits/rejected": -0.5007289052009583,
"logps/chosen": -89.73895263671875,
"logps/rejected": -106.5843276977539,
"loss": 1.3693,
"nll_loss": 1.6994127035140991,
"rewards/accuracies": 0.903124988079071,
"rewards/chosen": 2.805201292037964,
"rewards/margins": 2.5105228424072266,
"rewards/rejected": 0.29467862844467163,
"step": 330
},
{
"epoch": 0.45077891945641363,
"grad_norm": 25.567277908325195,
"learning_rate": 3.1651376146788993e-06,
"logits/chosen": -0.23418506979942322,
"logits/rejected": -0.5170575976371765,
"logps/chosen": -102.624267578125,
"logps/rejected": -112.3178939819336,
"loss": 1.4032,
"nll_loss": 1.7835899591445923,
"rewards/accuracies": 0.918749988079071,
"rewards/chosen": 2.494511842727661,
"rewards/margins": 2.1974825859069824,
"rewards/rejected": 0.2970294654369354,
"step": 340
},
{
"epoch": 0.46403712296983757,
"grad_norm": 12.165983200073242,
"learning_rate": 3.08868501529052e-06,
"logits/chosen": -0.20166996121406555,
"logits/rejected": -0.4993151128292084,
"logps/chosen": -99.72298431396484,
"logps/rejected": -117.60599517822266,
"loss": 1.387,
"nll_loss": 1.7688575983047485,
"rewards/accuracies": 0.9156249761581421,
"rewards/chosen": 2.6475043296813965,
"rewards/margins": 2.348958969116211,
"rewards/rejected": 0.2985452711582184,
"step": 350
},
{
"epoch": 0.4772953264832615,
"grad_norm": 9.041903495788574,
"learning_rate": 3.012232415902141e-06,
"logits/chosen": -0.07940540462732315,
"logits/rejected": -0.4662111699581146,
"logps/chosen": -75.92832946777344,
"logps/rejected": -104.47607421875,
"loss": 1.3136,
"nll_loss": 1.6304452419281006,
"rewards/accuracies": 0.921875,
"rewards/chosen": 2.911555051803589,
"rewards/margins": 2.5380759239196777,
"rewards/rejected": 0.37347906827926636,
"step": 360
},
{
"epoch": 0.49055352999668544,
"grad_norm": 10.296601295471191,
"learning_rate": 2.935779816513762e-06,
"logits/chosen": -0.08891765028238297,
"logits/rejected": -0.4381803572177887,
"logps/chosen": -83.01859283447266,
"logps/rejected": -97.76497650146484,
"loss": 1.3309,
"nll_loss": 1.6442056894302368,
"rewards/accuracies": 0.9156249761581421,
"rewards/chosen": 2.8263065814971924,
"rewards/margins": 2.439760208129883,
"rewards/rejected": 0.3865460455417633,
"step": 370
},
{
"epoch": 0.5038117335101093,
"grad_norm": 11.621145248413086,
"learning_rate": 2.8593272171253827e-06,
"logits/chosen": -0.13201047480106354,
"logits/rejected": -0.4527694582939148,
"logps/chosen": -87.77113342285156,
"logps/rejected": -122.87522888183594,
"loss": 1.3474,
"nll_loss": 1.6941699981689453,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": 2.6734607219696045,
"rewards/margins": 2.329221248626709,
"rewards/rejected": 0.3442399501800537,
"step": 380
},
{
"epoch": 0.5170699370235333,
"grad_norm": 9.234843254089355,
"learning_rate": 2.782874617737003e-06,
"logits/chosen": -0.13576461374759674,
"logits/rejected": -0.5005991458892822,
"logps/chosen": -98.45845031738281,
"logps/rejected": -119.65950012207031,
"loss": 1.3579,
"nll_loss": 1.7100152969360352,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": 2.792757034301758,
"rewards/margins": 2.4400179386138916,
"rewards/rejected": 0.35273903608322144,
"step": 390
},
{
"epoch": 0.5303281405369572,
"grad_norm": 10.394464492797852,
"learning_rate": 2.706422018348624e-06,
"logits/chosen": -0.14902424812316895,
"logits/rejected": -0.49045664072036743,
"logps/chosen": -96.21540069580078,
"logps/rejected": -113.64534759521484,
"loss": 1.3661,
"nll_loss": 1.73909592628479,
"rewards/accuracies": 0.9156249761581421,
"rewards/chosen": 2.7949509620666504,
"rewards/margins": 2.4181408882141113,
"rewards/rejected": 0.3768100440502167,
"step": 400
},
{
"epoch": 0.5303281405369572,
"eval_logits/chosen": 0.7696120142936707,
"eval_logits/rejected": -0.5085250735282898,
"eval_logps/chosen": -23.760295867919922,
"eval_logps/rejected": -19.921955108642578,
"eval_loss": 1.6759577989578247,
"eval_nll_loss": 2.218850612640381,
"eval_rewards/accuracies": 0.8729507923126221,
"eval_rewards/chosen": 1.9860752820968628,
"eval_rewards/margins": 1.4887374639511108,
"eval_rewards/rejected": 0.4973376393318176,
"eval_runtime": 126.408,
"eval_samples_per_second": 21.217,
"eval_steps_per_second": 5.308,
"step": 400
},
{
"epoch": 0.5435863440503812,
"grad_norm": 12.399572372436523,
"learning_rate": 2.629969418960245e-06,
"logits/chosen": -0.05847010016441345,
"logits/rejected": -0.42683249711990356,
"logps/chosen": -82.53068542480469,
"logps/rejected": -105.50111389160156,
"loss": 1.3133,
"nll_loss": 1.6254644393920898,
"rewards/accuracies": 0.9156249761581421,
"rewards/chosen": 3.054617166519165,
"rewards/margins": 2.591078042984009,
"rewards/rejected": 0.4635390341281891,
"step": 410
},
{
"epoch": 0.5568445475638051,
"grad_norm": 11.11414909362793,
"learning_rate": 2.5535168195718657e-06,
"logits/chosen": -0.10004544258117676,
"logits/rejected": -0.45182594656944275,
"logps/chosen": -85.72924041748047,
"logps/rejected": -116.79786682128906,
"loss": 1.3335,
"nll_loss": 1.6914409399032593,
"rewards/accuracies": 0.934374988079071,
"rewards/chosen": 2.7389349937438965,
"rewards/margins": 2.347107410430908,
"rewards/rejected": 0.39182740449905396,
"step": 420
},
{
"epoch": 0.5701027510772291,
"grad_norm": 11.188493728637695,
"learning_rate": 2.4770642201834866e-06,
"logits/chosen": -0.0028325587045401335,
"logits/rejected": -0.40763336420059204,
"logps/chosen": -83.37824249267578,
"logps/rejected": -90.0846176147461,
"loss": 1.2739,
"nll_loss": 1.597538709640503,
"rewards/accuracies": 0.9593750238418579,
"rewards/chosen": 3.1972365379333496,
"rewards/margins": 2.69289493560791,
"rewards/rejected": 0.5043416619300842,
"step": 430
},
{
"epoch": 0.583360954590653,
"grad_norm": 8.546418190002441,
"learning_rate": 2.400611620795107e-06,
"logits/chosen": -0.017235688865184784,
"logits/rejected": -0.4104672372341156,
"logps/chosen": -79.5479507446289,
"logps/rejected": -100.47693634033203,
"loss": 1.3056,
"nll_loss": 1.6203285455703735,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": 3.0248606204986572,
"rewards/margins": 2.5377180576324463,
"rewards/rejected": 0.4871426224708557,
"step": 440
},
{
"epoch": 0.596619158104077,
"grad_norm": 12.532038688659668,
"learning_rate": 2.324159021406728e-06,
"logits/chosen": -0.08448103815317154,
"logits/rejected": -0.41612687706947327,
"logps/chosen": -89.61862182617188,
"logps/rejected": -112.32474517822266,
"loss": 1.3386,
"nll_loss": 1.6953102350234985,
"rewards/accuracies": 0.921875,
"rewards/chosen": 2.803964376449585,
"rewards/margins": 2.3399243354797363,
"rewards/rejected": 0.4640396535396576,
"step": 450
},
{
"epoch": 0.6098773616175008,
"grad_norm": 13.633370399475098,
"learning_rate": 2.2477064220183487e-06,
"logits/chosen": -0.013189451768994331,
"logits/rejected": -0.41869059205055237,
"logps/chosen": -90.39549255371094,
"logps/rejected": -107.79378509521484,
"loss": 1.289,
"nll_loss": 1.6027923822402954,
"rewards/accuracies": 0.918749988079071,
"rewards/chosen": 2.938788652420044,
"rewards/margins": 2.443359851837158,
"rewards/rejected": 0.49542921781539917,
"step": 460
},
{
"epoch": 0.6231355651309247,
"grad_norm": 11.236469268798828,
"learning_rate": 2.1712538226299696e-06,
"logits/chosen": -0.09504064172506332,
"logits/rejected": null,
"logps/chosen": -106.80879974365234,
"logps/rejected": -117.15657806396484,
"loss": 1.3584,
"nll_loss": 1.7296257019042969,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": 2.964348316192627,
"rewards/margins": 2.4477875232696533,
"rewards/rejected": 0.516560971736908,
"step": 470
},
{
"epoch": 0.6363937686443487,
"grad_norm": 10.896163940429688,
"learning_rate": 2.0948012232415905e-06,
"logits/chosen": -0.0007806614157743752,
"logits/rejected": -0.39822936058044434,
"logps/chosen": -95.16539001464844,
"logps/rejected": -116.85235595703125,
"loss": 1.3068,
"nll_loss": 1.6314979791641235,
"rewards/accuracies": 0.903124988079071,
"rewards/chosen": 2.8457746505737305,
"rewards/margins": 2.327517509460449,
"rewards/rejected": 0.518257200717926,
"step": 480
},
{
"epoch": 0.6496519721577726,
"grad_norm": 7.040874004364014,
"learning_rate": 2.0183486238532113e-06,
"logits/chosen": 0.059870027005672455,
"logits/rejected": -0.3692580461502075,
"logps/chosen": -71.55367279052734,
"logps/rejected": -92.91752624511719,
"loss": 1.2388,
"nll_loss": 1.5524179935455322,
"rewards/accuracies": 0.953125,
"rewards/chosen": 3.224459171295166,
"rewards/margins": 2.605776309967041,
"rewards/rejected": 0.6186825037002563,
"step": 490
},
{
"epoch": 0.6629101756711966,
"grad_norm": 12.266393661499023,
"learning_rate": 1.9418960244648317e-06,
"logits/chosen": -0.09728819876909256,
"logits/rejected": -0.4279538094997406,
"logps/chosen": -98.3723373413086,
"logps/rejected": -116.90461730957031,
"loss": 1.3497,
"nll_loss": 1.7364962100982666,
"rewards/accuracies": 0.9281250238418579,
"rewards/chosen": 2.8716821670532227,
"rewards/margins": 2.3796398639678955,
"rewards/rejected": 0.4920427203178406,
"step": 500
},
{
"epoch": 0.6761683791846205,
"grad_norm": 11.75126838684082,
"learning_rate": 1.8654434250764528e-06,
"logits/chosen": -0.06470651179552078,
"logits/rejected": -0.40114492177963257,
"logps/chosen": -93.51762390136719,
"logps/rejected": -114.25923156738281,
"loss": 1.325,
"nll_loss": 1.6921663284301758,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": 3.008223056793213,
"rewards/margins": 2.433964490890503,
"rewards/rejected": 0.5742586851119995,
"step": 510
},
{
"epoch": 0.6894265826980445,
"grad_norm": 9.93482494354248,
"learning_rate": 1.7889908256880737e-06,
"logits/chosen": 0.08653802424669266,
"logits/rejected": -0.34983566403388977,
"logps/chosen": -74.48589324951172,
"logps/rejected": -97.51747131347656,
"loss": 1.2279,
"nll_loss": 1.5161999464035034,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": 3.1022284030914307,
"rewards/margins": 2.469442367553711,
"rewards/rejected": 0.6327860951423645,
"step": 520
},
{
"epoch": 0.7026847862114683,
"grad_norm": 10.692046165466309,
"learning_rate": 1.7125382262996943e-06,
"logits/chosen": 0.032983891665935516,
"logits/rejected": -0.3582807779312134,
"logps/chosen": -82.62593078613281,
"logps/rejected": -102.52696228027344,
"loss": 1.2671,
"nll_loss": 1.5854206085205078,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": 3.2378318309783936,
"rewards/margins": 2.618978977203369,
"rewards/rejected": 0.618852436542511,
"step": 530
},
{
"epoch": 0.7159429897248922,
"grad_norm": 16.475303649902344,
"learning_rate": 1.6360856269113152e-06,
"logits/chosen": -0.015640150755643845,
"logits/rejected": -0.3781605362892151,
"logps/chosen": -90.1484146118164,
"logps/rejected": -112.01336669921875,
"loss": 1.2988,
"nll_loss": 1.6478378772735596,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": 2.8458220958709717,
"rewards/margins": 2.3063721656799316,
"rewards/rejected": 0.53944993019104,
"step": 540
},
{
"epoch": 0.7292011932383162,
"grad_norm": 9.720301628112793,
"learning_rate": 1.559633027522936e-06,
"logits/chosen": -0.07955951988697052,
"logits/rejected": -0.4525434374809265,
"logps/chosen": -93.29837799072266,
"logps/rejected": -139.53305053710938,
"loss": 1.3062,
"nll_loss": 1.6710792779922485,
"rewards/accuracies": 0.934374988079071,
"rewards/chosen": 2.821993827819824,
"rewards/margins": 2.3934006690979004,
"rewards/rejected": 0.42859315872192383,
"step": 550
},
{
"epoch": 0.7424593967517401,
"grad_norm": 12.09350872039795,
"learning_rate": 1.4831804281345567e-06,
"logits/chosen": 0.014041140675544739,
"logits/rejected": -0.37960466742515564,
"logps/chosen": -93.00960540771484,
"logps/rejected": -117.96089935302734,
"loss": 1.2977,
"nll_loss": 1.6683666706085205,
"rewards/accuracies": 0.956250011920929,
"rewards/chosen": 3.0185599327087402,
"rewards/margins": 2.459902286529541,
"rewards/rejected": 0.5586578249931335,
"step": 560
},
{
"epoch": 0.7557176002651641,
"grad_norm": 9.332964897155762,
"learning_rate": 1.4067278287461775e-06,
"logits/chosen": -0.0421045646071434,
"logits/rejected": -0.3469962775707245,
"logps/chosen": -93.99418640136719,
"logps/rejected": -120.124267578125,
"loss": 1.3144,
"nll_loss": 1.673651099205017,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": 2.913119077682495,
"rewards/margins": 2.3597278594970703,
"rewards/rejected": 0.5533913373947144,
"step": 570
},
{
"epoch": 0.768975803778588,
"grad_norm": 8.982401847839355,
"learning_rate": 1.3302752293577984e-06,
"logits/chosen": 0.049627698957920074,
"logits/rejected": -0.3409837484359741,
"logps/chosen": -80.563232421875,
"logps/rejected": -110.6832504272461,
"loss": 1.2686,
"nll_loss": 1.6071580648422241,
"rewards/accuracies": 0.9468749761581421,
"rewards/chosen": 3.0636610984802246,
"rewards/margins": 2.460207223892212,
"rewards/rejected": 0.6034537553787231,
"step": 580
},
{
"epoch": 0.782234007292012,
"grad_norm": 11.456655502319336,
"learning_rate": 1.253822629969419e-06,
"logits/chosen": 0.01245723944157362,
"logits/rejected": -0.34452953934669495,
"logps/chosen": -83.06973266601562,
"logps/rejected": -112.3355712890625,
"loss": 1.2596,
"nll_loss": 1.579742193222046,
"rewards/accuracies": 0.934374988079071,
"rewards/chosen": 3.0834407806396484,
"rewards/margins": 2.512244701385498,
"rewards/rejected": 0.5711959600448608,
"step": 590
},
{
"epoch": 0.7954922108054359,
"grad_norm": 14.906444549560547,
"learning_rate": 1.17737003058104e-06,
"logits/chosen": 0.10093510150909424,
"logits/rejected": -0.3423386812210083,
"logps/chosen": -84.59368896484375,
"logps/rejected": -118.52232360839844,
"loss": 1.2424,
"nll_loss": 1.5443143844604492,
"rewards/accuracies": 0.934374988079071,
"rewards/chosen": 3.1625304222106934,
"rewards/margins": 2.5714595317840576,
"rewards/rejected": 0.5910708904266357,
"step": 600
},
{
"epoch": 0.7954922108054359,
"eval_logits/chosen": 1.064488410949707,
"eval_logits/rejected": -0.3661547303199768,
"eval_logps/chosen": -22.925987243652344,
"eval_logps/rejected": -17.408470153808594,
"eval_loss": 1.7094613313674927,
"eval_nll_loss": 2.171783685684204,
"eval_rewards/accuracies": 0.7257823944091797,
"eval_rewards/chosen": 2.0695061683654785,
"eval_rewards/margins": 1.3208197355270386,
"eval_rewards/rejected": 0.7486862540245056,
"eval_runtime": 126.8733,
"eval_samples_per_second": 21.139,
"eval_steps_per_second": 5.289,
"step": 600
},
{
"epoch": 0.8087504143188597,
"grad_norm": 11.739520072937012,
"learning_rate": 1.1009174311926608e-06,
"logits/chosen": 0.1604587882757187,
"logits/rejected": -0.30457383394241333,
"logps/chosen": -77.71000671386719,
"logps/rejected": -100.43540954589844,
"loss": 1.2066,
"nll_loss": 1.4790329933166504,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": 3.3722426891326904,
"rewards/margins": 2.629152297973633,
"rewards/rejected": 0.743090808391571,
"step": 610
},
{
"epoch": 0.8220086178322837,
"grad_norm": 10.975104331970215,
"learning_rate": 1.0244648318042814e-06,
"logits/chosen": 0.08241738379001617,
"logits/rejected": -0.30928146839141846,
"logps/chosen": -87.46654510498047,
"logps/rejected": -106.96031188964844,
"loss": 1.2453,
"nll_loss": 1.555537223815918,
"rewards/accuracies": 0.953125,
"rewards/chosen": 3.170712947845459,
"rewards/margins": 2.561098575592041,
"rewards/rejected": 0.6096144318580627,
"step": 620
},
{
"epoch": 0.8352668213457076,
"grad_norm": 13.56505298614502,
"learning_rate": 9.480122324159022e-07,
"logits/chosen": 0.023757517337799072,
"logits/rejected": -0.33807113766670227,
"logps/chosen": -94.47450256347656,
"logps/rejected": -128.47509765625,
"loss": 1.2845,
"nll_loss": 1.631945013999939,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": 2.836199998855591,
"rewards/margins": 2.2965328693389893,
"rewards/rejected": 0.5396672487258911,
"step": 630
},
{
"epoch": 0.8485250248591316,
"grad_norm": 16.084320068359375,
"learning_rate": 8.71559633027523e-07,
"logits/chosen": 0.0821368619799614,
"logits/rejected": -0.33640867471694946,
"logps/chosen": -78.98149108886719,
"logps/rejected": -111.6216049194336,
"loss": 1.2382,
"nll_loss": 1.5553481578826904,
"rewards/accuracies": 0.9468749761581421,
"rewards/chosen": 3.1928162574768066,
"rewards/margins": 2.547853708267212,
"rewards/rejected": 0.6449624300003052,
"step": 640
},
{
"epoch": 0.8617832283725555,
"grad_norm": 9.197409629821777,
"learning_rate": 7.951070336391438e-07,
"logits/chosen": 0.11906716972589493,
"logits/rejected": -0.3259919583797455,
"logps/chosen": -74.3010482788086,
"logps/rejected": -108.84968566894531,
"loss": 1.2331,
"nll_loss": 1.558100938796997,
"rewards/accuracies": 0.965624988079071,
"rewards/chosen": 3.2675209045410156,
"rewards/margins": 2.6362545490264893,
"rewards/rejected": 0.6312668323516846,
"step": 650
},
{
"epoch": 0.8750414318859795,
"grad_norm": 9.966322898864746,
"learning_rate": 7.186544342507645e-07,
"logits/chosen": 0.03254573419690132,
"logits/rejected": -0.3160571753978729,
"logps/chosen": -85.23878479003906,
"logps/rejected": -104.35621643066406,
"loss": 1.2927,
"nll_loss": 1.662453293800354,
"rewards/accuracies": 0.953125,
"rewards/chosen": 3.136343002319336,
"rewards/margins": 2.490025043487549,
"rewards/rejected": 0.6463181376457214,
"step": 660
},
{
"epoch": 0.8882996353994034,
"grad_norm": 9.719200134277344,
"learning_rate": 6.422018348623854e-07,
"logits/chosen": 0.060719866305589676,
"logits/rejected": -0.37117859721183777,
"logps/chosen": -82.31736755371094,
"logps/rejected": -121.16090393066406,
"loss": 1.2738,
"nll_loss": 1.6195169687271118,
"rewards/accuracies": 0.9312499761581421,
"rewards/chosen": 3.1395468711853027,
"rewards/margins": 2.5326294898986816,
"rewards/rejected": 0.6069172620773315,
"step": 670
},
{
"epoch": 0.9015578389128273,
"grad_norm": 10.185211181640625,
"learning_rate": 5.657492354740061e-07,
"logits/chosen": 0.16828341782093048,
"logits/rejected": -0.28492841124534607,
"logps/chosen": -74.0738754272461,
"logps/rejected": -89.85967254638672,
"loss": 1.2257,
"nll_loss": 1.5249927043914795,
"rewards/accuracies": 0.934374988079071,
"rewards/chosen": 3.2941131591796875,
"rewards/margins": 2.549973249435425,
"rewards/rejected": 0.744140088558197,
"step": 680
},
{
"epoch": 0.9148160424262513,
"grad_norm": 11.983025550842285,
"learning_rate": 4.89296636085627e-07,
"logits/chosen": 0.035757843405008316,
"logits/rejected": -0.3307420015335083,
"logps/chosen": -98.5334701538086,
"logps/rejected": -112.04931640625,
"loss": 1.2842,
"nll_loss": 1.63511061668396,
"rewards/accuracies": 0.9375,
"rewards/chosen": 3.1852855682373047,
"rewards/margins": 2.5147435665130615,
"rewards/rejected": 0.6705416440963745,
"step": 690
},
{
"epoch": 0.9280742459396751,
"grad_norm": 9.73780632019043,
"learning_rate": 4.128440366972478e-07,
"logits/chosen": 0.06930799782276154,
"logits/rejected": -0.33910712599754333,
"logps/chosen": -86.6443862915039,
"logps/rejected": -112.4611587524414,
"loss": 1.2466,
"nll_loss": 1.5855239629745483,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": 3.226916551589966,
"rewards/margins": 2.535816192626953,
"rewards/rejected": 0.6910998225212097,
"step": 700
},
{
"epoch": 0.9413324494530991,
"grad_norm": 9.678099632263184,
"learning_rate": 3.363914373088685e-07,
"logits/chosen": 0.12796175479888916,
"logits/rejected": -0.29829975962638855,
"logps/chosen": -83.7620620727539,
"logps/rejected": -99.74095153808594,
"loss": 1.2307,
"nll_loss": 1.524804711341858,
"rewards/accuracies": 0.9468749761581421,
"rewards/chosen": 3.5030083656311035,
"rewards/margins": 2.73651385307312,
"rewards/rejected": 0.7664941549301147,
"step": 710
},
{
"epoch": 0.954590652966523,
"grad_norm": 9.837422370910645,
"learning_rate": 2.599388379204893e-07,
"logits/chosen": 0.06604432314634323,
"logits/rejected": -0.326642245054245,
"logps/chosen": -87.40840911865234,
"logps/rejected": -112.01222229003906,
"loss": 1.2683,
"nll_loss": 1.62287175655365,
"rewards/accuracies": 0.9375,
"rewards/chosen": 3.202043056488037,
"rewards/margins": 2.5358872413635254,
"rewards/rejected": 0.6661559343338013,
"step": 720
},
{
"epoch": 0.967848856479947,
"grad_norm": 14.159199714660645,
"learning_rate": 1.8348623853211012e-07,
"logits/chosen": 0.02280101552605629,
"logits/rejected": -0.3272295594215393,
"logps/chosen": -84.34774017333984,
"logps/rejected": -106.10346984863281,
"loss": 1.2922,
"nll_loss": 1.6527671813964844,
"rewards/accuracies": 0.9437500238418579,
"rewards/chosen": 3.2514991760253906,
"rewards/margins": 2.529658555984497,
"rewards/rejected": 0.7218402624130249,
"step": 730
},
{
"epoch": 0.9811070599933709,
"grad_norm": 9.912482261657715,
"learning_rate": 1.070336391437309e-07,
"logits/chosen": 0.09289325773715973,
"logits/rejected": -0.31532809138298035,
"logps/chosen": -82.03899383544922,
"logps/rejected": -116.87910461425781,
"loss": 1.2127,
"nll_loss": 1.5196421146392822,
"rewards/accuracies": 0.9624999761581421,
"rewards/chosen": 3.268120527267456,
"rewards/margins": 2.592498302459717,
"rewards/rejected": 0.6756229996681213,
"step": 740
},
{
"epoch": 0.9943652635067949,
"grad_norm": 10.23357105255127,
"learning_rate": 3.0581039755351686e-08,
"logits/chosen": 0.015320442616939545,
"logits/rejected": -0.3034003674983978,
"logps/chosen": -99.24415588378906,
"logps/rejected": -107.158447265625,
"loss": 1.3115,
"nll_loss": 1.6806989908218384,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": 3.2072606086730957,
"rewards/margins": 2.46455717086792,
"rewards/rejected": 0.7427036166191101,
"step": 750
}
],
"logging_steps": 10,
"max_steps": 754,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}