Safetensors
qwen3
HAVOR-Deploy / trainer_state.json
OverwhelmingFit's picture
Duplicate from OverwhelmingFit/HAVOR-4B-D
9dd1190
Raw
History Blame Contribute Delete
319 kB
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 3.324399260628466,
"eval_steps": 400.0,
"global_step": 2400,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0013863216266173752,
"grad_norm": 16.979148864746094,
"learning_rate": 0.0,
"log_odds_chosen": -0.018929382786154747,
"log_odds_ratio": -0.9500243067741394,
"logits/chosen": 1.954408049583435,
"logits/rejected": 2.0060200691223145,
"logps/chosen": -0.19068074226379395,
"logps/rejected": -0.3597089946269989,
"loss": 1.7279,
"nll_loss": 1.632872462272644,
"rewards/accuracies": 0.5833333134651184,
"rewards/chosen": -0.019068075343966484,
"rewards/margins": 0.016902821138501167,
"rewards/rejected": -0.03597090020775795,
"step": 1
},
{
"epoch": 0.006931608133086876,
"grad_norm": 20.4935245513916,
"learning_rate": 8e-09,
"log_odds_chosen": -0.03683535382151604,
"log_odds_ratio": -0.919361412525177,
"logits/chosen": 1.4786995649337769,
"logits/rejected": 1.501151204109192,
"logps/chosen": -0.3465827703475952,
"logps/rejected": -0.4160928726196289,
"loss": 1.9313,
"nll_loss": 1.8393574953079224,
"rewards/accuracies": 0.5416666865348816,
"rewards/chosen": -0.0346582755446434,
"rewards/margins": 0.006951010320335627,
"rewards/rejected": -0.04160928353667259,
"step": 5
},
{
"epoch": 0.013863216266173753,
"grad_norm": 21.816869735717773,
"learning_rate": 1.8e-08,
"log_odds_chosen": 0.004844508599489927,
"log_odds_ratio": -0.8253452181816101,
"logits/chosen": 1.6159125566482544,
"logits/rejected": 1.631380319595337,
"logps/chosen": -0.29385247826576233,
"logps/rejected": -0.3623644709587097,
"loss": 1.8841,
"nll_loss": 1.8015655279159546,
"rewards/accuracies": 0.5416666865348816,
"rewards/chosen": -0.029385250061750412,
"rewards/margins": 0.006851200480014086,
"rewards/rejected": -0.03623645007610321,
"step": 10
},
{
"epoch": 0.020794824399260628,
"grad_norm": 23.262466430664062,
"learning_rate": 2.8000000000000003e-08,
"log_odds_chosen": 0.015412552282214165,
"log_odds_ratio": -0.8413525819778442,
"logits/chosen": 1.5471739768981934,
"logits/rejected": 1.581672191619873,
"logps/chosen": -0.29625552892684937,
"logps/rejected": -0.3712186813354492,
"loss": 1.8182,
"nll_loss": 1.7340798377990723,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.029625553637742996,
"rewards/margins": 0.00749631691724062,
"rewards/rejected": -0.03712187334895134,
"step": 15
},
{
"epoch": 0.027726432532347505,
"grad_norm": 23.77545166015625,
"learning_rate": 3.7999999999999996e-08,
"log_odds_chosen": 0.13983069360256195,
"log_odds_ratio": -0.7581052184104919,
"logits/chosen": 1.5518994331359863,
"logits/rejected": 1.5775402784347534,
"logps/chosen": -0.3246292769908905,
"logps/rejected": -0.4345749020576477,
"loss": 1.9212,
"nll_loss": 1.8454102277755737,
"rewards/accuracies": 0.5666666626930237,
"rewards/chosen": -0.03246293216943741,
"rewards/margins": 0.010994565673172474,
"rewards/rejected": -0.04345749691128731,
"step": 20
},
{
"epoch": 0.03465804066543438,
"grad_norm": 21.824874877929688,
"learning_rate": 4.799999999999999e-08,
"log_odds_chosen": -0.025701021775603294,
"log_odds_ratio": -0.8547807931900024,
"logits/chosen": 1.6149464845657349,
"logits/rejected": 1.6376367807388306,
"logps/chosen": -0.3293762803077698,
"logps/rejected": -0.3794548809528351,
"loss": 1.916,
"nll_loss": 1.8305460214614868,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.032937631011009216,
"rewards/margins": 0.005007854197174311,
"rewards/rejected": -0.03794548660516739,
"step": 25
},
{
"epoch": 0.041589648798521256,
"grad_norm": 23.641414642333984,
"learning_rate": 5.7999999999999997e-08,
"log_odds_chosen": -0.01403873972594738,
"log_odds_ratio": -0.8547641634941101,
"logits/chosen": 1.6201530694961548,
"logits/rejected": 1.6380269527435303,
"logps/chosen": -0.3491577208042145,
"logps/rejected": -0.4028278887271881,
"loss": 1.8577,
"nll_loss": 1.7722562551498413,
"rewards/accuracies": 0.5166666507720947,
"rewards/chosen": -0.034915778785943985,
"rewards/margins": 0.00536701874807477,
"rewards/rejected": -0.04028278589248657,
"step": 30
},
{
"epoch": 0.04852125693160813,
"grad_norm": 21.654430389404297,
"learning_rate": 6.8e-08,
"log_odds_chosen": 0.17713108658790588,
"log_odds_ratio": -0.7579152584075928,
"logits/chosen": 1.5499821901321411,
"logits/rejected": 1.5707145929336548,
"logps/chosen": -0.31028756499290466,
"logps/rejected": -0.4093845784664154,
"loss": 1.844,
"nll_loss": 1.7682510614395142,
"rewards/accuracies": 0.6083333492279053,
"rewards/chosen": -0.031028758734464645,
"rewards/margins": 0.009909691289067268,
"rewards/rejected": -0.04093845188617706,
"step": 35
},
{
"epoch": 0.05545286506469501,
"grad_norm": 30.187108993530273,
"learning_rate": 7.8e-08,
"log_odds_chosen": 0.18452158570289612,
"log_odds_ratio": -0.7830556035041809,
"logits/chosen": 1.6507318019866943,
"logits/rejected": 1.6740870475769043,
"logps/chosen": -0.28885844349861145,
"logps/rejected": -0.38478565216064453,
"loss": 1.8536,
"nll_loss": 1.775251865386963,
"rewards/accuracies": 0.5833333134651184,
"rewards/chosen": -0.028885845094919205,
"rewards/margins": 0.00959271751344204,
"rewards/rejected": -0.03847856447100639,
"step": 40
},
{
"epoch": 0.062384473197781884,
"grad_norm": 20.389713287353516,
"learning_rate": 8.8e-08,
"log_odds_chosen": 0.02360691875219345,
"log_odds_ratio": -0.8329288959503174,
"logits/chosen": 1.6618582010269165,
"logits/rejected": 1.676173448562622,
"logps/chosen": -0.35506105422973633,
"logps/rejected": -0.4218937158584595,
"loss": 1.888,
"nll_loss": 1.8047412633895874,
"rewards/accuracies": 0.5333333611488342,
"rewards/chosen": -0.03550610691308975,
"rewards/margins": 0.006683265324681997,
"rewards/rejected": -0.04218936711549759,
"step": 45
},
{
"epoch": 0.06931608133086876,
"grad_norm": 18.37879180908203,
"learning_rate": 9.799999999999999e-08,
"log_odds_chosen": 0.11584530770778656,
"log_odds_ratio": -0.768284022808075,
"logits/chosen": 1.658928394317627,
"logits/rejected": 1.6891582012176514,
"logps/chosen": -0.3134225904941559,
"logps/rejected": -0.41441354155540466,
"loss": 1.8648,
"nll_loss": 1.78793203830719,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.03134226053953171,
"rewards/margins": 0.01009910274296999,
"rewards/rejected": -0.04144136235117912,
"step": 50
},
{
"epoch": 0.07624768946395563,
"grad_norm": 19.63814926147461,
"learning_rate": 1.08e-07,
"log_odds_chosen": -0.07513849437236786,
"log_odds_ratio": -0.8687005639076233,
"logits/chosen": 1.5901387929916382,
"logits/rejected": 1.600988507270813,
"logps/chosen": -0.3237656056880951,
"logps/rejected": -0.36480894684791565,
"loss": 1.8659,
"nll_loss": 1.7789969444274902,
"rewards/accuracies": 0.5416666865348816,
"rewards/chosen": -0.03237656131386757,
"rewards/margins": 0.004104338586330414,
"rewards/rejected": -0.03648089990019798,
"step": 55
},
{
"epoch": 0.08317929759704251,
"grad_norm": 21.044511795043945,
"learning_rate": 1.1799999999999998e-07,
"log_odds_chosen": 0.12105648219585419,
"log_odds_ratio": -0.8005534410476685,
"logits/chosen": 1.7121615409851074,
"logits/rejected": 1.7367610931396484,
"logps/chosen": -0.2864932119846344,
"logps/rejected": -0.41227853298187256,
"loss": 1.8738,
"nll_loss": 1.793702483177185,
"rewards/accuracies": 0.5666666626930237,
"rewards/chosen": -0.02864932268857956,
"rewards/margins": 0.012578531168401241,
"rewards/rejected": -0.04122785106301308,
"step": 60
},
{
"epoch": 0.09011090573012939,
"grad_norm": 15.05855941772461,
"learning_rate": 1.28e-07,
"log_odds_chosen": -0.08899393677711487,
"log_odds_ratio": -0.9060453772544861,
"logits/chosen": 1.7786991596221924,
"logits/rejected": 1.7979838848114014,
"logps/chosen": -0.2860961854457855,
"logps/rejected": -0.35240638256073,
"loss": 1.8233,
"nll_loss": 1.7327053546905518,
"rewards/accuracies": 0.5416666865348816,
"rewards/chosen": -0.028609616681933403,
"rewards/margins": 0.006631023250520229,
"rewards/rejected": -0.03524063900113106,
"step": 65
},
{
"epoch": 0.09704251386321626,
"grad_norm": 12.863988876342773,
"learning_rate": 1.38e-07,
"log_odds_chosen": 0.02776586450636387,
"log_odds_ratio": -0.8355816006660461,
"logits/chosen": 1.926443099975586,
"logits/rejected": 1.9493002891540527,
"logps/chosen": -0.3111321032047272,
"logps/rejected": -0.3676571547985077,
"loss": 1.7489,
"nll_loss": 1.6653305292129517,
"rewards/accuracies": 0.5833333134651184,
"rewards/chosen": -0.031113211065530777,
"rewards/margins": 0.005652503110468388,
"rewards/rejected": -0.03676571324467659,
"step": 70
},
{
"epoch": 0.10397412199630314,
"grad_norm": 14.937926292419434,
"learning_rate": 1.48e-07,
"log_odds_chosen": 0.21290098130702972,
"log_odds_ratio": -0.7118617296218872,
"logits/chosen": 1.9580568075180054,
"logits/rejected": 1.9671556949615479,
"logps/chosen": -0.33254608511924744,
"logps/rejected": -0.42474591732025146,
"loss": 1.842,
"nll_loss": 1.770819902420044,
"rewards/accuracies": 0.5833333134651184,
"rewards/chosen": -0.033254608511924744,
"rewards/margins": 0.009219982661306858,
"rewards/rejected": -0.04247459024190903,
"step": 75
},
{
"epoch": 0.11090573012939002,
"grad_norm": 12.081910133361816,
"learning_rate": 1.58e-07,
"log_odds_chosen": 0.2506192624568939,
"log_odds_ratio": -0.7591946721076965,
"logits/chosen": 1.9343218803405762,
"logits/rejected": 1.9571690559387207,
"logps/chosen": -0.28907135128974915,
"logps/rejected": -0.4210290014743805,
"loss": 1.8269,
"nll_loss": 1.7509891986846924,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.028907136991620064,
"rewards/margins": 0.013195758685469627,
"rewards/rejected": -0.04210289567708969,
"step": 80
},
{
"epoch": 0.1178373382624769,
"grad_norm": 10.16718864440918,
"learning_rate": 1.68e-07,
"log_odds_chosen": 0.15295961499214172,
"log_odds_ratio": -0.7727136015892029,
"logits/chosen": 1.9498001337051392,
"logits/rejected": 1.9737645387649536,
"logps/chosen": -0.2715214490890503,
"logps/rejected": -0.39682307839393616,
"loss": 1.7426,
"nll_loss": 1.665367603302002,
"rewards/accuracies": 0.5833333134651184,
"rewards/chosen": -0.02715214155614376,
"rewards/margins": 0.012530164793133736,
"rewards/rejected": -0.039682310074567795,
"step": 85
},
{
"epoch": 0.12476894639556377,
"grad_norm": 8.379185676574707,
"learning_rate": 1.78e-07,
"log_odds_chosen": 0.011365304701030254,
"log_odds_ratio": -0.8190609216690063,
"logits/chosen": 2.1162543296813965,
"logits/rejected": 2.1363251209259033,
"logps/chosen": -0.32597094774246216,
"logps/rejected": -0.37424901127815247,
"loss": 1.7812,
"nll_loss": 1.699331521987915,
"rewards/accuracies": 0.5416666865348816,
"rewards/chosen": -0.032597098499536514,
"rewards/margins": 0.004827807657420635,
"rewards/rejected": -0.03742489963769913,
"step": 90
},
{
"epoch": 0.13170055452865065,
"grad_norm": 5.4643874168396,
"learning_rate": 1.88e-07,
"log_odds_chosen": 0.3046998977661133,
"log_odds_ratio": -0.710991621017456,
"logits/chosen": 2.2868356704711914,
"logits/rejected": 2.319998264312744,
"logps/chosen": -0.2605274021625519,
"logps/rejected": -0.41061633825302124,
"loss": 1.6722,
"nll_loss": 1.6011360883712769,
"rewards/accuracies": 0.6333333253860474,
"rewards/chosen": -0.026052741333842278,
"rewards/margins": 0.015008894726634026,
"rewards/rejected": -0.0410616360604763,
"step": 95
},
{
"epoch": 0.13863216266173753,
"grad_norm": 5.312658309936523,
"learning_rate": 1.98e-07,
"log_odds_chosen": 0.08499274402856827,
"log_odds_ratio": -0.8072493672370911,
"logits/chosen": 2.332758665084839,
"logits/rejected": 2.354078769683838,
"logps/chosen": -0.29434487223625183,
"logps/rejected": -0.36028656363487244,
"loss": 1.6704,
"nll_loss": 1.5896700620651245,
"rewards/accuracies": 0.6083333492279053,
"rewards/chosen": -0.029434483498334885,
"rewards/margins": 0.006594173610210419,
"rewards/rejected": -0.0360286645591259,
"step": 100
},
{
"epoch": 0.1455637707948244,
"grad_norm": 4.78338098526001,
"learning_rate": 2.08e-07,
"log_odds_chosen": 0.13001103699207306,
"log_odds_ratio": -0.7748425602912903,
"logits/chosen": 2.3882839679718018,
"logits/rejected": 2.4127211570739746,
"logps/chosen": -0.2966289222240448,
"logps/rejected": -0.39911049604415894,
"loss": 1.6961,
"nll_loss": 1.6186034679412842,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.02966289594769478,
"rewards/margins": 0.01024815533310175,
"rewards/rejected": -0.039911042898893356,
"step": 105
},
{
"epoch": 0.15249537892791126,
"grad_norm": 5.31309700012207,
"learning_rate": 2.18e-07,
"log_odds_chosen": 0.0922786071896553,
"log_odds_ratio": -0.8008362054824829,
"logits/chosen": 2.4012527465820312,
"logits/rejected": 2.405054807662964,
"logps/chosen": -0.31135261058807373,
"logps/rejected": -0.40367835760116577,
"loss": 1.6347,
"nll_loss": 1.5546340942382812,
"rewards/accuracies": 0.5666666626930237,
"rewards/chosen": -0.031135262921452522,
"rewards/margins": 0.009232571348547935,
"rewards/rejected": -0.04036783427000046,
"step": 110
},
{
"epoch": 0.15942698706099814,
"grad_norm": 4.425213813781738,
"learning_rate": 2.2799999999999998e-07,
"log_odds_chosen": -0.02769007720053196,
"log_odds_ratio": -0.8615487813949585,
"logits/chosen": 2.4684412479400635,
"logits/rejected": 2.4840517044067383,
"logps/chosen": -0.28860214352607727,
"logps/rejected": -0.3511938452720642,
"loss": 1.5912,
"nll_loss": 1.5050197839736938,
"rewards/accuracies": 0.5166666507720947,
"rewards/chosen": -0.028860213235020638,
"rewards/margins": 0.006259171757847071,
"rewards/rejected": -0.03511938080191612,
"step": 115
},
{
"epoch": 0.16635859519408502,
"grad_norm": 4.366436958312988,
"learning_rate": 2.38e-07,
"log_odds_chosen": 0.07487554848194122,
"log_odds_ratio": -0.7914910912513733,
"logits/chosen": 2.51110577583313,
"logits/rejected": 2.52194881439209,
"logps/chosen": -0.3162931501865387,
"logps/rejected": -0.3853374719619751,
"loss": 1.6602,
"nll_loss": 1.5810089111328125,
"rewards/accuracies": 0.5416666865348816,
"rewards/chosen": -0.03162931278347969,
"rewards/margins": 0.006904428359121084,
"rewards/rejected": -0.03853374347090721,
"step": 120
},
{
"epoch": 0.1732902033271719,
"grad_norm": 3.4541594982147217,
"learning_rate": 2.48e-07,
"log_odds_chosen": 0.24983125925064087,
"log_odds_ratio": -0.7045524716377258,
"logits/chosen": 2.5535173416137695,
"logits/rejected": 2.5766799449920654,
"logps/chosen": -0.2637786567211151,
"logps/rejected": -0.3734634816646576,
"loss": 1.5197,
"nll_loss": 1.4492452144622803,
"rewards/accuracies": 0.6416666507720947,
"rewards/chosen": -0.026377864181995392,
"rewards/margins": 0.01096847839653492,
"rewards/rejected": -0.03734634816646576,
"step": 125
},
{
"epoch": 0.18022181146025879,
"grad_norm": 3.3947913646698,
"learning_rate": 2.58e-07,
"log_odds_chosen": 0.13998886942863464,
"log_odds_ratio": -0.7501145005226135,
"logits/chosen": 2.617069959640503,
"logits/rejected": 2.6200926303863525,
"logps/chosen": -0.289898544549942,
"logps/rejected": -0.3845054805278778,
"loss": 1.5256,
"nll_loss": 1.45059072971344,
"rewards/accuracies": 0.5916666388511658,
"rewards/chosen": -0.02898985706269741,
"rewards/margins": 0.00946069322526455,
"rewards/rejected": -0.03845055028796196,
"step": 130
},
{
"epoch": 0.18715341959334567,
"grad_norm": 2.9145822525024414,
"learning_rate": 2.68e-07,
"log_odds_chosen": 0.10625941306352615,
"log_odds_ratio": -0.7714784145355225,
"logits/chosen": 2.6704909801483154,
"logits/rejected": 2.686890125274658,
"logps/chosen": -0.2948876619338989,
"logps/rejected": -0.3641115128993988,
"loss": 1.4299,
"nll_loss": 1.3527849912643433,
"rewards/accuracies": 0.5333333611488342,
"rewards/chosen": -0.02948876842856407,
"rewards/margins": 0.006922383327037096,
"rewards/rejected": -0.03641115128993988,
"step": 135
},
{
"epoch": 0.19408502772643252,
"grad_norm": 3.358665704727173,
"learning_rate": 2.78e-07,
"log_odds_chosen": 0.09461755305528641,
"log_odds_ratio": -0.735588014125824,
"logits/chosen": 2.7141809463500977,
"logits/rejected": 2.725619077682495,
"logps/chosen": -0.2867269814014435,
"logps/rejected": -0.3572947680950165,
"loss": 1.4621,
"nll_loss": 1.3885754346847534,
"rewards/accuracies": 0.5666666626930237,
"rewards/chosen": -0.028672698885202408,
"rewards/margins": 0.007056778762489557,
"rewards/rejected": -0.03572947904467583,
"step": 140
},
{
"epoch": 0.2010166358595194,
"grad_norm": 3.4602420330047607,
"learning_rate": 2.88e-07,
"log_odds_chosen": 0.18481667339801788,
"log_odds_ratio": -0.7241543531417847,
"logits/chosen": 2.639542818069458,
"logits/rejected": 2.6469709873199463,
"logps/chosen": -0.2566547989845276,
"logps/rejected": -0.3493967652320862,
"loss": 1.4112,
"nll_loss": 1.3387987613677979,
"rewards/accuracies": 0.5833333134651184,
"rewards/chosen": -0.02566548064351082,
"rewards/margins": 0.009274197742342949,
"rewards/rejected": -0.03493968024849892,
"step": 145
},
{
"epoch": 0.20794824399260628,
"grad_norm": 3.3385841846466064,
"learning_rate": 2.98e-07,
"log_odds_chosen": 0.14522768557071686,
"log_odds_ratio": -0.7345627546310425,
"logits/chosen": 2.535898447036743,
"logits/rejected": 2.555110216140747,
"logps/chosen": -0.3192240595817566,
"logps/rejected": -0.39829033613204956,
"loss": 1.3993,
"nll_loss": 1.3258506059646606,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.03192240744829178,
"rewards/margins": 0.007906629703938961,
"rewards/rejected": -0.039829038083553314,
"step": 150
},
{
"epoch": 0.21487985212569316,
"grad_norm": 3.195482015609741,
"learning_rate": 3.08e-07,
"log_odds_chosen": 0.32608503103256226,
"log_odds_ratio": -0.6445600390434265,
"logits/chosen": 2.3535492420196533,
"logits/rejected": 2.367779493331909,
"logps/chosen": -0.2523113787174225,
"logps/rejected": -0.35818183422088623,
"loss": 1.3715,
"nll_loss": 1.3070040941238403,
"rewards/accuracies": 0.6083333492279053,
"rewards/chosen": -0.0252311360090971,
"rewards/margins": 0.010587050579488277,
"rewards/rejected": -0.0358181856572628,
"step": 155
},
{
"epoch": 0.22181146025878004,
"grad_norm": 2.6090826988220215,
"learning_rate": 3.18e-07,
"log_odds_chosen": 0.38106462359428406,
"log_odds_ratio": -0.6169842481613159,
"logits/chosen": 2.301053047180176,
"logits/rejected": 2.3180480003356934,
"logps/chosen": -0.2931780219078064,
"logps/rejected": -0.4148869812488556,
"loss": 1.3188,
"nll_loss": 1.2571097612380981,
"rewards/accuracies": 0.6166666746139526,
"rewards/chosen": -0.02931780181825161,
"rewards/margins": 0.01217089593410492,
"rewards/rejected": -0.04148869961500168,
"step": 160
},
{
"epoch": 0.22874306839186692,
"grad_norm": 2.2898175716400146,
"learning_rate": 3.2799999999999997e-07,
"log_odds_chosen": 0.24468782544136047,
"log_odds_ratio": -0.6965723037719727,
"logits/chosen": 2.494852304458618,
"logits/rejected": 2.505509376525879,
"logps/chosen": -0.28601107001304626,
"logps/rejected": -0.4080314338207245,
"loss": 1.2778,
"nll_loss": 1.2081010341644287,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.02860110253095627,
"rewards/margins": 0.012202044948935509,
"rewards/rejected": -0.040803149342536926,
"step": 165
},
{
"epoch": 0.2356746765249538,
"grad_norm": 2.1119935512542725,
"learning_rate": 3.38e-07,
"log_odds_chosen": 0.30791595578193665,
"log_odds_ratio": -0.6434581279754639,
"logits/chosen": 2.4814131259918213,
"logits/rejected": 2.492072105407715,
"logps/chosen": -0.29261448979377747,
"logps/rejected": -0.4158262610435486,
"loss": 1.2693,
"nll_loss": 1.2049847841262817,
"rewards/accuracies": 0.5583333373069763,
"rewards/chosen": -0.029261449351906776,
"rewards/margins": 0.012321173213422298,
"rewards/rejected": -0.0415826216340065,
"step": 170
},
{
"epoch": 0.24260628465804066,
"grad_norm": 2.052659511566162,
"learning_rate": 3.48e-07,
"log_odds_chosen": 0.2674095928668976,
"log_odds_ratio": -0.6604223847389221,
"logits/chosen": 2.45322847366333,
"logits/rejected": 2.4553847312927246,
"logps/chosen": -0.27184900641441345,
"logps/rejected": -0.37303584814071655,
"loss": 1.269,
"nll_loss": 1.202986240386963,
"rewards/accuracies": 0.5833333134651184,
"rewards/chosen": -0.027184901759028435,
"rewards/margins": 0.010118687525391579,
"rewards/rejected": -0.037303585559129715,
"step": 175
},
{
"epoch": 0.24953789279112754,
"grad_norm": 1.9022108316421509,
"learning_rate": 3.58e-07,
"log_odds_chosen": 0.3532262146472931,
"log_odds_ratio": -0.6328123211860657,
"logits/chosen": 2.6101863384246826,
"logits/rejected": 2.6266489028930664,
"logps/chosen": -0.2756868898868561,
"logps/rejected": -0.3959445059299469,
"loss": 1.2196,
"nll_loss": 1.1562813520431519,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.027568688616156578,
"rewards/margins": 0.012025760486721992,
"rewards/rejected": -0.03959444910287857,
"step": 180
},
{
"epoch": 0.25646950092421444,
"grad_norm": 2.0479915142059326,
"learning_rate": 3.68e-07,
"log_odds_chosen": 0.1621844470500946,
"log_odds_ratio": -0.7073792219161987,
"logits/chosen": 2.635596990585327,
"logits/rejected": 2.6427814960479736,
"logps/chosen": -0.28807908296585083,
"logps/rejected": -0.35789498686790466,
"loss": 1.1997,
"nll_loss": 1.1289515495300293,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.028807910159230232,
"rewards/margins": 0.0069815958850085735,
"rewards/rejected": -0.035789504647254944,
"step": 185
},
{
"epoch": 0.2634011090573013,
"grad_norm": 1.7852911949157715,
"learning_rate": 3.7799999999999997e-07,
"log_odds_chosen": 0.22550253570079803,
"log_odds_ratio": -0.6922202706336975,
"logits/chosen": 2.6954963207244873,
"logits/rejected": 2.7063536643981934,
"logps/chosen": -0.29761365056037903,
"logps/rejected": -0.3754601776599884,
"loss": 1.2128,
"nll_loss": 1.143579125404358,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.02976136840879917,
"rewards/margins": 0.007784651126712561,
"rewards/rejected": -0.03754602000117302,
"step": 190
},
{
"epoch": 0.27033271719038815,
"grad_norm": 2.0064260959625244,
"learning_rate": 3.88e-07,
"log_odds_chosen": 0.30835989117622375,
"log_odds_ratio": -0.6616735458374023,
"logits/chosen": 2.747114896774292,
"logits/rejected": 2.756840229034424,
"logps/chosen": -0.2845754623413086,
"logps/rejected": -0.3947003185749054,
"loss": 1.182,
"nll_loss": 1.1158353090286255,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.02845754846930504,
"rewards/margins": 0.011012484319508076,
"rewards/rejected": -0.03947003558278084,
"step": 195
},
{
"epoch": 0.27726432532347506,
"grad_norm": 1.6967897415161133,
"learning_rate": 3.98e-07,
"log_odds_chosen": 0.36090362071990967,
"log_odds_ratio": -0.6156808733940125,
"logits/chosen": 2.736931562423706,
"logits/rejected": 2.7524020671844482,
"logps/chosen": -0.2760154902935028,
"logps/rejected": -0.39298757910728455,
"loss": 1.2024,
"nll_loss": 1.1408705711364746,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.02760155126452446,
"rewards/margins": 0.011697209440171719,
"rewards/rejected": -0.03929876163601875,
"step": 200
},
{
"epoch": 0.2841959334565619,
"grad_norm": 1.344346046447754,
"learning_rate": 3.9927272727272724e-07,
"log_odds_chosen": 0.44202545285224915,
"log_odds_ratio": -0.629891574382782,
"logits/chosen": 2.8161656856536865,
"logits/rejected": 2.827834129333496,
"logps/chosen": -0.27560955286026,
"logps/rejected": -0.43479490280151367,
"loss": 1.1696,
"nll_loss": 1.1066458225250244,
"rewards/accuracies": 0.5833333134651184,
"rewards/chosen": -0.02756096050143242,
"rewards/margins": 0.015918532386422157,
"rewards/rejected": -0.04347948729991913,
"step": 205
},
{
"epoch": 0.2911275415896488,
"grad_norm": 1.632926344871521,
"learning_rate": 3.983636363636363e-07,
"log_odds_chosen": 0.27023443579673767,
"log_odds_ratio": -0.7081299424171448,
"logits/chosen": 2.6833786964416504,
"logits/rejected": 2.696648359298706,
"logps/chosen": -0.29506218433380127,
"logps/rejected": -0.4175509214401245,
"loss": 1.171,
"nll_loss": 1.100161075592041,
"rewards/accuracies": 0.5666666626930237,
"rewards/chosen": -0.029506217688322067,
"rewards/margins": 0.012248875573277473,
"rewards/rejected": -0.04175509512424469,
"step": 210
},
{
"epoch": 0.2980591497227357,
"grad_norm": 1.6668481826782227,
"learning_rate": 3.9745454545454543e-07,
"log_odds_chosen": 0.3347550332546234,
"log_odds_ratio": -0.6504988074302673,
"logits/chosen": 2.6714982986450195,
"logits/rejected": 2.6816256046295166,
"logps/chosen": -0.29570263624191284,
"logps/rejected": -0.42322224378585815,
"loss": 1.1627,
"nll_loss": 1.0976592302322388,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.029570268467068672,
"rewards/margins": 0.012751961126923561,
"rewards/rejected": -0.04232222959399223,
"step": 215
},
{
"epoch": 0.3049907578558225,
"grad_norm": 1.663832664489746,
"learning_rate": 3.965454545454545e-07,
"log_odds_chosen": 0.329058974981308,
"log_odds_ratio": -0.6497308611869812,
"logits/chosen": 2.728097438812256,
"logits/rejected": 2.751708507537842,
"logps/chosen": -0.3120826184749603,
"logps/rejected": -0.44624626636505127,
"loss": 1.2113,
"nll_loss": 1.1463485956192017,
"rewards/accuracies": 0.5916666388511658,
"rewards/chosen": -0.031208263710141182,
"rewards/margins": 0.013416365720331669,
"rewards/rejected": -0.04462462291121483,
"step": 220
},
{
"epoch": 0.31192236598890943,
"grad_norm": 1.2902253866195679,
"learning_rate": 3.9563636363636363e-07,
"log_odds_chosen": 0.3168531656265259,
"log_odds_ratio": -0.6508561372756958,
"logits/chosen": 2.767462968826294,
"logits/rejected": 2.776341438293457,
"logps/chosen": -0.2664511799812317,
"logps/rejected": -0.3841439485549927,
"loss": 1.1313,
"nll_loss": 1.0662100315093994,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.02664511650800705,
"rewards/margins": 0.011769277974963188,
"rewards/rejected": -0.03841439634561539,
"step": 225
},
{
"epoch": 0.3188539741219963,
"grad_norm": 1.4394150972366333,
"learning_rate": 3.947272727272727e-07,
"log_odds_chosen": 0.5200116038322449,
"log_odds_ratio": -0.5801463723182678,
"logits/chosen": 2.7293012142181396,
"logits/rejected": 2.7543258666992188,
"logps/chosen": -0.2802920639514923,
"logps/rejected": -0.4830513894557953,
"loss": 1.1608,
"nll_loss": 1.1027837991714478,
"rewards/accuracies": 0.6583333611488342,
"rewards/chosen": -0.02802920900285244,
"rewards/margins": 0.020275937393307686,
"rewards/rejected": -0.048305146396160126,
"step": 230
},
{
"epoch": 0.3257855822550832,
"grad_norm": 1.5486044883728027,
"learning_rate": 3.9381818181818177e-07,
"log_odds_chosen": 0.3175574839115143,
"log_odds_ratio": -0.6549851894378662,
"logits/chosen": 2.717921495437622,
"logits/rejected": 2.7285637855529785,
"logps/chosen": -0.30369266867637634,
"logps/rejected": -0.4324611723423004,
"loss": 1.1833,
"nll_loss": 1.117845892906189,
"rewards/accuracies": 0.6083333492279053,
"rewards/chosen": -0.030369265004992485,
"rewards/margins": 0.012876848690211773,
"rewards/rejected": -0.043246109038591385,
"step": 235
},
{
"epoch": 0.33271719038817005,
"grad_norm": 1.4637186527252197,
"learning_rate": 3.929090909090909e-07,
"log_odds_chosen": 0.677905261516571,
"log_odds_ratio": -0.5080384612083435,
"logits/chosen": 2.6565752029418945,
"logits/rejected": 2.670898199081421,
"logps/chosen": -0.24577026069164276,
"logps/rejected": -0.48834845423698425,
"loss": 1.1682,
"nll_loss": 1.1173516511917114,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.024577027186751366,
"rewards/margins": 0.02425781637430191,
"rewards/rejected": -0.04883484169840813,
"step": 240
},
{
"epoch": 0.33964879852125696,
"grad_norm": 1.5058139562606812,
"learning_rate": 3.9199999999999996e-07,
"log_odds_chosen": 0.4405784010887146,
"log_odds_ratio": -0.6027734875679016,
"logits/chosen": 2.6786551475524902,
"logits/rejected": 2.6928422451019287,
"logps/chosen": -0.28616759181022644,
"logps/rejected": -0.4547707438468933,
"loss": 1.1336,
"nll_loss": 1.0733466148376465,
"rewards/accuracies": 0.6083333492279053,
"rewards/chosen": -0.028616759926080704,
"rewards/margins": 0.016860313713550568,
"rewards/rejected": -0.04547707736492157,
"step": 245
},
{
"epoch": 0.3465804066543438,
"grad_norm": 1.8535481691360474,
"learning_rate": 3.910909090909091e-07,
"log_odds_chosen": 0.42455023527145386,
"log_odds_ratio": -0.6171411275863647,
"logits/chosen": 2.7245190143585205,
"logits/rejected": 2.732139825820923,
"logps/chosen": -0.30346882343292236,
"logps/rejected": -0.46232348680496216,
"loss": 1.1587,
"nll_loss": 1.0969831943511963,
"rewards/accuracies": 0.6333333253860474,
"rewards/chosen": -0.030346881598234177,
"rewards/margins": 0.015885472297668457,
"rewards/rejected": -0.046232353895902634,
"step": 250
},
{
"epoch": 0.35351201478743066,
"grad_norm": 1.3919163942337036,
"learning_rate": 3.9018181818181816e-07,
"log_odds_chosen": 0.5069887042045593,
"log_odds_ratio": -0.5706583857536316,
"logits/chosen": 2.649379014968872,
"logits/rejected": 2.6625030040740967,
"logps/chosen": -0.25029629468917847,
"logps/rejected": -0.43570002913475037,
"loss": 1.1158,
"nll_loss": 1.0587836503982544,
"rewards/accuracies": 0.6916666626930237,
"rewards/chosen": -0.025029627606272697,
"rewards/margins": 0.01854037493467331,
"rewards/rejected": -0.04357000067830086,
"step": 255
},
{
"epoch": 0.36044362292051757,
"grad_norm": 1.3738930225372314,
"learning_rate": 3.8927272727272723e-07,
"log_odds_chosen": 0.3587478697299957,
"log_odds_ratio": -0.6369132995605469,
"logits/chosen": 2.712921619415283,
"logits/rejected": 2.728205442428589,
"logps/chosen": -0.2719246745109558,
"logps/rejected": -0.3995325267314911,
"loss": 1.1394,
"nll_loss": 1.075750470161438,
"rewards/accuracies": 0.6166666746139526,
"rewards/chosen": -0.02719247154891491,
"rewards/margins": 0.012760787270963192,
"rewards/rejected": -0.03995325788855553,
"step": 260
},
{
"epoch": 0.3673752310536044,
"grad_norm": 1.7332333326339722,
"learning_rate": 3.8836363636363635e-07,
"log_odds_chosen": 0.5665148496627808,
"log_odds_ratio": -0.5796270966529846,
"logits/chosen": 2.66032338142395,
"logits/rejected": 2.6766788959503174,
"logps/chosen": -0.2771783769130707,
"logps/rejected": -0.4812432527542114,
"loss": 1.1499,
"nll_loss": 1.0919440984725952,
"rewards/accuracies": 0.6833333373069763,
"rewards/chosen": -0.027717838063836098,
"rewards/margins": 0.020406486466526985,
"rewards/rejected": -0.04812432825565338,
"step": 265
},
{
"epoch": 0.37430683918669133,
"grad_norm": 1.733955979347229,
"learning_rate": 3.874545454545454e-07,
"log_odds_chosen": 0.533964991569519,
"log_odds_ratio": -0.5693932175636292,
"logits/chosen": 2.676833391189575,
"logits/rejected": 2.7048349380493164,
"logps/chosen": -0.2923631966114044,
"logps/rejected": -0.4988028109073639,
"loss": 1.1422,
"nll_loss": 1.0852835178375244,
"rewards/accuracies": 0.6583333611488342,
"rewards/chosen": -0.02923632226884365,
"rewards/margins": 0.020643968135118484,
"rewards/rejected": -0.049880288541316986,
"step": 270
},
{
"epoch": 0.3812384473197782,
"grad_norm": 1.678440809249878,
"learning_rate": 3.865454545454545e-07,
"log_odds_chosen": 0.48775380849838257,
"log_odds_ratio": -0.5923742651939392,
"logits/chosen": 2.589829444885254,
"logits/rejected": 2.6072795391082764,
"logps/chosen": -0.31491658091545105,
"logps/rejected": -0.47682541608810425,
"loss": 1.1148,
"nll_loss": 1.0555262565612793,
"rewards/accuracies": 0.6583333611488342,
"rewards/chosen": -0.031491655856370926,
"rewards/margins": 0.01619088277220726,
"rewards/rejected": -0.047682538628578186,
"step": 275
},
{
"epoch": 0.38817005545286504,
"grad_norm": 1.911895751953125,
"learning_rate": 3.856363636363636e-07,
"log_odds_chosen": 0.3376753032207489,
"log_odds_ratio": -0.6669396162033081,
"logits/chosen": 2.6268460750579834,
"logits/rejected": 2.6471641063690186,
"logps/chosen": -0.29394927620887756,
"logps/rejected": -0.4321646988391876,
"loss": 1.1103,
"nll_loss": 1.0436404943466187,
"rewards/accuracies": 0.6083333492279053,
"rewards/chosen": -0.029394926503300667,
"rewards/margins": 0.01382154505699873,
"rewards/rejected": -0.04321647435426712,
"step": 280
},
{
"epoch": 0.39510166358595195,
"grad_norm": 1.6275674104690552,
"learning_rate": 3.847272727272727e-07,
"log_odds_chosen": 0.4959801137447357,
"log_odds_ratio": -0.6270378828048706,
"logits/chosen": 2.666292667388916,
"logits/rejected": 2.6843721866607666,
"logps/chosen": -0.3145168423652649,
"logps/rejected": -0.48912960290908813,
"loss": 1.1656,
"nll_loss": 1.1028538942337036,
"rewards/accuracies": 0.6416666507720947,
"rewards/chosen": -0.03145168721675873,
"rewards/margins": 0.017461273819208145,
"rewards/rejected": -0.048912957310676575,
"step": 285
},
{
"epoch": 0.4020332717190388,
"grad_norm": 1.6614007949829102,
"learning_rate": 3.838181818181818e-07,
"log_odds_chosen": 0.3132282793521881,
"log_odds_ratio": -0.6719579100608826,
"logits/chosen": 2.644859790802002,
"logits/rejected": 2.6587765216827393,
"logps/chosen": -0.30401140451431274,
"logps/rejected": -0.4220035970211029,
"loss": 1.1595,
"nll_loss": 1.0922586917877197,
"rewards/accuracies": 0.5916666388511658,
"rewards/chosen": -0.030401142314076424,
"rewards/margins": 0.011799216270446777,
"rewards/rejected": -0.04220036417245865,
"step": 290
},
{
"epoch": 0.4089648798521257,
"grad_norm": 5.234076499938965,
"learning_rate": 3.829090909090909e-07,
"log_odds_chosen": 0.4143497347831726,
"log_odds_ratio": -0.6194970011711121,
"logits/chosen": 2.693376064300537,
"logits/rejected": 2.7130610942840576,
"logps/chosen": -0.300813227891922,
"logps/rejected": -0.45194199681282043,
"loss": 1.1107,
"nll_loss": 1.0487507581710815,
"rewards/accuracies": 0.6666666865348816,
"rewards/chosen": -0.030081328004598618,
"rewards/margins": 0.01511287409812212,
"rewards/rejected": -0.04519420489668846,
"step": 295
},
{
"epoch": 0.41589648798521256,
"grad_norm": 1.2319281101226807,
"learning_rate": 3.8199999999999995e-07,
"log_odds_chosen": 0.6317125558853149,
"log_odds_ratio": -0.5252640247344971,
"logits/chosen": 2.6187520027160645,
"logits/rejected": 2.6462595462799072,
"logps/chosen": -0.27775922417640686,
"logps/rejected": -0.5154780745506287,
"loss": 1.1218,
"nll_loss": 1.069314956665039,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.027775920927524567,
"rewards/margins": 0.023771891370415688,
"rewards/rejected": -0.0515478178858757,
"step": 300
},
{
"epoch": 0.42282809611829947,
"grad_norm": 1.404926061630249,
"learning_rate": 3.810909090909091e-07,
"log_odds_chosen": 0.45306530594825745,
"log_odds_ratio": -0.5977523326873779,
"logits/chosen": 2.6711065769195557,
"logits/rejected": 2.6791975498199463,
"logps/chosen": -0.2907872498035431,
"logps/rejected": -0.4649318754673004,
"loss": 1.0976,
"nll_loss": 1.037819266319275,
"rewards/accuracies": 0.6416666507720947,
"rewards/chosen": -0.029078727588057518,
"rewards/margins": 0.017414459958672523,
"rewards/rejected": -0.04649318382143974,
"step": 305
},
{
"epoch": 0.4297597042513863,
"grad_norm": 1.635770320892334,
"learning_rate": 3.8018181818181815e-07,
"log_odds_chosen": 0.42734283208847046,
"log_odds_ratio": -0.637465238571167,
"logits/chosen": 2.603739023208618,
"logits/rejected": 2.632021903991699,
"logps/chosen": -0.2919798493385315,
"logps/rejected": -0.480343759059906,
"loss": 1.1467,
"nll_loss": 1.0829123258590698,
"rewards/accuracies": 0.6583333611488342,
"rewards/chosen": -0.029197994619607925,
"rewards/margins": 0.01883639022707939,
"rewards/rejected": -0.04803437739610672,
"step": 310
},
{
"epoch": 0.4366913123844732,
"grad_norm": 1.6268060207366943,
"learning_rate": 3.7927272727272727e-07,
"log_odds_chosen": 0.5012027621269226,
"log_odds_ratio": -0.5923992991447449,
"logits/chosen": 2.5646347999572754,
"logits/rejected": 2.583251953125,
"logps/chosen": -0.28540945053100586,
"logps/rejected": -0.48576289415359497,
"loss": 1.1493,
"nll_loss": 1.0901035070419312,
"rewards/accuracies": 0.6833333373069763,
"rewards/chosen": -0.028540942817926407,
"rewards/margins": 0.02003534696996212,
"rewards/rejected": -0.048576291650533676,
"step": 315
},
{
"epoch": 0.4436229205175601,
"grad_norm": 1.6135777235031128,
"learning_rate": 3.7836363636363634e-07,
"log_odds_chosen": 0.47709882259368896,
"log_odds_ratio": -0.6118133664131165,
"logits/chosen": 2.651594400405884,
"logits/rejected": 2.663482666015625,
"logps/chosen": -0.30147698521614075,
"logps/rejected": -0.5001630187034607,
"loss": 1.1258,
"nll_loss": 1.0645849704742432,
"rewards/accuracies": 0.6416666507720947,
"rewards/chosen": -0.030147703364491463,
"rewards/margins": 0.019868608564138412,
"rewards/rejected": -0.05001631751656532,
"step": 320
},
{
"epoch": 0.45055452865064693,
"grad_norm": 1.5299911499023438,
"learning_rate": 3.774545454545454e-07,
"log_odds_chosen": 0.4915499687194824,
"log_odds_ratio": -0.6021497845649719,
"logits/chosen": 2.590543031692505,
"logits/rejected": 2.616163730621338,
"logps/chosen": -0.2796178162097931,
"logps/rejected": -0.4787500500679016,
"loss": 1.1219,
"nll_loss": 1.0617003440856934,
"rewards/accuracies": 0.6083333492279053,
"rewards/chosen": -0.02796177938580513,
"rewards/margins": 0.01991322636604309,
"rewards/rejected": -0.04787500575184822,
"step": 325
},
{
"epoch": 0.45748613678373384,
"grad_norm": 1.5430803298950195,
"learning_rate": 3.7654545454545454e-07,
"log_odds_chosen": 0.5409862399101257,
"log_odds_ratio": -0.5837644934654236,
"logits/chosen": 2.6871683597564697,
"logits/rejected": 2.697368860244751,
"logps/chosen": -0.322612464427948,
"logps/rejected": -0.5166773200035095,
"loss": 1.068,
"nll_loss": 1.0095747709274292,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.03226124867796898,
"rewards/margins": 0.019406486302614212,
"rewards/rejected": -0.05166773125529289,
"step": 330
},
{
"epoch": 0.4644177449168207,
"grad_norm": 1.520017147064209,
"learning_rate": 3.756363636363636e-07,
"log_odds_chosen": 0.7835187911987305,
"log_odds_ratio": -0.49368441104888916,
"logits/chosen": 2.681840419769287,
"logits/rejected": 2.7117552757263184,
"logps/chosen": -0.26524561643600464,
"logps/rejected": -0.5571123361587524,
"loss": 1.0703,
"nll_loss": 1.0209718942642212,
"rewards/accuracies": 0.7583333253860474,
"rewards/chosen": -0.026524560526013374,
"rewards/margins": 0.029186667874455452,
"rewards/rejected": -0.055711228400468826,
"step": 335
},
{
"epoch": 0.4713493530499076,
"grad_norm": 6.2188262939453125,
"learning_rate": 3.747272727272727e-07,
"log_odds_chosen": 0.7931634187698364,
"log_odds_ratio": -0.48867708444595337,
"logits/chosen": 2.574190378189087,
"logits/rejected": 2.5961813926696777,
"logps/chosen": -0.2862854599952698,
"logps/rejected": -0.5607175230979919,
"loss": 1.0977,
"nll_loss": 1.0488049983978271,
"rewards/accuracies": 0.7833333611488342,
"rewards/chosen": -0.028628544881939888,
"rewards/margins": 0.02744320221245289,
"rewards/rejected": -0.05607175827026367,
"step": 340
},
{
"epoch": 0.47828096118299446,
"grad_norm": 1.3965646028518677,
"learning_rate": 3.738181818181818e-07,
"log_odds_chosen": 0.6148959994316101,
"log_odds_ratio": -0.5497661828994751,
"logits/chosen": 2.556060791015625,
"logits/rejected": 2.5766706466674805,
"logps/chosen": -0.2946844696998596,
"logps/rejected": -0.5049049258232117,
"loss": 1.1347,
"nll_loss": 1.0796822309494019,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.02946844883263111,
"rewards/margins": 0.021022040396928787,
"rewards/rejected": -0.05049047991633415,
"step": 345
},
{
"epoch": 0.4852125693160813,
"grad_norm": 1.5803139209747314,
"learning_rate": 3.7290909090909087e-07,
"log_odds_chosen": 0.688506543636322,
"log_odds_ratio": -0.5257928371429443,
"logits/chosen": 2.6283648014068604,
"logits/rejected": 2.667273759841919,
"logps/chosen": -0.23559394478797913,
"logps/rejected": -0.4642951190471649,
"loss": 1.0637,
"nll_loss": 1.0110965967178345,
"rewards/accuracies": 0.7166666388511658,
"rewards/chosen": -0.023559393361210823,
"rewards/margins": 0.022870119661092758,
"rewards/rejected": -0.04642951115965843,
"step": 350
},
{
"epoch": 0.4921441774491682,
"grad_norm": 1.4043947458267212,
"learning_rate": 3.72e-07,
"log_odds_chosen": 0.7894371151924133,
"log_odds_ratio": -0.5066149830818176,
"logits/chosen": 2.5662755966186523,
"logits/rejected": 2.597470998764038,
"logps/chosen": -0.23841489851474762,
"logps/rejected": -0.49710512161254883,
"loss": 1.1026,
"nll_loss": 1.0519250631332397,
"rewards/accuracies": 0.7583333253860474,
"rewards/chosen": -0.023841489106416702,
"rewards/margins": 0.025869019329547882,
"rewards/rejected": -0.049710508435964584,
"step": 355
},
{
"epoch": 0.49907578558225507,
"grad_norm": 1.5117144584655762,
"learning_rate": 3.7109090909090907e-07,
"log_odds_chosen": 0.5975762009620667,
"log_odds_ratio": -0.5889599919319153,
"logits/chosen": 2.448983669281006,
"logits/rejected": 2.486905813217163,
"logps/chosen": -0.2695424258708954,
"logps/rejected": -0.47119179368019104,
"loss": 1.107,
"nll_loss": 1.0481308698654175,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.026954246684908867,
"rewards/margins": 0.02016492560505867,
"rewards/rejected": -0.047119174152612686,
"step": 360
},
{
"epoch": 0.506007393715342,
"grad_norm": 1.5192545652389526,
"learning_rate": 3.7018181818181814e-07,
"log_odds_chosen": 0.6952040791511536,
"log_odds_ratio": -0.5299401879310608,
"logits/chosen": 2.472970485687256,
"logits/rejected": 2.5098752975463867,
"logps/chosen": -0.3138105571269989,
"logps/rejected": -0.5887749195098877,
"loss": 1.1069,
"nll_loss": 1.0538718700408936,
"rewards/accuracies": 0.6916666626930237,
"rewards/chosen": -0.03138105198740959,
"rewards/margins": 0.02749643847346306,
"rewards/rejected": -0.05887749046087265,
"step": 365
},
{
"epoch": 0.5129390018484289,
"grad_norm": 1.2908129692077637,
"learning_rate": 3.6927272727272726e-07,
"log_odds_chosen": 0.6446402072906494,
"log_odds_ratio": -0.5391488671302795,
"logits/chosen": 2.492727756500244,
"logits/rejected": 2.5441088676452637,
"logps/chosen": -0.32913315296173096,
"logps/rejected": -0.5775908827781677,
"loss": 1.1262,
"nll_loss": 1.072272539138794,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.032913316041231155,
"rewards/margins": 0.024845769628882408,
"rewards/rejected": -0.05775909125804901,
"step": 370
},
{
"epoch": 0.5198706099815157,
"grad_norm": 1.5955073833465576,
"learning_rate": 3.6836363636363633e-07,
"log_odds_chosen": 0.4990961253643036,
"log_odds_ratio": -0.5916425585746765,
"logits/chosen": 2.449550151824951,
"logits/rejected": 2.475804567337036,
"logps/chosen": -0.29480889439582825,
"logps/rejected": -0.4691779315471649,
"loss": 1.0926,
"nll_loss": 1.0334601402282715,
"rewards/accuracies": 0.6583333611488342,
"rewards/chosen": -0.029480891302227974,
"rewards/margins": 0.017436908558011055,
"rewards/rejected": -0.04691779240965843,
"step": 375
},
{
"epoch": 0.5268022181146026,
"grad_norm": 1.6701200008392334,
"learning_rate": 3.674545454545454e-07,
"log_odds_chosen": 0.4017346203327179,
"log_odds_ratio": -0.6414787173271179,
"logits/chosen": 2.485521078109741,
"logits/rejected": 2.5085794925689697,
"logps/chosen": -0.3057960867881775,
"logps/rejected": -0.4507531225681305,
"loss": 1.0797,
"nll_loss": 1.015582799911499,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.030579613521695137,
"rewards/margins": 0.01449570246040821,
"rewards/rejected": -0.04507531598210335,
"step": 380
},
{
"epoch": 0.5337338262476895,
"grad_norm": 1.4670026302337646,
"learning_rate": 3.665454545454545e-07,
"log_odds_chosen": 0.4697194993495941,
"log_odds_ratio": -0.6057302355766296,
"logits/chosen": 2.58746337890625,
"logits/rejected": 2.6025400161743164,
"logps/chosen": -0.33911365270614624,
"logps/rejected": -0.540283739566803,
"loss": 1.1428,
"nll_loss": 1.0822194814682007,
"rewards/accuracies": 0.6083333492279053,
"rewards/chosen": -0.033911366015672684,
"rewards/margins": 0.020117007195949554,
"rewards/rejected": -0.054028380662202835,
"step": 385
},
{
"epoch": 0.5406654343807763,
"grad_norm": 1.425374984741211,
"learning_rate": 3.656363636363636e-07,
"log_odds_chosen": 0.8662251234054565,
"log_odds_ratio": -0.4929133951663971,
"logits/chosen": 2.4548234939575195,
"logits/rejected": 2.4894497394561768,
"logps/chosen": -0.29042714834213257,
"logps/rejected": -0.6492635011672974,
"loss": 1.0862,
"nll_loss": 1.0368915796279907,
"rewards/accuracies": 0.7666666507720947,
"rewards/chosen": -0.029042713344097137,
"rewards/margins": 0.035883646458387375,
"rewards/rejected": -0.06492635607719421,
"step": 390
},
{
"epoch": 0.5475970425138632,
"grad_norm": 1.9283677339553833,
"learning_rate": 3.647272727272727e-07,
"log_odds_chosen": 0.6832193732261658,
"log_odds_ratio": -0.5179533362388611,
"logits/chosen": 2.4680521488189697,
"logits/rejected": 2.5091042518615723,
"logps/chosen": -0.3011523485183716,
"logps/rejected": -0.5473502278327942,
"loss": 1.1184,
"nll_loss": 1.0665814876556396,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.030115237459540367,
"rewards/margins": 0.02461978793144226,
"rewards/rejected": -0.05473501980304718,
"step": 395
},
{
"epoch": 0.5545286506469501,
"grad_norm": 1.5800950527191162,
"learning_rate": 3.638181818181818e-07,
"log_odds_chosen": 0.9603479504585266,
"log_odds_ratio": -0.4618772566318512,
"logits/chosen": 2.5418217182159424,
"logits/rejected": 2.5758605003356934,
"logps/chosen": -0.3006496727466583,
"logps/rejected": -0.680100679397583,
"loss": 1.1202,
"nll_loss": 1.073991060256958,
"rewards/accuracies": 0.7333333492279053,
"rewards/chosen": -0.03006497025489807,
"rewards/margins": 0.03794510290026665,
"rewards/rejected": -0.06801006942987442,
"step": 400
},
{
"epoch": 0.5614602587800369,
"grad_norm": 1.8024792671203613,
"learning_rate": 3.6290909090909086e-07,
"log_odds_chosen": 0.5875980854034424,
"log_odds_ratio": -0.5984980463981628,
"logits/chosen": 2.422645092010498,
"logits/rejected": 2.4577627182006836,
"logps/chosen": -0.26548755168914795,
"logps/rejected": -0.4987878203392029,
"loss": 1.07,
"nll_loss": 1.010174036026001,
"rewards/accuracies": 0.7083333134651184,
"rewards/chosen": -0.026548750698566437,
"rewards/margins": 0.023330029100179672,
"rewards/rejected": -0.04987877607345581,
"step": 405
},
{
"epoch": 0.5683918669131238,
"grad_norm": 1.3316905498504639,
"learning_rate": 3.62e-07,
"log_odds_chosen": 0.9628907442092896,
"log_odds_ratio": -0.45750167965888977,
"logits/chosen": 2.5198185443878174,
"logits/rejected": 2.5674140453338623,
"logps/chosen": -0.2701815366744995,
"logps/rejected": -0.6767290234565735,
"loss": 1.1047,
"nll_loss": 1.0589832067489624,
"rewards/accuracies": 0.7666666507720947,
"rewards/chosen": -0.02701815403997898,
"rewards/margins": 0.0406547375023365,
"rewards/rejected": -0.06767289340496063,
"step": 410
},
{
"epoch": 0.5753234750462107,
"grad_norm": 3.3782176971435547,
"learning_rate": 3.6109090909090906e-07,
"log_odds_chosen": 0.8159240484237671,
"log_odds_ratio": -0.502350389957428,
"logits/chosen": 2.4107635021209717,
"logits/rejected": 2.462902545928955,
"logps/chosen": -0.2618210017681122,
"logps/rejected": -0.5774862766265869,
"loss": 1.0908,
"nll_loss": 1.040544033050537,
"rewards/accuracies": 0.7166666388511658,
"rewards/chosen": -0.026182103902101517,
"rewards/margins": 0.031566519290208817,
"rewards/rejected": -0.05774862319231033,
"step": 415
},
{
"epoch": 0.5822550831792976,
"grad_norm": 1.423762559890747,
"learning_rate": 3.601818181818182e-07,
"log_odds_chosen": 0.712608277797699,
"log_odds_ratio": -0.5430810451507568,
"logits/chosen": 2.4404184818267822,
"logits/rejected": 2.460728168487549,
"logps/chosen": -0.3297029137611389,
"logps/rejected": -0.6201837062835693,
"loss": 1.1066,
"nll_loss": 1.0522674322128296,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.03297029063105583,
"rewards/margins": 0.02904808521270752,
"rewards/rejected": -0.06201838329434395,
"step": 420
},
{
"epoch": 0.5891866913123844,
"grad_norm": 1.5393766164779663,
"learning_rate": 3.5927272727272725e-07,
"log_odds_chosen": 0.7168568968772888,
"log_odds_ratio": -0.530264139175415,
"logits/chosen": 2.4670755863189697,
"logits/rejected": 2.4867959022521973,
"logps/chosen": -0.30588680505752563,
"logps/rejected": -0.582858145236969,
"loss": 1.1199,
"nll_loss": 1.0668836832046509,
"rewards/accuracies": 0.7166666388511658,
"rewards/chosen": -0.030588679015636444,
"rewards/margins": 0.027697138488292694,
"rewards/rejected": -0.05828581750392914,
"step": 425
},
{
"epoch": 0.5961182994454713,
"grad_norm": 1.5168192386627197,
"learning_rate": 3.583636363636363e-07,
"log_odds_chosen": 0.6971157789230347,
"log_odds_ratio": -0.5508431792259216,
"logits/chosen": 2.4846901893615723,
"logits/rejected": 2.517913579940796,
"logps/chosen": -0.29162880778312683,
"logps/rejected": -0.5490512251853943,
"loss": 1.0672,
"nll_loss": 1.0120903253555298,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.029162878170609474,
"rewards/margins": 0.025742238387465477,
"rewards/rejected": -0.05490512028336525,
"step": 430
},
{
"epoch": 0.6030499075785583,
"grad_norm": 1.951669454574585,
"learning_rate": 3.5745454545454545e-07,
"log_odds_chosen": 0.6440210342407227,
"log_odds_ratio": -0.5461083054542542,
"logits/chosen": 2.4179627895355225,
"logits/rejected": 2.4611566066741943,
"logps/chosen": -0.27406224608421326,
"logps/rejected": -0.528464138507843,
"loss": 1.1128,
"nll_loss": 1.0581576824188232,
"rewards/accuracies": 0.6916666626930237,
"rewards/chosen": -0.027406223118305206,
"rewards/margins": 0.025440199300646782,
"rewards/rejected": -0.05284642428159714,
"step": 435
},
{
"epoch": 0.609981515711645,
"grad_norm": 1.452818512916565,
"learning_rate": 3.565454545454545e-07,
"log_odds_chosen": 0.6425326466560364,
"log_odds_ratio": -0.5652487874031067,
"logits/chosen": 2.4771902561187744,
"logits/rejected": 2.5125765800476074,
"logps/chosen": -0.30930647253990173,
"logps/rejected": -0.534605085849762,
"loss": 1.1152,
"nll_loss": 1.058679461479187,
"rewards/accuracies": 0.7333333492279053,
"rewards/chosen": -0.030930647626519203,
"rewards/margins": 0.022529857233166695,
"rewards/rejected": -0.0534605048596859,
"step": 440
},
{
"epoch": 0.616913123844732,
"grad_norm": 2.1722567081451416,
"learning_rate": 3.556363636363636e-07,
"log_odds_chosen": 0.5683826208114624,
"log_odds_ratio": -0.5821471810340881,
"logits/chosen": 2.3948564529418945,
"logits/rejected": 2.432129144668579,
"logps/chosen": -0.2796666920185089,
"logps/rejected": -0.5052643418312073,
"loss": 1.036,
"nll_loss": 0.9777409434318542,
"rewards/accuracies": 0.6333333253860474,
"rewards/chosen": -0.027966666966676712,
"rewards/margins": 0.022559762001037598,
"rewards/rejected": -0.05052642896771431,
"step": 445
},
{
"epoch": 0.6238447319778189,
"grad_norm": 1.4338502883911133,
"learning_rate": 3.547272727272727e-07,
"log_odds_chosen": 0.6986488699913025,
"log_odds_ratio": -0.5211442112922668,
"logits/chosen": 2.412381410598755,
"logits/rejected": 2.452411413192749,
"logps/chosen": -0.33079832792282104,
"logps/rejected": -0.5769501328468323,
"loss": 1.1057,
"nll_loss": 1.0535985231399536,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.0330798365175724,
"rewards/margins": 0.024615177884697914,
"rewards/rejected": -0.057695016264915466,
"step": 450
},
{
"epoch": 0.6307763401109058,
"grad_norm": 1.6581847667694092,
"learning_rate": 3.538181818181818e-07,
"log_odds_chosen": 0.8325883746147156,
"log_odds_ratio": -0.5279497504234314,
"logits/chosen": 2.3902318477630615,
"logits/rejected": 2.4334285259246826,
"logps/chosen": -0.2896474003791809,
"logps/rejected": -0.6175944805145264,
"loss": 1.0771,
"nll_loss": 1.024324893951416,
"rewards/accuracies": 0.7166666388511658,
"rewards/chosen": -0.028964735567569733,
"rewards/margins": 0.032794706523418427,
"rewards/rejected": -0.06175943836569786,
"step": 455
},
{
"epoch": 0.6377079482439926,
"grad_norm": 1.2917609214782715,
"learning_rate": 3.529090909090909e-07,
"log_odds_chosen": 0.683183491230011,
"log_odds_ratio": -0.5794288516044617,
"logits/chosen": 2.442657709121704,
"logits/rejected": 2.49123477935791,
"logps/chosen": -0.3224933445453644,
"logps/rejected": -0.6132354736328125,
"loss": 1.1346,
"nll_loss": 1.0766867399215698,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.0322493314743042,
"rewards/margins": 0.02907421998679638,
"rewards/rejected": -0.06132354959845543,
"step": 460
},
{
"epoch": 0.6446395563770795,
"grad_norm": 1.514763593673706,
"learning_rate": 3.52e-07,
"log_odds_chosen": 0.7153250575065613,
"log_odds_ratio": -0.5446946024894714,
"logits/chosen": 2.3731462955474854,
"logits/rejected": 2.4032256603240967,
"logps/chosen": -0.2961350679397583,
"logps/rejected": -0.5602670311927795,
"loss": 1.1165,
"nll_loss": 1.0620505809783936,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.029613511636853218,
"rewards/margins": 0.026413191109895706,
"rewards/rejected": -0.056026704609394073,
"step": 465
},
{
"epoch": 0.6515711645101664,
"grad_norm": 1.3288161754608154,
"learning_rate": 3.5109090909090905e-07,
"log_odds_chosen": 0.6764271855354309,
"log_odds_ratio": -0.539259135723114,
"logits/chosen": 2.3467721939086914,
"logits/rejected": 2.384276866912842,
"logps/chosen": -0.28593122959136963,
"logps/rejected": -0.5482361912727356,
"loss": 1.0768,
"nll_loss": 1.0228937864303589,
"rewards/accuracies": 0.6833333373069763,
"rewards/chosen": -0.028593122959136963,
"rewards/margins": 0.026230497285723686,
"rewards/rejected": -0.0548236221075058,
"step": 470
},
{
"epoch": 0.6585027726432532,
"grad_norm": 1.6036826372146606,
"learning_rate": 3.5018181818181817e-07,
"log_odds_chosen": 0.7218735218048096,
"log_odds_ratio": -0.5222643613815308,
"logits/chosen": 2.311798095703125,
"logits/rejected": 2.353884220123291,
"logps/chosen": -0.2681826055049896,
"logps/rejected": -0.5527848601341248,
"loss": 1.0583,
"nll_loss": 1.0060540437698364,
"rewards/accuracies": 0.7833333611488342,
"rewards/chosen": -0.026818258687853813,
"rewards/margins": 0.02846023067831993,
"rewards/rejected": -0.05527849122881889,
"step": 475
},
{
"epoch": 0.6654343807763401,
"grad_norm": 1.6921563148498535,
"learning_rate": 3.4927272727272724e-07,
"log_odds_chosen": 0.7491247057914734,
"log_odds_ratio": -0.5005953907966614,
"logits/chosen": 2.384194850921631,
"logits/rejected": 2.4205212593078613,
"logps/chosen": -0.27753210067749023,
"logps/rejected": -0.5439023971557617,
"loss": 1.0796,
"nll_loss": 1.0295780897140503,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.027753213420510292,
"rewards/margins": 0.02663702890276909,
"rewards/rejected": -0.05439024046063423,
"step": 480
},
{
"epoch": 0.672365988909427,
"grad_norm": 1.6062825918197632,
"learning_rate": 3.483636363636363e-07,
"log_odds_chosen": 0.8605387210845947,
"log_odds_ratio": -0.4727242887020111,
"logits/chosen": 2.392383098602295,
"logits/rejected": 2.4284956455230713,
"logps/chosen": -0.30633166432380676,
"logps/rejected": -0.6641873121261597,
"loss": 1.0927,
"nll_loss": 1.0453789234161377,
"rewards/accuracies": 0.7583333253860474,
"rewards/chosen": -0.030633168295025826,
"rewards/margins": 0.035785574465990067,
"rewards/rejected": -0.06641873717308044,
"step": 485
},
{
"epoch": 0.6792975970425139,
"grad_norm": 1.4081010818481445,
"learning_rate": 3.4745454545454544e-07,
"log_odds_chosen": 0.7056547403335571,
"log_odds_ratio": -0.534669816493988,
"logits/chosen": 2.4164679050445557,
"logits/rejected": 2.4538962841033936,
"logps/chosen": -0.30063506960868835,
"logps/rejected": -0.566967248916626,
"loss": 1.0686,
"nll_loss": 1.015173077583313,
"rewards/accuracies": 0.7666666507720947,
"rewards/chosen": -0.030063504353165627,
"rewards/margins": 0.026633214205503464,
"rewards/rejected": -0.05669672042131424,
"step": 490
},
{
"epoch": 0.6862292051756007,
"grad_norm": 1.6677594184875488,
"learning_rate": 3.465454545454545e-07,
"log_odds_chosen": 0.6989320516586304,
"log_odds_ratio": -0.5302554368972778,
"logits/chosen": 2.3923895359039307,
"logits/rejected": 2.4322257041931152,
"logps/chosen": -0.3043942451477051,
"logps/rejected": -0.5767375826835632,
"loss": 1.1053,
"nll_loss": 1.052259922027588,
"rewards/accuracies": 0.7416666746139526,
"rewards/chosen": -0.030439427122473717,
"rewards/margins": 0.027234338223934174,
"rewards/rejected": -0.05767376720905304,
"step": 495
},
{
"epoch": 0.6931608133086876,
"grad_norm": 2.065037727355957,
"learning_rate": 3.4563636363636363e-07,
"log_odds_chosen": 0.9193868041038513,
"log_odds_ratio": -0.47953858971595764,
"logits/chosen": 2.3140947818756104,
"logits/rejected": 2.370821952819824,
"logps/chosen": -0.26736411452293396,
"logps/rejected": -0.6239200830459595,
"loss": 1.1044,
"nll_loss": 1.0564467906951904,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.026736412197351456,
"rewards/margins": 0.03565559908747673,
"rewards/rejected": -0.06239200755953789,
"step": 500
},
{
"epoch": 0.7000924214417745,
"grad_norm": 1.7896554470062256,
"learning_rate": 3.447272727272727e-07,
"log_odds_chosen": 1.0297752618789673,
"log_odds_ratio": -0.4404907822608948,
"logits/chosen": 2.414668560028076,
"logits/rejected": 2.4732654094696045,
"logps/chosen": -0.2793917953968048,
"logps/rejected": -0.6925608515739441,
"loss": 1.068,
"nll_loss": 1.0239418745040894,
"rewards/accuracies": 0.7666666507720947,
"rewards/chosen": -0.027939176186919212,
"rewards/margins": 0.04131689295172691,
"rewards/rejected": -0.06925607472658157,
"step": 505
},
{
"epoch": 0.7070240295748613,
"grad_norm": 1.7647099494934082,
"learning_rate": 3.4381818181818177e-07,
"log_odds_chosen": 0.7891833782196045,
"log_odds_ratio": -0.5092800855636597,
"logits/chosen": 2.4179863929748535,
"logits/rejected": 2.4637575149536133,
"logps/chosen": -0.32941383123397827,
"logps/rejected": -0.6404102444648743,
"loss": 1.1317,
"nll_loss": 1.0808058977127075,
"rewards/accuracies": 0.7166666388511658,
"rewards/chosen": -0.032941386103630066,
"rewards/margins": 0.031099645420908928,
"rewards/rejected": -0.06404102593660355,
"step": 510
},
{
"epoch": 0.7139556377079482,
"grad_norm": 1.6020543575286865,
"learning_rate": 3.429090909090909e-07,
"log_odds_chosen": 0.823983371257782,
"log_odds_ratio": -0.49682337045669556,
"logits/chosen": 2.2930688858032227,
"logits/rejected": 2.3509280681610107,
"logps/chosen": -0.2938121259212494,
"logps/rejected": -0.6020478010177612,
"loss": 1.0654,
"nll_loss": 1.015733003616333,
"rewards/accuracies": 0.7583333253860474,
"rewards/chosen": -0.02938121184706688,
"rewards/margins": 0.030823571607470512,
"rewards/rejected": -0.06020478159189224,
"step": 515
},
{
"epoch": 0.7208872458410351,
"grad_norm": 1.902418851852417,
"learning_rate": 3.4199999999999997e-07,
"log_odds_chosen": 0.9842289090156555,
"log_odds_ratio": -0.46310731768608093,
"logits/chosen": 2.354994535446167,
"logits/rejected": 2.3915045261383057,
"logps/chosen": -0.27029862999916077,
"logps/rejected": -0.6681958436965942,
"loss": 1.0773,
"nll_loss": 1.030941367149353,
"rewards/accuracies": 0.7666666507720947,
"rewards/chosen": -0.027029862627387047,
"rewards/margins": 0.03978971764445305,
"rewards/rejected": -0.06681957095861435,
"step": 520
},
{
"epoch": 0.727818853974122,
"grad_norm": 1.5766515731811523,
"learning_rate": 3.410909090909091e-07,
"log_odds_chosen": 0.7862997055053711,
"log_odds_ratio": -0.5025666952133179,
"logits/chosen": 2.351290464401245,
"logits/rejected": 2.3797943592071533,
"logps/chosen": -0.2873378396034241,
"logps/rejected": -0.5840609669685364,
"loss": 1.0972,
"nll_loss": 1.0469059944152832,
"rewards/accuracies": 0.7666666507720947,
"rewards/chosen": -0.028733786195516586,
"rewards/margins": 0.02967231348156929,
"rewards/rejected": -0.058406099677085876,
"step": 525
},
{
"epoch": 0.7347504621072088,
"grad_norm": 1.9089113473892212,
"learning_rate": 3.4018181818181816e-07,
"log_odds_chosen": 1.0071905851364136,
"log_odds_ratio": -0.4409308433532715,
"logits/chosen": 2.36696720123291,
"logits/rejected": 2.4168639183044434,
"logps/chosen": -0.2714075744152069,
"logps/rejected": -0.6719208359718323,
"loss": 1.0857,
"nll_loss": 1.0415993928909302,
"rewards/accuracies": 0.8083333373069763,
"rewards/chosen": -0.02714076079428196,
"rewards/margins": 0.04005131870508194,
"rewards/rejected": -0.06719207763671875,
"step": 530
},
{
"epoch": 0.7416820702402958,
"grad_norm": 1.6972178220748901,
"learning_rate": 3.3927272727272723e-07,
"log_odds_chosen": 0.7740481495857239,
"log_odds_ratio": -0.5107226967811584,
"logits/chosen": 2.373772382736206,
"logits/rejected": 2.415752410888672,
"logps/chosen": -0.30703625082969666,
"logps/rejected": -0.6409940123558044,
"loss": 1.1177,
"nll_loss": 1.066676378250122,
"rewards/accuracies": 0.7583333253860474,
"rewards/chosen": -0.030703624710440636,
"rewards/margins": 0.03339577093720436,
"rewards/rejected": -0.06409939378499985,
"step": 535
},
{
"epoch": 0.7486136783733827,
"grad_norm": 1.3471928834915161,
"learning_rate": 3.3836363636363635e-07,
"log_odds_chosen": 0.8596405386924744,
"log_odds_ratio": -0.4774978458881378,
"logits/chosen": 2.286552906036377,
"logits/rejected": 2.335909128189087,
"logps/chosen": -0.28501778841018677,
"logps/rejected": -0.6287774443626404,
"loss": 1.0694,
"nll_loss": 1.0216971635818481,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.028501776978373528,
"rewards/margins": 0.03437596932053566,
"rewards/rejected": -0.06287775188684464,
"step": 540
},
{
"epoch": 0.7555452865064695,
"grad_norm": 1.906703233718872,
"learning_rate": 3.374545454545454e-07,
"log_odds_chosen": 0.8641347885131836,
"log_odds_ratio": -0.5105417370796204,
"logits/chosen": 2.3575448989868164,
"logits/rejected": 2.396763801574707,
"logps/chosen": -0.26420578360557556,
"logps/rejected": -0.6220420002937317,
"loss": 1.0629,
"nll_loss": 1.0118043422698975,
"rewards/accuracies": 0.6916666626930237,
"rewards/chosen": -0.026420580223202705,
"rewards/margins": 0.03578362613916397,
"rewards/rejected": -0.06220419704914093,
"step": 545
},
{
"epoch": 0.7624768946395564,
"grad_norm": 1.4510164260864258,
"learning_rate": 3.365454545454545e-07,
"log_odds_chosen": 0.777636706829071,
"log_odds_ratio": -0.5234912633895874,
"logits/chosen": 2.271596670150757,
"logits/rejected": 2.3241117000579834,
"logps/chosen": -0.313221275806427,
"logps/rejected": -0.6108809113502502,
"loss": 1.1234,
"nll_loss": 1.0710450410842896,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.03132212534546852,
"rewards/margins": 0.029765967279672623,
"rewards/rejected": -0.06108810007572174,
"step": 550
},
{
"epoch": 0.7694085027726433,
"grad_norm": 2.371877908706665,
"learning_rate": 3.356363636363636e-07,
"log_odds_chosen": 0.7746042013168335,
"log_odds_ratio": -0.5306139588356018,
"logits/chosen": 2.318974018096924,
"logits/rejected": 2.381847858428955,
"logps/chosen": -0.2922042906284332,
"logps/rejected": -0.6194152235984802,
"loss": 1.0591,
"nll_loss": 1.006041169166565,
"rewards/accuracies": 0.7333333492279053,
"rewards/chosen": -0.02922043204307556,
"rewards/margins": 0.032721079885959625,
"rewards/rejected": -0.061941519379615784,
"step": 555
},
{
"epoch": 0.7763401109057301,
"grad_norm": 1.5632991790771484,
"learning_rate": 3.347272727272727e-07,
"log_odds_chosen": 0.9328292608261108,
"log_odds_ratio": -0.4911152124404907,
"logits/chosen": 2.3930165767669678,
"logits/rejected": 2.4501211643218994,
"logps/chosen": -0.33901265263557434,
"logps/rejected": -0.71490877866745,
"loss": 1.1406,
"nll_loss": 1.091480016708374,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.033901263028383255,
"rewards/margins": 0.03758960962295532,
"rewards/rejected": -0.07149087637662888,
"step": 560
},
{
"epoch": 0.783271719038817,
"grad_norm": 1.698624849319458,
"learning_rate": 3.338181818181818e-07,
"log_odds_chosen": 0.8142465949058533,
"log_odds_ratio": -0.48792019486427307,
"logits/chosen": 2.2310545444488525,
"logits/rejected": 2.270510673522949,
"logps/chosen": -0.2853752076625824,
"logps/rejected": -0.5743341445922852,
"loss": 1.0228,
"nll_loss": 0.9739974737167358,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.02853752300143242,
"rewards/margins": 0.028895895928144455,
"rewards/rejected": -0.057433418929576874,
"step": 565
},
{
"epoch": 0.7902033271719039,
"grad_norm": 1.6501067876815796,
"learning_rate": 3.329090909090909e-07,
"log_odds_chosen": 0.8794564008712769,
"log_odds_ratio": -0.47009655833244324,
"logits/chosen": 2.3232598304748535,
"logits/rejected": 2.363266944885254,
"logps/chosen": -0.33390435576438904,
"logps/rejected": -0.6623743772506714,
"loss": 1.1093,
"nll_loss": 1.0623211860656738,
"rewards/accuracies": 0.7583333253860474,
"rewards/chosen": -0.033390436321496964,
"rewards/margins": 0.032846998423337936,
"rewards/rejected": -0.0662374347448349,
"step": 570
},
{
"epoch": 0.7971349353049908,
"grad_norm": 1.9385813474655151,
"learning_rate": 3.3199999999999996e-07,
"log_odds_chosen": 0.8120476007461548,
"log_odds_ratio": -0.4858551323413849,
"logits/chosen": 2.2998452186584473,
"logits/rejected": 2.329951524734497,
"logps/chosen": -0.2809382975101471,
"logps/rejected": -0.5397506952285767,
"loss": 1.0673,
"nll_loss": 1.018762230873108,
"rewards/accuracies": 0.7666666507720947,
"rewards/chosen": -0.02809382788836956,
"rewards/margins": 0.025881236419081688,
"rewards/rejected": -0.05397506803274155,
"step": 575
},
{
"epoch": 0.8040665434380776,
"grad_norm": 1.647004246711731,
"learning_rate": 3.310909090909091e-07,
"log_odds_chosen": 0.7569546103477478,
"log_odds_ratio": -0.5495377779006958,
"logits/chosen": 2.2851226329803467,
"logits/rejected": 2.3207554817199707,
"logps/chosen": -0.3422669768333435,
"logps/rejected": -0.6526975035667419,
"loss": 1.1354,
"nll_loss": 1.0804812908172607,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.03422669693827629,
"rewards/margins": 0.03104304149746895,
"rewards/rejected": -0.06526973843574524,
"step": 580
},
{
"epoch": 0.8109981515711645,
"grad_norm": 1.3443453311920166,
"learning_rate": 3.3018181818181815e-07,
"log_odds_chosen": 0.7256454825401306,
"log_odds_ratio": -0.5355305075645447,
"logits/chosen": 2.308670997619629,
"logits/rejected": 2.348257541656494,
"logps/chosen": -0.3142472207546234,
"logps/rejected": -0.6030288338661194,
"loss": 1.1232,
"nll_loss": 1.069667935371399,
"rewards/accuracies": 0.7083333134651184,
"rewards/chosen": -0.03142471984028816,
"rewards/margins": 0.028878165408968925,
"rewards/rejected": -0.06030288711190224,
"step": 585
},
{
"epoch": 0.8179297597042514,
"grad_norm": 1.7293723821640015,
"learning_rate": 3.2927272727272727e-07,
"log_odds_chosen": 0.7784165740013123,
"log_odds_ratio": -0.5222859978675842,
"logits/chosen": 2.2894599437713623,
"logits/rejected": 2.3367385864257812,
"logps/chosen": -0.29684463143348694,
"logps/rejected": -0.5837019085884094,
"loss": 1.0918,
"nll_loss": 1.0395236015319824,
"rewards/accuracies": 0.7416666746139526,
"rewards/chosen": -0.029684465378522873,
"rewards/margins": 0.02868572250008583,
"rewards/rejected": -0.058370187878608704,
"step": 590
},
{
"epoch": 0.8248613678373382,
"grad_norm": 1.8093395233154297,
"learning_rate": 3.2836363636363634e-07,
"log_odds_chosen": 0.8844853043556213,
"log_odds_ratio": -0.4726443290710449,
"logits/chosen": 2.2617225646972656,
"logits/rejected": 2.3025357723236084,
"logps/chosen": -0.2791774868965149,
"logps/rejected": -0.6056706309318542,
"loss": 1.0658,
"nll_loss": 1.0185186862945557,
"rewards/accuracies": 0.7333333492279053,
"rewards/chosen": -0.02791774831712246,
"rewards/margins": 0.03264930844306946,
"rewards/rejected": -0.06056705862283707,
"step": 595
},
{
"epoch": 0.8317929759704251,
"grad_norm": 1.8343292474746704,
"learning_rate": 3.274545454545454e-07,
"log_odds_chosen": 0.9979297518730164,
"log_odds_ratio": -0.44733384251594543,
"logits/chosen": 2.2689735889434814,
"logits/rejected": 2.30771803855896,
"logps/chosen": -0.29521337151527405,
"logps/rejected": -0.6864122748374939,
"loss": 1.0462,
"nll_loss": 1.0014839172363281,
"rewards/accuracies": 0.8166666626930237,
"rewards/chosen": -0.029521334916353226,
"rewards/margins": 0.039119891822338104,
"rewards/rejected": -0.06864122301340103,
"step": 600
},
{
"epoch": 0.838724584103512,
"grad_norm": 1.5383275747299194,
"learning_rate": 3.2654545454545454e-07,
"log_odds_chosen": 1.0520718097686768,
"log_odds_ratio": -0.4258045554161072,
"logits/chosen": 2.2323710918426514,
"logits/rejected": 2.269280195236206,
"logps/chosen": -0.2652204632759094,
"logps/rejected": -0.6579357981681824,
"loss": 1.0456,
"nll_loss": 1.0029722452163696,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.026522040367126465,
"rewards/margins": 0.03927153721451759,
"rewards/rejected": -0.06579358130693436,
"step": 605
},
{
"epoch": 0.8456561922365989,
"grad_norm": 1.7920851707458496,
"learning_rate": 3.256363636363636e-07,
"log_odds_chosen": 0.9857064485549927,
"log_odds_ratio": -0.4479914903640747,
"logits/chosen": 2.2189059257507324,
"logits/rejected": 2.2568318843841553,
"logps/chosen": -0.27704155445098877,
"logps/rejected": -0.6286638975143433,
"loss": 1.0787,
"nll_loss": 1.0338690280914307,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.027704155072569847,
"rewards/margins": 0.03516223281621933,
"rewards/rejected": -0.06286638975143433,
"step": 610
},
{
"epoch": 0.8525878003696857,
"grad_norm": 1.4790568351745605,
"learning_rate": 3.247272727272727e-07,
"log_odds_chosen": 0.841203510761261,
"log_odds_ratio": -0.47351616621017456,
"logits/chosen": 2.2627458572387695,
"logits/rejected": 2.304532051086426,
"logps/chosen": -0.30810025334358215,
"logps/rejected": -0.6228964328765869,
"loss": 1.0887,
"nll_loss": 1.041351079940796,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.030810019001364708,
"rewards/margins": 0.031479619443416595,
"rewards/rejected": -0.06228964403271675,
"step": 615
},
{
"epoch": 0.8595194085027726,
"grad_norm": 1.640016794204712,
"learning_rate": 3.238181818181818e-07,
"log_odds_chosen": 0.8392209410667419,
"log_odds_ratio": -0.47920557856559753,
"logits/chosen": 2.241325855255127,
"logits/rejected": 2.3104820251464844,
"logps/chosen": -0.24823182821273804,
"logps/rejected": -0.5455386638641357,
"loss": 0.9989,
"nll_loss": 0.950990617275238,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.024823185056447983,
"rewards/margins": 0.029730679467320442,
"rewards/rejected": -0.054553862661123276,
"step": 620
},
{
"epoch": 0.8664510166358595,
"grad_norm": 1.6642489433288574,
"learning_rate": 3.229090909090909e-07,
"log_odds_chosen": 0.8546110987663269,
"log_odds_ratio": -0.5026638507843018,
"logits/chosen": 2.2081406116485596,
"logits/rejected": 2.243947744369507,
"logps/chosen": -0.29789215326309204,
"logps/rejected": -0.630996584892273,
"loss": 1.0533,
"nll_loss": 1.0030620098114014,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.029789213091135025,
"rewards/margins": 0.03331044688820839,
"rewards/rejected": -0.06309965997934341,
"step": 625
},
{
"epoch": 0.8733826247689463,
"grad_norm": 1.5830177068710327,
"learning_rate": 3.22e-07,
"log_odds_chosen": 0.8428265452384949,
"log_odds_ratio": -0.5204115509986877,
"logits/chosen": 2.2851126194000244,
"logits/rejected": 2.326284646987915,
"logps/chosen": -0.3049141466617584,
"logps/rejected": -0.6031973361968994,
"loss": 1.0701,
"nll_loss": 1.018021821975708,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.030491415411233902,
"rewards/margins": 0.029828311875462532,
"rewards/rejected": -0.060319721698760986,
"step": 630
},
{
"epoch": 0.8803142329020333,
"grad_norm": 1.658988118171692,
"learning_rate": 3.2109090909090907e-07,
"log_odds_chosen": 0.9268123507499695,
"log_odds_ratio": -0.5015169978141785,
"logits/chosen": 2.2516047954559326,
"logits/rejected": 2.307269811630249,
"logps/chosen": -0.2988060712814331,
"logps/rejected": -0.6871820092201233,
"loss": 1.0584,
"nll_loss": 1.0082836151123047,
"rewards/accuracies": 0.7583333253860474,
"rewards/chosen": -0.029880603775382042,
"rewards/margins": 0.03883758932352066,
"rewards/rejected": -0.06871819496154785,
"step": 635
},
{
"epoch": 0.8872458410351202,
"grad_norm": 3.0365874767303467,
"learning_rate": 3.2018181818181814e-07,
"log_odds_chosen": 0.8886032700538635,
"log_odds_ratio": -0.4523109197616577,
"logits/chosen": 2.2199485301971436,
"logits/rejected": 2.2670233249664307,
"logps/chosen": -0.25624144077301025,
"logps/rejected": -0.5575699210166931,
"loss": 1.0108,
"nll_loss": 0.965610146522522,
"rewards/accuracies": 0.8166666626930237,
"rewards/chosen": -0.025624146685004234,
"rewards/margins": 0.030132848769426346,
"rewards/rejected": -0.05575699731707573,
"step": 640
},
{
"epoch": 0.8941774491682071,
"grad_norm": 1.6926220655441284,
"learning_rate": 3.1927272727272726e-07,
"log_odds_chosen": 0.9891830682754517,
"log_odds_ratio": -0.4355195462703705,
"logits/chosen": 2.2216622829437256,
"logits/rejected": 2.275890350341797,
"logps/chosen": -0.276635080575943,
"logps/rejected": -0.6514686942100525,
"loss": 1.0596,
"nll_loss": 1.0160428285598755,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.02766350656747818,
"rewards/margins": 0.03748335689306259,
"rewards/rejected": -0.06514687836170197,
"step": 645
},
{
"epoch": 0.9011090573012939,
"grad_norm": 1.4911339282989502,
"learning_rate": 3.1836363636363633e-07,
"log_odds_chosen": 0.9020595550537109,
"log_odds_ratio": -0.48418372869491577,
"logits/chosen": 2.2785911560058594,
"logits/rejected": 2.312934160232544,
"logps/chosen": -0.31726518273353577,
"logps/rejected": -0.6841873526573181,
"loss": 1.05,
"nll_loss": 1.0015556812286377,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.031726520508527756,
"rewards/margins": 0.036692213267087936,
"rewards/rejected": -0.06841873377561569,
"step": 650
},
{
"epoch": 0.9080406654343808,
"grad_norm": 2.0379440784454346,
"learning_rate": 3.174545454545454e-07,
"log_odds_chosen": 1.1423557996749878,
"log_odds_ratio": -0.4178314507007599,
"logits/chosen": 2.3556437492370605,
"logits/rejected": 2.4005048274993896,
"logps/chosen": -0.2667427659034729,
"logps/rejected": -0.742853581905365,
"loss": 1.0941,
"nll_loss": 1.052323818206787,
"rewards/accuracies": 0.8416666388511658,
"rewards/chosen": -0.02667427621781826,
"rewards/margins": 0.04761108011007309,
"rewards/rejected": -0.0742853581905365,
"step": 655
},
{
"epoch": 0.9149722735674677,
"grad_norm": 1.3288828134536743,
"learning_rate": 3.1654545454545453e-07,
"log_odds_chosen": 0.9340334534645081,
"log_odds_ratio": -0.48560333251953125,
"logits/chosen": 2.2910754680633545,
"logits/rejected": 2.3387868404388428,
"logps/chosen": -0.3347066640853882,
"logps/rejected": -0.6973811388015747,
"loss": 1.0676,
"nll_loss": 1.0190544128417969,
"rewards/accuracies": 0.7583333253860474,
"rewards/chosen": -0.033470671623945236,
"rewards/margins": 0.036267444491386414,
"rewards/rejected": -0.06973811984062195,
"step": 660
},
{
"epoch": 0.9219038817005545,
"grad_norm": 1.6985810995101929,
"learning_rate": 3.156363636363636e-07,
"log_odds_chosen": 0.9894071221351624,
"log_odds_ratio": -0.4461503326892853,
"logits/chosen": 2.2392578125,
"logits/rejected": 2.283933162689209,
"logps/chosen": -0.2434028834104538,
"logps/rejected": -0.5999926328659058,
"loss": 1.0296,
"nll_loss": 0.9849395751953125,
"rewards/accuracies": 0.7666666507720947,
"rewards/chosen": -0.02434029057621956,
"rewards/margins": 0.03565897420048714,
"rewards/rejected": -0.059999268501996994,
"step": 665
},
{
"epoch": 0.9288354898336414,
"grad_norm": 1.6689305305480957,
"learning_rate": 3.147272727272727e-07,
"log_odds_chosen": 0.7809682488441467,
"log_odds_ratio": -0.5143331289291382,
"logits/chosen": 2.245195150375366,
"logits/rejected": 2.2930848598480225,
"logps/chosen": -0.313152015209198,
"logps/rejected": -0.590879499912262,
"loss": 1.0569,
"nll_loss": 1.0054324865341187,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.03131520375609398,
"rewards/margins": 0.027772750705480576,
"rewards/rejected": -0.059087950736284256,
"step": 670
},
{
"epoch": 0.9357670979667283,
"grad_norm": 1.883654236793518,
"learning_rate": 3.138181818181818e-07,
"log_odds_chosen": 0.8892870545387268,
"log_odds_ratio": -0.4877478778362274,
"logits/chosen": 2.2379813194274902,
"logits/rejected": 2.3097243309020996,
"logps/chosen": -0.24917837977409363,
"logps/rejected": -0.5756833553314209,
"loss": 1.0534,
"nll_loss": 1.0045883655548096,
"rewards/accuracies": 0.7583333253860474,
"rewards/chosen": -0.024917839094996452,
"rewards/margins": 0.03265049681067467,
"rewards/rejected": -0.05756833776831627,
"step": 675
},
{
"epoch": 0.9426987060998152,
"grad_norm": 2.1263558864593506,
"learning_rate": 3.1290909090909086e-07,
"log_odds_chosen": 0.9385001063346863,
"log_odds_ratio": -0.485267698764801,
"logits/chosen": 2.2864134311676025,
"logits/rejected": 2.3468661308288574,
"logps/chosen": -0.3098216950893402,
"logps/rejected": -0.6490688323974609,
"loss": 1.0503,
"nll_loss": 1.001733660697937,
"rewards/accuracies": 0.7666666507720947,
"rewards/chosen": -0.03098217211663723,
"rewards/margins": 0.033924710005521774,
"rewards/rejected": -0.06490688025951385,
"step": 680
},
{
"epoch": 0.949630314232902,
"grad_norm": 1.8792967796325684,
"learning_rate": 3.12e-07,
"log_odds_chosen": 0.6588108539581299,
"log_odds_ratio": -0.5568161010742188,
"logits/chosen": 2.2107622623443604,
"logits/rejected": 2.2423746585845947,
"logps/chosen": -0.3359021544456482,
"logps/rejected": -0.5799560546875,
"loss": 1.0822,
"nll_loss": 1.0264886617660522,
"rewards/accuracies": 0.7583333253860474,
"rewards/chosen": -0.03359021618962288,
"rewards/margins": 0.024405384436249733,
"rewards/rejected": -0.05799560621380806,
"step": 685
},
{
"epoch": 0.9565619223659889,
"grad_norm": 1.4165992736816406,
"learning_rate": 3.1109090909090906e-07,
"log_odds_chosen": 0.9096938371658325,
"log_odds_ratio": -0.4722006916999817,
"logits/chosen": 2.235501527786255,
"logits/rejected": 2.2762210369110107,
"logps/chosen": -0.29770374298095703,
"logps/rejected": -0.6392419934272766,
"loss": 1.0716,
"nll_loss": 1.024424433708191,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.0297703817486763,
"rewards/margins": 0.03415382280945778,
"rewards/rejected": -0.06392420083284378,
"step": 690
},
{
"epoch": 0.9634935304990758,
"grad_norm": 2.6423442363739014,
"learning_rate": 3.101818181818182e-07,
"log_odds_chosen": 0.8946338295936584,
"log_odds_ratio": -0.4573117196559906,
"logits/chosen": 2.3018126487731934,
"logits/rejected": 2.3378913402557373,
"logps/chosen": -0.2610825002193451,
"logps/rejected": -0.5758386850357056,
"loss": 1.0243,
"nll_loss": 0.9785677790641785,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.026108253747224808,
"rewards/margins": 0.03147561475634575,
"rewards/rejected": -0.05758386105298996,
"step": 695
},
{
"epoch": 0.9704251386321626,
"grad_norm": 1.6327714920043945,
"learning_rate": 3.0927272727272725e-07,
"log_odds_chosen": 0.8046802878379822,
"log_odds_ratio": -0.5016317367553711,
"logits/chosen": 2.166358470916748,
"logits/rejected": 2.2131805419921875,
"logps/chosen": -0.27325597405433655,
"logps/rejected": -0.559944748878479,
"loss": 1.0823,
"nll_loss": 1.0321154594421387,
"rewards/accuracies": 0.7583333253860474,
"rewards/chosen": -0.027325598523020744,
"rewards/margins": 0.028668878600001335,
"rewards/rejected": -0.05599447339773178,
"step": 700
},
{
"epoch": 0.9773567467652495,
"grad_norm": 1.5271565914154053,
"learning_rate": 3.083636363636363e-07,
"log_odds_chosen": 0.9641692638397217,
"log_odds_ratio": -0.4276140630245209,
"logits/chosen": 2.2330405712127686,
"logits/rejected": 2.275627374649048,
"logps/chosen": -0.2772447168827057,
"logps/rejected": -0.6339874267578125,
"loss": 1.0827,
"nll_loss": 1.039945125579834,
"rewards/accuracies": 0.7916666865348816,
"rewards/chosen": -0.027724474668502808,
"rewards/margins": 0.03567427024245262,
"rewards/rejected": -0.06339874863624573,
"step": 705
},
{
"epoch": 0.9842883548983364,
"grad_norm": 1.9477697610855103,
"learning_rate": 3.0745454545454545e-07,
"log_odds_chosen": 0.9072960019111633,
"log_odds_ratio": -0.49298056960105896,
"logits/chosen": 2.2244527339935303,
"logits/rejected": 2.2482240200042725,
"logps/chosen": -0.274914413690567,
"logps/rejected": -0.6292871832847595,
"loss": 1.0557,
"nll_loss": 1.0063644647598267,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.02749144285917282,
"rewards/margins": 0.03543727472424507,
"rewards/rejected": -0.0629287138581276,
"step": 710
},
{
"epoch": 0.9912199630314233,
"grad_norm": 14.025252342224121,
"learning_rate": 3.065454545454545e-07,
"log_odds_chosen": 0.9216189980506897,
"log_odds_ratio": -0.5028970837593079,
"logits/chosen": 2.2144951820373535,
"logits/rejected": 2.24345326423645,
"logps/chosen": -0.281184583902359,
"logps/rejected": -0.6528930068016052,
"loss": 1.0402,
"nll_loss": 0.9899436831474304,
"rewards/accuracies": 0.7333333492279053,
"rewards/chosen": -0.02811845950782299,
"rewards/margins": 0.03717083856463432,
"rewards/rejected": -0.06528931111097336,
"step": 715
},
{
"epoch": 0.9981515711645101,
"grad_norm": 1.4665298461914062,
"learning_rate": 3.056363636363636e-07,
"log_odds_chosen": 1.1137733459472656,
"log_odds_ratio": -0.4105357825756073,
"logits/chosen": 2.2789127826690674,
"logits/rejected": 2.3198835849761963,
"logps/chosen": -0.2862909138202667,
"logps/rejected": -0.725393533706665,
"loss": 1.0414,
"nll_loss": 1.0003019571304321,
"rewards/accuracies": 0.8083333373069763,
"rewards/chosen": -0.028629092499613762,
"rewards/margins": 0.04391026496887207,
"rewards/rejected": -0.07253936678171158,
"step": 720
},
{
"epoch": 1.0041589648798521,
"grad_norm": 2.1046509742736816,
"learning_rate": 3.047272727272727e-07,
"log_odds_chosen": 0.8841580152511597,
"log_odds_ratio": -0.5193544626235962,
"logits/chosen": 2.157961368560791,
"logits/rejected": 2.214885711669922,
"logps/chosen": -0.31194257736206055,
"logps/rejected": -0.6325222849845886,
"loss": 0.9397,
"nll_loss": 1.0297856330871582,
"rewards/accuracies": 0.6891025900840759,
"rewards/chosen": -0.031194258481264114,
"rewards/margins": 0.03205796703696251,
"rewards/rejected": -0.06325222551822662,
"step": 725
},
{
"epoch": 1.011090573012939,
"grad_norm": 11.973499298095703,
"learning_rate": 3.038181818181818e-07,
"log_odds_chosen": 1.0592764616012573,
"log_odds_ratio": -0.42826417088508606,
"logits/chosen": 2.2182023525238037,
"logits/rejected": 2.259822368621826,
"logps/chosen": -0.27322566509246826,
"logps/rejected": -0.6173789501190186,
"loss": 1.0566,
"nll_loss": 1.0138195753097534,
"rewards/accuracies": 0.7916666865348816,
"rewards/chosen": -0.027322567999362946,
"rewards/margins": 0.03441532701253891,
"rewards/rejected": -0.061737895011901855,
"step": 730
},
{
"epoch": 1.018022181146026,
"grad_norm": 1.6032253503799438,
"learning_rate": 3.029090909090909e-07,
"log_odds_chosen": 1.0015116930007935,
"log_odds_ratio": -0.45987558364868164,
"logits/chosen": 2.2573957443237305,
"logits/rejected": 2.302398920059204,
"logps/chosen": -0.29264530539512634,
"logps/rejected": -0.7004638910293579,
"loss": 1.0733,
"nll_loss": 1.0272737741470337,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.029264533892273903,
"rewards/margins": 0.04078185185790062,
"rewards/rejected": -0.07004638761281967,
"step": 735
},
{
"epoch": 1.0249537892791127,
"grad_norm": 1.9156720638275146,
"learning_rate": 3.02e-07,
"log_odds_chosen": 0.9316055178642273,
"log_odds_ratio": -0.49356502294540405,
"logits/chosen": 2.2086682319641113,
"logits/rejected": 2.247258424758911,
"logps/chosen": -0.2785702645778656,
"logps/rejected": -0.6358006000518799,
"loss": 1.0579,
"nll_loss": 1.0085234642028809,
"rewards/accuracies": 0.7583333253860474,
"rewards/chosen": -0.02785702608525753,
"rewards/margins": 0.035723041743040085,
"rewards/rejected": -0.06358006596565247,
"step": 740
},
{
"epoch": 1.0318853974121995,
"grad_norm": 1.8676711320877075,
"learning_rate": 3.0109090909090905e-07,
"log_odds_chosen": 1.0930769443511963,
"log_odds_ratio": -0.41506868600845337,
"logits/chosen": 2.1967382431030273,
"logits/rejected": 2.2548134326934814,
"logps/chosen": -0.2859395146369934,
"logps/rejected": -0.6978200078010559,
"loss": 1.0064,
"nll_loss": 0.964898407459259,
"rewards/accuracies": 0.8166666626930237,
"rewards/chosen": -0.02859395369887352,
"rewards/margins": 0.04118805751204491,
"rewards/rejected": -0.06978200376033783,
"step": 745
},
{
"epoch": 1.0388170055452866,
"grad_norm": 1.7424761056900024,
"learning_rate": 3.0018181818181817e-07,
"log_odds_chosen": 1.030094027519226,
"log_odds_ratio": -0.45161333680152893,
"logits/chosen": 2.171247959136963,
"logits/rejected": 2.210944652557373,
"logps/chosen": -0.2976371645927429,
"logps/rejected": -0.6693560481071472,
"loss": 1.0341,
"nll_loss": 0.9889503717422485,
"rewards/accuracies": 0.7583333253860474,
"rewards/chosen": -0.02976371720433235,
"rewards/margins": 0.03717188537120819,
"rewards/rejected": -0.06693560630083084,
"step": 750
},
{
"epoch": 1.0457486136783734,
"grad_norm": 1.6317884922027588,
"learning_rate": 2.9927272727272724e-07,
"log_odds_chosen": 1.2374706268310547,
"log_odds_ratio": -0.3697361350059509,
"logits/chosen": 2.271786689758301,
"logits/rejected": 2.3244924545288086,
"logps/chosen": -0.27134397625923157,
"logps/rejected": -0.7432472109794617,
"loss": 1.0338,
"nll_loss": 0.996829628944397,
"rewards/accuracies": 0.8916666507720947,
"rewards/chosen": -0.02713439241051674,
"rewards/margins": 0.04719032719731331,
"rewards/rejected": -0.07432472705841064,
"step": 755
},
{
"epoch": 1.0526802218114604,
"grad_norm": 1.9266161918640137,
"learning_rate": 2.983636363636363e-07,
"log_odds_chosen": 1.1042684316635132,
"log_odds_ratio": -0.42497718334198,
"logits/chosen": 2.178818464279175,
"logits/rejected": 2.243793487548828,
"logps/chosen": -0.23215913772583008,
"logps/rejected": -0.6292255520820618,
"loss": 1.0091,
"nll_loss": 0.9665910601615906,
"rewards/accuracies": 0.7666666507720947,
"rewards/chosen": -0.023215916007757187,
"rewards/margins": 0.03970663994550705,
"rewards/rejected": -0.06292255967855453,
"step": 760
},
{
"epoch": 1.0596118299445472,
"grad_norm": 2.3465418815612793,
"learning_rate": 2.9745454545454544e-07,
"log_odds_chosen": 1.1358228921890259,
"log_odds_ratio": -0.43412578105926514,
"logits/chosen": 2.157498598098755,
"logits/rejected": 2.2107603549957275,
"logps/chosen": -0.29064181447029114,
"logps/rejected": -0.6974590420722961,
"loss": 1.0562,
"nll_loss": 1.0127959251403809,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.029064182192087173,
"rewards/margins": 0.04068171977996826,
"rewards/rejected": -0.06974589824676514,
"step": 765
},
{
"epoch": 1.066543438077634,
"grad_norm": 1.337546944618225,
"learning_rate": 2.965454545454545e-07,
"log_odds_chosen": 1.1235884428024292,
"log_odds_ratio": -0.4154122769832611,
"logits/chosen": 2.1831068992614746,
"logits/rejected": 2.230762243270874,
"logps/chosen": -0.25542908906936646,
"logps/rejected": -0.6585070490837097,
"loss": 1.0557,
"nll_loss": 1.0141483545303345,
"rewards/accuracies": 0.8166666626930237,
"rewards/chosen": -0.025542909279465675,
"rewards/margins": 0.04030779376626015,
"rewards/rejected": -0.06585069000720978,
"step": 770
},
{
"epoch": 1.073475046210721,
"grad_norm": 1.92345130443573,
"learning_rate": 2.9563636363636363e-07,
"log_odds_chosen": 1.2442071437835693,
"log_odds_ratio": -0.3937591016292572,
"logits/chosen": 2.1876795291900635,
"logits/rejected": 2.237107515335083,
"logps/chosen": -0.2693581283092499,
"logps/rejected": -0.7542040944099426,
"loss": 1.0156,
"nll_loss": 0.9762417078018188,
"rewards/accuracies": 0.8166666626930237,
"rewards/chosen": -0.026935815811157227,
"rewards/margins": 0.04848460480570793,
"rewards/rejected": -0.07542040199041367,
"step": 775
},
{
"epoch": 1.0804066543438078,
"grad_norm": 1.3390204906463623,
"learning_rate": 2.947272727272727e-07,
"log_odds_chosen": 1.0398173332214355,
"log_odds_ratio": -0.429106205701828,
"logits/chosen": 2.1649744510650635,
"logits/rejected": 2.2123680114746094,
"logps/chosen": -0.27507466077804565,
"logps/rejected": -0.6559757590293884,
"loss": 1.0672,
"nll_loss": 1.0242794752120972,
"rewards/accuracies": 0.8166666626930237,
"rewards/chosen": -0.027507467195391655,
"rewards/margins": 0.038090117275714874,
"rewards/rejected": -0.06559757888317108,
"step": 780
},
{
"epoch": 1.0873382624768946,
"grad_norm": 1.570081114768982,
"learning_rate": 2.9381818181818177e-07,
"log_odds_chosen": 0.9334086775779724,
"log_odds_ratio": -0.4862407147884369,
"logits/chosen": 2.119239330291748,
"logits/rejected": 2.1761107444763184,
"logps/chosen": -0.25805288553237915,
"logps/rejected": -0.6043078303337097,
"loss": 1.0514,
"nll_loss": 1.002801537513733,
"rewards/accuracies": 0.7833333611488342,
"rewards/chosen": -0.025805287063121796,
"rewards/margins": 0.034625496715307236,
"rewards/rejected": -0.06043078005313873,
"step": 785
},
{
"epoch": 1.0942698706099816,
"grad_norm": 2.047260284423828,
"learning_rate": 2.929090909090909e-07,
"log_odds_chosen": 1.1341291666030884,
"log_odds_ratio": -0.4764944314956665,
"logits/chosen": 2.219109296798706,
"logits/rejected": 2.278170347213745,
"logps/chosen": -0.3056103587150574,
"logps/rejected": -0.7312763333320618,
"loss": 1.0227,
"nll_loss": 0.9750102162361145,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.030561033636331558,
"rewards/margins": 0.04256659746170044,
"rewards/rejected": -0.0731276348233223,
"step": 790
},
{
"epoch": 1.1012014787430684,
"grad_norm": 1.683436393737793,
"learning_rate": 2.9199999999999997e-07,
"log_odds_chosen": 1.1937026977539062,
"log_odds_ratio": -0.41498705744743347,
"logits/chosen": 2.276967763900757,
"logits/rejected": 2.3187286853790283,
"logps/chosen": -0.2915228605270386,
"logps/rejected": -0.7586190700531006,
"loss": 1.0585,
"nll_loss": 1.0169990062713623,
"rewards/accuracies": 0.8166666626930237,
"rewards/chosen": -0.0291522815823555,
"rewards/margins": 0.04670962318778038,
"rewards/rejected": -0.07586190849542618,
"step": 795
},
{
"epoch": 1.1081330868761552,
"grad_norm": 1.2493406534194946,
"learning_rate": 2.910909090909091e-07,
"log_odds_chosen": 1.2343627214431763,
"log_odds_ratio": -0.3899300992488861,
"logits/chosen": 2.1663858890533447,
"logits/rejected": 2.2325944900512695,
"logps/chosen": -0.2753217816352844,
"logps/rejected": -0.7316860556602478,
"loss": 1.0388,
"nll_loss": 0.999790370464325,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.02753218077123165,
"rewards/margins": 0.04563641548156738,
"rewards/rejected": -0.07316859811544418,
"step": 800
},
{
"epoch": 1.1150646950092422,
"grad_norm": 1.5523159503936768,
"learning_rate": 2.9018181818181816e-07,
"log_odds_chosen": 1.0665152072906494,
"log_odds_ratio": -0.45254915952682495,
"logits/chosen": 2.245769739151001,
"logits/rejected": 2.2844347953796387,
"logps/chosen": -0.31260156631469727,
"logps/rejected": -0.7254413366317749,
"loss": 1.0384,
"nll_loss": 0.9930953979492188,
"rewards/accuracies": 0.7833333611488342,
"rewards/chosen": -0.031260158866643906,
"rewards/margins": 0.041283976286649704,
"rewards/rejected": -0.07254412770271301,
"step": 805
},
{
"epoch": 1.121996303142329,
"grad_norm": 2.6294355392456055,
"learning_rate": 2.8927272727272723e-07,
"log_odds_chosen": 1.2369199991226196,
"log_odds_ratio": -0.42075902223587036,
"logits/chosen": 2.0666658878326416,
"logits/rejected": 2.124197244644165,
"logps/chosen": -0.2580902874469757,
"logps/rejected": -0.705311119556427,
"loss": 1.0688,
"nll_loss": 1.0266811847686768,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.02580902725458145,
"rewards/margins": 0.04472209885716438,
"rewards/rejected": -0.07053112238645554,
"step": 810
},
{
"epoch": 1.1289279112754158,
"grad_norm": 2.0441761016845703,
"learning_rate": 2.8836363636363636e-07,
"log_odds_chosen": 1.0299813747406006,
"log_odds_ratio": -0.48719069361686707,
"logits/chosen": 2.1046738624572754,
"logits/rejected": 2.1503727436065674,
"logps/chosen": -0.2982068955898285,
"logps/rejected": -0.6966476440429688,
"loss": 1.0879,
"nll_loss": 1.0392258167266846,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.029820691794157028,
"rewards/margins": 0.039844077080488205,
"rewards/rejected": -0.06966476142406464,
"step": 815
},
{
"epoch": 1.1358595194085028,
"grad_norm": 1.5556236505508423,
"learning_rate": 2.8745454545454543e-07,
"log_odds_chosen": 1.2123700380325317,
"log_odds_ratio": -0.4055812954902649,
"logits/chosen": 2.209066867828369,
"logits/rejected": 2.2755773067474365,
"logps/chosen": -0.28011006116867065,
"logps/rejected": -0.7493889331817627,
"loss": 1.0693,
"nll_loss": 1.0287730693817139,
"rewards/accuracies": 0.7833333611488342,
"rewards/chosen": -0.028011005371809006,
"rewards/margins": 0.046927884221076965,
"rewards/rejected": -0.07493889331817627,
"step": 820
},
{
"epoch": 1.1427911275415896,
"grad_norm": 1.8499306440353394,
"learning_rate": 2.865454545454545e-07,
"log_odds_chosen": 1.2569739818572998,
"log_odds_ratio": -0.4136119782924652,
"logits/chosen": 2.1816537380218506,
"logits/rejected": 2.23928165435791,
"logps/chosen": -0.2742965817451477,
"logps/rejected": -0.7915823459625244,
"loss": 1.0642,
"nll_loss": 1.0228677988052368,
"rewards/accuracies": 0.7833333611488342,
"rewards/chosen": -0.02742965891957283,
"rewards/margins": 0.051728587597608566,
"rewards/rejected": -0.0791582390666008,
"step": 825
},
{
"epoch": 1.1497227356746764,
"grad_norm": 1.7029317617416382,
"learning_rate": 2.856363636363636e-07,
"log_odds_chosen": 1.1728360652923584,
"log_odds_ratio": -0.4236757159233093,
"logits/chosen": 2.178098201751709,
"logits/rejected": 2.2561495304107666,
"logps/chosen": -0.26117414236068726,
"logps/rejected": -0.7411549091339111,
"loss": 1.0474,
"nll_loss": 1.0050232410430908,
"rewards/accuracies": 0.8416666388511658,
"rewards/chosen": -0.026117417961359024,
"rewards/margins": 0.04799807071685791,
"rewards/rejected": -0.07411548495292664,
"step": 830
},
{
"epoch": 1.1566543438077634,
"grad_norm": 2.336233615875244,
"learning_rate": 2.847272727272727e-07,
"log_odds_chosen": 1.2286179065704346,
"log_odds_ratio": -0.41043874621391296,
"logits/chosen": 2.171309232711792,
"logits/rejected": 2.2093799114227295,
"logps/chosen": -0.2653755843639374,
"logps/rejected": -0.7861889600753784,
"loss": 1.062,
"nll_loss": 1.0209335088729858,
"rewards/accuracies": 0.8083333373069763,
"rewards/chosen": -0.026537559926509857,
"rewards/margins": 0.052081331610679626,
"rewards/rejected": -0.07861888408660889,
"step": 835
},
{
"epoch": 1.1635859519408502,
"grad_norm": 1.7186654806137085,
"learning_rate": 2.838181818181818e-07,
"log_odds_chosen": 1.1606539487838745,
"log_odds_ratio": -0.40123751759529114,
"logits/chosen": 2.206134796142578,
"logits/rejected": 2.2637779712677,
"logps/chosen": -0.2596302628517151,
"logps/rejected": -0.7078793048858643,
"loss": 1.0382,
"nll_loss": 0.9980748891830444,
"rewards/accuracies": 0.8083333373069763,
"rewards/chosen": -0.02596302516758442,
"rewards/margins": 0.04482491686940193,
"rewards/rejected": -0.0707879364490509,
"step": 840
},
{
"epoch": 1.1705175600739373,
"grad_norm": 2.3376824855804443,
"learning_rate": 2.829090909090909e-07,
"log_odds_chosen": 0.9114227890968323,
"log_odds_ratio": -0.510475754737854,
"logits/chosen": 2.1974329948425293,
"logits/rejected": 2.257779359817505,
"logps/chosen": -0.3180769979953766,
"logps/rejected": -0.6727191805839539,
"loss": 1.056,
"nll_loss": 1.0049461126327515,
"rewards/accuracies": 0.7333333492279053,
"rewards/chosen": -0.03180769830942154,
"rewards/margins": 0.035464223474264145,
"rewards/rejected": -0.06727192550897598,
"step": 845
},
{
"epoch": 1.177449168207024,
"grad_norm": 1.9338550567626953,
"learning_rate": 2.8199999999999996e-07,
"log_odds_chosen": 1.1852858066558838,
"log_odds_ratio": -0.40160685777664185,
"logits/chosen": 2.1158764362335205,
"logits/rejected": 2.1807785034179688,
"logps/chosen": -0.3050893247127533,
"logps/rejected": -0.7781674861907959,
"loss": 1.023,
"nll_loss": 0.9828112125396729,
"rewards/accuracies": 0.8416666388511658,
"rewards/chosen": -0.030508937314152718,
"rewards/margins": 0.047307804226875305,
"rewards/rejected": -0.07781673967838287,
"step": 850
},
{
"epoch": 1.1843807763401109,
"grad_norm": 32.405643463134766,
"learning_rate": 2.810909090909091e-07,
"log_odds_chosen": 1.2816615104675293,
"log_odds_ratio": -0.38976308703422546,
"logits/chosen": 2.062394142150879,
"logits/rejected": 2.130361557006836,
"logps/chosen": -0.27238449454307556,
"logps/rejected": -0.7614350914955139,
"loss": 1.0719,
"nll_loss": 1.032881498336792,
"rewards/accuracies": 0.8583333492279053,
"rewards/chosen": -0.027238452807068825,
"rewards/margins": 0.048905063420534134,
"rewards/rejected": -0.07614351063966751,
"step": 855
},
{
"epoch": 1.1913123844731979,
"grad_norm": 1.5311826467514038,
"learning_rate": 2.8018181818181815e-07,
"log_odds_chosen": 1.1707886457443237,
"log_odds_ratio": -0.42907601594924927,
"logits/chosen": 2.1916935443878174,
"logits/rejected": 2.255692481994629,
"logps/chosen": -0.31618261337280273,
"logps/rejected": -0.7682264447212219,
"loss": 1.0343,
"nll_loss": 0.9914371967315674,
"rewards/accuracies": 0.8083333373069763,
"rewards/chosen": -0.031618259847164154,
"rewards/margins": 0.04520439729094505,
"rewards/rejected": -0.07682265341281891,
"step": 860
},
{
"epoch": 1.1982439926062847,
"grad_norm": 1.3627372980117798,
"learning_rate": 2.792727272727273e-07,
"log_odds_chosen": 1.2053844928741455,
"log_odds_ratio": -0.39541786909103394,
"logits/chosen": 2.108999490737915,
"logits/rejected": 2.1568171977996826,
"logps/chosen": -0.2578326463699341,
"logps/rejected": -0.6993592381477356,
"loss": 1.0029,
"nll_loss": 0.9633194804191589,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.025783265009522438,
"rewards/margins": 0.04415265843272209,
"rewards/rejected": -0.06993592530488968,
"step": 865
},
{
"epoch": 1.2051756007393715,
"grad_norm": 2.1243605613708496,
"learning_rate": 2.7836363636363635e-07,
"log_odds_chosen": 1.1114146709442139,
"log_odds_ratio": -0.44650015234947205,
"logits/chosen": 2.155231237411499,
"logits/rejected": 2.2109177112579346,
"logps/chosen": -0.30871832370758057,
"logps/rejected": -0.7572067379951477,
"loss": 1.0634,
"nll_loss": 1.018787145614624,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.030871832743287086,
"rewards/margins": 0.04484884440898895,
"rewards/rejected": -0.07572067528963089,
"step": 870
},
{
"epoch": 1.2121072088724585,
"grad_norm": 1.8302316665649414,
"learning_rate": 2.774545454545454e-07,
"log_odds_chosen": 1.195847749710083,
"log_odds_ratio": -0.45233553647994995,
"logits/chosen": 2.2495977878570557,
"logits/rejected": 2.3004953861236572,
"logps/chosen": -0.32949963212013245,
"logps/rejected": -0.8411144614219666,
"loss": 1.0578,
"nll_loss": 1.0125887393951416,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.03294995799660683,
"rewards/margins": 0.05116148665547371,
"rewards/rejected": -0.08411144465208054,
"step": 875
},
{
"epoch": 1.2190388170055453,
"grad_norm": 1.7341769933700562,
"learning_rate": 2.7654545454545454e-07,
"log_odds_chosen": 1.0370428562164307,
"log_odds_ratio": -0.4735434949398041,
"logits/chosen": 2.110995292663574,
"logits/rejected": 2.166097402572632,
"logps/chosen": -0.321010559797287,
"logps/rejected": -0.7727290391921997,
"loss": 1.0489,
"nll_loss": 1.0015724897384644,
"rewards/accuracies": 0.7833333611488342,
"rewards/chosen": -0.03210105374455452,
"rewards/margins": 0.045171838253736496,
"rewards/rejected": -0.07727289199829102,
"step": 880
},
{
"epoch": 1.225970425138632,
"grad_norm": 1.5535844564437866,
"learning_rate": 2.756363636363636e-07,
"log_odds_chosen": 1.0898783206939697,
"log_odds_ratio": -0.42453181743621826,
"logits/chosen": 2.1117665767669678,
"logits/rejected": 2.1755316257476807,
"logps/chosen": -0.27768510580062866,
"logps/rejected": -0.6975895166397095,
"loss": 1.0298,
"nll_loss": 0.9873270392417908,
"rewards/accuracies": 0.8416666388511658,
"rewards/chosen": -0.027768509462475777,
"rewards/margins": 0.04199044778943062,
"rewards/rejected": -0.06975895911455154,
"step": 885
},
{
"epoch": 1.232902033271719,
"grad_norm": 2.520540237426758,
"learning_rate": 2.747272727272727e-07,
"log_odds_chosen": 1.0620572566986084,
"log_odds_ratio": -0.44835466146469116,
"logits/chosen": 2.088442802429199,
"logits/rejected": 2.1342625617980957,
"logps/chosen": -0.31703558564186096,
"logps/rejected": -0.6918619871139526,
"loss": 1.0714,
"nll_loss": 1.0265547037124634,
"rewards/accuracies": 0.8166666626930237,
"rewards/chosen": -0.031703557819128036,
"rewards/margins": 0.03748263791203499,
"rewards/rejected": -0.06918619573116302,
"step": 890
},
{
"epoch": 1.239833641404806,
"grad_norm": 1.8474420309066772,
"learning_rate": 2.738181818181818e-07,
"log_odds_chosen": 1.2523150444030762,
"log_odds_ratio": -0.38173770904541016,
"logits/chosen": 2.1438181400299072,
"logits/rejected": 2.1918885707855225,
"logps/chosen": -0.2869779169559479,
"logps/rejected": -0.8195567727088928,
"loss": 1.0427,
"nll_loss": 1.004526138305664,
"rewards/accuracies": 0.8416666388511658,
"rewards/chosen": -0.028697794303297997,
"rewards/margins": 0.05325789004564285,
"rewards/rejected": -0.0819556713104248,
"step": 895
},
{
"epoch": 1.2467652495378927,
"grad_norm": 1.8705800771713257,
"learning_rate": 2.729090909090909e-07,
"log_odds_chosen": 1.3002660274505615,
"log_odds_ratio": -0.3636976182460785,
"logits/chosen": 2.1178719997406006,
"logits/rejected": 2.189141035079956,
"logps/chosen": -0.25350457429885864,
"logps/rejected": -0.7615570425987244,
"loss": 1.0187,
"nll_loss": 0.9823279976844788,
"rewards/accuracies": 0.8833333253860474,
"rewards/chosen": -0.025350457057356834,
"rewards/margins": 0.05080525204539299,
"rewards/rejected": -0.07615570724010468,
"step": 900
},
{
"epoch": 1.2536968576709797,
"grad_norm": 1.3865669965744019,
"learning_rate": 2.72e-07,
"log_odds_chosen": 1.2055052518844604,
"log_odds_ratio": -0.3909756541252136,
"logits/chosen": 2.1472742557525635,
"logits/rejected": 2.194523572921753,
"logps/chosen": -0.26772162318229675,
"logps/rejected": -0.7685297727584839,
"loss": 1.0522,
"nll_loss": 1.0131094455718994,
"rewards/accuracies": 0.8333333134651184,
"rewards/chosen": -0.026772161945700645,
"rewards/margins": 0.05008082091808319,
"rewards/rejected": -0.07685296982526779,
"step": 905
},
{
"epoch": 1.2606284658040665,
"grad_norm": 1.4818207025527954,
"learning_rate": 2.7109090909090907e-07,
"log_odds_chosen": 1.338388204574585,
"log_odds_ratio": -0.3422391712665558,
"logits/chosen": 2.08438777923584,
"logits/rejected": 2.1425976753234863,
"logps/chosen": -0.28000956773757935,
"logps/rejected": -0.7683375477790833,
"loss": 1.0637,
"nll_loss": 1.0295100212097168,
"rewards/accuracies": 0.8916666507720947,
"rewards/chosen": -0.028000956401228905,
"rewards/margins": 0.04883280023932457,
"rewards/rejected": -0.07683374732732773,
"step": 910
},
{
"epoch": 1.2675600739371533,
"grad_norm": 1.9989374876022339,
"learning_rate": 2.7018181818181814e-07,
"log_odds_chosen": 1.1596630811691284,
"log_odds_ratio": -0.46093183755874634,
"logits/chosen": 2.104174852371216,
"logits/rejected": 2.1529417037963867,
"logps/chosen": -0.30684390664100647,
"logps/rejected": -0.7890374660491943,
"loss": 1.0142,
"nll_loss": 0.9680600762367249,
"rewards/accuracies": 0.7916666865348816,
"rewards/chosen": -0.03068438731133938,
"rewards/margins": 0.048219338059425354,
"rewards/rejected": -0.07890374213457108,
"step": 915
},
{
"epoch": 1.2744916820702403,
"grad_norm": 1.6794359683990479,
"learning_rate": 2.6927272727272727e-07,
"log_odds_chosen": 1.4304643869400024,
"log_odds_ratio": -0.38649657368659973,
"logits/chosen": 2.1374399662017822,
"logits/rejected": 2.1798601150512695,
"logps/chosen": -0.29669511318206787,
"logps/rejected": -0.8554088473320007,
"loss": 1.0687,
"nll_loss": 1.0300294160842896,
"rewards/accuracies": 0.8333333134651184,
"rewards/chosen": -0.029669513925909996,
"rewards/margins": 0.05587137117981911,
"rewards/rejected": -0.08554088324308395,
"step": 920
},
{
"epoch": 1.2814232902033271,
"grad_norm": 1.981391429901123,
"learning_rate": 2.6836363636363634e-07,
"log_odds_chosen": 1.2685843706130981,
"log_odds_ratio": -0.41906872391700745,
"logits/chosen": 2.2779479026794434,
"logits/rejected": 2.3425650596618652,
"logps/chosen": -0.3056505024433136,
"logps/rejected": -0.8699617981910706,
"loss": 1.0141,
"nll_loss": 0.9721490144729614,
"rewards/accuracies": 0.8083333373069763,
"rewards/chosen": -0.03056504763662815,
"rewards/margins": 0.056431129574775696,
"rewards/rejected": -0.0869961753487587,
"step": 925
},
{
"epoch": 1.2883548983364141,
"grad_norm": 2.3423352241516113,
"learning_rate": 2.674545454545454e-07,
"log_odds_chosen": 1.137675404548645,
"log_odds_ratio": -0.4286971390247345,
"logits/chosen": 2.1223714351654053,
"logits/rejected": 2.1788086891174316,
"logps/chosen": -0.28890591859817505,
"logps/rejected": -0.7336766719818115,
"loss": 1.0341,
"nll_loss": 0.9912530779838562,
"rewards/accuracies": 0.8166666626930237,
"rewards/chosen": -0.028890585526823997,
"rewards/margins": 0.04447708651423454,
"rewards/rejected": -0.07336767762899399,
"step": 930
},
{
"epoch": 1.295286506469501,
"grad_norm": 1.893749475479126,
"learning_rate": 2.6654545454545453e-07,
"log_odds_chosen": 1.4332900047302246,
"log_odds_ratio": -0.34831517934799194,
"logits/chosen": 2.1003878116607666,
"logits/rejected": 2.1722400188446045,
"logps/chosen": -0.28711190819740295,
"logps/rejected": -0.8249975442886353,
"loss": 1.0661,
"nll_loss": 1.0313143730163574,
"rewards/accuracies": 0.9083333611488342,
"rewards/chosen": -0.028711196035146713,
"rewards/margins": 0.05378856882452965,
"rewards/rejected": -0.08249974995851517,
"step": 935
},
{
"epoch": 1.3022181146025877,
"grad_norm": 4.6435160636901855,
"learning_rate": 2.656363636363636e-07,
"log_odds_chosen": 1.082852840423584,
"log_odds_ratio": -0.45315298438072205,
"logits/chosen": 2.1574742794036865,
"logits/rejected": 2.178946018218994,
"logps/chosen": -0.3037134110927582,
"logps/rejected": -0.7448738813400269,
"loss": 1.0704,
"nll_loss": 1.0250810384750366,
"rewards/accuracies": 0.7916666865348816,
"rewards/chosen": -0.03037133812904358,
"rewards/margins": 0.0441160574555397,
"rewards/rejected": -0.07448740303516388,
"step": 940
},
{
"epoch": 1.3091497227356748,
"grad_norm": 2.1025171279907227,
"learning_rate": 2.647272727272727e-07,
"log_odds_chosen": 1.1002755165100098,
"log_odds_ratio": -0.44053414463996887,
"logits/chosen": 2.106156826019287,
"logits/rejected": 2.155453681945801,
"logps/chosen": -0.2461674064397812,
"logps/rejected": -0.6956557035446167,
"loss": 1.0449,
"nll_loss": 1.0008207559585571,
"rewards/accuracies": 0.7666666507720947,
"rewards/chosen": -0.024616742506623268,
"rewards/margins": 0.04494882747530937,
"rewards/rejected": -0.06956557184457779,
"step": 945
},
{
"epoch": 1.3160813308687616,
"grad_norm": 1.3751192092895508,
"learning_rate": 2.638181818181818e-07,
"log_odds_chosen": 1.303594708442688,
"log_odds_ratio": -0.36508041620254517,
"logits/chosen": 2.1502010822296143,
"logits/rejected": 2.213773012161255,
"logps/chosen": -0.24462252855300903,
"logps/rejected": -0.7531419396400452,
"loss": 1.0002,
"nll_loss": 0.9637396931648254,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.024462254717946053,
"rewards/margins": 0.05085194110870361,
"rewards/rejected": -0.07531419396400452,
"step": 950
},
{
"epoch": 1.3230129390018484,
"grad_norm": 2.228548526763916,
"learning_rate": 2.6290909090909087e-07,
"log_odds_chosen": 1.1054414510726929,
"log_odds_ratio": -0.4313698410987854,
"logits/chosen": 2.054419755935669,
"logits/rejected": 2.1415534019470215,
"logps/chosen": -0.25074058771133423,
"logps/rejected": -0.6487269997596741,
"loss": 0.9808,
"nll_loss": 0.9376189708709717,
"rewards/accuracies": 0.8083333373069763,
"rewards/chosen": -0.025074057281017303,
"rewards/margins": 0.039798639714717865,
"rewards/rejected": -0.06487269699573517,
"step": 955
},
{
"epoch": 1.3299445471349354,
"grad_norm": 2.61733078956604,
"learning_rate": 2.62e-07,
"log_odds_chosen": 1.2382240295410156,
"log_odds_ratio": -0.42497238516807556,
"logits/chosen": 2.222075939178467,
"logits/rejected": 2.268425464630127,
"logps/chosen": -0.3022395372390747,
"logps/rejected": -0.819779634475708,
"loss": 1.1007,
"nll_loss": 1.0581555366516113,
"rewards/accuracies": 0.7833333611488342,
"rewards/chosen": -0.03022395819425583,
"rewards/margins": 0.05175400897860527,
"rewards/rejected": -0.0819779708981514,
"step": 960
},
{
"epoch": 1.3368761552680222,
"grad_norm": 1.3807116746902466,
"learning_rate": 2.6109090909090906e-07,
"log_odds_chosen": 1.2055879831314087,
"log_odds_ratio": -0.3867366909980774,
"logits/chosen": 2.1033875942230225,
"logits/rejected": 2.1540472507476807,
"logps/chosen": -0.277556449174881,
"logps/rejected": -0.7209326028823853,
"loss": 1.0018,
"nll_loss": 0.9630894064903259,
"rewards/accuracies": 0.8833333253860474,
"rewards/chosen": -0.027755646035075188,
"rewards/margins": 0.044337622821331024,
"rewards/rejected": -0.07209326326847076,
"step": 965
},
{
"epoch": 1.343807763401109,
"grad_norm": 2.0066561698913574,
"learning_rate": 2.601818181818182e-07,
"log_odds_chosen": 1.3411426544189453,
"log_odds_ratio": -0.4055359959602356,
"logits/chosen": 2.0890884399414062,
"logits/rejected": 2.154545783996582,
"logps/chosen": -0.3011336922645569,
"logps/rejected": -0.8289684057235718,
"loss": 1.0136,
"nll_loss": 0.9730068445205688,
"rewards/accuracies": 0.8416666388511658,
"rewards/chosen": -0.03011336922645569,
"rewards/margins": 0.05278347432613373,
"rewards/rejected": -0.08289684355258942,
"step": 970
},
{
"epoch": 1.350739371534196,
"grad_norm": 1.9893730878829956,
"learning_rate": 2.5927272727272726e-07,
"log_odds_chosen": 1.1508934497833252,
"log_odds_ratio": -0.4049813449382782,
"logits/chosen": 2.1374075412750244,
"logits/rejected": 2.1761152744293213,
"logps/chosen": -0.3276708424091339,
"logps/rejected": -0.7975314259529114,
"loss": 1.0488,
"nll_loss": 1.0082674026489258,
"rewards/accuracies": 0.7916666865348816,
"rewards/chosen": -0.03276708349585533,
"rewards/margins": 0.04698607698082924,
"rewards/rejected": -0.07975315302610397,
"step": 975
},
{
"epoch": 1.3576709796672828,
"grad_norm": 2.1329009532928467,
"learning_rate": 2.583636363636363e-07,
"log_odds_chosen": 1.4169307947158813,
"log_odds_ratio": -0.4349002242088318,
"logits/chosen": 2.0836265087127686,
"logits/rejected": 2.1221506595611572,
"logps/chosen": -0.28608009219169617,
"logps/rejected": -0.8973101377487183,
"loss": 1.043,
"nll_loss": 0.9995481967926025,
"rewards/accuracies": 0.7833333611488342,
"rewards/chosen": -0.028608011081814766,
"rewards/margins": 0.06112300232052803,
"rewards/rejected": -0.08973101526498795,
"step": 980
},
{
"epoch": 1.3646025878003698,
"grad_norm": 1.5113952159881592,
"learning_rate": 2.5745454545454545e-07,
"log_odds_chosen": 1.0963186025619507,
"log_odds_ratio": -0.45471566915512085,
"logits/chosen": 2.0918946266174316,
"logits/rejected": 2.147059917449951,
"logps/chosen": -0.2852664589881897,
"logps/rejected": -0.6635777354240417,
"loss": 1.0257,
"nll_loss": 0.9802023768424988,
"rewards/accuracies": 0.8166666626930237,
"rewards/chosen": -0.02852664329111576,
"rewards/margins": 0.037831127643585205,
"rewards/rejected": -0.06635776907205582,
"step": 985
},
{
"epoch": 1.3715341959334566,
"grad_norm": 1.2862117290496826,
"learning_rate": 2.565454545454545e-07,
"log_odds_chosen": 1.2140270471572876,
"log_odds_ratio": -0.37888041138648987,
"logits/chosen": 2.191817283630371,
"logits/rejected": 2.248182773590088,
"logps/chosen": -0.3066679537296295,
"logps/rejected": -0.775726854801178,
"loss": 1.0404,
"nll_loss": 1.0024964809417725,
"rewards/accuracies": 0.8333333134651184,
"rewards/chosen": -0.030666792765259743,
"rewards/margins": 0.046905890107154846,
"rewards/rejected": -0.07757268846035004,
"step": 990
},
{
"epoch": 1.3784658040665434,
"grad_norm": 2.1814792156219482,
"learning_rate": 2.556363636363636e-07,
"log_odds_chosen": 1.1626993417739868,
"log_odds_ratio": -0.421124666929245,
"logits/chosen": 2.1899914741516113,
"logits/rejected": 2.2058660984039307,
"logps/chosen": -0.3344000279903412,
"logps/rejected": -0.812175989151001,
"loss": 1.0606,
"nll_loss": 1.0184708833694458,
"rewards/accuracies": 0.8333333134651184,
"rewards/chosen": -0.0334400050342083,
"rewards/margins": 0.04777759313583374,
"rewards/rejected": -0.08121760189533234,
"step": 995
},
{
"epoch": 1.3853974121996302,
"grad_norm": 1.2583403587341309,
"learning_rate": 2.547272727272727e-07,
"log_odds_chosen": 1.4967857599258423,
"log_odds_ratio": -0.32969042658805847,
"logits/chosen": 2.146193027496338,
"logits/rejected": 2.210885763168335,
"logps/chosen": -0.2802196443080902,
"logps/rejected": -0.9464040994644165,
"loss": 0.9995,
"nll_loss": 0.9665043354034424,
"rewards/accuracies": 0.8666666746139526,
"rewards/chosen": -0.02802196517586708,
"rewards/margins": 0.0666184350848198,
"rewards/rejected": -0.09464039653539658,
"step": 1000
},
{
"epoch": 1.3923290203327172,
"grad_norm": 3.5290753841400146,
"learning_rate": 2.538181818181818e-07,
"log_odds_chosen": 1.478700041770935,
"log_odds_ratio": -0.36008498072624207,
"logits/chosen": 2.2012546062469482,
"logits/rejected": 2.257498264312744,
"logps/chosen": -0.2387988120317459,
"logps/rejected": -0.8150947690010071,
"loss": 1.0503,
"nll_loss": 1.0142549276351929,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.023879878222942352,
"rewards/margins": 0.057629600167274475,
"rewards/rejected": -0.08150947839021683,
"step": 1005
},
{
"epoch": 1.399260628465804,
"grad_norm": 2.2447733879089355,
"learning_rate": 2.529090909090909e-07,
"log_odds_chosen": 1.5088070631027222,
"log_odds_ratio": -0.37234050035476685,
"logits/chosen": 2.131213665008545,
"logits/rejected": 2.2011141777038574,
"logps/chosen": -0.30126506090164185,
"logps/rejected": -0.9823321104049683,
"loss": 1.0395,
"nll_loss": 1.0022485256195068,
"rewards/accuracies": 0.8416666388511658,
"rewards/chosen": -0.030126502737402916,
"rewards/margins": 0.06810670346021652,
"rewards/rejected": -0.09823321551084518,
"step": 1010
},
{
"epoch": 1.406192236598891,
"grad_norm": 1.6832704544067383,
"learning_rate": 2.52e-07,
"log_odds_chosen": 1.3786380290985107,
"log_odds_ratio": -0.3612533509731293,
"logits/chosen": 2.1688296794891357,
"logits/rejected": 2.2276947498321533,
"logps/chosen": -0.26425522565841675,
"logps/rejected": -0.7921401858329773,
"loss": 1.0578,
"nll_loss": 1.0216939449310303,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.026425523683428764,
"rewards/margins": 0.05278850346803665,
"rewards/rejected": -0.07921402156352997,
"step": 1015
},
{
"epoch": 1.4131238447319778,
"grad_norm": 1.8064701557159424,
"learning_rate": 2.5109090909090905e-07,
"log_odds_chosen": 1.475263237953186,
"log_odds_ratio": -0.3918589651584625,
"logits/chosen": 2.1581435203552246,
"logits/rejected": 2.197493314743042,
"logps/chosen": -0.3150491416454315,
"logps/rejected": -0.9940579533576965,
"loss": 1.0716,
"nll_loss": 1.032424807548523,
"rewards/accuracies": 0.7916666865348816,
"rewards/chosen": -0.03150491416454315,
"rewards/margins": 0.0679008811712265,
"rewards/rejected": -0.09940580278635025,
"step": 1020
},
{
"epoch": 1.4200554528650646,
"grad_norm": 3.116750478744507,
"learning_rate": 2.501818181818182e-07,
"log_odds_chosen": 1.4267123937606812,
"log_odds_ratio": -0.3581138253211975,
"logits/chosen": 2.150437593460083,
"logits/rejected": 2.196103572845459,
"logps/chosen": -0.3007969856262207,
"logps/rejected": -0.9089770913124084,
"loss": 1.0489,
"nll_loss": 1.0131365060806274,
"rewards/accuracies": 0.8833333253860474,
"rewards/chosen": -0.030079694464802742,
"rewards/margins": 0.060818012803792953,
"rewards/rejected": -0.09089770913124084,
"step": 1025
},
{
"epoch": 1.4269870609981516,
"grad_norm": 1.498563528060913,
"learning_rate": 2.4927272727272725e-07,
"log_odds_chosen": 1.207962155342102,
"log_odds_ratio": -0.44308769702911377,
"logits/chosen": 2.086268663406372,
"logits/rejected": 2.1397366523742676,
"logps/chosen": -0.2770783007144928,
"logps/rejected": -0.7369803786277771,
"loss": 0.9999,
"nll_loss": 0.9556010961532593,
"rewards/accuracies": 0.7916666865348816,
"rewards/chosen": -0.02770783193409443,
"rewards/margins": 0.04599021375179291,
"rewards/rejected": -0.07369804382324219,
"step": 1030
},
{
"epoch": 1.4339186691312384,
"grad_norm": 2.2361621856689453,
"learning_rate": 2.483636363636363e-07,
"log_odds_chosen": 1.3191566467285156,
"log_odds_ratio": -0.40445128083229065,
"logits/chosen": 2.138350486755371,
"logits/rejected": 2.201185703277588,
"logps/chosen": -0.29545170068740845,
"logps/rejected": -0.824712872505188,
"loss": 1.0544,
"nll_loss": 1.0139575004577637,
"rewards/accuracies": 0.8166666626930237,
"rewards/chosen": -0.029545169323682785,
"rewards/margins": 0.05292612686753273,
"rewards/rejected": -0.08247129619121552,
"step": 1035
},
{
"epoch": 1.4408502772643252,
"grad_norm": 1.6298840045928955,
"learning_rate": 2.4745454545454544e-07,
"log_odds_chosen": 1.2173666954040527,
"log_odds_ratio": -0.39109528064727783,
"logits/chosen": 2.0655899047851562,
"logits/rejected": 2.1055774688720703,
"logps/chosen": -0.28332966566085815,
"logps/rejected": -0.7577340006828308,
"loss": 1.0551,
"nll_loss": 1.015963077545166,
"rewards/accuracies": 0.8333333134651184,
"rewards/chosen": -0.028332972899079323,
"rewards/margins": 0.04744042083621025,
"rewards/rejected": -0.07577338814735413,
"step": 1040
},
{
"epoch": 1.4477818853974123,
"grad_norm": 1.9538235664367676,
"learning_rate": 2.465454545454545e-07,
"log_odds_chosen": 1.4704244136810303,
"log_odds_ratio": -0.36091259121894836,
"logits/chosen": 2.067735195159912,
"logits/rejected": 2.107760190963745,
"logps/chosen": -0.24208252131938934,
"logps/rejected": -0.8322712779045105,
"loss": 1.0402,
"nll_loss": 1.0041333436965942,
"rewards/accuracies": 0.8416666388511658,
"rewards/chosen": -0.024208255112171173,
"rewards/margins": 0.05901888757944107,
"rewards/rejected": -0.08322712779045105,
"step": 1045
},
{
"epoch": 1.454713493530499,
"grad_norm": 2.375593900680542,
"learning_rate": 2.4563636363636363e-07,
"log_odds_chosen": 1.342626690864563,
"log_odds_ratio": -0.3853161633014679,
"logits/chosen": 2.0571084022521973,
"logits/rejected": 2.118323802947998,
"logps/chosen": -0.2818390429019928,
"logps/rejected": -0.8502424955368042,
"loss": 1.0375,
"nll_loss": 0.9989607930183411,
"rewards/accuracies": 0.8416666388511658,
"rewards/chosen": -0.02818390727043152,
"rewards/margins": 0.05684033781290054,
"rewards/rejected": -0.08502423763275146,
"step": 1050
},
{
"epoch": 1.4616451016635859,
"grad_norm": 3.926017999649048,
"learning_rate": 2.447272727272727e-07,
"log_odds_chosen": 1.2430663108825684,
"log_odds_ratio": -0.4533371925354004,
"logits/chosen": 2.024120569229126,
"logits/rejected": 2.0624420642852783,
"logps/chosen": -0.26999524235725403,
"logps/rejected": -0.7484342455863953,
"loss": 1.029,
"nll_loss": 0.9836971759796143,
"rewards/accuracies": 0.8333333134651184,
"rewards/chosen": -0.026999525725841522,
"rewards/margins": 0.04784390702843666,
"rewards/rejected": -0.07484342157840729,
"step": 1055
},
{
"epoch": 1.4685767097966729,
"grad_norm": 2.118476152420044,
"learning_rate": 2.438181818181818e-07,
"log_odds_chosen": 0.9853528738021851,
"log_odds_ratio": -0.4883633553981781,
"logits/chosen": 2.0680394172668457,
"logits/rejected": 2.136178493499756,
"logps/chosen": -0.3353304862976074,
"logps/rejected": -0.7483987808227539,
"loss": 1.0975,
"nll_loss": 1.0486379861831665,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.033533044159412384,
"rewards/margins": 0.04130683094263077,
"rewards/rejected": -0.07483987510204315,
"step": 1060
},
{
"epoch": 1.4755083179297597,
"grad_norm": 2.533656120300293,
"learning_rate": 2.429090909090909e-07,
"log_odds_chosen": 0.9804785847663879,
"log_odds_ratio": -0.4883616864681244,
"logits/chosen": 2.097592353820801,
"logits/rejected": 2.1535866260528564,
"logps/chosen": -0.29354825615882874,
"logps/rejected": -0.7082226872444153,
"loss": 1.0215,
"nll_loss": 0.9726455807685852,
"rewards/accuracies": 0.7666666507720947,
"rewards/chosen": -0.02935483120381832,
"rewards/margins": 0.041467439383268356,
"rewards/rejected": -0.07082226872444153,
"step": 1065
},
{
"epoch": 1.4824399260628467,
"grad_norm": 1.7187951803207397,
"learning_rate": 2.4199999999999997e-07,
"log_odds_chosen": 1.2794216871261597,
"log_odds_ratio": -0.3667431175708771,
"logits/chosen": 2.010540246963501,
"logits/rejected": 2.082904815673828,
"logps/chosen": -0.25798189640045166,
"logps/rejected": -0.7230808138847351,
"loss": 1.0057,
"nll_loss": 0.9690180420875549,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.025798192247748375,
"rewards/margins": 0.046509888023138046,
"rewards/rejected": -0.07230808585882187,
"step": 1070
},
{
"epoch": 1.4893715341959335,
"grad_norm": 2.982020616531372,
"learning_rate": 2.410909090909091e-07,
"log_odds_chosen": 1.1678146123886108,
"log_odds_ratio": -0.41319549083709717,
"logits/chosen": 2.1234309673309326,
"logits/rejected": 2.17087459564209,
"logps/chosen": -0.32161521911621094,
"logps/rejected": -0.8085314035415649,
"loss": 1.0905,
"nll_loss": 1.0492180585861206,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.032161518931388855,
"rewards/margins": 0.04869161546230316,
"rewards/rejected": -0.08085312694311142,
"step": 1075
},
{
"epoch": 1.4963031423290203,
"grad_norm": 2.0627431869506836,
"learning_rate": 2.4018181818181816e-07,
"log_odds_chosen": 1.1932671070098877,
"log_odds_ratio": -0.45415419340133667,
"logits/chosen": 2.096099615097046,
"logits/rejected": 2.167684316635132,
"logps/chosen": -0.2884353697299957,
"logps/rejected": -0.8226889371871948,
"loss": 1.0148,
"nll_loss": 0.9693484902381897,
"rewards/accuracies": 0.7416666746139526,
"rewards/chosen": -0.028843533247709274,
"rewards/margins": 0.0534253753721714,
"rewards/rejected": -0.08226890861988068,
"step": 1080
},
{
"epoch": 1.503234750462107,
"grad_norm": 2.2353203296661377,
"learning_rate": 2.3927272727272724e-07,
"log_odds_chosen": 1.2742195129394531,
"log_odds_ratio": -0.41592225432395935,
"logits/chosen": 2.007319450378418,
"logits/rejected": 2.0866177082061768,
"logps/chosen": -0.28505003452301025,
"logps/rejected": -0.8023856282234192,
"loss": 1.0211,
"nll_loss": 0.9794998168945312,
"rewards/accuracies": 0.8333333134651184,
"rewards/chosen": -0.028505001217126846,
"rewards/margins": 0.05173356831073761,
"rewards/rejected": -0.08023856580257416,
"step": 1085
},
{
"epoch": 1.510166358595194,
"grad_norm": 2.5934882164001465,
"learning_rate": 2.3836363636363636e-07,
"log_odds_chosen": 1.4275834560394287,
"log_odds_ratio": -0.3598444163799286,
"logits/chosen": 2.1860811710357666,
"logits/rejected": 2.234570264816284,
"logps/chosen": -0.2805514633655548,
"logps/rejected": -0.867975652217865,
"loss": 1.0418,
"nll_loss": 1.0058512687683105,
"rewards/accuracies": 0.8666666746139526,
"rewards/chosen": -0.028055142611265182,
"rewards/margins": 0.058742426335811615,
"rewards/rejected": -0.0867975726723671,
"step": 1090
},
{
"epoch": 1.5170979667282811,
"grad_norm": 3.790891408920288,
"learning_rate": 2.3745454545454543e-07,
"log_odds_chosen": 1.4269194602966309,
"log_odds_ratio": -0.3880935609340668,
"logits/chosen": 2.081702470779419,
"logits/rejected": 2.1460609436035156,
"logps/chosen": -0.2924844026565552,
"logps/rejected": -0.8836368322372437,
"loss": 1.0421,
"nll_loss": 1.0032716989517212,
"rewards/accuracies": 0.8083333373069763,
"rewards/chosen": -0.029248446226119995,
"rewards/margins": 0.05911524221301079,
"rewards/rejected": -0.08836368471384048,
"step": 1095
},
{
"epoch": 1.524029574861368,
"grad_norm": 1.444333553314209,
"learning_rate": 2.3654545454545453e-07,
"log_odds_chosen": 1.5300582647323608,
"log_odds_ratio": -0.3157467842102051,
"logits/chosen": 2.065735340118408,
"logits/rejected": 2.124572992324829,
"logps/chosen": -0.25401103496551514,
"logps/rejected": -0.870669960975647,
"loss": 1.0242,
"nll_loss": 0.9926338791847229,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.02540110982954502,
"rewards/margins": 0.061665892601013184,
"rewards/rejected": -0.08706699311733246,
"step": 1100
},
{
"epoch": 1.5309611829944547,
"grad_norm": 1.328914999961853,
"learning_rate": 2.3563636363636362e-07,
"log_odds_chosen": 1.5849844217300415,
"log_odds_ratio": -0.3574371933937073,
"logits/chosen": 2.0881130695343018,
"logits/rejected": 2.132666826248169,
"logps/chosen": -0.297654926776886,
"logps/rejected": -0.9698154330253601,
"loss": 1.0571,
"nll_loss": 1.021361231803894,
"rewards/accuracies": 0.8833333253860474,
"rewards/chosen": -0.029765494167804718,
"rewards/margins": 0.06721605360507965,
"rewards/rejected": -0.09698154032230377,
"step": 1105
},
{
"epoch": 1.5378927911275415,
"grad_norm": 3.064085006713867,
"learning_rate": 2.347272727272727e-07,
"log_odds_chosen": 1.482607364654541,
"log_odds_ratio": -0.41177818179130554,
"logits/chosen": 2.100356101989746,
"logits/rejected": 2.149595022201538,
"logps/chosen": -0.3262555003166199,
"logps/rejected": -0.9901362657546997,
"loss": 1.024,
"nll_loss": 0.9827964305877686,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.03262555971741676,
"rewards/margins": 0.0663880705833435,
"rewards/rejected": -0.09901363402605057,
"step": 1110
},
{
"epoch": 1.5448243992606283,
"grad_norm": 1.7144571542739868,
"learning_rate": 2.3381818181818182e-07,
"log_odds_chosen": 1.297485113143921,
"log_odds_ratio": -0.44758838415145874,
"logits/chosen": 2.169114112854004,
"logits/rejected": 2.225714683532715,
"logps/chosen": -0.334065705537796,
"logps/rejected": -0.8946993350982666,
"loss": 1.0743,
"nll_loss": 1.0295709371566772,
"rewards/accuracies": 0.7833333611488342,
"rewards/chosen": -0.033406566828489304,
"rewards/margins": 0.05606337636709213,
"rewards/rejected": -0.08946993947029114,
"step": 1115
},
{
"epoch": 1.5517560073937153,
"grad_norm": 1.577359676361084,
"learning_rate": 2.329090909090909e-07,
"log_odds_chosen": 1.224462628364563,
"log_odds_ratio": -0.40063703060150146,
"logits/chosen": 2.1936235427856445,
"logits/rejected": 2.2490689754486084,
"logps/chosen": -0.2935000956058502,
"logps/rejected": -0.746100664138794,
"loss": 1.0438,
"nll_loss": 1.0037094354629517,
"rewards/accuracies": 0.8333333134651184,
"rewards/chosen": -0.02935001067817211,
"rewards/margins": 0.045260071754455566,
"rewards/rejected": -0.07461007684469223,
"step": 1120
},
{
"epoch": 1.5586876155268024,
"grad_norm": 1.445590615272522,
"learning_rate": 2.3199999999999999e-07,
"log_odds_chosen": 1.428971529006958,
"log_odds_ratio": -0.3972775638103485,
"logits/chosen": 2.1382179260253906,
"logits/rejected": 2.1906816959381104,
"logps/chosen": -0.3196958899497986,
"logps/rejected": -0.9572470784187317,
"loss": 1.0678,
"nll_loss": 1.0280849933624268,
"rewards/accuracies": 0.8333333134651184,
"rewards/chosen": -0.0319695882499218,
"rewards/margins": 0.06375513225793839,
"rewards/rejected": -0.09572472423315048,
"step": 1125
},
{
"epoch": 1.5656192236598891,
"grad_norm": 2.6556386947631836,
"learning_rate": 2.3109090909090908e-07,
"log_odds_chosen": 1.5480204820632935,
"log_odds_ratio": -0.2979664206504822,
"logits/chosen": 1.9882813692092896,
"logits/rejected": 2.047785520553589,
"logps/chosen": -0.2392881214618683,
"logps/rejected": -0.806952178478241,
"loss": 1.0056,
"nll_loss": 0.9758478403091431,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.0239288117736578,
"rewards/margins": 0.05676640570163727,
"rewards/rejected": -0.08069522678852081,
"step": 1130
},
{
"epoch": 1.572550831792976,
"grad_norm": 1.543114185333252,
"learning_rate": 2.3018181818181815e-07,
"log_odds_chosen": 1.306591272354126,
"log_odds_ratio": -0.38037270307540894,
"logits/chosen": 2.1771938800811768,
"logits/rejected": 2.204706907272339,
"logps/chosen": -0.2897684872150421,
"logps/rejected": -0.8257070183753967,
"loss": 1.0772,
"nll_loss": 1.0391967296600342,
"rewards/accuracies": 0.8333333134651184,
"rewards/chosen": -0.02897684834897518,
"rewards/margins": 0.05359385162591934,
"rewards/rejected": -0.08257070928812027,
"step": 1135
},
{
"epoch": 1.5794824399260627,
"grad_norm": 2.1715428829193115,
"learning_rate": 2.2927272727272728e-07,
"log_odds_chosen": 1.4083577394485474,
"log_odds_ratio": -0.4124881327152252,
"logits/chosen": 2.092696189880371,
"logits/rejected": 2.1379013061523438,
"logps/chosen": -0.302141934633255,
"logps/rejected": -0.8928415775299072,
"loss": 1.0676,
"nll_loss": 1.026381492614746,
"rewards/accuracies": 0.8416666388511658,
"rewards/chosen": -0.03021419048309326,
"rewards/margins": 0.059069979935884476,
"rewards/rejected": -0.08928415924310684,
"step": 1140
},
{
"epoch": 1.5864140480591498,
"grad_norm": 1.848644733428955,
"learning_rate": 2.2836363636363635e-07,
"log_odds_chosen": 1.3089061975479126,
"log_odds_ratio": -0.40707269310951233,
"logits/chosen": 2.151923418045044,
"logits/rejected": 2.2069790363311768,
"logps/chosen": -0.29877957701683044,
"logps/rejected": -0.8117203116416931,
"loss": 1.0202,
"nll_loss": 0.9795213341712952,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.029877962544560432,
"rewards/margins": 0.05129408463835716,
"rewards/rejected": -0.08117203414440155,
"step": 1145
},
{
"epoch": 1.5933456561922366,
"grad_norm": 2.3527963161468506,
"learning_rate": 2.2745454545454542e-07,
"log_odds_chosen": 1.4974111318588257,
"log_odds_ratio": -0.40171802043914795,
"logits/chosen": 2.093785524368286,
"logits/rejected": 2.144911050796509,
"logps/chosen": -0.2905969023704529,
"logps/rejected": -0.9690452218055725,
"loss": 1.043,
"nll_loss": 1.0028067827224731,
"rewards/accuracies": 0.8166666626930237,
"rewards/chosen": -0.02905968949198723,
"rewards/margins": 0.06784483045339584,
"rewards/rejected": -0.09690450876951218,
"step": 1150
},
{
"epoch": 1.6002772643253236,
"grad_norm": 3.2236886024475098,
"learning_rate": 2.2654545454545454e-07,
"log_odds_chosen": 1.4238559007644653,
"log_odds_ratio": -0.39723172783851624,
"logits/chosen": 2.1333365440368652,
"logits/rejected": 2.172248125076294,
"logps/chosen": -0.2883763015270233,
"logps/rejected": -0.8981528878211975,
"loss": 1.0034,
"nll_loss": 0.9636661410331726,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.028837626799941063,
"rewards/margins": 0.06097765639424324,
"rewards/rejected": -0.08981527388095856,
"step": 1155
},
{
"epoch": 1.6072088724584104,
"grad_norm": 1.6170852184295654,
"learning_rate": 2.2563636363636361e-07,
"log_odds_chosen": 1.2530089616775513,
"log_odds_ratio": -0.3909212052822113,
"logits/chosen": 2.122631311416626,
"logits/rejected": 2.1740965843200684,
"logps/chosen": -0.3135833740234375,
"logps/rejected": -0.8097442984580994,
"loss": 1.0268,
"nll_loss": 0.9877387881278992,
"rewards/accuracies": 0.8416666388511658,
"rewards/chosen": -0.03135833516716957,
"rewards/margins": 0.04961610212922096,
"rewards/rejected": -0.08097445219755173,
"step": 1160
},
{
"epoch": 1.6141404805914972,
"grad_norm": 3.0495035648345947,
"learning_rate": 2.247272727272727e-07,
"log_odds_chosen": 1.2540229558944702,
"log_odds_ratio": -0.4228639304637909,
"logits/chosen": 2.1452457904815674,
"logits/rejected": 2.199737548828125,
"logps/chosen": -0.32104960083961487,
"logps/rejected": -0.9188255071640015,
"loss": 1.0655,
"nll_loss": 1.0232080221176147,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.032104961574077606,
"rewards/margins": 0.059777598828077316,
"rewards/rejected": -0.09188255667686462,
"step": 1165
},
{
"epoch": 1.621072088724584,
"grad_norm": 1.4193778038024902,
"learning_rate": 2.238181818181818e-07,
"log_odds_chosen": 1.4086551666259766,
"log_odds_ratio": -0.4291422665119171,
"logits/chosen": 2.132582187652588,
"logits/rejected": 2.1951537132263184,
"logps/chosen": -0.27538225054740906,
"logps/rejected": -0.8874372839927673,
"loss": 1.0582,
"nll_loss": 1.0153110027313232,
"rewards/accuracies": 0.7833333611488342,
"rewards/chosen": -0.027538226917386055,
"rewards/margins": 0.061205506324768066,
"rewards/rejected": -0.08874373137950897,
"step": 1170
},
{
"epoch": 1.628003696857671,
"grad_norm": 4.035185813903809,
"learning_rate": 2.2290909090909088e-07,
"log_odds_chosen": 1.1511118412017822,
"log_odds_ratio": -0.42211630940437317,
"logits/chosen": 2.043503761291504,
"logits/rejected": 2.0831780433654785,
"logps/chosen": -0.260224312543869,
"logps/rejected": -0.69998699426651,
"loss": 1.0533,
"nll_loss": 1.0110424757003784,
"rewards/accuracies": 0.8166666626930237,
"rewards/chosen": -0.02602243237197399,
"rewards/margins": 0.04397625848650932,
"rewards/rejected": -0.06999869644641876,
"step": 1175
},
{
"epoch": 1.634935304990758,
"grad_norm": 2.188004732131958,
"learning_rate": 2.22e-07,
"log_odds_chosen": 1.3063474893569946,
"log_odds_ratio": -0.4064422845840454,
"logits/chosen": 2.0244498252868652,
"logits/rejected": 2.106379747390747,
"logps/chosen": -0.28905677795410156,
"logps/rejected": -0.8373602628707886,
"loss": 1.019,
"nll_loss": 0.9783718585968018,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.028905682265758514,
"rewards/margins": 0.054830338805913925,
"rewards/rejected": -0.08373603224754333,
"step": 1180
},
{
"epoch": 1.6418669131238448,
"grad_norm": 2.3367137908935547,
"learning_rate": 2.2109090909090907e-07,
"log_odds_chosen": 1.1956565380096436,
"log_odds_ratio": -0.4174603819847107,
"logits/chosen": 2.0511369705200195,
"logits/rejected": 2.097266435623169,
"logps/chosen": -0.29188284277915955,
"logps/rejected": -0.7658560872077942,
"loss": 1.0463,
"nll_loss": 1.0045546293258667,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.029188284650444984,
"rewards/margins": 0.047397319227457047,
"rewards/rejected": -0.07658561319112778,
"step": 1185
},
{
"epoch": 1.6487985212569316,
"grad_norm": 2.0365090370178223,
"learning_rate": 2.2018181818181817e-07,
"log_odds_chosen": 1.470836877822876,
"log_odds_ratio": -0.32899710536003113,
"logits/chosen": 2.0854430198669434,
"logits/rejected": 2.146840810775757,
"logps/chosen": -0.25189822912216187,
"logps/rejected": -0.805548906326294,
"loss": 1.0219,
"nll_loss": 0.9889623522758484,
"rewards/accuracies": 0.8916666507720947,
"rewards/chosen": -0.025189822539687157,
"rewards/margins": 0.055365074425935745,
"rewards/rejected": -0.08055491000413895,
"step": 1190
},
{
"epoch": 1.6557301293900184,
"grad_norm": 1.4684514999389648,
"learning_rate": 2.1927272727272727e-07,
"log_odds_chosen": 1.5294240713119507,
"log_odds_ratio": -0.3121597468852997,
"logits/chosen": 2.0382626056671143,
"logits/rejected": 2.096428871154785,
"logps/chosen": -0.28892782330513,
"logps/rejected": -0.9338265061378479,
"loss": 1.0155,
"nll_loss": 0.9842939972877502,
"rewards/accuracies": 0.8666666746139526,
"rewards/chosen": -0.02889277972280979,
"rewards/margins": 0.06448986381292343,
"rewards/rejected": -0.09338264167308807,
"step": 1195
},
{
"epoch": 1.6626617375231052,
"grad_norm": 1.848939299583435,
"learning_rate": 2.1836363636363634e-07,
"log_odds_chosen": 1.2887681722640991,
"log_odds_ratio": -0.39700159430503845,
"logits/chosen": 2.0447583198547363,
"logits/rejected": 2.086496353149414,
"logps/chosen": -0.28646960854530334,
"logps/rejected": -0.8112481832504272,
"loss": 1.0786,
"nll_loss": 1.0388500690460205,
"rewards/accuracies": 0.8166666626930237,
"rewards/chosen": -0.028646962717175484,
"rewards/margins": 0.05247785896062851,
"rewards/rejected": -0.08112481236457825,
"step": 1200
},
{
"epoch": 1.6695933456561922,
"grad_norm": 2.438013792037964,
"learning_rate": 2.1745454545454544e-07,
"log_odds_chosen": 1.2431906461715698,
"log_odds_ratio": -0.41865456104278564,
"logits/chosen": 1.9995055198669434,
"logits/rejected": 2.066706418991089,
"logps/chosen": -0.28791549801826477,
"logps/rejected": -0.8092067241668701,
"loss": 1.0478,
"nll_loss": 1.0059376955032349,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.02879154682159424,
"rewards/margins": 0.052129123359918594,
"rewards/rejected": -0.08092068135738373,
"step": 1205
},
{
"epoch": 1.6765249537892792,
"grad_norm": 1.8914486169815063,
"learning_rate": 2.1654545454545453e-07,
"log_odds_chosen": 1.669597864151001,
"log_odds_ratio": -0.2944275438785553,
"logits/chosen": 2.067124366760254,
"logits/rejected": 2.1200904846191406,
"logps/chosen": -0.2542329728603363,
"logps/rejected": -0.9926477670669556,
"loss": 1.024,
"nll_loss": 0.9945566058158875,
"rewards/accuracies": 0.8666666746139526,
"rewards/chosen": -0.02542329952120781,
"rewards/margins": 0.07384147495031357,
"rewards/rejected": -0.09926477074623108,
"step": 1210
},
{
"epoch": 1.683456561922366,
"grad_norm": 1.8205511569976807,
"learning_rate": 2.156363636363636e-07,
"log_odds_chosen": 1.615554928779602,
"log_odds_ratio": -0.3494965434074402,
"logits/chosen": 2.0657761096954346,
"logits/rejected": 2.138937473297119,
"logps/chosen": -0.3055498003959656,
"logps/rejected": -0.9770273566246033,
"loss": 1.0574,
"nll_loss": 1.0224652290344238,
"rewards/accuracies": 0.8666666746139526,
"rewards/chosen": -0.03055497631430626,
"rewards/margins": 0.06714775413274765,
"rewards/rejected": -0.0977027490735054,
"step": 1215
},
{
"epoch": 1.6903881700554528,
"grad_norm": 3.3164730072021484,
"learning_rate": 2.1472727272727273e-07,
"log_odds_chosen": 1.2579916715621948,
"log_odds_ratio": -0.41120514273643494,
"logits/chosen": 2.0500335693359375,
"logits/rejected": 2.107823133468628,
"logps/chosen": -0.2929707467556,
"logps/rejected": -0.8051959276199341,
"loss": 1.0315,
"nll_loss": 0.9903787970542908,
"rewards/accuracies": 0.8083333373069763,
"rewards/chosen": -0.029297074303030968,
"rewards/margins": 0.051222506910562515,
"rewards/rejected": -0.08051959425210953,
"step": 1220
},
{
"epoch": 1.6973197781885396,
"grad_norm": 3.1997969150543213,
"learning_rate": 2.138181818181818e-07,
"log_odds_chosen": 1.3084485530853271,
"log_odds_ratio": -0.41026392579078674,
"logits/chosen": 1.9871976375579834,
"logits/rejected": 2.0475666522979736,
"logps/chosen": -0.31262117624282837,
"logps/rejected": -0.8240591287612915,
"loss": 1.014,
"nll_loss": 0.972952127456665,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.0312621183693409,
"rewards/margins": 0.05114380270242691,
"rewards/rejected": -0.08240590989589691,
"step": 1225
},
{
"epoch": 1.7042513863216266,
"grad_norm": 2.6871352195739746,
"learning_rate": 2.129090909090909e-07,
"log_odds_chosen": 1.0879921913146973,
"log_odds_ratio": -0.4872511923313141,
"logits/chosen": 2.080152750015259,
"logits/rejected": 2.119732141494751,
"logps/chosen": -0.369219571352005,
"logps/rejected": -0.8730208277702332,
"loss": 1.0926,
"nll_loss": 1.0438321828842163,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.03692195937037468,
"rewards/margins": 0.05038012936711311,
"rewards/rejected": -0.0873020812869072,
"step": 1230
},
{
"epoch": 1.7111829944547134,
"grad_norm": 2.325326442718506,
"learning_rate": 2.12e-07,
"log_odds_chosen": 1.397155523300171,
"log_odds_ratio": -0.39500170946121216,
"logits/chosen": 2.125500202178955,
"logits/rejected": 2.1754844188690186,
"logps/chosen": -0.2974463105201721,
"logps/rejected": -0.9127005934715271,
"loss": 1.029,
"nll_loss": 0.9895287156105042,
"rewards/accuracies": 0.7833333611488342,
"rewards/chosen": -0.02974463254213333,
"rewards/margins": 0.06152542680501938,
"rewards/rejected": -0.0912700667977333,
"step": 1235
},
{
"epoch": 1.7181146025878005,
"grad_norm": 1.5156536102294922,
"learning_rate": 2.1109090909090906e-07,
"log_odds_chosen": 1.2906519174575806,
"log_odds_ratio": -0.40361616015434265,
"logits/chosen": 2.051255702972412,
"logits/rejected": 2.103986978530884,
"logps/chosen": -0.268101304769516,
"logps/rejected": -0.7997626066207886,
"loss": 1.0366,
"nll_loss": 0.9962154030799866,
"rewards/accuracies": 0.8166666626930237,
"rewards/chosen": -0.02681013010442257,
"rewards/margins": 0.05316613242030144,
"rewards/rejected": -0.07997626811265945,
"step": 1240
},
{
"epoch": 1.7250462107208873,
"grad_norm": 1.6096516847610474,
"learning_rate": 2.101818181818182e-07,
"log_odds_chosen": 1.2347605228424072,
"log_odds_ratio": -0.4557424783706665,
"logits/chosen": 2.0431010723114014,
"logits/rejected": 2.1129000186920166,
"logps/chosen": -0.30273541808128357,
"logps/rejected": -0.7864081859588623,
"loss": 1.0529,
"nll_loss": 1.0073034763336182,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.03027353622019291,
"rewards/margins": 0.048367276787757874,
"rewards/rejected": -0.07864081859588623,
"step": 1245
},
{
"epoch": 1.731977818853974,
"grad_norm": 1.9126524925231934,
"learning_rate": 2.0927272727272726e-07,
"log_odds_chosen": 1.4341093301773071,
"log_odds_ratio": -0.3776791989803314,
"logits/chosen": 2.029585838317871,
"logits/rejected": 2.096311569213867,
"logps/chosen": -0.2593327462673187,
"logps/rejected": -0.8751354217529297,
"loss": 1.0188,
"nll_loss": 0.9809887409210205,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.025933273136615753,
"rewards/margins": 0.06158026307821274,
"rewards/rejected": -0.08751355111598969,
"step": 1250
},
{
"epoch": 1.7389094269870609,
"grad_norm": 2.2804696559906006,
"learning_rate": 2.0836363636363633e-07,
"log_odds_chosen": 1.4436743259429932,
"log_odds_ratio": -0.3733202815055847,
"logits/chosen": 2.1618611812591553,
"logits/rejected": 2.197058916091919,
"logps/chosen": -0.2881154417991638,
"logps/rejected": -0.9169884324073792,
"loss": 1.0506,
"nll_loss": 1.0132598876953125,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.02881154604256153,
"rewards/margins": 0.06288730353116989,
"rewards/rejected": -0.09169885516166687,
"step": 1255
},
{
"epoch": 1.7458410351201479,
"grad_norm": 2.1073522567749023,
"learning_rate": 2.0745454545454545e-07,
"log_odds_chosen": 1.6635074615478516,
"log_odds_ratio": -0.33457812666893005,
"logits/chosen": 2.074899911880493,
"logits/rejected": 2.122929573059082,
"logps/chosen": -0.27845534682273865,
"logps/rejected": -1.005134105682373,
"loss": 1.0756,
"nll_loss": 1.0421861410140991,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.027845535427331924,
"rewards/margins": 0.07266788184642792,
"rewards/rejected": -0.10051342844963074,
"step": 1260
},
{
"epoch": 1.752772643253235,
"grad_norm": 2.2422614097595215,
"learning_rate": 2.0654545454545452e-07,
"log_odds_chosen": 1.3691959381103516,
"log_odds_ratio": -0.40435412526130676,
"logits/chosen": 2.1057488918304443,
"logits/rejected": 2.1638669967651367,
"logps/chosen": -0.31880542635917664,
"logps/rejected": -0.903930127620697,
"loss": 1.0408,
"nll_loss": 1.000407099723816,
"rewards/accuracies": 0.8166666626930237,
"rewards/chosen": -0.031880538910627365,
"rewards/margins": 0.05851246044039726,
"rewards/rejected": -0.09039301425218582,
"step": 1265
},
{
"epoch": 1.7597042513863217,
"grad_norm": 2.287367820739746,
"learning_rate": 2.0563636363636362e-07,
"log_odds_chosen": 1.282787799835205,
"log_odds_ratio": -0.4151849150657654,
"logits/chosen": 2.05078387260437,
"logits/rejected": 2.125896692276001,
"logps/chosen": -0.28913381695747375,
"logps/rejected": -0.792908251285553,
"loss": 1.0073,
"nll_loss": 0.9657413363456726,
"rewards/accuracies": 0.8166666626930237,
"rewards/chosen": -0.028913382440805435,
"rewards/margins": 0.05037744715809822,
"rewards/rejected": -0.07929082959890366,
"step": 1270
},
{
"epoch": 1.7666358595194085,
"grad_norm": 1.7958959341049194,
"learning_rate": 2.0472727272727272e-07,
"log_odds_chosen": 1.724271297454834,
"log_odds_ratio": -0.2810591757297516,
"logits/chosen": 2.0967202186584473,
"logits/rejected": 2.161252021789551,
"logps/chosen": -0.2763175964355469,
"logps/rejected": -1.0735727548599243,
"loss": 1.0564,
"nll_loss": 1.0283379554748535,
"rewards/accuracies": 0.9083333611488342,
"rewards/chosen": -0.027631759643554688,
"rewards/margins": 0.07972551882266998,
"rewards/rejected": -0.10735727101564407,
"step": 1275
},
{
"epoch": 1.7735674676524953,
"grad_norm": 2.2755537033081055,
"learning_rate": 2.038181818181818e-07,
"log_odds_chosen": 1.4004557132720947,
"log_odds_ratio": -0.3993929326534271,
"logits/chosen": 2.111241102218628,
"logits/rejected": 2.151012897491455,
"logps/chosen": -0.29145321249961853,
"logps/rejected": -0.8968268036842346,
"loss": 0.983,
"nll_loss": 0.9430230259895325,
"rewards/accuracies": 0.8333333134651184,
"rewards/chosen": -0.029145320877432823,
"rewards/margins": 0.060537371784448624,
"rewards/rejected": -0.0896826833486557,
"step": 1280
},
{
"epoch": 1.7804990757855823,
"grad_norm": 1.726332187652588,
"learning_rate": 2.029090909090909e-07,
"log_odds_chosen": 1.7174845933914185,
"log_odds_ratio": -0.3723779022693634,
"logits/chosen": 2.092041254043579,
"logits/rejected": 2.163289785385132,
"logps/chosen": -0.2837842106819153,
"logps/rejected": -1.036734700202942,
"loss": 0.9897,
"nll_loss": 0.9525095224380493,
"rewards/accuracies": 0.8166666626930237,
"rewards/chosen": -0.028378423303365707,
"rewards/margins": 0.07529504597187042,
"rewards/rejected": -0.10367346554994583,
"step": 1285
},
{
"epoch": 1.787430683918669,
"grad_norm": 1.5710785388946533,
"learning_rate": 2.0199999999999998e-07,
"log_odds_chosen": 1.312154769897461,
"log_odds_ratio": -0.3994414210319519,
"logits/chosen": 2.0113887786865234,
"logits/rejected": 2.0753164291381836,
"logps/chosen": -0.272027850151062,
"logps/rejected": -0.7796825766563416,
"loss": 1.0201,
"nll_loss": 0.9801668524742126,
"rewards/accuracies": 0.8416666388511658,
"rewards/chosen": -0.02720278687775135,
"rewards/margins": 0.05076547712087631,
"rewards/rejected": -0.07796826213598251,
"step": 1290
},
{
"epoch": 1.7943622920517561,
"grad_norm": 1.920129418373108,
"learning_rate": 2.0109090909090908e-07,
"log_odds_chosen": 1.6343634128570557,
"log_odds_ratio": -0.3433375358581543,
"logits/chosen": 2.1635687351226807,
"logits/rejected": 2.2147443294525146,
"logps/chosen": -0.2949898838996887,
"logps/rejected": -1.0586962699890137,
"loss": 1.0487,
"nll_loss": 1.0143301486968994,
"rewards/accuracies": 0.8583333492279053,
"rewards/chosen": -0.029498988762497902,
"rewards/margins": 0.07637064903974533,
"rewards/rejected": -0.10586963593959808,
"step": 1295
},
{
"epoch": 1.801293900184843,
"grad_norm": 1.9142963886260986,
"learning_rate": 2.0018181818181818e-07,
"log_odds_chosen": 1.8189107179641724,
"log_odds_ratio": -0.30017244815826416,
"logits/chosen": 2.058302879333496,
"logits/rejected": 2.130945920944214,
"logps/chosen": -0.2555373013019562,
"logps/rejected": -1.0402501821517944,
"loss": 1.0479,
"nll_loss": 1.0179013013839722,
"rewards/accuracies": 0.9166666865348816,
"rewards/chosen": -0.025553731247782707,
"rewards/margins": 0.07847128063440323,
"rewards/rejected": -0.10402501374483109,
"step": 1300
},
{
"epoch": 1.8082255083179297,
"grad_norm": 2.576738119125366,
"learning_rate": 1.9927272727272725e-07,
"log_odds_chosen": 1.4616259336471558,
"log_odds_ratio": -0.35250329971313477,
"logits/chosen": 2.0860280990600586,
"logits/rejected": 2.160719394683838,
"logps/chosen": -0.26303815841674805,
"logps/rejected": -0.8395700454711914,
"loss": 1.039,
"nll_loss": 1.0037142038345337,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.026303818449378014,
"rewards/margins": 0.05765319615602493,
"rewards/rejected": -0.0839570015668869,
"step": 1305
},
{
"epoch": 1.8151571164510165,
"grad_norm": 2.0888595581054688,
"learning_rate": 1.9836363636363634e-07,
"log_odds_chosen": 1.4016786813735962,
"log_odds_ratio": -0.36707499623298645,
"logits/chosen": 2.0193212032318115,
"logits/rejected": 2.0818252563476562,
"logps/chosen": -0.3212401270866394,
"logps/rejected": -0.8710358142852783,
"loss": 1.0211,
"nll_loss": 0.9843639731407166,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.03212401270866394,
"rewards/margins": 0.05497957393527031,
"rewards/rejected": -0.08710358291864395,
"step": 1310
},
{
"epoch": 1.8220887245841035,
"grad_norm": 2.2947607040405273,
"learning_rate": 1.9745454545454544e-07,
"log_odds_chosen": 1.5820189714431763,
"log_odds_ratio": -0.3358237147331238,
"logits/chosen": 2.1270110607147217,
"logits/rejected": 2.1922497749328613,
"logps/chosen": -0.2790026068687439,
"logps/rejected": -0.9709238409996033,
"loss": 1.0543,
"nll_loss": 1.0207080841064453,
"rewards/accuracies": 0.8916666507720947,
"rewards/chosen": -0.02790026180446148,
"rewards/margins": 0.06919214129447937,
"rewards/rejected": -0.0970923900604248,
"step": 1315
},
{
"epoch": 1.8290203327171903,
"grad_norm": 2.2239065170288086,
"learning_rate": 1.9654545454545454e-07,
"log_odds_chosen": 1.8014733791351318,
"log_odds_ratio": -0.3064168691635132,
"logits/chosen": 2.168591022491455,
"logits/rejected": 2.2188949584960938,
"logps/chosen": -0.289535254240036,
"logps/rejected": -1.121842622756958,
"loss": 1.0043,
"nll_loss": 0.9736562967300415,
"rewards/accuracies": 0.8583333492279053,
"rewards/chosen": -0.02895352989435196,
"rewards/margins": 0.08323074132204056,
"rewards/rejected": -0.11218428611755371,
"step": 1320
},
{
"epoch": 1.8359519408502774,
"grad_norm": 1.6879349946975708,
"learning_rate": 1.956363636363636e-07,
"log_odds_chosen": 1.4831633567810059,
"log_odds_ratio": -0.36744216084480286,
"logits/chosen": 2.0397822856903076,
"logits/rejected": 2.090501070022583,
"logps/chosen": -0.27023550868034363,
"logps/rejected": -0.8913961052894592,
"loss": 0.9775,
"nll_loss": 0.9407526850700378,
"rewards/accuracies": 0.8416666388511658,
"rewards/chosen": -0.027023550122976303,
"rewards/margins": 0.06211606040596962,
"rewards/rejected": -0.08913961052894592,
"step": 1325
},
{
"epoch": 1.8428835489833642,
"grad_norm": 3.5290584564208984,
"learning_rate": 1.947272727272727e-07,
"log_odds_chosen": 1.599334716796875,
"log_odds_ratio": -0.32742586731910706,
"logits/chosen": 2.0843544006347656,
"logits/rejected": 2.1510770320892334,
"logps/chosen": -0.26283249258995056,
"logps/rejected": -0.9456924796104431,
"loss": 0.9577,
"nll_loss": 0.9249733686447144,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.026283251121640205,
"rewards/margins": 0.0682859942317009,
"rewards/rejected": -0.09456924349069595,
"step": 1330
},
{
"epoch": 1.849815157116451,
"grad_norm": 3.283820629119873,
"learning_rate": 1.938181818181818e-07,
"log_odds_chosen": 1.60695219039917,
"log_odds_ratio": -0.3642633557319641,
"logits/chosen": 2.1823904514312744,
"logits/rejected": 2.2375266551971436,
"logps/chosen": -0.3108011782169342,
"logps/rejected": -1.0633673667907715,
"loss": 1.032,
"nll_loss": 0.995610773563385,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.03108011931180954,
"rewards/margins": 0.07525661587715149,
"rewards/rejected": -0.10633675009012222,
"step": 1335
},
{
"epoch": 1.8567467652495377,
"grad_norm": 2.3443901538848877,
"learning_rate": 1.929090909090909e-07,
"log_odds_chosen": 1.549951195716858,
"log_odds_ratio": -0.3641236424446106,
"logits/chosen": 2.1002511978149414,
"logits/rejected": 2.1668472290039062,
"logps/chosen": -0.3097735047340393,
"logps/rejected": -1.032346487045288,
"loss": 1.0146,
"nll_loss": 0.9781424403190613,
"rewards/accuracies": 0.8666666746139526,
"rewards/chosen": -0.03097734972834587,
"rewards/margins": 0.07225729525089264,
"rewards/rejected": -0.1032346561551094,
"step": 1340
},
{
"epoch": 1.8636783733826248,
"grad_norm": 1.9872773885726929,
"learning_rate": 1.9199999999999997e-07,
"log_odds_chosen": 1.321940541267395,
"log_odds_ratio": -0.44285106658935547,
"logits/chosen": 2.0162153244018555,
"logits/rejected": 2.074026584625244,
"logps/chosen": -0.3019007742404938,
"logps/rejected": -0.8703359961509705,
"loss": 1.051,
"nll_loss": 1.0067391395568848,
"rewards/accuracies": 0.7666666507720947,
"rewards/chosen": -0.030190076678991318,
"rewards/margins": 0.05684352666139603,
"rewards/rejected": -0.08703361451625824,
"step": 1345
},
{
"epoch": 1.8706099815157118,
"grad_norm": 1.5316020250320435,
"learning_rate": 1.9109090909090907e-07,
"log_odds_chosen": 1.522079348564148,
"log_odds_ratio": -0.3456721305847168,
"logits/chosen": 2.055612802505493,
"logits/rejected": 2.1493773460388184,
"logps/chosen": -0.2610771358013153,
"logps/rejected": -0.8397551774978638,
"loss": 1.0111,
"nll_loss": 0.9765692353248596,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.02610771171748638,
"rewards/margins": 0.05786780267953873,
"rewards/rejected": -0.08397550880908966,
"step": 1350
},
{
"epoch": 1.8775415896487986,
"grad_norm": 1.9374873638153076,
"learning_rate": 1.9018181818181817e-07,
"log_odds_chosen": 1.3929773569107056,
"log_odds_ratio": -0.3517773449420929,
"logits/chosen": 2.034905433654785,
"logits/rejected": 2.079784631729126,
"logps/chosen": -0.2506738603115082,
"logps/rejected": -0.7879815697669983,
"loss": 1.0656,
"nll_loss": 1.0304430723190308,
"rewards/accuracies": 0.8416666388511658,
"rewards/chosen": -0.025067387148737907,
"rewards/margins": 0.05373078212141991,
"rewards/rejected": -0.07879816740751266,
"step": 1355
},
{
"epoch": 1.8844731977818854,
"grad_norm": 1.2929794788360596,
"learning_rate": 1.8927272727272726e-07,
"log_odds_chosen": 1.4262324571609497,
"log_odds_ratio": -0.3507004380226135,
"logits/chosen": 2.130051374435425,
"logits/rejected": 2.2022147178649902,
"logps/chosen": -0.30148500204086304,
"logps/rejected": -0.8996387124061584,
"loss": 1.0229,
"nll_loss": 0.9878306984901428,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.030148500576615334,
"rewards/margins": 0.05981536582112312,
"rewards/rejected": -0.0899638757109642,
"step": 1360
},
{
"epoch": 1.8914048059149722,
"grad_norm": 2.3704674243927,
"learning_rate": 1.8836363636363633e-07,
"log_odds_chosen": 1.4139504432678223,
"log_odds_ratio": -0.3692961037158966,
"logits/chosen": 2.1010749340057373,
"logits/rejected": 2.1468594074249268,
"logps/chosen": -0.3202250301837921,
"logps/rejected": -0.9537723660469055,
"loss": 1.0341,
"nll_loss": 0.9971208572387695,
"rewards/accuracies": 0.8333333134651184,
"rewards/chosen": -0.03202249854803085,
"rewards/margins": 0.0633547380566597,
"rewards/rejected": -0.09537723660469055,
"step": 1365
},
{
"epoch": 1.8983364140480592,
"grad_norm": 1.7380679845809937,
"learning_rate": 1.8745454545454543e-07,
"log_odds_chosen": 1.5445674657821655,
"log_odds_ratio": -0.3786125183105469,
"logits/chosen": 2.0269358158111572,
"logits/rejected": 2.0798943042755127,
"logps/chosen": -0.28776755928993225,
"logps/rejected": -0.9360780715942383,
"loss": 1.041,
"nll_loss": 1.0031627416610718,
"rewards/accuracies": 0.8416666388511658,
"rewards/chosen": -0.028776757419109344,
"rewards/margins": 0.06483104825019836,
"rewards/rejected": -0.0936078131198883,
"step": 1370
},
{
"epoch": 1.905268022181146,
"grad_norm": 2.3464677333831787,
"learning_rate": 1.8654545454545453e-07,
"log_odds_chosen": 1.3654427528381348,
"log_odds_ratio": -0.4110698699951172,
"logits/chosen": 2.1894376277923584,
"logits/rejected": 2.233098268508911,
"logps/chosen": -0.30282995104789734,
"logps/rejected": -0.8446999788284302,
"loss": 0.9941,
"nll_loss": 0.9530263543128967,
"rewards/accuracies": 0.8083333373069763,
"rewards/chosen": -0.030282998457551003,
"rewards/margins": 0.05418700724840164,
"rewards/rejected": -0.08447001129388809,
"step": 1375
},
{
"epoch": 1.912199630314233,
"grad_norm": 2.243062973022461,
"learning_rate": 1.8563636363636363e-07,
"log_odds_chosen": 1.3511884212493896,
"log_odds_ratio": -0.4768778681755066,
"logits/chosen": 2.0655782222747803,
"logits/rejected": 2.1229450702667236,
"logps/chosen": -0.37024933099746704,
"logps/rejected": -0.9667562246322632,
"loss": 1.0402,
"nll_loss": 0.9925115704536438,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.03702492639422417,
"rewards/margins": 0.05965068191289902,
"rewards/rejected": -0.09667561948299408,
"step": 1380
},
{
"epoch": 1.9191312384473198,
"grad_norm": 2.5852763652801514,
"learning_rate": 1.847272727272727e-07,
"log_odds_chosen": 1.548577070236206,
"log_odds_ratio": -0.3329441249370575,
"logits/chosen": 2.083811044692993,
"logits/rejected": 2.13382887840271,
"logps/chosen": -0.3115028738975525,
"logps/rejected": -1.0105055570602417,
"loss": 1.0092,
"nll_loss": 0.9758760929107666,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.03115028701722622,
"rewards/margins": 0.069900281727314,
"rewards/rejected": -0.10105058550834656,
"step": 1385
},
{
"epoch": 1.9260628465804066,
"grad_norm": 2.076587677001953,
"learning_rate": 1.838181818181818e-07,
"log_odds_chosen": 1.193703055381775,
"log_odds_ratio": -0.4486643075942993,
"logits/chosen": 2.1062402725219727,
"logits/rejected": 2.1550328731536865,
"logps/chosen": -0.3409879505634308,
"logps/rejected": -0.8286535739898682,
"loss": 1.0018,
"nll_loss": 0.9569076299667358,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.0340987928211689,
"rewards/margins": 0.048766572028398514,
"rewards/rejected": -0.08286535739898682,
"step": 1390
},
{
"epoch": 1.9329944547134934,
"grad_norm": 2.120288610458374,
"learning_rate": 1.829090909090909e-07,
"log_odds_chosen": 1.4117330312728882,
"log_odds_ratio": -0.3424789011478424,
"logits/chosen": 2.101274013519287,
"logits/rejected": 2.1526596546173096,
"logps/chosen": -0.2649425268173218,
"logps/rejected": -0.7853403091430664,
"loss": 0.9994,
"nll_loss": 0.9651403427124023,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.026494255289435387,
"rewards/margins": 0.05203978344798088,
"rewards/rejected": -0.07853402942419052,
"step": 1395
},
{
"epoch": 1.9399260628465804,
"grad_norm": 2.1944470405578613,
"learning_rate": 1.82e-07,
"log_odds_chosen": 1.5040756464004517,
"log_odds_ratio": -0.37599992752075195,
"logits/chosen": 2.0189223289489746,
"logits/rejected": 2.067805051803589,
"logps/chosen": -0.2981501519680023,
"logps/rejected": -0.9519171118736267,
"loss": 1.0507,
"nll_loss": 1.013134241104126,
"rewards/accuracies": 0.8333333134651184,
"rewards/chosen": -0.02981501631438732,
"rewards/margins": 0.06537671387195587,
"rewards/rejected": -0.09519171714782715,
"step": 1400
},
{
"epoch": 1.9468576709796674,
"grad_norm": 1.9627952575683594,
"learning_rate": 1.8109090909090906e-07,
"log_odds_chosen": 1.6703484058380127,
"log_odds_ratio": -0.32047805190086365,
"logits/chosen": 2.0558595657348633,
"logits/rejected": 2.126713275909424,
"logps/chosen": -0.2901943325996399,
"logps/rejected": -1.0323206186294556,
"loss": 1.0102,
"nll_loss": 0.9781351089477539,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.029019435867667198,
"rewards/margins": 0.07421263307332993,
"rewards/rejected": -0.10323206335306168,
"step": 1405
},
{
"epoch": 1.9537892791127542,
"grad_norm": 2.921412706375122,
"learning_rate": 1.8018181818181816e-07,
"log_odds_chosen": 1.3125395774841309,
"log_odds_ratio": -0.4437628984451294,
"logits/chosen": 2.1079742908477783,
"logits/rejected": 2.172934055328369,
"logps/chosen": -0.3079548180103302,
"logps/rejected": -0.8908718228340149,
"loss": 1.0198,
"nll_loss": 0.9754597544670105,
"rewards/accuracies": 0.8083333373069763,
"rewards/chosen": -0.030795477330684662,
"rewards/margins": 0.058291707187891006,
"rewards/rejected": -0.08908718079328537,
"step": 1410
},
{
"epoch": 1.960720887245841,
"grad_norm": 2.2531754970550537,
"learning_rate": 1.7927272727272725e-07,
"log_odds_chosen": 1.4887337684631348,
"log_odds_ratio": -0.3652048408985138,
"logits/chosen": 2.036674976348877,
"logits/rejected": 2.0993988513946533,
"logps/chosen": -0.2855999767780304,
"logps/rejected": -0.8912386298179626,
"loss": 1.0189,
"nll_loss": 0.9823691248893738,
"rewards/accuracies": 0.8416666388511658,
"rewards/chosen": -0.02855999954044819,
"rewards/margins": 0.06056387349963188,
"rewards/rejected": -0.08912386745214462,
"step": 1415
},
{
"epoch": 1.9676524953789278,
"grad_norm": 2.0116090774536133,
"learning_rate": 1.7836363636363635e-07,
"log_odds_chosen": 1.619387149810791,
"log_odds_ratio": -0.3138326406478882,
"logits/chosen": 2.0645925998687744,
"logits/rejected": 2.129347801208496,
"logps/chosen": -0.2735585570335388,
"logps/rejected": -0.9985336065292358,
"loss": 0.9783,
"nll_loss": 0.94692462682724,
"rewards/accuracies": 0.8666666746139526,
"rewards/chosen": -0.027355853468179703,
"rewards/margins": 0.07249751687049866,
"rewards/rejected": -0.09985338151454926,
"step": 1420
},
{
"epoch": 1.9745841035120146,
"grad_norm": 3.110692024230957,
"learning_rate": 1.7745454545454545e-07,
"log_odds_chosen": 1.2186717987060547,
"log_odds_ratio": -0.45589005947113037,
"logits/chosen": 2.0935304164886475,
"logits/rejected": 2.1518046855926514,
"logps/chosen": -0.35527312755584717,
"logps/rejected": -0.8807690739631653,
"loss": 1.036,
"nll_loss": 0.9903665781021118,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.035527314990758896,
"rewards/margins": 0.05254959315061569,
"rewards/rejected": -0.08807691931724548,
"step": 1425
},
{
"epoch": 1.9815157116451017,
"grad_norm": 2.146620750427246,
"learning_rate": 1.7654545454545452e-07,
"log_odds_chosen": 1.3884727954864502,
"log_odds_ratio": -0.38641104102134705,
"logits/chosen": 2.083814859390259,
"logits/rejected": 2.1228816509246826,
"logps/chosen": -0.29765060544013977,
"logps/rejected": -0.8775668740272522,
"loss": 1.0353,
"nll_loss": 0.9966583847999573,
"rewards/accuracies": 0.8416666388511658,
"rewards/chosen": -0.029765058308839798,
"rewards/margins": 0.0579916313290596,
"rewards/rejected": -0.0877566859126091,
"step": 1430
},
{
"epoch": 1.9884473197781887,
"grad_norm": 1.4122179746627808,
"learning_rate": 1.7563636363636362e-07,
"log_odds_chosen": 1.6712970733642578,
"log_odds_ratio": -0.31637993454933167,
"logits/chosen": 2.1619114875793457,
"logits/rejected": 2.231441020965576,
"logps/chosen": -0.2716436982154846,
"logps/rejected": -1.0012226104736328,
"loss": 1.0147,
"nll_loss": 0.9830483794212341,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.027164369821548462,
"rewards/margins": 0.07295789569616318,
"rewards/rejected": -0.10012225806713104,
"step": 1435
},
{
"epoch": 1.9953789279112755,
"grad_norm": 2.2266688346862793,
"learning_rate": 1.7472727272727271e-07,
"log_odds_chosen": 1.3788424730300903,
"log_odds_ratio": -0.3804737329483032,
"logits/chosen": 2.135176181793213,
"logits/rejected": 2.1902530193328857,
"logps/chosen": -0.31672823429107666,
"logps/rejected": -0.9234539866447449,
"loss": 1.0622,
"nll_loss": 1.0241928100585938,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.031672827899456024,
"rewards/margins": 0.0606725737452507,
"rewards/rejected": -0.09234539419412613,
"step": 1440
},
{
"epoch": 2.0013863216266174,
"grad_norm": 3.505758047103882,
"learning_rate": 1.738181818181818e-07,
"log_odds_chosen": 1.3300753831863403,
"log_odds_ratio": -0.41754671931266785,
"logits/chosen": 2.1010961532592773,
"logits/rejected": 2.1418211460113525,
"logps/chosen": -0.28666701912879944,
"logps/rejected": -0.8230305910110474,
"loss": 0.9121,
"nll_loss": 1.00087308883667,
"rewards/accuracies": 0.7884615659713745,
"rewards/chosen": -0.028666695579886436,
"rewards/margins": 0.053636353462934494,
"rewards/rejected": -0.08230306208133698,
"step": 1445
},
{
"epoch": 2.0083179297597042,
"grad_norm": 2.070633888244629,
"learning_rate": 1.7290909090909088e-07,
"log_odds_chosen": 1.7298386096954346,
"log_odds_ratio": -0.31547990441322327,
"logits/chosen": 2.099557399749756,
"logits/rejected": 2.149728536605835,
"logps/chosen": -0.2745874524116516,
"logps/rejected": -1.0096734762191772,
"loss": 1.045,
"nll_loss": 1.0134097337722778,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.02745874598622322,
"rewards/margins": 0.0735086053609848,
"rewards/rejected": -0.10096735507249832,
"step": 1450
},
{
"epoch": 2.015249537892791,
"grad_norm": 2.7808001041412354,
"learning_rate": 1.7199999999999998e-07,
"log_odds_chosen": 1.4617559909820557,
"log_odds_ratio": -0.32592448592185974,
"logits/chosen": 2.0248734951019287,
"logits/rejected": 2.08671236038208,
"logps/chosen": -0.24415723979473114,
"logps/rejected": -0.8095900416374207,
"loss": 1.0132,
"nll_loss": 0.980653703212738,
"rewards/accuracies": 0.8833333253860474,
"rewards/chosen": -0.024415725842118263,
"rewards/margins": 0.05654327571392059,
"rewards/rejected": -0.08095899969339371,
"step": 1455
},
{
"epoch": 2.022181146025878,
"grad_norm": 2.6555349826812744,
"learning_rate": 1.7109090909090908e-07,
"log_odds_chosen": 1.5638434886932373,
"log_odds_ratio": -0.36828985810279846,
"logits/chosen": 2.0425848960876465,
"logits/rejected": 2.0981991291046143,
"logps/chosen": -0.2785496115684509,
"logps/rejected": -0.9188894629478455,
"loss": 1.0393,
"nll_loss": 1.0024975538253784,
"rewards/accuracies": 0.8666666746139526,
"rewards/chosen": -0.027854960411787033,
"rewards/margins": 0.06403397768735886,
"rewards/rejected": -0.09188893437385559,
"step": 1460
},
{
"epoch": 2.029112754158965,
"grad_norm": 2.058720588684082,
"learning_rate": 1.7018181818181817e-07,
"log_odds_chosen": 1.749341368675232,
"log_odds_ratio": -0.3121403753757477,
"logits/chosen": 2.096733331680298,
"logits/rejected": 2.1691980361938477,
"logps/chosen": -0.2867683470249176,
"logps/rejected": -1.0571597814559937,
"loss": 1.0299,
"nll_loss": 0.9987198114395142,
"rewards/accuracies": 0.8916666507720947,
"rewards/chosen": -0.02867683582007885,
"rewards/margins": 0.07703914493322372,
"rewards/rejected": -0.10571598261594772,
"step": 1465
},
{
"epoch": 2.036044362292052,
"grad_norm": 2.216594934463501,
"learning_rate": 1.6927272727272724e-07,
"log_odds_chosen": 1.6534370183944702,
"log_odds_ratio": -0.3209837079048157,
"logits/chosen": 2.0079751014709473,
"logits/rejected": 2.092797040939331,
"logps/chosen": -0.26305797696113586,
"logps/rejected": -0.9659760594367981,
"loss": 1.0213,
"nll_loss": 0.989173173904419,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.026305796578526497,
"rewards/margins": 0.07029180228710175,
"rewards/rejected": -0.09659762680530548,
"step": 1470
},
{
"epoch": 2.0429759704251387,
"grad_norm": 2.0765326023101807,
"learning_rate": 1.6836363636363634e-07,
"log_odds_chosen": 1.5068204402923584,
"log_odds_ratio": -0.3684650659561157,
"logits/chosen": 2.1914827823638916,
"logits/rejected": 2.2479913234710693,
"logps/chosen": -0.3309532403945923,
"logps/rejected": -1.0164872407913208,
"loss": 1.0249,
"nll_loss": 0.9880392551422119,
"rewards/accuracies": 0.8166666626930237,
"rewards/chosen": -0.03309532627463341,
"rewards/margins": 0.06855340301990509,
"rewards/rejected": -0.1016487255692482,
"step": 1475
},
{
"epoch": 2.0499075785582255,
"grad_norm": 2.91998028755188,
"learning_rate": 1.6745454545454544e-07,
"log_odds_chosen": 1.4909127950668335,
"log_odds_ratio": -0.38384488224983215,
"logits/chosen": 2.0744376182556152,
"logits/rejected": 2.132093906402588,
"logps/chosen": -0.30625709891319275,
"logps/rejected": -0.9366697072982788,
"loss": 1.0368,
"nll_loss": 0.9983736872673035,
"rewards/accuracies": 0.8833333253860474,
"rewards/chosen": -0.030625708401203156,
"rewards/margins": 0.06304127722978592,
"rewards/rejected": -0.09366698563098907,
"step": 1480
},
{
"epoch": 2.0568391866913123,
"grad_norm": 2.1422932147979736,
"learning_rate": 1.6654545454545454e-07,
"log_odds_chosen": 1.6511032581329346,
"log_odds_ratio": -0.3445586562156677,
"logits/chosen": 2.038560152053833,
"logits/rejected": 2.1156678199768066,
"logps/chosen": -0.3046126961708069,
"logps/rejected": -1.0538097620010376,
"loss": 1.0316,
"nll_loss": 0.9971045255661011,
"rewards/accuracies": 0.8666666746139526,
"rewards/chosen": -0.030461272224783897,
"rewards/margins": 0.07491971552371979,
"rewards/rejected": -0.10538098216056824,
"step": 1485
},
{
"epoch": 2.063770794824399,
"grad_norm": 2.381495475769043,
"learning_rate": 1.656363636363636e-07,
"log_odds_chosen": 1.7995991706848145,
"log_odds_ratio": -0.27245932817459106,
"logits/chosen": 2.140026569366455,
"logits/rejected": 2.1896448135375977,
"logps/chosen": -0.25530093908309937,
"logps/rejected": -1.0938752889633179,
"loss": 1.0032,
"nll_loss": 0.9759642481803894,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.025530096143484116,
"rewards/margins": 0.08385743200778961,
"rewards/rejected": -0.10938753187656403,
"step": 1490
},
{
"epoch": 2.0707024029574863,
"grad_norm": 1.4904897212982178,
"learning_rate": 1.647272727272727e-07,
"log_odds_chosen": 1.5017544031143188,
"log_odds_ratio": -0.35147780179977417,
"logits/chosen": 2.0568175315856934,
"logits/rejected": 2.1001527309417725,
"logps/chosen": -0.2676287889480591,
"logps/rejected": -0.9620057344436646,
"loss": 1.0647,
"nll_loss": 1.0295929908752441,
"rewards/accuracies": 0.8583333492279053,
"rewards/chosen": -0.026762880384922028,
"rewards/margins": 0.06943770498037338,
"rewards/rejected": -0.09620057791471481,
"step": 1495
},
{
"epoch": 2.077634011090573,
"grad_norm": 2.361671209335327,
"learning_rate": 1.638181818181818e-07,
"log_odds_chosen": 1.625975489616394,
"log_odds_ratio": -0.323038786649704,
"logits/chosen": 2.047917366027832,
"logits/rejected": 2.1029675006866455,
"logps/chosen": -0.2699020802974701,
"logps/rejected": -0.9326964020729065,
"loss": 1.0517,
"nll_loss": 1.0193482637405396,
"rewards/accuracies": 0.8666666746139526,
"rewards/chosen": -0.02699020877480507,
"rewards/margins": 0.06627943366765976,
"rewards/rejected": -0.09326963871717453,
"step": 1500
},
{
"epoch": 2.08456561922366,
"grad_norm": 1.3164469003677368,
"learning_rate": 1.629090909090909e-07,
"log_odds_chosen": 1.7589662075042725,
"log_odds_ratio": -0.2796500623226166,
"logits/chosen": 2.0355899333953857,
"logits/rejected": 2.1100242137908936,
"logps/chosen": -0.24932418763637543,
"logps/rejected": -0.9945331811904907,
"loss": 0.9908,
"nll_loss": 0.9628455638885498,
"rewards/accuracies": 0.9083333611488342,
"rewards/chosen": -0.024932416155934334,
"rewards/margins": 0.07452090084552765,
"rewards/rejected": -0.09945331513881683,
"step": 1505
},
{
"epoch": 2.0914972273567467,
"grad_norm": 2.498619318008423,
"learning_rate": 1.62e-07,
"log_odds_chosen": 1.4863014221191406,
"log_odds_ratio": -0.40170204639434814,
"logits/chosen": 2.0163559913635254,
"logits/rejected": 2.0721843242645264,
"logps/chosen": -0.2834693491458893,
"logps/rejected": -0.9046918153762817,
"loss": 1.0191,
"nll_loss": 0.9789711833000183,
"rewards/accuracies": 0.8083333373069763,
"rewards/chosen": -0.028346937149763107,
"rewards/margins": 0.06212225183844566,
"rewards/rejected": -0.09046918898820877,
"step": 1510
},
{
"epoch": 2.0984288354898335,
"grad_norm": 1.4074289798736572,
"learning_rate": 1.6109090909090907e-07,
"log_odds_chosen": 1.654536485671997,
"log_odds_ratio": -0.34274205565452576,
"logits/chosen": 2.0640828609466553,
"logits/rejected": 2.1356544494628906,
"logps/chosen": -0.2802557945251465,
"logps/rejected": -1.0241177082061768,
"loss": 1.0183,
"nll_loss": 0.98404860496521,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.02802557870745659,
"rewards/margins": 0.07438618689775467,
"rewards/rejected": -0.10241175442934036,
"step": 1515
},
{
"epoch": 2.1053604436229207,
"grad_norm": 1.9247922897338867,
"learning_rate": 1.6018181818181816e-07,
"log_odds_chosen": 1.4689146280288696,
"log_odds_ratio": -0.3932505249977112,
"logits/chosen": 2.040996789932251,
"logits/rejected": 2.0966296195983887,
"logps/chosen": -0.2887535095214844,
"logps/rejected": -0.8648843765258789,
"loss": 1.0325,
"nll_loss": 0.993179440498352,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.02887534908950329,
"rewards/margins": 0.05761308595538139,
"rewards/rejected": -0.08648844808340073,
"step": 1520
},
{
"epoch": 2.1122920517560075,
"grad_norm": 2.162506103515625,
"learning_rate": 1.5927272727272726e-07,
"log_odds_chosen": 1.542865514755249,
"log_odds_ratio": -0.32878366112709045,
"logits/chosen": 2.064614772796631,
"logits/rejected": 2.1328229904174805,
"logps/chosen": -0.2473837435245514,
"logps/rejected": -0.8912278413772583,
"loss": 1.0342,
"nll_loss": 1.001328706741333,
"rewards/accuracies": 0.8583333492279053,
"rewards/chosen": -0.02473837323486805,
"rewards/margins": 0.06438441574573517,
"rewards/rejected": -0.08912278711795807,
"step": 1525
},
{
"epoch": 2.1192236598890943,
"grad_norm": 2.9879612922668457,
"learning_rate": 1.5836363636363636e-07,
"log_odds_chosen": 1.6084933280944824,
"log_odds_ratio": -0.3255232870578766,
"logits/chosen": 2.105452537536621,
"logits/rejected": 2.161074638366699,
"logps/chosen": -0.3203544020652771,
"logps/rejected": -1.0134159326553345,
"loss": 1.0447,
"nll_loss": 1.0121761560440063,
"rewards/accuracies": 0.8833333253860474,
"rewards/chosen": -0.03203544020652771,
"rewards/margins": 0.0693061575293541,
"rewards/rejected": -0.1013416051864624,
"step": 1530
},
{
"epoch": 2.126155268022181,
"grad_norm": 3.2565925121307373,
"learning_rate": 1.5745454545454543e-07,
"log_odds_chosen": 1.522952675819397,
"log_odds_ratio": -0.3534841239452362,
"logits/chosen": 2.0590784549713135,
"logits/rejected": 2.1140329837799072,
"logps/chosen": -0.26037055253982544,
"logps/rejected": -0.8886002898216248,
"loss": 0.986,
"nll_loss": 0.9506571888923645,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.026037057861685753,
"rewards/margins": 0.06282297521829605,
"rewards/rejected": -0.08886002749204636,
"step": 1535
},
{
"epoch": 2.133086876155268,
"grad_norm": 2.2923009395599365,
"learning_rate": 1.5654545454545453e-07,
"log_odds_chosen": 1.7157777547836304,
"log_odds_ratio": -0.31327977776527405,
"logits/chosen": 2.0170087814331055,
"logits/rejected": 2.061363935470581,
"logps/chosen": -0.2875458300113678,
"logps/rejected": -1.1021568775177002,
"loss": 1.0536,
"nll_loss": 1.022287130355835,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.02875458262860775,
"rewards/margins": 0.08146108686923981,
"rewards/rejected": -0.1102156713604927,
"step": 1540
},
{
"epoch": 2.1400184842883547,
"grad_norm": 4.337259769439697,
"learning_rate": 1.5563636363636362e-07,
"log_odds_chosen": 1.6073641777038574,
"log_odds_ratio": -0.32192662358283997,
"logits/chosen": 2.0638022422790527,
"logits/rejected": 2.128871440887451,
"logps/chosen": -0.24944542348384857,
"logps/rejected": -0.8842807412147522,
"loss": 0.9852,
"nll_loss": 0.9530341625213623,
"rewards/accuracies": 0.8416666388511658,
"rewards/chosen": -0.024944543838500977,
"rewards/margins": 0.06348354369401932,
"rewards/rejected": -0.0884280651807785,
"step": 1545
},
{
"epoch": 2.146950092421442,
"grad_norm": 1.788140058517456,
"learning_rate": 1.5472727272727272e-07,
"log_odds_chosen": 1.9304050207138062,
"log_odds_ratio": -0.2865446209907532,
"logits/chosen": 2.093130588531494,
"logits/rejected": 2.1594488620758057,
"logps/chosen": -0.26757973432540894,
"logps/rejected": -1.1472647190093994,
"loss": 1.032,
"nll_loss": 1.003312349319458,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.026757972314953804,
"rewards/margins": 0.08796848356723785,
"rewards/rejected": -0.1147264614701271,
"step": 1550
},
{
"epoch": 2.1538817005545288,
"grad_norm": 2.651644706726074,
"learning_rate": 1.538181818181818e-07,
"log_odds_chosen": 1.6535481214523315,
"log_odds_ratio": -0.34444233775138855,
"logits/chosen": 2.0424141883850098,
"logits/rejected": 2.0861897468566895,
"logps/chosen": -0.24586957693099976,
"logps/rejected": -0.9815968871116638,
"loss": 1.0189,
"nll_loss": 0.9844585061073303,
"rewards/accuracies": 0.7833333611488342,
"rewards/chosen": -0.024586956948041916,
"rewards/margins": 0.07357273995876312,
"rewards/rejected": -0.09815969318151474,
"step": 1555
},
{
"epoch": 2.1608133086876156,
"grad_norm": 1.644753336906433,
"learning_rate": 1.529090909090909e-07,
"log_odds_chosen": 1.670788049697876,
"log_odds_ratio": -0.32996612787246704,
"logits/chosen": 2.052905321121216,
"logits/rejected": 2.1133363246917725,
"logps/chosen": -0.2544824182987213,
"logps/rejected": -0.9820945858955383,
"loss": 1.0313,
"nll_loss": 0.9983068704605103,
"rewards/accuracies": 0.8583333492279053,
"rewards/chosen": -0.02544824406504631,
"rewards/margins": 0.07276120781898499,
"rewards/rejected": -0.09820946305990219,
"step": 1560
},
{
"epoch": 2.1677449168207024,
"grad_norm": 6.568633079528809,
"learning_rate": 1.5199999999999998e-07,
"log_odds_chosen": 1.5977410078048706,
"log_odds_ratio": -0.3366919457912445,
"logits/chosen": 1.9728270769119263,
"logits/rejected": 2.0407023429870605,
"logps/chosen": -0.28399333357810974,
"logps/rejected": -0.991985559463501,
"loss": 0.9979,
"nll_loss": 0.9642470479011536,
"rewards/accuracies": 0.8583333492279053,
"rewards/chosen": -0.028399331495165825,
"rewards/margins": 0.07079920917749405,
"rewards/rejected": -0.09919854253530502,
"step": 1565
},
{
"epoch": 2.174676524953789,
"grad_norm": 1.5433924198150635,
"learning_rate": 1.5109090909090908e-07,
"log_odds_chosen": 1.749664306640625,
"log_odds_ratio": -0.3313737213611603,
"logits/chosen": 2.0340335369110107,
"logits/rejected": 2.1193594932556152,
"logps/chosen": -0.28188708424568176,
"logps/rejected": -1.0578782558441162,
"loss": 1.0097,
"nll_loss": 0.9766021370887756,
"rewards/accuracies": 0.8583333492279053,
"rewards/chosen": -0.028188709169626236,
"rewards/margins": 0.07759912312030792,
"rewards/rejected": -0.10578783601522446,
"step": 1570
},
{
"epoch": 2.181608133086876,
"grad_norm": 2.6189091205596924,
"learning_rate": 1.5018181818181815e-07,
"log_odds_chosen": 1.6792007684707642,
"log_odds_ratio": -0.3193473219871521,
"logits/chosen": 2.0829341411590576,
"logits/rejected": 2.135345697402954,
"logps/chosen": -0.28347066044807434,
"logps/rejected": -1.055553913116455,
"loss": 1.0235,
"nll_loss": 0.9915785193443298,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.028347067534923553,
"rewards/margins": 0.07720831781625748,
"rewards/rejected": -0.10555538535118103,
"step": 1575
},
{
"epoch": 2.188539741219963,
"grad_norm": 3.955164670944214,
"learning_rate": 1.4927272727272725e-07,
"log_odds_chosen": 1.7316787242889404,
"log_odds_ratio": -0.34139496088027954,
"logits/chosen": 2.0785231590270996,
"logits/rejected": 2.1350691318511963,
"logps/chosen": -0.3276236951351166,
"logps/rejected": -1.071801781654358,
"loss": 1.0001,
"nll_loss": 0.9659791588783264,
"rewards/accuracies": 0.8833333253860474,
"rewards/chosen": -0.03276237100362778,
"rewards/margins": 0.07441780716180801,
"rewards/rejected": -0.10718018561601639,
"step": 1580
},
{
"epoch": 2.19547134935305,
"grad_norm": 2.1189846992492676,
"learning_rate": 1.4836363636363635e-07,
"log_odds_chosen": 1.6613436937332153,
"log_odds_ratio": -0.30669593811035156,
"logits/chosen": 2.053230047225952,
"logits/rejected": 2.096280097961426,
"logps/chosen": -0.29614728689193726,
"logps/rejected": -0.9985705018043518,
"loss": 1.0128,
"nll_loss": 0.9821043610572815,
"rewards/accuracies": 0.9083333611488342,
"rewards/chosen": -0.029614729806780815,
"rewards/margins": 0.07024230808019638,
"rewards/rejected": -0.09985704720020294,
"step": 1585
},
{
"epoch": 2.202402957486137,
"grad_norm": 2.127919912338257,
"learning_rate": 1.4745454545454544e-07,
"log_odds_chosen": 1.785252332687378,
"log_odds_ratio": -0.3034234941005707,
"logits/chosen": 2.0558953285217285,
"logits/rejected": 2.1106560230255127,
"logps/chosen": -0.24885861575603485,
"logps/rejected": -1.0158021450042725,
"loss": 0.9837,
"nll_loss": 0.9533202648162842,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.024885861203074455,
"rewards/margins": 0.07669434696435928,
"rewards/rejected": -0.10158021748065948,
"step": 1590
},
{
"epoch": 2.2093345656192236,
"grad_norm": 1.5249942541122437,
"learning_rate": 1.4654545454545454e-07,
"log_odds_chosen": 1.6279139518737793,
"log_odds_ratio": -0.318685919046402,
"logits/chosen": 1.9436652660369873,
"logits/rejected": 2.0145747661590576,
"logps/chosen": -0.2697173058986664,
"logps/rejected": -0.9779646992683411,
"loss": 1.0295,
"nll_loss": 0.9976499080657959,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.02697172947227955,
"rewards/margins": 0.07082473486661911,
"rewards/rejected": -0.0977964699268341,
"step": 1595
},
{
"epoch": 2.2162661737523104,
"grad_norm": 1.8669286966323853,
"learning_rate": 1.456363636363636e-07,
"log_odds_chosen": 1.5612767934799194,
"log_odds_ratio": -0.33474841713905334,
"logits/chosen": 2.109074831008911,
"logits/rejected": 2.1647109985351562,
"logps/chosen": -0.3184746503829956,
"logps/rejected": -0.9675430655479431,
"loss": 1.0129,
"nll_loss": 0.9794076681137085,
"rewards/accuracies": 0.8833333253860474,
"rewards/chosen": -0.03184746578335762,
"rewards/margins": 0.06490684300661087,
"rewards/rejected": -0.09675431996583939,
"step": 1600
},
{
"epoch": 2.223197781885397,
"grad_norm": 3.105397939682007,
"learning_rate": 1.447272727272727e-07,
"log_odds_chosen": 1.330115556716919,
"log_odds_ratio": -0.4384697377681732,
"logits/chosen": 1.977685809135437,
"logits/rejected": 2.0224320888519287,
"logps/chosen": -0.32018712162971497,
"logps/rejected": -0.841995894908905,
"loss": 0.9974,
"nll_loss": 0.9535176157951355,
"rewards/accuracies": 0.8166666626930237,
"rewards/chosen": -0.03201870992779732,
"rewards/margins": 0.05218088626861572,
"rewards/rejected": -0.08419959247112274,
"step": 1605
},
{
"epoch": 2.2301293900184844,
"grad_norm": 2.435016393661499,
"learning_rate": 1.438181818181818e-07,
"log_odds_chosen": 1.5461903810501099,
"log_odds_ratio": -0.37018927931785583,
"logits/chosen": 2.003584146499634,
"logits/rejected": 2.068721294403076,
"logps/chosen": -0.2579006850719452,
"logps/rejected": -0.9120422601699829,
"loss": 0.9777,
"nll_loss": 0.9407215118408203,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.025790071114897728,
"rewards/margins": 0.06541414558887482,
"rewards/rejected": -0.0912042185664177,
"step": 1610
},
{
"epoch": 2.2370609981515712,
"grad_norm": 2.2165729999542236,
"learning_rate": 1.429090909090909e-07,
"log_odds_chosen": 1.4898757934570312,
"log_odds_ratio": -0.36625441908836365,
"logits/chosen": 2.0889649391174316,
"logits/rejected": 2.1458566188812256,
"logps/chosen": -0.2834877669811249,
"logps/rejected": -0.8642836809158325,
"loss": 1.0307,
"nll_loss": 0.994057834148407,
"rewards/accuracies": 0.8583333492279053,
"rewards/chosen": -0.028348777443170547,
"rewards/margins": 0.05807959660887718,
"rewards/rejected": -0.08642836660146713,
"step": 1615
},
{
"epoch": 2.243992606284658,
"grad_norm": 2.29976224899292,
"learning_rate": 1.4199999999999997e-07,
"log_odds_chosen": 1.654101848602295,
"log_odds_ratio": -0.3310154378414154,
"logits/chosen": 1.9520740509033203,
"logits/rejected": 1.9996510744094849,
"logps/chosen": -0.29835817217826843,
"logps/rejected": -1.0397998094558716,
"loss": 1.051,
"nll_loss": 1.0178704261779785,
"rewards/accuracies": 0.8583333492279053,
"rewards/chosen": -0.02983582206070423,
"rewards/margins": 0.074144147336483,
"rewards/rejected": -0.10397996753454208,
"step": 1620
},
{
"epoch": 2.250924214417745,
"grad_norm": 2.7785146236419678,
"learning_rate": 1.4109090909090907e-07,
"log_odds_chosen": 1.701553225517273,
"log_odds_ratio": -0.27737200260162354,
"logits/chosen": 2.0182878971099854,
"logits/rejected": 2.088885545730591,
"logps/chosen": -0.2814914286136627,
"logps/rejected": -1.0081548690795898,
"loss": 1.0094,
"nll_loss": 0.9816693067550659,
"rewards/accuracies": 0.9166666865348816,
"rewards/chosen": -0.02814914472401142,
"rewards/margins": 0.07266633957624435,
"rewards/rejected": -0.10081546753644943,
"step": 1625
},
{
"epoch": 2.2578558225508316,
"grad_norm": 1.6091256141662598,
"learning_rate": 1.4018181818181817e-07,
"log_odds_chosen": 1.6265534162521362,
"log_odds_ratio": -0.3619597852230072,
"logits/chosen": 1.9786964654922485,
"logits/rejected": 2.02095365524292,
"logps/chosen": -0.2642413377761841,
"logps/rejected": -0.998843252658844,
"loss": 1.0237,
"nll_loss": 0.9874651432037354,
"rewards/accuracies": 0.8333333134651184,
"rewards/chosen": -0.026424136012792587,
"rewards/margins": 0.07346019893884659,
"rewards/rejected": -0.09988433867692947,
"step": 1630
},
{
"epoch": 2.264787430683919,
"grad_norm": 2.9606359004974365,
"learning_rate": 1.3927272727272727e-07,
"log_odds_chosen": 1.4278662204742432,
"log_odds_ratio": -0.37395817041397095,
"logits/chosen": 2.1129722595214844,
"logits/rejected": 2.1692874431610107,
"logps/chosen": -0.30893474817276,
"logps/rejected": -0.838131308555603,
"loss": 1.0075,
"nll_loss": 0.9701253771781921,
"rewards/accuracies": 0.8583333492279053,
"rewards/chosen": -0.03089348040521145,
"rewards/margins": 0.052919652312994,
"rewards/rejected": -0.0838131383061409,
"step": 1635
},
{
"epoch": 2.2717190388170057,
"grad_norm": 2.276667356491089,
"learning_rate": 1.3836363636363634e-07,
"log_odds_chosen": 1.5527472496032715,
"log_odds_ratio": -0.3658043444156647,
"logits/chosen": 2.067870616912842,
"logits/rejected": 2.1205027103424072,
"logps/chosen": -0.2891118824481964,
"logps/rejected": -0.9314442873001099,
"loss": 1.0232,
"nll_loss": 0.9866386651992798,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.02891119197010994,
"rewards/margins": 0.06423323601484299,
"rewards/rejected": -0.09314444661140442,
"step": 1640
},
{
"epoch": 2.2786506469500925,
"grad_norm": 1.4873467683792114,
"learning_rate": 1.3745454545454543e-07,
"log_odds_chosen": 1.851697564125061,
"log_odds_ratio": -0.3145390450954437,
"logits/chosen": 2.075488567352295,
"logits/rejected": 2.1666271686553955,
"logps/chosen": -0.2427607625722885,
"logps/rejected": -1.01997709274292,
"loss": 0.9927,
"nll_loss": 0.9612413048744202,
"rewards/accuracies": 0.9083333611488342,
"rewards/chosen": -0.02427607588469982,
"rewards/margins": 0.07772162556648254,
"rewards/rejected": -0.10199771821498871,
"step": 1645
},
{
"epoch": 2.2855822550831792,
"grad_norm": 2.837634325027466,
"learning_rate": 1.3654545454545453e-07,
"log_odds_chosen": 1.7820488214492798,
"log_odds_ratio": -0.31967195868492126,
"logits/chosen": 2.0722880363464355,
"logits/rejected": 2.1255042552948,
"logps/chosen": -0.3070850968360901,
"logps/rejected": -1.0848478078842163,
"loss": 1.0299,
"nll_loss": 0.9979235529899597,
"rewards/accuracies": 0.8916666507720947,
"rewards/chosen": -0.030708512291312218,
"rewards/margins": 0.07777624577283859,
"rewards/rejected": -0.10848478227853775,
"step": 1650
},
{
"epoch": 2.292513863216266,
"grad_norm": 1.572646975517273,
"learning_rate": 1.3563636363636363e-07,
"log_odds_chosen": 1.8489151000976562,
"log_odds_ratio": -0.2767617404460907,
"logits/chosen": 2.077411413192749,
"logits/rejected": 2.1307966709136963,
"logps/chosen": -0.2723609209060669,
"logps/rejected": -1.0447889566421509,
"loss": 0.9829,
"nll_loss": 0.9551786184310913,
"rewards/accuracies": 0.8916666507720947,
"rewards/chosen": -0.0272360946983099,
"rewards/margins": 0.07724279910326004,
"rewards/rejected": -0.10447890311479568,
"step": 1655
},
{
"epoch": 2.299445471349353,
"grad_norm": 2.2417216300964355,
"learning_rate": 1.347272727272727e-07,
"log_odds_chosen": 1.674991488456726,
"log_odds_ratio": -0.30424702167510986,
"logits/chosen": 1.9423750638961792,
"logits/rejected": 2.0181448459625244,
"logps/chosen": -0.26656848192214966,
"logps/rejected": -0.947805643081665,
"loss": 1.0604,
"nll_loss": 1.0300124883651733,
"rewards/accuracies": 0.8583333492279053,
"rewards/chosen": -0.026656849309802055,
"rewards/margins": 0.0681237280368805,
"rewards/rejected": -0.0947805717587471,
"step": 1660
},
{
"epoch": 2.30637707948244,
"grad_norm": 2.700683355331421,
"learning_rate": 1.338181818181818e-07,
"log_odds_chosen": 1.6018115282058716,
"log_odds_ratio": -0.32080212235450745,
"logits/chosen": 2.066725969314575,
"logits/rejected": 2.1303725242614746,
"logps/chosen": -0.25507083535194397,
"logps/rejected": -0.9004265069961548,
"loss": 0.9929,
"nll_loss": 0.9608381390571594,
"rewards/accuracies": 0.8666666746139526,
"rewards/chosen": -0.025507085025310516,
"rewards/margins": 0.06453555822372437,
"rewards/rejected": -0.09004264324903488,
"step": 1665
},
{
"epoch": 2.313308687615527,
"grad_norm": 2.3125734329223633,
"learning_rate": 1.329090909090909e-07,
"log_odds_chosen": 1.7538138628005981,
"log_odds_ratio": -0.30055293440818787,
"logits/chosen": 2.082395315170288,
"logits/rejected": 2.138796091079712,
"logps/chosen": -0.32292428612709045,
"logps/rejected": -1.132580280303955,
"loss": 1.0183,
"nll_loss": 0.9882605671882629,
"rewards/accuracies": 0.8916666507720947,
"rewards/chosen": -0.032292433083057404,
"rewards/margins": 0.08096561580896378,
"rewards/rejected": -0.11325804144144058,
"step": 1670
},
{
"epoch": 2.3202402957486137,
"grad_norm": 2.303008794784546,
"learning_rate": 1.32e-07,
"log_odds_chosen": 1.66550874710083,
"log_odds_ratio": -0.32220420241355896,
"logits/chosen": 2.0094358921051025,
"logits/rejected": 2.0630240440368652,
"logps/chosen": -0.28716641664505005,
"logps/rejected": -1.0477604866027832,
"loss": 1.0112,
"nll_loss": 0.9789758324623108,
"rewards/accuracies": 0.8666666746139526,
"rewards/chosen": -0.028716640546917915,
"rewards/margins": 0.07605940848588943,
"rewards/rejected": -0.1047760397195816,
"step": 1675
},
{
"epoch": 2.3271719038817005,
"grad_norm": 3.235823631286621,
"learning_rate": 1.3109090909090906e-07,
"log_odds_chosen": 1.7851775884628296,
"log_odds_ratio": -0.29265934228897095,
"logits/chosen": 2.0131328105926514,
"logits/rejected": 2.072057008743286,
"logps/chosen": -0.30348002910614014,
"logps/rejected": -1.1123188734054565,
"loss": 1.0679,
"nll_loss": 1.0386704206466675,
"rewards/accuracies": 0.8916666507720947,
"rewards/chosen": -0.030348004773259163,
"rewards/margins": 0.08088389039039612,
"rewards/rejected": -0.11123190075159073,
"step": 1680
},
{
"epoch": 2.3341035120147873,
"grad_norm": 2.9141244888305664,
"learning_rate": 1.3018181818181816e-07,
"log_odds_chosen": 1.7565422058105469,
"log_odds_ratio": -0.3614824414253235,
"logits/chosen": 2.1100566387176514,
"logits/rejected": 2.167602777481079,
"logps/chosen": -0.3503415882587433,
"logps/rejected": -1.2140240669250488,
"loss": 1.0574,
"nll_loss": 1.021295428276062,
"rewards/accuracies": 0.8166666626930237,
"rewards/chosen": -0.03503415733575821,
"rewards/margins": 0.08636824786663055,
"rewards/rejected": -0.12140240520238876,
"step": 1685
},
{
"epoch": 2.3410351201478745,
"grad_norm": 2.8215250968933105,
"learning_rate": 1.2927272727272726e-07,
"log_odds_chosen": 1.7545371055603027,
"log_odds_ratio": -0.30033814907073975,
"logits/chosen": 1.996227741241455,
"logits/rejected": 2.0622332096099854,
"logps/chosen": -0.2831988036632538,
"logps/rejected": -1.0355448722839355,
"loss": 0.9776,
"nll_loss": 0.9475898742675781,
"rewards/accuracies": 0.8833333253860474,
"rewards/chosen": -0.02831987477838993,
"rewards/margins": 0.07523461431264877,
"rewards/rejected": -0.10355449467897415,
"step": 1690
},
{
"epoch": 2.3479667282809613,
"grad_norm": 1.334836721420288,
"learning_rate": 1.2836363636363635e-07,
"log_odds_chosen": 1.8632985353469849,
"log_odds_ratio": -0.23800452053546906,
"logits/chosen": 2.006606101989746,
"logits/rejected": 2.080357789993286,
"logps/chosen": -0.2609540820121765,
"logps/rejected": -1.0780720710754395,
"loss": 0.996,
"nll_loss": 0.9722317457199097,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.02609540894627571,
"rewards/margins": 0.0817117914557457,
"rewards/rejected": -0.1078072041273117,
"step": 1695
},
{
"epoch": 2.354898336414048,
"grad_norm": 2.684661626815796,
"learning_rate": 1.2745454545454545e-07,
"log_odds_chosen": 1.8242162466049194,
"log_odds_ratio": -0.2849119305610657,
"logits/chosen": 1.953218698501587,
"logits/rejected": 2.0185937881469727,
"logps/chosen": -0.25621822476387024,
"logps/rejected": -1.050632357597351,
"loss": 0.9865,
"nll_loss": 0.9579840898513794,
"rewards/accuracies": 0.8833333253860474,
"rewards/chosen": -0.025621820241212845,
"rewards/margins": 0.07944142073392868,
"rewards/rejected": -0.10506324470043182,
"step": 1700
},
{
"epoch": 2.361829944547135,
"grad_norm": 1.7048418521881104,
"learning_rate": 1.2654545454545452e-07,
"log_odds_chosen": 1.9969215393066406,
"log_odds_ratio": -0.25461483001708984,
"logits/chosen": 1.9886645078659058,
"logits/rejected": 2.0469629764556885,
"logps/chosen": -0.2665863335132599,
"logps/rejected": -1.1525532007217407,
"loss": 1.0313,
"nll_loss": 1.0058410167694092,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.02665863186120987,
"rewards/margins": 0.08859668672084808,
"rewards/rejected": -0.11525531858205795,
"step": 1705
},
{
"epoch": 2.3687615526802217,
"grad_norm": 2.232328414916992,
"learning_rate": 1.2563636363636362e-07,
"log_odds_chosen": 1.9676387310028076,
"log_odds_ratio": -0.2715602517127991,
"logits/chosen": 1.936298131942749,
"logits/rejected": 1.9897048473358154,
"logps/chosen": -0.2537732422351837,
"logps/rejected": -1.1118028163909912,
"loss": 1.0124,
"nll_loss": 0.9852831363677979,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.025377323850989342,
"rewards/margins": 0.08580294251441956,
"rewards/rejected": -0.11118026822805405,
"step": 1710
},
{
"epoch": 2.3756931608133085,
"grad_norm": 3.159862518310547,
"learning_rate": 1.2472727272727272e-07,
"log_odds_chosen": 1.9476854801177979,
"log_odds_ratio": -0.2814669609069824,
"logits/chosen": 1.9453091621398926,
"logits/rejected": 2.018049478530884,
"logps/chosen": -0.2795754075050354,
"logps/rejected": -1.1083685159683228,
"loss": 1.0349,
"nll_loss": 1.0067722797393799,
"rewards/accuracies": 0.9083333611488342,
"rewards/chosen": -0.02795754000544548,
"rewards/margins": 0.08287932723760605,
"rewards/rejected": -0.11083687096834183,
"step": 1715
},
{
"epoch": 2.3826247689463957,
"grad_norm": 2.8082034587860107,
"learning_rate": 1.238181818181818e-07,
"log_odds_chosen": 1.8380389213562012,
"log_odds_ratio": -0.3166138231754303,
"logits/chosen": 2.0408785343170166,
"logits/rejected": 2.11739182472229,
"logps/chosen": -0.28606662154197693,
"logps/rejected": -1.1519672870635986,
"loss": 1.0073,
"nll_loss": 0.9756883978843689,
"rewards/accuracies": 0.8916666507720947,
"rewards/chosen": -0.02860666997730732,
"rewards/margins": 0.08659005165100098,
"rewards/rejected": -0.11519671976566315,
"step": 1720
},
{
"epoch": 2.3895563770794825,
"grad_norm": 2.500714063644409,
"learning_rate": 1.2290909090909088e-07,
"log_odds_chosen": 1.6278916597366333,
"log_odds_ratio": -0.3502149283885956,
"logits/chosen": 2.0757570266723633,
"logits/rejected": 2.1220290660858154,
"logps/chosen": -0.31332165002822876,
"logps/rejected": -1.052827000617981,
"loss": 1.003,
"nll_loss": 0.9680219888687134,
"rewards/accuracies": 0.8583333492279053,
"rewards/chosen": -0.031332165002822876,
"rewards/margins": 0.07395053654909134,
"rewards/rejected": -0.10528270155191422,
"step": 1725
},
{
"epoch": 2.3964879852125693,
"grad_norm": 2.743722915649414,
"learning_rate": 1.2199999999999998e-07,
"log_odds_chosen": 1.4381171464920044,
"log_odds_ratio": -0.43818411231040955,
"logits/chosen": 2.0334222316741943,
"logits/rejected": 2.081953287124634,
"logps/chosen": -0.3282889723777771,
"logps/rejected": -0.9676831364631653,
"loss": 1.0602,
"nll_loss": 1.0163487195968628,
"rewards/accuracies": 0.8166666626930237,
"rewards/chosen": -0.03282889723777771,
"rewards/margins": 0.0639394223690033,
"rewards/rejected": -0.0967683270573616,
"step": 1730
},
{
"epoch": 2.403419593345656,
"grad_norm": 2.034834146499634,
"learning_rate": 1.2109090909090908e-07,
"log_odds_chosen": 1.7440353631973267,
"log_odds_ratio": -0.2908443510532379,
"logits/chosen": 2.1094157695770264,
"logits/rejected": 2.1522154808044434,
"logps/chosen": -0.2517443597316742,
"logps/rejected": -0.9762896299362183,
"loss": 0.9974,
"nll_loss": 0.9682726263999939,
"rewards/accuracies": 0.8833333253860474,
"rewards/chosen": -0.02517443709075451,
"rewards/margins": 0.0724545270204544,
"rewards/rejected": -0.09762895852327347,
"step": 1735
},
{
"epoch": 2.410351201478743,
"grad_norm": 4.332306861877441,
"learning_rate": 1.2018181818181818e-07,
"log_odds_chosen": 1.5381571054458618,
"log_odds_ratio": -0.3476658761501312,
"logits/chosen": 2.057114362716675,
"logits/rejected": 2.090787410736084,
"logps/chosen": -0.276753693819046,
"logps/rejected": -0.9296085834503174,
"loss": 1.0436,
"nll_loss": 1.0088648796081543,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.027675366029143333,
"rewards/margins": 0.06528548896312714,
"rewards/rejected": -0.09296084940433502,
"step": 1740
},
{
"epoch": 2.41728280961183,
"grad_norm": 3.1289680004119873,
"learning_rate": 1.1927272727272725e-07,
"log_odds_chosen": 1.5321450233459473,
"log_odds_ratio": -0.3635505437850952,
"logits/chosen": 2.098928928375244,
"logits/rejected": 2.1394617557525635,
"logps/chosen": -0.32607170939445496,
"logps/rejected": -1.0010368824005127,
"loss": 1.0101,
"nll_loss": 0.973716676235199,
"rewards/accuracies": 0.8583333492279053,
"rewards/chosen": -0.032607175409793854,
"rewards/margins": 0.06749651581048965,
"rewards/rejected": -0.10010368376970291,
"step": 1745
},
{
"epoch": 2.424214417744917,
"grad_norm": 2.292997360229492,
"learning_rate": 1.1836363636363636e-07,
"log_odds_chosen": 1.6794272661209106,
"log_odds_ratio": -0.3062840700149536,
"logits/chosen": 1.9977962970733643,
"logits/rejected": 2.0764520168304443,
"logps/chosen": -0.24340610206127167,
"logps/rejected": -0.9343698620796204,
"loss": 0.9467,
"nll_loss": 0.9160255193710327,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.024340612813830376,
"rewards/margins": 0.0690963938832283,
"rewards/rejected": -0.09343700110912323,
"step": 1750
},
{
"epoch": 2.4311460258780038,
"grad_norm": 2.060739517211914,
"learning_rate": 1.1745454545454545e-07,
"log_odds_chosen": 1.7372018098831177,
"log_odds_ratio": -0.36456355452537537,
"logits/chosen": 2.081760883331299,
"logits/rejected": 2.139793634414673,
"logps/chosen": -0.30359095335006714,
"logps/rejected": -1.0979803800582886,
"loss": 1.0152,
"nll_loss": 0.9787145853042603,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.030359093099832535,
"rewards/margins": 0.0794389396905899,
"rewards/rejected": -0.10979804396629333,
"step": 1755
},
{
"epoch": 2.4380776340110906,
"grad_norm": 2.298625946044922,
"learning_rate": 1.1654545454545455e-07,
"log_odds_chosen": 1.6793102025985718,
"log_odds_ratio": -0.335517019033432,
"logits/chosen": 2.0144755840301514,
"logits/rejected": 2.075939416885376,
"logps/chosen": -0.2551276385784149,
"logps/rejected": -0.9466179013252258,
"loss": 1.0231,
"nll_loss": 0.9895772337913513,
"rewards/accuracies": 0.8666666746139526,
"rewards/chosen": -0.02551276609301567,
"rewards/margins": 0.06914903223514557,
"rewards/rejected": -0.09466180205345154,
"step": 1760
},
{
"epoch": 2.4450092421441774,
"grad_norm": 1.3807379007339478,
"learning_rate": 1.1563636363636362e-07,
"log_odds_chosen": 1.695957064628601,
"log_odds_ratio": -0.33206743001937866,
"logits/chosen": 2.0311429500579834,
"logits/rejected": 2.0982778072357178,
"logps/chosen": -0.25683820247650146,
"logps/rejected": -0.978820264339447,
"loss": 1.019,
"nll_loss": 0.9857791066169739,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.025683818385004997,
"rewards/margins": 0.07219821959733963,
"rewards/rejected": -0.09788203239440918,
"step": 1765
},
{
"epoch": 2.451940850277264,
"grad_norm": 2.096445083618164,
"learning_rate": 1.1472727272727272e-07,
"log_odds_chosen": 1.7026692628860474,
"log_odds_ratio": -0.3184446692466736,
"logits/chosen": 2.0503830909729004,
"logits/rejected": 2.086923122406006,
"logps/chosen": -0.27430155873298645,
"logps/rejected": -0.9957641363143921,
"loss": 1.0057,
"nll_loss": 0.9738113880157471,
"rewards/accuracies": 0.8833333253860474,
"rewards/chosen": -0.027430152520537376,
"rewards/margins": 0.07214626669883728,
"rewards/rejected": -0.0995764285326004,
"step": 1770
},
{
"epoch": 2.458872458410351,
"grad_norm": 2.9843761920928955,
"learning_rate": 1.1381818181818182e-07,
"log_odds_chosen": 1.8962608575820923,
"log_odds_ratio": -0.2932147681713104,
"logits/chosen": 2.1202423572540283,
"logits/rejected": 2.1624252796173096,
"logps/chosen": -0.2599133849143982,
"logps/rejected": -1.1326204538345337,
"loss": 1.0537,
"nll_loss": 1.0243782997131348,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.025991341099143028,
"rewards/margins": 0.08727072179317474,
"rewards/rejected": -0.11326204240322113,
"step": 1775
},
{
"epoch": 2.465804066543438,
"grad_norm": 2.571378469467163,
"learning_rate": 1.1290909090909091e-07,
"log_odds_chosen": 2.0019185543060303,
"log_odds_ratio": -0.24575158953666687,
"logits/chosen": 2.049402952194214,
"logits/rejected": 2.103079080581665,
"logps/chosen": -0.3088202476501465,
"logps/rejected": -1.2870643138885498,
"loss": 1.0222,
"nll_loss": 0.997674822807312,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.03088202513754368,
"rewards/margins": 0.09782441705465317,
"rewards/rejected": -0.1287064403295517,
"step": 1780
},
{
"epoch": 2.472735674676525,
"grad_norm": 1.9969513416290283,
"learning_rate": 1.1200000000000001e-07,
"log_odds_chosen": 1.6785210371017456,
"log_odds_ratio": -0.3223731815814972,
"logits/chosen": 2.0518710613250732,
"logits/rejected": 2.1081185340881348,
"logps/chosen": -0.2911817133426666,
"logps/rejected": -1.0387407541275024,
"loss": 0.9996,
"nll_loss": 0.9674090147018433,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.02911817468702793,
"rewards/margins": 0.07475589960813522,
"rewards/rejected": -0.1038740798830986,
"step": 1785
},
{
"epoch": 2.479667282809612,
"grad_norm": 2.367443799972534,
"learning_rate": 1.1109090909090908e-07,
"log_odds_chosen": 1.6323319673538208,
"log_odds_ratio": -0.35728973150253296,
"logits/chosen": 1.9989386796951294,
"logits/rejected": 2.0702216625213623,
"logps/chosen": -0.2859072685241699,
"logps/rejected": -0.9873275756835938,
"loss": 0.9933,
"nll_loss": 0.9575673341751099,
"rewards/accuracies": 0.8416666388511658,
"rewards/chosen": -0.02859073132276535,
"rewards/margins": 0.07014203071594238,
"rewards/rejected": -0.09873275458812714,
"step": 1790
},
{
"epoch": 2.4865988909426986,
"grad_norm": 5.1550374031066895,
"learning_rate": 1.1018181818181818e-07,
"log_odds_chosen": 1.9188029766082764,
"log_odds_ratio": -0.31065088510513306,
"logits/chosen": 1.9901913404464722,
"logits/rejected": 2.050278902053833,
"logps/chosen": -0.3183686137199402,
"logps/rejected": -1.161499261856079,
"loss": 1.0106,
"nll_loss": 0.9795438051223755,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.0318368636071682,
"rewards/margins": 0.0843130573630333,
"rewards/rejected": -0.1161499172449112,
"step": 1795
},
{
"epoch": 2.4935304990757854,
"grad_norm": 2.2873830795288086,
"learning_rate": 1.0927272727272728e-07,
"log_odds_chosen": 1.4898102283477783,
"log_odds_ratio": -0.36497774720191956,
"logits/chosen": 2.037238121032715,
"logits/rejected": 2.0807337760925293,
"logps/chosen": -0.29659178853034973,
"logps/rejected": -1.0002473592758179,
"loss": 1.0471,
"nll_loss": 1.0105878114700317,
"rewards/accuracies": 0.7916666865348816,
"rewards/chosen": -0.029659178107976913,
"rewards/margins": 0.0703655481338501,
"rewards/rejected": -0.10002472996711731,
"step": 1800
},
{
"epoch": 2.5004621072088726,
"grad_norm": 2.6431796550750732,
"learning_rate": 1.0836363636363637e-07,
"log_odds_chosen": 1.6469905376434326,
"log_odds_ratio": -0.3312085270881653,
"logits/chosen": 1.9593592882156372,
"logits/rejected": 2.0358383655548096,
"logps/chosen": -0.26151055097579956,
"logps/rejected": -0.9036704301834106,
"loss": 1.0059,
"nll_loss": 0.9727994203567505,
"rewards/accuracies": 0.8583333492279053,
"rewards/chosen": -0.026151059195399284,
"rewards/margins": 0.06421598047018051,
"rewards/rejected": -0.09036703407764435,
"step": 1805
},
{
"epoch": 2.5073937153419594,
"grad_norm": 2.9634170532226562,
"learning_rate": 1.0745454545454544e-07,
"log_odds_chosen": 1.5996876955032349,
"log_odds_ratio": -0.33672866225242615,
"logits/chosen": 1.9904649257659912,
"logits/rejected": 2.0536324977874756,
"logps/chosen": -0.32099291682243347,
"logps/rejected": -1.0023874044418335,
"loss": 1.0524,
"nll_loss": 1.018733263015747,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.03209929168224335,
"rewards/margins": 0.0681394636631012,
"rewards/rejected": -0.10023875534534454,
"step": 1810
},
{
"epoch": 2.5143253234750462,
"grad_norm": 1.860707402229309,
"learning_rate": 1.0654545454545454e-07,
"log_odds_chosen": 1.8385961055755615,
"log_odds_ratio": -0.34007883071899414,
"logits/chosen": 1.9847419261932373,
"logits/rejected": 2.0414199829101562,
"logps/chosen": -0.3060314953327179,
"logps/rejected": -1.150540828704834,
"loss": 0.9906,
"nll_loss": 0.9565935730934143,
"rewards/accuracies": 0.8916666507720947,
"rewards/chosen": -0.03060315176844597,
"rewards/margins": 0.08445093035697937,
"rewards/rejected": -0.11505408585071564,
"step": 1815
},
{
"epoch": 2.521256931608133,
"grad_norm": 2.953806161880493,
"learning_rate": 1.0563636363636364e-07,
"log_odds_chosen": 1.6283913850784302,
"log_odds_ratio": -0.3325265049934387,
"logits/chosen": 2.0417227745056152,
"logits/rejected": 2.104788064956665,
"logps/chosen": -0.31526321172714233,
"logps/rejected": -1.0568766593933105,
"loss": 0.9896,
"nll_loss": 0.9563248753547668,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.031526319682598114,
"rewards/margins": 0.0741613581776619,
"rewards/rejected": -0.10568765550851822,
"step": 1820
},
{
"epoch": 2.52818853974122,
"grad_norm": 2.1079516410827637,
"learning_rate": 1.0472727272727273e-07,
"log_odds_chosen": 1.5739433765411377,
"log_odds_ratio": -0.33921295404434204,
"logits/chosen": 2.122877359390259,
"logits/rejected": 2.1718015670776367,
"logps/chosen": -0.2680935859680176,
"logps/rejected": -0.9542296528816223,
"loss": 1.0209,
"nll_loss": 0.986934244632721,
"rewards/accuracies": 0.8583333492279053,
"rewards/chosen": -0.026809358969330788,
"rewards/margins": 0.06861360371112823,
"rewards/rejected": -0.09542296081781387,
"step": 1825
},
{
"epoch": 2.5351201478743066,
"grad_norm": 2.3235225677490234,
"learning_rate": 1.038181818181818e-07,
"log_odds_chosen": 1.7671153545379639,
"log_odds_ratio": -0.3048493266105652,
"logits/chosen": 2.0175423622131348,
"logits/rejected": 2.1084630489349365,
"logps/chosen": -0.28295522928237915,
"logps/rejected": -1.078856348991394,
"loss": 1.0093,
"nll_loss": 0.9788612723350525,
"rewards/accuracies": 0.8416666388511658,
"rewards/chosen": -0.028295524418354034,
"rewards/margins": 0.07959011197090149,
"rewards/rejected": -0.10788564383983612,
"step": 1830
},
{
"epoch": 2.542051756007394,
"grad_norm": 2.376753807067871,
"learning_rate": 1.029090909090909e-07,
"log_odds_chosen": 1.5308775901794434,
"log_odds_ratio": -0.35065487027168274,
"logits/chosen": 2.0661802291870117,
"logits/rejected": 2.1190385818481445,
"logps/chosen": -0.30774179100990295,
"logps/rejected": -0.9961751103401184,
"loss": 1.0005,
"nll_loss": 0.9654229879379272,
"rewards/accuracies": 0.8333333134651184,
"rewards/chosen": -0.030774177983403206,
"rewards/margins": 0.06884334981441498,
"rewards/rejected": -0.09961751848459244,
"step": 1835
},
{
"epoch": 2.5489833641404807,
"grad_norm": 1.1884440183639526,
"learning_rate": 1.02e-07,
"log_odds_chosen": 1.692187786102295,
"log_odds_ratio": -0.35703638195991516,
"logits/chosen": 2.050490379333496,
"logits/rejected": 2.091343879699707,
"logps/chosen": -0.26052388548851013,
"logps/rejected": -1.032615065574646,
"loss": 0.9831,
"nll_loss": 0.9473720192909241,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.026052383705973625,
"rewards/margins": 0.07720911502838135,
"rewards/rejected": -0.10326149314641953,
"step": 1840
},
{
"epoch": 2.5559149722735675,
"grad_norm": 2.3813302516937256,
"learning_rate": 1.010909090909091e-07,
"log_odds_chosen": 1.5822950601577759,
"log_odds_ratio": -0.3297514021396637,
"logits/chosen": 2.0444798469543457,
"logits/rejected": 2.1010236740112305,
"logps/chosen": -0.2959387004375458,
"logps/rejected": -0.987533450126648,
"loss": 1.0236,
"nll_loss": 0.9906317591667175,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.02959386818110943,
"rewards/margins": 0.06915947794914246,
"rewards/rejected": -0.09875334799289703,
"step": 1845
},
{
"epoch": 2.5628465804066543,
"grad_norm": 2.9394869804382324,
"learning_rate": 1.0018181818181817e-07,
"log_odds_chosen": 1.702123761177063,
"log_odds_ratio": -0.3088690936565399,
"logits/chosen": 2.113861322402954,
"logits/rejected": 2.1717820167541504,
"logps/chosen": -0.3088254928588867,
"logps/rejected": -1.0686160326004028,
"loss": 1.0239,
"nll_loss": 0.9929828643798828,
"rewards/accuracies": 0.8583333492279053,
"rewards/chosen": -0.03088255040347576,
"rewards/margins": 0.07597906142473221,
"rewards/rejected": -0.10686160624027252,
"step": 1850
},
{
"epoch": 2.5697781885397415,
"grad_norm": 3.485311508178711,
"learning_rate": 9.927272727272727e-08,
"log_odds_chosen": 1.7402740716934204,
"log_odds_ratio": -0.38107120990753174,
"logits/chosen": 1.9880726337432861,
"logits/rejected": 2.057579755783081,
"logps/chosen": -0.2805604338645935,
"logps/rejected": -1.0576139688491821,
"loss": 0.9883,
"nll_loss": 0.9501924514770508,
"rewards/accuracies": 0.8166666626930237,
"rewards/chosen": -0.02805604785680771,
"rewards/margins": 0.07770536839962006,
"rewards/rejected": -0.10576140880584717,
"step": 1855
},
{
"epoch": 2.5767097966728283,
"grad_norm": 3.232532262802124,
"learning_rate": 9.836363636363636e-08,
"log_odds_chosen": 1.8129467964172363,
"log_odds_ratio": -0.2785561978816986,
"logits/chosen": 1.9095889329910278,
"logits/rejected": 1.9935516119003296,
"logps/chosen": -0.2174181193113327,
"logps/rejected": -0.9398717880249023,
"loss": 0.9843,
"nll_loss": 0.9564692974090576,
"rewards/accuracies": 0.9083333611488342,
"rewards/chosen": -0.02174181304872036,
"rewards/margins": 0.07224537432193756,
"rewards/rejected": -0.09398718178272247,
"step": 1860
},
{
"epoch": 2.583641404805915,
"grad_norm": 2.0778720378875732,
"learning_rate": 9.745454545454545e-08,
"log_odds_chosen": 1.8064855337142944,
"log_odds_ratio": -0.3176063001155853,
"logits/chosen": 2.1032285690307617,
"logits/rejected": 2.154674768447876,
"logps/chosen": -0.3073400855064392,
"logps/rejected": -1.1221901178359985,
"loss": 0.9987,
"nll_loss": 0.9669729471206665,
"rewards/accuracies": 0.8666666746139526,
"rewards/chosen": -0.03073401190340519,
"rewards/margins": 0.08148500323295593,
"rewards/rejected": -0.11221900582313538,
"step": 1865
},
{
"epoch": 2.590573012939002,
"grad_norm": 1.6975332498550415,
"learning_rate": 9.654545454545454e-08,
"log_odds_chosen": 1.6128000020980835,
"log_odds_ratio": -0.33097171783447266,
"logits/chosen": 2.0006532669067383,
"logits/rejected": 2.056741237640381,
"logps/chosen": -0.2736147940158844,
"logps/rejected": -0.9443971514701843,
"loss": 0.98,
"nll_loss": 0.9469044804573059,
"rewards/accuracies": 0.8833333253860474,
"rewards/chosen": -0.02736147679388523,
"rewards/margins": 0.06707824021577835,
"rewards/rejected": -0.09443972259759903,
"step": 1870
},
{
"epoch": 2.5975046210720887,
"grad_norm": 2.9151763916015625,
"learning_rate": 9.563636363636364e-08,
"log_odds_chosen": 2.098798990249634,
"log_odds_ratio": -0.263777494430542,
"logits/chosen": 1.9829503297805786,
"logits/rejected": 2.0440664291381836,
"logps/chosen": -0.2544497847557068,
"logps/rejected": -1.2993313074111938,
"loss": 1.015,
"nll_loss": 0.9885779023170471,
"rewards/accuracies": 0.8916666507720947,
"rewards/chosen": -0.025444982573390007,
"rewards/margins": 0.10448816418647766,
"rewards/rejected": -0.12993313372135162,
"step": 1875
},
{
"epoch": 2.6044362292051755,
"grad_norm": 1.4423106908798218,
"learning_rate": 9.472727272727272e-08,
"log_odds_chosen": 1.7735576629638672,
"log_odds_ratio": -0.3009028136730194,
"logits/chosen": 1.8770281076431274,
"logits/rejected": 1.9749999046325684,
"logps/chosen": -0.2571166455745697,
"logps/rejected": -0.9384245276451111,
"loss": 0.9065,
"nll_loss": 0.8764019012451172,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.025711664929986,
"rewards/margins": 0.06813079863786697,
"rewards/rejected": -0.09384246915578842,
"step": 1880
},
{
"epoch": 2.6113678373382623,
"grad_norm": 3.350883960723877,
"learning_rate": 9.381818181818182e-08,
"log_odds_chosen": 1.902999997138977,
"log_odds_ratio": -0.27424463629722595,
"logits/chosen": 2.0839245319366455,
"logits/rejected": 2.14450740814209,
"logps/chosen": -0.26829907298088074,
"logps/rejected": -1.1070636510849,
"loss": 1.0208,
"nll_loss": 0.993401825428009,
"rewards/accuracies": 0.8833333253860474,
"rewards/chosen": -0.026829909533262253,
"rewards/margins": 0.08387646079063416,
"rewards/rejected": -0.11070636659860611,
"step": 1885
},
{
"epoch": 2.6182994454713495,
"grad_norm": 3.9501917362213135,
"learning_rate": 9.29090909090909e-08,
"log_odds_chosen": 1.724169135093689,
"log_odds_ratio": -0.3101595342159271,
"logits/chosen": 2.021693229675293,
"logits/rejected": 2.096151828765869,
"logps/chosen": -0.2878502309322357,
"logps/rejected": -1.0322964191436768,
"loss": 1.0293,
"nll_loss": 0.9983063340187073,
"rewards/accuracies": 0.8666666746139526,
"rewards/chosen": -0.028785018250346184,
"rewards/margins": 0.07444461435079575,
"rewards/rejected": -0.10322963446378708,
"step": 1890
},
{
"epoch": 2.6252310536044363,
"grad_norm": 2.835116386413574,
"learning_rate": 9.2e-08,
"log_odds_chosen": 2.0075416564941406,
"log_odds_ratio": -0.29768672585487366,
"logits/chosen": 2.0260884761810303,
"logits/rejected": 2.08447527885437,
"logps/chosen": -0.2922082841396332,
"logps/rejected": -1.2498011589050293,
"loss": 1.0113,
"nll_loss": 0.981542706489563,
"rewards/accuracies": 0.8916666507720947,
"rewards/chosen": -0.029220828786492348,
"rewards/margins": 0.09575929492712021,
"rewards/rejected": -0.12498010694980621,
"step": 1895
},
{
"epoch": 2.632162661737523,
"grad_norm": 1.9952729940414429,
"learning_rate": 9.109090909090909e-08,
"log_odds_chosen": 2.203530788421631,
"log_odds_ratio": -0.2388562709093094,
"logits/chosen": 2.05356764793396,
"logits/rejected": 2.108933925628662,
"logps/chosen": -0.2701965272426605,
"logps/rejected": -1.3429614305496216,
"loss": 1.0337,
"nll_loss": 1.00979745388031,
"rewards/accuracies": 0.9333333373069763,
"rewards/chosen": -0.027019653469324112,
"rewards/margins": 0.10727646201848984,
"rewards/rejected": -0.13429613411426544,
"step": 1900
},
{
"epoch": 2.63909426987061,
"grad_norm": 2.688429594039917,
"learning_rate": 9.018181818181818e-08,
"log_odds_chosen": 1.6793392896652222,
"log_odds_ratio": -0.3166213929653168,
"logits/chosen": 1.9479715824127197,
"logits/rejected": 2.0088744163513184,
"logps/chosen": -0.27929723262786865,
"logps/rejected": -0.9713757634162903,
"loss": 1.0348,
"nll_loss": 1.0030966997146606,
"rewards/accuracies": 0.8833333253860474,
"rewards/chosen": -0.027929725125432014,
"rewards/margins": 0.06920785456895828,
"rewards/rejected": -0.09713757783174515,
"step": 1905
},
{
"epoch": 2.6460258780036967,
"grad_norm": 1.4949442148208618,
"learning_rate": 8.927272727272727e-08,
"log_odds_chosen": 1.9754811525344849,
"log_odds_ratio": -0.30257752537727356,
"logits/chosen": 1.9509350061416626,
"logits/rejected": 2.0503389835357666,
"logps/chosen": -0.27535703778266907,
"logps/rejected": -1.118589162826538,
"loss": 0.9724,
"nll_loss": 0.9421722292900085,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.027535704895853996,
"rewards/margins": 0.0843232050538063,
"rewards/rejected": -0.11185891181230545,
"step": 1910
},
{
"epoch": 2.652957486136784,
"grad_norm": 2.16941237449646,
"learning_rate": 8.836363636363637e-08,
"log_odds_chosen": 2.000943422317505,
"log_odds_ratio": -0.2847401797771454,
"logits/chosen": 2.106870412826538,
"logits/rejected": 2.168245553970337,
"logps/chosen": -0.26422372460365295,
"logps/rejected": -1.217974305152893,
"loss": 1.061,
"nll_loss": 1.0324803590774536,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.026422372087836266,
"rewards/margins": 0.09537507593631744,
"rewards/rejected": -0.12179744988679886,
"step": 1915
},
{
"epoch": 2.6598890942698707,
"grad_norm": 1.8085908889770508,
"learning_rate": 8.745454545454545e-08,
"log_odds_chosen": 1.5008604526519775,
"log_odds_ratio": -0.38829174637794495,
"logits/chosen": 1.9134535789489746,
"logits/rejected": 1.9677314758300781,
"logps/chosen": -0.2830255627632141,
"logps/rejected": -0.9362450242042542,
"loss": 1.0461,
"nll_loss": 1.0073057413101196,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.02830255590379238,
"rewards/margins": 0.06532195210456848,
"rewards/rejected": -0.09362450987100601,
"step": 1920
},
{
"epoch": 2.6668207024029575,
"grad_norm": 2.762589931488037,
"learning_rate": 8.654545454545455e-08,
"log_odds_chosen": 1.8519963026046753,
"log_odds_ratio": -0.3259159326553345,
"logits/chosen": 1.9644198417663574,
"logits/rejected": 2.05434250831604,
"logps/chosen": -0.2632545232772827,
"logps/rejected": -1.0625907182693481,
"loss": 1.0235,
"nll_loss": 0.9909093976020813,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.02632545307278633,
"rewards/margins": 0.07993361353874207,
"rewards/rejected": -0.1062590703368187,
"step": 1925
},
{
"epoch": 2.6737523105360443,
"grad_norm": 2.3893611431121826,
"learning_rate": 8.563636363636363e-08,
"log_odds_chosen": 1.6310181617736816,
"log_odds_ratio": -0.3340619206428528,
"logits/chosen": 2.005711555480957,
"logits/rejected": 2.067094087600708,
"logps/chosen": -0.2860340476036072,
"logps/rejected": -0.9216349124908447,
"loss": 0.9937,
"nll_loss": 0.9602577686309814,
"rewards/accuracies": 0.8666666746139526,
"rewards/chosen": -0.02860340289771557,
"rewards/margins": 0.06356008350849152,
"rewards/rejected": -0.09216348081827164,
"step": 1930
},
{
"epoch": 2.680683918669131,
"grad_norm": 1.9156088829040527,
"learning_rate": 8.472727272727273e-08,
"log_odds_chosen": 1.7571347951889038,
"log_odds_ratio": -0.3111540377140045,
"logits/chosen": 2.0473296642303467,
"logits/rejected": 2.1130969524383545,
"logps/chosen": -0.28812503814697266,
"logps/rejected": -1.1086490154266357,
"loss": 0.9945,
"nll_loss": 0.9633785486221313,
"rewards/accuracies": 0.8583333492279053,
"rewards/chosen": -0.028812507167458534,
"rewards/margins": 0.08205239474773407,
"rewards/rejected": -0.11086489260196686,
"step": 1935
},
{
"epoch": 2.687615526802218,
"grad_norm": 2.3184399604797363,
"learning_rate": 8.381818181818181e-08,
"log_odds_chosen": 1.6769403219223022,
"log_odds_ratio": -0.3335730731487274,
"logits/chosen": 1.9665719270706177,
"logits/rejected": 2.030404567718506,
"logps/chosen": -0.24711455404758453,
"logps/rejected": -0.9258390069007874,
"loss": 0.9979,
"nll_loss": 0.9645217061042786,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.024711458012461662,
"rewards/margins": 0.06787244230508804,
"rewards/rejected": -0.09258389472961426,
"step": 1940
},
{
"epoch": 2.6945471349353047,
"grad_norm": 1.7348381280899048,
"learning_rate": 8.290909090909091e-08,
"log_odds_chosen": 2.012420415878296,
"log_odds_ratio": -0.2879267930984497,
"logits/chosen": 2.080967426300049,
"logits/rejected": 2.181638479232788,
"logps/chosen": -0.2593296766281128,
"logps/rejected": -1.197130560874939,
"loss": 1.0093,
"nll_loss": 0.9804985523223877,
"rewards/accuracies": 0.8833333253860474,
"rewards/chosen": -0.02593296393752098,
"rewards/margins": 0.09378007054328918,
"rewards/rejected": -0.11971304565668106,
"step": 1945
},
{
"epoch": 2.701478743068392,
"grad_norm": 2.2291972637176514,
"learning_rate": 8.199999999999999e-08,
"log_odds_chosen": 1.8021572828292847,
"log_odds_ratio": -0.2828613221645355,
"logits/chosen": 1.9889678955078125,
"logits/rejected": 2.0603299140930176,
"logps/chosen": -0.27885323762893677,
"logps/rejected": -1.020405888557434,
"loss": 1.0091,
"nll_loss": 0.9808385372161865,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.02788531966507435,
"rewards/margins": 0.0741552785038948,
"rewards/rejected": -0.10204058140516281,
"step": 1950
},
{
"epoch": 2.7084103512014788,
"grad_norm": 3.511234998703003,
"learning_rate": 8.109090909090909e-08,
"log_odds_chosen": 1.6991480588912964,
"log_odds_ratio": -0.3476136028766632,
"logits/chosen": 2.0690817832946777,
"logits/rejected": 2.131218671798706,
"logps/chosen": -0.28822681307792664,
"logps/rejected": -0.9959009885787964,
"loss": 1.0083,
"nll_loss": 0.9735735058784485,
"rewards/accuracies": 0.8333333134651184,
"rewards/chosen": -0.028822684660553932,
"rewards/margins": 0.07076740264892578,
"rewards/rejected": -0.09959009289741516,
"step": 1955
},
{
"epoch": 2.7153419593345656,
"grad_norm": 1.731318712234497,
"learning_rate": 8.018181818181817e-08,
"log_odds_chosen": 1.9510962963104248,
"log_odds_ratio": -0.2717619836330414,
"logits/chosen": 2.0861499309539795,
"logits/rejected": 2.1444475650787354,
"logps/chosen": -0.26343804597854614,
"logps/rejected": -1.1370905637741089,
"loss": 1.0018,
"nll_loss": 0.9746354222297668,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.026343801990151405,
"rewards/margins": 0.08736524730920792,
"rewards/rejected": -0.11370905488729477,
"step": 1960
},
{
"epoch": 2.7222735674676524,
"grad_norm": 2.5520708560943604,
"learning_rate": 7.927272727272727e-08,
"log_odds_chosen": 1.883239507675171,
"log_odds_ratio": -0.34364765882492065,
"logits/chosen": 2.0899996757507324,
"logits/rejected": 2.1520025730133057,
"logps/chosen": -0.3178773820400238,
"logps/rejected": -1.1890572309494019,
"loss": 1.0465,
"nll_loss": 1.012162208557129,
"rewards/accuracies": 0.8083333373069763,
"rewards/chosen": -0.03178774565458298,
"rewards/margins": 0.087117999792099,
"rewards/rejected": -0.11890573799610138,
"step": 1965
},
{
"epoch": 2.7292051756007396,
"grad_norm": 3.0097408294677734,
"learning_rate": 7.836363636363637e-08,
"log_odds_chosen": 1.7459481954574585,
"log_odds_ratio": -0.3089205026626587,
"logits/chosen": 1.948202133178711,
"logits/rejected": 2.030421733856201,
"logps/chosen": -0.2865378260612488,
"logps/rejected": -0.9788001775741577,
"loss": 0.9699,
"nll_loss": 0.9390251040458679,
"rewards/accuracies": 0.8916666507720947,
"rewards/chosen": -0.028653783723711967,
"rewards/margins": 0.06922624260187149,
"rewards/rejected": -0.09788002073764801,
"step": 1970
},
{
"epoch": 2.7361367837338264,
"grad_norm": 2.334012746810913,
"learning_rate": 7.745454545454545e-08,
"log_odds_chosen": 1.6162441968917847,
"log_odds_ratio": -0.3280728757381439,
"logits/chosen": 2.022932767868042,
"logits/rejected": 2.0705296993255615,
"logps/chosen": -0.2768207788467407,
"logps/rejected": -0.927074134349823,
"loss": 0.9948,
"nll_loss": 0.9620178937911987,
"rewards/accuracies": 0.8833333253860474,
"rewards/chosen": -0.027682077139616013,
"rewards/margins": 0.06502533704042435,
"rewards/rejected": -0.09270740300416946,
"step": 1975
},
{
"epoch": 2.743068391866913,
"grad_norm": 3.4409821033477783,
"learning_rate": 7.654545454545455e-08,
"log_odds_chosen": 1.7293587923049927,
"log_odds_ratio": -0.32593438029289246,
"logits/chosen": 2.0177626609802246,
"logits/rejected": 2.0882925987243652,
"logps/chosen": -0.3067542612552643,
"logps/rejected": -1.018390417098999,
"loss": 1.004,
"nll_loss": 0.9714083075523376,
"rewards/accuracies": 0.8666666746139526,
"rewards/chosen": -0.030675429850816727,
"rewards/margins": 0.07116362452507019,
"rewards/rejected": -0.10183904320001602,
"step": 1980
},
{
"epoch": 2.75,
"grad_norm": 3.144928455352783,
"learning_rate": 7.563636363636363e-08,
"log_odds_chosen": 2.015221357345581,
"log_odds_ratio": -0.24131697416305542,
"logits/chosen": 2.0317463874816895,
"logits/rejected": 2.112942934036255,
"logps/chosen": -0.2766430974006653,
"logps/rejected": -1.1748555898666382,
"loss": 0.9986,
"nll_loss": 0.974430501461029,
"rewards/accuracies": 0.9416666626930237,
"rewards/chosen": -0.027664311230182648,
"rewards/margins": 0.0898212417960167,
"rewards/rejected": -0.11748553812503815,
"step": 1985
},
{
"epoch": 2.756931608133087,
"grad_norm": 2.3371379375457764,
"learning_rate": 7.472727272727273e-08,
"log_odds_chosen": 1.7855689525604248,
"log_odds_ratio": -0.28874701261520386,
"logits/chosen": 2.058995485305786,
"logits/rejected": 2.1220133304595947,
"logps/chosen": -0.23786862194538116,
"logps/rejected": -0.8881834745407104,
"loss": 0.9978,
"nll_loss": 0.9689105153083801,
"rewards/accuracies": 0.8916666507720947,
"rewards/chosen": -0.023786865174770355,
"rewards/margins": 0.06503147631883621,
"rewards/rejected": -0.08881834149360657,
"step": 1990
},
{
"epoch": 2.7638632162661736,
"grad_norm": 2.0242509841918945,
"learning_rate": 7.381818181818182e-08,
"log_odds_chosen": 1.881475806236267,
"log_odds_ratio": -0.2716512382030487,
"logits/chosen": 1.9721044301986694,
"logits/rejected": 2.0286245346069336,
"logps/chosen": -0.2365345060825348,
"logps/rejected": -1.0640085935592651,
"loss": 0.9904,
"nll_loss": 0.9632561802864075,
"rewards/accuracies": 0.9083333611488342,
"rewards/chosen": -0.02365345135331154,
"rewards/margins": 0.08274741470813751,
"rewards/rejected": -0.10640083998441696,
"step": 1995
},
{
"epoch": 2.7707948243992604,
"grad_norm": 3.5550692081451416,
"learning_rate": 7.290909090909091e-08,
"log_odds_chosen": 1.6393131017684937,
"log_odds_ratio": -0.3566218316555023,
"logits/chosen": 2.027010440826416,
"logits/rejected": 2.1158926486968994,
"logps/chosen": -0.2945045530796051,
"logps/rejected": -0.9923899173736572,
"loss": 0.9887,
"nll_loss": 0.9530836343765259,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.02945045940577984,
"rewards/margins": 0.06978853791952133,
"rewards/rejected": -0.09923899918794632,
"step": 2000
},
{
"epoch": 2.7777264325323476,
"grad_norm": 2.983398199081421,
"learning_rate": 7.2e-08,
"log_odds_chosen": 1.883139729499817,
"log_odds_ratio": -0.2949954569339752,
"logits/chosen": 2.0600686073303223,
"logits/rejected": 2.1349780559539795,
"logps/chosen": -0.27356693148612976,
"logps/rejected": -1.1228362321853638,
"loss": 1.0009,
"nll_loss": 0.971444308757782,
"rewards/accuracies": 0.8916666507720947,
"rewards/chosen": -0.027356691658496857,
"rewards/margins": 0.08492692559957504,
"rewards/rejected": -0.1122836172580719,
"step": 2005
},
{
"epoch": 2.7846580406654344,
"grad_norm": 2.232745885848999,
"learning_rate": 7.10909090909091e-08,
"log_odds_chosen": 1.8647377490997314,
"log_odds_ratio": -0.31064775586128235,
"logits/chosen": 2.072453260421753,
"logits/rejected": 2.1425271034240723,
"logps/chosen": -0.3136703670024872,
"logps/rejected": -1.2263870239257812,
"loss": 1.0013,
"nll_loss": 0.9702617526054382,
"rewards/accuracies": 0.8916666507720947,
"rewards/chosen": -0.03136703744530678,
"rewards/margins": 0.09127166122198105,
"rewards/rejected": -0.12263870239257812,
"step": 2010
},
{
"epoch": 2.7915896487985212,
"grad_norm": 2.4576735496520996,
"learning_rate": 7.018181818181818e-08,
"log_odds_chosen": 1.848351001739502,
"log_odds_ratio": -0.300770103931427,
"logits/chosen": 1.9794286489486694,
"logits/rejected": 2.051739454269409,
"logps/chosen": -0.2656658887863159,
"logps/rejected": -1.1345840692520142,
"loss": 1.0129,
"nll_loss": 0.9827767610549927,
"rewards/accuracies": 0.9166666865348816,
"rewards/chosen": -0.026566587388515472,
"rewards/margins": 0.08689180761575699,
"rewards/rejected": -0.11345840990543365,
"step": 2015
},
{
"epoch": 2.798521256931608,
"grad_norm": 4.359684467315674,
"learning_rate": 6.927272727272727e-08,
"log_odds_chosen": 1.7028800249099731,
"log_odds_ratio": -0.3596075177192688,
"logits/chosen": 2.0374951362609863,
"logits/rejected": 2.0811033248901367,
"logps/chosen": -0.26721158623695374,
"logps/rejected": -1.0201528072357178,
"loss": 1.0472,
"nll_loss": 1.011243462562561,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.026721160858869553,
"rewards/margins": 0.0752941220998764,
"rewards/rejected": -0.10201527923345566,
"step": 2020
},
{
"epoch": 2.8054528650646953,
"grad_norm": 1.5897514820098877,
"learning_rate": 6.836363636363636e-08,
"log_odds_chosen": 1.7618235349655151,
"log_odds_ratio": -0.2725900709629059,
"logits/chosen": 1.951567530632019,
"logits/rejected": 2.018179416656494,
"logps/chosen": -0.250274121761322,
"logps/rejected": -0.964055061340332,
"loss": 0.9872,
"nll_loss": 0.9599834680557251,
"rewards/accuracies": 0.9333333373069763,
"rewards/chosen": -0.025027411058545113,
"rewards/margins": 0.07137809693813324,
"rewards/rejected": -0.0964054986834526,
"step": 2025
},
{
"epoch": 2.812384473197782,
"grad_norm": 2.3347268104553223,
"learning_rate": 6.745454545454546e-08,
"log_odds_chosen": 1.8136812448501587,
"log_odds_ratio": -0.3173971176147461,
"logits/chosen": 2.0132126808166504,
"logits/rejected": 2.0880587100982666,
"logps/chosen": -0.3025325536727905,
"logps/rejected": -1.1345620155334473,
"loss": 1.0012,
"nll_loss": 0.9694395661354065,
"rewards/accuracies": 0.8666666746139526,
"rewards/chosen": -0.030253252014517784,
"rewards/margins": 0.08320295065641403,
"rewards/rejected": -0.11345621198415756,
"step": 2030
},
{
"epoch": 2.819316081330869,
"grad_norm": 1.9864964485168457,
"learning_rate": 6.654545454545454e-08,
"log_odds_chosen": 1.8496549129486084,
"log_odds_ratio": -0.29193249344825745,
"logits/chosen": 1.9852664470672607,
"logits/rejected": 2.046905994415283,
"logps/chosen": -0.23638120293617249,
"logps/rejected": -1.0749742984771729,
"loss": 0.9969,
"nll_loss": 0.9677135348320007,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.023638121783733368,
"rewards/margins": 0.08385932445526123,
"rewards/rejected": -0.107497438788414,
"step": 2035
},
{
"epoch": 2.8262476894639557,
"grad_norm": 2.766561269760132,
"learning_rate": 6.563636363636364e-08,
"log_odds_chosen": 2.029188632965088,
"log_odds_ratio": -0.26917028427124023,
"logits/chosen": 2.03354549407959,
"logits/rejected": 2.1179850101470947,
"logps/chosen": -0.24409012496471405,
"logps/rejected": -1.1629306077957153,
"loss": 1.0074,
"nll_loss": 0.9805120825767517,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.024409016594290733,
"rewards/margins": 0.09188403934240341,
"rewards/rejected": -0.1162930577993393,
"step": 2040
},
{
"epoch": 2.8331792975970425,
"grad_norm": 3.011481523513794,
"learning_rate": 6.472727272727272e-08,
"log_odds_chosen": 1.706537127494812,
"log_odds_ratio": -0.4006377160549164,
"logits/chosen": 2.1181445121765137,
"logits/rejected": 2.156266689300537,
"logps/chosen": -0.3777162432670593,
"logps/rejected": -1.2636988162994385,
"loss": 1.0484,
"nll_loss": 1.0083175897598267,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.03777162730693817,
"rewards/margins": 0.08859825879335403,
"rewards/rejected": -0.1263698935508728,
"step": 2045
},
{
"epoch": 2.8401109057301293,
"grad_norm": 3.116852283477783,
"learning_rate": 6.381818181818182e-08,
"log_odds_chosen": 1.7585667371749878,
"log_odds_ratio": -0.3240087330341339,
"logits/chosen": 1.9366189241409302,
"logits/rejected": 2.018488883972168,
"logps/chosen": -0.2796057164669037,
"logps/rejected": -0.9981705546379089,
"loss": 0.9852,
"nll_loss": 0.9527918100357056,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.027960574254393578,
"rewards/margins": 0.07185646891593933,
"rewards/rejected": -0.09981703758239746,
"step": 2050
},
{
"epoch": 2.847042513863216,
"grad_norm": 1.7142726182937622,
"learning_rate": 6.290909090909092e-08,
"log_odds_chosen": 1.9744784832000732,
"log_odds_ratio": -0.24878713488578796,
"logits/chosen": 1.9690757989883423,
"logits/rejected": 2.040696859359741,
"logps/chosen": -0.3136541545391083,
"logps/rejected": -1.1877192258834839,
"loss": 0.9877,
"nll_loss": 0.9628265500068665,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.031365420669317245,
"rewards/margins": 0.08740650117397308,
"rewards/rejected": -0.11877192556858063,
"step": 2055
},
{
"epoch": 2.8539741219963033,
"grad_norm": 1.3764642477035522,
"learning_rate": 6.2e-08,
"log_odds_chosen": 1.8966907262802124,
"log_odds_ratio": -0.2863107919692993,
"logits/chosen": 2.0856635570526123,
"logits/rejected": 2.136650800704956,
"logps/chosen": -0.3037610948085785,
"logps/rejected": -1.1746824979782104,
"loss": 1.0304,
"nll_loss": 1.0017729997634888,
"rewards/accuracies": 0.9166666865348816,
"rewards/chosen": -0.03037611022591591,
"rewards/margins": 0.08709214627742767,
"rewards/rejected": -0.11746825277805328,
"step": 2060
},
{
"epoch": 2.86090573012939,
"grad_norm": 2.363109827041626,
"learning_rate": 6.10909090909091e-08,
"log_odds_chosen": 1.8390313386917114,
"log_odds_ratio": -0.30428022146224976,
"logits/chosen": 1.9853383302688599,
"logits/rejected": 2.032965898513794,
"logps/chosen": -0.250918447971344,
"logps/rejected": -1.0042502880096436,
"loss": 0.9942,
"nll_loss": 0.9637566804885864,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.02509184740483761,
"rewards/margins": 0.07533318549394608,
"rewards/rejected": -0.10042501986026764,
"step": 2065
},
{
"epoch": 2.867837338262477,
"grad_norm": 2.9388301372528076,
"learning_rate": 6.018181818181818e-08,
"log_odds_chosen": 1.841733694076538,
"log_odds_ratio": -0.2979178726673126,
"logits/chosen": 1.9924938678741455,
"logits/rejected": 2.053075075149536,
"logps/chosen": -0.2633225917816162,
"logps/rejected": -1.0852948427200317,
"loss": 0.9875,
"nll_loss": 0.9577153921127319,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.02633226290345192,
"rewards/margins": 0.08219723403453827,
"rewards/rejected": -0.10852950066328049,
"step": 2070
},
{
"epoch": 2.8747689463955637,
"grad_norm": 2.952969789505005,
"learning_rate": 5.927272727272727e-08,
"log_odds_chosen": 1.6144490242004395,
"log_odds_ratio": -0.35894396901130676,
"logits/chosen": 2.0224180221557617,
"logits/rejected": 2.0612215995788574,
"logps/chosen": -0.26091185212135315,
"logps/rejected": -1.0284487009048462,
"loss": 1.0073,
"nll_loss": 0.9714316725730896,
"rewards/accuracies": 0.8583333492279053,
"rewards/chosen": -0.026091186329722404,
"rewards/margins": 0.07675368338823318,
"rewards/rejected": -0.10284487158060074,
"step": 2075
},
{
"epoch": 2.8817005545286505,
"grad_norm": 2.8261444568634033,
"learning_rate": 5.836363636363636e-08,
"log_odds_chosen": 1.8391097784042358,
"log_odds_ratio": -0.3189569413661957,
"logits/chosen": 1.9929203987121582,
"logits/rejected": 2.0488264560699463,
"logps/chosen": -0.2942853271961212,
"logps/rejected": -1.1487162113189697,
"loss": 1.02,
"nll_loss": 0.988071858882904,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.029428532347083092,
"rewards/margins": 0.08544307202100754,
"rewards/rejected": -0.11487161368131638,
"step": 2080
},
{
"epoch": 2.8886321626617377,
"grad_norm": 4.116847991943359,
"learning_rate": 5.745454545454545e-08,
"log_odds_chosen": 1.619594931602478,
"log_odds_ratio": -0.3377019762992859,
"logits/chosen": 2.075233221054077,
"logits/rejected": 2.119717836380005,
"logps/chosen": -0.2857421040534973,
"logps/rejected": -0.9990529417991638,
"loss": 1.0049,
"nll_loss": 0.9711350202560425,
"rewards/accuracies": 0.8333333134651184,
"rewards/chosen": -0.028574209660291672,
"rewards/margins": 0.07133107632398605,
"rewards/rejected": -0.09990529716014862,
"step": 2085
},
{
"epoch": 2.8955637707948245,
"grad_norm": 2.5412161350250244,
"learning_rate": 5.654545454545454e-08,
"log_odds_chosen": 1.8909341096878052,
"log_odds_ratio": -0.33492469787597656,
"logits/chosen": 2.021022081375122,
"logits/rejected": 2.0802001953125,
"logps/chosen": -0.28026703000068665,
"logps/rejected": -1.1536551713943481,
"loss": 1.0157,
"nll_loss": 0.982164740562439,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.028026705607771873,
"rewards/margins": 0.08733881264925003,
"rewards/rejected": -0.11536551266908646,
"step": 2090
},
{
"epoch": 2.9024953789279113,
"grad_norm": 2.7214314937591553,
"learning_rate": 5.563636363636364e-08,
"log_odds_chosen": 1.7071201801300049,
"log_odds_ratio": -0.31407538056373596,
"logits/chosen": 1.9751737117767334,
"logits/rejected": 2.022892475128174,
"logps/chosen": -0.29637211561203003,
"logps/rejected": -1.0419337749481201,
"loss": 1.0243,
"nll_loss": 0.9928818941116333,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.029637213796377182,
"rewards/margins": 0.07455617934465408,
"rewards/rejected": -0.10419339686632156,
"step": 2095
},
{
"epoch": 2.909426987060998,
"grad_norm": 2.559386730194092,
"learning_rate": 5.4727272727272724e-08,
"log_odds_chosen": 1.7541364431381226,
"log_odds_ratio": -0.3207100033760071,
"logits/chosen": 2.0054821968078613,
"logits/rejected": 2.0676021575927734,
"logps/chosen": -0.29729416966438293,
"logps/rejected": -1.0183097124099731,
"loss": 0.975,
"nll_loss": 0.9429475665092468,
"rewards/accuracies": 0.8916666507720947,
"rewards/chosen": -0.029729416593909264,
"rewards/margins": 0.07210154831409454,
"rewards/rejected": -0.10183095932006836,
"step": 2100
},
{
"epoch": 2.916358595194085,
"grad_norm": 3.168686866760254,
"learning_rate": 5.381818181818182e-08,
"log_odds_chosen": 1.8241220712661743,
"log_odds_ratio": -0.3224296271800995,
"logits/chosen": 2.084487199783325,
"logits/rejected": 2.151993989944458,
"logps/chosen": -0.298380970954895,
"logps/rejected": -1.1077054738998413,
"loss": 1.0205,
"nll_loss": 0.9882618188858032,
"rewards/accuracies": 0.8666666746139526,
"rewards/chosen": -0.02983810193836689,
"rewards/margins": 0.08093245327472687,
"rewards/rejected": -0.11077055335044861,
"step": 2105
},
{
"epoch": 2.9232902033271717,
"grad_norm": 4.917242527008057,
"learning_rate": 5.2909090909090905e-08,
"log_odds_chosen": 1.6183534860610962,
"log_odds_ratio": -0.3984599709510803,
"logits/chosen": 2.01122784614563,
"logits/rejected": 2.090132236480713,
"logps/chosen": -0.31952646374702454,
"logps/rejected": -0.9697479009628296,
"loss": 1.012,
"nll_loss": 0.9721961617469788,
"rewards/accuracies": 0.8166666626930237,
"rewards/chosen": -0.03195264935493469,
"rewards/margins": 0.06502215564250946,
"rewards/rejected": -0.09697480499744415,
"step": 2110
},
{
"epoch": 2.9302218114602585,
"grad_norm": 2.700791120529175,
"learning_rate": 5.2e-08,
"log_odds_chosen": 1.6960736513137817,
"log_odds_ratio": -0.3477153778076172,
"logits/chosen": 2.015334129333496,
"logits/rejected": 2.0682668685913086,
"logps/chosen": -0.26093918085098267,
"logps/rejected": -1.0459142923355103,
"loss": 1.0222,
"nll_loss": 0.9874255657196045,
"rewards/accuracies": 0.8416666388511658,
"rewards/chosen": -0.026093924418091774,
"rewards/margins": 0.078497514128685,
"rewards/rejected": -0.10459142178297043,
"step": 2115
},
{
"epoch": 2.9371534195933457,
"grad_norm": 3.925088405609131,
"learning_rate": 5.1090909090909086e-08,
"log_odds_chosen": 1.4773210287094116,
"log_odds_ratio": -0.41333526372909546,
"logits/chosen": 2.0795083045959473,
"logits/rejected": 2.115938901901245,
"logps/chosen": -0.34735921025276184,
"logps/rejected": -0.9831670522689819,
"loss": 1.0538,
"nll_loss": 1.0124287605285645,
"rewards/accuracies": 0.8083333373069763,
"rewards/chosen": -0.034735921770334244,
"rewards/margins": 0.06358078867197037,
"rewards/rejected": -0.09831671416759491,
"step": 2120
},
{
"epoch": 2.9440850277264325,
"grad_norm": 3.097028970718384,
"learning_rate": 5.0181818181818184e-08,
"log_odds_chosen": 1.7917529344558716,
"log_odds_ratio": -0.3308008909225464,
"logits/chosen": 1.9779587984085083,
"logits/rejected": 2.0592360496520996,
"logps/chosen": -0.2952510714530945,
"logps/rejected": -1.067455530166626,
"loss": 1.0471,
"nll_loss": 1.0139851570129395,
"rewards/accuracies": 0.8833333253860474,
"rewards/chosen": -0.02952510491013527,
"rewards/margins": 0.07722045481204987,
"rewards/rejected": -0.10674557089805603,
"step": 2125
},
{
"epoch": 2.9510166358595193,
"grad_norm": 2.174734354019165,
"learning_rate": 4.9272727272727274e-08,
"log_odds_chosen": 1.7724860906600952,
"log_odds_ratio": -0.3275219798088074,
"logits/chosen": 2.0415663719177246,
"logits/rejected": 2.103684425354004,
"logps/chosen": -0.27193596959114075,
"logps/rejected": -1.0836893320083618,
"loss": 0.9898,
"nll_loss": 0.9570819139480591,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.027193596586585045,
"rewards/margins": 0.08117534220218658,
"rewards/rejected": -0.10836894065141678,
"step": 2130
},
{
"epoch": 2.957948243992606,
"grad_norm": 1.5665240287780762,
"learning_rate": 4.8363636363636365e-08,
"log_odds_chosen": 1.7095661163330078,
"log_odds_ratio": -0.34748879075050354,
"logits/chosen": 1.98307466506958,
"logits/rejected": 2.0420520305633545,
"logps/chosen": -0.32826271653175354,
"logps/rejected": -1.0827791690826416,
"loss": 1.0023,
"nll_loss": 0.9675683975219727,
"rewards/accuracies": 0.9083333611488342,
"rewards/chosen": -0.032826270908117294,
"rewards/margins": 0.07545164227485657,
"rewards/rejected": -0.10827790945768356,
"step": 2135
},
{
"epoch": 2.9648798521256934,
"grad_norm": 3.9010303020477295,
"learning_rate": 4.7454545454545455e-08,
"log_odds_chosen": 1.8488551378250122,
"log_odds_ratio": -0.3088344633579254,
"logits/chosen": 2.000403881072998,
"logits/rejected": 2.080737352371216,
"logps/chosen": -0.298979252576828,
"logps/rejected": -1.1432158946990967,
"loss": 1.0063,
"nll_loss": 0.9754161238670349,
"rewards/accuracies": 0.9166666865348816,
"rewards/chosen": -0.02989793010056019,
"rewards/margins": 0.08442366868257523,
"rewards/rejected": -0.11432159692049026,
"step": 2140
},
{
"epoch": 2.97181146025878,
"grad_norm": 2.5673093795776367,
"learning_rate": 4.6545454545454546e-08,
"log_odds_chosen": 1.8592909574508667,
"log_odds_ratio": -0.26686161756515503,
"logits/chosen": 2.063077688217163,
"logits/rejected": 2.1509907245635986,
"logps/chosen": -0.2621632218360901,
"logps/rejected": -1.0871644020080566,
"loss": 1.0334,
"nll_loss": 1.0067225694656372,
"rewards/accuracies": 0.9166666865348816,
"rewards/chosen": -0.02621631696820259,
"rewards/margins": 0.08250012993812561,
"rewards/rejected": -0.1087164580821991,
"step": 2145
},
{
"epoch": 2.978743068391867,
"grad_norm": 2.2342443466186523,
"learning_rate": 4.5636363636363637e-08,
"log_odds_chosen": 1.6307332515716553,
"log_odds_ratio": -0.3424622416496277,
"logits/chosen": 2.004580497741699,
"logits/rejected": 2.060046434402466,
"logps/chosen": -0.3020874261856079,
"logps/rejected": -1.084688663482666,
"loss": 0.9816,
"nll_loss": 0.9473193883895874,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.03020874410867691,
"rewards/margins": 0.07826013118028641,
"rewards/rejected": -0.10846886783838272,
"step": 2150
},
{
"epoch": 2.9856746765249538,
"grad_norm": 3.1599783897399902,
"learning_rate": 4.472727272727273e-08,
"log_odds_chosen": 2.105912685394287,
"log_odds_ratio": -0.27635109424591064,
"logits/chosen": 2.068650245666504,
"logits/rejected": 2.123730182647705,
"logps/chosen": -0.2944362461566925,
"logps/rejected": -1.2996933460235596,
"loss": 1.0106,
"nll_loss": 0.9829762578010559,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.029443617910146713,
"rewards/margins": 0.10052569955587387,
"rewards/rejected": -0.12996931374073029,
"step": 2155
},
{
"epoch": 2.9926062846580406,
"grad_norm": 3.6730105876922607,
"learning_rate": 4.381818181818182e-08,
"log_odds_chosen": 1.8076189756393433,
"log_odds_ratio": -0.2718731462955475,
"logits/chosen": 2.034381628036499,
"logits/rejected": 2.103024959564209,
"logps/chosen": -0.2844863831996918,
"logps/rejected": -1.0949420928955078,
"loss": 1.0227,
"nll_loss": 0.9955376982688904,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.028448637574911118,
"rewards/margins": 0.08104557543992996,
"rewards/rejected": -0.10949420928955078,
"step": 2160
},
{
"epoch": 2.9995378927911274,
"grad_norm": 3.9206016063690186,
"learning_rate": 4.290909090909091e-08,
"log_odds_chosen": 1.8349543809890747,
"log_odds_ratio": -0.27683863043785095,
"logits/chosen": 2.036572217941284,
"logits/rejected": 2.089813232421875,
"logps/chosen": -0.27643078565597534,
"logps/rejected": -1.0756059885025024,
"loss": 0.9799,
"nll_loss": 0.9521928429603577,
"rewards/accuracies": 0.8916666507720947,
"rewards/chosen": -0.027643078938126564,
"rewards/margins": 0.0799175277352333,
"rewards/rejected": -0.10756059736013412,
"step": 2165
},
{
"epoch": 3.0055452865064693,
"grad_norm": 1.9081456661224365,
"learning_rate": 4.2e-08,
"log_odds_chosen": 1.7460095882415771,
"log_odds_ratio": -0.32449567317962646,
"logits/chosen": 2.021742820739746,
"logits/rejected": 2.1034204959869385,
"logps/chosen": -0.27256032824516296,
"logps/rejected": -1.0215450525283813,
"loss": 0.8395,
"nll_loss": 0.9364208579063416,
"rewards/accuracies": 0.8621795177459717,
"rewards/chosen": -0.027256034314632416,
"rewards/margins": 0.07489847391843796,
"rewards/rejected": -0.10215452313423157,
"step": 2170
},
{
"epoch": 3.0124768946395566,
"grad_norm": 2.545027017593384,
"learning_rate": 4.109090909090909e-08,
"log_odds_chosen": 1.8810588121414185,
"log_odds_ratio": -0.2728542387485504,
"logits/chosen": 2.072154998779297,
"logits/rejected": 2.1284384727478027,
"logps/chosen": -0.28162866830825806,
"logps/rejected": -1.0774617195129395,
"loss": 1.017,
"nll_loss": 0.9897640347480774,
"rewards/accuracies": 0.9333333373069763,
"rewards/chosen": -0.028162868693470955,
"rewards/margins": 0.0795833095908165,
"rewards/rejected": -0.1077461913228035,
"step": 2175
},
{
"epoch": 3.0194085027726434,
"grad_norm": 1.6765599250793457,
"learning_rate": 4.018181818181818e-08,
"log_odds_chosen": 1.817507028579712,
"log_odds_ratio": -0.27471134066581726,
"logits/chosen": 1.970469355583191,
"logits/rejected": 2.050422191619873,
"logps/chosen": -0.2666342556476593,
"logps/rejected": -1.0124282836914062,
"loss": 0.9833,
"nll_loss": 0.9558401703834534,
"rewards/accuracies": 0.8916666507720947,
"rewards/chosen": -0.02666342444717884,
"rewards/margins": 0.0745794028043747,
"rewards/rejected": -0.10124283283948898,
"step": 2180
},
{
"epoch": 3.02634011090573,
"grad_norm": 1.7218668460845947,
"learning_rate": 3.927272727272727e-08,
"log_odds_chosen": 1.917079210281372,
"log_odds_ratio": -0.24975134432315826,
"logits/chosen": 2.128929853439331,
"logits/rejected": 2.173793077468872,
"logps/chosen": -0.29144442081451416,
"logps/rejected": -1.118105173110962,
"loss": 1.0045,
"nll_loss": 0.9795462489128113,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.029144441708922386,
"rewards/margins": 0.08266608417034149,
"rewards/rejected": -0.11181053519248962,
"step": 2185
},
{
"epoch": 3.033271719038817,
"grad_norm": 2.8524911403656006,
"learning_rate": 3.836363636363636e-08,
"log_odds_chosen": 1.9781666994094849,
"log_odds_ratio": -0.24611227214336395,
"logits/chosen": 2.036329746246338,
"logits/rejected": 2.1263298988342285,
"logps/chosen": -0.2536916136741638,
"logps/rejected": -1.1110204458236694,
"loss": 0.9873,
"nll_loss": 0.9627164602279663,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.025369159877300262,
"rewards/margins": 0.08573289960622787,
"rewards/rejected": -0.11110205203294754,
"step": 2190
},
{
"epoch": 3.040203327171904,
"grad_norm": 2.266507387161255,
"learning_rate": 3.745454545454546e-08,
"log_odds_chosen": 1.695191502571106,
"log_odds_ratio": -0.32812872529029846,
"logits/chosen": 1.9582918882369995,
"logits/rejected": 2.0455925464630127,
"logps/chosen": -0.23305271565914154,
"logps/rejected": -0.9286764860153198,
"loss": 1.0022,
"nll_loss": 0.969412088394165,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.023305270820856094,
"rewards/margins": 0.06956236064434052,
"rewards/rejected": -0.0928676426410675,
"step": 2195
},
{
"epoch": 3.0471349353049906,
"grad_norm": 2.823397159576416,
"learning_rate": 3.654545454545455e-08,
"log_odds_chosen": 1.8539453744888306,
"log_odds_ratio": -0.31512120366096497,
"logits/chosen": 1.9888670444488525,
"logits/rejected": 2.056990623474121,
"logps/chosen": -0.2506329417228699,
"logps/rejected": -1.078035593032837,
"loss": 0.97,
"nll_loss": 0.9385051727294922,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.025063293054699898,
"rewards/margins": 0.08274027705192566,
"rewards/rejected": -0.10780356079339981,
"step": 2200
},
{
"epoch": 3.054066543438078,
"grad_norm": 2.515324354171753,
"learning_rate": 3.563636363636364e-08,
"log_odds_chosen": 2.2040858268737793,
"log_odds_ratio": -0.2215387225151062,
"logits/chosen": 2.07570743560791,
"logits/rejected": 2.1536285877227783,
"logps/chosen": -0.26311811804771423,
"logps/rejected": -1.3125801086425781,
"loss": 1.0534,
"nll_loss": 1.03126060962677,
"rewards/accuracies": 0.9416666626930237,
"rewards/chosen": -0.026311814785003662,
"rewards/margins": 0.10494618117809296,
"rewards/rejected": -0.1312580108642578,
"step": 2205
},
{
"epoch": 3.0609981515711646,
"grad_norm": 2.884657382965088,
"learning_rate": 3.472727272727273e-08,
"log_odds_chosen": 1.8875175714492798,
"log_odds_ratio": -0.26213160157203674,
"logits/chosen": 1.9864482879638672,
"logits/rejected": 2.0323519706726074,
"logps/chosen": -0.28199702501296997,
"logps/rejected": -1.1133620738983154,
"loss": 1.0323,
"nll_loss": 1.006096363067627,
"rewards/accuracies": 0.9333333373069763,
"rewards/chosen": -0.028199700638651848,
"rewards/margins": 0.08313652873039246,
"rewards/rejected": -0.11133621633052826,
"step": 2210
},
{
"epoch": 3.0679297597042514,
"grad_norm": 5.779941082000732,
"learning_rate": 3.381818181818182e-08,
"log_odds_chosen": 1.598193883895874,
"log_odds_ratio": -0.3541763126850128,
"logits/chosen": 2.0120930671691895,
"logits/rejected": 2.105281114578247,
"logps/chosen": -0.32157793641090393,
"logps/rejected": -0.9493343830108643,
"loss": 1.0617,
"nll_loss": 1.0262763500213623,
"rewards/accuracies": 0.8833333253860474,
"rewards/chosen": -0.032157786190509796,
"rewards/margins": 0.06277565658092499,
"rewards/rejected": -0.09493346512317657,
"step": 2215
},
{
"epoch": 3.074861367837338,
"grad_norm": 1.6296344995498657,
"learning_rate": 3.290909090909091e-08,
"log_odds_chosen": 2.0749495029449463,
"log_odds_ratio": -0.2285868376493454,
"logits/chosen": 1.9751384258270264,
"logits/rejected": 2.0483574867248535,
"logps/chosen": -0.22339893877506256,
"logps/rejected": -1.1327685117721558,
"loss": 0.9692,
"nll_loss": 0.946365475654602,
"rewards/accuracies": 0.9666666388511658,
"rewards/chosen": -0.022339891642332077,
"rewards/margins": 0.09093696624040604,
"rewards/rejected": -0.11327686160802841,
"step": 2220
},
{
"epoch": 3.081792975970425,
"grad_norm": 5.483705997467041,
"learning_rate": 3.2e-08,
"log_odds_chosen": 1.910452127456665,
"log_odds_ratio": -0.2830710709095001,
"logits/chosen": 2.040644645690918,
"logits/rejected": 2.113460063934326,
"logps/chosen": -0.2728864550590515,
"logps/rejected": -1.134534239768982,
"loss": 1.0302,
"nll_loss": 1.0018789768218994,
"rewards/accuracies": 0.8916666507720947,
"rewards/chosen": -0.0272886510938406,
"rewards/margins": 0.08616478741168976,
"rewards/rejected": -0.11345343291759491,
"step": 2225
},
{
"epoch": 3.088724584103512,
"grad_norm": 3.0457351207733154,
"learning_rate": 3.109090909090909e-08,
"log_odds_chosen": 1.8377028703689575,
"log_odds_ratio": -0.2788829207420349,
"logits/chosen": 1.994458794593811,
"logits/rejected": 2.058776378631592,
"logps/chosen": -0.2481241673231125,
"logps/rejected": -1.0059267282485962,
"loss": 0.9581,
"nll_loss": 0.9301670789718628,
"rewards/accuracies": 0.9083333611488342,
"rewards/chosen": -0.024812418967485428,
"rewards/margins": 0.07578025758266449,
"rewards/rejected": -0.10059265792369843,
"step": 2230
},
{
"epoch": 3.095656192236599,
"grad_norm": 1.5724313259124756,
"learning_rate": 3.018181818181818e-08,
"log_odds_chosen": 1.8434357643127441,
"log_odds_ratio": -0.2810656428337097,
"logits/chosen": 2.0633890628814697,
"logits/rejected": 2.108525276184082,
"logps/chosen": -0.27151191234588623,
"logps/rejected": -1.0885863304138184,
"loss": 0.9965,
"nll_loss": 0.9684168696403503,
"rewards/accuracies": 0.8666666746139526,
"rewards/chosen": -0.027151191607117653,
"rewards/margins": 0.08170744776725769,
"rewards/rejected": -0.1088586375117302,
"step": 2235
},
{
"epoch": 3.102587800369686,
"grad_norm": 2.0384411811828613,
"learning_rate": 2.927272727272727e-08,
"log_odds_chosen": 1.992503046989441,
"log_odds_ratio": -0.2679726183414459,
"logits/chosen": 2.0843007564544678,
"logits/rejected": 2.149775505065918,
"logps/chosen": -0.2585987150669098,
"logps/rejected": -1.1711468696594238,
"loss": 0.9832,
"nll_loss": 0.9563843607902527,
"rewards/accuracies": 0.8916666507720947,
"rewards/chosen": -0.02585986815392971,
"rewards/margins": 0.0912548154592514,
"rewards/rejected": -0.11711468547582626,
"step": 2240
},
{
"epoch": 3.1095194085027726,
"grad_norm": 1.6383320093154907,
"learning_rate": 2.836363636363636e-08,
"log_odds_chosen": 1.9366320371627808,
"log_odds_ratio": -0.29981935024261475,
"logits/chosen": 2.024559259414673,
"logits/rejected": 2.1117780208587646,
"logps/chosen": -0.28914347290992737,
"logps/rejected": -1.1294143199920654,
"loss": 0.9738,
"nll_loss": 0.943781316280365,
"rewards/accuracies": 0.8666666746139526,
"rewards/chosen": -0.02891434356570244,
"rewards/margins": 0.08402708917856216,
"rewards/rejected": -0.1129414513707161,
"step": 2245
},
{
"epoch": 3.1164510166358594,
"grad_norm": 2.1520862579345703,
"learning_rate": 2.745454545454545e-08,
"log_odds_chosen": 1.9886517524719238,
"log_odds_ratio": -0.26550471782684326,
"logits/chosen": 1.966781497001648,
"logits/rejected": 2.0406601428985596,
"logps/chosen": -0.25236350297927856,
"logps/rejected": -1.1123831272125244,
"loss": 0.9942,
"nll_loss": 0.9676342010498047,
"rewards/accuracies": 0.9416666626930237,
"rewards/chosen": -0.025236355140805244,
"rewards/margins": 0.08600196242332458,
"rewards/rejected": -0.11123832315206528,
"step": 2250
},
{
"epoch": 3.1233826247689462,
"grad_norm": 2.3597357273101807,
"learning_rate": 2.6545454545454542e-08,
"log_odds_chosen": 1.6445980072021484,
"log_odds_ratio": -0.3386446535587311,
"logits/chosen": 2.031057834625244,
"logits/rejected": 2.0886597633361816,
"logps/chosen": -0.2981587052345276,
"logps/rejected": -1.0017781257629395,
"loss": 1.0319,
"nll_loss": 0.9980849027633667,
"rewards/accuracies": 0.8416666388511658,
"rewards/chosen": -0.02981586940586567,
"rewards/margins": 0.07036194205284119,
"rewards/rejected": -0.1001778170466423,
"step": 2255
},
{
"epoch": 3.1303142329020335,
"grad_norm": 2.875710964202881,
"learning_rate": 2.5636363636363633e-08,
"log_odds_chosen": 2.163516044616699,
"log_odds_ratio": -0.2714638113975525,
"logits/chosen": 2.009424924850464,
"logits/rejected": 2.0776355266571045,
"logps/chosen": -0.25366875529289246,
"logps/rejected": -1.265039324760437,
"loss": 0.9918,
"nll_loss": 0.9646516442298889,
"rewards/accuracies": 0.8666666746139526,
"rewards/chosen": -0.025366876274347305,
"rewards/margins": 0.10113705694675446,
"rewards/rejected": -0.12650392949581146,
"step": 2260
},
{
"epoch": 3.1372458410351203,
"grad_norm": 2.339726686477661,
"learning_rate": 2.4727272727272727e-08,
"log_odds_chosen": 1.7720131874084473,
"log_odds_ratio": -0.3246005177497864,
"logits/chosen": 2.034062623977661,
"logits/rejected": 2.0977277755737305,
"logps/chosen": -0.27337199449539185,
"logps/rejected": -1.0653187036514282,
"loss": 0.988,
"nll_loss": 0.9555687308311462,
"rewards/accuracies": 0.8666666746139526,
"rewards/chosen": -0.027337197214365005,
"rewards/margins": 0.07919467240571976,
"rewards/rejected": -0.10653186589479446,
"step": 2265
},
{
"epoch": 3.144177449168207,
"grad_norm": 3.5936927795410156,
"learning_rate": 2.3818181818181817e-08,
"log_odds_chosen": 1.9385422468185425,
"log_odds_ratio": -0.3190802335739136,
"logits/chosen": 1.9812796115875244,
"logits/rejected": 2.03690767288208,
"logps/chosen": -0.2825137674808502,
"logps/rejected": -1.2010154724121094,
"loss": 1.0062,
"nll_loss": 0.9742683172225952,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.02825137972831726,
"rewards/margins": 0.09185018390417099,
"rewards/rejected": -0.12010155618190765,
"step": 2270
},
{
"epoch": 3.151109057301294,
"grad_norm": 2.3216748237609863,
"learning_rate": 2.2909090909090908e-08,
"log_odds_chosen": 1.849832534790039,
"log_odds_ratio": -0.30438894033432007,
"logits/chosen": 2.030111789703369,
"logits/rejected": 2.0937530994415283,
"logps/chosen": -0.3096204400062561,
"logps/rejected": -1.1795369386672974,
"loss": 1.0247,
"nll_loss": 0.994240939617157,
"rewards/accuracies": 0.8833333253860474,
"rewards/chosen": -0.030962049961090088,
"rewards/margins": 0.08699165284633636,
"rewards/rejected": -0.11795369535684586,
"step": 2275
},
{
"epoch": 3.1580406654343807,
"grad_norm": 5.251974582672119,
"learning_rate": 2.2e-08,
"log_odds_chosen": 1.73758864402771,
"log_odds_ratio": -0.3142777681350708,
"logits/chosen": 2.018113851547241,
"logits/rejected": 2.0766327381134033,
"logps/chosen": -0.25756967067718506,
"logps/rejected": -1.0410569906234741,
"loss": 0.9954,
"nll_loss": 0.9640125036239624,
"rewards/accuracies": 0.8833333253860474,
"rewards/chosen": -0.025756964460015297,
"rewards/margins": 0.07834872603416443,
"rewards/rejected": -0.10410568863153458,
"step": 2280
},
{
"epoch": 3.1649722735674675,
"grad_norm": 2.160755157470703,
"learning_rate": 2.109090909090909e-08,
"log_odds_chosen": 1.7930853366851807,
"log_odds_ratio": -0.31781256198883057,
"logits/chosen": 1.9471144676208496,
"logits/rejected": 2.0215981006622314,
"logps/chosen": -0.3217299282550812,
"logps/rejected": -1.0115313529968262,
"loss": 0.98,
"nll_loss": 0.9482495784759521,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.03217298910021782,
"rewards/margins": 0.0689801424741745,
"rewards/rejected": -0.10115313529968262,
"step": 2285
},
{
"epoch": 3.1719038817005547,
"grad_norm": 2.8207828998565674,
"learning_rate": 2.018181818181818e-08,
"log_odds_chosen": 1.7178608179092407,
"log_odds_ratio": -0.351553738117218,
"logits/chosen": 1.946655035018921,
"logits/rejected": 2.0031888484954834,
"logps/chosen": -0.2946816682815552,
"logps/rejected": -1.0428930521011353,
"loss": 1.0444,
"nll_loss": 1.009276270866394,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.029468165710568428,
"rewards/margins": 0.07482115179300308,
"rewards/rejected": -0.10428932309150696,
"step": 2290
},
{
"epoch": 3.1788354898336415,
"grad_norm": 3.0449793338775635,
"learning_rate": 1.927272727272727e-08,
"log_odds_chosen": 1.8244539499282837,
"log_odds_ratio": -0.3448461890220642,
"logits/chosen": 2.010988712310791,
"logits/rejected": 2.0648319721221924,
"logps/chosen": -0.29256051778793335,
"logps/rejected": -1.0924314260482788,
"loss": 0.9988,
"nll_loss": 0.9642786383628845,
"rewards/accuracies": 0.8416666388511658,
"rewards/chosen": -0.029256051406264305,
"rewards/margins": 0.07998708635568619,
"rewards/rejected": -0.10924313217401505,
"step": 2295
},
{
"epoch": 3.1857670979667283,
"grad_norm": 1.2735953330993652,
"learning_rate": 1.836363636363636e-08,
"log_odds_chosen": 2.074246406555176,
"log_odds_ratio": -0.23903319239616394,
"logits/chosen": 2.0425198078155518,
"logits/rejected": 2.104451894760132,
"logps/chosen": -0.2704167068004608,
"logps/rejected": -1.1976983547210693,
"loss": 0.9924,
"nll_loss": 0.9685426950454712,
"rewards/accuracies": 0.9166666865348816,
"rewards/chosen": -0.02704167179763317,
"rewards/margins": 0.09272817522287369,
"rewards/rejected": -0.11976984143257141,
"step": 2300
},
{
"epoch": 3.192698706099815,
"grad_norm": 2.2513015270233154,
"learning_rate": 1.7454545454545455e-08,
"log_odds_chosen": 1.8696075677871704,
"log_odds_ratio": -0.31109151244163513,
"logits/chosen": 1.982120156288147,
"logits/rejected": 2.0289342403411865,
"logps/chosen": -0.2669697701931,
"logps/rejected": -1.0606690645217896,
"loss": 1.0334,
"nll_loss": 1.0023095607757568,
"rewards/accuracies": 0.8916666507720947,
"rewards/chosen": -0.026696979999542236,
"rewards/margins": 0.0793699249625206,
"rewards/rejected": -0.10606691986322403,
"step": 2305
},
{
"epoch": 3.199630314232902,
"grad_norm": 3.0249485969543457,
"learning_rate": 1.6545454545454545e-08,
"log_odds_chosen": 1.5753225088119507,
"log_odds_ratio": -0.32907435297966003,
"logits/chosen": 1.9598543643951416,
"logits/rejected": 2.043677568435669,
"logps/chosen": -0.2798163592815399,
"logps/rejected": -0.9463704228401184,
"loss": 1.0423,
"nll_loss": 1.0094271898269653,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.027981635183095932,
"rewards/margins": 0.06665540486574173,
"rewards/rejected": -0.09463704377412796,
"step": 2310
},
{
"epoch": 3.2065619223659887,
"grad_norm": 2.1163971424102783,
"learning_rate": 1.5636363636363636e-08,
"log_odds_chosen": 1.8971047401428223,
"log_odds_ratio": -0.27131593227386475,
"logits/chosen": 1.9652113914489746,
"logits/rejected": 2.0411899089813232,
"logps/chosen": -0.2609769105911255,
"logps/rejected": -1.032325029373169,
"loss": 0.9929,
"nll_loss": 0.9658178687095642,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.02609768696129322,
"rewards/margins": 0.07713483273983002,
"rewards/rejected": -0.10323251038789749,
"step": 2315
},
{
"epoch": 3.213493530499076,
"grad_norm": 1.9558539390563965,
"learning_rate": 1.4727272727272726e-08,
"log_odds_chosen": 1.7015489339828491,
"log_odds_ratio": -0.3433685898780823,
"logits/chosen": 2.122408866882324,
"logits/rejected": 2.172548770904541,
"logps/chosen": -0.3318374454975128,
"logps/rejected": -1.1208266019821167,
"loss": 1.0349,
"nll_loss": 1.0005649328231812,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.0331837423145771,
"rewards/margins": 0.07889891415834427,
"rewards/rejected": -0.11208265274763107,
"step": 2320
},
{
"epoch": 3.2204251386321627,
"grad_norm": 1.2714937925338745,
"learning_rate": 1.3818181818181817e-08,
"log_odds_chosen": 2.1636459827423096,
"log_odds_ratio": -0.23991011083126068,
"logits/chosen": 2.0637595653533936,
"logits/rejected": 2.1236019134521484,
"logps/chosen": -0.2423422783613205,
"logps/rejected": -1.284201741218567,
"loss": 0.9579,
"nll_loss": 0.9339547753334045,
"rewards/accuracies": 0.9333333373069763,
"rewards/chosen": -0.02423422783613205,
"rewards/margins": 0.10418593138456345,
"rewards/rejected": -0.1284201443195343,
"step": 2325
},
{
"epoch": 3.2273567467652495,
"grad_norm": 3.6437788009643555,
"learning_rate": 1.2909090909090908e-08,
"log_odds_chosen": 2.0622992515563965,
"log_odds_ratio": -0.25077375769615173,
"logits/chosen": 2.067502737045288,
"logits/rejected": 2.1184935569763184,
"logps/chosen": -0.28136977553367615,
"logps/rejected": -1.2336159944534302,
"loss": 0.9928,
"nll_loss": 0.9677172899246216,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.028136977925896645,
"rewards/margins": 0.09522464126348495,
"rewards/rejected": -0.12336160987615585,
"step": 2330
},
{
"epoch": 3.2342883548983363,
"grad_norm": 2.5397536754608154,
"learning_rate": 1.1999999999999998e-08,
"log_odds_chosen": 1.913984775543213,
"log_odds_ratio": -0.26355621218681335,
"logits/chosen": 1.9935226440429688,
"logits/rejected": 2.0521795749664307,
"logps/chosen": -0.24003277719020844,
"logps/rejected": -1.0872286558151245,
"loss": 1.0179,
"nll_loss": 0.9915151000022888,
"rewards/accuracies": 0.8666666746139526,
"rewards/chosen": -0.024003280326724052,
"rewards/margins": 0.08471958339214325,
"rewards/rejected": -0.10872285813093185,
"step": 2335
},
{
"epoch": 3.241219963031423,
"grad_norm": 2.6664090156555176,
"learning_rate": 1.109090909090909e-08,
"log_odds_chosen": 1.7946439981460571,
"log_odds_ratio": -0.3102231025695801,
"logits/chosen": 2.0916616916656494,
"logits/rejected": 2.1227028369903564,
"logps/chosen": -0.33324259519577026,
"logps/rejected": -1.194060206413269,
"loss": 1.027,
"nll_loss": 0.9959444403648376,
"rewards/accuracies": 0.8833333253860474,
"rewards/chosen": -0.033324260264635086,
"rewards/margins": 0.08608177304267883,
"rewards/rejected": -0.11940603703260422,
"step": 2340
},
{
"epoch": 3.2481515711645104,
"grad_norm": 3.543788194656372,
"learning_rate": 1.0181818181818181e-08,
"log_odds_chosen": 1.9351673126220703,
"log_odds_ratio": -0.28479108214378357,
"logits/chosen": 2.082379102706909,
"logits/rejected": 2.1343612670898438,
"logps/chosen": -0.27910298109054565,
"logps/rejected": -1.1840400695800781,
"loss": 1.0176,
"nll_loss": 0.9891124963760376,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.027910297736525536,
"rewards/margins": 0.09049370884895325,
"rewards/rejected": -0.11840400844812393,
"step": 2345
},
{
"epoch": 3.255083179297597,
"grad_norm": 2.4029653072357178,
"learning_rate": 9.272727272727272e-09,
"log_odds_chosen": 1.745149850845337,
"log_odds_ratio": -0.3268326222896576,
"logits/chosen": 2.009989023208618,
"logits/rejected": 2.0562989711761475,
"logps/chosen": -0.2905314564704895,
"logps/rejected": -1.0186735391616821,
"loss": 1.0114,
"nll_loss": 0.9786819815635681,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.02905314415693283,
"rewards/margins": 0.0728142112493515,
"rewards/rejected": -0.10186735540628433,
"step": 2350
},
{
"epoch": 3.262014787430684,
"grad_norm": 1.2835363149642944,
"learning_rate": 8.363636363636362e-09,
"log_odds_chosen": 2.011457920074463,
"log_odds_ratio": -0.2857803404331207,
"logits/chosen": 1.9969309568405151,
"logits/rejected": 2.0629494190216064,
"logps/chosen": -0.2886459529399872,
"logps/rejected": -1.2359684705734253,
"loss": 1.0145,
"nll_loss": 0.9858835339546204,
"rewards/accuracies": 0.9166666865348816,
"rewards/chosen": -0.028864597901701927,
"rewards/margins": 0.09473226219415665,
"rewards/rejected": -0.12359685450792313,
"step": 2355
},
{
"epoch": 3.2689463955637708,
"grad_norm": 2.477343797683716,
"learning_rate": 7.454545454545453e-09,
"log_odds_chosen": 1.7344855070114136,
"log_odds_ratio": -0.3474830687046051,
"logits/chosen": 1.9719436168670654,
"logits/rejected": 2.057311773300171,
"logps/chosen": -0.26447659730911255,
"logps/rejected": -1.0018370151519775,
"loss": 0.9815,
"nll_loss": 0.9467440247535706,
"rewards/accuracies": 0.8416666388511658,
"rewards/chosen": -0.026447657495737076,
"rewards/margins": 0.0737360343337059,
"rewards/rejected": -0.10018369555473328,
"step": 2360
},
{
"epoch": 3.2758780036968576,
"grad_norm": 2.9011664390563965,
"learning_rate": 6.545454545454546e-09,
"log_odds_chosen": 2.135871648788452,
"log_odds_ratio": -0.23335178196430206,
"logits/chosen": 2.0359859466552734,
"logits/rejected": 2.091546058654785,
"logps/chosen": -0.2491091936826706,
"logps/rejected": -1.206799030303955,
"loss": 1.0227,
"nll_loss": 0.9993228912353516,
"rewards/accuracies": 0.9166666865348816,
"rewards/chosen": -0.02491091936826706,
"rewards/margins": 0.0957689955830574,
"rewards/rejected": -0.12067990005016327,
"step": 2365
},
{
"epoch": 3.2828096118299444,
"grad_norm": 2.491895914077759,
"learning_rate": 5.6363636363636365e-09,
"log_odds_chosen": 1.9308069944381714,
"log_odds_ratio": -0.29651203751564026,
"logits/chosen": 1.9695427417755127,
"logits/rejected": 2.052243709564209,
"logps/chosen": -0.2871295213699341,
"logps/rejected": -1.1503547430038452,
"loss": 0.9909,
"nll_loss": 0.9612923860549927,
"rewards/accuracies": 0.8916666507720947,
"rewards/chosen": -0.028712956234812737,
"rewards/margins": 0.08632253110408783,
"rewards/rejected": -0.11503548920154572,
"step": 2370
},
{
"epoch": 3.2897412199630316,
"grad_norm": 2.254281520843506,
"learning_rate": 4.727272727272727e-09,
"log_odds_chosen": 1.8961893320083618,
"log_odds_ratio": -0.2896248400211334,
"logits/chosen": 2.035944938659668,
"logits/rejected": 2.102773427963257,
"logps/chosen": -0.30598270893096924,
"logps/rejected": -1.146560549736023,
"loss": 1.0012,
"nll_loss": 0.9722784757614136,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.030598269775509834,
"rewards/margins": 0.0840577781200409,
"rewards/rejected": -0.11465605348348618,
"step": 2375
},
{
"epoch": 3.2966728280961184,
"grad_norm": 4.881258487701416,
"learning_rate": 3.8181818181818185e-09,
"log_odds_chosen": 1.9275856018066406,
"log_odds_ratio": -0.26452910900115967,
"logits/chosen": 2.0649008750915527,
"logits/rejected": 2.1088078022003174,
"logps/chosen": -0.2894597351551056,
"logps/rejected": -1.1926367282867432,
"loss": 0.998,
"nll_loss": 0.9715353846549988,
"rewards/accuracies": 0.9333333373069763,
"rewards/chosen": -0.028945976868271828,
"rewards/margins": 0.09031769633293152,
"rewards/rejected": -0.1192636638879776,
"step": 2380
},
{
"epoch": 3.303604436229205,
"grad_norm": 2.7718567848205566,
"learning_rate": 2.909090909090909e-09,
"log_odds_chosen": 1.9183131456375122,
"log_odds_ratio": -0.29583075642585754,
"logits/chosen": 2.068955183029175,
"logits/rejected": 2.141144275665283,
"logps/chosen": -0.28529077768325806,
"logps/rejected": -1.2924492359161377,
"loss": 1.0428,
"nll_loss": 1.013238787651062,
"rewards/accuracies": 0.8916666507720947,
"rewards/chosen": -0.028529079630970955,
"rewards/margins": 0.10071584582328796,
"rewards/rejected": -0.12924490869045258,
"step": 2385
},
{
"epoch": 3.310536044362292,
"grad_norm": 2.2338600158691406,
"learning_rate": 2e-09,
"log_odds_chosen": 1.802534580230713,
"log_odds_ratio": -0.27281951904296875,
"logits/chosen": 1.9571995735168457,
"logits/rejected": 2.0102591514587402,
"logps/chosen": -0.27262741327285767,
"logps/rejected": -1.066834568977356,
"loss": 1.0008,
"nll_loss": 0.9735398888587952,
"rewards/accuracies": 0.9166666865348816,
"rewards/chosen": -0.027262739837169647,
"rewards/margins": 0.07942071557044983,
"rewards/rejected": -0.10668346285820007,
"step": 2390
},
{
"epoch": 3.317467652495379,
"grad_norm": 3.0731616020202637,
"learning_rate": 1.090909090909091e-09,
"log_odds_chosen": 2.0021066665649414,
"log_odds_ratio": -0.28030332922935486,
"logits/chosen": 1.9898285865783691,
"logits/rejected": 2.062572717666626,
"logps/chosen": -0.27583804726600647,
"logps/rejected": -1.1610000133514404,
"loss": 0.9737,
"nll_loss": 0.9456390738487244,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.027583809569478035,
"rewards/margins": 0.08851619809865952,
"rewards/rejected": -0.1161000058054924,
"step": 2395
},
{
"epoch": 3.324399260628466,
"grad_norm": 3.5117971897125244,
"learning_rate": 1.8181818181818182e-10,
"log_odds_chosen": 1.9471094608306885,
"log_odds_ratio": -0.2912288308143616,
"logits/chosen": 2.042628049850464,
"logits/rejected": 2.106832265853882,
"logps/chosen": -0.276217520236969,
"logps/rejected": -1.118571162223816,
"loss": 0.9807,
"nll_loss": 0.9515801668167114,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.02762174978852272,
"rewards/margins": 0.08423535525798798,
"rewards/rejected": -0.11185713112354279,
"step": 2400
}
],
"logging_steps": 5,
"max_steps": 2400,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 400,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}