{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.324399260628466, "eval_steps": 400.0, "global_step": 2400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0013863216266173752, "grad_norm": 16.979148864746094, "learning_rate": 0.0, "log_odds_chosen": -0.018929382786154747, "log_odds_ratio": -0.9500243067741394, "logits/chosen": 1.954408049583435, "logits/rejected": 2.0060200691223145, "logps/chosen": -0.19068074226379395, "logps/rejected": -0.3597089946269989, "loss": 1.7279, "nll_loss": 1.632872462272644, "rewards/accuracies": 0.5833333134651184, "rewards/chosen": -0.019068075343966484, "rewards/margins": 0.016902821138501167, "rewards/rejected": -0.03597090020775795, "step": 1 }, { "epoch": 0.006931608133086876, "grad_norm": 20.4935245513916, "learning_rate": 8e-09, "log_odds_chosen": -0.03683535382151604, "log_odds_ratio": -0.919361412525177, "logits/chosen": 1.4786995649337769, "logits/rejected": 1.501151204109192, "logps/chosen": -0.3465827703475952, "logps/rejected": -0.4160928726196289, "loss": 1.9313, "nll_loss": 1.8393574953079224, "rewards/accuracies": 0.5416666865348816, "rewards/chosen": -0.0346582755446434, "rewards/margins": 0.006951010320335627, "rewards/rejected": -0.04160928353667259, "step": 5 }, { "epoch": 0.013863216266173753, "grad_norm": 21.816869735717773, "learning_rate": 1.8e-08, "log_odds_chosen": 0.004844508599489927, "log_odds_ratio": -0.8253452181816101, "logits/chosen": 1.6159125566482544, "logits/rejected": 1.631380319595337, "logps/chosen": -0.29385247826576233, "logps/rejected": -0.3623644709587097, "loss": 1.8841, "nll_loss": 1.8015655279159546, "rewards/accuracies": 0.5416666865348816, "rewards/chosen": -0.029385250061750412, "rewards/margins": 0.006851200480014086, "rewards/rejected": -0.03623645007610321, "step": 10 }, { "epoch": 0.020794824399260628, "grad_norm": 23.262466430664062, "learning_rate": 2.8000000000000003e-08, "log_odds_chosen": 0.015412552282214165, "log_odds_ratio": -0.8413525819778442, "logits/chosen": 1.5471739768981934, "logits/rejected": 1.581672191619873, "logps/chosen": -0.29625552892684937, "logps/rejected": -0.3712186813354492, "loss": 1.8182, "nll_loss": 1.7340798377990723, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.029625553637742996, "rewards/margins": 0.00749631691724062, "rewards/rejected": -0.03712187334895134, "step": 15 }, { "epoch": 0.027726432532347505, "grad_norm": 23.77545166015625, "learning_rate": 3.7999999999999996e-08, "log_odds_chosen": 0.13983069360256195, "log_odds_ratio": -0.7581052184104919, "logits/chosen": 1.5518994331359863, "logits/rejected": 1.5775402784347534, "logps/chosen": -0.3246292769908905, "logps/rejected": -0.4345749020576477, "loss": 1.9212, "nll_loss": 1.8454102277755737, "rewards/accuracies": 0.5666666626930237, "rewards/chosen": -0.03246293216943741, "rewards/margins": 0.010994565673172474, "rewards/rejected": -0.04345749691128731, "step": 20 }, { "epoch": 0.03465804066543438, "grad_norm": 21.824874877929688, "learning_rate": 4.799999999999999e-08, "log_odds_chosen": -0.025701021775603294, "log_odds_ratio": -0.8547807931900024, "logits/chosen": 1.6149464845657349, "logits/rejected": 1.6376367807388306, "logps/chosen": -0.3293762803077698, "logps/rejected": -0.3794548809528351, "loss": 1.916, "nll_loss": 1.8305460214614868, "rewards/accuracies": 0.5, "rewards/chosen": -0.032937631011009216, "rewards/margins": 0.005007854197174311, "rewards/rejected": -0.03794548660516739, "step": 25 }, { "epoch": 0.041589648798521256, "grad_norm": 23.641414642333984, "learning_rate": 5.7999999999999997e-08, "log_odds_chosen": -0.01403873972594738, "log_odds_ratio": -0.8547641634941101, "logits/chosen": 1.6201530694961548, "logits/rejected": 1.6380269527435303, "logps/chosen": -0.3491577208042145, "logps/rejected": -0.4028278887271881, "loss": 1.8577, "nll_loss": 1.7722562551498413, "rewards/accuracies": 0.5166666507720947, "rewards/chosen": -0.034915778785943985, "rewards/margins": 0.00536701874807477, "rewards/rejected": -0.04028278589248657, "step": 30 }, { "epoch": 0.04852125693160813, "grad_norm": 21.654430389404297, "learning_rate": 6.8e-08, "log_odds_chosen": 0.17713108658790588, "log_odds_ratio": -0.7579152584075928, "logits/chosen": 1.5499821901321411, "logits/rejected": 1.5707145929336548, "logps/chosen": -0.31028756499290466, "logps/rejected": -0.4093845784664154, "loss": 1.844, "nll_loss": 1.7682510614395142, "rewards/accuracies": 0.6083333492279053, "rewards/chosen": -0.031028758734464645, "rewards/margins": 0.009909691289067268, "rewards/rejected": -0.04093845188617706, "step": 35 }, { "epoch": 0.05545286506469501, "grad_norm": 30.187108993530273, "learning_rate": 7.8e-08, "log_odds_chosen": 0.18452158570289612, "log_odds_ratio": -0.7830556035041809, "logits/chosen": 1.6507318019866943, "logits/rejected": 1.6740870475769043, "logps/chosen": -0.28885844349861145, "logps/rejected": -0.38478565216064453, "loss": 1.8536, "nll_loss": 1.775251865386963, "rewards/accuracies": 0.5833333134651184, "rewards/chosen": -0.028885845094919205, "rewards/margins": 0.00959271751344204, "rewards/rejected": -0.03847856447100639, "step": 40 }, { "epoch": 0.062384473197781884, "grad_norm": 20.389713287353516, "learning_rate": 8.8e-08, "log_odds_chosen": 0.02360691875219345, "log_odds_ratio": -0.8329288959503174, "logits/chosen": 1.6618582010269165, "logits/rejected": 1.676173448562622, "logps/chosen": -0.35506105422973633, "logps/rejected": -0.4218937158584595, "loss": 1.888, "nll_loss": 1.8047412633895874, "rewards/accuracies": 0.5333333611488342, "rewards/chosen": -0.03550610691308975, "rewards/margins": 0.006683265324681997, "rewards/rejected": -0.04218936711549759, "step": 45 }, { "epoch": 0.06931608133086876, "grad_norm": 18.37879180908203, "learning_rate": 9.799999999999999e-08, "log_odds_chosen": 0.11584530770778656, "log_odds_ratio": -0.768284022808075, "logits/chosen": 1.658928394317627, "logits/rejected": 1.6891582012176514, "logps/chosen": -0.3134225904941559, "logps/rejected": -0.41441354155540466, "loss": 1.8648, "nll_loss": 1.78793203830719, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.03134226053953171, "rewards/margins": 0.01009910274296999, "rewards/rejected": -0.04144136235117912, "step": 50 }, { "epoch": 0.07624768946395563, "grad_norm": 19.63814926147461, "learning_rate": 1.08e-07, "log_odds_chosen": -0.07513849437236786, "log_odds_ratio": -0.8687005639076233, "logits/chosen": 1.5901387929916382, "logits/rejected": 1.600988507270813, "logps/chosen": -0.3237656056880951, "logps/rejected": -0.36480894684791565, "loss": 1.8659, "nll_loss": 1.7789969444274902, "rewards/accuracies": 0.5416666865348816, "rewards/chosen": -0.03237656131386757, "rewards/margins": 0.004104338586330414, "rewards/rejected": -0.03648089990019798, "step": 55 }, { "epoch": 0.08317929759704251, "grad_norm": 21.044511795043945, "learning_rate": 1.1799999999999998e-07, "log_odds_chosen": 0.12105648219585419, "log_odds_ratio": -0.8005534410476685, "logits/chosen": 1.7121615409851074, "logits/rejected": 1.7367610931396484, "logps/chosen": -0.2864932119846344, "logps/rejected": -0.41227853298187256, "loss": 1.8738, "nll_loss": 1.793702483177185, "rewards/accuracies": 0.5666666626930237, "rewards/chosen": -0.02864932268857956, "rewards/margins": 0.012578531168401241, "rewards/rejected": -0.04122785106301308, "step": 60 }, { "epoch": 0.09011090573012939, "grad_norm": 15.05855941772461, "learning_rate": 1.28e-07, "log_odds_chosen": -0.08899393677711487, "log_odds_ratio": -0.9060453772544861, "logits/chosen": 1.7786991596221924, "logits/rejected": 1.7979838848114014, "logps/chosen": -0.2860961854457855, "logps/rejected": -0.35240638256073, "loss": 1.8233, "nll_loss": 1.7327053546905518, "rewards/accuracies": 0.5416666865348816, "rewards/chosen": -0.028609616681933403, "rewards/margins": 0.006631023250520229, "rewards/rejected": -0.03524063900113106, "step": 65 }, { "epoch": 0.09704251386321626, "grad_norm": 12.863988876342773, "learning_rate": 1.38e-07, "log_odds_chosen": 0.02776586450636387, "log_odds_ratio": -0.8355816006660461, "logits/chosen": 1.926443099975586, "logits/rejected": 1.9493002891540527, "logps/chosen": -0.3111321032047272, "logps/rejected": -0.3676571547985077, "loss": 1.7489, "nll_loss": 1.6653305292129517, "rewards/accuracies": 0.5833333134651184, "rewards/chosen": -0.031113211065530777, "rewards/margins": 0.005652503110468388, "rewards/rejected": -0.03676571324467659, "step": 70 }, { "epoch": 0.10397412199630314, "grad_norm": 14.937926292419434, "learning_rate": 1.48e-07, "log_odds_chosen": 0.21290098130702972, "log_odds_ratio": -0.7118617296218872, "logits/chosen": 1.9580568075180054, "logits/rejected": 1.9671556949615479, "logps/chosen": -0.33254608511924744, "logps/rejected": -0.42474591732025146, "loss": 1.842, "nll_loss": 1.770819902420044, "rewards/accuracies": 0.5833333134651184, "rewards/chosen": -0.033254608511924744, "rewards/margins": 0.009219982661306858, "rewards/rejected": -0.04247459024190903, "step": 75 }, { "epoch": 0.11090573012939002, "grad_norm": 12.081910133361816, "learning_rate": 1.58e-07, "log_odds_chosen": 0.2506192624568939, "log_odds_ratio": -0.7591946721076965, "logits/chosen": 1.9343218803405762, "logits/rejected": 1.9571690559387207, "logps/chosen": -0.28907135128974915, "logps/rejected": -0.4210290014743805, "loss": 1.8269, "nll_loss": 1.7509891986846924, "rewards/accuracies": 0.625, "rewards/chosen": -0.028907136991620064, "rewards/margins": 0.013195758685469627, "rewards/rejected": -0.04210289567708969, "step": 80 }, { "epoch": 0.1178373382624769, "grad_norm": 10.16718864440918, "learning_rate": 1.68e-07, "log_odds_chosen": 0.15295961499214172, "log_odds_ratio": -0.7727136015892029, "logits/chosen": 1.9498001337051392, "logits/rejected": 1.9737645387649536, "logps/chosen": -0.2715214490890503, "logps/rejected": -0.39682307839393616, "loss": 1.7426, "nll_loss": 1.665367603302002, "rewards/accuracies": 0.5833333134651184, "rewards/chosen": -0.02715214155614376, "rewards/margins": 0.012530164793133736, "rewards/rejected": -0.039682310074567795, "step": 85 }, { "epoch": 0.12476894639556377, "grad_norm": 8.379185676574707, "learning_rate": 1.78e-07, "log_odds_chosen": 0.011365304701030254, "log_odds_ratio": -0.8190609216690063, "logits/chosen": 2.1162543296813965, "logits/rejected": 2.1363251209259033, "logps/chosen": -0.32597094774246216, "logps/rejected": -0.37424901127815247, "loss": 1.7812, "nll_loss": 1.699331521987915, "rewards/accuracies": 0.5416666865348816, "rewards/chosen": -0.032597098499536514, "rewards/margins": 0.004827807657420635, "rewards/rejected": -0.03742489963769913, "step": 90 }, { "epoch": 0.13170055452865065, "grad_norm": 5.4643874168396, "learning_rate": 1.88e-07, "log_odds_chosen": 0.3046998977661133, "log_odds_ratio": -0.710991621017456, "logits/chosen": 2.2868356704711914, "logits/rejected": 2.319998264312744, "logps/chosen": -0.2605274021625519, "logps/rejected": -0.41061633825302124, "loss": 1.6722, "nll_loss": 1.6011360883712769, "rewards/accuracies": 0.6333333253860474, "rewards/chosen": -0.026052741333842278, "rewards/margins": 0.015008894726634026, "rewards/rejected": -0.0410616360604763, "step": 95 }, { "epoch": 0.13863216266173753, "grad_norm": 5.312658309936523, "learning_rate": 1.98e-07, "log_odds_chosen": 0.08499274402856827, "log_odds_ratio": -0.8072493672370911, "logits/chosen": 2.332758665084839, "logits/rejected": 2.354078769683838, "logps/chosen": -0.29434487223625183, "logps/rejected": -0.36028656363487244, "loss": 1.6704, "nll_loss": 1.5896700620651245, "rewards/accuracies": 0.6083333492279053, "rewards/chosen": -0.029434483498334885, "rewards/margins": 0.006594173610210419, "rewards/rejected": -0.0360286645591259, "step": 100 }, { "epoch": 0.1455637707948244, "grad_norm": 4.78338098526001, "learning_rate": 2.08e-07, "log_odds_chosen": 0.13001103699207306, "log_odds_ratio": -0.7748425602912903, "logits/chosen": 2.3882839679718018, "logits/rejected": 2.4127211570739746, "logps/chosen": -0.2966289222240448, "logps/rejected": -0.39911049604415894, "loss": 1.6961, "nll_loss": 1.6186034679412842, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.02966289594769478, "rewards/margins": 0.01024815533310175, "rewards/rejected": -0.039911042898893356, "step": 105 }, { "epoch": 0.15249537892791126, "grad_norm": 5.31309700012207, "learning_rate": 2.18e-07, "log_odds_chosen": 0.0922786071896553, "log_odds_ratio": -0.8008362054824829, "logits/chosen": 2.4012527465820312, "logits/rejected": 2.405054807662964, "logps/chosen": -0.31135261058807373, "logps/rejected": -0.40367835760116577, "loss": 1.6347, "nll_loss": 1.5546340942382812, "rewards/accuracies": 0.5666666626930237, "rewards/chosen": -0.031135262921452522, "rewards/margins": 0.009232571348547935, "rewards/rejected": -0.04036783427000046, "step": 110 }, { "epoch": 0.15942698706099814, "grad_norm": 4.425213813781738, "learning_rate": 2.2799999999999998e-07, "log_odds_chosen": -0.02769007720053196, "log_odds_ratio": -0.8615487813949585, "logits/chosen": 2.4684412479400635, "logits/rejected": 2.4840517044067383, "logps/chosen": -0.28860214352607727, "logps/rejected": -0.3511938452720642, "loss": 1.5912, "nll_loss": 1.5050197839736938, "rewards/accuracies": 0.5166666507720947, "rewards/chosen": -0.028860213235020638, "rewards/margins": 0.006259171757847071, "rewards/rejected": -0.03511938080191612, "step": 115 }, { "epoch": 0.16635859519408502, "grad_norm": 4.366436958312988, "learning_rate": 2.38e-07, "log_odds_chosen": 0.07487554848194122, "log_odds_ratio": -0.7914910912513733, "logits/chosen": 2.51110577583313, "logits/rejected": 2.52194881439209, "logps/chosen": -0.3162931501865387, "logps/rejected": -0.3853374719619751, "loss": 1.6602, "nll_loss": 1.5810089111328125, "rewards/accuracies": 0.5416666865348816, "rewards/chosen": -0.03162931278347969, "rewards/margins": 0.006904428359121084, "rewards/rejected": -0.03853374347090721, "step": 120 }, { "epoch": 0.1732902033271719, "grad_norm": 3.4541594982147217, "learning_rate": 2.48e-07, "log_odds_chosen": 0.24983125925064087, "log_odds_ratio": -0.7045524716377258, "logits/chosen": 2.5535173416137695, "logits/rejected": 2.5766799449920654, "logps/chosen": -0.2637786567211151, "logps/rejected": -0.3734634816646576, "loss": 1.5197, "nll_loss": 1.4492452144622803, "rewards/accuracies": 0.6416666507720947, "rewards/chosen": -0.026377864181995392, "rewards/margins": 0.01096847839653492, "rewards/rejected": -0.03734634816646576, "step": 125 }, { "epoch": 0.18022181146025879, "grad_norm": 3.3947913646698, "learning_rate": 2.58e-07, "log_odds_chosen": 0.13998886942863464, "log_odds_ratio": -0.7501145005226135, "logits/chosen": 2.617069959640503, "logits/rejected": 2.6200926303863525, "logps/chosen": -0.289898544549942, "logps/rejected": -0.3845054805278778, "loss": 1.5256, "nll_loss": 1.45059072971344, "rewards/accuracies": 0.5916666388511658, "rewards/chosen": -0.02898985706269741, "rewards/margins": 0.00946069322526455, "rewards/rejected": -0.03845055028796196, "step": 130 }, { "epoch": 0.18715341959334567, "grad_norm": 2.9145822525024414, "learning_rate": 2.68e-07, "log_odds_chosen": 0.10625941306352615, "log_odds_ratio": -0.7714784145355225, "logits/chosen": 2.6704909801483154, "logits/rejected": 2.686890125274658, "logps/chosen": -0.2948876619338989, "logps/rejected": -0.3641115128993988, "loss": 1.4299, "nll_loss": 1.3527849912643433, "rewards/accuracies": 0.5333333611488342, "rewards/chosen": -0.02948876842856407, "rewards/margins": 0.006922383327037096, "rewards/rejected": -0.03641115128993988, "step": 135 }, { "epoch": 0.19408502772643252, "grad_norm": 3.358665704727173, "learning_rate": 2.78e-07, "log_odds_chosen": 0.09461755305528641, "log_odds_ratio": -0.735588014125824, "logits/chosen": 2.7141809463500977, "logits/rejected": 2.725619077682495, "logps/chosen": -0.2867269814014435, "logps/rejected": -0.3572947680950165, "loss": 1.4621, "nll_loss": 1.3885754346847534, "rewards/accuracies": 0.5666666626930237, "rewards/chosen": -0.028672698885202408, "rewards/margins": 0.007056778762489557, "rewards/rejected": -0.03572947904467583, "step": 140 }, { "epoch": 0.2010166358595194, "grad_norm": 3.4602420330047607, "learning_rate": 2.88e-07, "log_odds_chosen": 0.18481667339801788, "log_odds_ratio": -0.7241543531417847, "logits/chosen": 2.639542818069458, "logits/rejected": 2.6469709873199463, "logps/chosen": -0.2566547989845276, "logps/rejected": -0.3493967652320862, "loss": 1.4112, "nll_loss": 1.3387987613677979, "rewards/accuracies": 0.5833333134651184, "rewards/chosen": -0.02566548064351082, "rewards/margins": 0.009274197742342949, "rewards/rejected": -0.03493968024849892, "step": 145 }, { "epoch": 0.20794824399260628, "grad_norm": 3.3385841846466064, "learning_rate": 2.98e-07, "log_odds_chosen": 0.14522768557071686, "log_odds_ratio": -0.7345627546310425, "logits/chosen": 2.535898447036743, "logits/rejected": 2.555110216140747, "logps/chosen": -0.3192240595817566, "logps/rejected": -0.39829033613204956, "loss": 1.3993, "nll_loss": 1.3258506059646606, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.03192240744829178, "rewards/margins": 0.007906629703938961, "rewards/rejected": -0.039829038083553314, "step": 150 }, { "epoch": 0.21487985212569316, "grad_norm": 3.195482015609741, "learning_rate": 3.08e-07, "log_odds_chosen": 0.32608503103256226, "log_odds_ratio": -0.6445600390434265, "logits/chosen": 2.3535492420196533, "logits/rejected": 2.367779493331909, "logps/chosen": -0.2523113787174225, "logps/rejected": -0.35818183422088623, "loss": 1.3715, "nll_loss": 1.3070040941238403, "rewards/accuracies": 0.6083333492279053, "rewards/chosen": -0.0252311360090971, "rewards/margins": 0.010587050579488277, "rewards/rejected": -0.0358181856572628, "step": 155 }, { "epoch": 0.22181146025878004, "grad_norm": 2.6090826988220215, "learning_rate": 3.18e-07, "log_odds_chosen": 0.38106462359428406, "log_odds_ratio": -0.6169842481613159, "logits/chosen": 2.301053047180176, "logits/rejected": 2.3180480003356934, "logps/chosen": -0.2931780219078064, "logps/rejected": -0.4148869812488556, "loss": 1.3188, "nll_loss": 1.2571097612380981, "rewards/accuracies": 0.6166666746139526, "rewards/chosen": -0.02931780181825161, "rewards/margins": 0.01217089593410492, "rewards/rejected": -0.04148869961500168, "step": 160 }, { "epoch": 0.22874306839186692, "grad_norm": 2.2898175716400146, "learning_rate": 3.2799999999999997e-07, "log_odds_chosen": 0.24468782544136047, "log_odds_ratio": -0.6965723037719727, "logits/chosen": 2.494852304458618, "logits/rejected": 2.505509376525879, "logps/chosen": -0.28601107001304626, "logps/rejected": -0.4080314338207245, "loss": 1.2778, "nll_loss": 1.2081010341644287, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.02860110253095627, "rewards/margins": 0.012202044948935509, "rewards/rejected": -0.040803149342536926, "step": 165 }, { "epoch": 0.2356746765249538, "grad_norm": 2.1119935512542725, "learning_rate": 3.38e-07, "log_odds_chosen": 0.30791595578193665, "log_odds_ratio": -0.6434581279754639, "logits/chosen": 2.4814131259918213, "logits/rejected": 2.492072105407715, "logps/chosen": -0.29261448979377747, "logps/rejected": -0.4158262610435486, "loss": 1.2693, "nll_loss": 1.2049847841262817, "rewards/accuracies": 0.5583333373069763, "rewards/chosen": -0.029261449351906776, "rewards/margins": 0.012321173213422298, "rewards/rejected": -0.0415826216340065, "step": 170 }, { "epoch": 0.24260628465804066, "grad_norm": 2.052659511566162, "learning_rate": 3.48e-07, "log_odds_chosen": 0.2674095928668976, "log_odds_ratio": -0.6604223847389221, "logits/chosen": 2.45322847366333, "logits/rejected": 2.4553847312927246, "logps/chosen": -0.27184900641441345, "logps/rejected": -0.37303584814071655, "loss": 1.269, "nll_loss": 1.202986240386963, "rewards/accuracies": 0.5833333134651184, "rewards/chosen": -0.027184901759028435, "rewards/margins": 0.010118687525391579, "rewards/rejected": -0.037303585559129715, "step": 175 }, { "epoch": 0.24953789279112754, "grad_norm": 1.9022108316421509, "learning_rate": 3.58e-07, "log_odds_chosen": 0.3532262146472931, "log_odds_ratio": -0.6328123211860657, "logits/chosen": 2.6101863384246826, "logits/rejected": 2.6266489028930664, "logps/chosen": -0.2756868898868561, "logps/rejected": -0.3959445059299469, "loss": 1.2196, "nll_loss": 1.1562813520431519, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.027568688616156578, "rewards/margins": 0.012025760486721992, "rewards/rejected": -0.03959444910287857, "step": 180 }, { "epoch": 0.25646950092421444, "grad_norm": 2.0479915142059326, "learning_rate": 3.68e-07, "log_odds_chosen": 0.1621844470500946, "log_odds_ratio": -0.7073792219161987, "logits/chosen": 2.635596990585327, "logits/rejected": 2.6427814960479736, "logps/chosen": -0.28807908296585083, "logps/rejected": -0.35789498686790466, "loss": 1.1997, "nll_loss": 1.1289515495300293, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.028807910159230232, "rewards/margins": 0.0069815958850085735, "rewards/rejected": -0.035789504647254944, "step": 185 }, { "epoch": 0.2634011090573013, "grad_norm": 1.7852911949157715, "learning_rate": 3.7799999999999997e-07, "log_odds_chosen": 0.22550253570079803, "log_odds_ratio": -0.6922202706336975, "logits/chosen": 2.6954963207244873, "logits/rejected": 2.7063536643981934, "logps/chosen": -0.29761365056037903, "logps/rejected": -0.3754601776599884, "loss": 1.2128, "nll_loss": 1.143579125404358, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.02976136840879917, "rewards/margins": 0.007784651126712561, "rewards/rejected": -0.03754602000117302, "step": 190 }, { "epoch": 0.27033271719038815, "grad_norm": 2.0064260959625244, "learning_rate": 3.88e-07, "log_odds_chosen": 0.30835989117622375, "log_odds_ratio": -0.6616735458374023, "logits/chosen": 2.747114896774292, "logits/rejected": 2.756840229034424, "logps/chosen": -0.2845754623413086, "logps/rejected": -0.3947003185749054, "loss": 1.182, "nll_loss": 1.1158353090286255, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.02845754846930504, "rewards/margins": 0.011012484319508076, "rewards/rejected": -0.03947003558278084, "step": 195 }, { "epoch": 0.27726432532347506, "grad_norm": 1.6967897415161133, "learning_rate": 3.98e-07, "log_odds_chosen": 0.36090362071990967, "log_odds_ratio": -0.6156808733940125, "logits/chosen": 2.736931562423706, "logits/rejected": 2.7524020671844482, "logps/chosen": -0.2760154902935028, "logps/rejected": -0.39298757910728455, "loss": 1.2024, "nll_loss": 1.1408705711364746, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.02760155126452446, "rewards/margins": 0.011697209440171719, "rewards/rejected": -0.03929876163601875, "step": 200 }, { "epoch": 0.2841959334565619, "grad_norm": 1.344346046447754, "learning_rate": 3.9927272727272724e-07, "log_odds_chosen": 0.44202545285224915, "log_odds_ratio": -0.629891574382782, "logits/chosen": 2.8161656856536865, "logits/rejected": 2.827834129333496, "logps/chosen": -0.27560955286026, "logps/rejected": -0.43479490280151367, "loss": 1.1696, "nll_loss": 1.1066458225250244, "rewards/accuracies": 0.5833333134651184, "rewards/chosen": -0.02756096050143242, "rewards/margins": 0.015918532386422157, "rewards/rejected": -0.04347948729991913, "step": 205 }, { "epoch": 0.2911275415896488, "grad_norm": 1.632926344871521, "learning_rate": 3.983636363636363e-07, "log_odds_chosen": 0.27023443579673767, "log_odds_ratio": -0.7081299424171448, "logits/chosen": 2.6833786964416504, "logits/rejected": 2.696648359298706, "logps/chosen": -0.29506218433380127, "logps/rejected": -0.4175509214401245, "loss": 1.171, "nll_loss": 1.100161075592041, "rewards/accuracies": 0.5666666626930237, "rewards/chosen": -0.029506217688322067, "rewards/margins": 0.012248875573277473, "rewards/rejected": -0.04175509512424469, "step": 210 }, { "epoch": 0.2980591497227357, "grad_norm": 1.6668481826782227, "learning_rate": 3.9745454545454543e-07, "log_odds_chosen": 0.3347550332546234, "log_odds_ratio": -0.6504988074302673, "logits/chosen": 2.6714982986450195, "logits/rejected": 2.6816256046295166, "logps/chosen": -0.29570263624191284, "logps/rejected": -0.42322224378585815, "loss": 1.1627, "nll_loss": 1.0976592302322388, "rewards/accuracies": 0.625, "rewards/chosen": -0.029570268467068672, "rewards/margins": 0.012751961126923561, "rewards/rejected": -0.04232222959399223, "step": 215 }, { "epoch": 0.3049907578558225, "grad_norm": 1.663832664489746, "learning_rate": 3.965454545454545e-07, "log_odds_chosen": 0.329058974981308, "log_odds_ratio": -0.6497308611869812, "logits/chosen": 2.728097438812256, "logits/rejected": 2.751708507537842, "logps/chosen": -0.3120826184749603, "logps/rejected": -0.44624626636505127, "loss": 1.2113, "nll_loss": 1.1463485956192017, "rewards/accuracies": 0.5916666388511658, "rewards/chosen": -0.031208263710141182, "rewards/margins": 0.013416365720331669, "rewards/rejected": -0.04462462291121483, "step": 220 }, { "epoch": 0.31192236598890943, "grad_norm": 1.2902253866195679, "learning_rate": 3.9563636363636363e-07, "log_odds_chosen": 0.3168531656265259, "log_odds_ratio": -0.6508561372756958, "logits/chosen": 2.767462968826294, "logits/rejected": 2.776341438293457, "logps/chosen": -0.2664511799812317, "logps/rejected": -0.3841439485549927, "loss": 1.1313, "nll_loss": 1.0662100315093994, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.02664511650800705, "rewards/margins": 0.011769277974963188, "rewards/rejected": -0.03841439634561539, "step": 225 }, { "epoch": 0.3188539741219963, "grad_norm": 1.4394150972366333, "learning_rate": 3.947272727272727e-07, "log_odds_chosen": 0.5200116038322449, "log_odds_ratio": -0.5801463723182678, "logits/chosen": 2.7293012142181396, "logits/rejected": 2.7543258666992188, "logps/chosen": -0.2802920639514923, "logps/rejected": -0.4830513894557953, "loss": 1.1608, "nll_loss": 1.1027837991714478, "rewards/accuracies": 0.6583333611488342, "rewards/chosen": -0.02802920900285244, "rewards/margins": 0.020275937393307686, "rewards/rejected": -0.048305146396160126, "step": 230 }, { "epoch": 0.3257855822550832, "grad_norm": 1.5486044883728027, "learning_rate": 3.9381818181818177e-07, "log_odds_chosen": 0.3175574839115143, "log_odds_ratio": -0.6549851894378662, "logits/chosen": 2.717921495437622, "logits/rejected": 2.7285637855529785, "logps/chosen": -0.30369266867637634, "logps/rejected": -0.4324611723423004, "loss": 1.1833, "nll_loss": 1.117845892906189, "rewards/accuracies": 0.6083333492279053, "rewards/chosen": -0.030369265004992485, "rewards/margins": 0.012876848690211773, "rewards/rejected": -0.043246109038591385, "step": 235 }, { "epoch": 0.33271719038817005, "grad_norm": 1.4637186527252197, "learning_rate": 3.929090909090909e-07, "log_odds_chosen": 0.677905261516571, "log_odds_ratio": -0.5080384612083435, "logits/chosen": 2.6565752029418945, "logits/rejected": 2.670898199081421, "logps/chosen": -0.24577026069164276, "logps/rejected": -0.48834845423698425, "loss": 1.1682, "nll_loss": 1.1173516511917114, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.024577027186751366, "rewards/margins": 0.02425781637430191, "rewards/rejected": -0.04883484169840813, "step": 240 }, { "epoch": 0.33964879852125696, "grad_norm": 1.5058139562606812, "learning_rate": 3.9199999999999996e-07, "log_odds_chosen": 0.4405784010887146, "log_odds_ratio": -0.6027734875679016, "logits/chosen": 2.6786551475524902, "logits/rejected": 2.6928422451019287, "logps/chosen": -0.28616759181022644, "logps/rejected": -0.4547707438468933, "loss": 1.1336, "nll_loss": 1.0733466148376465, "rewards/accuracies": 0.6083333492279053, "rewards/chosen": -0.028616759926080704, "rewards/margins": 0.016860313713550568, "rewards/rejected": -0.04547707736492157, "step": 245 }, { "epoch": 0.3465804066543438, "grad_norm": 1.8535481691360474, "learning_rate": 3.910909090909091e-07, "log_odds_chosen": 0.42455023527145386, "log_odds_ratio": -0.6171411275863647, "logits/chosen": 2.7245190143585205, "logits/rejected": 2.732139825820923, "logps/chosen": -0.30346882343292236, "logps/rejected": -0.46232348680496216, "loss": 1.1587, "nll_loss": 1.0969831943511963, "rewards/accuracies": 0.6333333253860474, "rewards/chosen": -0.030346881598234177, "rewards/margins": 0.015885472297668457, "rewards/rejected": -0.046232353895902634, "step": 250 }, { "epoch": 0.35351201478743066, "grad_norm": 1.3919163942337036, "learning_rate": 3.9018181818181816e-07, "log_odds_chosen": 0.5069887042045593, "log_odds_ratio": -0.5706583857536316, "logits/chosen": 2.649379014968872, "logits/rejected": 2.6625030040740967, "logps/chosen": -0.25029629468917847, "logps/rejected": -0.43570002913475037, "loss": 1.1158, "nll_loss": 1.0587836503982544, "rewards/accuracies": 0.6916666626930237, "rewards/chosen": -0.025029627606272697, "rewards/margins": 0.01854037493467331, "rewards/rejected": -0.04357000067830086, "step": 255 }, { "epoch": 0.36044362292051757, "grad_norm": 1.3738930225372314, "learning_rate": 3.8927272727272723e-07, "log_odds_chosen": 0.3587478697299957, "log_odds_ratio": -0.6369132995605469, "logits/chosen": 2.712921619415283, "logits/rejected": 2.728205442428589, "logps/chosen": -0.2719246745109558, "logps/rejected": -0.3995325267314911, "loss": 1.1394, "nll_loss": 1.075750470161438, "rewards/accuracies": 0.6166666746139526, "rewards/chosen": -0.02719247154891491, "rewards/margins": 0.012760787270963192, "rewards/rejected": -0.03995325788855553, "step": 260 }, { "epoch": 0.3673752310536044, "grad_norm": 1.7332333326339722, "learning_rate": 3.8836363636363635e-07, "log_odds_chosen": 0.5665148496627808, "log_odds_ratio": -0.5796270966529846, "logits/chosen": 2.66032338142395, "logits/rejected": 2.6766788959503174, "logps/chosen": -0.2771783769130707, "logps/rejected": -0.4812432527542114, "loss": 1.1499, "nll_loss": 1.0919440984725952, "rewards/accuracies": 0.6833333373069763, "rewards/chosen": -0.027717838063836098, "rewards/margins": 0.020406486466526985, "rewards/rejected": -0.04812432825565338, "step": 265 }, { "epoch": 0.37430683918669133, "grad_norm": 1.733955979347229, "learning_rate": 3.874545454545454e-07, "log_odds_chosen": 0.533964991569519, "log_odds_ratio": -0.5693932175636292, "logits/chosen": 2.676833391189575, "logits/rejected": 2.7048349380493164, "logps/chosen": -0.2923631966114044, "logps/rejected": -0.4988028109073639, "loss": 1.1422, "nll_loss": 1.0852835178375244, "rewards/accuracies": 0.6583333611488342, "rewards/chosen": -0.02923632226884365, "rewards/margins": 0.020643968135118484, "rewards/rejected": -0.049880288541316986, "step": 270 }, { "epoch": 0.3812384473197782, "grad_norm": 1.678440809249878, "learning_rate": 3.865454545454545e-07, "log_odds_chosen": 0.48775380849838257, "log_odds_ratio": -0.5923742651939392, "logits/chosen": 2.589829444885254, "logits/rejected": 2.6072795391082764, "logps/chosen": -0.31491658091545105, "logps/rejected": -0.47682541608810425, "loss": 1.1148, "nll_loss": 1.0555262565612793, "rewards/accuracies": 0.6583333611488342, "rewards/chosen": -0.031491655856370926, "rewards/margins": 0.01619088277220726, "rewards/rejected": -0.047682538628578186, "step": 275 }, { "epoch": 0.38817005545286504, "grad_norm": 1.911895751953125, "learning_rate": 3.856363636363636e-07, "log_odds_chosen": 0.3376753032207489, "log_odds_ratio": -0.6669396162033081, "logits/chosen": 2.6268460750579834, "logits/rejected": 2.6471641063690186, "logps/chosen": -0.29394927620887756, "logps/rejected": -0.4321646988391876, "loss": 1.1103, "nll_loss": 1.0436404943466187, "rewards/accuracies": 0.6083333492279053, "rewards/chosen": -0.029394926503300667, "rewards/margins": 0.01382154505699873, "rewards/rejected": -0.04321647435426712, "step": 280 }, { "epoch": 0.39510166358595195, "grad_norm": 1.6275674104690552, "learning_rate": 3.847272727272727e-07, "log_odds_chosen": 0.4959801137447357, "log_odds_ratio": -0.6270378828048706, "logits/chosen": 2.666292667388916, "logits/rejected": 2.6843721866607666, "logps/chosen": -0.3145168423652649, "logps/rejected": -0.48912960290908813, "loss": 1.1656, "nll_loss": 1.1028538942337036, "rewards/accuracies": 0.6416666507720947, "rewards/chosen": -0.03145168721675873, "rewards/margins": 0.017461273819208145, "rewards/rejected": -0.048912957310676575, "step": 285 }, { "epoch": 0.4020332717190388, "grad_norm": 1.6614007949829102, "learning_rate": 3.838181818181818e-07, "log_odds_chosen": 0.3132282793521881, "log_odds_ratio": -0.6719579100608826, "logits/chosen": 2.644859790802002, "logits/rejected": 2.6587765216827393, "logps/chosen": -0.30401140451431274, "logps/rejected": -0.4220035970211029, "loss": 1.1595, "nll_loss": 1.0922586917877197, "rewards/accuracies": 0.5916666388511658, "rewards/chosen": -0.030401142314076424, "rewards/margins": 0.011799216270446777, "rewards/rejected": -0.04220036417245865, "step": 290 }, { "epoch": 0.4089648798521257, "grad_norm": 5.234076499938965, "learning_rate": 3.829090909090909e-07, "log_odds_chosen": 0.4143497347831726, "log_odds_ratio": -0.6194970011711121, "logits/chosen": 2.693376064300537, "logits/rejected": 2.7130610942840576, "logps/chosen": -0.300813227891922, "logps/rejected": -0.45194199681282043, "loss": 1.1107, "nll_loss": 1.0487507581710815, "rewards/accuracies": 0.6666666865348816, "rewards/chosen": -0.030081328004598618, "rewards/margins": 0.01511287409812212, "rewards/rejected": -0.04519420489668846, "step": 295 }, { "epoch": 0.41589648798521256, "grad_norm": 1.2319281101226807, "learning_rate": 3.8199999999999995e-07, "log_odds_chosen": 0.6317125558853149, "log_odds_ratio": -0.5252640247344971, "logits/chosen": 2.6187520027160645, "logits/rejected": 2.6462595462799072, "logps/chosen": -0.27775922417640686, "logps/rejected": -0.5154780745506287, "loss": 1.1218, "nll_loss": 1.069314956665039, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.027775920927524567, "rewards/margins": 0.023771891370415688, "rewards/rejected": -0.0515478178858757, "step": 300 }, { "epoch": 0.42282809611829947, "grad_norm": 1.404926061630249, "learning_rate": 3.810909090909091e-07, "log_odds_chosen": 0.45306530594825745, "log_odds_ratio": -0.5977523326873779, "logits/chosen": 2.6711065769195557, "logits/rejected": 2.6791975498199463, "logps/chosen": -0.2907872498035431, "logps/rejected": -0.4649318754673004, "loss": 1.0976, "nll_loss": 1.037819266319275, "rewards/accuracies": 0.6416666507720947, "rewards/chosen": -0.029078727588057518, "rewards/margins": 0.017414459958672523, "rewards/rejected": -0.04649318382143974, "step": 305 }, { "epoch": 0.4297597042513863, "grad_norm": 1.635770320892334, "learning_rate": 3.8018181818181815e-07, "log_odds_chosen": 0.42734283208847046, "log_odds_ratio": -0.637465238571167, "logits/chosen": 2.603739023208618, "logits/rejected": 2.632021903991699, "logps/chosen": -0.2919798493385315, "logps/rejected": -0.480343759059906, "loss": 1.1467, "nll_loss": 1.0829123258590698, "rewards/accuracies": 0.6583333611488342, "rewards/chosen": -0.029197994619607925, "rewards/margins": 0.01883639022707939, "rewards/rejected": -0.04803437739610672, "step": 310 }, { "epoch": 0.4366913123844732, "grad_norm": 1.6268060207366943, "learning_rate": 3.7927272727272727e-07, "log_odds_chosen": 0.5012027621269226, "log_odds_ratio": -0.5923992991447449, "logits/chosen": 2.5646347999572754, "logits/rejected": 2.583251953125, "logps/chosen": -0.28540945053100586, "logps/rejected": -0.48576289415359497, "loss": 1.1493, "nll_loss": 1.0901035070419312, "rewards/accuracies": 0.6833333373069763, "rewards/chosen": -0.028540942817926407, "rewards/margins": 0.02003534696996212, "rewards/rejected": -0.048576291650533676, "step": 315 }, { "epoch": 0.4436229205175601, "grad_norm": 1.6135777235031128, "learning_rate": 3.7836363636363634e-07, "log_odds_chosen": 0.47709882259368896, "log_odds_ratio": -0.6118133664131165, "logits/chosen": 2.651594400405884, "logits/rejected": 2.663482666015625, "logps/chosen": -0.30147698521614075, "logps/rejected": -0.5001630187034607, "loss": 1.1258, "nll_loss": 1.0645849704742432, "rewards/accuracies": 0.6416666507720947, "rewards/chosen": -0.030147703364491463, "rewards/margins": 0.019868608564138412, "rewards/rejected": -0.05001631751656532, "step": 320 }, { "epoch": 0.45055452865064693, "grad_norm": 1.5299911499023438, "learning_rate": 3.774545454545454e-07, "log_odds_chosen": 0.4915499687194824, "log_odds_ratio": -0.6021497845649719, "logits/chosen": 2.590543031692505, "logits/rejected": 2.616163730621338, "logps/chosen": -0.2796178162097931, "logps/rejected": -0.4787500500679016, "loss": 1.1219, "nll_loss": 1.0617003440856934, "rewards/accuracies": 0.6083333492279053, "rewards/chosen": -0.02796177938580513, "rewards/margins": 0.01991322636604309, "rewards/rejected": -0.04787500575184822, "step": 325 }, { "epoch": 0.45748613678373384, "grad_norm": 1.5430803298950195, "learning_rate": 3.7654545454545454e-07, "log_odds_chosen": 0.5409862399101257, "log_odds_ratio": -0.5837644934654236, "logits/chosen": 2.6871683597564697, "logits/rejected": 2.697368860244751, "logps/chosen": -0.322612464427948, "logps/rejected": -0.5166773200035095, "loss": 1.068, "nll_loss": 1.0095747709274292, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.03226124867796898, "rewards/margins": 0.019406486302614212, "rewards/rejected": -0.05166773125529289, "step": 330 }, { "epoch": 0.4644177449168207, "grad_norm": 1.520017147064209, "learning_rate": 3.756363636363636e-07, "log_odds_chosen": 0.7835187911987305, "log_odds_ratio": -0.49368441104888916, "logits/chosen": 2.681840419769287, "logits/rejected": 2.7117552757263184, "logps/chosen": -0.26524561643600464, "logps/rejected": -0.5571123361587524, "loss": 1.0703, "nll_loss": 1.0209718942642212, "rewards/accuracies": 0.7583333253860474, "rewards/chosen": -0.026524560526013374, "rewards/margins": 0.029186667874455452, "rewards/rejected": -0.055711228400468826, "step": 335 }, { "epoch": 0.4713493530499076, "grad_norm": 6.2188262939453125, "learning_rate": 3.747272727272727e-07, "log_odds_chosen": 0.7931634187698364, "log_odds_ratio": -0.48867708444595337, "logits/chosen": 2.574190378189087, "logits/rejected": 2.5961813926696777, "logps/chosen": -0.2862854599952698, "logps/rejected": -0.5607175230979919, "loss": 1.0977, "nll_loss": 1.0488049983978271, "rewards/accuracies": 0.7833333611488342, "rewards/chosen": -0.028628544881939888, "rewards/margins": 0.02744320221245289, "rewards/rejected": -0.05607175827026367, "step": 340 }, { "epoch": 0.47828096118299446, "grad_norm": 1.3965646028518677, "learning_rate": 3.738181818181818e-07, "log_odds_chosen": 0.6148959994316101, "log_odds_ratio": -0.5497661828994751, "logits/chosen": 2.556060791015625, "logits/rejected": 2.5766706466674805, "logps/chosen": -0.2946844696998596, "logps/rejected": -0.5049049258232117, "loss": 1.1347, "nll_loss": 1.0796822309494019, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.02946844883263111, "rewards/margins": 0.021022040396928787, "rewards/rejected": -0.05049047991633415, "step": 345 }, { "epoch": 0.4852125693160813, "grad_norm": 1.5803139209747314, "learning_rate": 3.7290909090909087e-07, "log_odds_chosen": 0.688506543636322, "log_odds_ratio": -0.5257928371429443, "logits/chosen": 2.6283648014068604, "logits/rejected": 2.667273759841919, "logps/chosen": -0.23559394478797913, "logps/rejected": -0.4642951190471649, "loss": 1.0637, "nll_loss": 1.0110965967178345, "rewards/accuracies": 0.7166666388511658, "rewards/chosen": -0.023559393361210823, "rewards/margins": 0.022870119661092758, "rewards/rejected": -0.04642951115965843, "step": 350 }, { "epoch": 0.4921441774491682, "grad_norm": 1.4043947458267212, "learning_rate": 3.72e-07, "log_odds_chosen": 0.7894371151924133, "log_odds_ratio": -0.5066149830818176, "logits/chosen": 2.5662755966186523, "logits/rejected": 2.597470998764038, "logps/chosen": -0.23841489851474762, "logps/rejected": -0.49710512161254883, "loss": 1.1026, "nll_loss": 1.0519250631332397, "rewards/accuracies": 0.7583333253860474, "rewards/chosen": -0.023841489106416702, "rewards/margins": 0.025869019329547882, "rewards/rejected": -0.049710508435964584, "step": 355 }, { "epoch": 0.49907578558225507, "grad_norm": 1.5117144584655762, "learning_rate": 3.7109090909090907e-07, "log_odds_chosen": 0.5975762009620667, "log_odds_ratio": -0.5889599919319153, "logits/chosen": 2.448983669281006, "logits/rejected": 2.486905813217163, "logps/chosen": -0.2695424258708954, "logps/rejected": -0.47119179368019104, "loss": 1.107, "nll_loss": 1.0481308698654175, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.026954246684908867, "rewards/margins": 0.02016492560505867, "rewards/rejected": -0.047119174152612686, "step": 360 }, { "epoch": 0.506007393715342, "grad_norm": 1.5192545652389526, "learning_rate": 3.7018181818181814e-07, "log_odds_chosen": 0.6952040791511536, "log_odds_ratio": -0.5299401879310608, "logits/chosen": 2.472970485687256, "logits/rejected": 2.5098752975463867, "logps/chosen": -0.3138105571269989, "logps/rejected": -0.5887749195098877, "loss": 1.1069, "nll_loss": 1.0538718700408936, "rewards/accuracies": 0.6916666626930237, "rewards/chosen": -0.03138105198740959, "rewards/margins": 0.02749643847346306, "rewards/rejected": -0.05887749046087265, "step": 365 }, { "epoch": 0.5129390018484289, "grad_norm": 1.2908129692077637, "learning_rate": 3.6927272727272726e-07, "log_odds_chosen": 0.6446402072906494, "log_odds_ratio": -0.5391488671302795, "logits/chosen": 2.492727756500244, "logits/rejected": 2.5441088676452637, "logps/chosen": -0.32913315296173096, "logps/rejected": -0.5775908827781677, "loss": 1.1262, "nll_loss": 1.072272539138794, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.032913316041231155, "rewards/margins": 0.024845769628882408, "rewards/rejected": -0.05775909125804901, "step": 370 }, { "epoch": 0.5198706099815157, "grad_norm": 1.5955073833465576, "learning_rate": 3.6836363636363633e-07, "log_odds_chosen": 0.4990961253643036, "log_odds_ratio": -0.5916425585746765, "logits/chosen": 2.449550151824951, "logits/rejected": 2.475804567337036, "logps/chosen": -0.29480889439582825, "logps/rejected": -0.4691779315471649, "loss": 1.0926, "nll_loss": 1.0334601402282715, "rewards/accuracies": 0.6583333611488342, "rewards/chosen": -0.029480891302227974, "rewards/margins": 0.017436908558011055, "rewards/rejected": -0.04691779240965843, "step": 375 }, { "epoch": 0.5268022181146026, "grad_norm": 1.6701200008392334, "learning_rate": 3.674545454545454e-07, "log_odds_chosen": 0.4017346203327179, "log_odds_ratio": -0.6414787173271179, "logits/chosen": 2.485521078109741, "logits/rejected": 2.5085794925689697, "logps/chosen": -0.3057960867881775, "logps/rejected": -0.4507531225681305, "loss": 1.0797, "nll_loss": 1.015582799911499, "rewards/accuracies": 0.625, "rewards/chosen": -0.030579613521695137, "rewards/margins": 0.01449570246040821, "rewards/rejected": -0.04507531598210335, "step": 380 }, { "epoch": 0.5337338262476895, "grad_norm": 1.4670026302337646, "learning_rate": 3.665454545454545e-07, "log_odds_chosen": 0.4697194993495941, "log_odds_ratio": -0.6057302355766296, "logits/chosen": 2.58746337890625, "logits/rejected": 2.6025400161743164, "logps/chosen": -0.33911365270614624, "logps/rejected": -0.540283739566803, "loss": 1.1428, "nll_loss": 1.0822194814682007, "rewards/accuracies": 0.6083333492279053, "rewards/chosen": -0.033911366015672684, "rewards/margins": 0.020117007195949554, "rewards/rejected": -0.054028380662202835, "step": 385 }, { "epoch": 0.5406654343807763, "grad_norm": 1.425374984741211, "learning_rate": 3.656363636363636e-07, "log_odds_chosen": 0.8662251234054565, "log_odds_ratio": -0.4929133951663971, "logits/chosen": 2.4548234939575195, "logits/rejected": 2.4894497394561768, "logps/chosen": -0.29042714834213257, "logps/rejected": -0.6492635011672974, "loss": 1.0862, "nll_loss": 1.0368915796279907, "rewards/accuracies": 0.7666666507720947, "rewards/chosen": -0.029042713344097137, "rewards/margins": 0.035883646458387375, "rewards/rejected": -0.06492635607719421, "step": 390 }, { "epoch": 0.5475970425138632, "grad_norm": 1.9283677339553833, "learning_rate": 3.647272727272727e-07, "log_odds_chosen": 0.6832193732261658, "log_odds_ratio": -0.5179533362388611, "logits/chosen": 2.4680521488189697, "logits/rejected": 2.5091042518615723, "logps/chosen": -0.3011523485183716, "logps/rejected": -0.5473502278327942, "loss": 1.1184, "nll_loss": 1.0665814876556396, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.030115237459540367, "rewards/margins": 0.02461978793144226, "rewards/rejected": -0.05473501980304718, "step": 395 }, { "epoch": 0.5545286506469501, "grad_norm": 1.5800950527191162, "learning_rate": 3.638181818181818e-07, "log_odds_chosen": 0.9603479504585266, "log_odds_ratio": -0.4618772566318512, "logits/chosen": 2.5418217182159424, "logits/rejected": 2.5758605003356934, "logps/chosen": -0.3006496727466583, "logps/rejected": -0.680100679397583, "loss": 1.1202, "nll_loss": 1.073991060256958, "rewards/accuracies": 0.7333333492279053, "rewards/chosen": -0.03006497025489807, "rewards/margins": 0.03794510290026665, "rewards/rejected": -0.06801006942987442, "step": 400 }, { "epoch": 0.5614602587800369, "grad_norm": 1.8024792671203613, "learning_rate": 3.6290909090909086e-07, "log_odds_chosen": 0.5875980854034424, "log_odds_ratio": -0.5984980463981628, "logits/chosen": 2.422645092010498, "logits/rejected": 2.4577627182006836, "logps/chosen": -0.26548755168914795, "logps/rejected": -0.4987878203392029, "loss": 1.07, "nll_loss": 1.010174036026001, "rewards/accuracies": 0.7083333134651184, "rewards/chosen": -0.026548750698566437, "rewards/margins": 0.023330029100179672, "rewards/rejected": -0.04987877607345581, "step": 405 }, { "epoch": 0.5683918669131238, "grad_norm": 1.3316905498504639, "learning_rate": 3.62e-07, "log_odds_chosen": 0.9628907442092896, "log_odds_ratio": -0.45750167965888977, "logits/chosen": 2.5198185443878174, "logits/rejected": 2.5674140453338623, "logps/chosen": -0.2701815366744995, "logps/rejected": -0.6767290234565735, "loss": 1.1047, "nll_loss": 1.0589832067489624, "rewards/accuracies": 0.7666666507720947, "rewards/chosen": -0.02701815403997898, "rewards/margins": 0.0406547375023365, "rewards/rejected": -0.06767289340496063, "step": 410 }, { "epoch": 0.5753234750462107, "grad_norm": 3.3782176971435547, "learning_rate": 3.6109090909090906e-07, "log_odds_chosen": 0.8159240484237671, "log_odds_ratio": -0.502350389957428, "logits/chosen": 2.4107635021209717, "logits/rejected": 2.462902545928955, "logps/chosen": -0.2618210017681122, "logps/rejected": -0.5774862766265869, "loss": 1.0908, "nll_loss": 1.040544033050537, "rewards/accuracies": 0.7166666388511658, "rewards/chosen": -0.026182103902101517, "rewards/margins": 0.031566519290208817, "rewards/rejected": -0.05774862319231033, "step": 415 }, { "epoch": 0.5822550831792976, "grad_norm": 1.423762559890747, "learning_rate": 3.601818181818182e-07, "log_odds_chosen": 0.712608277797699, "log_odds_ratio": -0.5430810451507568, "logits/chosen": 2.4404184818267822, "logits/rejected": 2.460728168487549, "logps/chosen": -0.3297029137611389, "logps/rejected": -0.6201837062835693, "loss": 1.1066, "nll_loss": 1.0522674322128296, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.03297029063105583, "rewards/margins": 0.02904808521270752, "rewards/rejected": -0.06201838329434395, "step": 420 }, { "epoch": 0.5891866913123844, "grad_norm": 1.5393766164779663, "learning_rate": 3.5927272727272725e-07, "log_odds_chosen": 0.7168568968772888, "log_odds_ratio": -0.530264139175415, "logits/chosen": 2.4670755863189697, "logits/rejected": 2.4867959022521973, "logps/chosen": -0.30588680505752563, "logps/rejected": -0.582858145236969, "loss": 1.1199, "nll_loss": 1.0668836832046509, "rewards/accuracies": 0.7166666388511658, "rewards/chosen": -0.030588679015636444, "rewards/margins": 0.027697138488292694, "rewards/rejected": -0.05828581750392914, "step": 425 }, { "epoch": 0.5961182994454713, "grad_norm": 1.5168192386627197, "learning_rate": 3.583636363636363e-07, "log_odds_chosen": 0.6971157789230347, "log_odds_ratio": -0.5508431792259216, "logits/chosen": 2.4846901893615723, "logits/rejected": 2.517913579940796, "logps/chosen": -0.29162880778312683, "logps/rejected": -0.5490512251853943, "loss": 1.0672, "nll_loss": 1.0120903253555298, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.029162878170609474, "rewards/margins": 0.025742238387465477, "rewards/rejected": -0.05490512028336525, "step": 430 }, { "epoch": 0.6030499075785583, "grad_norm": 1.951669454574585, "learning_rate": 3.5745454545454545e-07, "log_odds_chosen": 0.6440210342407227, "log_odds_ratio": -0.5461083054542542, "logits/chosen": 2.4179627895355225, "logits/rejected": 2.4611566066741943, "logps/chosen": -0.27406224608421326, "logps/rejected": -0.528464138507843, "loss": 1.1128, "nll_loss": 1.0581576824188232, "rewards/accuracies": 0.6916666626930237, "rewards/chosen": -0.027406223118305206, "rewards/margins": 0.025440199300646782, "rewards/rejected": -0.05284642428159714, "step": 435 }, { "epoch": 0.609981515711645, "grad_norm": 1.452818512916565, "learning_rate": 3.565454545454545e-07, "log_odds_chosen": 0.6425326466560364, "log_odds_ratio": -0.5652487874031067, "logits/chosen": 2.4771902561187744, "logits/rejected": 2.5125765800476074, "logps/chosen": -0.30930647253990173, "logps/rejected": -0.534605085849762, "loss": 1.1152, "nll_loss": 1.058679461479187, "rewards/accuracies": 0.7333333492279053, "rewards/chosen": -0.030930647626519203, "rewards/margins": 0.022529857233166695, "rewards/rejected": -0.0534605048596859, "step": 440 }, { "epoch": 0.616913123844732, "grad_norm": 2.1722567081451416, "learning_rate": 3.556363636363636e-07, "log_odds_chosen": 0.5683826208114624, "log_odds_ratio": -0.5821471810340881, "logits/chosen": 2.3948564529418945, "logits/rejected": 2.432129144668579, "logps/chosen": -0.2796666920185089, "logps/rejected": -0.5052643418312073, "loss": 1.036, "nll_loss": 0.9777409434318542, "rewards/accuracies": 0.6333333253860474, "rewards/chosen": -0.027966666966676712, "rewards/margins": 0.022559762001037598, "rewards/rejected": -0.05052642896771431, "step": 445 }, { "epoch": 0.6238447319778189, "grad_norm": 1.4338502883911133, "learning_rate": 3.547272727272727e-07, "log_odds_chosen": 0.6986488699913025, "log_odds_ratio": -0.5211442112922668, "logits/chosen": 2.412381410598755, "logits/rejected": 2.452411413192749, "logps/chosen": -0.33079832792282104, "logps/rejected": -0.5769501328468323, "loss": 1.1057, "nll_loss": 1.0535985231399536, "rewards/accuracies": 0.75, "rewards/chosen": -0.0330798365175724, "rewards/margins": 0.024615177884697914, "rewards/rejected": -0.057695016264915466, "step": 450 }, { "epoch": 0.6307763401109058, "grad_norm": 1.6581847667694092, "learning_rate": 3.538181818181818e-07, "log_odds_chosen": 0.8325883746147156, "log_odds_ratio": -0.5279497504234314, "logits/chosen": 2.3902318477630615, "logits/rejected": 2.4334285259246826, "logps/chosen": -0.2896474003791809, "logps/rejected": -0.6175944805145264, "loss": 1.0771, "nll_loss": 1.024324893951416, "rewards/accuracies": 0.7166666388511658, "rewards/chosen": -0.028964735567569733, "rewards/margins": 0.032794706523418427, "rewards/rejected": -0.06175943836569786, "step": 455 }, { "epoch": 0.6377079482439926, "grad_norm": 1.2917609214782715, "learning_rate": 3.529090909090909e-07, "log_odds_chosen": 0.683183491230011, "log_odds_ratio": -0.5794288516044617, "logits/chosen": 2.442657709121704, "logits/rejected": 2.49123477935791, "logps/chosen": -0.3224933445453644, "logps/rejected": -0.6132354736328125, "loss": 1.1346, "nll_loss": 1.0766867399215698, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.0322493314743042, "rewards/margins": 0.02907421998679638, "rewards/rejected": -0.06132354959845543, "step": 460 }, { "epoch": 0.6446395563770795, "grad_norm": 1.514763593673706, "learning_rate": 3.52e-07, "log_odds_chosen": 0.7153250575065613, "log_odds_ratio": -0.5446946024894714, "logits/chosen": 2.3731462955474854, "logits/rejected": 2.4032256603240967, "logps/chosen": -0.2961350679397583, "logps/rejected": -0.5602670311927795, "loss": 1.1165, "nll_loss": 1.0620505809783936, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.029613511636853218, "rewards/margins": 0.026413191109895706, "rewards/rejected": -0.056026704609394073, "step": 465 }, { "epoch": 0.6515711645101664, "grad_norm": 1.3288161754608154, "learning_rate": 3.5109090909090905e-07, "log_odds_chosen": 0.6764271855354309, "log_odds_ratio": -0.539259135723114, "logits/chosen": 2.3467721939086914, "logits/rejected": 2.384276866912842, "logps/chosen": -0.28593122959136963, "logps/rejected": -0.5482361912727356, "loss": 1.0768, "nll_loss": 1.0228937864303589, "rewards/accuracies": 0.6833333373069763, "rewards/chosen": -0.028593122959136963, "rewards/margins": 0.026230497285723686, "rewards/rejected": -0.0548236221075058, "step": 470 }, { "epoch": 0.6585027726432532, "grad_norm": 1.6036826372146606, "learning_rate": 3.5018181818181817e-07, "log_odds_chosen": 0.7218735218048096, "log_odds_ratio": -0.5222643613815308, "logits/chosen": 2.311798095703125, "logits/rejected": 2.353884220123291, "logps/chosen": -0.2681826055049896, "logps/rejected": -0.5527848601341248, "loss": 1.0583, "nll_loss": 1.0060540437698364, "rewards/accuracies": 0.7833333611488342, "rewards/chosen": -0.026818258687853813, "rewards/margins": 0.02846023067831993, "rewards/rejected": -0.05527849122881889, "step": 475 }, { "epoch": 0.6654343807763401, "grad_norm": 1.6921563148498535, "learning_rate": 3.4927272727272724e-07, "log_odds_chosen": 0.7491247057914734, "log_odds_ratio": -0.5005953907966614, "logits/chosen": 2.384194850921631, "logits/rejected": 2.4205212593078613, "logps/chosen": -0.27753210067749023, "logps/rejected": -0.5439023971557617, "loss": 1.0796, "nll_loss": 1.0295780897140503, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.027753213420510292, "rewards/margins": 0.02663702890276909, "rewards/rejected": -0.05439024046063423, "step": 480 }, { "epoch": 0.672365988909427, "grad_norm": 1.6062825918197632, "learning_rate": 3.483636363636363e-07, "log_odds_chosen": 0.8605387210845947, "log_odds_ratio": -0.4727242887020111, "logits/chosen": 2.392383098602295, "logits/rejected": 2.4284956455230713, "logps/chosen": -0.30633166432380676, "logps/rejected": -0.6641873121261597, "loss": 1.0927, "nll_loss": 1.0453789234161377, "rewards/accuracies": 0.7583333253860474, "rewards/chosen": -0.030633168295025826, "rewards/margins": 0.035785574465990067, "rewards/rejected": -0.06641873717308044, "step": 485 }, { "epoch": 0.6792975970425139, "grad_norm": 1.4081010818481445, "learning_rate": 3.4745454545454544e-07, "log_odds_chosen": 0.7056547403335571, "log_odds_ratio": -0.534669816493988, "logits/chosen": 2.4164679050445557, "logits/rejected": 2.4538962841033936, "logps/chosen": -0.30063506960868835, "logps/rejected": -0.566967248916626, "loss": 1.0686, "nll_loss": 1.015173077583313, "rewards/accuracies": 0.7666666507720947, "rewards/chosen": -0.030063504353165627, "rewards/margins": 0.026633214205503464, "rewards/rejected": -0.05669672042131424, "step": 490 }, { "epoch": 0.6862292051756007, "grad_norm": 1.6677594184875488, "learning_rate": 3.465454545454545e-07, "log_odds_chosen": 0.6989320516586304, "log_odds_ratio": -0.5302554368972778, "logits/chosen": 2.3923895359039307, "logits/rejected": 2.4322257041931152, "logps/chosen": -0.3043942451477051, "logps/rejected": -0.5767375826835632, "loss": 1.1053, "nll_loss": 1.052259922027588, "rewards/accuracies": 0.7416666746139526, "rewards/chosen": -0.030439427122473717, "rewards/margins": 0.027234338223934174, "rewards/rejected": -0.05767376720905304, "step": 495 }, { "epoch": 0.6931608133086876, "grad_norm": 2.065037727355957, "learning_rate": 3.4563636363636363e-07, "log_odds_chosen": 0.9193868041038513, "log_odds_ratio": -0.47953858971595764, "logits/chosen": 2.3140947818756104, "logits/rejected": 2.370821952819824, "logps/chosen": -0.26736411452293396, "logps/rejected": -0.6239200830459595, "loss": 1.1044, "nll_loss": 1.0564467906951904, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.026736412197351456, "rewards/margins": 0.03565559908747673, "rewards/rejected": -0.06239200755953789, "step": 500 }, { "epoch": 0.7000924214417745, "grad_norm": 1.7896554470062256, "learning_rate": 3.447272727272727e-07, "log_odds_chosen": 1.0297752618789673, "log_odds_ratio": -0.4404907822608948, "logits/chosen": 2.414668560028076, "logits/rejected": 2.4732654094696045, "logps/chosen": -0.2793917953968048, "logps/rejected": -0.6925608515739441, "loss": 1.068, "nll_loss": 1.0239418745040894, "rewards/accuracies": 0.7666666507720947, "rewards/chosen": -0.027939176186919212, "rewards/margins": 0.04131689295172691, "rewards/rejected": -0.06925607472658157, "step": 505 }, { "epoch": 0.7070240295748613, "grad_norm": 1.7647099494934082, "learning_rate": 3.4381818181818177e-07, "log_odds_chosen": 0.7891833782196045, "log_odds_ratio": -0.5092800855636597, "logits/chosen": 2.4179863929748535, "logits/rejected": 2.4637575149536133, "logps/chosen": -0.32941383123397827, "logps/rejected": -0.6404102444648743, "loss": 1.1317, "nll_loss": 1.0808058977127075, "rewards/accuracies": 0.7166666388511658, "rewards/chosen": -0.032941386103630066, "rewards/margins": 0.031099645420908928, "rewards/rejected": -0.06404102593660355, "step": 510 }, { "epoch": 0.7139556377079482, "grad_norm": 1.6020543575286865, "learning_rate": 3.429090909090909e-07, "log_odds_chosen": 0.823983371257782, "log_odds_ratio": -0.49682337045669556, "logits/chosen": 2.2930688858032227, "logits/rejected": 2.3509280681610107, "logps/chosen": -0.2938121259212494, "logps/rejected": -0.6020478010177612, "loss": 1.0654, "nll_loss": 1.015733003616333, "rewards/accuracies": 0.7583333253860474, "rewards/chosen": -0.02938121184706688, "rewards/margins": 0.030823571607470512, "rewards/rejected": -0.06020478159189224, "step": 515 }, { "epoch": 0.7208872458410351, "grad_norm": 1.902418851852417, "learning_rate": 3.4199999999999997e-07, "log_odds_chosen": 0.9842289090156555, "log_odds_ratio": -0.46310731768608093, "logits/chosen": 2.354994535446167, "logits/rejected": 2.3915045261383057, "logps/chosen": -0.27029862999916077, "logps/rejected": -0.6681958436965942, "loss": 1.0773, "nll_loss": 1.030941367149353, "rewards/accuracies": 0.7666666507720947, "rewards/chosen": -0.027029862627387047, "rewards/margins": 0.03978971764445305, "rewards/rejected": -0.06681957095861435, "step": 520 }, { "epoch": 0.727818853974122, "grad_norm": 1.5766515731811523, "learning_rate": 3.410909090909091e-07, "log_odds_chosen": 0.7862997055053711, "log_odds_ratio": -0.5025666952133179, "logits/chosen": 2.351290464401245, "logits/rejected": 2.3797943592071533, "logps/chosen": -0.2873378396034241, "logps/rejected": -0.5840609669685364, "loss": 1.0972, "nll_loss": 1.0469059944152832, "rewards/accuracies": 0.7666666507720947, "rewards/chosen": -0.028733786195516586, "rewards/margins": 0.02967231348156929, "rewards/rejected": -0.058406099677085876, "step": 525 }, { "epoch": 0.7347504621072088, "grad_norm": 1.9089113473892212, "learning_rate": 3.4018181818181816e-07, "log_odds_chosen": 1.0071905851364136, "log_odds_ratio": -0.4409308433532715, "logits/chosen": 2.36696720123291, "logits/rejected": 2.4168639183044434, "logps/chosen": -0.2714075744152069, "logps/rejected": -0.6719208359718323, "loss": 1.0857, "nll_loss": 1.0415993928909302, "rewards/accuracies": 0.8083333373069763, "rewards/chosen": -0.02714076079428196, "rewards/margins": 0.04005131870508194, "rewards/rejected": -0.06719207763671875, "step": 530 }, { "epoch": 0.7416820702402958, "grad_norm": 1.6972178220748901, "learning_rate": 3.3927272727272723e-07, "log_odds_chosen": 0.7740481495857239, "log_odds_ratio": -0.5107226967811584, "logits/chosen": 2.373772382736206, "logits/rejected": 2.415752410888672, "logps/chosen": -0.30703625082969666, "logps/rejected": -0.6409940123558044, "loss": 1.1177, "nll_loss": 1.066676378250122, "rewards/accuracies": 0.7583333253860474, "rewards/chosen": -0.030703624710440636, "rewards/margins": 0.03339577093720436, "rewards/rejected": -0.06409939378499985, "step": 535 }, { "epoch": 0.7486136783733827, "grad_norm": 1.3471928834915161, "learning_rate": 3.3836363636363635e-07, "log_odds_chosen": 0.8596405386924744, "log_odds_ratio": -0.4774978458881378, "logits/chosen": 2.286552906036377, "logits/rejected": 2.335909128189087, "logps/chosen": -0.28501778841018677, "logps/rejected": -0.6287774443626404, "loss": 1.0694, "nll_loss": 1.0216971635818481, "rewards/accuracies": 0.75, "rewards/chosen": -0.028501776978373528, "rewards/margins": 0.03437596932053566, "rewards/rejected": -0.06287775188684464, "step": 540 }, { "epoch": 0.7555452865064695, "grad_norm": 1.906703233718872, "learning_rate": 3.374545454545454e-07, "log_odds_chosen": 0.8641347885131836, "log_odds_ratio": -0.5105417370796204, "logits/chosen": 2.3575448989868164, "logits/rejected": 2.396763801574707, "logps/chosen": -0.26420578360557556, "logps/rejected": -0.6220420002937317, "loss": 1.0629, "nll_loss": 1.0118043422698975, "rewards/accuracies": 0.6916666626930237, "rewards/chosen": -0.026420580223202705, "rewards/margins": 0.03578362613916397, "rewards/rejected": -0.06220419704914093, "step": 545 }, { "epoch": 0.7624768946395564, "grad_norm": 1.4510164260864258, "learning_rate": 3.365454545454545e-07, "log_odds_chosen": 0.777636706829071, "log_odds_ratio": -0.5234912633895874, "logits/chosen": 2.271596670150757, "logits/rejected": 2.3241117000579834, "logps/chosen": -0.313221275806427, "logps/rejected": -0.6108809113502502, "loss": 1.1234, "nll_loss": 1.0710450410842896, "rewards/accuracies": 0.75, "rewards/chosen": -0.03132212534546852, "rewards/margins": 0.029765967279672623, "rewards/rejected": -0.06108810007572174, "step": 550 }, { "epoch": 0.7694085027726433, "grad_norm": 2.371877908706665, "learning_rate": 3.356363636363636e-07, "log_odds_chosen": 0.7746042013168335, "log_odds_ratio": -0.5306139588356018, "logits/chosen": 2.318974018096924, "logits/rejected": 2.381847858428955, "logps/chosen": -0.2922042906284332, "logps/rejected": -0.6194152235984802, "loss": 1.0591, "nll_loss": 1.006041169166565, "rewards/accuracies": 0.7333333492279053, "rewards/chosen": -0.02922043204307556, "rewards/margins": 0.032721079885959625, "rewards/rejected": -0.061941519379615784, "step": 555 }, { "epoch": 0.7763401109057301, "grad_norm": 1.5632991790771484, "learning_rate": 3.347272727272727e-07, "log_odds_chosen": 0.9328292608261108, "log_odds_ratio": -0.4911152124404907, "logits/chosen": 2.3930165767669678, "logits/rejected": 2.4501211643218994, "logps/chosen": -0.33901265263557434, "logps/rejected": -0.71490877866745, "loss": 1.1406, "nll_loss": 1.091480016708374, "rewards/accuracies": 0.75, "rewards/chosen": -0.033901263028383255, "rewards/margins": 0.03758960962295532, "rewards/rejected": -0.07149087637662888, "step": 560 }, { "epoch": 0.783271719038817, "grad_norm": 1.698624849319458, "learning_rate": 3.338181818181818e-07, "log_odds_chosen": 0.8142465949058533, "log_odds_ratio": -0.48792019486427307, "logits/chosen": 2.2310545444488525, "logits/rejected": 2.270510673522949, "logps/chosen": -0.2853752076625824, "logps/rejected": -0.5743341445922852, "loss": 1.0228, "nll_loss": 0.9739974737167358, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.02853752300143242, "rewards/margins": 0.028895895928144455, "rewards/rejected": -0.057433418929576874, "step": 565 }, { "epoch": 0.7902033271719039, "grad_norm": 1.6501067876815796, "learning_rate": 3.329090909090909e-07, "log_odds_chosen": 0.8794564008712769, "log_odds_ratio": -0.47009655833244324, "logits/chosen": 2.3232598304748535, "logits/rejected": 2.363266944885254, "logps/chosen": -0.33390435576438904, "logps/rejected": -0.6623743772506714, "loss": 1.1093, "nll_loss": 1.0623211860656738, "rewards/accuracies": 0.7583333253860474, "rewards/chosen": -0.033390436321496964, "rewards/margins": 0.032846998423337936, "rewards/rejected": -0.0662374347448349, "step": 570 }, { "epoch": 0.7971349353049908, "grad_norm": 1.9385813474655151, "learning_rate": 3.3199999999999996e-07, "log_odds_chosen": 0.8120476007461548, "log_odds_ratio": -0.4858551323413849, "logits/chosen": 2.2998452186584473, "logits/rejected": 2.329951524734497, "logps/chosen": -0.2809382975101471, "logps/rejected": -0.5397506952285767, "loss": 1.0673, "nll_loss": 1.018762230873108, "rewards/accuracies": 0.7666666507720947, "rewards/chosen": -0.02809382788836956, "rewards/margins": 0.025881236419081688, "rewards/rejected": -0.05397506803274155, "step": 575 }, { "epoch": 0.8040665434380776, "grad_norm": 1.647004246711731, "learning_rate": 3.310909090909091e-07, "log_odds_chosen": 0.7569546103477478, "log_odds_ratio": -0.5495377779006958, "logits/chosen": 2.2851226329803467, "logits/rejected": 2.3207554817199707, "logps/chosen": -0.3422669768333435, "logps/rejected": -0.6526975035667419, "loss": 1.1354, "nll_loss": 1.0804812908172607, "rewards/accuracies": 0.75, "rewards/chosen": -0.03422669693827629, "rewards/margins": 0.03104304149746895, "rewards/rejected": -0.06526973843574524, "step": 580 }, { "epoch": 0.8109981515711645, "grad_norm": 1.3443453311920166, "learning_rate": 3.3018181818181815e-07, "log_odds_chosen": 0.7256454825401306, "log_odds_ratio": -0.5355305075645447, "logits/chosen": 2.308670997619629, "logits/rejected": 2.348257541656494, "logps/chosen": -0.3142472207546234, "logps/rejected": -0.6030288338661194, "loss": 1.1232, "nll_loss": 1.069667935371399, "rewards/accuracies": 0.7083333134651184, "rewards/chosen": -0.03142471984028816, "rewards/margins": 0.028878165408968925, "rewards/rejected": -0.06030288711190224, "step": 585 }, { "epoch": 0.8179297597042514, "grad_norm": 1.7293723821640015, "learning_rate": 3.2927272727272727e-07, "log_odds_chosen": 0.7784165740013123, "log_odds_ratio": -0.5222859978675842, "logits/chosen": 2.2894599437713623, "logits/rejected": 2.3367385864257812, "logps/chosen": -0.29684463143348694, "logps/rejected": -0.5837019085884094, "loss": 1.0918, "nll_loss": 1.0395236015319824, "rewards/accuracies": 0.7416666746139526, "rewards/chosen": -0.029684465378522873, "rewards/margins": 0.02868572250008583, "rewards/rejected": -0.058370187878608704, "step": 590 }, { "epoch": 0.8248613678373382, "grad_norm": 1.8093395233154297, "learning_rate": 3.2836363636363634e-07, "log_odds_chosen": 0.8844853043556213, "log_odds_ratio": -0.4726443290710449, "logits/chosen": 2.2617225646972656, "logits/rejected": 2.3025357723236084, "logps/chosen": -0.2791774868965149, "logps/rejected": -0.6056706309318542, "loss": 1.0658, "nll_loss": 1.0185186862945557, "rewards/accuracies": 0.7333333492279053, "rewards/chosen": -0.02791774831712246, "rewards/margins": 0.03264930844306946, "rewards/rejected": -0.06056705862283707, "step": 595 }, { "epoch": 0.8317929759704251, "grad_norm": 1.8343292474746704, "learning_rate": 3.274545454545454e-07, "log_odds_chosen": 0.9979297518730164, "log_odds_ratio": -0.44733384251594543, "logits/chosen": 2.2689735889434814, "logits/rejected": 2.30771803855896, "logps/chosen": -0.29521337151527405, "logps/rejected": -0.6864122748374939, "loss": 1.0462, "nll_loss": 1.0014839172363281, "rewards/accuracies": 0.8166666626930237, "rewards/chosen": -0.029521334916353226, "rewards/margins": 0.039119891822338104, "rewards/rejected": -0.06864122301340103, "step": 600 }, { "epoch": 0.838724584103512, "grad_norm": 1.5383275747299194, "learning_rate": 3.2654545454545454e-07, "log_odds_chosen": 1.0520718097686768, "log_odds_ratio": -0.4258045554161072, "logits/chosen": 2.2323710918426514, "logits/rejected": 2.269280195236206, "logps/chosen": -0.2652204632759094, "logps/rejected": -0.6579357981681824, "loss": 1.0456, "nll_loss": 1.0029722452163696, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.026522040367126465, "rewards/margins": 0.03927153721451759, "rewards/rejected": -0.06579358130693436, "step": 605 }, { "epoch": 0.8456561922365989, "grad_norm": 1.7920851707458496, "learning_rate": 3.256363636363636e-07, "log_odds_chosen": 0.9857064485549927, "log_odds_ratio": -0.4479914903640747, "logits/chosen": 2.2189059257507324, "logits/rejected": 2.2568318843841553, "logps/chosen": -0.27704155445098877, "logps/rejected": -0.6286638975143433, "loss": 1.0787, "nll_loss": 1.0338690280914307, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.027704155072569847, "rewards/margins": 0.03516223281621933, "rewards/rejected": -0.06286638975143433, "step": 610 }, { "epoch": 0.8525878003696857, "grad_norm": 1.4790568351745605, "learning_rate": 3.247272727272727e-07, "log_odds_chosen": 0.841203510761261, "log_odds_ratio": -0.47351616621017456, "logits/chosen": 2.2627458572387695, "logits/rejected": 2.304532051086426, "logps/chosen": -0.30810025334358215, "logps/rejected": -0.6228964328765869, "loss": 1.0887, "nll_loss": 1.041351079940796, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.030810019001364708, "rewards/margins": 0.031479619443416595, "rewards/rejected": -0.06228964403271675, "step": 615 }, { "epoch": 0.8595194085027726, "grad_norm": 1.640016794204712, "learning_rate": 3.238181818181818e-07, "log_odds_chosen": 0.8392209410667419, "log_odds_ratio": -0.47920557856559753, "logits/chosen": 2.241325855255127, "logits/rejected": 2.3104820251464844, "logps/chosen": -0.24823182821273804, "logps/rejected": -0.5455386638641357, "loss": 0.9989, "nll_loss": 0.950990617275238, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.024823185056447983, "rewards/margins": 0.029730679467320442, "rewards/rejected": -0.054553862661123276, "step": 620 }, { "epoch": 0.8664510166358595, "grad_norm": 1.6642489433288574, "learning_rate": 3.229090909090909e-07, "log_odds_chosen": 0.8546110987663269, "log_odds_ratio": -0.5026638507843018, "logits/chosen": 2.2081406116485596, "logits/rejected": 2.243947744369507, "logps/chosen": -0.29789215326309204, "logps/rejected": -0.630996584892273, "loss": 1.0533, "nll_loss": 1.0030620098114014, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.029789213091135025, "rewards/margins": 0.03331044688820839, "rewards/rejected": -0.06309965997934341, "step": 625 }, { "epoch": 0.8733826247689463, "grad_norm": 1.5830177068710327, "learning_rate": 3.22e-07, "log_odds_chosen": 0.8428265452384949, "log_odds_ratio": -0.5204115509986877, "logits/chosen": 2.2851126194000244, "logits/rejected": 2.326284646987915, "logps/chosen": -0.3049141466617584, "logps/rejected": -0.6031973361968994, "loss": 1.0701, "nll_loss": 1.018021821975708, "rewards/accuracies": 0.75, "rewards/chosen": -0.030491415411233902, "rewards/margins": 0.029828311875462532, "rewards/rejected": -0.060319721698760986, "step": 630 }, { "epoch": 0.8803142329020333, "grad_norm": 1.658988118171692, "learning_rate": 3.2109090909090907e-07, "log_odds_chosen": 0.9268123507499695, "log_odds_ratio": -0.5015169978141785, "logits/chosen": 2.2516047954559326, "logits/rejected": 2.307269811630249, "logps/chosen": -0.2988060712814331, "logps/rejected": -0.6871820092201233, "loss": 1.0584, "nll_loss": 1.0082836151123047, "rewards/accuracies": 0.7583333253860474, "rewards/chosen": -0.029880603775382042, "rewards/margins": 0.03883758932352066, "rewards/rejected": -0.06871819496154785, "step": 635 }, { "epoch": 0.8872458410351202, "grad_norm": 3.0365874767303467, "learning_rate": 3.2018181818181814e-07, "log_odds_chosen": 0.8886032700538635, "log_odds_ratio": -0.4523109197616577, "logits/chosen": 2.2199485301971436, "logits/rejected": 2.2670233249664307, "logps/chosen": -0.25624144077301025, "logps/rejected": -0.5575699210166931, "loss": 1.0108, "nll_loss": 0.965610146522522, "rewards/accuracies": 0.8166666626930237, "rewards/chosen": -0.025624146685004234, "rewards/margins": 0.030132848769426346, "rewards/rejected": -0.05575699731707573, "step": 640 }, { "epoch": 0.8941774491682071, "grad_norm": 1.6926220655441284, "learning_rate": 3.1927272727272726e-07, "log_odds_chosen": 0.9891830682754517, "log_odds_ratio": -0.4355195462703705, "logits/chosen": 2.2216622829437256, "logits/rejected": 2.275890350341797, "logps/chosen": -0.276635080575943, "logps/rejected": -0.6514686942100525, "loss": 1.0596, "nll_loss": 1.0160428285598755, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.02766350656747818, "rewards/margins": 0.03748335689306259, "rewards/rejected": -0.06514687836170197, "step": 645 }, { "epoch": 0.9011090573012939, "grad_norm": 1.4911339282989502, "learning_rate": 3.1836363636363633e-07, "log_odds_chosen": 0.9020595550537109, "log_odds_ratio": -0.48418372869491577, "logits/chosen": 2.2785911560058594, "logits/rejected": 2.312934160232544, "logps/chosen": -0.31726518273353577, "logps/rejected": -0.6841873526573181, "loss": 1.05, "nll_loss": 1.0015556812286377, "rewards/accuracies": 0.75, "rewards/chosen": -0.031726520508527756, "rewards/margins": 0.036692213267087936, "rewards/rejected": -0.06841873377561569, "step": 650 }, { "epoch": 0.9080406654343808, "grad_norm": 2.0379440784454346, "learning_rate": 3.174545454545454e-07, "log_odds_chosen": 1.1423557996749878, "log_odds_ratio": -0.4178314507007599, "logits/chosen": 2.3556437492370605, "logits/rejected": 2.4005048274993896, "logps/chosen": -0.2667427659034729, "logps/rejected": -0.742853581905365, "loss": 1.0941, "nll_loss": 1.052323818206787, "rewards/accuracies": 0.8416666388511658, "rewards/chosen": -0.02667427621781826, "rewards/margins": 0.04761108011007309, "rewards/rejected": -0.0742853581905365, "step": 655 }, { "epoch": 0.9149722735674677, "grad_norm": 1.3288828134536743, "learning_rate": 3.1654545454545453e-07, "log_odds_chosen": 0.9340334534645081, "log_odds_ratio": -0.48560333251953125, "logits/chosen": 2.2910754680633545, "logits/rejected": 2.3387868404388428, "logps/chosen": -0.3347066640853882, "logps/rejected": -0.6973811388015747, "loss": 1.0676, "nll_loss": 1.0190544128417969, "rewards/accuracies": 0.7583333253860474, "rewards/chosen": -0.033470671623945236, "rewards/margins": 0.036267444491386414, "rewards/rejected": -0.06973811984062195, "step": 660 }, { "epoch": 0.9219038817005545, "grad_norm": 1.6985810995101929, "learning_rate": 3.156363636363636e-07, "log_odds_chosen": 0.9894071221351624, "log_odds_ratio": -0.4461503326892853, "logits/chosen": 2.2392578125, "logits/rejected": 2.283933162689209, "logps/chosen": -0.2434028834104538, "logps/rejected": -0.5999926328659058, "loss": 1.0296, "nll_loss": 0.9849395751953125, "rewards/accuracies": 0.7666666507720947, "rewards/chosen": -0.02434029057621956, "rewards/margins": 0.03565897420048714, "rewards/rejected": -0.059999268501996994, "step": 665 }, { "epoch": 0.9288354898336414, "grad_norm": 1.6689305305480957, "learning_rate": 3.147272727272727e-07, "log_odds_chosen": 0.7809682488441467, "log_odds_ratio": -0.5143331289291382, "logits/chosen": 2.245195150375366, "logits/rejected": 2.2930848598480225, "logps/chosen": -0.313152015209198, "logps/rejected": -0.590879499912262, "loss": 1.0569, "nll_loss": 1.0054324865341187, "rewards/accuracies": 0.75, "rewards/chosen": -0.03131520375609398, "rewards/margins": 0.027772750705480576, "rewards/rejected": -0.059087950736284256, "step": 670 }, { "epoch": 0.9357670979667283, "grad_norm": 1.883654236793518, "learning_rate": 3.138181818181818e-07, "log_odds_chosen": 0.8892870545387268, "log_odds_ratio": -0.4877478778362274, "logits/chosen": 2.2379813194274902, "logits/rejected": 2.3097243309020996, "logps/chosen": -0.24917837977409363, "logps/rejected": -0.5756833553314209, "loss": 1.0534, "nll_loss": 1.0045883655548096, "rewards/accuracies": 0.7583333253860474, "rewards/chosen": -0.024917839094996452, "rewards/margins": 0.03265049681067467, "rewards/rejected": -0.05756833776831627, "step": 675 }, { "epoch": 0.9426987060998152, "grad_norm": 2.1263558864593506, "learning_rate": 3.1290909090909086e-07, "log_odds_chosen": 0.9385001063346863, "log_odds_ratio": -0.485267698764801, "logits/chosen": 2.2864134311676025, "logits/rejected": 2.3468661308288574, "logps/chosen": -0.3098216950893402, "logps/rejected": -0.6490688323974609, "loss": 1.0503, "nll_loss": 1.001733660697937, "rewards/accuracies": 0.7666666507720947, "rewards/chosen": -0.03098217211663723, "rewards/margins": 0.033924710005521774, "rewards/rejected": -0.06490688025951385, "step": 680 }, { "epoch": 0.949630314232902, "grad_norm": 1.8792967796325684, "learning_rate": 3.12e-07, "log_odds_chosen": 0.6588108539581299, "log_odds_ratio": -0.5568161010742188, "logits/chosen": 2.2107622623443604, "logits/rejected": 2.2423746585845947, "logps/chosen": -0.3359021544456482, "logps/rejected": -0.5799560546875, "loss": 1.0822, "nll_loss": 1.0264886617660522, "rewards/accuracies": 0.7583333253860474, "rewards/chosen": -0.03359021618962288, "rewards/margins": 0.024405384436249733, "rewards/rejected": -0.05799560621380806, "step": 685 }, { "epoch": 0.9565619223659889, "grad_norm": 1.4165992736816406, "learning_rate": 3.1109090909090906e-07, "log_odds_chosen": 0.9096938371658325, "log_odds_ratio": -0.4722006916999817, "logits/chosen": 2.235501527786255, "logits/rejected": 2.2762210369110107, "logps/chosen": -0.29770374298095703, "logps/rejected": -0.6392419934272766, "loss": 1.0716, "nll_loss": 1.024424433708191, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.0297703817486763, "rewards/margins": 0.03415382280945778, "rewards/rejected": -0.06392420083284378, "step": 690 }, { "epoch": 0.9634935304990758, "grad_norm": 2.6423442363739014, "learning_rate": 3.101818181818182e-07, "log_odds_chosen": 0.8946338295936584, "log_odds_ratio": -0.4573117196559906, "logits/chosen": 2.3018126487731934, "logits/rejected": 2.3378913402557373, "logps/chosen": -0.2610825002193451, "logps/rejected": -0.5758386850357056, "loss": 1.0243, "nll_loss": 0.9785677790641785, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.026108253747224808, "rewards/margins": 0.03147561475634575, "rewards/rejected": -0.05758386105298996, "step": 695 }, { "epoch": 0.9704251386321626, "grad_norm": 1.6327714920043945, "learning_rate": 3.0927272727272725e-07, "log_odds_chosen": 0.8046802878379822, "log_odds_ratio": -0.5016317367553711, "logits/chosen": 2.166358470916748, "logits/rejected": 2.2131805419921875, "logps/chosen": -0.27325597405433655, "logps/rejected": -0.559944748878479, "loss": 1.0823, "nll_loss": 1.0321154594421387, "rewards/accuracies": 0.7583333253860474, "rewards/chosen": -0.027325598523020744, "rewards/margins": 0.028668878600001335, "rewards/rejected": -0.05599447339773178, "step": 700 }, { "epoch": 0.9773567467652495, "grad_norm": 1.5271565914154053, "learning_rate": 3.083636363636363e-07, "log_odds_chosen": 0.9641692638397217, "log_odds_ratio": -0.4276140630245209, "logits/chosen": 2.2330405712127686, "logits/rejected": 2.275627374649048, "logps/chosen": -0.2772447168827057, "logps/rejected": -0.6339874267578125, "loss": 1.0827, "nll_loss": 1.039945125579834, "rewards/accuracies": 0.7916666865348816, "rewards/chosen": -0.027724474668502808, "rewards/margins": 0.03567427024245262, "rewards/rejected": -0.06339874863624573, "step": 705 }, { "epoch": 0.9842883548983364, "grad_norm": 1.9477697610855103, "learning_rate": 3.0745454545454545e-07, "log_odds_chosen": 0.9072960019111633, "log_odds_ratio": -0.49298056960105896, "logits/chosen": 2.2244527339935303, "logits/rejected": 2.2482240200042725, "logps/chosen": -0.274914413690567, "logps/rejected": -0.6292871832847595, "loss": 1.0557, "nll_loss": 1.0063644647598267, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.02749144285917282, "rewards/margins": 0.03543727472424507, "rewards/rejected": -0.0629287138581276, "step": 710 }, { "epoch": 0.9912199630314233, "grad_norm": 14.025252342224121, "learning_rate": 3.065454545454545e-07, "log_odds_chosen": 0.9216189980506897, "log_odds_ratio": -0.5028970837593079, "logits/chosen": 2.2144951820373535, "logits/rejected": 2.24345326423645, "logps/chosen": -0.281184583902359, "logps/rejected": -0.6528930068016052, "loss": 1.0402, "nll_loss": 0.9899436831474304, "rewards/accuracies": 0.7333333492279053, "rewards/chosen": -0.02811845950782299, "rewards/margins": 0.03717083856463432, "rewards/rejected": -0.06528931111097336, "step": 715 }, { "epoch": 0.9981515711645101, "grad_norm": 1.4665298461914062, "learning_rate": 3.056363636363636e-07, "log_odds_chosen": 1.1137733459472656, "log_odds_ratio": -0.4105357825756073, "logits/chosen": 2.2789127826690674, "logits/rejected": 2.3198835849761963, "logps/chosen": -0.2862909138202667, "logps/rejected": -0.725393533706665, "loss": 1.0414, "nll_loss": 1.0003019571304321, "rewards/accuracies": 0.8083333373069763, "rewards/chosen": -0.028629092499613762, "rewards/margins": 0.04391026496887207, "rewards/rejected": -0.07253936678171158, "step": 720 }, { "epoch": 1.0041589648798521, "grad_norm": 2.1046509742736816, "learning_rate": 3.047272727272727e-07, "log_odds_chosen": 0.8841580152511597, "log_odds_ratio": -0.5193544626235962, "logits/chosen": 2.157961368560791, "logits/rejected": 2.214885711669922, "logps/chosen": -0.31194257736206055, "logps/rejected": -0.6325222849845886, "loss": 0.9397, "nll_loss": 1.0297856330871582, "rewards/accuracies": 0.6891025900840759, "rewards/chosen": -0.031194258481264114, "rewards/margins": 0.03205796703696251, "rewards/rejected": -0.06325222551822662, "step": 725 }, { "epoch": 1.011090573012939, "grad_norm": 11.973499298095703, "learning_rate": 3.038181818181818e-07, "log_odds_chosen": 1.0592764616012573, "log_odds_ratio": -0.42826417088508606, "logits/chosen": 2.2182023525238037, "logits/rejected": 2.259822368621826, "logps/chosen": -0.27322566509246826, "logps/rejected": -0.6173789501190186, "loss": 1.0566, "nll_loss": 1.0138195753097534, "rewards/accuracies": 0.7916666865348816, "rewards/chosen": -0.027322567999362946, "rewards/margins": 0.03441532701253891, "rewards/rejected": -0.061737895011901855, "step": 730 }, { "epoch": 1.018022181146026, "grad_norm": 1.6032253503799438, "learning_rate": 3.029090909090909e-07, "log_odds_chosen": 1.0015116930007935, "log_odds_ratio": -0.45987558364868164, "logits/chosen": 2.2573957443237305, "logits/rejected": 2.302398920059204, "logps/chosen": -0.29264530539512634, "logps/rejected": -0.7004638910293579, "loss": 1.0733, "nll_loss": 1.0272737741470337, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.029264533892273903, "rewards/margins": 0.04078185185790062, "rewards/rejected": -0.07004638761281967, "step": 735 }, { "epoch": 1.0249537892791127, "grad_norm": 1.9156720638275146, "learning_rate": 3.02e-07, "log_odds_chosen": 0.9316055178642273, "log_odds_ratio": -0.49356502294540405, "logits/chosen": 2.2086682319641113, "logits/rejected": 2.247258424758911, "logps/chosen": -0.2785702645778656, "logps/rejected": -0.6358006000518799, "loss": 1.0579, "nll_loss": 1.0085234642028809, "rewards/accuracies": 0.7583333253860474, "rewards/chosen": -0.02785702608525753, "rewards/margins": 0.035723041743040085, "rewards/rejected": -0.06358006596565247, "step": 740 }, { "epoch": 1.0318853974121995, "grad_norm": 1.8676711320877075, "learning_rate": 3.0109090909090905e-07, "log_odds_chosen": 1.0930769443511963, "log_odds_ratio": -0.41506868600845337, "logits/chosen": 2.1967382431030273, "logits/rejected": 2.2548134326934814, "logps/chosen": -0.2859395146369934, "logps/rejected": -0.6978200078010559, "loss": 1.0064, "nll_loss": 0.964898407459259, "rewards/accuracies": 0.8166666626930237, "rewards/chosen": -0.02859395369887352, "rewards/margins": 0.04118805751204491, "rewards/rejected": -0.06978200376033783, "step": 745 }, { "epoch": 1.0388170055452866, "grad_norm": 1.7424761056900024, "learning_rate": 3.0018181818181817e-07, "log_odds_chosen": 1.030094027519226, "log_odds_ratio": -0.45161333680152893, "logits/chosen": 2.171247959136963, "logits/rejected": 2.210944652557373, "logps/chosen": -0.2976371645927429, "logps/rejected": -0.6693560481071472, "loss": 1.0341, "nll_loss": 0.9889503717422485, "rewards/accuracies": 0.7583333253860474, "rewards/chosen": -0.02976371720433235, "rewards/margins": 0.03717188537120819, "rewards/rejected": -0.06693560630083084, "step": 750 }, { "epoch": 1.0457486136783734, "grad_norm": 1.6317884922027588, "learning_rate": 2.9927272727272724e-07, "log_odds_chosen": 1.2374706268310547, "log_odds_ratio": -0.3697361350059509, "logits/chosen": 2.271786689758301, "logits/rejected": 2.3244924545288086, "logps/chosen": -0.27134397625923157, "logps/rejected": -0.7432472109794617, "loss": 1.0338, "nll_loss": 0.996829628944397, "rewards/accuracies": 0.8916666507720947, "rewards/chosen": -0.02713439241051674, "rewards/margins": 0.04719032719731331, "rewards/rejected": -0.07432472705841064, "step": 755 }, { "epoch": 1.0526802218114604, "grad_norm": 1.9266161918640137, "learning_rate": 2.983636363636363e-07, "log_odds_chosen": 1.1042684316635132, "log_odds_ratio": -0.42497718334198, "logits/chosen": 2.178818464279175, "logits/rejected": 2.243793487548828, "logps/chosen": -0.23215913772583008, "logps/rejected": -0.6292255520820618, "loss": 1.0091, "nll_loss": 0.9665910601615906, "rewards/accuracies": 0.7666666507720947, "rewards/chosen": -0.023215916007757187, "rewards/margins": 0.03970663994550705, "rewards/rejected": -0.06292255967855453, "step": 760 }, { "epoch": 1.0596118299445472, "grad_norm": 2.3465418815612793, "learning_rate": 2.9745454545454544e-07, "log_odds_chosen": 1.1358228921890259, "log_odds_ratio": -0.43412578105926514, "logits/chosen": 2.157498598098755, "logits/rejected": 2.2107603549957275, "logps/chosen": -0.29064181447029114, "logps/rejected": -0.6974590420722961, "loss": 1.0562, "nll_loss": 1.0127959251403809, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.029064182192087173, "rewards/margins": 0.04068171977996826, "rewards/rejected": -0.06974589824676514, "step": 765 }, { "epoch": 1.066543438077634, "grad_norm": 1.337546944618225, "learning_rate": 2.965454545454545e-07, "log_odds_chosen": 1.1235884428024292, "log_odds_ratio": -0.4154122769832611, "logits/chosen": 2.1831068992614746, "logits/rejected": 2.230762243270874, "logps/chosen": -0.25542908906936646, "logps/rejected": -0.6585070490837097, "loss": 1.0557, "nll_loss": 1.0141483545303345, "rewards/accuracies": 0.8166666626930237, "rewards/chosen": -0.025542909279465675, "rewards/margins": 0.04030779376626015, "rewards/rejected": -0.06585069000720978, "step": 770 }, { "epoch": 1.073475046210721, "grad_norm": 1.92345130443573, "learning_rate": 2.9563636363636363e-07, "log_odds_chosen": 1.2442071437835693, "log_odds_ratio": -0.3937591016292572, "logits/chosen": 2.1876795291900635, "logits/rejected": 2.237107515335083, "logps/chosen": -0.2693581283092499, "logps/rejected": -0.7542040944099426, "loss": 1.0156, "nll_loss": 0.9762417078018188, "rewards/accuracies": 0.8166666626930237, "rewards/chosen": -0.026935815811157227, "rewards/margins": 0.04848460480570793, "rewards/rejected": -0.07542040199041367, "step": 775 }, { "epoch": 1.0804066543438078, "grad_norm": 1.3390204906463623, "learning_rate": 2.947272727272727e-07, "log_odds_chosen": 1.0398173332214355, "log_odds_ratio": -0.429106205701828, "logits/chosen": 2.1649744510650635, "logits/rejected": 2.2123680114746094, "logps/chosen": -0.27507466077804565, "logps/rejected": -0.6559757590293884, "loss": 1.0672, "nll_loss": 1.0242794752120972, "rewards/accuracies": 0.8166666626930237, "rewards/chosen": -0.027507467195391655, "rewards/margins": 0.038090117275714874, "rewards/rejected": -0.06559757888317108, "step": 780 }, { "epoch": 1.0873382624768946, "grad_norm": 1.570081114768982, "learning_rate": 2.9381818181818177e-07, "log_odds_chosen": 0.9334086775779724, "log_odds_ratio": -0.4862407147884369, "logits/chosen": 2.119239330291748, "logits/rejected": 2.1761107444763184, "logps/chosen": -0.25805288553237915, "logps/rejected": -0.6043078303337097, "loss": 1.0514, "nll_loss": 1.002801537513733, "rewards/accuracies": 0.7833333611488342, "rewards/chosen": -0.025805287063121796, "rewards/margins": 0.034625496715307236, "rewards/rejected": -0.06043078005313873, "step": 785 }, { "epoch": 1.0942698706099816, "grad_norm": 2.047260284423828, "learning_rate": 2.929090909090909e-07, "log_odds_chosen": 1.1341291666030884, "log_odds_ratio": -0.4764944314956665, "logits/chosen": 2.219109296798706, "logits/rejected": 2.278170347213745, "logps/chosen": -0.3056103587150574, "logps/rejected": -0.7312763333320618, "loss": 1.0227, "nll_loss": 0.9750102162361145, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.030561033636331558, "rewards/margins": 0.04256659746170044, "rewards/rejected": -0.0731276348233223, "step": 790 }, { "epoch": 1.1012014787430684, "grad_norm": 1.683436393737793, "learning_rate": 2.9199999999999997e-07, "log_odds_chosen": 1.1937026977539062, "log_odds_ratio": -0.41498705744743347, "logits/chosen": 2.276967763900757, "logits/rejected": 2.3187286853790283, "logps/chosen": -0.2915228605270386, "logps/rejected": -0.7586190700531006, "loss": 1.0585, "nll_loss": 1.0169990062713623, "rewards/accuracies": 0.8166666626930237, "rewards/chosen": -0.0291522815823555, "rewards/margins": 0.04670962318778038, "rewards/rejected": -0.07586190849542618, "step": 795 }, { "epoch": 1.1081330868761552, "grad_norm": 1.2493406534194946, "learning_rate": 2.910909090909091e-07, "log_odds_chosen": 1.2343627214431763, "log_odds_ratio": -0.3899300992488861, "logits/chosen": 2.1663858890533447, "logits/rejected": 2.2325944900512695, "logps/chosen": -0.2753217816352844, "logps/rejected": -0.7316860556602478, "loss": 1.0388, "nll_loss": 0.999790370464325, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.02753218077123165, "rewards/margins": 0.04563641548156738, "rewards/rejected": -0.07316859811544418, "step": 800 }, { "epoch": 1.1150646950092422, "grad_norm": 1.5523159503936768, "learning_rate": 2.9018181818181816e-07, "log_odds_chosen": 1.0665152072906494, "log_odds_ratio": -0.45254915952682495, "logits/chosen": 2.245769739151001, "logits/rejected": 2.2844347953796387, "logps/chosen": -0.31260156631469727, "logps/rejected": -0.7254413366317749, "loss": 1.0384, "nll_loss": 0.9930953979492188, "rewards/accuracies": 0.7833333611488342, "rewards/chosen": -0.031260158866643906, "rewards/margins": 0.041283976286649704, "rewards/rejected": -0.07254412770271301, "step": 805 }, { "epoch": 1.121996303142329, "grad_norm": 2.6294355392456055, "learning_rate": 2.8927272727272723e-07, "log_odds_chosen": 1.2369199991226196, "log_odds_ratio": -0.42075902223587036, "logits/chosen": 2.0666658878326416, "logits/rejected": 2.124197244644165, "logps/chosen": -0.2580902874469757, "logps/rejected": -0.705311119556427, "loss": 1.0688, "nll_loss": 1.0266811847686768, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.02580902725458145, "rewards/margins": 0.04472209885716438, "rewards/rejected": -0.07053112238645554, "step": 810 }, { "epoch": 1.1289279112754158, "grad_norm": 2.0441761016845703, "learning_rate": 2.8836363636363636e-07, "log_odds_chosen": 1.0299813747406006, "log_odds_ratio": -0.48719069361686707, "logits/chosen": 2.1046738624572754, "logits/rejected": 2.1503727436065674, "logps/chosen": -0.2982068955898285, "logps/rejected": -0.6966476440429688, "loss": 1.0879, "nll_loss": 1.0392258167266846, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.029820691794157028, "rewards/margins": 0.039844077080488205, "rewards/rejected": -0.06966476142406464, "step": 815 }, { "epoch": 1.1358595194085028, "grad_norm": 1.5556236505508423, "learning_rate": 2.8745454545454543e-07, "log_odds_chosen": 1.2123700380325317, "log_odds_ratio": -0.4055812954902649, "logits/chosen": 2.209066867828369, "logits/rejected": 2.2755773067474365, "logps/chosen": -0.28011006116867065, "logps/rejected": -0.7493889331817627, "loss": 1.0693, "nll_loss": 1.0287730693817139, "rewards/accuracies": 0.7833333611488342, "rewards/chosen": -0.028011005371809006, "rewards/margins": 0.046927884221076965, "rewards/rejected": -0.07493889331817627, "step": 820 }, { "epoch": 1.1427911275415896, "grad_norm": 1.8499306440353394, "learning_rate": 2.865454545454545e-07, "log_odds_chosen": 1.2569739818572998, "log_odds_ratio": -0.4136119782924652, "logits/chosen": 2.1816537380218506, "logits/rejected": 2.23928165435791, "logps/chosen": -0.2742965817451477, "logps/rejected": -0.7915823459625244, "loss": 1.0642, "nll_loss": 1.0228677988052368, "rewards/accuracies": 0.7833333611488342, "rewards/chosen": -0.02742965891957283, "rewards/margins": 0.051728587597608566, "rewards/rejected": -0.0791582390666008, "step": 825 }, { "epoch": 1.1497227356746764, "grad_norm": 1.7029317617416382, "learning_rate": 2.856363636363636e-07, "log_odds_chosen": 1.1728360652923584, "log_odds_ratio": -0.4236757159233093, "logits/chosen": 2.178098201751709, "logits/rejected": 2.2561495304107666, "logps/chosen": -0.26117414236068726, "logps/rejected": -0.7411549091339111, "loss": 1.0474, "nll_loss": 1.0050232410430908, "rewards/accuracies": 0.8416666388511658, "rewards/chosen": -0.026117417961359024, "rewards/margins": 0.04799807071685791, "rewards/rejected": -0.07411548495292664, "step": 830 }, { "epoch": 1.1566543438077634, "grad_norm": 2.336233615875244, "learning_rate": 2.847272727272727e-07, "log_odds_chosen": 1.2286179065704346, "log_odds_ratio": -0.41043874621391296, "logits/chosen": 2.171309232711792, "logits/rejected": 2.2093799114227295, "logps/chosen": -0.2653755843639374, "logps/rejected": -0.7861889600753784, "loss": 1.062, "nll_loss": 1.0209335088729858, "rewards/accuracies": 0.8083333373069763, "rewards/chosen": -0.026537559926509857, "rewards/margins": 0.052081331610679626, "rewards/rejected": -0.07861888408660889, "step": 835 }, { "epoch": 1.1635859519408502, "grad_norm": 1.7186654806137085, "learning_rate": 2.838181818181818e-07, "log_odds_chosen": 1.1606539487838745, "log_odds_ratio": -0.40123751759529114, "logits/chosen": 2.206134796142578, "logits/rejected": 2.2637779712677, "logps/chosen": -0.2596302628517151, "logps/rejected": -0.7078793048858643, "loss": 1.0382, "nll_loss": 0.9980748891830444, "rewards/accuracies": 0.8083333373069763, "rewards/chosen": -0.02596302516758442, "rewards/margins": 0.04482491686940193, "rewards/rejected": -0.0707879364490509, "step": 840 }, { "epoch": 1.1705175600739373, "grad_norm": 2.3376824855804443, "learning_rate": 2.829090909090909e-07, "log_odds_chosen": 0.9114227890968323, "log_odds_ratio": -0.510475754737854, "logits/chosen": 2.1974329948425293, "logits/rejected": 2.257779359817505, "logps/chosen": -0.3180769979953766, "logps/rejected": -0.6727191805839539, "loss": 1.056, "nll_loss": 1.0049461126327515, "rewards/accuracies": 0.7333333492279053, "rewards/chosen": -0.03180769830942154, "rewards/margins": 0.035464223474264145, "rewards/rejected": -0.06727192550897598, "step": 845 }, { "epoch": 1.177449168207024, "grad_norm": 1.9338550567626953, "learning_rate": 2.8199999999999996e-07, "log_odds_chosen": 1.1852858066558838, "log_odds_ratio": -0.40160685777664185, "logits/chosen": 2.1158764362335205, "logits/rejected": 2.1807785034179688, "logps/chosen": -0.3050893247127533, "logps/rejected": -0.7781674861907959, "loss": 1.023, "nll_loss": 0.9828112125396729, "rewards/accuracies": 0.8416666388511658, "rewards/chosen": -0.030508937314152718, "rewards/margins": 0.047307804226875305, "rewards/rejected": -0.07781673967838287, "step": 850 }, { "epoch": 1.1843807763401109, "grad_norm": 32.405643463134766, "learning_rate": 2.810909090909091e-07, "log_odds_chosen": 1.2816615104675293, "log_odds_ratio": -0.38976308703422546, "logits/chosen": 2.062394142150879, "logits/rejected": 2.130361557006836, "logps/chosen": -0.27238449454307556, "logps/rejected": -0.7614350914955139, "loss": 1.0719, "nll_loss": 1.032881498336792, "rewards/accuracies": 0.8583333492279053, "rewards/chosen": -0.027238452807068825, "rewards/margins": 0.048905063420534134, "rewards/rejected": -0.07614351063966751, "step": 855 }, { "epoch": 1.1913123844731979, "grad_norm": 1.5311826467514038, "learning_rate": 2.8018181818181815e-07, "log_odds_chosen": 1.1707886457443237, "log_odds_ratio": -0.42907601594924927, "logits/chosen": 2.1916935443878174, "logits/rejected": 2.255692481994629, "logps/chosen": -0.31618261337280273, "logps/rejected": -0.7682264447212219, "loss": 1.0343, "nll_loss": 0.9914371967315674, "rewards/accuracies": 0.8083333373069763, "rewards/chosen": -0.031618259847164154, "rewards/margins": 0.04520439729094505, "rewards/rejected": -0.07682265341281891, "step": 860 }, { "epoch": 1.1982439926062847, "grad_norm": 1.3627372980117798, "learning_rate": 2.792727272727273e-07, "log_odds_chosen": 1.2053844928741455, "log_odds_ratio": -0.39541786909103394, "logits/chosen": 2.108999490737915, "logits/rejected": 2.1568171977996826, "logps/chosen": -0.2578326463699341, "logps/rejected": -0.6993592381477356, "loss": 1.0029, "nll_loss": 0.9633194804191589, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.025783265009522438, "rewards/margins": 0.04415265843272209, "rewards/rejected": -0.06993592530488968, "step": 865 }, { "epoch": 1.2051756007393715, "grad_norm": 2.1243605613708496, "learning_rate": 2.7836363636363635e-07, "log_odds_chosen": 1.1114146709442139, "log_odds_ratio": -0.44650015234947205, "logits/chosen": 2.155231237411499, "logits/rejected": 2.2109177112579346, "logps/chosen": -0.30871832370758057, "logps/rejected": -0.7572067379951477, "loss": 1.0634, "nll_loss": 1.018787145614624, "rewards/accuracies": 0.75, "rewards/chosen": -0.030871832743287086, "rewards/margins": 0.04484884440898895, "rewards/rejected": -0.07572067528963089, "step": 870 }, { "epoch": 1.2121072088724585, "grad_norm": 1.8302316665649414, "learning_rate": 2.774545454545454e-07, "log_odds_chosen": 1.195847749710083, "log_odds_ratio": -0.45233553647994995, "logits/chosen": 2.2495977878570557, "logits/rejected": 2.3004953861236572, "logps/chosen": -0.32949963212013245, "logps/rejected": -0.8411144614219666, "loss": 1.0578, "nll_loss": 1.0125887393951416, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03294995799660683, "rewards/margins": 0.05116148665547371, "rewards/rejected": -0.08411144465208054, "step": 875 }, { "epoch": 1.2190388170055453, "grad_norm": 1.7341769933700562, "learning_rate": 2.7654545454545454e-07, "log_odds_chosen": 1.0370428562164307, "log_odds_ratio": -0.4735434949398041, "logits/chosen": 2.110995292663574, "logits/rejected": 2.166097402572632, "logps/chosen": -0.321010559797287, "logps/rejected": -0.7727290391921997, "loss": 1.0489, "nll_loss": 1.0015724897384644, "rewards/accuracies": 0.7833333611488342, "rewards/chosen": -0.03210105374455452, "rewards/margins": 0.045171838253736496, "rewards/rejected": -0.07727289199829102, "step": 880 }, { "epoch": 1.225970425138632, "grad_norm": 1.5535844564437866, "learning_rate": 2.756363636363636e-07, "log_odds_chosen": 1.0898783206939697, "log_odds_ratio": -0.42453181743621826, "logits/chosen": 2.1117665767669678, "logits/rejected": 2.1755316257476807, "logps/chosen": -0.27768510580062866, "logps/rejected": -0.6975895166397095, "loss": 1.0298, "nll_loss": 0.9873270392417908, "rewards/accuracies": 0.8416666388511658, "rewards/chosen": -0.027768509462475777, "rewards/margins": 0.04199044778943062, "rewards/rejected": -0.06975895911455154, "step": 885 }, { "epoch": 1.232902033271719, "grad_norm": 2.520540237426758, "learning_rate": 2.747272727272727e-07, "log_odds_chosen": 1.0620572566986084, "log_odds_ratio": -0.44835466146469116, "logits/chosen": 2.088442802429199, "logits/rejected": 2.1342625617980957, "logps/chosen": -0.31703558564186096, "logps/rejected": -0.6918619871139526, "loss": 1.0714, "nll_loss": 1.0265547037124634, "rewards/accuracies": 0.8166666626930237, "rewards/chosen": -0.031703557819128036, "rewards/margins": 0.03748263791203499, "rewards/rejected": -0.06918619573116302, "step": 890 }, { "epoch": 1.239833641404806, "grad_norm": 1.8474420309066772, "learning_rate": 2.738181818181818e-07, "log_odds_chosen": 1.2523150444030762, "log_odds_ratio": -0.38173770904541016, "logits/chosen": 2.1438181400299072, "logits/rejected": 2.1918885707855225, "logps/chosen": -0.2869779169559479, "logps/rejected": -0.8195567727088928, "loss": 1.0427, "nll_loss": 1.004526138305664, "rewards/accuracies": 0.8416666388511658, "rewards/chosen": -0.028697794303297997, "rewards/margins": 0.05325789004564285, "rewards/rejected": -0.0819556713104248, "step": 895 }, { "epoch": 1.2467652495378927, "grad_norm": 1.8705800771713257, "learning_rate": 2.729090909090909e-07, "log_odds_chosen": 1.3002660274505615, "log_odds_ratio": -0.3636976182460785, "logits/chosen": 2.1178719997406006, "logits/rejected": 2.189141035079956, "logps/chosen": -0.25350457429885864, "logps/rejected": -0.7615570425987244, "loss": 1.0187, "nll_loss": 0.9823279976844788, "rewards/accuracies": 0.8833333253860474, "rewards/chosen": -0.025350457057356834, "rewards/margins": 0.05080525204539299, "rewards/rejected": -0.07615570724010468, "step": 900 }, { "epoch": 1.2536968576709797, "grad_norm": 1.3865669965744019, "learning_rate": 2.72e-07, "log_odds_chosen": 1.2055052518844604, "log_odds_ratio": -0.3909756541252136, "logits/chosen": 2.1472742557525635, "logits/rejected": 2.194523572921753, "logps/chosen": -0.26772162318229675, "logps/rejected": -0.7685297727584839, "loss": 1.0522, "nll_loss": 1.0131094455718994, "rewards/accuracies": 0.8333333134651184, "rewards/chosen": -0.026772161945700645, "rewards/margins": 0.05008082091808319, "rewards/rejected": -0.07685296982526779, "step": 905 }, { "epoch": 1.2606284658040665, "grad_norm": 1.4818207025527954, "learning_rate": 2.7109090909090907e-07, "log_odds_chosen": 1.338388204574585, "log_odds_ratio": -0.3422391712665558, "logits/chosen": 2.08438777923584, "logits/rejected": 2.1425976753234863, "logps/chosen": -0.28000956773757935, "logps/rejected": -0.7683375477790833, "loss": 1.0637, "nll_loss": 1.0295100212097168, "rewards/accuracies": 0.8916666507720947, "rewards/chosen": -0.028000956401228905, "rewards/margins": 0.04883280023932457, "rewards/rejected": -0.07683374732732773, "step": 910 }, { "epoch": 1.2675600739371533, "grad_norm": 1.9989374876022339, "learning_rate": 2.7018181818181814e-07, "log_odds_chosen": 1.1596630811691284, "log_odds_ratio": -0.46093183755874634, "logits/chosen": 2.104174852371216, "logits/rejected": 2.1529417037963867, "logps/chosen": -0.30684390664100647, "logps/rejected": -0.7890374660491943, "loss": 1.0142, "nll_loss": 0.9680600762367249, "rewards/accuracies": 0.7916666865348816, "rewards/chosen": -0.03068438731133938, "rewards/margins": 0.048219338059425354, "rewards/rejected": -0.07890374213457108, "step": 915 }, { "epoch": 1.2744916820702403, "grad_norm": 1.6794359683990479, "learning_rate": 2.6927272727272727e-07, "log_odds_chosen": 1.4304643869400024, "log_odds_ratio": -0.38649657368659973, "logits/chosen": 2.1374399662017822, "logits/rejected": 2.1798601150512695, "logps/chosen": -0.29669511318206787, "logps/rejected": -0.8554088473320007, "loss": 1.0687, "nll_loss": 1.0300294160842896, "rewards/accuracies": 0.8333333134651184, "rewards/chosen": -0.029669513925909996, "rewards/margins": 0.05587137117981911, "rewards/rejected": -0.08554088324308395, "step": 920 }, { "epoch": 1.2814232902033271, "grad_norm": 1.981391429901123, "learning_rate": 2.6836363636363634e-07, "log_odds_chosen": 1.2685843706130981, "log_odds_ratio": -0.41906872391700745, "logits/chosen": 2.2779479026794434, "logits/rejected": 2.3425650596618652, "logps/chosen": -0.3056505024433136, "logps/rejected": -0.8699617981910706, "loss": 1.0141, "nll_loss": 0.9721490144729614, "rewards/accuracies": 0.8083333373069763, "rewards/chosen": -0.03056504763662815, "rewards/margins": 0.056431129574775696, "rewards/rejected": -0.0869961753487587, "step": 925 }, { "epoch": 1.2883548983364141, "grad_norm": 2.3423352241516113, "learning_rate": 2.674545454545454e-07, "log_odds_chosen": 1.137675404548645, "log_odds_ratio": -0.4286971390247345, "logits/chosen": 2.1223714351654053, "logits/rejected": 2.1788086891174316, "logps/chosen": -0.28890591859817505, "logps/rejected": -0.7336766719818115, "loss": 1.0341, "nll_loss": 0.9912530779838562, "rewards/accuracies": 0.8166666626930237, "rewards/chosen": -0.028890585526823997, "rewards/margins": 0.04447708651423454, "rewards/rejected": -0.07336767762899399, "step": 930 }, { "epoch": 1.295286506469501, "grad_norm": 1.893749475479126, "learning_rate": 2.6654545454545453e-07, "log_odds_chosen": 1.4332900047302246, "log_odds_ratio": -0.34831517934799194, "logits/chosen": 2.1003878116607666, "logits/rejected": 2.1722400188446045, "logps/chosen": -0.28711190819740295, "logps/rejected": -0.8249975442886353, "loss": 1.0661, "nll_loss": 1.0313143730163574, "rewards/accuracies": 0.9083333611488342, "rewards/chosen": -0.028711196035146713, "rewards/margins": 0.05378856882452965, "rewards/rejected": -0.08249974995851517, "step": 935 }, { "epoch": 1.3022181146025877, "grad_norm": 4.6435160636901855, "learning_rate": 2.656363636363636e-07, "log_odds_chosen": 1.082852840423584, "log_odds_ratio": -0.45315298438072205, "logits/chosen": 2.1574742794036865, "logits/rejected": 2.178946018218994, "logps/chosen": -0.3037134110927582, "logps/rejected": -0.7448738813400269, "loss": 1.0704, "nll_loss": 1.0250810384750366, "rewards/accuracies": 0.7916666865348816, "rewards/chosen": -0.03037133812904358, "rewards/margins": 0.0441160574555397, "rewards/rejected": -0.07448740303516388, "step": 940 }, { "epoch": 1.3091497227356748, "grad_norm": 2.1025171279907227, "learning_rate": 2.647272727272727e-07, "log_odds_chosen": 1.1002755165100098, "log_odds_ratio": -0.44053414463996887, "logits/chosen": 2.106156826019287, "logits/rejected": 2.155453681945801, "logps/chosen": -0.2461674064397812, "logps/rejected": -0.6956557035446167, "loss": 1.0449, "nll_loss": 1.0008207559585571, "rewards/accuracies": 0.7666666507720947, "rewards/chosen": -0.024616742506623268, "rewards/margins": 0.04494882747530937, "rewards/rejected": -0.06956557184457779, "step": 945 }, { "epoch": 1.3160813308687616, "grad_norm": 1.3751192092895508, "learning_rate": 2.638181818181818e-07, "log_odds_chosen": 1.303594708442688, "log_odds_ratio": -0.36508041620254517, "logits/chosen": 2.1502010822296143, "logits/rejected": 2.213773012161255, "logps/chosen": -0.24462252855300903, "logps/rejected": -0.7531419396400452, "loss": 1.0002, "nll_loss": 0.9637396931648254, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.024462254717946053, "rewards/margins": 0.05085194110870361, "rewards/rejected": -0.07531419396400452, "step": 950 }, { "epoch": 1.3230129390018484, "grad_norm": 2.228548526763916, "learning_rate": 2.6290909090909087e-07, "log_odds_chosen": 1.1054414510726929, "log_odds_ratio": -0.4313698410987854, "logits/chosen": 2.054419755935669, "logits/rejected": 2.1415534019470215, "logps/chosen": -0.25074058771133423, "logps/rejected": -0.6487269997596741, "loss": 0.9808, "nll_loss": 0.9376189708709717, "rewards/accuracies": 0.8083333373069763, "rewards/chosen": -0.025074057281017303, "rewards/margins": 0.039798639714717865, "rewards/rejected": -0.06487269699573517, "step": 955 }, { "epoch": 1.3299445471349354, "grad_norm": 2.61733078956604, "learning_rate": 2.62e-07, "log_odds_chosen": 1.2382240295410156, "log_odds_ratio": -0.42497238516807556, "logits/chosen": 2.222075939178467, "logits/rejected": 2.268425464630127, "logps/chosen": -0.3022395372390747, "logps/rejected": -0.819779634475708, "loss": 1.1007, "nll_loss": 1.0581555366516113, "rewards/accuracies": 0.7833333611488342, "rewards/chosen": -0.03022395819425583, "rewards/margins": 0.05175400897860527, "rewards/rejected": -0.0819779708981514, "step": 960 }, { "epoch": 1.3368761552680222, "grad_norm": 1.3807116746902466, "learning_rate": 2.6109090909090906e-07, "log_odds_chosen": 1.2055879831314087, "log_odds_ratio": -0.3867366909980774, "logits/chosen": 2.1033875942230225, "logits/rejected": 2.1540472507476807, "logps/chosen": -0.277556449174881, "logps/rejected": -0.7209326028823853, "loss": 1.0018, "nll_loss": 0.9630894064903259, "rewards/accuracies": 0.8833333253860474, "rewards/chosen": -0.027755646035075188, "rewards/margins": 0.044337622821331024, "rewards/rejected": -0.07209326326847076, "step": 965 }, { "epoch": 1.343807763401109, "grad_norm": 2.0066561698913574, "learning_rate": 2.601818181818182e-07, "log_odds_chosen": 1.3411426544189453, "log_odds_ratio": -0.4055359959602356, "logits/chosen": 2.0890884399414062, "logits/rejected": 2.154545783996582, "logps/chosen": -0.3011336922645569, "logps/rejected": -0.8289684057235718, "loss": 1.0136, "nll_loss": 0.9730068445205688, "rewards/accuracies": 0.8416666388511658, "rewards/chosen": -0.03011336922645569, "rewards/margins": 0.05278347432613373, "rewards/rejected": -0.08289684355258942, "step": 970 }, { "epoch": 1.350739371534196, "grad_norm": 1.9893730878829956, "learning_rate": 2.5927272727272726e-07, "log_odds_chosen": 1.1508934497833252, "log_odds_ratio": -0.4049813449382782, "logits/chosen": 2.1374075412750244, "logits/rejected": 2.1761152744293213, "logps/chosen": -0.3276708424091339, "logps/rejected": -0.7975314259529114, "loss": 1.0488, "nll_loss": 1.0082674026489258, "rewards/accuracies": 0.7916666865348816, "rewards/chosen": -0.03276708349585533, "rewards/margins": 0.04698607698082924, "rewards/rejected": -0.07975315302610397, "step": 975 }, { "epoch": 1.3576709796672828, "grad_norm": 2.1329009532928467, "learning_rate": 2.583636363636363e-07, "log_odds_chosen": 1.4169307947158813, "log_odds_ratio": -0.4349002242088318, "logits/chosen": 2.0836265087127686, "logits/rejected": 2.1221506595611572, "logps/chosen": -0.28608009219169617, "logps/rejected": -0.8973101377487183, "loss": 1.043, "nll_loss": 0.9995481967926025, "rewards/accuracies": 0.7833333611488342, "rewards/chosen": -0.028608011081814766, "rewards/margins": 0.06112300232052803, "rewards/rejected": -0.08973101526498795, "step": 980 }, { "epoch": 1.3646025878003698, "grad_norm": 1.5113952159881592, "learning_rate": 2.5745454545454545e-07, "log_odds_chosen": 1.0963186025619507, "log_odds_ratio": -0.45471566915512085, "logits/chosen": 2.0918946266174316, "logits/rejected": 2.147059917449951, "logps/chosen": -0.2852664589881897, "logps/rejected": -0.6635777354240417, "loss": 1.0257, "nll_loss": 0.9802023768424988, "rewards/accuracies": 0.8166666626930237, "rewards/chosen": -0.02852664329111576, "rewards/margins": 0.037831127643585205, "rewards/rejected": -0.06635776907205582, "step": 985 }, { "epoch": 1.3715341959334566, "grad_norm": 1.2862117290496826, "learning_rate": 2.565454545454545e-07, "log_odds_chosen": 1.2140270471572876, "log_odds_ratio": -0.37888041138648987, "logits/chosen": 2.191817283630371, "logits/rejected": 2.248182773590088, "logps/chosen": -0.3066679537296295, "logps/rejected": -0.775726854801178, "loss": 1.0404, "nll_loss": 1.0024964809417725, "rewards/accuracies": 0.8333333134651184, "rewards/chosen": -0.030666792765259743, "rewards/margins": 0.046905890107154846, "rewards/rejected": -0.07757268846035004, "step": 990 }, { "epoch": 1.3784658040665434, "grad_norm": 2.1814792156219482, "learning_rate": 2.556363636363636e-07, "log_odds_chosen": 1.1626993417739868, "log_odds_ratio": -0.421124666929245, "logits/chosen": 2.1899914741516113, "logits/rejected": 2.2058660984039307, "logps/chosen": -0.3344000279903412, "logps/rejected": -0.812175989151001, "loss": 1.0606, "nll_loss": 1.0184708833694458, "rewards/accuracies": 0.8333333134651184, "rewards/chosen": -0.0334400050342083, "rewards/margins": 0.04777759313583374, "rewards/rejected": -0.08121760189533234, "step": 995 }, { "epoch": 1.3853974121996302, "grad_norm": 1.2583403587341309, "learning_rate": 2.547272727272727e-07, "log_odds_chosen": 1.4967857599258423, "log_odds_ratio": -0.32969042658805847, "logits/chosen": 2.146193027496338, "logits/rejected": 2.210885763168335, "logps/chosen": -0.2802196443080902, "logps/rejected": -0.9464040994644165, "loss": 0.9995, "nll_loss": 0.9665043354034424, "rewards/accuracies": 0.8666666746139526, "rewards/chosen": -0.02802196517586708, "rewards/margins": 0.0666184350848198, "rewards/rejected": -0.09464039653539658, "step": 1000 }, { "epoch": 1.3923290203327172, "grad_norm": 3.5290753841400146, "learning_rate": 2.538181818181818e-07, "log_odds_chosen": 1.478700041770935, "log_odds_ratio": -0.36008498072624207, "logits/chosen": 2.2012546062469482, "logits/rejected": 2.257498264312744, "logps/chosen": -0.2387988120317459, "logps/rejected": -0.8150947690010071, "loss": 1.0503, "nll_loss": 1.0142549276351929, "rewards/accuracies": 0.875, "rewards/chosen": -0.023879878222942352, "rewards/margins": 0.057629600167274475, "rewards/rejected": -0.08150947839021683, "step": 1005 }, { "epoch": 1.399260628465804, "grad_norm": 2.2447733879089355, "learning_rate": 2.529090909090909e-07, "log_odds_chosen": 1.5088070631027222, "log_odds_ratio": -0.37234050035476685, "logits/chosen": 2.131213665008545, "logits/rejected": 2.2011141777038574, "logps/chosen": -0.30126506090164185, "logps/rejected": -0.9823321104049683, "loss": 1.0395, "nll_loss": 1.0022485256195068, "rewards/accuracies": 0.8416666388511658, "rewards/chosen": -0.030126502737402916, "rewards/margins": 0.06810670346021652, "rewards/rejected": -0.09823321551084518, "step": 1010 }, { "epoch": 1.406192236598891, "grad_norm": 1.6832704544067383, "learning_rate": 2.52e-07, "log_odds_chosen": 1.3786380290985107, "log_odds_ratio": -0.3612533509731293, "logits/chosen": 2.1688296794891357, "logits/rejected": 2.2276947498321533, "logps/chosen": -0.26425522565841675, "logps/rejected": -0.7921401858329773, "loss": 1.0578, "nll_loss": 1.0216939449310303, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.026425523683428764, "rewards/margins": 0.05278850346803665, "rewards/rejected": -0.07921402156352997, "step": 1015 }, { "epoch": 1.4131238447319778, "grad_norm": 1.8064701557159424, "learning_rate": 2.5109090909090905e-07, "log_odds_chosen": 1.475263237953186, "log_odds_ratio": -0.3918589651584625, "logits/chosen": 2.1581435203552246, "logits/rejected": 2.197493314743042, "logps/chosen": -0.3150491416454315, "logps/rejected": -0.9940579533576965, "loss": 1.0716, "nll_loss": 1.032424807548523, "rewards/accuracies": 0.7916666865348816, "rewards/chosen": -0.03150491416454315, "rewards/margins": 0.0679008811712265, "rewards/rejected": -0.09940580278635025, "step": 1020 }, { "epoch": 1.4200554528650646, "grad_norm": 3.116750478744507, "learning_rate": 2.501818181818182e-07, "log_odds_chosen": 1.4267123937606812, "log_odds_ratio": -0.3581138253211975, "logits/chosen": 2.150437593460083, "logits/rejected": 2.196103572845459, "logps/chosen": -0.3007969856262207, "logps/rejected": -0.9089770913124084, "loss": 1.0489, "nll_loss": 1.0131365060806274, "rewards/accuracies": 0.8833333253860474, "rewards/chosen": -0.030079694464802742, "rewards/margins": 0.060818012803792953, "rewards/rejected": -0.09089770913124084, "step": 1025 }, { "epoch": 1.4269870609981516, "grad_norm": 1.498563528060913, "learning_rate": 2.4927272727272725e-07, "log_odds_chosen": 1.207962155342102, "log_odds_ratio": -0.44308769702911377, "logits/chosen": 2.086268663406372, "logits/rejected": 2.1397366523742676, "logps/chosen": -0.2770783007144928, "logps/rejected": -0.7369803786277771, "loss": 0.9999, "nll_loss": 0.9556010961532593, "rewards/accuracies": 0.7916666865348816, "rewards/chosen": -0.02770783193409443, "rewards/margins": 0.04599021375179291, "rewards/rejected": -0.07369804382324219, "step": 1030 }, { "epoch": 1.4339186691312384, "grad_norm": 2.2361621856689453, "learning_rate": 2.483636363636363e-07, "log_odds_chosen": 1.3191566467285156, "log_odds_ratio": -0.40445128083229065, "logits/chosen": 2.138350486755371, "logits/rejected": 2.201185703277588, "logps/chosen": -0.29545170068740845, "logps/rejected": -0.824712872505188, "loss": 1.0544, "nll_loss": 1.0139575004577637, "rewards/accuracies": 0.8166666626930237, "rewards/chosen": -0.029545169323682785, "rewards/margins": 0.05292612686753273, "rewards/rejected": -0.08247129619121552, "step": 1035 }, { "epoch": 1.4408502772643252, "grad_norm": 1.6298840045928955, "learning_rate": 2.4745454545454544e-07, "log_odds_chosen": 1.2173666954040527, "log_odds_ratio": -0.39109528064727783, "logits/chosen": 2.0655899047851562, "logits/rejected": 2.1055774688720703, "logps/chosen": -0.28332966566085815, "logps/rejected": -0.7577340006828308, "loss": 1.0551, "nll_loss": 1.015963077545166, "rewards/accuracies": 0.8333333134651184, "rewards/chosen": -0.028332972899079323, "rewards/margins": 0.04744042083621025, "rewards/rejected": -0.07577338814735413, "step": 1040 }, { "epoch": 1.4477818853974123, "grad_norm": 1.9538235664367676, "learning_rate": 2.465454545454545e-07, "log_odds_chosen": 1.4704244136810303, "log_odds_ratio": -0.36091259121894836, "logits/chosen": 2.067735195159912, "logits/rejected": 2.107760190963745, "logps/chosen": -0.24208252131938934, "logps/rejected": -0.8322712779045105, "loss": 1.0402, "nll_loss": 1.0041333436965942, "rewards/accuracies": 0.8416666388511658, "rewards/chosen": -0.024208255112171173, "rewards/margins": 0.05901888757944107, "rewards/rejected": -0.08322712779045105, "step": 1045 }, { "epoch": 1.454713493530499, "grad_norm": 2.375593900680542, "learning_rate": 2.4563636363636363e-07, "log_odds_chosen": 1.342626690864563, "log_odds_ratio": -0.3853161633014679, "logits/chosen": 2.0571084022521973, "logits/rejected": 2.118323802947998, "logps/chosen": -0.2818390429019928, "logps/rejected": -0.8502424955368042, "loss": 1.0375, "nll_loss": 0.9989607930183411, "rewards/accuracies": 0.8416666388511658, "rewards/chosen": -0.02818390727043152, "rewards/margins": 0.05684033781290054, "rewards/rejected": -0.08502423763275146, "step": 1050 }, { "epoch": 1.4616451016635859, "grad_norm": 3.926017999649048, "learning_rate": 2.447272727272727e-07, "log_odds_chosen": 1.2430663108825684, "log_odds_ratio": -0.4533371925354004, "logits/chosen": 2.024120569229126, "logits/rejected": 2.0624420642852783, "logps/chosen": -0.26999524235725403, "logps/rejected": -0.7484342455863953, "loss": 1.029, "nll_loss": 0.9836971759796143, "rewards/accuracies": 0.8333333134651184, "rewards/chosen": -0.026999525725841522, "rewards/margins": 0.04784390702843666, "rewards/rejected": -0.07484342157840729, "step": 1055 }, { "epoch": 1.4685767097966729, "grad_norm": 2.118476152420044, "learning_rate": 2.438181818181818e-07, "log_odds_chosen": 0.9853528738021851, "log_odds_ratio": -0.4883633553981781, "logits/chosen": 2.0680394172668457, "logits/rejected": 2.136178493499756, "logps/chosen": -0.3353304862976074, "logps/rejected": -0.7483987808227539, "loss": 1.0975, "nll_loss": 1.0486379861831665, "rewards/accuracies": 0.75, "rewards/chosen": -0.033533044159412384, "rewards/margins": 0.04130683094263077, "rewards/rejected": -0.07483987510204315, "step": 1060 }, { "epoch": 1.4755083179297597, "grad_norm": 2.533656120300293, "learning_rate": 2.429090909090909e-07, "log_odds_chosen": 0.9804785847663879, "log_odds_ratio": -0.4883616864681244, "logits/chosen": 2.097592353820801, "logits/rejected": 2.1535866260528564, "logps/chosen": -0.29354825615882874, "logps/rejected": -0.7082226872444153, "loss": 1.0215, "nll_loss": 0.9726455807685852, "rewards/accuracies": 0.7666666507720947, "rewards/chosen": -0.02935483120381832, "rewards/margins": 0.041467439383268356, "rewards/rejected": -0.07082226872444153, "step": 1065 }, { "epoch": 1.4824399260628467, "grad_norm": 1.7187951803207397, "learning_rate": 2.4199999999999997e-07, "log_odds_chosen": 1.2794216871261597, "log_odds_ratio": -0.3667431175708771, "logits/chosen": 2.010540246963501, "logits/rejected": 2.082904815673828, "logps/chosen": -0.25798189640045166, "logps/rejected": -0.7230808138847351, "loss": 1.0057, "nll_loss": 0.9690180420875549, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.025798192247748375, "rewards/margins": 0.046509888023138046, "rewards/rejected": -0.07230808585882187, "step": 1070 }, { "epoch": 1.4893715341959335, "grad_norm": 2.982020616531372, "learning_rate": 2.410909090909091e-07, "log_odds_chosen": 1.1678146123886108, "log_odds_ratio": -0.41319549083709717, "logits/chosen": 2.1234309673309326, "logits/rejected": 2.17087459564209, "logps/chosen": -0.32161521911621094, "logps/rejected": -0.8085314035415649, "loss": 1.0905, "nll_loss": 1.0492180585861206, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.032161518931388855, "rewards/margins": 0.04869161546230316, "rewards/rejected": -0.08085312694311142, "step": 1075 }, { "epoch": 1.4963031423290203, "grad_norm": 2.0627431869506836, "learning_rate": 2.4018181818181816e-07, "log_odds_chosen": 1.1932671070098877, "log_odds_ratio": -0.45415419340133667, "logits/chosen": 2.096099615097046, "logits/rejected": 2.167684316635132, "logps/chosen": -0.2884353697299957, "logps/rejected": -0.8226889371871948, "loss": 1.0148, "nll_loss": 0.9693484902381897, "rewards/accuracies": 0.7416666746139526, "rewards/chosen": -0.028843533247709274, "rewards/margins": 0.0534253753721714, "rewards/rejected": -0.08226890861988068, "step": 1080 }, { "epoch": 1.503234750462107, "grad_norm": 2.2353203296661377, "learning_rate": 2.3927272727272724e-07, "log_odds_chosen": 1.2742195129394531, "log_odds_ratio": -0.41592225432395935, "logits/chosen": 2.007319450378418, "logits/rejected": 2.0866177082061768, "logps/chosen": -0.28505003452301025, "logps/rejected": -0.8023856282234192, "loss": 1.0211, "nll_loss": 0.9794998168945312, "rewards/accuracies": 0.8333333134651184, "rewards/chosen": -0.028505001217126846, "rewards/margins": 0.05173356831073761, "rewards/rejected": -0.08023856580257416, "step": 1085 }, { "epoch": 1.510166358595194, "grad_norm": 2.5934882164001465, "learning_rate": 2.3836363636363636e-07, "log_odds_chosen": 1.4275834560394287, "log_odds_ratio": -0.3598444163799286, "logits/chosen": 2.1860811710357666, "logits/rejected": 2.234570264816284, "logps/chosen": -0.2805514633655548, "logps/rejected": -0.867975652217865, "loss": 1.0418, "nll_loss": 1.0058512687683105, "rewards/accuracies": 0.8666666746139526, "rewards/chosen": -0.028055142611265182, "rewards/margins": 0.058742426335811615, "rewards/rejected": -0.0867975726723671, "step": 1090 }, { "epoch": 1.5170979667282811, "grad_norm": 3.790891408920288, "learning_rate": 2.3745454545454543e-07, "log_odds_chosen": 1.4269194602966309, "log_odds_ratio": -0.3880935609340668, "logits/chosen": 2.081702470779419, "logits/rejected": 2.1460609436035156, "logps/chosen": -0.2924844026565552, "logps/rejected": -0.8836368322372437, "loss": 1.0421, "nll_loss": 1.0032716989517212, "rewards/accuracies": 0.8083333373069763, "rewards/chosen": -0.029248446226119995, "rewards/margins": 0.05911524221301079, "rewards/rejected": -0.08836368471384048, "step": 1095 }, { "epoch": 1.524029574861368, "grad_norm": 1.444333553314209, "learning_rate": 2.3654545454545453e-07, "log_odds_chosen": 1.5300582647323608, "log_odds_ratio": -0.3157467842102051, "logits/chosen": 2.065735340118408, "logits/rejected": 2.124572992324829, "logps/chosen": -0.25401103496551514, "logps/rejected": -0.870669960975647, "loss": 1.0242, "nll_loss": 0.9926338791847229, "rewards/accuracies": 0.875, "rewards/chosen": -0.02540110982954502, "rewards/margins": 0.061665892601013184, "rewards/rejected": -0.08706699311733246, "step": 1100 }, { "epoch": 1.5309611829944547, "grad_norm": 1.328914999961853, "learning_rate": 2.3563636363636362e-07, "log_odds_chosen": 1.5849844217300415, "log_odds_ratio": -0.3574371933937073, "logits/chosen": 2.0881130695343018, "logits/rejected": 2.132666826248169, "logps/chosen": -0.297654926776886, "logps/rejected": -0.9698154330253601, "loss": 1.0571, "nll_loss": 1.021361231803894, "rewards/accuracies": 0.8833333253860474, "rewards/chosen": -0.029765494167804718, "rewards/margins": 0.06721605360507965, "rewards/rejected": -0.09698154032230377, "step": 1105 }, { "epoch": 1.5378927911275415, "grad_norm": 3.064085006713867, "learning_rate": 2.347272727272727e-07, "log_odds_chosen": 1.482607364654541, "log_odds_ratio": -0.41177818179130554, "logits/chosen": 2.100356101989746, "logits/rejected": 2.149595022201538, "logps/chosen": -0.3262555003166199, "logps/rejected": -0.9901362657546997, "loss": 1.024, "nll_loss": 0.9827964305877686, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.03262555971741676, "rewards/margins": 0.0663880705833435, "rewards/rejected": -0.09901363402605057, "step": 1110 }, { "epoch": 1.5448243992606283, "grad_norm": 1.7144571542739868, "learning_rate": 2.3381818181818182e-07, "log_odds_chosen": 1.297485113143921, "log_odds_ratio": -0.44758838415145874, "logits/chosen": 2.169114112854004, "logits/rejected": 2.225714683532715, "logps/chosen": -0.334065705537796, "logps/rejected": -0.8946993350982666, "loss": 1.0743, "nll_loss": 1.0295709371566772, "rewards/accuracies": 0.7833333611488342, "rewards/chosen": -0.033406566828489304, "rewards/margins": 0.05606337636709213, "rewards/rejected": -0.08946993947029114, "step": 1115 }, { "epoch": 1.5517560073937153, "grad_norm": 1.577359676361084, "learning_rate": 2.329090909090909e-07, "log_odds_chosen": 1.224462628364563, "log_odds_ratio": -0.40063703060150146, "logits/chosen": 2.1936235427856445, "logits/rejected": 2.2490689754486084, "logps/chosen": -0.2935000956058502, "logps/rejected": -0.746100664138794, "loss": 1.0438, "nll_loss": 1.0037094354629517, "rewards/accuracies": 0.8333333134651184, "rewards/chosen": -0.02935001067817211, "rewards/margins": 0.045260071754455566, "rewards/rejected": -0.07461007684469223, "step": 1120 }, { "epoch": 1.5586876155268024, "grad_norm": 1.445590615272522, "learning_rate": 2.3199999999999999e-07, "log_odds_chosen": 1.428971529006958, "log_odds_ratio": -0.3972775638103485, "logits/chosen": 2.1382179260253906, "logits/rejected": 2.1906816959381104, "logps/chosen": -0.3196958899497986, "logps/rejected": -0.9572470784187317, "loss": 1.0678, "nll_loss": 1.0280849933624268, "rewards/accuracies": 0.8333333134651184, "rewards/chosen": -0.0319695882499218, "rewards/margins": 0.06375513225793839, "rewards/rejected": -0.09572472423315048, "step": 1125 }, { "epoch": 1.5656192236598891, "grad_norm": 2.6556386947631836, "learning_rate": 2.3109090909090908e-07, "log_odds_chosen": 1.5480204820632935, "log_odds_ratio": -0.2979664206504822, "logits/chosen": 1.9882813692092896, "logits/rejected": 2.047785520553589, "logps/chosen": -0.2392881214618683, "logps/rejected": -0.806952178478241, "loss": 1.0056, "nll_loss": 0.9758478403091431, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.0239288117736578, "rewards/margins": 0.05676640570163727, "rewards/rejected": -0.08069522678852081, "step": 1130 }, { "epoch": 1.572550831792976, "grad_norm": 1.543114185333252, "learning_rate": 2.3018181818181815e-07, "log_odds_chosen": 1.306591272354126, "log_odds_ratio": -0.38037270307540894, "logits/chosen": 2.1771938800811768, "logits/rejected": 2.204706907272339, "logps/chosen": -0.2897684872150421, "logps/rejected": -0.8257070183753967, "loss": 1.0772, "nll_loss": 1.0391967296600342, "rewards/accuracies": 0.8333333134651184, "rewards/chosen": -0.02897684834897518, "rewards/margins": 0.05359385162591934, "rewards/rejected": -0.08257070928812027, "step": 1135 }, { "epoch": 1.5794824399260627, "grad_norm": 2.1715428829193115, "learning_rate": 2.2927272727272728e-07, "log_odds_chosen": 1.4083577394485474, "log_odds_ratio": -0.4124881327152252, "logits/chosen": 2.092696189880371, "logits/rejected": 2.1379013061523438, "logps/chosen": -0.302141934633255, "logps/rejected": -0.8928415775299072, "loss": 1.0676, "nll_loss": 1.026381492614746, "rewards/accuracies": 0.8416666388511658, "rewards/chosen": -0.03021419048309326, "rewards/margins": 0.059069979935884476, "rewards/rejected": -0.08928415924310684, "step": 1140 }, { "epoch": 1.5864140480591498, "grad_norm": 1.848644733428955, "learning_rate": 2.2836363636363635e-07, "log_odds_chosen": 1.3089061975479126, "log_odds_ratio": -0.40707269310951233, "logits/chosen": 2.151923418045044, "logits/rejected": 2.2069790363311768, "logps/chosen": -0.29877957701683044, "logps/rejected": -0.8117203116416931, "loss": 1.0202, "nll_loss": 0.9795213341712952, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.029877962544560432, "rewards/margins": 0.05129408463835716, "rewards/rejected": -0.08117203414440155, "step": 1145 }, { "epoch": 1.5933456561922366, "grad_norm": 2.3527963161468506, "learning_rate": 2.2745454545454542e-07, "log_odds_chosen": 1.4974111318588257, "log_odds_ratio": -0.40171802043914795, "logits/chosen": 2.093785524368286, "logits/rejected": 2.144911050796509, "logps/chosen": -0.2905969023704529, "logps/rejected": -0.9690452218055725, "loss": 1.043, "nll_loss": 1.0028067827224731, "rewards/accuracies": 0.8166666626930237, "rewards/chosen": -0.02905968949198723, "rewards/margins": 0.06784483045339584, "rewards/rejected": -0.09690450876951218, "step": 1150 }, { "epoch": 1.6002772643253236, "grad_norm": 3.2236886024475098, "learning_rate": 2.2654545454545454e-07, "log_odds_chosen": 1.4238559007644653, "log_odds_ratio": -0.39723172783851624, "logits/chosen": 2.1333365440368652, "logits/rejected": 2.172248125076294, "logps/chosen": -0.2883763015270233, "logps/rejected": -0.8981528878211975, "loss": 1.0034, "nll_loss": 0.9636661410331726, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.028837626799941063, "rewards/margins": 0.06097765639424324, "rewards/rejected": -0.08981527388095856, "step": 1155 }, { "epoch": 1.6072088724584104, "grad_norm": 1.6170852184295654, "learning_rate": 2.2563636363636361e-07, "log_odds_chosen": 1.2530089616775513, "log_odds_ratio": -0.3909212052822113, "logits/chosen": 2.122631311416626, "logits/rejected": 2.1740965843200684, "logps/chosen": -0.3135833740234375, "logps/rejected": -0.8097442984580994, "loss": 1.0268, "nll_loss": 0.9877387881278992, "rewards/accuracies": 0.8416666388511658, "rewards/chosen": -0.03135833516716957, "rewards/margins": 0.04961610212922096, "rewards/rejected": -0.08097445219755173, "step": 1160 }, { "epoch": 1.6141404805914972, "grad_norm": 3.0495035648345947, "learning_rate": 2.247272727272727e-07, "log_odds_chosen": 1.2540229558944702, "log_odds_ratio": -0.4228639304637909, "logits/chosen": 2.1452457904815674, "logits/rejected": 2.199737548828125, "logps/chosen": -0.32104960083961487, "logps/rejected": -0.9188255071640015, "loss": 1.0655, "nll_loss": 1.0232080221176147, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.032104961574077606, "rewards/margins": 0.059777598828077316, "rewards/rejected": -0.09188255667686462, "step": 1165 }, { "epoch": 1.621072088724584, "grad_norm": 1.4193778038024902, "learning_rate": 2.238181818181818e-07, "log_odds_chosen": 1.4086551666259766, "log_odds_ratio": -0.4291422665119171, "logits/chosen": 2.132582187652588, "logits/rejected": 2.1951537132263184, "logps/chosen": -0.27538225054740906, "logps/rejected": -0.8874372839927673, "loss": 1.0582, "nll_loss": 1.0153110027313232, "rewards/accuracies": 0.7833333611488342, "rewards/chosen": -0.027538226917386055, "rewards/margins": 0.061205506324768066, "rewards/rejected": -0.08874373137950897, "step": 1170 }, { "epoch": 1.628003696857671, "grad_norm": 4.035185813903809, "learning_rate": 2.2290909090909088e-07, "log_odds_chosen": 1.1511118412017822, "log_odds_ratio": -0.42211630940437317, "logits/chosen": 2.043503761291504, "logits/rejected": 2.0831780433654785, "logps/chosen": -0.260224312543869, "logps/rejected": -0.69998699426651, "loss": 1.0533, "nll_loss": 1.0110424757003784, "rewards/accuracies": 0.8166666626930237, "rewards/chosen": -0.02602243237197399, "rewards/margins": 0.04397625848650932, "rewards/rejected": -0.06999869644641876, "step": 1175 }, { "epoch": 1.634935304990758, "grad_norm": 2.188004732131958, "learning_rate": 2.22e-07, "log_odds_chosen": 1.3063474893569946, "log_odds_ratio": -0.4064422845840454, "logits/chosen": 2.0244498252868652, "logits/rejected": 2.106379747390747, "logps/chosen": -0.28905677795410156, "logps/rejected": -0.8373602628707886, "loss": 1.019, "nll_loss": 0.9783718585968018, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.028905682265758514, "rewards/margins": 0.054830338805913925, "rewards/rejected": -0.08373603224754333, "step": 1180 }, { "epoch": 1.6418669131238448, "grad_norm": 2.3367137908935547, "learning_rate": 2.2109090909090907e-07, "log_odds_chosen": 1.1956565380096436, "log_odds_ratio": -0.4174603819847107, "logits/chosen": 2.0511369705200195, "logits/rejected": 2.097266435623169, "logps/chosen": -0.29188284277915955, "logps/rejected": -0.7658560872077942, "loss": 1.0463, "nll_loss": 1.0045546293258667, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.029188284650444984, "rewards/margins": 0.047397319227457047, "rewards/rejected": -0.07658561319112778, "step": 1185 }, { "epoch": 1.6487985212569316, "grad_norm": 2.0365090370178223, "learning_rate": 2.2018181818181817e-07, "log_odds_chosen": 1.470836877822876, "log_odds_ratio": -0.32899710536003113, "logits/chosen": 2.0854430198669434, "logits/rejected": 2.146840810775757, "logps/chosen": -0.25189822912216187, "logps/rejected": -0.805548906326294, "loss": 1.0219, "nll_loss": 0.9889623522758484, "rewards/accuracies": 0.8916666507720947, "rewards/chosen": -0.025189822539687157, "rewards/margins": 0.055365074425935745, "rewards/rejected": -0.08055491000413895, "step": 1190 }, { "epoch": 1.6557301293900184, "grad_norm": 1.4684514999389648, "learning_rate": 2.1927272727272727e-07, "log_odds_chosen": 1.5294240713119507, "log_odds_ratio": -0.3121597468852997, "logits/chosen": 2.0382626056671143, "logits/rejected": 2.096428871154785, "logps/chosen": -0.28892782330513, "logps/rejected": -0.9338265061378479, "loss": 1.0155, "nll_loss": 0.9842939972877502, "rewards/accuracies": 0.8666666746139526, "rewards/chosen": -0.02889277972280979, "rewards/margins": 0.06448986381292343, "rewards/rejected": -0.09338264167308807, "step": 1195 }, { "epoch": 1.6626617375231052, "grad_norm": 1.848939299583435, "learning_rate": 2.1836363636363634e-07, "log_odds_chosen": 1.2887681722640991, "log_odds_ratio": -0.39700159430503845, "logits/chosen": 2.0447583198547363, "logits/rejected": 2.086496353149414, "logps/chosen": -0.28646960854530334, "logps/rejected": -0.8112481832504272, "loss": 1.0786, "nll_loss": 1.0388500690460205, "rewards/accuracies": 0.8166666626930237, "rewards/chosen": -0.028646962717175484, "rewards/margins": 0.05247785896062851, "rewards/rejected": -0.08112481236457825, "step": 1200 }, { "epoch": 1.6695933456561922, "grad_norm": 2.438013792037964, "learning_rate": 2.1745454545454544e-07, "log_odds_chosen": 1.2431906461715698, "log_odds_ratio": -0.41865456104278564, "logits/chosen": 1.9995055198669434, "logits/rejected": 2.066706418991089, "logps/chosen": -0.28791549801826477, "logps/rejected": -0.8092067241668701, "loss": 1.0478, "nll_loss": 1.0059376955032349, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.02879154682159424, "rewards/margins": 0.052129123359918594, "rewards/rejected": -0.08092068135738373, "step": 1205 }, { "epoch": 1.6765249537892792, "grad_norm": 1.8914486169815063, "learning_rate": 2.1654545454545453e-07, "log_odds_chosen": 1.669597864151001, "log_odds_ratio": -0.2944275438785553, "logits/chosen": 2.067124366760254, "logits/rejected": 2.1200904846191406, "logps/chosen": -0.2542329728603363, "logps/rejected": -0.9926477670669556, "loss": 1.024, "nll_loss": 0.9945566058158875, "rewards/accuracies": 0.8666666746139526, "rewards/chosen": -0.02542329952120781, "rewards/margins": 0.07384147495031357, "rewards/rejected": -0.09926477074623108, "step": 1210 }, { "epoch": 1.683456561922366, "grad_norm": 1.8205511569976807, "learning_rate": 2.156363636363636e-07, "log_odds_chosen": 1.615554928779602, "log_odds_ratio": -0.3494965434074402, "logits/chosen": 2.0657761096954346, "logits/rejected": 2.138937473297119, "logps/chosen": -0.3055498003959656, "logps/rejected": -0.9770273566246033, "loss": 1.0574, "nll_loss": 1.0224652290344238, "rewards/accuracies": 0.8666666746139526, "rewards/chosen": -0.03055497631430626, "rewards/margins": 0.06714775413274765, "rewards/rejected": -0.0977027490735054, "step": 1215 }, { "epoch": 1.6903881700554528, "grad_norm": 3.3164730072021484, "learning_rate": 2.1472727272727273e-07, "log_odds_chosen": 1.2579916715621948, "log_odds_ratio": -0.41120514273643494, "logits/chosen": 2.0500335693359375, "logits/rejected": 2.107823133468628, "logps/chosen": -0.2929707467556, "logps/rejected": -0.8051959276199341, "loss": 1.0315, "nll_loss": 0.9903787970542908, "rewards/accuracies": 0.8083333373069763, "rewards/chosen": -0.029297074303030968, "rewards/margins": 0.051222506910562515, "rewards/rejected": -0.08051959425210953, "step": 1220 }, { "epoch": 1.6973197781885396, "grad_norm": 3.1997969150543213, "learning_rate": 2.138181818181818e-07, "log_odds_chosen": 1.3084485530853271, "log_odds_ratio": -0.41026392579078674, "logits/chosen": 1.9871976375579834, "logits/rejected": 2.0475666522979736, "logps/chosen": -0.31262117624282837, "logps/rejected": -0.8240591287612915, "loss": 1.014, "nll_loss": 0.972952127456665, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.0312621183693409, "rewards/margins": 0.05114380270242691, "rewards/rejected": -0.08240590989589691, "step": 1225 }, { "epoch": 1.7042513863216266, "grad_norm": 2.6871352195739746, "learning_rate": 2.129090909090909e-07, "log_odds_chosen": 1.0879921913146973, "log_odds_ratio": -0.4872511923313141, "logits/chosen": 2.080152750015259, "logits/rejected": 2.119732141494751, "logps/chosen": -0.369219571352005, "logps/rejected": -0.8730208277702332, "loss": 1.0926, "nll_loss": 1.0438321828842163, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03692195937037468, "rewards/margins": 0.05038012936711311, "rewards/rejected": -0.0873020812869072, "step": 1230 }, { "epoch": 1.7111829944547134, "grad_norm": 2.325326442718506, "learning_rate": 2.12e-07, "log_odds_chosen": 1.397155523300171, "log_odds_ratio": -0.39500170946121216, "logits/chosen": 2.125500202178955, "logits/rejected": 2.1754844188690186, "logps/chosen": -0.2974463105201721, "logps/rejected": -0.9127005934715271, "loss": 1.029, "nll_loss": 0.9895287156105042, "rewards/accuracies": 0.7833333611488342, "rewards/chosen": -0.02974463254213333, "rewards/margins": 0.06152542680501938, "rewards/rejected": -0.0912700667977333, "step": 1235 }, { "epoch": 1.7181146025878005, "grad_norm": 1.5156536102294922, "learning_rate": 2.1109090909090906e-07, "log_odds_chosen": 1.2906519174575806, "log_odds_ratio": -0.40361616015434265, "logits/chosen": 2.051255702972412, "logits/rejected": 2.103986978530884, "logps/chosen": -0.268101304769516, "logps/rejected": -0.7997626066207886, "loss": 1.0366, "nll_loss": 0.9962154030799866, "rewards/accuracies": 0.8166666626930237, "rewards/chosen": -0.02681013010442257, "rewards/margins": 0.05316613242030144, "rewards/rejected": -0.07997626811265945, "step": 1240 }, { "epoch": 1.7250462107208873, "grad_norm": 1.6096516847610474, "learning_rate": 2.101818181818182e-07, "log_odds_chosen": 1.2347605228424072, "log_odds_ratio": -0.4557424783706665, "logits/chosen": 2.0431010723114014, "logits/rejected": 2.1129000186920166, "logps/chosen": -0.30273541808128357, "logps/rejected": -0.7864081859588623, "loss": 1.0529, "nll_loss": 1.0073034763336182, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03027353622019291, "rewards/margins": 0.048367276787757874, "rewards/rejected": -0.07864081859588623, "step": 1245 }, { "epoch": 1.731977818853974, "grad_norm": 1.9126524925231934, "learning_rate": 2.0927272727272726e-07, "log_odds_chosen": 1.4341093301773071, "log_odds_ratio": -0.3776791989803314, "logits/chosen": 2.029585838317871, "logits/rejected": 2.096311569213867, "logps/chosen": -0.2593327462673187, "logps/rejected": -0.8751354217529297, "loss": 1.0188, "nll_loss": 0.9809887409210205, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.025933273136615753, "rewards/margins": 0.06158026307821274, "rewards/rejected": -0.08751355111598969, "step": 1250 }, { "epoch": 1.7389094269870609, "grad_norm": 2.2804696559906006, "learning_rate": 2.0836363636363633e-07, "log_odds_chosen": 1.4436743259429932, "log_odds_ratio": -0.3733202815055847, "logits/chosen": 2.1618611812591553, "logits/rejected": 2.197058916091919, "logps/chosen": -0.2881154417991638, "logps/rejected": -0.9169884324073792, "loss": 1.0506, "nll_loss": 1.0132598876953125, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.02881154604256153, "rewards/margins": 0.06288730353116989, "rewards/rejected": -0.09169885516166687, "step": 1255 }, { "epoch": 1.7458410351201479, "grad_norm": 2.1073522567749023, "learning_rate": 2.0745454545454545e-07, "log_odds_chosen": 1.6635074615478516, "log_odds_ratio": -0.33457812666893005, "logits/chosen": 2.074899911880493, "logits/rejected": 2.122929573059082, "logps/chosen": -0.27845534682273865, "logps/rejected": -1.005134105682373, "loss": 1.0756, "nll_loss": 1.0421861410140991, "rewards/accuracies": 0.875, "rewards/chosen": -0.027845535427331924, "rewards/margins": 0.07266788184642792, "rewards/rejected": -0.10051342844963074, "step": 1260 }, { "epoch": 1.752772643253235, "grad_norm": 2.2422614097595215, "learning_rate": 2.0654545454545452e-07, "log_odds_chosen": 1.3691959381103516, "log_odds_ratio": -0.40435412526130676, "logits/chosen": 2.1057488918304443, "logits/rejected": 2.1638669967651367, "logps/chosen": -0.31880542635917664, "logps/rejected": -0.903930127620697, "loss": 1.0408, "nll_loss": 1.000407099723816, "rewards/accuracies": 0.8166666626930237, "rewards/chosen": -0.031880538910627365, "rewards/margins": 0.05851246044039726, "rewards/rejected": -0.09039301425218582, "step": 1265 }, { "epoch": 1.7597042513863217, "grad_norm": 2.287367820739746, "learning_rate": 2.0563636363636362e-07, "log_odds_chosen": 1.282787799835205, "log_odds_ratio": -0.4151849150657654, "logits/chosen": 2.05078387260437, "logits/rejected": 2.125896692276001, "logps/chosen": -0.28913381695747375, "logps/rejected": -0.792908251285553, "loss": 1.0073, "nll_loss": 0.9657413363456726, "rewards/accuracies": 0.8166666626930237, "rewards/chosen": -0.028913382440805435, "rewards/margins": 0.05037744715809822, "rewards/rejected": -0.07929082959890366, "step": 1270 }, { "epoch": 1.7666358595194085, "grad_norm": 1.7958959341049194, "learning_rate": 2.0472727272727272e-07, "log_odds_chosen": 1.724271297454834, "log_odds_ratio": -0.2810591757297516, "logits/chosen": 2.0967202186584473, "logits/rejected": 2.161252021789551, "logps/chosen": -0.2763175964355469, "logps/rejected": -1.0735727548599243, "loss": 1.0564, "nll_loss": 1.0283379554748535, "rewards/accuracies": 0.9083333611488342, "rewards/chosen": -0.027631759643554688, "rewards/margins": 0.07972551882266998, "rewards/rejected": -0.10735727101564407, "step": 1275 }, { "epoch": 1.7735674676524953, "grad_norm": 2.2755537033081055, "learning_rate": 2.038181818181818e-07, "log_odds_chosen": 1.4004557132720947, "log_odds_ratio": -0.3993929326534271, "logits/chosen": 2.111241102218628, "logits/rejected": 2.151012897491455, "logps/chosen": -0.29145321249961853, "logps/rejected": -0.8968268036842346, "loss": 0.983, "nll_loss": 0.9430230259895325, "rewards/accuracies": 0.8333333134651184, "rewards/chosen": -0.029145320877432823, "rewards/margins": 0.060537371784448624, "rewards/rejected": -0.0896826833486557, "step": 1280 }, { "epoch": 1.7804990757855823, "grad_norm": 1.726332187652588, "learning_rate": 2.029090909090909e-07, "log_odds_chosen": 1.7174845933914185, "log_odds_ratio": -0.3723779022693634, "logits/chosen": 2.092041254043579, "logits/rejected": 2.163289785385132, "logps/chosen": -0.2837842106819153, "logps/rejected": -1.036734700202942, "loss": 0.9897, "nll_loss": 0.9525095224380493, "rewards/accuracies": 0.8166666626930237, "rewards/chosen": -0.028378423303365707, "rewards/margins": 0.07529504597187042, "rewards/rejected": -0.10367346554994583, "step": 1285 }, { "epoch": 1.787430683918669, "grad_norm": 1.5710785388946533, "learning_rate": 2.0199999999999998e-07, "log_odds_chosen": 1.312154769897461, "log_odds_ratio": -0.3994414210319519, "logits/chosen": 2.0113887786865234, "logits/rejected": 2.0753164291381836, "logps/chosen": -0.272027850151062, "logps/rejected": -0.7796825766563416, "loss": 1.0201, "nll_loss": 0.9801668524742126, "rewards/accuracies": 0.8416666388511658, "rewards/chosen": -0.02720278687775135, "rewards/margins": 0.05076547712087631, "rewards/rejected": -0.07796826213598251, "step": 1290 }, { "epoch": 1.7943622920517561, "grad_norm": 1.920129418373108, "learning_rate": 2.0109090909090908e-07, "log_odds_chosen": 1.6343634128570557, "log_odds_ratio": -0.3433375358581543, "logits/chosen": 2.1635687351226807, "logits/rejected": 2.2147443294525146, "logps/chosen": -0.2949898838996887, "logps/rejected": -1.0586962699890137, "loss": 1.0487, "nll_loss": 1.0143301486968994, "rewards/accuracies": 0.8583333492279053, "rewards/chosen": -0.029498988762497902, "rewards/margins": 0.07637064903974533, "rewards/rejected": -0.10586963593959808, "step": 1295 }, { "epoch": 1.801293900184843, "grad_norm": 1.9142963886260986, "learning_rate": 2.0018181818181818e-07, "log_odds_chosen": 1.8189107179641724, "log_odds_ratio": -0.30017244815826416, "logits/chosen": 2.058302879333496, "logits/rejected": 2.130945920944214, "logps/chosen": -0.2555373013019562, "logps/rejected": -1.0402501821517944, "loss": 1.0479, "nll_loss": 1.0179013013839722, "rewards/accuracies": 0.9166666865348816, "rewards/chosen": -0.025553731247782707, "rewards/margins": 0.07847128063440323, "rewards/rejected": -0.10402501374483109, "step": 1300 }, { "epoch": 1.8082255083179297, "grad_norm": 2.576738119125366, "learning_rate": 1.9927272727272725e-07, "log_odds_chosen": 1.4616259336471558, "log_odds_ratio": -0.35250329971313477, "logits/chosen": 2.0860280990600586, "logits/rejected": 2.160719394683838, "logps/chosen": -0.26303815841674805, "logps/rejected": -0.8395700454711914, "loss": 1.039, "nll_loss": 1.0037142038345337, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.026303818449378014, "rewards/margins": 0.05765319615602493, "rewards/rejected": -0.0839570015668869, "step": 1305 }, { "epoch": 1.8151571164510165, "grad_norm": 2.0888595581054688, "learning_rate": 1.9836363636363634e-07, "log_odds_chosen": 1.4016786813735962, "log_odds_ratio": -0.36707499623298645, "logits/chosen": 2.0193212032318115, "logits/rejected": 2.0818252563476562, "logps/chosen": -0.3212401270866394, "logps/rejected": -0.8710358142852783, "loss": 1.0211, "nll_loss": 0.9843639731407166, "rewards/accuracies": 0.875, "rewards/chosen": -0.03212401270866394, "rewards/margins": 0.05497957393527031, "rewards/rejected": -0.08710358291864395, "step": 1310 }, { "epoch": 1.8220887245841035, "grad_norm": 2.2947607040405273, "learning_rate": 1.9745454545454544e-07, "log_odds_chosen": 1.5820189714431763, "log_odds_ratio": -0.3358237147331238, "logits/chosen": 2.1270110607147217, "logits/rejected": 2.1922497749328613, "logps/chosen": -0.2790026068687439, "logps/rejected": -0.9709238409996033, "loss": 1.0543, "nll_loss": 1.0207080841064453, "rewards/accuracies": 0.8916666507720947, "rewards/chosen": -0.02790026180446148, "rewards/margins": 0.06919214129447937, "rewards/rejected": -0.0970923900604248, "step": 1315 }, { "epoch": 1.8290203327171903, "grad_norm": 2.2239065170288086, "learning_rate": 1.9654545454545454e-07, "log_odds_chosen": 1.8014733791351318, "log_odds_ratio": -0.3064168691635132, "logits/chosen": 2.168591022491455, "logits/rejected": 2.2188949584960938, "logps/chosen": -0.289535254240036, "logps/rejected": -1.121842622756958, "loss": 1.0043, "nll_loss": 0.9736562967300415, "rewards/accuracies": 0.8583333492279053, "rewards/chosen": -0.02895352989435196, "rewards/margins": 0.08323074132204056, "rewards/rejected": -0.11218428611755371, "step": 1320 }, { "epoch": 1.8359519408502774, "grad_norm": 1.6879349946975708, "learning_rate": 1.956363636363636e-07, "log_odds_chosen": 1.4831633567810059, "log_odds_ratio": -0.36744216084480286, "logits/chosen": 2.0397822856903076, "logits/rejected": 2.090501070022583, "logps/chosen": -0.27023550868034363, "logps/rejected": -0.8913961052894592, "loss": 0.9775, "nll_loss": 0.9407526850700378, "rewards/accuracies": 0.8416666388511658, "rewards/chosen": -0.027023550122976303, "rewards/margins": 0.06211606040596962, "rewards/rejected": -0.08913961052894592, "step": 1325 }, { "epoch": 1.8428835489833642, "grad_norm": 3.5290584564208984, "learning_rate": 1.947272727272727e-07, "log_odds_chosen": 1.599334716796875, "log_odds_ratio": -0.32742586731910706, "logits/chosen": 2.0843544006347656, "logits/rejected": 2.1510770320892334, "logps/chosen": -0.26283249258995056, "logps/rejected": -0.9456924796104431, "loss": 0.9577, "nll_loss": 0.9249733686447144, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.026283251121640205, "rewards/margins": 0.0682859942317009, "rewards/rejected": -0.09456924349069595, "step": 1330 }, { "epoch": 1.849815157116451, "grad_norm": 3.283820629119873, "learning_rate": 1.938181818181818e-07, "log_odds_chosen": 1.60695219039917, "log_odds_ratio": -0.3642633557319641, "logits/chosen": 2.1823904514312744, "logits/rejected": 2.2375266551971436, "logps/chosen": -0.3108011782169342, "logps/rejected": -1.0633673667907715, "loss": 1.032, "nll_loss": 0.995610773563385, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.03108011931180954, "rewards/margins": 0.07525661587715149, "rewards/rejected": -0.10633675009012222, "step": 1335 }, { "epoch": 1.8567467652495377, "grad_norm": 2.3443901538848877, "learning_rate": 1.929090909090909e-07, "log_odds_chosen": 1.549951195716858, "log_odds_ratio": -0.3641236424446106, "logits/chosen": 2.1002511978149414, "logits/rejected": 2.1668472290039062, "logps/chosen": -0.3097735047340393, "logps/rejected": -1.032346487045288, "loss": 1.0146, "nll_loss": 0.9781424403190613, "rewards/accuracies": 0.8666666746139526, "rewards/chosen": -0.03097734972834587, "rewards/margins": 0.07225729525089264, "rewards/rejected": -0.1032346561551094, "step": 1340 }, { "epoch": 1.8636783733826248, "grad_norm": 1.9872773885726929, "learning_rate": 1.9199999999999997e-07, "log_odds_chosen": 1.321940541267395, "log_odds_ratio": -0.44285106658935547, "logits/chosen": 2.0162153244018555, "logits/rejected": 2.074026584625244, "logps/chosen": -0.3019007742404938, "logps/rejected": -0.8703359961509705, "loss": 1.051, "nll_loss": 1.0067391395568848, "rewards/accuracies": 0.7666666507720947, "rewards/chosen": -0.030190076678991318, "rewards/margins": 0.05684352666139603, "rewards/rejected": -0.08703361451625824, "step": 1345 }, { "epoch": 1.8706099815157118, "grad_norm": 1.5316020250320435, "learning_rate": 1.9109090909090907e-07, "log_odds_chosen": 1.522079348564148, "log_odds_ratio": -0.3456721305847168, "logits/chosen": 2.055612802505493, "logits/rejected": 2.1493773460388184, "logps/chosen": -0.2610771358013153, "logps/rejected": -0.8397551774978638, "loss": 1.0111, "nll_loss": 0.9765692353248596, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.02610771171748638, "rewards/margins": 0.05786780267953873, "rewards/rejected": -0.08397550880908966, "step": 1350 }, { "epoch": 1.8775415896487986, "grad_norm": 1.9374873638153076, "learning_rate": 1.9018181818181817e-07, "log_odds_chosen": 1.3929773569107056, "log_odds_ratio": -0.3517773449420929, "logits/chosen": 2.034905433654785, "logits/rejected": 2.079784631729126, "logps/chosen": -0.2506738603115082, "logps/rejected": -0.7879815697669983, "loss": 1.0656, "nll_loss": 1.0304430723190308, "rewards/accuracies": 0.8416666388511658, "rewards/chosen": -0.025067387148737907, "rewards/margins": 0.05373078212141991, "rewards/rejected": -0.07879816740751266, "step": 1355 }, { "epoch": 1.8844731977818854, "grad_norm": 1.2929794788360596, "learning_rate": 1.8927272727272726e-07, "log_odds_chosen": 1.4262324571609497, "log_odds_ratio": -0.3507004380226135, "logits/chosen": 2.130051374435425, "logits/rejected": 2.2022147178649902, "logps/chosen": -0.30148500204086304, "logps/rejected": -0.8996387124061584, "loss": 1.0229, "nll_loss": 0.9878306984901428, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.030148500576615334, "rewards/margins": 0.05981536582112312, "rewards/rejected": -0.0899638757109642, "step": 1360 }, { "epoch": 1.8914048059149722, "grad_norm": 2.3704674243927, "learning_rate": 1.8836363636363633e-07, "log_odds_chosen": 1.4139504432678223, "log_odds_ratio": -0.3692961037158966, "logits/chosen": 2.1010749340057373, "logits/rejected": 2.1468594074249268, "logps/chosen": -0.3202250301837921, "logps/rejected": -0.9537723660469055, "loss": 1.0341, "nll_loss": 0.9971208572387695, "rewards/accuracies": 0.8333333134651184, "rewards/chosen": -0.03202249854803085, "rewards/margins": 0.0633547380566597, "rewards/rejected": -0.09537723660469055, "step": 1365 }, { "epoch": 1.8983364140480592, "grad_norm": 1.7380679845809937, "learning_rate": 1.8745454545454543e-07, "log_odds_chosen": 1.5445674657821655, "log_odds_ratio": -0.3786125183105469, "logits/chosen": 2.0269358158111572, "logits/rejected": 2.0798943042755127, "logps/chosen": -0.28776755928993225, "logps/rejected": -0.9360780715942383, "loss": 1.041, "nll_loss": 1.0031627416610718, "rewards/accuracies": 0.8416666388511658, "rewards/chosen": -0.028776757419109344, "rewards/margins": 0.06483104825019836, "rewards/rejected": -0.0936078131198883, "step": 1370 }, { "epoch": 1.905268022181146, "grad_norm": 2.3464677333831787, "learning_rate": 1.8654545454545453e-07, "log_odds_chosen": 1.3654427528381348, "log_odds_ratio": -0.4110698699951172, "logits/chosen": 2.1894376277923584, "logits/rejected": 2.233098268508911, "logps/chosen": -0.30282995104789734, "logps/rejected": -0.8446999788284302, "loss": 0.9941, "nll_loss": 0.9530263543128967, "rewards/accuracies": 0.8083333373069763, "rewards/chosen": -0.030282998457551003, "rewards/margins": 0.05418700724840164, "rewards/rejected": -0.08447001129388809, "step": 1375 }, { "epoch": 1.912199630314233, "grad_norm": 2.243062973022461, "learning_rate": 1.8563636363636363e-07, "log_odds_chosen": 1.3511884212493896, "log_odds_ratio": -0.4768778681755066, "logits/chosen": 2.0655782222747803, "logits/rejected": 2.1229450702667236, "logps/chosen": -0.37024933099746704, "logps/rejected": -0.9667562246322632, "loss": 1.0402, "nll_loss": 0.9925115704536438, "rewards/accuracies": 0.75, "rewards/chosen": -0.03702492639422417, "rewards/margins": 0.05965068191289902, "rewards/rejected": -0.09667561948299408, "step": 1380 }, { "epoch": 1.9191312384473198, "grad_norm": 2.5852763652801514, "learning_rate": 1.847272727272727e-07, "log_odds_chosen": 1.548577070236206, "log_odds_ratio": -0.3329441249370575, "logits/chosen": 2.083811044692993, "logits/rejected": 2.13382887840271, "logps/chosen": -0.3115028738975525, "logps/rejected": -1.0105055570602417, "loss": 1.0092, "nll_loss": 0.9758760929107666, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.03115028701722622, "rewards/margins": 0.069900281727314, "rewards/rejected": -0.10105058550834656, "step": 1385 }, { "epoch": 1.9260628465804066, "grad_norm": 2.076587677001953, "learning_rate": 1.838181818181818e-07, "log_odds_chosen": 1.193703055381775, "log_odds_ratio": -0.4486643075942993, "logits/chosen": 2.1062402725219727, "logits/rejected": 2.1550328731536865, "logps/chosen": -0.3409879505634308, "logps/rejected": -0.8286535739898682, "loss": 1.0018, "nll_loss": 0.9569076299667358, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.0340987928211689, "rewards/margins": 0.048766572028398514, "rewards/rejected": -0.08286535739898682, "step": 1390 }, { "epoch": 1.9329944547134934, "grad_norm": 2.120288610458374, "learning_rate": 1.829090909090909e-07, "log_odds_chosen": 1.4117330312728882, "log_odds_ratio": -0.3424789011478424, "logits/chosen": 2.101274013519287, "logits/rejected": 2.1526596546173096, "logps/chosen": -0.2649425268173218, "logps/rejected": -0.7853403091430664, "loss": 0.9994, "nll_loss": 0.9651403427124023, "rewards/accuracies": 0.875, "rewards/chosen": -0.026494255289435387, "rewards/margins": 0.05203978344798088, "rewards/rejected": -0.07853402942419052, "step": 1395 }, { "epoch": 1.9399260628465804, "grad_norm": 2.1944470405578613, "learning_rate": 1.82e-07, "log_odds_chosen": 1.5040756464004517, "log_odds_ratio": -0.37599992752075195, "logits/chosen": 2.0189223289489746, "logits/rejected": 2.067805051803589, "logps/chosen": -0.2981501519680023, "logps/rejected": -0.9519171118736267, "loss": 1.0507, "nll_loss": 1.013134241104126, "rewards/accuracies": 0.8333333134651184, "rewards/chosen": -0.02981501631438732, "rewards/margins": 0.06537671387195587, "rewards/rejected": -0.09519171714782715, "step": 1400 }, { "epoch": 1.9468576709796674, "grad_norm": 1.9627952575683594, "learning_rate": 1.8109090909090906e-07, "log_odds_chosen": 1.6703484058380127, "log_odds_ratio": -0.32047805190086365, "logits/chosen": 2.0558595657348633, "logits/rejected": 2.126713275909424, "logps/chosen": -0.2901943325996399, "logps/rejected": -1.0323206186294556, "loss": 1.0102, "nll_loss": 0.9781351089477539, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.029019435867667198, "rewards/margins": 0.07421263307332993, "rewards/rejected": -0.10323206335306168, "step": 1405 }, { "epoch": 1.9537892791127542, "grad_norm": 2.921412706375122, "learning_rate": 1.8018181818181816e-07, "log_odds_chosen": 1.3125395774841309, "log_odds_ratio": -0.4437628984451294, "logits/chosen": 2.1079742908477783, "logits/rejected": 2.172934055328369, "logps/chosen": -0.3079548180103302, "logps/rejected": -0.8908718228340149, "loss": 1.0198, "nll_loss": 0.9754597544670105, "rewards/accuracies": 0.8083333373069763, "rewards/chosen": -0.030795477330684662, "rewards/margins": 0.058291707187891006, "rewards/rejected": -0.08908718079328537, "step": 1410 }, { "epoch": 1.960720887245841, "grad_norm": 2.2531754970550537, "learning_rate": 1.7927272727272725e-07, "log_odds_chosen": 1.4887337684631348, "log_odds_ratio": -0.3652048408985138, "logits/chosen": 2.036674976348877, "logits/rejected": 2.0993988513946533, "logps/chosen": -0.2855999767780304, "logps/rejected": -0.8912386298179626, "loss": 1.0189, "nll_loss": 0.9823691248893738, "rewards/accuracies": 0.8416666388511658, "rewards/chosen": -0.02855999954044819, "rewards/margins": 0.06056387349963188, "rewards/rejected": -0.08912386745214462, "step": 1415 }, { "epoch": 1.9676524953789278, "grad_norm": 2.0116090774536133, "learning_rate": 1.7836363636363635e-07, "log_odds_chosen": 1.619387149810791, "log_odds_ratio": -0.3138326406478882, "logits/chosen": 2.0645925998687744, "logits/rejected": 2.129347801208496, "logps/chosen": -0.2735585570335388, "logps/rejected": -0.9985336065292358, "loss": 0.9783, "nll_loss": 0.94692462682724, "rewards/accuracies": 0.8666666746139526, "rewards/chosen": -0.027355853468179703, "rewards/margins": 0.07249751687049866, "rewards/rejected": -0.09985338151454926, "step": 1420 }, { "epoch": 1.9745841035120146, "grad_norm": 3.110692024230957, "learning_rate": 1.7745454545454545e-07, "log_odds_chosen": 1.2186717987060547, "log_odds_ratio": -0.45589005947113037, "logits/chosen": 2.0935304164886475, "logits/rejected": 2.1518046855926514, "logps/chosen": -0.35527312755584717, "logps/rejected": -0.8807690739631653, "loss": 1.036, "nll_loss": 0.9903665781021118, "rewards/accuracies": 0.75, "rewards/chosen": -0.035527314990758896, "rewards/margins": 0.05254959315061569, "rewards/rejected": -0.08807691931724548, "step": 1425 }, { "epoch": 1.9815157116451017, "grad_norm": 2.146620750427246, "learning_rate": 1.7654545454545452e-07, "log_odds_chosen": 1.3884727954864502, "log_odds_ratio": -0.38641104102134705, "logits/chosen": 2.083814859390259, "logits/rejected": 2.1228816509246826, "logps/chosen": -0.29765060544013977, "logps/rejected": -0.8775668740272522, "loss": 1.0353, "nll_loss": 0.9966583847999573, "rewards/accuracies": 0.8416666388511658, "rewards/chosen": -0.029765058308839798, "rewards/margins": 0.0579916313290596, "rewards/rejected": -0.0877566859126091, "step": 1430 }, { "epoch": 1.9884473197781887, "grad_norm": 1.4122179746627808, "learning_rate": 1.7563636363636362e-07, "log_odds_chosen": 1.6712970733642578, "log_odds_ratio": -0.31637993454933167, "logits/chosen": 2.1619114875793457, "logits/rejected": 2.231441020965576, "logps/chosen": -0.2716436982154846, "logps/rejected": -1.0012226104736328, "loss": 1.0147, "nll_loss": 0.9830483794212341, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.027164369821548462, "rewards/margins": 0.07295789569616318, "rewards/rejected": -0.10012225806713104, "step": 1435 }, { "epoch": 1.9953789279112755, "grad_norm": 2.2266688346862793, "learning_rate": 1.7472727272727271e-07, "log_odds_chosen": 1.3788424730300903, "log_odds_ratio": -0.3804737329483032, "logits/chosen": 2.135176181793213, "logits/rejected": 2.1902530193328857, "logps/chosen": -0.31672823429107666, "logps/rejected": -0.9234539866447449, "loss": 1.0622, "nll_loss": 1.0241928100585938, "rewards/accuracies": 0.875, "rewards/chosen": -0.031672827899456024, "rewards/margins": 0.0606725737452507, "rewards/rejected": -0.09234539419412613, "step": 1440 }, { "epoch": 2.0013863216266174, "grad_norm": 3.505758047103882, "learning_rate": 1.738181818181818e-07, "log_odds_chosen": 1.3300753831863403, "log_odds_ratio": -0.41754671931266785, "logits/chosen": 2.1010961532592773, "logits/rejected": 2.1418211460113525, "logps/chosen": -0.28666701912879944, "logps/rejected": -0.8230305910110474, "loss": 0.9121, "nll_loss": 1.00087308883667, "rewards/accuracies": 0.7884615659713745, "rewards/chosen": -0.028666695579886436, "rewards/margins": 0.053636353462934494, "rewards/rejected": -0.08230306208133698, "step": 1445 }, { "epoch": 2.0083179297597042, "grad_norm": 2.070633888244629, "learning_rate": 1.7290909090909088e-07, "log_odds_chosen": 1.7298386096954346, "log_odds_ratio": -0.31547990441322327, "logits/chosen": 2.099557399749756, "logits/rejected": 2.149728536605835, "logps/chosen": -0.2745874524116516, "logps/rejected": -1.0096734762191772, "loss": 1.045, "nll_loss": 1.0134097337722778, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.02745874598622322, "rewards/margins": 0.0735086053609848, "rewards/rejected": -0.10096735507249832, "step": 1450 }, { "epoch": 2.015249537892791, "grad_norm": 2.7808001041412354, "learning_rate": 1.7199999999999998e-07, "log_odds_chosen": 1.4617559909820557, "log_odds_ratio": -0.32592448592185974, "logits/chosen": 2.0248734951019287, "logits/rejected": 2.08671236038208, "logps/chosen": -0.24415723979473114, "logps/rejected": -0.8095900416374207, "loss": 1.0132, "nll_loss": 0.980653703212738, "rewards/accuracies": 0.8833333253860474, "rewards/chosen": -0.024415725842118263, "rewards/margins": 0.05654327571392059, "rewards/rejected": -0.08095899969339371, "step": 1455 }, { "epoch": 2.022181146025878, "grad_norm": 2.6555349826812744, "learning_rate": 1.7109090909090908e-07, "log_odds_chosen": 1.5638434886932373, "log_odds_ratio": -0.36828985810279846, "logits/chosen": 2.0425848960876465, "logits/rejected": 2.0981991291046143, "logps/chosen": -0.2785496115684509, "logps/rejected": -0.9188894629478455, "loss": 1.0393, "nll_loss": 1.0024975538253784, "rewards/accuracies": 0.8666666746139526, "rewards/chosen": -0.027854960411787033, "rewards/margins": 0.06403397768735886, "rewards/rejected": -0.09188893437385559, "step": 1460 }, { "epoch": 2.029112754158965, "grad_norm": 2.058720588684082, "learning_rate": 1.7018181818181817e-07, "log_odds_chosen": 1.749341368675232, "log_odds_ratio": -0.3121403753757477, "logits/chosen": 2.096733331680298, "logits/rejected": 2.1691980361938477, "logps/chosen": -0.2867683470249176, "logps/rejected": -1.0571597814559937, "loss": 1.0299, "nll_loss": 0.9987198114395142, "rewards/accuracies": 0.8916666507720947, "rewards/chosen": -0.02867683582007885, "rewards/margins": 0.07703914493322372, "rewards/rejected": -0.10571598261594772, "step": 1465 }, { "epoch": 2.036044362292052, "grad_norm": 2.216594934463501, "learning_rate": 1.6927272727272724e-07, "log_odds_chosen": 1.6534370183944702, "log_odds_ratio": -0.3209837079048157, "logits/chosen": 2.0079751014709473, "logits/rejected": 2.092797040939331, "logps/chosen": -0.26305797696113586, "logps/rejected": -0.9659760594367981, "loss": 1.0213, "nll_loss": 0.989173173904419, "rewards/accuracies": 0.875, "rewards/chosen": -0.026305796578526497, "rewards/margins": 0.07029180228710175, "rewards/rejected": -0.09659762680530548, "step": 1470 }, { "epoch": 2.0429759704251387, "grad_norm": 2.0765326023101807, "learning_rate": 1.6836363636363634e-07, "log_odds_chosen": 1.5068204402923584, "log_odds_ratio": -0.3684650659561157, "logits/chosen": 2.1914827823638916, "logits/rejected": 2.2479913234710693, "logps/chosen": -0.3309532403945923, "logps/rejected": -1.0164872407913208, "loss": 1.0249, "nll_loss": 0.9880392551422119, "rewards/accuracies": 0.8166666626930237, "rewards/chosen": -0.03309532627463341, "rewards/margins": 0.06855340301990509, "rewards/rejected": -0.1016487255692482, "step": 1475 }, { "epoch": 2.0499075785582255, "grad_norm": 2.91998028755188, "learning_rate": 1.6745454545454544e-07, "log_odds_chosen": 1.4909127950668335, "log_odds_ratio": -0.38384488224983215, "logits/chosen": 2.0744376182556152, "logits/rejected": 2.132093906402588, "logps/chosen": -0.30625709891319275, "logps/rejected": -0.9366697072982788, "loss": 1.0368, "nll_loss": 0.9983736872673035, "rewards/accuracies": 0.8833333253860474, "rewards/chosen": -0.030625708401203156, "rewards/margins": 0.06304127722978592, "rewards/rejected": -0.09366698563098907, "step": 1480 }, { "epoch": 2.0568391866913123, "grad_norm": 2.1422932147979736, "learning_rate": 1.6654545454545454e-07, "log_odds_chosen": 1.6511032581329346, "log_odds_ratio": -0.3445586562156677, "logits/chosen": 2.038560152053833, "logits/rejected": 2.1156678199768066, "logps/chosen": -0.3046126961708069, "logps/rejected": -1.0538097620010376, "loss": 1.0316, "nll_loss": 0.9971045255661011, "rewards/accuracies": 0.8666666746139526, "rewards/chosen": -0.030461272224783897, "rewards/margins": 0.07491971552371979, "rewards/rejected": -0.10538098216056824, "step": 1485 }, { "epoch": 2.063770794824399, "grad_norm": 2.381495475769043, "learning_rate": 1.656363636363636e-07, "log_odds_chosen": 1.7995991706848145, "log_odds_ratio": -0.27245932817459106, "logits/chosen": 2.140026569366455, "logits/rejected": 2.1896448135375977, "logps/chosen": -0.25530093908309937, "logps/rejected": -1.0938752889633179, "loss": 1.0032, "nll_loss": 0.9759642481803894, "rewards/accuracies": 0.875, "rewards/chosen": -0.025530096143484116, "rewards/margins": 0.08385743200778961, "rewards/rejected": -0.10938753187656403, "step": 1490 }, { "epoch": 2.0707024029574863, "grad_norm": 1.4904897212982178, "learning_rate": 1.647272727272727e-07, "log_odds_chosen": 1.5017544031143188, "log_odds_ratio": -0.35147780179977417, "logits/chosen": 2.0568175315856934, "logits/rejected": 2.1001527309417725, "logps/chosen": -0.2676287889480591, "logps/rejected": -0.9620057344436646, "loss": 1.0647, "nll_loss": 1.0295929908752441, "rewards/accuracies": 0.8583333492279053, "rewards/chosen": -0.026762880384922028, "rewards/margins": 0.06943770498037338, "rewards/rejected": -0.09620057791471481, "step": 1495 }, { "epoch": 2.077634011090573, "grad_norm": 2.361671209335327, "learning_rate": 1.638181818181818e-07, "log_odds_chosen": 1.625975489616394, "log_odds_ratio": -0.323038786649704, "logits/chosen": 2.047917366027832, "logits/rejected": 2.1029675006866455, "logps/chosen": -0.2699020802974701, "logps/rejected": -0.9326964020729065, "loss": 1.0517, "nll_loss": 1.0193482637405396, "rewards/accuracies": 0.8666666746139526, "rewards/chosen": -0.02699020877480507, "rewards/margins": 0.06627943366765976, "rewards/rejected": -0.09326963871717453, "step": 1500 }, { "epoch": 2.08456561922366, "grad_norm": 1.3164469003677368, "learning_rate": 1.629090909090909e-07, "log_odds_chosen": 1.7589662075042725, "log_odds_ratio": -0.2796500623226166, "logits/chosen": 2.0355899333953857, "logits/rejected": 2.1100242137908936, "logps/chosen": -0.24932418763637543, "logps/rejected": -0.9945331811904907, "loss": 0.9908, "nll_loss": 0.9628455638885498, "rewards/accuracies": 0.9083333611488342, "rewards/chosen": -0.024932416155934334, "rewards/margins": 0.07452090084552765, "rewards/rejected": -0.09945331513881683, "step": 1505 }, { "epoch": 2.0914972273567467, "grad_norm": 2.498619318008423, "learning_rate": 1.62e-07, "log_odds_chosen": 1.4863014221191406, "log_odds_ratio": -0.40170204639434814, "logits/chosen": 2.0163559913635254, "logits/rejected": 2.0721843242645264, "logps/chosen": -0.2834693491458893, "logps/rejected": -0.9046918153762817, "loss": 1.0191, "nll_loss": 0.9789711833000183, "rewards/accuracies": 0.8083333373069763, "rewards/chosen": -0.028346937149763107, "rewards/margins": 0.06212225183844566, "rewards/rejected": -0.09046918898820877, "step": 1510 }, { "epoch": 2.0984288354898335, "grad_norm": 1.4074289798736572, "learning_rate": 1.6109090909090907e-07, "log_odds_chosen": 1.654536485671997, "log_odds_ratio": -0.34274205565452576, "logits/chosen": 2.0640828609466553, "logits/rejected": 2.1356544494628906, "logps/chosen": -0.2802557945251465, "logps/rejected": -1.0241177082061768, "loss": 1.0183, "nll_loss": 0.98404860496521, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.02802557870745659, "rewards/margins": 0.07438618689775467, "rewards/rejected": -0.10241175442934036, "step": 1515 }, { "epoch": 2.1053604436229207, "grad_norm": 1.9247922897338867, "learning_rate": 1.6018181818181816e-07, "log_odds_chosen": 1.4689146280288696, "log_odds_ratio": -0.3932505249977112, "logits/chosen": 2.040996789932251, "logits/rejected": 2.0966296195983887, "logps/chosen": -0.2887535095214844, "logps/rejected": -0.8648843765258789, "loss": 1.0325, "nll_loss": 0.993179440498352, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.02887534908950329, "rewards/margins": 0.05761308595538139, "rewards/rejected": -0.08648844808340073, "step": 1520 }, { "epoch": 2.1122920517560075, "grad_norm": 2.162506103515625, "learning_rate": 1.5927272727272726e-07, "log_odds_chosen": 1.542865514755249, "log_odds_ratio": -0.32878366112709045, "logits/chosen": 2.064614772796631, "logits/rejected": 2.1328229904174805, "logps/chosen": -0.2473837435245514, "logps/rejected": -0.8912278413772583, "loss": 1.0342, "nll_loss": 1.001328706741333, "rewards/accuracies": 0.8583333492279053, "rewards/chosen": -0.02473837323486805, "rewards/margins": 0.06438441574573517, "rewards/rejected": -0.08912278711795807, "step": 1525 }, { "epoch": 2.1192236598890943, "grad_norm": 2.9879612922668457, "learning_rate": 1.5836363636363636e-07, "log_odds_chosen": 1.6084933280944824, "log_odds_ratio": -0.3255232870578766, "logits/chosen": 2.105452537536621, "logits/rejected": 2.161074638366699, "logps/chosen": -0.3203544020652771, "logps/rejected": -1.0134159326553345, "loss": 1.0447, "nll_loss": 1.0121761560440063, "rewards/accuracies": 0.8833333253860474, "rewards/chosen": -0.03203544020652771, "rewards/margins": 0.0693061575293541, "rewards/rejected": -0.1013416051864624, "step": 1530 }, { "epoch": 2.126155268022181, "grad_norm": 3.2565925121307373, "learning_rate": 1.5745454545454543e-07, "log_odds_chosen": 1.522952675819397, "log_odds_ratio": -0.3534841239452362, "logits/chosen": 2.0590784549713135, "logits/rejected": 2.1140329837799072, "logps/chosen": -0.26037055253982544, "logps/rejected": -0.8886002898216248, "loss": 0.986, "nll_loss": 0.9506571888923645, "rewards/accuracies": 0.875, "rewards/chosen": -0.026037057861685753, "rewards/margins": 0.06282297521829605, "rewards/rejected": -0.08886002749204636, "step": 1535 }, { "epoch": 2.133086876155268, "grad_norm": 2.2923009395599365, "learning_rate": 1.5654545454545453e-07, "log_odds_chosen": 1.7157777547836304, "log_odds_ratio": -0.31327977776527405, "logits/chosen": 2.0170087814331055, "logits/rejected": 2.061363935470581, "logps/chosen": -0.2875458300113678, "logps/rejected": -1.1021568775177002, "loss": 1.0536, "nll_loss": 1.022287130355835, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.02875458262860775, "rewards/margins": 0.08146108686923981, "rewards/rejected": -0.1102156713604927, "step": 1540 }, { "epoch": 2.1400184842883547, "grad_norm": 4.337259769439697, "learning_rate": 1.5563636363636362e-07, "log_odds_chosen": 1.6073641777038574, "log_odds_ratio": -0.32192662358283997, "logits/chosen": 2.0638022422790527, "logits/rejected": 2.128871440887451, "logps/chosen": -0.24944542348384857, "logps/rejected": -0.8842807412147522, "loss": 0.9852, "nll_loss": 0.9530341625213623, "rewards/accuracies": 0.8416666388511658, "rewards/chosen": -0.024944543838500977, "rewards/margins": 0.06348354369401932, "rewards/rejected": -0.0884280651807785, "step": 1545 }, { "epoch": 2.146950092421442, "grad_norm": 1.788140058517456, "learning_rate": 1.5472727272727272e-07, "log_odds_chosen": 1.9304050207138062, "log_odds_ratio": -0.2865446209907532, "logits/chosen": 2.093130588531494, "logits/rejected": 2.1594488620758057, "logps/chosen": -0.26757973432540894, "logps/rejected": -1.1472647190093994, "loss": 1.032, "nll_loss": 1.003312349319458, "rewards/accuracies": 0.875, "rewards/chosen": -0.026757972314953804, "rewards/margins": 0.08796848356723785, "rewards/rejected": -0.1147264614701271, "step": 1550 }, { "epoch": 2.1538817005545288, "grad_norm": 2.651644706726074, "learning_rate": 1.538181818181818e-07, "log_odds_chosen": 1.6535481214523315, "log_odds_ratio": -0.34444233775138855, "logits/chosen": 2.0424141883850098, "logits/rejected": 2.0861897468566895, "logps/chosen": -0.24586957693099976, "logps/rejected": -0.9815968871116638, "loss": 1.0189, "nll_loss": 0.9844585061073303, "rewards/accuracies": 0.7833333611488342, "rewards/chosen": -0.024586956948041916, "rewards/margins": 0.07357273995876312, "rewards/rejected": -0.09815969318151474, "step": 1555 }, { "epoch": 2.1608133086876156, "grad_norm": 1.644753336906433, "learning_rate": 1.529090909090909e-07, "log_odds_chosen": 1.670788049697876, "log_odds_ratio": -0.32996612787246704, "logits/chosen": 2.052905321121216, "logits/rejected": 2.1133363246917725, "logps/chosen": -0.2544824182987213, "logps/rejected": -0.9820945858955383, "loss": 1.0313, "nll_loss": 0.9983068704605103, "rewards/accuracies": 0.8583333492279053, "rewards/chosen": -0.02544824406504631, "rewards/margins": 0.07276120781898499, "rewards/rejected": -0.09820946305990219, "step": 1560 }, { "epoch": 2.1677449168207024, "grad_norm": 6.568633079528809, "learning_rate": 1.5199999999999998e-07, "log_odds_chosen": 1.5977410078048706, "log_odds_ratio": -0.3366919457912445, "logits/chosen": 1.9728270769119263, "logits/rejected": 2.0407023429870605, "logps/chosen": -0.28399333357810974, "logps/rejected": -0.991985559463501, "loss": 0.9979, "nll_loss": 0.9642470479011536, "rewards/accuracies": 0.8583333492279053, "rewards/chosen": -0.028399331495165825, "rewards/margins": 0.07079920917749405, "rewards/rejected": -0.09919854253530502, "step": 1565 }, { "epoch": 2.174676524953789, "grad_norm": 1.5433924198150635, "learning_rate": 1.5109090909090908e-07, "log_odds_chosen": 1.749664306640625, "log_odds_ratio": -0.3313737213611603, "logits/chosen": 2.0340335369110107, "logits/rejected": 2.1193594932556152, "logps/chosen": -0.28188708424568176, "logps/rejected": -1.0578782558441162, "loss": 1.0097, "nll_loss": 0.9766021370887756, "rewards/accuracies": 0.8583333492279053, "rewards/chosen": -0.028188709169626236, "rewards/margins": 0.07759912312030792, "rewards/rejected": -0.10578783601522446, "step": 1570 }, { "epoch": 2.181608133086876, "grad_norm": 2.6189091205596924, "learning_rate": 1.5018181818181815e-07, "log_odds_chosen": 1.6792007684707642, "log_odds_ratio": -0.3193473219871521, "logits/chosen": 2.0829341411590576, "logits/rejected": 2.135345697402954, "logps/chosen": -0.28347066044807434, "logps/rejected": -1.055553913116455, "loss": 1.0235, "nll_loss": 0.9915785193443298, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.028347067534923553, "rewards/margins": 0.07720831781625748, "rewards/rejected": -0.10555538535118103, "step": 1575 }, { "epoch": 2.188539741219963, "grad_norm": 3.955164670944214, "learning_rate": 1.4927272727272725e-07, "log_odds_chosen": 1.7316787242889404, "log_odds_ratio": -0.34139496088027954, "logits/chosen": 2.0785231590270996, "logits/rejected": 2.1350691318511963, "logps/chosen": -0.3276236951351166, "logps/rejected": -1.071801781654358, "loss": 1.0001, "nll_loss": 0.9659791588783264, "rewards/accuracies": 0.8833333253860474, "rewards/chosen": -0.03276237100362778, "rewards/margins": 0.07441780716180801, "rewards/rejected": -0.10718018561601639, "step": 1580 }, { "epoch": 2.19547134935305, "grad_norm": 2.1189846992492676, "learning_rate": 1.4836363636363635e-07, "log_odds_chosen": 1.6613436937332153, "log_odds_ratio": -0.30669593811035156, "logits/chosen": 2.053230047225952, "logits/rejected": 2.096280097961426, "logps/chosen": -0.29614728689193726, "logps/rejected": -0.9985705018043518, "loss": 1.0128, "nll_loss": 0.9821043610572815, "rewards/accuracies": 0.9083333611488342, "rewards/chosen": -0.029614729806780815, "rewards/margins": 0.07024230808019638, "rewards/rejected": -0.09985704720020294, "step": 1585 }, { "epoch": 2.202402957486137, "grad_norm": 2.127919912338257, "learning_rate": 1.4745454545454544e-07, "log_odds_chosen": 1.785252332687378, "log_odds_ratio": -0.3034234941005707, "logits/chosen": 2.0558953285217285, "logits/rejected": 2.1106560230255127, "logps/chosen": -0.24885861575603485, "logps/rejected": -1.0158021450042725, "loss": 0.9837, "nll_loss": 0.9533202648162842, "rewards/accuracies": 0.875, "rewards/chosen": -0.024885861203074455, "rewards/margins": 0.07669434696435928, "rewards/rejected": -0.10158021748065948, "step": 1590 }, { "epoch": 2.2093345656192236, "grad_norm": 1.5249942541122437, "learning_rate": 1.4654545454545454e-07, "log_odds_chosen": 1.6279139518737793, "log_odds_ratio": -0.318685919046402, "logits/chosen": 1.9436652660369873, "logits/rejected": 2.0145747661590576, "logps/chosen": -0.2697173058986664, "logps/rejected": -0.9779646992683411, "loss": 1.0295, "nll_loss": 0.9976499080657959, "rewards/accuracies": 0.875, "rewards/chosen": -0.02697172947227955, "rewards/margins": 0.07082473486661911, "rewards/rejected": -0.0977964699268341, "step": 1595 }, { "epoch": 2.2162661737523104, "grad_norm": 1.8669286966323853, "learning_rate": 1.456363636363636e-07, "log_odds_chosen": 1.5612767934799194, "log_odds_ratio": -0.33474841713905334, "logits/chosen": 2.109074831008911, "logits/rejected": 2.1647109985351562, "logps/chosen": -0.3184746503829956, "logps/rejected": -0.9675430655479431, "loss": 1.0129, "nll_loss": 0.9794076681137085, "rewards/accuracies": 0.8833333253860474, "rewards/chosen": -0.03184746578335762, "rewards/margins": 0.06490684300661087, "rewards/rejected": -0.09675431996583939, "step": 1600 }, { "epoch": 2.223197781885397, "grad_norm": 3.105397939682007, "learning_rate": 1.447272727272727e-07, "log_odds_chosen": 1.330115556716919, "log_odds_ratio": -0.4384697377681732, "logits/chosen": 1.977685809135437, "logits/rejected": 2.0224320888519287, "logps/chosen": -0.32018712162971497, "logps/rejected": -0.841995894908905, "loss": 0.9974, "nll_loss": 0.9535176157951355, "rewards/accuracies": 0.8166666626930237, "rewards/chosen": -0.03201870992779732, "rewards/margins": 0.05218088626861572, "rewards/rejected": -0.08419959247112274, "step": 1605 }, { "epoch": 2.2301293900184844, "grad_norm": 2.435016393661499, "learning_rate": 1.438181818181818e-07, "log_odds_chosen": 1.5461903810501099, "log_odds_ratio": -0.37018927931785583, "logits/chosen": 2.003584146499634, "logits/rejected": 2.068721294403076, "logps/chosen": -0.2579006850719452, "logps/rejected": -0.9120422601699829, "loss": 0.9777, "nll_loss": 0.9407215118408203, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.025790071114897728, "rewards/margins": 0.06541414558887482, "rewards/rejected": -0.0912042185664177, "step": 1610 }, { "epoch": 2.2370609981515712, "grad_norm": 2.2165729999542236, "learning_rate": 1.429090909090909e-07, "log_odds_chosen": 1.4898757934570312, "log_odds_ratio": -0.36625441908836365, "logits/chosen": 2.0889649391174316, "logits/rejected": 2.1458566188812256, "logps/chosen": -0.2834877669811249, "logps/rejected": -0.8642836809158325, "loss": 1.0307, "nll_loss": 0.994057834148407, "rewards/accuracies": 0.8583333492279053, "rewards/chosen": -0.028348777443170547, "rewards/margins": 0.05807959660887718, "rewards/rejected": -0.08642836660146713, "step": 1615 }, { "epoch": 2.243992606284658, "grad_norm": 2.29976224899292, "learning_rate": 1.4199999999999997e-07, "log_odds_chosen": 1.654101848602295, "log_odds_ratio": -0.3310154378414154, "logits/chosen": 1.9520740509033203, "logits/rejected": 1.9996510744094849, "logps/chosen": -0.29835817217826843, "logps/rejected": -1.0397998094558716, "loss": 1.051, "nll_loss": 1.0178704261779785, "rewards/accuracies": 0.8583333492279053, "rewards/chosen": -0.02983582206070423, "rewards/margins": 0.074144147336483, "rewards/rejected": -0.10397996753454208, "step": 1620 }, { "epoch": 2.250924214417745, "grad_norm": 2.7785146236419678, "learning_rate": 1.4109090909090907e-07, "log_odds_chosen": 1.701553225517273, "log_odds_ratio": -0.27737200260162354, "logits/chosen": 2.0182878971099854, "logits/rejected": 2.088885545730591, "logps/chosen": -0.2814914286136627, "logps/rejected": -1.0081548690795898, "loss": 1.0094, "nll_loss": 0.9816693067550659, "rewards/accuracies": 0.9166666865348816, "rewards/chosen": -0.02814914472401142, "rewards/margins": 0.07266633957624435, "rewards/rejected": -0.10081546753644943, "step": 1625 }, { "epoch": 2.2578558225508316, "grad_norm": 1.6091256141662598, "learning_rate": 1.4018181818181817e-07, "log_odds_chosen": 1.6265534162521362, "log_odds_ratio": -0.3619597852230072, "logits/chosen": 1.9786964654922485, "logits/rejected": 2.02095365524292, "logps/chosen": -0.2642413377761841, "logps/rejected": -0.998843252658844, "loss": 1.0237, "nll_loss": 0.9874651432037354, "rewards/accuracies": 0.8333333134651184, "rewards/chosen": -0.026424136012792587, "rewards/margins": 0.07346019893884659, "rewards/rejected": -0.09988433867692947, "step": 1630 }, { "epoch": 2.264787430683919, "grad_norm": 2.9606359004974365, "learning_rate": 1.3927272727272727e-07, "log_odds_chosen": 1.4278662204742432, "log_odds_ratio": -0.37395817041397095, "logits/chosen": 2.1129722595214844, "logits/rejected": 2.1692874431610107, "logps/chosen": -0.30893474817276, "logps/rejected": -0.838131308555603, "loss": 1.0075, "nll_loss": 0.9701253771781921, "rewards/accuracies": 0.8583333492279053, "rewards/chosen": -0.03089348040521145, "rewards/margins": 0.052919652312994, "rewards/rejected": -0.0838131383061409, "step": 1635 }, { "epoch": 2.2717190388170057, "grad_norm": 2.276667356491089, "learning_rate": 1.3836363636363634e-07, "log_odds_chosen": 1.5527472496032715, "log_odds_ratio": -0.3658043444156647, "logits/chosen": 2.067870616912842, "logits/rejected": 2.1205027103424072, "logps/chosen": -0.2891118824481964, "logps/rejected": -0.9314442873001099, "loss": 1.0232, "nll_loss": 0.9866386651992798, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.02891119197010994, "rewards/margins": 0.06423323601484299, "rewards/rejected": -0.09314444661140442, "step": 1640 }, { "epoch": 2.2786506469500925, "grad_norm": 1.4873467683792114, "learning_rate": 1.3745454545454543e-07, "log_odds_chosen": 1.851697564125061, "log_odds_ratio": -0.3145390450954437, "logits/chosen": 2.075488567352295, "logits/rejected": 2.1666271686553955, "logps/chosen": -0.2427607625722885, "logps/rejected": -1.01997709274292, "loss": 0.9927, "nll_loss": 0.9612413048744202, "rewards/accuracies": 0.9083333611488342, "rewards/chosen": -0.02427607588469982, "rewards/margins": 0.07772162556648254, "rewards/rejected": -0.10199771821498871, "step": 1645 }, { "epoch": 2.2855822550831792, "grad_norm": 2.837634325027466, "learning_rate": 1.3654545454545453e-07, "log_odds_chosen": 1.7820488214492798, "log_odds_ratio": -0.31967195868492126, "logits/chosen": 2.0722880363464355, "logits/rejected": 2.1255042552948, "logps/chosen": -0.3070850968360901, "logps/rejected": -1.0848478078842163, "loss": 1.0299, "nll_loss": 0.9979235529899597, "rewards/accuracies": 0.8916666507720947, "rewards/chosen": -0.030708512291312218, "rewards/margins": 0.07777624577283859, "rewards/rejected": -0.10848478227853775, "step": 1650 }, { "epoch": 2.292513863216266, "grad_norm": 1.572646975517273, "learning_rate": 1.3563636363636363e-07, "log_odds_chosen": 1.8489151000976562, "log_odds_ratio": -0.2767617404460907, "logits/chosen": 2.077411413192749, "logits/rejected": 2.1307966709136963, "logps/chosen": -0.2723609209060669, "logps/rejected": -1.0447889566421509, "loss": 0.9829, "nll_loss": 0.9551786184310913, "rewards/accuracies": 0.8916666507720947, "rewards/chosen": -0.0272360946983099, "rewards/margins": 0.07724279910326004, "rewards/rejected": -0.10447890311479568, "step": 1655 }, { "epoch": 2.299445471349353, "grad_norm": 2.2417216300964355, "learning_rate": 1.347272727272727e-07, "log_odds_chosen": 1.674991488456726, "log_odds_ratio": -0.30424702167510986, "logits/chosen": 1.9423750638961792, "logits/rejected": 2.0181448459625244, "logps/chosen": -0.26656848192214966, "logps/rejected": -0.947805643081665, "loss": 1.0604, "nll_loss": 1.0300124883651733, "rewards/accuracies": 0.8583333492279053, "rewards/chosen": -0.026656849309802055, "rewards/margins": 0.0681237280368805, "rewards/rejected": -0.0947805717587471, "step": 1660 }, { "epoch": 2.30637707948244, "grad_norm": 2.700683355331421, "learning_rate": 1.338181818181818e-07, "log_odds_chosen": 1.6018115282058716, "log_odds_ratio": -0.32080212235450745, "logits/chosen": 2.066725969314575, "logits/rejected": 2.1303725242614746, "logps/chosen": -0.25507083535194397, "logps/rejected": -0.9004265069961548, "loss": 0.9929, "nll_loss": 0.9608381390571594, "rewards/accuracies": 0.8666666746139526, "rewards/chosen": -0.025507085025310516, "rewards/margins": 0.06453555822372437, "rewards/rejected": -0.09004264324903488, "step": 1665 }, { "epoch": 2.313308687615527, "grad_norm": 2.3125734329223633, "learning_rate": 1.329090909090909e-07, "log_odds_chosen": 1.7538138628005981, "log_odds_ratio": -0.30055293440818787, "logits/chosen": 2.082395315170288, "logits/rejected": 2.138796091079712, "logps/chosen": -0.32292428612709045, "logps/rejected": -1.132580280303955, "loss": 1.0183, "nll_loss": 0.9882605671882629, "rewards/accuracies": 0.8916666507720947, "rewards/chosen": -0.032292433083057404, "rewards/margins": 0.08096561580896378, "rewards/rejected": -0.11325804144144058, "step": 1670 }, { "epoch": 2.3202402957486137, "grad_norm": 2.303008794784546, "learning_rate": 1.32e-07, "log_odds_chosen": 1.66550874710083, "log_odds_ratio": -0.32220420241355896, "logits/chosen": 2.0094358921051025, "logits/rejected": 2.0630240440368652, "logps/chosen": -0.28716641664505005, "logps/rejected": -1.0477604866027832, "loss": 1.0112, "nll_loss": 0.9789758324623108, "rewards/accuracies": 0.8666666746139526, "rewards/chosen": -0.028716640546917915, "rewards/margins": 0.07605940848588943, "rewards/rejected": -0.1047760397195816, "step": 1675 }, { "epoch": 2.3271719038817005, "grad_norm": 3.235823631286621, "learning_rate": 1.3109090909090906e-07, "log_odds_chosen": 1.7851775884628296, "log_odds_ratio": -0.29265934228897095, "logits/chosen": 2.0131328105926514, "logits/rejected": 2.072057008743286, "logps/chosen": -0.30348002910614014, "logps/rejected": -1.1123188734054565, "loss": 1.0679, "nll_loss": 1.0386704206466675, "rewards/accuracies": 0.8916666507720947, "rewards/chosen": -0.030348004773259163, "rewards/margins": 0.08088389039039612, "rewards/rejected": -0.11123190075159073, "step": 1680 }, { "epoch": 2.3341035120147873, "grad_norm": 2.9141244888305664, "learning_rate": 1.3018181818181816e-07, "log_odds_chosen": 1.7565422058105469, "log_odds_ratio": -0.3614824414253235, "logits/chosen": 2.1100566387176514, "logits/rejected": 2.167602777481079, "logps/chosen": -0.3503415882587433, "logps/rejected": -1.2140240669250488, "loss": 1.0574, "nll_loss": 1.021295428276062, "rewards/accuracies": 0.8166666626930237, "rewards/chosen": -0.03503415733575821, "rewards/margins": 0.08636824786663055, "rewards/rejected": -0.12140240520238876, "step": 1685 }, { "epoch": 2.3410351201478745, "grad_norm": 2.8215250968933105, "learning_rate": 1.2927272727272726e-07, "log_odds_chosen": 1.7545371055603027, "log_odds_ratio": -0.30033814907073975, "logits/chosen": 1.996227741241455, "logits/rejected": 2.0622332096099854, "logps/chosen": -0.2831988036632538, "logps/rejected": -1.0355448722839355, "loss": 0.9776, "nll_loss": 0.9475898742675781, "rewards/accuracies": 0.8833333253860474, "rewards/chosen": -0.02831987477838993, "rewards/margins": 0.07523461431264877, "rewards/rejected": -0.10355449467897415, "step": 1690 }, { "epoch": 2.3479667282809613, "grad_norm": 1.334836721420288, "learning_rate": 1.2836363636363635e-07, "log_odds_chosen": 1.8632985353469849, "log_odds_ratio": -0.23800452053546906, "logits/chosen": 2.006606101989746, "logits/rejected": 2.080357789993286, "logps/chosen": -0.2609540820121765, "logps/rejected": -1.0780720710754395, "loss": 0.996, "nll_loss": 0.9722317457199097, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.02609540894627571, "rewards/margins": 0.0817117914557457, "rewards/rejected": -0.1078072041273117, "step": 1695 }, { "epoch": 2.354898336414048, "grad_norm": 2.684661626815796, "learning_rate": 1.2745454545454545e-07, "log_odds_chosen": 1.8242162466049194, "log_odds_ratio": -0.2849119305610657, "logits/chosen": 1.953218698501587, "logits/rejected": 2.0185937881469727, "logps/chosen": -0.25621822476387024, "logps/rejected": -1.050632357597351, "loss": 0.9865, "nll_loss": 0.9579840898513794, "rewards/accuracies": 0.8833333253860474, "rewards/chosen": -0.025621820241212845, "rewards/margins": 0.07944142073392868, "rewards/rejected": -0.10506324470043182, "step": 1700 }, { "epoch": 2.361829944547135, "grad_norm": 1.7048418521881104, "learning_rate": 1.2654545454545452e-07, "log_odds_chosen": 1.9969215393066406, "log_odds_ratio": -0.25461483001708984, "logits/chosen": 1.9886645078659058, "logits/rejected": 2.0469629764556885, "logps/chosen": -0.2665863335132599, "logps/rejected": -1.1525532007217407, "loss": 1.0313, "nll_loss": 1.0058410167694092, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.02665863186120987, "rewards/margins": 0.08859668672084808, "rewards/rejected": -0.11525531858205795, "step": 1705 }, { "epoch": 2.3687615526802217, "grad_norm": 2.232328414916992, "learning_rate": 1.2563636363636362e-07, "log_odds_chosen": 1.9676387310028076, "log_odds_ratio": -0.2715602517127991, "logits/chosen": 1.936298131942749, "logits/rejected": 1.9897048473358154, "logps/chosen": -0.2537732422351837, "logps/rejected": -1.1118028163909912, "loss": 1.0124, "nll_loss": 0.9852831363677979, "rewards/accuracies": 0.875, "rewards/chosen": -0.025377323850989342, "rewards/margins": 0.08580294251441956, "rewards/rejected": -0.11118026822805405, "step": 1710 }, { "epoch": 2.3756931608133085, "grad_norm": 3.159862518310547, "learning_rate": 1.2472727272727272e-07, "log_odds_chosen": 1.9476854801177979, "log_odds_ratio": -0.2814669609069824, "logits/chosen": 1.9453091621398926, "logits/rejected": 2.018049478530884, "logps/chosen": -0.2795754075050354, "logps/rejected": -1.1083685159683228, "loss": 1.0349, "nll_loss": 1.0067722797393799, "rewards/accuracies": 0.9083333611488342, "rewards/chosen": -0.02795754000544548, "rewards/margins": 0.08287932723760605, "rewards/rejected": -0.11083687096834183, "step": 1715 }, { "epoch": 2.3826247689463957, "grad_norm": 2.8082034587860107, "learning_rate": 1.238181818181818e-07, "log_odds_chosen": 1.8380389213562012, "log_odds_ratio": -0.3166138231754303, "logits/chosen": 2.0408785343170166, "logits/rejected": 2.11739182472229, "logps/chosen": -0.28606662154197693, "logps/rejected": -1.1519672870635986, "loss": 1.0073, "nll_loss": 0.9756883978843689, "rewards/accuracies": 0.8916666507720947, "rewards/chosen": -0.02860666997730732, "rewards/margins": 0.08659005165100098, "rewards/rejected": -0.11519671976566315, "step": 1720 }, { "epoch": 2.3895563770794825, "grad_norm": 2.500714063644409, "learning_rate": 1.2290909090909088e-07, "log_odds_chosen": 1.6278916597366333, "log_odds_ratio": -0.3502149283885956, "logits/chosen": 2.0757570266723633, "logits/rejected": 2.1220290660858154, "logps/chosen": -0.31332165002822876, "logps/rejected": -1.052827000617981, "loss": 1.003, "nll_loss": 0.9680219888687134, "rewards/accuracies": 0.8583333492279053, "rewards/chosen": -0.031332165002822876, "rewards/margins": 0.07395053654909134, "rewards/rejected": -0.10528270155191422, "step": 1725 }, { "epoch": 2.3964879852125693, "grad_norm": 2.743722915649414, "learning_rate": 1.2199999999999998e-07, "log_odds_chosen": 1.4381171464920044, "log_odds_ratio": -0.43818411231040955, "logits/chosen": 2.0334222316741943, "logits/rejected": 2.081953287124634, "logps/chosen": -0.3282889723777771, "logps/rejected": -0.9676831364631653, "loss": 1.0602, "nll_loss": 1.0163487195968628, "rewards/accuracies": 0.8166666626930237, "rewards/chosen": -0.03282889723777771, "rewards/margins": 0.0639394223690033, "rewards/rejected": -0.0967683270573616, "step": 1730 }, { "epoch": 2.403419593345656, "grad_norm": 2.034834146499634, "learning_rate": 1.2109090909090908e-07, "log_odds_chosen": 1.7440353631973267, "log_odds_ratio": -0.2908443510532379, "logits/chosen": 2.1094157695770264, "logits/rejected": 2.1522154808044434, "logps/chosen": -0.2517443597316742, "logps/rejected": -0.9762896299362183, "loss": 0.9974, "nll_loss": 0.9682726263999939, "rewards/accuracies": 0.8833333253860474, "rewards/chosen": -0.02517443709075451, "rewards/margins": 0.0724545270204544, "rewards/rejected": -0.09762895852327347, "step": 1735 }, { "epoch": 2.410351201478743, "grad_norm": 4.332306861877441, "learning_rate": 1.2018181818181818e-07, "log_odds_chosen": 1.5381571054458618, "log_odds_ratio": -0.3476658761501312, "logits/chosen": 2.057114362716675, "logits/rejected": 2.090787410736084, "logps/chosen": -0.276753693819046, "logps/rejected": -0.9296085834503174, "loss": 1.0436, "nll_loss": 1.0088648796081543, "rewards/accuracies": 0.875, "rewards/chosen": -0.027675366029143333, "rewards/margins": 0.06528548896312714, "rewards/rejected": -0.09296084940433502, "step": 1740 }, { "epoch": 2.41728280961183, "grad_norm": 3.1289680004119873, "learning_rate": 1.1927272727272725e-07, "log_odds_chosen": 1.5321450233459473, "log_odds_ratio": -0.3635505437850952, "logits/chosen": 2.098928928375244, "logits/rejected": 2.1394617557525635, "logps/chosen": -0.32607170939445496, "logps/rejected": -1.0010368824005127, "loss": 1.0101, "nll_loss": 0.973716676235199, "rewards/accuracies": 0.8583333492279053, "rewards/chosen": -0.032607175409793854, "rewards/margins": 0.06749651581048965, "rewards/rejected": -0.10010368376970291, "step": 1745 }, { "epoch": 2.424214417744917, "grad_norm": 2.292997360229492, "learning_rate": 1.1836363636363636e-07, "log_odds_chosen": 1.6794272661209106, "log_odds_ratio": -0.3062840700149536, "logits/chosen": 1.9977962970733643, "logits/rejected": 2.0764520168304443, "logps/chosen": -0.24340610206127167, "logps/rejected": -0.9343698620796204, "loss": 0.9467, "nll_loss": 0.9160255193710327, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.024340612813830376, "rewards/margins": 0.0690963938832283, "rewards/rejected": -0.09343700110912323, "step": 1750 }, { "epoch": 2.4311460258780038, "grad_norm": 2.060739517211914, "learning_rate": 1.1745454545454545e-07, "log_odds_chosen": 1.7372018098831177, "log_odds_ratio": -0.36456355452537537, "logits/chosen": 2.081760883331299, "logits/rejected": 2.139793634414673, "logps/chosen": -0.30359095335006714, "logps/rejected": -1.0979803800582886, "loss": 1.0152, "nll_loss": 0.9787145853042603, "rewards/accuracies": 0.875, "rewards/chosen": -0.030359093099832535, "rewards/margins": 0.0794389396905899, "rewards/rejected": -0.10979804396629333, "step": 1755 }, { "epoch": 2.4380776340110906, "grad_norm": 2.298625946044922, "learning_rate": 1.1654545454545455e-07, "log_odds_chosen": 1.6793102025985718, "log_odds_ratio": -0.335517019033432, "logits/chosen": 2.0144755840301514, "logits/rejected": 2.075939416885376, "logps/chosen": -0.2551276385784149, "logps/rejected": -0.9466179013252258, "loss": 1.0231, "nll_loss": 0.9895772337913513, "rewards/accuracies": 0.8666666746139526, "rewards/chosen": -0.02551276609301567, "rewards/margins": 0.06914903223514557, "rewards/rejected": -0.09466180205345154, "step": 1760 }, { "epoch": 2.4450092421441774, "grad_norm": 1.3807379007339478, "learning_rate": 1.1563636363636362e-07, "log_odds_chosen": 1.695957064628601, "log_odds_ratio": -0.33206743001937866, "logits/chosen": 2.0311429500579834, "logits/rejected": 2.0982778072357178, "logps/chosen": -0.25683820247650146, "logps/rejected": -0.978820264339447, "loss": 1.019, "nll_loss": 0.9857791066169739, "rewards/accuracies": 0.875, "rewards/chosen": -0.025683818385004997, "rewards/margins": 0.07219821959733963, "rewards/rejected": -0.09788203239440918, "step": 1765 }, { "epoch": 2.451940850277264, "grad_norm": 2.096445083618164, "learning_rate": 1.1472727272727272e-07, "log_odds_chosen": 1.7026692628860474, "log_odds_ratio": -0.3184446692466736, "logits/chosen": 2.0503830909729004, "logits/rejected": 2.086923122406006, "logps/chosen": -0.27430155873298645, "logps/rejected": -0.9957641363143921, "loss": 1.0057, "nll_loss": 0.9738113880157471, "rewards/accuracies": 0.8833333253860474, "rewards/chosen": -0.027430152520537376, "rewards/margins": 0.07214626669883728, "rewards/rejected": -0.0995764285326004, "step": 1770 }, { "epoch": 2.458872458410351, "grad_norm": 2.9843761920928955, "learning_rate": 1.1381818181818182e-07, "log_odds_chosen": 1.8962608575820923, "log_odds_ratio": -0.2932147681713104, "logits/chosen": 2.1202423572540283, "logits/rejected": 2.1624252796173096, "logps/chosen": -0.2599133849143982, "logps/rejected": -1.1326204538345337, "loss": 1.0537, "nll_loss": 1.0243782997131348, "rewards/accuracies": 0.875, "rewards/chosen": -0.025991341099143028, "rewards/margins": 0.08727072179317474, "rewards/rejected": -0.11326204240322113, "step": 1775 }, { "epoch": 2.465804066543438, "grad_norm": 2.571378469467163, "learning_rate": 1.1290909090909091e-07, "log_odds_chosen": 2.0019185543060303, "log_odds_ratio": -0.24575158953666687, "logits/chosen": 2.049402952194214, "logits/rejected": 2.103079080581665, "logps/chosen": -0.3088202476501465, "logps/rejected": -1.2870643138885498, "loss": 1.0222, "nll_loss": 0.997674822807312, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.03088202513754368, "rewards/margins": 0.09782441705465317, "rewards/rejected": -0.1287064403295517, "step": 1780 }, { "epoch": 2.472735674676525, "grad_norm": 1.9969513416290283, "learning_rate": 1.1200000000000001e-07, "log_odds_chosen": 1.6785210371017456, "log_odds_ratio": -0.3223731815814972, "logits/chosen": 2.0518710613250732, "logits/rejected": 2.1081185340881348, "logps/chosen": -0.2911817133426666, "logps/rejected": -1.0387407541275024, "loss": 0.9996, "nll_loss": 0.9674090147018433, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.02911817468702793, "rewards/margins": 0.07475589960813522, "rewards/rejected": -0.1038740798830986, "step": 1785 }, { "epoch": 2.479667282809612, "grad_norm": 2.367443799972534, "learning_rate": 1.1109090909090908e-07, "log_odds_chosen": 1.6323319673538208, "log_odds_ratio": -0.35728973150253296, "logits/chosen": 1.9989386796951294, "logits/rejected": 2.0702216625213623, "logps/chosen": -0.2859072685241699, "logps/rejected": -0.9873275756835938, "loss": 0.9933, "nll_loss": 0.9575673341751099, "rewards/accuracies": 0.8416666388511658, "rewards/chosen": -0.02859073132276535, "rewards/margins": 0.07014203071594238, "rewards/rejected": -0.09873275458812714, "step": 1790 }, { "epoch": 2.4865988909426986, "grad_norm": 5.1550374031066895, "learning_rate": 1.1018181818181818e-07, "log_odds_chosen": 1.9188029766082764, "log_odds_ratio": -0.31065088510513306, "logits/chosen": 1.9901913404464722, "logits/rejected": 2.050278902053833, "logps/chosen": -0.3183686137199402, "logps/rejected": -1.161499261856079, "loss": 1.0106, "nll_loss": 0.9795438051223755, "rewards/accuracies": 0.875, "rewards/chosen": -0.0318368636071682, "rewards/margins": 0.0843130573630333, "rewards/rejected": -0.1161499172449112, "step": 1795 }, { "epoch": 2.4935304990757854, "grad_norm": 2.2873830795288086, "learning_rate": 1.0927272727272728e-07, "log_odds_chosen": 1.4898102283477783, "log_odds_ratio": -0.36497774720191956, "logits/chosen": 2.037238121032715, "logits/rejected": 2.0807337760925293, "logps/chosen": -0.29659178853034973, "logps/rejected": -1.0002473592758179, "loss": 1.0471, "nll_loss": 1.0105878114700317, "rewards/accuracies": 0.7916666865348816, "rewards/chosen": -0.029659178107976913, "rewards/margins": 0.0703655481338501, "rewards/rejected": -0.10002472996711731, "step": 1800 }, { "epoch": 2.5004621072088726, "grad_norm": 2.6431796550750732, "learning_rate": 1.0836363636363637e-07, "log_odds_chosen": 1.6469905376434326, "log_odds_ratio": -0.3312085270881653, "logits/chosen": 1.9593592882156372, "logits/rejected": 2.0358383655548096, "logps/chosen": -0.26151055097579956, "logps/rejected": -0.9036704301834106, "loss": 1.0059, "nll_loss": 0.9727994203567505, "rewards/accuracies": 0.8583333492279053, "rewards/chosen": -0.026151059195399284, "rewards/margins": 0.06421598047018051, "rewards/rejected": -0.09036703407764435, "step": 1805 }, { "epoch": 2.5073937153419594, "grad_norm": 2.9634170532226562, "learning_rate": 1.0745454545454544e-07, "log_odds_chosen": 1.5996876955032349, "log_odds_ratio": -0.33672866225242615, "logits/chosen": 1.9904649257659912, "logits/rejected": 2.0536324977874756, "logps/chosen": -0.32099291682243347, "logps/rejected": -1.0023874044418335, "loss": 1.0524, "nll_loss": 1.018733263015747, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.03209929168224335, "rewards/margins": 0.0681394636631012, "rewards/rejected": -0.10023875534534454, "step": 1810 }, { "epoch": 2.5143253234750462, "grad_norm": 1.860707402229309, "learning_rate": 1.0654545454545454e-07, "log_odds_chosen": 1.8385961055755615, "log_odds_ratio": -0.34007883071899414, "logits/chosen": 1.9847419261932373, "logits/rejected": 2.0414199829101562, "logps/chosen": -0.3060314953327179, "logps/rejected": -1.150540828704834, "loss": 0.9906, "nll_loss": 0.9565935730934143, "rewards/accuracies": 0.8916666507720947, "rewards/chosen": -0.03060315176844597, "rewards/margins": 0.08445093035697937, "rewards/rejected": -0.11505408585071564, "step": 1815 }, { "epoch": 2.521256931608133, "grad_norm": 2.953806161880493, "learning_rate": 1.0563636363636364e-07, "log_odds_chosen": 1.6283913850784302, "log_odds_ratio": -0.3325265049934387, "logits/chosen": 2.0417227745056152, "logits/rejected": 2.104788064956665, "logps/chosen": -0.31526321172714233, "logps/rejected": -1.0568766593933105, "loss": 0.9896, "nll_loss": 0.9563248753547668, "rewards/accuracies": 0.875, "rewards/chosen": -0.031526319682598114, "rewards/margins": 0.0741613581776619, "rewards/rejected": -0.10568765550851822, "step": 1820 }, { "epoch": 2.52818853974122, "grad_norm": 2.1079516410827637, "learning_rate": 1.0472727272727273e-07, "log_odds_chosen": 1.5739433765411377, "log_odds_ratio": -0.33921295404434204, "logits/chosen": 2.122877359390259, "logits/rejected": 2.1718015670776367, "logps/chosen": -0.2680935859680176, "logps/rejected": -0.9542296528816223, "loss": 1.0209, "nll_loss": 0.986934244632721, "rewards/accuracies": 0.8583333492279053, "rewards/chosen": -0.026809358969330788, "rewards/margins": 0.06861360371112823, "rewards/rejected": -0.09542296081781387, "step": 1825 }, { "epoch": 2.5351201478743066, "grad_norm": 2.3235225677490234, "learning_rate": 1.038181818181818e-07, "log_odds_chosen": 1.7671153545379639, "log_odds_ratio": -0.3048493266105652, "logits/chosen": 2.0175423622131348, "logits/rejected": 2.1084630489349365, "logps/chosen": -0.28295522928237915, "logps/rejected": -1.078856348991394, "loss": 1.0093, "nll_loss": 0.9788612723350525, "rewards/accuracies": 0.8416666388511658, "rewards/chosen": -0.028295524418354034, "rewards/margins": 0.07959011197090149, "rewards/rejected": -0.10788564383983612, "step": 1830 }, { "epoch": 2.542051756007394, "grad_norm": 2.376753807067871, "learning_rate": 1.029090909090909e-07, "log_odds_chosen": 1.5308775901794434, "log_odds_ratio": -0.35065487027168274, "logits/chosen": 2.0661802291870117, "logits/rejected": 2.1190385818481445, "logps/chosen": -0.30774179100990295, "logps/rejected": -0.9961751103401184, "loss": 1.0005, "nll_loss": 0.9654229879379272, "rewards/accuracies": 0.8333333134651184, "rewards/chosen": -0.030774177983403206, "rewards/margins": 0.06884334981441498, "rewards/rejected": -0.09961751848459244, "step": 1835 }, { "epoch": 2.5489833641404807, "grad_norm": 1.1884440183639526, "learning_rate": 1.02e-07, "log_odds_chosen": 1.692187786102295, "log_odds_ratio": -0.35703638195991516, "logits/chosen": 2.050490379333496, "logits/rejected": 2.091343879699707, "logps/chosen": -0.26052388548851013, "logps/rejected": -1.032615065574646, "loss": 0.9831, "nll_loss": 0.9473720192909241, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.026052383705973625, "rewards/margins": 0.07720911502838135, "rewards/rejected": -0.10326149314641953, "step": 1840 }, { "epoch": 2.5559149722735675, "grad_norm": 2.3813302516937256, "learning_rate": 1.010909090909091e-07, "log_odds_chosen": 1.5822950601577759, "log_odds_ratio": -0.3297514021396637, "logits/chosen": 2.0444798469543457, "logits/rejected": 2.1010236740112305, "logps/chosen": -0.2959387004375458, "logps/rejected": -0.987533450126648, "loss": 1.0236, "nll_loss": 0.9906317591667175, "rewards/accuracies": 0.875, "rewards/chosen": -0.02959386818110943, "rewards/margins": 0.06915947794914246, "rewards/rejected": -0.09875334799289703, "step": 1845 }, { "epoch": 2.5628465804066543, "grad_norm": 2.9394869804382324, "learning_rate": 1.0018181818181817e-07, "log_odds_chosen": 1.702123761177063, "log_odds_ratio": -0.3088690936565399, "logits/chosen": 2.113861322402954, "logits/rejected": 2.1717820167541504, "logps/chosen": -0.3088254928588867, "logps/rejected": -1.0686160326004028, "loss": 1.0239, "nll_loss": 0.9929828643798828, "rewards/accuracies": 0.8583333492279053, "rewards/chosen": -0.03088255040347576, "rewards/margins": 0.07597906142473221, "rewards/rejected": -0.10686160624027252, "step": 1850 }, { "epoch": 2.5697781885397415, "grad_norm": 3.485311508178711, "learning_rate": 9.927272727272727e-08, "log_odds_chosen": 1.7402740716934204, "log_odds_ratio": -0.38107120990753174, "logits/chosen": 1.9880726337432861, "logits/rejected": 2.057579755783081, "logps/chosen": -0.2805604338645935, "logps/rejected": -1.0576139688491821, "loss": 0.9883, "nll_loss": 0.9501924514770508, "rewards/accuracies": 0.8166666626930237, "rewards/chosen": -0.02805604785680771, "rewards/margins": 0.07770536839962006, "rewards/rejected": -0.10576140880584717, "step": 1855 }, { "epoch": 2.5767097966728283, "grad_norm": 3.232532262802124, "learning_rate": 9.836363636363636e-08, "log_odds_chosen": 1.8129467964172363, "log_odds_ratio": -0.2785561978816986, "logits/chosen": 1.9095889329910278, "logits/rejected": 1.9935516119003296, "logps/chosen": -0.2174181193113327, "logps/rejected": -0.9398717880249023, "loss": 0.9843, "nll_loss": 0.9564692974090576, "rewards/accuracies": 0.9083333611488342, "rewards/chosen": -0.02174181304872036, "rewards/margins": 0.07224537432193756, "rewards/rejected": -0.09398718178272247, "step": 1860 }, { "epoch": 2.583641404805915, "grad_norm": 2.0778720378875732, "learning_rate": 9.745454545454545e-08, "log_odds_chosen": 1.8064855337142944, "log_odds_ratio": -0.3176063001155853, "logits/chosen": 2.1032285690307617, "logits/rejected": 2.154674768447876, "logps/chosen": -0.3073400855064392, "logps/rejected": -1.1221901178359985, "loss": 0.9987, "nll_loss": 0.9669729471206665, "rewards/accuracies": 0.8666666746139526, "rewards/chosen": -0.03073401190340519, "rewards/margins": 0.08148500323295593, "rewards/rejected": -0.11221900582313538, "step": 1865 }, { "epoch": 2.590573012939002, "grad_norm": 1.6975332498550415, "learning_rate": 9.654545454545454e-08, "log_odds_chosen": 1.6128000020980835, "log_odds_ratio": -0.33097171783447266, "logits/chosen": 2.0006532669067383, "logits/rejected": 2.056741237640381, "logps/chosen": -0.2736147940158844, "logps/rejected": -0.9443971514701843, "loss": 0.98, "nll_loss": 0.9469044804573059, "rewards/accuracies": 0.8833333253860474, "rewards/chosen": -0.02736147679388523, "rewards/margins": 0.06707824021577835, "rewards/rejected": -0.09443972259759903, "step": 1870 }, { "epoch": 2.5975046210720887, "grad_norm": 2.9151763916015625, "learning_rate": 9.563636363636364e-08, "log_odds_chosen": 2.098798990249634, "log_odds_ratio": -0.263777494430542, "logits/chosen": 1.9829503297805786, "logits/rejected": 2.0440664291381836, "logps/chosen": -0.2544497847557068, "logps/rejected": -1.2993313074111938, "loss": 1.015, "nll_loss": 0.9885779023170471, "rewards/accuracies": 0.8916666507720947, "rewards/chosen": -0.025444982573390007, "rewards/margins": 0.10448816418647766, "rewards/rejected": -0.12993313372135162, "step": 1875 }, { "epoch": 2.6044362292051755, "grad_norm": 1.4423106908798218, "learning_rate": 9.472727272727272e-08, "log_odds_chosen": 1.7735576629638672, "log_odds_ratio": -0.3009028136730194, "logits/chosen": 1.8770281076431274, "logits/rejected": 1.9749999046325684, "logps/chosen": -0.2571166455745697, "logps/rejected": -0.9384245276451111, "loss": 0.9065, "nll_loss": 0.8764019012451172, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.025711664929986, "rewards/margins": 0.06813079863786697, "rewards/rejected": -0.09384246915578842, "step": 1880 }, { "epoch": 2.6113678373382623, "grad_norm": 3.350883960723877, "learning_rate": 9.381818181818182e-08, "log_odds_chosen": 1.902999997138977, "log_odds_ratio": -0.27424463629722595, "logits/chosen": 2.0839245319366455, "logits/rejected": 2.14450740814209, "logps/chosen": -0.26829907298088074, "logps/rejected": -1.1070636510849, "loss": 1.0208, "nll_loss": 0.993401825428009, "rewards/accuracies": 0.8833333253860474, "rewards/chosen": -0.026829909533262253, "rewards/margins": 0.08387646079063416, "rewards/rejected": -0.11070636659860611, "step": 1885 }, { "epoch": 2.6182994454713495, "grad_norm": 3.9501917362213135, "learning_rate": 9.29090909090909e-08, "log_odds_chosen": 1.724169135093689, "log_odds_ratio": -0.3101595342159271, "logits/chosen": 2.021693229675293, "logits/rejected": 2.096151828765869, "logps/chosen": -0.2878502309322357, "logps/rejected": -1.0322964191436768, "loss": 1.0293, "nll_loss": 0.9983063340187073, "rewards/accuracies": 0.8666666746139526, "rewards/chosen": -0.028785018250346184, "rewards/margins": 0.07444461435079575, "rewards/rejected": -0.10322963446378708, "step": 1890 }, { "epoch": 2.6252310536044363, "grad_norm": 2.835116386413574, "learning_rate": 9.2e-08, "log_odds_chosen": 2.0075416564941406, "log_odds_ratio": -0.29768672585487366, "logits/chosen": 2.0260884761810303, "logits/rejected": 2.08447527885437, "logps/chosen": -0.2922082841396332, "logps/rejected": -1.2498011589050293, "loss": 1.0113, "nll_loss": 0.981542706489563, "rewards/accuracies": 0.8916666507720947, "rewards/chosen": -0.029220828786492348, "rewards/margins": 0.09575929492712021, "rewards/rejected": -0.12498010694980621, "step": 1895 }, { "epoch": 2.632162661737523, "grad_norm": 1.9952729940414429, "learning_rate": 9.109090909090909e-08, "log_odds_chosen": 2.203530788421631, "log_odds_ratio": -0.2388562709093094, "logits/chosen": 2.05356764793396, "logits/rejected": 2.108933925628662, "logps/chosen": -0.2701965272426605, "logps/rejected": -1.3429614305496216, "loss": 1.0337, "nll_loss": 1.00979745388031, "rewards/accuracies": 0.9333333373069763, "rewards/chosen": -0.027019653469324112, "rewards/margins": 0.10727646201848984, "rewards/rejected": -0.13429613411426544, "step": 1900 }, { "epoch": 2.63909426987061, "grad_norm": 2.688429594039917, "learning_rate": 9.018181818181818e-08, "log_odds_chosen": 1.6793392896652222, "log_odds_ratio": -0.3166213929653168, "logits/chosen": 1.9479715824127197, "logits/rejected": 2.0088744163513184, "logps/chosen": -0.27929723262786865, "logps/rejected": -0.9713757634162903, "loss": 1.0348, "nll_loss": 1.0030966997146606, "rewards/accuracies": 0.8833333253860474, "rewards/chosen": -0.027929725125432014, "rewards/margins": 0.06920785456895828, "rewards/rejected": -0.09713757783174515, "step": 1905 }, { "epoch": 2.6460258780036967, "grad_norm": 1.4949442148208618, "learning_rate": 8.927272727272727e-08, "log_odds_chosen": 1.9754811525344849, "log_odds_ratio": -0.30257752537727356, "logits/chosen": 1.9509350061416626, "logits/rejected": 2.0503389835357666, "logps/chosen": -0.27535703778266907, "logps/rejected": -1.118589162826538, "loss": 0.9724, "nll_loss": 0.9421722292900085, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.027535704895853996, "rewards/margins": 0.0843232050538063, "rewards/rejected": -0.11185891181230545, "step": 1910 }, { "epoch": 2.652957486136784, "grad_norm": 2.16941237449646, "learning_rate": 8.836363636363637e-08, "log_odds_chosen": 2.000943422317505, "log_odds_ratio": -0.2847401797771454, "logits/chosen": 2.106870412826538, "logits/rejected": 2.168245553970337, "logps/chosen": -0.26422372460365295, "logps/rejected": -1.217974305152893, "loss": 1.061, "nll_loss": 1.0324803590774536, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.026422372087836266, "rewards/margins": 0.09537507593631744, "rewards/rejected": -0.12179744988679886, "step": 1915 }, { "epoch": 2.6598890942698707, "grad_norm": 1.8085908889770508, "learning_rate": 8.745454545454545e-08, "log_odds_chosen": 1.5008604526519775, "log_odds_ratio": -0.38829174637794495, "logits/chosen": 1.9134535789489746, "logits/rejected": 1.9677314758300781, "logps/chosen": -0.2830255627632141, "logps/rejected": -0.9362450242042542, "loss": 1.0461, "nll_loss": 1.0073057413101196, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.02830255590379238, "rewards/margins": 0.06532195210456848, "rewards/rejected": -0.09362450987100601, "step": 1920 }, { "epoch": 2.6668207024029575, "grad_norm": 2.762589931488037, "learning_rate": 8.654545454545455e-08, "log_odds_chosen": 1.8519963026046753, "log_odds_ratio": -0.3259159326553345, "logits/chosen": 1.9644198417663574, "logits/rejected": 2.05434250831604, "logps/chosen": -0.2632545232772827, "logps/rejected": -1.0625907182693481, "loss": 1.0235, "nll_loss": 0.9909093976020813, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.02632545307278633, "rewards/margins": 0.07993361353874207, "rewards/rejected": -0.1062590703368187, "step": 1925 }, { "epoch": 2.6737523105360443, "grad_norm": 2.3893611431121826, "learning_rate": 8.563636363636363e-08, "log_odds_chosen": 1.6310181617736816, "log_odds_ratio": -0.3340619206428528, "logits/chosen": 2.005711555480957, "logits/rejected": 2.067094087600708, "logps/chosen": -0.2860340476036072, "logps/rejected": -0.9216349124908447, "loss": 0.9937, "nll_loss": 0.9602577686309814, "rewards/accuracies": 0.8666666746139526, "rewards/chosen": -0.02860340289771557, "rewards/margins": 0.06356008350849152, "rewards/rejected": -0.09216348081827164, "step": 1930 }, { "epoch": 2.680683918669131, "grad_norm": 1.9156088829040527, "learning_rate": 8.472727272727273e-08, "log_odds_chosen": 1.7571347951889038, "log_odds_ratio": -0.3111540377140045, "logits/chosen": 2.0473296642303467, "logits/rejected": 2.1130969524383545, "logps/chosen": -0.28812503814697266, "logps/rejected": -1.1086490154266357, "loss": 0.9945, "nll_loss": 0.9633785486221313, "rewards/accuracies": 0.8583333492279053, "rewards/chosen": -0.028812507167458534, "rewards/margins": 0.08205239474773407, "rewards/rejected": -0.11086489260196686, "step": 1935 }, { "epoch": 2.687615526802218, "grad_norm": 2.3184399604797363, "learning_rate": 8.381818181818181e-08, "log_odds_chosen": 1.6769403219223022, "log_odds_ratio": -0.3335730731487274, "logits/chosen": 1.9665719270706177, "logits/rejected": 2.030404567718506, "logps/chosen": -0.24711455404758453, "logps/rejected": -0.9258390069007874, "loss": 0.9979, "nll_loss": 0.9645217061042786, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.024711458012461662, "rewards/margins": 0.06787244230508804, "rewards/rejected": -0.09258389472961426, "step": 1940 }, { "epoch": 2.6945471349353047, "grad_norm": 1.7348381280899048, "learning_rate": 8.290909090909091e-08, "log_odds_chosen": 2.012420415878296, "log_odds_ratio": -0.2879267930984497, "logits/chosen": 2.080967426300049, "logits/rejected": 2.181638479232788, "logps/chosen": -0.2593296766281128, "logps/rejected": -1.197130560874939, "loss": 1.0093, "nll_loss": 0.9804985523223877, "rewards/accuracies": 0.8833333253860474, "rewards/chosen": -0.02593296393752098, "rewards/margins": 0.09378007054328918, "rewards/rejected": -0.11971304565668106, "step": 1945 }, { "epoch": 2.701478743068392, "grad_norm": 2.2291972637176514, "learning_rate": 8.199999999999999e-08, "log_odds_chosen": 1.8021572828292847, "log_odds_ratio": -0.2828613221645355, "logits/chosen": 1.9889678955078125, "logits/rejected": 2.0603299140930176, "logps/chosen": -0.27885323762893677, "logps/rejected": -1.020405888557434, "loss": 1.0091, "nll_loss": 0.9808385372161865, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.02788531966507435, "rewards/margins": 0.0741552785038948, "rewards/rejected": -0.10204058140516281, "step": 1950 }, { "epoch": 2.7084103512014788, "grad_norm": 3.511234998703003, "learning_rate": 8.109090909090909e-08, "log_odds_chosen": 1.6991480588912964, "log_odds_ratio": -0.3476136028766632, "logits/chosen": 2.0690817832946777, "logits/rejected": 2.131218671798706, "logps/chosen": -0.28822681307792664, "logps/rejected": -0.9959009885787964, "loss": 1.0083, "nll_loss": 0.9735735058784485, "rewards/accuracies": 0.8333333134651184, "rewards/chosen": -0.028822684660553932, "rewards/margins": 0.07076740264892578, "rewards/rejected": -0.09959009289741516, "step": 1955 }, { "epoch": 2.7153419593345656, "grad_norm": 1.731318712234497, "learning_rate": 8.018181818181817e-08, "log_odds_chosen": 1.9510962963104248, "log_odds_ratio": -0.2717619836330414, "logits/chosen": 2.0861499309539795, "logits/rejected": 2.1444475650787354, "logps/chosen": -0.26343804597854614, "logps/rejected": -1.1370905637741089, "loss": 1.0018, "nll_loss": 0.9746354222297668, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.026343801990151405, "rewards/margins": 0.08736524730920792, "rewards/rejected": -0.11370905488729477, "step": 1960 }, { "epoch": 2.7222735674676524, "grad_norm": 2.5520708560943604, "learning_rate": 7.927272727272727e-08, "log_odds_chosen": 1.883239507675171, "log_odds_ratio": -0.34364765882492065, "logits/chosen": 2.0899996757507324, "logits/rejected": 2.1520025730133057, "logps/chosen": -0.3178773820400238, "logps/rejected": -1.1890572309494019, "loss": 1.0465, "nll_loss": 1.012162208557129, "rewards/accuracies": 0.8083333373069763, "rewards/chosen": -0.03178774565458298, "rewards/margins": 0.087117999792099, "rewards/rejected": -0.11890573799610138, "step": 1965 }, { "epoch": 2.7292051756007396, "grad_norm": 3.0097408294677734, "learning_rate": 7.836363636363637e-08, "log_odds_chosen": 1.7459481954574585, "log_odds_ratio": -0.3089205026626587, "logits/chosen": 1.948202133178711, "logits/rejected": 2.030421733856201, "logps/chosen": -0.2865378260612488, "logps/rejected": -0.9788001775741577, "loss": 0.9699, "nll_loss": 0.9390251040458679, "rewards/accuracies": 0.8916666507720947, "rewards/chosen": -0.028653783723711967, "rewards/margins": 0.06922624260187149, "rewards/rejected": -0.09788002073764801, "step": 1970 }, { "epoch": 2.7361367837338264, "grad_norm": 2.334012746810913, "learning_rate": 7.745454545454545e-08, "log_odds_chosen": 1.6162441968917847, "log_odds_ratio": -0.3280728757381439, "logits/chosen": 2.022932767868042, "logits/rejected": 2.0705296993255615, "logps/chosen": -0.2768207788467407, "logps/rejected": -0.927074134349823, "loss": 0.9948, "nll_loss": 0.9620178937911987, "rewards/accuracies": 0.8833333253860474, "rewards/chosen": -0.027682077139616013, "rewards/margins": 0.06502533704042435, "rewards/rejected": -0.09270740300416946, "step": 1975 }, { "epoch": 2.743068391866913, "grad_norm": 3.4409821033477783, "learning_rate": 7.654545454545455e-08, "log_odds_chosen": 1.7293587923049927, "log_odds_ratio": -0.32593438029289246, "logits/chosen": 2.0177626609802246, "logits/rejected": 2.0882925987243652, "logps/chosen": -0.3067542612552643, "logps/rejected": -1.018390417098999, "loss": 1.004, "nll_loss": 0.9714083075523376, "rewards/accuracies": 0.8666666746139526, "rewards/chosen": -0.030675429850816727, "rewards/margins": 0.07116362452507019, "rewards/rejected": -0.10183904320001602, "step": 1980 }, { "epoch": 2.75, "grad_norm": 3.144928455352783, "learning_rate": 7.563636363636363e-08, "log_odds_chosen": 2.015221357345581, "log_odds_ratio": -0.24131697416305542, "logits/chosen": 2.0317463874816895, "logits/rejected": 2.112942934036255, "logps/chosen": -0.2766430974006653, "logps/rejected": -1.1748555898666382, "loss": 0.9986, "nll_loss": 0.974430501461029, "rewards/accuracies": 0.9416666626930237, "rewards/chosen": -0.027664311230182648, "rewards/margins": 0.0898212417960167, "rewards/rejected": -0.11748553812503815, "step": 1985 }, { "epoch": 2.756931608133087, "grad_norm": 2.3371379375457764, "learning_rate": 7.472727272727273e-08, "log_odds_chosen": 1.7855689525604248, "log_odds_ratio": -0.28874701261520386, "logits/chosen": 2.058995485305786, "logits/rejected": 2.1220133304595947, "logps/chosen": -0.23786862194538116, "logps/rejected": -0.8881834745407104, "loss": 0.9978, "nll_loss": 0.9689105153083801, "rewards/accuracies": 0.8916666507720947, "rewards/chosen": -0.023786865174770355, "rewards/margins": 0.06503147631883621, "rewards/rejected": -0.08881834149360657, "step": 1990 }, { "epoch": 2.7638632162661736, "grad_norm": 2.0242509841918945, "learning_rate": 7.381818181818182e-08, "log_odds_chosen": 1.881475806236267, "log_odds_ratio": -0.2716512382030487, "logits/chosen": 1.9721044301986694, "logits/rejected": 2.0286245346069336, "logps/chosen": -0.2365345060825348, "logps/rejected": -1.0640085935592651, "loss": 0.9904, "nll_loss": 0.9632561802864075, "rewards/accuracies": 0.9083333611488342, "rewards/chosen": -0.02365345135331154, "rewards/margins": 0.08274741470813751, "rewards/rejected": -0.10640083998441696, "step": 1995 }, { "epoch": 2.7707948243992604, "grad_norm": 3.5550692081451416, "learning_rate": 7.290909090909091e-08, "log_odds_chosen": 1.6393131017684937, "log_odds_ratio": -0.3566218316555023, "logits/chosen": 2.027010440826416, "logits/rejected": 2.1158926486968994, "logps/chosen": -0.2945045530796051, "logps/rejected": -0.9923899173736572, "loss": 0.9887, "nll_loss": 0.9530836343765259, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.02945045940577984, "rewards/margins": 0.06978853791952133, "rewards/rejected": -0.09923899918794632, "step": 2000 }, { "epoch": 2.7777264325323476, "grad_norm": 2.983398199081421, "learning_rate": 7.2e-08, "log_odds_chosen": 1.883139729499817, "log_odds_ratio": -0.2949954569339752, "logits/chosen": 2.0600686073303223, "logits/rejected": 2.1349780559539795, "logps/chosen": -0.27356693148612976, "logps/rejected": -1.1228362321853638, "loss": 1.0009, "nll_loss": 0.971444308757782, "rewards/accuracies": 0.8916666507720947, "rewards/chosen": -0.027356691658496857, "rewards/margins": 0.08492692559957504, "rewards/rejected": -0.1122836172580719, "step": 2005 }, { "epoch": 2.7846580406654344, "grad_norm": 2.232745885848999, "learning_rate": 7.10909090909091e-08, "log_odds_chosen": 1.8647377490997314, "log_odds_ratio": -0.31064775586128235, "logits/chosen": 2.072453260421753, "logits/rejected": 2.1425271034240723, "logps/chosen": -0.3136703670024872, "logps/rejected": -1.2263870239257812, "loss": 1.0013, "nll_loss": 0.9702617526054382, "rewards/accuracies": 0.8916666507720947, "rewards/chosen": -0.03136703744530678, "rewards/margins": 0.09127166122198105, "rewards/rejected": -0.12263870239257812, "step": 2010 }, { "epoch": 2.7915896487985212, "grad_norm": 2.4576735496520996, "learning_rate": 7.018181818181818e-08, "log_odds_chosen": 1.848351001739502, "log_odds_ratio": -0.300770103931427, "logits/chosen": 1.9794286489486694, "logits/rejected": 2.051739454269409, "logps/chosen": -0.2656658887863159, "logps/rejected": -1.1345840692520142, "loss": 1.0129, "nll_loss": 0.9827767610549927, "rewards/accuracies": 0.9166666865348816, "rewards/chosen": -0.026566587388515472, "rewards/margins": 0.08689180761575699, "rewards/rejected": -0.11345840990543365, "step": 2015 }, { "epoch": 2.798521256931608, "grad_norm": 4.359684467315674, "learning_rate": 6.927272727272727e-08, "log_odds_chosen": 1.7028800249099731, "log_odds_ratio": -0.3596075177192688, "logits/chosen": 2.0374951362609863, "logits/rejected": 2.0811033248901367, "logps/chosen": -0.26721158623695374, "logps/rejected": -1.0201528072357178, "loss": 1.0472, "nll_loss": 1.011243462562561, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.026721160858869553, "rewards/margins": 0.0752941220998764, "rewards/rejected": -0.10201527923345566, "step": 2020 }, { "epoch": 2.8054528650646953, "grad_norm": 1.5897514820098877, "learning_rate": 6.836363636363636e-08, "log_odds_chosen": 1.7618235349655151, "log_odds_ratio": -0.2725900709629059, "logits/chosen": 1.951567530632019, "logits/rejected": 2.018179416656494, "logps/chosen": -0.250274121761322, "logps/rejected": -0.964055061340332, "loss": 0.9872, "nll_loss": 0.9599834680557251, "rewards/accuracies": 0.9333333373069763, "rewards/chosen": -0.025027411058545113, "rewards/margins": 0.07137809693813324, "rewards/rejected": -0.0964054986834526, "step": 2025 }, { "epoch": 2.812384473197782, "grad_norm": 2.3347268104553223, "learning_rate": 6.745454545454546e-08, "log_odds_chosen": 1.8136812448501587, "log_odds_ratio": -0.3173971176147461, "logits/chosen": 2.0132126808166504, "logits/rejected": 2.0880587100982666, "logps/chosen": -0.3025325536727905, "logps/rejected": -1.1345620155334473, "loss": 1.0012, "nll_loss": 0.9694395661354065, "rewards/accuracies": 0.8666666746139526, "rewards/chosen": -0.030253252014517784, "rewards/margins": 0.08320295065641403, "rewards/rejected": -0.11345621198415756, "step": 2030 }, { "epoch": 2.819316081330869, "grad_norm": 1.9864964485168457, "learning_rate": 6.654545454545454e-08, "log_odds_chosen": 1.8496549129486084, "log_odds_ratio": -0.29193249344825745, "logits/chosen": 1.9852664470672607, "logits/rejected": 2.046905994415283, "logps/chosen": -0.23638120293617249, "logps/rejected": -1.0749742984771729, "loss": 0.9969, "nll_loss": 0.9677135348320007, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.023638121783733368, "rewards/margins": 0.08385932445526123, "rewards/rejected": -0.107497438788414, "step": 2035 }, { "epoch": 2.8262476894639557, "grad_norm": 2.766561269760132, "learning_rate": 6.563636363636364e-08, "log_odds_chosen": 2.029188632965088, "log_odds_ratio": -0.26917028427124023, "logits/chosen": 2.03354549407959, "logits/rejected": 2.1179850101470947, "logps/chosen": -0.24409012496471405, "logps/rejected": -1.1629306077957153, "loss": 1.0074, "nll_loss": 0.9805120825767517, "rewards/accuracies": 0.875, "rewards/chosen": -0.024409016594290733, "rewards/margins": 0.09188403934240341, "rewards/rejected": -0.1162930577993393, "step": 2040 }, { "epoch": 2.8331792975970425, "grad_norm": 3.011481523513794, "learning_rate": 6.472727272727272e-08, "log_odds_chosen": 1.706537127494812, "log_odds_ratio": -0.4006377160549164, "logits/chosen": 2.1181445121765137, "logits/rejected": 2.156266689300537, "logps/chosen": -0.3777162432670593, "logps/rejected": -1.2636988162994385, "loss": 1.0484, "nll_loss": 1.0083175897598267, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03777162730693817, "rewards/margins": 0.08859825879335403, "rewards/rejected": -0.1263698935508728, "step": 2045 }, { "epoch": 2.8401109057301293, "grad_norm": 3.116852283477783, "learning_rate": 6.381818181818182e-08, "log_odds_chosen": 1.7585667371749878, "log_odds_ratio": -0.3240087330341339, "logits/chosen": 1.9366189241409302, "logits/rejected": 2.018488883972168, "logps/chosen": -0.2796057164669037, "logps/rejected": -0.9981705546379089, "loss": 0.9852, "nll_loss": 0.9527918100357056, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.027960574254393578, "rewards/margins": 0.07185646891593933, "rewards/rejected": -0.09981703758239746, "step": 2050 }, { "epoch": 2.847042513863216, "grad_norm": 1.7142726182937622, "learning_rate": 6.290909090909092e-08, "log_odds_chosen": 1.9744784832000732, "log_odds_ratio": -0.24878713488578796, "logits/chosen": 1.9690757989883423, "logits/rejected": 2.040696859359741, "logps/chosen": -0.3136541545391083, "logps/rejected": -1.1877192258834839, "loss": 0.9877, "nll_loss": 0.9628265500068665, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.031365420669317245, "rewards/margins": 0.08740650117397308, "rewards/rejected": -0.11877192556858063, "step": 2055 }, { "epoch": 2.8539741219963033, "grad_norm": 1.3764642477035522, "learning_rate": 6.2e-08, "log_odds_chosen": 1.8966907262802124, "log_odds_ratio": -0.2863107919692993, "logits/chosen": 2.0856635570526123, "logits/rejected": 2.136650800704956, "logps/chosen": -0.3037610948085785, "logps/rejected": -1.1746824979782104, "loss": 1.0304, "nll_loss": 1.0017729997634888, "rewards/accuracies": 0.9166666865348816, "rewards/chosen": -0.03037611022591591, "rewards/margins": 0.08709214627742767, "rewards/rejected": -0.11746825277805328, "step": 2060 }, { "epoch": 2.86090573012939, "grad_norm": 2.363109827041626, "learning_rate": 6.10909090909091e-08, "log_odds_chosen": 1.8390313386917114, "log_odds_ratio": -0.30428022146224976, "logits/chosen": 1.9853383302688599, "logits/rejected": 2.032965898513794, "logps/chosen": -0.250918447971344, "logps/rejected": -1.0042502880096436, "loss": 0.9942, "nll_loss": 0.9637566804885864, "rewards/accuracies": 0.875, "rewards/chosen": -0.02509184740483761, "rewards/margins": 0.07533318549394608, "rewards/rejected": -0.10042501986026764, "step": 2065 }, { "epoch": 2.867837338262477, "grad_norm": 2.9388301372528076, "learning_rate": 6.018181818181818e-08, "log_odds_chosen": 1.841733694076538, "log_odds_ratio": -0.2979178726673126, "logits/chosen": 1.9924938678741455, "logits/rejected": 2.053075075149536, "logps/chosen": -0.2633225917816162, "logps/rejected": -1.0852948427200317, "loss": 0.9875, "nll_loss": 0.9577153921127319, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.02633226290345192, "rewards/margins": 0.08219723403453827, "rewards/rejected": -0.10852950066328049, "step": 2070 }, { "epoch": 2.8747689463955637, "grad_norm": 2.952969789505005, "learning_rate": 5.927272727272727e-08, "log_odds_chosen": 1.6144490242004395, "log_odds_ratio": -0.35894396901130676, "logits/chosen": 2.0224180221557617, "logits/rejected": 2.0612215995788574, "logps/chosen": -0.26091185212135315, "logps/rejected": -1.0284487009048462, "loss": 1.0073, "nll_loss": 0.9714316725730896, "rewards/accuracies": 0.8583333492279053, "rewards/chosen": -0.026091186329722404, "rewards/margins": 0.07675368338823318, "rewards/rejected": -0.10284487158060074, "step": 2075 }, { "epoch": 2.8817005545286505, "grad_norm": 2.8261444568634033, "learning_rate": 5.836363636363636e-08, "log_odds_chosen": 1.8391097784042358, "log_odds_ratio": -0.3189569413661957, "logits/chosen": 1.9929203987121582, "logits/rejected": 2.0488264560699463, "logps/chosen": -0.2942853271961212, "logps/rejected": -1.1487162113189697, "loss": 1.02, "nll_loss": 0.988071858882904, "rewards/accuracies": 0.875, "rewards/chosen": -0.029428532347083092, "rewards/margins": 0.08544307202100754, "rewards/rejected": -0.11487161368131638, "step": 2080 }, { "epoch": 2.8886321626617377, "grad_norm": 4.116847991943359, "learning_rate": 5.745454545454545e-08, "log_odds_chosen": 1.619594931602478, "log_odds_ratio": -0.3377019762992859, "logits/chosen": 2.075233221054077, "logits/rejected": 2.119717836380005, "logps/chosen": -0.2857421040534973, "logps/rejected": -0.9990529417991638, "loss": 1.0049, "nll_loss": 0.9711350202560425, "rewards/accuracies": 0.8333333134651184, "rewards/chosen": -0.028574209660291672, "rewards/margins": 0.07133107632398605, "rewards/rejected": -0.09990529716014862, "step": 2085 }, { "epoch": 2.8955637707948245, "grad_norm": 2.5412161350250244, "learning_rate": 5.654545454545454e-08, "log_odds_chosen": 1.8909341096878052, "log_odds_ratio": -0.33492469787597656, "logits/chosen": 2.021022081375122, "logits/rejected": 2.0802001953125, "logps/chosen": -0.28026703000068665, "logps/rejected": -1.1536551713943481, "loss": 1.0157, "nll_loss": 0.982164740562439, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.028026705607771873, "rewards/margins": 0.08733881264925003, "rewards/rejected": -0.11536551266908646, "step": 2090 }, { "epoch": 2.9024953789279113, "grad_norm": 2.7214314937591553, "learning_rate": 5.563636363636364e-08, "log_odds_chosen": 1.7071201801300049, "log_odds_ratio": -0.31407538056373596, "logits/chosen": 1.9751737117767334, "logits/rejected": 2.022892475128174, "logps/chosen": -0.29637211561203003, "logps/rejected": -1.0419337749481201, "loss": 1.0243, "nll_loss": 0.9928818941116333, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.029637213796377182, "rewards/margins": 0.07455617934465408, "rewards/rejected": -0.10419339686632156, "step": 2095 }, { "epoch": 2.909426987060998, "grad_norm": 2.559386730194092, "learning_rate": 5.4727272727272724e-08, "log_odds_chosen": 1.7541364431381226, "log_odds_ratio": -0.3207100033760071, "logits/chosen": 2.0054821968078613, "logits/rejected": 2.0676021575927734, "logps/chosen": -0.29729416966438293, "logps/rejected": -1.0183097124099731, "loss": 0.975, "nll_loss": 0.9429475665092468, "rewards/accuracies": 0.8916666507720947, "rewards/chosen": -0.029729416593909264, "rewards/margins": 0.07210154831409454, "rewards/rejected": -0.10183095932006836, "step": 2100 }, { "epoch": 2.916358595194085, "grad_norm": 3.168686866760254, "learning_rate": 5.381818181818182e-08, "log_odds_chosen": 1.8241220712661743, "log_odds_ratio": -0.3224296271800995, "logits/chosen": 2.084487199783325, "logits/rejected": 2.151993989944458, "logps/chosen": -0.298380970954895, "logps/rejected": -1.1077054738998413, "loss": 1.0205, "nll_loss": 0.9882618188858032, "rewards/accuracies": 0.8666666746139526, "rewards/chosen": -0.02983810193836689, "rewards/margins": 0.08093245327472687, "rewards/rejected": -0.11077055335044861, "step": 2105 }, { "epoch": 2.9232902033271717, "grad_norm": 4.917242527008057, "learning_rate": 5.2909090909090905e-08, "log_odds_chosen": 1.6183534860610962, "log_odds_ratio": -0.3984599709510803, "logits/chosen": 2.01122784614563, "logits/rejected": 2.090132236480713, "logps/chosen": -0.31952646374702454, "logps/rejected": -0.9697479009628296, "loss": 1.012, "nll_loss": 0.9721961617469788, "rewards/accuracies": 0.8166666626930237, "rewards/chosen": -0.03195264935493469, "rewards/margins": 0.06502215564250946, "rewards/rejected": -0.09697480499744415, "step": 2110 }, { "epoch": 2.9302218114602585, "grad_norm": 2.700791120529175, "learning_rate": 5.2e-08, "log_odds_chosen": 1.6960736513137817, "log_odds_ratio": -0.3477153778076172, "logits/chosen": 2.015334129333496, "logits/rejected": 2.0682668685913086, "logps/chosen": -0.26093918085098267, "logps/rejected": -1.0459142923355103, "loss": 1.0222, "nll_loss": 0.9874255657196045, "rewards/accuracies": 0.8416666388511658, "rewards/chosen": -0.026093924418091774, "rewards/margins": 0.078497514128685, "rewards/rejected": -0.10459142178297043, "step": 2115 }, { "epoch": 2.9371534195933457, "grad_norm": 3.925088405609131, "learning_rate": 5.1090909090909086e-08, "log_odds_chosen": 1.4773210287094116, "log_odds_ratio": -0.41333526372909546, "logits/chosen": 2.0795083045959473, "logits/rejected": 2.115938901901245, "logps/chosen": -0.34735921025276184, "logps/rejected": -0.9831670522689819, "loss": 1.0538, "nll_loss": 1.0124287605285645, "rewards/accuracies": 0.8083333373069763, "rewards/chosen": -0.034735921770334244, "rewards/margins": 0.06358078867197037, "rewards/rejected": -0.09831671416759491, "step": 2120 }, { "epoch": 2.9440850277264325, "grad_norm": 3.097028970718384, "learning_rate": 5.0181818181818184e-08, "log_odds_chosen": 1.7917529344558716, "log_odds_ratio": -0.3308008909225464, "logits/chosen": 1.9779587984085083, "logits/rejected": 2.0592360496520996, "logps/chosen": -0.2952510714530945, "logps/rejected": -1.067455530166626, "loss": 1.0471, "nll_loss": 1.0139851570129395, "rewards/accuracies": 0.8833333253860474, "rewards/chosen": -0.02952510491013527, "rewards/margins": 0.07722045481204987, "rewards/rejected": -0.10674557089805603, "step": 2125 }, { "epoch": 2.9510166358595193, "grad_norm": 2.174734354019165, "learning_rate": 4.9272727272727274e-08, "log_odds_chosen": 1.7724860906600952, "log_odds_ratio": -0.3275219798088074, "logits/chosen": 2.0415663719177246, "logits/rejected": 2.103684425354004, "logps/chosen": -0.27193596959114075, "logps/rejected": -1.0836893320083618, "loss": 0.9898, "nll_loss": 0.9570819139480591, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.027193596586585045, "rewards/margins": 0.08117534220218658, "rewards/rejected": -0.10836894065141678, "step": 2130 }, { "epoch": 2.957948243992606, "grad_norm": 1.5665240287780762, "learning_rate": 4.8363636363636365e-08, "log_odds_chosen": 1.7095661163330078, "log_odds_ratio": -0.34748879075050354, "logits/chosen": 1.98307466506958, "logits/rejected": 2.0420520305633545, "logps/chosen": -0.32826271653175354, "logps/rejected": -1.0827791690826416, "loss": 1.0023, "nll_loss": 0.9675683975219727, "rewards/accuracies": 0.9083333611488342, "rewards/chosen": -0.032826270908117294, "rewards/margins": 0.07545164227485657, "rewards/rejected": -0.10827790945768356, "step": 2135 }, { "epoch": 2.9648798521256934, "grad_norm": 3.9010303020477295, "learning_rate": 4.7454545454545455e-08, "log_odds_chosen": 1.8488551378250122, "log_odds_ratio": -0.3088344633579254, "logits/chosen": 2.000403881072998, "logits/rejected": 2.080737352371216, "logps/chosen": -0.298979252576828, "logps/rejected": -1.1432158946990967, "loss": 1.0063, "nll_loss": 0.9754161238670349, "rewards/accuracies": 0.9166666865348816, "rewards/chosen": -0.02989793010056019, "rewards/margins": 0.08442366868257523, "rewards/rejected": -0.11432159692049026, "step": 2140 }, { "epoch": 2.97181146025878, "grad_norm": 2.5673093795776367, "learning_rate": 4.6545454545454546e-08, "log_odds_chosen": 1.8592909574508667, "log_odds_ratio": -0.26686161756515503, "logits/chosen": 2.063077688217163, "logits/rejected": 2.1509907245635986, "logps/chosen": -0.2621632218360901, "logps/rejected": -1.0871644020080566, "loss": 1.0334, "nll_loss": 1.0067225694656372, "rewards/accuracies": 0.9166666865348816, "rewards/chosen": -0.02621631696820259, "rewards/margins": 0.08250012993812561, "rewards/rejected": -0.1087164580821991, "step": 2145 }, { "epoch": 2.978743068391867, "grad_norm": 2.2342443466186523, "learning_rate": 4.5636363636363637e-08, "log_odds_chosen": 1.6307332515716553, "log_odds_ratio": -0.3424622416496277, "logits/chosen": 2.004580497741699, "logits/rejected": 2.060046434402466, "logps/chosen": -0.3020874261856079, "logps/rejected": -1.084688663482666, "loss": 0.9816, "nll_loss": 0.9473193883895874, "rewards/accuracies": 0.875, "rewards/chosen": -0.03020874410867691, "rewards/margins": 0.07826013118028641, "rewards/rejected": -0.10846886783838272, "step": 2150 }, { "epoch": 2.9856746765249538, "grad_norm": 3.1599783897399902, "learning_rate": 4.472727272727273e-08, "log_odds_chosen": 2.105912685394287, "log_odds_ratio": -0.27635109424591064, "logits/chosen": 2.068650245666504, "logits/rejected": 2.123730182647705, "logps/chosen": -0.2944362461566925, "logps/rejected": -1.2996933460235596, "loss": 1.0106, "nll_loss": 0.9829762578010559, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.029443617910146713, "rewards/margins": 0.10052569955587387, "rewards/rejected": -0.12996931374073029, "step": 2155 }, { "epoch": 2.9926062846580406, "grad_norm": 3.6730105876922607, "learning_rate": 4.381818181818182e-08, "log_odds_chosen": 1.8076189756393433, "log_odds_ratio": -0.2718731462955475, "logits/chosen": 2.034381628036499, "logits/rejected": 2.103024959564209, "logps/chosen": -0.2844863831996918, "logps/rejected": -1.0949420928955078, "loss": 1.0227, "nll_loss": 0.9955376982688904, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.028448637574911118, "rewards/margins": 0.08104557543992996, "rewards/rejected": -0.10949420928955078, "step": 2160 }, { "epoch": 2.9995378927911274, "grad_norm": 3.9206016063690186, "learning_rate": 4.290909090909091e-08, "log_odds_chosen": 1.8349543809890747, "log_odds_ratio": -0.27683863043785095, "logits/chosen": 2.036572217941284, "logits/rejected": 2.089813232421875, "logps/chosen": -0.27643078565597534, "logps/rejected": -1.0756059885025024, "loss": 0.9799, "nll_loss": 0.9521928429603577, "rewards/accuracies": 0.8916666507720947, "rewards/chosen": -0.027643078938126564, "rewards/margins": 0.0799175277352333, "rewards/rejected": -0.10756059736013412, "step": 2165 }, { "epoch": 3.0055452865064693, "grad_norm": 1.9081456661224365, "learning_rate": 4.2e-08, "log_odds_chosen": 1.7460095882415771, "log_odds_ratio": -0.32449567317962646, "logits/chosen": 2.021742820739746, "logits/rejected": 2.1034204959869385, "logps/chosen": -0.27256032824516296, "logps/rejected": -1.0215450525283813, "loss": 0.8395, "nll_loss": 0.9364208579063416, "rewards/accuracies": 0.8621795177459717, "rewards/chosen": -0.027256034314632416, "rewards/margins": 0.07489847391843796, "rewards/rejected": -0.10215452313423157, "step": 2170 }, { "epoch": 3.0124768946395566, "grad_norm": 2.545027017593384, "learning_rate": 4.109090909090909e-08, "log_odds_chosen": 1.8810588121414185, "log_odds_ratio": -0.2728542387485504, "logits/chosen": 2.072154998779297, "logits/rejected": 2.1284384727478027, "logps/chosen": -0.28162866830825806, "logps/rejected": -1.0774617195129395, "loss": 1.017, "nll_loss": 0.9897640347480774, "rewards/accuracies": 0.9333333373069763, "rewards/chosen": -0.028162868693470955, "rewards/margins": 0.0795833095908165, "rewards/rejected": -0.1077461913228035, "step": 2175 }, { "epoch": 3.0194085027726434, "grad_norm": 1.6765599250793457, "learning_rate": 4.018181818181818e-08, "log_odds_chosen": 1.817507028579712, "log_odds_ratio": -0.27471134066581726, "logits/chosen": 1.970469355583191, "logits/rejected": 2.050422191619873, "logps/chosen": -0.2666342556476593, "logps/rejected": -1.0124282836914062, "loss": 0.9833, "nll_loss": 0.9558401703834534, "rewards/accuracies": 0.8916666507720947, "rewards/chosen": -0.02666342444717884, "rewards/margins": 0.0745794028043747, "rewards/rejected": -0.10124283283948898, "step": 2180 }, { "epoch": 3.02634011090573, "grad_norm": 1.7218668460845947, "learning_rate": 3.927272727272727e-08, "log_odds_chosen": 1.917079210281372, "log_odds_ratio": -0.24975134432315826, "logits/chosen": 2.128929853439331, "logits/rejected": 2.173793077468872, "logps/chosen": -0.29144442081451416, "logps/rejected": -1.118105173110962, "loss": 1.0045, "nll_loss": 0.9795462489128113, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.029144441708922386, "rewards/margins": 0.08266608417034149, "rewards/rejected": -0.11181053519248962, "step": 2185 }, { "epoch": 3.033271719038817, "grad_norm": 2.8524911403656006, "learning_rate": 3.836363636363636e-08, "log_odds_chosen": 1.9781666994094849, "log_odds_ratio": -0.24611227214336395, "logits/chosen": 2.036329746246338, "logits/rejected": 2.1263298988342285, "logps/chosen": -0.2536916136741638, "logps/rejected": -1.1110204458236694, "loss": 0.9873, "nll_loss": 0.9627164602279663, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.025369159877300262, "rewards/margins": 0.08573289960622787, "rewards/rejected": -0.11110205203294754, "step": 2190 }, { "epoch": 3.040203327171904, "grad_norm": 2.266507387161255, "learning_rate": 3.745454545454546e-08, "log_odds_chosen": 1.695191502571106, "log_odds_ratio": -0.32812872529029846, "logits/chosen": 1.9582918882369995, "logits/rejected": 2.0455925464630127, "logps/chosen": -0.23305271565914154, "logps/rejected": -0.9286764860153198, "loss": 1.0022, "nll_loss": 0.969412088394165, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.023305270820856094, "rewards/margins": 0.06956236064434052, "rewards/rejected": -0.0928676426410675, "step": 2195 }, { "epoch": 3.0471349353049906, "grad_norm": 2.823397159576416, "learning_rate": 3.654545454545455e-08, "log_odds_chosen": 1.8539453744888306, "log_odds_ratio": -0.31512120366096497, "logits/chosen": 1.9888670444488525, "logits/rejected": 2.056990623474121, "logps/chosen": -0.2506329417228699, "logps/rejected": -1.078035593032837, "loss": 0.97, "nll_loss": 0.9385051727294922, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.025063293054699898, "rewards/margins": 0.08274027705192566, "rewards/rejected": -0.10780356079339981, "step": 2200 }, { "epoch": 3.054066543438078, "grad_norm": 2.515324354171753, "learning_rate": 3.563636363636364e-08, "log_odds_chosen": 2.2040858268737793, "log_odds_ratio": -0.2215387225151062, "logits/chosen": 2.07570743560791, "logits/rejected": 2.1536285877227783, "logps/chosen": -0.26311811804771423, "logps/rejected": -1.3125801086425781, "loss": 1.0534, "nll_loss": 1.03126060962677, "rewards/accuracies": 0.9416666626930237, "rewards/chosen": -0.026311814785003662, "rewards/margins": 0.10494618117809296, "rewards/rejected": -0.1312580108642578, "step": 2205 }, { "epoch": 3.0609981515711646, "grad_norm": 2.884657382965088, "learning_rate": 3.472727272727273e-08, "log_odds_chosen": 1.8875175714492798, "log_odds_ratio": -0.26213160157203674, "logits/chosen": 1.9864482879638672, "logits/rejected": 2.0323519706726074, "logps/chosen": -0.28199702501296997, "logps/rejected": -1.1133620738983154, "loss": 1.0323, "nll_loss": 1.006096363067627, "rewards/accuracies": 0.9333333373069763, "rewards/chosen": -0.028199700638651848, "rewards/margins": 0.08313652873039246, "rewards/rejected": -0.11133621633052826, "step": 2210 }, { "epoch": 3.0679297597042514, "grad_norm": 5.779941082000732, "learning_rate": 3.381818181818182e-08, "log_odds_chosen": 1.598193883895874, "log_odds_ratio": -0.3541763126850128, "logits/chosen": 2.0120930671691895, "logits/rejected": 2.105281114578247, "logps/chosen": -0.32157793641090393, "logps/rejected": -0.9493343830108643, "loss": 1.0617, "nll_loss": 1.0262763500213623, "rewards/accuracies": 0.8833333253860474, "rewards/chosen": -0.032157786190509796, "rewards/margins": 0.06277565658092499, "rewards/rejected": -0.09493346512317657, "step": 2215 }, { "epoch": 3.074861367837338, "grad_norm": 1.6296344995498657, "learning_rate": 3.290909090909091e-08, "log_odds_chosen": 2.0749495029449463, "log_odds_ratio": -0.2285868376493454, "logits/chosen": 1.9751384258270264, "logits/rejected": 2.0483574867248535, "logps/chosen": -0.22339893877506256, "logps/rejected": -1.1327685117721558, "loss": 0.9692, "nll_loss": 0.946365475654602, "rewards/accuracies": 0.9666666388511658, "rewards/chosen": -0.022339891642332077, "rewards/margins": 0.09093696624040604, "rewards/rejected": -0.11327686160802841, "step": 2220 }, { "epoch": 3.081792975970425, "grad_norm": 5.483705997467041, "learning_rate": 3.2e-08, "log_odds_chosen": 1.910452127456665, "log_odds_ratio": -0.2830710709095001, "logits/chosen": 2.040644645690918, "logits/rejected": 2.113460063934326, "logps/chosen": -0.2728864550590515, "logps/rejected": -1.134534239768982, "loss": 1.0302, "nll_loss": 1.0018789768218994, "rewards/accuracies": 0.8916666507720947, "rewards/chosen": -0.0272886510938406, "rewards/margins": 0.08616478741168976, "rewards/rejected": -0.11345343291759491, "step": 2225 }, { "epoch": 3.088724584103512, "grad_norm": 3.0457351207733154, "learning_rate": 3.109090909090909e-08, "log_odds_chosen": 1.8377028703689575, "log_odds_ratio": -0.2788829207420349, "logits/chosen": 1.994458794593811, "logits/rejected": 2.058776378631592, "logps/chosen": -0.2481241673231125, "logps/rejected": -1.0059267282485962, "loss": 0.9581, "nll_loss": 0.9301670789718628, "rewards/accuracies": 0.9083333611488342, "rewards/chosen": -0.024812418967485428, "rewards/margins": 0.07578025758266449, "rewards/rejected": -0.10059265792369843, "step": 2230 }, { "epoch": 3.095656192236599, "grad_norm": 1.5724313259124756, "learning_rate": 3.018181818181818e-08, "log_odds_chosen": 1.8434357643127441, "log_odds_ratio": -0.2810656428337097, "logits/chosen": 2.0633890628814697, "logits/rejected": 2.108525276184082, "logps/chosen": -0.27151191234588623, "logps/rejected": -1.0885863304138184, "loss": 0.9965, "nll_loss": 0.9684168696403503, "rewards/accuracies": 0.8666666746139526, "rewards/chosen": -0.027151191607117653, "rewards/margins": 0.08170744776725769, "rewards/rejected": -0.1088586375117302, "step": 2235 }, { "epoch": 3.102587800369686, "grad_norm": 2.0384411811828613, "learning_rate": 2.927272727272727e-08, "log_odds_chosen": 1.992503046989441, "log_odds_ratio": -0.2679726183414459, "logits/chosen": 2.0843007564544678, "logits/rejected": 2.149775505065918, "logps/chosen": -0.2585987150669098, "logps/rejected": -1.1711468696594238, "loss": 0.9832, "nll_loss": 0.9563843607902527, "rewards/accuracies": 0.8916666507720947, "rewards/chosen": -0.02585986815392971, "rewards/margins": 0.0912548154592514, "rewards/rejected": -0.11711468547582626, "step": 2240 }, { "epoch": 3.1095194085027726, "grad_norm": 1.6383320093154907, "learning_rate": 2.836363636363636e-08, "log_odds_chosen": 1.9366320371627808, "log_odds_ratio": -0.29981935024261475, "logits/chosen": 2.024559259414673, "logits/rejected": 2.1117780208587646, "logps/chosen": -0.28914347290992737, "logps/rejected": -1.1294143199920654, "loss": 0.9738, "nll_loss": 0.943781316280365, "rewards/accuracies": 0.8666666746139526, "rewards/chosen": -0.02891434356570244, "rewards/margins": 0.08402708917856216, "rewards/rejected": -0.1129414513707161, "step": 2245 }, { "epoch": 3.1164510166358594, "grad_norm": 2.1520862579345703, "learning_rate": 2.745454545454545e-08, "log_odds_chosen": 1.9886517524719238, "log_odds_ratio": -0.26550471782684326, "logits/chosen": 1.966781497001648, "logits/rejected": 2.0406601428985596, "logps/chosen": -0.25236350297927856, "logps/rejected": -1.1123831272125244, "loss": 0.9942, "nll_loss": 0.9676342010498047, "rewards/accuracies": 0.9416666626930237, "rewards/chosen": -0.025236355140805244, "rewards/margins": 0.08600196242332458, "rewards/rejected": -0.11123832315206528, "step": 2250 }, { "epoch": 3.1233826247689462, "grad_norm": 2.3597357273101807, "learning_rate": 2.6545454545454542e-08, "log_odds_chosen": 1.6445980072021484, "log_odds_ratio": -0.3386446535587311, "logits/chosen": 2.031057834625244, "logits/rejected": 2.0886597633361816, "logps/chosen": -0.2981587052345276, "logps/rejected": -1.0017781257629395, "loss": 1.0319, "nll_loss": 0.9980849027633667, "rewards/accuracies": 0.8416666388511658, "rewards/chosen": -0.02981586940586567, "rewards/margins": 0.07036194205284119, "rewards/rejected": -0.1001778170466423, "step": 2255 }, { "epoch": 3.1303142329020335, "grad_norm": 2.875710964202881, "learning_rate": 2.5636363636363633e-08, "log_odds_chosen": 2.163516044616699, "log_odds_ratio": -0.2714638113975525, "logits/chosen": 2.009424924850464, "logits/rejected": 2.0776355266571045, "logps/chosen": -0.25366875529289246, "logps/rejected": -1.265039324760437, "loss": 0.9918, "nll_loss": 0.9646516442298889, "rewards/accuracies": 0.8666666746139526, "rewards/chosen": -0.025366876274347305, "rewards/margins": 0.10113705694675446, "rewards/rejected": -0.12650392949581146, "step": 2260 }, { "epoch": 3.1372458410351203, "grad_norm": 2.339726686477661, "learning_rate": 2.4727272727272727e-08, "log_odds_chosen": 1.7720131874084473, "log_odds_ratio": -0.3246005177497864, "logits/chosen": 2.034062623977661, "logits/rejected": 2.0977277755737305, "logps/chosen": -0.27337199449539185, "logps/rejected": -1.0653187036514282, "loss": 0.988, "nll_loss": 0.9555687308311462, "rewards/accuracies": 0.8666666746139526, "rewards/chosen": -0.027337197214365005, "rewards/margins": 0.07919467240571976, "rewards/rejected": -0.10653186589479446, "step": 2265 }, { "epoch": 3.144177449168207, "grad_norm": 3.5936927795410156, "learning_rate": 2.3818181818181817e-08, "log_odds_chosen": 1.9385422468185425, "log_odds_ratio": -0.3190802335739136, "logits/chosen": 1.9812796115875244, "logits/rejected": 2.03690767288208, "logps/chosen": -0.2825137674808502, "logps/rejected": -1.2010154724121094, "loss": 1.0062, "nll_loss": 0.9742683172225952, "rewards/accuracies": 0.875, "rewards/chosen": -0.02825137972831726, "rewards/margins": 0.09185018390417099, "rewards/rejected": -0.12010155618190765, "step": 2270 }, { "epoch": 3.151109057301294, "grad_norm": 2.3216748237609863, "learning_rate": 2.2909090909090908e-08, "log_odds_chosen": 1.849832534790039, "log_odds_ratio": -0.30438894033432007, "logits/chosen": 2.030111789703369, "logits/rejected": 2.0937530994415283, "logps/chosen": -0.3096204400062561, "logps/rejected": -1.1795369386672974, "loss": 1.0247, "nll_loss": 0.994240939617157, "rewards/accuracies": 0.8833333253860474, "rewards/chosen": -0.030962049961090088, "rewards/margins": 0.08699165284633636, "rewards/rejected": -0.11795369535684586, "step": 2275 }, { "epoch": 3.1580406654343807, "grad_norm": 5.251974582672119, "learning_rate": 2.2e-08, "log_odds_chosen": 1.73758864402771, "log_odds_ratio": -0.3142777681350708, "logits/chosen": 2.018113851547241, "logits/rejected": 2.0766327381134033, "logps/chosen": -0.25756967067718506, "logps/rejected": -1.0410569906234741, "loss": 0.9954, "nll_loss": 0.9640125036239624, "rewards/accuracies": 0.8833333253860474, "rewards/chosen": -0.025756964460015297, "rewards/margins": 0.07834872603416443, "rewards/rejected": -0.10410568863153458, "step": 2280 }, { "epoch": 3.1649722735674675, "grad_norm": 2.160755157470703, "learning_rate": 2.109090909090909e-08, "log_odds_chosen": 1.7930853366851807, "log_odds_ratio": -0.31781256198883057, "logits/chosen": 1.9471144676208496, "logits/rejected": 2.0215981006622314, "logps/chosen": -0.3217299282550812, "logps/rejected": -1.0115313529968262, "loss": 0.98, "nll_loss": 0.9482495784759521, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.03217298910021782, "rewards/margins": 0.0689801424741745, "rewards/rejected": -0.10115313529968262, "step": 2285 }, { "epoch": 3.1719038817005547, "grad_norm": 2.8207828998565674, "learning_rate": 2.018181818181818e-08, "log_odds_chosen": 1.7178608179092407, "log_odds_ratio": -0.351553738117218, "logits/chosen": 1.946655035018921, "logits/rejected": 2.0031888484954834, "logps/chosen": -0.2946816682815552, "logps/rejected": -1.0428930521011353, "loss": 1.0444, "nll_loss": 1.009276270866394, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.029468165710568428, "rewards/margins": 0.07482115179300308, "rewards/rejected": -0.10428932309150696, "step": 2290 }, { "epoch": 3.1788354898336415, "grad_norm": 3.0449793338775635, "learning_rate": 1.927272727272727e-08, "log_odds_chosen": 1.8244539499282837, "log_odds_ratio": -0.3448461890220642, "logits/chosen": 2.010988712310791, "logits/rejected": 2.0648319721221924, "logps/chosen": -0.29256051778793335, "logps/rejected": -1.0924314260482788, "loss": 0.9988, "nll_loss": 0.9642786383628845, "rewards/accuracies": 0.8416666388511658, "rewards/chosen": -0.029256051406264305, "rewards/margins": 0.07998708635568619, "rewards/rejected": -0.10924313217401505, "step": 2295 }, { "epoch": 3.1857670979667283, "grad_norm": 1.2735953330993652, "learning_rate": 1.836363636363636e-08, "log_odds_chosen": 2.074246406555176, "log_odds_ratio": -0.23903319239616394, "logits/chosen": 2.0425198078155518, "logits/rejected": 2.104451894760132, "logps/chosen": -0.2704167068004608, "logps/rejected": -1.1976983547210693, "loss": 0.9924, "nll_loss": 0.9685426950454712, "rewards/accuracies": 0.9166666865348816, "rewards/chosen": -0.02704167179763317, "rewards/margins": 0.09272817522287369, "rewards/rejected": -0.11976984143257141, "step": 2300 }, { "epoch": 3.192698706099815, "grad_norm": 2.2513015270233154, "learning_rate": 1.7454545454545455e-08, "log_odds_chosen": 1.8696075677871704, "log_odds_ratio": -0.31109151244163513, "logits/chosen": 1.982120156288147, "logits/rejected": 2.0289342403411865, "logps/chosen": -0.2669697701931, "logps/rejected": -1.0606690645217896, "loss": 1.0334, "nll_loss": 1.0023095607757568, "rewards/accuracies": 0.8916666507720947, "rewards/chosen": -0.026696979999542236, "rewards/margins": 0.0793699249625206, "rewards/rejected": -0.10606691986322403, "step": 2305 }, { "epoch": 3.199630314232902, "grad_norm": 3.0249485969543457, "learning_rate": 1.6545454545454545e-08, "log_odds_chosen": 1.5753225088119507, "log_odds_ratio": -0.32907435297966003, "logits/chosen": 1.9598543643951416, "logits/rejected": 2.043677568435669, "logps/chosen": -0.2798163592815399, "logps/rejected": -0.9463704228401184, "loss": 1.0423, "nll_loss": 1.0094271898269653, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.027981635183095932, "rewards/margins": 0.06665540486574173, "rewards/rejected": -0.09463704377412796, "step": 2310 }, { "epoch": 3.2065619223659887, "grad_norm": 2.1163971424102783, "learning_rate": 1.5636363636363636e-08, "log_odds_chosen": 1.8971047401428223, "log_odds_ratio": -0.27131593227386475, "logits/chosen": 1.9652113914489746, "logits/rejected": 2.0411899089813232, "logps/chosen": -0.2609769105911255, "logps/rejected": -1.032325029373169, "loss": 0.9929, "nll_loss": 0.9658178687095642, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.02609768696129322, "rewards/margins": 0.07713483273983002, "rewards/rejected": -0.10323251038789749, "step": 2315 }, { "epoch": 3.213493530499076, "grad_norm": 1.9558539390563965, "learning_rate": 1.4727272727272726e-08, "log_odds_chosen": 1.7015489339828491, "log_odds_ratio": -0.3433685898780823, "logits/chosen": 2.122408866882324, "logits/rejected": 2.172548770904541, "logps/chosen": -0.3318374454975128, "logps/rejected": -1.1208266019821167, "loss": 1.0349, "nll_loss": 1.0005649328231812, "rewards/accuracies": 0.875, "rewards/chosen": -0.0331837423145771, "rewards/margins": 0.07889891415834427, "rewards/rejected": -0.11208265274763107, "step": 2320 }, { "epoch": 3.2204251386321627, "grad_norm": 1.2714937925338745, "learning_rate": 1.3818181818181817e-08, "log_odds_chosen": 2.1636459827423096, "log_odds_ratio": -0.23991011083126068, "logits/chosen": 2.0637595653533936, "logits/rejected": 2.1236019134521484, "logps/chosen": -0.2423422783613205, "logps/rejected": -1.284201741218567, "loss": 0.9579, "nll_loss": 0.9339547753334045, "rewards/accuracies": 0.9333333373069763, "rewards/chosen": -0.02423422783613205, "rewards/margins": 0.10418593138456345, "rewards/rejected": -0.1284201443195343, "step": 2325 }, { "epoch": 3.2273567467652495, "grad_norm": 3.6437788009643555, "learning_rate": 1.2909090909090908e-08, "log_odds_chosen": 2.0622992515563965, "log_odds_ratio": -0.25077375769615173, "logits/chosen": 2.067502737045288, "logits/rejected": 2.1184935569763184, "logps/chosen": -0.28136977553367615, "logps/rejected": -1.2336159944534302, "loss": 0.9928, "nll_loss": 0.9677172899246216, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.028136977925896645, "rewards/margins": 0.09522464126348495, "rewards/rejected": -0.12336160987615585, "step": 2330 }, { "epoch": 3.2342883548983363, "grad_norm": 2.5397536754608154, "learning_rate": 1.1999999999999998e-08, "log_odds_chosen": 1.913984775543213, "log_odds_ratio": -0.26355621218681335, "logits/chosen": 1.9935226440429688, "logits/rejected": 2.0521795749664307, "logps/chosen": -0.24003277719020844, "logps/rejected": -1.0872286558151245, "loss": 1.0179, "nll_loss": 0.9915151000022888, "rewards/accuracies": 0.8666666746139526, "rewards/chosen": -0.024003280326724052, "rewards/margins": 0.08471958339214325, "rewards/rejected": -0.10872285813093185, "step": 2335 }, { "epoch": 3.241219963031423, "grad_norm": 2.6664090156555176, "learning_rate": 1.109090909090909e-08, "log_odds_chosen": 1.7946439981460571, "log_odds_ratio": -0.3102231025695801, "logits/chosen": 2.0916616916656494, "logits/rejected": 2.1227028369903564, "logps/chosen": -0.33324259519577026, "logps/rejected": -1.194060206413269, "loss": 1.027, "nll_loss": 0.9959444403648376, "rewards/accuracies": 0.8833333253860474, "rewards/chosen": -0.033324260264635086, "rewards/margins": 0.08608177304267883, "rewards/rejected": -0.11940603703260422, "step": 2340 }, { "epoch": 3.2481515711645104, "grad_norm": 3.543788194656372, "learning_rate": 1.0181818181818181e-08, "log_odds_chosen": 1.9351673126220703, "log_odds_ratio": -0.28479108214378357, "logits/chosen": 2.082379102706909, "logits/rejected": 2.1343612670898438, "logps/chosen": -0.27910298109054565, "logps/rejected": -1.1840400695800781, "loss": 1.0176, "nll_loss": 0.9891124963760376, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.027910297736525536, "rewards/margins": 0.09049370884895325, "rewards/rejected": -0.11840400844812393, "step": 2345 }, { "epoch": 3.255083179297597, "grad_norm": 2.4029653072357178, "learning_rate": 9.272727272727272e-09, "log_odds_chosen": 1.745149850845337, "log_odds_ratio": -0.3268326222896576, "logits/chosen": 2.009989023208618, "logits/rejected": 2.0562989711761475, "logps/chosen": -0.2905314564704895, "logps/rejected": -1.0186735391616821, "loss": 1.0114, "nll_loss": 0.9786819815635681, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.02905314415693283, "rewards/margins": 0.0728142112493515, "rewards/rejected": -0.10186735540628433, "step": 2350 }, { "epoch": 3.262014787430684, "grad_norm": 1.2835363149642944, "learning_rate": 8.363636363636362e-09, "log_odds_chosen": 2.011457920074463, "log_odds_ratio": -0.2857803404331207, "logits/chosen": 1.9969309568405151, "logits/rejected": 2.0629494190216064, "logps/chosen": -0.2886459529399872, "logps/rejected": -1.2359684705734253, "loss": 1.0145, "nll_loss": 0.9858835339546204, "rewards/accuracies": 0.9166666865348816, "rewards/chosen": -0.028864597901701927, "rewards/margins": 0.09473226219415665, "rewards/rejected": -0.12359685450792313, "step": 2355 }, { "epoch": 3.2689463955637708, "grad_norm": 2.477343797683716, "learning_rate": 7.454545454545453e-09, "log_odds_chosen": 1.7344855070114136, "log_odds_ratio": -0.3474830687046051, "logits/chosen": 1.9719436168670654, "logits/rejected": 2.057311773300171, "logps/chosen": -0.26447659730911255, "logps/rejected": -1.0018370151519775, "loss": 0.9815, "nll_loss": 0.9467440247535706, "rewards/accuracies": 0.8416666388511658, "rewards/chosen": -0.026447657495737076, "rewards/margins": 0.0737360343337059, "rewards/rejected": -0.10018369555473328, "step": 2360 }, { "epoch": 3.2758780036968576, "grad_norm": 2.9011664390563965, "learning_rate": 6.545454545454546e-09, "log_odds_chosen": 2.135871648788452, "log_odds_ratio": -0.23335178196430206, "logits/chosen": 2.0359859466552734, "logits/rejected": 2.091546058654785, "logps/chosen": -0.2491091936826706, "logps/rejected": -1.206799030303955, "loss": 1.0227, "nll_loss": 0.9993228912353516, "rewards/accuracies": 0.9166666865348816, "rewards/chosen": -0.02491091936826706, "rewards/margins": 0.0957689955830574, "rewards/rejected": -0.12067990005016327, "step": 2365 }, { "epoch": 3.2828096118299444, "grad_norm": 2.491895914077759, "learning_rate": 5.6363636363636365e-09, "log_odds_chosen": 1.9308069944381714, "log_odds_ratio": -0.29651203751564026, "logits/chosen": 1.9695427417755127, "logits/rejected": 2.052243709564209, "logps/chosen": -0.2871295213699341, "logps/rejected": -1.1503547430038452, "loss": 0.9909, "nll_loss": 0.9612923860549927, "rewards/accuracies": 0.8916666507720947, "rewards/chosen": -0.028712956234812737, "rewards/margins": 0.08632253110408783, "rewards/rejected": -0.11503548920154572, "step": 2370 }, { "epoch": 3.2897412199630316, "grad_norm": 2.254281520843506, "learning_rate": 4.727272727272727e-09, "log_odds_chosen": 1.8961893320083618, "log_odds_ratio": -0.2896248400211334, "logits/chosen": 2.035944938659668, "logits/rejected": 2.102773427963257, "logps/chosen": -0.30598270893096924, "logps/rejected": -1.146560549736023, "loss": 1.0012, "nll_loss": 0.9722784757614136, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.030598269775509834, "rewards/margins": 0.0840577781200409, "rewards/rejected": -0.11465605348348618, "step": 2375 }, { "epoch": 3.2966728280961184, "grad_norm": 4.881258487701416, "learning_rate": 3.8181818181818185e-09, "log_odds_chosen": 1.9275856018066406, "log_odds_ratio": -0.26452910900115967, "logits/chosen": 2.0649008750915527, "logits/rejected": 2.1088078022003174, "logps/chosen": -0.2894597351551056, "logps/rejected": -1.1926367282867432, "loss": 0.998, "nll_loss": 0.9715353846549988, "rewards/accuracies": 0.9333333373069763, "rewards/chosen": -0.028945976868271828, "rewards/margins": 0.09031769633293152, "rewards/rejected": -0.1192636638879776, "step": 2380 }, { "epoch": 3.303604436229205, "grad_norm": 2.7718567848205566, "learning_rate": 2.909090909090909e-09, "log_odds_chosen": 1.9183131456375122, "log_odds_ratio": -0.29583075642585754, "logits/chosen": 2.068955183029175, "logits/rejected": 2.141144275665283, "logps/chosen": -0.28529077768325806, "logps/rejected": -1.2924492359161377, "loss": 1.0428, "nll_loss": 1.013238787651062, "rewards/accuracies": 0.8916666507720947, "rewards/chosen": -0.028529079630970955, "rewards/margins": 0.10071584582328796, "rewards/rejected": -0.12924490869045258, "step": 2385 }, { "epoch": 3.310536044362292, "grad_norm": 2.2338600158691406, "learning_rate": 2e-09, "log_odds_chosen": 1.802534580230713, "log_odds_ratio": -0.27281951904296875, "logits/chosen": 1.9571995735168457, "logits/rejected": 2.0102591514587402, "logps/chosen": -0.27262741327285767, "logps/rejected": -1.066834568977356, "loss": 1.0008, "nll_loss": 0.9735398888587952, "rewards/accuracies": 0.9166666865348816, "rewards/chosen": -0.027262739837169647, "rewards/margins": 0.07942071557044983, "rewards/rejected": -0.10668346285820007, "step": 2390 }, { "epoch": 3.317467652495379, "grad_norm": 3.0731616020202637, "learning_rate": 1.090909090909091e-09, "log_odds_chosen": 2.0021066665649414, "log_odds_ratio": -0.28030332922935486, "logits/chosen": 1.9898285865783691, "logits/rejected": 2.062572717666626, "logps/chosen": -0.27583804726600647, "logps/rejected": -1.1610000133514404, "loss": 0.9737, "nll_loss": 0.9456390738487244, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.027583809569478035, "rewards/margins": 0.08851619809865952, "rewards/rejected": -0.1161000058054924, "step": 2395 }, { "epoch": 3.324399260628466, "grad_norm": 3.5117971897125244, "learning_rate": 1.8181818181818182e-10, "log_odds_chosen": 1.9471094608306885, "log_odds_ratio": -0.2912288308143616, "logits/chosen": 2.042628049850464, "logits/rejected": 2.106832265853882, "logps/chosen": -0.276217520236969, "logps/rejected": -1.118571162223816, "loss": 0.9807, "nll_loss": 0.9515801668167114, "rewards/accuracies": 0.875, "rewards/chosen": -0.02762174978852272, "rewards/margins": 0.08423535525798798, "rewards/rejected": -0.11185713112354279, "step": 2400 } ], "logging_steps": 5, "max_steps": 2400, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 400, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }