| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.324399260628466, |
| "eval_steps": 400.0, |
| "global_step": 2400, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0013863216266173752, |
| "grad_norm": 16.979148864746094, |
| "learning_rate": 0.0, |
| "log_odds_chosen": -0.018929382786154747, |
| "log_odds_ratio": -0.9500243067741394, |
| "logits/chosen": 1.954408049583435, |
| "logits/rejected": 2.0060200691223145, |
| "logps/chosen": -0.19068074226379395, |
| "logps/rejected": -0.3597089946269989, |
| "loss": 1.7279, |
| "nll_loss": 1.632872462272644, |
| "rewards/accuracies": 0.5833333134651184, |
| "rewards/chosen": -0.019068075343966484, |
| "rewards/margins": 0.016902821138501167, |
| "rewards/rejected": -0.03597090020775795, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.006931608133086876, |
| "grad_norm": 20.4935245513916, |
| "learning_rate": 8e-09, |
| "log_odds_chosen": -0.03683535382151604, |
| "log_odds_ratio": -0.919361412525177, |
| "logits/chosen": 1.4786995649337769, |
| "logits/rejected": 1.501151204109192, |
| "logps/chosen": -0.3465827703475952, |
| "logps/rejected": -0.4160928726196289, |
| "loss": 1.9313, |
| "nll_loss": 1.8393574953079224, |
| "rewards/accuracies": 0.5416666865348816, |
| "rewards/chosen": -0.0346582755446434, |
| "rewards/margins": 0.006951010320335627, |
| "rewards/rejected": -0.04160928353667259, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.013863216266173753, |
| "grad_norm": 21.816869735717773, |
| "learning_rate": 1.8e-08, |
| "log_odds_chosen": 0.004844508599489927, |
| "log_odds_ratio": -0.8253452181816101, |
| "logits/chosen": 1.6159125566482544, |
| "logits/rejected": 1.631380319595337, |
| "logps/chosen": -0.29385247826576233, |
| "logps/rejected": -0.3623644709587097, |
| "loss": 1.8841, |
| "nll_loss": 1.8015655279159546, |
| "rewards/accuracies": 0.5416666865348816, |
| "rewards/chosen": -0.029385250061750412, |
| "rewards/margins": 0.006851200480014086, |
| "rewards/rejected": -0.03623645007610321, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.020794824399260628, |
| "grad_norm": 23.262466430664062, |
| "learning_rate": 2.8000000000000003e-08, |
| "log_odds_chosen": 0.015412552282214165, |
| "log_odds_ratio": -0.8413525819778442, |
| "logits/chosen": 1.5471739768981934, |
| "logits/rejected": 1.581672191619873, |
| "logps/chosen": -0.29625552892684937, |
| "logps/rejected": -0.3712186813354492, |
| "loss": 1.8182, |
| "nll_loss": 1.7340798377990723, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": -0.029625553637742996, |
| "rewards/margins": 0.00749631691724062, |
| "rewards/rejected": -0.03712187334895134, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.027726432532347505, |
| "grad_norm": 23.77545166015625, |
| "learning_rate": 3.7999999999999996e-08, |
| "log_odds_chosen": 0.13983069360256195, |
| "log_odds_ratio": -0.7581052184104919, |
| "logits/chosen": 1.5518994331359863, |
| "logits/rejected": 1.5775402784347534, |
| "logps/chosen": -0.3246292769908905, |
| "logps/rejected": -0.4345749020576477, |
| "loss": 1.9212, |
| "nll_loss": 1.8454102277755737, |
| "rewards/accuracies": 0.5666666626930237, |
| "rewards/chosen": -0.03246293216943741, |
| "rewards/margins": 0.010994565673172474, |
| "rewards/rejected": -0.04345749691128731, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.03465804066543438, |
| "grad_norm": 21.824874877929688, |
| "learning_rate": 4.799999999999999e-08, |
| "log_odds_chosen": -0.025701021775603294, |
| "log_odds_ratio": -0.8547807931900024, |
| "logits/chosen": 1.6149464845657349, |
| "logits/rejected": 1.6376367807388306, |
| "logps/chosen": -0.3293762803077698, |
| "logps/rejected": -0.3794548809528351, |
| "loss": 1.916, |
| "nll_loss": 1.8305460214614868, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.032937631011009216, |
| "rewards/margins": 0.005007854197174311, |
| "rewards/rejected": -0.03794548660516739, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.041589648798521256, |
| "grad_norm": 23.641414642333984, |
| "learning_rate": 5.7999999999999997e-08, |
| "log_odds_chosen": -0.01403873972594738, |
| "log_odds_ratio": -0.8547641634941101, |
| "logits/chosen": 1.6201530694961548, |
| "logits/rejected": 1.6380269527435303, |
| "logps/chosen": -0.3491577208042145, |
| "logps/rejected": -0.4028278887271881, |
| "loss": 1.8577, |
| "nll_loss": 1.7722562551498413, |
| "rewards/accuracies": 0.5166666507720947, |
| "rewards/chosen": -0.034915778785943985, |
| "rewards/margins": 0.00536701874807477, |
| "rewards/rejected": -0.04028278589248657, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.04852125693160813, |
| "grad_norm": 21.654430389404297, |
| "learning_rate": 6.8e-08, |
| "log_odds_chosen": 0.17713108658790588, |
| "log_odds_ratio": -0.7579152584075928, |
| "logits/chosen": 1.5499821901321411, |
| "logits/rejected": 1.5707145929336548, |
| "logps/chosen": -0.31028756499290466, |
| "logps/rejected": -0.4093845784664154, |
| "loss": 1.844, |
| "nll_loss": 1.7682510614395142, |
| "rewards/accuracies": 0.6083333492279053, |
| "rewards/chosen": -0.031028758734464645, |
| "rewards/margins": 0.009909691289067268, |
| "rewards/rejected": -0.04093845188617706, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.05545286506469501, |
| "grad_norm": 30.187108993530273, |
| "learning_rate": 7.8e-08, |
| "log_odds_chosen": 0.18452158570289612, |
| "log_odds_ratio": -0.7830556035041809, |
| "logits/chosen": 1.6507318019866943, |
| "logits/rejected": 1.6740870475769043, |
| "logps/chosen": -0.28885844349861145, |
| "logps/rejected": -0.38478565216064453, |
| "loss": 1.8536, |
| "nll_loss": 1.775251865386963, |
| "rewards/accuracies": 0.5833333134651184, |
| "rewards/chosen": -0.028885845094919205, |
| "rewards/margins": 0.00959271751344204, |
| "rewards/rejected": -0.03847856447100639, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.062384473197781884, |
| "grad_norm": 20.389713287353516, |
| "learning_rate": 8.8e-08, |
| "log_odds_chosen": 0.02360691875219345, |
| "log_odds_ratio": -0.8329288959503174, |
| "logits/chosen": 1.6618582010269165, |
| "logits/rejected": 1.676173448562622, |
| "logps/chosen": -0.35506105422973633, |
| "logps/rejected": -0.4218937158584595, |
| "loss": 1.888, |
| "nll_loss": 1.8047412633895874, |
| "rewards/accuracies": 0.5333333611488342, |
| "rewards/chosen": -0.03550610691308975, |
| "rewards/margins": 0.006683265324681997, |
| "rewards/rejected": -0.04218936711549759, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.06931608133086876, |
| "grad_norm": 18.37879180908203, |
| "learning_rate": 9.799999999999999e-08, |
| "log_odds_chosen": 0.11584530770778656, |
| "log_odds_ratio": -0.768284022808075, |
| "logits/chosen": 1.658928394317627, |
| "logits/rejected": 1.6891582012176514, |
| "logps/chosen": -0.3134225904941559, |
| "logps/rejected": -0.41441354155540466, |
| "loss": 1.8648, |
| "nll_loss": 1.78793203830719, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": -0.03134226053953171, |
| "rewards/margins": 0.01009910274296999, |
| "rewards/rejected": -0.04144136235117912, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.07624768946395563, |
| "grad_norm": 19.63814926147461, |
| "learning_rate": 1.08e-07, |
| "log_odds_chosen": -0.07513849437236786, |
| "log_odds_ratio": -0.8687005639076233, |
| "logits/chosen": 1.5901387929916382, |
| "logits/rejected": 1.600988507270813, |
| "logps/chosen": -0.3237656056880951, |
| "logps/rejected": -0.36480894684791565, |
| "loss": 1.8659, |
| "nll_loss": 1.7789969444274902, |
| "rewards/accuracies": 0.5416666865348816, |
| "rewards/chosen": -0.03237656131386757, |
| "rewards/margins": 0.004104338586330414, |
| "rewards/rejected": -0.03648089990019798, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.08317929759704251, |
| "grad_norm": 21.044511795043945, |
| "learning_rate": 1.1799999999999998e-07, |
| "log_odds_chosen": 0.12105648219585419, |
| "log_odds_ratio": -0.8005534410476685, |
| "logits/chosen": 1.7121615409851074, |
| "logits/rejected": 1.7367610931396484, |
| "logps/chosen": -0.2864932119846344, |
| "logps/rejected": -0.41227853298187256, |
| "loss": 1.8738, |
| "nll_loss": 1.793702483177185, |
| "rewards/accuracies": 0.5666666626930237, |
| "rewards/chosen": -0.02864932268857956, |
| "rewards/margins": 0.012578531168401241, |
| "rewards/rejected": -0.04122785106301308, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.09011090573012939, |
| "grad_norm": 15.05855941772461, |
| "learning_rate": 1.28e-07, |
| "log_odds_chosen": -0.08899393677711487, |
| "log_odds_ratio": -0.9060453772544861, |
| "logits/chosen": 1.7786991596221924, |
| "logits/rejected": 1.7979838848114014, |
| "logps/chosen": -0.2860961854457855, |
| "logps/rejected": -0.35240638256073, |
| "loss": 1.8233, |
| "nll_loss": 1.7327053546905518, |
| "rewards/accuracies": 0.5416666865348816, |
| "rewards/chosen": -0.028609616681933403, |
| "rewards/margins": 0.006631023250520229, |
| "rewards/rejected": -0.03524063900113106, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.09704251386321626, |
| "grad_norm": 12.863988876342773, |
| "learning_rate": 1.38e-07, |
| "log_odds_chosen": 0.02776586450636387, |
| "log_odds_ratio": -0.8355816006660461, |
| "logits/chosen": 1.926443099975586, |
| "logits/rejected": 1.9493002891540527, |
| "logps/chosen": -0.3111321032047272, |
| "logps/rejected": -0.3676571547985077, |
| "loss": 1.7489, |
| "nll_loss": 1.6653305292129517, |
| "rewards/accuracies": 0.5833333134651184, |
| "rewards/chosen": -0.031113211065530777, |
| "rewards/margins": 0.005652503110468388, |
| "rewards/rejected": -0.03676571324467659, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.10397412199630314, |
| "grad_norm": 14.937926292419434, |
| "learning_rate": 1.48e-07, |
| "log_odds_chosen": 0.21290098130702972, |
| "log_odds_ratio": -0.7118617296218872, |
| "logits/chosen": 1.9580568075180054, |
| "logits/rejected": 1.9671556949615479, |
| "logps/chosen": -0.33254608511924744, |
| "logps/rejected": -0.42474591732025146, |
| "loss": 1.842, |
| "nll_loss": 1.770819902420044, |
| "rewards/accuracies": 0.5833333134651184, |
| "rewards/chosen": -0.033254608511924744, |
| "rewards/margins": 0.009219982661306858, |
| "rewards/rejected": -0.04247459024190903, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.11090573012939002, |
| "grad_norm": 12.081910133361816, |
| "learning_rate": 1.58e-07, |
| "log_odds_chosen": 0.2506192624568939, |
| "log_odds_ratio": -0.7591946721076965, |
| "logits/chosen": 1.9343218803405762, |
| "logits/rejected": 1.9571690559387207, |
| "logps/chosen": -0.28907135128974915, |
| "logps/rejected": -0.4210290014743805, |
| "loss": 1.8269, |
| "nll_loss": 1.7509891986846924, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.028907136991620064, |
| "rewards/margins": 0.013195758685469627, |
| "rewards/rejected": -0.04210289567708969, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.1178373382624769, |
| "grad_norm": 10.16718864440918, |
| "learning_rate": 1.68e-07, |
| "log_odds_chosen": 0.15295961499214172, |
| "log_odds_ratio": -0.7727136015892029, |
| "logits/chosen": 1.9498001337051392, |
| "logits/rejected": 1.9737645387649536, |
| "logps/chosen": -0.2715214490890503, |
| "logps/rejected": -0.39682307839393616, |
| "loss": 1.7426, |
| "nll_loss": 1.665367603302002, |
| "rewards/accuracies": 0.5833333134651184, |
| "rewards/chosen": -0.02715214155614376, |
| "rewards/margins": 0.012530164793133736, |
| "rewards/rejected": -0.039682310074567795, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.12476894639556377, |
| "grad_norm": 8.379185676574707, |
| "learning_rate": 1.78e-07, |
| "log_odds_chosen": 0.011365304701030254, |
| "log_odds_ratio": -0.8190609216690063, |
| "logits/chosen": 2.1162543296813965, |
| "logits/rejected": 2.1363251209259033, |
| "logps/chosen": -0.32597094774246216, |
| "logps/rejected": -0.37424901127815247, |
| "loss": 1.7812, |
| "nll_loss": 1.699331521987915, |
| "rewards/accuracies": 0.5416666865348816, |
| "rewards/chosen": -0.032597098499536514, |
| "rewards/margins": 0.004827807657420635, |
| "rewards/rejected": -0.03742489963769913, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.13170055452865065, |
| "grad_norm": 5.4643874168396, |
| "learning_rate": 1.88e-07, |
| "log_odds_chosen": 0.3046998977661133, |
| "log_odds_ratio": -0.710991621017456, |
| "logits/chosen": 2.2868356704711914, |
| "logits/rejected": 2.319998264312744, |
| "logps/chosen": -0.2605274021625519, |
| "logps/rejected": -0.41061633825302124, |
| "loss": 1.6722, |
| "nll_loss": 1.6011360883712769, |
| "rewards/accuracies": 0.6333333253860474, |
| "rewards/chosen": -0.026052741333842278, |
| "rewards/margins": 0.015008894726634026, |
| "rewards/rejected": -0.0410616360604763, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.13863216266173753, |
| "grad_norm": 5.312658309936523, |
| "learning_rate": 1.98e-07, |
| "log_odds_chosen": 0.08499274402856827, |
| "log_odds_ratio": -0.8072493672370911, |
| "logits/chosen": 2.332758665084839, |
| "logits/rejected": 2.354078769683838, |
| "logps/chosen": -0.29434487223625183, |
| "logps/rejected": -0.36028656363487244, |
| "loss": 1.6704, |
| "nll_loss": 1.5896700620651245, |
| "rewards/accuracies": 0.6083333492279053, |
| "rewards/chosen": -0.029434483498334885, |
| "rewards/margins": 0.006594173610210419, |
| "rewards/rejected": -0.0360286645591259, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.1455637707948244, |
| "grad_norm": 4.78338098526001, |
| "learning_rate": 2.08e-07, |
| "log_odds_chosen": 0.13001103699207306, |
| "log_odds_ratio": -0.7748425602912903, |
| "logits/chosen": 2.3882839679718018, |
| "logits/rejected": 2.4127211570739746, |
| "logps/chosen": -0.2966289222240448, |
| "logps/rejected": -0.39911049604415894, |
| "loss": 1.6961, |
| "nll_loss": 1.6186034679412842, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": -0.02966289594769478, |
| "rewards/margins": 0.01024815533310175, |
| "rewards/rejected": -0.039911042898893356, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.15249537892791126, |
| "grad_norm": 5.31309700012207, |
| "learning_rate": 2.18e-07, |
| "log_odds_chosen": 0.0922786071896553, |
| "log_odds_ratio": -0.8008362054824829, |
| "logits/chosen": 2.4012527465820312, |
| "logits/rejected": 2.405054807662964, |
| "logps/chosen": -0.31135261058807373, |
| "logps/rejected": -0.40367835760116577, |
| "loss": 1.6347, |
| "nll_loss": 1.5546340942382812, |
| "rewards/accuracies": 0.5666666626930237, |
| "rewards/chosen": -0.031135262921452522, |
| "rewards/margins": 0.009232571348547935, |
| "rewards/rejected": -0.04036783427000046, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.15942698706099814, |
| "grad_norm": 4.425213813781738, |
| "learning_rate": 2.2799999999999998e-07, |
| "log_odds_chosen": -0.02769007720053196, |
| "log_odds_ratio": -0.8615487813949585, |
| "logits/chosen": 2.4684412479400635, |
| "logits/rejected": 2.4840517044067383, |
| "logps/chosen": -0.28860214352607727, |
| "logps/rejected": -0.3511938452720642, |
| "loss": 1.5912, |
| "nll_loss": 1.5050197839736938, |
| "rewards/accuracies": 0.5166666507720947, |
| "rewards/chosen": -0.028860213235020638, |
| "rewards/margins": 0.006259171757847071, |
| "rewards/rejected": -0.03511938080191612, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.16635859519408502, |
| "grad_norm": 4.366436958312988, |
| "learning_rate": 2.38e-07, |
| "log_odds_chosen": 0.07487554848194122, |
| "log_odds_ratio": -0.7914910912513733, |
| "logits/chosen": 2.51110577583313, |
| "logits/rejected": 2.52194881439209, |
| "logps/chosen": -0.3162931501865387, |
| "logps/rejected": -0.3853374719619751, |
| "loss": 1.6602, |
| "nll_loss": 1.5810089111328125, |
| "rewards/accuracies": 0.5416666865348816, |
| "rewards/chosen": -0.03162931278347969, |
| "rewards/margins": 0.006904428359121084, |
| "rewards/rejected": -0.03853374347090721, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.1732902033271719, |
| "grad_norm": 3.4541594982147217, |
| "learning_rate": 2.48e-07, |
| "log_odds_chosen": 0.24983125925064087, |
| "log_odds_ratio": -0.7045524716377258, |
| "logits/chosen": 2.5535173416137695, |
| "logits/rejected": 2.5766799449920654, |
| "logps/chosen": -0.2637786567211151, |
| "logps/rejected": -0.3734634816646576, |
| "loss": 1.5197, |
| "nll_loss": 1.4492452144622803, |
| "rewards/accuracies": 0.6416666507720947, |
| "rewards/chosen": -0.026377864181995392, |
| "rewards/margins": 0.01096847839653492, |
| "rewards/rejected": -0.03734634816646576, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.18022181146025879, |
| "grad_norm": 3.3947913646698, |
| "learning_rate": 2.58e-07, |
| "log_odds_chosen": 0.13998886942863464, |
| "log_odds_ratio": -0.7501145005226135, |
| "logits/chosen": 2.617069959640503, |
| "logits/rejected": 2.6200926303863525, |
| "logps/chosen": -0.289898544549942, |
| "logps/rejected": -0.3845054805278778, |
| "loss": 1.5256, |
| "nll_loss": 1.45059072971344, |
| "rewards/accuracies": 0.5916666388511658, |
| "rewards/chosen": -0.02898985706269741, |
| "rewards/margins": 0.00946069322526455, |
| "rewards/rejected": -0.03845055028796196, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.18715341959334567, |
| "grad_norm": 2.9145822525024414, |
| "learning_rate": 2.68e-07, |
| "log_odds_chosen": 0.10625941306352615, |
| "log_odds_ratio": -0.7714784145355225, |
| "logits/chosen": 2.6704909801483154, |
| "logits/rejected": 2.686890125274658, |
| "logps/chosen": -0.2948876619338989, |
| "logps/rejected": -0.3641115128993988, |
| "loss": 1.4299, |
| "nll_loss": 1.3527849912643433, |
| "rewards/accuracies": 0.5333333611488342, |
| "rewards/chosen": -0.02948876842856407, |
| "rewards/margins": 0.006922383327037096, |
| "rewards/rejected": -0.03641115128993988, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.19408502772643252, |
| "grad_norm": 3.358665704727173, |
| "learning_rate": 2.78e-07, |
| "log_odds_chosen": 0.09461755305528641, |
| "log_odds_ratio": -0.735588014125824, |
| "logits/chosen": 2.7141809463500977, |
| "logits/rejected": 2.725619077682495, |
| "logps/chosen": -0.2867269814014435, |
| "logps/rejected": -0.3572947680950165, |
| "loss": 1.4621, |
| "nll_loss": 1.3885754346847534, |
| "rewards/accuracies": 0.5666666626930237, |
| "rewards/chosen": -0.028672698885202408, |
| "rewards/margins": 0.007056778762489557, |
| "rewards/rejected": -0.03572947904467583, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.2010166358595194, |
| "grad_norm": 3.4602420330047607, |
| "learning_rate": 2.88e-07, |
| "log_odds_chosen": 0.18481667339801788, |
| "log_odds_ratio": -0.7241543531417847, |
| "logits/chosen": 2.639542818069458, |
| "logits/rejected": 2.6469709873199463, |
| "logps/chosen": -0.2566547989845276, |
| "logps/rejected": -0.3493967652320862, |
| "loss": 1.4112, |
| "nll_loss": 1.3387987613677979, |
| "rewards/accuracies": 0.5833333134651184, |
| "rewards/chosen": -0.02566548064351082, |
| "rewards/margins": 0.009274197742342949, |
| "rewards/rejected": -0.03493968024849892, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.20794824399260628, |
| "grad_norm": 3.3385841846466064, |
| "learning_rate": 2.98e-07, |
| "log_odds_chosen": 0.14522768557071686, |
| "log_odds_ratio": -0.7345627546310425, |
| "logits/chosen": 2.535898447036743, |
| "logits/rejected": 2.555110216140747, |
| "logps/chosen": -0.3192240595817566, |
| "logps/rejected": -0.39829033613204956, |
| "loss": 1.3993, |
| "nll_loss": 1.3258506059646606, |
| "rewards/accuracies": 0.550000011920929, |
| "rewards/chosen": -0.03192240744829178, |
| "rewards/margins": 0.007906629703938961, |
| "rewards/rejected": -0.039829038083553314, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.21487985212569316, |
| "grad_norm": 3.195482015609741, |
| "learning_rate": 3.08e-07, |
| "log_odds_chosen": 0.32608503103256226, |
| "log_odds_ratio": -0.6445600390434265, |
| "logits/chosen": 2.3535492420196533, |
| "logits/rejected": 2.367779493331909, |
| "logps/chosen": -0.2523113787174225, |
| "logps/rejected": -0.35818183422088623, |
| "loss": 1.3715, |
| "nll_loss": 1.3070040941238403, |
| "rewards/accuracies": 0.6083333492279053, |
| "rewards/chosen": -0.0252311360090971, |
| "rewards/margins": 0.010587050579488277, |
| "rewards/rejected": -0.0358181856572628, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.22181146025878004, |
| "grad_norm": 2.6090826988220215, |
| "learning_rate": 3.18e-07, |
| "log_odds_chosen": 0.38106462359428406, |
| "log_odds_ratio": -0.6169842481613159, |
| "logits/chosen": 2.301053047180176, |
| "logits/rejected": 2.3180480003356934, |
| "logps/chosen": -0.2931780219078064, |
| "logps/rejected": -0.4148869812488556, |
| "loss": 1.3188, |
| "nll_loss": 1.2571097612380981, |
| "rewards/accuracies": 0.6166666746139526, |
| "rewards/chosen": -0.02931780181825161, |
| "rewards/margins": 0.01217089593410492, |
| "rewards/rejected": -0.04148869961500168, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.22874306839186692, |
| "grad_norm": 2.2898175716400146, |
| "learning_rate": 3.2799999999999997e-07, |
| "log_odds_chosen": 0.24468782544136047, |
| "log_odds_ratio": -0.6965723037719727, |
| "logits/chosen": 2.494852304458618, |
| "logits/rejected": 2.505509376525879, |
| "logps/chosen": -0.28601107001304626, |
| "logps/rejected": -0.4080314338207245, |
| "loss": 1.2778, |
| "nll_loss": 1.2081010341644287, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.02860110253095627, |
| "rewards/margins": 0.012202044948935509, |
| "rewards/rejected": -0.040803149342536926, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.2356746765249538, |
| "grad_norm": 2.1119935512542725, |
| "learning_rate": 3.38e-07, |
| "log_odds_chosen": 0.30791595578193665, |
| "log_odds_ratio": -0.6434581279754639, |
| "logits/chosen": 2.4814131259918213, |
| "logits/rejected": 2.492072105407715, |
| "logps/chosen": -0.29261448979377747, |
| "logps/rejected": -0.4158262610435486, |
| "loss": 1.2693, |
| "nll_loss": 1.2049847841262817, |
| "rewards/accuracies": 0.5583333373069763, |
| "rewards/chosen": -0.029261449351906776, |
| "rewards/margins": 0.012321173213422298, |
| "rewards/rejected": -0.0415826216340065, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.24260628465804066, |
| "grad_norm": 2.052659511566162, |
| "learning_rate": 3.48e-07, |
| "log_odds_chosen": 0.2674095928668976, |
| "log_odds_ratio": -0.6604223847389221, |
| "logits/chosen": 2.45322847366333, |
| "logits/rejected": 2.4553847312927246, |
| "logps/chosen": -0.27184900641441345, |
| "logps/rejected": -0.37303584814071655, |
| "loss": 1.269, |
| "nll_loss": 1.202986240386963, |
| "rewards/accuracies": 0.5833333134651184, |
| "rewards/chosen": -0.027184901759028435, |
| "rewards/margins": 0.010118687525391579, |
| "rewards/rejected": -0.037303585559129715, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.24953789279112754, |
| "grad_norm": 1.9022108316421509, |
| "learning_rate": 3.58e-07, |
| "log_odds_chosen": 0.3532262146472931, |
| "log_odds_ratio": -0.6328123211860657, |
| "logits/chosen": 2.6101863384246826, |
| "logits/rejected": 2.6266489028930664, |
| "logps/chosen": -0.2756868898868561, |
| "logps/rejected": -0.3959445059299469, |
| "loss": 1.2196, |
| "nll_loss": 1.1562813520431519, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.027568688616156578, |
| "rewards/margins": 0.012025760486721992, |
| "rewards/rejected": -0.03959444910287857, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.25646950092421444, |
| "grad_norm": 2.0479915142059326, |
| "learning_rate": 3.68e-07, |
| "log_odds_chosen": 0.1621844470500946, |
| "log_odds_ratio": -0.7073792219161987, |
| "logits/chosen": 2.635596990585327, |
| "logits/rejected": 2.6427814960479736, |
| "logps/chosen": -0.28807908296585083, |
| "logps/rejected": -0.35789498686790466, |
| "loss": 1.1997, |
| "nll_loss": 1.1289515495300293, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": -0.028807910159230232, |
| "rewards/margins": 0.0069815958850085735, |
| "rewards/rejected": -0.035789504647254944, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.2634011090573013, |
| "grad_norm": 1.7852911949157715, |
| "learning_rate": 3.7799999999999997e-07, |
| "log_odds_chosen": 0.22550253570079803, |
| "log_odds_ratio": -0.6922202706336975, |
| "logits/chosen": 2.6954963207244873, |
| "logits/rejected": 2.7063536643981934, |
| "logps/chosen": -0.29761365056037903, |
| "logps/rejected": -0.3754601776599884, |
| "loss": 1.2128, |
| "nll_loss": 1.143579125404358, |
| "rewards/accuracies": 0.550000011920929, |
| "rewards/chosen": -0.02976136840879917, |
| "rewards/margins": 0.007784651126712561, |
| "rewards/rejected": -0.03754602000117302, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.27033271719038815, |
| "grad_norm": 2.0064260959625244, |
| "learning_rate": 3.88e-07, |
| "log_odds_chosen": 0.30835989117622375, |
| "log_odds_ratio": -0.6616735458374023, |
| "logits/chosen": 2.747114896774292, |
| "logits/rejected": 2.756840229034424, |
| "logps/chosen": -0.2845754623413086, |
| "logps/rejected": -0.3947003185749054, |
| "loss": 1.182, |
| "nll_loss": 1.1158353090286255, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.02845754846930504, |
| "rewards/margins": 0.011012484319508076, |
| "rewards/rejected": -0.03947003558278084, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.27726432532347506, |
| "grad_norm": 1.6967897415161133, |
| "learning_rate": 3.98e-07, |
| "log_odds_chosen": 0.36090362071990967, |
| "log_odds_ratio": -0.6156808733940125, |
| "logits/chosen": 2.736931562423706, |
| "logits/rejected": 2.7524020671844482, |
| "logps/chosen": -0.2760154902935028, |
| "logps/rejected": -0.39298757910728455, |
| "loss": 1.2024, |
| "nll_loss": 1.1408705711364746, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.02760155126452446, |
| "rewards/margins": 0.011697209440171719, |
| "rewards/rejected": -0.03929876163601875, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.2841959334565619, |
| "grad_norm": 1.344346046447754, |
| "learning_rate": 3.9927272727272724e-07, |
| "log_odds_chosen": 0.44202545285224915, |
| "log_odds_ratio": -0.629891574382782, |
| "logits/chosen": 2.8161656856536865, |
| "logits/rejected": 2.827834129333496, |
| "logps/chosen": -0.27560955286026, |
| "logps/rejected": -0.43479490280151367, |
| "loss": 1.1696, |
| "nll_loss": 1.1066458225250244, |
| "rewards/accuracies": 0.5833333134651184, |
| "rewards/chosen": -0.02756096050143242, |
| "rewards/margins": 0.015918532386422157, |
| "rewards/rejected": -0.04347948729991913, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.2911275415896488, |
| "grad_norm": 1.632926344871521, |
| "learning_rate": 3.983636363636363e-07, |
| "log_odds_chosen": 0.27023443579673767, |
| "log_odds_ratio": -0.7081299424171448, |
| "logits/chosen": 2.6833786964416504, |
| "logits/rejected": 2.696648359298706, |
| "logps/chosen": -0.29506218433380127, |
| "logps/rejected": -0.4175509214401245, |
| "loss": 1.171, |
| "nll_loss": 1.100161075592041, |
| "rewards/accuracies": 0.5666666626930237, |
| "rewards/chosen": -0.029506217688322067, |
| "rewards/margins": 0.012248875573277473, |
| "rewards/rejected": -0.04175509512424469, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.2980591497227357, |
| "grad_norm": 1.6668481826782227, |
| "learning_rate": 3.9745454545454543e-07, |
| "log_odds_chosen": 0.3347550332546234, |
| "log_odds_ratio": -0.6504988074302673, |
| "logits/chosen": 2.6714982986450195, |
| "logits/rejected": 2.6816256046295166, |
| "logps/chosen": -0.29570263624191284, |
| "logps/rejected": -0.42322224378585815, |
| "loss": 1.1627, |
| "nll_loss": 1.0976592302322388, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.029570268467068672, |
| "rewards/margins": 0.012751961126923561, |
| "rewards/rejected": -0.04232222959399223, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.3049907578558225, |
| "grad_norm": 1.663832664489746, |
| "learning_rate": 3.965454545454545e-07, |
| "log_odds_chosen": 0.329058974981308, |
| "log_odds_ratio": -0.6497308611869812, |
| "logits/chosen": 2.728097438812256, |
| "logits/rejected": 2.751708507537842, |
| "logps/chosen": -0.3120826184749603, |
| "logps/rejected": -0.44624626636505127, |
| "loss": 1.2113, |
| "nll_loss": 1.1463485956192017, |
| "rewards/accuracies": 0.5916666388511658, |
| "rewards/chosen": -0.031208263710141182, |
| "rewards/margins": 0.013416365720331669, |
| "rewards/rejected": -0.04462462291121483, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.31192236598890943, |
| "grad_norm": 1.2902253866195679, |
| "learning_rate": 3.9563636363636363e-07, |
| "log_odds_chosen": 0.3168531656265259, |
| "log_odds_ratio": -0.6508561372756958, |
| "logits/chosen": 2.767462968826294, |
| "logits/rejected": 2.776341438293457, |
| "logps/chosen": -0.2664511799812317, |
| "logps/rejected": -0.3841439485549927, |
| "loss": 1.1313, |
| "nll_loss": 1.0662100315093994, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": -0.02664511650800705, |
| "rewards/margins": 0.011769277974963188, |
| "rewards/rejected": -0.03841439634561539, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.3188539741219963, |
| "grad_norm": 1.4394150972366333, |
| "learning_rate": 3.947272727272727e-07, |
| "log_odds_chosen": 0.5200116038322449, |
| "log_odds_ratio": -0.5801463723182678, |
| "logits/chosen": 2.7293012142181396, |
| "logits/rejected": 2.7543258666992188, |
| "logps/chosen": -0.2802920639514923, |
| "logps/rejected": -0.4830513894557953, |
| "loss": 1.1608, |
| "nll_loss": 1.1027837991714478, |
| "rewards/accuracies": 0.6583333611488342, |
| "rewards/chosen": -0.02802920900285244, |
| "rewards/margins": 0.020275937393307686, |
| "rewards/rejected": -0.048305146396160126, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.3257855822550832, |
| "grad_norm": 1.5486044883728027, |
| "learning_rate": 3.9381818181818177e-07, |
| "log_odds_chosen": 0.3175574839115143, |
| "log_odds_ratio": -0.6549851894378662, |
| "logits/chosen": 2.717921495437622, |
| "logits/rejected": 2.7285637855529785, |
| "logps/chosen": -0.30369266867637634, |
| "logps/rejected": -0.4324611723423004, |
| "loss": 1.1833, |
| "nll_loss": 1.117845892906189, |
| "rewards/accuracies": 0.6083333492279053, |
| "rewards/chosen": -0.030369265004992485, |
| "rewards/margins": 0.012876848690211773, |
| "rewards/rejected": -0.043246109038591385, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.33271719038817005, |
| "grad_norm": 1.4637186527252197, |
| "learning_rate": 3.929090909090909e-07, |
| "log_odds_chosen": 0.677905261516571, |
| "log_odds_ratio": -0.5080384612083435, |
| "logits/chosen": 2.6565752029418945, |
| "logits/rejected": 2.670898199081421, |
| "logps/chosen": -0.24577026069164276, |
| "logps/rejected": -0.48834845423698425, |
| "loss": 1.1682, |
| "nll_loss": 1.1173516511917114, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.024577027186751366, |
| "rewards/margins": 0.02425781637430191, |
| "rewards/rejected": -0.04883484169840813, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.33964879852125696, |
| "grad_norm": 1.5058139562606812, |
| "learning_rate": 3.9199999999999996e-07, |
| "log_odds_chosen": 0.4405784010887146, |
| "log_odds_ratio": -0.6027734875679016, |
| "logits/chosen": 2.6786551475524902, |
| "logits/rejected": 2.6928422451019287, |
| "logps/chosen": -0.28616759181022644, |
| "logps/rejected": -0.4547707438468933, |
| "loss": 1.1336, |
| "nll_loss": 1.0733466148376465, |
| "rewards/accuracies": 0.6083333492279053, |
| "rewards/chosen": -0.028616759926080704, |
| "rewards/margins": 0.016860313713550568, |
| "rewards/rejected": -0.04547707736492157, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.3465804066543438, |
| "grad_norm": 1.8535481691360474, |
| "learning_rate": 3.910909090909091e-07, |
| "log_odds_chosen": 0.42455023527145386, |
| "log_odds_ratio": -0.6171411275863647, |
| "logits/chosen": 2.7245190143585205, |
| "logits/rejected": 2.732139825820923, |
| "logps/chosen": -0.30346882343292236, |
| "logps/rejected": -0.46232348680496216, |
| "loss": 1.1587, |
| "nll_loss": 1.0969831943511963, |
| "rewards/accuracies": 0.6333333253860474, |
| "rewards/chosen": -0.030346881598234177, |
| "rewards/margins": 0.015885472297668457, |
| "rewards/rejected": -0.046232353895902634, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.35351201478743066, |
| "grad_norm": 1.3919163942337036, |
| "learning_rate": 3.9018181818181816e-07, |
| "log_odds_chosen": 0.5069887042045593, |
| "log_odds_ratio": -0.5706583857536316, |
| "logits/chosen": 2.649379014968872, |
| "logits/rejected": 2.6625030040740967, |
| "logps/chosen": -0.25029629468917847, |
| "logps/rejected": -0.43570002913475037, |
| "loss": 1.1158, |
| "nll_loss": 1.0587836503982544, |
| "rewards/accuracies": 0.6916666626930237, |
| "rewards/chosen": -0.025029627606272697, |
| "rewards/margins": 0.01854037493467331, |
| "rewards/rejected": -0.04357000067830086, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.36044362292051757, |
| "grad_norm": 1.3738930225372314, |
| "learning_rate": 3.8927272727272723e-07, |
| "log_odds_chosen": 0.3587478697299957, |
| "log_odds_ratio": -0.6369132995605469, |
| "logits/chosen": 2.712921619415283, |
| "logits/rejected": 2.728205442428589, |
| "logps/chosen": -0.2719246745109558, |
| "logps/rejected": -0.3995325267314911, |
| "loss": 1.1394, |
| "nll_loss": 1.075750470161438, |
| "rewards/accuracies": 0.6166666746139526, |
| "rewards/chosen": -0.02719247154891491, |
| "rewards/margins": 0.012760787270963192, |
| "rewards/rejected": -0.03995325788855553, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.3673752310536044, |
| "grad_norm": 1.7332333326339722, |
| "learning_rate": 3.8836363636363635e-07, |
| "log_odds_chosen": 0.5665148496627808, |
| "log_odds_ratio": -0.5796270966529846, |
| "logits/chosen": 2.66032338142395, |
| "logits/rejected": 2.6766788959503174, |
| "logps/chosen": -0.2771783769130707, |
| "logps/rejected": -0.4812432527542114, |
| "loss": 1.1499, |
| "nll_loss": 1.0919440984725952, |
| "rewards/accuracies": 0.6833333373069763, |
| "rewards/chosen": -0.027717838063836098, |
| "rewards/margins": 0.020406486466526985, |
| "rewards/rejected": -0.04812432825565338, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.37430683918669133, |
| "grad_norm": 1.733955979347229, |
| "learning_rate": 3.874545454545454e-07, |
| "log_odds_chosen": 0.533964991569519, |
| "log_odds_ratio": -0.5693932175636292, |
| "logits/chosen": 2.676833391189575, |
| "logits/rejected": 2.7048349380493164, |
| "logps/chosen": -0.2923631966114044, |
| "logps/rejected": -0.4988028109073639, |
| "loss": 1.1422, |
| "nll_loss": 1.0852835178375244, |
| "rewards/accuracies": 0.6583333611488342, |
| "rewards/chosen": -0.02923632226884365, |
| "rewards/margins": 0.020643968135118484, |
| "rewards/rejected": -0.049880288541316986, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.3812384473197782, |
| "grad_norm": 1.678440809249878, |
| "learning_rate": 3.865454545454545e-07, |
| "log_odds_chosen": 0.48775380849838257, |
| "log_odds_ratio": -0.5923742651939392, |
| "logits/chosen": 2.589829444885254, |
| "logits/rejected": 2.6072795391082764, |
| "logps/chosen": -0.31491658091545105, |
| "logps/rejected": -0.47682541608810425, |
| "loss": 1.1148, |
| "nll_loss": 1.0555262565612793, |
| "rewards/accuracies": 0.6583333611488342, |
| "rewards/chosen": -0.031491655856370926, |
| "rewards/margins": 0.01619088277220726, |
| "rewards/rejected": -0.047682538628578186, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.38817005545286504, |
| "grad_norm": 1.911895751953125, |
| "learning_rate": 3.856363636363636e-07, |
| "log_odds_chosen": 0.3376753032207489, |
| "log_odds_ratio": -0.6669396162033081, |
| "logits/chosen": 2.6268460750579834, |
| "logits/rejected": 2.6471641063690186, |
| "logps/chosen": -0.29394927620887756, |
| "logps/rejected": -0.4321646988391876, |
| "loss": 1.1103, |
| "nll_loss": 1.0436404943466187, |
| "rewards/accuracies": 0.6083333492279053, |
| "rewards/chosen": -0.029394926503300667, |
| "rewards/margins": 0.01382154505699873, |
| "rewards/rejected": -0.04321647435426712, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.39510166358595195, |
| "grad_norm": 1.6275674104690552, |
| "learning_rate": 3.847272727272727e-07, |
| "log_odds_chosen": 0.4959801137447357, |
| "log_odds_ratio": -0.6270378828048706, |
| "logits/chosen": 2.666292667388916, |
| "logits/rejected": 2.6843721866607666, |
| "logps/chosen": -0.3145168423652649, |
| "logps/rejected": -0.48912960290908813, |
| "loss": 1.1656, |
| "nll_loss": 1.1028538942337036, |
| "rewards/accuracies": 0.6416666507720947, |
| "rewards/chosen": -0.03145168721675873, |
| "rewards/margins": 0.017461273819208145, |
| "rewards/rejected": -0.048912957310676575, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.4020332717190388, |
| "grad_norm": 1.6614007949829102, |
| "learning_rate": 3.838181818181818e-07, |
| "log_odds_chosen": 0.3132282793521881, |
| "log_odds_ratio": -0.6719579100608826, |
| "logits/chosen": 2.644859790802002, |
| "logits/rejected": 2.6587765216827393, |
| "logps/chosen": -0.30401140451431274, |
| "logps/rejected": -0.4220035970211029, |
| "loss": 1.1595, |
| "nll_loss": 1.0922586917877197, |
| "rewards/accuracies": 0.5916666388511658, |
| "rewards/chosen": -0.030401142314076424, |
| "rewards/margins": 0.011799216270446777, |
| "rewards/rejected": -0.04220036417245865, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.4089648798521257, |
| "grad_norm": 5.234076499938965, |
| "learning_rate": 3.829090909090909e-07, |
| "log_odds_chosen": 0.4143497347831726, |
| "log_odds_ratio": -0.6194970011711121, |
| "logits/chosen": 2.693376064300537, |
| "logits/rejected": 2.7130610942840576, |
| "logps/chosen": -0.300813227891922, |
| "logps/rejected": -0.45194199681282043, |
| "loss": 1.1107, |
| "nll_loss": 1.0487507581710815, |
| "rewards/accuracies": 0.6666666865348816, |
| "rewards/chosen": -0.030081328004598618, |
| "rewards/margins": 0.01511287409812212, |
| "rewards/rejected": -0.04519420489668846, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.41589648798521256, |
| "grad_norm": 1.2319281101226807, |
| "learning_rate": 3.8199999999999995e-07, |
| "log_odds_chosen": 0.6317125558853149, |
| "log_odds_ratio": -0.5252640247344971, |
| "logits/chosen": 2.6187520027160645, |
| "logits/rejected": 2.6462595462799072, |
| "logps/chosen": -0.27775922417640686, |
| "logps/rejected": -0.5154780745506287, |
| "loss": 1.1218, |
| "nll_loss": 1.069314956665039, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.027775920927524567, |
| "rewards/margins": 0.023771891370415688, |
| "rewards/rejected": -0.0515478178858757, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.42282809611829947, |
| "grad_norm": 1.404926061630249, |
| "learning_rate": 3.810909090909091e-07, |
| "log_odds_chosen": 0.45306530594825745, |
| "log_odds_ratio": -0.5977523326873779, |
| "logits/chosen": 2.6711065769195557, |
| "logits/rejected": 2.6791975498199463, |
| "logps/chosen": -0.2907872498035431, |
| "logps/rejected": -0.4649318754673004, |
| "loss": 1.0976, |
| "nll_loss": 1.037819266319275, |
| "rewards/accuracies": 0.6416666507720947, |
| "rewards/chosen": -0.029078727588057518, |
| "rewards/margins": 0.017414459958672523, |
| "rewards/rejected": -0.04649318382143974, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.4297597042513863, |
| "grad_norm": 1.635770320892334, |
| "learning_rate": 3.8018181818181815e-07, |
| "log_odds_chosen": 0.42734283208847046, |
| "log_odds_ratio": -0.637465238571167, |
| "logits/chosen": 2.603739023208618, |
| "logits/rejected": 2.632021903991699, |
| "logps/chosen": -0.2919798493385315, |
| "logps/rejected": -0.480343759059906, |
| "loss": 1.1467, |
| "nll_loss": 1.0829123258590698, |
| "rewards/accuracies": 0.6583333611488342, |
| "rewards/chosen": -0.029197994619607925, |
| "rewards/margins": 0.01883639022707939, |
| "rewards/rejected": -0.04803437739610672, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.4366913123844732, |
| "grad_norm": 1.6268060207366943, |
| "learning_rate": 3.7927272727272727e-07, |
| "log_odds_chosen": 0.5012027621269226, |
| "log_odds_ratio": -0.5923992991447449, |
| "logits/chosen": 2.5646347999572754, |
| "logits/rejected": 2.583251953125, |
| "logps/chosen": -0.28540945053100586, |
| "logps/rejected": -0.48576289415359497, |
| "loss": 1.1493, |
| "nll_loss": 1.0901035070419312, |
| "rewards/accuracies": 0.6833333373069763, |
| "rewards/chosen": -0.028540942817926407, |
| "rewards/margins": 0.02003534696996212, |
| "rewards/rejected": -0.048576291650533676, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.4436229205175601, |
| "grad_norm": 1.6135777235031128, |
| "learning_rate": 3.7836363636363634e-07, |
| "log_odds_chosen": 0.47709882259368896, |
| "log_odds_ratio": -0.6118133664131165, |
| "logits/chosen": 2.651594400405884, |
| "logits/rejected": 2.663482666015625, |
| "logps/chosen": -0.30147698521614075, |
| "logps/rejected": -0.5001630187034607, |
| "loss": 1.1258, |
| "nll_loss": 1.0645849704742432, |
| "rewards/accuracies": 0.6416666507720947, |
| "rewards/chosen": -0.030147703364491463, |
| "rewards/margins": 0.019868608564138412, |
| "rewards/rejected": -0.05001631751656532, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.45055452865064693, |
| "grad_norm": 1.5299911499023438, |
| "learning_rate": 3.774545454545454e-07, |
| "log_odds_chosen": 0.4915499687194824, |
| "log_odds_ratio": -0.6021497845649719, |
| "logits/chosen": 2.590543031692505, |
| "logits/rejected": 2.616163730621338, |
| "logps/chosen": -0.2796178162097931, |
| "logps/rejected": -0.4787500500679016, |
| "loss": 1.1219, |
| "nll_loss": 1.0617003440856934, |
| "rewards/accuracies": 0.6083333492279053, |
| "rewards/chosen": -0.02796177938580513, |
| "rewards/margins": 0.01991322636604309, |
| "rewards/rejected": -0.04787500575184822, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.45748613678373384, |
| "grad_norm": 1.5430803298950195, |
| "learning_rate": 3.7654545454545454e-07, |
| "log_odds_chosen": 0.5409862399101257, |
| "log_odds_ratio": -0.5837644934654236, |
| "logits/chosen": 2.6871683597564697, |
| "logits/rejected": 2.697368860244751, |
| "logps/chosen": -0.322612464427948, |
| "logps/rejected": -0.5166773200035095, |
| "loss": 1.068, |
| "nll_loss": 1.0095747709274292, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.03226124867796898, |
| "rewards/margins": 0.019406486302614212, |
| "rewards/rejected": -0.05166773125529289, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.4644177449168207, |
| "grad_norm": 1.520017147064209, |
| "learning_rate": 3.756363636363636e-07, |
| "log_odds_chosen": 0.7835187911987305, |
| "log_odds_ratio": -0.49368441104888916, |
| "logits/chosen": 2.681840419769287, |
| "logits/rejected": 2.7117552757263184, |
| "logps/chosen": -0.26524561643600464, |
| "logps/rejected": -0.5571123361587524, |
| "loss": 1.0703, |
| "nll_loss": 1.0209718942642212, |
| "rewards/accuracies": 0.7583333253860474, |
| "rewards/chosen": -0.026524560526013374, |
| "rewards/margins": 0.029186667874455452, |
| "rewards/rejected": -0.055711228400468826, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.4713493530499076, |
| "grad_norm": 6.2188262939453125, |
| "learning_rate": 3.747272727272727e-07, |
| "log_odds_chosen": 0.7931634187698364, |
| "log_odds_ratio": -0.48867708444595337, |
| "logits/chosen": 2.574190378189087, |
| "logits/rejected": 2.5961813926696777, |
| "logps/chosen": -0.2862854599952698, |
| "logps/rejected": -0.5607175230979919, |
| "loss": 1.0977, |
| "nll_loss": 1.0488049983978271, |
| "rewards/accuracies": 0.7833333611488342, |
| "rewards/chosen": -0.028628544881939888, |
| "rewards/margins": 0.02744320221245289, |
| "rewards/rejected": -0.05607175827026367, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.47828096118299446, |
| "grad_norm": 1.3965646028518677, |
| "learning_rate": 3.738181818181818e-07, |
| "log_odds_chosen": 0.6148959994316101, |
| "log_odds_ratio": -0.5497661828994751, |
| "logits/chosen": 2.556060791015625, |
| "logits/rejected": 2.5766706466674805, |
| "logps/chosen": -0.2946844696998596, |
| "logps/rejected": -0.5049049258232117, |
| "loss": 1.1347, |
| "nll_loss": 1.0796822309494019, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.02946844883263111, |
| "rewards/margins": 0.021022040396928787, |
| "rewards/rejected": -0.05049047991633415, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.4852125693160813, |
| "grad_norm": 1.5803139209747314, |
| "learning_rate": 3.7290909090909087e-07, |
| "log_odds_chosen": 0.688506543636322, |
| "log_odds_ratio": -0.5257928371429443, |
| "logits/chosen": 2.6283648014068604, |
| "logits/rejected": 2.667273759841919, |
| "logps/chosen": -0.23559394478797913, |
| "logps/rejected": -0.4642951190471649, |
| "loss": 1.0637, |
| "nll_loss": 1.0110965967178345, |
| "rewards/accuracies": 0.7166666388511658, |
| "rewards/chosen": -0.023559393361210823, |
| "rewards/margins": 0.022870119661092758, |
| "rewards/rejected": -0.04642951115965843, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.4921441774491682, |
| "grad_norm": 1.4043947458267212, |
| "learning_rate": 3.72e-07, |
| "log_odds_chosen": 0.7894371151924133, |
| "log_odds_ratio": -0.5066149830818176, |
| "logits/chosen": 2.5662755966186523, |
| "logits/rejected": 2.597470998764038, |
| "logps/chosen": -0.23841489851474762, |
| "logps/rejected": -0.49710512161254883, |
| "loss": 1.1026, |
| "nll_loss": 1.0519250631332397, |
| "rewards/accuracies": 0.7583333253860474, |
| "rewards/chosen": -0.023841489106416702, |
| "rewards/margins": 0.025869019329547882, |
| "rewards/rejected": -0.049710508435964584, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.49907578558225507, |
| "grad_norm": 1.5117144584655762, |
| "learning_rate": 3.7109090909090907e-07, |
| "log_odds_chosen": 0.5975762009620667, |
| "log_odds_ratio": -0.5889599919319153, |
| "logits/chosen": 2.448983669281006, |
| "logits/rejected": 2.486905813217163, |
| "logps/chosen": -0.2695424258708954, |
| "logps/rejected": -0.47119179368019104, |
| "loss": 1.107, |
| "nll_loss": 1.0481308698654175, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.026954246684908867, |
| "rewards/margins": 0.02016492560505867, |
| "rewards/rejected": -0.047119174152612686, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.506007393715342, |
| "grad_norm": 1.5192545652389526, |
| "learning_rate": 3.7018181818181814e-07, |
| "log_odds_chosen": 0.6952040791511536, |
| "log_odds_ratio": -0.5299401879310608, |
| "logits/chosen": 2.472970485687256, |
| "logits/rejected": 2.5098752975463867, |
| "logps/chosen": -0.3138105571269989, |
| "logps/rejected": -0.5887749195098877, |
| "loss": 1.1069, |
| "nll_loss": 1.0538718700408936, |
| "rewards/accuracies": 0.6916666626930237, |
| "rewards/chosen": -0.03138105198740959, |
| "rewards/margins": 0.02749643847346306, |
| "rewards/rejected": -0.05887749046087265, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.5129390018484289, |
| "grad_norm": 1.2908129692077637, |
| "learning_rate": 3.6927272727272726e-07, |
| "log_odds_chosen": 0.6446402072906494, |
| "log_odds_ratio": -0.5391488671302795, |
| "logits/chosen": 2.492727756500244, |
| "logits/rejected": 2.5441088676452637, |
| "logps/chosen": -0.32913315296173096, |
| "logps/rejected": -0.5775908827781677, |
| "loss": 1.1262, |
| "nll_loss": 1.072272539138794, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.032913316041231155, |
| "rewards/margins": 0.024845769628882408, |
| "rewards/rejected": -0.05775909125804901, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.5198706099815157, |
| "grad_norm": 1.5955073833465576, |
| "learning_rate": 3.6836363636363633e-07, |
| "log_odds_chosen": 0.4990961253643036, |
| "log_odds_ratio": -0.5916425585746765, |
| "logits/chosen": 2.449550151824951, |
| "logits/rejected": 2.475804567337036, |
| "logps/chosen": -0.29480889439582825, |
| "logps/rejected": -0.4691779315471649, |
| "loss": 1.0926, |
| "nll_loss": 1.0334601402282715, |
| "rewards/accuracies": 0.6583333611488342, |
| "rewards/chosen": -0.029480891302227974, |
| "rewards/margins": 0.017436908558011055, |
| "rewards/rejected": -0.04691779240965843, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.5268022181146026, |
| "grad_norm": 1.6701200008392334, |
| "learning_rate": 3.674545454545454e-07, |
| "log_odds_chosen": 0.4017346203327179, |
| "log_odds_ratio": -0.6414787173271179, |
| "logits/chosen": 2.485521078109741, |
| "logits/rejected": 2.5085794925689697, |
| "logps/chosen": -0.3057960867881775, |
| "logps/rejected": -0.4507531225681305, |
| "loss": 1.0797, |
| "nll_loss": 1.015582799911499, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.030579613521695137, |
| "rewards/margins": 0.01449570246040821, |
| "rewards/rejected": -0.04507531598210335, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.5337338262476895, |
| "grad_norm": 1.4670026302337646, |
| "learning_rate": 3.665454545454545e-07, |
| "log_odds_chosen": 0.4697194993495941, |
| "log_odds_ratio": -0.6057302355766296, |
| "logits/chosen": 2.58746337890625, |
| "logits/rejected": 2.6025400161743164, |
| "logps/chosen": -0.33911365270614624, |
| "logps/rejected": -0.540283739566803, |
| "loss": 1.1428, |
| "nll_loss": 1.0822194814682007, |
| "rewards/accuracies": 0.6083333492279053, |
| "rewards/chosen": -0.033911366015672684, |
| "rewards/margins": 0.020117007195949554, |
| "rewards/rejected": -0.054028380662202835, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.5406654343807763, |
| "grad_norm": 1.425374984741211, |
| "learning_rate": 3.656363636363636e-07, |
| "log_odds_chosen": 0.8662251234054565, |
| "log_odds_ratio": -0.4929133951663971, |
| "logits/chosen": 2.4548234939575195, |
| "logits/rejected": 2.4894497394561768, |
| "logps/chosen": -0.29042714834213257, |
| "logps/rejected": -0.6492635011672974, |
| "loss": 1.0862, |
| "nll_loss": 1.0368915796279907, |
| "rewards/accuracies": 0.7666666507720947, |
| "rewards/chosen": -0.029042713344097137, |
| "rewards/margins": 0.035883646458387375, |
| "rewards/rejected": -0.06492635607719421, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.5475970425138632, |
| "grad_norm": 1.9283677339553833, |
| "learning_rate": 3.647272727272727e-07, |
| "log_odds_chosen": 0.6832193732261658, |
| "log_odds_ratio": -0.5179533362388611, |
| "logits/chosen": 2.4680521488189697, |
| "logits/rejected": 2.5091042518615723, |
| "logps/chosen": -0.3011523485183716, |
| "logps/rejected": -0.5473502278327942, |
| "loss": 1.1184, |
| "nll_loss": 1.0665814876556396, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -0.030115237459540367, |
| "rewards/margins": 0.02461978793144226, |
| "rewards/rejected": -0.05473501980304718, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.5545286506469501, |
| "grad_norm": 1.5800950527191162, |
| "learning_rate": 3.638181818181818e-07, |
| "log_odds_chosen": 0.9603479504585266, |
| "log_odds_ratio": -0.4618772566318512, |
| "logits/chosen": 2.5418217182159424, |
| "logits/rejected": 2.5758605003356934, |
| "logps/chosen": -0.3006496727466583, |
| "logps/rejected": -0.680100679397583, |
| "loss": 1.1202, |
| "nll_loss": 1.073991060256958, |
| "rewards/accuracies": 0.7333333492279053, |
| "rewards/chosen": -0.03006497025489807, |
| "rewards/margins": 0.03794510290026665, |
| "rewards/rejected": -0.06801006942987442, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.5614602587800369, |
| "grad_norm": 1.8024792671203613, |
| "learning_rate": 3.6290909090909086e-07, |
| "log_odds_chosen": 0.5875980854034424, |
| "log_odds_ratio": -0.5984980463981628, |
| "logits/chosen": 2.422645092010498, |
| "logits/rejected": 2.4577627182006836, |
| "logps/chosen": -0.26548755168914795, |
| "logps/rejected": -0.4987878203392029, |
| "loss": 1.07, |
| "nll_loss": 1.010174036026001, |
| "rewards/accuracies": 0.7083333134651184, |
| "rewards/chosen": -0.026548750698566437, |
| "rewards/margins": 0.023330029100179672, |
| "rewards/rejected": -0.04987877607345581, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.5683918669131238, |
| "grad_norm": 1.3316905498504639, |
| "learning_rate": 3.62e-07, |
| "log_odds_chosen": 0.9628907442092896, |
| "log_odds_ratio": -0.45750167965888977, |
| "logits/chosen": 2.5198185443878174, |
| "logits/rejected": 2.5674140453338623, |
| "logps/chosen": -0.2701815366744995, |
| "logps/rejected": -0.6767290234565735, |
| "loss": 1.1047, |
| "nll_loss": 1.0589832067489624, |
| "rewards/accuracies": 0.7666666507720947, |
| "rewards/chosen": -0.02701815403997898, |
| "rewards/margins": 0.0406547375023365, |
| "rewards/rejected": -0.06767289340496063, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.5753234750462107, |
| "grad_norm": 3.3782176971435547, |
| "learning_rate": 3.6109090909090906e-07, |
| "log_odds_chosen": 0.8159240484237671, |
| "log_odds_ratio": -0.502350389957428, |
| "logits/chosen": 2.4107635021209717, |
| "logits/rejected": 2.462902545928955, |
| "logps/chosen": -0.2618210017681122, |
| "logps/rejected": -0.5774862766265869, |
| "loss": 1.0908, |
| "nll_loss": 1.040544033050537, |
| "rewards/accuracies": 0.7166666388511658, |
| "rewards/chosen": -0.026182103902101517, |
| "rewards/margins": 0.031566519290208817, |
| "rewards/rejected": -0.05774862319231033, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.5822550831792976, |
| "grad_norm": 1.423762559890747, |
| "learning_rate": 3.601818181818182e-07, |
| "log_odds_chosen": 0.712608277797699, |
| "log_odds_ratio": -0.5430810451507568, |
| "logits/chosen": 2.4404184818267822, |
| "logits/rejected": 2.460728168487549, |
| "logps/chosen": -0.3297029137611389, |
| "logps/rejected": -0.6201837062835693, |
| "loss": 1.1066, |
| "nll_loss": 1.0522674322128296, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.03297029063105583, |
| "rewards/margins": 0.02904808521270752, |
| "rewards/rejected": -0.06201838329434395, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.5891866913123844, |
| "grad_norm": 1.5393766164779663, |
| "learning_rate": 3.5927272727272725e-07, |
| "log_odds_chosen": 0.7168568968772888, |
| "log_odds_ratio": -0.530264139175415, |
| "logits/chosen": 2.4670755863189697, |
| "logits/rejected": 2.4867959022521973, |
| "logps/chosen": -0.30588680505752563, |
| "logps/rejected": -0.582858145236969, |
| "loss": 1.1199, |
| "nll_loss": 1.0668836832046509, |
| "rewards/accuracies": 0.7166666388511658, |
| "rewards/chosen": -0.030588679015636444, |
| "rewards/margins": 0.027697138488292694, |
| "rewards/rejected": -0.05828581750392914, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.5961182994454713, |
| "grad_norm": 1.5168192386627197, |
| "learning_rate": 3.583636363636363e-07, |
| "log_odds_chosen": 0.6971157789230347, |
| "log_odds_ratio": -0.5508431792259216, |
| "logits/chosen": 2.4846901893615723, |
| "logits/rejected": 2.517913579940796, |
| "logps/chosen": -0.29162880778312683, |
| "logps/rejected": -0.5490512251853943, |
| "loss": 1.0672, |
| "nll_loss": 1.0120903253555298, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.029162878170609474, |
| "rewards/margins": 0.025742238387465477, |
| "rewards/rejected": -0.05490512028336525, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.6030499075785583, |
| "grad_norm": 1.951669454574585, |
| "learning_rate": 3.5745454545454545e-07, |
| "log_odds_chosen": 0.6440210342407227, |
| "log_odds_ratio": -0.5461083054542542, |
| "logits/chosen": 2.4179627895355225, |
| "logits/rejected": 2.4611566066741943, |
| "logps/chosen": -0.27406224608421326, |
| "logps/rejected": -0.528464138507843, |
| "loss": 1.1128, |
| "nll_loss": 1.0581576824188232, |
| "rewards/accuracies": 0.6916666626930237, |
| "rewards/chosen": -0.027406223118305206, |
| "rewards/margins": 0.025440199300646782, |
| "rewards/rejected": -0.05284642428159714, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.609981515711645, |
| "grad_norm": 1.452818512916565, |
| "learning_rate": 3.565454545454545e-07, |
| "log_odds_chosen": 0.6425326466560364, |
| "log_odds_ratio": -0.5652487874031067, |
| "logits/chosen": 2.4771902561187744, |
| "logits/rejected": 2.5125765800476074, |
| "logps/chosen": -0.30930647253990173, |
| "logps/rejected": -0.534605085849762, |
| "loss": 1.1152, |
| "nll_loss": 1.058679461479187, |
| "rewards/accuracies": 0.7333333492279053, |
| "rewards/chosen": -0.030930647626519203, |
| "rewards/margins": 0.022529857233166695, |
| "rewards/rejected": -0.0534605048596859, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.616913123844732, |
| "grad_norm": 2.1722567081451416, |
| "learning_rate": 3.556363636363636e-07, |
| "log_odds_chosen": 0.5683826208114624, |
| "log_odds_ratio": -0.5821471810340881, |
| "logits/chosen": 2.3948564529418945, |
| "logits/rejected": 2.432129144668579, |
| "logps/chosen": -0.2796666920185089, |
| "logps/rejected": -0.5052643418312073, |
| "loss": 1.036, |
| "nll_loss": 0.9777409434318542, |
| "rewards/accuracies": 0.6333333253860474, |
| "rewards/chosen": -0.027966666966676712, |
| "rewards/margins": 0.022559762001037598, |
| "rewards/rejected": -0.05052642896771431, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.6238447319778189, |
| "grad_norm": 1.4338502883911133, |
| "learning_rate": 3.547272727272727e-07, |
| "log_odds_chosen": 0.6986488699913025, |
| "log_odds_ratio": -0.5211442112922668, |
| "logits/chosen": 2.412381410598755, |
| "logits/rejected": 2.452411413192749, |
| "logps/chosen": -0.33079832792282104, |
| "logps/rejected": -0.5769501328468323, |
| "loss": 1.1057, |
| "nll_loss": 1.0535985231399536, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.0330798365175724, |
| "rewards/margins": 0.024615177884697914, |
| "rewards/rejected": -0.057695016264915466, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.6307763401109058, |
| "grad_norm": 1.6581847667694092, |
| "learning_rate": 3.538181818181818e-07, |
| "log_odds_chosen": 0.8325883746147156, |
| "log_odds_ratio": -0.5279497504234314, |
| "logits/chosen": 2.3902318477630615, |
| "logits/rejected": 2.4334285259246826, |
| "logps/chosen": -0.2896474003791809, |
| "logps/rejected": -0.6175944805145264, |
| "loss": 1.0771, |
| "nll_loss": 1.024324893951416, |
| "rewards/accuracies": 0.7166666388511658, |
| "rewards/chosen": -0.028964735567569733, |
| "rewards/margins": 0.032794706523418427, |
| "rewards/rejected": -0.06175943836569786, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.6377079482439926, |
| "grad_norm": 1.2917609214782715, |
| "learning_rate": 3.529090909090909e-07, |
| "log_odds_chosen": 0.683183491230011, |
| "log_odds_ratio": -0.5794288516044617, |
| "logits/chosen": 2.442657709121704, |
| "logits/rejected": 2.49123477935791, |
| "logps/chosen": -0.3224933445453644, |
| "logps/rejected": -0.6132354736328125, |
| "loss": 1.1346, |
| "nll_loss": 1.0766867399215698, |
| "rewards/accuracies": 0.6499999761581421, |
| "rewards/chosen": -0.0322493314743042, |
| "rewards/margins": 0.02907421998679638, |
| "rewards/rejected": -0.06132354959845543, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.6446395563770795, |
| "grad_norm": 1.514763593673706, |
| "learning_rate": 3.52e-07, |
| "log_odds_chosen": 0.7153250575065613, |
| "log_odds_ratio": -0.5446946024894714, |
| "logits/chosen": 2.3731462955474854, |
| "logits/rejected": 2.4032256603240967, |
| "logps/chosen": -0.2961350679397583, |
| "logps/rejected": -0.5602670311927795, |
| "loss": 1.1165, |
| "nll_loss": 1.0620505809783936, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.029613511636853218, |
| "rewards/margins": 0.026413191109895706, |
| "rewards/rejected": -0.056026704609394073, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.6515711645101664, |
| "grad_norm": 1.3288161754608154, |
| "learning_rate": 3.5109090909090905e-07, |
| "log_odds_chosen": 0.6764271855354309, |
| "log_odds_ratio": -0.539259135723114, |
| "logits/chosen": 2.3467721939086914, |
| "logits/rejected": 2.384276866912842, |
| "logps/chosen": -0.28593122959136963, |
| "logps/rejected": -0.5482361912727356, |
| "loss": 1.0768, |
| "nll_loss": 1.0228937864303589, |
| "rewards/accuracies": 0.6833333373069763, |
| "rewards/chosen": -0.028593122959136963, |
| "rewards/margins": 0.026230497285723686, |
| "rewards/rejected": -0.0548236221075058, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.6585027726432532, |
| "grad_norm": 1.6036826372146606, |
| "learning_rate": 3.5018181818181817e-07, |
| "log_odds_chosen": 0.7218735218048096, |
| "log_odds_ratio": -0.5222643613815308, |
| "logits/chosen": 2.311798095703125, |
| "logits/rejected": 2.353884220123291, |
| "logps/chosen": -0.2681826055049896, |
| "logps/rejected": -0.5527848601341248, |
| "loss": 1.0583, |
| "nll_loss": 1.0060540437698364, |
| "rewards/accuracies": 0.7833333611488342, |
| "rewards/chosen": -0.026818258687853813, |
| "rewards/margins": 0.02846023067831993, |
| "rewards/rejected": -0.05527849122881889, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.6654343807763401, |
| "grad_norm": 1.6921563148498535, |
| "learning_rate": 3.4927272727272724e-07, |
| "log_odds_chosen": 0.7491247057914734, |
| "log_odds_ratio": -0.5005953907966614, |
| "logits/chosen": 2.384194850921631, |
| "logits/rejected": 2.4205212593078613, |
| "logps/chosen": -0.27753210067749023, |
| "logps/rejected": -0.5439023971557617, |
| "loss": 1.0796, |
| "nll_loss": 1.0295780897140503, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -0.027753213420510292, |
| "rewards/margins": 0.02663702890276909, |
| "rewards/rejected": -0.05439024046063423, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.672365988909427, |
| "grad_norm": 1.6062825918197632, |
| "learning_rate": 3.483636363636363e-07, |
| "log_odds_chosen": 0.8605387210845947, |
| "log_odds_ratio": -0.4727242887020111, |
| "logits/chosen": 2.392383098602295, |
| "logits/rejected": 2.4284956455230713, |
| "logps/chosen": -0.30633166432380676, |
| "logps/rejected": -0.6641873121261597, |
| "loss": 1.0927, |
| "nll_loss": 1.0453789234161377, |
| "rewards/accuracies": 0.7583333253860474, |
| "rewards/chosen": -0.030633168295025826, |
| "rewards/margins": 0.035785574465990067, |
| "rewards/rejected": -0.06641873717308044, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.6792975970425139, |
| "grad_norm": 1.4081010818481445, |
| "learning_rate": 3.4745454545454544e-07, |
| "log_odds_chosen": 0.7056547403335571, |
| "log_odds_ratio": -0.534669816493988, |
| "logits/chosen": 2.4164679050445557, |
| "logits/rejected": 2.4538962841033936, |
| "logps/chosen": -0.30063506960868835, |
| "logps/rejected": -0.566967248916626, |
| "loss": 1.0686, |
| "nll_loss": 1.015173077583313, |
| "rewards/accuracies": 0.7666666507720947, |
| "rewards/chosen": -0.030063504353165627, |
| "rewards/margins": 0.026633214205503464, |
| "rewards/rejected": -0.05669672042131424, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.6862292051756007, |
| "grad_norm": 1.6677594184875488, |
| "learning_rate": 3.465454545454545e-07, |
| "log_odds_chosen": 0.6989320516586304, |
| "log_odds_ratio": -0.5302554368972778, |
| "logits/chosen": 2.3923895359039307, |
| "logits/rejected": 2.4322257041931152, |
| "logps/chosen": -0.3043942451477051, |
| "logps/rejected": -0.5767375826835632, |
| "loss": 1.1053, |
| "nll_loss": 1.052259922027588, |
| "rewards/accuracies": 0.7416666746139526, |
| "rewards/chosen": -0.030439427122473717, |
| "rewards/margins": 0.027234338223934174, |
| "rewards/rejected": -0.05767376720905304, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.6931608133086876, |
| "grad_norm": 2.065037727355957, |
| "learning_rate": 3.4563636363636363e-07, |
| "log_odds_chosen": 0.9193868041038513, |
| "log_odds_ratio": -0.47953858971595764, |
| "logits/chosen": 2.3140947818756104, |
| "logits/rejected": 2.370821952819824, |
| "logps/chosen": -0.26736411452293396, |
| "logps/rejected": -0.6239200830459595, |
| "loss": 1.1044, |
| "nll_loss": 1.0564467906951904, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.026736412197351456, |
| "rewards/margins": 0.03565559908747673, |
| "rewards/rejected": -0.06239200755953789, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.7000924214417745, |
| "grad_norm": 1.7896554470062256, |
| "learning_rate": 3.447272727272727e-07, |
| "log_odds_chosen": 1.0297752618789673, |
| "log_odds_ratio": -0.4404907822608948, |
| "logits/chosen": 2.414668560028076, |
| "logits/rejected": 2.4732654094696045, |
| "logps/chosen": -0.2793917953968048, |
| "logps/rejected": -0.6925608515739441, |
| "loss": 1.068, |
| "nll_loss": 1.0239418745040894, |
| "rewards/accuracies": 0.7666666507720947, |
| "rewards/chosen": -0.027939176186919212, |
| "rewards/margins": 0.04131689295172691, |
| "rewards/rejected": -0.06925607472658157, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.7070240295748613, |
| "grad_norm": 1.7647099494934082, |
| "learning_rate": 3.4381818181818177e-07, |
| "log_odds_chosen": 0.7891833782196045, |
| "log_odds_ratio": -0.5092800855636597, |
| "logits/chosen": 2.4179863929748535, |
| "logits/rejected": 2.4637575149536133, |
| "logps/chosen": -0.32941383123397827, |
| "logps/rejected": -0.6404102444648743, |
| "loss": 1.1317, |
| "nll_loss": 1.0808058977127075, |
| "rewards/accuracies": 0.7166666388511658, |
| "rewards/chosen": -0.032941386103630066, |
| "rewards/margins": 0.031099645420908928, |
| "rewards/rejected": -0.06404102593660355, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.7139556377079482, |
| "grad_norm": 1.6020543575286865, |
| "learning_rate": 3.429090909090909e-07, |
| "log_odds_chosen": 0.823983371257782, |
| "log_odds_ratio": -0.49682337045669556, |
| "logits/chosen": 2.2930688858032227, |
| "logits/rejected": 2.3509280681610107, |
| "logps/chosen": -0.2938121259212494, |
| "logps/rejected": -0.6020478010177612, |
| "loss": 1.0654, |
| "nll_loss": 1.015733003616333, |
| "rewards/accuracies": 0.7583333253860474, |
| "rewards/chosen": -0.02938121184706688, |
| "rewards/margins": 0.030823571607470512, |
| "rewards/rejected": -0.06020478159189224, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.7208872458410351, |
| "grad_norm": 1.902418851852417, |
| "learning_rate": 3.4199999999999997e-07, |
| "log_odds_chosen": 0.9842289090156555, |
| "log_odds_ratio": -0.46310731768608093, |
| "logits/chosen": 2.354994535446167, |
| "logits/rejected": 2.3915045261383057, |
| "logps/chosen": -0.27029862999916077, |
| "logps/rejected": -0.6681958436965942, |
| "loss": 1.0773, |
| "nll_loss": 1.030941367149353, |
| "rewards/accuracies": 0.7666666507720947, |
| "rewards/chosen": -0.027029862627387047, |
| "rewards/margins": 0.03978971764445305, |
| "rewards/rejected": -0.06681957095861435, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.727818853974122, |
| "grad_norm": 1.5766515731811523, |
| "learning_rate": 3.410909090909091e-07, |
| "log_odds_chosen": 0.7862997055053711, |
| "log_odds_ratio": -0.5025666952133179, |
| "logits/chosen": 2.351290464401245, |
| "logits/rejected": 2.3797943592071533, |
| "logps/chosen": -0.2873378396034241, |
| "logps/rejected": -0.5840609669685364, |
| "loss": 1.0972, |
| "nll_loss": 1.0469059944152832, |
| "rewards/accuracies": 0.7666666507720947, |
| "rewards/chosen": -0.028733786195516586, |
| "rewards/margins": 0.02967231348156929, |
| "rewards/rejected": -0.058406099677085876, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.7347504621072088, |
| "grad_norm": 1.9089113473892212, |
| "learning_rate": 3.4018181818181816e-07, |
| "log_odds_chosen": 1.0071905851364136, |
| "log_odds_ratio": -0.4409308433532715, |
| "logits/chosen": 2.36696720123291, |
| "logits/rejected": 2.4168639183044434, |
| "logps/chosen": -0.2714075744152069, |
| "logps/rejected": -0.6719208359718323, |
| "loss": 1.0857, |
| "nll_loss": 1.0415993928909302, |
| "rewards/accuracies": 0.8083333373069763, |
| "rewards/chosen": -0.02714076079428196, |
| "rewards/margins": 0.04005131870508194, |
| "rewards/rejected": -0.06719207763671875, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.7416820702402958, |
| "grad_norm": 1.6972178220748901, |
| "learning_rate": 3.3927272727272723e-07, |
| "log_odds_chosen": 0.7740481495857239, |
| "log_odds_ratio": -0.5107226967811584, |
| "logits/chosen": 2.373772382736206, |
| "logits/rejected": 2.415752410888672, |
| "logps/chosen": -0.30703625082969666, |
| "logps/rejected": -0.6409940123558044, |
| "loss": 1.1177, |
| "nll_loss": 1.066676378250122, |
| "rewards/accuracies": 0.7583333253860474, |
| "rewards/chosen": -0.030703624710440636, |
| "rewards/margins": 0.03339577093720436, |
| "rewards/rejected": -0.06409939378499985, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.7486136783733827, |
| "grad_norm": 1.3471928834915161, |
| "learning_rate": 3.3836363636363635e-07, |
| "log_odds_chosen": 0.8596405386924744, |
| "log_odds_ratio": -0.4774978458881378, |
| "logits/chosen": 2.286552906036377, |
| "logits/rejected": 2.335909128189087, |
| "logps/chosen": -0.28501778841018677, |
| "logps/rejected": -0.6287774443626404, |
| "loss": 1.0694, |
| "nll_loss": 1.0216971635818481, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.028501776978373528, |
| "rewards/margins": 0.03437596932053566, |
| "rewards/rejected": -0.06287775188684464, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.7555452865064695, |
| "grad_norm": 1.906703233718872, |
| "learning_rate": 3.374545454545454e-07, |
| "log_odds_chosen": 0.8641347885131836, |
| "log_odds_ratio": -0.5105417370796204, |
| "logits/chosen": 2.3575448989868164, |
| "logits/rejected": 2.396763801574707, |
| "logps/chosen": -0.26420578360557556, |
| "logps/rejected": -0.6220420002937317, |
| "loss": 1.0629, |
| "nll_loss": 1.0118043422698975, |
| "rewards/accuracies": 0.6916666626930237, |
| "rewards/chosen": -0.026420580223202705, |
| "rewards/margins": 0.03578362613916397, |
| "rewards/rejected": -0.06220419704914093, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.7624768946395564, |
| "grad_norm": 1.4510164260864258, |
| "learning_rate": 3.365454545454545e-07, |
| "log_odds_chosen": 0.777636706829071, |
| "log_odds_ratio": -0.5234912633895874, |
| "logits/chosen": 2.271596670150757, |
| "logits/rejected": 2.3241117000579834, |
| "logps/chosen": -0.313221275806427, |
| "logps/rejected": -0.6108809113502502, |
| "loss": 1.1234, |
| "nll_loss": 1.0710450410842896, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.03132212534546852, |
| "rewards/margins": 0.029765967279672623, |
| "rewards/rejected": -0.06108810007572174, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.7694085027726433, |
| "grad_norm": 2.371877908706665, |
| "learning_rate": 3.356363636363636e-07, |
| "log_odds_chosen": 0.7746042013168335, |
| "log_odds_ratio": -0.5306139588356018, |
| "logits/chosen": 2.318974018096924, |
| "logits/rejected": 2.381847858428955, |
| "logps/chosen": -0.2922042906284332, |
| "logps/rejected": -0.6194152235984802, |
| "loss": 1.0591, |
| "nll_loss": 1.006041169166565, |
| "rewards/accuracies": 0.7333333492279053, |
| "rewards/chosen": -0.02922043204307556, |
| "rewards/margins": 0.032721079885959625, |
| "rewards/rejected": -0.061941519379615784, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.7763401109057301, |
| "grad_norm": 1.5632991790771484, |
| "learning_rate": 3.347272727272727e-07, |
| "log_odds_chosen": 0.9328292608261108, |
| "log_odds_ratio": -0.4911152124404907, |
| "logits/chosen": 2.3930165767669678, |
| "logits/rejected": 2.4501211643218994, |
| "logps/chosen": -0.33901265263557434, |
| "logps/rejected": -0.71490877866745, |
| "loss": 1.1406, |
| "nll_loss": 1.091480016708374, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.033901263028383255, |
| "rewards/margins": 0.03758960962295532, |
| "rewards/rejected": -0.07149087637662888, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.783271719038817, |
| "grad_norm": 1.698624849319458, |
| "learning_rate": 3.338181818181818e-07, |
| "log_odds_chosen": 0.8142465949058533, |
| "log_odds_ratio": -0.48792019486427307, |
| "logits/chosen": 2.2310545444488525, |
| "logits/rejected": 2.270510673522949, |
| "logps/chosen": -0.2853752076625824, |
| "logps/rejected": -0.5743341445922852, |
| "loss": 1.0228, |
| "nll_loss": 0.9739974737167358, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -0.02853752300143242, |
| "rewards/margins": 0.028895895928144455, |
| "rewards/rejected": -0.057433418929576874, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.7902033271719039, |
| "grad_norm": 1.6501067876815796, |
| "learning_rate": 3.329090909090909e-07, |
| "log_odds_chosen": 0.8794564008712769, |
| "log_odds_ratio": -0.47009655833244324, |
| "logits/chosen": 2.3232598304748535, |
| "logits/rejected": 2.363266944885254, |
| "logps/chosen": -0.33390435576438904, |
| "logps/rejected": -0.6623743772506714, |
| "loss": 1.1093, |
| "nll_loss": 1.0623211860656738, |
| "rewards/accuracies": 0.7583333253860474, |
| "rewards/chosen": -0.033390436321496964, |
| "rewards/margins": 0.032846998423337936, |
| "rewards/rejected": -0.0662374347448349, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.7971349353049908, |
| "grad_norm": 1.9385813474655151, |
| "learning_rate": 3.3199999999999996e-07, |
| "log_odds_chosen": 0.8120476007461548, |
| "log_odds_ratio": -0.4858551323413849, |
| "logits/chosen": 2.2998452186584473, |
| "logits/rejected": 2.329951524734497, |
| "logps/chosen": -0.2809382975101471, |
| "logps/rejected": -0.5397506952285767, |
| "loss": 1.0673, |
| "nll_loss": 1.018762230873108, |
| "rewards/accuracies": 0.7666666507720947, |
| "rewards/chosen": -0.02809382788836956, |
| "rewards/margins": 0.025881236419081688, |
| "rewards/rejected": -0.05397506803274155, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.8040665434380776, |
| "grad_norm": 1.647004246711731, |
| "learning_rate": 3.310909090909091e-07, |
| "log_odds_chosen": 0.7569546103477478, |
| "log_odds_ratio": -0.5495377779006958, |
| "logits/chosen": 2.2851226329803467, |
| "logits/rejected": 2.3207554817199707, |
| "logps/chosen": -0.3422669768333435, |
| "logps/rejected": -0.6526975035667419, |
| "loss": 1.1354, |
| "nll_loss": 1.0804812908172607, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.03422669693827629, |
| "rewards/margins": 0.03104304149746895, |
| "rewards/rejected": -0.06526973843574524, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.8109981515711645, |
| "grad_norm": 1.3443453311920166, |
| "learning_rate": 3.3018181818181815e-07, |
| "log_odds_chosen": 0.7256454825401306, |
| "log_odds_ratio": -0.5355305075645447, |
| "logits/chosen": 2.308670997619629, |
| "logits/rejected": 2.348257541656494, |
| "logps/chosen": -0.3142472207546234, |
| "logps/rejected": -0.6030288338661194, |
| "loss": 1.1232, |
| "nll_loss": 1.069667935371399, |
| "rewards/accuracies": 0.7083333134651184, |
| "rewards/chosen": -0.03142471984028816, |
| "rewards/margins": 0.028878165408968925, |
| "rewards/rejected": -0.06030288711190224, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.8179297597042514, |
| "grad_norm": 1.7293723821640015, |
| "learning_rate": 3.2927272727272727e-07, |
| "log_odds_chosen": 0.7784165740013123, |
| "log_odds_ratio": -0.5222859978675842, |
| "logits/chosen": 2.2894599437713623, |
| "logits/rejected": 2.3367385864257812, |
| "logps/chosen": -0.29684463143348694, |
| "logps/rejected": -0.5837019085884094, |
| "loss": 1.0918, |
| "nll_loss": 1.0395236015319824, |
| "rewards/accuracies": 0.7416666746139526, |
| "rewards/chosen": -0.029684465378522873, |
| "rewards/margins": 0.02868572250008583, |
| "rewards/rejected": -0.058370187878608704, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.8248613678373382, |
| "grad_norm": 1.8093395233154297, |
| "learning_rate": 3.2836363636363634e-07, |
| "log_odds_chosen": 0.8844853043556213, |
| "log_odds_ratio": -0.4726443290710449, |
| "logits/chosen": 2.2617225646972656, |
| "logits/rejected": 2.3025357723236084, |
| "logps/chosen": -0.2791774868965149, |
| "logps/rejected": -0.6056706309318542, |
| "loss": 1.0658, |
| "nll_loss": 1.0185186862945557, |
| "rewards/accuracies": 0.7333333492279053, |
| "rewards/chosen": -0.02791774831712246, |
| "rewards/margins": 0.03264930844306946, |
| "rewards/rejected": -0.06056705862283707, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.8317929759704251, |
| "grad_norm": 1.8343292474746704, |
| "learning_rate": 3.274545454545454e-07, |
| "log_odds_chosen": 0.9979297518730164, |
| "log_odds_ratio": -0.44733384251594543, |
| "logits/chosen": 2.2689735889434814, |
| "logits/rejected": 2.30771803855896, |
| "logps/chosen": -0.29521337151527405, |
| "logps/rejected": -0.6864122748374939, |
| "loss": 1.0462, |
| "nll_loss": 1.0014839172363281, |
| "rewards/accuracies": 0.8166666626930237, |
| "rewards/chosen": -0.029521334916353226, |
| "rewards/margins": 0.039119891822338104, |
| "rewards/rejected": -0.06864122301340103, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.838724584103512, |
| "grad_norm": 1.5383275747299194, |
| "learning_rate": 3.2654545454545454e-07, |
| "log_odds_chosen": 1.0520718097686768, |
| "log_odds_ratio": -0.4258045554161072, |
| "logits/chosen": 2.2323710918426514, |
| "logits/rejected": 2.269280195236206, |
| "logps/chosen": -0.2652204632759094, |
| "logps/rejected": -0.6579357981681824, |
| "loss": 1.0456, |
| "nll_loss": 1.0029722452163696, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.026522040367126465, |
| "rewards/margins": 0.03927153721451759, |
| "rewards/rejected": -0.06579358130693436, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.8456561922365989, |
| "grad_norm": 1.7920851707458496, |
| "learning_rate": 3.256363636363636e-07, |
| "log_odds_chosen": 0.9857064485549927, |
| "log_odds_ratio": -0.4479914903640747, |
| "logits/chosen": 2.2189059257507324, |
| "logits/rejected": 2.2568318843841553, |
| "logps/chosen": -0.27704155445098877, |
| "logps/rejected": -0.6286638975143433, |
| "loss": 1.0787, |
| "nll_loss": 1.0338690280914307, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.027704155072569847, |
| "rewards/margins": 0.03516223281621933, |
| "rewards/rejected": -0.06286638975143433, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.8525878003696857, |
| "grad_norm": 1.4790568351745605, |
| "learning_rate": 3.247272727272727e-07, |
| "log_odds_chosen": 0.841203510761261, |
| "log_odds_ratio": -0.47351616621017456, |
| "logits/chosen": 2.2627458572387695, |
| "logits/rejected": 2.304532051086426, |
| "logps/chosen": -0.30810025334358215, |
| "logps/rejected": -0.6228964328765869, |
| "loss": 1.0887, |
| "nll_loss": 1.041351079940796, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -0.030810019001364708, |
| "rewards/margins": 0.031479619443416595, |
| "rewards/rejected": -0.06228964403271675, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.8595194085027726, |
| "grad_norm": 1.640016794204712, |
| "learning_rate": 3.238181818181818e-07, |
| "log_odds_chosen": 0.8392209410667419, |
| "log_odds_ratio": -0.47920557856559753, |
| "logits/chosen": 2.241325855255127, |
| "logits/rejected": 2.3104820251464844, |
| "logps/chosen": -0.24823182821273804, |
| "logps/rejected": -0.5455386638641357, |
| "loss": 0.9989, |
| "nll_loss": 0.950990617275238, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -0.024823185056447983, |
| "rewards/margins": 0.029730679467320442, |
| "rewards/rejected": -0.054553862661123276, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.8664510166358595, |
| "grad_norm": 1.6642489433288574, |
| "learning_rate": 3.229090909090909e-07, |
| "log_odds_chosen": 0.8546110987663269, |
| "log_odds_ratio": -0.5026638507843018, |
| "logits/chosen": 2.2081406116485596, |
| "logits/rejected": 2.243947744369507, |
| "logps/chosen": -0.29789215326309204, |
| "logps/rejected": -0.630996584892273, |
| "loss": 1.0533, |
| "nll_loss": 1.0030620098114014, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -0.029789213091135025, |
| "rewards/margins": 0.03331044688820839, |
| "rewards/rejected": -0.06309965997934341, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.8733826247689463, |
| "grad_norm": 1.5830177068710327, |
| "learning_rate": 3.22e-07, |
| "log_odds_chosen": 0.8428265452384949, |
| "log_odds_ratio": -0.5204115509986877, |
| "logits/chosen": 2.2851126194000244, |
| "logits/rejected": 2.326284646987915, |
| "logps/chosen": -0.3049141466617584, |
| "logps/rejected": -0.6031973361968994, |
| "loss": 1.0701, |
| "nll_loss": 1.018021821975708, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.030491415411233902, |
| "rewards/margins": 0.029828311875462532, |
| "rewards/rejected": -0.060319721698760986, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.8803142329020333, |
| "grad_norm": 1.658988118171692, |
| "learning_rate": 3.2109090909090907e-07, |
| "log_odds_chosen": 0.9268123507499695, |
| "log_odds_ratio": -0.5015169978141785, |
| "logits/chosen": 2.2516047954559326, |
| "logits/rejected": 2.307269811630249, |
| "logps/chosen": -0.2988060712814331, |
| "logps/rejected": -0.6871820092201233, |
| "loss": 1.0584, |
| "nll_loss": 1.0082836151123047, |
| "rewards/accuracies": 0.7583333253860474, |
| "rewards/chosen": -0.029880603775382042, |
| "rewards/margins": 0.03883758932352066, |
| "rewards/rejected": -0.06871819496154785, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.8872458410351202, |
| "grad_norm": 3.0365874767303467, |
| "learning_rate": 3.2018181818181814e-07, |
| "log_odds_chosen": 0.8886032700538635, |
| "log_odds_ratio": -0.4523109197616577, |
| "logits/chosen": 2.2199485301971436, |
| "logits/rejected": 2.2670233249664307, |
| "logps/chosen": -0.25624144077301025, |
| "logps/rejected": -0.5575699210166931, |
| "loss": 1.0108, |
| "nll_loss": 0.965610146522522, |
| "rewards/accuracies": 0.8166666626930237, |
| "rewards/chosen": -0.025624146685004234, |
| "rewards/margins": 0.030132848769426346, |
| "rewards/rejected": -0.05575699731707573, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.8941774491682071, |
| "grad_norm": 1.6926220655441284, |
| "learning_rate": 3.1927272727272726e-07, |
| "log_odds_chosen": 0.9891830682754517, |
| "log_odds_ratio": -0.4355195462703705, |
| "logits/chosen": 2.2216622829437256, |
| "logits/rejected": 2.275890350341797, |
| "logps/chosen": -0.276635080575943, |
| "logps/rejected": -0.6514686942100525, |
| "loss": 1.0596, |
| "nll_loss": 1.0160428285598755, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.02766350656747818, |
| "rewards/margins": 0.03748335689306259, |
| "rewards/rejected": -0.06514687836170197, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.9011090573012939, |
| "grad_norm": 1.4911339282989502, |
| "learning_rate": 3.1836363636363633e-07, |
| "log_odds_chosen": 0.9020595550537109, |
| "log_odds_ratio": -0.48418372869491577, |
| "logits/chosen": 2.2785911560058594, |
| "logits/rejected": 2.312934160232544, |
| "logps/chosen": -0.31726518273353577, |
| "logps/rejected": -0.6841873526573181, |
| "loss": 1.05, |
| "nll_loss": 1.0015556812286377, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.031726520508527756, |
| "rewards/margins": 0.036692213267087936, |
| "rewards/rejected": -0.06841873377561569, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.9080406654343808, |
| "grad_norm": 2.0379440784454346, |
| "learning_rate": 3.174545454545454e-07, |
| "log_odds_chosen": 1.1423557996749878, |
| "log_odds_ratio": -0.4178314507007599, |
| "logits/chosen": 2.3556437492370605, |
| "logits/rejected": 2.4005048274993896, |
| "logps/chosen": -0.2667427659034729, |
| "logps/rejected": -0.742853581905365, |
| "loss": 1.0941, |
| "nll_loss": 1.052323818206787, |
| "rewards/accuracies": 0.8416666388511658, |
| "rewards/chosen": -0.02667427621781826, |
| "rewards/margins": 0.04761108011007309, |
| "rewards/rejected": -0.0742853581905365, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.9149722735674677, |
| "grad_norm": 1.3288828134536743, |
| "learning_rate": 3.1654545454545453e-07, |
| "log_odds_chosen": 0.9340334534645081, |
| "log_odds_ratio": -0.48560333251953125, |
| "logits/chosen": 2.2910754680633545, |
| "logits/rejected": 2.3387868404388428, |
| "logps/chosen": -0.3347066640853882, |
| "logps/rejected": -0.6973811388015747, |
| "loss": 1.0676, |
| "nll_loss": 1.0190544128417969, |
| "rewards/accuracies": 0.7583333253860474, |
| "rewards/chosen": -0.033470671623945236, |
| "rewards/margins": 0.036267444491386414, |
| "rewards/rejected": -0.06973811984062195, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.9219038817005545, |
| "grad_norm": 1.6985810995101929, |
| "learning_rate": 3.156363636363636e-07, |
| "log_odds_chosen": 0.9894071221351624, |
| "log_odds_ratio": -0.4461503326892853, |
| "logits/chosen": 2.2392578125, |
| "logits/rejected": 2.283933162689209, |
| "logps/chosen": -0.2434028834104538, |
| "logps/rejected": -0.5999926328659058, |
| "loss": 1.0296, |
| "nll_loss": 0.9849395751953125, |
| "rewards/accuracies": 0.7666666507720947, |
| "rewards/chosen": -0.02434029057621956, |
| "rewards/margins": 0.03565897420048714, |
| "rewards/rejected": -0.059999268501996994, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.9288354898336414, |
| "grad_norm": 1.6689305305480957, |
| "learning_rate": 3.147272727272727e-07, |
| "log_odds_chosen": 0.7809682488441467, |
| "log_odds_ratio": -0.5143331289291382, |
| "logits/chosen": 2.245195150375366, |
| "logits/rejected": 2.2930848598480225, |
| "logps/chosen": -0.313152015209198, |
| "logps/rejected": -0.590879499912262, |
| "loss": 1.0569, |
| "nll_loss": 1.0054324865341187, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.03131520375609398, |
| "rewards/margins": 0.027772750705480576, |
| "rewards/rejected": -0.059087950736284256, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.9357670979667283, |
| "grad_norm": 1.883654236793518, |
| "learning_rate": 3.138181818181818e-07, |
| "log_odds_chosen": 0.8892870545387268, |
| "log_odds_ratio": -0.4877478778362274, |
| "logits/chosen": 2.2379813194274902, |
| "logits/rejected": 2.3097243309020996, |
| "logps/chosen": -0.24917837977409363, |
| "logps/rejected": -0.5756833553314209, |
| "loss": 1.0534, |
| "nll_loss": 1.0045883655548096, |
| "rewards/accuracies": 0.7583333253860474, |
| "rewards/chosen": -0.024917839094996452, |
| "rewards/margins": 0.03265049681067467, |
| "rewards/rejected": -0.05756833776831627, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.9426987060998152, |
| "grad_norm": 2.1263558864593506, |
| "learning_rate": 3.1290909090909086e-07, |
| "log_odds_chosen": 0.9385001063346863, |
| "log_odds_ratio": -0.485267698764801, |
| "logits/chosen": 2.2864134311676025, |
| "logits/rejected": 2.3468661308288574, |
| "logps/chosen": -0.3098216950893402, |
| "logps/rejected": -0.6490688323974609, |
| "loss": 1.0503, |
| "nll_loss": 1.001733660697937, |
| "rewards/accuracies": 0.7666666507720947, |
| "rewards/chosen": -0.03098217211663723, |
| "rewards/margins": 0.033924710005521774, |
| "rewards/rejected": -0.06490688025951385, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.949630314232902, |
| "grad_norm": 1.8792967796325684, |
| "learning_rate": 3.12e-07, |
| "log_odds_chosen": 0.6588108539581299, |
| "log_odds_ratio": -0.5568161010742188, |
| "logits/chosen": 2.2107622623443604, |
| "logits/rejected": 2.2423746585845947, |
| "logps/chosen": -0.3359021544456482, |
| "logps/rejected": -0.5799560546875, |
| "loss": 1.0822, |
| "nll_loss": 1.0264886617660522, |
| "rewards/accuracies": 0.7583333253860474, |
| "rewards/chosen": -0.03359021618962288, |
| "rewards/margins": 0.024405384436249733, |
| "rewards/rejected": -0.05799560621380806, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.9565619223659889, |
| "grad_norm": 1.4165992736816406, |
| "learning_rate": 3.1109090909090906e-07, |
| "log_odds_chosen": 0.9096938371658325, |
| "log_odds_ratio": -0.4722006916999817, |
| "logits/chosen": 2.235501527786255, |
| "logits/rejected": 2.2762210369110107, |
| "logps/chosen": -0.29770374298095703, |
| "logps/rejected": -0.6392419934272766, |
| "loss": 1.0716, |
| "nll_loss": 1.024424433708191, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.0297703817486763, |
| "rewards/margins": 0.03415382280945778, |
| "rewards/rejected": -0.06392420083284378, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.9634935304990758, |
| "grad_norm": 2.6423442363739014, |
| "learning_rate": 3.101818181818182e-07, |
| "log_odds_chosen": 0.8946338295936584, |
| "log_odds_ratio": -0.4573117196559906, |
| "logits/chosen": 2.3018126487731934, |
| "logits/rejected": 2.3378913402557373, |
| "logps/chosen": -0.2610825002193451, |
| "logps/rejected": -0.5758386850357056, |
| "loss": 1.0243, |
| "nll_loss": 0.9785677790641785, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.026108253747224808, |
| "rewards/margins": 0.03147561475634575, |
| "rewards/rejected": -0.05758386105298996, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.9704251386321626, |
| "grad_norm": 1.6327714920043945, |
| "learning_rate": 3.0927272727272725e-07, |
| "log_odds_chosen": 0.8046802878379822, |
| "log_odds_ratio": -0.5016317367553711, |
| "logits/chosen": 2.166358470916748, |
| "logits/rejected": 2.2131805419921875, |
| "logps/chosen": -0.27325597405433655, |
| "logps/rejected": -0.559944748878479, |
| "loss": 1.0823, |
| "nll_loss": 1.0321154594421387, |
| "rewards/accuracies": 0.7583333253860474, |
| "rewards/chosen": -0.027325598523020744, |
| "rewards/margins": 0.028668878600001335, |
| "rewards/rejected": -0.05599447339773178, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.9773567467652495, |
| "grad_norm": 1.5271565914154053, |
| "learning_rate": 3.083636363636363e-07, |
| "log_odds_chosen": 0.9641692638397217, |
| "log_odds_ratio": -0.4276140630245209, |
| "logits/chosen": 2.2330405712127686, |
| "logits/rejected": 2.275627374649048, |
| "logps/chosen": -0.2772447168827057, |
| "logps/rejected": -0.6339874267578125, |
| "loss": 1.0827, |
| "nll_loss": 1.039945125579834, |
| "rewards/accuracies": 0.7916666865348816, |
| "rewards/chosen": -0.027724474668502808, |
| "rewards/margins": 0.03567427024245262, |
| "rewards/rejected": -0.06339874863624573, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.9842883548983364, |
| "grad_norm": 1.9477697610855103, |
| "learning_rate": 3.0745454545454545e-07, |
| "log_odds_chosen": 0.9072960019111633, |
| "log_odds_ratio": -0.49298056960105896, |
| "logits/chosen": 2.2244527339935303, |
| "logits/rejected": 2.2482240200042725, |
| "logps/chosen": -0.274914413690567, |
| "logps/rejected": -0.6292871832847595, |
| "loss": 1.0557, |
| "nll_loss": 1.0063644647598267, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -0.02749144285917282, |
| "rewards/margins": 0.03543727472424507, |
| "rewards/rejected": -0.0629287138581276, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.9912199630314233, |
| "grad_norm": 14.025252342224121, |
| "learning_rate": 3.065454545454545e-07, |
| "log_odds_chosen": 0.9216189980506897, |
| "log_odds_ratio": -0.5028970837593079, |
| "logits/chosen": 2.2144951820373535, |
| "logits/rejected": 2.24345326423645, |
| "logps/chosen": -0.281184583902359, |
| "logps/rejected": -0.6528930068016052, |
| "loss": 1.0402, |
| "nll_loss": 0.9899436831474304, |
| "rewards/accuracies": 0.7333333492279053, |
| "rewards/chosen": -0.02811845950782299, |
| "rewards/margins": 0.03717083856463432, |
| "rewards/rejected": -0.06528931111097336, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.9981515711645101, |
| "grad_norm": 1.4665298461914062, |
| "learning_rate": 3.056363636363636e-07, |
| "log_odds_chosen": 1.1137733459472656, |
| "log_odds_ratio": -0.4105357825756073, |
| "logits/chosen": 2.2789127826690674, |
| "logits/rejected": 2.3198835849761963, |
| "logps/chosen": -0.2862909138202667, |
| "logps/rejected": -0.725393533706665, |
| "loss": 1.0414, |
| "nll_loss": 1.0003019571304321, |
| "rewards/accuracies": 0.8083333373069763, |
| "rewards/chosen": -0.028629092499613762, |
| "rewards/margins": 0.04391026496887207, |
| "rewards/rejected": -0.07253936678171158, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.0041589648798521, |
| "grad_norm": 2.1046509742736816, |
| "learning_rate": 3.047272727272727e-07, |
| "log_odds_chosen": 0.8841580152511597, |
| "log_odds_ratio": -0.5193544626235962, |
| "logits/chosen": 2.157961368560791, |
| "logits/rejected": 2.214885711669922, |
| "logps/chosen": -0.31194257736206055, |
| "logps/rejected": -0.6325222849845886, |
| "loss": 0.9397, |
| "nll_loss": 1.0297856330871582, |
| "rewards/accuracies": 0.6891025900840759, |
| "rewards/chosen": -0.031194258481264114, |
| "rewards/margins": 0.03205796703696251, |
| "rewards/rejected": -0.06325222551822662, |
| "step": 725 |
| }, |
| { |
| "epoch": 1.011090573012939, |
| "grad_norm": 11.973499298095703, |
| "learning_rate": 3.038181818181818e-07, |
| "log_odds_chosen": 1.0592764616012573, |
| "log_odds_ratio": -0.42826417088508606, |
| "logits/chosen": 2.2182023525238037, |
| "logits/rejected": 2.259822368621826, |
| "logps/chosen": -0.27322566509246826, |
| "logps/rejected": -0.6173789501190186, |
| "loss": 1.0566, |
| "nll_loss": 1.0138195753097534, |
| "rewards/accuracies": 0.7916666865348816, |
| "rewards/chosen": -0.027322567999362946, |
| "rewards/margins": 0.03441532701253891, |
| "rewards/rejected": -0.061737895011901855, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.018022181146026, |
| "grad_norm": 1.6032253503799438, |
| "learning_rate": 3.029090909090909e-07, |
| "log_odds_chosen": 1.0015116930007935, |
| "log_odds_ratio": -0.45987558364868164, |
| "logits/chosen": 2.2573957443237305, |
| "logits/rejected": 2.302398920059204, |
| "logps/chosen": -0.29264530539512634, |
| "logps/rejected": -0.7004638910293579, |
| "loss": 1.0733, |
| "nll_loss": 1.0272737741470337, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -0.029264533892273903, |
| "rewards/margins": 0.04078185185790062, |
| "rewards/rejected": -0.07004638761281967, |
| "step": 735 |
| }, |
| { |
| "epoch": 1.0249537892791127, |
| "grad_norm": 1.9156720638275146, |
| "learning_rate": 3.02e-07, |
| "log_odds_chosen": 0.9316055178642273, |
| "log_odds_ratio": -0.49356502294540405, |
| "logits/chosen": 2.2086682319641113, |
| "logits/rejected": 2.247258424758911, |
| "logps/chosen": -0.2785702645778656, |
| "logps/rejected": -0.6358006000518799, |
| "loss": 1.0579, |
| "nll_loss": 1.0085234642028809, |
| "rewards/accuracies": 0.7583333253860474, |
| "rewards/chosen": -0.02785702608525753, |
| "rewards/margins": 0.035723041743040085, |
| "rewards/rejected": -0.06358006596565247, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.0318853974121995, |
| "grad_norm": 1.8676711320877075, |
| "learning_rate": 3.0109090909090905e-07, |
| "log_odds_chosen": 1.0930769443511963, |
| "log_odds_ratio": -0.41506868600845337, |
| "logits/chosen": 2.1967382431030273, |
| "logits/rejected": 2.2548134326934814, |
| "logps/chosen": -0.2859395146369934, |
| "logps/rejected": -0.6978200078010559, |
| "loss": 1.0064, |
| "nll_loss": 0.964898407459259, |
| "rewards/accuracies": 0.8166666626930237, |
| "rewards/chosen": -0.02859395369887352, |
| "rewards/margins": 0.04118805751204491, |
| "rewards/rejected": -0.06978200376033783, |
| "step": 745 |
| }, |
| { |
| "epoch": 1.0388170055452866, |
| "grad_norm": 1.7424761056900024, |
| "learning_rate": 3.0018181818181817e-07, |
| "log_odds_chosen": 1.030094027519226, |
| "log_odds_ratio": -0.45161333680152893, |
| "logits/chosen": 2.171247959136963, |
| "logits/rejected": 2.210944652557373, |
| "logps/chosen": -0.2976371645927429, |
| "logps/rejected": -0.6693560481071472, |
| "loss": 1.0341, |
| "nll_loss": 0.9889503717422485, |
| "rewards/accuracies": 0.7583333253860474, |
| "rewards/chosen": -0.02976371720433235, |
| "rewards/margins": 0.03717188537120819, |
| "rewards/rejected": -0.06693560630083084, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.0457486136783734, |
| "grad_norm": 1.6317884922027588, |
| "learning_rate": 2.9927272727272724e-07, |
| "log_odds_chosen": 1.2374706268310547, |
| "log_odds_ratio": -0.3697361350059509, |
| "logits/chosen": 2.271786689758301, |
| "logits/rejected": 2.3244924545288086, |
| "logps/chosen": -0.27134397625923157, |
| "logps/rejected": -0.7432472109794617, |
| "loss": 1.0338, |
| "nll_loss": 0.996829628944397, |
| "rewards/accuracies": 0.8916666507720947, |
| "rewards/chosen": -0.02713439241051674, |
| "rewards/margins": 0.04719032719731331, |
| "rewards/rejected": -0.07432472705841064, |
| "step": 755 |
| }, |
| { |
| "epoch": 1.0526802218114604, |
| "grad_norm": 1.9266161918640137, |
| "learning_rate": 2.983636363636363e-07, |
| "log_odds_chosen": 1.1042684316635132, |
| "log_odds_ratio": -0.42497718334198, |
| "logits/chosen": 2.178818464279175, |
| "logits/rejected": 2.243793487548828, |
| "logps/chosen": -0.23215913772583008, |
| "logps/rejected": -0.6292255520820618, |
| "loss": 1.0091, |
| "nll_loss": 0.9665910601615906, |
| "rewards/accuracies": 0.7666666507720947, |
| "rewards/chosen": -0.023215916007757187, |
| "rewards/margins": 0.03970663994550705, |
| "rewards/rejected": -0.06292255967855453, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.0596118299445472, |
| "grad_norm": 2.3465418815612793, |
| "learning_rate": 2.9745454545454544e-07, |
| "log_odds_chosen": 1.1358228921890259, |
| "log_odds_ratio": -0.43412578105926514, |
| "logits/chosen": 2.157498598098755, |
| "logits/rejected": 2.2107603549957275, |
| "logps/chosen": -0.29064181447029114, |
| "logps/rejected": -0.6974590420722961, |
| "loss": 1.0562, |
| "nll_loss": 1.0127959251403809, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.029064182192087173, |
| "rewards/margins": 0.04068171977996826, |
| "rewards/rejected": -0.06974589824676514, |
| "step": 765 |
| }, |
| { |
| "epoch": 1.066543438077634, |
| "grad_norm": 1.337546944618225, |
| "learning_rate": 2.965454545454545e-07, |
| "log_odds_chosen": 1.1235884428024292, |
| "log_odds_ratio": -0.4154122769832611, |
| "logits/chosen": 2.1831068992614746, |
| "logits/rejected": 2.230762243270874, |
| "logps/chosen": -0.25542908906936646, |
| "logps/rejected": -0.6585070490837097, |
| "loss": 1.0557, |
| "nll_loss": 1.0141483545303345, |
| "rewards/accuracies": 0.8166666626930237, |
| "rewards/chosen": -0.025542909279465675, |
| "rewards/margins": 0.04030779376626015, |
| "rewards/rejected": -0.06585069000720978, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.073475046210721, |
| "grad_norm": 1.92345130443573, |
| "learning_rate": 2.9563636363636363e-07, |
| "log_odds_chosen": 1.2442071437835693, |
| "log_odds_ratio": -0.3937591016292572, |
| "logits/chosen": 2.1876795291900635, |
| "logits/rejected": 2.237107515335083, |
| "logps/chosen": -0.2693581283092499, |
| "logps/rejected": -0.7542040944099426, |
| "loss": 1.0156, |
| "nll_loss": 0.9762417078018188, |
| "rewards/accuracies": 0.8166666626930237, |
| "rewards/chosen": -0.026935815811157227, |
| "rewards/margins": 0.04848460480570793, |
| "rewards/rejected": -0.07542040199041367, |
| "step": 775 |
| }, |
| { |
| "epoch": 1.0804066543438078, |
| "grad_norm": 1.3390204906463623, |
| "learning_rate": 2.947272727272727e-07, |
| "log_odds_chosen": 1.0398173332214355, |
| "log_odds_ratio": -0.429106205701828, |
| "logits/chosen": 2.1649744510650635, |
| "logits/rejected": 2.2123680114746094, |
| "logps/chosen": -0.27507466077804565, |
| "logps/rejected": -0.6559757590293884, |
| "loss": 1.0672, |
| "nll_loss": 1.0242794752120972, |
| "rewards/accuracies": 0.8166666626930237, |
| "rewards/chosen": -0.027507467195391655, |
| "rewards/margins": 0.038090117275714874, |
| "rewards/rejected": -0.06559757888317108, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.0873382624768946, |
| "grad_norm": 1.570081114768982, |
| "learning_rate": 2.9381818181818177e-07, |
| "log_odds_chosen": 0.9334086775779724, |
| "log_odds_ratio": -0.4862407147884369, |
| "logits/chosen": 2.119239330291748, |
| "logits/rejected": 2.1761107444763184, |
| "logps/chosen": -0.25805288553237915, |
| "logps/rejected": -0.6043078303337097, |
| "loss": 1.0514, |
| "nll_loss": 1.002801537513733, |
| "rewards/accuracies": 0.7833333611488342, |
| "rewards/chosen": -0.025805287063121796, |
| "rewards/margins": 0.034625496715307236, |
| "rewards/rejected": -0.06043078005313873, |
| "step": 785 |
| }, |
| { |
| "epoch": 1.0942698706099816, |
| "grad_norm": 2.047260284423828, |
| "learning_rate": 2.929090909090909e-07, |
| "log_odds_chosen": 1.1341291666030884, |
| "log_odds_ratio": -0.4764944314956665, |
| "logits/chosen": 2.219109296798706, |
| "logits/rejected": 2.278170347213745, |
| "logps/chosen": -0.3056103587150574, |
| "logps/rejected": -0.7312763333320618, |
| "loss": 1.0227, |
| "nll_loss": 0.9750102162361145, |
| "rewards/accuracies": 0.7749999761581421, |
| "rewards/chosen": -0.030561033636331558, |
| "rewards/margins": 0.04256659746170044, |
| "rewards/rejected": -0.0731276348233223, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.1012014787430684, |
| "grad_norm": 1.683436393737793, |
| "learning_rate": 2.9199999999999997e-07, |
| "log_odds_chosen": 1.1937026977539062, |
| "log_odds_ratio": -0.41498705744743347, |
| "logits/chosen": 2.276967763900757, |
| "logits/rejected": 2.3187286853790283, |
| "logps/chosen": -0.2915228605270386, |
| "logps/rejected": -0.7586190700531006, |
| "loss": 1.0585, |
| "nll_loss": 1.0169990062713623, |
| "rewards/accuracies": 0.8166666626930237, |
| "rewards/chosen": -0.0291522815823555, |
| "rewards/margins": 0.04670962318778038, |
| "rewards/rejected": -0.07586190849542618, |
| "step": 795 |
| }, |
| { |
| "epoch": 1.1081330868761552, |
| "grad_norm": 1.2493406534194946, |
| "learning_rate": 2.910909090909091e-07, |
| "log_odds_chosen": 1.2343627214431763, |
| "log_odds_ratio": -0.3899300992488861, |
| "logits/chosen": 2.1663858890533447, |
| "logits/rejected": 2.2325944900512695, |
| "logps/chosen": -0.2753217816352844, |
| "logps/rejected": -0.7316860556602478, |
| "loss": 1.0388, |
| "nll_loss": 0.999790370464325, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.02753218077123165, |
| "rewards/margins": 0.04563641548156738, |
| "rewards/rejected": -0.07316859811544418, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.1150646950092422, |
| "grad_norm": 1.5523159503936768, |
| "learning_rate": 2.9018181818181816e-07, |
| "log_odds_chosen": 1.0665152072906494, |
| "log_odds_ratio": -0.45254915952682495, |
| "logits/chosen": 2.245769739151001, |
| "logits/rejected": 2.2844347953796387, |
| "logps/chosen": -0.31260156631469727, |
| "logps/rejected": -0.7254413366317749, |
| "loss": 1.0384, |
| "nll_loss": 0.9930953979492188, |
| "rewards/accuracies": 0.7833333611488342, |
| "rewards/chosen": -0.031260158866643906, |
| "rewards/margins": 0.041283976286649704, |
| "rewards/rejected": -0.07254412770271301, |
| "step": 805 |
| }, |
| { |
| "epoch": 1.121996303142329, |
| "grad_norm": 2.6294355392456055, |
| "learning_rate": 2.8927272727272723e-07, |
| "log_odds_chosen": 1.2369199991226196, |
| "log_odds_ratio": -0.42075902223587036, |
| "logits/chosen": 2.0666658878326416, |
| "logits/rejected": 2.124197244644165, |
| "logps/chosen": -0.2580902874469757, |
| "logps/rejected": -0.705311119556427, |
| "loss": 1.0688, |
| "nll_loss": 1.0266811847686768, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -0.02580902725458145, |
| "rewards/margins": 0.04472209885716438, |
| "rewards/rejected": -0.07053112238645554, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.1289279112754158, |
| "grad_norm": 2.0441761016845703, |
| "learning_rate": 2.8836363636363636e-07, |
| "log_odds_chosen": 1.0299813747406006, |
| "log_odds_ratio": -0.48719069361686707, |
| "logits/chosen": 2.1046738624572754, |
| "logits/rejected": 2.1503727436065674, |
| "logps/chosen": -0.2982068955898285, |
| "logps/rejected": -0.6966476440429688, |
| "loss": 1.0879, |
| "nll_loss": 1.0392258167266846, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.029820691794157028, |
| "rewards/margins": 0.039844077080488205, |
| "rewards/rejected": -0.06966476142406464, |
| "step": 815 |
| }, |
| { |
| "epoch": 1.1358595194085028, |
| "grad_norm": 1.5556236505508423, |
| "learning_rate": 2.8745454545454543e-07, |
| "log_odds_chosen": 1.2123700380325317, |
| "log_odds_ratio": -0.4055812954902649, |
| "logits/chosen": 2.209066867828369, |
| "logits/rejected": 2.2755773067474365, |
| "logps/chosen": -0.28011006116867065, |
| "logps/rejected": -0.7493889331817627, |
| "loss": 1.0693, |
| "nll_loss": 1.0287730693817139, |
| "rewards/accuracies": 0.7833333611488342, |
| "rewards/chosen": -0.028011005371809006, |
| "rewards/margins": 0.046927884221076965, |
| "rewards/rejected": -0.07493889331817627, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.1427911275415896, |
| "grad_norm": 1.8499306440353394, |
| "learning_rate": 2.865454545454545e-07, |
| "log_odds_chosen": 1.2569739818572998, |
| "log_odds_ratio": -0.4136119782924652, |
| "logits/chosen": 2.1816537380218506, |
| "logits/rejected": 2.23928165435791, |
| "logps/chosen": -0.2742965817451477, |
| "logps/rejected": -0.7915823459625244, |
| "loss": 1.0642, |
| "nll_loss": 1.0228677988052368, |
| "rewards/accuracies": 0.7833333611488342, |
| "rewards/chosen": -0.02742965891957283, |
| "rewards/margins": 0.051728587597608566, |
| "rewards/rejected": -0.0791582390666008, |
| "step": 825 |
| }, |
| { |
| "epoch": 1.1497227356746764, |
| "grad_norm": 1.7029317617416382, |
| "learning_rate": 2.856363636363636e-07, |
| "log_odds_chosen": 1.1728360652923584, |
| "log_odds_ratio": -0.4236757159233093, |
| "logits/chosen": 2.178098201751709, |
| "logits/rejected": 2.2561495304107666, |
| "logps/chosen": -0.26117414236068726, |
| "logps/rejected": -0.7411549091339111, |
| "loss": 1.0474, |
| "nll_loss": 1.0050232410430908, |
| "rewards/accuracies": 0.8416666388511658, |
| "rewards/chosen": -0.026117417961359024, |
| "rewards/margins": 0.04799807071685791, |
| "rewards/rejected": -0.07411548495292664, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.1566543438077634, |
| "grad_norm": 2.336233615875244, |
| "learning_rate": 2.847272727272727e-07, |
| "log_odds_chosen": 1.2286179065704346, |
| "log_odds_ratio": -0.41043874621391296, |
| "logits/chosen": 2.171309232711792, |
| "logits/rejected": 2.2093799114227295, |
| "logps/chosen": -0.2653755843639374, |
| "logps/rejected": -0.7861889600753784, |
| "loss": 1.062, |
| "nll_loss": 1.0209335088729858, |
| "rewards/accuracies": 0.8083333373069763, |
| "rewards/chosen": -0.026537559926509857, |
| "rewards/margins": 0.052081331610679626, |
| "rewards/rejected": -0.07861888408660889, |
| "step": 835 |
| }, |
| { |
| "epoch": 1.1635859519408502, |
| "grad_norm": 1.7186654806137085, |
| "learning_rate": 2.838181818181818e-07, |
| "log_odds_chosen": 1.1606539487838745, |
| "log_odds_ratio": -0.40123751759529114, |
| "logits/chosen": 2.206134796142578, |
| "logits/rejected": 2.2637779712677, |
| "logps/chosen": -0.2596302628517151, |
| "logps/rejected": -0.7078793048858643, |
| "loss": 1.0382, |
| "nll_loss": 0.9980748891830444, |
| "rewards/accuracies": 0.8083333373069763, |
| "rewards/chosen": -0.02596302516758442, |
| "rewards/margins": 0.04482491686940193, |
| "rewards/rejected": -0.0707879364490509, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.1705175600739373, |
| "grad_norm": 2.3376824855804443, |
| "learning_rate": 2.829090909090909e-07, |
| "log_odds_chosen": 0.9114227890968323, |
| "log_odds_ratio": -0.510475754737854, |
| "logits/chosen": 2.1974329948425293, |
| "logits/rejected": 2.257779359817505, |
| "logps/chosen": -0.3180769979953766, |
| "logps/rejected": -0.6727191805839539, |
| "loss": 1.056, |
| "nll_loss": 1.0049461126327515, |
| "rewards/accuracies": 0.7333333492279053, |
| "rewards/chosen": -0.03180769830942154, |
| "rewards/margins": 0.035464223474264145, |
| "rewards/rejected": -0.06727192550897598, |
| "step": 845 |
| }, |
| { |
| "epoch": 1.177449168207024, |
| "grad_norm": 1.9338550567626953, |
| "learning_rate": 2.8199999999999996e-07, |
| "log_odds_chosen": 1.1852858066558838, |
| "log_odds_ratio": -0.40160685777664185, |
| "logits/chosen": 2.1158764362335205, |
| "logits/rejected": 2.1807785034179688, |
| "logps/chosen": -0.3050893247127533, |
| "logps/rejected": -0.7781674861907959, |
| "loss": 1.023, |
| "nll_loss": 0.9828112125396729, |
| "rewards/accuracies": 0.8416666388511658, |
| "rewards/chosen": -0.030508937314152718, |
| "rewards/margins": 0.047307804226875305, |
| "rewards/rejected": -0.07781673967838287, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.1843807763401109, |
| "grad_norm": 32.405643463134766, |
| "learning_rate": 2.810909090909091e-07, |
| "log_odds_chosen": 1.2816615104675293, |
| "log_odds_ratio": -0.38976308703422546, |
| "logits/chosen": 2.062394142150879, |
| "logits/rejected": 2.130361557006836, |
| "logps/chosen": -0.27238449454307556, |
| "logps/rejected": -0.7614350914955139, |
| "loss": 1.0719, |
| "nll_loss": 1.032881498336792, |
| "rewards/accuracies": 0.8583333492279053, |
| "rewards/chosen": -0.027238452807068825, |
| "rewards/margins": 0.048905063420534134, |
| "rewards/rejected": -0.07614351063966751, |
| "step": 855 |
| }, |
| { |
| "epoch": 1.1913123844731979, |
| "grad_norm": 1.5311826467514038, |
| "learning_rate": 2.8018181818181815e-07, |
| "log_odds_chosen": 1.1707886457443237, |
| "log_odds_ratio": -0.42907601594924927, |
| "logits/chosen": 2.1916935443878174, |
| "logits/rejected": 2.255692481994629, |
| "logps/chosen": -0.31618261337280273, |
| "logps/rejected": -0.7682264447212219, |
| "loss": 1.0343, |
| "nll_loss": 0.9914371967315674, |
| "rewards/accuracies": 0.8083333373069763, |
| "rewards/chosen": -0.031618259847164154, |
| "rewards/margins": 0.04520439729094505, |
| "rewards/rejected": -0.07682265341281891, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.1982439926062847, |
| "grad_norm": 1.3627372980117798, |
| "learning_rate": 2.792727272727273e-07, |
| "log_odds_chosen": 1.2053844928741455, |
| "log_odds_ratio": -0.39541786909103394, |
| "logits/chosen": 2.108999490737915, |
| "logits/rejected": 2.1568171977996826, |
| "logps/chosen": -0.2578326463699341, |
| "logps/rejected": -0.6993592381477356, |
| "loss": 1.0029, |
| "nll_loss": 0.9633194804191589, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -0.025783265009522438, |
| "rewards/margins": 0.04415265843272209, |
| "rewards/rejected": -0.06993592530488968, |
| "step": 865 |
| }, |
| { |
| "epoch": 1.2051756007393715, |
| "grad_norm": 2.1243605613708496, |
| "learning_rate": 2.7836363636363635e-07, |
| "log_odds_chosen": 1.1114146709442139, |
| "log_odds_ratio": -0.44650015234947205, |
| "logits/chosen": 2.155231237411499, |
| "logits/rejected": 2.2109177112579346, |
| "logps/chosen": -0.30871832370758057, |
| "logps/rejected": -0.7572067379951477, |
| "loss": 1.0634, |
| "nll_loss": 1.018787145614624, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.030871832743287086, |
| "rewards/margins": 0.04484884440898895, |
| "rewards/rejected": -0.07572067528963089, |
| "step": 870 |
| }, |
| { |
| "epoch": 1.2121072088724585, |
| "grad_norm": 1.8302316665649414, |
| "learning_rate": 2.774545454545454e-07, |
| "log_odds_chosen": 1.195847749710083, |
| "log_odds_ratio": -0.45233553647994995, |
| "logits/chosen": 2.2495977878570557, |
| "logits/rejected": 2.3004953861236572, |
| "logps/chosen": -0.32949963212013245, |
| "logps/rejected": -0.8411144614219666, |
| "loss": 1.0578, |
| "nll_loss": 1.0125887393951416, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.03294995799660683, |
| "rewards/margins": 0.05116148665547371, |
| "rewards/rejected": -0.08411144465208054, |
| "step": 875 |
| }, |
| { |
| "epoch": 1.2190388170055453, |
| "grad_norm": 1.7341769933700562, |
| "learning_rate": 2.7654545454545454e-07, |
| "log_odds_chosen": 1.0370428562164307, |
| "log_odds_ratio": -0.4735434949398041, |
| "logits/chosen": 2.110995292663574, |
| "logits/rejected": 2.166097402572632, |
| "logps/chosen": -0.321010559797287, |
| "logps/rejected": -0.7727290391921997, |
| "loss": 1.0489, |
| "nll_loss": 1.0015724897384644, |
| "rewards/accuracies": 0.7833333611488342, |
| "rewards/chosen": -0.03210105374455452, |
| "rewards/margins": 0.045171838253736496, |
| "rewards/rejected": -0.07727289199829102, |
| "step": 880 |
| }, |
| { |
| "epoch": 1.225970425138632, |
| "grad_norm": 1.5535844564437866, |
| "learning_rate": 2.756363636363636e-07, |
| "log_odds_chosen": 1.0898783206939697, |
| "log_odds_ratio": -0.42453181743621826, |
| "logits/chosen": 2.1117665767669678, |
| "logits/rejected": 2.1755316257476807, |
| "logps/chosen": -0.27768510580062866, |
| "logps/rejected": -0.6975895166397095, |
| "loss": 1.0298, |
| "nll_loss": 0.9873270392417908, |
| "rewards/accuracies": 0.8416666388511658, |
| "rewards/chosen": -0.027768509462475777, |
| "rewards/margins": 0.04199044778943062, |
| "rewards/rejected": -0.06975895911455154, |
| "step": 885 |
| }, |
| { |
| "epoch": 1.232902033271719, |
| "grad_norm": 2.520540237426758, |
| "learning_rate": 2.747272727272727e-07, |
| "log_odds_chosen": 1.0620572566986084, |
| "log_odds_ratio": -0.44835466146469116, |
| "logits/chosen": 2.088442802429199, |
| "logits/rejected": 2.1342625617980957, |
| "logps/chosen": -0.31703558564186096, |
| "logps/rejected": -0.6918619871139526, |
| "loss": 1.0714, |
| "nll_loss": 1.0265547037124634, |
| "rewards/accuracies": 0.8166666626930237, |
| "rewards/chosen": -0.031703557819128036, |
| "rewards/margins": 0.03748263791203499, |
| "rewards/rejected": -0.06918619573116302, |
| "step": 890 |
| }, |
| { |
| "epoch": 1.239833641404806, |
| "grad_norm": 1.8474420309066772, |
| "learning_rate": 2.738181818181818e-07, |
| "log_odds_chosen": 1.2523150444030762, |
| "log_odds_ratio": -0.38173770904541016, |
| "logits/chosen": 2.1438181400299072, |
| "logits/rejected": 2.1918885707855225, |
| "logps/chosen": -0.2869779169559479, |
| "logps/rejected": -0.8195567727088928, |
| "loss": 1.0427, |
| "nll_loss": 1.004526138305664, |
| "rewards/accuracies": 0.8416666388511658, |
| "rewards/chosen": -0.028697794303297997, |
| "rewards/margins": 0.05325789004564285, |
| "rewards/rejected": -0.0819556713104248, |
| "step": 895 |
| }, |
| { |
| "epoch": 1.2467652495378927, |
| "grad_norm": 1.8705800771713257, |
| "learning_rate": 2.729090909090909e-07, |
| "log_odds_chosen": 1.3002660274505615, |
| "log_odds_ratio": -0.3636976182460785, |
| "logits/chosen": 2.1178719997406006, |
| "logits/rejected": 2.189141035079956, |
| "logps/chosen": -0.25350457429885864, |
| "logps/rejected": -0.7615570425987244, |
| "loss": 1.0187, |
| "nll_loss": 0.9823279976844788, |
| "rewards/accuracies": 0.8833333253860474, |
| "rewards/chosen": -0.025350457057356834, |
| "rewards/margins": 0.05080525204539299, |
| "rewards/rejected": -0.07615570724010468, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.2536968576709797, |
| "grad_norm": 1.3865669965744019, |
| "learning_rate": 2.72e-07, |
| "log_odds_chosen": 1.2055052518844604, |
| "log_odds_ratio": -0.3909756541252136, |
| "logits/chosen": 2.1472742557525635, |
| "logits/rejected": 2.194523572921753, |
| "logps/chosen": -0.26772162318229675, |
| "logps/rejected": -0.7685297727584839, |
| "loss": 1.0522, |
| "nll_loss": 1.0131094455718994, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -0.026772161945700645, |
| "rewards/margins": 0.05008082091808319, |
| "rewards/rejected": -0.07685296982526779, |
| "step": 905 |
| }, |
| { |
| "epoch": 1.2606284658040665, |
| "grad_norm": 1.4818207025527954, |
| "learning_rate": 2.7109090909090907e-07, |
| "log_odds_chosen": 1.338388204574585, |
| "log_odds_ratio": -0.3422391712665558, |
| "logits/chosen": 2.08438777923584, |
| "logits/rejected": 2.1425976753234863, |
| "logps/chosen": -0.28000956773757935, |
| "logps/rejected": -0.7683375477790833, |
| "loss": 1.0637, |
| "nll_loss": 1.0295100212097168, |
| "rewards/accuracies": 0.8916666507720947, |
| "rewards/chosen": -0.028000956401228905, |
| "rewards/margins": 0.04883280023932457, |
| "rewards/rejected": -0.07683374732732773, |
| "step": 910 |
| }, |
| { |
| "epoch": 1.2675600739371533, |
| "grad_norm": 1.9989374876022339, |
| "learning_rate": 2.7018181818181814e-07, |
| "log_odds_chosen": 1.1596630811691284, |
| "log_odds_ratio": -0.46093183755874634, |
| "logits/chosen": 2.104174852371216, |
| "logits/rejected": 2.1529417037963867, |
| "logps/chosen": -0.30684390664100647, |
| "logps/rejected": -0.7890374660491943, |
| "loss": 1.0142, |
| "nll_loss": 0.9680600762367249, |
| "rewards/accuracies": 0.7916666865348816, |
| "rewards/chosen": -0.03068438731133938, |
| "rewards/margins": 0.048219338059425354, |
| "rewards/rejected": -0.07890374213457108, |
| "step": 915 |
| }, |
| { |
| "epoch": 1.2744916820702403, |
| "grad_norm": 1.6794359683990479, |
| "learning_rate": 2.6927272727272727e-07, |
| "log_odds_chosen": 1.4304643869400024, |
| "log_odds_ratio": -0.38649657368659973, |
| "logits/chosen": 2.1374399662017822, |
| "logits/rejected": 2.1798601150512695, |
| "logps/chosen": -0.29669511318206787, |
| "logps/rejected": -0.8554088473320007, |
| "loss": 1.0687, |
| "nll_loss": 1.0300294160842896, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -0.029669513925909996, |
| "rewards/margins": 0.05587137117981911, |
| "rewards/rejected": -0.08554088324308395, |
| "step": 920 |
| }, |
| { |
| "epoch": 1.2814232902033271, |
| "grad_norm": 1.981391429901123, |
| "learning_rate": 2.6836363636363634e-07, |
| "log_odds_chosen": 1.2685843706130981, |
| "log_odds_ratio": -0.41906872391700745, |
| "logits/chosen": 2.2779479026794434, |
| "logits/rejected": 2.3425650596618652, |
| "logps/chosen": -0.3056505024433136, |
| "logps/rejected": -0.8699617981910706, |
| "loss": 1.0141, |
| "nll_loss": 0.9721490144729614, |
| "rewards/accuracies": 0.8083333373069763, |
| "rewards/chosen": -0.03056504763662815, |
| "rewards/margins": 0.056431129574775696, |
| "rewards/rejected": -0.0869961753487587, |
| "step": 925 |
| }, |
| { |
| "epoch": 1.2883548983364141, |
| "grad_norm": 2.3423352241516113, |
| "learning_rate": 2.674545454545454e-07, |
| "log_odds_chosen": 1.137675404548645, |
| "log_odds_ratio": -0.4286971390247345, |
| "logits/chosen": 2.1223714351654053, |
| "logits/rejected": 2.1788086891174316, |
| "logps/chosen": -0.28890591859817505, |
| "logps/rejected": -0.7336766719818115, |
| "loss": 1.0341, |
| "nll_loss": 0.9912530779838562, |
| "rewards/accuracies": 0.8166666626930237, |
| "rewards/chosen": -0.028890585526823997, |
| "rewards/margins": 0.04447708651423454, |
| "rewards/rejected": -0.07336767762899399, |
| "step": 930 |
| }, |
| { |
| "epoch": 1.295286506469501, |
| "grad_norm": 1.893749475479126, |
| "learning_rate": 2.6654545454545453e-07, |
| "log_odds_chosen": 1.4332900047302246, |
| "log_odds_ratio": -0.34831517934799194, |
| "logits/chosen": 2.1003878116607666, |
| "logits/rejected": 2.1722400188446045, |
| "logps/chosen": -0.28711190819740295, |
| "logps/rejected": -0.8249975442886353, |
| "loss": 1.0661, |
| "nll_loss": 1.0313143730163574, |
| "rewards/accuracies": 0.9083333611488342, |
| "rewards/chosen": -0.028711196035146713, |
| "rewards/margins": 0.05378856882452965, |
| "rewards/rejected": -0.08249974995851517, |
| "step": 935 |
| }, |
| { |
| "epoch": 1.3022181146025877, |
| "grad_norm": 4.6435160636901855, |
| "learning_rate": 2.656363636363636e-07, |
| "log_odds_chosen": 1.082852840423584, |
| "log_odds_ratio": -0.45315298438072205, |
| "logits/chosen": 2.1574742794036865, |
| "logits/rejected": 2.178946018218994, |
| "logps/chosen": -0.3037134110927582, |
| "logps/rejected": -0.7448738813400269, |
| "loss": 1.0704, |
| "nll_loss": 1.0250810384750366, |
| "rewards/accuracies": 0.7916666865348816, |
| "rewards/chosen": -0.03037133812904358, |
| "rewards/margins": 0.0441160574555397, |
| "rewards/rejected": -0.07448740303516388, |
| "step": 940 |
| }, |
| { |
| "epoch": 1.3091497227356748, |
| "grad_norm": 2.1025171279907227, |
| "learning_rate": 2.647272727272727e-07, |
| "log_odds_chosen": 1.1002755165100098, |
| "log_odds_ratio": -0.44053414463996887, |
| "logits/chosen": 2.106156826019287, |
| "logits/rejected": 2.155453681945801, |
| "logps/chosen": -0.2461674064397812, |
| "logps/rejected": -0.6956557035446167, |
| "loss": 1.0449, |
| "nll_loss": 1.0008207559585571, |
| "rewards/accuracies": 0.7666666507720947, |
| "rewards/chosen": -0.024616742506623268, |
| "rewards/margins": 0.04494882747530937, |
| "rewards/rejected": -0.06956557184457779, |
| "step": 945 |
| }, |
| { |
| "epoch": 1.3160813308687616, |
| "grad_norm": 1.3751192092895508, |
| "learning_rate": 2.638181818181818e-07, |
| "log_odds_chosen": 1.303594708442688, |
| "log_odds_ratio": -0.36508041620254517, |
| "logits/chosen": 2.1502010822296143, |
| "logits/rejected": 2.213773012161255, |
| "logps/chosen": -0.24462252855300903, |
| "logps/rejected": -0.7531419396400452, |
| "loss": 1.0002, |
| "nll_loss": 0.9637396931648254, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.024462254717946053, |
| "rewards/margins": 0.05085194110870361, |
| "rewards/rejected": -0.07531419396400452, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.3230129390018484, |
| "grad_norm": 2.228548526763916, |
| "learning_rate": 2.6290909090909087e-07, |
| "log_odds_chosen": 1.1054414510726929, |
| "log_odds_ratio": -0.4313698410987854, |
| "logits/chosen": 2.054419755935669, |
| "logits/rejected": 2.1415534019470215, |
| "logps/chosen": -0.25074058771133423, |
| "logps/rejected": -0.6487269997596741, |
| "loss": 0.9808, |
| "nll_loss": 0.9376189708709717, |
| "rewards/accuracies": 0.8083333373069763, |
| "rewards/chosen": -0.025074057281017303, |
| "rewards/margins": 0.039798639714717865, |
| "rewards/rejected": -0.06487269699573517, |
| "step": 955 |
| }, |
| { |
| "epoch": 1.3299445471349354, |
| "grad_norm": 2.61733078956604, |
| "learning_rate": 2.62e-07, |
| "log_odds_chosen": 1.2382240295410156, |
| "log_odds_ratio": -0.42497238516807556, |
| "logits/chosen": 2.222075939178467, |
| "logits/rejected": 2.268425464630127, |
| "logps/chosen": -0.3022395372390747, |
| "logps/rejected": -0.819779634475708, |
| "loss": 1.1007, |
| "nll_loss": 1.0581555366516113, |
| "rewards/accuracies": 0.7833333611488342, |
| "rewards/chosen": -0.03022395819425583, |
| "rewards/margins": 0.05175400897860527, |
| "rewards/rejected": -0.0819779708981514, |
| "step": 960 |
| }, |
| { |
| "epoch": 1.3368761552680222, |
| "grad_norm": 1.3807116746902466, |
| "learning_rate": 2.6109090909090906e-07, |
| "log_odds_chosen": 1.2055879831314087, |
| "log_odds_ratio": -0.3867366909980774, |
| "logits/chosen": 2.1033875942230225, |
| "logits/rejected": 2.1540472507476807, |
| "logps/chosen": -0.277556449174881, |
| "logps/rejected": -0.7209326028823853, |
| "loss": 1.0018, |
| "nll_loss": 0.9630894064903259, |
| "rewards/accuracies": 0.8833333253860474, |
| "rewards/chosen": -0.027755646035075188, |
| "rewards/margins": 0.044337622821331024, |
| "rewards/rejected": -0.07209326326847076, |
| "step": 965 |
| }, |
| { |
| "epoch": 1.343807763401109, |
| "grad_norm": 2.0066561698913574, |
| "learning_rate": 2.601818181818182e-07, |
| "log_odds_chosen": 1.3411426544189453, |
| "log_odds_ratio": -0.4055359959602356, |
| "logits/chosen": 2.0890884399414062, |
| "logits/rejected": 2.154545783996582, |
| "logps/chosen": -0.3011336922645569, |
| "logps/rejected": -0.8289684057235718, |
| "loss": 1.0136, |
| "nll_loss": 0.9730068445205688, |
| "rewards/accuracies": 0.8416666388511658, |
| "rewards/chosen": -0.03011336922645569, |
| "rewards/margins": 0.05278347432613373, |
| "rewards/rejected": -0.08289684355258942, |
| "step": 970 |
| }, |
| { |
| "epoch": 1.350739371534196, |
| "grad_norm": 1.9893730878829956, |
| "learning_rate": 2.5927272727272726e-07, |
| "log_odds_chosen": 1.1508934497833252, |
| "log_odds_ratio": -0.4049813449382782, |
| "logits/chosen": 2.1374075412750244, |
| "logits/rejected": 2.1761152744293213, |
| "logps/chosen": -0.3276708424091339, |
| "logps/rejected": -0.7975314259529114, |
| "loss": 1.0488, |
| "nll_loss": 1.0082674026489258, |
| "rewards/accuracies": 0.7916666865348816, |
| "rewards/chosen": -0.03276708349585533, |
| "rewards/margins": 0.04698607698082924, |
| "rewards/rejected": -0.07975315302610397, |
| "step": 975 |
| }, |
| { |
| "epoch": 1.3576709796672828, |
| "grad_norm": 2.1329009532928467, |
| "learning_rate": 2.583636363636363e-07, |
| "log_odds_chosen": 1.4169307947158813, |
| "log_odds_ratio": -0.4349002242088318, |
| "logits/chosen": 2.0836265087127686, |
| "logits/rejected": 2.1221506595611572, |
| "logps/chosen": -0.28608009219169617, |
| "logps/rejected": -0.8973101377487183, |
| "loss": 1.043, |
| "nll_loss": 0.9995481967926025, |
| "rewards/accuracies": 0.7833333611488342, |
| "rewards/chosen": -0.028608011081814766, |
| "rewards/margins": 0.06112300232052803, |
| "rewards/rejected": -0.08973101526498795, |
| "step": 980 |
| }, |
| { |
| "epoch": 1.3646025878003698, |
| "grad_norm": 1.5113952159881592, |
| "learning_rate": 2.5745454545454545e-07, |
| "log_odds_chosen": 1.0963186025619507, |
| "log_odds_ratio": -0.45471566915512085, |
| "logits/chosen": 2.0918946266174316, |
| "logits/rejected": 2.147059917449951, |
| "logps/chosen": -0.2852664589881897, |
| "logps/rejected": -0.6635777354240417, |
| "loss": 1.0257, |
| "nll_loss": 0.9802023768424988, |
| "rewards/accuracies": 0.8166666626930237, |
| "rewards/chosen": -0.02852664329111576, |
| "rewards/margins": 0.037831127643585205, |
| "rewards/rejected": -0.06635776907205582, |
| "step": 985 |
| }, |
| { |
| "epoch": 1.3715341959334566, |
| "grad_norm": 1.2862117290496826, |
| "learning_rate": 2.565454545454545e-07, |
| "log_odds_chosen": 1.2140270471572876, |
| "log_odds_ratio": -0.37888041138648987, |
| "logits/chosen": 2.191817283630371, |
| "logits/rejected": 2.248182773590088, |
| "logps/chosen": -0.3066679537296295, |
| "logps/rejected": -0.775726854801178, |
| "loss": 1.0404, |
| "nll_loss": 1.0024964809417725, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -0.030666792765259743, |
| "rewards/margins": 0.046905890107154846, |
| "rewards/rejected": -0.07757268846035004, |
| "step": 990 |
| }, |
| { |
| "epoch": 1.3784658040665434, |
| "grad_norm": 2.1814792156219482, |
| "learning_rate": 2.556363636363636e-07, |
| "log_odds_chosen": 1.1626993417739868, |
| "log_odds_ratio": -0.421124666929245, |
| "logits/chosen": 2.1899914741516113, |
| "logits/rejected": 2.2058660984039307, |
| "logps/chosen": -0.3344000279903412, |
| "logps/rejected": -0.812175989151001, |
| "loss": 1.0606, |
| "nll_loss": 1.0184708833694458, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -0.0334400050342083, |
| "rewards/margins": 0.04777759313583374, |
| "rewards/rejected": -0.08121760189533234, |
| "step": 995 |
| }, |
| { |
| "epoch": 1.3853974121996302, |
| "grad_norm": 1.2583403587341309, |
| "learning_rate": 2.547272727272727e-07, |
| "log_odds_chosen": 1.4967857599258423, |
| "log_odds_ratio": -0.32969042658805847, |
| "logits/chosen": 2.146193027496338, |
| "logits/rejected": 2.210885763168335, |
| "logps/chosen": -0.2802196443080902, |
| "logps/rejected": -0.9464040994644165, |
| "loss": 0.9995, |
| "nll_loss": 0.9665043354034424, |
| "rewards/accuracies": 0.8666666746139526, |
| "rewards/chosen": -0.02802196517586708, |
| "rewards/margins": 0.0666184350848198, |
| "rewards/rejected": -0.09464039653539658, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.3923290203327172, |
| "grad_norm": 3.5290753841400146, |
| "learning_rate": 2.538181818181818e-07, |
| "log_odds_chosen": 1.478700041770935, |
| "log_odds_ratio": -0.36008498072624207, |
| "logits/chosen": 2.2012546062469482, |
| "logits/rejected": 2.257498264312744, |
| "logps/chosen": -0.2387988120317459, |
| "logps/rejected": -0.8150947690010071, |
| "loss": 1.0503, |
| "nll_loss": 1.0142549276351929, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.023879878222942352, |
| "rewards/margins": 0.057629600167274475, |
| "rewards/rejected": -0.08150947839021683, |
| "step": 1005 |
| }, |
| { |
| "epoch": 1.399260628465804, |
| "grad_norm": 2.2447733879089355, |
| "learning_rate": 2.529090909090909e-07, |
| "log_odds_chosen": 1.5088070631027222, |
| "log_odds_ratio": -0.37234050035476685, |
| "logits/chosen": 2.131213665008545, |
| "logits/rejected": 2.2011141777038574, |
| "logps/chosen": -0.30126506090164185, |
| "logps/rejected": -0.9823321104049683, |
| "loss": 1.0395, |
| "nll_loss": 1.0022485256195068, |
| "rewards/accuracies": 0.8416666388511658, |
| "rewards/chosen": -0.030126502737402916, |
| "rewards/margins": 0.06810670346021652, |
| "rewards/rejected": -0.09823321551084518, |
| "step": 1010 |
| }, |
| { |
| "epoch": 1.406192236598891, |
| "grad_norm": 1.6832704544067383, |
| "learning_rate": 2.52e-07, |
| "log_odds_chosen": 1.3786380290985107, |
| "log_odds_ratio": -0.3612533509731293, |
| "logits/chosen": 2.1688296794891357, |
| "logits/rejected": 2.2276947498321533, |
| "logps/chosen": -0.26425522565841675, |
| "logps/rejected": -0.7921401858329773, |
| "loss": 1.0578, |
| "nll_loss": 1.0216939449310303, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.026425523683428764, |
| "rewards/margins": 0.05278850346803665, |
| "rewards/rejected": -0.07921402156352997, |
| "step": 1015 |
| }, |
| { |
| "epoch": 1.4131238447319778, |
| "grad_norm": 1.8064701557159424, |
| "learning_rate": 2.5109090909090905e-07, |
| "log_odds_chosen": 1.475263237953186, |
| "log_odds_ratio": -0.3918589651584625, |
| "logits/chosen": 2.1581435203552246, |
| "logits/rejected": 2.197493314743042, |
| "logps/chosen": -0.3150491416454315, |
| "logps/rejected": -0.9940579533576965, |
| "loss": 1.0716, |
| "nll_loss": 1.032424807548523, |
| "rewards/accuracies": 0.7916666865348816, |
| "rewards/chosen": -0.03150491416454315, |
| "rewards/margins": 0.0679008811712265, |
| "rewards/rejected": -0.09940580278635025, |
| "step": 1020 |
| }, |
| { |
| "epoch": 1.4200554528650646, |
| "grad_norm": 3.116750478744507, |
| "learning_rate": 2.501818181818182e-07, |
| "log_odds_chosen": 1.4267123937606812, |
| "log_odds_ratio": -0.3581138253211975, |
| "logits/chosen": 2.150437593460083, |
| "logits/rejected": 2.196103572845459, |
| "logps/chosen": -0.3007969856262207, |
| "logps/rejected": -0.9089770913124084, |
| "loss": 1.0489, |
| "nll_loss": 1.0131365060806274, |
| "rewards/accuracies": 0.8833333253860474, |
| "rewards/chosen": -0.030079694464802742, |
| "rewards/margins": 0.060818012803792953, |
| "rewards/rejected": -0.09089770913124084, |
| "step": 1025 |
| }, |
| { |
| "epoch": 1.4269870609981516, |
| "grad_norm": 1.498563528060913, |
| "learning_rate": 2.4927272727272725e-07, |
| "log_odds_chosen": 1.207962155342102, |
| "log_odds_ratio": -0.44308769702911377, |
| "logits/chosen": 2.086268663406372, |
| "logits/rejected": 2.1397366523742676, |
| "logps/chosen": -0.2770783007144928, |
| "logps/rejected": -0.7369803786277771, |
| "loss": 0.9999, |
| "nll_loss": 0.9556010961532593, |
| "rewards/accuracies": 0.7916666865348816, |
| "rewards/chosen": -0.02770783193409443, |
| "rewards/margins": 0.04599021375179291, |
| "rewards/rejected": -0.07369804382324219, |
| "step": 1030 |
| }, |
| { |
| "epoch": 1.4339186691312384, |
| "grad_norm": 2.2361621856689453, |
| "learning_rate": 2.483636363636363e-07, |
| "log_odds_chosen": 1.3191566467285156, |
| "log_odds_ratio": -0.40445128083229065, |
| "logits/chosen": 2.138350486755371, |
| "logits/rejected": 2.201185703277588, |
| "logps/chosen": -0.29545170068740845, |
| "logps/rejected": -0.824712872505188, |
| "loss": 1.0544, |
| "nll_loss": 1.0139575004577637, |
| "rewards/accuracies": 0.8166666626930237, |
| "rewards/chosen": -0.029545169323682785, |
| "rewards/margins": 0.05292612686753273, |
| "rewards/rejected": -0.08247129619121552, |
| "step": 1035 |
| }, |
| { |
| "epoch": 1.4408502772643252, |
| "grad_norm": 1.6298840045928955, |
| "learning_rate": 2.4745454545454544e-07, |
| "log_odds_chosen": 1.2173666954040527, |
| "log_odds_ratio": -0.39109528064727783, |
| "logits/chosen": 2.0655899047851562, |
| "logits/rejected": 2.1055774688720703, |
| "logps/chosen": -0.28332966566085815, |
| "logps/rejected": -0.7577340006828308, |
| "loss": 1.0551, |
| "nll_loss": 1.015963077545166, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -0.028332972899079323, |
| "rewards/margins": 0.04744042083621025, |
| "rewards/rejected": -0.07577338814735413, |
| "step": 1040 |
| }, |
| { |
| "epoch": 1.4477818853974123, |
| "grad_norm": 1.9538235664367676, |
| "learning_rate": 2.465454545454545e-07, |
| "log_odds_chosen": 1.4704244136810303, |
| "log_odds_ratio": -0.36091259121894836, |
| "logits/chosen": 2.067735195159912, |
| "logits/rejected": 2.107760190963745, |
| "logps/chosen": -0.24208252131938934, |
| "logps/rejected": -0.8322712779045105, |
| "loss": 1.0402, |
| "nll_loss": 1.0041333436965942, |
| "rewards/accuracies": 0.8416666388511658, |
| "rewards/chosen": -0.024208255112171173, |
| "rewards/margins": 0.05901888757944107, |
| "rewards/rejected": -0.08322712779045105, |
| "step": 1045 |
| }, |
| { |
| "epoch": 1.454713493530499, |
| "grad_norm": 2.375593900680542, |
| "learning_rate": 2.4563636363636363e-07, |
| "log_odds_chosen": 1.342626690864563, |
| "log_odds_ratio": -0.3853161633014679, |
| "logits/chosen": 2.0571084022521973, |
| "logits/rejected": 2.118323802947998, |
| "logps/chosen": -0.2818390429019928, |
| "logps/rejected": -0.8502424955368042, |
| "loss": 1.0375, |
| "nll_loss": 0.9989607930183411, |
| "rewards/accuracies": 0.8416666388511658, |
| "rewards/chosen": -0.02818390727043152, |
| "rewards/margins": 0.05684033781290054, |
| "rewards/rejected": -0.08502423763275146, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.4616451016635859, |
| "grad_norm": 3.926017999649048, |
| "learning_rate": 2.447272727272727e-07, |
| "log_odds_chosen": 1.2430663108825684, |
| "log_odds_ratio": -0.4533371925354004, |
| "logits/chosen": 2.024120569229126, |
| "logits/rejected": 2.0624420642852783, |
| "logps/chosen": -0.26999524235725403, |
| "logps/rejected": -0.7484342455863953, |
| "loss": 1.029, |
| "nll_loss": 0.9836971759796143, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -0.026999525725841522, |
| "rewards/margins": 0.04784390702843666, |
| "rewards/rejected": -0.07484342157840729, |
| "step": 1055 |
| }, |
| { |
| "epoch": 1.4685767097966729, |
| "grad_norm": 2.118476152420044, |
| "learning_rate": 2.438181818181818e-07, |
| "log_odds_chosen": 0.9853528738021851, |
| "log_odds_ratio": -0.4883633553981781, |
| "logits/chosen": 2.0680394172668457, |
| "logits/rejected": 2.136178493499756, |
| "logps/chosen": -0.3353304862976074, |
| "logps/rejected": -0.7483987808227539, |
| "loss": 1.0975, |
| "nll_loss": 1.0486379861831665, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.033533044159412384, |
| "rewards/margins": 0.04130683094263077, |
| "rewards/rejected": -0.07483987510204315, |
| "step": 1060 |
| }, |
| { |
| "epoch": 1.4755083179297597, |
| "grad_norm": 2.533656120300293, |
| "learning_rate": 2.429090909090909e-07, |
| "log_odds_chosen": 0.9804785847663879, |
| "log_odds_ratio": -0.4883616864681244, |
| "logits/chosen": 2.097592353820801, |
| "logits/rejected": 2.1535866260528564, |
| "logps/chosen": -0.29354825615882874, |
| "logps/rejected": -0.7082226872444153, |
| "loss": 1.0215, |
| "nll_loss": 0.9726455807685852, |
| "rewards/accuracies": 0.7666666507720947, |
| "rewards/chosen": -0.02935483120381832, |
| "rewards/margins": 0.041467439383268356, |
| "rewards/rejected": -0.07082226872444153, |
| "step": 1065 |
| }, |
| { |
| "epoch": 1.4824399260628467, |
| "grad_norm": 1.7187951803207397, |
| "learning_rate": 2.4199999999999997e-07, |
| "log_odds_chosen": 1.2794216871261597, |
| "log_odds_ratio": -0.3667431175708771, |
| "logits/chosen": 2.010540246963501, |
| "logits/rejected": 2.082904815673828, |
| "logps/chosen": -0.25798189640045166, |
| "logps/rejected": -0.7230808138847351, |
| "loss": 1.0057, |
| "nll_loss": 0.9690180420875549, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.025798192247748375, |
| "rewards/margins": 0.046509888023138046, |
| "rewards/rejected": -0.07230808585882187, |
| "step": 1070 |
| }, |
| { |
| "epoch": 1.4893715341959335, |
| "grad_norm": 2.982020616531372, |
| "learning_rate": 2.410909090909091e-07, |
| "log_odds_chosen": 1.1678146123886108, |
| "log_odds_ratio": -0.41319549083709717, |
| "logits/chosen": 2.1234309673309326, |
| "logits/rejected": 2.17087459564209, |
| "logps/chosen": -0.32161521911621094, |
| "logps/rejected": -0.8085314035415649, |
| "loss": 1.0905, |
| "nll_loss": 1.0492180585861206, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.032161518931388855, |
| "rewards/margins": 0.04869161546230316, |
| "rewards/rejected": -0.08085312694311142, |
| "step": 1075 |
| }, |
| { |
| "epoch": 1.4963031423290203, |
| "grad_norm": 2.0627431869506836, |
| "learning_rate": 2.4018181818181816e-07, |
| "log_odds_chosen": 1.1932671070098877, |
| "log_odds_ratio": -0.45415419340133667, |
| "logits/chosen": 2.096099615097046, |
| "logits/rejected": 2.167684316635132, |
| "logps/chosen": -0.2884353697299957, |
| "logps/rejected": -0.8226889371871948, |
| "loss": 1.0148, |
| "nll_loss": 0.9693484902381897, |
| "rewards/accuracies": 0.7416666746139526, |
| "rewards/chosen": -0.028843533247709274, |
| "rewards/margins": 0.0534253753721714, |
| "rewards/rejected": -0.08226890861988068, |
| "step": 1080 |
| }, |
| { |
| "epoch": 1.503234750462107, |
| "grad_norm": 2.2353203296661377, |
| "learning_rate": 2.3927272727272724e-07, |
| "log_odds_chosen": 1.2742195129394531, |
| "log_odds_ratio": -0.41592225432395935, |
| "logits/chosen": 2.007319450378418, |
| "logits/rejected": 2.0866177082061768, |
| "logps/chosen": -0.28505003452301025, |
| "logps/rejected": -0.8023856282234192, |
| "loss": 1.0211, |
| "nll_loss": 0.9794998168945312, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -0.028505001217126846, |
| "rewards/margins": 0.05173356831073761, |
| "rewards/rejected": -0.08023856580257416, |
| "step": 1085 |
| }, |
| { |
| "epoch": 1.510166358595194, |
| "grad_norm": 2.5934882164001465, |
| "learning_rate": 2.3836363636363636e-07, |
| "log_odds_chosen": 1.4275834560394287, |
| "log_odds_ratio": -0.3598444163799286, |
| "logits/chosen": 2.1860811710357666, |
| "logits/rejected": 2.234570264816284, |
| "logps/chosen": -0.2805514633655548, |
| "logps/rejected": -0.867975652217865, |
| "loss": 1.0418, |
| "nll_loss": 1.0058512687683105, |
| "rewards/accuracies": 0.8666666746139526, |
| "rewards/chosen": -0.028055142611265182, |
| "rewards/margins": 0.058742426335811615, |
| "rewards/rejected": -0.0867975726723671, |
| "step": 1090 |
| }, |
| { |
| "epoch": 1.5170979667282811, |
| "grad_norm": 3.790891408920288, |
| "learning_rate": 2.3745454545454543e-07, |
| "log_odds_chosen": 1.4269194602966309, |
| "log_odds_ratio": -0.3880935609340668, |
| "logits/chosen": 2.081702470779419, |
| "logits/rejected": 2.1460609436035156, |
| "logps/chosen": -0.2924844026565552, |
| "logps/rejected": -0.8836368322372437, |
| "loss": 1.0421, |
| "nll_loss": 1.0032716989517212, |
| "rewards/accuracies": 0.8083333373069763, |
| "rewards/chosen": -0.029248446226119995, |
| "rewards/margins": 0.05911524221301079, |
| "rewards/rejected": -0.08836368471384048, |
| "step": 1095 |
| }, |
| { |
| "epoch": 1.524029574861368, |
| "grad_norm": 1.444333553314209, |
| "learning_rate": 2.3654545454545453e-07, |
| "log_odds_chosen": 1.5300582647323608, |
| "log_odds_ratio": -0.3157467842102051, |
| "logits/chosen": 2.065735340118408, |
| "logits/rejected": 2.124572992324829, |
| "logps/chosen": -0.25401103496551514, |
| "logps/rejected": -0.870669960975647, |
| "loss": 1.0242, |
| "nll_loss": 0.9926338791847229, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.02540110982954502, |
| "rewards/margins": 0.061665892601013184, |
| "rewards/rejected": -0.08706699311733246, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.5309611829944547, |
| "grad_norm": 1.328914999961853, |
| "learning_rate": 2.3563636363636362e-07, |
| "log_odds_chosen": 1.5849844217300415, |
| "log_odds_ratio": -0.3574371933937073, |
| "logits/chosen": 2.0881130695343018, |
| "logits/rejected": 2.132666826248169, |
| "logps/chosen": -0.297654926776886, |
| "logps/rejected": -0.9698154330253601, |
| "loss": 1.0571, |
| "nll_loss": 1.021361231803894, |
| "rewards/accuracies": 0.8833333253860474, |
| "rewards/chosen": -0.029765494167804718, |
| "rewards/margins": 0.06721605360507965, |
| "rewards/rejected": -0.09698154032230377, |
| "step": 1105 |
| }, |
| { |
| "epoch": 1.5378927911275415, |
| "grad_norm": 3.064085006713867, |
| "learning_rate": 2.347272727272727e-07, |
| "log_odds_chosen": 1.482607364654541, |
| "log_odds_ratio": -0.41177818179130554, |
| "logits/chosen": 2.100356101989746, |
| "logits/rejected": 2.149595022201538, |
| "logps/chosen": -0.3262555003166199, |
| "logps/rejected": -0.9901362657546997, |
| "loss": 1.024, |
| "nll_loss": 0.9827964305877686, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -0.03262555971741676, |
| "rewards/margins": 0.0663880705833435, |
| "rewards/rejected": -0.09901363402605057, |
| "step": 1110 |
| }, |
| { |
| "epoch": 1.5448243992606283, |
| "grad_norm": 1.7144571542739868, |
| "learning_rate": 2.3381818181818182e-07, |
| "log_odds_chosen": 1.297485113143921, |
| "log_odds_ratio": -0.44758838415145874, |
| "logits/chosen": 2.169114112854004, |
| "logits/rejected": 2.225714683532715, |
| "logps/chosen": -0.334065705537796, |
| "logps/rejected": -0.8946993350982666, |
| "loss": 1.0743, |
| "nll_loss": 1.0295709371566772, |
| "rewards/accuracies": 0.7833333611488342, |
| "rewards/chosen": -0.033406566828489304, |
| "rewards/margins": 0.05606337636709213, |
| "rewards/rejected": -0.08946993947029114, |
| "step": 1115 |
| }, |
| { |
| "epoch": 1.5517560073937153, |
| "grad_norm": 1.577359676361084, |
| "learning_rate": 2.329090909090909e-07, |
| "log_odds_chosen": 1.224462628364563, |
| "log_odds_ratio": -0.40063703060150146, |
| "logits/chosen": 2.1936235427856445, |
| "logits/rejected": 2.2490689754486084, |
| "logps/chosen": -0.2935000956058502, |
| "logps/rejected": -0.746100664138794, |
| "loss": 1.0438, |
| "nll_loss": 1.0037094354629517, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -0.02935001067817211, |
| "rewards/margins": 0.045260071754455566, |
| "rewards/rejected": -0.07461007684469223, |
| "step": 1120 |
| }, |
| { |
| "epoch": 1.5586876155268024, |
| "grad_norm": 1.445590615272522, |
| "learning_rate": 2.3199999999999999e-07, |
| "log_odds_chosen": 1.428971529006958, |
| "log_odds_ratio": -0.3972775638103485, |
| "logits/chosen": 2.1382179260253906, |
| "logits/rejected": 2.1906816959381104, |
| "logps/chosen": -0.3196958899497986, |
| "logps/rejected": -0.9572470784187317, |
| "loss": 1.0678, |
| "nll_loss": 1.0280849933624268, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -0.0319695882499218, |
| "rewards/margins": 0.06375513225793839, |
| "rewards/rejected": -0.09572472423315048, |
| "step": 1125 |
| }, |
| { |
| "epoch": 1.5656192236598891, |
| "grad_norm": 2.6556386947631836, |
| "learning_rate": 2.3109090909090908e-07, |
| "log_odds_chosen": 1.5480204820632935, |
| "log_odds_ratio": -0.2979664206504822, |
| "logits/chosen": 1.9882813692092896, |
| "logits/rejected": 2.047785520553589, |
| "logps/chosen": -0.2392881214618683, |
| "logps/rejected": -0.806952178478241, |
| "loss": 1.0056, |
| "nll_loss": 0.9758478403091431, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.0239288117736578, |
| "rewards/margins": 0.05676640570163727, |
| "rewards/rejected": -0.08069522678852081, |
| "step": 1130 |
| }, |
| { |
| "epoch": 1.572550831792976, |
| "grad_norm": 1.543114185333252, |
| "learning_rate": 2.3018181818181815e-07, |
| "log_odds_chosen": 1.306591272354126, |
| "log_odds_ratio": -0.38037270307540894, |
| "logits/chosen": 2.1771938800811768, |
| "logits/rejected": 2.204706907272339, |
| "logps/chosen": -0.2897684872150421, |
| "logps/rejected": -0.8257070183753967, |
| "loss": 1.0772, |
| "nll_loss": 1.0391967296600342, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -0.02897684834897518, |
| "rewards/margins": 0.05359385162591934, |
| "rewards/rejected": -0.08257070928812027, |
| "step": 1135 |
| }, |
| { |
| "epoch": 1.5794824399260627, |
| "grad_norm": 2.1715428829193115, |
| "learning_rate": 2.2927272727272728e-07, |
| "log_odds_chosen": 1.4083577394485474, |
| "log_odds_ratio": -0.4124881327152252, |
| "logits/chosen": 2.092696189880371, |
| "logits/rejected": 2.1379013061523438, |
| "logps/chosen": -0.302141934633255, |
| "logps/rejected": -0.8928415775299072, |
| "loss": 1.0676, |
| "nll_loss": 1.026381492614746, |
| "rewards/accuracies": 0.8416666388511658, |
| "rewards/chosen": -0.03021419048309326, |
| "rewards/margins": 0.059069979935884476, |
| "rewards/rejected": -0.08928415924310684, |
| "step": 1140 |
| }, |
| { |
| "epoch": 1.5864140480591498, |
| "grad_norm": 1.848644733428955, |
| "learning_rate": 2.2836363636363635e-07, |
| "log_odds_chosen": 1.3089061975479126, |
| "log_odds_ratio": -0.40707269310951233, |
| "logits/chosen": 2.151923418045044, |
| "logits/rejected": 2.2069790363311768, |
| "logps/chosen": -0.29877957701683044, |
| "logps/rejected": -0.8117203116416931, |
| "loss": 1.0202, |
| "nll_loss": 0.9795213341712952, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.029877962544560432, |
| "rewards/margins": 0.05129408463835716, |
| "rewards/rejected": -0.08117203414440155, |
| "step": 1145 |
| }, |
| { |
| "epoch": 1.5933456561922366, |
| "grad_norm": 2.3527963161468506, |
| "learning_rate": 2.2745454545454542e-07, |
| "log_odds_chosen": 1.4974111318588257, |
| "log_odds_ratio": -0.40171802043914795, |
| "logits/chosen": 2.093785524368286, |
| "logits/rejected": 2.144911050796509, |
| "logps/chosen": -0.2905969023704529, |
| "logps/rejected": -0.9690452218055725, |
| "loss": 1.043, |
| "nll_loss": 1.0028067827224731, |
| "rewards/accuracies": 0.8166666626930237, |
| "rewards/chosen": -0.02905968949198723, |
| "rewards/margins": 0.06784483045339584, |
| "rewards/rejected": -0.09690450876951218, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.6002772643253236, |
| "grad_norm": 3.2236886024475098, |
| "learning_rate": 2.2654545454545454e-07, |
| "log_odds_chosen": 1.4238559007644653, |
| "log_odds_ratio": -0.39723172783851624, |
| "logits/chosen": 2.1333365440368652, |
| "logits/rejected": 2.172248125076294, |
| "logps/chosen": -0.2883763015270233, |
| "logps/rejected": -0.8981528878211975, |
| "loss": 1.0034, |
| "nll_loss": 0.9636661410331726, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -0.028837626799941063, |
| "rewards/margins": 0.06097765639424324, |
| "rewards/rejected": -0.08981527388095856, |
| "step": 1155 |
| }, |
| { |
| "epoch": 1.6072088724584104, |
| "grad_norm": 1.6170852184295654, |
| "learning_rate": 2.2563636363636361e-07, |
| "log_odds_chosen": 1.2530089616775513, |
| "log_odds_ratio": -0.3909212052822113, |
| "logits/chosen": 2.122631311416626, |
| "logits/rejected": 2.1740965843200684, |
| "logps/chosen": -0.3135833740234375, |
| "logps/rejected": -0.8097442984580994, |
| "loss": 1.0268, |
| "nll_loss": 0.9877387881278992, |
| "rewards/accuracies": 0.8416666388511658, |
| "rewards/chosen": -0.03135833516716957, |
| "rewards/margins": 0.04961610212922096, |
| "rewards/rejected": -0.08097445219755173, |
| "step": 1160 |
| }, |
| { |
| "epoch": 1.6141404805914972, |
| "grad_norm": 3.0495035648345947, |
| "learning_rate": 2.247272727272727e-07, |
| "log_odds_chosen": 1.2540229558944702, |
| "log_odds_ratio": -0.4228639304637909, |
| "logits/chosen": 2.1452457904815674, |
| "logits/rejected": 2.199737548828125, |
| "logps/chosen": -0.32104960083961487, |
| "logps/rejected": -0.9188255071640015, |
| "loss": 1.0655, |
| "nll_loss": 1.0232080221176147, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.032104961574077606, |
| "rewards/margins": 0.059777598828077316, |
| "rewards/rejected": -0.09188255667686462, |
| "step": 1165 |
| }, |
| { |
| "epoch": 1.621072088724584, |
| "grad_norm": 1.4193778038024902, |
| "learning_rate": 2.238181818181818e-07, |
| "log_odds_chosen": 1.4086551666259766, |
| "log_odds_ratio": -0.4291422665119171, |
| "logits/chosen": 2.132582187652588, |
| "logits/rejected": 2.1951537132263184, |
| "logps/chosen": -0.27538225054740906, |
| "logps/rejected": -0.8874372839927673, |
| "loss": 1.0582, |
| "nll_loss": 1.0153110027313232, |
| "rewards/accuracies": 0.7833333611488342, |
| "rewards/chosen": -0.027538226917386055, |
| "rewards/margins": 0.061205506324768066, |
| "rewards/rejected": -0.08874373137950897, |
| "step": 1170 |
| }, |
| { |
| "epoch": 1.628003696857671, |
| "grad_norm": 4.035185813903809, |
| "learning_rate": 2.2290909090909088e-07, |
| "log_odds_chosen": 1.1511118412017822, |
| "log_odds_ratio": -0.42211630940437317, |
| "logits/chosen": 2.043503761291504, |
| "logits/rejected": 2.0831780433654785, |
| "logps/chosen": -0.260224312543869, |
| "logps/rejected": -0.69998699426651, |
| "loss": 1.0533, |
| "nll_loss": 1.0110424757003784, |
| "rewards/accuracies": 0.8166666626930237, |
| "rewards/chosen": -0.02602243237197399, |
| "rewards/margins": 0.04397625848650932, |
| "rewards/rejected": -0.06999869644641876, |
| "step": 1175 |
| }, |
| { |
| "epoch": 1.634935304990758, |
| "grad_norm": 2.188004732131958, |
| "learning_rate": 2.22e-07, |
| "log_odds_chosen": 1.3063474893569946, |
| "log_odds_ratio": -0.4064422845840454, |
| "logits/chosen": 2.0244498252868652, |
| "logits/rejected": 2.106379747390747, |
| "logps/chosen": -0.28905677795410156, |
| "logps/rejected": -0.8373602628707886, |
| "loss": 1.019, |
| "nll_loss": 0.9783718585968018, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -0.028905682265758514, |
| "rewards/margins": 0.054830338805913925, |
| "rewards/rejected": -0.08373603224754333, |
| "step": 1180 |
| }, |
| { |
| "epoch": 1.6418669131238448, |
| "grad_norm": 2.3367137908935547, |
| "learning_rate": 2.2109090909090907e-07, |
| "log_odds_chosen": 1.1956565380096436, |
| "log_odds_ratio": -0.4174603819847107, |
| "logits/chosen": 2.0511369705200195, |
| "logits/rejected": 2.097266435623169, |
| "logps/chosen": -0.29188284277915955, |
| "logps/rejected": -0.7658560872077942, |
| "loss": 1.0463, |
| "nll_loss": 1.0045546293258667, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.029188284650444984, |
| "rewards/margins": 0.047397319227457047, |
| "rewards/rejected": -0.07658561319112778, |
| "step": 1185 |
| }, |
| { |
| "epoch": 1.6487985212569316, |
| "grad_norm": 2.0365090370178223, |
| "learning_rate": 2.2018181818181817e-07, |
| "log_odds_chosen": 1.470836877822876, |
| "log_odds_ratio": -0.32899710536003113, |
| "logits/chosen": 2.0854430198669434, |
| "logits/rejected": 2.146840810775757, |
| "logps/chosen": -0.25189822912216187, |
| "logps/rejected": -0.805548906326294, |
| "loss": 1.0219, |
| "nll_loss": 0.9889623522758484, |
| "rewards/accuracies": 0.8916666507720947, |
| "rewards/chosen": -0.025189822539687157, |
| "rewards/margins": 0.055365074425935745, |
| "rewards/rejected": -0.08055491000413895, |
| "step": 1190 |
| }, |
| { |
| "epoch": 1.6557301293900184, |
| "grad_norm": 1.4684514999389648, |
| "learning_rate": 2.1927272727272727e-07, |
| "log_odds_chosen": 1.5294240713119507, |
| "log_odds_ratio": -0.3121597468852997, |
| "logits/chosen": 2.0382626056671143, |
| "logits/rejected": 2.096428871154785, |
| "logps/chosen": -0.28892782330513, |
| "logps/rejected": -0.9338265061378479, |
| "loss": 1.0155, |
| "nll_loss": 0.9842939972877502, |
| "rewards/accuracies": 0.8666666746139526, |
| "rewards/chosen": -0.02889277972280979, |
| "rewards/margins": 0.06448986381292343, |
| "rewards/rejected": -0.09338264167308807, |
| "step": 1195 |
| }, |
| { |
| "epoch": 1.6626617375231052, |
| "grad_norm": 1.848939299583435, |
| "learning_rate": 2.1836363636363634e-07, |
| "log_odds_chosen": 1.2887681722640991, |
| "log_odds_ratio": -0.39700159430503845, |
| "logits/chosen": 2.0447583198547363, |
| "logits/rejected": 2.086496353149414, |
| "logps/chosen": -0.28646960854530334, |
| "logps/rejected": -0.8112481832504272, |
| "loss": 1.0786, |
| "nll_loss": 1.0388500690460205, |
| "rewards/accuracies": 0.8166666626930237, |
| "rewards/chosen": -0.028646962717175484, |
| "rewards/margins": 0.05247785896062851, |
| "rewards/rejected": -0.08112481236457825, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.6695933456561922, |
| "grad_norm": 2.438013792037964, |
| "learning_rate": 2.1745454545454544e-07, |
| "log_odds_chosen": 1.2431906461715698, |
| "log_odds_ratio": -0.41865456104278564, |
| "logits/chosen": 1.9995055198669434, |
| "logits/rejected": 2.066706418991089, |
| "logps/chosen": -0.28791549801826477, |
| "logps/rejected": -0.8092067241668701, |
| "loss": 1.0478, |
| "nll_loss": 1.0059376955032349, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -0.02879154682159424, |
| "rewards/margins": 0.052129123359918594, |
| "rewards/rejected": -0.08092068135738373, |
| "step": 1205 |
| }, |
| { |
| "epoch": 1.6765249537892792, |
| "grad_norm": 1.8914486169815063, |
| "learning_rate": 2.1654545454545453e-07, |
| "log_odds_chosen": 1.669597864151001, |
| "log_odds_ratio": -0.2944275438785553, |
| "logits/chosen": 2.067124366760254, |
| "logits/rejected": 2.1200904846191406, |
| "logps/chosen": -0.2542329728603363, |
| "logps/rejected": -0.9926477670669556, |
| "loss": 1.024, |
| "nll_loss": 0.9945566058158875, |
| "rewards/accuracies": 0.8666666746139526, |
| "rewards/chosen": -0.02542329952120781, |
| "rewards/margins": 0.07384147495031357, |
| "rewards/rejected": -0.09926477074623108, |
| "step": 1210 |
| }, |
| { |
| "epoch": 1.683456561922366, |
| "grad_norm": 1.8205511569976807, |
| "learning_rate": 2.156363636363636e-07, |
| "log_odds_chosen": 1.615554928779602, |
| "log_odds_ratio": -0.3494965434074402, |
| "logits/chosen": 2.0657761096954346, |
| "logits/rejected": 2.138937473297119, |
| "logps/chosen": -0.3055498003959656, |
| "logps/rejected": -0.9770273566246033, |
| "loss": 1.0574, |
| "nll_loss": 1.0224652290344238, |
| "rewards/accuracies": 0.8666666746139526, |
| "rewards/chosen": -0.03055497631430626, |
| "rewards/margins": 0.06714775413274765, |
| "rewards/rejected": -0.0977027490735054, |
| "step": 1215 |
| }, |
| { |
| "epoch": 1.6903881700554528, |
| "grad_norm": 3.3164730072021484, |
| "learning_rate": 2.1472727272727273e-07, |
| "log_odds_chosen": 1.2579916715621948, |
| "log_odds_ratio": -0.41120514273643494, |
| "logits/chosen": 2.0500335693359375, |
| "logits/rejected": 2.107823133468628, |
| "logps/chosen": -0.2929707467556, |
| "logps/rejected": -0.8051959276199341, |
| "loss": 1.0315, |
| "nll_loss": 0.9903787970542908, |
| "rewards/accuracies": 0.8083333373069763, |
| "rewards/chosen": -0.029297074303030968, |
| "rewards/margins": 0.051222506910562515, |
| "rewards/rejected": -0.08051959425210953, |
| "step": 1220 |
| }, |
| { |
| "epoch": 1.6973197781885396, |
| "grad_norm": 3.1997969150543213, |
| "learning_rate": 2.138181818181818e-07, |
| "log_odds_chosen": 1.3084485530853271, |
| "log_odds_ratio": -0.41026392579078674, |
| "logits/chosen": 1.9871976375579834, |
| "logits/rejected": 2.0475666522979736, |
| "logps/chosen": -0.31262117624282837, |
| "logps/rejected": -0.8240591287612915, |
| "loss": 1.014, |
| "nll_loss": 0.972952127456665, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -0.0312621183693409, |
| "rewards/margins": 0.05114380270242691, |
| "rewards/rejected": -0.08240590989589691, |
| "step": 1225 |
| }, |
| { |
| "epoch": 1.7042513863216266, |
| "grad_norm": 2.6871352195739746, |
| "learning_rate": 2.129090909090909e-07, |
| "log_odds_chosen": 1.0879921913146973, |
| "log_odds_ratio": -0.4872511923313141, |
| "logits/chosen": 2.080152750015259, |
| "logits/rejected": 2.119732141494751, |
| "logps/chosen": -0.369219571352005, |
| "logps/rejected": -0.8730208277702332, |
| "loss": 1.0926, |
| "nll_loss": 1.0438321828842163, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.03692195937037468, |
| "rewards/margins": 0.05038012936711311, |
| "rewards/rejected": -0.0873020812869072, |
| "step": 1230 |
| }, |
| { |
| "epoch": 1.7111829944547134, |
| "grad_norm": 2.325326442718506, |
| "learning_rate": 2.12e-07, |
| "log_odds_chosen": 1.397155523300171, |
| "log_odds_ratio": -0.39500170946121216, |
| "logits/chosen": 2.125500202178955, |
| "logits/rejected": 2.1754844188690186, |
| "logps/chosen": -0.2974463105201721, |
| "logps/rejected": -0.9127005934715271, |
| "loss": 1.029, |
| "nll_loss": 0.9895287156105042, |
| "rewards/accuracies": 0.7833333611488342, |
| "rewards/chosen": -0.02974463254213333, |
| "rewards/margins": 0.06152542680501938, |
| "rewards/rejected": -0.0912700667977333, |
| "step": 1235 |
| }, |
| { |
| "epoch": 1.7181146025878005, |
| "grad_norm": 1.5156536102294922, |
| "learning_rate": 2.1109090909090906e-07, |
| "log_odds_chosen": 1.2906519174575806, |
| "log_odds_ratio": -0.40361616015434265, |
| "logits/chosen": 2.051255702972412, |
| "logits/rejected": 2.103986978530884, |
| "logps/chosen": -0.268101304769516, |
| "logps/rejected": -0.7997626066207886, |
| "loss": 1.0366, |
| "nll_loss": 0.9962154030799866, |
| "rewards/accuracies": 0.8166666626930237, |
| "rewards/chosen": -0.02681013010442257, |
| "rewards/margins": 0.05316613242030144, |
| "rewards/rejected": -0.07997626811265945, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.7250462107208873, |
| "grad_norm": 1.6096516847610474, |
| "learning_rate": 2.101818181818182e-07, |
| "log_odds_chosen": 1.2347605228424072, |
| "log_odds_ratio": -0.4557424783706665, |
| "logits/chosen": 2.0431010723114014, |
| "logits/rejected": 2.1129000186920166, |
| "logps/chosen": -0.30273541808128357, |
| "logps/rejected": -0.7864081859588623, |
| "loss": 1.0529, |
| "nll_loss": 1.0073034763336182, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.03027353622019291, |
| "rewards/margins": 0.048367276787757874, |
| "rewards/rejected": -0.07864081859588623, |
| "step": 1245 |
| }, |
| { |
| "epoch": 1.731977818853974, |
| "grad_norm": 1.9126524925231934, |
| "learning_rate": 2.0927272727272726e-07, |
| "log_odds_chosen": 1.4341093301773071, |
| "log_odds_ratio": -0.3776791989803314, |
| "logits/chosen": 2.029585838317871, |
| "logits/rejected": 2.096311569213867, |
| "logps/chosen": -0.2593327462673187, |
| "logps/rejected": -0.8751354217529297, |
| "loss": 1.0188, |
| "nll_loss": 0.9809887409210205, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -0.025933273136615753, |
| "rewards/margins": 0.06158026307821274, |
| "rewards/rejected": -0.08751355111598969, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.7389094269870609, |
| "grad_norm": 2.2804696559906006, |
| "learning_rate": 2.0836363636363633e-07, |
| "log_odds_chosen": 1.4436743259429932, |
| "log_odds_ratio": -0.3733202815055847, |
| "logits/chosen": 2.1618611812591553, |
| "logits/rejected": 2.197058916091919, |
| "logps/chosen": -0.2881154417991638, |
| "logps/rejected": -0.9169884324073792, |
| "loss": 1.0506, |
| "nll_loss": 1.0132598876953125, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.02881154604256153, |
| "rewards/margins": 0.06288730353116989, |
| "rewards/rejected": -0.09169885516166687, |
| "step": 1255 |
| }, |
| { |
| "epoch": 1.7458410351201479, |
| "grad_norm": 2.1073522567749023, |
| "learning_rate": 2.0745454545454545e-07, |
| "log_odds_chosen": 1.6635074615478516, |
| "log_odds_ratio": -0.33457812666893005, |
| "logits/chosen": 2.074899911880493, |
| "logits/rejected": 2.122929573059082, |
| "logps/chosen": -0.27845534682273865, |
| "logps/rejected": -1.005134105682373, |
| "loss": 1.0756, |
| "nll_loss": 1.0421861410140991, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.027845535427331924, |
| "rewards/margins": 0.07266788184642792, |
| "rewards/rejected": -0.10051342844963074, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.752772643253235, |
| "grad_norm": 2.2422614097595215, |
| "learning_rate": 2.0654545454545452e-07, |
| "log_odds_chosen": 1.3691959381103516, |
| "log_odds_ratio": -0.40435412526130676, |
| "logits/chosen": 2.1057488918304443, |
| "logits/rejected": 2.1638669967651367, |
| "logps/chosen": -0.31880542635917664, |
| "logps/rejected": -0.903930127620697, |
| "loss": 1.0408, |
| "nll_loss": 1.000407099723816, |
| "rewards/accuracies": 0.8166666626930237, |
| "rewards/chosen": -0.031880538910627365, |
| "rewards/margins": 0.05851246044039726, |
| "rewards/rejected": -0.09039301425218582, |
| "step": 1265 |
| }, |
| { |
| "epoch": 1.7597042513863217, |
| "grad_norm": 2.287367820739746, |
| "learning_rate": 2.0563636363636362e-07, |
| "log_odds_chosen": 1.282787799835205, |
| "log_odds_ratio": -0.4151849150657654, |
| "logits/chosen": 2.05078387260437, |
| "logits/rejected": 2.125896692276001, |
| "logps/chosen": -0.28913381695747375, |
| "logps/rejected": -0.792908251285553, |
| "loss": 1.0073, |
| "nll_loss": 0.9657413363456726, |
| "rewards/accuracies": 0.8166666626930237, |
| "rewards/chosen": -0.028913382440805435, |
| "rewards/margins": 0.05037744715809822, |
| "rewards/rejected": -0.07929082959890366, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.7666358595194085, |
| "grad_norm": 1.7958959341049194, |
| "learning_rate": 2.0472727272727272e-07, |
| "log_odds_chosen": 1.724271297454834, |
| "log_odds_ratio": -0.2810591757297516, |
| "logits/chosen": 2.0967202186584473, |
| "logits/rejected": 2.161252021789551, |
| "logps/chosen": -0.2763175964355469, |
| "logps/rejected": -1.0735727548599243, |
| "loss": 1.0564, |
| "nll_loss": 1.0283379554748535, |
| "rewards/accuracies": 0.9083333611488342, |
| "rewards/chosen": -0.027631759643554688, |
| "rewards/margins": 0.07972551882266998, |
| "rewards/rejected": -0.10735727101564407, |
| "step": 1275 |
| }, |
| { |
| "epoch": 1.7735674676524953, |
| "grad_norm": 2.2755537033081055, |
| "learning_rate": 2.038181818181818e-07, |
| "log_odds_chosen": 1.4004557132720947, |
| "log_odds_ratio": -0.3993929326534271, |
| "logits/chosen": 2.111241102218628, |
| "logits/rejected": 2.151012897491455, |
| "logps/chosen": -0.29145321249961853, |
| "logps/rejected": -0.8968268036842346, |
| "loss": 0.983, |
| "nll_loss": 0.9430230259895325, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -0.029145320877432823, |
| "rewards/margins": 0.060537371784448624, |
| "rewards/rejected": -0.0896826833486557, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.7804990757855823, |
| "grad_norm": 1.726332187652588, |
| "learning_rate": 2.029090909090909e-07, |
| "log_odds_chosen": 1.7174845933914185, |
| "log_odds_ratio": -0.3723779022693634, |
| "logits/chosen": 2.092041254043579, |
| "logits/rejected": 2.163289785385132, |
| "logps/chosen": -0.2837842106819153, |
| "logps/rejected": -1.036734700202942, |
| "loss": 0.9897, |
| "nll_loss": 0.9525095224380493, |
| "rewards/accuracies": 0.8166666626930237, |
| "rewards/chosen": -0.028378423303365707, |
| "rewards/margins": 0.07529504597187042, |
| "rewards/rejected": -0.10367346554994583, |
| "step": 1285 |
| }, |
| { |
| "epoch": 1.787430683918669, |
| "grad_norm": 1.5710785388946533, |
| "learning_rate": 2.0199999999999998e-07, |
| "log_odds_chosen": 1.312154769897461, |
| "log_odds_ratio": -0.3994414210319519, |
| "logits/chosen": 2.0113887786865234, |
| "logits/rejected": 2.0753164291381836, |
| "logps/chosen": -0.272027850151062, |
| "logps/rejected": -0.7796825766563416, |
| "loss": 1.0201, |
| "nll_loss": 0.9801668524742126, |
| "rewards/accuracies": 0.8416666388511658, |
| "rewards/chosen": -0.02720278687775135, |
| "rewards/margins": 0.05076547712087631, |
| "rewards/rejected": -0.07796826213598251, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.7943622920517561, |
| "grad_norm": 1.920129418373108, |
| "learning_rate": 2.0109090909090908e-07, |
| "log_odds_chosen": 1.6343634128570557, |
| "log_odds_ratio": -0.3433375358581543, |
| "logits/chosen": 2.1635687351226807, |
| "logits/rejected": 2.2147443294525146, |
| "logps/chosen": -0.2949898838996887, |
| "logps/rejected": -1.0586962699890137, |
| "loss": 1.0487, |
| "nll_loss": 1.0143301486968994, |
| "rewards/accuracies": 0.8583333492279053, |
| "rewards/chosen": -0.029498988762497902, |
| "rewards/margins": 0.07637064903974533, |
| "rewards/rejected": -0.10586963593959808, |
| "step": 1295 |
| }, |
| { |
| "epoch": 1.801293900184843, |
| "grad_norm": 1.9142963886260986, |
| "learning_rate": 2.0018181818181818e-07, |
| "log_odds_chosen": 1.8189107179641724, |
| "log_odds_ratio": -0.30017244815826416, |
| "logits/chosen": 2.058302879333496, |
| "logits/rejected": 2.130945920944214, |
| "logps/chosen": -0.2555373013019562, |
| "logps/rejected": -1.0402501821517944, |
| "loss": 1.0479, |
| "nll_loss": 1.0179013013839722, |
| "rewards/accuracies": 0.9166666865348816, |
| "rewards/chosen": -0.025553731247782707, |
| "rewards/margins": 0.07847128063440323, |
| "rewards/rejected": -0.10402501374483109, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.8082255083179297, |
| "grad_norm": 2.576738119125366, |
| "learning_rate": 1.9927272727272725e-07, |
| "log_odds_chosen": 1.4616259336471558, |
| "log_odds_ratio": -0.35250329971313477, |
| "logits/chosen": 2.0860280990600586, |
| "logits/rejected": 2.160719394683838, |
| "logps/chosen": -0.26303815841674805, |
| "logps/rejected": -0.8395700454711914, |
| "loss": 1.039, |
| "nll_loss": 1.0037142038345337, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.026303818449378014, |
| "rewards/margins": 0.05765319615602493, |
| "rewards/rejected": -0.0839570015668869, |
| "step": 1305 |
| }, |
| { |
| "epoch": 1.8151571164510165, |
| "grad_norm": 2.0888595581054688, |
| "learning_rate": 1.9836363636363634e-07, |
| "log_odds_chosen": 1.4016786813735962, |
| "log_odds_ratio": -0.36707499623298645, |
| "logits/chosen": 2.0193212032318115, |
| "logits/rejected": 2.0818252563476562, |
| "logps/chosen": -0.3212401270866394, |
| "logps/rejected": -0.8710358142852783, |
| "loss": 1.0211, |
| "nll_loss": 0.9843639731407166, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.03212401270866394, |
| "rewards/margins": 0.05497957393527031, |
| "rewards/rejected": -0.08710358291864395, |
| "step": 1310 |
| }, |
| { |
| "epoch": 1.8220887245841035, |
| "grad_norm": 2.2947607040405273, |
| "learning_rate": 1.9745454545454544e-07, |
| "log_odds_chosen": 1.5820189714431763, |
| "log_odds_ratio": -0.3358237147331238, |
| "logits/chosen": 2.1270110607147217, |
| "logits/rejected": 2.1922497749328613, |
| "logps/chosen": -0.2790026068687439, |
| "logps/rejected": -0.9709238409996033, |
| "loss": 1.0543, |
| "nll_loss": 1.0207080841064453, |
| "rewards/accuracies": 0.8916666507720947, |
| "rewards/chosen": -0.02790026180446148, |
| "rewards/margins": 0.06919214129447937, |
| "rewards/rejected": -0.0970923900604248, |
| "step": 1315 |
| }, |
| { |
| "epoch": 1.8290203327171903, |
| "grad_norm": 2.2239065170288086, |
| "learning_rate": 1.9654545454545454e-07, |
| "log_odds_chosen": 1.8014733791351318, |
| "log_odds_ratio": -0.3064168691635132, |
| "logits/chosen": 2.168591022491455, |
| "logits/rejected": 2.2188949584960938, |
| "logps/chosen": -0.289535254240036, |
| "logps/rejected": -1.121842622756958, |
| "loss": 1.0043, |
| "nll_loss": 0.9736562967300415, |
| "rewards/accuracies": 0.8583333492279053, |
| "rewards/chosen": -0.02895352989435196, |
| "rewards/margins": 0.08323074132204056, |
| "rewards/rejected": -0.11218428611755371, |
| "step": 1320 |
| }, |
| { |
| "epoch": 1.8359519408502774, |
| "grad_norm": 1.6879349946975708, |
| "learning_rate": 1.956363636363636e-07, |
| "log_odds_chosen": 1.4831633567810059, |
| "log_odds_ratio": -0.36744216084480286, |
| "logits/chosen": 2.0397822856903076, |
| "logits/rejected": 2.090501070022583, |
| "logps/chosen": -0.27023550868034363, |
| "logps/rejected": -0.8913961052894592, |
| "loss": 0.9775, |
| "nll_loss": 0.9407526850700378, |
| "rewards/accuracies": 0.8416666388511658, |
| "rewards/chosen": -0.027023550122976303, |
| "rewards/margins": 0.06211606040596962, |
| "rewards/rejected": -0.08913961052894592, |
| "step": 1325 |
| }, |
| { |
| "epoch": 1.8428835489833642, |
| "grad_norm": 3.5290584564208984, |
| "learning_rate": 1.947272727272727e-07, |
| "log_odds_chosen": 1.599334716796875, |
| "log_odds_ratio": -0.32742586731910706, |
| "logits/chosen": 2.0843544006347656, |
| "logits/rejected": 2.1510770320892334, |
| "logps/chosen": -0.26283249258995056, |
| "logps/rejected": -0.9456924796104431, |
| "loss": 0.9577, |
| "nll_loss": 0.9249733686447144, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.026283251121640205, |
| "rewards/margins": 0.0682859942317009, |
| "rewards/rejected": -0.09456924349069595, |
| "step": 1330 |
| }, |
| { |
| "epoch": 1.849815157116451, |
| "grad_norm": 3.283820629119873, |
| "learning_rate": 1.938181818181818e-07, |
| "log_odds_chosen": 1.60695219039917, |
| "log_odds_ratio": -0.3642633557319641, |
| "logits/chosen": 2.1823904514312744, |
| "logits/rejected": 2.2375266551971436, |
| "logps/chosen": -0.3108011782169342, |
| "logps/rejected": -1.0633673667907715, |
| "loss": 1.032, |
| "nll_loss": 0.995610773563385, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.03108011931180954, |
| "rewards/margins": 0.07525661587715149, |
| "rewards/rejected": -0.10633675009012222, |
| "step": 1335 |
| }, |
| { |
| "epoch": 1.8567467652495377, |
| "grad_norm": 2.3443901538848877, |
| "learning_rate": 1.929090909090909e-07, |
| "log_odds_chosen": 1.549951195716858, |
| "log_odds_ratio": -0.3641236424446106, |
| "logits/chosen": 2.1002511978149414, |
| "logits/rejected": 2.1668472290039062, |
| "logps/chosen": -0.3097735047340393, |
| "logps/rejected": -1.032346487045288, |
| "loss": 1.0146, |
| "nll_loss": 0.9781424403190613, |
| "rewards/accuracies": 0.8666666746139526, |
| "rewards/chosen": -0.03097734972834587, |
| "rewards/margins": 0.07225729525089264, |
| "rewards/rejected": -0.1032346561551094, |
| "step": 1340 |
| }, |
| { |
| "epoch": 1.8636783733826248, |
| "grad_norm": 1.9872773885726929, |
| "learning_rate": 1.9199999999999997e-07, |
| "log_odds_chosen": 1.321940541267395, |
| "log_odds_ratio": -0.44285106658935547, |
| "logits/chosen": 2.0162153244018555, |
| "logits/rejected": 2.074026584625244, |
| "logps/chosen": -0.3019007742404938, |
| "logps/rejected": -0.8703359961509705, |
| "loss": 1.051, |
| "nll_loss": 1.0067391395568848, |
| "rewards/accuracies": 0.7666666507720947, |
| "rewards/chosen": -0.030190076678991318, |
| "rewards/margins": 0.05684352666139603, |
| "rewards/rejected": -0.08703361451625824, |
| "step": 1345 |
| }, |
| { |
| "epoch": 1.8706099815157118, |
| "grad_norm": 1.5316020250320435, |
| "learning_rate": 1.9109090909090907e-07, |
| "log_odds_chosen": 1.522079348564148, |
| "log_odds_ratio": -0.3456721305847168, |
| "logits/chosen": 2.055612802505493, |
| "logits/rejected": 2.1493773460388184, |
| "logps/chosen": -0.2610771358013153, |
| "logps/rejected": -0.8397551774978638, |
| "loss": 1.0111, |
| "nll_loss": 0.9765692353248596, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.02610771171748638, |
| "rewards/margins": 0.05786780267953873, |
| "rewards/rejected": -0.08397550880908966, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.8775415896487986, |
| "grad_norm": 1.9374873638153076, |
| "learning_rate": 1.9018181818181817e-07, |
| "log_odds_chosen": 1.3929773569107056, |
| "log_odds_ratio": -0.3517773449420929, |
| "logits/chosen": 2.034905433654785, |
| "logits/rejected": 2.079784631729126, |
| "logps/chosen": -0.2506738603115082, |
| "logps/rejected": -0.7879815697669983, |
| "loss": 1.0656, |
| "nll_loss": 1.0304430723190308, |
| "rewards/accuracies": 0.8416666388511658, |
| "rewards/chosen": -0.025067387148737907, |
| "rewards/margins": 0.05373078212141991, |
| "rewards/rejected": -0.07879816740751266, |
| "step": 1355 |
| }, |
| { |
| "epoch": 1.8844731977818854, |
| "grad_norm": 1.2929794788360596, |
| "learning_rate": 1.8927272727272726e-07, |
| "log_odds_chosen": 1.4262324571609497, |
| "log_odds_ratio": -0.3507004380226135, |
| "logits/chosen": 2.130051374435425, |
| "logits/rejected": 2.2022147178649902, |
| "logps/chosen": -0.30148500204086304, |
| "logps/rejected": -0.8996387124061584, |
| "loss": 1.0229, |
| "nll_loss": 0.9878306984901428, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.030148500576615334, |
| "rewards/margins": 0.05981536582112312, |
| "rewards/rejected": -0.0899638757109642, |
| "step": 1360 |
| }, |
| { |
| "epoch": 1.8914048059149722, |
| "grad_norm": 2.3704674243927, |
| "learning_rate": 1.8836363636363633e-07, |
| "log_odds_chosen": 1.4139504432678223, |
| "log_odds_ratio": -0.3692961037158966, |
| "logits/chosen": 2.1010749340057373, |
| "logits/rejected": 2.1468594074249268, |
| "logps/chosen": -0.3202250301837921, |
| "logps/rejected": -0.9537723660469055, |
| "loss": 1.0341, |
| "nll_loss": 0.9971208572387695, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -0.03202249854803085, |
| "rewards/margins": 0.0633547380566597, |
| "rewards/rejected": -0.09537723660469055, |
| "step": 1365 |
| }, |
| { |
| "epoch": 1.8983364140480592, |
| "grad_norm": 1.7380679845809937, |
| "learning_rate": 1.8745454545454543e-07, |
| "log_odds_chosen": 1.5445674657821655, |
| "log_odds_ratio": -0.3786125183105469, |
| "logits/chosen": 2.0269358158111572, |
| "logits/rejected": 2.0798943042755127, |
| "logps/chosen": -0.28776755928993225, |
| "logps/rejected": -0.9360780715942383, |
| "loss": 1.041, |
| "nll_loss": 1.0031627416610718, |
| "rewards/accuracies": 0.8416666388511658, |
| "rewards/chosen": -0.028776757419109344, |
| "rewards/margins": 0.06483104825019836, |
| "rewards/rejected": -0.0936078131198883, |
| "step": 1370 |
| }, |
| { |
| "epoch": 1.905268022181146, |
| "grad_norm": 2.3464677333831787, |
| "learning_rate": 1.8654545454545453e-07, |
| "log_odds_chosen": 1.3654427528381348, |
| "log_odds_ratio": -0.4110698699951172, |
| "logits/chosen": 2.1894376277923584, |
| "logits/rejected": 2.233098268508911, |
| "logps/chosen": -0.30282995104789734, |
| "logps/rejected": -0.8446999788284302, |
| "loss": 0.9941, |
| "nll_loss": 0.9530263543128967, |
| "rewards/accuracies": 0.8083333373069763, |
| "rewards/chosen": -0.030282998457551003, |
| "rewards/margins": 0.05418700724840164, |
| "rewards/rejected": -0.08447001129388809, |
| "step": 1375 |
| }, |
| { |
| "epoch": 1.912199630314233, |
| "grad_norm": 2.243062973022461, |
| "learning_rate": 1.8563636363636363e-07, |
| "log_odds_chosen": 1.3511884212493896, |
| "log_odds_ratio": -0.4768778681755066, |
| "logits/chosen": 2.0655782222747803, |
| "logits/rejected": 2.1229450702667236, |
| "logps/chosen": -0.37024933099746704, |
| "logps/rejected": -0.9667562246322632, |
| "loss": 1.0402, |
| "nll_loss": 0.9925115704536438, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.03702492639422417, |
| "rewards/margins": 0.05965068191289902, |
| "rewards/rejected": -0.09667561948299408, |
| "step": 1380 |
| }, |
| { |
| "epoch": 1.9191312384473198, |
| "grad_norm": 2.5852763652801514, |
| "learning_rate": 1.847272727272727e-07, |
| "log_odds_chosen": 1.548577070236206, |
| "log_odds_ratio": -0.3329441249370575, |
| "logits/chosen": 2.083811044692993, |
| "logits/rejected": 2.13382887840271, |
| "logps/chosen": -0.3115028738975525, |
| "logps/rejected": -1.0105055570602417, |
| "loss": 1.0092, |
| "nll_loss": 0.9758760929107666, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.03115028701722622, |
| "rewards/margins": 0.069900281727314, |
| "rewards/rejected": -0.10105058550834656, |
| "step": 1385 |
| }, |
| { |
| "epoch": 1.9260628465804066, |
| "grad_norm": 2.076587677001953, |
| "learning_rate": 1.838181818181818e-07, |
| "log_odds_chosen": 1.193703055381775, |
| "log_odds_ratio": -0.4486643075942993, |
| "logits/chosen": 2.1062402725219727, |
| "logits/rejected": 2.1550328731536865, |
| "logps/chosen": -0.3409879505634308, |
| "logps/rejected": -0.8286535739898682, |
| "loss": 1.0018, |
| "nll_loss": 0.9569076299667358, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -0.0340987928211689, |
| "rewards/margins": 0.048766572028398514, |
| "rewards/rejected": -0.08286535739898682, |
| "step": 1390 |
| }, |
| { |
| "epoch": 1.9329944547134934, |
| "grad_norm": 2.120288610458374, |
| "learning_rate": 1.829090909090909e-07, |
| "log_odds_chosen": 1.4117330312728882, |
| "log_odds_ratio": -0.3424789011478424, |
| "logits/chosen": 2.101274013519287, |
| "logits/rejected": 2.1526596546173096, |
| "logps/chosen": -0.2649425268173218, |
| "logps/rejected": -0.7853403091430664, |
| "loss": 0.9994, |
| "nll_loss": 0.9651403427124023, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.026494255289435387, |
| "rewards/margins": 0.05203978344798088, |
| "rewards/rejected": -0.07853402942419052, |
| "step": 1395 |
| }, |
| { |
| "epoch": 1.9399260628465804, |
| "grad_norm": 2.1944470405578613, |
| "learning_rate": 1.82e-07, |
| "log_odds_chosen": 1.5040756464004517, |
| "log_odds_ratio": -0.37599992752075195, |
| "logits/chosen": 2.0189223289489746, |
| "logits/rejected": 2.067805051803589, |
| "logps/chosen": -0.2981501519680023, |
| "logps/rejected": -0.9519171118736267, |
| "loss": 1.0507, |
| "nll_loss": 1.013134241104126, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -0.02981501631438732, |
| "rewards/margins": 0.06537671387195587, |
| "rewards/rejected": -0.09519171714782715, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.9468576709796674, |
| "grad_norm": 1.9627952575683594, |
| "learning_rate": 1.8109090909090906e-07, |
| "log_odds_chosen": 1.6703484058380127, |
| "log_odds_ratio": -0.32047805190086365, |
| "logits/chosen": 2.0558595657348633, |
| "logits/rejected": 2.126713275909424, |
| "logps/chosen": -0.2901943325996399, |
| "logps/rejected": -1.0323206186294556, |
| "loss": 1.0102, |
| "nll_loss": 0.9781351089477539, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.029019435867667198, |
| "rewards/margins": 0.07421263307332993, |
| "rewards/rejected": -0.10323206335306168, |
| "step": 1405 |
| }, |
| { |
| "epoch": 1.9537892791127542, |
| "grad_norm": 2.921412706375122, |
| "learning_rate": 1.8018181818181816e-07, |
| "log_odds_chosen": 1.3125395774841309, |
| "log_odds_ratio": -0.4437628984451294, |
| "logits/chosen": 2.1079742908477783, |
| "logits/rejected": 2.172934055328369, |
| "logps/chosen": -0.3079548180103302, |
| "logps/rejected": -0.8908718228340149, |
| "loss": 1.0198, |
| "nll_loss": 0.9754597544670105, |
| "rewards/accuracies": 0.8083333373069763, |
| "rewards/chosen": -0.030795477330684662, |
| "rewards/margins": 0.058291707187891006, |
| "rewards/rejected": -0.08908718079328537, |
| "step": 1410 |
| }, |
| { |
| "epoch": 1.960720887245841, |
| "grad_norm": 2.2531754970550537, |
| "learning_rate": 1.7927272727272725e-07, |
| "log_odds_chosen": 1.4887337684631348, |
| "log_odds_ratio": -0.3652048408985138, |
| "logits/chosen": 2.036674976348877, |
| "logits/rejected": 2.0993988513946533, |
| "logps/chosen": -0.2855999767780304, |
| "logps/rejected": -0.8912386298179626, |
| "loss": 1.0189, |
| "nll_loss": 0.9823691248893738, |
| "rewards/accuracies": 0.8416666388511658, |
| "rewards/chosen": -0.02855999954044819, |
| "rewards/margins": 0.06056387349963188, |
| "rewards/rejected": -0.08912386745214462, |
| "step": 1415 |
| }, |
| { |
| "epoch": 1.9676524953789278, |
| "grad_norm": 2.0116090774536133, |
| "learning_rate": 1.7836363636363635e-07, |
| "log_odds_chosen": 1.619387149810791, |
| "log_odds_ratio": -0.3138326406478882, |
| "logits/chosen": 2.0645925998687744, |
| "logits/rejected": 2.129347801208496, |
| "logps/chosen": -0.2735585570335388, |
| "logps/rejected": -0.9985336065292358, |
| "loss": 0.9783, |
| "nll_loss": 0.94692462682724, |
| "rewards/accuracies": 0.8666666746139526, |
| "rewards/chosen": -0.027355853468179703, |
| "rewards/margins": 0.07249751687049866, |
| "rewards/rejected": -0.09985338151454926, |
| "step": 1420 |
| }, |
| { |
| "epoch": 1.9745841035120146, |
| "grad_norm": 3.110692024230957, |
| "learning_rate": 1.7745454545454545e-07, |
| "log_odds_chosen": 1.2186717987060547, |
| "log_odds_ratio": -0.45589005947113037, |
| "logits/chosen": 2.0935304164886475, |
| "logits/rejected": 2.1518046855926514, |
| "logps/chosen": -0.35527312755584717, |
| "logps/rejected": -0.8807690739631653, |
| "loss": 1.036, |
| "nll_loss": 0.9903665781021118, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.035527314990758896, |
| "rewards/margins": 0.05254959315061569, |
| "rewards/rejected": -0.08807691931724548, |
| "step": 1425 |
| }, |
| { |
| "epoch": 1.9815157116451017, |
| "grad_norm": 2.146620750427246, |
| "learning_rate": 1.7654545454545452e-07, |
| "log_odds_chosen": 1.3884727954864502, |
| "log_odds_ratio": -0.38641104102134705, |
| "logits/chosen": 2.083814859390259, |
| "logits/rejected": 2.1228816509246826, |
| "logps/chosen": -0.29765060544013977, |
| "logps/rejected": -0.8775668740272522, |
| "loss": 1.0353, |
| "nll_loss": 0.9966583847999573, |
| "rewards/accuracies": 0.8416666388511658, |
| "rewards/chosen": -0.029765058308839798, |
| "rewards/margins": 0.0579916313290596, |
| "rewards/rejected": -0.0877566859126091, |
| "step": 1430 |
| }, |
| { |
| "epoch": 1.9884473197781887, |
| "grad_norm": 1.4122179746627808, |
| "learning_rate": 1.7563636363636362e-07, |
| "log_odds_chosen": 1.6712970733642578, |
| "log_odds_ratio": -0.31637993454933167, |
| "logits/chosen": 2.1619114875793457, |
| "logits/rejected": 2.231441020965576, |
| "logps/chosen": -0.2716436982154846, |
| "logps/rejected": -1.0012226104736328, |
| "loss": 1.0147, |
| "nll_loss": 0.9830483794212341, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.027164369821548462, |
| "rewards/margins": 0.07295789569616318, |
| "rewards/rejected": -0.10012225806713104, |
| "step": 1435 |
| }, |
| { |
| "epoch": 1.9953789279112755, |
| "grad_norm": 2.2266688346862793, |
| "learning_rate": 1.7472727272727271e-07, |
| "log_odds_chosen": 1.3788424730300903, |
| "log_odds_ratio": -0.3804737329483032, |
| "logits/chosen": 2.135176181793213, |
| "logits/rejected": 2.1902530193328857, |
| "logps/chosen": -0.31672823429107666, |
| "logps/rejected": -0.9234539866447449, |
| "loss": 1.0622, |
| "nll_loss": 1.0241928100585938, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.031672827899456024, |
| "rewards/margins": 0.0606725737452507, |
| "rewards/rejected": -0.09234539419412613, |
| "step": 1440 |
| }, |
| { |
| "epoch": 2.0013863216266174, |
| "grad_norm": 3.505758047103882, |
| "learning_rate": 1.738181818181818e-07, |
| "log_odds_chosen": 1.3300753831863403, |
| "log_odds_ratio": -0.41754671931266785, |
| "logits/chosen": 2.1010961532592773, |
| "logits/rejected": 2.1418211460113525, |
| "logps/chosen": -0.28666701912879944, |
| "logps/rejected": -0.8230305910110474, |
| "loss": 0.9121, |
| "nll_loss": 1.00087308883667, |
| "rewards/accuracies": 0.7884615659713745, |
| "rewards/chosen": -0.028666695579886436, |
| "rewards/margins": 0.053636353462934494, |
| "rewards/rejected": -0.08230306208133698, |
| "step": 1445 |
| }, |
| { |
| "epoch": 2.0083179297597042, |
| "grad_norm": 2.070633888244629, |
| "learning_rate": 1.7290909090909088e-07, |
| "log_odds_chosen": 1.7298386096954346, |
| "log_odds_ratio": -0.31547990441322327, |
| "logits/chosen": 2.099557399749756, |
| "logits/rejected": 2.149728536605835, |
| "logps/chosen": -0.2745874524116516, |
| "logps/rejected": -1.0096734762191772, |
| "loss": 1.045, |
| "nll_loss": 1.0134097337722778, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.02745874598622322, |
| "rewards/margins": 0.0735086053609848, |
| "rewards/rejected": -0.10096735507249832, |
| "step": 1450 |
| }, |
| { |
| "epoch": 2.015249537892791, |
| "grad_norm": 2.7808001041412354, |
| "learning_rate": 1.7199999999999998e-07, |
| "log_odds_chosen": 1.4617559909820557, |
| "log_odds_ratio": -0.32592448592185974, |
| "logits/chosen": 2.0248734951019287, |
| "logits/rejected": 2.08671236038208, |
| "logps/chosen": -0.24415723979473114, |
| "logps/rejected": -0.8095900416374207, |
| "loss": 1.0132, |
| "nll_loss": 0.980653703212738, |
| "rewards/accuracies": 0.8833333253860474, |
| "rewards/chosen": -0.024415725842118263, |
| "rewards/margins": 0.05654327571392059, |
| "rewards/rejected": -0.08095899969339371, |
| "step": 1455 |
| }, |
| { |
| "epoch": 2.022181146025878, |
| "grad_norm": 2.6555349826812744, |
| "learning_rate": 1.7109090909090908e-07, |
| "log_odds_chosen": 1.5638434886932373, |
| "log_odds_ratio": -0.36828985810279846, |
| "logits/chosen": 2.0425848960876465, |
| "logits/rejected": 2.0981991291046143, |
| "logps/chosen": -0.2785496115684509, |
| "logps/rejected": -0.9188894629478455, |
| "loss": 1.0393, |
| "nll_loss": 1.0024975538253784, |
| "rewards/accuracies": 0.8666666746139526, |
| "rewards/chosen": -0.027854960411787033, |
| "rewards/margins": 0.06403397768735886, |
| "rewards/rejected": -0.09188893437385559, |
| "step": 1460 |
| }, |
| { |
| "epoch": 2.029112754158965, |
| "grad_norm": 2.058720588684082, |
| "learning_rate": 1.7018181818181817e-07, |
| "log_odds_chosen": 1.749341368675232, |
| "log_odds_ratio": -0.3121403753757477, |
| "logits/chosen": 2.096733331680298, |
| "logits/rejected": 2.1691980361938477, |
| "logps/chosen": -0.2867683470249176, |
| "logps/rejected": -1.0571597814559937, |
| "loss": 1.0299, |
| "nll_loss": 0.9987198114395142, |
| "rewards/accuracies": 0.8916666507720947, |
| "rewards/chosen": -0.02867683582007885, |
| "rewards/margins": 0.07703914493322372, |
| "rewards/rejected": -0.10571598261594772, |
| "step": 1465 |
| }, |
| { |
| "epoch": 2.036044362292052, |
| "grad_norm": 2.216594934463501, |
| "learning_rate": 1.6927272727272724e-07, |
| "log_odds_chosen": 1.6534370183944702, |
| "log_odds_ratio": -0.3209837079048157, |
| "logits/chosen": 2.0079751014709473, |
| "logits/rejected": 2.092797040939331, |
| "logps/chosen": -0.26305797696113586, |
| "logps/rejected": -0.9659760594367981, |
| "loss": 1.0213, |
| "nll_loss": 0.989173173904419, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.026305796578526497, |
| "rewards/margins": 0.07029180228710175, |
| "rewards/rejected": -0.09659762680530548, |
| "step": 1470 |
| }, |
| { |
| "epoch": 2.0429759704251387, |
| "grad_norm": 2.0765326023101807, |
| "learning_rate": 1.6836363636363634e-07, |
| "log_odds_chosen": 1.5068204402923584, |
| "log_odds_ratio": -0.3684650659561157, |
| "logits/chosen": 2.1914827823638916, |
| "logits/rejected": 2.2479913234710693, |
| "logps/chosen": -0.3309532403945923, |
| "logps/rejected": -1.0164872407913208, |
| "loss": 1.0249, |
| "nll_loss": 0.9880392551422119, |
| "rewards/accuracies": 0.8166666626930237, |
| "rewards/chosen": -0.03309532627463341, |
| "rewards/margins": 0.06855340301990509, |
| "rewards/rejected": -0.1016487255692482, |
| "step": 1475 |
| }, |
| { |
| "epoch": 2.0499075785582255, |
| "grad_norm": 2.91998028755188, |
| "learning_rate": 1.6745454545454544e-07, |
| "log_odds_chosen": 1.4909127950668335, |
| "log_odds_ratio": -0.38384488224983215, |
| "logits/chosen": 2.0744376182556152, |
| "logits/rejected": 2.132093906402588, |
| "logps/chosen": -0.30625709891319275, |
| "logps/rejected": -0.9366697072982788, |
| "loss": 1.0368, |
| "nll_loss": 0.9983736872673035, |
| "rewards/accuracies": 0.8833333253860474, |
| "rewards/chosen": -0.030625708401203156, |
| "rewards/margins": 0.06304127722978592, |
| "rewards/rejected": -0.09366698563098907, |
| "step": 1480 |
| }, |
| { |
| "epoch": 2.0568391866913123, |
| "grad_norm": 2.1422932147979736, |
| "learning_rate": 1.6654545454545454e-07, |
| "log_odds_chosen": 1.6511032581329346, |
| "log_odds_ratio": -0.3445586562156677, |
| "logits/chosen": 2.038560152053833, |
| "logits/rejected": 2.1156678199768066, |
| "logps/chosen": -0.3046126961708069, |
| "logps/rejected": -1.0538097620010376, |
| "loss": 1.0316, |
| "nll_loss": 0.9971045255661011, |
| "rewards/accuracies": 0.8666666746139526, |
| "rewards/chosen": -0.030461272224783897, |
| "rewards/margins": 0.07491971552371979, |
| "rewards/rejected": -0.10538098216056824, |
| "step": 1485 |
| }, |
| { |
| "epoch": 2.063770794824399, |
| "grad_norm": 2.381495475769043, |
| "learning_rate": 1.656363636363636e-07, |
| "log_odds_chosen": 1.7995991706848145, |
| "log_odds_ratio": -0.27245932817459106, |
| "logits/chosen": 2.140026569366455, |
| "logits/rejected": 2.1896448135375977, |
| "logps/chosen": -0.25530093908309937, |
| "logps/rejected": -1.0938752889633179, |
| "loss": 1.0032, |
| "nll_loss": 0.9759642481803894, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.025530096143484116, |
| "rewards/margins": 0.08385743200778961, |
| "rewards/rejected": -0.10938753187656403, |
| "step": 1490 |
| }, |
| { |
| "epoch": 2.0707024029574863, |
| "grad_norm": 1.4904897212982178, |
| "learning_rate": 1.647272727272727e-07, |
| "log_odds_chosen": 1.5017544031143188, |
| "log_odds_ratio": -0.35147780179977417, |
| "logits/chosen": 2.0568175315856934, |
| "logits/rejected": 2.1001527309417725, |
| "logps/chosen": -0.2676287889480591, |
| "logps/rejected": -0.9620057344436646, |
| "loss": 1.0647, |
| "nll_loss": 1.0295929908752441, |
| "rewards/accuracies": 0.8583333492279053, |
| "rewards/chosen": -0.026762880384922028, |
| "rewards/margins": 0.06943770498037338, |
| "rewards/rejected": -0.09620057791471481, |
| "step": 1495 |
| }, |
| { |
| "epoch": 2.077634011090573, |
| "grad_norm": 2.361671209335327, |
| "learning_rate": 1.638181818181818e-07, |
| "log_odds_chosen": 1.625975489616394, |
| "log_odds_ratio": -0.323038786649704, |
| "logits/chosen": 2.047917366027832, |
| "logits/rejected": 2.1029675006866455, |
| "logps/chosen": -0.2699020802974701, |
| "logps/rejected": -0.9326964020729065, |
| "loss": 1.0517, |
| "nll_loss": 1.0193482637405396, |
| "rewards/accuracies": 0.8666666746139526, |
| "rewards/chosen": -0.02699020877480507, |
| "rewards/margins": 0.06627943366765976, |
| "rewards/rejected": -0.09326963871717453, |
| "step": 1500 |
| }, |
| { |
| "epoch": 2.08456561922366, |
| "grad_norm": 1.3164469003677368, |
| "learning_rate": 1.629090909090909e-07, |
| "log_odds_chosen": 1.7589662075042725, |
| "log_odds_ratio": -0.2796500623226166, |
| "logits/chosen": 2.0355899333953857, |
| "logits/rejected": 2.1100242137908936, |
| "logps/chosen": -0.24932418763637543, |
| "logps/rejected": -0.9945331811904907, |
| "loss": 0.9908, |
| "nll_loss": 0.9628455638885498, |
| "rewards/accuracies": 0.9083333611488342, |
| "rewards/chosen": -0.024932416155934334, |
| "rewards/margins": 0.07452090084552765, |
| "rewards/rejected": -0.09945331513881683, |
| "step": 1505 |
| }, |
| { |
| "epoch": 2.0914972273567467, |
| "grad_norm": 2.498619318008423, |
| "learning_rate": 1.62e-07, |
| "log_odds_chosen": 1.4863014221191406, |
| "log_odds_ratio": -0.40170204639434814, |
| "logits/chosen": 2.0163559913635254, |
| "logits/rejected": 2.0721843242645264, |
| "logps/chosen": -0.2834693491458893, |
| "logps/rejected": -0.9046918153762817, |
| "loss": 1.0191, |
| "nll_loss": 0.9789711833000183, |
| "rewards/accuracies": 0.8083333373069763, |
| "rewards/chosen": -0.028346937149763107, |
| "rewards/margins": 0.06212225183844566, |
| "rewards/rejected": -0.09046918898820877, |
| "step": 1510 |
| }, |
| { |
| "epoch": 2.0984288354898335, |
| "grad_norm": 1.4074289798736572, |
| "learning_rate": 1.6109090909090907e-07, |
| "log_odds_chosen": 1.654536485671997, |
| "log_odds_ratio": -0.34274205565452576, |
| "logits/chosen": 2.0640828609466553, |
| "logits/rejected": 2.1356544494628906, |
| "logps/chosen": -0.2802557945251465, |
| "logps/rejected": -1.0241177082061768, |
| "loss": 1.0183, |
| "nll_loss": 0.98404860496521, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.02802557870745659, |
| "rewards/margins": 0.07438618689775467, |
| "rewards/rejected": -0.10241175442934036, |
| "step": 1515 |
| }, |
| { |
| "epoch": 2.1053604436229207, |
| "grad_norm": 1.9247922897338867, |
| "learning_rate": 1.6018181818181816e-07, |
| "log_odds_chosen": 1.4689146280288696, |
| "log_odds_ratio": -0.3932505249977112, |
| "logits/chosen": 2.040996789932251, |
| "logits/rejected": 2.0966296195983887, |
| "logps/chosen": -0.2887535095214844, |
| "logps/rejected": -0.8648843765258789, |
| "loss": 1.0325, |
| "nll_loss": 0.993179440498352, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.02887534908950329, |
| "rewards/margins": 0.05761308595538139, |
| "rewards/rejected": -0.08648844808340073, |
| "step": 1520 |
| }, |
| { |
| "epoch": 2.1122920517560075, |
| "grad_norm": 2.162506103515625, |
| "learning_rate": 1.5927272727272726e-07, |
| "log_odds_chosen": 1.542865514755249, |
| "log_odds_ratio": -0.32878366112709045, |
| "logits/chosen": 2.064614772796631, |
| "logits/rejected": 2.1328229904174805, |
| "logps/chosen": -0.2473837435245514, |
| "logps/rejected": -0.8912278413772583, |
| "loss": 1.0342, |
| "nll_loss": 1.001328706741333, |
| "rewards/accuracies": 0.8583333492279053, |
| "rewards/chosen": -0.02473837323486805, |
| "rewards/margins": 0.06438441574573517, |
| "rewards/rejected": -0.08912278711795807, |
| "step": 1525 |
| }, |
| { |
| "epoch": 2.1192236598890943, |
| "grad_norm": 2.9879612922668457, |
| "learning_rate": 1.5836363636363636e-07, |
| "log_odds_chosen": 1.6084933280944824, |
| "log_odds_ratio": -0.3255232870578766, |
| "logits/chosen": 2.105452537536621, |
| "logits/rejected": 2.161074638366699, |
| "logps/chosen": -0.3203544020652771, |
| "logps/rejected": -1.0134159326553345, |
| "loss": 1.0447, |
| "nll_loss": 1.0121761560440063, |
| "rewards/accuracies": 0.8833333253860474, |
| "rewards/chosen": -0.03203544020652771, |
| "rewards/margins": 0.0693061575293541, |
| "rewards/rejected": -0.1013416051864624, |
| "step": 1530 |
| }, |
| { |
| "epoch": 2.126155268022181, |
| "grad_norm": 3.2565925121307373, |
| "learning_rate": 1.5745454545454543e-07, |
| "log_odds_chosen": 1.522952675819397, |
| "log_odds_ratio": -0.3534841239452362, |
| "logits/chosen": 2.0590784549713135, |
| "logits/rejected": 2.1140329837799072, |
| "logps/chosen": -0.26037055253982544, |
| "logps/rejected": -0.8886002898216248, |
| "loss": 0.986, |
| "nll_loss": 0.9506571888923645, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.026037057861685753, |
| "rewards/margins": 0.06282297521829605, |
| "rewards/rejected": -0.08886002749204636, |
| "step": 1535 |
| }, |
| { |
| "epoch": 2.133086876155268, |
| "grad_norm": 2.2923009395599365, |
| "learning_rate": 1.5654545454545453e-07, |
| "log_odds_chosen": 1.7157777547836304, |
| "log_odds_ratio": -0.31327977776527405, |
| "logits/chosen": 2.0170087814331055, |
| "logits/rejected": 2.061363935470581, |
| "logps/chosen": -0.2875458300113678, |
| "logps/rejected": -1.1021568775177002, |
| "loss": 1.0536, |
| "nll_loss": 1.022287130355835, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.02875458262860775, |
| "rewards/margins": 0.08146108686923981, |
| "rewards/rejected": -0.1102156713604927, |
| "step": 1540 |
| }, |
| { |
| "epoch": 2.1400184842883547, |
| "grad_norm": 4.337259769439697, |
| "learning_rate": 1.5563636363636362e-07, |
| "log_odds_chosen": 1.6073641777038574, |
| "log_odds_ratio": -0.32192662358283997, |
| "logits/chosen": 2.0638022422790527, |
| "logits/rejected": 2.128871440887451, |
| "logps/chosen": -0.24944542348384857, |
| "logps/rejected": -0.8842807412147522, |
| "loss": 0.9852, |
| "nll_loss": 0.9530341625213623, |
| "rewards/accuracies": 0.8416666388511658, |
| "rewards/chosen": -0.024944543838500977, |
| "rewards/margins": 0.06348354369401932, |
| "rewards/rejected": -0.0884280651807785, |
| "step": 1545 |
| }, |
| { |
| "epoch": 2.146950092421442, |
| "grad_norm": 1.788140058517456, |
| "learning_rate": 1.5472727272727272e-07, |
| "log_odds_chosen": 1.9304050207138062, |
| "log_odds_ratio": -0.2865446209907532, |
| "logits/chosen": 2.093130588531494, |
| "logits/rejected": 2.1594488620758057, |
| "logps/chosen": -0.26757973432540894, |
| "logps/rejected": -1.1472647190093994, |
| "loss": 1.032, |
| "nll_loss": 1.003312349319458, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.026757972314953804, |
| "rewards/margins": 0.08796848356723785, |
| "rewards/rejected": -0.1147264614701271, |
| "step": 1550 |
| }, |
| { |
| "epoch": 2.1538817005545288, |
| "grad_norm": 2.651644706726074, |
| "learning_rate": 1.538181818181818e-07, |
| "log_odds_chosen": 1.6535481214523315, |
| "log_odds_ratio": -0.34444233775138855, |
| "logits/chosen": 2.0424141883850098, |
| "logits/rejected": 2.0861897468566895, |
| "logps/chosen": -0.24586957693099976, |
| "logps/rejected": -0.9815968871116638, |
| "loss": 1.0189, |
| "nll_loss": 0.9844585061073303, |
| "rewards/accuracies": 0.7833333611488342, |
| "rewards/chosen": -0.024586956948041916, |
| "rewards/margins": 0.07357273995876312, |
| "rewards/rejected": -0.09815969318151474, |
| "step": 1555 |
| }, |
| { |
| "epoch": 2.1608133086876156, |
| "grad_norm": 1.644753336906433, |
| "learning_rate": 1.529090909090909e-07, |
| "log_odds_chosen": 1.670788049697876, |
| "log_odds_ratio": -0.32996612787246704, |
| "logits/chosen": 2.052905321121216, |
| "logits/rejected": 2.1133363246917725, |
| "logps/chosen": -0.2544824182987213, |
| "logps/rejected": -0.9820945858955383, |
| "loss": 1.0313, |
| "nll_loss": 0.9983068704605103, |
| "rewards/accuracies": 0.8583333492279053, |
| "rewards/chosen": -0.02544824406504631, |
| "rewards/margins": 0.07276120781898499, |
| "rewards/rejected": -0.09820946305990219, |
| "step": 1560 |
| }, |
| { |
| "epoch": 2.1677449168207024, |
| "grad_norm": 6.568633079528809, |
| "learning_rate": 1.5199999999999998e-07, |
| "log_odds_chosen": 1.5977410078048706, |
| "log_odds_ratio": -0.3366919457912445, |
| "logits/chosen": 1.9728270769119263, |
| "logits/rejected": 2.0407023429870605, |
| "logps/chosen": -0.28399333357810974, |
| "logps/rejected": -0.991985559463501, |
| "loss": 0.9979, |
| "nll_loss": 0.9642470479011536, |
| "rewards/accuracies": 0.8583333492279053, |
| "rewards/chosen": -0.028399331495165825, |
| "rewards/margins": 0.07079920917749405, |
| "rewards/rejected": -0.09919854253530502, |
| "step": 1565 |
| }, |
| { |
| "epoch": 2.174676524953789, |
| "grad_norm": 1.5433924198150635, |
| "learning_rate": 1.5109090909090908e-07, |
| "log_odds_chosen": 1.749664306640625, |
| "log_odds_ratio": -0.3313737213611603, |
| "logits/chosen": 2.0340335369110107, |
| "logits/rejected": 2.1193594932556152, |
| "logps/chosen": -0.28188708424568176, |
| "logps/rejected": -1.0578782558441162, |
| "loss": 1.0097, |
| "nll_loss": 0.9766021370887756, |
| "rewards/accuracies": 0.8583333492279053, |
| "rewards/chosen": -0.028188709169626236, |
| "rewards/margins": 0.07759912312030792, |
| "rewards/rejected": -0.10578783601522446, |
| "step": 1570 |
| }, |
| { |
| "epoch": 2.181608133086876, |
| "grad_norm": 2.6189091205596924, |
| "learning_rate": 1.5018181818181815e-07, |
| "log_odds_chosen": 1.6792007684707642, |
| "log_odds_ratio": -0.3193473219871521, |
| "logits/chosen": 2.0829341411590576, |
| "logits/rejected": 2.135345697402954, |
| "logps/chosen": -0.28347066044807434, |
| "logps/rejected": -1.055553913116455, |
| "loss": 1.0235, |
| "nll_loss": 0.9915785193443298, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.028347067534923553, |
| "rewards/margins": 0.07720831781625748, |
| "rewards/rejected": -0.10555538535118103, |
| "step": 1575 |
| }, |
| { |
| "epoch": 2.188539741219963, |
| "grad_norm": 3.955164670944214, |
| "learning_rate": 1.4927272727272725e-07, |
| "log_odds_chosen": 1.7316787242889404, |
| "log_odds_ratio": -0.34139496088027954, |
| "logits/chosen": 2.0785231590270996, |
| "logits/rejected": 2.1350691318511963, |
| "logps/chosen": -0.3276236951351166, |
| "logps/rejected": -1.071801781654358, |
| "loss": 1.0001, |
| "nll_loss": 0.9659791588783264, |
| "rewards/accuracies": 0.8833333253860474, |
| "rewards/chosen": -0.03276237100362778, |
| "rewards/margins": 0.07441780716180801, |
| "rewards/rejected": -0.10718018561601639, |
| "step": 1580 |
| }, |
| { |
| "epoch": 2.19547134935305, |
| "grad_norm": 2.1189846992492676, |
| "learning_rate": 1.4836363636363635e-07, |
| "log_odds_chosen": 1.6613436937332153, |
| "log_odds_ratio": -0.30669593811035156, |
| "logits/chosen": 2.053230047225952, |
| "logits/rejected": 2.096280097961426, |
| "logps/chosen": -0.29614728689193726, |
| "logps/rejected": -0.9985705018043518, |
| "loss": 1.0128, |
| "nll_loss": 0.9821043610572815, |
| "rewards/accuracies": 0.9083333611488342, |
| "rewards/chosen": -0.029614729806780815, |
| "rewards/margins": 0.07024230808019638, |
| "rewards/rejected": -0.09985704720020294, |
| "step": 1585 |
| }, |
| { |
| "epoch": 2.202402957486137, |
| "grad_norm": 2.127919912338257, |
| "learning_rate": 1.4745454545454544e-07, |
| "log_odds_chosen": 1.785252332687378, |
| "log_odds_ratio": -0.3034234941005707, |
| "logits/chosen": 2.0558953285217285, |
| "logits/rejected": 2.1106560230255127, |
| "logps/chosen": -0.24885861575603485, |
| "logps/rejected": -1.0158021450042725, |
| "loss": 0.9837, |
| "nll_loss": 0.9533202648162842, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.024885861203074455, |
| "rewards/margins": 0.07669434696435928, |
| "rewards/rejected": -0.10158021748065948, |
| "step": 1590 |
| }, |
| { |
| "epoch": 2.2093345656192236, |
| "grad_norm": 1.5249942541122437, |
| "learning_rate": 1.4654545454545454e-07, |
| "log_odds_chosen": 1.6279139518737793, |
| "log_odds_ratio": -0.318685919046402, |
| "logits/chosen": 1.9436652660369873, |
| "logits/rejected": 2.0145747661590576, |
| "logps/chosen": -0.2697173058986664, |
| "logps/rejected": -0.9779646992683411, |
| "loss": 1.0295, |
| "nll_loss": 0.9976499080657959, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.02697172947227955, |
| "rewards/margins": 0.07082473486661911, |
| "rewards/rejected": -0.0977964699268341, |
| "step": 1595 |
| }, |
| { |
| "epoch": 2.2162661737523104, |
| "grad_norm": 1.8669286966323853, |
| "learning_rate": 1.456363636363636e-07, |
| "log_odds_chosen": 1.5612767934799194, |
| "log_odds_ratio": -0.33474841713905334, |
| "logits/chosen": 2.109074831008911, |
| "logits/rejected": 2.1647109985351562, |
| "logps/chosen": -0.3184746503829956, |
| "logps/rejected": -0.9675430655479431, |
| "loss": 1.0129, |
| "nll_loss": 0.9794076681137085, |
| "rewards/accuracies": 0.8833333253860474, |
| "rewards/chosen": -0.03184746578335762, |
| "rewards/margins": 0.06490684300661087, |
| "rewards/rejected": -0.09675431996583939, |
| "step": 1600 |
| }, |
| { |
| "epoch": 2.223197781885397, |
| "grad_norm": 3.105397939682007, |
| "learning_rate": 1.447272727272727e-07, |
| "log_odds_chosen": 1.330115556716919, |
| "log_odds_ratio": -0.4384697377681732, |
| "logits/chosen": 1.977685809135437, |
| "logits/rejected": 2.0224320888519287, |
| "logps/chosen": -0.32018712162971497, |
| "logps/rejected": -0.841995894908905, |
| "loss": 0.9974, |
| "nll_loss": 0.9535176157951355, |
| "rewards/accuracies": 0.8166666626930237, |
| "rewards/chosen": -0.03201870992779732, |
| "rewards/margins": 0.05218088626861572, |
| "rewards/rejected": -0.08419959247112274, |
| "step": 1605 |
| }, |
| { |
| "epoch": 2.2301293900184844, |
| "grad_norm": 2.435016393661499, |
| "learning_rate": 1.438181818181818e-07, |
| "log_odds_chosen": 1.5461903810501099, |
| "log_odds_ratio": -0.37018927931785583, |
| "logits/chosen": 2.003584146499634, |
| "logits/rejected": 2.068721294403076, |
| "logps/chosen": -0.2579006850719452, |
| "logps/rejected": -0.9120422601699829, |
| "loss": 0.9777, |
| "nll_loss": 0.9407215118408203, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.025790071114897728, |
| "rewards/margins": 0.06541414558887482, |
| "rewards/rejected": -0.0912042185664177, |
| "step": 1610 |
| }, |
| { |
| "epoch": 2.2370609981515712, |
| "grad_norm": 2.2165729999542236, |
| "learning_rate": 1.429090909090909e-07, |
| "log_odds_chosen": 1.4898757934570312, |
| "log_odds_ratio": -0.36625441908836365, |
| "logits/chosen": 2.0889649391174316, |
| "logits/rejected": 2.1458566188812256, |
| "logps/chosen": -0.2834877669811249, |
| "logps/rejected": -0.8642836809158325, |
| "loss": 1.0307, |
| "nll_loss": 0.994057834148407, |
| "rewards/accuracies": 0.8583333492279053, |
| "rewards/chosen": -0.028348777443170547, |
| "rewards/margins": 0.05807959660887718, |
| "rewards/rejected": -0.08642836660146713, |
| "step": 1615 |
| }, |
| { |
| "epoch": 2.243992606284658, |
| "grad_norm": 2.29976224899292, |
| "learning_rate": 1.4199999999999997e-07, |
| "log_odds_chosen": 1.654101848602295, |
| "log_odds_ratio": -0.3310154378414154, |
| "logits/chosen": 1.9520740509033203, |
| "logits/rejected": 1.9996510744094849, |
| "logps/chosen": -0.29835817217826843, |
| "logps/rejected": -1.0397998094558716, |
| "loss": 1.051, |
| "nll_loss": 1.0178704261779785, |
| "rewards/accuracies": 0.8583333492279053, |
| "rewards/chosen": -0.02983582206070423, |
| "rewards/margins": 0.074144147336483, |
| "rewards/rejected": -0.10397996753454208, |
| "step": 1620 |
| }, |
| { |
| "epoch": 2.250924214417745, |
| "grad_norm": 2.7785146236419678, |
| "learning_rate": 1.4109090909090907e-07, |
| "log_odds_chosen": 1.701553225517273, |
| "log_odds_ratio": -0.27737200260162354, |
| "logits/chosen": 2.0182878971099854, |
| "logits/rejected": 2.088885545730591, |
| "logps/chosen": -0.2814914286136627, |
| "logps/rejected": -1.0081548690795898, |
| "loss": 1.0094, |
| "nll_loss": 0.9816693067550659, |
| "rewards/accuracies": 0.9166666865348816, |
| "rewards/chosen": -0.02814914472401142, |
| "rewards/margins": 0.07266633957624435, |
| "rewards/rejected": -0.10081546753644943, |
| "step": 1625 |
| }, |
| { |
| "epoch": 2.2578558225508316, |
| "grad_norm": 1.6091256141662598, |
| "learning_rate": 1.4018181818181817e-07, |
| "log_odds_chosen": 1.6265534162521362, |
| "log_odds_ratio": -0.3619597852230072, |
| "logits/chosen": 1.9786964654922485, |
| "logits/rejected": 2.02095365524292, |
| "logps/chosen": -0.2642413377761841, |
| "logps/rejected": -0.998843252658844, |
| "loss": 1.0237, |
| "nll_loss": 0.9874651432037354, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -0.026424136012792587, |
| "rewards/margins": 0.07346019893884659, |
| "rewards/rejected": -0.09988433867692947, |
| "step": 1630 |
| }, |
| { |
| "epoch": 2.264787430683919, |
| "grad_norm": 2.9606359004974365, |
| "learning_rate": 1.3927272727272727e-07, |
| "log_odds_chosen": 1.4278662204742432, |
| "log_odds_ratio": -0.37395817041397095, |
| "logits/chosen": 2.1129722595214844, |
| "logits/rejected": 2.1692874431610107, |
| "logps/chosen": -0.30893474817276, |
| "logps/rejected": -0.838131308555603, |
| "loss": 1.0075, |
| "nll_loss": 0.9701253771781921, |
| "rewards/accuracies": 0.8583333492279053, |
| "rewards/chosen": -0.03089348040521145, |
| "rewards/margins": 0.052919652312994, |
| "rewards/rejected": -0.0838131383061409, |
| "step": 1635 |
| }, |
| { |
| "epoch": 2.2717190388170057, |
| "grad_norm": 2.276667356491089, |
| "learning_rate": 1.3836363636363634e-07, |
| "log_odds_chosen": 1.5527472496032715, |
| "log_odds_ratio": -0.3658043444156647, |
| "logits/chosen": 2.067870616912842, |
| "logits/rejected": 2.1205027103424072, |
| "logps/chosen": -0.2891118824481964, |
| "logps/rejected": -0.9314442873001099, |
| "loss": 1.0232, |
| "nll_loss": 0.9866386651992798, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.02891119197010994, |
| "rewards/margins": 0.06423323601484299, |
| "rewards/rejected": -0.09314444661140442, |
| "step": 1640 |
| }, |
| { |
| "epoch": 2.2786506469500925, |
| "grad_norm": 1.4873467683792114, |
| "learning_rate": 1.3745454545454543e-07, |
| "log_odds_chosen": 1.851697564125061, |
| "log_odds_ratio": -0.3145390450954437, |
| "logits/chosen": 2.075488567352295, |
| "logits/rejected": 2.1666271686553955, |
| "logps/chosen": -0.2427607625722885, |
| "logps/rejected": -1.01997709274292, |
| "loss": 0.9927, |
| "nll_loss": 0.9612413048744202, |
| "rewards/accuracies": 0.9083333611488342, |
| "rewards/chosen": -0.02427607588469982, |
| "rewards/margins": 0.07772162556648254, |
| "rewards/rejected": -0.10199771821498871, |
| "step": 1645 |
| }, |
| { |
| "epoch": 2.2855822550831792, |
| "grad_norm": 2.837634325027466, |
| "learning_rate": 1.3654545454545453e-07, |
| "log_odds_chosen": 1.7820488214492798, |
| "log_odds_ratio": -0.31967195868492126, |
| "logits/chosen": 2.0722880363464355, |
| "logits/rejected": 2.1255042552948, |
| "logps/chosen": -0.3070850968360901, |
| "logps/rejected": -1.0848478078842163, |
| "loss": 1.0299, |
| "nll_loss": 0.9979235529899597, |
| "rewards/accuracies": 0.8916666507720947, |
| "rewards/chosen": -0.030708512291312218, |
| "rewards/margins": 0.07777624577283859, |
| "rewards/rejected": -0.10848478227853775, |
| "step": 1650 |
| }, |
| { |
| "epoch": 2.292513863216266, |
| "grad_norm": 1.572646975517273, |
| "learning_rate": 1.3563636363636363e-07, |
| "log_odds_chosen": 1.8489151000976562, |
| "log_odds_ratio": -0.2767617404460907, |
| "logits/chosen": 2.077411413192749, |
| "logits/rejected": 2.1307966709136963, |
| "logps/chosen": -0.2723609209060669, |
| "logps/rejected": -1.0447889566421509, |
| "loss": 0.9829, |
| "nll_loss": 0.9551786184310913, |
| "rewards/accuracies": 0.8916666507720947, |
| "rewards/chosen": -0.0272360946983099, |
| "rewards/margins": 0.07724279910326004, |
| "rewards/rejected": -0.10447890311479568, |
| "step": 1655 |
| }, |
| { |
| "epoch": 2.299445471349353, |
| "grad_norm": 2.2417216300964355, |
| "learning_rate": 1.347272727272727e-07, |
| "log_odds_chosen": 1.674991488456726, |
| "log_odds_ratio": -0.30424702167510986, |
| "logits/chosen": 1.9423750638961792, |
| "logits/rejected": 2.0181448459625244, |
| "logps/chosen": -0.26656848192214966, |
| "logps/rejected": -0.947805643081665, |
| "loss": 1.0604, |
| "nll_loss": 1.0300124883651733, |
| "rewards/accuracies": 0.8583333492279053, |
| "rewards/chosen": -0.026656849309802055, |
| "rewards/margins": 0.0681237280368805, |
| "rewards/rejected": -0.0947805717587471, |
| "step": 1660 |
| }, |
| { |
| "epoch": 2.30637707948244, |
| "grad_norm": 2.700683355331421, |
| "learning_rate": 1.338181818181818e-07, |
| "log_odds_chosen": 1.6018115282058716, |
| "log_odds_ratio": -0.32080212235450745, |
| "logits/chosen": 2.066725969314575, |
| "logits/rejected": 2.1303725242614746, |
| "logps/chosen": -0.25507083535194397, |
| "logps/rejected": -0.9004265069961548, |
| "loss": 0.9929, |
| "nll_loss": 0.9608381390571594, |
| "rewards/accuracies": 0.8666666746139526, |
| "rewards/chosen": -0.025507085025310516, |
| "rewards/margins": 0.06453555822372437, |
| "rewards/rejected": -0.09004264324903488, |
| "step": 1665 |
| }, |
| { |
| "epoch": 2.313308687615527, |
| "grad_norm": 2.3125734329223633, |
| "learning_rate": 1.329090909090909e-07, |
| "log_odds_chosen": 1.7538138628005981, |
| "log_odds_ratio": -0.30055293440818787, |
| "logits/chosen": 2.082395315170288, |
| "logits/rejected": 2.138796091079712, |
| "logps/chosen": -0.32292428612709045, |
| "logps/rejected": -1.132580280303955, |
| "loss": 1.0183, |
| "nll_loss": 0.9882605671882629, |
| "rewards/accuracies": 0.8916666507720947, |
| "rewards/chosen": -0.032292433083057404, |
| "rewards/margins": 0.08096561580896378, |
| "rewards/rejected": -0.11325804144144058, |
| "step": 1670 |
| }, |
| { |
| "epoch": 2.3202402957486137, |
| "grad_norm": 2.303008794784546, |
| "learning_rate": 1.32e-07, |
| "log_odds_chosen": 1.66550874710083, |
| "log_odds_ratio": -0.32220420241355896, |
| "logits/chosen": 2.0094358921051025, |
| "logits/rejected": 2.0630240440368652, |
| "logps/chosen": -0.28716641664505005, |
| "logps/rejected": -1.0477604866027832, |
| "loss": 1.0112, |
| "nll_loss": 0.9789758324623108, |
| "rewards/accuracies": 0.8666666746139526, |
| "rewards/chosen": -0.028716640546917915, |
| "rewards/margins": 0.07605940848588943, |
| "rewards/rejected": -0.1047760397195816, |
| "step": 1675 |
| }, |
| { |
| "epoch": 2.3271719038817005, |
| "grad_norm": 3.235823631286621, |
| "learning_rate": 1.3109090909090906e-07, |
| "log_odds_chosen": 1.7851775884628296, |
| "log_odds_ratio": -0.29265934228897095, |
| "logits/chosen": 2.0131328105926514, |
| "logits/rejected": 2.072057008743286, |
| "logps/chosen": -0.30348002910614014, |
| "logps/rejected": -1.1123188734054565, |
| "loss": 1.0679, |
| "nll_loss": 1.0386704206466675, |
| "rewards/accuracies": 0.8916666507720947, |
| "rewards/chosen": -0.030348004773259163, |
| "rewards/margins": 0.08088389039039612, |
| "rewards/rejected": -0.11123190075159073, |
| "step": 1680 |
| }, |
| { |
| "epoch": 2.3341035120147873, |
| "grad_norm": 2.9141244888305664, |
| "learning_rate": 1.3018181818181816e-07, |
| "log_odds_chosen": 1.7565422058105469, |
| "log_odds_ratio": -0.3614824414253235, |
| "logits/chosen": 2.1100566387176514, |
| "logits/rejected": 2.167602777481079, |
| "logps/chosen": -0.3503415882587433, |
| "logps/rejected": -1.2140240669250488, |
| "loss": 1.0574, |
| "nll_loss": 1.021295428276062, |
| "rewards/accuracies": 0.8166666626930237, |
| "rewards/chosen": -0.03503415733575821, |
| "rewards/margins": 0.08636824786663055, |
| "rewards/rejected": -0.12140240520238876, |
| "step": 1685 |
| }, |
| { |
| "epoch": 2.3410351201478745, |
| "grad_norm": 2.8215250968933105, |
| "learning_rate": 1.2927272727272726e-07, |
| "log_odds_chosen": 1.7545371055603027, |
| "log_odds_ratio": -0.30033814907073975, |
| "logits/chosen": 1.996227741241455, |
| "logits/rejected": 2.0622332096099854, |
| "logps/chosen": -0.2831988036632538, |
| "logps/rejected": -1.0355448722839355, |
| "loss": 0.9776, |
| "nll_loss": 0.9475898742675781, |
| "rewards/accuracies": 0.8833333253860474, |
| "rewards/chosen": -0.02831987477838993, |
| "rewards/margins": 0.07523461431264877, |
| "rewards/rejected": -0.10355449467897415, |
| "step": 1690 |
| }, |
| { |
| "epoch": 2.3479667282809613, |
| "grad_norm": 1.334836721420288, |
| "learning_rate": 1.2836363636363635e-07, |
| "log_odds_chosen": 1.8632985353469849, |
| "log_odds_ratio": -0.23800452053546906, |
| "logits/chosen": 2.006606101989746, |
| "logits/rejected": 2.080357789993286, |
| "logps/chosen": -0.2609540820121765, |
| "logps/rejected": -1.0780720710754395, |
| "loss": 0.996, |
| "nll_loss": 0.9722317457199097, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -0.02609540894627571, |
| "rewards/margins": 0.0817117914557457, |
| "rewards/rejected": -0.1078072041273117, |
| "step": 1695 |
| }, |
| { |
| "epoch": 2.354898336414048, |
| "grad_norm": 2.684661626815796, |
| "learning_rate": 1.2745454545454545e-07, |
| "log_odds_chosen": 1.8242162466049194, |
| "log_odds_ratio": -0.2849119305610657, |
| "logits/chosen": 1.953218698501587, |
| "logits/rejected": 2.0185937881469727, |
| "logps/chosen": -0.25621822476387024, |
| "logps/rejected": -1.050632357597351, |
| "loss": 0.9865, |
| "nll_loss": 0.9579840898513794, |
| "rewards/accuracies": 0.8833333253860474, |
| "rewards/chosen": -0.025621820241212845, |
| "rewards/margins": 0.07944142073392868, |
| "rewards/rejected": -0.10506324470043182, |
| "step": 1700 |
| }, |
| { |
| "epoch": 2.361829944547135, |
| "grad_norm": 1.7048418521881104, |
| "learning_rate": 1.2654545454545452e-07, |
| "log_odds_chosen": 1.9969215393066406, |
| "log_odds_ratio": -0.25461483001708984, |
| "logits/chosen": 1.9886645078659058, |
| "logits/rejected": 2.0469629764556885, |
| "logps/chosen": -0.2665863335132599, |
| "logps/rejected": -1.1525532007217407, |
| "loss": 1.0313, |
| "nll_loss": 1.0058410167694092, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.02665863186120987, |
| "rewards/margins": 0.08859668672084808, |
| "rewards/rejected": -0.11525531858205795, |
| "step": 1705 |
| }, |
| { |
| "epoch": 2.3687615526802217, |
| "grad_norm": 2.232328414916992, |
| "learning_rate": 1.2563636363636362e-07, |
| "log_odds_chosen": 1.9676387310028076, |
| "log_odds_ratio": -0.2715602517127991, |
| "logits/chosen": 1.936298131942749, |
| "logits/rejected": 1.9897048473358154, |
| "logps/chosen": -0.2537732422351837, |
| "logps/rejected": -1.1118028163909912, |
| "loss": 1.0124, |
| "nll_loss": 0.9852831363677979, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.025377323850989342, |
| "rewards/margins": 0.08580294251441956, |
| "rewards/rejected": -0.11118026822805405, |
| "step": 1710 |
| }, |
| { |
| "epoch": 2.3756931608133085, |
| "grad_norm": 3.159862518310547, |
| "learning_rate": 1.2472727272727272e-07, |
| "log_odds_chosen": 1.9476854801177979, |
| "log_odds_ratio": -0.2814669609069824, |
| "logits/chosen": 1.9453091621398926, |
| "logits/rejected": 2.018049478530884, |
| "logps/chosen": -0.2795754075050354, |
| "logps/rejected": -1.1083685159683228, |
| "loss": 1.0349, |
| "nll_loss": 1.0067722797393799, |
| "rewards/accuracies": 0.9083333611488342, |
| "rewards/chosen": -0.02795754000544548, |
| "rewards/margins": 0.08287932723760605, |
| "rewards/rejected": -0.11083687096834183, |
| "step": 1715 |
| }, |
| { |
| "epoch": 2.3826247689463957, |
| "grad_norm": 2.8082034587860107, |
| "learning_rate": 1.238181818181818e-07, |
| "log_odds_chosen": 1.8380389213562012, |
| "log_odds_ratio": -0.3166138231754303, |
| "logits/chosen": 2.0408785343170166, |
| "logits/rejected": 2.11739182472229, |
| "logps/chosen": -0.28606662154197693, |
| "logps/rejected": -1.1519672870635986, |
| "loss": 1.0073, |
| "nll_loss": 0.9756883978843689, |
| "rewards/accuracies": 0.8916666507720947, |
| "rewards/chosen": -0.02860666997730732, |
| "rewards/margins": 0.08659005165100098, |
| "rewards/rejected": -0.11519671976566315, |
| "step": 1720 |
| }, |
| { |
| "epoch": 2.3895563770794825, |
| "grad_norm": 2.500714063644409, |
| "learning_rate": 1.2290909090909088e-07, |
| "log_odds_chosen": 1.6278916597366333, |
| "log_odds_ratio": -0.3502149283885956, |
| "logits/chosen": 2.0757570266723633, |
| "logits/rejected": 2.1220290660858154, |
| "logps/chosen": -0.31332165002822876, |
| "logps/rejected": -1.052827000617981, |
| "loss": 1.003, |
| "nll_loss": 0.9680219888687134, |
| "rewards/accuracies": 0.8583333492279053, |
| "rewards/chosen": -0.031332165002822876, |
| "rewards/margins": 0.07395053654909134, |
| "rewards/rejected": -0.10528270155191422, |
| "step": 1725 |
| }, |
| { |
| "epoch": 2.3964879852125693, |
| "grad_norm": 2.743722915649414, |
| "learning_rate": 1.2199999999999998e-07, |
| "log_odds_chosen": 1.4381171464920044, |
| "log_odds_ratio": -0.43818411231040955, |
| "logits/chosen": 2.0334222316741943, |
| "logits/rejected": 2.081953287124634, |
| "logps/chosen": -0.3282889723777771, |
| "logps/rejected": -0.9676831364631653, |
| "loss": 1.0602, |
| "nll_loss": 1.0163487195968628, |
| "rewards/accuracies": 0.8166666626930237, |
| "rewards/chosen": -0.03282889723777771, |
| "rewards/margins": 0.0639394223690033, |
| "rewards/rejected": -0.0967683270573616, |
| "step": 1730 |
| }, |
| { |
| "epoch": 2.403419593345656, |
| "grad_norm": 2.034834146499634, |
| "learning_rate": 1.2109090909090908e-07, |
| "log_odds_chosen": 1.7440353631973267, |
| "log_odds_ratio": -0.2908443510532379, |
| "logits/chosen": 2.1094157695770264, |
| "logits/rejected": 2.1522154808044434, |
| "logps/chosen": -0.2517443597316742, |
| "logps/rejected": -0.9762896299362183, |
| "loss": 0.9974, |
| "nll_loss": 0.9682726263999939, |
| "rewards/accuracies": 0.8833333253860474, |
| "rewards/chosen": -0.02517443709075451, |
| "rewards/margins": 0.0724545270204544, |
| "rewards/rejected": -0.09762895852327347, |
| "step": 1735 |
| }, |
| { |
| "epoch": 2.410351201478743, |
| "grad_norm": 4.332306861877441, |
| "learning_rate": 1.2018181818181818e-07, |
| "log_odds_chosen": 1.5381571054458618, |
| "log_odds_ratio": -0.3476658761501312, |
| "logits/chosen": 2.057114362716675, |
| "logits/rejected": 2.090787410736084, |
| "logps/chosen": -0.276753693819046, |
| "logps/rejected": -0.9296085834503174, |
| "loss": 1.0436, |
| "nll_loss": 1.0088648796081543, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.027675366029143333, |
| "rewards/margins": 0.06528548896312714, |
| "rewards/rejected": -0.09296084940433502, |
| "step": 1740 |
| }, |
| { |
| "epoch": 2.41728280961183, |
| "grad_norm": 3.1289680004119873, |
| "learning_rate": 1.1927272727272725e-07, |
| "log_odds_chosen": 1.5321450233459473, |
| "log_odds_ratio": -0.3635505437850952, |
| "logits/chosen": 2.098928928375244, |
| "logits/rejected": 2.1394617557525635, |
| "logps/chosen": -0.32607170939445496, |
| "logps/rejected": -1.0010368824005127, |
| "loss": 1.0101, |
| "nll_loss": 0.973716676235199, |
| "rewards/accuracies": 0.8583333492279053, |
| "rewards/chosen": -0.032607175409793854, |
| "rewards/margins": 0.06749651581048965, |
| "rewards/rejected": -0.10010368376970291, |
| "step": 1745 |
| }, |
| { |
| "epoch": 2.424214417744917, |
| "grad_norm": 2.292997360229492, |
| "learning_rate": 1.1836363636363636e-07, |
| "log_odds_chosen": 1.6794272661209106, |
| "log_odds_ratio": -0.3062840700149536, |
| "logits/chosen": 1.9977962970733643, |
| "logits/rejected": 2.0764520168304443, |
| "logps/chosen": -0.24340610206127167, |
| "logps/rejected": -0.9343698620796204, |
| "loss": 0.9467, |
| "nll_loss": 0.9160255193710327, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.024340612813830376, |
| "rewards/margins": 0.0690963938832283, |
| "rewards/rejected": -0.09343700110912323, |
| "step": 1750 |
| }, |
| { |
| "epoch": 2.4311460258780038, |
| "grad_norm": 2.060739517211914, |
| "learning_rate": 1.1745454545454545e-07, |
| "log_odds_chosen": 1.7372018098831177, |
| "log_odds_ratio": -0.36456355452537537, |
| "logits/chosen": 2.081760883331299, |
| "logits/rejected": 2.139793634414673, |
| "logps/chosen": -0.30359095335006714, |
| "logps/rejected": -1.0979803800582886, |
| "loss": 1.0152, |
| "nll_loss": 0.9787145853042603, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.030359093099832535, |
| "rewards/margins": 0.0794389396905899, |
| "rewards/rejected": -0.10979804396629333, |
| "step": 1755 |
| }, |
| { |
| "epoch": 2.4380776340110906, |
| "grad_norm": 2.298625946044922, |
| "learning_rate": 1.1654545454545455e-07, |
| "log_odds_chosen": 1.6793102025985718, |
| "log_odds_ratio": -0.335517019033432, |
| "logits/chosen": 2.0144755840301514, |
| "logits/rejected": 2.075939416885376, |
| "logps/chosen": -0.2551276385784149, |
| "logps/rejected": -0.9466179013252258, |
| "loss": 1.0231, |
| "nll_loss": 0.9895772337913513, |
| "rewards/accuracies": 0.8666666746139526, |
| "rewards/chosen": -0.02551276609301567, |
| "rewards/margins": 0.06914903223514557, |
| "rewards/rejected": -0.09466180205345154, |
| "step": 1760 |
| }, |
| { |
| "epoch": 2.4450092421441774, |
| "grad_norm": 1.3807379007339478, |
| "learning_rate": 1.1563636363636362e-07, |
| "log_odds_chosen": 1.695957064628601, |
| "log_odds_ratio": -0.33206743001937866, |
| "logits/chosen": 2.0311429500579834, |
| "logits/rejected": 2.0982778072357178, |
| "logps/chosen": -0.25683820247650146, |
| "logps/rejected": -0.978820264339447, |
| "loss": 1.019, |
| "nll_loss": 0.9857791066169739, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.025683818385004997, |
| "rewards/margins": 0.07219821959733963, |
| "rewards/rejected": -0.09788203239440918, |
| "step": 1765 |
| }, |
| { |
| "epoch": 2.451940850277264, |
| "grad_norm": 2.096445083618164, |
| "learning_rate": 1.1472727272727272e-07, |
| "log_odds_chosen": 1.7026692628860474, |
| "log_odds_ratio": -0.3184446692466736, |
| "logits/chosen": 2.0503830909729004, |
| "logits/rejected": 2.086923122406006, |
| "logps/chosen": -0.27430155873298645, |
| "logps/rejected": -0.9957641363143921, |
| "loss": 1.0057, |
| "nll_loss": 0.9738113880157471, |
| "rewards/accuracies": 0.8833333253860474, |
| "rewards/chosen": -0.027430152520537376, |
| "rewards/margins": 0.07214626669883728, |
| "rewards/rejected": -0.0995764285326004, |
| "step": 1770 |
| }, |
| { |
| "epoch": 2.458872458410351, |
| "grad_norm": 2.9843761920928955, |
| "learning_rate": 1.1381818181818182e-07, |
| "log_odds_chosen": 1.8962608575820923, |
| "log_odds_ratio": -0.2932147681713104, |
| "logits/chosen": 2.1202423572540283, |
| "logits/rejected": 2.1624252796173096, |
| "logps/chosen": -0.2599133849143982, |
| "logps/rejected": -1.1326204538345337, |
| "loss": 1.0537, |
| "nll_loss": 1.0243782997131348, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.025991341099143028, |
| "rewards/margins": 0.08727072179317474, |
| "rewards/rejected": -0.11326204240322113, |
| "step": 1775 |
| }, |
| { |
| "epoch": 2.465804066543438, |
| "grad_norm": 2.571378469467163, |
| "learning_rate": 1.1290909090909091e-07, |
| "log_odds_chosen": 2.0019185543060303, |
| "log_odds_ratio": -0.24575158953666687, |
| "logits/chosen": 2.049402952194214, |
| "logits/rejected": 2.103079080581665, |
| "logps/chosen": -0.3088202476501465, |
| "logps/rejected": -1.2870643138885498, |
| "loss": 1.0222, |
| "nll_loss": 0.997674822807312, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -0.03088202513754368, |
| "rewards/margins": 0.09782441705465317, |
| "rewards/rejected": -0.1287064403295517, |
| "step": 1780 |
| }, |
| { |
| "epoch": 2.472735674676525, |
| "grad_norm": 1.9969513416290283, |
| "learning_rate": 1.1200000000000001e-07, |
| "log_odds_chosen": 1.6785210371017456, |
| "log_odds_ratio": -0.3223731815814972, |
| "logits/chosen": 2.0518710613250732, |
| "logits/rejected": 2.1081185340881348, |
| "logps/chosen": -0.2911817133426666, |
| "logps/rejected": -1.0387407541275024, |
| "loss": 0.9996, |
| "nll_loss": 0.9674090147018433, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.02911817468702793, |
| "rewards/margins": 0.07475589960813522, |
| "rewards/rejected": -0.1038740798830986, |
| "step": 1785 |
| }, |
| { |
| "epoch": 2.479667282809612, |
| "grad_norm": 2.367443799972534, |
| "learning_rate": 1.1109090909090908e-07, |
| "log_odds_chosen": 1.6323319673538208, |
| "log_odds_ratio": -0.35728973150253296, |
| "logits/chosen": 1.9989386796951294, |
| "logits/rejected": 2.0702216625213623, |
| "logps/chosen": -0.2859072685241699, |
| "logps/rejected": -0.9873275756835938, |
| "loss": 0.9933, |
| "nll_loss": 0.9575673341751099, |
| "rewards/accuracies": 0.8416666388511658, |
| "rewards/chosen": -0.02859073132276535, |
| "rewards/margins": 0.07014203071594238, |
| "rewards/rejected": -0.09873275458812714, |
| "step": 1790 |
| }, |
| { |
| "epoch": 2.4865988909426986, |
| "grad_norm": 5.1550374031066895, |
| "learning_rate": 1.1018181818181818e-07, |
| "log_odds_chosen": 1.9188029766082764, |
| "log_odds_ratio": -0.31065088510513306, |
| "logits/chosen": 1.9901913404464722, |
| "logits/rejected": 2.050278902053833, |
| "logps/chosen": -0.3183686137199402, |
| "logps/rejected": -1.161499261856079, |
| "loss": 1.0106, |
| "nll_loss": 0.9795438051223755, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.0318368636071682, |
| "rewards/margins": 0.0843130573630333, |
| "rewards/rejected": -0.1161499172449112, |
| "step": 1795 |
| }, |
| { |
| "epoch": 2.4935304990757854, |
| "grad_norm": 2.2873830795288086, |
| "learning_rate": 1.0927272727272728e-07, |
| "log_odds_chosen": 1.4898102283477783, |
| "log_odds_ratio": -0.36497774720191956, |
| "logits/chosen": 2.037238121032715, |
| "logits/rejected": 2.0807337760925293, |
| "logps/chosen": -0.29659178853034973, |
| "logps/rejected": -1.0002473592758179, |
| "loss": 1.0471, |
| "nll_loss": 1.0105878114700317, |
| "rewards/accuracies": 0.7916666865348816, |
| "rewards/chosen": -0.029659178107976913, |
| "rewards/margins": 0.0703655481338501, |
| "rewards/rejected": -0.10002472996711731, |
| "step": 1800 |
| }, |
| { |
| "epoch": 2.5004621072088726, |
| "grad_norm": 2.6431796550750732, |
| "learning_rate": 1.0836363636363637e-07, |
| "log_odds_chosen": 1.6469905376434326, |
| "log_odds_ratio": -0.3312085270881653, |
| "logits/chosen": 1.9593592882156372, |
| "logits/rejected": 2.0358383655548096, |
| "logps/chosen": -0.26151055097579956, |
| "logps/rejected": -0.9036704301834106, |
| "loss": 1.0059, |
| "nll_loss": 0.9727994203567505, |
| "rewards/accuracies": 0.8583333492279053, |
| "rewards/chosen": -0.026151059195399284, |
| "rewards/margins": 0.06421598047018051, |
| "rewards/rejected": -0.09036703407764435, |
| "step": 1805 |
| }, |
| { |
| "epoch": 2.5073937153419594, |
| "grad_norm": 2.9634170532226562, |
| "learning_rate": 1.0745454545454544e-07, |
| "log_odds_chosen": 1.5996876955032349, |
| "log_odds_ratio": -0.33672866225242615, |
| "logits/chosen": 1.9904649257659912, |
| "logits/rejected": 2.0536324977874756, |
| "logps/chosen": -0.32099291682243347, |
| "logps/rejected": -1.0023874044418335, |
| "loss": 1.0524, |
| "nll_loss": 1.018733263015747, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.03209929168224335, |
| "rewards/margins": 0.0681394636631012, |
| "rewards/rejected": -0.10023875534534454, |
| "step": 1810 |
| }, |
| { |
| "epoch": 2.5143253234750462, |
| "grad_norm": 1.860707402229309, |
| "learning_rate": 1.0654545454545454e-07, |
| "log_odds_chosen": 1.8385961055755615, |
| "log_odds_ratio": -0.34007883071899414, |
| "logits/chosen": 1.9847419261932373, |
| "logits/rejected": 2.0414199829101562, |
| "logps/chosen": -0.3060314953327179, |
| "logps/rejected": -1.150540828704834, |
| "loss": 0.9906, |
| "nll_loss": 0.9565935730934143, |
| "rewards/accuracies": 0.8916666507720947, |
| "rewards/chosen": -0.03060315176844597, |
| "rewards/margins": 0.08445093035697937, |
| "rewards/rejected": -0.11505408585071564, |
| "step": 1815 |
| }, |
| { |
| "epoch": 2.521256931608133, |
| "grad_norm": 2.953806161880493, |
| "learning_rate": 1.0563636363636364e-07, |
| "log_odds_chosen": 1.6283913850784302, |
| "log_odds_ratio": -0.3325265049934387, |
| "logits/chosen": 2.0417227745056152, |
| "logits/rejected": 2.104788064956665, |
| "logps/chosen": -0.31526321172714233, |
| "logps/rejected": -1.0568766593933105, |
| "loss": 0.9896, |
| "nll_loss": 0.9563248753547668, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.031526319682598114, |
| "rewards/margins": 0.0741613581776619, |
| "rewards/rejected": -0.10568765550851822, |
| "step": 1820 |
| }, |
| { |
| "epoch": 2.52818853974122, |
| "grad_norm": 2.1079516410827637, |
| "learning_rate": 1.0472727272727273e-07, |
| "log_odds_chosen": 1.5739433765411377, |
| "log_odds_ratio": -0.33921295404434204, |
| "logits/chosen": 2.122877359390259, |
| "logits/rejected": 2.1718015670776367, |
| "logps/chosen": -0.2680935859680176, |
| "logps/rejected": -0.9542296528816223, |
| "loss": 1.0209, |
| "nll_loss": 0.986934244632721, |
| "rewards/accuracies": 0.8583333492279053, |
| "rewards/chosen": -0.026809358969330788, |
| "rewards/margins": 0.06861360371112823, |
| "rewards/rejected": -0.09542296081781387, |
| "step": 1825 |
| }, |
| { |
| "epoch": 2.5351201478743066, |
| "grad_norm": 2.3235225677490234, |
| "learning_rate": 1.038181818181818e-07, |
| "log_odds_chosen": 1.7671153545379639, |
| "log_odds_ratio": -0.3048493266105652, |
| "logits/chosen": 2.0175423622131348, |
| "logits/rejected": 2.1084630489349365, |
| "logps/chosen": -0.28295522928237915, |
| "logps/rejected": -1.078856348991394, |
| "loss": 1.0093, |
| "nll_loss": 0.9788612723350525, |
| "rewards/accuracies": 0.8416666388511658, |
| "rewards/chosen": -0.028295524418354034, |
| "rewards/margins": 0.07959011197090149, |
| "rewards/rejected": -0.10788564383983612, |
| "step": 1830 |
| }, |
| { |
| "epoch": 2.542051756007394, |
| "grad_norm": 2.376753807067871, |
| "learning_rate": 1.029090909090909e-07, |
| "log_odds_chosen": 1.5308775901794434, |
| "log_odds_ratio": -0.35065487027168274, |
| "logits/chosen": 2.0661802291870117, |
| "logits/rejected": 2.1190385818481445, |
| "logps/chosen": -0.30774179100990295, |
| "logps/rejected": -0.9961751103401184, |
| "loss": 1.0005, |
| "nll_loss": 0.9654229879379272, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -0.030774177983403206, |
| "rewards/margins": 0.06884334981441498, |
| "rewards/rejected": -0.09961751848459244, |
| "step": 1835 |
| }, |
| { |
| "epoch": 2.5489833641404807, |
| "grad_norm": 1.1884440183639526, |
| "learning_rate": 1.02e-07, |
| "log_odds_chosen": 1.692187786102295, |
| "log_odds_ratio": -0.35703638195991516, |
| "logits/chosen": 2.050490379333496, |
| "logits/rejected": 2.091343879699707, |
| "logps/chosen": -0.26052388548851013, |
| "logps/rejected": -1.032615065574646, |
| "loss": 0.9831, |
| "nll_loss": 0.9473720192909241, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -0.026052383705973625, |
| "rewards/margins": 0.07720911502838135, |
| "rewards/rejected": -0.10326149314641953, |
| "step": 1840 |
| }, |
| { |
| "epoch": 2.5559149722735675, |
| "grad_norm": 2.3813302516937256, |
| "learning_rate": 1.010909090909091e-07, |
| "log_odds_chosen": 1.5822950601577759, |
| "log_odds_ratio": -0.3297514021396637, |
| "logits/chosen": 2.0444798469543457, |
| "logits/rejected": 2.1010236740112305, |
| "logps/chosen": -0.2959387004375458, |
| "logps/rejected": -0.987533450126648, |
| "loss": 1.0236, |
| "nll_loss": 0.9906317591667175, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.02959386818110943, |
| "rewards/margins": 0.06915947794914246, |
| "rewards/rejected": -0.09875334799289703, |
| "step": 1845 |
| }, |
| { |
| "epoch": 2.5628465804066543, |
| "grad_norm": 2.9394869804382324, |
| "learning_rate": 1.0018181818181817e-07, |
| "log_odds_chosen": 1.702123761177063, |
| "log_odds_ratio": -0.3088690936565399, |
| "logits/chosen": 2.113861322402954, |
| "logits/rejected": 2.1717820167541504, |
| "logps/chosen": -0.3088254928588867, |
| "logps/rejected": -1.0686160326004028, |
| "loss": 1.0239, |
| "nll_loss": 0.9929828643798828, |
| "rewards/accuracies": 0.8583333492279053, |
| "rewards/chosen": -0.03088255040347576, |
| "rewards/margins": 0.07597906142473221, |
| "rewards/rejected": -0.10686160624027252, |
| "step": 1850 |
| }, |
| { |
| "epoch": 2.5697781885397415, |
| "grad_norm": 3.485311508178711, |
| "learning_rate": 9.927272727272727e-08, |
| "log_odds_chosen": 1.7402740716934204, |
| "log_odds_ratio": -0.38107120990753174, |
| "logits/chosen": 1.9880726337432861, |
| "logits/rejected": 2.057579755783081, |
| "logps/chosen": -0.2805604338645935, |
| "logps/rejected": -1.0576139688491821, |
| "loss": 0.9883, |
| "nll_loss": 0.9501924514770508, |
| "rewards/accuracies": 0.8166666626930237, |
| "rewards/chosen": -0.02805604785680771, |
| "rewards/margins": 0.07770536839962006, |
| "rewards/rejected": -0.10576140880584717, |
| "step": 1855 |
| }, |
| { |
| "epoch": 2.5767097966728283, |
| "grad_norm": 3.232532262802124, |
| "learning_rate": 9.836363636363636e-08, |
| "log_odds_chosen": 1.8129467964172363, |
| "log_odds_ratio": -0.2785561978816986, |
| "logits/chosen": 1.9095889329910278, |
| "logits/rejected": 1.9935516119003296, |
| "logps/chosen": -0.2174181193113327, |
| "logps/rejected": -0.9398717880249023, |
| "loss": 0.9843, |
| "nll_loss": 0.9564692974090576, |
| "rewards/accuracies": 0.9083333611488342, |
| "rewards/chosen": -0.02174181304872036, |
| "rewards/margins": 0.07224537432193756, |
| "rewards/rejected": -0.09398718178272247, |
| "step": 1860 |
| }, |
| { |
| "epoch": 2.583641404805915, |
| "grad_norm": 2.0778720378875732, |
| "learning_rate": 9.745454545454545e-08, |
| "log_odds_chosen": 1.8064855337142944, |
| "log_odds_ratio": -0.3176063001155853, |
| "logits/chosen": 2.1032285690307617, |
| "logits/rejected": 2.154674768447876, |
| "logps/chosen": -0.3073400855064392, |
| "logps/rejected": -1.1221901178359985, |
| "loss": 0.9987, |
| "nll_loss": 0.9669729471206665, |
| "rewards/accuracies": 0.8666666746139526, |
| "rewards/chosen": -0.03073401190340519, |
| "rewards/margins": 0.08148500323295593, |
| "rewards/rejected": -0.11221900582313538, |
| "step": 1865 |
| }, |
| { |
| "epoch": 2.590573012939002, |
| "grad_norm": 1.6975332498550415, |
| "learning_rate": 9.654545454545454e-08, |
| "log_odds_chosen": 1.6128000020980835, |
| "log_odds_ratio": -0.33097171783447266, |
| "logits/chosen": 2.0006532669067383, |
| "logits/rejected": 2.056741237640381, |
| "logps/chosen": -0.2736147940158844, |
| "logps/rejected": -0.9443971514701843, |
| "loss": 0.98, |
| "nll_loss": 0.9469044804573059, |
| "rewards/accuracies": 0.8833333253860474, |
| "rewards/chosen": -0.02736147679388523, |
| "rewards/margins": 0.06707824021577835, |
| "rewards/rejected": -0.09443972259759903, |
| "step": 1870 |
| }, |
| { |
| "epoch": 2.5975046210720887, |
| "grad_norm": 2.9151763916015625, |
| "learning_rate": 9.563636363636364e-08, |
| "log_odds_chosen": 2.098798990249634, |
| "log_odds_ratio": -0.263777494430542, |
| "logits/chosen": 1.9829503297805786, |
| "logits/rejected": 2.0440664291381836, |
| "logps/chosen": -0.2544497847557068, |
| "logps/rejected": -1.2993313074111938, |
| "loss": 1.015, |
| "nll_loss": 0.9885779023170471, |
| "rewards/accuracies": 0.8916666507720947, |
| "rewards/chosen": -0.025444982573390007, |
| "rewards/margins": 0.10448816418647766, |
| "rewards/rejected": -0.12993313372135162, |
| "step": 1875 |
| }, |
| { |
| "epoch": 2.6044362292051755, |
| "grad_norm": 1.4423106908798218, |
| "learning_rate": 9.472727272727272e-08, |
| "log_odds_chosen": 1.7735576629638672, |
| "log_odds_ratio": -0.3009028136730194, |
| "logits/chosen": 1.8770281076431274, |
| "logits/rejected": 1.9749999046325684, |
| "logps/chosen": -0.2571166455745697, |
| "logps/rejected": -0.9384245276451111, |
| "loss": 0.9065, |
| "nll_loss": 0.8764019012451172, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.025711664929986, |
| "rewards/margins": 0.06813079863786697, |
| "rewards/rejected": -0.09384246915578842, |
| "step": 1880 |
| }, |
| { |
| "epoch": 2.6113678373382623, |
| "grad_norm": 3.350883960723877, |
| "learning_rate": 9.381818181818182e-08, |
| "log_odds_chosen": 1.902999997138977, |
| "log_odds_ratio": -0.27424463629722595, |
| "logits/chosen": 2.0839245319366455, |
| "logits/rejected": 2.14450740814209, |
| "logps/chosen": -0.26829907298088074, |
| "logps/rejected": -1.1070636510849, |
| "loss": 1.0208, |
| "nll_loss": 0.993401825428009, |
| "rewards/accuracies": 0.8833333253860474, |
| "rewards/chosen": -0.026829909533262253, |
| "rewards/margins": 0.08387646079063416, |
| "rewards/rejected": -0.11070636659860611, |
| "step": 1885 |
| }, |
| { |
| "epoch": 2.6182994454713495, |
| "grad_norm": 3.9501917362213135, |
| "learning_rate": 9.29090909090909e-08, |
| "log_odds_chosen": 1.724169135093689, |
| "log_odds_ratio": -0.3101595342159271, |
| "logits/chosen": 2.021693229675293, |
| "logits/rejected": 2.096151828765869, |
| "logps/chosen": -0.2878502309322357, |
| "logps/rejected": -1.0322964191436768, |
| "loss": 1.0293, |
| "nll_loss": 0.9983063340187073, |
| "rewards/accuracies": 0.8666666746139526, |
| "rewards/chosen": -0.028785018250346184, |
| "rewards/margins": 0.07444461435079575, |
| "rewards/rejected": -0.10322963446378708, |
| "step": 1890 |
| }, |
| { |
| "epoch": 2.6252310536044363, |
| "grad_norm": 2.835116386413574, |
| "learning_rate": 9.2e-08, |
| "log_odds_chosen": 2.0075416564941406, |
| "log_odds_ratio": -0.29768672585487366, |
| "logits/chosen": 2.0260884761810303, |
| "logits/rejected": 2.08447527885437, |
| "logps/chosen": -0.2922082841396332, |
| "logps/rejected": -1.2498011589050293, |
| "loss": 1.0113, |
| "nll_loss": 0.981542706489563, |
| "rewards/accuracies": 0.8916666507720947, |
| "rewards/chosen": -0.029220828786492348, |
| "rewards/margins": 0.09575929492712021, |
| "rewards/rejected": -0.12498010694980621, |
| "step": 1895 |
| }, |
| { |
| "epoch": 2.632162661737523, |
| "grad_norm": 1.9952729940414429, |
| "learning_rate": 9.109090909090909e-08, |
| "log_odds_chosen": 2.203530788421631, |
| "log_odds_ratio": -0.2388562709093094, |
| "logits/chosen": 2.05356764793396, |
| "logits/rejected": 2.108933925628662, |
| "logps/chosen": -0.2701965272426605, |
| "logps/rejected": -1.3429614305496216, |
| "loss": 1.0337, |
| "nll_loss": 1.00979745388031, |
| "rewards/accuracies": 0.9333333373069763, |
| "rewards/chosen": -0.027019653469324112, |
| "rewards/margins": 0.10727646201848984, |
| "rewards/rejected": -0.13429613411426544, |
| "step": 1900 |
| }, |
| { |
| "epoch": 2.63909426987061, |
| "grad_norm": 2.688429594039917, |
| "learning_rate": 9.018181818181818e-08, |
| "log_odds_chosen": 1.6793392896652222, |
| "log_odds_ratio": -0.3166213929653168, |
| "logits/chosen": 1.9479715824127197, |
| "logits/rejected": 2.0088744163513184, |
| "logps/chosen": -0.27929723262786865, |
| "logps/rejected": -0.9713757634162903, |
| "loss": 1.0348, |
| "nll_loss": 1.0030966997146606, |
| "rewards/accuracies": 0.8833333253860474, |
| "rewards/chosen": -0.027929725125432014, |
| "rewards/margins": 0.06920785456895828, |
| "rewards/rejected": -0.09713757783174515, |
| "step": 1905 |
| }, |
| { |
| "epoch": 2.6460258780036967, |
| "grad_norm": 1.4949442148208618, |
| "learning_rate": 8.927272727272727e-08, |
| "log_odds_chosen": 1.9754811525344849, |
| "log_odds_ratio": -0.30257752537727356, |
| "logits/chosen": 1.9509350061416626, |
| "logits/rejected": 2.0503389835357666, |
| "logps/chosen": -0.27535703778266907, |
| "logps/rejected": -1.118589162826538, |
| "loss": 0.9724, |
| "nll_loss": 0.9421722292900085, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -0.027535704895853996, |
| "rewards/margins": 0.0843232050538063, |
| "rewards/rejected": -0.11185891181230545, |
| "step": 1910 |
| }, |
| { |
| "epoch": 2.652957486136784, |
| "grad_norm": 2.16941237449646, |
| "learning_rate": 8.836363636363637e-08, |
| "log_odds_chosen": 2.000943422317505, |
| "log_odds_ratio": -0.2847401797771454, |
| "logits/chosen": 2.106870412826538, |
| "logits/rejected": 2.168245553970337, |
| "logps/chosen": -0.26422372460365295, |
| "logps/rejected": -1.217974305152893, |
| "loss": 1.061, |
| "nll_loss": 1.0324803590774536, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.026422372087836266, |
| "rewards/margins": 0.09537507593631744, |
| "rewards/rejected": -0.12179744988679886, |
| "step": 1915 |
| }, |
| { |
| "epoch": 2.6598890942698707, |
| "grad_norm": 1.8085908889770508, |
| "learning_rate": 8.745454545454545e-08, |
| "log_odds_chosen": 1.5008604526519775, |
| "log_odds_ratio": -0.38829174637794495, |
| "logits/chosen": 1.9134535789489746, |
| "logits/rejected": 1.9677314758300781, |
| "logps/chosen": -0.2830255627632141, |
| "logps/rejected": -0.9362450242042542, |
| "loss": 1.0461, |
| "nll_loss": 1.0073057413101196, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.02830255590379238, |
| "rewards/margins": 0.06532195210456848, |
| "rewards/rejected": -0.09362450987100601, |
| "step": 1920 |
| }, |
| { |
| "epoch": 2.6668207024029575, |
| "grad_norm": 2.762589931488037, |
| "learning_rate": 8.654545454545455e-08, |
| "log_odds_chosen": 1.8519963026046753, |
| "log_odds_ratio": -0.3259159326553345, |
| "logits/chosen": 1.9644198417663574, |
| "logits/rejected": 2.05434250831604, |
| "logps/chosen": -0.2632545232772827, |
| "logps/rejected": -1.0625907182693481, |
| "loss": 1.0235, |
| "nll_loss": 0.9909093976020813, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.02632545307278633, |
| "rewards/margins": 0.07993361353874207, |
| "rewards/rejected": -0.1062590703368187, |
| "step": 1925 |
| }, |
| { |
| "epoch": 2.6737523105360443, |
| "grad_norm": 2.3893611431121826, |
| "learning_rate": 8.563636363636363e-08, |
| "log_odds_chosen": 1.6310181617736816, |
| "log_odds_ratio": -0.3340619206428528, |
| "logits/chosen": 2.005711555480957, |
| "logits/rejected": 2.067094087600708, |
| "logps/chosen": -0.2860340476036072, |
| "logps/rejected": -0.9216349124908447, |
| "loss": 0.9937, |
| "nll_loss": 0.9602577686309814, |
| "rewards/accuracies": 0.8666666746139526, |
| "rewards/chosen": -0.02860340289771557, |
| "rewards/margins": 0.06356008350849152, |
| "rewards/rejected": -0.09216348081827164, |
| "step": 1930 |
| }, |
| { |
| "epoch": 2.680683918669131, |
| "grad_norm": 1.9156088829040527, |
| "learning_rate": 8.472727272727273e-08, |
| "log_odds_chosen": 1.7571347951889038, |
| "log_odds_ratio": -0.3111540377140045, |
| "logits/chosen": 2.0473296642303467, |
| "logits/rejected": 2.1130969524383545, |
| "logps/chosen": -0.28812503814697266, |
| "logps/rejected": -1.1086490154266357, |
| "loss": 0.9945, |
| "nll_loss": 0.9633785486221313, |
| "rewards/accuracies": 0.8583333492279053, |
| "rewards/chosen": -0.028812507167458534, |
| "rewards/margins": 0.08205239474773407, |
| "rewards/rejected": -0.11086489260196686, |
| "step": 1935 |
| }, |
| { |
| "epoch": 2.687615526802218, |
| "grad_norm": 2.3184399604797363, |
| "learning_rate": 8.381818181818181e-08, |
| "log_odds_chosen": 1.6769403219223022, |
| "log_odds_ratio": -0.3335730731487274, |
| "logits/chosen": 1.9665719270706177, |
| "logits/rejected": 2.030404567718506, |
| "logps/chosen": -0.24711455404758453, |
| "logps/rejected": -0.9258390069007874, |
| "loss": 0.9979, |
| "nll_loss": 0.9645217061042786, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.024711458012461662, |
| "rewards/margins": 0.06787244230508804, |
| "rewards/rejected": -0.09258389472961426, |
| "step": 1940 |
| }, |
| { |
| "epoch": 2.6945471349353047, |
| "grad_norm": 1.7348381280899048, |
| "learning_rate": 8.290909090909091e-08, |
| "log_odds_chosen": 2.012420415878296, |
| "log_odds_ratio": -0.2879267930984497, |
| "logits/chosen": 2.080967426300049, |
| "logits/rejected": 2.181638479232788, |
| "logps/chosen": -0.2593296766281128, |
| "logps/rejected": -1.197130560874939, |
| "loss": 1.0093, |
| "nll_loss": 0.9804985523223877, |
| "rewards/accuracies": 0.8833333253860474, |
| "rewards/chosen": -0.02593296393752098, |
| "rewards/margins": 0.09378007054328918, |
| "rewards/rejected": -0.11971304565668106, |
| "step": 1945 |
| }, |
| { |
| "epoch": 2.701478743068392, |
| "grad_norm": 2.2291972637176514, |
| "learning_rate": 8.199999999999999e-08, |
| "log_odds_chosen": 1.8021572828292847, |
| "log_odds_ratio": -0.2828613221645355, |
| "logits/chosen": 1.9889678955078125, |
| "logits/rejected": 2.0603299140930176, |
| "logps/chosen": -0.27885323762893677, |
| "logps/rejected": -1.020405888557434, |
| "loss": 1.0091, |
| "nll_loss": 0.9808385372161865, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -0.02788531966507435, |
| "rewards/margins": 0.0741552785038948, |
| "rewards/rejected": -0.10204058140516281, |
| "step": 1950 |
| }, |
| { |
| "epoch": 2.7084103512014788, |
| "grad_norm": 3.511234998703003, |
| "learning_rate": 8.109090909090909e-08, |
| "log_odds_chosen": 1.6991480588912964, |
| "log_odds_ratio": -0.3476136028766632, |
| "logits/chosen": 2.0690817832946777, |
| "logits/rejected": 2.131218671798706, |
| "logps/chosen": -0.28822681307792664, |
| "logps/rejected": -0.9959009885787964, |
| "loss": 1.0083, |
| "nll_loss": 0.9735735058784485, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -0.028822684660553932, |
| "rewards/margins": 0.07076740264892578, |
| "rewards/rejected": -0.09959009289741516, |
| "step": 1955 |
| }, |
| { |
| "epoch": 2.7153419593345656, |
| "grad_norm": 1.731318712234497, |
| "learning_rate": 8.018181818181817e-08, |
| "log_odds_chosen": 1.9510962963104248, |
| "log_odds_ratio": -0.2717619836330414, |
| "logits/chosen": 2.0861499309539795, |
| "logits/rejected": 2.1444475650787354, |
| "logps/chosen": -0.26343804597854614, |
| "logps/rejected": -1.1370905637741089, |
| "loss": 1.0018, |
| "nll_loss": 0.9746354222297668, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.026343801990151405, |
| "rewards/margins": 0.08736524730920792, |
| "rewards/rejected": -0.11370905488729477, |
| "step": 1960 |
| }, |
| { |
| "epoch": 2.7222735674676524, |
| "grad_norm": 2.5520708560943604, |
| "learning_rate": 7.927272727272727e-08, |
| "log_odds_chosen": 1.883239507675171, |
| "log_odds_ratio": -0.34364765882492065, |
| "logits/chosen": 2.0899996757507324, |
| "logits/rejected": 2.1520025730133057, |
| "logps/chosen": -0.3178773820400238, |
| "logps/rejected": -1.1890572309494019, |
| "loss": 1.0465, |
| "nll_loss": 1.012162208557129, |
| "rewards/accuracies": 0.8083333373069763, |
| "rewards/chosen": -0.03178774565458298, |
| "rewards/margins": 0.087117999792099, |
| "rewards/rejected": -0.11890573799610138, |
| "step": 1965 |
| }, |
| { |
| "epoch": 2.7292051756007396, |
| "grad_norm": 3.0097408294677734, |
| "learning_rate": 7.836363636363637e-08, |
| "log_odds_chosen": 1.7459481954574585, |
| "log_odds_ratio": -0.3089205026626587, |
| "logits/chosen": 1.948202133178711, |
| "logits/rejected": 2.030421733856201, |
| "logps/chosen": -0.2865378260612488, |
| "logps/rejected": -0.9788001775741577, |
| "loss": 0.9699, |
| "nll_loss": 0.9390251040458679, |
| "rewards/accuracies": 0.8916666507720947, |
| "rewards/chosen": -0.028653783723711967, |
| "rewards/margins": 0.06922624260187149, |
| "rewards/rejected": -0.09788002073764801, |
| "step": 1970 |
| }, |
| { |
| "epoch": 2.7361367837338264, |
| "grad_norm": 2.334012746810913, |
| "learning_rate": 7.745454545454545e-08, |
| "log_odds_chosen": 1.6162441968917847, |
| "log_odds_ratio": -0.3280728757381439, |
| "logits/chosen": 2.022932767868042, |
| "logits/rejected": 2.0705296993255615, |
| "logps/chosen": -0.2768207788467407, |
| "logps/rejected": -0.927074134349823, |
| "loss": 0.9948, |
| "nll_loss": 0.9620178937911987, |
| "rewards/accuracies": 0.8833333253860474, |
| "rewards/chosen": -0.027682077139616013, |
| "rewards/margins": 0.06502533704042435, |
| "rewards/rejected": -0.09270740300416946, |
| "step": 1975 |
| }, |
| { |
| "epoch": 2.743068391866913, |
| "grad_norm": 3.4409821033477783, |
| "learning_rate": 7.654545454545455e-08, |
| "log_odds_chosen": 1.7293587923049927, |
| "log_odds_ratio": -0.32593438029289246, |
| "logits/chosen": 2.0177626609802246, |
| "logits/rejected": 2.0882925987243652, |
| "logps/chosen": -0.3067542612552643, |
| "logps/rejected": -1.018390417098999, |
| "loss": 1.004, |
| "nll_loss": 0.9714083075523376, |
| "rewards/accuracies": 0.8666666746139526, |
| "rewards/chosen": -0.030675429850816727, |
| "rewards/margins": 0.07116362452507019, |
| "rewards/rejected": -0.10183904320001602, |
| "step": 1980 |
| }, |
| { |
| "epoch": 2.75, |
| "grad_norm": 3.144928455352783, |
| "learning_rate": 7.563636363636363e-08, |
| "log_odds_chosen": 2.015221357345581, |
| "log_odds_ratio": -0.24131697416305542, |
| "logits/chosen": 2.0317463874816895, |
| "logits/rejected": 2.112942934036255, |
| "logps/chosen": -0.2766430974006653, |
| "logps/rejected": -1.1748555898666382, |
| "loss": 0.9986, |
| "nll_loss": 0.974430501461029, |
| "rewards/accuracies": 0.9416666626930237, |
| "rewards/chosen": -0.027664311230182648, |
| "rewards/margins": 0.0898212417960167, |
| "rewards/rejected": -0.11748553812503815, |
| "step": 1985 |
| }, |
| { |
| "epoch": 2.756931608133087, |
| "grad_norm": 2.3371379375457764, |
| "learning_rate": 7.472727272727273e-08, |
| "log_odds_chosen": 1.7855689525604248, |
| "log_odds_ratio": -0.28874701261520386, |
| "logits/chosen": 2.058995485305786, |
| "logits/rejected": 2.1220133304595947, |
| "logps/chosen": -0.23786862194538116, |
| "logps/rejected": -0.8881834745407104, |
| "loss": 0.9978, |
| "nll_loss": 0.9689105153083801, |
| "rewards/accuracies": 0.8916666507720947, |
| "rewards/chosen": -0.023786865174770355, |
| "rewards/margins": 0.06503147631883621, |
| "rewards/rejected": -0.08881834149360657, |
| "step": 1990 |
| }, |
| { |
| "epoch": 2.7638632162661736, |
| "grad_norm": 2.0242509841918945, |
| "learning_rate": 7.381818181818182e-08, |
| "log_odds_chosen": 1.881475806236267, |
| "log_odds_ratio": -0.2716512382030487, |
| "logits/chosen": 1.9721044301986694, |
| "logits/rejected": 2.0286245346069336, |
| "logps/chosen": -0.2365345060825348, |
| "logps/rejected": -1.0640085935592651, |
| "loss": 0.9904, |
| "nll_loss": 0.9632561802864075, |
| "rewards/accuracies": 0.9083333611488342, |
| "rewards/chosen": -0.02365345135331154, |
| "rewards/margins": 0.08274741470813751, |
| "rewards/rejected": -0.10640083998441696, |
| "step": 1995 |
| }, |
| { |
| "epoch": 2.7707948243992604, |
| "grad_norm": 3.5550692081451416, |
| "learning_rate": 7.290909090909091e-08, |
| "log_odds_chosen": 1.6393131017684937, |
| "log_odds_ratio": -0.3566218316555023, |
| "logits/chosen": 2.027010440826416, |
| "logits/rejected": 2.1158926486968994, |
| "logps/chosen": -0.2945045530796051, |
| "logps/rejected": -0.9923899173736572, |
| "loss": 0.9887, |
| "nll_loss": 0.9530836343765259, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.02945045940577984, |
| "rewards/margins": 0.06978853791952133, |
| "rewards/rejected": -0.09923899918794632, |
| "step": 2000 |
| }, |
| { |
| "epoch": 2.7777264325323476, |
| "grad_norm": 2.983398199081421, |
| "learning_rate": 7.2e-08, |
| "log_odds_chosen": 1.883139729499817, |
| "log_odds_ratio": -0.2949954569339752, |
| "logits/chosen": 2.0600686073303223, |
| "logits/rejected": 2.1349780559539795, |
| "logps/chosen": -0.27356693148612976, |
| "logps/rejected": -1.1228362321853638, |
| "loss": 1.0009, |
| "nll_loss": 0.971444308757782, |
| "rewards/accuracies": 0.8916666507720947, |
| "rewards/chosen": -0.027356691658496857, |
| "rewards/margins": 0.08492692559957504, |
| "rewards/rejected": -0.1122836172580719, |
| "step": 2005 |
| }, |
| { |
| "epoch": 2.7846580406654344, |
| "grad_norm": 2.232745885848999, |
| "learning_rate": 7.10909090909091e-08, |
| "log_odds_chosen": 1.8647377490997314, |
| "log_odds_ratio": -0.31064775586128235, |
| "logits/chosen": 2.072453260421753, |
| "logits/rejected": 2.1425271034240723, |
| "logps/chosen": -0.3136703670024872, |
| "logps/rejected": -1.2263870239257812, |
| "loss": 1.0013, |
| "nll_loss": 0.9702617526054382, |
| "rewards/accuracies": 0.8916666507720947, |
| "rewards/chosen": -0.03136703744530678, |
| "rewards/margins": 0.09127166122198105, |
| "rewards/rejected": -0.12263870239257812, |
| "step": 2010 |
| }, |
| { |
| "epoch": 2.7915896487985212, |
| "grad_norm": 2.4576735496520996, |
| "learning_rate": 7.018181818181818e-08, |
| "log_odds_chosen": 1.848351001739502, |
| "log_odds_ratio": -0.300770103931427, |
| "logits/chosen": 1.9794286489486694, |
| "logits/rejected": 2.051739454269409, |
| "logps/chosen": -0.2656658887863159, |
| "logps/rejected": -1.1345840692520142, |
| "loss": 1.0129, |
| "nll_loss": 0.9827767610549927, |
| "rewards/accuracies": 0.9166666865348816, |
| "rewards/chosen": -0.026566587388515472, |
| "rewards/margins": 0.08689180761575699, |
| "rewards/rejected": -0.11345840990543365, |
| "step": 2015 |
| }, |
| { |
| "epoch": 2.798521256931608, |
| "grad_norm": 4.359684467315674, |
| "learning_rate": 6.927272727272727e-08, |
| "log_odds_chosen": 1.7028800249099731, |
| "log_odds_ratio": -0.3596075177192688, |
| "logits/chosen": 2.0374951362609863, |
| "logits/rejected": 2.0811033248901367, |
| "logps/chosen": -0.26721158623695374, |
| "logps/rejected": -1.0201528072357178, |
| "loss": 1.0472, |
| "nll_loss": 1.011243462562561, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -0.026721160858869553, |
| "rewards/margins": 0.0752941220998764, |
| "rewards/rejected": -0.10201527923345566, |
| "step": 2020 |
| }, |
| { |
| "epoch": 2.8054528650646953, |
| "grad_norm": 1.5897514820098877, |
| "learning_rate": 6.836363636363636e-08, |
| "log_odds_chosen": 1.7618235349655151, |
| "log_odds_ratio": -0.2725900709629059, |
| "logits/chosen": 1.951567530632019, |
| "logits/rejected": 2.018179416656494, |
| "logps/chosen": -0.250274121761322, |
| "logps/rejected": -0.964055061340332, |
| "loss": 0.9872, |
| "nll_loss": 0.9599834680557251, |
| "rewards/accuracies": 0.9333333373069763, |
| "rewards/chosen": -0.025027411058545113, |
| "rewards/margins": 0.07137809693813324, |
| "rewards/rejected": -0.0964054986834526, |
| "step": 2025 |
| }, |
| { |
| "epoch": 2.812384473197782, |
| "grad_norm": 2.3347268104553223, |
| "learning_rate": 6.745454545454546e-08, |
| "log_odds_chosen": 1.8136812448501587, |
| "log_odds_ratio": -0.3173971176147461, |
| "logits/chosen": 2.0132126808166504, |
| "logits/rejected": 2.0880587100982666, |
| "logps/chosen": -0.3025325536727905, |
| "logps/rejected": -1.1345620155334473, |
| "loss": 1.0012, |
| "nll_loss": 0.9694395661354065, |
| "rewards/accuracies": 0.8666666746139526, |
| "rewards/chosen": -0.030253252014517784, |
| "rewards/margins": 0.08320295065641403, |
| "rewards/rejected": -0.11345621198415756, |
| "step": 2030 |
| }, |
| { |
| "epoch": 2.819316081330869, |
| "grad_norm": 1.9864964485168457, |
| "learning_rate": 6.654545454545454e-08, |
| "log_odds_chosen": 1.8496549129486084, |
| "log_odds_ratio": -0.29193249344825745, |
| "logits/chosen": 1.9852664470672607, |
| "logits/rejected": 2.046905994415283, |
| "logps/chosen": -0.23638120293617249, |
| "logps/rejected": -1.0749742984771729, |
| "loss": 0.9969, |
| "nll_loss": 0.9677135348320007, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.023638121783733368, |
| "rewards/margins": 0.08385932445526123, |
| "rewards/rejected": -0.107497438788414, |
| "step": 2035 |
| }, |
| { |
| "epoch": 2.8262476894639557, |
| "grad_norm": 2.766561269760132, |
| "learning_rate": 6.563636363636364e-08, |
| "log_odds_chosen": 2.029188632965088, |
| "log_odds_ratio": -0.26917028427124023, |
| "logits/chosen": 2.03354549407959, |
| "logits/rejected": 2.1179850101470947, |
| "logps/chosen": -0.24409012496471405, |
| "logps/rejected": -1.1629306077957153, |
| "loss": 1.0074, |
| "nll_loss": 0.9805120825767517, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.024409016594290733, |
| "rewards/margins": 0.09188403934240341, |
| "rewards/rejected": -0.1162930577993393, |
| "step": 2040 |
| }, |
| { |
| "epoch": 2.8331792975970425, |
| "grad_norm": 3.011481523513794, |
| "learning_rate": 6.472727272727272e-08, |
| "log_odds_chosen": 1.706537127494812, |
| "log_odds_ratio": -0.4006377160549164, |
| "logits/chosen": 2.1181445121765137, |
| "logits/rejected": 2.156266689300537, |
| "logps/chosen": -0.3777162432670593, |
| "logps/rejected": -1.2636988162994385, |
| "loss": 1.0484, |
| "nll_loss": 1.0083175897598267, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.03777162730693817, |
| "rewards/margins": 0.08859825879335403, |
| "rewards/rejected": -0.1263698935508728, |
| "step": 2045 |
| }, |
| { |
| "epoch": 2.8401109057301293, |
| "grad_norm": 3.116852283477783, |
| "learning_rate": 6.381818181818182e-08, |
| "log_odds_chosen": 1.7585667371749878, |
| "log_odds_ratio": -0.3240087330341339, |
| "logits/chosen": 1.9366189241409302, |
| "logits/rejected": 2.018488883972168, |
| "logps/chosen": -0.2796057164669037, |
| "logps/rejected": -0.9981705546379089, |
| "loss": 0.9852, |
| "nll_loss": 0.9527918100357056, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.027960574254393578, |
| "rewards/margins": 0.07185646891593933, |
| "rewards/rejected": -0.09981703758239746, |
| "step": 2050 |
| }, |
| { |
| "epoch": 2.847042513863216, |
| "grad_norm": 1.7142726182937622, |
| "learning_rate": 6.290909090909092e-08, |
| "log_odds_chosen": 1.9744784832000732, |
| "log_odds_ratio": -0.24878713488578796, |
| "logits/chosen": 1.9690757989883423, |
| "logits/rejected": 2.040696859359741, |
| "logps/chosen": -0.3136541545391083, |
| "logps/rejected": -1.1877192258834839, |
| "loss": 0.9877, |
| "nll_loss": 0.9628265500068665, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -0.031365420669317245, |
| "rewards/margins": 0.08740650117397308, |
| "rewards/rejected": -0.11877192556858063, |
| "step": 2055 |
| }, |
| { |
| "epoch": 2.8539741219963033, |
| "grad_norm": 1.3764642477035522, |
| "learning_rate": 6.2e-08, |
| "log_odds_chosen": 1.8966907262802124, |
| "log_odds_ratio": -0.2863107919692993, |
| "logits/chosen": 2.0856635570526123, |
| "logits/rejected": 2.136650800704956, |
| "logps/chosen": -0.3037610948085785, |
| "logps/rejected": -1.1746824979782104, |
| "loss": 1.0304, |
| "nll_loss": 1.0017729997634888, |
| "rewards/accuracies": 0.9166666865348816, |
| "rewards/chosen": -0.03037611022591591, |
| "rewards/margins": 0.08709214627742767, |
| "rewards/rejected": -0.11746825277805328, |
| "step": 2060 |
| }, |
| { |
| "epoch": 2.86090573012939, |
| "grad_norm": 2.363109827041626, |
| "learning_rate": 6.10909090909091e-08, |
| "log_odds_chosen": 1.8390313386917114, |
| "log_odds_ratio": -0.30428022146224976, |
| "logits/chosen": 1.9853383302688599, |
| "logits/rejected": 2.032965898513794, |
| "logps/chosen": -0.250918447971344, |
| "logps/rejected": -1.0042502880096436, |
| "loss": 0.9942, |
| "nll_loss": 0.9637566804885864, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.02509184740483761, |
| "rewards/margins": 0.07533318549394608, |
| "rewards/rejected": -0.10042501986026764, |
| "step": 2065 |
| }, |
| { |
| "epoch": 2.867837338262477, |
| "grad_norm": 2.9388301372528076, |
| "learning_rate": 6.018181818181818e-08, |
| "log_odds_chosen": 1.841733694076538, |
| "log_odds_ratio": -0.2979178726673126, |
| "logits/chosen": 1.9924938678741455, |
| "logits/rejected": 2.053075075149536, |
| "logps/chosen": -0.2633225917816162, |
| "logps/rejected": -1.0852948427200317, |
| "loss": 0.9875, |
| "nll_loss": 0.9577153921127319, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.02633226290345192, |
| "rewards/margins": 0.08219723403453827, |
| "rewards/rejected": -0.10852950066328049, |
| "step": 2070 |
| }, |
| { |
| "epoch": 2.8747689463955637, |
| "grad_norm": 2.952969789505005, |
| "learning_rate": 5.927272727272727e-08, |
| "log_odds_chosen": 1.6144490242004395, |
| "log_odds_ratio": -0.35894396901130676, |
| "logits/chosen": 2.0224180221557617, |
| "logits/rejected": 2.0612215995788574, |
| "logps/chosen": -0.26091185212135315, |
| "logps/rejected": -1.0284487009048462, |
| "loss": 1.0073, |
| "nll_loss": 0.9714316725730896, |
| "rewards/accuracies": 0.8583333492279053, |
| "rewards/chosen": -0.026091186329722404, |
| "rewards/margins": 0.07675368338823318, |
| "rewards/rejected": -0.10284487158060074, |
| "step": 2075 |
| }, |
| { |
| "epoch": 2.8817005545286505, |
| "grad_norm": 2.8261444568634033, |
| "learning_rate": 5.836363636363636e-08, |
| "log_odds_chosen": 1.8391097784042358, |
| "log_odds_ratio": -0.3189569413661957, |
| "logits/chosen": 1.9929203987121582, |
| "logits/rejected": 2.0488264560699463, |
| "logps/chosen": -0.2942853271961212, |
| "logps/rejected": -1.1487162113189697, |
| "loss": 1.02, |
| "nll_loss": 0.988071858882904, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.029428532347083092, |
| "rewards/margins": 0.08544307202100754, |
| "rewards/rejected": -0.11487161368131638, |
| "step": 2080 |
| }, |
| { |
| "epoch": 2.8886321626617377, |
| "grad_norm": 4.116847991943359, |
| "learning_rate": 5.745454545454545e-08, |
| "log_odds_chosen": 1.619594931602478, |
| "log_odds_ratio": -0.3377019762992859, |
| "logits/chosen": 2.075233221054077, |
| "logits/rejected": 2.119717836380005, |
| "logps/chosen": -0.2857421040534973, |
| "logps/rejected": -0.9990529417991638, |
| "loss": 1.0049, |
| "nll_loss": 0.9711350202560425, |
| "rewards/accuracies": 0.8333333134651184, |
| "rewards/chosen": -0.028574209660291672, |
| "rewards/margins": 0.07133107632398605, |
| "rewards/rejected": -0.09990529716014862, |
| "step": 2085 |
| }, |
| { |
| "epoch": 2.8955637707948245, |
| "grad_norm": 2.5412161350250244, |
| "learning_rate": 5.654545454545454e-08, |
| "log_odds_chosen": 1.8909341096878052, |
| "log_odds_ratio": -0.33492469787597656, |
| "logits/chosen": 2.021022081375122, |
| "logits/rejected": 2.0802001953125, |
| "logps/chosen": -0.28026703000068665, |
| "logps/rejected": -1.1536551713943481, |
| "loss": 1.0157, |
| "nll_loss": 0.982164740562439, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.028026705607771873, |
| "rewards/margins": 0.08733881264925003, |
| "rewards/rejected": -0.11536551266908646, |
| "step": 2090 |
| }, |
| { |
| "epoch": 2.9024953789279113, |
| "grad_norm": 2.7214314937591553, |
| "learning_rate": 5.563636363636364e-08, |
| "log_odds_chosen": 1.7071201801300049, |
| "log_odds_ratio": -0.31407538056373596, |
| "logits/chosen": 1.9751737117767334, |
| "logits/rejected": 2.022892475128174, |
| "logps/chosen": -0.29637211561203003, |
| "logps/rejected": -1.0419337749481201, |
| "loss": 1.0243, |
| "nll_loss": 0.9928818941116333, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.029637213796377182, |
| "rewards/margins": 0.07455617934465408, |
| "rewards/rejected": -0.10419339686632156, |
| "step": 2095 |
| }, |
| { |
| "epoch": 2.909426987060998, |
| "grad_norm": 2.559386730194092, |
| "learning_rate": 5.4727272727272724e-08, |
| "log_odds_chosen": 1.7541364431381226, |
| "log_odds_ratio": -0.3207100033760071, |
| "logits/chosen": 2.0054821968078613, |
| "logits/rejected": 2.0676021575927734, |
| "logps/chosen": -0.29729416966438293, |
| "logps/rejected": -1.0183097124099731, |
| "loss": 0.975, |
| "nll_loss": 0.9429475665092468, |
| "rewards/accuracies": 0.8916666507720947, |
| "rewards/chosen": -0.029729416593909264, |
| "rewards/margins": 0.07210154831409454, |
| "rewards/rejected": -0.10183095932006836, |
| "step": 2100 |
| }, |
| { |
| "epoch": 2.916358595194085, |
| "grad_norm": 3.168686866760254, |
| "learning_rate": 5.381818181818182e-08, |
| "log_odds_chosen": 1.8241220712661743, |
| "log_odds_ratio": -0.3224296271800995, |
| "logits/chosen": 2.084487199783325, |
| "logits/rejected": 2.151993989944458, |
| "logps/chosen": -0.298380970954895, |
| "logps/rejected": -1.1077054738998413, |
| "loss": 1.0205, |
| "nll_loss": 0.9882618188858032, |
| "rewards/accuracies": 0.8666666746139526, |
| "rewards/chosen": -0.02983810193836689, |
| "rewards/margins": 0.08093245327472687, |
| "rewards/rejected": -0.11077055335044861, |
| "step": 2105 |
| }, |
| { |
| "epoch": 2.9232902033271717, |
| "grad_norm": 4.917242527008057, |
| "learning_rate": 5.2909090909090905e-08, |
| "log_odds_chosen": 1.6183534860610962, |
| "log_odds_ratio": -0.3984599709510803, |
| "logits/chosen": 2.01122784614563, |
| "logits/rejected": 2.090132236480713, |
| "logps/chosen": -0.31952646374702454, |
| "logps/rejected": -0.9697479009628296, |
| "loss": 1.012, |
| "nll_loss": 0.9721961617469788, |
| "rewards/accuracies": 0.8166666626930237, |
| "rewards/chosen": -0.03195264935493469, |
| "rewards/margins": 0.06502215564250946, |
| "rewards/rejected": -0.09697480499744415, |
| "step": 2110 |
| }, |
| { |
| "epoch": 2.9302218114602585, |
| "grad_norm": 2.700791120529175, |
| "learning_rate": 5.2e-08, |
| "log_odds_chosen": 1.6960736513137817, |
| "log_odds_ratio": -0.3477153778076172, |
| "logits/chosen": 2.015334129333496, |
| "logits/rejected": 2.0682668685913086, |
| "logps/chosen": -0.26093918085098267, |
| "logps/rejected": -1.0459142923355103, |
| "loss": 1.0222, |
| "nll_loss": 0.9874255657196045, |
| "rewards/accuracies": 0.8416666388511658, |
| "rewards/chosen": -0.026093924418091774, |
| "rewards/margins": 0.078497514128685, |
| "rewards/rejected": -0.10459142178297043, |
| "step": 2115 |
| }, |
| { |
| "epoch": 2.9371534195933457, |
| "grad_norm": 3.925088405609131, |
| "learning_rate": 5.1090909090909086e-08, |
| "log_odds_chosen": 1.4773210287094116, |
| "log_odds_ratio": -0.41333526372909546, |
| "logits/chosen": 2.0795083045959473, |
| "logits/rejected": 2.115938901901245, |
| "logps/chosen": -0.34735921025276184, |
| "logps/rejected": -0.9831670522689819, |
| "loss": 1.0538, |
| "nll_loss": 1.0124287605285645, |
| "rewards/accuracies": 0.8083333373069763, |
| "rewards/chosen": -0.034735921770334244, |
| "rewards/margins": 0.06358078867197037, |
| "rewards/rejected": -0.09831671416759491, |
| "step": 2120 |
| }, |
| { |
| "epoch": 2.9440850277264325, |
| "grad_norm": 3.097028970718384, |
| "learning_rate": 5.0181818181818184e-08, |
| "log_odds_chosen": 1.7917529344558716, |
| "log_odds_ratio": -0.3308008909225464, |
| "logits/chosen": 1.9779587984085083, |
| "logits/rejected": 2.0592360496520996, |
| "logps/chosen": -0.2952510714530945, |
| "logps/rejected": -1.067455530166626, |
| "loss": 1.0471, |
| "nll_loss": 1.0139851570129395, |
| "rewards/accuracies": 0.8833333253860474, |
| "rewards/chosen": -0.02952510491013527, |
| "rewards/margins": 0.07722045481204987, |
| "rewards/rejected": -0.10674557089805603, |
| "step": 2125 |
| }, |
| { |
| "epoch": 2.9510166358595193, |
| "grad_norm": 2.174734354019165, |
| "learning_rate": 4.9272727272727274e-08, |
| "log_odds_chosen": 1.7724860906600952, |
| "log_odds_ratio": -0.3275219798088074, |
| "logits/chosen": 2.0415663719177246, |
| "logits/rejected": 2.103684425354004, |
| "logps/chosen": -0.27193596959114075, |
| "logps/rejected": -1.0836893320083618, |
| "loss": 0.9898, |
| "nll_loss": 0.9570819139480591, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.027193596586585045, |
| "rewards/margins": 0.08117534220218658, |
| "rewards/rejected": -0.10836894065141678, |
| "step": 2130 |
| }, |
| { |
| "epoch": 2.957948243992606, |
| "grad_norm": 1.5665240287780762, |
| "learning_rate": 4.8363636363636365e-08, |
| "log_odds_chosen": 1.7095661163330078, |
| "log_odds_ratio": -0.34748879075050354, |
| "logits/chosen": 1.98307466506958, |
| "logits/rejected": 2.0420520305633545, |
| "logps/chosen": -0.32826271653175354, |
| "logps/rejected": -1.0827791690826416, |
| "loss": 1.0023, |
| "nll_loss": 0.9675683975219727, |
| "rewards/accuracies": 0.9083333611488342, |
| "rewards/chosen": -0.032826270908117294, |
| "rewards/margins": 0.07545164227485657, |
| "rewards/rejected": -0.10827790945768356, |
| "step": 2135 |
| }, |
| { |
| "epoch": 2.9648798521256934, |
| "grad_norm": 3.9010303020477295, |
| "learning_rate": 4.7454545454545455e-08, |
| "log_odds_chosen": 1.8488551378250122, |
| "log_odds_ratio": -0.3088344633579254, |
| "logits/chosen": 2.000403881072998, |
| "logits/rejected": 2.080737352371216, |
| "logps/chosen": -0.298979252576828, |
| "logps/rejected": -1.1432158946990967, |
| "loss": 1.0063, |
| "nll_loss": 0.9754161238670349, |
| "rewards/accuracies": 0.9166666865348816, |
| "rewards/chosen": -0.02989793010056019, |
| "rewards/margins": 0.08442366868257523, |
| "rewards/rejected": -0.11432159692049026, |
| "step": 2140 |
| }, |
| { |
| "epoch": 2.97181146025878, |
| "grad_norm": 2.5673093795776367, |
| "learning_rate": 4.6545454545454546e-08, |
| "log_odds_chosen": 1.8592909574508667, |
| "log_odds_ratio": -0.26686161756515503, |
| "logits/chosen": 2.063077688217163, |
| "logits/rejected": 2.1509907245635986, |
| "logps/chosen": -0.2621632218360901, |
| "logps/rejected": -1.0871644020080566, |
| "loss": 1.0334, |
| "nll_loss": 1.0067225694656372, |
| "rewards/accuracies": 0.9166666865348816, |
| "rewards/chosen": -0.02621631696820259, |
| "rewards/margins": 0.08250012993812561, |
| "rewards/rejected": -0.1087164580821991, |
| "step": 2145 |
| }, |
| { |
| "epoch": 2.978743068391867, |
| "grad_norm": 2.2342443466186523, |
| "learning_rate": 4.5636363636363637e-08, |
| "log_odds_chosen": 1.6307332515716553, |
| "log_odds_ratio": -0.3424622416496277, |
| "logits/chosen": 2.004580497741699, |
| "logits/rejected": 2.060046434402466, |
| "logps/chosen": -0.3020874261856079, |
| "logps/rejected": -1.084688663482666, |
| "loss": 0.9816, |
| "nll_loss": 0.9473193883895874, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.03020874410867691, |
| "rewards/margins": 0.07826013118028641, |
| "rewards/rejected": -0.10846886783838272, |
| "step": 2150 |
| }, |
| { |
| "epoch": 2.9856746765249538, |
| "grad_norm": 3.1599783897399902, |
| "learning_rate": 4.472727272727273e-08, |
| "log_odds_chosen": 2.105912685394287, |
| "log_odds_ratio": -0.27635109424591064, |
| "logits/chosen": 2.068650245666504, |
| "logits/rejected": 2.123730182647705, |
| "logps/chosen": -0.2944362461566925, |
| "logps/rejected": -1.2996933460235596, |
| "loss": 1.0106, |
| "nll_loss": 0.9829762578010559, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.029443617910146713, |
| "rewards/margins": 0.10052569955587387, |
| "rewards/rejected": -0.12996931374073029, |
| "step": 2155 |
| }, |
| { |
| "epoch": 2.9926062846580406, |
| "grad_norm": 3.6730105876922607, |
| "learning_rate": 4.381818181818182e-08, |
| "log_odds_chosen": 1.8076189756393433, |
| "log_odds_ratio": -0.2718731462955475, |
| "logits/chosen": 2.034381628036499, |
| "logits/rejected": 2.103024959564209, |
| "logps/chosen": -0.2844863831996918, |
| "logps/rejected": -1.0949420928955078, |
| "loss": 1.0227, |
| "nll_loss": 0.9955376982688904, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -0.028448637574911118, |
| "rewards/margins": 0.08104557543992996, |
| "rewards/rejected": -0.10949420928955078, |
| "step": 2160 |
| }, |
| { |
| "epoch": 2.9995378927911274, |
| "grad_norm": 3.9206016063690186, |
| "learning_rate": 4.290909090909091e-08, |
| "log_odds_chosen": 1.8349543809890747, |
| "log_odds_ratio": -0.27683863043785095, |
| "logits/chosen": 2.036572217941284, |
| "logits/rejected": 2.089813232421875, |
| "logps/chosen": -0.27643078565597534, |
| "logps/rejected": -1.0756059885025024, |
| "loss": 0.9799, |
| "nll_loss": 0.9521928429603577, |
| "rewards/accuracies": 0.8916666507720947, |
| "rewards/chosen": -0.027643078938126564, |
| "rewards/margins": 0.0799175277352333, |
| "rewards/rejected": -0.10756059736013412, |
| "step": 2165 |
| }, |
| { |
| "epoch": 3.0055452865064693, |
| "grad_norm": 1.9081456661224365, |
| "learning_rate": 4.2e-08, |
| "log_odds_chosen": 1.7460095882415771, |
| "log_odds_ratio": -0.32449567317962646, |
| "logits/chosen": 2.021742820739746, |
| "logits/rejected": 2.1034204959869385, |
| "logps/chosen": -0.27256032824516296, |
| "logps/rejected": -1.0215450525283813, |
| "loss": 0.8395, |
| "nll_loss": 0.9364208579063416, |
| "rewards/accuracies": 0.8621795177459717, |
| "rewards/chosen": -0.027256034314632416, |
| "rewards/margins": 0.07489847391843796, |
| "rewards/rejected": -0.10215452313423157, |
| "step": 2170 |
| }, |
| { |
| "epoch": 3.0124768946395566, |
| "grad_norm": 2.545027017593384, |
| "learning_rate": 4.109090909090909e-08, |
| "log_odds_chosen": 1.8810588121414185, |
| "log_odds_ratio": -0.2728542387485504, |
| "logits/chosen": 2.072154998779297, |
| "logits/rejected": 2.1284384727478027, |
| "logps/chosen": -0.28162866830825806, |
| "logps/rejected": -1.0774617195129395, |
| "loss": 1.017, |
| "nll_loss": 0.9897640347480774, |
| "rewards/accuracies": 0.9333333373069763, |
| "rewards/chosen": -0.028162868693470955, |
| "rewards/margins": 0.0795833095908165, |
| "rewards/rejected": -0.1077461913228035, |
| "step": 2175 |
| }, |
| { |
| "epoch": 3.0194085027726434, |
| "grad_norm": 1.6765599250793457, |
| "learning_rate": 4.018181818181818e-08, |
| "log_odds_chosen": 1.817507028579712, |
| "log_odds_ratio": -0.27471134066581726, |
| "logits/chosen": 1.970469355583191, |
| "logits/rejected": 2.050422191619873, |
| "logps/chosen": -0.2666342556476593, |
| "logps/rejected": -1.0124282836914062, |
| "loss": 0.9833, |
| "nll_loss": 0.9558401703834534, |
| "rewards/accuracies": 0.8916666507720947, |
| "rewards/chosen": -0.02666342444717884, |
| "rewards/margins": 0.0745794028043747, |
| "rewards/rejected": -0.10124283283948898, |
| "step": 2180 |
| }, |
| { |
| "epoch": 3.02634011090573, |
| "grad_norm": 1.7218668460845947, |
| "learning_rate": 3.927272727272727e-08, |
| "log_odds_chosen": 1.917079210281372, |
| "log_odds_ratio": -0.24975134432315826, |
| "logits/chosen": 2.128929853439331, |
| "logits/rejected": 2.173793077468872, |
| "logps/chosen": -0.29144442081451416, |
| "logps/rejected": -1.118105173110962, |
| "loss": 1.0045, |
| "nll_loss": 0.9795462489128113, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -0.029144441708922386, |
| "rewards/margins": 0.08266608417034149, |
| "rewards/rejected": -0.11181053519248962, |
| "step": 2185 |
| }, |
| { |
| "epoch": 3.033271719038817, |
| "grad_norm": 2.8524911403656006, |
| "learning_rate": 3.836363636363636e-08, |
| "log_odds_chosen": 1.9781666994094849, |
| "log_odds_ratio": -0.24611227214336395, |
| "logits/chosen": 2.036329746246338, |
| "logits/rejected": 2.1263298988342285, |
| "logps/chosen": -0.2536916136741638, |
| "logps/rejected": -1.1110204458236694, |
| "loss": 0.9873, |
| "nll_loss": 0.9627164602279663, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.025369159877300262, |
| "rewards/margins": 0.08573289960622787, |
| "rewards/rejected": -0.11110205203294754, |
| "step": 2190 |
| }, |
| { |
| "epoch": 3.040203327171904, |
| "grad_norm": 2.266507387161255, |
| "learning_rate": 3.745454545454546e-08, |
| "log_odds_chosen": 1.695191502571106, |
| "log_odds_ratio": -0.32812872529029846, |
| "logits/chosen": 1.9582918882369995, |
| "logits/rejected": 2.0455925464630127, |
| "logps/chosen": -0.23305271565914154, |
| "logps/rejected": -0.9286764860153198, |
| "loss": 1.0022, |
| "nll_loss": 0.969412088394165, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.023305270820856094, |
| "rewards/margins": 0.06956236064434052, |
| "rewards/rejected": -0.0928676426410675, |
| "step": 2195 |
| }, |
| { |
| "epoch": 3.0471349353049906, |
| "grad_norm": 2.823397159576416, |
| "learning_rate": 3.654545454545455e-08, |
| "log_odds_chosen": 1.8539453744888306, |
| "log_odds_ratio": -0.31512120366096497, |
| "logits/chosen": 1.9888670444488525, |
| "logits/rejected": 2.056990623474121, |
| "logps/chosen": -0.2506329417228699, |
| "logps/rejected": -1.078035593032837, |
| "loss": 0.97, |
| "nll_loss": 0.9385051727294922, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.025063293054699898, |
| "rewards/margins": 0.08274027705192566, |
| "rewards/rejected": -0.10780356079339981, |
| "step": 2200 |
| }, |
| { |
| "epoch": 3.054066543438078, |
| "grad_norm": 2.515324354171753, |
| "learning_rate": 3.563636363636364e-08, |
| "log_odds_chosen": 2.2040858268737793, |
| "log_odds_ratio": -0.2215387225151062, |
| "logits/chosen": 2.07570743560791, |
| "logits/rejected": 2.1536285877227783, |
| "logps/chosen": -0.26311811804771423, |
| "logps/rejected": -1.3125801086425781, |
| "loss": 1.0534, |
| "nll_loss": 1.03126060962677, |
| "rewards/accuracies": 0.9416666626930237, |
| "rewards/chosen": -0.026311814785003662, |
| "rewards/margins": 0.10494618117809296, |
| "rewards/rejected": -0.1312580108642578, |
| "step": 2205 |
| }, |
| { |
| "epoch": 3.0609981515711646, |
| "grad_norm": 2.884657382965088, |
| "learning_rate": 3.472727272727273e-08, |
| "log_odds_chosen": 1.8875175714492798, |
| "log_odds_ratio": -0.26213160157203674, |
| "logits/chosen": 1.9864482879638672, |
| "logits/rejected": 2.0323519706726074, |
| "logps/chosen": -0.28199702501296997, |
| "logps/rejected": -1.1133620738983154, |
| "loss": 1.0323, |
| "nll_loss": 1.006096363067627, |
| "rewards/accuracies": 0.9333333373069763, |
| "rewards/chosen": -0.028199700638651848, |
| "rewards/margins": 0.08313652873039246, |
| "rewards/rejected": -0.11133621633052826, |
| "step": 2210 |
| }, |
| { |
| "epoch": 3.0679297597042514, |
| "grad_norm": 5.779941082000732, |
| "learning_rate": 3.381818181818182e-08, |
| "log_odds_chosen": 1.598193883895874, |
| "log_odds_ratio": -0.3541763126850128, |
| "logits/chosen": 2.0120930671691895, |
| "logits/rejected": 2.105281114578247, |
| "logps/chosen": -0.32157793641090393, |
| "logps/rejected": -0.9493343830108643, |
| "loss": 1.0617, |
| "nll_loss": 1.0262763500213623, |
| "rewards/accuracies": 0.8833333253860474, |
| "rewards/chosen": -0.032157786190509796, |
| "rewards/margins": 0.06277565658092499, |
| "rewards/rejected": -0.09493346512317657, |
| "step": 2215 |
| }, |
| { |
| "epoch": 3.074861367837338, |
| "grad_norm": 1.6296344995498657, |
| "learning_rate": 3.290909090909091e-08, |
| "log_odds_chosen": 2.0749495029449463, |
| "log_odds_ratio": -0.2285868376493454, |
| "logits/chosen": 1.9751384258270264, |
| "logits/rejected": 2.0483574867248535, |
| "logps/chosen": -0.22339893877506256, |
| "logps/rejected": -1.1327685117721558, |
| "loss": 0.9692, |
| "nll_loss": 0.946365475654602, |
| "rewards/accuracies": 0.9666666388511658, |
| "rewards/chosen": -0.022339891642332077, |
| "rewards/margins": 0.09093696624040604, |
| "rewards/rejected": -0.11327686160802841, |
| "step": 2220 |
| }, |
| { |
| "epoch": 3.081792975970425, |
| "grad_norm": 5.483705997467041, |
| "learning_rate": 3.2e-08, |
| "log_odds_chosen": 1.910452127456665, |
| "log_odds_ratio": -0.2830710709095001, |
| "logits/chosen": 2.040644645690918, |
| "logits/rejected": 2.113460063934326, |
| "logps/chosen": -0.2728864550590515, |
| "logps/rejected": -1.134534239768982, |
| "loss": 1.0302, |
| "nll_loss": 1.0018789768218994, |
| "rewards/accuracies": 0.8916666507720947, |
| "rewards/chosen": -0.0272886510938406, |
| "rewards/margins": 0.08616478741168976, |
| "rewards/rejected": -0.11345343291759491, |
| "step": 2225 |
| }, |
| { |
| "epoch": 3.088724584103512, |
| "grad_norm": 3.0457351207733154, |
| "learning_rate": 3.109090909090909e-08, |
| "log_odds_chosen": 1.8377028703689575, |
| "log_odds_ratio": -0.2788829207420349, |
| "logits/chosen": 1.994458794593811, |
| "logits/rejected": 2.058776378631592, |
| "logps/chosen": -0.2481241673231125, |
| "logps/rejected": -1.0059267282485962, |
| "loss": 0.9581, |
| "nll_loss": 0.9301670789718628, |
| "rewards/accuracies": 0.9083333611488342, |
| "rewards/chosen": -0.024812418967485428, |
| "rewards/margins": 0.07578025758266449, |
| "rewards/rejected": -0.10059265792369843, |
| "step": 2230 |
| }, |
| { |
| "epoch": 3.095656192236599, |
| "grad_norm": 1.5724313259124756, |
| "learning_rate": 3.018181818181818e-08, |
| "log_odds_chosen": 1.8434357643127441, |
| "log_odds_ratio": -0.2810656428337097, |
| "logits/chosen": 2.0633890628814697, |
| "logits/rejected": 2.108525276184082, |
| "logps/chosen": -0.27151191234588623, |
| "logps/rejected": -1.0885863304138184, |
| "loss": 0.9965, |
| "nll_loss": 0.9684168696403503, |
| "rewards/accuracies": 0.8666666746139526, |
| "rewards/chosen": -0.027151191607117653, |
| "rewards/margins": 0.08170744776725769, |
| "rewards/rejected": -0.1088586375117302, |
| "step": 2235 |
| }, |
| { |
| "epoch": 3.102587800369686, |
| "grad_norm": 2.0384411811828613, |
| "learning_rate": 2.927272727272727e-08, |
| "log_odds_chosen": 1.992503046989441, |
| "log_odds_ratio": -0.2679726183414459, |
| "logits/chosen": 2.0843007564544678, |
| "logits/rejected": 2.149775505065918, |
| "logps/chosen": -0.2585987150669098, |
| "logps/rejected": -1.1711468696594238, |
| "loss": 0.9832, |
| "nll_loss": 0.9563843607902527, |
| "rewards/accuracies": 0.8916666507720947, |
| "rewards/chosen": -0.02585986815392971, |
| "rewards/margins": 0.0912548154592514, |
| "rewards/rejected": -0.11711468547582626, |
| "step": 2240 |
| }, |
| { |
| "epoch": 3.1095194085027726, |
| "grad_norm": 1.6383320093154907, |
| "learning_rate": 2.836363636363636e-08, |
| "log_odds_chosen": 1.9366320371627808, |
| "log_odds_ratio": -0.29981935024261475, |
| "logits/chosen": 2.024559259414673, |
| "logits/rejected": 2.1117780208587646, |
| "logps/chosen": -0.28914347290992737, |
| "logps/rejected": -1.1294143199920654, |
| "loss": 0.9738, |
| "nll_loss": 0.943781316280365, |
| "rewards/accuracies": 0.8666666746139526, |
| "rewards/chosen": -0.02891434356570244, |
| "rewards/margins": 0.08402708917856216, |
| "rewards/rejected": -0.1129414513707161, |
| "step": 2245 |
| }, |
| { |
| "epoch": 3.1164510166358594, |
| "grad_norm": 2.1520862579345703, |
| "learning_rate": 2.745454545454545e-08, |
| "log_odds_chosen": 1.9886517524719238, |
| "log_odds_ratio": -0.26550471782684326, |
| "logits/chosen": 1.966781497001648, |
| "logits/rejected": 2.0406601428985596, |
| "logps/chosen": -0.25236350297927856, |
| "logps/rejected": -1.1123831272125244, |
| "loss": 0.9942, |
| "nll_loss": 0.9676342010498047, |
| "rewards/accuracies": 0.9416666626930237, |
| "rewards/chosen": -0.025236355140805244, |
| "rewards/margins": 0.08600196242332458, |
| "rewards/rejected": -0.11123832315206528, |
| "step": 2250 |
| }, |
| { |
| "epoch": 3.1233826247689462, |
| "grad_norm": 2.3597357273101807, |
| "learning_rate": 2.6545454545454542e-08, |
| "log_odds_chosen": 1.6445980072021484, |
| "log_odds_ratio": -0.3386446535587311, |
| "logits/chosen": 2.031057834625244, |
| "logits/rejected": 2.0886597633361816, |
| "logps/chosen": -0.2981587052345276, |
| "logps/rejected": -1.0017781257629395, |
| "loss": 1.0319, |
| "nll_loss": 0.9980849027633667, |
| "rewards/accuracies": 0.8416666388511658, |
| "rewards/chosen": -0.02981586940586567, |
| "rewards/margins": 0.07036194205284119, |
| "rewards/rejected": -0.1001778170466423, |
| "step": 2255 |
| }, |
| { |
| "epoch": 3.1303142329020335, |
| "grad_norm": 2.875710964202881, |
| "learning_rate": 2.5636363636363633e-08, |
| "log_odds_chosen": 2.163516044616699, |
| "log_odds_ratio": -0.2714638113975525, |
| "logits/chosen": 2.009424924850464, |
| "logits/rejected": 2.0776355266571045, |
| "logps/chosen": -0.25366875529289246, |
| "logps/rejected": -1.265039324760437, |
| "loss": 0.9918, |
| "nll_loss": 0.9646516442298889, |
| "rewards/accuracies": 0.8666666746139526, |
| "rewards/chosen": -0.025366876274347305, |
| "rewards/margins": 0.10113705694675446, |
| "rewards/rejected": -0.12650392949581146, |
| "step": 2260 |
| }, |
| { |
| "epoch": 3.1372458410351203, |
| "grad_norm": 2.339726686477661, |
| "learning_rate": 2.4727272727272727e-08, |
| "log_odds_chosen": 1.7720131874084473, |
| "log_odds_ratio": -0.3246005177497864, |
| "logits/chosen": 2.034062623977661, |
| "logits/rejected": 2.0977277755737305, |
| "logps/chosen": -0.27337199449539185, |
| "logps/rejected": -1.0653187036514282, |
| "loss": 0.988, |
| "nll_loss": 0.9555687308311462, |
| "rewards/accuracies": 0.8666666746139526, |
| "rewards/chosen": -0.027337197214365005, |
| "rewards/margins": 0.07919467240571976, |
| "rewards/rejected": -0.10653186589479446, |
| "step": 2265 |
| }, |
| { |
| "epoch": 3.144177449168207, |
| "grad_norm": 3.5936927795410156, |
| "learning_rate": 2.3818181818181817e-08, |
| "log_odds_chosen": 1.9385422468185425, |
| "log_odds_ratio": -0.3190802335739136, |
| "logits/chosen": 1.9812796115875244, |
| "logits/rejected": 2.03690767288208, |
| "logps/chosen": -0.2825137674808502, |
| "logps/rejected": -1.2010154724121094, |
| "loss": 1.0062, |
| "nll_loss": 0.9742683172225952, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.02825137972831726, |
| "rewards/margins": 0.09185018390417099, |
| "rewards/rejected": -0.12010155618190765, |
| "step": 2270 |
| }, |
| { |
| "epoch": 3.151109057301294, |
| "grad_norm": 2.3216748237609863, |
| "learning_rate": 2.2909090909090908e-08, |
| "log_odds_chosen": 1.849832534790039, |
| "log_odds_ratio": -0.30438894033432007, |
| "logits/chosen": 2.030111789703369, |
| "logits/rejected": 2.0937530994415283, |
| "logps/chosen": -0.3096204400062561, |
| "logps/rejected": -1.1795369386672974, |
| "loss": 1.0247, |
| "nll_loss": 0.994240939617157, |
| "rewards/accuracies": 0.8833333253860474, |
| "rewards/chosen": -0.030962049961090088, |
| "rewards/margins": 0.08699165284633636, |
| "rewards/rejected": -0.11795369535684586, |
| "step": 2275 |
| }, |
| { |
| "epoch": 3.1580406654343807, |
| "grad_norm": 5.251974582672119, |
| "learning_rate": 2.2e-08, |
| "log_odds_chosen": 1.73758864402771, |
| "log_odds_ratio": -0.3142777681350708, |
| "logits/chosen": 2.018113851547241, |
| "logits/rejected": 2.0766327381134033, |
| "logps/chosen": -0.25756967067718506, |
| "logps/rejected": -1.0410569906234741, |
| "loss": 0.9954, |
| "nll_loss": 0.9640125036239624, |
| "rewards/accuracies": 0.8833333253860474, |
| "rewards/chosen": -0.025756964460015297, |
| "rewards/margins": 0.07834872603416443, |
| "rewards/rejected": -0.10410568863153458, |
| "step": 2280 |
| }, |
| { |
| "epoch": 3.1649722735674675, |
| "grad_norm": 2.160755157470703, |
| "learning_rate": 2.109090909090909e-08, |
| "log_odds_chosen": 1.7930853366851807, |
| "log_odds_ratio": -0.31781256198883057, |
| "logits/chosen": 1.9471144676208496, |
| "logits/rejected": 2.0215981006622314, |
| "logps/chosen": -0.3217299282550812, |
| "logps/rejected": -1.0115313529968262, |
| "loss": 0.98, |
| "nll_loss": 0.9482495784759521, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.03217298910021782, |
| "rewards/margins": 0.0689801424741745, |
| "rewards/rejected": -0.10115313529968262, |
| "step": 2285 |
| }, |
| { |
| "epoch": 3.1719038817005547, |
| "grad_norm": 2.8207828998565674, |
| "learning_rate": 2.018181818181818e-08, |
| "log_odds_chosen": 1.7178608179092407, |
| "log_odds_ratio": -0.351553738117218, |
| "logits/chosen": 1.946655035018921, |
| "logits/rejected": 2.0031888484954834, |
| "logps/chosen": -0.2946816682815552, |
| "logps/rejected": -1.0428930521011353, |
| "loss": 1.0444, |
| "nll_loss": 1.009276270866394, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -0.029468165710568428, |
| "rewards/margins": 0.07482115179300308, |
| "rewards/rejected": -0.10428932309150696, |
| "step": 2290 |
| }, |
| { |
| "epoch": 3.1788354898336415, |
| "grad_norm": 3.0449793338775635, |
| "learning_rate": 1.927272727272727e-08, |
| "log_odds_chosen": 1.8244539499282837, |
| "log_odds_ratio": -0.3448461890220642, |
| "logits/chosen": 2.010988712310791, |
| "logits/rejected": 2.0648319721221924, |
| "logps/chosen": -0.29256051778793335, |
| "logps/rejected": -1.0924314260482788, |
| "loss": 0.9988, |
| "nll_loss": 0.9642786383628845, |
| "rewards/accuracies": 0.8416666388511658, |
| "rewards/chosen": -0.029256051406264305, |
| "rewards/margins": 0.07998708635568619, |
| "rewards/rejected": -0.10924313217401505, |
| "step": 2295 |
| }, |
| { |
| "epoch": 3.1857670979667283, |
| "grad_norm": 1.2735953330993652, |
| "learning_rate": 1.836363636363636e-08, |
| "log_odds_chosen": 2.074246406555176, |
| "log_odds_ratio": -0.23903319239616394, |
| "logits/chosen": 2.0425198078155518, |
| "logits/rejected": 2.104451894760132, |
| "logps/chosen": -0.2704167068004608, |
| "logps/rejected": -1.1976983547210693, |
| "loss": 0.9924, |
| "nll_loss": 0.9685426950454712, |
| "rewards/accuracies": 0.9166666865348816, |
| "rewards/chosen": -0.02704167179763317, |
| "rewards/margins": 0.09272817522287369, |
| "rewards/rejected": -0.11976984143257141, |
| "step": 2300 |
| }, |
| { |
| "epoch": 3.192698706099815, |
| "grad_norm": 2.2513015270233154, |
| "learning_rate": 1.7454545454545455e-08, |
| "log_odds_chosen": 1.8696075677871704, |
| "log_odds_ratio": -0.31109151244163513, |
| "logits/chosen": 1.982120156288147, |
| "logits/rejected": 2.0289342403411865, |
| "logps/chosen": -0.2669697701931, |
| "logps/rejected": -1.0606690645217896, |
| "loss": 1.0334, |
| "nll_loss": 1.0023095607757568, |
| "rewards/accuracies": 0.8916666507720947, |
| "rewards/chosen": -0.026696979999542236, |
| "rewards/margins": 0.0793699249625206, |
| "rewards/rejected": -0.10606691986322403, |
| "step": 2305 |
| }, |
| { |
| "epoch": 3.199630314232902, |
| "grad_norm": 3.0249485969543457, |
| "learning_rate": 1.6545454545454545e-08, |
| "log_odds_chosen": 1.5753225088119507, |
| "log_odds_ratio": -0.32907435297966003, |
| "logits/chosen": 1.9598543643951416, |
| "logits/rejected": 2.043677568435669, |
| "logps/chosen": -0.2798163592815399, |
| "logps/rejected": -0.9463704228401184, |
| "loss": 1.0423, |
| "nll_loss": 1.0094271898269653, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.027981635183095932, |
| "rewards/margins": 0.06665540486574173, |
| "rewards/rejected": -0.09463704377412796, |
| "step": 2310 |
| }, |
| { |
| "epoch": 3.2065619223659887, |
| "grad_norm": 2.1163971424102783, |
| "learning_rate": 1.5636363636363636e-08, |
| "log_odds_chosen": 1.8971047401428223, |
| "log_odds_ratio": -0.27131593227386475, |
| "logits/chosen": 1.9652113914489746, |
| "logits/rejected": 2.0411899089813232, |
| "logps/chosen": -0.2609769105911255, |
| "logps/rejected": -1.032325029373169, |
| "loss": 0.9929, |
| "nll_loss": 0.9658178687095642, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -0.02609768696129322, |
| "rewards/margins": 0.07713483273983002, |
| "rewards/rejected": -0.10323251038789749, |
| "step": 2315 |
| }, |
| { |
| "epoch": 3.213493530499076, |
| "grad_norm": 1.9558539390563965, |
| "learning_rate": 1.4727272727272726e-08, |
| "log_odds_chosen": 1.7015489339828491, |
| "log_odds_ratio": -0.3433685898780823, |
| "logits/chosen": 2.122408866882324, |
| "logits/rejected": 2.172548770904541, |
| "logps/chosen": -0.3318374454975128, |
| "logps/rejected": -1.1208266019821167, |
| "loss": 1.0349, |
| "nll_loss": 1.0005649328231812, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.0331837423145771, |
| "rewards/margins": 0.07889891415834427, |
| "rewards/rejected": -0.11208265274763107, |
| "step": 2320 |
| }, |
| { |
| "epoch": 3.2204251386321627, |
| "grad_norm": 1.2714937925338745, |
| "learning_rate": 1.3818181818181817e-08, |
| "log_odds_chosen": 2.1636459827423096, |
| "log_odds_ratio": -0.23991011083126068, |
| "logits/chosen": 2.0637595653533936, |
| "logits/rejected": 2.1236019134521484, |
| "logps/chosen": -0.2423422783613205, |
| "logps/rejected": -1.284201741218567, |
| "loss": 0.9579, |
| "nll_loss": 0.9339547753334045, |
| "rewards/accuracies": 0.9333333373069763, |
| "rewards/chosen": -0.02423422783613205, |
| "rewards/margins": 0.10418593138456345, |
| "rewards/rejected": -0.1284201443195343, |
| "step": 2325 |
| }, |
| { |
| "epoch": 3.2273567467652495, |
| "grad_norm": 3.6437788009643555, |
| "learning_rate": 1.2909090909090908e-08, |
| "log_odds_chosen": 2.0622992515563965, |
| "log_odds_ratio": -0.25077375769615173, |
| "logits/chosen": 2.067502737045288, |
| "logits/rejected": 2.1184935569763184, |
| "logps/chosen": -0.28136977553367615, |
| "logps/rejected": -1.2336159944534302, |
| "loss": 0.9928, |
| "nll_loss": 0.9677172899246216, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.028136977925896645, |
| "rewards/margins": 0.09522464126348495, |
| "rewards/rejected": -0.12336160987615585, |
| "step": 2330 |
| }, |
| { |
| "epoch": 3.2342883548983363, |
| "grad_norm": 2.5397536754608154, |
| "learning_rate": 1.1999999999999998e-08, |
| "log_odds_chosen": 1.913984775543213, |
| "log_odds_ratio": -0.26355621218681335, |
| "logits/chosen": 1.9935226440429688, |
| "logits/rejected": 2.0521795749664307, |
| "logps/chosen": -0.24003277719020844, |
| "logps/rejected": -1.0872286558151245, |
| "loss": 1.0179, |
| "nll_loss": 0.9915151000022888, |
| "rewards/accuracies": 0.8666666746139526, |
| "rewards/chosen": -0.024003280326724052, |
| "rewards/margins": 0.08471958339214325, |
| "rewards/rejected": -0.10872285813093185, |
| "step": 2335 |
| }, |
| { |
| "epoch": 3.241219963031423, |
| "grad_norm": 2.6664090156555176, |
| "learning_rate": 1.109090909090909e-08, |
| "log_odds_chosen": 1.7946439981460571, |
| "log_odds_ratio": -0.3102231025695801, |
| "logits/chosen": 2.0916616916656494, |
| "logits/rejected": 2.1227028369903564, |
| "logps/chosen": -0.33324259519577026, |
| "logps/rejected": -1.194060206413269, |
| "loss": 1.027, |
| "nll_loss": 0.9959444403648376, |
| "rewards/accuracies": 0.8833333253860474, |
| "rewards/chosen": -0.033324260264635086, |
| "rewards/margins": 0.08608177304267883, |
| "rewards/rejected": -0.11940603703260422, |
| "step": 2340 |
| }, |
| { |
| "epoch": 3.2481515711645104, |
| "grad_norm": 3.543788194656372, |
| "learning_rate": 1.0181818181818181e-08, |
| "log_odds_chosen": 1.9351673126220703, |
| "log_odds_ratio": -0.28479108214378357, |
| "logits/chosen": 2.082379102706909, |
| "logits/rejected": 2.1343612670898438, |
| "logps/chosen": -0.27910298109054565, |
| "logps/rejected": -1.1840400695800781, |
| "loss": 1.0176, |
| "nll_loss": 0.9891124963760376, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -0.027910297736525536, |
| "rewards/margins": 0.09049370884895325, |
| "rewards/rejected": -0.11840400844812393, |
| "step": 2345 |
| }, |
| { |
| "epoch": 3.255083179297597, |
| "grad_norm": 2.4029653072357178, |
| "learning_rate": 9.272727272727272e-09, |
| "log_odds_chosen": 1.745149850845337, |
| "log_odds_ratio": -0.3268326222896576, |
| "logits/chosen": 2.009989023208618, |
| "logits/rejected": 2.0562989711761475, |
| "logps/chosen": -0.2905314564704895, |
| "logps/rejected": -1.0186735391616821, |
| "loss": 1.0114, |
| "nll_loss": 0.9786819815635681, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.02905314415693283, |
| "rewards/margins": 0.0728142112493515, |
| "rewards/rejected": -0.10186735540628433, |
| "step": 2350 |
| }, |
| { |
| "epoch": 3.262014787430684, |
| "grad_norm": 1.2835363149642944, |
| "learning_rate": 8.363636363636362e-09, |
| "log_odds_chosen": 2.011457920074463, |
| "log_odds_ratio": -0.2857803404331207, |
| "logits/chosen": 1.9969309568405151, |
| "logits/rejected": 2.0629494190216064, |
| "logps/chosen": -0.2886459529399872, |
| "logps/rejected": -1.2359684705734253, |
| "loss": 1.0145, |
| "nll_loss": 0.9858835339546204, |
| "rewards/accuracies": 0.9166666865348816, |
| "rewards/chosen": -0.028864597901701927, |
| "rewards/margins": 0.09473226219415665, |
| "rewards/rejected": -0.12359685450792313, |
| "step": 2355 |
| }, |
| { |
| "epoch": 3.2689463955637708, |
| "grad_norm": 2.477343797683716, |
| "learning_rate": 7.454545454545453e-09, |
| "log_odds_chosen": 1.7344855070114136, |
| "log_odds_ratio": -0.3474830687046051, |
| "logits/chosen": 1.9719436168670654, |
| "logits/rejected": 2.057311773300171, |
| "logps/chosen": -0.26447659730911255, |
| "logps/rejected": -1.0018370151519775, |
| "loss": 0.9815, |
| "nll_loss": 0.9467440247535706, |
| "rewards/accuracies": 0.8416666388511658, |
| "rewards/chosen": -0.026447657495737076, |
| "rewards/margins": 0.0737360343337059, |
| "rewards/rejected": -0.10018369555473328, |
| "step": 2360 |
| }, |
| { |
| "epoch": 3.2758780036968576, |
| "grad_norm": 2.9011664390563965, |
| "learning_rate": 6.545454545454546e-09, |
| "log_odds_chosen": 2.135871648788452, |
| "log_odds_ratio": -0.23335178196430206, |
| "logits/chosen": 2.0359859466552734, |
| "logits/rejected": 2.091546058654785, |
| "logps/chosen": -0.2491091936826706, |
| "logps/rejected": -1.206799030303955, |
| "loss": 1.0227, |
| "nll_loss": 0.9993228912353516, |
| "rewards/accuracies": 0.9166666865348816, |
| "rewards/chosen": -0.02491091936826706, |
| "rewards/margins": 0.0957689955830574, |
| "rewards/rejected": -0.12067990005016327, |
| "step": 2365 |
| }, |
| { |
| "epoch": 3.2828096118299444, |
| "grad_norm": 2.491895914077759, |
| "learning_rate": 5.6363636363636365e-09, |
| "log_odds_chosen": 1.9308069944381714, |
| "log_odds_ratio": -0.29651203751564026, |
| "logits/chosen": 1.9695427417755127, |
| "logits/rejected": 2.052243709564209, |
| "logps/chosen": -0.2871295213699341, |
| "logps/rejected": -1.1503547430038452, |
| "loss": 0.9909, |
| "nll_loss": 0.9612923860549927, |
| "rewards/accuracies": 0.8916666507720947, |
| "rewards/chosen": -0.028712956234812737, |
| "rewards/margins": 0.08632253110408783, |
| "rewards/rejected": -0.11503548920154572, |
| "step": 2370 |
| }, |
| { |
| "epoch": 3.2897412199630316, |
| "grad_norm": 2.254281520843506, |
| "learning_rate": 4.727272727272727e-09, |
| "log_odds_chosen": 1.8961893320083618, |
| "log_odds_ratio": -0.2896248400211334, |
| "logits/chosen": 2.035944938659668, |
| "logits/rejected": 2.102773427963257, |
| "logps/chosen": -0.30598270893096924, |
| "logps/rejected": -1.146560549736023, |
| "loss": 1.0012, |
| "nll_loss": 0.9722784757614136, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -0.030598269775509834, |
| "rewards/margins": 0.0840577781200409, |
| "rewards/rejected": -0.11465605348348618, |
| "step": 2375 |
| }, |
| { |
| "epoch": 3.2966728280961184, |
| "grad_norm": 4.881258487701416, |
| "learning_rate": 3.8181818181818185e-09, |
| "log_odds_chosen": 1.9275856018066406, |
| "log_odds_ratio": -0.26452910900115967, |
| "logits/chosen": 2.0649008750915527, |
| "logits/rejected": 2.1088078022003174, |
| "logps/chosen": -0.2894597351551056, |
| "logps/rejected": -1.1926367282867432, |
| "loss": 0.998, |
| "nll_loss": 0.9715353846549988, |
| "rewards/accuracies": 0.9333333373069763, |
| "rewards/chosen": -0.028945976868271828, |
| "rewards/margins": 0.09031769633293152, |
| "rewards/rejected": -0.1192636638879776, |
| "step": 2380 |
| }, |
| { |
| "epoch": 3.303604436229205, |
| "grad_norm": 2.7718567848205566, |
| "learning_rate": 2.909090909090909e-09, |
| "log_odds_chosen": 1.9183131456375122, |
| "log_odds_ratio": -0.29583075642585754, |
| "logits/chosen": 2.068955183029175, |
| "logits/rejected": 2.141144275665283, |
| "logps/chosen": -0.28529077768325806, |
| "logps/rejected": -1.2924492359161377, |
| "loss": 1.0428, |
| "nll_loss": 1.013238787651062, |
| "rewards/accuracies": 0.8916666507720947, |
| "rewards/chosen": -0.028529079630970955, |
| "rewards/margins": 0.10071584582328796, |
| "rewards/rejected": -0.12924490869045258, |
| "step": 2385 |
| }, |
| { |
| "epoch": 3.310536044362292, |
| "grad_norm": 2.2338600158691406, |
| "learning_rate": 2e-09, |
| "log_odds_chosen": 1.802534580230713, |
| "log_odds_ratio": -0.27281951904296875, |
| "logits/chosen": 1.9571995735168457, |
| "logits/rejected": 2.0102591514587402, |
| "logps/chosen": -0.27262741327285767, |
| "logps/rejected": -1.066834568977356, |
| "loss": 1.0008, |
| "nll_loss": 0.9735398888587952, |
| "rewards/accuracies": 0.9166666865348816, |
| "rewards/chosen": -0.027262739837169647, |
| "rewards/margins": 0.07942071557044983, |
| "rewards/rejected": -0.10668346285820007, |
| "step": 2390 |
| }, |
| { |
| "epoch": 3.317467652495379, |
| "grad_norm": 3.0731616020202637, |
| "learning_rate": 1.090909090909091e-09, |
| "log_odds_chosen": 2.0021066665649414, |
| "log_odds_ratio": -0.28030332922935486, |
| "logits/chosen": 1.9898285865783691, |
| "logits/rejected": 2.062572717666626, |
| "logps/chosen": -0.27583804726600647, |
| "logps/rejected": -1.1610000133514404, |
| "loss": 0.9737, |
| "nll_loss": 0.9456390738487244, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.027583809569478035, |
| "rewards/margins": 0.08851619809865952, |
| "rewards/rejected": -0.1161000058054924, |
| "step": 2395 |
| }, |
| { |
| "epoch": 3.324399260628466, |
| "grad_norm": 3.5117971897125244, |
| "learning_rate": 1.8181818181818182e-10, |
| "log_odds_chosen": 1.9471094608306885, |
| "log_odds_ratio": -0.2912288308143616, |
| "logits/chosen": 2.042628049850464, |
| "logits/rejected": 2.106832265853882, |
| "logps/chosen": -0.276217520236969, |
| "logps/rejected": -1.118571162223816, |
| "loss": 0.9807, |
| "nll_loss": 0.9515801668167114, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.02762174978852272, |
| "rewards/margins": 0.08423535525798798, |
| "rewards/rejected": -0.11185713112354279, |
| "step": 2400 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 2400, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 4, |
| "save_steps": 400, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|