| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9981298423724285, |
| "eval_steps": 500, |
| "global_step": 467, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0021373230029388193, |
| "grad_norm": 0.007689984980970621, |
| "learning_rate": 1.0638297872340425e-08, |
| "logits/chosen": -1.1381689310073853, |
| "logits/rejected": -0.9913416504859924, |
| "logps/chosen": -0.2839311957359314, |
| "logps/ref_chosen": -0.2839311957359314, |
| "logps/ref_rejected": -0.2955534756183624, |
| "logps/rejected": -0.2955534756183624, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.004274646005877639, |
| "grad_norm": 0.022346626967191696, |
| "learning_rate": 2.127659574468085e-08, |
| "logits/chosen": -1.0311710834503174, |
| "logits/rejected": -0.8901023864746094, |
| "logps/chosen": -0.24952735006809235, |
| "logps/ref_chosen": -0.24952735006809235, |
| "logps/ref_rejected": -0.24253402650356293, |
| "logps/rejected": -0.24253402650356293, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.006411969008816457, |
| "grad_norm": 0.018372567370533943, |
| "learning_rate": 3.191489361702127e-08, |
| "logits/chosen": -0.9175152778625488, |
| "logits/rejected": -0.8237950205802917, |
| "logps/chosen": -0.2627542316913605, |
| "logps/ref_chosen": -0.26232510805130005, |
| "logps/ref_rejected": -0.26657432317733765, |
| "logps/rejected": -0.2669898271560669, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -4.29145984526258e-06, |
| "rewards/margins": -1.36425114760641e-07, |
| "rewards/rejected": -4.155034730501939e-06, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.008549292011755277, |
| "grad_norm": 0.01221014279872179, |
| "learning_rate": 4.25531914893617e-08, |
| "logits/chosen": -0.8491243124008179, |
| "logits/rejected": -0.7499275803565979, |
| "logps/chosen": -0.27713102102279663, |
| "logps/ref_chosen": -0.2772371172904968, |
| "logps/ref_rejected": -0.2652103006839752, |
| "logps/rejected": -0.2652716636657715, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 1.0612650385155575e-06, |
| "rewards/margins": 1.6747367226344068e-06, |
| "rewards/rejected": -6.134716841188492e-07, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.010686615014694095, |
| "grad_norm": 0.018470866605639458, |
| "learning_rate": 5.3191489361702123e-08, |
| "logits/chosen": -1.1360951662063599, |
| "logits/rejected": -1.1910669803619385, |
| "logps/chosen": -0.28728771209716797, |
| "logps/ref_chosen": -0.2888604700565338, |
| "logps/ref_rejected": -0.2991960346698761, |
| "logps/rejected": -0.2990764081478119, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": 1.572728797327727e-05, |
| "rewards/margins": 1.4531043234455865e-05, |
| "rewards/rejected": 1.1962420103373006e-06, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.012823938017632914, |
| "grad_norm": 0.011232590302824974, |
| "learning_rate": 6.382978723404254e-08, |
| "logits/chosen": -1.0752513408660889, |
| "logits/rejected": -1.0426117181777954, |
| "logps/chosen": -0.2583611011505127, |
| "logps/ref_chosen": -0.25884705781936646, |
| "logps/ref_rejected": -0.2807583212852478, |
| "logps/rejected": -0.2806395888328552, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": 4.8596548367640935e-06, |
| "rewards/margins": 3.6720609841722762e-06, |
| "rewards/rejected": 1.1875943073391682e-06, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.014961261020571734, |
| "grad_norm": 0.017111822962760925, |
| "learning_rate": 7.446808510638298e-08, |
| "logits/chosen": -0.7687017917633057, |
| "logits/rejected": -0.7570354342460632, |
| "logps/chosen": -0.2555104196071625, |
| "logps/ref_chosen": -0.256194531917572, |
| "logps/ref_rejected": -0.2483622431755066, |
| "logps/rejected": -0.2475665658712387, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 6.841127742518438e-06, |
| "rewards/margins": -1.1156287200719817e-06, |
| "rewards/rejected": 7.956756235216744e-06, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.017098584023510555, |
| "grad_norm": 0.009269564412534237, |
| "learning_rate": 8.51063829787234e-08, |
| "logits/chosen": -0.9979547262191772, |
| "logits/rejected": -1.0166677236557007, |
| "logps/chosen": -0.24584153294563293, |
| "logps/ref_chosen": -0.24610446393489838, |
| "logps/ref_rejected": -0.26513946056365967, |
| "logps/rejected": -0.26514720916748047, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.3125, |
| "rewards/chosen": 2.629538812470855e-06, |
| "rewards/margins": 2.707344947339152e-06, |
| "rewards/rejected": -7.780818123137578e-08, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.01923590702644937, |
| "grad_norm": 0.009946140460669994, |
| "learning_rate": 9.574468085106382e-08, |
| "logits/chosen": -0.9871404767036438, |
| "logits/rejected": -0.8618879914283752, |
| "logps/chosen": -0.2772977650165558, |
| "logps/ref_chosen": -0.27598726749420166, |
| "logps/ref_rejected": -0.2570885717868805, |
| "logps/rejected": -0.25626546144485474, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -1.310529205511557e-05, |
| "rewards/margins": -2.1336549252737314e-05, |
| "rewards/rejected": 8.231256288127042e-06, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.02137323002938819, |
| "grad_norm": 0.0168624147772789, |
| "learning_rate": 1.0638297872340425e-07, |
| "logits/chosen": -1.0609350204467773, |
| "logits/rejected": -0.9935163855552673, |
| "logps/chosen": -0.32475292682647705, |
| "logps/ref_chosen": -0.3254537284374237, |
| "logps/ref_rejected": -0.3171599209308624, |
| "logps/rejected": -0.316974937915802, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": 7.008090506133158e-06, |
| "rewards/margins": 5.158278327144217e-06, |
| "rewards/rejected": 1.8498112694942392e-06, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.02351055303232701, |
| "grad_norm": 0.017769023776054382, |
| "learning_rate": 1.1702127659574468e-07, |
| "logits/chosen": -0.9694434404373169, |
| "logits/rejected": -0.8893564939498901, |
| "logps/chosen": -0.2821662127971649, |
| "logps/ref_chosen": -0.2829214334487915, |
| "logps/ref_rejected": -0.2508140206336975, |
| "logps/rejected": -0.2507040202617645, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 7.552359875262482e-06, |
| "rewards/margins": 6.4524219851591624e-06, |
| "rewards/rejected": 1.0999378901033197e-06, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.02564787603526583, |
| "grad_norm": 0.007083212491124868, |
| "learning_rate": 1.2765957446808508e-07, |
| "logits/chosen": -1.0059618949890137, |
| "logits/rejected": -1.0451630353927612, |
| "logps/chosen": -0.2930695116519928, |
| "logps/ref_chosen": -0.2954905033111572, |
| "logps/ref_rejected": -0.2674206793308258, |
| "logps/rejected": -0.2676032781600952, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 2.4210194169427268e-05, |
| "rewards/margins": 2.6036026611109264e-05, |
| "rewards/rejected": -1.8258299405715661e-06, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.027785199038204648, |
| "grad_norm": 0.016158975660800934, |
| "learning_rate": 1.3829787234042553e-07, |
| "logits/chosen": -0.8666880130767822, |
| "logits/rejected": -0.8786261081695557, |
| "logps/chosen": -0.31891337037086487, |
| "logps/ref_chosen": -0.3178931474685669, |
| "logps/ref_rejected": -0.31218719482421875, |
| "logps/rejected": -0.3121096193790436, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.0202438716078177e-05, |
| "rewards/margins": -1.0978243153658696e-05, |
| "rewards/rejected": 7.758055744488956e-07, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.029922522041143467, |
| "grad_norm": 0.014269937761127949, |
| "learning_rate": 1.4893617021276595e-07, |
| "logits/chosen": -0.8767918348312378, |
| "logits/rejected": -0.8516579270362854, |
| "logps/chosen": -0.3135755658149719, |
| "logps/ref_chosen": -0.3135443329811096, |
| "logps/ref_rejected": -0.29571518301963806, |
| "logps/rejected": -0.29494887590408325, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -3.123981286989874e-07, |
| "rewards/margins": -7.97547818365274e-06, |
| "rewards/rejected": 7.663081305508967e-06, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.03205984504408229, |
| "grad_norm": 0.013682817108929157, |
| "learning_rate": 1.5957446808510638e-07, |
| "logits/chosen": -1.1180726289749146, |
| "logits/rejected": -0.9456182718276978, |
| "logps/chosen": -0.271602064371109, |
| "logps/ref_chosen": -0.27180153131484985, |
| "logps/ref_rejected": -0.2904853820800781, |
| "logps/rejected": -0.29007938504219055, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 1.995021193579305e-06, |
| "rewards/margins": -2.0650886654038914e-06, |
| "rewards/rejected": 4.060110313730547e-06, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.03419716804702111, |
| "grad_norm": 0.010287854820489883, |
| "learning_rate": 1.702127659574468e-07, |
| "logits/chosen": -0.8917864561080933, |
| "logits/rejected": -0.8699406981468201, |
| "logps/chosen": -0.26601818203926086, |
| "logps/ref_chosen": -0.26547712087631226, |
| "logps/ref_rejected": -0.2835603356361389, |
| "logps/rejected": -0.2837405502796173, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -5.410788617155049e-06, |
| "rewards/margins": -3.6082185488339746e-06, |
| "rewards/rejected": -1.802570182007912e-06, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.03633449104995993, |
| "grad_norm": 0.02259673923254013, |
| "learning_rate": 1.8085106382978725e-07, |
| "logits/chosen": -0.7452818751335144, |
| "logits/rejected": -0.7833380699157715, |
| "logps/chosen": -0.27778589725494385, |
| "logps/ref_chosen": -0.27733802795410156, |
| "logps/ref_rejected": -0.2895258665084839, |
| "logps/rejected": -0.2907707691192627, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -4.478972641663859e-06, |
| "rewards/margins": 7.969993021106347e-06, |
| "rewards/rejected": -1.2448965208022855e-05, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.03847181405289874, |
| "grad_norm": 0.012114214710891247, |
| "learning_rate": 1.9148936170212765e-07, |
| "logits/chosen": -1.088286280632019, |
| "logits/rejected": -1.171863317489624, |
| "logps/chosen": -0.25360727310180664, |
| "logps/ref_chosen": -0.2539060115814209, |
| "logps/ref_rejected": -0.2567565441131592, |
| "logps/rejected": -0.25663307309150696, |
| "loss": 0.6932, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": 2.9874736355850473e-06, |
| "rewards/margins": 1.7528004718769807e-06, |
| "rewards/rejected": 1.2346724815870402e-06, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.04060913705583756, |
| "grad_norm": 0.014990179799497128, |
| "learning_rate": 2.0212765957446807e-07, |
| "logits/chosen": -1.132434368133545, |
| "logits/rejected": -1.0461602210998535, |
| "logps/chosen": -0.2719663679599762, |
| "logps/ref_chosen": -0.2721010148525238, |
| "logps/ref_rejected": -0.2818528413772583, |
| "logps/rejected": -0.2810749113559723, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": 1.3467488315654919e-06, |
| "rewards/margins": -6.432595910155214e-06, |
| "rewards/rejected": 7.779343832226004e-06, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.04274646005877638, |
| "grad_norm": 0.010641532018780708, |
| "learning_rate": 2.127659574468085e-07, |
| "logits/chosen": -1.0178560018539429, |
| "logits/rejected": -1.004392147064209, |
| "logps/chosen": -0.23923546075820923, |
| "logps/ref_chosen": -0.23994286358356476, |
| "logps/ref_rejected": -0.23484505712985992, |
| "logps/rejected": -0.23447804152965546, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 7.073986125760712e-06, |
| "rewards/margins": 3.4037689147226047e-06, |
| "rewards/rejected": 3.670217211038107e-06, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0448837830617152, |
| "grad_norm": 0.023398561403155327, |
| "learning_rate": 2.2340425531914892e-07, |
| "logits/chosen": -1.1628694534301758, |
| "logits/rejected": -1.0378841161727905, |
| "logps/chosen": -0.3366754949092865, |
| "logps/ref_chosen": -0.3352729082107544, |
| "logps/ref_rejected": -0.2747553586959839, |
| "logps/rejected": -0.27454015612602234, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -1.4025879863766022e-05, |
| "rewards/margins": -1.6177464203792624e-05, |
| "rewards/rejected": 2.151583657905576e-06, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.04702110606465402, |
| "grad_norm": 0.00764879398047924, |
| "learning_rate": 2.3404255319148937e-07, |
| "logits/chosen": -1.0775376558303833, |
| "logits/rejected": -1.0815640687942505, |
| "logps/chosen": -0.2640347182750702, |
| "logps/ref_chosen": -0.2644973695278168, |
| "logps/ref_rejected": -0.272342324256897, |
| "logps/rejected": -0.27291354537010193, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": 4.626465852197725e-06, |
| "rewards/margins": 1.0338952961319592e-05, |
| "rewards/rejected": -5.712486199627165e-06, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.04915842906759284, |
| "grad_norm": 0.0144369937479496, |
| "learning_rate": 2.4468085106382976e-07, |
| "logits/chosen": -0.8280113339424133, |
| "logits/rejected": -0.930314838886261, |
| "logps/chosen": -0.28610122203826904, |
| "logps/ref_chosen": -0.2855069637298584, |
| "logps/ref_rejected": -0.2918801009654999, |
| "logps/rejected": -0.29068028926849365, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -5.942350071563851e-06, |
| "rewards/margins": -1.7940790712600574e-05, |
| "rewards/rejected": 1.1998442460026126e-05, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.05129575207053166, |
| "grad_norm": 0.015191740356385708, |
| "learning_rate": 2.5531914893617016e-07, |
| "logits/chosen": -1.032031774520874, |
| "logits/rejected": -1.0416802167892456, |
| "logps/chosen": -0.2396797239780426, |
| "logps/ref_chosen": -0.2403019368648529, |
| "logps/ref_rejected": -0.26635223627090454, |
| "logps/rejected": -0.2668707072734833, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 6.222117917786818e-06, |
| "rewards/margins": 1.140668427979108e-05, |
| "rewards/rejected": -5.18456545250956e-06, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.053433075073470476, |
| "grad_norm": 0.010276973247528076, |
| "learning_rate": 2.659574468085106e-07, |
| "logits/chosen": -1.0849720239639282, |
| "logits/rejected": -0.978914737701416, |
| "logps/chosen": -0.2879059612751007, |
| "logps/ref_chosen": -0.2882329523563385, |
| "logps/ref_rejected": -0.3270376920700073, |
| "logps/rejected": -0.3266304135322571, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": 3.2699106213840423e-06, |
| "rewards/margins": -8.028832780837547e-07, |
| "rewards/rejected": 4.0727941268414725e-06, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.055570398076409296, |
| "grad_norm": 0.011395366862416267, |
| "learning_rate": 2.7659574468085106e-07, |
| "logits/chosen": -1.2485454082489014, |
| "logits/rejected": -1.1029356718063354, |
| "logps/chosen": -0.3046153485774994, |
| "logps/ref_chosen": -0.3046730160713196, |
| "logps/ref_rejected": -0.2980884313583374, |
| "logps/rejected": -0.2976985573768616, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": 5.76702746002411e-07, |
| "rewards/margins": -3.322050361020956e-06, |
| "rewards/rejected": 3.898753220710205e-06, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.057707721079348115, |
| "grad_norm": 0.012352628633379936, |
| "learning_rate": 2.872340425531915e-07, |
| "logits/chosen": -0.983556866645813, |
| "logits/rejected": -0.9578584432601929, |
| "logps/chosen": -0.2679211497306824, |
| "logps/ref_chosen": -0.26806604862213135, |
| "logps/ref_rejected": -0.2832719683647156, |
| "logps/rejected": -0.2837556302547455, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 1.4488115311905858e-06, |
| "rewards/margins": 6.285562449193094e-06, |
| "rewards/rejected": -4.836749212699942e-06, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.059845044082286934, |
| "grad_norm": 0.014464674517512321, |
| "learning_rate": 2.978723404255319e-07, |
| "logits/chosen": -0.8838961720466614, |
| "logits/rejected": -0.9321247935295105, |
| "logps/chosen": -0.26792505383491516, |
| "logps/ref_chosen": -0.26813918352127075, |
| "logps/ref_rejected": -0.27555111050605774, |
| "logps/rejected": -0.2754617929458618, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": 2.141330014637788e-06, |
| "rewards/margins": 1.248247599505703e-06, |
| "rewards/rejected": 8.930826993491792e-07, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.061982367085225754, |
| "grad_norm": 0.00987133290618658, |
| "learning_rate": 3.085106382978723e-07, |
| "logits/chosen": -1.0667345523834229, |
| "logits/rejected": -0.9910133481025696, |
| "logps/chosen": -0.26418349146842957, |
| "logps/ref_chosen": -0.26418259739875793, |
| "logps/ref_rejected": -0.26429852843284607, |
| "logps/rejected": -0.26450464129447937, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -8.870870260579977e-09, |
| "rewards/margins": 2.0521788428595755e-06, |
| "rewards/rejected": -2.061049144685967e-06, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.06411969008816458, |
| "grad_norm": 0.01733557879924774, |
| "learning_rate": 3.1914893617021275e-07, |
| "logits/chosen": -1.1936860084533691, |
| "logits/rejected": -0.954667866230011, |
| "logps/chosen": -0.2748079001903534, |
| "logps/ref_chosen": -0.2753724455833435, |
| "logps/ref_rejected": -0.25137749314308167, |
| "logps/rejected": -0.25217679142951965, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 5.645411874866113e-06, |
| "rewards/margins": 1.3638395103043877e-05, |
| "rewards/rejected": -7.992982318683062e-06, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.06625701309110339, |
| "grad_norm": 0.015033047646284103, |
| "learning_rate": 3.2978723404255315e-07, |
| "logits/chosen": -1.0733742713928223, |
| "logits/rejected": -0.9304803609848022, |
| "logps/chosen": -0.26805660128593445, |
| "logps/ref_chosen": -0.26908016204833984, |
| "logps/ref_rejected": -0.3245185613632202, |
| "logps/rejected": -0.32375532388687134, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 1.0235621630272362e-05, |
| "rewards/margins": 2.603152552183019e-06, |
| "rewards/rejected": 7.632468623341992e-06, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.06839433609404222, |
| "grad_norm": 0.023459795862436295, |
| "learning_rate": 3.404255319148936e-07, |
| "logits/chosen": -0.8882241249084473, |
| "logits/rejected": -0.8279258608818054, |
| "logps/chosen": -0.2662615478038788, |
| "logps/ref_chosen": -0.26683175563812256, |
| "logps/ref_rejected": -0.2915650010108948, |
| "logps/rejected": -0.29217085242271423, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": 5.702324870071607e-06, |
| "rewards/margins": 1.1760656889236998e-05, |
| "rewards/rejected": -6.058332019165391e-06, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.07053165909698103, |
| "grad_norm": 0.01294651534408331, |
| "learning_rate": 3.5106382978723405e-07, |
| "logits/chosen": -1.0685746669769287, |
| "logits/rejected": -0.9152853488922119, |
| "logps/chosen": -0.30385279655456543, |
| "logps/ref_chosen": -0.30274850130081177, |
| "logps/ref_rejected": -0.26406827569007874, |
| "logps/rejected": -0.26420876383781433, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.3125, |
| "rewards/chosen": -1.1043034646718297e-05, |
| "rewards/margins": -9.638061783334706e-06, |
| "rewards/rejected": -1.4049747960598324e-06, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.07266898209991986, |
| "grad_norm": 0.015486280433833599, |
| "learning_rate": 3.617021276595745e-07, |
| "logits/chosen": -0.9996099472045898, |
| "logits/rejected": -0.9759906530380249, |
| "logps/chosen": -0.29757317900657654, |
| "logps/ref_chosen": -0.2958250343799591, |
| "logps/ref_rejected": -0.33271917700767517, |
| "logps/rejected": -0.33422574400901794, |
| "loss": 0.6932, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -1.748139038681984e-05, |
| "rewards/margins": -2.4156097424565814e-06, |
| "rewards/rejected": -1.506578155385796e-05, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.07480630510285867, |
| "grad_norm": 0.04487946629524231, |
| "learning_rate": 3.7234042553191484e-07, |
| "logits/chosen": -1.0305012464523315, |
| "logits/rejected": -1.0103400945663452, |
| "logps/chosen": -0.2506798505783081, |
| "logps/ref_chosen": -0.250600129365921, |
| "logps/ref_rejected": -0.2823598384857178, |
| "logps/rejected": -0.28333860635757446, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -7.971750619617524e-07, |
| "rewards/margins": 8.990246897155885e-06, |
| "rewards/rejected": -9.787421731743962e-06, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.07694362810579748, |
| "grad_norm": 0.022634878754615784, |
| "learning_rate": 3.829787234042553e-07, |
| "logits/chosen": -0.7187901735305786, |
| "logits/rejected": -0.5943453907966614, |
| "logps/chosen": -0.2727569341659546, |
| "logps/ref_chosen": -0.2707131803035736, |
| "logps/ref_rejected": -0.2924281656742096, |
| "logps/rejected": -0.29428574442863464, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -2.043760832748376e-05, |
| "rewards/margins": -1.8620303308125585e-06, |
| "rewards/rejected": -1.85755779966712e-05, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.07908095110873631, |
| "grad_norm": 0.014530599117279053, |
| "learning_rate": 3.9361702127659574e-07, |
| "logits/chosen": -0.9728415608406067, |
| "logits/rejected": -0.9104939103126526, |
| "logps/chosen": -0.27670103311538696, |
| "logps/ref_chosen": -0.27655667066574097, |
| "logps/ref_rejected": -0.2618762254714966, |
| "logps/rejected": -0.2629542052745819, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -1.4433356909648865e-06, |
| "rewards/margins": 9.336811672255863e-06, |
| "rewards/rejected": -1.0780147022160236e-05, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.08121827411167512, |
| "grad_norm": 0.025673111900687218, |
| "learning_rate": 4.0425531914893614e-07, |
| "logits/chosen": -0.8544105291366577, |
| "logits/rejected": -0.8977361917495728, |
| "logps/chosen": -0.29955047369003296, |
| "logps/ref_chosen": -0.3001410961151123, |
| "logps/ref_rejected": -0.2903303802013397, |
| "logps/rejected": -0.291011780500412, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 5.9064805100206286e-06, |
| "rewards/margins": 1.2720241102215368e-05, |
| "rewards/rejected": -6.813761046942091e-06, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.08335559711461395, |
| "grad_norm": 0.01503435056656599, |
| "learning_rate": 4.148936170212766e-07, |
| "logits/chosen": -1.0454374551773071, |
| "logits/rejected": -1.1168955564498901, |
| "logps/chosen": -0.25362539291381836, |
| "logps/ref_chosen": -0.2543388605117798, |
| "logps/ref_rejected": -0.2731320261955261, |
| "logps/rejected": -0.2739657163619995, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 7.134648058126913e-06, |
| "rewards/margins": 1.547158717585262e-05, |
| "rewards/rejected": -8.336938662978355e-06, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.08549292011755276, |
| "grad_norm": 0.00940409954637289, |
| "learning_rate": 4.25531914893617e-07, |
| "logits/chosen": -1.069317102432251, |
| "logits/rejected": -1.0882292985916138, |
| "logps/chosen": -0.2877088785171509, |
| "logps/ref_chosen": -0.2875947654247284, |
| "logps/ref_rejected": -0.29288870096206665, |
| "logps/rejected": -0.29357314109802246, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -1.1409611033741385e-06, |
| "rewards/margins": 5.70340398553526e-06, |
| "rewards/rejected": -6.844364179414697e-06, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.08763024312049159, |
| "grad_norm": 0.02564193122088909, |
| "learning_rate": 4.3617021276595744e-07, |
| "logits/chosen": -1.0132936239242554, |
| "logits/rejected": -0.8744012117385864, |
| "logps/chosen": -0.3081262409687042, |
| "logps/ref_chosen": -0.30592209100723267, |
| "logps/ref_rejected": -0.33576250076293945, |
| "logps/rejected": -0.3351185619831085, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.3125, |
| "rewards/chosen": -2.2041573174647056e-05, |
| "rewards/margins": -2.8480830223998055e-05, |
| "rewards/rejected": 6.439256594603648e-06, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.0897675661234304, |
| "grad_norm": 0.015929123386740685, |
| "learning_rate": 4.4680851063829783e-07, |
| "logits/chosen": -1.031984567642212, |
| "logits/rejected": -0.8912105560302734, |
| "logps/chosen": -0.28306320309638977, |
| "logps/ref_chosen": -0.2827989161014557, |
| "logps/ref_rejected": -0.3255809545516968, |
| "logps/rejected": -0.3234784007072449, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.3125, |
| "rewards/chosen": -2.6425432224641554e-06, |
| "rewards/margins": -2.3668473659199663e-05, |
| "rewards/rejected": 2.1025929527240805e-05, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.09190488912636922, |
| "grad_norm": 0.014628097414970398, |
| "learning_rate": 4.574468085106383e-07, |
| "logits/chosen": -0.7411307096481323, |
| "logits/rejected": -0.793880045413971, |
| "logps/chosen": -0.35161709785461426, |
| "logps/ref_chosen": -0.3515586256980896, |
| "logps/ref_rejected": -0.2708003520965576, |
| "logps/rejected": -0.2736333906650543, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -5.844294719281606e-07, |
| "rewards/margins": 2.7745796614908613e-05, |
| "rewards/rejected": -2.8330226996331476e-05, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.09404221212930804, |
| "grad_norm": 0.022488975897431374, |
| "learning_rate": 4.6808510638297873e-07, |
| "logits/chosen": -0.8798326253890991, |
| "logits/rejected": -0.8225682973861694, |
| "logps/chosen": -0.4166314899921417, |
| "logps/ref_chosen": -0.4167863130569458, |
| "logps/ref_rejected": -0.4299992322921753, |
| "logps/rejected": -0.4309645891189575, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 1.5484633877349552e-06, |
| "rewards/margins": 1.1202228051843122e-05, |
| "rewards/rejected": -9.653764209360816e-06, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.09617953513224686, |
| "grad_norm": 0.009534979239106178, |
| "learning_rate": 4.787234042553192e-07, |
| "logits/chosen": -0.9829509854316711, |
| "logits/rejected": -0.8669167757034302, |
| "logps/chosen": -0.28755754232406616, |
| "logps/ref_chosen": -0.2829086184501648, |
| "logps/ref_rejected": -0.2908410429954529, |
| "logps/rejected": -0.2901032865047455, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -4.6489825763273984e-05, |
| "rewards/margins": -5.3867261158302426e-05, |
| "rewards/rejected": 7.37743448553374e-06, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.09831685813518568, |
| "grad_norm": 0.010269535705447197, |
| "learning_rate": 4.893617021276595e-07, |
| "logits/chosen": -0.8408491015434265, |
| "logits/rejected": -0.8828738927841187, |
| "logps/chosen": -0.2686249613761902, |
| "logps/ref_chosen": -0.27248162031173706, |
| "logps/ref_rejected": -0.2866136431694031, |
| "logps/rejected": -0.28627684712409973, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 3.856648618238978e-05, |
| "rewards/margins": 3.5198921978008e-05, |
| "rewards/rejected": 3.367568979228963e-06, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.1004541811381245, |
| "grad_norm": 0.019969303160905838, |
| "learning_rate": 5e-07, |
| "logits/chosen": -0.978872537612915, |
| "logits/rejected": -1.0489054918289185, |
| "logps/chosen": -0.2610996961593628, |
| "logps/ref_chosen": -0.2609337270259857, |
| "logps/ref_rejected": -0.2962002158164978, |
| "logps/rejected": -0.29735374450683594, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -1.6597844023635844e-06, |
| "rewards/margins": 9.875596333586145e-06, |
| "rewards/rejected": -1.1535379599081352e-05, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.10259150414106331, |
| "grad_norm": 0.012517467141151428, |
| "learning_rate": 4.999930062653174e-07, |
| "logits/chosen": -0.760237455368042, |
| "logits/rejected": -0.9387673139572144, |
| "logps/chosen": -0.3015531599521637, |
| "logps/ref_chosen": -0.3007725775241852, |
| "logps/ref_rejected": -0.28857314586639404, |
| "logps/rejected": -0.2933538556098938, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -7.806023859302513e-06, |
| "rewards/margins": 4.0000937588047236e-05, |
| "rewards/rejected": -4.780696326633915e-05, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.10472882714400214, |
| "grad_norm": 0.010651052929461002, |
| "learning_rate": 4.999720254525684e-07, |
| "logits/chosen": -1.040915608406067, |
| "logits/rejected": -0.894758939743042, |
| "logps/chosen": -0.31468233466148376, |
| "logps/ref_chosen": -0.3149026036262512, |
| "logps/ref_rejected": -0.32283082604408264, |
| "logps/rejected": -0.3264862596988678, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": 2.2025171801942633e-06, |
| "rewards/margins": 3.8756381400162354e-05, |
| "rewards/rejected": -3.655386535683647e-05, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.10686615014694095, |
| "grad_norm": 0.011120118200778961, |
| "learning_rate": 4.999370587356267e-07, |
| "logits/chosen": -1.0271286964416504, |
| "logits/rejected": -0.934863805770874, |
| "logps/chosen": -0.31888121366500854, |
| "logps/ref_chosen": -0.31778770685195923, |
| "logps/ref_rejected": -0.331936776638031, |
| "logps/rejected": -0.335194855928421, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.0934966667264234e-05, |
| "rewards/margins": 2.16454645851627e-05, |
| "rewards/rejected": -3.258042852394283e-05, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.10900347314987978, |
| "grad_norm": 0.011493810452520847, |
| "learning_rate": 4.998881080708758e-07, |
| "logits/chosen": -0.7647773623466492, |
| "logits/rejected": -0.7800005078315735, |
| "logps/chosen": -0.22104176878929138, |
| "logps/ref_chosen": -0.22169610857963562, |
| "logps/ref_rejected": -0.2501818537712097, |
| "logps/rejected": -0.25141361355781555, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 6.543561994476477e-06, |
| "rewards/margins": 1.8861162971006706e-05, |
| "rewards/rejected": -1.2317602340772282e-05, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.11114079615281859, |
| "grad_norm": 0.014222142286598682, |
| "learning_rate": 4.998251761970996e-07, |
| "logits/chosen": -0.9322632551193237, |
| "logits/rejected": -0.9878026843070984, |
| "logps/chosen": -0.30190595984458923, |
| "logps/ref_chosen": -0.30281564593315125, |
| "logps/ref_rejected": -0.29201021790504456, |
| "logps/rejected": -0.29405561089515686, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 9.096706889977213e-06, |
| "rewards/margins": 2.9550548788392916e-05, |
| "rewards/rejected": -2.0453840988921e-05, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.11327811915575742, |
| "grad_norm": 0.03754918649792671, |
| "learning_rate": 4.997482666353286e-07, |
| "logits/chosen": -0.9069135189056396, |
| "logits/rejected": -0.8085984587669373, |
| "logps/chosen": -0.2877419888973236, |
| "logps/ref_chosen": -0.28830504417419434, |
| "logps/ref_rejected": -0.30396074056625366, |
| "logps/rejected": -0.3050187826156616, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 5.630683062918251e-06, |
| "rewards/margins": 1.6211228285101242e-05, |
| "rewards/rejected": -1.0580546586425044e-05, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.11541544215869623, |
| "grad_norm": 0.021419456228613853, |
| "learning_rate": 4.996573836886434e-07, |
| "logits/chosen": -0.9866681694984436, |
| "logits/rejected": -0.9116556644439697, |
| "logps/chosen": -0.2705164849758148, |
| "logps/ref_chosen": -0.27046725153923035, |
| "logps/ref_rejected": -0.2800578474998474, |
| "logps/rejected": -0.2831159830093384, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -4.921528216073057e-07, |
| "rewards/margins": 3.008873864018824e-05, |
| "rewards/rejected": -3.058088987017982e-05, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.11755276516163506, |
| "grad_norm": 0.011349153704941273, |
| "learning_rate": 4.995525324419337e-07, |
| "logits/chosen": -1.036462664604187, |
| "logits/rejected": -0.8439108729362488, |
| "logps/chosen": -0.23065927624702454, |
| "logps/ref_chosen": -0.23186782002449036, |
| "logps/ref_rejected": -0.26418453454971313, |
| "logps/rejected": -0.2615554630756378, |
| "loss": 0.6932, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": 1.2085427442798391e-05, |
| "rewards/margins": -1.4205294064595364e-05, |
| "rewards/rejected": 2.629072332638316e-05, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.11969008816457387, |
| "grad_norm": 0.018369581550359726, |
| "learning_rate": 4.99433718761614e-07, |
| "logits/chosen": -0.8622905015945435, |
| "logits/rejected": -0.8696987628936768, |
| "logps/chosen": -0.27654391527175903, |
| "logps/ref_chosen": -0.2785589396953583, |
| "logps/ref_rejected": -0.282687246799469, |
| "logps/rejected": -0.28860339522361755, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 2.0150633645243943e-05, |
| "rewards/margins": 7.931220170576125e-05, |
| "rewards/rejected": -5.9161575336474925e-05, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.1218274111675127, |
| "grad_norm": 0.00998850166797638, |
| "learning_rate": 4.993009492952949e-07, |
| "logits/chosen": -0.945175290107727, |
| "logits/rejected": -0.9578278064727783, |
| "logps/chosen": -0.24169297516345978, |
| "logps/ref_chosen": -0.2417047768831253, |
| "logps/ref_rejected": -0.2775600254535675, |
| "logps/rejected": -0.2778019309043884, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 1.1837528290925547e-07, |
| "rewards/margins": 2.5373828975716606e-06, |
| "rewards/rejected": -2.419008524157107e-06, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.12396473417045151, |
| "grad_norm": 0.01029076799750328, |
| "learning_rate": 4.991542314714122e-07, |
| "logits/chosen": -1.1732174158096313, |
| "logits/rejected": -1.038010597229004, |
| "logps/chosen": -0.2845836579799652, |
| "logps/ref_chosen": -0.2844545245170593, |
| "logps/ref_rejected": -0.3062823414802551, |
| "logps/rejected": -0.30405789613723755, |
| "loss": 0.6932, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.2912535112263868e-06, |
| "rewards/margins": -2.353566378587857e-05, |
| "rewards/rejected": 2.2244410502025858e-05, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.12610205717339032, |
| "grad_norm": 0.015407534316182137, |
| "learning_rate": 4.989935734988097e-07, |
| "logits/chosen": -0.851952314376831, |
| "logits/rejected": -0.899534285068512, |
| "logps/chosen": -0.22782140970230103, |
| "logps/ref_chosen": -0.22821755707263947, |
| "logps/ref_rejected": -0.2622910737991333, |
| "logps/rejected": -0.26224446296691895, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 3.961622041970259e-06, |
| "rewards/margins": 3.4955371575051686e-06, |
| "rewards/rejected": 4.660850549953466e-07, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.12823938017632916, |
| "grad_norm": 0.009221049025654793, |
| "learning_rate": 4.988189843662815e-07, |
| "logits/chosen": -0.9586580991744995, |
| "logits/rejected": -0.9132787585258484, |
| "logps/chosen": -0.28018832206726074, |
| "logps/ref_chosen": -0.2809354364871979, |
| "logps/ref_rejected": -0.2671273350715637, |
| "logps/rejected": -0.2690158486366272, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": 7.470855507563101e-06, |
| "rewards/margins": 2.6355886802775785e-05, |
| "rewards/rejected": -1.888503356894944e-05, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.13037670317926797, |
| "grad_norm": 0.01639932580292225, |
| "learning_rate": 4.986304738420683e-07, |
| "logits/chosen": -0.8779729008674622, |
| "logits/rejected": -0.893934965133667, |
| "logps/chosen": -0.23751872777938843, |
| "logps/ref_chosen": -0.237104594707489, |
| "logps/ref_rejected": -0.2468222677707672, |
| "logps/rejected": -0.24747203290462494, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -4.141242243349552e-06, |
| "rewards/margins": 2.356492586841341e-06, |
| "rewards/rejected": -6.497734830190893e-06, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.13251402618220678, |
| "grad_norm": 0.01576075330376625, |
| "learning_rate": 4.984280524733107e-07, |
| "logits/chosen": -0.9255910515785217, |
| "logits/rejected": -1.078086256980896, |
| "logps/chosen": -0.2566693127155304, |
| "logps/ref_chosen": -0.25752905011177063, |
| "logps/ref_rejected": -0.2618916630744934, |
| "logps/rejected": -0.2641328275203705, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 8.597522537456825e-06, |
| "rewards/margins": 3.1009101803647354e-05, |
| "rewards/rejected": -2.2411577447201125e-05, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.1346513491851456, |
| "grad_norm": 0.010112633928656578, |
| "learning_rate": 4.982117315854593e-07, |
| "logits/chosen": -0.9503496885299683, |
| "logits/rejected": -1.1387770175933838, |
| "logps/chosen": -0.2760920524597168, |
| "logps/ref_chosen": -0.277058482170105, |
| "logps/ref_rejected": -0.2878873944282532, |
| "logps/rejected": -0.28774750232696533, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": 9.664139724918641e-06, |
| "rewards/margins": 8.265087672043592e-06, |
| "rewards/rejected": 1.399051370754023e-06, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.13678867218808444, |
| "grad_norm": 0.0113232946023345, |
| "learning_rate": 4.979815232816416e-07, |
| "logits/chosen": -0.9884060621261597, |
| "logits/rejected": -0.8609364628791809, |
| "logps/chosen": -0.2811279594898224, |
| "logps/ref_chosen": -0.27331385016441345, |
| "logps/ref_rejected": -0.26352259516716003, |
| "logps/rejected": -0.26227083802223206, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -7.814115087967366e-05, |
| "rewards/margins": -9.065894118975848e-05, |
| "rewards/rejected": 1.2517777577158995e-05, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.13892599519102325, |
| "grad_norm": 0.015421329066157341, |
| "learning_rate": 4.977374404419837e-07, |
| "logits/chosen": -1.002480387687683, |
| "logits/rejected": -1.0198136568069458, |
| "logps/chosen": -0.27636951208114624, |
| "logps/ref_chosen": -0.27653729915618896, |
| "logps/ref_rejected": -0.2574266791343689, |
| "logps/rejected": -0.25777584314346313, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 1.6778100189185352e-06, |
| "rewards/margins": 5.1697143135243095e-06, |
| "rewards/rejected": -3.491903953545261e-06, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.14106331819396206, |
| "grad_norm": 0.008260359056293964, |
| "learning_rate": 4.974794967228907e-07, |
| "logits/chosen": -1.021135687828064, |
| "logits/rejected": -0.9880024790763855, |
| "logps/chosen": -0.2904754877090454, |
| "logps/ref_chosen": -0.2898096740245819, |
| "logps/ref_rejected": -0.31103476881980896, |
| "logps/rejected": -0.32018041610717773, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -6.658020083705196e-06, |
| "rewards/margins": 8.4798812167719e-05, |
| "rewards/rejected": -9.145682997768745e-05, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.14320064119690087, |
| "grad_norm": 0.01641656644642353, |
| "learning_rate": 4.972077065562821e-07, |
| "logits/chosen": -0.9692586660385132, |
| "logits/rejected": -1.0919312238693237, |
| "logps/chosen": -0.3143393397331238, |
| "logps/ref_chosen": -0.2907385528087616, |
| "logps/ref_rejected": -0.2902086675167084, |
| "logps/rejected": -0.3072502613067627, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.00023600776330567896, |
| "rewards/margins": -6.559171015396714e-05, |
| "rewards/rejected": -0.00017041602404788136, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.14533796419983971, |
| "grad_norm": 0.008077923208475113, |
| "learning_rate": 4.969220851487844e-07, |
| "logits/chosen": -1.0057110786437988, |
| "logits/rejected": -0.9600222706794739, |
| "logps/chosen": -0.34587785601615906, |
| "logps/ref_chosen": -0.3427581787109375, |
| "logps/ref_rejected": -0.34369489550590515, |
| "logps/rejected": -0.3420814871788025, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -3.119698885711841e-05, |
| "rewards/margins": -4.73311374662444e-05, |
| "rewards/rejected": 1.6134148609125987e-05, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.14747528720277853, |
| "grad_norm": 0.01333937980234623, |
| "learning_rate": 4.966226484808803e-07, |
| "logits/chosen": -0.9484730362892151, |
| "logits/rejected": -0.8359491229057312, |
| "logps/chosen": -0.283067911863327, |
| "logps/ref_chosen": -0.28584858775138855, |
| "logps/ref_rejected": -0.27188485860824585, |
| "logps/rejected": -0.2991316318511963, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 2.780670183710754e-05, |
| "rewards/margins": 0.0003002745215781033, |
| "rewards/rejected": -0.00027246781974099576, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.14961261020571734, |
| "grad_norm": 0.009389792568981647, |
| "learning_rate": 4.963094133060148e-07, |
| "logits/chosen": -0.9869619607925415, |
| "logits/rejected": -0.8988137245178223, |
| "logps/chosen": -0.28715410828590393, |
| "logps/ref_chosen": -0.2897653877735138, |
| "logps/ref_rejected": -0.23805168271064758, |
| "logps/rejected": -0.2384706288576126, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": 2.6113073545275256e-05, |
| "rewards/margins": 3.030270636372734e-05, |
| "rewards/rejected": -4.1896332731994335e-06, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.15174993320865615, |
| "grad_norm": 0.00991885643452406, |
| "learning_rate": 4.959823971496574e-07, |
| "logits/chosen": -1.0599706172943115, |
| "logits/rejected": -0.9936259984970093, |
| "logps/chosen": -0.3054412305355072, |
| "logps/ref_chosen": -0.30530205368995667, |
| "logps/ref_rejected": -0.28058379888534546, |
| "logps/rejected": -0.2982639968395233, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -1.3915641829953529e-06, |
| "rewards/margins": 0.00017541060515213758, |
| "rewards/rejected": -0.00017680219025351107, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.15388725621159496, |
| "grad_norm": 0.012957648374140263, |
| "learning_rate": 4.956416183083221e-07, |
| "logits/chosen": -1.0379211902618408, |
| "logits/rejected": -1.0282961130142212, |
| "logps/chosen": -0.2620239853858948, |
| "logps/ref_chosen": -0.26093459129333496, |
| "logps/ref_rejected": -0.27058857679367065, |
| "logps/rejected": -0.2717420160770416, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.0893772923736833e-05, |
| "rewards/margins": 6.405637122952612e-07, |
| "rewards/rejected": -1.1534336408658419e-05, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.1560245792145338, |
| "grad_norm": 0.02365749701857567, |
| "learning_rate": 4.952870958485431e-07, |
| "logits/chosen": -0.7574669122695923, |
| "logits/rejected": -0.7690973877906799, |
| "logps/chosen": -0.31582480669021606, |
| "logps/ref_chosen": -0.25771141052246094, |
| "logps/ref_rejected": -0.3149224519729614, |
| "logps/rejected": -0.42756789922714233, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.0005811337614431977, |
| "rewards/margins": 0.0005453207995742559, |
| "rewards/rejected": -0.0011264545610174537, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.15816190221747262, |
| "grad_norm": 0.028325913473963737, |
| "learning_rate": 4.949188496058089e-07, |
| "logits/chosen": -0.8773558139801025, |
| "logits/rejected": -0.9261090159416199, |
| "logps/chosen": -0.2696930468082428, |
| "logps/ref_chosen": -0.26936718821525574, |
| "logps/ref_rejected": -0.25033333897590637, |
| "logps/rejected": -0.24952727556228638, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -3.2585699045739602e-06, |
| "rewards/margins": -1.1319078112137504e-05, |
| "rewards/rejected": 8.060508662310895e-06, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.16029922522041143, |
| "grad_norm": 0.020500406622886658, |
| "learning_rate": 4.945369001834514e-07, |
| "logits/chosen": -1.0802488327026367, |
| "logits/rejected": -1.0232303142547607, |
| "logps/chosen": -0.2651379108428955, |
| "logps/ref_chosen": -0.2662196159362793, |
| "logps/ref_rejected": -0.2821120619773865, |
| "logps/rejected": -0.2911551594734192, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 1.0817102520377375e-05, |
| "rewards/margins": 0.00010124808613909408, |
| "rewards/rejected": -9.043098543770611e-05, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.16243654822335024, |
| "grad_norm": 0.011036383919417858, |
| "learning_rate": 4.941412689514941e-07, |
| "logits/chosen": -1.1717069149017334, |
| "logits/rejected": -1.2359044551849365, |
| "logps/chosen": -0.2618694305419922, |
| "logps/ref_chosen": -0.26608312129974365, |
| "logps/ref_rejected": -0.29494112730026245, |
| "logps/rejected": -0.2930205464363098, |
| "loss": 0.6932, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 4.2137078708037734e-05, |
| "rewards/margins": 2.2930735212867148e-05, |
| "rewards/rejected": 1.9206347133149393e-05, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.16457387122628908, |
| "grad_norm": 0.011347147636115551, |
| "learning_rate": 4.937319780454559e-07, |
| "logits/chosen": -0.8374221324920654, |
| "logits/rejected": -0.777981162071228, |
| "logps/chosen": -0.2897748053073883, |
| "logps/ref_chosen": -0.28825703263282776, |
| "logps/ref_rejected": -0.2943416237831116, |
| "logps/rejected": -0.3115558624267578, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -1.5177444765868131e-05, |
| "rewards/margins": 0.00015696504851803184, |
| "rewards/rejected": -0.0001721425069263205, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.1667111942292279, |
| "grad_norm": 0.014452864415943623, |
| "learning_rate": 4.933090503651128e-07, |
| "logits/chosen": -0.9860453605651855, |
| "logits/rejected": -0.948147714138031, |
| "logps/chosen": -0.28923821449279785, |
| "logps/ref_chosen": -0.2889617085456848, |
| "logps/ref_rejected": -0.2535881996154785, |
| "logps/rejected": -0.259809672832489, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -2.7648175091599114e-06, |
| "rewards/margins": 5.944987424300052e-05, |
| "rewards/rejected": -6.221469084266573e-05, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.1688485172321667, |
| "grad_norm": 0.014155249111354351, |
| "learning_rate": 4.928725095732168e-07, |
| "logits/chosen": -0.7540971636772156, |
| "logits/rejected": -0.8604491353034973, |
| "logps/chosen": -0.27015241980552673, |
| "logps/ref_chosen": -0.2447582483291626, |
| "logps/ref_rejected": -0.21547015011310577, |
| "logps/rejected": -0.3580566346645355, |
| "loss": 0.693, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.00025394154363311827, |
| "rewards/margins": 0.0011719234753400087, |
| "rewards/rejected": -0.0014258649898692966, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.17098584023510552, |
| "grad_norm": 0.016085723415017128, |
| "learning_rate": 4.924223800941717e-07, |
| "logits/chosen": -1.1459994316101074, |
| "logits/rejected": -0.9876860976219177, |
| "logps/chosen": -0.30637502670288086, |
| "logps/ref_chosen": -0.2931276261806488, |
| "logps/ref_rejected": -0.27700746059417725, |
| "logps/rejected": -0.28229862451553345, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.00013247402966953814, |
| "rewards/margins": -7.956250919960439e-05, |
| "rewards/rejected": -5.291152046993375e-05, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.17312316323804436, |
| "grad_norm": 0.016921188682317734, |
| "learning_rate": 4.919586871126667e-07, |
| "logits/chosen": -1.1154288053512573, |
| "logits/rejected": -1.0628000497817993, |
| "logps/chosen": -0.28829848766326904, |
| "logps/ref_chosen": -0.28591427206993103, |
| "logps/ref_rejected": -0.3139474391937256, |
| "logps/rejected": -0.3195532262325287, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -2.38419343077112e-05, |
| "rewards/margins": 3.2216143154073507e-05, |
| "rewards/rejected": -5.60580738238059e-05, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.17526048624098317, |
| "grad_norm": 0.01958172582089901, |
| "learning_rate": 4.91481456572267e-07, |
| "logits/chosen": -1.0280213356018066, |
| "logits/rejected": -0.774710476398468, |
| "logps/chosen": -0.26568594574928284, |
| "logps/ref_chosen": -0.23280677199363708, |
| "logps/ref_rejected": -0.24202194809913635, |
| "logps/rejected": -0.279081791639328, |
| "loss": 0.693, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.00032879196805879474, |
| "rewards/margins": 4.1806502849794924e-05, |
| "rewards/rejected": -0.00037059851456433535, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.17739780924392198, |
| "grad_norm": 0.008814085274934769, |
| "learning_rate": 4.909907151739633e-07, |
| "logits/chosen": -0.8065323233604431, |
| "logits/rejected": -0.8235383033752441, |
| "logps/chosen": -0.25440314412117004, |
| "logps/ref_chosen": -0.252414345741272, |
| "logps/ref_rejected": -0.21866297721862793, |
| "logps/rejected": -0.2297726273536682, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -1.988819713005796e-05, |
| "rewards/margins": 9.120807226281613e-05, |
| "rewards/rejected": -0.0001110962766688317, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.1795351322468608, |
| "grad_norm": 0.01849239319562912, |
| "learning_rate": 4.904864903746765e-07, |
| "logits/chosen": -0.7943837642669678, |
| "logits/rejected": -0.8399622440338135, |
| "logps/chosen": -0.3016873896121979, |
| "logps/ref_chosen": -0.3008642792701721, |
| "logps/ref_rejected": -0.30154949426651, |
| "logps/rejected": -0.3004867732524872, |
| "loss": 0.6932, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -8.230955245380756e-06, |
| "rewards/margins": -1.8858294424717315e-05, |
| "rewards/rejected": 1.0627340998325963e-05, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.18167245524979964, |
| "grad_norm": 0.00835150945931673, |
| "learning_rate": 4.899688103857222e-07, |
| "logits/chosen": -0.8947075009346008, |
| "logits/rejected": -0.8859119415283203, |
| "logps/chosen": -0.2488580346107483, |
| "logps/ref_chosen": -0.24286296963691711, |
| "logps/ref_rejected": -0.2981007695198059, |
| "logps/rejected": -0.31307339668273926, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -5.9950569266220555e-05, |
| "rewards/margins": 8.977564721135423e-05, |
| "rewards/rejected": -0.00014972621283959597, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.18380977825273845, |
| "grad_norm": 0.008339953608810902, |
| "learning_rate": 4.894377041712326e-07, |
| "logits/chosen": -0.6940639019012451, |
| "logits/rejected": -0.6466383337974548, |
| "logps/chosen": -0.24655766785144806, |
| "logps/ref_chosen": -0.2403053343296051, |
| "logps/ref_rejected": -0.27790510654449463, |
| "logps/rejected": -0.3048871159553528, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -6.252338062040508e-05, |
| "rewards/margins": 0.00020729642710648477, |
| "rewards/rejected": -0.0002698198368307203, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.18594710125567726, |
| "grad_norm": 0.023079147562384605, |
| "learning_rate": 4.888932014465352e-07, |
| "logits/chosen": -0.9037274122238159, |
| "logits/rejected": -0.8148818612098694, |
| "logps/chosen": -0.2838499844074249, |
| "logps/ref_chosen": -0.28337472677230835, |
| "logps/ref_rejected": -0.28008130192756653, |
| "logps/rejected": -0.2978307008743286, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -4.752602762891911e-06, |
| "rewards/margins": 0.00017274172569159418, |
| "rewards/rejected": -0.00017749430844560266, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.18808442425861607, |
| "grad_norm": 0.014020058326423168, |
| "learning_rate": 4.883353326764906e-07, |
| "logits/chosen": -0.9043616056442261, |
| "logits/rejected": -0.8512974381446838, |
| "logps/chosen": -0.25630706548690796, |
| "logps/ref_chosen": -0.26032066345214844, |
| "logps/ref_rejected": -0.3220667541027069, |
| "logps/rejected": -0.43702277541160583, |
| "loss": 0.693, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 4.0135986637324095e-05, |
| "rewards/margins": 0.0011896961368620396, |
| "rewards/rejected": -0.0011495600920170546, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.1902217472615549, |
| "grad_norm": 0.01570913940668106, |
| "learning_rate": 4.877641290737883e-07, |
| "logits/chosen": -0.9995778203010559, |
| "logits/rejected": -1.0022608041763306, |
| "logps/chosen": -0.2513532340526581, |
| "logps/ref_chosen": -0.2506324350833893, |
| "logps/ref_rejected": -0.28245270252227783, |
| "logps/rejected": -0.28889286518096924, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -7.207980161183514e-06, |
| "rewards/margins": 5.719395267078653e-05, |
| "rewards/rejected": -6.440193101298064e-05, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.19235907026449373, |
| "grad_norm": 0.011741074733436108, |
| "learning_rate": 4.871796225971999e-07, |
| "logits/chosen": -0.9824739694595337, |
| "logits/rejected": -0.8545064926147461, |
| "logps/chosen": -0.2665407657623291, |
| "logps/ref_chosen": -0.26319509744644165, |
| "logps/ref_rejected": -0.28182491660118103, |
| "logps/rejected": -0.31050342321395874, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -3.345655932207592e-05, |
| "rewards/margins": 0.000253328587859869, |
| "rewards/rejected": -0.00028678515809588134, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.19449639326743254, |
| "grad_norm": 0.014778682962059975, |
| "learning_rate": 4.86581845949791e-07, |
| "logits/chosen": -0.9443565607070923, |
| "logits/rejected": -1.004144549369812, |
| "logps/chosen": -0.25407537817955017, |
| "logps/ref_chosen": -0.2525879740715027, |
| "logps/ref_rejected": -0.28145086765289307, |
| "logps/rejected": -0.28960704803466797, |
| "loss": 0.693, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -1.4874065527692437e-05, |
| "rewards/margins": 6.668754940619692e-05, |
| "rewards/rejected": -8.156162220984697e-05, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.19663371627037135, |
| "grad_norm": 0.011612450703978539, |
| "learning_rate": 4.859708325770919e-07, |
| "logits/chosen": -1.1069070100784302, |
| "logits/rejected": -1.1541920900344849, |
| "logps/chosen": -0.2814819812774658, |
| "logps/ref_chosen": -0.2681662440299988, |
| "logps/ref_rejected": -0.3379825949668884, |
| "logps/rejected": -0.3709861934185028, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.00013315714022610337, |
| "rewards/margins": 0.00019687906024046242, |
| "rewards/rejected": -0.000330036215018481, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.1987710392733102, |
| "grad_norm": 0.015046533197164536, |
| "learning_rate": 4.853466166652258e-07, |
| "logits/chosen": -0.9910311102867126, |
| "logits/rejected": -0.9629150629043579, |
| "logps/chosen": -0.2537398040294647, |
| "logps/ref_chosen": -0.2430555820465088, |
| "logps/ref_rejected": -0.2784229516983032, |
| "logps/rejected": -0.2843584716320038, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.3125, |
| "rewards/chosen": -0.00010684227163437754, |
| "rewards/margins": -4.748682476929389e-05, |
| "rewards/rejected": -5.935545050306246e-05, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.200908362276249, |
| "grad_norm": 0.010888271033763885, |
| "learning_rate": 4.847092331389964e-07, |
| "logits/chosen": -0.76346355676651, |
| "logits/rejected": -0.7851640582084656, |
| "logps/chosen": -0.2640366554260254, |
| "logps/ref_chosen": -0.24500182271003723, |
| "logps/ref_rejected": -0.2712894380092621, |
| "logps/rejected": -0.28185611963272095, |
| "loss": 0.6932, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.0001903482188936323, |
| "rewards/margins": -8.468166925013065e-05, |
| "rewards/rejected": -0.00010566655691945925, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.20304568527918782, |
| "grad_norm": 0.01479202788323164, |
| "learning_rate": 4.840587176599343e-07, |
| "logits/chosen": -1.1556029319763184, |
| "logits/rejected": -1.166841745376587, |
| "logps/chosen": -0.34447798132896423, |
| "logps/ref_chosen": -0.3106793463230133, |
| "logps/ref_rejected": -0.29875442385673523, |
| "logps/rejected": -0.306598961353302, |
| "loss": 0.693, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.00033798645017668605, |
| "rewards/margins": -0.0002595410624053329, |
| "rewards/rejected": -7.844540232326835e-05, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.20518300828212663, |
| "grad_norm": 0.008516538888216019, |
| "learning_rate": 4.833951066243004e-07, |
| "logits/chosen": -0.9835708737373352, |
| "logits/rejected": -0.925228476524353, |
| "logps/chosen": -0.29244038462638855, |
| "logps/ref_chosen": -0.2901764512062073, |
| "logps/ref_rejected": -0.2603149116039276, |
| "logps/rejected": -0.264593243598938, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -2.2639276721747592e-05, |
| "rewards/margins": 2.0144128939136863e-05, |
| "rewards/rejected": -4.2783405660884455e-05, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.20732033128506547, |
| "grad_norm": 0.01696189120411873, |
| "learning_rate": 4.82718437161051e-07, |
| "logits/chosen": -0.9727522134780884, |
| "logits/rejected": -1.022587537765503, |
| "logps/chosen": -0.2664162516593933, |
| "logps/ref_chosen": -0.2573895752429962, |
| "logps/ref_rejected": -0.23220883309841156, |
| "logps/rejected": -0.2517354488372803, |
| "loss": 0.6932, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -9.026652696775272e-05, |
| "rewards/margins": 0.00010499963536858559, |
| "rewards/rejected": -0.0001952661550603807, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.20945765428800428, |
| "grad_norm": 0.009048497304320335, |
| "learning_rate": 4.820287471297597e-07, |
| "logits/chosen": -1.102674961090088, |
| "logits/rejected": -0.9738333821296692, |
| "logps/chosen": -0.2764867842197418, |
| "logps/ref_chosen": -0.2627997100353241, |
| "logps/ref_rejected": -0.2761891186237335, |
| "logps/rejected": -0.2841818332672119, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.00013687046885024756, |
| "rewards/margins": -5.694321225746535e-05, |
| "rewards/rejected": -7.992725295480341e-05, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.2115949772909431, |
| "grad_norm": 0.0225482489913702, |
| "learning_rate": 4.813260751184992e-07, |
| "logits/chosen": -1.0213857889175415, |
| "logits/rejected": -0.8931246995925903, |
| "logps/chosen": -0.23238977789878845, |
| "logps/ref_chosen": -0.23008745908737183, |
| "logps/ref_rejected": -0.2689017057418823, |
| "logps/rejected": -0.2824597656726837, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -2.3023245375952683e-05, |
| "rewards/margins": 0.00011255730350967497, |
| "rewards/rejected": -0.00013558054342865944, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.2137323002938819, |
| "grad_norm": 0.01150372065603733, |
| "learning_rate": 4.806104604416823e-07, |
| "logits/chosen": -1.1939239501953125, |
| "logits/rejected": -1.1786190271377563, |
| "logps/chosen": -0.4149876534938812, |
| "logps/ref_chosen": -0.3101845383644104, |
| "logps/ref_rejected": -0.27805304527282715, |
| "logps/rejected": -0.32359492778778076, |
| "loss": 0.6932, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.0010480312630534172, |
| "rewards/margins": -0.0005926126032136381, |
| "rewards/rejected": -0.00045541865983977914, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.21586962329682075, |
| "grad_norm": 0.018055813387036324, |
| "learning_rate": 4.798819431378626e-07, |
| "logits/chosen": -0.9665027260780334, |
| "logits/rejected": -0.9374414682388306, |
| "logps/chosen": -0.2667009234428406, |
| "logps/ref_chosen": -0.26045846939086914, |
| "logps/ref_rejected": -0.2765519618988037, |
| "logps/rejected": -0.313286691904068, |
| "loss": 0.693, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -6.242447125259787e-05, |
| "rewards/margins": 0.0003049227234441787, |
| "rewards/rejected": -0.0003673472092486918, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.21800694629975956, |
| "grad_norm": 0.014859364368021488, |
| "learning_rate": 4.79140563967494e-07, |
| "logits/chosen": -0.9525546431541443, |
| "logits/rejected": -0.9209149479866028, |
| "logps/chosen": -0.27881696820259094, |
| "logps/ref_chosen": -0.2786974012851715, |
| "logps/ref_rejected": -0.28768932819366455, |
| "logps/rejected": -0.2919776439666748, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -1.1954525689361617e-06, |
| "rewards/margins": 4.1687642806209624e-05, |
| "rewards/rejected": -4.288309719413519e-05, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.22014426930269837, |
| "grad_norm": 0.01960882730782032, |
| "learning_rate": 4.783863644106502e-07, |
| "logits/chosen": -0.9547258615493774, |
| "logits/rejected": -0.9077056646347046, |
| "logps/chosen": -0.25836876034736633, |
| "logps/ref_chosen": -0.25997382402420044, |
| "logps/ref_rejected": -0.2916034758090973, |
| "logps/rejected": -0.294665664434433, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": 1.605085162736941e-05, |
| "rewards/margins": 4.6672881580889225e-05, |
| "rewards/rejected": -3.0622020858572796e-05, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.22228159230563718, |
| "grad_norm": 0.00867555569857359, |
| "learning_rate": 4.776193866647039e-07, |
| "logits/chosen": -1.0706658363342285, |
| "logits/rejected": -0.9061423540115356, |
| "logps/chosen": -0.2817523181438446, |
| "logps/ref_chosen": -0.2848864793777466, |
| "logps/ref_rejected": -0.26774221658706665, |
| "logps/rejected": -0.2812132239341736, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": 3.134182043140754e-05, |
| "rewards/margins": 0.00016605171549599618, |
| "rewards/rejected": -0.00013470988778863102, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.224418915308576, |
| "grad_norm": 0.015946751460433006, |
| "learning_rate": 4.768396736419662e-07, |
| "logits/chosen": -0.9789286851882935, |
| "logits/rejected": -1.015540599822998, |
| "logps/chosen": -0.27842462062835693, |
| "logps/ref_chosen": -0.263070285320282, |
| "logps/ref_rejected": -0.30586668848991394, |
| "logps/rejected": -0.3438529074192047, |
| "loss": 0.6932, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.00015354313654825091, |
| "rewards/margins": 0.00022631860338151455, |
| "rewards/rejected": -0.00037986173992976546, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.22655623831151483, |
| "grad_norm": 0.011582360602915287, |
| "learning_rate": 4.7604726896728496e-07, |
| "logits/chosen": -0.9088851809501648, |
| "logits/rejected": -0.8099016547203064, |
| "logps/chosen": -0.33478638529777527, |
| "logps/ref_chosen": -0.31714433431625366, |
| "logps/ref_rejected": -0.3059766888618469, |
| "logps/rejected": -0.31492024660110474, |
| "loss": 0.693, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.00017642055172473192, |
| "rewards/margins": -8.698524470673874e-05, |
| "rewards/rejected": -8.94353142939508e-05, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.22869356131445365, |
| "grad_norm": 0.018156087026000023, |
| "learning_rate": 4.752422169756047e-07, |
| "logits/chosen": -0.7711437940597534, |
| "logits/rejected": -0.7120383977890015, |
| "logps/chosen": -0.2776485085487366, |
| "logps/ref_chosen": -0.2732302248477936, |
| "logps/ref_rejected": -0.28275376558303833, |
| "logps/rejected": -0.2872280180454254, |
| "loss": 0.6932, |
| "rewards/accuracies": 0.3125, |
| "rewards/chosen": -4.418290700414218e-05, |
| "rewards/margins": 5.595823040493997e-07, |
| "rewards/rejected": -4.474248271435499e-05, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.23083088431739246, |
| "grad_norm": 0.00900331698358059, |
| "learning_rate": 4.744245627094858e-07, |
| "logits/chosen": -0.7600359320640564, |
| "logits/rejected": -0.7169309854507446, |
| "logps/chosen": -0.32112687826156616, |
| "logps/ref_chosen": -0.29813823103904724, |
| "logps/ref_rejected": -0.34423020482063293, |
| "logps/rejected": -0.3998780846595764, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.00022988635464571416, |
| "rewards/margins": 0.0003265927080065012, |
| "rewards/rejected": -0.0005564790335483849, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.23296820732033127, |
| "grad_norm": 0.01693367399275303, |
| "learning_rate": 4.735943519165842e-07, |
| "logits/chosen": -0.888256311416626, |
| "logits/rejected": -0.94065922498703, |
| "logps/chosen": -0.28589680790901184, |
| "logps/ref_chosen": -0.28242921829223633, |
| "logps/ref_rejected": -0.30649587512016296, |
| "logps/rejected": -0.31532636284828186, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -3.4675955248530954e-05, |
| "rewards/margins": 5.3628842579200864e-05, |
| "rewards/rejected": -8.830477599985898e-05, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.2351055303232701, |
| "grad_norm": 0.026335084810853004, |
| "learning_rate": 4.7275163104709194e-07, |
| "logits/chosen": -1.1528754234313965, |
| "logits/rejected": -1.0477982759475708, |
| "logps/chosen": -0.31197091937065125, |
| "logps/ref_chosen": -0.2945389449596405, |
| "logps/ref_rejected": -0.2867526113986969, |
| "logps/rejected": -0.43472719192504883, |
| "loss": 0.693, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.00017431929882150143, |
| "rewards/margins": 0.001305426238104701, |
| "rewards/rejected": -0.0014797456096857786, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.23724285332620892, |
| "grad_norm": 0.007627503015100956, |
| "learning_rate": 4.718964472511385e-07, |
| "logits/chosen": -0.7721959352493286, |
| "logits/rejected": -0.9067158102989197, |
| "logps/chosen": -0.26215583086013794, |
| "logps/ref_chosen": -0.25166669487953186, |
| "logps/ref_rejected": -0.23057280480861664, |
| "logps/rejected": -0.2579672932624817, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.00010489136184332892, |
| "rewards/margins": 0.00016905330994632095, |
| "rewards/rejected": -0.00027394466451369226, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.23938017632914774, |
| "grad_norm": 0.015933306887745857, |
| "learning_rate": 4.710288483761524e-07, |
| "logits/chosen": -0.8158953785896301, |
| "logits/rejected": -0.8428848385810852, |
| "logps/chosen": -0.26737216114997864, |
| "logps/ref_chosen": -0.26372402906417847, |
| "logps/ref_rejected": -0.26279786229133606, |
| "logps/rejected": -0.2817317247390747, |
| "loss": 0.6928, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -3.648136407718994e-05, |
| "rewards/margins": 0.00015285737754311413, |
| "rewards/rejected": -0.00018933873798232526, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.24151749933208655, |
| "grad_norm": 0.014662610366940498, |
| "learning_rate": 4.7014888296418447e-07, |
| "logits/chosen": -0.8647807836532593, |
| "logits/rejected": -0.7627796530723572, |
| "logps/chosen": -0.2716052830219269, |
| "logps/ref_chosen": -0.2697606682777405, |
| "logps/ref_rejected": -0.22260704636573792, |
| "logps/rejected": -0.3513369560241699, |
| "loss": 0.6928, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.8446266039973125e-05, |
| "rewards/margins": 0.0012688529677689075, |
| "rewards/rejected": -0.0012872989755123854, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.2436548223350254, |
| "grad_norm": 0.009754744358360767, |
| "learning_rate": 4.692566002491916e-07, |
| "logits/chosen": -0.988068163394928, |
| "logits/rejected": -1.0250458717346191, |
| "logps/chosen": -0.27649566531181335, |
| "logps/ref_chosen": -0.2684990167617798, |
| "logps/ref_rejected": -0.30923932790756226, |
| "logps/rejected": -0.3450013995170593, |
| "loss": 0.693, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -7.996657222975045e-05, |
| "rewards/margins": 0.0002776542096398771, |
| "rewards/rejected": -0.0003576207673177123, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.2457921453379642, |
| "grad_norm": 0.011114116758108139, |
| "learning_rate": 4.683520501542824e-07, |
| "logits/chosen": -1.1075717210769653, |
| "logits/rejected": -0.998264729976654, |
| "logps/chosen": -0.2692224979400635, |
| "logps/ref_chosen": -0.2502911686897278, |
| "logps/ref_rejected": -0.22717031836509705, |
| "logps/rejected": -0.23121069371700287, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.00018931309750769287, |
| "rewards/margins": -0.00014890941383782774, |
| "rewards/rejected": -4.0403690945822746e-05, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.24792946834090301, |
| "grad_norm": 0.013360762037336826, |
| "learning_rate": 4.6743528328892384e-07, |
| "logits/chosen": -1.106555700302124, |
| "logits/rejected": -1.0377901792526245, |
| "logps/chosen": -0.3115602135658264, |
| "logps/ref_chosen": -0.2948013246059418, |
| "logps/ref_rejected": -0.29901835322380066, |
| "logps/rejected": -0.3053865432739258, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.0001675889070611447, |
| "rewards/margins": -0.00010390685929451138, |
| "rewards/rejected": -6.368204776663333e-05, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.25006679134384185, |
| "grad_norm": 0.020221643149852753, |
| "learning_rate": 4.6650635094610966e-07, |
| "logits/chosen": -1.0233954191207886, |
| "logits/rejected": -1.0062767267227173, |
| "logps/chosen": -0.2751001715660095, |
| "logps/ref_chosen": -0.2546394467353821, |
| "logps/ref_rejected": -0.28393858671188354, |
| "logps/rejected": -0.3162187337875366, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.0002046076551778242, |
| "rewards/margins": 0.00011819371138699353, |
| "rewards/rejected": -0.00032280138111673295, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.25220411434678064, |
| "grad_norm": 0.009259633719921112, |
| "learning_rate": 4.655653050994906e-07, |
| "logits/chosen": -0.9006461501121521, |
| "logits/rejected": -0.9481255412101746, |
| "logps/chosen": -0.3037335276603699, |
| "logps/ref_chosen": -0.29091787338256836, |
| "logps/ref_rejected": -0.26563721895217896, |
| "logps/rejected": -0.28049764037132263, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.0001281569857383147, |
| "rewards/margins": 2.044740176643245e-05, |
| "rewards/rejected": -0.00014860439114272594, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.2543414373497195, |
| "grad_norm": 0.015450788661837578, |
| "learning_rate": 4.646121984004665e-07, |
| "logits/chosen": -1.013910174369812, |
| "logits/rejected": -0.9086140394210815, |
| "logps/chosen": -0.2894856929779053, |
| "logps/ref_chosen": -0.2745998799800873, |
| "logps/ref_rejected": -0.2563491761684418, |
| "logps/rejected": -0.2704516351222992, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.00014885803102515638, |
| "rewards/margins": -7.8334105637623e-06, |
| "rewards/rejected": -0.00014102461864240468, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.2564787603526583, |
| "grad_norm": 0.01930931769311428, |
| "learning_rate": 4.636470841752404e-07, |
| "logits/chosen": -0.8818156719207764, |
| "logits/rejected": -0.8463716506958008, |
| "logps/chosen": -0.24229660630226135, |
| "logps/ref_chosen": -0.22806444764137268, |
| "logps/ref_rejected": -0.30565983057022095, |
| "logps/rejected": -0.32617491483688354, |
| "loss": 0.693, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.00014232148532755673, |
| "rewards/margins": 6.282905815169215e-05, |
| "rewards/rejected": -0.00020515054347924888, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.2586160833555971, |
| "grad_norm": 0.026815062388777733, |
| "learning_rate": 4.626700164218349e-07, |
| "logits/chosen": -1.1243059635162354, |
| "logits/rejected": -1.1047096252441406, |
| "logps/chosen": -0.3274267017841339, |
| "logps/ref_chosen": -0.2680204212665558, |
| "logps/ref_rejected": -0.3155292272567749, |
| "logps/rejected": -0.39612749218940735, |
| "loss": 0.6928, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.0005940627306699753, |
| "rewards/margins": 0.00021191948326304555, |
| "rewards/rejected": -0.0008059822139330208, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.26075340635853594, |
| "grad_norm": 0.012854819186031818, |
| "learning_rate": 4.6168104980707103e-07, |
| "logits/chosen": -0.9814713001251221, |
| "logits/rejected": -0.9605292677879333, |
| "logps/chosen": -0.407224178314209, |
| "logps/ref_chosen": -0.2889726758003235, |
| "logps/ref_rejected": -0.2463933527469635, |
| "logps/rejected": -0.3896828889846802, |
| "loss": 0.6933, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.0011825152905657887, |
| "rewards/margins": 0.00025037972955033183, |
| "rewards/rejected": -0.0014328949619084597, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.26289072936147473, |
| "grad_norm": 0.04550722613930702, |
| "learning_rate": 4.606802396635098e-07, |
| "logits/chosen": -1.080730676651001, |
| "logits/rejected": -1.0739704370498657, |
| "logps/chosen": -0.2883850336074829, |
| "logps/ref_chosen": -0.27717676758766174, |
| "logps/ref_rejected": -0.28564029932022095, |
| "logps/rejected": -0.2910604476928711, |
| "loss": 0.693, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.00011208253272343427, |
| "rewards/margins": -5.7881065004039556e-05, |
| "rewards/rejected": -5.4201478633331135e-05, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.26502805236441357, |
| "grad_norm": 0.03749263286590576, |
| "learning_rate": 4.59667641986356e-07, |
| "logits/chosen": -0.9739108085632324, |
| "logits/rejected": -0.989708423614502, |
| "logps/chosen": -0.3094882667064667, |
| "logps/ref_chosen": -0.2941918969154358, |
| "logps/ref_rejected": -0.35692229866981506, |
| "logps/rejected": -0.3986133337020874, |
| "loss": 0.6928, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.00015296357742045075, |
| "rewards/margins": 0.00026394662563689053, |
| "rewards/rejected": -0.00041691018850542605, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.2671653753673524, |
| "grad_norm": 0.0193325262516737, |
| "learning_rate": 4.5864331343032565e-07, |
| "logits/chosen": -1.0082210302352905, |
| "logits/rejected": -0.9892030358314514, |
| "logps/chosen": -0.5784664750099182, |
| "logps/ref_chosen": -0.21026192605495453, |
| "logps/ref_rejected": -0.23376086354255676, |
| "logps/rejected": -0.5583807826042175, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.0036820450332015753, |
| "rewards/margins": -0.00043584543163888156, |
| "rewards/rejected": -0.0032461993396282196, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.2693026983702912, |
| "grad_norm": 0.017557300627231598, |
| "learning_rate": 4.576073113064759e-07, |
| "logits/chosen": -0.9353058338165283, |
| "logits/rejected": -1.0621377229690552, |
| "logps/chosen": -0.2963472008705139, |
| "logps/ref_chosen": -0.2886234521865845, |
| "logps/ref_rejected": -0.31517738103866577, |
| "logps/rejected": -0.36221760511398315, |
| "loss": 0.6928, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -7.723772432655096e-05, |
| "rewards/margins": 0.00039316501352004707, |
| "rewards/rejected": -0.00047040273784659803, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.27144002137323003, |
| "grad_norm": 0.036875393241643906, |
| "learning_rate": 4.565596935789987e-07, |
| "logits/chosen": -1.092225432395935, |
| "logits/rejected": -1.0788692235946655, |
| "logps/chosen": -0.33224257826805115, |
| "logps/ref_chosen": -0.3288933336734772, |
| "logps/ref_rejected": -0.30440354347229004, |
| "logps/rejected": -0.4201849102973938, |
| "loss": 0.6929, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -3.3492451620986685e-05, |
| "rewards/margins": 0.0011243209009990096, |
| "rewards/rejected": -0.0011578132398426533, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.2735773443761689, |
| "grad_norm": 0.03974121809005737, |
| "learning_rate": 4.555005188619775e-07, |
| "logits/chosen": -0.8580077886581421, |
| "logits/rejected": -0.8540875315666199, |
| "logps/chosen": -0.2515720725059509, |
| "logps/ref_chosen": -0.23396587371826172, |
| "logps/ref_rejected": -0.23645643889904022, |
| "logps/rejected": -0.32672157883644104, |
| "loss": 0.6926, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.00017606174515094608, |
| "rewards/margins": 0.0007265894091688097, |
| "rewards/rejected": -0.00090265111066401, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.27571466737910766, |
| "grad_norm": 0.018628248944878578, |
| "learning_rate": 4.5442984641610784e-07, |
| "logits/chosen": -1.1526374816894531, |
| "logits/rejected": -1.074135184288025, |
| "logps/chosen": -0.29931825399398804, |
| "logps/ref_chosen": -0.29289674758911133, |
| "logps/ref_rejected": -0.27414625883102417, |
| "logps/rejected": -0.2880631685256958, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -6.421504076570272e-05, |
| "rewards/margins": 7.495423778891563e-05, |
| "rewards/rejected": -0.00013916927855461836, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.2778519903820465, |
| "grad_norm": 0.01343632023781538, |
| "learning_rate": 4.533477361453819e-07, |
| "logits/chosen": -1.0570261478424072, |
| "logits/rejected": -1.1426626443862915, |
| "logps/chosen": -0.33023470640182495, |
| "logps/ref_chosen": -0.28865259885787964, |
| "logps/ref_rejected": -0.3303278684616089, |
| "logps/rejected": -0.37689998745918274, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.00041582121048122644, |
| "rewards/margins": 4.989993976778351e-05, |
| "rewards/rejected": -0.0004657211247831583, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.2799893133849853, |
| "grad_norm": 0.02337520569562912, |
| "learning_rate": 4.5225424859373684e-07, |
| "logits/chosen": -0.9973204135894775, |
| "logits/rejected": -0.9954949617385864, |
| "logps/chosen": -0.3498792350292206, |
| "logps/ref_chosen": -0.2964628040790558, |
| "logps/ref_rejected": -0.3011559247970581, |
| "logps/rejected": -0.3670787811279297, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.0005341644282452762, |
| "rewards/margins": 0.00012506399070844054, |
| "rewards/rejected": -0.0006592284771613777, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.2821266363879241, |
| "grad_norm": 0.02525736205279827, |
| "learning_rate": 4.511494449416671e-07, |
| "logits/chosen": -0.9119186401367188, |
| "logits/rejected": -0.8400843739509583, |
| "logps/chosen": -0.2865787744522095, |
| "logps/ref_chosen": -0.2424817681312561, |
| "logps/ref_rejected": -0.24128124117851257, |
| "logps/rejected": -0.26016679406166077, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.00044097009231336415, |
| "rewards/margins": -0.0002521143760532141, |
| "rewards/rejected": -0.0001888557308120653, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.28426395939086296, |
| "grad_norm": 0.010411817580461502, |
| "learning_rate": 4.500333870028016e-07, |
| "logits/chosen": -1.1000407934188843, |
| "logits/rejected": -1.0965006351470947, |
| "logps/chosen": -0.25650519132614136, |
| "logps/ref_chosen": -0.25444507598876953, |
| "logps/ref_rejected": -0.24687746167182922, |
| "logps/rejected": -0.2626207768917084, |
| "loss": 0.6926, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -2.0601235519279726e-05, |
| "rewards/margins": 0.00013683177530765533, |
| "rewards/rejected": -0.00015743299445603043, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.28640128239380175, |
| "grad_norm": 0.03425963968038559, |
| "learning_rate": 4.489061372204452e-07, |
| "logits/chosen": -0.9624871611595154, |
| "logits/rejected": -0.8932892084121704, |
| "logps/chosen": -0.3306514620780945, |
| "logps/ref_chosen": -0.2602233290672302, |
| "logps/ref_rejected": -0.28083372116088867, |
| "logps/rejected": -0.3390673100948334, |
| "loss": 0.6929, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.0007042812067084014, |
| "rewards/margins": -0.00012194520968478173, |
| "rewards/rejected": -0.0005823359242640436, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.2885386053967406, |
| "grad_norm": 0.03688036650419235, |
| "learning_rate": 4.4776775866408533e-07, |
| "logits/chosen": -1.1184890270233154, |
| "logits/rejected": -1.0113682746887207, |
| "logps/chosen": -0.6618571877479553, |
| "logps/ref_chosen": -0.29553329944610596, |
| "logps/ref_rejected": -0.28761035203933716, |
| "logps/rejected": -0.2959080636501312, |
| "loss": 0.693, |
| "rewards/accuracies": 0.3125, |
| "rewards/chosen": -0.0036632386036217213, |
| "rewards/margins": -0.003580261953175068, |
| "rewards/rejected": -8.29769269330427e-05, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.29067592839967943, |
| "grad_norm": 0.015697244554758072, |
| "learning_rate": 4.4661831502586244e-07, |
| "logits/chosen": -1.0158777236938477, |
| "logits/rejected": -0.9836152195930481, |
| "logps/chosen": -0.34032636880874634, |
| "logps/ref_chosen": -0.3220231831073761, |
| "logps/ref_rejected": -0.35796070098876953, |
| "logps/rejected": -0.38638126850128174, |
| "loss": 0.6926, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.00018303179240319878, |
| "rewards/margins": 0.00010117368947248906, |
| "rewards/rejected": -0.0002842055109795183, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.2928132514026182, |
| "grad_norm": 0.021380702033638954, |
| "learning_rate": 4.4545787061700746e-07, |
| "logits/chosen": -1.0023033618927002, |
| "logits/rejected": -0.9718290567398071, |
| "logps/chosen": -0.34509143233299255, |
| "logps/ref_chosen": -0.3240284323692322, |
| "logps/ref_rejected": -0.29177695512771606, |
| "logps/rejected": -0.36937934160232544, |
| "loss": 0.6932, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.00021063002350274473, |
| "rewards/margins": 0.0005653940606862307, |
| "rewards/rejected": -0.0007760241278447211, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.29495057440555705, |
| "grad_norm": 0.027477525174617767, |
| "learning_rate": 4.442864903642427e-07, |
| "logits/chosen": -0.9923666715621948, |
| "logits/rejected": -1.0324485301971436, |
| "logps/chosen": -0.39349597692489624, |
| "logps/ref_chosen": -0.2541620433330536, |
| "logps/ref_rejected": -0.2515232563018799, |
| "logps/rejected": -0.4745479226112366, |
| "loss": 0.6934, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.0013933394802734256, |
| "rewards/margins": 0.0008369074203073978, |
| "rewards/rejected": -0.0022302467841655016, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.29708789740849584, |
| "grad_norm": 0.009953165426850319, |
| "learning_rate": 4.4310423980614986e-07, |
| "logits/chosen": -1.0028334856033325, |
| "logits/rejected": -0.9110530018806458, |
| "logps/chosen": -0.4791359603404999, |
| "logps/ref_chosen": -0.2510277032852173, |
| "logps/ref_rejected": -0.2747063934803009, |
| "logps/rejected": -0.3279649615287781, |
| "loss": 0.6932, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.0022810825612396, |
| "rewards/margins": -0.0017484965501353145, |
| "rewards/rejected": -0.0005325857782736421, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.2992252204114347, |
| "grad_norm": 0.027421485632658005, |
| "learning_rate": 4.4191118508950277e-07, |
| "logits/chosen": -1.0206215381622314, |
| "logits/rejected": -1.0765854120254517, |
| "logps/chosen": -0.3297208547592163, |
| "logps/ref_chosen": -0.32380861043930054, |
| "logps/ref_rejected": -0.3436110317707062, |
| "logps/rejected": -0.3874130845069885, |
| "loss": 0.693, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -5.912266715313308e-05, |
| "rewards/margins": 0.0003788975300267339, |
| "rewards/rejected": -0.0004380202735774219, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.3013625434143735, |
| "grad_norm": 0.0135075394064188, |
| "learning_rate": 4.407073929655666e-07, |
| "logits/chosen": -0.9094095230102539, |
| "logits/rejected": -0.9072328209877014, |
| "logps/chosen": -0.399659126996994, |
| "logps/ref_chosen": -0.30758026242256165, |
| "logps/ref_rejected": -0.29937583208084106, |
| "logps/rejected": -0.3797423243522644, |
| "loss": 0.693, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.0009207883849740028, |
| "rewards/margins": -0.00011712348350556567, |
| "rewards/rejected": -0.0008036648505367339, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.3034998664173123, |
| "grad_norm": 0.02363797463476658, |
| "learning_rate": 4.394929307863632e-07, |
| "logits/chosen": -1.2020052671432495, |
| "logits/rejected": -1.1480340957641602, |
| "logps/chosen": -0.5407591462135315, |
| "logps/ref_chosen": -0.24277769029140472, |
| "logps/ref_rejected": -0.22730624675750732, |
| "logps/rejected": -0.4076313078403473, |
| "loss": 0.6929, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.002979814540594816, |
| "rewards/margins": -0.0011765641393139958, |
| "rewards/rejected": -0.0018032502848654985, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.30563718942025114, |
| "grad_norm": 0.05549626797437668, |
| "learning_rate": 4.3826786650090273e-07, |
| "logits/chosen": -1.1618678569793701, |
| "logits/rejected": -1.1556974649429321, |
| "logps/chosen": -0.3329000473022461, |
| "logps/ref_chosen": -0.3016224503517151, |
| "logps/ref_rejected": -0.27624374628067017, |
| "logps/rejected": -0.6231765747070312, |
| "loss": 0.6928, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.0003127758973278105, |
| "rewards/margins": 0.003156552091240883, |
| "rewards/rejected": -0.003469327697530389, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.3077745124231899, |
| "grad_norm": 0.02050000987946987, |
| "learning_rate": 4.370322686513817e-07, |
| "logits/chosen": -0.861806333065033, |
| "logits/rejected": -0.8078919649124146, |
| "logps/chosen": -0.2508043348789215, |
| "logps/ref_chosen": -0.2450011521577835, |
| "logps/ref_rejected": -0.2200111299753189, |
| "logps/rejected": -0.23384828865528107, |
| "loss": 0.6928, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -5.803192107123323e-05, |
| "rewards/margins": 8.033957419684157e-05, |
| "rewards/rejected": -0.00013837151345796883, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.30991183542612877, |
| "grad_norm": 0.019023990258574486, |
| "learning_rate": 4.357862063693485e-07, |
| "logits/chosen": -1.0277233123779297, |
| "logits/rejected": -1.1270538568496704, |
| "logps/chosen": -0.3179543614387512, |
| "logps/ref_chosen": -0.2770538926124573, |
| "logps/ref_rejected": -0.3283047676086426, |
| "logps/rejected": -0.36043909192085266, |
| "loss": 0.693, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.00040900454041548073, |
| "rewards/margins": -8.766178507357836e-05, |
| "rewards/rejected": -0.0003213427553419024, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.3120491584290676, |
| "grad_norm": 0.008028342388570309, |
| "learning_rate": 4.345297493718352e-07, |
| "logits/chosen": -0.9767619967460632, |
| "logits/rejected": -0.9226668477058411, |
| "logps/chosen": -0.835956871509552, |
| "logps/ref_chosen": -0.23475220799446106, |
| "logps/ref_rejected": -0.27067989110946655, |
| "logps/rejected": -0.7674523591995239, |
| "loss": 0.693, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.006012047175318003, |
| "rewards/margins": -0.0010443233186379075, |
| "rewards/rejected": -0.004967723973095417, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.3141864814320064, |
| "grad_norm": 0.016957726329565048, |
| "learning_rate": 4.332629679574565e-07, |
| "logits/chosen": -0.7966890335083008, |
| "logits/rejected": -0.8765348196029663, |
| "logps/chosen": -0.2578888535499573, |
| "logps/ref_chosen": -0.23090001940727234, |
| "logps/ref_rejected": -0.25391247868537903, |
| "logps/rejected": -0.32662370800971985, |
| "loss": 0.693, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.0002698881144169718, |
| "rewards/margins": 0.00045722417416982353, |
| "rewards/rejected": -0.0007271122885867953, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.31632380443494523, |
| "grad_norm": 0.029666118323802948, |
| "learning_rate": 4.319859330024777e-07, |
| "logits/chosen": -0.9762994050979614, |
| "logits/rejected": -0.901314377784729, |
| "logps/chosen": -0.35421884059906006, |
| "logps/ref_chosen": -0.27796152234077454, |
| "logps/ref_rejected": -0.29037755727767944, |
| "logps/rejected": -0.4447952210903168, |
| "loss": 0.6932, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.0007625732105225325, |
| "rewards/margins": 0.0007816030411049724, |
| "rewards/rejected": -0.0015441762516275048, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.3184611274378841, |
| "grad_norm": 0.017698727548122406, |
| "learning_rate": 4.3069871595684787e-07, |
| "logits/chosen": -1.035744309425354, |
| "logits/rejected": -1.1764036417007446, |
| "logps/chosen": -0.4736050069332123, |
| "logps/ref_chosen": -0.27382147312164307, |
| "logps/ref_rejected": -0.3068073093891144, |
| "logps/rejected": -0.7743459939956665, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.0019978354685008526, |
| "rewards/margins": 0.002677551005035639, |
| "rewards/rejected": -0.004675386939197779, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.32059845044082286, |
| "grad_norm": 0.010226859711110592, |
| "learning_rate": 4.294013888402029e-07, |
| "logits/chosen": -1.1033796072006226, |
| "logits/rejected": -1.0082882642745972, |
| "logps/chosen": -0.376055508852005, |
| "logps/ref_chosen": -0.24129143357276917, |
| "logps/ref_rejected": -0.24431535601615906, |
| "logps/rejected": -0.38209035992622375, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.0013476409949362278, |
| "rewards/margins": 3.0109060389804654e-05, |
| "rewards/rejected": -0.0013777499552816153, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.3227357734437617, |
| "grad_norm": 0.02018311433494091, |
| "learning_rate": 4.280940242378362e-07, |
| "logits/chosen": -0.9941385984420776, |
| "logits/rejected": -1.0266544818878174, |
| "logps/chosen": -0.3374132215976715, |
| "logps/ref_chosen": -0.24274253845214844, |
| "logps/ref_rejected": -0.27078789472579956, |
| "logps/rejected": -0.6679689288139343, |
| "loss": 0.6924, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.0009467067429795861, |
| "rewards/margins": 0.003025103360414505, |
| "rewards/rejected": -0.003971809521317482, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.3248730964467005, |
| "grad_norm": 0.015647241845726967, |
| "learning_rate": 4.2677669529663686e-07, |
| "logits/chosen": -1.0592918395996094, |
| "logits/rejected": -1.0191495418548584, |
| "logps/chosen": -0.294866681098938, |
| "logps/ref_chosen": -0.2779861390590668, |
| "logps/ref_rejected": -0.2506893575191498, |
| "logps/rejected": -0.3610212504863739, |
| "loss": 0.6928, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.0001688053598627448, |
| "rewards/margins": 0.0009345137514173985, |
| "rewards/rejected": -0.0011033191112801433, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.3270104194496393, |
| "grad_norm": 0.014247226528823376, |
| "learning_rate": 4.254494757209979e-07, |
| "logits/chosen": -1.0556178092956543, |
| "logits/rejected": -0.877293586730957, |
| "logps/chosen": -0.2874278724193573, |
| "logps/ref_chosen": -0.24776652455329895, |
| "logps/ref_rejected": -0.27221953868865967, |
| "logps/rejected": -0.3547138571739197, |
| "loss": 0.6933, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.00039661358459852636, |
| "rewards/margins": 0.0004283295711502433, |
| "rewards/rejected": -0.0008249431848526001, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.32914774245257816, |
| "grad_norm": 0.03036138229072094, |
| "learning_rate": 4.2411243976869173e-07, |
| "logits/chosen": -1.141327142715454, |
| "logits/rejected": -1.1449819803237915, |
| "logps/chosen": -0.3541218340396881, |
| "logps/ref_chosen": -0.2876412868499756, |
| "logps/ref_rejected": -0.31285402178764343, |
| "logps/rejected": -0.40836772322654724, |
| "loss": 0.6927, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.000664805353153497, |
| "rewards/margins": 0.00029033157625235617, |
| "rewards/rejected": -0.0009551369585096836, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.33128506545551695, |
| "grad_norm": 0.023645518347620964, |
| "learning_rate": 4.227656622467162e-07, |
| "logits/chosen": -1.0294756889343262, |
| "logits/rejected": -1.008141040802002, |
| "logps/chosen": -0.6693433523178101, |
| "logps/ref_chosen": -0.2722676694393158, |
| "logps/ref_rejected": -0.3089941143989563, |
| "logps/rejected": -0.6721625924110413, |
| "loss": 0.6928, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.003970756661146879, |
| "rewards/margins": -0.00033907213946804404, |
| "rewards/rejected": -0.0036316844634711742, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.3334223884584558, |
| "grad_norm": 0.04189411178231239, |
| "learning_rate": 4.2140921850710855e-07, |
| "logits/chosen": -1.17415452003479, |
| "logits/rejected": -1.1698453426361084, |
| "logps/chosen": -0.3035968244075775, |
| "logps/ref_chosen": -0.2622109651565552, |
| "logps/ref_rejected": -0.2861221432685852, |
| "logps/rejected": -0.3508266806602478, |
| "loss": 0.6928, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.0004138582444284111, |
| "rewards/margins": 0.0002331873110961169, |
| "rewards/rejected": -0.000647045555524528, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.3355597114613946, |
| "grad_norm": 0.07063852995634079, |
| "learning_rate": 4.200431844427298e-07, |
| "logits/chosen": -1.0624470710754395, |
| "logits/rejected": -1.1391193866729736, |
| "logps/chosen": -0.630163311958313, |
| "logps/ref_chosen": -0.2261686623096466, |
| "logps/ref_rejected": -0.28651681542396545, |
| "logps/rejected": -1.0216408967971802, |
| "loss": 0.6929, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.004039946012198925, |
| "rewards/margins": 0.0033112945966422558, |
| "rewards/rejected": -0.007351240608841181, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.3376970344643334, |
| "grad_norm": 0.014123285189270973, |
| "learning_rate": 4.186676364830186e-07, |
| "logits/chosen": -0.8795930743217468, |
| "logits/rejected": -0.9772742986679077, |
| "logps/chosen": -0.6658599972724915, |
| "logps/ref_chosen": -0.25330570340156555, |
| "logps/ref_rejected": -0.23623043298721313, |
| "logps/rejected": -0.8036448955535889, |
| "loss": 0.6929, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.004125543404370546, |
| "rewards/margins": 0.001548600965179503, |
| "rewards/rejected": -0.00567414378747344, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.33983435746727225, |
| "grad_norm": 0.024288849905133247, |
| "learning_rate": 4.172826515897145e-07, |
| "logits/chosen": -1.0424729585647583, |
| "logits/rejected": -0.975665807723999, |
| "logps/chosen": -0.3315909206867218, |
| "logps/ref_chosen": -0.22897037863731384, |
| "logps/ref_rejected": -0.24478110671043396, |
| "logps/rejected": -0.2822144329547882, |
| "loss": 0.6933, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.001026205369271338, |
| "rewards/margins": -0.0006518723093904555, |
| "rewards/rejected": -0.00037433323450386524, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.34197168047021104, |
| "grad_norm": 0.026169564574956894, |
| "learning_rate": 4.158883072525528e-07, |
| "logits/chosen": -1.23733651638031, |
| "logits/rejected": -1.0885720252990723, |
| "logps/chosen": -0.2606554329395294, |
| "logps/ref_chosen": -0.22942125797271729, |
| "logps/ref_rejected": -0.2259996384382248, |
| "logps/rejected": -0.2616400420665741, |
| "loss": 0.6928, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.00031234187190420926, |
| "rewards/margins": 4.4062137021683156e-05, |
| "rewards/rejected": -0.0003564039943739772, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.3441090034731499, |
| "grad_norm": 0.053732700645923615, |
| "learning_rate": 4.1448468148492814e-07, |
| "logits/chosen": -1.1118426322937012, |
| "logits/rejected": -1.0801961421966553, |
| "logps/chosen": -0.6751722693443298, |
| "logps/ref_chosen": -0.3340597152709961, |
| "logps/ref_rejected": -0.2785757780075073, |
| "logps/rejected": -0.5933806896209717, |
| "loss": 0.6924, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.003411125158891082, |
| "rewards/margins": -0.0002630760718602687, |
| "rewards/rejected": -0.0031480491161346436, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.3462463264760887, |
| "grad_norm": 0.01546022854745388, |
| "learning_rate": 4.130718528195303e-07, |
| "logits/chosen": -1.0428024530410767, |
| "logits/rejected": -1.0006818771362305, |
| "logps/chosen": -0.29387784004211426, |
| "logps/ref_chosen": -0.27180030941963196, |
| "logps/ref_rejected": -0.2495374083518982, |
| "logps/rejected": -0.2741365432739258, |
| "loss": 0.693, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.00022077532776165754, |
| "rewards/margins": 2.5216031644959003e-05, |
| "rewards/rejected": -0.0002459913957864046, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.3483836494790275, |
| "grad_norm": 0.02111038565635681, |
| "learning_rate": 4.1164990030394985e-07, |
| "logits/chosen": -1.1173827648162842, |
| "logits/rejected": -1.0576171875, |
| "logps/chosen": -0.3242610692977905, |
| "logps/ref_chosen": -0.3046630918979645, |
| "logps/ref_rejected": -0.30778732895851135, |
| "logps/rejected": -0.32636556029319763, |
| "loss": 0.693, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.00019597975187934935, |
| "rewards/margins": -1.0197368283115793e-05, |
| "rewards/rejected": -0.00018578238086774945, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.35052097248196634, |
| "grad_norm": 0.010406052693724632, |
| "learning_rate": 4.10218903496256e-07, |
| "logits/chosen": -1.1846860647201538, |
| "logits/rejected": -1.0826698541641235, |
| "logps/chosen": -0.36644065380096436, |
| "logps/ref_chosen": -0.2922857701778412, |
| "logps/ref_rejected": -0.2719643712043762, |
| "logps/rejected": -0.3597840368747711, |
| "loss": 0.6929, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.0007415488362312317, |
| "rewards/margins": 0.00013664780999533832, |
| "rewards/rejected": -0.0008781967335380614, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.3526582954849052, |
| "grad_norm": 0.024618519470095634, |
| "learning_rate": 4.087789424605447e-07, |
| "logits/chosen": -1.148983120918274, |
| "logits/rejected": -1.0648589134216309, |
| "logps/chosen": -0.683175802230835, |
| "logps/ref_chosen": -0.2404506951570511, |
| "logps/ref_rejected": -0.2600495219230652, |
| "logps/rejected": -0.5998008847236633, |
| "loss": 0.6927, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.004427251406013966, |
| "rewards/margins": -0.001029736828058958, |
| "rewards/rejected": -0.0033975141122937202, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.35479561848784397, |
| "grad_norm": 0.043775226920843124, |
| "learning_rate": 4.0733009776245937e-07, |
| "logits/chosen": -1.094302773475647, |
| "logits/rejected": -1.1364027261734009, |
| "logps/chosen": -0.6535714864730835, |
| "logps/ref_chosen": -0.3250899612903595, |
| "logps/ref_rejected": -0.3116614520549774, |
| "logps/rejected": -0.8002303838729858, |
| "loss": 0.693, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.003284815000370145, |
| "rewards/margins": 0.0016008741222321987, |
| "rewards/rejected": -0.0048856888897717, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.3569329414907828, |
| "grad_norm": 0.0208792332559824, |
| "learning_rate": 4.058724504646834e-07, |
| "logits/chosen": -1.0164306163787842, |
| "logits/rejected": -0.9673044681549072, |
| "logps/chosen": -0.2921302616596222, |
| "logps/ref_chosen": -0.2629692852497101, |
| "logps/ref_rejected": -0.28514453768730164, |
| "logps/rejected": -0.5996126532554626, |
| "loss": 0.6925, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.00029161013662815094, |
| "rewards/margins": 0.0028530708514153957, |
| "rewards/rejected": -0.0031446809880435467, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.3590702644937216, |
| "grad_norm": 0.02304687164723873, |
| "learning_rate": 4.0440608212240445e-07, |
| "logits/chosen": -1.1392021179199219, |
| "logits/rejected": -1.1934229135513306, |
| "logps/chosen": -0.6687176823616028, |
| "logps/ref_chosen": -0.2801383435726166, |
| "logps/ref_rejected": -0.3042837679386139, |
| "logps/rejected": -0.6369175314903259, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.003885793499648571, |
| "rewards/margins": -0.0005594560643658042, |
| "rewards/rejected": -0.0033263377845287323, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.36120758749666043, |
| "grad_norm": 0.013050371780991554, |
| "learning_rate": 4.0293107477875156e-07, |
| "logits/chosen": -0.9949243068695068, |
| "logits/rejected": -1.0055217742919922, |
| "logps/chosen": -0.5803820490837097, |
| "logps/ref_chosen": -0.22309988737106323, |
| "logps/ref_rejected": -0.23219947516918182, |
| "logps/rejected": -0.6310278177261353, |
| "loss": 0.6928, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.0035728211514651775, |
| "rewards/margins": 0.0004154616908635944, |
| "rewards/rejected": -0.003988282755017281, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.36334491049959927, |
| "grad_norm": 0.014827185310423374, |
| "learning_rate": 4.0144751096020497e-07, |
| "logits/chosen": -1.1596235036849976, |
| "logits/rejected": -1.0978240966796875, |
| "logps/chosen": -0.3899872303009033, |
| "logps/ref_chosen": -0.26081281900405884, |
| "logps/ref_rejected": -0.2669735848903656, |
| "logps/rejected": -0.5403264164924622, |
| "loss": 0.6928, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.001291744178161025, |
| "rewards/margins": 0.0014417838538065553, |
| "rewards/rejected": -0.0027335279155522585, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.36548223350253806, |
| "grad_norm": 0.02997681125998497, |
| "learning_rate": 3.999554736719785e-07, |
| "logits/chosen": -1.2055219411849976, |
| "logits/rejected": -1.2811046838760376, |
| "logps/chosen": -0.38567066192626953, |
| "logps/ref_chosen": -0.3038882613182068, |
| "logps/ref_rejected": -0.2914700508117676, |
| "logps/rejected": -0.8252891302108765, |
| "loss": 0.6925, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.0008178239222615957, |
| "rewards/margins": 0.004520366434007883, |
| "rewards/rejected": -0.0053381905891001225, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.3676195565054769, |
| "grad_norm": 0.021614963188767433, |
| "learning_rate": 3.9845504639337535e-07, |
| "logits/chosen": -1.283888339996338, |
| "logits/rejected": -1.2305909395217896, |
| "logps/chosen": -0.8894591331481934, |
| "logps/ref_chosen": -0.2724803388118744, |
| "logps/ref_rejected": -0.2576662600040436, |
| "logps/rejected": -0.6641738414764404, |
| "loss": 0.6928, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.00616978807374835, |
| "rewards/margins": -0.0021047131158411503, |
| "rewards/rejected": -0.0040650749579072, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.36975687950841574, |
| "grad_norm": 0.02335093915462494, |
| "learning_rate": 3.9694631307311825e-07, |
| "logits/chosen": -0.9126233458518982, |
| "logits/rejected": -0.8909568190574646, |
| "logps/chosen": -0.47398024797439575, |
| "logps/ref_chosen": -0.25330206751823425, |
| "logps/ref_rejected": -0.27004683017730713, |
| "logps/rejected": -0.5816404819488525, |
| "loss": 0.6929, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.00220678118057549, |
| "rewards/margins": 0.0009091557585634291, |
| "rewards/rejected": -0.003115937113761902, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.3718942025113545, |
| "grad_norm": 0.027051478624343872, |
| "learning_rate": 3.954293581246514e-07, |
| "logits/chosen": -1.12137770652771, |
| "logits/rejected": -1.0642956495285034, |
| "logps/chosen": -0.2977723181247711, |
| "logps/ref_chosen": -0.2913084328174591, |
| "logps/ref_rejected": -0.31117933988571167, |
| "logps/rejected": -0.31624263525009155, |
| "loss": 0.6925, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -6.463862519012764e-05, |
| "rewards/margins": -1.4005563571117818e-05, |
| "rewards/rejected": -5.06330507050734e-05, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.37403152551429336, |
| "grad_norm": 0.06583097577095032, |
| "learning_rate": 3.939042664214184e-07, |
| "logits/chosen": -1.0775827169418335, |
| "logits/rejected": -1.1728906631469727, |
| "logps/chosen": -0.2893608808517456, |
| "logps/ref_chosen": -0.2633807957172394, |
| "logps/ref_rejected": -0.28956925868988037, |
| "logps/rejected": -0.3528980314731598, |
| "loss": 0.6926, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.00025980063946917653, |
| "rewards/margins": 0.0003734871279448271, |
| "rewards/rejected": -0.0006332877674140036, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.37616884851723215, |
| "grad_norm": 0.02211063914000988, |
| "learning_rate": 3.92371123292113e-07, |
| "logits/chosen": -1.20661199092865, |
| "logits/rejected": -1.2675715684890747, |
| "logps/chosen": -0.3236720860004425, |
| "logps/ref_chosen": -0.2942003905773163, |
| "logps/ref_rejected": -0.3065783977508545, |
| "logps/rejected": -0.36700552701950073, |
| "loss": 0.6928, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.00029471714515239, |
| "rewards/margins": 0.00030955387046560645, |
| "rewards/rejected": -0.0006042710156179965, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.378306171520171, |
| "grad_norm": 0.016130387783050537, |
| "learning_rate": 3.908300145159055e-07, |
| "logits/chosen": -1.1031980514526367, |
| "logits/rejected": -1.141540288925171, |
| "logps/chosen": -0.38062310218811035, |
| "logps/ref_chosen": -0.2874283790588379, |
| "logps/ref_rejected": -0.29828718304634094, |
| "logps/rejected": -0.38643717765808105, |
| "loss": 0.6934, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.0009319473174400628, |
| "rewards/margins": -5.044708086643368e-05, |
| "rewards/rejected": -0.0008815001929178834, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.3804434945231098, |
| "grad_norm": 0.027748990803956985, |
| "learning_rate": 3.8928102631764304e-07, |
| "logits/chosen": -1.1433825492858887, |
| "logits/rejected": -1.2078571319580078, |
| "logps/chosen": -0.9717259407043457, |
| "logps/ref_chosen": -0.2695179879665375, |
| "logps/ref_rejected": -0.30629223585128784, |
| "logps/rejected": -0.9882835149765015, |
| "loss": 0.693, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.007022079546004534, |
| "rewards/margins": -0.00020216713892295957, |
| "rewards/rejected": -0.006819912698119879, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.3825808175260486, |
| "grad_norm": 0.02706519514322281, |
| "learning_rate": 3.877242453630256e-07, |
| "logits/chosen": -1.319815754890442, |
| "logits/rejected": -1.1805726289749146, |
| "logps/chosen": -0.5542667508125305, |
| "logps/ref_chosen": -0.32056325674057007, |
| "logps/ref_rejected": -0.3086071014404297, |
| "logps/rejected": -0.6392649412155151, |
| "loss": 0.6928, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.002337035024538636, |
| "rewards/margins": 0.0009695428889244795, |
| "rewards/rejected": -0.0033065781462937593, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.38471814052898745, |
| "grad_norm": 0.03842969611287117, |
| "learning_rate": 3.8615975875375676e-07, |
| "logits/chosen": -1.0457684993743896, |
| "logits/rejected": -1.0183640718460083, |
| "logps/chosen": -0.6278855204582214, |
| "logps/ref_chosen": -0.2530253529548645, |
| "logps/ref_rejected": -0.2655572295188904, |
| "logps/rejected": -0.6273213028907776, |
| "loss": 0.6929, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.003748601768165827, |
| "rewards/margins": -0.00013096083421260118, |
| "rewards/rejected": -0.00361764058470726, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.38685546353192624, |
| "grad_norm": 0.02088579721748829, |
| "learning_rate": 3.8458765402267056e-07, |
| "logits/chosen": -1.0092014074325562, |
| "logits/rejected": -1.025460958480835, |
| "logps/chosen": -0.6726264953613281, |
| "logps/ref_chosen": -0.25349050760269165, |
| "logps/ref_rejected": -0.27492228150367737, |
| "logps/rejected": -0.6149729490280151, |
| "loss": 0.6932, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.004191359039396048, |
| "rewards/margins": -0.0007908523548394442, |
| "rewards/rejected": -0.0034005064517259598, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.3889927865348651, |
| "grad_norm": 0.024651281535625458, |
| "learning_rate": 3.8300801912883414e-07, |
| "logits/chosen": -1.1568214893341064, |
| "logits/rejected": -1.0937024354934692, |
| "logps/chosen": -0.2788527011871338, |
| "logps/ref_chosen": -0.2565329670906067, |
| "logps/ref_rejected": -0.2679063677787781, |
| "logps/rejected": -0.32140401005744934, |
| "loss": 0.6928, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.00022319731942843646, |
| "rewards/margins": 0.00031177912023849785, |
| "rewards/rejected": -0.0005349764251150191, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.3911301095378039, |
| "grad_norm": 0.03466137871146202, |
| "learning_rate": 3.8142094245262615e-07, |
| "logits/chosen": -1.245633840560913, |
| "logits/rejected": -1.1368135213851929, |
| "logps/chosen": -0.4290107488632202, |
| "logps/ref_chosen": -0.31136584281921387, |
| "logps/ref_rejected": -0.30961117148399353, |
| "logps/rejected": -0.3617403209209442, |
| "loss": 0.693, |
| "rewards/accuracies": 0.3125, |
| "rewards/chosen": -0.0011764491209760308, |
| "rewards/margins": -0.0006551574915647507, |
| "rewards/rejected": -0.0005212915712036192, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.3932674325407427, |
| "grad_norm": 0.019514210522174835, |
| "learning_rate": 3.7982651279079227e-07, |
| "logits/chosen": -1.3746901750564575, |
| "logits/rejected": -1.3795042037963867, |
| "logps/chosen": -0.6590787768363953, |
| "logps/ref_chosen": -0.2669219672679901, |
| "logps/ref_rejected": -0.25628024339675903, |
| "logps/rejected": -0.6677826642990112, |
| "loss": 0.6927, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.003921568393707275, |
| "rewards/margins": 0.0001934557658387348, |
| "rewards/rejected": -0.0041150241158902645, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.39540475554368154, |
| "grad_norm": 0.024666976183652878, |
| "learning_rate": 3.7822481935147655e-07, |
| "logits/chosen": -1.158657193183899, |
| "logits/rejected": -1.146610975265503, |
| "logps/chosen": -0.5707715749740601, |
| "logps/ref_chosen": -0.2338837832212448, |
| "logps/ref_rejected": -0.3174756169319153, |
| "logps/rejected": -0.7884472012519836, |
| "loss": 0.6928, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.0033688778057694435, |
| "rewards/margins": 0.0013408383820205927, |
| "rewards/rejected": -0.0047097159549593925, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.3975420785466204, |
| "grad_norm": 0.030332472175359726, |
| "learning_rate": 3.766159517492307e-07, |
| "logits/chosen": -1.1721456050872803, |
| "logits/rejected": -1.257125973701477, |
| "logps/chosen": -1.1888747215270996, |
| "logps/ref_chosen": -0.26278063654899597, |
| "logps/ref_rejected": -0.3681086599826813, |
| "logps/rejected": -1.0927274227142334, |
| "loss": 0.6932, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.009260939434170723, |
| "rewards/margins": -0.002014751546084881, |
| "rewards/rejected": -0.007246187888085842, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.39967940154955917, |
| "grad_norm": 0.049674175679683685, |
| "learning_rate": 3.75e-07, |
| "logits/chosen": -1.1332268714904785, |
| "logits/rejected": -1.0856448411941528, |
| "logps/chosen": -0.3653082549571991, |
| "logps/ref_chosen": -0.27847468852996826, |
| "logps/ref_rejected": -0.2892729640007019, |
| "logps/rejected": -0.544154167175293, |
| "loss": 0.693, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.0008683353662490845, |
| "rewards/margins": 0.0016804764745756984, |
| "rewards/rejected": -0.0025488114915788174, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.401816724552498, |
| "grad_norm": 0.017677990719676018, |
| "learning_rate": 3.7337705451608667e-07, |
| "logits/chosen": -1.2727012634277344, |
| "logits/rejected": -1.2380880117416382, |
| "logps/chosen": -0.36608850955963135, |
| "logps/ref_chosen": -0.2538753151893616, |
| "logps/ref_rejected": -0.25109949707984924, |
| "logps/rejected": -0.3793545663356781, |
| "loss": 0.6926, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.0011221321765333414, |
| "rewards/margins": 0.00016041852359194309, |
| "rewards/rejected": -0.0012825505109503865, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.4039540475554368, |
| "grad_norm": 0.02966160885989666, |
| "learning_rate": 3.717472061010918e-07, |
| "logits/chosen": -1.2182228565216064, |
| "logits/rejected": -1.1859164237976074, |
| "logps/chosen": -0.3903014063835144, |
| "logps/ref_chosen": -0.2924586832523346, |
| "logps/ref_rejected": -0.32196229696273804, |
| "logps/rejected": -0.6229968667030334, |
| "loss": 0.6925, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.0009784273570403457, |
| "rewards/margins": 0.0020319183822721243, |
| "rewards/rejected": -0.003010345855727792, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.40609137055837563, |
| "grad_norm": 0.025880886241793633, |
| "learning_rate": 3.7011054594483443e-07, |
| "logits/chosen": -1.229770541191101, |
| "logits/rejected": -1.1585888862609863, |
| "logps/chosen": -0.576205313205719, |
| "logps/ref_chosen": -0.2521926164627075, |
| "logps/ref_rejected": -0.26658016443252563, |
| "logps/rejected": -0.48973771929740906, |
| "loss": 0.6924, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.0032401273492723703, |
| "rewards/margins": -0.0010085518006235361, |
| "rewards/rejected": -0.002231575781479478, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.40822869356131447, |
| "grad_norm": 0.015824053436517715, |
| "learning_rate": 3.6846716561824967e-07, |
| "logits/chosen": -0.9254693984985352, |
| "logits/rejected": -1.0834063291549683, |
| "logps/chosen": -0.866976797580719, |
| "logps/ref_chosen": -0.28472232818603516, |
| "logps/ref_rejected": -0.32623252272605896, |
| "logps/rejected": -1.1461207866668701, |
| "loss": 0.6925, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.005822545848786831, |
| "rewards/margins": 0.0023763373028486967, |
| "rewards/rejected": -0.008198883384466171, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.41036601656425326, |
| "grad_norm": 0.026949137449264526, |
| "learning_rate": 3.668171570682655e-07, |
| "logits/chosen": -1.1005884408950806, |
| "logits/rejected": -1.1082618236541748, |
| "logps/chosen": -0.5736689567565918, |
| "logps/ref_chosen": -0.2761554718017578, |
| "logps/ref_rejected": -0.31166940927505493, |
| "logps/rejected": -0.5958366990089417, |
| "loss": 0.6925, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.0029751351103186607, |
| "rewards/margins": -0.0001334618718829006, |
| "rewards/rejected": -0.0028416733257472515, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.4125033395671921, |
| "grad_norm": 0.036559008061885834, |
| "learning_rate": 3.6516061261265805e-07, |
| "logits/chosen": -1.165940523147583, |
| "logits/rejected": -1.0535883903503418, |
| "logps/chosen": -0.3698316514492035, |
| "logps/ref_chosen": -0.30925309658050537, |
| "logps/ref_rejected": -0.2624213695526123, |
| "logps/rejected": -0.4968312382698059, |
| "loss": 0.6926, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.0006057855789549649, |
| "rewards/margins": 0.0017383128870278597, |
| "rewards/rejected": -0.0023440984077751637, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.41464066257013094, |
| "grad_norm": 0.03536602854728699, |
| "learning_rate": 3.634976249348867e-07, |
| "logits/chosen": -1.2258124351501465, |
| "logits/rejected": -1.1232020854949951, |
| "logps/chosen": -0.6192460656166077, |
| "logps/ref_chosen": -0.24589139223098755, |
| "logps/ref_rejected": -0.2646823227405548, |
| "logps/rejected": -0.6642224192619324, |
| "loss": 0.6921, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.003733546007424593, |
| "rewards/margins": 0.00026185475871898234, |
| "rewards/rejected": -0.003995400387793779, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.4167779855730697, |
| "grad_norm": 0.041056230664253235, |
| "learning_rate": 3.618282870789081e-07, |
| "logits/chosen": -1.1553905010223389, |
| "logits/rejected": -1.1440675258636475, |
| "logps/chosen": -0.5466388463973999, |
| "logps/ref_chosen": -0.3578364849090576, |
| "logps/ref_rejected": -0.32579946517944336, |
| "logps/rejected": -0.5175646543502808, |
| "loss": 0.6929, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.0018880233401432633, |
| "rewards/margins": 2.9628972697537392e-05, |
| "rewards/rejected": -0.0019176523201167583, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.41891530857600856, |
| "grad_norm": 0.01535941381007433, |
| "learning_rate": 3.601526924439709e-07, |
| "logits/chosen": -1.1408246755599976, |
| "logits/rejected": -1.175426721572876, |
| "logps/chosen": -0.3166300058364868, |
| "logps/ref_chosen": -0.2899530231952667, |
| "logps/ref_rejected": -0.28827494382858276, |
| "logps/rejected": -0.34230712056159973, |
| "loss": 0.6929, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.00026676978450268507, |
| "rewards/margins": 0.0002735524612944573, |
| "rewards/rejected": -0.0005403222166933119, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.42105263157894735, |
| "grad_norm": 0.034932032227516174, |
| "learning_rate": 3.584709347793895e-07, |
| "logits/chosen": -0.9613904356956482, |
| "logits/rejected": -0.9686570763587952, |
| "logps/chosen": -0.2962719202041626, |
| "logps/ref_chosen": -0.2704833745956421, |
| "logps/ref_rejected": -0.2422659695148468, |
| "logps/rejected": -0.3245784640312195, |
| "loss": 0.6924, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.0002578851126600057, |
| "rewards/margins": 0.0005652394029311836, |
| "rewards/rejected": -0.0008231244864873588, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.4231899545818862, |
| "grad_norm": 0.03452929109334946, |
| "learning_rate": 3.567831081792992e-07, |
| "logits/chosen": -1.1398407220840454, |
| "logits/rejected": -1.140756607055664, |
| "logps/chosen": -0.3520175814628601, |
| "logps/ref_chosen": -0.30560025572776794, |
| "logps/ref_rejected": -0.34870997071266174, |
| "logps/rejected": -0.8857642412185669, |
| "loss": 0.6922, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.00046417320845648646, |
| "rewards/margins": 0.004906368907541037, |
| "rewards/rejected": -0.005370542407035828, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.425327277584825, |
| "grad_norm": 0.030055655166506767, |
| "learning_rate": 3.550893070773914e-07, |
| "logits/chosen": -1.1951026916503906, |
| "logits/rejected": -1.1337119340896606, |
| "logps/chosen": -0.6615334749221802, |
| "logps/ref_chosen": -0.30640166997909546, |
| "logps/ref_rejected": -0.2820543646812439, |
| "logps/rejected": -0.6949758529663086, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.003551317611709237, |
| "rewards/margins": 0.0005778972990810871, |
| "rewards/rejected": -0.0041292146779596806, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.4274646005877638, |
| "grad_norm": 0.03218178078532219, |
| "learning_rate": 3.5338962624163016e-07, |
| "logits/chosen": -1.2598803043365479, |
| "logits/rejected": -1.239607572555542, |
| "logps/chosen": -0.30639007687568665, |
| "logps/ref_chosen": -0.2894829511642456, |
| "logps/ref_rejected": -0.3109118938446045, |
| "logps/rejected": -0.39302414655685425, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.00016907168901525438, |
| "rewards/margins": 0.0006520507158711553, |
| "rewards/rejected": -0.0008211223757825792, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.42960192359070265, |
| "grad_norm": 0.016467725858092308, |
| "learning_rate": 3.516841607689501e-07, |
| "logits/chosen": -1.33991539478302, |
| "logits/rejected": -1.2228336334228516, |
| "logps/chosen": -0.37723565101623535, |
| "logps/ref_chosen": -0.3063179552555084, |
| "logps/ref_rejected": -0.28416600823402405, |
| "logps/rejected": -0.419727623462677, |
| "loss": 0.6927, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.0007091772858984768, |
| "rewards/margins": 0.000646438857074827, |
| "rewards/rejected": -0.0013556161429733038, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.4317392465936415, |
| "grad_norm": 0.03939535841345787, |
| "learning_rate": 3.499730060799352e-07, |
| "logits/chosen": -1.32651686668396, |
| "logits/rejected": -1.2822085618972778, |
| "logps/chosen": -0.3218104839324951, |
| "logps/ref_chosen": -0.2605050802230835, |
| "logps/ref_rejected": -0.29910099506378174, |
| "logps/rejected": -0.44259363412857056, |
| "loss": 0.6918, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.0006130539695732296, |
| "rewards/margins": 0.0008218720904551446, |
| "rewards/rejected": -0.0014349260600283742, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.4338765695965803, |
| "grad_norm": 0.016224516555666924, |
| "learning_rate": 3.482562579134809e-07, |
| "logits/chosen": -1.066400408744812, |
| "logits/rejected": -1.1194244623184204, |
| "logps/chosen": -0.3738459348678589, |
| "logps/ref_chosen": -0.29206493496894836, |
| "logps/ref_rejected": -0.2677422761917114, |
| "logps/rejected": -0.3497282564640045, |
| "loss": 0.6926, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.0008178100688382983, |
| "rewards/margins": 2.049633621936664e-06, |
| "rewards/rejected": -0.0008198596769943833, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.4360138925995191, |
| "grad_norm": 0.02971070446074009, |
| "learning_rate": 3.465340123214365e-07, |
| "logits/chosen": -1.1213406324386597, |
| "logits/rejected": -1.107667326927185, |
| "logps/chosen": -0.8933329582214355, |
| "logps/ref_chosen": -0.24884973466396332, |
| "logps/ref_rejected": -0.3260307312011719, |
| "logps/rejected": -1.1893765926361084, |
| "loss": 0.6928, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.006444831378757954, |
| "rewards/margins": 0.002188627142459154, |
| "rewards/rejected": -0.008633458986878395, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.4381512156024579, |
| "grad_norm": 0.030559560284018517, |
| "learning_rate": 3.448063656632321e-07, |
| "logits/chosen": -1.253186583518982, |
| "logits/rejected": -1.161086082458496, |
| "logps/chosen": -0.3733241558074951, |
| "logps/ref_chosen": -0.29920512437820435, |
| "logps/ref_rejected": -0.3016617000102997, |
| "logps/rejected": -0.387860506772995, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.0007411899860017002, |
| "rewards/margins": 0.00012079813313903287, |
| "rewards/rejected": -0.0008619881118647754, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.44028853860539674, |
| "grad_norm": 0.02169407345354557, |
| "learning_rate": 3.430734146004863e-07, |
| "logits/chosen": -1.274543285369873, |
| "logits/rejected": -1.144378900527954, |
| "logps/chosen": -0.6135156154632568, |
| "logps/ref_chosen": -0.22357094287872314, |
| "logps/ref_rejected": -0.21923862397670746, |
| "logps/rejected": -0.5741630792617798, |
| "loss": 0.6926, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.003899447387084365, |
| "rewards/margins": -0.0003502030158415437, |
| "rewards/rejected": -0.0035492442548274994, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.4424258616083356, |
| "grad_norm": 0.014234034344553947, |
| "learning_rate": 3.413352560915988e-07, |
| "logits/chosen": -1.149268627166748, |
| "logits/rejected": -1.123884916305542, |
| "logps/chosen": -0.44670218229293823, |
| "logps/ref_chosen": -0.31560367345809937, |
| "logps/ref_rejected": -0.30974385142326355, |
| "logps/rejected": -0.5978606939315796, |
| "loss": 0.693, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.0013109849533066154, |
| "rewards/margins": 0.001570183434523642, |
| "rewards/rejected": -0.00288116792216897, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.44456318461127436, |
| "grad_norm": 0.058319851756095886, |
| "learning_rate": 3.39591987386325e-07, |
| "logits/chosen": -1.1480997800827026, |
| "logits/rejected": -1.0918411016464233, |
| "logps/chosen": -0.3612717092037201, |
| "logps/ref_chosen": -0.2704487442970276, |
| "logps/ref_rejected": -0.24479150772094727, |
| "logps/rejected": -0.3357067108154297, |
| "loss": 0.6926, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.0009082297910936177, |
| "rewards/margins": 9.225404937751591e-07, |
| "rewards/rejected": -0.0009091523243114352, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.4467005076142132, |
| "grad_norm": 0.049540530890226364, |
| "learning_rate": 3.378437060203357e-07, |
| "logits/chosen": -1.407212495803833, |
| "logits/rejected": -1.3152122497558594, |
| "logps/chosen": -0.3691045939922333, |
| "logps/ref_chosen": -0.3024004101753235, |
| "logps/ref_rejected": -0.28876832127571106, |
| "logps/rejected": -0.37846246361732483, |
| "loss": 0.6925, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.0006670417496934533, |
| "rewards/margins": 0.00022990003344602883, |
| "rewards/rejected": -0.0008969418704509735, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.448837830617152, |
| "grad_norm": 0.04402080923318863, |
| "learning_rate": 3.360905098097587e-07, |
| "logits/chosen": -1.2295489311218262, |
| "logits/rejected": -1.1545143127441406, |
| "logps/chosen": -0.8086704611778259, |
| "logps/ref_chosen": -0.23235999047756195, |
| "logps/ref_rejected": -0.24142806231975555, |
| "logps/rejected": -1.004802942276001, |
| "loss": 0.6924, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.005763105116784573, |
| "rewards/margins": 0.001870643813163042, |
| "rewards/rejected": -0.007633748929947615, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.45097515362009083, |
| "grad_norm": 0.031321682035923004, |
| "learning_rate": 3.343324968457075e-07, |
| "logits/chosen": -1.1726460456848145, |
| "logits/rejected": -1.0996947288513184, |
| "logps/chosen": -0.3977208137512207, |
| "logps/ref_chosen": -0.34511813521385193, |
| "logps/ref_rejected": -0.32132789492607117, |
| "logps/rejected": -0.4332047700881958, |
| "loss": 0.6923, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.0005260267062112689, |
| "rewards/margins": 0.0005927419406361878, |
| "rewards/rejected": -0.0011187687050551176, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.45311247662302967, |
| "grad_norm": 0.023442508652806282, |
| "learning_rate": 3.325697654887918e-07, |
| "logits/chosen": -1.124049186706543, |
| "logits/rejected": -1.0663901567459106, |
| "logps/chosen": -0.42999467253685, |
| "logps/ref_chosen": -0.30062156915664673, |
| "logps/ref_rejected": -0.34384384751319885, |
| "logps/rejected": -0.7528426647186279, |
| "loss": 0.6919, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.0012937311548739672, |
| "rewards/margins": 0.002796256449073553, |
| "rewards/rejected": -0.004089987836778164, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.45524979962596845, |
| "grad_norm": 0.056702036410570145, |
| "learning_rate": 3.30802414363615e-07, |
| "logits/chosen": -1.0622425079345703, |
| "logits/rejected": -0.8048832416534424, |
| "logps/chosen": -0.6495295763015747, |
| "logps/ref_chosen": -0.2115727663040161, |
| "logps/ref_rejected": -0.18796564638614655, |
| "logps/rejected": -0.816286027431488, |
| "loss": 0.6915, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.004379567224532366, |
| "rewards/margins": 0.0019036362646147609, |
| "rewards/rejected": -0.006283204071223736, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.4573871226289073, |
| "grad_norm": 0.010415288619697094, |
| "learning_rate": 3.2903054235325613e-07, |
| "logits/chosen": -1.275780200958252, |
| "logits/rejected": -1.2921967506408691, |
| "logps/chosen": -0.3864087462425232, |
| "logps/ref_chosen": -0.29050976037979126, |
| "logps/ref_rejected": -0.26950719952583313, |
| "logps/rejected": -0.6814454793930054, |
| "loss": 0.6925, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.0009589899564161897, |
| "rewards/margins": 0.00316039239987731, |
| "rewards/rejected": -0.004119382239878178, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.45952444563184613, |
| "grad_norm": 0.03175237029790878, |
| "learning_rate": 3.272542485937368e-07, |
| "logits/chosen": -1.2462847232818604, |
| "logits/rejected": -1.3483045101165771, |
| "logps/chosen": -0.7711195945739746, |
| "logps/ref_chosen": -0.2847660183906555, |
| "logps/ref_rejected": -0.35884594917297363, |
| "logps/rejected": -1.069549560546875, |
| "loss": 0.6925, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.004863535985350609, |
| "rewards/margins": 0.002243500202894211, |
| "rewards/rejected": -0.00710703618824482, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.4616617686347849, |
| "grad_norm": 0.025863736867904663, |
| "learning_rate": 3.2547363246847546e-07, |
| "logits/chosen": -1.1209917068481445, |
| "logits/rejected": -1.1349291801452637, |
| "logps/chosen": -0.46421563625335693, |
| "logps/ref_chosen": -0.3321390450000763, |
| "logps/ref_rejected": -0.3509797751903534, |
| "logps/rejected": -1.0869563817977905, |
| "loss": 0.6919, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.0013207655865699053, |
| "rewards/margins": 0.006039001513272524, |
| "rewards/rejected": -0.007359765935689211, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.46379909163772376, |
| "grad_norm": 0.07724393904209137, |
| "learning_rate": 3.2368879360272606e-07, |
| "logits/chosen": -1.1863185167312622, |
| "logits/rejected": -1.1357780694961548, |
| "logps/chosen": -0.958145022392273, |
| "logps/ref_chosen": -0.2845013737678528, |
| "logps/ref_rejected": -0.3145434856414795, |
| "logps/rejected": -0.5520614981651306, |
| "loss": 0.6932, |
| "rewards/accuracies": 0.3125, |
| "rewards/chosen": -0.0067364368587732315, |
| "rewards/margins": -0.004361255560070276, |
| "rewards/rejected": -0.0023751803673803806, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.46593641464066254, |
| "grad_norm": 0.024524612352252007, |
| "learning_rate": 3.218998318580043e-07, |
| "logits/chosen": -1.2841641902923584, |
| "logits/rejected": -1.1925084590911865, |
| "logps/chosen": -0.3108392357826233, |
| "logps/ref_chosen": -0.22942832112312317, |
| "logps/ref_rejected": -0.250807523727417, |
| "logps/rejected": -0.47802409529685974, |
| "loss": 0.6927, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.0008141091093420982, |
| "rewards/margins": 0.0014580574352294207, |
| "rewards/rejected": -0.0022721663117408752, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.4680737376436014, |
| "grad_norm": 0.04812135547399521, |
| "learning_rate": 3.201068473265007e-07, |
| "logits/chosen": -1.0295281410217285, |
| "logits/rejected": -1.0063157081604004, |
| "logps/chosen": -0.3524959087371826, |
| "logps/ref_chosen": -0.28826600313186646, |
| "logps/ref_rejected": -0.2538532614707947, |
| "logps/rejected": -0.3144063949584961, |
| "loss": 0.6926, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.0006422993028536439, |
| "rewards/margins": -3.676795677165501e-05, |
| "rewards/rejected": -0.0006055312696844339, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.4702110606465402, |
| "grad_norm": 0.03668051213026047, |
| "learning_rate": 3.1830994032548e-07, |
| "logits/chosen": -1.3302749395370483, |
| "logits/rejected": -1.2410955429077148, |
| "logps/chosen": -0.5558737516403198, |
| "logps/ref_chosen": -0.27361539006233215, |
| "logps/ref_rejected": -0.26842135190963745, |
| "logps/rejected": -0.6113893985748291, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.0028225830756127834, |
| "rewards/margins": 0.0006070974050089717, |
| "rewards/rejected": -0.003429680597037077, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.472348383649479, |
| "grad_norm": 0.06386542320251465, |
| "learning_rate": 3.1650921139166874e-07, |
| "logits/chosen": -1.061380386352539, |
| "logits/rejected": -1.1164382696151733, |
| "logps/chosen": -0.27319979667663574, |
| "logps/ref_chosen": -0.2526838779449463, |
| "logps/ref_rejected": -0.2524298429489136, |
| "logps/rejected": -0.2999592423439026, |
| "loss": 0.6923, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.00020515926007647067, |
| "rewards/margins": 0.0002701346529647708, |
| "rewards/rejected": -0.00047529389848932624, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.47448570665241785, |
| "grad_norm": 0.01867862045764923, |
| "learning_rate": 3.147047612756302e-07, |
| "logits/chosen": -1.2960277795791626, |
| "logits/rejected": -1.1136997938156128, |
| "logps/chosen": -0.5146414637565613, |
| "logps/ref_chosen": -0.2859315574169159, |
| "logps/ref_rejected": -0.24996128678321838, |
| "logps/rejected": -0.5635550022125244, |
| "loss": 0.6928, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.0022870993707329035, |
| "rewards/margins": 0.0008488375460729003, |
| "rewards/rejected": -0.003135936800390482, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.4766230296553567, |
| "grad_norm": 0.05976104736328125, |
| "learning_rate": 3.128966909361271e-07, |
| "logits/chosen": -1.2074111700057983, |
| "logits/rejected": -1.1333309412002563, |
| "logps/chosen": -0.46035608649253845, |
| "logps/ref_chosen": -0.22987782955169678, |
| "logps/ref_rejected": -0.24911126494407654, |
| "logps/rejected": -0.5576910376548767, |
| "loss": 0.6928, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.0023047823924571276, |
| "rewards/margins": 0.0007810150273144245, |
| "rewards/rejected": -0.0030857976526021957, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.4787603526582955, |
| "grad_norm": 0.0219325739890337, |
| "learning_rate": 3.110851015344735e-07, |
| "logits/chosen": -1.1878727674484253, |
| "logits/rejected": -1.219316840171814, |
| "logps/chosen": -0.3969532549381256, |
| "logps/ref_chosen": -0.2673417627811432, |
| "logps/ref_rejected": -0.24439971148967743, |
| "logps/rejected": -0.667335033416748, |
| "loss": 0.6922, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.0012961149914190173, |
| "rewards/margins": 0.0029332388658076525, |
| "rewards/rejected": -0.004229353740811348, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.4808976756612343, |
| "grad_norm": 0.014455790631473064, |
| "learning_rate": 3.0927009442887437e-07, |
| "logits/chosen": -1.082927942276001, |
| "logits/rejected": -1.1634637117385864, |
| "logps/chosen": -0.40938621759414673, |
| "logps/ref_chosen": -0.23671139776706696, |
| "logps/ref_rejected": -0.24473784863948822, |
| "logps/rejected": -0.4257688522338867, |
| "loss": 0.6927, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.0017267480725422502, |
| "rewards/margins": 8.35619866847992e-05, |
| "rewards/rejected": -0.0018103100592270494, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.4830349986641731, |
| "grad_norm": 0.025662483647465706, |
| "learning_rate": 3.074517711687549e-07, |
| "logits/chosen": -1.0715892314910889, |
| "logits/rejected": -1.0443625450134277, |
| "logps/chosen": -0.508273720741272, |
| "logps/ref_chosen": -0.2699495553970337, |
| "logps/ref_rejected": -0.3265015482902527, |
| "logps/rejected": -0.6757447123527527, |
| "loss": 0.6926, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.0023832411970943213, |
| "rewards/margins": 0.0011091906344518065, |
| "rewards/rejected": -0.0034924319479614496, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.48517232166711194, |
| "grad_norm": 0.018232734873890877, |
| "learning_rate": 3.056302334890786e-07, |
| "logits/chosen": -1.2078466415405273, |
| "logits/rejected": -1.094984531402588, |
| "logps/chosen": -0.33580055832862854, |
| "logps/ref_chosen": -0.22288911044597626, |
| "logps/ref_rejected": -0.2164846807718277, |
| "logps/rejected": -0.5663087964057922, |
| "loss": 0.6927, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.0011291146511211991, |
| "rewards/margins": 0.0023691265378147364, |
| "rewards/rejected": -0.003498241538181901, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.4873096446700508, |
| "grad_norm": 0.03843236714601517, |
| "learning_rate": 3.038055833046555e-07, |
| "logits/chosen": -1.3582746982574463, |
| "logits/rejected": -1.2434998750686646, |
| "logps/chosen": -0.7266556620597839, |
| "logps/ref_chosen": -0.273609459400177, |
| "logps/ref_rejected": -0.2809239625930786, |
| "logps/rejected": -1.2620277404785156, |
| "loss": 0.692, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.004530462436378002, |
| "rewards/margins": 0.0052805752493441105, |
| "rewards/rejected": -0.009811037220060825, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.48944696767298956, |
| "grad_norm": 0.020652027800679207, |
| "learning_rate": 3.0197792270443976e-07, |
| "logits/chosen": -1.2343034744262695, |
| "logits/rejected": -1.1307424306869507, |
| "logps/chosen": -0.6870149374008179, |
| "logps/ref_chosen": -0.25147154927253723, |
| "logps/ref_rejected": -0.22538992762565613, |
| "logps/rejected": -0.3276790678501129, |
| "loss": 0.6929, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.004355433862656355, |
| "rewards/margins": -0.003332542022690177, |
| "rewards/rejected": -0.0010228916071355343, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.4915842906759284, |
| "grad_norm": 0.024678058922290802, |
| "learning_rate": 3.001473539458182e-07, |
| "logits/chosen": -1.2614054679870605, |
| "logits/rejected": -1.175713300704956, |
| "logps/chosen": -0.572695255279541, |
| "logps/ref_chosen": -0.2590100169181824, |
| "logps/ref_rejected": -0.31907764077186584, |
| "logps/rejected": -0.8820576071739197, |
| "loss": 0.6926, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.003136852290481329, |
| "rewards/margins": 0.00249294750392437, |
| "rewards/rejected": -0.005629799794405699, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.49372161367886724, |
| "grad_norm": 0.0628771185874939, |
| "learning_rate": 2.983139794488883e-07, |
| "logits/chosen": -1.3149338960647583, |
| "logits/rejected": -1.2248057126998901, |
| "logps/chosen": -1.142967939376831, |
| "logps/ref_chosen": -0.2914421856403351, |
| "logps/ref_rejected": -0.2705422341823578, |
| "logps/rejected": -0.5634757876396179, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.008515259250998497, |
| "rewards/margins": -0.005585923790931702, |
| "rewards/rejected": -0.002929334994405508, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.49585893668180603, |
| "grad_norm": 0.03054133616387844, |
| "learning_rate": 2.964779017907287e-07, |
| "logits/chosen": -1.1597241163253784, |
| "logits/rejected": -1.203474760055542, |
| "logps/chosen": -0.45757320523262024, |
| "logps/ref_chosen": -0.28872498869895935, |
| "logps/ref_rejected": -0.27350252866744995, |
| "logps/rejected": -0.5431838631629944, |
| "loss": 0.6926, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.0016884823562577367, |
| "rewards/margins": 0.0010083310771733522, |
| "rewards/rejected": -0.0026968135498464108, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.49799625968474487, |
| "grad_norm": 0.015557843260467052, |
| "learning_rate": 2.9463922369965915e-07, |
| "logits/chosen": -1.041637659072876, |
| "logits/rejected": -1.0381265878677368, |
| "logps/chosen": -0.5345252752304077, |
| "logps/ref_chosen": -0.29573145508766174, |
| "logps/ref_rejected": -0.4063323140144348, |
| "logps/rejected": -0.7988796830177307, |
| "loss": 0.693, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.002387937856838107, |
| "rewards/margins": 0.0015375366201624274, |
| "rewards/rejected": -0.003925474360585213, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.5001335826876837, |
| "grad_norm": 0.017690466716885567, |
| "learning_rate": 2.927980480494938e-07, |
| "logits/chosen": -1.1971187591552734, |
| "logits/rejected": -1.1160141229629517, |
| "logps/chosen": -0.5526835322380066, |
| "logps/ref_chosen": -0.27271074056625366, |
| "logps/ref_rejected": -0.264575332403183, |
| "logps/rejected": -0.5512862205505371, |
| "loss": 0.6927, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.0027997277211397886, |
| "rewards/margins": 6.738087540725246e-05, |
| "rewards/rejected": -0.0028671089094132185, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.5022709056906225, |
| "grad_norm": 0.028061063960194588, |
| "learning_rate": 2.909544778537844e-07, |
| "logits/chosen": -1.2941550016403198, |
| "logits/rejected": -1.2209076881408691, |
| "logps/chosen": -0.4346168637275696, |
| "logps/ref_chosen": -0.2890174686908722, |
| "logps/ref_rejected": -0.26074814796447754, |
| "logps/rejected": -0.4420887231826782, |
| "loss": 0.6927, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.0014559938572347164, |
| "rewards/margins": 0.0003574119182303548, |
| "rewards/rejected": -0.001813405891880393, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.5044082286935613, |
| "grad_norm": 0.0402521938085556, |
| "learning_rate": 2.8910861626005773e-07, |
| "logits/chosen": -1.2046964168548584, |
| "logits/rejected": -1.0965062379837036, |
| "logps/chosen": -0.5011724233627319, |
| "logps/ref_chosen": -0.2599114775657654, |
| "logps/ref_rejected": -0.24466641247272491, |
| "logps/rejected": -0.441351056098938, |
| "loss": 0.6924, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.002412609290331602, |
| "rewards/margins": -0.00044576276559382677, |
| "rewards/rejected": -0.001966846641153097, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.5065455516965002, |
| "grad_norm": 0.01907428726553917, |
| "learning_rate": 2.872605665440436e-07, |
| "logits/chosen": -1.2732386589050293, |
| "logits/rejected": -1.1748043298721313, |
| "logps/chosen": -0.48162317276000977, |
| "logps/ref_chosen": -0.29580262303352356, |
| "logps/ref_rejected": -0.31111472845077515, |
| "logps/rejected": -0.4600691497325897, |
| "loss": 0.6926, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.0018582054181024432, |
| "rewards/margins": -0.00036866156733594835, |
| "rewards/rejected": -0.0014895439380779862, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.508682874699439, |
| "grad_norm": 0.03141213208436966, |
| "learning_rate": 2.8541043210389726e-07, |
| "logits/chosen": -1.0759719610214233, |
| "logits/rejected": -1.1523224115371704, |
| "logps/chosen": -0.3454059362411499, |
| "logps/ref_chosen": -0.26050707697868347, |
| "logps/ref_rejected": -0.2645397186279297, |
| "logps/rejected": -0.5539515018463135, |
| "loss": 0.6917, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.0008489887695759535, |
| "rewards/margins": 0.002045129192993045, |
| "rewards/rejected": -0.0028941179625689983, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.5108201977023777, |
| "grad_norm": 0.05162933096289635, |
| "learning_rate": 2.8355831645441387e-07, |
| "logits/chosen": -1.3785024881362915, |
| "logits/rejected": -1.419005036354065, |
| "logps/chosen": -0.3846661150455475, |
| "logps/ref_chosen": -0.2951856255531311, |
| "logps/ref_rejected": -0.3329465687274933, |
| "logps/rejected": -0.5529570579528809, |
| "loss": 0.6924, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.0008948049508035183, |
| "rewards/margins": 0.0013053002767264843, |
| "rewards/rejected": -0.0022001052275300026, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.5129575207053166, |
| "grad_norm": 0.045119889080524445, |
| "learning_rate": 2.817043232212371e-07, |
| "logits/chosen": -1.3011969327926636, |
| "logits/rejected": -1.2362924814224243, |
| "logps/chosen": -0.4428254961967468, |
| "logps/ref_chosen": -0.32135143876075745, |
| "logps/ref_rejected": -0.3076840043067932, |
| "logps/rejected": -0.6046063303947449, |
| "loss": 0.6922, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.0012147403322160244, |
| "rewards/margins": 0.0017544830916449428, |
| "rewards/rejected": -0.002969223540276289, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.5150948437082554, |
| "grad_norm": 0.0616716630756855, |
| "learning_rate": 2.7984855613506106e-07, |
| "logits/chosen": -1.320504903793335, |
| "logits/rejected": -1.2690130472183228, |
| "logps/chosen": -0.48740828037261963, |
| "logps/ref_chosen": -0.23969341814517975, |
| "logps/ref_rejected": -0.24840842187404633, |
| "logps/rejected": -0.4508085250854492, |
| "loss": 0.6922, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.002477148547768593, |
| "rewards/margins": -0.00045314725139178336, |
| "rewards/rejected": -0.002024001209065318, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.5172321667111942, |
| "grad_norm": 0.05075347051024437, |
| "learning_rate": 2.7799111902582693e-07, |
| "logits/chosen": -1.3793060779571533, |
| "logits/rejected": -1.2293661832809448, |
| "logps/chosen": -0.38729506731033325, |
| "logps/ref_chosen": -0.2713886499404907, |
| "logps/ref_rejected": -0.20832695066928864, |
| "logps/rejected": -0.28428661823272705, |
| "loss": 0.6918, |
| "rewards/accuracies": 0.25, |
| "rewards/chosen": -0.0011590637732297182, |
| "rewards/margins": -0.00039946704055182636, |
| "rewards/rejected": -0.0007595966453664005, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.5193694897141331, |
| "grad_norm": 0.07911568880081177, |
| "learning_rate": 2.761321158169134e-07, |
| "logits/chosen": -1.2099862098693848, |
| "logits/rejected": -1.2692921161651611, |
| "logps/chosen": -0.46820712089538574, |
| "logps/ref_chosen": -0.3131045997142792, |
| "logps/ref_rejected": -0.30705371499061584, |
| "logps/rejected": -1.4385590553283691, |
| "loss": 0.6914, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.0015510257799178362, |
| "rewards/margins": 0.00976402685046196, |
| "rewards/rejected": -0.011315053328871727, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.5215068127170719, |
| "grad_norm": 0.027160031720995903, |
| "learning_rate": 2.74271650519322e-07, |
| "logits/chosen": -1.2768421173095703, |
| "logits/rejected": -1.2590028047561646, |
| "logps/chosen": -0.5070095658302307, |
| "logps/ref_chosen": -0.28147321939468384, |
| "logps/ref_rejected": -0.3335879147052765, |
| "logps/rejected": -0.7072677612304688, |
| "loss": 0.6924, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.0022553636226803064, |
| "rewards/margins": 0.001481434446759522, |
| "rewards/rejected": -0.0037367981858551502, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.5236441357200107, |
| "grad_norm": 0.026352498680353165, |
| "learning_rate": 2.7240982722585837e-07, |
| "logits/chosen": -1.1532797813415527, |
| "logits/rejected": -1.150524377822876, |
| "logps/chosen": -0.4151303768157959, |
| "logps/ref_chosen": -0.2723257839679718, |
| "logps/ref_rejected": -0.29468289017677307, |
| "logps/rejected": -0.5018883347511292, |
| "loss": 0.6927, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.0014280457980930805, |
| "rewards/margins": 0.0006440083961933851, |
| "rewards/rejected": -0.002072053961455822, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.5257814587229495, |
| "grad_norm": 0.06314732134342194, |
| "learning_rate": 2.705467501053076e-07, |
| "logits/chosen": -1.4114865064620972, |
| "logits/rejected": -1.463373064994812, |
| "logps/chosen": -0.694495439529419, |
| "logps/ref_chosen": -0.28253138065338135, |
| "logps/ref_rejected": -0.2926999032497406, |
| "logps/rejected": -1.0575720071792603, |
| "loss": 0.6921, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.004119640216231346, |
| "rewards/margins": 0.0035290804225951433, |
| "rewards/rejected": -0.00764872133731842, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.5279187817258884, |
| "grad_norm": 0.03255361691117287, |
| "learning_rate": 2.6868252339660607e-07, |
| "logits/chosen": -1.0355377197265625, |
| "logits/rejected": -1.0407332181930542, |
| "logps/chosen": -1.710400938987732, |
| "logps/ref_chosen": -0.35004061460494995, |
| "logps/ref_rejected": -0.38053083419799805, |
| "logps/rejected": -2.5348241329193115, |
| "loss": 0.692, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.013603603467345238, |
| "rewards/margins": 0.00793933030217886, |
| "rewards/rejected": -0.021542930975556374, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.5300561047288271, |
| "grad_norm": 0.12891939282417297, |
| "learning_rate": 2.6681725140300995e-07, |
| "logits/chosen": -1.3145711421966553, |
| "logits/rejected": -1.2533609867095947, |
| "logps/chosen": -0.43842440843582153, |
| "logps/ref_chosen": -0.25527048110961914, |
| "logps/ref_rejected": -0.288927286863327, |
| "logps/rejected": -0.5801223516464233, |
| "loss": 0.6923, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.0018315394409000874, |
| "rewards/margins": 0.0010804110206663609, |
| "rewards/rejected": -0.002911950461566448, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.5321934277317659, |
| "grad_norm": 0.05915815755724907, |
| "learning_rate": 2.6495103848625854e-07, |
| "logits/chosen": -1.4148733615875244, |
| "logits/rejected": -1.3119208812713623, |
| "logps/chosen": -0.6767380833625793, |
| "logps/ref_chosen": -0.24076546728610992, |
| "logps/ref_rejected": -0.2792627811431885, |
| "logps/rejected": -0.8662442564964294, |
| "loss": 0.6917, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.00435972586274147, |
| "rewards/margins": 0.0015100898453965783, |
| "rewards/rejected": -0.0058698151260614395, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.5343307507347048, |
| "grad_norm": 0.09488384425640106, |
| "learning_rate": 2.63083989060736e-07, |
| "logits/chosen": -1.1380071640014648, |
| "logits/rejected": -1.1282098293304443, |
| "logps/chosen": -0.7926136255264282, |
| "logps/ref_chosen": -0.2788524329662323, |
| "logps/ref_rejected": -0.32340526580810547, |
| "logps/rejected": -1.2131142616271973, |
| "loss": 0.6927, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.005137611646205187, |
| "rewards/margins": 0.0037594791501760483, |
| "rewards/rejected": -0.008897090330719948, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.5364680737376436, |
| "grad_norm": 0.02635360136628151, |
| "learning_rate": 2.6121620758762875e-07, |
| "logits/chosen": -1.3205058574676514, |
| "logits/rejected": -1.3159669637680054, |
| "logps/chosen": -0.7755053043365479, |
| "logps/ref_chosen": -0.288030207157135, |
| "logps/ref_rejected": -0.32270681858062744, |
| "logps/rejected": -0.7502388954162598, |
| "loss": 0.6923, |
| "rewards/accuracies": 0.3125, |
| "rewards/chosen": -0.004874750506132841, |
| "rewards/margins": -0.0005994292441755533, |
| "rewards/rejected": -0.004275321029126644, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.5386053967405824, |
| "grad_norm": 0.0846584141254425, |
| "learning_rate": 2.593477985690815e-07, |
| "logits/chosen": -1.1941680908203125, |
| "logits/rejected": -1.2224732637405396, |
| "logps/chosen": -1.1049282550811768, |
| "logps/ref_chosen": -0.32901710271835327, |
| "logps/ref_rejected": -0.26958346366882324, |
| "logps/rejected": -1.4523200988769531, |
| "loss": 0.6923, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.00775911146774888, |
| "rewards/margins": 0.004068254958838224, |
| "rewards/rejected": -0.011827366426587105, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.5407427197435213, |
| "grad_norm": 0.1053781807422638, |
| "learning_rate": 2.574788665423496e-07, |
| "logits/chosen": -1.1324328184127808, |
| "logits/rejected": -1.1272873878479004, |
| "logps/chosen": -0.4599221348762512, |
| "logps/ref_chosen": -0.29418516159057617, |
| "logps/ref_rejected": -0.24825821816921234, |
| "logps/rejected": -0.4834541976451874, |
| "loss": 0.6919, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.0016573700122535229, |
| "rewards/margins": 0.0006945900386199355, |
| "rewards/rejected": -0.00235196016728878, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.5428800427464601, |
| "grad_norm": 0.03543369472026825, |
| "learning_rate": 2.5560951607395126e-07, |
| "logits/chosen": -1.2491382360458374, |
| "logits/rejected": -1.203080177307129, |
| "logps/chosen": -0.5461034178733826, |
| "logps/ref_chosen": -0.28536537289619446, |
| "logps/ref_rejected": -0.28821516036987305, |
| "logps/rejected": -0.9339559078216553, |
| "loss": 0.6922, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.002607380971312523, |
| "rewards/margins": 0.003850026521831751, |
| "rewards/rejected": -0.006457407493144274, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.5450173657493989, |
| "grad_norm": 0.05195508524775505, |
| "learning_rate": 2.537398517538159e-07, |
| "logits/chosen": -1.2177194356918335, |
| "logits/rejected": -1.219588279724121, |
| "logps/chosen": -0.6220858097076416, |
| "logps/ref_chosen": -0.24220672249794006, |
| "logps/ref_rejected": -0.29989081621170044, |
| "logps/rejected": -1.686585783958435, |
| "loss": 0.6912, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.0037987902760505676, |
| "rewards/margins": 0.010068160481750965, |
| "rewards/rejected": -0.013866949826478958, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.5471546887523377, |
| "grad_norm": 0.06374656409025192, |
| "learning_rate": 2.518699781894332e-07, |
| "logits/chosen": -1.2281252145767212, |
| "logits/rejected": -1.230895757675171, |
| "logps/chosen": -1.0146502256393433, |
| "logps/ref_chosen": -0.25809556245803833, |
| "logps/ref_rejected": -0.2735273838043213, |
| "logps/rejected": -2.9322500228881836, |
| "loss": 0.6911, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.007565546315163374, |
| "rewards/margins": 0.01902167685329914, |
| "rewards/rejected": -0.026587221771478653, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.5492920117552765, |
| "grad_norm": 0.07888156175613403, |
| "learning_rate": 2.5e-07, |
| "logits/chosen": -1.1171998977661133, |
| "logits/rejected": -1.0384949445724487, |
| "logps/chosen": -0.49252837896347046, |
| "logps/ref_chosen": -0.2539077401161194, |
| "logps/ref_rejected": -0.25393056869506836, |
| "logps/rejected": -0.6725160479545593, |
| "loss": 0.6923, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.0023862060625106096, |
| "rewards/margins": 0.0017996486276388168, |
| "rewards/rejected": -0.00418585492298007, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.5514293347582153, |
| "grad_norm": 0.04931643605232239, |
| "learning_rate": 2.4813002181056676e-07, |
| "logits/chosen": -1.158300518989563, |
| "logits/rejected": -1.1446212530136108, |
| "logps/chosen": -0.5056754350662231, |
| "logps/ref_chosen": -0.22038325667381287, |
| "logps/ref_rejected": -0.2806495130062103, |
| "logps/rejected": -1.356742024421692, |
| "loss": 0.6911, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.002852922072634101, |
| "rewards/margins": 0.007908002473413944, |
| "rewards/rejected": -0.010760924778878689, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.5535666577611541, |
| "grad_norm": 0.050788938999176025, |
| "learning_rate": 2.4626014824618413e-07, |
| "logits/chosen": -1.3293839693069458, |
| "logits/rejected": -1.3154393434524536, |
| "logps/chosen": -0.6740342378616333, |
| "logps/ref_chosen": -0.3380240201950073, |
| "logps/ref_rejected": -0.3627238869667053, |
| "logps/rejected": -0.8516584038734436, |
| "loss": 0.6918, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.0033601014874875546, |
| "rewards/margins": 0.001529243658296764, |
| "rewards/rejected": -0.00488934526219964, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.555703980764093, |
| "grad_norm": 0.05147051811218262, |
| "learning_rate": 2.4439048392604877e-07, |
| "logits/chosen": -1.0589016675949097, |
| "logits/rejected": -1.0961642265319824, |
| "logps/chosen": -0.8991824984550476, |
| "logps/ref_chosen": -0.2006845474243164, |
| "logps/ref_rejected": -0.25660961866378784, |
| "logps/rejected": -0.984327495098114, |
| "loss": 0.6917, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.0069849793799221516, |
| "rewards/margins": 0.00029219940188340843, |
| "rewards/rejected": -0.007277178578078747, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.5578413037670318, |
| "grad_norm": 0.034513503313064575, |
| "learning_rate": 2.4252113345765043e-07, |
| "logits/chosen": -1.0097726583480835, |
| "logits/rejected": -0.9577274322509766, |
| "logps/chosen": -0.5596430897712708, |
| "logps/ref_chosen": -0.24353596568107605, |
| "logps/ref_rejected": -0.23627209663391113, |
| "logps/rejected": -0.6421999931335449, |
| "loss": 0.693, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.003161071101203561, |
| "rewards/margins": 0.0008982080034911633, |
| "rewards/rejected": -0.00405927887186408, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.5599786267699706, |
| "grad_norm": 0.045014623552560806, |
| "learning_rate": 2.406522014309186e-07, |
| "logits/chosen": -1.2633204460144043, |
| "logits/rejected": -1.273048758506775, |
| "logps/chosen": -1.209998607635498, |
| "logps/ref_chosen": -0.29539167881011963, |
| "logps/ref_rejected": -0.29773807525634766, |
| "logps/rejected": -1.8857746124267578, |
| "loss": 0.6921, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.00914607010781765, |
| "rewards/margins": 0.006734295282512903, |
| "rewards/rejected": -0.015880364924669266, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.5621159497729095, |
| "grad_norm": 0.03856293857097626, |
| "learning_rate": 2.3878379241237134e-07, |
| "logits/chosen": -1.2527753114700317, |
| "logits/rejected": -1.2550976276397705, |
| "logps/chosen": -1.026818871498108, |
| "logps/ref_chosen": -0.33077529072761536, |
| "logps/ref_rejected": -0.30703988671302795, |
| "logps/rejected": -1.0310285091400146, |
| "loss": 0.6923, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.00696043623611331, |
| "rewards/margins": 0.00027945078909397125, |
| "rewards/rejected": -0.007239886559545994, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.5642532727758482, |
| "grad_norm": 0.060744673013687134, |
| "learning_rate": 2.3691601093926402e-07, |
| "logits/chosen": -1.2060753107070923, |
| "logits/rejected": -1.180355191230774, |
| "logps/chosen": -0.844327986240387, |
| "logps/ref_chosen": -0.30627042055130005, |
| "logps/ref_rejected": -0.3010614812374115, |
| "logps/rejected": -1.2352372407913208, |
| "loss": 0.692, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.005380576476454735, |
| "rewards/margins": 0.00396118126809597, |
| "rewards/rejected": -0.00934175681322813, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.566390595778787, |
| "grad_norm": 0.050474196672439575, |
| "learning_rate": 2.3504896151374144e-07, |
| "logits/chosen": -1.3079190254211426, |
| "logits/rejected": -1.348214030265808, |
| "logps/chosen": -0.8727208375930786, |
| "logps/ref_chosen": -0.27946937084198, |
| "logps/ref_rejected": -0.3112008273601532, |
| "logps/rejected": -1.0833746194839478, |
| "loss": 0.6912, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.00593251409009099, |
| "rewards/margins": 0.0017892239848151803, |
| "rewards/rejected": -0.0077217379584908485, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.5685279187817259, |
| "grad_norm": 0.03653128445148468, |
| "learning_rate": 2.3318274859699008e-07, |
| "logits/chosen": -1.165181040763855, |
| "logits/rejected": -1.2644857168197632, |
| "logps/chosen": -0.4418669044971466, |
| "logps/ref_chosen": -0.23612000048160553, |
| "logps/ref_rejected": -0.2620118260383606, |
| "logps/rejected": -1.1574325561523438, |
| "loss": 0.6926, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.0020574689842760563, |
| "rewards/margins": 0.006896737031638622, |
| "rewards/rejected": -0.008954205550253391, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.5706652417846647, |
| "grad_norm": 0.06005558744072914, |
| "learning_rate": 2.3131747660339394e-07, |
| "logits/chosen": -1.3468873500823975, |
| "logits/rejected": -1.3267701864242554, |
| "logps/chosen": -1.9534547328948975, |
| "logps/ref_chosen": -0.2608641982078552, |
| "logps/ref_rejected": -0.2739013135433197, |
| "logps/rejected": -1.2592928409576416, |
| "loss": 0.6924, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.016925904899835587, |
| "rewards/margins": -0.00707198865711689, |
| "rewards/rejected": -0.009853916242718697, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.5728025647876035, |
| "grad_norm": 0.06628505885601044, |
| "learning_rate": 2.2945324989469243e-07, |
| "logits/chosen": -1.1346838474273682, |
| "logits/rejected": -1.1011766195297241, |
| "logps/chosen": -0.7128645777702332, |
| "logps/ref_chosen": -0.28281089663505554, |
| "logps/ref_rejected": -0.22672079503536224, |
| "logps/rejected": -1.8263014554977417, |
| "loss": 0.6903, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.004300536587834358, |
| "rewards/margins": 0.01169527042657137, |
| "rewards/rejected": -0.015995807945728302, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.5749398877905424, |
| "grad_norm": 0.05300581455230713, |
| "learning_rate": 2.2759017277414164e-07, |
| "logits/chosen": -1.2957395315170288, |
| "logits/rejected": -1.2437419891357422, |
| "logps/chosen": -0.8310694694519043, |
| "logps/ref_chosen": -0.27617210149765015, |
| "logps/ref_rejected": -0.2774162292480469, |
| "logps/rejected": -0.7449780106544495, |
| "loss": 0.693, |
| "rewards/accuracies": 0.3125, |
| "rewards/chosen": -0.0055489735677838326, |
| "rewards/margins": -0.0008733557187952101, |
| "rewards/rejected": -0.004675617907196283, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.5770772107934812, |
| "grad_norm": 0.03577202185988426, |
| "learning_rate": 2.2572834948067795e-07, |
| "logits/chosen": -1.0486654043197632, |
| "logits/rejected": -1.0856605768203735, |
| "logps/chosen": -0.38237008452415466, |
| "logps/ref_chosen": -0.26785731315612793, |
| "logps/ref_rejected": -0.24267315864562988, |
| "logps/rejected": -0.4953690767288208, |
| "loss": 0.693, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.0011451281607151031, |
| "rewards/margins": 0.0013818310108035803, |
| "rewards/rejected": -0.0025269591715186834, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.57921453379642, |
| "grad_norm": 0.03075655736029148, |
| "learning_rate": 2.2386788418308665e-07, |
| "logits/chosen": -1.081438660621643, |
| "logits/rejected": -1.1146409511566162, |
| "logps/chosen": -1.3132230043411255, |
| "logps/ref_chosen": -0.25421690940856934, |
| "logps/ref_rejected": -0.3060562014579773, |
| "logps/rejected": -1.767133355140686, |
| "loss": 0.692, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.01059005968272686, |
| "rewards/margins": 0.004020710941404104, |
| "rewards/rejected": -0.014610771089792252, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.5813518567993589, |
| "grad_norm": 0.043300192803144455, |
| "learning_rate": 2.2200888097417302e-07, |
| "logits/chosen": -1.1108338832855225, |
| "logits/rejected": -0.9933065176010132, |
| "logps/chosen": -1.430296778678894, |
| "logps/ref_chosen": -0.19198578596115112, |
| "logps/ref_rejected": -0.21381264925003052, |
| "logps/rejected": -1.3431994915008545, |
| "loss": 0.6912, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.012383109889924526, |
| "rewards/margins": -0.0010892393765971065, |
| "rewards/rejected": -0.011293869465589523, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.5834891798022976, |
| "grad_norm": 0.024648431688547134, |
| "learning_rate": 2.2015144386493895e-07, |
| "logits/chosen": -1.095705509185791, |
| "logits/rejected": -1.063516616821289, |
| "logps/chosen": -0.9958860278129578, |
| "logps/ref_chosen": -0.22737827897071838, |
| "logps/ref_rejected": -0.24414074420928955, |
| "logps/rejected": -1.147032618522644, |
| "loss": 0.6923, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.007685077376663685, |
| "rewards/margins": 0.0013438413152471185, |
| "rewards/rejected": -0.009028918109834194, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.5856265028052364, |
| "grad_norm": 0.04787592217326164, |
| "learning_rate": 2.1829567677876297e-07, |
| "logits/chosen": -1.0837037563323975, |
| "logits/rejected": -1.077255129814148, |
| "logps/chosen": -0.9055925011634827, |
| "logps/ref_chosen": -0.2588505148887634, |
| "logps/ref_rejected": -0.23681886494159698, |
| "logps/rejected": -0.9611456394195557, |
| "loss": 0.6922, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.00646741921082139, |
| "rewards/margins": 0.0007758483407087624, |
| "rewards/rejected": -0.007243267726153135, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.5877638258081752, |
| "grad_norm": 0.05348595976829529, |
| "learning_rate": 2.164416835455862e-07, |
| "logits/chosen": -0.8692365288734436, |
| "logits/rejected": -0.7735700011253357, |
| "logps/chosen": -1.0927891731262207, |
| "logps/ref_chosen": -0.2734217047691345, |
| "logps/ref_rejected": -0.2641502022743225, |
| "logps/rejected": -1.1788606643676758, |
| "loss": 0.6924, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.008193674497306347, |
| "rewards/margins": 0.0009534317068755627, |
| "rewards/rejected": -0.009147105738520622, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.5899011488111141, |
| "grad_norm": 0.050851721316576004, |
| "learning_rate": 2.1458956789610277e-07, |
| "logits/chosen": -1.3109796047210693, |
| "logits/rejected": -1.1440188884735107, |
| "logps/chosen": -0.7192699909210205, |
| "logps/ref_chosen": -0.27701765298843384, |
| "logps/ref_rejected": -0.23723503947257996, |
| "logps/rejected": -0.6495180130004883, |
| "loss": 0.6913, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.004422523081302643, |
| "rewards/margins": -0.0002996939292643219, |
| "rewards/rejected": -0.0041228290647268295, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.5920384718140529, |
| "grad_norm": 0.04742752015590668, |
| "learning_rate": 2.1273943345595635e-07, |
| "logits/chosen": -1.3137791156768799, |
| "logits/rejected": -1.2819969654083252, |
| "logps/chosen": -0.7608126401901245, |
| "logps/ref_chosen": -0.30363547801971436, |
| "logps/ref_rejected": -0.3083662986755371, |
| "logps/rejected": -1.459545612335205, |
| "loss": 0.6919, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.004571771249175072, |
| "rewards/margins": 0.006940022110939026, |
| "rewards/rejected": -0.011511792428791523, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.5941757948169917, |
| "grad_norm": 0.05093459412455559, |
| "learning_rate": 2.1089138373994222e-07, |
| "logits/chosen": -1.177383303642273, |
| "logits/rejected": -1.1635684967041016, |
| "logps/chosen": -1.167349934577942, |
| "logps/ref_chosen": -0.24873454868793488, |
| "logps/ref_rejected": -0.2633602023124695, |
| "logps/rejected": -2.1198313236236572, |
| "loss": 0.6925, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.0091861542314291, |
| "rewards/margins": 0.009378557093441486, |
| "rewards/rejected": -0.018564710393548012, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.5963131178199306, |
| "grad_norm": 0.12743529677391052, |
| "learning_rate": 2.0904552214621556e-07, |
| "logits/chosen": -1.2624561786651611, |
| "logits/rejected": -1.2441047430038452, |
| "logps/chosen": -1.9865504503250122, |
| "logps/ref_chosen": -0.29720839858055115, |
| "logps/ref_rejected": -0.2523292601108551, |
| "logps/rejected": -0.6627134680747986, |
| "loss": 0.6923, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.016893420368433, |
| "rewards/margins": -0.012789578177034855, |
| "rewards/rejected": -0.004103842191398144, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.5984504408228694, |
| "grad_norm": 0.1000342145562172, |
| "learning_rate": 2.072019519505062e-07, |
| "logits/chosen": -1.0921458005905151, |
| "logits/rejected": -1.1053495407104492, |
| "logps/chosen": -0.6739192605018616, |
| "logps/ref_chosen": -0.2603878974914551, |
| "logps/ref_rejected": -0.23393476009368896, |
| "logps/rejected": -0.8061238527297974, |
| "loss": 0.6899, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.00413531344383955, |
| "rewards/margins": 0.0015865778550505638, |
| "rewards/rejected": -0.005721891764551401, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.6005877638258081, |
| "grad_norm": 0.16978740692138672, |
| "learning_rate": 2.0536077630034085e-07, |
| "logits/chosen": -1.0731383562088013, |
| "logits/rejected": -0.929885983467102, |
| "logps/chosen": -1.2416409254074097, |
| "logps/ref_chosen": -0.2367279827594757, |
| "logps/ref_rejected": -0.2492906153202057, |
| "logps/rejected": -2.07322359085083, |
| "loss": 0.6918, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.010049128904938698, |
| "rewards/margins": 0.008190200664103031, |
| "rewards/rejected": -0.018239328637719154, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.602725086828747, |
| "grad_norm": 0.03789851814508438, |
| "learning_rate": 2.0352209820927135e-07, |
| "logits/chosen": -1.0739492177963257, |
| "logits/rejected": -0.9944383502006531, |
| "logps/chosen": -2.454773426055908, |
| "logps/ref_chosen": -0.23591750860214233, |
| "logps/ref_rejected": -0.2536413073539734, |
| "logps/rejected": -2.3302102088928223, |
| "loss": 0.6924, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.02218855544924736, |
| "rewards/margins": -0.0014228656655177474, |
| "rewards/rejected": -0.02076569013297558, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.6048624098316858, |
| "grad_norm": 0.05390346795320511, |
| "learning_rate": 2.0168602055111173e-07, |
| "logits/chosen": -1.1945843696594238, |
| "logits/rejected": -1.1863861083984375, |
| "logps/chosen": -2.066704273223877, |
| "logps/ref_chosen": -0.2851058542728424, |
| "logps/ref_rejected": -0.2766534090042114, |
| "logps/rejected": -1.6455509662628174, |
| "loss": 0.6934, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.017815984785556793, |
| "rewards/margins": -0.004127010237425566, |
| "rewards/rejected": -0.01368897408246994, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.6069997328346246, |
| "grad_norm": 0.058497846126556396, |
| "learning_rate": 1.998526460541818e-07, |
| "logits/chosen": -1.0876891613006592, |
| "logits/rejected": -1.058795690536499, |
| "logps/chosen": -1.2350825071334839, |
| "logps/ref_chosen": -0.3077192008495331, |
| "logps/ref_rejected": -0.2690165042877197, |
| "logps/rejected": -1.2350317239761353, |
| "loss": 0.6932, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.009273632429540157, |
| "rewards/margins": 0.0003865193575620651, |
| "rewards/rejected": -0.009660152718424797, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.6091370558375635, |
| "grad_norm": 0.08619721233844757, |
| "learning_rate": 1.980220772955602e-07, |
| "logits/chosen": -1.1054788827896118, |
| "logits/rejected": -1.1437482833862305, |
| "logps/chosen": -1.9299521446228027, |
| "logps/ref_chosen": -0.225503072142601, |
| "logps/ref_rejected": -0.2299107313156128, |
| "logps/rejected": -1.9959328174591064, |
| "loss": 0.6922, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.017044490203261375, |
| "rewards/margins": 0.0006157298339530826, |
| "rewards/rejected": -0.017660221084952354, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.6112743788405023, |
| "grad_norm": 0.07628190517425537, |
| "learning_rate": 1.961944166953445e-07, |
| "logits/chosen": -0.9158226251602173, |
| "logits/rejected": -1.0018936395645142, |
| "logps/chosen": -0.8377424478530884, |
| "logps/ref_chosen": -0.26793795824050903, |
| "logps/ref_rejected": -0.3206233084201813, |
| "logps/rejected": -0.9062866568565369, |
| "loss": 0.6933, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.0056980447843670845, |
| "rewards/margins": 0.00015858927508816123, |
| "rewards/rejected": -0.005856634117662907, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.6134117018434411, |
| "grad_norm": 0.05133550614118576, |
| "learning_rate": 1.9436976651092142e-07, |
| "logits/chosen": -1.0490429401397705, |
| "logits/rejected": -0.9950347542762756, |
| "logps/chosen": -1.2916162014007568, |
| "logps/ref_chosen": -0.26049911975860596, |
| "logps/ref_rejected": -0.2344517707824707, |
| "logps/rejected": -2.2383060455322266, |
| "loss": 0.6919, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.010311171412467957, |
| "rewards/margins": 0.009727371856570244, |
| "rewards/rejected": -0.02003854513168335, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.6155490248463799, |
| "grad_norm": 0.022556882351636887, |
| "learning_rate": 1.9254822883124517e-07, |
| "logits/chosen": -1.3344517946243286, |
| "logits/rejected": -1.256748914718628, |
| "logps/chosen": -0.8081469535827637, |
| "logps/ref_chosen": -0.28607749938964844, |
| "logps/ref_rejected": -0.32406532764434814, |
| "logps/rejected": -1.0424308776855469, |
| "loss": 0.692, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.005220694467425346, |
| "rewards/margins": 0.001962962094694376, |
| "rewards/rejected": -0.007183656562119722, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.6176863478493188, |
| "grad_norm": 0.0609414279460907, |
| "learning_rate": 1.9072990557112564e-07, |
| "logits/chosen": -1.2648276090621948, |
| "logits/rejected": -1.192805290222168, |
| "logps/chosen": -0.8692429661750793, |
| "logps/ref_chosen": -0.23148827254772186, |
| "logps/ref_rejected": -0.2564725875854492, |
| "logps/rejected": -1.4215768575668335, |
| "loss": 0.6902, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.006377546116709709, |
| "rewards/margins": 0.0052734967321157455, |
| "rewards/rejected": -0.011651042848825455, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.6198236708522575, |
| "grad_norm": 0.08044213056564331, |
| "learning_rate": 1.8891489846552644e-07, |
| "logits/chosen": -1.1549913883209229, |
| "logits/rejected": -1.1648964881896973, |
| "logps/chosen": -1.088339924812317, |
| "logps/ref_chosen": -0.26470139622688293, |
| "logps/ref_rejected": -0.24771511554718018, |
| "logps/rejected": -2.2677342891693115, |
| "loss": 0.6905, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.008236384950578213, |
| "rewards/margins": 0.011963806115090847, |
| "rewards/rejected": -0.02020019106566906, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.6219609938551963, |
| "grad_norm": 0.05088520050048828, |
| "learning_rate": 1.8710330906387286e-07, |
| "logits/chosen": -1.1082024574279785, |
| "logits/rejected": -1.0954867601394653, |
| "logps/chosen": -0.7045848369598389, |
| "logps/ref_chosen": -0.23643063008785248, |
| "logps/ref_rejected": -0.2455935776233673, |
| "logps/rejected": -0.877321183681488, |
| "loss": 0.6907, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.004681542050093412, |
| "rewards/margins": 0.0016357341082766652, |
| "rewards/rejected": -0.00631727697327733, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.6240983168581352, |
| "grad_norm": 0.07719255238771439, |
| "learning_rate": 1.8529523872436977e-07, |
| "logits/chosen": -1.1770347356796265, |
| "logits/rejected": -1.1703041791915894, |
| "logps/chosen": -0.7847885489463806, |
| "logps/ref_chosen": -0.23179294168949127, |
| "logps/ref_rejected": -0.2735842764377594, |
| "logps/rejected": -1.1045089960098267, |
| "loss": 0.6934, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.0055299559608101845, |
| "rewards/margins": 0.002779292408376932, |
| "rewards/rejected": -0.00830924790352583, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.626235639861074, |
| "grad_norm": 0.0776946172118187, |
| "learning_rate": 1.8349078860833124e-07, |
| "logits/chosen": -1.3419536352157593, |
| "logits/rejected": -1.298468828201294, |
| "logps/chosen": -3.258559226989746, |
| "logps/ref_chosen": -0.240401029586792, |
| "logps/ref_rejected": -0.23549923300743103, |
| "logps/rejected": -0.9581273794174194, |
| "loss": 0.6929, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.030181586742401123, |
| "rewards/margins": -0.022955304011702538, |
| "rewards/rejected": -0.007226281333714724, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.6283729628640128, |
| "grad_norm": 0.022600095719099045, |
| "learning_rate": 1.8169005967452e-07, |
| "logits/chosen": -1.3734328746795654, |
| "logits/rejected": -1.3898669481277466, |
| "logps/chosen": -1.0494236946105957, |
| "logps/ref_chosen": -0.2548738420009613, |
| "logps/ref_rejected": -0.28851205110549927, |
| "logps/rejected": -1.4271754026412964, |
| "loss": 0.6925, |
| "rewards/accuracies": 0.3125, |
| "rewards/chosen": -0.007945496588945389, |
| "rewards/margins": 0.0034411377273499966, |
| "rewards/rejected": -0.011386634781956673, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.6305102858669517, |
| "grad_norm": 0.07802026718854904, |
| "learning_rate": 1.7989315267349933e-07, |
| "logits/chosen": -1.1028356552124023, |
| "logits/rejected": -1.0630947351455688, |
| "logps/chosen": -1.294027328491211, |
| "logps/ref_chosen": -0.3465898633003235, |
| "logps/ref_rejected": -0.337124764919281, |
| "logps/rejected": -1.6178960800170898, |
| "loss": 0.6921, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.00947437435388565, |
| "rewards/margins": 0.0033333394676446915, |
| "rewards/rejected": -0.012807712890207767, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.6326476088698905, |
| "grad_norm": 0.07147033512592316, |
| "learning_rate": 1.781001681419957e-07, |
| "logits/chosen": -1.105650782585144, |
| "logits/rejected": -1.0777990818023682, |
| "logps/chosen": -1.5325028896331787, |
| "logps/ref_chosen": -0.2874016761779785, |
| "logps/ref_rejected": -0.33440160751342773, |
| "logps/rejected": -0.9088407754898071, |
| "loss": 0.6906, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.01245101261883974, |
| "rewards/margins": -0.006706621497869492, |
| "rewards/rejected": -0.005744390655308962, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.6347849318728293, |
| "grad_norm": 0.06049632281064987, |
| "learning_rate": 1.763112063972739e-07, |
| "logits/chosen": -1.1721357107162476, |
| "logits/rejected": -1.0921467542648315, |
| "logps/chosen": -1.3942652940750122, |
| "logps/ref_chosen": -0.2320319265127182, |
| "logps/ref_rejected": -0.2138584554195404, |
| "logps/rejected": -2.2817580699920654, |
| "loss": 0.6905, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.011622332036495209, |
| "rewards/margins": 0.009056665934622288, |
| "rewards/rejected": -0.02067899890244007, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.6369222548757681, |
| "grad_norm": 0.05264393240213394, |
| "learning_rate": 1.745263675315245e-07, |
| "logits/chosen": -1.0542514324188232, |
| "logits/rejected": -1.0550352334976196, |
| "logps/chosen": -2.239818572998047, |
| "logps/ref_chosen": -0.2352036088705063, |
| "logps/ref_rejected": -0.28426027297973633, |
| "logps/rejected": -2.9605629444122314, |
| "loss": 0.6919, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.02004615031182766, |
| "rewards/margins": 0.006716875359416008, |
| "rewards/rejected": -0.026763027533888817, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.6390595778787069, |
| "grad_norm": 0.04607994109392166, |
| "learning_rate": 1.7274575140626315e-07, |
| "logits/chosen": -1.1956627368927002, |
| "logits/rejected": -1.099608302116394, |
| "logps/chosen": -2.058058977127075, |
| "logps/ref_chosen": -0.2983799874782562, |
| "logps/ref_rejected": -0.25143033266067505, |
| "logps/rejected": -2.0495645999908447, |
| "loss": 0.6917, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.017596790567040443, |
| "rewards/margins": 0.00038455199683085084, |
| "rewards/rejected": -0.01798134110867977, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.6411969008816457, |
| "grad_norm": 0.02341402880847454, |
| "learning_rate": 1.7096945764674398e-07, |
| "logits/chosen": -1.0295686721801758, |
| "logits/rejected": -1.0062813758850098, |
| "logps/chosen": -0.7505143284797668, |
| "logps/ref_chosen": -0.30882391333580017, |
| "logps/ref_rejected": -0.2723415791988373, |
| "logps/rejected": -0.856078565120697, |
| "loss": 0.6918, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.0044169039465487, |
| "rewards/margins": 0.0014204656472429633, |
| "rewards/rejected": -0.0058373697102069855, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.6433342238845846, |
| "grad_norm": 0.030726268887519836, |
| "learning_rate": 1.6919758563638502e-07, |
| "logits/chosen": -1.0025672912597656, |
| "logits/rejected": -0.9413330554962158, |
| "logps/chosen": -0.9848615527153015, |
| "logps/ref_chosen": -0.3111041784286499, |
| "logps/ref_rejected": -0.28697702288627625, |
| "logps/rejected": -1.4358422756195068, |
| "loss": 0.6912, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.00673757353797555, |
| "rewards/margins": 0.00475107878446579, |
| "rewards/rejected": -0.011488650925457478, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.6454715468875234, |
| "grad_norm": 0.058248646557331085, |
| "learning_rate": 1.674302345112083e-07, |
| "logits/chosen": -1.1426154375076294, |
| "logits/rejected": -1.2598832845687866, |
| "logps/chosen": -1.4695303440093994, |
| "logps/ref_chosen": -0.29342371225357056, |
| "logps/ref_rejected": -0.3107927739620209, |
| "logps/rejected": -2.125171422958374, |
| "loss": 0.6918, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.011761065572500229, |
| "rewards/margins": 0.0063827200792729855, |
| "rewards/rejected": -0.0181437861174345, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.6476088698904622, |
| "grad_norm": 0.056121714413166046, |
| "learning_rate": 1.656675031542925e-07, |
| "logits/chosen": -1.2518346309661865, |
| "logits/rejected": -1.241498351097107, |
| "logps/chosen": -0.903639018535614, |
| "logps/ref_chosen": -0.29514724016189575, |
| "logps/ref_rejected": -0.285127729177475, |
| "logps/rejected": -1.311747431755066, |
| "loss": 0.6902, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.0060849180445075035, |
| "rewards/margins": 0.004181279335170984, |
| "rewards/rejected": -0.0102661969140172, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.649746192893401, |
| "grad_norm": 0.1056617796421051, |
| "learning_rate": 1.6390949019024118e-07, |
| "logits/chosen": -1.2891631126403809, |
| "logits/rejected": -1.1275533437728882, |
| "logps/chosen": -0.7646353244781494, |
| "logps/ref_chosen": -0.26553764939308167, |
| "logps/ref_rejected": -0.2557009160518646, |
| "logps/rejected": -0.7555789351463318, |
| "loss": 0.6911, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.004990976303815842, |
| "rewards/margins": 7.80413392931223e-06, |
| "rewards/rejected": -0.004998780321329832, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.6518835158963399, |
| "grad_norm": 0.020142387598752975, |
| "learning_rate": 1.621562939796643e-07, |
| "logits/chosen": -1.1348148584365845, |
| "logits/rejected": -1.139262080192566, |
| "logps/chosen": -1.3249098062515259, |
| "logps/ref_chosen": -0.277653306722641, |
| "logps/ref_rejected": -0.27849534153938293, |
| "logps/rejected": -2.8426170349121094, |
| "loss": 0.6923, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.010472564958035946, |
| "rewards/margins": 0.015168650075793266, |
| "rewards/rejected": -0.025641214102506638, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.6540208388992786, |
| "grad_norm": 0.04920821636915207, |
| "learning_rate": 1.6040801261367493e-07, |
| "logits/chosen": -1.1745394468307495, |
| "logits/rejected": -1.2524051666259766, |
| "logps/chosen": -1.680423617362976, |
| "logps/ref_chosen": -0.3201037049293518, |
| "logps/ref_rejected": -0.334641695022583, |
| "logps/rejected": -1.6333792209625244, |
| "loss": 0.6918, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.01360319834202528, |
| "rewards/margins": -0.0006158240721561015, |
| "rewards/rejected": -0.01298737432807684, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.6561581619022174, |
| "grad_norm": 0.03810185194015503, |
| "learning_rate": 1.5866474390840124e-07, |
| "logits/chosen": -1.1884056329727173, |
| "logits/rejected": -1.1885788440704346, |
| "logps/chosen": -1.4196938276290894, |
| "logps/ref_chosen": -0.2782423496246338, |
| "logps/ref_rejected": -0.28151434659957886, |
| "logps/rejected": -2.2847869396209717, |
| "loss": 0.6913, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.011414514854550362, |
| "rewards/margins": 0.008618209511041641, |
| "rewards/rejected": -0.020032724365592003, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.6582954849051563, |
| "grad_norm": 0.028033897280693054, |
| "learning_rate": 1.569265853995137e-07, |
| "logits/chosen": -1.0184959173202515, |
| "logits/rejected": -1.1102197170257568, |
| "logps/chosen": -1.6438257694244385, |
| "logps/ref_chosen": -0.28045278787612915, |
| "logps/ref_rejected": -0.26543742418289185, |
| "logps/rejected": -2.1834535598754883, |
| "loss": 0.6909, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.0136337298899889, |
| "rewards/margins": 0.00554642966017127, |
| "rewards/rejected": -0.019180160015821457, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.6604328079080951, |
| "grad_norm": 0.04297371581196785, |
| "learning_rate": 1.5519363433676791e-07, |
| "logits/chosen": -1.3368545770645142, |
| "logits/rejected": -1.344334363937378, |
| "logps/chosen": -0.9991239309310913, |
| "logps/ref_chosen": -0.3204846978187561, |
| "logps/ref_rejected": -0.3272019028663635, |
| "logps/rejected": -1.5001248121261597, |
| "loss": 0.6921, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.006786392070353031, |
| "rewards/margins": 0.004942836705595255, |
| "rewards/rejected": -0.011729230172932148, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.6625701309110339, |
| "grad_norm": 0.04412882402539253, |
| "learning_rate": 1.5346598767856345e-07, |
| "logits/chosen": -0.9765325784683228, |
| "logits/rejected": -0.9864211082458496, |
| "logps/chosen": -0.8774848580360413, |
| "logps/ref_chosen": -0.2893618047237396, |
| "logps/ref_rejected": -0.3154972493648529, |
| "logps/rejected": -0.9414972066879272, |
| "loss": 0.6929, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.005881229881197214, |
| "rewards/margins": 0.0003787687746807933, |
| "rewards/rejected": -0.006259998772293329, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.6647074539139728, |
| "grad_norm": 0.043842706829309464, |
| "learning_rate": 1.517437420865191e-07, |
| "logits/chosen": -1.5350700616836548, |
| "logits/rejected": -1.339430570602417, |
| "logps/chosen": -1.766981601715088, |
| "logps/ref_chosen": -0.2888259291648865, |
| "logps/ref_rejected": -0.32595381140708923, |
| "logps/rejected": -2.1401333808898926, |
| "loss": 0.6917, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.014781556092202663, |
| "rewards/margins": 0.0033602421171963215, |
| "rewards/rejected": -0.018141796812415123, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.6668447769169116, |
| "grad_norm": 0.06490735709667206, |
| "learning_rate": 1.500269939200648e-07, |
| "logits/chosen": -1.2561033964157104, |
| "logits/rejected": -1.2318388223648071, |
| "logps/chosen": -0.8624341487884521, |
| "logps/ref_chosen": -0.29091936349868774, |
| "logps/ref_rejected": -0.27606919407844543, |
| "logps/rejected": -0.7279765605926514, |
| "loss": 0.6918, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.005715147126466036, |
| "rewards/margins": -0.0011960736010223627, |
| "rewards/rejected": -0.0045190732926130295, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.6689820999198504, |
| "grad_norm": 0.06785932928323746, |
| "learning_rate": 1.4831583923104998e-07, |
| "logits/chosen": -1.3141409158706665, |
| "logits/rejected": -1.2916662693023682, |
| "logps/chosen": -0.9934280514717102, |
| "logps/ref_chosen": -0.2723192870616913, |
| "logps/ref_rejected": -0.2956874668598175, |
| "logps/rejected": -1.255844235420227, |
| "loss": 0.6914, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.007211086805909872, |
| "rewards/margins": 0.0023904808331280947, |
| "rewards/rejected": -0.009601568803191185, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.6711194229227893, |
| "grad_norm": 0.025030309334397316, |
| "learning_rate": 1.4661037375836987e-07, |
| "logits/chosen": -1.217995285987854, |
| "logits/rejected": -1.2591590881347656, |
| "logps/chosen": -1.482741355895996, |
| "logps/ref_chosen": -0.27119889855384827, |
| "logps/ref_rejected": -0.24232390522956848, |
| "logps/rejected": -1.8728251457214355, |
| "loss": 0.6924, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.012115423567593098, |
| "rewards/margins": 0.004189589526504278, |
| "rewards/rejected": -0.01630501262843609, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.673256745925728, |
| "grad_norm": 0.026920944452285767, |
| "learning_rate": 1.4491069292260866e-07, |
| "logits/chosen": -1.0900887250900269, |
| "logits/rejected": -1.0372798442840576, |
| "logps/chosen": -2.040264844894409, |
| "logps/ref_chosen": -0.2809339165687561, |
| "logps/ref_rejected": -0.2953468859195709, |
| "logps/rejected": -2.281397581100464, |
| "loss": 0.6922, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.01759330742061138, |
| "rewards/margins": 0.002267198171466589, |
| "rewards/rejected": -0.019860506057739258, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.6753940689286668, |
| "grad_norm": 0.06658953428268433, |
| "learning_rate": 1.432168918207009e-07, |
| "logits/chosen": -1.065732479095459, |
| "logits/rejected": -1.1438014507293701, |
| "logps/chosen": -0.8067935109138489, |
| "logps/ref_chosen": -0.25914156436920166, |
| "logps/ref_rejected": -0.27169129252433777, |
| "logps/rejected": -1.7555887699127197, |
| "loss": 0.6917, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.005476518999785185, |
| "rewards/margins": 0.00936245545744896, |
| "rewards/rejected": -0.014838975854218006, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.6775313919316056, |
| "grad_norm": 0.06910010427236557, |
| "learning_rate": 1.4152906522061047e-07, |
| "logits/chosen": -1.16615629196167, |
| "logits/rejected": -1.103142261505127, |
| "logps/chosen": -0.7290077805519104, |
| "logps/ref_chosen": -0.2660996615886688, |
| "logps/ref_rejected": -0.2873613238334656, |
| "logps/rejected": -1.655418872833252, |
| "loss": 0.6908, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.0046290806494653225, |
| "rewards/margins": 0.009051494300365448, |
| "rewards/rejected": -0.013680573552846909, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.6796687149345445, |
| "grad_norm": 0.06387712806463242, |
| "learning_rate": 1.3984730755602903e-07, |
| "logits/chosen": -1.2260221242904663, |
| "logits/rejected": -1.1309224367141724, |
| "logps/chosen": -1.700675368309021, |
| "logps/ref_chosen": -0.2656566798686981, |
| "logps/ref_rejected": -0.2824363708496094, |
| "logps/rejected": -1.660581350326538, |
| "loss": 0.6907, |
| "rewards/accuracies": 0.3125, |
| "rewards/chosen": -0.014350186102092266, |
| "rewards/margins": -0.0005687351222150028, |
| "rewards/rejected": -0.013781450688838959, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.6818060379374833, |
| "grad_norm": 0.06577786803245544, |
| "learning_rate": 1.381717129210918e-07, |
| "logits/chosen": -1.2081557512283325, |
| "logits/rejected": -1.2390806674957275, |
| "logps/chosen": -0.955277681350708, |
| "logps/ref_chosen": -0.22819383442401886, |
| "logps/ref_rejected": -0.30524516105651855, |
| "logps/rejected": -2.3514580726623535, |
| "loss": 0.6896, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.007270838133990765, |
| "rewards/margins": 0.013191292993724346, |
| "rewards/rejected": -0.02046213112771511, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.6839433609404221, |
| "grad_norm": 0.08719781786203384, |
| "learning_rate": 1.365023750651133e-07, |
| "logits/chosen": -1.2260836362838745, |
| "logits/rejected": -1.1617822647094727, |
| "logps/chosen": -0.7476742267608643, |
| "logps/ref_chosen": -0.2821665406227112, |
| "logps/ref_rejected": -0.3088555335998535, |
| "logps/rejected": -1.247572422027588, |
| "loss": 0.6925, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.004655077122151852, |
| "rewards/margins": 0.004732090979814529, |
| "rewards/rejected": -0.009387168101966381, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.686080683943361, |
| "grad_norm": 0.046496327966451645, |
| "learning_rate": 1.3483938738734195e-07, |
| "logits/chosen": -0.9640857577323914, |
| "logits/rejected": -0.9291998147964478, |
| "logps/chosen": -0.7725713849067688, |
| "logps/ref_chosen": -0.25847122073173523, |
| "logps/ref_rejected": -0.2574281096458435, |
| "logps/rejected": -0.8903670907020569, |
| "loss": 0.6909, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.005141001660376787, |
| "rewards/margins": 0.0011883880943059921, |
| "rewards/rejected": -0.006329389289021492, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.6882180069462998, |
| "grad_norm": 0.03986057639122009, |
| "learning_rate": 1.3318284293173449e-07, |
| "logits/chosen": -1.0633331537246704, |
| "logits/rejected": -1.0252758264541626, |
| "logps/chosen": -1.5648293495178223, |
| "logps/ref_chosen": -0.26324695348739624, |
| "logps/ref_rejected": -0.2627493143081665, |
| "logps/rejected": -1.6104729175567627, |
| "loss": 0.6916, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.013015823438763618, |
| "rewards/margins": 0.00046141244820319116, |
| "rewards/rejected": -0.013477234169840813, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.6903553299492385, |
| "grad_norm": 0.024575239047408104, |
| "learning_rate": 1.3153283438175034e-07, |
| "logits/chosen": -1.1238559484481812, |
| "logits/rejected": -1.1573046445846558, |
| "logps/chosen": -0.8084390759468079, |
| "logps/ref_chosen": -0.2868350148200989, |
| "logps/ref_rejected": -0.3005547523498535, |
| "logps/rejected": -1.135811448097229, |
| "loss": 0.6925, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.005216039717197418, |
| "rewards/margins": 0.003136527258902788, |
| "rewards/rejected": -0.008352567441761494, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.6924926529521774, |
| "grad_norm": 0.1354009509086609, |
| "learning_rate": 1.2988945405516565e-07, |
| "logits/chosen": -1.1329212188720703, |
| "logits/rejected": -1.1663479804992676, |
| "logps/chosen": -2.0037600994110107, |
| "logps/ref_chosen": -0.26409387588500977, |
| "logps/ref_rejected": -0.28665629029273987, |
| "logps/rejected": -2.618793487548828, |
| "loss": 0.6912, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.01739666238427162, |
| "rewards/margins": 0.005924709141254425, |
| "rewards/rejected": -0.023321371525526047, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.6946299759551162, |
| "grad_norm": 0.04387975111603737, |
| "learning_rate": 1.2825279389890818e-07, |
| "logits/chosen": -1.095775842666626, |
| "logits/rejected": -1.1812615394592285, |
| "logps/chosen": -1.0570056438446045, |
| "logps/ref_chosen": -0.29625794291496277, |
| "logps/ref_rejected": -0.29755669832229614, |
| "logps/rejected": -1.7567641735076904, |
| "loss": 0.6926, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.007607476785778999, |
| "rewards/margins": 0.006984597072005272, |
| "rewards/rejected": -0.014592074789106846, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.696767298958055, |
| "grad_norm": 0.09196353703737259, |
| "learning_rate": 1.2662294548391328e-07, |
| "logits/chosen": -1.2532473802566528, |
| "logits/rejected": -1.0569244623184204, |
| "logps/chosen": -4.137706756591797, |
| "logps/ref_chosen": -0.3144274353981018, |
| "logps/ref_rejected": -0.22866201400756836, |
| "logps/rejected": -4.485183238983154, |
| "loss": 0.6917, |
| "rewards/accuracies": 0.25, |
| "rewards/chosen": -0.03823279216885567, |
| "rewards/margins": 0.00433242367580533, |
| "rewards/rejected": -0.04256521910429001, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.6989046219609939, |
| "grad_norm": 0.04932067543268204, |
| "learning_rate": 1.2500000000000005e-07, |
| "logits/chosen": -1.030468463897705, |
| "logits/rejected": -1.16643488407135, |
| "logps/chosen": -1.0676077604293823, |
| "logps/ref_chosen": -0.24446645379066467, |
| "logps/ref_rejected": -0.26513275504112244, |
| "logps/rejected": -1.9378782510757446, |
| "loss": 0.6921, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.00823141261935234, |
| "rewards/margins": 0.008496041409671307, |
| "rewards/rejected": -0.01672745682299137, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.7010419449639327, |
| "grad_norm": 0.10604022443294525, |
| "learning_rate": 1.2338404825076935e-07, |
| "logits/chosen": -1.1962742805480957, |
| "logits/rejected": -1.1038748025894165, |
| "logps/chosen": -1.7773253917694092, |
| "logps/ref_chosen": -0.30338191986083984, |
| "logps/ref_rejected": -0.2950097322463989, |
| "logps/rejected": -1.0201197862625122, |
| "loss": 0.6928, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.014739434234797955, |
| "rewards/margins": -0.007488333620131016, |
| "rewards/rejected": -0.007251100614666939, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.7031792679668715, |
| "grad_norm": 0.06279636919498444, |
| "learning_rate": 1.2177518064852345e-07, |
| "logits/chosen": -1.2721734046936035, |
| "logits/rejected": -1.1641846895217896, |
| "logps/chosen": -0.7383607029914856, |
| "logps/ref_chosen": -0.28133320808410645, |
| "logps/ref_rejected": -0.2676638960838318, |
| "logps/rejected": -1.1966482400894165, |
| "loss": 0.6911, |
| "rewards/accuracies": 0.3125, |
| "rewards/chosen": -0.004570275079458952, |
| "rewards/margins": 0.004719568882137537, |
| "rewards/rejected": -0.009289843961596489, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.7053165909698104, |
| "grad_norm": 0.0709504708647728, |
| "learning_rate": 1.201734872092077e-07, |
| "logits/chosen": -1.1052814722061157, |
| "logits/rejected": -1.120945930480957, |
| "logps/chosen": -1.4752764701843262, |
| "logps/ref_chosen": -0.22806905210018158, |
| "logps/ref_rejected": -0.2613867521286011, |
| "logps/rejected": -4.264892578125, |
| "loss": 0.6894, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.012472073547542095, |
| "rewards/margins": 0.027562979608774185, |
| "rewards/rejected": -0.040035054087638855, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.7074539139727491, |
| "grad_norm": 0.03591476380825043, |
| "learning_rate": 1.185790575473738e-07, |
| "logits/chosen": -1.1976616382598877, |
| "logits/rejected": -1.174218773841858, |
| "logps/chosen": -1.5642393827438354, |
| "logps/ref_chosen": -0.2686062455177307, |
| "logps/ref_rejected": -0.26216256618499756, |
| "logps/rejected": -1.651925802230835, |
| "loss": 0.6903, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.012956329621374607, |
| "rewards/margins": 0.0009413018124178052, |
| "rewards/rejected": -0.013897632248699665, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.7095912369756879, |
| "grad_norm": 0.033163536339998245, |
| "learning_rate": 1.1699198087116588e-07, |
| "logits/chosen": -1.1001530885696411, |
| "logits/rejected": -1.1933869123458862, |
| "logps/chosen": -1.0232754945755005, |
| "logps/ref_chosen": -0.25716808438301086, |
| "logps/ref_rejected": -0.27127647399902344, |
| "logps/rejected": -1.6010782718658447, |
| "loss": 0.6914, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.00766107439994812, |
| "rewards/margins": 0.0056369430385529995, |
| "rewards/rejected": -0.013298017904162407, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.7117285599786267, |
| "grad_norm": 0.06878095120191574, |
| "learning_rate": 1.1541234597732947e-07, |
| "logits/chosen": -1.1496696472167969, |
| "logits/rejected": -1.1665306091308594, |
| "logps/chosen": -0.7209312319755554, |
| "logps/ref_chosen": -0.2830858826637268, |
| "logps/ref_rejected": -0.293523907661438, |
| "logps/rejected": -0.9190731644630432, |
| "loss": 0.6916, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.004378453362733126, |
| "rewards/margins": 0.0018770386232063174, |
| "rewards/rejected": -0.006255492102354765, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.7138658829815656, |
| "grad_norm": 0.09431196004152298, |
| "learning_rate": 1.1384024124624322e-07, |
| "logits/chosen": -1.052622675895691, |
| "logits/rejected": -1.022928237915039, |
| "logps/chosen": -1.2427582740783691, |
| "logps/ref_chosen": -0.2722591161727905, |
| "logps/ref_rejected": -0.2822473645210266, |
| "logps/rejected": -1.2416496276855469, |
| "loss": 0.6905, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.009704992175102234, |
| "rewards/margins": -0.00011097072274424136, |
| "rewards/rejected": -0.009594020433723927, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.7160032059845044, |
| "grad_norm": 0.03322301805019379, |
| "learning_rate": 1.1227575463697439e-07, |
| "logits/chosen": -1.122735619544983, |
| "logits/rejected": -1.222185730934143, |
| "logps/chosen": -0.8012080788612366, |
| "logps/ref_chosen": -0.2764795124530792, |
| "logps/ref_rejected": -0.336195707321167, |
| "logps/rejected": -0.7999901175498962, |
| "loss": 0.6917, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.00524728512391448, |
| "rewards/margins": -0.0006093411357142031, |
| "rewards/rejected": -0.004637944046407938, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.7181405289874432, |
| "grad_norm": 0.061493318527936935, |
| "learning_rate": 1.1071897368235694e-07, |
| "logits/chosen": -1.1705626249313354, |
| "logits/rejected": -1.2664892673492432, |
| "logps/chosen": -1.5754473209381104, |
| "logps/ref_chosen": -0.2959895133972168, |
| "logps/ref_rejected": -0.2997013330459595, |
| "logps/rejected": -1.932682752609253, |
| "loss": 0.6915, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.012794578447937965, |
| "rewards/margins": 0.0035352339036762714, |
| "rewards/rejected": -0.01632981188595295, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.7202778519903821, |
| "grad_norm": 0.05572124943137169, |
| "learning_rate": 1.0916998548409447e-07, |
| "logits/chosen": -1.130581021308899, |
| "logits/rejected": -1.066174030303955, |
| "logps/chosen": -1.3565336465835571, |
| "logps/ref_chosen": -0.24616912007331848, |
| "logps/ref_rejected": -0.2598510682582855, |
| "logps/rejected": -1.0788873434066772, |
| "loss": 0.692, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.011103645898401737, |
| "rewards/margins": -0.0029132834170013666, |
| "rewards/rejected": -0.008190362714231014, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.7224151749933209, |
| "grad_norm": 0.10384145379066467, |
| "learning_rate": 1.0762887670788701e-07, |
| "logits/chosen": -0.9459188580513, |
| "logits/rejected": -0.8504930734634399, |
| "logps/chosen": -1.0191553831100464, |
| "logps/ref_chosen": -0.24093510210514069, |
| "logps/ref_rejected": -0.26729854941368103, |
| "logps/rejected": -1.1294794082641602, |
| "loss": 0.6914, |
| "rewards/accuracies": 0.3125, |
| "rewards/chosen": -0.007782202214002609, |
| "rewards/margins": 0.0008396058692596853, |
| "rewards/rejected": -0.00862180907279253, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.7245524979962596, |
| "grad_norm": 0.018316587433218956, |
| "learning_rate": 1.0609573357858165e-07, |
| "logits/chosen": -1.2061526775360107, |
| "logits/rejected": -1.2601025104522705, |
| "logps/chosen": -1.094489574432373, |
| "logps/ref_chosen": -0.29173967242240906, |
| "logps/ref_rejected": -0.3133309781551361, |
| "logps/rejected": -1.616560459136963, |
| "loss": 0.6935, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.008027498610317707, |
| "rewards/margins": 0.005004794802516699, |
| "rewards/rejected": -0.013032292947173119, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.7266898209991985, |
| "grad_norm": 0.06489983201026917, |
| "learning_rate": 1.0457064187534861e-07, |
| "logits/chosen": -1.1510592699050903, |
| "logits/rejected": -1.102351188659668, |
| "logps/chosen": -0.9001895189285278, |
| "logps/ref_chosen": -0.2713991701602936, |
| "logps/ref_rejected": -0.27723032236099243, |
| "logps/rejected": -1.2492543458938599, |
| "loss": 0.6936, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.006287903990596533, |
| "rewards/margins": 0.0034323374275118113, |
| "rewards/rejected": -0.009720239788293839, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.7288271440021373, |
| "grad_norm": 0.06424716114997864, |
| "learning_rate": 1.0305368692688174e-07, |
| "logits/chosen": -1.13260018825531, |
| "logits/rejected": -1.0694531202316284, |
| "logps/chosen": -1.862520456314087, |
| "logps/ref_chosen": -0.2595444917678833, |
| "logps/ref_rejected": -0.2731647193431854, |
| "logps/rejected": -2.2827601432800293, |
| "loss": 0.6921, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.016029758378863335, |
| "rewards/margins": 0.0040661939419806, |
| "rewards/rejected": -0.020095951855182648, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.7309644670050761, |
| "grad_norm": 0.07520674169063568, |
| "learning_rate": 1.0154495360662463e-07, |
| "logits/chosen": -0.8258600831031799, |
| "logits/rejected": -0.8433269262313843, |
| "logps/chosen": -1.1305100917816162, |
| "logps/ref_chosen": -0.2683022916316986, |
| "logps/ref_rejected": -0.2848109006881714, |
| "logps/rejected": -1.243198275566101, |
| "loss": 0.6918, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.008622078225016594, |
| "rewards/margins": 0.0009617957985028625, |
| "rewards/rejected": -0.009583874605596066, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.733101790008015, |
| "grad_norm": 0.04308605566620827, |
| "learning_rate": 1.0004452632802158e-07, |
| "logits/chosen": -1.1636924743652344, |
| "logits/rejected": -1.1219979524612427, |
| "logps/chosen": -1.6641209125518799, |
| "logps/ref_chosen": -0.2753485441207886, |
| "logps/ref_rejected": -0.2803853452205658, |
| "logps/rejected": -1.8049211502075195, |
| "loss": 0.6901, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.013887722045183182, |
| "rewards/margins": 0.0013576359488070011, |
| "rewards/rejected": -0.015245359390974045, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.7352391130109538, |
| "grad_norm": 0.023724785074591637, |
| "learning_rate": 9.855248903979505e-08, |
| "logits/chosen": -1.145557165145874, |
| "logits/rejected": -1.1103419065475464, |
| "logps/chosen": -1.6158597469329834, |
| "logps/ref_chosen": -0.24767890572547913, |
| "logps/ref_rejected": -0.25479885935783386, |
| "logps/rejected": -1.287099838256836, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.013681807555258274, |
| "rewards/margins": -0.00335879810154438, |
| "rewards/rejected": -0.010323010385036469, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.7373764360138926, |
| "grad_norm": 0.06565766781568527, |
| "learning_rate": 9.706892522124838e-08, |
| "logits/chosen": -1.1292834281921387, |
| "logits/rejected": -1.0837907791137695, |
| "logps/chosen": -2.6713132858276367, |
| "logps/ref_chosen": -0.22354263067245483, |
| "logps/ref_rejected": -0.24082818627357483, |
| "logps/rejected": -2.4919419288635254, |
| "loss": 0.6922, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.024477705359458923, |
| "rewards/margins": -0.001966568175703287, |
| "rewards/rejected": -0.022511135786771774, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.7395137590168315, |
| "grad_norm": 0.06766300648450851, |
| "learning_rate": 9.559391787759554e-08, |
| "logits/chosen": -1.354848027229309, |
| "logits/rejected": -1.2523478269577026, |
| "logps/chosen": -1.3433290719985962, |
| "logps/ref_chosen": -0.32388246059417725, |
| "logps/ref_rejected": -0.35365214943885803, |
| "logps/rejected": -0.9489238262176514, |
| "loss": 0.6916, |
| "rewards/accuracies": 0.25, |
| "rewards/chosen": -0.010194464586675167, |
| "rewards/margins": -0.004241748712956905, |
| "rewards/rejected": -0.005952716339379549, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.7416510820197703, |
| "grad_norm": 0.08040929585695267, |
| "learning_rate": 9.412754953531663e-08, |
| "logits/chosen": -1.0879831314086914, |
| "logits/rejected": -1.0791324377059937, |
| "logps/chosen": -3.171231508255005, |
| "logps/ref_chosen": -0.26492586731910706, |
| "logps/ref_rejected": -0.3163069784641266, |
| "logps/rejected": -2.425985336303711, |
| "loss": 0.6913, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.02906305529177189, |
| "rewards/margins": -0.007966272532939911, |
| "rewards/rejected": -0.021096784621477127, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.743788405022709, |
| "grad_norm": 0.06491011381149292, |
| "learning_rate": 9.266990223754067e-08, |
| "logits/chosen": -1.0943776369094849, |
| "logits/rejected": -1.1970751285552979, |
| "logps/chosen": -0.8782896995544434, |
| "logps/ref_chosen": -0.23037269711494446, |
| "logps/ref_rejected": -0.2580893039703369, |
| "logps/rejected": -3.4618566036224365, |
| "loss": 0.6898, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.006479169707745314, |
| "rewards/margins": 0.025558505207300186, |
| "rewards/rejected": -0.03203767165541649, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.7459257280256478, |
| "grad_norm": 0.05739942565560341, |
| "learning_rate": 9.12210575394553e-08, |
| "logits/chosen": -1.175123691558838, |
| "logits/rejected": -1.2053630352020264, |
| "logps/chosen": -0.9631116986274719, |
| "logps/ref_chosen": -0.2983925938606262, |
| "logps/ref_rejected": -0.29447752237319946, |
| "logps/rejected": -0.9072011113166809, |
| "loss": 0.6929, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.006647191010415554, |
| "rewards/margins": -0.0005199552397243679, |
| "rewards/rejected": -0.006127236410975456, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.7480630510285867, |
| "grad_norm": 0.049737460911273956, |
| "learning_rate": 8.978109650374396e-08, |
| "logits/chosen": -1.071784496307373, |
| "logits/rejected": -1.0658966302871704, |
| "logps/chosen": -1.1942861080169678, |
| "logps/ref_chosen": -0.28040096163749695, |
| "logps/ref_rejected": -0.31127622723579407, |
| "logps/rejected": -1.2045406103134155, |
| "loss": 0.6924, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.009138851426541805, |
| "rewards/margins": -0.00020620728901121765, |
| "rewards/rejected": -0.008932644501328468, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.7502003740315255, |
| "grad_norm": 0.06262131035327911, |
| "learning_rate": 8.835009969605011e-08, |
| "logits/chosen": -1.2125799655914307, |
| "logits/rejected": -1.1305088996887207, |
| "logps/chosen": -1.0078157186508179, |
| "logps/ref_chosen": -0.25532886385917664, |
| "logps/ref_rejected": -0.24316643178462982, |
| "logps/rejected": -0.8425446152687073, |
| "loss": 0.6911, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.007524868939071894, |
| "rewards/margins": -0.001531086745671928, |
| "rewards/rejected": -0.005993782076984644, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.7523376970344643, |
| "grad_norm": 0.037578314542770386, |
| "learning_rate": 8.692814718046978e-08, |
| "logits/chosen": -1.1821025609970093, |
| "logits/rejected": -1.1392035484313965, |
| "logps/chosen": -1.8397068977355957, |
| "logps/ref_chosen": -0.2743624746799469, |
| "logps/ref_rejected": -0.26306965947151184, |
| "logps/rejected": -1.4568653106689453, |
| "loss": 0.6926, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.015653444454073906, |
| "rewards/margins": -0.003715487662702799, |
| "rewards/rejected": -0.01193795632570982, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.7544750200374032, |
| "grad_norm": 0.0541614294052124, |
| "learning_rate": 8.551531851507185e-08, |
| "logits/chosen": -1.208461046218872, |
| "logits/rejected": -1.054347038269043, |
| "logps/chosen": -1.0258980989456177, |
| "logps/ref_chosen": -0.28386688232421875, |
| "logps/ref_rejected": -0.2676239013671875, |
| "logps/rejected": -1.1631557941436768, |
| "loss": 0.692, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.007420312613248825, |
| "rewards/margins": 0.0015350051689893007, |
| "rewards/rejected": -0.008955318480730057, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.756612343040342, |
| "grad_norm": 0.04087401553988457, |
| "learning_rate": 8.411169274744723e-08, |
| "logits/chosen": -1.1159809827804565, |
| "logits/rejected": -1.117483377456665, |
| "logps/chosen": -0.8914464712142944, |
| "logps/ref_chosen": -0.2520354986190796, |
| "logps/ref_rejected": -0.285053014755249, |
| "logps/rejected": -1.238872766494751, |
| "loss": 0.691, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.0063941096886992455, |
| "rewards/margins": 0.003144086804240942, |
| "rewards/rejected": -0.0095381960272789, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.7587496660432808, |
| "grad_norm": 0.1142241433262825, |
| "learning_rate": 8.271734841028552e-08, |
| "logits/chosen": -1.1041523218154907, |
| "logits/rejected": -1.099101185798645, |
| "logps/chosen": -1.2273969650268555, |
| "logps/ref_chosen": -0.31465134024620056, |
| "logps/ref_rejected": -0.29631203413009644, |
| "logps/rejected": -1.307835578918457, |
| "loss": 0.6928, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.00912745762616396, |
| "rewards/margins": 0.0009877784177660942, |
| "rewards/rejected": -0.010115234181284904, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.7608869890462197, |
| "grad_norm": 0.13401734828948975, |
| "learning_rate": 8.133236351698142e-08, |
| "logits/chosen": -1.279919981956482, |
| "logits/rejected": -1.1777186393737793, |
| "logps/chosen": -1.9279370307922363, |
| "logps/ref_chosen": -0.3523467779159546, |
| "logps/ref_rejected": -0.3496631979942322, |
| "logps/rejected": -4.22702693939209, |
| "loss": 0.691, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.015755901113152504, |
| "rewards/margins": 0.02301773615181446, |
| "rewards/rejected": -0.038773637264966965, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.7630243120491584, |
| "grad_norm": 0.15186871588230133, |
| "learning_rate": 7.99568155572701e-08, |
| "logits/chosen": -1.309054970741272, |
| "logits/rejected": -1.2452538013458252, |
| "logps/chosen": -1.4320964813232422, |
| "logps/ref_chosen": -0.28003811836242676, |
| "logps/ref_rejected": -0.30409619212150574, |
| "logps/rejected": -1.847014307975769, |
| "loss": 0.6923, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.011520582251250744, |
| "rewards/margins": 0.003908597398549318, |
| "rewards/rejected": -0.015429181978106499, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.7651616350520972, |
| "grad_norm": 0.057084277272224426, |
| "learning_rate": 7.859078149289144e-08, |
| "logits/chosen": -1.1047471761703491, |
| "logits/rejected": -1.1351032257080078, |
| "logps/chosen": -0.9979822635650635, |
| "logps/ref_chosen": -0.28867530822753906, |
| "logps/ref_rejected": -0.26092076301574707, |
| "logps/rejected": -1.5368304252624512, |
| "loss": 0.6907, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.007093069609254599, |
| "rewards/margins": 0.005666028708219528, |
| "rewards/rejected": -0.012759096920490265, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.7672989580550361, |
| "grad_norm": 0.041765403002500534, |
| "learning_rate": 7.723433775328384e-08, |
| "logits/chosen": -1.1192529201507568, |
| "logits/rejected": -1.265089750289917, |
| "logps/chosen": -2.022310495376587, |
| "logps/ref_chosen": -0.29014062881469727, |
| "logps/ref_rejected": -0.28735655546188354, |
| "logps/rejected": -2.6289854049682617, |
| "loss": 0.6915, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.01732170209288597, |
| "rewards/margins": 0.006094588432461023, |
| "rewards/rejected": -0.023416288197040558, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.7694362810579749, |
| "grad_norm": 0.05468735098838806, |
| "learning_rate": 7.588756023130833e-08, |
| "logits/chosen": -0.8661350011825562, |
| "logits/rejected": -0.9621841907501221, |
| "logps/chosen": -2.116637945175171, |
| "logps/ref_chosen": -0.2920258045196533, |
| "logps/ref_rejected": -0.31703561544418335, |
| "logps/rejected": -2.2606375217437744, |
| "loss": 0.6906, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.0182461217045784, |
| "rewards/margins": 0.0011898954398930073, |
| "rewards/rejected": -0.01943601667881012, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.7715736040609137, |
| "grad_norm": 0.05585193634033203, |
| "learning_rate": 7.455052427900213e-08, |
| "logits/chosen": -1.2692015171051025, |
| "logits/rejected": -1.1027172803878784, |
| "logps/chosen": -1.100353479385376, |
| "logps/ref_chosen": -0.23069022595882416, |
| "logps/ref_rejected": -0.22342757880687714, |
| "logps/rejected": -1.0934088230133057, |
| "loss": 0.6947, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.008696632459759712, |
| "rewards/margins": 3.179942723363638e-06, |
| "rewards/rejected": -0.00869981199502945, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.7737109270638525, |
| "grad_norm": 0.06848090887069702, |
| "learning_rate": 7.322330470336313e-08, |
| "logits/chosen": -1.1645256280899048, |
| "logits/rejected": -1.0935707092285156, |
| "logps/chosen": -1.6071454286575317, |
| "logps/ref_chosen": -0.27192527055740356, |
| "logps/ref_rejected": -0.3024570941925049, |
| "logps/rejected": -1.2258251905441284, |
| "loss": 0.6892, |
| "rewards/accuracies": 0.25, |
| "rewards/chosen": -0.013352199457585812, |
| "rewards/margins": -0.004118519835174084, |
| "rewards/rejected": -0.009233680553734303, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.7758482500667914, |
| "grad_norm": 0.06690943241119385, |
| "learning_rate": 7.190597576216384e-08, |
| "logits/chosen": -0.8887664675712585, |
| "logits/rejected": -0.9066710472106934, |
| "logps/chosen": -1.2840781211853027, |
| "logps/ref_chosen": -0.28294187784194946, |
| "logps/ref_rejected": -0.3023577928543091, |
| "logps/rejected": -1.4506629705429077, |
| "loss": 0.6921, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.010011361911892891, |
| "rewards/margins": 0.0014716887380927801, |
| "rewards/rejected": -0.01148305181413889, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.7779855730697302, |
| "grad_norm": 0.15532270073890686, |
| "learning_rate": 7.059861115979701e-08, |
| "logits/chosen": -1.0949058532714844, |
| "logits/rejected": -1.1424363851547241, |
| "logps/chosen": -1.4416131973266602, |
| "logps/ref_chosen": -0.30306118726730347, |
| "logps/ref_rejected": -0.288621187210083, |
| "logps/rejected": -1.4862004518508911, |
| "loss": 0.6945, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.011385519057512283, |
| "rewards/margins": 0.0005902723059989512, |
| "rewards/rejected": -0.011975791305303574, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.7801228960726689, |
| "grad_norm": 0.03942997753620148, |
| "learning_rate": 6.930128404315214e-08, |
| "logits/chosen": -1.0720016956329346, |
| "logits/rejected": -1.059690237045288, |
| "logps/chosen": -2.215381145477295, |
| "logps/ref_chosen": -0.25054827332496643, |
| "logps/ref_rejected": -0.2531178891658783, |
| "logps/rejected": -1.862559199333191, |
| "loss": 0.6916, |
| "rewards/accuracies": 0.25, |
| "rewards/chosen": -0.01964832842350006, |
| "rewards/margins": -0.0035539153032004833, |
| "rewards/rejected": -0.01609441265463829, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.7822602190756078, |
| "grad_norm": 0.03460577502846718, |
| "learning_rate": 6.801406699752229e-08, |
| "logits/chosen": -1.2337627410888672, |
| "logits/rejected": -1.1528120040893555, |
| "logps/chosen": -1.390278697013855, |
| "logps/ref_chosen": -0.2971178889274597, |
| "logps/ref_rejected": -0.26539239287376404, |
| "logps/rejected": -1.5021487474441528, |
| "loss": 0.6923, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.010931607335805893, |
| "rewards/margins": 0.0014359557535499334, |
| "rewards/rejected": -0.012367562390863895, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.7843975420785466, |
| "grad_norm": 0.05893367901444435, |
| "learning_rate": 6.673703204254347e-08, |
| "logits/chosen": -1.3160229921340942, |
| "logits/rejected": -1.3496131896972656, |
| "logps/chosen": -2.0979464054107666, |
| "logps/ref_chosen": -0.2959456145763397, |
| "logps/ref_rejected": -0.3258477449417114, |
| "logps/rejected": -4.554948806762695, |
| "loss": 0.6894, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.018020007759332657, |
| "rewards/margins": 0.024271003901958466, |
| "rewards/rejected": -0.04229101166129112, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.7865348650814854, |
| "grad_norm": 0.07477625459432602, |
| "learning_rate": 6.547025062816486e-08, |
| "logits/chosen": -1.02717125415802, |
| "logits/rejected": -1.0643151998519897, |
| "logps/chosen": -0.6758044362068176, |
| "logps/ref_chosen": -0.27515894174575806, |
| "logps/ref_rejected": -0.27331942319869995, |
| "logps/rejected": -0.9668493866920471, |
| "loss": 0.6933, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.004006455186754465, |
| "rewards/margins": 0.002928843256086111, |
| "rewards/rejected": -0.006935297977179289, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.7886721880844243, |
| "grad_norm": 0.037224266678094864, |
| "learning_rate": 6.42137936306514e-08, |
| "logits/chosen": -1.153114914894104, |
| "logits/rejected": -1.0626243352890015, |
| "logps/chosen": -0.6446943283081055, |
| "logps/ref_chosen": -0.31326937675476074, |
| "logps/ref_rejected": -0.2920030951499939, |
| "logps/rejected": -0.5195773243904114, |
| "loss": 0.6916, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.0033142496831715107, |
| "rewards/margins": -0.0010385076748207211, |
| "rewards/rejected": -0.0022757421247661114, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.7908095110873631, |
| "grad_norm": 0.10783287137746811, |
| "learning_rate": 6.296773134861824e-08, |
| "logits/chosen": -1.1031492948532104, |
| "logits/rejected": -1.1142442226409912, |
| "logps/chosen": -3.2173519134521484, |
| "logps/ref_chosen": -0.27304139733314514, |
| "logps/ref_rejected": -0.2698368430137634, |
| "logps/rejected": -2.982172966003418, |
| "loss": 0.6924, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.029443107545375824, |
| "rewards/margins": -0.0023197471164166927, |
| "rewards/rejected": -0.027123358100652695, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.7929468340903019, |
| "grad_norm": 0.05294763669371605, |
| "learning_rate": 6.173213349909728e-08, |
| "logits/chosen": -1.2241467237472534, |
| "logits/rejected": -1.1346536874771118, |
| "logps/chosen": -1.5309860706329346, |
| "logps/ref_chosen": -0.25244808197021484, |
| "logps/ref_rejected": -0.29839521646499634, |
| "logps/rejected": -2.4278104305267334, |
| "loss": 0.6906, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.012785379774868488, |
| "rewards/margins": 0.0085087725892663, |
| "rewards/rejected": -0.021294154226779938, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.7950841570932408, |
| "grad_norm": 0.1021474227309227, |
| "learning_rate": 6.050706921363672e-08, |
| "logits/chosen": -1.2749450206756592, |
| "logits/rejected": -1.3102686405181885, |
| "logps/chosen": -1.40775728225708, |
| "logps/ref_chosen": -0.25743889808654785, |
| "logps/ref_rejected": -0.2727058231830597, |
| "logps/rejected": -1.435315728187561, |
| "loss": 0.6922, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.011503184214234352, |
| "rewards/margins": 0.00012291534221731126, |
| "rewards/rejected": -0.011626098304986954, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.7972214800961795, |
| "grad_norm": 0.07975135743618011, |
| "learning_rate": 5.929260703443337e-08, |
| "logits/chosen": -0.8594330549240112, |
| "logits/rejected": -0.9553730487823486, |
| "logps/chosen": -1.1430832147598267, |
| "logps/ref_chosen": -0.25558850169181824, |
| "logps/ref_rejected": -0.31050026416778564, |
| "logps/rejected": -1.1174638271331787, |
| "loss": 0.6928, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.00887494720518589, |
| "rewards/margins": -0.0008053114870563149, |
| "rewards/rejected": -0.00806963536888361, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.7993588030991183, |
| "grad_norm": 0.06049225479364395, |
| "learning_rate": 5.808881491049722e-08, |
| "logits/chosen": -1.3267157077789307, |
| "logits/rejected": -1.3589601516723633, |
| "logps/chosen": -2.688066244125366, |
| "logps/ref_chosen": -0.25192493200302124, |
| "logps/ref_rejected": -0.25697553157806396, |
| "logps/rejected": -2.1965785026550293, |
| "loss": 0.693, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.024361414834856987, |
| "rewards/margins": -0.004965384490787983, |
| "rewards/rejected": -0.01939602941274643, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.8014961261020572, |
| "grad_norm": 0.08409368991851807, |
| "learning_rate": 5.6895760193850145e-08, |
| "logits/chosen": -1.0944725275039673, |
| "logits/rejected": -1.1215978860855103, |
| "logps/chosen": -1.0347650051116943, |
| "logps/ref_chosen": -0.26866835355758667, |
| "logps/ref_rejected": -0.2864525020122528, |
| "logps/rejected": -2.067497968673706, |
| "loss": 0.6915, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.007660965900868177, |
| "rewards/margins": 0.010149486362934113, |
| "rewards/rejected": -0.017810452729463577, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.803633449104996, |
| "grad_norm": 0.10201013833284378, |
| "learning_rate": 5.571350963575727e-08, |
| "logits/chosen": -1.234429955482483, |
| "logits/rejected": -1.0782431364059448, |
| "logps/chosen": -1.6262080669403076, |
| "logps/ref_chosen": -0.2716027796268463, |
| "logps/ref_rejected": -0.2345011681318283, |
| "logps/rejected": -1.703019618988037, |
| "loss": 0.691, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.013546052388846874, |
| "rewards/margins": 0.0011391331208869815, |
| "rewards/rejected": -0.014685184694826603, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.8057707721079348, |
| "grad_norm": 0.06033862754702568, |
| "learning_rate": 5.454212938299255e-08, |
| "logits/chosen": -1.2432072162628174, |
| "logits/rejected": -1.1445574760437012, |
| "logps/chosen": -2.042750358581543, |
| "logps/ref_chosen": -0.30305564403533936, |
| "logps/ref_rejected": -0.2785211503505707, |
| "logps/rejected": -1.946964979171753, |
| "loss": 0.6916, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.017396945506334305, |
| "rewards/margins": -0.0007125064730644226, |
| "rewards/rejected": -0.016684439033269882, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.8079080951108736, |
| "grad_norm": 0.0754721388220787, |
| "learning_rate": 5.338168497413756e-08, |
| "logits/chosen": -1.1568113565444946, |
| "logits/rejected": -1.3110694885253906, |
| "logps/chosen": -2.0485715866088867, |
| "logps/ref_chosen": -0.27131277322769165, |
| "logps/ref_rejected": -0.2730526030063629, |
| "logps/rejected": -2.489506959915161, |
| "loss": 0.691, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.017772583290934563, |
| "rewards/margins": 0.004391958471387625, |
| "rewards/rejected": -0.022164542227983475, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.8100454181138125, |
| "grad_norm": 0.03889808803796768, |
| "learning_rate": 5.223224133591475e-08, |
| "logits/chosen": -1.3069510459899902, |
| "logits/rejected": -1.196826696395874, |
| "logps/chosen": -3.7151710987091064, |
| "logps/ref_chosen": -0.2946428656578064, |
| "logps/ref_rejected": -0.30563172698020935, |
| "logps/rejected": -4.933197498321533, |
| "loss": 0.6901, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.03420528024435043, |
| "rewards/margins": 0.012070373632013798, |
| "rewards/rejected": -0.046275656670331955, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.8121827411167513, |
| "grad_norm": 0.09681233763694763, |
| "learning_rate": 5.109386277955477e-08, |
| "logits/chosen": -1.2361708879470825, |
| "logits/rejected": -1.1945033073425293, |
| "logps/chosen": -1.1589354276657104, |
| "logps/ref_chosen": -0.32733604311943054, |
| "logps/ref_rejected": -0.34583956003189087, |
| "logps/rejected": -1.4540259838104248, |
| "loss": 0.6902, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.008315994404256344, |
| "rewards/margins": 0.0027658697217702866, |
| "rewards/rejected": -0.011081863194704056, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.81432006411969, |
| "grad_norm": 0.023183537647128105, |
| "learning_rate": 4.996661299719845e-08, |
| "logits/chosen": -0.9406832456588745, |
| "logits/rejected": -0.959719717502594, |
| "logps/chosen": -1.1925268173217773, |
| "logps/ref_chosen": -0.20886394381523132, |
| "logps/ref_rejected": -0.26284876465797424, |
| "logps/rejected": -2.0193445682525635, |
| "loss": 0.6915, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.009836629033088684, |
| "rewards/margins": 0.007728328462690115, |
| "rewards/rejected": -0.017564957961440086, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.8164573871226289, |
| "grad_norm": 0.05210980772972107, |
| "learning_rate": 4.885055505833291e-08, |
| "logits/chosen": -1.3668040037155151, |
| "logits/rejected": -1.3035857677459717, |
| "logps/chosen": -1.0539454221725464, |
| "logps/ref_chosen": -0.2937431037425995, |
| "logps/ref_rejected": -0.3718718886375427, |
| "logps/rejected": -1.296951413154602, |
| "loss": 0.6935, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.007602022960782051, |
| "rewards/margins": 0.0016487715765833855, |
| "rewards/rejected": -0.009250793606042862, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.8185947101255677, |
| "grad_norm": 0.03742099925875664, |
| "learning_rate": 4.774575140626316e-08, |
| "logits/chosen": -1.1532747745513916, |
| "logits/rejected": -1.0690302848815918, |
| "logps/chosen": -1.262237548828125, |
| "logps/ref_chosen": -0.22047261893749237, |
| "logps/ref_rejected": -0.25424256920814514, |
| "logps/rejected": -1.2673687934875488, |
| "loss": 0.691, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.010417649522423744, |
| "rewards/margins": -0.00028638693038374186, |
| "rewards/rejected": -0.010131262242794037, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.8207320331285065, |
| "grad_norm": 0.07904389500617981, |
| "learning_rate": 4.6652263854618016e-08, |
| "logits/chosen": -1.2751234769821167, |
| "logits/rejected": -1.2919316291809082, |
| "logps/chosen": -1.106188178062439, |
| "logps/ref_chosen": -0.2794521152973175, |
| "logps/ref_rejected": -0.28776630759239197, |
| "logps/rejected": -3.1602072715759277, |
| "loss": 0.6917, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.008267360739409924, |
| "rewards/margins": 0.02045704796910286, |
| "rewards/rejected": -0.028724411502480507, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.8228693561314454, |
| "grad_norm": 0.05350683256983757, |
| "learning_rate": 4.557015358389216e-08, |
| "logits/chosen": -1.0806591510772705, |
| "logits/rejected": -1.144321084022522, |
| "logps/chosen": -1.296242594718933, |
| "logps/ref_chosen": -0.2946457862854004, |
| "logps/ref_rejected": -0.25857770442962646, |
| "logps/rejected": -1.792632818222046, |
| "loss": 0.6891, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.010015969164669514, |
| "rewards/margins": 0.005324581637978554, |
| "rewards/rejected": -0.015340550802648067, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.8250066791343842, |
| "grad_norm": 0.07477623224258423, |
| "learning_rate": 4.449948113802254e-08, |
| "logits/chosen": -1.3005449771881104, |
| "logits/rejected": -1.2780457735061646, |
| "logps/chosen": -0.6975512504577637, |
| "logps/ref_chosen": -0.27956047654151917, |
| "logps/ref_rejected": -0.2929243743419647, |
| "logps/rejected": -0.7112749814987183, |
| "loss": 0.6923, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.004179907497018576, |
| "rewards/margins": 3.59815385309048e-06, |
| "rewards/rejected": -0.004183505661785603, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.827144002137323, |
| "grad_norm": 0.04655684158205986, |
| "learning_rate": 4.3440306421001324e-08, |
| "logits/chosen": -1.1461201906204224, |
| "logits/rejected": -1.1273192167282104, |
| "logps/chosen": -0.8491421341896057, |
| "logps/ref_chosen": -0.2832236588001251, |
| "logps/ref_rejected": -0.27946937084198, |
| "logps/rejected": -0.9979395866394043, |
| "loss": 0.6938, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.005659184418618679, |
| "rewards/margins": 0.0015255182515829802, |
| "rewards/rejected": -0.007184701971709728, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.8292813251402619, |
| "grad_norm": 0.040074706077575684, |
| "learning_rate": 4.2392688693524055e-08, |
| "logits/chosen": -1.201115369796753, |
| "logits/rejected": -1.1675482988357544, |
| "logps/chosen": -1.2663828134536743, |
| "logps/ref_chosen": -0.2769949734210968, |
| "logps/ref_rejected": -0.27079758048057556, |
| "logps/rejected": -2.1578354835510254, |
| "loss": 0.6907, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.009893876500427723, |
| "rewards/margins": 0.00897650234401226, |
| "rewards/rejected": -0.018870381638407707, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.8314186481432007, |
| "grad_norm": 0.14047721028327942, |
| "learning_rate": 4.1356686569674335e-08, |
| "logits/chosen": -1.3131245374679565, |
| "logits/rejected": -1.2667745351791382, |
| "logps/chosen": -2.449765920639038, |
| "logps/ref_chosen": -0.2735025882720947, |
| "logps/ref_rejected": -0.3153075575828552, |
| "logps/rejected": -2.6731748580932617, |
| "loss": 0.6906, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.021762633696198463, |
| "rewards/margins": 0.001816042698919773, |
| "rewards/rejected": -0.023578675463795662, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.8335559711461394, |
| "grad_norm": 0.05379972606897354, |
| "learning_rate": 4.0332358013644015e-08, |
| "logits/chosen": -1.2174177169799805, |
| "logits/rejected": -1.21800696849823, |
| "logps/chosen": -2.465816020965576, |
| "logps/ref_chosen": -0.27440598607063293, |
| "logps/ref_rejected": -0.2723068594932556, |
| "logps/rejected": -4.399135112762451, |
| "loss": 0.6911, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.021914100274443626, |
| "rewards/margins": 0.01935417950153351, |
| "rewards/rejected": -0.041268277913331985, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.8356932941490782, |
| "grad_norm": 0.04803801700472832, |
| "learning_rate": 3.9319760336490205e-08, |
| "logits/chosen": -0.8474469780921936, |
| "logits/rejected": -0.7499436736106873, |
| "logps/chosen": -0.888762354850769, |
| "logps/ref_chosen": -0.26048749685287476, |
| "logps/ref_rejected": -0.27132195234298706, |
| "logps/rejected": -1.396944522857666, |
| "loss": 0.6924, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.0062827481888234615, |
| "rewards/margins": 0.004973476752638817, |
| "rewards/rejected": -0.011256225407123566, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.8378306171520171, |
| "grad_norm": 0.03792424499988556, |
| "learning_rate": 3.831895019292897e-08, |
| "logits/chosen": -1.2523425817489624, |
| "logits/rejected": -1.2453007698059082, |
| "logps/chosen": -2.422001838684082, |
| "logps/ref_chosen": -0.34102123975753784, |
| "logps/ref_rejected": -0.38750913739204407, |
| "logps/rejected": -2.885390281677246, |
| "loss": 0.6922, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.020809805020689964, |
| "rewards/margins": 0.00416900496929884, |
| "rewards/rejected": -0.02497881092131138, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.8399679401549559, |
| "grad_norm": 0.05553947389125824, |
| "learning_rate": 3.732998357816514e-08, |
| "logits/chosen": -1.1434022188186646, |
| "logits/rejected": -1.1102546453475952, |
| "logps/chosen": -1.0866552591323853, |
| "logps/ref_chosen": -0.31954628229141235, |
| "logps/ref_rejected": -0.2894784212112427, |
| "logps/rejected": -0.8887047171592712, |
| "loss": 0.694, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.007671091239899397, |
| "rewards/margins": -0.0016788288485258818, |
| "rewards/rejected": -0.0059922621585428715, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.8421052631578947, |
| "grad_norm": 0.05157195031642914, |
| "learning_rate": 3.635291582475963e-08, |
| "logits/chosen": -1.0490577220916748, |
| "logits/rejected": -0.9551658630371094, |
| "logps/chosen": -1.1616473197937012, |
| "logps/ref_chosen": -0.3110552430152893, |
| "logps/ref_rejected": -0.29874786734580994, |
| "logps/rejected": -1.1342157125473022, |
| "loss": 0.6919, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.008505920879542828, |
| "rewards/margins": -0.00015124335186555982, |
| "rewards/rejected": -0.008354677818715572, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.8442425861608336, |
| "grad_norm": 0.052859533578157425, |
| "learning_rate": 3.538780159953347e-08, |
| "logits/chosen": -1.092551589012146, |
| "logits/rejected": -0.9267686009407043, |
| "logps/chosen": -0.6999182105064392, |
| "logps/ref_chosen": -0.31383803486824036, |
| "logps/ref_rejected": -0.26451122760772705, |
| "logps/rejected": -0.5865459442138672, |
| "loss": 0.6933, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.0038608014583587646, |
| "rewards/margins": -0.0006404545274563134, |
| "rewards/rejected": -0.003220347221940756, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.8463799091637724, |
| "grad_norm": 0.040192391723394394, |
| "learning_rate": 3.4434694900509345e-08, |
| "logits/chosen": -1.2798329591751099, |
| "logits/rejected": -1.2662111520767212, |
| "logps/chosen": -1.1712665557861328, |
| "logps/ref_chosen": -0.31277698278427124, |
| "logps/ref_rejected": -0.2908567786216736, |
| "logps/rejected": -1.3155272006988525, |
| "loss": 0.69, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.008584895171225071, |
| "rewards/margins": 0.0016618092777207494, |
| "rewards/rejected": -0.010246704332530499, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.8485172321667112, |
| "grad_norm": 0.07344400882720947, |
| "learning_rate": 3.349364905389032e-08, |
| "logits/chosen": -1.2493705749511719, |
| "logits/rejected": -1.1473186016082764, |
| "logps/chosen": -2.1941111087799072, |
| "logps/ref_chosen": -0.26943594217300415, |
| "logps/ref_rejected": -0.27296656370162964, |
| "logps/rejected": -3.068364143371582, |
| "loss": 0.692, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.019246753305196762, |
| "rewards/margins": 0.008707225322723389, |
| "rewards/rejected": -0.027953976765275, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.85065455516965, |
| "grad_norm": 0.07030794024467468, |
| "learning_rate": 3.256471671107616e-08, |
| "logits/chosen": -1.0609214305877686, |
| "logits/rejected": -1.0746532678604126, |
| "logps/chosen": -2.17276930809021, |
| "logps/ref_chosen": -0.2870789170265198, |
| "logps/ref_rejected": -0.2589226961135864, |
| "logps/rejected": -1.9765245914459229, |
| "loss": 0.6923, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.018856903538107872, |
| "rewards/margins": -0.0016808825312182307, |
| "rewards/rejected": -0.01717602089047432, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.8527918781725888, |
| "grad_norm": 0.0657213032245636, |
| "learning_rate": 3.1647949845717585e-08, |
| "logits/chosen": -0.9469838738441467, |
| "logits/rejected": -0.8936931490898132, |
| "logps/chosen": -1.1325814723968506, |
| "logps/ref_chosen": -0.2707745134830475, |
| "logps/ref_rejected": -0.33182671666145325, |
| "logps/rejected": -1.0249507427215576, |
| "loss": 0.6906, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.0086180679500103, |
| "rewards/margins": -0.0016868289094418287, |
| "rewards/rejected": -0.006931239739060402, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.8549292011755276, |
| "grad_norm": 0.02742362953722477, |
| "learning_rate": 3.074339975080836e-08, |
| "logits/chosen": -1.0643562078475952, |
| "logits/rejected": -1.0489563941955566, |
| "logps/chosen": -1.9609400033950806, |
| "logps/ref_chosen": -0.2782091796398163, |
| "logps/ref_rejected": -0.2605365812778473, |
| "logps/rejected": -2.2800099849700928, |
| "loss": 0.6905, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.016827307641506195, |
| "rewards/margins": 0.003367425175383687, |
| "rewards/rejected": -0.020194733515381813, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.8570665241784665, |
| "grad_norm": 0.15186117589473724, |
| "learning_rate": 2.98511170358155e-08, |
| "logits/chosen": -1.027389407157898, |
| "logits/rejected": -1.033107876777649, |
| "logps/chosen": -0.9600960612297058, |
| "logps/ref_chosen": -0.27743393182754517, |
| "logps/ref_rejected": -0.2859315574169159, |
| "logps/rejected": -0.9394850134849548, |
| "loss": 0.6937, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.006826621945947409, |
| "rewards/margins": -0.0002910876355599612, |
| "rewards/rejected": -0.00653553381562233, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.8592038471814053, |
| "grad_norm": 0.06097419932484627, |
| "learning_rate": 2.8971151623847584e-08, |
| "logits/chosen": -1.1736664772033691, |
| "logits/rejected": -1.103684902191162, |
| "logps/chosen": -2.0695838928222656, |
| "logps/ref_chosen": -0.2911103665828705, |
| "logps/ref_rejected": -0.30646640062332153, |
| "logps/rejected": -1.379057765007019, |
| "loss": 0.6922, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.017784735187888145, |
| "rewards/margins": -0.007058821152895689, |
| "rewards/rejected": -0.010725914500653744, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.8613411701843441, |
| "grad_norm": 0.03248900920152664, |
| "learning_rate": 2.8103552748861475e-08, |
| "logits/chosen": -1.1433343887329102, |
| "logits/rejected": -1.140360951423645, |
| "logps/chosen": -1.5606733560562134, |
| "logps/ref_chosen": -0.26488226652145386, |
| "logps/ref_rejected": -0.2668915092945099, |
| "logps/rejected": -1.4508603811264038, |
| "loss": 0.6944, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.012957911938428879, |
| "rewards/margins": -0.001118223532103002, |
| "rewards/rejected": -0.011839688755571842, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.863478493187283, |
| "grad_norm": 0.05639756843447685, |
| "learning_rate": 2.724836895290805e-08, |
| "logits/chosen": -1.0716643333435059, |
| "logits/rejected": -0.9454800486564636, |
| "logps/chosen": -0.9249680042266846, |
| "logps/ref_chosen": -0.2456003725528717, |
| "logps/ref_rejected": -0.2471083700656891, |
| "logps/rejected": -2.290517568588257, |
| "loss": 0.6921, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.006793675944209099, |
| "rewards/margins": 0.013640416786074638, |
| "rewards/rejected": -0.020434092730283737, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.8656158161902218, |
| "grad_norm": 0.057736948132514954, |
| "learning_rate": 2.6405648083415833e-08, |
| "logits/chosen": -1.2194430828094482, |
| "logits/rejected": -1.1069297790527344, |
| "logps/chosen": -1.4966850280761719, |
| "logps/ref_chosen": -0.2926192581653595, |
| "logps/ref_rejected": -0.3052845597267151, |
| "logps/rejected": -1.2467200756072998, |
| "loss": 0.6923, |
| "rewards/accuracies": 0.3125, |
| "rewards/chosen": -0.012040657922625542, |
| "rewards/margins": -0.002626303117722273, |
| "rewards/rejected": -0.009414355270564556, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.8677531391931605, |
| "grad_norm": 0.06308147311210632, |
| "learning_rate": 2.55754372905142e-08, |
| "logits/chosen": -1.2207025289535522, |
| "logits/rejected": -1.1226931810379028, |
| "logps/chosen": -1.3449983596801758, |
| "logps/ref_chosen": -0.26276296377182007, |
| "logps/ref_rejected": -0.2825526297092438, |
| "logps/rejected": -1.1711039543151855, |
| "loss": 0.6919, |
| "rewards/accuracies": 0.3125, |
| "rewards/chosen": -0.010822354815900326, |
| "rewards/margins": -0.0019368398934602737, |
| "rewards/rejected": -0.008885513991117477, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.8698904621960993, |
| "grad_norm": 0.06970781832933426, |
| "learning_rate": 2.475778302439524e-08, |
| "logits/chosen": -1.2031471729278564, |
| "logits/rejected": -1.2158570289611816, |
| "logps/chosen": -3.241205930709839, |
| "logps/ref_chosen": -0.3063742220401764, |
| "logps/ref_rejected": -0.3189111351966858, |
| "logps/rejected": -3.3646600246429443, |
| "loss": 0.6909, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.02934831753373146, |
| "rewards/margins": 0.001109174219891429, |
| "rewards/rejected": -0.03045749105513096, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.8720277851990382, |
| "grad_norm": 0.053331565111875534, |
| "learning_rate": 2.3952731032714973e-08, |
| "logits/chosen": -0.9331076741218567, |
| "logits/rejected": -0.913826584815979, |
| "logps/chosen": -0.8843392133712769, |
| "logps/ref_chosen": -0.19724667072296143, |
| "logps/ref_rejected": -0.21685902774333954, |
| "logps/rejected": -2.6877570152282715, |
| "loss": 0.6901, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.006870926357805729, |
| "rewards/margins": 0.017838051542639732, |
| "rewards/rejected": -0.024708978831768036, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.874165108201977, |
| "grad_norm": 0.10884372889995575, |
| "learning_rate": 2.3160326358033778e-08, |
| "logits/chosen": -1.0069117546081543, |
| "logits/rejected": -0.9372966885566711, |
| "logps/chosen": -1.5851794481277466, |
| "logps/ref_chosen": -0.2567218244075775, |
| "logps/ref_rejected": -0.2876638174057007, |
| "logps/rejected": -3.496063232421875, |
| "loss": 0.6902, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.013284576125442982, |
| "rewards/margins": 0.018799416720867157, |
| "rewards/rejected": -0.03208399564027786, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.8763024312049158, |
| "grad_norm": 0.08395621925592422, |
| "learning_rate": 2.2380613335296033e-08, |
| "logits/chosen": -0.9971158504486084, |
| "logits/rejected": -1.0825163125991821, |
| "logps/chosen": -1.076045274734497, |
| "logps/ref_chosen": -0.26019197702407837, |
| "logps/ref_rejected": -0.26725149154663086, |
| "logps/rejected": -0.8471391797065735, |
| "loss": 0.6914, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.008158531971275806, |
| "rewards/margins": -0.0023596561513841152, |
| "rewards/rejected": -0.0057988762855529785, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.8784397542078547, |
| "grad_norm": 0.04935070127248764, |
| "learning_rate": 2.1613635589349756e-08, |
| "logits/chosen": -1.0364327430725098, |
| "logits/rejected": -1.0644334554672241, |
| "logps/chosen": -0.6886401176452637, |
| "logps/ref_chosen": -0.28133004903793335, |
| "logps/ref_rejected": -0.2933580279350281, |
| "logps/rejected": -0.7532072067260742, |
| "loss": 0.6918, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.0040731001645326614, |
| "rewards/margins": 0.0005253910785540938, |
| "rewards/rejected": -0.004598491359502077, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.8805770772107935, |
| "grad_norm": 0.05942749232053757, |
| "learning_rate": 2.085943603250595e-08, |
| "logits/chosen": -1.026981234550476, |
| "logits/rejected": -0.9986696839332581, |
| "logps/chosen": -1.1557159423828125, |
| "logps/ref_chosen": -0.2736368775367737, |
| "logps/ref_rejected": -0.2570945620536804, |
| "logps/rejected": -2.048044443130493, |
| "loss": 0.6909, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.008820789866149426, |
| "rewards/margins": 0.009088706225156784, |
| "rewards/rejected": -0.017909498885273933, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.8827144002137323, |
| "grad_norm": 0.028082555159926414, |
| "learning_rate": 2.0118056862137354e-08, |
| "logits/chosen": -1.0706284046173096, |
| "logits/rejected": -0.9969926476478577, |
| "logps/chosen": -0.9348675012588501, |
| "logps/ref_chosen": -0.2700585126876831, |
| "logps/ref_rejected": -0.27900373935699463, |
| "logps/rejected": -0.9142702221870422, |
| "loss": 0.6916, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.006648090668022633, |
| "rewards/margins": -0.0002954264055006206, |
| "rewards/rejected": -0.006352663971483707, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.8848517232166712, |
| "grad_norm": 0.043999720364809036, |
| "learning_rate": 1.938953955831771e-08, |
| "logits/chosen": -1.1807438135147095, |
| "logits/rejected": -1.1560091972351074, |
| "logps/chosen": -1.154266357421875, |
| "logps/ref_chosen": -0.2998018264770508, |
| "logps/ref_rejected": -0.33052897453308105, |
| "logps/rejected": -1.1802566051483154, |
| "loss": 0.6916, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.008544646203517914, |
| "rewards/margins": -4.737001290777698e-05, |
| "rewards/rejected": -0.008497276343405247, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.88698904621961, |
| "grad_norm": 0.043411049991846085, |
| "learning_rate": 1.8673924881500823e-08, |
| "logits/chosen": -1.0801759958267212, |
| "logits/rejected": -1.0928469896316528, |
| "logps/chosen": -2.7921931743621826, |
| "logps/ref_chosen": -0.31348100304603577, |
| "logps/ref_rejected": -0.3117949068546295, |
| "logps/rejected": -4.0844550132751465, |
| "loss": 0.6913, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.024787120521068573, |
| "rewards/margins": 0.012939481064677238, |
| "rewards/rejected": -0.03772660344839096, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.8891263692225487, |
| "grad_norm": 0.038843587040901184, |
| "learning_rate": 1.797125287024029e-08, |
| "logits/chosen": -1.1883503198623657, |
| "logits/rejected": -1.2091866731643677, |
| "logps/chosen": -1.8129465579986572, |
| "logps/ref_chosen": -0.30427277088165283, |
| "logps/ref_rejected": -0.2890283465385437, |
| "logps/rejected": -2.7354025840759277, |
| "loss": 0.6901, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.015086738392710686, |
| "rewards/margins": 0.009377004578709602, |
| "rewards/rejected": -0.024463742971420288, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.8912636922254876, |
| "grad_norm": 0.10713820159435272, |
| "learning_rate": 1.7281562838948966e-08, |
| "logits/chosen": -1.0024110078811646, |
| "logits/rejected": -1.029348611831665, |
| "logps/chosen": -1.4553325176239014, |
| "logps/ref_chosen": -0.3092575967311859, |
| "logps/ref_rejected": -0.32750844955444336, |
| "logps/rejected": -1.2979540824890137, |
| "loss": 0.6936, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.011460748501121998, |
| "rewards/margins": -0.0017562932334840298, |
| "rewards/rejected": -0.00970445480197668, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.8934010152284264, |
| "grad_norm": 0.0541224330663681, |
| "learning_rate": 1.6604893375699592e-08, |
| "logits/chosen": -1.233893871307373, |
| "logits/rejected": -1.1285375356674194, |
| "logps/chosen": -1.1181683540344238, |
| "logps/ref_chosen": -0.2698320746421814, |
| "logps/ref_rejected": -0.25865986943244934, |
| "logps/rejected": -1.2507156133651733, |
| "loss": 0.6906, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.00848336424678564, |
| "rewards/margins": 0.0014371926663443446, |
| "rewards/rejected": -0.009920557960867882, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.8955383382313652, |
| "grad_norm": 0.07540567219257355, |
| "learning_rate": 1.5941282340065697e-08, |
| "logits/chosen": -1.2551161050796509, |
| "logits/rejected": -1.3349965810775757, |
| "logps/chosen": -0.9141122102737427, |
| "logps/ref_chosen": -0.28588783740997314, |
| "logps/ref_rejected": -0.3459934890270233, |
| "logps/rejected": -1.6550768613815308, |
| "loss": 0.6911, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.00628224341198802, |
| "rewards/margins": 0.006808590609580278, |
| "rewards/rejected": -0.013090834021568298, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.897675661234304, |
| "grad_norm": 0.06357910484075546, |
| "learning_rate": 1.5290766861003475e-08, |
| "logits/chosen": -1.0383198261260986, |
| "logits/rejected": -0.9995956420898438, |
| "logps/chosen": -0.6299840807914734, |
| "logps/ref_chosen": -0.25129714608192444, |
| "logps/ref_rejected": -0.23695197701454163, |
| "logps/rejected": -0.7673303484916687, |
| "loss": 0.6922, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.003786869114264846, |
| "rewards/margins": 0.0015169148100540042, |
| "rewards/rejected": -0.005303783807903528, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.8998129842372429, |
| "grad_norm": 0.13299192488193512, |
| "learning_rate": 1.4653383334774228e-08, |
| "logits/chosen": -1.1441679000854492, |
| "logits/rejected": -1.1827843189239502, |
| "logps/chosen": -2.2570748329162598, |
| "logps/ref_chosen": -0.2979227304458618, |
| "logps/ref_rejected": -0.309956431388855, |
| "logps/rejected": -2.515218496322632, |
| "loss": 0.6937, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.019591515883803368, |
| "rewards/margins": 0.0024610990658402443, |
| "rewards/rejected": -0.022052617743611336, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.9019503072401817, |
| "grad_norm": 0.04590640962123871, |
| "learning_rate": 1.4029167422908105e-08, |
| "logits/chosen": -1.2420191764831543, |
| "logits/rejected": -1.2000600099563599, |
| "logps/chosen": -0.9707568883895874, |
| "logps/ref_chosen": -0.3104054629802704, |
| "logps/ref_rejected": -0.3285520076751709, |
| "logps/rejected": -1.6886109113693237, |
| "loss": 0.6898, |
| "rewards/accuracies": 0.3125, |
| "rewards/chosen": -0.006603513844311237, |
| "rewards/margins": 0.006997073534876108, |
| "rewards/rejected": -0.013600586913526058, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.9040876302431204, |
| "grad_norm": 0.036288484930992126, |
| "learning_rate": 1.3418154050208936e-08, |
| "logits/chosen": -1.0877618789672852, |
| "logits/rejected": -1.1105204820632935, |
| "logps/chosen": -1.2828643321990967, |
| "logps/ref_chosen": -0.2894713580608368, |
| "logps/ref_rejected": -0.29316088557243347, |
| "logps/rejected": -2.018519401550293, |
| "loss": 0.6901, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.00993392989039421, |
| "rewards/margins": 0.0073196557350456715, |
| "rewards/rejected": -0.017253585159778595, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.9062249532460593, |
| "grad_norm": 0.05584224686026573, |
| "learning_rate": 1.2820377402800064e-08, |
| "logits/chosen": -1.0266621112823486, |
| "logits/rejected": -0.84267258644104, |
| "logps/chosen": -2.756096839904785, |
| "logps/ref_chosen": -0.24305550754070282, |
| "logps/ref_rejected": -0.2626683712005615, |
| "logps/rejected": -4.52547550201416, |
| "loss": 0.689, |
| "rewards/accuracies": 0.3125, |
| "rewards/chosen": -0.025130413472652435, |
| "rewards/margins": 0.017497658729553223, |
| "rewards/rejected": -0.042628075927495956, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.9083622762489981, |
| "grad_norm": 0.08656321465969086, |
| "learning_rate": 1.2235870926211616e-08, |
| "logits/chosen": -1.0264123678207397, |
| "logits/rejected": -0.9960221648216248, |
| "logps/chosen": -2.2166051864624023, |
| "logps/ref_chosen": -0.2546856999397278, |
| "logps/ref_rejected": -0.24695128202438354, |
| "logps/rejected": -2.906306505203247, |
| "loss": 0.6915, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.01961919292807579, |
| "rewards/margins": 0.006974362302571535, |
| "rewards/rejected": -0.02659355290234089, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.9104995992519369, |
| "grad_norm": 0.028010496869683266, |
| "learning_rate": 1.1664667323509347e-08, |
| "logits/chosen": -1.1612226963043213, |
| "logits/rejected": -1.0272963047027588, |
| "logps/chosen": -0.868614912033081, |
| "logps/ref_chosen": -0.25770702958106995, |
| "logps/ref_rejected": -0.27996566891670227, |
| "logps/rejected": -0.8165216445922852, |
| "loss": 0.6914, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.006109079346060753, |
| "rewards/margins": -0.0007435189327225089, |
| "rewards/rejected": -0.0053655607625842094, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.9126369222548758, |
| "grad_norm": 0.05501643940806389, |
| "learning_rate": 1.1106798553464802e-08, |
| "logits/chosen": -1.076944351196289, |
| "logits/rejected": -1.0348684787750244, |
| "logps/chosen": -0.7788200378417969, |
| "logps/ref_chosen": -0.27429747581481934, |
| "logps/ref_rejected": -0.26158803701400757, |
| "logps/rejected": -1.1152002811431885, |
| "loss": 0.691, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.005045225378125906, |
| "rewards/margins": 0.003490896662697196, |
| "rewards/rejected": -0.008536122739315033, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.9147742452578146, |
| "grad_norm": 0.0499616339802742, |
| "learning_rate": 1.0562295828767387e-08, |
| "logits/chosen": -1.1496565341949463, |
| "logits/rejected": -1.1493815183639526, |
| "logps/chosen": -1.1903436183929443, |
| "logps/ref_chosen": -0.28060171008110046, |
| "logps/ref_rejected": -0.3275516629219055, |
| "logps/rejected": -1.0847976207733154, |
| "loss": 0.6916, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.009097418747842312, |
| "rewards/margins": -0.0015249579446390271, |
| "rewards/rejected": -0.007572460453957319, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.9169115682607534, |
| "grad_norm": 0.06182143837213516, |
| "learning_rate": 1.0031189614277763e-08, |
| "logits/chosen": -1.0761969089508057, |
| "logits/rejected": -1.0539543628692627, |
| "logps/chosen": -2.400468349456787, |
| "logps/ref_chosen": -0.22628960013389587, |
| "logps/ref_rejected": -0.2360939383506775, |
| "logps/rejected": -2.89908504486084, |
| "loss": 0.6912, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.02174178697168827, |
| "rewards/margins": 0.0048881215043365955, |
| "rewards/rejected": -0.02662990801036358, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.9190488912636923, |
| "grad_norm": 0.06035654991865158, |
| "learning_rate": 9.513509625323518e-09, |
| "logits/chosen": -1.049296498298645, |
| "logits/rejected": -1.0529532432556152, |
| "logps/chosen": -0.8551061153411865, |
| "logps/ref_chosen": -0.2748366892337799, |
| "logps/ref_rejected": -0.24892383813858032, |
| "logps/rejected": -1.0484230518341064, |
| "loss": 0.6923, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.005802694242447615, |
| "rewards/margins": 0.0021922974847257137, |
| "rewards/rejected": -0.007994991727173328, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.921186214266631, |
| "grad_norm": 0.07599455118179321, |
| "learning_rate": 9.009284826036689e-09, |
| "logits/chosen": -0.9848541617393494, |
| "logits/rejected": -1.023877501487732, |
| "logps/chosen": -1.2949349880218506, |
| "logps/ref_chosen": -0.2874469757080078, |
| "logps/ref_rejected": -0.2727859318256378, |
| "logps/rejected": -1.9414962530136108, |
| "loss": 0.6899, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.01007488090544939, |
| "rewards/margins": 0.006612222176045179, |
| "rewards/rejected": -0.016687102615833282, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.9233235372695698, |
| "grad_norm": 0.05161188170313835, |
| "learning_rate": 8.518543427732949e-09, |
| "logits/chosen": -1.0869096517562866, |
| "logits/rejected": -1.0737793445587158, |
| "logps/chosen": -1.4159889221191406, |
| "logps/ref_chosen": -0.22637943923473358, |
| "logps/ref_rejected": -0.23996470868587494, |
| "logps/rejected": -2.357701063156128, |
| "loss": 0.6912, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.011896093375980854, |
| "rewards/margins": 0.00928126834332943, |
| "rewards/rejected": -0.02117736265063286, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.9254608602725087, |
| "grad_norm": 0.04050996154546738, |
| "learning_rate": 8.041312887333396e-09, |
| "logits/chosen": -1.119923710823059, |
| "logits/rejected": -1.0840022563934326, |
| "logps/chosen": -0.8473765254020691, |
| "logps/ref_chosen": -0.3231489658355713, |
| "logps/ref_rejected": -0.29785120487213135, |
| "logps/rejected": -1.1657902002334595, |
| "loss": 0.6902, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.0052422755397856236, |
| "rewards/margins": 0.0034371137153357267, |
| "rewards/rejected": -0.008679389022290707, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.9275981832754475, |
| "grad_norm": 0.07983153313398361, |
| "learning_rate": 7.577619905828281e-09, |
| "logits/chosen": -1.146085262298584, |
| "logits/rejected": -1.0778329372406006, |
| "logps/chosen": -1.6529432535171509, |
| "logps/ref_chosen": -0.25406455993652344, |
| "logps/ref_rejected": -0.2319611757993698, |
| "logps/rejected": -1.7526359558105469, |
| "loss": 0.6895, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.013988783583045006, |
| "rewards/margins": 0.0012179624754935503, |
| "rewards/rejected": -0.015206746757030487, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.9297355062783863, |
| "grad_norm": 0.1202373206615448, |
| "learning_rate": 7.127490426783123e-09, |
| "logits/chosen": -1.201835036277771, |
| "logits/rejected": -1.1484363079071045, |
| "logps/chosen": -1.498607873916626, |
| "logps/ref_chosen": -0.281930148601532, |
| "logps/ref_rejected": -0.2764580249786377, |
| "logps/rejected": -1.8418561220169067, |
| "loss": 0.6925, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.01216677762567997, |
| "rewards/margins": 0.003487203037366271, |
| "rewards/rejected": -0.015653979033231735, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.9318728292813251, |
| "grad_norm": 0.04052572324872017, |
| "learning_rate": 6.6909496348871445e-09, |
| "logits/chosen": -1.27157461643219, |
| "logits/rejected": -1.2992945909500122, |
| "logps/chosen": -2.0145699977874756, |
| "logps/ref_chosen": -0.28414857387542725, |
| "logps/ref_rejected": -0.31018394231796265, |
| "logps/rejected": -1.9193394184112549, |
| "loss": 0.6917, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.01730421558022499, |
| "rewards/margins": -0.00121266208589077, |
| "rewards/rejected": -0.01609155349433422, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.934010152284264, |
| "grad_norm": 0.02827433869242668, |
| "learning_rate": 6.268021954544095e-09, |
| "logits/chosen": -1.0729405879974365, |
| "logits/rejected": -1.0769671201705933, |
| "logps/chosen": -0.5261660814285278, |
| "logps/ref_chosen": -0.3042512536048889, |
| "logps/ref_rejected": -0.27524396777153015, |
| "logps/rejected": -0.5675642490386963, |
| "loss": 0.6946, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.002219148213043809, |
| "rewards/margins": 0.0007040543132461607, |
| "rewards/rejected": -0.002923202933743596, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.9361474752872028, |
| "grad_norm": 0.18654611706733704, |
| "learning_rate": 5.858731048505927e-09, |
| "logits/chosen": -1.2085157632827759, |
| "logits/rejected": -1.2286968231201172, |
| "logps/chosen": -1.5443133115768433, |
| "logps/ref_chosen": -0.286636084318161, |
| "logps/ref_rejected": -0.32453712821006775, |
| "logps/rejected": -1.9652129411697388, |
| "loss": 0.6917, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.012576771900057793, |
| "rewards/margins": 0.003829987719655037, |
| "rewards/rejected": -0.01640675961971283, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.9382847982901416, |
| "grad_norm": 0.04869319126009941, |
| "learning_rate": 5.463099816548577e-09, |
| "logits/chosen": -1.1957026720046997, |
| "logits/rejected": -1.1260743141174316, |
| "logps/chosen": -1.6731027364730835, |
| "logps/ref_chosen": -0.23255890607833862, |
| "logps/ref_rejected": -0.22799378633499146, |
| "logps/rejected": -2.5066137313842773, |
| "loss": 0.6918, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.014405437745153904, |
| "rewards/margins": 0.008380758576095104, |
| "rewards/rejected": -0.022786198183894157, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.9404221212930804, |
| "grad_norm": 0.028870297595858574, |
| "learning_rate": 5.08115039419113e-09, |
| "logits/chosen": -1.0532033443450928, |
| "logits/rejected": -0.9745779633522034, |
| "logps/chosen": -1.6381572484970093, |
| "logps/ref_chosen": -0.25896158814430237, |
| "logps/ref_rejected": -0.26583969593048096, |
| "logps/rejected": -1.754570722579956, |
| "loss": 0.6927, |
| "rewards/accuracies": 0.3125, |
| "rewards/chosen": -0.013791956007480621, |
| "rewards/margins": 0.0010953551391139627, |
| "rewards/rejected": -0.014887310564517975, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.9425594442960192, |
| "grad_norm": 0.03678857162594795, |
| "learning_rate": 4.712904151456864e-09, |
| "logits/chosen": -1.062943935394287, |
| "logits/rejected": -1.0091686248779297, |
| "logps/chosen": -0.7011521458625793, |
| "logps/ref_chosen": -0.30668699741363525, |
| "logps/ref_rejected": -0.3110805153846741, |
| "logps/rejected": -0.6848558187484741, |
| "loss": 0.6902, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.003944651689380407, |
| "rewards/margins": -0.00020689875236712396, |
| "rewards/rejected": -0.0037377530243247747, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.944696767298958, |
| "grad_norm": 0.031516749411821365, |
| "learning_rate": 4.358381691677931e-09, |
| "logits/chosen": -1.0623046159744263, |
| "logits/rejected": -1.0156131982803345, |
| "logps/chosen": -0.6341949701309204, |
| "logps/ref_chosen": -0.23815736174583435, |
| "logps/ref_rejected": -0.24365609884262085, |
| "logps/rejected": -0.6355026364326477, |
| "loss": 0.6925, |
| "rewards/accuracies": 0.25, |
| "rewards/chosen": -0.0039603752084076405, |
| "rewards/margins": -4.190987237961963e-05, |
| "rewards/rejected": -0.003918466158211231, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.9468340903018969, |
| "grad_norm": 0.08616174012422562, |
| "learning_rate": 4.0176028503425826e-09, |
| "logits/chosen": -1.1188013553619385, |
| "logits/rejected": -1.0280470848083496, |
| "logps/chosen": -1.2545464038848877, |
| "logps/ref_chosen": -0.2857895493507385, |
| "logps/ref_rejected": -0.25111040472984314, |
| "logps/rejected": -1.0421061515808105, |
| "loss": 0.6914, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.009687569923698902, |
| "rewards/margins": -0.0017776124877855182, |
| "rewards/rejected": -0.007909957319498062, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.9489714133048357, |
| "grad_norm": 0.10123251378536224, |
| "learning_rate": 3.6905866939851983e-09, |
| "logits/chosen": -1.1717698574066162, |
| "logits/rejected": -1.080072283744812, |
| "logps/chosen": -0.639046847820282, |
| "logps/ref_chosen": -0.352128803730011, |
| "logps/ref_rejected": -0.3118590712547302, |
| "logps/rejected": -0.5797746181488037, |
| "loss": 0.6898, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.002869180403649807, |
| "rewards/margins": -0.00019002471526619047, |
| "rewards/rejected": -0.0026791556738317013, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.9511087363077745, |
| "grad_norm": 0.035320959985256195, |
| "learning_rate": 3.3773515191196646e-09, |
| "logits/chosen": -1.1203659772872925, |
| "logits/rejected": -1.123307466506958, |
| "logps/chosen": -1.9944889545440674, |
| "logps/ref_chosen": -0.24146565794944763, |
| "logps/ref_rejected": -0.2422138750553131, |
| "logps/rejected": -2.1423873901367188, |
| "loss": 0.693, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.017530232667922974, |
| "rewards/margins": 0.0014715016586706042, |
| "rewards/rejected": -0.019001735374331474, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.9532460593107134, |
| "grad_norm": 0.0520508699119091, |
| "learning_rate": 3.077914851215585e-09, |
| "logits/chosen": -1.1985692977905273, |
| "logits/rejected": -1.1618348360061646, |
| "logps/chosen": -1.7318633794784546, |
| "logps/ref_chosen": -0.27540823817253113, |
| "logps/ref_rejected": -0.2923397123813629, |
| "logps/rejected": -1.8383948802947998, |
| "loss": 0.6924, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.014564551413059235, |
| "rewards/margins": 0.0008959975675679743, |
| "rewards/rejected": -0.015460549853742123, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.9553833823136522, |
| "grad_norm": 0.0733427181839943, |
| "learning_rate": 2.7922934437178692e-09, |
| "logits/chosen": -1.0265856981277466, |
| "logits/rejected": -1.077622652053833, |
| "logps/chosen": -1.0291080474853516, |
| "logps/ref_chosen": -0.2614051401615143, |
| "logps/ref_rejected": -0.2726322114467621, |
| "logps/rejected": -0.9504073262214661, |
| "loss": 0.6891, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.007677028886973858, |
| "rewards/margins": -0.0008992779185064137, |
| "rewards/rejected": -0.0067777507938444614, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.957520705316591, |
| "grad_norm": 0.08417893201112747, |
| "learning_rate": 2.5205032771092592e-09, |
| "logits/chosen": -1.0855573415756226, |
| "logits/rejected": -1.0685391426086426, |
| "logps/chosen": -1.4116144180297852, |
| "logps/ref_chosen": -0.2881208062171936, |
| "logps/ref_rejected": -0.3019348680973053, |
| "logps/rejected": -2.4704976081848145, |
| "loss": 0.6921, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.011234934441745281, |
| "rewards/margins": 0.010450691916048527, |
| "rewards/rejected": -0.021685628220438957, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.9596580283195298, |
| "grad_norm": 0.03422243148088455, |
| "learning_rate": 2.2625595580163247e-09, |
| "logits/chosen": -1.2020583152770996, |
| "logits/rejected": -1.2069993019104004, |
| "logps/chosen": -2.0264804363250732, |
| "logps/ref_chosen": -0.3250980079174042, |
| "logps/ref_rejected": -0.37241020798683167, |
| "logps/rejected": -3.3655900955200195, |
| "loss": 0.6917, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.017013821750879288, |
| "rewards/margins": 0.012917975895106792, |
| "rewards/rejected": -0.029931796714663506, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.9617953513224686, |
| "grad_norm": 0.1055985763669014, |
| "learning_rate": 2.0184767183584474e-09, |
| "logits/chosen": -0.861950159072876, |
| "logits/rejected": -0.9005047082901001, |
| "logps/chosen": -1.3926258087158203, |
| "logps/ref_chosen": -0.3365902006626129, |
| "logps/ref_rejected": -0.3642772138118744, |
| "logps/rejected": -1.3829231262207031, |
| "loss": 0.6926, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.010560356080532074, |
| "rewards/margins": -0.00037389714270830154, |
| "rewards/rejected": -0.010186458937823772, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.9639326743254074, |
| "grad_norm": 0.09978106617927551, |
| "learning_rate": 1.7882684145406612e-09, |
| "logits/chosen": -1.0659477710723877, |
| "logits/rejected": -1.0803520679473877, |
| "logps/chosen": -1.418735384941101, |
| "logps/ref_chosen": -0.28240862488746643, |
| "logps/ref_rejected": -0.267762690782547, |
| "logps/rejected": -1.3518282175064087, |
| "loss": 0.6901, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.011363268829882145, |
| "rewards/margins": -0.0005226129433140159, |
| "rewards/rejected": -0.010840654373168945, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.9660699973283462, |
| "grad_norm": 0.07156708091497421, |
| "learning_rate": 1.5719475266893489e-09, |
| "logits/chosen": -1.1436063051223755, |
| "logits/rejected": -1.1482046842575073, |
| "logps/chosen": -1.4140851497650146, |
| "logps/ref_chosen": -0.25572752952575684, |
| "logps/ref_rejected": -0.2551599144935608, |
| "logps/rejected": -1.8835846185684204, |
| "loss": 0.6925, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.01158357597887516, |
| "rewards/margins": 0.004700670950114727, |
| "rewards/rejected": -0.016284245997667313, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.9682073203312851, |
| "grad_norm": 0.10024421662092209, |
| "learning_rate": 1.3695261579316775e-09, |
| "logits/chosen": -1.2006874084472656, |
| "logits/rejected": -1.1288232803344727, |
| "logps/chosen": -2.8809053897857666, |
| "logps/ref_chosen": -0.2576255202293396, |
| "logps/ref_rejected": -0.2587803602218628, |
| "logps/rejected": -1.595051884651184, |
| "loss": 0.6924, |
| "rewards/accuracies": 0.1875, |
| "rewards/chosen": -0.026232797652482986, |
| "rewards/margins": -0.012870082631707191, |
| "rewards/rejected": -0.013362716883420944, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.9703446433342239, |
| "grad_norm": 0.07974204421043396, |
| "learning_rate": 1.1810156337183908e-09, |
| "logits/chosen": -1.1333831548690796, |
| "logits/rejected": -1.1090826988220215, |
| "logps/chosen": -2.223517656326294, |
| "logps/ref_chosen": -0.19759081304073334, |
| "logps/ref_rejected": -0.22725769877433777, |
| "logps/rejected": -2.4174184799194336, |
| "loss": 0.6919, |
| "rewards/accuracies": 0.3125, |
| "rewards/chosen": -0.020259268581867218, |
| "rewards/margins": 0.0016423360211774707, |
| "rewards/rejected": -0.021901607513427734, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.9724819663371627, |
| "grad_norm": 0.04933435842394829, |
| "learning_rate": 1.0064265011902328e-09, |
| "logits/chosen": -1.103116512298584, |
| "logits/rejected": -1.0423328876495361, |
| "logps/chosen": -1.0515449047088623, |
| "logps/ref_chosen": -0.3091231882572174, |
| "logps/ref_rejected": -0.28202730417251587, |
| "logps/rejected": -1.0539557933807373, |
| "loss": 0.692, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.007424216251820326, |
| "rewards/margins": 0.0002950675261672586, |
| "rewards/rejected": -0.007719284389168024, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.9746192893401016, |
| "grad_norm": 0.05153544247150421, |
| "learning_rate": 8.457685285878091e-10, |
| "logits/chosen": -0.9670603275299072, |
| "logits/rejected": -1.051478385925293, |
| "logps/chosen": -1.4354450702667236, |
| "logps/ref_chosen": -0.32283252477645874, |
| "logps/ref_rejected": -0.32713887095451355, |
| "logps/rejected": -2.9762208461761475, |
| "loss": 0.6904, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.011126126162707806, |
| "rewards/margins": 0.015364693477749825, |
| "rewards/rejected": -0.026490818709135056, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.9767566123430403, |
| "grad_norm": 0.04315502569079399, |
| "learning_rate": 6.990507047049676e-10, |
| "logits/chosen": -1.256274938583374, |
| "logits/rejected": -1.4078019857406616, |
| "logps/chosen": -4.19973087310791, |
| "logps/ref_chosen": -0.29258519411087036, |
| "logps/ref_rejected": -0.2584461569786072, |
| "logps/rejected": -3.8421783447265625, |
| "loss": 0.6925, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.0390714555978775, |
| "rewards/margins": -0.0032341349869966507, |
| "rewards/rejected": -0.035837322473526, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.9788939353459791, |
| "grad_norm": 0.033967357128858566, |
| "learning_rate": 5.662812383859794e-10, |
| "logits/chosen": -1.1865086555480957, |
| "logits/rejected": -1.147991418838501, |
| "logps/chosen": -1.014259696006775, |
| "logps/ref_chosen": -0.289029061794281, |
| "logps/ref_rejected": -0.25405067205429077, |
| "logps/rejected": -1.5963501930236816, |
| "loss": 0.6919, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.0072523062117397785, |
| "rewards/margins": 0.006170689128339291, |
| "rewards/rejected": -0.013422995805740356, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.981031258348918, |
| "grad_norm": 0.04235478863120079, |
| "learning_rate": 4.4746755806621126e-10, |
| "logits/chosen": -1.1211308240890503, |
| "logits/rejected": -1.1855254173278809, |
| "logps/chosen": -1.5747071504592896, |
| "logps/ref_chosen": -0.261625200510025, |
| "logps/ref_rejected": -0.32258230447769165, |
| "logps/rejected": -2.0239381790161133, |
| "loss": 0.6902, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.01313081942498684, |
| "rewards/margins": 0.003882738295942545, |
| "rewards/rejected": -0.017013555392622948, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.9831685813518568, |
| "grad_norm": 0.0612514354288578, |
| "learning_rate": 3.4261631135654167e-10, |
| "logits/chosen": -0.964380145072937, |
| "logits/rejected": -0.8499566912651062, |
| "logps/chosen": -0.7069191336631775, |
| "logps/ref_chosen": -0.24211262166500092, |
| "logps/ref_rejected": -0.2225920855998993, |
| "logps/rejected": -0.9843603372573853, |
| "loss": 0.6918, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.004648065660148859, |
| "rewards/margins": 0.002969617024064064, |
| "rewards/rejected": -0.007617682218551636, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.9853059043547956, |
| "grad_norm": 0.08188635855913162, |
| "learning_rate": 2.5173336467135263e-10, |
| "logits/chosen": -1.1875112056732178, |
| "logits/rejected": -1.0911164283752441, |
| "logps/chosen": -0.8793160915374756, |
| "logps/ref_chosen": -0.27605482935905457, |
| "logps/ref_rejected": -0.29801109433174133, |
| "logps/rejected": -0.8369953632354736, |
| "loss": 0.6916, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.006032612174749374, |
| "rewards/margins": -0.0006427701446227729, |
| "rewards/rejected": -0.005389841739088297, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.9874432273577345, |
| "grad_norm": 0.05556642264127731, |
| "learning_rate": 1.7482380290034792e-10, |
| "logits/chosen": -1.1771061420440674, |
| "logits/rejected": -1.0971652269363403, |
| "logps/chosen": -2.168231725692749, |
| "logps/ref_chosen": -0.24617156386375427, |
| "logps/ref_rejected": -0.26906073093414307, |
| "logps/rejected": -2.434049129486084, |
| "loss": 0.6891, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.01922059990465641, |
| "rewards/margins": 0.0024292808957397938, |
| "rewards/rejected": -0.021649882197380066, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.9895805503606733, |
| "grad_norm": 0.05566013976931572, |
| "learning_rate": 1.1189192912416933e-10, |
| "logits/chosen": -1.1709370613098145, |
| "logits/rejected": -1.0920566320419312, |
| "logps/chosen": -1.544476866722107, |
| "logps/ref_chosen": -0.30340495705604553, |
| "logps/ref_rejected": -0.29229214787483215, |
| "logps/rejected": -1.696970820426941, |
| "loss": 0.6923, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.012410718947649002, |
| "rewards/margins": 0.0016360683366656303, |
| "rewards/rejected": -0.014046786352992058, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.9917178733636121, |
| "grad_norm": 0.04849935322999954, |
| "learning_rate": 6.294126437336733e-11, |
| "logits/chosen": -1.1361756324768066, |
| "logits/rejected": -1.0911016464233398, |
| "logps/chosen": -1.3650732040405273, |
| "logps/ref_chosen": -0.2822812497615814, |
| "logps/ref_rejected": -0.2800726890563965, |
| "logps/rejected": -1.8009192943572998, |
| "loss": 0.6906, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.010827918536961079, |
| "rewards/margins": 0.004380547441542149, |
| "rewards/rejected": -0.015208465978503227, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.9938551963665508, |
| "grad_norm": 0.03441289812326431, |
| "learning_rate": 2.797454743164174e-11, |
| "logits/chosen": -1.2859561443328857, |
| "logits/rejected": -1.1582015752792358, |
| "logps/chosen": -1.7082535028457642, |
| "logps/ref_chosen": -0.26477229595184326, |
| "logps/ref_rejected": -0.2741115689277649, |
| "logps/rejected": -1.032767415046692, |
| "loss": 0.693, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.014434811659157276, |
| "rewards/margins": -0.006848253775388002, |
| "rewards/rejected": -0.007586558349430561, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.9959925193694897, |
| "grad_norm": 0.056417495012283325, |
| "learning_rate": 6.993734682547714e-12, |
| "logits/chosen": -1.0160553455352783, |
| "logits/rejected": -0.9553963541984558, |
| "logps/chosen": -1.0417840480804443, |
| "logps/ref_chosen": -0.2895504832267761, |
| "logps/ref_rejected": -0.2877947986125946, |
| "logps/rejected": -1.152309536933899, |
| "loss": 0.6928, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.007522334810346365, |
| "rewards/margins": 0.001122812507674098, |
| "rewards/rejected": -0.00864514708518982, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.9981298423724285, |
| "grad_norm": 0.08919379115104675, |
| "learning_rate": 0.0, |
| "logits/chosen": -0.9465761184692383, |
| "logits/rejected": -0.9007689952850342, |
| "logps/chosen": -0.8224260807037354, |
| "logps/ref_chosen": -0.2377173751592636, |
| "logps/ref_rejected": -0.2431478500366211, |
| "logps/rejected": -0.8240893483161926, |
| "loss": 0.6899, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": -0.005847086198627949, |
| "rewards/margins": -3.767057933146134e-05, |
| "rewards/rejected": -0.005809415597468615, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.9981298423724285, |
| "step": 467, |
| "total_flos": 0.0, |
| "train_loss": 0.0, |
| "train_runtime": 0.0077, |
| "train_samples_per_second": 7740188.199, |
| "train_steps_per_second": 60369.228 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 467, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 32, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|