| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.998165137614679, |
| "eval_steps": 500, |
| "global_step": 221, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.004516584333098095, |
| "grad_norm": 17.491543776149005, |
| "learning_rate": 4.285714285714285e-08, |
| "logits/chosen": -3.703737258911133, |
| "logits/rejected": -3.642177104949951, |
| "logps/chosen": -230.21658325195312, |
| "logps/rejected": -213.08389282226562, |
| "loss": 0.8161, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.00903316866619619, |
| "grad_norm": 16.5951979105037, |
| "learning_rate": 8.57142857142857e-08, |
| "logits/chosen": -3.811067819595337, |
| "logits/rejected": -3.761146306991577, |
| "logps/chosen": -186.53829956054688, |
| "logps/rejected": -172.09280395507812, |
| "loss": 0.818, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.013549752999294284, |
| "grad_norm": 16.426043598595893, |
| "learning_rate": 1.2857142857142855e-07, |
| "logits/chosen": -3.673379898071289, |
| "logits/rejected": -3.574615955352783, |
| "logps/chosen": -185.69815063476562, |
| "logps/rejected": -167.53453063964844, |
| "loss": 0.8218, |
| "rewards/accuracies": 0.421875, |
| "rewards/chosen": -0.00822072196751833, |
| "rewards/margins": -0.011108924634754658, |
| "rewards/rejected": 0.0028882024344056845, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.01806633733239238, |
| "grad_norm": 17.182455484491946, |
| "learning_rate": 1.714285714285714e-07, |
| "logits/chosen": -3.6595711708068848, |
| "logits/rejected": -3.664757490158081, |
| "logps/chosen": -233.35186767578125, |
| "logps/rejected": -220.01324462890625, |
| "loss": 0.8183, |
| "rewards/accuracies": 0.453125, |
| "rewards/chosen": -0.0016412150580435991, |
| "rewards/margins": -0.0031568286940455437, |
| "rewards/rejected": 0.0015156148001551628, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.022582921665490474, |
| "grad_norm": 15.69002599516555, |
| "learning_rate": 2.1428571428571428e-07, |
| "logits/chosen": -3.727100133895874, |
| "logits/rejected": -3.737985134124756, |
| "logps/chosen": -190.59376525878906, |
| "logps/rejected": -175.35711669921875, |
| "loss": 0.8179, |
| "rewards/accuracies": 0.46875, |
| "rewards/chosen": -0.007597364019602537, |
| "rewards/margins": -0.0018714312463998795, |
| "rewards/rejected": -0.005725932773202658, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.02709950599858857, |
| "grad_norm": 15.612644086790919, |
| "learning_rate": 2.571428571428571e-07, |
| "logits/chosen": -3.6620445251464844, |
| "logits/rejected": -3.6660032272338867, |
| "logps/chosen": -184.26095581054688, |
| "logps/rejected": -162.46971130371094, |
| "loss": 0.8186, |
| "rewards/accuracies": 0.546875, |
| "rewards/chosen": -0.008165668696165085, |
| "rewards/margins": -0.008361553773283958, |
| "rewards/rejected": 0.00019588530994951725, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.031616090331686664, |
| "grad_norm": 13.683214125166593, |
| "learning_rate": 3e-07, |
| "logits/chosen": -3.690432071685791, |
| "logits/rejected": -3.5761916637420654, |
| "logps/chosen": -170.06161499023438, |
| "logps/rejected": -156.4132080078125, |
| "loss": 0.8176, |
| "rewards/accuracies": 0.359375, |
| "rewards/chosen": -0.00132226780988276, |
| "rewards/margins": -0.01818685233592987, |
| "rewards/rejected": 0.016864586621522903, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.03613267466478476, |
| "grad_norm": 16.4402451983829, |
| "learning_rate": 2.999838368626891e-07, |
| "logits/chosen": -3.741443157196045, |
| "logits/rejected": -3.670546293258667, |
| "logps/chosen": -196.43753051757812, |
| "logps/rejected": -175.54696655273438, |
| "loss": 0.8229, |
| "rewards/accuracies": 0.46875, |
| "rewards/chosen": -0.003386292140930891, |
| "rewards/margins": -0.005730946082621813, |
| "rewards/rejected": 0.002344653941690922, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.04064925899788285, |
| "grad_norm": 17.452034360978363, |
| "learning_rate": 2.9993535093404974e-07, |
| "logits/chosen": -3.6631717681884766, |
| "logits/rejected": -3.5741701126098633, |
| "logps/chosen": -229.43096923828125, |
| "logps/rejected": -198.76205444335938, |
| "loss": 0.8287, |
| "rewards/accuracies": 0.421875, |
| "rewards/chosen": -0.007437766529619694, |
| "rewards/margins": -0.012869942933321, |
| "rewards/rejected": 0.005432176869362593, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.04516584333098095, |
| "grad_norm": 18.322329811535944, |
| "learning_rate": 2.998545526632117e-07, |
| "logits/chosen": -3.725301504135132, |
| "logits/rejected": -3.667342185974121, |
| "logps/chosen": -203.80462646484375, |
| "logps/rejected": -184.80245971679688, |
| "loss": 0.8243, |
| "rewards/accuracies": 0.390625, |
| "rewards/chosen": -0.009436231106519699, |
| "rewards/margins": -0.01625261828303337, |
| "rewards/rejected": 0.0068163881078362465, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.04968242766407904, |
| "grad_norm": 18.170319693328302, |
| "learning_rate": 2.9974145946288874e-07, |
| "logits/chosen": -3.6613407135009766, |
| "logits/rejected": -3.6700329780578613, |
| "logps/chosen": -222.37948608398438, |
| "logps/rejected": -197.45956420898438, |
| "loss": 0.8201, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.00792229175567627, |
| "rewards/margins": -0.00381114287301898, |
| "rewards/rejected": -0.004111149813979864, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.05419901199717714, |
| "grad_norm": 16.73406724526876, |
| "learning_rate": 2.9959609570562665e-07, |
| "logits/chosen": -3.6817235946655273, |
| "logits/rejected": -3.6180002689361572, |
| "logps/chosen": -207.94598388671875, |
| "logps/rejected": -185.84361267089844, |
| "loss": 0.8223, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.0013277027755975723, |
| "rewards/margins": -0.012387244962155819, |
| "rewards/rejected": 0.011059543117880821, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.05871559633027523, |
| "grad_norm": 14.830646302858986, |
| "learning_rate": 2.994184927185504e-07, |
| "logits/chosen": -3.5986547470092773, |
| "logits/rejected": -3.653524875640869, |
| "logps/chosen": -195.15597534179688, |
| "logps/rejected": -176.31295776367188, |
| "loss": 0.8152, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": 0.0033681748900562525, |
| "rewards/margins": 0.006227727048099041, |
| "rewards/rejected": -0.002859552390873432, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.06323218066337333, |
| "grad_norm": 14.922261930025773, |
| "learning_rate": 2.9920868877661274e-07, |
| "logits/chosen": -3.749242067337036, |
| "logits/rejected": -3.669080972671509, |
| "logps/chosen": -187.9488525390625, |
| "logps/rejected": -171.089111328125, |
| "loss": 0.817, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.010204151272773743, |
| "rewards/margins": -0.0027150483801960945, |
| "rewards/rejected": -0.007489103823900223, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.06774876499647142, |
| "grad_norm": 16.851808937739815, |
| "learning_rate": 2.9896672909434605e-07, |
| "logits/chosen": -3.7500953674316406, |
| "logits/rejected": -3.6712448596954346, |
| "logps/chosen": -207.16380310058594, |
| "logps/rejected": -186.6913299560547, |
| "loss": 0.8188, |
| "rewards/accuracies": 0.484375, |
| "rewards/chosen": -0.008293930441141129, |
| "rewards/margins": -0.005823222920298576, |
| "rewards/rejected": -0.0024707079865038395, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.07226534932956952, |
| "grad_norm": 14.552003861923032, |
| "learning_rate": 2.986926658161179e-07, |
| "logits/chosen": -3.640725612640381, |
| "logits/rejected": -3.612764835357666, |
| "logps/chosen": -192.30868530273438, |
| "logps/rejected": -175.70521545410156, |
| "loss": 0.8232, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.0005644555203616619, |
| "rewards/margins": -0.005334064364433289, |
| "rewards/rejected": 0.005898520816117525, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.07678193366266761, |
| "grad_norm": 16.088064283526844, |
| "learning_rate": 2.9838655800489354e-07, |
| "logits/chosen": -3.6310815811157227, |
| "logits/rejected": -3.6059412956237793, |
| "logps/chosen": -201.4388427734375, |
| "logps/rejected": -183.0606689453125, |
| "loss": 0.8124, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": 0.0131416916847229, |
| "rewards/margins": 0.013241738080978394, |
| "rewards/rejected": -0.00010004616342484951, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.0812985179957657, |
| "grad_norm": 14.992244007771152, |
| "learning_rate": 2.980484716295075e-07, |
| "logits/chosen": -3.733997344970703, |
| "logits/rejected": -3.698491096496582, |
| "logps/chosen": -188.22225952148438, |
| "logps/rejected": -173.397705078125, |
| "loss": 0.8156, |
| "rewards/accuracies": 0.484375, |
| "rewards/chosen": 0.005783616099506617, |
| "rewards/margins": 0.006108994595706463, |
| "rewards/rejected": -0.00032537919469177723, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.0858151023288638, |
| "grad_norm": 19.12841459122326, |
| "learning_rate": 2.976784795504466e-07, |
| "logits/chosen": -3.6905295848846436, |
| "logits/rejected": -3.5900840759277344, |
| "logps/chosen": -203.93548583984375, |
| "logps/rejected": -177.45327758789062, |
| "loss": 0.8194, |
| "rewards/accuracies": 0.546875, |
| "rewards/chosen": -0.008039581589400768, |
| "rewards/margins": 0.008988430723547935, |
| "rewards/rejected": -0.01702801324427128, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.0903316866619619, |
| "grad_norm": 16.487134694988207, |
| "learning_rate": 2.972766615041477e-07, |
| "logits/chosen": -3.625434398651123, |
| "logits/rejected": -3.5620625019073486, |
| "logps/chosen": -230.413818359375, |
| "logps/rejected": -209.59951782226562, |
| "loss": 0.8031, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": 0.0009453308302909136, |
| "rewards/margins": 0.023260660469532013, |
| "rewards/rejected": -0.02231532707810402, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.09484827099505999, |
| "grad_norm": 18.422012266521907, |
| "learning_rate": 2.968431040858144e-07, |
| "logits/chosen": -3.6120433807373047, |
| "logits/rejected": -3.5835161209106445, |
| "logps/chosen": -179.35166931152344, |
| "logps/rejected": -174.52101135253906, |
| "loss": 0.8095, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.007007395848631859, |
| "rewards/margins": 0.009391836822032928, |
| "rewards/rejected": -0.016399234533309937, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.09936485532815809, |
| "grad_norm": 19.19498923375192, |
| "learning_rate": 2.963779007307544e-07, |
| "logits/chosen": -3.5445475578308105, |
| "logits/rejected": -3.5850906372070312, |
| "logps/chosen": -232.3138427734375, |
| "logps/rejected": -210.61709594726562, |
| "loss": 0.8196, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 0.0029985038563609123, |
| "rewards/margins": 0.020012138411402702, |
| "rewards/rejected": -0.017013631761074066, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.10388143966125618, |
| "grad_norm": 15.614857590390317, |
| "learning_rate": 2.958811516942438e-07, |
| "logits/chosen": -3.697209358215332, |
| "logits/rejected": -3.624351978302002, |
| "logps/chosen": -197.09347534179688, |
| "logps/rejected": -177.77401733398438, |
| "loss": 0.804, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": 0.0026644528843462467, |
| "rewards/margins": 0.028430193662643433, |
| "rewards/rejected": -0.025765739381313324, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.10839802399435428, |
| "grad_norm": 15.89381246255073, |
| "learning_rate": 2.953529640299211e-07, |
| "logits/chosen": -3.600754737854004, |
| "logits/rejected": -3.5322327613830566, |
| "logps/chosen": -228.12118530273438, |
| "logps/rejected": -209.07611083984375, |
| "loss": 0.8188, |
| "rewards/accuracies": 0.453125, |
| "rewards/chosen": -0.006520797498524189, |
| "rewards/margins": -0.0006878629792481661, |
| "rewards/rejected": -0.0058329347521066666, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.11291460832745237, |
| "grad_norm": 15.396893791413314, |
| "learning_rate": 2.947934515667162e-07, |
| "logits/chosen": -3.555856466293335, |
| "logits/rejected": -3.548595905303955, |
| "logps/chosen": -209.21588134765625, |
| "logps/rejected": -191.61837768554688, |
| "loss": 0.8118, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -0.0008083764696493745, |
| "rewards/margins": 0.019747722893953323, |
| "rewards/rejected": -0.020556099712848663, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.11743119266055047, |
| "grad_norm": 17.20718944198085, |
| "learning_rate": 2.9420273488431933e-07, |
| "logits/chosen": -3.8108675479888916, |
| "logits/rejected": -3.727308750152588, |
| "logps/chosen": -208.67276000976562, |
| "logps/rejected": -187.67626953125, |
| "loss": 0.8029, |
| "rewards/accuracies": 0.515625, |
| "rewards/chosen": -0.009361563250422478, |
| "rewards/margins": 0.012206509709358215, |
| "rewards/rejected": -0.021568071097135544, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.12194777699364856, |
| "grad_norm": 15.82300857880186, |
| "learning_rate": 2.9358094128719524e-07, |
| "logits/chosen": -3.725928544998169, |
| "logits/rejected": -3.6373510360717773, |
| "logps/chosen": -184.15992736816406, |
| "logps/rejected": -163.3759307861328, |
| "loss": 0.8106, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.0011545311426743865, |
| "rewards/margins": 0.01487318892031908, |
| "rewards/rejected": -0.016027718782424927, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.12646436132674665, |
| "grad_norm": 17.170961024910188, |
| "learning_rate": 2.929282047771477e-07, |
| "logits/chosen": -3.7685747146606445, |
| "logits/rejected": -3.627628803253174, |
| "logps/chosen": -180.48487854003906, |
| "logps/rejected": -159.71482849121094, |
| "loss": 0.8028, |
| "rewards/accuracies": 0.546875, |
| "rewards/chosen": -0.006781768519431353, |
| "rewards/margins": 0.012908656150102615, |
| "rewards/rejected": -0.01969042420387268, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.13098094565984475, |
| "grad_norm": 16.359167675199306, |
| "learning_rate": 2.9224466602444125e-07, |
| "logits/chosen": -3.7441000938415527, |
| "logits/rejected": -3.6602091789245605, |
| "logps/chosen": -188.38925170898438, |
| "logps/rejected": -167.985595703125, |
| "loss": 0.8037, |
| "rewards/accuracies": 0.578125, |
| "rewards/chosen": -0.003988177981227636, |
| "rewards/margins": 0.01803458295762539, |
| "rewards/rejected": -0.022022761404514313, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.13549752999294284, |
| "grad_norm": 17.676336950925887, |
| "learning_rate": 2.9153047233748554e-07, |
| "logits/chosen": -3.643461227416992, |
| "logits/rejected": -3.617875814437866, |
| "logps/chosen": -225.23348999023438, |
| "logps/rejected": -212.8236541748047, |
| "loss": 0.8048, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": 0.004974519833922386, |
| "rewards/margins": 0.03113599866628647, |
| "rewards/rejected": -0.026161476969718933, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.14001411432604094, |
| "grad_norm": 14.414082201816626, |
| "learning_rate": 2.907857776310889e-07, |
| "logits/chosen": -3.666928291320801, |
| "logits/rejected": -3.638302803039551, |
| "logps/chosen": -191.73712158203125, |
| "logps/rejected": -166.03549194335938, |
| "loss": 0.8045, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": 0.004222148098051548, |
| "rewards/margins": 0.035250477492809296, |
| "rewards/rejected": -0.031028330326080322, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.14453069865913903, |
| "grad_norm": 15.20568282502077, |
| "learning_rate": 2.9001074239328855e-07, |
| "logits/chosen": -3.678450345993042, |
| "logits/rejected": -3.6584630012512207, |
| "logps/chosen": -189.22283935546875, |
| "logps/rejected": -172.16836547851562, |
| "loss": 0.8085, |
| "rewards/accuracies": 0.59375, |
| "rewards/chosen": -0.005335791036486626, |
| "rewards/margins": 0.0169003177434206, |
| "rewards/rejected": -0.022236105054616928, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.14904728299223713, |
| "grad_norm": 17.666411807362163, |
| "learning_rate": 2.892055336507641e-07, |
| "logits/chosen": -3.6488301753997803, |
| "logits/rejected": -3.6809892654418945, |
| "logps/chosen": -221.47088623046875, |
| "logps/rejected": -208.6617431640625, |
| "loss": 0.7961, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": 0.0049254028126597404, |
| "rewards/margins": 0.04181712493300438, |
| "rewards/rejected": -0.03689172491431236, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.15356386732533522, |
| "grad_norm": 15.904931812764481, |
| "learning_rate": 2.883703249328419e-07, |
| "logits/chosen": -3.7053463459014893, |
| "logits/rejected": -3.6859936714172363, |
| "logps/chosen": -193.99462890625, |
| "logps/rejected": -168.01174926757812, |
| "loss": 0.7982, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": 0.005228930618613958, |
| "rewards/margins": 0.04429711773991585, |
| "rewards/rejected": -0.03906818851828575, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.15808045165843332, |
| "grad_norm": 17.574931019060468, |
| "learning_rate": 2.8750529623409767e-07, |
| "logits/chosen": -3.74350643157959, |
| "logits/rejected": -3.688340902328491, |
| "logps/chosen": -227.44448852539062, |
| "logps/rejected": -207.76531982421875, |
| "loss": 0.8001, |
| "rewards/accuracies": 0.609375, |
| "rewards/chosen": -0.013922490179538727, |
| "rewards/margins": 0.041697580367326736, |
| "rewards/rejected": -0.05562007054686546, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.1625970359915314, |
| "grad_norm": 16.144138203267183, |
| "learning_rate": 2.866106339755666e-07, |
| "logits/chosen": -3.6101019382476807, |
| "logits/rejected": -3.516798496246338, |
| "logps/chosen": -225.3680877685547, |
| "logps/rejected": -200.65811157226562, |
| "loss": 0.7982, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": 0.00023476500064134598, |
| "rewards/margins": 0.043288152664899826, |
| "rewards/rejected": -0.043053388595581055, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.1671136203246295, |
| "grad_norm": 18.051062496264226, |
| "learning_rate": 2.856865309645679e-07, |
| "logits/chosen": -3.668738603591919, |
| "logits/rejected": -3.5682475566864014, |
| "logps/chosen": -216.2345733642578, |
| "logps/rejected": -190.3118896484375, |
| "loss": 0.7908, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": 0.005171060096472502, |
| "rewards/margins": 0.06707943975925446, |
| "rewards/rejected": -0.06190839037299156, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.1716302046577276, |
| "grad_norm": 15.250674643466992, |
| "learning_rate": 2.847331863531529e-07, |
| "logits/chosen": -3.6172351837158203, |
| "logits/rejected": -3.513965129852295, |
| "logps/chosen": -207.12973022460938, |
| "logps/rejected": -186.56016540527344, |
| "loss": 0.7973, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -0.00749462703242898, |
| "rewards/margins": 0.036814432591199875, |
| "rewards/rejected": -0.04430905729532242, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.1761467889908257, |
| "grad_norm": 13.727288814848803, |
| "learning_rate": 2.8375080559518633e-07, |
| "logits/chosen": -3.677856922149658, |
| "logits/rejected": -3.651683807373047, |
| "logps/chosen": -173.89752197265625, |
| "logps/rejected": -161.67294311523438, |
| "loss": 0.8033, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -0.005101449321955442, |
| "rewards/margins": 0.03479147329926491, |
| "rewards/rejected": -0.03989291936159134, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.1806633733239238, |
| "grad_norm": 16.031051528638532, |
| "learning_rate": 2.827396004020694e-07, |
| "logits/chosen": -3.657525062561035, |
| "logits/rejected": -3.6614298820495605, |
| "logps/chosen": -187.38975524902344, |
| "logps/rejected": -174.07693481445312, |
| "loss": 0.7971, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -0.006134797818958759, |
| "rewards/margins": 0.04029170051217079, |
| "rewards/rejected": -0.046426497399806976, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.1851799576570219, |
| "grad_norm": 14.72860095286963, |
| "learning_rate": 2.8169978869711385e-07, |
| "logits/chosen": -3.7374589443206787, |
| "logits/rejected": -3.656147003173828, |
| "logps/chosen": -187.12936401367188, |
| "logps/rejected": -161.41815185546875, |
| "loss": 0.787, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": 0.004431622102856636, |
| "rewards/margins": 0.05767218768596649, |
| "rewards/rejected": -0.053240567445755005, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.18969654199011998, |
| "grad_norm": 17.441579066359495, |
| "learning_rate": 2.806315945685779e-07, |
| "logits/chosen": -3.6515302658081055, |
| "logits/rejected": -3.551766872406006, |
| "logps/chosen": -250.11190795898438, |
| "logps/rejected": -221.34317016601562, |
| "loss": 0.7699, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": 0.006256776861846447, |
| "rewards/margins": 0.09309859573841095, |
| "rewards/rejected": -0.08684182167053223, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.19421312632321808, |
| "grad_norm": 17.048744298556095, |
| "learning_rate": 2.7953524822137317e-07, |
| "logits/chosen": -3.7105650901794434, |
| "logits/rejected": -3.6019039154052734, |
| "logps/chosen": -208.5770263671875, |
| "logps/rejected": -180.16976928710938, |
| "loss": 0.8027, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -0.018564769998192787, |
| "rewards/margins": 0.04158995673060417, |
| "rewards/rejected": -0.06015472859144211, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.19872971065631617, |
| "grad_norm": 16.16815408757425, |
| "learning_rate": 2.784109859274537e-07, |
| "logits/chosen": -3.597439765930176, |
| "logits/rejected": -3.6136202812194824, |
| "logps/chosen": -211.610107421875, |
| "logps/rejected": -191.22201538085938, |
| "loss": 0.7829, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.003241416532546282, |
| "rewards/margins": 0.07393845170736313, |
| "rewards/rejected": -0.07717986404895782, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.20324629498941427, |
| "grad_norm": 13.017137061508738, |
| "learning_rate": 2.7725904997489726e-07, |
| "logits/chosen": -3.613895893096924, |
| "logits/rejected": -3.6305315494537354, |
| "logps/chosen": -189.8942108154297, |
| "logps/rejected": -180.846923828125, |
| "loss": 0.7937, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -0.023781713098287582, |
| "rewards/margins": 0.04976597800850868, |
| "rewards/rejected": -0.07354769110679626, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.20776287932251236, |
| "grad_norm": 15.730598564063932, |
| "learning_rate": 2.760796886156901e-07, |
| "logits/chosen": -3.5601043701171875, |
| "logits/rejected": -3.5674729347229004, |
| "logps/chosen": -202.303466796875, |
| "logps/rejected": -192.0014190673828, |
| "loss": 0.7865, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -0.017426731064915657, |
| "rewards/margins": 0.05039919540286064, |
| "rewards/rejected": -0.06782592833042145, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.21227946365561046, |
| "grad_norm": 15.472698059079649, |
| "learning_rate": 2.748731560122267e-07, |
| "logits/chosen": -3.6035664081573486, |
| "logits/rejected": -3.539640426635742, |
| "logps/chosen": -221.04888916015625, |
| "logps/rejected": -198.54339599609375, |
| "loss": 0.7848, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.005871212109923363, |
| "rewards/margins": 0.07652122527360916, |
| "rewards/rejected": -0.08239243924617767, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.21679604798870855, |
| "grad_norm": 15.777479890984548, |
| "learning_rate": 2.7363971218253573e-07, |
| "logits/chosen": -3.6367340087890625, |
| "logits/rejected": -3.5720105171203613, |
| "logps/chosen": -206.6673583984375, |
| "logps/rejected": -191.620361328125, |
| "loss": 0.7834, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.0010449562687426805, |
| "rewards/margins": 0.07644650340080261, |
| "rewards/rejected": -0.07540154457092285, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.22131263232180665, |
| "grad_norm": 14.647917769515534, |
| "learning_rate": 2.7237962294424354e-07, |
| "logits/chosen": -3.742436408996582, |
| "logits/rejected": -3.583324432373047, |
| "logps/chosen": -216.07376098632812, |
| "logps/rejected": -191.45822143554688, |
| "loss": 0.7637, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": 0.0027800833340734243, |
| "rewards/margins": 0.09453913569450378, |
| "rewards/rejected": -0.09175905585289001, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.22582921665490474, |
| "grad_norm": 14.379641829000743, |
| "learning_rate": 2.7109315985728866e-07, |
| "logits/chosen": -3.4936299324035645, |
| "logits/rejected": -3.461601734161377, |
| "logps/chosen": -222.20791625976562, |
| "logps/rejected": -198.30593872070312, |
| "loss": 0.7946, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -0.01759425923228264, |
| "rewards/margins": 0.06354185938835144, |
| "rewards/rejected": -0.08113611489534378, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.23034580098800284, |
| "grad_norm": 15.536214345554205, |
| "learning_rate": 2.697806001653979e-07, |
| "logits/chosen": -3.656852960586548, |
| "logits/rejected": -3.5931718349456787, |
| "logps/chosen": -215.4611053466797, |
| "logps/rejected": -195.17086791992188, |
| "loss": 0.7702, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -0.012303248047828674, |
| "rewards/margins": 0.09312085807323456, |
| "rewards/rejected": -0.10542410612106323, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.23486238532110093, |
| "grad_norm": 15.948190715341653, |
| "learning_rate": 2.684422267363384e-07, |
| "logits/chosen": -3.658729076385498, |
| "logits/rejected": -3.5965871810913086, |
| "logps/chosen": -227.6315460205078, |
| "logps/rejected": -216.8878936767578, |
| "loss": 0.7749, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.02002471685409546, |
| "rewards/margins": 0.08053679019212723, |
| "rewards/rejected": -0.10056150704622269, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.23937896965419903, |
| "grad_norm": 13.3354410986771, |
| "learning_rate": 2.670783280009569e-07, |
| "logits/chosen": -3.5585010051727295, |
| "logits/rejected": -3.542996406555176, |
| "logps/chosen": -200.6866912841797, |
| "logps/rejected": -176.0266876220703, |
| "loss": 0.7969, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.016675246879458427, |
| "rewards/margins": 0.0597347766160965, |
| "rewards/rejected": -0.07641002535820007, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.24389555398729712, |
| "grad_norm": 15.37006073629648, |
| "learning_rate": 2.656891978910205e-07, |
| "logits/chosen": -3.5804214477539062, |
| "logits/rejected": -3.580320358276367, |
| "logps/chosen": -199.03363037109375, |
| "logps/rejected": -175.6981201171875, |
| "loss": 0.7826, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -0.022621821612119675, |
| "rewards/margins": 0.07916627079248428, |
| "rewards/rejected": -0.10178809612989426, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.24841213832039521, |
| "grad_norm": 13.977675083898662, |
| "learning_rate": 2.642751357758722e-07, |
| "logits/chosen": -3.6277871131896973, |
| "logits/rejected": -3.576045274734497, |
| "logps/chosen": -198.56690979003906, |
| "logps/rejected": -173.84307861328125, |
| "loss": 0.7798, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -0.014770936220884323, |
| "rewards/margins": 0.08849596232175827, |
| "rewards/rejected": -0.10326690226793289, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.2529287226534933, |
| "grad_norm": 15.0141286536347, |
| "learning_rate": 2.628364463979135e-07, |
| "logits/chosen": -3.591761589050293, |
| "logits/rejected": -3.6051506996154785, |
| "logps/chosen": -220.2593231201172, |
| "logps/rejected": -202.11480712890625, |
| "loss": 0.7609, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.004766255617141724, |
| "rewards/margins": 0.10991345345973969, |
| "rewards/rejected": -0.11467970907688141, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.2574453069865914, |
| "grad_norm": 16.32019401181327, |
| "learning_rate": 2.613734398069308e-07, |
| "logits/chosen": -3.6448159217834473, |
| "logits/rejected": -3.6120004653930664, |
| "logps/chosen": -218.3624267578125, |
| "logps/rejected": -206.45156860351562, |
| "loss": 0.7646, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.0081629678606987, |
| "rewards/margins": 0.10102861374616623, |
| "rewards/rejected": -0.10919158160686493, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.2619618913196895, |
| "grad_norm": 14.250044872308086, |
| "learning_rate": 2.598864312932762e-07, |
| "logits/chosen": -3.5635013580322266, |
| "logits/rejected": -3.560309886932373, |
| "logps/chosen": -199.04324340820312, |
| "logps/rejected": -181.06304931640625, |
| "loss": 0.7902, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -0.025674719363451004, |
| "rewards/margins": 0.06853548437356949, |
| "rewards/rejected": -0.09421020746231079, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.2664784756527876, |
| "grad_norm": 15.467967190395154, |
| "learning_rate": 2.5837574131992034e-07, |
| "logits/chosen": -3.590390205383301, |
| "logits/rejected": -3.6538496017456055, |
| "logps/chosen": -211.0101776123047, |
| "logps/rejected": -200.92840576171875, |
| "loss": 0.7626, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.022280972450971603, |
| "rewards/margins": 0.10610129684209824, |
| "rewards/rejected": -0.12838226556777954, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.2709950599858857, |
| "grad_norm": 15.203009992593772, |
| "learning_rate": 2.568416954533894e-07, |
| "logits/chosen": -3.687786102294922, |
| "logits/rejected": -3.6774373054504395, |
| "logps/chosen": -186.29849243164062, |
| "logps/rejected": -169.62168884277344, |
| "loss": 0.7785, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.018803317099809647, |
| "rewards/margins": 0.08361957967281342, |
| "rewards/rejected": -0.10242290794849396, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.2755116443189838, |
| "grad_norm": 14.57683761120013, |
| "learning_rate": 2.552846242936032e-07, |
| "logits/chosen": -3.638808250427246, |
| "logits/rejected": -3.6172327995300293, |
| "logps/chosen": -202.23802185058594, |
| "logps/rejected": -183.21786499023438, |
| "loss": 0.7644, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.009251074865460396, |
| "rewards/margins": 0.11388231813907623, |
| "rewards/rejected": -0.12313339859247208, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.2800282286520819, |
| "grad_norm": 14.071215109151332, |
| "learning_rate": 2.537048634026279e-07, |
| "logits/chosen": -3.652895927429199, |
| "logits/rejected": -3.5790305137634277, |
| "logps/chosen": -200.75323486328125, |
| "logps/rejected": -180.8776092529297, |
| "loss": 0.7716, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.02093246765434742, |
| "rewards/margins": 0.09524297714233398, |
| "rewards/rejected": -0.11617545038461685, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.28454481298518, |
| "grad_norm": 13.217637221004559, |
| "learning_rate": 2.521027532323594e-07, |
| "logits/chosen": -3.59419584274292, |
| "logits/rejected": -3.5415499210357666, |
| "logps/chosen": -196.58969116210938, |
| "logps/rejected": -181.43600463867188, |
| "loss": 0.7674, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.023986171931028366, |
| "rewards/margins": 0.10850539058446884, |
| "rewards/rejected": -0.1324915587902069, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.28906139731827807, |
| "grad_norm": 14.497006980804226, |
| "learning_rate": 2.5047863905115337e-07, |
| "logits/chosen": -3.5735766887664795, |
| "logits/rejected": -3.5254015922546387, |
| "logps/chosen": -198.43698120117188, |
| "logps/rejected": -177.77783203125, |
| "loss": 0.7642, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.018381193280220032, |
| "rewards/margins": 0.12361248582601547, |
| "rewards/rejected": -0.1419936716556549, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.29357798165137616, |
| "grad_norm": 14.857860546032539, |
| "learning_rate": 2.4883287086941666e-07, |
| "logits/chosen": -3.592167854309082, |
| "logits/rejected": -3.554619073867798, |
| "logps/chosen": -209.84280395507812, |
| "logps/rejected": -195.99557495117188, |
| "loss": 0.7711, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.04406602308154106, |
| "rewards/margins": 0.11150160431861877, |
| "rewards/rejected": -0.15556763112545013, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.29809456598447426, |
| "grad_norm": 15.7289147220562, |
| "learning_rate": 2.4716580336417735e-07, |
| "logits/chosen": -3.730616569519043, |
| "logits/rejected": -3.611698627471924, |
| "logps/chosen": -213.7239990234375, |
| "logps/rejected": -191.99624633789062, |
| "loss": 0.7641, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -0.04750160127878189, |
| "rewards/margins": 0.11192238330841064, |
| "rewards/rejected": -0.15942397713661194, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.30261115031757235, |
| "grad_norm": 16.553215826780214, |
| "learning_rate": 2.4547779580264873e-07, |
| "logits/chosen": -3.6770639419555664, |
| "logits/rejected": -3.6511921882629395, |
| "logps/chosen": -229.4193115234375, |
| "logps/rejected": -219.91082763671875, |
| "loss": 0.7634, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -0.07060261070728302, |
| "rewards/margins": 0.10048267990350723, |
| "rewards/rejected": -0.17108528316020966, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.30712773465067045, |
| "grad_norm": 14.535708064025933, |
| "learning_rate": 2.4376921196480405e-07, |
| "logits/chosen": -3.643744945526123, |
| "logits/rejected": -3.6282317638397217, |
| "logps/chosen": -215.79782104492188, |
| "logps/rejected": -195.50967407226562, |
| "loss": 0.7459, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -0.030901005491614342, |
| "rewards/margins": 0.15577057003974915, |
| "rewards/rejected": -0.18667156994342804, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.31164431898376854, |
| "grad_norm": 12.826587337871429, |
| "learning_rate": 2.420404200649791e-07, |
| "logits/chosen": -3.6386120319366455, |
| "logits/rejected": -3.6321802139282227, |
| "logps/chosen": -197.25970458984375, |
| "logps/rejected": -188.24197387695312, |
| "loss": 0.7671, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.08027191460132599, |
| "rewards/margins": 0.09604091197252274, |
| "rewards/rejected": -0.17631281912326813, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.31616090331686664, |
| "grad_norm": 16.418214717599835, |
| "learning_rate": 2.402917926725185e-07, |
| "logits/chosen": -3.6721415519714355, |
| "logits/rejected": -3.6725897789001465, |
| "logps/chosen": -216.7154541015625, |
| "logps/rejected": -201.8681640625, |
| "loss": 0.7291, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -0.029750004410743713, |
| "rewards/margins": 0.1923179030418396, |
| "rewards/rejected": -0.22206789255142212, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.32067748764996473, |
| "grad_norm": 13.215008697180764, |
| "learning_rate": 2.385237066314845e-07, |
| "logits/chosen": -3.565258026123047, |
| "logits/rejected": -3.553618907928467, |
| "logps/chosen": -197.5516815185547, |
| "logps/rejected": -184.250732421875, |
| "loss": 0.7722, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -0.050527218729257584, |
| "rewards/margins": 0.11302457749843597, |
| "rewards/rejected": -0.16355180740356445, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.3251940719830628, |
| "grad_norm": 13.956180499179757, |
| "learning_rate": 2.3673654297944303e-07, |
| "logits/chosen": -3.6860711574554443, |
| "logits/rejected": -3.6514902114868164, |
| "logps/chosen": -226.09010314941406, |
| "logps/rejected": -204.78941345214844, |
| "loss": 0.753, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.06019885092973709, |
| "rewards/margins": 0.1402389109134674, |
| "rewards/rejected": -0.2004377692937851, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.3297106563161609, |
| "grad_norm": 12.632161370394448, |
| "learning_rate": 2.3493068686534757e-07, |
| "logits/chosen": -3.6133785247802734, |
| "logits/rejected": -3.573826789855957, |
| "logps/chosen": -208.22662353515625, |
| "logps/rejected": -189.77066040039062, |
| "loss": 0.7492, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.055879779160022736, |
| "rewards/margins": 0.1519559770822525, |
| "rewards/rejected": -0.20783576369285583, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.334227240649259, |
| "grad_norm": 14.261595430312116, |
| "learning_rate": 2.3310652746653585e-07, |
| "logits/chosen": -3.6738648414611816, |
| "logits/rejected": -3.6415371894836426, |
| "logps/chosen": -185.10997009277344, |
| "logps/rejected": -167.96487426757812, |
| "loss": 0.7588, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -0.052835769951343536, |
| "rewards/margins": 0.1488420069217682, |
| "rewards/rejected": -0.20167775452136993, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.3387438249823571, |
| "grad_norm": 14.822381543756691, |
| "learning_rate": 2.312644579048592e-07, |
| "logits/chosen": -3.7603840827941895, |
| "logits/rejected": -3.660132646560669, |
| "logps/chosen": -209.13519287109375, |
| "logps/rejected": -186.57931518554688, |
| "loss": 0.748, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -0.07213892042636871, |
| "rewards/margins": 0.1440207064151764, |
| "rewards/rejected": -0.2161596417427063, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.3432604093154552, |
| "grad_norm": 14.275023304603453, |
| "learning_rate": 2.29404875161961e-07, |
| "logits/chosen": -3.6284379959106445, |
| "logits/rejected": -3.5983853340148926, |
| "logps/chosen": -222.89759826660156, |
| "logps/rejected": -200.91384887695312, |
| "loss": 0.7395, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.06927837431430817, |
| "rewards/margins": 0.18208304047584534, |
| "rewards/rejected": -0.2513614296913147, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.3477769936485533, |
| "grad_norm": 12.109768727947394, |
| "learning_rate": 2.2752817999372408e-07, |
| "logits/chosen": -3.717994213104248, |
| "logits/rejected": -3.6696221828460693, |
| "logps/chosen": -179.09075927734375, |
| "logps/rejected": -165.41148376464844, |
| "loss": 0.7619, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.07646910846233368, |
| "rewards/margins": 0.10589072108268738, |
| "rewards/rejected": -0.18235982954502106, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.3522935779816514, |
| "grad_norm": 12.334847197016632, |
| "learning_rate": 2.2563477684390454e-07, |
| "logits/chosen": -3.6596970558166504, |
| "logits/rejected": -3.7064521312713623, |
| "logps/chosen": -183.98486328125, |
| "logps/rejected": -175.17532348632812, |
| "loss": 0.7407, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.0660928264260292, |
| "rewards/margins": 0.15099170804023743, |
| "rewards/rejected": -0.21708452701568604, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.3568101623147495, |
| "grad_norm": 14.527926172073567, |
| "learning_rate": 2.2372507375697016e-07, |
| "logits/chosen": -3.6260461807250977, |
| "logits/rejected": -3.56558895111084, |
| "logps/chosen": -215.37002563476562, |
| "logps/rejected": -191.77987670898438, |
| "loss": 0.7351, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.04920428246259689, |
| "rewards/margins": 0.18218760192394257, |
| "rewards/rejected": -0.23139187693595886, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.3613267466478476, |
| "grad_norm": 13.953022020448985, |
| "learning_rate": 2.217994822901639e-07, |
| "logits/chosen": -3.6040546894073486, |
| "logits/rejected": -3.6221628189086914, |
| "logps/chosen": -223.6805419921875, |
| "logps/rejected": -201.2560577392578, |
| "loss": 0.7415, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -0.053364675492048264, |
| "rewards/margins": 0.1852804273366928, |
| "rewards/rejected": -0.23864510655403137, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.3658433309809457, |
| "grad_norm": 13.78052722074895, |
| "learning_rate": 2.1985841742480954e-07, |
| "logits/chosen": -3.5805296897888184, |
| "logits/rejected": -3.5260043144226074, |
| "logps/chosen": -215.5792236328125, |
| "logps/rejected": -199.66006469726562, |
| "loss": 0.7274, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -0.06636206805706024, |
| "rewards/margins": 0.1942358762025833, |
| "rewards/rejected": -0.26059794425964355, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.3703599153140438, |
| "grad_norm": 14.881670177581055, |
| "learning_rate": 2.1790229747687971e-07, |
| "logits/chosen": -3.739069938659668, |
| "logits/rejected": -3.669661283493042, |
| "logps/chosen": -239.0128173828125, |
| "logps/rejected": -217.56625366210938, |
| "loss": 0.7162, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -0.06281246244907379, |
| "rewards/margins": 0.22767522931098938, |
| "rewards/rejected": -0.290487676858902, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.37487649964714187, |
| "grad_norm": 13.702491862774862, |
| "learning_rate": 2.1593154400684523e-07, |
| "logits/chosen": -3.6899726390838623, |
| "logits/rejected": -3.5771546363830566, |
| "logps/chosen": -202.50872802734375, |
| "logps/rejected": -176.08013916015625, |
| "loss": 0.7315, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.08337001502513885, |
| "rewards/margins": 0.18461519479751587, |
| "rewards/rejected": -0.2679852247238159, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.37939308398023996, |
| "grad_norm": 13.984029281056696, |
| "learning_rate": 2.139465817288254e-07, |
| "logits/chosen": -3.7421321868896484, |
| "logits/rejected": -3.6175167560577393, |
| "logps/chosen": -190.1082000732422, |
| "logps/rejected": -175.02052307128906, |
| "loss": 0.7337, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -0.08382508903741837, |
| "rewards/margins": 0.17669573426246643, |
| "rewards/rejected": -0.2605208456516266, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.38390966831333806, |
| "grad_norm": 19.996335370988827, |
| "learning_rate": 2.1194783841905826e-07, |
| "logits/chosen": -3.5821313858032227, |
| "logits/rejected": -3.4785704612731934, |
| "logps/chosen": -230.62477111816406, |
| "logps/rejected": -208.86831665039062, |
| "loss": 0.7245, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -0.11116647720336914, |
| "rewards/margins": 0.19649800658226013, |
| "rewards/rejected": -0.3076644837856293, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.38842625264643615, |
| "grad_norm": 11.794852107515213, |
| "learning_rate": 2.0993574482371138e-07, |
| "logits/chosen": -3.557180643081665, |
| "logits/rejected": -3.5188608169555664, |
| "logps/chosen": -200.02206420898438, |
| "logps/rejected": -186.23666381835938, |
| "loss": 0.7401, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -0.09516202658414841, |
| "rewards/margins": 0.18562006950378418, |
| "rewards/rejected": -0.2807821035385132, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.39294283697953425, |
| "grad_norm": 15.110048854614613, |
| "learning_rate": 2.0791073456605222e-07, |
| "logits/chosen": -3.6332998275756836, |
| "logits/rejected": -3.6093335151672363, |
| "logps/chosen": -247.05874633789062, |
| "logps/rejected": -224.82608032226562, |
| "loss": 0.6953, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -0.08461566269397736, |
| "rewards/margins": 0.2697482705116272, |
| "rewards/rejected": -0.35436391830444336, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.39745942131263234, |
| "grad_norm": 12.45488028971463, |
| "learning_rate": 2.058732440529989e-07, |
| "logits/chosen": -3.7330398559570312, |
| "logits/rejected": -3.5911219120025635, |
| "logps/chosen": -211.71954345703125, |
| "logps/rejected": -190.85597229003906, |
| "loss": 0.733, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.11091723293066025, |
| "rewards/margins": 0.17457152903079987, |
| "rewards/rejected": -0.2854887545108795, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.40197600564573044, |
| "grad_norm": 13.342737944026238, |
| "learning_rate": 2.0382371238107038e-07, |
| "logits/chosen": -3.7038931846618652, |
| "logits/rejected": -3.630575656890869, |
| "logps/chosen": -217.16465759277344, |
| "logps/rejected": -195.84320068359375, |
| "loss": 0.717, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -0.08662399649620056, |
| "rewards/margins": 0.21940693259239197, |
| "rewards/rejected": -0.30603092908859253, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.40649258997882853, |
| "grad_norm": 12.805765983733862, |
| "learning_rate": 2.0176258124175791e-07, |
| "logits/chosen": -3.5431618690490723, |
| "logits/rejected": -3.5177979469299316, |
| "logps/chosen": -208.36294555664062, |
| "logps/rejected": -196.33544921875, |
| "loss": 0.7397, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -0.1321878284215927, |
| "rewards/margins": 0.17401957511901855, |
| "rewards/rejected": -0.30620741844177246, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.41100917431192663, |
| "grad_norm": 14.324921342449116, |
| "learning_rate": 1.996902948263364e-07, |
| "logits/chosen": -3.6215481758117676, |
| "logits/rejected": -3.5871336460113525, |
| "logps/chosen": -218.2590789794922, |
| "logps/rejected": -202.82135009765625, |
| "loss": 0.7184, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -0.06889334321022034, |
| "rewards/margins": 0.2213161736726761, |
| "rewards/rejected": -0.2902095317840576, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.4155257586450247, |
| "grad_norm": 13.522825965617656, |
| "learning_rate": 1.9760729973013756e-07, |
| "logits/chosen": -3.5652565956115723, |
| "logits/rejected": -3.559969902038574, |
| "logps/chosen": -216.3070068359375, |
| "logps/rejected": -207.52525329589844, |
| "loss": 0.7279, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.12052971869707108, |
| "rewards/margins": 0.19747133553028107, |
| "rewards/rejected": -0.31800103187561035, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.4200423429781228, |
| "grad_norm": 14.17056179163305, |
| "learning_rate": 1.9551404485630487e-07, |
| "logits/chosen": -3.648214101791382, |
| "logits/rejected": -3.633265972137451, |
| "logps/chosen": -228.32022094726562, |
| "logps/rejected": -216.98306274414062, |
| "loss": 0.712, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.09251593053340912, |
| "rewards/margins": 0.24657899141311646, |
| "rewards/rejected": -0.33909493684768677, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.4245589273112209, |
| "grad_norm": 12.41603788591784, |
| "learning_rate": 1.9341098131905102e-07, |
| "logits/chosen": -3.563978672027588, |
| "logits/rejected": -3.575545310974121, |
| "logps/chosen": -197.626708984375, |
| "logps/rejected": -183.598876953125, |
| "loss": 0.7199, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -0.06666558235883713, |
| "rewards/margins": 0.22736752033233643, |
| "rewards/rejected": -0.29403308033943176, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.429075511644319, |
| "grad_norm": 13.43017680491703, |
| "learning_rate": 1.91298562346439e-07, |
| "logits/chosen": -3.5415198802948, |
| "logits/rejected": -3.423177480697632, |
| "logps/chosen": -210.36868286132812, |
| "logps/rejected": -193.19737243652344, |
| "loss": 0.7208, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -0.13729974627494812, |
| "rewards/margins": 0.20369592308998108, |
| "rewards/rejected": -0.3409956693649292, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.4335920959774171, |
| "grad_norm": 13.192586910237464, |
| "learning_rate": 1.8917724318270764e-07, |
| "logits/chosen": -3.6500909328460693, |
| "logits/rejected": -3.641021728515625, |
| "logps/chosen": -212.2454833984375, |
| "logps/rejected": -195.04710388183594, |
| "loss": 0.7359, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.13967543840408325, |
| "rewards/margins": 0.19579245150089264, |
| "rewards/rejected": -0.3354678750038147, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.4381086803105152, |
| "grad_norm": 14.631006890045864, |
| "learning_rate": 1.8704748099016263e-07, |
| "logits/chosen": -3.5358424186706543, |
| "logits/rejected": -3.492499828338623, |
| "logps/chosen": -226.14268493652344, |
| "logps/rejected": -209.61166381835938, |
| "loss": 0.7276, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.12765762209892273, |
| "rewards/margins": 0.21874003112316132, |
| "rewards/rejected": -0.34639766812324524, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.4426252646436133, |
| "grad_norm": 12.620333608491627, |
| "learning_rate": 1.8490973475065407e-07, |
| "logits/chosen": -3.62363862991333, |
| "logits/rejected": -3.558384656906128, |
| "logps/chosen": -201.33468627929688, |
| "logps/rejected": -187.42794799804688, |
| "loss": 0.735, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.10830561816692352, |
| "rewards/margins": 0.19010263681411743, |
| "rewards/rejected": -0.29840826988220215, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.4471418489767114, |
| "grad_norm": 11.634459384049228, |
| "learning_rate": 1.8276446516666194e-07, |
| "logits/chosen": -3.564702033996582, |
| "logits/rejected": -3.4371743202209473, |
| "logps/chosen": -203.50912475585938, |
| "logps/rejected": -186.2063446044922, |
| "loss": 0.7276, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -0.12979203462600708, |
| "rewards/margins": 0.19052280485630035, |
| "rewards/rejected": -0.32031482458114624, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.4516584333098095, |
| "grad_norm": 14.33489341500007, |
| "learning_rate": 1.806121345620111e-07, |
| "logits/chosen": -3.5418810844421387, |
| "logits/rejected": -3.465827703475952, |
| "logps/chosen": -222.30589294433594, |
| "logps/rejected": -204.18182373046875, |
| "loss": 0.7318, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.15671955049037933, |
| "rewards/margins": 0.2025194764137268, |
| "rewards/rejected": -0.35923904180526733, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.4561750176429076, |
| "grad_norm": 12.379106375796471, |
| "learning_rate": 1.7845320678223614e-07, |
| "logits/chosen": -3.572160482406616, |
| "logits/rejected": -3.4780170917510986, |
| "logps/chosen": -204.60537719726562, |
| "logps/rejected": -188.61119079589844, |
| "loss": 0.7315, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -0.11631282418966293, |
| "rewards/margins": 0.19585317373275757, |
| "rewards/rejected": -0.3121660053730011, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.46069160197600567, |
| "grad_norm": 13.428841184771063, |
| "learning_rate": 1.7628814709461914e-07, |
| "logits/chosen": -3.4720003604888916, |
| "logits/rejected": -3.541748523712158, |
| "logps/chosen": -225.80712890625, |
| "logps/rejected": -213.91180419921875, |
| "loss": 0.7084, |
| "rewards/accuracies": 0.921875, |
| "rewards/chosen": -0.12924836575984955, |
| "rewards/margins": 0.276883989572525, |
| "rewards/rejected": -0.40613240003585815, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.46520818630910377, |
| "grad_norm": 12.642202001549919, |
| "learning_rate": 1.7411742208792024e-07, |
| "logits/chosen": -3.6850690841674805, |
| "logits/rejected": -3.545577049255371, |
| "logps/chosen": -225.94509887695312, |
| "logps/rejected": -193.059326171875, |
| "loss": 0.7084, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.11674878001213074, |
| "rewards/margins": 0.25400400161743164, |
| "rewards/rejected": -0.3707527816295624, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.46972477064220186, |
| "grad_norm": 11.73048532713264, |
| "learning_rate": 1.7194149957182414e-07, |
| "logits/chosen": -3.5629310607910156, |
| "logits/rejected": -3.5668156147003174, |
| "logps/chosen": -171.92477416992188, |
| "logps/rejected": -165.56927490234375, |
| "loss": 0.7396, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.1383664906024933, |
| "rewards/margins": 0.1683700531721115, |
| "rewards/rejected": -0.3067365288734436, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.47424135497529996, |
| "grad_norm": 12.673432432328502, |
| "learning_rate": 1.6976084847612282e-07, |
| "logits/chosen": -3.525435447692871, |
| "logits/rejected": -3.4939839839935303, |
| "logps/chosen": -206.1438751220703, |
| "logps/rejected": -188.24546813964844, |
| "loss": 0.722, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.13165442645549774, |
| "rewards/margins": 0.2146846055984497, |
| "rewards/rejected": -0.34633904695510864, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.47875793930839805, |
| "grad_norm": 12.032890874796571, |
| "learning_rate": 1.6757593874965754e-07, |
| "logits/chosen": -3.5656533241271973, |
| "logits/rejected": -3.527076005935669, |
| "logps/chosen": -197.67202758789062, |
| "logps/rejected": -181.7797088623047, |
| "loss": 0.7251, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -0.0992819219827652, |
| "rewards/margins": 0.225886732339859, |
| "rewards/rejected": -0.325168639421463, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.48327452364149615, |
| "grad_norm": 12.310976191149708, |
| "learning_rate": 1.6538724125904051e-07, |
| "logits/chosen": -3.686993360519409, |
| "logits/rejected": -3.6414313316345215, |
| "logps/chosen": -204.51181030273438, |
| "logps/rejected": -196.01507568359375, |
| "loss": 0.7234, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -0.12974955141544342, |
| "rewards/margins": 0.21487677097320557, |
| "rewards/rejected": -0.3446263074874878, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.48779110797459424, |
| "grad_norm": 12.199852142739852, |
| "learning_rate": 1.6319522768717944e-07, |
| "logits/chosen": -3.6431784629821777, |
| "logits/rejected": -3.561030864715576, |
| "logps/chosen": -199.9854278564453, |
| "logps/rejected": -181.25628662109375, |
| "loss": 0.7371, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -0.11536161601543427, |
| "rewards/margins": 0.2128218412399292, |
| "rewards/rejected": -0.32818344235420227, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.49230769230769234, |
| "grad_norm": 13.488082140342616, |
| "learning_rate": 1.610003704316256e-07, |
| "logits/chosen": -3.7115769386291504, |
| "logits/rejected": -3.6150527000427246, |
| "logps/chosen": -206.2103271484375, |
| "logps/rejected": -184.147705078125, |
| "loss": 0.6968, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -0.07892445474863052, |
| "rewards/margins": 0.2777579128742218, |
| "rewards/rejected": -0.35668236017227173, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.49682427664079043, |
| "grad_norm": 12.799344796933894, |
| "learning_rate": 1.5880314250276833e-07, |
| "logits/chosen": -3.6075048446655273, |
| "logits/rejected": -3.451253652572632, |
| "logps/chosen": -212.31695556640625, |
| "logps/rejected": -189.007080078125, |
| "loss": 0.723, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -0.1649230718612671, |
| "rewards/margins": 0.23563840985298157, |
| "rewards/rejected": -0.40056151151657104, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.5013408609738885, |
| "grad_norm": 11.195885489420343, |
| "learning_rate": 1.5660401742189716e-07, |
| "logits/chosen": -3.6232829093933105, |
| "logits/rejected": -3.517642021179199, |
| "logps/chosen": -196.2974090576172, |
| "logps/rejected": -181.78778076171875, |
| "loss": 0.7163, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.11609620600938797, |
| "rewards/margins": 0.24031955003738403, |
| "rewards/rejected": -0.3564157783985138, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.5058574453069866, |
| "grad_norm": 13.462764100267743, |
| "learning_rate": 1.5440346911915413e-07, |
| "logits/chosen": -3.5703439712524414, |
| "logits/rejected": -3.5258054733276367, |
| "logps/chosen": -197.42391967773438, |
| "logps/rejected": -182.4862060546875, |
| "loss": 0.707, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -0.13110296428203583, |
| "rewards/margins": 0.25373634696006775, |
| "rewards/rejected": -0.3848392963409424, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.5103740296400847, |
| "grad_norm": 11.216776131932745, |
| "learning_rate": 1.522019718313975e-07, |
| "logits/chosen": -3.5310792922973633, |
| "logits/rejected": -3.565998077392578, |
| "logps/chosen": -209.51132202148438, |
| "logps/rejected": -189.17047119140625, |
| "loss": 0.7095, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -0.12987811863422394, |
| "rewards/margins": 0.2518337368965149, |
| "rewards/rejected": -0.38171184062957764, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.5148906139731828, |
| "grad_norm": 13.293761430898734, |
| "learning_rate": 1.5e-07, |
| "logits/chosen": -3.7002620697021484, |
| "logits/rejected": -3.6795387268066406, |
| "logps/chosen": -213.72909545898438, |
| "logps/rejected": -196.23594665527344, |
| "loss": 0.713, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.13611683249473572, |
| "rewards/margins": 0.2516591548919678, |
| "rewards/rejected": -0.3877760171890259, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.5194071983062809, |
| "grad_norm": 11.607463785941228, |
| "learning_rate": 1.4779802816860252e-07, |
| "logits/chosen": -3.5893545150756836, |
| "logits/rejected": -3.5178956985473633, |
| "logps/chosen": -208.91015625, |
| "logps/rejected": -186.68402099609375, |
| "loss": 0.6884, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -0.06911865621805191, |
| "rewards/margins": 0.3234255909919739, |
| "rewards/rejected": -0.3925442397594452, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.523923782639379, |
| "grad_norm": 11.17744384668911, |
| "learning_rate": 1.4559653088084589e-07, |
| "logits/chosen": -3.5923399925231934, |
| "logits/rejected": -3.5601184368133545, |
| "logps/chosen": -197.11102294921875, |
| "logps/rejected": -182.57537841796875, |
| "loss": 0.7113, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.17773228883743286, |
| "rewards/margins": 0.2376946210861206, |
| "rewards/rejected": -0.41542690992355347, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.5284403669724771, |
| "grad_norm": 12.324943566684535, |
| "learning_rate": 1.4339598257810283e-07, |
| "logits/chosen": -3.4791109561920166, |
| "logits/rejected": -3.528164863586426, |
| "logps/chosen": -203.9499969482422, |
| "logps/rejected": -191.4123077392578, |
| "loss": 0.7141, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -0.1312122344970703, |
| "rewards/margins": 0.24087485671043396, |
| "rewards/rejected": -0.3720870912075043, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.5329569513055752, |
| "grad_norm": 13.355110060788324, |
| "learning_rate": 1.411968574972317e-07, |
| "logits/chosen": -3.4863524436950684, |
| "logits/rejected": -3.5186939239501953, |
| "logps/chosen": -211.86410522460938, |
| "logps/rejected": -196.56219482421875, |
| "loss": 0.7007, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.09455064684152603, |
| "rewards/margins": 0.29401230812072754, |
| "rewards/rejected": -0.38856297731399536, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.5374735356386733, |
| "grad_norm": 11.843733502428305, |
| "learning_rate": 1.3899962956837443e-07, |
| "logits/chosen": -3.5447893142700195, |
| "logits/rejected": -3.5373752117156982, |
| "logps/chosen": -217.02601623535156, |
| "logps/rejected": -195.690185546875, |
| "loss": 0.6888, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.11182112246751785, |
| "rewards/margins": 0.3210294544696808, |
| "rewards/rejected": -0.43285059928894043, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.5419901199717714, |
| "grad_norm": 12.77724705920504, |
| "learning_rate": 1.3680477231282058e-07, |
| "logits/chosen": -3.6904451847076416, |
| "logits/rejected": -3.648146629333496, |
| "logps/chosen": -174.260009765625, |
| "logps/rejected": -160.5956573486328, |
| "loss": 0.7273, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -0.14862585067749023, |
| "rewards/margins": 0.2124842405319214, |
| "rewards/rejected": -0.3611100912094116, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.5465067043048695, |
| "grad_norm": 11.150050819573732, |
| "learning_rate": 1.346127587409595e-07, |
| "logits/chosen": -3.6496615409851074, |
| "logits/rejected": -3.544325828552246, |
| "logps/chosen": -199.09524536132812, |
| "logps/rejected": -178.0385284423828, |
| "loss": 0.6964, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.13068127632141113, |
| "rewards/margins": 0.2756379246711731, |
| "rewards/rejected": -0.40631920099258423, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.5510232886379676, |
| "grad_norm": 12.080724661826311, |
| "learning_rate": 1.3242406125034247e-07, |
| "logits/chosen": -3.6039113998413086, |
| "logits/rejected": -3.4897897243499756, |
| "logps/chosen": -230.91986083984375, |
| "logps/rejected": -215.7164764404297, |
| "loss": 0.7039, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -0.20738250017166138, |
| "rewards/margins": 0.24289953708648682, |
| "rewards/rejected": -0.4502820372581482, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.5555398729710657, |
| "grad_norm": 11.919256579608561, |
| "learning_rate": 1.302391515238772e-07, |
| "logits/chosen": -3.5706183910369873, |
| "logits/rejected": -3.552396774291992, |
| "logps/chosen": -213.68630981445312, |
| "logps/rejected": -193.32818603515625, |
| "loss": 0.6788, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.1757589429616928, |
| "rewards/margins": 0.32006677985191345, |
| "rewards/rejected": -0.49582570791244507, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.5600564573041638, |
| "grad_norm": 12.629389606369296, |
| "learning_rate": 1.280585004281759e-07, |
| "logits/chosen": -3.539670944213867, |
| "logits/rejected": -3.535710334777832, |
| "logps/chosen": -206.27227783203125, |
| "logps/rejected": -193.86167907714844, |
| "loss": 0.7106, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.16979312896728516, |
| "rewards/margins": 0.2596694827079773, |
| "rewards/rejected": -0.42946261167526245, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.5645730416372619, |
| "grad_norm": 11.591449990368316, |
| "learning_rate": 1.2588257791207977e-07, |
| "logits/chosen": -3.59249210357666, |
| "logits/rejected": -3.588090419769287, |
| "logps/chosen": -210.10736083984375, |
| "logps/rejected": -194.91561889648438, |
| "loss": 0.6973, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.17436593770980835, |
| "rewards/margins": 0.2836093306541443, |
| "rewards/rejected": -0.45797526836395264, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.56908962597036, |
| "grad_norm": 12.618504079208098, |
| "learning_rate": 1.2371185290538087e-07, |
| "logits/chosen": -3.608921527862549, |
| "logits/rejected": -3.516284942626953, |
| "logps/chosen": -215.42098999023438, |
| "logps/rejected": -192.1516571044922, |
| "loss": 0.6996, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -0.18459659814834595, |
| "rewards/margins": 0.27924641966819763, |
| "rewards/rejected": -0.4638429880142212, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.573606210303458, |
| "grad_norm": 12.749831427337236, |
| "learning_rate": 1.2154679321776385e-07, |
| "logits/chosen": -3.5367257595062256, |
| "logits/rejected": -3.458606719970703, |
| "logps/chosen": -225.93020629882812, |
| "logps/rejected": -214.3812713623047, |
| "loss": 0.721, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.2491096556186676, |
| "rewards/margins": 0.22173485159873962, |
| "rewards/rejected": -0.4708445072174072, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.5781227946365561, |
| "grad_norm": 12.444761165337477, |
| "learning_rate": 1.193878654379889e-07, |
| "logits/chosen": -3.5870304107666016, |
| "logits/rejected": -3.5706138610839844, |
| "logps/chosen": -206.72052001953125, |
| "logps/rejected": -196.97564697265625, |
| "loss": 0.7064, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.17630356550216675, |
| "rewards/margins": 0.2867695987224579, |
| "rewards/rejected": -0.463073194026947, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.5826393789696542, |
| "grad_norm": 14.412718695628323, |
| "learning_rate": 1.1723553483333806e-07, |
| "logits/chosen": -3.6507744789123535, |
| "logits/rejected": -3.536320209503174, |
| "logps/chosen": -192.77645874023438, |
| "logps/rejected": -172.97238159179688, |
| "loss": 0.7102, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -0.17945942282676697, |
| "rewards/margins": 0.24124844372272491, |
| "rewards/rejected": -0.4207078814506531, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.5871559633027523, |
| "grad_norm": 11.81162755328704, |
| "learning_rate": 1.1509026524934596e-07, |
| "logits/chosen": -3.531088352203369, |
| "logits/rejected": -3.4827826023101807, |
| "logps/chosen": -208.96688842773438, |
| "logps/rejected": -188.86569213867188, |
| "loss": 0.7152, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -0.21842709183692932, |
| "rewards/margins": 0.2507480978965759, |
| "rewards/rejected": -0.46917521953582764, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.5916725476358504, |
| "grad_norm": 12.914405491596224, |
| "learning_rate": 1.129525190098374e-07, |
| "logits/chosen": -3.6707763671875, |
| "logits/rejected": -3.636361598968506, |
| "logps/chosen": -213.6358642578125, |
| "logps/rejected": -204.06399536132812, |
| "loss": 0.7035, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.18059837818145752, |
| "rewards/margins": 0.2868345379829407, |
| "rewards/rejected": -0.4674329161643982, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.5961891319689485, |
| "grad_norm": 11.557057022060595, |
| "learning_rate": 1.1082275681729236e-07, |
| "logits/chosen": -3.6127572059631348, |
| "logits/rejected": -3.531533718109131, |
| "logps/chosen": -183.26527404785156, |
| "logps/rejected": -165.15826416015625, |
| "loss": 0.7145, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -0.17764925956726074, |
| "rewards/margins": 0.2194777876138687, |
| "rewards/rejected": -0.39712706208229065, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.6007057163020466, |
| "grad_norm": 12.328325193031683, |
| "learning_rate": 1.0870143765356105e-07, |
| "logits/chosen": -3.634964942932129, |
| "logits/rejected": -3.566643714904785, |
| "logps/chosen": -209.58245849609375, |
| "logps/rejected": -185.32574462890625, |
| "loss": 0.7069, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.1629945933818817, |
| "rewards/margins": 0.2675268054008484, |
| "rewards/rejected": -0.4305214285850525, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.6052223006351447, |
| "grad_norm": 12.054973441313555, |
| "learning_rate": 1.0658901868094899e-07, |
| "logits/chosen": -3.571657657623291, |
| "logits/rejected": -3.5290191173553467, |
| "logps/chosen": -208.86460876464844, |
| "logps/rejected": -198.53517150878906, |
| "loss": 0.722, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -0.20138764381408691, |
| "rewards/margins": 0.22342219948768616, |
| "rewards/rejected": -0.42480987310409546, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.6097388849682428, |
| "grad_norm": 11.358121818992657, |
| "learning_rate": 1.0448595514369515e-07, |
| "logits/chosen": -3.5903096199035645, |
| "logits/rejected": -3.4630379676818848, |
| "logps/chosen": -195.21963500976562, |
| "logps/rejected": -178.4676055908203, |
| "loss": 0.738, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -0.21748274564743042, |
| "rewards/margins": 0.20364663004875183, |
| "rewards/rejected": -0.42112940549850464, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.6142554693013409, |
| "grad_norm": 12.076223242455574, |
| "learning_rate": 1.0239270026986241e-07, |
| "logits/chosen": -3.641045093536377, |
| "logits/rejected": -3.6035234928131104, |
| "logps/chosen": -210.5784454345703, |
| "logps/rejected": -193.73934936523438, |
| "loss": 0.6854, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -0.14184671640396118, |
| "rewards/margins": 0.324258953332901, |
| "rewards/rejected": -0.4661056697368622, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.618772053634439, |
| "grad_norm": 11.692752596565882, |
| "learning_rate": 1.0030970517366362e-07, |
| "logits/chosen": -3.563607931137085, |
| "logits/rejected": -3.428741216659546, |
| "logps/chosen": -219.72509765625, |
| "logps/rejected": -196.94625854492188, |
| "loss": 0.6911, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.1963091939687729, |
| "rewards/margins": 0.31136542558670044, |
| "rewards/rejected": -0.5076746344566345, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.6232886379675371, |
| "grad_norm": 12.345411080526995, |
| "learning_rate": 9.82374187582421e-08, |
| "logits/chosen": -3.528062343597412, |
| "logits/rejected": -3.505977153778076, |
| "logps/chosen": -218.63848876953125, |
| "logps/rejected": -206.9732666015625, |
| "loss": 0.7132, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -0.23708635568618774, |
| "rewards/margins": 0.25329455733299255, |
| "rewards/rejected": -0.4903808832168579, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.6278052223006352, |
| "grad_norm": 11.674703919156808, |
| "learning_rate": 9.617628761892963e-08, |
| "logits/chosen": -3.6489012241363525, |
| "logits/rejected": -3.6186487674713135, |
| "logps/chosen": -189.92733764648438, |
| "logps/rejected": -176.64971923828125, |
| "loss": 0.7119, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -0.18905529379844666, |
| "rewards/margins": 0.25132396817207336, |
| "rewards/rejected": -0.4403792917728424, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.6323218066337333, |
| "grad_norm": 13.29691200714144, |
| "learning_rate": 9.412675594700113e-08, |
| "logits/chosen": -3.5062429904937744, |
| "logits/rejected": -3.4647328853607178, |
| "logps/chosen": -201.7200927734375, |
| "logps/rejected": -181.33375549316406, |
| "loss": 0.681, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.08604306727647781, |
| "rewards/margins": 0.33681678771972656, |
| "rewards/rejected": -0.4228598475456238, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.6368383909668314, |
| "grad_norm": 12.691599989680086, |
| "learning_rate": 9.208926543394776e-08, |
| "logits/chosen": -3.5145082473754883, |
| "logits/rejected": -3.4705848693847656, |
| "logps/chosen": -226.04798889160156, |
| "logps/rejected": -208.7861328125, |
| "loss": 0.6872, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -0.20757007598876953, |
| "rewards/margins": 0.32437780499458313, |
| "rewards/rejected": -0.5319478511810303, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.6413549752999295, |
| "grad_norm": 12.77311070940504, |
| "learning_rate": 9.006425517628863e-08, |
| "logits/chosen": -3.5832412242889404, |
| "logits/rejected": -3.532606601715088, |
| "logps/chosen": -218.02249145507812, |
| "logps/rejected": -205.59286499023438, |
| "loss": 0.6794, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -0.21441030502319336, |
| "rewards/margins": 0.32625484466552734, |
| "rewards/rejected": -0.5406651496887207, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.6458715596330276, |
| "grad_norm": 11.7323843348539, |
| "learning_rate": 8.805216158094177e-08, |
| "logits/chosen": -3.5744497776031494, |
| "logits/rejected": -3.5260581970214844, |
| "logps/chosen": -193.29339599609375, |
| "logps/rejected": -183.2548828125, |
| "loss": 0.7311, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -0.23033544421195984, |
| "rewards/margins": 0.2078953981399536, |
| "rewards/rejected": -0.43823081254959106, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.6503881439661257, |
| "grad_norm": 11.68741246706069, |
| "learning_rate": 8.605341827117462e-08, |
| "logits/chosen": -3.584440231323242, |
| "logits/rejected": -3.5315933227539062, |
| "logps/chosen": -206.68771362304688, |
| "logps/rejected": -191.7264862060547, |
| "loss": 0.6958, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.183091938495636, |
| "rewards/margins": 0.28822189569473267, |
| "rewards/rejected": -0.47131383419036865, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.6549047282992237, |
| "grad_norm": 12.84057054734779, |
| "learning_rate": 8.406845599315482e-08, |
| "logits/chosen": -3.5925729274749756, |
| "logits/rejected": -3.5170035362243652, |
| "logps/chosen": -220.13719177246094, |
| "logps/rejected": -200.61856079101562, |
| "loss": 0.6551, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.2075750231742859, |
| "rewards/margins": 0.38476982712745667, |
| "rewards/rejected": -0.5923448204994202, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.6594213126323218, |
| "grad_norm": 12.946963405257083, |
| "learning_rate": 8.20977025231203e-08, |
| "logits/chosen": -3.5441179275512695, |
| "logits/rejected": -3.5702133178710938, |
| "logps/chosen": -245.39036560058594, |
| "logps/rejected": -230.05406188964844, |
| "loss": 0.6539, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -0.2259531170129776, |
| "rewards/margins": 0.41027867794036865, |
| "rewards/rejected": -0.6362317800521851, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.6639378969654199, |
| "grad_norm": 11.995200470883699, |
| "learning_rate": 8.014158257519046e-08, |
| "logits/chosen": -3.5933175086975098, |
| "logits/rejected": -3.598524808883667, |
| "logps/chosen": -213.013427734375, |
| "logps/rejected": -197.81692504882812, |
| "loss": 0.674, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -0.15267431735992432, |
| "rewards/margins": 0.3510599732398987, |
| "rewards/rejected": -0.5037343502044678, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.668454481298518, |
| "grad_norm": 11.025028923299288, |
| "learning_rate": 7.820051770983612e-08, |
| "logits/chosen": -3.578158378601074, |
| "logits/rejected": -3.4002773761749268, |
| "logps/chosen": -207.0416259765625, |
| "logps/rejected": -182.08555603027344, |
| "loss": 0.7096, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -0.1736908257007599, |
| "rewards/margins": 0.26846104860305786, |
| "rewards/rejected": -0.44215184450149536, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.6729710656316161, |
| "grad_norm": 14.342920455609963, |
| "learning_rate": 7.627492624302986e-08, |
| "logits/chosen": -3.5777783393859863, |
| "logits/rejected": -3.579009532928467, |
| "logps/chosen": -221.22406005859375, |
| "logps/rejected": -209.30577087402344, |
| "loss": 0.6937, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -0.2393387109041214, |
| "rewards/margins": 0.28950440883636475, |
| "rewards/rejected": -0.5288431644439697, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.6774876499647142, |
| "grad_norm": 11.16447507239811, |
| "learning_rate": 7.436522315609545e-08, |
| "logits/chosen": -3.6183109283447266, |
| "logits/rejected": -3.6173715591430664, |
| "logps/chosen": -191.01394653320312, |
| "logps/rejected": -181.1483612060547, |
| "loss": 0.6995, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.21332278847694397, |
| "rewards/margins": 0.2814997434616089, |
| "rewards/rejected": -0.49482250213623047, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.6820042342978123, |
| "grad_norm": 12.13300661848618, |
| "learning_rate": 7.247182000627588e-08, |
| "logits/chosen": -3.4436144828796387, |
| "logits/rejected": -3.4601621627807617, |
| "logps/chosen": -206.80313110351562, |
| "logps/rejected": -197.01959228515625, |
| "loss": 0.7062, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -0.219729483127594, |
| "rewards/margins": 0.2750515937805176, |
| "rewards/rejected": -0.4947810769081116, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.6865208186309104, |
| "grad_norm": 12.27938156296404, |
| "learning_rate": 7.059512483803904e-08, |
| "logits/chosen": -3.528646945953369, |
| "logits/rejected": -3.4958152770996094, |
| "logps/chosen": -227.61428833007812, |
| "logps/rejected": -216.52243041992188, |
| "loss": 0.6608, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.23144832253456116, |
| "rewards/margins": 0.3668804168701172, |
| "rewards/rejected": -0.598328709602356, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.6910374029640085, |
| "grad_norm": 10.934485175214164, |
| "learning_rate": 6.873554209514085e-08, |
| "logits/chosen": -3.5069892406463623, |
| "logits/rejected": -3.464691638946533, |
| "logps/chosen": -182.59202575683594, |
| "logps/rejected": -175.9390106201172, |
| "loss": 0.7086, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -0.1632474958896637, |
| "rewards/margins": 0.27051842212677, |
| "rewards/rejected": -0.43376588821411133, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.6955539872971066, |
| "grad_norm": 12.823137287258696, |
| "learning_rate": 6.689347253346412e-08, |
| "logits/chosen": -3.5779876708984375, |
| "logits/rejected": -3.5178349018096924, |
| "logps/chosen": -232.82090759277344, |
| "logps/rejected": -217.25289916992188, |
| "loss": 0.6864, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.28119853138923645, |
| "rewards/margins": 0.29815465211868286, |
| "rewards/rejected": -0.5793532133102417, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.7000705716302047, |
| "grad_norm": 13.681445102824224, |
| "learning_rate": 6.506931313465244e-08, |
| "logits/chosen": -3.563887119293213, |
| "logits/rejected": -3.5259387493133545, |
| "logps/chosen": -235.2152099609375, |
| "logps/rejected": -218.48611450195312, |
| "loss": 0.6864, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.2641199231147766, |
| "rewards/margins": 0.32186007499694824, |
| "rewards/rejected": -0.5859800577163696, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.7045871559633028, |
| "grad_norm": 11.872530426234707, |
| "learning_rate": 6.326345702055698e-08, |
| "logits/chosen": -3.666203737258911, |
| "logits/rejected": -3.594811201095581, |
| "logps/chosen": -196.93624877929688, |
| "logps/rejected": -176.68557739257812, |
| "loss": 0.7163, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -0.1758304387331009, |
| "rewards/margins": 0.2576484978199005, |
| "rewards/rejected": -0.4334789514541626, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.7091037402964009, |
| "grad_norm": 12.799664975071849, |
| "learning_rate": 6.147629336851552e-08, |
| "logits/chosen": -3.624176025390625, |
| "logits/rejected": -3.557004451751709, |
| "logps/chosen": -218.7604522705078, |
| "logps/rejected": -206.44232177734375, |
| "loss": 0.7112, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.2525786757469177, |
| "rewards/margins": 0.24881505966186523, |
| "rewards/rejected": -0.5013936758041382, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.713620324629499, |
| "grad_norm": 10.877958914597729, |
| "learning_rate": 5.970820732748143e-08, |
| "logits/chosen": -3.4155774116516113, |
| "logits/rejected": -3.501858711242676, |
| "logps/chosen": -199.32589721679688, |
| "logps/rejected": -196.3961181640625, |
| "loss": 0.6945, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -0.22878919541835785, |
| "rewards/margins": 0.28728824853897095, |
| "rewards/rejected": -0.5160773992538452, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.7181369089625971, |
| "grad_norm": 12.955814203296498, |
| "learning_rate": 5.795957993502092e-08, |
| "logits/chosen": -3.540942668914795, |
| "logits/rejected": -3.4938039779663086, |
| "logps/chosen": -210.17218017578125, |
| "logps/rejected": -208.64927673339844, |
| "loss": 0.7032, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -0.2663414180278778, |
| "rewards/margins": 0.27345699071884155, |
| "rewards/rejected": -0.539798378944397, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.7226534932956952, |
| "grad_norm": 11.004852801575502, |
| "learning_rate": 5.623078803519595e-08, |
| "logits/chosen": -3.6684892177581787, |
| "logits/rejected": -3.609145402908325, |
| "logps/chosen": -197.72994995117188, |
| "logps/rejected": -178.94186401367188, |
| "loss": 0.699, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.17543372511863708, |
| "rewards/margins": 0.2890481650829315, |
| "rewards/rejected": -0.4644818603992462, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.7271700776287933, |
| "grad_norm": 11.267055827604478, |
| "learning_rate": 5.4522204197351294e-08, |
| "logits/chosen": -3.6161813735961914, |
| "logits/rejected": -3.5283775329589844, |
| "logps/chosen": -217.74359130859375, |
| "logps/rejected": -197.9161376953125, |
| "loss": 0.6485, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -0.13655489683151245, |
| "rewards/margins": 0.43066203594207764, |
| "rewards/rejected": -0.5672169923782349, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.7316866619618914, |
| "grad_norm": 11.455186989092018, |
| "learning_rate": 5.2834196635822626e-08, |
| "logits/chosen": -3.6274447441101074, |
| "logits/rejected": -3.5645861625671387, |
| "logps/chosen": -192.35226440429688, |
| "logps/rejected": -178.86270141601562, |
| "loss": 0.7223, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -0.23346485197544098, |
| "rewards/margins": 0.22727568447589874, |
| "rewards/rejected": -0.4607405364513397, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.7362032462949895, |
| "grad_norm": 12.833815191659758, |
| "learning_rate": 5.1167129130583346e-08, |
| "logits/chosen": -3.5601654052734375, |
| "logits/rejected": -3.5595359802246094, |
| "logps/chosen": -231.76150512695312, |
| "logps/rejected": -218.86770629882812, |
| "loss": 0.6694, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -0.27048397064208984, |
| "rewards/margins": 0.35143792629241943, |
| "rewards/rejected": -0.6219218969345093, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.7407198306280875, |
| "grad_norm": 11.087097762035608, |
| "learning_rate": 4.952136094884666e-08, |
| "logits/chosen": -3.6404900550842285, |
| "logits/rejected": -3.5640687942504883, |
| "logps/chosen": -188.69154357910156, |
| "logps/rejected": -169.15692138671875, |
| "loss": 0.6994, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -0.18032759428024292, |
| "rewards/margins": 0.27624213695526123, |
| "rewards/rejected": -0.45656976103782654, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.7452364149611856, |
| "grad_norm": 11.909284446478502, |
| "learning_rate": 4.789724676764062e-08, |
| "logits/chosen": -3.5401947498321533, |
| "logits/rejected": -3.585221767425537, |
| "logps/chosen": -204.66531372070312, |
| "logps/rejected": -201.26177978515625, |
| "loss": 0.7003, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.2440950870513916, |
| "rewards/margins": 0.27386218309402466, |
| "rewards/rejected": -0.5179572701454163, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.7497529992942837, |
| "grad_norm": 11.847238391062595, |
| "learning_rate": 4.629513659737209e-08, |
| "logits/chosen": -3.520542860031128, |
| "logits/rejected": -3.4925966262817383, |
| "logps/chosen": -226.33909606933594, |
| "logps/rejected": -211.7904815673828, |
| "loss": 0.68, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -0.25608351826667786, |
| "rewards/margins": 0.3440699577331543, |
| "rewards/rejected": -0.6001534461975098, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.7542695836273818, |
| "grad_norm": 11.52418594856635, |
| "learning_rate": 4.471537570639676e-08, |
| "logits/chosen": -3.586939811706543, |
| "logits/rejected": -3.5489325523376465, |
| "logps/chosen": -209.86761474609375, |
| "logps/rejected": -194.3120880126953, |
| "loss": 0.6738, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -0.18899373710155487, |
| "rewards/margins": 0.36132901906967163, |
| "rewards/rejected": -0.5503227710723877, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.7587861679604799, |
| "grad_norm": 11.746177386790478, |
| "learning_rate": 4.315830454661059e-08, |
| "logits/chosen": -3.519134521484375, |
| "logits/rejected": -3.450460910797119, |
| "logps/chosen": -224.33236694335938, |
| "logps/rejected": -205.32876586914062, |
| "loss": 0.6724, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -0.14168740808963776, |
| "rewards/margins": 0.36835241317749023, |
| "rewards/rejected": -0.5100398063659668, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.763302752293578, |
| "grad_norm": 13.727528925392352, |
| "learning_rate": 4.1624258680079695e-08, |
| "logits/chosen": -3.5889594554901123, |
| "logits/rejected": -3.6074798107147217, |
| "logps/chosen": -190.32859802246094, |
| "logps/rejected": -181.37701416015625, |
| "loss": 0.7041, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.21288928389549255, |
| "rewards/margins": 0.2832014262676239, |
| "rewards/rejected": -0.49609071016311646, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.7678193366266761, |
| "grad_norm": 12.36532095556768, |
| "learning_rate": 4.0113568706723745e-08, |
| "logits/chosen": -3.5663981437683105, |
| "logits/rejected": -3.5320568084716797, |
| "logps/chosen": -205.47369384765625, |
| "logps/rejected": -189.5297393798828, |
| "loss": 0.6821, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.21529924869537354, |
| "rewards/margins": 0.3193380832672119, |
| "rewards/rejected": -0.5346373319625854, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.7723359209597742, |
| "grad_norm": 11.211886652040736, |
| "learning_rate": 3.8626560193069194e-08, |
| "logits/chosen": -3.6578316688537598, |
| "logits/rejected": -3.5587844848632812, |
| "logps/chosen": -184.16693115234375, |
| "logps/rejected": -164.69442749023438, |
| "loss": 0.7124, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.1814623475074768, |
| "rewards/margins": 0.24063682556152344, |
| "rewards/rejected": -0.42209917306900024, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.7768525052928723, |
| "grad_norm": 11.991981526284254, |
| "learning_rate": 3.71635536020865e-08, |
| "logits/chosen": -3.687999725341797, |
| "logits/rejected": -3.5739190578460693, |
| "logps/chosen": -187.41111755371094, |
| "logps/rejected": -170.08901977539062, |
| "loss": 0.6982, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.1899150013923645, |
| "rewards/margins": 0.27486538887023926, |
| "rewards/rejected": -0.46478039026260376, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.7813690896259704, |
| "grad_norm": 11.682125153864412, |
| "learning_rate": 3.572486422412786e-08, |
| "logits/chosen": -3.541208028793335, |
| "logits/rejected": -3.4319119453430176, |
| "logps/chosen": -232.7732696533203, |
| "logps/rejected": -207.6559295654297, |
| "loss": 0.6788, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -0.19109418988227844, |
| "rewards/margins": 0.37378770112991333, |
| "rewards/rejected": -0.5648818612098694, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.7858856739590685, |
| "grad_norm": 11.96356466385332, |
| "learning_rate": 3.4310802108979456e-08, |
| "logits/chosen": -3.6099984645843506, |
| "logits/rejected": -3.588571071624756, |
| "logps/chosen": -221.05567932128906, |
| "logps/rejected": -211.12063598632812, |
| "loss": 0.6749, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.2735821604728699, |
| "rewards/margins": 0.365002304315567, |
| "rewards/rejected": -0.6385844945907593, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.7904022582921666, |
| "grad_norm": 12.839800224543241, |
| "learning_rate": 3.292167199904311e-08, |
| "logits/chosen": -3.5515999794006348, |
| "logits/rejected": -3.5092904567718506, |
| "logps/chosen": -238.809814453125, |
| "logps/rejected": -215.16036987304688, |
| "loss": 0.6457, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -0.21585515141487122, |
| "rewards/margins": 0.43380266427993774, |
| "rewards/rejected": -0.6496578454971313, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.7949188426252647, |
| "grad_norm": 10.718656806306809, |
| "learning_rate": 3.1557773263661604e-08, |
| "logits/chosen": -3.477273941040039, |
| "logits/rejected": -3.4155681133270264, |
| "logps/chosen": -229.67233276367188, |
| "logps/rejected": -217.80172729492188, |
| "loss": 0.681, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -0.3297360837459564, |
| "rewards/margins": 0.32702451944351196, |
| "rewards/rejected": -0.6567606329917908, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.7994354269583628, |
| "grad_norm": 12.333864621322466, |
| "learning_rate": 3.02193998346021e-08, |
| "logits/chosen": -3.5071773529052734, |
| "logits/rejected": -3.4976847171783447, |
| "logps/chosen": -221.59481811523438, |
| "logps/rejected": -209.19186401367188, |
| "loss": 0.6713, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -0.26982712745666504, |
| "rewards/margins": 0.3569854199886322, |
| "rewards/rejected": -0.6268125176429749, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.8039520112914609, |
| "grad_norm": 12.039934107046923, |
| "learning_rate": 2.8906840142711338e-08, |
| "logits/chosen": -3.606412410736084, |
| "logits/rejected": -3.5920205116271973, |
| "logps/chosen": -235.42181396484375, |
| "logps/rejected": -216.65164184570312, |
| "loss": 0.6652, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -0.23246988654136658, |
| "rewards/margins": 0.3717581629753113, |
| "rewards/rejected": -0.6042280197143555, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.808468595624559, |
| "grad_norm": 11.030662943317653, |
| "learning_rate": 2.7620377055756423e-08, |
| "logits/chosen": -3.624559164047241, |
| "logits/rejected": -3.638848066329956, |
| "logps/chosen": -191.0684814453125, |
| "logps/rejected": -183.99212646484375, |
| "loss": 0.6806, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -0.1932627409696579, |
| "rewards/margins": 0.34079742431640625, |
| "rewards/rejected": -0.5340601205825806, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.8129851799576571, |
| "grad_norm": 11.319505715592419, |
| "learning_rate": 2.6360287817464256e-08, |
| "logits/chosen": -3.5405383110046387, |
| "logits/rejected": -3.5048179626464844, |
| "logps/chosen": -207.15835571289062, |
| "logps/rejected": -188.7417449951172, |
| "loss": 0.6835, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -0.19087350368499756, |
| "rewards/margins": 0.3352086842060089, |
| "rewards/rejected": -0.5260821580886841, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.8175017642907552, |
| "grad_norm": 13.430513620252457, |
| "learning_rate": 2.512684398777329e-08, |
| "logits/chosen": -3.5324528217315674, |
| "logits/rejected": -3.4295010566711426, |
| "logps/chosen": -213.90878295898438, |
| "logps/rejected": -195.8572998046875, |
| "loss": 0.6963, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -0.23799797892570496, |
| "rewards/margins": 0.2941531538963318, |
| "rewards/rejected": -0.5321511626243591, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.8220183486238533, |
| "grad_norm": 11.575530525209127, |
| "learning_rate": 2.3920311384309914e-08, |
| "logits/chosen": -3.601444721221924, |
| "logits/rejected": -3.5109052658081055, |
| "logps/chosen": -199.96237182617188, |
| "logps/rejected": -183.93069458007812, |
| "loss": 0.7104, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -0.22000840306282043, |
| "rewards/margins": 0.2704961895942688, |
| "rewards/rejected": -0.4905046224594116, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.8265349329569514, |
| "grad_norm": 11.049495139232015, |
| "learning_rate": 2.2740950025102763e-08, |
| "logits/chosen": -3.5267105102539062, |
| "logits/rejected": -3.5060598850250244, |
| "logps/chosen": -204.97425842285156, |
| "logps/rejected": -196.4848175048828, |
| "loss": 0.7116, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.25764691829681396, |
| "rewards/margins": 0.26898401975631714, |
| "rewards/rejected": -0.5266309976577759, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.8310515172900494, |
| "grad_norm": 12.243629851598072, |
| "learning_rate": 2.158901407254629e-08, |
| "logits/chosen": -3.6085617542266846, |
| "logits/rejected": -3.5918960571289062, |
| "logps/chosen": -207.69467163085938, |
| "logps/rejected": -201.7321014404297, |
| "loss": 0.6776, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -0.26026493310928345, |
| "rewards/margins": 0.3179108500480652, |
| "rewards/rejected": -0.5781757235527039, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.8355681016231475, |
| "grad_norm": 10.504114021037811, |
| "learning_rate": 2.0464751778626836e-08, |
| "logits/chosen": -3.489086151123047, |
| "logits/rejected": -3.5122947692871094, |
| "logps/chosen": -227.21865844726562, |
| "logps/rejected": -225.1961669921875, |
| "loss": 0.6854, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.29528743028640747, |
| "rewards/margins": 0.34715089201927185, |
| "rewards/rejected": -0.6424383521080017, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.8400846859562456, |
| "grad_norm": 12.604767051843007, |
| "learning_rate": 1.9368405431422102e-08, |
| "logits/chosen": -3.492341995239258, |
| "logits/rejected": -3.5087356567382812, |
| "logps/chosen": -239.1469268798828, |
| "logps/rejected": -227.90689086914062, |
| "loss": 0.6872, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.3633117079734802, |
| "rewards/margins": 0.31293728947639465, |
| "rewards/rejected": -0.6762489676475525, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.8446012702893437, |
| "grad_norm": 13.007777850694795, |
| "learning_rate": 1.8300211302886137e-08, |
| "logits/chosen": -3.5985684394836426, |
| "logits/rejected": -3.560959815979004, |
| "logps/chosen": -233.37771606445312, |
| "logps/rejected": -218.78062438964844, |
| "loss": 0.6679, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.31275439262390137, |
| "rewards/margins": 0.35552066564559937, |
| "rewards/rejected": -0.6682751178741455, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.8491178546224418, |
| "grad_norm": 12.224849381089012, |
| "learning_rate": 1.726039959793059e-08, |
| "logits/chosen": -3.6801674365997314, |
| "logits/rejected": -3.643399953842163, |
| "logps/chosen": -201.95065307617188, |
| "logps/rejected": -191.9815673828125, |
| "loss": 0.7021, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.23352208733558655, |
| "rewards/margins": 0.2819408178329468, |
| "rewards/rejected": -0.5154628753662109, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.8536344389555399, |
| "grad_norm": 11.06103238426834, |
| "learning_rate": 1.6249194404813633e-08, |
| "logits/chosen": -3.583648204803467, |
| "logits/rejected": -3.536038398742676, |
| "logps/chosen": -205.93267822265625, |
| "logps/rejected": -188.04385375976562, |
| "loss": 0.6819, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.20944997668266296, |
| "rewards/margins": 0.33364248275756836, |
| "rewards/rejected": -0.5430924892425537, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.858151023288638, |
| "grad_norm": 11.515369124606178, |
| "learning_rate": 1.526681364684707e-08, |
| "logits/chosen": -3.544914722442627, |
| "logits/rejected": -3.477529525756836, |
| "logps/chosen": -246.21878051757812, |
| "logps/rejected": -227.75767517089844, |
| "loss": 0.6284, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.25970178842544556, |
| "rewards/margins": 0.4616071283817291, |
| "rewards/rejected": -0.7213089466094971, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.8626676076217361, |
| "grad_norm": 11.17899475785984, |
| "learning_rate": 1.4313469035432053e-08, |
| "logits/chosen": -3.539396286010742, |
| "logits/rejected": -3.491584062576294, |
| "logps/chosen": -217.02066040039062, |
| "logps/rejected": -194.50637817382812, |
| "loss": 0.6783, |
| "rewards/accuracies": 0.890625, |
| "rewards/chosen": -0.23896610736846924, |
| "rewards/margins": 0.36122721433639526, |
| "rewards/rejected": -0.6001933813095093, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.8671841919548342, |
| "grad_norm": 11.452045965709495, |
| "learning_rate": 1.3389366024433346e-08, |
| "logits/chosen": -3.508542776107788, |
| "logits/rejected": -3.4577219486236572, |
| "logps/chosen": -209.47579956054688, |
| "logps/rejected": -191.61953735351562, |
| "loss": 0.6856, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.23392406105995178, |
| "rewards/margins": 0.3334580063819885, |
| "rewards/rejected": -0.5673820972442627, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.8717007762879323, |
| "grad_norm": 11.051060681758146, |
| "learning_rate": 1.2494703765902337e-08, |
| "logits/chosen": -3.545461654663086, |
| "logits/rejected": -3.5021471977233887, |
| "logps/chosen": -221.29922485351562, |
| "logps/rejected": -205.3109588623047, |
| "loss": 0.6884, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.26481908559799194, |
| "rewards/margins": 0.3176526129245758, |
| "rewards/rejected": -0.5824716687202454, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.8762173606210304, |
| "grad_norm": 11.263698377490607, |
| "learning_rate": 1.1629675067158119e-08, |
| "logits/chosen": -3.59144926071167, |
| "logits/rejected": -3.596311569213867, |
| "logps/chosen": -223.28732299804688, |
| "logps/rejected": -207.96267700195312, |
| "loss": 0.6421, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -0.16717705130577087, |
| "rewards/margins": 0.46462714672088623, |
| "rewards/rejected": -0.6318042278289795, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.8807339449541285, |
| "grad_norm": 11.60107390050444, |
| "learning_rate": 1.0794466349235865e-08, |
| "logits/chosen": -3.6060633659362793, |
| "logits/rejected": -3.517813205718994, |
| "logps/chosen": -202.70123291015625, |
| "logps/rejected": -183.82339477539062, |
| "loss": 0.6759, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.16405262053012848, |
| "rewards/margins": 0.36374321579933167, |
| "rewards/rejected": -0.5277957916259766, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.8852505292872266, |
| "grad_norm": 11.879162180318056, |
| "learning_rate": 9.989257606711438e-09, |
| "logits/chosen": -3.59369158744812, |
| "logits/rejected": -3.5690078735351562, |
| "logps/chosen": -226.8720703125, |
| "logps/rejected": -210.02069091796875, |
| "loss": 0.6661, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.2107160985469818, |
| "rewards/margins": 0.3873599171638489, |
| "rewards/rejected": -0.5980759859085083, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.8897671136203247, |
| "grad_norm": 11.434668917143895, |
| "learning_rate": 9.214222368911112e-09, |
| "logits/chosen": -3.5294957160949707, |
| "logits/rejected": -3.504406452178955, |
| "logps/chosen": -210.91481018066406, |
| "logps/rejected": -190.58346557617188, |
| "loss": 0.6694, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -0.17558521032333374, |
| "rewards/margins": 0.37856271862983704, |
| "rewards/rejected": -0.5541479587554932, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.8942836979534228, |
| "grad_norm": 11.442820506378803, |
| "learning_rate": 8.469527662514425e-09, |
| "logits/chosen": -3.6074564456939697, |
| "logits/rejected": -3.5393269062042236, |
| "logps/chosen": -206.037109375, |
| "logps/rejected": -184.319580078125, |
| "loss": 0.6837, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -0.2739471197128296, |
| "rewards/margins": 0.31510984897613525, |
| "rewards/rejected": -0.5890569686889648, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.8988002822865209, |
| "grad_norm": 10.48141977546952, |
| "learning_rate": 7.755333975558703e-09, |
| "logits/chosen": -3.534519910812378, |
| "logits/rejected": -3.546844005584717, |
| "logps/chosen": -209.56192016601562, |
| "logps/rejected": -192.3565673828125, |
| "loss": 0.656, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.1646820604801178, |
| "rewards/margins": 0.422355055809021, |
| "rewards/rejected": -0.5870370864868164, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.903316866619619, |
| "grad_norm": 11.512112387427932, |
| "learning_rate": 7.071795222852295e-09, |
| "logits/chosen": -3.5276589393615723, |
| "logits/rejected": -3.4519450664520264, |
| "logps/chosen": -203.3861083984375, |
| "logps/rejected": -186.114013671875, |
| "loss": 0.7095, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.2570461928844452, |
| "rewards/margins": 0.2705690562725067, |
| "rewards/rejected": -0.5276152491569519, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.9078334509527171, |
| "grad_norm": 11.664115289787672, |
| "learning_rate": 6.41905871280477e-09, |
| "logits/chosen": -3.5618152618408203, |
| "logits/rejected": -3.5523681640625, |
| "logps/chosen": -208.64450073242188, |
| "logps/rejected": -194.45249938964844, |
| "loss": 0.7034, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -0.2307577133178711, |
| "rewards/margins": 0.2903120219707489, |
| "rewards/rejected": -0.5210697650909424, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.9123500352858152, |
| "grad_norm": 12.221452533932299, |
| "learning_rate": 5.797265115680649e-09, |
| "logits/chosen": -3.560934543609619, |
| "logits/rejected": -3.5254406929016113, |
| "logps/chosen": -212.36541748046875, |
| "logps/rejected": -200.04124450683594, |
| "loss": 0.7023, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.3073262572288513, |
| "rewards/margins": 0.28462159633636475, |
| "rewards/rejected": -0.5919477939605713, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.9168666196189132, |
| "grad_norm": 11.449688321668301, |
| "learning_rate": 5.206548433283803e-09, |
| "logits/chosen": -3.5118632316589355, |
| "logits/rejected": -3.5008668899536133, |
| "logps/chosen": -216.9571990966797, |
| "logps/rejected": -194.16934204101562, |
| "loss": 0.6675, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -0.248696431517601, |
| "rewards/margins": 0.396094411611557, |
| "rewards/rejected": -0.6447908282279968, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.9213832039520113, |
| "grad_norm": 12.04856516660961, |
| "learning_rate": 4.6470359700788995e-09, |
| "logits/chosen": -3.621587038040161, |
| "logits/rejected": -3.5827927589416504, |
| "logps/chosen": -221.77987670898438, |
| "logps/rejected": -201.95237731933594, |
| "loss": 0.6732, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -0.26525014638900757, |
| "rewards/margins": 0.36969608068466187, |
| "rewards/rejected": -0.6349462270736694, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.9258997882851094, |
| "grad_norm": 11.722034459630727, |
| "learning_rate": 4.118848305756173e-09, |
| "logits/chosen": -3.595360279083252, |
| "logits/rejected": -3.5677242279052734, |
| "logps/chosen": -223.11941528320312, |
| "logps/rejected": -205.64205932617188, |
| "loss": 0.6758, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.2749682664871216, |
| "rewards/margins": 0.3367688059806824, |
| "rewards/rejected": -0.611737072467804, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.9304163726182075, |
| "grad_norm": 11.503388221795708, |
| "learning_rate": 3.622099269245571e-09, |
| "logits/chosen": -3.60404109954834, |
| "logits/rejected": -3.5229146480560303, |
| "logps/chosen": -223.0252685546875, |
| "logps/rejected": -200.0838623046875, |
| "loss": 0.6655, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -0.18073460459709167, |
| "rewards/margins": 0.37948232889175415, |
| "rewards/rejected": -0.5602169036865234, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.9349329569513056, |
| "grad_norm": 10.985620131645993, |
| "learning_rate": 3.156895914185581e-09, |
| "logits/chosen": -3.526765823364258, |
| "logits/rejected": -3.5020856857299805, |
| "logps/chosen": -209.4959259033203, |
| "logps/rejected": -197.26101684570312, |
| "loss": 0.6902, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -0.24708959460258484, |
| "rewards/margins": 0.3107626438140869, |
| "rewards/rejected": -0.5578522086143494, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.9394495412844037, |
| "grad_norm": 12.99867871857846, |
| "learning_rate": 2.7233384958522676e-09, |
| "logits/chosen": -3.6278443336486816, |
| "logits/rejected": -3.6109533309936523, |
| "logps/chosen": -207.57948303222656, |
| "logps/rejected": -194.7212677001953, |
| "loss": 0.6843, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -0.23499882221221924, |
| "rewards/margins": 0.3160548508167267, |
| "rewards/rejected": -0.5510536432266235, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.9439661256175018, |
| "grad_norm": 11.2046360254521, |
| "learning_rate": 2.321520449553421e-09, |
| "logits/chosen": -3.5132226943969727, |
| "logits/rejected": -3.491821527481079, |
| "logps/chosen": -206.74038696289062, |
| "logps/rejected": -185.68463134765625, |
| "loss": 0.6676, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.1989631950855255, |
| "rewards/margins": 0.39246267080307007, |
| "rewards/rejected": -0.591425895690918, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.9484827099505999, |
| "grad_norm": 13.729648643233745, |
| "learning_rate": 1.9515283704924667e-09, |
| "logits/chosen": -3.6648130416870117, |
| "logits/rejected": -3.5529017448425293, |
| "logps/chosen": -232.48114013671875, |
| "logps/rejected": -210.60609436035156, |
| "loss": 0.6581, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -0.2861940860748291, |
| "rewards/margins": 0.377960741519928, |
| "rewards/rejected": -0.6641547679901123, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.952999294283698, |
| "grad_norm": 12.04630458243632, |
| "learning_rate": 1.6134419951064404e-09, |
| "logits/chosen": -3.586575984954834, |
| "logits/rejected": -3.577587127685547, |
| "logps/chosen": -201.98065185546875, |
| "logps/rejected": -184.5204620361328, |
| "loss": 0.6845, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -0.21713638305664062, |
| "rewards/margins": 0.32104575634002686, |
| "rewards/rejected": -0.5381821393966675, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.9575158786167961, |
| "grad_norm": 11.591398022496064, |
| "learning_rate": 1.3073341838821028e-09, |
| "logits/chosen": -3.606295585632324, |
| "logits/rejected": -3.6138882637023926, |
| "logps/chosen": -222.92762756347656, |
| "logps/rejected": -213.224365234375, |
| "loss": 0.6756, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -0.2295455038547516, |
| "rewards/margins": 0.37904244661331177, |
| "rewards/rejected": -0.608587920665741, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.9620324629498942, |
| "grad_norm": 14.166283230715495, |
| "learning_rate": 1.033270905653949e-09, |
| "logits/chosen": -3.615994453430176, |
| "logits/rejected": -3.5957651138305664, |
| "logps/chosen": -197.43399047851562, |
| "logps/rejected": -187.89100646972656, |
| "loss": 0.7109, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.2787552773952484, |
| "rewards/margins": 0.2632961571216583, |
| "rewards/rejected": -0.5420514345169067, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.9665490472829923, |
| "grad_norm": 12.160467402707475, |
| "learning_rate": 7.913112233872476e-10, |
| "logits/chosen": -3.5975520610809326, |
| "logits/rejected": -3.5754852294921875, |
| "logps/chosen": -212.54818725585938, |
| "logps/rejected": -193.0272216796875, |
| "loss": 0.666, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -0.23875494301319122, |
| "rewards/margins": 0.3742338716983795, |
| "rewards/rejected": -0.6129888296127319, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.9710656316160904, |
| "grad_norm": 11.288257675305129, |
| "learning_rate": 5.815072814496225e-10, |
| "logits/chosen": -3.5631093978881836, |
| "logits/rejected": -3.5513646602630615, |
| "logps/chosen": -179.36865234375, |
| "logps/rejected": -169.8910675048828, |
| "loss": 0.6992, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -0.21426759660243988, |
| "rewards/margins": 0.2879588007926941, |
| "rewards/rejected": -0.5022264122962952, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.9755822159491885, |
| "grad_norm": 11.189287909041054, |
| "learning_rate": 4.0390429437332505e-10, |
| "logits/chosen": -3.49703311920166, |
| "logits/rejected": -3.45841908454895, |
| "logps/chosen": -210.85260009765625, |
| "logps/rejected": -196.04306030273438, |
| "loss": 0.6895, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.2567548155784607, |
| "rewards/margins": 0.3082759380340576, |
| "rewards/rejected": -0.5650308132171631, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.9800988002822866, |
| "grad_norm": 11.496270664039283, |
| "learning_rate": 2.585405371112459e-10, |
| "logits/chosen": -3.5845980644226074, |
| "logits/rejected": -3.5456676483154297, |
| "logps/chosen": -199.40249633789062, |
| "logps/rejected": -191.75473022460938, |
| "loss": 0.733, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -0.319938600063324, |
| "rewards/margins": 0.20854635536670685, |
| "rewards/rejected": -0.5284849405288696, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.9846153846153847, |
| "grad_norm": 12.656855394825056, |
| "learning_rate": 1.454473367883291e-10, |
| "logits/chosen": -3.478101968765259, |
| "logits/rejected": -3.501936435699463, |
| "logps/chosen": -222.27523803710938, |
| "logps/rejected": -206.86306762695312, |
| "loss": 0.6763, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -0.26684999465942383, |
| "rewards/margins": 0.34992286562919617, |
| "rewards/rejected": -0.6167728900909424, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.9891319689484828, |
| "grad_norm": 13.110720385558672, |
| "learning_rate": 6.464906595023967e-11, |
| "logits/chosen": -3.5070900917053223, |
| "logits/rejected": -3.489541530609131, |
| "logps/chosen": -221.77259826660156, |
| "logps/rejected": -207.49349975585938, |
| "loss": 0.6686, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -0.22918352484703064, |
| "rewards/margins": 0.3775954246520996, |
| "rewards/rejected": -0.6067789793014526, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.9936485532815809, |
| "grad_norm": 10.689364544141856, |
| "learning_rate": 1.616313731091501e-11, |
| "logits/chosen": -3.472511053085327, |
| "logits/rejected": -3.4571049213409424, |
| "logps/chosen": -218.83880615234375, |
| "logps/rejected": -205.46530151367188, |
| "loss": 0.6523, |
| "rewards/accuracies": 0.90625, |
| "rewards/chosen": -0.24689728021621704, |
| "rewards/margins": 0.4170025587081909, |
| "rewards/rejected": -0.663899838924408, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.998165137614679, |
| "grad_norm": 11.564165675567537, |
| "learning_rate": 0.0, |
| "logits/chosen": -3.556617259979248, |
| "logits/rejected": -3.5278592109680176, |
| "logps/chosen": -206.43194580078125, |
| "logps/rejected": -186.3187713623047, |
| "loss": 0.6886, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -0.1849951595067978, |
| "rewards/margins": 0.3221530318260193, |
| "rewards/rejected": -0.5071482062339783, |
| "step": 221 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 221, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 20, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 316050221826048.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|