| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.21340162185232608, | |
| "eval_steps": 500, | |
| "global_step": 6500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0008207754686627926, | |
| "grad_norm": 158.0, | |
| "learning_rate": 1.312910284463895e-07, | |
| "logits/chosen": -4.1174211502075195, | |
| "logits/rejected": -4.145937442779541, | |
| "logps/chosen": -750.2059326171875, | |
| "logps/rejected": -523.7258911132812, | |
| "loss": 0.6934, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.007276252377778292, | |
| "rewards/margins": 0.0009733623010106385, | |
| "rewards/rejected": -0.0082496153190732, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.0016415509373255853, | |
| "grad_norm": 150.0, | |
| "learning_rate": 2.680525164113786e-07, | |
| "logits/chosen": -4.106570720672607, | |
| "logits/rejected": -4.1443281173706055, | |
| "logps/chosen": -741.0921630859375, | |
| "logps/rejected": -502.666259765625, | |
| "loss": 0.6987, | |
| "rewards/accuracies": 0.46000000834465027, | |
| "rewards/chosen": -0.002752303844317794, | |
| "rewards/margins": -0.008955330587923527, | |
| "rewards/rejected": 0.006203026045113802, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.002462326405988378, | |
| "grad_norm": 237.0, | |
| "learning_rate": 4.0481400437636766e-07, | |
| "logits/chosen": -4.268686771392822, | |
| "logits/rejected": -4.344180107116699, | |
| "logps/chosen": -718.856201171875, | |
| "logps/rejected": -500.9825744628906, | |
| "loss": 0.702, | |
| "rewards/accuracies": 0.47999998927116394, | |
| "rewards/chosen": -0.01361551322042942, | |
| "rewards/margins": -0.016321375966072083, | |
| "rewards/rejected": 0.0027058636769652367, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.0032831018746511706, | |
| "grad_norm": 216.0, | |
| "learning_rate": 5.415754923413568e-07, | |
| "logits/chosen": -4.252306938171387, | |
| "logits/rejected": -4.244429111480713, | |
| "logps/chosen": -663.5410766601562, | |
| "logps/rejected": -519.08984375, | |
| "loss": 0.6935, | |
| "rewards/accuracies": 0.5400000214576721, | |
| "rewards/chosen": -0.0002230451937066391, | |
| "rewards/margins": 0.000489498081151396, | |
| "rewards/rejected": -0.0007125435513444245, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.004103877343313963, | |
| "grad_norm": 96.5, | |
| "learning_rate": 6.783369803063458e-07, | |
| "logits/chosen": -4.129316806793213, | |
| "logits/rejected": -4.208637714385986, | |
| "logps/chosen": -719.7318725585938, | |
| "logps/rejected": -521.5628662109375, | |
| "loss": 0.6934, | |
| "rewards/accuracies": 0.5199999809265137, | |
| "rewards/chosen": 0.002255064435303211, | |
| "rewards/margins": 0.0010580136440694332, | |
| "rewards/rejected": 0.0011970511404797435, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.004924652811976756, | |
| "grad_norm": 161.0, | |
| "learning_rate": 8.150984682713349e-07, | |
| "logits/chosen": -4.199742794036865, | |
| "logits/rejected": -4.159580707550049, | |
| "logps/chosen": -695.33984375, | |
| "logps/rejected": -532.4611206054688, | |
| "loss": 0.7009, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.006176384165883064, | |
| "rewards/margins": -0.014046883210539818, | |
| "rewards/rejected": 0.007870499044656754, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.005745428280639548, | |
| "grad_norm": 201.0, | |
| "learning_rate": 9.518599562363239e-07, | |
| "logits/chosen": -4.214637279510498, | |
| "logits/rejected": -4.182404041290283, | |
| "logps/chosen": -777.0204467773438, | |
| "logps/rejected": -569.65771484375, | |
| "loss": 0.6915, | |
| "rewards/accuracies": 0.47999998927116394, | |
| "rewards/chosen": -0.0014341524802148342, | |
| "rewards/margins": 0.004235363565385342, | |
| "rewards/rejected": -0.0056695155799388885, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.006566203749302341, | |
| "grad_norm": 270.0, | |
| "learning_rate": 1.088621444201313e-06, | |
| "logits/chosen": -4.112586498260498, | |
| "logits/rejected": -4.159411907196045, | |
| "logps/chosen": -700.7896728515625, | |
| "logps/rejected": -617.4669799804688, | |
| "loss": 0.703, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.013239642605185509, | |
| "rewards/margins": -0.01829313486814499, | |
| "rewards/rejected": 0.005053492728620768, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.007386979217965133, | |
| "grad_norm": 320.0, | |
| "learning_rate": 1.225382932166302e-06, | |
| "logits/chosen": -4.012618541717529, | |
| "logits/rejected": -4.056581020355225, | |
| "logps/chosen": -560.5947875976562, | |
| "logps/rejected": -470.4196472167969, | |
| "loss": 0.6863, | |
| "rewards/accuracies": 0.6200000047683716, | |
| "rewards/chosen": 0.00924967136234045, | |
| "rewards/margins": 0.01506039034575224, | |
| "rewards/rejected": -0.005810718517750502, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.008207754686627926, | |
| "grad_norm": 80.5, | |
| "learning_rate": 1.3621444201312912e-06, | |
| "logits/chosen": -3.9422268867492676, | |
| "logits/rejected": -4.169854640960693, | |
| "logps/chosen": -868.24365234375, | |
| "logps/rejected": -700.423583984375, | |
| "loss": 0.6884, | |
| "rewards/accuracies": 0.5400000214576721, | |
| "rewards/chosen": 0.0005125462776049972, | |
| "rewards/margins": 0.011028454639017582, | |
| "rewards/rejected": -0.010515906848013401, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.00902853015529072, | |
| "grad_norm": 226.0, | |
| "learning_rate": 1.4989059080962803e-06, | |
| "logits/chosen": -4.265016078948975, | |
| "logits/rejected": -4.272618770599365, | |
| "logps/chosen": -698.0468139648438, | |
| "logps/rejected": -537.390380859375, | |
| "loss": 0.6912, | |
| "rewards/accuracies": 0.4399999976158142, | |
| "rewards/chosen": 0.005534702911973, | |
| "rewards/margins": 0.0049882736057043076, | |
| "rewards/rejected": 0.0005464285495691001, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.009849305623953511, | |
| "grad_norm": 258.0, | |
| "learning_rate": 1.6356673960612692e-06, | |
| "logits/chosen": -4.159086227416992, | |
| "logits/rejected": -4.134156703948975, | |
| "logps/chosen": -703.7493896484375, | |
| "logps/rejected": -520.1746826171875, | |
| "loss": 0.7016, | |
| "rewards/accuracies": 0.3799999952316284, | |
| "rewards/chosen": -0.00535870436578989, | |
| "rewards/margins": -0.015425672754645348, | |
| "rewards/rejected": 0.010066968388855457, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.010670081092616303, | |
| "grad_norm": 147.0, | |
| "learning_rate": 1.7724288840262582e-06, | |
| "logits/chosen": -4.203159809112549, | |
| "logits/rejected": -4.189880847930908, | |
| "logps/chosen": -780.8976440429688, | |
| "logps/rejected": -511.4765625, | |
| "loss": 0.6884, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.016911819577217102, | |
| "rewards/margins": 0.010711951181292534, | |
| "rewards/rejected": 0.006199866533279419, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.011490856561279097, | |
| "grad_norm": 223.0, | |
| "learning_rate": 1.9091903719912473e-06, | |
| "logits/chosen": -4.209036350250244, | |
| "logits/rejected": -4.1973419189453125, | |
| "logps/chosen": -747.6294555664062, | |
| "logps/rejected": -575.0943603515625, | |
| "loss": 0.6953, | |
| "rewards/accuracies": 0.5400000214576721, | |
| "rewards/chosen": 0.006754291243851185, | |
| "rewards/margins": -0.0026557582896202803, | |
| "rewards/rejected": 0.009410049766302109, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.012311632029941889, | |
| "grad_norm": 207.0, | |
| "learning_rate": 2.0459518599562366e-06, | |
| "logits/chosen": -4.2033867835998535, | |
| "logits/rejected": -4.380429267883301, | |
| "logps/chosen": -750.8778076171875, | |
| "logps/rejected": -514.9067993164062, | |
| "loss": 0.6839, | |
| "rewards/accuracies": 0.5199999809265137, | |
| "rewards/chosen": 0.02162899449467659, | |
| "rewards/margins": 0.019526075571775436, | |
| "rewards/rejected": 0.0021029210183769464, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.013132407498604682, | |
| "grad_norm": 160.0, | |
| "learning_rate": 2.1827133479212255e-06, | |
| "logits/chosen": -4.284715175628662, | |
| "logits/rejected": -4.265630722045898, | |
| "logps/chosen": -698.50439453125, | |
| "logps/rejected": -537.8710327148438, | |
| "loss": 0.6825, | |
| "rewards/accuracies": 0.6399999856948853, | |
| "rewards/chosen": 0.01697949506342411, | |
| "rewards/margins": 0.022279297932982445, | |
| "rewards/rejected": -0.005299802869558334, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.013953182967267474, | |
| "grad_norm": 171.0, | |
| "learning_rate": 2.3194748358862144e-06, | |
| "logits/chosen": -4.165258407592773, | |
| "logits/rejected": -4.168488025665283, | |
| "logps/chosen": -791.05419921875, | |
| "logps/rejected": -458.402587890625, | |
| "loss": 0.6981, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.0009704286349005997, | |
| "rewards/margins": -0.008310976438224316, | |
| "rewards/rejected": 0.009281404316425323, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.014773958435930266, | |
| "grad_norm": 170.0, | |
| "learning_rate": 2.4562363238512038e-06, | |
| "logits/chosen": -4.120928764343262, | |
| "logits/rejected": -4.241576671600342, | |
| "logps/chosen": -773.9873657226562, | |
| "logps/rejected": -557.421142578125, | |
| "loss": 0.6817, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 0.027397265657782555, | |
| "rewards/margins": 0.024752546101808548, | |
| "rewards/rejected": 0.0026447183918207884, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.01559473390459306, | |
| "grad_norm": 248.0, | |
| "learning_rate": 2.592997811816193e-06, | |
| "logits/chosen": -3.9233641624450684, | |
| "logits/rejected": -4.040389060974121, | |
| "logps/chosen": -659.5145874023438, | |
| "logps/rejected": -562.8936767578125, | |
| "loss": 0.6946, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.00023105592117644846, | |
| "rewards/margins": -0.0014622471062466502, | |
| "rewards/rejected": 0.0012311902828514576, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.01641550937325585, | |
| "grad_norm": 218.0, | |
| "learning_rate": 2.7297592997811816e-06, | |
| "logits/chosen": -4.040191173553467, | |
| "logits/rejected": -4.220387935638428, | |
| "logps/chosen": -694.8212280273438, | |
| "logps/rejected": -592.8945922851562, | |
| "loss": 0.6992, | |
| "rewards/accuracies": 0.4399999976158142, | |
| "rewards/chosen": 0.015942150726914406, | |
| "rewards/margins": -0.010712460614740849, | |
| "rewards/rejected": 0.02665461041033268, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.017236284841918643, | |
| "grad_norm": 111.5, | |
| "learning_rate": 2.866520787746171e-06, | |
| "logits/chosen": -3.9839234352111816, | |
| "logits/rejected": -4.0927205085754395, | |
| "logps/chosen": -850.669921875, | |
| "logps/rejected": -657.936279296875, | |
| "loss": 0.6903, | |
| "rewards/accuracies": 0.5400000214576721, | |
| "rewards/chosen": 0.016984395682811737, | |
| "rewards/margins": 0.0072107817977666855, | |
| "rewards/rejected": 0.009773612953722477, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.01805706031058144, | |
| "grad_norm": 308.0, | |
| "learning_rate": 3.0032822757111603e-06, | |
| "logits/chosen": -4.234395503997803, | |
| "logits/rejected": -4.200852394104004, | |
| "logps/chosen": -867.9359130859375, | |
| "logps/rejected": -640.5671997070312, | |
| "loss": 0.6912, | |
| "rewards/accuracies": 0.47999998927116394, | |
| "rewards/chosen": 0.017692890018224716, | |
| "rewards/margins": 0.005810171365737915, | |
| "rewards/rejected": 0.011882718652486801, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.01887783577924423, | |
| "grad_norm": 193.0, | |
| "learning_rate": 3.1400437636761488e-06, | |
| "logits/chosen": -4.274899482727051, | |
| "logits/rejected": -4.361607551574707, | |
| "logps/chosen": -772.3294677734375, | |
| "logps/rejected": -483.2698974609375, | |
| "loss": 0.6799, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.027177538722753525, | |
| "rewards/margins": 0.028825119137763977, | |
| "rewards/rejected": -0.001647579250857234, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.019698611247907023, | |
| "grad_norm": 166.0, | |
| "learning_rate": 3.276805251641138e-06, | |
| "logits/chosen": -4.136162281036377, | |
| "logits/rejected": -4.200733661651611, | |
| "logps/chosen": -719.72998046875, | |
| "logps/rejected": -528.1305541992188, | |
| "loss": 0.6876, | |
| "rewards/accuracies": 0.5199999809265137, | |
| "rewards/chosen": 0.019943855702877045, | |
| "rewards/margins": 0.012945435009896755, | |
| "rewards/rejected": 0.006998421624302864, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.020519386716569814, | |
| "grad_norm": 160.0, | |
| "learning_rate": 3.4135667396061274e-06, | |
| "logits/chosen": -4.017778396606445, | |
| "logits/rejected": -4.027612209320068, | |
| "logps/chosen": -691.2648315429688, | |
| "logps/rejected": -643.5048217773438, | |
| "loss": 0.6981, | |
| "rewards/accuracies": 0.41999998688697815, | |
| "rewards/chosen": -0.003029178362339735, | |
| "rewards/margins": -0.008131473325192928, | |
| "rewards/rejected": 0.005102294497191906, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.021340162185232606, | |
| "grad_norm": 288.0, | |
| "learning_rate": 3.5503282275711163e-06, | |
| "logits/chosen": -4.192790508270264, | |
| "logits/rejected": -4.225100517272949, | |
| "logps/chosen": -628.4151000976562, | |
| "logps/rejected": -418.1283874511719, | |
| "loss": 0.6874, | |
| "rewards/accuracies": 0.5400000214576721, | |
| "rewards/chosen": 0.031290203332901, | |
| "rewards/margins": 0.013368282467126846, | |
| "rewards/rejected": 0.017921922728419304, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.0221609376538954, | |
| "grad_norm": 103.0, | |
| "learning_rate": 3.6870897155361052e-06, | |
| "logits/chosen": -4.11993408203125, | |
| "logits/rejected": -4.221017360687256, | |
| "logps/chosen": -701.9328002929688, | |
| "logps/rejected": -584.27294921875, | |
| "loss": 0.6912, | |
| "rewards/accuracies": 0.6399999856948853, | |
| "rewards/chosen": 0.018245697021484375, | |
| "rewards/margins": 0.0061894748359918594, | |
| "rewards/rejected": 0.012056220322847366, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.022981713122558194, | |
| "grad_norm": 238.0, | |
| "learning_rate": 3.823851203501095e-06, | |
| "logits/chosen": -4.17140007019043, | |
| "logits/rejected": -4.289581775665283, | |
| "logps/chosen": -705.1632690429688, | |
| "logps/rejected": -457.5531005859375, | |
| "loss": 0.6833, | |
| "rewards/accuracies": 0.5199999809265137, | |
| "rewards/chosen": 0.03122738189995289, | |
| "rewards/margins": 0.02252998761832714, | |
| "rewards/rejected": 0.008697391487658024, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.023802488591220985, | |
| "grad_norm": 215.0, | |
| "learning_rate": 3.9606126914660835e-06, | |
| "logits/chosen": -4.087902069091797, | |
| "logits/rejected": -4.143421173095703, | |
| "logps/chosen": -790.4818725585938, | |
| "logps/rejected": -638.943359375, | |
| "loss": 0.6754, | |
| "rewards/accuracies": 0.6399999856948853, | |
| "rewards/chosen": 0.04056026414036751, | |
| "rewards/margins": 0.03895123302936554, | |
| "rewards/rejected": 0.0016090321587398648, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.024623264059883777, | |
| "grad_norm": 236.0, | |
| "learning_rate": 4.097374179431072e-06, | |
| "logits/chosen": -4.093388557434082, | |
| "logits/rejected": -4.1176371574401855, | |
| "logps/chosen": -811.650634765625, | |
| "logps/rejected": -576.680908203125, | |
| "loss": 0.6658, | |
| "rewards/accuracies": 0.7400000095367432, | |
| "rewards/chosen": 0.05430950969457626, | |
| "rewards/margins": 0.05836718529462814, | |
| "rewards/rejected": -0.004057674203068018, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.02544403952854657, | |
| "grad_norm": 270.0, | |
| "learning_rate": 4.234135667396061e-06, | |
| "logits/chosen": -4.053430557250977, | |
| "logits/rejected": -4.1902666091918945, | |
| "logps/chosen": -639.8978881835938, | |
| "logps/rejected": -590.718505859375, | |
| "loss": 0.6847, | |
| "rewards/accuracies": 0.5400000214576721, | |
| "rewards/chosen": 0.026400210335850716, | |
| "rewards/margins": 0.01981959119439125, | |
| "rewards/rejected": 0.006580619607120752, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.026264814997209365, | |
| "grad_norm": 183.0, | |
| "learning_rate": 4.370897155361051e-06, | |
| "logits/chosen": -4.134805202484131, | |
| "logits/rejected": -4.280844211578369, | |
| "logps/chosen": -688.987060546875, | |
| "logps/rejected": -571.3134155273438, | |
| "loss": 0.6725, | |
| "rewards/accuracies": 0.6399999856948853, | |
| "rewards/chosen": 0.051876142621040344, | |
| "rewards/margins": 0.044371940195560455, | |
| "rewards/rejected": 0.0075042033568024635, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.027085590465872156, | |
| "grad_norm": 158.0, | |
| "learning_rate": 4.50765864332604e-06, | |
| "logits/chosen": -4.2159342765808105, | |
| "logits/rejected": -4.336707592010498, | |
| "logps/chosen": -794.3070068359375, | |
| "logps/rejected": -574.3153686523438, | |
| "loss": 0.6961, | |
| "rewards/accuracies": 0.46000000834465027, | |
| "rewards/chosen": 0.014993507415056229, | |
| "rewards/margins": -0.0014925742289051414, | |
| "rewards/rejected": 0.016486085951328278, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.02790636593453495, | |
| "grad_norm": 219.0, | |
| "learning_rate": 4.644420131291029e-06, | |
| "logits/chosen": -4.15441370010376, | |
| "logits/rejected": -4.211958408355713, | |
| "logps/chosen": -692.42529296875, | |
| "logps/rejected": -581.7979125976562, | |
| "loss": 0.6822, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 0.04421781376004219, | |
| "rewards/margins": 0.02522583305835724, | |
| "rewards/rejected": 0.0189919825643301, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.02872714140319774, | |
| "grad_norm": 175.0, | |
| "learning_rate": 4.781181619256018e-06, | |
| "logits/chosen": -4.147443771362305, | |
| "logits/rejected": -4.1150126457214355, | |
| "logps/chosen": -791.9564819335938, | |
| "logps/rejected": -645.9444580078125, | |
| "loss": 0.6655, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 0.06266529858112335, | |
| "rewards/margins": 0.06153992563486099, | |
| "rewards/rejected": 0.0011253589764237404, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.029547916871860532, | |
| "grad_norm": 171.0, | |
| "learning_rate": 4.917943107221007e-06, | |
| "logits/chosen": -4.13357400894165, | |
| "logits/rejected": -4.254169464111328, | |
| "logps/chosen": -709.9840698242188, | |
| "logps/rejected": -507.0233459472656, | |
| "loss": 0.6634, | |
| "rewards/accuracies": 0.6399999856948853, | |
| "rewards/chosen": 0.046279050409793854, | |
| "rewards/margins": 0.07115618884563446, | |
| "rewards/rejected": -0.024877142161130905, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.030368692340523327, | |
| "grad_norm": 207.0, | |
| "learning_rate": 4.99830766627179e-06, | |
| "logits/chosen": -4.161207675933838, | |
| "logits/rejected": -4.27424430847168, | |
| "logps/chosen": -639.82373046875, | |
| "logps/rejected": -429.6187744140625, | |
| "loss": 0.6713, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.05205925926566124, | |
| "rewards/margins": 0.0562448687851429, | |
| "rewards/rejected": -0.00418561277911067, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.03118946780918612, | |
| "grad_norm": 85.0, | |
| "learning_rate": 4.994076831951261e-06, | |
| "logits/chosen": -4.219003677368164, | |
| "logits/rejected": -4.230233669281006, | |
| "logps/chosen": -647.91455078125, | |
| "logps/rejected": -490.3548583984375, | |
| "loss": 0.6668, | |
| "rewards/accuracies": 0.6200000047683716, | |
| "rewards/chosen": 0.06009820103645325, | |
| "rewards/margins": 0.06201506033539772, | |
| "rewards/rejected": -0.001916866865940392, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.03201024327784891, | |
| "grad_norm": 196.0, | |
| "learning_rate": 4.989845997630734e-06, | |
| "logits/chosen": -4.0881028175354, | |
| "logits/rejected": -4.141018867492676, | |
| "logps/chosen": -924.4205322265625, | |
| "logps/rejected": -624.48046875, | |
| "loss": 0.6708, | |
| "rewards/accuracies": 0.5799999833106995, | |
| "rewards/chosen": 0.058801230043172836, | |
| "rewards/margins": 0.055282555520534515, | |
| "rewards/rejected": 0.0035186754539608955, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.0328310187465117, | |
| "grad_norm": 88.5, | |
| "learning_rate": 4.985615163310205e-06, | |
| "logits/chosen": -4.2076311111450195, | |
| "logits/rejected": -4.243706226348877, | |
| "logps/chosen": -681.6548461914062, | |
| "logps/rejected": -456.4216003417969, | |
| "loss": 0.659, | |
| "rewards/accuracies": 0.7400000095367432, | |
| "rewards/chosen": 0.07433926314115524, | |
| "rewards/margins": 0.08106620609760284, | |
| "rewards/rejected": -0.006726943422108889, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.033651794215174495, | |
| "grad_norm": 242.0, | |
| "learning_rate": 4.981384328989678e-06, | |
| "logits/chosen": -4.125373363494873, | |
| "logits/rejected": -4.25338888168335, | |
| "logps/chosen": -866.8526000976562, | |
| "logps/rejected": -517.0289916992188, | |
| "loss": 0.6917, | |
| "rewards/accuracies": 0.47999998927116394, | |
| "rewards/chosen": -0.002582031534984708, | |
| "rewards/margins": 0.004485914018005133, | |
| "rewards/rejected": -0.00706794299185276, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.03447256968383729, | |
| "grad_norm": 260.0, | |
| "learning_rate": 4.977153494669149e-06, | |
| "logits/chosen": -4.289287090301514, | |
| "logits/rejected": -4.317573070526123, | |
| "logps/chosen": -766.9920043945312, | |
| "logps/rejected": -571.254638671875, | |
| "loss": 0.6883, | |
| "rewards/accuracies": 0.4399999976158142, | |
| "rewards/chosen": 0.02896983176469803, | |
| "rewards/margins": 0.012178352102637291, | |
| "rewards/rejected": 0.01679147779941559, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.03529334515250008, | |
| "grad_norm": 270.0, | |
| "learning_rate": 4.972922660348622e-06, | |
| "logits/chosen": -4.071505069732666, | |
| "logits/rejected": -4.080994129180908, | |
| "logps/chosen": -811.8478393554688, | |
| "logps/rejected": -614.4443359375, | |
| "loss": 0.6888, | |
| "rewards/accuracies": 0.5600000023841858, | |
| "rewards/chosen": 0.028604382649064064, | |
| "rewards/margins": 0.011533576995134354, | |
| "rewards/rejected": 0.01707080751657486, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.03611412062116288, | |
| "grad_norm": 294.0, | |
| "learning_rate": 4.968691826028093e-06, | |
| "logits/chosen": -4.189393520355225, | |
| "logits/rejected": -4.166673183441162, | |
| "logps/chosen": -789.1497802734375, | |
| "logps/rejected": -534.8926391601562, | |
| "loss": 0.6808, | |
| "rewards/accuracies": 0.6399999856948853, | |
| "rewards/chosen": 0.04065079241991043, | |
| "rewards/margins": 0.027193991467356682, | |
| "rewards/rejected": 0.0134567990899086, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.03693489608982567, | |
| "grad_norm": 168.0, | |
| "learning_rate": 4.964460991707566e-06, | |
| "logits/chosen": -4.194692611694336, | |
| "logits/rejected": -4.184408664703369, | |
| "logps/chosen": -790.99072265625, | |
| "logps/rejected": -579.170166015625, | |
| "loss": 0.688, | |
| "rewards/accuracies": 0.5400000214576721, | |
| "rewards/chosen": 0.025631260126829147, | |
| "rewards/margins": 0.012109901756048203, | |
| "rewards/rejected": 0.01352135930210352, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.03775567155848846, | |
| "grad_norm": 332.0, | |
| "learning_rate": 4.960230157387037e-06, | |
| "logits/chosen": -4.162527561187744, | |
| "logits/rejected": -4.196642875671387, | |
| "logps/chosen": -745.1271362304688, | |
| "logps/rejected": -615.0593872070312, | |
| "loss": 0.7154, | |
| "rewards/accuracies": 0.5600000023841858, | |
| "rewards/chosen": 0.05613722652196884, | |
| "rewards/margins": -0.025486024096608162, | |
| "rewards/rejected": 0.08162324875593185, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.03857644702715125, | |
| "grad_norm": 284.0, | |
| "learning_rate": 4.95599932306651e-06, | |
| "logits/chosen": -4.014937877655029, | |
| "logits/rejected": -4.0320024490356445, | |
| "logps/chosen": -623.9722900390625, | |
| "logps/rejected": -527.3934936523438, | |
| "loss": 0.714, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.052386511117219925, | |
| "rewards/margins": -0.019469745457172394, | |
| "rewards/rejected": 0.07185625284910202, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.039397222495814045, | |
| "grad_norm": 188.0, | |
| "learning_rate": 4.951768488745981e-06, | |
| "logits/chosen": -4.1850738525390625, | |
| "logits/rejected": -4.26005220413208, | |
| "logps/chosen": -761.0906372070312, | |
| "logps/rejected": -467.2315979003906, | |
| "loss": 0.6765, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 0.07359949499368668, | |
| "rewards/margins": 0.03598727285861969, | |
| "rewards/rejected": 0.03761221468448639, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.04021799796447684, | |
| "grad_norm": 262.0, | |
| "learning_rate": 4.947537654425454e-06, | |
| "logits/chosen": -4.217501163482666, | |
| "logits/rejected": -4.082854747772217, | |
| "logps/chosen": -671.0990600585938, | |
| "logps/rejected": -584.4004516601562, | |
| "loss": 0.6815, | |
| "rewards/accuracies": 0.5400000214576721, | |
| "rewards/chosen": 0.07619458436965942, | |
| "rewards/margins": 0.028806446120142937, | |
| "rewards/rejected": 0.047388140112161636, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.04103877343313963, | |
| "grad_norm": 114.5, | |
| "learning_rate": 4.943306820104925e-06, | |
| "logits/chosen": -4.066970348358154, | |
| "logits/rejected": -4.118155479431152, | |
| "logps/chosen": -707.7814331054688, | |
| "logps/rejected": -522.02880859375, | |
| "loss": 0.6886, | |
| "rewards/accuracies": 0.5600000023841858, | |
| "rewards/chosen": 0.03777886554598808, | |
| "rewards/margins": 0.011078822426497936, | |
| "rewards/rejected": 0.02670004405081272, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.04185954890180242, | |
| "grad_norm": 97.0, | |
| "learning_rate": 4.939075985784398e-06, | |
| "logits/chosen": -4.040958881378174, | |
| "logits/rejected": -4.270505905151367, | |
| "logps/chosen": -622.3072509765625, | |
| "logps/rejected": -454.3692932128906, | |
| "loss": 0.6937, | |
| "rewards/accuracies": 0.5199999809265137, | |
| "rewards/chosen": 0.05720474198460579, | |
| "rewards/margins": 0.0045347679406404495, | |
| "rewards/rejected": 0.05266997963190079, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.04268032437046521, | |
| "grad_norm": 159.0, | |
| "learning_rate": 4.934845151463869e-06, | |
| "logits/chosen": -4.0371527671813965, | |
| "logits/rejected": -4.051915168762207, | |
| "logps/chosen": -697.654296875, | |
| "logps/rejected": -547.8112182617188, | |
| "loss": 0.6924, | |
| "rewards/accuracies": 0.5400000214576721, | |
| "rewards/chosen": 0.06825733929872513, | |
| "rewards/margins": 0.005310583859682083, | |
| "rewards/rejected": 0.06294675171375275, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.04350109983912801, | |
| "grad_norm": 117.5, | |
| "learning_rate": 4.930614317143341e-06, | |
| "logits/chosen": -4.278604507446289, | |
| "logits/rejected": -4.323906421661377, | |
| "logps/chosen": -667.7477416992188, | |
| "logps/rejected": -371.2331237792969, | |
| "loss": 0.6782, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 0.0644068494439125, | |
| "rewards/margins": 0.03309326991438866, | |
| "rewards/rejected": 0.03131357580423355, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.0443218753077908, | |
| "grad_norm": 173.0, | |
| "learning_rate": 4.926383482822813e-06, | |
| "logits/chosen": -4.178940296173096, | |
| "logits/rejected": -4.232710361480713, | |
| "logps/chosen": -695.1062622070312, | |
| "logps/rejected": -554.77734375, | |
| "loss": 0.6671, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.0913730263710022, | |
| "rewards/margins": 0.05747390165925026, | |
| "rewards/rejected": 0.03389911353588104, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.045142650776453595, | |
| "grad_norm": 158.0, | |
| "learning_rate": 4.922152648502285e-06, | |
| "logits/chosen": -4.029209136962891, | |
| "logits/rejected": -4.280393123626709, | |
| "logps/chosen": -510.9493103027344, | |
| "logps/rejected": -371.9337463378906, | |
| "loss": 0.6774, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.05904337763786316, | |
| "rewards/margins": 0.03466322645545006, | |
| "rewards/rejected": 0.024380149319767952, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.04596342624511639, | |
| "grad_norm": 173.0, | |
| "learning_rate": 4.917921814181757e-06, | |
| "logits/chosen": -4.22706413269043, | |
| "logits/rejected": -4.331350326538086, | |
| "logps/chosen": -816.2857666015625, | |
| "logps/rejected": -513.9669189453125, | |
| "loss": 0.6742, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 0.06091469153761864, | |
| "rewards/margins": 0.0425628200173378, | |
| "rewards/rejected": 0.018351875245571136, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.04678420171377918, | |
| "grad_norm": 101.0, | |
| "learning_rate": 4.913690979861229e-06, | |
| "logits/chosen": -4.167561054229736, | |
| "logits/rejected": -4.328184127807617, | |
| "logps/chosen": -795.4500122070312, | |
| "logps/rejected": -513.28515625, | |
| "loss": 0.6806, | |
| "rewards/accuracies": 0.6200000047683716, | |
| "rewards/chosen": 0.05300300940871239, | |
| "rewards/margins": 0.029740547761321068, | |
| "rewards/rejected": 0.023262467235326767, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 0.04760497718244197, | |
| "grad_norm": 228.0, | |
| "learning_rate": 4.909460145540701e-06, | |
| "logits/chosen": -4.266139507293701, | |
| "logits/rejected": -4.2949018478393555, | |
| "logps/chosen": -818.7105712890625, | |
| "logps/rejected": -544.2308349609375, | |
| "loss": 0.6714, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.06837845593690872, | |
| "rewards/margins": 0.04987398162484169, | |
| "rewards/rejected": 0.01850447617471218, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.04842575265110476, | |
| "grad_norm": 171.0, | |
| "learning_rate": 4.905229311220173e-06, | |
| "logits/chosen": -4.202406406402588, | |
| "logits/rejected": -4.179924964904785, | |
| "logps/chosen": -740.6281127929688, | |
| "logps/rejected": -558.76416015625, | |
| "loss": 0.6772, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.08042413741350174, | |
| "rewards/margins": 0.03836137428879738, | |
| "rewards/rejected": 0.042062774300575256, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 0.049246528119767555, | |
| "grad_norm": 158.0, | |
| "learning_rate": 4.900998476899645e-06, | |
| "logits/chosen": -4.150285243988037, | |
| "logits/rejected": -4.043075084686279, | |
| "logps/chosen": -907.114501953125, | |
| "logps/rejected": -761.865234375, | |
| "loss": 0.6657, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 0.08876218646764755, | |
| "rewards/margins": 0.06508920341730118, | |
| "rewards/rejected": 0.02367297373712063, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.05006730358843035, | |
| "grad_norm": 296.0, | |
| "learning_rate": 4.896767642579117e-06, | |
| "logits/chosen": -4.109315395355225, | |
| "logits/rejected": -4.170251846313477, | |
| "logps/chosen": -897.26953125, | |
| "logps/rejected": -622.68310546875, | |
| "loss": 0.6654, | |
| "rewards/accuracies": 0.6200000047683716, | |
| "rewards/chosen": 0.09235405921936035, | |
| "rewards/margins": 0.062233816832304, | |
| "rewards/rejected": 0.0301202479749918, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 0.05088807905709314, | |
| "grad_norm": 205.0, | |
| "learning_rate": 4.892536808258589e-06, | |
| "logits/chosen": -4.138393402099609, | |
| "logits/rejected": -4.094308376312256, | |
| "logps/chosen": -725.5248413085938, | |
| "logps/rejected": -577.0723266601562, | |
| "loss": 0.6771, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 0.07355516403913498, | |
| "rewards/margins": 0.03712104633450508, | |
| "rewards/rejected": 0.0364341177046299, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.05170885452575594, | |
| "grad_norm": 324.0, | |
| "learning_rate": 4.888305973938061e-06, | |
| "logits/chosen": -4.188644886016846, | |
| "logits/rejected": -4.159783840179443, | |
| "logps/chosen": -896.7723388671875, | |
| "logps/rejected": -618.6541748046875, | |
| "loss": 0.6671, | |
| "rewards/accuracies": 0.6399999856948853, | |
| "rewards/chosen": 0.09490902721881866, | |
| "rewards/margins": 0.060124482959508896, | |
| "rewards/rejected": 0.03478454798460007, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 0.05252962999441873, | |
| "grad_norm": 150.0, | |
| "learning_rate": 4.884075139617533e-06, | |
| "logits/chosen": -4.2158002853393555, | |
| "logits/rejected": -4.447061061859131, | |
| "logps/chosen": -633.4708862304688, | |
| "logps/rejected": -464.6991271972656, | |
| "loss": 0.6719, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.08528276532888412, | |
| "rewards/margins": 0.04901896044611931, | |
| "rewards/rejected": 0.036263808608055115, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.05335040546308152, | |
| "grad_norm": 288.0, | |
| "learning_rate": 4.879844305297005e-06, | |
| "logits/chosen": -4.165538311004639, | |
| "logits/rejected": -4.247755527496338, | |
| "logps/chosen": -781.2611694335938, | |
| "logps/rejected": -623.4193115234375, | |
| "loss": 0.6521, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 0.1412876844406128, | |
| "rewards/margins": 0.09663904458284378, | |
| "rewards/rejected": 0.044648658484220505, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 0.05417118093174431, | |
| "grad_norm": 152.0, | |
| "learning_rate": 4.875613470976477e-06, | |
| "logits/chosen": -4.050336837768555, | |
| "logits/rejected": -4.09099817276001, | |
| "logps/chosen": -526.436767578125, | |
| "logps/rejected": -486.9073181152344, | |
| "loss": 0.6864, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": 0.07214751839637756, | |
| "rewards/margins": 0.0187962856143713, | |
| "rewards/rejected": 0.05335123464465141, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.054991956400407105, | |
| "grad_norm": 340.0, | |
| "learning_rate": 4.871382636655949e-06, | |
| "logits/chosen": -4.108943462371826, | |
| "logits/rejected": -4.278311252593994, | |
| "logps/chosen": -790.7910766601562, | |
| "logps/rejected": -563.6265869140625, | |
| "loss": 0.6585, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.08027210086584091, | |
| "rewards/margins": 0.07762635499238968, | |
| "rewards/rejected": 0.002645747736096382, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 0.0558127318690699, | |
| "grad_norm": 185.0, | |
| "learning_rate": 4.867151802335421e-06, | |
| "logits/chosen": -4.215210437774658, | |
| "logits/rejected": -4.298604965209961, | |
| "logps/chosen": -722.01025390625, | |
| "logps/rejected": -528.048583984375, | |
| "loss": 0.6605, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 0.11607293039560318, | |
| "rewards/margins": 0.07552981376647949, | |
| "rewards/rejected": 0.04054312780499458, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.05663350733773269, | |
| "grad_norm": 278.0, | |
| "learning_rate": 4.862920968014893e-06, | |
| "logits/chosen": -4.231524467468262, | |
| "logits/rejected": -4.164397716522217, | |
| "logps/chosen": -781.760009765625, | |
| "logps/rejected": -626.20751953125, | |
| "loss": 0.6602, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 0.11867771297693253, | |
| "rewards/margins": 0.07886005192995071, | |
| "rewards/rejected": 0.03981764614582062, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 0.05745428280639548, | |
| "grad_norm": 198.0, | |
| "learning_rate": 4.858690133694365e-06, | |
| "logits/chosen": -4.20419454574585, | |
| "logits/rejected": -4.252129554748535, | |
| "logps/chosen": -770.19873046875, | |
| "logps/rejected": -581.0945434570312, | |
| "loss": 0.6478, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.09692225605249405, | |
| "rewards/margins": 0.10454078763723373, | |
| "rewards/rejected": -0.0076185232028365135, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.05827505827505827, | |
| "grad_norm": 252.0, | |
| "learning_rate": 4.854459299373837e-06, | |
| "logits/chosen": -4.024634838104248, | |
| "logits/rejected": -4.1678338050842285, | |
| "logps/chosen": -713.2058715820312, | |
| "logps/rejected": -553.06591796875, | |
| "loss": 0.6573, | |
| "rewards/accuracies": 0.5600000023841858, | |
| "rewards/chosen": 0.11371553689241409, | |
| "rewards/margins": 0.08740261197090149, | |
| "rewards/rejected": 0.026312928646802902, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 0.059095833743721064, | |
| "grad_norm": 197.0, | |
| "learning_rate": 4.850228465053309e-06, | |
| "logits/chosen": -4.1599507331848145, | |
| "logits/rejected": -4.157556056976318, | |
| "logps/chosen": -475.4374084472656, | |
| "logps/rejected": -405.504150390625, | |
| "loss": 0.6723, | |
| "rewards/accuracies": 0.6200000047683716, | |
| "rewards/chosen": 0.034932468086481094, | |
| "rewards/margins": 0.04883403331041336, | |
| "rewards/rejected": -0.01390156988054514, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.05991660921238386, | |
| "grad_norm": 218.0, | |
| "learning_rate": 4.845997630732781e-06, | |
| "logits/chosen": -4.242368698120117, | |
| "logits/rejected": -4.189089775085449, | |
| "logps/chosen": -717.33154296875, | |
| "logps/rejected": -426.8397216796875, | |
| "loss": 0.6618, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 0.11887142807245255, | |
| "rewards/margins": 0.07003708183765411, | |
| "rewards/rejected": 0.048834361135959625, | |
| "step": 1825 | |
| }, | |
| { | |
| "epoch": 0.060737384681046655, | |
| "grad_norm": 165.0, | |
| "learning_rate": 4.841766796412253e-06, | |
| "logits/chosen": -4.1461181640625, | |
| "logits/rejected": -4.220346450805664, | |
| "logps/chosen": -646.8983764648438, | |
| "logps/rejected": -572.834228515625, | |
| "loss": 0.6754, | |
| "rewards/accuracies": 0.5799999833106995, | |
| "rewards/chosen": 0.08920831978321075, | |
| "rewards/margins": 0.04618198052048683, | |
| "rewards/rejected": 0.04302635043859482, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.06155816014970945, | |
| "grad_norm": 320.0, | |
| "learning_rate": 4.837535962091725e-06, | |
| "logits/chosen": -4.157592296600342, | |
| "logits/rejected": -4.301741123199463, | |
| "logps/chosen": -965.8426513671875, | |
| "logps/rejected": -738.236083984375, | |
| "loss": 0.651, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 0.14710073173046112, | |
| "rewards/margins": 0.09817076474428177, | |
| "rewards/rejected": 0.048929959535598755, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 0.06237893561837224, | |
| "grad_norm": 186.0, | |
| "learning_rate": 4.833305127771197e-06, | |
| "logits/chosen": -4.285674571990967, | |
| "logits/rejected": -4.307552337646484, | |
| "logps/chosen": -676.7159423828125, | |
| "logps/rejected": -584.8186645507812, | |
| "loss": 0.6319, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.08745526522397995, | |
| "rewards/margins": 0.14058372378349304, | |
| "rewards/rejected": -0.05312845855951309, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.06319971108703504, | |
| "grad_norm": 217.0, | |
| "learning_rate": 4.829074293450669e-06, | |
| "logits/chosen": -4.085958480834961, | |
| "logits/rejected": -4.113046169281006, | |
| "logps/chosen": -686.4339599609375, | |
| "logps/rejected": -575.382568359375, | |
| "loss": 0.6642, | |
| "rewards/accuracies": 0.6399999856948853, | |
| "rewards/chosen": 0.06624173372983932, | |
| "rewards/margins": 0.06577203422784805, | |
| "rewards/rejected": 0.0004696959222201258, | |
| "step": 1925 | |
| }, | |
| { | |
| "epoch": 0.06402048655569782, | |
| "grad_norm": 175.0, | |
| "learning_rate": 4.824843459130141e-06, | |
| "logits/chosen": -4.064718723297119, | |
| "logits/rejected": -4.079798698425293, | |
| "logps/chosen": -667.0342407226562, | |
| "logps/rejected": -558.95947265625, | |
| "loss": 0.6847, | |
| "rewards/accuracies": 0.5799999833106995, | |
| "rewards/chosen": 0.07132381200790405, | |
| "rewards/margins": 0.031461507081985474, | |
| "rewards/rejected": 0.03986230492591858, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.06484126202436062, | |
| "grad_norm": 151.0, | |
| "learning_rate": 4.820612624809613e-06, | |
| "logits/chosen": -4.2754926681518555, | |
| "logits/rejected": -4.281916618347168, | |
| "logps/chosen": -924.2509155273438, | |
| "logps/rejected": -606.2994384765625, | |
| "loss": 0.6524, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 0.0693202093243599, | |
| "rewards/margins": 0.10057316720485687, | |
| "rewards/rejected": -0.03125295788049698, | |
| "step": 1975 | |
| }, | |
| { | |
| "epoch": 0.0656620374930234, | |
| "grad_norm": 142.0, | |
| "learning_rate": 4.816381790489085e-06, | |
| "logits/chosen": -3.999875545501709, | |
| "logits/rejected": -4.053827285766602, | |
| "logps/chosen": -583.6886596679688, | |
| "logps/rejected": -434.0615234375, | |
| "loss": 0.6805, | |
| "rewards/accuracies": 0.5799999833106995, | |
| "rewards/chosen": 0.04275403916835785, | |
| "rewards/margins": 0.04212899133563042, | |
| "rewards/rejected": 0.000625052722170949, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.0664828129616862, | |
| "grad_norm": 216.0, | |
| "learning_rate": 4.812150956168557e-06, | |
| "logits/chosen": -4.0387349128723145, | |
| "logits/rejected": -4.308966636657715, | |
| "logps/chosen": -707.042724609375, | |
| "logps/rejected": -579.7852172851562, | |
| "loss": 0.6822, | |
| "rewards/accuracies": 0.5600000023841858, | |
| "rewards/chosen": 0.024804413318634033, | |
| "rewards/margins": 0.03053770214319229, | |
| "rewards/rejected": -0.005733292084187269, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 0.06730358843034899, | |
| "grad_norm": 280.0, | |
| "learning_rate": 4.807920121848029e-06, | |
| "logits/chosen": -4.186931133270264, | |
| "logits/rejected": -4.16575813293457, | |
| "logps/chosen": -772.7561645507812, | |
| "logps/rejected": -545.01708984375, | |
| "loss": 0.6805, | |
| "rewards/accuracies": 0.5400000214576721, | |
| "rewards/chosen": 0.06507274508476257, | |
| "rewards/margins": 0.050922200083732605, | |
| "rewards/rejected": 0.014150548726320267, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.06812436389901179, | |
| "grad_norm": 174.0, | |
| "learning_rate": 4.803689287527501e-06, | |
| "logits/chosen": -4.136441707611084, | |
| "logits/rejected": -4.354907035827637, | |
| "logps/chosen": -664.2943725585938, | |
| "logps/rejected": -509.4131164550781, | |
| "loss": 0.6535, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 0.0457613430917263, | |
| "rewards/margins": 0.09452405571937561, | |
| "rewards/rejected": -0.048762716352939606, | |
| "step": 2075 | |
| }, | |
| { | |
| "epoch": 0.06894513936767457, | |
| "grad_norm": 125.0, | |
| "learning_rate": 4.799458453206973e-06, | |
| "logits/chosen": -4.2889180183410645, | |
| "logits/rejected": -4.377254486083984, | |
| "logps/chosen": -819.005615234375, | |
| "logps/rejected": -547.5381469726562, | |
| "loss": 0.6506, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 0.08488751202821732, | |
| "rewards/margins": 0.10479531437158585, | |
| "rewards/rejected": -0.019907798618078232, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.06976591483633737, | |
| "grad_norm": 138.0, | |
| "learning_rate": 4.795227618886445e-06, | |
| "logits/chosen": -4.016672134399414, | |
| "logits/rejected": -4.22638463973999, | |
| "logps/chosen": -694.584716796875, | |
| "logps/rejected": -532.6060791015625, | |
| "loss": 0.6714, | |
| "rewards/accuracies": 0.5400000214576721, | |
| "rewards/chosen": 0.025129133835434914, | |
| "rewards/margins": 0.058808211237192154, | |
| "rewards/rejected": -0.03367907553911209, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 0.07058669030500016, | |
| "grad_norm": 154.0, | |
| "learning_rate": 4.790996784565917e-06, | |
| "logits/chosen": -4.167966365814209, | |
| "logits/rejected": -4.243107795715332, | |
| "logps/chosen": -710.6177978515625, | |
| "logps/rejected": -499.143798828125, | |
| "loss": 0.6524, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 0.10299518704414368, | |
| "rewards/margins": 0.10052691400051117, | |
| "rewards/rejected": 0.002468266524374485, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.07140746577366296, | |
| "grad_norm": 255.0, | |
| "learning_rate": 4.786765950245389e-06, | |
| "logits/chosen": -4.177813529968262, | |
| "logits/rejected": -4.1956963539123535, | |
| "logps/chosen": -716.7862548828125, | |
| "logps/rejected": -503.1024475097656, | |
| "loss": 0.6542, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.0937609001994133, | |
| "rewards/margins": 0.0912182629108429, | |
| "rewards/rejected": 0.0025426370557397604, | |
| "step": 2175 | |
| }, | |
| { | |
| "epoch": 0.07222824124232576, | |
| "grad_norm": 221.0, | |
| "learning_rate": 4.782535115924861e-06, | |
| "logits/chosen": -4.274630546569824, | |
| "logits/rejected": -4.261683940887451, | |
| "logps/chosen": -825.6651000976562, | |
| "logps/rejected": -622.1373291015625, | |
| "loss": 0.6467, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.13351120054721832, | |
| "rewards/margins": 0.11293386667966843, | |
| "rewards/rejected": 0.020577318966388702, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.07304901671098854, | |
| "grad_norm": 153.0, | |
| "learning_rate": 4.778304281604333e-06, | |
| "logits/chosen": -4.295671463012695, | |
| "logits/rejected": -4.270346164703369, | |
| "logps/chosen": -703.2100219726562, | |
| "logps/rejected": -497.50555419921875, | |
| "loss": 0.6664, | |
| "rewards/accuracies": 0.7200000286102295, | |
| "rewards/chosen": 0.06314626336097717, | |
| "rewards/margins": 0.06403078138828278, | |
| "rewards/rejected": -0.0008845186093822122, | |
| "step": 2225 | |
| }, | |
| { | |
| "epoch": 0.07386979217965134, | |
| "grad_norm": 148.0, | |
| "learning_rate": 4.774073447283805e-06, | |
| "logits/chosen": -4.128739356994629, | |
| "logits/rejected": -4.267156600952148, | |
| "logps/chosen": -723.7312622070312, | |
| "logps/rejected": -558.724853515625, | |
| "loss": 0.6135, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.1362067013978958, | |
| "rewards/margins": 0.18841738998889923, | |
| "rewards/rejected": -0.05221069976687431, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.07469056764831412, | |
| "grad_norm": 172.0, | |
| "learning_rate": 4.769842612963277e-06, | |
| "logits/chosen": -4.256707191467285, | |
| "logits/rejected": -4.36817741394043, | |
| "logps/chosen": -731.4835815429688, | |
| "logps/rejected": -561.2887573242188, | |
| "loss": 0.6452, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 0.06940807402133942, | |
| "rewards/margins": 0.11642622202634811, | |
| "rewards/rejected": -0.0470181368291378, | |
| "step": 2275 | |
| }, | |
| { | |
| "epoch": 0.07551134311697692, | |
| "grad_norm": 163.0, | |
| "learning_rate": 4.765611778642749e-06, | |
| "logits/chosen": -4.106626510620117, | |
| "logits/rejected": -4.237212657928467, | |
| "logps/chosen": -624.831298828125, | |
| "logps/rejected": -555.9198608398438, | |
| "loss": 0.6602, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 0.06575372815132141, | |
| "rewards/margins": 0.08808085322380066, | |
| "rewards/rejected": -0.022327115759253502, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.07633211858563971, | |
| "grad_norm": 134.0, | |
| "learning_rate": 4.761380944322221e-06, | |
| "logits/chosen": -4.047150135040283, | |
| "logits/rejected": -4.1057209968566895, | |
| "logps/chosen": -701.8414306640625, | |
| "logps/rejected": -535.3977661132812, | |
| "loss": 0.6631, | |
| "rewards/accuracies": 0.7200000286102295, | |
| "rewards/chosen": 0.09485507011413574, | |
| "rewards/margins": 0.0826391950249672, | |
| "rewards/rejected": 0.012215878814458847, | |
| "step": 2325 | |
| }, | |
| { | |
| "epoch": 0.0771528940543025, | |
| "grad_norm": 274.0, | |
| "learning_rate": 4.757150110001693e-06, | |
| "logits/chosen": -4.054962635040283, | |
| "logits/rejected": -4.135710716247559, | |
| "logps/chosen": -729.4888305664062, | |
| "logps/rejected": -625.6478881835938, | |
| "loss": 0.6432, | |
| "rewards/accuracies": 0.5600000023841858, | |
| "rewards/chosen": 0.09774763882160187, | |
| "rewards/margins": 0.12367933988571167, | |
| "rewards/rejected": -0.02593171037733555, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.07797366952296529, | |
| "grad_norm": 354.0, | |
| "learning_rate": 4.752919275681165e-06, | |
| "logits/chosen": -4.054114818572998, | |
| "logits/rejected": -4.159776210784912, | |
| "logps/chosen": -829.9243774414062, | |
| "logps/rejected": -499.4671630859375, | |
| "loss": 0.6425, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 0.11411032825708389, | |
| "rewards/margins": 0.1236293613910675, | |
| "rewards/rejected": -0.009519041515886784, | |
| "step": 2375 | |
| }, | |
| { | |
| "epoch": 0.07879444499162809, | |
| "grad_norm": 125.0, | |
| "learning_rate": 4.748688441360637e-06, | |
| "logits/chosen": -4.200274467468262, | |
| "logits/rejected": -4.268646240234375, | |
| "logps/chosen": -702.6317749023438, | |
| "logps/rejected": -487.8554992675781, | |
| "loss": 0.6509, | |
| "rewards/accuracies": 0.6399999856948853, | |
| "rewards/chosen": 0.08645128458738327, | |
| "rewards/margins": 0.10850445926189423, | |
| "rewards/rejected": -0.022053170949220657, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.07961522046029089, | |
| "grad_norm": 158.0, | |
| "learning_rate": 4.744457607040109e-06, | |
| "logits/chosen": -4.188302516937256, | |
| "logits/rejected": -4.178359508514404, | |
| "logps/chosen": -740.6657104492188, | |
| "logps/rejected": -549.1505126953125, | |
| "loss": 0.6553, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 0.08103080838918686, | |
| "rewards/margins": 0.09224209934473038, | |
| "rewards/rejected": -0.011211306788027287, | |
| "step": 2425 | |
| }, | |
| { | |
| "epoch": 0.08043599592895367, | |
| "grad_norm": 108.0, | |
| "learning_rate": 4.740226772719581e-06, | |
| "logits/chosen": -3.956430673599243, | |
| "logits/rejected": -4.144464015960693, | |
| "logps/chosen": -579.575439453125, | |
| "logps/rejected": -490.2182922363281, | |
| "loss": 0.611, | |
| "rewards/accuracies": 0.8199999928474426, | |
| "rewards/chosen": 0.14252804219722748, | |
| "rewards/margins": 0.18614298105239868, | |
| "rewards/rejected": -0.043614912778139114, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.08125677139761647, | |
| "grad_norm": 223.0, | |
| "learning_rate": 4.735995938399053e-06, | |
| "logits/chosen": -4.095440864562988, | |
| "logits/rejected": -4.225205898284912, | |
| "logps/chosen": -797.9213256835938, | |
| "logps/rejected": -592.8213500976562, | |
| "loss": 0.6548, | |
| "rewards/accuracies": 0.5600000023841858, | |
| "rewards/chosen": 0.10039664059877396, | |
| "rewards/margins": 0.10022265464067459, | |
| "rewards/rejected": 0.00017398863565176725, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 0.08207754686627926, | |
| "grad_norm": 109.0, | |
| "learning_rate": 4.731765104078525e-06, | |
| "logits/chosen": -4.233191013336182, | |
| "logits/rejected": -4.310283184051514, | |
| "logps/chosen": -621.84326171875, | |
| "logps/rejected": -474.5871887207031, | |
| "loss": 0.6607, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 0.09741487354040146, | |
| "rewards/margins": 0.08807798475027084, | |
| "rewards/rejected": 0.009336878545582294, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.08289832233494206, | |
| "grad_norm": 158.0, | |
| "learning_rate": 4.727534269757997e-06, | |
| "logits/chosen": -4.1276421546936035, | |
| "logits/rejected": -4.145913600921631, | |
| "logps/chosen": -729.427001953125, | |
| "logps/rejected": -513.8289794921875, | |
| "loss": 0.6248, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 0.11118257790803909, | |
| "rewards/margins": 0.16082428395748138, | |
| "rewards/rejected": -0.0496416911482811, | |
| "step": 2525 | |
| }, | |
| { | |
| "epoch": 0.08371909780360484, | |
| "grad_norm": 117.0, | |
| "learning_rate": 4.723303435437469e-06, | |
| "logits/chosen": -4.257935047149658, | |
| "logits/rejected": -4.259277820587158, | |
| "logps/chosen": -620.875, | |
| "logps/rejected": -509.9112548828125, | |
| "loss": 0.6807, | |
| "rewards/accuracies": 0.5799999833106995, | |
| "rewards/chosen": 0.06533210724592209, | |
| "rewards/margins": 0.039884164929389954, | |
| "rewards/rejected": 0.025447946041822433, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.08453987327226764, | |
| "grad_norm": 142.0, | |
| "learning_rate": 4.719072601116941e-06, | |
| "logits/chosen": -4.256199836730957, | |
| "logits/rejected": -4.267094612121582, | |
| "logps/chosen": -683.416748046875, | |
| "logps/rejected": -555.82373046875, | |
| "loss": 0.6384, | |
| "rewards/accuracies": 0.7799999713897705, | |
| "rewards/chosen": 0.10108324140310287, | |
| "rewards/margins": 0.11987866461277008, | |
| "rewards/rejected": -0.018795425072312355, | |
| "step": 2575 | |
| }, | |
| { | |
| "epoch": 0.08536064874093043, | |
| "grad_norm": 156.0, | |
| "learning_rate": 4.714841766796413e-06, | |
| "logits/chosen": -4.165497303009033, | |
| "logits/rejected": -4.2554426193237305, | |
| "logps/chosen": -523.2576904296875, | |
| "logps/rejected": -401.132080078125, | |
| "loss": 0.6547, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 0.0769733414053917, | |
| "rewards/margins": 0.08824872970581055, | |
| "rewards/rejected": -0.011275377124547958, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.08618142420959322, | |
| "grad_norm": 207.0, | |
| "learning_rate": 4.710610932475885e-06, | |
| "logits/chosen": -4.149517059326172, | |
| "logits/rejected": -4.061563491821289, | |
| "logps/chosen": -748.7947387695312, | |
| "logps/rejected": -543.3330688476562, | |
| "loss": 0.647, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 0.08717348426580429, | |
| "rewards/margins": 0.11688578873872757, | |
| "rewards/rejected": -0.029712295159697533, | |
| "step": 2625 | |
| }, | |
| { | |
| "epoch": 0.08700219967825602, | |
| "grad_norm": 165.0, | |
| "learning_rate": 4.706380098155357e-06, | |
| "logits/chosen": -4.1994500160217285, | |
| "logits/rejected": -4.425838470458984, | |
| "logps/chosen": -723.5103149414062, | |
| "logps/rejected": -516.3181762695312, | |
| "loss": 0.6455, | |
| "rewards/accuracies": 0.7799999713897705, | |
| "rewards/chosen": 0.06696411967277527, | |
| "rewards/margins": 0.11419793963432312, | |
| "rewards/rejected": -0.04723381623625755, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.08782297514691881, | |
| "grad_norm": 276.0, | |
| "learning_rate": 4.702149263834829e-06, | |
| "logits/chosen": -4.170817852020264, | |
| "logits/rejected": -4.261829376220703, | |
| "logps/chosen": -769.914306640625, | |
| "logps/rejected": -620.16845703125, | |
| "loss": 0.6665, | |
| "rewards/accuracies": 0.5799999833106995, | |
| "rewards/chosen": 0.13049346208572388, | |
| "rewards/margins": 0.0769289955496788, | |
| "rewards/rejected": 0.053564462810754776, | |
| "step": 2675 | |
| }, | |
| { | |
| "epoch": 0.0886437506155816, | |
| "grad_norm": 318.0, | |
| "learning_rate": 4.697918429514301e-06, | |
| "logits/chosen": -3.8616783618927, | |
| "logits/rejected": -4.007978439331055, | |
| "logps/chosen": -709.0524291992188, | |
| "logps/rejected": -439.6181640625, | |
| "loss": 0.6312, | |
| "rewards/accuracies": 0.7200000286102295, | |
| "rewards/chosen": 0.10517993569374084, | |
| "rewards/margins": 0.14700084924697876, | |
| "rewards/rejected": -0.041820917278528214, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.08946452608424439, | |
| "grad_norm": 152.0, | |
| "learning_rate": 4.693687595193773e-06, | |
| "logits/chosen": -4.251624584197998, | |
| "logits/rejected": -4.1947197914123535, | |
| "logps/chosen": -780.1447143554688, | |
| "logps/rejected": -565.7095947265625, | |
| "loss": 0.6175, | |
| "rewards/accuracies": 0.7400000095367432, | |
| "rewards/chosen": 0.12333157658576965, | |
| "rewards/margins": 0.19305434823036194, | |
| "rewards/rejected": -0.06972277909517288, | |
| "step": 2725 | |
| }, | |
| { | |
| "epoch": 0.09028530155290719, | |
| "grad_norm": 111.0, | |
| "learning_rate": 4.689456760873245e-06, | |
| "logits/chosen": -4.107117652893066, | |
| "logits/rejected": -4.242709636688232, | |
| "logps/chosen": -627.9918212890625, | |
| "logps/rejected": -433.5965881347656, | |
| "loss": 0.6195, | |
| "rewards/accuracies": 0.7200000286102295, | |
| "rewards/chosen": 0.08907662332057953, | |
| "rewards/margins": 0.17075376212596893, | |
| "rewards/rejected": -0.08167713135480881, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.09110607702156998, | |
| "grad_norm": 169.0, | |
| "learning_rate": 4.685225926552717e-06, | |
| "logits/chosen": -4.189664840698242, | |
| "logits/rejected": -4.309139728546143, | |
| "logps/chosen": -799.1683349609375, | |
| "logps/rejected": -554.3936767578125, | |
| "loss": 0.6118, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.1505787968635559, | |
| "rewards/margins": 0.19868890941143036, | |
| "rewards/rejected": -0.048110123723745346, | |
| "step": 2775 | |
| }, | |
| { | |
| "epoch": 0.09192685249023277, | |
| "grad_norm": 187.0, | |
| "learning_rate": 4.680995092232189e-06, | |
| "logits/chosen": -4.031192779541016, | |
| "logits/rejected": -4.0329108238220215, | |
| "logps/chosen": -847.0072631835938, | |
| "logps/rejected": -591.4379272460938, | |
| "loss": 0.6511, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 0.07973573356866837, | |
| "rewards/margins": 0.11566194891929626, | |
| "rewards/rejected": -0.0359262190759182, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.09274762795889556, | |
| "grad_norm": 232.0, | |
| "learning_rate": 4.676764257911661e-06, | |
| "logits/chosen": -4.184224605560303, | |
| "logits/rejected": -4.380885124206543, | |
| "logps/chosen": -766.1116943359375, | |
| "logps/rejected": -525.26318359375, | |
| "loss": 0.6456, | |
| "rewards/accuracies": 0.5799999833106995, | |
| "rewards/chosen": 0.12456972897052765, | |
| "rewards/margins": 0.12566854059696198, | |
| "rewards/rejected": -0.0010988200083374977, | |
| "step": 2825 | |
| }, | |
| { | |
| "epoch": 0.09356840342755836, | |
| "grad_norm": 254.0, | |
| "learning_rate": 4.672533423591132e-06, | |
| "logits/chosen": -4.144000053405762, | |
| "logits/rejected": -4.1139092445373535, | |
| "logps/chosen": -696.5800170898438, | |
| "logps/rejected": -422.58770751953125, | |
| "loss": 0.6521, | |
| "rewards/accuracies": 0.6200000047683716, | |
| "rewards/chosen": 0.05194849520921707, | |
| "rewards/margins": 0.11492128670215607, | |
| "rewards/rejected": -0.0629727840423584, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.09438917889622114, | |
| "grad_norm": 131.0, | |
| "learning_rate": 4.668302589270605e-06, | |
| "logits/chosen": -4.328439712524414, | |
| "logits/rejected": -4.420200347900391, | |
| "logps/chosen": -755.8521728515625, | |
| "logps/rejected": -508.4173889160156, | |
| "loss": 0.6161, | |
| "rewards/accuracies": 0.7799999713897705, | |
| "rewards/chosen": 0.15212687849998474, | |
| "rewards/margins": 0.18056415021419525, | |
| "rewards/rejected": -0.028437262400984764, | |
| "step": 2875 | |
| }, | |
| { | |
| "epoch": 0.09520995436488394, | |
| "grad_norm": 129.0, | |
| "learning_rate": 4.664071754950076e-06, | |
| "logits/chosen": -4.203272819519043, | |
| "logits/rejected": -4.173132419586182, | |
| "logps/chosen": -728.8046264648438, | |
| "logps/rejected": -540.3849487304688, | |
| "loss": 0.597, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": 0.1777937114238739, | |
| "rewards/margins": 0.2252504974603653, | |
| "rewards/rejected": -0.04745679721236229, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.09603072983354674, | |
| "grad_norm": 99.0, | |
| "learning_rate": 4.659840920629549e-06, | |
| "logits/chosen": -4.030066967010498, | |
| "logits/rejected": -4.034922122955322, | |
| "logps/chosen": -724.56689453125, | |
| "logps/rejected": -503.8728942871094, | |
| "loss": 0.6176, | |
| "rewards/accuracies": 0.7799999713897705, | |
| "rewards/chosen": 0.14354346692562103, | |
| "rewards/margins": 0.17834116518497467, | |
| "rewards/rejected": -0.034797683358192444, | |
| "step": 2925 | |
| }, | |
| { | |
| "epoch": 0.09685150530220953, | |
| "grad_norm": 374.0, | |
| "learning_rate": 4.65561008630902e-06, | |
| "logits/chosen": -4.154969692230225, | |
| "logits/rejected": -4.25916862487793, | |
| "logps/chosen": -688.744873046875, | |
| "logps/rejected": -566.5956420898438, | |
| "loss": 0.6823, | |
| "rewards/accuracies": 0.5600000023841858, | |
| "rewards/chosen": 0.0816277414560318, | |
| "rewards/margins": 0.05097034573554993, | |
| "rewards/rejected": 0.03065740317106247, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.09767228077087232, | |
| "grad_norm": 226.0, | |
| "learning_rate": 4.651379251988493e-06, | |
| "logits/chosen": -4.115259170532227, | |
| "logits/rejected": -4.045093536376953, | |
| "logps/chosen": -814.7285766601562, | |
| "logps/rejected": -566.1422119140625, | |
| "loss": 0.6329, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.1374712586402893, | |
| "rewards/margins": 0.15288038551807404, | |
| "rewards/rejected": -0.015409140847623348, | |
| "step": 2975 | |
| }, | |
| { | |
| "epoch": 0.09849305623953511, | |
| "grad_norm": 180.0, | |
| "learning_rate": 4.647148417667964e-06, | |
| "logits/chosen": -3.97808837890625, | |
| "logits/rejected": -4.152464389801025, | |
| "logps/chosen": -530.7527465820312, | |
| "logps/rejected": -402.433349609375, | |
| "loss": 0.6348, | |
| "rewards/accuracies": 0.7400000095367432, | |
| "rewards/chosen": 0.10918596386909485, | |
| "rewards/margins": 0.13875608146190643, | |
| "rewards/rejected": -0.02957012504339218, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.09931383170819791, | |
| "grad_norm": 183.0, | |
| "learning_rate": 4.642917583347437e-06, | |
| "logits/chosen": -4.104302883148193, | |
| "logits/rejected": -4.1437273025512695, | |
| "logps/chosen": -675.0985717773438, | |
| "logps/rejected": -486.0548095703125, | |
| "loss": 0.6402, | |
| "rewards/accuracies": 0.6200000047683716, | |
| "rewards/chosen": 0.10294067114591599, | |
| "rewards/margins": 0.13575658202171326, | |
| "rewards/rejected": -0.03281591460108757, | |
| "step": 3025 | |
| }, | |
| { | |
| "epoch": 0.1001346071768607, | |
| "grad_norm": 135.0, | |
| "learning_rate": 4.638686749026908e-06, | |
| "logits/chosen": -4.05023193359375, | |
| "logits/rejected": -4.130072593688965, | |
| "logps/chosen": -598.7313232421875, | |
| "logps/rejected": -418.406982421875, | |
| "loss": 0.652, | |
| "rewards/accuracies": 0.7400000095367432, | |
| "rewards/chosen": 0.08227918297052383, | |
| "rewards/margins": 0.10548584908246994, | |
| "rewards/rejected": -0.023206667974591255, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.10095538264552349, | |
| "grad_norm": 155.0, | |
| "learning_rate": 4.634455914706381e-06, | |
| "logits/chosen": -4.130331039428711, | |
| "logits/rejected": -4.150590896606445, | |
| "logps/chosen": -792.5228881835938, | |
| "logps/rejected": -588.8016967773438, | |
| "loss": 0.6174, | |
| "rewards/accuracies": 0.6399999856948853, | |
| "rewards/chosen": 0.12860740721225739, | |
| "rewards/margins": 0.1893465369939804, | |
| "rewards/rejected": -0.06073914095759392, | |
| "step": 3075 | |
| }, | |
| { | |
| "epoch": 0.10177615811418628, | |
| "grad_norm": 187.0, | |
| "learning_rate": 4.630225080385852e-06, | |
| "logits/chosen": -4.148515224456787, | |
| "logits/rejected": -4.221671104431152, | |
| "logps/chosen": -591.18994140625, | |
| "logps/rejected": -556.4551391601562, | |
| "loss": 0.6459, | |
| "rewards/accuracies": 0.6399999856948853, | |
| "rewards/chosen": 0.06625935435295105, | |
| "rewards/margins": 0.11798873543739319, | |
| "rewards/rejected": -0.05172938480973244, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.10259693358284908, | |
| "grad_norm": 173.0, | |
| "learning_rate": 4.625994246065325e-06, | |
| "logits/chosen": -4.128535747528076, | |
| "logits/rejected": -4.260271072387695, | |
| "logps/chosen": -821.3890380859375, | |
| "logps/rejected": -580.5010986328125, | |
| "loss": 0.6233, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 0.13844671845436096, | |
| "rewards/margins": 0.19154323637485504, | |
| "rewards/rejected": -0.05309649929404259, | |
| "step": 3125 | |
| }, | |
| { | |
| "epoch": 0.10341770905151187, | |
| "grad_norm": 71.0, | |
| "learning_rate": 4.621763411744796e-06, | |
| "logits/chosen": -4.129300117492676, | |
| "logits/rejected": -4.251578330993652, | |
| "logps/chosen": -769.3382568359375, | |
| "logps/rejected": -589.3805541992188, | |
| "loss": 0.6388, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 0.053467120975255966, | |
| "rewards/margins": 0.14735950529575348, | |
| "rewards/rejected": -0.09389238059520721, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.10423848452017466, | |
| "grad_norm": 124.5, | |
| "learning_rate": 4.617532577424269e-06, | |
| "logits/chosen": -4.02286958694458, | |
| "logits/rejected": -4.068836212158203, | |
| "logps/chosen": -746.35205078125, | |
| "logps/rejected": -515.939453125, | |
| "loss": 0.5937, | |
| "rewards/accuracies": 0.7599999904632568, | |
| "rewards/chosen": 0.16510872542858124, | |
| "rewards/margins": 0.237737774848938, | |
| "rewards/rejected": -0.07262909412384033, | |
| "step": 3175 | |
| }, | |
| { | |
| "epoch": 0.10505925998883746, | |
| "grad_norm": 183.0, | |
| "learning_rate": 4.61330174310374e-06, | |
| "logits/chosen": -4.258195400238037, | |
| "logits/rejected": -4.252414703369141, | |
| "logps/chosen": -774.0881958007812, | |
| "logps/rejected": -619.4967041015625, | |
| "loss": 0.6142, | |
| "rewards/accuracies": 0.7200000286102295, | |
| "rewards/chosen": 0.1559956967830658, | |
| "rewards/margins": 0.2075086236000061, | |
| "rewards/rejected": -0.05151292681694031, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.10588003545750024, | |
| "grad_norm": 167.0, | |
| "learning_rate": 4.609070908783213e-06, | |
| "logits/chosen": -4.104750156402588, | |
| "logits/rejected": -4.103361129760742, | |
| "logps/chosen": -654.9664916992188, | |
| "logps/rejected": -494.8394470214844, | |
| "loss": 0.6136, | |
| "rewards/accuracies": 0.7200000286102295, | |
| "rewards/chosen": 0.1408548504114151, | |
| "rewards/margins": 0.21712234616279602, | |
| "rewards/rejected": -0.07626748830080032, | |
| "step": 3225 | |
| }, | |
| { | |
| "epoch": 0.10670081092616304, | |
| "grad_norm": 139.0, | |
| "learning_rate": 4.604840074462684e-06, | |
| "logits/chosen": -4.338498115539551, | |
| "logits/rejected": -4.383465766906738, | |
| "logps/chosen": -923.0902099609375, | |
| "logps/rejected": -563.9151611328125, | |
| "loss": 0.618, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.15305353701114655, | |
| "rewards/margins": 0.19835226237773895, | |
| "rewards/rejected": -0.04529871046543121, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.10752158639482583, | |
| "grad_norm": 103.0, | |
| "learning_rate": 4.600609240142157e-06, | |
| "logits/chosen": -4.177867889404297, | |
| "logits/rejected": -4.26078987121582, | |
| "logps/chosen": -620.0000610351562, | |
| "logps/rejected": -437.626953125, | |
| "loss": 0.6493, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 0.08293507248163223, | |
| "rewards/margins": 0.12653230130672455, | |
| "rewards/rejected": -0.04359724000096321, | |
| "step": 3275 | |
| }, | |
| { | |
| "epoch": 0.10834236186348863, | |
| "grad_norm": 134.0, | |
| "learning_rate": 4.596378405821628e-06, | |
| "logits/chosen": -4.094875335693359, | |
| "logits/rejected": -4.194637775421143, | |
| "logps/chosen": -598.10498046875, | |
| "logps/rejected": -512.329833984375, | |
| "loss": 0.6383, | |
| "rewards/accuracies": 0.6200000047683716, | |
| "rewards/chosen": 0.1556520313024521, | |
| "rewards/margins": 0.15189100801944733, | |
| "rewards/rejected": 0.0037610388826578856, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 0.10916313733215141, | |
| "grad_norm": 203.0, | |
| "learning_rate": 4.592147571501101e-06, | |
| "logits/chosen": -4.059422492980957, | |
| "logits/rejected": -4.308881759643555, | |
| "logps/chosen": -587.382568359375, | |
| "logps/rejected": -533.905029296875, | |
| "loss": 0.6631, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.0650976300239563, | |
| "rewards/margins": 0.08459506183862686, | |
| "rewards/rejected": -0.01949743553996086, | |
| "step": 3325 | |
| }, | |
| { | |
| "epoch": 0.10998391280081421, | |
| "grad_norm": 161.0, | |
| "learning_rate": 4.587916737180572e-06, | |
| "logits/chosen": -4.154107570648193, | |
| "logits/rejected": -4.236989974975586, | |
| "logps/chosen": -815.28173828125, | |
| "logps/rejected": -604.0435791015625, | |
| "loss": 0.6354, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 0.17469516396522522, | |
| "rewards/margins": 0.16531267762184143, | |
| "rewards/rejected": 0.009382497519254684, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 0.11080468826947701, | |
| "grad_norm": 165.0, | |
| "learning_rate": 4.583685902860045e-06, | |
| "logits/chosen": -4.099838733673096, | |
| "logits/rejected": -4.225775241851807, | |
| "logps/chosen": -628.6179809570312, | |
| "logps/rejected": -413.48553466796875, | |
| "loss": 0.6492, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 0.10960385948419571, | |
| "rewards/margins": 0.12888643145561218, | |
| "rewards/rejected": -0.019282571971416473, | |
| "step": 3375 | |
| }, | |
| { | |
| "epoch": 0.1116254637381398, | |
| "grad_norm": 286.0, | |
| "learning_rate": 4.579455068539516e-06, | |
| "logits/chosen": -4.12373161315918, | |
| "logits/rejected": -4.227139472961426, | |
| "logps/chosen": -669.8875122070312, | |
| "logps/rejected": -554.4998168945312, | |
| "loss": 0.6217, | |
| "rewards/accuracies": 0.6200000047683716, | |
| "rewards/chosen": 0.08527453243732452, | |
| "rewards/margins": 0.1920253038406372, | |
| "rewards/rejected": -0.10675078630447388, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.11244623920680259, | |
| "grad_norm": 222.0, | |
| "learning_rate": 4.575224234218989e-06, | |
| "logits/chosen": -4.115863800048828, | |
| "logits/rejected": -4.1790452003479, | |
| "logps/chosen": -704.6529541015625, | |
| "logps/rejected": -396.4385070800781, | |
| "loss": 0.66, | |
| "rewards/accuracies": 0.5400000214576721, | |
| "rewards/chosen": 0.07934532314538956, | |
| "rewards/margins": 0.10207292437553406, | |
| "rewards/rejected": -0.0227276012301445, | |
| "step": 3425 | |
| }, | |
| { | |
| "epoch": 0.11326701467546538, | |
| "grad_norm": 151.0, | |
| "learning_rate": 4.57099339989846e-06, | |
| "logits/chosen": -4.064518451690674, | |
| "logits/rejected": -4.319989204406738, | |
| "logps/chosen": -744.1235961914062, | |
| "logps/rejected": -554.1798706054688, | |
| "loss": 0.6139, | |
| "rewards/accuracies": 0.7200000286102295, | |
| "rewards/chosen": 0.11846752464771271, | |
| "rewards/margins": 0.21143819391727448, | |
| "rewards/rejected": -0.09297066926956177, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 0.11408779014412818, | |
| "grad_norm": 155.0, | |
| "learning_rate": 4.566762565577933e-06, | |
| "logits/chosen": -4.110750675201416, | |
| "logits/rejected": -4.162017822265625, | |
| "logps/chosen": -641.3878173828125, | |
| "logps/rejected": -398.95648193359375, | |
| "loss": 0.6275, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 0.11753468215465546, | |
| "rewards/margins": 0.16651563346385956, | |
| "rewards/rejected": -0.0489809475839138, | |
| "step": 3475 | |
| }, | |
| { | |
| "epoch": 0.11490856561279096, | |
| "grad_norm": 247.0, | |
| "learning_rate": 4.562531731257404e-06, | |
| "logits/chosen": -4.102660655975342, | |
| "logits/rejected": -4.216392517089844, | |
| "logps/chosen": -780.7814331054688, | |
| "logps/rejected": -678.3236694335938, | |
| "loss": 0.6189, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": 0.17697925865650177, | |
| "rewards/margins": 0.19066447019577026, | |
| "rewards/rejected": -0.013685191050171852, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.11572934108145376, | |
| "grad_norm": 156.0, | |
| "learning_rate": 4.558300896936877e-06, | |
| "logits/chosen": -4.092509746551514, | |
| "logits/rejected": -4.2496185302734375, | |
| "logps/chosen": -773.97607421875, | |
| "logps/rejected": -715.7351684570312, | |
| "loss": 0.6341, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 0.16046512126922607, | |
| "rewards/margins": 0.1650882363319397, | |
| "rewards/rejected": -0.004623092710971832, | |
| "step": 3525 | |
| }, | |
| { | |
| "epoch": 0.11655011655011654, | |
| "grad_norm": 251.0, | |
| "learning_rate": 4.554070062616348e-06, | |
| "logits/chosen": -4.061056613922119, | |
| "logits/rejected": -4.082568168640137, | |
| "logps/chosen": -647.5535278320312, | |
| "logps/rejected": -458.9394836425781, | |
| "loss": 0.6262, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.14398512244224548, | |
| "rewards/margins": 0.17963096499443054, | |
| "rewards/rejected": -0.03564583137631416, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 0.11737089201877934, | |
| "grad_norm": 230.0, | |
| "learning_rate": 4.54983922829582e-06, | |
| "logits/chosen": -4.078422546386719, | |
| "logits/rejected": -4.286499977111816, | |
| "logps/chosen": -601.8107299804688, | |
| "logps/rejected": -559.1649780273438, | |
| "loss": 0.6041, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 0.17145298421382904, | |
| "rewards/margins": 0.21858105063438416, | |
| "rewards/rejected": -0.04712804779410362, | |
| "step": 3575 | |
| }, | |
| { | |
| "epoch": 0.11819166748744213, | |
| "grad_norm": 223.0, | |
| "learning_rate": 4.545608393975292e-06, | |
| "logits/chosen": -4.175769805908203, | |
| "logits/rejected": -4.221960544586182, | |
| "logps/chosen": -651.9328002929688, | |
| "logps/rejected": -498.6581115722656, | |
| "loss": 0.6304, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 0.10373630374670029, | |
| "rewards/margins": 0.16331829130649567, | |
| "rewards/rejected": -0.05958200618624687, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.11901244295610493, | |
| "grad_norm": 164.0, | |
| "learning_rate": 4.541377559654764e-06, | |
| "logits/chosen": -4.202296733856201, | |
| "logits/rejected": -4.219254970550537, | |
| "logps/chosen": -789.4403076171875, | |
| "logps/rejected": -508.3605041503906, | |
| "loss": 0.6121, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.21312782168388367, | |
| "rewards/margins": 0.21922513842582703, | |
| "rewards/rejected": -0.006097340490669012, | |
| "step": 3625 | |
| }, | |
| { | |
| "epoch": 0.11983321842476773, | |
| "grad_norm": 106.0, | |
| "learning_rate": 4.537146725334236e-06, | |
| "logits/chosen": -4.121474742889404, | |
| "logits/rejected": -4.223160266876221, | |
| "logps/chosen": -642.5357666015625, | |
| "logps/rejected": -391.467041015625, | |
| "loss": 0.6079, | |
| "rewards/accuracies": 0.7400000095367432, | |
| "rewards/chosen": 0.1511656790971756, | |
| "rewards/margins": 0.20676733553409576, | |
| "rewards/rejected": -0.05560165271162987, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 0.12065399389343051, | |
| "grad_norm": 143.0, | |
| "learning_rate": 4.532915891013708e-06, | |
| "logits/chosen": -3.9525647163391113, | |
| "logits/rejected": -4.0206708908081055, | |
| "logps/chosen": -648.7009887695312, | |
| "logps/rejected": -420.0541687011719, | |
| "loss": 0.6235, | |
| "rewards/accuracies": 0.7200000286102295, | |
| "rewards/chosen": 0.11896297335624695, | |
| "rewards/margins": 0.18399059772491455, | |
| "rewards/rejected": -0.06502760946750641, | |
| "step": 3675 | |
| }, | |
| { | |
| "epoch": 0.12147476936209331, | |
| "grad_norm": 117.0, | |
| "learning_rate": 4.52868505669318e-06, | |
| "logits/chosen": -4.178898334503174, | |
| "logits/rejected": -4.247806072235107, | |
| "logps/chosen": -669.1397094726562, | |
| "logps/rejected": -537.1188354492188, | |
| "loss": 0.5979, | |
| "rewards/accuracies": 0.8399999737739563, | |
| "rewards/chosen": 0.11097316443920135, | |
| "rewards/margins": 0.24359607696533203, | |
| "rewards/rejected": -0.13262291252613068, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 0.1222955448307561, | |
| "grad_norm": 258.0, | |
| "learning_rate": 4.524454222372652e-06, | |
| "logits/chosen": -4.230034828186035, | |
| "logits/rejected": -4.431790351867676, | |
| "logps/chosen": -781.9459228515625, | |
| "logps/rejected": -604.2550659179688, | |
| "loss": 0.6705, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 0.02702181786298752, | |
| "rewards/margins": 0.08671737462282181, | |
| "rewards/rejected": -0.059695564210414886, | |
| "step": 3725 | |
| }, | |
| { | |
| "epoch": 0.1231163202994189, | |
| "grad_norm": 92.5, | |
| "learning_rate": 4.520223388052124e-06, | |
| "logits/chosen": -4.086462497711182, | |
| "logits/rejected": -4.117921829223633, | |
| "logps/chosen": -670.205810546875, | |
| "logps/rejected": -539.9747924804688, | |
| "loss": 0.6525, | |
| "rewards/accuracies": 0.6399999856948853, | |
| "rewards/chosen": 0.1253843605518341, | |
| "rewards/margins": 0.12610657513141632, | |
| "rewards/rejected": -0.0007222199346870184, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.12393709576808168, | |
| "grad_norm": 163.0, | |
| "learning_rate": 4.515992553731596e-06, | |
| "logits/chosen": -4.041085720062256, | |
| "logits/rejected": -4.055666446685791, | |
| "logps/chosen": -655.8377075195312, | |
| "logps/rejected": -762.2373657226562, | |
| "loss": 0.612, | |
| "rewards/accuracies": 0.7200000286102295, | |
| "rewards/chosen": 0.10744628310203552, | |
| "rewards/margins": 0.4711156487464905, | |
| "rewards/rejected": -0.3636693060398102, | |
| "step": 3775 | |
| }, | |
| { | |
| "epoch": 0.12475787123674448, | |
| "grad_norm": 130.0, | |
| "learning_rate": 4.511761719411068e-06, | |
| "logits/chosen": -3.9867475032806396, | |
| "logits/rejected": -4.021495819091797, | |
| "logps/chosen": -705.5885009765625, | |
| "logps/rejected": -611.0843505859375, | |
| "loss": 0.6303, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 0.12187641113996506, | |
| "rewards/margins": 0.16648361086845398, | |
| "rewards/rejected": -0.04460718482732773, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.12557864670540728, | |
| "grad_norm": 142.0, | |
| "learning_rate": 4.50753088509054e-06, | |
| "logits/chosen": -4.264425754547119, | |
| "logits/rejected": -4.1908040046691895, | |
| "logps/chosen": -770.77294921875, | |
| "logps/rejected": -574.1017456054688, | |
| "loss": 0.6508, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.1506727933883667, | |
| "rewards/margins": 0.17019566893577576, | |
| "rewards/rejected": -0.019522881135344505, | |
| "step": 3825 | |
| }, | |
| { | |
| "epoch": 0.12639942217407008, | |
| "grad_norm": 150.0, | |
| "learning_rate": 4.503300050770012e-06, | |
| "logits/chosen": -4.039714813232422, | |
| "logits/rejected": -4.125188827514648, | |
| "logps/chosen": -787.6677856445312, | |
| "logps/rejected": -570.5169067382812, | |
| "loss": 0.6468, | |
| "rewards/accuracies": 0.5600000023841858, | |
| "rewards/chosen": 0.1255396604537964, | |
| "rewards/margins": 0.12908677756786346, | |
| "rewards/rejected": -0.0035471057053655386, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 0.12722019764273285, | |
| "grad_norm": 225.0, | |
| "learning_rate": 4.499069216449484e-06, | |
| "logits/chosen": -4.007396697998047, | |
| "logits/rejected": -3.876781702041626, | |
| "logps/chosen": -626.9278564453125, | |
| "logps/rejected": -422.06170654296875, | |
| "loss": 0.6722, | |
| "rewards/accuracies": 0.5799999833106995, | |
| "rewards/chosen": 0.06111162155866623, | |
| "rewards/margins": 0.07888734340667725, | |
| "rewards/rejected": -0.017775723710656166, | |
| "step": 3875 | |
| }, | |
| { | |
| "epoch": 0.12804097311139565, | |
| "grad_norm": 354.0, | |
| "learning_rate": 4.494838382128956e-06, | |
| "logits/chosen": -4.007481575012207, | |
| "logits/rejected": -3.866903781890869, | |
| "logps/chosen": -805.19921875, | |
| "logps/rejected": -565.35107421875, | |
| "loss": 0.6381, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 0.1270570307970047, | |
| "rewards/margins": 0.1453864425420761, | |
| "rewards/rejected": -0.018329383805394173, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 0.12886174858005844, | |
| "grad_norm": 151.0, | |
| "learning_rate": 4.490607547808428e-06, | |
| "logits/chosen": -4.091001987457275, | |
| "logits/rejected": -4.0446929931640625, | |
| "logps/chosen": -804.8853759765625, | |
| "logps/rejected": -614.6016845703125, | |
| "loss": 0.6234, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 0.12878599762916565, | |
| "rewards/margins": 0.19198790192604065, | |
| "rewards/rejected": -0.0632018968462944, | |
| "step": 3925 | |
| }, | |
| { | |
| "epoch": 0.12968252404872124, | |
| "grad_norm": 89.0, | |
| "learning_rate": 4.4863767134879e-06, | |
| "logits/chosen": -4.09019660949707, | |
| "logits/rejected": -4.269530773162842, | |
| "logps/chosen": -662.8995971679688, | |
| "logps/rejected": -482.9696044921875, | |
| "loss": 0.6567, | |
| "rewards/accuracies": 0.5799999833106995, | |
| "rewards/chosen": 0.08450495451688766, | |
| "rewards/margins": 0.10292019695043564, | |
| "rewards/rejected": -0.018415246158838272, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 0.130503299517384, | |
| "grad_norm": 352.0, | |
| "learning_rate": 4.482145879167372e-06, | |
| "logits/chosen": -4.184777736663818, | |
| "logits/rejected": -4.207626819610596, | |
| "logps/chosen": -727.8124389648438, | |
| "logps/rejected": -498.6629638671875, | |
| "loss": 0.6473, | |
| "rewards/accuracies": 0.6399999856948853, | |
| "rewards/chosen": 0.17035777866840363, | |
| "rewards/margins": 0.12111759930849075, | |
| "rewards/rejected": 0.04924018308520317, | |
| "step": 3975 | |
| }, | |
| { | |
| "epoch": 0.1313240749860468, | |
| "grad_norm": 220.0, | |
| "learning_rate": 4.477915044846844e-06, | |
| "logits/chosen": -4.153555393218994, | |
| "logits/rejected": -4.249951362609863, | |
| "logps/chosen": -749.2251586914062, | |
| "logps/rejected": -574.6797485351562, | |
| "loss": 0.6228, | |
| "rewards/accuracies": 0.7599999904632568, | |
| "rewards/chosen": 0.14904262125492096, | |
| "rewards/margins": 0.18919992446899414, | |
| "rewards/rejected": -0.04015731438994408, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.1321448504547096, | |
| "grad_norm": 116.5, | |
| "learning_rate": 4.473684210526316e-06, | |
| "logits/chosen": -4.1704230308532715, | |
| "logits/rejected": -4.231996536254883, | |
| "logps/chosen": -990.6297607421875, | |
| "logps/rejected": -666.6671752929688, | |
| "loss": 0.6352, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.1348913311958313, | |
| "rewards/margins": 0.17748390138149261, | |
| "rewards/rejected": -0.04259258881211281, | |
| "step": 4025 | |
| }, | |
| { | |
| "epoch": 0.1329656259233724, | |
| "grad_norm": 450.0, | |
| "learning_rate": 4.469453376205788e-06, | |
| "logits/chosen": -4.276303291320801, | |
| "logits/rejected": -4.25356388092041, | |
| "logps/chosen": -855.682373046875, | |
| "logps/rejected": -677.4093627929688, | |
| "loss": 0.6157, | |
| "rewards/accuracies": 0.7400000095367432, | |
| "rewards/chosen": 0.14246423542499542, | |
| "rewards/margins": 0.2275465726852417, | |
| "rewards/rejected": -0.08508235216140747, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 0.13378640139203518, | |
| "grad_norm": 209.0, | |
| "learning_rate": 4.46522254188526e-06, | |
| "logits/chosen": -4.336771488189697, | |
| "logits/rejected": -4.413280963897705, | |
| "logps/chosen": -787.864501953125, | |
| "logps/rejected": -556.9666748046875, | |
| "loss": 0.6387, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.1050407811999321, | |
| "rewards/margins": 0.16268308460712433, | |
| "rewards/rejected": -0.05764232203364372, | |
| "step": 4075 | |
| }, | |
| { | |
| "epoch": 0.13460717686069798, | |
| "grad_norm": 129.0, | |
| "learning_rate": 4.460991707564732e-06, | |
| "logits/chosen": -4.026488304138184, | |
| "logits/rejected": -4.244543552398682, | |
| "logps/chosen": -593.8565063476562, | |
| "logps/rejected": -494.901611328125, | |
| "loss": 0.6009, | |
| "rewards/accuracies": 0.7599999904632568, | |
| "rewards/chosen": 0.05559429153800011, | |
| "rewards/margins": 0.2449643760919571, | |
| "rewards/rejected": -0.1893700361251831, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 0.13542795232936078, | |
| "grad_norm": 126.5, | |
| "learning_rate": 4.456760873244204e-06, | |
| "logits/chosen": -4.122188091278076, | |
| "logits/rejected": -4.109318733215332, | |
| "logps/chosen": -474.5272521972656, | |
| "logps/rejected": -401.0484619140625, | |
| "loss": 0.6556, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.09522067755460739, | |
| "rewards/margins": 0.10534299910068512, | |
| "rewards/rejected": -0.010122320614755154, | |
| "step": 4125 | |
| }, | |
| { | |
| "epoch": 0.13624872779802358, | |
| "grad_norm": 170.0, | |
| "learning_rate": 4.452530038923676e-06, | |
| "logits/chosen": -4.061251640319824, | |
| "logits/rejected": -4.103055000305176, | |
| "logps/chosen": -752.6796875, | |
| "logps/rejected": -524.8905029296875, | |
| "loss": 0.6571, | |
| "rewards/accuracies": 0.6399999856948853, | |
| "rewards/chosen": 0.09011424332857132, | |
| "rewards/margins": 0.11959852278232574, | |
| "rewards/rejected": -0.029484273865818977, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 0.13706950326668638, | |
| "grad_norm": 174.0, | |
| "learning_rate": 4.448299204603148e-06, | |
| "logits/chosen": -4.170642375946045, | |
| "logits/rejected": -4.242205619812012, | |
| "logps/chosen": -837.6634521484375, | |
| "logps/rejected": -575.63525390625, | |
| "loss": 0.6356, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 0.1658136397600174, | |
| "rewards/margins": 0.16742119193077087, | |
| "rewards/rejected": -0.0016075682360678911, | |
| "step": 4175 | |
| }, | |
| { | |
| "epoch": 0.13789027873534915, | |
| "grad_norm": 390.0, | |
| "learning_rate": 4.44406837028262e-06, | |
| "logits/chosen": -4.180340766906738, | |
| "logits/rejected": -4.24651575088501, | |
| "logps/chosen": -639.0101318359375, | |
| "logps/rejected": -509.2608642578125, | |
| "loss": 0.6631, | |
| "rewards/accuracies": 0.6399999856948853, | |
| "rewards/chosen": 0.05516006052494049, | |
| "rewards/margins": 0.1022617444396019, | |
| "rewards/rejected": -0.047101687639951706, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.13871105420401195, | |
| "grad_norm": 119.5, | |
| "learning_rate": 4.439837535962092e-06, | |
| "logits/chosen": -4.179917335510254, | |
| "logits/rejected": -4.300662994384766, | |
| "logps/chosen": -896.769775390625, | |
| "logps/rejected": -689.7025756835938, | |
| "loss": 0.6706, | |
| "rewards/accuracies": 0.5799999833106995, | |
| "rewards/chosen": 0.12181320041418076, | |
| "rewards/margins": 0.09058444947004318, | |
| "rewards/rejected": 0.031228771433234215, | |
| "step": 4225 | |
| }, | |
| { | |
| "epoch": 0.13953182967267475, | |
| "grad_norm": 207.0, | |
| "learning_rate": 4.435606701641564e-06, | |
| "logits/chosen": -4.1937360763549805, | |
| "logits/rejected": -4.245815277099609, | |
| "logps/chosen": -621.9994506835938, | |
| "logps/rejected": -518.42724609375, | |
| "loss": 0.6725, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 0.1049574688076973, | |
| "rewards/margins": 0.07572697848081589, | |
| "rewards/rejected": 0.02923049032688141, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.14035260514133754, | |
| "grad_norm": 129.0, | |
| "learning_rate": 4.431375867321036e-06, | |
| "logits/chosen": -4.198615550994873, | |
| "logits/rejected": -4.119570255279541, | |
| "logps/chosen": -573.978271484375, | |
| "logps/rejected": -433.1485595703125, | |
| "loss": 0.6443, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 0.0844545066356659, | |
| "rewards/margins": 0.14824089407920837, | |
| "rewards/rejected": -0.06378639489412308, | |
| "step": 4275 | |
| }, | |
| { | |
| "epoch": 0.14117338061000032, | |
| "grad_norm": 1808.0, | |
| "learning_rate": 4.427145033000508e-06, | |
| "logits/chosen": -3.9868874549865723, | |
| "logits/rejected": -4.086323261260986, | |
| "logps/chosen": -702.60009765625, | |
| "logps/rejected": -433.14678955078125, | |
| "loss": 0.6546, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 0.11101648211479187, | |
| "rewards/margins": 0.12218675017356873, | |
| "rewards/rejected": -0.01117025688290596, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 0.14199415607866311, | |
| "grad_norm": 165.0, | |
| "learning_rate": 4.42291419867998e-06, | |
| "logits/chosen": -4.045719146728516, | |
| "logits/rejected": -4.074853897094727, | |
| "logps/chosen": -693.7863159179688, | |
| "logps/rejected": -559.730224609375, | |
| "loss": 0.6367, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 0.14746950566768646, | |
| "rewards/margins": 0.15490588545799255, | |
| "rewards/rejected": -0.007436369080096483, | |
| "step": 4325 | |
| }, | |
| { | |
| "epoch": 0.1428149315473259, | |
| "grad_norm": 174.0, | |
| "learning_rate": 4.418683364359452e-06, | |
| "logits/chosen": -4.222276210784912, | |
| "logits/rejected": -4.294714450836182, | |
| "logps/chosen": -605.540771484375, | |
| "logps/rejected": -488.9421081542969, | |
| "loss": 0.6346, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.10411551594734192, | |
| "rewards/margins": 0.14872083067893982, | |
| "rewards/rejected": -0.0446053072810173, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 0.1436357070159887, | |
| "grad_norm": 88.0, | |
| "learning_rate": 4.414452530038924e-06, | |
| "logits/chosen": -4.0823750495910645, | |
| "logits/rejected": -4.026907920837402, | |
| "logps/chosen": -584.8992919921875, | |
| "logps/rejected": -504.4686279296875, | |
| "loss": 0.6124, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.1442980170249939, | |
| "rewards/margins": 0.2002059370279312, | |
| "rewards/rejected": -0.055907901376485825, | |
| "step": 4375 | |
| }, | |
| { | |
| "epoch": 0.1444564824846515, | |
| "grad_norm": 226.0, | |
| "learning_rate": 4.410221695718396e-06, | |
| "logits/chosen": -4.218522548675537, | |
| "logits/rejected": -4.213830947875977, | |
| "logps/chosen": -909.9868774414062, | |
| "logps/rejected": -584.3450927734375, | |
| "loss": 0.6128, | |
| "rewards/accuracies": 0.7200000286102295, | |
| "rewards/chosen": 0.19589273631572723, | |
| "rewards/margins": 0.2196936011314392, | |
| "rewards/rejected": -0.023800864815711975, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.14527725795331428, | |
| "grad_norm": 212.0, | |
| "learning_rate": 4.405990861397868e-06, | |
| "logits/chosen": -4.175970077514648, | |
| "logits/rejected": -4.285007953643799, | |
| "logps/chosen": -666.7886962890625, | |
| "logps/rejected": -492.61016845703125, | |
| "loss": 0.6155, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 0.1311405748128891, | |
| "rewards/margins": 0.20073936879634857, | |
| "rewards/rejected": -0.06959877908229828, | |
| "step": 4425 | |
| }, | |
| { | |
| "epoch": 0.14609803342197708, | |
| "grad_norm": 298.0, | |
| "learning_rate": 4.40176002707734e-06, | |
| "logits/chosen": -4.091942310333252, | |
| "logits/rejected": -4.163181304931641, | |
| "logps/chosen": -770.0662231445312, | |
| "logps/rejected": -469.1553955078125, | |
| "loss": 0.6015, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 0.19774757325649261, | |
| "rewards/margins": 0.2411683201789856, | |
| "rewards/rejected": -0.04342072829604149, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 0.14691880889063988, | |
| "grad_norm": 194.0, | |
| "learning_rate": 4.397529192756812e-06, | |
| "logits/chosen": -4.143798351287842, | |
| "logits/rejected": -4.189226150512695, | |
| "logps/chosen": -596.7088012695312, | |
| "logps/rejected": -398.5277404785156, | |
| "loss": 0.6588, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 0.06982951611280441, | |
| "rewards/margins": 0.12531226873397827, | |
| "rewards/rejected": -0.05548277497291565, | |
| "step": 4475 | |
| }, | |
| { | |
| "epoch": 0.14773958435930268, | |
| "grad_norm": 218.0, | |
| "learning_rate": 4.393298358436284e-06, | |
| "logits/chosen": -4.239335060119629, | |
| "logits/rejected": -4.231566905975342, | |
| "logps/chosen": -792.1026000976562, | |
| "logps/rejected": -575.8635864257812, | |
| "loss": 0.6145, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 0.17430633306503296, | |
| "rewards/margins": 0.2026720494031906, | |
| "rewards/rejected": -0.028365688398480415, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.14856035982796545, | |
| "grad_norm": 161.0, | |
| "learning_rate": 4.389067524115756e-06, | |
| "logits/chosen": -4.187715530395508, | |
| "logits/rejected": -4.225770950317383, | |
| "logps/chosen": -643.482421875, | |
| "logps/rejected": -506.84161376953125, | |
| "loss": 0.6411, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.0837879404425621, | |
| "rewards/margins": 0.16951730847358704, | |
| "rewards/rejected": -0.08572938293218613, | |
| "step": 4525 | |
| }, | |
| { | |
| "epoch": 0.14938113529662825, | |
| "grad_norm": 160.0, | |
| "learning_rate": 4.384836689795228e-06, | |
| "logits/chosen": -4.2231764793396, | |
| "logits/rejected": -4.232290744781494, | |
| "logps/chosen": -607.963134765625, | |
| "logps/rejected": -468.76544189453125, | |
| "loss": 0.641, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.14704573154449463, | |
| "rewards/margins": 0.1499151736497879, | |
| "rewards/rejected": -0.002869446761906147, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 0.15020191076529105, | |
| "grad_norm": 161.0, | |
| "learning_rate": 4.3806058554747e-06, | |
| "logits/chosen": -4.201754093170166, | |
| "logits/rejected": -4.216127872467041, | |
| "logps/chosen": -621.528076171875, | |
| "logps/rejected": -426.3268737792969, | |
| "loss": 0.6394, | |
| "rewards/accuracies": 0.5600000023841858, | |
| "rewards/chosen": 0.12953585386276245, | |
| "rewards/margins": 0.1462259441614151, | |
| "rewards/rejected": -0.016690107062458992, | |
| "step": 4575 | |
| }, | |
| { | |
| "epoch": 0.15102268623395385, | |
| "grad_norm": 206.0, | |
| "learning_rate": 4.376375021154172e-06, | |
| "logits/chosen": -4.243280410766602, | |
| "logits/rejected": -4.3009724617004395, | |
| "logps/chosen": -822.161865234375, | |
| "logps/rejected": -618.0408325195312, | |
| "loss": 0.6202, | |
| "rewards/accuracies": 0.6399999856948853, | |
| "rewards/chosen": 0.21996277570724487, | |
| "rewards/margins": 0.2018716037273407, | |
| "rewards/rejected": 0.018091170117259026, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.15184346170261664, | |
| "grad_norm": 62.75, | |
| "learning_rate": 4.372144186833644e-06, | |
| "logits/chosen": -4.265949249267578, | |
| "logits/rejected": -4.3107805252075195, | |
| "logps/chosen": -684.9762573242188, | |
| "logps/rejected": -497.3363952636719, | |
| "loss": 0.6175, | |
| "rewards/accuracies": 0.6399999856948853, | |
| "rewards/chosen": 0.21863166987895966, | |
| "rewards/margins": 0.20082683861255646, | |
| "rewards/rejected": 0.017804812639951706, | |
| "step": 4625 | |
| }, | |
| { | |
| "epoch": 0.15266423717127942, | |
| "grad_norm": 156.0, | |
| "learning_rate": 4.367913352513116e-06, | |
| "logits/chosen": -4.131052494049072, | |
| "logits/rejected": -4.288494110107422, | |
| "logps/chosen": -714.316650390625, | |
| "logps/rejected": -461.1311340332031, | |
| "loss": 0.5934, | |
| "rewards/accuracies": 0.7200000286102295, | |
| "rewards/chosen": 0.2658103108406067, | |
| "rewards/margins": 0.2636970579624176, | |
| "rewards/rejected": 0.00211325753480196, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 0.15348501263994221, | |
| "grad_norm": 191.0, | |
| "learning_rate": 4.363682518192588e-06, | |
| "logits/chosen": -4.270446300506592, | |
| "logits/rejected": -4.1907639503479, | |
| "logps/chosen": -814.872314453125, | |
| "logps/rejected": -580.6480102539062, | |
| "loss": 0.648, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 0.1529182642698288, | |
| "rewards/margins": 0.13565631210803986, | |
| "rewards/rejected": 0.017261944711208344, | |
| "step": 4675 | |
| }, | |
| { | |
| "epoch": 0.154305788108605, | |
| "grad_norm": 332.0, | |
| "learning_rate": 4.35945168387206e-06, | |
| "logits/chosen": -4.242652893066406, | |
| "logits/rejected": -4.241410255432129, | |
| "logps/chosen": -663.955322265625, | |
| "logps/rejected": -559.5843505859375, | |
| "loss": 0.6485, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.10647504031658173, | |
| "rewards/margins": 0.13156315684318542, | |
| "rewards/rejected": -0.025088123977184296, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 0.1551265635772678, | |
| "grad_norm": 138.0, | |
| "learning_rate": 4.355220849551532e-06, | |
| "logits/chosen": -3.9757704734802246, | |
| "logits/rejected": -3.965700149536133, | |
| "logps/chosen": -679.7207641601562, | |
| "logps/rejected": -476.59259033203125, | |
| "loss": 0.6664, | |
| "rewards/accuracies": 0.5400000214576721, | |
| "rewards/chosen": 0.15955568850040436, | |
| "rewards/margins": 0.10310190171003342, | |
| "rewards/rejected": 0.05645379424095154, | |
| "step": 4725 | |
| }, | |
| { | |
| "epoch": 0.15594733904593058, | |
| "grad_norm": 106.5, | |
| "learning_rate": 4.350990015231004e-06, | |
| "logits/chosen": -4.21544075012207, | |
| "logits/rejected": -4.245710372924805, | |
| "logps/chosen": -1018.4263305664062, | |
| "logps/rejected": -763.0903930664062, | |
| "loss": 0.614, | |
| "rewards/accuracies": 0.7400000095367432, | |
| "rewards/chosen": 0.23744136095046997, | |
| "rewards/margins": 0.22951194643974304, | |
| "rewards/rejected": 0.007929441519081593, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.15676811451459338, | |
| "grad_norm": 170.0, | |
| "learning_rate": 4.346759180910476e-06, | |
| "logits/chosen": -4.2572550773620605, | |
| "logits/rejected": -4.290678977966309, | |
| "logps/chosen": -785.2484130859375, | |
| "logps/rejected": -493.4697570800781, | |
| "loss": 0.6116, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 0.1742425113916397, | |
| "rewards/margins": 0.20866689085960388, | |
| "rewards/rejected": -0.03442436829209328, | |
| "step": 4775 | |
| }, | |
| { | |
| "epoch": 0.15758888998325618, | |
| "grad_norm": 168.0, | |
| "learning_rate": 4.342528346589948e-06, | |
| "logits/chosen": -4.143786430358887, | |
| "logits/rejected": -4.080834865570068, | |
| "logps/chosen": -737.3786010742188, | |
| "logps/rejected": -521.0652465820312, | |
| "loss": 0.5969, | |
| "rewards/accuracies": 0.7799999713897705, | |
| "rewards/chosen": 0.19123902916908264, | |
| "rewards/margins": 0.24138489365577698, | |
| "rewards/rejected": -0.05014587566256523, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.15840966545191898, | |
| "grad_norm": 190.0, | |
| "learning_rate": 4.33829751226942e-06, | |
| "logits/chosen": -4.198343753814697, | |
| "logits/rejected": -4.262630462646484, | |
| "logps/chosen": -691.4227905273438, | |
| "logps/rejected": -543.6026611328125, | |
| "loss": 0.6123, | |
| "rewards/accuracies": 0.7400000095367432, | |
| "rewards/chosen": 0.1909123808145523, | |
| "rewards/margins": 0.20675772428512573, | |
| "rewards/rejected": -0.015845321118831635, | |
| "step": 4825 | |
| }, | |
| { | |
| "epoch": 0.15923044092058178, | |
| "grad_norm": 133.0, | |
| "learning_rate": 4.334066677948892e-06, | |
| "logits/chosen": -4.119637489318848, | |
| "logits/rejected": -4.084893226623535, | |
| "logps/chosen": -581.482666015625, | |
| "logps/rejected": -442.5643615722656, | |
| "loss": 0.5791, | |
| "rewards/accuracies": 0.7400000095367432, | |
| "rewards/chosen": 0.21446186304092407, | |
| "rewards/margins": 0.31452712416648865, | |
| "rewards/rejected": -0.10006527602672577, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 0.16005121638924455, | |
| "grad_norm": 112.5, | |
| "learning_rate": 4.329835843628364e-06, | |
| "logits/chosen": -4.266894340515137, | |
| "logits/rejected": -4.300928115844727, | |
| "logps/chosen": -833.2391967773438, | |
| "logps/rejected": -558.8040771484375, | |
| "loss": 0.667, | |
| "rewards/accuracies": 0.5799999833106995, | |
| "rewards/chosen": 0.10664394497871399, | |
| "rewards/margins": 0.12200988829135895, | |
| "rewards/rejected": -0.01536593772470951, | |
| "step": 4875 | |
| }, | |
| { | |
| "epoch": 0.16087199185790735, | |
| "grad_norm": 153.0, | |
| "learning_rate": 4.325605009307836e-06, | |
| "logits/chosen": -4.04871940612793, | |
| "logits/rejected": -4.289731025695801, | |
| "logps/chosen": -695.7615356445312, | |
| "logps/rejected": -539.9432983398438, | |
| "loss": 0.5951, | |
| "rewards/accuracies": 0.7400000095367432, | |
| "rewards/chosen": 0.20514139533042908, | |
| "rewards/margins": 0.2501979470252991, | |
| "rewards/rejected": -0.0450565330684185, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 0.16169276732657015, | |
| "grad_norm": 130.0, | |
| "learning_rate": 4.321374174987308e-06, | |
| "logits/chosen": -4.080851078033447, | |
| "logits/rejected": -4.139771938323975, | |
| "logps/chosen": -775.0393676757812, | |
| "logps/rejected": -510.61358642578125, | |
| "loss": 0.6145, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": 0.17780721187591553, | |
| "rewards/margins": 0.22440527379512787, | |
| "rewards/rejected": -0.04659804329276085, | |
| "step": 4925 | |
| }, | |
| { | |
| "epoch": 0.16251354279523295, | |
| "grad_norm": 130.0, | |
| "learning_rate": 4.31714334066678e-06, | |
| "logits/chosen": -4.235746383666992, | |
| "logits/rejected": -4.229293346405029, | |
| "logps/chosen": -632.58154296875, | |
| "logps/rejected": -580.3175048828125, | |
| "loss": 0.6374, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 0.1310328096151352, | |
| "rewards/margins": 0.16347967088222504, | |
| "rewards/rejected": -0.032446879893541336, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 0.16333431826389572, | |
| "grad_norm": 320.0, | |
| "learning_rate": 4.312912506346252e-06, | |
| "logits/chosen": -3.9886889457702637, | |
| "logits/rejected": -4.151256084442139, | |
| "logps/chosen": -638.684814453125, | |
| "logps/rejected": -490.31024169921875, | |
| "loss": 0.6275, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 0.0955779105424881, | |
| "rewards/margins": 0.19564473628997803, | |
| "rewards/rejected": -0.10006682574748993, | |
| "step": 4975 | |
| }, | |
| { | |
| "epoch": 0.16415509373255852, | |
| "grad_norm": 228.0, | |
| "learning_rate": 4.308681672025724e-06, | |
| "logits/chosen": -4.112755298614502, | |
| "logits/rejected": -4.220064640045166, | |
| "logps/chosen": -815.4720458984375, | |
| "logps/rejected": -582.703369140625, | |
| "loss": 0.6494, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.08248654007911682, | |
| "rewards/margins": 0.1237930878996849, | |
| "rewards/rejected": -0.041306547820568085, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.16497586920122131, | |
| "grad_norm": 362.0, | |
| "learning_rate": 4.304450837705196e-06, | |
| "logits/chosen": -4.267681121826172, | |
| "logits/rejected": -4.1627960205078125, | |
| "logps/chosen": -826.8048706054688, | |
| "logps/rejected": -580.4628295898438, | |
| "loss": 0.6022, | |
| "rewards/accuracies": 0.7599999904632568, | |
| "rewards/chosen": 0.17233513295650482, | |
| "rewards/margins": 0.23386350274085999, | |
| "rewards/rejected": -0.06152837723493576, | |
| "step": 5025 | |
| }, | |
| { | |
| "epoch": 0.1657966446698841, | |
| "grad_norm": 280.0, | |
| "learning_rate": 4.300220003384668e-06, | |
| "logits/chosen": -4.138628005981445, | |
| "logits/rejected": -4.228004455566406, | |
| "logps/chosen": -595.0879516601562, | |
| "logps/rejected": -410.44061279296875, | |
| "loss": 0.6298, | |
| "rewards/accuracies": 0.6200000047683716, | |
| "rewards/chosen": 0.1664738804101944, | |
| "rewards/margins": 0.1815992295742035, | |
| "rewards/rejected": -0.015125354751944542, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 0.1666174201385469, | |
| "grad_norm": 175.0, | |
| "learning_rate": 4.29598916906414e-06, | |
| "logits/chosen": -4.243340969085693, | |
| "logits/rejected": -4.193829536437988, | |
| "logps/chosen": -670.334228515625, | |
| "logps/rejected": -474.9371032714844, | |
| "loss": 0.6345, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 0.16480280458927155, | |
| "rewards/margins": 0.16151557862758636, | |
| "rewards/rejected": 0.0032872117590159178, | |
| "step": 5075 | |
| }, | |
| { | |
| "epoch": 0.16743819560720968, | |
| "grad_norm": 191.0, | |
| "learning_rate": 4.291758334743612e-06, | |
| "logits/chosen": -4.132363319396973, | |
| "logits/rejected": -4.141480922698975, | |
| "logps/chosen": -799.2957763671875, | |
| "logps/rejected": -670.20849609375, | |
| "loss": 0.6597, | |
| "rewards/accuracies": 0.6200000047683716, | |
| "rewards/chosen": 0.13164381682872772, | |
| "rewards/margins": 0.11912553012371063, | |
| "rewards/rejected": 0.012518273666501045, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 0.16825897107587248, | |
| "grad_norm": 192.0, | |
| "learning_rate": 4.287527500423084e-06, | |
| "logits/chosen": -4.019545078277588, | |
| "logits/rejected": -4.000518321990967, | |
| "logps/chosen": -720.95947265625, | |
| "logps/rejected": -593.3463134765625, | |
| "loss": 0.657, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.12281408905982971, | |
| "rewards/margins": 0.5138216614723206, | |
| "rewards/rejected": -0.39100757241249084, | |
| "step": 5125 | |
| }, | |
| { | |
| "epoch": 0.16907974654453528, | |
| "grad_norm": 208.0, | |
| "learning_rate": 4.283296666102556e-06, | |
| "logits/chosen": -4.264939785003662, | |
| "logits/rejected": -4.2540388107299805, | |
| "logps/chosen": -605.2987670898438, | |
| "logps/rejected": -425.5446472167969, | |
| "loss": 0.6227, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 0.1905955821275711, | |
| "rewards/margins": 0.18971359729766846, | |
| "rewards/rejected": 0.00088197470176965, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 0.16990052201319808, | |
| "grad_norm": 143.0, | |
| "learning_rate": 4.279065831782028e-06, | |
| "logits/chosen": -4.044861793518066, | |
| "logits/rejected": -4.183505058288574, | |
| "logps/chosen": -620.2490234375, | |
| "logps/rejected": -478.9082336425781, | |
| "loss": 0.5998, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.18604780733585358, | |
| "rewards/margins": 0.23399414122104645, | |
| "rewards/rejected": -0.04794633388519287, | |
| "step": 5175 | |
| }, | |
| { | |
| "epoch": 0.17072129748186085, | |
| "grad_norm": 256.0, | |
| "learning_rate": 4.2748349974615e-06, | |
| "logits/chosen": -4.1650519371032715, | |
| "logits/rejected": -4.288421154022217, | |
| "logps/chosen": -782.1646728515625, | |
| "logps/rejected": -516.1348876953125, | |
| "loss": 0.6416, | |
| "rewards/accuracies": 0.6399999856948853, | |
| "rewards/chosen": 0.14982113242149353, | |
| "rewards/margins": 0.17511123418807983, | |
| "rewards/rejected": -0.02529011480510235, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.17154207295052365, | |
| "grad_norm": 226.0, | |
| "learning_rate": 4.270604163140972e-06, | |
| "logits/chosen": -4.2230544090271, | |
| "logits/rejected": -4.363689422607422, | |
| "logps/chosen": -932.8560791015625, | |
| "logps/rejected": -670.7036743164062, | |
| "loss": 0.5626, | |
| "rewards/accuracies": 0.7599999904632568, | |
| "rewards/chosen": 0.21535824239253998, | |
| "rewards/margins": 0.3550167977809906, | |
| "rewards/rejected": -0.13965855538845062, | |
| "step": 5225 | |
| }, | |
| { | |
| "epoch": 0.17236284841918645, | |
| "grad_norm": 89.5, | |
| "learning_rate": 4.266373328820444e-06, | |
| "logits/chosen": -4.2306623458862305, | |
| "logits/rejected": -4.218886375427246, | |
| "logps/chosen": -593.7825927734375, | |
| "logps/rejected": -460.80255126953125, | |
| "loss": 0.6256, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 0.24924713373184204, | |
| "rewards/margins": 0.19636379182338715, | |
| "rewards/rejected": 0.05288334935903549, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.17318362388784925, | |
| "grad_norm": 104.5, | |
| "learning_rate": 4.262142494499916e-06, | |
| "logits/chosen": -4.100011825561523, | |
| "logits/rejected": -4.121422290802002, | |
| "logps/chosen": -763.5194702148438, | |
| "logps/rejected": -460.8433532714844, | |
| "loss": 0.5805, | |
| "rewards/accuracies": 0.7400000095367432, | |
| "rewards/chosen": 0.30740201473236084, | |
| "rewards/margins": 0.29280537366867065, | |
| "rewards/rejected": 0.014596661552786827, | |
| "step": 5275 | |
| }, | |
| { | |
| "epoch": 0.17400439935651205, | |
| "grad_norm": 145.0, | |
| "learning_rate": 4.257911660179388e-06, | |
| "logits/chosen": -4.0073347091674805, | |
| "logits/rejected": -4.158247470855713, | |
| "logps/chosen": -669.4347534179688, | |
| "logps/rejected": -583.874755859375, | |
| "loss": 0.6597, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 0.09481468796730042, | |
| "rewards/margins": 0.12035001069307327, | |
| "rewards/rejected": -0.025535322725772858, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 0.17482517482517482, | |
| "grad_norm": 194.0, | |
| "learning_rate": 4.25368082585886e-06, | |
| "logits/chosen": -4.175117015838623, | |
| "logits/rejected": -4.1560378074646, | |
| "logps/chosen": -516.7086181640625, | |
| "logps/rejected": -377.07342529296875, | |
| "loss": 0.6758, | |
| "rewards/accuracies": 0.5799999833106995, | |
| "rewards/chosen": 0.13141313195228577, | |
| "rewards/margins": 0.06963927298784256, | |
| "rewards/rejected": 0.061773862689733505, | |
| "step": 5325 | |
| }, | |
| { | |
| "epoch": 0.17564595029383762, | |
| "grad_norm": 228.0, | |
| "learning_rate": 4.249449991538332e-06, | |
| "logits/chosen": -4.131567478179932, | |
| "logits/rejected": -4.205038070678711, | |
| "logps/chosen": -705.3861083984375, | |
| "logps/rejected": -533.5681762695312, | |
| "loss": 0.641, | |
| "rewards/accuracies": 0.5600000023841858, | |
| "rewards/chosen": 0.16328547894954681, | |
| "rewards/margins": 0.18053361773490906, | |
| "rewards/rejected": -0.017248129472136497, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 0.17646672576250041, | |
| "grad_norm": 63.25, | |
| "learning_rate": 4.245219157217804e-06, | |
| "logits/chosen": -4.128468036651611, | |
| "logits/rejected": -4.179846286773682, | |
| "logps/chosen": -770.1663208007812, | |
| "logps/rejected": -586.923828125, | |
| "loss": 0.5994, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 0.1435445100069046, | |
| "rewards/margins": 0.26513317227363586, | |
| "rewards/rejected": -0.12158867716789246, | |
| "step": 5375 | |
| }, | |
| { | |
| "epoch": 0.1772875012311632, | |
| "grad_norm": 172.0, | |
| "learning_rate": 4.240988322897276e-06, | |
| "logits/chosen": -4.156975269317627, | |
| "logits/rejected": -4.22945499420166, | |
| "logps/chosen": -607.2662353515625, | |
| "logps/rejected": -553.4839477539062, | |
| "loss": 0.5973, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 0.25185444951057434, | |
| "rewards/margins": 0.28923726081848145, | |
| "rewards/rejected": -0.037382807582616806, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.17810827669982598, | |
| "grad_norm": 115.5, | |
| "learning_rate": 4.236757488576748e-06, | |
| "logits/chosen": -4.242987155914307, | |
| "logits/rejected": -4.287976264953613, | |
| "logps/chosen": -649.2179565429688, | |
| "logps/rejected": -505.8639831542969, | |
| "loss": 0.5919, | |
| "rewards/accuracies": 0.7799999713897705, | |
| "rewards/chosen": 0.19670113921165466, | |
| "rewards/margins": 0.25432896614074707, | |
| "rewards/rejected": -0.057627782225608826, | |
| "step": 5425 | |
| }, | |
| { | |
| "epoch": 0.17892905216848878, | |
| "grad_norm": 215.0, | |
| "learning_rate": 4.23252665425622e-06, | |
| "logits/chosen": -4.028115272521973, | |
| "logits/rejected": -4.092276096343994, | |
| "logps/chosen": -829.40380859375, | |
| "logps/rejected": -672.47509765625, | |
| "loss": 0.6481, | |
| "rewards/accuracies": 0.7200000286102295, | |
| "rewards/chosen": 0.15024369955062866, | |
| "rewards/margins": 0.1664939671754837, | |
| "rewards/rejected": -0.016250288113951683, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 0.17974982763715158, | |
| "grad_norm": 112.0, | |
| "learning_rate": 4.228295819935692e-06, | |
| "logits/chosen": -4.228298187255859, | |
| "logits/rejected": -4.2013726234436035, | |
| "logps/chosen": -787.6983032226562, | |
| "logps/rejected": -453.38031005859375, | |
| "loss": 0.5961, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 0.25117358565330505, | |
| "rewards/margins": 0.26828765869140625, | |
| "rewards/rejected": -0.017114050686359406, | |
| "step": 5475 | |
| }, | |
| { | |
| "epoch": 0.18057060310581438, | |
| "grad_norm": 115.0, | |
| "learning_rate": 4.224064985615164e-06, | |
| "logits/chosen": -4.019930839538574, | |
| "logits/rejected": -4.275580883026123, | |
| "logps/chosen": -673.7694702148438, | |
| "logps/rejected": -538.3352661132812, | |
| "loss": 0.6338, | |
| "rewards/accuracies": 0.6399999856948853, | |
| "rewards/chosen": 0.1483411341905594, | |
| "rewards/margins": 0.1611100435256958, | |
| "rewards/rejected": -0.012768914923071861, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.18139137857447715, | |
| "grad_norm": 129.0, | |
| "learning_rate": 4.219834151294636e-06, | |
| "logits/chosen": -4.229439735412598, | |
| "logits/rejected": -4.16810417175293, | |
| "logps/chosen": -900.92431640625, | |
| "logps/rejected": -535.6353759765625, | |
| "loss": 0.6067, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 0.23091623187065125, | |
| "rewards/margins": 0.24078793823719025, | |
| "rewards/rejected": -0.0098717100918293, | |
| "step": 5525 | |
| }, | |
| { | |
| "epoch": 0.18221215404313995, | |
| "grad_norm": 169.0, | |
| "learning_rate": 4.215603316974108e-06, | |
| "logits/chosen": -4.243109226226807, | |
| "logits/rejected": -4.321156024932861, | |
| "logps/chosen": -867.3097534179688, | |
| "logps/rejected": -529.377197265625, | |
| "loss": 0.5628, | |
| "rewards/accuracies": 0.8199999928474426, | |
| "rewards/chosen": 0.20608556270599365, | |
| "rewards/margins": 0.33511531352996826, | |
| "rewards/rejected": -0.1290297657251358, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 0.18303292951180275, | |
| "grad_norm": 163.0, | |
| "learning_rate": 4.21137248265358e-06, | |
| "logits/chosen": -4.261384963989258, | |
| "logits/rejected": -4.396700859069824, | |
| "logps/chosen": -738.622802734375, | |
| "logps/rejected": -579.0276489257812, | |
| "loss": 0.6155, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.1820848137140274, | |
| "rewards/margins": 0.20051410794258118, | |
| "rewards/rejected": -0.018429284915328026, | |
| "step": 5575 | |
| }, | |
| { | |
| "epoch": 0.18385370498046555, | |
| "grad_norm": 298.0, | |
| "learning_rate": 4.207141648333052e-06, | |
| "logits/chosen": -4.233059883117676, | |
| "logits/rejected": -4.272811412811279, | |
| "logps/chosen": -814.3858642578125, | |
| "logps/rejected": -547.3389282226562, | |
| "loss": 0.6609, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.1449856013059616, | |
| "rewards/margins": 0.10664605349302292, | |
| "rewards/rejected": 0.038339558988809586, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.18467448044912835, | |
| "grad_norm": 65.0, | |
| "learning_rate": 4.202910814012524e-06, | |
| "logits/chosen": -4.120992660522461, | |
| "logits/rejected": -4.08679723739624, | |
| "logps/chosen": -621.9240112304688, | |
| "logps/rejected": -547.0635375976562, | |
| "loss": 0.6141, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.16784168779850006, | |
| "rewards/margins": 0.20409084856510162, | |
| "rewards/rejected": -0.03624917194247246, | |
| "step": 5625 | |
| }, | |
| { | |
| "epoch": 0.18549525591779112, | |
| "grad_norm": 163.0, | |
| "learning_rate": 4.198679979691996e-06, | |
| "logits/chosen": -4.074944019317627, | |
| "logits/rejected": -4.333995819091797, | |
| "logps/chosen": -645.5282592773438, | |
| "logps/rejected": -481.385986328125, | |
| "loss": 0.633, | |
| "rewards/accuracies": 0.6200000047683716, | |
| "rewards/chosen": 0.18368229269981384, | |
| "rewards/margins": 0.20405273139476776, | |
| "rewards/rejected": -0.020370442420244217, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 0.18631603138645392, | |
| "grad_norm": 147.0, | |
| "learning_rate": 4.194449145371468e-06, | |
| "logits/chosen": -4.042386054992676, | |
| "logits/rejected": -4.160777568817139, | |
| "logps/chosen": -743.6102294921875, | |
| "logps/rejected": -578.762939453125, | |
| "loss": 0.6119, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 0.1839004009962082, | |
| "rewards/margins": 0.23765355348587036, | |
| "rewards/rejected": -0.05375315621495247, | |
| "step": 5675 | |
| }, | |
| { | |
| "epoch": 0.18713680685511672, | |
| "grad_norm": 134.0, | |
| "learning_rate": 4.19021831105094e-06, | |
| "logits/chosen": -4.224156856536865, | |
| "logits/rejected": -4.259792327880859, | |
| "logps/chosen": -711.947998046875, | |
| "logps/rejected": -538.19189453125, | |
| "loss": 0.6145, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 0.22774983942508698, | |
| "rewards/margins": 0.22662487626075745, | |
| "rewards/rejected": 0.0011249757371842861, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 0.18795758232377952, | |
| "grad_norm": 155.0, | |
| "learning_rate": 4.185987476730412e-06, | |
| "logits/chosen": -4.1899213790893555, | |
| "logits/rejected": -4.298079967498779, | |
| "logps/chosen": -790.821533203125, | |
| "logps/rejected": -654.3777465820312, | |
| "loss": 0.6277, | |
| "rewards/accuracies": 0.6399999856948853, | |
| "rewards/chosen": 0.13436265289783478, | |
| "rewards/margins": 0.20195932686328888, | |
| "rewards/rejected": -0.0675966665148735, | |
| "step": 5725 | |
| }, | |
| { | |
| "epoch": 0.1887783577924423, | |
| "grad_norm": 210.0, | |
| "learning_rate": 4.181756642409884e-06, | |
| "logits/chosen": -4.21663236618042, | |
| "logits/rejected": -4.2310566902160645, | |
| "logps/chosen": -826.4243774414062, | |
| "logps/rejected": -574.5404052734375, | |
| "loss": 0.6265, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 0.14746469259262085, | |
| "rewards/margins": 0.22628816962242126, | |
| "rewards/rejected": -0.07882346212863922, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 0.18959913326110509, | |
| "grad_norm": 149.0, | |
| "learning_rate": 4.177525808089356e-06, | |
| "logits/chosen": -4.072010040283203, | |
| "logits/rejected": -4.1931257247924805, | |
| "logps/chosen": -699.114501953125, | |
| "logps/rejected": -505.4546203613281, | |
| "loss": 0.5832, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 0.27559271454811096, | |
| "rewards/margins": 0.302867591381073, | |
| "rewards/rejected": -0.02727488987147808, | |
| "step": 5775 | |
| }, | |
| { | |
| "epoch": 0.19041990872976788, | |
| "grad_norm": 108.0, | |
| "learning_rate": 4.173294973768828e-06, | |
| "logits/chosen": -4.217546463012695, | |
| "logits/rejected": -4.097277641296387, | |
| "logps/chosen": -739.192626953125, | |
| "logps/rejected": -503.85296630859375, | |
| "loss": 0.6646, | |
| "rewards/accuracies": 0.5400000214576721, | |
| "rewards/chosen": 0.1342460662126541, | |
| "rewards/margins": 0.13998816907405853, | |
| "rewards/rejected": -0.005742125678807497, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.19124068419843068, | |
| "grad_norm": 88.0, | |
| "learning_rate": 4.1690641394483e-06, | |
| "logits/chosen": -4.054004669189453, | |
| "logits/rejected": -4.206819534301758, | |
| "logps/chosen": -619.5850219726562, | |
| "logps/rejected": -404.0340576171875, | |
| "loss": 0.612, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 0.1378345638513565, | |
| "rewards/margins": 0.2363594025373459, | |
| "rewards/rejected": -0.09852485358715057, | |
| "step": 5825 | |
| }, | |
| { | |
| "epoch": 0.19206145966709348, | |
| "grad_norm": 302.0, | |
| "learning_rate": 4.164833305127772e-06, | |
| "logits/chosen": -4.187170505523682, | |
| "logits/rejected": -4.270932674407959, | |
| "logps/chosen": -751.2974853515625, | |
| "logps/rejected": -424.2715759277344, | |
| "loss": 0.6397, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.09154117852449417, | |
| "rewards/margins": 0.16450461745262146, | |
| "rewards/rejected": -0.07296343892812729, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 0.19288223513575625, | |
| "grad_norm": 141.0, | |
| "learning_rate": 4.160602470807243e-06, | |
| "logits/chosen": -4.164458751678467, | |
| "logits/rejected": -4.169947624206543, | |
| "logps/chosen": -669.7885131835938, | |
| "logps/rejected": -516.6512451171875, | |
| "loss": 0.658, | |
| "rewards/accuracies": 0.5199999809265137, | |
| "rewards/chosen": 0.12925738096237183, | |
| "rewards/margins": 0.11811258643865585, | |
| "rewards/rejected": 0.011144790798425674, | |
| "step": 5875 | |
| }, | |
| { | |
| "epoch": 0.19370301060441905, | |
| "grad_norm": 143.0, | |
| "learning_rate": 4.156371636486716e-06, | |
| "logits/chosen": -4.081195831298828, | |
| "logits/rejected": -4.181396961212158, | |
| "logps/chosen": -765.7492065429688, | |
| "logps/rejected": -589.5405883789062, | |
| "loss": 0.5968, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 0.18399088084697723, | |
| "rewards/margins": 0.2514546513557434, | |
| "rewards/rejected": -0.06746377795934677, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 0.19452378607308185, | |
| "grad_norm": 159.0, | |
| "learning_rate": 4.152140802166187e-06, | |
| "logits/chosen": -4.238447666168213, | |
| "logits/rejected": -4.124698638916016, | |
| "logps/chosen": -767.7413940429688, | |
| "logps/rejected": -659.5244750976562, | |
| "loss": 0.6515, | |
| "rewards/accuracies": 0.5600000023841858, | |
| "rewards/chosen": 0.24860292673110962, | |
| "rewards/margins": 0.1544342190027237, | |
| "rewards/rejected": 0.09416870772838593, | |
| "step": 5925 | |
| }, | |
| { | |
| "epoch": 0.19534456154174465, | |
| "grad_norm": 126.0, | |
| "learning_rate": 4.14790996784566e-06, | |
| "logits/chosen": -4.309987545013428, | |
| "logits/rejected": -4.205745697021484, | |
| "logps/chosen": -679.7860717773438, | |
| "logps/rejected": -557.9547729492188, | |
| "loss": 0.6518, | |
| "rewards/accuracies": 0.6200000047683716, | |
| "rewards/chosen": 0.19679342210292816, | |
| "rewards/margins": 0.1271744966506958, | |
| "rewards/rejected": 0.06961893290281296, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 0.19616533701040742, | |
| "grad_norm": 252.0, | |
| "learning_rate": 4.143679133525131e-06, | |
| "logits/chosen": -4.0731916427612305, | |
| "logits/rejected": -4.357572555541992, | |
| "logps/chosen": -673.31787109375, | |
| "logps/rejected": -456.28924560546875, | |
| "loss": 0.6656, | |
| "rewards/accuracies": 0.5799999833106995, | |
| "rewards/chosen": 0.12726329267024994, | |
| "rewards/margins": 0.1085977777838707, | |
| "rewards/rejected": 0.018665514886379242, | |
| "step": 5975 | |
| }, | |
| { | |
| "epoch": 0.19698611247907022, | |
| "grad_norm": 162.0, | |
| "learning_rate": 4.139448299204604e-06, | |
| "logits/chosen": -4.230855941772461, | |
| "logits/rejected": -4.283547878265381, | |
| "logps/chosen": -627.782470703125, | |
| "logps/rejected": -425.7085266113281, | |
| "loss": 0.6155, | |
| "rewards/accuracies": 0.7400000095367432, | |
| "rewards/chosen": 0.16935734450817108, | |
| "rewards/margins": 0.20453746616840363, | |
| "rewards/rejected": -0.03518013656139374, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.19780688794773302, | |
| "grad_norm": 89.5, | |
| "learning_rate": 4.135217464884075e-06, | |
| "logits/chosen": -4.026022434234619, | |
| "logits/rejected": -4.211236000061035, | |
| "logps/chosen": -636.9410400390625, | |
| "logps/rejected": -497.6493835449219, | |
| "loss": 0.6299, | |
| "rewards/accuracies": 0.7200000286102295, | |
| "rewards/chosen": 0.20437130331993103, | |
| "rewards/margins": 0.18679769337177277, | |
| "rewards/rejected": 0.01757361926138401, | |
| "step": 6025 | |
| }, | |
| { | |
| "epoch": 0.19862766341639582, | |
| "grad_norm": 81.5, | |
| "learning_rate": 4.130986630563548e-06, | |
| "logits/chosen": -4.12556266784668, | |
| "logits/rejected": -4.181331157684326, | |
| "logps/chosen": -663.214111328125, | |
| "logps/rejected": -548.843505859375, | |
| "loss": 0.6332, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 0.211769238114357, | |
| "rewards/margins": 0.17643685638904572, | |
| "rewards/rejected": 0.03533238545060158, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 0.19944843888505862, | |
| "grad_norm": 149.0, | |
| "learning_rate": 4.126755796243019e-06, | |
| "logits/chosen": -4.021080493927002, | |
| "logits/rejected": -4.118467807769775, | |
| "logps/chosen": -780.01611328125, | |
| "logps/rejected": -585.002685546875, | |
| "loss": 0.6105, | |
| "rewards/accuracies": 0.7200000286102295, | |
| "rewards/chosen": 0.2011178731918335, | |
| "rewards/margins": 0.20934166014194489, | |
| "rewards/rejected": -0.008223775774240494, | |
| "step": 6075 | |
| }, | |
| { | |
| "epoch": 0.2002692143537214, | |
| "grad_norm": 140.0, | |
| "learning_rate": 4.122524961922492e-06, | |
| "logits/chosen": -4.2615885734558105, | |
| "logits/rejected": -4.098018646240234, | |
| "logps/chosen": -739.8490600585938, | |
| "logps/rejected": -538.7686157226562, | |
| "loss": 0.6331, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.22424788773059845, | |
| "rewards/margins": 0.1791938841342926, | |
| "rewards/rejected": 0.045054011046886444, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 0.20108998982238419, | |
| "grad_norm": 135.0, | |
| "learning_rate": 4.118294127601963e-06, | |
| "logits/chosen": -4.1815314292907715, | |
| "logits/rejected": -4.297692775726318, | |
| "logps/chosen": -692.421875, | |
| "logps/rejected": -482.92730712890625, | |
| "loss": 0.5629, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": 0.23259569704532623, | |
| "rewards/margins": 0.3299049437046051, | |
| "rewards/rejected": -0.09730925410985947, | |
| "step": 6125 | |
| }, | |
| { | |
| "epoch": 0.20191076529104698, | |
| "grad_norm": 176.0, | |
| "learning_rate": 4.114063293281436e-06, | |
| "logits/chosen": -4.1495208740234375, | |
| "logits/rejected": -4.088111877441406, | |
| "logps/chosen": -725.1746215820312, | |
| "logps/rejected": -486.193603515625, | |
| "loss": 0.6098, | |
| "rewards/accuracies": 0.7200000286102295, | |
| "rewards/chosen": 0.21869854629039764, | |
| "rewards/margins": 0.23580920696258545, | |
| "rewards/rejected": -0.01711067371070385, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 0.20273154075970978, | |
| "grad_norm": 182.0, | |
| "learning_rate": 4.109832458960907e-06, | |
| "logits/chosen": -4.105493068695068, | |
| "logits/rejected": -4.2606730461120605, | |
| "logps/chosen": -630.2760009765625, | |
| "logps/rejected": -479.7104187011719, | |
| "loss": 0.6216, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.18199868500232697, | |
| "rewards/margins": 0.20924949645996094, | |
| "rewards/rejected": -0.02725079283118248, | |
| "step": 6175 | |
| }, | |
| { | |
| "epoch": 0.20355231622837255, | |
| "grad_norm": 118.0, | |
| "learning_rate": 4.10560162464038e-06, | |
| "logits/chosen": -4.221208095550537, | |
| "logits/rejected": -4.3337202072143555, | |
| "logps/chosen": -819.2532958984375, | |
| "logps/rejected": -593.1384887695312, | |
| "loss": 0.601, | |
| "rewards/accuracies": 0.8199999928474426, | |
| "rewards/chosen": 0.1887252926826477, | |
| "rewards/margins": 0.24284206330776215, | |
| "rewards/rejected": -0.05411674454808235, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.20437309169703535, | |
| "grad_norm": 183.0, | |
| "learning_rate": 4.101370790319851e-06, | |
| "logits/chosen": -4.222593784332275, | |
| "logits/rejected": -4.143655776977539, | |
| "logps/chosen": -936.6873168945312, | |
| "logps/rejected": -611.08740234375, | |
| "loss": 0.6512, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 0.24604524672031403, | |
| "rewards/margins": 0.1898384541273117, | |
| "rewards/rejected": 0.05620681867003441, | |
| "step": 6225 | |
| }, | |
| { | |
| "epoch": 0.20519386716569815, | |
| "grad_norm": 120.0, | |
| "learning_rate": 4.097139955999324e-06, | |
| "logits/chosen": -4.276827812194824, | |
| "logits/rejected": -4.2462544441223145, | |
| "logps/chosen": -786.3945922851562, | |
| "logps/rejected": -569.2025756835938, | |
| "loss": 0.6368, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.18757988512516022, | |
| "rewards/margins": 0.16715353727340698, | |
| "rewards/rejected": 0.020426325500011444, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 0.20601464263436095, | |
| "grad_norm": 264.0, | |
| "learning_rate": 4.092909121678795e-06, | |
| "logits/chosen": -4.115857124328613, | |
| "logits/rejected": -4.1909685134887695, | |
| "logps/chosen": -653.240478515625, | |
| "logps/rejected": -459.0234375, | |
| "loss": 0.6534, | |
| "rewards/accuracies": 0.5799999833106995, | |
| "rewards/chosen": 0.09809459000825882, | |
| "rewards/margins": 0.13492736220359802, | |
| "rewards/rejected": -0.0368327796459198, | |
| "step": 6275 | |
| }, | |
| { | |
| "epoch": 0.20683541810302375, | |
| "grad_norm": 206.0, | |
| "learning_rate": 4.088678287358268e-06, | |
| "logits/chosen": -4.155333995819092, | |
| "logits/rejected": -4.186524868011475, | |
| "logps/chosen": -736.306396484375, | |
| "logps/rejected": -539.7884521484375, | |
| "loss": 0.6026, | |
| "rewards/accuracies": 0.6399999856948853, | |
| "rewards/chosen": 0.18953242897987366, | |
| "rewards/margins": 0.24376721680164337, | |
| "rewards/rejected": -0.054234765470027924, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 0.20765619357168652, | |
| "grad_norm": 237.0, | |
| "learning_rate": 4.084447453037739e-06, | |
| "logits/chosen": -4.154298782348633, | |
| "logits/rejected": -4.270554065704346, | |
| "logps/chosen": -870.3758544921875, | |
| "logps/rejected": -743.3199462890625, | |
| "loss": 0.6616, | |
| "rewards/accuracies": 0.5799999833106995, | |
| "rewards/chosen": 0.12642379105091095, | |
| "rewards/margins": 0.09510830044746399, | |
| "rewards/rejected": 0.03131549060344696, | |
| "step": 6325 | |
| }, | |
| { | |
| "epoch": 0.20847696904034932, | |
| "grad_norm": 205.0, | |
| "learning_rate": 4.080216618717212e-06, | |
| "logits/chosen": -4.2590012550354, | |
| "logits/rejected": -4.194936752319336, | |
| "logps/chosen": -614.5784301757812, | |
| "logps/rejected": -491.8901672363281, | |
| "loss": 0.6684, | |
| "rewards/accuracies": 0.6399999856948853, | |
| "rewards/chosen": 0.13347198069095612, | |
| "rewards/margins": 0.14738866686820984, | |
| "rewards/rejected": -0.01391667127609253, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 0.20929774450901212, | |
| "grad_norm": 221.0, | |
| "learning_rate": 4.075985784396683e-06, | |
| "logits/chosen": -4.210058212280273, | |
| "logits/rejected": -4.1984453201293945, | |
| "logps/chosen": -720.3945922851562, | |
| "logps/rejected": -589.595947265625, | |
| "loss": 0.6069, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": 0.22792109847068787, | |
| "rewards/margins": 0.2285040020942688, | |
| "rewards/rejected": -0.0005828905268572271, | |
| "step": 6375 | |
| }, | |
| { | |
| "epoch": 0.21011851997767492, | |
| "grad_norm": 290.0, | |
| "learning_rate": 4.071754950076156e-06, | |
| "logits/chosen": -4.021488666534424, | |
| "logits/rejected": -4.137681484222412, | |
| "logps/chosen": -799.5679931640625, | |
| "logps/rejected": -506.8511657714844, | |
| "loss": 0.6242, | |
| "rewards/accuracies": 0.6600000262260437, | |
| "rewards/chosen": 0.154415562748909, | |
| "rewards/margins": 0.17487049102783203, | |
| "rewards/rejected": -0.020454909652471542, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.2109392954463377, | |
| "grad_norm": 150.0, | |
| "learning_rate": 4.067524115755627e-06, | |
| "logits/chosen": -4.259695529937744, | |
| "logits/rejected": -4.3616743087768555, | |
| "logps/chosen": -781.1605224609375, | |
| "logps/rejected": -477.47882080078125, | |
| "loss": 0.5909, | |
| "rewards/accuracies": 0.7400000095367432, | |
| "rewards/chosen": 0.1878763735294342, | |
| "rewards/margins": 0.2647455334663391, | |
| "rewards/rejected": -0.07686912268400192, | |
| "step": 6425 | |
| }, | |
| { | |
| "epoch": 0.2117600709150005, | |
| "grad_norm": 154.0, | |
| "learning_rate": 4.0632932814351e-06, | |
| "logits/chosen": -4.311985015869141, | |
| "logits/rejected": -4.301010608673096, | |
| "logps/chosen": -793.2703857421875, | |
| "logps/rejected": -579.3934326171875, | |
| "loss": 0.5878, | |
| "rewards/accuracies": 0.7200000286102295, | |
| "rewards/chosen": 0.21181993186473846, | |
| "rewards/margins": 0.28632134199142456, | |
| "rewards/rejected": -0.0745014175772667, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 0.21258084638366329, | |
| "grad_norm": 217.0, | |
| "learning_rate": 4.059062447114571e-06, | |
| "logits/chosen": -4.188201904296875, | |
| "logits/rejected": -4.189501762390137, | |
| "logps/chosen": -824.2460327148438, | |
| "logps/rejected": -638.919189453125, | |
| "loss": 0.6032, | |
| "rewards/accuracies": 0.7200000286102295, | |
| "rewards/chosen": 0.23749302327632904, | |
| "rewards/margins": 0.26156920194625854, | |
| "rewards/rejected": -0.024076232686638832, | |
| "step": 6475 | |
| }, | |
| { | |
| "epoch": 0.21340162185232608, | |
| "grad_norm": 155.0, | |
| "learning_rate": 4.054831612794044e-06, | |
| "logits/chosen": -4.130152702331543, | |
| "logits/rejected": -4.191186428070068, | |
| "logps/chosen": -683.3399047851562, | |
| "logps/rejected": -519.9230346679688, | |
| "loss": 0.6279, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": 0.22105403244495392, | |
| "rewards/margins": 0.19117388129234314, | |
| "rewards/rejected": 0.02988017164170742, | |
| "step": 6500 | |
| } | |
| ], | |
| "logging_steps": 25, | |
| "max_steps": 30459, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |