| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9981298423724285, | |
| "eval_steps": 400, | |
| "global_step": 467, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0021373230029388193, | |
| "grad_norm": 23814117.69119963, | |
| "learning_rate": 2.127659574468085e-08, | |
| "logits/chosen": -1.1381689310073853, | |
| "logits/rejected": -0.9913416504859924, | |
| "logps/chosen": -0.2839311361312866, | |
| "logps/rejected": -0.29555341601371765, | |
| "loss": 305.9593, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.7098277807235718, | |
| "rewards/margins": 0.029055725783109665, | |
| "rewards/rejected": -0.7388835549354553, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.010686615014694095, | |
| "grad_norm": 1974395.8030804002, | |
| "learning_rate": 1.0638297872340425e-07, | |
| "logits/chosen": -0.9901005029678345, | |
| "logits/rejected": -0.9188694953918457, | |
| "logps/chosen": -0.26972177624702454, | |
| "logps/rejected": -0.2686304748058319, | |
| "loss": 266.3214, | |
| "rewards/accuracies": 0.546875, | |
| "rewards/chosen": -0.6743044853210449, | |
| "rewards/margins": -0.002728263381868601, | |
| "rewards/rejected": -0.6715761423110962, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.02137323002938819, | |
| "grad_norm": 46220091.26670953, | |
| "learning_rate": 2.127659574468085e-07, | |
| "logits/chosen": -0.9833618998527527, | |
| "logits/rejected": -0.9393731951713562, | |
| "logps/chosen": -0.27256160974502563, | |
| "logps/rejected": -0.273215115070343, | |
| "loss": 185.9952, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.6814040541648865, | |
| "rewards/margins": 0.0016337722772732377, | |
| "rewards/rejected": -0.6830377578735352, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.03205984504408229, | |
| "grad_norm": 474920.2122214309, | |
| "learning_rate": 3.1914893617021275e-07, | |
| "logits/chosen": -0.963974118232727, | |
| "logits/rejected": -0.9196063876152039, | |
| "logps/chosen": -0.29573556780815125, | |
| "logps/rejected": -0.28305521607398987, | |
| "loss": 125.1317, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.7393389940261841, | |
| "rewards/margins": -0.0317009761929512, | |
| "rewards/rejected": -0.7076379656791687, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.04274646005877638, | |
| "grad_norm": 4515133.468491916, | |
| "learning_rate": 4.25531914893617e-07, | |
| "logits/chosen": -0.976075291633606, | |
| "logits/rejected": -0.9759608507156372, | |
| "logps/chosen": -0.2616123557090759, | |
| "logps/rejected": -0.27002111077308655, | |
| "loss": 127.9034, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.6540309190750122, | |
| "rewards/margins": 0.02102179452776909, | |
| "rewards/rejected": -0.6750527620315552, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.053433075073470476, | |
| "grad_norm": 5165268.674113416, | |
| "learning_rate": 5.319148936170212e-07, | |
| "logits/chosen": -1.0451716184616089, | |
| "logits/rejected": -1.0216295719146729, | |
| "logps/chosen": -0.28275421261787415, | |
| "logps/rejected": -0.2863079905509949, | |
| "loss": 161.215, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -0.7068854570388794, | |
| "rewards/margins": 0.0088844895362854, | |
| "rewards/rejected": -0.7157700657844543, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.06411969008816458, | |
| "grad_norm": 143372948.8120165, | |
| "learning_rate": 6.382978723404255e-07, | |
| "logits/chosen": -1.071578025817871, | |
| "logits/rejected": -0.9856084585189819, | |
| "logps/chosen": -0.2763022780418396, | |
| "logps/rejected": -0.2745462656021118, | |
| "loss": 388.8185, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -0.6907557249069214, | |
| "rewards/margins": -0.0043900711461901665, | |
| "rewards/rejected": -0.6863657236099243, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.07480630510285867, | |
| "grad_norm": 1184929.3556330686, | |
| "learning_rate": 7.446808510638297e-07, | |
| "logits/chosen": -1.01273512840271, | |
| "logits/rejected": -0.9335028529167175, | |
| "logps/chosen": -0.27808648347854614, | |
| "logps/rejected": -0.29893654584884644, | |
| "loss": 115.7746, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.695216178894043, | |
| "rewards/margins": 0.05212521553039551, | |
| "rewards/rejected": -0.7473413348197937, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.08549292011755276, | |
| "grad_norm": 1877904.3293607633, | |
| "learning_rate": 8.51063829787234e-07, | |
| "logits/chosen": -0.9277470707893372, | |
| "logits/rejected": -0.9166946411132812, | |
| "logps/chosen": -0.2787823975086212, | |
| "logps/rejected": -0.2824743986129761, | |
| "loss": 138.8757, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.6969559788703918, | |
| "rewards/margins": 0.009230067022144794, | |
| "rewards/rejected": -0.7061859965324402, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.09617953513224686, | |
| "grad_norm": 2671724.397751134, | |
| "learning_rate": 9.574468085106384e-07, | |
| "logits/chosen": -0.9359474182128906, | |
| "logits/rejected": -0.8535245060920715, | |
| "logps/chosen": -0.33036336302757263, | |
| "logps/rejected": -0.33015647530555725, | |
| "loss": 104.7933, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -0.8259084820747375, | |
| "rewards/margins": -0.0005173005047254264, | |
| "rewards/rejected": -0.8253911733627319, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.10686615014694095, | |
| "grad_norm": 79521388.74298675, | |
| "learning_rate": 9.998741174712533e-07, | |
| "logits/chosen": -0.9259702563285828, | |
| "logits/rejected": -0.9349774122238159, | |
| "logps/chosen": -0.2925248146057129, | |
| "logps/rejected": -0.3076633810997009, | |
| "loss": 175.2819, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.731312096118927, | |
| "rewards/margins": 0.0378464013338089, | |
| "rewards/rejected": -0.7691584825515747, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.11755276516163506, | |
| "grad_norm": 327828.43192551495, | |
| "learning_rate": 9.991050648838675e-07, | |
| "logits/chosen": -0.9278720021247864, | |
| "logits/rejected": -0.8686744570732117, | |
| "logps/chosen": -0.2634710669517517, | |
| "logps/rejected": -0.27794915437698364, | |
| "loss": 2695.6354, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.6586776971817017, | |
| "rewards/margins": 0.03619522601366043, | |
| "rewards/rejected": -0.6948728561401367, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.12823938017632916, | |
| "grad_norm": 899122.206242127, | |
| "learning_rate": 9.97637968732563e-07, | |
| "logits/chosen": -0.9561047554016113, | |
| "logits/rejected": -0.9336016774177551, | |
| "logps/chosen": -0.2656118869781494, | |
| "logps/rejected": -0.28187674283981323, | |
| "loss": 105.9757, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.6640297174453735, | |
| "rewards/margins": 0.0406620129942894, | |
| "rewards/rejected": -0.7046917080879211, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.13892599519102325, | |
| "grad_norm": 377242.1086806779, | |
| "learning_rate": 9.954748808839674e-07, | |
| "logits/chosen": -0.956866443157196, | |
| "logits/rejected": -1.005385398864746, | |
| "logps/chosen": -0.2731708288192749, | |
| "logps/rejected": -0.26419904828071594, | |
| "loss": 108.4874, | |
| "rewards/accuracies": 0.42500001192092896, | |
| "rewards/chosen": -0.682927131652832, | |
| "rewards/margins": -0.02242954447865486, | |
| "rewards/rejected": -0.6604975461959839, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.14961261020571734, | |
| "grad_norm": 42496.77007169402, | |
| "learning_rate": 9.926188266120295e-07, | |
| "logits/chosen": -0.9903133511543274, | |
| "logits/rejected": -0.9588413238525391, | |
| "logps/chosen": -0.305401474237442, | |
| "logps/rejected": -0.298237681388855, | |
| "loss": 405.7784, | |
| "rewards/accuracies": 0.42500001192092896, | |
| "rewards/chosen": -0.7635036706924438, | |
| "rewards/margins": -0.017909497022628784, | |
| "rewards/rejected": -0.7455942034721375, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.16029922522041143, | |
| "grad_norm": 523140.8331781128, | |
| "learning_rate": 9.890738003669027e-07, | |
| "logits/chosen": -0.9634426236152649, | |
| "logits/rejected": -0.9494821429252625, | |
| "logps/chosen": -0.2741475999355316, | |
| "logps/rejected": -0.2895483672618866, | |
| "loss": 2624.7643, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.685369074344635, | |
| "rewards/margins": 0.038501907140016556, | |
| "rewards/rejected": -0.7238709926605225, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.17098584023510552, | |
| "grad_norm": 37571862.32606961, | |
| "learning_rate": 9.848447601883433e-07, | |
| "logits/chosen": -1.0007914304733276, | |
| "logits/rejected": -0.9825354814529419, | |
| "logps/chosen": -0.28798869252204895, | |
| "logps/rejected": -0.28025856614112854, | |
| "loss": 131.06, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -0.7199716567993164, | |
| "rewards/margins": -0.019325237721204758, | |
| "rewards/rejected": -0.7006464600563049, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.18167245524979964, | |
| "grad_norm": 8861430.225925114, | |
| "learning_rate": 9.799376207714444e-07, | |
| "logits/chosen": -0.9258670806884766, | |
| "logits/rejected": -0.8755356073379517, | |
| "logps/chosen": -0.2675308287143707, | |
| "logps/rejected": -0.28247857093811035, | |
| "loss": 151.0586, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.6688271760940552, | |
| "rewards/margins": 0.03736928477883339, | |
| "rewards/rejected": -0.7061963081359863, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.19235907026449373, | |
| "grad_norm": 322999.5648039634, | |
| "learning_rate": 9.743592451943998e-07, | |
| "logits/chosen": -0.8945444226264954, | |
| "logits/rejected": -0.8326283693313599, | |
| "logps/chosen": -0.2888963222503662, | |
| "logps/rejected": -0.30566543340682983, | |
| "loss": 104.0617, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.7222408056259155, | |
| "rewards/margins": 0.04192278906702995, | |
| "rewards/rejected": -0.764163613319397, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.20304568527918782, | |
| "grad_norm": 16202558.227455074, | |
| "learning_rate": 9.681174353198686e-07, | |
| "logits/chosen": -1.0018240213394165, | |
| "logits/rejected": -1.024642825126648, | |
| "logps/chosen": -0.2775546908378601, | |
| "logps/rejected": -0.31214436888694763, | |
| "loss": 185.9518, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.6938868165016174, | |
| "rewards/margins": 0.08647419512271881, | |
| "rewards/rejected": -0.7803609371185303, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.2137323002938819, | |
| "grad_norm": 881275.6537233666, | |
| "learning_rate": 9.612209208833646e-07, | |
| "logits/chosen": -1.0630947351455688, | |
| "logits/rejected": -1.0079935789108276, | |
| "logps/chosen": -0.28541457653045654, | |
| "logps/rejected": -0.27989768981933594, | |
| "loss": 377.6484, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.7135364413261414, | |
| "rewards/margins": -0.01379220187664032, | |
| "rewards/rejected": -0.6997443437576294, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.224418915308576, | |
| "grad_norm": 1471606.598734741, | |
| "learning_rate": 9.536793472839324e-07, | |
| "logits/chosen": -0.9882336854934692, | |
| "logits/rejected": -0.9416030645370483, | |
| "logps/chosen": -0.2841026186943054, | |
| "logps/rejected": -0.30027633905410767, | |
| "loss": 205.1062, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.7102565169334412, | |
| "rewards/margins": 0.0404343381524086, | |
| "rewards/rejected": -0.7506908178329468, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.2351055303232701, | |
| "grad_norm": 313885.99955306284, | |
| "learning_rate": 9.455032620941839e-07, | |
| "logits/chosen": -0.9041908383369446, | |
| "logits/rejected": -0.854825496673584, | |
| "logps/chosen": -0.33143380284309387, | |
| "logps/rejected": -0.3396168053150177, | |
| "loss": 2658.6553, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.8285845518112183, | |
| "rewards/margins": 0.020457318052649498, | |
| "rewards/rejected": -0.8490419387817383, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.2457921453379642, | |
| "grad_norm": 682808.4048805884, | |
| "learning_rate": 9.367041003085648e-07, | |
| "logits/chosen": -0.9211395978927612, | |
| "logits/rejected": -0.9187518358230591, | |
| "logps/chosen": -0.2765989303588867, | |
| "logps/rejected": -0.2790865898132324, | |
| "loss": 2651.5432, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -0.6914974451065063, | |
| "rewards/margins": 0.006219107657670975, | |
| "rewards/rejected": -0.697716474533081, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.2564787603526583, | |
| "grad_norm": 527643.1252709947, | |
| "learning_rate": 9.272941683504808e-07, | |
| "logits/chosen": -1.0119305849075317, | |
| "logits/rejected": -0.9751386642456055, | |
| "logps/chosen": -0.28785568475723267, | |
| "logps/rejected": -0.31191155314445496, | |
| "loss": 232.3885, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.7196391820907593, | |
| "rewards/margins": 0.06013970449566841, | |
| "rewards/rejected": -0.7797788381576538, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.2671653753673524, | |
| "grad_norm": 57160137.92819305, | |
| "learning_rate": 9.172866268606513e-07, | |
| "logits/chosen": -1.0446767807006836, | |
| "logits/rejected": -1.03428316116333, | |
| "logps/chosen": -0.3408173620700836, | |
| "logps/rejected": -0.33484262228012085, | |
| "loss": 171.7718, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.8520433306694031, | |
| "rewards/margins": -0.014936879277229309, | |
| "rewards/rejected": -0.8371064066886902, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.2778519903820465, | |
| "grad_norm": 354260.60461613233, | |
| "learning_rate": 9.066954722907638e-07, | |
| "logits/chosen": -1.0202778577804565, | |
| "logits/rejected": -1.040438175201416, | |
| "logps/chosen": -0.3069685399532318, | |
| "logps/rejected": -0.3384125232696533, | |
| "loss": 152.887, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.7674213647842407, | |
| "rewards/margins": 0.07861001789569855, | |
| "rewards/rejected": -0.8460313677787781, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.2885386053967406, | |
| "grad_norm": 4161760.6479958617, | |
| "learning_rate": 8.955355173281707e-07, | |
| "logits/chosen": -0.9897885322570801, | |
| "logits/rejected": -0.9403419494628906, | |
| "logps/chosen": -0.3388122022151947, | |
| "logps/rejected": -0.29513686895370483, | |
| "loss": 127.9212, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -0.8470304608345032, | |
| "rewards/margins": -0.10918829590082169, | |
| "rewards/rejected": -0.7378422021865845, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.2992252204114347, | |
| "grad_norm": 1984829.9018358907, | |
| "learning_rate": 8.838223701790055e-07, | |
| "logits/chosen": -0.9738261103630066, | |
| "logits/rejected": -0.9614647030830383, | |
| "logps/chosen": -0.3720606565475464, | |
| "logps/rejected": -0.3473301827907562, | |
| "loss": 110.1865, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": -0.930151641368866, | |
| "rewards/margins": -0.06182613968849182, | |
| "rewards/rejected": -0.8683255314826965, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.30991183542612877, | |
| "grad_norm": 1513820.1920679864, | |
| "learning_rate": 8.71572412738697e-07, | |
| "logits/chosen": -1.0015193223953247, | |
| "logits/rejected": -0.9965044856071472, | |
| "logps/chosen": -0.3061389625072479, | |
| "logps/rejected": -0.3093434274196625, | |
| "loss": 3025.9289, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -0.7653473615646362, | |
| "rewards/margins": 0.008011135272681713, | |
| "rewards/rejected": -0.7733586430549622, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.32059845044082286, | |
| "grad_norm": 1970591.2689892622, | |
| "learning_rate": 8.588027776804058e-07, | |
| "logits/chosen": -0.9444905519485474, | |
| "logits/rejected": -0.9439139366149902, | |
| "logps/chosen": -0.3537100851535797, | |
| "logps/rejected": -0.35441845655441284, | |
| "loss": 221.1807, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.8842751383781433, | |
| "rewards/margins": 0.0017710126703605056, | |
| "rewards/rejected": -0.8860462307929993, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.33128506545551695, | |
| "grad_norm": 12808984.8595381, | |
| "learning_rate": 8.455313244934324e-07, | |
| "logits/chosen": -1.0120588541030884, | |
| "logits/rejected": -0.9681800603866577, | |
| "logps/chosen": -0.3124849796295166, | |
| "logps/rejected": -0.3563632667064667, | |
| "loss": 187.1941, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.7812124490737915, | |
| "rewards/margins": 0.10969575494527817, | |
| "rewards/rejected": -0.8909081220626831, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.34197168047021104, | |
| "grad_norm": 1183315.8265051153, | |
| "learning_rate": 8.317766145051057e-07, | |
| "logits/chosen": -1.0060454607009888, | |
| "logits/rejected": -0.9961159825325012, | |
| "logps/chosen": -0.2914329171180725, | |
| "logps/rejected": -0.3657309412956238, | |
| "loss": 120.0758, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.7285822629928589, | |
| "rewards/margins": 0.1857450008392334, | |
| "rewards/rejected": -0.9143272638320923, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.3526582954849052, | |
| "grad_norm": 2641730.3542562006, | |
| "learning_rate": 8.175578849210894e-07, | |
| "logits/chosen": -1.0412616729736328, | |
| "logits/rejected": -0.9751707315444946, | |
| "logps/chosen": -0.33477407693862915, | |
| "logps/rejected": -0.31863099336624146, | |
| "loss": 222.9428, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.8369352221488953, | |
| "rewards/margins": -0.04035765677690506, | |
| "rewards/rejected": -0.7965775728225708, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.36334491049959927, | |
| "grad_norm": 24200663.77841142, | |
| "learning_rate": 8.028950219204099e-07, | |
| "logits/chosen": -1.0198957920074463, | |
| "logits/rejected": -1.0187537670135498, | |
| "logps/chosen": -0.35278764367103577, | |
| "logps/rejected": -0.36079707741737366, | |
| "loss": 175.3017, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.8819690942764282, | |
| "rewards/margins": 0.020023606717586517, | |
| "rewards/rejected": -0.901992678642273, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.37403152551429336, | |
| "grad_norm": 423144.2753567647, | |
| "learning_rate": 7.878085328428368e-07, | |
| "logits/chosen": -1.0390782356262207, | |
| "logits/rejected": -1.0463870763778687, | |
| "logps/chosen": -0.3290930390357971, | |
| "logps/rejected": -0.33625391125679016, | |
| "loss": 172.6517, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.8227324485778809, | |
| "rewards/margins": 0.01790226623415947, | |
| "rewards/rejected": -0.840634822845459, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.38471814052898745, | |
| "grad_norm": 66473449.795217186, | |
| "learning_rate": 7.723195175075135e-07, | |
| "logits/chosen": -1.074244737625122, | |
| "logits/rejected": -1.0753021240234375, | |
| "logps/chosen": -0.33172592520713806, | |
| "logps/rejected": -0.34797996282577515, | |
| "loss": 173.7831, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.8293148279190063, | |
| "rewards/margins": 0.04063502699136734, | |
| "rewards/rejected": -0.8699499368667603, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.39540475554368154, | |
| "grad_norm": 2508265.2610814595, | |
| "learning_rate": 7.564496387029531e-07, | |
| "logits/chosen": -1.105753779411316, | |
| "logits/rejected": -1.0690752267837524, | |
| "logps/chosen": -0.33374324440956116, | |
| "logps/rejected": -0.3317410349845886, | |
| "loss": 130.1423, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.8343580961227417, | |
| "rewards/margins": -0.005005507729947567, | |
| "rewards/rejected": -0.8293525576591492, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.40609137055837563, | |
| "grad_norm": 1415188.1564555708, | |
| "learning_rate": 7.402210918896689e-07, | |
| "logits/chosen": -1.1109493970870972, | |
| "logits/rejected": -1.0873216390609741, | |
| "logps/chosen": -0.3215797245502472, | |
| "logps/rejected": -0.34585997462272644, | |
| "loss": 103.3292, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.8039493560791016, | |
| "rewards/margins": 0.06070064380764961, | |
| "rewards/rejected": -0.8646499514579773, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.4167779855730697, | |
| "grad_norm": 22405463.240061384, | |
| "learning_rate": 7.236565741578162e-07, | |
| "logits/chosen": -1.0102512836456299, | |
| "logits/rejected": -0.995439350605011, | |
| "logps/chosen": -0.34978950023651123, | |
| "logps/rejected": -0.3523608446121216, | |
| "loss": 144.2631, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.8744736909866333, | |
| "rewards/margins": 0.006428359542042017, | |
| "rewards/rejected": -0.880902111530304, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.4274646005877638, | |
| "grad_norm": 364076.0574983151, | |
| "learning_rate": 7.067792524832603e-07, | |
| "logits/chosen": -1.0113328695297241, | |
| "logits/rejected": -1.0017603635787964, | |
| "logps/chosen": -0.3367648124694824, | |
| "logps/rejected": -0.334301233291626, | |
| "loss": 115.4949, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.8419120907783508, | |
| "rewards/margins": -0.006158898584544659, | |
| "rewards/rejected": -0.8357530832290649, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.4381512156024579, | |
| "grad_norm": 1540771.9395518457, | |
| "learning_rate": 6.896127313264642e-07, | |
| "logits/chosen": -1.1025888919830322, | |
| "logits/rejected": -1.0531136989593506, | |
| "logps/chosen": -0.32284116744995117, | |
| "logps/rejected": -0.34286996722221375, | |
| "loss": 117.5218, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.8071029782295227, | |
| "rewards/margins": 0.05007190629839897, | |
| "rewards/rejected": -0.8571747541427612, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.448837830617152, | |
| "grad_norm": 197109620.3195932, | |
| "learning_rate": 6.721810196195174e-07, | |
| "logits/chosen": -1.1090078353881836, | |
| "logits/rejected": -1.0331655740737915, | |
| "logps/chosen": -0.2971234619617462, | |
| "logps/rejected": -0.3048322796821594, | |
| "loss": 199.9444, | |
| "rewards/accuracies": 0.38749998807907104, | |
| "rewards/chosen": -0.7428085207939148, | |
| "rewards/margins": 0.01927214488387108, | |
| "rewards/rejected": -0.7620807886123657, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.45952444563184613, | |
| "grad_norm": 210941.10682300097, | |
| "learning_rate": 6.545084971874736e-07, | |
| "logits/chosen": -1.055289626121521, | |
| "logits/rejected": -0.9978870153427124, | |
| "logps/chosen": -0.31941694021224976, | |
| "logps/rejected": -0.3496856093406677, | |
| "loss": 117.6536, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.7985422015190125, | |
| "rewards/margins": 0.07567177712917328, | |
| "rewards/rejected": -0.8742140531539917, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.4702110606465402, | |
| "grad_norm": 17774561.440666944, | |
| "learning_rate": 6.3661988065096e-07, | |
| "logits/chosen": -1.0686638355255127, | |
| "logits/rejected": -1.0178725719451904, | |
| "logps/chosen": -0.33966127038002014, | |
| "logps/rejected": -0.3513311445713043, | |
| "loss": 109.0363, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.8491531610488892, | |
| "rewards/margins": 0.029174691066145897, | |
| "rewards/rejected": -0.8783278465270996, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.4808976756612343, | |
| "grad_norm": 1448517.5044393009, | |
| "learning_rate": 6.185401888577487e-07, | |
| "logits/chosen": -1.0401103496551514, | |
| "logits/rejected": -1.0147794485092163, | |
| "logps/chosen": -0.284532368183136, | |
| "logps/rejected": -0.2862989902496338, | |
| "loss": 111.407, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.7113308906555176, | |
| "rewards/margins": 0.004416565410792828, | |
| "rewards/rejected": -0.7157474756240845, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.4915842906759284, | |
| "grad_norm": 27408086.19895333, | |
| "learning_rate": 6.002947078916364e-07, | |
| "logits/chosen": -1.1030638217926025, | |
| "logits/rejected": -0.9990617632865906, | |
| "logps/chosen": -0.33675864338874817, | |
| "logps/rejected": -0.3289005756378174, | |
| "loss": 2706.1715, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.841896653175354, | |
| "rewards/margins": -0.01964510791003704, | |
| "rewards/rejected": -0.8222514986991882, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.5022709056906225, | |
| "grad_norm": 317843.4109202252, | |
| "learning_rate": 5.819089557075688e-07, | |
| "logits/chosen": -1.0890393257141113, | |
| "logits/rejected": -1.0440254211425781, | |
| "logps/chosen": -0.34919267892837524, | |
| "logps/rejected": -0.3513543903827667, | |
| "loss": 238.4326, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -0.8729816675186157, | |
| "rewards/margins": 0.005404374096542597, | |
| "rewards/rejected": -0.8783860206604004, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.5129575207053166, | |
| "grad_norm": 85019.6413638533, | |
| "learning_rate": 5.634086464424742e-07, | |
| "logits/chosen": -1.1137946844100952, | |
| "logits/rejected": -1.076812505722046, | |
| "logps/chosen": -0.32847946882247925, | |
| "logps/rejected": -0.322710782289505, | |
| "loss": 114.2576, | |
| "rewards/accuracies": 0.38749998807907104, | |
| "rewards/chosen": -0.8211986422538757, | |
| "rewards/margins": -0.014421721920371056, | |
| "rewards/rejected": -0.8067768812179565, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.5236441357200107, | |
| "grad_norm": 6299240.513263524, | |
| "learning_rate": 5.448196544517167e-07, | |
| "logits/chosen": -1.1339685916900635, | |
| "logits/rejected": -1.0941574573516846, | |
| "logps/chosen": -0.3057493269443512, | |
| "logps/rejected": -0.3173142373561859, | |
| "loss": 145.971, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -0.7643733024597168, | |
| "rewards/margins": 0.028912359848618507, | |
| "rewards/rejected": -0.7932857275009155, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.5343307507347048, | |
| "grad_norm": 847488.7365746452, | |
| "learning_rate": 5.26167978121472e-07, | |
| "logits/chosen": -1.1527339220046997, | |
| "logits/rejected": -1.114386796951294, | |
| "logps/chosen": -0.3197785019874573, | |
| "logps/rejected": -0.3439500629901886, | |
| "loss": 2684.4785, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.7994462251663208, | |
| "rewards/margins": 0.060428936034440994, | |
| "rewards/rejected": -0.8598750829696655, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.5450173657493989, | |
| "grad_norm": 231777.21031654373, | |
| "learning_rate": 5.074797035076318e-07, | |
| "logits/chosen": -1.0928058624267578, | |
| "logits/rejected": -1.079099416732788, | |
| "logps/chosen": -0.3362935483455658, | |
| "logps/rejected": -0.36446088552474976, | |
| "loss": 94.7642, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.8407338261604309, | |
| "rewards/margins": 0.0704183503985405, | |
| "rewards/rejected": -0.911152184009552, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.555703980764093, | |
| "grad_norm": 147041.27143323392, | |
| "learning_rate": 4.887809678520975e-07, | |
| "logits/chosen": -1.0703433752059937, | |
| "logits/rejected": -1.0498476028442383, | |
| "logps/chosen": -0.29196763038635254, | |
| "logps/rejected": -0.3219326138496399, | |
| "loss": 84.8449, | |
| "rewards/accuracies": 0.6625000238418579, | |
| "rewards/chosen": -0.7299190759658813, | |
| "rewards/margins": 0.07491237670183182, | |
| "rewards/rejected": -0.8048315048217773, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.566390595778787, | |
| "grad_norm": 435566096526.86523, | |
| "learning_rate": 4.700979230274829e-07, | |
| "logits/chosen": -1.0955275297164917, | |
| "logits/rejected": -1.0940407514572144, | |
| "logps/chosen": -0.3326551914215088, | |
| "logps/rejected": -0.35088759660720825, | |
| "loss": 887.0658, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.831637978553772, | |
| "rewards/margins": 0.04558102414011955, | |
| "rewards/rejected": -0.877219021320343, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.5770772107934812, | |
| "grad_norm": 3520938.773348992, | |
| "learning_rate": 4.514566989613559e-07, | |
| "logits/chosen": -1.086938500404358, | |
| "logits/rejected": -1.0884182453155518, | |
| "logps/chosen": -0.3024354875087738, | |
| "logps/rejected": -0.29867392778396606, | |
| "loss": 228.8071, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -0.7560887336730957, | |
| "rewards/margins": -0.009403971955180168, | |
| "rewards/rejected": -0.7466848492622375, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.5877638258081752, | |
| "grad_norm": 62256.73172967315, | |
| "learning_rate": 4.328833670911724e-07, | |
| "logits/chosen": -0.9738815426826477, | |
| "logits/rejected": -0.9222286343574524, | |
| "logps/chosen": -0.2884615659713745, | |
| "logps/rejected": -0.3087163269519806, | |
| "loss": 1419.8171, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -0.721153974533081, | |
| "rewards/margins": 0.05063692852854729, | |
| "rewards/rejected": -0.7717908620834351, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.5984504408228694, | |
| "grad_norm": 803294.007310266, | |
| "learning_rate": 4.144039039010124e-07, | |
| "logits/chosen": -1.1537045240402222, | |
| "logits/rejected": -1.0968632698059082, | |
| "logps/chosen": -0.33216923475265503, | |
| "logps/rejected": -0.2991081774234772, | |
| "loss": 301.7224, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -0.8304230570793152, | |
| "rewards/margins": -0.08265267312526703, | |
| "rewards/rejected": -0.7477704286575317, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.6091370558375635, | |
| "grad_norm": 263927.3164272957, | |
| "learning_rate": 3.960441545911204e-07, | |
| "logits/chosen": -1.044985055923462, | |
| "logits/rejected": -1.0020415782928467, | |
| "logps/chosen": -0.3105274736881256, | |
| "logps/rejected": -0.3335118591785431, | |
| "loss": 104.0724, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.7763187289237976, | |
| "rewards/margins": 0.057460904121398926, | |
| "rewards/rejected": -0.8337796330451965, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.6198236708522575, | |
| "grad_norm": 4071370.4437897406, | |
| "learning_rate": 3.778297969310529e-07, | |
| "logits/chosen": -1.071925163269043, | |
| "logits/rejected": -1.0407798290252686, | |
| "logps/chosen": -0.3102174699306488, | |
| "logps/rejected": -0.33250361680984497, | |
| "loss": 156.4641, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.7755436897277832, | |
| "rewards/margins": 0.055715300142765045, | |
| "rewards/rejected": -0.8312589526176453, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.6305102858669517, | |
| "grad_norm": 965178.0635991972, | |
| "learning_rate": 3.5978630534699865e-07, | |
| "logits/chosen": -1.1488522291183472, | |
| "logits/rejected": -1.1228643655776978, | |
| "logps/chosen": -0.3139174282550812, | |
| "logps/rejected": -0.32147642970085144, | |
| "loss": 134.6723, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.784793496131897, | |
| "rewards/margins": 0.01889752224087715, | |
| "rewards/rejected": -0.8036910891532898, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.6411969008816457, | |
| "grad_norm": 20365408.82604466, | |
| "learning_rate": 3.4193891529348795e-07, | |
| "logits/chosen": -1.0468319654464722, | |
| "logits/rejected": -1.0043448209762573, | |
| "logps/chosen": -0.3311859369277954, | |
| "logps/rejected": -0.3362448513507843, | |
| "loss": 120.3211, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -0.8279649019241333, | |
| "rewards/margins": 0.012647300958633423, | |
| "rewards/rejected": -0.8406121134757996, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.6518835158963399, | |
| "grad_norm": 68905.1696103493, | |
| "learning_rate": 3.243125879593286e-07, | |
| "logits/chosen": -1.1107118129730225, | |
| "logits/rejected": -1.0741993188858032, | |
| "logps/chosen": -0.3406161665916443, | |
| "logps/rejected": -0.32983919978141785, | |
| "loss": 118.2299, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -0.8515404462814331, | |
| "rewards/margins": -0.026942413300275803, | |
| "rewards/rejected": -0.8245980143547058, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.6625701309110339, | |
| "grad_norm": 3378905.921798347, | |
| "learning_rate": 3.069319753571269e-07, | |
| "logits/chosen": -1.069124460220337, | |
| "logits/rejected": -1.110024094581604, | |
| "logps/chosen": -0.3401089012622833, | |
| "logps/rejected": -0.37533271312713623, | |
| "loss": 151.3316, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.8502721786499023, | |
| "rewards/margins": 0.08805962651968002, | |
| "rewards/rejected": -0.9383317828178406, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.673256745925728, | |
| "grad_norm": 27112706002.48435, | |
| "learning_rate": 2.898213858452173e-07, | |
| "logits/chosen": -1.2464616298675537, | |
| "logits/rejected": -1.1925503015518188, | |
| "logps/chosen": -0.34163057804107666, | |
| "logps/rejected": -0.34519410133361816, | |
| "loss": 951.2437, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.8540765047073364, | |
| "rewards/margins": 0.00890885479748249, | |
| "rewards/rejected": -0.8629853129386902, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.6839433609404221, | |
| "grad_norm": 34759765.70829812, | |
| "learning_rate": 2.730047501302266e-07, | |
| "logits/chosen": -1.1456632614135742, | |
| "logits/rejected": -1.1187629699707031, | |
| "logps/chosen": -0.3103678226470947, | |
| "logps/rejected": -0.35759711265563965, | |
| "loss": 140.3946, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.7759194374084473, | |
| "rewards/margins": 0.11807328462600708, | |
| "rewards/rejected": -0.8939927816390991, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.6946299759551162, | |
| "grad_norm": 250533.47771917153, | |
| "learning_rate": 2.5650558779781635e-07, | |
| "logits/chosen": -1.0254387855529785, | |
| "logits/rejected": -1.0484802722930908, | |
| "logps/chosen": -0.3333364427089691, | |
| "logps/rejected": -0.3406026363372803, | |
| "loss": 110.2831, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -0.8333410024642944, | |
| "rewards/margins": 0.01816548779606819, | |
| "rewards/rejected": -0.8515065312385559, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.7053165909698104, | |
| "grad_norm": 112400196.05235167, | |
| "learning_rate": 2.403469744184154e-07, | |
| "logits/chosen": -1.15934157371521, | |
| "logits/rejected": -1.1072492599487305, | |
| "logps/chosen": -0.3412119746208191, | |
| "logps/rejected": -0.37822234630584717, | |
| "loss": 171.9244, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -0.8530298471450806, | |
| "rewards/margins": 0.09252593666315079, | |
| "rewards/rejected": -0.9455558061599731, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.7160032059845044, | |
| "grad_norm": 9446757.557474248, | |
| "learning_rate": 2.2455150927394878e-07, | |
| "logits/chosen": -1.1374088525772095, | |
| "logits/rejected": -1.1560612916946411, | |
| "logps/chosen": -0.3190115988254547, | |
| "logps/rejected": -0.34075072407722473, | |
| "loss": 93.1427, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.7975289225578308, | |
| "rewards/margins": 0.05434788018465042, | |
| "rewards/rejected": -0.8518768548965454, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.7266898209991985, | |
| "grad_norm": 16695168.934409656, | |
| "learning_rate": 2.0914128375069722e-07, | |
| "logits/chosen": -1.0877724885940552, | |
| "logits/rejected": -1.0620936155319214, | |
| "logps/chosen": -0.30620652437210083, | |
| "logps/rejected": -0.33592310547828674, | |
| "loss": 120.7583, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.7655162811279297, | |
| "rewards/margins": 0.07429146766662598, | |
| "rewards/rejected": -0.8398076891899109, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.7373764360138926, | |
| "grad_norm": 4144624.6729695094, | |
| "learning_rate": 1.9413785044249676e-07, | |
| "logits/chosen": -1.0486472845077515, | |
| "logits/rejected": -1.0094027519226074, | |
| "logps/chosen": -0.30639034509658813, | |
| "logps/rejected": -0.321176141500473, | |
| "loss": 164.1478, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.765975832939148, | |
| "rewards/margins": 0.03696460276842117, | |
| "rewards/rejected": -0.8029405474662781, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.7480630510285867, | |
| "grad_norm": 23390472.719695035, | |
| "learning_rate": 1.7956219300748792e-07, | |
| "logits/chosen": -1.1522271633148193, | |
| "logits/rejected": -1.1415410041809082, | |
| "logps/chosen": -0.34453052282333374, | |
| "logps/rejected": -0.3794510066509247, | |
| "loss": 555.5947, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.8613262176513672, | |
| "rewards/margins": 0.08730128407478333, | |
| "rewards/rejected": -0.9486274719238281, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.7587496660432808, | |
| "grad_norm": 1280169.965933302, | |
| "learning_rate": 1.6543469682057104e-07, | |
| "logits/chosen": -1.1356937885284424, | |
| "logits/rejected": -1.0751426219940186, | |
| "logps/chosen": -0.3311988413333893, | |
| "logps/rejected": -0.3161237835884094, | |
| "loss": 82.217, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.827997088432312, | |
| "rewards/margins": -0.037687692791223526, | |
| "rewards/rejected": -0.7903094291687012, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.7694362810579749, | |
| "grad_norm": 1956574.339469866, | |
| "learning_rate": 1.5177512046261666e-07, | |
| "logits/chosen": -1.1374176740646362, | |
| "logits/rejected": -1.164374589920044, | |
| "logps/chosen": -0.3486614227294922, | |
| "logps/rejected": -0.3802019953727722, | |
| "loss": 102.3601, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -0.8716535568237305, | |
| "rewards/margins": 0.07885149866342545, | |
| "rewards/rejected": -0.9505050778388977, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.7801228960726689, | |
| "grad_norm": 84129.78530171402, | |
| "learning_rate": 1.3860256808630427e-07, | |
| "logits/chosen": -1.137064814567566, | |
| "logits/rejected": -1.0832656621932983, | |
| "logps/chosen": -0.32139506936073303, | |
| "logps/rejected": -0.3225245177745819, | |
| "loss": 117.2444, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.8034876585006714, | |
| "rewards/margins": 0.0028236303478479385, | |
| "rewards/rejected": -0.806311309337616, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.7908095110873631, | |
| "grad_norm": 4136765.760326951, | |
| "learning_rate": 1.2593546269723647e-07, | |
| "logits/chosen": -1.1603561639785767, | |
| "logits/rejected": -1.1292134523391724, | |
| "logps/chosen": -0.33979296684265137, | |
| "logps/rejected": -0.35157322883605957, | |
| "loss": 106.6815, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.8494824171066284, | |
| "rewards/margins": 0.029450654983520508, | |
| "rewards/rejected": -0.8789331316947937, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.8014961261020572, | |
| "grad_norm": 14999761.486437708, | |
| "learning_rate": 1.1379152038770029e-07, | |
| "logits/chosen": -1.1542747020721436, | |
| "logits/rejected": -1.172849416732788, | |
| "logps/chosen": -0.31457456946372986, | |
| "logps/rejected": -0.369545042514801, | |
| "loss": 120.5736, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.7864364385604858, | |
| "rewards/margins": 0.1374262273311615, | |
| "rewards/rejected": -0.923862636089325, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.8121827411167513, | |
| "grad_norm": 244556.56746403236, | |
| "learning_rate": 1.0218772555910954e-07, | |
| "logits/chosen": -1.2167600393295288, | |
| "logits/rejected": -1.1646716594696045, | |
| "logps/chosen": -0.37414881587028503, | |
| "logps/rejected": -0.3576270043849945, | |
| "loss": 113.0907, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -0.9353721737861633, | |
| "rewards/margins": -0.041304655373096466, | |
| "rewards/rejected": -0.8940675854682922, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.8228693561314454, | |
| "grad_norm": 1775127.4439502584, | |
| "learning_rate": 9.114030716778432e-08, | |
| "logits/chosen": -1.1325044631958008, | |
| "logits/rejected": -1.110126256942749, | |
| "logps/chosen": -0.3134748637676239, | |
| "logps/rejected": -0.35259318351745605, | |
| "loss": 114.6353, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.7836871147155762, | |
| "rewards/margins": 0.09779568761587143, | |
| "rewards/rejected": -0.8814828991889954, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.8335559711461394, | |
| "grad_norm": 138946.22290507445, | |
| "learning_rate": 8.066471602728803e-08, | |
| "logits/chosen": -1.220655083656311, | |
| "logits/rejected": -1.1926963329315186, | |
| "logps/chosen": -0.33643871545791626, | |
| "logps/rejected": -0.3400726318359375, | |
| "loss": 205.2888, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -0.8410967588424683, | |
| "rewards/margins": 0.009084770455956459, | |
| "rewards/rejected": -0.8501815795898438, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.8442425861608336, | |
| "grad_norm": 1993392.8238311838, | |
| "learning_rate": 7.077560319906694e-08, | |
| "logits/chosen": -1.0875790119171143, | |
| "logits/rejected": -1.0107152462005615, | |
| "logps/chosen": -0.3748469948768616, | |
| "logps/rejected": -0.380262553691864, | |
| "loss": 150.502, | |
| "rewards/accuracies": 0.4000000059604645, | |
| "rewards/chosen": -0.9371173977851868, | |
| "rewards/margins": 0.013538897037506104, | |
| "rewards/rejected": -0.950656533241272, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.8549292011755276, | |
| "grad_norm": 42068510.12370043, | |
| "learning_rate": 6.148679950161672e-08, | |
| "logits/chosen": -1.1116609573364258, | |
| "logits/rejected": -1.0628454685211182, | |
| "logps/chosen": -0.36634019017219543, | |
| "logps/rejected": -0.35744190216064453, | |
| "loss": 196.6313, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.9158504605293274, | |
| "rewards/margins": -0.022245775908231735, | |
| "rewards/rejected": -0.8936047554016113, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.8549292011755276, | |
| "eval_logits/chosen": -1.337195634841919, | |
| "eval_logits/rejected": -1.2981722354888916, | |
| "eval_logps/chosen": -0.3401065170764923, | |
| "eval_logps/rejected": -0.35557428002357483, | |
| "eval_loss": 132.36317443847656, | |
| "eval_rewards/accuracies": 0.5040322542190552, | |
| "eval_rewards/chosen": -0.8502662181854248, | |
| "eval_rewards/margins": 0.038669489324092865, | |
| "eval_rewards/rejected": -0.8889357447624207, | |
| "eval_runtime": 72.0543, | |
| "eval_samples_per_second": 27.216, | |
| "eval_steps_per_second": 0.86, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.8656158161902218, | |
| "grad_norm": 5594982.665070933, | |
| "learning_rate": 5.2811296166831666e-08, | |
| "logits/chosen": -1.1403666734695435, | |
| "logits/rejected": -1.0579333305358887, | |
| "logps/chosen": -0.34073713421821594, | |
| "logps/rejected": -0.33352339267730713, | |
| "loss": 122.7746, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -0.8518427610397339, | |
| "rewards/margins": -0.01803441345691681, | |
| "rewards/rejected": -0.833808422088623, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.8763024312049158, | |
| "grad_norm": 365339.7208405054, | |
| "learning_rate": 4.4761226670592066e-08, | |
| "logits/chosen": -1.0983816385269165, | |
| "logits/rejected": -1.0836502313613892, | |
| "logps/chosen": -0.33261579275131226, | |
| "logps/rejected": -0.3417048752307892, | |
| "loss": 111.2202, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.831539511680603, | |
| "rewards/margins": 0.022722622379660606, | |
| "rewards/rejected": -0.8542621731758118, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.88698904621961, | |
| "grad_norm": 2850565.0664724754, | |
| "learning_rate": 3.734784976300165e-08, | |
| "logits/chosen": -1.0673637390136719, | |
| "logits/rejected": -1.040725827217102, | |
| "logps/chosen": -0.34171849489212036, | |
| "logps/rejected": -0.348112016916275, | |
| "loss": 191.3689, | |
| "rewards/accuracies": 0.42500001192092896, | |
| "rewards/chosen": -0.8542962074279785, | |
| "rewards/margins": 0.015983855351805687, | |
| "rewards/rejected": -0.8702800869941711, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.897675661234304, | |
| "grad_norm": 507688.39572092163, | |
| "learning_rate": 3.058153372200695e-08, | |
| "logits/chosen": -1.1354072093963623, | |
| "logits/rejected": -1.1297013759613037, | |
| "logps/chosen": -0.3288155198097229, | |
| "logps/rejected": -0.3461647629737854, | |
| "loss": 112.2704, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -0.8220387697219849, | |
| "rewards/margins": 0.04337311536073685, | |
| "rewards/rejected": -0.8654119372367859, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.9083622762489981, | |
| "grad_norm": 7451525102.30645, | |
| "learning_rate": 2.4471741852423233e-08, | |
| "logits/chosen": -1.0978658199310303, | |
| "logits/rejected": -1.0448986291885376, | |
| "logps/chosen": -0.3529340624809265, | |
| "logps/rejected": -0.35007423162460327, | |
| "loss": 3170.9543, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.8823351860046387, | |
| "rewards/margins": -0.007149559445679188, | |
| "rewards/rejected": -0.8751856088638306, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.9190488912636923, | |
| "grad_norm": 10934704.996058388, | |
| "learning_rate": 1.9027019250647036e-08, | |
| "logits/chosen": -1.1120647192001343, | |
| "logits/rejected": -1.0629384517669678, | |
| "logps/chosen": -0.30386024713516235, | |
| "logps/rejected": -0.31913992762565613, | |
| "loss": 237.2099, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.759650707244873, | |
| "rewards/margins": 0.03819913789629936, | |
| "rewards/rejected": -0.7978497743606567, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.9297355062783863, | |
| "grad_norm": 1059239098.6557789, | |
| "learning_rate": 1.4254980853566246e-08, | |
| "logits/chosen": -1.1208689212799072, | |
| "logits/rejected": -1.076522946357727, | |
| "logps/chosen": -0.3260021507740021, | |
| "logps/rejected": -0.32419848442077637, | |
| "loss": 252.7438, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.8150054216384888, | |
| "rewards/margins": -0.004509164486080408, | |
| "rewards/rejected": -0.8104962110519409, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.9404221212930804, | |
| "grad_norm": 3163894.8555042273, | |
| "learning_rate": 1.016230078838226e-08, | |
| "logits/chosen": -1.1477479934692383, | |
| "logits/rejected": -1.1257246732711792, | |
| "logps/chosen": -0.3194740116596222, | |
| "logps/rejected": -0.3639461398124695, | |
| "loss": 283.9071, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.7986849546432495, | |
| "rewards/margins": 0.11118029057979584, | |
| "rewards/rejected": -0.9098652601242065, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.9511087363077745, | |
| "grad_norm": 313415.0194399257, | |
| "learning_rate": 6.754703038239329e-09, | |
| "logits/chosen": -1.1323530673980713, | |
| "logits/rejected": -1.0702521800994873, | |
| "logps/chosen": -0.323483407497406, | |
| "logps/rejected": -0.3070305287837982, | |
| "loss": 142.406, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -0.8087084889411926, | |
| "rewards/margins": -0.041132211685180664, | |
| "rewards/rejected": -0.7675763368606567, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.9617953513224686, | |
| "grad_norm": 312608.20391974325, | |
| "learning_rate": 4.036953436716895e-09, | |
| "logits/chosen": -1.0631930828094482, | |
| "logits/rejected": -1.0676857233047485, | |
| "logps/chosen": -0.34714624285697937, | |
| "logps/rejected": -0.3853607773780823, | |
| "loss": 98.3866, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.8678655624389648, | |
| "rewards/margins": 0.09553632885217667, | |
| "rewards/rejected": -0.9634019136428833, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.9724819663371627, | |
| "grad_norm": 98094.25868242758, | |
| "learning_rate": 2.0128530023804656e-09, | |
| "logits/chosen": -1.1627556085586548, | |
| "logits/rejected": -1.131043791770935, | |
| "logps/chosen": -0.34627044200897217, | |
| "logps/rejected": -0.32559770345687866, | |
| "loss": 136.3491, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -0.8656761050224304, | |
| "rewards/margins": -0.05168183892965317, | |
| "rewards/rejected": -0.813994288444519, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.9831685813518568, | |
| "grad_norm": 25583015.430213835, | |
| "learning_rate": 6.852326227130833e-10, | |
| "logits/chosen": -1.1027119159698486, | |
| "logits/rejected": -1.1237401962280273, | |
| "logps/chosen": -0.33976924419403076, | |
| "logps/rejected": -0.3329155147075653, | |
| "loss": 116.0967, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -0.8494230508804321, | |
| "rewards/margins": -0.01713428646326065, | |
| "rewards/rejected": -0.8322887420654297, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.9938551963665508, | |
| "grad_norm": 351352.5694241463, | |
| "learning_rate": 5.594909486328348e-11, | |
| "logits/chosen": -1.212425947189331, | |
| "logits/rejected": -1.1084251403808594, | |
| "logps/chosen": -0.3214932084083557, | |
| "logps/rejected": -0.3376317620277405, | |
| "loss": 2736.6258, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.8037330508232117, | |
| "rewards/margins": 0.04034631699323654, | |
| "rewards/rejected": -0.844079315662384, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.9981298423724285, | |
| "step": 467, | |
| "total_flos": 0.0, | |
| "train_loss": 444.38003229635433, | |
| "train_runtime": 7255.1322, | |
| "train_samples_per_second": 8.253, | |
| "train_steps_per_second": 0.064 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 467, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 1000000, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": false, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |