| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 19.753086419753085, | |
| "eval_steps": 500, | |
| "global_step": 600, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.3292181069958848, | |
| "grad_norm": 0.5561477541923523, | |
| "learning_rate": 8.333333333333333e-07, | |
| "logits/chosen": 1.7485754489898682, | |
| "logits/rejected": 1.8832639455795288, | |
| "logps/chosen": -70.18267059326172, | |
| "logps/rejected": -77.9986343383789, | |
| "loss": 0.6938, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": 0.005453853867948055, | |
| "rewards/margins": 0.013218576088547707, | |
| "rewards/rejected": -0.007764720823615789, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.6584362139917695, | |
| "grad_norm": 0.48141908645629883, | |
| "learning_rate": 1.6666666666666667e-06, | |
| "logits/chosen": 1.9016907215118408, | |
| "logits/rejected": 1.9251121282577515, | |
| "logps/chosen": -96.5027847290039, | |
| "logps/rejected": -87.00735473632812, | |
| "loss": 0.6937, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.005797500256448984, | |
| "rewards/margins": -0.0004409264656715095, | |
| "rewards/rejected": -0.005356573965400457, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.9876543209876543, | |
| "grad_norm": 0.4639015197753906, | |
| "learning_rate": 2.5e-06, | |
| "logits/chosen": 1.7938541173934937, | |
| "logits/rejected": 1.6962993144989014, | |
| "logps/chosen": -71.47590637207031, | |
| "logps/rejected": -66.45989227294922, | |
| "loss": 0.6942, | |
| "rewards/accuracies": 0.4124999940395355, | |
| "rewards/chosen": -0.004830303601920605, | |
| "rewards/margins": -0.00935445912182331, | |
| "rewards/rejected": 0.004524155054241419, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 1.316872427983539, | |
| "grad_norm": 0.44931092858314514, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "logits/chosen": 1.8256213665008545, | |
| "logits/rejected": 1.8677200078964233, | |
| "logps/chosen": -75.90711975097656, | |
| "logps/rejected": -76.26548767089844, | |
| "loss": 0.6935, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.004859285429120064, | |
| "rewards/margins": -0.007447429001331329, | |
| "rewards/rejected": 0.0025881435722112656, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 1.646090534979424, | |
| "grad_norm": 0.512350857257843, | |
| "learning_rate": 4.166666666666667e-06, | |
| "logits/chosen": 1.7572576999664307, | |
| "logits/rejected": 1.7408854961395264, | |
| "logps/chosen": -80.90664672851562, | |
| "logps/rejected": -85.82096862792969, | |
| "loss": 0.6937, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -0.00120059447363019, | |
| "rewards/margins": -0.0018891148502007127, | |
| "rewards/rejected": 0.0006885197362862527, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 1.9753086419753085, | |
| "grad_norm": 0.5260242819786072, | |
| "learning_rate": 5e-06, | |
| "logits/chosen": 1.827275037765503, | |
| "logits/rejected": 1.8168131113052368, | |
| "logps/chosen": -86.74467468261719, | |
| "logps/rejected": -79.20576477050781, | |
| "loss": 0.6937, | |
| "rewards/accuracies": 0.4124999940395355, | |
| "rewards/chosen": 0.0014693590346723795, | |
| "rewards/margins": -0.0025326632894575596, | |
| "rewards/rejected": 0.00400202302262187, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 2.3045267489711936, | |
| "grad_norm": 0.5231289267539978, | |
| "learning_rate": 4.995770395678171e-06, | |
| "logits/chosen": 1.7851394414901733, | |
| "logits/rejected": 1.8952877521514893, | |
| "logps/chosen": -81.03253173828125, | |
| "logps/rejected": -88.5263442993164, | |
| "loss": 0.6932, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -0.003017458599060774, | |
| "rewards/margins": -0.002731734188273549, | |
| "rewards/rejected": -0.0002857256622519344, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 2.633744855967078, | |
| "grad_norm": 0.5552361011505127, | |
| "learning_rate": 4.983095894354858e-06, | |
| "logits/chosen": 1.8227930068969727, | |
| "logits/rejected": 1.7752052545547485, | |
| "logps/chosen": -89.98479461669922, | |
| "logps/rejected": -72.01054382324219, | |
| "loss": 0.6928, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": 0.0013810636010020971, | |
| "rewards/margins": -0.0008344938978552818, | |
| "rewards/rejected": 0.0022155570331960917, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 2.962962962962963, | |
| "grad_norm": 0.603434681892395, | |
| "learning_rate": 4.962019382530521e-06, | |
| "logits/chosen": 1.829049825668335, | |
| "logits/rejected": 1.78665030002594, | |
| "logps/chosen": -81.95549011230469, | |
| "logps/rejected": -76.07003021240234, | |
| "loss": 0.6909, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": 0.0062006814405322075, | |
| "rewards/margins": 0.003528360743075609, | |
| "rewards/rejected": 0.00267231953330338, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 3.292181069958848, | |
| "grad_norm": 0.6200206279754639, | |
| "learning_rate": 4.93261217644956e-06, | |
| "logits/chosen": 1.7920262813568115, | |
| "logits/rejected": 1.7817165851593018, | |
| "logps/chosen": -84.39167022705078, | |
| "logps/rejected": -85.04205322265625, | |
| "loss": 0.6907, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.0063859038054943085, | |
| "rewards/margins": 0.007152262143790722, | |
| "rewards/rejected": -0.0007663581636734307, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 3.6213991769547325, | |
| "grad_norm": 0.5937617421150208, | |
| "learning_rate": 4.894973780788722e-06, | |
| "logits/chosen": 1.8367125988006592, | |
| "logits/rejected": 1.8536920547485352, | |
| "logps/chosen": -67.41716003417969, | |
| "logps/rejected": -72.06455993652344, | |
| "loss": 0.6891, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.00015740413800813258, | |
| "rewards/margins": 0.013882984407246113, | |
| "rewards/rejected": -0.01404038816690445, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 3.950617283950617, | |
| "grad_norm": 0.8695696592330933, | |
| "learning_rate": 4.849231551964771e-06, | |
| "logits/chosen": 1.8238048553466797, | |
| "logits/rejected": 1.7972408533096313, | |
| "logps/chosen": -104.34183502197266, | |
| "logps/rejected": -81.703369140625, | |
| "loss": 0.6864, | |
| "rewards/accuracies": 0.6000000238418579, | |
| "rewards/chosen": 0.00020323302305769175, | |
| "rewards/margins": 0.011230283416807652, | |
| "rewards/rejected": -0.011027050204575062, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 4.279835390946502, | |
| "grad_norm": 0.7833828926086426, | |
| "learning_rate": 4.7955402672006855e-06, | |
| "logits/chosen": 1.7440074682235718, | |
| "logits/rejected": 1.7824723720550537, | |
| "logps/chosen": -92.3200912475586, | |
| "logps/rejected": -85.88248443603516, | |
| "loss": 0.6861, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": 0.0005388978170230985, | |
| "rewards/margins": 0.011814715340733528, | |
| "rewards/rejected": -0.011275815777480602, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 4.609053497942387, | |
| "grad_norm": 0.7381640672683716, | |
| "learning_rate": 4.734081600808531e-06, | |
| "logits/chosen": 1.6734364032745361, | |
| "logits/rejected": 1.7311270236968994, | |
| "logps/chosen": -88.11524963378906, | |
| "logps/rejected": -97.87281799316406, | |
| "loss": 0.682, | |
| "rewards/accuracies": 0.637499988079071, | |
| "rewards/chosen": -0.006451706402003765, | |
| "rewards/margins": 0.021939774975180626, | |
| "rewards/rejected": -0.028391480445861816, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 4.938271604938271, | |
| "grad_norm": 0.7793841361999512, | |
| "learning_rate": 4.665063509461098e-06, | |
| "logits/chosen": 1.8139768838882446, | |
| "logits/rejected": 1.766761064529419, | |
| "logps/chosen": -77.28334045410156, | |
| "logps/rejected": -78.78504943847656, | |
| "loss": 0.6777, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.011237703263759613, | |
| "rewards/margins": 0.03930521756410599, | |
| "rewards/rejected": -0.050542913377285004, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 5.267489711934156, | |
| "grad_norm": 0.7763285040855408, | |
| "learning_rate": 4.588719528532342e-06, | |
| "logits/chosen": 1.7640300989151, | |
| "logits/rejected": 1.6494834423065186, | |
| "logps/chosen": -91.71728515625, | |
| "logps/rejected": -85.06828308105469, | |
| "loss": 0.6714, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": -0.034448202699422836, | |
| "rewards/margins": 0.02681097947061062, | |
| "rewards/rejected": -0.0612591877579689, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 5.596707818930041, | |
| "grad_norm": 0.9756498336791992, | |
| "learning_rate": 4.50530798188761e-06, | |
| "logits/chosen": 1.8123546838760376, | |
| "logits/rejected": 1.7651164531707764, | |
| "logps/chosen": -95.57429504394531, | |
| "logps/rejected": -83.24696350097656, | |
| "loss": 0.6667, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.02284305915236473, | |
| "rewards/margins": 0.04916313290596008, | |
| "rewards/rejected": -0.07200618833303452, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 5.925925925925926, | |
| "grad_norm": 0.8213481903076172, | |
| "learning_rate": 4.415111107797445e-06, | |
| "logits/chosen": 1.7565195560455322, | |
| "logits/rejected": 1.8321483135223389, | |
| "logps/chosen": -90.69538879394531, | |
| "logps/rejected": -77.74930572509766, | |
| "loss": 0.6651, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.030663728713989258, | |
| "rewards/margins": 0.07017870247364044, | |
| "rewards/rejected": -0.1008424386382103, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 6.255144032921811, | |
| "grad_norm": 1.067845344543457, | |
| "learning_rate": 4.318434103932622e-06, | |
| "logits/chosen": 1.7998554706573486, | |
| "logits/rejected": 1.7774969339370728, | |
| "logps/chosen": -86.76210021972656, | |
| "logps/rejected": -86.25608825683594, | |
| "loss": 0.66, | |
| "rewards/accuracies": 0.6499999761581421, | |
| "rewards/chosen": -0.07478730380535126, | |
| "rewards/margins": 0.07410295307636261, | |
| "rewards/rejected": -0.14889024198055267, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 6.584362139917696, | |
| "grad_norm": 0.905776858329773, | |
| "learning_rate": 4.215604094671835e-06, | |
| "logits/chosen": 1.8654216527938843, | |
| "logits/rejected": 1.8751890659332275, | |
| "logps/chosen": -74.4819107055664, | |
| "logps/rejected": -85.82235717773438, | |
| "loss": 0.654, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.08698664605617523, | |
| "rewards/margins": 0.06426878273487091, | |
| "rewards/rejected": -0.15125542879104614, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 6.91358024691358, | |
| "grad_norm": 0.9491797685623169, | |
| "learning_rate": 4.106969024216348e-06, | |
| "logits/chosen": 1.7801555395126343, | |
| "logits/rejected": 1.8760993480682373, | |
| "logps/chosen": -88.22879028320312, | |
| "logps/rejected": -91.16268157958984, | |
| "loss": 0.6381, | |
| "rewards/accuracies": 0.8374999761581421, | |
| "rewards/chosen": -0.05148346349596977, | |
| "rewards/margins": 0.13421732187271118, | |
| "rewards/rejected": -0.18570080399513245, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 7.242798353909465, | |
| "grad_norm": 1.0432075262069702, | |
| "learning_rate": 3.992896479256966e-06, | |
| "logits/chosen": 1.761461853981018, | |
| "logits/rejected": 1.7890026569366455, | |
| "logps/chosen": -76.50201416015625, | |
| "logps/rejected": -83.002685546875, | |
| "loss": 0.6387, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": -0.1096138209104538, | |
| "rewards/margins": 0.13470278680324554, | |
| "rewards/rejected": -0.24431662261486053, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 7.57201646090535, | |
| "grad_norm": 0.9662573933601379, | |
| "learning_rate": 3.8737724451770155e-06, | |
| "logits/chosen": 1.7268747091293335, | |
| "logits/rejected": 1.8781719207763672, | |
| "logps/chosen": -85.2467269897461, | |
| "logps/rejected": -88.09255981445312, | |
| "loss": 0.6253, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -0.09576521068811417, | |
| "rewards/margins": 0.19639071822166443, | |
| "rewards/rejected": -0.292155921459198, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 7.901234567901234, | |
| "grad_norm": 1.0413649082183838, | |
| "learning_rate": 3.7500000000000005e-06, | |
| "logits/chosen": 1.8745641708374023, | |
| "logits/rejected": 1.9019807577133179, | |
| "logps/chosen": -82.0667724609375, | |
| "logps/rejected": -70.83448791503906, | |
| "loss": 0.627, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -0.1119164451956749, | |
| "rewards/margins": 0.1671919822692871, | |
| "rewards/rejected": -0.2791084349155426, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 8.23045267489712, | |
| "grad_norm": 1.1005665063858032, | |
| "learning_rate": 3.621997950501156e-06, | |
| "logits/chosen": 1.8319737911224365, | |
| "logits/rejected": 1.8594881296157837, | |
| "logps/chosen": -82.2616195678711, | |
| "logps/rejected": -94.71448516845703, | |
| "loss": 0.6196, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.21282243728637695, | |
| "rewards/margins": 0.11685125529766083, | |
| "rewards/rejected": -0.3296736776828766, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 8.559670781893004, | |
| "grad_norm": 1.0339275598526, | |
| "learning_rate": 3.4901994150978926e-06, | |
| "logits/chosen": 1.8168909549713135, | |
| "logits/rejected": 1.8122689723968506, | |
| "logps/chosen": -86.70075988769531, | |
| "logps/rejected": -87.91035461425781, | |
| "loss": 0.6097, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.21122360229492188, | |
| "rewards/margins": 0.1889980137348175, | |
| "rewards/rejected": -0.40022164583206177, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 8.88888888888889, | |
| "grad_norm": 1.0026566982269287, | |
| "learning_rate": 3.3550503583141726e-06, | |
| "logits/chosen": 1.7882308959960938, | |
| "logits/rejected": 1.7757456302642822, | |
| "logps/chosen": -98.28022766113281, | |
| "logps/rejected": -99.22371673583984, | |
| "loss": 0.6042, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.23864057660102844, | |
| "rewards/margins": 0.17245283722877502, | |
| "rewards/rejected": -0.4110933840274811, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 9.218106995884774, | |
| "grad_norm": 0.9841915965080261, | |
| "learning_rate": 3.217008081777726e-06, | |
| "logits/chosen": 1.8728599548339844, | |
| "logits/rejected": 1.7537968158721924, | |
| "logps/chosen": -110.29930114746094, | |
| "logps/rejected": -76.87739562988281, | |
| "loss": 0.5986, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -0.16357959806919098, | |
| "rewards/margins": 0.30878886580467224, | |
| "rewards/rejected": -0.47236841917037964, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 9.547325102880658, | |
| "grad_norm": 1.0610634088516235, | |
| "learning_rate": 3.0765396768561005e-06, | |
| "logits/chosen": 1.8760344982147217, | |
| "logits/rejected": 1.8611949682235718, | |
| "logps/chosen": -83.32715606689453, | |
| "logps/rejected": -91.76313781738281, | |
| "loss": 0.5953, | |
| "rewards/accuracies": 0.7250000238418579, | |
| "rewards/chosen": -0.24952685832977295, | |
| "rewards/margins": 0.23791106045246124, | |
| "rewards/rejected": -0.4874378740787506, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 9.876543209876543, | |
| "grad_norm": 1.0584696531295776, | |
| "learning_rate": 2.9341204441673267e-06, | |
| "logits/chosen": 1.713905692100525, | |
| "logits/rejected": 1.761605978012085, | |
| "logps/chosen": -84.4806900024414, | |
| "logps/rejected": -84.85515594482422, | |
| "loss": 0.5853, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.3275205194950104, | |
| "rewards/margins": 0.20603664219379425, | |
| "rewards/rejected": -0.5335571765899658, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 10.205761316872428, | |
| "grad_norm": 1.2500941753387451, | |
| "learning_rate": 2.7902322853130758e-06, | |
| "logits/chosen": 1.70700204372406, | |
| "logits/rejected": 1.7708561420440674, | |
| "logps/chosen": -83.38414001464844, | |
| "logps/rejected": -84.97639465332031, | |
| "loss": 0.5889, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -0.2745892405509949, | |
| "rewards/margins": 0.3200107514858246, | |
| "rewards/rejected": -0.5945999622344971, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 10.534979423868313, | |
| "grad_norm": 1.0091391801834106, | |
| "learning_rate": 2.6453620722761897e-06, | |
| "logits/chosen": 1.656237006187439, | |
| "logits/rejected": 1.7875343561172485, | |
| "logps/chosen": -76.54080200195312, | |
| "logps/rejected": -99.77791595458984, | |
| "loss": 0.5805, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -0.2655033469200134, | |
| "rewards/margins": 0.23930945992469788, | |
| "rewards/rejected": -0.5048128366470337, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 10.864197530864198, | |
| "grad_norm": 1.280364990234375, | |
| "learning_rate": 2.5e-06, | |
| "logits/chosen": 1.7501579523086548, | |
| "logits/rejected": 1.8710143566131592, | |
| "logps/chosen": -95.80509948730469, | |
| "logps/rejected": -98.95433044433594, | |
| "loss": 0.5681, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.25459492206573486, | |
| "rewards/margins": 0.4196421504020691, | |
| "rewards/rejected": -0.674237072467804, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 11.193415637860083, | |
| "grad_norm": 1.0981961488723755, | |
| "learning_rate": 2.3546379277238107e-06, | |
| "logits/chosen": 1.781877875328064, | |
| "logits/rejected": 1.8056217432022095, | |
| "logps/chosen": -80.20987701416016, | |
| "logps/rejected": -94.3116226196289, | |
| "loss": 0.5684, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.39583852887153625, | |
| "rewards/margins": 0.2972935140132904, | |
| "rewards/rejected": -0.6931320428848267, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 11.522633744855966, | |
| "grad_norm": 1.1398284435272217, | |
| "learning_rate": 2.2097677146869242e-06, | |
| "logits/chosen": 1.7265570163726807, | |
| "logits/rejected": 1.7091245651245117, | |
| "logps/chosen": -75.80311584472656, | |
| "logps/rejected": -79.35089111328125, | |
| "loss": 0.5656, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -0.3116758465766907, | |
| "rewards/margins": 0.42105427384376526, | |
| "rewards/rejected": -0.7327300906181335, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 11.851851851851851, | |
| "grad_norm": 1.1577041149139404, | |
| "learning_rate": 2.0658795558326745e-06, | |
| "logits/chosen": 1.692983865737915, | |
| "logits/rejected": 1.7239530086517334, | |
| "logps/chosen": -91.54319763183594, | |
| "logps/rejected": -93.06980895996094, | |
| "loss": 0.5699, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -0.38909879326820374, | |
| "rewards/margins": 0.34535473585128784, | |
| "rewards/rejected": -0.734453558921814, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 12.181069958847736, | |
| "grad_norm": 1.405211329460144, | |
| "learning_rate": 1.9234603231439e-06, | |
| "logits/chosen": 1.7344582080841064, | |
| "logits/rejected": 1.7111711502075195, | |
| "logps/chosen": -88.46275329589844, | |
| "logps/rejected": -82.77845001220703, | |
| "loss": 0.5577, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -0.3398984372615814, | |
| "rewards/margins": 0.4276389181613922, | |
| "rewards/rejected": -0.7675372958183289, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 12.510288065843621, | |
| "grad_norm": 1.4324347972869873, | |
| "learning_rate": 1.7829919182222752e-06, | |
| "logits/chosen": 1.7103168964385986, | |
| "logits/rejected": 1.7571923732757568, | |
| "logps/chosen": -83.42256927490234, | |
| "logps/rejected": -95.48295593261719, | |
| "loss": 0.5493, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.39600270986557007, | |
| "rewards/margins": 0.447258323431015, | |
| "rewards/rejected": -0.8432610630989075, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 12.839506172839506, | |
| "grad_norm": 1.1979031562805176, | |
| "learning_rate": 1.6449496416858285e-06, | |
| "logits/chosen": 1.6545474529266357, | |
| "logits/rejected": 1.7649863958358765, | |
| "logps/chosen": -78.60997772216797, | |
| "logps/rejected": -91.09608459472656, | |
| "loss": 0.5618, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -0.3745049834251404, | |
| "rewards/margins": 0.4601859450340271, | |
| "rewards/rejected": -0.8346909284591675, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 13.168724279835391, | |
| "grad_norm": 1.164794683456421, | |
| "learning_rate": 1.509800584902108e-06, | |
| "logits/chosen": 1.7000430822372437, | |
| "logits/rejected": 1.7276875972747803, | |
| "logps/chosen": -90.08226013183594, | |
| "logps/rejected": -82.01007843017578, | |
| "loss": 0.5517, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -0.3675037622451782, | |
| "rewards/margins": 0.44133225083351135, | |
| "rewards/rejected": -0.8088359832763672, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 13.497942386831276, | |
| "grad_norm": 1.111011028289795, | |
| "learning_rate": 1.3780020494988447e-06, | |
| "logits/chosen": 1.5840178728103638, | |
| "logits/rejected": 1.7387195825576782, | |
| "logps/chosen": -88.83552551269531, | |
| "logps/rejected": -125.14837646484375, | |
| "loss": 0.5549, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.4572484493255615, | |
| "rewards/margins": 0.3457504212856293, | |
| "rewards/rejected": -0.8029988408088684, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 13.82716049382716, | |
| "grad_norm": 1.257519006729126, | |
| "learning_rate": 1.2500000000000007e-06, | |
| "logits/chosen": 1.6768709421157837, | |
| "logits/rejected": 1.6739391088485718, | |
| "logps/chosen": -88.93721008300781, | |
| "logps/rejected": -79.91072082519531, | |
| "loss": 0.5387, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -0.3928380012512207, | |
| "rewards/margins": 0.37974029779434204, | |
| "rewards/rejected": -0.7725783586502075, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 14.156378600823045, | |
| "grad_norm": 1.1822686195373535, | |
| "learning_rate": 1.1262275548229852e-06, | |
| "logits/chosen": 1.664902925491333, | |
| "logits/rejected": 1.75554621219635, | |
| "logps/chosen": -86.20747375488281, | |
| "logps/rejected": -88.08210754394531, | |
| "loss": 0.5424, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -0.4149021506309509, | |
| "rewards/margins": 0.4194130003452301, | |
| "rewards/rejected": -0.8343151211738586, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 14.48559670781893, | |
| "grad_norm": 1.1221873760223389, | |
| "learning_rate": 1.0071035207430352e-06, | |
| "logits/chosen": 1.6271368265151978, | |
| "logits/rejected": 1.7387148141860962, | |
| "logps/chosen": -73.69468688964844, | |
| "logps/rejected": -94.08692932128906, | |
| "loss": 0.5539, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.38762181997299194, | |
| "rewards/margins": 0.32877033948898315, | |
| "rewards/rejected": -0.7163921594619751, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 14.814814814814815, | |
| "grad_norm": 1.1013416051864624, | |
| "learning_rate": 8.930309757836517e-07, | |
| "logits/chosen": 1.711073637008667, | |
| "logits/rejected": 1.7389122247695923, | |
| "logps/chosen": -95.46138763427734, | |
| "logps/rejected": -89.34103393554688, | |
| "loss": 0.5277, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -0.37107351422309875, | |
| "rewards/margins": 0.5281612277030945, | |
| "rewards/rejected": -0.8992347717285156, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 15.1440329218107, | |
| "grad_norm": 1.1581361293792725, | |
| "learning_rate": 7.843959053281663e-07, | |
| "logits/chosen": 1.5964223146438599, | |
| "logits/rejected": 1.6815162897109985, | |
| "logps/chosen": -84.13243103027344, | |
| "logps/rejected": -86.10100555419922, | |
| "loss": 0.5482, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -0.48493900895118713, | |
| "rewards/margins": 0.46702641248703003, | |
| "rewards/rejected": -0.9519654512405396, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 15.473251028806585, | |
| "grad_norm": 1.1499443054199219, | |
| "learning_rate": 6.815658960673782e-07, | |
| "logits/chosen": 1.761904001235962, | |
| "logits/rejected": 1.7722113132476807, | |
| "logps/chosen": -83.77279663085938, | |
| "logps/rejected": -81.67822265625, | |
| "loss": 0.5428, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -0.439403772354126, | |
| "rewards/margins": 0.3410206437110901, | |
| "rewards/rejected": -0.7804244160652161, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 15.802469135802468, | |
| "grad_norm": 1.3699110746383667, | |
| "learning_rate": 5.848888922025553e-07, | |
| "logits/chosen": 1.7152798175811768, | |
| "logits/rejected": 1.7187302112579346, | |
| "logps/chosen": -91.2159652709961, | |
| "logps/rejected": -82.41645812988281, | |
| "loss": 0.5314, | |
| "rewards/accuracies": 0.8500000238418579, | |
| "rewards/chosen": -0.41314107179641724, | |
| "rewards/margins": 0.44486457109451294, | |
| "rewards/rejected": -0.858005702495575, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 16.131687242798353, | |
| "grad_norm": 1.2674903869628906, | |
| "learning_rate": 4.946920181123904e-07, | |
| "logits/chosen": 1.7741254568099976, | |
| "logits/rejected": 1.7767646312713623, | |
| "logps/chosen": -97.18695831298828, | |
| "logps/rejected": -101.12361907958984, | |
| "loss": 0.5384, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -0.4852083623409271, | |
| "rewards/margins": 0.3798518776893616, | |
| "rewards/rejected": -0.8650602102279663, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 16.46090534979424, | |
| "grad_norm": 1.2736115455627441, | |
| "learning_rate": 4.1128047146765936e-07, | |
| "logits/chosen": 1.7331253290176392, | |
| "logits/rejected": 1.8541399240493774, | |
| "logps/chosen": -89.29019165039062, | |
| "logps/rejected": -92.32213592529297, | |
| "loss": 0.5441, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.5169966816902161, | |
| "rewards/margins": 0.4245742857456207, | |
| "rewards/rejected": -0.9415708780288696, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 16.790123456790123, | |
| "grad_norm": 1.735156536102295, | |
| "learning_rate": 3.3493649053890325e-07, | |
| "logits/chosen": 1.6830623149871826, | |
| "logits/rejected": 1.6669334173202515, | |
| "logps/chosen": -106.5848388671875, | |
| "logps/rejected": -88.74864196777344, | |
| "loss": 0.5233, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -0.43169134855270386, | |
| "rewards/margins": 0.38342052698135376, | |
| "rewards/rejected": -0.8151118159294128, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 17.11934156378601, | |
| "grad_norm": 1.1315010786056519, | |
| "learning_rate": 2.6591839919146963e-07, | |
| "logits/chosen": 1.6711467504501343, | |
| "logits/rejected": 1.7918866872787476, | |
| "logps/chosen": -79.41645050048828, | |
| "logps/rejected": -94.12269592285156, | |
| "loss": 0.5301, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": -0.4840959906578064, | |
| "rewards/margins": 0.4427928030490875, | |
| "rewards/rejected": -0.9268887639045715, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 17.448559670781894, | |
| "grad_norm": 1.1289525032043457, | |
| "learning_rate": 2.044597327993153e-07, | |
| "logits/chosen": 1.6908950805664062, | |
| "logits/rejected": 1.7840102910995483, | |
| "logps/chosen": -81.63914489746094, | |
| "logps/rejected": -87.42573547363281, | |
| "loss": 0.5401, | |
| "rewards/accuracies": 0.800000011920929, | |
| "rewards/chosen": -0.4083719849586487, | |
| "rewards/margins": 0.46273595094680786, | |
| "rewards/rejected": -0.8711079359054565, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 17.77777777777778, | |
| "grad_norm": 1.6032687425613403, | |
| "learning_rate": 1.507684480352292e-07, | |
| "logits/chosen": 1.5787999629974365, | |
| "logits/rejected": 1.7288663387298584, | |
| "logps/chosen": -79.30626678466797, | |
| "logps/rejected": -110.68818664550781, | |
| "loss": 0.5326, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -0.449043333530426, | |
| "rewards/margins": 0.5290273427963257, | |
| "rewards/rejected": -0.9780707359313965, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 18.106995884773664, | |
| "grad_norm": 1.1730103492736816, | |
| "learning_rate": 1.0502621921127776e-07, | |
| "logits/chosen": 1.7219009399414062, | |
| "logits/rejected": 1.7316343784332275, | |
| "logps/chosen": -89.55079650878906, | |
| "logps/rejected": -91.233642578125, | |
| "loss": 0.5312, | |
| "rewards/accuracies": 0.824999988079071, | |
| "rewards/chosen": -0.42370209097862244, | |
| "rewards/margins": 0.44324246048927307, | |
| "rewards/rejected": -0.8669446110725403, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 18.43621399176955, | |
| "grad_norm": 1.4936864376068115, | |
| "learning_rate": 6.738782355044048e-08, | |
| "logits/chosen": 1.7580926418304443, | |
| "logits/rejected": 1.8186404705047607, | |
| "logps/chosen": -70.40943908691406, | |
| "logps/rejected": -91.91678619384766, | |
| "loss": 0.5362, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.42562800645828247, | |
| "rewards/margins": 0.34529608488082886, | |
| "rewards/rejected": -0.7709239721298218, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 18.765432098765434, | |
| "grad_norm": 1.1581302881240845, | |
| "learning_rate": 3.798061746947995e-08, | |
| "logits/chosen": 1.625957727432251, | |
| "logits/rejected": 1.6400096416473389, | |
| "logps/chosen": -95.5112533569336, | |
| "logps/rejected": -83.62013244628906, | |
| "loss": 0.5241, | |
| "rewards/accuracies": 0.8374999761581421, | |
| "rewards/chosen": -0.3977498412132263, | |
| "rewards/margins": 0.4945148527622223, | |
| "rewards/rejected": -0.8922646641731262, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 19.094650205761315, | |
| "grad_norm": 1.129463791847229, | |
| "learning_rate": 1.6904105645142443e-08, | |
| "logits/chosen": 1.6559406518936157, | |
| "logits/rejected": 1.744818091392517, | |
| "logps/chosen": -83.44347381591797, | |
| "logps/rejected": -98.82951354980469, | |
| "loss": 0.5302, | |
| "rewards/accuracies": 0.8374999761581421, | |
| "rewards/chosen": -0.4388834834098816, | |
| "rewards/margins": 0.4895119071006775, | |
| "rewards/rejected": -0.9283954501152039, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 19.4238683127572, | |
| "grad_norm": 1.193711519241333, | |
| "learning_rate": 4.229604321829561e-09, | |
| "logits/chosen": 1.818708062171936, | |
| "logits/rejected": 1.714835524559021, | |
| "logps/chosen": -118.84761047363281, | |
| "logps/rejected": -82.6863784790039, | |
| "loss": 0.5319, | |
| "rewards/accuracies": 0.7875000238418579, | |
| "rewards/chosen": -0.3866303563117981, | |
| "rewards/margins": 0.44671958684921265, | |
| "rewards/rejected": -0.8333500027656555, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 19.753086419753085, | |
| "grad_norm": 1.4505993127822876, | |
| "learning_rate": 0.0, | |
| "logits/chosen": 1.70465087890625, | |
| "logits/rejected": 1.680711030960083, | |
| "logps/chosen": -107.31657409667969, | |
| "logps/rejected": -92.33575439453125, | |
| "loss": 0.5352, | |
| "rewards/accuracies": 0.8374999761581421, | |
| "rewards/chosen": -0.4541262984275818, | |
| "rewards/margins": 0.4393085539340973, | |
| "rewards/rejected": -0.8934348821640015, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 19.753086419753085, | |
| "step": 600, | |
| "total_flos": 2.0386871270503875e+18, | |
| "train_loss": 0.6034941236178081, | |
| "train_runtime": 4431.3984, | |
| "train_samples_per_second": 8.747, | |
| "train_steps_per_second": 0.135 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 600, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 20, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.0386871270503875e+18, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |