{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 19.502092050209207,
  "eval_steps": 500,
  "global_step": 580,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.33472803347280333,
      "grad_norm": 3.9892160892486572,
      "learning_rate": 8.620689655172415e-07,
      "logits/chosen": -2.315223217010498,
      "logits/rejected": -2.3654401302337646,
      "logps/chosen": -65.86729431152344,
      "logps/rejected": -77.53572845458984,
      "loss": 0.6929,
      "rewards/accuracies": 0.3499999940395355,
      "rewards/chosen": 0.0023138518445193768,
      "rewards/margins": -0.001122759305872023,
      "rewards/rejected": 0.0034366101026535034,
      "step": 10
    },
    {
      "epoch": 0.6694560669456067,
      "grad_norm": 3.5659756660461426,
      "learning_rate": 1.724137931034483e-06,
      "logits/chosen": -2.341399669647217,
      "logits/rejected": -2.3567094802856445,
      "logps/chosen": -66.60242462158203,
      "logps/rejected": -69.70094299316406,
      "loss": 0.6929,
      "rewards/accuracies": 0.512499988079071,
      "rewards/chosen": -0.0013719359412789345,
      "rewards/margins": -0.0035000313073396683,
      "rewards/rejected": 0.002128095831722021,
      "step": 20
    },
    {
      "epoch": 1.00418410041841,
      "grad_norm": 4.912586688995361,
      "learning_rate": 2.5862068965517246e-06,
      "logits/chosen": -2.3429622650146484,
      "logits/rejected": -2.3658394813537598,
      "logps/chosen": -71.6301040649414,
      "logps/rejected": -78.41346740722656,
      "loss": 0.6938,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": 0.003577103139832616,
      "rewards/margins": 0.00785654503852129,
      "rewards/rejected": -0.004279441200196743,
      "step": 30
    },
    {
      "epoch": 1.3389121338912133,
      "grad_norm": 4.810107707977295,
      "learning_rate": 3.448275862068966e-06,
      "logits/chosen": -2.3610458374023438,
      "logits/rejected": -2.3885395526885986,
      "logps/chosen": -66.8291244506836,
      "logps/rejected": -62.15415573120117,
      "loss": 0.6893,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": -6.734435737598687e-05,
      "rewards/margins": 0.006865750066936016,
      "rewards/rejected": -0.0069330958649516106,
      "step": 40
    },
    {
      "epoch": 1.6736401673640167,
      "grad_norm": 4.670071125030518,
      "learning_rate": 4.310344827586207e-06,
      "logits/chosen": -2.304999351501465,
      "logits/rejected": -2.335301399230957,
      "logps/chosen": -75.09913635253906,
      "logps/rejected": -77.72399139404297,
      "loss": 0.6878,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": 0.003225918160751462,
      "rewards/margins": 0.010454346425831318,
      "rewards/rejected": -0.007228427566587925,
      "step": 50
    },
    {
      "epoch": 2.00836820083682,
      "grad_norm": 4.2342000007629395,
      "learning_rate": 4.999818897894192e-06,
      "logits/chosen": -2.363574504852295,
      "logits/rejected": -2.363882064819336,
      "logps/chosen": -62.84125900268555,
      "logps/rejected": -61.92932891845703,
      "loss": 0.6855,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -0.0032769464887678623,
      "rewards/margins": 0.02090405486524105,
      "rewards/rejected": -0.024181004613637924,
      "step": 60
    },
    {
      "epoch": 2.3430962343096233,
      "grad_norm": 4.369245529174805,
      "learning_rate": 4.9934830787948756e-06,
      "logits/chosen": -2.378016948699951,
      "logits/rejected": -2.373137950897217,
      "logps/chosen": -74.67327880859375,
      "logps/rejected": -69.20399475097656,
      "loss": 0.668,
      "rewards/accuracies": 0.7875000238418579,
      "rewards/chosen": -0.0003526444488670677,
      "rewards/margins": 0.04865006357431412,
      "rewards/rejected": -0.04900271072983742,
      "step": 70
    },
    {
      "epoch": 2.6778242677824267,
      "grad_norm": 4.444687366485596,
      "learning_rate": 4.978118375700895e-06,
      "logits/chosen": -2.3403103351593018,
      "logits/rejected": -2.370321273803711,
      "logps/chosen": -77.29728698730469,
      "logps/rejected": -85.79756164550781,
      "loss": 0.6566,
      "rewards/accuracies": 0.8374999761581421,
      "rewards/chosen": 0.0051120575517416,
      "rewards/margins": 0.09415190666913986,
      "rewards/rejected": -0.08903985470533371,
      "step": 80
    },
    {
      "epoch": 3.01255230125523,
      "grad_norm": 4.876573085784912,
      "learning_rate": 4.953780424089803e-06,
      "logits/chosen": -2.3614611625671387,
      "logits/rejected": -2.385697841644287,
      "logps/chosen": -73.22442626953125,
      "logps/rejected": -82.25682067871094,
      "loss": 0.645,
      "rewards/accuracies": 0.800000011920929,
      "rewards/chosen": -0.016868198290467262,
      "rewards/margins": 0.10679063946008682,
      "rewards/rejected": -0.12365883588790894,
      "step": 90
    },
    {
      "epoch": 3.3472803347280333,
      "grad_norm": 4.355966567993164,
      "learning_rate": 4.920557351506409e-06,
      "logits/chosen": -2.323256254196167,
      "logits/rejected": -2.341057300567627,
      "logps/chosen": -78.37105560302734,
      "logps/rejected": -86.8406982421875,
      "loss": 0.6072,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": -0.015012519434094429,
      "rewards/margins": 0.20561759173870087,
      "rewards/rejected": -0.22063009440898895,
      "step": 100
    },
    {
      "epoch": 3.7698744769874475,
      "grad_norm": 4.361391067504883,
      "learning_rate": 4.878569458453592e-06,
      "logits/chosen": -2.3163838386535645,
      "logits/rejected": -2.3566031455993652,
      "logps/chosen": -83.33145904541016,
      "logps/rejected": -96.48517608642578,
      "loss": 0.5908,
      "rewards/accuracies": 0.8374999761581421,
      "rewards/chosen": -0.08870697021484375,
      "rewards/margins": 0.24879300594329834,
      "rewards/rejected": -0.3374999761581421,
      "step": 110
    },
    {
      "epoch": 4.104602510460251,
      "grad_norm": 4.315061569213867,
      "learning_rate": 4.827968782785062e-06,
      "logits/chosen": -2.3728129863739014,
      "logits/rejected": -2.3889667987823486,
      "logps/chosen": -73.0484619140625,
      "logps/rejected": -73.4913558959961,
      "loss": 0.5783,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": -0.0605628564953804,
      "rewards/margins": 0.2945060133934021,
      "rewards/rejected": -0.3550689220428467,
      "step": 120
    },
    {
      "epoch": 4.439330543933054,
      "grad_norm": 4.438860893249512,
      "learning_rate": 4.7689385491773934e-06,
      "logits/chosen": -2.3526523113250732,
      "logits/rejected": -2.364795684814453,
      "logps/chosen": -67.69630432128906,
      "logps/rejected": -84.85731506347656,
      "loss": 0.5338,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": -0.1054786667227745,
      "rewards/margins": 0.4161924421787262,
      "rewards/rejected": -0.5216711759567261,
      "step": 130
    },
    {
      "epoch": 4.7740585774058575,
      "grad_norm": 4.5405473709106445,
      "learning_rate": 4.70169250567482e-06,
      "logits/chosen": -2.3756489753723145,
      "logits/rejected": -2.374919891357422,
      "logps/chosen": -68.5466079711914,
      "logps/rejected": -76.15412902832031,
      "loss": 0.5215,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": -0.16213981807231903,
      "rewards/margins": 0.47565969824790955,
      "rewards/rejected": -0.6377995610237122,
      "step": 140
    },
    {
      "epoch": 5.108786610878661,
      "grad_norm": 4.596691608428955,
      "learning_rate": 4.626474149709127e-06,
      "logits/chosen": -2.428659439086914,
      "logits/rejected": -2.4141571521759033,
      "logps/chosen": -78.08479309082031,
      "logps/rejected": -68.3617172241211,
      "loss": 0.5019,
      "rewards/accuracies": 0.8500000238418579,
      "rewards/chosen": -0.20662447810173035,
      "rewards/margins": 0.4026559889316559,
      "rewards/rejected": -0.6092804670333862,
      "step": 150
    },
    {
      "epoch": 5.443514644351464,
      "grad_norm": 4.364648818969727,
      "learning_rate": 4.54355584639723e-06,
      "logits/chosen": -2.408982992172241,
      "logits/rejected": -2.4170727729797363,
      "logps/chosen": -81.3556900024414,
      "logps/rejected": -86.85897064208984,
      "loss": 0.4586,
      "rewards/accuracies": 0.824999988079071,
      "rewards/chosen": -0.23941104114055634,
      "rewards/margins": 0.675674319267273,
      "rewards/rejected": -0.9150853157043457,
      "step": 160
    },
    {
      "epoch": 5.7782426778242675,
      "grad_norm": 5.241800308227539,
      "learning_rate": 4.45323784230908e-06,
      "logits/chosen": -2.4194908142089844,
      "logits/rejected": -2.4498963356018066,
      "logps/chosen": -62.32392120361328,
      "logps/rejected": -76.39479064941406,
      "loss": 0.4442,
      "rewards/accuracies": 0.887499988079071,
      "rewards/chosen": -0.26662638783454895,
      "rewards/margins": 0.6662653088569641,
      "rewards/rejected": -0.9328916668891907,
      "step": 170
    },
    {
      "epoch": 6.112970711297071,
      "grad_norm": 4.73954439163208,
      "learning_rate": 4.355847178277025e-06,
      "logits/chosen": -2.4365036487579346,
      "logits/rejected": -2.435439348220825,
      "logps/chosen": -73.06513977050781,
      "logps/rejected": -81.04569244384766,
      "loss": 0.4355,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": -0.37924182415008545,
      "rewards/margins": 0.7773979902267456,
      "rewards/rejected": -1.156639814376831,
      "step": 180
    },
    {
      "epoch": 6.447698744769874,
      "grad_norm": 5.250921726226807,
      "learning_rate": 4.2517365051833564e-06,
      "logits/chosen": -2.387922525405884,
      "logits/rejected": -2.3835678100585938,
      "logps/chosen": -64.85784912109375,
      "logps/rejected": -90.08439636230469,
      "loss": 0.3719,
      "rewards/accuracies": 0.925000011920929,
      "rewards/chosen": -0.42228370904922485,
      "rewards/margins": 1.0562283992767334,
      "rewards/rejected": -1.478512167930603,
      "step": 190
    },
    {
      "epoch": 6.7824267782426775,
      "grad_norm": 5.088508129119873,
      "learning_rate": 4.141282807014034e-06,
      "logits/chosen": -2.376319169998169,
      "logits/rejected": -2.3985953330993652,
      "logps/chosen": -70.64585876464844,
      "logps/rejected": -89.17048645019531,
      "loss": 0.3829,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": -0.5233972072601318,
      "rewards/margins": 1.1063960790634155,
      "rewards/rejected": -1.629793405532837,
      "step": 200
    },
    {
      "epoch": 7.117154811715481,
      "grad_norm": 4.6062092781066895,
      "learning_rate": 4.024886035802432e-06,
      "logits/chosen": -2.371851682662964,
      "logits/rejected": -2.3844287395477295,
      "logps/chosen": -74.63328552246094,
      "logps/rejected": -97.81452178955078,
      "loss": 0.3522,
      "rewards/accuracies": 0.925000011920929,
      "rewards/chosen": -0.6278538703918457,
      "rewards/margins": 1.2317354679107666,
      "rewards/rejected": -1.8595889806747437,
      "step": 210
    },
    {
      "epoch": 7.451882845188284,
      "grad_norm": 5.105669021606445,
      "learning_rate": 3.9029676634059565e-06,
      "logits/chosen": -2.4011385440826416,
      "logits/rejected": -2.4039382934570312,
      "logps/chosen": -75.92952728271484,
      "logps/rejected": -78.41490936279297,
      "loss": 0.3219,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": -0.39645594358444214,
      "rewards/margins": 1.2095177173614502,
      "rewards/rejected": -1.6059738397598267,
      "step": 220
    },
    {
      "epoch": 7.786610878661088,
      "grad_norm": 6.292915344238281,
      "learning_rate": 3.7759691553595214e-06,
      "logits/chosen": -2.3707780838012695,
      "logits/rejected": -2.377169609069824,
      "logps/chosen": -88.07064056396484,
      "logps/rejected": -108.6225814819336,
      "loss": 0.3041,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": -0.9827474355697632,
      "rewards/margins": 1.3651618957519531,
      "rewards/rejected": -2.3479092121124268,
      "step": 230
    },
    {
      "epoch": 8.121338912133892,
      "grad_norm": 5.0669097900390625,
      "learning_rate": 3.6443503723320837e-06,
      "logits/chosen": -2.3608062267303467,
      "logits/rejected": -2.3792402744293213,
      "logps/chosen": -72.83047485351562,
      "logps/rejected": -91.09341430664062,
      "loss": 0.3065,
      "rewards/accuracies": 0.9125000238418579,
      "rewards/chosen": -0.9334943890571594,
      "rewards/margins": 1.3210034370422363,
      "rewards/rejected": -2.25449800491333,
      "step": 240
    },
    {
      "epoch": 8.456066945606695,
      "grad_norm": 5.0598931312561035,
      "learning_rate": 3.508587904974522e-06,
      "logits/chosen": -2.324855327606201,
      "logits/rejected": -2.364541530609131,
      "logps/chosen": -90.57644653320312,
      "logps/rejected": -106.41752624511719,
      "loss": 0.2498,
      "rewards/accuracies": 0.9750000238418579,
      "rewards/chosen": -0.8531273007392883,
      "rewards/margins": 1.8315904140472412,
      "rewards/rejected": -2.684717893600464,
      "step": 250
    },
    {
      "epoch": 8.790794979079498,
      "grad_norm": 6.120776653289795,
      "learning_rate": 3.3691733481883693e-06,
      "logits/chosen": -2.3436760902404785,
      "logits/rejected": -2.3720099925994873,
      "logps/chosen": -86.95789337158203,
      "logps/rejected": -102.34903717041016,
      "loss": 0.2532,
      "rewards/accuracies": 0.925000011920929,
      "rewards/chosen": -1.1573801040649414,
      "rewards/margins": 1.7690637111663818,
      "rewards/rejected": -2.9264438152313232,
      "step": 260
    },
    {
      "epoch": 9.125523012552302,
      "grad_norm": 4.666015625,
      "learning_rate": 3.226611521064278e-06,
      "logits/chosen": -2.3132309913635254,
      "logits/rejected": -2.309297800064087,
      "logps/chosen": -78.139404296875,
      "logps/rejected": -99.09760284423828,
      "loss": 0.2314,
      "rewards/accuracies": 0.9750000238418579,
      "rewards/chosen": -1.0649818181991577,
      "rewards/margins": 1.8774713277816772,
      "rewards/rejected": -2.942453384399414,
      "step": 270
    },
    {
      "epoch": 9.460251046025105,
      "grad_norm": 8.85567855834961,
      "learning_rate": 3.0814186389357765e-06,
      "logits/chosen": -2.3629987239837646,
      "logits/rejected": -2.385927200317383,
      "logps/chosen": -91.09283447265625,
      "logps/rejected": -102.37603759765625,
      "loss": 0.2142,
      "rewards/accuracies": 0.9624999761581421,
      "rewards/chosen": -1.5404099225997925,
      "rewards/margins": 2.121422290802002,
      "rewards/rejected": -3.661832094192505,
      "step": 280
    },
    {
      "epoch": 9.794979079497908,
      "grad_norm": 5.228074550628662,
      "learning_rate": 2.9341204441673267e-06,
      "logits/chosen": -2.356905221939087,
      "logits/rejected": -2.3635311126708984,
      "logps/chosen": -91.65778350830078,
      "logps/rejected": -117.89949035644531,
      "loss": 0.1881,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -1.6620346307754517,
      "rewards/margins": 2.1766200065612793,
      "rewards/rejected": -3.8386547565460205,
      "step": 290
    },
    {
      "epoch": 10.129707112970712,
      "grad_norm": 5.115809440612793,
      "learning_rate": 2.785250302445062e-06,
      "logits/chosen": -2.2903695106506348,
      "logits/rejected": -2.2926692962646484,
      "logps/chosen": -104.5173110961914,
      "logps/rejected": -123.13216400146484,
      "loss": 0.1798,
      "rewards/accuracies": 0.9624999761581421,
      "rewards/chosen": -1.7224146127700806,
      "rewards/margins": 2.3892369270324707,
      "rewards/rejected": -4.111651420593262,
      "step": 300
    },
    {
      "epoch": 10.464435146443515,
      "grad_norm": 5.882064342498779,
      "learning_rate": 2.6353472714635443e-06,
      "logits/chosen": -2.2836384773254395,
      "logits/rejected": -2.2969231605529785,
      "logps/chosen": -88.8235855102539,
      "logps/rejected": -119.67433166503906,
      "loss": 0.1558,
      "rewards/accuracies": 0.987500011920929,
      "rewards/chosen": -1.6937462091445923,
      "rewards/margins": 2.4059743881225586,
      "rewards/rejected": -4.0997209548950195,
      "step": 310
    },
    {
      "epoch": 10.799163179916318,
      "grad_norm": 6.9003376960754395,
      "learning_rate": 2.4849541490017868e-06,
      "logits/chosen": -2.289567232131958,
      "logits/rejected": -2.3216423988342285,
      "logps/chosen": -90.58432006835938,
      "logps/rejected": -118.13006591796875,
      "loss": 0.1538,
      "rewards/accuracies": 0.9750000238418579,
      "rewards/chosen": -1.6574989557266235,
      "rewards/margins": 2.9354054927825928,
      "rewards/rejected": -4.592904567718506,
      "step": 320
    },
    {
      "epoch": 11.133891213389122,
      "grad_norm": 4.916522979736328,
      "learning_rate": 2.3346155074564712e-06,
      "logits/chosen": -2.2699310779571533,
      "logits/rejected": -2.3001017570495605,
      "logps/chosen": -100.2576675415039,
      "logps/rejected": -133.8759307861328,
      "loss": 0.1373,
      "rewards/accuracies": 0.9750000238418579,
      "rewards/chosen": -2.174388885498047,
      "rewards/margins": 3.038696050643921,
      "rewards/rejected": -5.213086128234863,
      "step": 330
    },
    {
      "epoch": 11.468619246861925,
      "grad_norm": 6.739722728729248,
      "learning_rate": 2.184875721949277e-06,
      "logits/chosen": -2.2740581035614014,
      "logits/rejected": -2.315854549407959,
      "logps/chosen": -83.28224182128906,
      "logps/rejected": -107.7516098022461,
      "loss": 0.1257,
      "rewards/accuracies": 0.9624999761581421,
      "rewards/chosen": -1.777440071105957,
      "rewards/margins": 2.704913377761841,
      "rewards/rejected": -4.482353687286377,
      "step": 340
    },
    {
      "epoch": 11.803347280334728,
      "grad_norm": 4.988001823425293,
      "learning_rate": 2.0362769991485514e-06,
      "logits/chosen": -2.2616047859191895,
      "logits/rejected": -2.2596449851989746,
      "logps/chosen": -107.07649230957031,
      "logps/rejected": -139.80697631835938,
      "loss": 0.1184,
      "rewards/accuracies": 0.987500011920929,
      "rewards/chosen": -2.618734359741211,
      "rewards/margins": 3.291966199874878,
      "rewards/rejected": -5.910700798034668,
      "step": 350
    },
    {
      "epoch": 12.138075313807532,
      "grad_norm": 4.956677436828613,
      "learning_rate": 1.8893574139429226e-06,
      "logits/chosen": -2.233889102935791,
      "logits/rejected": -2.2601330280303955,
      "logps/chosen": -95.82877349853516,
      "logps/rejected": -138.9019775390625,
      "loss": 0.1106,
      "rewards/accuracies": 0.987500011920929,
      "rewards/chosen": -2.5194194316864014,
      "rewards/margins": 3.470710039138794,
      "rewards/rejected": -5.990128993988037,
      "step": 360
    },
    {
      "epoch": 12.472803347280335,
      "grad_norm": 4.895273208618164,
      "learning_rate": 1.744648961076068e-06,
      "logits/chosen": -2.2324471473693848,
      "logits/rejected": -2.233158588409424,
      "logps/chosen": -117.90779113769531,
      "logps/rejected": -141.53753662109375,
      "loss": 0.0907,
      "rewards/accuracies": 0.9750000238418579,
      "rewards/chosen": -2.7019529342651367,
      "rewards/margins": 3.4567368030548096,
      "rewards/rejected": -6.158689975738525,
      "step": 370
    },
    {
      "epoch": 12.807531380753138,
      "grad_norm": 5.789585590362549,
      "learning_rate": 1.602675628797636e-06,
      "logits/chosen": -2.2296676635742188,
      "logits/rejected": -2.2535061836242676,
      "logps/chosen": -117.69709777832031,
      "logps/rejected": -150.61538696289062,
      "loss": 0.0923,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -3.4674232006073,
      "rewards/margins": 3.8480961322784424,
      "rewards/rejected": -7.3155198097229,
      "step": 380
    },
    {
      "epoch": 13.142259414225942,
      "grad_norm": 4.082385540008545,
      "learning_rate": 1.4639515015056205e-06,
      "logits/chosen": -2.232024908065796,
      "logits/rejected": -2.235680103302002,
      "logps/chosen": -96.60597229003906,
      "logps/rejected": -130.7404022216797,
      "loss": 0.0876,
      "rewards/accuracies": 0.9624999761581421,
      "rewards/chosen": -2.816174268722534,
      "rewards/margins": 3.2225749492645264,
      "rewards/rejected": -6.038748741149902,
      "step": 390
    },
    {
      "epoch": 13.476987447698745,
      "grad_norm": 4.423525333404541,
      "learning_rate": 1.328978898250525e-06,
      "logits/chosen": -2.2275261878967285,
      "logits/rejected": -2.2222421169281006,
      "logps/chosen": -107.16130065917969,
      "logps/rejected": -148.48500061035156,
      "loss": 0.0662,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -3.10858154296875,
      "rewards/margins": 3.9508070945739746,
      "rewards/rejected": -7.059388637542725,
      "step": 400
    },
    {
      "epoch": 13.811715481171548,
      "grad_norm": 3.721898078918457,
      "learning_rate": 1.198246553841744e-06,
      "logits/chosen": -2.2333359718322754,
      "logits/rejected": -2.2442851066589355,
      "logps/chosen": -104.8399429321289,
      "logps/rejected": -137.98049926757812,
      "loss": 0.0808,
      "rewards/accuracies": 0.987500011920929,
      "rewards/chosen": -3.3306171894073486,
      "rewards/margins": 3.471170425415039,
      "rewards/rejected": -6.80178689956665,
      "step": 410
    },
    {
      "epoch": 14.146443514644352,
      "grad_norm": 4.411396026611328,
      "learning_rate": 1.0722278491423998e-06,
      "logits/chosen": -2.2033934593200684,
      "logits/rejected": -2.206735610961914,
      "logps/chosen": -122.04057312011719,
      "logps/rejected": -139.2510528564453,
      "loss": 0.0651,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -3.4570648670196533,
      "rewards/margins": 3.5551300048828125,
      "rewards/rejected": -7.012194633483887,
      "step": 420
    },
    {
      "epoch": 14.481171548117155,
      "grad_norm": 4.514885902404785,
      "learning_rate": 9.513790969606926e-07,
      "logits/chosen": -2.1915841102600098,
      "logits/rejected": -2.23836088180542,
      "logps/chosen": -111.24171447753906,
      "logps/rejected": -159.8766326904297,
      "loss": 0.0609,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -3.719008207321167,
      "rewards/margins": 4.095301628112793,
      "rewards/rejected": -7.814309597015381,
      "step": 430
    },
    {
      "epoch": 14.815899581589958,
      "grad_norm": 6.274470329284668,
      "learning_rate": 8.361378897445643e-07,
      "logits/chosen": -2.2278056144714355,
      "logits/rejected": -2.2360167503356934,
      "logps/chosen": -95.31124877929688,
      "logps/rejected": -136.5842742919922,
      "loss": 0.0624,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -3.1528682708740234,
      "rewards/margins": 4.095580577850342,
      "rewards/rejected": -7.248448848724365,
      "step": 440
    },
    {
      "epoch": 15.150627615062762,
      "grad_norm": 4.49701452255249,
      "learning_rate": 7.269215150626391e-07,
      "logits/chosen": -2.196305513381958,
      "logits/rejected": -2.2363815307617188,
      "logps/chosen": -101.97003173828125,
      "logps/rejected": -151.15646362304688,
      "loss": 0.0513,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -3.562624454498291,
      "rewards/margins": 4.104978084564209,
      "rewards/rejected": -7.667603492736816,
      "step": 450
    },
    {
      "epoch": 15.485355648535565,
      "grad_norm": 4.746140956878662,
      "learning_rate": 6.241254446089942e-07,
      "logits/chosen": -2.1973156929016113,
      "logits/rejected": -2.217236042022705,
      "logps/chosen": -108.36579895019531,
      "logps/rejected": -146.45358276367188,
      "loss": 0.0588,
      "rewards/accuracies": 0.987500011920929,
      "rewards/chosen": -3.8717312812805176,
      "rewards/margins": 3.9130451679229736,
      "rewards/rejected": -7.784776210784912,
      "step": 460
    },
    {
      "epoch": 15.820083682008368,
      "grad_norm": 2.910703182220459,
      "learning_rate": 5.281219022030423e-07,
      "logits/chosen": -2.1933655738830566,
      "logits/rejected": -2.193134307861328,
      "logps/chosen": -125.05366516113281,
      "logps/rejected": -158.47085571289062,
      "loss": 0.0484,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -3.9004642963409424,
      "rewards/margins": 4.252579689025879,
      "rewards/rejected": -8.153043746948242,
      "step": 470
    },
    {
      "epoch": 16.15481171548117,
      "grad_norm": 2.814772367477417,
      "learning_rate": 4.392585159698087e-07,
      "logits/chosen": -2.1886072158813477,
      "logits/rejected": -2.1937201023101807,
      "logps/chosen": -113.6917724609375,
      "logps/rejected": -160.83851623535156,
      "loss": 0.0443,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -3.779496669769287,
      "rewards/margins": 4.261423587799072,
      "rewards/rejected": -8.04092025756836,
      "step": 480
    },
    {
      "epoch": 16.489539748953973,
      "grad_norm": 3.579289197921753,
      "learning_rate": 3.578570595810274e-07,
      "logits/chosen": -2.19553542137146,
      "logits/rejected": -2.1956517696380615,
      "logps/chosen": -110.0953140258789,
      "logps/rejected": -165.99652099609375,
      "loss": 0.0483,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -3.7303287982940674,
      "rewards/margins": 4.63196325302124,
      "rewards/rejected": -8.36229133605957,
      "step": 490
    },
    {
      "epoch": 16.824267782426777,
      "grad_norm": 4.428997039794922,
      "learning_rate": 2.8421228711503127e-07,
      "logits/chosen": -2.1704812049865723,
      "logits/rejected": -2.183809280395508,
      "logps/chosen": -99.66941833496094,
      "logps/rejected": -152.3459930419922,
      "loss": 0.0468,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -3.7393298149108887,
      "rewards/margins": 4.549952030181885,
      "rewards/rejected": -8.289281845092773,
      "step": 500
    },
    {
      "epoch": 17.15899581589958,
      "grad_norm": 3.5141501426696777,
      "learning_rate": 2.1859086575439225e-07,
      "logits/chosen": -2.114220380783081,
      "logits/rejected": -2.1453700065612793,
      "logps/chosen": -119.66983795166016,
      "logps/rejected": -161.91326904296875,
      "loss": 0.0398,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -4.162126064300537,
      "rewards/margins": 4.615090370178223,
      "rewards/rejected": -8.777216911315918,
      "step": 510
    },
    {
      "epoch": 17.493723849372383,
      "grad_norm": 3.1655192375183105,
      "learning_rate": 1.6123041018599766e-07,
      "logits/chosen": -2.1598916053771973,
      "logits/rejected": -2.151259660720825,
      "logps/chosen": -112.63690185546875,
      "logps/rejected": -166.2643280029297,
      "loss": 0.0436,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -3.9895179271698,
      "rewards/margins": 4.71376895904541,
      "rewards/rejected": -8.703287124633789,
      "step": 520
    },
    {
      "epoch": 17.828451882845187,
      "grad_norm": 3.882448673248291,
      "learning_rate": 1.1233862220001168e-07,
      "logits/chosen": -2.1259069442749023,
      "logits/rejected": -2.1679906845092773,
      "logps/chosen": -125.42464447021484,
      "logps/rejected": -172.642822265625,
      "loss": 0.0477,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -4.517868995666504,
      "rewards/margins": 4.534079551696777,
      "rewards/rejected": -9.051949501037598,
      "step": 530
    },
    {
      "epoch": 18.16317991631799,
      "grad_norm": 4.275852203369141,
      "learning_rate": 7.209253860320897e-08,
      "logits/chosen": -2.1740193367004395,
      "logits/rejected": -2.1897895336151123,
      "logps/chosen": -133.6866455078125,
      "logps/rejected": -160.42288208007812,
      "loss": 0.0408,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -4.688433647155762,
      "rewards/margins": 4.159676551818848,
      "rewards/rejected": -8.848111152648926,
      "step": 540
    },
    {
      "epoch": 18.497907949790793,
      "grad_norm": 3.586958646774292,
      "learning_rate": 4.063789016999331e-08,
      "logits/chosen": -2.157022476196289,
      "logits/rejected": -2.179140567779541,
      "logps/chosen": -122.80704498291016,
      "logps/rejected": -170.018798828125,
      "loss": 0.0423,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -4.441340923309326,
      "rewards/margins": 4.718876838684082,
      "rewards/rejected": -9.16021728515625,
      "step": 550
    },
    {
      "epoch": 18.8326359832636,
      "grad_norm": 2.9948108196258545,
      "learning_rate": 1.808857395232788e-08,
      "logits/chosen": -2.1356325149536133,
      "logits/rejected": -2.1427738666534424,
      "logps/chosen": -112.40225982666016,
      "logps/rejected": -166.0186767578125,
      "loss": 0.04,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -4.3510541915893555,
      "rewards/margins": 4.859889030456543,
      "rewards/rejected": -9.210943222045898,
      "step": 560
    },
    {
      "epoch": 19.1673640167364,
      "grad_norm": 3.9700310230255127,
      "learning_rate": 4.526240859345499e-09,
      "logits/chosen": -2.1602721214294434,
      "logits/rejected": -2.168781042098999,
      "logps/chosen": -125.03184509277344,
      "logps/rejected": -174.86129760742188,
      "loss": 0.041,
      "rewards/accuracies": 1.0,
      "rewards/chosen": -4.168228626251221,
      "rewards/margins": 4.846875190734863,
      "rewards/rejected": -9.015104293823242,
      "step": 570
    },
    {
      "epoch": 19.502092050209207,
      "grad_norm": 3.226668119430542,
      "learning_rate": 0.0,
      "logits/chosen": -2.183656692504883,
      "logits/rejected": -2.190368175506592,
      "logps/chosen": -107.56755065917969,
      "logps/rejected": -153.33026123046875,
      "loss": 0.0408,
      "rewards/accuracies": 0.987500011920929,
      "rewards/chosen": -4.3311662673950195,
      "rewards/margins": 4.147943019866943,
      "rewards/rejected": -8.479108810424805,
      "step": 580
    },
    {
      "epoch": 19.502092050209207,
      "step": 580,
      "total_flos": 2.1306294447112192e+18,
      "train_loss": 0.1631378454381022,
      "train_runtime": 3724.909,
      "train_samples_per_second": 10.245,
      "train_steps_per_second": 0.156
    }
  ],
  "logging_steps": 10,
  "max_steps": 580,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 20,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.1306294447112192e+18,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}