| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 16.824267782426777, |
| "eval_steps": 500, |
| "global_step": 500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.33472803347280333, |
| "grad_norm": 3.9892160892486572, |
| "learning_rate": 8.620689655172415e-07, |
| "logits/chosen": -2.315223217010498, |
| "logits/rejected": -2.3654401302337646, |
| "logps/chosen": -65.86729431152344, |
| "logps/rejected": -77.53572845458984, |
| "loss": 0.6929, |
| "rewards/accuracies": 0.3499999940395355, |
| "rewards/chosen": 0.0023138518445193768, |
| "rewards/margins": -0.001122759305872023, |
| "rewards/rejected": 0.0034366101026535034, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.6694560669456067, |
| "grad_norm": 3.5659756660461426, |
| "learning_rate": 1.724137931034483e-06, |
| "logits/chosen": -2.341399669647217, |
| "logits/rejected": -2.3567094802856445, |
| "logps/chosen": -66.60242462158203, |
| "logps/rejected": -69.70094299316406, |
| "loss": 0.6929, |
| "rewards/accuracies": 0.512499988079071, |
| "rewards/chosen": -0.0013719359412789345, |
| "rewards/margins": -0.0035000313073396683, |
| "rewards/rejected": 0.002128095831722021, |
| "step": 20 |
| }, |
| { |
| "epoch": 1.00418410041841, |
| "grad_norm": 4.912586688995361, |
| "learning_rate": 2.5862068965517246e-06, |
| "logits/chosen": -2.3429622650146484, |
| "logits/rejected": -2.3658394813537598, |
| "logps/chosen": -71.6301040649414, |
| "logps/rejected": -78.41346740722656, |
| "loss": 0.6938, |
| "rewards/accuracies": 0.5375000238418579, |
| "rewards/chosen": 0.003577103139832616, |
| "rewards/margins": 0.00785654503852129, |
| "rewards/rejected": -0.004279441200196743, |
| "step": 30 |
| }, |
| { |
| "epoch": 1.3389121338912133, |
| "grad_norm": 4.810107707977295, |
| "learning_rate": 3.448275862068966e-06, |
| "logits/chosen": -2.3610458374023438, |
| "logits/rejected": -2.3885395526885986, |
| "logps/chosen": -66.8291244506836, |
| "logps/rejected": -62.15415573120117, |
| "loss": 0.6893, |
| "rewards/accuracies": 0.5874999761581421, |
| "rewards/chosen": -6.734435737598687e-05, |
| "rewards/margins": 0.006865750066936016, |
| "rewards/rejected": -0.0069330958649516106, |
| "step": 40 |
| }, |
| { |
| "epoch": 1.6736401673640167, |
| "grad_norm": 4.670071125030518, |
| "learning_rate": 4.310344827586207e-06, |
| "logits/chosen": -2.304999351501465, |
| "logits/rejected": -2.335301399230957, |
| "logps/chosen": -75.09913635253906, |
| "logps/rejected": -77.72399139404297, |
| "loss": 0.6878, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": 0.003225918160751462, |
| "rewards/margins": 0.010454346425831318, |
| "rewards/rejected": -0.007228427566587925, |
| "step": 50 |
| }, |
| { |
| "epoch": 2.00836820083682, |
| "grad_norm": 4.2342000007629395, |
| "learning_rate": 4.999818897894192e-06, |
| "logits/chosen": -2.363574504852295, |
| "logits/rejected": -2.363882064819336, |
| "logps/chosen": -62.84125900268555, |
| "logps/rejected": -61.92932891845703, |
| "loss": 0.6855, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.0032769464887678623, |
| "rewards/margins": 0.02090405486524105, |
| "rewards/rejected": -0.024181004613637924, |
| "step": 60 |
| }, |
| { |
| "epoch": 2.3430962343096233, |
| "grad_norm": 4.369245529174805, |
| "learning_rate": 4.9934830787948756e-06, |
| "logits/chosen": -2.378016948699951, |
| "logits/rejected": -2.373137950897217, |
| "logps/chosen": -74.67327880859375, |
| "logps/rejected": -69.20399475097656, |
| "loss": 0.668, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -0.0003526444488670677, |
| "rewards/margins": 0.04865006357431412, |
| "rewards/rejected": -0.04900271072983742, |
| "step": 70 |
| }, |
| { |
| "epoch": 2.6778242677824267, |
| "grad_norm": 4.444687366485596, |
| "learning_rate": 4.978118375700895e-06, |
| "logits/chosen": -2.3403103351593018, |
| "logits/rejected": -2.370321273803711, |
| "logps/chosen": -77.29728698730469, |
| "logps/rejected": -85.79756164550781, |
| "loss": 0.6566, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": 0.0051120575517416, |
| "rewards/margins": 0.09415190666913986, |
| "rewards/rejected": -0.08903985470533371, |
| "step": 80 |
| }, |
| { |
| "epoch": 3.01255230125523, |
| "grad_norm": 4.876573085784912, |
| "learning_rate": 4.953780424089803e-06, |
| "logits/chosen": -2.3614611625671387, |
| "logits/rejected": -2.385697841644287, |
| "logps/chosen": -73.22442626953125, |
| "logps/rejected": -82.25682067871094, |
| "loss": 0.645, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.016868198290467262, |
| "rewards/margins": 0.10679063946008682, |
| "rewards/rejected": -0.12365883588790894, |
| "step": 90 |
| }, |
| { |
| "epoch": 3.3472803347280333, |
| "grad_norm": 4.355966567993164, |
| "learning_rate": 4.920557351506409e-06, |
| "logits/chosen": -2.323256254196167, |
| "logits/rejected": -2.341057300567627, |
| "logps/chosen": -78.37105560302734, |
| "logps/rejected": -86.8406982421875, |
| "loss": 0.6072, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -0.015012519434094429, |
| "rewards/margins": 0.20561759173870087, |
| "rewards/rejected": -0.22063009440898895, |
| "step": 100 |
| }, |
| { |
| "epoch": 3.7698744769874475, |
| "grad_norm": 4.361391067504883, |
| "learning_rate": 4.878569458453592e-06, |
| "logits/chosen": -2.3163838386535645, |
| "logits/rejected": -2.3566031455993652, |
| "logps/chosen": -83.33145904541016, |
| "logps/rejected": -96.48517608642578, |
| "loss": 0.5908, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -0.08870697021484375, |
| "rewards/margins": 0.24879300594329834, |
| "rewards/rejected": -0.3374999761581421, |
| "step": 110 |
| }, |
| { |
| "epoch": 4.104602510460251, |
| "grad_norm": 4.315061569213867, |
| "learning_rate": 4.827968782785062e-06, |
| "logits/chosen": -2.3728129863739014, |
| "logits/rejected": -2.3889667987823486, |
| "logps/chosen": -73.0484619140625, |
| "logps/rejected": -73.4913558959961, |
| "loss": 0.5783, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.0605628564953804, |
| "rewards/margins": 0.2945060133934021, |
| "rewards/rejected": -0.3550689220428467, |
| "step": 120 |
| }, |
| { |
| "epoch": 4.439330543933054, |
| "grad_norm": 4.438860893249512, |
| "learning_rate": 4.7689385491773934e-06, |
| "logits/chosen": -2.3526523113250732, |
| "logits/rejected": -2.364795684814453, |
| "logps/chosen": -67.69630432128906, |
| "logps/rejected": -84.85731506347656, |
| "loss": 0.5338, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.1054786667227745, |
| "rewards/margins": 0.4161924421787262, |
| "rewards/rejected": -0.5216711759567261, |
| "step": 130 |
| }, |
| { |
| "epoch": 4.7740585774058575, |
| "grad_norm": 4.5405473709106445, |
| "learning_rate": 4.70169250567482e-06, |
| "logits/chosen": -2.3756489753723145, |
| "logits/rejected": -2.374919891357422, |
| "logps/chosen": -68.5466079711914, |
| "logps/rejected": -76.15412902832031, |
| "loss": 0.5215, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -0.16213981807231903, |
| "rewards/margins": 0.47565969824790955, |
| "rewards/rejected": -0.6377995610237122, |
| "step": 140 |
| }, |
| { |
| "epoch": 5.108786610878661, |
| "grad_norm": 4.596691608428955, |
| "learning_rate": 4.626474149709127e-06, |
| "logits/chosen": -2.428659439086914, |
| "logits/rejected": -2.4141571521759033, |
| "logps/chosen": -78.08479309082031, |
| "logps/rejected": -68.3617172241211, |
| "loss": 0.5019, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.20662447810173035, |
| "rewards/margins": 0.4026559889316559, |
| "rewards/rejected": -0.6092804670333862, |
| "step": 150 |
| }, |
| { |
| "epoch": 5.443514644351464, |
| "grad_norm": 4.364648818969727, |
| "learning_rate": 4.54355584639723e-06, |
| "logits/chosen": -2.408982992172241, |
| "logits/rejected": -2.4170727729797363, |
| "logps/chosen": -81.3556900024414, |
| "logps/rejected": -86.85897064208984, |
| "loss": 0.4586, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -0.23941104114055634, |
| "rewards/margins": 0.675674319267273, |
| "rewards/rejected": -0.9150853157043457, |
| "step": 160 |
| }, |
| { |
| "epoch": 5.7782426778242675, |
| "grad_norm": 5.241800308227539, |
| "learning_rate": 4.45323784230908e-06, |
| "logits/chosen": -2.4194908142089844, |
| "logits/rejected": -2.4498963356018066, |
| "logps/chosen": -62.32392120361328, |
| "logps/rejected": -76.39479064941406, |
| "loss": 0.4442, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -0.26662638783454895, |
| "rewards/margins": 0.6662653088569641, |
| "rewards/rejected": -0.9328916668891907, |
| "step": 170 |
| }, |
| { |
| "epoch": 6.112970711297071, |
| "grad_norm": 4.73954439163208, |
| "learning_rate": 4.355847178277025e-06, |
| "logits/chosen": -2.4365036487579346, |
| "logits/rejected": -2.435439348220825, |
| "logps/chosen": -73.06513977050781, |
| "logps/rejected": -81.04569244384766, |
| "loss": 0.4355, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.37924182415008545, |
| "rewards/margins": 0.7773979902267456, |
| "rewards/rejected": -1.156639814376831, |
| "step": 180 |
| }, |
| { |
| "epoch": 6.447698744769874, |
| "grad_norm": 5.250921726226807, |
| "learning_rate": 4.2517365051833564e-06, |
| "logits/chosen": -2.387922525405884, |
| "logits/rejected": -2.3835678100585938, |
| "logps/chosen": -64.85784912109375, |
| "logps/rejected": -90.08439636230469, |
| "loss": 0.3719, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -0.42228370904922485, |
| "rewards/margins": 1.0562283992767334, |
| "rewards/rejected": -1.478512167930603, |
| "step": 190 |
| }, |
| { |
| "epoch": 6.7824267782426775, |
| "grad_norm": 5.088508129119873, |
| "learning_rate": 4.141282807014034e-06, |
| "logits/chosen": -2.376319169998169, |
| "logits/rejected": -2.3985953330993652, |
| "logps/chosen": -70.64585876464844, |
| "logps/rejected": -89.17048645019531, |
| "loss": 0.3829, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.5233972072601318, |
| "rewards/margins": 1.1063960790634155, |
| "rewards/rejected": -1.629793405532837, |
| "step": 200 |
| }, |
| { |
| "epoch": 7.117154811715481, |
| "grad_norm": 4.6062092781066895, |
| "learning_rate": 4.024886035802432e-06, |
| "logits/chosen": -2.371851682662964, |
| "logits/rejected": -2.3844287395477295, |
| "logps/chosen": -74.63328552246094, |
| "logps/rejected": -97.81452178955078, |
| "loss": 0.3522, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -0.6278538703918457, |
| "rewards/margins": 1.2317354679107666, |
| "rewards/rejected": -1.8595889806747437, |
| "step": 210 |
| }, |
| { |
| "epoch": 7.451882845188284, |
| "grad_norm": 5.105669021606445, |
| "learning_rate": 3.9029676634059565e-06, |
| "logits/chosen": -2.4011385440826416, |
| "logits/rejected": -2.4039382934570312, |
| "logps/chosen": -75.92952728271484, |
| "logps/rejected": -78.41490936279297, |
| "loss": 0.3219, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.39645594358444214, |
| "rewards/margins": 1.2095177173614502, |
| "rewards/rejected": -1.6059738397598267, |
| "step": 220 |
| }, |
| { |
| "epoch": 7.786610878661088, |
| "grad_norm": 6.292915344238281, |
| "learning_rate": 3.7759691553595214e-06, |
| "logits/chosen": -2.3707780838012695, |
| "logits/rejected": -2.377169609069824, |
| "logps/chosen": -88.07064056396484, |
| "logps/rejected": -108.6225814819336, |
| "loss": 0.3041, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -0.9827474355697632, |
| "rewards/margins": 1.3651618957519531, |
| "rewards/rejected": -2.3479092121124268, |
| "step": 230 |
| }, |
| { |
| "epoch": 8.121338912133892, |
| "grad_norm": 5.0669097900390625, |
| "learning_rate": 3.6443503723320837e-06, |
| "logits/chosen": -2.3608062267303467, |
| "logits/rejected": -2.3792402744293213, |
| "logps/chosen": -72.83047485351562, |
| "logps/rejected": -91.09341430664062, |
| "loss": 0.3065, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -0.9334943890571594, |
| "rewards/margins": 1.3210034370422363, |
| "rewards/rejected": -2.25449800491333, |
| "step": 240 |
| }, |
| { |
| "epoch": 8.456066945606695, |
| "grad_norm": 5.0598931312561035, |
| "learning_rate": 3.508587904974522e-06, |
| "logits/chosen": -2.324855327606201, |
| "logits/rejected": -2.364541530609131, |
| "logps/chosen": -90.57644653320312, |
| "logps/rejected": -106.41752624511719, |
| "loss": 0.2498, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": -0.8531273007392883, |
| "rewards/margins": 1.8315904140472412, |
| "rewards/rejected": -2.684717893600464, |
| "step": 250 |
| }, |
| { |
| "epoch": 8.790794979079498, |
| "grad_norm": 6.120776653289795, |
| "learning_rate": 3.3691733481883693e-06, |
| "logits/chosen": -2.3436760902404785, |
| "logits/rejected": -2.3720099925994873, |
| "logps/chosen": -86.95789337158203, |
| "logps/rejected": -102.34903717041016, |
| "loss": 0.2532, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -1.1573801040649414, |
| "rewards/margins": 1.7690637111663818, |
| "rewards/rejected": -2.9264438152313232, |
| "step": 260 |
| }, |
| { |
| "epoch": 9.125523012552302, |
| "grad_norm": 4.666015625, |
| "learning_rate": 3.226611521064278e-06, |
| "logits/chosen": -2.3132309913635254, |
| "logits/rejected": -2.309297800064087, |
| "logps/chosen": -78.139404296875, |
| "logps/rejected": -99.09760284423828, |
| "loss": 0.2314, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": -1.0649818181991577, |
| "rewards/margins": 1.8774713277816772, |
| "rewards/rejected": -2.942453384399414, |
| "step": 270 |
| }, |
| { |
| "epoch": 9.460251046025105, |
| "grad_norm": 8.85567855834961, |
| "learning_rate": 3.0814186389357765e-06, |
| "logits/chosen": -2.3629987239837646, |
| "logits/rejected": -2.385927200317383, |
| "logps/chosen": -91.09283447265625, |
| "logps/rejected": -102.37603759765625, |
| "loss": 0.2142, |
| "rewards/accuracies": 0.9624999761581421, |
| "rewards/chosen": -1.5404099225997925, |
| "rewards/margins": 2.121422290802002, |
| "rewards/rejected": -3.661832094192505, |
| "step": 280 |
| }, |
| { |
| "epoch": 9.794979079497908, |
| "grad_norm": 5.228074550628662, |
| "learning_rate": 2.9341204441673267e-06, |
| "logits/chosen": -2.356905221939087, |
| "logits/rejected": -2.3635311126708984, |
| "logps/chosen": -91.65778350830078, |
| "logps/rejected": -117.89949035644531, |
| "loss": 0.1881, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.6620346307754517, |
| "rewards/margins": 2.1766200065612793, |
| "rewards/rejected": -3.8386547565460205, |
| "step": 290 |
| }, |
| { |
| "epoch": 10.129707112970712, |
| "grad_norm": 5.115809440612793, |
| "learning_rate": 2.785250302445062e-06, |
| "logits/chosen": -2.2903695106506348, |
| "logits/rejected": -2.2926692962646484, |
| "logps/chosen": -104.5173110961914, |
| "logps/rejected": -123.13216400146484, |
| "loss": 0.1798, |
| "rewards/accuracies": 0.9624999761581421, |
| "rewards/chosen": -1.7224146127700806, |
| "rewards/margins": 2.3892369270324707, |
| "rewards/rejected": -4.111651420593262, |
| "step": 300 |
| }, |
| { |
| "epoch": 10.464435146443515, |
| "grad_norm": 5.882064342498779, |
| "learning_rate": 2.6353472714635443e-06, |
| "logits/chosen": -2.2836384773254395, |
| "logits/rejected": -2.2969231605529785, |
| "logps/chosen": -88.8235855102539, |
| "logps/rejected": -119.67433166503906, |
| "loss": 0.1558, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -1.6937462091445923, |
| "rewards/margins": 2.4059743881225586, |
| "rewards/rejected": -4.0997209548950195, |
| "step": 310 |
| }, |
| { |
| "epoch": 10.799163179916318, |
| "grad_norm": 6.9003376960754395, |
| "learning_rate": 2.4849541490017868e-06, |
| "logits/chosen": -2.289567232131958, |
| "logits/rejected": -2.3216423988342285, |
| "logps/chosen": -90.58432006835938, |
| "logps/rejected": -118.13006591796875, |
| "loss": 0.1538, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": -1.6574989557266235, |
| "rewards/margins": 2.9354054927825928, |
| "rewards/rejected": -4.592904567718506, |
| "step": 320 |
| }, |
| { |
| "epoch": 11.133891213389122, |
| "grad_norm": 4.916522979736328, |
| "learning_rate": 2.3346155074564712e-06, |
| "logits/chosen": -2.2699310779571533, |
| "logits/rejected": -2.3001017570495605, |
| "logps/chosen": -100.2576675415039, |
| "logps/rejected": -133.8759307861328, |
| "loss": 0.1373, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": -2.174388885498047, |
| "rewards/margins": 3.038696050643921, |
| "rewards/rejected": -5.213086128234863, |
| "step": 330 |
| }, |
| { |
| "epoch": 11.468619246861925, |
| "grad_norm": 6.739722728729248, |
| "learning_rate": 2.184875721949277e-06, |
| "logits/chosen": -2.2740581035614014, |
| "logits/rejected": -2.315854549407959, |
| "logps/chosen": -83.28224182128906, |
| "logps/rejected": -107.7516098022461, |
| "loss": 0.1257, |
| "rewards/accuracies": 0.9624999761581421, |
| "rewards/chosen": -1.777440071105957, |
| "rewards/margins": 2.704913377761841, |
| "rewards/rejected": -4.482353687286377, |
| "step": 340 |
| }, |
| { |
| "epoch": 11.803347280334728, |
| "grad_norm": 4.988001823425293, |
| "learning_rate": 2.0362769991485514e-06, |
| "logits/chosen": -2.2616047859191895, |
| "logits/rejected": -2.2596449851989746, |
| "logps/chosen": -107.07649230957031, |
| "logps/rejected": -139.80697631835938, |
| "loss": 0.1184, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -2.618734359741211, |
| "rewards/margins": 3.291966199874878, |
| "rewards/rejected": -5.910700798034668, |
| "step": 350 |
| }, |
| { |
| "epoch": 12.138075313807532, |
| "grad_norm": 4.956677436828613, |
| "learning_rate": 1.8893574139429226e-06, |
| "logits/chosen": -2.233889102935791, |
| "logits/rejected": -2.2601330280303955, |
| "logps/chosen": -95.82877349853516, |
| "logps/rejected": -138.9019775390625, |
| "loss": 0.1106, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -2.5194194316864014, |
| "rewards/margins": 3.470710039138794, |
| "rewards/rejected": -5.990128993988037, |
| "step": 360 |
| }, |
| { |
| "epoch": 12.472803347280335, |
| "grad_norm": 4.895273208618164, |
| "learning_rate": 1.744648961076068e-06, |
| "logits/chosen": -2.2324471473693848, |
| "logits/rejected": -2.233158588409424, |
| "logps/chosen": -117.90779113769531, |
| "logps/rejected": -141.53753662109375, |
| "loss": 0.0907, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": -2.7019529342651367, |
| "rewards/margins": 3.4567368030548096, |
| "rewards/rejected": -6.158689975738525, |
| "step": 370 |
| }, |
| { |
| "epoch": 12.807531380753138, |
| "grad_norm": 5.789585590362549, |
| "learning_rate": 1.602675628797636e-06, |
| "logits/chosen": -2.2296676635742188, |
| "logits/rejected": -2.2535061836242676, |
| "logps/chosen": -117.69709777832031, |
| "logps/rejected": -150.61538696289062, |
| "loss": 0.0923, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.4674232006073, |
| "rewards/margins": 3.8480961322784424, |
| "rewards/rejected": -7.3155198097229, |
| "step": 380 |
| }, |
| { |
| "epoch": 13.142259414225942, |
| "grad_norm": 4.082385540008545, |
| "learning_rate": 1.4639515015056205e-06, |
| "logits/chosen": -2.232024908065796, |
| "logits/rejected": -2.235680103302002, |
| "logps/chosen": -96.60597229003906, |
| "logps/rejected": -130.7404022216797, |
| "loss": 0.0876, |
| "rewards/accuracies": 0.9624999761581421, |
| "rewards/chosen": -2.816174268722534, |
| "rewards/margins": 3.2225749492645264, |
| "rewards/rejected": -6.038748741149902, |
| "step": 390 |
| }, |
| { |
| "epoch": 13.476987447698745, |
| "grad_norm": 4.423525333404541, |
| "learning_rate": 1.328978898250525e-06, |
| "logits/chosen": -2.2275261878967285, |
| "logits/rejected": -2.2222421169281006, |
| "logps/chosen": -107.16130065917969, |
| "logps/rejected": -148.48500061035156, |
| "loss": 0.0662, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.10858154296875, |
| "rewards/margins": 3.9508070945739746, |
| "rewards/rejected": -7.059388637542725, |
| "step": 400 |
| }, |
| { |
| "epoch": 13.811715481171548, |
| "grad_norm": 3.721898078918457, |
| "learning_rate": 1.198246553841744e-06, |
| "logits/chosen": -2.2333359718322754, |
| "logits/rejected": -2.2442851066589355, |
| "logps/chosen": -104.8399429321289, |
| "logps/rejected": -137.98049926757812, |
| "loss": 0.0808, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -3.3306171894073486, |
| "rewards/margins": 3.471170425415039, |
| "rewards/rejected": -6.80178689956665, |
| "step": 410 |
| }, |
| { |
| "epoch": 14.146443514644352, |
| "grad_norm": 4.411396026611328, |
| "learning_rate": 1.0722278491423998e-06, |
| "logits/chosen": -2.2033934593200684, |
| "logits/rejected": -2.206735610961914, |
| "logps/chosen": -122.04057312011719, |
| "logps/rejected": -139.2510528564453, |
| "loss": 0.0651, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.4570648670196533, |
| "rewards/margins": 3.5551300048828125, |
| "rewards/rejected": -7.012194633483887, |
| "step": 420 |
| }, |
| { |
| "epoch": 14.481171548117155, |
| "grad_norm": 4.514885902404785, |
| "learning_rate": 9.513790969606926e-07, |
| "logits/chosen": -2.1915841102600098, |
| "logits/rejected": -2.23836088180542, |
| "logps/chosen": -111.24171447753906, |
| "logps/rejected": -159.8766326904297, |
| "loss": 0.0609, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.719008207321167, |
| "rewards/margins": 4.095301628112793, |
| "rewards/rejected": -7.814309597015381, |
| "step": 430 |
| }, |
| { |
| "epoch": 14.815899581589958, |
| "grad_norm": 6.274470329284668, |
| "learning_rate": 8.361378897445643e-07, |
| "logits/chosen": -2.2278056144714355, |
| "logits/rejected": -2.2360167503356934, |
| "logps/chosen": -95.31124877929688, |
| "logps/rejected": -136.5842742919922, |
| "loss": 0.0624, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.1528682708740234, |
| "rewards/margins": 4.095580577850342, |
| "rewards/rejected": -7.248448848724365, |
| "step": 440 |
| }, |
| { |
| "epoch": 15.150627615062762, |
| "grad_norm": 4.49701452255249, |
| "learning_rate": 7.269215150626391e-07, |
| "logits/chosen": -2.196305513381958, |
| "logits/rejected": -2.2363815307617188, |
| "logps/chosen": -101.97003173828125, |
| "logps/rejected": -151.15646362304688, |
| "loss": 0.0513, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.562624454498291, |
| "rewards/margins": 4.104978084564209, |
| "rewards/rejected": -7.667603492736816, |
| "step": 450 |
| }, |
| { |
| "epoch": 15.485355648535565, |
| "grad_norm": 4.746140956878662, |
| "learning_rate": 6.241254446089942e-07, |
| "logits/chosen": -2.1973156929016113, |
| "logits/rejected": -2.217236042022705, |
| "logps/chosen": -108.36579895019531, |
| "logps/rejected": -146.45358276367188, |
| "loss": 0.0588, |
| "rewards/accuracies": 0.987500011920929, |
| "rewards/chosen": -3.8717312812805176, |
| "rewards/margins": 3.9130451679229736, |
| "rewards/rejected": -7.784776210784912, |
| "step": 460 |
| }, |
| { |
| "epoch": 15.820083682008368, |
| "grad_norm": 2.910703182220459, |
| "learning_rate": 5.281219022030423e-07, |
| "logits/chosen": -2.1933655738830566, |
| "logits/rejected": -2.193134307861328, |
| "logps/chosen": -125.05366516113281, |
| "logps/rejected": -158.47085571289062, |
| "loss": 0.0484, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.9004642963409424, |
| "rewards/margins": 4.252579689025879, |
| "rewards/rejected": -8.153043746948242, |
| "step": 470 |
| }, |
| { |
| "epoch": 16.15481171548117, |
| "grad_norm": 2.814772367477417, |
| "learning_rate": 4.392585159698087e-07, |
| "logits/chosen": -2.1886072158813477, |
| "logits/rejected": -2.1937201023101807, |
| "logps/chosen": -113.6917724609375, |
| "logps/rejected": -160.83851623535156, |
| "loss": 0.0443, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.779496669769287, |
| "rewards/margins": 4.261423587799072, |
| "rewards/rejected": -8.04092025756836, |
| "step": 480 |
| }, |
| { |
| "epoch": 16.489539748953973, |
| "grad_norm": 3.579289197921753, |
| "learning_rate": 3.578570595810274e-07, |
| "logits/chosen": -2.19553542137146, |
| "logits/rejected": -2.1956517696380615, |
| "logps/chosen": -110.0953140258789, |
| "logps/rejected": -165.99652099609375, |
| "loss": 0.0483, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.7303287982940674, |
| "rewards/margins": 4.63196325302124, |
| "rewards/rejected": -8.36229133605957, |
| "step": 490 |
| }, |
| { |
| "epoch": 16.824267782426777, |
| "grad_norm": 4.428997039794922, |
| "learning_rate": 2.8421228711503127e-07, |
| "logits/chosen": -2.1704812049865723, |
| "logits/rejected": -2.183809280395508, |
| "logps/chosen": -99.66941833496094, |
| "logps/rejected": -152.3459930419922, |
| "loss": 0.0468, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.7393298149108887, |
| "rewards/margins": 4.549952030181885, |
| "rewards/rejected": -8.289281845092773, |
| "step": 500 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 580, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 20, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.8361677496178442e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|