| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 10.129707112970712, |
| "eval_steps": 500, |
| "global_step": 300, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.33472803347280333, |
| "grad_norm": 3.9892160892486572, |
| "learning_rate": 8.620689655172415e-07, |
| "logits/chosen": -2.315223217010498, |
| "logits/rejected": -2.3654401302337646, |
| "logps/chosen": -65.86729431152344, |
| "logps/rejected": -77.53572845458984, |
| "loss": 0.6929, |
| "rewards/accuracies": 0.3499999940395355, |
| "rewards/chosen": 0.0023138518445193768, |
| "rewards/margins": -0.001122759305872023, |
| "rewards/rejected": 0.0034366101026535034, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.6694560669456067, |
| "grad_norm": 3.5659756660461426, |
| "learning_rate": 1.724137931034483e-06, |
| "logits/chosen": -2.341399669647217, |
| "logits/rejected": -2.3567094802856445, |
| "logps/chosen": -66.60242462158203, |
| "logps/rejected": -69.70094299316406, |
| "loss": 0.6929, |
| "rewards/accuracies": 0.512499988079071, |
| "rewards/chosen": -0.0013719359412789345, |
| "rewards/margins": -0.0035000313073396683, |
| "rewards/rejected": 0.002128095831722021, |
| "step": 20 |
| }, |
| { |
| "epoch": 1.00418410041841, |
| "grad_norm": 4.912586688995361, |
| "learning_rate": 2.5862068965517246e-06, |
| "logits/chosen": -2.3429622650146484, |
| "logits/rejected": -2.3658394813537598, |
| "logps/chosen": -71.6301040649414, |
| "logps/rejected": -78.41346740722656, |
| "loss": 0.6938, |
| "rewards/accuracies": 0.5375000238418579, |
| "rewards/chosen": 0.003577103139832616, |
| "rewards/margins": 0.00785654503852129, |
| "rewards/rejected": -0.004279441200196743, |
| "step": 30 |
| }, |
| { |
| "epoch": 1.3389121338912133, |
| "grad_norm": 4.810107707977295, |
| "learning_rate": 3.448275862068966e-06, |
| "logits/chosen": -2.3610458374023438, |
| "logits/rejected": -2.3885395526885986, |
| "logps/chosen": -66.8291244506836, |
| "logps/rejected": -62.15415573120117, |
| "loss": 0.6893, |
| "rewards/accuracies": 0.5874999761581421, |
| "rewards/chosen": -6.734435737598687e-05, |
| "rewards/margins": 0.006865750066936016, |
| "rewards/rejected": -0.0069330958649516106, |
| "step": 40 |
| }, |
| { |
| "epoch": 1.6736401673640167, |
| "grad_norm": 4.670071125030518, |
| "learning_rate": 4.310344827586207e-06, |
| "logits/chosen": -2.304999351501465, |
| "logits/rejected": -2.335301399230957, |
| "logps/chosen": -75.09913635253906, |
| "logps/rejected": -77.72399139404297, |
| "loss": 0.6878, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": 0.003225918160751462, |
| "rewards/margins": 0.010454346425831318, |
| "rewards/rejected": -0.007228427566587925, |
| "step": 50 |
| }, |
| { |
| "epoch": 2.00836820083682, |
| "grad_norm": 4.2342000007629395, |
| "learning_rate": 4.999818897894192e-06, |
| "logits/chosen": -2.363574504852295, |
| "logits/rejected": -2.363882064819336, |
| "logps/chosen": -62.84125900268555, |
| "logps/rejected": -61.92932891845703, |
| "loss": 0.6855, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.0032769464887678623, |
| "rewards/margins": 0.02090405486524105, |
| "rewards/rejected": -0.024181004613637924, |
| "step": 60 |
| }, |
| { |
| "epoch": 2.3430962343096233, |
| "grad_norm": 4.369245529174805, |
| "learning_rate": 4.9934830787948756e-06, |
| "logits/chosen": -2.378016948699951, |
| "logits/rejected": -2.373137950897217, |
| "logps/chosen": -74.67327880859375, |
| "logps/rejected": -69.20399475097656, |
| "loss": 0.668, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -0.0003526444488670677, |
| "rewards/margins": 0.04865006357431412, |
| "rewards/rejected": -0.04900271072983742, |
| "step": 70 |
| }, |
| { |
| "epoch": 2.6778242677824267, |
| "grad_norm": 4.444687366485596, |
| "learning_rate": 4.978118375700895e-06, |
| "logits/chosen": -2.3403103351593018, |
| "logits/rejected": -2.370321273803711, |
| "logps/chosen": -77.29728698730469, |
| "logps/rejected": -85.79756164550781, |
| "loss": 0.6566, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": 0.0051120575517416, |
| "rewards/margins": 0.09415190666913986, |
| "rewards/rejected": -0.08903985470533371, |
| "step": 80 |
| }, |
| { |
| "epoch": 3.01255230125523, |
| "grad_norm": 4.876573085784912, |
| "learning_rate": 4.953780424089803e-06, |
| "logits/chosen": -2.3614611625671387, |
| "logits/rejected": -2.385697841644287, |
| "logps/chosen": -73.22442626953125, |
| "logps/rejected": -82.25682067871094, |
| "loss": 0.645, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.016868198290467262, |
| "rewards/margins": 0.10679063946008682, |
| "rewards/rejected": -0.12365883588790894, |
| "step": 90 |
| }, |
| { |
| "epoch": 3.3472803347280333, |
| "grad_norm": 4.355966567993164, |
| "learning_rate": 4.920557351506409e-06, |
| "logits/chosen": -2.323256254196167, |
| "logits/rejected": -2.341057300567627, |
| "logps/chosen": -78.37105560302734, |
| "logps/rejected": -86.8406982421875, |
| "loss": 0.6072, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -0.015012519434094429, |
| "rewards/margins": 0.20561759173870087, |
| "rewards/rejected": -0.22063009440898895, |
| "step": 100 |
| }, |
| { |
| "epoch": 3.7698744769874475, |
| "grad_norm": 4.361391067504883, |
| "learning_rate": 4.878569458453592e-06, |
| "logits/chosen": -2.3163838386535645, |
| "logits/rejected": -2.3566031455993652, |
| "logps/chosen": -83.33145904541016, |
| "logps/rejected": -96.48517608642578, |
| "loss": 0.5908, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -0.08870697021484375, |
| "rewards/margins": 0.24879300594329834, |
| "rewards/rejected": -0.3374999761581421, |
| "step": 110 |
| }, |
| { |
| "epoch": 4.104602510460251, |
| "grad_norm": 4.315061569213867, |
| "learning_rate": 4.827968782785062e-06, |
| "logits/chosen": -2.3728129863739014, |
| "logits/rejected": -2.3889667987823486, |
| "logps/chosen": -73.0484619140625, |
| "logps/rejected": -73.4913558959961, |
| "loss": 0.5783, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.0605628564953804, |
| "rewards/margins": 0.2945060133934021, |
| "rewards/rejected": -0.3550689220428467, |
| "step": 120 |
| }, |
| { |
| "epoch": 4.439330543933054, |
| "grad_norm": 4.438860893249512, |
| "learning_rate": 4.7689385491773934e-06, |
| "logits/chosen": -2.3526523113250732, |
| "logits/rejected": -2.364795684814453, |
| "logps/chosen": -67.69630432128906, |
| "logps/rejected": -84.85731506347656, |
| "loss": 0.5338, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.1054786667227745, |
| "rewards/margins": 0.4161924421787262, |
| "rewards/rejected": -0.5216711759567261, |
| "step": 130 |
| }, |
| { |
| "epoch": 4.7740585774058575, |
| "grad_norm": 4.5405473709106445, |
| "learning_rate": 4.70169250567482e-06, |
| "logits/chosen": -2.3756489753723145, |
| "logits/rejected": -2.374919891357422, |
| "logps/chosen": -68.5466079711914, |
| "logps/rejected": -76.15412902832031, |
| "loss": 0.5215, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -0.16213981807231903, |
| "rewards/margins": 0.47565969824790955, |
| "rewards/rejected": -0.6377995610237122, |
| "step": 140 |
| }, |
| { |
| "epoch": 5.108786610878661, |
| "grad_norm": 4.596691608428955, |
| "learning_rate": 4.626474149709127e-06, |
| "logits/chosen": -2.428659439086914, |
| "logits/rejected": -2.4141571521759033, |
| "logps/chosen": -78.08479309082031, |
| "logps/rejected": -68.3617172241211, |
| "loss": 0.5019, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.20662447810173035, |
| "rewards/margins": 0.4026559889316559, |
| "rewards/rejected": -0.6092804670333862, |
| "step": 150 |
| }, |
| { |
| "epoch": 5.443514644351464, |
| "grad_norm": 4.364648818969727, |
| "learning_rate": 4.54355584639723e-06, |
| "logits/chosen": -2.408982992172241, |
| "logits/rejected": -2.4170727729797363, |
| "logps/chosen": -81.3556900024414, |
| "logps/rejected": -86.85897064208984, |
| "loss": 0.4586, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -0.23941104114055634, |
| "rewards/margins": 0.675674319267273, |
| "rewards/rejected": -0.9150853157043457, |
| "step": 160 |
| }, |
| { |
| "epoch": 5.7782426778242675, |
| "grad_norm": 5.241800308227539, |
| "learning_rate": 4.45323784230908e-06, |
| "logits/chosen": -2.4194908142089844, |
| "logits/rejected": -2.4498963356018066, |
| "logps/chosen": -62.32392120361328, |
| "logps/rejected": -76.39479064941406, |
| "loss": 0.4442, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -0.26662638783454895, |
| "rewards/margins": 0.6662653088569641, |
| "rewards/rejected": -0.9328916668891907, |
| "step": 170 |
| }, |
| { |
| "epoch": 6.112970711297071, |
| "grad_norm": 4.73954439163208, |
| "learning_rate": 4.355847178277025e-06, |
| "logits/chosen": -2.4365036487579346, |
| "logits/rejected": -2.435439348220825, |
| "logps/chosen": -73.06513977050781, |
| "logps/rejected": -81.04569244384766, |
| "loss": 0.4355, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.37924182415008545, |
| "rewards/margins": 0.7773979902267456, |
| "rewards/rejected": -1.156639814376831, |
| "step": 180 |
| }, |
| { |
| "epoch": 6.447698744769874, |
| "grad_norm": 5.250921726226807, |
| "learning_rate": 4.2517365051833564e-06, |
| "logits/chosen": -2.387922525405884, |
| "logits/rejected": -2.3835678100585938, |
| "logps/chosen": -64.85784912109375, |
| "logps/rejected": -90.08439636230469, |
| "loss": 0.3719, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -0.42228370904922485, |
| "rewards/margins": 1.0562283992767334, |
| "rewards/rejected": -1.478512167930603, |
| "step": 190 |
| }, |
| { |
| "epoch": 6.7824267782426775, |
| "grad_norm": 5.088508129119873, |
| "learning_rate": 4.141282807014034e-06, |
| "logits/chosen": -2.376319169998169, |
| "logits/rejected": -2.3985953330993652, |
| "logps/chosen": -70.64585876464844, |
| "logps/rejected": -89.17048645019531, |
| "loss": 0.3829, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.5233972072601318, |
| "rewards/margins": 1.1063960790634155, |
| "rewards/rejected": -1.629793405532837, |
| "step": 200 |
| }, |
| { |
| "epoch": 7.117154811715481, |
| "grad_norm": 4.6062092781066895, |
| "learning_rate": 4.024886035802432e-06, |
| "logits/chosen": -2.371851682662964, |
| "logits/rejected": -2.3844287395477295, |
| "logps/chosen": -74.63328552246094, |
| "logps/rejected": -97.81452178955078, |
| "loss": 0.3522, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -0.6278538703918457, |
| "rewards/margins": 1.2317354679107666, |
| "rewards/rejected": -1.8595889806747437, |
| "step": 210 |
| }, |
| { |
| "epoch": 7.451882845188284, |
| "grad_norm": 5.105669021606445, |
| "learning_rate": 3.9029676634059565e-06, |
| "logits/chosen": -2.4011385440826416, |
| "logits/rejected": -2.4039382934570312, |
| "logps/chosen": -75.92952728271484, |
| "logps/rejected": -78.41490936279297, |
| "loss": 0.3219, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.39645594358444214, |
| "rewards/margins": 1.2095177173614502, |
| "rewards/rejected": -1.6059738397598267, |
| "step": 220 |
| }, |
| { |
| "epoch": 7.786610878661088, |
| "grad_norm": 6.292915344238281, |
| "learning_rate": 3.7759691553595214e-06, |
| "logits/chosen": -2.3707780838012695, |
| "logits/rejected": -2.377169609069824, |
| "logps/chosen": -88.07064056396484, |
| "logps/rejected": -108.6225814819336, |
| "loss": 0.3041, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -0.9827474355697632, |
| "rewards/margins": 1.3651618957519531, |
| "rewards/rejected": -2.3479092121124268, |
| "step": 230 |
| }, |
| { |
| "epoch": 8.121338912133892, |
| "grad_norm": 5.0669097900390625, |
| "learning_rate": 3.6443503723320837e-06, |
| "logits/chosen": -2.3608062267303467, |
| "logits/rejected": -2.3792402744293213, |
| "logps/chosen": -72.83047485351562, |
| "logps/rejected": -91.09341430664062, |
| "loss": 0.3065, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -0.9334943890571594, |
| "rewards/margins": 1.3210034370422363, |
| "rewards/rejected": -2.25449800491333, |
| "step": 240 |
| }, |
| { |
| "epoch": 8.456066945606695, |
| "grad_norm": 5.0598931312561035, |
| "learning_rate": 3.508587904974522e-06, |
| "logits/chosen": -2.324855327606201, |
| "logits/rejected": -2.364541530609131, |
| "logps/chosen": -90.57644653320312, |
| "logps/rejected": -106.41752624511719, |
| "loss": 0.2498, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": -0.8531273007392883, |
| "rewards/margins": 1.8315904140472412, |
| "rewards/rejected": -2.684717893600464, |
| "step": 250 |
| }, |
| { |
| "epoch": 8.790794979079498, |
| "grad_norm": 6.120776653289795, |
| "learning_rate": 3.3691733481883693e-06, |
| "logits/chosen": -2.3436760902404785, |
| "logits/rejected": -2.3720099925994873, |
| "logps/chosen": -86.95789337158203, |
| "logps/rejected": -102.34903717041016, |
| "loss": 0.2532, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -1.1573801040649414, |
| "rewards/margins": 1.7690637111663818, |
| "rewards/rejected": -2.9264438152313232, |
| "step": 260 |
| }, |
| { |
| "epoch": 9.125523012552302, |
| "grad_norm": 4.666015625, |
| "learning_rate": 3.226611521064278e-06, |
| "logits/chosen": -2.3132309913635254, |
| "logits/rejected": -2.309297800064087, |
| "logps/chosen": -78.139404296875, |
| "logps/rejected": -99.09760284423828, |
| "loss": 0.2314, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": -1.0649818181991577, |
| "rewards/margins": 1.8774713277816772, |
| "rewards/rejected": -2.942453384399414, |
| "step": 270 |
| }, |
| { |
| "epoch": 9.460251046025105, |
| "grad_norm": 8.85567855834961, |
| "learning_rate": 3.0814186389357765e-06, |
| "logits/chosen": -2.3629987239837646, |
| "logits/rejected": -2.385927200317383, |
| "logps/chosen": -91.09283447265625, |
| "logps/rejected": -102.37603759765625, |
| "loss": 0.2142, |
| "rewards/accuracies": 0.9624999761581421, |
| "rewards/chosen": -1.5404099225997925, |
| "rewards/margins": 2.121422290802002, |
| "rewards/rejected": -3.661832094192505, |
| "step": 280 |
| }, |
| { |
| "epoch": 9.794979079497908, |
| "grad_norm": 5.228074550628662, |
| "learning_rate": 2.9341204441673267e-06, |
| "logits/chosen": -2.356905221939087, |
| "logits/rejected": -2.3635311126708984, |
| "logps/chosen": -91.65778350830078, |
| "logps/rejected": -117.89949035644531, |
| "loss": 0.1881, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.6620346307754517, |
| "rewards/margins": 2.1766200065612793, |
| "rewards/rejected": -3.8386547565460205, |
| "step": 290 |
| }, |
| { |
| "epoch": 10.129707112970712, |
| "grad_norm": 5.115809440612793, |
| "learning_rate": 2.785250302445062e-06, |
| "logits/chosen": -2.2903695106506348, |
| "logits/rejected": -2.2926692962646484, |
| "logps/chosen": -104.5173110961914, |
| "logps/rejected": -123.13216400146484, |
| "loss": 0.1798, |
| "rewards/accuracies": 0.9624999761581421, |
| "rewards/chosen": -1.7224146127700806, |
| "rewards/margins": 2.3892369270324707, |
| "rewards/rejected": -4.111651420593262, |
| "step": 300 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 580, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 20, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.101550169578537e+18, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|