| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 147, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.20408163265306123, |
| "grad_norm": 23.69936180114746, |
| "learning_rate": 1.2e-06, |
| "logits/chosen": -1.049736738204956, |
| "logits/rejected": -1.3097896575927734, |
| "logps/chosen": -594.0164184570312, |
| "logps/rejected": -527.3587036132812, |
| "loss": 0.6756, |
| "rewards/accuracies": 0.518750011920929, |
| "rewards/chosen": 0.049942269921302795, |
| "rewards/margins": 0.03757396712899208, |
| "rewards/rejected": 0.012368302792310715, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.40816326530612246, |
| "grad_norm": 4.33872652053833, |
| "learning_rate": 1.9954719225730845e-06, |
| "logits/chosen": -1.0503668785095215, |
| "logits/rejected": -1.2729942798614502, |
| "logps/chosen": -548.0631103515625, |
| "logps/rejected": -502.37054443359375, |
| "loss": 0.3614, |
| "rewards/accuracies": 0.948437511920929, |
| "rewards/chosen": 1.080135464668274, |
| "rewards/margins": 1.3379336595535278, |
| "rewards/rejected": -0.2577982246875763, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.6122448979591837, |
| "grad_norm": 1.2732926607131958, |
| "learning_rate": 1.945000818714668e-06, |
| "logits/chosen": -1.0236886739730835, |
| "logits/rejected": -1.1413735151290894, |
| "logps/chosen": -566.3969116210938, |
| "logps/rejected": -538.9616088867188, |
| "loss": 0.0815, |
| "rewards/accuracies": 0.9609375, |
| "rewards/chosen": 2.209315538406372, |
| "rewards/margins": 6.348783493041992, |
| "rewards/rejected": -4.139468193054199, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.8163265306122449, |
| "grad_norm": 0.6404849290847778, |
| "learning_rate": 1.8412535328311812e-06, |
| "logits/chosen": -1.033827781677246, |
| "logits/rejected": -1.052042841911316, |
| "logps/chosen": -512.7654418945312, |
| "logps/rejected": -603.2037353515625, |
| "loss": 0.0676, |
| "rewards/accuracies": 0.9624999761581421, |
| "rewards/chosen": 1.7590594291687012, |
| "rewards/margins": 13.065619468688965, |
| "rewards/rejected": -11.306559562683105, |
| "step": 40 |
| }, |
| { |
| "epoch": 1.0204081632653061, |
| "grad_norm": 0.993448793888092, |
| "learning_rate": 1.690079011482112e-06, |
| "logits/chosen": -1.0401982069015503, |
| "logits/rejected": -1.0194432735443115, |
| "logps/chosen": -548.8553466796875, |
| "logps/rejected": -660.1163330078125, |
| "loss": 0.0565, |
| "rewards/accuracies": 0.9546875357627869, |
| "rewards/chosen": 1.041262149810791, |
| "rewards/margins": 17.123653411865234, |
| "rewards/rejected": -16.0823917388916, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.2244897959183674, |
| "grad_norm": 1.5318107604980469, |
| "learning_rate": 1.5e-06, |
| "logits/chosen": -1.053700566291809, |
| "logits/rejected": -1.0167734622955322, |
| "logps/chosen": -581.4273681640625, |
| "logps/rejected": -730.9420776367188, |
| "loss": 0.046, |
| "rewards/accuracies": 0.9640624523162842, |
| "rewards/chosen": 0.5817679762840271, |
| "rewards/margins": 21.025224685668945, |
| "rewards/rejected": -20.443456649780273, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.4285714285714286, |
| "grad_norm": 0.9771612882614136, |
| "learning_rate": 1.2817325568414297e-06, |
| "logits/chosen": -1.0788830518722534, |
| "logits/rejected": -1.027891993522644, |
| "logps/chosen": -565.0576782226562, |
| "logps/rejected": -717.9182739257812, |
| "loss": 0.0494, |
| "rewards/accuracies": 0.9515625238418579, |
| "rewards/chosen": -0.2791057229042053, |
| "rewards/margins": 21.86798858642578, |
| "rewards/rejected": -22.1470947265625, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.6326530612244898, |
| "grad_norm": 0.9185511469841003, |
| "learning_rate": 1.0475819158237424e-06, |
| "logits/chosen": -1.095476746559143, |
| "logits/rejected": -1.0277090072631836, |
| "logps/chosen": -589.8963623046875, |
| "logps/rejected": -739.2642822265625, |
| "loss": 0.052, |
| "rewards/accuracies": 0.957812488079071, |
| "rewards/chosen": -0.22568494081497192, |
| "rewards/margins": 22.34253692626953, |
| "rewards/rejected": -22.56822395324707, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.836734693877551, |
| "grad_norm": 0.8473263382911682, |
| "learning_rate": 8.107487556395901e-07, |
| "logits/chosen": -1.094291090965271, |
| "logits/rejected": -1.0417732000350952, |
| "logps/chosen": -553.2315673828125, |
| "logps/rejected": -720.135986328125, |
| "loss": 0.0472, |
| "rewards/accuracies": 0.9609375596046448, |
| "rewards/chosen": -0.06711739301681519, |
| "rewards/margins": 23.08963966369629, |
| "rewards/rejected": -23.15675926208496, |
| "step": 90 |
| }, |
| { |
| "epoch": 2.0408163265306123, |
| "grad_norm": 0.14011289179325104, |
| "learning_rate": 5.845849869981136e-07, |
| "logits/chosen": -1.116294264793396, |
| "logits/rejected": -1.0436064004898071, |
| "logps/chosen": -572.4153442382812, |
| "logps/rejected": -768.2971801757812, |
| "loss": 0.0356, |
| "rewards/accuracies": 0.96875, |
| "rewards/chosen": -0.18042321503162384, |
| "rewards/margins": 25.167421340942383, |
| "rewards/rejected": -25.347843170166016, |
| "step": 100 |
| }, |
| { |
| "epoch": 2.2448979591836733, |
| "grad_norm": 0.14527460932731628, |
| "learning_rate": 3.818410137793947e-07, |
| "logits/chosen": -1.1352490186691284, |
| "logits/rejected": -1.0526891946792603, |
| "logps/chosen": -590.7538452148438, |
| "logps/rejected": -767.3261108398438, |
| "loss": 0.043, |
| "rewards/accuracies": 0.9593750238418579, |
| "rewards/chosen": -0.6409277319908142, |
| "rewards/margins": 24.502239227294922, |
| "rewards/rejected": -25.14316749572754, |
| "step": 110 |
| }, |
| { |
| "epoch": 2.4489795918367347, |
| "grad_norm": 0.25144967436790466, |
| "learning_rate": 2.139469052572127e-07, |
| "logits/chosen": -1.1227790117263794, |
| "logits/rejected": -1.0495781898498535, |
| "logps/chosen": -569.977294921875, |
| "logps/rejected": -750.4077758789062, |
| "loss": 0.0422, |
| "rewards/accuracies": 0.953125, |
| "rewards/chosen": -0.6355734467506409, |
| "rewards/margins": 23.878570556640625, |
| "rewards/rejected": -24.514142990112305, |
| "step": 120 |
| }, |
| { |
| "epoch": 2.6530612244897958, |
| "grad_norm": 0.33485284447669983, |
| "learning_rate": 9.036800464548156e-08, |
| "logits/chosen": -1.1463559865951538, |
| "logits/rejected": -1.0605792999267578, |
| "logps/chosen": -559.5640258789062, |
| "logps/rejected": -743.02001953125, |
| "loss": 0.0409, |
| "rewards/accuracies": 0.9578125476837158, |
| "rewards/chosen": -0.5392616391181946, |
| "rewards/margins": 25.962358474731445, |
| "rewards/rejected": -26.501623153686523, |
| "step": 130 |
| }, |
| { |
| "epoch": 2.857142857142857, |
| "grad_norm": 0.08462539315223694, |
| "learning_rate": 1.807130273729329e-08, |
| "logits/chosen": -1.1435989141464233, |
| "logits/rejected": -1.0657496452331543, |
| "logps/chosen": -560.4498291015625, |
| "logps/rejected": -757.8974609375, |
| "loss": 0.0363, |
| "rewards/accuracies": 0.964062511920929, |
| "rewards/chosen": -0.5970851182937622, |
| "rewards/margins": 25.187143325805664, |
| "rewards/rejected": -25.784229278564453, |
| "step": 140 |
| }, |
| { |
| "epoch": 3.0, |
| "step": 147, |
| "total_flos": 9.728979632799089e+17, |
| "train_loss": 0.11137714680461656, |
| "train_runtime": 2917.8588, |
| "train_samples_per_second": 3.22, |
| "train_steps_per_second": 0.05 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 147, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 250, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 9.728979632799089e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|