{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 3525, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05021276595744681, "grad_norm": 0.732947051525116, "kl": 12.475480079650879, "learning_rate": 5.014164305949008e-08, "logits/chosen": -15179654.095238095, "logits/rejected": -16528289.93175853, "logps/chosen": -2.1814988803514193, "logps/rejected": -5.992375464785652, "loss": 0.4997, "rewards/chosen": 0.0005148474550072527, "rewards/margins": 0.0009199919065958275, "rewards/rejected": -0.00040514445158857477, "step": 177 }, { "epoch": 0.10042553191489362, "grad_norm": 0.26544979214668274, "kl": 11.266656875610352, "learning_rate": 9.996847414880202e-08, "logits/chosen": -14902797.791245792, "logits/rejected": -16395014.176943699, "logps/chosen": -2.2833465678924663, "logps/rejected": -6.153056317722297, "loss": 0.4996, "rewards/chosen": -0.00040731574048096886, "rewards/margins": 0.000592714583635074, "rewards/rejected": -0.0010000303241160429, "step": 354 }, { "epoch": 0.15063829787234043, "grad_norm": 0.6797279715538025, "kl": 3.9419214725494385, "learning_rate": 9.438839848675913e-08, "logits/chosen": -14784543.875486381, "logits/rejected": -16617535.61345988, "logps/chosen": -2.2231409688867947, "logps/rejected": -6.279140995739862, "loss": 0.4988, "rewards/chosen": -0.0022345268772733812, "rewards/margins": 0.003318870893000191, "rewards/rejected": -0.005553397770273572, "step": 531 }, { "epoch": 0.20085106382978724, "grad_norm": 0.5770508050918579, "kl": 0.7956511974334717, "learning_rate": 8.880832282471626e-08, "logits/chosen": -14984102.078853047, "logits/rejected": -16609355.65171504, "logps/chosen": -2.5137440671202955, "logps/rejected": -6.364399530013193, "loss": 0.4975, "rewards/chosen": -0.006582849341908664, "rewards/margins": 0.007156510907123363, "rewards/rejected": -0.013739360249032027, "step": 708 }, { "epoch": 0.251063829787234, "grad_norm": 0.5370281934738159, "kl": 0.05692530423402786, "learning_rate": 8.32282471626734e-08, "logits/chosen": -14969774.628571428, "logits/rejected": -16552960.0, "logps/chosen": -2.374119785853795, "logps/rejected": -6.513669296049736, "loss": 0.4945, "rewards/chosen": -0.012183338403701782, "rewards/margins": 0.01736304331833208, "rewards/rejected": -0.029546381722033863, "step": 885 }, { "epoch": 0.30127659574468085, "grad_norm": 0.7874128222465515, "kl": 0.04236392676830292, "learning_rate": 7.76481715006305e-08, "logits/chosen": -14856485.372262774, "logits/rejected": -16540230.837127846, "logps/chosen": -2.2836047541486084, "logps/rejected": -6.687177187089536, "loss": 0.4906, "rewards/chosen": -0.021432671233685346, "rewards/margins": 0.029663742240055215, "rewards/rejected": -0.05109641347374056, "step": 1062 }, { "epoch": 0.35148936170212763, "grad_norm": 0.7597999572753906, "kl": 0.0, "learning_rate": 7.206809583858764e-08, "logits/chosen": -15033836.743362831, "logits/rejected": -16566120.551260505, "logps/chosen": -2.7796039412506914, "logps/rejected": -6.999364824054622, "loss": 0.4848, "rewards/chosen": -0.037276040136286645, "rewards/margins": 0.04471597297755264, "rewards/rejected": -0.08199201311383929, "step": 1239 }, { "epoch": 0.40170212765957447, "grad_norm": 1.355948567390442, "kl": 0.0319652259349823, "learning_rate": 6.648802017654477e-08, "logits/chosen": -15049823.426523298, "logits/rejected": -16636860.453825857, "logps/chosen": -3.031732155857975, "logps/rejected": -7.528678471306069, "loss": 0.4752, "rewards/chosen": -0.06634654110050543, "rewards/margins": 0.07183165772143553, "rewards/rejected": -0.13817819882194096, "step": 1416 }, { "epoch": 0.45191489361702125, "grad_norm": 0.9223116040229797, "kl": 0.0, "learning_rate": 6.090794451450188e-08, "logits/chosen": -15345349.56521739, "logits/rejected": -16821483.340350877, "logps/chosen": -3.2707338194916216, "logps/rejected": -8.287376644736842, "loss": 0.4649, "rewards/chosen": -0.10732046072033868, "rewards/margins": 0.09713628355734771, "rewards/rejected": -0.2044567442776864, "step": 1593 }, { "epoch": 0.502127659574468, "grad_norm": 0.8558508157730103, "kl": 0.0, "learning_rate": 5.5327868852459016e-08, "logits/chosen": -15458446.88372093, "logits/rejected": -16827832.373056997, "logps/chosen": -4.2163511764171515, "logps/rejected": -8.720990696513384, "loss": 0.4485, "rewards/chosen": -0.18739568725112796, "rewards/margins": 0.10582041852458987, "rewards/rejected": -0.29321610577571783, "step": 1770 }, { "epoch": 0.5523404255319149, "grad_norm": 1.2379802465438843, "kl": 0.0, "learning_rate": 4.9747793190416137e-08, "logits/chosen": -15368077.450847458, "logits/rejected": -17055221.495093666, "logps/chosen": -5.225153105137712, "logps/rejected": -10.289466714986618, "loss": 0.4349, "rewards/chosen": -0.31121714963751324, "rewards/margins": 0.10698996959893858, "rewards/rejected": -0.4182071192364518, "step": 1947 }, { "epoch": 0.6025531914893617, "grad_norm": 2.636925220489502, "kl": 0.0, "learning_rate": 4.4167717528373264e-08, "logits/chosen": -15860050.707692308, "logits/rejected": -17018940.23529412, "logps/chosen": -6.8304584209735575, "logps/rejected": -12.103902532980104, "loss": 0.4076, "rewards/chosen": -0.4518470470721905, "rewards/margins": 0.14684447744662443, "rewards/rejected": -0.5986915245188149, "step": 2124 }, { "epoch": 0.6527659574468085, "grad_norm": 2.6684844493865967, "kl": 0.0, "learning_rate": 3.858764186633039e-08, "logits/chosen": -15603414.0, "logits/rejected": -17277662.455172412, "logps/chosen": -8.103792190551758, "logps/rejected": -14.19166049299569, "loss": 0.3791, "rewards/chosen": -0.6035651564598083, "rewards/margins": 0.18009341708545024, "rewards/rejected": -0.7836585735452586, "step": 2301 }, { "epoch": 0.7029787234042553, "grad_norm": 2.259669542312622, "kl": 0.0, "learning_rate": 3.300756620428751e-08, "logits/chosen": -15778993.082706766, "logits/rejected": -17491624.292173915, "logps/chosen": -9.979437118186091, "logps/rejected": -16.35701086956522, "loss": 0.357, "rewards/chosen": -0.7678221508972627, "rewards/margins": 0.25122187933371554, "rewards/rejected": -1.0190440302309782, "step": 2478 }, { "epoch": 0.7531914893617021, "grad_norm": 3.3658533096313477, "kl": 0.0, "learning_rate": 2.742749054224464e-08, "logits/chosen": -15436312.291970802, "logits/rejected": -17457363.614711035, "logps/chosen": -11.540958599452555, "logps/rejected": -18.07249138025394, "loss": 0.3366, "rewards/chosen": -0.9344464601391423, "rewards/margins": 0.2448034252729635, "rewards/rejected": -1.1792498854121058, "step": 2655 }, { "epoch": 0.8034042553191489, "grad_norm": 2.318241834640503, "kl": 0.0, "learning_rate": 2.1847414880201765e-08, "logits/chosen": -15859763.621399177, "logits/rejected": -17528114.41432225, "logps/chosen": -13.565882804462449, "logps/rejected": -20.141737265558397, "loss": 0.3189, "rewards/chosen": -1.1600869261188271, "rewards/margins": 0.21018625745827224, "rewards/rejected": -1.3702731835770994, "step": 2832 }, { "epoch": 0.8536170212765958, "grad_norm": 1.956950306892395, "kl": 0.0, "learning_rate": 1.626733921815889e-08, "logits/chosen": -15627038.68164794, "logits/rejected": -17529227.69712794, "logps/chosen": -15.227195253979401, "logps/rejected": -21.2347761640557, "loss": 0.3081, "rewards/chosen": -1.2809460814972495, "rewards/margins": 0.21129570738846626, "rewards/rejected": -1.4922417888857158, "step": 3009 }, { "epoch": 0.9038297872340425, "grad_norm": 2.3128018379211426, "kl": 0.0, "learning_rate": 1.0687263556116015e-08, "logits/chosen": -16287571.457875459, "logits/rejected": -17474259.877515312, "logps/chosen": -16.702005351419412, "logps/rejected": -22.0252146216098, "loss": 0.2956, "rewards/chosen": -1.4336266150841346, "rewards/margins": 0.1556206671159419, "rewards/rejected": -1.5892472822000765, "step": 3186 }, { "epoch": 0.9540425531914893, "grad_norm": 0.8989251255989075, "kl": 0.0, "learning_rate": 5.1071878940731394e-09, "logits/chosen": -15992458.158730159, "logits/rejected": -17564913.04467354, "logps/chosen": -16.995010618179563, "logps/rejected": -22.893974173109967, "loss": 0.2823, "rewards/chosen": -1.4715233454628596, "rewards/margins": 0.19740445351292446, "rewards/rejected": -1.668927798975784, "step": 3363 }, { "epoch": 1.0, "step": 3525, "total_flos": 5.091091732590756e+17, "train_loss": 0.413429944193955, "train_runtime": 10515.385, "train_samples_per_second": 5.364, "train_steps_per_second": 0.335 } ], "logging_steps": 177, "max_steps": 3525, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5.091091732590756e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }