| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 3525, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.05021276595744681, | |
| "grad_norm": 0.732947051525116, | |
| "kl": 12.475480079650879, | |
| "learning_rate": 5.014164305949008e-08, | |
| "logits/chosen": -15179654.095238095, | |
| "logits/rejected": -16528289.93175853, | |
| "logps/chosen": -2.1814988803514193, | |
| "logps/rejected": -5.992375464785652, | |
| "loss": 0.4997, | |
| "rewards/chosen": 0.0005148474550072527, | |
| "rewards/margins": 0.0009199919065958275, | |
| "rewards/rejected": -0.00040514445158857477, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.10042553191489362, | |
| "grad_norm": 0.26544979214668274, | |
| "kl": 11.266656875610352, | |
| "learning_rate": 9.996847414880202e-08, | |
| "logits/chosen": -14902797.791245792, | |
| "logits/rejected": -16395014.176943699, | |
| "logps/chosen": -2.2833465678924663, | |
| "logps/rejected": -6.153056317722297, | |
| "loss": 0.4996, | |
| "rewards/chosen": -0.00040731574048096886, | |
| "rewards/margins": 0.000592714583635074, | |
| "rewards/rejected": -0.0010000303241160429, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.15063829787234043, | |
| "grad_norm": 0.6797279715538025, | |
| "kl": 3.9419214725494385, | |
| "learning_rate": 9.438839848675913e-08, | |
| "logits/chosen": -14784543.875486381, | |
| "logits/rejected": -16617535.61345988, | |
| "logps/chosen": -2.2231409688867947, | |
| "logps/rejected": -6.279140995739862, | |
| "loss": 0.4988, | |
| "rewards/chosen": -0.0022345268772733812, | |
| "rewards/margins": 0.003318870893000191, | |
| "rewards/rejected": -0.005553397770273572, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.20085106382978724, | |
| "grad_norm": 0.5770508050918579, | |
| "kl": 0.7956511974334717, | |
| "learning_rate": 8.880832282471626e-08, | |
| "logits/chosen": -14984102.078853047, | |
| "logits/rejected": -16609355.65171504, | |
| "logps/chosen": -2.5137440671202955, | |
| "logps/rejected": -6.364399530013193, | |
| "loss": 0.4975, | |
| "rewards/chosen": -0.006582849341908664, | |
| "rewards/margins": 0.007156510907123363, | |
| "rewards/rejected": -0.013739360249032027, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.251063829787234, | |
| "grad_norm": 0.5370281934738159, | |
| "kl": 0.05692530423402786, | |
| "learning_rate": 8.32282471626734e-08, | |
| "logits/chosen": -14969774.628571428, | |
| "logits/rejected": -16552960.0, | |
| "logps/chosen": -2.374119785853795, | |
| "logps/rejected": -6.513669296049736, | |
| "loss": 0.4945, | |
| "rewards/chosen": -0.012183338403701782, | |
| "rewards/margins": 0.01736304331833208, | |
| "rewards/rejected": -0.029546381722033863, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.30127659574468085, | |
| "grad_norm": 0.7874128222465515, | |
| "kl": 0.04236392676830292, | |
| "learning_rate": 7.76481715006305e-08, | |
| "logits/chosen": -14856485.372262774, | |
| "logits/rejected": -16540230.837127846, | |
| "logps/chosen": -2.2836047541486084, | |
| "logps/rejected": -6.687177187089536, | |
| "loss": 0.4906, | |
| "rewards/chosen": -0.021432671233685346, | |
| "rewards/margins": 0.029663742240055215, | |
| "rewards/rejected": -0.05109641347374056, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 0.35148936170212763, | |
| "grad_norm": 0.7597999572753906, | |
| "kl": 0.0, | |
| "learning_rate": 7.206809583858764e-08, | |
| "logits/chosen": -15033836.743362831, | |
| "logits/rejected": -16566120.551260505, | |
| "logps/chosen": -2.7796039412506914, | |
| "logps/rejected": -6.999364824054622, | |
| "loss": 0.4848, | |
| "rewards/chosen": -0.037276040136286645, | |
| "rewards/margins": 0.04471597297755264, | |
| "rewards/rejected": -0.08199201311383929, | |
| "step": 1239 | |
| }, | |
| { | |
| "epoch": 0.40170212765957447, | |
| "grad_norm": 1.355948567390442, | |
| "kl": 0.0319652259349823, | |
| "learning_rate": 6.648802017654477e-08, | |
| "logits/chosen": -15049823.426523298, | |
| "logits/rejected": -16636860.453825857, | |
| "logps/chosen": -3.031732155857975, | |
| "logps/rejected": -7.528678471306069, | |
| "loss": 0.4752, | |
| "rewards/chosen": -0.06634654110050543, | |
| "rewards/margins": 0.07183165772143553, | |
| "rewards/rejected": -0.13817819882194096, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 0.45191489361702125, | |
| "grad_norm": 0.9223116040229797, | |
| "kl": 0.0, | |
| "learning_rate": 6.090794451450188e-08, | |
| "logits/chosen": -15345349.56521739, | |
| "logits/rejected": -16821483.340350877, | |
| "logps/chosen": -3.2707338194916216, | |
| "logps/rejected": -8.287376644736842, | |
| "loss": 0.4649, | |
| "rewards/chosen": -0.10732046072033868, | |
| "rewards/margins": 0.09713628355734771, | |
| "rewards/rejected": -0.2044567442776864, | |
| "step": 1593 | |
| }, | |
| { | |
| "epoch": 0.502127659574468, | |
| "grad_norm": 0.8558508157730103, | |
| "kl": 0.0, | |
| "learning_rate": 5.5327868852459016e-08, | |
| "logits/chosen": -15458446.88372093, | |
| "logits/rejected": -16827832.373056997, | |
| "logps/chosen": -4.2163511764171515, | |
| "logps/rejected": -8.720990696513384, | |
| "loss": 0.4485, | |
| "rewards/chosen": -0.18739568725112796, | |
| "rewards/margins": 0.10582041852458987, | |
| "rewards/rejected": -0.29321610577571783, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.5523404255319149, | |
| "grad_norm": 1.2379802465438843, | |
| "kl": 0.0, | |
| "learning_rate": 4.9747793190416137e-08, | |
| "logits/chosen": -15368077.450847458, | |
| "logits/rejected": -17055221.495093666, | |
| "logps/chosen": -5.225153105137712, | |
| "logps/rejected": -10.289466714986618, | |
| "loss": 0.4349, | |
| "rewards/chosen": -0.31121714963751324, | |
| "rewards/margins": 0.10698996959893858, | |
| "rewards/rejected": -0.4182071192364518, | |
| "step": 1947 | |
| }, | |
| { | |
| "epoch": 0.6025531914893617, | |
| "grad_norm": 2.636925220489502, | |
| "kl": 0.0, | |
| "learning_rate": 4.4167717528373264e-08, | |
| "logits/chosen": -15860050.707692308, | |
| "logits/rejected": -17018940.23529412, | |
| "logps/chosen": -6.8304584209735575, | |
| "logps/rejected": -12.103902532980104, | |
| "loss": 0.4076, | |
| "rewards/chosen": -0.4518470470721905, | |
| "rewards/margins": 0.14684447744662443, | |
| "rewards/rejected": -0.5986915245188149, | |
| "step": 2124 | |
| }, | |
| { | |
| "epoch": 0.6527659574468085, | |
| "grad_norm": 2.6684844493865967, | |
| "kl": 0.0, | |
| "learning_rate": 3.858764186633039e-08, | |
| "logits/chosen": -15603414.0, | |
| "logits/rejected": -17277662.455172412, | |
| "logps/chosen": -8.103792190551758, | |
| "logps/rejected": -14.19166049299569, | |
| "loss": 0.3791, | |
| "rewards/chosen": -0.6035651564598083, | |
| "rewards/margins": 0.18009341708545024, | |
| "rewards/rejected": -0.7836585735452586, | |
| "step": 2301 | |
| }, | |
| { | |
| "epoch": 0.7029787234042553, | |
| "grad_norm": 2.259669542312622, | |
| "kl": 0.0, | |
| "learning_rate": 3.300756620428751e-08, | |
| "logits/chosen": -15778993.082706766, | |
| "logits/rejected": -17491624.292173915, | |
| "logps/chosen": -9.979437118186091, | |
| "logps/rejected": -16.35701086956522, | |
| "loss": 0.357, | |
| "rewards/chosen": -0.7678221508972627, | |
| "rewards/margins": 0.25122187933371554, | |
| "rewards/rejected": -1.0190440302309782, | |
| "step": 2478 | |
| }, | |
| { | |
| "epoch": 0.7531914893617021, | |
| "grad_norm": 3.3658533096313477, | |
| "kl": 0.0, | |
| "learning_rate": 2.742749054224464e-08, | |
| "logits/chosen": -15436312.291970802, | |
| "logits/rejected": -17457363.614711035, | |
| "logps/chosen": -11.540958599452555, | |
| "logps/rejected": -18.07249138025394, | |
| "loss": 0.3366, | |
| "rewards/chosen": -0.9344464601391423, | |
| "rewards/margins": 0.2448034252729635, | |
| "rewards/rejected": -1.1792498854121058, | |
| "step": 2655 | |
| }, | |
| { | |
| "epoch": 0.8034042553191489, | |
| "grad_norm": 2.318241834640503, | |
| "kl": 0.0, | |
| "learning_rate": 2.1847414880201765e-08, | |
| "logits/chosen": -15859763.621399177, | |
| "logits/rejected": -17528114.41432225, | |
| "logps/chosen": -13.565882804462449, | |
| "logps/rejected": -20.141737265558397, | |
| "loss": 0.3189, | |
| "rewards/chosen": -1.1600869261188271, | |
| "rewards/margins": 0.21018625745827224, | |
| "rewards/rejected": -1.3702731835770994, | |
| "step": 2832 | |
| }, | |
| { | |
| "epoch": 0.8536170212765958, | |
| "grad_norm": 1.956950306892395, | |
| "kl": 0.0, | |
| "learning_rate": 1.626733921815889e-08, | |
| "logits/chosen": -15627038.68164794, | |
| "logits/rejected": -17529227.69712794, | |
| "logps/chosen": -15.227195253979401, | |
| "logps/rejected": -21.2347761640557, | |
| "loss": 0.3081, | |
| "rewards/chosen": -1.2809460814972495, | |
| "rewards/margins": 0.21129570738846626, | |
| "rewards/rejected": -1.4922417888857158, | |
| "step": 3009 | |
| }, | |
| { | |
| "epoch": 0.9038297872340425, | |
| "grad_norm": 2.3128018379211426, | |
| "kl": 0.0, | |
| "learning_rate": 1.0687263556116015e-08, | |
| "logits/chosen": -16287571.457875459, | |
| "logits/rejected": -17474259.877515312, | |
| "logps/chosen": -16.702005351419412, | |
| "logps/rejected": -22.0252146216098, | |
| "loss": 0.2956, | |
| "rewards/chosen": -1.4336266150841346, | |
| "rewards/margins": 0.1556206671159419, | |
| "rewards/rejected": -1.5892472822000765, | |
| "step": 3186 | |
| }, | |
| { | |
| "epoch": 0.9540425531914893, | |
| "grad_norm": 0.8989251255989075, | |
| "kl": 0.0, | |
| "learning_rate": 5.1071878940731394e-09, | |
| "logits/chosen": -15992458.158730159, | |
| "logits/rejected": -17564913.04467354, | |
| "logps/chosen": -16.995010618179563, | |
| "logps/rejected": -22.893974173109967, | |
| "loss": 0.2823, | |
| "rewards/chosen": -1.4715233454628596, | |
| "rewards/margins": 0.19740445351292446, | |
| "rewards/rejected": -1.668927798975784, | |
| "step": 3363 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 3525, | |
| "total_flos": 5.091091732590756e+17, | |
| "train_loss": 0.413429944193955, | |
| "train_runtime": 10515.385, | |
| "train_samples_per_second": 5.364, | |
| "train_steps_per_second": 0.335 | |
| } | |
| ], | |
| "logging_steps": 177, | |
| "max_steps": 3525, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.091091732590756e+17, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |