{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 225, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1342281879194631, "grad_norm": 37.765533447265625, "kl": 0.08906249701976776, "learning_rate": 9e-08, "logits/chosen": -11704729.6, "logits/rejected": -24988057.6, "logps/chosen": -343.025, "logps/rejected": -369.6, "loss": 0.4995, "num_unsafe": 0.5, "rewards/chosen": -0.000567626953125, "rewards/margins": 0.0042724609375, "rewards/rejected": -0.004840087890625, "step": 10 }, { "epoch": 0.2684563758389262, "grad_norm": 45.17057418823242, "kl": 0.17304687201976776, "learning_rate": 1.8999999999999998e-07, "logits/chosen": 4589977.6, "logits/rejected": -1248870.4, "logps/chosen": -337.7, "logps/rejected": -336.45, "loss": 0.504, "num_unsafe": 0.699999988079071, "rewards/chosen": -0.0170135498046875, "rewards/margins": -0.03390350341796875, "rewards/rejected": 0.01688995361328125, "step": 20 }, { "epoch": 0.40268456375838924, "grad_norm": 34.08852767944336, "kl": 0.09187011420726776, "learning_rate": 2.9e-07, "logits/chosen": 1790771.2, "logits/rejected": -5015142.4, "logps/chosen": -340.775, "logps/rejected": -351.55, "loss": 0.4883, "num_unsafe": 0.6499999761581421, "rewards/chosen": 0.020189189910888673, "rewards/margins": 0.09420499801635743, "rewards/rejected": -0.07401580810546875, "step": 30 }, { "epoch": 0.5369127516778524, "grad_norm": 32.13209533691406, "kl": 0.02812499925494194, "learning_rate": 3.8999999999999997e-07, "logits/chosen": -3015884.8, "logits/rejected": -15467315.2, "logps/chosen": -384.05, "logps/rejected": -349.35, "loss": 0.459, "num_unsafe": 0.6000000238418579, "rewards/chosen": 0.0526611328125, "rewards/margins": 0.33935546875, "rewards/rejected": -0.2866943359375, "step": 40 }, { "epoch": 0.6711409395973155, "grad_norm": 27.443809509277344, "kl": 0.140625, "learning_rate": 4.9e-07, "logits/chosen": 8273305.6, "logits/rejected": -5085593.6, "logps/chosen": -382.5, "logps/rejected": -387.775, "loss": 0.3974, "num_unsafe": 0.25, "rewards/chosen": 0.202978515625, "rewards/margins": 0.955419921875, "rewards/rejected": -0.75244140625, "step": 50 }, { "epoch": 0.8053691275167785, "grad_norm": 27.648706436157227, "kl": 0.015625, "learning_rate": 5.9e-07, "logits/chosen": 4796211.2, "logits/rejected": -1143603.2, "logps/chosen": -316.45, "logps/rejected": -333.0, "loss": 0.3345, "num_unsafe": 0.4000000059604645, "rewards/chosen": 0.3970947265625, "rewards/margins": 1.7534423828125, "rewards/rejected": -1.35634765625, "step": 60 }, { "epoch": 0.9395973154362416, "grad_norm": 18.576852798461914, "kl": 0.07734374701976776, "learning_rate": 6.9e-07, "logits/chosen": 330137.6, "logits/rejected": -10493440.0, "logps/chosen": -304.325, "logps/rejected": -347.8, "loss": 0.2515, "num_unsafe": 0.6000000238418579, "rewards/chosen": 0.79794921875, "rewards/margins": 3.04169921875, "rewards/rejected": -2.24375, "step": 70 }, { "epoch": 1.0671140939597314, "grad_norm": 15.856501579284668, "kl": 0.15131579339504242, "learning_rate": 7.9e-07, "logits/chosen": 3814022.736842105, "logits/rejected": -5148456.421052632, "logps/chosen": -327.3421052631579, "logps/rejected": -383.7368421052632, "loss": 0.2113, "num_unsafe": 0.5789473652839661, "rewards/chosen": 1.1128957648026316, "rewards/margins": 3.958701685855263, "rewards/rejected": -2.8458059210526314, "step": 80 }, { "epoch": 1.2013422818791946, "grad_norm": 22.200435638427734, "kl": 0.07187499850988388, "learning_rate": 8.9e-07, "logits/chosen": 5061427.2, "logits/rejected": -9743462.4, "logps/chosen": -325.575, "logps/rejected": -370.55, "loss": 0.161, "num_unsafe": 0.699999988079071, "rewards/chosen": 1.821484375, "rewards/margins": 5.42890625, "rewards/rejected": -3.607421875, "step": 90 }, { "epoch": 1.3355704697986577, "grad_norm": 11.401748657226562, "kl": 0.0, "learning_rate": 9.9e-07, "logits/chosen": 7218790.4, "logits/rejected": 6960742.4, "logps/chosen": -260.6125, "logps/rejected": -385.0, "loss": 0.1304, "num_unsafe": 0.6000000238418579, "rewards/chosen": 2.35836181640625, "rewards/margins": 6.27554931640625, "rewards/rejected": -3.9171875, "step": 100 }, { "epoch": 1.4697986577181208, "grad_norm": 5.563471794128418, "kl": 0.5796874761581421, "learning_rate": 9.872634363932886e-07, "logits/chosen": 4330291.2, "logits/rejected": -9574809.6, "logps/chosen": -400.125, "logps/rejected": -395.825, "loss": 0.1417, "num_unsafe": 0.550000011920929, "rewards/chosen": 2.4845703125, "rewards/margins": 5.8267578125, "rewards/rejected": -3.3421875, "step": 110 }, { "epoch": 1.604026845637584, "grad_norm": 0.8229545950889587, "kl": 1.169921875, "learning_rate": 9.440682244067722e-07, "logits/chosen": 14943027.2, "logits/rejected": 602931.2, "logps/chosen": -302.4875, "logps/rejected": -388.95, "loss": 0.0933, "num_unsafe": 0.5, "rewards/chosen": 3.31484375, "rewards/margins": 7.7796875, "rewards/rejected": -4.46484375, "step": 120 }, { "epoch": 1.738255033557047, "grad_norm": 0.960098385810852, "kl": 0.125, "learning_rate": 8.729705727120911e-07, "logits/chosen": 12668518.4, "logits/rejected": 3657728.0, "logps/chosen": -342.75, "logps/rejected": -402.15, "loss": 0.1193, "num_unsafe": 0.25, "rewards/chosen": 2.979296875, "rewards/margins": 7.533984375, "rewards/rejected": -4.5546875, "step": 130 }, { "epoch": 1.87248322147651, "grad_norm": 8.306931495666504, "kl": 0.71875, "learning_rate": 7.78437808244094e-07, "logits/chosen": 2376089.6, "logits/rejected": -10040934.4, "logps/chosen": -286.5, "logps/rejected": -374.8, "loss": 0.0991, "num_unsafe": 0.550000011920929, "rewards/chosen": 3.276904296875, "rewards/margins": 8.284716796875, "rewards/rejected": -5.0078125, "step": 140 }, { "epoch": 2.0, "grad_norm": 1.439923882484436, "kl": 0.08018092066049576, "learning_rate": 6.664097722614933e-07, "logits/chosen": 8874954.105263159, "logits/rejected": 2802310.736842105, "logps/chosen": -302.2631578947368, "logps/rejected": -389.42105263157896, "loss": 0.0798, "num_unsafe": 0.6315789222717285, "rewards/chosen": 3.531661184210526, "rewards/margins": 8.710115131578947, "rewards/rejected": -5.178453947368421, "step": 150 }, { "epoch": 2.134228187919463, "grad_norm": 3.9872214794158936, "kl": 0.16249999403953552, "learning_rate": 5.439255982753717e-07, "logits/chosen": -1116569.6, "logits/rejected": -15123660.8, "logps/chosen": -307.3375, "logps/rejected": -428.3, "loss": 0.0827, "num_unsafe": 0.5, "rewards/chosen": 3.5447265625, "rewards/margins": 9.397851562500001, "rewards/rejected": -5.853125, "step": 160 }, { "epoch": 2.2684563758389262, "grad_norm": 1.704241156578064, "kl": 0.24687500298023224, "learning_rate": 4.1868141740255817e-07, "logits/chosen": 15165849.6, "logits/rejected": 8513945.6, "logps/chosen": -305.3875, "logps/rejected": -393.5, "loss": 0.0919, "num_unsafe": 0.699999988079071, "rewards/chosen": 3.2162109375, "rewards/margins": 8.9154296875, "rewards/rejected": -5.69921875, "step": 170 }, { "epoch": 2.402684563758389, "grad_norm": 1.0867478847503662, "kl": 0.0, "learning_rate": 2.985467821431687e-07, "logits/chosen": 13270835.2, "logits/rejected": 5517721.6, "logps/chosen": -308.5625, "logps/rejected": -415.5, "loss": 0.0916, "num_unsafe": 0.6499999761581421, "rewards/chosen": 3.23125, "rewards/margins": 9.74296875, "rewards/rejected": -6.51171875, "step": 180 }, { "epoch": 2.5369127516778525, "grad_norm": 0.9046293497085571, "kl": 0.12187500298023224, "learning_rate": 1.9107019345483288e-07, "logits/chosen": 5835980.8, "logits/rejected": -6291456.0, "logps/chosen": -355.0, "logps/rejected": -414.2, "loss": 0.0807, "num_unsafe": 0.6000000238418579, "rewards/chosen": 2.9845703125, "rewards/margins": 9.7416015625, "rewards/rejected": -6.75703125, "step": 190 }, { "epoch": 2.6711409395973154, "grad_norm": 0.7929665446281433, "kl": 0.02500000037252903, "learning_rate": 1.030048006760823e-07, "logits/chosen": 15709798.4, "logits/rejected": 2888089.6, "logps/chosen": -350.425, "logps/rejected": -444.375, "loss": 0.1004, "num_unsafe": 0.25, "rewards/chosen": 3.40361328125, "rewards/margins": 9.811425781250001, "rewards/rejected": -6.4078125, "step": 200 }, { "epoch": 2.8053691275167782, "grad_norm": 6.052427291870117, "kl": 0.012500000186264515, "learning_rate": 3.9884076317064807e-08, "logits/chosen": 9807872.0, "logits/rejected": 3389440.0, "logps/chosen": -287.175, "logps/rejected": -379.05, "loss": 0.0973, "num_unsafe": 0.4000000059604645, "rewards/chosen": 3.32802734375, "rewards/margins": 9.24248046875, "rewards/rejected": -5.914453125, "step": 210 }, { "epoch": 2.9395973154362416, "grad_norm": 0.8664066195487976, "kl": 0.503125011920929, "learning_rate": 5.674127631043024e-09, "logits/chosen": 4027187.2, "logits/rejected": -7430963.2, "logps/chosen": -275.8, "logps/rejected": -387.6, "loss": 0.0668, "num_unsafe": 0.6000000238418579, "rewards/chosen": 3.6443359375, "rewards/margins": 9.8779296875, "rewards/rejected": -6.23359375, "step": 220 } ], "logging_steps": 10, "max_steps": 225, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 300.0, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }