{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 225,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.1342281879194631,
      "grad_norm": 37.765533447265625,
      "kl": 0.08906249701976776,
      "learning_rate": 9e-08,
      "logits/chosen": -11704729.6,
      "logits/rejected": -24988057.6,
      "logps/chosen": -343.025,
      "logps/rejected": -369.6,
      "loss": 0.4995,
      "num_unsafe": 0.5,
      "rewards/chosen": -0.000567626953125,
      "rewards/margins": 0.0042724609375,
      "rewards/rejected": -0.004840087890625,
      "step": 10
    },
    {
      "epoch": 0.2684563758389262,
      "grad_norm": 45.17057418823242,
      "kl": 0.17304687201976776,
      "learning_rate": 1.8999999999999998e-07,
      "logits/chosen": 4589977.6,
      "logits/rejected": -1248870.4,
      "logps/chosen": -337.7,
      "logps/rejected": -336.45,
      "loss": 0.504,
      "num_unsafe": 0.699999988079071,
      "rewards/chosen": -0.0170135498046875,
      "rewards/margins": -0.03390350341796875,
      "rewards/rejected": 0.01688995361328125,
      "step": 20
    },
    {
      "epoch": 0.40268456375838924,
      "grad_norm": 34.08852767944336,
      "kl": 0.09187011420726776,
      "learning_rate": 2.9e-07,
      "logits/chosen": 1790771.2,
      "logits/rejected": -5015142.4,
      "logps/chosen": -340.775,
      "logps/rejected": -351.55,
      "loss": 0.4883,
      "num_unsafe": 0.6499999761581421,
      "rewards/chosen": 0.020189189910888673,
      "rewards/margins": 0.09420499801635743,
      "rewards/rejected": -0.07401580810546875,
      "step": 30
    },
    {
      "epoch": 0.5369127516778524,
      "grad_norm": 32.13209533691406,
      "kl": 0.02812499925494194,
      "learning_rate": 3.8999999999999997e-07,
      "logits/chosen": -3015884.8,
      "logits/rejected": -15467315.2,
      "logps/chosen": -384.05,
      "logps/rejected": -349.35,
      "loss": 0.459,
      "num_unsafe": 0.6000000238418579,
      "rewards/chosen": 0.0526611328125,
      "rewards/margins": 0.33935546875,
      "rewards/rejected": -0.2866943359375,
      "step": 40
    },
    {
      "epoch": 0.6711409395973155,
      "grad_norm": 27.443809509277344,
      "kl": 0.140625,
      "learning_rate": 4.9e-07,
      "logits/chosen": 8273305.6,
      "logits/rejected": -5085593.6,
      "logps/chosen": -382.5,
      "logps/rejected": -387.775,
      "loss": 0.3974,
      "num_unsafe": 0.25,
      "rewards/chosen": 0.202978515625,
      "rewards/margins": 0.955419921875,
      "rewards/rejected": -0.75244140625,
      "step": 50
    },
    {
      "epoch": 0.8053691275167785,
      "grad_norm": 27.648706436157227,
      "kl": 0.015625,
      "learning_rate": 5.9e-07,
      "logits/chosen": 4796211.2,
      "logits/rejected": -1143603.2,
      "logps/chosen": -316.45,
      "logps/rejected": -333.0,
      "loss": 0.3345,
      "num_unsafe": 0.4000000059604645,
      "rewards/chosen": 0.3970947265625,
      "rewards/margins": 1.7534423828125,
      "rewards/rejected": -1.35634765625,
      "step": 60
    },
    {
      "epoch": 0.9395973154362416,
      "grad_norm": 18.576852798461914,
      "kl": 0.07734374701976776,
      "learning_rate": 6.9e-07,
      "logits/chosen": 330137.6,
      "logits/rejected": -10493440.0,
      "logps/chosen": -304.325,
      "logps/rejected": -347.8,
      "loss": 0.2515,
      "num_unsafe": 0.6000000238418579,
      "rewards/chosen": 0.79794921875,
      "rewards/margins": 3.04169921875,
      "rewards/rejected": -2.24375,
      "step": 70
    },
    {
      "epoch": 1.0671140939597314,
      "grad_norm": 15.856501579284668,
      "kl": 0.15131579339504242,
      "learning_rate": 7.9e-07,
      "logits/chosen": 3814022.736842105,
      "logits/rejected": -5148456.421052632,
      "logps/chosen": -327.3421052631579,
      "logps/rejected": -383.7368421052632,
      "loss": 0.2113,
      "num_unsafe": 0.5789473652839661,
      "rewards/chosen": 1.1128957648026316,
      "rewards/margins": 3.958701685855263,
      "rewards/rejected": -2.8458059210526314,
      "step": 80
    },
    {
      "epoch": 1.2013422818791946,
      "grad_norm": 22.200435638427734,
      "kl": 0.07187499850988388,
      "learning_rate": 8.9e-07,
      "logits/chosen": 5061427.2,
      "logits/rejected": -9743462.4,
      "logps/chosen": -325.575,
      "logps/rejected": -370.55,
      "loss": 0.161,
      "num_unsafe": 0.699999988079071,
      "rewards/chosen": 1.821484375,
      "rewards/margins": 5.42890625,
      "rewards/rejected": -3.607421875,
      "step": 90
    },
    {
      "epoch": 1.3355704697986577,
      "grad_norm": 11.401748657226562,
      "kl": 0.0,
      "learning_rate": 9.9e-07,
      "logits/chosen": 7218790.4,
      "logits/rejected": 6960742.4,
      "logps/chosen": -260.6125,
      "logps/rejected": -385.0,
      "loss": 0.1304,
      "num_unsafe": 0.6000000238418579,
      "rewards/chosen": 2.35836181640625,
      "rewards/margins": 6.27554931640625,
      "rewards/rejected": -3.9171875,
      "step": 100
    },
    {
      "epoch": 1.4697986577181208,
      "grad_norm": 5.563471794128418,
      "kl": 0.5796874761581421,
      "learning_rate": 9.872634363932886e-07,
      "logits/chosen": 4330291.2,
      "logits/rejected": -9574809.6,
      "logps/chosen": -400.125,
      "logps/rejected": -395.825,
      "loss": 0.1417,
      "num_unsafe": 0.550000011920929,
      "rewards/chosen": 2.4845703125,
      "rewards/margins": 5.8267578125,
      "rewards/rejected": -3.3421875,
      "step": 110
    },
    {
      "epoch": 1.604026845637584,
      "grad_norm": 0.8229545950889587,
      "kl": 1.169921875,
      "learning_rate": 9.440682244067722e-07,
      "logits/chosen": 14943027.2,
      "logits/rejected": 602931.2,
      "logps/chosen": -302.4875,
      "logps/rejected": -388.95,
      "loss": 0.0933,
      "num_unsafe": 0.5,
      "rewards/chosen": 3.31484375,
      "rewards/margins": 7.7796875,
      "rewards/rejected": -4.46484375,
      "step": 120
    },
    {
      "epoch": 1.738255033557047,
      "grad_norm": 0.960098385810852,
      "kl": 0.125,
      "learning_rate": 8.729705727120911e-07,
      "logits/chosen": 12668518.4,
      "logits/rejected": 3657728.0,
      "logps/chosen": -342.75,
      "logps/rejected": -402.15,
      "loss": 0.1193,
      "num_unsafe": 0.25,
      "rewards/chosen": 2.979296875,
      "rewards/margins": 7.533984375,
      "rewards/rejected": -4.5546875,
      "step": 130
    },
    {
      "epoch": 1.87248322147651,
      "grad_norm": 8.306931495666504,
      "kl": 0.71875,
      "learning_rate": 7.78437808244094e-07,
      "logits/chosen": 2376089.6,
      "logits/rejected": -10040934.4,
      "logps/chosen": -286.5,
      "logps/rejected": -374.8,
      "loss": 0.0991,
      "num_unsafe": 0.550000011920929,
      "rewards/chosen": 3.276904296875,
      "rewards/margins": 8.284716796875,
      "rewards/rejected": -5.0078125,
      "step": 140
    },
    {
      "epoch": 2.0,
      "grad_norm": 1.439923882484436,
      "kl": 0.08018092066049576,
      "learning_rate": 6.664097722614933e-07,
      "logits/chosen": 8874954.105263159,
      "logits/rejected": 2802310.736842105,
      "logps/chosen": -302.2631578947368,
      "logps/rejected": -389.42105263157896,
      "loss": 0.0798,
      "num_unsafe": 0.6315789222717285,
      "rewards/chosen": 3.531661184210526,
      "rewards/margins": 8.710115131578947,
      "rewards/rejected": -5.178453947368421,
      "step": 150
    },
    {
      "epoch": 2.134228187919463,
      "grad_norm": 3.9872214794158936,
      "kl": 0.16249999403953552,
      "learning_rate": 5.439255982753717e-07,
      "logits/chosen": -1116569.6,
      "logits/rejected": -15123660.8,
      "logps/chosen": -307.3375,
      "logps/rejected": -428.3,
      "loss": 0.0827,
      "num_unsafe": 0.5,
      "rewards/chosen": 3.5447265625,
      "rewards/margins": 9.397851562500001,
      "rewards/rejected": -5.853125,
      "step": 160
    },
    {
      "epoch": 2.2684563758389262,
      "grad_norm": 1.704241156578064,
      "kl": 0.24687500298023224,
      "learning_rate": 4.1868141740255817e-07,
      "logits/chosen": 15165849.6,
      "logits/rejected": 8513945.6,
      "logps/chosen": -305.3875,
      "logps/rejected": -393.5,
      "loss": 0.0919,
      "num_unsafe": 0.699999988079071,
      "rewards/chosen": 3.2162109375,
      "rewards/margins": 8.9154296875,
      "rewards/rejected": -5.69921875,
      "step": 170
    },
    {
      "epoch": 2.402684563758389,
      "grad_norm": 1.0867478847503662,
      "kl": 0.0,
      "learning_rate": 2.985467821431687e-07,
      "logits/chosen": 13270835.2,
      "logits/rejected": 5517721.6,
      "logps/chosen": -308.5625,
      "logps/rejected": -415.5,
      "loss": 0.0916,
      "num_unsafe": 0.6499999761581421,
      "rewards/chosen": 3.23125,
      "rewards/margins": 9.74296875,
      "rewards/rejected": -6.51171875,
      "step": 180
    },
    {
      "epoch": 2.5369127516778525,
      "grad_norm": 0.9046293497085571,
      "kl": 0.12187500298023224,
      "learning_rate": 1.9107019345483288e-07,
      "logits/chosen": 5835980.8,
      "logits/rejected": -6291456.0,
      "logps/chosen": -355.0,
      "logps/rejected": -414.2,
      "loss": 0.0807,
      "num_unsafe": 0.6000000238418579,
      "rewards/chosen": 2.9845703125,
      "rewards/margins": 9.7416015625,
      "rewards/rejected": -6.75703125,
      "step": 190
    },
    {
      "epoch": 2.6711409395973154,
      "grad_norm": 0.7929665446281433,
      "kl": 0.02500000037252903,
      "learning_rate": 1.030048006760823e-07,
      "logits/chosen": 15709798.4,
      "logits/rejected": 2888089.6,
      "logps/chosen": -350.425,
      "logps/rejected": -444.375,
      "loss": 0.1004,
      "num_unsafe": 0.25,
      "rewards/chosen": 3.40361328125,
      "rewards/margins": 9.811425781250001,
      "rewards/rejected": -6.4078125,
      "step": 200
    },
    {
      "epoch": 2.8053691275167782,
      "grad_norm": 6.052427291870117,
      "kl": 0.012500000186264515,
      "learning_rate": 3.9884076317064807e-08,
      "logits/chosen": 9807872.0,
      "logits/rejected": 3389440.0,
      "logps/chosen": -287.175,
      "logps/rejected": -379.05,
      "loss": 0.0973,
      "num_unsafe": 0.4000000059604645,
      "rewards/chosen": 3.32802734375,
      "rewards/margins": 9.24248046875,
      "rewards/rejected": -5.914453125,
      "step": 210
    },
    {
      "epoch": 2.9395973154362416,
      "grad_norm": 0.8664066195487976,
      "kl": 0.503125011920929,
      "learning_rate": 5.674127631043024e-09,
      "logits/chosen": 4027187.2,
      "logits/rejected": -7430963.2,
      "logps/chosen": -275.8,
      "logps/rejected": -387.6,
      "loss": 0.0668,
      "num_unsafe": 0.6000000238418579,
      "rewards/chosen": 3.6443359375,
      "rewards/margins": 9.8779296875,
      "rewards/rejected": -6.23359375,
      "step": 220
    }
  ],
  "logging_steps": 10,
  "max_steps": 225,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 300.0,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}