| { |
| "best_global_step": 100, |
| "best_metric": 0.6752368807792664, |
| "best_model_checkpoint": "models/dpo_fft_LFM2.5-1.2B-Instruct_argilla__distilabel-math-preference-dpo_20260222_210527/checkpoint-100", |
| "epoch": 0.6956521739130435, |
| "eval_steps": 100, |
| "global_step": 100, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.06956521739130435, |
| "grad_norm": 87.0, |
| "learning_rate": 4.090909090909091e-07, |
| "logits/chosen": -1.0601829290390015, |
| "logits/rejected": -1.0425456762313843, |
| "logps/chosen": -332.2013244628906, |
| "logps/rejected": -333.1183776855469, |
| "loss": 0.6823273181915284, |
| "rewards/accuracies": 0.42500001192092896, |
| "rewards/chosen": 0.015406012535095215, |
| "rewards/margins": 0.03173117712140083, |
| "rewards/rejected": -0.01632516458630562, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.1391304347826087, |
| "grad_norm": 98.5, |
| "learning_rate": 8.636363636363636e-07, |
| "logits/chosen": -1.0965769290924072, |
| "logits/rejected": -1.0956510305404663, |
| "logps/chosen": -328.796875, |
| "logps/rejected": -312.0242919921875, |
| "loss": 0.6926839828491211, |
| "rewards/accuracies": 0.48750001192092896, |
| "rewards/chosen": 0.093757264316082, |
| "rewards/margins": 0.013911411166191101, |
| "rewards/rejected": 0.0798458456993103, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.20869565217391303, |
| "grad_norm": 96.5, |
| "learning_rate": 1.318181818181818e-06, |
| "logits/chosen": -1.1252676248550415, |
| "logits/rejected": -1.1598210334777832, |
| "logps/chosen": -326.04327392578125, |
| "logps/rejected": -303.9259033203125, |
| "loss": 0.7117842674255371, |
| "rewards/accuracies": 0.5062500238418579, |
| "rewards/chosen": 0.48031529784202576, |
| "rewards/margins": -0.010448494926095009, |
| "rewards/rejected": 0.49076375365257263, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.2782608695652174, |
| "grad_norm": 109.0, |
| "learning_rate": 1.7727272727272727e-06, |
| "logits/chosen": -1.0572926998138428, |
| "logits/rejected": -1.069678544998169, |
| "logps/chosen": -333.5104064941406, |
| "logps/rejected": -322.76116943359375, |
| "loss": 0.721163272857666, |
| "rewards/accuracies": 0.5062500238418579, |
| "rewards/chosen": 1.2552604675292969, |
| "rewards/margins": 0.020199721679091454, |
| "rewards/rejected": 1.2350608110427856, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.34782608695652173, |
| "grad_norm": 106.0, |
| "learning_rate": 1.99918061692433e-06, |
| "logits/chosen": -1.116310954093933, |
| "logits/rejected": -1.126555323600769, |
| "logps/chosen": -325.90625, |
| "logps/rejected": -320.7261047363281, |
| "loss": 0.7112587451934814, |
| "rewards/accuracies": 0.518750011920929, |
| "rewards/chosen": 0.9580303430557251, |
| "rewards/margins": 0.02043265663087368, |
| "rewards/rejected": 0.9375975728034973, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.41739130434782606, |
| "grad_norm": 134.0, |
| "learning_rate": 1.992633606781968e-06, |
| "logits/chosen": -1.0915653705596924, |
| "logits/rejected": -1.0714164972305298, |
| "logps/chosen": -335.96258544921875, |
| "logps/rejected": -329.37567138671875, |
| "loss": 0.6888086795806885, |
| "rewards/accuracies": 0.5062500238418579, |
| "rewards/chosen": 0.24013535678386688, |
| "rewards/margins": 0.025822216644883156, |
| "rewards/rejected": 0.21431314945220947, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.48695652173913045, |
| "grad_norm": 168.0, |
| "learning_rate": 1.9795824849893477e-06, |
| "logits/chosen": -1.124298334121704, |
| "logits/rejected": -1.1153584718704224, |
| "logps/chosen": -319.74371337890625, |
| "logps/rejected": -317.81964111328125, |
| "loss": 0.7498865127563477, |
| "rewards/accuracies": 0.45625001192092896, |
| "rewards/chosen": 0.3042285442352295, |
| "rewards/margins": -0.07379330694675446, |
| "rewards/rejected": 0.37802186608314514, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.5565217391304348, |
| "grad_norm": 93.5, |
| "learning_rate": 1.960112767443493e-06, |
| "logits/chosen": -1.1165910959243774, |
| "logits/rejected": -1.1083123683929443, |
| "logps/chosen": -314.81610107421875, |
| "logps/rejected": -312.41070556640625, |
| "loss": 0.67913818359375, |
| "rewards/accuracies": 0.581250011920929, |
| "rewards/chosen": 0.3251148760318756, |
| "rewards/margins": 0.07726944983005524, |
| "rewards/rejected": 0.24784541130065918, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.6260869565217392, |
| "grad_norm": 97.5, |
| "learning_rate": 1.9343520271137762e-06, |
| "logits/chosen": -1.0576120615005493, |
| "logits/rejected": -1.0416970252990723, |
| "logps/chosen": -333.35565185546875, |
| "logps/rejected": -329.2746276855469, |
| "loss": 0.6899321556091309, |
| "rewards/accuracies": 0.5562499761581421, |
| "rewards/chosen": 1.0298190116882324, |
| "rewards/margins": 0.062107719480991364, |
| "rewards/rejected": 0.967711329460144, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.6956521739130435, |
| "grad_norm": 106.0, |
| "learning_rate": 1.9024690581354698e-06, |
| "logits/chosen": -1.0332655906677246, |
| "logits/rejected": -1.0259943008422852, |
| "logps/chosen": -327.9278564453125, |
| "logps/rejected": -320.8587951660156, |
| "loss": 0.6782574653625488, |
| "rewards/accuracies": 0.550000011920929, |
| "rewards/chosen": 1.1065479516983032, |
| "rewards/margins": 0.09923191368579865, |
| "rewards/rejected": 1.007315993309021, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.6956521739130435, |
| "eval_logits/chosen": -1.065671682357788, |
| "eval_logits/rejected": -1.0876761674880981, |
| "eval_logps/chosen": -315.0599670410156, |
| "eval_logps/rejected": -316.6776123046875, |
| "eval_loss": 0.6752368807792664, |
| "eval_rewards/accuracies": 0.5887096524238586, |
| "eval_rewards/chosen": 0.8812527060508728, |
| "eval_rewards/margins": 0.13870203495025635, |
| "eval_rewards/rejected": 0.7425506114959717, |
| "eval_runtime": 11.3291, |
| "eval_samples_per_second": 10.68, |
| "eval_steps_per_second": 2.736, |
| "step": 100 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 432, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|