| { | |
| "best_global_step": 98, | |
| "best_metric": 0.14128435, | |
| "best_model_checkpoint": "./output_dpo/v0-20260226-085120/checkpoint-98", | |
| "epoch": 1.9861635220125786, | |
| "eval_steps": 50, | |
| "global_step": 98, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.02012578616352201, | |
| "grad_norm": 1.4377635717391968, | |
| "learning_rate": 2e-05, | |
| "logits/chosen": -1.7360858917236328, | |
| "logits/rejected": -1.7113451957702637, | |
| "logps/chosen": -111.01881408691406, | |
| "logps/rejected": -147.11973571777344, | |
| "loss": 1.319612741470337, | |
| "memory(GiB)": 239.65, | |
| "nll_loss": 0.6264656782150269, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 1, | |
| "train_speed(iter/s)": 0.004523 | |
| }, | |
| { | |
| "epoch": 0.10062893081761007, | |
| "grad_norm": 1.4525984525680542, | |
| "learning_rate": 0.0001, | |
| "logits/chosen": -1.6965384483337402, | |
| "logits/rejected": -1.681287407875061, | |
| "logps/chosen": -111.46014404296875, | |
| "logps/rejected": -143.75, | |
| "loss": 1.361119270324707, | |
| "memory(GiB)": 239.65, | |
| "nll_loss": 0.6818519830703735, | |
| "rewards/accuracies": 0.5078125, | |
| "rewards/chosen": 0.04633765667676926, | |
| "rewards/margins": 0.02939797379076481, | |
| "rewards/rejected": 0.01693967543542385, | |
| "step": 5, | |
| "train_speed(iter/s)": 0.003936 | |
| }, | |
| { | |
| "epoch": 0.20125786163522014, | |
| "grad_norm": 1.0078742504119873, | |
| "learning_rate": 9.928848976574019e-05, | |
| "logits/chosen": -1.7403156757354736, | |
| "logits/rejected": -1.726575255393982, | |
| "logps/chosen": -92.17589569091797, | |
| "logps/rejected": -137.906005859375, | |
| "loss": 0.9127995491027832, | |
| "memory(GiB)": 284.39, | |
| "nll_loss": 0.5469792485237122, | |
| "rewards/accuracies": 0.925000011920929, | |
| "rewards/chosen": 2.0497705936431885, | |
| "rewards/margins": 1.318472146987915, | |
| "rewards/rejected": 0.7312980890274048, | |
| "step": 10, | |
| "train_speed(iter/s)": 0.003899 | |
| }, | |
| { | |
| "epoch": 0.3018867924528302, | |
| "grad_norm": 1.1189488172531128, | |
| "learning_rate": 9.717420893549902e-05, | |
| "logits/chosen": -1.8927457332611084, | |
| "logits/rejected": -1.8742872476577759, | |
| "logps/chosen": -56.06190872192383, | |
| "logps/rejected": -129.63563537597656, | |
| "loss": 0.5759311199188233, | |
| "memory(GiB)": 284.39, | |
| "nll_loss": 0.3772023618221283, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": 5.639416694641113, | |
| "rewards/margins": 3.9854512214660645, | |
| "rewards/rejected": 1.6539649963378906, | |
| "step": 15, | |
| "train_speed(iter/s)": 0.003905 | |
| }, | |
| { | |
| "epoch": 0.4025157232704403, | |
| "grad_norm": 2.065215826034546, | |
| "learning_rate": 9.371733080722911e-05, | |
| "logits/chosen": -2.0726945400238037, | |
| "logits/rejected": -2.0517024993896484, | |
| "logps/chosen": -38.782867431640625, | |
| "logps/rejected": -141.28872680664062, | |
| "loss": 0.34540715217590334, | |
| "memory(GiB)": 284.39, | |
| "nll_loss": 0.24602404236793518, | |
| "rewards/accuracies": 0.9624999761581421, | |
| "rewards/chosen": 7.165956020355225, | |
| "rewards/margins": 7.171680450439453, | |
| "rewards/rejected": -0.005724119953811169, | |
| "step": 20, | |
| "train_speed(iter/s)": 0.003816 | |
| }, | |
| { | |
| "epoch": 0.5031446540880503, | |
| "grad_norm": 0.9638963937759399, | |
| "learning_rate": 8.90162395476046e-05, | |
| "logits/chosen": -2.205498456954956, | |
| "logits/rejected": -2.182650089263916, | |
| "logps/chosen": -34.5748405456543, | |
| "logps/rejected": -168.0699462890625, | |
| "loss": 0.29475107192993166, | |
| "memory(GiB)": 284.39, | |
| "nll_loss": 0.2225954234600067, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 7.66351842880249, | |
| "rewards/margins": 9.37935733795166, | |
| "rewards/rejected": -1.7158397436141968, | |
| "step": 25, | |
| "train_speed(iter/s)": 0.00375 | |
| }, | |
| { | |
| "epoch": 0.6037735849056604, | |
| "grad_norm": 0.5872039794921875, | |
| "learning_rate": 8.320473013836196e-05, | |
| "logits/chosen": -2.2474639415740967, | |
| "logits/rejected": -2.2216179370880127, | |
| "logps/chosen": -23.524024963378906, | |
| "logps/rejected": -159.84942626953125, | |
| "loss": 0.23147854804992676, | |
| "memory(GiB)": 284.39, | |
| "nll_loss": 0.18826261162757874, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": 8.869623184204102, | |
| "rewards/margins": 10.225828170776367, | |
| "rewards/rejected": -1.3562055826187134, | |
| "step": 30, | |
| "train_speed(iter/s)": 0.003835 | |
| }, | |
| { | |
| "epoch": 0.7044025157232704, | |
| "grad_norm": 0.8212366700172424, | |
| "learning_rate": 7.644820051634812e-05, | |
| "logits/chosen": -2.2804150581359863, | |
| "logits/rejected": -2.2608768939971924, | |
| "logps/chosen": -20.996126174926758, | |
| "logps/rejected": -161.36029052734375, | |
| "loss": 0.1881607413291931, | |
| "memory(GiB)": 284.39, | |
| "nll_loss": 0.13474711775779724, | |
| "rewards/accuracies": 0.9750000238418579, | |
| "rewards/chosen": 9.420888900756836, | |
| "rewards/margins": 10.558382987976074, | |
| "rewards/rejected": -1.137495517730713, | |
| "step": 35, | |
| "train_speed(iter/s)": 0.003906 | |
| }, | |
| { | |
| "epoch": 0.8050314465408805, | |
| "grad_norm": 0.9303659200668335, | |
| "learning_rate": 6.89389442805288e-05, | |
| "logits/chosen": -2.2562363147735596, | |
| "logits/rejected": -2.2325804233551025, | |
| "logps/chosen": -26.601587295532227, | |
| "logps/rejected": -155.21389770507812, | |
| "loss": 0.21106297969818116, | |
| "memory(GiB)": 284.39, | |
| "nll_loss": 0.15431135892868042, | |
| "rewards/accuracies": 0.987500011920929, | |
| "rewards/chosen": 9.138971328735352, | |
| "rewards/margins": 9.48228931427002, | |
| "rewards/rejected": -0.34331730008125305, | |
| "step": 40, | |
| "train_speed(iter/s)": 0.003892 | |
| }, | |
| { | |
| "epoch": 0.9056603773584906, | |
| "grad_norm": 0.8759572505950928, | |
| "learning_rate": 6.0890677937442574e-05, | |
| "logits/chosen": -2.2504515647888184, | |
| "logits/rejected": -2.236832618713379, | |
| "logps/chosen": -24.932228088378906, | |
| "logps/rejected": -150.9632110595703, | |
| "loss": 0.21578831672668458, | |
| "memory(GiB)": 284.39, | |
| "nll_loss": 0.1573367863893509, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 8.940786361694336, | |
| "rewards/margins": 9.429086685180664, | |
| "rewards/rejected": -0.488300621509552, | |
| "step": 45, | |
| "train_speed(iter/s)": 0.003836 | |
| }, | |
| { | |
| "epoch": 1.020125786163522, | |
| "grad_norm": 1.6238784790039062, | |
| "learning_rate": 5.2532458441935636e-05, | |
| "logits/chosen": -2.3447046279907227, | |
| "logits/rejected": -2.316112995147705, | |
| "logps/chosen": -17.97600746154785, | |
| "logps/rejected": -169.5856475830078, | |
| "loss": 0.1865710735321045, | |
| "memory(GiB)": 284.39, | |
| "nll_loss": 0.1159815713763237, | |
| "rewards/accuracies": 0.9767441749572754, | |
| "rewards/chosen": 9.204967498779297, | |
| "rewards/margins": 11.411535263061523, | |
| "rewards/rejected": -2.2065672874450684, | |
| "step": 50, | |
| "train_speed(iter/s)": 0.003798 | |
| }, | |
| { | |
| "epoch": 1.020125786163522, | |
| "eval_logits/chosen": -2.462606191635132, | |
| "eval_logits/rejected": -2.437251091003418, | |
| "eval_logps/chosen": -19.061992645263672, | |
| "eval_logps/rejected": -184.38104248046875, | |
| "eval_loss": 0.1830219328403473, | |
| "eval_nll_loss": 0.17293420433998108, | |
| "eval_rewards/accuracies": 1.0, | |
| "eval_rewards/chosen": 8.904085159301758, | |
| "eval_rewards/margins": 12.307174682617188, | |
| "eval_rewards/rejected": -3.4030885696411133, | |
| "eval_runtime": 55.6446, | |
| "eval_samples_per_second": 0.288, | |
| "eval_steps_per_second": 0.144, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 1.120754716981132, | |
| "grad_norm": 0.5176746249198914, | |
| "learning_rate": 4.410216414245771e-05, | |
| "logits/chosen": -2.3740134239196777, | |
| "logits/rejected": -2.3573694229125977, | |
| "logps/chosen": -26.2227840423584, | |
| "logps/rejected": -179.9822540283203, | |
| "loss": 0.19258421659469604, | |
| "memory(GiB)": 284.39, | |
| "nll_loss": 0.17000555992126465, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 8.813023567199707, | |
| "rewards/margins": 11.842904090881348, | |
| "rewards/rejected": -3.029881715774536, | |
| "step": 55, | |
| "train_speed(iter/s)": 0.00372 | |
| }, | |
| { | |
| "epoch": 1.221383647798742, | |
| "grad_norm": 0.6022250056266785, | |
| "learning_rate": 3.58397246658848e-05, | |
| "logits/chosen": -2.4972939491271973, | |
| "logits/rejected": -2.4699082374572754, | |
| "logps/chosen": -14.000228881835938, | |
| "logps/rejected": -196.9097442626953, | |
| "loss": 0.10635790824890137, | |
| "memory(GiB)": 284.39, | |
| "nll_loss": 0.08761530369520187, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 9.721292495727539, | |
| "rewards/margins": 14.493858337402344, | |
| "rewards/rejected": -4.772566795349121, | |
| "step": 60, | |
| "train_speed(iter/s)": 0.003743 | |
| }, | |
| { | |
| "epoch": 1.3220125786163521, | |
| "grad_norm": 0.2501760721206665, | |
| "learning_rate": 2.798029242211828e-05, | |
| "logits/chosen": -2.5347957611083984, | |
| "logits/rejected": -2.50445818901062, | |
| "logps/chosen": -23.887548446655273, | |
| "logps/rejected": -183.65591430664062, | |
| "loss": 0.18030774593353271, | |
| "memory(GiB)": 284.39, | |
| "nll_loss": 0.14212127029895782, | |
| "rewards/accuracies": 0.981249988079071, | |
| "rewards/chosen": 9.50097370147705, | |
| "rewards/margins": 12.938058853149414, | |
| "rewards/rejected": -3.4370861053466797, | |
| "step": 65, | |
| "train_speed(iter/s)": 0.003757 | |
| }, | |
| { | |
| "epoch": 1.4226415094339622, | |
| "grad_norm": 0.42134493589401245, | |
| "learning_rate": 2.074755007023461e-05, | |
| "logits/chosen": -2.5006675720214844, | |
| "logits/rejected": -2.478884220123291, | |
| "logps/chosen": -12.177281379699707, | |
| "logps/rejected": -190.2030487060547, | |
| "loss": 0.09010829329490662, | |
| "memory(GiB)": 284.39, | |
| "nll_loss": 0.07332514226436615, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": 10.052157402038574, | |
| "rewards/margins": 13.963111877441406, | |
| "rewards/rejected": -3.910953998565674, | |
| "step": 70, | |
| "train_speed(iter/s)": 0.003774 | |
| }, | |
| { | |
| "epoch": 1.5232704402515722, | |
| "grad_norm": 0.5933993458747864, | |
| "learning_rate": 1.434734441843899e-05, | |
| "logits/chosen": -2.502887487411499, | |
| "logits/rejected": -2.486396551132202, | |
| "logps/chosen": -18.57794189453125, | |
| "logps/rejected": -170.333740234375, | |
| "loss": 0.13938431739807128, | |
| "memory(GiB)": 284.39, | |
| "nll_loss": 0.11240720748901367, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 9.312703132629395, | |
| "rewards/margins": 11.638362884521484, | |
| "rewards/rejected": -2.325660467147827, | |
| "step": 75, | |
| "train_speed(iter/s)": 0.00376 | |
| }, | |
| { | |
| "epoch": 1.6238993710691823, | |
| "grad_norm": 0.2634561061859131, | |
| "learning_rate": 8.961827939636196e-06, | |
| "logits/chosen": -2.5577776432037354, | |
| "logits/rejected": -2.5379796028137207, | |
| "logps/chosen": -16.603967666625977, | |
| "logps/rejected": -171.06466674804688, | |
| "loss": 0.10857141017913818, | |
| "memory(GiB)": 284.39, | |
| "nll_loss": 0.09158992022275925, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 9.913006782531738, | |
| "rewards/margins": 12.598286628723145, | |
| "rewards/rejected": -2.685279369354248, | |
| "step": 80, | |
| "train_speed(iter/s)": 0.003778 | |
| }, | |
| { | |
| "epoch": 1.7245283018867923, | |
| "grad_norm": 0.3385748267173767, | |
| "learning_rate": 4.744274637483936e-06, | |
| "logits/chosen": -2.562164783477783, | |
| "logits/rejected": -2.5376689434051514, | |
| "logps/chosen": -14.094012260437012, | |
| "logps/rejected": -163.73416137695312, | |
| "loss": 0.11240246295928955, | |
| "memory(GiB)": 284.39, | |
| "nll_loss": 0.09068052470684052, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": 9.352496147155762, | |
| "rewards/margins": 11.771881103515625, | |
| "rewards/rejected": -2.419384479522705, | |
| "step": 85, | |
| "train_speed(iter/s)": 0.003777 | |
| }, | |
| { | |
| "epoch": 1.8251572327044026, | |
| "grad_norm": 0.3210693895816803, | |
| "learning_rate": 1.8147178055029579e-06, | |
| "logits/chosen": -2.602306842803955, | |
| "logits/rejected": -2.567457675933838, | |
| "logps/chosen": -17.956844329833984, | |
| "logps/rejected": -175.5157470703125, | |
| "loss": 0.11938213109970093, | |
| "memory(GiB)": 284.39, | |
| "nll_loss": 0.09758913516998291, | |
| "rewards/accuracies": 0.9937499761581421, | |
| "rewards/chosen": 9.622145652770996, | |
| "rewards/margins": 12.495707511901855, | |
| "rewards/rejected": -2.873561382293701, | |
| "step": 90, | |
| "train_speed(iter/s)": 0.003796 | |
| }, | |
| { | |
| "epoch": 1.9257861635220126, | |
| "grad_norm": 0.3329070210456848, | |
| "learning_rate": 2.5653383040524227e-07, | |
| "logits/chosen": -2.591177463531494, | |
| "logits/rejected": -2.568394422531128, | |
| "logps/chosen": -17.04227638244629, | |
| "logps/rejected": -188.1129913330078, | |
| "loss": 0.11813113689422608, | |
| "memory(GiB)": 284.39, | |
| "nll_loss": 0.10378739982843399, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 9.477154731750488, | |
| "rewards/margins": 13.704524040222168, | |
| "rewards/rejected": -4.227367877960205, | |
| "step": 95, | |
| "train_speed(iter/s)": 0.003803 | |
| }, | |
| { | |
| "epoch": 1.9861635220125786, | |
| "eval_logits/chosen": -2.6464767456054688, | |
| "eval_logits/rejected": -2.6153650283813477, | |
| "eval_logps/chosen": -15.376700401306152, | |
| "eval_logps/rejected": -193.30332946777344, | |
| "eval_loss": 0.14128434658050537, | |
| "eval_nll_loss": 0.14024823904037476, | |
| "eval_rewards/accuracies": 1.0, | |
| "eval_rewards/chosen": 9.272613525390625, | |
| "eval_rewards/margins": 13.567930221557617, | |
| "eval_rewards/rejected": -4.295315742492676, | |
| "eval_runtime": 55.5933, | |
| "eval_samples_per_second": 0.288, | |
| "eval_steps_per_second": 0.144, | |
| "step": 98 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 98, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 5.261229460544324e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |