{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9999067251189254,
  "eval_steps": 500,
  "global_step": 2680,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.04999533625594627,
      "grad_norm": 5.705146789550781,
      "learning_rate": 5e-07,
      "logits/chosen": -0.5462052822113037,
      "logits/rejected": -0.46075063943862915,
      "logps/chosen": -69.6404800415039,
      "logps/rejected": -10.503483772277832,
      "loss": 0.69,
      "rewards/accuracies": 0.5541044473648071,
      "rewards/chosen": 0.005677139386534691,
      "rewards/margins": 0.006748478394001722,
      "rewards/rejected": -0.0010713385418057442,
      "step": 134
    },
    {
      "epoch": 0.09999067251189254,
      "grad_norm": 4.263132572174072,
      "learning_rate": 1e-06,
      "logits/chosen": -0.5453211665153503,
      "logits/rejected": -0.4581734836101532,
      "logps/chosen": -71.13265228271484,
      "logps/rejected": -11.624285697937012,
      "loss": 0.6205,
      "rewards/accuracies": 0.9160447716712952,
      "rewards/chosen": 0.14199481904506683,
      "rewards/margins": 0.16203062236309052,
      "rewards/rejected": -0.020035814493894577,
      "step": 268
    },
    {
      "epoch": 0.14998600876783882,
      "grad_norm": 1.5373331308364868,
      "learning_rate": 9.444444444444444e-07,
      "logits/chosen": -0.545798122882843,
      "logits/rejected": -0.4811278283596039,
      "logps/chosen": -61.10158920288086,
      "logps/rejected": -15.043923377990723,
      "loss": 0.3159,
      "rewards/accuracies": 0.9869402647018433,
      "rewards/chosen": 0.9139772653579712,
      "rewards/margins": 1.3145134449005127,
      "rewards/rejected": -0.4005362391471863,
      "step": 402
    },
    {
      "epoch": 0.1999813450237851,
      "grad_norm": 0.5485444068908691,
      "learning_rate": 8.888888888888888e-07,
      "logits/chosen": -0.5420589447021484,
      "logits/rejected": -0.49538397789001465,
      "logps/chosen": -52.57987976074219,
      "logps/rejected": -23.705102920532227,
      "loss": 0.1156,
      "rewards/accuracies": 0.9906716346740723,
      "rewards/chosen": 1.417620301246643,
      "rewards/margins": 2.7980282306671143,
      "rewards/rejected": -1.3804079294204712,
      "step": 536
    },
    {
      "epoch": 0.24997668127973136,
      "grad_norm": 0.19278554618358612,
      "learning_rate": 8.333333333333333e-07,
      "logits/chosen": -0.5059043765068054,
      "logits/rejected": -0.4366276264190674,
      "logps/chosen": -55.41277313232422,
      "logps/rejected": -34.412296295166016,
      "loss": 0.05,
      "rewards/accuracies": 0.9850745797157288,
      "rewards/chosen": 1.6597167253494263,
      "rewards/margins": 4.129410266876221,
      "rewards/rejected": -2.469693660736084,
      "step": 670
    },
    {
      "epoch": 0.29997201753567765,
      "grad_norm": 0.5303434729576111,
      "learning_rate": 7.777777777777778e-07,
      "logits/chosen": -0.4820927679538727,
      "logits/rejected": -0.3845590353012085,
      "logps/chosen": -52.056922912597656,
      "logps/rejected": -41.084842681884766,
      "loss": 0.0391,
      "rewards/accuracies": 0.9860074520111084,
      "rewards/chosen": 1.6465966701507568,
      "rewards/margins": 4.822764873504639,
      "rewards/rejected": -3.1761679649353027,
      "step": 804
    },
    {
      "epoch": 0.3499673537916239,
      "grad_norm": 0.06953659653663635,
      "learning_rate": 7.222222222222221e-07,
      "logits/chosen": -0.4638223350048065,
      "logits/rejected": -0.3506713807582855,
      "logps/chosen": -52.924964904785156,
      "logps/rejected": -46.31932830810547,
      "loss": 0.0416,
      "rewards/accuracies": 0.9850745797157288,
      "rewards/chosen": 1.6556929349899292,
      "rewards/margins": 5.256211757659912,
      "rewards/rejected": -3.6005189418792725,
      "step": 938
    },
    {
      "epoch": 0.3999626900475702,
      "grad_norm": 0.13936500251293182,
      "learning_rate": 6.666666666666666e-07,
      "logits/chosen": -0.4642048180103302,
      "logits/rejected": -0.3347207307815552,
      "logps/chosen": -51.326290130615234,
      "logps/rejected": -47.818946838378906,
      "loss": 0.03,
      "rewards/accuracies": 0.9860074520111084,
      "rewards/chosen": 1.7104003429412842,
      "rewards/margins": 5.57481575012207,
      "rewards/rejected": -3.864415407180786,
      "step": 1072
    },
    {
      "epoch": 0.44995802630351645,
      "grad_norm": 0.10513754934072495,
      "learning_rate": 6.111111111111112e-07,
      "logits/chosen": -0.4259939193725586,
      "logits/rejected": -0.29308220744132996,
      "logps/chosen": -52.058380126953125,
      "logps/rejected": -50.53205871582031,
      "loss": 0.0353,
      "rewards/accuracies": 0.9813432693481445,
      "rewards/chosen": 1.7411428689956665,
      "rewards/margins": 5.778336048126221,
      "rewards/rejected": -4.037193775177002,
      "step": 1206
    },
    {
      "epoch": 0.4999533625594627,
      "grad_norm": 15.070347785949707,
      "learning_rate": 5.555555555555555e-07,
      "logits/chosen": -0.4217334985733032,
      "logits/rejected": -0.2778339684009552,
      "logps/chosen": -49.258907318115234,
      "logps/rejected": -52.084754943847656,
      "loss": 0.0386,
      "rewards/accuracies": 0.9785447716712952,
      "rewards/chosen": 1.7135344743728638,
      "rewards/margins": 5.933449745178223,
      "rewards/rejected": -4.21991491317749,
      "step": 1340
    },
    {
      "epoch": 0.549948698815409,
      "grad_norm": 0.07054832577705383,
      "learning_rate": 5e-07,
      "logits/chosen": -0.41517916321754456,
      "logits/rejected": -0.25792089104652405,
      "logps/chosen": -51.234615325927734,
      "logps/rejected": -53.174564361572266,
      "loss": 0.0263,
      "rewards/accuracies": 0.9850745797157288,
      "rewards/chosen": 1.7893245220184326,
      "rewards/margins": 6.14929723739624,
      "rewards/rejected": -4.3599724769592285,
      "step": 1474
    },
    {
      "epoch": 0.5999440350713553,
      "grad_norm": 1.686726689338684,
      "learning_rate": 4.444444444444444e-07,
      "logits/chosen": -0.3858945369720459,
      "logits/rejected": -0.22172169387340546,
      "logps/chosen": -51.00803756713867,
      "logps/rejected": -54.77092361450195,
      "loss": 0.0446,
      "rewards/accuracies": 0.9776118993759155,
      "rewards/chosen": 1.7425472736358643,
      "rewards/margins": 6.227110385894775,
      "rewards/rejected": -4.48456335067749,
      "step": 1608
    },
    {
      "epoch": 0.6499393713273015,
      "grad_norm": 6.180748462677002,
      "learning_rate": 3.888888888888889e-07,
      "logits/chosen": -0.3795308768749237,
      "logits/rejected": -0.213688462972641,
      "logps/chosen": -50.311119079589844,
      "logps/rejected": -57.47030258178711,
      "loss": 0.0178,
      "rewards/accuracies": 0.9944029450416565,
      "rewards/chosen": 1.7370685338974,
      "rewards/margins": 6.562605381011963,
      "rewards/rejected": -4.825536727905273,
      "step": 1742
    },
    {
      "epoch": 0.6999347075832478,
      "grad_norm": 1.3855689764022827,
      "learning_rate": 3.333333333333333e-07,
      "logits/chosen": -0.37974509596824646,
      "logits/rejected": -0.1997498720884323,
      "logps/chosen": -54.77206802368164,
      "logps/rejected": -58.50139617919922,
      "loss": 0.0329,
      "rewards/accuracies": 0.983208954334259,
      "rewards/chosen": 1.8076502084732056,
      "rewards/margins": 6.584301948547363,
      "rewards/rejected": -4.776651859283447,
      "step": 1876
    },
    {
      "epoch": 0.7499300438391941,
      "grad_norm": 3.921687364578247,
      "learning_rate": 2.7777777777777776e-07,
      "logits/chosen": -0.37658217549324036,
      "logits/rejected": -0.20967237651348114,
      "logps/chosen": -52.74870681762695,
      "logps/rejected": -60.60834503173828,
      "loss": 0.0284,
      "rewards/accuracies": 0.9878731369972229,
      "rewards/chosen": 1.7630212306976318,
      "rewards/margins": 6.771360397338867,
      "rewards/rejected": -5.008338928222656,
      "step": 2010
    },
    {
      "epoch": 0.7999253800951404,
      "grad_norm": 0.07866105437278748,
      "learning_rate": 2.222222222222222e-07,
      "logits/chosen": -0.3648207485675812,
      "logits/rejected": -0.1908709853887558,
      "logps/chosen": -52.76331329345703,
      "logps/rejected": -61.42716598510742,
      "loss": 0.0236,
      "rewards/accuracies": 0.9897387623786926,
      "rewards/chosen": 1.758689522743225,
      "rewards/margins": 6.900312900543213,
      "rewards/rejected": -5.141623497009277,
      "step": 2144
    },
    {
      "epoch": 0.8499207163510867,
      "grad_norm": 0.038292620331048965,
      "learning_rate": 1.6666666666666665e-07,
      "logits/chosen": -0.3632016181945801,
      "logits/rejected": -0.20296621322631836,
      "logps/chosen": -51.63236618041992,
      "logps/rejected": -64.2586898803711,
      "loss": 0.0385,
      "rewards/accuracies": 0.9822760820388794,
      "rewards/chosen": 1.6954851150512695,
      "rewards/margins": 6.922590732574463,
      "rewards/rejected": -5.227106094360352,
      "step": 2278
    },
    {
      "epoch": 0.8999160526070329,
      "grad_norm": 0.05809802561998367,
      "learning_rate": 1.111111111111111e-07,
      "logits/chosen": -0.3890366554260254,
      "logits/rejected": -0.21105322241783142,
      "logps/chosen": -52.29930114746094,
      "logps/rejected": -61.95232009887695,
      "loss": 0.0161,
      "rewards/accuracies": 0.9934701323509216,
      "rewards/chosen": 1.7759125232696533,
      "rewards/margins": 6.998130798339844,
      "rewards/rejected": -5.2222185134887695,
      "step": 2412
    },
    {
      "epoch": 0.9499113888629792,
      "grad_norm": 0.06595258414745331,
      "learning_rate": 5.555555555555555e-08,
      "logits/chosen": -0.37663742899894714,
      "logits/rejected": -0.19746284186840057,
      "logps/chosen": -52.22157669067383,
      "logps/rejected": -63.86149597167969,
      "loss": 0.0283,
      "rewards/accuracies": 0.9822760820388794,
      "rewards/chosen": 1.6980862617492676,
      "rewards/margins": 7.005319118499756,
      "rewards/rejected": -5.307232856750488,
      "step": 2546
    },
    {
      "epoch": 0.9999067251189254,
      "grad_norm": 0.06881717592477798,
      "learning_rate": 0.0,
      "logits/chosen": -0.35994064807891846,
      "logits/rejected": -0.19044946134090424,
      "logps/chosen": -53.963340759277344,
      "logps/rejected": -64.53665924072266,
      "loss": 0.0188,
      "rewards/accuracies": 0.9925373196601868,
      "rewards/chosen": 1.7544020414352417,
      "rewards/margins": 7.084873676300049,
      "rewards/rejected": -5.330471515655518,
      "step": 2680
    },
    {
      "epoch": 0.9999067251189254,
      "step": 2680,
      "total_flos": 1.5261823890782945e+18,
      "train_loss": 0.11258992188012422,
      "train_runtime": 37304.1181,
      "train_samples_per_second": 0.575,
      "train_steps_per_second": 0.072
    }
  ],
  "logging_steps": 134,
  "max_steps": 2680,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.5261823890782945e+18,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}