| { | |
| "best_metric": 1.4484930038452148, | |
| "best_model_checkpoint": "saves/Falcon-7B-Instruct/lora/orpo-salt/checkpoint-1500", | |
| "epoch": 2.9969690846635686, | |
| "eval_steps": 500, | |
| "global_step": 1854, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.01616488179430188, | |
| "grad_norm": 0.5467122793197632, | |
| "learning_rate": 4.999648198770648e-06, | |
| "logits/chosen": -14.078092575073242, | |
| "logits/rejected": -14.159353256225586, | |
| "logps/chosen": -1.7583353519439697, | |
| "logps/rejected": -1.8469493389129639, | |
| "loss": 1.8299, | |
| "odds_ratio_loss": 0.7155797481536865, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -0.17583352327346802, | |
| "rewards/margins": 0.008861413225531578, | |
| "rewards/rejected": -0.18469493091106415, | |
| "sft_loss": 1.7583353519439697, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.03232976358860376, | |
| "grad_norm": 0.495731920003891, | |
| "learning_rate": 4.998578646361359e-06, | |
| "logits/chosen": -14.073513984680176, | |
| "logits/rejected": -14.144752502441406, | |
| "logps/chosen": -1.9236218929290771, | |
| "logps/rejected": -1.9451425075531006, | |
| "loss": 2.0003, | |
| "odds_ratio_loss": 0.766566812992096, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.1923622190952301, | |
| "rewards/margins": 0.002152049448341131, | |
| "rewards/rejected": -0.19451424479484558, | |
| "sft_loss": 1.9236218929290771, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.04849464538290564, | |
| "grad_norm": 0.6057537198066711, | |
| "learning_rate": 4.996791614004449e-06, | |
| "logits/chosen": -14.302851676940918, | |
| "logits/rejected": -14.224812507629395, | |
| "logps/chosen": -1.8387420177459717, | |
| "logps/rejected": -1.910175085067749, | |
| "loss": 1.9128, | |
| "odds_ratio_loss": 0.7409650087356567, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -0.1838742196559906, | |
| "rewards/margins": 0.007143297698348761, | |
| "rewards/rejected": -0.1910175085067749, | |
| "sft_loss": 1.8387420177459717, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.06465952717720752, | |
| "grad_norm": 0.5634093284606934, | |
| "learning_rate": 4.994287614855618e-06, | |
| "logits/chosen": -14.0798921585083, | |
| "logits/rejected": -14.19922161102295, | |
| "logps/chosen": -1.947654366493225, | |
| "logps/rejected": -1.9009010791778564, | |
| "loss": 2.0298, | |
| "odds_ratio_loss": 0.8212669491767883, | |
| "rewards/accuracies": 0.45625001192092896, | |
| "rewards/chosen": -0.19476543366909027, | |
| "rewards/margins": -0.004675320815294981, | |
| "rewards/rejected": -0.1900901347398758, | |
| "sft_loss": 1.947654366493225, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.0808244089715094, | |
| "grad_norm": 0.7957186698913574, | |
| "learning_rate": 4.991067367951343e-06, | |
| "logits/chosen": -14.371423721313477, | |
| "logits/rejected": -14.266546249389648, | |
| "logps/chosen": -2.017087697982788, | |
| "logps/rejected": -2.0035624504089355, | |
| "loss": 2.0958, | |
| "odds_ratio_loss": 0.7871265411376953, | |
| "rewards/accuracies": 0.4124999940395355, | |
| "rewards/chosen": -0.20170876383781433, | |
| "rewards/margins": -0.0013525458052754402, | |
| "rewards/rejected": -0.20035621523857117, | |
| "sft_loss": 2.017087697982788, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.09698929076581128, | |
| "grad_norm": 0.5418820381164551, | |
| "learning_rate": 4.987131798002389e-06, | |
| "logits/chosen": -14.21721076965332, | |
| "logits/rejected": -14.099153518676758, | |
| "logps/chosen": -1.8751760721206665, | |
| "logps/rejected": -1.8855310678482056, | |
| "loss": 1.9577, | |
| "odds_ratio_loss": 0.8254929780960083, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.18751761317253113, | |
| "rewards/margins": 0.001035516383126378, | |
| "rewards/rejected": -0.188553124666214, | |
| "sft_loss": 1.8751760721206665, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.11315417256011315, | |
| "grad_norm": 1.0633864402770996, | |
| "learning_rate": 4.982482035128285e-06, | |
| "logits/chosen": -14.105901718139648, | |
| "logits/rejected": -14.193835258483887, | |
| "logps/chosen": -2.0220446586608887, | |
| "logps/rejected": -1.9594541788101196, | |
| "loss": 2.1089, | |
| "odds_ratio_loss": 0.8683654069900513, | |
| "rewards/accuracies": 0.42500001192092896, | |
| "rewards/chosen": -0.20220446586608887, | |
| "rewards/margins": -0.00625905767083168, | |
| "rewards/rejected": -0.19594541192054749, | |
| "sft_loss": 2.0220446586608887, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.12931905435441504, | |
| "grad_norm": 1.0158140659332275, | |
| "learning_rate": 4.9771194145328e-06, | |
| "logits/chosen": -14.075093269348145, | |
| "logits/rejected": -14.02421760559082, | |
| "logps/chosen": -1.6751682758331299, | |
| "logps/rejected": -1.7500627040863037, | |
| "loss": 1.7468, | |
| "odds_ratio_loss": 0.716758668422699, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.167516827583313, | |
| "rewards/margins": 0.00748945539817214, | |
| "rewards/rejected": -0.17500628530979156, | |
| "sft_loss": 1.6751682758331299, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.1454839361487169, | |
| "grad_norm": 1.3243364095687866, | |
| "learning_rate": 4.971045476120532e-06, | |
| "logits/chosen": -14.14300537109375, | |
| "logits/rejected": -14.079290390014648, | |
| "logps/chosen": -1.8245623111724854, | |
| "logps/rejected": -1.760660171508789, | |
| "loss": 1.9067, | |
| "odds_ratio_loss": 0.8211291432380676, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -0.18245622515678406, | |
| "rewards/margins": -0.006390226539224386, | |
| "rewards/rejected": -0.17606601119041443, | |
| "sft_loss": 1.8245623111724854, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.1616488179430188, | |
| "grad_norm": 0.7163342237472534, | |
| "learning_rate": 4.964261964054713e-06, | |
| "logits/chosen": -14.068964958190918, | |
| "logits/rejected": -14.082951545715332, | |
| "logps/chosen": -1.7527011632919312, | |
| "logps/rejected": -1.8138408660888672, | |
| "loss": 1.8297, | |
| "odds_ratio_loss": 0.7703070044517517, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -0.17527012526988983, | |
| "rewards/margins": 0.0061139510944485664, | |
| "rewards/rejected": -0.18138407170772552, | |
| "sft_loss": 1.7527011632919312, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.17781369973732067, | |
| "grad_norm": 1.006773829460144, | |
| "learning_rate": 4.956770826256372e-06, | |
| "logits/chosen": -14.166906356811523, | |
| "logits/rejected": -14.120782852172852, | |
| "logps/chosen": -1.7077207565307617, | |
| "logps/rejected": -1.7365996837615967, | |
| "loss": 1.7844, | |
| "odds_ratio_loss": 0.7667573690414429, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.17077207565307617, | |
| "rewards/margins": 0.002887908834964037, | |
| "rewards/rejected": -0.17365998029708862, | |
| "sft_loss": 1.7077207565307617, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.19397858153162256, | |
| "grad_norm": 0.8139289617538452, | |
| "learning_rate": 4.94857421384497e-06, | |
| "logits/chosen": -14.175407409667969, | |
| "logits/rejected": -14.165875434875488, | |
| "logps/chosen": -1.692577600479126, | |
| "logps/rejected": -1.8239320516586304, | |
| "loss": 1.7682, | |
| "odds_ratio_loss": 0.7562084794044495, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.1692577451467514, | |
| "rewards/margins": 0.013135453686118126, | |
| "rewards/rejected": -0.18239320814609528, | |
| "sft_loss": 1.692577600479126, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.21014346332592443, | |
| "grad_norm": 1.0950274467468262, | |
| "learning_rate": 4.939674480520701e-06, | |
| "logits/chosen": -14.055421829223633, | |
| "logits/rejected": -14.265202522277832, | |
| "logps/chosen": -1.65860915184021, | |
| "logps/rejected": -1.6671603918075562, | |
| "loss": 1.7352, | |
| "odds_ratio_loss": 0.7663736939430237, | |
| "rewards/accuracies": 0.48124998807907104, | |
| "rewards/chosen": -0.16586092114448547, | |
| "rewards/margins": 0.0008551125647500157, | |
| "rewards/rejected": -0.16671602427959442, | |
| "sft_loss": 1.65860915184021, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.2263083451202263, | |
| "grad_norm": 0.6190826892852783, | |
| "learning_rate": 4.930074181888613e-06, | |
| "logits/chosen": -14.116220474243164, | |
| "logits/rejected": -14.158090591430664, | |
| "logps/chosen": -1.7475076913833618, | |
| "logps/rejected": -1.736114501953125, | |
| "loss": 1.8234, | |
| "odds_ratio_loss": 0.7589074373245239, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -0.17475078999996185, | |
| "rewards/margins": -0.0011393536115065217, | |
| "rewards/rejected": -0.17361143231391907, | |
| "sft_loss": 1.7475076913833618, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.2424732269145282, | |
| "grad_norm": 0.8096482157707214, | |
| "learning_rate": 4.91977607472475e-06, | |
| "logits/chosen": -14.182394027709961, | |
| "logits/rejected": -14.252290725708008, | |
| "logps/chosen": -1.6399564743041992, | |
| "logps/rejected": -1.6184114217758179, | |
| "loss": 1.7178, | |
| "odds_ratio_loss": 0.778221607208252, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -0.1639956533908844, | |
| "rewards/margins": -0.0021545083727687597, | |
| "rewards/rejected": -0.16184113919734955, | |
| "sft_loss": 1.6399564743041992, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.2586381087088301, | |
| "grad_norm": 1.5372618436813354, | |
| "learning_rate": 4.908783116184534e-06, | |
| "logits/chosen": -14.110807418823242, | |
| "logits/rejected": -14.087692260742188, | |
| "logps/chosen": -1.613721489906311, | |
| "logps/rejected": -1.7073653936386108, | |
| "loss": 1.6837, | |
| "odds_ratio_loss": 0.6995801329612732, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.1613721400499344, | |
| "rewards/margins": 0.009364412166178226, | |
| "rewards/rejected": -0.17073655128479004, | |
| "sft_loss": 1.613721489906311, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.27480299050313195, | |
| "grad_norm": 1.0400787591934204, | |
| "learning_rate": 4.897098462953598e-06, | |
| "logits/chosen": -14.309249877929688, | |
| "logits/rejected": -14.144041061401367, | |
| "logps/chosen": -1.572377324104309, | |
| "logps/rejected": -1.679239273071289, | |
| "loss": 1.6438, | |
| "odds_ratio_loss": 0.7143967747688293, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -0.15723773837089539, | |
| "rewards/margins": 0.010686198249459267, | |
| "rewards/rejected": -0.1679239273071289, | |
| "sft_loss": 1.572377324104309, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.2909678722974338, | |
| "grad_norm": 0.6752244234085083, | |
| "learning_rate": 4.884725470341331e-06, | |
| "logits/chosen": -14.362325668334961, | |
| "logits/rejected": -14.368985176086426, | |
| "logps/chosen": -1.5275907516479492, | |
| "logps/rejected": -1.6322838068008423, | |
| "loss": 1.5969, | |
| "odds_ratio_loss": 0.6928091645240784, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.15275909006595612, | |
| "rewards/margins": 0.01046929694712162, | |
| "rewards/rejected": -0.1632283627986908, | |
| "sft_loss": 1.5275907516479492, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.3071327540917357, | |
| "grad_norm": 1.5551739931106567, | |
| "learning_rate": 4.871667691317377e-06, | |
| "logits/chosen": -14.23143196105957, | |
| "logits/rejected": -14.168081283569336, | |
| "logps/chosen": -1.5617109537124634, | |
| "logps/rejected": -1.516629934310913, | |
| "loss": 1.6442, | |
| "odds_ratio_loss": 0.8246932029724121, | |
| "rewards/accuracies": 0.45625001192092896, | |
| "rewards/chosen": -0.15617111325263977, | |
| "rewards/margins": -0.004508105106651783, | |
| "rewards/rejected": -0.15166299045085907, | |
| "sft_loss": 1.5617109537124634, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.3232976358860376, | |
| "grad_norm": 4.873908519744873, | |
| "learning_rate": 4.857928875491392e-06, | |
| "logits/chosen": -14.317342758178711, | |
| "logits/rejected": -14.135493278503418, | |
| "logps/chosen": -1.4843647480010986, | |
| "logps/rejected": -1.5346746444702148, | |
| "loss": 1.5575, | |
| "odds_ratio_loss": 0.7314870953559875, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.14843648672103882, | |
| "rewards/margins": 0.005030992440879345, | |
| "rewards/rejected": -0.15346747636795044, | |
| "sft_loss": 1.4843647480010986, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.33946251768033947, | |
| "grad_norm": 1.1008872985839844, | |
| "learning_rate": 4.843512968036314e-06, | |
| "logits/chosen": -13.899968147277832, | |
| "logits/rejected": -13.980463027954102, | |
| "logps/chosen": -1.4831616878509521, | |
| "logps/rejected": -1.464994192123413, | |
| "loss": 1.5606, | |
| "odds_ratio_loss": 0.7743188738822937, | |
| "rewards/accuracies": 0.4937500059604645, | |
| "rewards/chosen": -0.1483161747455597, | |
| "rewards/margins": -0.0018167542293667793, | |
| "rewards/rejected": -0.1464994251728058, | |
| "sft_loss": 1.4831616878509521, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.35562739947464134, | |
| "grad_norm": 2.111262083053589, | |
| "learning_rate": 4.828424108555486e-06, | |
| "logits/chosen": -14.277219772338867, | |
| "logits/rejected": -14.1966552734375, | |
| "logps/chosen": -1.5998783111572266, | |
| "logps/rejected": -1.7076078653335571, | |
| "loss": 1.6726, | |
| "odds_ratio_loss": 0.727408230304718, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -0.15998782217502594, | |
| "rewards/margins": 0.010772952809929848, | |
| "rewards/rejected": -0.17076078057289124, | |
| "sft_loss": 1.5998783111572266, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.3717922812689432, | |
| "grad_norm": 0.6497421264648438, | |
| "learning_rate": 4.812666629893957e-06, | |
| "logits/chosen": -14.255824089050293, | |
| "logits/rejected": -14.233850479125977, | |
| "logps/chosen": -1.5216138362884521, | |
| "logps/rejected": -1.4904725551605225, | |
| "loss": 1.599, | |
| "odds_ratio_loss": 0.7741049528121948, | |
| "rewards/accuracies": 0.45625001192092896, | |
| "rewards/chosen": -0.1521613895893097, | |
| "rewards/margins": -0.0031141184736043215, | |
| "rewards/rejected": -0.14904727041721344, | |
| "sft_loss": 1.5216138362884521, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.3879571630632451, | |
| "grad_norm": 1.4030089378356934, | |
| "learning_rate": 4.796245056894273e-06, | |
| "logits/chosen": -13.990198135375977, | |
| "logits/rejected": -14.032785415649414, | |
| "logps/chosen": -1.5593761205673218, | |
| "logps/rejected": -1.5817941427230835, | |
| "loss": 1.6382, | |
| "odds_ratio_loss": 0.7885618805885315, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -0.15593759715557098, | |
| "rewards/margins": 0.0022418068256229162, | |
| "rewards/rejected": -0.1581794172525406, | |
| "sft_loss": 1.5593761205673218, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.404122044857547, | |
| "grad_norm": 1.03659987449646, | |
| "learning_rate": 4.779164105097148e-06, | |
| "logits/chosen": -14.23992919921875, | |
| "logits/rejected": -14.331039428710938, | |
| "logps/chosen": -1.4630193710327148, | |
| "logps/rejected": -1.6595561504364014, | |
| "loss": 1.5308, | |
| "odds_ratio_loss": 0.6777212023735046, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.14630195498466492, | |
| "rewards/margins": 0.019653689116239548, | |
| "rewards/rejected": -0.16595561802387238, | |
| "sft_loss": 1.4630193710327148, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.42028692665184886, | |
| "grad_norm": 1.1558053493499756, | |
| "learning_rate": 4.761428679387373e-06, | |
| "logits/chosen": -14.19200611114502, | |
| "logits/rejected": -14.27843189239502, | |
| "logps/chosen": -1.4934606552124023, | |
| "logps/rejected": -1.5448919534683228, | |
| "loss": 1.5664, | |
| "odds_ratio_loss": 0.7296234369277954, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -0.14934605360031128, | |
| "rewards/margins": 0.005143154412508011, | |
| "rewards/rejected": -0.154489204287529, | |
| "sft_loss": 1.4934606552124023, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.4364518084461507, | |
| "grad_norm": 1.3478955030441284, | |
| "learning_rate": 4.7430438725853515e-06, | |
| "logits/chosen": -14.099308967590332, | |
| "logits/rejected": -14.247446060180664, | |
| "logps/chosen": -1.5219833850860596, | |
| "logps/rejected": -1.7108709812164307, | |
| "loss": 1.5916, | |
| "odds_ratio_loss": 0.6957148313522339, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -0.15219834446907043, | |
| "rewards/margins": 0.01888876222074032, | |
| "rewards/rejected": -0.1710870862007141, | |
| "sft_loss": 1.5219833850860596, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.4526166902404526, | |
| "grad_norm": 1.0543924570083618, | |
| "learning_rate": 4.724014963984669e-06, | |
| "logits/chosen": -14.321874618530273, | |
| "logits/rejected": -14.308130264282227, | |
| "logps/chosen": -1.4753090143203735, | |
| "logps/rejected": -1.6179271936416626, | |
| "loss": 1.5473, | |
| "odds_ratio_loss": 0.7201633453369141, | |
| "rewards/accuracies": 0.45625001192092896, | |
| "rewards/chosen": -0.14753088355064392, | |
| "rewards/margins": 0.014261829666793346, | |
| "rewards/rejected": -0.16179272532463074, | |
| "sft_loss": 1.4753090143203735, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.4687815720347545, | |
| "grad_norm": 1.6008622646331787, | |
| "learning_rate": 4.704347417836116e-06, | |
| "logits/chosen": -14.192815780639648, | |
| "logits/rejected": -14.182914733886719, | |
| "logps/chosen": -1.373263955116272, | |
| "logps/rejected": -1.4777114391326904, | |
| "loss": 1.4462, | |
| "odds_ratio_loss": 0.7295758128166199, | |
| "rewards/accuracies": 0.48124998807907104, | |
| "rewards/chosen": -0.13732638955116272, | |
| "rewards/margins": 0.010444764979183674, | |
| "rewards/rejected": -0.14777114987373352, | |
| "sft_loss": 1.373263955116272, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.4849464538290564, | |
| "grad_norm": 1.0440045595169067, | |
| "learning_rate": 4.684046881778603e-06, | |
| "logits/chosen": -13.9605131149292, | |
| "logits/rejected": -14.021821975708008, | |
| "logps/chosen": -1.3839852809906006, | |
| "logps/rejected": -1.4472886323928833, | |
| "loss": 1.456, | |
| "odds_ratio_loss": 0.719718337059021, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -0.13839852809906006, | |
| "rewards/margins": 0.006330335047096014, | |
| "rewards/rejected": -0.1447288691997528, | |
| "sft_loss": 1.3839852809906006, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.5011113356233583, | |
| "grad_norm": 0.8026280999183655, | |
| "learning_rate": 4.663119185217409e-06, | |
| "logits/chosen": -14.247451782226562, | |
| "logits/rejected": -14.332074165344238, | |
| "logps/chosen": -1.4372491836547852, | |
| "logps/rejected": -1.5869617462158203, | |
| "loss": 1.5057, | |
| "odds_ratio_loss": 0.684893012046814, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.1437249332666397, | |
| "rewards/margins": 0.014971258118748665, | |
| "rewards/rejected": -0.15869615972042084, | |
| "sft_loss": 1.4372491836547852, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.5172762174176602, | |
| "grad_norm": 1.054210901260376, | |
| "learning_rate": 4.641570337650232e-06, | |
| "logits/chosen": -14.101099967956543, | |
| "logits/rejected": -14.234477043151855, | |
| "logps/chosen": -1.3175721168518066, | |
| "logps/rejected": -1.46291184425354, | |
| "loss": 1.3866, | |
| "odds_ratio_loss": 0.6904350519180298, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.1317571997642517, | |
| "rewards/margins": 0.014533978886902332, | |
| "rewards/rejected": -0.14629118144512177, | |
| "sft_loss": 1.3175721168518066, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.533441099211962, | |
| "grad_norm": 1.6171979904174805, | |
| "learning_rate": 4.61940652694154e-06, | |
| "logits/chosen": -14.107089042663574, | |
| "logits/rejected": -14.126917839050293, | |
| "logps/chosen": -1.5025255680084229, | |
| "logps/rejected": -1.4795392751693726, | |
| "loss": 1.5835, | |
| "odds_ratio_loss": 0.8096711039543152, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -0.1502525359392166, | |
| "rewards/margins": -0.0022986275143921375, | |
| "rewards/rejected": -0.14795391261577606, | |
| "sft_loss": 1.5025255680084229, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.5496059810062639, | |
| "grad_norm": 1.2122093439102173, | |
| "learning_rate": 4.596634117545689e-06, | |
| "logits/chosen": -14.346307754516602, | |
| "logits/rejected": -14.166845321655273, | |
| "logps/chosen": -1.5319068431854248, | |
| "logps/rejected": -1.624324083328247, | |
| "loss": 1.6054, | |
| "odds_ratio_loss": 0.735165536403656, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.15319068729877472, | |
| "rewards/margins": 0.00924170482903719, | |
| "rewards/rejected": -0.16243240237236023, | |
| "sft_loss": 1.5319068431854248, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.5657708628005658, | |
| "grad_norm": 0.899023175239563, | |
| "learning_rate": 4.573259648679335e-06, | |
| "logits/chosen": -14.317461013793945, | |
| "logits/rejected": -14.103338241577148, | |
| "logps/chosen": -1.47697114944458, | |
| "logps/rejected": -1.648705244064331, | |
| "loss": 1.546, | |
| "odds_ratio_loss": 0.6902921199798584, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.14769712090492249, | |
| "rewards/margins": 0.017173420637845993, | |
| "rewards/rejected": -0.16487054526805878, | |
| "sft_loss": 1.47697114944458, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.5819357445948676, | |
| "grad_norm": 2.3687381744384766, | |
| "learning_rate": 4.549289832443663e-06, | |
| "logits/chosen": -14.142545700073242, | |
| "logits/rejected": -14.211145401000977, | |
| "logps/chosen": -1.4514472484588623, | |
| "logps/rejected": -1.5542781352996826, | |
| "loss": 1.5233, | |
| "odds_ratio_loss": 0.7186037302017212, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.1451447308063507, | |
| "rewards/margins": 0.010283084586262703, | |
| "rewards/rejected": -0.15542782843112946, | |
| "sft_loss": 1.4514472484588623, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.5981006263891695, | |
| "grad_norm": 1.039651870727539, | |
| "learning_rate": 4.524731551896978e-06, | |
| "logits/chosen": -14.117040634155273, | |
| "logits/rejected": -14.164260864257812, | |
| "logps/chosen": -1.3633731603622437, | |
| "logps/rejected": -1.4127264022827148, | |
| "loss": 1.4381, | |
| "odds_ratio_loss": 0.7473303079605103, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -0.13633732497692108, | |
| "rewards/margins": 0.004935313947498798, | |
| "rewards/rejected": -0.1412726640701294, | |
| "sft_loss": 1.3633731603622437, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.6142655081834714, | |
| "grad_norm": 2.077622413635254, | |
| "learning_rate": 4.4995918590781925e-06, | |
| "logits/chosen": -14.212381362915039, | |
| "logits/rejected": -14.251853942871094, | |
| "logps/chosen": -1.3631454706192017, | |
| "logps/rejected": -1.4832844734191895, | |
| "loss": 1.437, | |
| "odds_ratio_loss": 0.7388315200805664, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.1363145411014557, | |
| "rewards/margins": 0.012013902887701988, | |
| "rewards/rejected": -0.14832845330238342, | |
| "sft_loss": 1.3631454706192017, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.6304303899777733, | |
| "grad_norm": 0.6616309881210327, | |
| "learning_rate": 4.473877972981797e-06, | |
| "logits/chosen": -14.166543960571289, | |
| "logits/rejected": -14.008458137512207, | |
| "logps/chosen": -1.414536476135254, | |
| "logps/rejected": -1.5125486850738525, | |
| "loss": 1.4849, | |
| "odds_ratio_loss": 0.7040683031082153, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.14145365357398987, | |
| "rewards/margins": 0.009801235981285572, | |
| "rewards/rejected": -0.15125489234924316, | |
| "sft_loss": 1.414536476135254, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.6465952717720752, | |
| "grad_norm": 1.2422401905059814, | |
| "learning_rate": 4.447597277484894e-06, | |
| "logits/chosen": -14.10089111328125, | |
| "logits/rejected": -14.177225112915039, | |
| "logps/chosen": -1.3244436979293823, | |
| "logps/rejected": -1.434922456741333, | |
| "loss": 1.3936, | |
| "odds_ratio_loss": 0.6911473274230957, | |
| "rewards/accuracies": 0.48124998807907104, | |
| "rewards/chosen": -0.132444366812706, | |
| "rewards/margins": 0.011047879233956337, | |
| "rewards/rejected": -0.14349225163459778, | |
| "sft_loss": 1.3244436979293823, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.6627601535663771, | |
| "grad_norm": 1.3308875560760498, | |
| "learning_rate": 4.42075731922687e-06, | |
| "logits/chosen": -14.254026412963867, | |
| "logits/rejected": -14.150421142578125, | |
| "logps/chosen": -1.4931491613388062, | |
| "logps/rejected": -1.5233150720596313, | |
| "loss": 1.5684, | |
| "odds_ratio_loss": 0.7521846890449524, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -0.14931491017341614, | |
| "rewards/margins": 0.0030165952630341053, | |
| "rewards/rejected": -0.15233151614665985, | |
| "sft_loss": 1.4931491613388062, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.6789250353606789, | |
| "grad_norm": 1.4143937826156616, | |
| "learning_rate": 4.3933658054423465e-06, | |
| "logits/chosen": -14.156329154968262, | |
| "logits/rejected": -14.047518730163574, | |
| "logps/chosen": -1.338627576828003, | |
| "logps/rejected": -1.4370090961456299, | |
| "loss": 1.4095, | |
| "odds_ratio_loss": 0.70883709192276, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.13386276364326477, | |
| "rewards/margins": 0.009838144294917583, | |
| "rewards/rejected": -0.14370091259479523, | |
| "sft_loss": 1.338627576828003, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.6950899171549808, | |
| "grad_norm": 2.3574774265289307, | |
| "learning_rate": 4.365430601748003e-06, | |
| "logits/chosen": -14.235176086425781, | |
| "logits/rejected": -14.395864486694336, | |
| "logps/chosen": -1.564626932144165, | |
| "logps/rejected": -1.5344398021697998, | |
| "loss": 1.6431, | |
| "odds_ratio_loss": 0.7849880456924438, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.15646269917488098, | |
| "rewards/margins": -0.0030187165830284357, | |
| "rewards/rejected": -0.15344397723674774, | |
| "sft_loss": 1.564626932144165, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.7112547989492827, | |
| "grad_norm": 3.739943504333496, | |
| "learning_rate": 4.336959729883925e-06, | |
| "logits/chosen": -14.274754524230957, | |
| "logits/rejected": -14.191232681274414, | |
| "logps/chosen": -1.3745372295379639, | |
| "logps/rejected": -1.405700445175171, | |
| "loss": 1.4506, | |
| "odds_ratio_loss": 0.7607132196426392, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -0.13745373487472534, | |
| "rewards/margins": 0.0031163152307271957, | |
| "rewards/rejected": -0.1405700445175171, | |
| "sft_loss": 1.3745372295379639, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.7274196807435845, | |
| "grad_norm": 0.9312599301338196, | |
| "learning_rate": 4.307961365410118e-06, | |
| "logits/chosen": -14.044285774230957, | |
| "logits/rejected": -14.011823654174805, | |
| "logps/chosen": -1.4385414123535156, | |
| "logps/rejected": -1.4718294143676758, | |
| "loss": 1.5134, | |
| "odds_ratio_loss": 0.7482468485832214, | |
| "rewards/accuracies": 0.45625001192092896, | |
| "rewards/chosen": -0.14385412633419037, | |
| "rewards/margins": 0.003328789724037051, | |
| "rewards/rejected": -0.14718294143676758, | |
| "sft_loss": 1.4385414123535156, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.7435845625378864, | |
| "grad_norm": 1.4249197244644165, | |
| "learning_rate": 4.278443835358854e-06, | |
| "logits/chosen": -14.115106582641602, | |
| "logits/rejected": -14.075739860534668, | |
| "logps/chosen": -1.3712975978851318, | |
| "logps/rejected": -1.5527522563934326, | |
| "loss": 1.4406, | |
| "odds_ratio_loss": 0.6929912567138672, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.1371297538280487, | |
| "rewards/margins": 0.018145468086004257, | |
| "rewards/rejected": -0.15527524054050446, | |
| "sft_loss": 1.3712975978851318, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.7597494443321883, | |
| "grad_norm": 1.1615644693374634, | |
| "learning_rate": 4.248415615843523e-06, | |
| "logits/chosen": -14.288152694702148, | |
| "logits/rejected": -14.206695556640625, | |
| "logps/chosen": -1.4021141529083252, | |
| "logps/rejected": -1.416723370552063, | |
| "loss": 1.4775, | |
| "odds_ratio_loss": 0.7538274526596069, | |
| "rewards/accuracies": 0.4437499940395355, | |
| "rewards/chosen": -0.14021141827106476, | |
| "rewards/margins": 0.001460921368561685, | |
| "rewards/rejected": -0.14167232811450958, | |
| "sft_loss": 1.4021141529083252, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.7759143261264903, | |
| "grad_norm": 1.276267409324646, | |
| "learning_rate": 4.217885329624666e-06, | |
| "logits/chosen": -14.302003860473633, | |
| "logits/rejected": -14.307230949401855, | |
| "logps/chosen": -1.346254587173462, | |
| "logps/rejected": -1.4862271547317505, | |
| "loss": 1.4137, | |
| "odds_ratio_loss": 0.6745720505714417, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -0.13462546467781067, | |
| "rewards/margins": 0.013997259549796581, | |
| "rewards/rejected": -0.14862270653247833, | |
| "sft_loss": 1.346254587173462, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.7920792079207921, | |
| "grad_norm": 1.6030430793762207, | |
| "learning_rate": 4.186861743633911e-06, | |
| "logits/chosen": -14.13404369354248, | |
| "logits/rejected": -14.251507759094238, | |
| "logps/chosen": -1.4151580333709717, | |
| "logps/rejected": -1.5721826553344727, | |
| "loss": 1.4904, | |
| "odds_ratio_loss": 0.7523505091667175, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.1415158212184906, | |
| "rewards/margins": 0.015702461823821068, | |
| "rewards/rejected": -0.15721826255321503, | |
| "sft_loss": 1.4151580333709717, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.808244089715094, | |
| "grad_norm": 1.7222312688827515, | |
| "learning_rate": 4.155353766456497e-06, | |
| "logits/chosen": -14.4000825881958, | |
| "logits/rejected": -14.304115295410156, | |
| "logps/chosen": -1.433506727218628, | |
| "logps/rejected": -1.535611867904663, | |
| "loss": 1.5005, | |
| "odds_ratio_loss": 0.6703948378562927, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -0.14335067570209503, | |
| "rewards/margins": 0.010210518725216389, | |
| "rewards/rejected": -0.15356118977069855, | |
| "sft_loss": 1.433506727218628, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.808244089715094, | |
| "eval_logits/chosen": -14.227585792541504, | |
| "eval_logits/rejected": -14.265686988830566, | |
| "eval_logps/chosen": -1.4436272382736206, | |
| "eval_logps/rejected": -1.4898087978363037, | |
| "eval_loss": 1.5202080011367798, | |
| "eval_odds_ratio_loss": 0.7658076882362366, | |
| "eval_rewards/accuracies": 0.48181816935539246, | |
| "eval_rewards/chosen": -0.1443627029657364, | |
| "eval_rewards/margins": 0.004618145525455475, | |
| "eval_rewards/rejected": -0.14898087084293365, | |
| "eval_runtime": 207.676, | |
| "eval_samples_per_second": 5.297, | |
| "eval_sft_loss": 1.4436272382736206, | |
| "eval_steps_per_second": 2.648, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.8244089715093958, | |
| "grad_norm": 1.143004059791565, | |
| "learning_rate": 4.123370445773134e-06, | |
| "logits/chosen": -14.356025695800781, | |
| "logits/rejected": -14.339376449584961, | |
| "logps/chosen": -1.4154841899871826, | |
| "logps/rejected": -1.4348183870315552, | |
| "loss": 1.4927, | |
| "odds_ratio_loss": 0.7723585963249207, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.14154842495918274, | |
| "rewards/margins": 0.001933417865075171, | |
| "rewards/rejected": -0.14348182082176208, | |
| "sft_loss": 1.4154841899871826, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.8405738533036977, | |
| "grad_norm": 3.6751832962036133, | |
| "learning_rate": 4.090920965761906e-06, | |
| "logits/chosen": -14.4230375289917, | |
| "logits/rejected": -14.330423355102539, | |
| "logps/chosen": -1.4806926250457764, | |
| "logps/rejected": -1.4873076677322388, | |
| "loss": 1.559, | |
| "odds_ratio_loss": 0.7833209037780762, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.14806927740573883, | |
| "rewards/margins": 0.0006614929297938943, | |
| "rewards/rejected": -0.14873075485229492, | |
| "sft_loss": 1.4806926250457764, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.8567387350979996, | |
| "grad_norm": 4.592033386230469, | |
| "learning_rate": 4.058014644460991e-06, | |
| "logits/chosen": -14.309356689453125, | |
| "logits/rejected": -14.266693115234375, | |
| "logps/chosen": -1.4232040643692017, | |
| "logps/rejected": -1.4629483222961426, | |
| "loss": 1.4967, | |
| "odds_ratio_loss": 0.7350074052810669, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.1423204094171524, | |
| "rewards/margins": 0.003974422812461853, | |
| "rewards/rejected": -0.14629481732845306, | |
| "sft_loss": 1.4232040643692017, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.8729036168923014, | |
| "grad_norm": 1.3515141010284424, | |
| "learning_rate": 4.024660931092939e-06, | |
| "logits/chosen": -14.12739086151123, | |
| "logits/rejected": -14.135973930358887, | |
| "logps/chosen": -1.4027074575424194, | |
| "logps/rejected": -1.5116406679153442, | |
| "loss": 1.4748, | |
| "odds_ratio_loss": 0.7212173938751221, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -0.14027073979377747, | |
| "rewards/margins": 0.010893313214182854, | |
| "rewards/rejected": -0.15116406977176666, | |
| "sft_loss": 1.4027074575424194, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.8890684986866033, | |
| "grad_norm": 3.3689217567443848, | |
| "learning_rate": 3.990869403351272e-06, | |
| "logits/chosen": -14.354001998901367, | |
| "logits/rejected": -14.225595474243164, | |
| "logps/chosen": -1.4652130603790283, | |
| "logps/rejected": -1.552912712097168, | |
| "loss": 1.5359, | |
| "odds_ratio_loss": 0.7067934274673462, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.14652130007743835, | |
| "rewards/margins": 0.008769966661930084, | |
| "rewards/rejected": -0.15529127418994904, | |
| "sft_loss": 1.4652130603790283, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.9052333804809052, | |
| "grad_norm": 1.5204488039016724, | |
| "learning_rate": 3.956649764650206e-06, | |
| "logits/chosen": -14.487988471984863, | |
| "logits/rejected": -14.507904052734375, | |
| "logps/chosen": -1.4564487934112549, | |
| "logps/rejected": -1.5203144550323486, | |
| "loss": 1.5325, | |
| "odds_ratio_loss": 0.7608081102371216, | |
| "rewards/accuracies": 0.42500001192092896, | |
| "rewards/chosen": -0.1456448882818222, | |
| "rewards/margins": 0.006386570632457733, | |
| "rewards/rejected": -0.15203145146369934, | |
| "sft_loss": 1.4564487934112549, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.9213982622752072, | |
| "grad_norm": 2.2319583892822266, | |
| "learning_rate": 3.92201184133826e-06, | |
| "logits/chosen": -14.393239974975586, | |
| "logits/rejected": -14.3502779006958, | |
| "logps/chosen": -1.3946270942687988, | |
| "logps/rejected": -1.444805383682251, | |
| "loss": 1.4679, | |
| "odds_ratio_loss": 0.7322729229927063, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.13946272432804108, | |
| "rewards/margins": 0.005017831921577454, | |
| "rewards/rejected": -0.14448055624961853, | |
| "sft_loss": 1.3946270942687988, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.937563144069509, | |
| "grad_norm": 1.4617536067962646, | |
| "learning_rate": 3.886965579876572e-06, | |
| "logits/chosen": -14.353238105773926, | |
| "logits/rejected": -14.260797500610352, | |
| "logps/chosen": -1.3793189525604248, | |
| "logps/rejected": -1.445691704750061, | |
| "loss": 1.4501, | |
| "odds_ratio_loss": 0.7080078125, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.13793189823627472, | |
| "rewards/margins": 0.006637275218963623, | |
| "rewards/rejected": -0.14456915855407715, | |
| "sft_loss": 1.3793189525604248, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.9537280258638109, | |
| "grad_norm": 1.2430846691131592, | |
| "learning_rate": 3.851521043982716e-06, | |
| "logits/chosen": -14.31140422821045, | |
| "logits/rejected": -14.404243469238281, | |
| "logps/chosen": -1.424002766609192, | |
| "logps/rejected": -1.4054510593414307, | |
| "loss": 1.4998, | |
| "odds_ratio_loss": 0.7578663229942322, | |
| "rewards/accuracies": 0.45625001192092896, | |
| "rewards/chosen": -0.14240026473999023, | |
| "rewards/margins": -0.0018551532411947846, | |
| "rewards/rejected": -0.14054511487483978, | |
| "sft_loss": 1.424002766609192, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.9698929076581128, | |
| "grad_norm": 1.5072684288024902, | |
| "learning_rate": 3.81568841174086e-06, | |
| "logits/chosen": -14.169085502624512, | |
| "logits/rejected": -14.1954345703125, | |
| "logps/chosen": -1.4412424564361572, | |
| "logps/rejected": -1.4657504558563232, | |
| "loss": 1.5191, | |
| "odds_ratio_loss": 0.7788038849830627, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.14412423968315125, | |
| "rewards/margins": 0.002450800035148859, | |
| "rewards/rejected": -0.14657504856586456, | |
| "sft_loss": 1.4412424564361572, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.9860577894524146, | |
| "grad_norm": 1.2968331575393677, | |
| "learning_rate": 3.7794779726790664e-06, | |
| "logits/chosen": -14.130575180053711, | |
| "logits/rejected": -14.240781784057617, | |
| "logps/chosen": -1.3836543560028076, | |
| "logps/rejected": -1.457695722579956, | |
| "loss": 1.4561, | |
| "odds_ratio_loss": 0.7247332334518433, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -0.13836543262004852, | |
| "rewards/margins": 0.007404146250337362, | |
| "rewards/rejected": -0.14576958119869232, | |
| "sft_loss": 1.3836543560028076, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.0022226712467166, | |
| "grad_norm": 4.868699550628662, | |
| "learning_rate": 3.7429001248146096e-06, | |
| "logits/chosen": -14.240348815917969, | |
| "logits/rejected": -14.297922134399414, | |
| "logps/chosen": -1.4243017435073853, | |
| "logps/rejected": -1.5530868768692017, | |
| "loss": 1.4924, | |
| "odds_ratio_loss": 0.680776059627533, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.1424301713705063, | |
| "rewards/margins": 0.012878507375717163, | |
| "rewards/rejected": -0.15530869364738464, | |
| "sft_loss": 1.4243017435073853, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.0183875530410185, | |
| "grad_norm": 0.8127214312553406, | |
| "learning_rate": 3.7059653716681227e-06, | |
| "logits/chosen": -14.380844116210938, | |
| "logits/rejected": -14.255830764770508, | |
| "logps/chosen": -1.4107029438018799, | |
| "logps/rejected": -1.521928071975708, | |
| "loss": 1.4861, | |
| "odds_ratio_loss": 0.7541464567184448, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.14107032120227814, | |
| "rewards/margins": 0.01112250704318285, | |
| "rewards/rejected": -0.15219281613826752, | |
| "sft_loss": 1.4107029438018799, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.0345524348353203, | |
| "grad_norm": 3.8503897190093994, | |
| "learning_rate": 3.668684319247463e-06, | |
| "logits/chosen": -14.447845458984375, | |
| "logits/rejected": -14.433076858520508, | |
| "logps/chosen": -1.367375135421753, | |
| "logps/rejected": -1.548612356185913, | |
| "loss": 1.4348, | |
| "odds_ratio_loss": 0.6741297841072083, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -0.13673751056194305, | |
| "rewards/margins": 0.018123725429177284, | |
| "rewards/rejected": -0.1548612415790558, | |
| "sft_loss": 1.367375135421753, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.0507173166296222, | |
| "grad_norm": 0.9416384100914001, | |
| "learning_rate": 3.6310676730021373e-06, | |
| "logits/chosen": -14.3724946975708, | |
| "logits/rejected": -14.455398559570312, | |
| "logps/chosen": -1.3245970010757446, | |
| "logps/rejected": -1.3460277318954468, | |
| "loss": 1.3979, | |
| "odds_ratio_loss": 0.7330806255340576, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -0.13245970010757446, | |
| "rewards/margins": 0.002143078250810504, | |
| "rewards/rejected": -0.13460277020931244, | |
| "sft_loss": 1.3245970010757446, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.066882198423924, | |
| "grad_norm": 2.8321056365966797, | |
| "learning_rate": 3.593126234749178e-06, | |
| "logits/chosen": -14.317327499389648, | |
| "logits/rejected": -14.38727855682373, | |
| "logps/chosen": -1.423680067062378, | |
| "logps/rejected": -1.4616180658340454, | |
| "loss": 1.4976, | |
| "odds_ratio_loss": 0.739305853843689, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -0.14236800372600555, | |
| "rewards/margins": 0.0037938044406473637, | |
| "rewards/rejected": -0.14616182446479797, | |
| "sft_loss": 1.423680067062378, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.083047080218226, | |
| "grad_norm": 0.9518349766731262, | |
| "learning_rate": 3.554870899571343e-06, | |
| "logits/chosen": -14.144752502441406, | |
| "logits/rejected": -14.251813888549805, | |
| "logps/chosen": -1.4052397012710571, | |
| "logps/rejected": -1.5265625715255737, | |
| "loss": 1.4767, | |
| "odds_ratio_loss": 0.7148950695991516, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.1405239850282669, | |
| "rewards/margins": 0.012132286094129086, | |
| "rewards/rejected": -0.15265627205371857, | |
| "sft_loss": 1.4052397012710571, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.0992119620125278, | |
| "grad_norm": 3.0823421478271484, | |
| "learning_rate": 3.5163126526885373e-06, | |
| "logits/chosen": -14.263737678527832, | |
| "logits/rejected": -14.341888427734375, | |
| "logps/chosen": -1.3758028745651245, | |
| "logps/rejected": -1.4713342189788818, | |
| "loss": 1.4506, | |
| "odds_ratio_loss": 0.748176097869873, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -0.1375802755355835, | |
| "rewards/margins": 0.009553151205182076, | |
| "rewards/rejected": -0.14713343977928162, | |
| "sft_loss": 1.3758028745651245, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.1153768438068297, | |
| "grad_norm": 1.1957412958145142, | |
| "learning_rate": 3.4774625663033484e-06, | |
| "logits/chosen": -14.262721061706543, | |
| "logits/rejected": -14.248212814331055, | |
| "logps/chosen": -1.4033539295196533, | |
| "logps/rejected": -1.4489859342575073, | |
| "loss": 1.4783, | |
| "odds_ratio_loss": 0.7493518590927124, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -0.14033538103103638, | |
| "rewards/margins": 0.004563204478472471, | |
| "rewards/rejected": -0.14489860832691193, | |
| "sft_loss": 1.4033539295196533, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.1315417256011315, | |
| "grad_norm": 1.0352710485458374, | |
| "learning_rate": 3.4383317964216067e-06, | |
| "logits/chosen": -14.168815612792969, | |
| "logits/rejected": -14.324069023132324, | |
| "logps/chosen": -1.3365106582641602, | |
| "logps/rejected": -1.3756332397460938, | |
| "loss": 1.4108, | |
| "odds_ratio_loss": 0.7429829835891724, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -0.13365106284618378, | |
| "rewards/margins": 0.0039122505113482475, | |
| "rewards/rejected": -0.1375633180141449, | |
| "sft_loss": 1.3365106582641602, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.1477066073954334, | |
| "grad_norm": 2.4808411598205566, | |
| "learning_rate": 3.398931579648877e-06, | |
| "logits/chosen": -14.3150053024292, | |
| "logits/rejected": -14.531530380249023, | |
| "logps/chosen": -1.4491299390792847, | |
| "logps/rejected": -1.5492023229599, | |
| "loss": 1.5203, | |
| "odds_ratio_loss": 0.7113555669784546, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.14491300284862518, | |
| "rewards/margins": 0.010007232427597046, | |
| "rewards/rejected": -0.15492023527622223, | |
| "sft_loss": 1.4491299390792847, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.1638714891897353, | |
| "grad_norm": 1.2726991176605225, | |
| "learning_rate": 3.359273229963813e-06, | |
| "logits/chosen": -14.357129096984863, | |
| "logits/rejected": -14.291903495788574, | |
| "logps/chosen": -1.3459408283233643, | |
| "logps/rejected": -1.3911712169647217, | |
| "loss": 1.421, | |
| "odds_ratio_loss": 0.750839114189148, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -0.13459408283233643, | |
| "rewards/margins": 0.004523060750216246, | |
| "rewards/rejected": -0.13911715149879456, | |
| "sft_loss": 1.3459408283233643, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.1800363709840371, | |
| "grad_norm": 1.0978913307189941, | |
| "learning_rate": 3.319368135469285e-06, | |
| "logits/chosen": -14.36750602722168, | |
| "logits/rejected": -14.435731887817383, | |
| "logps/chosen": -1.3765571117401123, | |
| "logps/rejected": -1.4039866924285889, | |
| "loss": 1.4538, | |
| "odds_ratio_loss": 0.7719755172729492, | |
| "rewards/accuracies": 0.45625001192092896, | |
| "rewards/chosen": -0.13765572011470795, | |
| "rewards/margins": 0.002742946846410632, | |
| "rewards/rejected": -0.14039869606494904, | |
| "sft_loss": 1.3765571117401123, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.196201252778339, | |
| "grad_norm": 2.1035361289978027, | |
| "learning_rate": 3.279227755122228e-06, | |
| "logits/chosen": -14.316058158874512, | |
| "logits/rejected": -14.294093132019043, | |
| "logps/chosen": -1.320318579673767, | |
| "logps/rejected": -1.5284496545791626, | |
| "loss": 1.3866, | |
| "odds_ratio_loss": 0.6632006764411926, | |
| "rewards/accuracies": 0.612500011920929, | |
| "rewards/chosen": -0.13203184306621552, | |
| "rewards/margins": 0.020813116803765297, | |
| "rewards/rejected": -0.15284495055675507, | |
| "sft_loss": 1.320318579673767, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.2123661345726409, | |
| "grad_norm": 3.223933696746826, | |
| "learning_rate": 3.2388636154431417e-06, | |
| "logits/chosen": -14.34916877746582, | |
| "logits/rejected": -14.280328750610352, | |
| "logps/chosen": -1.429145097732544, | |
| "logps/rejected": -1.5203419923782349, | |
| "loss": 1.502, | |
| "odds_ratio_loss": 0.7281750440597534, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.1429145336151123, | |
| "rewards/margins": 0.009119677357375622, | |
| "rewards/rejected": -0.152034193277359, | |
| "sft_loss": 1.429145097732544, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.2285310163669427, | |
| "grad_norm": 1.1619030237197876, | |
| "learning_rate": 3.198287307206192e-06, | |
| "logits/chosen": -14.091611862182617, | |
| "logits/rejected": -14.187002182006836, | |
| "logps/chosen": -1.4056107997894287, | |
| "logps/rejected": -1.442886233329773, | |
| "loss": 1.4829, | |
| "odds_ratio_loss": 0.7725043296813965, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -0.1405610740184784, | |
| "rewards/margins": 0.003727543633431196, | |
| "rewards/rejected": -0.14428862929344177, | |
| "sft_loss": 1.4056107997894287, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.2446958981612446, | |
| "grad_norm": 1.0456814765930176, | |
| "learning_rate": 3.157510482110856e-06, | |
| "logits/chosen": -14.408856391906738, | |
| "logits/rejected": -14.243043899536133, | |
| "logps/chosen": -1.3281633853912354, | |
| "logps/rejected": -1.3863494396209717, | |
| "loss": 1.4004, | |
| "odds_ratio_loss": 0.7221428751945496, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.132816344499588, | |
| "rewards/margins": 0.005818599369376898, | |
| "rewards/rejected": -0.13863493502140045, | |
| "sft_loss": 1.3281633853912354, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.2608607799555465, | |
| "grad_norm": 1.2318408489227295, | |
| "learning_rate": 3.116544849436077e-06, | |
| "logits/chosen": -14.334813117980957, | |
| "logits/rejected": -14.20678997039795, | |
| "logps/chosen": -1.5153284072875977, | |
| "logps/rejected": -1.6125590801239014, | |
| "loss": 1.588, | |
| "odds_ratio_loss": 0.7266558408737183, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -0.151532843708992, | |
| "rewards/margins": 0.009723084978759289, | |
| "rewards/rejected": -0.16125592589378357, | |
| "sft_loss": 1.5153284072875977, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.2770256617498483, | |
| "grad_norm": 1.3976880311965942, | |
| "learning_rate": 3.0754021726778848e-06, | |
| "logits/chosen": -14.33143138885498, | |
| "logits/rejected": -14.257779121398926, | |
| "logps/chosen": -1.3455626964569092, | |
| "logps/rejected": -1.4571717977523804, | |
| "loss": 1.4162, | |
| "odds_ratio_loss": 0.7065266370773315, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.13455626368522644, | |
| "rewards/margins": 0.011160916648805141, | |
| "rewards/rejected": -0.14571718871593475, | |
| "sft_loss": 1.3455626964569092, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.2931905435441502, | |
| "grad_norm": 0.7877367734909058, | |
| "learning_rate": 3.0340942661714463e-06, | |
| "logits/chosen": -14.352252006530762, | |
| "logits/rejected": -14.257513046264648, | |
| "logps/chosen": -1.4310262203216553, | |
| "logps/rejected": -1.4348089694976807, | |
| "loss": 1.5077, | |
| "odds_ratio_loss": 0.7662674188613892, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -0.14310263097286224, | |
| "rewards/margins": 0.00037826746120117605, | |
| "rewards/rejected": -0.14348089694976807, | |
| "sft_loss": 1.4310262203216553, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.3093554253384523, | |
| "grad_norm": 1.265386939048767, | |
| "learning_rate": 2.992632991698512e-06, | |
| "logits/chosen": -14.194437980651855, | |
| "logits/rejected": -14.312055587768555, | |
| "logps/chosen": -1.3498046398162842, | |
| "logps/rejected": -1.4344502687454224, | |
| "loss": 1.4207, | |
| "odds_ratio_loss": 0.7088189721107483, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.1349804699420929, | |
| "rewards/margins": 0.008464555256068707, | |
| "rewards/rejected": -0.14344502985477448, | |
| "sft_loss": 1.3498046398162842, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.3255203071327541, | |
| "grad_norm": 1.7529423236846924, | |
| "learning_rate": 2.9510302550812537e-06, | |
| "logits/chosen": -14.307215690612793, | |
| "logits/rejected": -14.374090194702148, | |
| "logps/chosen": -1.3449764251708984, | |
| "logps/rejected": -1.5051848888397217, | |
| "loss": 1.4155, | |
| "odds_ratio_loss": 0.7051501274108887, | |
| "rewards/accuracies": 0.48124998807907104, | |
| "rewards/chosen": -0.13449765741825104, | |
| "rewards/margins": 0.016020851209759712, | |
| "rewards/rejected": -0.1505185067653656, | |
| "sft_loss": 1.3449764251708984, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.341685188927056, | |
| "grad_norm": 3.534449815750122, | |
| "learning_rate": 2.9092980027634325e-06, | |
| "logits/chosen": -14.194910049438477, | |
| "logits/rejected": -14.260457038879395, | |
| "logps/chosen": -1.3157680034637451, | |
| "logps/rejected": -1.39622163772583, | |
| "loss": 1.3858, | |
| "odds_ratio_loss": 0.7005105018615723, | |
| "rewards/accuracies": 0.48124998807907104, | |
| "rewards/chosen": -0.131576806306839, | |
| "rewards/margins": 0.008045351132750511, | |
| "rewards/rejected": -0.13962216675281525, | |
| "sft_loss": 1.3157680034637451, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.3578500707213579, | |
| "grad_norm": 1.6155622005462646, | |
| "learning_rate": 2.867448218379927e-06, | |
| "logits/chosen": -14.231335639953613, | |
| "logits/rejected": -14.248939514160156, | |
| "logps/chosen": -1.3620965480804443, | |
| "logps/rejected": -1.409558892250061, | |
| "loss": 1.4355, | |
| "odds_ratio_loss": 0.734248697757721, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.1362096518278122, | |
| "rewards/margins": 0.0047462377697229385, | |
| "rewards/rejected": -0.14095589518547058, | |
| "sft_loss": 1.3620965480804443, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.3740149525156597, | |
| "grad_norm": 4.540154933929443, | |
| "learning_rate": 2.825492919315559e-06, | |
| "logits/chosen": -14.306146621704102, | |
| "logits/rejected": -14.476399421691895, | |
| "logps/chosen": -1.4043729305267334, | |
| "logps/rejected": -1.4499131441116333, | |
| "loss": 1.4789, | |
| "odds_ratio_loss": 0.7450671195983887, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -0.1404372900724411, | |
| "rewards/margins": 0.004554024897515774, | |
| "rewards/rejected": -0.14499132335186005, | |
| "sft_loss": 1.4043729305267334, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.3901798343099616, | |
| "grad_norm": 1.2316781282424927, | |
| "learning_rate": 2.7834441532542482e-06, | |
| "logits/chosen": -14.352537155151367, | |
| "logits/rejected": -14.446965217590332, | |
| "logps/chosen": -1.3581891059875488, | |
| "logps/rejected": -1.4636138677597046, | |
| "loss": 1.4297, | |
| "odds_ratio_loss": 0.7155886888504028, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -0.13581891357898712, | |
| "rewards/margins": 0.01054247748106718, | |
| "rewards/rejected": -0.14636139571666718, | |
| "sft_loss": 1.3581891059875488, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.4063447161042635, | |
| "grad_norm": 0.915081799030304, | |
| "learning_rate": 2.74131399471945e-06, | |
| "logits/chosen": -14.232261657714844, | |
| "logits/rejected": -14.369558334350586, | |
| "logps/chosen": -1.4017927646636963, | |
| "logps/rejected": -1.4412128925323486, | |
| "loss": 1.4755, | |
| "odds_ratio_loss": 0.7375406622886658, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.14017929136753082, | |
| "rewards/margins": 0.003942002542316914, | |
| "rewards/rejected": -0.14412127435207367, | |
| "sft_loss": 1.4017927646636963, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.4225095978985653, | |
| "grad_norm": 1.1700351238250732, | |
| "learning_rate": 2.6991145416068947e-06, | |
| "logits/chosen": -14.184051513671875, | |
| "logits/rejected": -14.361761093139648, | |
| "logps/chosen": -1.3888486623764038, | |
| "logps/rejected": -1.3866727352142334, | |
| "loss": 1.4645, | |
| "odds_ratio_loss": 0.7568970918655396, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -0.13888487219810486, | |
| "rewards/margins": -0.00021760519302915782, | |
| "rewards/rejected": -0.1386672556400299, | |
| "sft_loss": 1.3888486623764038, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.4386744796928672, | |
| "grad_norm": 0.7416606545448303, | |
| "learning_rate": 2.6568579117106143e-06, | |
| "logits/chosen": -14.222585678100586, | |
| "logits/rejected": -14.173550605773926, | |
| "logps/chosen": -1.321872591972351, | |
| "logps/rejected": -1.451570749282837, | |
| "loss": 1.3933, | |
| "odds_ratio_loss": 0.7138932943344116, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.13218727707862854, | |
| "rewards/margins": 0.012969812378287315, | |
| "rewards/rejected": -0.1451570689678192, | |
| "sft_loss": 1.321872591972351, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.454839361487169, | |
| "grad_norm": 0.7456266283988953, | |
| "learning_rate": 2.6145562392432544e-06, | |
| "logits/chosen": -14.201733589172363, | |
| "logits/rejected": -14.159896850585938, | |
| "logps/chosen": -1.371537446975708, | |
| "logps/rejected": -1.4001505374908447, | |
| "loss": 1.4466, | |
| "odds_ratio_loss": 0.7501237392425537, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.137153759598732, | |
| "rewards/margins": 0.002861298155039549, | |
| "rewards/rejected": -0.14001503586769104, | |
| "sft_loss": 1.371537446975708, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.471004243281471, | |
| "grad_norm": 1.7800395488739014, | |
| "learning_rate": 2.5722216713516682e-06, | |
| "logits/chosen": -14.122312545776367, | |
| "logits/rejected": -14.1841402053833, | |
| "logps/chosen": -1.2916905879974365, | |
| "logps/rejected": -1.3739659786224365, | |
| "loss": 1.3653, | |
| "odds_ratio_loss": 0.7365130186080933, | |
| "rewards/accuracies": 0.48124998807907104, | |
| "rewards/chosen": -0.1291690617799759, | |
| "rewards/margins": 0.008227519690990448, | |
| "rewards/rejected": -0.13739657402038574, | |
| "sft_loss": 1.2916905879974365, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.4871691250757728, | |
| "grad_norm": 3.366191864013672, | |
| "learning_rate": 2.5298663646288064e-06, | |
| "logits/chosen": -14.279853820800781, | |
| "logits/rejected": -14.313766479492188, | |
| "logps/chosen": -1.3366254568099976, | |
| "logps/rejected": -1.4743283987045288, | |
| "loss": 1.4084, | |
| "odds_ratio_loss": 0.7178291082382202, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -0.13366253674030304, | |
| "rewards/margins": 0.013770299032330513, | |
| "rewards/rejected": -0.1474328488111496, | |
| "sft_loss": 1.3366254568099976, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.503334006870075, | |
| "grad_norm": 1.793541431427002, | |
| "learning_rate": 2.487502481622879e-06, | |
| "logits/chosen": -14.228408813476562, | |
| "logits/rejected": -14.142854690551758, | |
| "logps/chosen": -1.3270151615142822, | |
| "logps/rejected": -1.4341893196105957, | |
| "loss": 1.3983, | |
| "odds_ratio_loss": 0.7129431366920471, | |
| "rewards/accuracies": 0.4937500059604645, | |
| "rewards/chosen": -0.13270151615142822, | |
| "rewards/margins": 0.010717417113482952, | |
| "rewards/rejected": -0.14341893792152405, | |
| "sft_loss": 1.3270151615142822, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.5194988886643768, | |
| "grad_norm": 2.546449661254883, | |
| "learning_rate": 2.4451421873448253e-06, | |
| "logits/chosen": -14.15150260925293, | |
| "logits/rejected": -14.336977005004883, | |
| "logps/chosen": -1.431612253189087, | |
| "logps/rejected": -1.4608542919158936, | |
| "loss": 1.508, | |
| "odds_ratio_loss": 0.7637500762939453, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -0.14316122233867645, | |
| "rewards/margins": 0.002924212021753192, | |
| "rewards/rejected": -0.1460854411125183, | |
| "sft_loss": 1.431612253189087, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.5356637704586786, | |
| "grad_norm": 2.0193891525268555, | |
| "learning_rate": 2.40279764577506e-06, | |
| "logits/chosen": -14.358665466308594, | |
| "logits/rejected": -14.505513191223145, | |
| "logps/chosen": -1.403634786605835, | |
| "logps/rejected": -1.4488627910614014, | |
| "loss": 1.48, | |
| "odds_ratio_loss": 0.7633059620857239, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -0.14036348462104797, | |
| "rewards/margins": 0.004522812552750111, | |
| "rewards/rejected": -0.14488628506660461, | |
| "sft_loss": 1.403634786605835, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.5518286522529805, | |
| "grad_norm": 1.2108488082885742, | |
| "learning_rate": 2.3604810163705242e-06, | |
| "logits/chosen": -14.17876148223877, | |
| "logits/rejected": -14.2489652633667, | |
| "logps/chosen": -1.306792140007019, | |
| "logps/rejected": -1.3910942077636719, | |
| "loss": 1.377, | |
| "odds_ratio_loss": 0.7023099660873413, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.1306792050600052, | |
| "rewards/margins": 0.00843021459877491, | |
| "rewards/rejected": -0.13910941779613495, | |
| "sft_loss": 1.306792140007019, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.5679935340472824, | |
| "grad_norm": 1.9210587739944458, | |
| "learning_rate": 2.3182044505730364e-06, | |
| "logits/chosen": -14.331990242004395, | |
| "logits/rejected": -14.305018424987793, | |
| "logps/chosen": -1.2632302045822144, | |
| "logps/rejected": -1.3584424257278442, | |
| "loss": 1.3349, | |
| "odds_ratio_loss": 0.7163167595863342, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -0.12632302939891815, | |
| "rewards/margins": 0.009521213360130787, | |
| "rewards/rejected": -0.13584424555301666, | |
| "sft_loss": 1.2632302045822144, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.5841584158415842, | |
| "grad_norm": 1.7603510618209839, | |
| "learning_rate": 2.275980088319941e-06, | |
| "logits/chosen": -14.362065315246582, | |
| "logits/rejected": -14.22284984588623, | |
| "logps/chosen": -1.269855260848999, | |
| "logps/rejected": -1.3405383825302124, | |
| "loss": 1.3406, | |
| "odds_ratio_loss": 0.7074419260025024, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -0.12698553502559662, | |
| "rewards/margins": 0.0070683010853827, | |
| "rewards/rejected": -0.13405382633209229, | |
| "sft_loss": 1.269855260848999, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.600323297635886, | |
| "grad_norm": 1.6920086145401, | |
| "learning_rate": 2.2338200545580577e-06, | |
| "logits/chosen": -14.224035263061523, | |
| "logits/rejected": -14.358423233032227, | |
| "logps/chosen": -1.2658283710479736, | |
| "logps/rejected": -1.4482189416885376, | |
| "loss": 1.3345, | |
| "odds_ratio_loss": 0.6871744990348816, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.12658283114433289, | |
| "rewards/margins": 0.01823904737830162, | |
| "rewards/rejected": -0.1448218822479248, | |
| "sft_loss": 1.2658283710479736, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.616488179430188, | |
| "grad_norm": 1.0991649627685547, | |
| "learning_rate": 2.191736455761947e-06, | |
| "logits/chosen": -14.324908256530762, | |
| "logits/rejected": -14.3560209274292, | |
| "logps/chosen": -1.2651708126068115, | |
| "logps/rejected": -1.290913701057434, | |
| "loss": 1.3401, | |
| "odds_ratio_loss": 0.749754786491394, | |
| "rewards/accuracies": 0.4437499940395355, | |
| "rewards/chosen": -0.12651710212230682, | |
| "rewards/margins": 0.0025742852594703436, | |
| "rewards/rejected": -0.12909138202667236, | |
| "sft_loss": 1.2651708126068115, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.616488179430188, | |
| "eval_logits/chosen": -14.268522262573242, | |
| "eval_logits/rejected": -14.308253288269043, | |
| "eval_logps/chosen": -1.3874938488006592, | |
| "eval_logps/rejected": -1.4423273801803589, | |
| "eval_loss": 1.4635207653045654, | |
| "eval_odds_ratio_loss": 0.7602682709693909, | |
| "eval_rewards/accuracies": 0.48363634943962097, | |
| "eval_rewards/chosen": -0.1387493908405304, | |
| "eval_rewards/margins": 0.00548336049541831, | |
| "eval_rewards/rejected": -0.14423276484012604, | |
| "eval_runtime": 207.8962, | |
| "eval_samples_per_second": 5.291, | |
| "eval_sft_loss": 1.3874938488006592, | |
| "eval_steps_per_second": 2.646, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.6326530612244898, | |
| "grad_norm": 0.9229074716567993, | |
| "learning_rate": 2.1497413764574673e-06, | |
| "logits/chosen": -14.391751289367676, | |
| "logits/rejected": -14.302392959594727, | |
| "logps/chosen": -1.4207522869110107, | |
| "logps/rejected": -1.4941614866256714, | |
| "loss": 1.4937, | |
| "odds_ratio_loss": 0.7297941446304321, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -0.14207521080970764, | |
| "rewards/margins": 0.007340931333601475, | |
| "rewards/rejected": -0.14941613376140594, | |
| "sft_loss": 1.4207522869110107, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.6488179430187917, | |
| "grad_norm": 1.2489970922470093, | |
| "learning_rate": 2.1078468757516395e-06, | |
| "logits/chosen": -14.41105842590332, | |
| "logits/rejected": -14.309954643249512, | |
| "logps/chosen": -1.3737413883209229, | |
| "logps/rejected": -1.331855297088623, | |
| "loss": 1.453, | |
| "odds_ratio_loss": 0.7925962805747986, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -0.1373741328716278, | |
| "rewards/margins": -0.004188609775155783, | |
| "rewards/rejected": -0.1331855207681656, | |
| "sft_loss": 1.3737413883209229, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.6649828248130936, | |
| "grad_norm": 0.9103444814682007, | |
| "learning_rate": 2.0660649838698145e-06, | |
| "logits/chosen": -14.60859203338623, | |
| "logits/rejected": -14.583990097045898, | |
| "logps/chosen": -1.3282297849655151, | |
| "logps/rejected": -1.4166333675384521, | |
| "loss": 1.3999, | |
| "odds_ratio_loss": 0.7163518071174622, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -0.13282299041748047, | |
| "rewards/margins": 0.008840366266667843, | |
| "rewards/rejected": -0.1416633427143097, | |
| "sft_loss": 1.3282297849655151, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.6811477066073954, | |
| "grad_norm": 1.1333231925964355, | |
| "learning_rate": 2.0244076987011284e-06, | |
| "logits/chosen": -14.382695198059082, | |
| "logits/rejected": -14.247182846069336, | |
| "logps/chosen": -1.3871229887008667, | |
| "logps/rejected": -1.5080008506774902, | |
| "loss": 1.4558, | |
| "odds_ratio_loss": 0.68644779920578, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.1387123018503189, | |
| "rewards/margins": 0.012087779119610786, | |
| "rewards/rejected": -0.15080007910728455, | |
| "sft_loss": 1.3871229887008667, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.6973125884016973, | |
| "grad_norm": 1.302032709121704, | |
| "learning_rate": 1.982886982353251e-06, | |
| "logits/chosen": -14.392558097839355, | |
| "logits/rejected": -14.241909980773926, | |
| "logps/chosen": -1.3640697002410889, | |
| "logps/rejected": -1.5009006261825562, | |
| "loss": 1.4359, | |
| "odds_ratio_loss": 0.7178789377212524, | |
| "rewards/accuracies": 0.4312500059604645, | |
| "rewards/chosen": -0.13640697300434113, | |
| "rewards/margins": 0.013683101162314415, | |
| "rewards/rejected": -0.1500900685787201, | |
| "sft_loss": 1.3640697002410889, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.7134774701959992, | |
| "grad_norm": 1.7859091758728027, | |
| "learning_rate": 1.941514757717392e-06, | |
| "logits/chosen": -14.138816833496094, | |
| "logits/rejected": -14.210226058959961, | |
| "logps/chosen": -1.3156766891479492, | |
| "logps/rejected": -1.4917762279510498, | |
| "loss": 1.3807, | |
| "odds_ratio_loss": 0.6497665643692017, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.13156768679618835, | |
| "rewards/margins": 0.01760994642972946, | |
| "rewards/rejected": -0.1491776406764984, | |
| "sft_loss": 1.3156766891479492, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.729642351990301, | |
| "grad_norm": 2.0628256797790527, | |
| "learning_rate": 1.9003029050445953e-06, | |
| "logits/chosen": -14.267855644226074, | |
| "logits/rejected": -14.399972915649414, | |
| "logps/chosen": -1.402465581893921, | |
| "logps/rejected": -1.4434514045715332, | |
| "loss": 1.4747, | |
| "odds_ratio_loss": 0.7224588990211487, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.14024657011032104, | |
| "rewards/margins": 0.004098571836948395, | |
| "rewards/rejected": -0.14434513449668884, | |
| "sft_loss": 1.402465581893921, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.745807233784603, | |
| "grad_norm": 1.5042709112167358, | |
| "learning_rate": 1.8592632585342523e-06, | |
| "logits/chosen": -14.195714950561523, | |
| "logits/rejected": -14.285571098327637, | |
| "logps/chosen": -1.3312032222747803, | |
| "logps/rejected": -1.412341833114624, | |
| "loss": 1.4047, | |
| "odds_ratio_loss": 0.7354634404182434, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.1331203281879425, | |
| "rewards/margins": 0.008113870397210121, | |
| "rewards/rejected": -0.14123418927192688, | |
| "sft_loss": 1.3312032222747803, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.7619721155789048, | |
| "grad_norm": 3.4297995567321777, | |
| "learning_rate": 1.8184076029358527e-06, | |
| "logits/chosen": -14.20643138885498, | |
| "logits/rejected": -14.019030570983887, | |
| "logps/chosen": -1.2683379650115967, | |
| "logps/rejected": -1.2236586809158325, | |
| "loss": 1.3443, | |
| "odds_ratio_loss": 0.7591326832771301, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -0.12683378159999847, | |
| "rewards/margins": -0.00446792459115386, | |
| "rewards/rejected": -0.12236586958169937, | |
| "sft_loss": 1.2683379650115967, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.7781369973732066, | |
| "grad_norm": 1.0218937397003174, | |
| "learning_rate": 1.7777476701649318e-06, | |
| "logits/chosen": -14.1577730178833, | |
| "logits/rejected": -14.125236511230469, | |
| "logps/chosen": -1.3477040529251099, | |
| "logps/rejected": -1.391446828842163, | |
| "loss": 1.4231, | |
| "odds_ratio_loss": 0.7540372610092163, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -0.13477042317390442, | |
| "rewards/margins": 0.004374279640614986, | |
| "rewards/rejected": -0.1391446888446808, | |
| "sft_loss": 1.3477040529251099, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.7943018791675085, | |
| "grad_norm": 1.4984055757522583, | |
| "learning_rate": 1.7372951359341925e-06, | |
| "logits/chosen": -14.369695663452148, | |
| "logits/rejected": -14.277885437011719, | |
| "logps/chosen": -1.2875721454620361, | |
| "logps/rejected": -1.3878809213638306, | |
| "loss": 1.3577, | |
| "odds_ratio_loss": 0.7012876272201538, | |
| "rewards/accuracies": 0.4937500059604645, | |
| "rewards/chosen": -0.12875720858573914, | |
| "rewards/margins": 0.01003087218850851, | |
| "rewards/rejected": -0.13878807425498962, | |
| "sft_loss": 1.2875721454620361, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.8104667609618104, | |
| "grad_norm": 3.3275625705718994, | |
| "learning_rate": 1.6970616164007547e-06, | |
| "logits/chosen": -14.229268074035645, | |
| "logits/rejected": -14.10546875, | |
| "logps/chosen": -1.364091396331787, | |
| "logps/rejected": -1.3946739435195923, | |
| "loss": 1.4435, | |
| "odds_ratio_loss": 0.7942220568656921, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -0.13640913367271423, | |
| "rewards/margins": 0.0030582635663449764, | |
| "rewards/rejected": -0.13946738839149475, | |
| "sft_loss": 1.364091396331787, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.8266316427561122, | |
| "grad_norm": 2.735656976699829, | |
| "learning_rate": 1.6570586648305276e-06, | |
| "logits/chosen": -14.143117904663086, | |
| "logits/rejected": -14.2241849899292, | |
| "logps/chosen": -1.344879150390625, | |
| "logps/rejected": -1.493446707725525, | |
| "loss": 1.4182, | |
| "odds_ratio_loss": 0.733532726764679, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -0.13448792695999146, | |
| "rewards/margins": 0.014856770634651184, | |
| "rewards/rejected": -0.14934466779232025, | |
| "sft_loss": 1.344879150390625, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.842796524550414, | |
| "grad_norm": 1.1568862199783325, | |
| "learning_rate": 1.6172977682806151e-06, | |
| "logits/chosen": -14.38661003112793, | |
| "logits/rejected": -14.517931938171387, | |
| "logps/chosen": -1.3603746891021729, | |
| "logps/rejected": -1.5093238353729248, | |
| "loss": 1.4288, | |
| "odds_ratio_loss": 0.68376624584198, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.1360374540090561, | |
| "rewards/margins": 0.014894920401275158, | |
| "rewards/rejected": -0.15093238651752472, | |
| "sft_loss": 1.3603746891021729, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.858961406344716, | |
| "grad_norm": 1.1773515939712524, | |
| "learning_rate": 1.5777903443007586e-06, | |
| "logits/chosen": -14.423624992370605, | |
| "logits/rejected": -14.032621383666992, | |
| "logps/chosen": -1.387117624282837, | |
| "logps/rejected": -1.4605300426483154, | |
| "loss": 1.4607, | |
| "odds_ratio_loss": 0.7362414598464966, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.13871176540851593, | |
| "rewards/margins": 0.007341254502534866, | |
| "rewards/rejected": -0.1460530012845993, | |
| "sft_loss": 1.387117624282837, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.8751262881390178, | |
| "grad_norm": 1.5692604780197144, | |
| "learning_rate": 1.5385477376547226e-06, | |
| "logits/chosen": -14.410656929016113, | |
| "logits/rejected": -14.352084159851074, | |
| "logps/chosen": -1.3973274230957031, | |
| "logps/rejected": -1.4963886737823486, | |
| "loss": 1.4675, | |
| "odds_ratio_loss": 0.7020548582077026, | |
| "rewards/accuracies": 0.48124998807907104, | |
| "rewards/chosen": -0.1397327482700348, | |
| "rewards/margins": 0.009906120598316193, | |
| "rewards/rejected": -0.14963887631893158, | |
| "sft_loss": 1.3973274230957031, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.89129116993332, | |
| "grad_norm": 3.0858218669891357, | |
| "learning_rate": 1.4995812170625845e-06, | |
| "logits/chosen": -14.365419387817383, | |
| "logits/rejected": -14.341082572937012, | |
| "logps/chosen": -1.4526535272598267, | |
| "logps/rejected": -1.5791641473770142, | |
| "loss": 1.5265, | |
| "odds_ratio_loss": 0.7380681037902832, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.1452653706073761, | |
| "rewards/margins": 0.012651054188609123, | |
| "rewards/rejected": -0.15791639685630798, | |
| "sft_loss": 1.4526535272598267, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.9074560517276218, | |
| "grad_norm": 2.4256625175476074, | |
| "learning_rate": 1.4609019719648666e-06, | |
| "logits/chosen": -14.359014511108398, | |
| "logits/rejected": -14.343942642211914, | |
| "logps/chosen": -1.365081787109375, | |
| "logps/rejected": -1.4730589389801025, | |
| "loss": 1.4336, | |
| "odds_ratio_loss": 0.685504138469696, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -0.13650815188884735, | |
| "rewards/margins": 0.010797703638672829, | |
| "rewards/rejected": -0.14730587601661682, | |
| "sft_loss": 1.365081787109375, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.9236209335219236, | |
| "grad_norm": 2.2215967178344727, | |
| "learning_rate": 1.42252110930943e-06, | |
| "logits/chosen": -14.144754409790039, | |
| "logits/rejected": -14.116401672363281, | |
| "logps/chosen": -1.2247555255889893, | |
| "logps/rejected": -1.2106770277023315, | |
| "loss": 1.3031, | |
| "odds_ratio_loss": 0.7834988832473755, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -0.12247554957866669, | |
| "rewards/margins": -0.0014078498352319002, | |
| "rewards/rejected": -0.12106770277023315, | |
| "sft_loss": 1.2247555255889893, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.9397858153162255, | |
| "grad_norm": 1.6026244163513184, | |
| "learning_rate": 1.3844496503620493e-06, | |
| "logits/chosen": -14.315832138061523, | |
| "logits/rejected": -14.499916076660156, | |
| "logps/chosen": -1.4833340644836426, | |
| "logps/rejected": -1.521794080734253, | |
| "loss": 1.5547, | |
| "odds_ratio_loss": 0.7132872343063354, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -0.14833340048789978, | |
| "rewards/margins": 0.0038460283540189266, | |
| "rewards/rejected": -0.15217943489551544, | |
| "sft_loss": 1.4833340644836426, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.9559506971105274, | |
| "grad_norm": 1.1467649936676025, | |
| "learning_rate": 1.3466985275416081e-06, | |
| "logits/chosen": -14.316365242004395, | |
| "logits/rejected": -14.039219856262207, | |
| "logps/chosen": -1.4100277423858643, | |
| "logps/rejected": -1.4868837594985962, | |
| "loss": 1.4848, | |
| "odds_ratio_loss": 0.7481211423873901, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.14100277423858643, | |
| "rewards/margins": 0.00768560403957963, | |
| "rewards/rejected": -0.14868836104869843, | |
| "sft_loss": 1.4100277423858643, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.9721155789048292, | |
| "grad_norm": 1.3261767625808716, | |
| "learning_rate": 1.309278581280791e-06, | |
| "logits/chosen": -14.425065994262695, | |
| "logits/rejected": -14.19542121887207, | |
| "logps/chosen": -1.258156418800354, | |
| "logps/rejected": -1.3927624225616455, | |
| "loss": 1.3258, | |
| "odds_ratio_loss": 0.6761429309844971, | |
| "rewards/accuracies": 0.5562499761581421, | |
| "rewards/chosen": -0.12581565976142883, | |
| "rewards/margins": 0.013460601679980755, | |
| "rewards/rejected": -0.13927623629570007, | |
| "sft_loss": 1.258156418800354, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.9882804606991311, | |
| "grad_norm": 0.8793450593948364, | |
| "learning_rate": 1.272200556913199e-06, | |
| "logits/chosen": -14.331692695617676, | |
| "logits/rejected": -14.390342712402344, | |
| "logps/chosen": -1.2902759313583374, | |
| "logps/rejected": -1.398531198501587, | |
| "loss": 1.3633, | |
| "odds_ratio_loss": 0.7302906513214111, | |
| "rewards/accuracies": 0.4937500059604645, | |
| "rewards/chosen": -0.1290276050567627, | |
| "rewards/margins": 0.01082551758736372, | |
| "rewards/rejected": -0.1398531198501587, | |
| "sft_loss": 1.2902759313583374, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.004445342493433, | |
| "grad_norm": 2.07963228225708, | |
| "learning_rate": 1.2354751015877698e-06, | |
| "logits/chosen": -14.254411697387695, | |
| "logits/rejected": -14.420768737792969, | |
| "logps/chosen": -1.2709214687347412, | |
| "logps/rejected": -1.4514631032943726, | |
| "loss": 1.3403, | |
| "odds_ratio_loss": 0.6936594247817993, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.1270921379327774, | |
| "rewards/margins": 0.018054189160466194, | |
| "rewards/rejected": -0.14514632523059845, | |
| "sft_loss": 1.2709214687347412, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.020610224287735, | |
| "grad_norm": 2.574068069458008, | |
| "learning_rate": 1.1991127612113945e-06, | |
| "logits/chosen": -14.361371040344238, | |
| "logits/rejected": -14.495355606079102, | |
| "logps/chosen": -1.3789875507354736, | |
| "logps/rejected": -1.5034908056259155, | |
| "loss": 1.4475, | |
| "odds_ratio_loss": 0.6847060322761536, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -0.1378987580537796, | |
| "rewards/margins": 0.012450330890715122, | |
| "rewards/rejected": -0.15034906566143036, | |
| "sft_loss": 1.3789875507354736, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.036775106082037, | |
| "grad_norm": 1.4936628341674805, | |
| "learning_rate": 1.1631239774206035e-06, | |
| "logits/chosen": -14.19866943359375, | |
| "logits/rejected": -14.191067695617676, | |
| "logps/chosen": -1.347879409790039, | |
| "logps/rejected": -1.4048999547958374, | |
| "loss": 1.4251, | |
| "odds_ratio_loss": 0.7725744247436523, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -0.1347879320383072, | |
| "rewards/margins": 0.005702070891857147, | |
| "rewards/rejected": -0.14049001038074493, | |
| "sft_loss": 1.347879409790039, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 2.052939987876339, | |
| "grad_norm": 1.7168585062026978, | |
| "learning_rate": 1.1275190845831978e-06, | |
| "logits/chosen": -14.3424711227417, | |
| "logits/rejected": -14.3289213180542, | |
| "logps/chosen": -1.3685007095336914, | |
| "logps/rejected": -1.4727340936660767, | |
| "loss": 1.4389, | |
| "odds_ratio_loss": 0.7035232782363892, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.13685005903244019, | |
| "rewards/margins": 0.010423343628644943, | |
| "rewards/rejected": -0.14727340638637543, | |
| "sft_loss": 1.3685007095336914, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 2.0691048696706407, | |
| "grad_norm": 1.1820368766784668, | |
| "learning_rate": 1.0923083068306778e-06, | |
| "logits/chosen": -14.398675918579102, | |
| "logits/rejected": -14.118631362915039, | |
| "logps/chosen": -1.2939175367355347, | |
| "logps/rejected": -1.473049283027649, | |
| "loss": 1.3601, | |
| "odds_ratio_loss": 0.662093997001648, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.12939175963401794, | |
| "rewards/margins": 0.017913173884153366, | |
| "rewards/rejected": -0.14730492234230042, | |
| "sft_loss": 1.2939175367355347, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 2.0852697514649425, | |
| "grad_norm": 1.1745166778564453, | |
| "learning_rate": 1.0575017551223348e-06, | |
| "logits/chosen": -14.3531494140625, | |
| "logits/rejected": -14.198529243469238, | |
| "logps/chosen": -1.2511951923370361, | |
| "logps/rejected": -1.3217878341674805, | |
| "loss": 1.3224, | |
| "odds_ratio_loss": 0.7121993899345398, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -0.12511952221393585, | |
| "rewards/margins": 0.007059249095618725, | |
| "rewards/rejected": -0.13217875361442566, | |
| "sft_loss": 1.2511951923370361, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 2.1014346332592444, | |
| "grad_norm": 0.894344687461853, | |
| "learning_rate": 1.023109424341833e-06, | |
| "logits/chosen": -14.153393745422363, | |
| "logits/rejected": -14.245986938476562, | |
| "logps/chosen": -1.3667266368865967, | |
| "logps/rejected": -1.42815363407135, | |
| "loss": 1.4394, | |
| "odds_ratio_loss": 0.727142333984375, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.13667264580726624, | |
| "rewards/margins": 0.006142704281955957, | |
| "rewards/rejected": -0.14281536638736725, | |
| "sft_loss": 1.3667266368865967, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.1175995150535463, | |
| "grad_norm": 1.5093544721603394, | |
| "learning_rate": 9.891411904271273e-07, | |
| "logits/chosen": -14.242596626281738, | |
| "logits/rejected": -14.327380180358887, | |
| "logps/chosen": -1.3282233476638794, | |
| "logps/rejected": -1.3852262496948242, | |
| "loss": 1.4007, | |
| "odds_ratio_loss": 0.7251249551773071, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.13282233476638794, | |
| "rewards/margins": 0.005700295325368643, | |
| "rewards/rejected": -0.13852263987064362, | |
| "sft_loss": 1.3282233476638794, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 2.133764396847848, | |
| "grad_norm": 0.8299040198326111, | |
| "learning_rate": 9.556068075345363e-07, | |
| "logits/chosen": -14.465705871582031, | |
| "logits/rejected": -14.254651069641113, | |
| "logps/chosen": -1.2607736587524414, | |
| "logps/rejected": -1.3249403238296509, | |
| "loss": 1.3327, | |
| "odds_ratio_loss": 0.7195707559585571, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.12607736885547638, | |
| "rewards/margins": 0.006416681222617626, | |
| "rewards/rejected": -0.13249404728412628, | |
| "sft_loss": 1.2607736587524414, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 2.14992927864215, | |
| "grad_norm": 1.5431737899780273, | |
| "learning_rate": 9.225159052377838e-07, | |
| "logits/chosen": -14.418218612670898, | |
| "logits/rejected": -14.442914009094238, | |
| "logps/chosen": -1.369145393371582, | |
| "logps/rejected": -1.4892218112945557, | |
| "loss": 1.4395, | |
| "odds_ratio_loss": 0.7034425735473633, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.13691455125808716, | |
| "rewards/margins": 0.012007640674710274, | |
| "rewards/rejected": -0.1489221751689911, | |
| "sft_loss": 1.369145393371582, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 2.166094160436452, | |
| "grad_norm": 2.125438928604126, | |
| "learning_rate": 8.898779857628184e-07, | |
| "logits/chosen": -14.263992309570312, | |
| "logits/rejected": -14.439204216003418, | |
| "logps/chosen": -1.2737493515014648, | |
| "logps/rejected": -1.307660698890686, | |
| "loss": 1.3488, | |
| "odds_ratio_loss": 0.7507684826850891, | |
| "rewards/accuracies": 0.4937500059604645, | |
| "rewards/chosen": -0.12737493216991425, | |
| "rewards/margins": 0.003391148056834936, | |
| "rewards/rejected": -0.13076607882976532, | |
| "sft_loss": 1.2737493515014648, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 2.1822590422307537, | |
| "grad_norm": 1.0558884143829346, | |
| "learning_rate": 8.577024212591975e-07, | |
| "logits/chosen": -14.523656845092773, | |
| "logits/rejected": -14.395648002624512, | |
| "logps/chosen": -1.3369591236114502, | |
| "logps/rejected": -1.402151346206665, | |
| "loss": 1.4081, | |
| "odds_ratio_loss": 0.7112525701522827, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.13369593024253845, | |
| "rewards/margins": 0.006519217975437641, | |
| "rewards/rejected": -0.14021514356136322, | |
| "sft_loss": 1.3369591236114502, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.1984239240250556, | |
| "grad_norm": 1.1882685422897339, | |
| "learning_rate": 8.259984511088276e-07, | |
| "logits/chosen": -14.409403800964355, | |
| "logits/rejected": -14.405116081237793, | |
| "logps/chosen": -1.3154635429382324, | |
| "logps/rejected": -1.4095304012298584, | |
| "loss": 1.3863, | |
| "odds_ratio_loss": 0.7081496715545654, | |
| "rewards/accuracies": 0.4937500059604645, | |
| "rewards/chosen": -0.13154636323451996, | |
| "rewards/margins": 0.009406678378582, | |
| "rewards/rejected": -0.14095303416252136, | |
| "sft_loss": 1.3154635429382324, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 2.2145888058193575, | |
| "grad_norm": 1.6390233039855957, | |
| "learning_rate": 7.947751792728237e-07, | |
| "logits/chosen": -14.409843444824219, | |
| "logits/rejected": -14.329424858093262, | |
| "logps/chosen": -1.3204478025436401, | |
| "logps/rejected": -1.4512555599212646, | |
| "loss": 1.3901, | |
| "odds_ratio_loss": 0.6965182423591614, | |
| "rewards/accuracies": 0.5249999761581421, | |
| "rewards/chosen": -0.13204479217529297, | |
| "rewards/margins": 0.013080772943794727, | |
| "rewards/rejected": -0.14512555301189423, | |
| "sft_loss": 1.3204478025436401, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 2.2307536876136593, | |
| "grad_norm": 1.7825186252593994, | |
| "learning_rate": 7.640415716772626e-07, | |
| "logits/chosen": -14.333005905151367, | |
| "logits/rejected": -14.429731369018555, | |
| "logps/chosen": -1.3603641986846924, | |
| "logps/rejected": -1.4518425464630127, | |
| "loss": 1.4331, | |
| "odds_ratio_loss": 0.7270913124084473, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -0.13603642582893372, | |
| "rewards/margins": 0.009147830307483673, | |
| "rewards/rejected": -0.1451842486858368, | |
| "sft_loss": 1.3603641986846924, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 2.246918569407961, | |
| "grad_norm": 1.125680685043335, | |
| "learning_rate": 7.338064536385722e-07, | |
| "logits/chosen": -14.394281387329102, | |
| "logits/rejected": -14.345739364624023, | |
| "logps/chosen": -1.3667652606964111, | |
| "logps/rejected": -1.5295965671539307, | |
| "loss": 1.435, | |
| "odds_ratio_loss": 0.6821550130844116, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -0.13667652010917664, | |
| "rewards/margins": 0.016283124685287476, | |
| "rewards/rejected": -0.1529596596956253, | |
| "sft_loss": 1.3667652606964111, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 2.263083451202263, | |
| "grad_norm": 1.7544102668762207, | |
| "learning_rate": 7.040785073292883e-07, | |
| "logits/chosen": -14.237360000610352, | |
| "logits/rejected": -14.33959674835205, | |
| "logps/chosen": -1.4276225566864014, | |
| "logps/rejected": -1.4824755191802979, | |
| "loss": 1.5027, | |
| "odds_ratio_loss": 0.750755786895752, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -0.14276224374771118, | |
| "rewards/margins": 0.00548530463129282, | |
| "rewards/rejected": -0.14824756979942322, | |
| "sft_loss": 1.4276225566864014, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.279248332996565, | |
| "grad_norm": 1.7468085289001465, | |
| "learning_rate": 6.748662692849297e-07, | |
| "logits/chosen": -14.5598726272583, | |
| "logits/rejected": -14.531698226928711, | |
| "logps/chosen": -1.3492968082427979, | |
| "logps/rejected": -1.4934823513031006, | |
| "loss": 1.4184, | |
| "odds_ratio_loss": 0.6912583112716675, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -0.13492968678474426, | |
| "rewards/margins": 0.014418545179069042, | |
| "rewards/rejected": -0.14934822916984558, | |
| "sft_loss": 1.3492968082427979, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 2.295413214790867, | |
| "grad_norm": 3.2176520824432373, | |
| "learning_rate": 6.46178127952686e-07, | |
| "logits/chosen": -14.288836479187012, | |
| "logits/rejected": -14.204765319824219, | |
| "logps/chosen": -1.299232840538025, | |
| "logps/rejected": -1.4280776977539062, | |
| "loss": 1.3673, | |
| "odds_ratio_loss": 0.6802908182144165, | |
| "rewards/accuracies": 0.48124998807907104, | |
| "rewards/chosen": -0.12992329895496368, | |
| "rewards/margins": 0.01288448553532362, | |
| "rewards/rejected": -0.1428077667951584, | |
| "sft_loss": 1.299232840538025, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 2.3115780965851687, | |
| "grad_norm": 2.5991835594177246, | |
| "learning_rate": 6.180223212826289e-07, | |
| "logits/chosen": -14.347335815429688, | |
| "logits/rejected": -14.187026977539062, | |
| "logps/chosen": -1.2904529571533203, | |
| "logps/rejected": -1.3600698709487915, | |
| "loss": 1.362, | |
| "odds_ratio_loss": 0.7157233953475952, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -0.1290452927350998, | |
| "rewards/margins": 0.006961710751056671, | |
| "rewards/rejected": -0.13600699603557587, | |
| "sft_loss": 1.2904529571533203, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 2.3277429783794705, | |
| "grad_norm": 0.8683578968048096, | |
| "learning_rate": 5.904069343621443e-07, | |
| "logits/chosen": -14.465449333190918, | |
| "logits/rejected": -14.325057983398438, | |
| "logps/chosen": -1.299377202987671, | |
| "logps/rejected": -1.401989459991455, | |
| "loss": 1.3706, | |
| "odds_ratio_loss": 0.7122213244438171, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.12993772327899933, | |
| "rewards/margins": 0.010261224582791328, | |
| "rewards/rejected": -0.14019893109798431, | |
| "sft_loss": 1.299377202987671, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 2.3439078601737724, | |
| "grad_norm": 1.7288964986801147, | |
| "learning_rate": 5.633398970942544e-07, | |
| "logits/chosen": -14.3145170211792, | |
| "logits/rejected": -14.42223834991455, | |
| "logps/chosen": -1.2952549457550049, | |
| "logps/rejected": -1.3960306644439697, | |
| "loss": 1.3675, | |
| "odds_ratio_loss": 0.7228525876998901, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -0.12952548265457153, | |
| "rewards/margins": 0.010077586397528648, | |
| "rewards/rejected": -0.13960307836532593, | |
| "sft_loss": 1.2952549457550049, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 2.3600727419680743, | |
| "grad_norm": 1.8580021858215332, | |
| "learning_rate": 5.368289819205069e-07, | |
| "logits/chosen": -14.319725036621094, | |
| "logits/rejected": -14.285405158996582, | |
| "logps/chosen": -1.2445900440216064, | |
| "logps/rejected": -1.3483976125717163, | |
| "loss": 1.3139, | |
| "odds_ratio_loss": 0.6927712559700012, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -0.12445902824401855, | |
| "rewards/margins": 0.010380755178630352, | |
| "rewards/rejected": -0.13483977317810059, | |
| "sft_loss": 1.2445900440216064, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 2.376237623762376, | |
| "grad_norm": 2.3416638374328613, | |
| "learning_rate": 5.108818015890785e-07, | |
| "logits/chosen": -14.468851089477539, | |
| "logits/rejected": -14.461502075195312, | |
| "logps/chosen": -1.3592495918273926, | |
| "logps/rejected": -1.4990885257720947, | |
| "loss": 1.4311, | |
| "odds_ratio_loss": 0.7181252241134644, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -0.13592496514320374, | |
| "rewards/margins": 0.013983884826302528, | |
| "rewards/rejected": -0.14990884065628052, | |
| "sft_loss": 1.3592495918273926, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 2.392402505556678, | |
| "grad_norm": 1.5794059038162231, | |
| "learning_rate": 4.855058069687291e-07, | |
| "logits/chosen": -14.158782958984375, | |
| "logits/rejected": -14.074625015258789, | |
| "logps/chosen": -1.324530839920044, | |
| "logps/rejected": -1.366247296333313, | |
| "loss": 1.3974, | |
| "odds_ratio_loss": 0.7290586233139038, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.1324530839920044, | |
| "rewards/margins": 0.004171643406152725, | |
| "rewards/rejected": -0.13662473857402802, | |
| "sft_loss": 1.324530839920044, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 2.40856738735098, | |
| "grad_norm": 2.1180176734924316, | |
| "learning_rate": 4.607082849092523e-07, | |
| "logits/chosen": -14.219759941101074, | |
| "logits/rejected": -14.182577133178711, | |
| "logps/chosen": -1.4282917976379395, | |
| "logps/rejected": -1.4976496696472168, | |
| "loss": 1.5016, | |
| "odds_ratio_loss": 0.7326869368553162, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -0.14282917976379395, | |
| "rewards/margins": 0.006935800425708294, | |
| "rewards/rejected": -0.14976496994495392, | |
| "sft_loss": 1.4282917976379395, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 2.4247322691452817, | |
| "grad_norm": 2.495347738265991, | |
| "learning_rate": 4.3649635614901405e-07, | |
| "logits/chosen": -14.16241455078125, | |
| "logits/rejected": -14.45665168762207, | |
| "logps/chosen": -1.3701971769332886, | |
| "logps/rejected": -1.3534958362579346, | |
| "loss": 1.446, | |
| "odds_ratio_loss": 0.7579734921455383, | |
| "rewards/accuracies": 0.44999998807907104, | |
| "rewards/chosen": -0.13701972365379333, | |
| "rewards/margins": -0.0016701335553079844, | |
| "rewards/rejected": -0.1353495866060257, | |
| "sft_loss": 1.3701971769332886, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.4247322691452817, | |
| "eval_logits/chosen": -14.27784252166748, | |
| "eval_logits/rejected": -14.317824363708496, | |
| "eval_logps/chosen": -1.372594952583313, | |
| "eval_logps/rejected": -1.4290432929992676, | |
| "eval_loss": 1.4484930038452148, | |
| "eval_odds_ratio_loss": 0.7589808702468872, | |
| "eval_rewards/accuracies": 0.4809090793132782, | |
| "eval_rewards/chosen": -0.13725949823856354, | |
| "eval_rewards/margins": 0.005644842050969601, | |
| "eval_rewards/rejected": -0.1429043412208557, | |
| "eval_runtime": 396.2162, | |
| "eval_samples_per_second": 2.776, | |
| "eval_sft_loss": 1.372594952583313, | |
| "eval_steps_per_second": 1.388, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.4408971509395836, | |
| "grad_norm": 1.8667449951171875, | |
| "learning_rate": 4.128769732701973e-07, | |
| "logits/chosen": -14.2674560546875, | |
| "logits/rejected": -14.17170524597168, | |
| "logps/chosen": -1.3341007232666016, | |
| "logps/rejected": -1.4468257427215576, | |
| "loss": 1.4053, | |
| "odds_ratio_loss": 0.7120139002799988, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.13341006636619568, | |
| "rewards/margins": 0.011272510513663292, | |
| "rewards/rejected": -0.14468258619308472, | |
| "sft_loss": 1.3341007232666016, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 2.4570620327338855, | |
| "grad_norm": 2.940946102142334, | |
| "learning_rate": 3.8985691870233046e-07, | |
| "logits/chosen": -14.28807258605957, | |
| "logits/rejected": -14.214245796203613, | |
| "logps/chosen": -1.3024286031723022, | |
| "logps/rejected": -1.4218701124191284, | |
| "loss": 1.3737, | |
| "odds_ratio_loss": 0.712692379951477, | |
| "rewards/accuracies": 0.48124998807907104, | |
| "rewards/chosen": -0.13024285435676575, | |
| "rewards/margins": 0.011944140307605267, | |
| "rewards/rejected": -0.1421869993209839, | |
| "sft_loss": 1.3024286031723022, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 2.4732269145281873, | |
| "grad_norm": 2.6948108673095703, | |
| "learning_rate": 3.6744280277467904e-07, | |
| "logits/chosen": -14.425226211547852, | |
| "logits/rejected": -14.381690979003906, | |
| "logps/chosen": -1.4246366024017334, | |
| "logps/rejected": -1.426334023475647, | |
| "loss": 1.5046, | |
| "odds_ratio_loss": 0.7999409437179565, | |
| "rewards/accuracies": 0.45625001192092896, | |
| "rewards/chosen": -0.14246365427970886, | |
| "rewards/margins": 0.00016971743025351316, | |
| "rewards/rejected": -0.14263339340686798, | |
| "sft_loss": 1.4246366024017334, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 2.489391796322489, | |
| "grad_norm": 1.6409363746643066, | |
| "learning_rate": 3.456410618180503e-07, | |
| "logits/chosen": -13.974553108215332, | |
| "logits/rejected": -14.2942533493042, | |
| "logps/chosen": -1.2257071733474731, | |
| "logps/rejected": -1.43178391456604, | |
| "loss": 1.2927, | |
| "odds_ratio_loss": 0.6698334217071533, | |
| "rewards/accuracies": 0.543749988079071, | |
| "rewards/chosen": -0.1225707158446312, | |
| "rewards/margins": 0.020607685670256615, | |
| "rewards/rejected": -0.14317841827869415, | |
| "sft_loss": 1.2257071733474731, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 2.5055566781167915, | |
| "grad_norm": 1.3992644548416138, | |
| "learning_rate": 3.244579563165753e-07, | |
| "logits/chosen": -14.36426830291748, | |
| "logits/rejected": -14.48327922821045, | |
| "logps/chosen": -1.2957897186279297, | |
| "logps/rejected": -1.4375650882720947, | |
| "loss": 1.3673, | |
| "odds_ratio_loss": 0.7152336239814758, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -0.12957896292209625, | |
| "rewards/margins": 0.014177536591887474, | |
| "rewards/rejected": -0.14375647902488708, | |
| "sft_loss": 1.2957897186279297, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 2.521721559911093, | |
| "grad_norm": 0.9756754636764526, | |
| "learning_rate": 3.038995691099697e-07, | |
| "logits/chosen": -14.465911865234375, | |
| "logits/rejected": -14.273321151733398, | |
| "logps/chosen": -1.3624980449676514, | |
| "logps/rejected": -1.5072979927062988, | |
| "loss": 1.4344, | |
| "odds_ratio_loss": 0.7189978361129761, | |
| "rewards/accuracies": 0.4749999940395355, | |
| "rewards/chosen": -0.13624981045722961, | |
| "rewards/margins": 0.014479981735348701, | |
| "rewards/rejected": -0.15072980523109436, | |
| "sft_loss": 1.3624980449676514, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 2.5378864417053952, | |
| "grad_norm": 2.6390867233276367, | |
| "learning_rate": 2.839718036468192e-07, | |
| "logits/chosen": -14.324618339538574, | |
| "logits/rejected": -14.362611770629883, | |
| "logps/chosen": -1.4562547206878662, | |
| "logps/rejected": -1.4829699993133545, | |
| "loss": 1.5307, | |
| "odds_ratio_loss": 0.7442874312400818, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.14562548696994781, | |
| "rewards/margins": 0.0026715078856796026, | |
| "rewards/rejected": -0.1482969969511032, | |
| "sft_loss": 1.4562547206878662, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 2.5540513234996967, | |
| "grad_norm": 1.9648209810256958, | |
| "learning_rate": 2.646803822893723e-07, | |
| "logits/chosen": -14.38152027130127, | |
| "logits/rejected": -14.392126083374023, | |
| "logps/chosen": -1.4547812938690186, | |
| "logps/rejected": -1.4928423166275024, | |
| "loss": 1.5325, | |
| "odds_ratio_loss": 0.7773637175559998, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.14547815918922424, | |
| "rewards/margins": 0.003806093242019415, | |
| "rewards/rejected": -0.1492842435836792, | |
| "sft_loss": 1.4547812938690186, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 2.570216205293999, | |
| "grad_norm": 1.1905252933502197, | |
| "learning_rate": 2.460308446703341e-07, | |
| "logits/chosen": -14.339777946472168, | |
| "logits/rejected": -14.1979398727417, | |
| "logps/chosen": -1.3354339599609375, | |
| "logps/rejected": -1.348439335823059, | |
| "loss": 1.4097, | |
| "odds_ratio_loss": 0.7425277829170227, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -0.13354340195655823, | |
| "rewards/margins": 0.0013005301589146256, | |
| "rewards/rejected": -0.13484393060207367, | |
| "sft_loss": 1.3354339599609375, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 2.5863810870883004, | |
| "grad_norm": 4.711751461029053, | |
| "learning_rate": 2.2802854610213143e-07, | |
| "logits/chosen": -14.302705764770508, | |
| "logits/rejected": -14.19762134552002, | |
| "logps/chosen": -1.3138768672943115, | |
| "logps/rejected": -1.4147188663482666, | |
| "loss": 1.3864, | |
| "odds_ratio_loss": 0.7257053256034851, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -0.13138769567012787, | |
| "rewards/margins": 0.010084209032356739, | |
| "rewards/rejected": -0.14147189259529114, | |
| "sft_loss": 1.3138768672943115, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 2.6025459688826027, | |
| "grad_norm": 4.042973518371582, | |
| "learning_rate": 2.106786560391072e-07, | |
| "logits/chosen": -14.2058744430542, | |
| "logits/rejected": -14.269085884094238, | |
| "logps/chosen": -1.3923499584197998, | |
| "logps/rejected": -1.3771612644195557, | |
| "loss": 1.4698, | |
| "odds_ratio_loss": 0.7747048139572144, | |
| "rewards/accuracies": 0.4937500059604645, | |
| "rewards/chosen": -0.1392349898815155, | |
| "rewards/margins": -0.0015188835095614195, | |
| "rewards/rejected": -0.1377161294221878, | |
| "sft_loss": 1.3923499584197998, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 2.6187108506769046, | |
| "grad_norm": 1.3606544733047485, | |
| "learning_rate": 1.9398615659308255e-07, | |
| "logits/chosen": -14.2599515914917, | |
| "logits/rejected": -14.334997177124023, | |
| "logps/chosen": -1.3270127773284912, | |
| "logps/rejected": -1.3853967189788818, | |
| "loss": 1.3982, | |
| "odds_ratio_loss": 0.7119258046150208, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.13270129263401031, | |
| "rewards/margins": 0.005838391836732626, | |
| "rewards/rejected": -0.13853967189788818, | |
| "sft_loss": 1.3270127773284912, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 2.6348757324712064, | |
| "grad_norm": 1.4494473934173584, | |
| "learning_rate": 1.7795584110272184e-07, | |
| "logits/chosen": -14.470367431640625, | |
| "logits/rejected": -14.478838920593262, | |
| "logps/chosen": -1.3744457960128784, | |
| "logps/rejected": -1.4546699523925781, | |
| "loss": 1.4475, | |
| "odds_ratio_loss": 0.730518639087677, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -0.13744458556175232, | |
| "rewards/margins": 0.008022413589060307, | |
| "rewards/rejected": -0.14546698331832886, | |
| "sft_loss": 1.3744457960128784, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 2.6510406142655083, | |
| "grad_norm": 2.888951539993286, | |
| "learning_rate": 1.6259231275709636e-07, | |
| "logits/chosen": -14.41100788116455, | |
| "logits/rejected": -14.428006172180176, | |
| "logps/chosen": -1.3241318464279175, | |
| "logps/rejected": -1.318234920501709, | |
| "loss": 1.4028, | |
| "odds_ratio_loss": 0.7864112257957458, | |
| "rewards/accuracies": 0.4937500059604645, | |
| "rewards/chosen": -0.13241317868232727, | |
| "rewards/margins": -0.000589700706768781, | |
| "rewards/rejected": -0.13182349503040314, | |
| "sft_loss": 1.3241318464279175, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 2.66720549605981, | |
| "grad_norm": 1.5565133094787598, | |
| "learning_rate": 1.478999832738548e-07, | |
| "logits/chosen": -14.382177352905273, | |
| "logits/rejected": -14.320945739746094, | |
| "logps/chosen": -1.297300934791565, | |
| "logps/rejected": -1.4187005758285522, | |
| "loss": 1.368, | |
| "odds_ratio_loss": 0.7067518830299377, | |
| "rewards/accuracies": 0.5375000238418579, | |
| "rewards/chosen": -0.12973010540008545, | |
| "rewards/margins": 0.01213997695595026, | |
| "rewards/rejected": -0.14187008142471313, | |
| "sft_loss": 1.297300934791565, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 2.683370377854112, | |
| "grad_norm": 2.0713951587677, | |
| "learning_rate": 1.338830716323769e-07, | |
| "logits/chosen": -14.337793350219727, | |
| "logits/rejected": -14.350440979003906, | |
| "logps/chosen": -1.3087949752807617, | |
| "logps/rejected": -1.350098967552185, | |
| "loss": 1.383, | |
| "odds_ratio_loss": 0.7419986724853516, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.1308794915676117, | |
| "rewards/margins": 0.004130417015403509, | |
| "rewards/rejected": -0.13500989973545074, | |
| "sft_loss": 1.3087949752807617, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 2.699535259648414, | |
| "grad_norm": 2.8654770851135254, | |
| "learning_rate": 1.205456028622723e-07, | |
| "logits/chosen": -14.387499809265137, | |
| "logits/rejected": -14.384310722351074, | |
| "logps/chosen": -1.2575846910476685, | |
| "logps/rejected": -1.4380841255187988, | |
| "loss": 1.3249, | |
| "odds_ratio_loss": 0.6730828285217285, | |
| "rewards/accuracies": 0.512499988079071, | |
| "rewards/chosen": -0.12575848400592804, | |
| "rewards/margins": 0.018049929291009903, | |
| "rewards/rejected": -0.14380840957164764, | |
| "sft_loss": 1.2575846910476685, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 2.7157001414427158, | |
| "grad_norm": 2.644263505935669, | |
| "learning_rate": 1.0789140688756805e-07, | |
| "logits/chosen": -14.564410209655762, | |
| "logits/rejected": -14.484796524047852, | |
| "logps/chosen": -1.331872582435608, | |
| "logps/rejected": -1.4917659759521484, | |
| "loss": 1.3983, | |
| "odds_ratio_loss": 0.6643630862236023, | |
| "rewards/accuracies": 0.5874999761581421, | |
| "rewards/chosen": -0.13318723440170288, | |
| "rewards/margins": 0.015989361330866814, | |
| "rewards/rejected": -0.14917659759521484, | |
| "sft_loss": 1.331872582435608, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 2.7318650232370176, | |
| "grad_norm": 1.8434594869613647, | |
| "learning_rate": 9.592411742693098e-08, | |
| "logits/chosen": -14.349563598632812, | |
| "logits/rejected": -14.297950744628906, | |
| "logps/chosen": -1.284172773361206, | |
| "logps/rejected": -1.3313789367675781, | |
| "loss": 1.3598, | |
| "odds_ratio_loss": 0.7563740611076355, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -0.12841728329658508, | |
| "rewards/margins": 0.004720622207969427, | |
| "rewards/rejected": -0.13313789665699005, | |
| "sft_loss": 1.284172773361206, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 2.7480299050313195, | |
| "grad_norm": 0.9198280572891235, | |
| "learning_rate": 8.464717095022168e-08, | |
| "logits/chosen": -14.535560607910156, | |
| "logits/rejected": -14.29857349395752, | |
| "logps/chosen": -1.291333794593811, | |
| "logps/rejected": -1.4038417339324951, | |
| "loss": 1.3626, | |
| "odds_ratio_loss": 0.7129305601119995, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.12913337349891663, | |
| "rewards/margins": 0.011250784620642662, | |
| "rewards/rejected": -0.14038416743278503, | |
| "sft_loss": 1.291333794593811, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 2.7641947868256214, | |
| "grad_norm": 1.85430908203125, | |
| "learning_rate": 7.406380569169841e-08, | |
| "logits/chosen": -14.304112434387207, | |
| "logits/rejected": -14.291776657104492, | |
| "logps/chosen": -1.3815504312515259, | |
| "logps/rejected": -1.3685299158096313, | |
| "loss": 1.4574, | |
| "odds_ratio_loss": 0.7585769891738892, | |
| "rewards/accuracies": 0.46875, | |
| "rewards/chosen": -0.13815505802631378, | |
| "rewards/margins": -0.0013020627666264772, | |
| "rewards/rejected": -0.13685297966003418, | |
| "sft_loss": 1.3815504312515259, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 2.7803596686199232, | |
| "grad_norm": 7.879937171936035, | |
| "learning_rate": 6.417706072013808e-08, | |
| "logits/chosen": -14.357699394226074, | |
| "logits/rejected": -14.520744323730469, | |
| "logps/chosen": -1.4151430130004883, | |
| "logps/rejected": -1.4842795133590698, | |
| "loss": 1.4887, | |
| "odds_ratio_loss": 0.7356118559837341, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.14151428639888763, | |
| "rewards/margins": 0.006913675460964441, | |
| "rewards/rejected": -0.14842796325683594, | |
| "sft_loss": 1.4151430130004883, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 2.796524550414225, | |
| "grad_norm": 2.3623361587524414, | |
| "learning_rate": 5.498977506615294e-08, | |
| "logits/chosen": -14.438512802124023, | |
| "logits/rejected": -14.370248794555664, | |
| "logps/chosen": -1.4021018743515015, | |
| "logps/rejected": -1.3835337162017822, | |
| "loss": 1.4818, | |
| "odds_ratio_loss": 0.796977698802948, | |
| "rewards/accuracies": 0.41874998807907104, | |
| "rewards/chosen": -0.14021018147468567, | |
| "rewards/margins": -0.0018568048253655434, | |
| "rewards/rejected": -0.1383533775806427, | |
| "sft_loss": 1.4021018743515015, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 2.812689432208527, | |
| "grad_norm": 1.0650444030761719, | |
| "learning_rate": 4.6504586906947756e-08, | |
| "logits/chosen": -14.35010051727295, | |
| "logits/rejected": -14.401901245117188, | |
| "logps/chosen": -1.3507376909255981, | |
| "logps/rejected": -1.4280903339385986, | |
| "loss": 1.4204, | |
| "odds_ratio_loss": 0.6963773369789124, | |
| "rewards/accuracies": 0.5687500238418579, | |
| "rewards/chosen": -0.13507376611232758, | |
| "rewards/margins": 0.007735266350209713, | |
| "rewards/rejected": -0.14280903339385986, | |
| "sft_loss": 1.3507376909255981, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 2.828854314002829, | |
| "grad_norm": 5.588193893432617, | |
| "learning_rate": 3.8723932808754914e-08, | |
| "logits/chosen": -14.620956420898438, | |
| "logits/rejected": -14.591873168945312, | |
| "logps/chosen": -1.4141243696212769, | |
| "logps/rejected": -1.4447482824325562, | |
| "loss": 1.4888, | |
| "odds_ratio_loss": 0.7466815710067749, | |
| "rewards/accuracies": 0.4375, | |
| "rewards/chosen": -0.1414124220609665, | |
| "rewards/margins": 0.003062391420826316, | |
| "rewards/rejected": -0.1444748193025589, | |
| "sft_loss": 1.4141243696212769, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 2.8450191957971307, | |
| "grad_norm": 2.8461813926696777, | |
| "learning_rate": 3.1650047027158014e-08, | |
| "logits/chosen": -14.406710624694824, | |
| "logits/rejected": -14.431941032409668, | |
| "logps/chosen": -1.3235969543457031, | |
| "logps/rejected": -1.378565788269043, | |
| "loss": 1.3941, | |
| "odds_ratio_loss": 0.7055075764656067, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.13235969841480255, | |
| "rewards/margins": 0.005496888421475887, | |
| "rewards/rejected": -0.13785657286643982, | |
| "sft_loss": 1.3235969543457031, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 2.8611840775914326, | |
| "grad_norm": 1.4648724794387817, | |
| "learning_rate": 2.5284960865517848e-08, | |
| "logits/chosen": -14.247715950012207, | |
| "logits/rejected": -14.30573844909668, | |
| "logps/chosen": -1.2652337551116943, | |
| "logps/rejected": -1.3874812126159668, | |
| "loss": 1.3373, | |
| "odds_ratio_loss": 0.7210808992385864, | |
| "rewards/accuracies": 0.48750001192092896, | |
| "rewards/chosen": -0.12652337551116943, | |
| "rewards/margins": 0.012224750593304634, | |
| "rewards/rejected": -0.13874812424182892, | |
| "sft_loss": 1.2652337551116943, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 2.8773489593857344, | |
| "grad_norm": 1.2711795568466187, | |
| "learning_rate": 1.9630502091670388e-08, | |
| "logits/chosen": -14.345422744750977, | |
| "logits/rejected": -14.210649490356445, | |
| "logps/chosen": -1.3347010612487793, | |
| "logps/rejected": -1.4864898920059204, | |
| "loss": 1.4034, | |
| "odds_ratio_loss": 0.686531126499176, | |
| "rewards/accuracies": 0.581250011920929, | |
| "rewards/chosen": -0.13347011804580688, | |
| "rewards/margins": 0.015178876928985119, | |
| "rewards/rejected": -0.14864897727966309, | |
| "sft_loss": 1.3347010612487793, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 2.8935138411800363, | |
| "grad_norm": 4.285287857055664, | |
| "learning_rate": 1.4688294413074677e-08, | |
| "logits/chosen": -14.240816116333008, | |
| "logits/rejected": -14.293863296508789, | |
| "logps/chosen": -1.2230440378189087, | |
| "logps/rejected": -1.3717424869537354, | |
| "loss": 1.2918, | |
| "odds_ratio_loss": 0.6871523857116699, | |
| "rewards/accuracies": 0.5, | |
| "rewards/chosen": -0.12230439484119415, | |
| "rewards/margins": 0.014869834296405315, | |
| "rewards/rejected": -0.13717423379421234, | |
| "sft_loss": 1.2230440378189087, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 2.909678722974338, | |
| "grad_norm": 1.111965298652649, | |
| "learning_rate": 1.0459757010556626e-08, | |
| "logits/chosen": -14.294512748718262, | |
| "logits/rejected": -14.2905912399292, | |
| "logps/chosen": -1.3162596225738525, | |
| "logps/rejected": -1.357807993888855, | |
| "loss": 1.3902, | |
| "odds_ratio_loss": 0.7398349046707153, | |
| "rewards/accuracies": 0.5062500238418579, | |
| "rewards/chosen": -0.1316259652376175, | |
| "rewards/margins": 0.0041548521257936954, | |
| "rewards/rejected": -0.13578079640865326, | |
| "sft_loss": 1.3162596225738525, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 2.92584360476864, | |
| "grad_norm": 1.985671043395996, | |
| "learning_rate": 6.94610413078306e-09, | |
| "logits/chosen": -14.099322319030762, | |
| "logits/rejected": -14.289319038391113, | |
| "logps/chosen": -1.3942023515701294, | |
| "logps/rejected": -1.5463578701019287, | |
| "loss": 1.4669, | |
| "odds_ratio_loss": 0.7267955541610718, | |
| "rewards/accuracies": 0.4937500059604645, | |
| "rewards/chosen": -0.13942024111747742, | |
| "rewards/margins": 0.015215557999908924, | |
| "rewards/rejected": -0.15463578701019287, | |
| "sft_loss": 1.3942023515701294, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 2.942008486562942, | |
| "grad_norm": 1.1975542306900024, | |
| "learning_rate": 4.14834473758563e-09, | |
| "logits/chosen": -14.166104316711426, | |
| "logits/rejected": -14.219152450561523, | |
| "logps/chosen": -1.2467665672302246, | |
| "logps/rejected": -1.3985602855682373, | |
| "loss": 1.3162, | |
| "odds_ratio_loss": 0.6939627528190613, | |
| "rewards/accuracies": 0.5625, | |
| "rewards/chosen": -0.1246766597032547, | |
| "rewards/margins": 0.015179386362433434, | |
| "rewards/rejected": -0.13985604047775269, | |
| "sft_loss": 1.2467665672302246, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 2.9581733683572438, | |
| "grad_norm": 1.3036004304885864, | |
| "learning_rate": 2.067282222230349e-09, | |
| "logits/chosen": -14.375224113464355, | |
| "logits/rejected": -14.571484565734863, | |
| "logps/chosen": -1.326818585395813, | |
| "logps/rejected": -1.477850317955017, | |
| "loss": 1.3957, | |
| "odds_ratio_loss": 0.6886210441589355, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -0.13268187642097473, | |
| "rewards/margins": 0.015103173442184925, | |
| "rewards/rejected": -0.1477850377559662, | |
| "sft_loss": 1.326818585395813, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 2.9743382501515456, | |
| "grad_norm": 6.394278049468994, | |
| "learning_rate": 7.035141727212979e-10, | |
| "logits/chosen": -14.3215913772583, | |
| "logits/rejected": -14.438852310180664, | |
| "logps/chosen": -1.256394386291504, | |
| "logps/rejected": -1.3541960716247559, | |
| "loss": 1.3287, | |
| "odds_ratio_loss": 0.7228869199752808, | |
| "rewards/accuracies": 0.550000011920929, | |
| "rewards/chosen": -0.1256394237279892, | |
| "rewards/margins": 0.009780170395970345, | |
| "rewards/rejected": -0.13541960716247559, | |
| "sft_loss": 1.256394386291504, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 2.9905031319458475, | |
| "grad_norm": 2.8705546855926514, | |
| "learning_rate": 5.743220219761592e-11, | |
| "logits/chosen": -14.366948127746582, | |
| "logits/rejected": -14.415715217590332, | |
| "logps/chosen": -1.3598301410675049, | |
| "logps/rejected": -1.40765380859375, | |
| "loss": 1.4375, | |
| "odds_ratio_loss": 0.7764675617218018, | |
| "rewards/accuracies": 0.4937500059604645, | |
| "rewards/chosen": -0.13598300516605377, | |
| "rewards/margins": 0.004782381001859903, | |
| "rewards/rejected": -0.14076539874076843, | |
| "sft_loss": 1.3598301410675049, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 2.9969690846635686, | |
| "step": 1854, | |
| "total_flos": 1.9131711497471508e+18, | |
| "train_loss": 1.4823461713142765, | |
| "train_runtime": 22122.5243, | |
| "train_samples_per_second": 1.342, | |
| "train_steps_per_second": 0.084 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 1854, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "total_flos": 1.9131711497471508e+18, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |