{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 4.0, "eval_steps": 5, "global_step": 120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1694915254237288, "grad_norm": 2.790442705154419, "learning_rate": 1.6666666666666664e-08, "log_odds_chosen": 0.5680092573165894, "log_odds_ratio": -0.5170037746429443, "logits/chosen": -0.5872289538383484, "logits/rejected": -0.1308290660381317, "logps/chosen": -1.1121927499771118, "logps/rejected": -1.5295088291168213, "loss": 1.2668, "nll_loss": 1.2151492834091187, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.11121927201747894, "rewards/margins": 0.04173160716891289, "rewards/rejected": -0.15295089781284332, "step": 5 }, { "epoch": 0.1694915254237288, "eval_log_odds_chosen": 0.344593346118927, "eval_log_odds_ratio": -0.6202001571655273, "eval_logits/chosen": -0.8292851448059082, "eval_logits/rejected": -0.7480515837669373, "eval_logps/chosen": -1.200099229812622, "eval_logps/rejected": -1.4381370544433594, "eval_loss": 1.3301414251327515, "eval_nll_loss": 1.310793399810791, "eval_rewards/accuracies": 0.6875, "eval_rewards/chosen": -0.12000991404056549, "eval_rewards/margins": 0.023803792893886566, "eval_rewards/rejected": -0.14381369948387146, "eval_runtime": 2.2841, "eval_samples_per_second": 6.567, "eval_steps_per_second": 1.751, "step": 5 }, { "epoch": 0.3389830508474576, "grad_norm": 3.6753456592559814, "learning_rate": 3.75e-08, "log_odds_chosen": 0.6411725878715515, "log_odds_ratio": -0.4649675488471985, "logits/chosen": -0.5985379219055176, "logits/rejected": -0.15392252802848816, "logps/chosen": -1.0119307041168213, "logps/rejected": -1.4758830070495605, "loss": 1.1771, "nll_loss": 1.1306263208389282, "rewards/accuracies": 0.887499988079071, "rewards/chosen": -0.10119307041168213, "rewards/margins": 0.046395231038331985, "rewards/rejected": -0.14758829772472382, "step": 10 }, { "epoch": 0.3389830508474576, "eval_log_odds_chosen": 0.3463364243507385, "eval_log_odds_ratio": -0.6194710731506348, "eval_logits/chosen": -0.826720654964447, "eval_logits/rejected": -0.7461210489273071, "eval_logps/chosen": -1.1986223459243774, "eval_logps/rejected": -1.437687635421753, "eval_loss": 1.3279355764389038, "eval_nll_loss": 1.3084385395050049, "eval_rewards/accuracies": 0.6875, "eval_rewards/chosen": -0.11986224353313446, "eval_rewards/margins": 0.023906530812382698, "eval_rewards/rejected": -0.143768772482872, "eval_runtime": 2.3646, "eval_samples_per_second": 6.343, "eval_steps_per_second": 1.692, "step": 10 }, { "epoch": 0.5084745762711864, "grad_norm": 2.6734206676483154, "learning_rate": 4.9074074074074074e-08, "log_odds_chosen": 0.5467322468757629, "log_odds_ratio": -0.5080639123916626, "logits/chosen": -0.6727955937385559, "logits/rejected": -0.2421862781047821, "logps/chosen": -1.0358315706253052, "logps/rejected": -1.4172828197479248, "loss": 1.1984, "nll_loss": 1.147613763809204, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.10358314216136932, "rewards/margins": 0.03814515098929405, "rewards/rejected": -0.14172828197479248, "step": 15 }, { "epoch": 0.5084745762711864, "eval_log_odds_chosen": 0.3439960777759552, "eval_log_odds_ratio": -0.6200249791145325, "eval_logits/chosen": -0.8270355463027954, "eval_logits/rejected": -0.7460318803787231, "eval_logps/chosen": -1.1992497444152832, "eval_logps/rejected": -1.4363036155700684, "eval_loss": 1.3248008489608765, "eval_nll_loss": 1.3052968978881836, "eval_rewards/accuracies": 0.6875, "eval_rewards/chosen": -0.11992497742176056, "eval_rewards/margins": 0.023705393075942993, "eval_rewards/rejected": -0.14363038539886475, "eval_runtime": 2.3001, "eval_samples_per_second": 6.522, "eval_steps_per_second": 1.739, "step": 15 }, { "epoch": 0.6779661016949152, "grad_norm": 2.985229253768921, "learning_rate": 4.675925925925926e-08, "log_odds_chosen": 0.5538958311080933, "log_odds_ratio": -0.4844888150691986, "logits/chosen": -0.6328302621841431, "logits/rejected": -0.2196337729692459, "logps/chosen": -0.9897964596748352, "logps/rejected": -1.3761847019195557, "loss": 1.1464, "nll_loss": 1.0979585647583008, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.098979651927948, "rewards/margins": 0.038638822734355927, "rewards/rejected": -0.13761845231056213, "step": 20 }, { "epoch": 0.6779661016949152, "eval_log_odds_chosen": 0.3439162075519562, "eval_log_odds_ratio": -0.6207376718521118, "eval_logits/chosen": -0.8287538290023804, "eval_logits/rejected": -0.7500149011611938, "eval_logps/chosen": -1.1994065046310425, "eval_logps/rejected": -1.4364498853683472, "eval_loss": 1.3215163946151733, "eval_nll_loss": 1.3019251823425293, "eval_rewards/accuracies": 0.6875, "eval_rewards/chosen": -0.11994065344333649, "eval_rewards/margins": 0.02370433509349823, "eval_rewards/rejected": -0.14364498853683472, "eval_runtime": 2.4028, "eval_samples_per_second": 6.243, "eval_steps_per_second": 1.665, "step": 20 }, { "epoch": 0.847457627118644, "grad_norm": 2.2889516353607178, "learning_rate": 4.444444444444444e-08, "log_odds_chosen": 0.5198447108268738, "log_odds_ratio": -0.5137643218040466, "logits/chosen": -0.5211045145988464, "logits/rejected": -0.12277780473232269, "logps/chosen": -0.9896445274353027, "logps/rejected": -1.3587530851364136, "loss": 1.1928, "nll_loss": 1.141424536705017, "rewards/accuracies": 0.887499988079071, "rewards/chosen": -0.09896446019411087, "rewards/margins": 0.036910854279994965, "rewards/rejected": -0.13587531447410583, "step": 25 }, { "epoch": 0.847457627118644, "eval_log_odds_chosen": 0.3448036313056946, "eval_log_odds_ratio": -0.6197227239608765, "eval_logits/chosen": -0.8284635543823242, "eval_logits/rejected": -0.7477390170097351, "eval_logps/chosen": -1.1986567974090576, "eval_logps/rejected": -1.4361884593963623, "eval_loss": 1.3181345462799072, "eval_nll_loss": 1.2985508441925049, "eval_rewards/accuracies": 0.6875, "eval_rewards/chosen": -0.11986568570137024, "eval_rewards/margins": 0.023753169924020767, "eval_rewards/rejected": -0.1436188519001007, "eval_runtime": 2.4698, "eval_samples_per_second": 6.073, "eval_steps_per_second": 1.62, "step": 25 }, { "epoch": 1.0, "grad_norm": 3.0400283336639404, "learning_rate": 4.2129629629629625e-08, "log_odds_chosen": 0.6260524988174438, "log_odds_ratio": -0.4879433214664459, "logits/chosen": -0.6383404731750488, "logits/rejected": -0.11991772055625916, "logps/chosen": -1.0283366441726685, "logps/rejected": -1.4722530841827393, "loss": 1.1457, "nll_loss": 1.1040095090866089, "rewards/accuracies": 0.875, "rewards/chosen": -0.10283366590738297, "rewards/margins": 0.04439166933298111, "rewards/rejected": -0.14722532033920288, "step": 30 }, { "epoch": 1.0, "eval_log_odds_chosen": 0.3446802496910095, "eval_log_odds_ratio": -0.6202990412712097, "eval_logits/chosen": -0.8293232321739197, "eval_logits/rejected": -0.749043881893158, "eval_logps/chosen": -1.1981878280639648, "eval_logps/rejected": -1.4355335235595703, "eval_loss": 1.3151671886444092, "eval_nll_loss": 1.295397162437439, "eval_rewards/accuracies": 0.6875, "eval_rewards/chosen": -0.119818776845932, "eval_rewards/margins": 0.02373456582427025, "eval_rewards/rejected": -0.14355334639549255, "eval_runtime": 2.3634, "eval_samples_per_second": 6.347, "eval_steps_per_second": 1.692, "step": 30 }, { "epoch": 1.1694915254237288, "grad_norm": 2.696627140045166, "learning_rate": 3.9814814814814815e-08, "log_odds_chosen": 0.49621137976646423, "log_odds_ratio": -0.5309565663337708, "logits/chosen": -0.6420741081237793, "logits/rejected": -0.18228447437286377, "logps/chosen": -1.008725881576538, "logps/rejected": -1.3604915142059326, "loss": 1.1125, "nll_loss": 1.0594511032104492, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.10087261348962784, "rewards/margins": 0.03517654910683632, "rewards/rejected": -0.13604915142059326, "step": 35 }, { "epoch": 1.1694915254237288, "eval_log_odds_chosen": 0.3452969193458557, "eval_log_odds_ratio": -0.6196750402450562, "eval_logits/chosen": -0.8258322477340698, "eval_logits/rejected": -0.7455801367759705, "eval_logps/chosen": -1.1991809606552124, "eval_logps/rejected": -1.4370383024215698, "eval_loss": 1.3133561611175537, "eval_nll_loss": 1.2936301231384277, "eval_rewards/accuracies": 0.6875, "eval_rewards/chosen": -0.119918093085289, "eval_rewards/margins": 0.023785727098584175, "eval_rewards/rejected": -0.14370381832122803, "eval_runtime": 2.2925, "eval_samples_per_second": 6.543, "eval_steps_per_second": 1.745, "step": 35 }, { "epoch": 1.3389830508474576, "grad_norm": 3.0447092056274414, "learning_rate": 3.75e-08, "log_odds_chosen": 0.6023409962654114, "log_odds_ratio": -0.48153096437454224, "logits/chosen": -0.6359589099884033, "logits/rejected": -0.12003572285175323, "logps/chosen": -1.014690637588501, "logps/rejected": -1.4444173574447632, "loss": 1.1185, "nll_loss": 1.0703380107879639, "rewards/accuracies": 0.875, "rewards/chosen": -0.10146906226873398, "rewards/margins": 0.04297268018126488, "rewards/rejected": -0.14444175362586975, "step": 40 }, { "epoch": 1.3389830508474576, "eval_log_odds_chosen": 0.3457057476043701, "eval_log_odds_ratio": -0.6200236082077026, "eval_logits/chosen": -0.8274150490760803, "eval_logits/rejected": -0.7458513379096985, "eval_logps/chosen": -1.1985080242156982, "eval_logps/rejected": -1.436950922012329, "eval_loss": 1.310854196548462, "eval_nll_loss": 1.2910186052322388, "eval_rewards/accuracies": 0.6875, "eval_rewards/chosen": -0.11985080689191818, "eval_rewards/margins": 0.023844290524721146, "eval_rewards/rejected": -0.14369508624076843, "eval_runtime": 2.0709, "eval_samples_per_second": 7.243, "eval_steps_per_second": 1.932, "step": 40 }, { "epoch": 1.5084745762711864, "grad_norm": 2.4286935329437256, "learning_rate": 3.518518518518518e-08, "log_odds_chosen": 0.43364983797073364, "log_odds_ratio": -0.5388425588607788, "logits/chosen": -0.6143006086349487, "logits/rejected": -0.17402119934558868, "logps/chosen": -1.0169684886932373, "logps/rejected": -1.3061316013336182, "loss": 1.2022, "nll_loss": 1.1483418941497803, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.10169683396816254, "rewards/margins": 0.028916319832205772, "rewards/rejected": -0.13061316311359406, "step": 45 }, { "epoch": 1.5084745762711864, "eval_log_odds_chosen": 0.34640029072761536, "eval_log_odds_ratio": -0.6196874380111694, "eval_logits/chosen": -0.8271859884262085, "eval_logits/rejected": -0.7468405365943909, "eval_logps/chosen": -1.1981958150863647, "eval_logps/rejected": -1.4372470378875732, "eval_loss": 1.3086471557617188, "eval_nll_loss": 1.2887359857559204, "eval_rewards/accuracies": 0.6875, "eval_rewards/chosen": -0.11981958150863647, "eval_rewards/margins": 0.02390512079000473, "eval_rewards/rejected": -0.1437247097492218, "eval_runtime": 2.4941, "eval_samples_per_second": 6.014, "eval_steps_per_second": 1.604, "step": 45 }, { "epoch": 1.6779661016949152, "grad_norm": 2.9268641471862793, "learning_rate": 3.287037037037037e-08, "log_odds_chosen": 0.5900410413742065, "log_odds_ratio": -0.49763163924217224, "logits/chosen": -0.5182631611824036, "logits/rejected": -0.06584630906581879, "logps/chosen": -1.1032148599624634, "logps/rejected": -1.5314563512802124, "loss": 1.2859, "nll_loss": 1.2361345291137695, "rewards/accuracies": 0.875, "rewards/chosen": -0.11032148450613022, "rewards/margins": 0.042824164032936096, "rewards/rejected": -0.1531456559896469, "step": 50 }, { "epoch": 1.6779661016949152, "eval_log_odds_chosen": 0.34582221508026123, "eval_log_odds_ratio": -0.6204431056976318, "eval_logits/chosen": -0.8273264169692993, "eval_logits/rejected": -0.7475500702857971, "eval_logps/chosen": -1.2007322311401367, "eval_logps/rejected": -1.439100980758667, "eval_loss": 1.3071892261505127, "eval_nll_loss": 1.2873029708862305, "eval_rewards/accuracies": 0.6875, "eval_rewards/chosen": -0.12007322907447815, "eval_rewards/margins": 0.02383686974644661, "eval_rewards/rejected": -0.14391009509563446, "eval_runtime": 2.3687, "eval_samples_per_second": 6.333, "eval_steps_per_second": 1.689, "step": 50 }, { "epoch": 1.847457627118644, "grad_norm": 2.923938274383545, "learning_rate": 3.0555555555555556e-08, "log_odds_chosen": 0.7271274328231812, "log_odds_ratio": -0.4399596154689789, "logits/chosen": -0.6317266225814819, "logits/rejected": -0.19635015726089478, "logps/chosen": -0.9985980987548828, "logps/rejected": -1.5293996334075928, "loss": 1.1276, "nll_loss": 1.083601474761963, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.0998598113656044, "rewards/margins": 0.05308016389608383, "rewards/rejected": -0.15293996036052704, "step": 55 }, { "epoch": 1.847457627118644, "eval_log_odds_chosen": 0.34833377599716187, "eval_log_odds_ratio": -0.6186259984970093, "eval_logits/chosen": -0.8269048929214478, "eval_logits/rejected": -0.7455801963806152, "eval_logps/chosen": -1.196911096572876, "eval_logps/rejected": -1.437239646911621, "eval_loss": 1.304530382156372, "eval_nll_loss": 1.2846966981887817, "eval_rewards/accuracies": 0.6875, "eval_rewards/chosen": -0.11969111114740372, "eval_rewards/margins": 0.02403285726904869, "eval_rewards/rejected": -0.1437239646911621, "eval_runtime": 2.3288, "eval_samples_per_second": 6.441, "eval_steps_per_second": 1.718, "step": 55 }, { "epoch": 2.0, "grad_norm": 2.946364641189575, "learning_rate": 2.8240740740740736e-08, "log_odds_chosen": 0.6078373193740845, "log_odds_ratio": -0.485850065946579, "logits/chosen": -0.6273930668830872, "logits/rejected": -0.21480894088745117, "logps/chosen": -1.0067973136901855, "logps/rejected": -1.4497339725494385, "loss": 1.1433, "nll_loss": 1.1121139526367188, "rewards/accuracies": 0.8888888955116272, "rewards/chosen": -0.10067972540855408, "rewards/margins": 0.044293683022260666, "rewards/rejected": -0.14497341215610504, "step": 60 }, { "epoch": 2.0, "eval_log_odds_chosen": 0.34563085436820984, "eval_log_odds_ratio": -0.6200534105300903, "eval_logits/chosen": -0.8266342878341675, "eval_logits/rejected": -0.7474446296691895, "eval_logps/chosen": -1.198333740234375, "eval_logps/rejected": -1.4364194869995117, "eval_loss": 1.30304753780365, "eval_nll_loss": 1.2828768491744995, "eval_rewards/accuracies": 0.6875, "eval_rewards/chosen": -0.11983337998390198, "eval_rewards/margins": 0.02380857989192009, "eval_rewards/rejected": -0.14364196360111237, "eval_runtime": 2.3652, "eval_samples_per_second": 6.342, "eval_steps_per_second": 1.691, "step": 60 }, { "epoch": 2.169491525423729, "grad_norm": 2.583970308303833, "learning_rate": 2.5925925925925923e-08, "log_odds_chosen": 0.6106057167053223, "log_odds_ratio": -0.4779096245765686, "logits/chosen": -0.686557412147522, "logits/rejected": -0.2725212275981903, "logps/chosen": -0.9596541523933411, "logps/rejected": -1.4074879884719849, "loss": 1.0813, "nll_loss": 1.0335239171981812, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.0959654226899147, "rewards/margins": 0.044783372431993484, "rewards/rejected": -0.1407487839460373, "step": 65 }, { "epoch": 2.169491525423729, "eval_log_odds_chosen": 0.3470456004142761, "eval_log_odds_ratio": -0.6191624402999878, "eval_logits/chosen": -0.8260448575019836, "eval_logits/rejected": -0.7457568645477295, "eval_logps/chosen": -1.1975462436676025, "eval_logps/rejected": -1.4365394115447998, "eval_loss": 1.3012058734893799, "eval_nll_loss": 1.2811425924301147, "eval_rewards/accuracies": 0.6875, "eval_rewards/chosen": -0.1197546198964119, "eval_rewards/margins": 0.023899313062429428, "eval_rewards/rejected": -0.14365392923355103, "eval_runtime": 2.0569, "eval_samples_per_second": 7.292, "eval_steps_per_second": 1.945, "step": 65 }, { "epoch": 2.3389830508474576, "grad_norm": 2.8739640712738037, "learning_rate": 2.361111111111111e-08, "log_odds_chosen": 0.6883528828620911, "log_odds_ratio": -0.4613940119743347, "logits/chosen": -0.6194564700126648, "logits/rejected": -0.16226115822792053, "logps/chosen": -1.0230361223220825, "logps/rejected": -1.5271246433258057, "loss": 1.0952, "nll_loss": 1.0490230321884155, "rewards/accuracies": 0.887499988079071, "rewards/chosen": -0.10230360180139542, "rewards/margins": 0.05040886998176575, "rewards/rejected": -0.15271246433258057, "step": 70 }, { "epoch": 2.3389830508474576, "eval_log_odds_chosen": 0.34613096714019775, "eval_log_odds_ratio": -0.6190251111984253, "eval_logits/chosen": -0.8254708051681519, "eval_logits/rejected": -0.7465603351593018, "eval_logps/chosen": -1.1970798969268799, "eval_logps/rejected": -1.4355697631835938, "eval_loss": 1.2998255491256714, "eval_nll_loss": 1.279675006866455, "eval_rewards/accuracies": 0.6875, "eval_rewards/chosen": -0.11970800161361694, "eval_rewards/margins": 0.023848986253142357, "eval_rewards/rejected": -0.14355698227882385, "eval_runtime": 2.1718, "eval_samples_per_second": 6.907, "eval_steps_per_second": 1.842, "step": 70 }, { "epoch": 2.5084745762711864, "grad_norm": 2.5808846950531006, "learning_rate": 2.1296296296296297e-08, "log_odds_chosen": 0.517440915107727, "log_odds_ratio": -0.5259458422660828, "logits/chosen": -0.5467637181282043, "logits/rejected": -0.10346652567386627, "logps/chosen": -1.0675327777862549, "logps/rejected": -1.437524437904358, "loss": 1.2494, "nll_loss": 1.1968111991882324, "rewards/accuracies": 0.887499988079071, "rewards/chosen": -0.10675326734781265, "rewards/margins": 0.036999184638261795, "rewards/rejected": -0.14375244081020355, "step": 75 }, { "epoch": 2.5084745762711864, "eval_log_odds_chosen": 0.34606456756591797, "eval_log_odds_ratio": -0.620042622089386, "eval_logits/chosen": -0.8240770697593689, "eval_logits/rejected": -0.7445046305656433, "eval_logps/chosen": -1.1980996131896973, "eval_logps/rejected": -1.436529517173767, "eval_loss": 1.2983678579330444, "eval_nll_loss": 1.27822744846344, "eval_rewards/accuracies": 0.6875, "eval_rewards/chosen": -0.11980997025966644, "eval_rewards/margins": 0.023842979222536087, "eval_rewards/rejected": -0.14365294575691223, "eval_runtime": 2.3948, "eval_samples_per_second": 6.264, "eval_steps_per_second": 1.67, "step": 75 }, { "epoch": 2.6779661016949152, "grad_norm": 3.2112486362457275, "learning_rate": 1.898148148148148e-08, "log_odds_chosen": 0.6882795095443726, "log_odds_ratio": -0.4549444615840912, "logits/chosen": -0.6079570651054382, "logits/rejected": -0.1440545618534088, "logps/chosen": -1.0264164209365845, "logps/rejected": -1.5247279405593872, "loss": 1.1376, "nll_loss": 1.0920751094818115, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.10264164209365845, "rewards/margins": 0.04983116313815117, "rewards/rejected": -0.15247280895709991, "step": 80 }, { "epoch": 2.6779661016949152, "eval_log_odds_chosen": 0.34864068031311035, "eval_log_odds_ratio": -0.6184805631637573, "eval_logits/chosen": -0.8214948773384094, "eval_logits/rejected": -0.7387450933456421, "eval_logps/chosen": -1.1962885856628418, "eval_logps/rejected": -1.4366211891174316, "eval_loss": 1.2968833446502686, "eval_nll_loss": 1.2767653465270996, "eval_rewards/accuracies": 0.6875, "eval_rewards/chosen": -0.11962885409593582, "eval_rewards/margins": 0.02403326891362667, "eval_rewards/rejected": -0.14366212487220764, "eval_runtime": 2.3377, "eval_samples_per_second": 6.417, "eval_steps_per_second": 1.711, "step": 80 }, { "epoch": 2.847457627118644, "grad_norm": 1.839969277381897, "learning_rate": 1.6666666666666664e-08, "log_odds_chosen": 0.4969852864742279, "log_odds_ratio": -0.532636284828186, "logits/chosen": -0.5632266998291016, "logits/rejected": -0.06907184422016144, "logps/chosen": -1.0492427349090576, "logps/rejected": -1.3902790546417236, "loss": 1.2407, "nll_loss": 1.1874761581420898, "rewards/accuracies": 0.8125, "rewards/chosen": -0.1049242839217186, "rewards/margins": 0.03410361707210541, "rewards/rejected": -0.1390278935432434, "step": 85 }, { "epoch": 2.847457627118644, "eval_log_odds_chosen": 0.34886401891708374, "eval_log_odds_ratio": -0.6193042993545532, "eval_logits/chosen": -0.8233250379562378, "eval_logits/rejected": -0.7452165484428406, "eval_logps/chosen": -1.1970219612121582, "eval_logps/rejected": -1.4376810789108276, "eval_loss": 1.2961064577102661, "eval_nll_loss": 1.2759020328521729, "eval_rewards/accuracies": 0.6875, "eval_rewards/chosen": -0.11970219016075134, "eval_rewards/margins": 0.02406592108309269, "eval_rewards/rejected": -0.14376811683177948, "eval_runtime": 2.3326, "eval_samples_per_second": 6.431, "eval_steps_per_second": 1.715, "step": 85 }, { "epoch": 3.0, "grad_norm": 2.8928427696228027, "learning_rate": 1.435185185185185e-08, "log_odds_chosen": 0.45060819387435913, "log_odds_ratio": -0.5237997174263, "logits/chosen": -0.5375601649284363, "logits/rejected": -0.10669712722301483, "logps/chosen": -1.026106595993042, "logps/rejected": -1.3331278562545776, "loss": 1.1632, "nll_loss": 1.1324602365493774, "rewards/accuracies": 0.8611111044883728, "rewards/chosen": -0.10261066257953644, "rewards/margins": 0.030702121555805206, "rewards/rejected": -0.13331276178359985, "step": 90 }, { "epoch": 3.0, "eval_log_odds_chosen": 0.3472563922405243, "eval_log_odds_ratio": -0.6190530061721802, "eval_logits/chosen": -0.8260915279388428, "eval_logits/rejected": -0.7494507431983948, "eval_logps/chosen": -1.1969666481018066, "eval_logps/rejected": -1.4364831447601318, "eval_loss": 1.295333743095398, "eval_nll_loss": 1.2750121355056763, "eval_rewards/accuracies": 0.6875, "eval_rewards/chosen": -0.11969666182994843, "eval_rewards/margins": 0.023951642215251923, "eval_rewards/rejected": -0.14364831149578094, "eval_runtime": 2.4234, "eval_samples_per_second": 6.19, "eval_steps_per_second": 1.651, "step": 90 }, { "epoch": 3.169491525423729, "grad_norm": 2.178267478942871, "learning_rate": 1.2037037037037036e-08, "log_odds_chosen": 0.6245774030685425, "log_odds_ratio": -0.47914019227027893, "logits/chosen": -0.5936748385429382, "logits/rejected": -0.12400760501623154, "logps/chosen": -1.0799325704574585, "logps/rejected": -1.5386043787002563, "loss": 1.2075, "nll_loss": 1.1595532894134521, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.10799328237771988, "rewards/margins": 0.045867159962654114, "rewards/rejected": -0.1538604199886322, "step": 95 }, { "epoch": 3.169491525423729, "eval_log_odds_chosen": 0.3449009954929352, "eval_log_odds_ratio": -0.6200248003005981, "eval_logits/chosen": -0.8238758444786072, "eval_logits/rejected": -0.7465205788612366, "eval_logps/chosen": -1.1979708671569824, "eval_logps/rejected": -1.4351496696472168, "eval_loss": 1.2949864864349365, "eval_nll_loss": 1.2746418714523315, "eval_rewards/accuracies": 0.6875, "eval_rewards/chosen": -0.11979708075523376, "eval_rewards/margins": 0.02371787652373314, "eval_rewards/rejected": -0.1435149610042572, "eval_runtime": 2.3856, "eval_samples_per_second": 6.288, "eval_steps_per_second": 1.677, "step": 95 }, { "epoch": 3.3389830508474576, "grad_norm": 3.0120768547058105, "learning_rate": 9.722222222222223e-09, "log_odds_chosen": 0.6266916990280151, "log_odds_ratio": -0.46665820479393005, "logits/chosen": -0.6890040636062622, "logits/rejected": -0.2087690830230713, "logps/chosen": -0.9734174013137817, "logps/rejected": -1.4124778509140015, "loss": 1.1383, "nll_loss": 1.0916029214859009, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.09734174609184265, "rewards/margins": 0.04390605166554451, "rewards/rejected": -0.14124779403209686, "step": 100 }, { "epoch": 3.3389830508474576, "eval_log_odds_chosen": 0.3461822271347046, "eval_log_odds_ratio": -0.6199952960014343, "eval_logits/chosen": -0.8260993957519531, "eval_logits/rejected": -0.7494193911552429, "eval_logps/chosen": -1.1973499059677124, "eval_logps/rejected": -1.435782790184021, "eval_loss": 1.2943472862243652, "eval_nll_loss": 1.2740821838378906, "eval_rewards/accuracies": 0.6875, "eval_rewards/chosen": -0.1197349950671196, "eval_rewards/margins": 0.02384328469634056, "eval_rewards/rejected": -0.14357827603816986, "eval_runtime": 2.297, "eval_samples_per_second": 6.53, "eval_steps_per_second": 1.741, "step": 100 }, { "epoch": 3.5084745762711864, "grad_norm": 2.94162654876709, "learning_rate": 7.407407407407406e-09, "log_odds_chosen": 0.6522419452667236, "log_odds_ratio": -0.4762607216835022, "logits/chosen": -0.6378888487815857, "logits/rejected": -0.20835626125335693, "logps/chosen": -1.0351794958114624, "logps/rejected": -1.5168087482452393, "loss": 1.132, "nll_loss": 1.0843795537948608, "rewards/accuracies": 0.887499988079071, "rewards/chosen": -0.10351793467998505, "rewards/margins": 0.04816293716430664, "rewards/rejected": -0.15168088674545288, "step": 105 }, { "epoch": 3.5084745762711864, "eval_log_odds_chosen": 0.34624600410461426, "eval_log_odds_ratio": -0.6196528673171997, "eval_logits/chosen": -0.8230563402175903, "eval_logits/rejected": -0.7455496191978455, "eval_logps/chosen": -1.1972136497497559, "eval_logps/rejected": -1.4357357025146484, "eval_loss": 1.2938958406448364, "eval_nll_loss": 1.273663878440857, "eval_rewards/accuracies": 0.6875, "eval_rewards/chosen": -0.11972138285636902, "eval_rewards/margins": 0.023852191865444183, "eval_rewards/rejected": -0.1435735523700714, "eval_runtime": 2.0374, "eval_samples_per_second": 7.362, "eval_steps_per_second": 1.963, "step": 105 }, { "epoch": 3.6779661016949152, "grad_norm": 2.145254373550415, "learning_rate": 5.092592592592592e-09, "log_odds_chosen": 0.4669066369533539, "log_odds_ratio": -0.5435744524002075, "logits/chosen": -0.5924472212791443, "logits/rejected": -0.16264298558235168, "logps/chosen": -1.0428065061569214, "logps/rejected": -1.3670099973678589, "loss": 1.2299, "nll_loss": 1.1755752563476562, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.10428065061569214, "rewards/margins": 0.03242034837603569, "rewards/rejected": -0.13670100271701813, "step": 110 }, { "epoch": 3.6779661016949152, "eval_log_odds_chosen": 0.34594425559043884, "eval_log_odds_ratio": -0.6201038360595703, "eval_logits/chosen": -0.8250141739845276, "eval_logits/rejected": -0.74653559923172, "eval_logps/chosen": -1.197659969329834, "eval_logps/rejected": -1.436202883720398, "eval_loss": 1.2942434549331665, "eval_nll_loss": 1.2739633321762085, "eval_rewards/accuracies": 0.6875, "eval_rewards/chosen": -0.1197659969329834, "eval_rewards/margins": 0.023854291066527367, "eval_rewards/rejected": -0.14362028241157532, "eval_runtime": 2.4284, "eval_samples_per_second": 6.177, "eval_steps_per_second": 1.647, "step": 110 }, { "epoch": 3.847457627118644, "grad_norm": 2.368039846420288, "learning_rate": 2.7777777777777776e-09, "log_odds_chosen": 0.5323175191879272, "log_odds_ratio": -0.5115704536437988, "logits/chosen": -0.5521407127380371, "logits/rejected": -0.0902065858244896, "logps/chosen": -1.022303819656372, "logps/rejected": -1.3985751867294312, "loss": 1.2074, "nll_loss": 1.1562185287475586, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.10223038494586945, "rewards/margins": 0.037627145648002625, "rewards/rejected": -0.13985753059387207, "step": 115 }, { "epoch": 3.847457627118644, "eval_log_odds_chosen": 0.3472224473953247, "eval_log_odds_ratio": -0.6191097497940063, "eval_logits/chosen": -0.8245607614517212, "eval_logits/rejected": -0.7454671859741211, "eval_logps/chosen": -1.1963317394256592, "eval_logps/rejected": -1.4353883266448975, "eval_loss": 1.2926907539367676, "eval_nll_loss": 1.2724549770355225, "eval_rewards/accuracies": 0.6875, "eval_rewards/chosen": -0.11963316053152084, "eval_rewards/margins": 0.023905668407678604, "eval_rewards/rejected": -0.14353883266448975, "eval_runtime": 2.3939, "eval_samples_per_second": 6.266, "eval_steps_per_second": 1.671, "step": 115 }, { "epoch": 4.0, "grad_norm": 3.87864089012146, "learning_rate": 4.629629629629629e-10, "log_odds_chosen": 0.5788644552230835, "log_odds_ratio": -0.4914652109146118, "logits/chosen": -0.636318564414978, "logits/rejected": -0.16115230321884155, "logps/chosen": -1.008524775505066, "logps/rejected": -1.4221386909484863, "loss": 1.1531, "nll_loss": 1.090425968170166, "rewards/accuracies": 0.8472222089767456, "rewards/chosen": -0.10085248947143555, "rewards/margins": 0.04136139899492264, "rewards/rejected": -0.1422138810157776, "step": 120 }, { "epoch": 4.0, "eval_log_odds_chosen": 0.3468348979949951, "eval_log_odds_ratio": -0.619236946105957, "eval_logits/chosen": -0.8219494223594666, "eval_logits/rejected": -0.7437239289283752, "eval_logps/chosen": -1.1963438987731934, "eval_logps/rejected": -1.435250997543335, "eval_loss": 1.2931767702102661, "eval_nll_loss": 1.2729113101959229, "eval_rewards/accuracies": 0.6875, "eval_rewards/chosen": -0.11963438987731934, "eval_rewards/margins": 0.02389070764183998, "eval_rewards/rejected": -0.14352509379386902, "eval_runtime": 2.2505, "eval_samples_per_second": 6.665, "eval_steps_per_second": 1.777, "step": 120 } ], "logging_steps": 5, "max_steps": 120, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 10, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }