| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 4.0, |
| "eval_steps": 5, |
| "global_step": 120, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.1694915254237288, |
| "grad_norm": 2.790442705154419, |
| "learning_rate": 1.6666666666666664e-08, |
| "log_odds_chosen": 0.5680092573165894, |
| "log_odds_ratio": -0.5170037746429443, |
| "logits/chosen": -0.5872289538383484, |
| "logits/rejected": -0.1308290660381317, |
| "logps/chosen": -1.1121927499771118, |
| "logps/rejected": -1.5295088291168213, |
| "loss": 1.2668, |
| "nll_loss": 1.2151492834091187, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -0.11121927201747894, |
| "rewards/margins": 0.04173160716891289, |
| "rewards/rejected": -0.15295089781284332, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.1694915254237288, |
| "eval_log_odds_chosen": 0.344593346118927, |
| "eval_log_odds_ratio": -0.6202001571655273, |
| "eval_logits/chosen": -0.8292851448059082, |
| "eval_logits/rejected": -0.7480515837669373, |
| "eval_logps/chosen": -1.200099229812622, |
| "eval_logps/rejected": -1.4381370544433594, |
| "eval_loss": 1.3301414251327515, |
| "eval_nll_loss": 1.310793399810791, |
| "eval_rewards/accuracies": 0.6875, |
| "eval_rewards/chosen": -0.12000991404056549, |
| "eval_rewards/margins": 0.023803792893886566, |
| "eval_rewards/rejected": -0.14381369948387146, |
| "eval_runtime": 2.2841, |
| "eval_samples_per_second": 6.567, |
| "eval_steps_per_second": 1.751, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.3389830508474576, |
| "grad_norm": 3.6753456592559814, |
| "learning_rate": 3.75e-08, |
| "log_odds_chosen": 0.6411725878715515, |
| "log_odds_ratio": -0.4649675488471985, |
| "logits/chosen": -0.5985379219055176, |
| "logits/rejected": -0.15392252802848816, |
| "logps/chosen": -1.0119307041168213, |
| "logps/rejected": -1.4758830070495605, |
| "loss": 1.1771, |
| "nll_loss": 1.1306263208389282, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -0.10119307041168213, |
| "rewards/margins": 0.046395231038331985, |
| "rewards/rejected": -0.14758829772472382, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.3389830508474576, |
| "eval_log_odds_chosen": 0.3463364243507385, |
| "eval_log_odds_ratio": -0.6194710731506348, |
| "eval_logits/chosen": -0.826720654964447, |
| "eval_logits/rejected": -0.7461210489273071, |
| "eval_logps/chosen": -1.1986223459243774, |
| "eval_logps/rejected": -1.437687635421753, |
| "eval_loss": 1.3279355764389038, |
| "eval_nll_loss": 1.3084385395050049, |
| "eval_rewards/accuracies": 0.6875, |
| "eval_rewards/chosen": -0.11986224353313446, |
| "eval_rewards/margins": 0.023906530812382698, |
| "eval_rewards/rejected": -0.143768772482872, |
| "eval_runtime": 2.3646, |
| "eval_samples_per_second": 6.343, |
| "eval_steps_per_second": 1.692, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.5084745762711864, |
| "grad_norm": 2.6734206676483154, |
| "learning_rate": 4.9074074074074074e-08, |
| "log_odds_chosen": 0.5467322468757629, |
| "log_odds_ratio": -0.5080639123916626, |
| "logits/chosen": -0.6727955937385559, |
| "logits/rejected": -0.2421862781047821, |
| "logps/chosen": -1.0358315706253052, |
| "logps/rejected": -1.4172828197479248, |
| "loss": 1.1984, |
| "nll_loss": 1.147613763809204, |
| "rewards/accuracies": 0.8374999761581421, |
| "rewards/chosen": -0.10358314216136932, |
| "rewards/margins": 0.03814515098929405, |
| "rewards/rejected": -0.14172828197479248, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.5084745762711864, |
| "eval_log_odds_chosen": 0.3439960777759552, |
| "eval_log_odds_ratio": -0.6200249791145325, |
| "eval_logits/chosen": -0.8270355463027954, |
| "eval_logits/rejected": -0.7460318803787231, |
| "eval_logps/chosen": -1.1992497444152832, |
| "eval_logps/rejected": -1.4363036155700684, |
| "eval_loss": 1.3248008489608765, |
| "eval_nll_loss": 1.3052968978881836, |
| "eval_rewards/accuracies": 0.6875, |
| "eval_rewards/chosen": -0.11992497742176056, |
| "eval_rewards/margins": 0.023705393075942993, |
| "eval_rewards/rejected": -0.14363038539886475, |
| "eval_runtime": 2.3001, |
| "eval_samples_per_second": 6.522, |
| "eval_steps_per_second": 1.739, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.6779661016949152, |
| "grad_norm": 2.985229253768921, |
| "learning_rate": 4.675925925925926e-08, |
| "log_odds_chosen": 0.5538958311080933, |
| "log_odds_ratio": -0.4844888150691986, |
| "logits/chosen": -0.6328302621841431, |
| "logits/rejected": -0.2196337729692459, |
| "logps/chosen": -0.9897964596748352, |
| "logps/rejected": -1.3761847019195557, |
| "loss": 1.1464, |
| "nll_loss": 1.0979585647583008, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.098979651927948, |
| "rewards/margins": 0.038638822734355927, |
| "rewards/rejected": -0.13761845231056213, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.6779661016949152, |
| "eval_log_odds_chosen": 0.3439162075519562, |
| "eval_log_odds_ratio": -0.6207376718521118, |
| "eval_logits/chosen": -0.8287538290023804, |
| "eval_logits/rejected": -0.7500149011611938, |
| "eval_logps/chosen": -1.1994065046310425, |
| "eval_logps/rejected": -1.4364498853683472, |
| "eval_loss": 1.3215163946151733, |
| "eval_nll_loss": 1.3019251823425293, |
| "eval_rewards/accuracies": 0.6875, |
| "eval_rewards/chosen": -0.11994065344333649, |
| "eval_rewards/margins": 0.02370433509349823, |
| "eval_rewards/rejected": -0.14364498853683472, |
| "eval_runtime": 2.4028, |
| "eval_samples_per_second": 6.243, |
| "eval_steps_per_second": 1.665, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.847457627118644, |
| "grad_norm": 2.2889516353607178, |
| "learning_rate": 4.444444444444444e-08, |
| "log_odds_chosen": 0.5198447108268738, |
| "log_odds_ratio": -0.5137643218040466, |
| "logits/chosen": -0.5211045145988464, |
| "logits/rejected": -0.12277780473232269, |
| "logps/chosen": -0.9896445274353027, |
| "logps/rejected": -1.3587530851364136, |
| "loss": 1.1928, |
| "nll_loss": 1.141424536705017, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -0.09896446019411087, |
| "rewards/margins": 0.036910854279994965, |
| "rewards/rejected": -0.13587531447410583, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.847457627118644, |
| "eval_log_odds_chosen": 0.3448036313056946, |
| "eval_log_odds_ratio": -0.6197227239608765, |
| "eval_logits/chosen": -0.8284635543823242, |
| "eval_logits/rejected": -0.7477390170097351, |
| "eval_logps/chosen": -1.1986567974090576, |
| "eval_logps/rejected": -1.4361884593963623, |
| "eval_loss": 1.3181345462799072, |
| "eval_nll_loss": 1.2985508441925049, |
| "eval_rewards/accuracies": 0.6875, |
| "eval_rewards/chosen": -0.11986568570137024, |
| "eval_rewards/margins": 0.023753169924020767, |
| "eval_rewards/rejected": -0.1436188519001007, |
| "eval_runtime": 2.4698, |
| "eval_samples_per_second": 6.073, |
| "eval_steps_per_second": 1.62, |
| "step": 25 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 3.0400283336639404, |
| "learning_rate": 4.2129629629629625e-08, |
| "log_odds_chosen": 0.6260524988174438, |
| "log_odds_ratio": -0.4879433214664459, |
| "logits/chosen": -0.6383404731750488, |
| "logits/rejected": -0.11991772055625916, |
| "logps/chosen": -1.0283366441726685, |
| "logps/rejected": -1.4722530841827393, |
| "loss": 1.1457, |
| "nll_loss": 1.1040095090866089, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.10283366590738297, |
| "rewards/margins": 0.04439166933298111, |
| "rewards/rejected": -0.14722532033920288, |
| "step": 30 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_log_odds_chosen": 0.3446802496910095, |
| "eval_log_odds_ratio": -0.6202990412712097, |
| "eval_logits/chosen": -0.8293232321739197, |
| "eval_logits/rejected": -0.749043881893158, |
| "eval_logps/chosen": -1.1981878280639648, |
| "eval_logps/rejected": -1.4355335235595703, |
| "eval_loss": 1.3151671886444092, |
| "eval_nll_loss": 1.295397162437439, |
| "eval_rewards/accuracies": 0.6875, |
| "eval_rewards/chosen": -0.119818776845932, |
| "eval_rewards/margins": 0.02373456582427025, |
| "eval_rewards/rejected": -0.14355334639549255, |
| "eval_runtime": 2.3634, |
| "eval_samples_per_second": 6.347, |
| "eval_steps_per_second": 1.692, |
| "step": 30 |
| }, |
| { |
| "epoch": 1.1694915254237288, |
| "grad_norm": 2.696627140045166, |
| "learning_rate": 3.9814814814814815e-08, |
| "log_odds_chosen": 0.49621137976646423, |
| "log_odds_ratio": -0.5309565663337708, |
| "logits/chosen": -0.6420741081237793, |
| "logits/rejected": -0.18228447437286377, |
| "logps/chosen": -1.008725881576538, |
| "logps/rejected": -1.3604915142059326, |
| "loss": 1.1125, |
| "nll_loss": 1.0594511032104492, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -0.10087261348962784, |
| "rewards/margins": 0.03517654910683632, |
| "rewards/rejected": -0.13604915142059326, |
| "step": 35 |
| }, |
| { |
| "epoch": 1.1694915254237288, |
| "eval_log_odds_chosen": 0.3452969193458557, |
| "eval_log_odds_ratio": -0.6196750402450562, |
| "eval_logits/chosen": -0.8258322477340698, |
| "eval_logits/rejected": -0.7455801367759705, |
| "eval_logps/chosen": -1.1991809606552124, |
| "eval_logps/rejected": -1.4370383024215698, |
| "eval_loss": 1.3133561611175537, |
| "eval_nll_loss": 1.2936301231384277, |
| "eval_rewards/accuracies": 0.6875, |
| "eval_rewards/chosen": -0.119918093085289, |
| "eval_rewards/margins": 0.023785727098584175, |
| "eval_rewards/rejected": -0.14370381832122803, |
| "eval_runtime": 2.2925, |
| "eval_samples_per_second": 6.543, |
| "eval_steps_per_second": 1.745, |
| "step": 35 |
| }, |
| { |
| "epoch": 1.3389830508474576, |
| "grad_norm": 3.0447092056274414, |
| "learning_rate": 3.75e-08, |
| "log_odds_chosen": 0.6023409962654114, |
| "log_odds_ratio": -0.48153096437454224, |
| "logits/chosen": -0.6359589099884033, |
| "logits/rejected": -0.12003572285175323, |
| "logps/chosen": -1.014690637588501, |
| "logps/rejected": -1.4444173574447632, |
| "loss": 1.1185, |
| "nll_loss": 1.0703380107879639, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.10146906226873398, |
| "rewards/margins": 0.04297268018126488, |
| "rewards/rejected": -0.14444175362586975, |
| "step": 40 |
| }, |
| { |
| "epoch": 1.3389830508474576, |
| "eval_log_odds_chosen": 0.3457057476043701, |
| "eval_log_odds_ratio": -0.6200236082077026, |
| "eval_logits/chosen": -0.8274150490760803, |
| "eval_logits/rejected": -0.7458513379096985, |
| "eval_logps/chosen": -1.1985080242156982, |
| "eval_logps/rejected": -1.436950922012329, |
| "eval_loss": 1.310854196548462, |
| "eval_nll_loss": 1.2910186052322388, |
| "eval_rewards/accuracies": 0.6875, |
| "eval_rewards/chosen": -0.11985080689191818, |
| "eval_rewards/margins": 0.023844290524721146, |
| "eval_rewards/rejected": -0.14369508624076843, |
| "eval_runtime": 2.0709, |
| "eval_samples_per_second": 7.243, |
| "eval_steps_per_second": 1.932, |
| "step": 40 |
| }, |
| { |
| "epoch": 1.5084745762711864, |
| "grad_norm": 2.4286935329437256, |
| "learning_rate": 3.518518518518518e-08, |
| "log_odds_chosen": 0.43364983797073364, |
| "log_odds_ratio": -0.5388425588607788, |
| "logits/chosen": -0.6143006086349487, |
| "logits/rejected": -0.17402119934558868, |
| "logps/chosen": -1.0169684886932373, |
| "logps/rejected": -1.3061316013336182, |
| "loss": 1.2022, |
| "nll_loss": 1.1483418941497803, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -0.10169683396816254, |
| "rewards/margins": 0.028916319832205772, |
| "rewards/rejected": -0.13061316311359406, |
| "step": 45 |
| }, |
| { |
| "epoch": 1.5084745762711864, |
| "eval_log_odds_chosen": 0.34640029072761536, |
| "eval_log_odds_ratio": -0.6196874380111694, |
| "eval_logits/chosen": -0.8271859884262085, |
| "eval_logits/rejected": -0.7468405365943909, |
| "eval_logps/chosen": -1.1981958150863647, |
| "eval_logps/rejected": -1.4372470378875732, |
| "eval_loss": 1.3086471557617188, |
| "eval_nll_loss": 1.2887359857559204, |
| "eval_rewards/accuracies": 0.6875, |
| "eval_rewards/chosen": -0.11981958150863647, |
| "eval_rewards/margins": 0.02390512079000473, |
| "eval_rewards/rejected": -0.1437247097492218, |
| "eval_runtime": 2.4941, |
| "eval_samples_per_second": 6.014, |
| "eval_steps_per_second": 1.604, |
| "step": 45 |
| }, |
| { |
| "epoch": 1.6779661016949152, |
| "grad_norm": 2.9268641471862793, |
| "learning_rate": 3.287037037037037e-08, |
| "log_odds_chosen": 0.5900410413742065, |
| "log_odds_ratio": -0.49763163924217224, |
| "logits/chosen": -0.5182631611824036, |
| "logits/rejected": -0.06584630906581879, |
| "logps/chosen": -1.1032148599624634, |
| "logps/rejected": -1.5314563512802124, |
| "loss": 1.2859, |
| "nll_loss": 1.2361345291137695, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.11032148450613022, |
| "rewards/margins": 0.042824164032936096, |
| "rewards/rejected": -0.1531456559896469, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.6779661016949152, |
| "eval_log_odds_chosen": 0.34582221508026123, |
| "eval_log_odds_ratio": -0.6204431056976318, |
| "eval_logits/chosen": -0.8273264169692993, |
| "eval_logits/rejected": -0.7475500702857971, |
| "eval_logps/chosen": -1.2007322311401367, |
| "eval_logps/rejected": -1.439100980758667, |
| "eval_loss": 1.3071892261505127, |
| "eval_nll_loss": 1.2873029708862305, |
| "eval_rewards/accuracies": 0.6875, |
| "eval_rewards/chosen": -0.12007322907447815, |
| "eval_rewards/margins": 0.02383686974644661, |
| "eval_rewards/rejected": -0.14391009509563446, |
| "eval_runtime": 2.3687, |
| "eval_samples_per_second": 6.333, |
| "eval_steps_per_second": 1.689, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.847457627118644, |
| "grad_norm": 2.923938274383545, |
| "learning_rate": 3.0555555555555556e-08, |
| "log_odds_chosen": 0.7271274328231812, |
| "log_odds_ratio": -0.4399596154689789, |
| "logits/chosen": -0.6317266225814819, |
| "logits/rejected": -0.19635015726089478, |
| "logps/chosen": -0.9985980987548828, |
| "logps/rejected": -1.5293996334075928, |
| "loss": 1.1276, |
| "nll_loss": 1.083601474761963, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -0.0998598113656044, |
| "rewards/margins": 0.05308016389608383, |
| "rewards/rejected": -0.15293996036052704, |
| "step": 55 |
| }, |
| { |
| "epoch": 1.847457627118644, |
| "eval_log_odds_chosen": 0.34833377599716187, |
| "eval_log_odds_ratio": -0.6186259984970093, |
| "eval_logits/chosen": -0.8269048929214478, |
| "eval_logits/rejected": -0.7455801963806152, |
| "eval_logps/chosen": -1.196911096572876, |
| "eval_logps/rejected": -1.437239646911621, |
| "eval_loss": 1.304530382156372, |
| "eval_nll_loss": 1.2846966981887817, |
| "eval_rewards/accuracies": 0.6875, |
| "eval_rewards/chosen": -0.11969111114740372, |
| "eval_rewards/margins": 0.02403285726904869, |
| "eval_rewards/rejected": -0.1437239646911621, |
| "eval_runtime": 2.3288, |
| "eval_samples_per_second": 6.441, |
| "eval_steps_per_second": 1.718, |
| "step": 55 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 2.946364641189575, |
| "learning_rate": 2.8240740740740736e-08, |
| "log_odds_chosen": 0.6078373193740845, |
| "log_odds_ratio": -0.485850065946579, |
| "logits/chosen": -0.6273930668830872, |
| "logits/rejected": -0.21480894088745117, |
| "logps/chosen": -1.0067973136901855, |
| "logps/rejected": -1.4497339725494385, |
| "loss": 1.1433, |
| "nll_loss": 1.1121139526367188, |
| "rewards/accuracies": 0.8888888955116272, |
| "rewards/chosen": -0.10067972540855408, |
| "rewards/margins": 0.044293683022260666, |
| "rewards/rejected": -0.14497341215610504, |
| "step": 60 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_log_odds_chosen": 0.34563085436820984, |
| "eval_log_odds_ratio": -0.6200534105300903, |
| "eval_logits/chosen": -0.8266342878341675, |
| "eval_logits/rejected": -0.7474446296691895, |
| "eval_logps/chosen": -1.198333740234375, |
| "eval_logps/rejected": -1.4364194869995117, |
| "eval_loss": 1.30304753780365, |
| "eval_nll_loss": 1.2828768491744995, |
| "eval_rewards/accuracies": 0.6875, |
| "eval_rewards/chosen": -0.11983337998390198, |
| "eval_rewards/margins": 0.02380857989192009, |
| "eval_rewards/rejected": -0.14364196360111237, |
| "eval_runtime": 2.3652, |
| "eval_samples_per_second": 6.342, |
| "eval_steps_per_second": 1.691, |
| "step": 60 |
| }, |
| { |
| "epoch": 2.169491525423729, |
| "grad_norm": 2.583970308303833, |
| "learning_rate": 2.5925925925925923e-08, |
| "log_odds_chosen": 0.6106057167053223, |
| "log_odds_ratio": -0.4779096245765686, |
| "logits/chosen": -0.686557412147522, |
| "logits/rejected": -0.2725212275981903, |
| "logps/chosen": -0.9596541523933411, |
| "logps/rejected": -1.4074879884719849, |
| "loss": 1.0813, |
| "nll_loss": 1.0335239171981812, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -0.0959654226899147, |
| "rewards/margins": 0.044783372431993484, |
| "rewards/rejected": -0.1407487839460373, |
| "step": 65 |
| }, |
| { |
| "epoch": 2.169491525423729, |
| "eval_log_odds_chosen": 0.3470456004142761, |
| "eval_log_odds_ratio": -0.6191624402999878, |
| "eval_logits/chosen": -0.8260448575019836, |
| "eval_logits/rejected": -0.7457568645477295, |
| "eval_logps/chosen": -1.1975462436676025, |
| "eval_logps/rejected": -1.4365394115447998, |
| "eval_loss": 1.3012058734893799, |
| "eval_nll_loss": 1.2811425924301147, |
| "eval_rewards/accuracies": 0.6875, |
| "eval_rewards/chosen": -0.1197546198964119, |
| "eval_rewards/margins": 0.023899313062429428, |
| "eval_rewards/rejected": -0.14365392923355103, |
| "eval_runtime": 2.0569, |
| "eval_samples_per_second": 7.292, |
| "eval_steps_per_second": 1.945, |
| "step": 65 |
| }, |
| { |
| "epoch": 2.3389830508474576, |
| "grad_norm": 2.8739640712738037, |
| "learning_rate": 2.361111111111111e-08, |
| "log_odds_chosen": 0.6883528828620911, |
| "log_odds_ratio": -0.4613940119743347, |
| "logits/chosen": -0.6194564700126648, |
| "logits/rejected": -0.16226115822792053, |
| "logps/chosen": -1.0230361223220825, |
| "logps/rejected": -1.5271246433258057, |
| "loss": 1.0952, |
| "nll_loss": 1.0490230321884155, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -0.10230360180139542, |
| "rewards/margins": 0.05040886998176575, |
| "rewards/rejected": -0.15271246433258057, |
| "step": 70 |
| }, |
| { |
| "epoch": 2.3389830508474576, |
| "eval_log_odds_chosen": 0.34613096714019775, |
| "eval_log_odds_ratio": -0.6190251111984253, |
| "eval_logits/chosen": -0.8254708051681519, |
| "eval_logits/rejected": -0.7465603351593018, |
| "eval_logps/chosen": -1.1970798969268799, |
| "eval_logps/rejected": -1.4355697631835938, |
| "eval_loss": 1.2998255491256714, |
| "eval_nll_loss": 1.279675006866455, |
| "eval_rewards/accuracies": 0.6875, |
| "eval_rewards/chosen": -0.11970800161361694, |
| "eval_rewards/margins": 0.023848986253142357, |
| "eval_rewards/rejected": -0.14355698227882385, |
| "eval_runtime": 2.1718, |
| "eval_samples_per_second": 6.907, |
| "eval_steps_per_second": 1.842, |
| "step": 70 |
| }, |
| { |
| "epoch": 2.5084745762711864, |
| "grad_norm": 2.5808846950531006, |
| "learning_rate": 2.1296296296296297e-08, |
| "log_odds_chosen": 0.517440915107727, |
| "log_odds_ratio": -0.5259458422660828, |
| "logits/chosen": -0.5467637181282043, |
| "logits/rejected": -0.10346652567386627, |
| "logps/chosen": -1.0675327777862549, |
| "logps/rejected": -1.437524437904358, |
| "loss": 1.2494, |
| "nll_loss": 1.1968111991882324, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -0.10675326734781265, |
| "rewards/margins": 0.036999184638261795, |
| "rewards/rejected": -0.14375244081020355, |
| "step": 75 |
| }, |
| { |
| "epoch": 2.5084745762711864, |
| "eval_log_odds_chosen": 0.34606456756591797, |
| "eval_log_odds_ratio": -0.620042622089386, |
| "eval_logits/chosen": -0.8240770697593689, |
| "eval_logits/rejected": -0.7445046305656433, |
| "eval_logps/chosen": -1.1980996131896973, |
| "eval_logps/rejected": -1.436529517173767, |
| "eval_loss": 1.2983678579330444, |
| "eval_nll_loss": 1.27822744846344, |
| "eval_rewards/accuracies": 0.6875, |
| "eval_rewards/chosen": -0.11980997025966644, |
| "eval_rewards/margins": 0.023842979222536087, |
| "eval_rewards/rejected": -0.14365294575691223, |
| "eval_runtime": 2.3948, |
| "eval_samples_per_second": 6.264, |
| "eval_steps_per_second": 1.67, |
| "step": 75 |
| }, |
| { |
| "epoch": 2.6779661016949152, |
| "grad_norm": 3.2112486362457275, |
| "learning_rate": 1.898148148148148e-08, |
| "log_odds_chosen": 0.6882795095443726, |
| "log_odds_ratio": -0.4549444615840912, |
| "logits/chosen": -0.6079570651054382, |
| "logits/rejected": -0.1440545618534088, |
| "logps/chosen": -1.0264164209365845, |
| "logps/rejected": -1.5247279405593872, |
| "loss": 1.1376, |
| "nll_loss": 1.0920751094818115, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.10264164209365845, |
| "rewards/margins": 0.04983116313815117, |
| "rewards/rejected": -0.15247280895709991, |
| "step": 80 |
| }, |
| { |
| "epoch": 2.6779661016949152, |
| "eval_log_odds_chosen": 0.34864068031311035, |
| "eval_log_odds_ratio": -0.6184805631637573, |
| "eval_logits/chosen": -0.8214948773384094, |
| "eval_logits/rejected": -0.7387450933456421, |
| "eval_logps/chosen": -1.1962885856628418, |
| "eval_logps/rejected": -1.4366211891174316, |
| "eval_loss": 1.2968833446502686, |
| "eval_nll_loss": 1.2767653465270996, |
| "eval_rewards/accuracies": 0.6875, |
| "eval_rewards/chosen": -0.11962885409593582, |
| "eval_rewards/margins": 0.02403326891362667, |
| "eval_rewards/rejected": -0.14366212487220764, |
| "eval_runtime": 2.3377, |
| "eval_samples_per_second": 6.417, |
| "eval_steps_per_second": 1.711, |
| "step": 80 |
| }, |
| { |
| "epoch": 2.847457627118644, |
| "grad_norm": 1.839969277381897, |
| "learning_rate": 1.6666666666666664e-08, |
| "log_odds_chosen": 0.4969852864742279, |
| "log_odds_ratio": -0.532636284828186, |
| "logits/chosen": -0.5632266998291016, |
| "logits/rejected": -0.06907184422016144, |
| "logps/chosen": -1.0492427349090576, |
| "logps/rejected": -1.3902790546417236, |
| "loss": 1.2407, |
| "nll_loss": 1.1874761581420898, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.1049242839217186, |
| "rewards/margins": 0.03410361707210541, |
| "rewards/rejected": -0.1390278935432434, |
| "step": 85 |
| }, |
| { |
| "epoch": 2.847457627118644, |
| "eval_log_odds_chosen": 0.34886401891708374, |
| "eval_log_odds_ratio": -0.6193042993545532, |
| "eval_logits/chosen": -0.8233250379562378, |
| "eval_logits/rejected": -0.7452165484428406, |
| "eval_logps/chosen": -1.1970219612121582, |
| "eval_logps/rejected": -1.4376810789108276, |
| "eval_loss": 1.2961064577102661, |
| "eval_nll_loss": 1.2759020328521729, |
| "eval_rewards/accuracies": 0.6875, |
| "eval_rewards/chosen": -0.11970219016075134, |
| "eval_rewards/margins": 0.02406592108309269, |
| "eval_rewards/rejected": -0.14376811683177948, |
| "eval_runtime": 2.3326, |
| "eval_samples_per_second": 6.431, |
| "eval_steps_per_second": 1.715, |
| "step": 85 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 2.8928427696228027, |
| "learning_rate": 1.435185185185185e-08, |
| "log_odds_chosen": 0.45060819387435913, |
| "log_odds_ratio": -0.5237997174263, |
| "logits/chosen": -0.5375601649284363, |
| "logits/rejected": -0.10669712722301483, |
| "logps/chosen": -1.026106595993042, |
| "logps/rejected": -1.3331278562545776, |
| "loss": 1.1632, |
| "nll_loss": 1.1324602365493774, |
| "rewards/accuracies": 0.8611111044883728, |
| "rewards/chosen": -0.10261066257953644, |
| "rewards/margins": 0.030702121555805206, |
| "rewards/rejected": -0.13331276178359985, |
| "step": 90 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_log_odds_chosen": 0.3472563922405243, |
| "eval_log_odds_ratio": -0.6190530061721802, |
| "eval_logits/chosen": -0.8260915279388428, |
| "eval_logits/rejected": -0.7494507431983948, |
| "eval_logps/chosen": -1.1969666481018066, |
| "eval_logps/rejected": -1.4364831447601318, |
| "eval_loss": 1.295333743095398, |
| "eval_nll_loss": 1.2750121355056763, |
| "eval_rewards/accuracies": 0.6875, |
| "eval_rewards/chosen": -0.11969666182994843, |
| "eval_rewards/margins": 0.023951642215251923, |
| "eval_rewards/rejected": -0.14364831149578094, |
| "eval_runtime": 2.4234, |
| "eval_samples_per_second": 6.19, |
| "eval_steps_per_second": 1.651, |
| "step": 90 |
| }, |
| { |
| "epoch": 3.169491525423729, |
| "grad_norm": 2.178267478942871, |
| "learning_rate": 1.2037037037037036e-08, |
| "log_odds_chosen": 0.6245774030685425, |
| "log_odds_ratio": -0.47914019227027893, |
| "logits/chosen": -0.5936748385429382, |
| "logits/rejected": -0.12400760501623154, |
| "logps/chosen": -1.0799325704574585, |
| "logps/rejected": -1.5386043787002563, |
| "loss": 1.2075, |
| "nll_loss": 1.1595532894134521, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": -0.10799328237771988, |
| "rewards/margins": 0.045867159962654114, |
| "rewards/rejected": -0.1538604199886322, |
| "step": 95 |
| }, |
| { |
| "epoch": 3.169491525423729, |
| "eval_log_odds_chosen": 0.3449009954929352, |
| "eval_log_odds_ratio": -0.6200248003005981, |
| "eval_logits/chosen": -0.8238758444786072, |
| "eval_logits/rejected": -0.7465205788612366, |
| "eval_logps/chosen": -1.1979708671569824, |
| "eval_logps/rejected": -1.4351496696472168, |
| "eval_loss": 1.2949864864349365, |
| "eval_nll_loss": 1.2746418714523315, |
| "eval_rewards/accuracies": 0.6875, |
| "eval_rewards/chosen": -0.11979708075523376, |
| "eval_rewards/margins": 0.02371787652373314, |
| "eval_rewards/rejected": -0.1435149610042572, |
| "eval_runtime": 2.3856, |
| "eval_samples_per_second": 6.288, |
| "eval_steps_per_second": 1.677, |
| "step": 95 |
| }, |
| { |
| "epoch": 3.3389830508474576, |
| "grad_norm": 3.0120768547058105, |
| "learning_rate": 9.722222222222223e-09, |
| "log_odds_chosen": 0.6266916990280151, |
| "log_odds_ratio": -0.46665820479393005, |
| "logits/chosen": -0.6890040636062622, |
| "logits/rejected": -0.2087690830230713, |
| "logps/chosen": -0.9734174013137817, |
| "logps/rejected": -1.4124778509140015, |
| "loss": 1.1383, |
| "nll_loss": 1.0916029214859009, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -0.09734174609184265, |
| "rewards/margins": 0.04390605166554451, |
| "rewards/rejected": -0.14124779403209686, |
| "step": 100 |
| }, |
| { |
| "epoch": 3.3389830508474576, |
| "eval_log_odds_chosen": 0.3461822271347046, |
| "eval_log_odds_ratio": -0.6199952960014343, |
| "eval_logits/chosen": -0.8260993957519531, |
| "eval_logits/rejected": -0.7494193911552429, |
| "eval_logps/chosen": -1.1973499059677124, |
| "eval_logps/rejected": -1.435782790184021, |
| "eval_loss": 1.2943472862243652, |
| "eval_nll_loss": 1.2740821838378906, |
| "eval_rewards/accuracies": 0.6875, |
| "eval_rewards/chosen": -0.1197349950671196, |
| "eval_rewards/margins": 0.02384328469634056, |
| "eval_rewards/rejected": -0.14357827603816986, |
| "eval_runtime": 2.297, |
| "eval_samples_per_second": 6.53, |
| "eval_steps_per_second": 1.741, |
| "step": 100 |
| }, |
| { |
| "epoch": 3.5084745762711864, |
| "grad_norm": 2.94162654876709, |
| "learning_rate": 7.407407407407406e-09, |
| "log_odds_chosen": 0.6522419452667236, |
| "log_odds_ratio": -0.4762607216835022, |
| "logits/chosen": -0.6378888487815857, |
| "logits/rejected": -0.20835626125335693, |
| "logps/chosen": -1.0351794958114624, |
| "logps/rejected": -1.5168087482452393, |
| "loss": 1.132, |
| "nll_loss": 1.0843795537948608, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -0.10351793467998505, |
| "rewards/margins": 0.04816293716430664, |
| "rewards/rejected": -0.15168088674545288, |
| "step": 105 |
| }, |
| { |
| "epoch": 3.5084745762711864, |
| "eval_log_odds_chosen": 0.34624600410461426, |
| "eval_log_odds_ratio": -0.6196528673171997, |
| "eval_logits/chosen": -0.8230563402175903, |
| "eval_logits/rejected": -0.7455496191978455, |
| "eval_logps/chosen": -1.1972136497497559, |
| "eval_logps/rejected": -1.4357357025146484, |
| "eval_loss": 1.2938958406448364, |
| "eval_nll_loss": 1.273663878440857, |
| "eval_rewards/accuracies": 0.6875, |
| "eval_rewards/chosen": -0.11972138285636902, |
| "eval_rewards/margins": 0.023852191865444183, |
| "eval_rewards/rejected": -0.1435735523700714, |
| "eval_runtime": 2.0374, |
| "eval_samples_per_second": 7.362, |
| "eval_steps_per_second": 1.963, |
| "step": 105 |
| }, |
| { |
| "epoch": 3.6779661016949152, |
| "grad_norm": 2.145254373550415, |
| "learning_rate": 5.092592592592592e-09, |
| "log_odds_chosen": 0.4669066369533539, |
| "log_odds_ratio": -0.5435744524002075, |
| "logits/chosen": -0.5924472212791443, |
| "logits/rejected": -0.16264298558235168, |
| "logps/chosen": -1.0428065061569214, |
| "logps/rejected": -1.3670099973678589, |
| "loss": 1.2299, |
| "nll_loss": 1.1755752563476562, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.10428065061569214, |
| "rewards/margins": 0.03242034837603569, |
| "rewards/rejected": -0.13670100271701813, |
| "step": 110 |
| }, |
| { |
| "epoch": 3.6779661016949152, |
| "eval_log_odds_chosen": 0.34594425559043884, |
| "eval_log_odds_ratio": -0.6201038360595703, |
| "eval_logits/chosen": -0.8250141739845276, |
| "eval_logits/rejected": -0.74653559923172, |
| "eval_logps/chosen": -1.197659969329834, |
| "eval_logps/rejected": -1.436202883720398, |
| "eval_loss": 1.2942434549331665, |
| "eval_nll_loss": 1.2739633321762085, |
| "eval_rewards/accuracies": 0.6875, |
| "eval_rewards/chosen": -0.1197659969329834, |
| "eval_rewards/margins": 0.023854291066527367, |
| "eval_rewards/rejected": -0.14362028241157532, |
| "eval_runtime": 2.4284, |
| "eval_samples_per_second": 6.177, |
| "eval_steps_per_second": 1.647, |
| "step": 110 |
| }, |
| { |
| "epoch": 3.847457627118644, |
| "grad_norm": 2.368039846420288, |
| "learning_rate": 2.7777777777777776e-09, |
| "log_odds_chosen": 0.5323175191879272, |
| "log_odds_ratio": -0.5115704536437988, |
| "logits/chosen": -0.5521407127380371, |
| "logits/rejected": -0.0902065858244896, |
| "logps/chosen": -1.022303819656372, |
| "logps/rejected": -1.3985751867294312, |
| "loss": 1.2074, |
| "nll_loss": 1.1562185287475586, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -0.10223038494586945, |
| "rewards/margins": 0.037627145648002625, |
| "rewards/rejected": -0.13985753059387207, |
| "step": 115 |
| }, |
| { |
| "epoch": 3.847457627118644, |
| "eval_log_odds_chosen": 0.3472224473953247, |
| "eval_log_odds_ratio": -0.6191097497940063, |
| "eval_logits/chosen": -0.8245607614517212, |
| "eval_logits/rejected": -0.7454671859741211, |
| "eval_logps/chosen": -1.1963317394256592, |
| "eval_logps/rejected": -1.4353883266448975, |
| "eval_loss": 1.2926907539367676, |
| "eval_nll_loss": 1.2724549770355225, |
| "eval_rewards/accuracies": 0.6875, |
| "eval_rewards/chosen": -0.11963316053152084, |
| "eval_rewards/margins": 0.023905668407678604, |
| "eval_rewards/rejected": -0.14353883266448975, |
| "eval_runtime": 2.3939, |
| "eval_samples_per_second": 6.266, |
| "eval_steps_per_second": 1.671, |
| "step": 115 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 3.87864089012146, |
| "learning_rate": 4.629629629629629e-10, |
| "log_odds_chosen": 0.5788644552230835, |
| "log_odds_ratio": -0.4914652109146118, |
| "logits/chosen": -0.636318564414978, |
| "logits/rejected": -0.16115230321884155, |
| "logps/chosen": -1.008524775505066, |
| "logps/rejected": -1.4221386909484863, |
| "loss": 1.1531, |
| "nll_loss": 1.090425968170166, |
| "rewards/accuracies": 0.8472222089767456, |
| "rewards/chosen": -0.10085248947143555, |
| "rewards/margins": 0.04136139899492264, |
| "rewards/rejected": -0.1422138810157776, |
| "step": 120 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_log_odds_chosen": 0.3468348979949951, |
| "eval_log_odds_ratio": -0.619236946105957, |
| "eval_logits/chosen": -0.8219494223594666, |
| "eval_logits/rejected": -0.7437239289283752, |
| "eval_logps/chosen": -1.1963438987731934, |
| "eval_logps/rejected": -1.435250997543335, |
| "eval_loss": 1.2931767702102661, |
| "eval_nll_loss": 1.2729113101959229, |
| "eval_rewards/accuracies": 0.6875, |
| "eval_rewards/chosen": -0.11963438987731934, |
| "eval_rewards/margins": 0.02389070764183998, |
| "eval_rewards/rejected": -0.14352509379386902, |
| "eval_runtime": 2.2505, |
| "eval_samples_per_second": 6.665, |
| "eval_steps_per_second": 1.777, |
| "step": 120 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 120, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 4, |
| "save_steps": 10, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|