phantasm / trainer_state.json
Sela223's picture
Upload 8 files
0b2f132 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.0,
"eval_steps": 5,
"global_step": 120,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.1694915254237288,
"grad_norm": 2.790442705154419,
"learning_rate": 1.6666666666666664e-08,
"log_odds_chosen": 0.5680092573165894,
"log_odds_ratio": -0.5170037746429443,
"logits/chosen": -0.5872289538383484,
"logits/rejected": -0.1308290660381317,
"logps/chosen": -1.1121927499771118,
"logps/rejected": -1.5295088291168213,
"loss": 1.2668,
"nll_loss": 1.2151492834091187,
"rewards/accuracies": 0.8374999761581421,
"rewards/chosen": -0.11121927201747894,
"rewards/margins": 0.04173160716891289,
"rewards/rejected": -0.15295089781284332,
"step": 5
},
{
"epoch": 0.1694915254237288,
"eval_log_odds_chosen": 0.344593346118927,
"eval_log_odds_ratio": -0.6202001571655273,
"eval_logits/chosen": -0.8292851448059082,
"eval_logits/rejected": -0.7480515837669373,
"eval_logps/chosen": -1.200099229812622,
"eval_logps/rejected": -1.4381370544433594,
"eval_loss": 1.3301414251327515,
"eval_nll_loss": 1.310793399810791,
"eval_rewards/accuracies": 0.6875,
"eval_rewards/chosen": -0.12000991404056549,
"eval_rewards/margins": 0.023803792893886566,
"eval_rewards/rejected": -0.14381369948387146,
"eval_runtime": 2.2841,
"eval_samples_per_second": 6.567,
"eval_steps_per_second": 1.751,
"step": 5
},
{
"epoch": 0.3389830508474576,
"grad_norm": 3.6753456592559814,
"learning_rate": 3.75e-08,
"log_odds_chosen": 0.6411725878715515,
"log_odds_ratio": -0.4649675488471985,
"logits/chosen": -0.5985379219055176,
"logits/rejected": -0.15392252802848816,
"logps/chosen": -1.0119307041168213,
"logps/rejected": -1.4758830070495605,
"loss": 1.1771,
"nll_loss": 1.1306263208389282,
"rewards/accuracies": 0.887499988079071,
"rewards/chosen": -0.10119307041168213,
"rewards/margins": 0.046395231038331985,
"rewards/rejected": -0.14758829772472382,
"step": 10
},
{
"epoch": 0.3389830508474576,
"eval_log_odds_chosen": 0.3463364243507385,
"eval_log_odds_ratio": -0.6194710731506348,
"eval_logits/chosen": -0.826720654964447,
"eval_logits/rejected": -0.7461210489273071,
"eval_logps/chosen": -1.1986223459243774,
"eval_logps/rejected": -1.437687635421753,
"eval_loss": 1.3279355764389038,
"eval_nll_loss": 1.3084385395050049,
"eval_rewards/accuracies": 0.6875,
"eval_rewards/chosen": -0.11986224353313446,
"eval_rewards/margins": 0.023906530812382698,
"eval_rewards/rejected": -0.143768772482872,
"eval_runtime": 2.3646,
"eval_samples_per_second": 6.343,
"eval_steps_per_second": 1.692,
"step": 10
},
{
"epoch": 0.5084745762711864,
"grad_norm": 2.6734206676483154,
"learning_rate": 4.9074074074074074e-08,
"log_odds_chosen": 0.5467322468757629,
"log_odds_ratio": -0.5080639123916626,
"logits/chosen": -0.6727955937385559,
"logits/rejected": -0.2421862781047821,
"logps/chosen": -1.0358315706253052,
"logps/rejected": -1.4172828197479248,
"loss": 1.1984,
"nll_loss": 1.147613763809204,
"rewards/accuracies": 0.8374999761581421,
"rewards/chosen": -0.10358314216136932,
"rewards/margins": 0.03814515098929405,
"rewards/rejected": -0.14172828197479248,
"step": 15
},
{
"epoch": 0.5084745762711864,
"eval_log_odds_chosen": 0.3439960777759552,
"eval_log_odds_ratio": -0.6200249791145325,
"eval_logits/chosen": -0.8270355463027954,
"eval_logits/rejected": -0.7460318803787231,
"eval_logps/chosen": -1.1992497444152832,
"eval_logps/rejected": -1.4363036155700684,
"eval_loss": 1.3248008489608765,
"eval_nll_loss": 1.3052968978881836,
"eval_rewards/accuracies": 0.6875,
"eval_rewards/chosen": -0.11992497742176056,
"eval_rewards/margins": 0.023705393075942993,
"eval_rewards/rejected": -0.14363038539886475,
"eval_runtime": 2.3001,
"eval_samples_per_second": 6.522,
"eval_steps_per_second": 1.739,
"step": 15
},
{
"epoch": 0.6779661016949152,
"grad_norm": 2.985229253768921,
"learning_rate": 4.675925925925926e-08,
"log_odds_chosen": 0.5538958311080933,
"log_odds_ratio": -0.4844888150691986,
"logits/chosen": -0.6328302621841431,
"logits/rejected": -0.2196337729692459,
"logps/chosen": -0.9897964596748352,
"logps/rejected": -1.3761847019195557,
"loss": 1.1464,
"nll_loss": 1.0979585647583008,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.098979651927948,
"rewards/margins": 0.038638822734355927,
"rewards/rejected": -0.13761845231056213,
"step": 20
},
{
"epoch": 0.6779661016949152,
"eval_log_odds_chosen": 0.3439162075519562,
"eval_log_odds_ratio": -0.6207376718521118,
"eval_logits/chosen": -0.8287538290023804,
"eval_logits/rejected": -0.7500149011611938,
"eval_logps/chosen": -1.1994065046310425,
"eval_logps/rejected": -1.4364498853683472,
"eval_loss": 1.3215163946151733,
"eval_nll_loss": 1.3019251823425293,
"eval_rewards/accuracies": 0.6875,
"eval_rewards/chosen": -0.11994065344333649,
"eval_rewards/margins": 0.02370433509349823,
"eval_rewards/rejected": -0.14364498853683472,
"eval_runtime": 2.4028,
"eval_samples_per_second": 6.243,
"eval_steps_per_second": 1.665,
"step": 20
},
{
"epoch": 0.847457627118644,
"grad_norm": 2.2889516353607178,
"learning_rate": 4.444444444444444e-08,
"log_odds_chosen": 0.5198447108268738,
"log_odds_ratio": -0.5137643218040466,
"logits/chosen": -0.5211045145988464,
"logits/rejected": -0.12277780473232269,
"logps/chosen": -0.9896445274353027,
"logps/rejected": -1.3587530851364136,
"loss": 1.1928,
"nll_loss": 1.141424536705017,
"rewards/accuracies": 0.887499988079071,
"rewards/chosen": -0.09896446019411087,
"rewards/margins": 0.036910854279994965,
"rewards/rejected": -0.13587531447410583,
"step": 25
},
{
"epoch": 0.847457627118644,
"eval_log_odds_chosen": 0.3448036313056946,
"eval_log_odds_ratio": -0.6197227239608765,
"eval_logits/chosen": -0.8284635543823242,
"eval_logits/rejected": -0.7477390170097351,
"eval_logps/chosen": -1.1986567974090576,
"eval_logps/rejected": -1.4361884593963623,
"eval_loss": 1.3181345462799072,
"eval_nll_loss": 1.2985508441925049,
"eval_rewards/accuracies": 0.6875,
"eval_rewards/chosen": -0.11986568570137024,
"eval_rewards/margins": 0.023753169924020767,
"eval_rewards/rejected": -0.1436188519001007,
"eval_runtime": 2.4698,
"eval_samples_per_second": 6.073,
"eval_steps_per_second": 1.62,
"step": 25
},
{
"epoch": 1.0,
"grad_norm": 3.0400283336639404,
"learning_rate": 4.2129629629629625e-08,
"log_odds_chosen": 0.6260524988174438,
"log_odds_ratio": -0.4879433214664459,
"logits/chosen": -0.6383404731750488,
"logits/rejected": -0.11991772055625916,
"logps/chosen": -1.0283366441726685,
"logps/rejected": -1.4722530841827393,
"loss": 1.1457,
"nll_loss": 1.1040095090866089,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.10283366590738297,
"rewards/margins": 0.04439166933298111,
"rewards/rejected": -0.14722532033920288,
"step": 30
},
{
"epoch": 1.0,
"eval_log_odds_chosen": 0.3446802496910095,
"eval_log_odds_ratio": -0.6202990412712097,
"eval_logits/chosen": -0.8293232321739197,
"eval_logits/rejected": -0.749043881893158,
"eval_logps/chosen": -1.1981878280639648,
"eval_logps/rejected": -1.4355335235595703,
"eval_loss": 1.3151671886444092,
"eval_nll_loss": 1.295397162437439,
"eval_rewards/accuracies": 0.6875,
"eval_rewards/chosen": -0.119818776845932,
"eval_rewards/margins": 0.02373456582427025,
"eval_rewards/rejected": -0.14355334639549255,
"eval_runtime": 2.3634,
"eval_samples_per_second": 6.347,
"eval_steps_per_second": 1.692,
"step": 30
},
{
"epoch": 1.1694915254237288,
"grad_norm": 2.696627140045166,
"learning_rate": 3.9814814814814815e-08,
"log_odds_chosen": 0.49621137976646423,
"log_odds_ratio": -0.5309565663337708,
"logits/chosen": -0.6420741081237793,
"logits/rejected": -0.18228447437286377,
"logps/chosen": -1.008725881576538,
"logps/rejected": -1.3604915142059326,
"loss": 1.1125,
"nll_loss": 1.0594511032104492,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": -0.10087261348962784,
"rewards/margins": 0.03517654910683632,
"rewards/rejected": -0.13604915142059326,
"step": 35
},
{
"epoch": 1.1694915254237288,
"eval_log_odds_chosen": 0.3452969193458557,
"eval_log_odds_ratio": -0.6196750402450562,
"eval_logits/chosen": -0.8258322477340698,
"eval_logits/rejected": -0.7455801367759705,
"eval_logps/chosen": -1.1991809606552124,
"eval_logps/rejected": -1.4370383024215698,
"eval_loss": 1.3133561611175537,
"eval_nll_loss": 1.2936301231384277,
"eval_rewards/accuracies": 0.6875,
"eval_rewards/chosen": -0.119918093085289,
"eval_rewards/margins": 0.023785727098584175,
"eval_rewards/rejected": -0.14370381832122803,
"eval_runtime": 2.2925,
"eval_samples_per_second": 6.543,
"eval_steps_per_second": 1.745,
"step": 35
},
{
"epoch": 1.3389830508474576,
"grad_norm": 3.0447092056274414,
"learning_rate": 3.75e-08,
"log_odds_chosen": 0.6023409962654114,
"log_odds_ratio": -0.48153096437454224,
"logits/chosen": -0.6359589099884033,
"logits/rejected": -0.12003572285175323,
"logps/chosen": -1.014690637588501,
"logps/rejected": -1.4444173574447632,
"loss": 1.1185,
"nll_loss": 1.0703380107879639,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.10146906226873398,
"rewards/margins": 0.04297268018126488,
"rewards/rejected": -0.14444175362586975,
"step": 40
},
{
"epoch": 1.3389830508474576,
"eval_log_odds_chosen": 0.3457057476043701,
"eval_log_odds_ratio": -0.6200236082077026,
"eval_logits/chosen": -0.8274150490760803,
"eval_logits/rejected": -0.7458513379096985,
"eval_logps/chosen": -1.1985080242156982,
"eval_logps/rejected": -1.436950922012329,
"eval_loss": 1.310854196548462,
"eval_nll_loss": 1.2910186052322388,
"eval_rewards/accuracies": 0.6875,
"eval_rewards/chosen": -0.11985080689191818,
"eval_rewards/margins": 0.023844290524721146,
"eval_rewards/rejected": -0.14369508624076843,
"eval_runtime": 2.0709,
"eval_samples_per_second": 7.243,
"eval_steps_per_second": 1.932,
"step": 40
},
{
"epoch": 1.5084745762711864,
"grad_norm": 2.4286935329437256,
"learning_rate": 3.518518518518518e-08,
"log_odds_chosen": 0.43364983797073364,
"log_odds_ratio": -0.5388425588607788,
"logits/chosen": -0.6143006086349487,
"logits/rejected": -0.17402119934558868,
"logps/chosen": -1.0169684886932373,
"logps/rejected": -1.3061316013336182,
"loss": 1.2022,
"nll_loss": 1.1483418941497803,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.10169683396816254,
"rewards/margins": 0.028916319832205772,
"rewards/rejected": -0.13061316311359406,
"step": 45
},
{
"epoch": 1.5084745762711864,
"eval_log_odds_chosen": 0.34640029072761536,
"eval_log_odds_ratio": -0.6196874380111694,
"eval_logits/chosen": -0.8271859884262085,
"eval_logits/rejected": -0.7468405365943909,
"eval_logps/chosen": -1.1981958150863647,
"eval_logps/rejected": -1.4372470378875732,
"eval_loss": 1.3086471557617188,
"eval_nll_loss": 1.2887359857559204,
"eval_rewards/accuracies": 0.6875,
"eval_rewards/chosen": -0.11981958150863647,
"eval_rewards/margins": 0.02390512079000473,
"eval_rewards/rejected": -0.1437247097492218,
"eval_runtime": 2.4941,
"eval_samples_per_second": 6.014,
"eval_steps_per_second": 1.604,
"step": 45
},
{
"epoch": 1.6779661016949152,
"grad_norm": 2.9268641471862793,
"learning_rate": 3.287037037037037e-08,
"log_odds_chosen": 0.5900410413742065,
"log_odds_ratio": -0.49763163924217224,
"logits/chosen": -0.5182631611824036,
"logits/rejected": -0.06584630906581879,
"logps/chosen": -1.1032148599624634,
"logps/rejected": -1.5314563512802124,
"loss": 1.2859,
"nll_loss": 1.2361345291137695,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.11032148450613022,
"rewards/margins": 0.042824164032936096,
"rewards/rejected": -0.1531456559896469,
"step": 50
},
{
"epoch": 1.6779661016949152,
"eval_log_odds_chosen": 0.34582221508026123,
"eval_log_odds_ratio": -0.6204431056976318,
"eval_logits/chosen": -0.8273264169692993,
"eval_logits/rejected": -0.7475500702857971,
"eval_logps/chosen": -1.2007322311401367,
"eval_logps/rejected": -1.439100980758667,
"eval_loss": 1.3071892261505127,
"eval_nll_loss": 1.2873029708862305,
"eval_rewards/accuracies": 0.6875,
"eval_rewards/chosen": -0.12007322907447815,
"eval_rewards/margins": 0.02383686974644661,
"eval_rewards/rejected": -0.14391009509563446,
"eval_runtime": 2.3687,
"eval_samples_per_second": 6.333,
"eval_steps_per_second": 1.689,
"step": 50
},
{
"epoch": 1.847457627118644,
"grad_norm": 2.923938274383545,
"learning_rate": 3.0555555555555556e-08,
"log_odds_chosen": 0.7271274328231812,
"log_odds_ratio": -0.4399596154689789,
"logits/chosen": -0.6317266225814819,
"logits/rejected": -0.19635015726089478,
"logps/chosen": -0.9985980987548828,
"logps/rejected": -1.5293996334075928,
"loss": 1.1276,
"nll_loss": 1.083601474761963,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": -0.0998598113656044,
"rewards/margins": 0.05308016389608383,
"rewards/rejected": -0.15293996036052704,
"step": 55
},
{
"epoch": 1.847457627118644,
"eval_log_odds_chosen": 0.34833377599716187,
"eval_log_odds_ratio": -0.6186259984970093,
"eval_logits/chosen": -0.8269048929214478,
"eval_logits/rejected": -0.7455801963806152,
"eval_logps/chosen": -1.196911096572876,
"eval_logps/rejected": -1.437239646911621,
"eval_loss": 1.304530382156372,
"eval_nll_loss": 1.2846966981887817,
"eval_rewards/accuracies": 0.6875,
"eval_rewards/chosen": -0.11969111114740372,
"eval_rewards/margins": 0.02403285726904869,
"eval_rewards/rejected": -0.1437239646911621,
"eval_runtime": 2.3288,
"eval_samples_per_second": 6.441,
"eval_steps_per_second": 1.718,
"step": 55
},
{
"epoch": 2.0,
"grad_norm": 2.946364641189575,
"learning_rate": 2.8240740740740736e-08,
"log_odds_chosen": 0.6078373193740845,
"log_odds_ratio": -0.485850065946579,
"logits/chosen": -0.6273930668830872,
"logits/rejected": -0.21480894088745117,
"logps/chosen": -1.0067973136901855,
"logps/rejected": -1.4497339725494385,
"loss": 1.1433,
"nll_loss": 1.1121139526367188,
"rewards/accuracies": 0.8888888955116272,
"rewards/chosen": -0.10067972540855408,
"rewards/margins": 0.044293683022260666,
"rewards/rejected": -0.14497341215610504,
"step": 60
},
{
"epoch": 2.0,
"eval_log_odds_chosen": 0.34563085436820984,
"eval_log_odds_ratio": -0.6200534105300903,
"eval_logits/chosen": -0.8266342878341675,
"eval_logits/rejected": -0.7474446296691895,
"eval_logps/chosen": -1.198333740234375,
"eval_logps/rejected": -1.4364194869995117,
"eval_loss": 1.30304753780365,
"eval_nll_loss": 1.2828768491744995,
"eval_rewards/accuracies": 0.6875,
"eval_rewards/chosen": -0.11983337998390198,
"eval_rewards/margins": 0.02380857989192009,
"eval_rewards/rejected": -0.14364196360111237,
"eval_runtime": 2.3652,
"eval_samples_per_second": 6.342,
"eval_steps_per_second": 1.691,
"step": 60
},
{
"epoch": 2.169491525423729,
"grad_norm": 2.583970308303833,
"learning_rate": 2.5925925925925923e-08,
"log_odds_chosen": 0.6106057167053223,
"log_odds_ratio": -0.4779096245765686,
"logits/chosen": -0.686557412147522,
"logits/rejected": -0.2725212275981903,
"logps/chosen": -0.9596541523933411,
"logps/rejected": -1.4074879884719849,
"loss": 1.0813,
"nll_loss": 1.0335239171981812,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": -0.0959654226899147,
"rewards/margins": 0.044783372431993484,
"rewards/rejected": -0.1407487839460373,
"step": 65
},
{
"epoch": 2.169491525423729,
"eval_log_odds_chosen": 0.3470456004142761,
"eval_log_odds_ratio": -0.6191624402999878,
"eval_logits/chosen": -0.8260448575019836,
"eval_logits/rejected": -0.7457568645477295,
"eval_logps/chosen": -1.1975462436676025,
"eval_logps/rejected": -1.4365394115447998,
"eval_loss": 1.3012058734893799,
"eval_nll_loss": 1.2811425924301147,
"eval_rewards/accuracies": 0.6875,
"eval_rewards/chosen": -0.1197546198964119,
"eval_rewards/margins": 0.023899313062429428,
"eval_rewards/rejected": -0.14365392923355103,
"eval_runtime": 2.0569,
"eval_samples_per_second": 7.292,
"eval_steps_per_second": 1.945,
"step": 65
},
{
"epoch": 2.3389830508474576,
"grad_norm": 2.8739640712738037,
"learning_rate": 2.361111111111111e-08,
"log_odds_chosen": 0.6883528828620911,
"log_odds_ratio": -0.4613940119743347,
"logits/chosen": -0.6194564700126648,
"logits/rejected": -0.16226115822792053,
"logps/chosen": -1.0230361223220825,
"logps/rejected": -1.5271246433258057,
"loss": 1.0952,
"nll_loss": 1.0490230321884155,
"rewards/accuracies": 0.887499988079071,
"rewards/chosen": -0.10230360180139542,
"rewards/margins": 0.05040886998176575,
"rewards/rejected": -0.15271246433258057,
"step": 70
},
{
"epoch": 2.3389830508474576,
"eval_log_odds_chosen": 0.34613096714019775,
"eval_log_odds_ratio": -0.6190251111984253,
"eval_logits/chosen": -0.8254708051681519,
"eval_logits/rejected": -0.7465603351593018,
"eval_logps/chosen": -1.1970798969268799,
"eval_logps/rejected": -1.4355697631835938,
"eval_loss": 1.2998255491256714,
"eval_nll_loss": 1.279675006866455,
"eval_rewards/accuracies": 0.6875,
"eval_rewards/chosen": -0.11970800161361694,
"eval_rewards/margins": 0.023848986253142357,
"eval_rewards/rejected": -0.14355698227882385,
"eval_runtime": 2.1718,
"eval_samples_per_second": 6.907,
"eval_steps_per_second": 1.842,
"step": 70
},
{
"epoch": 2.5084745762711864,
"grad_norm": 2.5808846950531006,
"learning_rate": 2.1296296296296297e-08,
"log_odds_chosen": 0.517440915107727,
"log_odds_ratio": -0.5259458422660828,
"logits/chosen": -0.5467637181282043,
"logits/rejected": -0.10346652567386627,
"logps/chosen": -1.0675327777862549,
"logps/rejected": -1.437524437904358,
"loss": 1.2494,
"nll_loss": 1.1968111991882324,
"rewards/accuracies": 0.887499988079071,
"rewards/chosen": -0.10675326734781265,
"rewards/margins": 0.036999184638261795,
"rewards/rejected": -0.14375244081020355,
"step": 75
},
{
"epoch": 2.5084745762711864,
"eval_log_odds_chosen": 0.34606456756591797,
"eval_log_odds_ratio": -0.620042622089386,
"eval_logits/chosen": -0.8240770697593689,
"eval_logits/rejected": -0.7445046305656433,
"eval_logps/chosen": -1.1980996131896973,
"eval_logps/rejected": -1.436529517173767,
"eval_loss": 1.2983678579330444,
"eval_nll_loss": 1.27822744846344,
"eval_rewards/accuracies": 0.6875,
"eval_rewards/chosen": -0.11980997025966644,
"eval_rewards/margins": 0.023842979222536087,
"eval_rewards/rejected": -0.14365294575691223,
"eval_runtime": 2.3948,
"eval_samples_per_second": 6.264,
"eval_steps_per_second": 1.67,
"step": 75
},
{
"epoch": 2.6779661016949152,
"grad_norm": 3.2112486362457275,
"learning_rate": 1.898148148148148e-08,
"log_odds_chosen": 0.6882795095443726,
"log_odds_ratio": -0.4549444615840912,
"logits/chosen": -0.6079570651054382,
"logits/rejected": -0.1440545618534088,
"logps/chosen": -1.0264164209365845,
"logps/rejected": -1.5247279405593872,
"loss": 1.1376,
"nll_loss": 1.0920751094818115,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.10264164209365845,
"rewards/margins": 0.04983116313815117,
"rewards/rejected": -0.15247280895709991,
"step": 80
},
{
"epoch": 2.6779661016949152,
"eval_log_odds_chosen": 0.34864068031311035,
"eval_log_odds_ratio": -0.6184805631637573,
"eval_logits/chosen": -0.8214948773384094,
"eval_logits/rejected": -0.7387450933456421,
"eval_logps/chosen": -1.1962885856628418,
"eval_logps/rejected": -1.4366211891174316,
"eval_loss": 1.2968833446502686,
"eval_nll_loss": 1.2767653465270996,
"eval_rewards/accuracies": 0.6875,
"eval_rewards/chosen": -0.11962885409593582,
"eval_rewards/margins": 0.02403326891362667,
"eval_rewards/rejected": -0.14366212487220764,
"eval_runtime": 2.3377,
"eval_samples_per_second": 6.417,
"eval_steps_per_second": 1.711,
"step": 80
},
{
"epoch": 2.847457627118644,
"grad_norm": 1.839969277381897,
"learning_rate": 1.6666666666666664e-08,
"log_odds_chosen": 0.4969852864742279,
"log_odds_ratio": -0.532636284828186,
"logits/chosen": -0.5632266998291016,
"logits/rejected": -0.06907184422016144,
"logps/chosen": -1.0492427349090576,
"logps/rejected": -1.3902790546417236,
"loss": 1.2407,
"nll_loss": 1.1874761581420898,
"rewards/accuracies": 0.8125,
"rewards/chosen": -0.1049242839217186,
"rewards/margins": 0.03410361707210541,
"rewards/rejected": -0.1390278935432434,
"step": 85
},
{
"epoch": 2.847457627118644,
"eval_log_odds_chosen": 0.34886401891708374,
"eval_log_odds_ratio": -0.6193042993545532,
"eval_logits/chosen": -0.8233250379562378,
"eval_logits/rejected": -0.7452165484428406,
"eval_logps/chosen": -1.1970219612121582,
"eval_logps/rejected": -1.4376810789108276,
"eval_loss": 1.2961064577102661,
"eval_nll_loss": 1.2759020328521729,
"eval_rewards/accuracies": 0.6875,
"eval_rewards/chosen": -0.11970219016075134,
"eval_rewards/margins": 0.02406592108309269,
"eval_rewards/rejected": -0.14376811683177948,
"eval_runtime": 2.3326,
"eval_samples_per_second": 6.431,
"eval_steps_per_second": 1.715,
"step": 85
},
{
"epoch": 3.0,
"grad_norm": 2.8928427696228027,
"learning_rate": 1.435185185185185e-08,
"log_odds_chosen": 0.45060819387435913,
"log_odds_ratio": -0.5237997174263,
"logits/chosen": -0.5375601649284363,
"logits/rejected": -0.10669712722301483,
"logps/chosen": -1.026106595993042,
"logps/rejected": -1.3331278562545776,
"loss": 1.1632,
"nll_loss": 1.1324602365493774,
"rewards/accuracies": 0.8611111044883728,
"rewards/chosen": -0.10261066257953644,
"rewards/margins": 0.030702121555805206,
"rewards/rejected": -0.13331276178359985,
"step": 90
},
{
"epoch": 3.0,
"eval_log_odds_chosen": 0.3472563922405243,
"eval_log_odds_ratio": -0.6190530061721802,
"eval_logits/chosen": -0.8260915279388428,
"eval_logits/rejected": -0.7494507431983948,
"eval_logps/chosen": -1.1969666481018066,
"eval_logps/rejected": -1.4364831447601318,
"eval_loss": 1.295333743095398,
"eval_nll_loss": 1.2750121355056763,
"eval_rewards/accuracies": 0.6875,
"eval_rewards/chosen": -0.11969666182994843,
"eval_rewards/margins": 0.023951642215251923,
"eval_rewards/rejected": -0.14364831149578094,
"eval_runtime": 2.4234,
"eval_samples_per_second": 6.19,
"eval_steps_per_second": 1.651,
"step": 90
},
{
"epoch": 3.169491525423729,
"grad_norm": 2.178267478942871,
"learning_rate": 1.2037037037037036e-08,
"log_odds_chosen": 0.6245774030685425,
"log_odds_ratio": -0.47914019227027893,
"logits/chosen": -0.5936748385429382,
"logits/rejected": -0.12400760501623154,
"logps/chosen": -1.0799325704574585,
"logps/rejected": -1.5386043787002563,
"loss": 1.2075,
"nll_loss": 1.1595532894134521,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.10799328237771988,
"rewards/margins": 0.045867159962654114,
"rewards/rejected": -0.1538604199886322,
"step": 95
},
{
"epoch": 3.169491525423729,
"eval_log_odds_chosen": 0.3449009954929352,
"eval_log_odds_ratio": -0.6200248003005981,
"eval_logits/chosen": -0.8238758444786072,
"eval_logits/rejected": -0.7465205788612366,
"eval_logps/chosen": -1.1979708671569824,
"eval_logps/rejected": -1.4351496696472168,
"eval_loss": 1.2949864864349365,
"eval_nll_loss": 1.2746418714523315,
"eval_rewards/accuracies": 0.6875,
"eval_rewards/chosen": -0.11979708075523376,
"eval_rewards/margins": 0.02371787652373314,
"eval_rewards/rejected": -0.1435149610042572,
"eval_runtime": 2.3856,
"eval_samples_per_second": 6.288,
"eval_steps_per_second": 1.677,
"step": 95
},
{
"epoch": 3.3389830508474576,
"grad_norm": 3.0120768547058105,
"learning_rate": 9.722222222222223e-09,
"log_odds_chosen": 0.6266916990280151,
"log_odds_ratio": -0.46665820479393005,
"logits/chosen": -0.6890040636062622,
"logits/rejected": -0.2087690830230713,
"logps/chosen": -0.9734174013137817,
"logps/rejected": -1.4124778509140015,
"loss": 1.1383,
"nll_loss": 1.0916029214859009,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.09734174609184265,
"rewards/margins": 0.04390605166554451,
"rewards/rejected": -0.14124779403209686,
"step": 100
},
{
"epoch": 3.3389830508474576,
"eval_log_odds_chosen": 0.3461822271347046,
"eval_log_odds_ratio": -0.6199952960014343,
"eval_logits/chosen": -0.8260993957519531,
"eval_logits/rejected": -0.7494193911552429,
"eval_logps/chosen": -1.1973499059677124,
"eval_logps/rejected": -1.435782790184021,
"eval_loss": 1.2943472862243652,
"eval_nll_loss": 1.2740821838378906,
"eval_rewards/accuracies": 0.6875,
"eval_rewards/chosen": -0.1197349950671196,
"eval_rewards/margins": 0.02384328469634056,
"eval_rewards/rejected": -0.14357827603816986,
"eval_runtime": 2.297,
"eval_samples_per_second": 6.53,
"eval_steps_per_second": 1.741,
"step": 100
},
{
"epoch": 3.5084745762711864,
"grad_norm": 2.94162654876709,
"learning_rate": 7.407407407407406e-09,
"log_odds_chosen": 0.6522419452667236,
"log_odds_ratio": -0.4762607216835022,
"logits/chosen": -0.6378888487815857,
"logits/rejected": -0.20835626125335693,
"logps/chosen": -1.0351794958114624,
"logps/rejected": -1.5168087482452393,
"loss": 1.132,
"nll_loss": 1.0843795537948608,
"rewards/accuracies": 0.887499988079071,
"rewards/chosen": -0.10351793467998505,
"rewards/margins": 0.04816293716430664,
"rewards/rejected": -0.15168088674545288,
"step": 105
},
{
"epoch": 3.5084745762711864,
"eval_log_odds_chosen": 0.34624600410461426,
"eval_log_odds_ratio": -0.6196528673171997,
"eval_logits/chosen": -0.8230563402175903,
"eval_logits/rejected": -0.7455496191978455,
"eval_logps/chosen": -1.1972136497497559,
"eval_logps/rejected": -1.4357357025146484,
"eval_loss": 1.2938958406448364,
"eval_nll_loss": 1.273663878440857,
"eval_rewards/accuracies": 0.6875,
"eval_rewards/chosen": -0.11972138285636902,
"eval_rewards/margins": 0.023852191865444183,
"eval_rewards/rejected": -0.1435735523700714,
"eval_runtime": 2.0374,
"eval_samples_per_second": 7.362,
"eval_steps_per_second": 1.963,
"step": 105
},
{
"epoch": 3.6779661016949152,
"grad_norm": 2.145254373550415,
"learning_rate": 5.092592592592592e-09,
"log_odds_chosen": 0.4669066369533539,
"log_odds_ratio": -0.5435744524002075,
"logits/chosen": -0.5924472212791443,
"logits/rejected": -0.16264298558235168,
"logps/chosen": -1.0428065061569214,
"logps/rejected": -1.3670099973678589,
"loss": 1.2299,
"nll_loss": 1.1755752563476562,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.10428065061569214,
"rewards/margins": 0.03242034837603569,
"rewards/rejected": -0.13670100271701813,
"step": 110
},
{
"epoch": 3.6779661016949152,
"eval_log_odds_chosen": 0.34594425559043884,
"eval_log_odds_ratio": -0.6201038360595703,
"eval_logits/chosen": -0.8250141739845276,
"eval_logits/rejected": -0.74653559923172,
"eval_logps/chosen": -1.197659969329834,
"eval_logps/rejected": -1.436202883720398,
"eval_loss": 1.2942434549331665,
"eval_nll_loss": 1.2739633321762085,
"eval_rewards/accuracies": 0.6875,
"eval_rewards/chosen": -0.1197659969329834,
"eval_rewards/margins": 0.023854291066527367,
"eval_rewards/rejected": -0.14362028241157532,
"eval_runtime": 2.4284,
"eval_samples_per_second": 6.177,
"eval_steps_per_second": 1.647,
"step": 110
},
{
"epoch": 3.847457627118644,
"grad_norm": 2.368039846420288,
"learning_rate": 2.7777777777777776e-09,
"log_odds_chosen": 0.5323175191879272,
"log_odds_ratio": -0.5115704536437988,
"logits/chosen": -0.5521407127380371,
"logits/rejected": -0.0902065858244896,
"logps/chosen": -1.022303819656372,
"logps/rejected": -1.3985751867294312,
"loss": 1.2074,
"nll_loss": 1.1562185287475586,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": -0.10223038494586945,
"rewards/margins": 0.037627145648002625,
"rewards/rejected": -0.13985753059387207,
"step": 115
},
{
"epoch": 3.847457627118644,
"eval_log_odds_chosen": 0.3472224473953247,
"eval_log_odds_ratio": -0.6191097497940063,
"eval_logits/chosen": -0.8245607614517212,
"eval_logits/rejected": -0.7454671859741211,
"eval_logps/chosen": -1.1963317394256592,
"eval_logps/rejected": -1.4353883266448975,
"eval_loss": 1.2926907539367676,
"eval_nll_loss": 1.2724549770355225,
"eval_rewards/accuracies": 0.6875,
"eval_rewards/chosen": -0.11963316053152084,
"eval_rewards/margins": 0.023905668407678604,
"eval_rewards/rejected": -0.14353883266448975,
"eval_runtime": 2.3939,
"eval_samples_per_second": 6.266,
"eval_steps_per_second": 1.671,
"step": 115
},
{
"epoch": 4.0,
"grad_norm": 3.87864089012146,
"learning_rate": 4.629629629629629e-10,
"log_odds_chosen": 0.5788644552230835,
"log_odds_ratio": -0.4914652109146118,
"logits/chosen": -0.636318564414978,
"logits/rejected": -0.16115230321884155,
"logps/chosen": -1.008524775505066,
"logps/rejected": -1.4221386909484863,
"loss": 1.1531,
"nll_loss": 1.090425968170166,
"rewards/accuracies": 0.8472222089767456,
"rewards/chosen": -0.10085248947143555,
"rewards/margins": 0.04136139899492264,
"rewards/rejected": -0.1422138810157776,
"step": 120
},
{
"epoch": 4.0,
"eval_log_odds_chosen": 0.3468348979949951,
"eval_log_odds_ratio": -0.619236946105957,
"eval_logits/chosen": -0.8219494223594666,
"eval_logits/rejected": -0.7437239289283752,
"eval_logps/chosen": -1.1963438987731934,
"eval_logps/rejected": -1.435250997543335,
"eval_loss": 1.2931767702102661,
"eval_nll_loss": 1.2729113101959229,
"eval_rewards/accuracies": 0.6875,
"eval_rewards/chosen": -0.11963438987731934,
"eval_rewards/margins": 0.02389070764183998,
"eval_rewards/rejected": -0.14352509379386902,
"eval_runtime": 2.2505,
"eval_samples_per_second": 6.665,
"eval_steps_per_second": 1.777,
"step": 120
}
],
"logging_steps": 5,
"max_steps": 120,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 10,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}