davidanugraha's picture
Upload folder using huggingface_hub
b786928 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.0,
"eval_steps": 500,
"global_step": 66,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.031067961165048542,
"grad_norm": 4.351982116699219,
"learning_rate": 0.0,
"logits/chosen": -2.1781115531921387,
"logits/rejected": -2.0086305141448975,
"logps/chosen": -0.42091915011405945,
"logps/rejected": -0.3680277466773987,
"loss": 1.3928,
"rewards/accuracies": 0.109375,
"rewards/chosen": -0.8418383598327637,
"rewards/margins": -0.10578285157680511,
"rewards/rejected": -0.7360554933547974,
"step": 1
},
{
"epoch": 0.062135922330097085,
"grad_norm": 4.56733512878418,
"learning_rate": 1.4285714285714285e-07,
"logits/chosen": -2.1738781929016113,
"logits/rejected": -1.9470911026000977,
"logps/chosen": -0.435973584651947,
"logps/rejected": -0.3837018609046936,
"loss": 1.3924,
"rewards/accuracies": 0.203125,
"rewards/chosen": -0.871947169303894,
"rewards/margins": -0.10454347729682922,
"rewards/rejected": -0.7674036026000977,
"step": 2
},
{
"epoch": 0.09320388349514563,
"grad_norm": 5.2480902671813965,
"learning_rate": 2.857142857142857e-07,
"logits/chosen": -1.8973408937454224,
"logits/rejected": -2.1369576454162598,
"logps/chosen": -0.44170576333999634,
"logps/rejected": -0.37628883123397827,
"loss": 1.4124,
"rewards/accuracies": 0.15625,
"rewards/chosen": -0.8834115862846375,
"rewards/margins": -0.13083389401435852,
"rewards/rejected": -0.7525776624679565,
"step": 3
},
{
"epoch": 0.12427184466019417,
"grad_norm": 4.116491317749023,
"learning_rate": 4.285714285714285e-07,
"logits/chosen": -1.882596731185913,
"logits/rejected": -2.0142886638641357,
"logps/chosen": -0.4188133180141449,
"logps/rejected": -0.36549994349479675,
"loss": 1.3944,
"rewards/accuracies": 0.1875,
"rewards/chosen": -0.8376266360282898,
"rewards/margins": -0.10662679374217987,
"rewards/rejected": -0.7309998273849487,
"step": 4
},
{
"epoch": 0.1553398058252427,
"grad_norm": 4.484996318817139,
"learning_rate": 5.714285714285714e-07,
"logits/chosen": -1.6435747146606445,
"logits/rejected": -1.9209181070327759,
"logps/chosen": -0.41533803939819336,
"logps/rejected": -0.35695987939834595,
"loss": 1.403,
"rewards/accuracies": 0.140625,
"rewards/chosen": -0.8306760787963867,
"rewards/margins": -0.11675624549388885,
"rewards/rejected": -0.7139198184013367,
"step": 5
},
{
"epoch": 0.18640776699029127,
"grad_norm": 4.051513671875,
"learning_rate": 7.142857142857143e-07,
"logits/chosen": -1.8809208869934082,
"logits/rejected": -2.0783591270446777,
"logps/chosen": -0.41764718294143677,
"logps/rejected": -0.36657023429870605,
"loss": 1.3926,
"rewards/accuracies": 0.28125,
"rewards/chosen": -0.8352943658828735,
"rewards/margins": -0.10215392708778381,
"rewards/rejected": -0.7331404685974121,
"step": 6
},
{
"epoch": 0.2174757281553398,
"grad_norm": 4.2792534828186035,
"learning_rate": 8.57142857142857e-07,
"logits/chosen": -1.6469590663909912,
"logits/rejected": -2.0544652938842773,
"logps/chosen": -0.4287745952606201,
"logps/rejected": -0.38292643427848816,
"loss": 1.3821,
"rewards/accuracies": 0.140625,
"rewards/chosen": -0.8575491905212402,
"rewards/margins": -0.09169630706310272,
"rewards/rejected": -0.7658529281616211,
"step": 7
},
{
"epoch": 0.24854368932038834,
"grad_norm": 3.5870370864868164,
"learning_rate": 1e-06,
"logits/chosen": -2.1423072814941406,
"logits/rejected": -2.0690231323242188,
"logps/chosen": -0.4060017466545105,
"logps/rejected": -0.3687712550163269,
"loss": 1.3692,
"rewards/accuracies": 0.234375,
"rewards/chosen": -0.8120035529136658,
"rewards/margins": -0.07446099817752838,
"rewards/rejected": -0.7375425100326538,
"step": 8
},
{
"epoch": 0.2796116504854369,
"grad_norm": 4.142773151397705,
"learning_rate": 9.830508474576272e-07,
"logits/chosen": -1.6917753219604492,
"logits/rejected": -2.048977851867676,
"logps/chosen": -0.4064311385154724,
"logps/rejected": -0.35661497712135315,
"loss": 1.388,
"rewards/accuracies": 0.140625,
"rewards/chosen": -0.8128622174263,
"rewards/margins": -0.09963233023881912,
"rewards/rejected": -0.7132298946380615,
"step": 9
},
{
"epoch": 0.3106796116504854,
"grad_norm": 4.133231163024902,
"learning_rate": 9.661016949152542e-07,
"logits/chosen": -1.9124929904937744,
"logits/rejected": -2.001279354095459,
"logps/chosen": -0.40286165475845337,
"logps/rejected": -0.37157756090164185,
"loss": 1.3602,
"rewards/accuracies": 0.1875,
"rewards/chosen": -0.8057233095169067,
"rewards/margins": -0.06256821006536484,
"rewards/rejected": -0.7431551218032837,
"step": 10
},
{
"epoch": 0.341747572815534,
"grad_norm": 3.5376193523406982,
"learning_rate": 9.491525423728813e-07,
"logits/chosen": -1.574399709701538,
"logits/rejected": -1.9749456644058228,
"logps/chosen": -0.38429805636405945,
"logps/rejected": -0.34970927238464355,
"loss": 1.3652,
"rewards/accuracies": 0.234375,
"rewards/chosen": -0.7685961127281189,
"rewards/margins": -0.06917756795883179,
"rewards/rejected": -0.6994185447692871,
"step": 11
},
{
"epoch": 0.37281553398058254,
"grad_norm": 3.29012393951416,
"learning_rate": 9.322033898305083e-07,
"logits/chosen": -2.0206141471862793,
"logits/rejected": -1.8409574031829834,
"logps/chosen": -0.3829426169395447,
"logps/rejected": -0.3571726381778717,
"loss": 1.3518,
"rewards/accuracies": 0.265625,
"rewards/chosen": -0.7658852338790894,
"rewards/margins": -0.05153997987508774,
"rewards/rejected": -0.7143452763557434,
"step": 12
},
{
"epoch": 0.40388349514563104,
"grad_norm": 2.9538028240203857,
"learning_rate": 9.152542372881356e-07,
"logits/chosen": -1.8269423246383667,
"logits/rejected": -1.921961784362793,
"logps/chosen": -0.378287136554718,
"logps/rejected": -0.3551200032234192,
"loss": 1.348,
"rewards/accuracies": 0.28125,
"rewards/chosen": -0.756574273109436,
"rewards/margins": -0.04633423313498497,
"rewards/rejected": -0.7102400064468384,
"step": 13
},
{
"epoch": 0.4349514563106796,
"grad_norm": 2.9866271018981934,
"learning_rate": 8.983050847457627e-07,
"logits/chosen": -1.4229835271835327,
"logits/rejected": -2.0510945320129395,
"logps/chosen": -0.353898286819458,
"logps/rejected": -0.3331609070301056,
"loss": 1.3444,
"rewards/accuracies": 0.265625,
"rewards/chosen": -0.707796573638916,
"rewards/margins": -0.04147477447986603,
"rewards/rejected": -0.6663218140602112,
"step": 14
},
{
"epoch": 0.46601941747572817,
"grad_norm": 2.7256155014038086,
"learning_rate": 8.813559322033897e-07,
"logits/chosen": -1.509905457496643,
"logits/rejected": -1.967405080795288,
"logps/chosen": -0.3514465391635895,
"logps/rejected": -0.3335171341896057,
"loss": 1.3402,
"rewards/accuracies": 0.359375,
"rewards/chosen": -0.702893078327179,
"rewards/margins": -0.035858701914548874,
"rewards/rejected": -0.6670343279838562,
"step": 15
},
{
"epoch": 0.4970873786407767,
"grad_norm": 2.9903502464294434,
"learning_rate": 8.64406779661017e-07,
"logits/chosen": -1.8509142398834229,
"logits/rejected": -1.8535449504852295,
"logps/chosen": -0.3595220148563385,
"logps/rejected": -0.3445945084095001,
"loss": 1.3357,
"rewards/accuracies": 0.390625,
"rewards/chosen": -0.7190440893173218,
"rewards/margins": -0.029855070635676384,
"rewards/rejected": -0.6891890168190002,
"step": 16
},
{
"epoch": 0.5281553398058253,
"grad_norm": 3.1175427436828613,
"learning_rate": 8.47457627118644e-07,
"logits/chosen": -1.441454291343689,
"logits/rejected": -1.8639237880706787,
"logps/chosen": -0.3568180799484253,
"logps/rejected": -0.3417421281337738,
"loss": 1.336,
"rewards/accuracies": 0.375,
"rewards/chosen": -0.7136362195014954,
"rewards/margins": -0.03015192598104477,
"rewards/rejected": -0.6834842562675476,
"step": 17
},
{
"epoch": 0.5592233009708738,
"grad_norm": 3.23822283744812,
"learning_rate": 8.305084745762712e-07,
"logits/chosen": -2.449878692626953,
"logits/rejected": -1.8969470262527466,
"logps/chosen": -0.3514954149723053,
"logps/rejected": -0.3440922498703003,
"loss": 1.3248,
"rewards/accuracies": 0.40625,
"rewards/chosen": -0.7029908895492554,
"rewards/margins": -0.014806347899138927,
"rewards/rejected": -0.6881846189498901,
"step": 18
},
{
"epoch": 0.5902912621359223,
"grad_norm": 3.0648770332336426,
"learning_rate": 8.135593220338983e-07,
"logits/chosen": -0.8830795288085938,
"logits/rejected": -1.7158455848693848,
"logps/chosen": -0.360805481672287,
"logps/rejected": -0.3484087288379669,
"loss": 1.3321,
"rewards/accuracies": 0.34375,
"rewards/chosen": -0.7216110229492188,
"rewards/margins": -0.024793537333607674,
"rewards/rejected": -0.6968174576759338,
"step": 19
},
{
"epoch": 0.6213592233009708,
"grad_norm": 3.0875160694122314,
"learning_rate": 7.966101694915253e-07,
"logits/chosen": -1.4594852924346924,
"logits/rejected": -1.7742846012115479,
"logps/chosen": -0.3592742085456848,
"logps/rejected": -0.35010093450546265,
"loss": 1.3272,
"rewards/accuracies": 0.421875,
"rewards/chosen": -0.7185484170913696,
"rewards/margins": -0.018346579745411873,
"rewards/rejected": -0.7002018690109253,
"step": 20
},
{
"epoch": 0.6524271844660194,
"grad_norm": 2.981257438659668,
"learning_rate": 7.796610169491526e-07,
"logits/chosen": -1.7105916738510132,
"logits/rejected": -1.9463882446289062,
"logps/chosen": -0.349058598279953,
"logps/rejected": -0.3355495035648346,
"loss": 1.3336,
"rewards/accuracies": 0.34375,
"rewards/chosen": -0.6981171369552612,
"rewards/margins": -0.027018137276172638,
"rewards/rejected": -0.6710990071296692,
"step": 21
},
{
"epoch": 0.683495145631068,
"grad_norm": 2.7408576011657715,
"learning_rate": 7.627118644067796e-07,
"logits/chosen": -1.390715479850769,
"logits/rejected": -1.7966665029525757,
"logps/chosen": -0.35181212425231934,
"logps/rejected": -0.3509633243083954,
"loss": 1.3151,
"rewards/accuracies": 0.484375,
"rewards/chosen": -0.7036243677139282,
"rewards/margins": -0.0016976958140730858,
"rewards/rejected": -0.7019267082214355,
"step": 22
},
{
"epoch": 0.7145631067961165,
"grad_norm": 2.6456706523895264,
"learning_rate": 7.457627118644067e-07,
"logits/chosen": -1.0340750217437744,
"logits/rejected": -1.785604476928711,
"logps/chosen": -0.32947736978530884,
"logps/rejected": -0.3392854332923889,
"loss": 1.2995,
"rewards/accuracies": 0.546875,
"rewards/chosen": -0.6589547395706177,
"rewards/margins": 0.019616127014160156,
"rewards/rejected": -0.6785709261894226,
"step": 23
},
{
"epoch": 0.7456310679611651,
"grad_norm": 2.7873260974884033,
"learning_rate": 7.288135593220338e-07,
"logits/chosen": -1.7551474571228027,
"logits/rejected": -1.686167597770691,
"logps/chosen": -0.3260516822338104,
"logps/rejected": -0.34417179226875305,
"loss": 1.2874,
"rewards/accuracies": 0.734375,
"rewards/chosen": -0.6521033644676208,
"rewards/margins": 0.036240264773368835,
"rewards/rejected": -0.6883435845375061,
"step": 24
},
{
"epoch": 0.7766990291262136,
"grad_norm": 2.6608099937438965,
"learning_rate": 7.11864406779661e-07,
"logits/chosen": -1.7619459629058838,
"logits/rejected": -1.6662724018096924,
"logps/chosen": -0.3297329843044281,
"logps/rejected": -0.34416788816452026,
"loss": 1.2928,
"rewards/accuracies": 0.640625,
"rewards/chosen": -0.6594659090042114,
"rewards/margins": 0.028869757428765297,
"rewards/rejected": -0.6883357763290405,
"step": 25
},
{
"epoch": 0.8077669902912621,
"grad_norm": 2.6482815742492676,
"learning_rate": 6.949152542372881e-07,
"logits/chosen": -1.59649658203125,
"logits/rejected": -1.6123666763305664,
"logps/chosen": -0.3259614109992981,
"logps/rejected": -0.3369159400463104,
"loss": 1.2976,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.6519228219985962,
"rewards/margins": 0.021909058094024658,
"rewards/rejected": -0.6738318800926208,
"step": 26
},
{
"epoch": 0.8388349514563107,
"grad_norm": 3.29089617729187,
"learning_rate": 6.779661016949152e-07,
"logits/chosen": -1.3679349422454834,
"logits/rejected": -1.7579234838485718,
"logps/chosen": -0.323432981967926,
"logps/rejected": -0.33978521823883057,
"loss": 1.2901,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.6468659043312073,
"rewards/margins": 0.032704513520002365,
"rewards/rejected": -0.6795704364776611,
"step": 27
},
{
"epoch": 0.8699029126213592,
"grad_norm": 2.870760917663574,
"learning_rate": 6.610169491525423e-07,
"logits/chosen": -2.230008363723755,
"logits/rejected": -1.7593262195587158,
"logps/chosen": -0.318013995885849,
"logps/rejected": -0.3359982967376709,
"loss": 1.2879,
"rewards/accuracies": 0.703125,
"rewards/chosen": -0.6360279321670532,
"rewards/margins": 0.03596857190132141,
"rewards/rejected": -0.6719965934753418,
"step": 28
},
{
"epoch": 0.9009708737864077,
"grad_norm": 3.2132599353790283,
"learning_rate": 6.440677966101694e-07,
"logits/chosen": -1.602566123008728,
"logits/rejected": -1.7038823366165161,
"logps/chosen": -0.3213762044906616,
"logps/rejected": -0.34595417976379395,
"loss": 1.2782,
"rewards/accuracies": 0.765625,
"rewards/chosen": -0.6427524089813232,
"rewards/margins": 0.04915595054626465,
"rewards/rejected": -0.6919083595275879,
"step": 29
},
{
"epoch": 0.9320388349514563,
"grad_norm": 2.9022674560546875,
"learning_rate": 6.271186440677966e-07,
"logits/chosen": -2.0426878929138184,
"logits/rejected": -1.8543050289154053,
"logps/chosen": -0.31085798144340515,
"logps/rejected": -0.3321978747844696,
"loss": 1.283,
"rewards/accuracies": 0.671875,
"rewards/chosen": -0.6217159628868103,
"rewards/margins": 0.04267987981438637,
"rewards/rejected": -0.664395809173584,
"step": 30
},
{
"epoch": 0.9631067961165048,
"grad_norm": 3.107703924179077,
"learning_rate": 6.101694915254237e-07,
"logits/chosen": -1.1401560306549072,
"logits/rejected": -1.7388765811920166,
"logps/chosen": -0.31906136870384216,
"logps/rejected": -0.35285234451293945,
"loss": 1.2653,
"rewards/accuracies": 0.78125,
"rewards/chosen": -0.6381226778030396,
"rewards/margins": 0.06758201867341995,
"rewards/rejected": -0.7057047486305237,
"step": 31
},
{
"epoch": 0.9941747572815534,
"grad_norm": 2.563669443130493,
"learning_rate": 5.932203389830508e-07,
"logits/chosen": -1.3182499408721924,
"logits/rejected": -1.7714648246765137,
"logps/chosen": -0.31395667791366577,
"logps/rejected": -0.3352799117565155,
"loss": 1.283,
"rewards/accuracies": 0.640625,
"rewards/chosen": -0.6279132962226868,
"rewards/margins": 0.042646490037441254,
"rewards/rejected": -0.670559823513031,
"step": 32
},
{
"epoch": 1.0,
"grad_norm": 0.8014712929725647,
"learning_rate": 5.76271186440678e-07,
"logits/chosen": -1.282576322555542,
"logits/rejected": -1.4453270435333252,
"logps/chosen": -0.3373573422431946,
"logps/rejected": -0.3649425506591797,
"loss": 0.2388,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.6747146844863892,
"rewards/margins": 0.05517040938138962,
"rewards/rejected": -0.7298850417137146,
"step": 33
},
{
"epoch": 1.0310679611650486,
"grad_norm": 2.6547462940216064,
"learning_rate": 5.59322033898305e-07,
"logits/chosen": -1.7090933322906494,
"logits/rejected": -1.6342582702636719,
"logps/chosen": -0.3321191072463989,
"logps/rejected": -0.35813525319099426,
"loss": 1.2762,
"rewards/accuracies": 0.703125,
"rewards/chosen": -0.6642382144927979,
"rewards/margins": 0.05203229561448097,
"rewards/rejected": -0.7162705063819885,
"step": 34
},
{
"epoch": 1.062135922330097,
"grad_norm": 2.929637908935547,
"learning_rate": 5.423728813559322e-07,
"logits/chosen": -1.289225697517395,
"logits/rejected": -1.8013920783996582,
"logps/chosen": -0.3155551552772522,
"logps/rejected": -0.3497050404548645,
"loss": 1.2647,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.6311103105545044,
"rewards/margins": 0.06829972565174103,
"rewards/rejected": -0.6994100213050842,
"step": 35
},
{
"epoch": 1.0932038834951456,
"grad_norm": 2.994192600250244,
"learning_rate": 5.254237288135593e-07,
"logits/chosen": -1.562676191329956,
"logits/rejected": -1.5841116905212402,
"logps/chosen": -0.3231390118598938,
"logps/rejected": -0.35967499017715454,
"loss": 1.2613,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.6462780237197876,
"rewards/margins": 0.07307194173336029,
"rewards/rejected": -0.7193499207496643,
"step": 36
},
{
"epoch": 1.1242718446601943,
"grad_norm": 3.348798990249634,
"learning_rate": 5.084745762711864e-07,
"logits/chosen": -1.6281405687332153,
"logits/rejected": -1.634437084197998,
"logps/chosen": -0.3220047950744629,
"logps/rejected": -0.35870927572250366,
"loss": 1.2613,
"rewards/accuracies": 0.71875,
"rewards/chosen": -0.6440095901489258,
"rewards/margins": 0.07340894639492035,
"rewards/rejected": -0.7174185514450073,
"step": 37
},
{
"epoch": 1.1553398058252426,
"grad_norm": 3.086836576461792,
"learning_rate": 4.915254237288136e-07,
"logits/chosen": -1.4916963577270508,
"logits/rejected": -1.7688562870025635,
"logps/chosen": -0.30526435375213623,
"logps/rejected": -0.353664755821228,
"loss": 1.245,
"rewards/accuracies": 0.8125,
"rewards/chosen": -0.6105287075042725,
"rewards/margins": 0.0968008041381836,
"rewards/rejected": -0.707329511642456,
"step": 38
},
{
"epoch": 1.1864077669902913,
"grad_norm": 2.953174352645874,
"learning_rate": 4.7457627118644066e-07,
"logits/chosen": -2.1249518394470215,
"logits/rejected": -1.8521509170532227,
"logps/chosen": -0.3190627992153168,
"logps/rejected": -0.35821837186813354,
"loss": 1.2579,
"rewards/accuracies": 0.828125,
"rewards/chosen": -0.6381255984306335,
"rewards/margins": 0.07831110060214996,
"rewards/rejected": -0.7164367437362671,
"step": 39
},
{
"epoch": 1.2174757281553399,
"grad_norm": 2.8506457805633545,
"learning_rate": 4.576271186440678e-07,
"logits/chosen": -1.2953447103500366,
"logits/rejected": -1.6804742813110352,
"logps/chosen": -0.329822838306427,
"logps/rejected": -0.3734983801841736,
"loss": 1.2516,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.6596456170082092,
"rewards/margins": 0.08735115081071854,
"rewards/rejected": -0.7469968199729919,
"step": 40
},
{
"epoch": 1.2485436893203883,
"grad_norm": 2.7674953937530518,
"learning_rate": 4.4067796610169486e-07,
"logits/chosen": -2.0186150074005127,
"logits/rejected": -1.836294174194336,
"logps/chosen": -0.315221905708313,
"logps/rejected": -0.34855592250823975,
"loss": 1.266,
"rewards/accuracies": 0.703125,
"rewards/chosen": -0.630443811416626,
"rewards/margins": 0.0666680634021759,
"rewards/rejected": -0.6971118450164795,
"step": 41
},
{
"epoch": 1.279611650485437,
"grad_norm": 2.549076795578003,
"learning_rate": 4.23728813559322e-07,
"logits/chosen": -1.6427559852600098,
"logits/rejected": -1.9293004274368286,
"logps/chosen": -0.31294625997543335,
"logps/rejected": -0.34532809257507324,
"loss": 1.2677,
"rewards/accuracies": 0.671875,
"rewards/chosen": -0.6258925199508667,
"rewards/margins": 0.0647636353969574,
"rewards/rejected": -0.6906561255455017,
"step": 42
},
{
"epoch": 1.3106796116504853,
"grad_norm": 3.1989340782165527,
"learning_rate": 4.0677966101694916e-07,
"logits/chosen": -0.44606664776802063,
"logits/rejected": -1.8360792398452759,
"logps/chosen": -0.31408655643463135,
"logps/rejected": -0.3622249960899353,
"loss": 1.2452,
"rewards/accuracies": 0.796875,
"rewards/chosen": -0.6281731128692627,
"rewards/margins": 0.09627692401409149,
"rewards/rejected": -0.7244499921798706,
"step": 43
},
{
"epoch": 1.341747572815534,
"grad_norm": 3.1109275817871094,
"learning_rate": 3.898305084745763e-07,
"logits/chosen": -1.2755181789398193,
"logits/rejected": -1.837625503540039,
"logps/chosen": -0.31347301602363586,
"logps/rejected": -0.36295440793037415,
"loss": 1.2431,
"rewards/accuracies": 0.859375,
"rewards/chosen": -0.626945972442627,
"rewards/margins": 0.09896278381347656,
"rewards/rejected": -0.7259088754653931,
"step": 44
},
{
"epoch": 1.3728155339805825,
"grad_norm": 3.2324881553649902,
"learning_rate": 3.7288135593220336e-07,
"logits/chosen": -1.336294174194336,
"logits/rejected": -1.8953676223754883,
"logps/chosen": -0.3207179307937622,
"logps/rejected": -0.3695950508117676,
"loss": 1.244,
"rewards/accuracies": 0.84375,
"rewards/chosen": -0.6414358019828796,
"rewards/margins": 0.09775425493717194,
"rewards/rejected": -0.7391901016235352,
"step": 45
},
{
"epoch": 1.4038834951456312,
"grad_norm": 3.4072141647338867,
"learning_rate": 3.559322033898305e-07,
"logits/chosen": -1.3765511512756348,
"logits/rejected": -1.7508171796798706,
"logps/chosen": -0.3291308879852295,
"logps/rejected": -0.3830956518650055,
"loss": 1.237,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.658261775970459,
"rewards/margins": 0.10792950540781021,
"rewards/rejected": -0.766191303730011,
"step": 46
},
{
"epoch": 1.4349514563106796,
"grad_norm": 3.158661127090454,
"learning_rate": 3.389830508474576e-07,
"logits/chosen": -1.9516403675079346,
"logits/rejected": -1.741844654083252,
"logps/chosen": -0.33282211422920227,
"logps/rejected": -0.37869203090667725,
"loss": 1.2486,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.6656442284584045,
"rewards/margins": 0.09173984825611115,
"rewards/rejected": -0.7573840618133545,
"step": 47
},
{
"epoch": 1.4660194174757282,
"grad_norm": 2.9642040729522705,
"learning_rate": 3.220338983050847e-07,
"logits/chosen": -1.4430270195007324,
"logits/rejected": -1.8442091941833496,
"logps/chosen": -0.3115312457084656,
"logps/rejected": -0.3656957149505615,
"loss": 1.2367,
"rewards/accuracies": 0.828125,
"rewards/chosen": -0.6230624914169312,
"rewards/margins": 0.10832884907722473,
"rewards/rejected": -0.7313913702964783,
"step": 48
},
{
"epoch": 1.4970873786407766,
"grad_norm": 3.461207628250122,
"learning_rate": 3.0508474576271186e-07,
"logits/chosen": -2.283356189727783,
"logits/rejected": -1.781760573387146,
"logps/chosen": -0.33550429344177246,
"logps/rejected": -0.39154571294784546,
"loss": 1.2353,
"rewards/accuracies": 0.734375,
"rewards/chosen": -0.6710086464881897,
"rewards/margins": 0.1120828241109848,
"rewards/rejected": -0.7830914258956909,
"step": 49
},
{
"epoch": 1.5281553398058252,
"grad_norm": 3.681100845336914,
"learning_rate": 2.88135593220339e-07,
"logits/chosen": -1.8599827289581299,
"logits/rejected": -1.890540599822998,
"logps/chosen": -0.32184410095214844,
"logps/rejected": -0.39129096269607544,
"loss": 1.2159,
"rewards/accuracies": 0.796875,
"rewards/chosen": -0.6436882019042969,
"rewards/margins": 0.1388937532901764,
"rewards/rejected": -0.7825819253921509,
"step": 50
},
{
"epoch": 1.5592233009708738,
"grad_norm": 3.253593921661377,
"learning_rate": 2.711864406779661e-07,
"logits/chosen": -1.5855774879455566,
"logits/rejected": -1.7609983682632446,
"logps/chosen": -0.3321957588195801,
"logps/rejected": -0.3847588002681732,
"loss": 1.2399,
"rewards/accuracies": 0.78125,
"rewards/chosen": -0.6643915176391602,
"rewards/margins": 0.10512607544660568,
"rewards/rejected": -0.7695176005363464,
"step": 51
},
{
"epoch": 1.5902912621359224,
"grad_norm": 3.7317774295806885,
"learning_rate": 2.542372881355932e-07,
"logits/chosen": -0.956875205039978,
"logits/rejected": -1.6611835956573486,
"logps/chosen": -0.3262099623680115,
"logps/rejected": -0.40217769145965576,
"loss": 1.2076,
"rewards/accuracies": 0.796875,
"rewards/chosen": -0.652419924736023,
"rewards/margins": 0.15193548798561096,
"rewards/rejected": -0.8043554425239563,
"step": 52
},
{
"epoch": 1.6213592233009708,
"grad_norm": 3.586879253387451,
"learning_rate": 2.3728813559322033e-07,
"logits/chosen": -2.1548349857330322,
"logits/rejected": -1.7199804782867432,
"logps/chosen": -0.34391993284225464,
"logps/rejected": -0.40578746795654297,
"loss": 1.2262,
"rewards/accuracies": 0.828125,
"rewards/chosen": -0.6878398656845093,
"rewards/margins": 0.12373502552509308,
"rewards/rejected": -0.8115749359130859,
"step": 53
},
{
"epoch": 1.6524271844660194,
"grad_norm": 3.669318437576294,
"learning_rate": 2.2033898305084743e-07,
"logits/chosen": -2.271226406097412,
"logits/rejected": -2.0481715202331543,
"logps/chosen": -0.3331441879272461,
"logps/rejected": -0.38672688603401184,
"loss": 1.2378,
"rewards/accuracies": 0.828125,
"rewards/chosen": -0.6662883758544922,
"rewards/margins": 0.10716544091701508,
"rewards/rejected": -0.7734538316726685,
"step": 54
},
{
"epoch": 1.6834951456310678,
"grad_norm": 3.6836650371551514,
"learning_rate": 2.0338983050847458e-07,
"logits/chosen": -2.120683193206787,
"logits/rejected": -1.630366563796997,
"logps/chosen": -0.3574049174785614,
"logps/rejected": -0.43239909410476685,
"loss": 1.2089,
"rewards/accuracies": 0.828125,
"rewards/chosen": -0.714809775352478,
"rewards/margins": 0.14998838305473328,
"rewards/rejected": -0.8647981882095337,
"step": 55
},
{
"epoch": 1.7145631067961165,
"grad_norm": 3.2008886337280273,
"learning_rate": 1.8644067796610168e-07,
"logits/chosen": -1.9383658170700073,
"logits/rejected": -1.9295272827148438,
"logps/chosen": -0.3346194922924042,
"logps/rejected": -0.384814590215683,
"loss": 1.2424,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.6692389845848083,
"rewards/margins": 0.10039019584655762,
"rewards/rejected": -0.769629180431366,
"step": 56
},
{
"epoch": 1.745631067961165,
"grad_norm": 4.206936359405518,
"learning_rate": 1.694915254237288e-07,
"logits/chosen": -1.215062141418457,
"logits/rejected": -1.7683817148208618,
"logps/chosen": -0.34487807750701904,
"logps/rejected": -0.43873488903045654,
"loss": 1.1825,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.6897561550140381,
"rewards/margins": 0.1877136081457138,
"rewards/rejected": -0.8774697780609131,
"step": 57
},
{
"epoch": 1.7766990291262137,
"grad_norm": 3.9559454917907715,
"learning_rate": 1.5254237288135593e-07,
"logits/chosen": -1.3999274969100952,
"logits/rejected": -1.713794231414795,
"logps/chosen": -0.37690550088882446,
"logps/rejected": -0.4559113383293152,
"loss": 1.2033,
"rewards/accuracies": 0.8125,
"rewards/chosen": -0.7538109421730042,
"rewards/margins": 0.15801168978214264,
"rewards/rejected": -0.9118226766586304,
"step": 58
},
{
"epoch": 1.807766990291262,
"grad_norm": 4.222906112670898,
"learning_rate": 1.3559322033898305e-07,
"logits/chosen": -2.0715479850769043,
"logits/rejected": -1.6846166849136353,
"logps/chosen": -0.3247872591018677,
"logps/rejected": -0.4152475893497467,
"loss": 1.1888,
"rewards/accuracies": 0.84375,
"rewards/chosen": -0.6495745182037354,
"rewards/margins": 0.18092064559459686,
"rewards/rejected": -0.8304951786994934,
"step": 59
},
{
"epoch": 1.8388349514563107,
"grad_norm": 3.675178289413452,
"learning_rate": 1.1864406779661017e-07,
"logits/chosen": -1.9317084550857544,
"logits/rejected": -1.8160873651504517,
"logps/chosen": -0.36124035716056824,
"logps/rejected": -0.44001373648643494,
"loss": 1.2037,
"rewards/accuracies": 0.859375,
"rewards/chosen": -0.7224806547164917,
"rewards/margins": 0.15754681825637817,
"rewards/rejected": -0.8800274729728699,
"step": 60
},
{
"epoch": 1.869902912621359,
"grad_norm": 3.314823627471924,
"learning_rate": 1.0169491525423729e-07,
"logits/chosen": -2.0573697090148926,
"logits/rejected": -1.8544249534606934,
"logps/chosen": -0.34472307562828064,
"logps/rejected": -0.4054674506187439,
"loss": 1.2287,
"rewards/accuracies": 0.8125,
"rewards/chosen": -0.6894460916519165,
"rewards/margins": 0.12148873507976532,
"rewards/rejected": -0.8109349012374878,
"step": 61
},
{
"epoch": 1.9009708737864077,
"grad_norm": 3.7633745670318604,
"learning_rate": 8.47457627118644e-08,
"logits/chosen": -1.8335437774658203,
"logits/rejected": -1.7075271606445312,
"logps/chosen": -0.36628228425979614,
"logps/rejected": -0.44996723532676697,
"loss": 1.1967,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.7325646281242371,
"rewards/margins": 0.16736987233161926,
"rewards/rejected": -0.8999344110488892,
"step": 62
},
{
"epoch": 1.9320388349514563,
"grad_norm": 4.369020938873291,
"learning_rate": 6.779661016949153e-08,
"logits/chosen": -2.20133900642395,
"logits/rejected": -1.7921724319458008,
"logps/chosen": -0.3454199433326721,
"logps/rejected": -0.43609410524368286,
"loss": 1.1872,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.6908398866653442,
"rewards/margins": 0.1813482791185379,
"rewards/rejected": -0.8721882104873657,
"step": 63
},
{
"epoch": 1.963106796116505,
"grad_norm": 3.21553373336792,
"learning_rate": 5.0847457627118645e-08,
"logits/chosen": -1.8866002559661865,
"logits/rejected": -1.80937659740448,
"logps/chosen": -0.33643844723701477,
"logps/rejected": -0.3921111226081848,
"loss": 1.2359,
"rewards/accuracies": 0.765625,
"rewards/chosen": -0.6728769540786743,
"rewards/margins": 0.11134527623653412,
"rewards/rejected": -0.7842222452163696,
"step": 64
},
{
"epoch": 1.9941747572815534,
"grad_norm": 3.8623664379119873,
"learning_rate": 3.3898305084745764e-08,
"logits/chosen": -1.909616231918335,
"logits/rejected": -1.7134193181991577,
"logps/chosen": -0.3720588684082031,
"logps/rejected": -0.45723646879196167,
"loss": 1.1957,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.7441176772117615,
"rewards/margins": 0.17035523056983948,
"rewards/rejected": -0.9144729375839233,
"step": 65
},
{
"epoch": 2.0,
"grad_norm": 1.0719321966171265,
"learning_rate": 1.6949152542372882e-08,
"logits/chosen": -2.4218411445617676,
"logits/rejected": -1.844456672668457,
"logps/chosen": -0.37341463565826416,
"logps/rejected": -0.42600566148757935,
"loss": 0.2323,
"rewards/accuracies": 0.8333333730697632,
"rewards/chosen": -0.7468292713165283,
"rewards/margins": 0.10518200695514679,
"rewards/rejected": -0.8520113229751587,
"step": 66
},
{
"epoch": 2.0,
"step": 66,
"total_flos": 24916491141120.0,
"train_loss": 1.2533311103329514,
"train_runtime": 1478.6034,
"train_samples_per_second": 2.785,
"train_steps_per_second": 0.045
}
],
"logging_steps": 1,
"max_steps": 66,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 24916491141120.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}