my-text-model-finetuned / checkpoints /trainer_state.json
aitask1024's picture
Upload task output Pendrokar/TTS_Arena
1d16fd0 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9933065595716197,
"eval_steps": 500,
"global_step": 1119,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.013386880856760375,
"grad_norm": 48.32637023925781,
"learning_rate": 1.3879733999999997e-06,
"logits/chosen": -2.7358782291412354,
"logits/rejected": -2.8863089084625244,
"logps/chosen": -154.43099975585938,
"logps/rejected": -76.9035873413086,
"loss": 0.6782,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": 0.03575515002012253,
"rewards/margins": 0.0314163900911808,
"rewards/rejected": 0.004338760394603014,
"step": 5
},
{
"epoch": 0.02677376171352075,
"grad_norm": 18.94371223449707,
"learning_rate": 3.122940149999999e-06,
"logits/chosen": -2.7054200172424316,
"logits/rejected": -2.8710813522338867,
"logps/chosen": -155.5043182373047,
"logps/rejected": -79.5620346069336,
"loss": 0.4295,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": 0.7799822688102722,
"rewards/margins": 0.7580081224441528,
"rewards/rejected": 0.021974176168441772,
"step": 10
},
{
"epoch": 0.040160642570281124,
"grad_norm": 5.264588356018066,
"learning_rate": 4.8579069e-06,
"logits/chosen": -2.6499576568603516,
"logits/rejected": -2.841108798980713,
"logps/chosen": -137.66973876953125,
"logps/rejected": -80.86649322509766,
"loss": 0.0953,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.818009376525879,
"rewards/margins": 3.042386770248413,
"rewards/rejected": -0.22437739372253418,
"step": 15
},
{
"epoch": 0.0535475234270415,
"grad_norm": 1.9624146223068237,
"learning_rate": 6.592873649999998e-06,
"logits/chosen": -2.5176243782043457,
"logits/rejected": -2.8056235313415527,
"logps/chosen": -112.12801361083984,
"logps/rejected": -94.06233215332031,
"loss": 0.0214,
"rewards/accuracies": 1.0,
"rewards/chosen": 5.590962886810303,
"rewards/margins": 6.781089782714844,
"rewards/rejected": -1.1901264190673828,
"step": 20
},
{
"epoch": 0.06693440428380187,
"grad_norm": 0.09991835057735443,
"learning_rate": 8.3278404e-06,
"logits/chosen": -2.3332083225250244,
"logits/rejected": -2.6696295738220215,
"logps/chosen": -97.1298828125,
"logps/rejected": -108.11856842041016,
"loss": 0.0007,
"rewards/accuracies": 1.0,
"rewards/chosen": 8.456403732299805,
"rewards/margins": 11.336581230163574,
"rewards/rejected": -2.880176544189453,
"step": 25
},
{
"epoch": 0.08032128514056225,
"grad_norm": 0.006451677531003952,
"learning_rate": 1.006280715e-05,
"logits/chosen": -2.206895589828491,
"logits/rejected": -2.462498426437378,
"logps/chosen": -80.75566101074219,
"logps/rejected": -129.00411987304688,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": 8.789403915405273,
"rewards/margins": 13.747503280639648,
"rewards/rejected": -4.958100318908691,
"step": 30
},
{
"epoch": 0.09370816599732262,
"grad_norm": 0.0004014262813143432,
"learning_rate": 1.1797773899999998e-05,
"logits/chosen": -2.10058331489563,
"logits/rejected": -2.342927932739258,
"logps/chosen": -79.42524719238281,
"logps/rejected": -143.72073364257812,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.209558486938477,
"rewards/margins": 15.470553398132324,
"rewards/rejected": -6.260995864868164,
"step": 35
},
{
"epoch": 0.107095046854083,
"grad_norm": 0.00023443216923624277,
"learning_rate": 1.2144461232143962e-05,
"logits/chosen": -2.0052661895751953,
"logits/rejected": -2.1909799575805664,
"logps/chosen": -68.39270782470703,
"logps/rejected": -153.67198181152344,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.552885055541992,
"rewards/margins": 17.000492095947266,
"rewards/rejected": -7.447608947753906,
"step": 40
},
{
"epoch": 0.12048192771084337,
"grad_norm": 0.009947865270078182,
"learning_rate": 1.214321810508581e-05,
"logits/chosen": -1.9368336200714111,
"logits/rejected": -2.0944368839263916,
"logps/chosen": -77.8448486328125,
"logps/rejected": -161.29324340820312,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.328283309936523,
"rewards/margins": 18.502532958984375,
"rewards/rejected": -8.174247741699219,
"step": 45
},
{
"epoch": 0.13386880856760375,
"grad_norm": 0.0006397409015335143,
"learning_rate": 1.2141019003537938e-05,
"logits/chosen": -1.9432464838027954,
"logits/rejected": -1.997998833656311,
"logps/chosen": -67.01065063476562,
"logps/rejected": -168.06625366210938,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.874837875366211,
"rewards/margins": 18.738964080810547,
"rewards/rejected": -8.864126205444336,
"step": 50
},
{
"epoch": 0.14725568942436412,
"grad_norm": 3.562494021025486e-05,
"learning_rate": 1.2137864389263077e-05,
"logits/chosen": -1.8939683437347412,
"logits/rejected": -1.9639301300048828,
"logps/chosen": -69.45832061767578,
"logps/rejected": -175.5773162841797,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.973077774047852,
"rewards/margins": 19.39337921142578,
"rewards/rejected": -9.420300483703613,
"step": 55
},
{
"epoch": 0.1606425702811245,
"grad_norm": 0.00018317776266485453,
"learning_rate": 1.213375492466051e-05,
"logits/chosen": -1.8978850841522217,
"logits/rejected": -1.9658511877059937,
"logps/chosen": -61.8738899230957,
"logps/rejected": -180.20217895507812,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.461588859558105,
"rewards/margins": 19.28191375732422,
"rewards/rejected": -9.82032585144043,
"step": 60
},
{
"epoch": 0.17402945113788487,
"grad_norm": 0.0010487588588148355,
"learning_rate": 1.2128691472626986e-05,
"logits/chosen": -1.9120140075683594,
"logits/rejected": -1.9368822574615479,
"logps/chosen": -61.631805419921875,
"logps/rejected": -174.88168334960938,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.515580177307129,
"rewards/margins": 19.191221237182617,
"rewards/rejected": -9.675638198852539,
"step": 65
},
{
"epoch": 0.18741633199464525,
"grad_norm": 0.0005829242873005569,
"learning_rate": 1.2122675096375539e-05,
"logits/chosen": -1.8962472677230835,
"logits/rejected": -1.8821055889129639,
"logps/chosen": -67.1492691040039,
"logps/rejected": -182.51010131835938,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.898228645324707,
"rewards/margins": 20.0985107421875,
"rewards/rejected": -10.200281143188477,
"step": 70
},
{
"epoch": 0.20080321285140562,
"grad_norm": 0.0001830089750001207,
"learning_rate": 1.2115707059212225e-05,
"logits/chosen": -1.9109745025634766,
"logits/rejected": -1.8918424844741821,
"logps/chosen": -60.247169494628906,
"logps/rejected": -170.59629821777344,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.591668128967285,
"rewards/margins": 19.038829803466797,
"rewards/rejected": -9.447163581848145,
"step": 75
},
{
"epoch": 0.214190093708166,
"grad_norm": 3.0526671253028326e-06,
"learning_rate": 1.2107788824270861e-05,
"logits/chosen": -1.9272515773773193,
"logits/rejected": -1.94720458984375,
"logps/chosen": -58.57343673706055,
"logps/rejected": -181.34176635742188,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.232949256896973,
"rewards/margins": 19.28238296508789,
"rewards/rejected": -10.049432754516602,
"step": 80
},
{
"epoch": 0.22757697456492637,
"grad_norm": 0.0006005926989018917,
"learning_rate": 1.2098922054205801e-05,
"logits/chosen": -1.9209476709365845,
"logits/rejected": -1.8958288431167603,
"logps/chosen": -58.07838821411133,
"logps/rejected": -187.4013671875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 8.958385467529297,
"rewards/margins": 19.543930053710938,
"rewards/rejected": -10.585546493530273,
"step": 85
},
{
"epoch": 0.24096385542168675,
"grad_norm": 1.353817879135022e-05,
"learning_rate": 1.208910861084281e-05,
"logits/chosen": -1.8882849216461182,
"logits/rejected": -1.8732059001922607,
"logps/chosen": -70.58480834960938,
"logps/rejected": -178.2596435546875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.439764022827148,
"rewards/margins": 20.539878845214844,
"rewards/rejected": -10.100113868713379,
"step": 90
},
{
"epoch": 0.2543507362784471,
"grad_norm": 0.00023403888917528093,
"learning_rate": 1.207835055478813e-05,
"logits/chosen": -1.8713138103485107,
"logits/rejected": -1.8445053100585938,
"logps/chosen": -72.9098892211914,
"logps/rejected": -176.0099334716797,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.652149200439453,
"rewards/margins": 20.62753677368164,
"rewards/rejected": -9.975388526916504,
"step": 95
},
{
"epoch": 0.2677376171352075,
"grad_norm": 0.0002160475414711982,
"learning_rate": 1.2066650144995788e-05,
"logits/chosen": -1.8813066482543945,
"logits/rejected": -1.842013955116272,
"logps/chosen": -63.20849609375,
"logps/rejected": -184.35964965820312,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.75583553314209,
"rewards/margins": 20.272876739501953,
"rewards/rejected": -10.517043113708496,
"step": 100
},
{
"epoch": 0.28112449799196787,
"grad_norm": 1.5363968486781232e-05,
"learning_rate": 1.2054009838293278e-05,
"logits/chosen": -1.9106378555297852,
"logits/rejected": -1.8844468593597412,
"logps/chosen": -57.907188415527344,
"logps/rejected": -182.01736450195312,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.319896697998047,
"rewards/margins": 19.627681732177734,
"rewards/rejected": -10.307784080505371,
"step": 105
},
{
"epoch": 0.29451137884872824,
"grad_norm": 0.00032003922387957573,
"learning_rate": 1.2040432288865665e-05,
"logits/chosen": -1.8974215984344482,
"logits/rejected": -1.914280891418457,
"logps/chosen": -61.646949768066406,
"logps/rejected": -186.77334594726562,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.848616600036621,
"rewards/margins": 20.335248947143555,
"rewards/rejected": -10.486631393432617,
"step": 110
},
{
"epoch": 0.3078982597054886,
"grad_norm": 2.4666236640769057e-05,
"learning_rate": 1.2025920347698281e-05,
"logits/chosen": -1.8672893047332764,
"logits/rejected": -1.8574968576431274,
"logps/chosen": -62.46479034423828,
"logps/rejected": -183.5287628173828,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.873312950134277,
"rewards/margins": 20.34807777404785,
"rewards/rejected": -10.474763870239258,
"step": 115
},
{
"epoch": 0.321285140562249,
"grad_norm": 7.831333641661331e-05,
"learning_rate": 1.2010477061978072e-05,
"logits/chosen": -1.8667519092559814,
"logits/rejected": -1.8458188772201538,
"logps/chosen": -67.73392486572266,
"logps/rejected": -190.04678344726562,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.298925399780273,
"rewards/margins": 21.361431121826172,
"rewards/rejected": -11.062504768371582,
"step": 120
},
{
"epoch": 0.33467202141900937,
"grad_norm": 0.0011632780078798532,
"learning_rate": 1.1994105674453762e-05,
"logits/chosen": -1.906557321548462,
"logits/rejected": -1.8378311395645142,
"logps/chosen": -59.9715690612793,
"logps/rejected": -187.6858673095703,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.155950546264648,
"rewards/margins": 20.995595932006836,
"rewards/rejected": -10.839643478393555,
"step": 125
},
{
"epoch": 0.34805890227576974,
"grad_norm": 2.287779534526635e-05,
"learning_rate": 1.1976809622754933e-05,
"logits/chosen": -1.8965215682983398,
"logits/rejected": -1.9305957555770874,
"logps/chosen": -60.8524284362793,
"logps/rejected": -182.12356567382812,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.748926162719727,
"rewards/margins": 20.00579261779785,
"rewards/rejected": -10.256868362426758,
"step": 130
},
{
"epoch": 0.3614457831325301,
"grad_norm": 0.00024706361000426114,
"learning_rate": 1.1958592538670224e-05,
"logits/chosen": -1.8761208057403564,
"logits/rejected": -1.821118950843811,
"logps/chosen": -58.57818603515625,
"logps/rejected": -191.0096435546875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.970569610595703,
"rewards/margins": 20.901823043823242,
"rewards/rejected": -10.931253433227539,
"step": 135
},
{
"epoch": 0.3748326639892905,
"grad_norm": 2.4137092623277567e-05,
"learning_rate": 1.1939458247384714e-05,
"logits/chosen": -1.878674864768982,
"logits/rejected": -1.8472044467926025,
"logps/chosen": -62.983184814453125,
"logps/rejected": -192.6395263671875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.132028579711914,
"rewards/margins": 21.37057113647461,
"rewards/rejected": -11.238546371459961,
"step": 140
},
{
"epoch": 0.38821954484605087,
"grad_norm": 0.00031987050897441804,
"learning_rate": 1.191941076667672e-05,
"logits/chosen": -1.8975257873535156,
"logits/rejected": -1.8439594507217407,
"logps/chosen": -62.376380920410156,
"logps/rejected": -178.85665893554688,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.485511779785156,
"rewards/margins": 19.73735809326172,
"rewards/rejected": -10.251847267150879,
"step": 145
},
{
"epoch": 0.40160642570281124,
"grad_norm": 3.340610419400036e-05,
"learning_rate": 1.1898454306074163e-05,
"logits/chosen": -1.8920434713363647,
"logits/rejected": -1.849535346031189,
"logps/chosen": -63.710914611816406,
"logps/rejected": -193.69337463378906,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.413044929504395,
"rewards/margins": 20.71396255493164,
"rewards/rejected": -11.300919532775879,
"step": 150
},
{
"epoch": 0.4149933065595716,
"grad_norm": 0.00011380790965631604,
"learning_rate": 1.187659326597066e-05,
"logits/chosen": -1.8865476846694946,
"logits/rejected": -1.8176262378692627,
"logps/chosen": -68.44486999511719,
"logps/rejected": -200.52450561523438,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.273351669311523,
"rewards/margins": 22.084421157836914,
"rewards/rejected": -11.811070442199707,
"step": 155
},
{
"epoch": 0.428380187416332,
"grad_norm": 4.0704333514440805e-05,
"learning_rate": 1.185383223670152e-05,
"logits/chosen": -1.8735504150390625,
"logits/rejected": -1.8823959827423096,
"logps/chosen": -57.306846618652344,
"logps/rejected": -183.9485626220703,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.857657432556152,
"rewards/margins": 20.344463348388672,
"rewards/rejected": -10.486806869506836,
"step": 160
},
{
"epoch": 0.44176706827309237,
"grad_norm": 3.696778730954975e-05,
"learning_rate": 1.1830175997579895e-05,
"logits/chosen": -1.8666346073150635,
"logits/rejected": -1.8352988958358765,
"logps/chosen": -65.62974548339844,
"logps/rejected": -186.6559295654297,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.239971160888672,
"rewards/margins": 21.187232971191406,
"rewards/rejected": -10.947261810302734,
"step": 165
},
{
"epoch": 0.45515394912985274,
"grad_norm": 2.024579771386925e-05,
"learning_rate": 1.1805629515893225e-05,
"logits/chosen": -1.8567460775375366,
"logits/rejected": -1.7982889413833618,
"logps/chosen": -63.90752029418945,
"logps/rejected": -192.54873657226562,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.393075942993164,
"rewards/margins": 21.65923500061035,
"rewards/rejected": -11.266157150268555,
"step": 170
},
{
"epoch": 0.4685408299866131,
"grad_norm": 0.0002714527945499867,
"learning_rate": 1.1780197945860211e-05,
"logits/chosen": -1.8908030986785889,
"logits/rejected": -1.8585189580917358,
"logps/chosen": -58.50342559814453,
"logps/rejected": -189.9221649169922,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.500165939331055,
"rewards/margins": 20.399356842041016,
"rewards/rejected": -10.899189949035645,
"step": 175
},
{
"epoch": 0.4819277108433735,
"grad_norm": 0.0004148809239268303,
"learning_rate": 1.1753886627548548e-05,
"logits/chosen": -1.8859355449676514,
"logits/rejected": -1.8143894672393799,
"logps/chosen": -66.53254699707031,
"logps/rejected": -196.88986206054688,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.707757949829102,
"rewards/margins": 21.089689254760742,
"rewards/rejected": -11.381932258605957,
"step": 180
},
{
"epoch": 0.49531459170013387,
"grad_norm": 4.077264020452276e-05,
"learning_rate": 1.172670108575363e-05,
"logits/chosen": -1.88116455078125,
"logits/rejected": -1.8810043334960938,
"logps/chosen": -75.669189453125,
"logps/rejected": -184.03500366210938,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.555752754211426,
"rewards/margins": 21.035213470458984,
"rewards/rejected": -10.479463577270508,
"step": 185
},
{
"epoch": 0.5087014725568942,
"grad_norm": 0.00022642931435257196,
"learning_rate": 1.1698647028838462e-05,
"logits/chosen": -1.8791462182998657,
"logits/rejected": -1.8246482610702515,
"logps/chosen": -58.291839599609375,
"logps/rejected": -193.2161102294922,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.682637214660645,
"rewards/margins": 20.842124938964844,
"rewards/rejected": -11.159486770629883,
"step": 190
},
{
"epoch": 0.5220883534136547,
"grad_norm": 0.00010173048212891445,
"learning_rate": 1.166973034753503e-05,
"logits/chosen": -1.9097864627838135,
"logits/rejected": -1.8443893194198608,
"logps/chosen": -54.96189498901367,
"logps/rejected": -183.68154907226562,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.263029098510742,
"rewards/margins": 19.78293228149414,
"rewards/rejected": -10.519901275634766,
"step": 195
},
{
"epoch": 0.535475234270415,
"grad_norm": 3.119400207651779e-05,
"learning_rate": 1.1639957113707378e-05,
"logits/chosen": -1.8835645914077759,
"logits/rejected": -1.8237943649291992,
"logps/chosen": -66.83441162109375,
"logps/rejected": -185.5270538330078,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.838064193725586,
"rewards/margins": 20.68081283569336,
"rewards/rejected": -10.842748641967773,
"step": 200
},
{
"epoch": 0.5488621151271754,
"grad_norm": 2.8754557206411846e-05,
"learning_rate": 1.1609333579076652e-05,
"logits/chosen": -1.8577368259429932,
"logits/rejected": -1.8328752517700195,
"logps/chosen": -66.59144592285156,
"logps/rejected": -180.5541534423828,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.479964256286621,
"rewards/margins": 21.016925811767578,
"rewards/rejected": -10.536964416503906,
"step": 205
},
{
"epoch": 0.5622489959839357,
"grad_norm": 0.00011976935638813302,
"learning_rate": 1.157786617390838e-05,
"logits/chosen": -1.9020277261734009,
"logits/rejected": -1.870615005493164,
"logps/chosen": -55.10723114013672,
"logps/rejected": -188.2975311279297,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.497312545776367,
"rewards/margins": 20.4838924407959,
"rewards/rejected": -10.986580848693848,
"step": 210
},
{
"epoch": 0.5756358768406962,
"grad_norm": 0.0002118870906997472,
"learning_rate": 1.1545561505662249e-05,
"logits/chosen": -1.876802682876587,
"logits/rejected": -1.8307838439941406,
"logps/chosen": -69.99948120117188,
"logps/rejected": -189.57418823242188,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.201910018920898,
"rewards/margins": 21.337665557861328,
"rewards/rejected": -11.13575553894043,
"step": 215
},
{
"epoch": 0.5890227576974565,
"grad_norm": 8.499900286551565e-05,
"learning_rate": 1.1512426357604687e-05,
"logits/chosen": -1.8773601055145264,
"logits/rejected": -1.8962678909301758,
"logps/chosen": -68.12947082519531,
"logps/rejected": -185.85067749023438,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.250208854675293,
"rewards/margins": 20.985477447509766,
"rewards/rejected": -10.735268592834473,
"step": 220
},
{
"epoch": 0.6024096385542169,
"grad_norm": 3.3592546969885007e-05,
"learning_rate": 1.147846768738454e-05,
"logits/chosen": -1.9119741916656494,
"logits/rejected": -1.8050686120986938,
"logps/chosen": -55.8441276550293,
"logps/rejected": -185.8286590576172,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.300341606140137,
"rewards/margins": 20.14506721496582,
"rewards/rejected": -10.844724655151367,
"step": 225
},
{
"epoch": 0.6157965194109772,
"grad_norm": 1.3038397810305469e-05,
"learning_rate": 1.1443692625572097e-05,
"logits/chosen": -1.8610095977783203,
"logits/rejected": -1.8084399700164795,
"logps/chosen": -66.45205688476562,
"logps/rejected": -190.3325958251953,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.85809326171875,
"rewards/margins": 21.86368179321289,
"rewards/rejected": -11.00558853149414,
"step": 230
},
{
"epoch": 0.6291834002677377,
"grad_norm": 0.0004023597575724125,
"learning_rate": 1.140810847416185e-05,
"logits/chosen": -1.891358733177185,
"logits/rejected": -1.8402206897735596,
"logps/chosen": -67.42313385009766,
"logps/rejected": -189.2025604248047,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.033833503723145,
"rewards/margins": 21.153135299682617,
"rewards/rejected": -11.119302749633789,
"step": 235
},
{
"epoch": 0.642570281124498,
"grad_norm": 1.1516180165926926e-05,
"learning_rate": 1.1371722705039222e-05,
"logits/chosen": -1.8551809787750244,
"logits/rejected": -1.8508669137954712,
"logps/chosen": -64.57566833496094,
"logps/rejected": -199.9986114501953,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.904739379882812,
"rewards/margins": 22.700956344604492,
"rewards/rejected": -11.796217918395996,
"step": 240
},
{
"epoch": 0.6559571619812584,
"grad_norm": 2.9551241823355667e-05,
"learning_rate": 1.1334542958411638e-05,
"logits/chosen": -1.8598964214324951,
"logits/rejected": -1.7960243225097656,
"logps/chosen": -64.33881378173828,
"logps/rejected": -190.1427001953125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.994267463684082,
"rewards/margins": 21.328704833984375,
"rewards/rejected": -11.334436416625977,
"step": 245
},
{
"epoch": 0.6693440428380187,
"grad_norm": 2.7200452677789144e-05,
"learning_rate": 1.129657704120426e-05,
"logits/chosen": -1.8504676818847656,
"logits/rejected": -1.7715709209442139,
"logps/chosen": -69.25659942626953,
"logps/rejected": -188.40867614746094,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.336310386657715,
"rewards/margins": 21.38101577758789,
"rewards/rejected": -11.04470157623291,
"step": 250
},
{
"epoch": 0.6827309236947792,
"grad_norm": 1.9446013538981788e-05,
"learning_rate": 1.125783292542069e-05,
"logits/chosen": -1.8880693912506104,
"logits/rejected": -1.8214142322540283,
"logps/chosen": -61.559906005859375,
"logps/rejected": -190.22848510742188,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.045111656188965,
"rewards/margins": 21.14029312133789,
"rewards/rejected": -11.095178604125977,
"step": 255
},
{
"epoch": 0.6961178045515395,
"grad_norm": 1.5772628103150055e-05,
"learning_rate": 1.1218318746469043e-05,
"logits/chosen": -1.8853015899658203,
"logits/rejected": -1.7798893451690674,
"logps/chosen": -59.47282791137695,
"logps/rejected": -192.4330291748047,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.76961612701416,
"rewards/margins": 21.079383850097656,
"rewards/rejected": -11.309769630432129,
"step": 260
},
{
"epoch": 0.7095046854082999,
"grad_norm": 4.3517022277228534e-05,
"learning_rate": 1.1178042801453673e-05,
"logits/chosen": -1.8884027004241943,
"logits/rejected": -1.7771536111831665,
"logps/chosen": -68.0465087890625,
"logps/rejected": -191.3766632080078,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.391663551330566,
"rewards/margins": 21.76491355895996,
"rewards/rejected": -11.373248100280762,
"step": 265
},
{
"epoch": 0.7228915662650602,
"grad_norm": 1.183638596558012e-05,
"learning_rate": 1.1137013547432978e-05,
"logits/chosen": -1.877772331237793,
"logits/rejected": -1.8006465435028076,
"logps/chosen": -60.1486701965332,
"logps/rejected": -190.3517608642578,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.311609268188477,
"rewards/margins": 21.450910568237305,
"rewards/rejected": -11.139305114746094,
"step": 270
},
{
"epoch": 0.7362784471218207,
"grad_norm": 0.00011462459951872006,
"learning_rate": 1.1095239599643599e-05,
"logits/chosen": -1.8803141117095947,
"logits/rejected": -1.7724977731704712,
"logps/chosen": -69.33953094482422,
"logps/rejected": -206.20681762695312,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.447629928588867,
"rewards/margins": 22.70147132873535,
"rewards/rejected": -12.253841400146484,
"step": 275
},
{
"epoch": 0.749665327978581,
"grad_norm": 3.4330005291849375e-05,
"learning_rate": 1.1052729729691409e-05,
"logits/chosen": -1.9076026678085327,
"logits/rejected": -1.8033192157745361,
"logps/chosen": -64.76860046386719,
"logps/rejected": -193.28390502929688,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.796707153320312,
"rewards/margins": 21.169038772583008,
"rewards/rejected": -11.372334480285645,
"step": 280
},
{
"epoch": 0.7630522088353414,
"grad_norm": 0.0004265084571670741,
"learning_rate": 1.1009492863709674e-05,
"logits/chosen": -1.858690857887268,
"logits/rejected": -1.8127180337905884,
"logps/chosen": -63.8545036315918,
"logps/rejected": -192.01919555664062,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.419605255126953,
"rewards/margins": 21.708141326904297,
"rewards/rejected": -11.28853702545166,
"step": 285
},
{
"epoch": 0.7764390896921017,
"grad_norm": 2.2610509404330514e-05,
"learning_rate": 1.0965538080484765e-05,
"logits/chosen": -1.8989194631576538,
"logits/rejected": -1.8063570261001587,
"logps/chosen": -59.67851638793945,
"logps/rejected": -196.120361328125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.765997886657715,
"rewards/margins": 21.32438087463379,
"rewards/rejected": -11.558382034301758,
"step": 290
},
{
"epoch": 0.7898259705488622,
"grad_norm": 1.3567336281994358e-05,
"learning_rate": 1.0920874609549798e-05,
"logits/chosen": -1.870410680770874,
"logits/rejected": -1.8066644668579102,
"logps/chosen": -69.43475341796875,
"logps/rejected": -203.3020782470703,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.31662368774414,
"rewards/margins": 22.408395767211914,
"rewards/rejected": -12.091771125793457,
"step": 295
},
{
"epoch": 0.8032128514056225,
"grad_norm": 6.72144815325737e-05,
"learning_rate": 1.0875511829246656e-05,
"logits/chosen": -1.8767350912094116,
"logits/rejected": -1.769721269607544,
"logps/chosen": -59.4417610168457,
"logps/rejected": -193.3367156982422,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.873218536376953,
"rewards/margins": 21.551494598388672,
"rewards/rejected": -11.678277015686035,
"step": 300
},
{
"epoch": 0.8165997322623829,
"grad_norm": 6.611274147871882e-05,
"learning_rate": 1.0829459264756734e-05,
"logits/chosen": -1.871285080909729,
"logits/rejected": -1.7826554775238037,
"logps/chosen": -61.01091766357422,
"logps/rejected": -198.24710083007812,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.198274612426758,
"rewards/margins": 21.897686004638672,
"rewards/rejected": -11.699411392211914,
"step": 305
},
{
"epoch": 0.8299866131191432,
"grad_norm": 1.358661029371433e-05,
"learning_rate": 1.0782726586100857e-05,
"logits/chosen": -1.8602094650268555,
"logits/rejected": -1.7963926792144775,
"logps/chosen": -63.376007080078125,
"logps/rejected": -202.0255889892578,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.44829273223877,
"rewards/margins": 22.673847198486328,
"rewards/rejected": -12.225557327270508,
"step": 310
},
{
"epoch": 0.8433734939759037,
"grad_norm": 0.00011065916623920202,
"learning_rate": 1.0735323606108803e-05,
"logits/chosen": -1.87014639377594,
"logits/rejected": -1.7621206045150757,
"logps/chosen": -69.85023498535156,
"logps/rejected": -202.82557678222656,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.451375007629395,
"rewards/margins": 22.6992130279541,
"rewards/rejected": -12.247835159301758,
"step": 315
},
{
"epoch": 0.856760374832664,
"grad_norm": 1.2892563063360285e-05,
"learning_rate": 1.0687260278358814e-05,
"logits/chosen": -1.8692089319229126,
"logits/rejected": -1.8146623373031616,
"logps/chosen": -62.0356330871582,
"logps/rejected": -198.94203186035156,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.914308547973633,
"rewards/margins": 21.57122230529785,
"rewards/rejected": -11.656911849975586,
"step": 320
},
{
"epoch": 0.8701472556894244,
"grad_norm": 6.038762876414694e-06,
"learning_rate": 1.0638546695087565e-05,
"logits/chosen": -1.885371446609497,
"logits/rejected": -1.8294477462768555,
"logps/chosen": -61.702667236328125,
"logps/rejected": -192.01895141601562,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.938339233398438,
"rewards/margins": 21.29090690612793,
"rewards/rejected": -11.35256576538086,
"step": 325
},
{
"epoch": 0.8835341365461847,
"grad_norm": 1.671519385126885e-05,
"learning_rate": 1.0589193085071023e-05,
"logits/chosen": -1.8809674978256226,
"logits/rejected": -1.794091820716858,
"logps/chosen": -73.87046813964844,
"logps/rejected": -199.53927612304688,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.256256103515625,
"rewards/margins": 22.13440704345703,
"rewards/rejected": -11.878148078918457,
"step": 330
},
{
"epoch": 0.8969210174029452,
"grad_norm": 7.38472408556845e-06,
"learning_rate": 1.0539209811476632e-05,
"logits/chosen": -1.87711501121521,
"logits/rejected": -1.7717126607894897,
"logps/chosen": -61.84357452392578,
"logps/rejected": -199.06480407714844,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.918207168579102,
"rewards/margins": 21.842174530029297,
"rewards/rejected": -11.923968315124512,
"step": 335
},
{
"epoch": 0.9103078982597055,
"grad_norm": 3.710574674187228e-06,
"learning_rate": 1.0488607369687263e-05,
"logits/chosen": -1.8811956644058228,
"logits/rejected": -1.7410866022109985,
"logps/chosen": -62.85358810424805,
"logps/rejected": -208.62289428710938,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.954957008361816,
"rewards/margins": 22.574024200439453,
"rewards/rejected": -12.619064331054688,
"step": 340
},
{
"epoch": 0.9236947791164659,
"grad_norm": 3.9282083889702335e-05,
"learning_rate": 1.0437396385097436e-05,
"logits/chosen": -1.8749635219573975,
"logits/rejected": -1.8668371438980103,
"logps/chosen": -67.99688720703125,
"logps/rejected": -194.4200439453125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.10667610168457,
"rewards/margins": 21.479095458984375,
"rewards/rejected": -11.372419357299805,
"step": 345
},
{
"epoch": 0.9370816599732262,
"grad_norm": 6.739242053299677e-06,
"learning_rate": 1.0385587610882203e-05,
"logits/chosen": -1.8696308135986328,
"logits/rejected": -1.7740843296051025,
"logps/chosen": -61.758819580078125,
"logps/rejected": -188.14012145996094,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.408076286315918,
"rewards/margins": 21.486896514892578,
"rewards/rejected": -11.07882022857666,
"step": 350
},
{
"epoch": 0.9504685408299867,
"grad_norm": 2.1943731553619727e-05,
"learning_rate": 1.0333191925739228e-05,
"logits/chosen": -1.8707389831542969,
"logits/rejected": -1.745100975036621,
"logps/chosen": -54.22405242919922,
"logps/rejected": -189.01776123046875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.374455451965332,
"rewards/margins": 20.611839294433594,
"rewards/rejected": -11.237382888793945,
"step": 355
},
{
"epoch": 0.963855421686747,
"grad_norm": 1.4479804121947382e-05,
"learning_rate": 1.0280220331604505e-05,
"logits/chosen": -1.886850118637085,
"logits/rejected": -1.7762616872787476,
"logps/chosen": -65.0347900390625,
"logps/rejected": -192.35006713867188,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.607036590576172,
"rewards/margins": 21.074512481689453,
"rewards/rejected": -11.467473983764648,
"step": 360
},
{
"epoch": 0.9772423025435074,
"grad_norm": 6.089697853894904e-05,
"learning_rate": 1.0226683951342178e-05,
"logits/chosen": -1.86488938331604,
"logits/rejected": -1.7880140542984009,
"logps/chosen": -70.48530578613281,
"logps/rejected": -203.04815673828125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.034753799438477,
"rewards/margins": 22.2109432220459,
"rewards/rejected": -12.176187515258789,
"step": 365
},
{
"epoch": 0.9906291834002677,
"grad_norm": 1.1036102478101384e-05,
"learning_rate": 1.017259402640901e-05,
"logits/chosen": -1.89974045753479,
"logits/rejected": -1.7988322973251343,
"logps/chosen": -62.814735412597656,
"logps/rejected": -194.54603576660156,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.982305526733398,
"rewards/margins": 21.521968841552734,
"rewards/rejected": -11.53965950012207,
"step": 370
},
{
"epoch": 0.998661311914324,
"eval_logits/chosen": -1.9349273443222046,
"eval_logits/rejected": -1.7649630308151245,
"eval_logps/chosen": -63.54435729980469,
"eval_logps/rejected": -194.9509735107422,
"eval_loss": 1.3701900059004402e-07,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": 10.00214958190918,
"eval_rewards/margins": 21.566242218017578,
"eval_rewards/rejected": -11.564092636108398,
"eval_runtime": 29.1535,
"eval_samples_per_second": 6.86,
"eval_steps_per_second": 6.86,
"step": 373
},
{
"epoch": 1.002677376171352,
"grad_norm": 1.2704935215879232e-05,
"learning_rate": 1.0117961914493904e-05,
"logits/chosen": -1.8601760864257812,
"logits/rejected": -1.7008464336395264,
"logps/chosen": -74.2369155883789,
"logps/rejected": -199.70252990722656,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.066021919250488,
"rewards/margins": 22.14150619506836,
"rewards/rejected": -12.075483322143555,
"step": 375
},
{
"epoch": 1.0160642570281124,
"grad_norm": 5.538755431189202e-05,
"learning_rate": 1.0062799087133048e-05,
"logits/chosen": -1.8701107501983643,
"logits/rejected": -1.7948967218399048,
"logps/chosen": -63.428306579589844,
"logps/rejected": -198.6887969970703,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.991368293762207,
"rewards/margins": 21.902233123779297,
"rewards/rejected": -11.910863876342773,
"step": 380
},
{
"epoch": 1.0294511378848727,
"grad_norm": 4.9225644033867866e-05,
"learning_rate": 1.0007117127301148e-05,
"logits/chosen": -1.8625848293304443,
"logits/rejected": -1.7941219806671143,
"logps/chosen": -63.298004150390625,
"logps/rejected": -200.94699096679688,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.300239562988281,
"rewards/margins": 22.36918067932129,
"rewards/rejected": -12.068942070007324,
"step": 385
},
{
"epoch": 1.0428380187416333,
"grad_norm": 1.8702992747421376e-05,
"learning_rate": 9.950927726979255e-06,
"logits/chosen": -1.8798978328704834,
"logits/rejected": -1.7565393447875977,
"logps/chosen": -56.972923278808594,
"logps/rejected": -190.3721160888672,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.65361213684082,
"rewards/margins": 21.039199829101562,
"rewards/rejected": -11.385587692260742,
"step": 390
},
{
"epoch": 1.0562248995983936,
"grad_norm": 2.7559770387597382e-05,
"learning_rate": 9.89424268469971e-06,
"logits/chosen": -1.8960098028182983,
"logits/rejected": -1.7793407440185547,
"logps/chosen": -59.296142578125,
"logps/rejected": -201.22280883789062,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.653037071228027,
"rewards/margins": 21.753782272338867,
"rewards/rejected": -12.10074520111084,
"step": 395
},
{
"epoch": 1.069611780455154,
"grad_norm": 0.0003086234792135656,
"learning_rate": 9.83707390306871e-06,
"logits/chosen": -1.8640127182006836,
"logits/rejected": -1.7932268381118774,
"logps/chosen": -67.73115539550781,
"logps/rejected": -197.77268981933594,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.911338806152344,
"rewards/margins": 21.78359031677246,
"rewards/rejected": -11.872251510620117,
"step": 400
},
{
"epoch": 1.0829986613119142,
"grad_norm": 6.989373559918022e-06,
"learning_rate": 9.779433386267028e-06,
"logits/chosen": -1.8561309576034546,
"logits/rejected": -1.7354758977890015,
"logps/chosen": -58.682838439941406,
"logps/rejected": -199.67955017089844,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.759561538696289,
"rewards/margins": 21.87822151184082,
"rewards/rejected": -12.118657112121582,
"step": 405
},
{
"epoch": 1.0963855421686748,
"grad_norm": 4.132572212256491e-05,
"learning_rate": 9.721333237529395e-06,
"logits/chosen": -1.8625351190567017,
"logits/rejected": -1.7816200256347656,
"logps/chosen": -60.27020263671875,
"logps/rejected": -198.3787384033203,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.702848434448242,
"rewards/margins": 21.661922454833984,
"rewards/rejected": -11.959076881408691,
"step": 410
},
{
"epoch": 1.109772423025435,
"grad_norm": 9.149351171799935e-06,
"learning_rate": 9.662785656603096e-06,
"logits/chosen": -1.8707053661346436,
"logits/rejected": -1.7554585933685303,
"logps/chosen": -63.7075309753418,
"logps/rejected": -206.32568359375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.28803825378418,
"rewards/margins": 22.78645896911621,
"rewards/rejected": -12.498420715332031,
"step": 415
},
{
"epoch": 1.1231593038821954,
"grad_norm": 1.0895137165789492e-05,
"learning_rate": 9.603802937186282e-06,
"logits/chosen": -1.8571460247039795,
"logits/rejected": -1.7835206985473633,
"logps/chosen": -71.52728271484375,
"logps/rejected": -204.67822265625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.877116203308105,
"rewards/margins": 23.043004989624023,
"rewards/rejected": -12.165888786315918,
"step": 420
},
{
"epoch": 1.1365461847389557,
"grad_norm": 6.231999577721581e-05,
"learning_rate": 9.544397464346573e-06,
"logits/chosen": -1.844512939453125,
"logits/rejected": -1.759734869003296,
"logps/chosen": -70.35665130615234,
"logps/rejected": -192.6426544189453,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.559091567993164,
"rewards/margins": 22.039852142333984,
"rewards/rejected": -11.480762481689453,
"step": 425
},
{
"epoch": 1.1499330655957163,
"grad_norm": 0.00020394229795783758,
"learning_rate": 9.48458171192047e-06,
"logits/chosen": -1.8643211126327515,
"logits/rejected": -1.808932900428772,
"logps/chosen": -63.81361770629883,
"logps/rejected": -203.98910522460938,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.149599075317383,
"rewards/margins": 22.48383140563965,
"rewards/rejected": -12.334233283996582,
"step": 430
},
{
"epoch": 1.1633199464524766,
"grad_norm": 0.00010723127343226224,
"learning_rate": 9.424368239894115e-06,
"logits/chosen": -1.8679778575897217,
"logits/rejected": -1.7837800979614258,
"logps/chosen": -63.05647659301758,
"logps/rejected": -189.9877166748047,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.14889907836914,
"rewards/margins": 21.50381088256836,
"rewards/rejected": -11.354910850524902,
"step": 435
},
{
"epoch": 1.176706827309237,
"grad_norm": 5.072273052064702e-05,
"learning_rate": 9.363769691765979e-06,
"logits/chosen": -1.892148733139038,
"logits/rejected": -1.7716989517211914,
"logps/chosen": -56.24811935424805,
"logps/rejected": -198.82974243164062,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.459526062011719,
"rewards/margins": 21.396852493286133,
"rewards/rejected": -11.93732738494873,
"step": 440
},
{
"epoch": 1.1900937081659972,
"grad_norm": 3.936883786082035e-06,
"learning_rate": 9.302798791892003e-06,
"logits/chosen": -1.886169672012329,
"logits/rejected": -1.799318552017212,
"logps/chosen": -61.040687561035156,
"logps/rejected": -195.46054077148438,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.538091659545898,
"rewards/margins": 21.095800399780273,
"rewards/rejected": -11.557706832885742,
"step": 445
},
{
"epoch": 1.2034805890227578,
"grad_norm": 7.63648931751959e-05,
"learning_rate": 9.241468342813765e-06,
"logits/chosen": -1.8738839626312256,
"logits/rejected": -1.7493702173233032,
"logps/chosen": -64.00971984863281,
"logps/rejected": -193.2359161376953,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.317130088806152,
"rewards/margins": 21.8746337890625,
"rewards/rejected": -11.557502746582031,
"step": 450
},
{
"epoch": 1.216867469879518,
"grad_norm": 2.4901968572521582e-05,
"learning_rate": 9.179791222570236e-06,
"logits/chosen": -1.870624303817749,
"logits/rejected": -1.7692861557006836,
"logps/chosen": -56.192413330078125,
"logps/rejected": -197.38587951660156,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.873640060424805,
"rewards/margins": 21.597814559936523,
"rewards/rejected": -11.724173545837402,
"step": 455
},
{
"epoch": 1.2302543507362784,
"grad_norm": 0.00011131736391689628,
"learning_rate": 9.117780381993665e-06,
"logits/chosen": -1.8614346981048584,
"logits/rejected": -1.7440112829208374,
"logps/chosen": -63.95283889770508,
"logps/rejected": -197.30955505371094,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.20496940612793,
"rewards/margins": 21.90732192993164,
"rewards/rejected": -11.702352523803711,
"step": 460
},
{
"epoch": 1.2436412315930387,
"grad_norm": 0.00010593160550342873,
"learning_rate": 9.055448841990199e-06,
"logits/chosen": -1.8694393634796143,
"logits/rejected": -1.799663782119751,
"logps/chosen": -66.95099639892578,
"logps/rejected": -200.08949279785156,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.330759048461914,
"rewards/margins": 22.335012435913086,
"rewards/rejected": -12.004252433776855,
"step": 465
},
{
"epoch": 1.2570281124497993,
"grad_norm": 2.5527588149998337e-05,
"learning_rate": 8.992809690805775e-06,
"logits/chosen": -1.8609740734100342,
"logits/rejected": -1.74262273311615,
"logps/chosen": -63.916542053222656,
"logps/rejected": -192.27394104003906,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.684073448181152,
"rewards/margins": 21.17348861694336,
"rewards/rejected": -11.489413261413574,
"step": 470
},
{
"epoch": 1.2704149933065596,
"grad_norm": 1.9687415260705166e-05,
"learning_rate": 8.929876081277882e-06,
"logits/chosen": -1.861687421798706,
"logits/rejected": -1.725548505783081,
"logps/chosen": -60.264564514160156,
"logps/rejected": -213.0454864501953,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.275764465332031,
"rewards/margins": 23.038787841796875,
"rewards/rejected": -12.763025283813477,
"step": 475
},
{
"epoch": 1.28380187416332,
"grad_norm": 1.3463418326864485e-05,
"learning_rate": 8.866661228073754e-06,
"logits/chosen": -1.844506859779358,
"logits/rejected": -1.6935580968856812,
"logps/chosen": -66.41419982910156,
"logps/rejected": -208.23080444335938,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.857114791870117,
"rewards/margins": 23.624019622802734,
"rewards/rejected": -12.766902923583984,
"step": 480
},
{
"epoch": 1.2971887550200802,
"grad_norm": 3.575617665774189e-05,
"learning_rate": 8.803178404915581e-06,
"logits/chosen": -1.8610200881958008,
"logits/rejected": -1.7305552959442139,
"logps/chosen": -62.477806091308594,
"logps/rejected": -208.88522338867188,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.284475326538086,
"rewards/margins": 23.20441436767578,
"rewards/rejected": -12.919939994812012,
"step": 485
},
{
"epoch": 1.3105756358768406,
"grad_norm": 1.7733655113261193e-05,
"learning_rate": 8.739440941793324e-06,
"logits/chosen": -1.8665441274642944,
"logits/rejected": -1.7732328176498413,
"logps/chosen": -65.22151184082031,
"logps/rejected": -202.66506958007812,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.311973571777344,
"rewards/margins": 22.648090362548828,
"rewards/rejected": -12.336113929748535,
"step": 490
},
{
"epoch": 1.323962516733601,
"grad_norm": 5.565172978094779e-05,
"learning_rate": 8.675462222165706e-06,
"logits/chosen": -1.8920332193374634,
"logits/rejected": -1.8146085739135742,
"logps/chosen": -58.67218780517578,
"logps/rejected": -198.10592651367188,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.465251922607422,
"rewards/margins": 21.327327728271484,
"rewards/rejected": -11.862076759338379,
"step": 495
},
{
"epoch": 1.3373493975903614,
"grad_norm": 4.80995959151187e-06,
"learning_rate": 8.611255680149984e-06,
"logits/chosen": -1.8779428005218506,
"logits/rejected": -1.7206926345825195,
"logps/chosen": -56.64423751831055,
"logps/rejected": -197.15884399414062,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.939409255981445,
"rewards/margins": 21.892318725585938,
"rewards/rejected": -11.952908515930176,
"step": 500
},
{
"epoch": 1.3373493975903614,
"eval_logits/chosen": -1.931687355041504,
"eval_logits/rejected": -1.7430627346038818,
"eval_logps/chosen": -63.374298095703125,
"eval_logps/rejected": -197.72195434570312,
"eval_loss": 1.0383198656427339e-07,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": 10.019155502319336,
"eval_rewards/margins": 21.86034393310547,
"eval_rewards/rejected": -11.841187477111816,
"eval_runtime": 29.641,
"eval_samples_per_second": 6.747,
"eval_steps_per_second": 6.747,
"step": 500
},
{
"epoch": 1.3507362784471217,
"grad_norm": 1.1448615623521619e-05,
"learning_rate": 8.546834797701083e-06,
"logits/chosen": -1.8999868631362915,
"logits/rejected": -1.7782312631607056,
"logps/chosen": -56.27119064331055,
"logps/rejected": -200.72647094726562,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.567099571228027,
"rewards/margins": 21.628511428833008,
"rewards/rejected": -12.061409950256348,
"step": 505
},
{
"epoch": 1.3641231593038823,
"grad_norm": 2.9590013582492247e-05,
"learning_rate": 8.482213101780686e-06,
"logits/chosen": -1.8799558877944946,
"logits/rejected": -1.7939083576202393,
"logps/chosen": -69.73988342285156,
"logps/rejected": -209.3248748779297,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.24708366394043,
"rewards/margins": 22.704837799072266,
"rewards/rejected": -12.45775318145752,
"step": 510
},
{
"epoch": 1.3775100401606426,
"grad_norm": 3.4167998819611967e-05,
"learning_rate": 8.41740416151686e-06,
"logits/chosen": -1.8656768798828125,
"logits/rejected": -1.730297327041626,
"logps/chosen": -64.72178649902344,
"logps/rejected": -198.3512725830078,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.287378311157227,
"rewards/margins": 22.36208152770996,
"rewards/rejected": -12.074703216552734,
"step": 515
},
{
"epoch": 1.390896921017403,
"grad_norm": 3.03801989502972e-05,
"learning_rate": 8.352421585354853e-06,
"logits/chosen": -1.8596899509429932,
"logits/rejected": -1.6925132274627686,
"logps/chosen": -66.36322021484375,
"logps/rejected": -199.36143493652344,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.167764663696289,
"rewards/margins": 22.33287811279297,
"rewards/rejected": -12.16511344909668,
"step": 520
},
{
"epoch": 1.4042838018741632,
"grad_norm": 1.139958476414904e-05,
"learning_rate": 8.287279018199613e-06,
"logits/chosen": -1.8639628887176514,
"logits/rejected": -1.7620197534561157,
"logps/chosen": -64.00345611572266,
"logps/rejected": -206.9742431640625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.084488868713379,
"rewards/margins": 22.643537521362305,
"rewards/rejected": -12.559050559997559,
"step": 525
},
{
"epoch": 1.4176706827309236,
"grad_norm": 4.2092684452654794e-05,
"learning_rate": 8.221990138550654e-06,
"logits/chosen": -1.886985182762146,
"logits/rejected": -1.7139594554901123,
"logps/chosen": -61.876548767089844,
"logps/rejected": -207.38473510742188,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.575916290283203,
"rewards/margins": 22.21279525756836,
"rewards/rejected": -12.636876106262207,
"step": 530
},
{
"epoch": 1.431057563587684,
"grad_norm": 0.00011076881492044777,
"learning_rate": 8.156568655629891e-06,
"logits/chosen": -1.868139624595642,
"logits/rejected": -1.7361793518066406,
"logps/chosen": -65.99727630615234,
"logps/rejected": -193.82699584960938,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.533285140991211,
"rewards/margins": 22.23984718322754,
"rewards/rejected": -11.706562042236328,
"step": 535
},
{
"epoch": 1.4444444444444444,
"grad_norm": 1.3130883417034056e-05,
"learning_rate": 8.091028306502991e-06,
"logits/chosen": -1.8768796920776367,
"logits/rejected": -1.7341238260269165,
"logps/chosen": -56.3282585144043,
"logps/rejected": -198.73318481445312,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.183573722839355,
"rewards/margins": 22.112152099609375,
"rewards/rejected": -11.92857551574707,
"step": 540
},
{
"epoch": 1.4578313253012047,
"grad_norm": 1.579059016876272e-06,
"learning_rate": 8.025382853194894e-06,
"logits/chosen": -1.8691343069076538,
"logits/rejected": -1.7641900777816772,
"logps/chosen": -69.58892059326172,
"logps/rejected": -199.70413208007812,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.471430778503418,
"rewards/margins": 22.377849578857422,
"rewards/rejected": -11.90641975402832,
"step": 545
},
{
"epoch": 1.4712182061579653,
"grad_norm": 3.329586616018787e-05,
"learning_rate": 7.9596460798001e-06,
"logits/chosen": -1.8352091312408447,
"logits/rejected": -1.7521950006484985,
"logps/chosen": -66.96287536621094,
"logps/rejected": -195.3020782470703,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 11.123357772827148,
"rewards/margins": 22.767568588256836,
"rewards/rejected": -11.644209861755371,
"step": 550
},
{
"epoch": 1.4846050870147256,
"grad_norm": 2.0335013687144965e-05,
"learning_rate": 7.893831789588308e-06,
"logits/chosen": -1.887690782546997,
"logits/rejected": -1.756474256515503,
"logps/chosen": -60.508277893066406,
"logps/rejected": -201.3896942138672,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.79334545135498,
"rewards/margins": 22.125324249267578,
"rewards/rejected": -12.331976890563965,
"step": 555
},
{
"epoch": 1.497991967871486,
"grad_norm": 8.999327292258386e-06,
"learning_rate": 7.827953802106033e-06,
"logits/chosen": -1.864855408668518,
"logits/rejected": -1.7428302764892578,
"logps/chosen": -69.32931518554688,
"logps/rejected": -202.78677368164062,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.690114974975586,
"rewards/margins": 23.005146026611328,
"rewards/rejected": -12.315031051635742,
"step": 560
},
{
"epoch": 1.5113788487282465,
"grad_norm": 2.8161759473732673e-05,
"learning_rate": 7.762025950274813e-06,
"logits/chosen": -1.8802257776260376,
"logits/rejected": -1.7101455926895142,
"logps/chosen": -61.14844512939453,
"logps/rejected": -190.69073486328125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.25882625579834,
"rewards/margins": 21.803659439086914,
"rewards/rejected": -11.544832229614258,
"step": 565
},
{
"epoch": 1.5247657295850066,
"grad_norm": 3.1670299449615413e-06,
"learning_rate": 7.696062077486596e-06,
"logits/chosen": -1.8524658679962158,
"logits/rejected": -1.7226076126098633,
"logps/chosen": -71.62201690673828,
"logps/rejected": -203.36416625976562,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.312665939331055,
"rewards/margins": 22.732587814331055,
"rewards/rejected": -12.419922828674316,
"step": 570
},
{
"epoch": 1.538152610441767,
"grad_norm": 3.04284712910885e-05,
"learning_rate": 7.630076034696934e-06,
"logits/chosen": -1.8999922275543213,
"logits/rejected": -1.7562729120254517,
"logps/chosen": -55.377227783203125,
"logps/rejected": -203.68716430664062,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.358214378356934,
"rewards/margins": 21.71164894104004,
"rewards/rejected": -12.353431701660156,
"step": 575
},
{
"epoch": 1.5515394912985274,
"grad_norm": 2.8392036256263964e-05,
"learning_rate": 7.564081677516588e-06,
"logits/chosen": -1.8879365921020508,
"logits/rejected": -1.7530921697616577,
"logps/chosen": -60.916473388671875,
"logps/rejected": -210.0749969482422,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.68341064453125,
"rewards/margins": 22.60190773010254,
"rewards/rejected": -12.918497085571289,
"step": 580
},
{
"epoch": 1.5649263721552877,
"grad_norm": 6.038487845216878e-05,
"learning_rate": 7.4980928633021615e-06,
"logits/chosen": -1.8659874200820923,
"logits/rejected": -1.734829306602478,
"logps/chosen": -58.3009033203125,
"logps/rejected": -198.56259155273438,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.786453247070312,
"rewards/margins": 21.83785629272461,
"rewards/rejected": -12.05140209197998,
"step": 585
},
{
"epoch": 1.5783132530120483,
"grad_norm": 0.0002005839196499437,
"learning_rate": 7.432123448246354e-06,
"logits/chosen": -1.8782905340194702,
"logits/rejected": -1.714125633239746,
"logps/chosen": -62.971839904785156,
"logps/rejected": -199.8243408203125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.586263656616211,
"rewards/margins": 21.808002471923828,
"rewards/rejected": -12.221738815307617,
"step": 590
},
{
"epoch": 1.5917001338688086,
"grad_norm": 2.639082367750234e-06,
"learning_rate": 7.366187284468474e-06,
"logits/chosen": -1.8676035404205322,
"logits/rejected": -1.7441984415054321,
"logps/chosen": -63.17724609375,
"logps/rejected": -207.64389038085938,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.685806274414062,
"rewards/margins": 23.19856834411621,
"rewards/rejected": -12.512762069702148,
"step": 595
},
{
"epoch": 1.605087014725569,
"grad_norm": 1.1028166227333713e-05,
"learning_rate": 7.300298217105793e-06,
"logits/chosen": -1.873063087463379,
"logits/rejected": -1.742378830909729,
"logps/chosen": -61.4983024597168,
"logps/rejected": -200.46188354492188,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.306286811828613,
"rewards/margins": 22.533138275146484,
"rewards/rejected": -12.226852416992188,
"step": 600
},
{
"epoch": 1.6184738955823295,
"grad_norm": 5.2287599828559905e-05,
"learning_rate": 7.234470081406376e-06,
"logits/chosen": -1.8500477075576782,
"logits/rejected": -1.7297271490097046,
"logps/chosen": -71.09878540039062,
"logps/rejected": -198.9838409423828,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.490249633789062,
"rewards/margins": 22.542438507080078,
"rewards/rejected": -12.052189826965332,
"step": 605
},
{
"epoch": 1.6318607764390896,
"grad_norm": 3.5077468055533245e-05,
"learning_rate": 7.168716699823987e-06,
"logits/chosen": -1.8880395889282227,
"logits/rejected": -1.7822411060333252,
"logps/chosen": -60.97273635864258,
"logps/rejected": -207.91616821289062,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.011301040649414,
"rewards/margins": 22.688114166259766,
"rewards/rejected": -12.676815032958984,
"step": 610
},
{
"epoch": 1.64524765729585,
"grad_norm": 7.598105185024906e-06,
"learning_rate": 7.103051879115679e-06,
"logits/chosen": -1.8631807565689087,
"logits/rejected": -1.698168158531189,
"logps/chosen": -62.397377014160156,
"logps/rejected": -208.0476531982422,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.969112396240234,
"rewards/margins": 22.851131439208984,
"rewards/rejected": -12.88201904296875,
"step": 615
},
{
"epoch": 1.6586345381526104,
"grad_norm": 4.015982267446816e-05,
"learning_rate": 7.037489407442674e-06,
"logits/chosen": -1.85487961769104,
"logits/rejected": -1.75741708278656,
"logps/chosen": -66.20887756347656,
"logps/rejected": -200.14871215820312,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.314077377319336,
"rewards/margins": 22.312610626220703,
"rewards/rejected": -11.998533248901367,
"step": 620
},
{
"epoch": 1.6720214190093707,
"grad_norm": 1.63569475262193e-05,
"learning_rate": 6.9720430514751625e-06,
"logits/chosen": -1.8645589351654053,
"logits/rejected": -1.7157907485961914,
"logps/chosen": -59.857513427734375,
"logps/rejected": -197.91986083984375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.427450180053711,
"rewards/margins": 21.496850967407227,
"rewards/rejected": -12.069400787353516,
"step": 625
},
{
"epoch": 1.6854082998661313,
"grad_norm": 1.3075319657218643e-05,
"learning_rate": 6.9067265535016e-06,
"logits/chosen": -1.8522984981536865,
"logits/rejected": -1.8144248723983765,
"logps/chosen": -63.267921447753906,
"logps/rejected": -200.5415802001953,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.205777168273926,
"rewards/margins": 22.44827651977539,
"rewards/rejected": -12.242500305175781,
"step": 630
},
{
"epoch": 1.6987951807228916,
"grad_norm": 8.631217497168109e-06,
"learning_rate": 6.841553628543135e-06,
"logits/chosen": -1.8666969537734985,
"logits/rejected": -1.7287845611572266,
"logps/chosen": -63.22052764892578,
"logps/rejected": -199.9890594482422,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.489965438842773,
"rewards/margins": 22.612924575805664,
"rewards/rejected": -12.122960090637207,
"step": 635
},
{
"epoch": 1.712182061579652,
"grad_norm": 1.6783178580226377e-05,
"learning_rate": 6.776537961473755e-06,
"logits/chosen": -1.8770383596420288,
"logits/rejected": -1.7175519466400146,
"logps/chosen": -59.59788131713867,
"logps/rejected": -203.4702911376953,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.750931739807129,
"rewards/margins": 22.120555877685547,
"rewards/rejected": -12.36962604522705,
"step": 640
},
{
"epoch": 1.7255689424364125,
"grad_norm": 6.6644392973103095e-06,
"learning_rate": 6.711693204146765e-06,
"logits/chosen": -1.8620790243148804,
"logits/rejected": -1.7563501596450806,
"logps/chosen": -72.01322937011719,
"logps/rejected": -202.68197631835938,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.699959754943848,
"rewards/margins": 22.8387451171875,
"rewards/rejected": -12.138784408569336,
"step": 645
},
{
"epoch": 1.7389558232931726,
"grad_norm": 8.885542047210038e-05,
"learning_rate": 6.6470329725282045e-06,
"logits/chosen": -1.8752985000610352,
"logits/rejected": -1.7436233758926392,
"logps/chosen": -57.58866500854492,
"logps/rejected": -203.06430053710938,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.94059944152832,
"rewards/margins": 22.154766082763672,
"rewards/rejected": -12.214167594909668,
"step": 650
},
{
"epoch": 1.752342704149933,
"grad_norm": 2.4713101083762012e-05,
"learning_rate": 6.5825708438377856e-06,
"logits/chosen": -1.8654638528823853,
"logits/rejected": -1.740407943725586,
"logps/chosen": -63.27727127075195,
"logps/rejected": -203.1544189453125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.673643112182617,
"rewards/margins": 22.118864059448242,
"rewards/rejected": -12.445220947265625,
"step": 655
},
{
"epoch": 1.7657295850066934,
"grad_norm": 6.906106136739254e-05,
"learning_rate": 6.518320353697992e-06,
"logits/chosen": -1.8629789352416992,
"logits/rejected": -1.7562310695648193,
"logps/chosen": -61.98480987548828,
"logps/rejected": -205.32180786132812,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.444381713867188,
"rewards/margins": 22.911869049072266,
"rewards/rejected": -12.467488288879395,
"step": 660
},
{
"epoch": 1.7791164658634537,
"grad_norm": 1.5507324860664085e-05,
"learning_rate": 6.454294993291879e-06,
"logits/chosen": -1.8623278141021729,
"logits/rejected": -1.68741774559021,
"logps/chosen": -66.75733184814453,
"logps/rejected": -205.45748901367188,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.855257034301758,
"rewards/margins": 22.61016082763672,
"rewards/rejected": -12.754903793334961,
"step": 665
},
{
"epoch": 1.7925033467202143,
"grad_norm": 6.3960551415220834e-06,
"learning_rate": 6.390508206530243e-06,
"logits/chosen": -1.87300705909729,
"logits/rejected": -1.731795310974121,
"logps/chosen": -64.09671020507812,
"logps/rejected": -207.6633758544922,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.862910270690918,
"rewards/margins": 22.58968162536621,
"rewards/rejected": -12.726774215698242,
"step": 670
},
{
"epoch": 1.8058902275769746,
"grad_norm": 5.351726599656104e-07,
"learning_rate": 6.326973387228678e-06,
"logits/chosen": -1.859619140625,
"logits/rejected": -1.7685467004776,
"logps/chosen": -69.70652770996094,
"logps/rejected": -205.14407348632812,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.268574714660645,
"rewards/margins": 22.812740325927734,
"rewards/rejected": -12.544163703918457,
"step": 675
},
{
"epoch": 1.819277108433735,
"grad_norm": 2.628388028824702e-05,
"learning_rate": 6.263703876295187e-06,
"logits/chosen": -1.87285578250885,
"logits/rejected": -1.7142107486724854,
"logps/chosen": -65.52404022216797,
"logps/rejected": -213.24234008789062,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.968465805053711,
"rewards/margins": 23.130929946899414,
"rewards/rejected": -13.162463188171387,
"step": 680
},
{
"epoch": 1.8326639892904955,
"grad_norm": 1.8713224562816322e-05,
"learning_rate": 6.200712958928871e-06,
"logits/chosen": -1.8909542560577393,
"logits/rejected": -1.7693777084350586,
"logps/chosen": -55.27473831176758,
"logps/rejected": -202.45265197753906,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.046746253967285,
"rewards/margins": 22.324237823486328,
"rewards/rejected": -12.277493476867676,
"step": 685
},
{
"epoch": 1.8460508701472556,
"grad_norm": 7.098522928572493e-06,
"learning_rate": 6.138013861830348e-06,
"logits/chosen": -1.8942615985870361,
"logits/rejected": -1.731011986732483,
"logps/chosen": -53.98344802856445,
"logps/rejected": -205.3796844482422,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.68769645690918,
"rewards/margins": 22.195293426513672,
"rewards/rejected": -12.507596969604492,
"step": 690
},
{
"epoch": 1.859437751004016,
"grad_norm": 3.2382187782786787e-05,
"learning_rate": 6.075619750424422e-06,
"logits/chosen": -1.873392105102539,
"logits/rejected": -1.6868184804916382,
"logps/chosen": -59.67496871948242,
"logps/rejected": -209.4269256591797,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.969255447387695,
"rewards/margins": 22.771167755126953,
"rewards/rejected": -12.801912307739258,
"step": 695
},
{
"epoch": 1.8728246318607764,
"grad_norm": 6.702355221932521e-06,
"learning_rate": 6.013543726095646e-06,
"logits/chosen": -1.8677663803100586,
"logits/rejected": -1.71700119972229,
"logps/chosen": -70.77827453613281,
"logps/rejected": -204.8978729248047,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.274455070495605,
"rewards/margins": 22.849491119384766,
"rewards/rejected": -12.575034141540527,
"step": 700
},
{
"epoch": 1.8862115127175367,
"grad_norm": 5.6430312724842224e-06,
"learning_rate": 5.9517988234373095e-06,
"logits/chosen": -1.8611056804656982,
"logits/rejected": -1.7488939762115479,
"logps/chosen": -70.99211883544922,
"logps/rejected": -209.8909912109375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.573973655700684,
"rewards/margins": 23.441730499267578,
"rewards/rejected": -12.867757797241211,
"step": 705
},
{
"epoch": 1.8995983935742973,
"grad_norm": 9.81956509349402e-06,
"learning_rate": 5.890398007514474e-06,
"logits/chosen": -1.8544105291366577,
"logits/rejected": -1.7340114116668701,
"logps/chosen": -56.71294403076172,
"logps/rejected": -206.75668334960938,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.037813186645508,
"rewards/margins": 22.693235397338867,
"rewards/rejected": -12.655420303344727,
"step": 710
},
{
"epoch": 1.9129852744310576,
"grad_norm": 8.173860805982258e-06,
"learning_rate": 5.8293541711415895e-06,
"logits/chosen": -1.8714882135391235,
"logits/rejected": -1.7200887203216553,
"logps/chosen": -72.78733825683594,
"logps/rejected": -192.37254333496094,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.271878242492676,
"rewards/margins": 21.913005828857422,
"rewards/rejected": -11.641127586364746,
"step": 715
},
{
"epoch": 1.926372155287818,
"grad_norm": 0.00012220365169923753,
"learning_rate": 5.768680132175289e-06,
"logits/chosen": -1.8853578567504883,
"logits/rejected": -1.6917082071304321,
"logps/chosen": -60.888282775878906,
"logps/rejected": -199.157470703125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.8045072555542,
"rewards/margins": 22.041522979736328,
"rewards/rejected": -12.237015724182129,
"step": 720
},
{
"epoch": 1.9397590361445785,
"grad_norm": 0.0001451301359338686,
"learning_rate": 5.708388630822922e-06,
"logits/chosen": -1.8622829914093018,
"logits/rejected": -1.7366501092910767,
"logps/chosen": -68.04113006591797,
"logps/rejected": -201.74124145507812,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.160343170166016,
"rewards/margins": 22.52884864807129,
"rewards/rejected": -12.36850357055664,
"step": 725
},
{
"epoch": 1.9531459170013385,
"grad_norm": 6.776998816349078e-06,
"learning_rate": 5.648492326967392e-06,
"logits/chosen": -1.855958342552185,
"logits/rejected": -1.7051986455917358,
"logps/chosen": -65.05091857910156,
"logps/rejected": -209.4147491455078,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.261792182922363,
"rewards/margins": 23.097671508789062,
"rewards/rejected": -12.835878372192383,
"step": 730
},
{
"epoch": 1.966532797858099,
"grad_norm": 8.551254722988233e-05,
"learning_rate": 5.589003797508865e-06,
"logits/chosen": -1.8737328052520752,
"logits/rejected": -1.7128698825836182,
"logps/chosen": -61.9605598449707,
"logps/rejected": -210.7420196533203,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.895185470581055,
"rewards/margins": 23.087865829467773,
"rewards/rejected": -13.192680358886719,
"step": 735
},
{
"epoch": 1.9799196787148594,
"grad_norm": 1.1372939297871199e-05,
"learning_rate": 5.52993553372389e-06,
"logits/chosen": -1.8983303308486938,
"logits/rejected": -1.7375752925872803,
"logps/chosen": -64.04942321777344,
"logps/rejected": -205.53140258789062,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.743837356567383,
"rewards/margins": 22.313587188720703,
"rewards/rejected": -12.569747924804688,
"step": 740
},
{
"epoch": 1.9933065595716197,
"grad_norm": 2.0934508938807994e-05,
"learning_rate": 5.471299938642517e-06,
"logits/chosen": -1.8644678592681885,
"logits/rejected": -1.6225488185882568,
"logps/chosen": -75.8808822631836,
"logps/rejected": -205.18276977539062,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.244497299194336,
"rewards/margins": 22.91061019897461,
"rewards/rejected": -12.66611385345459,
"step": 745
},
{
"epoch": 1.9959839357429718,
"eval_logits/chosen": -1.9272390604019165,
"eval_logits/rejected": -1.7022486925125122,
"eval_logps/chosen": -63.15131378173828,
"eval_logps/rejected": -202.59934997558594,
"eval_loss": 6.345212000269385e-08,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": 10.041454315185547,
"eval_rewards/margins": 22.370386123657227,
"eval_rewards/rejected": -12.328930854797363,
"eval_runtime": 33.4211,
"eval_samples_per_second": 5.984,
"eval_steps_per_second": 5.984,
"step": 746
},
{
"epoch": 2.005354752342704,
"grad_norm": 1.0485188795428257e-05,
"learning_rate": 5.413109324443927e-06,
"logits/chosen": -1.85820472240448,
"logits/rejected": -1.7645277976989746,
"logps/chosen": -62.93926239013672,
"logps/rejected": -206.52940368652344,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.950172424316406,
"rewards/margins": 22.60118293762207,
"rewards/rejected": -12.651012420654297,
"step": 750
},
{
"epoch": 2.0187416331994643,
"grad_norm": 8.381151928915642e-06,
"learning_rate": 5.355375909871147e-06,
"logits/chosen": -1.865766167640686,
"logits/rejected": -1.7003635168075562,
"logps/chosen": -65.76628112792969,
"logps/rejected": -203.88465881347656,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.34652328491211,
"rewards/margins": 22.93129539489746,
"rewards/rejected": -12.5847749710083,
"step": 755
},
{
"epoch": 2.032128514056225,
"grad_norm": 5.861081262992229e-06,
"learning_rate": 5.298111817665392e-06,
"logits/chosen": -1.8790661096572876,
"logits/rejected": -1.6590349674224854,
"logps/chosen": -61.801902770996094,
"logps/rejected": -207.1491241455078,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.80274486541748,
"rewards/margins": 22.702667236328125,
"rewards/rejected": -12.899922370910645,
"step": 760
},
{
"epoch": 2.0455153949129854,
"grad_norm": 1.3709258382732514e-05,
"learning_rate": 5.2413290720205445e-06,
"logits/chosen": -1.8769207000732422,
"logits/rejected": -1.7207590341567993,
"logps/chosen": -65.25004577636719,
"logps/rejected": -205.05783081054688,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.010208129882812,
"rewards/margins": 22.46693992614746,
"rewards/rejected": -12.456731796264648,
"step": 765
},
{
"epoch": 2.0589022757697455,
"grad_norm": 6.286778443609364e-06,
"learning_rate": 5.185039596058357e-06,
"logits/chosen": -1.8302192687988281,
"logits/rejected": -1.6823524236679077,
"logps/chosen": -57.266639709472656,
"logps/rejected": -213.8351287841797,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.011209487915039,
"rewards/margins": 23.254796981811523,
"rewards/rejected": -13.243589401245117,
"step": 770
},
{
"epoch": 2.072289156626506,
"grad_norm": 1.2825248631997965e-05,
"learning_rate": 5.129255209324836e-06,
"logits/chosen": -1.8800920248031616,
"logits/rejected": -1.6835558414459229,
"logps/chosen": -66.84913635253906,
"logps/rejected": -201.17745971679688,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.850937843322754,
"rewards/margins": 22.242956161499023,
"rewards/rejected": -12.39201831817627,
"step": 775
},
{
"epoch": 2.0856760374832666,
"grad_norm": 2.984725870192051e-05,
"learning_rate": 5.073987625308423e-06,
"logits/chosen": -1.8780286312103271,
"logits/rejected": -1.7410333156585693,
"logps/chosen": -67.51756286621094,
"logps/rejected": -206.67996215820312,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.865274429321289,
"rewards/margins": 22.475534439086914,
"rewards/rejected": -12.610260963439941,
"step": 780
},
{
"epoch": 2.0990629183400267,
"grad_norm": 0.00011517904931679368,
"learning_rate": 5.019248448980402e-06,
"logits/chosen": -1.8741604089736938,
"logits/rejected": -1.7101694345474243,
"logps/chosen": -60.773651123046875,
"logps/rejected": -216.3609619140625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.092966079711914,
"rewards/margins": 23.445796966552734,
"rewards/rejected": -13.35283088684082,
"step": 785
},
{
"epoch": 2.112449799196787,
"grad_norm": 0.00015422521391883492,
"learning_rate": 4.965049174358126e-06,
"logits/chosen": -1.8605587482452393,
"logits/rejected": -1.728424072265625,
"logps/chosen": -57.07219314575195,
"logps/rejected": -209.2133026123047,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.606898307800293,
"rewards/margins": 23.471294403076172,
"rewards/rejected": -12.864397048950195,
"step": 790
},
{
"epoch": 2.1258366800535473,
"grad_norm": 8.874901141098235e-06,
"learning_rate": 4.911401182091517e-06,
"logits/chosen": -1.871817946434021,
"logits/rejected": -1.662431001663208,
"logps/chosen": -58.060302734375,
"logps/rejected": -210.3882293701172,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.023431777954102,
"rewards/margins": 22.93259620666504,
"rewards/rejected": -12.90916633605957,
"step": 795
},
{
"epoch": 2.139223560910308,
"grad_norm": 6.589568329218309e-06,
"learning_rate": 4.858315737073384e-06,
"logits/chosen": -1.8591467142105103,
"logits/rejected": -1.7722461223602295,
"logps/chosen": -57.941993713378906,
"logps/rejected": -201.37484741210938,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.01035213470459,
"rewards/margins": 22.34813117980957,
"rewards/rejected": -12.337777137756348,
"step": 800
},
{
"epoch": 2.1526104417670684,
"grad_norm": 5.866213086846983e-06,
"learning_rate": 4.8058039860740515e-06,
"logits/chosen": -1.8637701272964478,
"logits/rejected": -1.7024104595184326,
"logps/chosen": -62.71826171875,
"logps/rejected": -204.09197998046875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.38242244720459,
"rewards/margins": 22.9773006439209,
"rewards/rejected": -12.594879150390625,
"step": 805
},
{
"epoch": 2.1659973226238285,
"grad_norm": 5.492693617270561e-06,
"learning_rate": 4.753876955400771e-06,
"logits/chosen": -1.8641217947006226,
"logits/rejected": -1.7333282232284546,
"logps/chosen": -61.98204803466797,
"logps/rejected": -210.81973266601562,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.179308891296387,
"rewards/margins": 23.154348373413086,
"rewards/rejected": -12.9750394821167,
"step": 810
},
{
"epoch": 2.179384203480589,
"grad_norm": 3.0393581255339086e-05,
"learning_rate": 4.702545548582452e-06,
"logits/chosen": -1.871565818786621,
"logits/rejected": -1.7063429355621338,
"logps/chosen": -63.68731689453125,
"logps/rejected": -205.74569702148438,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.423490524291992,
"rewards/margins": 23.04408073425293,
"rewards/rejected": -12.620591163635254,
"step": 815
},
{
"epoch": 2.1927710843373496,
"grad_norm": 0.00012603566574398428,
"learning_rate": 4.651820544080155e-06,
"logits/chosen": -1.8732541799545288,
"logits/rejected": -1.743831992149353,
"logps/chosen": -59.885963439941406,
"logps/rejected": -203.7340850830078,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.865376472473145,
"rewards/margins": 22.34671974182129,
"rewards/rejected": -12.481344223022461,
"step": 820
},
{
"epoch": 2.2061579651941097,
"grad_norm": 0.00017827175906859338,
"learning_rate": 4.601712593023857e-06,
"logits/chosen": -1.8484163284301758,
"logits/rejected": -1.6758276224136353,
"logps/chosen": -66.60858154296875,
"logps/rejected": -206.3388671875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.185609817504883,
"rewards/margins": 22.969905853271484,
"rewards/rejected": -12.784296035766602,
"step": 825
},
{
"epoch": 2.21954484605087,
"grad_norm": 7.549571819254197e-06,
"learning_rate": 4.552232216975945e-06,
"logits/chosen": -1.8551725149154663,
"logits/rejected": -1.7120177745819092,
"logps/chosen": -57.83213424682617,
"logps/rejected": -209.56076049804688,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.335837364196777,
"rewards/margins": 23.33651351928711,
"rewards/rejected": -13.000676155090332,
"step": 830
},
{
"epoch": 2.2329317269076308,
"grad_norm": 3.6077890399610624e-05,
"learning_rate": 4.503389805721925e-06,
"logits/chosen": -1.8688617944717407,
"logits/rejected": -1.7254632711410522,
"logps/chosen": -68.70154571533203,
"logps/rejected": -204.951904296875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.894525527954102,
"rewards/margins": 22.42927360534668,
"rewards/rejected": -12.534747123718262,
"step": 835
},
{
"epoch": 2.246318607764391,
"grad_norm": 0.00010842949996003881,
"learning_rate": 4.455195615088791e-06,
"logits/chosen": -1.8668763637542725,
"logits/rejected": -1.7311407327651978,
"logps/chosen": -56.98907470703125,
"logps/rejected": -207.2830047607422,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.711297988891602,
"rewards/margins": 22.293800354003906,
"rewards/rejected": -12.582502365112305,
"step": 840
},
{
"epoch": 2.2597054886211514,
"grad_norm": 1.2728739420708735e-05,
"learning_rate": 4.407659764791537e-06,
"logits/chosen": -1.8525810241699219,
"logits/rejected": -1.724234938621521,
"logps/chosen": -64.33460998535156,
"logps/rejected": -203.0344696044922,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.49009895324707,
"rewards/margins": 22.857555389404297,
"rewards/rejected": -12.36745548248291,
"step": 845
},
{
"epoch": 2.2730923694779115,
"grad_norm": 6.543518975377083e-05,
"learning_rate": 4.3607922363082345e-06,
"logits/chosen": -1.864620566368103,
"logits/rejected": -1.7289314270019531,
"logps/chosen": -57.764991760253906,
"logps/rejected": -202.0431365966797,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.821728706359863,
"rewards/margins": 22.27083396911621,
"rewards/rejected": -12.449103355407715,
"step": 850
},
{
"epoch": 2.286479250334672,
"grad_norm": 1.0032449608843308e-05,
"learning_rate": 4.314602870784138e-06,
"logits/chosen": -1.8435178995132446,
"logits/rejected": -1.6707134246826172,
"logps/chosen": -63.69310760498047,
"logps/rejected": -214.45443725585938,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.206135749816895,
"rewards/margins": 23.512680053710938,
"rewards/rejected": -13.306546211242676,
"step": 855
},
{
"epoch": 2.2998661311914326,
"grad_norm": 2.2827032353234245e-06,
"learning_rate": 4.2691013669652716e-06,
"logits/chosen": -1.8863375186920166,
"logits/rejected": -1.7014707326889038,
"logps/chosen": -65.29761505126953,
"logps/rejected": -213.1626739501953,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.766716957092285,
"rewards/margins": 22.87672996520996,
"rewards/rejected": -13.110013008117676,
"step": 860
},
{
"epoch": 2.3132530120481927,
"grad_norm": 8.756860552239232e-06,
"learning_rate": 4.224297279161901e-06,
"logits/chosen": -1.8587490320205688,
"logits/rejected": -1.69536554813385,
"logps/chosen": -66.74024963378906,
"logps/rejected": -202.9591827392578,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.559983253479004,
"rewards/margins": 23.037792205810547,
"rewards/rejected": -12.47780704498291,
"step": 865
},
{
"epoch": 2.326639892904953,
"grad_norm": 1.0455483788973652e-05,
"learning_rate": 4.180200015242344e-06,
"logits/chosen": -1.884346604347229,
"logits/rejected": -1.686924695968628,
"logps/chosen": -55.4239501953125,
"logps/rejected": -197.32254028320312,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.581134796142578,
"rewards/margins": 21.723163604736328,
"rewards/rejected": -12.142029762268066,
"step": 870
},
{
"epoch": 2.3400267737617133,
"grad_norm": 1.2215328752063215e-05,
"learning_rate": 4.1368188346575155e-06,
"logits/chosen": -1.8889604806900024,
"logits/rejected": -1.6923484802246094,
"logps/chosen": -62.9194450378418,
"logps/rejected": -200.98641967773438,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.61778450012207,
"rewards/margins": 22.01634979248047,
"rewards/rejected": -12.398564338684082,
"step": 875
},
{
"epoch": 2.353413654618474,
"grad_norm": 1.9663550119730644e-05,
"learning_rate": 4.0941628464966635e-06,
"logits/chosen": -1.8721472024917603,
"logits/rejected": -1.7548141479492188,
"logps/chosen": -60.20267868041992,
"logps/rejected": -194.3836669921875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.855585098266602,
"rewards/margins": 21.740840911865234,
"rewards/rejected": -11.88525676727295,
"step": 880
},
{
"epoch": 2.3668005354752344,
"grad_norm": 5.436282663140446e-06,
"learning_rate": 4.052241007574645e-06,
"logits/chosen": -1.8602116107940674,
"logits/rejected": -1.6991478204727173,
"logps/chosen": -65.68549346923828,
"logps/rejected": -204.56455993652344,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.281954765319824,
"rewards/margins": 22.885639190673828,
"rewards/rejected": -12.603684425354004,
"step": 885
},
{
"epoch": 2.3801874163319945,
"grad_norm": 5.809453796246089e-05,
"learning_rate": 4.011062120551208e-06,
"logits/chosen": -1.8664907217025757,
"logits/rejected": -1.6747157573699951,
"logps/chosen": -58.36309814453125,
"logps/rejected": -204.2161102294922,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.09082317352295,
"rewards/margins": 22.670461654663086,
"rewards/rejected": -12.57963752746582,
"step": 890
},
{
"epoch": 2.393574297188755,
"grad_norm": 1.9118770069326274e-05,
"learning_rate": 3.9706348320826135e-06,
"logits/chosen": -1.8767648935317993,
"logits/rejected": -1.6911203861236572,
"logps/chosen": -64.69419860839844,
"logps/rejected": -204.36233520507812,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.22978401184082,
"rewards/margins": 22.811058044433594,
"rewards/rejected": -12.581275939941406,
"step": 895
},
{
"epoch": 2.4069611780455156,
"grad_norm": 1.5444951714016497e-05,
"learning_rate": 3.930967631006043e-06,
"logits/chosen": -1.85677170753479,
"logits/rejected": -1.6846284866333008,
"logps/chosen": -56.4135627746582,
"logps/rejected": -208.22659301757812,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.155652046203613,
"rewards/margins": 23.030597686767578,
"rewards/rejected": -12.874944686889648,
"step": 900
},
{
"epoch": 2.4203480589022757,
"grad_norm": 1.261055422219215e-05,
"learning_rate": 3.892068846557114e-06,
"logits/chosen": -1.8779144287109375,
"logits/rejected": -1.7127296924591064,
"logps/chosen": -59.67717742919922,
"logps/rejected": -204.9344482421875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.156450271606445,
"rewards/margins": 22.72434425354004,
"rewards/rejected": -12.567895889282227,
"step": 905
},
{
"epoch": 2.433734939759036,
"grad_norm": 2.424490412522573e-05,
"learning_rate": 3.8539466466209426e-06,
"logits/chosen": -1.8967710733413696,
"logits/rejected": -1.7396190166473389,
"logps/chosen": -59.95705032348633,
"logps/rejected": -201.16258239746094,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.140314102172852,
"rewards/margins": 22.46833038330078,
"rewards/rejected": -12.32801342010498,
"step": 910
},
{
"epoch": 2.4471218206157968,
"grad_norm": 1.6727104821256944e-06,
"learning_rate": 3.816609036017052e-06,
"logits/chosen": -1.861707329750061,
"logits/rejected": -1.6875286102294922,
"logps/chosen": -67.87788391113281,
"logps/rejected": -216.080078125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.8674955368042,
"rewards/margins": 23.15651512145996,
"rewards/rejected": -13.289019584655762,
"step": 915
},
{
"epoch": 2.460508701472557,
"grad_norm": 0.0006341092521324754,
"learning_rate": 3.780063854818545e-06,
"logits/chosen": -1.8393361568450928,
"logits/rejected": -1.7207624912261963,
"logps/chosen": -67.45433044433594,
"logps/rejected": -207.20132446289062,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.836288452148438,
"rewards/margins": 23.471036911010742,
"rewards/rejected": -12.634748458862305,
"step": 920
},
{
"epoch": 2.4738955823293174,
"grad_norm": 2.6886251362157054e-05,
"learning_rate": 3.744318776705866e-06,
"logits/chosen": -1.910638451576233,
"logits/rejected": -1.7034502029418945,
"logps/chosen": -58.13981246948242,
"logps/rejected": -209.4466552734375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.597261428833008,
"rewards/margins": 22.449031829833984,
"rewards/rejected": -12.851768493652344,
"step": 925
},
{
"epoch": 2.4872824631860775,
"grad_norm": 1.630528458917979e-05,
"learning_rate": 3.709381307355487e-06,
"logits/chosen": -1.8732010126113892,
"logits/rejected": -1.6983455419540405,
"logps/chosen": -70.4014663696289,
"logps/rejected": -209.67861938476562,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.463674545288086,
"rewards/margins": 23.396472930908203,
"rewards/rejected": -12.9327974319458,
"step": 930
},
{
"epoch": 2.500669344042838,
"grad_norm": 8.642481589049567e-06,
"learning_rate": 3.675258782863893e-06,
"logits/chosen": -1.8884601593017578,
"logits/rejected": -1.6976335048675537,
"logps/chosen": -62.5555419921875,
"logps/rejected": -209.5203094482422,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.015859603881836,
"rewards/margins": 22.983142852783203,
"rewards/rejected": -12.96728515625,
"step": 935
},
{
"epoch": 2.5140562248995986,
"grad_norm": 3.012664819834754e-05,
"learning_rate": 3.641958368207152e-06,
"logits/chosen": -1.8587169647216797,
"logits/rejected": -1.7203285694122314,
"logps/chosen": -66.09326171875,
"logps/rejected": -216.6996307373047,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.267293930053711,
"rewards/margins": 23.60677146911621,
"rewards/rejected": -13.339475631713867,
"step": 940
},
{
"epoch": 2.5274431057563587,
"grad_norm": 3.527342414599843e-05,
"learning_rate": 3.609487055736439e-06,
"logits/chosen": -1.8892666101455688,
"logits/rejected": -1.6997896432876587,
"logps/chosen": -63.81671142578125,
"logps/rejected": -214.39913940429688,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.233362197875977,
"rewards/margins": 22.56268882751465,
"rewards/rejected": -13.329327583312988,
"step": 945
},
{
"epoch": 2.540829986613119,
"grad_norm": 6.991498594288714e-06,
"learning_rate": 3.5778516637097892e-06,
"logits/chosen": -1.8411668539047241,
"logits/rejected": -1.6937415599822998,
"logps/chosen": -70.88134765625,
"logps/rejected": -214.8602294921875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.95386791229248,
"rewards/margins": 24.302793502807617,
"rewards/rejected": -13.348925590515137,
"step": 950
},
{
"epoch": 2.5542168674698793,
"grad_norm": 8.049645657592919e-06,
"learning_rate": 3.547058834860421e-06,
"logits/chosen": -1.8594615459442139,
"logits/rejected": -1.6968889236450195,
"logps/chosen": -70.17243957519531,
"logps/rejected": -201.14242553710938,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.171913146972656,
"rewards/margins": 22.592042922973633,
"rewards/rejected": -12.420127868652344,
"step": 955
},
{
"epoch": 2.56760374832664,
"grad_norm": 4.853548034589039e-06,
"learning_rate": 3.517115035001902e-06,
"logits/chosen": -1.848902940750122,
"logits/rejected": -1.678993821144104,
"logps/chosen": -67.01107788085938,
"logps/rejected": -207.88156127929688,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.421578407287598,
"rewards/margins": 23.362747192382812,
"rewards/rejected": -12.941169738769531,
"step": 960
},
{
"epoch": 2.5809906291834004,
"grad_norm": 1.5446234101545997e-05,
"learning_rate": 3.4880265516704755e-06,
"logits/chosen": -1.886223554611206,
"logits/rejected": -1.6841551065444946,
"logps/chosen": -53.77461624145508,
"logps/rejected": -209.840576171875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.018083572387695,
"rewards/margins": 22.03033447265625,
"rewards/rejected": -13.012250900268555,
"step": 965
},
{
"epoch": 2.5943775100401605,
"grad_norm": 4.117569574191293e-07,
"learning_rate": 3.4597994928048157e-06,
"logits/chosen": -1.8674980401992798,
"logits/rejected": -1.7873084545135498,
"logps/chosen": -64.16064453125,
"logps/rejected": -204.54177856445312,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.784576416015625,
"rewards/margins": 23.250102996826172,
"rewards/rejected": -12.465524673461914,
"step": 970
},
{
"epoch": 2.607764390896921,
"grad_norm": 0.00015623288345523179,
"learning_rate": 3.432439785463496e-06,
"logits/chosen": -1.8516432046890259,
"logits/rejected": -1.6646175384521484,
"logps/chosen": -60.28071975708008,
"logps/rejected": -211.36422729492188,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.473731994628906,
"rewards/margins": 22.622058868408203,
"rewards/rejected": -13.14832878112793,
"step": 975
},
{
"epoch": 2.621151271753681,
"grad_norm": 2.727318678807933e-05,
"learning_rate": 3.405953174580438e-06,
"logits/chosen": -1.868819236755371,
"logits/rejected": -1.6735336780548096,
"logps/chosen": -68.70064544677734,
"logps/rejected": -216.250244140625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.41883373260498,
"rewards/margins": 23.808761596679688,
"rewards/rejected": -13.389928817749023,
"step": 980
},
{
"epoch": 2.6345381526104417,
"grad_norm": 2.9853383239242248e-05,
"learning_rate": 3.380345221758599e-06,
"logits/chosen": -1.8542439937591553,
"logits/rejected": -1.734086036682129,
"logps/chosen": -55.98112869262695,
"logps/rejected": -208.2278289794922,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.873754501342773,
"rewards/margins": 22.735363006591797,
"rewards/rejected": -12.861605644226074,
"step": 985
},
{
"epoch": 2.647925033467202,
"grad_norm": 2.1380317775765434e-05,
"learning_rate": 3.3556213041021635e-06,
"logits/chosen": -1.9123871326446533,
"logits/rejected": -1.659395456314087,
"logps/chosen": -59.75432205200195,
"logps/rejected": -213.9033203125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.29840087890625,
"rewards/margins": 22.681549072265625,
"rewards/rejected": -13.383146286010742,
"step": 990
},
{
"epoch": 2.6613119143239627,
"grad_norm": 1.689990131126251e-05,
"learning_rate": 3.331786613087466e-06,
"logits/chosen": -1.8490474224090576,
"logits/rejected": -1.7181682586669922,
"logps/chosen": -73.18424224853516,
"logps/rejected": -204.5380859375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.719615936279297,
"rewards/margins": 23.40756607055664,
"rewards/rejected": -12.687950134277344,
"step": 995
},
{
"epoch": 2.674698795180723,
"grad_norm": 1.4408194147108588e-05,
"learning_rate": 3.3088461534728977e-06,
"logits/chosen": -1.8690017461776733,
"logits/rejected": -1.7061388492584229,
"logps/chosen": -63.67757034301758,
"logps/rejected": -206.1077117919922,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.205856323242188,
"rewards/margins": 23.052112579345703,
"rewards/rejected": -12.846254348754883,
"step": 1000
},
{
"epoch": 2.674698795180723,
"eval_logits/chosen": -1.924623727798462,
"eval_logits/rejected": -1.679319143295288,
"eval_logps/chosen": -63.07221984863281,
"eval_logps/rejected": -205.45054626464844,
"eval_loss": 4.907800743580992e-08,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": 10.049363136291504,
"eval_rewards/margins": 22.663414001464844,
"eval_rewards/rejected": -12.614049911499023,
"eval_runtime": 29.3063,
"eval_samples_per_second": 6.824,
"eval_steps_per_second": 6.824,
"step": 1000
},
{
"epoch": 2.6880856760374834,
"grad_norm": 9.276622586185113e-05,
"learning_rate": 3.2868047422480172e-06,
"logits/chosen": -1.8449962139129639,
"logits/rejected": -1.6728605031967163,
"logps/chosen": -72.02220916748047,
"logps/rejected": -206.3667449951172,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 11.057626724243164,
"rewards/margins": 23.6590576171875,
"rewards/rejected": -12.601430892944336,
"step": 1005
},
{
"epoch": 2.7014725568942435,
"grad_norm": 4.395175346871838e-05,
"learning_rate": 3.26566700762209e-06,
"logits/chosen": -1.878538727760315,
"logits/rejected": -1.684623122215271,
"logps/chosen": -60.65617752075195,
"logps/rejected": -218.4495391845703,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.014341354370117,
"rewards/margins": 23.78032684326172,
"rewards/rejected": -13.765981674194336,
"step": 1010
},
{
"epoch": 2.714859437751004,
"grad_norm": 4.3231425479461905e-06,
"learning_rate": 3.2454373880522666e-06,
"logits/chosen": -1.8722236156463623,
"logits/rejected": -1.7218602895736694,
"logps/chosen": -65.2080307006836,
"logps/rejected": -204.86962890625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.908885955810547,
"rewards/margins": 22.567001342773438,
"rewards/rejected": -12.658113479614258,
"step": 1015
},
{
"epoch": 2.7282463186077646,
"grad_norm": 4.5356948248809204e-05,
"learning_rate": 3.2261201313116e-06,
"logits/chosen": -1.844891905784607,
"logits/rejected": -1.7258058786392212,
"logps/chosen": -59.008201599121094,
"logps/rejected": -207.6454315185547,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.589787483215332,
"rewards/margins": 23.398906707763672,
"rewards/rejected": -12.809122085571289,
"step": 1020
},
{
"epoch": 2.7416331994645247,
"grad_norm": 6.669775302725611e-06,
"learning_rate": 3.2077192935971174e-06,
"logits/chosen": -1.841321587562561,
"logits/rejected": -1.6863746643066406,
"logps/chosen": -58.30268478393555,
"logps/rejected": -206.64907836914062,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.965548515319824,
"rewards/margins": 22.85080337524414,
"rewards/rejected": -12.885255813598633,
"step": 1025
},
{
"epoch": 2.755020080321285,
"grad_norm": 6.417185431928374e-06,
"learning_rate": 3.1902387386780987e-06,
"logits/chosen": -1.875192642211914,
"logits/rejected": -1.6304317712783813,
"logps/chosen": -57.858924865722656,
"logps/rejected": -208.5902557373047,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.979833602905273,
"rewards/margins": 22.941097259521484,
"rewards/rejected": -12.961263656616211,
"step": 1030
},
{
"epoch": 2.7684069611780453,
"grad_norm": 7.029405969660729e-05,
"learning_rate": 3.1736821370847745e-06,
"logits/chosen": -1.8532873392105103,
"logits/rejected": -1.7104461193084717,
"logps/chosen": -66.86785888671875,
"logps/rejected": -206.0425567626953,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.42613697052002,
"rewards/margins": 23.20665740966797,
"rewards/rejected": -12.780519485473633,
"step": 1035
},
{
"epoch": 2.781793842034806,
"grad_norm": 3.107989687123336e-05,
"learning_rate": 3.158052965337594e-06,
"logits/chosen": -1.857616662979126,
"logits/rejected": -1.6626615524291992,
"logps/chosen": -62.6285514831543,
"logps/rejected": -208.3471221923828,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.993135452270508,
"rewards/margins": 23.01392364501953,
"rewards/rejected": -13.020787239074707,
"step": 1040
},
{
"epoch": 2.7951807228915664,
"grad_norm": 1.721379931041156e-06,
"learning_rate": 3.1433545052172393e-06,
"logits/chosen": -1.863524079322815,
"logits/rejected": -1.6777336597442627,
"logps/chosen": -58.976402282714844,
"logps/rejected": -217.3468017578125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.048527717590332,
"rewards/margins": 23.601850509643555,
"rewards/rejected": -13.553324699401855,
"step": 1045
},
{
"epoch": 2.8085676037483265,
"grad_norm": 0.00015129183884710073,
"learning_rate": 3.129589843075512e-06,
"logits/chosen": -1.8648707866668701,
"logits/rejected": -1.698838233947754,
"logps/chosen": -56.40584182739258,
"logps/rejected": -201.5301055908203,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.857680320739746,
"rewards/margins": 22.250385284423828,
"rewards/rejected": -12.392705917358398,
"step": 1050
},
{
"epoch": 2.821954484605087,
"grad_norm": 1.1143507435917854e-05,
"learning_rate": 3.116761869187279e-06,
"logits/chosen": -1.8589363098144531,
"logits/rejected": -1.7270303964614868,
"logps/chosen": -66.82688903808594,
"logps/rejected": -206.1546173095703,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.576805114746094,
"rewards/margins": 23.24510955810547,
"rewards/rejected": -12.668303489685059,
"step": 1055
},
{
"epoch": 2.835341365461847,
"grad_norm": 1.0546603334660176e-05,
"learning_rate": 3.1048732771435713e-06,
"logits/chosen": -1.8395435810089111,
"logits/rejected": -1.6673234701156616,
"logps/chosen": -82.46403503417969,
"logps/rejected": -201.4765625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 11.403456687927246,
"rewards/margins": 23.960718154907227,
"rewards/rejected": -12.557262420654297,
"step": 1060
},
{
"epoch": 2.8487282463186077,
"grad_norm": 1.641225753701292e-05,
"learning_rate": 3.093926563285992e-06,
"logits/chosen": -1.8687680959701538,
"logits/rejected": -1.6740798950195312,
"logps/chosen": -59.4666748046875,
"logps/rejected": -203.675537109375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.573221206665039,
"rewards/margins": 22.31708335876465,
"rewards/rejected": -12.743863105773926,
"step": 1065
},
{
"epoch": 2.862115127175368,
"grad_norm": 6.346489681163803e-05,
"learning_rate": 3.0839240261825406e-06,
"logits/chosen": -1.862137794494629,
"logits/rejected": -1.7026790380477905,
"logps/chosen": -74.59800720214844,
"logps/rejected": -208.82510375976562,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.843328475952148,
"rewards/margins": 23.61244010925293,
"rewards/rejected": -12.769109725952148,
"step": 1070
},
{
"epoch": 2.8755020080321287,
"grad_norm": 7.015174560365267e-06,
"learning_rate": 3.0748677661449626e-06,
"logits/chosen": -1.8922617435455322,
"logits/rejected": -1.663762092590332,
"logps/chosen": -53.6298942565918,
"logps/rejected": -209.82418823242188,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.294633865356445,
"rewards/margins": 22.25174331665039,
"rewards/rejected": -12.95710563659668,
"step": 1075
},
{
"epoch": 2.888888888888889,
"grad_norm": 9.642781151342206e-06,
"learning_rate": 3.0667596847877337e-06,
"logits/chosen": -1.8729660511016846,
"logits/rejected": -1.631821870803833,
"logps/chosen": -64.28204345703125,
"logps/rejected": -207.61941528320312,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.652636528015137,
"rewards/margins": 22.619272232055664,
"rewards/rejected": -12.966634750366211,
"step": 1080
},
{
"epoch": 2.9022757697456494,
"grad_norm": 4.731034096039366e-06,
"learning_rate": 3.059601484628756e-06,
"logits/chosen": -1.864485740661621,
"logits/rejected": -1.7230615615844727,
"logps/chosen": -57.4099235534668,
"logps/rejected": -213.2287139892578,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.917817115783691,
"rewards/margins": 22.95882225036621,
"rewards/rejected": -13.041006088256836,
"step": 1085
},
{
"epoch": 2.9156626506024095,
"grad_norm": 2.3282762413145974e-05,
"learning_rate": 3.053394668731877e-06,
"logits/chosen": -1.8527206182479858,
"logits/rejected": -1.655686616897583,
"logps/chosen": -64.55480194091797,
"logps/rejected": -214.9347381591797,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.030226707458496,
"rewards/margins": 23.382362365722656,
"rewards/rejected": -13.352136611938477,
"step": 1090
},
{
"epoch": 2.92904953145917,
"grad_norm": 0.00011880494275828823,
"learning_rate": 3.0481405403912697e-06,
"logits/chosen": -1.8731340169906616,
"logits/rejected": -1.7239511013031006,
"logps/chosen": -63.4110107421875,
"logps/rejected": -210.80111694335938,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.36390495300293,
"rewards/margins": 23.33377456665039,
"rewards/rejected": -12.969869613647461,
"step": 1095
},
{
"epoch": 2.9424364123159306,
"grad_norm": 2.658485209394712e-05,
"learning_rate": 3.043840202857774e-06,
"logits/chosen": -1.851257562637329,
"logits/rejected": -1.6927006244659424,
"logps/chosen": -73.62875366210938,
"logps/rejected": -207.9374237060547,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.74313735961914,
"rewards/margins": 23.574596405029297,
"rewards/rejected": -12.831457138061523,
"step": 1100
},
{
"epoch": 2.9558232931726907,
"grad_norm": 2.298586650795187e-06,
"learning_rate": 3.0404945591072405e-06,
"logits/chosen": -1.852618932723999,
"logits/rejected": -1.7044318914413452,
"logps/chosen": -67.87403106689453,
"logps/rejected": -205.13784790039062,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.148408889770508,
"rewards/margins": 22.653907775878906,
"rewards/rejected": -12.505498886108398,
"step": 1105
},
{
"epoch": 2.969210174029451,
"grad_norm": 1.3922598327553715e-06,
"learning_rate": 3.0381043116509197e-06,
"logits/chosen": -1.8768529891967773,
"logits/rejected": -1.7174484729766846,
"logps/chosen": -71.07394409179688,
"logps/rejected": -204.34207153320312,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 10.052443504333496,
"rewards/margins": 22.617359161376953,
"rewards/rejected": -12.564915657043457,
"step": 1110
},
{
"epoch": 2.9825970548862113,
"grad_norm": 3.243790729356988e-07,
"learning_rate": 3.0366699623879565e-06,
"logits/chosen": -1.8698358535766602,
"logits/rejected": -1.6645195484161377,
"logps/chosen": -53.6240348815918,
"logps/rejected": -214.02001953125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": 9.68481159210205,
"rewards/margins": 23.085359573364258,
"rewards/rejected": -13.400548934936523,
"step": 1115
},
{
"epoch": 2.9933065595716197,
"eval_logits/chosen": -1.9241392612457275,
"eval_logits/rejected": -1.6729750633239746,
"eval_logps/chosen": -63.028377532958984,
"eval_logps/rejected": -206.25421142578125,
"eval_loss": 4.4528654541409196e-08,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": 10.053747177124023,
"eval_rewards/margins": 22.7481632232666,
"eval_rewards/rejected": -12.694416046142578,
"eval_runtime": 29.0568,
"eval_samples_per_second": 6.883,
"eval_steps_per_second": 6.883,
"step": 1119
}
],
"logging_steps": 5,
"max_steps": 1119,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}