Fubukii's picture
Upload folder using huggingface_hub
6addcb9 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9997510580034852,
"eval_steps": 500,
"global_step": 3012,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.003319226620197494,
"grad_norm": 2.494830846786499,
"learning_rate": 9.9667994687915e-06,
"logits/chosen": -27.511184692382812,
"logits/rejected": -28.262775421142578,
"logps/chosen": -244.8615264892578,
"logps/rejected": -235.1686248779297,
"loss": 0.266,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": 1.5318301916122437,
"rewards/margins": 1.4668998718261719,
"rewards/rejected": 0.06493023782968521,
"step": 10
},
{
"epoch": 0.006638453240394988,
"grad_norm": 0.7127860188484192,
"learning_rate": 9.933598937583003e-06,
"logits/chosen": -31.031789779663086,
"logits/rejected": -31.7587890625,
"logps/chosen": -198.0988311767578,
"logps/rejected": -194.5559539794922,
"loss": 0.0449,
"rewards/accuracies": 1.0,
"rewards/chosen": 3.054797649383545,
"rewards/margins": 3.3064472675323486,
"rewards/rejected": -0.2516496777534485,
"step": 20
},
{
"epoch": 0.009957679860592481,
"grad_norm": 0.22338563203811646,
"learning_rate": 9.900398406374503e-06,
"logits/chosen": -30.917648315429688,
"logits/rejected": -31.401653289794922,
"logps/chosen": -230.81179809570312,
"logps/rejected": -231.5104217529297,
"loss": 0.015,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.695403575897217,
"rewards/margins": 4.529758453369141,
"rewards/rejected": -1.834355115890503,
"step": 30
},
{
"epoch": 0.013276906480789975,
"grad_norm": 0.11917012184858322,
"learning_rate": 9.867197875166004e-06,
"logits/chosen": -32.53432083129883,
"logits/rejected": -33.02833938598633,
"logps/chosen": -243.38931274414062,
"logps/rejected": -249.16824340820312,
"loss": 0.0044,
"rewards/accuracies": 1.0,
"rewards/chosen": 2.072178363800049,
"rewards/margins": 6.1943559646606445,
"rewards/rejected": -4.122177600860596,
"step": 40
},
{
"epoch": 0.01659613310098747,
"grad_norm": 0.03350254148244858,
"learning_rate": 9.833997343957504e-06,
"logits/chosen": -33.12944030761719,
"logits/rejected": -33.550689697265625,
"logps/chosen": -231.4219970703125,
"logps/rejected": -241.4237823486328,
"loss": 0.0017,
"rewards/accuracies": 1.0,
"rewards/chosen": 1.7218177318572998,
"rewards/margins": 7.393080711364746,
"rewards/rejected": -5.671263217926025,
"step": 50
},
{
"epoch": 0.019915359721184963,
"grad_norm": 0.04106535390019417,
"learning_rate": 9.800796812749004e-06,
"logits/chosen": -33.08533477783203,
"logits/rejected": -33.47816848754883,
"logps/chosen": -215.3968505859375,
"logps/rejected": -229.9862518310547,
"loss": 0.0007,
"rewards/accuracies": 1.0,
"rewards/chosen": 1.6401796340942383,
"rewards/margins": 8.768369674682617,
"rewards/rejected": -7.128190517425537,
"step": 60
},
{
"epoch": 0.02323458634138246,
"grad_norm": 0.015592047944664955,
"learning_rate": 9.767596281540506e-06,
"logits/chosen": -33.04099655151367,
"logits/rejected": -33.49821853637695,
"logps/chosen": -261.6057434082031,
"logps/rejected": -281.25140380859375,
"loss": 0.0004,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.20642319321632385,
"rewards/margins": 10.172657012939453,
"rewards/rejected": -9.966233253479004,
"step": 70
},
{
"epoch": 0.02655381296157995,
"grad_norm": 0.01825164072215557,
"learning_rate": 9.734395750332006e-06,
"logits/chosen": -33.06761169433594,
"logits/rejected": -33.45521545410156,
"logps/chosen": -222.248779296875,
"logps/rejected": -241.6016082763672,
"loss": 0.0004,
"rewards/accuracies": 1.0,
"rewards/chosen": 1.2139157056808472,
"rewards/margins": 10.25539493560791,
"rewards/rejected": -9.04148006439209,
"step": 80
},
{
"epoch": 0.029873039581777446,
"grad_norm": 0.009622551500797272,
"learning_rate": 9.701195219123508e-06,
"logits/chosen": -34.399051666259766,
"logits/rejected": -34.781593322753906,
"logps/chosen": -212.21316528320312,
"logps/rejected": -235.27783203125,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.9530667066574097,
"rewards/margins": 11.376951217651367,
"rewards/rejected": -10.423883438110352,
"step": 90
},
{
"epoch": 0.03319226620197494,
"grad_norm": 0.0019461100455373526,
"learning_rate": 9.667994687915008e-06,
"logits/chosen": -33.364990234375,
"logits/rejected": -33.812992095947266,
"logps/chosen": -249.4156494140625,
"logps/rejected": -274.4363098144531,
"loss": 0.0003,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.25879842042922974,
"rewards/margins": 11.992888450622559,
"rewards/rejected": -11.734089851379395,
"step": 100
},
{
"epoch": 0.036511492822172434,
"grad_norm": 0.007654547691345215,
"learning_rate": 9.634794156706508e-06,
"logits/chosen": -34.42128372192383,
"logits/rejected": -34.87459182739258,
"logps/chosen": -234.20126342773438,
"logps/rejected": -258.58172607421875,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.7224393486976624,
"rewards/margins": 11.716227531433105,
"rewards/rejected": -10.99378776550293,
"step": 110
},
{
"epoch": 0.039830719442369926,
"grad_norm": 0.003787972964346409,
"learning_rate": 9.60159362549801e-06,
"logits/chosen": -34.04184341430664,
"logits/rejected": -34.68623733520508,
"logps/chosen": -235.21005249023438,
"logps/rejected": -263.50030517578125,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.45280832052230835,
"rewards/margins": 12.783775329589844,
"rewards/rejected": -13.236584663391113,
"step": 120
},
{
"epoch": 0.043149946062567425,
"grad_norm": 0.0027999032754451036,
"learning_rate": 9.56839309428951e-06,
"logits/chosen": -34.30641174316406,
"logits/rejected": -34.804931640625,
"logps/chosen": -239.53280639648438,
"logps/rejected": -269.2521057128906,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.6962872743606567,
"rewards/margins": 13.20808219909668,
"rewards/rejected": -13.90437126159668,
"step": 130
},
{
"epoch": 0.04646917268276492,
"grad_norm": 0.004512485582381487,
"learning_rate": 9.535192563081011e-06,
"logits/chosen": -34.37498474121094,
"logits/rejected": -34.75019454956055,
"logps/chosen": -230.13546752929688,
"logps/rejected": -257.3545837402344,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.05627549812197685,
"rewards/margins": 12.560346603393555,
"rewards/rejected": -12.616622924804688,
"step": 140
},
{
"epoch": 0.04978839930296241,
"grad_norm": 0.00021050537179689854,
"learning_rate": 9.501992031872511e-06,
"logits/chosen": -34.44464111328125,
"logits/rejected": -34.87554931640625,
"logps/chosen": -255.37686157226562,
"logps/rejected": -287.8848571777344,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.9349029064178467,
"rewards/margins": 14.170918464660645,
"rewards/rejected": -16.10582160949707,
"step": 150
},
{
"epoch": 0.0531076259231599,
"grad_norm": 0.007320842240005732,
"learning_rate": 9.468791500664011e-06,
"logits/chosen": -35.74467086791992,
"logits/rejected": -36.10135269165039,
"logps/chosen": -210.03109741210938,
"logps/rejected": -240.8317108154297,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.17547592520713806,
"rewards/margins": 13.636068344116211,
"rewards/rejected": -13.811543464660645,
"step": 160
},
{
"epoch": 0.0564268525433574,
"grad_norm": 0.0010633851634338498,
"learning_rate": 9.435590969455513e-06,
"logits/chosen": -34.923927307128906,
"logits/rejected": -35.52933883666992,
"logps/chosen": -231.2039794921875,
"logps/rejected": -261.8594665527344,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.06495578587055206,
"rewards/margins": 13.507354736328125,
"rewards/rejected": -13.572309494018555,
"step": 170
},
{
"epoch": 0.05974607916355489,
"grad_norm": 0.006778092123568058,
"learning_rate": 9.402390438247013e-06,
"logits/chosen": -33.77501678466797,
"logits/rejected": -34.322486877441406,
"logps/chosen": -252.57046508789062,
"logps/rejected": -286.8854064941406,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.6652179956436157,
"rewards/margins": 14.697219848632812,
"rewards/rejected": -16.362438201904297,
"step": 180
},
{
"epoch": 0.06306530578375238,
"grad_norm": 0.009588481858372688,
"learning_rate": 9.369189907038513e-06,
"logits/chosen": -33.23898696899414,
"logits/rejected": -33.55398178100586,
"logps/chosen": -249.4516143798828,
"logps/rejected": -283.144287109375,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.155587911605835,
"rewards/margins": 14.480944633483887,
"rewards/rejected": -15.636533737182617,
"step": 190
},
{
"epoch": 0.06638453240394988,
"grad_norm": 0.0020435138139873743,
"learning_rate": 9.335989375830013e-06,
"logits/chosen": -34.49266815185547,
"logits/rejected": -35.19890213012695,
"logps/chosen": -260.21063232421875,
"logps/rejected": -294.55059814453125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.3676701784133911,
"rewards/margins": 14.618782043457031,
"rewards/rejected": -15.986452102661133,
"step": 200
},
{
"epoch": 0.06970375902414737,
"grad_norm": 0.007681610994040966,
"learning_rate": 9.302788844621515e-06,
"logits/chosen": -35.03020095825195,
"logits/rejected": -35.49299621582031,
"logps/chosen": -238.98388671875,
"logps/rejected": -273.298583984375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.093301773071289,
"rewards/margins": 14.62084674835205,
"rewards/rejected": -15.714147567749023,
"step": 210
},
{
"epoch": 0.07302298564434487,
"grad_norm": 0.0033791419118642807,
"learning_rate": 9.269588313413015e-06,
"logits/chosen": -35.38592529296875,
"logits/rejected": -36.00849533081055,
"logps/chosen": -260.4759826660156,
"logps/rejected": -298.17706298828125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.7291450500488281,
"rewards/margins": 15.51725959777832,
"rewards/rejected": -17.246402740478516,
"step": 220
},
{
"epoch": 0.07634221226454237,
"grad_norm": 0.0003220026264898479,
"learning_rate": 9.236387782204516e-06,
"logits/chosen": -34.74597930908203,
"logits/rejected": -35.35438537597656,
"logps/chosen": -247.0140838623047,
"logps/rejected": -284.0188903808594,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.3175166845321655,
"rewards/margins": 15.613149642944336,
"rewards/rejected": -16.930665969848633,
"step": 230
},
{
"epoch": 0.07966143888473985,
"grad_norm": 0.00033980957232415676,
"learning_rate": 9.203187250996016e-06,
"logits/chosen": -34.7844352722168,
"logits/rejected": -35.204917907714844,
"logps/chosen": -261.2896728515625,
"logps/rejected": -298.12152099609375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.142932415008545,
"rewards/margins": 15.416728019714355,
"rewards/rejected": -17.559659957885742,
"step": 240
},
{
"epoch": 0.08298066550493735,
"grad_norm": 0.006538284942507744,
"learning_rate": 9.169986719787516e-06,
"logits/chosen": -35.57487487792969,
"logits/rejected": -36.020084381103516,
"logps/chosen": -253.6564483642578,
"logps/rejected": -289.6921691894531,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.6668405532836914,
"rewards/margins": 15.152114868164062,
"rewards/rejected": -16.818954467773438,
"step": 250
},
{
"epoch": 0.08629989212513485,
"grad_norm": 0.0009295985219068825,
"learning_rate": 9.136786188579018e-06,
"logits/chosen": -35.48679733276367,
"logits/rejected": -36.16688919067383,
"logps/chosen": -223.3815155029297,
"logps/rejected": -260.7580261230469,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.45010191202163696,
"rewards/margins": 15.585649490356445,
"rewards/rejected": -16.035751342773438,
"step": 260
},
{
"epoch": 0.08961911874533234,
"grad_norm": 0.00021262650261633098,
"learning_rate": 9.103585657370518e-06,
"logits/chosen": -33.49461364746094,
"logits/rejected": -34.041141510009766,
"logps/chosen": -221.56228637695312,
"logps/rejected": -258.98822021484375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.5517559051513672,
"rewards/margins": 15.583239555358887,
"rewards/rejected": -16.134998321533203,
"step": 270
},
{
"epoch": 0.09293834536552983,
"grad_norm": 0.0004733486275654286,
"learning_rate": 9.07038512616202e-06,
"logits/chosen": -33.337162017822266,
"logits/rejected": -33.851051330566406,
"logps/chosen": -244.8498077392578,
"logps/rejected": -283.07763671875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.3852789402008057,
"rewards/margins": 15.907007217407227,
"rewards/rejected": -17.292285919189453,
"step": 280
},
{
"epoch": 0.09625757198572732,
"grad_norm": 0.00015478464774787426,
"learning_rate": 9.03718459495352e-06,
"logits/chosen": -35.541263580322266,
"logits/rejected": -36.11621856689453,
"logps/chosen": -232.34432983398438,
"logps/rejected": -272.0796813964844,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8322229385375977,
"rewards/margins": 16.295772552490234,
"rewards/rejected": -17.12799644470215,
"step": 290
},
{
"epoch": 0.09957679860592482,
"grad_norm": 0.002806061180308461,
"learning_rate": 9.00398406374502e-06,
"logits/chosen": -34.066505432128906,
"logits/rejected": -34.60965347290039,
"logps/chosen": -268.8088684082031,
"logps/rejected": -312.8736877441406,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.632227659225464,
"rewards/margins": 17.530643463134766,
"rewards/rejected": -20.16286849975586,
"step": 300
},
{
"epoch": 0.10289602522612232,
"grad_norm": 0.0005554874078370631,
"learning_rate": 8.970783532536521e-06,
"logits/chosen": -34.408775329589844,
"logits/rejected": -35.35196304321289,
"logps/chosen": -229.3350067138672,
"logps/rejected": -270.11822509765625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.1011072397232056,
"rewards/margins": 16.528867721557617,
"rewards/rejected": -17.629976272583008,
"step": 310
},
{
"epoch": 0.1062152518463198,
"grad_norm": 0.005228464491665363,
"learning_rate": 8.937583001328021e-06,
"logits/chosen": -34.59056854248047,
"logits/rejected": -35.54304504394531,
"logps/chosen": -223.6347198486328,
"logps/rejected": -263.6668395996094,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.2593634128570557,
"rewards/margins": 16.28620147705078,
"rewards/rejected": -17.545564651489258,
"step": 320
},
{
"epoch": 0.1095344784665173,
"grad_norm": 0.0006935601704753935,
"learning_rate": 8.904382470119523e-06,
"logits/chosen": -35.739097595214844,
"logits/rejected": -36.549564361572266,
"logps/chosen": -238.09091186523438,
"logps/rejected": -280.36492919921875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.1441192626953125,
"rewards/margins": 16.90723419189453,
"rewards/rejected": -19.05135154724121,
"step": 330
},
{
"epoch": 0.1128537050867148,
"grad_norm": 0.00026731210527941585,
"learning_rate": 8.871181938911023e-06,
"logits/chosen": -33.79966735839844,
"logits/rejected": -34.6407356262207,
"logps/chosen": -216.0535888671875,
"logps/rejected": -253.9859619140625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.6571240425109863,
"rewards/margins": 15.740079879760742,
"rewards/rejected": -16.39720344543457,
"step": 340
},
{
"epoch": 0.11617293170691229,
"grad_norm": 3.3017222449416295e-05,
"learning_rate": 8.837981407702523e-06,
"logits/chosen": -34.348304748535156,
"logits/rejected": -35.16481399536133,
"logps/chosen": -238.8940887451172,
"logps/rejected": -280.3075866699219,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.8426218032836914,
"rewards/margins": 16.77206039428711,
"rewards/rejected": -18.614681243896484,
"step": 350
},
{
"epoch": 0.11949215832710978,
"grad_norm": 0.0004305043548811227,
"learning_rate": 8.804780876494025e-06,
"logits/chosen": -35.531944274902344,
"logits/rejected": -36.35076904296875,
"logps/chosen": -238.10140991210938,
"logps/rejected": -280.59619140625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.7624365091323853,
"rewards/margins": 17.082622528076172,
"rewards/rejected": -18.84505844116211,
"step": 360
},
{
"epoch": 0.12281138494730728,
"grad_norm": 4.115190313314088e-05,
"learning_rate": 8.771580345285525e-06,
"logits/chosen": -34.63116455078125,
"logits/rejected": -35.59693908691406,
"logps/chosen": -265.68389892578125,
"logps/rejected": -310.3666076660156,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.659745693206787,
"rewards/margins": 17.709857940673828,
"rewards/rejected": -20.36960220336914,
"step": 370
},
{
"epoch": 0.12613061156750477,
"grad_norm": 0.0020732053089886904,
"learning_rate": 8.738379814077027e-06,
"logits/chosen": -35.7321662902832,
"logits/rejected": -36.6037483215332,
"logps/chosen": -274.5724792480469,
"logps/rejected": -320.93438720703125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.224485397338867,
"rewards/margins": 18.24944496154785,
"rewards/rejected": -21.47393035888672,
"step": 380
},
{
"epoch": 0.12944983818770225,
"grad_norm": 0.0006749048479832709,
"learning_rate": 8.705179282868527e-06,
"logits/chosen": -34.394935607910156,
"logits/rejected": -35.65528106689453,
"logps/chosen": -221.21981811523438,
"logps/rejected": -263.59820556640625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.179241418838501,
"rewards/margins": 17.081579208374023,
"rewards/rejected": -18.260822296142578,
"step": 390
},
{
"epoch": 0.13276906480789977,
"grad_norm": 0.00046231126179918647,
"learning_rate": 8.671978751660027e-06,
"logits/chosen": -35.219017028808594,
"logits/rejected": -35.91897964477539,
"logps/chosen": -223.1756591796875,
"logps/rejected": -268.28399658203125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.4395145177841187,
"rewards/margins": 17.961841583251953,
"rewards/rejected": -19.401355743408203,
"step": 400
},
{
"epoch": 0.13608829142809725,
"grad_norm": 0.002402970800176263,
"learning_rate": 8.638778220451528e-06,
"logits/chosen": -36.17115783691406,
"logits/rejected": -37.01081085205078,
"logps/chosen": -251.10897827148438,
"logps/rejected": -297.64837646484375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.4288759231567383,
"rewards/margins": 18.359947204589844,
"rewards/rejected": -20.7888240814209,
"step": 410
},
{
"epoch": 0.13940751804829474,
"grad_norm": 0.0008737234747968614,
"learning_rate": 8.605577689243028e-06,
"logits/chosen": -35.6660270690918,
"logits/rejected": -36.46641159057617,
"logps/chosen": -243.93917846679688,
"logps/rejected": -289.4454345703125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.152601718902588,
"rewards/margins": 18.12082862854004,
"rewards/rejected": -20.27342987060547,
"step": 420
},
{
"epoch": 0.14272674466849225,
"grad_norm": 0.0008533377549611032,
"learning_rate": 8.57237715803453e-06,
"logits/chosen": -35.49746322631836,
"logits/rejected": -36.489322662353516,
"logps/chosen": -232.93820190429688,
"logps/rejected": -277.65216064453125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.035013198852539,
"rewards/margins": 17.73954963684082,
"rewards/rejected": -19.77456283569336,
"step": 430
},
{
"epoch": 0.14604597128868974,
"grad_norm": 0.0005299286567606032,
"learning_rate": 8.53917662682603e-06,
"logits/chosen": -35.89002227783203,
"logits/rejected": -37.11347198486328,
"logps/chosen": -244.32278442382812,
"logps/rejected": -290.68408203125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.9033141136169434,
"rewards/margins": 18.262744903564453,
"rewards/rejected": -21.166057586669922,
"step": 440
},
{
"epoch": 0.14936519790888722,
"grad_norm": 0.0007316975970752537,
"learning_rate": 8.50597609561753e-06,
"logits/chosen": -35.58525848388672,
"logits/rejected": -36.55177688598633,
"logps/chosen": -218.49252319335938,
"logps/rejected": -263.3166809082031,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.6088759899139404,
"rewards/margins": 17.73054313659668,
"rewards/rejected": -19.339420318603516,
"step": 450
},
{
"epoch": 0.15268442452908473,
"grad_norm": 0.0001297222770517692,
"learning_rate": 8.472775564409032e-06,
"logits/chosen": -35.233123779296875,
"logits/rejected": -36.260986328125,
"logps/chosen": -235.96353149414062,
"logps/rejected": -281.82904052734375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.4060370922088623,
"rewards/margins": 18.074405670166016,
"rewards/rejected": -20.480443954467773,
"step": 460
},
{
"epoch": 0.15600365114928222,
"grad_norm": 0.00011668611841741949,
"learning_rate": 8.439575033200532e-06,
"logits/chosen": -36.725379943847656,
"logits/rejected": -37.734004974365234,
"logps/chosen": -261.49835205078125,
"logps/rejected": -308.49957275390625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.4732394218444824,
"rewards/margins": 18.485958099365234,
"rewards/rejected": -20.959197998046875,
"step": 470
},
{
"epoch": 0.1593228777694797,
"grad_norm": 0.00016372502432204783,
"learning_rate": 8.406374501992033e-06,
"logits/chosen": -35.07146072387695,
"logits/rejected": -35.87404251098633,
"logps/chosen": -246.40573120117188,
"logps/rejected": -293.628662109375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.4306976795196533,
"rewards/margins": 18.557546615600586,
"rewards/rejected": -20.988243103027344,
"step": 480
},
{
"epoch": 0.16264210438967722,
"grad_norm": 0.008033442310988903,
"learning_rate": 8.373173970783533e-06,
"logits/chosen": -35.59102249145508,
"logits/rejected": -36.70330047607422,
"logps/chosen": -233.5152587890625,
"logps/rejected": -280.5353088378906,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.2914183139801025,
"rewards/margins": 18.466495513916016,
"rewards/rejected": -20.757911682128906,
"step": 490
},
{
"epoch": 0.1659613310098747,
"grad_norm": 0.0005811351002193987,
"learning_rate": 8.339973439575035e-06,
"logits/chosen": -34.38259506225586,
"logits/rejected": -35.335872650146484,
"logps/chosen": -245.3876953125,
"logps/rejected": -292.680419921875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.7595839500427246,
"rewards/margins": 18.590240478515625,
"rewards/rejected": -21.349822998046875,
"step": 500
},
{
"epoch": 0.1692805576300722,
"grad_norm": 0.00011935765360249206,
"learning_rate": 8.306772908366535e-06,
"logits/chosen": -35.616981506347656,
"logits/rejected": -36.57094955444336,
"logps/chosen": -245.680908203125,
"logps/rejected": -293.33575439453125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.987445592880249,
"rewards/margins": 18.696874618530273,
"rewards/rejected": -20.68431854248047,
"step": 510
},
{
"epoch": 0.1725997842502697,
"grad_norm": 0.00013885533553548157,
"learning_rate": 8.273572377158035e-06,
"logits/chosen": -35.12391662597656,
"logits/rejected": -35.98273468017578,
"logps/chosen": -233.94204711914062,
"logps/rejected": -281.42010498046875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.7768646478652954,
"rewards/margins": 18.548133850097656,
"rewards/rejected": -20.32499885559082,
"step": 520
},
{
"epoch": 0.17591901087046719,
"grad_norm": 0.00012209195119794458,
"learning_rate": 8.240371845949537e-06,
"logits/chosen": -35.81105041503906,
"logits/rejected": -37.02412033081055,
"logps/chosen": -266.5361022949219,
"logps/rejected": -317.65216064453125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.5481743812561035,
"rewards/margins": 19.596721649169922,
"rewards/rejected": -23.144895553588867,
"step": 530
},
{
"epoch": 0.17923823749066467,
"grad_norm": 3.902066600858234e-05,
"learning_rate": 8.207171314741037e-06,
"logits/chosen": -36.01176071166992,
"logits/rejected": -36.81584930419922,
"logps/chosen": -240.7890625,
"logps/rejected": -292.3703308105469,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.82381534576416,
"rewards/margins": 19.824779510498047,
"rewards/rejected": -22.648595809936523,
"step": 540
},
{
"epoch": 0.18255746411086216,
"grad_norm": 0.0014665070921182632,
"learning_rate": 8.173970783532539e-06,
"logits/chosen": -34.90106964111328,
"logits/rejected": -35.94670867919922,
"logps/chosen": -255.3992919921875,
"logps/rejected": -305.7498779296875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.2722907066345215,
"rewards/margins": 19.493640899658203,
"rewards/rejected": -22.765932083129883,
"step": 550
},
{
"epoch": 0.18587669073105967,
"grad_norm": 0.0001265254250029102,
"learning_rate": 8.140770252324039e-06,
"logits/chosen": -36.034767150878906,
"logits/rejected": -37.131126403808594,
"logps/chosen": -241.76644897460938,
"logps/rejected": -290.7240905761719,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.036072254180908,
"rewards/margins": 19.04810905456543,
"rewards/rejected": -21.08418083190918,
"step": 560
},
{
"epoch": 0.18919591735125715,
"grad_norm": 0.00034473679261282086,
"learning_rate": 8.107569721115539e-06,
"logits/chosen": -35.518226623535156,
"logits/rejected": -36.253692626953125,
"logps/chosen": -231.61618041992188,
"logps/rejected": -282.1851501464844,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.092636823654175,
"rewards/margins": 19.56393814086914,
"rewards/rejected": -21.656574249267578,
"step": 570
},
{
"epoch": 0.19251514397145464,
"grad_norm": 0.00030375979258678854,
"learning_rate": 8.074369189907039e-06,
"logits/chosen": -35.77162551879883,
"logits/rejected": -36.470115661621094,
"logps/chosen": -206.6995391845703,
"logps/rejected": -254.5850830078125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.6058409214019775,
"rewards/margins": 18.82034683227539,
"rewards/rejected": -20.42618751525879,
"step": 580
},
{
"epoch": 0.19583437059165215,
"grad_norm": 1.3439960639516357e-05,
"learning_rate": 8.041168658698539e-06,
"logits/chosen": -34.774444580078125,
"logits/rejected": -35.688079833984375,
"logps/chosen": -259.68499755859375,
"logps/rejected": -309.86865234375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.393587827682495,
"rewards/margins": 19.45305061340332,
"rewards/rejected": -22.846635818481445,
"step": 590
},
{
"epoch": 0.19915359721184964,
"grad_norm": 4.3687683501048014e-05,
"learning_rate": 8.00796812749004e-06,
"logits/chosen": -34.41751480102539,
"logits/rejected": -35.5643310546875,
"logps/chosen": -239.58407592773438,
"logps/rejected": -288.7073974609375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.1479239463806152,
"rewards/margins": 19.129615783691406,
"rewards/rejected": -21.277538299560547,
"step": 600
},
{
"epoch": 0.20247282383204712,
"grad_norm": 1.1106268175353762e-05,
"learning_rate": 7.97476759628154e-06,
"logits/chosen": -36.191566467285156,
"logits/rejected": -37.139347076416016,
"logps/chosen": -255.52566528320312,
"logps/rejected": -305.6492614746094,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.047039031982422,
"rewards/margins": 19.359500885009766,
"rewards/rejected": -22.406539916992188,
"step": 610
},
{
"epoch": 0.20579205045224463,
"grad_norm": 7.13270710548386e-05,
"learning_rate": 7.941567065073042e-06,
"logits/chosen": -34.59242248535156,
"logits/rejected": -35.580257415771484,
"logps/chosen": -251.13967895507812,
"logps/rejected": -303.52215576171875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.805518865585327,
"rewards/margins": 20.101978302001953,
"rewards/rejected": -22.907493591308594,
"step": 620
},
{
"epoch": 0.20911127707244212,
"grad_norm": 8.512644126312807e-05,
"learning_rate": 7.908366533864542e-06,
"logits/chosen": -35.026451110839844,
"logits/rejected": -36.46593475341797,
"logps/chosen": -235.6885528564453,
"logps/rejected": -285.78985595703125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.588026762008667,
"rewards/margins": 19.415477752685547,
"rewards/rejected": -22.00350570678711,
"step": 630
},
{
"epoch": 0.2124305036926396,
"grad_norm": 0.0017007539281621575,
"learning_rate": 7.875166002656042e-06,
"logits/chosen": -36.41635513305664,
"logits/rejected": -37.88849639892578,
"logps/chosen": -223.83935546875,
"logps/rejected": -275.52423095703125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.7281689643859863,
"rewards/margins": 19.8160457611084,
"rewards/rejected": -22.544214248657227,
"step": 640
},
{
"epoch": 0.21574973031283712,
"grad_norm": 5.1219332817709073e-05,
"learning_rate": 7.841965471447544e-06,
"logits/chosen": -36.743431091308594,
"logits/rejected": -37.54664611816406,
"logps/chosen": -223.3494110107422,
"logps/rejected": -274.8848571777344,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.607266902923584,
"rewards/margins": 19.915449142456055,
"rewards/rejected": -22.522716522216797,
"step": 650
},
{
"epoch": 0.2190689569330346,
"grad_norm": 0.0008978871628642082,
"learning_rate": 7.808764940239044e-06,
"logits/chosen": -35.73522186279297,
"logits/rejected": -36.919593811035156,
"logps/chosen": -249.9674835205078,
"logps/rejected": -301.19256591796875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.4148049354553223,
"rewards/margins": 19.588640213012695,
"rewards/rejected": -23.00344467163086,
"step": 660
},
{
"epoch": 0.2223881835532321,
"grad_norm": 0.0005557505646720529,
"learning_rate": 7.775564409030545e-06,
"logits/chosen": -35.63310241699219,
"logits/rejected": -36.968055725097656,
"logps/chosen": -234.2410125732422,
"logps/rejected": -285.2879333496094,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.3587982654571533,
"rewards/margins": 19.75788688659668,
"rewards/rejected": -23.11668586730957,
"step": 670
},
{
"epoch": 0.2257074101734296,
"grad_norm": 0.0008148573688231409,
"learning_rate": 7.742363877822045e-06,
"logits/chosen": -35.244285583496094,
"logits/rejected": -36.04697799682617,
"logps/chosen": -239.9448699951172,
"logps/rejected": -293.1563415527344,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.6865572929382324,
"rewards/margins": 20.494766235351562,
"rewards/rejected": -23.18132209777832,
"step": 680
},
{
"epoch": 0.2290266367936271,
"grad_norm": 0.0003090895479544997,
"learning_rate": 7.709163346613547e-06,
"logits/chosen": -35.87933349609375,
"logits/rejected": -37.27144241333008,
"logps/chosen": -245.03125,
"logps/rejected": -296.410400390625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.099365234375,
"rewards/margins": 19.793514251708984,
"rewards/rejected": -22.892879486083984,
"step": 690
},
{
"epoch": 0.23234586341382457,
"grad_norm": 0.0007792682736180723,
"learning_rate": 7.675962815405047e-06,
"logits/chosen": -35.90936279296875,
"logits/rejected": -37.2296142578125,
"logps/chosen": -244.5153045654297,
"logps/rejected": -296.6192321777344,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.0061705112457275,
"rewards/margins": 19.91374397277832,
"rewards/rejected": -22.91991424560547,
"step": 700
},
{
"epoch": 0.23566509003402208,
"grad_norm": 0.0003932374238502234,
"learning_rate": 7.642762284196547e-06,
"logits/chosen": -35.35771942138672,
"logits/rejected": -36.759765625,
"logps/chosen": -249.65048217773438,
"logps/rejected": -299.1029357910156,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.5528416633605957,
"rewards/margins": 19.263607025146484,
"rewards/rejected": -22.816450119018555,
"step": 710
},
{
"epoch": 0.23898431665421957,
"grad_norm": 5.749016418121755e-05,
"learning_rate": 7.609561752988048e-06,
"logits/chosen": -35.59196090698242,
"logits/rejected": -37.17299270629883,
"logps/chosen": -248.39657592773438,
"logps/rejected": -299.969482421875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.843963146209717,
"rewards/margins": 19.887401580810547,
"rewards/rejected": -23.73136329650879,
"step": 720
},
{
"epoch": 0.24230354327441705,
"grad_norm": 1.9144599718856625e-05,
"learning_rate": 7.576361221779549e-06,
"logits/chosen": -34.93134307861328,
"logits/rejected": -36.642024993896484,
"logps/chosen": -249.6949462890625,
"logps/rejected": -304.68865966796875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.464253902435303,
"rewards/margins": 21.0100154876709,
"rewards/rejected": -25.47426986694336,
"step": 730
},
{
"epoch": 0.24562276989461457,
"grad_norm": 1.135500042437343e-05,
"learning_rate": 7.54316069057105e-06,
"logits/chosen": -36.011444091796875,
"logits/rejected": -37.811126708984375,
"logps/chosen": -261.965576171875,
"logps/rejected": -317.7340087890625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.150547027587891,
"rewards/margins": 21.09018325805664,
"rewards/rejected": -26.2407283782959,
"step": 740
},
{
"epoch": 0.24894199651481205,
"grad_norm": 0.0003412498044781387,
"learning_rate": 7.5099601593625505e-06,
"logits/chosen": -35.549930572509766,
"logits/rejected": -36.97046661376953,
"logps/chosen": -260.5018615722656,
"logps/rejected": -313.27996826171875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.216989040374756,
"rewards/margins": 20.170087814331055,
"rewards/rejected": -25.387075424194336,
"step": 750
},
{
"epoch": 0.25226122313500954,
"grad_norm": 0.0001097567001124844,
"learning_rate": 7.476759628154051e-06,
"logits/chosen": -35.34148406982422,
"logits/rejected": -37.208404541015625,
"logps/chosen": -249.30068969726562,
"logps/rejected": -302.40704345703125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.520068645477295,
"rewards/margins": 20.316801071166992,
"rewards/rejected": -24.836868286132812,
"step": 760
},
{
"epoch": 0.25558044975520705,
"grad_norm": 0.0003229718713555485,
"learning_rate": 7.443559096945551e-06,
"logits/chosen": -36.1650390625,
"logits/rejected": -38.04664611816406,
"logps/chosen": -240.16683959960938,
"logps/rejected": -290.6697998046875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.272226810455322,
"rewards/margins": 19.5815372467041,
"rewards/rejected": -23.853763580322266,
"step": 770
},
{
"epoch": 0.2588996763754045,
"grad_norm": 0.00028919236501678824,
"learning_rate": 7.410358565737052e-06,
"logits/chosen": -35.1163444519043,
"logits/rejected": -37.21973419189453,
"logps/chosen": -280.9646911621094,
"logps/rejected": -339.1036682128906,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.527701377868652,
"rewards/margins": 21.770851135253906,
"rewards/rejected": -28.29854965209961,
"step": 780
},
{
"epoch": 0.262218902995602,
"grad_norm": 1.0552365665716934e-06,
"learning_rate": 7.377158034528553e-06,
"logits/chosen": -36.21448516845703,
"logits/rejected": -37.57536315917969,
"logps/chosen": -268.54876708984375,
"logps/rejected": -326.67742919921875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.952622890472412,
"rewards/margins": 21.72669792175293,
"rewards/rejected": -27.6793212890625,
"step": 790
},
{
"epoch": 0.26553812961579953,
"grad_norm": 0.002526302356272936,
"learning_rate": 7.343957503320054e-06,
"logits/chosen": -35.75117492675781,
"logits/rejected": -37.249168395996094,
"logps/chosen": -255.9884796142578,
"logps/rejected": -310.1398010253906,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.9749069213867188,
"rewards/margins": 20.556264877319336,
"rewards/rejected": -24.531173706054688,
"step": 800
},
{
"epoch": 0.268857356235997,
"grad_norm": 6.492368993349373e-05,
"learning_rate": 7.310756972111555e-06,
"logits/chosen": -37.125267028808594,
"logits/rejected": -38.65117263793945,
"logps/chosen": -247.0055389404297,
"logps/rejected": -303.65435791015625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.017395973205566,
"rewards/margins": 21.206653594970703,
"rewards/rejected": -26.224048614501953,
"step": 810
},
{
"epoch": 0.2721765828561945,
"grad_norm": 0.00018094113329425454,
"learning_rate": 7.277556440903056e-06,
"logits/chosen": -36.480506896972656,
"logits/rejected": -38.204551696777344,
"logps/chosen": -259.48321533203125,
"logps/rejected": -314.32977294921875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.440280437469482,
"rewards/margins": 20.785701751708984,
"rewards/rejected": -25.225984573364258,
"step": 820
},
{
"epoch": 0.275495809476392,
"grad_norm": 7.49337486922741e-05,
"learning_rate": 7.244355909694556e-06,
"logits/chosen": -34.9041633605957,
"logits/rejected": -36.328147888183594,
"logps/chosen": -253.56204223632812,
"logps/rejected": -308.58465576171875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.441735744476318,
"rewards/margins": 20.866907119750977,
"rewards/rejected": -25.308639526367188,
"step": 830
},
{
"epoch": 0.2788150360965895,
"grad_norm": 7.805244240444154e-05,
"learning_rate": 7.2111553784860565e-06,
"logits/chosen": -37.03504180908203,
"logits/rejected": -39.16151809692383,
"logps/chosen": -238.0412139892578,
"logps/rejected": -291.0369567871094,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.6084489822387695,
"rewards/margins": 20.149539947509766,
"rewards/rejected": -24.75798988342285,
"step": 840
},
{
"epoch": 0.282134262716787,
"grad_norm": 4.934472235618159e-05,
"learning_rate": 7.177954847277557e-06,
"logits/chosen": -35.14350509643555,
"logits/rejected": -36.96980285644531,
"logps/chosen": -226.21932983398438,
"logps/rejected": -278.470703125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.3487021923065186,
"rewards/margins": 20.134754180908203,
"rewards/rejected": -23.483455657958984,
"step": 850
},
{
"epoch": 0.2854534893369845,
"grad_norm": 4.3870386434718966e-05,
"learning_rate": 7.144754316069058e-06,
"logits/chosen": -33.659706115722656,
"logits/rejected": -35.260860443115234,
"logps/chosen": -292.42681884765625,
"logps/rejected": -348.8122253417969,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.7106709480285645,
"rewards/margins": 21.273283004760742,
"rewards/rejected": -26.98395347595215,
"step": 860
},
{
"epoch": 0.28877271595718196,
"grad_norm": 0.00014428362192120403,
"learning_rate": 7.111553784860559e-06,
"logits/chosen": -36.988136291503906,
"logits/rejected": -38.66490173339844,
"logps/chosen": -241.1929168701172,
"logps/rejected": -297.6890869140625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.3865532875061035,
"rewards/margins": 21.200180053710938,
"rewards/rejected": -25.586734771728516,
"step": 870
},
{
"epoch": 0.29209194257737947,
"grad_norm": 1.8365805090070353e-06,
"learning_rate": 7.078353253652059e-06,
"logits/chosen": -35.72251892089844,
"logits/rejected": -37.48300552368164,
"logps/chosen": -265.85968017578125,
"logps/rejected": -321.50103759765625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.265963554382324,
"rewards/margins": 20.955617904663086,
"rewards/rejected": -26.221582412719727,
"step": 880
},
{
"epoch": 0.295411169197577,
"grad_norm": 0.0001635378139326349,
"learning_rate": 7.04515272244356e-06,
"logits/chosen": -34.546783447265625,
"logits/rejected": -36.24065399169922,
"logps/chosen": -262.6241149902344,
"logps/rejected": -318.17755126953125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.923575401306152,
"rewards/margins": 21.00967025756836,
"rewards/rejected": -25.933246612548828,
"step": 890
},
{
"epoch": 0.29873039581777444,
"grad_norm": 4.392620030557737e-05,
"learning_rate": 7.011952191235061e-06,
"logits/chosen": -35.98107147216797,
"logits/rejected": -38.07038497924805,
"logps/chosen": -262.16668701171875,
"logps/rejected": -319.5645751953125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.970273494720459,
"rewards/margins": 21.547801971435547,
"rewards/rejected": -26.5180721282959,
"step": 900
},
{
"epoch": 0.30204962243797195,
"grad_norm": 2.0514065909083e-05,
"learning_rate": 6.978751660026562e-06,
"logits/chosen": -37.26417922973633,
"logits/rejected": -39.03954315185547,
"logps/chosen": -227.80126953125,
"logps/rejected": -282.78240966796875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.066802501678467,
"rewards/margins": 20.91775131225586,
"rewards/rejected": -24.98455238342285,
"step": 910
},
{
"epoch": 0.30536884905816947,
"grad_norm": 1.58273360284511e-05,
"learning_rate": 6.9455511288180625e-06,
"logits/chosen": -36.633819580078125,
"logits/rejected": -38.505821228027344,
"logps/chosen": -275.9680480957031,
"logps/rejected": -333.1492614746094,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.068737983703613,
"rewards/margins": 21.3752498626709,
"rewards/rejected": -27.443988800048828,
"step": 920
},
{
"epoch": 0.3086880756783669,
"grad_norm": 1.6043051800807007e-05,
"learning_rate": 6.9123505976095625e-06,
"logits/chosen": -35.964290618896484,
"logits/rejected": -38.0440559387207,
"logps/chosen": -258.4613342285156,
"logps/rejected": -313.12127685546875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.635455131530762,
"rewards/margins": 20.710844039916992,
"rewards/rejected": -25.34630012512207,
"step": 930
},
{
"epoch": 0.31200730229856444,
"grad_norm": 4.5550634240498766e-05,
"learning_rate": 6.879150066401063e-06,
"logits/chosen": -36.91032409667969,
"logits/rejected": -38.867332458496094,
"logps/chosen": -226.87857055664062,
"logps/rejected": -282.0513000488281,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.517120361328125,
"rewards/margins": 20.991397857666016,
"rewards/rejected": -24.50851821899414,
"step": 940
},
{
"epoch": 0.31532652891876195,
"grad_norm": 1.3055984709353652e-05,
"learning_rate": 6.845949535192563e-06,
"logits/chosen": -37.46794128417969,
"logits/rejected": -39.282188415527344,
"logps/chosen": -231.68319702148438,
"logps/rejected": -287.63372802734375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.084538459777832,
"rewards/margins": 21.120777130126953,
"rewards/rejected": -25.205312728881836,
"step": 950
},
{
"epoch": 0.3186457555389594,
"grad_norm": 5.750143827754073e-05,
"learning_rate": 6.812749003984063e-06,
"logits/chosen": -37.20722961425781,
"logits/rejected": -38.91963195800781,
"logps/chosen": -247.7166748046875,
"logps/rejected": -307.2261657714844,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.870044708251953,
"rewards/margins": 22.097675323486328,
"rewards/rejected": -26.967721939086914,
"step": 960
},
{
"epoch": 0.3219649821591569,
"grad_norm": 1.0725473657657858e-05,
"learning_rate": 6.779548472775564e-06,
"logits/chosen": -38.22545623779297,
"logits/rejected": -40.045082092285156,
"logps/chosen": -227.6758575439453,
"logps/rejected": -285.1622619628906,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.086358547210693,
"rewards/margins": 21.647619247436523,
"rewards/rejected": -25.73398208618164,
"step": 970
},
{
"epoch": 0.32528420877935443,
"grad_norm": 6.18934936937876e-05,
"learning_rate": 6.746347941567065e-06,
"logits/chosen": -36.12615203857422,
"logits/rejected": -38.44302749633789,
"logps/chosen": -243.6610565185547,
"logps/rejected": -301.9700012207031,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.701846122741699,
"rewards/margins": 21.918615341186523,
"rewards/rejected": -26.620458602905273,
"step": 980
},
{
"epoch": 0.3286034353995519,
"grad_norm": 1.2099483683414292e-05,
"learning_rate": 6.713147410358566e-06,
"logits/chosen": -36.2269172668457,
"logits/rejected": -38.308746337890625,
"logps/chosen": -249.517333984375,
"logps/rejected": -304.70123291015625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.904040813446045,
"rewards/margins": 21.009593963623047,
"rewards/rejected": -25.913631439208984,
"step": 990
},
{
"epoch": 0.3319226620197494,
"grad_norm": 2.5862636903184466e-05,
"learning_rate": 6.679946879150067e-06,
"logits/chosen": -36.43896484375,
"logits/rejected": -38.08635711669922,
"logps/chosen": -267.87164306640625,
"logps/rejected": -327.8034362792969,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.200272560119629,
"rewards/margins": 22.356639862060547,
"rewards/rejected": -27.556909561157227,
"step": 1000
},
{
"epoch": 0.3352418886399469,
"grad_norm": 6.518360805785051e-06,
"learning_rate": 6.646746347941568e-06,
"logits/chosen": -37.341712951660156,
"logits/rejected": -39.31797790527344,
"logps/chosen": -251.332275390625,
"logps/rejected": -309.8183898925781,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.140790939331055,
"rewards/margins": 21.90488052368164,
"rewards/rejected": -27.045673370361328,
"step": 1010
},
{
"epoch": 0.3385611152601444,
"grad_norm": 1.5175602129602339e-05,
"learning_rate": 6.613545816733068e-06,
"logits/chosen": -35.90704345703125,
"logits/rejected": -37.437522888183594,
"logps/chosen": -275.33441162109375,
"logps/rejected": -335.77117919921875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.6894025802612305,
"rewards/margins": 22.447458267211914,
"rewards/rejected": -28.136859893798828,
"step": 1020
},
{
"epoch": 0.3418803418803419,
"grad_norm": 0.00012730048911180347,
"learning_rate": 6.5803452855245685e-06,
"logits/chosen": -35.909645080566406,
"logits/rejected": -38.034759521484375,
"logps/chosen": -265.9467468261719,
"logps/rejected": -325.89459228515625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.635085105895996,
"rewards/margins": 22.229894638061523,
"rewards/rejected": -27.864978790283203,
"step": 1030
},
{
"epoch": 0.3451995685005394,
"grad_norm": 1.0754079994512722e-05,
"learning_rate": 6.547144754316069e-06,
"logits/chosen": -36.612430572509766,
"logits/rejected": -38.43956756591797,
"logps/chosen": -257.3985595703125,
"logps/rejected": -317.87518310546875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.553804874420166,
"rewards/margins": 22.50185775756836,
"rewards/rejected": -27.0556640625,
"step": 1040
},
{
"epoch": 0.34851879512073686,
"grad_norm": 0.00021651283896062523,
"learning_rate": 6.51394422310757e-06,
"logits/chosen": -36.52727508544922,
"logits/rejected": -38.221580505371094,
"logps/chosen": -248.41140747070312,
"logps/rejected": -308.6651611328125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.484927177429199,
"rewards/margins": 22.511966705322266,
"rewards/rejected": -26.996891021728516,
"step": 1050
},
{
"epoch": 0.35183802174093437,
"grad_norm": 0.00016664496797602624,
"learning_rate": 6.480743691899071e-06,
"logits/chosen": -36.22939682006836,
"logits/rejected": -38.311370849609375,
"logps/chosen": -246.49362182617188,
"logps/rejected": -305.3147888183594,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.3643717765808105,
"rewards/margins": 21.965564727783203,
"rewards/rejected": -26.329936981201172,
"step": 1060
},
{
"epoch": 0.3551572483611319,
"grad_norm": 7.72338462411426e-05,
"learning_rate": 6.447543160690571e-06,
"logits/chosen": -36.22492980957031,
"logits/rejected": -38.05788803100586,
"logps/chosen": -249.41928100585938,
"logps/rejected": -306.9188232421875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.453120708465576,
"rewards/margins": 21.600210189819336,
"rewards/rejected": -27.053333282470703,
"step": 1070
},
{
"epoch": 0.35847647498132934,
"grad_norm": 2.7300588044454344e-05,
"learning_rate": 6.414342629482072e-06,
"logits/chosen": -35.102294921875,
"logits/rejected": -37.027076721191406,
"logps/chosen": -251.52194213867188,
"logps/rejected": -308.4041748046875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.3340044021606445,
"rewards/margins": 21.424251556396484,
"rewards/rejected": -25.758255004882812,
"step": 1080
},
{
"epoch": 0.36179570160152685,
"grad_norm": 8.087195965345018e-06,
"learning_rate": 6.381142098273573e-06,
"logits/chosen": -35.54331588745117,
"logits/rejected": -37.03045654296875,
"logps/chosen": -274.4892578125,
"logps/rejected": -336.3668518066406,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.781479358673096,
"rewards/margins": 22.853675842285156,
"rewards/rejected": -28.63515281677246,
"step": 1090
},
{
"epoch": 0.3651149282217243,
"grad_norm": 9.18296427698806e-06,
"learning_rate": 6.347941567065074e-06,
"logits/chosen": -36.24702072143555,
"logits/rejected": -38.01585388183594,
"logps/chosen": -245.00448608398438,
"logps/rejected": -303.3841552734375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.3061394691467285,
"rewards/margins": 21.940967559814453,
"rewards/rejected": -26.247106552124023,
"step": 1100
},
{
"epoch": 0.3684341548419218,
"grad_norm": 0.0001284556492464617,
"learning_rate": 6.3147410358565745e-06,
"logits/chosen": -35.398197174072266,
"logits/rejected": -36.98078536987305,
"logps/chosen": -260.3328857421875,
"logps/rejected": -318.5396423339844,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.786358833312988,
"rewards/margins": 21.85409927368164,
"rewards/rejected": -26.640457153320312,
"step": 1110
},
{
"epoch": 0.37175338146211934,
"grad_norm": 9.004733874462545e-05,
"learning_rate": 6.2815405046480745e-06,
"logits/chosen": -35.57320785522461,
"logits/rejected": -37.42049026489258,
"logps/chosen": -239.71749877929688,
"logps/rejected": -300.3533630371094,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.225986003875732,
"rewards/margins": 22.552154541015625,
"rewards/rejected": -26.778141021728516,
"step": 1120
},
{
"epoch": 0.3750726080823168,
"grad_norm": 2.8532302167150192e-05,
"learning_rate": 6.248339973439575e-06,
"logits/chosen": -36.609615325927734,
"logits/rejected": -38.67702865600586,
"logps/chosen": -245.4535675048828,
"logps/rejected": -304.02178955078125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.093851566314697,
"rewards/margins": 21.945655822753906,
"rewards/rejected": -26.039508819580078,
"step": 1130
},
{
"epoch": 0.3783918347025143,
"grad_norm": 0.00011670405365293846,
"learning_rate": 6.215139442231076e-06,
"logits/chosen": -36.06822967529297,
"logits/rejected": -38.279300689697266,
"logps/chosen": -262.287841796875,
"logps/rejected": -324.5474548339844,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.206493377685547,
"rewards/margins": 23.023685455322266,
"rewards/rejected": -29.230178833007812,
"step": 1140
},
{
"epoch": 0.3817110613227118,
"grad_norm": 1.6790625522844493e-05,
"learning_rate": 6.181938911022577e-06,
"logits/chosen": -36.757450103759766,
"logits/rejected": -38.45673370361328,
"logps/chosen": -251.1743927001953,
"logps/rejected": -310.25323486328125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.7559704780578613,
"rewards/margins": 22.096744537353516,
"rewards/rejected": -25.852712631225586,
"step": 1150
},
{
"epoch": 0.3850302879429093,
"grad_norm": 0.0018879029667004943,
"learning_rate": 6.148738379814078e-06,
"logits/chosen": -35.55485916137695,
"logits/rejected": -37.381107330322266,
"logps/chosen": -246.38583374023438,
"logps/rejected": -306.12286376953125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.242562770843506,
"rewards/margins": 22.17359161376953,
"rewards/rejected": -26.416152954101562,
"step": 1160
},
{
"epoch": 0.3883495145631068,
"grad_norm": 0.00015840095875319093,
"learning_rate": 6.115537848605578e-06,
"logits/chosen": -36.76372146606445,
"logits/rejected": -38.73911666870117,
"logps/chosen": -240.2417449951172,
"logps/rejected": -301.1226501464844,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.495165824890137,
"rewards/margins": 22.608638763427734,
"rewards/rejected": -27.103809356689453,
"step": 1170
},
{
"epoch": 0.3916687411833043,
"grad_norm": 6.375902739819139e-05,
"learning_rate": 6.082337317397079e-06,
"logits/chosen": -36.941650390625,
"logits/rejected": -38.865909576416016,
"logps/chosen": -231.86373901367188,
"logps/rejected": -290.8138732910156,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.150005340576172,
"rewards/margins": 22.098033905029297,
"rewards/rejected": -26.248037338256836,
"step": 1180
},
{
"epoch": 0.39498796780350176,
"grad_norm": 9.028934982779901e-06,
"learning_rate": 6.04913678618858e-06,
"logits/chosen": -35.28303527832031,
"logits/rejected": -37.00617218017578,
"logps/chosen": -276.6689758300781,
"logps/rejected": -339.3366394042969,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.4967780113220215,
"rewards/margins": 23.187326431274414,
"rewards/rejected": -28.68410301208496,
"step": 1190
},
{
"epoch": 0.3983071944236993,
"grad_norm": 6.348345209516992e-07,
"learning_rate": 6.0159362549800805e-06,
"logits/chosen": -36.24475860595703,
"logits/rejected": -37.87803268432617,
"logps/chosen": -224.39480590820312,
"logps/rejected": -285.2892761230469,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.7975051403045654,
"rewards/margins": 22.595354080200195,
"rewards/rejected": -26.392858505249023,
"step": 1200
},
{
"epoch": 0.4016264210438968,
"grad_norm": 2.397124444541987e-05,
"learning_rate": 5.982735723771581e-06,
"logits/chosen": -35.9412727355957,
"logits/rejected": -37.60810089111328,
"logps/chosen": -260.8143310546875,
"logps/rejected": -321.2140197753906,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.919522762298584,
"rewards/margins": 22.569488525390625,
"rewards/rejected": -27.489013671875,
"step": 1210
},
{
"epoch": 0.40494564766409424,
"grad_norm": 7.391794497380033e-05,
"learning_rate": 5.949535192563081e-06,
"logits/chosen": -35.552833557128906,
"logits/rejected": -37.327735900878906,
"logps/chosen": -245.9195556640625,
"logps/rejected": -303.10638427734375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.919013261795044,
"rewards/margins": 21.555124282836914,
"rewards/rejected": -25.474132537841797,
"step": 1220
},
{
"epoch": 0.40826487428429176,
"grad_norm": 1.4663862202723976e-05,
"learning_rate": 5.916334661354582e-06,
"logits/chosen": -35.970951080322266,
"logits/rejected": -37.302703857421875,
"logps/chosen": -238.4794921875,
"logps/rejected": -298.0510559082031,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.057009696960449,
"rewards/margins": 22.244709014892578,
"rewards/rejected": -26.30171775817871,
"step": 1230
},
{
"epoch": 0.41158410090448927,
"grad_norm": 1.8007700418820605e-05,
"learning_rate": 5.883134130146083e-06,
"logits/chosen": -35.56674575805664,
"logits/rejected": -37.349674224853516,
"logps/chosen": -241.77908325195312,
"logps/rejected": -301.67529296875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.9492950439453125,
"rewards/margins": 22.323001861572266,
"rewards/rejected": -26.27229881286621,
"step": 1240
},
{
"epoch": 0.4149033275246867,
"grad_norm": 9.073022738448344e-06,
"learning_rate": 5.849933598937584e-06,
"logits/chosen": -35.80928039550781,
"logits/rejected": -37.83527374267578,
"logps/chosen": -269.43780517578125,
"logps/rejected": -330.70965576171875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.043967247009277,
"rewards/margins": 22.84233283996582,
"rewards/rejected": -27.88629722595215,
"step": 1250
},
{
"epoch": 0.41822255414488424,
"grad_norm": 5.101820988784311e-06,
"learning_rate": 5.816733067729085e-06,
"logits/chosen": -36.2827033996582,
"logits/rejected": -37.87694549560547,
"logps/chosen": -249.8275604248047,
"logps/rejected": -310.7562561035156,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.4787421226501465,
"rewards/margins": 22.689529418945312,
"rewards/rejected": -27.168270111083984,
"step": 1260
},
{
"epoch": 0.42154178076508175,
"grad_norm": 1.537020580144599e-05,
"learning_rate": 5.783532536520585e-06,
"logits/chosen": -37.90897750854492,
"logits/rejected": -40.028099060058594,
"logps/chosen": -227.4257049560547,
"logps/rejected": -288.1922912597656,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.244109153747559,
"rewards/margins": 22.62444496154785,
"rewards/rejected": -26.868555068969727,
"step": 1270
},
{
"epoch": 0.4248610073852792,
"grad_norm": 0.0001243548176717013,
"learning_rate": 5.750332005312086e-06,
"logits/chosen": -35.16408920288086,
"logits/rejected": -36.898799896240234,
"logps/chosen": -281.3883056640625,
"logps/rejected": -342.1923828125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.85723876953125,
"rewards/margins": 22.52381134033203,
"rewards/rejected": -28.38104820251465,
"step": 1280
},
{
"epoch": 0.4281802340054767,
"grad_norm": 0.0001157997248810716,
"learning_rate": 5.7171314741035865e-06,
"logits/chosen": -36.70041275024414,
"logits/rejected": -38.27075958251953,
"logps/chosen": -250.15185546875,
"logps/rejected": -310.4129638671875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.715483665466309,
"rewards/margins": 22.438804626464844,
"rewards/rejected": -27.154285430908203,
"step": 1290
},
{
"epoch": 0.43149946062567424,
"grad_norm": 0.00022845834610052407,
"learning_rate": 5.683930942895087e-06,
"logits/chosen": -36.59210968017578,
"logits/rejected": -38.293800354003906,
"logps/chosen": -257.09759521484375,
"logps/rejected": -319.6377868652344,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.797603130340576,
"rewards/margins": 23.203828811645508,
"rewards/rejected": -28.00143051147461,
"step": 1300
},
{
"epoch": 0.4348186872458717,
"grad_norm": 0.0003446840273682028,
"learning_rate": 5.650730411686588e-06,
"logits/chosen": -36.782691955566406,
"logits/rejected": -39.01188278198242,
"logps/chosen": -237.51956176757812,
"logps/rejected": -299.38360595703125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.052783489227295,
"rewards/margins": 22.880496978759766,
"rewards/rejected": -26.93328285217285,
"step": 1310
},
{
"epoch": 0.4381379138660692,
"grad_norm": 8.070600415521767e-06,
"learning_rate": 5.617529880478087e-06,
"logits/chosen": -35.52574920654297,
"logits/rejected": -37.07780075073242,
"logps/chosen": -247.0010223388672,
"logps/rejected": -309.9228515625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.061142921447754,
"rewards/margins": 23.22298812866211,
"rewards/rejected": -27.284133911132812,
"step": 1320
},
{
"epoch": 0.4414571404862667,
"grad_norm": 3.095080319326371e-05,
"learning_rate": 5.584329349269588e-06,
"logits/chosen": -35.79111099243164,
"logits/rejected": -37.27584457397461,
"logps/chosen": -237.08206176757812,
"logps/rejected": -298.28515625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.881493330001831,
"rewards/margins": 22.693397521972656,
"rewards/rejected": -26.574893951416016,
"step": 1330
},
{
"epoch": 0.4447763671064642,
"grad_norm": 0.00014526672021020204,
"learning_rate": 5.551128818061089e-06,
"logits/chosen": -36.553951263427734,
"logits/rejected": -38.48070526123047,
"logps/chosen": -218.1559295654297,
"logps/rejected": -276.0164489746094,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.207418918609619,
"rewards/margins": 21.726083755493164,
"rewards/rejected": -24.933500289916992,
"step": 1340
},
{
"epoch": 0.4480955937266617,
"grad_norm": 1.6845664504216984e-05,
"learning_rate": 5.51792828685259e-06,
"logits/chosen": -36.16156768798828,
"logits/rejected": -38.066184997558594,
"logps/chosen": -282.4837341308594,
"logps/rejected": -346.942138671875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.882527828216553,
"rewards/margins": 23.716060638427734,
"rewards/rejected": -29.598590850830078,
"step": 1350
},
{
"epoch": 0.4514148203468592,
"grad_norm": 0.0001524945255368948,
"learning_rate": 5.48472775564409e-06,
"logits/chosen": -37.370155334472656,
"logits/rejected": -39.46926498413086,
"logps/chosen": -248.5599822998047,
"logps/rejected": -312.64398193359375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.415983200073242,
"rewards/margins": 23.64468002319336,
"rewards/rejected": -29.060659408569336,
"step": 1360
},
{
"epoch": 0.45473404696705666,
"grad_norm": 0.000820525863673538,
"learning_rate": 5.451527224435591e-06,
"logits/chosen": -36.646820068359375,
"logits/rejected": -38.414344787597656,
"logps/chosen": -233.5876007080078,
"logps/rejected": -296.04193115234375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.6753764152526855,
"rewards/margins": 23.128164291381836,
"rewards/rejected": -27.803543090820312,
"step": 1370
},
{
"epoch": 0.4580532735872542,
"grad_norm": 2.5676483801362338e-06,
"learning_rate": 5.418326693227092e-06,
"logits/chosen": -36.1823616027832,
"logits/rejected": -37.99201202392578,
"logps/chosen": -261.86920166015625,
"logps/rejected": -326.8487243652344,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.565834999084473,
"rewards/margins": 23.7719669342041,
"rewards/rejected": -29.337799072265625,
"step": 1380
},
{
"epoch": 0.4613725002074517,
"grad_norm": 6.69591172481887e-05,
"learning_rate": 5.3851261620185925e-06,
"logits/chosen": -37.112308502197266,
"logits/rejected": -39.388126373291016,
"logps/chosen": -253.88955688476562,
"logps/rejected": -315.8279113769531,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.793484210968018,
"rewards/margins": 23.005237579345703,
"rewards/rejected": -27.798717498779297,
"step": 1390
},
{
"epoch": 0.46469172682764914,
"grad_norm": 3.6112989619141445e-05,
"learning_rate": 5.351925630810093e-06,
"logits/chosen": -37.045570373535156,
"logits/rejected": -38.63352966308594,
"logps/chosen": -274.2471618652344,
"logps/rejected": -340.7463684082031,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.769705295562744,
"rewards/margins": 24.323572158813477,
"rewards/rejected": -30.093280792236328,
"step": 1400
},
{
"epoch": 0.46801095344784666,
"grad_norm": 9.693310857983306e-06,
"learning_rate": 5.318725099601593e-06,
"logits/chosen": -35.83592987060547,
"logits/rejected": -37.3544921875,
"logps/chosen": -263.32025146484375,
"logps/rejected": -329.5469665527344,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.016288757324219,
"rewards/margins": 24.141952514648438,
"rewards/rejected": -30.158239364624023,
"step": 1410
},
{
"epoch": 0.47133018006804417,
"grad_norm": 2.5429770289520093e-07,
"learning_rate": 5.285524568393094e-06,
"logits/chosen": -36.3372917175293,
"logits/rejected": -37.873512268066406,
"logps/chosen": -268.4442138671875,
"logps/rejected": -334.9126281738281,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.9471821784973145,
"rewards/margins": 24.34046173095703,
"rewards/rejected": -29.287643432617188,
"step": 1420
},
{
"epoch": 0.4746494066882416,
"grad_norm": 8.884577255230397e-06,
"learning_rate": 5.252324037184595e-06,
"logits/chosen": -35.84831619262695,
"logits/rejected": -37.70962142944336,
"logps/chosen": -222.9773406982422,
"logps/rejected": -280.5433654785156,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.2989723682403564,
"rewards/margins": 21.73419761657715,
"rewards/rejected": -25.03316879272461,
"step": 1430
},
{
"epoch": 0.47796863330843914,
"grad_norm": 5.5867065384518355e-05,
"learning_rate": 5.219123505976096e-06,
"logits/chosen": -35.37043380737305,
"logits/rejected": -36.962501525878906,
"logps/chosen": -269.3448791503906,
"logps/rejected": -334.05108642578125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.493111610412598,
"rewards/margins": 23.792190551757812,
"rewards/rejected": -29.285303115844727,
"step": 1440
},
{
"epoch": 0.48128785992863665,
"grad_norm": 9.691136256151367e-06,
"learning_rate": 5.185922974767597e-06,
"logits/chosen": -37.72825241088867,
"logits/rejected": -39.378013610839844,
"logps/chosen": -240.7726287841797,
"logps/rejected": -302.41864013671875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.8250885009765625,
"rewards/margins": 22.83572769165039,
"rewards/rejected": -26.660816192626953,
"step": 1450
},
{
"epoch": 0.4846070865488341,
"grad_norm": 8.449688903056085e-05,
"learning_rate": 5.152722443559097e-06,
"logits/chosen": -37.7960205078125,
"logits/rejected": -39.6239128112793,
"logps/chosen": -251.767333984375,
"logps/rejected": -314.9220886230469,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.878867149353027,
"rewards/margins": 23.379364013671875,
"rewards/rejected": -28.258230209350586,
"step": 1460
},
{
"epoch": 0.4879263131690316,
"grad_norm": 0.00010858433233806863,
"learning_rate": 5.119521912350598e-06,
"logits/chosen": -37.04092025756836,
"logits/rejected": -38.59927749633789,
"logps/chosen": -243.720947265625,
"logps/rejected": -304.9571838378906,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.011149883270264,
"rewards/margins": 22.803363800048828,
"rewards/rejected": -26.81451416015625,
"step": 1470
},
{
"epoch": 0.49124553978922914,
"grad_norm": 4.217286186758429e-05,
"learning_rate": 5.0863213811420985e-06,
"logits/chosen": -35.66263198852539,
"logits/rejected": -37.24384307861328,
"logps/chosen": -259.3061218261719,
"logps/rejected": -323.9267883300781,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.250843048095703,
"rewards/margins": 23.775760650634766,
"rewards/rejected": -29.0266056060791,
"step": 1480
},
{
"epoch": 0.4945647664094266,
"grad_norm": 2.136345392500516e-05,
"learning_rate": 5.053120849933599e-06,
"logits/chosen": -35.713584899902344,
"logits/rejected": -37.325538635253906,
"logps/chosen": -277.80377197265625,
"logps/rejected": -343.5056457519531,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.864058017730713,
"rewards/margins": 24.002731323242188,
"rewards/rejected": -29.866790771484375,
"step": 1490
},
{
"epoch": 0.4978839930296241,
"grad_norm": 8.999687452160288e-06,
"learning_rate": 5.0199203187251e-06,
"logits/chosen": -36.142784118652344,
"logits/rejected": -37.37236785888672,
"logps/chosen": -232.4190216064453,
"logps/rejected": -293.2521667480469,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.933694839477539,
"rewards/margins": 22.6506404876709,
"rewards/rejected": -26.584335327148438,
"step": 1500
},
{
"epoch": 0.5012032196498216,
"grad_norm": 8.656315003463533e-06,
"learning_rate": 4.986719787516601e-06,
"logits/chosen": -35.22600173950195,
"logits/rejected": -36.61890411376953,
"logps/chosen": -249.437255859375,
"logps/rejected": -313.10650634765625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.46209716796875,
"rewards/margins": 23.603939056396484,
"rewards/rejected": -28.0660343170166,
"step": 1510
},
{
"epoch": 0.5045224462700191,
"grad_norm": 1.5737525245640427e-05,
"learning_rate": 4.953519256308101e-06,
"logits/chosen": -36.45427322387695,
"logits/rejected": -38.449310302734375,
"logps/chosen": -269.5912780761719,
"logps/rejected": -336.03973388671875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.367428779602051,
"rewards/margins": 24.243759155273438,
"rewards/rejected": -29.611186981201172,
"step": 1520
},
{
"epoch": 0.5078416728902165,
"grad_norm": 1.5654180742785684e-06,
"learning_rate": 4.920318725099602e-06,
"logits/chosen": -36.544769287109375,
"logits/rejected": -38.08495330810547,
"logps/chosen": -236.01699829101562,
"logps/rejected": -298.4991149902344,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.073147296905518,
"rewards/margins": 23.109254837036133,
"rewards/rejected": -27.18239974975586,
"step": 1530
},
{
"epoch": 0.5111608995104141,
"grad_norm": 2.285624032083433e-05,
"learning_rate": 4.887118193891103e-06,
"logits/chosen": -36.97005081176758,
"logits/rejected": -38.57817840576172,
"logps/chosen": -223.80029296875,
"logps/rejected": -285.7514953613281,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.016161918640137,
"rewards/margins": 22.881366729736328,
"rewards/rejected": -26.897525787353516,
"step": 1540
},
{
"epoch": 0.5144801261306116,
"grad_norm": 1.6629717720206827e-05,
"learning_rate": 4.853917662682604e-06,
"logits/chosen": -37.56560516357422,
"logits/rejected": -39.122352600097656,
"logps/chosen": -243.0885467529297,
"logps/rejected": -303.94647216796875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.24282693862915,
"rewards/margins": 22.678638458251953,
"rewards/rejected": -26.921466827392578,
"step": 1550
},
{
"epoch": 0.517799352750809,
"grad_norm": 2.3593625883222558e-05,
"learning_rate": 4.8207171314741045e-06,
"logits/chosen": -35.30021286010742,
"logits/rejected": -37.36159133911133,
"logps/chosen": -259.1506042480469,
"logps/rejected": -325.35577392578125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.579512596130371,
"rewards/margins": 24.202543258666992,
"rewards/rejected": -29.782054901123047,
"step": 1560
},
{
"epoch": 0.5211185793710066,
"grad_norm": 1.770765493347426e-06,
"learning_rate": 4.7875166002656045e-06,
"logits/chosen": -36.31800079345703,
"logits/rejected": -38.350250244140625,
"logps/chosen": -237.36587524414062,
"logps/rejected": -297.8676452636719,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.429483413696289,
"rewards/margins": 22.614795684814453,
"rewards/rejected": -27.04427719116211,
"step": 1570
},
{
"epoch": 0.524437805991204,
"grad_norm": 2.521073110983707e-05,
"learning_rate": 4.754316069057105e-06,
"logits/chosen": -34.50792694091797,
"logits/rejected": -36.35118865966797,
"logps/chosen": -312.0120544433594,
"logps/rejected": -376.3553161621094,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -7.226397514343262,
"rewards/margins": 23.781085968017578,
"rewards/rejected": -31.007482528686523,
"step": 1580
},
{
"epoch": 0.5277570326114015,
"grad_norm": 2.6629986678017303e-05,
"learning_rate": 4.721115537848606e-06,
"logits/chosen": -37.28297805786133,
"logits/rejected": -39.56510543823242,
"logps/chosen": -232.8235626220703,
"logps/rejected": -293.8519592285156,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.996170997619629,
"rewards/margins": 22.683502197265625,
"rewards/rejected": -27.679668426513672,
"step": 1590
},
{
"epoch": 0.5310762592315991,
"grad_norm": 3.985645162174478e-05,
"learning_rate": 4.687915006640107e-06,
"logits/chosen": -36.04001998901367,
"logits/rejected": -38.201377868652344,
"logps/chosen": -257.0011291503906,
"logps/rejected": -318.67889404296875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.405674934387207,
"rewards/margins": 22.883712768554688,
"rewards/rejected": -28.28938865661621,
"step": 1600
},
{
"epoch": 0.5343954858517965,
"grad_norm": 4.466701648198068e-05,
"learning_rate": 4.654714475431607e-06,
"logits/chosen": -36.21506118774414,
"logits/rejected": -38.33916473388672,
"logps/chosen": -248.7204132080078,
"logps/rejected": -309.51446533203125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.977995872497559,
"rewards/margins": 22.648815155029297,
"rewards/rejected": -27.626811981201172,
"step": 1610
},
{
"epoch": 0.537714712471994,
"grad_norm": 0.0006125931977294385,
"learning_rate": 4.621513944223108e-06,
"logits/chosen": -35.31191635131836,
"logits/rejected": -37.675506591796875,
"logps/chosen": -251.9046173095703,
"logps/rejected": -314.9078674316406,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.107827186584473,
"rewards/margins": 23.261333465576172,
"rewards/rejected": -29.36916160583496,
"step": 1620
},
{
"epoch": 0.5410339390921916,
"grad_norm": 5.603829413303174e-05,
"learning_rate": 4.588313413014609e-06,
"logits/chosen": -34.84865951538086,
"logits/rejected": -37.49020004272461,
"logps/chosen": -249.0811309814453,
"logps/rejected": -310.94805908203125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.208640098571777,
"rewards/margins": 22.867446899414062,
"rewards/rejected": -28.076086044311523,
"step": 1630
},
{
"epoch": 0.544353165712389,
"grad_norm": 3.166065289406106e-05,
"learning_rate": 4.555112881806109e-06,
"logits/chosen": -35.96332550048828,
"logits/rejected": -38.01958465576172,
"logps/chosen": -255.0028533935547,
"logps/rejected": -317.4012145996094,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.1949872970581055,
"rewards/margins": 23.095088958740234,
"rewards/rejected": -28.290075302124023,
"step": 1640
},
{
"epoch": 0.5476723923325865,
"grad_norm": 0.00018184522923547775,
"learning_rate": 4.52191235059761e-06,
"logits/chosen": -36.66709518432617,
"logits/rejected": -38.78025436401367,
"logps/chosen": -276.5002136230469,
"logps/rejected": -337.06280517578125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.941756725311279,
"rewards/margins": 22.575870513916016,
"rewards/rejected": -28.517627716064453,
"step": 1650
},
{
"epoch": 0.550991618952784,
"grad_norm": 3.8955668060225435e-06,
"learning_rate": 4.4887118193891105e-06,
"logits/chosen": -37.09162902832031,
"logits/rejected": -39.494346618652344,
"logps/chosen": -240.5594024658203,
"logps/rejected": -302.869873046875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.489818096160889,
"rewards/margins": 23.031490325927734,
"rewards/rejected": -28.521312713623047,
"step": 1660
},
{
"epoch": 0.5543108455729815,
"grad_norm": 9.758635314938147e-06,
"learning_rate": 4.455511288180611e-06,
"logits/chosen": -35.8779296875,
"logits/rejected": -38.21814727783203,
"logps/chosen": -242.24783325195312,
"logps/rejected": -303.9131774902344,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.195720672607422,
"rewards/margins": 22.870954513549805,
"rewards/rejected": -28.066675186157227,
"step": 1670
},
{
"epoch": 0.557630072193179,
"grad_norm": 1.1670150342979468e-05,
"learning_rate": 4.422310756972112e-06,
"logits/chosen": -34.99301528930664,
"logits/rejected": -37.15611267089844,
"logps/chosen": -276.33843994140625,
"logps/rejected": -343.9205017089844,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.765660285949707,
"rewards/margins": 24.636295318603516,
"rewards/rejected": -31.401952743530273,
"step": 1680
},
{
"epoch": 0.5609492988133765,
"grad_norm": 2.343825326533988e-05,
"learning_rate": 4.389110225763612e-06,
"logits/chosen": -36.582054138183594,
"logits/rejected": -38.67331314086914,
"logps/chosen": -229.20010375976562,
"logps/rejected": -289.13934326171875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.137927055358887,
"rewards/margins": 22.312835693359375,
"rewards/rejected": -26.450763702392578,
"step": 1690
},
{
"epoch": 0.564268525433574,
"grad_norm": 5.144063470652327e-05,
"learning_rate": 4.355909694555113e-06,
"logits/chosen": -36.755279541015625,
"logits/rejected": -39.31067657470703,
"logps/chosen": -238.5866241455078,
"logps/rejected": -300.41607666015625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.042891979217529,
"rewards/margins": 22.878875732421875,
"rewards/rejected": -27.921768188476562,
"step": 1700
},
{
"epoch": 0.5675877520537714,
"grad_norm": 3.793970972765237e-05,
"learning_rate": 4.322709163346614e-06,
"logits/chosen": -36.33258819580078,
"logits/rejected": -38.49297332763672,
"logps/chosen": -256.3636474609375,
"logps/rejected": -323.8926086425781,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.737571716308594,
"rewards/margins": 24.59103012084961,
"rewards/rejected": -31.328601837158203,
"step": 1710
},
{
"epoch": 0.570906978673969,
"grad_norm": 2.3688435248914175e-05,
"learning_rate": 4.289508632138115e-06,
"logits/chosen": -37.646400451660156,
"logits/rejected": -39.79381561279297,
"logps/chosen": -239.6971893310547,
"logps/rejected": -301.040283203125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.137190818786621,
"rewards/margins": 22.715267181396484,
"rewards/rejected": -27.852458953857422,
"step": 1720
},
{
"epoch": 0.5742262052941665,
"grad_norm": 0.0008279504254460335,
"learning_rate": 4.256308100929616e-06,
"logits/chosen": -36.02642059326172,
"logits/rejected": -38.3137092590332,
"logps/chosen": -249.07089233398438,
"logps/rejected": -311.4475402832031,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.421334743499756,
"rewards/margins": 23.024635314941406,
"rewards/rejected": -28.445972442626953,
"step": 1730
},
{
"epoch": 0.5775454319143639,
"grad_norm": 1.2977415053683217e-06,
"learning_rate": 4.223107569721116e-06,
"logits/chosen": -36.99226379394531,
"logits/rejected": -39.35835647583008,
"logps/chosen": -230.90493774414062,
"logps/rejected": -291.55364990234375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.519408702850342,
"rewards/margins": 22.53643798828125,
"rewards/rejected": -27.05584716796875,
"step": 1740
},
{
"epoch": 0.5808646585345615,
"grad_norm": 1.3930763088865206e-05,
"learning_rate": 4.1899070385126165e-06,
"logits/chosen": -36.491249084472656,
"logits/rejected": -38.7135124206543,
"logps/chosen": -240.33584594726562,
"logps/rejected": -305.18304443359375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.365754127502441,
"rewards/margins": 23.769380569458008,
"rewards/rejected": -29.1351318359375,
"step": 1750
},
{
"epoch": 0.5841838851547589,
"grad_norm": 1.682070978858974e-05,
"learning_rate": 4.156706507304117e-06,
"logits/chosen": -36.36711883544922,
"logits/rejected": -38.498600006103516,
"logps/chosen": -257.9060974121094,
"logps/rejected": -323.3476257324219,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.284196376800537,
"rewards/margins": 23.89134407043457,
"rewards/rejected": -30.175540924072266,
"step": 1760
},
{
"epoch": 0.5875031117749564,
"grad_norm": 7.728056516498327e-06,
"learning_rate": 4.123505976095618e-06,
"logits/chosen": -37.04905700683594,
"logits/rejected": -39.33579635620117,
"logps/chosen": -227.35110473632812,
"logps/rejected": -292.36236572265625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.201313495635986,
"rewards/margins": 23.991268157958984,
"rewards/rejected": -29.192584991455078,
"step": 1770
},
{
"epoch": 0.590822338395154,
"grad_norm": 0.00020755194418597966,
"learning_rate": 4.090305444887119e-06,
"logits/chosen": -36.47849655151367,
"logits/rejected": -38.484745025634766,
"logps/chosen": -261.4329833984375,
"logps/rejected": -325.38604736328125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.6097822189331055,
"rewards/margins": 23.496807098388672,
"rewards/rejected": -29.106592178344727,
"step": 1780
},
{
"epoch": 0.5941415650153514,
"grad_norm": 0.0010281304130330682,
"learning_rate": 4.057104913678619e-06,
"logits/chosen": -36.252891540527344,
"logits/rejected": -38.18025207519531,
"logps/chosen": -231.2675018310547,
"logps/rejected": -294.30999755859375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.148598670959473,
"rewards/margins": 23.31288719177246,
"rewards/rejected": -27.46148681640625,
"step": 1790
},
{
"epoch": 0.5974607916355489,
"grad_norm": 5.641156531055458e-05,
"learning_rate": 4.02390438247012e-06,
"logits/chosen": -36.627098083496094,
"logits/rejected": -38.566261291503906,
"logps/chosen": -257.6075134277344,
"logps/rejected": -322.8147888183594,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.343573570251465,
"rewards/margins": 23.8291072845459,
"rewards/rejected": -30.172677993774414,
"step": 1800
},
{
"epoch": 0.6007800182557465,
"grad_norm": 8.674534183228388e-05,
"learning_rate": 3.99070385126162e-06,
"logits/chosen": -37.30437469482422,
"logits/rejected": -39.19757843017578,
"logps/chosen": -236.25155639648438,
"logps/rejected": -299.7480773925781,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.02539587020874,
"rewards/margins": 23.419660568237305,
"rewards/rejected": -28.445056915283203,
"step": 1810
},
{
"epoch": 0.6040992448759439,
"grad_norm": 2.2813930627307855e-05,
"learning_rate": 3.957503320053121e-06,
"logits/chosen": -36.5108528137207,
"logits/rejected": -38.77637481689453,
"logps/chosen": -243.909423828125,
"logps/rejected": -308.46575927734375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.7918548583984375,
"rewards/margins": 23.672077178955078,
"rewards/rejected": -28.463932037353516,
"step": 1820
},
{
"epoch": 0.6074184714961414,
"grad_norm": 0.0004189323226455599,
"learning_rate": 3.924302788844622e-06,
"logits/chosen": -36.76378631591797,
"logits/rejected": -38.989498138427734,
"logps/chosen": -249.64974975585938,
"logps/rejected": -314.20989990234375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.429641246795654,
"rewards/margins": 23.652019500732422,
"rewards/rejected": -29.0816593170166,
"step": 1830
},
{
"epoch": 0.6107376981163389,
"grad_norm": 3.000356628035661e-05,
"learning_rate": 3.8911022576361225e-06,
"logits/chosen": -36.149635314941406,
"logits/rejected": -38.40140914916992,
"logps/chosen": -273.1649475097656,
"logps/rejected": -340.8751525878906,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.340195178985596,
"rewards/margins": 24.731584548950195,
"rewards/rejected": -31.07177734375,
"step": 1840
},
{
"epoch": 0.6140569247365364,
"grad_norm": 9.709197911433876e-05,
"learning_rate": 3.857901726427623e-06,
"logits/chosen": -35.064029693603516,
"logits/rejected": -36.718536376953125,
"logps/chosen": -254.9979248046875,
"logps/rejected": -318.6874084472656,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.1480937004089355,
"rewards/margins": 23.502117156982422,
"rewards/rejected": -28.65021324157715,
"step": 1850
},
{
"epoch": 0.6173761513567338,
"grad_norm": 0.00014991410716902465,
"learning_rate": 3.824701195219123e-06,
"logits/chosen": -36.74406051635742,
"logits/rejected": -38.75735092163086,
"logps/chosen": -229.8621063232422,
"logps/rejected": -293.3158264160156,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.392579078674316,
"rewards/margins": 23.361215591430664,
"rewards/rejected": -28.753793716430664,
"step": 1860
},
{
"epoch": 0.6206953779769314,
"grad_norm": 2.5267569071729667e-05,
"learning_rate": 3.7915006640106242e-06,
"logits/chosen": -35.470855712890625,
"logits/rejected": -37.23722457885742,
"logps/chosen": -276.9035949707031,
"logps/rejected": -342.387451171875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.212436676025391,
"rewards/margins": 24.028600692749023,
"rewards/rejected": -30.241037368774414,
"step": 1870
},
{
"epoch": 0.6240146045971289,
"grad_norm": 1.762523243087344e-05,
"learning_rate": 3.758300132802125e-06,
"logits/chosen": -35.641090393066406,
"logits/rejected": -37.73635482788086,
"logps/chosen": -245.4388885498047,
"logps/rejected": -310.99713134765625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.393484115600586,
"rewards/margins": 24.041391372680664,
"rewards/rejected": -29.43487548828125,
"step": 1880
},
{
"epoch": 0.6273338312173263,
"grad_norm": 1.0433415809529833e-05,
"learning_rate": 3.725099601593626e-06,
"logits/chosen": -35.56284713745117,
"logits/rejected": -37.38603210449219,
"logps/chosen": -238.94775390625,
"logps/rejected": -302.51947021484375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.463973045349121,
"rewards/margins": 23.474252700805664,
"rewards/rejected": -27.9382266998291,
"step": 1890
},
{
"epoch": 0.6306530578375239,
"grad_norm": 2.4497583581251092e-05,
"learning_rate": 3.6918990703851264e-06,
"logits/chosen": -36.126556396484375,
"logits/rejected": -37.94770050048828,
"logps/chosen": -250.25961303710938,
"logps/rejected": -314.0647888183594,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.455031394958496,
"rewards/margins": 23.481657028198242,
"rewards/rejected": -28.936687469482422,
"step": 1900
},
{
"epoch": 0.6339722844577214,
"grad_norm": 3.172329888911918e-05,
"learning_rate": 3.6586985391766272e-06,
"logits/chosen": -36.24954605102539,
"logits/rejected": -38.31598663330078,
"logps/chosen": -253.3960418701172,
"logps/rejected": -318.83233642578125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.337418556213379,
"rewards/margins": 23.940242767333984,
"rewards/rejected": -29.277660369873047,
"step": 1910
},
{
"epoch": 0.6372915110779188,
"grad_norm": 8.708640052645933e-06,
"learning_rate": 3.625498007968128e-06,
"logits/chosen": -35.00163269042969,
"logits/rejected": -37.10395812988281,
"logps/chosen": -264.4671936035156,
"logps/rejected": -328.429443359375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.868958473205566,
"rewards/margins": 23.600505828857422,
"rewards/rejected": -29.469463348388672,
"step": 1920
},
{
"epoch": 0.6406107376981164,
"grad_norm": 3.279365409980528e-05,
"learning_rate": 3.5922974767596285e-06,
"logits/chosen": -36.706092834472656,
"logits/rejected": -38.54290008544922,
"logps/chosen": -237.87362670898438,
"logps/rejected": -302.92108154296875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.26140832901001,
"rewards/margins": 23.881010055541992,
"rewards/rejected": -29.14241600036621,
"step": 1930
},
{
"epoch": 0.6439299643183138,
"grad_norm": 0.00011895268107764423,
"learning_rate": 3.5590969455511294e-06,
"logits/chosen": -35.864967346191406,
"logits/rejected": -38.541648864746094,
"logps/chosen": -261.33477783203125,
"logps/rejected": -328.9570007324219,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.419045448303223,
"rewards/margins": 24.578853607177734,
"rewards/rejected": -30.99790382385254,
"step": 1940
},
{
"epoch": 0.6472491909385113,
"grad_norm": 5.514123768080026e-05,
"learning_rate": 3.52589641434263e-06,
"logits/chosen": -36.08778762817383,
"logits/rejected": -38.50415802001953,
"logps/chosen": -251.5714874267578,
"logps/rejected": -316.7638244628906,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.013431549072266,
"rewards/margins": 24.004486083984375,
"rewards/rejected": -30.017919540405273,
"step": 1950
},
{
"epoch": 0.6505684175587089,
"grad_norm": 0.00010633220517775044,
"learning_rate": 3.4926958831341307e-06,
"logits/chosen": -35.88835906982422,
"logits/rejected": -38.69109344482422,
"logps/chosen": -277.28228759765625,
"logps/rejected": -344.85955810546875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -7.679495334625244,
"rewards/margins": 24.58434295654297,
"rewards/rejected": -32.26383972167969,
"step": 1960
},
{
"epoch": 0.6538876441789063,
"grad_norm": 4.945451905769005e-07,
"learning_rate": 3.4594953519256315e-06,
"logits/chosen": -35.732547760009766,
"logits/rejected": -37.812828063964844,
"logps/chosen": -267.87628173828125,
"logps/rejected": -333.97113037109375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.297167778015137,
"rewards/margins": 24.186397552490234,
"rewards/rejected": -30.483562469482422,
"step": 1970
},
{
"epoch": 0.6572068707991038,
"grad_norm": 1.6296591638820246e-05,
"learning_rate": 3.4262948207171315e-06,
"logits/chosen": -36.72779083251953,
"logits/rejected": -39.340965270996094,
"logps/chosen": -263.29132080078125,
"logps/rejected": -329.0027770996094,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.973439693450928,
"rewards/margins": 24.01589584350586,
"rewards/rejected": -29.989337921142578,
"step": 1980
},
{
"epoch": 0.6605260974193014,
"grad_norm": 1.2624731425603386e-05,
"learning_rate": 3.3930942895086324e-06,
"logits/chosen": -35.57170867919922,
"logits/rejected": -37.529754638671875,
"logps/chosen": -257.77154541015625,
"logps/rejected": -321.79388427734375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.590646266937256,
"rewards/margins": 23.66408348083496,
"rewards/rejected": -29.254735946655273,
"step": 1990
},
{
"epoch": 0.6638453240394988,
"grad_norm": 2.9282214200065937e-06,
"learning_rate": 3.359893758300133e-06,
"logits/chosen": -36.09126663208008,
"logits/rejected": -38.71330642700195,
"logps/chosen": -268.74365234375,
"logps/rejected": -335.63519287109375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.973354339599609,
"rewards/margins": 24.495656967163086,
"rewards/rejected": -31.469013214111328,
"step": 2000
},
{
"epoch": 0.6671645506596963,
"grad_norm": 4.54183009424014e-06,
"learning_rate": 3.3266932270916337e-06,
"logits/chosen": -35.75475311279297,
"logits/rejected": -38.265567779541016,
"logps/chosen": -265.66510009765625,
"logps/rejected": -333.2896423339844,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.296258926391602,
"rewards/margins": 24.668832778930664,
"rewards/rejected": -30.9650936126709,
"step": 2010
},
{
"epoch": 0.6704837772798938,
"grad_norm": 7.087628546287306e-06,
"learning_rate": 3.293492695883134e-06,
"logits/chosen": -35.057525634765625,
"logits/rejected": -37.05876541137695,
"logps/chosen": -260.94903564453125,
"logps/rejected": -326.78289794921875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.652168273925781,
"rewards/margins": 24.176315307617188,
"rewards/rejected": -29.828481674194336,
"step": 2020
},
{
"epoch": 0.6738030039000913,
"grad_norm": 4.373361832676892e-07,
"learning_rate": 3.260292164674635e-06,
"logits/chosen": -35.68403625488281,
"logits/rejected": -37.77031707763672,
"logps/chosen": -246.8516082763672,
"logps/rejected": -315.7016906738281,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.814517974853516,
"rewards/margins": 24.977245330810547,
"rewards/rejected": -30.791763305664062,
"step": 2030
},
{
"epoch": 0.6771222305202887,
"grad_norm": 4.2058504732267465e-06,
"learning_rate": 3.227091633466136e-06,
"logits/chosen": -36.04418182373047,
"logits/rejected": -38.19130325317383,
"logps/chosen": -270.22119140625,
"logps/rejected": -336.78619384765625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.137897491455078,
"rewards/margins": 24.296836853027344,
"rewards/rejected": -30.434734344482422,
"step": 2040
},
{
"epoch": 0.6804414571404863,
"grad_norm": 2.4626022423035465e-05,
"learning_rate": 3.1938911022576362e-06,
"logits/chosen": -35.86650085449219,
"logits/rejected": -37.94574737548828,
"logps/chosen": -252.1937255859375,
"logps/rejected": -315.3446044921875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.771885871887207,
"rewards/margins": 23.3013973236084,
"rewards/rejected": -29.073284149169922,
"step": 2050
},
{
"epoch": 0.6837606837606838,
"grad_norm": 7.916089816717431e-05,
"learning_rate": 3.160690571049137e-06,
"logits/chosen": -37.223426818847656,
"logits/rejected": -39.6295051574707,
"logps/chosen": -243.1242218017578,
"logps/rejected": -304.4497985839844,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.6395769119262695,
"rewards/margins": 22.74526596069336,
"rewards/rejected": -28.384841918945312,
"step": 2060
},
{
"epoch": 0.6870799103808812,
"grad_norm": 2.3471836811950197e-06,
"learning_rate": 3.1274900398406375e-06,
"logits/chosen": -36.550941467285156,
"logits/rejected": -38.87454605102539,
"logps/chosen": -266.3443908691406,
"logps/rejected": -333.722900390625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.885348320007324,
"rewards/margins": 24.666173934936523,
"rewards/rejected": -31.5515193939209,
"step": 2070
},
{
"epoch": 0.6903991370010788,
"grad_norm": 3.3997166610788554e-06,
"learning_rate": 3.0942895086321384e-06,
"logits/chosen": -36.20463943481445,
"logits/rejected": -38.63981628417969,
"logps/chosen": -260.6272888183594,
"logps/rejected": -327.7062683105469,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.540787696838379,
"rewards/margins": 24.436195373535156,
"rewards/rejected": -30.97698402404785,
"step": 2080
},
{
"epoch": 0.6937183636212763,
"grad_norm": 6.613054665649543e-06,
"learning_rate": 3.0610889774236392e-06,
"logits/chosen": -37.008026123046875,
"logits/rejected": -39.31156921386719,
"logps/chosen": -230.788330078125,
"logps/rejected": -294.869873046875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.067322731018066,
"rewards/margins": 23.62921905517578,
"rewards/rejected": -28.6965389251709,
"step": 2090
},
{
"epoch": 0.6970375902414737,
"grad_norm": 8.439514203928411e-06,
"learning_rate": 3.0278884462151397e-06,
"logits/chosen": -35.995094299316406,
"logits/rejected": -38.129146575927734,
"logps/chosen": -245.63327026367188,
"logps/rejected": -308.6886291503906,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.045685291290283,
"rewards/margins": 23.421958923339844,
"rewards/rejected": -28.467641830444336,
"step": 2100
},
{
"epoch": 0.7003568168616713,
"grad_norm": 2.3334492652793415e-05,
"learning_rate": 2.9946879150066405e-06,
"logits/chosen": -35.943634033203125,
"logits/rejected": -38.231529235839844,
"logps/chosen": -279.72039794921875,
"logps/rejected": -347.6648864746094,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -7.199967861175537,
"rewards/margins": 24.816007614135742,
"rewards/rejected": -32.01597595214844,
"step": 2110
},
{
"epoch": 0.7036760434818687,
"grad_norm": 8.327054092660546e-05,
"learning_rate": 2.961487383798141e-06,
"logits/chosen": -35.427879333496094,
"logits/rejected": -37.45615768432617,
"logps/chosen": -260.65289306640625,
"logps/rejected": -328.54962158203125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.8487420082092285,
"rewards/margins": 24.858335494995117,
"rewards/rejected": -30.707077026367188,
"step": 2120
},
{
"epoch": 0.7069952701020662,
"grad_norm": 7.448333235515747e-06,
"learning_rate": 2.928286852589642e-06,
"logits/chosen": -37.2711181640625,
"logits/rejected": -39.69253158569336,
"logps/chosen": -220.7040252685547,
"logps/rejected": -282.16668701171875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.204867839813232,
"rewards/margins": 22.861181259155273,
"rewards/rejected": -27.066049575805664,
"step": 2130
},
{
"epoch": 0.7103144967222638,
"grad_norm": 1.7492104234406725e-05,
"learning_rate": 2.8950863213811427e-06,
"logits/chosen": -35.61443328857422,
"logits/rejected": -38.02817916870117,
"logps/chosen": -276.75323486328125,
"logps/rejected": -348.700927734375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -7.692807197570801,
"rewards/margins": 25.893789291381836,
"rewards/rejected": -33.58660125732422,
"step": 2140
},
{
"epoch": 0.7136337233424612,
"grad_norm": 1.8488311752662412e-06,
"learning_rate": 2.861885790172643e-06,
"logits/chosen": -35.8737678527832,
"logits/rejected": -38.23582458496094,
"logps/chosen": -283.478759765625,
"logps/rejected": -347.2231750488281,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.430272102355957,
"rewards/margins": 23.48923110961914,
"rewards/rejected": -29.919504165649414,
"step": 2150
},
{
"epoch": 0.7169529499626587,
"grad_norm": 2.168203081964748e-06,
"learning_rate": 2.828685258964144e-06,
"logits/chosen": -37.18292999267578,
"logits/rejected": -39.45110321044922,
"logps/chosen": -234.93820190429688,
"logps/rejected": -298.44769287109375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.9636945724487305,
"rewards/margins": 23.42740249633789,
"rewards/rejected": -28.391098022460938,
"step": 2160
},
{
"epoch": 0.7202721765828562,
"grad_norm": 1.770351082086563e-05,
"learning_rate": 2.795484727755644e-06,
"logits/chosen": -37.75334930419922,
"logits/rejected": -39.95841979980469,
"logps/chosen": -245.21817016601562,
"logps/rejected": -308.2054138183594,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.453461170196533,
"rewards/margins": 23.370468139648438,
"rewards/rejected": -28.823928833007812,
"step": 2170
},
{
"epoch": 0.7235914032030537,
"grad_norm": 4.3876305426238105e-05,
"learning_rate": 2.762284196547145e-06,
"logits/chosen": -37.115257263183594,
"logits/rejected": -39.60409164428711,
"logps/chosen": -236.9832305908203,
"logps/rejected": -303.8288879394531,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.18468713760376,
"rewards/margins": 24.370668411254883,
"rewards/rejected": -29.555355072021484,
"step": 2180
},
{
"epoch": 0.7269106298232512,
"grad_norm": 3.0235052690841258e-05,
"learning_rate": 2.7290836653386452e-06,
"logits/chosen": -37.33479690551758,
"logits/rejected": -39.598915100097656,
"logps/chosen": -229.43814086914062,
"logps/rejected": -293.3581848144531,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.7233428955078125,
"rewards/margins": 23.499513626098633,
"rewards/rejected": -28.222854614257812,
"step": 2190
},
{
"epoch": 0.7302298564434486,
"grad_norm": 2.1708687199861743e-05,
"learning_rate": 2.695883134130146e-06,
"logits/chosen": -36.968074798583984,
"logits/rejected": -39.77884292602539,
"logps/chosen": -232.990478515625,
"logps/rejected": -301.4418029785156,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.018186092376709,
"rewards/margins": 24.904294967651367,
"rewards/rejected": -30.922481536865234,
"step": 2200
},
{
"epoch": 0.7335490830636462,
"grad_norm": 7.78408139012754e-06,
"learning_rate": 2.662682602921647e-06,
"logits/chosen": -37.694664001464844,
"logits/rejected": -39.98257064819336,
"logps/chosen": -218.97183227539062,
"logps/rejected": -283.67132568359375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.842543601989746,
"rewards/margins": 23.720943450927734,
"rewards/rejected": -28.563491821289062,
"step": 2210
},
{
"epoch": 0.7368683096838436,
"grad_norm": 3.5446532820060384e-06,
"learning_rate": 2.6294820717131474e-06,
"logits/chosen": -37.003211975097656,
"logits/rejected": -39.224727630615234,
"logps/chosen": -240.2725830078125,
"logps/rejected": -306.5699768066406,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.473374843597412,
"rewards/margins": 24.33763885498047,
"rewards/rejected": -29.811010360717773,
"step": 2220
},
{
"epoch": 0.7401875363040411,
"grad_norm": 2.044969551207032e-05,
"learning_rate": 2.5962815405046482e-06,
"logits/chosen": -34.97216033935547,
"logits/rejected": -37.055519104003906,
"logps/chosen": -234.8686981201172,
"logps/rejected": -300.5046081542969,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.184366703033447,
"rewards/margins": 24.180988311767578,
"rewards/rejected": -29.3653564453125,
"step": 2230
},
{
"epoch": 0.7435067629242387,
"grad_norm": 1.7181002931465628e-06,
"learning_rate": 2.563081009296149e-06,
"logits/chosen": -35.178504943847656,
"logits/rejected": -37.071800231933594,
"logps/chosen": -271.97137451171875,
"logps/rejected": -338.36407470703125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.9848222732543945,
"rewards/margins": 24.228235244750977,
"rewards/rejected": -31.21305274963379,
"step": 2240
},
{
"epoch": 0.7468259895444361,
"grad_norm": 1.943804272741545e-05,
"learning_rate": 2.5298804780876495e-06,
"logits/chosen": -35.294288635253906,
"logits/rejected": -37.824981689453125,
"logps/chosen": -286.2807312011719,
"logps/rejected": -353.0439758300781,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -7.225214958190918,
"rewards/margins": 24.468542098999023,
"rewards/rejected": -31.69375991821289,
"step": 2250
},
{
"epoch": 0.7501452161646336,
"grad_norm": 5.341881660569925e-06,
"learning_rate": 2.4966799468791504e-06,
"logits/chosen": -35.99862289428711,
"logits/rejected": -38.410194396972656,
"logps/chosen": -260.35614013671875,
"logps/rejected": -328.924560546875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.724026679992676,
"rewards/margins": 24.8992862701416,
"rewards/rejected": -31.623315811157227,
"step": 2260
},
{
"epoch": 0.7534644427848312,
"grad_norm": 2.7916195904253982e-05,
"learning_rate": 2.463479415670651e-06,
"logits/chosen": -36.70926284790039,
"logits/rejected": -38.943397521972656,
"logps/chosen": -232.7910919189453,
"logps/rejected": -294.95062255859375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.800173759460449,
"rewards/margins": 23.0440673828125,
"rewards/rejected": -27.844242095947266,
"step": 2270
},
{
"epoch": 0.7567836694050286,
"grad_norm": 1.1390899089747109e-05,
"learning_rate": 2.4302788844621517e-06,
"logits/chosen": -35.52710723876953,
"logits/rejected": -37.439945220947266,
"logps/chosen": -253.768798828125,
"logps/rejected": -319.4378967285156,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.10952615737915,
"rewards/margins": 24.009607315063477,
"rewards/rejected": -30.1191349029541,
"step": 2280
},
{
"epoch": 0.7601028960252261,
"grad_norm": 1.4922372884029755e-06,
"learning_rate": 2.3970783532536525e-06,
"logits/chosen": -36.5639762878418,
"logits/rejected": -38.71487808227539,
"logps/chosen": -249.2660675048828,
"logps/rejected": -315.0458984375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.141018867492676,
"rewards/margins": 24.193401336669922,
"rewards/rejected": -30.33441734313965,
"step": 2290
},
{
"epoch": 0.7634221226454236,
"grad_norm": 1.5967334547895007e-06,
"learning_rate": 2.363877822045153e-06,
"logits/chosen": -36.093650817871094,
"logits/rejected": -38.5761604309082,
"logps/chosen": -240.87637329101562,
"logps/rejected": -308.2940368652344,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.83968448638916,
"rewards/margins": 24.636394500732422,
"rewards/rejected": -30.4760799407959,
"step": 2300
},
{
"epoch": 0.7667413492656211,
"grad_norm": 9.941512644218164e-07,
"learning_rate": 2.3306772908366534e-06,
"logits/chosen": -36.442893981933594,
"logits/rejected": -38.91448974609375,
"logps/chosen": -273.2530822753906,
"logps/rejected": -342.6146545410156,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -7.223963737487793,
"rewards/margins": 25.15464973449707,
"rewards/rejected": -32.37861633300781,
"step": 2310
},
{
"epoch": 0.7700605758858186,
"grad_norm": 4.130026809434639e-06,
"learning_rate": 2.2974767596281542e-06,
"logits/chosen": -36.173011779785156,
"logits/rejected": -38.719547271728516,
"logps/chosen": -264.0533752441406,
"logps/rejected": -334.3890075683594,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -7.334656715393066,
"rewards/margins": 25.290576934814453,
"rewards/rejected": -32.62523651123047,
"step": 2320
},
{
"epoch": 0.7733798025060161,
"grad_norm": 1.1943691333726747e-06,
"learning_rate": 2.2642762284196547e-06,
"logits/chosen": -36.09328079223633,
"logits/rejected": -38.52931594848633,
"logps/chosen": -269.763916015625,
"logps/rejected": -336.2082824707031,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.884180545806885,
"rewards/margins": 24.197860717773438,
"rewards/rejected": -31.082040786743164,
"step": 2330
},
{
"epoch": 0.7766990291262136,
"grad_norm": 3.132646452286281e-05,
"learning_rate": 2.2310756972111555e-06,
"logits/chosen": -36.489498138427734,
"logits/rejected": -38.6656379699707,
"logps/chosen": -226.9405975341797,
"logps/rejected": -290.4114990234375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.617298126220703,
"rewards/margins": 23.44748306274414,
"rewards/rejected": -28.064783096313477,
"step": 2340
},
{
"epoch": 0.780018255746411,
"grad_norm": 0.004452559631317854,
"learning_rate": 2.1978751660026564e-06,
"logits/chosen": -36.05640411376953,
"logits/rejected": -38.674530029296875,
"logps/chosen": -252.38198852539062,
"logps/rejected": -319.01129150390625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.577996730804443,
"rewards/margins": 24.36948013305664,
"rewards/rejected": -29.94747543334961,
"step": 2350
},
{
"epoch": 0.7833374823666086,
"grad_norm": 8.203298057196662e-05,
"learning_rate": 2.164674634794157e-06,
"logits/chosen": -36.65255355834961,
"logits/rejected": -38.996917724609375,
"logps/chosen": -255.0420684814453,
"logps/rejected": -322.2328186035156,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.767322540283203,
"rewards/margins": 24.485187530517578,
"rewards/rejected": -30.252511978149414,
"step": 2360
},
{
"epoch": 0.7866567089868061,
"grad_norm": 3.436456972849555e-05,
"learning_rate": 2.1314741035856577e-06,
"logits/chosen": -34.79005813598633,
"logits/rejected": -36.92679977416992,
"logps/chosen": -250.50857543945312,
"logps/rejected": -317.89910888671875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.7200026512146,
"rewards/margins": 24.521738052368164,
"rewards/rejected": -30.24173927307129,
"step": 2370
},
{
"epoch": 0.7899759356070035,
"grad_norm": 3.881140855810372e-06,
"learning_rate": 2.098273572377158e-06,
"logits/chosen": -36.45794677734375,
"logits/rejected": -38.66387176513672,
"logps/chosen": -275.30828857421875,
"logps/rejected": -343.1573181152344,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.899484157562256,
"rewards/margins": 24.771820068359375,
"rewards/rejected": -31.67130470275879,
"step": 2380
},
{
"epoch": 0.7932951622272011,
"grad_norm": 1.58879871037243e-07,
"learning_rate": 2.065073041168659e-06,
"logits/chosen": -36.50375747680664,
"logits/rejected": -38.68590545654297,
"logps/chosen": -241.6002655029297,
"logps/rejected": -307.832763671875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.272534370422363,
"rewards/margins": 24.189064025878906,
"rewards/rejected": -29.461597442626953,
"step": 2390
},
{
"epoch": 0.7966143888473985,
"grad_norm": 3.56239324901253e-05,
"learning_rate": 2.03187250996016e-06,
"logits/chosen": -36.17913055419922,
"logits/rejected": -38.23615264892578,
"logps/chosen": -249.85971069335938,
"logps/rejected": -315.089599609375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.946499824523926,
"rewards/margins": 24.03165054321289,
"rewards/rejected": -28.9781494140625,
"step": 2400
},
{
"epoch": 0.799933615467596,
"grad_norm": 1.6728210539440624e-05,
"learning_rate": 1.9986719787516602e-06,
"logits/chosen": -36.62885665893555,
"logits/rejected": -39.18938446044922,
"logps/chosen": -238.7482147216797,
"logps/rejected": -304.65216064453125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.411585807800293,
"rewards/margins": 24.22550392150879,
"rewards/rejected": -29.6370906829834,
"step": 2410
},
{
"epoch": 0.8032528420877936,
"grad_norm": 4.99165580549743e-05,
"learning_rate": 1.9654714475431607e-06,
"logits/chosen": -34.9477424621582,
"logits/rejected": -37.074520111083984,
"logps/chosen": -269.28125,
"logps/rejected": -338.604248046875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.398122787475586,
"rewards/margins": 25.21800994873047,
"rewards/rejected": -31.616130828857422,
"step": 2420
},
{
"epoch": 0.806572068707991,
"grad_norm": 2.385314473940525e-06,
"learning_rate": 1.9322709163346615e-06,
"logits/chosen": -36.0991096496582,
"logits/rejected": -38.03818130493164,
"logps/chosen": -244.12930297851562,
"logps/rejected": -314.333251953125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.721272945404053,
"rewards/margins": 25.411584854125977,
"rewards/rejected": -31.132862091064453,
"step": 2430
},
{
"epoch": 0.8098912953281885,
"grad_norm": 5.150145170773612e-06,
"learning_rate": 1.8990703851261622e-06,
"logits/chosen": -36.011573791503906,
"logits/rejected": -38.14513397216797,
"logps/chosen": -248.9287567138672,
"logps/rejected": -314.93682861328125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.756242275238037,
"rewards/margins": 24.10676383972168,
"rewards/rejected": -29.86301040649414,
"step": 2440
},
{
"epoch": 0.8132105219483861,
"grad_norm": 2.2313422959996387e-05,
"learning_rate": 1.8658698539176628e-06,
"logits/chosen": -36.681114196777344,
"logits/rejected": -39.09503936767578,
"logps/chosen": -242.25674438476562,
"logps/rejected": -306.0912170410156,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.092987060546875,
"rewards/margins": 23.52627182006836,
"rewards/rejected": -28.619258880615234,
"step": 2450
},
{
"epoch": 0.8165297485685835,
"grad_norm": 7.533744064858183e-05,
"learning_rate": 1.8326693227091634e-06,
"logits/chosen": -35.47425079345703,
"logits/rejected": -37.82908630371094,
"logps/chosen": -252.47488403320312,
"logps/rejected": -320.1539306640625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.538949012756348,
"rewards/margins": 24.637981414794922,
"rewards/rejected": -30.176931381225586,
"step": 2460
},
{
"epoch": 0.819848975188781,
"grad_norm": 1.6146932466654107e-05,
"learning_rate": 1.7994687915006643e-06,
"logits/chosen": -36.48265838623047,
"logits/rejected": -38.716392517089844,
"logps/chosen": -251.6239776611328,
"logps/rejected": -321.3429870605469,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.724469184875488,
"rewards/margins": 25.172916412353516,
"rewards/rejected": -31.897384643554688,
"step": 2470
},
{
"epoch": 0.8231682018089785,
"grad_norm": 7.460760389221832e-05,
"learning_rate": 1.766268260292165e-06,
"logits/chosen": -36.723182678222656,
"logits/rejected": -39.13579177856445,
"logps/chosen": -253.5547332763672,
"logps/rejected": -322.19049072265625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.035333633422852,
"rewards/margins": 25.05792236328125,
"rewards/rejected": -31.093252182006836,
"step": 2480
},
{
"epoch": 0.826487428429176,
"grad_norm": 7.737757186987437e-06,
"learning_rate": 1.7330677290836656e-06,
"logits/chosen": -36.27003479003906,
"logits/rejected": -38.642295837402344,
"logps/chosen": -245.1279754638672,
"logps/rejected": -313.0539855957031,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.8328351974487305,
"rewards/margins": 24.66913414001465,
"rewards/rejected": -30.501968383789062,
"step": 2490
},
{
"epoch": 0.8298066550493735,
"grad_norm": 2.845164817699697e-05,
"learning_rate": 1.699867197875166e-06,
"logits/chosen": -35.327457427978516,
"logits/rejected": -37.247886657714844,
"logps/chosen": -250.5523681640625,
"logps/rejected": -317.6628112792969,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.3366007804870605,
"rewards/margins": 24.55581283569336,
"rewards/rejected": -29.89241600036621,
"step": 2500
},
{
"epoch": 0.833125881669571,
"grad_norm": 6.5803114921436645e-06,
"learning_rate": 1.6666666666666667e-06,
"logits/chosen": -34.98641586303711,
"logits/rejected": -37.47605514526367,
"logps/chosen": -256.0675354003906,
"logps/rejected": -325.28607177734375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.059304237365723,
"rewards/margins": 25.124370574951172,
"rewards/rejected": -31.183673858642578,
"step": 2510
},
{
"epoch": 0.8364451082897685,
"grad_norm": 0.00010842137999134138,
"learning_rate": 1.6334661354581673e-06,
"logits/chosen": -35.48594284057617,
"logits/rejected": -37.64731979370117,
"logps/chosen": -291.96661376953125,
"logps/rejected": -362.49005126953125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.917998313903809,
"rewards/margins": 25.65730857849121,
"rewards/rejected": -32.5753059387207,
"step": 2520
},
{
"epoch": 0.8397643349099659,
"grad_norm": 3.7938156083328067e-07,
"learning_rate": 1.6002656042496682e-06,
"logits/chosen": -37.083927154541016,
"logits/rejected": -39.340457916259766,
"logps/chosen": -252.4654083251953,
"logps/rejected": -320.502685546875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.359490394592285,
"rewards/margins": 24.804195404052734,
"rewards/rejected": -31.163684844970703,
"step": 2530
},
{
"epoch": 0.8430835615301635,
"grad_norm": 6.859098357381299e-05,
"learning_rate": 1.5670650730411688e-06,
"logits/chosen": -36.62004470825195,
"logits/rejected": -39.08134460449219,
"logps/chosen": -271.40533447265625,
"logps/rejected": -341.4969177246094,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -7.194591522216797,
"rewards/margins": 25.380462646484375,
"rewards/rejected": -32.575050354003906,
"step": 2540
},
{
"epoch": 0.846402788150361,
"grad_norm": 3.553018541424535e-05,
"learning_rate": 1.5338645418326694e-06,
"logits/chosen": -36.68794631958008,
"logits/rejected": -39.383785247802734,
"logps/chosen": -257.90716552734375,
"logps/rejected": -327.06610107421875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.333869934082031,
"rewards/margins": 25.118257522583008,
"rewards/rejected": -31.452129364013672,
"step": 2550
},
{
"epoch": 0.8497220147705584,
"grad_norm": 6.490451050922275e-05,
"learning_rate": 1.50066401062417e-06,
"logits/chosen": -37.411705017089844,
"logits/rejected": -39.78534698486328,
"logps/chosen": -234.9873046875,
"logps/rejected": -303.38116455078125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.998685359954834,
"rewards/margins": 24.948780059814453,
"rewards/rejected": -29.947467803955078,
"step": 2560
},
{
"epoch": 0.853041241390756,
"grad_norm": 1.5090402484929655e-05,
"learning_rate": 1.467463479415671e-06,
"logits/chosen": -36.00045394897461,
"logits/rejected": -37.93694305419922,
"logps/chosen": -231.26699829101562,
"logps/rejected": -297.6919860839844,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.228448867797852,
"rewards/margins": 24.28873062133789,
"rewards/rejected": -29.517181396484375,
"step": 2570
},
{
"epoch": 0.8563604680109534,
"grad_norm": 2.468251523168874e-06,
"learning_rate": 1.4342629482071716e-06,
"logits/chosen": -35.700321197509766,
"logits/rejected": -37.91484451293945,
"logps/chosen": -249.6962127685547,
"logps/rejected": -317.5003356933594,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.312900066375732,
"rewards/margins": 24.76122283935547,
"rewards/rejected": -30.07412338256836,
"step": 2580
},
{
"epoch": 0.8596796946311509,
"grad_norm": 1.049622710525e-06,
"learning_rate": 1.401062416998672e-06,
"logits/chosen": -36.12137985229492,
"logits/rejected": -38.40441131591797,
"logps/chosen": -256.86279296875,
"logps/rejected": -325.9319763183594,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.079124927520752,
"rewards/margins": 25.098485946655273,
"rewards/rejected": -31.1776123046875,
"step": 2590
},
{
"epoch": 0.8629989212513485,
"grad_norm": 5.486945610755356e-06,
"learning_rate": 1.3678618857901727e-06,
"logits/chosen": -36.008670806884766,
"logits/rejected": -38.78040313720703,
"logps/chosen": -274.86517333984375,
"logps/rejected": -347.06109619140625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -7.957380771636963,
"rewards/margins": 26.006641387939453,
"rewards/rejected": -33.96402359008789,
"step": 2600
},
{
"epoch": 0.8663181478715459,
"grad_norm": 1.6589292499702424e-06,
"learning_rate": 1.3346613545816733e-06,
"logits/chosen": -36.0199089050293,
"logits/rejected": -38.339603424072266,
"logps/chosen": -250.30520629882812,
"logps/rejected": -319.3824768066406,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.83938455581665,
"rewards/margins": 25.026578903198242,
"rewards/rejected": -30.865966796875,
"step": 2610
},
{
"epoch": 0.8696373744917434,
"grad_norm": 2.6869927296502283e-06,
"learning_rate": 1.301460823373174e-06,
"logits/chosen": -35.62989044189453,
"logits/rejected": -38.205936431884766,
"logps/chosen": -257.1199035644531,
"logps/rejected": -323.7815246582031,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.572248458862305,
"rewards/margins": 24.289873123168945,
"rewards/rejected": -29.862117767333984,
"step": 2620
},
{
"epoch": 0.872956601111941,
"grad_norm": 6.404624582501128e-05,
"learning_rate": 1.2682602921646748e-06,
"logits/chosen": -35.45529556274414,
"logits/rejected": -37.626609802246094,
"logps/chosen": -256.4237976074219,
"logps/rejected": -324.92919921875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.755781173706055,
"rewards/margins": 24.921703338623047,
"rewards/rejected": -30.6774845123291,
"step": 2630
},
{
"epoch": 0.8762758277321384,
"grad_norm": 9.178786058328114e-06,
"learning_rate": 1.2350597609561754e-06,
"logits/chosen": -36.738868713378906,
"logits/rejected": -39.251304626464844,
"logps/chosen": -261.7998962402344,
"logps/rejected": -330.28558349609375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.661735534667969,
"rewards/margins": 24.9061222076416,
"rewards/rejected": -30.567859649658203,
"step": 2640
},
{
"epoch": 0.8795950543523359,
"grad_norm": 2.6355290174251422e-05,
"learning_rate": 1.201859229747676e-06,
"logits/chosen": -36.43751907348633,
"logits/rejected": -38.71375274658203,
"logps/chosen": -235.941162109375,
"logps/rejected": -302.32415771484375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.947510719299316,
"rewards/margins": 24.280080795288086,
"rewards/rejected": -29.227588653564453,
"step": 2650
},
{
"epoch": 0.8829142809725334,
"grad_norm": 5.58648844162235e-06,
"learning_rate": 1.1686586985391767e-06,
"logits/chosen": -36.9846305847168,
"logits/rejected": -39.65496826171875,
"logps/chosen": -230.78744506835938,
"logps/rejected": -299.64617919921875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.086447238922119,
"rewards/margins": 25.012691497802734,
"rewards/rejected": -30.099136352539062,
"step": 2660
},
{
"epoch": 0.8862335075927309,
"grad_norm": 7.070900755934417e-05,
"learning_rate": 1.1354581673306774e-06,
"logits/chosen": -36.42656707763672,
"logits/rejected": -38.978431701660156,
"logps/chosen": -281.9248046875,
"logps/rejected": -353.60308837890625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -7.565675258636475,
"rewards/margins": 25.867778778076172,
"rewards/rejected": -33.433448791503906,
"step": 2670
},
{
"epoch": 0.8895527342129284,
"grad_norm": 6.121000296843704e-06,
"learning_rate": 1.102257636122178e-06,
"logits/chosen": -36.232933044433594,
"logits/rejected": -38.41249084472656,
"logps/chosen": -255.221435546875,
"logps/rejected": -324.15008544921875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.308706760406494,
"rewards/margins": 25.112064361572266,
"rewards/rejected": -31.4207763671875,
"step": 2680
},
{
"epoch": 0.8928719608331259,
"grad_norm": 4.801144655175449e-07,
"learning_rate": 1.0690571049136787e-06,
"logits/chosen": -36.57440185546875,
"logits/rejected": -38.97066116333008,
"logps/chosen": -274.68109130859375,
"logps/rejected": -343.2183837890625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -8.028226852416992,
"rewards/margins": 24.872940063476562,
"rewards/rejected": -32.90116500854492,
"step": 2690
},
{
"epoch": 0.8961911874533234,
"grad_norm": 9.706584933155682e-06,
"learning_rate": 1.0358565737051795e-06,
"logits/chosen": -35.043174743652344,
"logits/rejected": -37.608097076416016,
"logps/chosen": -270.731689453125,
"logps/rejected": -338.45623779296875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.948904514312744,
"rewards/margins": 24.692392349243164,
"rewards/rejected": -31.64129638671875,
"step": 2700
},
{
"epoch": 0.8995104140735208,
"grad_norm": 1.1282833838777151e-05,
"learning_rate": 1.00265604249668e-06,
"logits/chosen": -36.31340026855469,
"logits/rejected": -38.685543060302734,
"logps/chosen": -244.37234497070312,
"logps/rejected": -312.83197021484375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.175669193267822,
"rewards/margins": 24.857559204101562,
"rewards/rejected": -31.03322410583496,
"step": 2710
},
{
"epoch": 0.9028296406937184,
"grad_norm": 2.1625977751682512e-05,
"learning_rate": 9.694555112881806e-07,
"logits/chosen": -36.470420837402344,
"logits/rejected": -38.7746696472168,
"logps/chosen": -256.0016174316406,
"logps/rejected": -326.2404479980469,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.9281110763549805,
"rewards/margins": 25.440807342529297,
"rewards/rejected": -32.368919372558594,
"step": 2720
},
{
"epoch": 0.9061488673139159,
"grad_norm": 6.486946222139522e-05,
"learning_rate": 9.362549800796813e-07,
"logits/chosen": -36.24733352661133,
"logits/rejected": -38.840904235839844,
"logps/chosen": -256.3532409667969,
"logps/rejected": -329.0278015136719,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -7.113903045654297,
"rewards/margins": 26.07107162475586,
"rewards/rejected": -33.184974670410156,
"step": 2730
},
{
"epoch": 0.9094680939341133,
"grad_norm": 1.6229272659984417e-05,
"learning_rate": 9.030544488711821e-07,
"logits/chosen": -35.76521682739258,
"logits/rejected": -38.19070053100586,
"logps/chosen": -270.4026184082031,
"logps/rejected": -339.18194580078125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -7.0731916427612305,
"rewards/margins": 24.943195343017578,
"rewards/rejected": -32.01638412475586,
"step": 2740
},
{
"epoch": 0.9127873205543109,
"grad_norm": 0.0001113278340199031,
"learning_rate": 8.698539176626827e-07,
"logits/chosen": -37.50344467163086,
"logits/rejected": -40.08824920654297,
"logps/chosen": -250.40576171875,
"logps/rejected": -319.7777404785156,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.421402931213379,
"rewards/margins": 25.24834442138672,
"rewards/rejected": -31.669748306274414,
"step": 2750
},
{
"epoch": 0.9161065471745083,
"grad_norm": 6.062048214516835e-07,
"learning_rate": 8.366533864541833e-07,
"logits/chosen": -35.75933074951172,
"logits/rejected": -38.32414627075195,
"logps/chosen": -278.0367431640625,
"logps/rejected": -348.5729064941406,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -7.881424903869629,
"rewards/margins": 25.48324966430664,
"rewards/rejected": -33.36467361450195,
"step": 2760
},
{
"epoch": 0.9194257737947058,
"grad_norm": 9.18797104532132e-06,
"learning_rate": 8.03452855245684e-07,
"logits/chosen": -36.331016540527344,
"logits/rejected": -38.755123138427734,
"logps/chosen": -277.12640380859375,
"logps/rejected": -348.4110412597656,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -7.622500419616699,
"rewards/margins": 25.761404037475586,
"rewards/rejected": -33.38390350341797,
"step": 2770
},
{
"epoch": 0.9227450004149034,
"grad_norm": 3.350157567183487e-05,
"learning_rate": 7.702523240371847e-07,
"logits/chosen": -34.45648193359375,
"logits/rejected": -36.82111358642578,
"logps/chosen": -268.44903564453125,
"logps/rejected": -338.30029296875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -7.01177978515625,
"rewards/margins": 25.25690460205078,
"rewards/rejected": -32.26868438720703,
"step": 2780
},
{
"epoch": 0.9260642270351008,
"grad_norm": 5.719254113500938e-05,
"learning_rate": 7.370517928286854e-07,
"logits/chosen": -36.46429443359375,
"logits/rejected": -39.10625457763672,
"logps/chosen": -228.64663696289062,
"logps/rejected": -293.049560546875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.573931694030762,
"rewards/margins": 23.767658233642578,
"rewards/rejected": -29.341588973999023,
"step": 2790
},
{
"epoch": 0.9293834536552983,
"grad_norm": 0.00011925002763746306,
"learning_rate": 7.03851261620186e-07,
"logits/chosen": -36.33781814575195,
"logits/rejected": -39.13783645629883,
"logps/chosen": -238.5530548095703,
"logps/rejected": -305.1521911621094,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.77554988861084,
"rewards/margins": 24.387807846069336,
"rewards/rejected": -30.16335678100586,
"step": 2800
},
{
"epoch": 0.9327026802754959,
"grad_norm": 5.343552402337082e-06,
"learning_rate": 6.706507304116866e-07,
"logits/chosen": -36.91948318481445,
"logits/rejected": -39.26295471191406,
"logps/chosen": -257.33135986328125,
"logps/rejected": -323.4559326171875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.2154645919799805,
"rewards/margins": 24.268310546875,
"rewards/rejected": -30.483774185180664,
"step": 2810
},
{
"epoch": 0.9360219068956933,
"grad_norm": 2.8307771572144702e-05,
"learning_rate": 6.374501992031873e-07,
"logits/chosen": -34.397552490234375,
"logits/rejected": -37.383384704589844,
"logps/chosen": -290.0972900390625,
"logps/rejected": -361.43756103515625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -8.181169509887695,
"rewards/margins": 25.659854888916016,
"rewards/rejected": -33.841026306152344,
"step": 2820
},
{
"epoch": 0.9393411335158908,
"grad_norm": 5.62709647056181e-06,
"learning_rate": 6.04249667994688e-07,
"logits/chosen": -35.95421600341797,
"logits/rejected": -38.090457916259766,
"logps/chosen": -290.0306091308594,
"logps/rejected": -362.3145446777344,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -8.559988975524902,
"rewards/margins": 26.056650161743164,
"rewards/rejected": -34.61663818359375,
"step": 2830
},
{
"epoch": 0.9426603601360883,
"grad_norm": 2.8976512112421915e-05,
"learning_rate": 5.710491367861886e-07,
"logits/chosen": -36.20732879638672,
"logits/rejected": -38.5179557800293,
"logps/chosen": -271.90667724609375,
"logps/rejected": -342.0574035644531,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -7.341311454772949,
"rewards/margins": 25.435096740722656,
"rewards/rejected": -32.77640914916992,
"step": 2840
},
{
"epoch": 0.9459795867562858,
"grad_norm": 8.338892257597763e-06,
"learning_rate": 5.378486055776893e-07,
"logits/chosen": -35.963661193847656,
"logits/rejected": -38.3126106262207,
"logps/chosen": -245.7353057861328,
"logps/rejected": -313.90399169921875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.468613624572754,
"rewards/margins": 24.895671844482422,
"rewards/rejected": -30.364282608032227,
"step": 2850
},
{
"epoch": 0.9492988133764833,
"grad_norm": 2.6563682240521302e-06,
"learning_rate": 5.046480743691899e-07,
"logits/chosen": -34.87287139892578,
"logits/rejected": -37.09748458862305,
"logps/chosen": -276.9396057128906,
"logps/rejected": -347.16046142578125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -7.051947116851807,
"rewards/margins": 25.537128448486328,
"rewards/rejected": -32.589073181152344,
"step": 2860
},
{
"epoch": 0.9526180399966808,
"grad_norm": 3.179600525982096e-06,
"learning_rate": 4.714475431606906e-07,
"logits/chosen": -35.76880645751953,
"logits/rejected": -38.161956787109375,
"logps/chosen": -262.0658874511719,
"logps/rejected": -327.91729736328125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.562504768371582,
"rewards/margins": 24.080707550048828,
"rewards/rejected": -30.643213272094727,
"step": 2870
},
{
"epoch": 0.9559372666168783,
"grad_norm": 2.855755155906081e-06,
"learning_rate": 4.382470119521913e-07,
"logits/chosen": -35.748382568359375,
"logits/rejected": -38.125755310058594,
"logps/chosen": -239.87118530273438,
"logps/rejected": -305.96588134765625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.7444939613342285,
"rewards/margins": 24.06570816040039,
"rewards/rejected": -29.81020164489746,
"step": 2880
},
{
"epoch": 0.9592564932370757,
"grad_norm": 0.0001762977335602045,
"learning_rate": 4.0504648074369194e-07,
"logits/chosen": -36.54397201538086,
"logits/rejected": -39.024314880371094,
"logps/chosen": -240.02841186523438,
"logps/rejected": -306.2109680175781,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.792388916015625,
"rewards/margins": 24.234182357788086,
"rewards/rejected": -30.026575088500977,
"step": 2890
},
{
"epoch": 0.9625757198572733,
"grad_norm": 4.043288208777085e-05,
"learning_rate": 3.718459495351926e-07,
"logits/chosen": -36.67218780517578,
"logits/rejected": -39.409828186035156,
"logps/chosen": -228.93173217773438,
"logps/rejected": -295.4620666503906,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -5.035333156585693,
"rewards/margins": 24.397380828857422,
"rewards/rejected": -29.43271255493164,
"step": 2900
},
{
"epoch": 0.9658949464774708,
"grad_norm": 3.066948920604773e-06,
"learning_rate": 3.3864541832669323e-07,
"logits/chosen": -36.082374572753906,
"logits/rejected": -38.355224609375,
"logps/chosen": -268.31268310546875,
"logps/rejected": -339.3592834472656,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -7.250875949859619,
"rewards/margins": 25.694320678710938,
"rewards/rejected": -32.94519805908203,
"step": 2910
},
{
"epoch": 0.9692141730976682,
"grad_norm": 1.1050363355025183e-05,
"learning_rate": 3.054448871181939e-07,
"logits/chosen": -36.860496520996094,
"logits/rejected": -39.4510383605957,
"logps/chosen": -244.20462036132812,
"logps/rejected": -314.990234375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.0202860832214355,
"rewards/margins": 25.554231643676758,
"rewards/rejected": -31.57451820373535,
"step": 2920
},
{
"epoch": 0.9725333997178658,
"grad_norm": 4.18513263866771e-06,
"learning_rate": 2.7224435590969457e-07,
"logits/chosen": -36.25141143798828,
"logits/rejected": -38.87180709838867,
"logps/chosen": -242.8925323486328,
"logps/rejected": -312.47308349609375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.114789962768555,
"rewards/margins": 25.346786499023438,
"rewards/rejected": -31.46157455444336,
"step": 2930
},
{
"epoch": 0.9758526263380632,
"grad_norm": 1.5998368326108903e-05,
"learning_rate": 2.390438247011952e-07,
"logits/chosen": -35.24533462524414,
"logits/rejected": -37.914817810058594,
"logps/chosen": -279.0862121582031,
"logps/rejected": -350.7375793457031,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -7.582180023193359,
"rewards/margins": 25.864376068115234,
"rewards/rejected": -33.446556091308594,
"step": 2940
},
{
"epoch": 0.9791718529582607,
"grad_norm": 1.7516629213787382e-06,
"learning_rate": 2.0584329349269588e-07,
"logits/chosen": -37.73789978027344,
"logits/rejected": -40.67639923095703,
"logps/chosen": -247.8397216796875,
"logps/rejected": -319.43109130859375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.405348777770996,
"rewards/margins": 25.796106338500977,
"rewards/rejected": -32.201454162597656,
"step": 2950
},
{
"epoch": 0.9824910795784583,
"grad_norm": 7.380790975730633e-06,
"learning_rate": 1.7264276228419655e-07,
"logits/chosen": -36.16815948486328,
"logits/rejected": -38.34816360473633,
"logps/chosen": -255.6117706298828,
"logps/rejected": -326.8828430175781,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -7.3727617263793945,
"rewards/margins": 25.710683822631836,
"rewards/rejected": -33.08344650268555,
"step": 2960
},
{
"epoch": 0.9858103061986557,
"grad_norm": 3.3136252568510827e-06,
"learning_rate": 1.3944223107569722e-07,
"logits/chosen": -34.815086364746094,
"logits/rejected": -37.27305603027344,
"logps/chosen": -257.39678955078125,
"logps/rejected": -325.2138671875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.813126564025879,
"rewards/margins": 24.72214698791504,
"rewards/rejected": -31.5352725982666,
"step": 2970
},
{
"epoch": 0.9891295328188532,
"grad_norm": 2.209369449701626e-05,
"learning_rate": 1.0624169986719788e-07,
"logits/chosen": -35.99190902709961,
"logits/rejected": -38.414974212646484,
"logps/chosen": -275.14166259765625,
"logps/rejected": -346.3056945800781,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -7.347718715667725,
"rewards/margins": 25.74213218688965,
"rewards/rejected": -33.08985137939453,
"step": 2980
},
{
"epoch": 0.9924487594390508,
"grad_norm": 4.2861594806709036e-07,
"learning_rate": 7.304116865869855e-08,
"logits/chosen": -36.379974365234375,
"logits/rejected": -38.90006637573242,
"logps/chosen": -258.8759765625,
"logps/rejected": -328.20111083984375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -7.374964714050293,
"rewards/margins": 25.241411209106445,
"rewards/rejected": -32.61638259887695,
"step": 2990
},
{
"epoch": 0.9957679860592482,
"grad_norm": 3.627765181590803e-06,
"learning_rate": 3.984063745019921e-08,
"logits/chosen": -35.75197982788086,
"logits/rejected": -38.48480987548828,
"logps/chosen": -257.9820861816406,
"logps/rejected": -326.41229248046875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.912039279937744,
"rewards/margins": 24.800914764404297,
"rewards/rejected": -31.712955474853516,
"step": 3000
},
{
"epoch": 0.9990872126794457,
"grad_norm": 8.645590241940226e-06,
"learning_rate": 6.640106241699867e-09,
"logits/chosen": -35.83268356323242,
"logits/rejected": -38.067604064941406,
"logps/chosen": -274.35833740234375,
"logps/rejected": -341.5016174316406,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -6.786323547363281,
"rewards/margins": 24.36429786682129,
"rewards/rejected": -31.150623321533203,
"step": 3010
}
],
"logging_steps": 10,
"max_steps": 3012,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}