| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9997510580034852, |
| "eval_steps": 500, |
| "global_step": 3012, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.003319226620197494, |
| "grad_norm": 2.494830846786499, |
| "learning_rate": 9.9667994687915e-06, |
| "logits/chosen": -27.511184692382812, |
| "logits/rejected": -28.262775421142578, |
| "logps/chosen": -244.8615264892578, |
| "logps/rejected": -235.1686248779297, |
| "loss": 0.266, |
| "rewards/accuracies": 0.8999999761581421, |
| "rewards/chosen": 1.5318301916122437, |
| "rewards/margins": 1.4668998718261719, |
| "rewards/rejected": 0.06493023782968521, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.006638453240394988, |
| "grad_norm": 0.7127860188484192, |
| "learning_rate": 9.933598937583003e-06, |
| "logits/chosen": -31.031789779663086, |
| "logits/rejected": -31.7587890625, |
| "logps/chosen": -198.0988311767578, |
| "logps/rejected": -194.5559539794922, |
| "loss": 0.0449, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 3.054797649383545, |
| "rewards/margins": 3.3064472675323486, |
| "rewards/rejected": -0.2516496777534485, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.009957679860592481, |
| "grad_norm": 0.22338563203811646, |
| "learning_rate": 9.900398406374503e-06, |
| "logits/chosen": -30.917648315429688, |
| "logits/rejected": -31.401653289794922, |
| "logps/chosen": -230.81179809570312, |
| "logps/rejected": -231.5104217529297, |
| "loss": 0.015, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.695403575897217, |
| "rewards/margins": 4.529758453369141, |
| "rewards/rejected": -1.834355115890503, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.013276906480789975, |
| "grad_norm": 0.11917012184858322, |
| "learning_rate": 9.867197875166004e-06, |
| "logits/chosen": -32.53432083129883, |
| "logits/rejected": -33.02833938598633, |
| "logps/chosen": -243.38931274414062, |
| "logps/rejected": -249.16824340820312, |
| "loss": 0.0044, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.072178363800049, |
| "rewards/margins": 6.1943559646606445, |
| "rewards/rejected": -4.122177600860596, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.01659613310098747, |
| "grad_norm": 0.03350254148244858, |
| "learning_rate": 9.833997343957504e-06, |
| "logits/chosen": -33.12944030761719, |
| "logits/rejected": -33.550689697265625, |
| "logps/chosen": -231.4219970703125, |
| "logps/rejected": -241.4237823486328, |
| "loss": 0.0017, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.7218177318572998, |
| "rewards/margins": 7.393080711364746, |
| "rewards/rejected": -5.671263217926025, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.019915359721184963, |
| "grad_norm": 0.04106535390019417, |
| "learning_rate": 9.800796812749004e-06, |
| "logits/chosen": -33.08533477783203, |
| "logits/rejected": -33.47816848754883, |
| "logps/chosen": -215.3968505859375, |
| "logps/rejected": -229.9862518310547, |
| "loss": 0.0007, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.6401796340942383, |
| "rewards/margins": 8.768369674682617, |
| "rewards/rejected": -7.128190517425537, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.02323458634138246, |
| "grad_norm": 0.015592047944664955, |
| "learning_rate": 9.767596281540506e-06, |
| "logits/chosen": -33.04099655151367, |
| "logits/rejected": -33.49821853637695, |
| "logps/chosen": -261.6057434082031, |
| "logps/rejected": -281.25140380859375, |
| "loss": 0.0004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.20642319321632385, |
| "rewards/margins": 10.172657012939453, |
| "rewards/rejected": -9.966233253479004, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.02655381296157995, |
| "grad_norm": 0.01825164072215557, |
| "learning_rate": 9.734395750332006e-06, |
| "logits/chosen": -33.06761169433594, |
| "logits/rejected": -33.45521545410156, |
| "logps/chosen": -222.248779296875, |
| "logps/rejected": -241.6016082763672, |
| "loss": 0.0004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.2139157056808472, |
| "rewards/margins": 10.25539493560791, |
| "rewards/rejected": -9.04148006439209, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.029873039581777446, |
| "grad_norm": 0.009622551500797272, |
| "learning_rate": 9.701195219123508e-06, |
| "logits/chosen": -34.399051666259766, |
| "logits/rejected": -34.781593322753906, |
| "logps/chosen": -212.21316528320312, |
| "logps/rejected": -235.27783203125, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.9530667066574097, |
| "rewards/margins": 11.376951217651367, |
| "rewards/rejected": -10.423883438110352, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.03319226620197494, |
| "grad_norm": 0.0019461100455373526, |
| "learning_rate": 9.667994687915008e-06, |
| "logits/chosen": -33.364990234375, |
| "logits/rejected": -33.812992095947266, |
| "logps/chosen": -249.4156494140625, |
| "logps/rejected": -274.4363098144531, |
| "loss": 0.0003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.25879842042922974, |
| "rewards/margins": 11.992888450622559, |
| "rewards/rejected": -11.734089851379395, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.036511492822172434, |
| "grad_norm": 0.007654547691345215, |
| "learning_rate": 9.634794156706508e-06, |
| "logits/chosen": -34.42128372192383, |
| "logits/rejected": -34.87459182739258, |
| "logps/chosen": -234.20126342773438, |
| "logps/rejected": -258.58172607421875, |
| "loss": 0.0002, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.7224393486976624, |
| "rewards/margins": 11.716227531433105, |
| "rewards/rejected": -10.99378776550293, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.039830719442369926, |
| "grad_norm": 0.003787972964346409, |
| "learning_rate": 9.60159362549801e-06, |
| "logits/chosen": -34.04184341430664, |
| "logits/rejected": -34.68623733520508, |
| "logps/chosen": -235.21005249023438, |
| "logps/rejected": -263.50030517578125, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.45280832052230835, |
| "rewards/margins": 12.783775329589844, |
| "rewards/rejected": -13.236584663391113, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.043149946062567425, |
| "grad_norm": 0.0027999032754451036, |
| "learning_rate": 9.56839309428951e-06, |
| "logits/chosen": -34.30641174316406, |
| "logits/rejected": -34.804931640625, |
| "logps/chosen": -239.53280639648438, |
| "logps/rejected": -269.2521057128906, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6962872743606567, |
| "rewards/margins": 13.20808219909668, |
| "rewards/rejected": -13.90437126159668, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.04646917268276492, |
| "grad_norm": 0.004512485582381487, |
| "learning_rate": 9.535192563081011e-06, |
| "logits/chosen": -34.37498474121094, |
| "logits/rejected": -34.75019454956055, |
| "logps/chosen": -230.13546752929688, |
| "logps/rejected": -257.3545837402344, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.05627549812197685, |
| "rewards/margins": 12.560346603393555, |
| "rewards/rejected": -12.616622924804688, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.04978839930296241, |
| "grad_norm": 0.00021050537179689854, |
| "learning_rate": 9.501992031872511e-06, |
| "logits/chosen": -34.44464111328125, |
| "logits/rejected": -34.87554931640625, |
| "logps/chosen": -255.37686157226562, |
| "logps/rejected": -287.8848571777344, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.9349029064178467, |
| "rewards/margins": 14.170918464660645, |
| "rewards/rejected": -16.10582160949707, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.0531076259231599, |
| "grad_norm": 0.007320842240005732, |
| "learning_rate": 9.468791500664011e-06, |
| "logits/chosen": -35.74467086791992, |
| "logits/rejected": -36.10135269165039, |
| "logps/chosen": -210.03109741210938, |
| "logps/rejected": -240.8317108154297, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.17547592520713806, |
| "rewards/margins": 13.636068344116211, |
| "rewards/rejected": -13.811543464660645, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.0564268525433574, |
| "grad_norm": 0.0010633851634338498, |
| "learning_rate": 9.435590969455513e-06, |
| "logits/chosen": -34.923927307128906, |
| "logits/rejected": -35.52933883666992, |
| "logps/chosen": -231.2039794921875, |
| "logps/rejected": -261.8594665527344, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.06495578587055206, |
| "rewards/margins": 13.507354736328125, |
| "rewards/rejected": -13.572309494018555, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.05974607916355489, |
| "grad_norm": 0.006778092123568058, |
| "learning_rate": 9.402390438247013e-06, |
| "logits/chosen": -33.77501678466797, |
| "logits/rejected": -34.322486877441406, |
| "logps/chosen": -252.57046508789062, |
| "logps/rejected": -286.8854064941406, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.6652179956436157, |
| "rewards/margins": 14.697219848632812, |
| "rewards/rejected": -16.362438201904297, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.06306530578375238, |
| "grad_norm": 0.009588481858372688, |
| "learning_rate": 9.369189907038513e-06, |
| "logits/chosen": -33.23898696899414, |
| "logits/rejected": -33.55398178100586, |
| "logps/chosen": -249.4516143798828, |
| "logps/rejected": -283.144287109375, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.155587911605835, |
| "rewards/margins": 14.480944633483887, |
| "rewards/rejected": -15.636533737182617, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.06638453240394988, |
| "grad_norm": 0.0020435138139873743, |
| "learning_rate": 9.335989375830013e-06, |
| "logits/chosen": -34.49266815185547, |
| "logits/rejected": -35.19890213012695, |
| "logps/chosen": -260.21063232421875, |
| "logps/rejected": -294.55059814453125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.3676701784133911, |
| "rewards/margins": 14.618782043457031, |
| "rewards/rejected": -15.986452102661133, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.06970375902414737, |
| "grad_norm": 0.007681610994040966, |
| "learning_rate": 9.302788844621515e-06, |
| "logits/chosen": -35.03020095825195, |
| "logits/rejected": -35.49299621582031, |
| "logps/chosen": -238.98388671875, |
| "logps/rejected": -273.298583984375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.093301773071289, |
| "rewards/margins": 14.62084674835205, |
| "rewards/rejected": -15.714147567749023, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.07302298564434487, |
| "grad_norm": 0.0033791419118642807, |
| "learning_rate": 9.269588313413015e-06, |
| "logits/chosen": -35.38592529296875, |
| "logits/rejected": -36.00849533081055, |
| "logps/chosen": -260.4759826660156, |
| "logps/rejected": -298.17706298828125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.7291450500488281, |
| "rewards/margins": 15.51725959777832, |
| "rewards/rejected": -17.246402740478516, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.07634221226454237, |
| "grad_norm": 0.0003220026264898479, |
| "learning_rate": 9.236387782204516e-06, |
| "logits/chosen": -34.74597930908203, |
| "logits/rejected": -35.35438537597656, |
| "logps/chosen": -247.0140838623047, |
| "logps/rejected": -284.0188903808594, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.3175166845321655, |
| "rewards/margins": 15.613149642944336, |
| "rewards/rejected": -16.930665969848633, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.07966143888473985, |
| "grad_norm": 0.00033980957232415676, |
| "learning_rate": 9.203187250996016e-06, |
| "logits/chosen": -34.7844352722168, |
| "logits/rejected": -35.204917907714844, |
| "logps/chosen": -261.2896728515625, |
| "logps/rejected": -298.12152099609375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.142932415008545, |
| "rewards/margins": 15.416728019714355, |
| "rewards/rejected": -17.559659957885742, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.08298066550493735, |
| "grad_norm": 0.006538284942507744, |
| "learning_rate": 9.169986719787516e-06, |
| "logits/chosen": -35.57487487792969, |
| "logits/rejected": -36.020084381103516, |
| "logps/chosen": -253.6564483642578, |
| "logps/rejected": -289.6921691894531, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.6668405532836914, |
| "rewards/margins": 15.152114868164062, |
| "rewards/rejected": -16.818954467773438, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.08629989212513485, |
| "grad_norm": 0.0009295985219068825, |
| "learning_rate": 9.136786188579018e-06, |
| "logits/chosen": -35.48679733276367, |
| "logits/rejected": -36.16688919067383, |
| "logps/chosen": -223.3815155029297, |
| "logps/rejected": -260.7580261230469, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.45010191202163696, |
| "rewards/margins": 15.585649490356445, |
| "rewards/rejected": -16.035751342773438, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.08961911874533234, |
| "grad_norm": 0.00021262650261633098, |
| "learning_rate": 9.103585657370518e-06, |
| "logits/chosen": -33.49461364746094, |
| "logits/rejected": -34.041141510009766, |
| "logps/chosen": -221.56228637695312, |
| "logps/rejected": -258.98822021484375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5517559051513672, |
| "rewards/margins": 15.583239555358887, |
| "rewards/rejected": -16.134998321533203, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.09293834536552983, |
| "grad_norm": 0.0004733486275654286, |
| "learning_rate": 9.07038512616202e-06, |
| "logits/chosen": -33.337162017822266, |
| "logits/rejected": -33.851051330566406, |
| "logps/chosen": -244.8498077392578, |
| "logps/rejected": -283.07763671875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.3852789402008057, |
| "rewards/margins": 15.907007217407227, |
| "rewards/rejected": -17.292285919189453, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.09625757198572732, |
| "grad_norm": 0.00015478464774787426, |
| "learning_rate": 9.03718459495352e-06, |
| "logits/chosen": -35.541263580322266, |
| "logits/rejected": -36.11621856689453, |
| "logps/chosen": -232.34432983398438, |
| "logps/rejected": -272.0796813964844, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.8322229385375977, |
| "rewards/margins": 16.295772552490234, |
| "rewards/rejected": -17.12799644470215, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.09957679860592482, |
| "grad_norm": 0.002806061180308461, |
| "learning_rate": 9.00398406374502e-06, |
| "logits/chosen": -34.066505432128906, |
| "logits/rejected": -34.60965347290039, |
| "logps/chosen": -268.8088684082031, |
| "logps/rejected": -312.8736877441406, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.632227659225464, |
| "rewards/margins": 17.530643463134766, |
| "rewards/rejected": -20.16286849975586, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.10289602522612232, |
| "grad_norm": 0.0005554874078370631, |
| "learning_rate": 8.970783532536521e-06, |
| "logits/chosen": -34.408775329589844, |
| "logits/rejected": -35.35196304321289, |
| "logps/chosen": -229.3350067138672, |
| "logps/rejected": -270.11822509765625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.1011072397232056, |
| "rewards/margins": 16.528867721557617, |
| "rewards/rejected": -17.629976272583008, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.1062152518463198, |
| "grad_norm": 0.005228464491665363, |
| "learning_rate": 8.937583001328021e-06, |
| "logits/chosen": -34.59056854248047, |
| "logits/rejected": -35.54304504394531, |
| "logps/chosen": -223.6347198486328, |
| "logps/rejected": -263.6668395996094, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.2593634128570557, |
| "rewards/margins": 16.28620147705078, |
| "rewards/rejected": -17.545564651489258, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.1095344784665173, |
| "grad_norm": 0.0006935601704753935, |
| "learning_rate": 8.904382470119523e-06, |
| "logits/chosen": -35.739097595214844, |
| "logits/rejected": -36.549564361572266, |
| "logps/chosen": -238.09091186523438, |
| "logps/rejected": -280.36492919921875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.1441192626953125, |
| "rewards/margins": 16.90723419189453, |
| "rewards/rejected": -19.05135154724121, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.1128537050867148, |
| "grad_norm": 0.00026731210527941585, |
| "learning_rate": 8.871181938911023e-06, |
| "logits/chosen": -33.79966735839844, |
| "logits/rejected": -34.6407356262207, |
| "logps/chosen": -216.0535888671875, |
| "logps/rejected": -253.9859619140625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6571240425109863, |
| "rewards/margins": 15.740079879760742, |
| "rewards/rejected": -16.39720344543457, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.11617293170691229, |
| "grad_norm": 3.3017222449416295e-05, |
| "learning_rate": 8.837981407702523e-06, |
| "logits/chosen": -34.348304748535156, |
| "logits/rejected": -35.16481399536133, |
| "logps/chosen": -238.8940887451172, |
| "logps/rejected": -280.3075866699219, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.8426218032836914, |
| "rewards/margins": 16.77206039428711, |
| "rewards/rejected": -18.614681243896484, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.11949215832710978, |
| "grad_norm": 0.0004305043548811227, |
| "learning_rate": 8.804780876494025e-06, |
| "logits/chosen": -35.531944274902344, |
| "logits/rejected": -36.35076904296875, |
| "logps/chosen": -238.10140991210938, |
| "logps/rejected": -280.59619140625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.7624365091323853, |
| "rewards/margins": 17.082622528076172, |
| "rewards/rejected": -18.84505844116211, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.12281138494730728, |
| "grad_norm": 4.115190313314088e-05, |
| "learning_rate": 8.771580345285525e-06, |
| "logits/chosen": -34.63116455078125, |
| "logits/rejected": -35.59693908691406, |
| "logps/chosen": -265.68389892578125, |
| "logps/rejected": -310.3666076660156, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.659745693206787, |
| "rewards/margins": 17.709857940673828, |
| "rewards/rejected": -20.36960220336914, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.12613061156750477, |
| "grad_norm": 0.0020732053089886904, |
| "learning_rate": 8.738379814077027e-06, |
| "logits/chosen": -35.7321662902832, |
| "logits/rejected": -36.6037483215332, |
| "logps/chosen": -274.5724792480469, |
| "logps/rejected": -320.93438720703125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.224485397338867, |
| "rewards/margins": 18.24944496154785, |
| "rewards/rejected": -21.47393035888672, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.12944983818770225, |
| "grad_norm": 0.0006749048479832709, |
| "learning_rate": 8.705179282868527e-06, |
| "logits/chosen": -34.394935607910156, |
| "logits/rejected": -35.65528106689453, |
| "logps/chosen": -221.21981811523438, |
| "logps/rejected": -263.59820556640625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.179241418838501, |
| "rewards/margins": 17.081579208374023, |
| "rewards/rejected": -18.260822296142578, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.13276906480789977, |
| "grad_norm": 0.00046231126179918647, |
| "learning_rate": 8.671978751660027e-06, |
| "logits/chosen": -35.219017028808594, |
| "logits/rejected": -35.91897964477539, |
| "logps/chosen": -223.1756591796875, |
| "logps/rejected": -268.28399658203125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.4395145177841187, |
| "rewards/margins": 17.961841583251953, |
| "rewards/rejected": -19.401355743408203, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.13608829142809725, |
| "grad_norm": 0.002402970800176263, |
| "learning_rate": 8.638778220451528e-06, |
| "logits/chosen": -36.17115783691406, |
| "logits/rejected": -37.01081085205078, |
| "logps/chosen": -251.10897827148438, |
| "logps/rejected": -297.64837646484375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.4288759231567383, |
| "rewards/margins": 18.359947204589844, |
| "rewards/rejected": -20.7888240814209, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.13940751804829474, |
| "grad_norm": 0.0008737234747968614, |
| "learning_rate": 8.605577689243028e-06, |
| "logits/chosen": -35.6660270690918, |
| "logits/rejected": -36.46641159057617, |
| "logps/chosen": -243.93917846679688, |
| "logps/rejected": -289.4454345703125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.152601718902588, |
| "rewards/margins": 18.12082862854004, |
| "rewards/rejected": -20.27342987060547, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.14272674466849225, |
| "grad_norm": 0.0008533377549611032, |
| "learning_rate": 8.57237715803453e-06, |
| "logits/chosen": -35.49746322631836, |
| "logits/rejected": -36.489322662353516, |
| "logps/chosen": -232.93820190429688, |
| "logps/rejected": -277.65216064453125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.035013198852539, |
| "rewards/margins": 17.73954963684082, |
| "rewards/rejected": -19.77456283569336, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.14604597128868974, |
| "grad_norm": 0.0005299286567606032, |
| "learning_rate": 8.53917662682603e-06, |
| "logits/chosen": -35.89002227783203, |
| "logits/rejected": -37.11347198486328, |
| "logps/chosen": -244.32278442382812, |
| "logps/rejected": -290.68408203125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.9033141136169434, |
| "rewards/margins": 18.262744903564453, |
| "rewards/rejected": -21.166057586669922, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.14936519790888722, |
| "grad_norm": 0.0007316975970752537, |
| "learning_rate": 8.50597609561753e-06, |
| "logits/chosen": -35.58525848388672, |
| "logits/rejected": -36.55177688598633, |
| "logps/chosen": -218.49252319335938, |
| "logps/rejected": -263.3166809082031, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.6088759899139404, |
| "rewards/margins": 17.73054313659668, |
| "rewards/rejected": -19.339420318603516, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.15268442452908473, |
| "grad_norm": 0.0001297222770517692, |
| "learning_rate": 8.472775564409032e-06, |
| "logits/chosen": -35.233123779296875, |
| "logits/rejected": -36.260986328125, |
| "logps/chosen": -235.96353149414062, |
| "logps/rejected": -281.82904052734375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.4060370922088623, |
| "rewards/margins": 18.074405670166016, |
| "rewards/rejected": -20.480443954467773, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.15600365114928222, |
| "grad_norm": 0.00011668611841741949, |
| "learning_rate": 8.439575033200532e-06, |
| "logits/chosen": -36.725379943847656, |
| "logits/rejected": -37.734004974365234, |
| "logps/chosen": -261.49835205078125, |
| "logps/rejected": -308.49957275390625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.4732394218444824, |
| "rewards/margins": 18.485958099365234, |
| "rewards/rejected": -20.959197998046875, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.1593228777694797, |
| "grad_norm": 0.00016372502432204783, |
| "learning_rate": 8.406374501992033e-06, |
| "logits/chosen": -35.07146072387695, |
| "logits/rejected": -35.87404251098633, |
| "logps/chosen": -246.40573120117188, |
| "logps/rejected": -293.628662109375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.4306976795196533, |
| "rewards/margins": 18.557546615600586, |
| "rewards/rejected": -20.988243103027344, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.16264210438967722, |
| "grad_norm": 0.008033442310988903, |
| "learning_rate": 8.373173970783533e-06, |
| "logits/chosen": -35.59102249145508, |
| "logits/rejected": -36.70330047607422, |
| "logps/chosen": -233.5152587890625, |
| "logps/rejected": -280.5353088378906, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.2914183139801025, |
| "rewards/margins": 18.466495513916016, |
| "rewards/rejected": -20.757911682128906, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.1659613310098747, |
| "grad_norm": 0.0005811351002193987, |
| "learning_rate": 8.339973439575035e-06, |
| "logits/chosen": -34.38259506225586, |
| "logits/rejected": -35.335872650146484, |
| "logps/chosen": -245.3876953125, |
| "logps/rejected": -292.680419921875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.7595839500427246, |
| "rewards/margins": 18.590240478515625, |
| "rewards/rejected": -21.349822998046875, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.1692805576300722, |
| "grad_norm": 0.00011935765360249206, |
| "learning_rate": 8.306772908366535e-06, |
| "logits/chosen": -35.616981506347656, |
| "logits/rejected": -36.57094955444336, |
| "logps/chosen": -245.680908203125, |
| "logps/rejected": -293.33575439453125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.987445592880249, |
| "rewards/margins": 18.696874618530273, |
| "rewards/rejected": -20.68431854248047, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.1725997842502697, |
| "grad_norm": 0.00013885533553548157, |
| "learning_rate": 8.273572377158035e-06, |
| "logits/chosen": -35.12391662597656, |
| "logits/rejected": -35.98273468017578, |
| "logps/chosen": -233.94204711914062, |
| "logps/rejected": -281.42010498046875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.7768646478652954, |
| "rewards/margins": 18.548133850097656, |
| "rewards/rejected": -20.32499885559082, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.17591901087046719, |
| "grad_norm": 0.00012209195119794458, |
| "learning_rate": 8.240371845949537e-06, |
| "logits/chosen": -35.81105041503906, |
| "logits/rejected": -37.02412033081055, |
| "logps/chosen": -266.5361022949219, |
| "logps/rejected": -317.65216064453125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.5481743812561035, |
| "rewards/margins": 19.596721649169922, |
| "rewards/rejected": -23.144895553588867, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.17923823749066467, |
| "grad_norm": 3.902066600858234e-05, |
| "learning_rate": 8.207171314741037e-06, |
| "logits/chosen": -36.01176071166992, |
| "logits/rejected": -36.81584930419922, |
| "logps/chosen": -240.7890625, |
| "logps/rejected": -292.3703308105469, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.82381534576416, |
| "rewards/margins": 19.824779510498047, |
| "rewards/rejected": -22.648595809936523, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.18255746411086216, |
| "grad_norm": 0.0014665070921182632, |
| "learning_rate": 8.173970783532539e-06, |
| "logits/chosen": -34.90106964111328, |
| "logits/rejected": -35.94670867919922, |
| "logps/chosen": -255.3992919921875, |
| "logps/rejected": -305.7498779296875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.2722907066345215, |
| "rewards/margins": 19.493640899658203, |
| "rewards/rejected": -22.765932083129883, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.18587669073105967, |
| "grad_norm": 0.0001265254250029102, |
| "learning_rate": 8.140770252324039e-06, |
| "logits/chosen": -36.034767150878906, |
| "logits/rejected": -37.131126403808594, |
| "logps/chosen": -241.76644897460938, |
| "logps/rejected": -290.7240905761719, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.036072254180908, |
| "rewards/margins": 19.04810905456543, |
| "rewards/rejected": -21.08418083190918, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.18919591735125715, |
| "grad_norm": 0.00034473679261282086, |
| "learning_rate": 8.107569721115539e-06, |
| "logits/chosen": -35.518226623535156, |
| "logits/rejected": -36.253692626953125, |
| "logps/chosen": -231.61618041992188, |
| "logps/rejected": -282.1851501464844, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.092636823654175, |
| "rewards/margins": 19.56393814086914, |
| "rewards/rejected": -21.656574249267578, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.19251514397145464, |
| "grad_norm": 0.00030375979258678854, |
| "learning_rate": 8.074369189907039e-06, |
| "logits/chosen": -35.77162551879883, |
| "logits/rejected": -36.470115661621094, |
| "logps/chosen": -206.6995391845703, |
| "logps/rejected": -254.5850830078125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.6058409214019775, |
| "rewards/margins": 18.82034683227539, |
| "rewards/rejected": -20.42618751525879, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.19583437059165215, |
| "grad_norm": 1.3439960639516357e-05, |
| "learning_rate": 8.041168658698539e-06, |
| "logits/chosen": -34.774444580078125, |
| "logits/rejected": -35.688079833984375, |
| "logps/chosen": -259.68499755859375, |
| "logps/rejected": -309.86865234375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.393587827682495, |
| "rewards/margins": 19.45305061340332, |
| "rewards/rejected": -22.846635818481445, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.19915359721184964, |
| "grad_norm": 4.3687683501048014e-05, |
| "learning_rate": 8.00796812749004e-06, |
| "logits/chosen": -34.41751480102539, |
| "logits/rejected": -35.5643310546875, |
| "logps/chosen": -239.58407592773438, |
| "logps/rejected": -288.7073974609375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.1479239463806152, |
| "rewards/margins": 19.129615783691406, |
| "rewards/rejected": -21.277538299560547, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.20247282383204712, |
| "grad_norm": 1.1106268175353762e-05, |
| "learning_rate": 7.97476759628154e-06, |
| "logits/chosen": -36.191566467285156, |
| "logits/rejected": -37.139347076416016, |
| "logps/chosen": -255.52566528320312, |
| "logps/rejected": -305.6492614746094, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.047039031982422, |
| "rewards/margins": 19.359500885009766, |
| "rewards/rejected": -22.406539916992188, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.20579205045224463, |
| "grad_norm": 7.13270710548386e-05, |
| "learning_rate": 7.941567065073042e-06, |
| "logits/chosen": -34.59242248535156, |
| "logits/rejected": -35.580257415771484, |
| "logps/chosen": -251.13967895507812, |
| "logps/rejected": -303.52215576171875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.805518865585327, |
| "rewards/margins": 20.101978302001953, |
| "rewards/rejected": -22.907493591308594, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.20911127707244212, |
| "grad_norm": 8.512644126312807e-05, |
| "learning_rate": 7.908366533864542e-06, |
| "logits/chosen": -35.026451110839844, |
| "logits/rejected": -36.46593475341797, |
| "logps/chosen": -235.6885528564453, |
| "logps/rejected": -285.78985595703125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.588026762008667, |
| "rewards/margins": 19.415477752685547, |
| "rewards/rejected": -22.00350570678711, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.2124305036926396, |
| "grad_norm": 0.0017007539281621575, |
| "learning_rate": 7.875166002656042e-06, |
| "logits/chosen": -36.41635513305664, |
| "logits/rejected": -37.88849639892578, |
| "logps/chosen": -223.83935546875, |
| "logps/rejected": -275.52423095703125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.7281689643859863, |
| "rewards/margins": 19.8160457611084, |
| "rewards/rejected": -22.544214248657227, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.21574973031283712, |
| "grad_norm": 5.1219332817709073e-05, |
| "learning_rate": 7.841965471447544e-06, |
| "logits/chosen": -36.743431091308594, |
| "logits/rejected": -37.54664611816406, |
| "logps/chosen": -223.3494110107422, |
| "logps/rejected": -274.8848571777344, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.607266902923584, |
| "rewards/margins": 19.915449142456055, |
| "rewards/rejected": -22.522716522216797, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.2190689569330346, |
| "grad_norm": 0.0008978871628642082, |
| "learning_rate": 7.808764940239044e-06, |
| "logits/chosen": -35.73522186279297, |
| "logits/rejected": -36.919593811035156, |
| "logps/chosen": -249.9674835205078, |
| "logps/rejected": -301.19256591796875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.4148049354553223, |
| "rewards/margins": 19.588640213012695, |
| "rewards/rejected": -23.00344467163086, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.2223881835532321, |
| "grad_norm": 0.0005557505646720529, |
| "learning_rate": 7.775564409030545e-06, |
| "logits/chosen": -35.63310241699219, |
| "logits/rejected": -36.968055725097656, |
| "logps/chosen": -234.2410125732422, |
| "logps/rejected": -285.2879333496094, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.3587982654571533, |
| "rewards/margins": 19.75788688659668, |
| "rewards/rejected": -23.11668586730957, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.2257074101734296, |
| "grad_norm": 0.0008148573688231409, |
| "learning_rate": 7.742363877822045e-06, |
| "logits/chosen": -35.244285583496094, |
| "logits/rejected": -36.04697799682617, |
| "logps/chosen": -239.9448699951172, |
| "logps/rejected": -293.1563415527344, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.6865572929382324, |
| "rewards/margins": 20.494766235351562, |
| "rewards/rejected": -23.18132209777832, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.2290266367936271, |
| "grad_norm": 0.0003090895479544997, |
| "learning_rate": 7.709163346613547e-06, |
| "logits/chosen": -35.87933349609375, |
| "logits/rejected": -37.27144241333008, |
| "logps/chosen": -245.03125, |
| "logps/rejected": -296.410400390625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.099365234375, |
| "rewards/margins": 19.793514251708984, |
| "rewards/rejected": -22.892879486083984, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.23234586341382457, |
| "grad_norm": 0.0007792682736180723, |
| "learning_rate": 7.675962815405047e-06, |
| "logits/chosen": -35.90936279296875, |
| "logits/rejected": -37.2296142578125, |
| "logps/chosen": -244.5153045654297, |
| "logps/rejected": -296.6192321777344, |
| "loss": 0.0001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.0061705112457275, |
| "rewards/margins": 19.91374397277832, |
| "rewards/rejected": -22.91991424560547, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.23566509003402208, |
| "grad_norm": 0.0003932374238502234, |
| "learning_rate": 7.642762284196547e-06, |
| "logits/chosen": -35.35771942138672, |
| "logits/rejected": -36.759765625, |
| "logps/chosen": -249.65048217773438, |
| "logps/rejected": -299.1029357910156, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.5528416633605957, |
| "rewards/margins": 19.263607025146484, |
| "rewards/rejected": -22.816450119018555, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.23898431665421957, |
| "grad_norm": 5.749016418121755e-05, |
| "learning_rate": 7.609561752988048e-06, |
| "logits/chosen": -35.59196090698242, |
| "logits/rejected": -37.17299270629883, |
| "logps/chosen": -248.39657592773438, |
| "logps/rejected": -299.969482421875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.843963146209717, |
| "rewards/margins": 19.887401580810547, |
| "rewards/rejected": -23.73136329650879, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.24230354327441705, |
| "grad_norm": 1.9144599718856625e-05, |
| "learning_rate": 7.576361221779549e-06, |
| "logits/chosen": -34.93134307861328, |
| "logits/rejected": -36.642024993896484, |
| "logps/chosen": -249.6949462890625, |
| "logps/rejected": -304.68865966796875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.464253902435303, |
| "rewards/margins": 21.0100154876709, |
| "rewards/rejected": -25.47426986694336, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.24562276989461457, |
| "grad_norm": 1.135500042437343e-05, |
| "learning_rate": 7.54316069057105e-06, |
| "logits/chosen": -36.011444091796875, |
| "logits/rejected": -37.811126708984375, |
| "logps/chosen": -261.965576171875, |
| "logps/rejected": -317.7340087890625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.150547027587891, |
| "rewards/margins": 21.09018325805664, |
| "rewards/rejected": -26.2407283782959, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.24894199651481205, |
| "grad_norm": 0.0003412498044781387, |
| "learning_rate": 7.5099601593625505e-06, |
| "logits/chosen": -35.549930572509766, |
| "logits/rejected": -36.97046661376953, |
| "logps/chosen": -260.5018615722656, |
| "logps/rejected": -313.27996826171875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.216989040374756, |
| "rewards/margins": 20.170087814331055, |
| "rewards/rejected": -25.387075424194336, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.25226122313500954, |
| "grad_norm": 0.0001097567001124844, |
| "learning_rate": 7.476759628154051e-06, |
| "logits/chosen": -35.34148406982422, |
| "logits/rejected": -37.208404541015625, |
| "logps/chosen": -249.30068969726562, |
| "logps/rejected": -302.40704345703125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.520068645477295, |
| "rewards/margins": 20.316801071166992, |
| "rewards/rejected": -24.836868286132812, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.25558044975520705, |
| "grad_norm": 0.0003229718713555485, |
| "learning_rate": 7.443559096945551e-06, |
| "logits/chosen": -36.1650390625, |
| "logits/rejected": -38.04664611816406, |
| "logps/chosen": -240.16683959960938, |
| "logps/rejected": -290.6697998046875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.272226810455322, |
| "rewards/margins": 19.5815372467041, |
| "rewards/rejected": -23.853763580322266, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.2588996763754045, |
| "grad_norm": 0.00028919236501678824, |
| "learning_rate": 7.410358565737052e-06, |
| "logits/chosen": -35.1163444519043, |
| "logits/rejected": -37.21973419189453, |
| "logps/chosen": -280.9646911621094, |
| "logps/rejected": -339.1036682128906, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.527701377868652, |
| "rewards/margins": 21.770851135253906, |
| "rewards/rejected": -28.29854965209961, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.262218902995602, |
| "grad_norm": 1.0552365665716934e-06, |
| "learning_rate": 7.377158034528553e-06, |
| "logits/chosen": -36.21448516845703, |
| "logits/rejected": -37.57536315917969, |
| "logps/chosen": -268.54876708984375, |
| "logps/rejected": -326.67742919921875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.952622890472412, |
| "rewards/margins": 21.72669792175293, |
| "rewards/rejected": -27.6793212890625, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.26553812961579953, |
| "grad_norm": 0.002526302356272936, |
| "learning_rate": 7.343957503320054e-06, |
| "logits/chosen": -35.75117492675781, |
| "logits/rejected": -37.249168395996094, |
| "logps/chosen": -255.9884796142578, |
| "logps/rejected": -310.1398010253906, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.9749069213867188, |
| "rewards/margins": 20.556264877319336, |
| "rewards/rejected": -24.531173706054688, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.268857356235997, |
| "grad_norm": 6.492368993349373e-05, |
| "learning_rate": 7.310756972111555e-06, |
| "logits/chosen": -37.125267028808594, |
| "logits/rejected": -38.65117263793945, |
| "logps/chosen": -247.0055389404297, |
| "logps/rejected": -303.65435791015625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.017395973205566, |
| "rewards/margins": 21.206653594970703, |
| "rewards/rejected": -26.224048614501953, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.2721765828561945, |
| "grad_norm": 0.00018094113329425454, |
| "learning_rate": 7.277556440903056e-06, |
| "logits/chosen": -36.480506896972656, |
| "logits/rejected": -38.204551696777344, |
| "logps/chosen": -259.48321533203125, |
| "logps/rejected": -314.32977294921875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.440280437469482, |
| "rewards/margins": 20.785701751708984, |
| "rewards/rejected": -25.225984573364258, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.275495809476392, |
| "grad_norm": 7.49337486922741e-05, |
| "learning_rate": 7.244355909694556e-06, |
| "logits/chosen": -34.9041633605957, |
| "logits/rejected": -36.328147888183594, |
| "logps/chosen": -253.56204223632812, |
| "logps/rejected": -308.58465576171875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.441735744476318, |
| "rewards/margins": 20.866907119750977, |
| "rewards/rejected": -25.308639526367188, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.2788150360965895, |
| "grad_norm": 7.805244240444154e-05, |
| "learning_rate": 7.2111553784860565e-06, |
| "logits/chosen": -37.03504180908203, |
| "logits/rejected": -39.16151809692383, |
| "logps/chosen": -238.0412139892578, |
| "logps/rejected": -291.0369567871094, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.6084489822387695, |
| "rewards/margins": 20.149539947509766, |
| "rewards/rejected": -24.75798988342285, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.282134262716787, |
| "grad_norm": 4.934472235618159e-05, |
| "learning_rate": 7.177954847277557e-06, |
| "logits/chosen": -35.14350509643555, |
| "logits/rejected": -36.96980285644531, |
| "logps/chosen": -226.21932983398438, |
| "logps/rejected": -278.470703125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.3487021923065186, |
| "rewards/margins": 20.134754180908203, |
| "rewards/rejected": -23.483455657958984, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.2854534893369845, |
| "grad_norm": 4.3870386434718966e-05, |
| "learning_rate": 7.144754316069058e-06, |
| "logits/chosen": -33.659706115722656, |
| "logits/rejected": -35.260860443115234, |
| "logps/chosen": -292.42681884765625, |
| "logps/rejected": -348.8122253417969, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.7106709480285645, |
| "rewards/margins": 21.273283004760742, |
| "rewards/rejected": -26.98395347595215, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.28877271595718196, |
| "grad_norm": 0.00014428362192120403, |
| "learning_rate": 7.111553784860559e-06, |
| "logits/chosen": -36.988136291503906, |
| "logits/rejected": -38.66490173339844, |
| "logps/chosen": -241.1929168701172, |
| "logps/rejected": -297.6890869140625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.3865532875061035, |
| "rewards/margins": 21.200180053710938, |
| "rewards/rejected": -25.586734771728516, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.29209194257737947, |
| "grad_norm": 1.8365805090070353e-06, |
| "learning_rate": 7.078353253652059e-06, |
| "logits/chosen": -35.72251892089844, |
| "logits/rejected": -37.48300552368164, |
| "logps/chosen": -265.85968017578125, |
| "logps/rejected": -321.50103759765625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.265963554382324, |
| "rewards/margins": 20.955617904663086, |
| "rewards/rejected": -26.221582412719727, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.295411169197577, |
| "grad_norm": 0.0001635378139326349, |
| "learning_rate": 7.04515272244356e-06, |
| "logits/chosen": -34.546783447265625, |
| "logits/rejected": -36.24065399169922, |
| "logps/chosen": -262.6241149902344, |
| "logps/rejected": -318.17755126953125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.923575401306152, |
| "rewards/margins": 21.00967025756836, |
| "rewards/rejected": -25.933246612548828, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.29873039581777444, |
| "grad_norm": 4.392620030557737e-05, |
| "learning_rate": 7.011952191235061e-06, |
| "logits/chosen": -35.98107147216797, |
| "logits/rejected": -38.07038497924805, |
| "logps/chosen": -262.16668701171875, |
| "logps/rejected": -319.5645751953125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.970273494720459, |
| "rewards/margins": 21.547801971435547, |
| "rewards/rejected": -26.5180721282959, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.30204962243797195, |
| "grad_norm": 2.0514065909083e-05, |
| "learning_rate": 6.978751660026562e-06, |
| "logits/chosen": -37.26417922973633, |
| "logits/rejected": -39.03954315185547, |
| "logps/chosen": -227.80126953125, |
| "logps/rejected": -282.78240966796875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.066802501678467, |
| "rewards/margins": 20.91775131225586, |
| "rewards/rejected": -24.98455238342285, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.30536884905816947, |
| "grad_norm": 1.58273360284511e-05, |
| "learning_rate": 6.9455511288180625e-06, |
| "logits/chosen": -36.633819580078125, |
| "logits/rejected": -38.505821228027344, |
| "logps/chosen": -275.9680480957031, |
| "logps/rejected": -333.1492614746094, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.068737983703613, |
| "rewards/margins": 21.3752498626709, |
| "rewards/rejected": -27.443988800048828, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.3086880756783669, |
| "grad_norm": 1.6043051800807007e-05, |
| "learning_rate": 6.9123505976095625e-06, |
| "logits/chosen": -35.964290618896484, |
| "logits/rejected": -38.0440559387207, |
| "logps/chosen": -258.4613342285156, |
| "logps/rejected": -313.12127685546875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.635455131530762, |
| "rewards/margins": 20.710844039916992, |
| "rewards/rejected": -25.34630012512207, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.31200730229856444, |
| "grad_norm": 4.5550634240498766e-05, |
| "learning_rate": 6.879150066401063e-06, |
| "logits/chosen": -36.91032409667969, |
| "logits/rejected": -38.867332458496094, |
| "logps/chosen": -226.87857055664062, |
| "logps/rejected": -282.0513000488281, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.517120361328125, |
| "rewards/margins": 20.991397857666016, |
| "rewards/rejected": -24.50851821899414, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.31532652891876195, |
| "grad_norm": 1.3055984709353652e-05, |
| "learning_rate": 6.845949535192563e-06, |
| "logits/chosen": -37.46794128417969, |
| "logits/rejected": -39.282188415527344, |
| "logps/chosen": -231.68319702148438, |
| "logps/rejected": -287.63372802734375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.084538459777832, |
| "rewards/margins": 21.120777130126953, |
| "rewards/rejected": -25.205312728881836, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.3186457555389594, |
| "grad_norm": 5.750143827754073e-05, |
| "learning_rate": 6.812749003984063e-06, |
| "logits/chosen": -37.20722961425781, |
| "logits/rejected": -38.91963195800781, |
| "logps/chosen": -247.7166748046875, |
| "logps/rejected": -307.2261657714844, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.870044708251953, |
| "rewards/margins": 22.097675323486328, |
| "rewards/rejected": -26.967721939086914, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.3219649821591569, |
| "grad_norm": 1.0725473657657858e-05, |
| "learning_rate": 6.779548472775564e-06, |
| "logits/chosen": -38.22545623779297, |
| "logits/rejected": -40.045082092285156, |
| "logps/chosen": -227.6758575439453, |
| "logps/rejected": -285.1622619628906, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.086358547210693, |
| "rewards/margins": 21.647619247436523, |
| "rewards/rejected": -25.73398208618164, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.32528420877935443, |
| "grad_norm": 6.18934936937876e-05, |
| "learning_rate": 6.746347941567065e-06, |
| "logits/chosen": -36.12615203857422, |
| "logits/rejected": -38.44302749633789, |
| "logps/chosen": -243.6610565185547, |
| "logps/rejected": -301.9700012207031, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.701846122741699, |
| "rewards/margins": 21.918615341186523, |
| "rewards/rejected": -26.620458602905273, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.3286034353995519, |
| "grad_norm": 1.2099483683414292e-05, |
| "learning_rate": 6.713147410358566e-06, |
| "logits/chosen": -36.2269172668457, |
| "logits/rejected": -38.308746337890625, |
| "logps/chosen": -249.517333984375, |
| "logps/rejected": -304.70123291015625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.904040813446045, |
| "rewards/margins": 21.009593963623047, |
| "rewards/rejected": -25.913631439208984, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.3319226620197494, |
| "grad_norm": 2.5862636903184466e-05, |
| "learning_rate": 6.679946879150067e-06, |
| "logits/chosen": -36.43896484375, |
| "logits/rejected": -38.08635711669922, |
| "logps/chosen": -267.87164306640625, |
| "logps/rejected": -327.8034362792969, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.200272560119629, |
| "rewards/margins": 22.356639862060547, |
| "rewards/rejected": -27.556909561157227, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.3352418886399469, |
| "grad_norm": 6.518360805785051e-06, |
| "learning_rate": 6.646746347941568e-06, |
| "logits/chosen": -37.341712951660156, |
| "logits/rejected": -39.31797790527344, |
| "logps/chosen": -251.332275390625, |
| "logps/rejected": -309.8183898925781, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.140790939331055, |
| "rewards/margins": 21.90488052368164, |
| "rewards/rejected": -27.045673370361328, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.3385611152601444, |
| "grad_norm": 1.5175602129602339e-05, |
| "learning_rate": 6.613545816733068e-06, |
| "logits/chosen": -35.90704345703125, |
| "logits/rejected": -37.437522888183594, |
| "logps/chosen": -275.33441162109375, |
| "logps/rejected": -335.77117919921875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.6894025802612305, |
| "rewards/margins": 22.447458267211914, |
| "rewards/rejected": -28.136859893798828, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.3418803418803419, |
| "grad_norm": 0.00012730048911180347, |
| "learning_rate": 6.5803452855245685e-06, |
| "logits/chosen": -35.909645080566406, |
| "logits/rejected": -38.034759521484375, |
| "logps/chosen": -265.9467468261719, |
| "logps/rejected": -325.89459228515625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.635085105895996, |
| "rewards/margins": 22.229894638061523, |
| "rewards/rejected": -27.864978790283203, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.3451995685005394, |
| "grad_norm": 1.0754079994512722e-05, |
| "learning_rate": 6.547144754316069e-06, |
| "logits/chosen": -36.612430572509766, |
| "logits/rejected": -38.43956756591797, |
| "logps/chosen": -257.3985595703125, |
| "logps/rejected": -317.87518310546875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.553804874420166, |
| "rewards/margins": 22.50185775756836, |
| "rewards/rejected": -27.0556640625, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.34851879512073686, |
| "grad_norm": 0.00021651283896062523, |
| "learning_rate": 6.51394422310757e-06, |
| "logits/chosen": -36.52727508544922, |
| "logits/rejected": -38.221580505371094, |
| "logps/chosen": -248.41140747070312, |
| "logps/rejected": -308.6651611328125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.484927177429199, |
| "rewards/margins": 22.511966705322266, |
| "rewards/rejected": -26.996891021728516, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.35183802174093437, |
| "grad_norm": 0.00016664496797602624, |
| "learning_rate": 6.480743691899071e-06, |
| "logits/chosen": -36.22939682006836, |
| "logits/rejected": -38.311370849609375, |
| "logps/chosen": -246.49362182617188, |
| "logps/rejected": -305.3147888183594, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.3643717765808105, |
| "rewards/margins": 21.965564727783203, |
| "rewards/rejected": -26.329936981201172, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.3551572483611319, |
| "grad_norm": 7.72338462411426e-05, |
| "learning_rate": 6.447543160690571e-06, |
| "logits/chosen": -36.22492980957031, |
| "logits/rejected": -38.05788803100586, |
| "logps/chosen": -249.41928100585938, |
| "logps/rejected": -306.9188232421875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.453120708465576, |
| "rewards/margins": 21.600210189819336, |
| "rewards/rejected": -27.053333282470703, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.35847647498132934, |
| "grad_norm": 2.7300588044454344e-05, |
| "learning_rate": 6.414342629482072e-06, |
| "logits/chosen": -35.102294921875, |
| "logits/rejected": -37.027076721191406, |
| "logps/chosen": -251.52194213867188, |
| "logps/rejected": -308.4041748046875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.3340044021606445, |
| "rewards/margins": 21.424251556396484, |
| "rewards/rejected": -25.758255004882812, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.36179570160152685, |
| "grad_norm": 8.087195965345018e-06, |
| "learning_rate": 6.381142098273573e-06, |
| "logits/chosen": -35.54331588745117, |
| "logits/rejected": -37.03045654296875, |
| "logps/chosen": -274.4892578125, |
| "logps/rejected": -336.3668518066406, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.781479358673096, |
| "rewards/margins": 22.853675842285156, |
| "rewards/rejected": -28.63515281677246, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.3651149282217243, |
| "grad_norm": 9.18296427698806e-06, |
| "learning_rate": 6.347941567065074e-06, |
| "logits/chosen": -36.24702072143555, |
| "logits/rejected": -38.01585388183594, |
| "logps/chosen": -245.00448608398438, |
| "logps/rejected": -303.3841552734375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.3061394691467285, |
| "rewards/margins": 21.940967559814453, |
| "rewards/rejected": -26.247106552124023, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.3684341548419218, |
| "grad_norm": 0.0001284556492464617, |
| "learning_rate": 6.3147410358565745e-06, |
| "logits/chosen": -35.398197174072266, |
| "logits/rejected": -36.98078536987305, |
| "logps/chosen": -260.3328857421875, |
| "logps/rejected": -318.5396423339844, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.786358833312988, |
| "rewards/margins": 21.85409927368164, |
| "rewards/rejected": -26.640457153320312, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.37175338146211934, |
| "grad_norm": 9.004733874462545e-05, |
| "learning_rate": 6.2815405046480745e-06, |
| "logits/chosen": -35.57320785522461, |
| "logits/rejected": -37.42049026489258, |
| "logps/chosen": -239.71749877929688, |
| "logps/rejected": -300.3533630371094, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.225986003875732, |
| "rewards/margins": 22.552154541015625, |
| "rewards/rejected": -26.778141021728516, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.3750726080823168, |
| "grad_norm": 2.8532302167150192e-05, |
| "learning_rate": 6.248339973439575e-06, |
| "logits/chosen": -36.609615325927734, |
| "logits/rejected": -38.67702865600586, |
| "logps/chosen": -245.4535675048828, |
| "logps/rejected": -304.02178955078125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.093851566314697, |
| "rewards/margins": 21.945655822753906, |
| "rewards/rejected": -26.039508819580078, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.3783918347025143, |
| "grad_norm": 0.00011670405365293846, |
| "learning_rate": 6.215139442231076e-06, |
| "logits/chosen": -36.06822967529297, |
| "logits/rejected": -38.279300689697266, |
| "logps/chosen": -262.287841796875, |
| "logps/rejected": -324.5474548339844, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.206493377685547, |
| "rewards/margins": 23.023685455322266, |
| "rewards/rejected": -29.230178833007812, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.3817110613227118, |
| "grad_norm": 1.6790625522844493e-05, |
| "learning_rate": 6.181938911022577e-06, |
| "logits/chosen": -36.757450103759766, |
| "logits/rejected": -38.45673370361328, |
| "logps/chosen": -251.1743927001953, |
| "logps/rejected": -310.25323486328125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.7559704780578613, |
| "rewards/margins": 22.096744537353516, |
| "rewards/rejected": -25.852712631225586, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.3850302879429093, |
| "grad_norm": 0.0018879029667004943, |
| "learning_rate": 6.148738379814078e-06, |
| "logits/chosen": -35.55485916137695, |
| "logits/rejected": -37.381107330322266, |
| "logps/chosen": -246.38583374023438, |
| "logps/rejected": -306.12286376953125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.242562770843506, |
| "rewards/margins": 22.17359161376953, |
| "rewards/rejected": -26.416152954101562, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.3883495145631068, |
| "grad_norm": 0.00015840095875319093, |
| "learning_rate": 6.115537848605578e-06, |
| "logits/chosen": -36.76372146606445, |
| "logits/rejected": -38.73911666870117, |
| "logps/chosen": -240.2417449951172, |
| "logps/rejected": -301.1226501464844, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.495165824890137, |
| "rewards/margins": 22.608638763427734, |
| "rewards/rejected": -27.103809356689453, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.3916687411833043, |
| "grad_norm": 6.375902739819139e-05, |
| "learning_rate": 6.082337317397079e-06, |
| "logits/chosen": -36.941650390625, |
| "logits/rejected": -38.865909576416016, |
| "logps/chosen": -231.86373901367188, |
| "logps/rejected": -290.8138732910156, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.150005340576172, |
| "rewards/margins": 22.098033905029297, |
| "rewards/rejected": -26.248037338256836, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.39498796780350176, |
| "grad_norm": 9.028934982779901e-06, |
| "learning_rate": 6.04913678618858e-06, |
| "logits/chosen": -35.28303527832031, |
| "logits/rejected": -37.00617218017578, |
| "logps/chosen": -276.6689758300781, |
| "logps/rejected": -339.3366394042969, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.4967780113220215, |
| "rewards/margins": 23.187326431274414, |
| "rewards/rejected": -28.68410301208496, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.3983071944236993, |
| "grad_norm": 6.348345209516992e-07, |
| "learning_rate": 6.0159362549800805e-06, |
| "logits/chosen": -36.24475860595703, |
| "logits/rejected": -37.87803268432617, |
| "logps/chosen": -224.39480590820312, |
| "logps/rejected": -285.2892761230469, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.7975051403045654, |
| "rewards/margins": 22.595354080200195, |
| "rewards/rejected": -26.392858505249023, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.4016264210438968, |
| "grad_norm": 2.397124444541987e-05, |
| "learning_rate": 5.982735723771581e-06, |
| "logits/chosen": -35.9412727355957, |
| "logits/rejected": -37.60810089111328, |
| "logps/chosen": -260.8143310546875, |
| "logps/rejected": -321.2140197753906, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.919522762298584, |
| "rewards/margins": 22.569488525390625, |
| "rewards/rejected": -27.489013671875, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.40494564766409424, |
| "grad_norm": 7.391794497380033e-05, |
| "learning_rate": 5.949535192563081e-06, |
| "logits/chosen": -35.552833557128906, |
| "logits/rejected": -37.327735900878906, |
| "logps/chosen": -245.9195556640625, |
| "logps/rejected": -303.10638427734375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.919013261795044, |
| "rewards/margins": 21.555124282836914, |
| "rewards/rejected": -25.474132537841797, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.40826487428429176, |
| "grad_norm": 1.4663862202723976e-05, |
| "learning_rate": 5.916334661354582e-06, |
| "logits/chosen": -35.970951080322266, |
| "logits/rejected": -37.302703857421875, |
| "logps/chosen": -238.4794921875, |
| "logps/rejected": -298.0510559082031, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.057009696960449, |
| "rewards/margins": 22.244709014892578, |
| "rewards/rejected": -26.30171775817871, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.41158410090448927, |
| "grad_norm": 1.8007700418820605e-05, |
| "learning_rate": 5.883134130146083e-06, |
| "logits/chosen": -35.56674575805664, |
| "logits/rejected": -37.349674224853516, |
| "logps/chosen": -241.77908325195312, |
| "logps/rejected": -301.67529296875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.9492950439453125, |
| "rewards/margins": 22.323001861572266, |
| "rewards/rejected": -26.27229881286621, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.4149033275246867, |
| "grad_norm": 9.073022738448344e-06, |
| "learning_rate": 5.849933598937584e-06, |
| "logits/chosen": -35.80928039550781, |
| "logits/rejected": -37.83527374267578, |
| "logps/chosen": -269.43780517578125, |
| "logps/rejected": -330.70965576171875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.043967247009277, |
| "rewards/margins": 22.84233283996582, |
| "rewards/rejected": -27.88629722595215, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.41822255414488424, |
| "grad_norm": 5.101820988784311e-06, |
| "learning_rate": 5.816733067729085e-06, |
| "logits/chosen": -36.2827033996582, |
| "logits/rejected": -37.87694549560547, |
| "logps/chosen": -249.8275604248047, |
| "logps/rejected": -310.7562561035156, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.4787421226501465, |
| "rewards/margins": 22.689529418945312, |
| "rewards/rejected": -27.168270111083984, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.42154178076508175, |
| "grad_norm": 1.537020580144599e-05, |
| "learning_rate": 5.783532536520585e-06, |
| "logits/chosen": -37.90897750854492, |
| "logits/rejected": -40.028099060058594, |
| "logps/chosen": -227.4257049560547, |
| "logps/rejected": -288.1922912597656, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.244109153747559, |
| "rewards/margins": 22.62444496154785, |
| "rewards/rejected": -26.868555068969727, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.4248610073852792, |
| "grad_norm": 0.0001243548176717013, |
| "learning_rate": 5.750332005312086e-06, |
| "logits/chosen": -35.16408920288086, |
| "logits/rejected": -36.898799896240234, |
| "logps/chosen": -281.3883056640625, |
| "logps/rejected": -342.1923828125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.85723876953125, |
| "rewards/margins": 22.52381134033203, |
| "rewards/rejected": -28.38104820251465, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.4281802340054767, |
| "grad_norm": 0.0001157997248810716, |
| "learning_rate": 5.7171314741035865e-06, |
| "logits/chosen": -36.70041275024414, |
| "logits/rejected": -38.27075958251953, |
| "logps/chosen": -250.15185546875, |
| "logps/rejected": -310.4129638671875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.715483665466309, |
| "rewards/margins": 22.438804626464844, |
| "rewards/rejected": -27.154285430908203, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.43149946062567424, |
| "grad_norm": 0.00022845834610052407, |
| "learning_rate": 5.683930942895087e-06, |
| "logits/chosen": -36.59210968017578, |
| "logits/rejected": -38.293800354003906, |
| "logps/chosen": -257.09759521484375, |
| "logps/rejected": -319.6377868652344, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.797603130340576, |
| "rewards/margins": 23.203828811645508, |
| "rewards/rejected": -28.00143051147461, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.4348186872458717, |
| "grad_norm": 0.0003446840273682028, |
| "learning_rate": 5.650730411686588e-06, |
| "logits/chosen": -36.782691955566406, |
| "logits/rejected": -39.01188278198242, |
| "logps/chosen": -237.51956176757812, |
| "logps/rejected": -299.38360595703125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.052783489227295, |
| "rewards/margins": 22.880496978759766, |
| "rewards/rejected": -26.93328285217285, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.4381379138660692, |
| "grad_norm": 8.070600415521767e-06, |
| "learning_rate": 5.617529880478087e-06, |
| "logits/chosen": -35.52574920654297, |
| "logits/rejected": -37.07780075073242, |
| "logps/chosen": -247.0010223388672, |
| "logps/rejected": -309.9228515625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.061142921447754, |
| "rewards/margins": 23.22298812866211, |
| "rewards/rejected": -27.284133911132812, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.4414571404862667, |
| "grad_norm": 3.095080319326371e-05, |
| "learning_rate": 5.584329349269588e-06, |
| "logits/chosen": -35.79111099243164, |
| "logits/rejected": -37.27584457397461, |
| "logps/chosen": -237.08206176757812, |
| "logps/rejected": -298.28515625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.881493330001831, |
| "rewards/margins": 22.693397521972656, |
| "rewards/rejected": -26.574893951416016, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.4447763671064642, |
| "grad_norm": 0.00014526672021020204, |
| "learning_rate": 5.551128818061089e-06, |
| "logits/chosen": -36.553951263427734, |
| "logits/rejected": -38.48070526123047, |
| "logps/chosen": -218.1559295654297, |
| "logps/rejected": -276.0164489746094, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.207418918609619, |
| "rewards/margins": 21.726083755493164, |
| "rewards/rejected": -24.933500289916992, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.4480955937266617, |
| "grad_norm": 1.6845664504216984e-05, |
| "learning_rate": 5.51792828685259e-06, |
| "logits/chosen": -36.16156768798828, |
| "logits/rejected": -38.066184997558594, |
| "logps/chosen": -282.4837341308594, |
| "logps/rejected": -346.942138671875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.882527828216553, |
| "rewards/margins": 23.716060638427734, |
| "rewards/rejected": -29.598590850830078, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.4514148203468592, |
| "grad_norm": 0.0001524945255368948, |
| "learning_rate": 5.48472775564409e-06, |
| "logits/chosen": -37.370155334472656, |
| "logits/rejected": -39.46926498413086, |
| "logps/chosen": -248.5599822998047, |
| "logps/rejected": -312.64398193359375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.415983200073242, |
| "rewards/margins": 23.64468002319336, |
| "rewards/rejected": -29.060659408569336, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.45473404696705666, |
| "grad_norm": 0.000820525863673538, |
| "learning_rate": 5.451527224435591e-06, |
| "logits/chosen": -36.646820068359375, |
| "logits/rejected": -38.414344787597656, |
| "logps/chosen": -233.5876007080078, |
| "logps/rejected": -296.04193115234375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.6753764152526855, |
| "rewards/margins": 23.128164291381836, |
| "rewards/rejected": -27.803543090820312, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.4580532735872542, |
| "grad_norm": 2.5676483801362338e-06, |
| "learning_rate": 5.418326693227092e-06, |
| "logits/chosen": -36.1823616027832, |
| "logits/rejected": -37.99201202392578, |
| "logps/chosen": -261.86920166015625, |
| "logps/rejected": -326.8487243652344, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.565834999084473, |
| "rewards/margins": 23.7719669342041, |
| "rewards/rejected": -29.337799072265625, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.4613725002074517, |
| "grad_norm": 6.69591172481887e-05, |
| "learning_rate": 5.3851261620185925e-06, |
| "logits/chosen": -37.112308502197266, |
| "logits/rejected": -39.388126373291016, |
| "logps/chosen": -253.88955688476562, |
| "logps/rejected": -315.8279113769531, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.793484210968018, |
| "rewards/margins": 23.005237579345703, |
| "rewards/rejected": -27.798717498779297, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.46469172682764914, |
| "grad_norm": 3.6112989619141445e-05, |
| "learning_rate": 5.351925630810093e-06, |
| "logits/chosen": -37.045570373535156, |
| "logits/rejected": -38.63352966308594, |
| "logps/chosen": -274.2471618652344, |
| "logps/rejected": -340.7463684082031, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.769705295562744, |
| "rewards/margins": 24.323572158813477, |
| "rewards/rejected": -30.093280792236328, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.46801095344784666, |
| "grad_norm": 9.693310857983306e-06, |
| "learning_rate": 5.318725099601593e-06, |
| "logits/chosen": -35.83592987060547, |
| "logits/rejected": -37.3544921875, |
| "logps/chosen": -263.32025146484375, |
| "logps/rejected": -329.5469665527344, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.016288757324219, |
| "rewards/margins": 24.141952514648438, |
| "rewards/rejected": -30.158239364624023, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.47133018006804417, |
| "grad_norm": 2.5429770289520093e-07, |
| "learning_rate": 5.285524568393094e-06, |
| "logits/chosen": -36.3372917175293, |
| "logits/rejected": -37.873512268066406, |
| "logps/chosen": -268.4442138671875, |
| "logps/rejected": -334.9126281738281, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.9471821784973145, |
| "rewards/margins": 24.34046173095703, |
| "rewards/rejected": -29.287643432617188, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.4746494066882416, |
| "grad_norm": 8.884577255230397e-06, |
| "learning_rate": 5.252324037184595e-06, |
| "logits/chosen": -35.84831619262695, |
| "logits/rejected": -37.70962142944336, |
| "logps/chosen": -222.9773406982422, |
| "logps/rejected": -280.5433654785156, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.2989723682403564, |
| "rewards/margins": 21.73419761657715, |
| "rewards/rejected": -25.03316879272461, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.47796863330843914, |
| "grad_norm": 5.5867065384518355e-05, |
| "learning_rate": 5.219123505976096e-06, |
| "logits/chosen": -35.37043380737305, |
| "logits/rejected": -36.962501525878906, |
| "logps/chosen": -269.3448791503906, |
| "logps/rejected": -334.05108642578125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.493111610412598, |
| "rewards/margins": 23.792190551757812, |
| "rewards/rejected": -29.285303115844727, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.48128785992863665, |
| "grad_norm": 9.691136256151367e-06, |
| "learning_rate": 5.185922974767597e-06, |
| "logits/chosen": -37.72825241088867, |
| "logits/rejected": -39.378013610839844, |
| "logps/chosen": -240.7726287841797, |
| "logps/rejected": -302.41864013671875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.8250885009765625, |
| "rewards/margins": 22.83572769165039, |
| "rewards/rejected": -26.660816192626953, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.4846070865488341, |
| "grad_norm": 8.449688903056085e-05, |
| "learning_rate": 5.152722443559097e-06, |
| "logits/chosen": -37.7960205078125, |
| "logits/rejected": -39.6239128112793, |
| "logps/chosen": -251.767333984375, |
| "logps/rejected": -314.9220886230469, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.878867149353027, |
| "rewards/margins": 23.379364013671875, |
| "rewards/rejected": -28.258230209350586, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.4879263131690316, |
| "grad_norm": 0.00010858433233806863, |
| "learning_rate": 5.119521912350598e-06, |
| "logits/chosen": -37.04092025756836, |
| "logits/rejected": -38.59927749633789, |
| "logps/chosen": -243.720947265625, |
| "logps/rejected": -304.9571838378906, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.011149883270264, |
| "rewards/margins": 22.803363800048828, |
| "rewards/rejected": -26.81451416015625, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.49124553978922914, |
| "grad_norm": 4.217286186758429e-05, |
| "learning_rate": 5.0863213811420985e-06, |
| "logits/chosen": -35.66263198852539, |
| "logits/rejected": -37.24384307861328, |
| "logps/chosen": -259.3061218261719, |
| "logps/rejected": -323.9267883300781, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.250843048095703, |
| "rewards/margins": 23.775760650634766, |
| "rewards/rejected": -29.0266056060791, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.4945647664094266, |
| "grad_norm": 2.136345392500516e-05, |
| "learning_rate": 5.053120849933599e-06, |
| "logits/chosen": -35.713584899902344, |
| "logits/rejected": -37.325538635253906, |
| "logps/chosen": -277.80377197265625, |
| "logps/rejected": -343.5056457519531, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.864058017730713, |
| "rewards/margins": 24.002731323242188, |
| "rewards/rejected": -29.866790771484375, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.4978839930296241, |
| "grad_norm": 8.999687452160288e-06, |
| "learning_rate": 5.0199203187251e-06, |
| "logits/chosen": -36.142784118652344, |
| "logits/rejected": -37.37236785888672, |
| "logps/chosen": -232.4190216064453, |
| "logps/rejected": -293.2521667480469, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -3.933694839477539, |
| "rewards/margins": 22.6506404876709, |
| "rewards/rejected": -26.584335327148438, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.5012032196498216, |
| "grad_norm": 8.656315003463533e-06, |
| "learning_rate": 4.986719787516601e-06, |
| "logits/chosen": -35.22600173950195, |
| "logits/rejected": -36.61890411376953, |
| "logps/chosen": -249.437255859375, |
| "logps/rejected": -313.10650634765625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.46209716796875, |
| "rewards/margins": 23.603939056396484, |
| "rewards/rejected": -28.0660343170166, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.5045224462700191, |
| "grad_norm": 1.5737525245640427e-05, |
| "learning_rate": 4.953519256308101e-06, |
| "logits/chosen": -36.45427322387695, |
| "logits/rejected": -38.449310302734375, |
| "logps/chosen": -269.5912780761719, |
| "logps/rejected": -336.03973388671875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.367428779602051, |
| "rewards/margins": 24.243759155273438, |
| "rewards/rejected": -29.611186981201172, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.5078416728902165, |
| "grad_norm": 1.5654180742785684e-06, |
| "learning_rate": 4.920318725099602e-06, |
| "logits/chosen": -36.544769287109375, |
| "logits/rejected": -38.08495330810547, |
| "logps/chosen": -236.01699829101562, |
| "logps/rejected": -298.4991149902344, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.073147296905518, |
| "rewards/margins": 23.109254837036133, |
| "rewards/rejected": -27.18239974975586, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.5111608995104141, |
| "grad_norm": 2.285624032083433e-05, |
| "learning_rate": 4.887118193891103e-06, |
| "logits/chosen": -36.97005081176758, |
| "logits/rejected": -38.57817840576172, |
| "logps/chosen": -223.80029296875, |
| "logps/rejected": -285.7514953613281, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.016161918640137, |
| "rewards/margins": 22.881366729736328, |
| "rewards/rejected": -26.897525787353516, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.5144801261306116, |
| "grad_norm": 1.6629717720206827e-05, |
| "learning_rate": 4.853917662682604e-06, |
| "logits/chosen": -37.56560516357422, |
| "logits/rejected": -39.122352600097656, |
| "logps/chosen": -243.0885467529297, |
| "logps/rejected": -303.94647216796875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.24282693862915, |
| "rewards/margins": 22.678638458251953, |
| "rewards/rejected": -26.921466827392578, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.517799352750809, |
| "grad_norm": 2.3593625883222558e-05, |
| "learning_rate": 4.8207171314741045e-06, |
| "logits/chosen": -35.30021286010742, |
| "logits/rejected": -37.36159133911133, |
| "logps/chosen": -259.1506042480469, |
| "logps/rejected": -325.35577392578125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.579512596130371, |
| "rewards/margins": 24.202543258666992, |
| "rewards/rejected": -29.782054901123047, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.5211185793710066, |
| "grad_norm": 1.770765493347426e-06, |
| "learning_rate": 4.7875166002656045e-06, |
| "logits/chosen": -36.31800079345703, |
| "logits/rejected": -38.350250244140625, |
| "logps/chosen": -237.36587524414062, |
| "logps/rejected": -297.8676452636719, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.429483413696289, |
| "rewards/margins": 22.614795684814453, |
| "rewards/rejected": -27.04427719116211, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.524437805991204, |
| "grad_norm": 2.521073110983707e-05, |
| "learning_rate": 4.754316069057105e-06, |
| "logits/chosen": -34.50792694091797, |
| "logits/rejected": -36.35118865966797, |
| "logps/chosen": -312.0120544433594, |
| "logps/rejected": -376.3553161621094, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -7.226397514343262, |
| "rewards/margins": 23.781085968017578, |
| "rewards/rejected": -31.007482528686523, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.5277570326114015, |
| "grad_norm": 2.6629986678017303e-05, |
| "learning_rate": 4.721115537848606e-06, |
| "logits/chosen": -37.28297805786133, |
| "logits/rejected": -39.56510543823242, |
| "logps/chosen": -232.8235626220703, |
| "logps/rejected": -293.8519592285156, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.996170997619629, |
| "rewards/margins": 22.683502197265625, |
| "rewards/rejected": -27.679668426513672, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.5310762592315991, |
| "grad_norm": 3.985645162174478e-05, |
| "learning_rate": 4.687915006640107e-06, |
| "logits/chosen": -36.04001998901367, |
| "logits/rejected": -38.201377868652344, |
| "logps/chosen": -257.0011291503906, |
| "logps/rejected": -318.67889404296875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.405674934387207, |
| "rewards/margins": 22.883712768554688, |
| "rewards/rejected": -28.28938865661621, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.5343954858517965, |
| "grad_norm": 4.466701648198068e-05, |
| "learning_rate": 4.654714475431607e-06, |
| "logits/chosen": -36.21506118774414, |
| "logits/rejected": -38.33916473388672, |
| "logps/chosen": -248.7204132080078, |
| "logps/rejected": -309.51446533203125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.977995872497559, |
| "rewards/margins": 22.648815155029297, |
| "rewards/rejected": -27.626811981201172, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.537714712471994, |
| "grad_norm": 0.0006125931977294385, |
| "learning_rate": 4.621513944223108e-06, |
| "logits/chosen": -35.31191635131836, |
| "logits/rejected": -37.675506591796875, |
| "logps/chosen": -251.9046173095703, |
| "logps/rejected": -314.9078674316406, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.107827186584473, |
| "rewards/margins": 23.261333465576172, |
| "rewards/rejected": -29.36916160583496, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.5410339390921916, |
| "grad_norm": 5.603829413303174e-05, |
| "learning_rate": 4.588313413014609e-06, |
| "logits/chosen": -34.84865951538086, |
| "logits/rejected": -37.49020004272461, |
| "logps/chosen": -249.0811309814453, |
| "logps/rejected": -310.94805908203125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.208640098571777, |
| "rewards/margins": 22.867446899414062, |
| "rewards/rejected": -28.076086044311523, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.544353165712389, |
| "grad_norm": 3.166065289406106e-05, |
| "learning_rate": 4.555112881806109e-06, |
| "logits/chosen": -35.96332550048828, |
| "logits/rejected": -38.01958465576172, |
| "logps/chosen": -255.0028533935547, |
| "logps/rejected": -317.4012145996094, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.1949872970581055, |
| "rewards/margins": 23.095088958740234, |
| "rewards/rejected": -28.290075302124023, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.5476723923325865, |
| "grad_norm": 0.00018184522923547775, |
| "learning_rate": 4.52191235059761e-06, |
| "logits/chosen": -36.66709518432617, |
| "logits/rejected": -38.78025436401367, |
| "logps/chosen": -276.5002136230469, |
| "logps/rejected": -337.06280517578125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.941756725311279, |
| "rewards/margins": 22.575870513916016, |
| "rewards/rejected": -28.517627716064453, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.550991618952784, |
| "grad_norm": 3.8955668060225435e-06, |
| "learning_rate": 4.4887118193891105e-06, |
| "logits/chosen": -37.09162902832031, |
| "logits/rejected": -39.494346618652344, |
| "logps/chosen": -240.5594024658203, |
| "logps/rejected": -302.869873046875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.489818096160889, |
| "rewards/margins": 23.031490325927734, |
| "rewards/rejected": -28.521312713623047, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.5543108455729815, |
| "grad_norm": 9.758635314938147e-06, |
| "learning_rate": 4.455511288180611e-06, |
| "logits/chosen": -35.8779296875, |
| "logits/rejected": -38.21814727783203, |
| "logps/chosen": -242.24783325195312, |
| "logps/rejected": -303.9131774902344, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.195720672607422, |
| "rewards/margins": 22.870954513549805, |
| "rewards/rejected": -28.066675186157227, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.557630072193179, |
| "grad_norm": 1.1670150342979468e-05, |
| "learning_rate": 4.422310756972112e-06, |
| "logits/chosen": -34.99301528930664, |
| "logits/rejected": -37.15611267089844, |
| "logps/chosen": -276.33843994140625, |
| "logps/rejected": -343.9205017089844, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.765660285949707, |
| "rewards/margins": 24.636295318603516, |
| "rewards/rejected": -31.401952743530273, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.5609492988133765, |
| "grad_norm": 2.343825326533988e-05, |
| "learning_rate": 4.389110225763612e-06, |
| "logits/chosen": -36.582054138183594, |
| "logits/rejected": -38.67331314086914, |
| "logps/chosen": -229.20010375976562, |
| "logps/rejected": -289.13934326171875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.137927055358887, |
| "rewards/margins": 22.312835693359375, |
| "rewards/rejected": -26.450763702392578, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.564268525433574, |
| "grad_norm": 5.144063470652327e-05, |
| "learning_rate": 4.355909694555113e-06, |
| "logits/chosen": -36.755279541015625, |
| "logits/rejected": -39.31067657470703, |
| "logps/chosen": -238.5866241455078, |
| "logps/rejected": -300.41607666015625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.042891979217529, |
| "rewards/margins": 22.878875732421875, |
| "rewards/rejected": -27.921768188476562, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.5675877520537714, |
| "grad_norm": 3.793970972765237e-05, |
| "learning_rate": 4.322709163346614e-06, |
| "logits/chosen": -36.33258819580078, |
| "logits/rejected": -38.49297332763672, |
| "logps/chosen": -256.3636474609375, |
| "logps/rejected": -323.8926086425781, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.737571716308594, |
| "rewards/margins": 24.59103012084961, |
| "rewards/rejected": -31.328601837158203, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.570906978673969, |
| "grad_norm": 2.3688435248914175e-05, |
| "learning_rate": 4.289508632138115e-06, |
| "logits/chosen": -37.646400451660156, |
| "logits/rejected": -39.79381561279297, |
| "logps/chosen": -239.6971893310547, |
| "logps/rejected": -301.040283203125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.137190818786621, |
| "rewards/margins": 22.715267181396484, |
| "rewards/rejected": -27.852458953857422, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.5742262052941665, |
| "grad_norm": 0.0008279504254460335, |
| "learning_rate": 4.256308100929616e-06, |
| "logits/chosen": -36.02642059326172, |
| "logits/rejected": -38.3137092590332, |
| "logps/chosen": -249.07089233398438, |
| "logps/rejected": -311.4475402832031, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.421334743499756, |
| "rewards/margins": 23.024635314941406, |
| "rewards/rejected": -28.445972442626953, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.5775454319143639, |
| "grad_norm": 1.2977415053683217e-06, |
| "learning_rate": 4.223107569721116e-06, |
| "logits/chosen": -36.99226379394531, |
| "logits/rejected": -39.35835647583008, |
| "logps/chosen": -230.90493774414062, |
| "logps/rejected": -291.55364990234375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.519408702850342, |
| "rewards/margins": 22.53643798828125, |
| "rewards/rejected": -27.05584716796875, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.5808646585345615, |
| "grad_norm": 1.3930763088865206e-05, |
| "learning_rate": 4.1899070385126165e-06, |
| "logits/chosen": -36.491249084472656, |
| "logits/rejected": -38.7135124206543, |
| "logps/chosen": -240.33584594726562, |
| "logps/rejected": -305.18304443359375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.365754127502441, |
| "rewards/margins": 23.769380569458008, |
| "rewards/rejected": -29.1351318359375, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.5841838851547589, |
| "grad_norm": 1.682070978858974e-05, |
| "learning_rate": 4.156706507304117e-06, |
| "logits/chosen": -36.36711883544922, |
| "logits/rejected": -38.498600006103516, |
| "logps/chosen": -257.9060974121094, |
| "logps/rejected": -323.3476257324219, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.284196376800537, |
| "rewards/margins": 23.89134407043457, |
| "rewards/rejected": -30.175540924072266, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.5875031117749564, |
| "grad_norm": 7.728056516498327e-06, |
| "learning_rate": 4.123505976095618e-06, |
| "logits/chosen": -37.04905700683594, |
| "logits/rejected": -39.33579635620117, |
| "logps/chosen": -227.35110473632812, |
| "logps/rejected": -292.36236572265625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.201313495635986, |
| "rewards/margins": 23.991268157958984, |
| "rewards/rejected": -29.192584991455078, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.590822338395154, |
| "grad_norm": 0.00020755194418597966, |
| "learning_rate": 4.090305444887119e-06, |
| "logits/chosen": -36.47849655151367, |
| "logits/rejected": -38.484745025634766, |
| "logps/chosen": -261.4329833984375, |
| "logps/rejected": -325.38604736328125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.6097822189331055, |
| "rewards/margins": 23.496807098388672, |
| "rewards/rejected": -29.106592178344727, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.5941415650153514, |
| "grad_norm": 0.0010281304130330682, |
| "learning_rate": 4.057104913678619e-06, |
| "logits/chosen": -36.252891540527344, |
| "logits/rejected": -38.18025207519531, |
| "logps/chosen": -231.2675018310547, |
| "logps/rejected": -294.30999755859375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.148598670959473, |
| "rewards/margins": 23.31288719177246, |
| "rewards/rejected": -27.46148681640625, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.5974607916355489, |
| "grad_norm": 5.641156531055458e-05, |
| "learning_rate": 4.02390438247012e-06, |
| "logits/chosen": -36.627098083496094, |
| "logits/rejected": -38.566261291503906, |
| "logps/chosen": -257.6075134277344, |
| "logps/rejected": -322.8147888183594, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.343573570251465, |
| "rewards/margins": 23.8291072845459, |
| "rewards/rejected": -30.172677993774414, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.6007800182557465, |
| "grad_norm": 8.674534183228388e-05, |
| "learning_rate": 3.99070385126162e-06, |
| "logits/chosen": -37.30437469482422, |
| "logits/rejected": -39.19757843017578, |
| "logps/chosen": -236.25155639648438, |
| "logps/rejected": -299.7480773925781, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.02539587020874, |
| "rewards/margins": 23.419660568237305, |
| "rewards/rejected": -28.445056915283203, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.6040992448759439, |
| "grad_norm": 2.2813930627307855e-05, |
| "learning_rate": 3.957503320053121e-06, |
| "logits/chosen": -36.5108528137207, |
| "logits/rejected": -38.77637481689453, |
| "logps/chosen": -243.909423828125, |
| "logps/rejected": -308.46575927734375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.7918548583984375, |
| "rewards/margins": 23.672077178955078, |
| "rewards/rejected": -28.463932037353516, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.6074184714961414, |
| "grad_norm": 0.0004189323226455599, |
| "learning_rate": 3.924302788844622e-06, |
| "logits/chosen": -36.76378631591797, |
| "logits/rejected": -38.989498138427734, |
| "logps/chosen": -249.64974975585938, |
| "logps/rejected": -314.20989990234375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.429641246795654, |
| "rewards/margins": 23.652019500732422, |
| "rewards/rejected": -29.0816593170166, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.6107376981163389, |
| "grad_norm": 3.000356628035661e-05, |
| "learning_rate": 3.8911022576361225e-06, |
| "logits/chosen": -36.149635314941406, |
| "logits/rejected": -38.40140914916992, |
| "logps/chosen": -273.1649475097656, |
| "logps/rejected": -340.8751525878906, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.340195178985596, |
| "rewards/margins": 24.731584548950195, |
| "rewards/rejected": -31.07177734375, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.6140569247365364, |
| "grad_norm": 9.709197911433876e-05, |
| "learning_rate": 3.857901726427623e-06, |
| "logits/chosen": -35.064029693603516, |
| "logits/rejected": -36.718536376953125, |
| "logps/chosen": -254.9979248046875, |
| "logps/rejected": -318.6874084472656, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.1480937004089355, |
| "rewards/margins": 23.502117156982422, |
| "rewards/rejected": -28.65021324157715, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.6173761513567338, |
| "grad_norm": 0.00014991410716902465, |
| "learning_rate": 3.824701195219123e-06, |
| "logits/chosen": -36.74406051635742, |
| "logits/rejected": -38.75735092163086, |
| "logps/chosen": -229.8621063232422, |
| "logps/rejected": -293.3158264160156, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.392579078674316, |
| "rewards/margins": 23.361215591430664, |
| "rewards/rejected": -28.753793716430664, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.6206953779769314, |
| "grad_norm": 2.5267569071729667e-05, |
| "learning_rate": 3.7915006640106242e-06, |
| "logits/chosen": -35.470855712890625, |
| "logits/rejected": -37.23722457885742, |
| "logps/chosen": -276.9035949707031, |
| "logps/rejected": -342.387451171875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.212436676025391, |
| "rewards/margins": 24.028600692749023, |
| "rewards/rejected": -30.241037368774414, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.6240146045971289, |
| "grad_norm": 1.762523243087344e-05, |
| "learning_rate": 3.758300132802125e-06, |
| "logits/chosen": -35.641090393066406, |
| "logits/rejected": -37.73635482788086, |
| "logps/chosen": -245.4388885498047, |
| "logps/rejected": -310.99713134765625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.393484115600586, |
| "rewards/margins": 24.041391372680664, |
| "rewards/rejected": -29.43487548828125, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.6273338312173263, |
| "grad_norm": 1.0433415809529833e-05, |
| "learning_rate": 3.725099601593626e-06, |
| "logits/chosen": -35.56284713745117, |
| "logits/rejected": -37.38603210449219, |
| "logps/chosen": -238.94775390625, |
| "logps/rejected": -302.51947021484375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.463973045349121, |
| "rewards/margins": 23.474252700805664, |
| "rewards/rejected": -27.9382266998291, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.6306530578375239, |
| "grad_norm": 2.4497583581251092e-05, |
| "learning_rate": 3.6918990703851264e-06, |
| "logits/chosen": -36.126556396484375, |
| "logits/rejected": -37.94770050048828, |
| "logps/chosen": -250.25961303710938, |
| "logps/rejected": -314.0647888183594, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.455031394958496, |
| "rewards/margins": 23.481657028198242, |
| "rewards/rejected": -28.936687469482422, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.6339722844577214, |
| "grad_norm": 3.172329888911918e-05, |
| "learning_rate": 3.6586985391766272e-06, |
| "logits/chosen": -36.24954605102539, |
| "logits/rejected": -38.31598663330078, |
| "logps/chosen": -253.3960418701172, |
| "logps/rejected": -318.83233642578125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.337418556213379, |
| "rewards/margins": 23.940242767333984, |
| "rewards/rejected": -29.277660369873047, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.6372915110779188, |
| "grad_norm": 8.708640052645933e-06, |
| "learning_rate": 3.625498007968128e-06, |
| "logits/chosen": -35.00163269042969, |
| "logits/rejected": -37.10395812988281, |
| "logps/chosen": -264.4671936035156, |
| "logps/rejected": -328.429443359375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.868958473205566, |
| "rewards/margins": 23.600505828857422, |
| "rewards/rejected": -29.469463348388672, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.6406107376981164, |
| "grad_norm": 3.279365409980528e-05, |
| "learning_rate": 3.5922974767596285e-06, |
| "logits/chosen": -36.706092834472656, |
| "logits/rejected": -38.54290008544922, |
| "logps/chosen": -237.87362670898438, |
| "logps/rejected": -302.92108154296875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.26140832901001, |
| "rewards/margins": 23.881010055541992, |
| "rewards/rejected": -29.14241600036621, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.6439299643183138, |
| "grad_norm": 0.00011895268107764423, |
| "learning_rate": 3.5590969455511294e-06, |
| "logits/chosen": -35.864967346191406, |
| "logits/rejected": -38.541648864746094, |
| "logps/chosen": -261.33477783203125, |
| "logps/rejected": -328.9570007324219, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.419045448303223, |
| "rewards/margins": 24.578853607177734, |
| "rewards/rejected": -30.99790382385254, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.6472491909385113, |
| "grad_norm": 5.514123768080026e-05, |
| "learning_rate": 3.52589641434263e-06, |
| "logits/chosen": -36.08778762817383, |
| "logits/rejected": -38.50415802001953, |
| "logps/chosen": -251.5714874267578, |
| "logps/rejected": -316.7638244628906, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.013431549072266, |
| "rewards/margins": 24.004486083984375, |
| "rewards/rejected": -30.017919540405273, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.6505684175587089, |
| "grad_norm": 0.00010633220517775044, |
| "learning_rate": 3.4926958831341307e-06, |
| "logits/chosen": -35.88835906982422, |
| "logits/rejected": -38.69109344482422, |
| "logps/chosen": -277.28228759765625, |
| "logps/rejected": -344.85955810546875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -7.679495334625244, |
| "rewards/margins": 24.58434295654297, |
| "rewards/rejected": -32.26383972167969, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.6538876441789063, |
| "grad_norm": 4.945451905769005e-07, |
| "learning_rate": 3.4594953519256315e-06, |
| "logits/chosen": -35.732547760009766, |
| "logits/rejected": -37.812828063964844, |
| "logps/chosen": -267.87628173828125, |
| "logps/rejected": -333.97113037109375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.297167778015137, |
| "rewards/margins": 24.186397552490234, |
| "rewards/rejected": -30.483562469482422, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.6572068707991038, |
| "grad_norm": 1.6296591638820246e-05, |
| "learning_rate": 3.4262948207171315e-06, |
| "logits/chosen": -36.72779083251953, |
| "logits/rejected": -39.340965270996094, |
| "logps/chosen": -263.29132080078125, |
| "logps/rejected": -329.0027770996094, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.973439693450928, |
| "rewards/margins": 24.01589584350586, |
| "rewards/rejected": -29.989337921142578, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.6605260974193014, |
| "grad_norm": 1.2624731425603386e-05, |
| "learning_rate": 3.3930942895086324e-06, |
| "logits/chosen": -35.57170867919922, |
| "logits/rejected": -37.529754638671875, |
| "logps/chosen": -257.77154541015625, |
| "logps/rejected": -321.79388427734375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.590646266937256, |
| "rewards/margins": 23.66408348083496, |
| "rewards/rejected": -29.254735946655273, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.6638453240394988, |
| "grad_norm": 2.9282214200065937e-06, |
| "learning_rate": 3.359893758300133e-06, |
| "logits/chosen": -36.09126663208008, |
| "logits/rejected": -38.71330642700195, |
| "logps/chosen": -268.74365234375, |
| "logps/rejected": -335.63519287109375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.973354339599609, |
| "rewards/margins": 24.495656967163086, |
| "rewards/rejected": -31.469013214111328, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.6671645506596963, |
| "grad_norm": 4.54183009424014e-06, |
| "learning_rate": 3.3266932270916337e-06, |
| "logits/chosen": -35.75475311279297, |
| "logits/rejected": -38.265567779541016, |
| "logps/chosen": -265.66510009765625, |
| "logps/rejected": -333.2896423339844, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.296258926391602, |
| "rewards/margins": 24.668832778930664, |
| "rewards/rejected": -30.9650936126709, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.6704837772798938, |
| "grad_norm": 7.087628546287306e-06, |
| "learning_rate": 3.293492695883134e-06, |
| "logits/chosen": -35.057525634765625, |
| "logits/rejected": -37.05876541137695, |
| "logps/chosen": -260.94903564453125, |
| "logps/rejected": -326.78289794921875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.652168273925781, |
| "rewards/margins": 24.176315307617188, |
| "rewards/rejected": -29.828481674194336, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.6738030039000913, |
| "grad_norm": 4.373361832676892e-07, |
| "learning_rate": 3.260292164674635e-06, |
| "logits/chosen": -35.68403625488281, |
| "logits/rejected": -37.77031707763672, |
| "logps/chosen": -246.8516082763672, |
| "logps/rejected": -315.7016906738281, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.814517974853516, |
| "rewards/margins": 24.977245330810547, |
| "rewards/rejected": -30.791763305664062, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.6771222305202887, |
| "grad_norm": 4.2058504732267465e-06, |
| "learning_rate": 3.227091633466136e-06, |
| "logits/chosen": -36.04418182373047, |
| "logits/rejected": -38.19130325317383, |
| "logps/chosen": -270.22119140625, |
| "logps/rejected": -336.78619384765625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.137897491455078, |
| "rewards/margins": 24.296836853027344, |
| "rewards/rejected": -30.434734344482422, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.6804414571404863, |
| "grad_norm": 2.4626022423035465e-05, |
| "learning_rate": 3.1938911022576362e-06, |
| "logits/chosen": -35.86650085449219, |
| "logits/rejected": -37.94574737548828, |
| "logps/chosen": -252.1937255859375, |
| "logps/rejected": -315.3446044921875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.771885871887207, |
| "rewards/margins": 23.3013973236084, |
| "rewards/rejected": -29.073284149169922, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.6837606837606838, |
| "grad_norm": 7.916089816717431e-05, |
| "learning_rate": 3.160690571049137e-06, |
| "logits/chosen": -37.223426818847656, |
| "logits/rejected": -39.6295051574707, |
| "logps/chosen": -243.1242218017578, |
| "logps/rejected": -304.4497985839844, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.6395769119262695, |
| "rewards/margins": 22.74526596069336, |
| "rewards/rejected": -28.384841918945312, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.6870799103808812, |
| "grad_norm": 2.3471836811950197e-06, |
| "learning_rate": 3.1274900398406375e-06, |
| "logits/chosen": -36.550941467285156, |
| "logits/rejected": -38.87454605102539, |
| "logps/chosen": -266.3443908691406, |
| "logps/rejected": -333.722900390625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.885348320007324, |
| "rewards/margins": 24.666173934936523, |
| "rewards/rejected": -31.5515193939209, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.6903991370010788, |
| "grad_norm": 3.3997166610788554e-06, |
| "learning_rate": 3.0942895086321384e-06, |
| "logits/chosen": -36.20463943481445, |
| "logits/rejected": -38.63981628417969, |
| "logps/chosen": -260.6272888183594, |
| "logps/rejected": -327.7062683105469, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.540787696838379, |
| "rewards/margins": 24.436195373535156, |
| "rewards/rejected": -30.97698402404785, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.6937183636212763, |
| "grad_norm": 6.613054665649543e-06, |
| "learning_rate": 3.0610889774236392e-06, |
| "logits/chosen": -37.008026123046875, |
| "logits/rejected": -39.31156921386719, |
| "logps/chosen": -230.788330078125, |
| "logps/rejected": -294.869873046875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.067322731018066, |
| "rewards/margins": 23.62921905517578, |
| "rewards/rejected": -28.6965389251709, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.6970375902414737, |
| "grad_norm": 8.439514203928411e-06, |
| "learning_rate": 3.0278884462151397e-06, |
| "logits/chosen": -35.995094299316406, |
| "logits/rejected": -38.129146575927734, |
| "logps/chosen": -245.63327026367188, |
| "logps/rejected": -308.6886291503906, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.045685291290283, |
| "rewards/margins": 23.421958923339844, |
| "rewards/rejected": -28.467641830444336, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.7003568168616713, |
| "grad_norm": 2.3334492652793415e-05, |
| "learning_rate": 2.9946879150066405e-06, |
| "logits/chosen": -35.943634033203125, |
| "logits/rejected": -38.231529235839844, |
| "logps/chosen": -279.72039794921875, |
| "logps/rejected": -347.6648864746094, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -7.199967861175537, |
| "rewards/margins": 24.816007614135742, |
| "rewards/rejected": -32.01597595214844, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.7036760434818687, |
| "grad_norm": 8.327054092660546e-05, |
| "learning_rate": 2.961487383798141e-06, |
| "logits/chosen": -35.427879333496094, |
| "logits/rejected": -37.45615768432617, |
| "logps/chosen": -260.65289306640625, |
| "logps/rejected": -328.54962158203125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.8487420082092285, |
| "rewards/margins": 24.858335494995117, |
| "rewards/rejected": -30.707077026367188, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.7069952701020662, |
| "grad_norm": 7.448333235515747e-06, |
| "learning_rate": 2.928286852589642e-06, |
| "logits/chosen": -37.2711181640625, |
| "logits/rejected": -39.69253158569336, |
| "logps/chosen": -220.7040252685547, |
| "logps/rejected": -282.16668701171875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.204867839813232, |
| "rewards/margins": 22.861181259155273, |
| "rewards/rejected": -27.066049575805664, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.7103144967222638, |
| "grad_norm": 1.7492104234406725e-05, |
| "learning_rate": 2.8950863213811427e-06, |
| "logits/chosen": -35.61443328857422, |
| "logits/rejected": -38.02817916870117, |
| "logps/chosen": -276.75323486328125, |
| "logps/rejected": -348.700927734375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -7.692807197570801, |
| "rewards/margins": 25.893789291381836, |
| "rewards/rejected": -33.58660125732422, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.7136337233424612, |
| "grad_norm": 1.8488311752662412e-06, |
| "learning_rate": 2.861885790172643e-06, |
| "logits/chosen": -35.8737678527832, |
| "logits/rejected": -38.23582458496094, |
| "logps/chosen": -283.478759765625, |
| "logps/rejected": -347.2231750488281, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.430272102355957, |
| "rewards/margins": 23.48923110961914, |
| "rewards/rejected": -29.919504165649414, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.7169529499626587, |
| "grad_norm": 2.168203081964748e-06, |
| "learning_rate": 2.828685258964144e-06, |
| "logits/chosen": -37.18292999267578, |
| "logits/rejected": -39.45110321044922, |
| "logps/chosen": -234.93820190429688, |
| "logps/rejected": -298.44769287109375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.9636945724487305, |
| "rewards/margins": 23.42740249633789, |
| "rewards/rejected": -28.391098022460938, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.7202721765828562, |
| "grad_norm": 1.770351082086563e-05, |
| "learning_rate": 2.795484727755644e-06, |
| "logits/chosen": -37.75334930419922, |
| "logits/rejected": -39.95841979980469, |
| "logps/chosen": -245.21817016601562, |
| "logps/rejected": -308.2054138183594, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.453461170196533, |
| "rewards/margins": 23.370468139648438, |
| "rewards/rejected": -28.823928833007812, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.7235914032030537, |
| "grad_norm": 4.3876305426238105e-05, |
| "learning_rate": 2.762284196547145e-06, |
| "logits/chosen": -37.115257263183594, |
| "logits/rejected": -39.60409164428711, |
| "logps/chosen": -236.9832305908203, |
| "logps/rejected": -303.8288879394531, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.18468713760376, |
| "rewards/margins": 24.370668411254883, |
| "rewards/rejected": -29.555355072021484, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.7269106298232512, |
| "grad_norm": 3.0235052690841258e-05, |
| "learning_rate": 2.7290836653386452e-06, |
| "logits/chosen": -37.33479690551758, |
| "logits/rejected": -39.598915100097656, |
| "logps/chosen": -229.43814086914062, |
| "logps/rejected": -293.3581848144531, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.7233428955078125, |
| "rewards/margins": 23.499513626098633, |
| "rewards/rejected": -28.222854614257812, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.7302298564434486, |
| "grad_norm": 2.1708687199861743e-05, |
| "learning_rate": 2.695883134130146e-06, |
| "logits/chosen": -36.968074798583984, |
| "logits/rejected": -39.77884292602539, |
| "logps/chosen": -232.990478515625, |
| "logps/rejected": -301.4418029785156, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.018186092376709, |
| "rewards/margins": 24.904294967651367, |
| "rewards/rejected": -30.922481536865234, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.7335490830636462, |
| "grad_norm": 7.78408139012754e-06, |
| "learning_rate": 2.662682602921647e-06, |
| "logits/chosen": -37.694664001464844, |
| "logits/rejected": -39.98257064819336, |
| "logps/chosen": -218.97183227539062, |
| "logps/rejected": -283.67132568359375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.842543601989746, |
| "rewards/margins": 23.720943450927734, |
| "rewards/rejected": -28.563491821289062, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.7368683096838436, |
| "grad_norm": 3.5446532820060384e-06, |
| "learning_rate": 2.6294820717131474e-06, |
| "logits/chosen": -37.003211975097656, |
| "logits/rejected": -39.224727630615234, |
| "logps/chosen": -240.2725830078125, |
| "logps/rejected": -306.5699768066406, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.473374843597412, |
| "rewards/margins": 24.33763885498047, |
| "rewards/rejected": -29.811010360717773, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.7401875363040411, |
| "grad_norm": 2.044969551207032e-05, |
| "learning_rate": 2.5962815405046482e-06, |
| "logits/chosen": -34.97216033935547, |
| "logits/rejected": -37.055519104003906, |
| "logps/chosen": -234.8686981201172, |
| "logps/rejected": -300.5046081542969, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.184366703033447, |
| "rewards/margins": 24.180988311767578, |
| "rewards/rejected": -29.3653564453125, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.7435067629242387, |
| "grad_norm": 1.7181002931465628e-06, |
| "learning_rate": 2.563081009296149e-06, |
| "logits/chosen": -35.178504943847656, |
| "logits/rejected": -37.071800231933594, |
| "logps/chosen": -271.97137451171875, |
| "logps/rejected": -338.36407470703125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.9848222732543945, |
| "rewards/margins": 24.228235244750977, |
| "rewards/rejected": -31.21305274963379, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.7468259895444361, |
| "grad_norm": 1.943804272741545e-05, |
| "learning_rate": 2.5298804780876495e-06, |
| "logits/chosen": -35.294288635253906, |
| "logits/rejected": -37.824981689453125, |
| "logps/chosen": -286.2807312011719, |
| "logps/rejected": -353.0439758300781, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -7.225214958190918, |
| "rewards/margins": 24.468542098999023, |
| "rewards/rejected": -31.69375991821289, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.7501452161646336, |
| "grad_norm": 5.341881660569925e-06, |
| "learning_rate": 2.4966799468791504e-06, |
| "logits/chosen": -35.99862289428711, |
| "logits/rejected": -38.410194396972656, |
| "logps/chosen": -260.35614013671875, |
| "logps/rejected": -328.924560546875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.724026679992676, |
| "rewards/margins": 24.8992862701416, |
| "rewards/rejected": -31.623315811157227, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.7534644427848312, |
| "grad_norm": 2.7916195904253982e-05, |
| "learning_rate": 2.463479415670651e-06, |
| "logits/chosen": -36.70926284790039, |
| "logits/rejected": -38.943397521972656, |
| "logps/chosen": -232.7910919189453, |
| "logps/rejected": -294.95062255859375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.800173759460449, |
| "rewards/margins": 23.0440673828125, |
| "rewards/rejected": -27.844242095947266, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.7567836694050286, |
| "grad_norm": 1.1390899089747109e-05, |
| "learning_rate": 2.4302788844621517e-06, |
| "logits/chosen": -35.52710723876953, |
| "logits/rejected": -37.439945220947266, |
| "logps/chosen": -253.768798828125, |
| "logps/rejected": -319.4378967285156, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.10952615737915, |
| "rewards/margins": 24.009607315063477, |
| "rewards/rejected": -30.1191349029541, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.7601028960252261, |
| "grad_norm": 1.4922372884029755e-06, |
| "learning_rate": 2.3970783532536525e-06, |
| "logits/chosen": -36.5639762878418, |
| "logits/rejected": -38.71487808227539, |
| "logps/chosen": -249.2660675048828, |
| "logps/rejected": -315.0458984375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.141018867492676, |
| "rewards/margins": 24.193401336669922, |
| "rewards/rejected": -30.33441734313965, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.7634221226454236, |
| "grad_norm": 1.5967334547895007e-06, |
| "learning_rate": 2.363877822045153e-06, |
| "logits/chosen": -36.093650817871094, |
| "logits/rejected": -38.5761604309082, |
| "logps/chosen": -240.87637329101562, |
| "logps/rejected": -308.2940368652344, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.83968448638916, |
| "rewards/margins": 24.636394500732422, |
| "rewards/rejected": -30.4760799407959, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.7667413492656211, |
| "grad_norm": 9.941512644218164e-07, |
| "learning_rate": 2.3306772908366534e-06, |
| "logits/chosen": -36.442893981933594, |
| "logits/rejected": -38.91448974609375, |
| "logps/chosen": -273.2530822753906, |
| "logps/rejected": -342.6146545410156, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -7.223963737487793, |
| "rewards/margins": 25.15464973449707, |
| "rewards/rejected": -32.37861633300781, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.7700605758858186, |
| "grad_norm": 4.130026809434639e-06, |
| "learning_rate": 2.2974767596281542e-06, |
| "logits/chosen": -36.173011779785156, |
| "logits/rejected": -38.719547271728516, |
| "logps/chosen": -264.0533752441406, |
| "logps/rejected": -334.3890075683594, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -7.334656715393066, |
| "rewards/margins": 25.290576934814453, |
| "rewards/rejected": -32.62523651123047, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.7733798025060161, |
| "grad_norm": 1.1943691333726747e-06, |
| "learning_rate": 2.2642762284196547e-06, |
| "logits/chosen": -36.09328079223633, |
| "logits/rejected": -38.52931594848633, |
| "logps/chosen": -269.763916015625, |
| "logps/rejected": -336.2082824707031, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.884180545806885, |
| "rewards/margins": 24.197860717773438, |
| "rewards/rejected": -31.082040786743164, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.7766990291262136, |
| "grad_norm": 3.132646452286281e-05, |
| "learning_rate": 2.2310756972111555e-06, |
| "logits/chosen": -36.489498138427734, |
| "logits/rejected": -38.6656379699707, |
| "logps/chosen": -226.9405975341797, |
| "logps/rejected": -290.4114990234375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.617298126220703, |
| "rewards/margins": 23.44748306274414, |
| "rewards/rejected": -28.064783096313477, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.780018255746411, |
| "grad_norm": 0.004452559631317854, |
| "learning_rate": 2.1978751660026564e-06, |
| "logits/chosen": -36.05640411376953, |
| "logits/rejected": -38.674530029296875, |
| "logps/chosen": -252.38198852539062, |
| "logps/rejected": -319.01129150390625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.577996730804443, |
| "rewards/margins": 24.36948013305664, |
| "rewards/rejected": -29.94747543334961, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.7833374823666086, |
| "grad_norm": 8.203298057196662e-05, |
| "learning_rate": 2.164674634794157e-06, |
| "logits/chosen": -36.65255355834961, |
| "logits/rejected": -38.996917724609375, |
| "logps/chosen": -255.0420684814453, |
| "logps/rejected": -322.2328186035156, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.767322540283203, |
| "rewards/margins": 24.485187530517578, |
| "rewards/rejected": -30.252511978149414, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.7866567089868061, |
| "grad_norm": 3.436456972849555e-05, |
| "learning_rate": 2.1314741035856577e-06, |
| "logits/chosen": -34.79005813598633, |
| "logits/rejected": -36.92679977416992, |
| "logps/chosen": -250.50857543945312, |
| "logps/rejected": -317.89910888671875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.7200026512146, |
| "rewards/margins": 24.521738052368164, |
| "rewards/rejected": -30.24173927307129, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.7899759356070035, |
| "grad_norm": 3.881140855810372e-06, |
| "learning_rate": 2.098273572377158e-06, |
| "logits/chosen": -36.45794677734375, |
| "logits/rejected": -38.66387176513672, |
| "logps/chosen": -275.30828857421875, |
| "logps/rejected": -343.1573181152344, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.899484157562256, |
| "rewards/margins": 24.771820068359375, |
| "rewards/rejected": -31.67130470275879, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.7932951622272011, |
| "grad_norm": 1.58879871037243e-07, |
| "learning_rate": 2.065073041168659e-06, |
| "logits/chosen": -36.50375747680664, |
| "logits/rejected": -38.68590545654297, |
| "logps/chosen": -241.6002655029297, |
| "logps/rejected": -307.832763671875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.272534370422363, |
| "rewards/margins": 24.189064025878906, |
| "rewards/rejected": -29.461597442626953, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.7966143888473985, |
| "grad_norm": 3.56239324901253e-05, |
| "learning_rate": 2.03187250996016e-06, |
| "logits/chosen": -36.17913055419922, |
| "logits/rejected": -38.23615264892578, |
| "logps/chosen": -249.85971069335938, |
| "logps/rejected": -315.089599609375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.946499824523926, |
| "rewards/margins": 24.03165054321289, |
| "rewards/rejected": -28.9781494140625, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.799933615467596, |
| "grad_norm": 1.6728210539440624e-05, |
| "learning_rate": 1.9986719787516602e-06, |
| "logits/chosen": -36.62885665893555, |
| "logits/rejected": -39.18938446044922, |
| "logps/chosen": -238.7482147216797, |
| "logps/rejected": -304.65216064453125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.411585807800293, |
| "rewards/margins": 24.22550392150879, |
| "rewards/rejected": -29.6370906829834, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.8032528420877936, |
| "grad_norm": 4.99165580549743e-05, |
| "learning_rate": 1.9654714475431607e-06, |
| "logits/chosen": -34.9477424621582, |
| "logits/rejected": -37.074520111083984, |
| "logps/chosen": -269.28125, |
| "logps/rejected": -338.604248046875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.398122787475586, |
| "rewards/margins": 25.21800994873047, |
| "rewards/rejected": -31.616130828857422, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.806572068707991, |
| "grad_norm": 2.385314473940525e-06, |
| "learning_rate": 1.9322709163346615e-06, |
| "logits/chosen": -36.0991096496582, |
| "logits/rejected": -38.03818130493164, |
| "logps/chosen": -244.12930297851562, |
| "logps/rejected": -314.333251953125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.721272945404053, |
| "rewards/margins": 25.411584854125977, |
| "rewards/rejected": -31.132862091064453, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.8098912953281885, |
| "grad_norm": 5.150145170773612e-06, |
| "learning_rate": 1.8990703851261622e-06, |
| "logits/chosen": -36.011573791503906, |
| "logits/rejected": -38.14513397216797, |
| "logps/chosen": -248.9287567138672, |
| "logps/rejected": -314.93682861328125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.756242275238037, |
| "rewards/margins": 24.10676383972168, |
| "rewards/rejected": -29.86301040649414, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.8132105219483861, |
| "grad_norm": 2.2313422959996387e-05, |
| "learning_rate": 1.8658698539176628e-06, |
| "logits/chosen": -36.681114196777344, |
| "logits/rejected": -39.09503936767578, |
| "logps/chosen": -242.25674438476562, |
| "logps/rejected": -306.0912170410156, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.092987060546875, |
| "rewards/margins": 23.52627182006836, |
| "rewards/rejected": -28.619258880615234, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.8165297485685835, |
| "grad_norm": 7.533744064858183e-05, |
| "learning_rate": 1.8326693227091634e-06, |
| "logits/chosen": -35.47425079345703, |
| "logits/rejected": -37.82908630371094, |
| "logps/chosen": -252.47488403320312, |
| "logps/rejected": -320.1539306640625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.538949012756348, |
| "rewards/margins": 24.637981414794922, |
| "rewards/rejected": -30.176931381225586, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.819848975188781, |
| "grad_norm": 1.6146932466654107e-05, |
| "learning_rate": 1.7994687915006643e-06, |
| "logits/chosen": -36.48265838623047, |
| "logits/rejected": -38.716392517089844, |
| "logps/chosen": -251.6239776611328, |
| "logps/rejected": -321.3429870605469, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.724469184875488, |
| "rewards/margins": 25.172916412353516, |
| "rewards/rejected": -31.897384643554688, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.8231682018089785, |
| "grad_norm": 7.460760389221832e-05, |
| "learning_rate": 1.766268260292165e-06, |
| "logits/chosen": -36.723182678222656, |
| "logits/rejected": -39.13579177856445, |
| "logps/chosen": -253.5547332763672, |
| "logps/rejected": -322.19049072265625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.035333633422852, |
| "rewards/margins": 25.05792236328125, |
| "rewards/rejected": -31.093252182006836, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.826487428429176, |
| "grad_norm": 7.737757186987437e-06, |
| "learning_rate": 1.7330677290836656e-06, |
| "logits/chosen": -36.27003479003906, |
| "logits/rejected": -38.642295837402344, |
| "logps/chosen": -245.1279754638672, |
| "logps/rejected": -313.0539855957031, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.8328351974487305, |
| "rewards/margins": 24.66913414001465, |
| "rewards/rejected": -30.501968383789062, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.8298066550493735, |
| "grad_norm": 2.845164817699697e-05, |
| "learning_rate": 1.699867197875166e-06, |
| "logits/chosen": -35.327457427978516, |
| "logits/rejected": -37.247886657714844, |
| "logps/chosen": -250.5523681640625, |
| "logps/rejected": -317.6628112792969, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.3366007804870605, |
| "rewards/margins": 24.55581283569336, |
| "rewards/rejected": -29.89241600036621, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.833125881669571, |
| "grad_norm": 6.5803114921436645e-06, |
| "learning_rate": 1.6666666666666667e-06, |
| "logits/chosen": -34.98641586303711, |
| "logits/rejected": -37.47605514526367, |
| "logps/chosen": -256.0675354003906, |
| "logps/rejected": -325.28607177734375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.059304237365723, |
| "rewards/margins": 25.124370574951172, |
| "rewards/rejected": -31.183673858642578, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.8364451082897685, |
| "grad_norm": 0.00010842137999134138, |
| "learning_rate": 1.6334661354581673e-06, |
| "logits/chosen": -35.48594284057617, |
| "logits/rejected": -37.64731979370117, |
| "logps/chosen": -291.96661376953125, |
| "logps/rejected": -362.49005126953125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.917998313903809, |
| "rewards/margins": 25.65730857849121, |
| "rewards/rejected": -32.5753059387207, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.8397643349099659, |
| "grad_norm": 3.7938156083328067e-07, |
| "learning_rate": 1.6002656042496682e-06, |
| "logits/chosen": -37.083927154541016, |
| "logits/rejected": -39.340457916259766, |
| "logps/chosen": -252.4654083251953, |
| "logps/rejected": -320.502685546875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.359490394592285, |
| "rewards/margins": 24.804195404052734, |
| "rewards/rejected": -31.163684844970703, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.8430835615301635, |
| "grad_norm": 6.859098357381299e-05, |
| "learning_rate": 1.5670650730411688e-06, |
| "logits/chosen": -36.62004470825195, |
| "logits/rejected": -39.08134460449219, |
| "logps/chosen": -271.40533447265625, |
| "logps/rejected": -341.4969177246094, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -7.194591522216797, |
| "rewards/margins": 25.380462646484375, |
| "rewards/rejected": -32.575050354003906, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.846402788150361, |
| "grad_norm": 3.553018541424535e-05, |
| "learning_rate": 1.5338645418326694e-06, |
| "logits/chosen": -36.68794631958008, |
| "logits/rejected": -39.383785247802734, |
| "logps/chosen": -257.90716552734375, |
| "logps/rejected": -327.06610107421875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.333869934082031, |
| "rewards/margins": 25.118257522583008, |
| "rewards/rejected": -31.452129364013672, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.8497220147705584, |
| "grad_norm": 6.490451050922275e-05, |
| "learning_rate": 1.50066401062417e-06, |
| "logits/chosen": -37.411705017089844, |
| "logits/rejected": -39.78534698486328, |
| "logps/chosen": -234.9873046875, |
| "logps/rejected": -303.38116455078125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.998685359954834, |
| "rewards/margins": 24.948780059814453, |
| "rewards/rejected": -29.947467803955078, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.853041241390756, |
| "grad_norm": 1.5090402484929655e-05, |
| "learning_rate": 1.467463479415671e-06, |
| "logits/chosen": -36.00045394897461, |
| "logits/rejected": -37.93694305419922, |
| "logps/chosen": -231.26699829101562, |
| "logps/rejected": -297.6919860839844, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.228448867797852, |
| "rewards/margins": 24.28873062133789, |
| "rewards/rejected": -29.517181396484375, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.8563604680109534, |
| "grad_norm": 2.468251523168874e-06, |
| "learning_rate": 1.4342629482071716e-06, |
| "logits/chosen": -35.700321197509766, |
| "logits/rejected": -37.91484451293945, |
| "logps/chosen": -249.6962127685547, |
| "logps/rejected": -317.5003356933594, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.312900066375732, |
| "rewards/margins": 24.76122283935547, |
| "rewards/rejected": -30.07412338256836, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.8596796946311509, |
| "grad_norm": 1.049622710525e-06, |
| "learning_rate": 1.401062416998672e-06, |
| "logits/chosen": -36.12137985229492, |
| "logits/rejected": -38.40441131591797, |
| "logps/chosen": -256.86279296875, |
| "logps/rejected": -325.9319763183594, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.079124927520752, |
| "rewards/margins": 25.098485946655273, |
| "rewards/rejected": -31.1776123046875, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.8629989212513485, |
| "grad_norm": 5.486945610755356e-06, |
| "learning_rate": 1.3678618857901727e-06, |
| "logits/chosen": -36.008670806884766, |
| "logits/rejected": -38.78040313720703, |
| "logps/chosen": -274.86517333984375, |
| "logps/rejected": -347.06109619140625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -7.957380771636963, |
| "rewards/margins": 26.006641387939453, |
| "rewards/rejected": -33.96402359008789, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.8663181478715459, |
| "grad_norm": 1.6589292499702424e-06, |
| "learning_rate": 1.3346613545816733e-06, |
| "logits/chosen": -36.0199089050293, |
| "logits/rejected": -38.339603424072266, |
| "logps/chosen": -250.30520629882812, |
| "logps/rejected": -319.3824768066406, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.83938455581665, |
| "rewards/margins": 25.026578903198242, |
| "rewards/rejected": -30.865966796875, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.8696373744917434, |
| "grad_norm": 2.6869927296502283e-06, |
| "learning_rate": 1.301460823373174e-06, |
| "logits/chosen": -35.62989044189453, |
| "logits/rejected": -38.205936431884766, |
| "logps/chosen": -257.1199035644531, |
| "logps/rejected": -323.7815246582031, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.572248458862305, |
| "rewards/margins": 24.289873123168945, |
| "rewards/rejected": -29.862117767333984, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.872956601111941, |
| "grad_norm": 6.404624582501128e-05, |
| "learning_rate": 1.2682602921646748e-06, |
| "logits/chosen": -35.45529556274414, |
| "logits/rejected": -37.626609802246094, |
| "logps/chosen": -256.4237976074219, |
| "logps/rejected": -324.92919921875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.755781173706055, |
| "rewards/margins": 24.921703338623047, |
| "rewards/rejected": -30.6774845123291, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.8762758277321384, |
| "grad_norm": 9.178786058328114e-06, |
| "learning_rate": 1.2350597609561754e-06, |
| "logits/chosen": -36.738868713378906, |
| "logits/rejected": -39.251304626464844, |
| "logps/chosen": -261.7998962402344, |
| "logps/rejected": -330.28558349609375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.661735534667969, |
| "rewards/margins": 24.9061222076416, |
| "rewards/rejected": -30.567859649658203, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.8795950543523359, |
| "grad_norm": 2.6355290174251422e-05, |
| "learning_rate": 1.201859229747676e-06, |
| "logits/chosen": -36.43751907348633, |
| "logits/rejected": -38.71375274658203, |
| "logps/chosen": -235.941162109375, |
| "logps/rejected": -302.32415771484375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -4.947510719299316, |
| "rewards/margins": 24.280080795288086, |
| "rewards/rejected": -29.227588653564453, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.8829142809725334, |
| "grad_norm": 5.58648844162235e-06, |
| "learning_rate": 1.1686586985391767e-06, |
| "logits/chosen": -36.9846305847168, |
| "logits/rejected": -39.65496826171875, |
| "logps/chosen": -230.78744506835938, |
| "logps/rejected": -299.64617919921875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.086447238922119, |
| "rewards/margins": 25.012691497802734, |
| "rewards/rejected": -30.099136352539062, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.8862335075927309, |
| "grad_norm": 7.070900755934417e-05, |
| "learning_rate": 1.1354581673306774e-06, |
| "logits/chosen": -36.42656707763672, |
| "logits/rejected": -38.978431701660156, |
| "logps/chosen": -281.9248046875, |
| "logps/rejected": -353.60308837890625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -7.565675258636475, |
| "rewards/margins": 25.867778778076172, |
| "rewards/rejected": -33.433448791503906, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.8895527342129284, |
| "grad_norm": 6.121000296843704e-06, |
| "learning_rate": 1.102257636122178e-06, |
| "logits/chosen": -36.232933044433594, |
| "logits/rejected": -38.41249084472656, |
| "logps/chosen": -255.221435546875, |
| "logps/rejected": -324.15008544921875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.308706760406494, |
| "rewards/margins": 25.112064361572266, |
| "rewards/rejected": -31.4207763671875, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.8928719608331259, |
| "grad_norm": 4.801144655175449e-07, |
| "learning_rate": 1.0690571049136787e-06, |
| "logits/chosen": -36.57440185546875, |
| "logits/rejected": -38.97066116333008, |
| "logps/chosen": -274.68109130859375, |
| "logps/rejected": -343.2183837890625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -8.028226852416992, |
| "rewards/margins": 24.872940063476562, |
| "rewards/rejected": -32.90116500854492, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.8961911874533234, |
| "grad_norm": 9.706584933155682e-06, |
| "learning_rate": 1.0358565737051795e-06, |
| "logits/chosen": -35.043174743652344, |
| "logits/rejected": -37.608097076416016, |
| "logps/chosen": -270.731689453125, |
| "logps/rejected": -338.45623779296875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.948904514312744, |
| "rewards/margins": 24.692392349243164, |
| "rewards/rejected": -31.64129638671875, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.8995104140735208, |
| "grad_norm": 1.1282833838777151e-05, |
| "learning_rate": 1.00265604249668e-06, |
| "logits/chosen": -36.31340026855469, |
| "logits/rejected": -38.685543060302734, |
| "logps/chosen": -244.37234497070312, |
| "logps/rejected": -312.83197021484375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.175669193267822, |
| "rewards/margins": 24.857559204101562, |
| "rewards/rejected": -31.03322410583496, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.9028296406937184, |
| "grad_norm": 2.1625977751682512e-05, |
| "learning_rate": 9.694555112881806e-07, |
| "logits/chosen": -36.470420837402344, |
| "logits/rejected": -38.7746696472168, |
| "logps/chosen": -256.0016174316406, |
| "logps/rejected": -326.2404479980469, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.9281110763549805, |
| "rewards/margins": 25.440807342529297, |
| "rewards/rejected": -32.368919372558594, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.9061488673139159, |
| "grad_norm": 6.486946222139522e-05, |
| "learning_rate": 9.362549800796813e-07, |
| "logits/chosen": -36.24733352661133, |
| "logits/rejected": -38.840904235839844, |
| "logps/chosen": -256.3532409667969, |
| "logps/rejected": -329.0278015136719, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -7.113903045654297, |
| "rewards/margins": 26.07107162475586, |
| "rewards/rejected": -33.184974670410156, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.9094680939341133, |
| "grad_norm": 1.6229272659984417e-05, |
| "learning_rate": 9.030544488711821e-07, |
| "logits/chosen": -35.76521682739258, |
| "logits/rejected": -38.19070053100586, |
| "logps/chosen": -270.4026184082031, |
| "logps/rejected": -339.18194580078125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -7.0731916427612305, |
| "rewards/margins": 24.943195343017578, |
| "rewards/rejected": -32.01638412475586, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.9127873205543109, |
| "grad_norm": 0.0001113278340199031, |
| "learning_rate": 8.698539176626827e-07, |
| "logits/chosen": -37.50344467163086, |
| "logits/rejected": -40.08824920654297, |
| "logps/chosen": -250.40576171875, |
| "logps/rejected": -319.7777404785156, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.421402931213379, |
| "rewards/margins": 25.24834442138672, |
| "rewards/rejected": -31.669748306274414, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.9161065471745083, |
| "grad_norm": 6.062048214516835e-07, |
| "learning_rate": 8.366533864541833e-07, |
| "logits/chosen": -35.75933074951172, |
| "logits/rejected": -38.32414627075195, |
| "logps/chosen": -278.0367431640625, |
| "logps/rejected": -348.5729064941406, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -7.881424903869629, |
| "rewards/margins": 25.48324966430664, |
| "rewards/rejected": -33.36467361450195, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.9194257737947058, |
| "grad_norm": 9.18797104532132e-06, |
| "learning_rate": 8.03452855245684e-07, |
| "logits/chosen": -36.331016540527344, |
| "logits/rejected": -38.755123138427734, |
| "logps/chosen": -277.12640380859375, |
| "logps/rejected": -348.4110412597656, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -7.622500419616699, |
| "rewards/margins": 25.761404037475586, |
| "rewards/rejected": -33.38390350341797, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.9227450004149034, |
| "grad_norm": 3.350157567183487e-05, |
| "learning_rate": 7.702523240371847e-07, |
| "logits/chosen": -34.45648193359375, |
| "logits/rejected": -36.82111358642578, |
| "logps/chosen": -268.44903564453125, |
| "logps/rejected": -338.30029296875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -7.01177978515625, |
| "rewards/margins": 25.25690460205078, |
| "rewards/rejected": -32.26868438720703, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.9260642270351008, |
| "grad_norm": 5.719254113500938e-05, |
| "learning_rate": 7.370517928286854e-07, |
| "logits/chosen": -36.46429443359375, |
| "logits/rejected": -39.10625457763672, |
| "logps/chosen": -228.64663696289062, |
| "logps/rejected": -293.049560546875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.573931694030762, |
| "rewards/margins": 23.767658233642578, |
| "rewards/rejected": -29.341588973999023, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.9293834536552983, |
| "grad_norm": 0.00011925002763746306, |
| "learning_rate": 7.03851261620186e-07, |
| "logits/chosen": -36.33781814575195, |
| "logits/rejected": -39.13783645629883, |
| "logps/chosen": -238.5530548095703, |
| "logps/rejected": -305.1521911621094, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.77554988861084, |
| "rewards/margins": 24.387807846069336, |
| "rewards/rejected": -30.16335678100586, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.9327026802754959, |
| "grad_norm": 5.343552402337082e-06, |
| "learning_rate": 6.706507304116866e-07, |
| "logits/chosen": -36.91948318481445, |
| "logits/rejected": -39.26295471191406, |
| "logps/chosen": -257.33135986328125, |
| "logps/rejected": -323.4559326171875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.2154645919799805, |
| "rewards/margins": 24.268310546875, |
| "rewards/rejected": -30.483774185180664, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.9360219068956933, |
| "grad_norm": 2.8307771572144702e-05, |
| "learning_rate": 6.374501992031873e-07, |
| "logits/chosen": -34.397552490234375, |
| "logits/rejected": -37.383384704589844, |
| "logps/chosen": -290.0972900390625, |
| "logps/rejected": -361.43756103515625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -8.181169509887695, |
| "rewards/margins": 25.659854888916016, |
| "rewards/rejected": -33.841026306152344, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.9393411335158908, |
| "grad_norm": 5.62709647056181e-06, |
| "learning_rate": 6.04249667994688e-07, |
| "logits/chosen": -35.95421600341797, |
| "logits/rejected": -38.090457916259766, |
| "logps/chosen": -290.0306091308594, |
| "logps/rejected": -362.3145446777344, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -8.559988975524902, |
| "rewards/margins": 26.056650161743164, |
| "rewards/rejected": -34.61663818359375, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.9426603601360883, |
| "grad_norm": 2.8976512112421915e-05, |
| "learning_rate": 5.710491367861886e-07, |
| "logits/chosen": -36.20732879638672, |
| "logits/rejected": -38.5179557800293, |
| "logps/chosen": -271.90667724609375, |
| "logps/rejected": -342.0574035644531, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -7.341311454772949, |
| "rewards/margins": 25.435096740722656, |
| "rewards/rejected": -32.77640914916992, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.9459795867562858, |
| "grad_norm": 8.338892257597763e-06, |
| "learning_rate": 5.378486055776893e-07, |
| "logits/chosen": -35.963661193847656, |
| "logits/rejected": -38.3126106262207, |
| "logps/chosen": -245.7353057861328, |
| "logps/rejected": -313.90399169921875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.468613624572754, |
| "rewards/margins": 24.895671844482422, |
| "rewards/rejected": -30.364282608032227, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.9492988133764833, |
| "grad_norm": 2.6563682240521302e-06, |
| "learning_rate": 5.046480743691899e-07, |
| "logits/chosen": -34.87287139892578, |
| "logits/rejected": -37.09748458862305, |
| "logps/chosen": -276.9396057128906, |
| "logps/rejected": -347.16046142578125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -7.051947116851807, |
| "rewards/margins": 25.537128448486328, |
| "rewards/rejected": -32.589073181152344, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.9526180399966808, |
| "grad_norm": 3.179600525982096e-06, |
| "learning_rate": 4.714475431606906e-07, |
| "logits/chosen": -35.76880645751953, |
| "logits/rejected": -38.161956787109375, |
| "logps/chosen": -262.0658874511719, |
| "logps/rejected": -327.91729736328125, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.562504768371582, |
| "rewards/margins": 24.080707550048828, |
| "rewards/rejected": -30.643213272094727, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.9559372666168783, |
| "grad_norm": 2.855755155906081e-06, |
| "learning_rate": 4.382470119521913e-07, |
| "logits/chosen": -35.748382568359375, |
| "logits/rejected": -38.125755310058594, |
| "logps/chosen": -239.87118530273438, |
| "logps/rejected": -305.96588134765625, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.7444939613342285, |
| "rewards/margins": 24.06570816040039, |
| "rewards/rejected": -29.81020164489746, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.9592564932370757, |
| "grad_norm": 0.0001762977335602045, |
| "learning_rate": 4.0504648074369194e-07, |
| "logits/chosen": -36.54397201538086, |
| "logits/rejected": -39.024314880371094, |
| "logps/chosen": -240.02841186523438, |
| "logps/rejected": -306.2109680175781, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.792388916015625, |
| "rewards/margins": 24.234182357788086, |
| "rewards/rejected": -30.026575088500977, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.9625757198572733, |
| "grad_norm": 4.043288208777085e-05, |
| "learning_rate": 3.718459495351926e-07, |
| "logits/chosen": -36.67218780517578, |
| "logits/rejected": -39.409828186035156, |
| "logps/chosen": -228.93173217773438, |
| "logps/rejected": -295.4620666503906, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -5.035333156585693, |
| "rewards/margins": 24.397380828857422, |
| "rewards/rejected": -29.43271255493164, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.9658949464774708, |
| "grad_norm": 3.066948920604773e-06, |
| "learning_rate": 3.3864541832669323e-07, |
| "logits/chosen": -36.082374572753906, |
| "logits/rejected": -38.355224609375, |
| "logps/chosen": -268.31268310546875, |
| "logps/rejected": -339.3592834472656, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -7.250875949859619, |
| "rewards/margins": 25.694320678710938, |
| "rewards/rejected": -32.94519805908203, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.9692141730976682, |
| "grad_norm": 1.1050363355025183e-05, |
| "learning_rate": 3.054448871181939e-07, |
| "logits/chosen": -36.860496520996094, |
| "logits/rejected": -39.4510383605957, |
| "logps/chosen": -244.20462036132812, |
| "logps/rejected": -314.990234375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.0202860832214355, |
| "rewards/margins": 25.554231643676758, |
| "rewards/rejected": -31.57451820373535, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.9725333997178658, |
| "grad_norm": 4.18513263866771e-06, |
| "learning_rate": 2.7224435590969457e-07, |
| "logits/chosen": -36.25141143798828, |
| "logits/rejected": -38.87180709838867, |
| "logps/chosen": -242.8925323486328, |
| "logps/rejected": -312.47308349609375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.114789962768555, |
| "rewards/margins": 25.346786499023438, |
| "rewards/rejected": -31.46157455444336, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.9758526263380632, |
| "grad_norm": 1.5998368326108903e-05, |
| "learning_rate": 2.390438247011952e-07, |
| "logits/chosen": -35.24533462524414, |
| "logits/rejected": -37.914817810058594, |
| "logps/chosen": -279.0862121582031, |
| "logps/rejected": -350.7375793457031, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -7.582180023193359, |
| "rewards/margins": 25.864376068115234, |
| "rewards/rejected": -33.446556091308594, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.9791718529582607, |
| "grad_norm": 1.7516629213787382e-06, |
| "learning_rate": 2.0584329349269588e-07, |
| "logits/chosen": -37.73789978027344, |
| "logits/rejected": -40.67639923095703, |
| "logps/chosen": -247.8397216796875, |
| "logps/rejected": -319.43109130859375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.405348777770996, |
| "rewards/margins": 25.796106338500977, |
| "rewards/rejected": -32.201454162597656, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.9824910795784583, |
| "grad_norm": 7.380790975730633e-06, |
| "learning_rate": 1.7264276228419655e-07, |
| "logits/chosen": -36.16815948486328, |
| "logits/rejected": -38.34816360473633, |
| "logps/chosen": -255.6117706298828, |
| "logps/rejected": -326.8828430175781, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -7.3727617263793945, |
| "rewards/margins": 25.710683822631836, |
| "rewards/rejected": -33.08344650268555, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.9858103061986557, |
| "grad_norm": 3.3136252568510827e-06, |
| "learning_rate": 1.3944223107569722e-07, |
| "logits/chosen": -34.815086364746094, |
| "logits/rejected": -37.27305603027344, |
| "logps/chosen": -257.39678955078125, |
| "logps/rejected": -325.2138671875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.813126564025879, |
| "rewards/margins": 24.72214698791504, |
| "rewards/rejected": -31.5352725982666, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.9891295328188532, |
| "grad_norm": 2.209369449701626e-05, |
| "learning_rate": 1.0624169986719788e-07, |
| "logits/chosen": -35.99190902709961, |
| "logits/rejected": -38.414974212646484, |
| "logps/chosen": -275.14166259765625, |
| "logps/rejected": -346.3056945800781, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -7.347718715667725, |
| "rewards/margins": 25.74213218688965, |
| "rewards/rejected": -33.08985137939453, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.9924487594390508, |
| "grad_norm": 4.2861594806709036e-07, |
| "learning_rate": 7.304116865869855e-08, |
| "logits/chosen": -36.379974365234375, |
| "logits/rejected": -38.90006637573242, |
| "logps/chosen": -258.8759765625, |
| "logps/rejected": -328.20111083984375, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -7.374964714050293, |
| "rewards/margins": 25.241411209106445, |
| "rewards/rejected": -32.61638259887695, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.9957679860592482, |
| "grad_norm": 3.627765181590803e-06, |
| "learning_rate": 3.984063745019921e-08, |
| "logits/chosen": -35.75197982788086, |
| "logits/rejected": -38.48480987548828, |
| "logps/chosen": -257.9820861816406, |
| "logps/rejected": -326.41229248046875, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.912039279937744, |
| "rewards/margins": 24.800914764404297, |
| "rewards/rejected": -31.712955474853516, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.9990872126794457, |
| "grad_norm": 8.645590241940226e-06, |
| "learning_rate": 6.640106241699867e-09, |
| "logits/chosen": -35.83268356323242, |
| "logits/rejected": -38.067604064941406, |
| "logps/chosen": -274.35833740234375, |
| "logps/rejected": -341.5016174316406, |
| "loss": 0.0, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -6.786323547363281, |
| "rewards/margins": 24.36429786682129, |
| "rewards/rejected": -31.150623321533203, |
| "step": 3010 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 3012, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "total_flos": 0.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|