| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.9978094194961664, | |
| "eval_steps": 50000, | |
| "global_step": 1216, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.008214676889375685, | |
| "grad_norm": 47.48785366410319, | |
| "learning_rate": 4.0983606557377046e-08, | |
| "logits/chosen": 26.403932571411133, | |
| "logits/rejected": 25.755094528198242, | |
| "logps/chosen": -185.5782928466797, | |
| "logps/rejected": -79.66442108154297, | |
| "loss": 1.7879, | |
| "rewards/accuracies": 0.30666670203208923, | |
| "rewards/chosen": 0.008285612799227238, | |
| "rewards/margins": 0.017053820192813873, | |
| "rewards/rejected": -0.008768204599618912, | |
| "sft_loss": 0.6387583017349243, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.01642935377875137, | |
| "grad_norm": 36.134481992571715, | |
| "learning_rate": 8.196721311475409e-08, | |
| "logits/chosen": 25.775484085083008, | |
| "logits/rejected": 25.31159210205078, | |
| "logps/chosen": -152.4672088623047, | |
| "logps/rejected": -72.757080078125, | |
| "loss": 1.6789, | |
| "rewards/accuracies": 0.7333334684371948, | |
| "rewards/chosen": -0.026889141649007797, | |
| "rewards/margins": 0.14848218858242035, | |
| "rewards/rejected": -0.17537136375904083, | |
| "sft_loss": 0.6469724774360657, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.024644030668127054, | |
| "grad_norm": 19.978551205164187, | |
| "learning_rate": 1.2295081967213113e-07, | |
| "logits/chosen": 26.670787811279297, | |
| "logits/rejected": 26.257781982421875, | |
| "logps/chosen": -176.73304748535156, | |
| "logps/rejected": -84.2028579711914, | |
| "loss": 1.4459, | |
| "rewards/accuracies": 0.8666666746139526, | |
| "rewards/chosen": -0.1812039315700531, | |
| "rewards/margins": 0.5640282034873962, | |
| "rewards/rejected": -0.7452322244644165, | |
| "sft_loss": 0.6364741921424866, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.03285870755750274, | |
| "grad_norm": 20.48799482343835, | |
| "learning_rate": 1.6393442622950818e-07, | |
| "logits/chosen": 26.263166427612305, | |
| "logits/rejected": 26.03022003173828, | |
| "logps/chosen": -214.57823181152344, | |
| "logps/rejected": -111.45527648925781, | |
| "loss": 1.316, | |
| "rewards/accuracies": 0.9200000166893005, | |
| "rewards/chosen": -0.642541766166687, | |
| "rewards/margins": 1.2724699974060059, | |
| "rewards/rejected": -1.9150116443634033, | |
| "sft_loss": 0.7241686582565308, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.04107338444687842, | |
| "grad_norm": 24.43893120773317, | |
| "learning_rate": 2.0491803278688524e-07, | |
| "logits/chosen": 25.63840103149414, | |
| "logits/rejected": 25.88968849182129, | |
| "logps/chosen": -180.67430114746094, | |
| "logps/rejected": -108.99486541748047, | |
| "loss": 1.26, | |
| "rewards/accuracies": 0.9466667175292969, | |
| "rewards/chosen": -0.9794896245002747, | |
| "rewards/margins": 1.706125020980835, | |
| "rewards/rejected": -2.6856143474578857, | |
| "sft_loss": 0.7140628695487976, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.04928806133625411, | |
| "grad_norm": 15.575922163206743, | |
| "learning_rate": 2.4590163934426226e-07, | |
| "logits/chosen": 25.174482345581055, | |
| "logits/rejected": 25.23969841003418, | |
| "logps/chosen": -213.48123168945312, | |
| "logps/rejected": -114.4116439819336, | |
| "loss": 1.1511, | |
| "rewards/accuracies": 0.9466667175292969, | |
| "rewards/chosen": -0.9717932343482971, | |
| "rewards/margins": 2.3237061500549316, | |
| "rewards/rejected": -3.295499563217163, | |
| "sft_loss": 0.6879211664199829, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.05750273822562979, | |
| "grad_norm": 12.317269413176323, | |
| "learning_rate": 2.868852459016393e-07, | |
| "logits/chosen": 24.615764617919922, | |
| "logits/rejected": 24.808069229125977, | |
| "logps/chosen": -202.15489196777344, | |
| "logps/rejected": -124.00420379638672, | |
| "loss": 1.0435, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -1.0643211603164673, | |
| "rewards/margins": 2.588770627975464, | |
| "rewards/rejected": -3.6530916690826416, | |
| "sft_loss": 0.7430208325386047, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.06571741511500548, | |
| "grad_norm": 11.98913328054039, | |
| "learning_rate": 3.2786885245901637e-07, | |
| "logits/chosen": 24.245140075683594, | |
| "logits/rejected": 24.268098831176758, | |
| "logps/chosen": -207.348876953125, | |
| "logps/rejected": -116.2168960571289, | |
| "loss": 0.9343, | |
| "rewards/accuracies": 0.9333333969116211, | |
| "rewards/chosen": -1.0519558191299438, | |
| "rewards/margins": 2.5677475929260254, | |
| "rewards/rejected": -3.619703531265259, | |
| "sft_loss": 0.7124413251876831, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.07393209200438117, | |
| "grad_norm": 11.849547311712948, | |
| "learning_rate": 3.6885245901639347e-07, | |
| "logits/chosen": 22.61182403564453, | |
| "logits/rejected": 22.616382598876953, | |
| "logps/chosen": -222.93838500976562, | |
| "logps/rejected": -123.43074798583984, | |
| "loss": 0.8683, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -1.442903995513916, | |
| "rewards/margins": 2.7517101764678955, | |
| "rewards/rejected": -4.194613456726074, | |
| "sft_loss": 0.702341616153717, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.08214676889375684, | |
| "grad_norm": 11.859772629239353, | |
| "learning_rate": 4.0983606557377047e-07, | |
| "logits/chosen": 20.62839126586914, | |
| "logits/rejected": 20.336801528930664, | |
| "logps/chosen": -241.59852600097656, | |
| "logps/rejected": -132.82681274414062, | |
| "loss": 0.7963, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -2.2198777198791504, | |
| "rewards/margins": 3.0024592876434326, | |
| "rewards/rejected": -5.2223358154296875, | |
| "sft_loss": 0.7061720490455627, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.09036144578313253, | |
| "grad_norm": 9.406942779681957, | |
| "learning_rate": 4.508196721311475e-07, | |
| "logits/chosen": 19.715351104736328, | |
| "logits/rejected": 20.35331153869629, | |
| "logps/chosen": -208.7209930419922, | |
| "logps/rejected": -150.72914123535156, | |
| "loss": 0.8148, | |
| "rewards/accuracies": 0.9466667175292969, | |
| "rewards/chosen": -2.6436386108398438, | |
| "rewards/margins": 3.832695722579956, | |
| "rewards/rejected": -6.476334571838379, | |
| "sft_loss": 0.7786983251571655, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.09857612267250822, | |
| "grad_norm": 10.934185099116656, | |
| "learning_rate": 4.918032786885245e-07, | |
| "logits/chosen": 20.9300537109375, | |
| "logits/rejected": 21.388505935668945, | |
| "logps/chosen": -192.5828399658203, | |
| "logps/rejected": -125.1326904296875, | |
| "loss": 0.8114, | |
| "rewards/accuracies": 0.9200000166893005, | |
| "rewards/chosen": -2.3978421688079834, | |
| "rewards/margins": 3.1992502212524414, | |
| "rewards/rejected": -5.597092628479004, | |
| "sft_loss": 0.698898434638977, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.10679079956188389, | |
| "grad_norm": 12.361759850691927, | |
| "learning_rate": 4.999852034151641e-07, | |
| "logits/chosen": 19.11568832397461, | |
| "logits/rejected": 19.857196807861328, | |
| "logps/chosen": -242.90460205078125, | |
| "logps/rejected": -149.67938232421875, | |
| "loss": 0.7666, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -2.5540611743927, | |
| "rewards/margins": 3.71470308303833, | |
| "rewards/rejected": -6.268764019012451, | |
| "sft_loss": 0.7993389368057251, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.11500547645125958, | |
| "grad_norm": 14.492049698010485, | |
| "learning_rate": 4.999250952911133e-07, | |
| "logits/chosen": 20.96298599243164, | |
| "logits/rejected": 20.906280517578125, | |
| "logps/chosen": -236.47763061523438, | |
| "logps/rejected": -142.59445190429688, | |
| "loss": 0.6927, | |
| "rewards/accuracies": 0.9466667175292969, | |
| "rewards/chosen": -2.4225642681121826, | |
| "rewards/margins": 4.021193981170654, | |
| "rewards/rejected": -6.4437575340271, | |
| "sft_loss": 0.8038942217826843, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.12322015334063527, | |
| "grad_norm": 17.551745284718073, | |
| "learning_rate": 4.998187619501184e-07, | |
| "logits/chosen": 20.637529373168945, | |
| "logits/rejected": 21.148029327392578, | |
| "logps/chosen": -266.9391784667969, | |
| "logps/rejected": -173.1654510498047, | |
| "loss": 0.6651, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -3.129103660583496, | |
| "rewards/margins": 5.091865062713623, | |
| "rewards/rejected": -8.220968246459961, | |
| "sft_loss": 0.8789225816726685, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.13143483023001096, | |
| "grad_norm": 21.266109357356587, | |
| "learning_rate": 4.996662230591989e-07, | |
| "logits/chosen": 18.540781021118164, | |
| "logits/rejected": 19.185565948486328, | |
| "logps/chosen": -252.1251983642578, | |
| "logps/rejected": -169.13851928710938, | |
| "loss": 0.706, | |
| "rewards/accuracies": 0.9333333969116211, | |
| "rewards/chosen": -3.4190313816070557, | |
| "rewards/margins": 4.7408881187438965, | |
| "rewards/rejected": -8.159918785095215, | |
| "sft_loss": 0.8200284242630005, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.13964950711938665, | |
| "grad_norm": 14.68921798619268, | |
| "learning_rate": 4.994675068313813e-07, | |
| "logits/chosen": 17.844524383544922, | |
| "logits/rejected": 19.307209014892578, | |
| "logps/chosen": -235.93295288085938, | |
| "logps/rejected": -164.65467834472656, | |
| "loss": 0.6425, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -3.202953577041626, | |
| "rewards/margins": 4.453563213348389, | |
| "rewards/rejected": -7.656517028808594, | |
| "sft_loss": 0.8084096908569336, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.14786418400876233, | |
| "grad_norm": 9.391954380287526, | |
| "learning_rate": 4.992226500204806e-07, | |
| "logits/chosen": 18.810604095458984, | |
| "logits/rejected": 19.509326934814453, | |
| "logps/chosen": -239.79638671875, | |
| "logps/rejected": -149.21372985839844, | |
| "loss": 0.6741, | |
| "rewards/accuracies": 0.9466666579246521, | |
| "rewards/chosen": -2.8668696880340576, | |
| "rewards/margins": 4.099938869476318, | |
| "rewards/rejected": -6.966808795928955, | |
| "sft_loss": 0.8505186438560486, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.156078860898138, | |
| "grad_norm": 8.292078325061183, | |
| "learning_rate": 4.989316979143029e-07, | |
| "logits/chosen": 19.036439895629883, | |
| "logits/rejected": 18.50504493713379, | |
| "logps/chosen": -243.55430603027344, | |
| "logps/rejected": -141.56640625, | |
| "loss": 0.7786, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -2.8055150508880615, | |
| "rewards/margins": 4.023467540740967, | |
| "rewards/rejected": -6.828982830047607, | |
| "sft_loss": 0.8537193536758423, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.16429353778751368, | |
| "grad_norm": 11.759496127210191, | |
| "learning_rate": 4.985947043262686e-07, | |
| "logits/chosen": 18.438268661499023, | |
| "logits/rejected": 18.92384147644043, | |
| "logps/chosen": -256.82135009765625, | |
| "logps/rejected": -162.3760223388672, | |
| "loss": 0.656, | |
| "rewards/accuracies": 0.9200000166893005, | |
| "rewards/chosen": -3.251594066619873, | |
| "rewards/margins": 4.7661452293396, | |
| "rewards/rejected": -8.017740249633789, | |
| "sft_loss": 0.8523219227790833, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.17250821467688937, | |
| "grad_norm": 13.225983298656475, | |
| "learning_rate": 4.982117315854593e-07, | |
| "logits/chosen": 19.018491744995117, | |
| "logits/rejected": 19.4432373046875, | |
| "logps/chosen": -242.88742065429688, | |
| "logps/rejected": -160.6437225341797, | |
| "loss": 0.6173, | |
| "rewards/accuracies": 0.9466666579246521, | |
| "rewards/chosen": -3.371706962585449, | |
| "rewards/margins": 4.9150261878967285, | |
| "rewards/rejected": -8.286733627319336, | |
| "sft_loss": 0.8633176684379578, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.18072289156626506, | |
| "grad_norm": 33.60275949690272, | |
| "learning_rate": 4.977828505250903e-07, | |
| "logits/chosen": 18.26275062561035, | |
| "logits/rejected": 18.561012268066406, | |
| "logps/chosen": -232.76333618164062, | |
| "logps/rejected": -153.5156707763672, | |
| "loss": 0.6725, | |
| "rewards/accuracies": 0.9466666579246521, | |
| "rewards/chosen": -3.7783102989196777, | |
| "rewards/margins": 4.282144069671631, | |
| "rewards/rejected": -8.060454368591309, | |
| "sft_loss": 0.8514001369476318, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.18893756845564075, | |
| "grad_norm": 29.7322754671122, | |
| "learning_rate": 4.973081404694087e-07, | |
| "logits/chosen": 17.40985679626465, | |
| "logits/rejected": 18.532135009765625, | |
| "logps/chosen": -263.5098571777344, | |
| "logps/rejected": -179.07461547851562, | |
| "loss": 0.6416, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -4.028925895690918, | |
| "rewards/margins": 5.305994033813477, | |
| "rewards/rejected": -9.334918975830078, | |
| "sft_loss": 0.9138454794883728, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.19715224534501644, | |
| "grad_norm": 11.64226677208211, | |
| "learning_rate": 4.967876892190227e-07, | |
| "logits/chosen": 18.535491943359375, | |
| "logits/rejected": 18.528560638427734, | |
| "logps/chosen": -261.1396484375, | |
| "logps/rejected": -164.66261291503906, | |
| "loss": 0.6327, | |
| "rewards/accuracies": 0.9466667175292969, | |
| "rewards/chosen": -3.7333872318267822, | |
| "rewards/margins": 4.9736409187316895, | |
| "rewards/rejected": -8.707027435302734, | |
| "sft_loss": 0.8873167634010315, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.20536692223439212, | |
| "grad_norm": 10.419489404925384, | |
| "learning_rate": 4.962215930346614e-07, | |
| "logits/chosen": 18.076738357543945, | |
| "logits/rejected": 18.797412872314453, | |
| "logps/chosen": -240.43885803222656, | |
| "logps/rejected": -170.57994079589844, | |
| "loss": 0.6021, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -3.8788936138153076, | |
| "rewards/margins": 5.037622928619385, | |
| "rewards/rejected": -8.91651725769043, | |
| "sft_loss": 0.8787587285041809, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.21358159912376778, | |
| "grad_norm": 13.538147865005195, | |
| "learning_rate": 4.956099566193716e-07, | |
| "logits/chosen": 17.794748306274414, | |
| "logits/rejected": 18.117393493652344, | |
| "logps/chosen": -263.0421447753906, | |
| "logps/rejected": -180.68548583984375, | |
| "loss": 0.5662, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -4.157100677490234, | |
| "rewards/margins": 5.220449924468994, | |
| "rewards/rejected": -9.377551078796387, | |
| "sft_loss": 0.8972741961479187, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.22179627601314347, | |
| "grad_norm": 14.579358533691158, | |
| "learning_rate": 4.949528930991521e-07, | |
| "logits/chosen": 17.554058074951172, | |
| "logits/rejected": 18.180675506591797, | |
| "logps/chosen": -265.0473327636719, | |
| "logps/rejected": -177.85751342773438, | |
| "loss": 0.6399, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -3.8493740558624268, | |
| "rewards/margins": 5.235028266906738, | |
| "rewards/rejected": -9.084402084350586, | |
| "sft_loss": 0.8204969167709351, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.23001095290251916, | |
| "grad_norm": 12.455186291161809, | |
| "learning_rate": 4.9425052400203e-07, | |
| "logits/chosen": 17.611921310424805, | |
| "logits/rejected": 17.878339767456055, | |
| "logps/chosen": -265.25787353515625, | |
| "logps/rejected": -185.50375366210938, | |
| "loss": 0.6103, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -4.896492958068848, | |
| "rewards/margins": 4.968528747558594, | |
| "rewards/rejected": -9.865021705627441, | |
| "sft_loss": 0.8832098245620728, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.23822562979189485, | |
| "grad_norm": 12.455826945227212, | |
| "learning_rate": 4.935029792355834e-07, | |
| "logits/chosen": 17.996692657470703, | |
| "logits/rejected": 18.594377517700195, | |
| "logps/chosen": -286.6059875488281, | |
| "logps/rejected": -200.05149841308594, | |
| "loss": 0.543, | |
| "rewards/accuracies": 0.9466666579246521, | |
| "rewards/chosen": -5.263542175292969, | |
| "rewards/margins": 5.605571269989014, | |
| "rewards/rejected": -10.86911392211914, | |
| "sft_loss": 0.8996745944023132, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.24644030668127054, | |
| "grad_norm": 15.362573644388778, | |
| "learning_rate": 4.927103970629147e-07, | |
| "logits/chosen": 18.072965621948242, | |
| "logits/rejected": 18.25052261352539, | |
| "logps/chosen": -269.8097839355469, | |
| "logps/rejected": -185.32431030273438, | |
| "loss": 0.6219, | |
| "rewards/accuracies": 0.9333333969116211, | |
| "rewards/chosen": -4.9397172927856445, | |
| "rewards/margins": 5.206496715545654, | |
| "rewards/rejected": -10.14621353149414, | |
| "sft_loss": 0.7995728254318237, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.2546549835706462, | |
| "grad_norm": 11.01295684823168, | |
| "learning_rate": 4.918729240770775e-07, | |
| "logits/chosen": 17.353046417236328, | |
| "logits/rejected": 18.587129592895508, | |
| "logps/chosen": -240.89488220214844, | |
| "logps/rejected": -173.4665069580078, | |
| "loss": 0.5702, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -4.678676128387451, | |
| "rewards/margins": 5.200152397155762, | |
| "rewards/rejected": -9.878829002380371, | |
| "sft_loss": 0.9399448037147522, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.2628696604600219, | |
| "grad_norm": 19.41550454155631, | |
| "learning_rate": 4.909907151739633e-07, | |
| "logits/chosen": 18.130189895629883, | |
| "logits/rejected": 18.379247665405273, | |
| "logps/chosen": -292.39990234375, | |
| "logps/rejected": -188.62220764160156, | |
| "loss": 0.6561, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -4.744537353515625, | |
| "rewards/margins": 5.8243794441223145, | |
| "rewards/rejected": -10.568917274475098, | |
| "sft_loss": 0.8947219848632812, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.2710843373493976, | |
| "grad_norm": 11.498941704903908, | |
| "learning_rate": 4.900639335236526e-07, | |
| "logits/chosen": 18.79334259033203, | |
| "logits/rejected": 19.24587059020996, | |
| "logps/chosen": -271.9427185058594, | |
| "logps/rejected": -179.41293334960938, | |
| "loss": 0.607, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -4.425381183624268, | |
| "rewards/margins": 5.343040943145752, | |
| "rewards/rejected": -9.76842212677002, | |
| "sft_loss": 0.9064626097679138, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.2792990142387733, | |
| "grad_norm": 10.84500529690262, | |
| "learning_rate": 4.890927505402359e-07, | |
| "logits/chosen": 16.892650604248047, | |
| "logits/rejected": 17.597482681274414, | |
| "logps/chosen": -238.55162048339844, | |
| "logps/rejected": -170.00466918945312, | |
| "loss": 0.5889, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -4.459685802459717, | |
| "rewards/margins": 4.842031002044678, | |
| "rewards/rejected": -9.301715850830078, | |
| "sft_loss": 0.8489271402359009, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.28751369112814895, | |
| "grad_norm": 16.18327307978759, | |
| "learning_rate": 4.880773458501089e-07, | |
| "logits/chosen": 19.4614315032959, | |
| "logits/rejected": 19.801300048828125, | |
| "logps/chosen": -232.73573303222656, | |
| "logps/rejected": -165.04103088378906, | |
| "loss": 0.5662, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -4.241008758544922, | |
| "rewards/margins": 4.859863758087158, | |
| "rewards/rejected": -9.100872993469238, | |
| "sft_loss": 0.8601513504981995, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.29572836801752467, | |
| "grad_norm": 10.178119222467004, | |
| "learning_rate": 4.870179072587498e-07, | |
| "logits/chosen": 17.228599548339844, | |
| "logits/rejected": 17.30803871154785, | |
| "logps/chosen": -250.42587280273438, | |
| "logps/rejected": -171.54635620117188, | |
| "loss": 0.6129, | |
| "rewards/accuracies": 0.9333333373069763, | |
| "rewards/chosen": -5.068057537078857, | |
| "rewards/margins": 5.1040239334106445, | |
| "rewards/rejected": -10.172082901000977, | |
| "sft_loss": 0.9672516584396362, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.30394304490690033, | |
| "grad_norm": 8.317024863573055, | |
| "learning_rate": 4.859146307159841e-07, | |
| "logits/chosen": 18.039478302001953, | |
| "logits/rejected": 18.52968406677246, | |
| "logps/chosen": -248.23155212402344, | |
| "logps/rejected": -179.2881317138672, | |
| "loss": 0.5417, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -5.143929481506348, | |
| "rewards/margins": 5.1267170906066895, | |
| "rewards/rejected": -10.270648002624512, | |
| "sft_loss": 0.8881379961967468, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.312157721796276, | |
| "grad_norm": 11.89542898588366, | |
| "learning_rate": 4.847677202797414e-07, | |
| "logits/chosen": 18.8001708984375, | |
| "logits/rejected": 19.126699447631836, | |
| "logps/chosen": -263.02789306640625, | |
| "logps/rejected": -183.99911499023438, | |
| "loss": 0.5551, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -5.0858845710754395, | |
| "rewards/margins": 5.6055006980896, | |
| "rewards/rejected": -10.691385269165039, | |
| "sft_loss": 0.8070122599601746, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.3203723986856517, | |
| "grad_norm": 12.1886798786308, | |
| "learning_rate": 4.835773880783144e-07, | |
| "logits/chosen": 16.390464782714844, | |
| "logits/rejected": 17.854284286499023, | |
| "logps/chosen": -269.9723815917969, | |
| "logps/rejected": -200.60789489746094, | |
| "loss": 0.5446, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -5.6444902420043945, | |
| "rewards/margins": 6.389484882354736, | |
| "rewards/rejected": -12.033974647521973, | |
| "sft_loss": 0.8605390191078186, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.32858707557502737, | |
| "grad_norm": 11.13911341274398, | |
| "learning_rate": 4.823438542711238e-07, | |
| "logits/chosen": 17.828205108642578, | |
| "logits/rejected": 18.60173797607422, | |
| "logps/chosen": -277.97259521484375, | |
| "logps/rejected": -203.9155731201172, | |
| "loss": 0.5444, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -5.445572853088379, | |
| "rewards/margins": 6.231374740600586, | |
| "rewards/rejected": -11.676946640014648, | |
| "sft_loss": 0.9524543881416321, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.3368017524644031, | |
| "grad_norm": 59.69351759377879, | |
| "learning_rate": 4.81067347007999e-07, | |
| "logits/chosen": 18.93602752685547, | |
| "logits/rejected": 19.728424072265625, | |
| "logps/chosen": -247.34567260742188, | |
| "logps/rejected": -173.0783233642578, | |
| "loss": 0.6075, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -4.630053997039795, | |
| "rewards/margins": 4.911181449890137, | |
| "rewards/rejected": -9.54123592376709, | |
| "sft_loss": 0.9002848863601685, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.34501642935377874, | |
| "grad_norm": 8.657351709118194, | |
| "learning_rate": 4.797481023869801e-07, | |
| "logits/chosen": 18.50823974609375, | |
| "logits/rejected": 18.78363037109375, | |
| "logps/chosen": -245.55979919433594, | |
| "logps/rejected": -182.1737518310547, | |
| "loss": 0.5425, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -5.3583760261535645, | |
| "rewards/margins": 5.406437397003174, | |
| "rewards/rejected": -10.764813423156738, | |
| "sft_loss": 0.9510916471481323, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.35323110624315446, | |
| "grad_norm": 19.28978220217397, | |
| "learning_rate": 4.783863644106502e-07, | |
| "logits/chosen": 17.9003849029541, | |
| "logits/rejected": 19.15799903869629, | |
| "logps/chosen": -240.30958557128906, | |
| "logps/rejected": -187.32284545898438, | |
| "loss": 0.546, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -5.292669296264648, | |
| "rewards/margins": 5.617033004760742, | |
| "rewards/rejected": -10.909701347351074, | |
| "sft_loss": 0.9965067505836487, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.3614457831325301, | |
| "grad_norm": 8.529326956491959, | |
| "learning_rate": 4.769823849410053e-07, | |
| "logits/chosen": 15.990920066833496, | |
| "logits/rejected": 17.267040252685547, | |
| "logps/chosen": -283.7446594238281, | |
| "logps/rejected": -209.57525634765625, | |
| "loss": 0.5062, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -5.516228675842285, | |
| "rewards/margins": 6.538068771362305, | |
| "rewards/rejected": -12.054296493530273, | |
| "sft_loss": 0.9376140832901001, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.3696604600219058, | |
| "grad_norm": 14.616903394027977, | |
| "learning_rate": 4.7553642365287127e-07, | |
| "logits/chosen": 16.816274642944336, | |
| "logits/rejected": 17.819963455200195, | |
| "logps/chosen": -245.84878540039062, | |
| "logps/rejected": -188.35284423828125, | |
| "loss": 0.5832, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -5.176213264465332, | |
| "rewards/margins": 5.302474498748779, | |
| "rewards/rejected": -10.478687286376953, | |
| "sft_loss": 1.0134352445602417, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.3778751369112815, | |
| "grad_norm": 15.650965079934865, | |
| "learning_rate": 4.7404874798587493e-07, | |
| "logits/chosen": 18.04664421081543, | |
| "logits/rejected": 19.232574462890625, | |
| "logps/chosen": -268.1763610839844, | |
| "logps/rejected": -193.1671600341797, | |
| "loss": 0.5248, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -5.173998832702637, | |
| "rewards/margins": 5.888847827911377, | |
| "rewards/rejected": -11.062848091125488, | |
| "sft_loss": 0.9188562035560608, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.38608981380065716, | |
| "grad_norm": 9.155968536476317, | |
| "learning_rate": 4.7251963309497965e-07, | |
| "logits/chosen": 17.16444206237793, | |
| "logits/rejected": 18.188404083251953, | |
| "logps/chosen": -281.6944580078125, | |
| "logps/rejected": -214.91883850097656, | |
| "loss": 0.5831, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -6.146281719207764, | |
| "rewards/margins": 6.598486423492432, | |
| "rewards/rejected": -12.744769096374512, | |
| "sft_loss": 1.0649549961090088, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.39430449069003287, | |
| "grad_norm": 13.480064050397614, | |
| "learning_rate": 4.709493617995938e-07, | |
| "logits/chosen": 18.09016227722168, | |
| "logits/rejected": 18.207592010498047, | |
| "logps/chosen": -278.3957214355469, | |
| "logps/rejected": -195.16822814941406, | |
| "loss": 0.4846, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -5.534964084625244, | |
| "rewards/margins": 6.029869079589844, | |
| "rewards/rejected": -11.564833641052246, | |
| "sft_loss": 0.9166081547737122, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.40251916757940853, | |
| "grad_norm": 8.853519364453792, | |
| "learning_rate": 4.6933822453126114e-07, | |
| "logits/chosen": 17.334672927856445, | |
| "logits/rejected": 18.275968551635742, | |
| "logps/chosen": -229.73594665527344, | |
| "logps/rejected": -182.89251708984375, | |
| "loss": 0.5795, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -5.739324569702148, | |
| "rewards/margins": 5.62018346786499, | |
| "rewards/rejected": -11.35950756072998, | |
| "sft_loss": 1.0507081747055054, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.41073384446878425, | |
| "grad_norm": 23.105340732527253, | |
| "learning_rate": 4.676865192799443e-07, | |
| "logits/chosen": 18.659299850463867, | |
| "logits/rejected": 19.426942825317383, | |
| "logps/chosen": -310.3028869628906, | |
| "logps/rejected": -233.80967712402344, | |
| "loss": 0.5041, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -7.381309509277344, | |
| "rewards/margins": 6.830047607421875, | |
| "rewards/rejected": -14.211358070373535, | |
| "sft_loss": 0.9847605228424072, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.4189485213581599, | |
| "grad_norm": 12.714869052495171, | |
| "learning_rate": 4.65994551538909e-07, | |
| "logits/chosen": 17.69913101196289, | |
| "logits/rejected": 17.626365661621094, | |
| "logps/chosen": -286.1001892089844, | |
| "logps/rejected": -213.40573120117188, | |
| "loss": 0.5671, | |
| "rewards/accuracies": 0.9466667175292969, | |
| "rewards/chosen": -6.464048385620117, | |
| "rewards/margins": 6.595151424407959, | |
| "rewards/rejected": -13.059199333190918, | |
| "sft_loss": 1.0706934928894043, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.42716319824753557, | |
| "grad_norm": 18.758504329716533, | |
| "learning_rate": 4.642626342482215e-07, | |
| "logits/chosen": 17.131309509277344, | |
| "logits/rejected": 17.48920440673828, | |
| "logps/chosen": -231.87130737304688, | |
| "logps/rejected": -174.91970825195312, | |
| "loss": 0.5728, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -4.785408973693848, | |
| "rewards/margins": 5.311648368835449, | |
| "rewards/rejected": -10.097058296203613, | |
| "sft_loss": 0.9056914448738098, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.4353778751369113, | |
| "grad_norm": 16.34948060980786, | |
| "learning_rate": 4.624910877368684e-07, | |
| "logits/chosen": 17.2136287689209, | |
| "logits/rejected": 18.958431243896484, | |
| "logps/chosen": -265.6873474121094, | |
| "logps/rejected": -200.37913513183594, | |
| "loss": 0.5359, | |
| "rewards/accuracies": 0.9466667175292969, | |
| "rewards/chosen": -5.0484795570373535, | |
| "rewards/margins": 6.235151767730713, | |
| "rewards/rejected": -11.283629417419434, | |
| "sft_loss": 0.897827684879303, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.44359255202628695, | |
| "grad_norm": 8.468377967197654, | |
| "learning_rate": 4.606802396635098e-07, | |
| "logits/chosen": 18.035551071166992, | |
| "logits/rejected": 19.360517501831055, | |
| "logps/chosen": -279.75555419921875, | |
| "logps/rejected": -217.06832885742188, | |
| "loss": 0.4866, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -6.324933052062988, | |
| "rewards/margins": 6.823997974395752, | |
| "rewards/rejected": -13.148929595947266, | |
| "sft_loss": 0.9062218070030212, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.45180722891566266, | |
| "grad_norm": 8.67268227774844, | |
| "learning_rate": 4.588304249558763e-07, | |
| "logits/chosen": 17.523601531982422, | |
| "logits/rejected": 17.99420166015625, | |
| "logps/chosen": -290.8741760253906, | |
| "logps/rejected": -215.23924255371094, | |
| "loss": 0.5245, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -6.641848087310791, | |
| "rewards/margins": 6.469297409057617, | |
| "rewards/rejected": -13.11114501953125, | |
| "sft_loss": 0.9921270608901978, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.4600219058050383, | |
| "grad_norm": 12.040362030861928, | |
| "learning_rate": 4.569419857488228e-07, | |
| "logits/chosen": 17.7161808013916, | |
| "logits/rejected": 17.987571716308594, | |
| "logps/chosen": -297.76318359375, | |
| "logps/rejected": -205.46383666992188, | |
| "loss": 0.5407, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -5.587214469909668, | |
| "rewards/margins": 6.473574638366699, | |
| "rewards/rejected": -12.060790061950684, | |
| "sft_loss": 0.9882974028587341, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.46823658269441404, | |
| "grad_norm": 18.26018008195662, | |
| "learning_rate": 4.550152713210478e-07, | |
| "logits/chosen": 17.55337905883789, | |
| "logits/rejected": 18.636327743530273, | |
| "logps/chosen": -247.40650939941406, | |
| "logps/rejected": -190.1718292236328, | |
| "loss": 0.5136, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -5.608924865722656, | |
| "rewards/margins": 5.688971996307373, | |
| "rewards/rejected": -11.297897338867188, | |
| "sft_loss": 0.9460915327072144, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.4764512595837897, | |
| "grad_norm": 12.950141797441761, | |
| "learning_rate": 4.530506380304925e-07, | |
| "logits/chosen": 16.12598419189453, | |
| "logits/rejected": 16.963117599487305, | |
| "logps/chosen": -315.90838623046875, | |
| "logps/rejected": -234.9226837158203, | |
| "loss": 0.5254, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -7.075807094573975, | |
| "rewards/margins": 7.303346157073975, | |
| "rewards/rejected": -14.37915325164795, | |
| "sft_loss": 1.0791391134262085, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.4846659364731654, | |
| "grad_norm": 8.510727267783134, | |
| "learning_rate": 4.510484492484301e-07, | |
| "logits/chosen": 16.052139282226562, | |
| "logits/rejected": 18.621992111206055, | |
| "logps/chosen": -293.8525695800781, | |
| "logps/rejected": -249.84762573242188, | |
| "loss": 0.502, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -7.701572895050049, | |
| "rewards/margins": 7.963890552520752, | |
| "rewards/rejected": -15.665464401245117, | |
| "sft_loss": 1.0291332006454468, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.4928806133625411, | |
| "grad_norm": 11.486611534499634, | |
| "learning_rate": 4.4900907529225797e-07, | |
| "logits/chosen": 15.679919242858887, | |
| "logits/rejected": 16.096633911132812, | |
| "logps/chosen": -295.52557373046875, | |
| "logps/rejected": -208.35264587402344, | |
| "loss": 0.5684, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -5.8986592292785645, | |
| "rewards/margins": 6.808130264282227, | |
| "rewards/rejected": -12.70678997039795, | |
| "sft_loss": 0.9438207149505615, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.5010952902519168, | |
| "grad_norm": 11.462228668252441, | |
| "learning_rate": 4.46932893357005e-07, | |
| "logits/chosen": 17.438947677612305, | |
| "logits/rejected": 18.582027435302734, | |
| "logps/chosen": -282.1226501464844, | |
| "logps/rejected": -213.70437622070312, | |
| "loss": 0.4316, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -5.921773433685303, | |
| "rewards/margins": 6.760589599609375, | |
| "rewards/rejected": -12.68236255645752, | |
| "sft_loss": 0.9546439051628113, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.5093099671412924, | |
| "grad_norm": 25.401018773660823, | |
| "learning_rate": 4.448202874455672e-07, | |
| "logits/chosen": 16.973630905151367, | |
| "logits/rejected": 17.916053771972656, | |
| "logps/chosen": -303.2902526855469, | |
| "logps/rejected": -214.27862548828125, | |
| "loss": 0.5904, | |
| "rewards/accuracies": 0.9200000762939453, | |
| "rewards/chosen": -6.163903713226318, | |
| "rewards/margins": 6.488450050354004, | |
| "rewards/rejected": -12.65235424041748, | |
| "sft_loss": 1.065365195274353, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.5175246440306681, | |
| "grad_norm": 9.834689967221987, | |
| "learning_rate": 4.426716482976838e-07, | |
| "logits/chosen": 18.023340225219727, | |
| "logits/rejected": 19.0910587310791, | |
| "logps/chosen": -296.31610107421875, | |
| "logps/rejected": -209.1268768310547, | |
| "loss": 0.5109, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -5.737242221832275, | |
| "rewards/margins": 6.57784366607666, | |
| "rewards/rejected": -12.315085411071777, | |
| "sft_loss": 0.966189444065094, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.5257393209200438, | |
| "grad_norm": 12.349530912080413, | |
| "learning_rate": 4.4048737331766774e-07, | |
| "logits/chosen": 19.084957122802734, | |
| "logits/rejected": 19.039499282836914, | |
| "logps/chosen": -273.5611877441406, | |
| "logps/rejected": -193.39707946777344, | |
| "loss": 0.5342, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -5.792872428894043, | |
| "rewards/margins": 5.747686386108398, | |
| "rewards/rejected": -11.540557861328125, | |
| "sft_loss": 0.8884872198104858, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.5339539978094195, | |
| "grad_norm": 13.644842027024733, | |
| "learning_rate": 4.3826786650090273e-07, | |
| "logits/chosen": 15.30917739868164, | |
| "logits/rejected": 16.686445236206055, | |
| "logps/chosen": -261.4600524902344, | |
| "logps/rejected": -197.12661743164062, | |
| "loss": 0.5439, | |
| "rewards/accuracies": 0.9466667175292969, | |
| "rewards/chosen": -6.051290988922119, | |
| "rewards/margins": 5.911606311798096, | |
| "rewards/rejected": -11.962896347045898, | |
| "sft_loss": 1.0287508964538574, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.5421686746987951, | |
| "grad_norm": 14.919840859492023, | |
| "learning_rate": 4.3601353835912235e-07, | |
| "logits/chosen": 17.14605712890625, | |
| "logits/rejected": 18.71445655822754, | |
| "logps/chosen": -240.4210968017578, | |
| "logps/rejected": -191.06373596191406, | |
| "loss": 0.5566, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -6.086501121520996, | |
| "rewards/margins": 5.632846355438232, | |
| "rewards/rejected": -11.719347953796387, | |
| "sft_loss": 0.9403523206710815, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.5503833515881709, | |
| "grad_norm": 34.32717833778974, | |
| "learning_rate": 4.337248058444831e-07, | |
| "logits/chosen": 15.827594757080078, | |
| "logits/rejected": 16.74897575378418, | |
| "logps/chosen": -327.0185852050781, | |
| "logps/rejected": -250.9954376220703, | |
| "loss": 0.5323, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -8.161806106567383, | |
| "rewards/margins": 7.682919979095459, | |
| "rewards/rejected": -15.844725608825684, | |
| "sft_loss": 1.1408532857894897, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.5585980284775466, | |
| "grad_norm": 13.092331667096667, | |
| "learning_rate": 4.3140209227244617e-07, | |
| "logits/chosen": 17.278669357299805, | |
| "logits/rejected": 18.425344467163086, | |
| "logps/chosen": -254.86746215820312, | |
| "logps/rejected": -201.89547729492188, | |
| "loss": 0.5321, | |
| "rewards/accuracies": 0.9200000762939453, | |
| "rewards/chosen": -6.691502094268799, | |
| "rewards/margins": 6.277873516082764, | |
| "rewards/rejected": -12.969375610351562, | |
| "sft_loss": 1.0744267702102661, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.5668127053669222, | |
| "grad_norm": 12.663182255141733, | |
| "learning_rate": 4.2904582724348316e-07, | |
| "logits/chosen": 16.910207748413086, | |
| "logits/rejected": 17.029691696166992, | |
| "logps/chosen": -287.6109313964844, | |
| "logps/rejected": -202.47837829589844, | |
| "loss": 0.4913, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -5.874098777770996, | |
| "rewards/margins": 6.561362266540527, | |
| "rewards/rejected": -12.43545913696289, | |
| "sft_loss": 1.1810688972473145, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.5750273822562979, | |
| "grad_norm": 18.101790487079715, | |
| "learning_rate": 4.266564465636182e-07, | |
| "logits/chosen": 17.891399383544922, | |
| "logits/rejected": 19.3447208404541, | |
| "logps/chosen": -306.7535705566406, | |
| "logps/rejected": -237.83753967285156, | |
| "loss": 0.482, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -7.050681114196777, | |
| "rewards/margins": 7.2094807624816895, | |
| "rewards/rejected": -14.260162353515625, | |
| "sft_loss": 0.964589536190033, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.5832420591456736, | |
| "grad_norm": 9.764394722665243, | |
| "learning_rate": 4.242343921638234e-07, | |
| "logits/chosen": 17.71145248413086, | |
| "logits/rejected": 18.48440170288086, | |
| "logps/chosen": -317.6193542480469, | |
| "logps/rejected": -230.48606872558594, | |
| "loss": 0.45, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -6.803144931793213, | |
| "rewards/margins": 7.825214862823486, | |
| "rewards/rejected": -14.6283597946167, | |
| "sft_loss": 1.0540062189102173, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.5914567360350493, | |
| "grad_norm": 10.821777272670147, | |
| "learning_rate": 4.2178011201828044e-07, | |
| "logits/chosen": 17.3190975189209, | |
| "logits/rejected": 17.47244644165039, | |
| "logps/chosen": -288.40374755859375, | |
| "logps/rejected": -211.689453125, | |
| "loss": 0.5051, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -6.330109596252441, | |
| "rewards/margins": 6.8288254737854, | |
| "rewards/rejected": -13.158934593200684, | |
| "sft_loss": 1.0400768518447876, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.5996714129244249, | |
| "grad_norm": 17.857419277834527, | |
| "learning_rate": 4.1929406006152546e-07, | |
| "logits/chosen": 18.516992568969727, | |
| "logits/rejected": 19.116985321044922, | |
| "logps/chosen": -281.31695556640625, | |
| "logps/rejected": -213.61634826660156, | |
| "loss": 0.5566, | |
| "rewards/accuracies": 0.9466666579246521, | |
| "rewards/chosen": -6.528250217437744, | |
| "rewards/margins": 6.995584487915039, | |
| "rewards/rejected": -13.523836135864258, | |
| "sft_loss": 1.0151982307434082, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.6078860898138007, | |
| "grad_norm": 14.30267906956284, | |
| "learning_rate": 4.167766961044906e-07, | |
| "logits/chosen": 18.10727882385254, | |
| "logits/rejected": 18.658222198486328, | |
| "logps/chosen": -276.7471923828125, | |
| "logps/rejected": -210.30068969726562, | |
| "loss": 0.4918, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -5.930126667022705, | |
| "rewards/margins": 6.7833099365234375, | |
| "rewards/rejected": -12.7134370803833, | |
| "sft_loss": 0.8878603577613831, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.6161007667031764, | |
| "grad_norm": 13.544123616662414, | |
| "learning_rate": 4.1422848574945923e-07, | |
| "logits/chosen": 18.04473876953125, | |
| "logits/rejected": 18.60536003112793, | |
| "logps/chosen": -297.9788513183594, | |
| "logps/rejected": -217.53721618652344, | |
| "loss": 0.486, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -5.85421085357666, | |
| "rewards/margins": 7.5605010986328125, | |
| "rewards/rejected": -13.414711952209473, | |
| "sft_loss": 1.005669355392456, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.624315443592552, | |
| "grad_norm": 21.50695855504068, | |
| "learning_rate": 4.1164990030394985e-07, | |
| "logits/chosen": 17.071107864379883, | |
| "logits/rejected": 18.0479679107666, | |
| "logps/chosen": -287.5808410644531, | |
| "logps/rejected": -229.66249084472656, | |
| "loss": 0.5873, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -7.32340145111084, | |
| "rewards/margins": 7.0957746505737305, | |
| "rewards/rejected": -14.419175148010254, | |
| "sft_loss": 0.9805389046669006, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.6325301204819277, | |
| "grad_norm": 8.192201815991636, | |
| "learning_rate": 4.09041416693545e-07, | |
| "logits/chosen": 17.63469886779785, | |
| "logits/rejected": 18.505117416381836, | |
| "logps/chosen": -279.4613342285156, | |
| "logps/rejected": -218.5486297607422, | |
| "loss": 0.5224, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -7.260526657104492, | |
| "rewards/margins": 6.764527320861816, | |
| "rewards/rejected": -14.025053024291992, | |
| "sft_loss": 1.06680166721344, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.6407447973713034, | |
| "grad_norm": 47.97635903653397, | |
| "learning_rate": 4.064035173736804e-07, | |
| "logits/chosen": 15.768574714660645, | |
| "logits/rejected": 16.24512481689453, | |
| "logps/chosen": -303.8434753417969, | |
| "logps/rejected": -227.7042999267578, | |
| "loss": 0.5142, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -6.973790168762207, | |
| "rewards/margins": 7.414584159851074, | |
| "rewards/rejected": -14.388375282287598, | |
| "sft_loss": 1.1620056629180908, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.6489594742606791, | |
| "grad_norm": 22.56750049467428, | |
| "learning_rate": 4.0373669024041225e-07, | |
| "logits/chosen": 17.480152130126953, | |
| "logits/rejected": 19.36970329284668, | |
| "logps/chosen": -268.9180908203125, | |
| "logps/rejected": -223.1499786376953, | |
| "loss": 0.48, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -7.060788154602051, | |
| "rewards/margins": 7.270442485809326, | |
| "rewards/rejected": -14.331231117248535, | |
| "sft_loss": 1.0084587335586548, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.6571741511500547, | |
| "grad_norm": 14.852542656702267, | |
| "learning_rate": 4.010414285401776e-07, | |
| "logits/chosen": 19.486713409423828, | |
| "logits/rejected": 19.6448917388916, | |
| "logps/chosen": -278.3014831542969, | |
| "logps/rejected": -204.4377899169922, | |
| "loss": 0.4865, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -6.484006404876709, | |
| "rewards/margins": 6.507460594177246, | |
| "rewards/rejected": -12.991467475891113, | |
| "sft_loss": 1.0000892877578735, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.6653888280394304, | |
| "grad_norm": 10.619044380244409, | |
| "learning_rate": 3.9831823077856565e-07, | |
| "logits/chosen": 16.79458236694336, | |
| "logits/rejected": 17.91153907775879, | |
| "logps/chosen": -281.0224304199219, | |
| "logps/rejected": -210.1667022705078, | |
| "loss": 0.5159, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -6.140867710113525, | |
| "rewards/margins": 6.682094097137451, | |
| "rewards/rejected": -12.822961807250977, | |
| "sft_loss": 1.0717185735702515, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.6736035049288062, | |
| "grad_norm": 47.72028643830778, | |
| "learning_rate": 3.95567600628115e-07, | |
| "logits/chosen": 17.3284912109375, | |
| "logits/rejected": 17.72430419921875, | |
| "logps/chosen": -275.4824523925781, | |
| "logps/rejected": -210.34494018554688, | |
| "loss": 0.4746, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -6.135570049285889, | |
| "rewards/margins": 6.734328746795654, | |
| "rewards/rejected": -12.86989974975586, | |
| "sft_loss": 0.9496582746505737, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.6818181818181818, | |
| "grad_norm": 13.659449625348234, | |
| "learning_rate": 3.9279004683515783e-07, | |
| "logits/chosen": 17.051794052124023, | |
| "logits/rejected": 18.201574325561523, | |
| "logps/chosen": -283.5098876953125, | |
| "logps/rejected": -217.13720703125, | |
| "loss": 0.4834, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -6.5031657218933105, | |
| "rewards/margins": 6.981623649597168, | |
| "rewards/rejected": -13.484789848327637, | |
| "sft_loss": 1.008143663406372, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.6900328587075575, | |
| "grad_norm": 10.688936022811054, | |
| "learning_rate": 3.8998608312572234e-07, | |
| "logits/chosen": 18.112707138061523, | |
| "logits/rejected": 18.169342041015625, | |
| "logps/chosen": -316.6014709472656, | |
| "logps/rejected": -224.16824340820312, | |
| "loss": 0.4278, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -6.8555474281311035, | |
| "rewards/margins": 7.1582255363464355, | |
| "rewards/rejected": -14.013773918151855, | |
| "sft_loss": 0.9130622148513794, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.6982475355969332, | |
| "grad_norm": 13.038331606353893, | |
| "learning_rate": 3.8715622811051753e-07, | |
| "logits/chosen": 17.96015739440918, | |
| "logits/rejected": 18.90926742553711, | |
| "logps/chosen": -330.01348876953125, | |
| "logps/rejected": -245.21107482910156, | |
| "loss": 0.4744, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -7.965524196624756, | |
| "rewards/margins": 7.482056617736816, | |
| "rewards/rejected": -15.44758129119873, | |
| "sft_loss": 0.9873117208480835, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.7064622124863089, | |
| "grad_norm": 14.058645020755897, | |
| "learning_rate": 3.843010051890114e-07, | |
| "logits/chosen": 16.319496154785156, | |
| "logits/rejected": 16.970029830932617, | |
| "logps/chosen": -317.0173645019531, | |
| "logps/rejected": -243.7187042236328, | |
| "loss": 0.5166, | |
| "rewards/accuracies": 0.9466666579246521, | |
| "rewards/chosen": -7.986619472503662, | |
| "rewards/margins": 7.993711471557617, | |
| "rewards/rejected": -15.980331420898438, | |
| "sft_loss": 1.082601547241211, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.7146768893756845, | |
| "grad_norm": 19.82457118000122, | |
| "learning_rate": 3.8142094245262615e-07, | |
| "logits/chosen": 17.59951400756836, | |
| "logits/rejected": 17.434412002563477, | |
| "logps/chosen": -294.1492919921875, | |
| "logps/rejected": -218.65521240234375, | |
| "loss": 0.5787, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -7.534255504608154, | |
| "rewards/margins": 6.933282852172852, | |
| "rewards/rejected": -14.467540740966797, | |
| "sft_loss": 1.660515308380127, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.7228915662650602, | |
| "grad_norm": 11.251537042250078, | |
| "learning_rate": 3.785165725870637e-07, | |
| "logits/chosen": 17.26852798461914, | |
| "logits/rejected": 17.4658203125, | |
| "logps/chosen": -318.1449279785156, | |
| "logps/rejected": -243.87478637695312, | |
| "loss": 0.4501, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -7.651638507843018, | |
| "rewards/margins": 7.704569339752197, | |
| "rewards/rejected": -15.356207847595215, | |
| "sft_loss": 1.0064337253570557, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.731106243154436, | |
| "grad_norm": 13.388962596712888, | |
| "learning_rate": 3.7558843277378203e-07, | |
| "logits/chosen": 17.070295333862305, | |
| "logits/rejected": 17.869474411010742, | |
| "logps/chosen": -280.3146057128906, | |
| "logps/rejected": -216.09710693359375, | |
| "loss": 0.4821, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -6.6985931396484375, | |
| "rewards/margins": 7.212075233459473, | |
| "rewards/rejected": -13.91066837310791, | |
| "sft_loss": 0.9864783883094788, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.7393209200438116, | |
| "grad_norm": 14.813083085351893, | |
| "learning_rate": 3.726370645906407e-07, | |
| "logits/chosen": 16.521230697631836, | |
| "logits/rejected": 17.734365463256836, | |
| "logps/chosen": -294.2370300292969, | |
| "logps/rejected": -221.69178771972656, | |
| "loss": 0.4907, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -7.402077674865723, | |
| "rewards/margins": 6.9355292320251465, | |
| "rewards/rejected": -14.337605476379395, | |
| "sft_loss": 1.1839743852615356, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.7475355969331873, | |
| "grad_norm": 12.059854940698536, | |
| "learning_rate": 3.6966301391173204e-07, | |
| "logits/chosen": 17.135530471801758, | |
| "logits/rejected": 19.162967681884766, | |
| "logps/chosen": -284.18438720703125, | |
| "logps/rejected": -233.11984252929688, | |
| "loss": 0.5102, | |
| "rewards/accuracies": 0.9466667175292969, | |
| "rewards/chosen": -7.429366111755371, | |
| "rewards/margins": 7.891510009765625, | |
| "rewards/rejected": -15.320878028869629, | |
| "sft_loss": 1.079641580581665, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.755750273822563, | |
| "grad_norm": 22.260550575758643, | |
| "learning_rate": 3.6666683080641843e-07, | |
| "logits/chosen": 15.536272048950195, | |
| "logits/rejected": 16.60968780517578, | |
| "logps/chosen": -310.630859375, | |
| "logps/rejected": -241.0422821044922, | |
| "loss": 0.4597, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -8.208867073059082, | |
| "rewards/margins": 7.248252868652344, | |
| "rewards/rejected": -15.457121849060059, | |
| "sft_loss": 1.0596128702163696, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.7639649507119387, | |
| "grad_norm": 12.19610863222244, | |
| "learning_rate": 3.636490694375937e-07, | |
| "logits/chosen": 17.03879165649414, | |
| "logits/rejected": 17.748197555541992, | |
| "logps/chosen": -308.9512023925781, | |
| "logps/rejected": -236.08970642089844, | |
| "loss": 0.4273, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -7.292715549468994, | |
| "rewards/margins": 8.177362442016602, | |
| "rewards/rejected": -15.470076560974121, | |
| "sft_loss": 1.0068012475967407, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.7721796276013143, | |
| "grad_norm": 13.22565269945024, | |
| "learning_rate": 3.6061028795918734e-07, | |
| "logits/chosen": 17.87092399597168, | |
| "logits/rejected": 18.572694778442383, | |
| "logps/chosen": -314.8690490722656, | |
| "logps/rejected": -240.42343139648438, | |
| "loss": 0.5971, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -8.011045455932617, | |
| "rewards/margins": 7.702009677886963, | |
| "rewards/rejected": -15.713056564331055, | |
| "sft_loss": 1.0346639156341553, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.78039430449069, | |
| "grad_norm": 23.36877627131626, | |
| "learning_rate": 3.5755104841292974e-07, | |
| "logits/chosen": 16.52726936340332, | |
| "logits/rejected": 18.124269485473633, | |
| "logps/chosen": -261.4451599121094, | |
| "logps/rejected": -216.3064727783203, | |
| "loss": 0.5188, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -7.079422950744629, | |
| "rewards/margins": 6.967349052429199, | |
| "rewards/rejected": -14.046771049499512, | |
| "sft_loss": 1.0945566892623901, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.7886089813800657, | |
| "grad_norm": 12.346922738371601, | |
| "learning_rate": 3.544719166243998e-07, | |
| "logits/chosen": 17.161659240722656, | |
| "logits/rejected": 18.612253189086914, | |
| "logps/chosen": -295.6679992675781, | |
| "logps/rejected": -228.33984375, | |
| "loss": 0.4422, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -6.870236873626709, | |
| "rewards/margins": 7.495952129364014, | |
| "rewards/rejected": -14.36618709564209, | |
| "sft_loss": 0.9808112382888794, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.7968236582694413, | |
| "grad_norm": 14.120403338012792, | |
| "learning_rate": 3.513734620983716e-07, | |
| "logits/chosen": 17.235340118408203, | |
| "logits/rejected": 18.787269592285156, | |
| "logps/chosen": -289.2434997558594, | |
| "logps/rejected": -240.0524444580078, | |
| "loss": 0.4205, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -7.261631965637207, | |
| "rewards/margins": 8.297459602355957, | |
| "rewards/rejected": -15.55909252166748, | |
| "sft_loss": 0.9492250084877014, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.8050383351588171, | |
| "grad_norm": 14.978501832234636, | |
| "learning_rate": 3.482562579134809e-07, | |
| "logits/chosen": 15.85843276977539, | |
| "logits/rejected": 17.14594268798828, | |
| "logps/chosen": -256.8265380859375, | |
| "logps/rejected": -214.51412963867188, | |
| "loss": 0.466, | |
| "rewards/accuracies": 0.9466667175292969, | |
| "rewards/chosen": -7.612859725952148, | |
| "rewards/margins": 6.866227626800537, | |
| "rewards/rejected": -14.479085922241211, | |
| "sft_loss": 1.0439454317092896, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.8132530120481928, | |
| "grad_norm": 13.645938681155632, | |
| "learning_rate": 3.4512088061623073e-07, | |
| "logits/chosen": 17.91840171813965, | |
| "logits/rejected": 18.105796813964844, | |
| "logps/chosen": -344.9450378417969, | |
| "logps/rejected": -257.0929870605469, | |
| "loss": 0.434, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -8.392577171325684, | |
| "rewards/margins": 8.40063762664795, | |
| "rewards/rejected": -16.793216705322266, | |
| "sft_loss": 1.052524447441101, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.8214676889375685, | |
| "grad_norm": 11.793731298003246, | |
| "learning_rate": 3.419679101143555e-07, | |
| "logits/chosen": 16.95572280883789, | |
| "logits/rejected": 18.109580993652344, | |
| "logps/chosen": -257.8283996582031, | |
| "logps/rejected": -217.70062255859375, | |
| "loss": 0.4059, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -7.232075214385986, | |
| "rewards/margins": 7.084912300109863, | |
| "rewards/rejected": -14.316986083984375, | |
| "sft_loss": 1.070483684539795, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.8296823658269441, | |
| "grad_norm": 18.160358009772516, | |
| "learning_rate": 3.387979295695632e-07, | |
| "logits/chosen": 17.402151107788086, | |
| "logits/rejected": 17.819072723388672, | |
| "logps/chosen": -284.08599853515625, | |
| "logps/rejected": -228.4375, | |
| "loss": 0.4832, | |
| "rewards/accuracies": 0.9333333969116211, | |
| "rewards/chosen": -7.799540042877197, | |
| "rewards/margins": 7.30112886428833, | |
| "rewards/rejected": -15.100667953491211, | |
| "sft_loss": 1.0201059579849243, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.8378970427163198, | |
| "grad_norm": 24.681797914609845, | |
| "learning_rate": 3.356115252896764e-07, | |
| "logits/chosen": 16.481372833251953, | |
| "logits/rejected": 17.393707275390625, | |
| "logps/chosen": -318.48956298828125, | |
| "logps/rejected": -238.67076110839844, | |
| "loss": 0.4569, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -7.8467302322387695, | |
| "rewards/margins": 7.6555867195129395, | |
| "rewards/rejected": -15.502315521240234, | |
| "sft_loss": 1.1412904262542725, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.8461117196056955, | |
| "grad_norm": 11.780066809990752, | |
| "learning_rate": 3.3240928662019043e-07, | |
| "logits/chosen": 14.776932716369629, | |
| "logits/rejected": 16.346778869628906, | |
| "logps/chosen": -313.47589111328125, | |
| "logps/rejected": -242.91506958007812, | |
| "loss": 0.4196, | |
| "rewards/accuracies": 0.9466667175292969, | |
| "rewards/chosen": -7.860676288604736, | |
| "rewards/margins": 8.015517234802246, | |
| "rewards/rejected": -15.876194953918457, | |
| "sft_loss": 1.059720516204834, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.8543263964950711, | |
| "grad_norm": 14.114725277489077, | |
| "learning_rate": 3.291918058352706e-07, | |
| "logits/chosen": 16.27129554748535, | |
| "logits/rejected": 17.153289794921875, | |
| "logps/chosen": -306.25506591796875, | |
| "logps/rejected": -249.3704071044922, | |
| "loss": 0.5092, | |
| "rewards/accuracies": 0.9466667175292969, | |
| "rewards/chosen": -9.551309585571289, | |
| "rewards/margins": 7.208839416503906, | |
| "rewards/rejected": -16.760149002075195, | |
| "sft_loss": 1.1138993501663208, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.8625410733844469, | |
| "grad_norm": 27.760604608400726, | |
| "learning_rate": 3.259596780282074e-07, | |
| "logits/chosen": 18.246183395385742, | |
| "logits/rejected": 18.89859390258789, | |
| "logps/chosen": -346.7146301269531, | |
| "logps/rejected": -260.1651916503906, | |
| "loss": 0.4395, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -8.043220520019531, | |
| "rewards/margins": 8.856348991394043, | |
| "rewards/rejected": -16.899568557739258, | |
| "sft_loss": 1.1765520572662354, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.8707557502738226, | |
| "grad_norm": 15.402410015157315, | |
| "learning_rate": 3.2271350100134975e-07, | |
| "logits/chosen": 17.567943572998047, | |
| "logits/rejected": 17.768869400024414, | |
| "logps/chosen": -298.6788024902344, | |
| "logps/rejected": -236.3932647705078, | |
| "loss": 0.4193, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -7.671787738800049, | |
| "rewards/margins": 7.834874629974365, | |
| "rewards/rejected": -15.506662368774414, | |
| "sft_loss": 1.071178913116455, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.8789704271631983, | |
| "grad_norm": 18.947114003342495, | |
| "learning_rate": 3.1945387515553843e-07, | |
| "logits/chosen": 17.647369384765625, | |
| "logits/rejected": 18.73533821105957, | |
| "logps/chosen": -310.0240478515625, | |
| "logps/rejected": -251.67193603515625, | |
| "loss": 0.441, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -7.7946672439575195, | |
| "rewards/margins": 9.0897798538208, | |
| "rewards/rejected": -16.88444709777832, | |
| "sft_loss": 1.0311574935913086, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.8871851040525739, | |
| "grad_norm": 11.041389503496823, | |
| "learning_rate": 3.1618140337905764e-07, | |
| "logits/chosen": 17.451311111450195, | |
| "logits/rejected": 18.353700637817383, | |
| "logps/chosen": -297.8014831542969, | |
| "logps/rejected": -240.24606323242188, | |
| "loss": 0.4126, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -8.106889724731445, | |
| "rewards/margins": 7.919802188873291, | |
| "rewards/rejected": -16.02669334411621, | |
| "sft_loss": 1.1384520530700684, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.8953997809419496, | |
| "grad_norm": 9.858801498232785, | |
| "learning_rate": 3.128966909361271e-07, | |
| "logits/chosen": 16.695926666259766, | |
| "logits/rejected": 18.67499351501465, | |
| "logps/chosen": -320.1283874511719, | |
| "logps/rejected": -254.82162475585938, | |
| "loss": 0.3699, | |
| "rewards/accuracies": 0.9466667175292969, | |
| "rewards/chosen": -8.027070999145508, | |
| "rewards/margins": 8.428789138793945, | |
| "rewards/rejected": -16.455860137939453, | |
| "sft_loss": 1.0505129098892212, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.9036144578313253, | |
| "grad_norm": 16.103503361054337, | |
| "learning_rate": 3.096003453549549e-07, | |
| "logits/chosen": 17.31558609008789, | |
| "logits/rejected": 17.725223541259766, | |
| "logps/chosen": -345.3844299316406, | |
| "logps/rejected": -261.2863464355469, | |
| "loss": 0.4497, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -8.239363670349121, | |
| "rewards/margins": 9.474674224853516, | |
| "rewards/rejected": -17.714040756225586, | |
| "sft_loss": 1.020671010017395, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.911829134720701, | |
| "grad_norm": 12.01821136380653, | |
| "learning_rate": 3.06292976315371e-07, | |
| "logits/chosen": 16.277523040771484, | |
| "logits/rejected": 17.34755516052246, | |
| "logps/chosen": -304.7778625488281, | |
| "logps/rejected": -241.48277282714844, | |
| "loss": 0.4126, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -7.762810230255127, | |
| "rewards/margins": 8.414238929748535, | |
| "rewards/rejected": -16.17704963684082, | |
| "sft_loss": 1.1222290992736816, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.9200438116100766, | |
| "grad_norm": 11.41112495788056, | |
| "learning_rate": 3.0297519553606324e-07, | |
| "logits/chosen": 17.731529235839844, | |
| "logits/rejected": 18.088359832763672, | |
| "logps/chosen": -305.7876281738281, | |
| "logps/rejected": -246.57879638671875, | |
| "loss": 0.4401, | |
| "rewards/accuracies": 0.9466667175292969, | |
| "rewards/chosen": -7.948428630828857, | |
| "rewards/margins": 8.658875465393066, | |
| "rewards/rejected": -16.60730743408203, | |
| "sft_loss": 1.067797064781189, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.9282584884994524, | |
| "grad_norm": 21.985722962679343, | |
| "learning_rate": 2.996476166614363e-07, | |
| "logits/chosen": 15.972024917602539, | |
| "logits/rejected": 16.38096809387207, | |
| "logps/chosen": -330.54388427734375, | |
| "logps/rejected": -267.4414367675781, | |
| "loss": 0.5027, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -8.933554649353027, | |
| "rewards/margins": 9.095858573913574, | |
| "rewards/rejected": -18.0294132232666, | |
| "sft_loss": 1.1063634157180786, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.9364731653888281, | |
| "grad_norm": 10.308382930028264, | |
| "learning_rate": 2.963108551481142e-07, | |
| "logits/chosen": 17.77937889099121, | |
| "logits/rejected": 18.134130477905273, | |
| "logps/chosen": -339.63079833984375, | |
| "logps/rejected": -260.2466735839844, | |
| "loss": 0.4519, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -8.120518684387207, | |
| "rewards/margins": 9.045032501220703, | |
| "rewards/rejected": -17.165552139282227, | |
| "sft_loss": 1.072819471359253, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.9446878422782037, | |
| "grad_norm": 15.634526225587425, | |
| "learning_rate": 2.929655281511075e-07, | |
| "logits/chosen": 16.544097900390625, | |
| "logits/rejected": 17.375316619873047, | |
| "logps/chosen": -319.2738037109375, | |
| "logps/rejected": -257.0357971191406, | |
| "loss": 0.4126, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -8.096101760864258, | |
| "rewards/margins": 8.786704063415527, | |
| "rewards/rejected": -16.8828067779541, | |
| "sft_loss": 1.0927081108093262, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.9529025191675794, | |
| "grad_norm": 8.788361215925173, | |
| "learning_rate": 2.896122544096667e-07, | |
| "logits/chosen": 16.77577018737793, | |
| "logits/rejected": 17.813331604003906, | |
| "logps/chosen": -297.43548583984375, | |
| "logps/rejected": -240.00099182128906, | |
| "loss": 0.4592, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -7.802213191986084, | |
| "rewards/margins": 8.326114654541016, | |
| "rewards/rejected": -16.12833023071289, | |
| "sft_loss": 1.088619589805603, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.9611171960569551, | |
| "grad_norm": 20.34248392425272, | |
| "learning_rate": 2.8625165413284307e-07, | |
| "logits/chosen": 16.004566192626953, | |
| "logits/rejected": 17.70891761779785, | |
| "logps/chosen": -328.6180725097656, | |
| "logps/rejected": -263.9577941894531, | |
| "loss": 0.5055, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -8.101932525634766, | |
| "rewards/margins": 9.129469871520996, | |
| "rewards/rejected": -17.23140525817871, | |
| "sft_loss": 1.0326135158538818, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.9693318729463308, | |
| "grad_norm": 13.09030046415886, | |
| "learning_rate": 2.8288434888477626e-07, | |
| "logits/chosen": 18.028348922729492, | |
| "logits/rejected": 17.76748275756836, | |
| "logps/chosen": -287.28692626953125, | |
| "logps/rejected": -231.44729614257812, | |
| "loss": 0.3908, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -7.940645217895508, | |
| "rewards/margins": 7.91243839263916, | |
| "rewards/rejected": -15.853084564208984, | |
| "sft_loss": 1.0779129266738892, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.9775465498357064, | |
| "grad_norm": 20.95262748964158, | |
| "learning_rate": 2.795109614697326e-07, | |
| "logits/chosen": 17.00741195678711, | |
| "logits/rejected": 18.209590911865234, | |
| "logps/chosen": -275.52880859375, | |
| "logps/rejected": -232.07052612304688, | |
| "loss": 0.4225, | |
| "rewards/accuracies": 0.9466666579246521, | |
| "rewards/chosen": -7.112081050872803, | |
| "rewards/margins": 8.281967163085938, | |
| "rewards/rejected": -15.394047737121582, | |
| "sft_loss": 1.0076452493667603, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.9857612267250822, | |
| "grad_norm": 13.158949539443237, | |
| "learning_rate": 2.761321158169134e-07, | |
| "logits/chosen": 18.07162094116211, | |
| "logits/rejected": 19.637807846069336, | |
| "logps/chosen": -307.5865478515625, | |
| "logps/rejected": -249.9253387451172, | |
| "loss": 0.4339, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -7.806564807891846, | |
| "rewards/margins": 8.727023124694824, | |
| "rewards/rejected": -16.53359031677246, | |
| "sft_loss": 1.06932532787323, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.9939759036144579, | |
| "grad_norm": 13.610109275739992, | |
| "learning_rate": 2.727484368650553e-07, | |
| "logits/chosen": 15.262972831726074, | |
| "logits/rejected": 16.486412048339844, | |
| "logps/chosen": -305.6347351074219, | |
| "logps/rejected": -252.50546264648438, | |
| "loss": 0.4625, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -8.769743919372559, | |
| "rewards/margins": 8.321878433227539, | |
| "rewards/rejected": -17.091623306274414, | |
| "sft_loss": 1.1903793811798096, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 1.0021905805038336, | |
| "grad_norm": 9.988555947945434, | |
| "learning_rate": 2.6936055044684425e-07, | |
| "logits/chosen": 17.130857467651367, | |
| "logits/rejected": 17.868497848510742, | |
| "logps/chosen": -278.2147216796875, | |
| "logps/rejected": -229.0367889404297, | |
| "loss": 0.4205, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -8.578054428100586, | |
| "rewards/margins": 7.36480188369751, | |
| "rewards/rejected": -15.942855834960938, | |
| "sft_loss": 1.0933631658554077, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.0104052573932092, | |
| "grad_norm": 11.824094414048218, | |
| "learning_rate": 2.659690831731631e-07, | |
| "logits/chosen": 17.553348541259766, | |
| "logits/rejected": 18.92648696899414, | |
| "logps/chosen": -317.8105163574219, | |
| "logps/rejected": -263.2023620605469, | |
| "loss": 0.3385, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -8.87080192565918, | |
| "rewards/margins": 9.268915176391602, | |
| "rewards/rejected": -18.13971710205078, | |
| "sft_loss": 1.0447877645492554, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 1.0186199342825848, | |
| "grad_norm": 15.737059861781074, | |
| "learning_rate": 2.6257466231719676e-07, | |
| "logits/chosen": 15.165780067443848, | |
| "logits/rejected": 16.453243255615234, | |
| "logps/chosen": -338.23773193359375, | |
| "logps/rejected": -283.7428283691406, | |
| "loss": 0.3123, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -9.231574058532715, | |
| "rewards/margins": 10.158638954162598, | |
| "rewards/rejected": -19.390214920043945, | |
| "sft_loss": 1.2299811840057373, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.0268346111719606, | |
| "grad_norm": 11.900623330243908, | |
| "learning_rate": 2.591779156984137e-07, | |
| "logits/chosen": 16.764328002929688, | |
| "logits/rejected": 16.837923049926758, | |
| "logps/chosen": -322.6804504394531, | |
| "logps/rejected": -269.0111999511719, | |
| "loss": 0.3671, | |
| "rewards/accuracies": 0.9466666579246521, | |
| "rewards/chosen": -9.284835815429688, | |
| "rewards/margins": 9.551142692565918, | |
| "rewards/rejected": -18.83597755432129, | |
| "sft_loss": 1.0855733156204224, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 1.0350492880613362, | |
| "grad_norm": 18.88025576733879, | |
| "learning_rate": 2.557794715664465e-07, | |
| "logits/chosen": 15.582106590270996, | |
| "logits/rejected": 16.574077606201172, | |
| "logps/chosen": -330.9181213378906, | |
| "logps/rejected": -281.83709716796875, | |
| "loss": 0.4083, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -9.849162101745605, | |
| "rewards/margins": 10.020377159118652, | |
| "rewards/rejected": -19.86954116821289, | |
| "sft_loss": 1.11058509349823, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.0432639649507118, | |
| "grad_norm": 22.56812145625195, | |
| "learning_rate": 2.5237995848489417e-07, | |
| "logits/chosen": 16.257413864135742, | |
| "logits/rejected": 16.71412467956543, | |
| "logps/chosen": -332.62506103515625, | |
| "logps/rejected": -271.0566101074219, | |
| "loss": 0.4569, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -8.809398651123047, | |
| "rewards/margins": 10.076090812683105, | |
| "rewards/rejected": -18.88549041748047, | |
| "sft_loss": 1.1897673606872559, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 1.0514786418400877, | |
| "grad_norm": 10.647617140402389, | |
| "learning_rate": 2.48980005215064e-07, | |
| "logits/chosen": 16.611183166503906, | |
| "logits/rejected": 17.89920425415039, | |
| "logps/chosen": -271.6616516113281, | |
| "logps/rejected": -231.13978576660156, | |
| "loss": 0.4444, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -8.08376407623291, | |
| "rewards/margins": 8.19190502166748, | |
| "rewards/rejected": -16.27566909790039, | |
| "sft_loss": 1.3704915046691895, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.0596933187294633, | |
| "grad_norm": 19.247491047471033, | |
| "learning_rate": 2.45580240599679e-07, | |
| "logits/chosen": 16.49073028564453, | |
| "logits/rejected": 17.990306854248047, | |
| "logps/chosen": -358.3551025390625, | |
| "logps/rejected": -288.8968505859375, | |
| "loss": 0.3691, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -8.500346183776855, | |
| "rewards/margins": 10.489169120788574, | |
| "rewards/rejected": -18.98951530456543, | |
| "sft_loss": 1.2408881187438965, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 1.067907995618839, | |
| "grad_norm": 13.44526599292449, | |
| "learning_rate": 2.421812934465696e-07, | |
| "logits/chosen": 17.065837860107422, | |
| "logits/rejected": 17.75263214111328, | |
| "logps/chosen": -308.9762878417969, | |
| "logps/rejected": -256.1690979003906, | |
| "loss": 0.3945, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -8.130703926086426, | |
| "rewards/margins": 9.368220329284668, | |
| "rewards/rejected": -17.49892234802246, | |
| "sft_loss": 1.1205145120620728, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.0761226725082147, | |
| "grad_norm": 10.753673167776007, | |
| "learning_rate": 2.3878379241237134e-07, | |
| "logits/chosen": 16.457183837890625, | |
| "logits/rejected": 17.42021942138672, | |
| "logps/chosen": -312.5380554199219, | |
| "logps/rejected": -251.23977661132812, | |
| "loss": 0.3696, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -8.389382362365723, | |
| "rewards/margins": 8.924761772155762, | |
| "rewards/rejected": -17.314144134521484, | |
| "sft_loss": 1.2173506021499634, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 1.0843373493975903, | |
| "grad_norm": 23.82423804722956, | |
| "learning_rate": 2.3538836588625077e-07, | |
| "logits/chosen": 15.20209789276123, | |
| "logits/rejected": 15.774395942687988, | |
| "logps/chosen": -297.73260498046875, | |
| "logps/rejected": -246.4073944091797, | |
| "loss": 0.4032, | |
| "rewards/accuracies": 0.9466667175292969, | |
| "rewards/chosen": -8.610387802124023, | |
| "rewards/margins": 8.70635986328125, | |
| "rewards/rejected": -17.31674575805664, | |
| "sft_loss": 1.3788336515426636, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.0925520262869661, | |
| "grad_norm": 7.166962184073318, | |
| "learning_rate": 2.3199564187368153e-07, | |
| "logits/chosen": 15.194981575012207, | |
| "logits/rejected": 17.136018753051758, | |
| "logps/chosen": -328.6063537597656, | |
| "logps/rejected": -288.6786804199219, | |
| "loss": 0.366, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -9.775790214538574, | |
| "rewards/margins": 10.185883522033691, | |
| "rewards/rejected": -19.9616756439209, | |
| "sft_loss": 1.1107780933380127, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 1.1007667031763417, | |
| "grad_norm": 13.216204703949911, | |
| "learning_rate": 2.2860624788029013e-07, | |
| "logits/chosen": 16.70530891418457, | |
| "logits/rejected": 17.76304817199707, | |
| "logps/chosen": -289.44476318359375, | |
| "logps/rejected": -245.6142120361328, | |
| "loss": 0.4321, | |
| "rewards/accuracies": 0.9466667175292969, | |
| "rewards/chosen": -8.357013702392578, | |
| "rewards/margins": 8.433321952819824, | |
| "rewards/rejected": -16.790334701538086, | |
| "sft_loss": 1.1908717155456543, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.1089813800657173, | |
| "grad_norm": 26.032896310058877, | |
| "learning_rate": 2.2522081079579497e-07, | |
| "logits/chosen": 15.079482078552246, | |
| "logits/rejected": 16.43825340270996, | |
| "logps/chosen": -327.8377380371094, | |
| "logps/rejected": -283.44158935546875, | |
| "loss": 0.389, | |
| "rewards/accuracies": 0.9466667175292969, | |
| "rewards/chosen": -9.565984725952148, | |
| "rewards/margins": 10.288603782653809, | |
| "rewards/rejected": -19.854589462280273, | |
| "sft_loss": 1.4105526208877563, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 1.1171960569550932, | |
| "grad_norm": 7.35341298145847, | |
| "learning_rate": 2.2183995677805967e-07, | |
| "logits/chosen": 15.347798347473145, | |
| "logits/rejected": 16.887144088745117, | |
| "logps/chosen": -343.8727722167969, | |
| "logps/rejected": -289.7627258300781, | |
| "loss": 0.3343, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -9.85545539855957, | |
| "rewards/margins": 10.418365478515625, | |
| "rewards/rejected": -20.273822784423828, | |
| "sft_loss": 1.2016042470932007, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.1254107338444688, | |
| "grad_norm": 13.095979555911432, | |
| "learning_rate": 2.1846431113728062e-07, | |
| "logits/chosen": 15.633400917053223, | |
| "logits/rejected": 17.45536994934082, | |
| "logps/chosen": -328.1496887207031, | |
| "logps/rejected": -281.7301025390625, | |
| "loss": 0.3718, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -9.103165626525879, | |
| "rewards/margins": 10.700647354125977, | |
| "rewards/rejected": -19.80381202697754, | |
| "sft_loss": 1.198488473892212, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 1.1336254107338444, | |
| "grad_norm": 17.038758672339643, | |
| "learning_rate": 2.1509449822033205e-07, | |
| "logits/chosen": 16.633058547973633, | |
| "logits/rejected": 17.105684280395508, | |
| "logps/chosen": -340.9743957519531, | |
| "logps/rejected": -273.4366455078125, | |
| "loss": 0.3328, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -8.657751083374023, | |
| "rewards/margins": 9.9077787399292, | |
| "rewards/rejected": -18.565532684326172, | |
| "sft_loss": 1.1622406244277954, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.1418400876232202, | |
| "grad_norm": 13.181289081121232, | |
| "learning_rate": 2.1173114129528957e-07, | |
| "logits/chosen": 16.235170364379883, | |
| "logits/rejected": 17.971439361572266, | |
| "logps/chosen": -289.8466491699219, | |
| "logps/rejected": -249.1376495361328, | |
| "loss": 0.3625, | |
| "rewards/accuracies": 0.9333333373069763, | |
| "rewards/chosen": -8.312536239624023, | |
| "rewards/margins": 9.367281913757324, | |
| "rewards/rejected": -17.679819107055664, | |
| "sft_loss": 1.2810382843017578, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 1.1500547645125958, | |
| "grad_norm": 13.226133090678903, | |
| "learning_rate": 2.0837486243615226e-07, | |
| "logits/chosen": 16.742103576660156, | |
| "logits/rejected": 17.46257781982422, | |
| "logps/chosen": -364.11041259765625, | |
| "logps/rejected": -300.90618896484375, | |
| "loss": 0.3981, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -9.691442489624023, | |
| "rewards/margins": 11.124072074890137, | |
| "rewards/rejected": -20.81551742553711, | |
| "sft_loss": 1.0426690578460693, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.1582694414019716, | |
| "grad_norm": 16.747134822775763, | |
| "learning_rate": 2.0502628240778653e-07, | |
| "logits/chosen": 17.3011474609375, | |
| "logits/rejected": 19.28099822998047, | |
| "logps/chosen": -329.4310607910156, | |
| "logps/rejected": -291.73443603515625, | |
| "loss": 0.3664, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -9.230022430419922, | |
| "rewards/margins": 11.087077140808105, | |
| "rewards/rejected": -20.31709861755371, | |
| "sft_loss": 1.0452929735183716, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 1.1664841182913472, | |
| "grad_norm": 11.712195080946406, | |
| "learning_rate": 2.0168602055111173e-07, | |
| "logits/chosen": 16.063915252685547, | |
| "logits/rejected": 17.033220291137695, | |
| "logps/chosen": -324.21099853515625, | |
| "logps/rejected": -281.9880065917969, | |
| "loss": 0.3326, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -9.243894577026367, | |
| "rewards/margins": 10.908008575439453, | |
| "rewards/rejected": -20.15190315246582, | |
| "sft_loss": 1.1959102153778076, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.1746987951807228, | |
| "grad_norm": 19.52291295321317, | |
| "learning_rate": 1.9835469466854887e-07, | |
| "logits/chosen": 14.572199821472168, | |
| "logits/rejected": 16.15847396850586, | |
| "logps/chosen": -322.0695495605469, | |
| "logps/rejected": -283.8585205078125, | |
| "loss": 0.3275, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -9.329268455505371, | |
| "rewards/margins": 10.466280937194824, | |
| "rewards/rejected": -19.795551300048828, | |
| "sft_loss": 1.1618155241012573, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 1.1829134720700987, | |
| "grad_norm": 14.04137372253548, | |
| "learning_rate": 1.9503292090975454e-07, | |
| "logits/chosen": 16.88302993774414, | |
| "logits/rejected": 17.57504653930664, | |
| "logps/chosen": -292.8112487792969, | |
| "logps/rejected": -249.99221801757812, | |
| "loss": 0.3841, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -9.16586685180664, | |
| "rewards/margins": 9.12321662902832, | |
| "rewards/rejected": -18.28908348083496, | |
| "sft_loss": 1.2042182683944702, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.1911281489594743, | |
| "grad_norm": 12.34681171872866, | |
| "learning_rate": 1.917213136576602e-07, | |
| "logits/chosen": 16.656551361083984, | |
| "logits/rejected": 17.51203155517578, | |
| "logps/chosen": -327.6507568359375, | |
| "logps/rejected": -284.38262939453125, | |
| "loss": 0.3207, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -9.919057846069336, | |
| "rewards/margins": 10.376687049865723, | |
| "rewards/rejected": -20.295743942260742, | |
| "sft_loss": 1.18035089969635, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 1.1993428258488499, | |
| "grad_norm": 10.050794300712155, | |
| "learning_rate": 1.8842048541483756e-07, | |
| "logits/chosen": 18.090221405029297, | |
| "logits/rejected": 18.187620162963867, | |
| "logps/chosen": -322.1310119628906, | |
| "logps/rejected": -253.3239288330078, | |
| "loss": 0.3945, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -8.852034568786621, | |
| "rewards/margins": 9.027352333068848, | |
| "rewards/rejected": -17.879384994506836, | |
| "sft_loss": 1.199164628982544, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.2075575027382257, | |
| "grad_norm": 11.698463225887238, | |
| "learning_rate": 1.8513104669021314e-07, | |
| "logits/chosen": 15.768450736999512, | |
| "logits/rejected": 17.4649715423584, | |
| "logps/chosen": -315.5854797363281, | |
| "logps/rejected": -270.3199462890625, | |
| "loss": 0.3727, | |
| "rewards/accuracies": 0.9466667175292969, | |
| "rewards/chosen": -9.129364967346191, | |
| "rewards/margins": 9.496920585632324, | |
| "rewards/rejected": -18.626283645629883, | |
| "sft_loss": 1.1171187162399292, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 1.2157721796276013, | |
| "grad_norm": 15.670433550127342, | |
| "learning_rate": 1.8185360588615057e-07, | |
| "logits/chosen": 17.373594284057617, | |
| "logits/rejected": 18.17388916015625, | |
| "logps/chosen": -349.6602478027344, | |
| "logps/rejected": -286.2644958496094, | |
| "loss": 0.3583, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -8.793112754821777, | |
| "rewards/margins": 10.82769775390625, | |
| "rewards/rejected": -19.620811462402344, | |
| "sft_loss": 1.1327273845672607, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.223986856516977, | |
| "grad_norm": 17.513419996090132, | |
| "learning_rate": 1.7858876918592232e-07, | |
| "logits/chosen": 15.862748146057129, | |
| "logits/rejected": 17.21187400817871, | |
| "logps/chosen": -301.255859375, | |
| "logps/rejected": -256.63555908203125, | |
| "loss": 0.3533, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -8.769881248474121, | |
| "rewards/margins": 9.70648193359375, | |
| "rewards/rejected": -18.476362228393555, | |
| "sft_loss": 1.1204417943954468, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 1.2322015334063527, | |
| "grad_norm": 19.125724690968823, | |
| "learning_rate": 1.7533714044159299e-07, | |
| "logits/chosen": 15.58492374420166, | |
| "logits/rejected": 16.52800941467285, | |
| "logps/chosen": -298.8733215332031, | |
| "logps/rejected": -268.4566650390625, | |
| "loss": 0.4265, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -9.785615921020508, | |
| "rewards/margins": 9.117348670959473, | |
| "rewards/rejected": -18.902963638305664, | |
| "sft_loss": 1.6064594984054565, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.2404162102957283, | |
| "grad_norm": 17.968784609019274, | |
| "learning_rate": 1.7209932106233264e-07, | |
| "logits/chosen": 15.145374298095703, | |
| "logits/rejected": 17.433292388916016, | |
| "logps/chosen": -342.9417724609375, | |
| "logps/rejected": -296.39654541015625, | |
| "loss": 0.3766, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -9.673720359802246, | |
| "rewards/margins": 10.815841674804688, | |
| "rewards/rejected": -20.489561080932617, | |
| "sft_loss": 1.145885944366455, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 1.248630887185104, | |
| "grad_norm": 13.684786311914898, | |
| "learning_rate": 1.688759099031824e-07, | |
| "logits/chosen": 15.70371150970459, | |
| "logits/rejected": 16.69938087463379, | |
| "logps/chosen": -361.2178955078125, | |
| "logps/rejected": -309.79150390625, | |
| "loss": 0.3508, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -10.506484031677246, | |
| "rewards/margins": 11.59350872039795, | |
| "rewards/rejected": -22.099994659423828, | |
| "sft_loss": 1.1850322484970093, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.2568455640744798, | |
| "grad_norm": 14.244960468313039, | |
| "learning_rate": 1.656675031542925e-07, | |
| "logits/chosen": 17.195899963378906, | |
| "logits/rejected": 18.426219940185547, | |
| "logps/chosen": -363.3425598144531, | |
| "logps/rejected": -301.96063232421875, | |
| "loss": 0.3397, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -9.729473114013672, | |
| "rewards/margins": 11.475974082946777, | |
| "rewards/rejected": -21.205448150634766, | |
| "sft_loss": 1.1794105768203735, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 1.2650602409638554, | |
| "grad_norm": 8.622276211404598, | |
| "learning_rate": 1.6247469423065343e-07, | |
| "logits/chosen": 16.508113861083984, | |
| "logits/rejected": 17.097890853881836, | |
| "logps/chosen": -305.1572570800781, | |
| "logps/rejected": -249.677001953125, | |
| "loss": 0.3759, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -8.539449691772461, | |
| "rewards/margins": 8.976499557495117, | |
| "rewards/rejected": -17.515949249267578, | |
| "sft_loss": 1.196576714515686, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.273274917853231, | |
| "grad_norm": 12.358403358119775, | |
| "learning_rate": 1.5929807366233977e-07, | |
| "logits/chosen": 16.241657257080078, | |
| "logits/rejected": 17.03815269470215, | |
| "logps/chosen": -369.39556884765625, | |
| "logps/rejected": -303.64337158203125, | |
| "loss": 0.3163, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -9.143651008605957, | |
| "rewards/margins": 11.741597175598145, | |
| "rewards/rejected": -20.885250091552734, | |
| "sft_loss": 1.1366469860076904, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 1.2814895947426068, | |
| "grad_norm": 16.14061914284979, | |
| "learning_rate": 1.5613822898528794e-07, | |
| "logits/chosen": 16.795856475830078, | |
| "logits/rejected": 17.53175163269043, | |
| "logps/chosen": -345.46929931640625, | |
| "logps/rejected": -292.4604187011719, | |
| "loss": 0.3369, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -9.835062026977539, | |
| "rewards/margins": 11.170233726501465, | |
| "rewards/rejected": -21.005298614501953, | |
| "sft_loss": 1.3101640939712524, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.2897042716319824, | |
| "grad_norm": 12.538981244658086, | |
| "learning_rate": 1.5299574463262794e-07, | |
| "logits/chosen": 15.523879051208496, | |
| "logits/rejected": 16.796798706054688, | |
| "logps/chosen": -377.0471496582031, | |
| "logps/rejected": -319.5939025878906, | |
| "loss": 0.4028, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -10.438889503479004, | |
| "rewards/margins": 12.12649917602539, | |
| "rewards/rejected": -22.565387725830078, | |
| "sft_loss": 1.1697484254837036, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 1.297918948521358, | |
| "grad_norm": 13.959611183566771, | |
| "learning_rate": 1.4987120182658877e-07, | |
| "logits/chosen": 15.972567558288574, | |
| "logits/rejected": 18.35633659362793, | |
| "logps/chosen": -330.76104736328125, | |
| "logps/rejected": -282.9498291015625, | |
| "loss": 0.3757, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -9.672747611999512, | |
| "rewards/margins": 10.330121994018555, | |
| "rewards/rejected": -20.002866744995117, | |
| "sft_loss": 1.1246702671051025, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.3061336254107339, | |
| "grad_norm": 12.65020419545928, | |
| "learning_rate": 1.4676517847099745e-07, | |
| "logits/chosen": 16.62309455871582, | |
| "logits/rejected": 17.682994842529297, | |
| "logps/chosen": -309.1587829589844, | |
| "logps/rejected": -255.12290954589844, | |
| "loss": 0.3603, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -8.413141250610352, | |
| "rewards/margins": 9.33283805847168, | |
| "rewards/rejected": -17.7459774017334, | |
| "sft_loss": 1.1139575242996216, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 1.3143483023001095, | |
| "grad_norm": 11.10720994563204, | |
| "learning_rate": 1.4367824904439242e-07, | |
| "logits/chosen": 17.087141036987305, | |
| "logits/rejected": 17.25540542602539, | |
| "logps/chosen": -336.4616394042969, | |
| "logps/rejected": -273.6061096191406, | |
| "loss": 0.371, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -8.36069393157959, | |
| "rewards/margins": 10.413783073425293, | |
| "rewards/rejected": -18.774477005004883, | |
| "sft_loss": 1.0689451694488525, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.3225629791894853, | |
| "grad_norm": 12.783247596774917, | |
| "learning_rate": 1.4061098449376985e-07, | |
| "logits/chosen": 15.60853099822998, | |
| "logits/rejected": 17.57704734802246, | |
| "logps/chosen": -362.2177734375, | |
| "logps/rejected": -308.759765625, | |
| "loss": 0.3288, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -9.05817699432373, | |
| "rewards/margins": 11.833967208862305, | |
| "rewards/rejected": -20.89214324951172, | |
| "sft_loss": 1.2039010524749756, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 1.330777656078861, | |
| "grad_norm": 8.359077848319595, | |
| "learning_rate": 1.375639521289836e-07, | |
| "logits/chosen": 15.683825492858887, | |
| "logits/rejected": 16.602642059326172, | |
| "logps/chosen": -332.6221008300781, | |
| "logps/rejected": -278.2598571777344, | |
| "loss": 0.3387, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -9.19861888885498, | |
| "rewards/margins": 10.37482738494873, | |
| "rewards/rejected": -19.57344627380371, | |
| "sft_loss": 1.17559015750885, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.3389923329682367, | |
| "grad_norm": 13.496245877040751, | |
| "learning_rate": 1.3453771551781756e-07, | |
| "logits/chosen": 16.44358253479004, | |
| "logits/rejected": 17.437644958496094, | |
| "logps/chosen": -307.6462707519531, | |
| "logps/rejected": -271.81683349609375, | |
| "loss": 0.3318, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -8.48726749420166, | |
| "rewards/margins": 10.40507698059082, | |
| "rewards/rejected": -18.892345428466797, | |
| "sft_loss": 1.1855844259262085, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 1.3472070098576123, | |
| "grad_norm": 14.433148985359804, | |
| "learning_rate": 1.3153283438175034e-07, | |
| "logits/chosen": 15.872283935546875, | |
| "logits/rejected": 16.650604248046875, | |
| "logps/chosen": -324.4306945800781, | |
| "logps/rejected": -276.83929443359375, | |
| "loss": 0.3743, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -9.549914360046387, | |
| "rewards/margins": 10.544774055480957, | |
| "rewards/rejected": -20.094688415527344, | |
| "sft_loss": 1.1671338081359863, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.355421686746988, | |
| "grad_norm": 22.22401225520154, | |
| "learning_rate": 1.2854986449243124e-07, | |
| "logits/chosen": 16.34712028503418, | |
| "logits/rejected": 16.94756317138672, | |
| "logps/chosen": -331.7503662109375, | |
| "logps/rejected": -286.41705322265625, | |
| "loss": 0.3285, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -9.810836791992188, | |
| "rewards/margins": 10.913623809814453, | |
| "rewards/rejected": -20.724462509155273, | |
| "sft_loss": 1.0781916379928589, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 1.3636363636363638, | |
| "grad_norm": 11.973621147714551, | |
| "learning_rate": 1.2558935756888675e-07, | |
| "logits/chosen": 15.828746795654297, | |
| "logits/rejected": 16.91975212097168, | |
| "logps/chosen": -322.3880310058594, | |
| "logps/rejected": -279.2362365722656, | |
| "loss": 0.3542, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -9.630596160888672, | |
| "rewards/margins": 10.590496063232422, | |
| "rewards/rejected": -20.221094131469727, | |
| "sft_loss": 1.1420843601226807, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.3718510405257394, | |
| "grad_norm": 12.897955971332296, | |
| "learning_rate": 1.226518611754767e-07, | |
| "logits/chosen": 17.223234176635742, | |
| "logits/rejected": 18.44441795349121, | |
| "logps/chosen": -314.6831970214844, | |
| "logps/rejected": -273.42083740234375, | |
| "loss": 0.3494, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -8.791934967041016, | |
| "rewards/margins": 10.404925346374512, | |
| "rewards/rejected": -19.196863174438477, | |
| "sft_loss": 1.11257803440094, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 1.380065717415115, | |
| "grad_norm": 14.822041663775748, | |
| "learning_rate": 1.1973791862061871e-07, | |
| "logits/chosen": 15.981986045837402, | |
| "logits/rejected": 16.508832931518555, | |
| "logps/chosen": -357.4217529296875, | |
| "logps/rejected": -279.4723815917969, | |
| "loss": 0.4071, | |
| "rewards/accuracies": 0.9466667175292969, | |
| "rewards/chosen": -8.533044815063477, | |
| "rewards/margins": 10.845856666564941, | |
| "rewards/rejected": -19.378902435302734, | |
| "sft_loss": 1.071024775505066, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.3882803943044908, | |
| "grad_norm": 12.166531109745293, | |
| "learning_rate": 1.1684806885630003e-07, | |
| "logits/chosen": 17.19085693359375, | |
| "logits/rejected": 18.22423553466797, | |
| "logps/chosen": -336.6310729980469, | |
| "logps/rejected": -288.2579040527344, | |
| "loss": 0.3543, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -8.727254867553711, | |
| "rewards/margins": 11.067011833190918, | |
| "rewards/rejected": -19.794267654418945, | |
| "sft_loss": 1.0941708087921143, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 1.3964950711938664, | |
| "grad_norm": 19.61480809183216, | |
| "learning_rate": 1.1398284637839486e-07, | |
| "logits/chosen": 17.393543243408203, | |
| "logits/rejected": 17.97818946838379, | |
| "logps/chosen": -290.88043212890625, | |
| "logps/rejected": -248.78334045410156, | |
| "loss": 0.3532, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -8.74341869354248, | |
| "rewards/margins": 8.96402359008789, | |
| "rewards/rejected": -17.707439422607422, | |
| "sft_loss": 1.3463881015777588, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.404709748083242, | |
| "grad_norm": 13.04687226615894, | |
| "learning_rate": 1.1114278112780601e-07, | |
| "logits/chosen": 16.697458267211914, | |
| "logits/rejected": 17.817760467529297, | |
| "logps/chosen": -376.94256591796875, | |
| "logps/rejected": -319.7321472167969, | |
| "loss": 0.308, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -10.05405044555664, | |
| "rewards/margins": 12.681156158447266, | |
| "rewards/rejected": -22.735204696655273, | |
| "sft_loss": 1.1224801540374756, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 1.4129244249726178, | |
| "grad_norm": 13.443707852848624, | |
| "learning_rate": 1.08328398392449e-07, | |
| "logits/chosen": 17.408639907836914, | |
| "logits/rejected": 17.620332717895508, | |
| "logps/chosen": -365.28131103515625, | |
| "logps/rejected": -308.3528137207031, | |
| "loss": 0.3755, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -10.822293281555176, | |
| "rewards/margins": 11.438949584960938, | |
| "rewards/rejected": -22.261241912841797, | |
| "sft_loss": 1.178871750831604, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.4211391018619934, | |
| "grad_norm": 21.58859732751979, | |
| "learning_rate": 1.0554021871009677e-07, | |
| "logits/chosen": 16.947927474975586, | |
| "logits/rejected": 17.420812606811523, | |
| "logps/chosen": -340.0753479003906, | |
| "logps/rejected": -297.9937438964844, | |
| "loss": 0.3588, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -9.554227828979492, | |
| "rewards/margins": 12.149679183959961, | |
| "rewards/rejected": -21.70391082763672, | |
| "sft_loss": 1.3246734142303467, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 1.429353778751369, | |
| "grad_norm": 13.8734601142875, | |
| "learning_rate": 1.0277875777210299e-07, | |
| "logits/chosen": 14.887709617614746, | |
| "logits/rejected": 15.843902587890625, | |
| "logps/chosen": -324.3350830078125, | |
| "logps/rejected": -275.6741943359375, | |
| "loss": 0.3712, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -9.113089561462402, | |
| "rewards/margins": 10.9337158203125, | |
| "rewards/rejected": -20.046804428100586, | |
| "sft_loss": 1.2739402055740356, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.4375684556407449, | |
| "grad_norm": 13.714339626163223, | |
| "learning_rate": 1.0004452632802158e-07, | |
| "logits/chosen": 17.476552963256836, | |
| "logits/rejected": 17.923315048217773, | |
| "logps/chosen": -338.1813049316406, | |
| "logps/rejected": -277.5501403808594, | |
| "loss": 0.3129, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -8.576931953430176, | |
| "rewards/margins": 10.827016830444336, | |
| "rewards/rejected": -19.403947830200195, | |
| "sft_loss": 1.1658498048782349, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 1.4457831325301205, | |
| "grad_norm": 13.805309365020234, | |
| "learning_rate": 9.733803009114044e-08, | |
| "logits/chosen": 16.891300201416016, | |
| "logits/rejected": 17.32049560546875, | |
| "logps/chosen": -322.0257263183594, | |
| "logps/rejected": -274.27691650390625, | |
| "loss": 0.316, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -8.144042015075684, | |
| "rewards/margins": 10.516057014465332, | |
| "rewards/rejected": -18.660099029541016, | |
| "sft_loss": 1.110759973526001, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.453997809419496, | |
| "grad_norm": 29.77032111690104, | |
| "learning_rate": 9.465976964494682e-08, | |
| "logits/chosen": 16.620283126831055, | |
| "logits/rejected": 17.72939682006836, | |
| "logps/chosen": -300.1767578125, | |
| "logps/rejected": -261.1438903808594, | |
| "loss": 0.361, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -8.963627815246582, | |
| "rewards/margins": 9.891008377075195, | |
| "rewards/rejected": -18.854639053344727, | |
| "sft_loss": 1.2920080423355103, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 1.462212486308872, | |
| "grad_norm": 9.782780560332286, | |
| "learning_rate": 9.201024035054053e-08, | |
| "logits/chosen": 17.15985107421875, | |
| "logits/rejected": 17.535512924194336, | |
| "logps/chosen": -286.6101379394531, | |
| "logps/rejected": -247.57127380371094, | |
| "loss": 0.3835, | |
| "rewards/accuracies": 0.9466667175292969, | |
| "rewards/chosen": -9.331561088562012, | |
| "rewards/margins": 9.115301132202148, | |
| "rewards/rejected": -18.446863174438477, | |
| "sft_loss": 1.3567354679107666, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.4704271631982475, | |
| "grad_norm": 8.768405187815805, | |
| "learning_rate": 8.938993225501495e-08, | |
| "logits/chosen": 17.89764976501465, | |
| "logits/rejected": 18.452497482299805, | |
| "logps/chosen": -351.6549987792969, | |
| "logps/rejected": -302.9189453125, | |
| "loss": 0.3592, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -9.618634223937988, | |
| "rewards/margins": 11.819962501525879, | |
| "rewards/rejected": -21.4385986328125, | |
| "sft_loss": 1.0768134593963623, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 1.4786418400876231, | |
| "grad_norm": 21.82788195022886, | |
| "learning_rate": 8.679933000081879e-08, | |
| "logits/chosen": 15.745450019836426, | |
| "logits/rejected": 17.15949249267578, | |
| "logps/chosen": -307.5598449707031, | |
| "logps/rejected": -271.531494140625, | |
| "loss": 0.3801, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -8.928607940673828, | |
| "rewards/margins": 10.275431632995605, | |
| "rewards/rejected": -19.204038619995117, | |
| "sft_loss": 1.1987248659133911, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.486856516976999, | |
| "grad_norm": 12.077209434939249, | |
| "learning_rate": 8.423891273611855e-08, | |
| "logits/chosen": 16.016569137573242, | |
| "logits/rejected": 16.249284744262695, | |
| "logps/chosen": -311.76934814453125, | |
| "logps/rejected": -261.8121643066406, | |
| "loss": 0.3799, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -8.412449836730957, | |
| "rewards/margins": 10.322086334228516, | |
| "rewards/rejected": -18.734539031982422, | |
| "sft_loss": 1.2180228233337402, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 1.4950711938663745, | |
| "grad_norm": 20.15671717033895, | |
| "learning_rate": 8.170915402617739e-08, | |
| "logits/chosen": 15.889266014099121, | |
| "logits/rejected": 17.218164443969727, | |
| "logps/chosen": -335.0419921875, | |
| "logps/rejected": -293.2705078125, | |
| "loss": 0.4051, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -9.390657424926758, | |
| "rewards/margins": 11.274243354797363, | |
| "rewards/rejected": -20.664899826049805, | |
| "sft_loss": 1.1832726001739502, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.5032858707557502, | |
| "grad_norm": 11.069682914043863, | |
| "learning_rate": 7.921052176576643e-08, | |
| "logits/chosen": 17.052453994750977, | |
| "logits/rejected": 17.67256736755371, | |
| "logps/chosen": -305.6400146484375, | |
| "logps/rejected": -266.4335632324219, | |
| "loss": 0.3165, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -8.719382286071777, | |
| "rewards/margins": 10.110651016235352, | |
| "rewards/rejected": -18.830034255981445, | |
| "sft_loss": 1.0706188678741455, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 1.511500547645126, | |
| "grad_norm": 27.258481926608287, | |
| "learning_rate": 7.674347809262377e-08, | |
| "logits/chosen": 16.615238189697266, | |
| "logits/rejected": 17.932260513305664, | |
| "logps/chosen": -288.8174743652344, | |
| "logps/rejected": -250.63177490234375, | |
| "loss": 0.3758, | |
| "rewards/accuracies": 0.9466666579246521, | |
| "rewards/chosen": -7.821852684020996, | |
| "rewards/margins": 9.426899909973145, | |
| "rewards/rejected": -17.24875259399414, | |
| "sft_loss": 1.12588369846344, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.5197152245345018, | |
| "grad_norm": 8.415837096456798, | |
| "learning_rate": 7.430847930198009e-08, | |
| "logits/chosen": 16.921852111816406, | |
| "logits/rejected": 17.39198875427246, | |
| "logps/chosen": -329.8725891113281, | |
| "logps/rejected": -274.1763000488281, | |
| "loss": 0.3708, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -7.946272850036621, | |
| "rewards/margins": 11.179486274719238, | |
| "rewards/rejected": -19.12575912475586, | |
| "sft_loss": 1.286713719367981, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 1.5279299014238772, | |
| "grad_norm": 7.8387923698583295, | |
| "learning_rate": 7.190597576216384e-08, | |
| "logits/chosen": 15.69840145111084, | |
| "logits/rejected": 17.983213424682617, | |
| "logps/chosen": -329.1253967285156, | |
| "logps/rejected": -290.71051025390625, | |
| "loss": 0.3144, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -8.661620140075684, | |
| "rewards/margins": 11.160269737243652, | |
| "rewards/rejected": -19.821889877319336, | |
| "sft_loss": 1.1312789916992188, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.536144578313253, | |
| "grad_norm": 14.005325625629936, | |
| "learning_rate": 6.953641183130224e-08, | |
| "logits/chosen": 16.529827117919922, | |
| "logits/rejected": 16.534809112548828, | |
| "logps/chosen": -333.02813720703125, | |
| "logps/rejected": -275.6182556152344, | |
| "loss": 0.3675, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -9.715841293334961, | |
| "rewards/margins": 9.975454330444336, | |
| "rewards/rejected": -19.691295623779297, | |
| "sft_loss": 1.2341707944869995, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 1.5443592552026288, | |
| "grad_norm": 11.238181780972436, | |
| "learning_rate": 6.720022577513507e-08, | |
| "logits/chosen": 15.408208847045898, | |
| "logits/rejected": 16.01373291015625, | |
| "logps/chosen": -350.6366882324219, | |
| "logps/rejected": -291.2669677734375, | |
| "loss": 0.3381, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -10.044300079345703, | |
| "rewards/margins": 10.780247688293457, | |
| "rewards/rejected": -20.82455062866211, | |
| "sft_loss": 1.26254141330719, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.5525739320920042, | |
| "grad_norm": 11.413642178268471, | |
| "learning_rate": 6.489784968595444e-08, | |
| "logits/chosen": 15.467609405517578, | |
| "logits/rejected": 16.952180862426758, | |
| "logps/chosen": -346.5306091308594, | |
| "logps/rejected": -312.6312561035156, | |
| "loss": 0.3402, | |
| "rewards/accuracies": 0.9466667175292969, | |
| "rewards/chosen": -9.993128776550293, | |
| "rewards/margins": 12.808844566345215, | |
| "rewards/rejected": -22.801973342895508, | |
| "sft_loss": 1.1826088428497314, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 1.56078860898138, | |
| "grad_norm": 22.79199458890795, | |
| "learning_rate": 6.262970940268652e-08, | |
| "logits/chosen": 16.051044464111328, | |
| "logits/rejected": 17.10271453857422, | |
| "logps/chosen": -313.6996765136719, | |
| "logps/rejected": -278.2881774902344, | |
| "loss": 0.333, | |
| "rewards/accuracies": 0.9466667175292969, | |
| "rewards/chosen": -9.396943092346191, | |
| "rewards/margins": 10.23829460144043, | |
| "rewards/rejected": -19.635236740112305, | |
| "sft_loss": 1.1279245615005493, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.5690032858707559, | |
| "grad_norm": 11.668850401054987, | |
| "learning_rate": 6.039622443213008e-08, | |
| "logits/chosen": 16.13634490966797, | |
| "logits/rejected": 17.919300079345703, | |
| "logps/chosen": -325.7288513183594, | |
| "logps/rejected": -289.1236267089844, | |
| "loss": 0.3346, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -9.695369720458984, | |
| "rewards/margins": 11.087947845458984, | |
| "rewards/rejected": -20.78331756591797, | |
| "sft_loss": 1.1951278448104858, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 1.5772179627601315, | |
| "grad_norm": 13.415709297062323, | |
| "learning_rate": 5.8197807871366e-08, | |
| "logits/chosen": 15.244779586791992, | |
| "logits/rejected": 16.526262283325195, | |
| "logps/chosen": -370.6669616699219, | |
| "logps/rejected": -322.87847900390625, | |
| "loss": 0.3428, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -9.789223670959473, | |
| "rewards/margins": 12.587509155273438, | |
| "rewards/rejected": -22.376733779907227, | |
| "sft_loss": 2.1045873165130615, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.585432639649507, | |
| "grad_norm": 13.58873079620651, | |
| "learning_rate": 5.6034866331352376e-08, | |
| "logits/chosen": 15.409506797790527, | |
| "logits/rejected": 16.128753662109375, | |
| "logps/chosen": -322.9807434082031, | |
| "logps/rejected": -271.06378173828125, | |
| "loss": 0.347, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -9.693291664123535, | |
| "rewards/margins": 10.159637451171875, | |
| "rewards/rejected": -19.852930068969727, | |
| "sft_loss": 1.1238617897033691, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 1.593647316538883, | |
| "grad_norm": 16.504268173121613, | |
| "learning_rate": 5.390779986171934e-08, | |
| "logits/chosen": 15.72015380859375, | |
| "logits/rejected": 17.518657684326172, | |
| "logps/chosen": -337.39349365234375, | |
| "logps/rejected": -302.06109619140625, | |
| "loss": 0.3214, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -8.937000274658203, | |
| "rewards/margins": 11.78524112701416, | |
| "rewards/rejected": -20.72224235534668, | |
| "sft_loss": 1.129492998123169, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.6018619934282585, | |
| "grad_norm": 14.941336561605484, | |
| "learning_rate": 5.1817001876777314e-08, | |
| "logits/chosen": 15.710195541381836, | |
| "logits/rejected": 16.9680233001709, | |
| "logps/chosen": -324.51251220703125, | |
| "logps/rejected": -286.7372741699219, | |
| "loss": 0.3363, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -8.67973518371582, | |
| "rewards/margins": 11.069150924682617, | |
| "rewards/rejected": -19.74888801574707, | |
| "sft_loss": 1.168811559677124, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 1.6100766703176341, | |
| "grad_norm": 11.368129107493246, | |
| "learning_rate": 4.9762859082752464e-08, | |
| "logits/chosen": 17.196496963500977, | |
| "logits/rejected": 18.05078125, | |
| "logps/chosen": -340.8441162109375, | |
| "logps/rejected": -291.5513610839844, | |
| "loss": 0.332, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -8.99682903289795, | |
| "rewards/margins": 11.247660636901855, | |
| "rewards/rejected": -20.244489669799805, | |
| "sft_loss": 1.040310025215149, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.61829134720701, | |
| "grad_norm": 17.375398637805176, | |
| "learning_rate": 4.774575140626316e-08, | |
| "logits/chosen": 15.612386703491211, | |
| "logits/rejected": 17.049909591674805, | |
| "logps/chosen": -315.4412841796875, | |
| "logps/rejected": -273.022216796875, | |
| "loss": 0.2981, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -8.78695011138916, | |
| "rewards/margins": 10.45934772491455, | |
| "rewards/rejected": -19.24629783630371, | |
| "sft_loss": 1.122090458869934, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 1.6265060240963856, | |
| "grad_norm": 18.391059447329464, | |
| "learning_rate": 4.5766051924049975e-08, | |
| "logits/chosen": 19.033084869384766, | |
| "logits/rejected": 19.09506607055664, | |
| "logps/chosen": -344.99224853515625, | |
| "logps/rejected": -281.4374084472656, | |
| "loss": 0.4023, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -8.408563613891602, | |
| "rewards/margins": 11.344144821166992, | |
| "rewards/rejected": -19.752708435058594, | |
| "sft_loss": 1.2188175916671753, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.6347207009857612, | |
| "grad_norm": 4.7857387318547255, | |
| "learning_rate": 4.3824126793972934e-08, | |
| "logits/chosen": 15.44153118133545, | |
| "logits/rejected": 16.74248504638672, | |
| "logps/chosen": -348.91326904296875, | |
| "logps/rejected": -291.33905029296875, | |
| "loss": 0.3604, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -8.16443157196045, | |
| "rewards/margins": 12.226564407348633, | |
| "rewards/rejected": -20.390995025634766, | |
| "sft_loss": 1.1215661764144897, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 1.642935377875137, | |
| "grad_norm": 8.425137005894317, | |
| "learning_rate": 4.192033518728819e-08, | |
| "logits/chosen": 16.596193313598633, | |
| "logits/rejected": 16.706600189208984, | |
| "logps/chosen": -337.87109375, | |
| "logps/rejected": -279.28277587890625, | |
| "loss": 0.3546, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -8.45007038116455, | |
| "rewards/margins": 11.166516304016113, | |
| "rewards/rejected": -19.616586685180664, | |
| "sft_loss": 1.3097057342529297, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.6511500547645126, | |
| "grad_norm": 10.216638281397124, | |
| "learning_rate": 4.0055029222217125e-08, | |
| "logits/chosen": 16.447404861450195, | |
| "logits/rejected": 16.960412979125977, | |
| "logps/chosen": -313.47698974609375, | |
| "logps/rejected": -269.1077880859375, | |
| "loss": 0.3193, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -9.434925079345703, | |
| "rewards/margins": 10.487255096435547, | |
| "rewards/rejected": -19.92218017578125, | |
| "sft_loss": 1.099938988685608, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 1.6593647316538882, | |
| "grad_norm": 10.435302161258754, | |
| "learning_rate": 3.8228553898819904e-08, | |
| "logits/chosen": 17.95560073852539, | |
| "logits/rejected": 19.009355545043945, | |
| "logps/chosen": -340.97222900390625, | |
| "logps/rejected": -298.7887268066406, | |
| "loss": 0.3949, | |
| "rewards/accuracies": 0.9466667175292969, | |
| "rewards/chosen": -9.733698844909668, | |
| "rewards/margins": 11.381470680236816, | |
| "rewards/rejected": -21.115171432495117, | |
| "sft_loss": 1.1103211641311646, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.667579408543264, | |
| "grad_norm": 11.907900026431262, | |
| "learning_rate": 3.6441247035185416e-08, | |
| "logits/chosen": 16.81635284423828, | |
| "logits/rejected": 17.959022521972656, | |
| "logps/chosen": -361.63812255859375, | |
| "logps/rejected": -303.6453552246094, | |
| "loss": 0.3353, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -9.310194969177246, | |
| "rewards/margins": 11.503978729248047, | |
| "rewards/rejected": -20.814172744750977, | |
| "sft_loss": 1.1227651834487915, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 1.6757940854326396, | |
| "grad_norm": 10.027831680647658, | |
| "learning_rate": 3.4693439204949855e-08, | |
| "logits/chosen": 15.768338203430176, | |
| "logits/rejected": 17.33998680114746, | |
| "logps/chosen": -292.4506530761719, | |
| "logps/rejected": -263.465087890625, | |
| "loss": 0.3701, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -9.007841110229492, | |
| "rewards/margins": 10.145407676696777, | |
| "rewards/rejected": -19.153249740600586, | |
| "sft_loss": 1.1951355934143066, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.6840087623220152, | |
| "grad_norm": 19.083513373797096, | |
| "learning_rate": 3.298545367615493e-08, | |
| "logits/chosen": 17.174707412719727, | |
| "logits/rejected": 17.86057472229004, | |
| "logps/chosen": -288.18280029296875, | |
| "logps/rejected": -254.59439086914062, | |
| "loss": 0.4406, | |
| "rewards/accuracies": 0.9200000166893005, | |
| "rewards/chosen": -8.990920066833496, | |
| "rewards/margins": 9.506522178649902, | |
| "rewards/rejected": -18.4974422454834, | |
| "sft_loss": 1.2072545289993286, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 1.692223439211391, | |
| "grad_norm": 12.02229671131509, | |
| "learning_rate": 3.13176063514575e-08, | |
| "logits/chosen": 17.051944732666016, | |
| "logits/rejected": 17.904996871948242, | |
| "logps/chosen": -359.4859619140625, | |
| "logps/rejected": -295.76361083984375, | |
| "loss": 0.3592, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -8.758131980895996, | |
| "rewards/margins": 11.989044189453125, | |
| "rewards/rejected": -20.747175216674805, | |
| "sft_loss": 1.2417008876800537, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.7004381161007667, | |
| "grad_norm": 14.595666831687033, | |
| "learning_rate": 2.96902057097011e-08, | |
| "logits/chosen": 16.427305221557617, | |
| "logits/rejected": 17.641498565673828, | |
| "logps/chosen": -320.2253723144531, | |
| "logps/rejected": -269.6889953613281, | |
| "loss": 0.3571, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -8.658390998840332, | |
| "rewards/margins": 10.258994102478027, | |
| "rewards/rejected": -18.91738510131836, | |
| "sft_loss": 1.332204818725586, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 1.7086527929901423, | |
| "grad_norm": 13.068829943035729, | |
| "learning_rate": 2.8103552748861475e-08, | |
| "logits/chosen": 15.954511642456055, | |
| "logits/rejected": 16.74055290222168, | |
| "logps/chosen": -331.81707763671875, | |
| "logps/rejected": -280.3811950683594, | |
| "loss": 0.335, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -9.056927680969238, | |
| "rewards/margins": 10.4561767578125, | |
| "rewards/rejected": -19.51310157775879, | |
| "sft_loss": 1.1305441856384277, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.716867469879518, | |
| "grad_norm": 14.364271003384296, | |
| "learning_rate": 2.65579409303745e-08, | |
| "logits/chosen": 17.06740951538086, | |
| "logits/rejected": 17.10344886779785, | |
| "logps/chosen": -364.3813171386719, | |
| "logps/rejected": -293.8392333984375, | |
| "loss": 0.3632, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -8.905304908752441, | |
| "rewards/margins": 11.902792930603027, | |
| "rewards/rejected": -20.808101654052734, | |
| "sft_loss": 1.149087905883789, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 1.7250821467688937, | |
| "grad_norm": 21.44861485257077, | |
| "learning_rate": 2.505365612485874e-08, | |
| "logits/chosen": 14.690909385681152, | |
| "logits/rejected": 15.39016056060791, | |
| "logps/chosen": -310.1071472167969, | |
| "logps/rejected": -257.1431884765625, | |
| "loss": 0.3935, | |
| "rewards/accuracies": 0.9466666579246521, | |
| "rewards/chosen": -9.03943157196045, | |
| "rewards/margins": 9.36133098602295, | |
| "rewards/rejected": -18.4007625579834, | |
| "sft_loss": 1.492135763168335, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.7332968236582693, | |
| "grad_norm": 10.165639822250112, | |
| "learning_rate": 2.3590976559242275e-08, | |
| "logits/chosen": 16.5327091217041, | |
| "logits/rejected": 17.50569725036621, | |
| "logps/chosen": -327.5498962402344, | |
| "logps/rejected": -288.2828674316406, | |
| "loss": 0.3287, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -9.587510108947754, | |
| "rewards/margins": 10.448949813842773, | |
| "rewards/rejected": -20.036460876464844, | |
| "sft_loss": 1.2338570356369019, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 1.7415115005476451, | |
| "grad_norm": 9.87040734328389, | |
| "learning_rate": 2.21701727653025e-08, | |
| "logits/chosen": 15.633200645446777, | |
| "logits/rejected": 16.086591720581055, | |
| "logps/chosen": -352.7239990234375, | |
| "logps/rejected": -294.7661437988281, | |
| "loss": 0.3506, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -9.915904998779297, | |
| "rewards/margins": 11.233012199401855, | |
| "rewards/rejected": -21.14891815185547, | |
| "sft_loss": 1.2049648761749268, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.749726177437021, | |
| "grad_norm": 12.834737803326664, | |
| "learning_rate": 2.0791507529629522e-08, | |
| "logits/chosen": 16.351898193359375, | |
| "logits/rejected": 17.47950553894043, | |
| "logps/chosen": -281.7489318847656, | |
| "logps/rejected": -243.97483825683594, | |
| "loss": 0.3882, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -9.137645721435547, | |
| "rewards/margins": 8.15616226196289, | |
| "rewards/rejected": -17.29380989074707, | |
| "sft_loss": 1.157172679901123, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 1.7579408543263964, | |
| "grad_norm": 17.205116768747743, | |
| "learning_rate": 1.945523584502262e-08, | |
| "logits/chosen": 17.508634567260742, | |
| "logits/rejected": 17.94008445739746, | |
| "logps/chosen": -381.6427917480469, | |
| "logps/rejected": -311.2584228515625, | |
| "loss": 0.277, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -9.13695240020752, | |
| "rewards/margins": 12.883686065673828, | |
| "rewards/rejected": -22.02063751220703, | |
| "sft_loss": 1.055487036705017, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.7661555312157722, | |
| "grad_norm": 20.851515512896743, | |
| "learning_rate": 1.8161604863327072e-08, | |
| "logits/chosen": 15.488776206970215, | |
| "logits/rejected": 16.223703384399414, | |
| "logps/chosen": -325.0180358886719, | |
| "logps/rejected": -262.5523376464844, | |
| "loss": 0.3441, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -8.707998275756836, | |
| "rewards/margins": 9.961923599243164, | |
| "rewards/rejected": -18.669921875, | |
| "sft_loss": 1.1589832305908203, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 1.774370208105148, | |
| "grad_norm": 17.972861201786518, | |
| "learning_rate": 1.691085384972235e-08, | |
| "logits/chosen": 14.909817695617676, | |
| "logits/rejected": 15.637177467346191, | |
| "logps/chosen": -278.62322998046875, | |
| "logps/rejected": -248.10516357421875, | |
| "loss": 0.3273, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -8.496481895446777, | |
| "rewards/margins": 9.37110424041748, | |
| "rewards/rejected": -17.867582321166992, | |
| "sft_loss": 1.2477223873138428, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.7825848849945234, | |
| "grad_norm": 14.287110123465489, | |
| "learning_rate": 1.570321413846845e-08, | |
| "logits/chosen": 15.394953727722168, | |
| "logits/rejected": 17.261220932006836, | |
| "logps/chosen": -303.1915588378906, | |
| "logps/rejected": -277.51458740234375, | |
| "loss": 0.2832, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -8.813228607177734, | |
| "rewards/margins": 10.778543472290039, | |
| "rewards/rejected": -19.59177017211914, | |
| "sft_loss": 1.2371479272842407, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 1.7907995618838992, | |
| "grad_norm": 25.07441398024989, | |
| "learning_rate": 1.4538909090118846e-08, | |
| "logits/chosen": 16.854040145874023, | |
| "logits/rejected": 16.584880828857422, | |
| "logps/chosen": -322.2169494628906, | |
| "logps/rejected": -270.48895263671875, | |
| "loss": 0.3503, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -9.387935638427734, | |
| "rewards/margins": 10.15947437286377, | |
| "rewards/rejected": -19.547407150268555, | |
| "sft_loss": 1.2250884771347046, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.799014238773275, | |
| "grad_norm": 12.397083886048673, | |
| "learning_rate": 1.3418154050208936e-08, | |
| "logits/chosen": 15.345029830932617, | |
| "logits/rejected": 16.834665298461914, | |
| "logps/chosen": -297.9521484375, | |
| "logps/rejected": -269.69659423828125, | |
| "loss": 0.3526, | |
| "rewards/accuracies": 0.9066667556762695, | |
| "rewards/chosen": -8.90621280670166, | |
| "rewards/margins": 10.480603218078613, | |
| "rewards/rejected": -19.386816024780273, | |
| "sft_loss": 1.1300204992294312, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 1.8072289156626506, | |
| "grad_norm": 11.032455088728524, | |
| "learning_rate": 1.2341156309426447e-08, | |
| "logits/chosen": 14.950087547302246, | |
| "logits/rejected": 16.54684829711914, | |
| "logps/chosen": -332.92596435546875, | |
| "logps/rejected": -291.2406005859375, | |
| "loss": 0.289, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -9.32970905303955, | |
| "rewards/margins": 11.054911613464355, | |
| "rewards/rejected": -20.384618759155273, | |
| "sft_loss": 1.0616583824157715, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.8154435925520263, | |
| "grad_norm": 12.222936203639813, | |
| "learning_rate": 1.130811506527149e-08, | |
| "logits/chosen": 16.257431030273438, | |
| "logits/rejected": 17.80784034729004, | |
| "logps/chosen": -374.90716552734375, | |
| "logps/rejected": -309.1212158203125, | |
| "loss": 0.2761, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -9.983473777770996, | |
| "rewards/margins": 11.696609497070312, | |
| "rewards/rejected": -21.680082321166992, | |
| "sft_loss": 1.1142687797546387, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 1.823658269441402, | |
| "grad_norm": 8.603199609340459, | |
| "learning_rate": 1.0319221385213934e-08, | |
| "logits/chosen": 15.376051902770996, | |
| "logits/rejected": 16.714609146118164, | |
| "logps/chosen": -314.27996826171875, | |
| "logps/rejected": -280.79901123046875, | |
| "loss": 0.3201, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -9.61314868927002, | |
| "rewards/margins": 9.986039161682129, | |
| "rewards/rejected": -19.59918785095215, | |
| "sft_loss": 1.2583483457565308, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.8318729463307777, | |
| "grad_norm": 19.393082549696974, | |
| "learning_rate": 9.374658171354411e-09, | |
| "logits/chosen": 16.10991859436035, | |
| "logits/rejected": 17.19182586669922, | |
| "logps/chosen": -335.8138122558594, | |
| "logps/rejected": -285.86859130859375, | |
| "loss": 0.3573, | |
| "rewards/accuracies": 0.9466666579246521, | |
| "rewards/chosen": -9.598699569702148, | |
| "rewards/margins": 11.110600471496582, | |
| "rewards/rejected": -20.709299087524414, | |
| "sft_loss": 1.2626595497131348, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 1.8400876232201533, | |
| "grad_norm": 11.87556668069316, | |
| "learning_rate": 8.474600126594983e-09, | |
| "logits/chosen": 16.182172775268555, | |
| "logits/rejected": 17.73249053955078, | |
| "logps/chosen": -327.0877685546875, | |
| "logps/rejected": -281.38848876953125, | |
| "loss": 0.3247, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -9.166089057922363, | |
| "rewards/margins": 11.149109840393066, | |
| "rewards/rejected": -20.315196990966797, | |
| "sft_loss": 1.3075504302978516, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.8483023001095291, | |
| "grad_norm": 14.892979384936938, | |
| "learning_rate": 7.619213722327184e-09, | |
| "logits/chosen": 16.07329750061035, | |
| "logits/rejected": 16.353158950805664, | |
| "logps/chosen": -328.3527526855469, | |
| "logps/rejected": -281.48565673828125, | |
| "loss": 0.3187, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -9.420902252197266, | |
| "rewards/margins": 10.730939865112305, | |
| "rewards/rejected": -20.151844024658203, | |
| "sft_loss": 1.2091686725616455, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 1.8565169769989047, | |
| "grad_norm": 12.380319924456133, | |
| "learning_rate": 6.808657167641896e-09, | |
| "logits/chosen": 15.801959037780762, | |
| "logits/rejected": 16.7104434967041, | |
| "logps/chosen": -357.0127258300781, | |
| "logps/rejected": -303.44989013671875, | |
| "loss": 0.3863, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -9.63782024383545, | |
| "rewards/margins": 12.076054573059082, | |
| "rewards/rejected": -21.71387481689453, | |
| "sft_loss": 1.1681187152862549, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.8647316538882803, | |
| "grad_norm": 15.785691804360567, | |
| "learning_rate": 6.043080380067539e-09, | |
| "logits/chosen": 15.678844451904297, | |
| "logits/rejected": 16.41909408569336, | |
| "logps/chosen": -383.7453918457031, | |
| "logps/rejected": -308.8125915527344, | |
| "loss": 0.3156, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -9.041726112365723, | |
| "rewards/margins": 12.560318946838379, | |
| "rewards/rejected": -21.602046966552734, | |
| "sft_loss": 1.186676263809204, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 1.8729463307776562, | |
| "grad_norm": 19.758267623181617, | |
| "learning_rate": 5.322624957841998e-09, | |
| "logits/chosen": 16.686138153076172, | |
| "logits/rejected": 17.78066062927246, | |
| "logps/chosen": -342.8313293457031, | |
| "logps/rejected": -297.6686096191406, | |
| "loss": 0.38, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -9.914877891540527, | |
| "rewards/margins": 11.297541618347168, | |
| "rewards/rejected": -21.21242332458496, | |
| "sft_loss": 1.1149108409881592, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.8811610076670318, | |
| "grad_norm": 14.356418798551582, | |
| "learning_rate": 4.647424153723101e-09, | |
| "logits/chosen": 16.441852569580078, | |
| "logits/rejected": 16.586217880249023, | |
| "logps/chosen": -318.8826599121094, | |
| "logps/rejected": -271.4314880371094, | |
| "loss": 0.367, | |
| "rewards/accuracies": 0.9466667175292969, | |
| "rewards/chosen": -9.366458892822266, | |
| "rewards/margins": 10.299457550048828, | |
| "rewards/rejected": -19.665918350219727, | |
| "sft_loss": 1.2187005281448364, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 1.8893756845564074, | |
| "grad_norm": 18.826459574147577, | |
| "learning_rate": 4.0176028503425826e-09, | |
| "logits/chosen": 15.749044418334961, | |
| "logits/rejected": 16.83735466003418, | |
| "logps/chosen": -308.5406188964844, | |
| "logps/rejected": -271.7100830078125, | |
| "loss": 0.3801, | |
| "rewards/accuracies": 0.9466667175292969, | |
| "rewards/chosen": -9.177282333374023, | |
| "rewards/margins": 10.317461967468262, | |
| "rewards/rejected": -19.4947452545166, | |
| "sft_loss": 1.252463698387146, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.8975903614457832, | |
| "grad_norm": 16.226959543929514, | |
| "learning_rate": 3.433277537108481e-09, | |
| "logits/chosen": 15.832767486572266, | |
| "logits/rejected": 17.746004104614258, | |
| "logps/chosen": -343.33447265625, | |
| "logps/rejected": -305.2869873046875, | |
| "loss": 0.335, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -9.902791976928711, | |
| "rewards/margins": 11.3204345703125, | |
| "rewards/rejected": -21.223228454589844, | |
| "sft_loss": 1.2560192346572876, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 1.9058050383351588, | |
| "grad_norm": 14.860604119401957, | |
| "learning_rate": 2.8945562886593944e-09, | |
| "logits/chosen": 14.95615005493164, | |
| "logits/rejected": 16.35462760925293, | |
| "logps/chosen": -287.0328369140625, | |
| "logps/rejected": -257.26080322265625, | |
| "loss": 0.3677, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -9.02625560760498, | |
| "rewards/margins": 9.515448570251465, | |
| "rewards/rejected": -18.541706085205078, | |
| "sft_loss": 1.1147348880767822, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.9140197152245344, | |
| "grad_norm": 12.84326688048793, | |
| "learning_rate": 2.4015387448756976e-09, | |
| "logits/chosen": 15.258326530456543, | |
| "logits/rejected": 16.413604736328125, | |
| "logps/chosen": -337.6728820800781, | |
| "logps/rejected": -276.5948181152344, | |
| "loss": 0.333, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -8.658801078796387, | |
| "rewards/margins": 11.077197074890137, | |
| "rewards/rejected": -19.736000061035156, | |
| "sft_loss": 1.313868761062622, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 1.9222343921139102, | |
| "grad_norm": 9.16948186267524, | |
| "learning_rate": 1.954316092450281e-09, | |
| "logits/chosen": 16.7126522064209, | |
| "logits/rejected": 16.963319778442383, | |
| "logps/chosen": -349.0697326660156, | |
| "logps/rejected": -294.1761169433594, | |
| "loss": 0.299, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -9.689640998840332, | |
| "rewards/margins": 11.0868558883667, | |
| "rewards/rejected": -20.77649688720703, | |
| "sft_loss": 1.2454497814178467, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.9304490690032858, | |
| "grad_norm": 19.811301652971856, | |
| "learning_rate": 1.5529710480231272e-09, | |
| "logits/chosen": 17.24116325378418, | |
| "logits/rejected": 16.968626022338867, | |
| "logps/chosen": -310.8689270019531, | |
| "logps/rejected": -274.0096740722656, | |
| "loss": 0.3, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -9.42411994934082, | |
| "rewards/margins": 10.301589012145996, | |
| "rewards/rejected": -19.725709915161133, | |
| "sft_loss": 1.0850669145584106, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 1.9386637458926614, | |
| "grad_norm": 12.455631194759059, | |
| "learning_rate": 1.1975778428823524e-09, | |
| "logits/chosen": 15.130066871643066, | |
| "logits/rejected": 16.740190505981445, | |
| "logps/chosen": -351.4178466796875, | |
| "logps/rejected": -299.97235107421875, | |
| "loss": 0.3093, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -9.884756088256836, | |
| "rewards/margins": 11.209836959838867, | |
| "rewards/rejected": -21.094594955444336, | |
| "sft_loss": 1.0997297763824463, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.9468784227820373, | |
| "grad_norm": 12.90904121827512, | |
| "learning_rate": 8.882022092346064e-10, | |
| "logits/chosen": 16.643354415893555, | |
| "logits/rejected": 16.99618148803711, | |
| "logps/chosen": -355.08087158203125, | |
| "logps/rejected": -291.8462219238281, | |
| "loss": 0.3245, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -9.204696655273438, | |
| "rewards/margins": 11.567381858825684, | |
| "rewards/rejected": -20.77208137512207, | |
| "sft_loss": 1.2387458086013794, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 1.9550930996714129, | |
| "grad_norm": 10.02772673186922, | |
| "learning_rate": 6.249013680474368e-10, | |
| "logits/chosen": 16.724010467529297, | |
| "logits/rejected": 16.2373104095459, | |
| "logps/chosen": -319.0643310546875, | |
| "logps/rejected": -268.2914123535156, | |
| "loss": 0.3367, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -9.252176284790039, | |
| "rewards/margins": 9.910921096801758, | |
| "rewards/rejected": -19.163097381591797, | |
| "sft_loss": 1.1700729131698608, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.9633077765607885, | |
| "grad_norm": 16.734218614778506, | |
| "learning_rate": 4.0772401846608794e-10, | |
| "logits/chosen": 17.680179595947266, | |
| "logits/rejected": 17.80653190612793, | |
| "logps/chosen": -305.4862060546875, | |
| "logps/rejected": -267.6892395019531, | |
| "loss": 0.4133, | |
| "rewards/accuracies": 0.9466667175292969, | |
| "rewards/chosen": -9.576127052307129, | |
| "rewards/margins": 9.75358772277832, | |
| "rewards/rejected": -19.329715728759766, | |
| "sft_loss": 1.1736282110214233, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 1.9715224534501643, | |
| "grad_norm": 19.267511912968715, | |
| "learning_rate": 2.367103288061223e-10, | |
| "logits/chosen": 16.904399871826172, | |
| "logits/rejected": 16.482337951660156, | |
| "logps/chosen": -316.0256652832031, | |
| "logps/rejected": -265.80157470703125, | |
| "loss": 0.3574, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -9.74451732635498, | |
| "rewards/margins": 9.55905532836914, | |
| "rewards/rejected": -19.303569793701172, | |
| "sft_loss": 1.2237191200256348, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.9797371303395401, | |
| "grad_norm": 11.51131736701793, | |
| "learning_rate": 1.1189192912416933e-10, | |
| "logits/chosen": 15.607586860656738, | |
| "logits/rejected": 16.690214157104492, | |
| "logps/chosen": -370.86328125, | |
| "logps/rejected": -313.1533508300781, | |
| "loss": 0.2989, | |
| "rewards/accuracies": 0.9866666793823242, | |
| "rewards/chosen": -9.272278785705566, | |
| "rewards/margins": 12.769195556640625, | |
| "rewards/rejected": -22.041475296020508, | |
| "sft_loss": 1.1835730075836182, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 1.9879518072289155, | |
| "grad_norm": 11.903861482033115, | |
| "learning_rate": 3.329190536757731e-11, | |
| "logits/chosen": 17.456689834594727, | |
| "logits/rejected": 18.812978744506836, | |
| "logps/chosen": -314.75823974609375, | |
| "logps/rejected": -271.9325256347656, | |
| "loss": 0.3344, | |
| "rewards/accuracies": 0.9733333587646484, | |
| "rewards/chosen": -9.277753829956055, | |
| "rewards/margins": 10.492895126342773, | |
| "rewards/rejected": -19.770648956298828, | |
| "sft_loss": 1.1376186609268188, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.9961664841182913, | |
| "grad_norm": 14.744908012876596, | |
| "learning_rate": 9.247951046897906e-13, | |
| "logits/chosen": 16.54582977294922, | |
| "logits/rejected": 18.33929443359375, | |
| "logps/chosen": -319.89813232421875, | |
| "logps/rejected": -279.7975769042969, | |
| "loss": 0.352, | |
| "rewards/accuracies": 0.9600000381469727, | |
| "rewards/chosen": -9.205850601196289, | |
| "rewards/margins": 10.397418975830078, | |
| "rewards/rejected": -19.603271484375, | |
| "sft_loss": 1.1400221586227417, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 1.9978094194961664, | |
| "step": 1216, | |
| "total_flos": 200111899688960.0, | |
| "train_loss": 0.4716386401069988, | |
| "train_runtime": 41653.1021, | |
| "train_samples_per_second": 1.753, | |
| "train_steps_per_second": 0.029 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 1216, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 2, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 200111899688960.0, | |
| "train_batch_size": 5, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |