Invalid JSON: Unexpected token 'N', ..."/chosen": NaN,
"... is not valid JSON
| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 3.977142857142857, | |
| "eval_steps": 500, | |
| "global_step": 348, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.045714285714285714, | |
| "grad_norm": 9.729541778564453, | |
| "learning_rate": 2.2857142857142858e-05, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -191.5949249267578, | |
| "logps/rejected": -178.94993591308594, | |
| "loss": 0.6932, | |
| "rewards/accuracies": 0.15625, | |
| "rewards/chosen": 0.00019290449563413858, | |
| "rewards/margins": -6.33835734333843e-05, | |
| "rewards/rejected": 0.0002562880690675229, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.09142857142857143, | |
| "grad_norm": 9.54272174835205, | |
| "learning_rate": 4.5714285714285716e-05, | |
| "logits/chosen": -0.7379971146583557, | |
| "logits/rejected": -0.7608282566070557, | |
| "logps/chosen": -318.36737060546875, | |
| "logps/rejected": -172.83837890625, | |
| "loss": 0.6918, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 0.003367400262504816, | |
| "rewards/margins": 0.0027868689503520727, | |
| "rewards/rejected": 0.0005805314285680652, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.13714285714285715, | |
| "grad_norm": 9.01282024383545, | |
| "learning_rate": 6.857142857142858e-05, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -212.95884704589844, | |
| "logps/rejected": -170.42819213867188, | |
| "loss": 0.6875, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": 0.012343340553343296, | |
| "rewards/margins": 0.011433703824877739, | |
| "rewards/rejected": 0.0009096383582800627, | |
| "step": 12 | |
| }, | |
| { | |
| "epoch": 0.18285714285714286, | |
| "grad_norm": 6.08139181137085, | |
| "learning_rate": 9.142857142857143e-05, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -237.0392608642578, | |
| "logps/rejected": -140.843994140625, | |
| "loss": 0.6786, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": 0.019704176113009453, | |
| "rewards/margins": 0.02994394302368164, | |
| "rewards/rejected": -0.010239768773317337, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.22857142857142856, | |
| "grad_norm": 12.110494613647461, | |
| "learning_rate": 0.00011428571428571428, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -238.46463012695312, | |
| "logps/rejected": -191.14645385742188, | |
| "loss": 0.661, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": 0.06102071329951286, | |
| "rewards/margins": 0.06703755259513855, | |
| "rewards/rejected": -0.00601684395223856, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.2742857142857143, | |
| "grad_norm": 15.145161628723145, | |
| "learning_rate": 0.00013714285714285716, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -263.39276123046875, | |
| "logps/rejected": -203.8231658935547, | |
| "loss": 0.639, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": 0.1136322170495987, | |
| "rewards/margins": 0.11692351847887039, | |
| "rewards/rejected": -0.0032912972383201122, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 15.971263885498047, | |
| "learning_rate": 0.00016, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -315.7078857421875, | |
| "logps/rejected": -146.5008544921875, | |
| "loss": 0.6162, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": 0.19662690162658691, | |
| "rewards/margins": 0.1738075613975525, | |
| "rewards/rejected": 0.022819330915808678, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.3657142857142857, | |
| "grad_norm": 20.183231353759766, | |
| "learning_rate": 0.00018285714285714286, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -294.2346496582031, | |
| "logps/rejected": -153.24160766601562, | |
| "loss": 0.5742, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.2808985710144043, | |
| "rewards/margins": 0.28646522760391235, | |
| "rewards/rejected": -0.005566636100411415, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.4114285714285714, | |
| "grad_norm": 21.948104858398438, | |
| "learning_rate": 0.00019936102236421725, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -258.82904052734375, | |
| "logps/rejected": -166.54611206054688, | |
| "loss": 0.5684, | |
| "rewards/accuracies": 0.625, | |
| "rewards/chosen": 0.3410307466983795, | |
| "rewards/margins": 0.33058807253837585, | |
| "rewards/rejected": 0.010442652739584446, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 0.45714285714285713, | |
| "grad_norm": 19.31505584716797, | |
| "learning_rate": 0.00019680511182108628, | |
| "logits/chosen": -0.8340643644332886, | |
| "logits/rejected": -0.8946108222007751, | |
| "logps/chosen": -280.0050964355469, | |
| "logps/rejected": -182.23654174804688, | |
| "loss": 0.4948, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 0.5975265502929688, | |
| "rewards/margins": 0.6292851567268372, | |
| "rewards/rejected": -0.03175865486264229, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.5028571428571429, | |
| "grad_norm": 38.077674865722656, | |
| "learning_rate": 0.00019424920127795528, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -242.5420379638672, | |
| "logps/rejected": -165.37367248535156, | |
| "loss": 0.4782, | |
| "rewards/accuracies": 0.78125, | |
| "rewards/chosen": 0.7896119356155396, | |
| "rewards/margins": 0.6563507318496704, | |
| "rewards/rejected": 0.13326111435890198, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.5485714285714286, | |
| "grad_norm": 23.51544761657715, | |
| "learning_rate": 0.00019169329073482429, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -215.16128540039062, | |
| "logps/rejected": -155.5576629638672, | |
| "loss": 0.414, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": 0.8685276508331299, | |
| "rewards/margins": 0.8956663012504578, | |
| "rewards/rejected": -0.027138609439134598, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 0.5942857142857143, | |
| "grad_norm": 31.693307876586914, | |
| "learning_rate": 0.0001891373801916933, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -284.3824462890625, | |
| "logps/rejected": -146.8328857421875, | |
| "loss": 0.4731, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.7844900488853455, | |
| "rewards/margins": 0.7386621236801147, | |
| "rewards/rejected": 0.045827917754650116, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 62.286956787109375, | |
| "learning_rate": 0.00018658146964856232, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -324.68414306640625, | |
| "logps/rejected": -158.55052185058594, | |
| "loss": 0.4529, | |
| "rewards/accuracies": 0.6875, | |
| "rewards/chosen": 1.036560297012329, | |
| "rewards/margins": 1.0899850130081177, | |
| "rewards/rejected": -0.05342472344636917, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.6857142857142857, | |
| "grad_norm": 34.29658508300781, | |
| "learning_rate": 0.00018402555910543132, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -266.7518615722656, | |
| "logps/rejected": -208.8983612060547, | |
| "loss": 0.3165, | |
| "rewards/accuracies": 0.8125, | |
| "rewards/chosen": 0.8459180593490601, | |
| "rewards/margins": 1.586979627609253, | |
| "rewards/rejected": -0.7410615682601929, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.7314285714285714, | |
| "grad_norm": 22.293718338012695, | |
| "learning_rate": 0.00018146964856230032, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -260.175537109375, | |
| "logps/rejected": -140.4886932373047, | |
| "loss": 0.4188, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": 0.5953944325447083, | |
| "rewards/margins": 1.1207698583602905, | |
| "rewards/rejected": -0.525375485420227, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.7771428571428571, | |
| "grad_norm": 29.842836380004883, | |
| "learning_rate": 0.00017891373801916932, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -191.6659698486328, | |
| "logps/rejected": -188.1643829345703, | |
| "loss": 0.4504, | |
| "rewards/accuracies": 0.71875, | |
| "rewards/chosen": 0.519951581954956, | |
| "rewards/margins": 1.0750960111618042, | |
| "rewards/rejected": -0.5551444292068481, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.8228571428571428, | |
| "grad_norm": 22.95765495300293, | |
| "learning_rate": 0.00017635782747603835, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -251.21548461914062, | |
| "logps/rejected": -263.4191589355469, | |
| "loss": 0.2434, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.7837429642677307, | |
| "rewards/margins": 1.9659894704818726, | |
| "rewards/rejected": -1.182246446609497, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 0.8685714285714285, | |
| "grad_norm": 36.607723236083984, | |
| "learning_rate": 0.00017380191693290735, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -264.87799072265625, | |
| "logps/rejected": -164.2783203125, | |
| "loss": 0.3611, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": 0.374313622713089, | |
| "rewards/margins": 1.7166688442230225, | |
| "rewards/rejected": -1.3423552513122559, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.9142857142857143, | |
| "grad_norm": 36.1689567565918, | |
| "learning_rate": 0.00017124600638977638, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -271.47064208984375, | |
| "logps/rejected": -264.6214599609375, | |
| "loss": 0.2544, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 0.4015069603919983, | |
| "rewards/margins": 2.162703037261963, | |
| "rewards/rejected": -1.7611961364746094, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 56.92383575439453, | |
| "learning_rate": 0.00016869009584664536, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -280.0853271484375, | |
| "logps/rejected": -271.6772766113281, | |
| "loss": 0.2838, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": 0.2982442378997803, | |
| "rewards/margins": 2.493306875228882, | |
| "rewards/rejected": -2.1950626373291016, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 1.0057142857142858, | |
| "grad_norm": 29.9643497467041, | |
| "learning_rate": 0.0001661341853035144, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -266.5316467285156, | |
| "logps/rejected": -181.1546173095703, | |
| "loss": 0.2052, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 0.5022339224815369, | |
| "rewards/margins": 2.6957812309265137, | |
| "rewards/rejected": -2.193547010421753, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 1.0514285714285714, | |
| "grad_norm": 27.335933685302734, | |
| "learning_rate": 0.0001635782747603834, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -234.8162078857422, | |
| "logps/rejected": -178.24375915527344, | |
| "loss": 0.1396, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.7748296856880188, | |
| "rewards/margins": 2.9161019325256348, | |
| "rewards/rejected": -2.1412723064422607, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 1.0971428571428572, | |
| "grad_norm": 8.942264556884766, | |
| "learning_rate": 0.00016102236421725242, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -233.2910919189453, | |
| "logps/rejected": -179.97129821777344, | |
| "loss": 0.1361, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 1.129379153251648, | |
| "rewards/margins": 3.637566328048706, | |
| "rewards/rejected": -2.5081870555877686, | |
| "step": 96 | |
| }, | |
| { | |
| "epoch": 1.1428571428571428, | |
| "grad_norm": 20.588306427001953, | |
| "learning_rate": 0.00015846645367412142, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -222.8678741455078, | |
| "logps/rejected": -157.59368896484375, | |
| "loss": 0.1604, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": 1.250205159187317, | |
| "rewards/margins": 3.2522096633911133, | |
| "rewards/rejected": -2.002004623413086, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.1885714285714286, | |
| "grad_norm": 9.417901992797852, | |
| "learning_rate": 0.00015591054313099042, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -223.03469848632812, | |
| "logps/rejected": -164.5848388671875, | |
| "loss": 0.1148, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": 1.3484156131744385, | |
| "rewards/margins": 3.837554454803467, | |
| "rewards/rejected": -2.4891388416290283, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 1.2342857142857142, | |
| "grad_norm": 13.386661529541016, | |
| "learning_rate": 0.00015335463258785942, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -243.3107147216797, | |
| "logps/rejected": -269.83856201171875, | |
| "loss": 0.0796, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": 1.3695310354232788, | |
| "rewards/margins": 4.65057373046875, | |
| "rewards/rejected": -3.28104305267334, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "grad_norm": 6.952698230743408, | |
| "learning_rate": 0.00015079872204472845, | |
| "logits/chosen": -0.8169791102409363, | |
| "logits/rejected": -1.029231071472168, | |
| "logps/chosen": -263.39453125, | |
| "logps/rejected": -199.8799285888672, | |
| "loss": 0.0387, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.1781620979309082, | |
| "rewards/margins": 4.557692527770996, | |
| "rewards/rejected": -3.379530429840088, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 1.3257142857142856, | |
| "grad_norm": 8.539137840270996, | |
| "learning_rate": 0.00014824281150159745, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -283.40386962890625, | |
| "logps/rejected": -248.05551147460938, | |
| "loss": 0.1084, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": 1.1177380084991455, | |
| "rewards/margins": 4.677387237548828, | |
| "rewards/rejected": -3.559649705886841, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 1.3714285714285714, | |
| "grad_norm": 18.487228393554688, | |
| "learning_rate": 0.00014568690095846646, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -272.1732177734375, | |
| "logps/rejected": -196.45614624023438, | |
| "loss": 0.1468, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": 1.3658459186553955, | |
| "rewards/margins": 4.759303092956543, | |
| "rewards/rejected": -3.3934574127197266, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.4171428571428573, | |
| "grad_norm": 31.59523582458496, | |
| "learning_rate": 0.00014313099041533546, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -289.93524169921875, | |
| "logps/rejected": -226.3542938232422, | |
| "loss": 0.1106, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": 0.8534113168716431, | |
| "rewards/margins": 4.326902389526367, | |
| "rewards/rejected": -3.4734907150268555, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 1.4628571428571429, | |
| "grad_norm": 22.807483673095703, | |
| "learning_rate": 0.0001405750798722045, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -215.40234375, | |
| "logps/rejected": -209.85098266601562, | |
| "loss": 0.0704, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": 1.3758734464645386, | |
| "rewards/margins": 4.276954174041748, | |
| "rewards/rejected": -2.90108060836792, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 1.5085714285714285, | |
| "grad_norm": 55.56698226928711, | |
| "learning_rate": 0.0001380191693290735, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -263.39739990234375, | |
| "logps/rejected": -191.40892028808594, | |
| "loss": 0.1477, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 1.2295846939086914, | |
| "rewards/margins": 4.052473068237305, | |
| "rewards/rejected": -2.8228886127471924, | |
| "step": 132 | |
| }, | |
| { | |
| "epoch": 1.5542857142857143, | |
| "grad_norm": 5.410626411437988, | |
| "learning_rate": 0.0001354632587859425, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -250.39862060546875, | |
| "logps/rejected": -169.7389678955078, | |
| "loss": 0.0811, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": 2.030120849609375, | |
| "rewards/margins": 4.691845417022705, | |
| "rewards/rejected": -2.661724090576172, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "grad_norm": 2.478210210800171, | |
| "learning_rate": 0.0001329073482428115, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -202.17880249023438, | |
| "logps/rejected": -235.31553649902344, | |
| "loss": 0.0554, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 1.5112950801849365, | |
| "rewards/margins": 5.312258720397949, | |
| "rewards/rejected": -3.800963878631592, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.6457142857142857, | |
| "grad_norm": 12.282026290893555, | |
| "learning_rate": 0.00013035143769968052, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -256.1723937988281, | |
| "logps/rejected": -190.2920684814453, | |
| "loss": 0.1364, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": 0.9841121435165405, | |
| "rewards/margins": 4.532729625701904, | |
| "rewards/rejected": -3.5486178398132324, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 1.6914285714285713, | |
| "grad_norm": 0.7443946003913879, | |
| "learning_rate": 0.00012779552715654952, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -190.3223114013672, | |
| "logps/rejected": -232.26467895507812, | |
| "loss": 0.1134, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 1.4249160289764404, | |
| "rewards/margins": 5.272634506225586, | |
| "rewards/rejected": -3.8477184772491455, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 1.737142857142857, | |
| "grad_norm": 0.9860565662384033, | |
| "learning_rate": 0.00012523961661341855, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -280.0784606933594, | |
| "logps/rejected": -178.75973510742188, | |
| "loss": 0.1469, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": 1.374025821685791, | |
| "rewards/margins": 5.189028739929199, | |
| "rewards/rejected": -3.8150031566619873, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 1.782857142857143, | |
| "grad_norm": 11.089580535888672, | |
| "learning_rate": 0.00012268370607028753, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -309.3710021972656, | |
| "logps/rejected": -288.64410400390625, | |
| "loss": 0.071, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.8088248372077942, | |
| "rewards/margins": 6.306546211242676, | |
| "rewards/rejected": -5.4977216720581055, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 1.8285714285714287, | |
| "grad_norm": 0.6896222829818726, | |
| "learning_rate": 0.00012012779552715656, | |
| "logits/chosen": -1.1236371994018555, | |
| "logits/rejected": -1.1221413612365723, | |
| "logps/chosen": -247.50177001953125, | |
| "logps/rejected": -255.9164581298828, | |
| "loss": 0.0608, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": 1.0703375339508057, | |
| "rewards/margins": 6.536956787109375, | |
| "rewards/rejected": -5.466619491577148, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.8742857142857143, | |
| "grad_norm": 8.98990535736084, | |
| "learning_rate": 0.00011757188498402556, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -215.19583129882812, | |
| "logps/rejected": -233.2379608154297, | |
| "loss": 0.1398, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": 1.2311248779296875, | |
| "rewards/margins": 5.066349506378174, | |
| "rewards/rejected": -3.8352248668670654, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "grad_norm": 16.68961524963379, | |
| "learning_rate": 0.00011501597444089457, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -312.2574768066406, | |
| "logps/rejected": -231.15431213378906, | |
| "loss": 0.11, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": 1.1968990564346313, | |
| "rewards/margins": 5.266256332397461, | |
| "rewards/rejected": -4.069357395172119, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 1.9657142857142857, | |
| "grad_norm": 1.3579024076461792, | |
| "learning_rate": 0.00011246006389776358, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -305.48406982421875, | |
| "logps/rejected": -181.08071899414062, | |
| "loss": 0.1175, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 1.1569255590438843, | |
| "rewards/margins": 4.935606956481934, | |
| "rewards/rejected": -3.7786810398101807, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 2.0114285714285716, | |
| "grad_norm": 0.5345720052719116, | |
| "learning_rate": 0.0001099041533546326, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -209.57493591308594, | |
| "logps/rejected": -274.29595947265625, | |
| "loss": 0.0498, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 1.321765661239624, | |
| "rewards/margins": 6.712408542633057, | |
| "rewards/rejected": -5.390643119812012, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 2.057142857142857, | |
| "grad_norm": 2.9336695671081543, | |
| "learning_rate": 0.0001073482428115016, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -300.12738037109375, | |
| "logps/rejected": -247.28753662109375, | |
| "loss": 0.0481, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 2.2360713481903076, | |
| "rewards/margins": 6.878186225891113, | |
| "rewards/rejected": -4.642114639282227, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 2.1028571428571428, | |
| "grad_norm": 0.23953957855701447, | |
| "learning_rate": 0.00010479233226837062, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -275.3216552734375, | |
| "logps/rejected": -312.7816162109375, | |
| "loss": 0.0474, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.4861125349998474, | |
| "rewards/margins": 7.128695487976074, | |
| "rewards/rejected": -6.64258337020874, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 2.1485714285714286, | |
| "grad_norm": 1.1941184997558594, | |
| "learning_rate": 0.00010223642172523961, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -179.19110107421875, | |
| "logps/rejected": -240.98788452148438, | |
| "loss": 0.0466, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 1.1594161987304688, | |
| "rewards/margins": 8.421497344970703, | |
| "rewards/rejected": -7.262081146240234, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 2.1942857142857144, | |
| "grad_norm": 1.1224029064178467, | |
| "learning_rate": 9.968051118210863e-05, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -256.90692138671875, | |
| "logps/rejected": -187.83580017089844, | |
| "loss": 0.114, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": 0.700792133808136, | |
| "rewards/margins": 6.588268756866455, | |
| "rewards/rejected": -5.887476444244385, | |
| "step": 192 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "grad_norm": 0.10384131222963333, | |
| "learning_rate": 9.712460063897764e-05, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -240.05372619628906, | |
| "logps/rejected": -221.209716796875, | |
| "loss": 0.0878, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 1.4040710926055908, | |
| "rewards/margins": 6.949329853057861, | |
| "rewards/rejected": -5.54525899887085, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 2.2857142857142856, | |
| "grad_norm": 0.09286891669034958, | |
| "learning_rate": 9.456869009584664e-05, | |
| "logits/chosen": -0.8439121246337891, | |
| "logits/rejected": -0.8557687401771545, | |
| "logps/chosen": -268.4107666015625, | |
| "logps/rejected": -287.407958984375, | |
| "loss": 0.0051, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.4993197917938232, | |
| "rewards/margins": 8.422232627868652, | |
| "rewards/rejected": -6.922913551330566, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 2.3314285714285714, | |
| "grad_norm": 0.9010165929794312, | |
| "learning_rate": 9.201277955271566e-05, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -270.5591125488281, | |
| "logps/rejected": -280.8585510253906, | |
| "loss": 0.0576, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 1.2924140691757202, | |
| "rewards/margins": 9.145613670349121, | |
| "rewards/rejected": -7.853200435638428, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 2.3771428571428572, | |
| "grad_norm": 2.572065591812134, | |
| "learning_rate": 8.945686900958466e-05, | |
| "logits/chosen": -0.8399416208267212, | |
| "logits/rejected": -0.9658093452453613, | |
| "logps/chosen": -277.0357666015625, | |
| "logps/rejected": -281.85369873046875, | |
| "loss": 0.0013, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.1569018363952637, | |
| "rewards/margins": 8.929794311523438, | |
| "rewards/rejected": -7.772891998291016, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 2.422857142857143, | |
| "grad_norm": 0.09240057319402695, | |
| "learning_rate": 8.690095846645368e-05, | |
| "logits/chosen": -0.9032736420631409, | |
| "logits/rejected": -1.0178804397583008, | |
| "logps/chosen": -296.9342041015625, | |
| "logps/rejected": -282.25433349609375, | |
| "loss": 0.0024, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.5577135682106018, | |
| "rewards/margins": 8.976801872253418, | |
| "rewards/rejected": -8.419088363647461, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 2.4685714285714284, | |
| "grad_norm": 0.6668811440467834, | |
| "learning_rate": 8.434504792332268e-05, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -226.7338104248047, | |
| "logps/rejected": -203.0109100341797, | |
| "loss": 0.066, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": 0.8103513717651367, | |
| "rewards/margins": 8.133030891418457, | |
| "rewards/rejected": -7.32267951965332, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 2.5142857142857142, | |
| "grad_norm": 0.0593394860625267, | |
| "learning_rate": 8.17891373801917e-05, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -303.2545471191406, | |
| "logps/rejected": -261.12567138671875, | |
| "loss": 0.0247, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": 1.732958436012268, | |
| "rewards/margins": 10.343001365661621, | |
| "rewards/rejected": -8.610042572021484, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "grad_norm": 2.342472553253174, | |
| "learning_rate": 7.923322683706071e-05, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -211.95657348632812, | |
| "logps/rejected": -243.17343139648438, | |
| "loss": 0.1104, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": 0.8941321969032288, | |
| "rewards/margins": 7.801623344421387, | |
| "rewards/rejected": -6.907491207122803, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 2.605714285714286, | |
| "grad_norm": 1.0987083911895752, | |
| "learning_rate": 7.667731629392971e-05, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -309.615966796875, | |
| "logps/rejected": -240.02651977539062, | |
| "loss": 0.0247, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": 1.5686383247375488, | |
| "rewards/margins": 8.230088233947754, | |
| "rewards/rejected": -6.661448955535889, | |
| "step": 228 | |
| }, | |
| { | |
| "epoch": 2.6514285714285712, | |
| "grad_norm": 0.5238648653030396, | |
| "learning_rate": 7.412140575079873e-05, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -178.031494140625, | |
| "logps/rejected": -187.8708953857422, | |
| "loss": 0.0689, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": 1.1489320993423462, | |
| "rewards/margins": 8.070334434509277, | |
| "rewards/rejected": -6.9214019775390625, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 2.697142857142857, | |
| "grad_norm": 0.6709648966789246, | |
| "learning_rate": 7.156549520766773e-05, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -319.14190673828125, | |
| "logps/rejected": -232.24835205078125, | |
| "loss": 0.0232, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": 1.610769510269165, | |
| "rewards/margins": 8.165452003479004, | |
| "rewards/rejected": -6.554682731628418, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 2.742857142857143, | |
| "grad_norm": 0.08443068712949753, | |
| "learning_rate": 6.900958466453674e-05, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -184.30429077148438, | |
| "logps/rejected": -262.72845458984375, | |
| "loss": 0.069, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": 1.0201094150543213, | |
| "rewards/margins": 8.405547142028809, | |
| "rewards/rejected": -7.385438442230225, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 2.7885714285714287, | |
| "grad_norm": 0.962510347366333, | |
| "learning_rate": 6.645367412140575e-05, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -253.1976776123047, | |
| "logps/rejected": -213.32867431640625, | |
| "loss": 0.0908, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 0.9948629140853882, | |
| "rewards/margins": 8.913736343383789, | |
| "rewards/rejected": -7.918873310089111, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 2.8342857142857145, | |
| "grad_norm": 0.09702732414007187, | |
| "learning_rate": 6.389776357827476e-05, | |
| "logits/chosen": -0.919600248336792, | |
| "logits/rejected": -1.0969903469085693, | |
| "logps/chosen": -262.49688720703125, | |
| "logps/rejected": -316.8937072753906, | |
| "loss": 0.0016, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.6478074789047241, | |
| "rewards/margins": 10.203094482421875, | |
| "rewards/rejected": -8.55528736114502, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "grad_norm": 1.1994304656982422, | |
| "learning_rate": 6.134185303514376e-05, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -232.12115478515625, | |
| "logps/rejected": -297.3643798828125, | |
| "loss": 0.0235, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": 0.6859785318374634, | |
| "rewards/margins": 10.150031089782715, | |
| "rewards/rejected": -9.4640531539917, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 2.9257142857142857, | |
| "grad_norm": 0.09092514961957932, | |
| "learning_rate": 5.878594249201278e-05, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -253.23870849609375, | |
| "logps/rejected": -259.6832580566406, | |
| "loss": 0.0227, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": 0.6735504865646362, | |
| "rewards/margins": 9.920062065124512, | |
| "rewards/rejected": -9.246511459350586, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 2.9714285714285715, | |
| "grad_norm": 0.5308630466461182, | |
| "learning_rate": 5.623003194888179e-05, | |
| "logits/chosen": null, | |
| "logits/rejected": null, | |
| "logps/chosen": -197.1396026611328, | |
| "logps/rejected": -211.8688507080078, | |
| "loss": 0.1108, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": 1.3177775144577026, | |
| "rewards/margins": 8.077342987060547, | |
| "rewards/rejected": -6.759566307067871, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 3.0171428571428573, | |
| "grad_norm": 0.019223162904381752, | |
| "learning_rate": 5.36741214057508e-05, | |
| "logits/chosen": null, | |
| "logits/rejected": null, | |
| "logps/chosen": -236.93826293945312, | |
| "logps/rejected": -194.27305603027344, | |
| "loss": 0.0887, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 1.6951510906219482, | |
| "rewards/margins": 8.964152336120605, | |
| "rewards/rejected": -7.269002437591553, | |
| "step": 264 | |
| }, | |
| { | |
| "epoch": 3.0628571428571427, | |
| "grad_norm": 0.024787016212940216, | |
| "learning_rate": 5.1118210862619806e-05, | |
| "logits/chosen": null, | |
| "logits/rejected": null, | |
| "logps/chosen": -200.06399536132812, | |
| "logps/rejected": -265.58721923828125, | |
| "loss": 0.0869, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": 0.28237906098365784, | |
| "rewards/margins": 9.305129051208496, | |
| "rewards/rejected": -9.022750854492188, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 3.1085714285714285, | |
| "grad_norm": 0.02963140606880188, | |
| "learning_rate": 4.856230031948882e-05, | |
| "logits/chosen": null, | |
| "logits/rejected": null, | |
| "logps/chosen": -273.082275390625, | |
| "logps/rejected": -252.20896911621094, | |
| "loss": 0.0218, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": 1.426784634590149, | |
| "rewards/margins": 10.334057807922363, | |
| "rewards/rejected": -8.90727424621582, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 3.1542857142857144, | |
| "grad_norm": 0.02248663455247879, | |
| "learning_rate": 4.600638977635783e-05, | |
| "logits/chosen": null, | |
| "logits/rejected": null, | |
| "logps/chosen": -226.61148071289062, | |
| "logps/rejected": -308.6155700683594, | |
| "loss": 0.0434, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": -0.20196402072906494, | |
| "rewards/margins": 11.114026069641113, | |
| "rewards/rejected": -11.315988540649414, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 3.2, | |
| "grad_norm": 0.1719914972782135, | |
| "learning_rate": 4.345047923322684e-05, | |
| "logits/chosen": null, | |
| "logits/rejected": null, | |
| "logps/chosen": -273.6976013183594, | |
| "logps/rejected": -253.73367309570312, | |
| "loss": 0.0438, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.6315146684646606, | |
| "rewards/margins": 11.08044147491455, | |
| "rewards/rejected": -10.448925018310547, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 3.2457142857142856, | |
| "grad_norm": 0.07747913151979446, | |
| "learning_rate": 4.089456869009585e-05, | |
| "logits/chosen": -1.1313278675079346, | |
| "logits/rejected": -1.0815073251724243, | |
| "logps/chosen": -281.3612060546875, | |
| "logps/rejected": -231.41818237304688, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.41760969161987305, | |
| "rewards/margins": 10.470647811889648, | |
| "rewards/rejected": -10.053038597106934, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 3.2914285714285714, | |
| "grad_norm": 0.019036294892430305, | |
| "learning_rate": 3.8338658146964856e-05, | |
| "logits/chosen": null, | |
| "logits/rejected": null, | |
| "logps/chosen": -256.3636169433594, | |
| "logps/rejected": -321.89471435546875, | |
| "loss": 0.0219, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": 0.9431450366973877, | |
| "rewards/margins": 11.828181266784668, | |
| "rewards/rejected": -10.88503646850586, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 3.337142857142857, | |
| "grad_norm": 0.5823835730552673, | |
| "learning_rate": 3.5782747603833865e-05, | |
| "logits/chosen": null, | |
| "logits/rejected": null, | |
| "logps/chosen": -229.27243041992188, | |
| "logps/rejected": -306.8113708496094, | |
| "loss": 0.0868, | |
| "rewards/accuracies": 0.875, | |
| "rewards/chosen": -0.37009963393211365, | |
| "rewards/margins": 10.902031898498535, | |
| "rewards/rejected": -11.272132873535156, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 3.382857142857143, | |
| "grad_norm": 0.0879807099699974, | |
| "learning_rate": 3.322683706070287e-05, | |
| "logits/chosen": null, | |
| "logits/rejected": null, | |
| "logps/chosen": -348.1703796386719, | |
| "logps/rejected": -276.8051452636719, | |
| "loss": 0.0438, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.8444142937660217, | |
| "rewards/margins": 11.02114486694336, | |
| "rewards/rejected": -10.176729202270508, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 3.4285714285714284, | |
| "grad_norm": 0.10615295171737671, | |
| "learning_rate": 3.067092651757188e-05, | |
| "logits/chosen": null, | |
| "logits/rejected": null, | |
| "logps/chosen": -236.68743896484375, | |
| "logps/rejected": -297.0989990234375, | |
| "loss": 0.0435, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 0.44540417194366455, | |
| "rewards/margins": 10.856500625610352, | |
| "rewards/rejected": -10.411096572875977, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 3.474285714285714, | |
| "grad_norm": 0.1389647126197815, | |
| "learning_rate": 2.8115015974440894e-05, | |
| "logits/chosen": null, | |
| "logits/rejected": null, | |
| "logps/chosen": -233.98634338378906, | |
| "logps/rejected": -225.74227905273438, | |
| "loss": 0.1087, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": 0.649261474609375, | |
| "rewards/margins": 8.719908714294434, | |
| "rewards/rejected": -8.070647239685059, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 3.52, | |
| "grad_norm": 0.08364980667829514, | |
| "learning_rate": 2.5559105431309903e-05, | |
| "logits/chosen": null, | |
| "logits/rejected": null, | |
| "logps/chosen": -247.9769287109375, | |
| "logps/rejected": -222.0983123779297, | |
| "loss": 0.0434, | |
| "rewards/accuracies": 0.9375, | |
| "rewards/chosen": 1.2255136966705322, | |
| "rewards/margins": 10.814766883850098, | |
| "rewards/rejected": -9.589252471923828, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 3.565714285714286, | |
| "grad_norm": 0.024187587201595306, | |
| "learning_rate": 2.3003194888178915e-05, | |
| "logits/chosen": null, | |
| "logits/rejected": null, | |
| "logps/chosen": -230.9536590576172, | |
| "logps/rejected": -308.1141662597656, | |
| "loss": 0.0651, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": 0.6026107668876648, | |
| "rewards/margins": 10.963134765625, | |
| "rewards/rejected": -10.36052417755127, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 3.611428571428571, | |
| "grad_norm": 0.041134633123874664, | |
| "learning_rate": 2.0447284345047924e-05, | |
| "logits/chosen": null, | |
| "logits/rejected": null, | |
| "logps/chosen": -278.3012390136719, | |
| "logps/rejected": -295.2455139160156, | |
| "loss": 0.065, | |
| "rewards/accuracies": 0.90625, | |
| "rewards/chosen": 1.5797958374023438, | |
| "rewards/margins": 11.178966522216797, | |
| "rewards/rejected": -9.599170684814453, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 3.657142857142857, | |
| "grad_norm": 0.018303895369172096, | |
| "learning_rate": 1.7891373801916932e-05, | |
| "logits/chosen": null, | |
| "logits/rejected": null, | |
| "logps/chosen": -296.68463134765625, | |
| "logps/rejected": -361.7939453125, | |
| "loss": 0.0217, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": 1.0021604299545288, | |
| "rewards/margins": 12.465021133422852, | |
| "rewards/rejected": -11.462860107421875, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 3.702857142857143, | |
| "grad_norm": 1.3282262086868286, | |
| "learning_rate": 1.533546325878594e-05, | |
| "logits/chosen": null, | |
| "logits/rejected": null, | |
| "logps/chosen": -227.36346435546875, | |
| "logps/rejected": -220.03289794921875, | |
| "loss": 0.1087, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": 0.9805165529251099, | |
| "rewards/margins": 9.542567253112793, | |
| "rewards/rejected": -8.562050819396973, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 3.7485714285714287, | |
| "grad_norm": 0.14205706119537354, | |
| "learning_rate": 1.2779552715654951e-05, | |
| "logits/chosen": -1.0746899843215942, | |
| "logits/rejected": -1.233121395111084, | |
| "logps/chosen": -327.04486083984375, | |
| "logps/rejected": -355.2626953125, | |
| "loss": 0.0004, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 0.632662832736969, | |
| "rewards/margins": 11.084798812866211, | |
| "rewards/rejected": -10.452136039733887, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 3.7942857142857145, | |
| "grad_norm": 0.30354443192481995, | |
| "learning_rate": 1.0223642172523962e-05, | |
| "logits/chosen": -1.0154728889465332, | |
| "logits/rejected": -1.2137951850891113, | |
| "logps/chosen": -208.1333465576172, | |
| "logps/rejected": -226.25259399414062, | |
| "loss": 0.0002, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": 1.5299694538116455, | |
| "rewards/margins": 10.347978591918945, | |
| "rewards/rejected": -8.818008422851562, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 3.84, | |
| "grad_norm": 0.016442114487290382, | |
| "learning_rate": 7.66773162939297e-06, | |
| "logits/chosen": null, | |
| "logits/rejected": null, | |
| "logps/chosen": -184.98435974121094, | |
| "logps/rejected": -204.31524658203125, | |
| "loss": 0.1092, | |
| "rewards/accuracies": 0.84375, | |
| "rewards/chosen": 1.1629523038864136, | |
| "rewards/margins": 8.743239402770996, | |
| "rewards/rejected": -7.580286979675293, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 3.8857142857142857, | |
| "grad_norm": 0.1124640479683876, | |
| "learning_rate": 5.111821086261981e-06, | |
| "logits/chosen": null, | |
| "logits/rejected": null, | |
| "logps/chosen": -260.33941650390625, | |
| "logps/rejected": -250.80712890625, | |
| "loss": 0.022, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": 2.213810682296753, | |
| "rewards/margins": 11.022459983825684, | |
| "rewards/rejected": -8.808650016784668, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 3.9314285714285715, | |
| "grad_norm": 0.2672106623649597, | |
| "learning_rate": 2.5559105431309904e-06, | |
| "logits/chosen": null, | |
| "logits/rejected": null, | |
| "logps/chosen": -317.84588623046875, | |
| "logps/rejected": -330.1520080566406, | |
| "loss": 0.0218, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": 0.6001543998718262, | |
| "rewards/margins": 11.707775115966797, | |
| "rewards/rejected": -11.107621192932129, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 3.977142857142857, | |
| "grad_norm": 0.14436927437782288, | |
| "learning_rate": 0.0, | |
| "logits/chosen": null, | |
| "logits/rejected": null, | |
| "logps/chosen": -237.04251098632812, | |
| "logps/rejected": -338.6295471191406, | |
| "loss": 0.0219, | |
| "rewards/accuracies": 0.96875, | |
| "rewards/chosen": 0.9118407964706421, | |
| "rewards/margins": 11.192133903503418, | |
| "rewards/rejected": -10.280294418334961, | |
| "step": 348 | |
| } | |
| ], | |
| "logging_steps": 4, | |
| "max_steps": 348, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |