| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 66, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.031067961165048542, |
| "grad_norm": 4.351982116699219, |
| "learning_rate": 0.0, |
| "logits/chosen": -2.1781115531921387, |
| "logits/rejected": -2.0086305141448975, |
| "logps/chosen": -0.42091915011405945, |
| "logps/rejected": -0.3680277466773987, |
| "loss": 1.3928, |
| "rewards/accuracies": 0.109375, |
| "rewards/chosen": -0.8418383598327637, |
| "rewards/margins": -0.10578285157680511, |
| "rewards/rejected": -0.7360554933547974, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.062135922330097085, |
| "grad_norm": 4.56733512878418, |
| "learning_rate": 1.4285714285714285e-07, |
| "logits/chosen": -2.1738781929016113, |
| "logits/rejected": -1.9470911026000977, |
| "logps/chosen": -0.435973584651947, |
| "logps/rejected": -0.3837018609046936, |
| "loss": 1.3924, |
| "rewards/accuracies": 0.203125, |
| "rewards/chosen": -0.871947169303894, |
| "rewards/margins": -0.10454347729682922, |
| "rewards/rejected": -0.7674036026000977, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.09320388349514563, |
| "grad_norm": 5.2480902671813965, |
| "learning_rate": 2.857142857142857e-07, |
| "logits/chosen": -1.8973408937454224, |
| "logits/rejected": -2.1369576454162598, |
| "logps/chosen": -0.44170576333999634, |
| "logps/rejected": -0.37628883123397827, |
| "loss": 1.4124, |
| "rewards/accuracies": 0.15625, |
| "rewards/chosen": -0.8834115862846375, |
| "rewards/margins": -0.13083389401435852, |
| "rewards/rejected": -0.7525776624679565, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.12427184466019417, |
| "grad_norm": 4.116491317749023, |
| "learning_rate": 4.285714285714285e-07, |
| "logits/chosen": -1.882596731185913, |
| "logits/rejected": -2.0142886638641357, |
| "logps/chosen": -0.4188133180141449, |
| "logps/rejected": -0.36549994349479675, |
| "loss": 1.3944, |
| "rewards/accuracies": 0.1875, |
| "rewards/chosen": -0.8376266360282898, |
| "rewards/margins": -0.10662679374217987, |
| "rewards/rejected": -0.7309998273849487, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.1553398058252427, |
| "grad_norm": 4.484996318817139, |
| "learning_rate": 5.714285714285714e-07, |
| "logits/chosen": -1.6435747146606445, |
| "logits/rejected": -1.9209181070327759, |
| "logps/chosen": -0.41533803939819336, |
| "logps/rejected": -0.35695987939834595, |
| "loss": 1.403, |
| "rewards/accuracies": 0.140625, |
| "rewards/chosen": -0.8306760787963867, |
| "rewards/margins": -0.11675624549388885, |
| "rewards/rejected": -0.7139198184013367, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.18640776699029127, |
| "grad_norm": 4.051513671875, |
| "learning_rate": 7.142857142857143e-07, |
| "logits/chosen": -1.8809208869934082, |
| "logits/rejected": -2.0783591270446777, |
| "logps/chosen": -0.41764718294143677, |
| "logps/rejected": -0.36657023429870605, |
| "loss": 1.3926, |
| "rewards/accuracies": 0.28125, |
| "rewards/chosen": -0.8352943658828735, |
| "rewards/margins": -0.10215392708778381, |
| "rewards/rejected": -0.7331404685974121, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.2174757281553398, |
| "grad_norm": 4.2792534828186035, |
| "learning_rate": 8.57142857142857e-07, |
| "logits/chosen": -1.6469590663909912, |
| "logits/rejected": -2.0544652938842773, |
| "logps/chosen": -0.4287745952606201, |
| "logps/rejected": -0.38292643427848816, |
| "loss": 1.3821, |
| "rewards/accuracies": 0.140625, |
| "rewards/chosen": -0.8575491905212402, |
| "rewards/margins": -0.09169630706310272, |
| "rewards/rejected": -0.7658529281616211, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.24854368932038834, |
| "grad_norm": 3.5870370864868164, |
| "learning_rate": 1e-06, |
| "logits/chosen": -2.1423072814941406, |
| "logits/rejected": -2.0690231323242188, |
| "logps/chosen": -0.4060017466545105, |
| "logps/rejected": -0.3687712550163269, |
| "loss": 1.3692, |
| "rewards/accuracies": 0.234375, |
| "rewards/chosen": -0.8120035529136658, |
| "rewards/margins": -0.07446099817752838, |
| "rewards/rejected": -0.7375425100326538, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.2796116504854369, |
| "grad_norm": 4.142773151397705, |
| "learning_rate": 9.830508474576272e-07, |
| "logits/chosen": -1.6917753219604492, |
| "logits/rejected": -2.048977851867676, |
| "logps/chosen": -0.4064311385154724, |
| "logps/rejected": -0.35661497712135315, |
| "loss": 1.388, |
| "rewards/accuracies": 0.140625, |
| "rewards/chosen": -0.8128622174263, |
| "rewards/margins": -0.09963233023881912, |
| "rewards/rejected": -0.7132298946380615, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.3106796116504854, |
| "grad_norm": 4.133231163024902, |
| "learning_rate": 9.661016949152542e-07, |
| "logits/chosen": -1.9124929904937744, |
| "logits/rejected": -2.001279354095459, |
| "logps/chosen": -0.40286165475845337, |
| "logps/rejected": -0.37157756090164185, |
| "loss": 1.3602, |
| "rewards/accuracies": 0.1875, |
| "rewards/chosen": -0.8057233095169067, |
| "rewards/margins": -0.06256821006536484, |
| "rewards/rejected": -0.7431551218032837, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.341747572815534, |
| "grad_norm": 3.5376193523406982, |
| "learning_rate": 9.491525423728813e-07, |
| "logits/chosen": -1.574399709701538, |
| "logits/rejected": -1.9749456644058228, |
| "logps/chosen": -0.38429805636405945, |
| "logps/rejected": -0.34970927238464355, |
| "loss": 1.3652, |
| "rewards/accuracies": 0.234375, |
| "rewards/chosen": -0.7685961127281189, |
| "rewards/margins": -0.06917756795883179, |
| "rewards/rejected": -0.6994185447692871, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.37281553398058254, |
| "grad_norm": 3.29012393951416, |
| "learning_rate": 9.322033898305083e-07, |
| "logits/chosen": -2.0206141471862793, |
| "logits/rejected": -1.8409574031829834, |
| "logps/chosen": -0.3829426169395447, |
| "logps/rejected": -0.3571726381778717, |
| "loss": 1.3518, |
| "rewards/accuracies": 0.265625, |
| "rewards/chosen": -0.7658852338790894, |
| "rewards/margins": -0.05153997987508774, |
| "rewards/rejected": -0.7143452763557434, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.40388349514563104, |
| "grad_norm": 2.9538028240203857, |
| "learning_rate": 9.152542372881356e-07, |
| "logits/chosen": -1.8269423246383667, |
| "logits/rejected": -1.921961784362793, |
| "logps/chosen": -0.378287136554718, |
| "logps/rejected": -0.3551200032234192, |
| "loss": 1.348, |
| "rewards/accuracies": 0.28125, |
| "rewards/chosen": -0.756574273109436, |
| "rewards/margins": -0.04633423313498497, |
| "rewards/rejected": -0.7102400064468384, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.4349514563106796, |
| "grad_norm": 2.9866271018981934, |
| "learning_rate": 8.983050847457627e-07, |
| "logits/chosen": -1.4229835271835327, |
| "logits/rejected": -2.0510945320129395, |
| "logps/chosen": -0.353898286819458, |
| "logps/rejected": -0.3331609070301056, |
| "loss": 1.3444, |
| "rewards/accuracies": 0.265625, |
| "rewards/chosen": -0.707796573638916, |
| "rewards/margins": -0.04147477447986603, |
| "rewards/rejected": -0.6663218140602112, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.46601941747572817, |
| "grad_norm": 2.7256155014038086, |
| "learning_rate": 8.813559322033897e-07, |
| "logits/chosen": -1.509905457496643, |
| "logits/rejected": -1.967405080795288, |
| "logps/chosen": -0.3514465391635895, |
| "logps/rejected": -0.3335171341896057, |
| "loss": 1.3402, |
| "rewards/accuracies": 0.359375, |
| "rewards/chosen": -0.702893078327179, |
| "rewards/margins": -0.035858701914548874, |
| "rewards/rejected": -0.6670343279838562, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.4970873786407767, |
| "grad_norm": 2.9903502464294434, |
| "learning_rate": 8.64406779661017e-07, |
| "logits/chosen": -1.8509142398834229, |
| "logits/rejected": -1.8535449504852295, |
| "logps/chosen": -0.3595220148563385, |
| "logps/rejected": -0.3445945084095001, |
| "loss": 1.3357, |
| "rewards/accuracies": 0.390625, |
| "rewards/chosen": -0.7190440893173218, |
| "rewards/margins": -0.029855070635676384, |
| "rewards/rejected": -0.6891890168190002, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.5281553398058253, |
| "grad_norm": 3.1175427436828613, |
| "learning_rate": 8.47457627118644e-07, |
| "logits/chosen": -1.441454291343689, |
| "logits/rejected": -1.8639237880706787, |
| "logps/chosen": -0.3568180799484253, |
| "logps/rejected": -0.3417421281337738, |
| "loss": 1.336, |
| "rewards/accuracies": 0.375, |
| "rewards/chosen": -0.7136362195014954, |
| "rewards/margins": -0.03015192598104477, |
| "rewards/rejected": -0.6834842562675476, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.5592233009708738, |
| "grad_norm": 3.23822283744812, |
| "learning_rate": 8.305084745762712e-07, |
| "logits/chosen": -2.449878692626953, |
| "logits/rejected": -1.8969470262527466, |
| "logps/chosen": -0.3514954149723053, |
| "logps/rejected": -0.3440922498703003, |
| "loss": 1.3248, |
| "rewards/accuracies": 0.40625, |
| "rewards/chosen": -0.7029908895492554, |
| "rewards/margins": -0.014806347899138927, |
| "rewards/rejected": -0.6881846189498901, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.5902912621359223, |
| "grad_norm": 3.0648770332336426, |
| "learning_rate": 8.135593220338983e-07, |
| "logits/chosen": -0.8830795288085938, |
| "logits/rejected": -1.7158455848693848, |
| "logps/chosen": -0.360805481672287, |
| "logps/rejected": -0.3484087288379669, |
| "loss": 1.3321, |
| "rewards/accuracies": 0.34375, |
| "rewards/chosen": -0.7216110229492188, |
| "rewards/margins": -0.024793537333607674, |
| "rewards/rejected": -0.6968174576759338, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.6213592233009708, |
| "grad_norm": 3.0875160694122314, |
| "learning_rate": 7.966101694915253e-07, |
| "logits/chosen": -1.4594852924346924, |
| "logits/rejected": -1.7742846012115479, |
| "logps/chosen": -0.3592742085456848, |
| "logps/rejected": -0.35010093450546265, |
| "loss": 1.3272, |
| "rewards/accuracies": 0.421875, |
| "rewards/chosen": -0.7185484170913696, |
| "rewards/margins": -0.018346579745411873, |
| "rewards/rejected": -0.7002018690109253, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.6524271844660194, |
| "grad_norm": 2.981257438659668, |
| "learning_rate": 7.796610169491526e-07, |
| "logits/chosen": -1.7105916738510132, |
| "logits/rejected": -1.9463882446289062, |
| "logps/chosen": -0.349058598279953, |
| "logps/rejected": -0.3355495035648346, |
| "loss": 1.3336, |
| "rewards/accuracies": 0.34375, |
| "rewards/chosen": -0.6981171369552612, |
| "rewards/margins": -0.027018137276172638, |
| "rewards/rejected": -0.6710990071296692, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.683495145631068, |
| "grad_norm": 2.7408576011657715, |
| "learning_rate": 7.627118644067796e-07, |
| "logits/chosen": -1.390715479850769, |
| "logits/rejected": -1.7966665029525757, |
| "logps/chosen": -0.35181212425231934, |
| "logps/rejected": -0.3509633243083954, |
| "loss": 1.3151, |
| "rewards/accuracies": 0.484375, |
| "rewards/chosen": -0.7036243677139282, |
| "rewards/margins": -0.0016976958140730858, |
| "rewards/rejected": -0.7019267082214355, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.7145631067961165, |
| "grad_norm": 2.6456706523895264, |
| "learning_rate": 7.457627118644067e-07, |
| "logits/chosen": -1.0340750217437744, |
| "logits/rejected": -1.785604476928711, |
| "logps/chosen": -0.32947736978530884, |
| "logps/rejected": -0.3392854332923889, |
| "loss": 1.2995, |
| "rewards/accuracies": 0.546875, |
| "rewards/chosen": -0.6589547395706177, |
| "rewards/margins": 0.019616127014160156, |
| "rewards/rejected": -0.6785709261894226, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.7456310679611651, |
| "grad_norm": 2.7873260974884033, |
| "learning_rate": 7.288135593220338e-07, |
| "logits/chosen": -1.7551474571228027, |
| "logits/rejected": -1.686167597770691, |
| "logps/chosen": -0.3260516822338104, |
| "logps/rejected": -0.34417179226875305, |
| "loss": 1.2874, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -0.6521033644676208, |
| "rewards/margins": 0.036240264773368835, |
| "rewards/rejected": -0.6883435845375061, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.7766990291262136, |
| "grad_norm": 2.6608099937438965, |
| "learning_rate": 7.11864406779661e-07, |
| "logits/chosen": -1.7619459629058838, |
| "logits/rejected": -1.6662724018096924, |
| "logps/chosen": -0.3297329843044281, |
| "logps/rejected": -0.34416788816452026, |
| "loss": 1.2928, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -0.6594659090042114, |
| "rewards/margins": 0.028869757428765297, |
| "rewards/rejected": -0.6883357763290405, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.8077669902912621, |
| "grad_norm": 2.6482815742492676, |
| "learning_rate": 6.949152542372881e-07, |
| "logits/chosen": -1.59649658203125, |
| "logits/rejected": -1.6123666763305664, |
| "logps/chosen": -0.3259614109992981, |
| "logps/rejected": -0.3369159400463104, |
| "loss": 1.2976, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.6519228219985962, |
| "rewards/margins": 0.021909058094024658, |
| "rewards/rejected": -0.6738318800926208, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.8388349514563107, |
| "grad_norm": 3.29089617729187, |
| "learning_rate": 6.779661016949152e-07, |
| "logits/chosen": -1.3679349422454834, |
| "logits/rejected": -1.7579234838485718, |
| "logps/chosen": -0.323432981967926, |
| "logps/rejected": -0.33978521823883057, |
| "loss": 1.2901, |
| "rewards/accuracies": 0.65625, |
| "rewards/chosen": -0.6468659043312073, |
| "rewards/margins": 0.032704513520002365, |
| "rewards/rejected": -0.6795704364776611, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.8699029126213592, |
| "grad_norm": 2.870760917663574, |
| "learning_rate": 6.610169491525423e-07, |
| "logits/chosen": -2.230008363723755, |
| "logits/rejected": -1.7593262195587158, |
| "logps/chosen": -0.318013995885849, |
| "logps/rejected": -0.3359982967376709, |
| "loss": 1.2879, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -0.6360279321670532, |
| "rewards/margins": 0.03596857190132141, |
| "rewards/rejected": -0.6719965934753418, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.9009708737864077, |
| "grad_norm": 3.2132599353790283, |
| "learning_rate": 6.440677966101694e-07, |
| "logits/chosen": -1.602566123008728, |
| "logits/rejected": -1.7038823366165161, |
| "logps/chosen": -0.3213762044906616, |
| "logps/rejected": -0.34595417976379395, |
| "loss": 1.2782, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.6427524089813232, |
| "rewards/margins": 0.04915595054626465, |
| "rewards/rejected": -0.6919083595275879, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.9320388349514563, |
| "grad_norm": 2.9022674560546875, |
| "learning_rate": 6.271186440677966e-07, |
| "logits/chosen": -2.0426878929138184, |
| "logits/rejected": -1.8543050289154053, |
| "logps/chosen": -0.31085798144340515, |
| "logps/rejected": -0.3321978747844696, |
| "loss": 1.283, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -0.6217159628868103, |
| "rewards/margins": 0.04267987981438637, |
| "rewards/rejected": -0.664395809173584, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.9631067961165048, |
| "grad_norm": 3.107703924179077, |
| "learning_rate": 6.101694915254237e-07, |
| "logits/chosen": -1.1401560306549072, |
| "logits/rejected": -1.7388765811920166, |
| "logps/chosen": -0.31906136870384216, |
| "logps/rejected": -0.35285234451293945, |
| "loss": 1.2653, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.6381226778030396, |
| "rewards/margins": 0.06758201867341995, |
| "rewards/rejected": -0.7057047486305237, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.9941747572815534, |
| "grad_norm": 2.563669443130493, |
| "learning_rate": 5.932203389830508e-07, |
| "logits/chosen": -1.3182499408721924, |
| "logits/rejected": -1.7714648246765137, |
| "logps/chosen": -0.31395667791366577, |
| "logps/rejected": -0.3352799117565155, |
| "loss": 1.283, |
| "rewards/accuracies": 0.640625, |
| "rewards/chosen": -0.6279132962226868, |
| "rewards/margins": 0.042646490037441254, |
| "rewards/rejected": -0.670559823513031, |
| "step": 32 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.8014712929725647, |
| "learning_rate": 5.76271186440678e-07, |
| "logits/chosen": -1.282576322555542, |
| "logits/rejected": -1.4453270435333252, |
| "logps/chosen": -0.3373573422431946, |
| "logps/rejected": -0.3649425506591797, |
| "loss": 0.2388, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.6747146844863892, |
| "rewards/margins": 0.05517040938138962, |
| "rewards/rejected": -0.7298850417137146, |
| "step": 33 |
| }, |
| { |
| "epoch": 1.0310679611650486, |
| "grad_norm": 2.6547462940216064, |
| "learning_rate": 5.59322033898305e-07, |
| "logits/chosen": -1.7090933322906494, |
| "logits/rejected": -1.6342582702636719, |
| "logps/chosen": -0.3321191072463989, |
| "logps/rejected": -0.35813525319099426, |
| "loss": 1.2762, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -0.6642382144927979, |
| "rewards/margins": 0.05203229561448097, |
| "rewards/rejected": -0.7162705063819885, |
| "step": 34 |
| }, |
| { |
| "epoch": 1.062135922330097, |
| "grad_norm": 2.929637908935547, |
| "learning_rate": 5.423728813559322e-07, |
| "logits/chosen": -1.289225697517395, |
| "logits/rejected": -1.8013920783996582, |
| "logps/chosen": -0.3155551552772522, |
| "logps/rejected": -0.3497050404548645, |
| "loss": 1.2647, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.6311103105545044, |
| "rewards/margins": 0.06829972565174103, |
| "rewards/rejected": -0.6994100213050842, |
| "step": 35 |
| }, |
| { |
| "epoch": 1.0932038834951456, |
| "grad_norm": 2.994192600250244, |
| "learning_rate": 5.254237288135593e-07, |
| "logits/chosen": -1.562676191329956, |
| "logits/rejected": -1.5841116905212402, |
| "logps/chosen": -0.3231390118598938, |
| "logps/rejected": -0.35967499017715454, |
| "loss": 1.2613, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -0.6462780237197876, |
| "rewards/margins": 0.07307194173336029, |
| "rewards/rejected": -0.7193499207496643, |
| "step": 36 |
| }, |
| { |
| "epoch": 1.1242718446601943, |
| "grad_norm": 3.348798990249634, |
| "learning_rate": 5.084745762711864e-07, |
| "logits/chosen": -1.6281405687332153, |
| "logits/rejected": -1.634437084197998, |
| "logps/chosen": -0.3220047950744629, |
| "logps/rejected": -0.35870927572250366, |
| "loss": 1.2613, |
| "rewards/accuracies": 0.71875, |
| "rewards/chosen": -0.6440095901489258, |
| "rewards/margins": 0.07340894639492035, |
| "rewards/rejected": -0.7174185514450073, |
| "step": 37 |
| }, |
| { |
| "epoch": 1.1553398058252426, |
| "grad_norm": 3.086836576461792, |
| "learning_rate": 4.915254237288136e-07, |
| "logits/chosen": -1.4916963577270508, |
| "logits/rejected": -1.7688562870025635, |
| "logps/chosen": -0.30526435375213623, |
| "logps/rejected": -0.353664755821228, |
| "loss": 1.245, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.6105287075042725, |
| "rewards/margins": 0.0968008041381836, |
| "rewards/rejected": -0.707329511642456, |
| "step": 38 |
| }, |
| { |
| "epoch": 1.1864077669902913, |
| "grad_norm": 2.953174352645874, |
| "learning_rate": 4.7457627118644066e-07, |
| "logits/chosen": -2.1249518394470215, |
| "logits/rejected": -1.8521509170532227, |
| "logps/chosen": -0.3190627992153168, |
| "logps/rejected": -0.35821837186813354, |
| "loss": 1.2579, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -0.6381255984306335, |
| "rewards/margins": 0.07831110060214996, |
| "rewards/rejected": -0.7164367437362671, |
| "step": 39 |
| }, |
| { |
| "epoch": 1.2174757281553399, |
| "grad_norm": 2.8506457805633545, |
| "learning_rate": 4.576271186440678e-07, |
| "logits/chosen": -1.2953447103500366, |
| "logits/rejected": -1.6804742813110352, |
| "logps/chosen": -0.329822838306427, |
| "logps/rejected": -0.3734983801841736, |
| "loss": 1.2516, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.6596456170082092, |
| "rewards/margins": 0.08735115081071854, |
| "rewards/rejected": -0.7469968199729919, |
| "step": 40 |
| }, |
| { |
| "epoch": 1.2485436893203883, |
| "grad_norm": 2.7674953937530518, |
| "learning_rate": 4.4067796610169486e-07, |
| "logits/chosen": -2.0186150074005127, |
| "logits/rejected": -1.836294174194336, |
| "logps/chosen": -0.315221905708313, |
| "logps/rejected": -0.34855592250823975, |
| "loss": 1.266, |
| "rewards/accuracies": 0.703125, |
| "rewards/chosen": -0.630443811416626, |
| "rewards/margins": 0.0666680634021759, |
| "rewards/rejected": -0.6971118450164795, |
| "step": 41 |
| }, |
| { |
| "epoch": 1.279611650485437, |
| "grad_norm": 2.549076795578003, |
| "learning_rate": 4.23728813559322e-07, |
| "logits/chosen": -1.6427559852600098, |
| "logits/rejected": -1.9293004274368286, |
| "logps/chosen": -0.31294625997543335, |
| "logps/rejected": -0.34532809257507324, |
| "loss": 1.2677, |
| "rewards/accuracies": 0.671875, |
| "rewards/chosen": -0.6258925199508667, |
| "rewards/margins": 0.0647636353969574, |
| "rewards/rejected": -0.6906561255455017, |
| "step": 42 |
| }, |
| { |
| "epoch": 1.3106796116504853, |
| "grad_norm": 3.1989340782165527, |
| "learning_rate": 4.0677966101694916e-07, |
| "logits/chosen": -0.44606664776802063, |
| "logits/rejected": -1.8360792398452759, |
| "logps/chosen": -0.31408655643463135, |
| "logps/rejected": -0.3622249960899353, |
| "loss": 1.2452, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -0.6281731128692627, |
| "rewards/margins": 0.09627692401409149, |
| "rewards/rejected": -0.7244499921798706, |
| "step": 43 |
| }, |
| { |
| "epoch": 1.341747572815534, |
| "grad_norm": 3.1109275817871094, |
| "learning_rate": 3.898305084745763e-07, |
| "logits/chosen": -1.2755181789398193, |
| "logits/rejected": -1.837625503540039, |
| "logps/chosen": -0.31347301602363586, |
| "logps/rejected": -0.36295440793037415, |
| "loss": 1.2431, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -0.626945972442627, |
| "rewards/margins": 0.09896278381347656, |
| "rewards/rejected": -0.7259088754653931, |
| "step": 44 |
| }, |
| { |
| "epoch": 1.3728155339805825, |
| "grad_norm": 3.2324881553649902, |
| "learning_rate": 3.7288135593220336e-07, |
| "logits/chosen": -1.336294174194336, |
| "logits/rejected": -1.8953676223754883, |
| "logps/chosen": -0.3207179307937622, |
| "logps/rejected": -0.3695950508117676, |
| "loss": 1.244, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -0.6414358019828796, |
| "rewards/margins": 0.09775425493717194, |
| "rewards/rejected": -0.7391901016235352, |
| "step": 45 |
| }, |
| { |
| "epoch": 1.4038834951456312, |
| "grad_norm": 3.4072141647338867, |
| "learning_rate": 3.559322033898305e-07, |
| "logits/chosen": -1.3765511512756348, |
| "logits/rejected": -1.7508171796798706, |
| "logps/chosen": -0.3291308879852295, |
| "logps/rejected": -0.3830956518650055, |
| "loss": 1.237, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.658261775970459, |
| "rewards/margins": 0.10792950540781021, |
| "rewards/rejected": -0.766191303730011, |
| "step": 46 |
| }, |
| { |
| "epoch": 1.4349514563106796, |
| "grad_norm": 3.158661127090454, |
| "learning_rate": 3.389830508474576e-07, |
| "logits/chosen": -1.9516403675079346, |
| "logits/rejected": -1.741844654083252, |
| "logps/chosen": -0.33282211422920227, |
| "logps/rejected": -0.37869203090667725, |
| "loss": 1.2486, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.6656442284584045, |
| "rewards/margins": 0.09173984825611115, |
| "rewards/rejected": -0.7573840618133545, |
| "step": 47 |
| }, |
| { |
| "epoch": 1.4660194174757282, |
| "grad_norm": 2.9642040729522705, |
| "learning_rate": 3.220338983050847e-07, |
| "logits/chosen": -1.4430270195007324, |
| "logits/rejected": -1.8442091941833496, |
| "logps/chosen": -0.3115312457084656, |
| "logps/rejected": -0.3656957149505615, |
| "loss": 1.2367, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -0.6230624914169312, |
| "rewards/margins": 0.10832884907722473, |
| "rewards/rejected": -0.7313913702964783, |
| "step": 48 |
| }, |
| { |
| "epoch": 1.4970873786407766, |
| "grad_norm": 3.461207628250122, |
| "learning_rate": 3.0508474576271186e-07, |
| "logits/chosen": -2.283356189727783, |
| "logits/rejected": -1.781760573387146, |
| "logps/chosen": -0.33550429344177246, |
| "logps/rejected": -0.39154571294784546, |
| "loss": 1.2353, |
| "rewards/accuracies": 0.734375, |
| "rewards/chosen": -0.6710086464881897, |
| "rewards/margins": 0.1120828241109848, |
| "rewards/rejected": -0.7830914258956909, |
| "step": 49 |
| }, |
| { |
| "epoch": 1.5281553398058252, |
| "grad_norm": 3.681100845336914, |
| "learning_rate": 2.88135593220339e-07, |
| "logits/chosen": -1.8599827289581299, |
| "logits/rejected": -1.890540599822998, |
| "logps/chosen": -0.32184410095214844, |
| "logps/rejected": -0.39129096269607544, |
| "loss": 1.2159, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -0.6436882019042969, |
| "rewards/margins": 0.1388937532901764, |
| "rewards/rejected": -0.7825819253921509, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.5592233009708738, |
| "grad_norm": 3.253593921661377, |
| "learning_rate": 2.711864406779661e-07, |
| "logits/chosen": -1.5855774879455566, |
| "logits/rejected": -1.7609983682632446, |
| "logps/chosen": -0.3321957588195801, |
| "logps/rejected": -0.3847588002681732, |
| "loss": 1.2399, |
| "rewards/accuracies": 0.78125, |
| "rewards/chosen": -0.6643915176391602, |
| "rewards/margins": 0.10512607544660568, |
| "rewards/rejected": -0.7695176005363464, |
| "step": 51 |
| }, |
| { |
| "epoch": 1.5902912621359224, |
| "grad_norm": 3.7317774295806885, |
| "learning_rate": 2.542372881355932e-07, |
| "logits/chosen": -0.956875205039978, |
| "logits/rejected": -1.6611835956573486, |
| "logps/chosen": -0.3262099623680115, |
| "logps/rejected": -0.40217769145965576, |
| "loss": 1.2076, |
| "rewards/accuracies": 0.796875, |
| "rewards/chosen": -0.652419924736023, |
| "rewards/margins": 0.15193548798561096, |
| "rewards/rejected": -0.8043554425239563, |
| "step": 52 |
| }, |
| { |
| "epoch": 1.6213592233009708, |
| "grad_norm": 3.586879253387451, |
| "learning_rate": 2.3728813559322033e-07, |
| "logits/chosen": -2.1548349857330322, |
| "logits/rejected": -1.7199804782867432, |
| "logps/chosen": -0.34391993284225464, |
| "logps/rejected": -0.40578746795654297, |
| "loss": 1.2262, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -0.6878398656845093, |
| "rewards/margins": 0.12373502552509308, |
| "rewards/rejected": -0.8115749359130859, |
| "step": 53 |
| }, |
| { |
| "epoch": 1.6524271844660194, |
| "grad_norm": 3.669318437576294, |
| "learning_rate": 2.2033898305084743e-07, |
| "logits/chosen": -2.271226406097412, |
| "logits/rejected": -2.0481715202331543, |
| "logps/chosen": -0.3331441879272461, |
| "logps/rejected": -0.38672688603401184, |
| "loss": 1.2378, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -0.6662883758544922, |
| "rewards/margins": 0.10716544091701508, |
| "rewards/rejected": -0.7734538316726685, |
| "step": 54 |
| }, |
| { |
| "epoch": 1.6834951456310678, |
| "grad_norm": 3.6836650371551514, |
| "learning_rate": 2.0338983050847458e-07, |
| "logits/chosen": -2.120683193206787, |
| "logits/rejected": -1.630366563796997, |
| "logps/chosen": -0.3574049174785614, |
| "logps/rejected": -0.43239909410476685, |
| "loss": 1.2089, |
| "rewards/accuracies": 0.828125, |
| "rewards/chosen": -0.714809775352478, |
| "rewards/margins": 0.14998838305473328, |
| "rewards/rejected": -0.8647981882095337, |
| "step": 55 |
| }, |
| { |
| "epoch": 1.7145631067961165, |
| "grad_norm": 3.2008886337280273, |
| "learning_rate": 1.8644067796610168e-07, |
| "logits/chosen": -1.9383658170700073, |
| "logits/rejected": -1.9295272827148438, |
| "logps/chosen": -0.3346194922924042, |
| "logps/rejected": -0.384814590215683, |
| "loss": 1.2424, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.6692389845848083, |
| "rewards/margins": 0.10039019584655762, |
| "rewards/rejected": -0.769629180431366, |
| "step": 56 |
| }, |
| { |
| "epoch": 1.745631067961165, |
| "grad_norm": 4.206936359405518, |
| "learning_rate": 1.694915254237288e-07, |
| "logits/chosen": -1.215062141418457, |
| "logits/rejected": -1.7683817148208618, |
| "logps/chosen": -0.34487807750701904, |
| "logps/rejected": -0.43873488903045654, |
| "loss": 1.1825, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.6897561550140381, |
| "rewards/margins": 0.1877136081457138, |
| "rewards/rejected": -0.8774697780609131, |
| "step": 57 |
| }, |
| { |
| "epoch": 1.7766990291262137, |
| "grad_norm": 3.9559454917907715, |
| "learning_rate": 1.5254237288135593e-07, |
| "logits/chosen": -1.3999274969100952, |
| "logits/rejected": -1.713794231414795, |
| "logps/chosen": -0.37690550088882446, |
| "logps/rejected": -0.4559113383293152, |
| "loss": 1.2033, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.7538109421730042, |
| "rewards/margins": 0.15801168978214264, |
| "rewards/rejected": -0.9118226766586304, |
| "step": 58 |
| }, |
| { |
| "epoch": 1.807766990291262, |
| "grad_norm": 4.222906112670898, |
| "learning_rate": 1.3559322033898305e-07, |
| "logits/chosen": -2.0715479850769043, |
| "logits/rejected": -1.6846166849136353, |
| "logps/chosen": -0.3247872591018677, |
| "logps/rejected": -0.4152475893497467, |
| "loss": 1.1888, |
| "rewards/accuracies": 0.84375, |
| "rewards/chosen": -0.6495745182037354, |
| "rewards/margins": 0.18092064559459686, |
| "rewards/rejected": -0.8304951786994934, |
| "step": 59 |
| }, |
| { |
| "epoch": 1.8388349514563107, |
| "grad_norm": 3.675178289413452, |
| "learning_rate": 1.1864406779661017e-07, |
| "logits/chosen": -1.9317084550857544, |
| "logits/rejected": -1.8160873651504517, |
| "logps/chosen": -0.36124035716056824, |
| "logps/rejected": -0.44001373648643494, |
| "loss": 1.2037, |
| "rewards/accuracies": 0.859375, |
| "rewards/chosen": -0.7224806547164917, |
| "rewards/margins": 0.15754681825637817, |
| "rewards/rejected": -0.8800274729728699, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.869902912621359, |
| "grad_norm": 3.314823627471924, |
| "learning_rate": 1.0169491525423729e-07, |
| "logits/chosen": -2.0573697090148926, |
| "logits/rejected": -1.8544249534606934, |
| "logps/chosen": -0.34472307562828064, |
| "logps/rejected": -0.4054674506187439, |
| "loss": 1.2287, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.6894460916519165, |
| "rewards/margins": 0.12148873507976532, |
| "rewards/rejected": -0.8109349012374878, |
| "step": 61 |
| }, |
| { |
| "epoch": 1.9009708737864077, |
| "grad_norm": 3.7633745670318604, |
| "learning_rate": 8.47457627118644e-08, |
| "logits/chosen": -1.8335437774658203, |
| "logits/rejected": -1.7075271606445312, |
| "logps/chosen": -0.36628228425979614, |
| "logps/rejected": -0.44996723532676697, |
| "loss": 1.1967, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.7325646281242371, |
| "rewards/margins": 0.16736987233161926, |
| "rewards/rejected": -0.8999344110488892, |
| "step": 62 |
| }, |
| { |
| "epoch": 1.9320388349514563, |
| "grad_norm": 4.369020938873291, |
| "learning_rate": 6.779661016949153e-08, |
| "logits/chosen": -2.20133900642395, |
| "logits/rejected": -1.7921724319458008, |
| "logps/chosen": -0.3454199433326721, |
| "logps/rejected": -0.43609410524368286, |
| "loss": 1.1872, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.6908398866653442, |
| "rewards/margins": 0.1813482791185379, |
| "rewards/rejected": -0.8721882104873657, |
| "step": 63 |
| }, |
| { |
| "epoch": 1.963106796116505, |
| "grad_norm": 3.21553373336792, |
| "learning_rate": 5.0847457627118645e-08, |
| "logits/chosen": -1.8866002559661865, |
| "logits/rejected": -1.80937659740448, |
| "logps/chosen": -0.33643844723701477, |
| "logps/rejected": -0.3921111226081848, |
| "loss": 1.2359, |
| "rewards/accuracies": 0.765625, |
| "rewards/chosen": -0.6728769540786743, |
| "rewards/margins": 0.11134527623653412, |
| "rewards/rejected": -0.7842222452163696, |
| "step": 64 |
| }, |
| { |
| "epoch": 1.9941747572815534, |
| "grad_norm": 3.8623664379119873, |
| "learning_rate": 3.3898305084745764e-08, |
| "logits/chosen": -1.909616231918335, |
| "logits/rejected": -1.7134193181991577, |
| "logps/chosen": -0.3720588684082031, |
| "logps/rejected": -0.45723646879196167, |
| "loss": 1.1957, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.7441176772117615, |
| "rewards/margins": 0.17035523056983948, |
| "rewards/rejected": -0.9144729375839233, |
| "step": 65 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 1.0719321966171265, |
| "learning_rate": 1.6949152542372882e-08, |
| "logits/chosen": -2.4218411445617676, |
| "logits/rejected": -1.844456672668457, |
| "logps/chosen": -0.37341463565826416, |
| "logps/rejected": -0.42600566148757935, |
| "loss": 0.2323, |
| "rewards/accuracies": 0.8333333730697632, |
| "rewards/chosen": -0.7468292713165283, |
| "rewards/margins": 0.10518200695514679, |
| "rewards/rejected": -0.8520113229751587, |
| "step": 66 |
| }, |
| { |
| "epoch": 2.0, |
| "step": 66, |
| "total_flos": 24916491141120.0, |
| "train_loss": 1.2533311103329514, |
| "train_runtime": 1478.6034, |
| "train_samples_per_second": 2.785, |
| "train_steps_per_second": 0.045 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 66, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 24916491141120.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|