{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.018685040289618125,
  "eval_steps": 500,
  "global_step": 40,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00046712600724045314,
      "grad_norm": 29.624664306640625,
      "learning_rate": 0.0,
      "logits/chosen": -3.153887987136841,
      "logits/rejected": -3.3905792236328125,
      "logps/chosen": -164.62596130371094,
      "logps/rejected": -154.77557373046875,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.0009342520144809063,
      "grad_norm": 29.32240104675293,
      "learning_rate": 1.5e-06,
      "logits/chosen": -3.1478431224823,
      "logits/rejected": -3.0448203086853027,
      "logps/chosen": -156.60809326171875,
      "logps/rejected": -134.02630615234375,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 2
    },
    {
      "epoch": 0.0014013780217213593,
      "grad_norm": 39.618228912353516,
      "learning_rate": 3e-06,
      "logits/chosen": -3.099796772003174,
      "logits/rejected": -3.1112475395202637,
      "logps/chosen": -139.82913208007812,
      "logps/rejected": -142.367919921875,
      "loss": 0.6916,
      "rewards/accuracies": 0.46875,
      "rewards/chosen": 0.0017557624960318208,
      "rewards/margins": 0.0038681034930050373,
      "rewards/rejected": -0.0021123411133885384,
      "step": 3
    },
    {
      "epoch": 0.0018685040289618125,
      "grad_norm": 48.11921691894531,
      "learning_rate": 4.5e-06,
      "logits/chosen": -2.7930030822753906,
      "logits/rejected": -3.279337167739868,
      "logps/chosen": -147.18673706054688,
      "logps/rejected": -148.54122924804688,
      "loss": 0.6996,
      "rewards/accuracies": 0.375,
      "rewards/chosen": -0.033105019479990005,
      "rewards/margins": -0.008252889849245548,
      "rewards/rejected": -0.024852126836776733,
      "step": 4
    },
    {
      "epoch": 0.0023356300362022656,
      "grad_norm": 33.73259353637695,
      "learning_rate": 6e-06,
      "logits/chosen": -3.058988094329834,
      "logits/rejected": -2.9058432579040527,
      "logps/chosen": -157.9241180419922,
      "logps/rejected": -181.765380859375,
      "loss": 0.728,
      "rewards/accuracies": 0.5,
      "rewards/chosen": -0.08487213402986526,
      "rewards/margins": -0.055237509310245514,
      "rewards/rejected": -0.029634615406394005,
      "step": 5
    },
    {
      "epoch": 0.0028027560434427186,
      "grad_norm": 33.588905334472656,
      "learning_rate": 7.5e-06,
      "logits/chosen": -3.073216199874878,
      "logits/rejected": -2.8430886268615723,
      "logps/chosen": -158.05972290039062,
      "logps/rejected": -150.7171630859375,
      "loss": 0.759,
      "rewards/accuracies": 0.5,
      "rewards/chosen": -0.1916729062795639,
      "rewards/margins": -0.05884008854627609,
      "rewards/rejected": -0.1328328400850296,
      "step": 6
    },
    {
      "epoch": 0.0032698820506831716,
      "grad_norm": 34.93351745605469,
      "learning_rate": 9e-06,
      "logits/chosen": -3.4525327682495117,
      "logits/rejected": -3.336601495742798,
      "logps/chosen": -164.04249572753906,
      "logps/rejected": -165.49948120117188,
      "loss": 0.769,
      "rewards/accuracies": 0.5,
      "rewards/chosen": -0.322049081325531,
      "rewards/margins": -0.07949253916740417,
      "rewards/rejected": -0.24255654215812683,
      "step": 7
    },
    {
      "epoch": 0.003737008057923625,
      "grad_norm": 32.109195709228516,
      "learning_rate": 1.05e-05,
      "logits/chosen": -3.057377338409424,
      "logits/rejected": -2.9476959705352783,
      "logps/chosen": -179.3075408935547,
      "logps/rejected": -163.44024658203125,
      "loss": 0.6912,
      "rewards/accuracies": 0.53125,
      "rewards/chosen": -0.4484987258911133,
      "rewards/margins": 0.13945631682872772,
      "rewards/rejected": -0.5879549980163574,
      "step": 8
    },
    {
      "epoch": 0.004204134065164078,
      "grad_norm": 38.499698638916016,
      "learning_rate": 1.2e-05,
      "logits/chosen": -3.4018871784210205,
      "logits/rejected": -2.770911455154419,
      "logps/chosen": -138.96697998046875,
      "logps/rejected": -155.197509765625,
      "loss": 0.7849,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": -0.5717523097991943,
      "rewards/margins": 0.018239814788103104,
      "rewards/rejected": -0.589992105960846,
      "step": 9
    },
    {
      "epoch": 0.004671260072404531,
      "grad_norm": 29.717857360839844,
      "learning_rate": 1.3500000000000001e-05,
      "logits/chosen": -3.2118136882781982,
      "logits/rejected": -2.8460254669189453,
      "logps/chosen": -158.110107421875,
      "logps/rejected": -147.25413513183594,
      "loss": 0.8467,
      "rewards/accuracies": 0.53125,
      "rewards/chosen": -0.4080456793308258,
      "rewards/margins": -0.05621982365846634,
      "rewards/rejected": -0.3518258333206177,
      "step": 10
    },
    {
      "epoch": 0.005138386079644984,
      "grad_norm": 35.5257682800293,
      "learning_rate": 1.5e-05,
      "logits/chosen": -2.7199769020080566,
      "logits/rejected": -3.1992592811584473,
      "logps/chosen": -182.67335510253906,
      "logps/rejected": -205.45220947265625,
      "loss": 0.7824,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.34646672010421753,
      "rewards/margins": 0.4741722643375397,
      "rewards/rejected": -0.8206390142440796,
      "step": 11
    },
    {
      "epoch": 0.005605512086885437,
      "grad_norm": 33.356773376464844,
      "learning_rate": 1.65e-05,
      "logits/chosen": -3.128831386566162,
      "logits/rejected": -3.167382001876831,
      "logps/chosen": -157.21823120117188,
      "logps/rejected": -169.51663208007812,
      "loss": 0.6211,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.16993041336536407,
      "rewards/margins": 0.4490576684474945,
      "rewards/rejected": -0.6189880967140198,
      "step": 12
    },
    {
      "epoch": 0.00607263809412589,
      "grad_norm": 43.2087516784668,
      "learning_rate": 1.8e-05,
      "logits/chosen": -3.032745838165283,
      "logits/rejected": -3.1566920280456543,
      "logps/chosen": -160.5401153564453,
      "logps/rejected": -156.610107421875,
      "loss": 0.8161,
      "rewards/accuracies": 0.53125,
      "rewards/chosen": -0.329096257686615,
      "rewards/margins": 0.2510414123535156,
      "rewards/rejected": -0.5801376700401306,
      "step": 13
    },
    {
      "epoch": 0.006539764101366343,
      "grad_norm": 44.841331481933594,
      "learning_rate": 1.95e-05,
      "logits/chosen": -2.9390594959259033,
      "logits/rejected": -2.639657974243164,
      "logps/chosen": -158.00621032714844,
      "logps/rejected": -205.90988159179688,
      "loss": 0.6193,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -0.4224599599838257,
      "rewards/margins": 0.8774706721305847,
      "rewards/rejected": -1.2999305725097656,
      "step": 14
    },
    {
      "epoch": 0.007006890108606796,
      "grad_norm": 60.16761779785156,
      "learning_rate": 2.1e-05,
      "logits/chosen": -2.844076156616211,
      "logits/rejected": -3.0058369636535645,
      "logps/chosen": -174.17816162109375,
      "logps/rejected": -162.8614959716797,
      "loss": 0.9068,
      "rewards/accuracies": 0.46875,
      "rewards/chosen": -0.4890695810317993,
      "rewards/margins": 0.31313809752464294,
      "rewards/rejected": -0.8022076487541199,
      "step": 15
    },
    {
      "epoch": 0.00747401611584725,
      "grad_norm": 42.41041946411133,
      "learning_rate": 2.25e-05,
      "logits/chosen": -2.8459668159484863,
      "logits/rejected": -2.870767593383789,
      "logps/chosen": -159.3683624267578,
      "logps/rejected": -137.30758666992188,
      "loss": 1.0076,
      "rewards/accuracies": 0.5,
      "rewards/chosen": -0.8330074548721313,
      "rewards/margins": -0.10208512842655182,
      "rewards/rejected": -0.7309223413467407,
      "step": 16
    },
    {
      "epoch": 0.007941142123087703,
      "grad_norm": 46.14881134033203,
      "learning_rate": 2.4e-05,
      "logits/chosen": -3.2765953540802,
      "logits/rejected": -3.2238590717315674,
      "logps/chosen": -165.30923461914062,
      "logps/rejected": -130.40892028808594,
      "loss": 0.8999,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": -0.6034781336784363,
      "rewards/margins": 0.04197956249117851,
      "rewards/rejected": -0.6454576849937439,
      "step": 17
    },
    {
      "epoch": 0.008408268130328156,
      "grad_norm": 36.91789245605469,
      "learning_rate": 2.55e-05,
      "logits/chosen": -2.8234622478485107,
      "logits/rejected": -3.0411720275878906,
      "logps/chosen": -175.682373046875,
      "logps/rejected": -149.590576171875,
      "loss": 1.2818,
      "rewards/accuracies": 0.53125,
      "rewards/chosen": -1.4478638172149658,
      "rewards/margins": -0.04692135751247406,
      "rewards/rejected": -1.4009425640106201,
      "step": 18
    },
    {
      "epoch": 0.00887539413756861,
      "grad_norm": 53.27765655517578,
      "learning_rate": 2.7000000000000002e-05,
      "logits/chosen": -3.185443639755249,
      "logits/rejected": -3.126272439956665,
      "logps/chosen": -187.12286376953125,
      "logps/rejected": -188.28640747070312,
      "loss": 0.7968,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -1.0597200393676758,
      "rewards/margins": 0.5409759283065796,
      "rewards/rejected": -1.6006958484649658,
      "step": 19
    },
    {
      "epoch": 0.009342520144809062,
      "grad_norm": 40.505897521972656,
      "learning_rate": 2.8499999999999998e-05,
      "logits/chosen": -2.845191478729248,
      "logits/rejected": -2.9458093643188477,
      "logps/chosen": -177.2282257080078,
      "logps/rejected": -210.2263641357422,
      "loss": 1.3512,
      "rewards/accuracies": 0.4375,
      "rewards/chosen": -2.458928108215332,
      "rewards/margins": -0.08396562933921814,
      "rewards/rejected": -2.374962329864502,
      "step": 20
    },
    {
      "epoch": 0.009809646152049515,
      "grad_norm": 68.49879455566406,
      "learning_rate": 3e-05,
      "logits/chosen": -3.1144859790802,
      "logits/rejected": -2.9034385681152344,
      "logps/chosen": -216.12496948242188,
      "logps/rejected": -190.55833435058594,
      "loss": 1.5832,
      "rewards/accuracies": 0.46875,
      "rewards/chosen": -3.5558087825775146,
      "rewards/margins": -0.48250633478164673,
      "rewards/rejected": -3.0733022689819336,
      "step": 21
    },
    {
      "epoch": 0.010276772159289968,
      "grad_norm": 61.08313751220703,
      "learning_rate": 2.9999922925895862e-05,
      "logits/chosen": -3.1215250492095947,
      "logits/rejected": -2.597733974456787,
      "logps/chosen": -219.85137939453125,
      "logps/rejected": -196.4637908935547,
      "loss": 1.4738,
      "rewards/accuracies": 0.40625,
      "rewards/chosen": -3.155580997467041,
      "rewards/margins": -0.5727983117103577,
      "rewards/rejected": -2.582782745361328,
      "step": 22
    },
    {
      "epoch": 0.010743898166530421,
      "grad_norm": 41.851783752441406,
      "learning_rate": 2.999969170437549e-05,
      "logits/chosen": -3.0340187549591064,
      "logits/rejected": -2.84521484375,
      "logps/chosen": -177.37728881835938,
      "logps/rejected": -183.36668395996094,
      "loss": 1.0908,
      "rewards/accuracies": 0.46875,
      "rewards/chosen": -1.8575204610824585,
      "rewards/margins": 0.3737794756889343,
      "rewards/rejected": -2.231299877166748,
      "step": 23
    },
    {
      "epoch": 0.011211024173770874,
      "grad_norm": 39.802371978759766,
      "learning_rate": 2.9999306337815055e-05,
      "logits/chosen": -2.8932125568389893,
      "logits/rejected": -3.1646995544433594,
      "logps/chosen": -176.2078399658203,
      "logps/rejected": -199.5233612060547,
      "loss": 1.0934,
      "rewards/accuracies": 0.53125,
      "rewards/chosen": -1.9304168224334717,
      "rewards/margins": 0.2455454170703888,
      "rewards/rejected": -2.175962448120117,
      "step": 24
    },
    {
      "epoch": 0.011678150181011327,
      "grad_norm": 29.43556785583496,
      "learning_rate": 2.999876683017479e-05,
      "logits/chosen": -2.940061092376709,
      "logits/rejected": -2.888075590133667,
      "logps/chosen": -193.4315948486328,
      "logps/rejected": -197.52081298828125,
      "loss": 1.1539,
      "rewards/accuracies": 0.5,
      "rewards/chosen": -1.5905036926269531,
      "rewards/margins": 0.9262561202049255,
      "rewards/rejected": -2.5167598724365234,
      "step": 25
    },
    {
      "epoch": 0.01214527618825178,
      "grad_norm": 54.44322204589844,
      "learning_rate": 2.999807318699897e-05,
      "logits/chosen": -2.6563143730163574,
      "logits/rejected": -2.6990714073181152,
      "logps/chosen": -156.08848571777344,
      "logps/rejected": -182.99139404296875,
      "loss": 1.2892,
      "rewards/accuracies": 0.4375,
      "rewards/chosen": -1.465782642364502,
      "rewards/margins": 0.17339667677879333,
      "rewards/rejected": -1.6391793489456177,
      "step": 26
    },
    {
      "epoch": 0.012612402195492234,
      "grad_norm": 33.6212272644043,
      "learning_rate": 2.999722541541585e-05,
      "logits/chosen": -2.61735200881958,
      "logits/rejected": -2.4723963737487793,
      "logps/chosen": -155.68017578125,
      "logps/rejected": -158.19589233398438,
      "loss": 1.0627,
      "rewards/accuracies": 0.34375,
      "rewards/chosen": -1.1587157249450684,
      "rewards/margins": 0.1697230488061905,
      "rewards/rejected": -1.3284387588500977,
      "step": 27
    },
    {
      "epoch": 0.013079528202732687,
      "grad_norm": 19.592470169067383,
      "learning_rate": 2.99962235241376e-05,
      "logits/chosen": -2.5871520042419434,
      "logits/rejected": -2.7644474506378174,
      "logps/chosen": -184.37257385253906,
      "logps/rejected": -169.0127716064453,
      "loss": 0.8552,
      "rewards/accuracies": 0.53125,
      "rewards/chosen": -0.5363420248031616,
      "rewards/margins": 0.4760582447052002,
      "rewards/rejected": -1.0124002695083618,
      "step": 28
    },
    {
      "epoch": 0.01354665420997314,
      "grad_norm": 20.270267486572266,
      "learning_rate": 2.9995067523460198e-05,
      "logits/chosen": -2.8748791217803955,
      "logits/rejected": -3.1334304809570312,
      "logps/chosen": -157.93455505371094,
      "logps/rejected": -176.13912963867188,
      "loss": 0.9041,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": -0.36636897921562195,
      "rewards/margins": 0.27877098321914673,
      "rewards/rejected": -0.6451399922370911,
      "step": 29
    },
    {
      "epoch": 0.014013780217213593,
      "grad_norm": 43.62104415893555,
      "learning_rate": 2.9993757425263343e-05,
      "logits/chosen": -2.7570629119873047,
      "logits/rejected": -2.771437644958496,
      "logps/chosen": -157.29891967773438,
      "logps/rejected": -166.71913146972656,
      "loss": 1.5401,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": -0.970974862575531,
      "rewards/margins": -0.46308204531669617,
      "rewards/rejected": -0.5078927874565125,
      "step": 30
    },
    {
      "epoch": 0.014480906224454046,
      "grad_norm": 21.04092025756836,
      "learning_rate": 2.999229324301032e-05,
      "logits/chosen": -2.814603328704834,
      "logits/rejected": -2.561202049255371,
      "logps/chosen": -126.19819641113281,
      "logps/rejected": -131.07566833496094,
      "loss": 0.7815,
      "rewards/accuracies": 0.5,
      "rewards/chosen": 0.11197692900896072,
      "rewards/margins": 0.5568960309028625,
      "rewards/rejected": -0.4449191689491272,
      "step": 31
    },
    {
      "epoch": 0.0149480322316945,
      "grad_norm": 23.462085723876953,
      "learning_rate": 2.9990674991747865e-05,
      "logits/chosen": -2.608139753341675,
      "logits/rejected": -3.061997175216675,
      "logps/chosen": -158.59423828125,
      "logps/rejected": -130.51840209960938,
      "loss": 1.2301,
      "rewards/accuracies": 0.46875,
      "rewards/chosen": -0.5943432450294495,
      "rewards/margins": -0.38749587535858154,
      "rewards/rejected": -0.2068473994731903,
      "step": 32
    },
    {
      "epoch": 0.015415158238934953,
      "grad_norm": 27.918336868286133,
      "learning_rate": 2.9988902688106014e-05,
      "logits/chosen": -2.9852523803710938,
      "logits/rejected": -2.5511088371276855,
      "logps/chosen": -166.7327880859375,
      "logps/rejected": -155.55520629882812,
      "loss": 0.8347,
      "rewards/accuracies": 0.65625,
      "rewards/chosen": -0.009264327585697174,
      "rewards/margins": 0.6062629222869873,
      "rewards/rejected": -0.615527331829071,
      "step": 33
    },
    {
      "epoch": 0.015882284246175406,
      "grad_norm": 25.436614990234375,
      "learning_rate": 2.9986976350297933e-05,
      "logits/chosen": -2.850193500518799,
      "logits/rejected": -2.510417938232422,
      "logps/chosen": -149.20751953125,
      "logps/rejected": -155.28610229492188,
      "loss": 0.9801,
      "rewards/accuracies": 0.53125,
      "rewards/chosen": -0.5431436896324158,
      "rewards/margins": 0.01562432199716568,
      "rewards/rejected": -0.5587680339813232,
      "step": 34
    },
    {
      "epoch": 0.01634941025341586,
      "grad_norm": 38.4265022277832,
      "learning_rate": 2.9984895998119723e-05,
      "logits/chosen": -2.4000887870788574,
      "logits/rejected": -2.29819393157959,
      "logps/chosen": -171.50152587890625,
      "logps/rejected": -191.80938720703125,
      "loss": 1.2524,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": -1.1593430042266846,
      "rewards/margins": 0.3696390390396118,
      "rewards/rejected": -1.5289819240570068,
      "step": 35
    },
    {
      "epoch": 0.016816536260656312,
      "grad_norm": 31.203420639038086,
      "learning_rate": 2.998266165295021e-05,
      "logits/chosen": -2.8619065284729004,
      "logits/rejected": -2.9606268405914307,
      "logps/chosen": -144.86415100097656,
      "logps/rejected": -180.39205932617188,
      "loss": 0.7866,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": -0.18791238963603973,
      "rewards/margins": 0.6746108531951904,
      "rewards/rejected": -0.8625231981277466,
      "step": 36
    },
    {
      "epoch": 0.017283662267896766,
      "grad_norm": 24.40894889831543,
      "learning_rate": 2.9980273337750767e-05,
      "logits/chosen": -2.757246255874634,
      "logits/rejected": -2.466960906982422,
      "logps/chosen": -192.2352294921875,
      "logps/rejected": -181.03878784179688,
      "loss": 0.8876,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": -0.6490573287010193,
      "rewards/margins": 0.9436599016189575,
      "rewards/rejected": -1.592717170715332,
      "step": 37
    },
    {
      "epoch": 0.01775078827513722,
      "grad_norm": 27.00850486755371,
      "learning_rate": 2.9977731077065013e-05,
      "logits/chosen": -2.9453818798065186,
      "logits/rejected": -2.8008580207824707,
      "logps/chosen": -152.46038818359375,
      "logps/rejected": -173.5645751953125,
      "loss": 0.6833,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -0.2769155204296112,
      "rewards/margins": 1.4065779447555542,
      "rewards/rejected": -1.6834933757781982,
      "step": 38
    },
    {
      "epoch": 0.01821791428237767,
      "grad_norm": 30.273073196411133,
      "learning_rate": 2.9975034897018614e-05,
      "logits/chosen": -2.7667531967163086,
      "logits/rejected": -2.948810338973999,
      "logps/chosen": -123.97679901123047,
      "logps/rejected": -174.9097900390625,
      "loss": 1.0514,
      "rewards/accuracies": 0.53125,
      "rewards/chosen": -0.6435041427612305,
      "rewards/margins": 0.23302727937698364,
      "rewards/rejected": -0.8765315413475037,
      "step": 39
    },
    {
      "epoch": 0.018685040289618125,
      "grad_norm": 33.39718246459961,
      "learning_rate": 2.9972184825318994e-05,
      "logits/chosen": -2.9786558151245117,
      "logits/rejected": -2.691629409790039,
      "logps/chosen": -186.82821655273438,
      "logps/rejected": -167.65603637695312,
      "loss": 1.2466,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": -1.0377531051635742,
      "rewards/margins": 0.2545713186264038,
      "rewards/rejected": -1.292324423789978,
      "step": 40
    }
  ],
  "logging_steps": 1,
  "max_steps": 1000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 10,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}