{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.6413136438793545,
  "eval_steps": 500,
  "global_step": 1800,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.045612634699811846,
      "grad_norm": 59.46120071411133,
      "learning_rate": 6.409090909090908e-07,
      "logits/chosen": -1.3211605548858643,
      "logits/rejected": -1.3492553234100342,
      "logps/chosen": -169.80906677246094,
      "logps/rejected": -200.97677612304688,
      "loss": 0.6812,
      "rewards/accuracies": 0.5562499761581421,
      "rewards/chosen": -0.0059404876083135605,
      "rewards/margins": 0.025952553376555443,
      "rewards/rejected": -0.031893040984869,
      "step": 50
    },
    {
      "epoch": 0.09122526939962369,
      "grad_norm": 86.95048522949219,
      "learning_rate": 1.3227272727272727e-06,
      "logits/chosen": -1.411544919013977,
      "logits/rejected": -1.4442178010940552,
      "logps/chosen": -178.4121551513672,
      "logps/rejected": -219.94424438476562,
      "loss": 0.5771,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -0.29405269026756287,
      "rewards/margins": 0.4449906647205353,
      "rewards/rejected": -0.7390434145927429,
      "step": 100
    },
    {
      "epoch": 0.13683790409943555,
      "grad_norm": 64.74836730957031,
      "learning_rate": 1.9909090909090913e-06,
      "logits/chosen": -1.485386610031128,
      "logits/rejected": -1.5248019695281982,
      "logps/chosen": -175.85545349121094,
      "logps/rejected": -215.98086547851562,
      "loss": 0.4865,
      "rewards/accuracies": 0.7524999976158142,
      "rewards/chosen": -0.47845691442489624,
      "rewards/margins": 1.005491852760315,
      "rewards/rejected": -1.483948826789856,
      "step": 150
    },
    {
      "epoch": 0.18245053879924739,
      "grad_norm": 48.148223876953125,
      "learning_rate": 2.6727272727272727e-06,
      "logits/chosen": -1.6364929676055908,
      "logits/rejected": -1.6319957971572876,
      "logps/chosen": -179.0496063232422,
      "logps/rejected": -229.71234130859375,
      "loss": 0.4315,
      "rewards/accuracies": 0.7674999833106995,
      "rewards/chosen": -0.8819997906684875,
      "rewards/margins": 1.617713212966919,
      "rewards/rejected": -2.499713182449341,
      "step": 200
    },
    {
      "epoch": 0.22806317349905925,
      "grad_norm": 90.83224487304688,
      "learning_rate": 2.9987134348280704e-06,
      "logits/chosen": -1.5377447605133057,
      "logits/rejected": -1.5789711475372314,
      "logps/chosen": -189.28240966796875,
      "logps/rejected": -241.5106964111328,
      "loss": 0.4431,
      "rewards/accuracies": 0.7674999833106995,
      "rewards/chosen": -1.1264184713363647,
      "rewards/margins": 1.8479437828063965,
      "rewards/rejected": -2.9743621349334717,
      "step": 250
    },
    {
      "epoch": 0.2736758081988711,
      "grad_norm": 54.77507781982422,
      "learning_rate": 2.9890189525113655e-06,
      "logits/chosen": -1.6410760879516602,
      "logits/rejected": -1.5978457927703857,
      "logps/chosen": -178.2059326171875,
      "logps/rejected": -239.80654907226562,
      "loss": 0.4138,
      "rewards/accuracies": 0.7987499833106995,
      "rewards/chosen": -1.0319082736968994,
      "rewards/margins": 2.2714104652404785,
      "rewards/rejected": -3.303318500518799,
      "step": 300
    },
    {
      "epoch": 0.3192884428986829,
      "grad_norm": 40.69072723388672,
      "learning_rate": 2.9698817631509474e-06,
      "logits/chosen": -1.7185229063034058,
      "logits/rejected": -1.751165509223938,
      "logps/chosen": -190.39962768554688,
      "logps/rejected": -247.27247619628906,
      "loss": 0.385,
      "rewards/accuracies": 0.8112499713897705,
      "rewards/chosen": -1.313816785812378,
      "rewards/margins": 2.4288673400878906,
      "rewards/rejected": -3.7426836490631104,
      "step": 350
    },
    {
      "epoch": 0.36490107759849477,
      "grad_norm": 51.27244567871094,
      "learning_rate": 2.9414232264329826e-06,
      "logits/chosen": -1.6467254161834717,
      "logits/rejected": -1.6478239297866821,
      "logps/chosen": -179.54312133789062,
      "logps/rejected": -231.5401153564453,
      "loss": 0.431,
      "rewards/accuracies": 0.7837499976158142,
      "rewards/chosen": -1.276109218597412,
      "rewards/margins": 2.3179662227630615,
      "rewards/rejected": -3.5940752029418945,
      "step": 400
    },
    {
      "epoch": 0.41051371229830663,
      "grad_norm": 70.65045928955078,
      "learning_rate": 2.9038238139523195e-06,
      "logits/chosen": -1.649796724319458,
      "logits/rejected": -1.6734592914581299,
      "logps/chosen": -178.8260955810547,
      "logps/rejected": -234.79722595214844,
      "loss": 0.3768,
      "rewards/accuracies": 0.8149999976158142,
      "rewards/chosen": -1.090864896774292,
      "rewards/margins": 2.503946304321289,
      "rewards/rejected": -3.59481143951416,
      "step": 450
    },
    {
      "epoch": 0.4561263469981185,
      "grad_norm": 53.61136245727539,
      "learning_rate": 2.857321964740558e-06,
      "logits/chosen": -1.5642222166061401,
      "logits/rejected": -1.5648502111434937,
      "logps/chosen": -191.90611267089844,
      "logps/rejected": -252.19386291503906,
      "loss": 0.3945,
      "rewards/accuracies": 0.7962499856948853,
      "rewards/chosen": -1.3972392082214355,
      "rewards/margins": 2.93603515625,
      "rewards/rejected": -4.3332743644714355,
      "step": 500
    },
    {
      "epoch": 0.5017389816979303,
      "grad_norm": 51.6324577331543,
      "learning_rate": 2.802212573189981e-06,
      "logits/chosen": -1.6049363613128662,
      "logits/rejected": -1.6026443243026733,
      "logps/chosen": -187.35401916503906,
      "logps/rejected": -257.5898742675781,
      "loss": 0.331,
      "rewards/accuracies": 0.8287500143051147,
      "rewards/chosen": -1.2774113416671753,
      "rewards/margins": 3.223512649536133,
      "rewards/rejected": -4.500924110412598,
      "step": 550
    },
    {
      "epoch": 0.5473516163977422,
      "grad_norm": 40.728816986083984,
      "learning_rate": 2.738845118962275e-06,
      "logits/chosen": -1.6046830415725708,
      "logits/rejected": -1.5583266019821167,
      "logps/chosen": -197.91168212890625,
      "logps/rejected": -263.0049743652344,
      "loss": 0.3482,
      "rewards/accuracies": 0.8224999904632568,
      "rewards/chosen": -1.6200157403945923,
      "rewards/margins": 3.3425958156585693,
      "rewards/rejected": -4.962611198425293,
      "step": 600
    },
    {
      "epoch": 0.592964251097554,
      "grad_norm": 45.273345947265625,
      "learning_rate": 2.6676214507413463e-06,
      "logits/chosen": -1.6240191459655762,
      "logits/rejected": -1.611716866493225,
      "logps/chosen": -194.59600830078125,
      "logps/rejected": -265.8725891113281,
      "loss": 0.3288,
      "rewards/accuracies": 0.8412500023841858,
      "rewards/chosen": -1.3388230800628662,
      "rewards/margins": 3.5074219703674316,
      "rewards/rejected": -4.846245288848877,
      "step": 650
    },
    {
      "epoch": 0.6385768857973658,
      "grad_norm": 34.892608642578125,
      "learning_rate": 2.5889932378846963e-06,
      "logits/chosen": -1.7866308689117432,
      "logits/rejected": -1.7742173671722412,
      "logps/chosen": -184.9108428955078,
      "logps/rejected": -247.7977294921875,
      "loss": 0.3715,
      "rewards/accuracies": 0.8149999976158142,
      "rewards/chosen": -1.4488279819488525,
      "rewards/margins": 3.0412204265594482,
      "rewards/rejected": -4.490048408508301,
      "step": 700
    },
    {
      "epoch": 0.6841895204971777,
      "grad_norm": 55.344505310058594,
      "learning_rate": 2.5052340273394205e-06,
      "logits/chosen": -1.826340675354004,
      "logits/rejected": -1.8333815336227417,
      "logps/chosen": -184.3016357421875,
      "logps/rejected": -246.4990692138672,
      "loss": 0.3508,
      "rewards/accuracies": 0.8274999856948853,
      "rewards/chosen": -1.5381311178207397,
      "rewards/margins": 3.2081944942474365,
      "rewards/rejected": -4.746325492858887,
      "step": 750
    },
    {
      "epoch": 0.7298021551969895,
      "grad_norm": 16.396432876586914,
      "learning_rate": 2.4134580840681784e-06,
      "logits/chosen": -1.8897172212600708,
      "logits/rejected": -1.9127064943313599,
      "logps/chosen": -206.5291748046875,
      "logps/rejected": -274.1864318847656,
      "loss": 0.3101,
      "rewards/accuracies": 0.8550000190734863,
      "rewards/chosen": -2.1525042057037354,
      "rewards/margins": 3.880983591079712,
      "rewards/rejected": -6.033487319946289,
      "step": 800
    },
    {
      "epoch": 0.7754147898968015,
      "grad_norm": 37.60529327392578,
      "learning_rate": 2.3158893890861404e-06,
      "logits/chosen": -1.7926125526428223,
      "logits/rejected": -1.7985824346542358,
      "logps/chosen": -196.04129028320312,
      "logps/rejected": -259.4538269042969,
      "loss": 0.3114,
      "rewards/accuracies": 0.8500000238418579,
      "rewards/chosen": -2.2751173973083496,
      "rewards/margins": 3.9349923133850098,
      "rewards/rejected": -6.210109710693359,
      "step": 850
    },
    {
      "epoch": 0.8210274245966133,
      "grad_norm": 38.6811408996582,
      "learning_rate": 2.213146680389758e-06,
      "logits/chosen": -1.8608883619308472,
      "logits/rejected": -1.8560402393341064,
      "logps/chosen": -196.21221923828125,
      "logps/rejected": -274.54345703125,
      "loss": 0.3121,
      "rewards/accuracies": 0.8450000286102295,
      "rewards/chosen": -2.3408617973327637,
      "rewards/margins": 4.028858184814453,
      "rewards/rejected": -6.369719982147217,
      "step": 900
    },
    {
      "epoch": 0.8666400592964251,
      "grad_norm": 67.59371948242188,
      "learning_rate": 2.1058815073078422e-06,
      "logits/chosen": -1.945671558380127,
      "logits/rejected": -1.9288123846054077,
      "logps/chosen": -197.5050811767578,
      "logps/rejected": -271.6333312988281,
      "loss": 0.2909,
      "rewards/accuracies": 0.8537499904632568,
      "rewards/chosen": -1.4789001941680908,
      "rewards/margins": 4.0447797775268555,
      "rewards/rejected": -5.523679256439209,
      "step": 950
    },
    {
      "epoch": 0.912252693996237,
      "grad_norm": 28.43963050842285,
      "learning_rate": 1.99477409866065e-06,
      "logits/chosen": -1.9384291172027588,
      "logits/rejected": -1.9162278175354004,
      "logps/chosen": -200.1200408935547,
      "logps/rejected": -264.21453857421875,
      "loss": 0.3136,
      "rewards/accuracies": 0.8462499976158142,
      "rewards/chosen": -1.7790579795837402,
      "rewards/margins": 3.894080400466919,
      "rewards/rejected": -5.673138618469238,
      "step": 1000
    },
    {
      "epoch": 0.9578653286960488,
      "grad_norm": 49.5960807800293,
      "learning_rate": 1.8805290490461682e-06,
      "logits/chosen": -1.9607409238815308,
      "logits/rejected": -1.9329227209091187,
      "logps/chosen": -190.69606018066406,
      "logps/rejected": -262.543212890625,
      "loss": 0.2701,
      "rewards/accuracies": 0.8799999952316284,
      "rewards/chosen": -1.4124534130096436,
      "rewards/margins": 3.9889495372772217,
      "rewards/rejected": -5.401403427124023,
      "step": 1050
    },
    {
      "epoch": 1.0027367580819888,
      "grad_norm": 8.592507362365723,
      "learning_rate": 1.763870850609299e-06,
      "logits/chosen": -2.1728835105895996,
      "logits/rejected": -2.0995230674743652,
      "logps/chosen": -191.92657470703125,
      "logps/rejected": -273.9614562988281,
      "loss": 0.2427,
      "rewards/accuracies": 0.8856416940689087,
      "rewards/chosen": -1.5243728160858154,
      "rewards/margins": 4.81222677230835,
      "rewards/rejected": -6.336598873138428,
      "step": 1100
    },
    {
      "epoch": 1.0483493927818006,
      "grad_norm": 15.577631950378418,
      "learning_rate": 1.6455392986294975e-06,
      "logits/chosen": -2.4582772254943848,
      "logits/rejected": -2.3846638202667236,
      "logps/chosen": -189.32957458496094,
      "logps/rejected": -283.9933166503906,
      "loss": 0.063,
      "rewards/accuracies": 0.9825000166893005,
      "rewards/chosen": -1.279435634613037,
      "rewards/margins": 6.347279071807861,
      "rewards/rejected": -7.626713752746582,
      "step": 1150
    },
    {
      "epoch": 1.0939620274816124,
      "grad_norm": 43.521793365478516,
      "learning_rate": 1.5262848000626241e-06,
      "logits/chosen": -3.1720001697540283,
      "logits/rejected": -3.0936548709869385,
      "logps/chosen": -191.5170440673828,
      "logps/rejected": -302.63201904296875,
      "loss": 0.0713,
      "rewards/accuracies": 0.9800000190734863,
      "rewards/chosen": -1.885518193244934,
      "rewards/margins": 7.22011661529541,
      "rewards/rejected": -9.105633735656738,
      "step": 1200
    },
    {
      "epoch": 1.1395746621814242,
      "grad_norm": 24.051679611206055,
      "learning_rate": 1.4068636147881868e-06,
      "logits/chosen": -3.590402126312256,
      "logits/rejected": -3.4608168601989746,
      "logps/chosen": -199.92221069335938,
      "logps/rejected": -318.69219970703125,
      "loss": 0.05,
      "rewards/accuracies": 0.981249988079071,
      "rewards/chosen": -2.2472312450408936,
      "rewards/margins": 8.160175323486328,
      "rewards/rejected": -10.407405853271484,
      "step": 1250
    },
    {
      "epoch": 1.185187296881236,
      "grad_norm": 9.708622932434082,
      "learning_rate": 1.288033059739901e-06,
      "logits/chosen": -3.806685209274292,
      "logits/rejected": -3.6277265548706055,
      "logps/chosen": -205.2801971435547,
      "logps/rejected": -317.83807373046875,
      "loss": 0.063,
      "rewards/accuracies": 0.9825000166893005,
      "rewards/chosen": -2.7129626274108887,
      "rewards/margins": 8.290043830871582,
      "rewards/rejected": -11.003006935119629,
      "step": 1300
    },
    {
      "epoch": 1.230799931581048,
      "grad_norm": 2.236952781677246,
      "learning_rate": 1.170546706332872e-06,
      "logits/chosen": -3.8718817234039307,
      "logits/rejected": -3.7462046146392822,
      "logps/chosen": -209.9652862548828,
      "logps/rejected": -328.6727600097656,
      "loss": 0.0501,
      "rewards/accuracies": 0.981249988079071,
      "rewards/chosen": -3.4696946144104004,
      "rewards/margins": 8.770748138427734,
      "rewards/rejected": -12.240442276000977,
      "step": 1350
    },
    {
      "epoch": 1.2764125662808599,
      "grad_norm": 1.4985854625701904,
      "learning_rate": 1.0551496016432202e-06,
      "logits/chosen": -4.127021312713623,
      "logits/rejected": -3.9822707176208496,
      "logps/chosen": -216.8661651611328,
      "logps/rejected": -341.8252258300781,
      "loss": 0.0577,
      "rewards/accuracies": 0.981249988079071,
      "rewards/chosen": -3.4729561805725098,
      "rewards/margins": 8.868853569030762,
      "rewards/rejected": -12.34181022644043,
      "step": 1400
    },
    {
      "epoch": 1.3220252009806717,
      "grad_norm": 37.12932205200195,
      "learning_rate": 9.425735436453361e-07,
      "logits/chosen": -4.2538580894470215,
      "logits/rejected": -4.086177825927734,
      "logps/chosen": -206.88836669921875,
      "logps/rejected": -324.8296813964844,
      "loss": 0.0603,
      "rewards/accuracies": 0.9775000214576721,
      "rewards/chosen": -3.3129966259002686,
      "rewards/margins": 8.81570053100586,
      "rewards/rejected": -12.128695487976074,
      "step": 1450
    },
    {
      "epoch": 1.3676378356804835,
      "grad_norm": 19.34769630432129,
      "learning_rate": 8.356741060752298e-07,
      "logits/chosen": -4.326617240905762,
      "logits/rejected": -4.108016490936279,
      "logps/chosen": -211.08132934570312,
      "logps/rejected": -342.179443359375,
      "loss": 0.0639,
      "rewards/accuracies": 0.9787499904632568,
      "rewards/chosen": -3.5119857788085938,
      "rewards/margins": 9.29565715789795,
      "rewards/rejected": -12.807641983032227,
      "step": 1500
    },
    {
      "epoch": 1.4132504703802953,
      "grad_norm": 3.8228871822357178,
      "learning_rate": 7.307683094676016e-07,
      "logits/chosen": -4.324881076812744,
      "logits/rejected": -4.193660259246826,
      "logps/chosen": -209.66769409179688,
      "logps/rejected": -341.2529296875,
      "loss": 0.0562,
      "rewards/accuracies": 0.981249988079071,
      "rewards/chosen": -3.7620954513549805,
      "rewards/margins": 9.563469886779785,
      "rewards/rejected": -13.32556438446045,
      "step": 1550
    },
    {
      "epoch": 1.4588631050801073,
      "grad_norm": 10.610180854797363,
      "learning_rate": 6.30740643853663e-07,
      "logits/chosen": -4.231253623962402,
      "logits/rejected": -4.145637512207031,
      "logps/chosen": -206.0325927734375,
      "logps/rejected": -329.6341857910156,
      "loss": 0.0599,
      "rewards/accuracies": 0.9775000214576721,
      "rewards/chosen": -3.3670105934143066,
      "rewards/margins": 9.107893943786621,
      "rewards/rejected": -12.474905014038086,
      "step": 1600
    },
    {
      "epoch": 1.5044757397799189,
      "grad_norm": 3.419102191925049,
      "learning_rate": 5.362254409462939e-07,
      "logits/chosen": -4.283145904541016,
      "logits/rejected": -4.177493095397949,
      "logps/chosen": -207.3034210205078,
      "logps/rejected": -323.41595458984375,
      "loss": 0.0611,
      "rewards/accuracies": 0.9737499952316284,
      "rewards/chosen": -3.4872689247131348,
      "rewards/margins": 8.951678276062012,
      "rewards/rejected": -12.438947677612305,
      "step": 1650
    },
    {
      "epoch": 1.550088374479731,
      "grad_norm": 9.434532165527344,
      "learning_rate": 4.478220748305115e-07,
      "logits/chosen": -4.194823265075684,
      "logits/rejected": -4.0666184425354,
      "logps/chosen": -206.98306274414062,
      "logps/rejected": -332.0705871582031,
      "loss": 0.0626,
      "rewards/accuracies": 0.9800000190734863,
      "rewards/chosen": -3.25546932220459,
      "rewards/margins": 9.118634223937988,
      "rewards/rejected": -12.374103546142578,
      "step": 1700
    },
    {
      "epoch": 1.5957010091795427,
      "grad_norm": 25.252395629882812,
      "learning_rate": 3.6609116099512447e-07,
      "logits/chosen": -4.359646320343018,
      "logits/rejected": -4.207290172576904,
      "logps/chosen": -213.02706909179688,
      "logps/rejected": -331.1492919921875,
      "loss": 0.0584,
      "rewards/accuracies": 0.9787499904632568,
      "rewards/chosen": -3.5253746509552,
      "rewards/margins": 9.35997200012207,
      "rewards/rejected": -12.885346412658691,
      "step": 1750
    },
    {
      "epoch": 1.6413136438793545,
      "grad_norm": 3.1381542682647705,
      "learning_rate": 2.915510011544664e-07,
      "logits/chosen": -4.435996055603027,
      "logits/rejected": -4.291849136352539,
      "logps/chosen": -211.94927978515625,
      "logps/rejected": -346.5935974121094,
      "loss": 0.0417,
      "rewards/accuracies": 0.9850000143051147,
      "rewards/chosen": -3.7163028717041016,
      "rewards/margins": 9.784122467041016,
      "rewards/rejected": -13.5004243850708,
      "step": 1800
    }
  ],
  "logging_steps": 50,
  "max_steps": 2192,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 200,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}