| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 4.0, | |
| "eval_steps": 500, | |
| "global_step": 5972, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.033489618218352314, | |
| "grad_norm": 39.56058883666992, | |
| "learning_rate": 4.375e-08, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -175.2135009765625, | |
| "logps/rejected": -218.9482421875, | |
| "loss": 0.6927, | |
| "rewards/accuracies": 0.41499999165534973, | |
| "rewards/chosen": 0.0009201900684274733, | |
| "rewards/margins": 0.0012563117779791355, | |
| "rewards/rejected": -0.00033612194238230586, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.06697923643670463, | |
| "grad_norm": 55.64208984375, | |
| "learning_rate": 8.839285714285714e-08, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -179.50753784179688, | |
| "logps/rejected": -224.6807403564453, | |
| "loss": 0.6932, | |
| "rewards/accuracies": 0.4087499976158142, | |
| "rewards/chosen": -0.0016289422055706382, | |
| "rewards/margins": 0.0002692897687666118, | |
| "rewards/rejected": -0.001898231916129589, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.10046885465505694, | |
| "grad_norm": 48.16231918334961, | |
| "learning_rate": 1.3303571428571427e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -164.7879638671875, | |
| "logps/rejected": -219.03224182128906, | |
| "loss": 0.6925, | |
| "rewards/accuracies": 0.4325000047683716, | |
| "rewards/chosen": -0.0006416282267309725, | |
| "rewards/margins": 0.0016548261046409607, | |
| "rewards/rejected": -0.0022964540403336287, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.13395847287340926, | |
| "grad_norm": 45.87161636352539, | |
| "learning_rate": 1.776785714285714e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -176.67550659179688, | |
| "logps/rejected": -221.42312622070312, | |
| "loss": 0.6933, | |
| "rewards/accuracies": 0.4050000011920929, | |
| "rewards/chosen": -0.003886653808876872, | |
| "rewards/margins": 8.521832205587998e-05, | |
| "rewards/rejected": -0.003971872851252556, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.16744809109176156, | |
| "grad_norm": 34.68981170654297, | |
| "learning_rate": 2.2232142857142856e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -178.92369079589844, | |
| "logps/rejected": -222.72586059570312, | |
| "loss": 0.6904, | |
| "rewards/accuracies": 0.4625000059604645, | |
| "rewards/chosen": -0.004237725865095854, | |
| "rewards/margins": 0.005978057160973549, | |
| "rewards/rejected": -0.010215784423053265, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.20093770931011387, | |
| "grad_norm": 45.68313217163086, | |
| "learning_rate": 2.669642857142857e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -176.19508361816406, | |
| "logps/rejected": -220.88339233398438, | |
| "loss": 0.6866, | |
| "rewards/accuracies": 0.4937500059604645, | |
| "rewards/chosen": -0.010833066888153553, | |
| "rewards/margins": 0.014277225360274315, | |
| "rewards/rejected": -0.025110295042395592, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.23442732752846618, | |
| "grad_norm": 45.355247497558594, | |
| "learning_rate": 3.1160714285714285e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": -1.2874314785003662, | |
| "logps/chosen": -172.1802520751953, | |
| "logps/rejected": -222.34373474121094, | |
| "loss": 0.6817, | |
| "rewards/accuracies": 0.518750011920929, | |
| "rewards/chosen": -0.022201180458068848, | |
| "rewards/margins": 0.024774856865406036, | |
| "rewards/rejected": -0.04697604104876518, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.2679169457468185, | |
| "grad_norm": 44.401466369628906, | |
| "learning_rate": 3.5625e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -171.78598022460938, | |
| "logps/rejected": -225.00076293945312, | |
| "loss": 0.6694, | |
| "rewards/accuracies": 0.5525000095367432, | |
| "rewards/chosen": -0.04056182876229286, | |
| "rewards/margins": 0.05375281721353531, | |
| "rewards/rejected": -0.09431464225053787, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.3014065639651708, | |
| "grad_norm": 42.0828742980957, | |
| "learning_rate": 4.008928571428571e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -179.93777465820312, | |
| "logps/rejected": -225.8251953125, | |
| "loss": 0.6615, | |
| "rewards/accuracies": 0.5262500047683716, | |
| "rewards/chosen": -0.08230926841497421, | |
| "rewards/margins": 0.07786127924919128, | |
| "rewards/rejected": -0.1601705402135849, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.33489618218352313, | |
| "grad_norm": 37.493553161621094, | |
| "learning_rate": 4.455357142857143e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -177.60545349121094, | |
| "logps/rejected": -223.0897674560547, | |
| "loss": 0.6547, | |
| "rewards/accuracies": 0.5099999904632568, | |
| "rewards/chosen": -0.12488727271556854, | |
| "rewards/margins": 0.10414745658636093, | |
| "rewards/rejected": -0.22903470695018768, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.3683858004018754, | |
| "grad_norm": 42.67152404785156, | |
| "learning_rate": 4.901785714285714e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -173.47787475585938, | |
| "logps/rejected": -232.50518798828125, | |
| "loss": 0.631, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.15443742275238037, | |
| "rewards/margins": 0.18413911759853363, | |
| "rewards/rejected": -0.3385765552520752, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.40187541862022774, | |
| "grad_norm": 32.57563018798828, | |
| "learning_rate": 5.348214285714285e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -181.27125549316406, | |
| "logps/rejected": -241.04226684570312, | |
| "loss": 0.6242, | |
| "rewards/accuracies": 0.53125, | |
| "rewards/chosen": -0.2177175134420395, | |
| "rewards/margins": 0.22946205735206604, | |
| "rewards/rejected": -0.4471796154975891, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.43536503683858, | |
| "grad_norm": 47.70533752441406, | |
| "learning_rate": 5.794642857142857e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -195.95242309570312, | |
| "logps/rejected": -240.94540405273438, | |
| "loss": 0.6072, | |
| "rewards/accuracies": 0.5475000143051147, | |
| "rewards/chosen": -0.31218427419662476, | |
| "rewards/margins": 0.3060773015022278, | |
| "rewards/rejected": -0.6182616353034973, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.46885465505693236, | |
| "grad_norm": 55.132423400878906, | |
| "learning_rate": 6.241071428571429e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -187.1224365234375, | |
| "logps/rejected": -239.97381591796875, | |
| "loss": 0.598, | |
| "rewards/accuracies": 0.5475000143051147, | |
| "rewards/chosen": -0.38468390703201294, | |
| "rewards/margins": 0.3886369466781616, | |
| "rewards/rejected": -0.7733209133148193, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.5023442732752846, | |
| "grad_norm": 35.69628143310547, | |
| "learning_rate": 6.6875e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -181.9203643798828, | |
| "logps/rejected": -238.3597412109375, | |
| "loss": 0.6054, | |
| "rewards/accuracies": 0.5350000262260437, | |
| "rewards/chosen": -0.4038671851158142, | |
| "rewards/margins": 0.38458195328712463, | |
| "rewards/rejected": -0.7884491086006165, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.535833891493637, | |
| "grad_norm": 52.34265899658203, | |
| "learning_rate": 7.133928571428571e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -173.30198669433594, | |
| "logps/rejected": -238.93728637695312, | |
| "loss": 0.5574, | |
| "rewards/accuracies": 0.5899999737739563, | |
| "rewards/chosen": -0.4324275553226471, | |
| "rewards/margins": 0.5758498907089233, | |
| "rewards/rejected": -1.008277416229248, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.5693235097119893, | |
| "grad_norm": 50.31780242919922, | |
| "learning_rate": 7.580357142857143e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -177.4162139892578, | |
| "logps/rejected": -239.99441528320312, | |
| "loss": 0.5693, | |
| "rewards/accuracies": 0.5762500166893005, | |
| "rewards/chosen": -0.4396001696586609, | |
| "rewards/margins": 0.5518670678138733, | |
| "rewards/rejected": -0.991467297077179, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.6028131279303416, | |
| "grad_norm": 35.81449508666992, | |
| "learning_rate": 7.995271867612292e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -187.32102966308594, | |
| "logps/rejected": -241.9414520263672, | |
| "loss": 0.5773, | |
| "rewards/accuracies": 0.5774999856948853, | |
| "rewards/chosen": -0.5533062219619751, | |
| "rewards/margins": 0.5976377129554749, | |
| "rewards/rejected": -1.1509439945220947, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.6363027461486939, | |
| "grad_norm": 37.500022888183594, | |
| "learning_rate": 7.916469661150512e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -187.38853454589844, | |
| "logps/rejected": -238.58958435058594, | |
| "loss": 0.5608, | |
| "rewards/accuracies": 0.574999988079071, | |
| "rewards/chosen": -0.5429355502128601, | |
| "rewards/margins": 0.6483522057533264, | |
| "rewards/rejected": -1.1912877559661865, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.6697923643670463, | |
| "grad_norm": 45.89781188964844, | |
| "learning_rate": 7.837667454688732e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -175.39967346191406, | |
| "logps/rejected": -242.6263427734375, | |
| "loss": 0.5456, | |
| "rewards/accuracies": 0.5824999809265137, | |
| "rewards/chosen": -0.546024739742279, | |
| "rewards/margins": 0.7407156229019165, | |
| "rewards/rejected": -1.2867404222488403, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.7032819825853985, | |
| "grad_norm": 30.887229919433594, | |
| "learning_rate": 7.75886524822695e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -180.49740600585938, | |
| "logps/rejected": -256.4322509765625, | |
| "loss": 0.5166, | |
| "rewards/accuracies": 0.6274999976158142, | |
| "rewards/chosen": -0.6010158658027649, | |
| "rewards/margins": 0.8881167769432068, | |
| "rewards/rejected": -1.4891326427459717, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.7367716008037508, | |
| "grad_norm": 54.525856018066406, | |
| "learning_rate": 7.680063041765169e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -169.29653930664062, | |
| "logps/rejected": -241.8204345703125, | |
| "loss": 0.5213, | |
| "rewards/accuracies": 0.5849999785423279, | |
| "rewards/chosen": -0.6621356010437012, | |
| "rewards/margins": 0.8917463421821594, | |
| "rewards/rejected": -1.553882122039795, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.7702612190221031, | |
| "grad_norm": 37.80088424682617, | |
| "learning_rate": 7.601260835303388e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -183.52398681640625, | |
| "logps/rejected": -246.53189086914062, | |
| "loss": 0.5385, | |
| "rewards/accuracies": 0.5950000286102295, | |
| "rewards/chosen": -0.6485376954078674, | |
| "rewards/margins": 0.8500573039054871, | |
| "rewards/rejected": -1.4985949993133545, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.8037508372404555, | |
| "grad_norm": 38.089324951171875, | |
| "learning_rate": 7.522458628841607e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -191.8751220703125, | |
| "logps/rejected": -272.0120849609375, | |
| "loss": 0.5175, | |
| "rewards/accuracies": 0.6162499785423279, | |
| "rewards/chosen": -0.6311337351799011, | |
| "rewards/margins": 0.9966024160385132, | |
| "rewards/rejected": -1.627736210823059, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.8372404554588078, | |
| "grad_norm": 39.7374153137207, | |
| "learning_rate": 7.443656422379827e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -190.6376190185547, | |
| "logps/rejected": -257.7156982421875, | |
| "loss": 0.5304, | |
| "rewards/accuracies": 0.5975000262260437, | |
| "rewards/chosen": -0.7539389133453369, | |
| "rewards/margins": 1.0195887088775635, | |
| "rewards/rejected": -1.7735275030136108, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.87073007367716, | |
| "grad_norm": 30.075101852416992, | |
| "learning_rate": 7.364854215918045e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -188.0233154296875, | |
| "logps/rejected": -246.95262145996094, | |
| "loss": 0.5185, | |
| "rewards/accuracies": 0.6162499785423279, | |
| "rewards/chosen": -0.6855795383453369, | |
| "rewards/margins": 0.9803519248962402, | |
| "rewards/rejected": -1.6659313440322876, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.9042196918955124, | |
| "grad_norm": 39.85395431518555, | |
| "learning_rate": 7.286052009456264e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -182.7262725830078, | |
| "logps/rejected": -253.09869384765625, | |
| "loss": 0.5237, | |
| "rewards/accuracies": 0.6012499928474426, | |
| "rewards/chosen": -0.606716513633728, | |
| "rewards/margins": 0.8856968879699707, | |
| "rewards/rejected": -1.4924132823944092, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.9377093101138647, | |
| "grad_norm": 66.29072570800781, | |
| "learning_rate": 7.207249802994484e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -186.25836181640625, | |
| "logps/rejected": -241.17901611328125, | |
| "loss": 0.5323, | |
| "rewards/accuracies": 0.6012499928474426, | |
| "rewards/chosen": -0.6343129873275757, | |
| "rewards/margins": 0.9016135931015015, | |
| "rewards/rejected": -1.5359266996383667, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.971198928332217, | |
| "grad_norm": 36.76164245605469, | |
| "learning_rate": 7.128447596532703e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -182.4203643798828, | |
| "logps/rejected": -249.50076293945312, | |
| "loss": 0.5378, | |
| "rewards/accuracies": 0.5849999785423279, | |
| "rewards/chosen": -0.5479399561882019, | |
| "rewards/margins": 0.8743146657943726, | |
| "rewards/rejected": -1.4222546815872192, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 1.0046885465505693, | |
| "grad_norm": 53.488407135009766, | |
| "learning_rate": 7.049645390070921e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -182.12411499023438, | |
| "logps/rejected": -253.10464477539062, | |
| "loss": 0.4944, | |
| "rewards/accuracies": 0.6324999928474426, | |
| "rewards/chosen": -0.5486608147621155, | |
| "rewards/margins": 1.0833656787872314, | |
| "rewards/rejected": -1.6320266723632812, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.0381781647689217, | |
| "grad_norm": 49.80898666381836, | |
| "learning_rate": 6.97084318360914e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -186.8605499267578, | |
| "logps/rejected": -265.5860595703125, | |
| "loss": 0.4357, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -0.6526676416397095, | |
| "rewards/margins": 1.3229660987854004, | |
| "rewards/rejected": -1.9756335020065308, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 1.0716677829872738, | |
| "grad_norm": 39.074790954589844, | |
| "learning_rate": 6.89204097714736e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -185.23265075683594, | |
| "logps/rejected": -257.21270751953125, | |
| "loss": 0.455, | |
| "rewards/accuracies": 0.6762499809265137, | |
| "rewards/chosen": -0.6439327001571655, | |
| "rewards/margins": 1.2369264364242554, | |
| "rewards/rejected": -1.8808592557907104, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 1.1051574012056262, | |
| "grad_norm": 24.9747257232666, | |
| "learning_rate": 6.813238770685579e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -193.9910125732422, | |
| "logps/rejected": -257.0543518066406, | |
| "loss": 0.4256, | |
| "rewards/accuracies": 0.6912500262260437, | |
| "rewards/chosen": -0.5875076055526733, | |
| "rewards/margins": 1.313684344291687, | |
| "rewards/rejected": -1.9011921882629395, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.1386470194239786, | |
| "grad_norm": 27.623506546020508, | |
| "learning_rate": 6.734436564223798e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -192.95932006835938, | |
| "logps/rejected": -246.83566284179688, | |
| "loss": 0.451, | |
| "rewards/accuracies": 0.6650000214576721, | |
| "rewards/chosen": -0.7364577651023865, | |
| "rewards/margins": 1.2061336040496826, | |
| "rewards/rejected": -1.9425911903381348, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 1.1721366376423308, | |
| "grad_norm": 39.15848922729492, | |
| "learning_rate": 6.655634357762017e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -182.35861206054688, | |
| "logps/rejected": -249.7042236328125, | |
| "loss": 0.4414, | |
| "rewards/accuracies": 0.6700000166893005, | |
| "rewards/chosen": -0.6322548985481262, | |
| "rewards/margins": 1.4189176559448242, | |
| "rewards/rejected": -2.0511724948883057, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 1.2056262558606832, | |
| "grad_norm": 46.19662857055664, | |
| "learning_rate": 6.576832151300236e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -186.02719116210938, | |
| "logps/rejected": -266.9911193847656, | |
| "loss": 0.4244, | |
| "rewards/accuracies": 0.7012500166893005, | |
| "rewards/chosen": -0.6425164937973022, | |
| "rewards/margins": 1.4480139017105103, | |
| "rewards/rejected": -2.0905306339263916, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 1.2391158740790356, | |
| "grad_norm": 25.113298416137695, | |
| "learning_rate": 6.498029944838455e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -178.42767333984375, | |
| "logps/rejected": -260.0691223144531, | |
| "loss": 0.4309, | |
| "rewards/accuracies": 0.6812499761581421, | |
| "rewards/chosen": -0.6952015161514282, | |
| "rewards/margins": 1.4248483180999756, | |
| "rewards/rejected": -2.1200499534606934, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 1.2726054922973877, | |
| "grad_norm": 44.80360412597656, | |
| "learning_rate": 6.419227738376675e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -186.84951782226562, | |
| "logps/rejected": -257.0547790527344, | |
| "loss": 0.4364, | |
| "rewards/accuracies": 0.6762499809265137, | |
| "rewards/chosen": -0.8910938501358032, | |
| "rewards/margins": 1.557470679283142, | |
| "rewards/rejected": -2.4485647678375244, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 1.3060951105157401, | |
| "grad_norm": 28.531023025512695, | |
| "learning_rate": 6.340425531914892e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -185.15121459960938, | |
| "logps/rejected": -261.5601501464844, | |
| "loss": 0.4392, | |
| "rewards/accuracies": 0.6725000143051147, | |
| "rewards/chosen": -0.7338109612464905, | |
| "rewards/margins": 1.4515758752822876, | |
| "rewards/rejected": -2.185386896133423, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 1.3395847287340925, | |
| "grad_norm": 28.397706985473633, | |
| "learning_rate": 6.261623325453112e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -184.82705688476562, | |
| "logps/rejected": -271.1513366699219, | |
| "loss": 0.4268, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": -0.7382247447967529, | |
| "rewards/margins": 1.5511670112609863, | |
| "rewards/rejected": -2.2893919944763184, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 1.3730743469524447, | |
| "grad_norm": 35.965423583984375, | |
| "learning_rate": 6.182821118991332e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -192.85757446289062, | |
| "logps/rejected": -273.0481262207031, | |
| "loss": 0.4352, | |
| "rewards/accuracies": 0.6850000023841858, | |
| "rewards/chosen": -0.8608375787734985, | |
| "rewards/margins": 1.4874813556671143, | |
| "rewards/rejected": -2.3483190536499023, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 1.406563965170797, | |
| "grad_norm": 38.594947814941406, | |
| "learning_rate": 6.10401891252955e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -195.38641357421875, | |
| "logps/rejected": -278.737548828125, | |
| "loss": 0.4166, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.7014285922050476, | |
| "rewards/margins": 1.5582598447799683, | |
| "rewards/rejected": -2.259688377380371, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 1.4400535833891492, | |
| "grad_norm": 35.8038215637207, | |
| "learning_rate": 6.025216706067769e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -188.76324462890625, | |
| "logps/rejected": -275.5191650390625, | |
| "loss": 0.404, | |
| "rewards/accuracies": 0.7087500095367432, | |
| "rewards/chosen": -0.6745861172676086, | |
| "rewards/margins": 1.6506869792938232, | |
| "rewards/rejected": -2.325273036956787, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 1.4735432016075016, | |
| "grad_norm": 29.62619972229004, | |
| "learning_rate": 5.946414499605989e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -187.25250244140625, | |
| "logps/rejected": -272.7938232421875, | |
| "loss": 0.398, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.6346092224121094, | |
| "rewards/margins": 1.7176785469055176, | |
| "rewards/rejected": -2.352287769317627, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 1.507032819825854, | |
| "grad_norm": 21.93035316467285, | |
| "learning_rate": 5.867612293144208e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -191.32916259765625, | |
| "logps/rejected": -260.4537048339844, | |
| "loss": 0.4424, | |
| "rewards/accuracies": 0.668749988079071, | |
| "rewards/chosen": -0.6742202639579773, | |
| "rewards/margins": 1.4148352146148682, | |
| "rewards/rejected": -2.0890555381774902, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 1.5405224380442064, | |
| "grad_norm": 15.887839317321777, | |
| "learning_rate": 5.788810086682427e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -190.4200439453125, | |
| "logps/rejected": -274.1068420410156, | |
| "loss": 0.4002, | |
| "rewards/accuracies": 0.6837499737739563, | |
| "rewards/chosen": -0.6341544985771179, | |
| "rewards/margins": 1.6712123155593872, | |
| "rewards/rejected": -2.3053667545318604, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 1.5740120562625586, | |
| "grad_norm": 46.13706588745117, | |
| "learning_rate": 5.710007880220646e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -184.80099487304688, | |
| "logps/rejected": -268.6807861328125, | |
| "loss": 0.4078, | |
| "rewards/accuracies": 0.6899999976158142, | |
| "rewards/chosen": -0.7255478501319885, | |
| "rewards/margins": 1.6650432348251343, | |
| "rewards/rejected": -2.3905911445617676, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 1.607501674480911, | |
| "grad_norm": 41.01249313354492, | |
| "learning_rate": 5.631205673758865e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -189.23605346679688, | |
| "logps/rejected": -268.6483154296875, | |
| "loss": 0.4133, | |
| "rewards/accuracies": 0.6787499785423279, | |
| "rewards/chosen": -0.8096724152565002, | |
| "rewards/margins": 1.5884754657745361, | |
| "rewards/rejected": -2.3981478214263916, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 1.6409912926992631, | |
| "grad_norm": 44.18216323852539, | |
| "learning_rate": 5.552403467297084e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -206.16220092773438, | |
| "logps/rejected": -275.8337097167969, | |
| "loss": 0.42, | |
| "rewards/accuracies": 0.6887500286102295, | |
| "rewards/chosen": -0.776029109954834, | |
| "rewards/margins": 1.5833215713500977, | |
| "rewards/rejected": -2.3593506813049316, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 1.6744809109176155, | |
| "grad_norm": 38.77958679199219, | |
| "learning_rate": 5.473601260835303e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -191.59031677246094, | |
| "logps/rejected": -268.516357421875, | |
| "loss": 0.4145, | |
| "rewards/accuracies": 0.6850000023841858, | |
| "rewards/chosen": -0.6448932886123657, | |
| "rewards/margins": 1.565537691116333, | |
| "rewards/rejected": -2.210430860519409, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 1.707970529135968, | |
| "grad_norm": 36.07415008544922, | |
| "learning_rate": 5.394799054373523e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -183.8679656982422, | |
| "logps/rejected": -262.7795104980469, | |
| "loss": 0.3909, | |
| "rewards/accuracies": 0.7037500143051147, | |
| "rewards/chosen": -0.6056129336357117, | |
| "rewards/margins": 1.6474745273590088, | |
| "rewards/rejected": -2.2530875205993652, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 1.7414601473543203, | |
| "grad_norm": 59.49274826049805, | |
| "learning_rate": 5.315996847911741e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -186.5425567626953, | |
| "logps/rejected": -271.7103576660156, | |
| "loss": 0.3796, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -0.7344449758529663, | |
| "rewards/margins": 1.7059468030929565, | |
| "rewards/rejected": -2.440391778945923, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 1.7749497655726725, | |
| "grad_norm": 41.25373077392578, | |
| "learning_rate": 5.23719464144996e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -186.00929260253906, | |
| "logps/rejected": -268.0653076171875, | |
| "loss": 0.3793, | |
| "rewards/accuracies": 0.7099999785423279, | |
| "rewards/chosen": -0.7967619895935059, | |
| "rewards/margins": 1.9842884540557861, | |
| "rewards/rejected": -2.781050443649292, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 1.8084393837910246, | |
| "grad_norm": 37.203304290771484, | |
| "learning_rate": 5.15839243498818e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -187.30181884765625, | |
| "logps/rejected": -272.9234313964844, | |
| "loss": 0.4057, | |
| "rewards/accuracies": 0.6825000047683716, | |
| "rewards/chosen": -0.7267603874206543, | |
| "rewards/margins": 1.7311309576034546, | |
| "rewards/rejected": -2.4578914642333984, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 1.841929002009377, | |
| "grad_norm": 30.321313858032227, | |
| "learning_rate": 5.079590228526398e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -189.3111114501953, | |
| "logps/rejected": -270.3074645996094, | |
| "loss": 0.4081, | |
| "rewards/accuracies": 0.6800000071525574, | |
| "rewards/chosen": -0.751646101474762, | |
| "rewards/margins": 1.640588641166687, | |
| "rewards/rejected": -2.3922348022460938, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 1.8754186202277294, | |
| "grad_norm": 47.42466735839844, | |
| "learning_rate": 5.000788022064617e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -179.08119201660156, | |
| "logps/rejected": -277.5658264160156, | |
| "loss": 0.3837, | |
| "rewards/accuracies": 0.6949999928474426, | |
| "rewards/chosen": -0.6461160182952881, | |
| "rewards/margins": 1.8302369117736816, | |
| "rewards/rejected": -2.476353168487549, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 1.9089082384460818, | |
| "grad_norm": 37.78199005126953, | |
| "learning_rate": 4.921985815602837e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -195.4869384765625, | |
| "logps/rejected": -276.9580383300781, | |
| "loss": 0.3995, | |
| "rewards/accuracies": 0.699999988079071, | |
| "rewards/chosen": -0.7339184284210205, | |
| "rewards/margins": 1.7630858421325684, | |
| "rewards/rejected": -2.4970040321350098, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 1.942397856664434, | |
| "grad_norm": 42.2109375, | |
| "learning_rate": 4.843183609141055e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -199.12640380859375, | |
| "logps/rejected": -275.89630126953125, | |
| "loss": 0.4105, | |
| "rewards/accuracies": 0.675000011920929, | |
| "rewards/chosen": -0.6928785443305969, | |
| "rewards/margins": 1.7767040729522705, | |
| "rewards/rejected": -2.4695825576782227, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 1.9758874748827864, | |
| "grad_norm": 50.837059020996094, | |
| "learning_rate": 4.764381402679275e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -187.2418212890625, | |
| "logps/rejected": -285.7373046875, | |
| "loss": 0.3963, | |
| "rewards/accuracies": 0.7012500166893005, | |
| "rewards/chosen": -0.7950295805931091, | |
| "rewards/margins": 1.9002233743667603, | |
| "rewards/rejected": -2.6952526569366455, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 2.0093770931011385, | |
| "grad_norm": 24.105684280395508, | |
| "learning_rate": 4.685579196217494e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -175.44351196289062, | |
| "logps/rejected": -268.713134765625, | |
| "loss": 0.3725, | |
| "rewards/accuracies": 0.7087500095367432, | |
| "rewards/chosen": -0.7846677303314209, | |
| "rewards/margins": 1.9885753393173218, | |
| "rewards/rejected": -2.7732431888580322, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 2.042866711319491, | |
| "grad_norm": 21.2971248626709, | |
| "learning_rate": 4.606776989755713e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -189.33584594726562, | |
| "logps/rejected": -282.147705078125, | |
| "loss": 0.3284, | |
| "rewards/accuracies": 0.7512500286102295, | |
| "rewards/chosen": -0.6468074917793274, | |
| "rewards/margins": 2.03031849861145, | |
| "rewards/rejected": -2.677126169204712, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 2.0763563295378433, | |
| "grad_norm": 13.780171394348145, | |
| "learning_rate": 4.527974783293932e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -191.3354949951172, | |
| "logps/rejected": -282.95489501953125, | |
| "loss": 0.3183, | |
| "rewards/accuracies": 0.7549999952316284, | |
| "rewards/chosen": -0.7194473147392273, | |
| "rewards/margins": 2.1648471355438232, | |
| "rewards/rejected": -2.884294033050537, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 2.1098459477561957, | |
| "grad_norm": 27.040647506713867, | |
| "learning_rate": 4.449172576832151e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -187.97789001464844, | |
| "logps/rejected": -267.7035217285156, | |
| "loss": 0.3298, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -0.7726535797119141, | |
| "rewards/margins": 2.0831313133239746, | |
| "rewards/rejected": -2.8557848930358887, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 2.1433355659745477, | |
| "grad_norm": 52.36626052856445, | |
| "learning_rate": 4.37037037037037e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -187.05853271484375, | |
| "logps/rejected": -278.2367248535156, | |
| "loss": 0.3381, | |
| "rewards/accuracies": 0.7287499904632568, | |
| "rewards/chosen": -0.7423791289329529, | |
| "rewards/margins": 2.1862621307373047, | |
| "rewards/rejected": -2.9286410808563232, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 2.1768251841929, | |
| "grad_norm": 44.21054458618164, | |
| "learning_rate": 4.2915681639085896e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -194.27188110351562, | |
| "logps/rejected": -286.78887939453125, | |
| "loss": 0.3391, | |
| "rewards/accuracies": 0.7212499976158142, | |
| "rewards/chosen": -0.7143966555595398, | |
| "rewards/margins": 2.098987579345703, | |
| "rewards/rejected": -2.8133840560913086, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 2.2103148024112524, | |
| "grad_norm": 55.730262756347656, | |
| "learning_rate": 4.212765957446808e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -184.7841796875, | |
| "logps/rejected": -289.6191101074219, | |
| "loss": 0.3417, | |
| "rewards/accuracies": 0.7174999713897705, | |
| "rewards/chosen": -0.747239351272583, | |
| "rewards/margins": 2.2462081909179688, | |
| "rewards/rejected": -2.993447780609131, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 2.243804420629605, | |
| "grad_norm": 34.05455017089844, | |
| "learning_rate": 4.1339637509850275e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -188.21963500976562, | |
| "logps/rejected": -282.9302062988281, | |
| "loss": 0.3288, | |
| "rewards/accuracies": 0.7275000214576721, | |
| "rewards/chosen": -0.8579057455062866, | |
| "rewards/margins": 2.2049171924591064, | |
| "rewards/rejected": -3.0628225803375244, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 2.2772940388479572, | |
| "grad_norm": 32.748878479003906, | |
| "learning_rate": 4.0551615445232467e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -194.06427001953125, | |
| "logps/rejected": -288.862548828125, | |
| "loss": 0.3154, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -0.9077091217041016, | |
| "rewards/margins": 2.30654239654541, | |
| "rewards/rejected": -3.2142515182495117, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 2.3107836570663096, | |
| "grad_norm": 26.387239456176758, | |
| "learning_rate": 3.976359338061466e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -184.67759704589844, | |
| "logps/rejected": -284.309326171875, | |
| "loss": 0.3273, | |
| "rewards/accuracies": 0.7287499904632568, | |
| "rewards/chosen": -0.9315968155860901, | |
| "rewards/margins": 2.26393985748291, | |
| "rewards/rejected": -3.1955366134643555, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 2.3442732752846616, | |
| "grad_norm": 26.215883255004883, | |
| "learning_rate": 3.8975571315996845e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -193.22149658203125, | |
| "logps/rejected": -286.2984313964844, | |
| "loss": 0.3245, | |
| "rewards/accuracies": 0.7200000286102295, | |
| "rewards/chosen": -0.839231014251709, | |
| "rewards/margins": 2.3949248790740967, | |
| "rewards/rejected": -3.2341556549072266, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 2.377762893503014, | |
| "grad_norm": 45.05733108520508, | |
| "learning_rate": 3.8187549251379037e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -192.80043029785156, | |
| "logps/rejected": -286.0914306640625, | |
| "loss": 0.3323, | |
| "rewards/accuracies": 0.7162500023841858, | |
| "rewards/chosen": -0.8909017443656921, | |
| "rewards/margins": 2.335562229156494, | |
| "rewards/rejected": -3.226464033126831, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 2.4112525117213663, | |
| "grad_norm": 23.256881713867188, | |
| "learning_rate": 3.739952718676123e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -197.46514892578125, | |
| "logps/rejected": -286.53509521484375, | |
| "loss": 0.3074, | |
| "rewards/accuracies": 0.7787500023841858, | |
| "rewards/chosen": -0.8636592030525208, | |
| "rewards/margins": 2.3110320568084717, | |
| "rewards/rejected": -3.1746912002563477, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 2.4447421299397187, | |
| "grad_norm": 29.816986083984375, | |
| "learning_rate": 3.661150512214342e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -186.59291076660156, | |
| "logps/rejected": -285.0599365234375, | |
| "loss": 0.3406, | |
| "rewards/accuracies": 0.7225000262260437, | |
| "rewards/chosen": -0.8795223236083984, | |
| "rewards/margins": 2.274043083190918, | |
| "rewards/rejected": -3.1535654067993164, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 2.478231748158071, | |
| "grad_norm": 58.20719528198242, | |
| "learning_rate": 3.582348305752561e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -193.24021911621094, | |
| "logps/rejected": -280.94549560546875, | |
| "loss": 0.351, | |
| "rewards/accuracies": 0.7300000190734863, | |
| "rewards/chosen": -0.9534088373184204, | |
| "rewards/margins": 2.271278142929077, | |
| "rewards/rejected": -3.224686861038208, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 2.511721366376423, | |
| "grad_norm": 61.28821563720703, | |
| "learning_rate": 3.50354609929078e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -182.4898681640625, | |
| "logps/rejected": -285.96759033203125, | |
| "loss": 0.3099, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -0.9077944755554199, | |
| "rewards/margins": 2.507885217666626, | |
| "rewards/rejected": -3.4156792163848877, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 2.5452109845947755, | |
| "grad_norm": 31.814802169799805, | |
| "learning_rate": 3.424743892828999e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -192.01673889160156, | |
| "logps/rejected": -291.91192626953125, | |
| "loss": 0.3273, | |
| "rewards/accuracies": 0.7487499713897705, | |
| "rewards/chosen": -1.0059828758239746, | |
| "rewards/margins": 2.3609018325805664, | |
| "rewards/rejected": -3.366884469985962, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 2.578700602813128, | |
| "grad_norm": 27.818708419799805, | |
| "learning_rate": 3.345941686367218e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -193.6220703125, | |
| "logps/rejected": -282.2691345214844, | |
| "loss": 0.3249, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.8329002261161804, | |
| "rewards/margins": 2.3765745162963867, | |
| "rewards/rejected": -3.209474802017212, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 2.6121902210314802, | |
| "grad_norm": 43.8161735534668, | |
| "learning_rate": 3.2671394799054374e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -188.08872985839844, | |
| "logps/rejected": -288.9317932128906, | |
| "loss": 0.3261, | |
| "rewards/accuracies": 0.7124999761581421, | |
| "rewards/chosen": -0.8284339308738708, | |
| "rewards/margins": 2.3834288120269775, | |
| "rewards/rejected": -3.211862564086914, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 2.6456798392498326, | |
| "grad_norm": 40.35237503051758, | |
| "learning_rate": 3.188337273443656e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -188.78111267089844, | |
| "logps/rejected": -292.10601806640625, | |
| "loss": 0.3219, | |
| "rewards/accuracies": 0.737500011920929, | |
| "rewards/chosen": -0.8962965607643127, | |
| "rewards/margins": 2.3189728260040283, | |
| "rewards/rejected": -3.2152698040008545, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 2.679169457468185, | |
| "grad_norm": 51.2871208190918, | |
| "learning_rate": 3.1095350669818753e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -197.29296875, | |
| "logps/rejected": -297.2159729003906, | |
| "loss": 0.3349, | |
| "rewards/accuracies": 0.7425000071525574, | |
| "rewards/chosen": -0.9539132118225098, | |
| "rewards/margins": 2.459329605102539, | |
| "rewards/rejected": -3.4132425785064697, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 2.7126590756865374, | |
| "grad_norm": 26.952434539794922, | |
| "learning_rate": 3.0307328605200945e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -197.95140075683594, | |
| "logps/rejected": -294.74676513671875, | |
| "loss": 0.3164, | |
| "rewards/accuracies": 0.7425000071525574, | |
| "rewards/chosen": -0.8712408542633057, | |
| "rewards/margins": 2.3473589420318604, | |
| "rewards/rejected": -3.218599796295166, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 2.7461486939048894, | |
| "grad_norm": 23.623044967651367, | |
| "learning_rate": 2.9519306540583136e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -188.4278106689453, | |
| "logps/rejected": -270.7493896484375, | |
| "loss": 0.3482, | |
| "rewards/accuracies": 0.706250011920929, | |
| "rewards/chosen": -0.8394105434417725, | |
| "rewards/margins": 2.1500954627990723, | |
| "rewards/rejected": -2.989506244659424, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 2.7796383121232418, | |
| "grad_norm": 33.7256965637207, | |
| "learning_rate": 2.8731284475965323e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -190.12367248535156, | |
| "logps/rejected": -286.8393859863281, | |
| "loss": 0.3118, | |
| "rewards/accuracies": 0.7450000047683716, | |
| "rewards/chosen": -0.8797828555107117, | |
| "rewards/margins": 2.488528251647949, | |
| "rewards/rejected": -3.3683111667633057, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 2.813127930341594, | |
| "grad_norm": 26.706951141357422, | |
| "learning_rate": 2.7943262411347515e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -190.7408447265625, | |
| "logps/rejected": -285.0479736328125, | |
| "loss": 0.3168, | |
| "rewards/accuracies": 0.7350000143051147, | |
| "rewards/chosen": -0.8466379642486572, | |
| "rewards/margins": 2.4300098419189453, | |
| "rewards/rejected": -3.2766480445861816, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 2.8466175485599465, | |
| "grad_norm": 38.752418518066406, | |
| "learning_rate": 2.7155240346729707e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -198.39134216308594, | |
| "logps/rejected": -290.9305725097656, | |
| "loss": 0.3135, | |
| "rewards/accuracies": 0.7450000047683716, | |
| "rewards/chosen": -0.9479385614395142, | |
| "rewards/margins": 2.4617414474487305, | |
| "rewards/rejected": -3.409679889678955, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 2.8801071667782985, | |
| "grad_norm": 22.20810317993164, | |
| "learning_rate": 2.63672182821119e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -202.04925537109375, | |
| "logps/rejected": -298.950927734375, | |
| "loss": 0.2977, | |
| "rewards/accuracies": 0.7562500238418579, | |
| "rewards/chosen": -0.897186815738678, | |
| "rewards/margins": 2.3512582778930664, | |
| "rewards/rejected": -3.2484447956085205, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 2.913596784996651, | |
| "grad_norm": 66.3785171508789, | |
| "learning_rate": 2.557919621749409e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -199.11300659179688, | |
| "logps/rejected": -298.3018798828125, | |
| "loss": 0.3053, | |
| "rewards/accuracies": 0.7587500214576721, | |
| "rewards/chosen": -0.9929912090301514, | |
| "rewards/margins": 2.481295585632324, | |
| "rewards/rejected": -3.4742870330810547, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 2.9470864032150033, | |
| "grad_norm": 37.80686569213867, | |
| "learning_rate": 2.4791174152876277e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -202.31398010253906, | |
| "logps/rejected": -276.3575439453125, | |
| "loss": 0.3206, | |
| "rewards/accuracies": 0.7475000023841858, | |
| "rewards/chosen": -0.9125861525535583, | |
| "rewards/margins": 2.299701690673828, | |
| "rewards/rejected": -3.2122879028320312, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 2.9805760214333556, | |
| "grad_norm": 125.0142593383789, | |
| "learning_rate": 2.4003152088258474e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -187.2313232421875, | |
| "logps/rejected": -284.8965759277344, | |
| "loss": 0.3186, | |
| "rewards/accuracies": 0.731249988079071, | |
| "rewards/chosen": -0.8962329030036926, | |
| "rewards/margins": 2.4796411991119385, | |
| "rewards/rejected": -3.3758738040924072, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 3.014065639651708, | |
| "grad_norm": 36.955318450927734, | |
| "learning_rate": 2.321513002364066e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -191.8556671142578, | |
| "logps/rejected": -289.2830810546875, | |
| "loss": 0.3003, | |
| "rewards/accuracies": 0.762499988079071, | |
| "rewards/chosen": -0.953887403011322, | |
| "rewards/margins": 2.597318410873413, | |
| "rewards/rejected": -3.551206111907959, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 3.0475552578700604, | |
| "grad_norm": 35.30141830444336, | |
| "learning_rate": 2.242710795902285e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -201.59471130371094, | |
| "logps/rejected": -296.01190185546875, | |
| "loss": 0.2955, | |
| "rewards/accuracies": 0.7574999928474426, | |
| "rewards/chosen": -0.8759480118751526, | |
| "rewards/margins": 2.536679744720459, | |
| "rewards/rejected": -3.4126272201538086, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 3.081044876088413, | |
| "grad_norm": 19.832714080810547, | |
| "learning_rate": 2.163908589440504e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -187.59324645996094, | |
| "logps/rejected": -301.8227233886719, | |
| "loss": 0.2666, | |
| "rewards/accuracies": 0.7749999761581421, | |
| "rewards/chosen": -0.9135188460350037, | |
| "rewards/margins": 2.812459707260132, | |
| "rewards/rejected": -3.7259786128997803, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 3.1145344943067648, | |
| "grad_norm": 20.465190887451172, | |
| "learning_rate": 2.0851063829787233e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -188.68150329589844, | |
| "logps/rejected": -292.4168701171875, | |
| "loss": 0.2952, | |
| "rewards/accuracies": 0.7337499856948853, | |
| "rewards/chosen": -0.7880871295928955, | |
| "rewards/margins": 2.723849296569824, | |
| "rewards/rejected": -3.5119359493255615, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 3.148024112525117, | |
| "grad_norm": 16.645957946777344, | |
| "learning_rate": 2.0063041765169423e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -181.14376831054688, | |
| "logps/rejected": -276.7168884277344, | |
| "loss": 0.2973, | |
| "rewards/accuracies": 0.7350000143051147, | |
| "rewards/chosen": -0.8847752213478088, | |
| "rewards/margins": 2.6361799240112305, | |
| "rewards/rejected": -3.5209546089172363, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 3.1815137307434695, | |
| "grad_norm": 16.746036529541016, | |
| "learning_rate": 1.9275019700551615e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -191.2080078125, | |
| "logps/rejected": -303.0468444824219, | |
| "loss": 0.2812, | |
| "rewards/accuracies": 0.7524999976158142, | |
| "rewards/chosen": -0.9689957499504089, | |
| "rewards/margins": 2.876345157623291, | |
| "rewards/rejected": -3.845341205596924, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 3.215003348961822, | |
| "grad_norm": 22.3815975189209, | |
| "learning_rate": 1.8486997635933806e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -203.40260314941406, | |
| "logps/rejected": -296.6044616699219, | |
| "loss": 0.308, | |
| "rewards/accuracies": 0.7475000023841858, | |
| "rewards/chosen": -0.9666973352432251, | |
| "rewards/margins": 2.647951364517212, | |
| "rewards/rejected": -3.6146488189697266, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 3.2484929671801743, | |
| "grad_norm": 22.031641006469727, | |
| "learning_rate": 1.7698975571315996e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -184.9111328125, | |
| "logps/rejected": -287.4691162109375, | |
| "loss": 0.2826, | |
| "rewards/accuracies": 0.7450000047683716, | |
| "rewards/chosen": -0.9380254149436951, | |
| "rewards/margins": 2.7892355918884277, | |
| "rewards/rejected": -3.7272610664367676, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 3.2819825853985263, | |
| "grad_norm": 31.574790954589844, | |
| "learning_rate": 1.6910953506698187e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -191.68785095214844, | |
| "logps/rejected": -300.5950622558594, | |
| "loss": 0.273, | |
| "rewards/accuracies": 0.7674999833106995, | |
| "rewards/chosen": -0.9448862671852112, | |
| "rewards/margins": 2.9543023109436035, | |
| "rewards/rejected": -3.899188756942749, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 3.3154722036168787, | |
| "grad_norm": 20.961929321289062, | |
| "learning_rate": 1.612293144208038e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -189.8438720703125, | |
| "logps/rejected": -291.8954162597656, | |
| "loss": 0.2805, | |
| "rewards/accuracies": 0.7475000023841858, | |
| "rewards/chosen": -0.9848695397377014, | |
| "rewards/margins": 2.715031385421753, | |
| "rewards/rejected": -3.6999011039733887, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 3.348961821835231, | |
| "grad_norm": 81.02938079833984, | |
| "learning_rate": 1.5334909377462568e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -194.07749938964844, | |
| "logps/rejected": -288.35015869140625, | |
| "loss": 0.2876, | |
| "rewards/accuracies": 0.7537500262260437, | |
| "rewards/chosen": -1.0682913064956665, | |
| "rewards/margins": 2.6784591674804688, | |
| "rewards/rejected": -3.746750593185425, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 3.3824514400535834, | |
| "grad_norm": 49.065887451171875, | |
| "learning_rate": 1.454688731284476e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -203.08660888671875, | |
| "logps/rejected": -315.59271240234375, | |
| "loss": 0.2685, | |
| "rewards/accuracies": 0.7699999809265137, | |
| "rewards/chosen": -0.986056923866272, | |
| "rewards/margins": 3.007080078125, | |
| "rewards/rejected": -3.9931368827819824, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 3.415941058271936, | |
| "grad_norm": 21.368894577026367, | |
| "learning_rate": 1.375886524822695e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -198.26324462890625, | |
| "logps/rejected": -298.6892395019531, | |
| "loss": 0.2805, | |
| "rewards/accuracies": 0.768750011920929, | |
| "rewards/chosen": -1.0937081575393677, | |
| "rewards/margins": 2.7569918632507324, | |
| "rewards/rejected": -3.8506996631622314, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 3.4494306764902882, | |
| "grad_norm": 18.683109283447266, | |
| "learning_rate": 1.2970843183609141e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -205.4937286376953, | |
| "logps/rejected": -288.3065185546875, | |
| "loss": 0.2852, | |
| "rewards/accuracies": 0.7662500143051147, | |
| "rewards/chosen": -1.0320967435836792, | |
| "rewards/margins": 2.5120697021484375, | |
| "rewards/rejected": -3.544166326522827, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 3.48292029470864, | |
| "grad_norm": 38.7464599609375, | |
| "learning_rate": 1.218282111899133e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -194.1973876953125, | |
| "logps/rejected": -299.29559326171875, | |
| "loss": 0.2871, | |
| "rewards/accuracies": 0.7437499761581421, | |
| "rewards/chosen": -1.104549527168274, | |
| "rewards/margins": 2.853426218032837, | |
| "rewards/rejected": -3.9579761028289795, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 3.5164099129269926, | |
| "grad_norm": 7.350229740142822, | |
| "learning_rate": 1.1394799054373522e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -201.8234405517578, | |
| "logps/rejected": -290.8706970214844, | |
| "loss": 0.3006, | |
| "rewards/accuracies": 0.7387499809265137, | |
| "rewards/chosen": -1.0995410680770874, | |
| "rewards/margins": 2.636826992034912, | |
| "rewards/rejected": -3.736368417739868, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 3.549899531145345, | |
| "grad_norm": 19.686573028564453, | |
| "learning_rate": 1.0606776989755713e-07, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -190.27627563476562, | |
| "logps/rejected": -288.27923583984375, | |
| "loss": 0.2949, | |
| "rewards/accuracies": 0.7524999976158142, | |
| "rewards/chosen": -1.0866522789001465, | |
| "rewards/margins": 2.7148284912109375, | |
| "rewards/rejected": -3.801480770111084, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 3.5833891493636973, | |
| "grad_norm": 23.88344955444336, | |
| "learning_rate": 9.818754925137903e-08, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -196.48269653320312, | |
| "logps/rejected": -295.1553039550781, | |
| "loss": 0.2976, | |
| "rewards/accuracies": 0.7262499928474426, | |
| "rewards/chosen": -1.0540306568145752, | |
| "rewards/margins": 2.6128809452056885, | |
| "rewards/rejected": -3.6669113636016846, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 3.6168787675820493, | |
| "grad_norm": 28.22519874572754, | |
| "learning_rate": 9.030732860520094e-08, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -196.87808227539062, | |
| "logps/rejected": -294.7259216308594, | |
| "loss": 0.2903, | |
| "rewards/accuracies": 0.7487499713897705, | |
| "rewards/chosen": -1.0297123193740845, | |
| "rewards/margins": 2.7472548484802246, | |
| "rewards/rejected": -3.7769670486450195, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 3.6503683858004017, | |
| "grad_norm": 34.18745422363281, | |
| "learning_rate": 8.242710795902284e-08, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -195.99404907226562, | |
| "logps/rejected": -290.40350341796875, | |
| "loss": 0.2871, | |
| "rewards/accuracies": 0.7524999976158142, | |
| "rewards/chosen": -1.050000786781311, | |
| "rewards/margins": 2.8044447898864746, | |
| "rewards/rejected": -3.8544461727142334, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 3.683858004018754, | |
| "grad_norm": 47.866973876953125, | |
| "learning_rate": 7.454688731284475e-08, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -200.01785278320312, | |
| "logps/rejected": -314.4879150390625, | |
| "loss": 0.2842, | |
| "rewards/accuracies": 0.7637500166893005, | |
| "rewards/chosen": -1.1343244314193726, | |
| "rewards/margins": 2.814058780670166, | |
| "rewards/rejected": -3.9483835697174072, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 3.7173476222371065, | |
| "grad_norm": 42.807334899902344, | |
| "learning_rate": 6.666666666666665e-08, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -195.06690979003906, | |
| "logps/rejected": -304.86456298828125, | |
| "loss": 0.2987, | |
| "rewards/accuracies": 0.7262499928474426, | |
| "rewards/chosen": -0.996014416217804, | |
| "rewards/margins": 2.8309714794158936, | |
| "rewards/rejected": -3.826986074447632, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 3.750837240455459, | |
| "grad_norm": 43.85563278198242, | |
| "learning_rate": 5.8786446020488567e-08, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -192.9456787109375, | |
| "logps/rejected": -302.6964111328125, | |
| "loss": 0.2717, | |
| "rewards/accuracies": 0.7674999833106995, | |
| "rewards/chosen": -0.9534288644790649, | |
| "rewards/margins": 2.9733974933624268, | |
| "rewards/rejected": -3.926826238632202, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 3.7843268586738112, | |
| "grad_norm": 15.730545043945312, | |
| "learning_rate": 5.090622537431047e-08, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -201.83468627929688, | |
| "logps/rejected": -303.9621887207031, | |
| "loss": 0.2778, | |
| "rewards/accuracies": 0.7425000071525574, | |
| "rewards/chosen": -1.114385962486267, | |
| "rewards/margins": 2.769357919692993, | |
| "rewards/rejected": -3.88374400138855, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 3.8178164768921636, | |
| "grad_norm": 8.295851707458496, | |
| "learning_rate": 4.3026004728132384e-08, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -188.35511779785156, | |
| "logps/rejected": -294.769287109375, | |
| "loss": 0.2819, | |
| "rewards/accuracies": 0.7637500166893005, | |
| "rewards/chosen": -1.0945212841033936, | |
| "rewards/margins": 2.837056875228882, | |
| "rewards/rejected": -3.9315783977508545, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 3.8513060951105156, | |
| "grad_norm": 26.289419174194336, | |
| "learning_rate": 3.5145784081954295e-08, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -189.79083251953125, | |
| "logps/rejected": -306.619140625, | |
| "loss": 0.275, | |
| "rewards/accuracies": 0.7674999833106995, | |
| "rewards/chosen": -1.047197699546814, | |
| "rewards/margins": 2.954050064086914, | |
| "rewards/rejected": -4.001247406005859, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 3.884795713328868, | |
| "grad_norm": 39.87799835205078, | |
| "learning_rate": 2.72655634357762e-08, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -195.89584350585938, | |
| "logps/rejected": -300.447998046875, | |
| "loss": 0.3016, | |
| "rewards/accuracies": 0.7362499833106995, | |
| "rewards/chosen": -1.0226908922195435, | |
| "rewards/margins": 2.853440284729004, | |
| "rewards/rejected": -3.876131057739258, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 3.9182853315472204, | |
| "grad_norm": 34.19044494628906, | |
| "learning_rate": 1.938534278959811e-08, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -198.2811737060547, | |
| "logps/rejected": -297.48712158203125, | |
| "loss": 0.2798, | |
| "rewards/accuracies": 0.7612500190734863, | |
| "rewards/chosen": -0.9482996463775635, | |
| "rewards/margins": 2.7399239540100098, | |
| "rewards/rejected": -3.6882238388061523, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 3.9517749497655728, | |
| "grad_norm": 50.39924621582031, | |
| "learning_rate": 1.1505122143420016e-08, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -186.64804077148438, | |
| "logps/rejected": -296.8222961425781, | |
| "loss": 0.2861, | |
| "rewards/accuracies": 0.75, | |
| "rewards/chosen": -1.1375445127487183, | |
| "rewards/margins": 2.8117544651031494, | |
| "rewards/rejected": -3.9492990970611572, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 3.985264567983925, | |
| "grad_norm": 24.80521583557129, | |
| "learning_rate": 3.6249014972419224e-09, | |
| "logits/chosen": NaN, | |
| "logits/rejected": NaN, | |
| "logps/chosen": -192.86546325683594, | |
| "logps/rejected": -305.5084228515625, | |
| "loss": 0.2767, | |
| "rewards/accuracies": 0.7475000023841858, | |
| "rewards/chosen": -1.0181684494018555, | |
| "rewards/margins": 2.8778107166290283, | |
| "rewards/rejected": -3.895979642868042, | |
| "step": 5950 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 5972, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 4, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |