| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 20.0, | |
| "eval_steps": 200, | |
| "global_step": 620, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.32454361054766734, | |
| "grad_norm": 193.0, | |
| "kl": 0.18435561656951904, | |
| "learning_rate": 6e-08, | |
| "logits/chosen": -60439219.2, | |
| "logits/rejected": -88406048.0, | |
| "logps/chosen": -197.2506591796875, | |
| "logps/rejected": -107.289501953125, | |
| "loss": 3.5312, | |
| "rewards/chosen": -0.0054339878261089325, | |
| "rewards/margins": -0.004661996196955442, | |
| "rewards/rejected": -0.00077199162915349, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.6490872210953347, | |
| "grad_norm": 184.0, | |
| "kl": 0.2278171330690384, | |
| "learning_rate": 1.2666666666666666e-07, | |
| "logits/chosen": -60431888.30573248, | |
| "logits/rejected": -90372359.85276073, | |
| "logps/chosen": -227.72788117038218, | |
| "logps/rejected": -115.19334739263803, | |
| "loss": 3.529, | |
| "rewards/chosen": -0.005934715650643512, | |
| "rewards/margins": -0.0023042475658752288, | |
| "rewards/rejected": -0.0036304680847682835, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.973630831643002, | |
| "grad_norm": 212.0, | |
| "kl": 0.25923511385917664, | |
| "learning_rate": 1.9333333333333332e-07, | |
| "logits/chosen": -64896310.01834863, | |
| "logits/rejected": -90925383.15654951, | |
| "logps/chosen": -202.24849483944953, | |
| "logps/rejected": -113.89132388178913, | |
| "loss": 3.493, | |
| "rewards/chosen": -0.006282405386641849, | |
| "rewards/margins": 0.008622396832151831, | |
| "rewards/rejected": -0.01490480221879368, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 1.2920892494929006, | |
| "grad_norm": 176.0, | |
| "kl": 0.1959868222475052, | |
| "learning_rate": 2.6e-07, | |
| "logits/chosen": -59720155.54179566, | |
| "logits/rejected": -88351780.93114755, | |
| "logps/chosen": -197.90833010835914, | |
| "logps/rejected": -108.10444415983606, | |
| "loss": 3.5435, | |
| "rewards/chosen": 0.006978450544847424, | |
| "rewards/margins": 0.021960525532540336, | |
| "rewards/rejected": -0.014982074987692912, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 1.616632860040568, | |
| "grad_norm": 183.0, | |
| "kl": 0.21559596061706543, | |
| "learning_rate": 3.2666666666666663e-07, | |
| "logits/chosen": -59467023.25827815, | |
| "logits/rejected": -90559948.49704142, | |
| "logps/chosen": -228.09385347682118, | |
| "logps/rejected": -112.74208348742603, | |
| "loss": 3.5032, | |
| "rewards/chosen": 0.0010771212593609135, | |
| "rewards/margins": 0.03369407513736861, | |
| "rewards/rejected": -0.0326169538780077, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 1.9411764705882353, | |
| "grad_norm": 166.0, | |
| "kl": 0.15380892157554626, | |
| "learning_rate": 3.933333333333333e-07, | |
| "logits/chosen": -65427847.25970149, | |
| "logits/rejected": -90375137.78360656, | |
| "logps/chosen": -201.5051072761194, | |
| "logps/rejected": -115.42854764344263, | |
| "loss": 3.4839, | |
| "rewards/chosen": -0.005884787217894597, | |
| "rewards/margins": 0.059975266923290735, | |
| "rewards/rejected": -0.06586005414118533, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 2.259634888438134, | |
| "grad_norm": 177.0, | |
| "kl": 0.14670009911060333, | |
| "learning_rate": 4.6e-07, | |
| "logits/chosen": -59004054.974358976, | |
| "logits/rejected": -88976837.67088607, | |
| "logps/chosen": -197.41224709535257, | |
| "logps/rejected": -108.29176967958861, | |
| "loss": 3.5035, | |
| "rewards/chosen": 0.0009808578552343906, | |
| "rewards/margins": 0.09745390586015737, | |
| "rewards/rejected": -0.09647304800492298, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 2.584178498985801, | |
| "grad_norm": 147.0, | |
| "kl": 0.17133259773254395, | |
| "learning_rate": 5.266666666666666e-07, | |
| "logits/chosen": -60354307.32467532, | |
| "logits/rejected": -89784492.72289157, | |
| "logps/chosen": -219.9271002435065, | |
| "logps/rejected": -113.98176298945783, | |
| "loss": 3.4706, | |
| "rewards/chosen": -0.01354174180464311, | |
| "rewards/margins": 0.1299854844710652, | |
| "rewards/rejected": -0.1435272262757083, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 2.9087221095334685, | |
| "grad_norm": 197.0, | |
| "kl": 0.18291868269443512, | |
| "learning_rate": 5.933333333333334e-07, | |
| "logits/chosen": -64280637.686746985, | |
| "logits/rejected": -91105393.03896104, | |
| "logps/chosen": -209.47286803463857, | |
| "logps/rejected": -117.5850497159091, | |
| "loss": 3.4451, | |
| "rewards/chosen": -0.034842827233923485, | |
| "rewards/margins": 0.20260719032359223, | |
| "rewards/rejected": -0.2374500175575157, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 3.227180527383367, | |
| "grad_norm": 197.0, | |
| "kl": 0.15808846056461334, | |
| "learning_rate": 6.6e-07, | |
| "logits/chosen": -61499443.52201258, | |
| "logits/rejected": -88841982.34838709, | |
| "logps/chosen": -191.46870086477986, | |
| "logps/rejected": -110.64470766129033, | |
| "loss": 3.4412, | |
| "rewards/chosen": -0.02490143655980908, | |
| "rewards/margins": 0.2772539658390694, | |
| "rewards/rejected": -0.3021554023988785, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 3.5517241379310347, | |
| "grad_norm": 187.0, | |
| "kl": 0.20476070046424866, | |
| "learning_rate": 7.266666666666667e-07, | |
| "logits/chosen": -59606039.973244146, | |
| "logits/rejected": -90108102.19354838, | |
| "logps/chosen": -218.58996132943145, | |
| "logps/rejected": -116.83943135997067, | |
| "loss": 3.4053, | |
| "rewards/chosen": -0.04491897889204249, | |
| "rewards/margins": 0.3912458546913978, | |
| "rewards/rejected": -0.43616483358344027, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 3.8762677484787016, | |
| "grad_norm": 177.0, | |
| "kl": 0.2384704351425171, | |
| "learning_rate": 7.933333333333333e-07, | |
| "logits/chosen": -63627667.66376811, | |
| "logits/rejected": -90490970.25084746, | |
| "logps/chosen": -210.72527173913045, | |
| "logps/rejected": -119.54580243644068, | |
| "loss": 3.3335, | |
| "rewards/chosen": -0.08938233195871546, | |
| "rewards/margins": 0.5145636426175557, | |
| "rewards/rejected": -0.6039459745762712, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 4.1947261663286, | |
| "grad_norm": 180.0, | |
| "kl": 0.1893850862979889, | |
| "learning_rate": 8.599999999999999e-07, | |
| "logits/chosen": -60919781.574193545, | |
| "logits/rejected": -90189328.10062893, | |
| "logps/chosen": -201.86630544354838, | |
| "logps/rejected": -114.74926297169812, | |
| "loss": 3.3418, | |
| "rewards/chosen": -0.05891021605460874, | |
| "rewards/margins": 0.6146469454804984, | |
| "rewards/rejected": -0.6735571615351071, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 4.519269776876268, | |
| "grad_norm": 175.0, | |
| "kl": 0.2661321759223938, | |
| "learning_rate": 9.266666666666665e-07, | |
| "logits/chosen": -59107307.58803987, | |
| "logits/rejected": -89497455.00884956, | |
| "logps/chosen": -215.70617473006644, | |
| "logps/rejected": -120.84332365412979, | |
| "loss": 3.2899, | |
| "rewards/chosen": -0.11127648084266638, | |
| "rewards/margins": 0.7554082443082831, | |
| "rewards/rejected": -0.8666847251509495, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 4.8438133874239355, | |
| "grad_norm": 177.0, | |
| "kl": 0.3608871102333069, | |
| "learning_rate": 9.933333333333333e-07, | |
| "logits/chosen": -64112037.64705882, | |
| "logits/rejected": -91098432.85333334, | |
| "logps/chosen": -204.3817325367647, | |
| "logps/rejected": -125.74838541666666, | |
| "loss": 3.23, | |
| "rewards/chosen": -0.1627967497881721, | |
| "rewards/margins": 0.8743928864422965, | |
| "rewards/rejected": -1.0371896362304687, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 5.162271805273834, | |
| "grad_norm": 137.0, | |
| "kl": 0.2991156578063965, | |
| "learning_rate": 9.99095521855875e-07, | |
| "logits/chosen": -60793345.625396825, | |
| "logits/rejected": -90675919.74440895, | |
| "logps/chosen": -211.70634920634922, | |
| "logps/rejected": -120.8386456669329, | |
| "loss": 3.217, | |
| "rewards/chosen": -0.10563917614164807, | |
| "rewards/margins": 0.9914023347846198, | |
| "rewards/rejected": -1.097041510926268, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 5.486815415821501, | |
| "grad_norm": 171.0, | |
| "kl": 0.35243138670921326, | |
| "learning_rate": 9.959731316773258e-07, | |
| "logits/chosen": -57961989.13712375, | |
| "logits/rejected": -89004902.85043988, | |
| "logps/chosen": -207.07096571906354, | |
| "logps/rejected": -122.59219208211144, | |
| "loss": 3.15, | |
| "rewards/chosen": -0.1434617823980325, | |
| "rewards/margins": 1.1325593303963681, | |
| "rewards/rejected": -1.2760211127944006, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 5.811359026369169, | |
| "grad_norm": 185.0, | |
| "kl": 0.4362719655036926, | |
| "learning_rate": 9.906356050933962e-07, | |
| "logits/chosen": -64087541.48973607, | |
| "logits/rejected": -91216310.36789298, | |
| "logps/chosen": -203.97832661290323, | |
| "logps/rejected": -131.44156302257525, | |
| "loss": 3.0881, | |
| "rewards/chosen": -0.23974457234581195, | |
| "rewards/margins": 1.1809270756605634, | |
| "rewards/rejected": -1.4206716480063755, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 6.129817444219067, | |
| "grad_norm": 209.0, | |
| "kl": 0.45517590641975403, | |
| "learning_rate": 9.831067807935138e-07, | |
| "logits/chosen": -60818541.48427673, | |
| "logits/rejected": -91439209.7032258, | |
| "logps/chosen": -216.79994595125785, | |
| "logps/rejected": -123.5953125, | |
| "loss": 3.1009, | |
| "rewards/chosen": -0.14091354945920548, | |
| "rewards/margins": 1.3181333373540505, | |
| "rewards/rejected": -1.459046886813256, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 6.454361054766734, | |
| "grad_norm": 174.0, | |
| "kl": 0.3785388171672821, | |
| "learning_rate": 9.73420284334652e-07, | |
| "logits/chosen": -57674728.34323432, | |
| "logits/rejected": -88681900.43916914, | |
| "logps/chosen": -205.04843492161717, | |
| "logps/rejected": -123.72198395771514, | |
| "loss": 3.0149, | |
| "rewards/chosen": -0.1368737551245359, | |
| "rewards/margins": 1.4349443391007053, | |
| "rewards/rejected": -1.571818094225241, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 6.454361054766734, | |
| "eval_kl": 0.035786211490631104, | |
| "eval_logits/chosen": -67682705.2972973, | |
| "eval_logits/rejected": -106589274.61946903, | |
| "eval_logps/chosen": -223.63279490427928, | |
| "eval_logps/rejected": -130.98892941095133, | |
| "eval_loss": 0.3622306287288666, | |
| "eval_rewards/chosen": -0.1184040877196166, | |
| "eval_rewards/margins": 1.4728804700617717, | |
| "eval_rewards/rejected": -1.5912845577813883, | |
| "eval_runtime": 14.432, | |
| "eval_samples_per_second": 15.175, | |
| "eval_steps_per_second": 0.97, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 6.778904665314402, | |
| "grad_norm": 173.0, | |
| "kl": 0.486126571893692, | |
| "learning_rate": 9.616193779614293e-07, | |
| "logits/chosen": -62086706.13649852, | |
| "logits/rejected": -91359813.28052805, | |
| "logps/chosen": -207.32766135014836, | |
| "logps/rejected": -134.37520627062707, | |
| "loss": 2.9831, | |
| "rewards/chosen": -0.22745244510095974, | |
| "rewards/margins": 1.4696961454905213, | |
| "rewards/rejected": -1.697148590591481, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 7.0973630831643, | |
| "grad_norm": 238.0, | |
| "kl": 0.4411180913448334, | |
| "learning_rate": 9.477567673864215e-07, | |
| "logits/chosen": -61770599.064935066, | |
| "logits/rejected": -91440755.2, | |
| "logps/chosen": -212.82518262987014, | |
| "logps/rejected": -127.96240234375, | |
| "loss": 2.9719, | |
| "rewards/chosen": -0.23503605731121904, | |
| "rewards/margins": 1.439167243164855, | |
| "rewards/rejected": -1.6742033004760741, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 7.421906693711968, | |
| "grad_norm": 696.0, | |
| "kl": 0.43413224816322327, | |
| "learning_rate": 9.318943663936569e-07, | |
| "logits/chosen": -58800922.256410256, | |
| "logits/rejected": -88753464.19512194, | |
| "logps/chosen": -204.23775540865384, | |
| "logps/rejected": -127.21961937881098, | |
| "loss": 2.9524, | |
| "rewards/chosen": -0.15640850556202424, | |
| "rewards/margins": 1.593839914967225, | |
| "rewards/rejected": -1.7502484205292492, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 7.746450304259635, | |
| "grad_norm": 155.0, | |
| "kl": 0.6114085912704468, | |
| "learning_rate": 9.141030203166256e-07, | |
| "logits/chosen": -60832057.65765766, | |
| "logits/rejected": -91749279.27035831, | |
| "logps/chosen": -210.65941722972974, | |
| "logps/rejected": -133.82223381514657, | |
| "loss": 2.9045, | |
| "rewards/chosen": -0.2403512674051004, | |
| "rewards/margins": 1.6371219486983197, | |
| "rewards/rejected": -1.8774732161034202, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 8.064908722109534, | |
| "grad_norm": 177.0, | |
| "kl": 0.5247067213058472, | |
| "learning_rate": 8.944621896258224e-07, | |
| "logits/chosen": -61470391.79487179, | |
| "logits/rejected": -90531684.4556962, | |
| "logps/chosen": -212.98465044070514, | |
| "logps/rejected": -129.2169822982595, | |
| "loss": 2.9026, | |
| "rewards/chosen": -0.2432682330791767, | |
| "rewards/margins": 1.5595254387340156, | |
| "rewards/rejected": -1.8027936718131923, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 8.3894523326572, | |
| "grad_norm": 144.0, | |
| "kl": 0.4949173033237457, | |
| "learning_rate": 8.730595950389967e-07, | |
| "logits/chosen": -58573346.13333333, | |
| "logits/rejected": -89080557.88307692, | |
| "logps/chosen": -201.41715029761906, | |
| "logps/rejected": -128.4549278846154, | |
| "loss": 2.8883, | |
| "rewards/chosen": -0.1385447789752294, | |
| "rewards/margins": 1.7706690882122706, | |
| "rewards/rejected": -1.9092138671875, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 8.713995943204868, | |
| "grad_norm": 185.0, | |
| "kl": 0.6798511743545532, | |
| "learning_rate": 8.499908257391323e-07, | |
| "logits/chosen": -60232947.512195125, | |
| "logits/rejected": -91839363.28205128, | |
| "logps/chosen": -217.515625, | |
| "logps/rejected": -134.24834735576923, | |
| "loss": 2.8604, | |
| "rewards/chosen": -0.22204266524896388, | |
| "rewards/margins": 1.757865030814738, | |
| "rewards/rejected": -1.9799076960637019, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 9.032454361054766, | |
| "grad_norm": 240.0, | |
| "kl": 0.4586775600910187, | |
| "learning_rate": 8.253589124499511e-07, | |
| "logits/chosen": -61861802.11612903, | |
| "logits/rejected": -91389275.7735849, | |
| "logps/chosen": -207.82133316532259, | |
| "logps/rejected": -133.32677378144655, | |
| "loss": 2.8302, | |
| "rewards/chosen": -0.2830538349766885, | |
| "rewards/margins": 1.7209184404753555, | |
| "rewards/rejected": -2.003972275452044, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 9.356997971602434, | |
| "grad_norm": 164.0, | |
| "kl": 0.61592036485672, | |
| "learning_rate": 7.992738672756908e-07, | |
| "logits/chosen": -58843218.940809965, | |
| "logits/rejected": -88515118.54545455, | |
| "logps/chosen": -197.16750632788163, | |
| "logps/rejected": -129.25817985893417, | |
| "loss": 2.8523, | |
| "rewards/chosen": -0.1824735837562062, | |
| "rewards/margins": 1.795535009381529, | |
| "rewards/rejected": -1.9780085931377351, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 9.681541582150102, | |
| "grad_norm": 137.0, | |
| "kl": 0.7404313087463379, | |
| "learning_rate": 7.718521923603404e-07, | |
| "logits/chosen": -59332853.50157729, | |
| "logits/rejected": -91459862.98452012, | |
| "logps/chosen": -231.09537657728706, | |
| "logps/rejected": -134.74608165634675, | |
| "loss": 2.8027, | |
| "rewards/chosen": -0.2009657730439484, | |
| "rewards/margins": 1.85811701405938, | |
| "rewards/rejected": -2.0590827871033284, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "grad_norm": 207.0, | |
| "kl": 0.44328153133392334, | |
| "learning_rate": 7.43216359560785e-07, | |
| "logits/chosen": -62495649.72698413, | |
| "logits/rejected": -91457778.09584664, | |
| "logps/chosen": -200.75829613095237, | |
| "logps/rejected": -134.88111521565494, | |
| "loss": 2.7823, | |
| "rewards/chosen": -0.29915979778955853, | |
| "rewards/margins": 1.8331558409384765, | |
| "rewards/rejected": -2.132315638728035, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 10.324543610547668, | |
| "grad_norm": 182.0, | |
| "kl": 0.5860379338264465, | |
| "learning_rate": 7.134942634577615e-07, | |
| "logits/chosen": -58812569.6, | |
| "logits/rejected": -88819168.0, | |
| "logps/chosen": -199.1839599609375, | |
| "logps/rejected": -127.58912353515625, | |
| "loss": 2.8078, | |
| "rewards/chosen": -0.19876351356506347, | |
| "rewards/margins": 1.8319713115692138, | |
| "rewards/rejected": -2.030734825134277, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 10.649087221095336, | |
| "grad_norm": 177.0, | |
| "kl": 0.6635628938674927, | |
| "learning_rate": 6.828186501476144e-07, | |
| "logits/chosen": -58466127.89808917, | |
| "logits/rejected": -90940309.20245399, | |
| "logps/chosen": -229.41757066082803, | |
| "logps/rejected": -136.31909029907976, | |
| "loss": 2.7883, | |
| "rewards/chosen": -0.17490250897255671, | |
| "rewards/margins": 1.9413016884488392, | |
| "rewards/rejected": -2.116204197421396, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 10.973630831643002, | |
| "grad_norm": 176.0, | |
| "kl": 0.6288160681724548, | |
| "learning_rate": 6.513265243660057e-07, | |
| "logits/chosen": -62876155.30275229, | |
| "logits/rejected": -91420423.36102237, | |
| "logps/chosen": -204.8246129587156, | |
| "logps/rejected": -136.0083491413738, | |
| "loss": 2.7516, | |
| "rewards/chosen": -0.2638938507173404, | |
| "rewards/margins": 1.962711744392983, | |
| "rewards/rejected": -2.2266055951103234, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 11.2920892494929, | |
| "grad_norm": 165.0, | |
| "kl": 0.6786984205245972, | |
| "learning_rate": 6.191585375915055e-07, | |
| "logits/chosen": -58107764.50773994, | |
| "logits/rejected": -88665648.68196721, | |
| "logps/chosen": -199.96833881578948, | |
| "logps/rejected": -128.3748463114754, | |
| "loss": 2.8082, | |
| "rewards/chosen": -0.19902199193050987, | |
| "rewards/margins": 1.8429992122831171, | |
| "rewards/rejected": -2.042021204213627, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 11.616632860040568, | |
| "grad_norm": 167.0, | |
| "kl": 0.6609476208686829, | |
| "learning_rate": 5.864583598619467e-07, | |
| "logits/chosen": -57476970.80794702, | |
| "logits/rejected": -91102129.23076923, | |
| "logps/chosen": -229.5999586092715, | |
| "logps/rejected": -134.39487795857988, | |
| "loss": 2.7377, | |
| "rewards/chosen": -0.1495321286435159, | |
| "rewards/margins": 2.048365336627841, | |
| "rewards/rejected": -2.197897465271357, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 11.941176470588236, | |
| "grad_norm": 141.0, | |
| "kl": 0.6255931854248047, | |
| "learning_rate": 5.533720381091582e-07, | |
| "logits/chosen": -63461816.16716418, | |
| "logits/rejected": -90867329.2590164, | |
| "logps/chosen": -204.81716417910448, | |
| "logps/rejected": -137.47254098360656, | |
| "loss": 2.7423, | |
| "rewards/chosen": -0.3370915427136777, | |
| "rewards/margins": 1.9331667260491296, | |
| "rewards/rejected": -2.2702582687628072, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 12.259634888438134, | |
| "grad_norm": 160.0, | |
| "kl": 0.6641746163368225, | |
| "learning_rate": 5.200473438779146e-07, | |
| "logits/chosen": -57381835.48717949, | |
| "logits/rejected": -89336883.84810127, | |
| "logps/chosen": -199.01509915865384, | |
| "logps/rejected": -127.97672320015823, | |
| "loss": 2.7637, | |
| "rewards/chosen": -0.159302613674066, | |
| "rewards/margins": 1.9056654471694863, | |
| "rewards/rejected": -2.0649680608435523, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 12.584178498985802, | |
| "grad_norm": 158.0, | |
| "kl": 0.605785071849823, | |
| "learning_rate": 4.866331133423456e-07, | |
| "logits/chosen": -58439706.597402595, | |
| "logits/rejected": -90199761.73493975, | |
| "logps/chosen": -222.0545606737013, | |
| "logps/rejected": -134.65968561746988, | |
| "loss": 2.7272, | |
| "rewards/chosen": -0.22628873354428775, | |
| "rewards/margins": 1.985029133137282, | |
| "rewards/rejected": -2.21131786668157, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 12.908722109533468, | |
| "grad_norm": 192.0, | |
| "kl": 0.7094799280166626, | |
| "learning_rate": 4.5327858256745065e-07, | |
| "logits/chosen": -62306581.590361446, | |
| "logits/rejected": -91545985.66233766, | |
| "logps/chosen": -212.20241905120483, | |
| "logps/rejected": -138.62365564123377, | |
| "loss": 2.7339, | |
| "rewards/chosen": -0.30779806389866105, | |
| "rewards/margins": 2.033511124349614, | |
| "rewards/rejected": -2.341309188248275, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 12.908722109533468, | |
| "eval_kl": 0.035703618079423904, | |
| "eval_logits/chosen": -67034790.05405405, | |
| "eval_logits/rejected": -106966931.25663717, | |
| "eval_logps/chosen": -224.06151463963963, | |
| "eval_logps/rejected": -137.08888447179203, | |
| "eval_loss": 0.3353511095046997, | |
| "eval_rewards/chosen": -0.16127505603137318, | |
| "eval_rewards/margins": 2.040005520961506, | |
| "eval_rewards/rejected": -2.201280576992879, | |
| "eval_runtime": 14.3688, | |
| "eval_samples_per_second": 15.241, | |
| "eval_steps_per_second": 0.974, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 13.227180527383368, | |
| "grad_norm": 160.0, | |
| "kl": 0.6558622121810913, | |
| "learning_rate": 4.201327209846065e-07, | |
| "logits/chosen": -59866034.716981135, | |
| "logits/rejected": -89190664.25806452, | |
| "logps/chosen": -193.23755650550314, | |
| "logps/rejected": -128.39615675403226, | |
| "loss": 2.7624, | |
| "rewards/chosen": -0.20178618521060585, | |
| "rewards/margins": 1.875514249517217, | |
| "rewards/rejected": -2.0773004347278228, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 13.551724137931034, | |
| "grad_norm": 156.0, | |
| "kl": 0.5715658664703369, | |
| "learning_rate": 3.873435660579217e-07, | |
| "logits/chosen": -57941598.18060201, | |
| "logits/rejected": -90468988.62170088, | |
| "logps/chosen": -220.37727320234114, | |
| "logps/rejected": -135.28165093475073, | |
| "loss": 2.7148, | |
| "rewards/chosen": -0.22364995950041805, | |
| "rewards/margins": 2.0567365988756814, | |
| "rewards/rejected": -2.2803865583760996, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 13.876267748478702, | |
| "grad_norm": 147.0, | |
| "kl": 0.8168804049491882, | |
| "learning_rate": 3.5505756211298774e-07, | |
| "logits/chosen": -61979449.136231884, | |
| "logits/rejected": -90915301.96610169, | |
| "logps/chosen": -212.7193161231884, | |
| "logps/rejected": -136.42921080508475, | |
| "loss": 2.7174, | |
| "rewards/chosen": -0.2887859178626019, | |
| "rewards/margins": 2.0035011004769108, | |
| "rewards/rejected": -2.2922870183395125, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 14.1947261663286, | |
| "grad_norm": 188.0, | |
| "kl": 0.6269903779029846, | |
| "learning_rate": 3.234189062809695e-07, | |
| "logits/chosen": -59424662.29677419, | |
| "logits/rejected": -90425550.08805032, | |
| "logps/chosen": -203.18240927419356, | |
| "logps/rejected": -129.64670548349056, | |
| "loss": 2.7431, | |
| "rewards/chosen": -0.19051946824596774, | |
| "rewards/margins": 1.9727823955143509, | |
| "rewards/rejected": -2.1633018637603185, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 14.519269776876268, | |
| "grad_norm": 145.0, | |
| "kl": 0.5869894027709961, | |
| "learning_rate": 2.9256890447921315e-07, | |
| "logits/chosen": -57757369.408637874, | |
| "logits/rejected": -89745903.38643068, | |
| "logps/chosen": -217.05743874584718, | |
| "logps/rejected": -134.99330521755164, | |
| "loss": 2.7134, | |
| "rewards/chosen": -0.24640435000194663, | |
| "rewards/margins": 2.035277397209521, | |
| "rewards/rejected": -2.2816817472114677, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 14.843813387423936, | |
| "grad_norm": 160.0, | |
| "kl": 0.7495726346969604, | |
| "learning_rate": 2.626453403047172e-07, | |
| "logits/chosen": -62740118.5882353, | |
| "logits/rejected": -91247097.17333333, | |
| "logps/chosen": -205.96771599264707, | |
| "logps/rejected": -138.3257421875, | |
| "loss": 2.7088, | |
| "rewards/chosen": -0.32139582914464615, | |
| "rewards/margins": 1.973530928667854, | |
| "rewards/rejected": -2.2949267578125, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 15.162271805273834, | |
| "grad_norm": 138.0, | |
| "kl": 0.6461220979690552, | |
| "learning_rate": 2.3378185965914078e-07, | |
| "logits/chosen": -59542024.12698413, | |
| "logits/rejected": -90673073.48242812, | |
| "logps/chosen": -212.47857142857143, | |
| "logps/rejected": -131.6886232028754, | |
| "loss": 2.7498, | |
| "rewards/chosen": -0.1828639923580109, | |
| "rewards/margins": 1.9991756036633548, | |
| "rewards/rejected": -2.1820395960213657, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 15.486815415821502, | |
| "grad_norm": 153.0, | |
| "kl": 0.6038868427276611, | |
| "learning_rate": 2.0610737385376348e-07, | |
| "logits/chosen": -56933078.04682274, | |
| "logits/rejected": -88981978.46334311, | |
| "logps/chosen": -207.75867474916387, | |
| "logps/rejected": -132.5094391495601, | |
| "loss": 2.7032, | |
| "rewards/chosen": -0.21223314470272, | |
| "rewards/margins": 2.0555130507128774, | |
| "rewards/rejected": -2.2677461954155973, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 15.811359026369168, | |
| "grad_norm": 169.0, | |
| "kl": 0.7091981172561646, | |
| "learning_rate": 1.7974548386027584e-07, | |
| "logits/chosen": -62982918.75659824, | |
| "logits/rejected": -91177055.89297658, | |
| "logps/chosen": -205.08644153225808, | |
| "logps/rejected": -140.23990123327758, | |
| "loss": 2.6976, | |
| "rewards/chosen": -0.3505571203147911, | |
| "rewards/margins": 1.9499471239760744, | |
| "rewards/rejected": -2.3005042442908654, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 16.129817444219068, | |
| "grad_norm": 170.0, | |
| "kl": 0.7773324251174927, | |
| "learning_rate": 1.5481392827883488e-07, | |
| "logits/chosen": -59927378.11320755, | |
| "logits/rejected": -91354719.79354839, | |
| "logps/chosen": -217.3079304245283, | |
| "logps/rejected": -131.10647681451613, | |
| "loss": 2.7533, | |
| "rewards/chosen": -0.19171231347809797, | |
| "rewards/margins": 2.018452205801136, | |
| "rewards/rejected": -2.210164519279234, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 16.454361054766736, | |
| "grad_norm": 159.0, | |
| "kl": 0.592677116394043, | |
| "learning_rate": 1.3142405748889457e-07, | |
| "logits/chosen": -56867931.24752475, | |
| "logits/rejected": -88677786.20771514, | |
| "logps/chosen": -205.24737004950495, | |
| "logps/rejected": -130.59708827893175, | |
| "loss": 2.7022, | |
| "rewards/chosen": -0.15676989413724088, | |
| "rewards/margins": 2.102558342428505, | |
| "rewards/rejected": -2.2593282365657457, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 16.7789046653144, | |
| "grad_norm": 176.0, | |
| "kl": 0.7139925956726074, | |
| "learning_rate": 1.096803363313803e-07, | |
| "logits/chosen": -61236336.4272997, | |
| "logits/rejected": -91304395.61716172, | |
| "logps/chosen": -207.9202290430267, | |
| "logps/rejected": -140.73229940181517, | |
| "loss": 2.7113, | |
| "rewards/chosen": -0.2867103825690838, | |
| "rewards/margins": 2.0461471585561224, | |
| "rewards/rejected": -2.332857541125206, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 17.0973630831643, | |
| "grad_norm": 197.0, | |
| "kl": 0.7058033347129822, | |
| "learning_rate": 8.967987754335022e-08, | |
| "logits/chosen": -61097139.53246753, | |
| "logits/rejected": -91390720.0, | |
| "logps/chosen": -213.40300324675326, | |
| "logps/rejected": -133.4157958984375, | |
| "loss": 2.7195, | |
| "rewards/chosen": -0.29281839147790684, | |
| "rewards/margins": 1.9267250655533432, | |
| "rewards/rejected": -2.21954345703125, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 17.421906693711968, | |
| "grad_norm": 264.0, | |
| "kl": 0.5918253064155579, | |
| "learning_rate": 7.15120080289368e-08, | |
| "logits/chosen": -58138827.48717949, | |
| "logits/rejected": -88649209.75609756, | |
| "logps/chosen": -204.44078024839743, | |
| "logps/rejected": -132.16799256859755, | |
| "loss": 2.73, | |
| "rewards/chosen": -0.17671105800530848, | |
| "rewards/margins": 2.0683754258337728, | |
| "rewards/rejected": -2.2450864838390814, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 17.746450304259636, | |
| "grad_norm": 157.0, | |
| "kl": 0.7473562359809875, | |
| "learning_rate": 5.5257869903709006e-08, | |
| "logits/chosen": -60289140.85285285, | |
| "logits/rejected": -91669847.55700326, | |
| "logps/chosen": -210.96473817567568, | |
| "logps/rejected": -138.43526058631923, | |
| "loss": 2.7077, | |
| "rewards/chosen": -0.27088488329638233, | |
| "rewards/margins": 2.0678908508750347, | |
| "rewards/rejected": -2.338775734171417, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 18.064908722109532, | |
| "grad_norm": 163.0, | |
| "kl": 0.6325186491012573, | |
| "learning_rate": 4.099005809428596e-08, | |
| "logits/chosen": -60974821.743589744, | |
| "logits/rejected": -90441339.1392405, | |
| "logps/chosen": -213.37244591346155, | |
| "logps/rejected": -133.14664507515823, | |
| "loss": 2.7211, | |
| "rewards/chosen": -0.2820472228221404, | |
| "rewards/margins": 1.9137137696867599, | |
| "rewards/rejected": -2.1957609925089003, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 18.3894523326572, | |
| "grad_norm": 138.0, | |
| "kl": 0.6061395406723022, | |
| "learning_rate": 2.8772296111772677e-08, | |
| "logits/chosen": -58149936.76190476, | |
| "logits/rejected": -89025810.11692308, | |
| "logps/chosen": -201.52972470238095, | |
| "logps/rejected": -132.06661057692307, | |
| "loss": 2.7258, | |
| "rewards/chosen": -0.14980345226469494, | |
| "rewards/margins": 2.120578909514151, | |
| "rewards/rejected": -2.270382361778846, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 18.713995943204868, | |
| "grad_norm": 191.0, | |
| "kl": 0.7928330898284912, | |
| "learning_rate": 1.865915144708985e-08, | |
| "logits/chosen": -59847180.487804875, | |
| "logits/rejected": -91838811.8974359, | |
| "logps/chosen": -217.81192835365854, | |
| "logps/rejected": -137.56884765625, | |
| "loss": 2.7109, | |
| "rewards/chosen": -0.25167286105272246, | |
| "rewards/margins": 2.060285256310058, | |
| "rewards/rejected": -2.3119581173627806, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 19.032454361054768, | |
| "grad_norm": 228.0, | |
| "kl": 0.5313221216201782, | |
| "learning_rate": 1.0695791859313297e-08, | |
| "logits/chosen": -61405038.658064514, | |
| "logits/rejected": -91388741.2327044, | |
| "logps/chosen": -208.06592741935484, | |
| "logps/rejected": -136.2116868121069, | |
| "loss": 2.7008, | |
| "rewards/chosen": -0.3075132308467742, | |
| "rewards/margins": 1.9849507315402146, | |
| "rewards/rejected": -2.292463962386989, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 19.356997971602436, | |
| "grad_norm": 159.0, | |
| "kl": 0.7078633308410645, | |
| "learning_rate": 4.917783645496887e-09, | |
| "logits/chosen": -58602562.99065421, | |
| "logits/rejected": -88561907.96238245, | |
| "logps/chosen": -197.22442075545172, | |
| "logps/rejected": -131.6894592476489, | |
| "loss": 2.7409, | |
| "rewards/chosen": -0.1881643990489924, | |
| "rewards/margins": 2.0329730513575512, | |
| "rewards/rejected": -2.2211374504065438, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 19.356997971602436, | |
| "eval_kl": 0.060564398765563965, | |
| "eval_logits/chosen": -67062447.27927928, | |
| "eval_logits/rejected": -106975032.63716814, | |
| "eval_logps/chosen": -223.90932925112614, | |
| "eval_logps/rejected": -137.44655696902655, | |
| "eval_loss": 0.33327072858810425, | |
| "eval_rewards/chosen": -0.14605889878831468, | |
| "eval_rewards/margins": 2.090988279767064, | |
| "eval_rewards/rejected": -2.237047178555379, | |
| "eval_runtime": 14.3431, | |
| "eval_samples_per_second": 15.269, | |
| "eval_steps_per_second": 0.976, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 19.6815415821501, | |
| "grad_norm": 133.0, | |
| "kl": 0.8249608278274536, | |
| "learning_rate": 1.350932792956394e-09, | |
| "logits/chosen": -59016821.90536278, | |
| "logits/rejected": -91488921.75851393, | |
| "logps/chosen": -231.22170741324922, | |
| "logps/rejected": -136.9874467879257, | |
| "loss": 2.7014, | |
| "rewards/chosen": -0.21359941259919657, | |
| "rewards/margins": 2.069619963247359, | |
| "rewards/rejected": -2.2832193758465555, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "grad_norm": 176.0, | |
| "kl": 0.49784502387046814, | |
| "learning_rate": 1.1169723465487279e-11, | |
| "logits/chosen": -62227758.32380953, | |
| "logits/rejected": -91514088.28115016, | |
| "logps/chosen": -201.05374503968255, | |
| "logps/rejected": -136.85892571884983, | |
| "loss": 2.6991, | |
| "rewards/chosen": -0.32870684426928326, | |
| "rewards/margins": 2.0013896107776414, | |
| "rewards/rejected": -2.3300964550469248, | |
| "step": 620 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 620, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 20, | |
| "save_steps": 200, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |