| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.08921004505107274, | |
| "eval_steps": 1000, | |
| "global_step": 1000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "completion_length": 225.1666717529297, | |
| "epoch": 8.921004505107276e-05, | |
| "grad_norm": 62.695068359375, | |
| "learning_rate": 2.5e-07, | |
| "loss": 12.7202, | |
| "reward": 0.19306249171495438, | |
| "reward_std": 0.5882241576910019, | |
| "rewards/correctness_reward_func": 0.3333333432674408, | |
| "rewards/int_reward_func": 0.0833333358168602, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": -0.22360415756702423, | |
| "step": 1, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.00017842009010214551, | |
| "grad_norm": 59.6503791809082, | |
| "learning_rate": 5e-07, | |
| "loss": 13.0732, | |
| "step": 2 | |
| }, | |
| { | |
| "epoch": 0.00026763013515321824, | |
| "grad_norm": 69.65516662597656, | |
| "learning_rate": 7.5e-07, | |
| "loss": 12.9681, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 0.00035684018020429103, | |
| "grad_norm": 57.81648635864258, | |
| "learning_rate": 1e-06, | |
| "loss": 8.1042, | |
| "step": 4 | |
| }, | |
| { | |
| "epoch": 0.00044605022525536376, | |
| "grad_norm": 57.6408576965332, | |
| "learning_rate": 1.25e-06, | |
| "loss": 8.6056, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.0005352602703064365, | |
| "grad_norm": 58.459903717041016, | |
| "learning_rate": 1.5e-06, | |
| "loss": 10.4929, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 0.0006244703153575092, | |
| "grad_norm": 62.41658020019531, | |
| "learning_rate": 1.7500000000000002e-06, | |
| "loss": 13.1206, | |
| "step": 7 | |
| }, | |
| { | |
| "epoch": 0.0007136803604085821, | |
| "grad_norm": 66.22370910644531, | |
| "learning_rate": 2e-06, | |
| "loss": 13.2007, | |
| "step": 8 | |
| }, | |
| { | |
| "epoch": 0.0008028904054596548, | |
| "grad_norm": 66.21946716308594, | |
| "learning_rate": 2.25e-06, | |
| "loss": 12.3522, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 0.0008921004505107275, | |
| "grad_norm": 65.43058776855469, | |
| "learning_rate": 2.5e-06, | |
| "loss": 7.9566, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.0009813104955618004, | |
| "grad_norm": 54.532962799072266, | |
| "learning_rate": 2.75e-06, | |
| "loss": 8.8616, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 0.001070520540612873, | |
| "grad_norm": 56.53645706176758, | |
| "learning_rate": 3e-06, | |
| "loss": 10.3095, | |
| "step": 12 | |
| }, | |
| { | |
| "completion_length": 222.9791717529297, | |
| "epoch": 0.0011597305856639458, | |
| "grad_norm": 78.4708023071289, | |
| "learning_rate": 3e-06, | |
| "loss": -24.6031, | |
| "reward": 0.011666670441627502, | |
| "reward_std": 0.524684801697731, | |
| "rewards/correctness_reward_func": 0.1250000037252903, | |
| "rewards/int_reward_func": 0.041666666977107525, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": -0.1550000049173832, | |
| "step": 13, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.0012489406307150184, | |
| "grad_norm": 88.30101776123047, | |
| "learning_rate": 3e-06, | |
| "loss": -18.3145, | |
| "step": 14 | |
| }, | |
| { | |
| "epoch": 0.0013381506757660913, | |
| "grad_norm": 101.97128295898438, | |
| "learning_rate": 3e-06, | |
| "loss": -7.4151, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.0014273607208171641, | |
| "grad_norm": 91.58382415771484, | |
| "learning_rate": 3e-06, | |
| "loss": -8.9073, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 0.0015165707658682367, | |
| "grad_norm": 90.10670471191406, | |
| "learning_rate": 3e-06, | |
| "loss": -13.5176, | |
| "step": 17 | |
| }, | |
| { | |
| "epoch": 0.0016057808109193096, | |
| "grad_norm": 80.67254638671875, | |
| "learning_rate": 3e-06, | |
| "loss": -17.2813, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 0.0016949908559703822, | |
| "grad_norm": 75.51331329345703, | |
| "learning_rate": 3e-06, | |
| "loss": -24.6926, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 0.001784200901021455, | |
| "grad_norm": 78.15167999267578, | |
| "learning_rate": 3e-06, | |
| "loss": -18.5973, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.0018734109460725277, | |
| "grad_norm": 89.70745086669922, | |
| "learning_rate": 3e-06, | |
| "loss": -7.9364, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 0.0019626209911236007, | |
| "grad_norm": 89.28164672851562, | |
| "learning_rate": 3e-06, | |
| "loss": -9.434, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 0.002051831036174673, | |
| "grad_norm": 98.30489349365234, | |
| "learning_rate": 3e-06, | |
| "loss": -14.6494, | |
| "step": 23 | |
| }, | |
| { | |
| "epoch": 0.002141041081225746, | |
| "grad_norm": 92.3221206665039, | |
| "learning_rate": 3e-06, | |
| "loss": -17.5654, | |
| "step": 24 | |
| }, | |
| { | |
| "completion_length": 224.95834350585938, | |
| "epoch": 0.002230251126276819, | |
| "grad_norm": 77.60931396484375, | |
| "learning_rate": 3e-06, | |
| "loss": 12.7299, | |
| "reward": 0.08664583414793015, | |
| "reward_std": 0.529650554060936, | |
| "rewards/correctness_reward_func": 0.1666666679084301, | |
| "rewards/int_reward_func": 0.041666666977107525, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": -0.12168749794363976, | |
| "step": 25, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.0023194611713278916, | |
| "grad_norm": 55.48664474487305, | |
| "learning_rate": 3e-06, | |
| "loss": 11.3297, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 0.0024086712163789645, | |
| "grad_norm": 64.88197326660156, | |
| "learning_rate": 3e-06, | |
| "loss": 7.6398, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 0.002497881261430037, | |
| "grad_norm": 66.41521453857422, | |
| "learning_rate": 3e-06, | |
| "loss": 10.9742, | |
| "step": 28 | |
| }, | |
| { | |
| "epoch": 0.0025870913064811097, | |
| "grad_norm": 60.356266021728516, | |
| "learning_rate": 3e-06, | |
| "loss": 18.3629, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 0.0026763013515321826, | |
| "grad_norm": 67.53816986083984, | |
| "learning_rate": 3e-06, | |
| "loss": 10.3122, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.0027655113965832554, | |
| "grad_norm": 81.81299591064453, | |
| "learning_rate": 3e-06, | |
| "loss": 12.4031, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 0.0028547214416343282, | |
| "grad_norm": 58.01384735107422, | |
| "learning_rate": 3e-06, | |
| "loss": 11.3115, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 0.0029439314866854006, | |
| "grad_norm": 60.38798522949219, | |
| "learning_rate": 3e-06, | |
| "loss": 7.5438, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 0.0030331415317364735, | |
| "grad_norm": 76.68485260009766, | |
| "learning_rate": 3e-06, | |
| "loss": 9.8314, | |
| "step": 34 | |
| }, | |
| { | |
| "epoch": 0.0031223515767875463, | |
| "grad_norm": 63.667381286621094, | |
| "learning_rate": 3e-06, | |
| "loss": 18.0907, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.003211561621838619, | |
| "grad_norm": 64.93324279785156, | |
| "learning_rate": 3e-06, | |
| "loss": 9.6529, | |
| "step": 36 | |
| }, | |
| { | |
| "completion_length": 200.9166717529297, | |
| "epoch": 0.003300771666889692, | |
| "grad_norm": 55.603302001953125, | |
| "learning_rate": 3e-06, | |
| "loss": -2.1381, | |
| "reward": 0.20900000631809235, | |
| "reward_std": 0.5408279597759247, | |
| "rewards/correctness_reward_func": 0.2500000074505806, | |
| "rewards/int_reward_func": 0.09374999813735485, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": -0.1347499955445528, | |
| "step": 37, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.0033899817119407644, | |
| "grad_norm": 59.070777893066406, | |
| "learning_rate": 3e-06, | |
| "loss": -6.7825, | |
| "step": 38 | |
| }, | |
| { | |
| "epoch": 0.0034791917569918372, | |
| "grad_norm": 73.52457427978516, | |
| "learning_rate": 3e-06, | |
| "loss": -11.5592, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 0.00356840180204291, | |
| "grad_norm": 68.8139419555664, | |
| "learning_rate": 3e-06, | |
| "loss": -3.9847, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.003657611847093983, | |
| "grad_norm": 74.64259338378906, | |
| "learning_rate": 3e-06, | |
| "loss": -7.7023, | |
| "step": 41 | |
| }, | |
| { | |
| "epoch": 0.0037468218921450553, | |
| "grad_norm": 68.76261901855469, | |
| "learning_rate": 3e-06, | |
| "loss": -11.4536, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 0.003836031937196128, | |
| "grad_norm": 57.10056686401367, | |
| "learning_rate": 3e-06, | |
| "loss": -3.0195, | |
| "step": 43 | |
| }, | |
| { | |
| "epoch": 0.003925241982247201, | |
| "grad_norm": 57.4798583984375, | |
| "learning_rate": 3e-06, | |
| "loss": -7.3677, | |
| "step": 44 | |
| }, | |
| { | |
| "epoch": 0.004014452027298274, | |
| "grad_norm": 62.251949310302734, | |
| "learning_rate": 3e-06, | |
| "loss": -12.4481, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.004103662072349346, | |
| "grad_norm": 67.0556640625, | |
| "learning_rate": 3e-06, | |
| "loss": -4.2431, | |
| "step": 46 | |
| }, | |
| { | |
| "epoch": 0.0041928721174004195, | |
| "grad_norm": 79.22687530517578, | |
| "learning_rate": 3e-06, | |
| "loss": -8.9896, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 0.004282082162451492, | |
| "grad_norm": 83.6895980834961, | |
| "learning_rate": 3e-06, | |
| "loss": -12.4645, | |
| "step": 48 | |
| }, | |
| { | |
| "completion_length": 186.0, | |
| "epoch": 0.004371292207502565, | |
| "grad_norm": 187.8282928466797, | |
| "learning_rate": 3e-06, | |
| "loss": -44.8522, | |
| "reward": 0.5453333556652069, | |
| "reward_std": 0.9364342093467712, | |
| "rewards/correctness_reward_func": 0.4583333358168602, | |
| "rewards/int_reward_func": 0.15625, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": -0.06924999551847577, | |
| "step": 49, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.004460502252553638, | |
| "grad_norm": 92.9270248413086, | |
| "learning_rate": 3e-06, | |
| "loss": -41.2773, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.00454971229760471, | |
| "grad_norm": 212.18917846679688, | |
| "learning_rate": 3e-06, | |
| "loss": -42.3882, | |
| "step": 51 | |
| }, | |
| { | |
| "epoch": 0.004638922342655783, | |
| "grad_norm": 102.22235870361328, | |
| "learning_rate": 3e-06, | |
| "loss": -42.8879, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 0.004728132387706856, | |
| "grad_norm": 79.1269302368164, | |
| "learning_rate": 3e-06, | |
| "loss": -44.708, | |
| "step": 53 | |
| }, | |
| { | |
| "epoch": 0.004817342432757929, | |
| "grad_norm": 94.53079986572266, | |
| "learning_rate": 3e-06, | |
| "loss": -41.656, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 0.004906552477809001, | |
| "grad_norm": 91.7303695678711, | |
| "learning_rate": 3e-06, | |
| "loss": -45.3257, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.004995762522860074, | |
| "grad_norm": 92.66773986816406, | |
| "learning_rate": 3e-06, | |
| "loss": -41.4113, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 0.005084972567911147, | |
| "grad_norm": 123.76467895507812, | |
| "learning_rate": 3e-06, | |
| "loss": -43.4643, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 0.0051741826129622194, | |
| "grad_norm": 109.21142578125, | |
| "learning_rate": 3e-06, | |
| "loss": -44.7136, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 0.005263392658013293, | |
| "grad_norm": 83.24272155761719, | |
| "learning_rate": 3e-06, | |
| "loss": -45.7862, | |
| "step": 59 | |
| }, | |
| { | |
| "epoch": 0.005352602703064365, | |
| "grad_norm": 94.45966339111328, | |
| "learning_rate": 3e-06, | |
| "loss": -42.5492, | |
| "step": 60 | |
| }, | |
| { | |
| "completion_length": 225.625, | |
| "epoch": 0.0054418127481154375, | |
| "grad_norm": 125.85611724853516, | |
| "learning_rate": 3e-06, | |
| "loss": 28.3825, | |
| "reward": 0.31822918355464935, | |
| "reward_std": 0.9613562524318695, | |
| "rewards/correctness_reward_func": 0.375, | |
| "rewards/int_reward_func": 0.1145833320915699, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": -0.17135417833924294, | |
| "step": 61, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.005531022793166511, | |
| "grad_norm": 128.64669799804688, | |
| "learning_rate": 3e-06, | |
| "loss": 4.8596, | |
| "step": 62 | |
| }, | |
| { | |
| "epoch": 0.005620232838217583, | |
| "grad_norm": 287.9391784667969, | |
| "learning_rate": 3e-06, | |
| "loss": 20.5521, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 0.0057094428832686565, | |
| "grad_norm": 111.01509857177734, | |
| "learning_rate": 3e-06, | |
| "loss": 16.6241, | |
| "step": 64 | |
| }, | |
| { | |
| "epoch": 0.005798652928319729, | |
| "grad_norm": 123.25679016113281, | |
| "learning_rate": 3e-06, | |
| "loss": 6.8919, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.005887862973370801, | |
| "grad_norm": 115.68987274169922, | |
| "learning_rate": 3e-06, | |
| "loss": 19.3061, | |
| "step": 66 | |
| }, | |
| { | |
| "epoch": 0.0059770730184218746, | |
| "grad_norm": 128.9923553466797, | |
| "learning_rate": 3e-06, | |
| "loss": 27.4792, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 0.006066283063472947, | |
| "grad_norm": 130.64230346679688, | |
| "learning_rate": 3e-06, | |
| "loss": 3.8702, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 0.00615549310852402, | |
| "grad_norm": 169.2925262451172, | |
| "learning_rate": 3e-06, | |
| "loss": 19.2163, | |
| "step": 69 | |
| }, | |
| { | |
| "epoch": 0.006244703153575093, | |
| "grad_norm": 104.88905334472656, | |
| "learning_rate": 3e-06, | |
| "loss": 14.5854, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.006333913198626165, | |
| "grad_norm": 134.32022094726562, | |
| "learning_rate": 3e-06, | |
| "loss": 5.6117, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 0.006423123243677238, | |
| "grad_norm": 124.52132415771484, | |
| "learning_rate": 3e-06, | |
| "loss": 18.0908, | |
| "step": 72 | |
| }, | |
| { | |
| "completion_length": 203.81250762939453, | |
| "epoch": 0.006512333288728311, | |
| "grad_norm": 87.01981353759766, | |
| "learning_rate": 3e-06, | |
| "loss": 14.5961, | |
| "reward": 0.25443750619888306, | |
| "reward_std": 0.6893003582954407, | |
| "rewards/correctness_reward_func": 0.2916666641831398, | |
| "rewards/int_reward_func": 0.09374999813735485, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": -0.13097916916012764, | |
| "step": 73, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.006601543333779384, | |
| "grad_norm": 83.70246887207031, | |
| "learning_rate": 3e-06, | |
| "loss": 20.7203, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 0.006690753378830456, | |
| "grad_norm": 80.23466491699219, | |
| "learning_rate": 3e-06, | |
| "loss": 30.3319, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.006779963423881529, | |
| "grad_norm": 74.68209838867188, | |
| "learning_rate": 3e-06, | |
| "loss": 23.2, | |
| "step": 76 | |
| }, | |
| { | |
| "epoch": 0.006869173468932602, | |
| "grad_norm": 81.28849029541016, | |
| "learning_rate": 3e-06, | |
| "loss": 11.7216, | |
| "step": 77 | |
| }, | |
| { | |
| "epoch": 0.0069583835139836745, | |
| "grad_norm": 85.60411071777344, | |
| "learning_rate": 3e-06, | |
| "loss": 19.9348, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 0.007047593559034747, | |
| "grad_norm": 95.26403045654297, | |
| "learning_rate": 3e-06, | |
| "loss": 13.5735, | |
| "step": 79 | |
| }, | |
| { | |
| "epoch": 0.00713680360408582, | |
| "grad_norm": 81.69352722167969, | |
| "learning_rate": 3e-06, | |
| "loss": 19.4906, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.0072260136491368926, | |
| "grad_norm": 80.9581527709961, | |
| "learning_rate": 3e-06, | |
| "loss": 29.1989, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 0.007315223694187966, | |
| "grad_norm": 87.37995147705078, | |
| "learning_rate": 3e-06, | |
| "loss": 23.4541, | |
| "step": 82 | |
| }, | |
| { | |
| "epoch": 0.007404433739239038, | |
| "grad_norm": 90.7470932006836, | |
| "learning_rate": 3e-06, | |
| "loss": 10.7907, | |
| "step": 83 | |
| }, | |
| { | |
| "epoch": 0.007493643784290111, | |
| "grad_norm": 352.26953125, | |
| "learning_rate": 3e-06, | |
| "loss": 18.1423, | |
| "step": 84 | |
| }, | |
| { | |
| "completion_length": 185.6875, | |
| "epoch": 0.007582853829341184, | |
| "grad_norm": 78.5768051147461, | |
| "learning_rate": 3e-06, | |
| "loss": -2.0743, | |
| "reward": 0.6054166778922081, | |
| "reward_std": 0.8349271714687347, | |
| "rewards/correctness_reward_func": 0.5, | |
| "rewards/int_reward_func": 0.125, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": -0.019583335146307945, | |
| "step": 85, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.007672063874392256, | |
| "grad_norm": 125.65332794189453, | |
| "learning_rate": 3e-06, | |
| "loss": -18.0183, | |
| "step": 86 | |
| }, | |
| { | |
| "epoch": 0.00776127391944333, | |
| "grad_norm": 93.01673889160156, | |
| "learning_rate": 3e-06, | |
| "loss": -2.9219, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 0.007850483964494403, | |
| "grad_norm": 85.38358306884766, | |
| "learning_rate": 3e-06, | |
| "loss": -7.3606, | |
| "step": 88 | |
| }, | |
| { | |
| "epoch": 0.007939694009545474, | |
| "grad_norm": 99.59243774414062, | |
| "learning_rate": 3e-06, | |
| "loss": -18.9376, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 0.008028904054596548, | |
| "grad_norm": 96.83404541015625, | |
| "learning_rate": 3e-06, | |
| "loss": -7.9748, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.008118114099647621, | |
| "grad_norm": 81.16954803466797, | |
| "learning_rate": 3e-06, | |
| "loss": -4.2134, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 0.008207324144698692, | |
| "grad_norm": 123.15869140625, | |
| "learning_rate": 3e-06, | |
| "loss": -19.8823, | |
| "step": 92 | |
| }, | |
| { | |
| "epoch": 0.008296534189749766, | |
| "grad_norm": 93.05419158935547, | |
| "learning_rate": 3e-06, | |
| "loss": -4.5813, | |
| "step": 93 | |
| }, | |
| { | |
| "epoch": 0.008385744234800839, | |
| "grad_norm": 106.2331314086914, | |
| "learning_rate": 3e-06, | |
| "loss": -8.6969, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 0.00847495427985191, | |
| "grad_norm": 99.65939331054688, | |
| "learning_rate": 3e-06, | |
| "loss": -21.3275, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.008564164324902984, | |
| "grad_norm": 94.40375518798828, | |
| "learning_rate": 3e-06, | |
| "loss": -9.7937, | |
| "step": 96 | |
| }, | |
| { | |
| "completion_length": 217.25000762939453, | |
| "epoch": 0.008653374369954057, | |
| "grad_norm": 133.5598907470703, | |
| "learning_rate": 3e-06, | |
| "loss": -68.7329, | |
| "reward": 0.6968958526849747, | |
| "reward_std": 0.7409922480583191, | |
| "rewards/correctness_reward_func": 0.5833333283662796, | |
| "rewards/int_reward_func": 0.1979166641831398, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": -0.08435416780412197, | |
| "step": 97, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.00874258441500513, | |
| "grad_norm": 136.60848999023438, | |
| "learning_rate": 3e-06, | |
| "loss": -74.5256, | |
| "step": 98 | |
| }, | |
| { | |
| "epoch": 0.008831794460056202, | |
| "grad_norm": 123.70120239257812, | |
| "learning_rate": 3e-06, | |
| "loss": -59.6774, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 0.008921004505107275, | |
| "grad_norm": 150.22532653808594, | |
| "learning_rate": 3e-06, | |
| "loss": -69.5624, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.009010214550158348, | |
| "grad_norm": 126.68507385253906, | |
| "learning_rate": 3e-06, | |
| "loss": -62.8973, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 0.00909942459520942, | |
| "grad_norm": 105.47962951660156, | |
| "learning_rate": 3e-06, | |
| "loss": -61.6182, | |
| "step": 102 | |
| }, | |
| { | |
| "epoch": 0.009188634640260493, | |
| "grad_norm": 144.26048278808594, | |
| "learning_rate": 3e-06, | |
| "loss": -70.5109, | |
| "step": 103 | |
| }, | |
| { | |
| "epoch": 0.009277844685311567, | |
| "grad_norm": 141.22325134277344, | |
| "learning_rate": 3e-06, | |
| "loss": -76.5479, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 0.009367054730362638, | |
| "grad_norm": 139.37173461914062, | |
| "learning_rate": 3e-06, | |
| "loss": -62.353, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.009456264775413711, | |
| "grad_norm": 150.77801513671875, | |
| "learning_rate": 3e-06, | |
| "loss": -72.2384, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 0.009545474820464785, | |
| "grad_norm": 138.2374267578125, | |
| "learning_rate": 3e-06, | |
| "loss": -65.3746, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 0.009634684865515858, | |
| "grad_norm": 132.50453186035156, | |
| "learning_rate": 3e-06, | |
| "loss": -64.141, | |
| "step": 108 | |
| }, | |
| { | |
| "completion_length": 198.125, | |
| "epoch": 0.00972389491056693, | |
| "grad_norm": 187.5413055419922, | |
| "learning_rate": 3e-06, | |
| "loss": 44.1489, | |
| "reward": 0.7788957953453064, | |
| "reward_std": 0.7549726963043213, | |
| "rewards/correctness_reward_func": 0.6666666567325592, | |
| "rewards/int_reward_func": 0.1770833283662796, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": -0.06485416181385517, | |
| "step": 109, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.009813104955618003, | |
| "grad_norm": 138.89434814453125, | |
| "learning_rate": 3e-06, | |
| "loss": 51.6646, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.009902315000669076, | |
| "grad_norm": 128.95484924316406, | |
| "learning_rate": 3e-06, | |
| "loss": 31.7993, | |
| "step": 111 | |
| }, | |
| { | |
| "epoch": 0.009991525045720148, | |
| "grad_norm": 126.7931900024414, | |
| "learning_rate": 3e-06, | |
| "loss": 38.5454, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 0.01008073509077122, | |
| "grad_norm": 125.33599853515625, | |
| "learning_rate": 3e-06, | |
| "loss": 40.3822, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 0.010169945135822294, | |
| "grad_norm": 139.41482543945312, | |
| "learning_rate": 3e-06, | |
| "loss": 32.5052, | |
| "step": 114 | |
| }, | |
| { | |
| "epoch": 0.010259155180873366, | |
| "grad_norm": 169.09432983398438, | |
| "learning_rate": 3e-06, | |
| "loss": 43.5542, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.010348365225924439, | |
| "grad_norm": 133.872802734375, | |
| "learning_rate": 3e-06, | |
| "loss": 50.3469, | |
| "step": 116 | |
| }, | |
| { | |
| "epoch": 0.010437575270975512, | |
| "grad_norm": 125.77018737792969, | |
| "learning_rate": 3e-06, | |
| "loss": 31.112, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 0.010526785316026585, | |
| "grad_norm": 128.32257080078125, | |
| "learning_rate": 3e-06, | |
| "loss": 36.629, | |
| "step": 118 | |
| }, | |
| { | |
| "epoch": 0.010615995361077657, | |
| "grad_norm": 124.38401794433594, | |
| "learning_rate": 3e-06, | |
| "loss": 39.8284, | |
| "step": 119 | |
| }, | |
| { | |
| "epoch": 0.01070520540612873, | |
| "grad_norm": 138.24668884277344, | |
| "learning_rate": 3e-06, | |
| "loss": 31.1433, | |
| "step": 120 | |
| }, | |
| { | |
| "completion_length": 191.6041717529297, | |
| "epoch": 0.010794415451179804, | |
| "grad_norm": 199.1646270751953, | |
| "learning_rate": 3e-06, | |
| "loss": 86.314, | |
| "reward": 1.0014583468437195, | |
| "reward_std": 0.8148851096630096, | |
| "rewards/correctness_reward_func": 0.875, | |
| "rewards/int_reward_func": 0.2291666641831398, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": -0.10270833596587181, | |
| "step": 121, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.010883625496230875, | |
| "grad_norm": 195.7254638671875, | |
| "learning_rate": 3e-06, | |
| "loss": 76.1289, | |
| "step": 122 | |
| }, | |
| { | |
| "epoch": 0.010972835541281948, | |
| "grad_norm": 175.13900756835938, | |
| "learning_rate": 3e-06, | |
| "loss": 86.1448, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 0.011062045586333022, | |
| "grad_norm": 182.21661376953125, | |
| "learning_rate": 3e-06, | |
| "loss": 90.0805, | |
| "step": 124 | |
| }, | |
| { | |
| "epoch": 0.011151255631384093, | |
| "grad_norm": 189.17214965820312, | |
| "learning_rate": 3e-06, | |
| "loss": 76.0951, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.011240465676435166, | |
| "grad_norm": 195.55718994140625, | |
| "learning_rate": 3e-06, | |
| "loss": 89.5242, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 0.01132967572148624, | |
| "grad_norm": 171.1396484375, | |
| "learning_rate": 3e-06, | |
| "loss": 82.3705, | |
| "step": 127 | |
| }, | |
| { | |
| "epoch": 0.011418885766537313, | |
| "grad_norm": 189.04995727539062, | |
| "learning_rate": 3e-06, | |
| "loss": 71.8677, | |
| "step": 128 | |
| }, | |
| { | |
| "epoch": 0.011508095811588384, | |
| "grad_norm": 162.9297332763672, | |
| "learning_rate": 3e-06, | |
| "loss": 81.2432, | |
| "step": 129 | |
| }, | |
| { | |
| "epoch": 0.011597305856639458, | |
| "grad_norm": 173.23104858398438, | |
| "learning_rate": 3e-06, | |
| "loss": 85.8069, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.011686515901690531, | |
| "grad_norm": 162.6637420654297, | |
| "learning_rate": 3e-06, | |
| "loss": 69.8347, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 0.011775725946741603, | |
| "grad_norm": 190.06675720214844, | |
| "learning_rate": 3e-06, | |
| "loss": 84.5222, | |
| "step": 132 | |
| }, | |
| { | |
| "completion_length": 207.33334350585938, | |
| "epoch": 0.011864935991792676, | |
| "grad_norm": 147.16957092285156, | |
| "learning_rate": 3e-06, | |
| "loss": -63.4697, | |
| "reward": 0.6791666746139526, | |
| "reward_std": 1.0425111949443817, | |
| "rewards/correctness_reward_func": 0.5833333134651184, | |
| "rewards/int_reward_func": 0.1770833358168602, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": -0.08124999818392098, | |
| "step": 133, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.011954146036843749, | |
| "grad_norm": 154.6719970703125, | |
| "learning_rate": 3e-06, | |
| "loss": -70.5112, | |
| "step": 134 | |
| }, | |
| { | |
| "epoch": 0.01204335608189482, | |
| "grad_norm": 137.32408142089844, | |
| "learning_rate": 3e-06, | |
| "loss": -41.1322, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.012132566126945894, | |
| "grad_norm": 126.37704467773438, | |
| "learning_rate": 3e-06, | |
| "loss": -53.2367, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 0.012221776171996967, | |
| "grad_norm": 152.24891662597656, | |
| "learning_rate": 3e-06, | |
| "loss": -58.4567, | |
| "step": 137 | |
| }, | |
| { | |
| "epoch": 0.01231098621704804, | |
| "grad_norm": 116.28028106689453, | |
| "learning_rate": 3e-06, | |
| "loss": -46.2973, | |
| "step": 138 | |
| }, | |
| { | |
| "epoch": 0.012400196262099112, | |
| "grad_norm": 152.08795166015625, | |
| "learning_rate": 3e-06, | |
| "loss": -62.8325, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 0.012489406307150185, | |
| "grad_norm": 146.10671997070312, | |
| "learning_rate": 3e-06, | |
| "loss": -71.6559, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.012578616352201259, | |
| "grad_norm": 149.14556884765625, | |
| "learning_rate": 3e-06, | |
| "loss": -42.1534, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 0.01266782639725233, | |
| "grad_norm": 151.06182861328125, | |
| "learning_rate": 3e-06, | |
| "loss": -55.6968, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 0.012757036442303403, | |
| "grad_norm": 145.29530334472656, | |
| "learning_rate": 3e-06, | |
| "loss": -60.2759, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 0.012846246487354477, | |
| "grad_norm": 124.00696563720703, | |
| "learning_rate": 3e-06, | |
| "loss": -48.5856, | |
| "step": 144 | |
| }, | |
| { | |
| "completion_length": 200.14583587646484, | |
| "epoch": 0.012935456532405548, | |
| "grad_norm": 104.97675323486328, | |
| "learning_rate": 3e-06, | |
| "loss": -22.2294, | |
| "reward": 0.21922918409109116, | |
| "reward_std": 0.6296879947185516, | |
| "rewards/correctness_reward_func": 0.2916666716337204, | |
| "rewards/int_reward_func": 0.062499999068677425, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": -0.13493750616908073, | |
| "step": 145, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.013024666577456621, | |
| "grad_norm": 83.18937683105469, | |
| "learning_rate": 3e-06, | |
| "loss": -22.3765, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 0.013113876622507695, | |
| "grad_norm": 96.22801971435547, | |
| "learning_rate": 3e-06, | |
| "loss": -22.5564, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 0.013203086667558768, | |
| "grad_norm": 102.87374877929688, | |
| "learning_rate": 3e-06, | |
| "loss": -24.9001, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 0.01329229671260984, | |
| "grad_norm": 110.96674346923828, | |
| "learning_rate": 3e-06, | |
| "loss": -18.8972, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 0.013381506757660913, | |
| "grad_norm": 91.87604522705078, | |
| "learning_rate": 3e-06, | |
| "loss": -18.1615, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.013470716802711986, | |
| "grad_norm": 88.4422836303711, | |
| "learning_rate": 3e-06, | |
| "loss": -23.0431, | |
| "step": 151 | |
| }, | |
| { | |
| "epoch": 0.013559926847763058, | |
| "grad_norm": 83.86327362060547, | |
| "learning_rate": 3e-06, | |
| "loss": -23.25, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 0.01364913689281413, | |
| "grad_norm": 82.81922149658203, | |
| "learning_rate": 3e-06, | |
| "loss": -23.1331, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 0.013738346937865204, | |
| "grad_norm": 104.8452377319336, | |
| "learning_rate": 3e-06, | |
| "loss": -26.8428, | |
| "step": 154 | |
| }, | |
| { | |
| "epoch": 0.013827556982916276, | |
| "grad_norm": 92.94257354736328, | |
| "learning_rate": 3e-06, | |
| "loss": -20.0667, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.013916767027967349, | |
| "grad_norm": 84.95638275146484, | |
| "learning_rate": 3e-06, | |
| "loss": -19.1472, | |
| "step": 156 | |
| }, | |
| { | |
| "completion_length": 231.7916717529297, | |
| "epoch": 0.014005977073018422, | |
| "grad_norm": 139.96688842773438, | |
| "learning_rate": 3e-06, | |
| "loss": 21.1797, | |
| "reward": 0.6720625460147858, | |
| "reward_std": 0.9181468784809113, | |
| "rewards/correctness_reward_func": 0.5833333432674408, | |
| "rewards/int_reward_func": 0.2187500074505806, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": -0.13002083078026772, | |
| "step": 157, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.014095187118069494, | |
| "grad_norm": 109.121337890625, | |
| "learning_rate": 3e-06, | |
| "loss": 10.5428, | |
| "step": 158 | |
| }, | |
| { | |
| "epoch": 0.014184397163120567, | |
| "grad_norm": 94.9039306640625, | |
| "learning_rate": 3e-06, | |
| "loss": 4.8126, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 0.01427360720817164, | |
| "grad_norm": 109.7251968383789, | |
| "learning_rate": 3e-06, | |
| "loss": 5.2961, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.014362817253222714, | |
| "grad_norm": 103.42703247070312, | |
| "learning_rate": 3e-06, | |
| "loss": 4.0648, | |
| "step": 161 | |
| }, | |
| { | |
| "epoch": 0.014452027298273785, | |
| "grad_norm": 127.93770599365234, | |
| "learning_rate": 3e-06, | |
| "loss": 7.0101, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 0.014541237343324858, | |
| "grad_norm": 145.8150634765625, | |
| "learning_rate": 3e-06, | |
| "loss": 18.3559, | |
| "step": 163 | |
| }, | |
| { | |
| "epoch": 0.014630447388375932, | |
| "grad_norm": 116.2653579711914, | |
| "learning_rate": 3e-06, | |
| "loss": 8.3424, | |
| "step": 164 | |
| }, | |
| { | |
| "epoch": 0.014719657433427003, | |
| "grad_norm": 104.55130767822266, | |
| "learning_rate": 3e-06, | |
| "loss": 2.084, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.014808867478478076, | |
| "grad_norm": 114.84294128417969, | |
| "learning_rate": 3e-06, | |
| "loss": 2.2571, | |
| "step": 166 | |
| }, | |
| { | |
| "epoch": 0.01489807752352915, | |
| "grad_norm": 99.8189468383789, | |
| "learning_rate": 3e-06, | |
| "loss": 0.9219, | |
| "step": 167 | |
| }, | |
| { | |
| "epoch": 0.014987287568580221, | |
| "grad_norm": 142.80715942382812, | |
| "learning_rate": 3e-06, | |
| "loss": 3.505, | |
| "step": 168 | |
| }, | |
| { | |
| "completion_length": 214.77083587646484, | |
| "epoch": 0.015076497613631295, | |
| "grad_norm": 114.3720703125, | |
| "learning_rate": 3e-06, | |
| "loss": -32.1329, | |
| "reward": 1.0511458218097687, | |
| "reward_std": 1.0028848350048065, | |
| "rewards/correctness_reward_func": 0.9166666865348816, | |
| "rewards/int_reward_func": 0.3125, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": -0.17802083492279053, | |
| "step": 169, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.015165707658682368, | |
| "grad_norm": 100.3597412109375, | |
| "learning_rate": 3e-06, | |
| "loss": -38.7511, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.015254917703733441, | |
| "grad_norm": 108.30574035644531, | |
| "learning_rate": 3e-06, | |
| "loss": -46.3083, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 0.015344127748784513, | |
| "grad_norm": 116.34545135498047, | |
| "learning_rate": 3e-06, | |
| "loss": -39.7363, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 0.015433337793835586, | |
| "grad_norm": 113.52851104736328, | |
| "learning_rate": 3e-06, | |
| "loss": -34.5686, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 0.01552254783888666, | |
| "grad_norm": 110.65509796142578, | |
| "learning_rate": 3e-06, | |
| "loss": -32.8796, | |
| "step": 174 | |
| }, | |
| { | |
| "epoch": 0.01561175788393773, | |
| "grad_norm": 107.06590270996094, | |
| "learning_rate": 3e-06, | |
| "loss": -32.6552, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.015700967928988806, | |
| "grad_norm": 100.2861557006836, | |
| "learning_rate": 3e-06, | |
| "loss": -39.6106, | |
| "step": 176 | |
| }, | |
| { | |
| "epoch": 0.015790177974039876, | |
| "grad_norm": 107.69467163085938, | |
| "learning_rate": 3e-06, | |
| "loss": -46.9244, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 0.01587938801909095, | |
| "grad_norm": 96.8420181274414, | |
| "learning_rate": 3e-06, | |
| "loss": -40.93, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 0.015968598064142022, | |
| "grad_norm": 113.12389373779297, | |
| "learning_rate": 3e-06, | |
| "loss": -37.0258, | |
| "step": 179 | |
| }, | |
| { | |
| "epoch": 0.016057808109193095, | |
| "grad_norm": 116.10971069335938, | |
| "learning_rate": 3e-06, | |
| "loss": -34.9046, | |
| "step": 180 | |
| }, | |
| { | |
| "completion_length": 218.64583587646484, | |
| "epoch": 0.01614701815424417, | |
| "grad_norm": 307.6673889160156, | |
| "learning_rate": 3e-06, | |
| "loss": -29.0457, | |
| "reward": 1.4360832571983337, | |
| "reward_std": 1.0610616505146027, | |
| "rewards/correctness_reward_func": 1.25, | |
| "rewards/int_reward_func": 0.34375, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": -0.15766665898263454, | |
| "step": 181, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.016236228199295242, | |
| "grad_norm": 125.99212646484375, | |
| "learning_rate": 3e-06, | |
| "loss": 1.016, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 0.01632543824434631, | |
| "grad_norm": 112.21588897705078, | |
| "learning_rate": 3e-06, | |
| "loss": -17.115, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 0.016414648289397385, | |
| "grad_norm": 118.06622314453125, | |
| "learning_rate": 3e-06, | |
| "loss": -0.4864, | |
| "step": 184 | |
| }, | |
| { | |
| "epoch": 0.016503858334448458, | |
| "grad_norm": 116.36631774902344, | |
| "learning_rate": 3e-06, | |
| "loss": 3.4437, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.01659306837949953, | |
| "grad_norm": 124.60052490234375, | |
| "learning_rate": 3e-06, | |
| "loss": -27.5515, | |
| "step": 186 | |
| }, | |
| { | |
| "epoch": 0.016682278424550605, | |
| "grad_norm": 160.65628051757812, | |
| "learning_rate": 3e-06, | |
| "loss": -29.3863, | |
| "step": 187 | |
| }, | |
| { | |
| "epoch": 0.016771488469601678, | |
| "grad_norm": 127.9763412475586, | |
| "learning_rate": 3e-06, | |
| "loss": 0.7423, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 0.01686069851465275, | |
| "grad_norm": 116.69316101074219, | |
| "learning_rate": 3e-06, | |
| "loss": -18.6518, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 0.01694990855970382, | |
| "grad_norm": 114.2183609008789, | |
| "learning_rate": 3e-06, | |
| "loss": -0.87, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.017039118604754894, | |
| "grad_norm": 126.1614761352539, | |
| "learning_rate": 3e-06, | |
| "loss": 2.8213, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 0.017128328649805968, | |
| "grad_norm": 134.43527221679688, | |
| "learning_rate": 3e-06, | |
| "loss": -28.8518, | |
| "step": 192 | |
| }, | |
| { | |
| "completion_length": 247.08334350585938, | |
| "epoch": 0.01721753869485704, | |
| "grad_norm": 111.3412094116211, | |
| "learning_rate": 3e-06, | |
| "loss": -7.8814, | |
| "reward": 1.0157291293144226, | |
| "reward_std": 0.7945153564214706, | |
| "rewards/correctness_reward_func": 0.9166666865348816, | |
| "rewards/int_reward_func": 0.2916666716337204, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": -0.1926041767001152, | |
| "step": 193, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.017306748739908114, | |
| "grad_norm": 116.42599487304688, | |
| "learning_rate": 3e-06, | |
| "loss": -15.1245, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 0.017395958784959187, | |
| "grad_norm": 136.37391662597656, | |
| "learning_rate": 3e-06, | |
| "loss": -9.2426, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.01748516883001026, | |
| "grad_norm": 97.36872863769531, | |
| "learning_rate": 3e-06, | |
| "loss": -10.8671, | |
| "step": 196 | |
| }, | |
| { | |
| "epoch": 0.01757437887506133, | |
| "grad_norm": 125.0397720336914, | |
| "learning_rate": 3e-06, | |
| "loss": -7.0755, | |
| "step": 197 | |
| }, | |
| { | |
| "epoch": 0.017663588920112404, | |
| "grad_norm": 171.17971801757812, | |
| "learning_rate": 3e-06, | |
| "loss": -16.797, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 0.017752798965163477, | |
| "grad_norm": 100.81266021728516, | |
| "learning_rate": 3e-06, | |
| "loss": -8.4577, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 0.01784200901021455, | |
| "grad_norm": 127.79389953613281, | |
| "learning_rate": 3e-06, | |
| "loss": -16.3874, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.017931219055265624, | |
| "grad_norm": 131.9748077392578, | |
| "learning_rate": 3e-06, | |
| "loss": -10.7973, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 0.018020429100316697, | |
| "grad_norm": 100.95606231689453, | |
| "learning_rate": 3e-06, | |
| "loss": -12.0026, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 0.018109639145367767, | |
| "grad_norm": 131.19261169433594, | |
| "learning_rate": 3e-06, | |
| "loss": -8.3155, | |
| "step": 203 | |
| }, | |
| { | |
| "epoch": 0.01819884919041884, | |
| "grad_norm": 164.74656677246094, | |
| "learning_rate": 3e-06, | |
| "loss": -18.9275, | |
| "step": 204 | |
| }, | |
| { | |
| "completion_length": 191.7291717529297, | |
| "epoch": 0.018288059235469913, | |
| "grad_norm": 150.95191955566406, | |
| "learning_rate": 3e-06, | |
| "loss": 50.6719, | |
| "reward": 1.3118958473205566, | |
| "reward_std": 0.8902758955955505, | |
| "rewards/correctness_reward_func": 0.9999999701976776, | |
| "rewards/int_reward_func": 0.34375, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": -0.03185417060740292, | |
| "step": 205, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.018377269280520987, | |
| "grad_norm": 128.34344482421875, | |
| "learning_rate": 3e-06, | |
| "loss": 32.3302, | |
| "step": 206 | |
| }, | |
| { | |
| "epoch": 0.01846647932557206, | |
| "grad_norm": 136.15789794921875, | |
| "learning_rate": 3e-06, | |
| "loss": 33.8857, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 0.018555689370623133, | |
| "grad_norm": 140.50901794433594, | |
| "learning_rate": 3e-06, | |
| "loss": 28.924, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 0.018644899415674206, | |
| "grad_norm": 168.0647430419922, | |
| "learning_rate": 3e-06, | |
| "loss": 31.3019, | |
| "step": 209 | |
| }, | |
| { | |
| "epoch": 0.018734109460725276, | |
| "grad_norm": 133.79208374023438, | |
| "learning_rate": 3e-06, | |
| "loss": 31.6401, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.01882331950577635, | |
| "grad_norm": 161.34898376464844, | |
| "learning_rate": 3e-06, | |
| "loss": 49.2047, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 0.018912529550827423, | |
| "grad_norm": 129.22007751464844, | |
| "learning_rate": 3e-06, | |
| "loss": 30.2, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 0.019001739595878496, | |
| "grad_norm": 143.37449645996094, | |
| "learning_rate": 3e-06, | |
| "loss": 31.2762, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 0.01909094964092957, | |
| "grad_norm": 140.57894897460938, | |
| "learning_rate": 3e-06, | |
| "loss": 26.7715, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 0.019180159685980643, | |
| "grad_norm": 148.71348571777344, | |
| "learning_rate": 3e-06, | |
| "loss": 28.729, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.019269369731031716, | |
| "grad_norm": 137.0448455810547, | |
| "learning_rate": 3e-06, | |
| "loss": 29.3048, | |
| "step": 216 | |
| }, | |
| { | |
| "completion_length": 241.0, | |
| "epoch": 0.019358579776082786, | |
| "grad_norm": 167.66650390625, | |
| "learning_rate": 3e-06, | |
| "loss": -43.2087, | |
| "reward": 1.6041667461395264, | |
| "reward_std": 0.9945478439331055, | |
| "rewards/correctness_reward_func": 1.375, | |
| "rewards/int_reward_func": 0.4270833283662796, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": -0.1979166641831398, | |
| "step": 217, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.01944778982113386, | |
| "grad_norm": 168.0265350341797, | |
| "learning_rate": 3e-06, | |
| "loss": -45.6767, | |
| "step": 218 | |
| }, | |
| { | |
| "epoch": 0.019536999866184932, | |
| "grad_norm": 148.4340362548828, | |
| "learning_rate": 3e-06, | |
| "loss": -30.5571, | |
| "step": 219 | |
| }, | |
| { | |
| "epoch": 0.019626209911236005, | |
| "grad_norm": 139.6564178466797, | |
| "learning_rate": 3e-06, | |
| "loss": -44.7743, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.01971541995628708, | |
| "grad_norm": 147.22129821777344, | |
| "learning_rate": 3e-06, | |
| "loss": -41.9365, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 0.019804630001338152, | |
| "grad_norm": 190.81561279296875, | |
| "learning_rate": 3e-06, | |
| "loss": -48.2229, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 0.019893840046389222, | |
| "grad_norm": 165.86917114257812, | |
| "learning_rate": 3e-06, | |
| "loss": -43.317, | |
| "step": 223 | |
| }, | |
| { | |
| "epoch": 0.019983050091440295, | |
| "grad_norm": 162.9475555419922, | |
| "learning_rate": 3e-06, | |
| "loss": -48.1878, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 0.02007226013649137, | |
| "grad_norm": 179.08360290527344, | |
| "learning_rate": 3e-06, | |
| "loss": -33.3052, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.02016147018154244, | |
| "grad_norm": 133.29290771484375, | |
| "learning_rate": 3e-06, | |
| "loss": -45.8993, | |
| "step": 226 | |
| }, | |
| { | |
| "epoch": 0.020250680226593515, | |
| "grad_norm": 155.86611938476562, | |
| "learning_rate": 3e-06, | |
| "loss": -43.9261, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 0.020339890271644588, | |
| "grad_norm": 154.34974670410156, | |
| "learning_rate": 3e-06, | |
| "loss": -50.4381, | |
| "step": 228 | |
| }, | |
| { | |
| "completion_length": 239.43750762939453, | |
| "epoch": 0.02042910031669566, | |
| "grad_norm": 105.5196304321289, | |
| "learning_rate": 3e-06, | |
| "loss": 0.4059, | |
| "reward": 1.5316042304039001, | |
| "reward_std": 0.8583633303642273, | |
| "rewards/correctness_reward_func": 1.375, | |
| "rewards/int_reward_func": 0.3541666716337204, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": -0.19756250455975533, | |
| "step": 229, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.02051831036174673, | |
| "grad_norm": 119.5712890625, | |
| "learning_rate": 3e-06, | |
| "loss": -3.2594, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.020607520406797804, | |
| "grad_norm": 128.1366424560547, | |
| "learning_rate": 3e-06, | |
| "loss": -12.0605, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 0.020696730451848878, | |
| "grad_norm": 126.55559539794922, | |
| "learning_rate": 3e-06, | |
| "loss": -15.4799, | |
| "step": 232 | |
| }, | |
| { | |
| "epoch": 0.02078594049689995, | |
| "grad_norm": 137.93882751464844, | |
| "learning_rate": 3e-06, | |
| "loss": -18.5312, | |
| "step": 233 | |
| }, | |
| { | |
| "epoch": 0.020875150541951024, | |
| "grad_norm": 108.0162124633789, | |
| "learning_rate": 3e-06, | |
| "loss": -11.7573, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 0.020964360587002098, | |
| "grad_norm": 118.95193481445312, | |
| "learning_rate": 3e-06, | |
| "loss": -1.1434, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.02105357063205317, | |
| "grad_norm": 126.50416564941406, | |
| "learning_rate": 3e-06, | |
| "loss": -3.7423, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 0.02114278067710424, | |
| "grad_norm": 130.68190002441406, | |
| "learning_rate": 3e-06, | |
| "loss": -14.5207, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 0.021231990722155314, | |
| "grad_norm": 129.162109375, | |
| "learning_rate": 3e-06, | |
| "loss": -16.3237, | |
| "step": 238 | |
| }, | |
| { | |
| "epoch": 0.021321200767206387, | |
| "grad_norm": 145.95396423339844, | |
| "learning_rate": 3e-06, | |
| "loss": -20.6294, | |
| "step": 239 | |
| }, | |
| { | |
| "epoch": 0.02141041081225746, | |
| "grad_norm": 107.8385009765625, | |
| "learning_rate": 3e-06, | |
| "loss": -14.0773, | |
| "step": 240 | |
| }, | |
| { | |
| "completion_length": 206.9791717529297, | |
| "epoch": 0.021499620857308534, | |
| "grad_norm": 102.38019561767578, | |
| "learning_rate": 3e-06, | |
| "loss": -32.3486, | |
| "reward": 1.035479187965393, | |
| "reward_std": 0.7589404881000519, | |
| "rewards/correctness_reward_func": 0.8750000149011612, | |
| "rewards/int_reward_func": 0.3020833283662796, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": -0.14160417020320892, | |
| "step": 241, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.021588830902359607, | |
| "grad_norm": 110.24679565429688, | |
| "learning_rate": 3e-06, | |
| "loss": -38.6199, | |
| "step": 242 | |
| }, | |
| { | |
| "epoch": 0.021678040947410677, | |
| "grad_norm": 118.22930145263672, | |
| "learning_rate": 3e-06, | |
| "loss": -52.9139, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 0.02176725099246175, | |
| "grad_norm": 118.6080322265625, | |
| "learning_rate": 3e-06, | |
| "loss": -43.3805, | |
| "step": 244 | |
| }, | |
| { | |
| "epoch": 0.021856461037512823, | |
| "grad_norm": 106.9905776977539, | |
| "learning_rate": 3e-06, | |
| "loss": -36.7945, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.021945671082563897, | |
| "grad_norm": 111.37010955810547, | |
| "learning_rate": 3e-06, | |
| "loss": -36.4452, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 0.02203488112761497, | |
| "grad_norm": 104.93065643310547, | |
| "learning_rate": 3e-06, | |
| "loss": -34.2096, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 0.022124091172666043, | |
| "grad_norm": 117.96737670898438, | |
| "learning_rate": 3e-06, | |
| "loss": -40.621, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 0.022213301217717116, | |
| "grad_norm": 118.701904296875, | |
| "learning_rate": 3e-06, | |
| "loss": -54.4138, | |
| "step": 249 | |
| }, | |
| { | |
| "epoch": 0.022302511262768186, | |
| "grad_norm": 118.43307495117188, | |
| "learning_rate": 3e-06, | |
| "loss": -45.0393, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.02239172130781926, | |
| "grad_norm": 114.41901397705078, | |
| "learning_rate": 3e-06, | |
| "loss": -37.6304, | |
| "step": 251 | |
| }, | |
| { | |
| "epoch": 0.022480931352870333, | |
| "grad_norm": 123.03970336914062, | |
| "learning_rate": 3e-06, | |
| "loss": -39.0638, | |
| "step": 252 | |
| }, | |
| { | |
| "completion_length": 211.20833587646484, | |
| "epoch": 0.022570141397921406, | |
| "grad_norm": 139.677734375, | |
| "learning_rate": 3e-06, | |
| "loss": -27.6756, | |
| "reward": 1.5570417046546936, | |
| "reward_std": 1.1208258867263794, | |
| "rewards/correctness_reward_func": 1.2916666865348816, | |
| "rewards/int_reward_func": 0.40625, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": -0.14087500423192978, | |
| "step": 253, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.02265935144297248, | |
| "grad_norm": 145.56021118164062, | |
| "learning_rate": 3e-06, | |
| "loss": -40.3289, | |
| "step": 254 | |
| }, | |
| { | |
| "epoch": 0.022748561488023553, | |
| "grad_norm": 138.8564453125, | |
| "learning_rate": 3e-06, | |
| "loss": -39.1766, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.022837771533074626, | |
| "grad_norm": 229.50186157226562, | |
| "learning_rate": 3e-06, | |
| "loss": -43.9568, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 0.022926981578125696, | |
| "grad_norm": 138.42791748046875, | |
| "learning_rate": 3e-06, | |
| "loss": -52.4297, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 0.02301619162317677, | |
| "grad_norm": 147.58364868164062, | |
| "learning_rate": 3e-06, | |
| "loss": -53.5477, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 0.023105401668227842, | |
| "grad_norm": 140.5048828125, | |
| "learning_rate": 3e-06, | |
| "loss": -28.1418, | |
| "step": 259 | |
| }, | |
| { | |
| "epoch": 0.023194611713278915, | |
| "grad_norm": 139.11508178710938, | |
| "learning_rate": 3e-06, | |
| "loss": -42.7612, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.02328382175832999, | |
| "grad_norm": 146.18580627441406, | |
| "learning_rate": 3e-06, | |
| "loss": -39.909, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 0.023373031803381062, | |
| "grad_norm": 264.3643493652344, | |
| "learning_rate": 3e-06, | |
| "loss": -46.2595, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 0.023462241848432132, | |
| "grad_norm": 154.1084747314453, | |
| "learning_rate": 3e-06, | |
| "loss": -55.2424, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 0.023551451893483205, | |
| "grad_norm": 156.28662109375, | |
| "learning_rate": 3e-06, | |
| "loss": -55.6531, | |
| "step": 264 | |
| }, | |
| { | |
| "completion_length": 227.14583587646484, | |
| "epoch": 0.02364066193853428, | |
| "grad_norm": 115.90379333496094, | |
| "learning_rate": 3e-06, | |
| "loss": -30.7492, | |
| "reward": 1.6066043376922607, | |
| "reward_std": 0.8875448107719421, | |
| "rewards/correctness_reward_func": 1.2916666865348816, | |
| "rewards/int_reward_func": 0.3958333283662796, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.010416666977107525, | |
| "rewards/xmlcount_reward_func": -0.09131250530481339, | |
| "step": 265, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.02372987198358535, | |
| "grad_norm": 113.86587524414062, | |
| "learning_rate": 3e-06, | |
| "loss": -29.6989, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 0.023819082028636425, | |
| "grad_norm": 110.4273681640625, | |
| "learning_rate": 3e-06, | |
| "loss": -32.0614, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 0.023908292073687498, | |
| "grad_norm": 111.84119415283203, | |
| "learning_rate": 3e-06, | |
| "loss": -31.9073, | |
| "step": 268 | |
| }, | |
| { | |
| "epoch": 0.02399750211873857, | |
| "grad_norm": 103.93081665039062, | |
| "learning_rate": 3e-06, | |
| "loss": -22.3506, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 0.02408671216378964, | |
| "grad_norm": 120.32383728027344, | |
| "learning_rate": 3e-06, | |
| "loss": -28.5629, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.024175922208840715, | |
| "grad_norm": 124.92536163330078, | |
| "learning_rate": 3e-06, | |
| "loss": -33.0776, | |
| "step": 271 | |
| }, | |
| { | |
| "epoch": 0.024265132253891788, | |
| "grad_norm": 119.54340362548828, | |
| "learning_rate": 3e-06, | |
| "loss": -31.6735, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 0.02435434229894286, | |
| "grad_norm": 128.8444061279297, | |
| "learning_rate": 3e-06, | |
| "loss": -33.8033, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 0.024443552343993934, | |
| "grad_norm": 123.08969116210938, | |
| "learning_rate": 3e-06, | |
| "loss": -34.4538, | |
| "step": 274 | |
| }, | |
| { | |
| "epoch": 0.024532762389045008, | |
| "grad_norm": 111.98983001708984, | |
| "learning_rate": 3e-06, | |
| "loss": -24.6449, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.02462197243409608, | |
| "grad_norm": 123.31842041015625, | |
| "learning_rate": 3e-06, | |
| "loss": -31.1417, | |
| "step": 276 | |
| }, | |
| { | |
| "completion_length": 211.20833587646484, | |
| "epoch": 0.02471118247914715, | |
| "grad_norm": 83.42295837402344, | |
| "learning_rate": 3e-06, | |
| "loss": -49.3896, | |
| "reward": 1.6565834283828735, | |
| "reward_std": 0.7390342950820923, | |
| "rewards/correctness_reward_func": 1.375, | |
| "rewards/int_reward_func": 0.4166666716337204, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": -0.13508333638310432, | |
| "step": 277, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.024800392524198224, | |
| "grad_norm": 78.70240783691406, | |
| "learning_rate": 3e-06, | |
| "loss": -60.2538, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 0.024889602569249297, | |
| "grad_norm": 87.03772735595703, | |
| "learning_rate": 3e-06, | |
| "loss": -54.701, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 0.02497881261430037, | |
| "grad_norm": 105.03215789794922, | |
| "learning_rate": 3e-06, | |
| "loss": -50.647, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.025068022659351444, | |
| "grad_norm": 94.19722747802734, | |
| "learning_rate": 3e-06, | |
| "loss": -53.7356, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 0.025157232704402517, | |
| "grad_norm": 71.46943664550781, | |
| "learning_rate": 3e-06, | |
| "loss": -54.5847, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 0.025246442749453587, | |
| "grad_norm": 90.4788589477539, | |
| "learning_rate": 3e-06, | |
| "loss": -50.5539, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 0.02533565279450466, | |
| "grad_norm": 74.81779479980469, | |
| "learning_rate": 3e-06, | |
| "loss": -61.3813, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 0.025424862839555733, | |
| "grad_norm": 85.80409240722656, | |
| "learning_rate": 3e-06, | |
| "loss": -55.7379, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.025514072884606807, | |
| "grad_norm": 135.24191284179688, | |
| "learning_rate": 3e-06, | |
| "loss": -52.1614, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 0.02560328292965788, | |
| "grad_norm": 94.01042175292969, | |
| "learning_rate": 3e-06, | |
| "loss": -55.4857, | |
| "step": 287 | |
| }, | |
| { | |
| "epoch": 0.025692492974708953, | |
| "grad_norm": 72.32071685791016, | |
| "learning_rate": 3e-06, | |
| "loss": -56.3565, | |
| "step": 288 | |
| }, | |
| { | |
| "completion_length": 221.45833587646484, | |
| "epoch": 0.025781703019760027, | |
| "grad_norm": 225.52276611328125, | |
| "learning_rate": 3e-06, | |
| "loss": -67.7081, | |
| "reward": 1.960687518119812, | |
| "reward_std": 0.8211362063884735, | |
| "rewards/correctness_reward_func": 1.625, | |
| "rewards/int_reward_func": 0.4479166716337204, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": -0.11222916468977928, | |
| "step": 289, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.025870913064811096, | |
| "grad_norm": 239.94651794433594, | |
| "learning_rate": 3e-06, | |
| "loss": -66.0587, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.02596012310986217, | |
| "grad_norm": 173.2037353515625, | |
| "learning_rate": 3e-06, | |
| "loss": -59.416, | |
| "step": 291 | |
| }, | |
| { | |
| "epoch": 0.026049333154913243, | |
| "grad_norm": 228.50621032714844, | |
| "learning_rate": 3e-06, | |
| "loss": -70.6059, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 0.026138543199964316, | |
| "grad_norm": 213.36802673339844, | |
| "learning_rate": 3e-06, | |
| "loss": -68.8733, | |
| "step": 293 | |
| }, | |
| { | |
| "epoch": 0.02622775324501539, | |
| "grad_norm": 389.8759460449219, | |
| "learning_rate": 3e-06, | |
| "loss": -108.725, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 0.026316963290066463, | |
| "grad_norm": 241.96009826660156, | |
| "learning_rate": 3e-06, | |
| "loss": -73.0107, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.026406173335117536, | |
| "grad_norm": 282.705322265625, | |
| "learning_rate": 3e-06, | |
| "loss": -71.4601, | |
| "step": 296 | |
| }, | |
| { | |
| "epoch": 0.026495383380168606, | |
| "grad_norm": 182.99859619140625, | |
| "learning_rate": 3e-06, | |
| "loss": -62.8503, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 0.02658459342521968, | |
| "grad_norm": 237.8432159423828, | |
| "learning_rate": 3e-06, | |
| "loss": -76.095, | |
| "step": 298 | |
| }, | |
| { | |
| "epoch": 0.026673803470270752, | |
| "grad_norm": 224.10140991210938, | |
| "learning_rate": 3e-06, | |
| "loss": -71.9696, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 0.026763013515321826, | |
| "grad_norm": 401.25421142578125, | |
| "learning_rate": 3e-06, | |
| "loss": -119.7468, | |
| "step": 300 | |
| }, | |
| { | |
| "completion_length": 181.00000762939453, | |
| "epoch": 0.0268522235603729, | |
| "grad_norm": 102.42095184326172, | |
| "learning_rate": 3e-06, | |
| "loss": 11.7663, | |
| "reward": 1.6927291750907898, | |
| "reward_std": 0.8399400115013123, | |
| "rewards/correctness_reward_func": 1.3333333730697632, | |
| "rewards/int_reward_func": 0.40625, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": -0.0468541644513607, | |
| "step": 301, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.026941433605423972, | |
| "grad_norm": 97.59688568115234, | |
| "learning_rate": 3e-06, | |
| "loss": 4.3671, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 0.027030643650475042, | |
| "grad_norm": 119.19691467285156, | |
| "learning_rate": 3e-06, | |
| "loss": 3.0122, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 0.027119853695526115, | |
| "grad_norm": 102.54327392578125, | |
| "learning_rate": 3e-06, | |
| "loss": 5.7653, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 0.02720906374057719, | |
| "grad_norm": 127.24678802490234, | |
| "learning_rate": 3e-06, | |
| "loss": 9.0808, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.02729827378562826, | |
| "grad_norm": 115.35128784179688, | |
| "learning_rate": 3e-06, | |
| "loss": 9.7375, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 0.027387483830679335, | |
| "grad_norm": 109.96597290039062, | |
| "learning_rate": 3e-06, | |
| "loss": 10.9794, | |
| "step": 307 | |
| }, | |
| { | |
| "epoch": 0.02747669387573041, | |
| "grad_norm": 116.67013549804688, | |
| "learning_rate": 3e-06, | |
| "loss": 4.0605, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 0.02756590392078148, | |
| "grad_norm": 100.0082015991211, | |
| "learning_rate": 3e-06, | |
| "loss": 2.0719, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 0.02765511396583255, | |
| "grad_norm": 103.2455062866211, | |
| "learning_rate": 3e-06, | |
| "loss": 3.753, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.027744324010883625, | |
| "grad_norm": 139.74317932128906, | |
| "learning_rate": 3e-06, | |
| "loss": 7.435, | |
| "step": 311 | |
| }, | |
| { | |
| "epoch": 0.027833534055934698, | |
| "grad_norm": 126.05006408691406, | |
| "learning_rate": 3e-06, | |
| "loss": 8.1927, | |
| "step": 312 | |
| }, | |
| { | |
| "completion_length": 187.87500762939453, | |
| "epoch": 0.02792274410098577, | |
| "grad_norm": 182.04180908203125, | |
| "learning_rate": 3e-06, | |
| "loss": -90.2214, | |
| "reward": 1.3959375023841858, | |
| "reward_std": 0.7920421957969666, | |
| "rewards/correctness_reward_func": 1.0833333432674408, | |
| "rewards/int_reward_func": 0.40625, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": -0.09364583343267441, | |
| "step": 313, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.028011954146036844, | |
| "grad_norm": 238.57090759277344, | |
| "learning_rate": 3e-06, | |
| "loss": -100.422, | |
| "step": 314 | |
| }, | |
| { | |
| "epoch": 0.028101164191087918, | |
| "grad_norm": 235.39544677734375, | |
| "learning_rate": 3e-06, | |
| "loss": -102.3354, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.028190374236138988, | |
| "grad_norm": 223.8190460205078, | |
| "learning_rate": 3e-06, | |
| "loss": -109.9957, | |
| "step": 316 | |
| }, | |
| { | |
| "epoch": 0.02827958428119006, | |
| "grad_norm": 225.00672912597656, | |
| "learning_rate": 3e-06, | |
| "loss": -109.9375, | |
| "step": 317 | |
| }, | |
| { | |
| "epoch": 0.028368794326241134, | |
| "grad_norm": 247.57774353027344, | |
| "learning_rate": 3e-06, | |
| "loss": -125.4302, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 0.028458004371292207, | |
| "grad_norm": 193.24212646484375, | |
| "learning_rate": 3e-06, | |
| "loss": -93.1797, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 0.02854721441634328, | |
| "grad_norm": 264.4795227050781, | |
| "learning_rate": 3e-06, | |
| "loss": -104.7149, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.028636424461394354, | |
| "grad_norm": 226.05810546875, | |
| "learning_rate": 3e-06, | |
| "loss": -107.5763, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 0.028725634506445427, | |
| "grad_norm": 239.6378173828125, | |
| "learning_rate": 3e-06, | |
| "loss": -115.9954, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 0.028814844551496497, | |
| "grad_norm": 240.8443145751953, | |
| "learning_rate": 3e-06, | |
| "loss": -117.6999, | |
| "step": 323 | |
| }, | |
| { | |
| "epoch": 0.02890405459654757, | |
| "grad_norm": 261.65643310546875, | |
| "learning_rate": 3e-06, | |
| "loss": -132.8027, | |
| "step": 324 | |
| }, | |
| { | |
| "completion_length": 189.8541717529297, | |
| "epoch": 0.028993264641598643, | |
| "grad_norm": 168.10452270507812, | |
| "learning_rate": 3e-06, | |
| "loss": 12.7588, | |
| "reward": 1.7772499918937683, | |
| "reward_std": 0.9346717596054077, | |
| "rewards/correctness_reward_func": 1.375, | |
| "rewards/int_reward_func": 0.4479166567325592, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": -0.045666664838790894, | |
| "step": 325, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.029082474686649717, | |
| "grad_norm": 182.863037109375, | |
| "learning_rate": 3e-06, | |
| "loss": 0.3851, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 0.02917168473170079, | |
| "grad_norm": 214.54574584960938, | |
| "learning_rate": 3e-06, | |
| "loss": 17.6492, | |
| "step": 327 | |
| }, | |
| { | |
| "epoch": 0.029260894776751863, | |
| "grad_norm": 187.80931091308594, | |
| "learning_rate": 3e-06, | |
| "loss": 7.692, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 0.029350104821802937, | |
| "grad_norm": 195.0843505859375, | |
| "learning_rate": 3e-06, | |
| "loss": 12.8044, | |
| "step": 329 | |
| }, | |
| { | |
| "epoch": 0.029439314866854006, | |
| "grad_norm": 168.82028198242188, | |
| "learning_rate": 3e-06, | |
| "loss": -7.5147, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.02952852491190508, | |
| "grad_norm": 176.14859008789062, | |
| "learning_rate": 3e-06, | |
| "loss": 10.9965, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 0.029617734956956153, | |
| "grad_norm": 202.02247619628906, | |
| "learning_rate": 3e-06, | |
| "loss": -1.8138, | |
| "step": 332 | |
| }, | |
| { | |
| "epoch": 0.029706945002007226, | |
| "grad_norm": 216.37252807617188, | |
| "learning_rate": 3e-06, | |
| "loss": 15.5179, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 0.0297961550470583, | |
| "grad_norm": 200.23558044433594, | |
| "learning_rate": 3e-06, | |
| "loss": 5.0549, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 0.029885365092109373, | |
| "grad_norm": 177.7020263671875, | |
| "learning_rate": 3e-06, | |
| "loss": 11.199, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.029974575137160443, | |
| "grad_norm": 170.23106384277344, | |
| "learning_rate": 3e-06, | |
| "loss": -10.9367, | |
| "step": 336 | |
| }, | |
| { | |
| "completion_length": 224.9166717529297, | |
| "epoch": 0.030063785182211516, | |
| "grad_norm": 127.8658218383789, | |
| "learning_rate": 3e-06, | |
| "loss": -59.3587, | |
| "reward": 1.6053959131240845, | |
| "reward_std": 0.5731277614831924, | |
| "rewards/correctness_reward_func": 1.3333333134651184, | |
| "rewards/int_reward_func": 0.4270833283662796, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": -0.1550208330154419, | |
| "step": 337, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.03015299522726259, | |
| "grad_norm": 133.41494750976562, | |
| "learning_rate": 3e-06, | |
| "loss": -53.4241, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 0.030242205272313662, | |
| "grad_norm": 170.7308807373047, | |
| "learning_rate": 3e-06, | |
| "loss": -65.4722, | |
| "step": 339 | |
| }, | |
| { | |
| "epoch": 0.030331415317364736, | |
| "grad_norm": 172.28118896484375, | |
| "learning_rate": 3e-06, | |
| "loss": -53.201, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.03042062536241581, | |
| "grad_norm": 118.70462799072266, | |
| "learning_rate": 3e-06, | |
| "loss": -50.8363, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 0.030509835407466882, | |
| "grad_norm": 143.119384765625, | |
| "learning_rate": 3e-06, | |
| "loss": -60.764, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 0.030599045452517952, | |
| "grad_norm": 143.7277374267578, | |
| "learning_rate": 3e-06, | |
| "loss": -62.8186, | |
| "step": 343 | |
| }, | |
| { | |
| "epoch": 0.030688255497569025, | |
| "grad_norm": 157.5625, | |
| "learning_rate": 3e-06, | |
| "loss": -57.4741, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 0.0307774655426201, | |
| "grad_norm": 191.64804077148438, | |
| "learning_rate": 3e-06, | |
| "loss": -71.2662, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.030866675587671172, | |
| "grad_norm": 206.0039520263672, | |
| "learning_rate": 3e-06, | |
| "loss": -56.9883, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 0.030955885632722245, | |
| "grad_norm": 132.1703643798828, | |
| "learning_rate": 3e-06, | |
| "loss": -54.0822, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 0.03104509567777332, | |
| "grad_norm": 144.338623046875, | |
| "learning_rate": 3e-06, | |
| "loss": -66.1545, | |
| "step": 348 | |
| }, | |
| { | |
| "completion_length": 161.1041717529297, | |
| "epoch": 0.03113430572282439, | |
| "grad_norm": 176.29396057128906, | |
| "learning_rate": 3e-06, | |
| "loss": 2.568, | |
| "reward": 1.8858751058578491, | |
| "reward_std": 0.5198497474193573, | |
| "rewards/correctness_reward_func": 1.4583333134651184, | |
| "rewards/int_reward_func": 0.40625, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.021291667595505714, | |
| "step": 349, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.03122351576787546, | |
| "grad_norm": 162.1043701171875, | |
| "learning_rate": 3e-06, | |
| "loss": 11.6461, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.03131272581292654, | |
| "grad_norm": 147.42918395996094, | |
| "learning_rate": 3e-06, | |
| "loss": 19.3425, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 0.03140193585797761, | |
| "grad_norm": 146.65992736816406, | |
| "learning_rate": 3e-06, | |
| "loss": 4.1848, | |
| "step": 352 | |
| }, | |
| { | |
| "epoch": 0.03149114590302868, | |
| "grad_norm": 137.17474365234375, | |
| "learning_rate": 3e-06, | |
| "loss": 9.5202, | |
| "step": 353 | |
| }, | |
| { | |
| "epoch": 0.03158035594807975, | |
| "grad_norm": 176.24244689941406, | |
| "learning_rate": 3e-06, | |
| "loss": 24.3653, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 0.031669565993130824, | |
| "grad_norm": 176.59144592285156, | |
| "learning_rate": 3e-06, | |
| "loss": 1.4274, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.0317587760381819, | |
| "grad_norm": 161.0966339111328, | |
| "learning_rate": 3e-06, | |
| "loss": 10.7367, | |
| "step": 356 | |
| }, | |
| { | |
| "epoch": 0.03184798608323297, | |
| "grad_norm": 164.76675415039062, | |
| "learning_rate": 3e-06, | |
| "loss": 18.5651, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 0.031937196128284044, | |
| "grad_norm": 227.15631103515625, | |
| "learning_rate": 3e-06, | |
| "loss": 2.7137, | |
| "step": 358 | |
| }, | |
| { | |
| "epoch": 0.03202640617333512, | |
| "grad_norm": 152.36514282226562, | |
| "learning_rate": 3e-06, | |
| "loss": 8.0471, | |
| "step": 359 | |
| }, | |
| { | |
| "epoch": 0.03211561621838619, | |
| "grad_norm": 162.82603454589844, | |
| "learning_rate": 3e-06, | |
| "loss": 24.2858, | |
| "step": 360 | |
| }, | |
| { | |
| "completion_length": 173.6666717529297, | |
| "epoch": 0.032204826263437264, | |
| "grad_norm": 112.11152648925781, | |
| "learning_rate": 3e-06, | |
| "loss": -47.6777, | |
| "reward": 1.9486668109893799, | |
| "reward_std": 0.614804282784462, | |
| "rewards/correctness_reward_func": 1.5, | |
| "rewards/int_reward_func": 0.4895833283662796, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": -0.04091667756438255, | |
| "step": 361, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.03229403630848834, | |
| "grad_norm": 212.41893005371094, | |
| "learning_rate": 3e-06, | |
| "loss": -57.8799, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 0.03238324635353941, | |
| "grad_norm": 129.3856201171875, | |
| "learning_rate": 3e-06, | |
| "loss": -42.8209, | |
| "step": 363 | |
| }, | |
| { | |
| "epoch": 0.032472456398590484, | |
| "grad_norm": 90.02410888671875, | |
| "learning_rate": 3e-06, | |
| "loss": -37.0157, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 0.03256166644364156, | |
| "grad_norm": 122.2002944946289, | |
| "learning_rate": 3e-06, | |
| "loss": -42.0898, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.03265087648869262, | |
| "grad_norm": 114.75045776367188, | |
| "learning_rate": 3e-06, | |
| "loss": -37.9465, | |
| "step": 366 | |
| }, | |
| { | |
| "epoch": 0.0327400865337437, | |
| "grad_norm": 117.86136627197266, | |
| "learning_rate": 3e-06, | |
| "loss": -50.0162, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 0.03282929657879477, | |
| "grad_norm": 224.55755615234375, | |
| "learning_rate": 3e-06, | |
| "loss": -62.4077, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 0.03291850662384584, | |
| "grad_norm": 145.33380126953125, | |
| "learning_rate": 3e-06, | |
| "loss": -45.7888, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 0.033007716668896916, | |
| "grad_norm": 107.85284423828125, | |
| "learning_rate": 3e-06, | |
| "loss": -38.8828, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.03309692671394799, | |
| "grad_norm": 143.64854431152344, | |
| "learning_rate": 3e-06, | |
| "loss": -44.4827, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 0.03318613675899906, | |
| "grad_norm": 120.4244155883789, | |
| "learning_rate": 3e-06, | |
| "loss": -40.0586, | |
| "step": 372 | |
| }, | |
| { | |
| "completion_length": 170.3125, | |
| "epoch": 0.033275346804050136, | |
| "grad_norm": 199.8765869140625, | |
| "learning_rate": 3e-06, | |
| "loss": -16.0424, | |
| "reward": 1.5020000338554382, | |
| "reward_std": 0.6375356912612915, | |
| "rewards/correctness_reward_func": 1.0416666865348816, | |
| "rewards/int_reward_func": 0.4479166716337204, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.012416664510965347, | |
| "step": 373, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.03336455684910121, | |
| "grad_norm": 186.22103881835938, | |
| "learning_rate": 3e-06, | |
| "loss": -12.8464, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.03345376689415228, | |
| "grad_norm": 133.84971618652344, | |
| "learning_rate": 3e-06, | |
| "loss": -16.0581, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.033542976939203356, | |
| "grad_norm": 124.62361145019531, | |
| "learning_rate": 3e-06, | |
| "loss": -20.0662, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.03363218698425443, | |
| "grad_norm": 177.62574768066406, | |
| "learning_rate": 3e-06, | |
| "loss": -16.7656, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.0337213970293055, | |
| "grad_norm": 162.92381286621094, | |
| "learning_rate": 3e-06, | |
| "loss": -19.1936, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.033810607074356576, | |
| "grad_norm": 167.49449157714844, | |
| "learning_rate": 3e-06, | |
| "loss": -16.9396, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.03389981711940764, | |
| "grad_norm": 180.7197723388672, | |
| "learning_rate": 3e-06, | |
| "loss": -14.2949, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.033989027164458716, | |
| "grad_norm": 158.6161346435547, | |
| "learning_rate": 3e-06, | |
| "loss": -18.1083, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.03407823720950979, | |
| "grad_norm": 136.7860870361328, | |
| "learning_rate": 3e-06, | |
| "loss": -21.9306, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.03416744725456086, | |
| "grad_norm": 200.51185607910156, | |
| "learning_rate": 3e-06, | |
| "loss": -18.8755, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.034256657299611935, | |
| "grad_norm": 174.00477600097656, | |
| "learning_rate": 3e-06, | |
| "loss": -21.6131, | |
| "step": 384 | |
| }, | |
| { | |
| "completion_length": 168.0416717529297, | |
| "epoch": 0.03434586734466301, | |
| "grad_norm": 270.6558837890625, | |
| "learning_rate": 3e-06, | |
| "loss": -108.9543, | |
| "reward": 1.5282500386238098, | |
| "reward_std": 0.8121029734611511, | |
| "rewards/correctness_reward_func": 1.0416666567325592, | |
| "rewards/int_reward_func": 0.4479166716337204, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.03866666788235307, | |
| "step": 385, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.03443507738971408, | |
| "grad_norm": 316.0130615234375, | |
| "learning_rate": 3e-06, | |
| "loss": -121.254, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.034524287434765155, | |
| "grad_norm": 257.7696228027344, | |
| "learning_rate": 3e-06, | |
| "loss": -102.8911, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.03461349747981623, | |
| "grad_norm": 305.755126953125, | |
| "learning_rate": 3e-06, | |
| "loss": -114.5659, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.0347027075248673, | |
| "grad_norm": 219.5818328857422, | |
| "learning_rate": 3e-06, | |
| "loss": -114.7208, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.034791917569918375, | |
| "grad_norm": 255.7522430419922, | |
| "learning_rate": 3e-06, | |
| "loss": -109.7934, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.03488112761496945, | |
| "grad_norm": 291.77642822265625, | |
| "learning_rate": 3e-06, | |
| "loss": -116.5826, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.03497033766002052, | |
| "grad_norm": 333.5157165527344, | |
| "learning_rate": 3e-06, | |
| "loss": -132.3897, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.03505954770507159, | |
| "grad_norm": 267.8763122558594, | |
| "learning_rate": 3e-06, | |
| "loss": -110.8159, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.03514875775012266, | |
| "grad_norm": 312.3733215332031, | |
| "learning_rate": 3e-06, | |
| "loss": -127.8078, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.035237967795173734, | |
| "grad_norm": 242.0186309814453, | |
| "learning_rate": 3e-06, | |
| "loss": -123.3703, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.03532717784022481, | |
| "grad_norm": 297.62847900390625, | |
| "learning_rate": 3e-06, | |
| "loss": -120.3945, | |
| "step": 396 | |
| }, | |
| { | |
| "completion_length": 171.45833587646484, | |
| "epoch": 0.03541638788527588, | |
| "grad_norm": 192.1062774658203, | |
| "learning_rate": 3e-06, | |
| "loss": -81.6189, | |
| "reward": 1.9622292518615723, | |
| "reward_std": 0.8264816105365753, | |
| "rewards/correctness_reward_func": 1.4583333134651184, | |
| "rewards/int_reward_func": 0.46875, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.0351458303630352, | |
| "step": 397, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.035505597930326954, | |
| "grad_norm": 202.8217010498047, | |
| "learning_rate": 3e-06, | |
| "loss": -67.341, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.03559480797537803, | |
| "grad_norm": 218.82249450683594, | |
| "learning_rate": 3e-06, | |
| "loss": -65.3347, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.0356840180204291, | |
| "grad_norm": 232.65196228027344, | |
| "learning_rate": 3e-06, | |
| "loss": -76.8989, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.035773228065480174, | |
| "grad_norm": 198.04103088378906, | |
| "learning_rate": 3e-06, | |
| "loss": -74.488, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.03586243811053125, | |
| "grad_norm": 211.86273193359375, | |
| "learning_rate": 3e-06, | |
| "loss": -75.6096, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.03595164815558232, | |
| "grad_norm": 218.77589416503906, | |
| "learning_rate": 3e-06, | |
| "loss": -87.9174, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.036040858200633394, | |
| "grad_norm": 243.4962615966797, | |
| "learning_rate": 3e-06, | |
| "loss": -70.1026, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.03613006824568447, | |
| "grad_norm": 242.30494689941406, | |
| "learning_rate": 3e-06, | |
| "loss": -71.4579, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.03621927829073553, | |
| "grad_norm": 274.28948974609375, | |
| "learning_rate": 3e-06, | |
| "loss": -83.8044, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.03630848833578661, | |
| "grad_norm": 257.0942077636719, | |
| "learning_rate": 3e-06, | |
| "loss": -82.445, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.03639769838083768, | |
| "grad_norm": 255.2320556640625, | |
| "learning_rate": 3e-06, | |
| "loss": -81.9402, | |
| "step": 408 | |
| }, | |
| { | |
| "completion_length": 125.70833587646484, | |
| "epoch": 0.03648690842588875, | |
| "grad_norm": 185.7193145751953, | |
| "learning_rate": 3e-06, | |
| "loss": 46.4742, | |
| "reward": 1.8683959245681763, | |
| "reward_std": 0.8172085583209991, | |
| "rewards/correctness_reward_func": 1.3333333730697632, | |
| "rewards/int_reward_func": 0.4270833432674408, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.10797916725277901, | |
| "step": 409, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.03657611847093983, | |
| "grad_norm": 218.23338317871094, | |
| "learning_rate": 3e-06, | |
| "loss": 29.3517, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.0366653285159909, | |
| "grad_norm": 180.90330505371094, | |
| "learning_rate": 3e-06, | |
| "loss": 55.3162, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.03675453856104197, | |
| "grad_norm": 216.37953186035156, | |
| "learning_rate": 3e-06, | |
| "loss": 50.2096, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.036843748606093046, | |
| "grad_norm": 198.8724822998047, | |
| "learning_rate": 3e-06, | |
| "loss": 51.636, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.03693295865114412, | |
| "grad_norm": 184.89627075195312, | |
| "learning_rate": 3e-06, | |
| "loss": 44.8369, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.03702216869619519, | |
| "grad_norm": 167.6713104248047, | |
| "learning_rate": 3e-06, | |
| "loss": 44.1546, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.037111378741246266, | |
| "grad_norm": 192.13140869140625, | |
| "learning_rate": 3e-06, | |
| "loss": 27.4686, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.03720058878629734, | |
| "grad_norm": 177.4408721923828, | |
| "learning_rate": 3e-06, | |
| "loss": 53.37, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.03728979883134841, | |
| "grad_norm": 223.81668090820312, | |
| "learning_rate": 3e-06, | |
| "loss": 45.2759, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.037379008876399486, | |
| "grad_norm": 207.5684356689453, | |
| "learning_rate": 3e-06, | |
| "loss": 46.924, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.03746821892145055, | |
| "grad_norm": 180.81484985351562, | |
| "learning_rate": 3e-06, | |
| "loss": 42.0928, | |
| "step": 420 | |
| }, | |
| { | |
| "completion_length": 153.89583587646484, | |
| "epoch": 0.037557428966501626, | |
| "grad_norm": 247.00067138671875, | |
| "learning_rate": 3e-06, | |
| "loss": 14.4654, | |
| "reward": 1.776770830154419, | |
| "reward_std": 0.6972799003124237, | |
| "rewards/correctness_reward_func": 1.25, | |
| "rewards/int_reward_func": 0.4479166716337204, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.07885416969656944, | |
| "step": 421, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.0376466390115527, | |
| "grad_norm": 244.66824340820312, | |
| "learning_rate": 3e-06, | |
| "loss": 34.9601, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.03773584905660377, | |
| "grad_norm": 276.21539306640625, | |
| "learning_rate": 3e-06, | |
| "loss": 14.6261, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.037825059101654845, | |
| "grad_norm": 288.96246337890625, | |
| "learning_rate": 3e-06, | |
| "loss": 41.9368, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.03791426914670592, | |
| "grad_norm": 303.6945495605469, | |
| "learning_rate": 3e-06, | |
| "loss": 23.9119, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.03800347919175699, | |
| "grad_norm": 274.27142333984375, | |
| "learning_rate": 3e-06, | |
| "loss": 27.399, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.038092689236808065, | |
| "grad_norm": 233.245361328125, | |
| "learning_rate": 3e-06, | |
| "loss": 10.175, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.03818189928185914, | |
| "grad_norm": 256.8597412109375, | |
| "learning_rate": 3e-06, | |
| "loss": 31.9908, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.03827110932691021, | |
| "grad_norm": 270.4859619140625, | |
| "learning_rate": 3e-06, | |
| "loss": 10.7867, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.038360319371961285, | |
| "grad_norm": 301.17181396484375, | |
| "learning_rate": 3e-06, | |
| "loss": 40.6524, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.03844952941701236, | |
| "grad_norm": 303.94488525390625, | |
| "learning_rate": 3e-06, | |
| "loss": 21.1706, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.03853873946206343, | |
| "grad_norm": 258.3034973144531, | |
| "learning_rate": 3e-06, | |
| "loss": 22.7622, | |
| "step": 432 | |
| }, | |
| { | |
| "completion_length": 107.54166793823242, | |
| "epoch": 0.0386279495071145, | |
| "grad_norm": 134.02154541015625, | |
| "learning_rate": 3e-06, | |
| "loss": -22.4999, | |
| "reward": 2.2951666712760925, | |
| "reward_std": 0.38126008585095406, | |
| "rewards/correctness_reward_func": 1.5833333134651184, | |
| "rewards/int_reward_func": 0.5, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.21183334290981293, | |
| "step": 433, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.03871715955216557, | |
| "grad_norm": 191.98023986816406, | |
| "learning_rate": 3e-06, | |
| "loss": -14.0492, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.038806369597216644, | |
| "grad_norm": 154.30328369140625, | |
| "learning_rate": 3e-06, | |
| "loss": -10.9444, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.03889557964226772, | |
| "grad_norm": 134.01214599609375, | |
| "learning_rate": 3e-06, | |
| "loss": -17.7574, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.03898478968731879, | |
| "grad_norm": 132.3379364013672, | |
| "learning_rate": 3e-06, | |
| "loss": -18.487, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.039073999732369864, | |
| "grad_norm": 146.31573486328125, | |
| "learning_rate": 3e-06, | |
| "loss": -12.7164, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.03916320977742094, | |
| "grad_norm": 136.05592346191406, | |
| "learning_rate": 3e-06, | |
| "loss": -23.1439, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.03925241982247201, | |
| "grad_norm": 138.1117706298828, | |
| "learning_rate": 3e-06, | |
| "loss": -15.7255, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.039341629867523084, | |
| "grad_norm": 166.34922790527344, | |
| "learning_rate": 3e-06, | |
| "loss": -12.6375, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.03943083991257416, | |
| "grad_norm": 132.994140625, | |
| "learning_rate": 3e-06, | |
| "loss": -20.1431, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.03952004995762523, | |
| "grad_norm": 129.54771423339844, | |
| "learning_rate": 3e-06, | |
| "loss": -19.3478, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.039609260002676304, | |
| "grad_norm": 152.91607666015625, | |
| "learning_rate": 3e-06, | |
| "loss": -14.3018, | |
| "step": 444 | |
| }, | |
| { | |
| "completion_length": 160.7916717529297, | |
| "epoch": 0.03969847004772738, | |
| "grad_norm": 165.9615020751953, | |
| "learning_rate": 3e-06, | |
| "loss": 38.8687, | |
| "reward": 1.6081042885780334, | |
| "reward_std": 0.45602357387542725, | |
| "rewards/correctness_reward_func": 1.0833333432674408, | |
| "rewards/int_reward_func": 0.4479166716337204, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.0768541656434536, | |
| "step": 445, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.039787680092778444, | |
| "grad_norm": 165.71353149414062, | |
| "learning_rate": 3e-06, | |
| "loss": 32.8818, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.03987689013782952, | |
| "grad_norm": 185.27174377441406, | |
| "learning_rate": 3e-06, | |
| "loss": 29.8206, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.03996610018288059, | |
| "grad_norm": 163.79954528808594, | |
| "learning_rate": 3e-06, | |
| "loss": 28.2399, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.04005531022793166, | |
| "grad_norm": 167.2331085205078, | |
| "learning_rate": 3e-06, | |
| "loss": 42.2504, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.04014452027298274, | |
| "grad_norm": 157.44320678710938, | |
| "learning_rate": 3e-06, | |
| "loss": 47.0631, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.04023373031803381, | |
| "grad_norm": 167.7976837158203, | |
| "learning_rate": 3e-06, | |
| "loss": 37.7299, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.04032294036308488, | |
| "grad_norm": 171.96420288085938, | |
| "learning_rate": 3e-06, | |
| "loss": 31.7018, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.040412150408135956, | |
| "grad_norm": 164.95046997070312, | |
| "learning_rate": 3e-06, | |
| "loss": 28.9306, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.04050136045318703, | |
| "grad_norm": 146.903076171875, | |
| "learning_rate": 3e-06, | |
| "loss": 26.551, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.0405905704982381, | |
| "grad_norm": 182.6881561279297, | |
| "learning_rate": 3e-06, | |
| "loss": 41.0788, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.040679780543289176, | |
| "grad_norm": 147.5907440185547, | |
| "learning_rate": 3e-06, | |
| "loss": 44.0529, | |
| "step": 456 | |
| }, | |
| { | |
| "completion_length": 143.8541717529297, | |
| "epoch": 0.04076899058834025, | |
| "grad_norm": 252.46615600585938, | |
| "learning_rate": 3e-06, | |
| "loss": -60.641, | |
| "reward": 1.8890208005905151, | |
| "reward_std": 0.4024546667933464, | |
| "rewards/correctness_reward_func": 1.375, | |
| "rewards/int_reward_func": 0.4166666716337204, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.09735416248440742, | |
| "step": 457, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.04085820063339132, | |
| "grad_norm": 223.07171630859375, | |
| "learning_rate": 3e-06, | |
| "loss": -42.0093, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.040947410678442396, | |
| "grad_norm": 237.3083953857422, | |
| "learning_rate": 3e-06, | |
| "loss": -57.9657, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.04103662072349346, | |
| "grad_norm": 225.29269409179688, | |
| "learning_rate": 3e-06, | |
| "loss": -41.8661, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.041125830768544536, | |
| "grad_norm": 210.0297088623047, | |
| "learning_rate": 3e-06, | |
| "loss": -47.2305, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.04121504081359561, | |
| "grad_norm": 262.04473876953125, | |
| "learning_rate": 3e-06, | |
| "loss": -50.9342, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.04130425085864668, | |
| "grad_norm": 252.53802490234375, | |
| "learning_rate": 3e-06, | |
| "loss": -62.6208, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.041393460903697755, | |
| "grad_norm": 221.40121459960938, | |
| "learning_rate": 3e-06, | |
| "loss": -43.7282, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.04148267094874883, | |
| "grad_norm": 231.59335327148438, | |
| "learning_rate": 3e-06, | |
| "loss": -60.5224, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.0415718809937999, | |
| "grad_norm": 207.73471069335938, | |
| "learning_rate": 3e-06, | |
| "loss": -44.5223, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.041661091038850975, | |
| "grad_norm": 217.08779907226562, | |
| "learning_rate": 3e-06, | |
| "loss": -50.6021, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.04175030108390205, | |
| "grad_norm": 254.29269409179688, | |
| "learning_rate": 3e-06, | |
| "loss": -55.4887, | |
| "step": 468 | |
| }, | |
| { | |
| "completion_length": 122.50000381469727, | |
| "epoch": 0.04183951112895312, | |
| "grad_norm": 147.28335571289062, | |
| "learning_rate": 3e-06, | |
| "loss": 12.9864, | |
| "reward": 2.310395896434784, | |
| "reward_std": 0.44204503297805786, | |
| "rewards/correctness_reward_func": 1.6666666269302368, | |
| "rewards/int_reward_func": 0.46875, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.010416666977107525, | |
| "rewards/xmlcount_reward_func": 0.16456249356269836, | |
| "step": 469, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.041928721174004195, | |
| "grad_norm": 161.9978485107422, | |
| "learning_rate": 3e-06, | |
| "loss": 34.7546, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.04201793121905527, | |
| "grad_norm": 165.3116455078125, | |
| "learning_rate": 3e-06, | |
| "loss": 17.4188, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.04210714126410634, | |
| "grad_norm": 142.81861877441406, | |
| "learning_rate": 3e-06, | |
| "loss": 18.3006, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.04219635130915741, | |
| "grad_norm": 168.01116943359375, | |
| "learning_rate": 3e-06, | |
| "loss": 17.6413, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.04228556135420848, | |
| "grad_norm": 207.03326416015625, | |
| "learning_rate": 3e-06, | |
| "loss": 15.9259, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.042374771399259555, | |
| "grad_norm": 141.62599182128906, | |
| "learning_rate": 3e-06, | |
| "loss": 12.4355, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.04246398144431063, | |
| "grad_norm": 180.9537353515625, | |
| "learning_rate": 3e-06, | |
| "loss": 32.9299, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.0425531914893617, | |
| "grad_norm": 163.92254638671875, | |
| "learning_rate": 3e-06, | |
| "loss": 17.1089, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.042642401534412774, | |
| "grad_norm": 145.9250030517578, | |
| "learning_rate": 3e-06, | |
| "loss": 17.069, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.04273161157946385, | |
| "grad_norm": 152.68319702148438, | |
| "learning_rate": 3e-06, | |
| "loss": 16.6025, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.04282082162451492, | |
| "grad_norm": 222.65997314453125, | |
| "learning_rate": 3e-06, | |
| "loss": 14.2435, | |
| "step": 480 | |
| }, | |
| { | |
| "completion_length": 116.66666793823242, | |
| "epoch": 0.042910031669565994, | |
| "grad_norm": 166.38722229003906, | |
| "learning_rate": 3e-06, | |
| "loss": 13.4616, | |
| "reward": 1.5636458992958069, | |
| "reward_std": 0.6559399664402008, | |
| "rewards/correctness_reward_func": 0.9999999701976776, | |
| "rewards/int_reward_func": 0.4166666716337204, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.1469791643321514, | |
| "step": 481, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.04299924171461707, | |
| "grad_norm": 222.22879028320312, | |
| "learning_rate": 3e-06, | |
| "loss": 18.8121, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.04308845175966814, | |
| "grad_norm": 221.17059326171875, | |
| "learning_rate": 3e-06, | |
| "loss": 21.4877, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.043177661804719214, | |
| "grad_norm": 142.53189086914062, | |
| "learning_rate": 3e-06, | |
| "loss": 16.8492, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.04326687184977029, | |
| "grad_norm": 170.13198852539062, | |
| "learning_rate": 3e-06, | |
| "loss": 20.7898, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.043356081894821354, | |
| "grad_norm": 161.23110961914062, | |
| "learning_rate": 3e-06, | |
| "loss": 12.8851, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.04344529193987243, | |
| "grad_norm": 175.66587829589844, | |
| "learning_rate": 3e-06, | |
| "loss": 10.8873, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.0435345019849235, | |
| "grad_norm": 195.75050354003906, | |
| "learning_rate": 3e-06, | |
| "loss": 15.6694, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.04362371202997457, | |
| "grad_norm": 190.2042236328125, | |
| "learning_rate": 3e-06, | |
| "loss": 19.0926, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.04371292207502565, | |
| "grad_norm": 146.10504150390625, | |
| "learning_rate": 3e-06, | |
| "loss": 13.8907, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.04380213212007672, | |
| "grad_norm": 149.26614379882812, | |
| "learning_rate": 3e-06, | |
| "loss": 17.0683, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.04389134216512779, | |
| "grad_norm": 178.4593963623047, | |
| "learning_rate": 3e-06, | |
| "loss": 10.2799, | |
| "step": 492 | |
| }, | |
| { | |
| "completion_length": 137.50000762939453, | |
| "epoch": 0.043980552210178867, | |
| "grad_norm": 87.93815612792969, | |
| "learning_rate": 3e-06, | |
| "loss": -35.7257, | |
| "reward": 2.5712709426879883, | |
| "reward_std": 0.18458116799592972, | |
| "rewards/correctness_reward_func": 1.9583333134651184, | |
| "rewards/int_reward_func": 0.5, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.11293749511241913, | |
| "step": 493, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.04406976225522994, | |
| "grad_norm": 78.96405029296875, | |
| "learning_rate": 3e-06, | |
| "loss": -37.1011, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.04415897230028101, | |
| "grad_norm": 113.70293426513672, | |
| "learning_rate": 3e-06, | |
| "loss": -46.3151, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.044248182345332086, | |
| "grad_norm": 90.45478820800781, | |
| "learning_rate": 3e-06, | |
| "loss": -43.5143, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.04433739239038316, | |
| "grad_norm": 106.42904663085938, | |
| "learning_rate": 3e-06, | |
| "loss": -42.7944, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.04442660243543423, | |
| "grad_norm": 106.20608520507812, | |
| "learning_rate": 3e-06, | |
| "loss": -48.1815, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.0445158124804853, | |
| "grad_norm": 93.41876220703125, | |
| "learning_rate": 3e-06, | |
| "loss": -37.7992, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.04460502252553637, | |
| "grad_norm": 91.40050506591797, | |
| "learning_rate": 3e-06, | |
| "loss": -38.651, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.044694232570587446, | |
| "grad_norm": 116.4251480102539, | |
| "learning_rate": 3e-06, | |
| "loss": -49.2276, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.04478344261563852, | |
| "grad_norm": 92.2903060913086, | |
| "learning_rate": 3e-06, | |
| "loss": -46.591, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.04487265266068959, | |
| "grad_norm": 110.7293472290039, | |
| "learning_rate": 3e-06, | |
| "loss": -45.7648, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.044961862705740666, | |
| "grad_norm": 108.03923797607422, | |
| "learning_rate": 3e-06, | |
| "loss": -51.9054, | |
| "step": 504 | |
| }, | |
| { | |
| "completion_length": 131.14583587646484, | |
| "epoch": 0.04505107275079174, | |
| "grad_norm": 452.2289733886719, | |
| "learning_rate": 3e-06, | |
| "loss": 29.1255, | |
| "reward": 1.9235833883285522, | |
| "reward_std": 0.813209742307663, | |
| "rewards/correctness_reward_func": 1.3333333730697632, | |
| "rewards/int_reward_func": 0.4270833283662796, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.16316666454076767, | |
| "step": 505, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.04514028279584281, | |
| "grad_norm": 284.8124694824219, | |
| "learning_rate": 3e-06, | |
| "loss": 31.7163, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.045229492840893885, | |
| "grad_norm": 361.7442626953125, | |
| "learning_rate": 3e-06, | |
| "loss": 27.2282, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.04531870288594496, | |
| "grad_norm": 283.879638671875, | |
| "learning_rate": 3e-06, | |
| "loss": 42.2817, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.04540791293099603, | |
| "grad_norm": 316.19000244140625, | |
| "learning_rate": 3e-06, | |
| "loss": 26.7891, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.045497122976047105, | |
| "grad_norm": 370.62652587890625, | |
| "learning_rate": 3e-06, | |
| "loss": 34.1636, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.04558633302109818, | |
| "grad_norm": 273.1391296386719, | |
| "learning_rate": 3e-06, | |
| "loss": 27.6705, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.04567554306614925, | |
| "grad_norm": 307.9808044433594, | |
| "learning_rate": 3e-06, | |
| "loss": 28.9577, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.04576475311120032, | |
| "grad_norm": 374.3335876464844, | |
| "learning_rate": 3e-06, | |
| "loss": 22.2669, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.04585396315625139, | |
| "grad_norm": 395.0052795410156, | |
| "learning_rate": 3e-06, | |
| "loss": 39.239, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.045943173201302465, | |
| "grad_norm": 539.7128295898438, | |
| "learning_rate": 3e-06, | |
| "loss": 25.6475, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.04603238324635354, | |
| "grad_norm": 348.81170654296875, | |
| "learning_rate": 3e-06, | |
| "loss": 31.7762, | |
| "step": 516 | |
| }, | |
| { | |
| "completion_length": 115.83333587646484, | |
| "epoch": 0.04612159329140461, | |
| "grad_norm": 177.44480895996094, | |
| "learning_rate": 3e-06, | |
| "loss": -44.7672, | |
| "reward": 2.343208432197571, | |
| "reward_std": 0.4359329864382744, | |
| "rewards/correctness_reward_func": 1.6666666269302368, | |
| "rewards/int_reward_func": 0.4895833283662796, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.18695833534002304, | |
| "step": 517, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.046210803336455684, | |
| "grad_norm": 199.65908813476562, | |
| "learning_rate": 3e-06, | |
| "loss": -51.4622, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.04630001338150676, | |
| "grad_norm": 175.2034149169922, | |
| "learning_rate": 3e-06, | |
| "loss": -55.1536, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.04638922342655783, | |
| "grad_norm": 160.91688537597656, | |
| "learning_rate": 3e-06, | |
| "loss": -37.3164, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.046478433471608904, | |
| "grad_norm": 165.5592498779297, | |
| "learning_rate": 3e-06, | |
| "loss": -42.9147, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.04656764351665998, | |
| "grad_norm": 154.5955047607422, | |
| "learning_rate": 3e-06, | |
| "loss": -48.9731, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.04665685356171105, | |
| "grad_norm": 202.06838989257812, | |
| "learning_rate": 3e-06, | |
| "loss": -46.1992, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.046746063606762124, | |
| "grad_norm": 216.6766357421875, | |
| "learning_rate": 3e-06, | |
| "loss": -54.2271, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.0468352736518132, | |
| "grad_norm": 212.4103240966797, | |
| "learning_rate": 3e-06, | |
| "loss": -59.7351, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.046924483696864264, | |
| "grad_norm": 160.86546325683594, | |
| "learning_rate": 3e-06, | |
| "loss": -40.5866, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.04701369374191534, | |
| "grad_norm": 171.24478149414062, | |
| "learning_rate": 3e-06, | |
| "loss": -47.2665, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.04710290378696641, | |
| "grad_norm": 165.52357482910156, | |
| "learning_rate": 3e-06, | |
| "loss": -53.477, | |
| "step": 528 | |
| }, | |
| { | |
| "completion_length": 119.77083587646484, | |
| "epoch": 0.047192113832017483, | |
| "grad_norm": 382.3139953613281, | |
| "learning_rate": 3e-06, | |
| "loss": 86.4678, | |
| "reward": 1.9181458950042725, | |
| "reward_std": 0.6776820421218872, | |
| "rewards/correctness_reward_func": 1.3333333134651184, | |
| "rewards/int_reward_func": 0.4166666716337204, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.1681458279490471, | |
| "step": 529, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.04728132387706856, | |
| "grad_norm": 264.3744812011719, | |
| "learning_rate": 3e-06, | |
| "loss": 87.2703, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.04737053392211963, | |
| "grad_norm": 273.3477783203125, | |
| "learning_rate": 3e-06, | |
| "loss": 74.9615, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.0474597439671707, | |
| "grad_norm": 326.87078857421875, | |
| "learning_rate": 3e-06, | |
| "loss": 90.7838, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.04754895401222178, | |
| "grad_norm": 294.74041748046875, | |
| "learning_rate": 3e-06, | |
| "loss": 102.1572, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.04763816405727285, | |
| "grad_norm": 312.48626708984375, | |
| "learning_rate": 3e-06, | |
| "loss": 93.4423, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.04772737410232392, | |
| "grad_norm": 383.2833557128906, | |
| "learning_rate": 3e-06, | |
| "loss": 86.6238, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.047816584147374996, | |
| "grad_norm": 316.42926025390625, | |
| "learning_rate": 3e-06, | |
| "loss": 86.6783, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.04790579419242607, | |
| "grad_norm": 268.37506103515625, | |
| "learning_rate": 3e-06, | |
| "loss": 72.0206, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.04799500423747714, | |
| "grad_norm": 337.4726867675781, | |
| "learning_rate": 3e-06, | |
| "loss": 87.3427, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.04808421428252821, | |
| "grad_norm": 284.79827880859375, | |
| "learning_rate": 3e-06, | |
| "loss": 98.66, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.04817342432757928, | |
| "grad_norm": 321.74609375, | |
| "learning_rate": 3e-06, | |
| "loss": 87.6142, | |
| "step": 540 | |
| }, | |
| { | |
| "completion_length": 125.37500762939453, | |
| "epoch": 0.048262634372630356, | |
| "grad_norm": 187.67727661132812, | |
| "learning_rate": 3e-06, | |
| "loss": -1.5514, | |
| "reward": 1.9883333444595337, | |
| "reward_std": 0.6124217808246613, | |
| "rewards/correctness_reward_func": 1.4166666865348816, | |
| "rewards/int_reward_func": 0.4375, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.1341666616499424, | |
| "step": 541, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.04835184441768143, | |
| "grad_norm": 202.66070556640625, | |
| "learning_rate": 3e-06, | |
| "loss": 1.9568, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.0484410544627325, | |
| "grad_norm": 180.55126953125, | |
| "learning_rate": 3e-06, | |
| "loss": -6.8013, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.048530264507783576, | |
| "grad_norm": 161.08514404296875, | |
| "learning_rate": 3e-06, | |
| "loss": -3.4725, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.04861947455283465, | |
| "grad_norm": 220.28076171875, | |
| "learning_rate": 3e-06, | |
| "loss": 0.0972, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.04870868459788572, | |
| "grad_norm": 321.00994873046875, | |
| "learning_rate": 3e-06, | |
| "loss": -10.5751, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.048797894642936795, | |
| "grad_norm": 197.3623046875, | |
| "learning_rate": 3e-06, | |
| "loss": -2.6929, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.04888710468798787, | |
| "grad_norm": 213.94691467285156, | |
| "learning_rate": 3e-06, | |
| "loss": 1.3407, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.04897631473303894, | |
| "grad_norm": 254.2111053466797, | |
| "learning_rate": 3e-06, | |
| "loss": -7.3544, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.049065524778090015, | |
| "grad_norm": 155.93460083007812, | |
| "learning_rate": 3e-06, | |
| "loss": -5.7191, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.04915473482314109, | |
| "grad_norm": 175.17147827148438, | |
| "learning_rate": 3e-06, | |
| "loss": -2.0149, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.04924394486819216, | |
| "grad_norm": 220.0244140625, | |
| "learning_rate": 3e-06, | |
| "loss": -12.1544, | |
| "step": 552 | |
| }, | |
| { | |
| "completion_length": 138.62500762939453, | |
| "epoch": 0.04933315491324323, | |
| "grad_norm": 255.7532501220703, | |
| "learning_rate": 3e-06, | |
| "loss": -42.4297, | |
| "reward": 2.4352500438690186, | |
| "reward_std": 0.32932066917419434, | |
| "rewards/correctness_reward_func": 1.8333333134651184, | |
| "rewards/int_reward_func": 0.4791666716337204, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.12275000661611557, | |
| "step": 553, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.0494223649582943, | |
| "grad_norm": 263.04913330078125, | |
| "learning_rate": 3e-06, | |
| "loss": -27.7121, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.049511575003345375, | |
| "grad_norm": 187.12600708007812, | |
| "learning_rate": 3e-06, | |
| "loss": -32.293, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.04960078504839645, | |
| "grad_norm": 233.73802185058594, | |
| "learning_rate": 3e-06, | |
| "loss": -34.2649, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.04968999509344752, | |
| "grad_norm": 238.03567504882812, | |
| "learning_rate": 3e-06, | |
| "loss": -48.0373, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.049779205138498595, | |
| "grad_norm": 211.17440795898438, | |
| "learning_rate": 3e-06, | |
| "loss": -34.5469, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.04986841518354967, | |
| "grad_norm": 209.6473388671875, | |
| "learning_rate": 3e-06, | |
| "loss": -45.0123, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.04995762522860074, | |
| "grad_norm": 219.1716766357422, | |
| "learning_rate": 3e-06, | |
| "loss": -29.3907, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.050046835273651814, | |
| "grad_norm": 194.5946502685547, | |
| "learning_rate": 3e-06, | |
| "loss": -33.3813, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.05013604531870289, | |
| "grad_norm": 230.82928466796875, | |
| "learning_rate": 3e-06, | |
| "loss": -37.409, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.05022525536375396, | |
| "grad_norm": 268.3168640136719, | |
| "learning_rate": 3e-06, | |
| "loss": -50.4616, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.050314465408805034, | |
| "grad_norm": 225.2816925048828, | |
| "learning_rate": 3e-06, | |
| "loss": -36.9552, | |
| "step": 564 | |
| }, | |
| { | |
| "completion_length": 138.81250381469727, | |
| "epoch": 0.05040367545385611, | |
| "grad_norm": 236.8852996826172, | |
| "learning_rate": 3e-06, | |
| "loss": 45.2003, | |
| "reward": 1.8692501783370972, | |
| "reward_std": 0.7652427852153778, | |
| "rewards/correctness_reward_func": 1.25, | |
| "rewards/int_reward_func": 0.46875, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.15049999579787254, | |
| "step": 565, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.050492885498907174, | |
| "grad_norm": 258.247802734375, | |
| "learning_rate": 3e-06, | |
| "loss": 49.6383, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.05058209554395825, | |
| "grad_norm": 318.1617126464844, | |
| "learning_rate": 3e-06, | |
| "loss": 58.9489, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.05067130558900932, | |
| "grad_norm": 226.23045349121094, | |
| "learning_rate": 3e-06, | |
| "loss": 44.3005, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.050760515634060394, | |
| "grad_norm": 303.47760009765625, | |
| "learning_rate": 3e-06, | |
| "loss": 28.8653, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.05084972567911147, | |
| "grad_norm": 248.53013610839844, | |
| "learning_rate": 3e-06, | |
| "loss": 45.7073, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.05093893572416254, | |
| "grad_norm": 228.76365661621094, | |
| "learning_rate": 3e-06, | |
| "loss": 40.9719, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.05102814576921361, | |
| "grad_norm": 236.98915100097656, | |
| "learning_rate": 3e-06, | |
| "loss": 44.3296, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.05111735581426469, | |
| "grad_norm": 318.7423400878906, | |
| "learning_rate": 3e-06, | |
| "loss": 54.9659, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.05120656585931576, | |
| "grad_norm": 226.2831268310547, | |
| "learning_rate": 3e-06, | |
| "loss": 40.0793, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.05129577590436683, | |
| "grad_norm": 321.7300109863281, | |
| "learning_rate": 3e-06, | |
| "loss": 25.4049, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.051384985949417906, | |
| "grad_norm": 225.1118927001953, | |
| "learning_rate": 3e-06, | |
| "loss": 42.5627, | |
| "step": 576 | |
| }, | |
| { | |
| "completion_length": 114.56250381469727, | |
| "epoch": 0.05147419599446898, | |
| "grad_norm": 151.06048583984375, | |
| "learning_rate": 3e-06, | |
| "loss": 33.113, | |
| "reward": 2.1812918186187744, | |
| "reward_std": 0.530484139919281, | |
| "rewards/correctness_reward_func": 1.5833333134651184, | |
| "rewards/int_reward_func": 0.4375, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.16045832633972168, | |
| "step": 577, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.05156340603952005, | |
| "grad_norm": 182.15431213378906, | |
| "learning_rate": 3e-06, | |
| "loss": 19.2473, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.05165261608457112, | |
| "grad_norm": 169.3698272705078, | |
| "learning_rate": 3e-06, | |
| "loss": 25.8161, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.05174182612962219, | |
| "grad_norm": 155.8734588623047, | |
| "learning_rate": 3e-06, | |
| "loss": 19.9518, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.051831036174673266, | |
| "grad_norm": 177.54641723632812, | |
| "learning_rate": 3e-06, | |
| "loss": 23.4249, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.05192024621972434, | |
| "grad_norm": 143.13719177246094, | |
| "learning_rate": 3e-06, | |
| "loss": 7.9228, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.05200945626477541, | |
| "grad_norm": 146.39906311035156, | |
| "learning_rate": 3e-06, | |
| "loss": 30.8804, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.052098666309826486, | |
| "grad_norm": 166.2614288330078, | |
| "learning_rate": 3e-06, | |
| "loss": 17.3062, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.05218787635487756, | |
| "grad_norm": 158.1491241455078, | |
| "learning_rate": 3e-06, | |
| "loss": 23.1792, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.05227708639992863, | |
| "grad_norm": 124.15723419189453, | |
| "learning_rate": 3e-06, | |
| "loss": 18.8908, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.052366296444979706, | |
| "grad_norm": 155.26602172851562, | |
| "learning_rate": 3e-06, | |
| "loss": 21.968, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.05245550649003078, | |
| "grad_norm": 155.0635528564453, | |
| "learning_rate": 3e-06, | |
| "loss": 6.3548, | |
| "step": 588 | |
| }, | |
| { | |
| "completion_length": 161.45833587646484, | |
| "epoch": 0.05254471653508185, | |
| "grad_norm": 317.9678649902344, | |
| "learning_rate": 3e-06, | |
| "loss": 6.4115, | |
| "reward": 1.8582292199134827, | |
| "reward_std": 0.48372724652290344, | |
| "rewards/correctness_reward_func": 1.3333333134651184, | |
| "rewards/int_reward_func": 0.4895833283662796, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.03531250241212547, | |
| "step": 589, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.052633926580132925, | |
| "grad_norm": 267.894287109375, | |
| "learning_rate": 3e-06, | |
| "loss": 6.0494, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.052723136625184, | |
| "grad_norm": 197.32470703125, | |
| "learning_rate": 3e-06, | |
| "loss": 8.4095, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.05281234667023507, | |
| "grad_norm": 208.84291076660156, | |
| "learning_rate": 3e-06, | |
| "loss": 2.183, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.05290155671528614, | |
| "grad_norm": 213.50672912597656, | |
| "learning_rate": 3e-06, | |
| "loss": -5.6327, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.05299076676033721, | |
| "grad_norm": 264.34210205078125, | |
| "learning_rate": 3e-06, | |
| "loss": 0.0463, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.053079976805388285, | |
| "grad_norm": 269.38372802734375, | |
| "learning_rate": 3e-06, | |
| "loss": 3.4921, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.05316918685043936, | |
| "grad_norm": 233.82005310058594, | |
| "learning_rate": 3e-06, | |
| "loss": 3.9645, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.05325839689549043, | |
| "grad_norm": 174.2704620361328, | |
| "learning_rate": 3e-06, | |
| "loss": 6.6277, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.053347606940541505, | |
| "grad_norm": 197.27203369140625, | |
| "learning_rate": 3e-06, | |
| "loss": -0.034, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.05343681698559258, | |
| "grad_norm": 195.1741943359375, | |
| "learning_rate": 3e-06, | |
| "loss": -7.7007, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.05352602703064365, | |
| "grad_norm": 218.35403442382812, | |
| "learning_rate": 3e-06, | |
| "loss": -2.732, | |
| "step": 600 | |
| }, | |
| { | |
| "completion_length": 163.20833587646484, | |
| "epoch": 0.053615237075694724, | |
| "grad_norm": 89.40003204345703, | |
| "learning_rate": 3e-06, | |
| "loss": -15.5412, | |
| "reward": 1.958250105381012, | |
| "reward_std": 0.20268601924180984, | |
| "rewards/correctness_reward_func": 1.4583333134651184, | |
| "rewards/int_reward_func": 0.4375, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.06241666525602341, | |
| "step": 601, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.0537044471207458, | |
| "grad_norm": 85.92318725585938, | |
| "learning_rate": 3e-06, | |
| "loss": -19.9741, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.05379365716579687, | |
| "grad_norm": 89.12326049804688, | |
| "learning_rate": 3e-06, | |
| "loss": -28.9968, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.053882867210847944, | |
| "grad_norm": 109.64755249023438, | |
| "learning_rate": 3e-06, | |
| "loss": -21.6802, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.05397207725589902, | |
| "grad_norm": 99.89476776123047, | |
| "learning_rate": 3e-06, | |
| "loss": -16.4453, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.054061287300950084, | |
| "grad_norm": 140.71066284179688, | |
| "learning_rate": 3e-06, | |
| "loss": -24.1026, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.05415049734600116, | |
| "grad_norm": 80.75763702392578, | |
| "learning_rate": 3e-06, | |
| "loss": -16.613, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.05423970739105223, | |
| "grad_norm": 85.42610168457031, | |
| "learning_rate": 3e-06, | |
| "loss": -21.2449, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.054328917436103304, | |
| "grad_norm": 93.39994812011719, | |
| "learning_rate": 3e-06, | |
| "loss": -30.1845, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.05441812748115438, | |
| "grad_norm": 96.1513671875, | |
| "learning_rate": 3e-06, | |
| "loss": -22.8781, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.05450733752620545, | |
| "grad_norm": 98.65193176269531, | |
| "learning_rate": 3e-06, | |
| "loss": -17.6772, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.05459654757125652, | |
| "grad_norm": 130.4186248779297, | |
| "learning_rate": 3e-06, | |
| "loss": -25.9751, | |
| "step": 612 | |
| }, | |
| { | |
| "completion_length": 129.4375, | |
| "epoch": 0.0546857576163076, | |
| "grad_norm": 178.97259521484375, | |
| "learning_rate": 3e-06, | |
| "loss": -41.7741, | |
| "reward": 1.9304792881011963, | |
| "reward_std": 0.4192664921283722, | |
| "rewards/correctness_reward_func": 1.2916666865348816, | |
| "rewards/int_reward_func": 0.4895833283662796, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.14922916889190674, | |
| "step": 613, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.05477496766135867, | |
| "grad_norm": 159.00680541992188, | |
| "learning_rate": 3e-06, | |
| "loss": -36.4024, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.05486417770640974, | |
| "grad_norm": 154.65304565429688, | |
| "learning_rate": 3e-06, | |
| "loss": -35.2622, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.05495338775146082, | |
| "grad_norm": 239.50408935546875, | |
| "learning_rate": 3e-06, | |
| "loss": -34.9883, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.05504259779651189, | |
| "grad_norm": 191.45263671875, | |
| "learning_rate": 3e-06, | |
| "loss": -30.4894, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.05513180784156296, | |
| "grad_norm": 172.67025756835938, | |
| "learning_rate": 3e-06, | |
| "loss": -40.9277, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.05522101788661403, | |
| "grad_norm": 180.4842071533203, | |
| "learning_rate": 3e-06, | |
| "loss": -44.2844, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.0553102279316651, | |
| "grad_norm": 175.10528564453125, | |
| "learning_rate": 3e-06, | |
| "loss": -39.7696, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.055399437976716176, | |
| "grad_norm": 224.33847045898438, | |
| "learning_rate": 3e-06, | |
| "loss": -38.3443, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.05548864802176725, | |
| "grad_norm": 196.98231506347656, | |
| "learning_rate": 3e-06, | |
| "loss": -36.4789, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.05557785806681832, | |
| "grad_norm": 205.22146606445312, | |
| "learning_rate": 3e-06, | |
| "loss": -32.5847, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.055667068111869396, | |
| "grad_norm": 189.9784393310547, | |
| "learning_rate": 3e-06, | |
| "loss": -43.7861, | |
| "step": 624 | |
| }, | |
| { | |
| "completion_length": 135.1666717529297, | |
| "epoch": 0.05575627815692047, | |
| "grad_norm": 190.61593627929688, | |
| "learning_rate": 3e-06, | |
| "loss": -23.515, | |
| "reward": 1.7549793124198914, | |
| "reward_std": 0.5575149804353714, | |
| "rewards/correctness_reward_func": 1.1666666865348816, | |
| "rewards/int_reward_func": 0.4479166716337204, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.14039582759141922, | |
| "step": 625, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.05584548820197154, | |
| "grad_norm": 263.34173583984375, | |
| "learning_rate": 3e-06, | |
| "loss": -26.4535, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.055934698247022616, | |
| "grad_norm": 203.12969970703125, | |
| "learning_rate": 3e-06, | |
| "loss": -20.2943, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.05602390829207369, | |
| "grad_norm": 186.3466033935547, | |
| "learning_rate": 3e-06, | |
| "loss": -22.5875, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.05611311833712476, | |
| "grad_norm": 206.43478393554688, | |
| "learning_rate": 3e-06, | |
| "loss": -14.8606, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.056202328382175835, | |
| "grad_norm": 230.95394897460938, | |
| "learning_rate": 3e-06, | |
| "loss": -11.2027, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.05629153842722691, | |
| "grad_norm": 208.40184020996094, | |
| "learning_rate": 3e-06, | |
| "loss": -24.9903, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.056380748472277975, | |
| "grad_norm": 283.0361328125, | |
| "learning_rate": 3e-06, | |
| "loss": -28.8647, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.05646995851732905, | |
| "grad_norm": 243.43634033203125, | |
| "learning_rate": 3e-06, | |
| "loss": -20.9854, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.05655916856238012, | |
| "grad_norm": 196.8306121826172, | |
| "learning_rate": 3e-06, | |
| "loss": -25.1397, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.056648378607431195, | |
| "grad_norm": 204.37130737304688, | |
| "learning_rate": 3e-06, | |
| "loss": -17.3989, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.05673758865248227, | |
| "grad_norm": 222.9701385498047, | |
| "learning_rate": 3e-06, | |
| "loss": -14.9143, | |
| "step": 636 | |
| }, | |
| { | |
| "completion_length": 117.18750381469727, | |
| "epoch": 0.05682679869753334, | |
| "grad_norm": 152.56382751464844, | |
| "learning_rate": 3e-06, | |
| "loss": -53.4148, | |
| "reward": 2.4275625944137573, | |
| "reward_std": 0.3748088702559471, | |
| "rewards/correctness_reward_func": 1.75, | |
| "rewards/int_reward_func": 0.4895833283662796, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.18797916173934937, | |
| "step": 637, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.056916008742584415, | |
| "grad_norm": 187.37356567382812, | |
| "learning_rate": 3e-06, | |
| "loss": -49.109, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.05700521878763549, | |
| "grad_norm": 188.10548400878906, | |
| "learning_rate": 3e-06, | |
| "loss": -49.7143, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.05709442883268656, | |
| "grad_norm": 152.7540283203125, | |
| "learning_rate": 3e-06, | |
| "loss": -46.4195, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.057183638877737634, | |
| "grad_norm": 191.8466796875, | |
| "learning_rate": 3e-06, | |
| "loss": -54.4961, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.05727284892278871, | |
| "grad_norm": 160.72772216796875, | |
| "learning_rate": 3e-06, | |
| "loss": -46.0513, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.05736205896783978, | |
| "grad_norm": 163.2805938720703, | |
| "learning_rate": 3e-06, | |
| "loss": -55.7387, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.057451269012890854, | |
| "grad_norm": 189.55470275878906, | |
| "learning_rate": 3e-06, | |
| "loss": -51.4447, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.05754047905794193, | |
| "grad_norm": 182.82583618164062, | |
| "learning_rate": 3e-06, | |
| "loss": -51.1561, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.057629689102992994, | |
| "grad_norm": 177.5033721923828, | |
| "learning_rate": 3e-06, | |
| "loss": -49.5066, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.05771889914804407, | |
| "grad_norm": 208.1852569580078, | |
| "learning_rate": 3e-06, | |
| "loss": -59.1063, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.05780810919309514, | |
| "grad_norm": 185.0785369873047, | |
| "learning_rate": 3e-06, | |
| "loss": -49.7641, | |
| "step": 648 | |
| }, | |
| { | |
| "completion_length": 127.75000762939453, | |
| "epoch": 0.057897319238146214, | |
| "grad_norm": 332.4126892089844, | |
| "learning_rate": 3e-06, | |
| "loss": 24.9465, | |
| "reward": 2.10916668176651, | |
| "reward_std": 0.7774414718151093, | |
| "rewards/correctness_reward_func": 1.5, | |
| "rewards/int_reward_func": 0.4583333283662796, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.15083333104848862, | |
| "step": 649, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.05798652928319729, | |
| "grad_norm": 274.673828125, | |
| "learning_rate": 3e-06, | |
| "loss": 19.6904, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.05807573932824836, | |
| "grad_norm": 225.66183471679688, | |
| "learning_rate": 3e-06, | |
| "loss": 17.079, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.058164949373299434, | |
| "grad_norm": 236.45230102539062, | |
| "learning_rate": 3e-06, | |
| "loss": 16.4363, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.05825415941835051, | |
| "grad_norm": 304.5491943359375, | |
| "learning_rate": 3e-06, | |
| "loss": 33.7894, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.05834336946340158, | |
| "grad_norm": 294.2102966308594, | |
| "learning_rate": 3e-06, | |
| "loss": 12.1637, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.05843257950845265, | |
| "grad_norm": 307.0853271484375, | |
| "learning_rate": 3e-06, | |
| "loss": 23.9968, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.05852178955350373, | |
| "grad_norm": 267.52960205078125, | |
| "learning_rate": 3e-06, | |
| "loss": 18.2069, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.0586109995985548, | |
| "grad_norm": 219.6736602783203, | |
| "learning_rate": 3e-06, | |
| "loss": 16.1471, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.05870020964360587, | |
| "grad_norm": 312.0810852050781, | |
| "learning_rate": 3e-06, | |
| "loss": 15.3629, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.05878941968865694, | |
| "grad_norm": 355.7622985839844, | |
| "learning_rate": 3e-06, | |
| "loss": 32.1285, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.05887862973370801, | |
| "grad_norm": 243.60047912597656, | |
| "learning_rate": 3e-06, | |
| "loss": 10.9719, | |
| "step": 660 | |
| }, | |
| { | |
| "completion_length": 128.37500381469727, | |
| "epoch": 0.058967839778759086, | |
| "grad_norm": 160.17491149902344, | |
| "learning_rate": 3e-06, | |
| "loss": -4.7968, | |
| "reward": 2.220729112625122, | |
| "reward_std": 0.37828393280506134, | |
| "rewards/correctness_reward_func": 1.5833333134651184, | |
| "rewards/int_reward_func": 0.5, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.13739583641290665, | |
| "step": 661, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.05905704982381016, | |
| "grad_norm": 213.04843139648438, | |
| "learning_rate": 3e-06, | |
| "loss": -17.5158, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.05914625986886123, | |
| "grad_norm": 360.1976318359375, | |
| "learning_rate": 3e-06, | |
| "loss": -5.8696, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.059235469913912306, | |
| "grad_norm": 165.9031524658203, | |
| "learning_rate": 3e-06, | |
| "loss": 9.1976, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.05932467995896338, | |
| "grad_norm": 185.02491760253906, | |
| "learning_rate": 3e-06, | |
| "loss": 0.8362, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.05941389000401445, | |
| "grad_norm": 186.4868927001953, | |
| "learning_rate": 3e-06, | |
| "loss": -4.8959, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.059503100049065526, | |
| "grad_norm": 172.38906860351562, | |
| "learning_rate": 3e-06, | |
| "loss": -8.3078, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.0595923100941166, | |
| "grad_norm": 205.17637634277344, | |
| "learning_rate": 3e-06, | |
| "loss": -19.4471, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.05968152013916767, | |
| "grad_norm": 412.4108581542969, | |
| "learning_rate": 3e-06, | |
| "loss": -9.0223, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.059770730184218746, | |
| "grad_norm": 165.48020935058594, | |
| "learning_rate": 3e-06, | |
| "loss": 5.0711, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.05985994022926982, | |
| "grad_norm": 185.8058624267578, | |
| "learning_rate": 3e-06, | |
| "loss": -3.3781, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.059949150274320885, | |
| "grad_norm": 214.62686157226562, | |
| "learning_rate": 3e-06, | |
| "loss": -9.8208, | |
| "step": 672 | |
| }, | |
| { | |
| "completion_length": 110.20833587646484, | |
| "epoch": 0.06003836031937196, | |
| "grad_norm": 223.97824096679688, | |
| "learning_rate": 3e-06, | |
| "loss": 17.6495, | |
| "reward": 1.9757083654403687, | |
| "reward_std": 0.7511122822761536, | |
| "rewards/correctness_reward_func": 1.375, | |
| "rewards/int_reward_func": 0.4375, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.16320832818746567, | |
| "step": 673, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.06012757036442303, | |
| "grad_norm": 180.975341796875, | |
| "learning_rate": 3e-06, | |
| "loss": -3.9399, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.060216780409474105, | |
| "grad_norm": 222.6065216064453, | |
| "learning_rate": 3e-06, | |
| "loss": 10.3809, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.06030599045452518, | |
| "grad_norm": 192.075439453125, | |
| "learning_rate": 3e-06, | |
| "loss": 14.9998, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.06039520049957625, | |
| "grad_norm": 200.7101593017578, | |
| "learning_rate": 3e-06, | |
| "loss": 2.2906, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.060484410544627325, | |
| "grad_norm": 200.1593017578125, | |
| "learning_rate": 3e-06, | |
| "loss": 13.137, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.0605736205896784, | |
| "grad_norm": 185.83168029785156, | |
| "learning_rate": 3e-06, | |
| "loss": 16.2497, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.06066283063472947, | |
| "grad_norm": 208.6911163330078, | |
| "learning_rate": 3e-06, | |
| "loss": -5.308, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.060752040679780545, | |
| "grad_norm": 231.81312561035156, | |
| "learning_rate": 3e-06, | |
| "loss": 8.1785, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.06084125072483162, | |
| "grad_norm": 187.45535278320312, | |
| "learning_rate": 3e-06, | |
| "loss": 12.5542, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.06093046076988269, | |
| "grad_norm": 182.04257202148438, | |
| "learning_rate": 3e-06, | |
| "loss": -1.0645, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.061019670814933764, | |
| "grad_norm": 186.71937561035156, | |
| "learning_rate": 3e-06, | |
| "loss": 10.3416, | |
| "step": 684 | |
| }, | |
| { | |
| "completion_length": 130.4791717529297, | |
| "epoch": 0.06110888085998484, | |
| "grad_norm": 169.67178344726562, | |
| "learning_rate": 3e-06, | |
| "loss": 32.9847, | |
| "reward": 2.2736042737960815, | |
| "reward_std": 0.5084125399589539, | |
| "rewards/correctness_reward_func": 1.6666666865348816, | |
| "rewards/int_reward_func": 0.4583333283662796, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.14860416948795319, | |
| "step": 685, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.061198090905035904, | |
| "grad_norm": 231.2995147705078, | |
| "learning_rate": 3e-06, | |
| "loss": 18.3002, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.06128730095008698, | |
| "grad_norm": 162.75083923339844, | |
| "learning_rate": 3e-06, | |
| "loss": 29.4583, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.06137651099513805, | |
| "grad_norm": 209.54966735839844, | |
| "learning_rate": 3e-06, | |
| "loss": 42.4661, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.061465721040189124, | |
| "grad_norm": 396.1800537109375, | |
| "learning_rate": 3e-06, | |
| "loss": 42.6963, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.0615549310852402, | |
| "grad_norm": 144.02049255371094, | |
| "learning_rate": 3e-06, | |
| "loss": 23.2087, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.06164414113029127, | |
| "grad_norm": 179.72213745117188, | |
| "learning_rate": 3e-06, | |
| "loss": 28.8186, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.061733351175342344, | |
| "grad_norm": 143.9263153076172, | |
| "learning_rate": 3e-06, | |
| "loss": 16.2638, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.06182256122039342, | |
| "grad_norm": 148.50750732421875, | |
| "learning_rate": 3e-06, | |
| "loss": 27.5098, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.06191177126544449, | |
| "grad_norm": 178.09515380859375, | |
| "learning_rate": 3e-06, | |
| "loss": 38.1567, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.06200098131049556, | |
| "grad_norm": 319.7694396972656, | |
| "learning_rate": 3e-06, | |
| "loss": 36.9073, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.06209019135554664, | |
| "grad_norm": 137.5644989013672, | |
| "learning_rate": 3e-06, | |
| "loss": 19.9691, | |
| "step": 696 | |
| }, | |
| { | |
| "completion_length": 138.20833587646484, | |
| "epoch": 0.06217940140059771, | |
| "grad_norm": 243.18804931640625, | |
| "learning_rate": 3e-06, | |
| "loss": 63.9782, | |
| "reward": 1.6311666369438171, | |
| "reward_std": 0.6314830482006073, | |
| "rewards/correctness_reward_func": 1.0833333730697632, | |
| "rewards/int_reward_func": 0.4166666716337204, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.13116667047142982, | |
| "step": 697, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.06226861144564878, | |
| "grad_norm": 248.19979858398438, | |
| "learning_rate": 3e-06, | |
| "loss": 62.0708, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.06235782149069985, | |
| "grad_norm": 192.6903839111328, | |
| "learning_rate": 3e-06, | |
| "loss": 44.2788, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.06244703153575092, | |
| "grad_norm": 187.61729431152344, | |
| "learning_rate": 3e-06, | |
| "loss": 45.635, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.062536241580802, | |
| "grad_norm": 270.56439208984375, | |
| "learning_rate": 3e-06, | |
| "loss": 61.8371, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.06262545162585308, | |
| "grad_norm": 186.34654235839844, | |
| "learning_rate": 3e-06, | |
| "loss": 35.0754, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.06271466167090414, | |
| "grad_norm": 193.72666931152344, | |
| "learning_rate": 3e-06, | |
| "loss": 56.4102, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.06280387171595522, | |
| "grad_norm": 213.4738006591797, | |
| "learning_rate": 3e-06, | |
| "loss": 55.6052, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.06289308176100629, | |
| "grad_norm": 178.9663543701172, | |
| "learning_rate": 3e-06, | |
| "loss": 38.9399, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.06298229180605736, | |
| "grad_norm": 155.2235870361328, | |
| "learning_rate": 3e-06, | |
| "loss": 40.2563, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.06307150185110844, | |
| "grad_norm": 190.83424377441406, | |
| "learning_rate": 3e-06, | |
| "loss": 54.1227, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.0631607118961595, | |
| "grad_norm": 151.27175903320312, | |
| "learning_rate": 3e-06, | |
| "loss": 29.2768, | |
| "step": 708 | |
| }, | |
| { | |
| "completion_length": 177.89584350585938, | |
| "epoch": 0.06324992194121058, | |
| "grad_norm": 383.90155029296875, | |
| "learning_rate": 3e-06, | |
| "loss": -57.4567, | |
| "reward": 1.6143542528152466, | |
| "reward_std": 1.1507561206817627, | |
| "rewards/correctness_reward_func": 1.125, | |
| "rewards/int_reward_func": 0.4479166716337204, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.041437502950429916, | |
| "step": 709, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.06333913198626165, | |
| "grad_norm": 228.4014892578125, | |
| "learning_rate": 3e-06, | |
| "loss": -22.536, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.06342834203131273, | |
| "grad_norm": 247.7998809814453, | |
| "learning_rate": 3e-06, | |
| "loss": -34.7459, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.0635175520763638, | |
| "grad_norm": 256.2286376953125, | |
| "learning_rate": 3e-06, | |
| "loss": -30.4245, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.06360676212141488, | |
| "grad_norm": 254.9169158935547, | |
| "learning_rate": 3e-06, | |
| "loss": -38.2843, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.06369597216646594, | |
| "grad_norm": 321.43609619140625, | |
| "learning_rate": 3e-06, | |
| "loss": -35.5213, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.06378518221151702, | |
| "grad_norm": 349.0517272949219, | |
| "learning_rate": 3e-06, | |
| "loss": -55.9526, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.06387439225656809, | |
| "grad_norm": 209.25282287597656, | |
| "learning_rate": 3e-06, | |
| "loss": -22.3323, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.06396360230161917, | |
| "grad_norm": 247.7156219482422, | |
| "learning_rate": 3e-06, | |
| "loss": -35.9537, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.06405281234667023, | |
| "grad_norm": 274.3576965332031, | |
| "learning_rate": 3e-06, | |
| "loss": -33.4144, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.0641420223917213, | |
| "grad_norm": 287.4893798828125, | |
| "learning_rate": 3e-06, | |
| "loss": -40.4848, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.06423123243677238, | |
| "grad_norm": 375.7614440917969, | |
| "learning_rate": 3e-06, | |
| "loss": -39.3635, | |
| "step": 720 | |
| }, | |
| { | |
| "completion_length": 109.22916793823242, | |
| "epoch": 0.06432044248182345, | |
| "grad_norm": 123.53705596923828, | |
| "learning_rate": 3e-06, | |
| "loss": -30.919, | |
| "reward": 2.3711042404174805, | |
| "reward_std": 0.4830681085586548, | |
| "rewards/correctness_reward_func": 1.7083333730697632, | |
| "rewards/int_reward_func": 0.46875, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.1940208300948143, | |
| "step": 721, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.06440965252687453, | |
| "grad_norm": 114.69866943359375, | |
| "learning_rate": 3e-06, | |
| "loss": -29.2645, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.0644988625719256, | |
| "grad_norm": 118.97370910644531, | |
| "learning_rate": 3e-06, | |
| "loss": -25.0985, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.06458807261697667, | |
| "grad_norm": 121.6566162109375, | |
| "learning_rate": 3e-06, | |
| "loss": -29.0591, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.06467728266202774, | |
| "grad_norm": 182.92691040039062, | |
| "learning_rate": 3e-06, | |
| "loss": -28.0901, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.06476649270707882, | |
| "grad_norm": 156.50718688964844, | |
| "learning_rate": 3e-06, | |
| "loss": -30.7831, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.06485570275212989, | |
| "grad_norm": 132.53089904785156, | |
| "learning_rate": 3e-06, | |
| "loss": -33.3058, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.06494491279718097, | |
| "grad_norm": 112.21791076660156, | |
| "learning_rate": 3e-06, | |
| "loss": -31.8015, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.06503412284223203, | |
| "grad_norm": 130.97052001953125, | |
| "learning_rate": 3e-06, | |
| "loss": -27.7953, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.06512333288728311, | |
| "grad_norm": 128.9853515625, | |
| "learning_rate": 3e-06, | |
| "loss": -32.2601, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.06521254293233418, | |
| "grad_norm": 146.0636444091797, | |
| "learning_rate": 3e-06, | |
| "loss": -31.9068, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.06530175297738525, | |
| "grad_norm": 148.94358825683594, | |
| "learning_rate": 3e-06, | |
| "loss": -35.98, | |
| "step": 732 | |
| }, | |
| { | |
| "completion_length": 133.77083587646484, | |
| "epoch": 0.06539096302243633, | |
| "grad_norm": 200.87583923339844, | |
| "learning_rate": 3e-06, | |
| "loss": 5.8156, | |
| "reward": 1.7448542714118958, | |
| "reward_std": 0.7897588908672333, | |
| "rewards/correctness_reward_func": 1.1666666865348816, | |
| "rewards/int_reward_func": 0.4479166716337204, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.1302708424627781, | |
| "step": 733, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.0654801730674874, | |
| "grad_norm": 254.33050537109375, | |
| "learning_rate": 3e-06, | |
| "loss": 39.6914, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.06556938311253847, | |
| "grad_norm": 207.40354919433594, | |
| "learning_rate": 3e-06, | |
| "loss": 18.8055, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.06565859315758954, | |
| "grad_norm": 255.23114013671875, | |
| "learning_rate": 3e-06, | |
| "loss": 12.5947, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.06574780320264062, | |
| "grad_norm": 183.82200622558594, | |
| "learning_rate": 3e-06, | |
| "loss": 11.0044, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.06583701324769169, | |
| "grad_norm": 226.2420654296875, | |
| "learning_rate": 3e-06, | |
| "loss": 0.4847, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.06592622329274277, | |
| "grad_norm": 233.6065673828125, | |
| "learning_rate": 3e-06, | |
| "loss": 4.5426, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.06601543333779383, | |
| "grad_norm": 264.205078125, | |
| "learning_rate": 3e-06, | |
| "loss": 39.3686, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.06610464338284491, | |
| "grad_norm": 241.85284423828125, | |
| "learning_rate": 3e-06, | |
| "loss": 17.7268, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.06619385342789598, | |
| "grad_norm": 221.0516357421875, | |
| "learning_rate": 3e-06, | |
| "loss": 13.2982, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.06628306347294706, | |
| "grad_norm": 212.37222290039062, | |
| "learning_rate": 3e-06, | |
| "loss": 7.7467, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.06637227351799813, | |
| "grad_norm": 263.42919921875, | |
| "learning_rate": 3e-06, | |
| "loss": -2.1754, | |
| "step": 744 | |
| }, | |
| { | |
| "completion_length": 125.64583587646484, | |
| "epoch": 0.0664614835630492, | |
| "grad_norm": 193.34835815429688, | |
| "learning_rate": 3e-06, | |
| "loss": 40.2546, | |
| "reward": 1.7169584035873413, | |
| "reward_std": 0.4941745698451996, | |
| "rewards/correctness_reward_func": 1.0833333730697632, | |
| "rewards/int_reward_func": 0.4791666567325592, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.15445833653211594, | |
| "step": 745, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.06655069360810027, | |
| "grad_norm": 212.19464111328125, | |
| "learning_rate": 3e-06, | |
| "loss": 41.4745, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.06663990365315134, | |
| "grad_norm": 237.55845642089844, | |
| "learning_rate": 3e-06, | |
| "loss": 47.5379, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.06672911369820242, | |
| "grad_norm": 185.4857635498047, | |
| "learning_rate": 3e-06, | |
| "loss": 43.0404, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.06681832374325349, | |
| "grad_norm": 158.21678161621094, | |
| "learning_rate": 3e-06, | |
| "loss": 32.8558, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.06690753378830457, | |
| "grad_norm": 212.29397583007812, | |
| "learning_rate": 3e-06, | |
| "loss": 41.9557, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.06699674383335563, | |
| "grad_norm": 181.2362060546875, | |
| "learning_rate": 3e-06, | |
| "loss": 38.4185, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.06708595387840671, | |
| "grad_norm": 186.73841857910156, | |
| "learning_rate": 3e-06, | |
| "loss": 37.1992, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.06717516392345778, | |
| "grad_norm": 182.0499267578125, | |
| "learning_rate": 3e-06, | |
| "loss": 42.4944, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.06726437396850886, | |
| "grad_norm": 161.4265899658203, | |
| "learning_rate": 3e-06, | |
| "loss": 40.1143, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.06735358401355993, | |
| "grad_norm": 145.66175842285156, | |
| "learning_rate": 3e-06, | |
| "loss": 29.066, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.067442794058611, | |
| "grad_norm": 188.43362426757812, | |
| "learning_rate": 3e-06, | |
| "loss": 34.8647, | |
| "step": 756 | |
| }, | |
| { | |
| "completion_length": 115.64583587646484, | |
| "epoch": 0.06753200410366207, | |
| "grad_norm": 176.6951446533203, | |
| "learning_rate": 3e-06, | |
| "loss": 23.7273, | |
| "reward": 1.9728541374206543, | |
| "reward_std": 0.22583025321364403, | |
| "rewards/correctness_reward_func": 1.375, | |
| "rewards/int_reward_func": 0.4270833432674408, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.17077083885669708, | |
| "step": 757, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.06762121414871315, | |
| "grad_norm": 134.009521484375, | |
| "learning_rate": 3e-06, | |
| "loss": 29.2333, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.06771042419376422, | |
| "grad_norm": 131.3091278076172, | |
| "learning_rate": 3e-06, | |
| "loss": 29.9313, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.06779963423881528, | |
| "grad_norm": 128.05767822265625, | |
| "learning_rate": 3e-06, | |
| "loss": 22.7093, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.06788884428386636, | |
| "grad_norm": 115.9305419921875, | |
| "learning_rate": 3e-06, | |
| "loss": 23.3578, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.06797805432891743, | |
| "grad_norm": 127.96971893310547, | |
| "learning_rate": 3e-06, | |
| "loss": 26.8069, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.06806726437396851, | |
| "grad_norm": 172.38279724121094, | |
| "learning_rate": 3e-06, | |
| "loss": 20.0156, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.06815647441901958, | |
| "grad_norm": 122.19217681884766, | |
| "learning_rate": 3e-06, | |
| "loss": 23.256, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.06824568446407066, | |
| "grad_norm": 98.1166763305664, | |
| "learning_rate": 3e-06, | |
| "loss": 25.2087, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.06833489450912172, | |
| "grad_norm": 104.1299819946289, | |
| "learning_rate": 3e-06, | |
| "loss": 18.9453, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.0684241045541728, | |
| "grad_norm": 108.13124084472656, | |
| "learning_rate": 3e-06, | |
| "loss": 18.3841, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.06851331459922387, | |
| "grad_norm": 107.33203887939453, | |
| "learning_rate": 3e-06, | |
| "loss": 21.8827, | |
| "step": 768 | |
| }, | |
| { | |
| "completion_length": 140.7291717529297, | |
| "epoch": 0.06860252464427495, | |
| "grad_norm": 176.08241271972656, | |
| "learning_rate": 3e-06, | |
| "loss": 1.9837, | |
| "reward": 2.0843957662582397, | |
| "reward_std": 0.670438677072525, | |
| "rewards/correctness_reward_func": 1.5, | |
| "rewards/int_reward_func": 0.46875, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.11564583331346512, | |
| "step": 769, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.06869173468932602, | |
| "grad_norm": 184.2529754638672, | |
| "learning_rate": 3e-06, | |
| "loss": -9.1612, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.0687809447343771, | |
| "grad_norm": 124.57987213134766, | |
| "learning_rate": 3e-06, | |
| "loss": -1.167, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.06887015477942816, | |
| "grad_norm": 146.37869262695312, | |
| "learning_rate": 3e-06, | |
| "loss": 7.9908, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.06895936482447923, | |
| "grad_norm": 159.84788513183594, | |
| "learning_rate": 3e-06, | |
| "loss": -7.8619, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.06904857486953031, | |
| "grad_norm": 165.3255157470703, | |
| "learning_rate": 3e-06, | |
| "loss": -9.9255, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.06913778491458138, | |
| "grad_norm": 147.72352600097656, | |
| "learning_rate": 3e-06, | |
| "loss": 0.4622, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.06922699495963246, | |
| "grad_norm": 161.2108917236328, | |
| "learning_rate": 3e-06, | |
| "loss": -9.3763, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.06931620500468352, | |
| "grad_norm": 117.9613265991211, | |
| "learning_rate": 3e-06, | |
| "loss": -2.9584, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.0694054150497346, | |
| "grad_norm": 127.0103988647461, | |
| "learning_rate": 3e-06, | |
| "loss": 4.5212, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.06949462509478567, | |
| "grad_norm": 161.12701416015625, | |
| "learning_rate": 3e-06, | |
| "loss": -7.3502, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.06958383513983675, | |
| "grad_norm": 147.03277587890625, | |
| "learning_rate": 3e-06, | |
| "loss": -12.0696, | |
| "step": 780 | |
| }, | |
| { | |
| "completion_length": 146.75000762939453, | |
| "epoch": 0.06967304518488782, | |
| "grad_norm": 93.77994537353516, | |
| "learning_rate": 3e-06, | |
| "loss": -9.5819, | |
| "reward": 2.0504584312438965, | |
| "reward_std": 0.46765226125717163, | |
| "rewards/correctness_reward_func": 1.4166666865348816, | |
| "rewards/int_reward_func": 0.5, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.010416666977107525, | |
| "rewards/xmlcount_reward_func": 0.12337498925626278, | |
| "step": 781, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.0697622552299389, | |
| "grad_norm": 109.34754180908203, | |
| "learning_rate": 3e-06, | |
| "loss": -14.1501, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.06985146527498996, | |
| "grad_norm": 83.16600799560547, | |
| "learning_rate": 3e-06, | |
| "loss": -17.3924, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.06994067532004104, | |
| "grad_norm": 83.6520767211914, | |
| "learning_rate": 3e-06, | |
| "loss": -3.3917, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.07002988536509211, | |
| "grad_norm": 111.59048461914062, | |
| "learning_rate": 3e-06, | |
| "loss": -18.6001, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.07011909541014318, | |
| "grad_norm": 81.26487731933594, | |
| "learning_rate": 3e-06, | |
| "loss": -4.8149, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.07020830545519426, | |
| "grad_norm": 88.5013198852539, | |
| "learning_rate": 3e-06, | |
| "loss": -10.8751, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.07029751550024532, | |
| "grad_norm": 106.22066497802734, | |
| "learning_rate": 3e-06, | |
| "loss": -14.8712, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.0703867255452964, | |
| "grad_norm": 95.8133544921875, | |
| "learning_rate": 3e-06, | |
| "loss": -17.8064, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.07047593559034747, | |
| "grad_norm": 105.98171997070312, | |
| "learning_rate": 3e-06, | |
| "loss": -4.9442, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.07056514563539855, | |
| "grad_norm": 108.74724578857422, | |
| "learning_rate": 3e-06, | |
| "loss": -20.1345, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.07065435568044962, | |
| "grad_norm": 217.0005340576172, | |
| "learning_rate": 3e-06, | |
| "loss": -5.632, | |
| "step": 792 | |
| }, | |
| { | |
| "completion_length": 137.1041717529297, | |
| "epoch": 0.0707435657255007, | |
| "grad_norm": 186.0244140625, | |
| "learning_rate": 3e-06, | |
| "loss": 20.4605, | |
| "reward": 1.8869168162345886, | |
| "reward_std": 0.7256337702274323, | |
| "rewards/correctness_reward_func": 1.2916666567325592, | |
| "rewards/int_reward_func": 0.4583333283662796, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.13691667094826698, | |
| "step": 793, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.07083277577055176, | |
| "grad_norm": 139.76405334472656, | |
| "learning_rate": 3e-06, | |
| "loss": 14.9958, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.07092198581560284, | |
| "grad_norm": 155.77352905273438, | |
| "learning_rate": 3e-06, | |
| "loss": 24.0522, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.07101119586065391, | |
| "grad_norm": 147.90013122558594, | |
| "learning_rate": 3e-06, | |
| "loss": 30.8391, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.07110040590570499, | |
| "grad_norm": 154.99143981933594, | |
| "learning_rate": 3e-06, | |
| "loss": 24.957, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.07118961595075605, | |
| "grad_norm": 154.12411499023438, | |
| "learning_rate": 3e-06, | |
| "loss": 32.2635, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.07127882599580712, | |
| "grad_norm": 142.52955627441406, | |
| "learning_rate": 3e-06, | |
| "loss": 19.5332, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.0713680360408582, | |
| "grad_norm": 140.1791229248047, | |
| "learning_rate": 3e-06, | |
| "loss": 13.5796, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.07145724608590927, | |
| "grad_norm": 144.1186981201172, | |
| "learning_rate": 3e-06, | |
| "loss": 21.398, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.07154645613096035, | |
| "grad_norm": 139.25230407714844, | |
| "learning_rate": 3e-06, | |
| "loss": 28.6905, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.07163566617601141, | |
| "grad_norm": 151.95538330078125, | |
| "learning_rate": 3e-06, | |
| "loss": 21.6279, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.0717248762210625, | |
| "grad_norm": 143.84974670410156, | |
| "learning_rate": 3e-06, | |
| "loss": 29.0759, | |
| "step": 804 | |
| }, | |
| { | |
| "completion_length": 136.22916793823242, | |
| "epoch": 0.07181408626611356, | |
| "grad_norm": 92.62751770019531, | |
| "learning_rate": 3e-06, | |
| "loss": -21.4484, | |
| "reward": 2.414271116256714, | |
| "reward_std": 0.33920496702194214, | |
| "rewards/correctness_reward_func": 1.8333333134651184, | |
| "rewards/int_reward_func": 0.4583333283662796, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.12260417267680168, | |
| "step": 805, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.07190329631116464, | |
| "grad_norm": 60.42007827758789, | |
| "learning_rate": 3e-06, | |
| "loss": -12.7491, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.07199250635621571, | |
| "grad_norm": 94.1617660522461, | |
| "learning_rate": 3e-06, | |
| "loss": -14.5131, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.07208171640126679, | |
| "grad_norm": 68.65794372558594, | |
| "learning_rate": 3e-06, | |
| "loss": -14.1192, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.07217092644631785, | |
| "grad_norm": 79.4446792602539, | |
| "learning_rate": 3e-06, | |
| "loss": -12.4833, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.07226013649136893, | |
| "grad_norm": 82.45279693603516, | |
| "learning_rate": 3e-06, | |
| "loss": -8.9581, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.07234934653642, | |
| "grad_norm": 101.833984375, | |
| "learning_rate": 3e-06, | |
| "loss": -22.4901, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.07243855658147107, | |
| "grad_norm": 78.60984802246094, | |
| "learning_rate": 3e-06, | |
| "loss": -12.9701, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.07252776662652215, | |
| "grad_norm": 100.63545989990234, | |
| "learning_rate": 3e-06, | |
| "loss": -15.4353, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.07261697667157321, | |
| "grad_norm": 66.36518096923828, | |
| "learning_rate": 3e-06, | |
| "loss": -14.7871, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.0727061867166243, | |
| "grad_norm": 73.31499481201172, | |
| "learning_rate": 3e-06, | |
| "loss": -13.5328, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.07279539676167536, | |
| "grad_norm": 81.8609848022461, | |
| "learning_rate": 3e-06, | |
| "loss": -10.202, | |
| "step": 816 | |
| }, | |
| { | |
| "completion_length": 176.0416717529297, | |
| "epoch": 0.07288460680672644, | |
| "grad_norm": 85.71199035644531, | |
| "learning_rate": 3e-06, | |
| "loss": -2.3775, | |
| "reward": 1.308291733264923, | |
| "reward_std": 0.4305167943239212, | |
| "rewards/correctness_reward_func": 0.8333333432674408, | |
| "rewards/int_reward_func": 0.4583333283662796, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.016624998301267624, | |
| "step": 817, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.0729738168517775, | |
| "grad_norm": 113.99066162109375, | |
| "learning_rate": 3e-06, | |
| "loss": 3.9682, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.07306302689682859, | |
| "grad_norm": 90.42432403564453, | |
| "learning_rate": 3e-06, | |
| "loss": -8.968, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.07315223694187965, | |
| "grad_norm": 86.66998291015625, | |
| "learning_rate": 3e-06, | |
| "loss": 5.9931, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.07324144698693073, | |
| "grad_norm": 112.87352752685547, | |
| "learning_rate": 3e-06, | |
| "loss": 3.7934, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.0733306570319818, | |
| "grad_norm": 116.59276580810547, | |
| "learning_rate": 3e-06, | |
| "loss": -12.3296, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.07341986707703288, | |
| "grad_norm": 95.53129577636719, | |
| "learning_rate": 3e-06, | |
| "loss": -3.2342, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.07350907712208395, | |
| "grad_norm": 134.6486053466797, | |
| "learning_rate": 3e-06, | |
| "loss": 2.4981, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.07359828716713501, | |
| "grad_norm": 80.79833221435547, | |
| "learning_rate": 3e-06, | |
| "loss": -9.3798, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.07368749721218609, | |
| "grad_norm": 107.38970184326172, | |
| "learning_rate": 3e-06, | |
| "loss": 4.8459, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.07377670725723716, | |
| "grad_norm": 91.76937866210938, | |
| "learning_rate": 3e-06, | |
| "loss": 2.6928, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.07386591730228824, | |
| "grad_norm": 112.15656280517578, | |
| "learning_rate": 3e-06, | |
| "loss": -13.4598, | |
| "step": 828 | |
| }, | |
| { | |
| "completion_length": 161.2916717529297, | |
| "epoch": 0.0739551273473393, | |
| "grad_norm": 225.7854461669922, | |
| "learning_rate": 3e-06, | |
| "loss": -7.2711, | |
| "reward": 1.8568333387374878, | |
| "reward_std": 0.38829553686082363, | |
| "rewards/correctness_reward_func": 1.2916666865348816, | |
| "rewards/int_reward_func": 0.4895833283662796, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.0755833312869072, | |
| "step": 829, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.07404433739239039, | |
| "grad_norm": 203.64381408691406, | |
| "learning_rate": 3e-06, | |
| "loss": -24.0722, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.07413354743744145, | |
| "grad_norm": 470.9283752441406, | |
| "learning_rate": 3e-06, | |
| "loss": 7.9581, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.07422275748249253, | |
| "grad_norm": 261.8198547363281, | |
| "learning_rate": 3e-06, | |
| "loss": -13.8961, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.0743119675275436, | |
| "grad_norm": 238.60263061523438, | |
| "learning_rate": 3e-06, | |
| "loss": -17.5778, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.07440117757259468, | |
| "grad_norm": 251.20684814453125, | |
| "learning_rate": 3e-06, | |
| "loss": -13.8026, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.07449038761764575, | |
| "grad_norm": 233.15805053710938, | |
| "learning_rate": 3e-06, | |
| "loss": -9.8419, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.07457959766269683, | |
| "grad_norm": 188.42831420898438, | |
| "learning_rate": 3e-06, | |
| "loss": -27.1095, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.07466880770774789, | |
| "grad_norm": 330.0888671875, | |
| "learning_rate": 3e-06, | |
| "loss": 2.663, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.07475801775279897, | |
| "grad_norm": 187.619873046875, | |
| "learning_rate": 3e-06, | |
| "loss": -17.3052, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.07484722779785004, | |
| "grad_norm": 273.087646484375, | |
| "learning_rate": 3e-06, | |
| "loss": -20.1137, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.0749364378429011, | |
| "grad_norm": 231.94540405273438, | |
| "learning_rate": 3e-06, | |
| "loss": -17.6819, | |
| "step": 840 | |
| }, | |
| { | |
| "completion_length": 124.5625, | |
| "epoch": 0.07502564788795218, | |
| "grad_norm": 169.8292236328125, | |
| "learning_rate": 3e-06, | |
| "loss": -14.9169, | |
| "reward": 2.1858333349227905, | |
| "reward_std": 0.5357859879732132, | |
| "rewards/correctness_reward_func": 1.5833333134651184, | |
| "rewards/int_reward_func": 0.4270833432674408, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.17541665583848953, | |
| "step": 841, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.07511485793300325, | |
| "grad_norm": 151.44129943847656, | |
| "learning_rate": 3e-06, | |
| "loss": -38.7775, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.07520406797805433, | |
| "grad_norm": 141.34671020507812, | |
| "learning_rate": 3e-06, | |
| "loss": -32.3555, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.0752932780231054, | |
| "grad_norm": 117.83955383300781, | |
| "learning_rate": 3e-06, | |
| "loss": -35.7298, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.07538248806815648, | |
| "grad_norm": 113.38582611083984, | |
| "learning_rate": 3e-06, | |
| "loss": -36.8355, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.07547169811320754, | |
| "grad_norm": 147.53521728515625, | |
| "learning_rate": 3e-06, | |
| "loss": -34.7305, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.07556090815825862, | |
| "grad_norm": 167.8444061279297, | |
| "learning_rate": 3e-06, | |
| "loss": -17.6609, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.07565011820330969, | |
| "grad_norm": 177.19976806640625, | |
| "learning_rate": 3e-06, | |
| "loss": -42.6765, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.07573932824836077, | |
| "grad_norm": 207.4672393798828, | |
| "learning_rate": 3e-06, | |
| "loss": -36.7629, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.07582853829341184, | |
| "grad_norm": 124.84293365478516, | |
| "learning_rate": 3e-06, | |
| "loss": -39.1349, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.07591774833846292, | |
| "grad_norm": 134.89764404296875, | |
| "learning_rate": 3e-06, | |
| "loss": -41.2224, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.07600695838351398, | |
| "grad_norm": 161.6527862548828, | |
| "learning_rate": 3e-06, | |
| "loss": -39.203, | |
| "step": 852 | |
| }, | |
| { | |
| "completion_length": 147.7291717529297, | |
| "epoch": 0.07609616842856505, | |
| "grad_norm": 371.6798095703125, | |
| "learning_rate": 3e-06, | |
| "loss": 88.7083, | |
| "reward": 2.0986876487731934, | |
| "reward_std": 0.5909168422222137, | |
| "rewards/correctness_reward_func": 1.5, | |
| "rewards/int_reward_func": 0.5, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.09868749976158142, | |
| "step": 853, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.07618537847361613, | |
| "grad_norm": 385.34136962890625, | |
| "learning_rate": 3e-06, | |
| "loss": 86.0723, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.0762745885186672, | |
| "grad_norm": 360.78021240234375, | |
| "learning_rate": 3e-06, | |
| "loss": 71.1204, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.07636379856371828, | |
| "grad_norm": 293.267333984375, | |
| "learning_rate": 3e-06, | |
| "loss": 66.9494, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.07645300860876934, | |
| "grad_norm": 440.7154846191406, | |
| "learning_rate": 3e-06, | |
| "loss": 88.5771, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.07654221865382042, | |
| "grad_norm": 327.457275390625, | |
| "learning_rate": 3e-06, | |
| "loss": 58.8516, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.07663142869887149, | |
| "grad_norm": 371.9436340332031, | |
| "learning_rate": 3e-06, | |
| "loss": 85.2973, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.07672063874392257, | |
| "grad_norm": 389.5568542480469, | |
| "learning_rate": 3e-06, | |
| "loss": 79.8231, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.07680984878897364, | |
| "grad_norm": 321.9656066894531, | |
| "learning_rate": 3e-06, | |
| "loss": 63.4841, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.07689905883402472, | |
| "grad_norm": 284.66876220703125, | |
| "learning_rate": 3e-06, | |
| "loss": 59.0441, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.07698826887907578, | |
| "grad_norm": 410.6514587402344, | |
| "learning_rate": 3e-06, | |
| "loss": 76.3094, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.07707747892412686, | |
| "grad_norm": 284.0197448730469, | |
| "learning_rate": 3e-06, | |
| "loss": 49.468, | |
| "step": 864 | |
| }, | |
| { | |
| "completion_length": 144.1041717529297, | |
| "epoch": 0.07716668896917793, | |
| "grad_norm": 226.17822265625, | |
| "learning_rate": 3e-06, | |
| "loss": 56.9406, | |
| "reward": 2.379916787147522, | |
| "reward_std": 0.430880606174469, | |
| "rewards/correctness_reward_func": 1.7916666865348816, | |
| "rewards/int_reward_func": 0.5, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.08824999909847975, | |
| "step": 865, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.077255899014229, | |
| "grad_norm": 186.55560302734375, | |
| "learning_rate": 3e-06, | |
| "loss": 52.0502, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.07734510905928008, | |
| "grad_norm": 184.80516052246094, | |
| "learning_rate": 3e-06, | |
| "loss": 46.5618, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.07743431910433114, | |
| "grad_norm": 178.5349884033203, | |
| "learning_rate": 3e-06, | |
| "loss": 46.2696, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.07752352914938222, | |
| "grad_norm": 148.83154296875, | |
| "learning_rate": 3e-06, | |
| "loss": 29.464, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.07761273919443329, | |
| "grad_norm": 161.14889526367188, | |
| "learning_rate": 3e-06, | |
| "loss": 51.0483, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.07770194923948437, | |
| "grad_norm": 192.32308959960938, | |
| "learning_rate": 3e-06, | |
| "loss": 47.9675, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.07779115928453544, | |
| "grad_norm": 152.79583740234375, | |
| "learning_rate": 3e-06, | |
| "loss": 43.9552, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.07788036932958652, | |
| "grad_norm": 151.7612762451172, | |
| "learning_rate": 3e-06, | |
| "loss": 38.7329, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.07796957937463758, | |
| "grad_norm": 133.1282196044922, | |
| "learning_rate": 3e-06, | |
| "loss": 38.5692, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.07805878941968866, | |
| "grad_norm": 103.07962036132812, | |
| "learning_rate": 3e-06, | |
| "loss": 24.2915, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.07814799946473973, | |
| "grad_norm": 129.3807373046875, | |
| "learning_rate": 3e-06, | |
| "loss": 43.1067, | |
| "step": 876 | |
| }, | |
| { | |
| "completion_length": 127.54166793823242, | |
| "epoch": 0.07823720950979081, | |
| "grad_norm": 125.33885192871094, | |
| "learning_rate": 3e-06, | |
| "loss": -23.6161, | |
| "reward": 1.667020857334137, | |
| "reward_std": 0.5597978234291077, | |
| "rewards/correctness_reward_func": 1.0833333432674408, | |
| "rewards/int_reward_func": 0.40625, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.17743750661611557, | |
| "step": 877, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.07832641955484188, | |
| "grad_norm": 138.50013732910156, | |
| "learning_rate": 3e-06, | |
| "loss": -16.1757, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.07841562959989294, | |
| "grad_norm": 130.26280212402344, | |
| "learning_rate": 3e-06, | |
| "loss": -21.8816, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.07850483964494402, | |
| "grad_norm": 141.026123046875, | |
| "learning_rate": 3e-06, | |
| "loss": -22.0761, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.07859404968999509, | |
| "grad_norm": 126.53893280029297, | |
| "learning_rate": 3e-06, | |
| "loss": -23.4112, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.07868325973504617, | |
| "grad_norm": 153.45120239257812, | |
| "learning_rate": 3e-06, | |
| "loss": -17.7169, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.07877246978009723, | |
| "grad_norm": 122.84283447265625, | |
| "learning_rate": 3e-06, | |
| "loss": -24.8704, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.07886167982514831, | |
| "grad_norm": 157.95201110839844, | |
| "learning_rate": 3e-06, | |
| "loss": -16.2334, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.07895088987019938, | |
| "grad_norm": 136.01124572753906, | |
| "learning_rate": 3e-06, | |
| "loss": -23.3998, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.07904009991525046, | |
| "grad_norm": 135.98423767089844, | |
| "learning_rate": 3e-06, | |
| "loss": -23.6646, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.07912930996030153, | |
| "grad_norm": 131.24002075195312, | |
| "learning_rate": 3e-06, | |
| "loss": -25.7454, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.07921852000535261, | |
| "grad_norm": 124.26398468017578, | |
| "learning_rate": 3e-06, | |
| "loss": -20.446, | |
| "step": 888 | |
| }, | |
| { | |
| "completion_length": 153.58333587646484, | |
| "epoch": 0.07930773005040367, | |
| "grad_norm": 240.47964477539062, | |
| "learning_rate": 3e-06, | |
| "loss": -52.9796, | |
| "reward": 1.8959583044052124, | |
| "reward_std": 0.6871494352817535, | |
| "rewards/correctness_reward_func": 1.3333333432674408, | |
| "rewards/int_reward_func": 0.4583333283662796, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.10429166257381439, | |
| "step": 889, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.07939694009545475, | |
| "grad_norm": 205.4910430908203, | |
| "learning_rate": 3e-06, | |
| "loss": -52.7249, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.07948615014050582, | |
| "grad_norm": 242.2780303955078, | |
| "learning_rate": 3e-06, | |
| "loss": -67.3242, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.07957536018555689, | |
| "grad_norm": 262.0589599609375, | |
| "learning_rate": 3e-06, | |
| "loss": -66.9661, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.07966457023060797, | |
| "grad_norm": 186.11415100097656, | |
| "learning_rate": 3e-06, | |
| "loss": -68.4566, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.07975378027565903, | |
| "grad_norm": 254.95228576660156, | |
| "learning_rate": 3e-06, | |
| "loss": -66.0288, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.07984299032071011, | |
| "grad_norm": 270.2388000488281, | |
| "learning_rate": 3e-06, | |
| "loss": -59.1402, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.07993220036576118, | |
| "grad_norm": 232.1254119873047, | |
| "learning_rate": 3e-06, | |
| "loss": -58.3748, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.08002141041081226, | |
| "grad_norm": 423.2415466308594, | |
| "learning_rate": 3e-06, | |
| "loss": -74.5377, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.08011062045586333, | |
| "grad_norm": 289.6065673828125, | |
| "learning_rate": 3e-06, | |
| "loss": -76.1074, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.0801998305009144, | |
| "grad_norm": 212.4766845703125, | |
| "learning_rate": 3e-06, | |
| "loss": -74.2601, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.08028904054596547, | |
| "grad_norm": 286.6225891113281, | |
| "learning_rate": 3e-06, | |
| "loss": -74.3892, | |
| "step": 900 | |
| }, | |
| { | |
| "completion_length": 119.62500381469727, | |
| "epoch": 0.08037825059101655, | |
| "grad_norm": 123.23504638671875, | |
| "learning_rate": 3e-06, | |
| "loss": -22.0192, | |
| "reward": 2.5415626764297485, | |
| "reward_std": 0.190566536039114, | |
| "rewards/correctness_reward_func": 1.9166666865348816, | |
| "rewards/int_reward_func": 0.4791666716337204, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.1457291655242443, | |
| "step": 901, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.08046746063606762, | |
| "grad_norm": 134.62196350097656, | |
| "learning_rate": 3e-06, | |
| "loss": -26.971, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.0805566706811187, | |
| "grad_norm": 211.32479858398438, | |
| "learning_rate": 3e-06, | |
| "loss": -31.3724, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 0.08064588072616977, | |
| "grad_norm": 160.55055236816406, | |
| "learning_rate": 3e-06, | |
| "loss": -22.228, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.08073509077122083, | |
| "grad_norm": 125.40478515625, | |
| "learning_rate": 3e-06, | |
| "loss": -21.2257, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.08082430081627191, | |
| "grad_norm": 111.1106948852539, | |
| "learning_rate": 3e-06, | |
| "loss": -22.3095, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.08091351086132298, | |
| "grad_norm": 122.1114501953125, | |
| "learning_rate": 3e-06, | |
| "loss": -24.7909, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 0.08100272090637406, | |
| "grad_norm": 156.01158142089844, | |
| "learning_rate": 3e-06, | |
| "loss": -31.0448, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.08109193095142513, | |
| "grad_norm": 158.0888214111328, | |
| "learning_rate": 3e-06, | |
| "loss": -35.1506, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 0.0811811409964762, | |
| "grad_norm": 156.11680603027344, | |
| "learning_rate": 3e-06, | |
| "loss": -26.2504, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.08127035104152727, | |
| "grad_norm": 136.36370849609375, | |
| "learning_rate": 3e-06, | |
| "loss": -24.5191, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 0.08135956108657835, | |
| "grad_norm": 138.4123077392578, | |
| "learning_rate": 3e-06, | |
| "loss": -25.2287, | |
| "step": 912 | |
| }, | |
| { | |
| "completion_length": 123.97917175292969, | |
| "epoch": 0.08144877113162942, | |
| "grad_norm": 69.13970184326172, | |
| "learning_rate": 3e-06, | |
| "loss": -4.2059, | |
| "reward": 2.349874973297119, | |
| "reward_std": 0.39924251288175583, | |
| "rewards/correctness_reward_func": 1.75, | |
| "rewards/int_reward_func": 0.4583333283662796, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.14154166728258133, | |
| "step": 913, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.0815379811766805, | |
| "grad_norm": 109.77488708496094, | |
| "learning_rate": 3e-06, | |
| "loss": -6.6804, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.08162719122173157, | |
| "grad_norm": 108.82147216796875, | |
| "learning_rate": 3e-06, | |
| "loss": 1.8191, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.08171640126678265, | |
| "grad_norm": 88.40335083007812, | |
| "learning_rate": 3e-06, | |
| "loss": -5.8692, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.08180561131183371, | |
| "grad_norm": 76.1854019165039, | |
| "learning_rate": 3e-06, | |
| "loss": -1.3803, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 0.08189482135688479, | |
| "grad_norm": 94.09133911132812, | |
| "learning_rate": 3e-06, | |
| "loss": -2.3375, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.08198403140193586, | |
| "grad_norm": 84.88536071777344, | |
| "learning_rate": 3e-06, | |
| "loss": -5.6229, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 0.08207324144698692, | |
| "grad_norm": 92.1208267211914, | |
| "learning_rate": 3e-06, | |
| "loss": -7.5509, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.082162451492038, | |
| "grad_norm": 89.02661895751953, | |
| "learning_rate": 3e-06, | |
| "loss": 0.5948, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 0.08225166153708907, | |
| "grad_norm": 95.09249114990234, | |
| "learning_rate": 3e-06, | |
| "loss": -6.4904, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.08234087158214015, | |
| "grad_norm": 83.8741683959961, | |
| "learning_rate": 3e-06, | |
| "loss": -2.875, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 0.08243008162719122, | |
| "grad_norm": 129.45420837402344, | |
| "learning_rate": 3e-06, | |
| "loss": -3.085, | |
| "step": 924 | |
| }, | |
| { | |
| "completion_length": 115.27083587646484, | |
| "epoch": 0.0825192916722423, | |
| "grad_norm": 250.4253387451172, | |
| "learning_rate": 3e-06, | |
| "loss": 9.4605, | |
| "reward": 2.1223334074020386, | |
| "reward_std": 0.7158277630805969, | |
| "rewards/correctness_reward_func": 1.5, | |
| "rewards/int_reward_func": 0.4166666716337204, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.205666683614254, | |
| "step": 925, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.08260850171729336, | |
| "grad_norm": 318.8369140625, | |
| "learning_rate": 3e-06, | |
| "loss": 16.4358, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.08269771176234444, | |
| "grad_norm": 314.372314453125, | |
| "learning_rate": 3e-06, | |
| "loss": 5.4103, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 0.08278692180739551, | |
| "grad_norm": 255.00933837890625, | |
| "learning_rate": 3e-06, | |
| "loss": -3.1256, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.08287613185244659, | |
| "grad_norm": 357.3619384765625, | |
| "learning_rate": 3e-06, | |
| "loss": 3.4132, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 0.08296534189749766, | |
| "grad_norm": 409.3254089355469, | |
| "learning_rate": 3e-06, | |
| "loss": 23.0602, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.08305455194254874, | |
| "grad_norm": 270.6861877441406, | |
| "learning_rate": 3e-06, | |
| "loss": 8.4527, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 0.0831437619875998, | |
| "grad_norm": 507.520263671875, | |
| "learning_rate": 3e-06, | |
| "loss": 14.712, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.08323297203265087, | |
| "grad_norm": 281.0194091796875, | |
| "learning_rate": 3e-06, | |
| "loss": 4.5989, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 0.08332218207770195, | |
| "grad_norm": 275.3479309082031, | |
| "learning_rate": 3e-06, | |
| "loss": -5.1609, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.08341139212275302, | |
| "grad_norm": 358.3206481933594, | |
| "learning_rate": 3e-06, | |
| "loss": 3.3467, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.0835006021678041, | |
| "grad_norm": 403.45440673828125, | |
| "learning_rate": 3e-06, | |
| "loss": 20.7584, | |
| "step": 936 | |
| }, | |
| { | |
| "completion_length": 131.7291717529297, | |
| "epoch": 0.08358981221285516, | |
| "grad_norm": 112.89104461669922, | |
| "learning_rate": 3e-06, | |
| "loss": -12.4613, | |
| "reward": 1.9657083749771118, | |
| "reward_std": 0.33454202115535736, | |
| "rewards/correctness_reward_func": 1.3333333432674408, | |
| "rewards/int_reward_func": 0.5, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.13237499818205833, | |
| "step": 937, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.08367902225790624, | |
| "grad_norm": 158.0906524658203, | |
| "learning_rate": 3e-06, | |
| "loss": -6.7935, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.08376823230295731, | |
| "grad_norm": 127.69352722167969, | |
| "learning_rate": 3e-06, | |
| "loss": -15.3734, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 0.08385744234800839, | |
| "grad_norm": 207.05262756347656, | |
| "learning_rate": 3e-06, | |
| "loss": -25.0891, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.08394665239305946, | |
| "grad_norm": 546.4678344726562, | |
| "learning_rate": 3e-06, | |
| "loss": -29.0194, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 0.08403586243811054, | |
| "grad_norm": 141.02198791503906, | |
| "learning_rate": 3e-06, | |
| "loss": -10.7265, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.0841250724831616, | |
| "grad_norm": 137.6843719482422, | |
| "learning_rate": 3e-06, | |
| "loss": -13.3029, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 0.08421428252821268, | |
| "grad_norm": 211.74227905273438, | |
| "learning_rate": 3e-06, | |
| "loss": -8.8958, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.08430349257326375, | |
| "grad_norm": 123.87110900878906, | |
| "learning_rate": 3e-06, | |
| "loss": -16.9913, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.08439270261831482, | |
| "grad_norm": 206.8551025390625, | |
| "learning_rate": 3e-06, | |
| "loss": -26.9321, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.0844819126633659, | |
| "grad_norm": 193.33346557617188, | |
| "learning_rate": 3e-06, | |
| "loss": -30.975, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 0.08457112270841696, | |
| "grad_norm": 147.73297119140625, | |
| "learning_rate": 3e-06, | |
| "loss": -13.515, | |
| "step": 948 | |
| }, | |
| { | |
| "completion_length": 159.95833587646484, | |
| "epoch": 0.08466033275346804, | |
| "grad_norm": 186.3380889892578, | |
| "learning_rate": 3e-06, | |
| "loss": 9.8972, | |
| "reward": 1.8234166502952576, | |
| "reward_std": 0.42612800002098083, | |
| "rewards/correctness_reward_func": 1.2916666865348816, | |
| "rewards/int_reward_func": 0.4375, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.09425000101327896, | |
| "step": 949, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.08474954279851911, | |
| "grad_norm": 178.0104522705078, | |
| "learning_rate": 3e-06, | |
| "loss": 10.3947, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.08483875284357019, | |
| "grad_norm": 160.924560546875, | |
| "learning_rate": 3e-06, | |
| "loss": 8.1323, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 0.08492796288862126, | |
| "grad_norm": 144.38978576660156, | |
| "learning_rate": 3e-06, | |
| "loss": 5.1531, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.08501717293367234, | |
| "grad_norm": 174.2298126220703, | |
| "learning_rate": 3e-06, | |
| "loss": 5.4548, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 0.0851063829787234, | |
| "grad_norm": 164.93479919433594, | |
| "learning_rate": 3e-06, | |
| "loss": 10.7603, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.08519559302377448, | |
| "grad_norm": 198.3860626220703, | |
| "learning_rate": 3e-06, | |
| "loss": 9.1507, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.08528480306882555, | |
| "grad_norm": 160.76519775390625, | |
| "learning_rate": 3e-06, | |
| "loss": 9.4591, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.08537401311387663, | |
| "grad_norm": 170.39776611328125, | |
| "learning_rate": 3e-06, | |
| "loss": 7.0709, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 0.0854632231589277, | |
| "grad_norm": 145.32798767089844, | |
| "learning_rate": 3e-06, | |
| "loss": 4.1507, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.08555243320397876, | |
| "grad_norm": 170.50514221191406, | |
| "learning_rate": 3e-06, | |
| "loss": 4.4292, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 0.08564164324902984, | |
| "grad_norm": 197.32290649414062, | |
| "learning_rate": 3e-06, | |
| "loss": 10.1245, | |
| "step": 960 | |
| }, | |
| { | |
| "completion_length": 149.68750762939453, | |
| "epoch": 0.08573085329408091, | |
| "grad_norm": 530.301025390625, | |
| "learning_rate": 3e-06, | |
| "loss": 6.2918, | |
| "reward": 2.106416702270508, | |
| "reward_std": 0.5645134299993515, | |
| "rewards/correctness_reward_func": 1.5416666865348816, | |
| "rewards/int_reward_func": 0.4791666567325592, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.08558332687243819, | |
| "step": 961, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.08582006333913199, | |
| "grad_norm": 593.3743286132812, | |
| "learning_rate": 3e-06, | |
| "loss": -27.7628, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.08590927338418305, | |
| "grad_norm": 378.6949157714844, | |
| "learning_rate": 3e-06, | |
| "loss": -25.6701, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 0.08599848342923413, | |
| "grad_norm": 402.416748046875, | |
| "learning_rate": 3e-06, | |
| "loss": 10.4982, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.0860876934742852, | |
| "grad_norm": 443.3346862792969, | |
| "learning_rate": 3e-06, | |
| "loss": -39.2448, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.08617690351933628, | |
| "grad_norm": 401.20574951171875, | |
| "learning_rate": 3e-06, | |
| "loss": -68.5925, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.08626611356438735, | |
| "grad_norm": 553.38720703125, | |
| "learning_rate": 3e-06, | |
| "loss": 1.8602, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 0.08635532360943843, | |
| "grad_norm": 628.0134887695312, | |
| "learning_rate": 3e-06, | |
| "loss": -36.1923, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.0864445336544895, | |
| "grad_norm": 380.9430847167969, | |
| "learning_rate": 3e-06, | |
| "loss": -32.3079, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 0.08653374369954057, | |
| "grad_norm": 385.8163146972656, | |
| "learning_rate": 3e-06, | |
| "loss": 3.3819, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.08662295374459164, | |
| "grad_norm": 432.78118896484375, | |
| "learning_rate": 3e-06, | |
| "loss": -46.8594, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 0.08671216378964271, | |
| "grad_norm": 439.5821533203125, | |
| "learning_rate": 3e-06, | |
| "loss": -77.4002, | |
| "step": 972 | |
| }, | |
| { | |
| "completion_length": 121.68750381469727, | |
| "epoch": 0.08680137383469379, | |
| "grad_norm": 35.27005386352539, | |
| "learning_rate": 3e-06, | |
| "loss": 3.8068, | |
| "reward": 2.284437596797943, | |
| "reward_std": 0.15820645913481712, | |
| "rewards/correctness_reward_func": 1.625, | |
| "rewards/int_reward_func": 0.5, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.15943749248981476, | |
| "step": 973, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.08689058387974485, | |
| "grad_norm": 68.60123443603516, | |
| "learning_rate": 3e-06, | |
| "loss": 5.8697, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.08697979392479593, | |
| "grad_norm": 69.07678985595703, | |
| "learning_rate": 3e-06, | |
| "loss": 2.0005, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.087069003969847, | |
| "grad_norm": 39.30900955200195, | |
| "learning_rate": 3e-06, | |
| "loss": 6.0659, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.08715821401489808, | |
| "grad_norm": 77.51853942871094, | |
| "learning_rate": 3e-06, | |
| "loss": 5.9335, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 0.08724742405994915, | |
| "grad_norm": 60.07703399658203, | |
| "learning_rate": 3e-06, | |
| "loss": 3.043, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.08733663410500023, | |
| "grad_norm": 39.25843811035156, | |
| "learning_rate": 3e-06, | |
| "loss": 3.6489, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 0.0874258441500513, | |
| "grad_norm": 46.68893051147461, | |
| "learning_rate": 3e-06, | |
| "loss": 5.0957, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.08751505419510237, | |
| "grad_norm": 55.4852180480957, | |
| "learning_rate": 3e-06, | |
| "loss": 0.9956, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 0.08760426424015344, | |
| "grad_norm": 51.21168518066406, | |
| "learning_rate": 3e-06, | |
| "loss": 5.5624, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.08769347428520452, | |
| "grad_norm": 64.15937805175781, | |
| "learning_rate": 3e-06, | |
| "loss": 4.0323, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 0.08778268433025559, | |
| "grad_norm": 65.25579833984375, | |
| "learning_rate": 3e-06, | |
| "loss": 1.7638, | |
| "step": 984 | |
| }, | |
| { | |
| "completion_length": 138.68750762939453, | |
| "epoch": 0.08787189437530665, | |
| "grad_norm": 277.1241760253906, | |
| "learning_rate": 3e-06, | |
| "loss": 10.9365, | |
| "reward": 2.176750063896179, | |
| "reward_std": 0.40325865149497986, | |
| "rewards/correctness_reward_func": 1.5833333134651184, | |
| "rewards/int_reward_func": 0.4791666567325592, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.11425000056624413, | |
| "step": 985, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.08796110442035773, | |
| "grad_norm": 363.5508117675781, | |
| "learning_rate": 3e-06, | |
| "loss": -2.6393, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.0880503144654088, | |
| "grad_norm": 337.4767150878906, | |
| "learning_rate": 3e-06, | |
| "loss": 9.4007, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 0.08813952451045988, | |
| "grad_norm": 292.9395751953125, | |
| "learning_rate": 3e-06, | |
| "loss": -2.1309, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.08822873455551095, | |
| "grad_norm": 246.76112365722656, | |
| "learning_rate": 3e-06, | |
| "loss": -9.132, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 0.08831794460056203, | |
| "grad_norm": 267.3565368652344, | |
| "learning_rate": 3e-06, | |
| "loss": -11.7626, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.08840715464561309, | |
| "grad_norm": 257.4312438964844, | |
| "learning_rate": 3e-06, | |
| "loss": 5.7132, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 0.08849636469066417, | |
| "grad_norm": 317.8547058105469, | |
| "learning_rate": 3e-06, | |
| "loss": -9.7682, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.08858557473571524, | |
| "grad_norm": 260.3039855957031, | |
| "learning_rate": 3e-06, | |
| "loss": 2.4633, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 0.08867478478076632, | |
| "grad_norm": 261.14697265625, | |
| "learning_rate": 3e-06, | |
| "loss": -9.4669, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.08876399482581739, | |
| "grad_norm": 181.8609161376953, | |
| "learning_rate": 3e-06, | |
| "loss": -13.1009, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.08885320487086847, | |
| "grad_norm": 236.33563232421875, | |
| "learning_rate": 3e-06, | |
| "loss": -15.7579, | |
| "step": 996 | |
| }, | |
| { | |
| "completion_length": 168.9791717529297, | |
| "epoch": 0.08894241491591953, | |
| "grad_norm": 502.4021911621094, | |
| "learning_rate": 3e-06, | |
| "loss": -65.14, | |
| "reward": 1.9697707891464233, | |
| "reward_std": 0.505499929189682, | |
| "rewards/correctness_reward_func": 1.4166666269302368, | |
| "rewards/int_reward_func": 0.4895833283662796, | |
| "rewards/soft_format_reward_func": 0.0, | |
| "rewards/strict_format_reward_func": 0.0, | |
| "rewards/xmlcount_reward_func": 0.06352083757519722, | |
| "step": 997, | |
| "zero_std_ratio": 0.0 | |
| }, | |
| { | |
| "epoch": 0.0890316249609706, | |
| "grad_norm": 564.5300903320312, | |
| "learning_rate": 3e-06, | |
| "loss": -45.4159, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.08912083500602168, | |
| "grad_norm": 453.83160400390625, | |
| "learning_rate": 3e-06, | |
| "loss": -31.2339, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 0.08921004505107274, | |
| "grad_norm": 449.2571716308594, | |
| "learning_rate": 3e-06, | |
| "loss": -71.5707, | |
| "step": 1000 | |
| } | |
| ], | |
| "logging_steps": 1, | |
| "max_steps": 112090, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 10, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": true, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 6, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |