| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.0025012506253126563, |
| "eval_steps": 1000, |
| "global_step": 2500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "completion_length": 218.8541717529297, |
| "epoch": 1.0005002501250626e-06, |
| "grad_norm": 7.439283847808838, |
| "learning_rate": 3e-09, |
| "loss": 7.2972, |
| "reward": 0.0396825447678566, |
| "reward_std": 0.06933804973959923, |
| "rewards/sudoku_reward_func": 0.0396825410425663, |
| "step": 1, |
| "zero_std_ratio": 0.125 |
| }, |
| { |
| "epoch": 2.001000500250125e-06, |
| "grad_norm": 7.519765853881836, |
| "learning_rate": 6e-09, |
| "loss": 7.7068, |
| "step": 2 |
| }, |
| { |
| "epoch": 3.0015007503751877e-06, |
| "grad_norm": 12.11757755279541, |
| "learning_rate": 9.000000000000001e-09, |
| "loss": 8.009, |
| "step": 3 |
| }, |
| { |
| "epoch": 4.00200100050025e-06, |
| "grad_norm": 7.8453369140625, |
| "learning_rate": 1.2e-08, |
| "loss": 6.9207, |
| "step": 4 |
| }, |
| { |
| "epoch": 5.002501250625313e-06, |
| "grad_norm": 7.893465995788574, |
| "learning_rate": 1.5000000000000002e-08, |
| "loss": 7.3495, |
| "step": 5 |
| }, |
| { |
| "epoch": 6.003001500750375e-06, |
| "grad_norm": 7.371875286102295, |
| "learning_rate": 1.8000000000000002e-08, |
| "loss": 7.7256, |
| "step": 6 |
| }, |
| { |
| "epoch": 7.003501750875438e-06, |
| "grad_norm": 12.777957916259766, |
| "learning_rate": 2.1e-08, |
| "loss": 8.0202, |
| "step": 7 |
| }, |
| { |
| "epoch": 8.0040020010005e-06, |
| "grad_norm": 7.41762638092041, |
| "learning_rate": 2.4e-08, |
| "loss": 6.9169, |
| "step": 8 |
| }, |
| { |
| "completion_length": 230.6041717529297, |
| "epoch": 9.004502251125562e-06, |
| "grad_norm": 7.3082780838012695, |
| "learning_rate": 2.7e-08, |
| "loss": 2.7803, |
| "reward": 0.02740951138548553, |
| "reward_std": 0.0447020698338747, |
| "rewards/sudoku_reward_func": 0.02740951138548553, |
| "step": 9, |
| "zero_std_ratio": 0.375 |
| }, |
| { |
| "epoch": 1.0005002501250626e-05, |
| "grad_norm": 6.063836097717285, |
| "learning_rate": 3.0000000000000004e-08, |
| "loss": 2.5601, |
| "step": 10 |
| }, |
| { |
| "epoch": 1.1005502751375687e-05, |
| "grad_norm": 7.145730495452881, |
| "learning_rate": 3.3e-08, |
| "loss": 2.988, |
| "step": 11 |
| }, |
| { |
| "epoch": 1.200600300150075e-05, |
| "grad_norm": 6.994553565979004, |
| "learning_rate": 3.6000000000000005e-08, |
| "loss": 3.2755, |
| "step": 12 |
| }, |
| { |
| "epoch": 1.3006503251625812e-05, |
| "grad_norm": 8.28451919555664, |
| "learning_rate": 3.9e-08, |
| "loss": 2.7599, |
| "step": 13 |
| }, |
| { |
| "epoch": 1.4007003501750876e-05, |
| "grad_norm": 6.728439807891846, |
| "learning_rate": 4.2e-08, |
| "loss": 2.518, |
| "step": 14 |
| }, |
| { |
| "epoch": 1.5007503751875938e-05, |
| "grad_norm": 7.120697975158691, |
| "learning_rate": 4.5e-08, |
| "loss": 2.9896, |
| "step": 15 |
| }, |
| { |
| "epoch": 1.6008004002001e-05, |
| "grad_norm": 7.128693103790283, |
| "learning_rate": 4.8e-08, |
| "loss": 3.3136, |
| "step": 16 |
| }, |
| { |
| "completion_length": 235.87500762939453, |
| "epoch": 1.7008504252126064e-05, |
| "grad_norm": 11.133870124816895, |
| "learning_rate": 5.100000000000001e-08, |
| "loss": 1.6499, |
| "reward": 0.06601355969905853, |
| "reward_std": 0.08363537862896919, |
| "rewards/sudoku_reward_func": 0.06601355969905853, |
| "step": 17, |
| "zero_std_ratio": 0.25 |
| }, |
| { |
| "epoch": 1.8009004502251124e-05, |
| "grad_norm": 13.384941101074219, |
| "learning_rate": 5.4e-08, |
| "loss": 1.5589, |
| "step": 18 |
| }, |
| { |
| "epoch": 1.9009504752376188e-05, |
| "grad_norm": 9.533072471618652, |
| "learning_rate": 5.7e-08, |
| "loss": 1.4709, |
| "step": 19 |
| }, |
| { |
| "epoch": 2.001000500250125e-05, |
| "grad_norm": 39.541507720947266, |
| "learning_rate": 6.000000000000001e-08, |
| "loss": 3.2441, |
| "step": 20 |
| }, |
| { |
| "epoch": 2.1010505252626315e-05, |
| "grad_norm": 11.768808364868164, |
| "learning_rate": 6.300000000000001e-08, |
| "loss": 1.7169, |
| "step": 21 |
| }, |
| { |
| "epoch": 2.2011005502751375e-05, |
| "grad_norm": 14.177431106567383, |
| "learning_rate": 6.6e-08, |
| "loss": 1.5112, |
| "step": 22 |
| }, |
| { |
| "epoch": 2.3011505752876438e-05, |
| "grad_norm": 9.358668327331543, |
| "learning_rate": 6.9e-08, |
| "loss": 1.4272, |
| "step": 23 |
| }, |
| { |
| "epoch": 2.40120060030015e-05, |
| "grad_norm": 24.521499633789062, |
| "learning_rate": 7.200000000000001e-08, |
| "loss": 3.247, |
| "step": 24 |
| }, |
| { |
| "completion_length": 233.37500762939453, |
| "epoch": 2.5012506253126565e-05, |
| "grad_norm": 5.031166076660156, |
| "learning_rate": 7.500000000000001e-08, |
| "loss": 2.2568, |
| "reward": 0.022362764924764633, |
| "reward_std": 0.0353931887075305, |
| "rewards/sudoku_reward_func": 0.022362764924764633, |
| "step": 25, |
| "zero_std_ratio": 0.5 |
| }, |
| { |
| "epoch": 2.6013006503251625e-05, |
| "grad_norm": 5.3725972175598145, |
| "learning_rate": 7.8e-08, |
| "loss": 1.8075, |
| "step": 26 |
| }, |
| { |
| "epoch": 2.7013506753376688e-05, |
| "grad_norm": 5.61990213394165, |
| "learning_rate": 8.1e-08, |
| "loss": 1.7722, |
| "step": 27 |
| }, |
| { |
| "epoch": 2.8014007003501752e-05, |
| "grad_norm": 4.879170894622803, |
| "learning_rate": 8.4e-08, |
| "loss": 1.4841, |
| "step": 28 |
| }, |
| { |
| "epoch": 2.9014507253626812e-05, |
| "grad_norm": 5.542116165161133, |
| "learning_rate": 8.700000000000001e-08, |
| "loss": 2.2553, |
| "step": 29 |
| }, |
| { |
| "epoch": 3.0015007503751875e-05, |
| "grad_norm": 6.498071193695068, |
| "learning_rate": 9e-08, |
| "loss": 1.7906, |
| "step": 30 |
| }, |
| { |
| "epoch": 3.1015507753876935e-05, |
| "grad_norm": 5.03284215927124, |
| "learning_rate": 9.3e-08, |
| "loss": 1.7829, |
| "step": 31 |
| }, |
| { |
| "epoch": 3.2016008004002e-05, |
| "grad_norm": 5.980422019958496, |
| "learning_rate": 9.6e-08, |
| "loss": 1.4834, |
| "step": 32 |
| }, |
| { |
| "completion_length": 236.77084350585938, |
| "epoch": 3.301650825412706e-05, |
| "grad_norm": 10.07999324798584, |
| "learning_rate": 9.900000000000001e-08, |
| "loss": 4.6713, |
| "reward": 0.045345570892095566, |
| "reward_std": 0.060756947845220566, |
| "rewards/sudoku_reward_func": 0.04534556902945042, |
| "step": 33, |
| "zero_std_ratio": 0.375 |
| }, |
| { |
| "epoch": 3.401700850425213e-05, |
| "grad_norm": 10.255942344665527, |
| "learning_rate": 1.0200000000000001e-07, |
| "loss": 2.6158, |
| "step": 34 |
| }, |
| { |
| "epoch": 3.501750875437719e-05, |
| "grad_norm": 8.722171783447266, |
| "learning_rate": 1.0500000000000001e-07, |
| "loss": 3.5174, |
| "step": 35 |
| }, |
| { |
| "epoch": 3.601800900450225e-05, |
| "grad_norm": 15.56158447265625, |
| "learning_rate": 1.08e-07, |
| "loss": 4.9071, |
| "step": 36 |
| }, |
| { |
| "epoch": 3.7018509254627316e-05, |
| "grad_norm": 9.570954322814941, |
| "learning_rate": 1.11e-07, |
| "loss": 4.6103, |
| "step": 37 |
| }, |
| { |
| "epoch": 3.8019009504752376e-05, |
| "grad_norm": 9.923803329467773, |
| "learning_rate": 1.14e-07, |
| "loss": 2.5701, |
| "step": 38 |
| }, |
| { |
| "epoch": 3.9019509754877436e-05, |
| "grad_norm": 8.636165618896484, |
| "learning_rate": 1.17e-07, |
| "loss": 3.5118, |
| "step": 39 |
| }, |
| { |
| "epoch": 4.00200100050025e-05, |
| "grad_norm": 13.333678245544434, |
| "learning_rate": 1.2000000000000002e-07, |
| "loss": 4.9252, |
| "step": 40 |
| }, |
| { |
| "completion_length": 238.06250762939453, |
| "epoch": 4.102051025512756e-05, |
| "grad_norm": 7.653266429901123, |
| "learning_rate": 1.23e-07, |
| "loss": -1.3987, |
| "reward": 0.03835979010909796, |
| "reward_std": 0.06741865165531635, |
| "rewards/sudoku_reward_func": 0.03835978824645281, |
| "step": 41, |
| "zero_std_ratio": 0.125 |
| }, |
| { |
| "epoch": 4.202101050525263e-05, |
| "grad_norm": 9.044347763061523, |
| "learning_rate": 1.2600000000000002e-07, |
| "loss": -1.8583, |
| "step": 42 |
| }, |
| { |
| "epoch": 4.302151075537769e-05, |
| "grad_norm": 9.422472953796387, |
| "learning_rate": 1.29e-07, |
| "loss": -1.1338, |
| "step": 43 |
| }, |
| { |
| "epoch": 4.402201100550275e-05, |
| "grad_norm": 9.382232666015625, |
| "learning_rate": 1.32e-07, |
| "loss": -1.1015, |
| "step": 44 |
| }, |
| { |
| "epoch": 4.5022511255627816e-05, |
| "grad_norm": 7.433849334716797, |
| "learning_rate": 1.35e-07, |
| "loss": -1.3966, |
| "step": 45 |
| }, |
| { |
| "epoch": 4.6023011505752876e-05, |
| "grad_norm": 8.31912612915039, |
| "learning_rate": 1.38e-07, |
| "loss": -1.8804, |
| "step": 46 |
| }, |
| { |
| "epoch": 4.7023511755877936e-05, |
| "grad_norm": 9.417556762695312, |
| "learning_rate": 1.41e-07, |
| "loss": -1.1407, |
| "step": 47 |
| }, |
| { |
| "epoch": 4.8024012006003e-05, |
| "grad_norm": 9.155328750610352, |
| "learning_rate": 1.4400000000000002e-07, |
| "loss": -1.1026, |
| "step": 48 |
| }, |
| { |
| "completion_length": 238.95834350585938, |
| "epoch": 4.902451225612806e-05, |
| "grad_norm": 9.605969429016113, |
| "learning_rate": 1.47e-07, |
| "loss": 8.5284, |
| "reward": 0.038690481800585985, |
| "reward_std": 0.06603045156225562, |
| "rewards/sudoku_reward_func": 0.03869047784246504, |
| "step": 49, |
| "zero_std_ratio": 0.375 |
| }, |
| { |
| "epoch": 5.002501250625313e-05, |
| "grad_norm": 8.600493431091309, |
| "learning_rate": 1.5000000000000002e-07, |
| "loss": 8.2116, |
| "step": 50 |
| }, |
| { |
| "epoch": 5.102551275637819e-05, |
| "grad_norm": 9.14062213897705, |
| "learning_rate": 1.53e-07, |
| "loss": 8.6505, |
| "step": 51 |
| }, |
| { |
| "epoch": 5.202601300650325e-05, |
| "grad_norm": 10.15638256072998, |
| "learning_rate": 1.56e-07, |
| "loss": 8.738, |
| "step": 52 |
| }, |
| { |
| "epoch": 5.3026513256628317e-05, |
| "grad_norm": 9.743338584899902, |
| "learning_rate": 1.59e-07, |
| "loss": 8.5234, |
| "step": 53 |
| }, |
| { |
| "epoch": 5.4027013506753377e-05, |
| "grad_norm": 8.36423110961914, |
| "learning_rate": 1.62e-07, |
| "loss": 8.2441, |
| "step": 54 |
| }, |
| { |
| "epoch": 5.502751375687844e-05, |
| "grad_norm": 9.903505325317383, |
| "learning_rate": 1.65e-07, |
| "loss": 8.6868, |
| "step": 55 |
| }, |
| { |
| "epoch": 5.6028014007003503e-05, |
| "grad_norm": 10.892803192138672, |
| "learning_rate": 1.68e-07, |
| "loss": 8.8243, |
| "step": 56 |
| }, |
| { |
| "completion_length": 231.58333587646484, |
| "epoch": 5.7028514257128563e-05, |
| "grad_norm": 8.197606086730957, |
| "learning_rate": 1.71e-07, |
| "loss": 2.8601, |
| "reward": 0.03100198693573475, |
| "reward_std": 0.05434095114469528, |
| "rewards/sudoku_reward_func": 0.031001986004412174, |
| "step": 57, |
| "zero_std_ratio": 0.375 |
| }, |
| { |
| "epoch": 5.8029014507253623e-05, |
| "grad_norm": 11.048020362854004, |
| "learning_rate": 1.7400000000000002e-07, |
| "loss": 3.2602, |
| "step": 58 |
| }, |
| { |
| "epoch": 5.902951475737869e-05, |
| "grad_norm": 6.528013706207275, |
| "learning_rate": 1.7699999999999998e-07, |
| "loss": 2.1119, |
| "step": 59 |
| }, |
| { |
| "epoch": 6.003001500750375e-05, |
| "grad_norm": 11.171043395996094, |
| "learning_rate": 1.8e-07, |
| "loss": 3.1792, |
| "step": 60 |
| }, |
| { |
| "epoch": 6.103051525762882e-05, |
| "grad_norm": 8.444694519042969, |
| "learning_rate": 1.83e-07, |
| "loss": 2.8315, |
| "step": 61 |
| }, |
| { |
| "epoch": 6.203101550775387e-05, |
| "grad_norm": 9.485188484191895, |
| "learning_rate": 1.86e-07, |
| "loss": 3.2579, |
| "step": 62 |
| }, |
| { |
| "epoch": 6.303151575787894e-05, |
| "grad_norm": 6.87290096282959, |
| "learning_rate": 1.89e-07, |
| "loss": 2.1385, |
| "step": 63 |
| }, |
| { |
| "epoch": 6.4032016008004e-05, |
| "grad_norm": 12.49797248840332, |
| "learning_rate": 1.92e-07, |
| "loss": 3.1374, |
| "step": 64 |
| }, |
| { |
| "completion_length": 243.7916717529297, |
| "epoch": 6.503251625812907e-05, |
| "grad_norm": 16.387470245361328, |
| "learning_rate": 1.95e-07, |
| "loss": 5.5435, |
| "reward": 0.039468348026275635, |
| "reward_std": 0.07617796957492828, |
| "rewards/sudoku_reward_func": 0.039468344300985336, |
| "step": 65, |
| "zero_std_ratio": 0.25 |
| }, |
| { |
| "epoch": 6.603301650825412e-05, |
| "grad_norm": 16.877925872802734, |
| "learning_rate": 1.9800000000000003e-07, |
| "loss": 4.5106, |
| "step": 66 |
| }, |
| { |
| "epoch": 6.703351675837919e-05, |
| "grad_norm": 16.774993896484375, |
| "learning_rate": 2.01e-07, |
| "loss": 6.3509, |
| "step": 67 |
| }, |
| { |
| "epoch": 6.803401700850426e-05, |
| "grad_norm": 9.752405166625977, |
| "learning_rate": 2.0400000000000003e-07, |
| "loss": 4.2743, |
| "step": 68 |
| }, |
| { |
| "epoch": 6.903451725862931e-05, |
| "grad_norm": 13.019120216369629, |
| "learning_rate": 2.0700000000000001e-07, |
| "loss": 5.4941, |
| "step": 69 |
| }, |
| { |
| "epoch": 7.003501750875438e-05, |
| "grad_norm": 15.15886402130127, |
| "learning_rate": 2.1000000000000003e-07, |
| "loss": 4.561, |
| "step": 70 |
| }, |
| { |
| "epoch": 7.103551775887944e-05, |
| "grad_norm": 17.407318115234375, |
| "learning_rate": 2.13e-07, |
| "loss": 6.3596, |
| "step": 71 |
| }, |
| { |
| "epoch": 7.20360180090045e-05, |
| "grad_norm": 9.901360511779785, |
| "learning_rate": 2.16e-07, |
| "loss": 4.3023, |
| "step": 72 |
| }, |
| { |
| "completion_length": 242.58333587646484, |
| "epoch": 7.303651825912956e-05, |
| "grad_norm": 7.898802280426025, |
| "learning_rate": 2.19e-07, |
| "loss": 3.3781, |
| "reward": 0.03922784514725208, |
| "reward_std": 0.06382527574896812, |
| "rewards/sudoku_reward_func": 0.039227843284606934, |
| "step": 73, |
| "zero_std_ratio": 0.25 |
| }, |
| { |
| "epoch": 7.403701850925463e-05, |
| "grad_norm": 10.132791519165039, |
| "learning_rate": 2.22e-07, |
| "loss": 3.9789, |
| "step": 74 |
| }, |
| { |
| "epoch": 7.503751875937968e-05, |
| "grad_norm": 9.61319351196289, |
| "learning_rate": 2.25e-07, |
| "loss": 4.1563, |
| "step": 75 |
| }, |
| { |
| "epoch": 7.603801900950475e-05, |
| "grad_norm": 10.665925979614258, |
| "learning_rate": 2.28e-07, |
| "loss": 3.454, |
| "step": 76 |
| }, |
| { |
| "epoch": 7.703851925962982e-05, |
| "grad_norm": 8.118515014648438, |
| "learning_rate": 2.31e-07, |
| "loss": 3.4117, |
| "step": 77 |
| }, |
| { |
| "epoch": 7.803901950975487e-05, |
| "grad_norm": 7.520627975463867, |
| "learning_rate": 2.34e-07, |
| "loss": 3.9269, |
| "step": 78 |
| }, |
| { |
| "epoch": 7.903951975987994e-05, |
| "grad_norm": 10.15380573272705, |
| "learning_rate": 2.3700000000000002e-07, |
| "loss": 4.1432, |
| "step": 79 |
| }, |
| { |
| "epoch": 8.0040020010005e-05, |
| "grad_norm": 10.807955741882324, |
| "learning_rate": 2.4000000000000003e-07, |
| "loss": 3.4715, |
| "step": 80 |
| }, |
| { |
| "completion_length": 233.93750762939453, |
| "epoch": 8.104052026013007e-05, |
| "grad_norm": 8.597204208374023, |
| "learning_rate": 2.43e-07, |
| "loss": -0.6342, |
| "reward": 0.04493221268057823, |
| "reward_std": 0.08249906450510025, |
| "rewards/sudoku_reward_func": 0.04493220895528793, |
| "step": 81, |
| "zero_std_ratio": 0.125 |
| }, |
| { |
| "epoch": 8.204102051025512e-05, |
| "grad_norm": 9.011427879333496, |
| "learning_rate": 2.46e-07, |
| "loss": -1.2335, |
| "step": 82 |
| }, |
| { |
| "epoch": 8.304152076038019e-05, |
| "grad_norm": 13.026226997375488, |
| "learning_rate": 2.49e-07, |
| "loss": 0.0816, |
| "step": 83 |
| }, |
| { |
| "epoch": 8.404202101050526e-05, |
| "grad_norm": 9.908291816711426, |
| "learning_rate": 2.5200000000000003e-07, |
| "loss": -0.9982, |
| "step": 84 |
| }, |
| { |
| "epoch": 8.504252126063031e-05, |
| "grad_norm": 9.28254222869873, |
| "learning_rate": 2.5500000000000005e-07, |
| "loss": -0.5884, |
| "step": 85 |
| }, |
| { |
| "epoch": 8.604302151075538e-05, |
| "grad_norm": 8.442070960998535, |
| "learning_rate": 2.58e-07, |
| "loss": -1.2796, |
| "step": 86 |
| }, |
| { |
| "epoch": 8.704352176088045e-05, |
| "grad_norm": 12.563162803649902, |
| "learning_rate": 2.6099999999999997e-07, |
| "loss": 0.0909, |
| "step": 87 |
| }, |
| { |
| "epoch": 8.80440220110055e-05, |
| "grad_norm": 8.839503288269043, |
| "learning_rate": 2.64e-07, |
| "loss": -0.9965, |
| "step": 88 |
| }, |
| { |
| "completion_length": 244.83334350585938, |
| "epoch": 8.904452226113057e-05, |
| "grad_norm": 8.823372840881348, |
| "learning_rate": 2.67e-07, |
| "loss": 0.4445, |
| "reward": 0.03761574160307646, |
| "reward_std": 0.06432248279452324, |
| "rewards/sudoku_reward_func": 0.03761574160307646, |
| "step": 89, |
| "zero_std_ratio": 0.375 |
| }, |
| { |
| "epoch": 9.004502251125563e-05, |
| "grad_norm": 8.954886436462402, |
| "learning_rate": 2.7e-07, |
| "loss": 0.5046, |
| "step": 90 |
| }, |
| { |
| "epoch": 9.104552276138069e-05, |
| "grad_norm": 10.275053024291992, |
| "learning_rate": 2.73e-07, |
| "loss": 1.1653, |
| "step": 91 |
| }, |
| { |
| "epoch": 9.204602301150575e-05, |
| "grad_norm": 19.105579376220703, |
| "learning_rate": 2.76e-07, |
| "loss": 2.4353, |
| "step": 92 |
| }, |
| { |
| "epoch": 9.304652326163082e-05, |
| "grad_norm": 8.878145217895508, |
| "learning_rate": 2.79e-07, |
| "loss": 0.4359, |
| "step": 93 |
| }, |
| { |
| "epoch": 9.404702351175587e-05, |
| "grad_norm": 9.172167778015137, |
| "learning_rate": 2.82e-07, |
| "loss": 0.5311, |
| "step": 94 |
| }, |
| { |
| "epoch": 9.504752376188094e-05, |
| "grad_norm": 10.269847869873047, |
| "learning_rate": 2.85e-07, |
| "loss": 1.1261, |
| "step": 95 |
| }, |
| { |
| "epoch": 9.6048024012006e-05, |
| "grad_norm": 20.466459274291992, |
| "learning_rate": 2.8800000000000004e-07, |
| "loss": 2.4395, |
| "step": 96 |
| }, |
| { |
| "completion_length": 239.14583587646484, |
| "epoch": 9.704852426213106e-05, |
| "grad_norm": 15.091897964477539, |
| "learning_rate": 2.91e-07, |
| "loss": 7.7526, |
| "reward": 0.05989583395421505, |
| "reward_std": 0.10059662535786629, |
| "rewards/sudoku_reward_func": 0.05989583395421505, |
| "step": 97, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 9.804902451225613e-05, |
| "grad_norm": 14.478265762329102, |
| "learning_rate": 2.94e-07, |
| "loss": 8.9155, |
| "step": 98 |
| }, |
| { |
| "epoch": 9.904952476238119e-05, |
| "grad_norm": 11.164327621459961, |
| "learning_rate": 2.97e-07, |
| "loss": 6.5003, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.00010005002501250626, |
| "grad_norm": 10.038124084472656, |
| "learning_rate": 3.0000000000000004e-07, |
| "loss": 7.4044, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.00010105052526263131, |
| "grad_norm": 14.379571914672852, |
| "learning_rate": 3.0300000000000005e-07, |
| "loss": 7.8001, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.00010205102551275638, |
| "grad_norm": 14.172938346862793, |
| "learning_rate": 3.06e-07, |
| "loss": 8.9515, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.00010305152576288145, |
| "grad_norm": 11.172471046447754, |
| "learning_rate": 3.09e-07, |
| "loss": 6.5193, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.0001040520260130065, |
| "grad_norm": 11.093944549560547, |
| "learning_rate": 3.12e-07, |
| "loss": 7.3289, |
| "step": 104 |
| }, |
| { |
| "completion_length": 229.89584350585938, |
| "epoch": 0.00010505252626313157, |
| "grad_norm": 9.635990142822266, |
| "learning_rate": 3.15e-07, |
| "loss": -1.1911, |
| "reward": 0.04885912872850895, |
| "reward_std": 0.0644612517207861, |
| "rewards/sudoku_reward_func": 0.048859127797186375, |
| "step": 105, |
| "zero_std_ratio": 0.25 |
| }, |
| { |
| "epoch": 0.00010605302651325663, |
| "grad_norm": 9.855486869812012, |
| "learning_rate": 3.18e-07, |
| "loss": -0.616, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.00010705352676338169, |
| "grad_norm": 10.122713088989258, |
| "learning_rate": 3.21e-07, |
| "loss": -0.7726, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.00010805402701350675, |
| "grad_norm": 9.637285232543945, |
| "learning_rate": 3.24e-07, |
| "loss": -1.0351, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.00010905452726363182, |
| "grad_norm": 10.141641616821289, |
| "learning_rate": 3.27e-07, |
| "loss": -1.1738, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.00011005502751375687, |
| "grad_norm": 10.051895141601562, |
| "learning_rate": 3.3e-07, |
| "loss": -0.581, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.00011105552776388194, |
| "grad_norm": 11.444948196411133, |
| "learning_rate": 3.3300000000000003e-07, |
| "loss": -0.7803, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.00011205602801400701, |
| "grad_norm": 9.775928497314453, |
| "learning_rate": 3.36e-07, |
| "loss": -0.9849, |
| "step": 112 |
| }, |
| { |
| "completion_length": 217.5625, |
| "epoch": 0.00011305652826413206, |
| "grad_norm": 15.8468656539917, |
| "learning_rate": 3.39e-07, |
| "loss": 12.9496, |
| "reward": 0.042493388056755066, |
| "reward_std": 0.07390820980072021, |
| "rewards/sudoku_reward_func": 0.042493388056755066, |
| "step": 113, |
| "zero_std_ratio": 0.25 |
| }, |
| { |
| "epoch": 0.00011405702851425713, |
| "grad_norm": 11.421664237976074, |
| "learning_rate": 3.42e-07, |
| "loss": 10.6176, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.0001150575287643822, |
| "grad_norm": 11.024772644042969, |
| "learning_rate": 3.4500000000000003e-07, |
| "loss": 12.5574, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.00011605802901450725, |
| "grad_norm": 10.388362884521484, |
| "learning_rate": 3.4800000000000005e-07, |
| "loss": 12.2676, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.00011705852926463231, |
| "grad_norm": 17.08087921142578, |
| "learning_rate": 3.51e-07, |
| "loss": 13.0406, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.00011805902951475738, |
| "grad_norm": 10.449882507324219, |
| "learning_rate": 3.5399999999999997e-07, |
| "loss": 10.5809, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.00011905952976488245, |
| "grad_norm": 11.035545349121094, |
| "learning_rate": 3.57e-07, |
| "loss": 12.5377, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.0001200600300150075, |
| "grad_norm": 10.570043563842773, |
| "learning_rate": 3.6e-07, |
| "loss": 12.2341, |
| "step": 120 |
| }, |
| { |
| "completion_length": 242.06250762939453, |
| "epoch": 0.00012106053026513257, |
| "grad_norm": 11.524609565734863, |
| "learning_rate": 3.63e-07, |
| "loss": -0.1825, |
| "reward": 0.05667162872850895, |
| "reward_std": 0.0804666131734848, |
| "rewards/sudoku_reward_func": 0.056671624071896076, |
| "step": 121, |
| "zero_std_ratio": 0.125 |
| }, |
| { |
| "epoch": 0.00012206103051525763, |
| "grad_norm": 12.43232536315918, |
| "learning_rate": 3.66e-07, |
| "loss": -0.3778, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.0001230615307653827, |
| "grad_norm": 23.712289810180664, |
| "learning_rate": 3.69e-07, |
| "loss": -2.2729, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.00012406203101550774, |
| "grad_norm": 14.794097900390625, |
| "learning_rate": 3.72e-07, |
| "loss": 0.5418, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.0001250625312656328, |
| "grad_norm": 10.758546829223633, |
| "learning_rate": 3.75e-07, |
| "loss": -0.2122, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.00012606303151575787, |
| "grad_norm": 11.061405181884766, |
| "learning_rate": 3.78e-07, |
| "loss": -0.43, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.00012706353176588294, |
| "grad_norm": 15.273740768432617, |
| "learning_rate": 3.8100000000000004e-07, |
| "loss": -2.2138, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.000128064032016008, |
| "grad_norm": 14.798585891723633, |
| "learning_rate": 3.84e-07, |
| "loss": 0.4373, |
| "step": 128 |
| }, |
| { |
| "completion_length": 232.83333587646484, |
| "epoch": 0.00012906453226613307, |
| "grad_norm": 5.877110004425049, |
| "learning_rate": 3.87e-07, |
| "loss": 1.6496, |
| "reward": 0.02467758022248745, |
| "reward_std": 0.05041925609111786, |
| "rewards/sudoku_reward_func": 0.02467758022248745, |
| "step": 129, |
| "zero_std_ratio": 0.375 |
| }, |
| { |
| "epoch": 0.00013006503251625814, |
| "grad_norm": 6.033883094787598, |
| "learning_rate": 3.9e-07, |
| "loss": 0.7818, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.00013106553276638318, |
| "grad_norm": 6.772444725036621, |
| "learning_rate": 3.9300000000000004e-07, |
| "loss": 0.9868, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.00013206603301650825, |
| "grad_norm": 6.536540508270264, |
| "learning_rate": 3.9600000000000005e-07, |
| "loss": 0.3655, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.00013306653326663331, |
| "grad_norm": 5.516957759857178, |
| "learning_rate": 3.99e-07, |
| "loss": 1.6722, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.00013406703351675838, |
| "grad_norm": 6.0041046142578125, |
| "learning_rate": 4.02e-07, |
| "loss": 0.8035, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.00013506753376688345, |
| "grad_norm": 7.185412883758545, |
| "learning_rate": 4.0500000000000004e-07, |
| "loss": 0.9725, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.00013606803401700852, |
| "grad_norm": 6.690100193023682, |
| "learning_rate": 4.0800000000000005e-07, |
| "loss": 0.3469, |
| "step": 136 |
| }, |
| { |
| "completion_length": 238.33334350585938, |
| "epoch": 0.00013706853426713355, |
| "grad_norm": 6.418752193450928, |
| "learning_rate": 4.1100000000000007e-07, |
| "loss": 1.8285, |
| "reward": 0.023892195895314217, |
| "reward_std": 0.047397417947649956, |
| "rewards/sudoku_reward_func": 0.023892195895314217, |
| "step": 137, |
| "zero_std_ratio": 0.375 |
| }, |
| { |
| "epoch": 0.00013806903451725862, |
| "grad_norm": 6.306288719177246, |
| "learning_rate": 4.1400000000000003e-07, |
| "loss": 2.1302, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.0001390695347673837, |
| "grad_norm": 8.030327796936035, |
| "learning_rate": 4.1700000000000004e-07, |
| "loss": 1.4886, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.00014007003501750876, |
| "grad_norm": 7.097723484039307, |
| "learning_rate": 4.2000000000000006e-07, |
| "loss": 2.2219, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.00014107053526763382, |
| "grad_norm": 5.7919511795043945, |
| "learning_rate": 4.2299999999999996e-07, |
| "loss": 1.8461, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.0001420710355177589, |
| "grad_norm": 6.026665210723877, |
| "learning_rate": 4.26e-07, |
| "loss": 2.0779, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.00014307153576788393, |
| "grad_norm": 8.162043571472168, |
| "learning_rate": 4.29e-07, |
| "loss": 1.5113, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.000144072036018009, |
| "grad_norm": 6.930771350860596, |
| "learning_rate": 4.32e-07, |
| "loss": 2.2161, |
| "step": 144 |
| }, |
| { |
| "completion_length": 227.9166717529297, |
| "epoch": 0.00014507253626813406, |
| "grad_norm": 8.324076652526855, |
| "learning_rate": 4.3499999999999996e-07, |
| "loss": -2.2184, |
| "reward": 0.034808654338121414, |
| "reward_std": 0.06299007683992386, |
| "rewards/sudoku_reward_func": 0.034808652475476265, |
| "step": 145, |
| "zero_std_ratio": 0.375 |
| }, |
| { |
| "epoch": 0.00014607303651825913, |
| "grad_norm": 8.019487380981445, |
| "learning_rate": 4.38e-07, |
| "loss": -1.9534, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.0001470735367683842, |
| "grad_norm": 8.434709548950195, |
| "learning_rate": 4.41e-07, |
| "loss": -1.8272, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.00014807403701850926, |
| "grad_norm": 6.320549488067627, |
| "learning_rate": 4.44e-07, |
| "loss": -2.1752, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.00014907453726863433, |
| "grad_norm": 8.163012504577637, |
| "learning_rate": 4.4699999999999997e-07, |
| "loss": -2.1973, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.00015007503751875937, |
| "grad_norm": 8.064225196838379, |
| "learning_rate": 4.5e-07, |
| "loss": -1.9865, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.00015107553776888444, |
| "grad_norm": 8.826040267944336, |
| "learning_rate": 4.53e-07, |
| "loss": -1.7932, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.0001520760380190095, |
| "grad_norm": 7.065883636474609, |
| "learning_rate": 4.56e-07, |
| "loss": -2.189, |
| "step": 152 |
| }, |
| { |
| "completion_length": 219.7291717529297, |
| "epoch": 0.00015307653826913457, |
| "grad_norm": 16.795761108398438, |
| "learning_rate": 4.59e-07, |
| "loss": 7.2122, |
| "reward": 0.03798776492476463, |
| "reward_std": 0.0777844786643982, |
| "rewards/sudoku_reward_func": 0.03798776492476463, |
| "step": 153, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.00015407703851925964, |
| "grad_norm": 8.526628494262695, |
| "learning_rate": 4.62e-07, |
| "loss": 6.6345, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.0001550775387693847, |
| "grad_norm": 8.999878883361816, |
| "learning_rate": 4.65e-07, |
| "loss": 6.9569, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.00015607803901950974, |
| "grad_norm": 9.370837211608887, |
| "learning_rate": 4.68e-07, |
| "loss": 7.0462, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.0001570785392696348, |
| "grad_norm": 19.611669540405273, |
| "learning_rate": 4.71e-07, |
| "loss": 7.1868, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.00015807903951975988, |
| "grad_norm": 8.714893341064453, |
| "learning_rate": 4.7400000000000004e-07, |
| "loss": 6.6027, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.00015907953976988494, |
| "grad_norm": 8.8904447555542, |
| "learning_rate": 4.77e-07, |
| "loss": 6.9645, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.00016008004002001, |
| "grad_norm": 9.179741859436035, |
| "learning_rate": 4.800000000000001e-07, |
| "loss": 7.0361, |
| "step": 160 |
| }, |
| { |
| "completion_length": 236.89584350585938, |
| "epoch": 0.00016108054027013508, |
| "grad_norm": 7.844521522521973, |
| "learning_rate": 4.830000000000001e-07, |
| "loss": 3.4178, |
| "reward": 0.03401951119303703, |
| "reward_std": 0.053184038028120995, |
| "rewards/sudoku_reward_func": 0.034019509330391884, |
| "step": 161, |
| "zero_std_ratio": 0.25 |
| }, |
| { |
| "epoch": 0.00016208104052026014, |
| "grad_norm": 6.946485996246338, |
| "learning_rate": 4.86e-07, |
| "loss": 3.1326, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.00016308154077038518, |
| "grad_norm": 8.468039512634277, |
| "learning_rate": 4.89e-07, |
| "loss": 3.4034, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.00016408204102051025, |
| "grad_norm": 7.4369330406188965, |
| "learning_rate": 4.92e-07, |
| "loss": 3.0264, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.00016508254127063532, |
| "grad_norm": 8.125073432922363, |
| "learning_rate": 4.95e-07, |
| "loss": 3.4745, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.00016608304152076038, |
| "grad_norm": 6.560446739196777, |
| "learning_rate": 4.98e-07, |
| "loss": 3.0914, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.00016708354177088545, |
| "grad_norm": 9.359310150146484, |
| "learning_rate": 5.01e-07, |
| "loss": 3.4344, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.00016808404202101052, |
| "grad_norm": 7.4871368408203125, |
| "learning_rate": 5.040000000000001e-07, |
| "loss": 3.0149, |
| "step": 168 |
| }, |
| { |
| "completion_length": 229.06250762939453, |
| "epoch": 0.00016908454227113556, |
| "grad_norm": 10.238948822021484, |
| "learning_rate": 5.070000000000001e-07, |
| "loss": 1.0765, |
| "reward": 0.053778110072016716, |
| "reward_std": 0.09089003875851631, |
| "rewards/sudoku_reward_func": 0.05377810634672642, |
| "step": 169, |
| "zero_std_ratio": 0.125 |
| }, |
| { |
| "epoch": 0.00017008504252126062, |
| "grad_norm": 11.512720108032227, |
| "learning_rate": 5.100000000000001e-07, |
| "loss": 2.2342, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.0001710855427713857, |
| "grad_norm": 11.702943801879883, |
| "learning_rate": 5.13e-07, |
| "loss": 0.7152, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.00017208604302151076, |
| "grad_norm": 13.017841339111328, |
| "learning_rate": 5.16e-07, |
| "loss": 1.9784, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.00017308654327163582, |
| "grad_norm": 10.525778770446777, |
| "learning_rate": 5.189999999999999e-07, |
| "loss": 1.0766, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.0001740870435217609, |
| "grad_norm": 11.924694061279297, |
| "learning_rate": 5.219999999999999e-07, |
| "loss": 2.2755, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.00017508754377188593, |
| "grad_norm": 10.760505676269531, |
| "learning_rate": 5.25e-07, |
| "loss": 0.7167, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.000176088044022011, |
| "grad_norm": 11.227055549621582, |
| "learning_rate": 5.28e-07, |
| "loss": 2.0339, |
| "step": 176 |
| }, |
| { |
| "completion_length": 230.8541717529297, |
| "epoch": 0.00017708854427213606, |
| "grad_norm": 16.376419067382812, |
| "learning_rate": 5.31e-07, |
| "loss": 9.7452, |
| "reward": 0.03583829663693905, |
| "reward_std": 0.06714264582842588, |
| "rewards/sudoku_reward_func": 0.03583829663693905, |
| "step": 177, |
| "zero_std_ratio": 0.375 |
| }, |
| { |
| "epoch": 0.00017808904452226113, |
| "grad_norm": 11.184220314025879, |
| "learning_rate": 5.34e-07, |
| "loss": 9.9333, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.0001790895447723862, |
| "grad_norm": 11.791999816894531, |
| "learning_rate": 5.37e-07, |
| "loss": 9.8864, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.00018009004502251126, |
| "grad_norm": 10.82250690460205, |
| "learning_rate": 5.4e-07, |
| "loss": 10.3131, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.00018109054527263633, |
| "grad_norm": 16.504940032958984, |
| "learning_rate": 5.43e-07, |
| "loss": 9.7328, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.00018209104552276137, |
| "grad_norm": 11.314173698425293, |
| "learning_rate": 5.46e-07, |
| "loss": 9.9869, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.00018309154577288644, |
| "grad_norm": 11.449384689331055, |
| "learning_rate": 5.49e-07, |
| "loss": 9.9188, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.0001840920460230115, |
| "grad_norm": 11.834486961364746, |
| "learning_rate": 5.52e-07, |
| "loss": 10.3294, |
| "step": 184 |
| }, |
| { |
| "completion_length": 223.68750762939453, |
| "epoch": 0.00018509254627313657, |
| "grad_norm": 7.6580023765563965, |
| "learning_rate": 5.55e-07, |
| "loss": 1.6827, |
| "reward": 0.023478839080780745, |
| "reward_std": 0.05215226113796234, |
| "rewards/sudoku_reward_func": 0.023478837218135595, |
| "step": 185, |
| "zero_std_ratio": 0.25 |
| }, |
| { |
| "epoch": 0.00018609304652326164, |
| "grad_norm": 7.352197647094727, |
| "learning_rate": 5.58e-07, |
| "loss": 1.1462, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.0001870935467733867, |
| "grad_norm": 8.335673332214355, |
| "learning_rate": 5.61e-07, |
| "loss": 1.429, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.00018809404702351174, |
| "grad_norm": 9.88043212890625, |
| "learning_rate": 5.64e-07, |
| "loss": 1.4269, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.0001890945472736368, |
| "grad_norm": 11.749013900756836, |
| "learning_rate": 5.67e-07, |
| "loss": 1.7256, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.00019009504752376188, |
| "grad_norm": 7.314451694488525, |
| "learning_rate": 5.7e-07, |
| "loss": 1.139, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.00019109554777388694, |
| "grad_norm": 8.525728225708008, |
| "learning_rate": 5.730000000000001e-07, |
| "loss": 1.3557, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.000192096048024012, |
| "grad_norm": 10.000328063964844, |
| "learning_rate": 5.760000000000001e-07, |
| "loss": 1.3769, |
| "step": 192 |
| }, |
| { |
| "completion_length": 230.45833587646484, |
| "epoch": 0.00019309654827413708, |
| "grad_norm": 9.96373462677002, |
| "learning_rate": 5.79e-07, |
| "loss": 2.7711, |
| "reward": 0.04001322854310274, |
| "reward_std": 0.07591889426112175, |
| "rewards/sudoku_reward_func": 0.04001322854310274, |
| "step": 193, |
| "zero_std_ratio": 0.125 |
| }, |
| { |
| "epoch": 0.00019409704852426212, |
| "grad_norm": 8.532519340515137, |
| "learning_rate": 5.82e-07, |
| "loss": 4.3165, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.00019509754877438718, |
| "grad_norm": 8.89317798614502, |
| "learning_rate": 5.85e-07, |
| "loss": 3.8256, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.00019609804902451225, |
| "grad_norm": 9.849699020385742, |
| "learning_rate": 5.88e-07, |
| "loss": 3.3788, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.00019709854927463732, |
| "grad_norm": 9.646321296691895, |
| "learning_rate": 5.91e-07, |
| "loss": 2.692, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.00019809904952476239, |
| "grad_norm": 11.351709365844727, |
| "learning_rate": 5.94e-07, |
| "loss": 4.3232, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.00019909954977488745, |
| "grad_norm": 8.894522666931152, |
| "learning_rate": 5.970000000000001e-07, |
| "loss": 3.8403, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.00020010005002501252, |
| "grad_norm": 9.612089157104492, |
| "learning_rate": 6.000000000000001e-07, |
| "loss": 3.3513, |
| "step": 200 |
| }, |
| { |
| "completion_length": 238.43750762939453, |
| "epoch": 0.00020110055027513756, |
| "grad_norm": 7.658564567565918, |
| "learning_rate": 6.030000000000001e-07, |
| "loss": 1.9131, |
| "reward": 0.02852182649075985, |
| "reward_std": 0.040328510105609894, |
| "rewards/sudoku_reward_func": 0.028521825559437275, |
| "step": 201, |
| "zero_std_ratio": 0.375 |
| }, |
| { |
| "epoch": 0.00020210105052526263, |
| "grad_norm": 5.2299089431762695, |
| "learning_rate": 6.060000000000001e-07, |
| "loss": 1.2648, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.0002031015507753877, |
| "grad_norm": 10.499113082885742, |
| "learning_rate": 6.09e-07, |
| "loss": 2.2544, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.00020410205102551276, |
| "grad_norm": 6.83461332321167, |
| "learning_rate": 6.12e-07, |
| "loss": 1.888, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.00020510255127563783, |
| "grad_norm": 9.024131774902344, |
| "learning_rate": 6.149999999999999e-07, |
| "loss": 1.9116, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.0002061030515257629, |
| "grad_norm": 5.002796649932861, |
| "learning_rate": 6.18e-07, |
| "loss": 1.2636, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.00020710355177588793, |
| "grad_norm": 8.772700309753418, |
| "learning_rate": 6.21e-07, |
| "loss": 2.2221, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.000208104052026013, |
| "grad_norm": 5.337297439575195, |
| "learning_rate": 6.24e-07, |
| "loss": 1.9176, |
| "step": 208 |
| }, |
| { |
| "completion_length": 234.2916717529297, |
| "epoch": 0.00020910455227613807, |
| "grad_norm": 9.960871696472168, |
| "learning_rate": 6.27e-07, |
| "loss": 4.9261, |
| "reward": 0.0486111119389534, |
| "reward_std": 0.07930124551057816, |
| "rewards/sudoku_reward_func": 0.0486111119389534, |
| "step": 209, |
| "zero_std_ratio": 0.125 |
| }, |
| { |
| "epoch": 0.00021010505252626313, |
| "grad_norm": 9.485045433044434, |
| "learning_rate": 6.3e-07, |
| "loss": 4.2217, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.0002111055527763882, |
| "grad_norm": 11.977108001708984, |
| "learning_rate": 6.33e-07, |
| "loss": 4.4983, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.00021210605302651327, |
| "grad_norm": 12.709733009338379, |
| "learning_rate": 6.36e-07, |
| "loss": 4.5912, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.0002131065532766383, |
| "grad_norm": 9.682394027709961, |
| "learning_rate": 6.39e-07, |
| "loss": 4.9071, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.00021410705352676337, |
| "grad_norm": 12.194422721862793, |
| "learning_rate": 6.42e-07, |
| "loss": 4.1101, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.00021510755377688844, |
| "grad_norm": 11.770171165466309, |
| "learning_rate": 6.45e-07, |
| "loss": 4.435, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.0002161080540270135, |
| "grad_norm": 14.138443946838379, |
| "learning_rate": 6.48e-07, |
| "loss": 4.6484, |
| "step": 216 |
| }, |
| { |
| "completion_length": 243.8541717529297, |
| "epoch": 0.00021710855427713857, |
| "grad_norm": 10.657322883605957, |
| "learning_rate": 6.51e-07, |
| "loss": 0.3958, |
| "reward": 0.06342066638171673, |
| "reward_std": 0.09584061056375504, |
| "rewards/sudoku_reward_func": 0.06342066638171673, |
| "step": 217, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.00021810905452726364, |
| "grad_norm": 11.380950927734375, |
| "learning_rate": 6.54e-07, |
| "loss": 2.1351, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.0002191095547773887, |
| "grad_norm": 10.573854446411133, |
| "learning_rate": 6.57e-07, |
| "loss": 0.7276, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.00022011005502751375, |
| "grad_norm": 13.61559772491455, |
| "learning_rate": 6.6e-07, |
| "loss": 1.6265, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.0002211105552776388, |
| "grad_norm": 9.072285652160645, |
| "learning_rate": 6.63e-07, |
| "loss": 0.4138, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.00022211105552776388, |
| "grad_norm": 10.759437561035156, |
| "learning_rate": 6.660000000000001e-07, |
| "loss": 2.135, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.00022311155577788895, |
| "grad_norm": 10.8936128616333, |
| "learning_rate": 6.690000000000001e-07, |
| "loss": 0.6748, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.00022411205602801401, |
| "grad_norm": 16.358993530273438, |
| "learning_rate": 6.72e-07, |
| "loss": 1.6985, |
| "step": 224 |
| }, |
| { |
| "completion_length": 220.50000762939453, |
| "epoch": 0.00022511255627813908, |
| "grad_norm": 7.5341715812683105, |
| "learning_rate": 6.75e-07, |
| "loss": -1.8022, |
| "reward": 0.03451554290950298, |
| "reward_std": 0.07399624213576317, |
| "rewards/sudoku_reward_func": 0.03451554290950298, |
| "step": 225, |
| "zero_std_ratio": 0.25 |
| }, |
| { |
| "epoch": 0.00022611305652826412, |
| "grad_norm": 17.420621871948242, |
| "learning_rate": 6.78e-07, |
| "loss": -0.1952, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.0002271135567783892, |
| "grad_norm": 8.836989402770996, |
| "learning_rate": 6.81e-07, |
| "loss": -0.4381, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.00022811405702851425, |
| "grad_norm": 7.346076488494873, |
| "learning_rate": 6.84e-07, |
| "loss": -1.746, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.00022911455727863932, |
| "grad_norm": 7.384093761444092, |
| "learning_rate": 6.87e-07, |
| "loss": -1.845, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.0002301150575287644, |
| "grad_norm": 17.348112106323242, |
| "learning_rate": 6.900000000000001e-07, |
| "loss": -0.2476, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.00023111555777888945, |
| "grad_norm": 8.641575813293457, |
| "learning_rate": 6.930000000000001e-07, |
| "loss": -0.4975, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.0002321160580290145, |
| "grad_norm": 8.628252983093262, |
| "learning_rate": 6.960000000000001e-07, |
| "loss": -1.8189, |
| "step": 232 |
| }, |
| { |
| "completion_length": 235.43750762939453, |
| "epoch": 0.00023311655827913956, |
| "grad_norm": 4.170182228088379, |
| "learning_rate": 6.990000000000001e-07, |
| "loss": 1.5497, |
| "reward": 0.01355820195749402, |
| "reward_std": 0.02904263837262988, |
| "rewards/sudoku_reward_func": 0.013558201724663377, |
| "step": 233, |
| "zero_std_ratio": 0.5 |
| }, |
| { |
| "epoch": 0.00023411705852926463, |
| "grad_norm": 3.8872923851013184, |
| "learning_rate": 7.02e-07, |
| "loss": 1.4407, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.0002351175587793897, |
| "grad_norm": 3.796323537826538, |
| "learning_rate": 7.05e-07, |
| "loss": 1.1411, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.00023611805902951476, |
| "grad_norm": 5.579533100128174, |
| "learning_rate": 7.079999999999999e-07, |
| "loss": 1.2369, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.00023711855927963983, |
| "grad_norm": 3.9385132789611816, |
| "learning_rate": 7.11e-07, |
| "loss": 1.5498, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.0002381190595297649, |
| "grad_norm": 4.1062164306640625, |
| "learning_rate": 7.14e-07, |
| "loss": 1.4336, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.00023911955977988993, |
| "grad_norm": 4.245860576629639, |
| "learning_rate": 7.17e-07, |
| "loss": 1.1133, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.000240120060030015, |
| "grad_norm": 6.223697662353516, |
| "learning_rate": 7.2e-07, |
| "loss": 1.2535, |
| "step": 240 |
| }, |
| { |
| "completion_length": 232.6666717529297, |
| "epoch": 0.00024112056028014007, |
| "grad_norm": 28.815874099731445, |
| "learning_rate": 7.23e-07, |
| "loss": 14.1627, |
| "reward": 0.11326058581471443, |
| "reward_std": 0.11245110630989075, |
| "rewards/sudoku_reward_func": 0.11326058581471443, |
| "step": 241, |
| "zero_std_ratio": 0.125 |
| }, |
| { |
| "epoch": 0.00024212106053026513, |
| "grad_norm": 14.755169868469238, |
| "learning_rate": 7.26e-07, |
| "loss": 12.0119, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.0002431215607803902, |
| "grad_norm": 19.791038513183594, |
| "learning_rate": 7.29e-07, |
| "loss": 13.7708, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.00024412206103051527, |
| "grad_norm": 13.325358390808105, |
| "learning_rate": 7.32e-07, |
| "loss": 12.5308, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.0002451225612806403, |
| "grad_norm": 35.826072692871094, |
| "learning_rate": 7.350000000000001e-07, |
| "loss": 14.0292, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.0002461230615307654, |
| "grad_norm": 12.46027946472168, |
| "learning_rate": 7.38e-07, |
| "loss": 11.9516, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.00024712356178089044, |
| "grad_norm": 15.507015228271484, |
| "learning_rate": 7.41e-07, |
| "loss": 13.7996, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.0002481240620310155, |
| "grad_norm": 13.646905899047852, |
| "learning_rate": 7.44e-07, |
| "loss": 12.5209, |
| "step": 248 |
| }, |
| { |
| "completion_length": 227.7916717529297, |
| "epoch": 0.0002491245622811406, |
| "grad_norm": 10.473774909973145, |
| "learning_rate": 7.47e-07, |
| "loss": 6.6821, |
| "reward": 0.06304113194346428, |
| "reward_std": 0.10152465477585793, |
| "rewards/sudoku_reward_func": 0.06304113194346428, |
| "step": 249, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0002501250625312656, |
| "grad_norm": 11.4276704788208, |
| "learning_rate": 7.5e-07, |
| "loss": 6.1763, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.0002511255627813907, |
| "grad_norm": 11.7334566116333, |
| "learning_rate": 7.53e-07, |
| "loss": 5.9234, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.00025212606303151575, |
| "grad_norm": 11.439474105834961, |
| "learning_rate": 7.56e-07, |
| "loss": 7.2595, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.00025312656328164084, |
| "grad_norm": 11.427284240722656, |
| "learning_rate": 7.590000000000001e-07, |
| "loss": 6.6806, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.0002541270635317659, |
| "grad_norm": 11.73374080657959, |
| "learning_rate": 7.620000000000001e-07, |
| "loss": 6.118, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.0002551275637818909, |
| "grad_norm": 11.451107025146484, |
| "learning_rate": 7.65e-07, |
| "loss": 5.8955, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.000256128064032016, |
| "grad_norm": 11.921669960021973, |
| "learning_rate": 7.68e-07, |
| "loss": 7.1842, |
| "step": 256 |
| }, |
| { |
| "completion_length": 237.06250762939453, |
| "epoch": 0.00025712856428214106, |
| "grad_norm": 6.466955184936523, |
| "learning_rate": 7.71e-07, |
| "loss": 2.0766, |
| "reward": 0.027695106342434883, |
| "reward_std": 0.06247997470200062, |
| "rewards/sudoku_reward_func": 0.02769510541111231, |
| "step": 257, |
| "zero_std_ratio": 0.25 |
| }, |
| { |
| "epoch": 0.00025812906453226615, |
| "grad_norm": 7.86901330947876, |
| "learning_rate": 7.74e-07, |
| "loss": 2.768, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.0002591295647823912, |
| "grad_norm": 7.55520486831665, |
| "learning_rate": 7.77e-07, |
| "loss": 2.1036, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.0002601300650325163, |
| "grad_norm": 7.022609233856201, |
| "learning_rate": 7.8e-07, |
| "loss": 1.9049, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.0002611305652826413, |
| "grad_norm": 7.077910423278809, |
| "learning_rate": 7.830000000000001e-07, |
| "loss": 2.0455, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.00026213106553276636, |
| "grad_norm": 7.5524492263793945, |
| "learning_rate": 7.860000000000001e-07, |
| "loss": 2.7672, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.00026313156578289146, |
| "grad_norm": 7.9139251708984375, |
| "learning_rate": 7.890000000000001e-07, |
| "loss": 2.1218, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.0002641320660330165, |
| "grad_norm": 7.026261329650879, |
| "learning_rate": 7.920000000000001e-07, |
| "loss": 1.9044, |
| "step": 264 |
| }, |
| { |
| "completion_length": 235.9166717529297, |
| "epoch": 0.0002651325662831416, |
| "grad_norm": 10.94743824005127, |
| "learning_rate": 7.95e-07, |
| "loss": 3.926, |
| "reward": 0.04860359709709883, |
| "reward_std": 0.07168097421526909, |
| "rewards/sudoku_reward_func": 0.04860359709709883, |
| "step": 265, |
| "zero_std_ratio": 0.25 |
| }, |
| { |
| "epoch": 0.00026613306653326663, |
| "grad_norm": 12.212309837341309, |
| "learning_rate": 7.98e-07, |
| "loss": 3.6554, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.00026713356678339167, |
| "grad_norm": 11.789071083068848, |
| "learning_rate": 8.01e-07, |
| "loss": 5.5649, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.00026813406703351676, |
| "grad_norm": 10.68334674835205, |
| "learning_rate": 8.04e-07, |
| "loss": 4.3805, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.0002691345672836418, |
| "grad_norm": 11.618034362792969, |
| "learning_rate": 8.070000000000001e-07, |
| "loss": 3.9353, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.0002701350675337669, |
| "grad_norm": 12.148497581481934, |
| "learning_rate": 8.100000000000001e-07, |
| "loss": 3.5802, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.00027113556778389194, |
| "grad_norm": 11.314140319824219, |
| "learning_rate": 8.130000000000001e-07, |
| "loss": 5.4743, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.00027213606803401703, |
| "grad_norm": 11.643878936767578, |
| "learning_rate": 8.160000000000001e-07, |
| "loss": 4.3817, |
| "step": 272 |
| }, |
| { |
| "completion_length": 235.70833587646484, |
| "epoch": 0.00027313656828414207, |
| "grad_norm": 8.467658042907715, |
| "learning_rate": 8.190000000000001e-07, |
| "loss": 10.9059, |
| "reward": 0.042369380593299866, |
| "reward_std": 0.08460133895277977, |
| "rewards/sudoku_reward_func": 0.042369380593299866, |
| "step": 273, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0002741370685342671, |
| "grad_norm": 11.485238075256348, |
| "learning_rate": 8.220000000000001e-07, |
| "loss": 11.3646, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.0002751375687843922, |
| "grad_norm": 8.469305992126465, |
| "learning_rate": 8.25e-07, |
| "loss": 10.9724, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.00027613806903451724, |
| "grad_norm": 9.359997749328613, |
| "learning_rate": 8.280000000000001e-07, |
| "loss": 10.7772, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.00027713856928464234, |
| "grad_norm": 8.635589599609375, |
| "learning_rate": 8.310000000000001e-07, |
| "loss": 10.9291, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.0002781390695347674, |
| "grad_norm": 11.921236991882324, |
| "learning_rate": 8.340000000000001e-07, |
| "loss": 11.3009, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.00027913956978489247, |
| "grad_norm": 8.240606307983398, |
| "learning_rate": 8.370000000000001e-07, |
| "loss": 10.9624, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.0002801400700350175, |
| "grad_norm": 9.36181354522705, |
| "learning_rate": 8.400000000000001e-07, |
| "loss": 10.7465, |
| "step": 280 |
| }, |
| { |
| "completion_length": 235.4791717529297, |
| "epoch": 0.00028114057028514255, |
| "grad_norm": 9.021184921264648, |
| "learning_rate": 8.430000000000001e-07, |
| "loss": 4.9804, |
| "reward": 0.03388047218322754, |
| "reward_std": 0.07418958842754364, |
| "rewards/sudoku_reward_func": 0.03388047032058239, |
| "step": 281, |
| "zero_std_ratio": 0.25 |
| }, |
| { |
| "epoch": 0.00028214107053526764, |
| "grad_norm": 9.126776695251465, |
| "learning_rate": 8.459999999999999e-07, |
| "loss": 4.9054, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.0002831415707853927, |
| "grad_norm": 9.088274955749512, |
| "learning_rate": 8.489999999999999e-07, |
| "loss": 5.4592, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.0002841420710355178, |
| "grad_norm": 12.914833068847656, |
| "learning_rate": 8.52e-07, |
| "loss": 4.9845, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.0002851425712856428, |
| "grad_norm": 9.606204986572266, |
| "learning_rate": 8.55e-07, |
| "loss": 4.9761, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.00028614307153576786, |
| "grad_norm": 9.16411018371582, |
| "learning_rate": 8.58e-07, |
| "loss": 4.846, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.00028714357178589295, |
| "grad_norm": 9.097843170166016, |
| "learning_rate": 8.61e-07, |
| "loss": 5.3997, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.000288144072036018, |
| "grad_norm": 9.768306732177734, |
| "learning_rate": 8.64e-07, |
| "loss": 4.9431, |
| "step": 288 |
| }, |
| { |
| "completion_length": 234.77084350585938, |
| "epoch": 0.0002891445722861431, |
| "grad_norm": 11.16508960723877, |
| "learning_rate": 8.669999999999999e-07, |
| "loss": -4.0727, |
| "reward": 0.049933863803744316, |
| "reward_std": 0.09329613298177719, |
| "rewards/sudoku_reward_func": 0.049933863803744316, |
| "step": 289, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0002901450725362681, |
| "grad_norm": 12.644773483276367, |
| "learning_rate": 8.699999999999999e-07, |
| "loss": -3.3604, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.0002911455727863932, |
| "grad_norm": 13.333436965942383, |
| "learning_rate": 8.729999999999999e-07, |
| "loss": -3.7767, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.00029214607303651826, |
| "grad_norm": 12.671016693115234, |
| "learning_rate": 8.76e-07, |
| "loss": -2.786, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.0002931465732866433, |
| "grad_norm": 12.357577323913574, |
| "learning_rate": 8.79e-07, |
| "loss": -4.1053, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.0002941470735367684, |
| "grad_norm": 14.383247375488281, |
| "learning_rate": 8.82e-07, |
| "loss": -3.3782, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.00029514757378689343, |
| "grad_norm": 13.393773078918457, |
| "learning_rate": 8.85e-07, |
| "loss": -3.8462, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.0002961480740370185, |
| "grad_norm": 12.599241256713867, |
| "learning_rate": 8.88e-07, |
| "loss": -2.8205, |
| "step": 296 |
| }, |
| { |
| "completion_length": 232.89584350585938, |
| "epoch": 0.00029714857428714356, |
| "grad_norm": 15.046712875366211, |
| "learning_rate": 8.91e-07, |
| "loss": 1.2729, |
| "reward": 0.047825731337070465, |
| "reward_std": 0.08659476786851883, |
| "rewards/sudoku_reward_func": 0.04782572761178017, |
| "step": 297, |
| "zero_std_ratio": 0.125 |
| }, |
| { |
| "epoch": 0.00029814907453726866, |
| "grad_norm": 10.604710578918457, |
| "learning_rate": 8.939999999999999e-07, |
| "loss": -0.2316, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.0002991495747873937, |
| "grad_norm": 12.712968826293945, |
| "learning_rate": 8.969999999999999e-07, |
| "loss": -0.0159, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.00030015007503751874, |
| "grad_norm": 13.237211227416992, |
| "learning_rate": 9e-07, |
| "loss": 1.4319, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.00030115057528764383, |
| "grad_norm": 15.451652526855469, |
| "learning_rate": 9.03e-07, |
| "loss": 1.1957, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.00030215107553776887, |
| "grad_norm": 10.60921573638916, |
| "learning_rate": 9.06e-07, |
| "loss": -0.2241, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.00030315157578789397, |
| "grad_norm": 12.147521018981934, |
| "learning_rate": 9.09e-07, |
| "loss": -0.0156, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.000304152076038019, |
| "grad_norm": 13.540789604187012, |
| "learning_rate": 9.12e-07, |
| "loss": 1.3268, |
| "step": 304 |
| }, |
| { |
| "completion_length": 240.83333587646484, |
| "epoch": 0.0003051525762881441, |
| "grad_norm": 22.218473434448242, |
| "learning_rate": 9.15e-07, |
| "loss": 5.2482, |
| "reward": 0.0706845298409462, |
| "reward_std": 0.11625630408525467, |
| "rewards/sudoku_reward_func": 0.07068452797830105, |
| "step": 305, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.00030615307653826914, |
| "grad_norm": 11.328912734985352, |
| "learning_rate": 9.18e-07, |
| "loss": 3.6993, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.0003071535767883942, |
| "grad_norm": 12.521968841552734, |
| "learning_rate": 9.210000000000001e-07, |
| "loss": 2.4513, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.00030815407703851927, |
| "grad_norm": 13.72804069519043, |
| "learning_rate": 9.24e-07, |
| "loss": 3.8968, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.0003091545772886443, |
| "grad_norm": 18.00409698486328, |
| "learning_rate": 9.27e-07, |
| "loss": 5.2393, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.0003101550775387694, |
| "grad_norm": 11.605079650878906, |
| "learning_rate": 9.3e-07, |
| "loss": 3.6261, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.00031115557778889445, |
| "grad_norm": 11.738133430480957, |
| "learning_rate": 9.33e-07, |
| "loss": 2.4419, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.0003121560780390195, |
| "grad_norm": 14.07581901550293, |
| "learning_rate": 9.36e-07, |
| "loss": 3.8252, |
| "step": 312 |
| }, |
| { |
| "completion_length": 243.81250762939453, |
| "epoch": 0.0003131565782891446, |
| "grad_norm": 8.576881408691406, |
| "learning_rate": 9.39e-07, |
| "loss": 1.2863, |
| "reward": 0.0376909002661705, |
| "reward_std": 0.06233246065676212, |
| "rewards/sudoku_reward_func": 0.03769089933484793, |
| "step": 313, |
| "zero_std_ratio": 0.25 |
| }, |
| { |
| "epoch": 0.0003141570785392696, |
| "grad_norm": 9.49803352355957, |
| "learning_rate": 9.42e-07, |
| "loss": 1.8182, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.0003151575787893947, |
| "grad_norm": 8.959271430969238, |
| "learning_rate": 9.450000000000001e-07, |
| "loss": 1.2478, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.00031615807903951975, |
| "grad_norm": 10.243912696838379, |
| "learning_rate": 9.480000000000001e-07, |
| "loss": 1.412, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.00031715857928964485, |
| "grad_norm": 12.459125518798828, |
| "learning_rate": 9.510000000000001e-07, |
| "loss": 1.2536, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.0003181590795397699, |
| "grad_norm": 9.807161331176758, |
| "learning_rate": 9.54e-07, |
| "loss": 1.7685, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.0003191595797898949, |
| "grad_norm": 9.090300559997559, |
| "learning_rate": 9.570000000000001e-07, |
| "loss": 1.1907, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.00032016008004002, |
| "grad_norm": 11.044015884399414, |
| "learning_rate": 9.600000000000001e-07, |
| "loss": 1.3517, |
| "step": 320 |
| }, |
| { |
| "completion_length": 236.4166717529297, |
| "epoch": 0.00032116058029014506, |
| "grad_norm": 14.772443771362305, |
| "learning_rate": 9.630000000000001e-07, |
| "loss": 4.4002, |
| "reward": 0.08391203731298447, |
| "reward_std": 0.12630317360162735, |
| "rewards/sudoku_reward_func": 0.08391203731298447, |
| "step": 321, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.00032216108054027015, |
| "grad_norm": 12.570160865783691, |
| "learning_rate": 9.660000000000002e-07, |
| "loss": 3.2618, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.0003231615807903952, |
| "grad_norm": 18.8726749420166, |
| "learning_rate": 9.69e-07, |
| "loss": 1.9231, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.0003241620810405203, |
| "grad_norm": 15.474459648132324, |
| "learning_rate": 9.72e-07, |
| "loss": 3.6165, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.0003251625812906453, |
| "grad_norm": 13.179699897766113, |
| "learning_rate": 9.75e-07, |
| "loss": 4.3286, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.00032616308154077037, |
| "grad_norm": 12.312911033630371, |
| "learning_rate": 9.78e-07, |
| "loss": 3.1953, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.00032716358179089546, |
| "grad_norm": 23.187116622924805, |
| "learning_rate": 9.81e-07, |
| "loss": 1.7782, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.0003281640820410205, |
| "grad_norm": 14.457850456237793, |
| "learning_rate": 9.84e-07, |
| "loss": 3.5443, |
| "step": 328 |
| }, |
| { |
| "completion_length": 239.1041717529297, |
| "epoch": 0.0003291645822911456, |
| "grad_norm": 13.674739837646484, |
| "learning_rate": 9.87e-07, |
| "loss": 1.863, |
| "reward": 0.099082350730896, |
| "reward_std": 0.12339252233505249, |
| "rewards/sudoku_reward_func": 0.0990823395550251, |
| "step": 329, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.00033016508254127063, |
| "grad_norm": 12.24724006652832, |
| "learning_rate": 9.9e-07, |
| "loss": 2.4264, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.0003311655827913957, |
| "grad_norm": 17.608428955078125, |
| "learning_rate": 9.93e-07, |
| "loss": 3.1095, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.00033216608304152077, |
| "grad_norm": 15.516730308532715, |
| "learning_rate": 9.96e-07, |
| "loss": 3.0734, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.0003331665832916458, |
| "grad_norm": 12.981306076049805, |
| "learning_rate": 9.99e-07, |
| "loss": 1.7985, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.0003341670835417709, |
| "grad_norm": 12.763707160949707, |
| "learning_rate": 1.002e-06, |
| "loss": 2.3478, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.00033516758379189594, |
| "grad_norm": 16.93824577331543, |
| "learning_rate": 1.0050000000000001e-06, |
| "loss": 3.0187, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.00033616808404202103, |
| "grad_norm": 16.1212215423584, |
| "learning_rate": 1.0080000000000001e-06, |
| "loss": 2.9999, |
| "step": 336 |
| }, |
| { |
| "completion_length": 239.37500762939453, |
| "epoch": 0.0003371685842921461, |
| "grad_norm": 8.778818130493164, |
| "learning_rate": 1.0110000000000001e-06, |
| "loss": 4.0337, |
| "reward": 0.05348875932395458, |
| "reward_std": 0.07841756939888, |
| "rewards/sudoku_reward_func": 0.05348875932395458, |
| "step": 337, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0003381690845422711, |
| "grad_norm": 8.754796028137207, |
| "learning_rate": 1.0140000000000002e-06, |
| "loss": 3.7786, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.0003391695847923962, |
| "grad_norm": 9.616628646850586, |
| "learning_rate": 1.0170000000000002e-06, |
| "loss": 3.6712, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.00034017008504252125, |
| "grad_norm": 8.855981826782227, |
| "learning_rate": 1.0200000000000002e-06, |
| "loss": 4.6132, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.00034117058529264634, |
| "grad_norm": 8.571745872497559, |
| "learning_rate": 1.0230000000000002e-06, |
| "loss": 3.9668, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.0003421710855427714, |
| "grad_norm": 9.43879222869873, |
| "learning_rate": 1.026e-06, |
| "loss": 3.7596, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.0003431715857928965, |
| "grad_norm": 9.735145568847656, |
| "learning_rate": 1.029e-06, |
| "loss": 3.6587, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.0003441720860430215, |
| "grad_norm": 9.055402755737305, |
| "learning_rate": 1.032e-06, |
| "loss": 4.514, |
| "step": 344 |
| }, |
| { |
| "completion_length": 240.4166717529297, |
| "epoch": 0.00034517258629314655, |
| "grad_norm": 15.025571823120117, |
| "learning_rate": 1.035e-06, |
| "loss": -2.9335, |
| "reward": 0.06999308802187443, |
| "reward_std": 0.10092847421765327, |
| "rewards/sudoku_reward_func": 0.06999308802187443, |
| "step": 345, |
| "zero_std_ratio": 0.125 |
| }, |
| { |
| "epoch": 0.00034617308654327165, |
| "grad_norm": 14.629435539245605, |
| "learning_rate": 1.0379999999999998e-06, |
| "loss": -2.6866, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.0003471735867933967, |
| "grad_norm": 15.487167358398438, |
| "learning_rate": 1.0409999999999999e-06, |
| "loss": -3.771, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.0003481740870435218, |
| "grad_norm": 15.948243141174316, |
| "learning_rate": 1.0439999999999999e-06, |
| "loss": -2.2402, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.0003491745872936468, |
| "grad_norm": 15.299474716186523, |
| "learning_rate": 1.0469999999999999e-06, |
| "loss": -3.0113, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.00035017508754377186, |
| "grad_norm": 14.349522590637207, |
| "learning_rate": 1.05e-06, |
| "loss": -2.88, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.00035117558779389695, |
| "grad_norm": 15.201149940490723, |
| "learning_rate": 1.053e-06, |
| "loss": -3.9468, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.000352176088044022, |
| "grad_norm": 16.916872024536133, |
| "learning_rate": 1.056e-06, |
| "loss": -2.4322, |
| "step": 352 |
| }, |
| { |
| "completion_length": 231.12500762939453, |
| "epoch": 0.0003531765882941471, |
| "grad_norm": 18.65467071533203, |
| "learning_rate": 1.059e-06, |
| "loss": 1.9933, |
| "reward": 0.0762648843228817, |
| "reward_std": 0.1246400736272335, |
| "rewards/sudoku_reward_func": 0.0762648805975914, |
| "step": 353, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.00035417708854427213, |
| "grad_norm": 20.7829532623291, |
| "learning_rate": 1.062e-06, |
| "loss": 0.0083, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.0003551775887943972, |
| "grad_norm": 17.963436126708984, |
| "learning_rate": 1.065e-06, |
| "loss": 1.9007, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.00035617808904452226, |
| "grad_norm": 19.813440322875977, |
| "learning_rate": 1.068e-06, |
| "loss": 2.9544, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.0003571785892946473, |
| "grad_norm": 18.973485946655273, |
| "learning_rate": 1.071e-06, |
| "loss": 1.6812, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.0003581790895447724, |
| "grad_norm": 21.765804290771484, |
| "learning_rate": 1.074e-06, |
| "loss": -0.2427, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.00035917958979489743, |
| "grad_norm": 18.01424789428711, |
| "learning_rate": 1.077e-06, |
| "loss": 1.7125, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.00036018009004502253, |
| "grad_norm": 20.650293350219727, |
| "learning_rate": 1.08e-06, |
| "loss": 2.7495, |
| "step": 360 |
| }, |
| { |
| "completion_length": 232.1666717529297, |
| "epoch": 0.00036118059029514757, |
| "grad_norm": 17.942243576049805, |
| "learning_rate": 1.083e-06, |
| "loss": 5.3031, |
| "reward": 0.10309194773435593, |
| "reward_std": 0.14464747160673141, |
| "rewards/sudoku_reward_func": 0.10309194400906563, |
| "step": 361, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.00036218109054527266, |
| "grad_norm": 18.253305435180664, |
| "learning_rate": 1.086e-06, |
| "loss": 5.155, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.0003631815907953977, |
| "grad_norm": 20.967313766479492, |
| "learning_rate": 1.089e-06, |
| "loss": 5.5673, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.00036418209104552274, |
| "grad_norm": 21.44413185119629, |
| "learning_rate": 1.092e-06, |
| "loss": 4.7692, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.00036518259129564784, |
| "grad_norm": 19.41208267211914, |
| "learning_rate": 1.095e-06, |
| "loss": 5.0225, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.0003661830915457729, |
| "grad_norm": 18.210580825805664, |
| "learning_rate": 1.098e-06, |
| "loss": 4.8703, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.00036718359179589797, |
| "grad_norm": 20.35464859008789, |
| "learning_rate": 1.101e-06, |
| "loss": 5.2985, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.000368184092046023, |
| "grad_norm": 24.97935676574707, |
| "learning_rate": 1.104e-06, |
| "loss": 4.5277, |
| "step": 368 |
| }, |
| { |
| "completion_length": 240.83333587646484, |
| "epoch": 0.00036918459229614805, |
| "grad_norm": 11.803683280944824, |
| "learning_rate": 1.107e-06, |
| "loss": -7.3946, |
| "reward": 0.0628720261156559, |
| "reward_std": 0.07477889209985733, |
| "rewards/sudoku_reward_func": 0.0628720223903656, |
| "step": 369, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.00037018509254627314, |
| "grad_norm": 10.526047706604004, |
| "learning_rate": 1.11e-06, |
| "loss": -6.4134, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.0003711855927963982, |
| "grad_norm": 10.823116302490234, |
| "learning_rate": 1.113e-06, |
| "loss": -6.2425, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.0003721860930465233, |
| "grad_norm": 11.820639610290527, |
| "learning_rate": 1.116e-06, |
| "loss": -6.4356, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.0003731865932966483, |
| "grad_norm": 12.025969505310059, |
| "learning_rate": 1.119e-06, |
| "loss": -7.4929, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.0003741870935467734, |
| "grad_norm": 10.814313888549805, |
| "learning_rate": 1.122e-06, |
| "loss": -6.5665, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.00037518759379689845, |
| "grad_norm": 10.791754722595215, |
| "learning_rate": 1.125e-06, |
| "loss": -6.3787, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.0003761880940470235, |
| "grad_norm": 12.164316177368164, |
| "learning_rate": 1.128e-06, |
| "loss": -6.5785, |
| "step": 376 |
| }, |
| { |
| "completion_length": 235.81250762939453, |
| "epoch": 0.0003771885942971486, |
| "grad_norm": 20.148202896118164, |
| "learning_rate": 1.131e-06, |
| "loss": 7.9792, |
| "reward": 0.1282242238521576, |
| "reward_std": 0.1380881443619728, |
| "rewards/sudoku_reward_func": 0.1282242089509964, |
| "step": 377, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0003781890945472736, |
| "grad_norm": 20.059968948364258, |
| "learning_rate": 1.134e-06, |
| "loss": 7.7242, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.0003791895947973987, |
| "grad_norm": 19.917285919189453, |
| "learning_rate": 1.137e-06, |
| "loss": 6.6093, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.00038019009504752376, |
| "grad_norm": 18.866975784301758, |
| "learning_rate": 1.14e-06, |
| "loss": 5.9883, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.00038119059529764885, |
| "grad_norm": 20.893918991088867, |
| "learning_rate": 1.1430000000000001e-06, |
| "loss": 7.736, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.0003821910955477739, |
| "grad_norm": 20.95090103149414, |
| "learning_rate": 1.1460000000000001e-06, |
| "loss": 7.446, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.00038319159579789893, |
| "grad_norm": 18.311017990112305, |
| "learning_rate": 1.1490000000000001e-06, |
| "loss": 6.3593, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.000384192096048024, |
| "grad_norm": 18.9256591796875, |
| "learning_rate": 1.1520000000000002e-06, |
| "loss": 5.6758, |
| "step": 384 |
| }, |
| { |
| "completion_length": 242.7291717529297, |
| "epoch": 0.00038519259629814906, |
| "grad_norm": 15.097347259521484, |
| "learning_rate": 1.155e-06, |
| "loss": -5.6666, |
| "reward": 0.10884891077876091, |
| "reward_std": 0.12249365448951721, |
| "rewards/sudoku_reward_func": 0.10884890332818031, |
| "step": 385, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.00038619309654827416, |
| "grad_norm": 14.708651542663574, |
| "learning_rate": 1.158e-06, |
| "loss": -4.3701, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.0003871935967983992, |
| "grad_norm": 15.039358139038086, |
| "learning_rate": 1.161e-06, |
| "loss": -3.7308, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.00038819409704852424, |
| "grad_norm": 15.061366081237793, |
| "learning_rate": 1.164e-06, |
| "loss": -4.6885, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.00038919459729864933, |
| "grad_norm": 16.103355407714844, |
| "learning_rate": 1.167e-06, |
| "loss": -5.7928, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.00039019509754877437, |
| "grad_norm": 14.791950225830078, |
| "learning_rate": 1.17e-06, |
| "loss": -4.5289, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.00039119559779889946, |
| "grad_norm": 14.480550765991211, |
| "learning_rate": 1.173e-06, |
| "loss": -3.8723, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.0003921960980490245, |
| "grad_norm": 15.546393394470215, |
| "learning_rate": 1.176e-06, |
| "loss": -4.8869, |
| "step": 392 |
| }, |
| { |
| "completion_length": 242.83333587646484, |
| "epoch": 0.0003931965982991496, |
| "grad_norm": 14.729490280151367, |
| "learning_rate": 1.179e-06, |
| "loss": -5.427, |
| "reward": 0.09122851490974426, |
| "reward_std": 0.12679633498191833, |
| "rewards/sudoku_reward_func": 0.09122850745916367, |
| "step": 393, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.00039419709854927464, |
| "grad_norm": 13.33181095123291, |
| "learning_rate": 1.182e-06, |
| "loss": -5.2525, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.0003951975987993997, |
| "grad_norm": 14.865705490112305, |
| "learning_rate": 1.185e-06, |
| "loss": -5.7037, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.00039619809904952477, |
| "grad_norm": 12.856032371520996, |
| "learning_rate": 1.188e-06, |
| "loss": -6.5057, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.0003971985992996498, |
| "grad_norm": 16.378400802612305, |
| "learning_rate": 1.1910000000000001e-06, |
| "loss": -5.5324, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.0003981990995497749, |
| "grad_norm": 14.952813148498535, |
| "learning_rate": 1.1940000000000001e-06, |
| "loss": -5.4037, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.00039919959979989994, |
| "grad_norm": 13.535036087036133, |
| "learning_rate": 1.1970000000000001e-06, |
| "loss": -5.8358, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.00040020010005002504, |
| "grad_norm": 12.8466157913208, |
| "learning_rate": 1.2000000000000002e-06, |
| "loss": -6.6436, |
| "step": 400 |
| }, |
| { |
| "completion_length": 246.3541717529297, |
| "epoch": 0.0004012006003001501, |
| "grad_norm": 12.46061897277832, |
| "learning_rate": 1.2030000000000002e-06, |
| "loss": 0.0263, |
| "reward": 0.06971500627696514, |
| "reward_std": 0.10182120278477669, |
| "rewards/sudoku_reward_func": 0.06971500627696514, |
| "step": 401, |
| "zero_std_ratio": 0.125 |
| }, |
| { |
| "epoch": 0.0004022011005502751, |
| "grad_norm": 13.909893989562988, |
| "learning_rate": 1.2060000000000002e-06, |
| "loss": -0.1189, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.0004032016008004002, |
| "grad_norm": 14.540249824523926, |
| "learning_rate": 1.2090000000000002e-06, |
| "loss": -0.3745, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.00040420210105052525, |
| "grad_norm": 15.110182762145996, |
| "learning_rate": 1.2120000000000002e-06, |
| "loss": -0.7959, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.00040520260130065034, |
| "grad_norm": 12.969428062438965, |
| "learning_rate": 1.215e-06, |
| "loss": -0.0583, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.0004062031015507754, |
| "grad_norm": 15.375487327575684, |
| "learning_rate": 1.218e-06, |
| "loss": -0.2408, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.0004072036018009004, |
| "grad_norm": 14.822982788085938, |
| "learning_rate": 1.221e-06, |
| "loss": -0.4963, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.0004082041020510255, |
| "grad_norm": 16.54659080505371, |
| "learning_rate": 1.224e-06, |
| "loss": -0.8767, |
| "step": 408 |
| }, |
| { |
| "completion_length": 248.64583587646484, |
| "epoch": 0.00040920460230115056, |
| "grad_norm": 20.481712341308594, |
| "learning_rate": 1.2269999999999999e-06, |
| "loss": 4.346, |
| "reward": 0.11594367399811745, |
| "reward_std": 0.1314607784152031, |
| "rewards/sudoku_reward_func": 0.11594367027282715, |
| "step": 409, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.00041020510255127565, |
| "grad_norm": 22.77071762084961, |
| "learning_rate": 1.2299999999999999e-06, |
| "loss": 4.7742, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.0004112056028014007, |
| "grad_norm": 17.92951774597168, |
| "learning_rate": 1.2329999999999999e-06, |
| "loss": 1.959, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.0004122061030515258, |
| "grad_norm": 18.547788619995117, |
| "learning_rate": 1.236e-06, |
| "loss": 1.3136, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.0004132066033016508, |
| "grad_norm": 18.12384605407715, |
| "learning_rate": 1.239e-06, |
| "loss": 4.2181, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.00041420710355177586, |
| "grad_norm": 21.05364418029785, |
| "learning_rate": 1.242e-06, |
| "loss": 4.8127, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.00041520760380190096, |
| "grad_norm": 19.076353073120117, |
| "learning_rate": 1.245e-06, |
| "loss": 2.0259, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.000416208104052026, |
| "grad_norm": 19.371305465698242, |
| "learning_rate": 1.248e-06, |
| "loss": 1.1727, |
| "step": 416 |
| }, |
| { |
| "completion_length": 238.6666717529297, |
| "epoch": 0.0004172086043021511, |
| "grad_norm": 24.886581420898438, |
| "learning_rate": 1.251e-06, |
| "loss": 2.8817, |
| "reward": 0.1366717889904976, |
| "reward_std": 0.17538512498140335, |
| "rewards/sudoku_reward_func": 0.136671781539917, |
| "step": 417, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.00041820910455227613, |
| "grad_norm": 24.532011032104492, |
| "learning_rate": 1.254e-06, |
| "loss": 0.4373, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.0004192096048024012, |
| "grad_norm": 21.299440383911133, |
| "learning_rate": 1.257e-06, |
| "loss": 1.7123, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.00042021010505252627, |
| "grad_norm": 22.578105926513672, |
| "learning_rate": 1.26e-06, |
| "loss": -0.2846, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.0004212106053026513, |
| "grad_norm": 25.076955795288086, |
| "learning_rate": 1.263e-06, |
| "loss": 2.7225, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.0004222111055527764, |
| "grad_norm": 24.209144592285156, |
| "learning_rate": 1.266e-06, |
| "loss": 0.1744, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.00042321160580290144, |
| "grad_norm": 20.4233455657959, |
| "learning_rate": 1.269e-06, |
| "loss": 1.5553, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.00042421210605302653, |
| "grad_norm": 23.477943420410156, |
| "learning_rate": 1.272e-06, |
| "loss": -0.4778, |
| "step": 424 |
| }, |
| { |
| "completion_length": 239.25, |
| "epoch": 0.00042521260630315157, |
| "grad_norm": 22.97435188293457, |
| "learning_rate": 1.275e-06, |
| "loss": -10.5125, |
| "reward": 0.12210648879408836, |
| "reward_std": 0.14136488735675812, |
| "rewards/sudoku_reward_func": 0.12210648134350777, |
| "step": 425, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0004262131065532766, |
| "grad_norm": 23.24193572998047, |
| "learning_rate": 1.278e-06, |
| "loss": -10.8064, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.0004272136068034017, |
| "grad_norm": 23.464582443237305, |
| "learning_rate": 1.281e-06, |
| "loss": -10.3222, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.00042821410705352675, |
| "grad_norm": 26.24444007873535, |
| "learning_rate": 1.284e-06, |
| "loss": -10.2436, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.00042921460730365184, |
| "grad_norm": 23.67002296447754, |
| "learning_rate": 1.287e-06, |
| "loss": -10.6952, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.0004302151075537769, |
| "grad_norm": 23.789400100708008, |
| "learning_rate": 1.29e-06, |
| "loss": -11.0674, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.000431215607803902, |
| "grad_norm": 23.746618270874023, |
| "learning_rate": 1.293e-06, |
| "loss": -10.7141, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.000432216108054027, |
| "grad_norm": 26.598947525024414, |
| "learning_rate": 1.296e-06, |
| "loss": -10.6483, |
| "step": 432 |
| }, |
| { |
| "completion_length": 243.02083587646484, |
| "epoch": 0.00043321660830415205, |
| "grad_norm": 21.561887741088867, |
| "learning_rate": 1.299e-06, |
| "loss": -4.2898, |
| "reward": 0.16087963432073593, |
| "reward_std": 0.14164353907108307, |
| "rewards/sudoku_reward_func": 0.16087962687015533, |
| "step": 433, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.00043421710855427715, |
| "grad_norm": 19.212657928466797, |
| "learning_rate": 1.302e-06, |
| "loss": -3.3441, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.0004352176088044022, |
| "grad_norm": 22.98586654663086, |
| "learning_rate": 1.305e-06, |
| "loss": -3.923, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.0004362181090545273, |
| "grad_norm": 23.236225128173828, |
| "learning_rate": 1.308e-06, |
| "loss": -5.2046, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.0004372186093046523, |
| "grad_norm": 21.43988800048828, |
| "learning_rate": 1.311e-06, |
| "loss": -4.4946, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.0004382191095547774, |
| "grad_norm": 19.148176193237305, |
| "learning_rate": 1.314e-06, |
| "loss": -3.5995, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.00043921960980490245, |
| "grad_norm": 24.136995315551758, |
| "learning_rate": 1.317e-06, |
| "loss": -4.2286, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.0004402201100550275, |
| "grad_norm": 23.57278823852539, |
| "learning_rate": 1.32e-06, |
| "loss": -5.5188, |
| "step": 440 |
| }, |
| { |
| "completion_length": 233.2916717529297, |
| "epoch": 0.0004412206103051526, |
| "grad_norm": 14.671692848205566, |
| "learning_rate": 1.323e-06, |
| "loss": -4.0371, |
| "reward": 0.09998046606779099, |
| "reward_std": 0.10778994113206863, |
| "rewards/sudoku_reward_func": 0.09998045861721039, |
| "step": 441, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0004422211105552776, |
| "grad_norm": 14.384123802185059, |
| "learning_rate": 1.326e-06, |
| "loss": -3.9219, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.0004432216108054027, |
| "grad_norm": 17.219676971435547, |
| "learning_rate": 1.3290000000000001e-06, |
| "loss": -3.6826, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.00044422211105552776, |
| "grad_norm": 11.303711891174316, |
| "learning_rate": 1.3320000000000001e-06, |
| "loss": -3.5771, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.0004452226113056528, |
| "grad_norm": 11.632588386535645, |
| "learning_rate": 1.3350000000000001e-06, |
| "loss": -4.084, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.0004462231115557779, |
| "grad_norm": 13.563810348510742, |
| "learning_rate": 1.3380000000000001e-06, |
| "loss": -4.0316, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.00044722361180590293, |
| "grad_norm": 16.55260467529297, |
| "learning_rate": 1.3410000000000002e-06, |
| "loss": -3.7837, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.00044822411205602803, |
| "grad_norm": 11.298389434814453, |
| "learning_rate": 1.344e-06, |
| "loss": -3.5847, |
| "step": 448 |
| }, |
| { |
| "completion_length": 247.14583587646484, |
| "epoch": 0.00044922461230615307, |
| "grad_norm": 25.21985626220703, |
| "learning_rate": 1.347e-06, |
| "loss": 6.0425, |
| "reward": 0.1645434945821762, |
| "reward_std": 0.15197737514972687, |
| "rewards/sudoku_reward_func": 0.1645434945821762, |
| "step": 449, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.00045022511255627816, |
| "grad_norm": 23.20073699951172, |
| "learning_rate": 1.35e-06, |
| "loss": 5.8398, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.0004512256128064032, |
| "grad_norm": 24.370487213134766, |
| "learning_rate": 1.353e-06, |
| "loss": 6.6032, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.00045222611305652824, |
| "grad_norm": 23.520915985107422, |
| "learning_rate": 1.356e-06, |
| "loss": 5.9308, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.00045322661330665333, |
| "grad_norm": 25.149738311767578, |
| "learning_rate": 1.359e-06, |
| "loss": 6.0518, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.0004542271135567784, |
| "grad_norm": 22.509672164916992, |
| "learning_rate": 1.362e-06, |
| "loss": 5.8463, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.00045522761380690347, |
| "grad_norm": 23.885211944580078, |
| "learning_rate": 1.365e-06, |
| "loss": 6.5497, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.0004562281140570285, |
| "grad_norm": 23.235668182373047, |
| "learning_rate": 1.368e-06, |
| "loss": 5.8002, |
| "step": 456 |
| }, |
| { |
| "completion_length": 233.52083587646484, |
| "epoch": 0.0004572286143071536, |
| "grad_norm": 13.065637588500977, |
| "learning_rate": 1.371e-06, |
| "loss": -4.969, |
| "reward": 0.11541005969047546, |
| "reward_std": 0.12353448569774628, |
| "rewards/sudoku_reward_func": 0.11541005223989487, |
| "step": 457, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.00045822911455727864, |
| "grad_norm": 15.100817680358887, |
| "learning_rate": 1.374e-06, |
| "loss": -4.0103, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.0004592296148074037, |
| "grad_norm": 13.022795677185059, |
| "learning_rate": 1.3770000000000001e-06, |
| "loss": -4.4582, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.0004602301150575288, |
| "grad_norm": 15.367870330810547, |
| "learning_rate": 1.3800000000000001e-06, |
| "loss": -4.6818, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.0004612306153076538, |
| "grad_norm": 13.363811492919922, |
| "learning_rate": 1.3830000000000001e-06, |
| "loss": -5.0405, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.0004622311155577789, |
| "grad_norm": 22.934417724609375, |
| "learning_rate": 1.3860000000000002e-06, |
| "loss": -4.0986, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.00046323161580790395, |
| "grad_norm": 14.163771629333496, |
| "learning_rate": 1.3890000000000002e-06, |
| "loss": -4.5348, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.000464232116058029, |
| "grad_norm": 14.093031883239746, |
| "learning_rate": 1.3920000000000002e-06, |
| "loss": -4.6754, |
| "step": 464 |
| }, |
| { |
| "completion_length": 238.33333587646484, |
| "epoch": 0.0004652326163081541, |
| "grad_norm": 18.271038055419922, |
| "learning_rate": 1.3950000000000002e-06, |
| "loss": 3.2038, |
| "reward": 0.1049107164144516, |
| "reward_std": 0.11827318742871284, |
| "rewards/sudoku_reward_func": 0.1049107164144516, |
| "step": 465, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0004662331165582791, |
| "grad_norm": 20.824806213378906, |
| "learning_rate": 1.3980000000000002e-06, |
| "loss": 2.997, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.0004672336168084042, |
| "grad_norm": 16.93488883972168, |
| "learning_rate": 1.401e-06, |
| "loss": 2.7855, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.00046823411705852925, |
| "grad_norm": 17.172334671020508, |
| "learning_rate": 1.404e-06, |
| "loss": 2.7988, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.00046923461730865435, |
| "grad_norm": 18.538854598999023, |
| "learning_rate": 1.407e-06, |
| "loss": 3.0902, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.0004702351175587794, |
| "grad_norm": 20.743358612060547, |
| "learning_rate": 1.41e-06, |
| "loss": 2.799, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.00047123561780890443, |
| "grad_norm": 17.690128326416016, |
| "learning_rate": 1.4129999999999999e-06, |
| "loss": 2.6563, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.0004722361180590295, |
| "grad_norm": 17.46112632751465, |
| "learning_rate": 1.4159999999999999e-06, |
| "loss": 2.5592, |
| "step": 472 |
| }, |
| { |
| "completion_length": 234.6875, |
| "epoch": 0.00047323661830915456, |
| "grad_norm": 14.19119930267334, |
| "learning_rate": 1.4189999999999999e-06, |
| "loss": 5.9519, |
| "reward": 0.1398809626698494, |
| "reward_std": 0.12144653871655464, |
| "rewards/sudoku_reward_func": 0.1398809552192688, |
| "step": 473, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.00047423711855927966, |
| "grad_norm": 15.279950141906738, |
| "learning_rate": 1.422e-06, |
| "loss": 6.337, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.0004752376188094047, |
| "grad_norm": 15.904949188232422, |
| "learning_rate": 1.425e-06, |
| "loss": 6.6797, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.0004762381190595298, |
| "grad_norm": 15.839500427246094, |
| "learning_rate": 1.428e-06, |
| "loss": 4.1356, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.00047723861930965483, |
| "grad_norm": 14.34893798828125, |
| "learning_rate": 1.431e-06, |
| "loss": 5.7966, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.00047823911955977987, |
| "grad_norm": 15.247032165527344, |
| "learning_rate": 1.434e-06, |
| "loss": 6.1297, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.00047923961980990496, |
| "grad_norm": 17.156147003173828, |
| "learning_rate": 1.437e-06, |
| "loss": 6.4489, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.00048024012006003, |
| "grad_norm": 14.10362720489502, |
| "learning_rate": 1.44e-06, |
| "loss": 4.0195, |
| "step": 480 |
| }, |
| { |
| "completion_length": 244.4791717529297, |
| "epoch": 0.0004812406203101551, |
| "grad_norm": 12.527475357055664, |
| "learning_rate": 1.443e-06, |
| "loss": 3.0738, |
| "reward": 0.11999834701418877, |
| "reward_std": 0.13045284524559975, |
| "rewards/sudoku_reward_func": 0.11999834701418877, |
| "step": 481, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.00048224112056028014, |
| "grad_norm": 13.442937850952148, |
| "learning_rate": 1.446e-06, |
| "loss": 2.4034, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.00048324162081040523, |
| "grad_norm": 16.119911193847656, |
| "learning_rate": 1.449e-06, |
| "loss": 4.0128, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.00048424212106053027, |
| "grad_norm": 17.460521697998047, |
| "learning_rate": 1.452e-06, |
| "loss": 3.8593, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.0004852426213106553, |
| "grad_norm": 13.456750869750977, |
| "learning_rate": 1.455e-06, |
| "loss": 2.987, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.0004862431215607804, |
| "grad_norm": 13.413694381713867, |
| "learning_rate": 1.458e-06, |
| "loss": 2.3376, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.00048724362181090544, |
| "grad_norm": 17.877710342407227, |
| "learning_rate": 1.461e-06, |
| "loss": 3.8962, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.00048824412206103054, |
| "grad_norm": 18.55176544189453, |
| "learning_rate": 1.464e-06, |
| "loss": 3.7211, |
| "step": 488 |
| }, |
| { |
| "completion_length": 226.89583587646484, |
| "epoch": 0.0004892446223111556, |
| "grad_norm": 15.29721736907959, |
| "learning_rate": 1.467e-06, |
| "loss": 1.0551, |
| "reward": 0.11888227611780167, |
| "reward_std": 0.14588766545057297, |
| "rewards/sudoku_reward_func": 0.11888227611780167, |
| "step": 489, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0004902451225612806, |
| "grad_norm": 14.319069862365723, |
| "learning_rate": 1.4700000000000001e-06, |
| "loss": 0.2161, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.0004912456228114057, |
| "grad_norm": 16.261802673339844, |
| "learning_rate": 1.473e-06, |
| "loss": -1.4613, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.0004922461230615308, |
| "grad_norm": 19.536277770996094, |
| "learning_rate": 1.476e-06, |
| "loss": -1.9982, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.0004932466233116558, |
| "grad_norm": 14.717618942260742, |
| "learning_rate": 1.479e-06, |
| "loss": 0.9957, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.0004942471235617809, |
| "grad_norm": 14.713449478149414, |
| "learning_rate": 1.482e-06, |
| "loss": 0.1798, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.000495247623811906, |
| "grad_norm": 15.294724464416504, |
| "learning_rate": 1.485e-06, |
| "loss": -1.6165, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.000496248124062031, |
| "grad_norm": 18.315418243408203, |
| "learning_rate": 1.488e-06, |
| "loss": -2.0018, |
| "step": 496 |
| }, |
| { |
| "completion_length": 246.4166717529297, |
| "epoch": 0.0004972486243121561, |
| "grad_norm": 19.199636459350586, |
| "learning_rate": 1.491e-06, |
| "loss": 0.9573, |
| "reward": 0.17453178763389587, |
| "reward_std": 0.13891105726361275, |
| "rewards/sudoku_reward_func": 0.17453177273273468, |
| "step": 497, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0004982491245622812, |
| "grad_norm": 33.779850006103516, |
| "learning_rate": 1.494e-06, |
| "loss": 2.7422, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.0004992496248124062, |
| "grad_norm": 19.790515899658203, |
| "learning_rate": 1.497e-06, |
| "loss": 0.8765, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.0005002501250625312, |
| "grad_norm": 36.270423889160156, |
| "learning_rate": 1.5e-06, |
| "loss": 1.9353, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.0005012506253126563, |
| "grad_norm": 18.44716453552246, |
| "learning_rate": 1.503e-06, |
| "loss": 0.8864, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.0005022511255627814, |
| "grad_norm": 32.266937255859375, |
| "learning_rate": 1.506e-06, |
| "loss": 2.6481, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.0005032516258129064, |
| "grad_norm": 20.195646286010742, |
| "learning_rate": 1.509e-06, |
| "loss": 0.6608, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.0005042521260630315, |
| "grad_norm": 24.758575439453125, |
| "learning_rate": 1.512e-06, |
| "loss": 1.8413, |
| "step": 504 |
| }, |
| { |
| "completion_length": 235.83333587646484, |
| "epoch": 0.0005052526263131566, |
| "grad_norm": 17.032028198242188, |
| "learning_rate": 1.5150000000000001e-06, |
| "loss": -4.68, |
| "reward": 0.15310095250606537, |
| "reward_std": 0.15669506788253784, |
| "rewards/sudoku_reward_func": 0.15310094505548477, |
| "step": 505, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0005062531265632817, |
| "grad_norm": 17.817955017089844, |
| "learning_rate": 1.5180000000000001e-06, |
| "loss": -5.7518, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.0005072536268134067, |
| "grad_norm": 20.450265884399414, |
| "learning_rate": 1.5210000000000001e-06, |
| "loss": -4.9771, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.0005082541270635318, |
| "grad_norm": 14.851226806640625, |
| "learning_rate": 1.5240000000000001e-06, |
| "loss": -4.6316, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.0005092546273136569, |
| "grad_norm": 17.262596130371094, |
| "learning_rate": 1.5270000000000002e-06, |
| "loss": -4.953, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.0005102551275637818, |
| "grad_norm": 17.57811737060547, |
| "learning_rate": 1.53e-06, |
| "loss": -5.7844, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.0005112556278139069, |
| "grad_norm": 19.80550765991211, |
| "learning_rate": 1.533e-06, |
| "loss": -5.0486, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.000512256128064032, |
| "grad_norm": 16.75063133239746, |
| "learning_rate": 1.536e-06, |
| "loss": -4.7158, |
| "step": 512 |
| }, |
| { |
| "completion_length": 235.68750762939453, |
| "epoch": 0.0005132566283141571, |
| "grad_norm": 15.94558048248291, |
| "learning_rate": 1.539e-06, |
| "loss": 0.6142, |
| "reward": 0.11516203731298447, |
| "reward_std": 0.11813021078705788, |
| "rewards/sudoku_reward_func": 0.11516203358769417, |
| "step": 513, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0005142571285642821, |
| "grad_norm": 16.774120330810547, |
| "learning_rate": 1.542e-06, |
| "loss": 0.9026, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.0005152576288144072, |
| "grad_norm": 17.249300003051758, |
| "learning_rate": 1.545e-06, |
| "loss": 0.3232, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.0005162581290645323, |
| "grad_norm": 15.842235565185547, |
| "learning_rate": 1.548e-06, |
| "loss": 0.4665, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.0005172586293146573, |
| "grad_norm": 15.682981491088867, |
| "learning_rate": 1.551e-06, |
| "loss": 0.4711, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.0005182591295647824, |
| "grad_norm": 17.212305068969727, |
| "learning_rate": 1.554e-06, |
| "loss": 0.7602, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.0005192596298149075, |
| "grad_norm": 16.74599838256836, |
| "learning_rate": 1.557e-06, |
| "loss": 0.1957, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.0005202601300650326, |
| "grad_norm": 15.095890998840332, |
| "learning_rate": 1.56e-06, |
| "loss": 0.2926, |
| "step": 520 |
| }, |
| { |
| "completion_length": 236.02083587646484, |
| "epoch": 0.0005212606303151576, |
| "grad_norm": 18.716089248657227, |
| "learning_rate": 1.5630000000000001e-06, |
| "loss": 2.8831, |
| "reward": 0.13464632630348206, |
| "reward_std": 0.13019248098134995, |
| "rewards/sudoku_reward_func": 0.13464632630348206, |
| "step": 521, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0005222611305652826, |
| "grad_norm": 18.289785385131836, |
| "learning_rate": 1.5660000000000001e-06, |
| "loss": 3.3606, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.0005232616308154077, |
| "grad_norm": 17.296327590942383, |
| "learning_rate": 1.5690000000000001e-06, |
| "loss": 2.1803, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.0005242621310655327, |
| "grad_norm": 16.455181121826172, |
| "learning_rate": 1.5720000000000002e-06, |
| "loss": 3.8618, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.0005252626313156578, |
| "grad_norm": 17.295499801635742, |
| "learning_rate": 1.5750000000000002e-06, |
| "loss": 2.8034, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.0005262631315657829, |
| "grad_norm": 17.529541015625, |
| "learning_rate": 1.5780000000000002e-06, |
| "loss": 3.2605, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.000527263631815908, |
| "grad_norm": 16.862716674804688, |
| "learning_rate": 1.5810000000000002e-06, |
| "loss": 2.0709, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.000528264132066033, |
| "grad_norm": 16.12912368774414, |
| "learning_rate": 1.5840000000000002e-06, |
| "loss": 3.6643, |
| "step": 528 |
| }, |
| { |
| "completion_length": 236.33334350585938, |
| "epoch": 0.0005292646323161581, |
| "grad_norm": 16.581037521362305, |
| "learning_rate": 1.5870000000000002e-06, |
| "loss": 6.703, |
| "reward": 0.12632275372743607, |
| "reward_std": 0.14028233289718628, |
| "rewards/sudoku_reward_func": 0.12632274255156517, |
| "step": 529, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0005302651325662832, |
| "grad_norm": 19.885190963745117, |
| "learning_rate": 1.59e-06, |
| "loss": 8.2868, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.0005312656328164082, |
| "grad_norm": 17.113710403442383, |
| "learning_rate": 1.593e-06, |
| "loss": 6.2016, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.0005322661330665333, |
| "grad_norm": 16.769590377807617, |
| "learning_rate": 1.596e-06, |
| "loss": 6.119, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.0005332666333166584, |
| "grad_norm": 15.997132301330566, |
| "learning_rate": 1.599e-06, |
| "loss": 6.4742, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.0005342671335667833, |
| "grad_norm": 21.274337768554688, |
| "learning_rate": 1.602e-06, |
| "loss": 8.0824, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.0005352676338169084, |
| "grad_norm": 16.52000617980957, |
| "learning_rate": 1.605e-06, |
| "loss": 5.991, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.0005362681340670335, |
| "grad_norm": 16.728483200073242, |
| "learning_rate": 1.608e-06, |
| "loss": 5.9626, |
| "step": 536 |
| }, |
| { |
| "completion_length": 246.02083587646484, |
| "epoch": 0.0005372686343171586, |
| "grad_norm": 19.841209411621094, |
| "learning_rate": 1.6110000000000001e-06, |
| "loss": 4.3933, |
| "reward": 0.17916516959667206, |
| "reward_std": 0.16655328124761581, |
| "rewards/sudoku_reward_func": 0.17916516214609146, |
| "step": 537, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0005382691345672836, |
| "grad_norm": 21.26980972290039, |
| "learning_rate": 1.6140000000000001e-06, |
| "loss": 3.4108, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.0005392696348174087, |
| "grad_norm": 18.89370346069336, |
| "learning_rate": 1.6170000000000001e-06, |
| "loss": 1.9086, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.0005402701350675338, |
| "grad_norm": 19.744796752929688, |
| "learning_rate": 1.6200000000000002e-06, |
| "loss": 1.4578, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.0005412706353176588, |
| "grad_norm": 21.075912475585938, |
| "learning_rate": 1.6230000000000002e-06, |
| "loss": 4.0819, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.0005422711355677839, |
| "grad_norm": 19.98610496520996, |
| "learning_rate": 1.6260000000000002e-06, |
| "loss": 3.0692, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.000543271635817909, |
| "grad_norm": 19.12093162536621, |
| "learning_rate": 1.6290000000000002e-06, |
| "loss": 1.5771, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.0005442721360680341, |
| "grad_norm": 20.679964065551758, |
| "learning_rate": 1.6320000000000002e-06, |
| "loss": 1.1791, |
| "step": 544 |
| }, |
| { |
| "completion_length": 228.52083587646484, |
| "epoch": 0.000545272636318159, |
| "grad_norm": 15.315961837768555, |
| "learning_rate": 1.6350000000000002e-06, |
| "loss": -6.7685, |
| "reward": 0.16216104477643967, |
| "reward_std": 0.12737327441573143, |
| "rewards/sudoku_reward_func": 0.16216104477643967, |
| "step": 545, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0005462731365682841, |
| "grad_norm": 16.78080940246582, |
| "learning_rate": 1.6380000000000002e-06, |
| "loss": -6.3753, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.0005472736368184092, |
| "grad_norm": 14.62249755859375, |
| "learning_rate": 1.6410000000000003e-06, |
| "loss": -5.4868, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.0005482741370685342, |
| "grad_norm": 15.880613327026367, |
| "learning_rate": 1.6440000000000003e-06, |
| "loss": -5.5422, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.0005492746373186593, |
| "grad_norm": 15.091645240783691, |
| "learning_rate": 1.647e-06, |
| "loss": -6.8809, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.0005502751375687844, |
| "grad_norm": 16.886802673339844, |
| "learning_rate": 1.65e-06, |
| "loss": -6.6144, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.0005512756378189095, |
| "grad_norm": 15.56815242767334, |
| "learning_rate": 1.653e-06, |
| "loss": -5.6943, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.0005522761380690345, |
| "grad_norm": 15.692801475524902, |
| "learning_rate": 1.6560000000000001e-06, |
| "loss": -5.7414, |
| "step": 552 |
| }, |
| { |
| "completion_length": 225.77083587646484, |
| "epoch": 0.0005532766383191596, |
| "grad_norm": 14.722847938537598, |
| "learning_rate": 1.6590000000000001e-06, |
| "loss": -2.6607, |
| "reward": 0.16656897217035294, |
| "reward_std": 0.11782306060194969, |
| "rewards/sudoku_reward_func": 0.16656896471977234, |
| "step": 553, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0005542771385692847, |
| "grad_norm": 18.4454402923584, |
| "learning_rate": 1.6620000000000001e-06, |
| "loss": -1.9324, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.0005552776388194097, |
| "grad_norm": 13.558207511901855, |
| "learning_rate": 1.6650000000000002e-06, |
| "loss": -2.5447, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.0005562781390695348, |
| "grad_norm": 17.83416175842285, |
| "learning_rate": 1.6680000000000002e-06, |
| "loss": -0.7552, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.0005572786393196598, |
| "grad_norm": 14.537623405456543, |
| "learning_rate": 1.6710000000000002e-06, |
| "loss": -2.8753, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.0005582791395697849, |
| "grad_norm": 18.245431900024414, |
| "learning_rate": 1.6740000000000002e-06, |
| "loss": -2.1203, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.0005592796398199099, |
| "grad_norm": 13.693886756896973, |
| "learning_rate": 1.6770000000000002e-06, |
| "loss": -2.7668, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.000560280140070035, |
| "grad_norm": 16.825647354125977, |
| "learning_rate": 1.6800000000000002e-06, |
| "loss": -0.9397, |
| "step": 560 |
| }, |
| { |
| "completion_length": 230.62500762939453, |
| "epoch": 0.0005612806403201601, |
| "grad_norm": 24.731273651123047, |
| "learning_rate": 1.6830000000000002e-06, |
| "loss": 0.4022, |
| "reward": 0.1626984179019928, |
| "reward_std": 0.14903082698583603, |
| "rewards/sudoku_reward_func": 0.1626984104514122, |
| "step": 561, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0005622811405702851, |
| "grad_norm": 23.209733963012695, |
| "learning_rate": 1.6860000000000002e-06, |
| "loss": 0.5578, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.0005632816408204102, |
| "grad_norm": 21.375064849853516, |
| "learning_rate": 1.6889999999999998e-06, |
| "loss": 1.3835, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.0005642821410705353, |
| "grad_norm": 22.1075496673584, |
| "learning_rate": 1.6919999999999999e-06, |
| "loss": 1.5969, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.0005652826413206604, |
| "grad_norm": 25.393898010253906, |
| "learning_rate": 1.6949999999999999e-06, |
| "loss": 0.2459, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.0005662831415707854, |
| "grad_norm": 21.525785446166992, |
| "learning_rate": 1.6979999999999999e-06, |
| "loss": 0.4449, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.0005672836418209105, |
| "grad_norm": 20.55721664428711, |
| "learning_rate": 1.7009999999999999e-06, |
| "loss": 1.1952, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.0005682841420710356, |
| "grad_norm": 22.30531120300293, |
| "learning_rate": 1.704e-06, |
| "loss": 1.2676, |
| "step": 568 |
| }, |
| { |
| "completion_length": 235.375, |
| "epoch": 0.0005692846423211605, |
| "grad_norm": 22.03434181213379, |
| "learning_rate": 1.707e-06, |
| "loss": 4.3422, |
| "reward": 0.20998678356409073, |
| "reward_std": 0.18079549074172974, |
| "rewards/sudoku_reward_func": 0.20998677611351013, |
| "step": 569, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0005702851425712856, |
| "grad_norm": 32.50233459472656, |
| "learning_rate": 1.71e-06, |
| "loss": 2.9145, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.0005712856428214107, |
| "grad_norm": 46.461246490478516, |
| "learning_rate": 1.713e-06, |
| "loss": 6.4925, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.0005722861430715357, |
| "grad_norm": 24.894067764282227, |
| "learning_rate": 1.716e-06, |
| "loss": 3.9048, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.0005732866433216608, |
| "grad_norm": 22.30925941467285, |
| "learning_rate": 1.719e-06, |
| "loss": 4.0346, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.0005742871435717859, |
| "grad_norm": 26.490528106689453, |
| "learning_rate": 1.722e-06, |
| "loss": 2.5862, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.000575287643821911, |
| "grad_norm": 50.72327423095703, |
| "learning_rate": 1.725e-06, |
| "loss": 6.3471, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.000576288144072036, |
| "grad_norm": 23.435848236083984, |
| "learning_rate": 1.728e-06, |
| "loss": 3.6186, |
| "step": 576 |
| }, |
| { |
| "completion_length": 230.9791717529297, |
| "epoch": 0.0005772886443221611, |
| "grad_norm": 60.76210021972656, |
| "learning_rate": 1.7309999999999998e-06, |
| "loss": 9.3164, |
| "reward": 0.16644121706485748, |
| "reward_std": 0.1695634052157402, |
| "rewards/sudoku_reward_func": 0.16644120961427689, |
| "step": 577, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0005782891445722862, |
| "grad_norm": 23.458660125732422, |
| "learning_rate": 1.7339999999999998e-06, |
| "loss": 10.5865, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.0005792896448224112, |
| "grad_norm": 22.109237670898438, |
| "learning_rate": 1.7369999999999998e-06, |
| "loss": 9.5307, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.0005802901450725362, |
| "grad_norm": 20.859468460083008, |
| "learning_rate": 1.7399999999999999e-06, |
| "loss": 11.4446, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.0005812906453226613, |
| "grad_norm": 19.533044815063477, |
| "learning_rate": 1.7429999999999999e-06, |
| "loss": 9.1447, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.0005822911455727864, |
| "grad_norm": 23.0717716217041, |
| "learning_rate": 1.7459999999999999e-06, |
| "loss": 10.3917, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.0005832916458229114, |
| "grad_norm": 23.079936981201172, |
| "learning_rate": 1.749e-06, |
| "loss": 9.4, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.0005842921460730365, |
| "grad_norm": 21.109201431274414, |
| "learning_rate": 1.752e-06, |
| "loss": 11.2249, |
| "step": 584 |
| }, |
| { |
| "completion_length": 236.4791717529297, |
| "epoch": 0.0005852926463231616, |
| "grad_norm": 15.436999320983887, |
| "learning_rate": 1.755e-06, |
| "loss": -1.8151, |
| "reward": 0.1277281753718853, |
| "reward_std": 0.11017344892024994, |
| "rewards/sudoku_reward_func": 0.1277281753718853, |
| "step": 585, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0005862931465732866, |
| "grad_norm": 14.275968551635742, |
| "learning_rate": 1.758e-06, |
| "loss": -1.466, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.0005872936468234117, |
| "grad_norm": 15.81253719329834, |
| "learning_rate": 1.761e-06, |
| "loss": -2.2609, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.0005882941470735368, |
| "grad_norm": 14.601688385009766, |
| "learning_rate": 1.764e-06, |
| "loss": -1.7179, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.0005892946473236619, |
| "grad_norm": 14.18327522277832, |
| "learning_rate": 1.767e-06, |
| "loss": -1.9194, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.0005902951475737869, |
| "grad_norm": 14.13171672821045, |
| "learning_rate": 1.77e-06, |
| "loss": -1.6122, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.000591295647823912, |
| "grad_norm": 17.244855880737305, |
| "learning_rate": 1.773e-06, |
| "loss": -2.3572, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.000592296148074037, |
| "grad_norm": 15.741546630859375, |
| "learning_rate": 1.776e-06, |
| "loss": -1.8189, |
| "step": 592 |
| }, |
| { |
| "completion_length": 235.0416717529297, |
| "epoch": 0.000593296648324162, |
| "grad_norm": 25.220417022705078, |
| "learning_rate": 1.779e-06, |
| "loss": 5.7432, |
| "reward": 0.2039930671453476, |
| "reward_std": 0.1585550457239151, |
| "rewards/sudoku_reward_func": 0.203993059694767, |
| "step": 593, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0005942971485742871, |
| "grad_norm": 24.24148941040039, |
| "learning_rate": 1.782e-06, |
| "loss": 7.6423, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.0005952976488244122, |
| "grad_norm": 33.06986999511719, |
| "learning_rate": 1.785e-06, |
| "loss": 9.136, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.0005962981490745373, |
| "grad_norm": 26.336284637451172, |
| "learning_rate": 1.7879999999999999e-06, |
| "loss": 6.7126, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.0005972986493246623, |
| "grad_norm": 25.30630111694336, |
| "learning_rate": 1.7909999999999999e-06, |
| "loss": 5.4191, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.0005982991495747874, |
| "grad_norm": 24.444787979125977, |
| "learning_rate": 1.7939999999999999e-06, |
| "loss": 7.3168, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.0005992996498249125, |
| "grad_norm": 31.10487174987793, |
| "learning_rate": 1.797e-06, |
| "loss": 8.7812, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.0006003001500750375, |
| "grad_norm": 24.562986373901367, |
| "learning_rate": 1.8e-06, |
| "loss": 6.3572, |
| "step": 600 |
| }, |
| { |
| "completion_length": 239.6041717529297, |
| "epoch": 0.0006013006503251626, |
| "grad_norm": 17.44729232788086, |
| "learning_rate": 1.803e-06, |
| "loss": -1.0856, |
| "reward": 0.15695270895957947, |
| "reward_std": 0.1305704563856125, |
| "rewards/sudoku_reward_func": 0.15695270895957947, |
| "step": 601, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0006023011505752877, |
| "grad_norm": 17.973352432250977, |
| "learning_rate": 1.806e-06, |
| "loss": -1.5651, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.0006033016508254128, |
| "grad_norm": 17.87987518310547, |
| "learning_rate": 1.809e-06, |
| "loss": -1.5749, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.0006043021510755377, |
| "grad_norm": 17.67951202392578, |
| "learning_rate": 1.812e-06, |
| "loss": -1.5796, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.0006053026513256628, |
| "grad_norm": 18.45180892944336, |
| "learning_rate": 1.815e-06, |
| "loss": -1.2079, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.0006063031515757879, |
| "grad_norm": 20.35084342956543, |
| "learning_rate": 1.818e-06, |
| "loss": -1.6413, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.0006073036518259129, |
| "grad_norm": 17.7126407623291, |
| "learning_rate": 1.821e-06, |
| "loss": -1.7646, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.000608304152076038, |
| "grad_norm": 17.698877334594727, |
| "learning_rate": 1.824e-06, |
| "loss": -1.7541, |
| "step": 608 |
| }, |
| { |
| "completion_length": 224.4791717529297, |
| "epoch": 0.0006093046523261631, |
| "grad_norm": 31.530330657958984, |
| "learning_rate": 1.827e-06, |
| "loss": 12.5315, |
| "reward": 0.21771661192178726, |
| "reward_std": 0.2013006955385208, |
| "rewards/sudoku_reward_func": 0.21771660447120667, |
| "step": 609, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0006103051525762882, |
| "grad_norm": 29.887935638427734, |
| "learning_rate": 1.83e-06, |
| "loss": 14.9137, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.0006113056528264132, |
| "grad_norm": 33.39522171020508, |
| "learning_rate": 1.833e-06, |
| "loss": 15.9311, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.0006123061530765383, |
| "grad_norm": 32.336585998535156, |
| "learning_rate": 1.836e-06, |
| "loss": 12.8304, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.0006133066533266634, |
| "grad_norm": 33.42409133911133, |
| "learning_rate": 1.839e-06, |
| "loss": 12.3461, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.0006143071535767884, |
| "grad_norm": 31.57464027404785, |
| "learning_rate": 1.8420000000000001e-06, |
| "loss": 14.4472, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.0006153076538269135, |
| "grad_norm": 30.877559661865234, |
| "learning_rate": 1.8450000000000001e-06, |
| "loss": 15.3655, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.0006163081540770385, |
| "grad_norm": 34.25657272338867, |
| "learning_rate": 1.848e-06, |
| "loss": 12.2993, |
| "step": 616 |
| }, |
| { |
| "completion_length": 242.52084350585938, |
| "epoch": 0.0006173086543271635, |
| "grad_norm": 19.929956436157227, |
| "learning_rate": 1.851e-06, |
| "loss": 5.3496, |
| "reward": 0.17189379036426544, |
| "reward_std": 0.1432012841105461, |
| "rewards/sudoku_reward_func": 0.17189379036426544, |
| "step": 617, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0006183091545772886, |
| "grad_norm": 21.747644424438477, |
| "learning_rate": 1.854e-06, |
| "loss": 3.4253, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.0006193096548274137, |
| "grad_norm": 19.652868270874023, |
| "learning_rate": 1.857e-06, |
| "loss": 4.5252, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.0006203101550775388, |
| "grad_norm": 18.432842254638672, |
| "learning_rate": 1.86e-06, |
| "loss": 4.9895, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.0006213106553276638, |
| "grad_norm": 19.793832778930664, |
| "learning_rate": 1.863e-06, |
| "loss": 5.1259, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.0006223111555777889, |
| "grad_norm": 24.654544830322266, |
| "learning_rate": 1.866e-06, |
| "loss": 3.2193, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.000623311655827914, |
| "grad_norm": 21.008930206298828, |
| "learning_rate": 1.869e-06, |
| "loss": 4.2154, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.000624312156078039, |
| "grad_norm": 19.32270622253418, |
| "learning_rate": 1.872e-06, |
| "loss": 4.7233, |
| "step": 624 |
| }, |
| { |
| "completion_length": 252.43750762939453, |
| "epoch": 0.0006253126563281641, |
| "grad_norm": 19.41009521484375, |
| "learning_rate": 1.875e-06, |
| "loss": -6.2214, |
| "reward": 0.14897486940026283, |
| "reward_std": 0.13010858744382858, |
| "rewards/sudoku_reward_func": 0.14897486940026283, |
| "step": 625, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0006263131565782892, |
| "grad_norm": 18.7078914642334, |
| "learning_rate": 1.878e-06, |
| "loss": -5.9653, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.0006273136568284143, |
| "grad_norm": 16.962142944335938, |
| "learning_rate": 1.881e-06, |
| "loss": -4.7354, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.0006283141570785392, |
| "grad_norm": 18.850299835205078, |
| "learning_rate": 1.884e-06, |
| "loss": -6.3041, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.0006293146573286643, |
| "grad_norm": 23.01152229309082, |
| "learning_rate": 1.887e-06, |
| "loss": -6.3494, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.0006303151575787894, |
| "grad_norm": 18.785537719726562, |
| "learning_rate": 1.8900000000000001e-06, |
| "loss": -6.116, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.0006313156578289144, |
| "grad_norm": 19.264942169189453, |
| "learning_rate": 1.8930000000000001e-06, |
| "loss": -4.9139, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.0006323161580790395, |
| "grad_norm": 17.685102462768555, |
| "learning_rate": 1.8960000000000001e-06, |
| "loss": -6.5377, |
| "step": 632 |
| }, |
| { |
| "completion_length": 254.0625, |
| "epoch": 0.0006333166583291646, |
| "grad_norm": 16.194541931152344, |
| "learning_rate": 1.8990000000000002e-06, |
| "loss": -0.5502, |
| "reward": 0.1833701804280281, |
| "reward_std": 0.12051501497626305, |
| "rewards/sudoku_reward_func": 0.1833701729774475, |
| "step": 633, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0006343171585792897, |
| "grad_norm": 17.165401458740234, |
| "learning_rate": 1.9020000000000002e-06, |
| "loss": 0.6505, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.0006353176588294147, |
| "grad_norm": 18.674211502075195, |
| "learning_rate": 1.905e-06, |
| "loss": -0.3794, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.0006363181590795398, |
| "grad_norm": 17.070940017700195, |
| "learning_rate": 1.908e-06, |
| "loss": 0.2266, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.0006373186593296649, |
| "grad_norm": 15.575688362121582, |
| "learning_rate": 1.911e-06, |
| "loss": -0.7375, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.0006383191595797899, |
| "grad_norm": 23.437768936157227, |
| "learning_rate": 1.9140000000000002e-06, |
| "loss": 0.5262, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.0006393196598299149, |
| "grad_norm": 18.74703598022461, |
| "learning_rate": 1.917e-06, |
| "loss": -0.6114, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.00064032016008004, |
| "grad_norm": 16.295331954956055, |
| "learning_rate": 1.9200000000000003e-06, |
| "loss": 0.0056, |
| "step": 640 |
| }, |
| { |
| "completion_length": 251.45833587646484, |
| "epoch": 0.0006413206603301651, |
| "grad_norm": 21.831514358520508, |
| "learning_rate": 1.923e-06, |
| "loss": 0.2665, |
| "reward": 0.17208169400691986, |
| "reward_std": 0.14966098219156265, |
| "rewards/sudoku_reward_func": 0.17208168655633926, |
| "step": 641, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0006423211605802901, |
| "grad_norm": 24.15346336364746, |
| "learning_rate": 1.9260000000000003e-06, |
| "loss": -0.7073, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.0006433216608304152, |
| "grad_norm": 21.65900993347168, |
| "learning_rate": 1.929e-06, |
| "loss": -1.115, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.0006443221610805403, |
| "grad_norm": 23.846370697021484, |
| "learning_rate": 1.9320000000000003e-06, |
| "loss": 0.2187, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.0006453226613306653, |
| "grad_norm": 21.314346313476562, |
| "learning_rate": 1.935e-06, |
| "loss": -0.0856, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.0006463231615807904, |
| "grad_norm": 25.002071380615234, |
| "learning_rate": 1.938e-06, |
| "loss": -1.0247, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.0006473236618309155, |
| "grad_norm": 21.471878051757812, |
| "learning_rate": 1.941e-06, |
| "loss": -1.4162, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.0006483241620810406, |
| "grad_norm": 23.55864715576172, |
| "learning_rate": 1.944e-06, |
| "loss": -0.163, |
| "step": 648 |
| }, |
| { |
| "completion_length": 250.5, |
| "epoch": 0.0006493246623311656, |
| "grad_norm": 19.517107009887695, |
| "learning_rate": 1.947e-06, |
| "loss": -1.1045, |
| "reward": 0.18660564720630646, |
| "reward_std": 0.1325959712266922, |
| "rewards/sudoku_reward_func": 0.18660564720630646, |
| "step": 649, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0006503251625812907, |
| "grad_norm": 18.44375991821289, |
| "learning_rate": 1.95e-06, |
| "loss": -0.3916, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.0006513256628314157, |
| "grad_norm": 22.07648468017578, |
| "learning_rate": 1.953e-06, |
| "loss": -0.2807, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.0006523261630815407, |
| "grad_norm": 23.970638275146484, |
| "learning_rate": 1.956e-06, |
| "loss": -2.3437, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.0006533266633316658, |
| "grad_norm": 21.739980697631836, |
| "learning_rate": 1.9590000000000002e-06, |
| "loss": -1.5197, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.0006543271635817909, |
| "grad_norm": 21.033462524414062, |
| "learning_rate": 1.962e-06, |
| "loss": -0.7898, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.0006553276638319159, |
| "grad_norm": 23.045095443725586, |
| "learning_rate": 1.9650000000000002e-06, |
| "loss": -0.7434, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.000656328164082041, |
| "grad_norm": 24.260587692260742, |
| "learning_rate": 1.968e-06, |
| "loss": -2.9049, |
| "step": 656 |
| }, |
| { |
| "completion_length": 248.58334350585938, |
| "epoch": 0.0006573286643321661, |
| "grad_norm": 25.21026039123535, |
| "learning_rate": 1.9710000000000003e-06, |
| "loss": -5.561, |
| "reward": 0.20556382834911346, |
| "reward_std": 0.17001141607761383, |
| "rewards/sudoku_reward_func": 0.20556382089853287, |
| "step": 657, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0006583291645822912, |
| "grad_norm": 25.44139862060547, |
| "learning_rate": 1.974e-06, |
| "loss": -4.1767, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.0006593296648324162, |
| "grad_norm": 27.466651916503906, |
| "learning_rate": 1.9770000000000003e-06, |
| "loss": -6.3143, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.0006603301650825413, |
| "grad_norm": 27.116817474365234, |
| "learning_rate": 1.98e-06, |
| "loss": -5.6863, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.0006613306653326664, |
| "grad_norm": 26.143383026123047, |
| "learning_rate": 1.9830000000000003e-06, |
| "loss": -5.8748, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.0006623311655827913, |
| "grad_norm": 30.05205726623535, |
| "learning_rate": 1.986e-06, |
| "loss": -4.5714, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.0006633316658329164, |
| "grad_norm": 28.53112030029297, |
| "learning_rate": 1.9890000000000004e-06, |
| "loss": -6.6444, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.0006643321660830415, |
| "grad_norm": 27.286012649536133, |
| "learning_rate": 1.992e-06, |
| "loss": -6.099, |
| "step": 664 |
| }, |
| { |
| "completion_length": 242.6041717529297, |
| "epoch": 0.0006653326663331666, |
| "grad_norm": 25.060420989990234, |
| "learning_rate": 1.995e-06, |
| "loss": -2.4716, |
| "reward": 0.2606647089123726, |
| "reward_std": 0.1507035493850708, |
| "rewards/sudoku_reward_func": 0.2606647089123726, |
| "step": 665, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0006663331665832916, |
| "grad_norm": 24.94710922241211, |
| "learning_rate": 1.998e-06, |
| "loss": -2.414, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.0006673336668334167, |
| "grad_norm": 24.827306747436523, |
| "learning_rate": 2.001e-06, |
| "loss": -3.3365, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.0006683341670835418, |
| "grad_norm": 24.628686904907227, |
| "learning_rate": 2.004e-06, |
| "loss": -3.3505, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.0006693346673336668, |
| "grad_norm": 24.80254364013672, |
| "learning_rate": 2.007e-06, |
| "loss": -2.7403, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.0006703351675837919, |
| "grad_norm": 24.99594497680664, |
| "learning_rate": 2.0100000000000002e-06, |
| "loss": -2.6696, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.000671335667833917, |
| "grad_norm": 27.10658073425293, |
| "learning_rate": 2.013e-06, |
| "loss": -3.8388, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.0006723361680840421, |
| "grad_norm": 29.956634521484375, |
| "learning_rate": 2.0160000000000003e-06, |
| "loss": -3.6414, |
| "step": 672 |
| }, |
| { |
| "completion_length": 247.64584350585938, |
| "epoch": 0.000673336668334167, |
| "grad_norm": 26.138870239257812, |
| "learning_rate": 2.019e-06, |
| "loss": 5.4727, |
| "reward": 0.18890543282032013, |
| "reward_std": 0.1443747878074646, |
| "rewards/sudoku_reward_func": 0.18890542536973953, |
| "step": 673, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0006743371685842921, |
| "grad_norm": 27.671852111816406, |
| "learning_rate": 2.0220000000000003e-06, |
| "loss": 6.6836, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.0006753376688344172, |
| "grad_norm": 30.31881332397461, |
| "learning_rate": 2.025e-06, |
| "loss": 5.5997, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.0006763381690845422, |
| "grad_norm": 23.821044921875, |
| "learning_rate": 2.0280000000000003e-06, |
| "loss": 4.0779, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.0006773386693346673, |
| "grad_norm": 24.792219161987305, |
| "learning_rate": 2.031e-06, |
| "loss": 5.4354, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.0006783391695847924, |
| "grad_norm": 28.62071990966797, |
| "learning_rate": 2.0340000000000003e-06, |
| "loss": 6.375, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.0006793396698349175, |
| "grad_norm": 24.46523666381836, |
| "learning_rate": 2.037e-06, |
| "loss": 5.4811, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.0006803401700850425, |
| "grad_norm": 23.84539031982422, |
| "learning_rate": 2.0400000000000004e-06, |
| "loss": 3.906, |
| "step": 680 |
| }, |
| { |
| "completion_length": 255.14584350585938, |
| "epoch": 0.0006813406703351676, |
| "grad_norm": 29.295547485351562, |
| "learning_rate": 2.043e-06, |
| "loss": -4.581, |
| "reward": 0.22625061869621277, |
| "reward_std": 0.17611468583345413, |
| "rewards/sudoku_reward_func": 0.22625060379505157, |
| "step": 681, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0006823411705852927, |
| "grad_norm": 30.2603759765625, |
| "learning_rate": 2.0460000000000004e-06, |
| "loss": -4.5551, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.0006833416708354177, |
| "grad_norm": 35.28757095336914, |
| "learning_rate": 2.049e-06, |
| "loss": -5.3189, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.0006843421710855428, |
| "grad_norm": 34.68866729736328, |
| "learning_rate": 2.052e-06, |
| "loss": -5.9553, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.0006853426713356679, |
| "grad_norm": 29.826597213745117, |
| "learning_rate": 2.0550000000000002e-06, |
| "loss": -4.8571, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.000686343171585793, |
| "grad_norm": 35.04398727416992, |
| "learning_rate": 2.058e-06, |
| "loss": -4.8504, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.0006873436718359179, |
| "grad_norm": 31.422697067260742, |
| "learning_rate": 2.0610000000000003e-06, |
| "loss": -5.5763, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.000688344172086043, |
| "grad_norm": 32.0524787902832, |
| "learning_rate": 2.064e-06, |
| "loss": -6.1022, |
| "step": 688 |
| }, |
| { |
| "completion_length": 244.89583587646484, |
| "epoch": 0.0006893446723361681, |
| "grad_norm": 29.540935516357422, |
| "learning_rate": 2.067e-06, |
| "loss": -1.289, |
| "reward": 0.1987847313284874, |
| "reward_std": 0.13025881350040436, |
| "rewards/sudoku_reward_func": 0.1987847313284874, |
| "step": 689, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0006903451725862931, |
| "grad_norm": 28.919418334960938, |
| "learning_rate": 2.07e-06, |
| "loss": 0.1461, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.0006913456728364182, |
| "grad_norm": 23.043699264526367, |
| "learning_rate": 2.073e-06, |
| "loss": -0.6692, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.0006923461730865433, |
| "grad_norm": 23.756433486938477, |
| "learning_rate": 2.0759999999999997e-06, |
| "loss": 0.7958, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.0006933466733366683, |
| "grad_norm": 29.989591598510742, |
| "learning_rate": 2.079e-06, |
| "loss": -1.4441, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.0006943471735867934, |
| "grad_norm": 31.252445220947266, |
| "learning_rate": 2.0819999999999997e-06, |
| "loss": -0.1056, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.0006953476738369185, |
| "grad_norm": 23.959022521972656, |
| "learning_rate": 2.085e-06, |
| "loss": -0.9871, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.0006963481740870436, |
| "grad_norm": 22.977869033813477, |
| "learning_rate": 2.0879999999999997e-06, |
| "loss": 0.6185, |
| "step": 696 |
| }, |
| { |
| "completion_length": 252.7916717529297, |
| "epoch": 0.0006973486743371685, |
| "grad_norm": 24.07022476196289, |
| "learning_rate": 2.091e-06, |
| "loss": -3.2906, |
| "reward": 0.2117680013179779, |
| "reward_std": 0.11290831118822098, |
| "rewards/sudoku_reward_func": 0.2117679864168167, |
| "step": 697, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0006983491745872936, |
| "grad_norm": 17.388952255249023, |
| "learning_rate": 2.0939999999999998e-06, |
| "loss": -3.8824, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.0006993496748374187, |
| "grad_norm": 25.840599060058594, |
| "learning_rate": 2.097e-06, |
| "loss": -0.2673, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.0007003501750875437, |
| "grad_norm": 25.69615364074707, |
| "learning_rate": 2.1e-06, |
| "loss": -1.1923, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.0007013506753376688, |
| "grad_norm": 23.536897659301758, |
| "learning_rate": 2.103e-06, |
| "loss": -3.5789, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.0007023511755877939, |
| "grad_norm": 17.143381118774414, |
| "learning_rate": 2.106e-06, |
| "loss": -4.0471, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.000703351675837919, |
| "grad_norm": 27.47609519958496, |
| "learning_rate": 2.109e-06, |
| "loss": -0.4917, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.000704352176088044, |
| "grad_norm": 37.165225982666016, |
| "learning_rate": 2.112e-06, |
| "loss": -1.5096, |
| "step": 704 |
| }, |
| { |
| "completion_length": 245.81250762939453, |
| "epoch": 0.0007053526763381691, |
| "grad_norm": 24.30489158630371, |
| "learning_rate": 2.115e-06, |
| "loss": -4.6945, |
| "reward": 0.167658731341362, |
| "reward_std": 0.11763971298933029, |
| "rewards/sudoku_reward_func": 0.167658731341362, |
| "step": 705, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0007063531765882942, |
| "grad_norm": 23.941953659057617, |
| "learning_rate": 2.118e-06, |
| "loss": -4.4556, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.0007073536768384192, |
| "grad_norm": 24.57003402709961, |
| "learning_rate": 2.121e-06, |
| "loss": -4.9353, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.0007083541770885443, |
| "grad_norm": 26.972028732299805, |
| "learning_rate": 2.124e-06, |
| "loss": -6.7715, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.0007093546773386693, |
| "grad_norm": 27.066307067871094, |
| "learning_rate": 2.127e-06, |
| "loss": -4.7776, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.0007103551775887944, |
| "grad_norm": 21.551305770874023, |
| "learning_rate": 2.13e-06, |
| "loss": -4.4619, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.0007113556778389194, |
| "grad_norm": 25.452531814575195, |
| "learning_rate": 2.133e-06, |
| "loss": -5.1383, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.0007123561780890445, |
| "grad_norm": 27.460329055786133, |
| "learning_rate": 2.136e-06, |
| "loss": -7.0182, |
| "step": 712 |
| }, |
| { |
| "completion_length": 248.52083587646484, |
| "epoch": 0.0007133566783391696, |
| "grad_norm": 28.8990478515625, |
| "learning_rate": 2.1389999999999998e-06, |
| "loss": -3.1244, |
| "reward": 0.21841933578252792, |
| "reward_std": 0.14541570469737053, |
| "rewards/sudoku_reward_func": 0.21841932088136673, |
| "step": 713, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0007143571785892946, |
| "grad_norm": 26.447952270507812, |
| "learning_rate": 2.142e-06, |
| "loss": -1.9208, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.0007153576788394197, |
| "grad_norm": 31.950580596923828, |
| "learning_rate": 2.145e-06, |
| "loss": -2.0995, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.0007163581790895448, |
| "grad_norm": 24.73855209350586, |
| "learning_rate": 2.148e-06, |
| "loss": -4.2209, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.0007173586793396699, |
| "grad_norm": 27.81092071533203, |
| "learning_rate": 2.151e-06, |
| "loss": -3.2356, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.0007183591795897949, |
| "grad_norm": 28.132692337036133, |
| "learning_rate": 2.154e-06, |
| "loss": -2.1771, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.00071935967983992, |
| "grad_norm": 29.067340850830078, |
| "learning_rate": 2.157e-06, |
| "loss": -2.3805, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.0007203601800900451, |
| "grad_norm": 25.294504165649414, |
| "learning_rate": 2.16e-06, |
| "loss": -4.5135, |
| "step": 720 |
| }, |
| { |
| "completion_length": 248.2916717529297, |
| "epoch": 0.00072136068034017, |
| "grad_norm": 26.602191925048828, |
| "learning_rate": 2.163e-06, |
| "loss": -16.483, |
| "reward": 0.2065032720565796, |
| "reward_std": 0.13350703567266464, |
| "rewards/sudoku_reward_func": 0.2065032720565796, |
| "step": 721, |
| "zero_std_ratio": 0.125 |
| }, |
| { |
| "epoch": 0.0007223611805902951, |
| "grad_norm": 29.65574836730957, |
| "learning_rate": 2.166e-06, |
| "loss": -14.9039, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.0007233616808404202, |
| "grad_norm": 31.21291732788086, |
| "learning_rate": 2.169e-06, |
| "loss": -15.1445, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.0007243621810905453, |
| "grad_norm": 31.365436553955078, |
| "learning_rate": 2.172e-06, |
| "loss": -14.1896, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.0007253626813406703, |
| "grad_norm": 26.096881866455078, |
| "learning_rate": 2.175e-06, |
| "loss": -16.666, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.0007263631815907954, |
| "grad_norm": 31.20346450805664, |
| "learning_rate": 2.178e-06, |
| "loss": -15.4398, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.0007273636818409205, |
| "grad_norm": 30.130516052246094, |
| "learning_rate": 2.181e-06, |
| "loss": -15.3874, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.0007283641820910455, |
| "grad_norm": 31.089357376098633, |
| "learning_rate": 2.184e-06, |
| "loss": -14.4237, |
| "step": 728 |
| }, |
| { |
| "completion_length": 243.2916717529297, |
| "epoch": 0.0007293646823411706, |
| "grad_norm": 28.103757858276367, |
| "learning_rate": 2.187e-06, |
| "loss": 3.8517, |
| "reward": 0.23788856714963913, |
| "reward_std": 0.15394366532564163, |
| "rewards/sudoku_reward_func": 0.23788856714963913, |
| "step": 729, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0007303651825912957, |
| "grad_norm": 32.89771270751953, |
| "learning_rate": 2.19e-06, |
| "loss": 5.6428, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.0007313656828414207, |
| "grad_norm": 26.92860221862793, |
| "learning_rate": 2.193e-06, |
| "loss": 3.6873, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.0007323661830915458, |
| "grad_norm": 35.90951919555664, |
| "learning_rate": 2.196e-06, |
| "loss": 6.3758, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.0007333666833416708, |
| "grad_norm": 30.617719650268555, |
| "learning_rate": 2.199e-06, |
| "loss": 3.6392, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.0007343671835917959, |
| "grad_norm": 32.82959747314453, |
| "learning_rate": 2.202e-06, |
| "loss": 5.2549, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.0007353676838419209, |
| "grad_norm": 33.0678596496582, |
| "learning_rate": 2.205e-06, |
| "loss": 3.1626, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.000736368184092046, |
| "grad_norm": 34.600852966308594, |
| "learning_rate": 2.208e-06, |
| "loss": 5.8968, |
| "step": 736 |
| }, |
| { |
| "completion_length": 250.52084350585938, |
| "epoch": 0.0007373686843421711, |
| "grad_norm": 42.83121109008789, |
| "learning_rate": 2.211e-06, |
| "loss": 1.0832, |
| "reward": 0.16931217163801193, |
| "reward_std": 0.15145771950483322, |
| "rewards/sudoku_reward_func": 0.16931216418743134, |
| "step": 737, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0007383691845922961, |
| "grad_norm": 32.792449951171875, |
| "learning_rate": 2.214e-06, |
| "loss": 1.4861, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.0007393696848424212, |
| "grad_norm": 31.13545799255371, |
| "learning_rate": 2.217e-06, |
| "loss": 2.7522, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.0007403701850925463, |
| "grad_norm": 25.863052368164062, |
| "learning_rate": 2.22e-06, |
| "loss": 1.8101, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.0007413706853426714, |
| "grad_norm": 35.75248718261719, |
| "learning_rate": 2.223e-06, |
| "loss": 1.0109, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.0007423711855927964, |
| "grad_norm": 32.12532424926758, |
| "learning_rate": 2.226e-06, |
| "loss": 1.1726, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.0007433716858429215, |
| "grad_norm": 29.697542190551758, |
| "learning_rate": 2.229e-06, |
| "loss": 2.4716, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.0007443721860930466, |
| "grad_norm": 27.35248374938965, |
| "learning_rate": 2.232e-06, |
| "loss": 1.5878, |
| "step": 744 |
| }, |
| { |
| "completion_length": 249.3541717529297, |
| "epoch": 0.0007453726863431715, |
| "grad_norm": 23.706253051757812, |
| "learning_rate": 2.235e-06, |
| "loss": 6.3962, |
| "reward": 0.1799355298280716, |
| "reward_std": 0.12801172584295273, |
| "rewards/sudoku_reward_func": 0.179935522377491, |
| "step": 745, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0007463731865932966, |
| "grad_norm": 23.791215896606445, |
| "learning_rate": 2.238e-06, |
| "loss": 5.8298, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.0007473736868434217, |
| "grad_norm": 23.94332504272461, |
| "learning_rate": 2.2410000000000002e-06, |
| "loss": 7.1122, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.0007483741870935468, |
| "grad_norm": 24.198583602905273, |
| "learning_rate": 2.244e-06, |
| "loss": 5.6156, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.0007493746873436718, |
| "grad_norm": 25.003089904785156, |
| "learning_rate": 2.2470000000000003e-06, |
| "loss": 6.3101, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.0007503751875937969, |
| "grad_norm": 23.13814926147461, |
| "learning_rate": 2.25e-06, |
| "loss": 5.5756, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.000751375687843922, |
| "grad_norm": 23.709775924682617, |
| "learning_rate": 2.253e-06, |
| "loss": 6.8625, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.000752376188094047, |
| "grad_norm": 33.373619079589844, |
| "learning_rate": 2.256e-06, |
| "loss": 5.3697, |
| "step": 752 |
| }, |
| { |
| "completion_length": 249.33334350585938, |
| "epoch": 0.0007533766883441721, |
| "grad_norm": 36.459205627441406, |
| "learning_rate": 2.259e-06, |
| "loss": 1.3885, |
| "reward": 0.181175597012043, |
| "reward_std": 0.15113066881895065, |
| "rewards/sudoku_reward_func": 0.1811755895614624, |
| "step": 753, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0007543771885942972, |
| "grad_norm": 33.78456115722656, |
| "learning_rate": 2.262e-06, |
| "loss": 3.3189, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.0007553776888444223, |
| "grad_norm": 33.32498550415039, |
| "learning_rate": 2.265e-06, |
| "loss": 0.7809, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.0007563781890945472, |
| "grad_norm": 33.475223541259766, |
| "learning_rate": 2.268e-06, |
| "loss": 0.8343, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.0007573786893446723, |
| "grad_norm": 32.76923370361328, |
| "learning_rate": 2.271e-06, |
| "loss": 1.1428, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.0007583791895947974, |
| "grad_norm": 28.283016204833984, |
| "learning_rate": 2.274e-06, |
| "loss": 3.1434, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.0007593796898449224, |
| "grad_norm": 35.577857971191406, |
| "learning_rate": 2.277e-06, |
| "loss": 0.4024, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.0007603801900950475, |
| "grad_norm": 32.67893600463867, |
| "learning_rate": 2.28e-06, |
| "loss": 0.3714, |
| "step": 760 |
| }, |
| { |
| "completion_length": 247.0625, |
| "epoch": 0.0007613806903451726, |
| "grad_norm": 26.973373413085938, |
| "learning_rate": 2.283e-06, |
| "loss": -4.7676, |
| "reward": 0.20238097012043, |
| "reward_std": 0.12780283018946648, |
| "rewards/sudoku_reward_func": 0.2023809626698494, |
| "step": 761, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0007623811905952977, |
| "grad_norm": 26.879213333129883, |
| "learning_rate": 2.2860000000000002e-06, |
| "loss": -4.5721, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.0007633816908454227, |
| "grad_norm": 28.65926170349121, |
| "learning_rate": 2.289e-06, |
| "loss": -4.6671, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.0007643821910955478, |
| "grad_norm": 24.28839874267578, |
| "learning_rate": 2.2920000000000002e-06, |
| "loss": -5.1815, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.0007653826913456729, |
| "grad_norm": 26.704910278320312, |
| "learning_rate": 2.295e-06, |
| "loss": -5.2439, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.0007663831915957979, |
| "grad_norm": 25.38524055480957, |
| "learning_rate": 2.2980000000000003e-06, |
| "loss": -4.8948, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.000767383691845923, |
| "grad_norm": 29.275630950927734, |
| "learning_rate": 2.301e-06, |
| "loss": -5.139, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.000768384192096048, |
| "grad_norm": 22.713115692138672, |
| "learning_rate": 2.3040000000000003e-06, |
| "loss": -5.7018, |
| "step": 768 |
| }, |
| { |
| "completion_length": 249.4375, |
| "epoch": 0.0007693846923461731, |
| "grad_norm": 43.691341400146484, |
| "learning_rate": 2.307e-06, |
| "loss": 3.4159, |
| "reward": 0.20924274623394012, |
| "reward_std": 0.1474134773015976, |
| "rewards/sudoku_reward_func": 0.20924272388219833, |
| "step": 769, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0007703851925962981, |
| "grad_norm": 27.533424377441406, |
| "learning_rate": 2.31e-06, |
| "loss": 2.5735, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.0007713856928464232, |
| "grad_norm": 25.509496688842773, |
| "learning_rate": 2.313e-06, |
| "loss": 3.6815, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.0007723861930965483, |
| "grad_norm": 24.885425567626953, |
| "learning_rate": 2.316e-06, |
| "loss": 3.8332, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.0007733866933466733, |
| "grad_norm": 35.75920867919922, |
| "learning_rate": 2.319e-06, |
| "loss": 3.1062, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.0007743871935967984, |
| "grad_norm": 29.339269638061523, |
| "learning_rate": 2.322e-06, |
| "loss": 2.5655, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.0007753876938469235, |
| "grad_norm": 23.58751678466797, |
| "learning_rate": 2.325e-06, |
| "loss": 3.6055, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.0007763881940970485, |
| "grad_norm": 23.617435455322266, |
| "learning_rate": 2.328e-06, |
| "loss": 3.6078, |
| "step": 776 |
| }, |
| { |
| "completion_length": 254.875, |
| "epoch": 0.0007773886943471736, |
| "grad_norm": 35.409183502197266, |
| "learning_rate": 2.3310000000000002e-06, |
| "loss": -14.0541, |
| "reward": 0.18220899999141693, |
| "reward_std": 0.1431182250380516, |
| "rewards/sudoku_reward_func": 0.18220899999141693, |
| "step": 777, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0007783891945972987, |
| "grad_norm": 41.375572204589844, |
| "learning_rate": 2.334e-06, |
| "loss": -13.7587, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.0007793896948474238, |
| "grad_norm": 32.68427276611328, |
| "learning_rate": 2.3370000000000002e-06, |
| "loss": -14.2352, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.0007803901950975487, |
| "grad_norm": 39.738059997558594, |
| "learning_rate": 2.34e-06, |
| "loss": -14.0971, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.0007813906953476738, |
| "grad_norm": 38.458805084228516, |
| "learning_rate": 2.3430000000000003e-06, |
| "loss": -14.3691, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.0007823911955977989, |
| "grad_norm": 34.345001220703125, |
| "learning_rate": 2.346e-06, |
| "loss": -14.0191, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.0007833916958479239, |
| "grad_norm": 31.24772071838379, |
| "learning_rate": 2.3490000000000003e-06, |
| "loss": -14.7715, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.000784392196098049, |
| "grad_norm": 37.2220344543457, |
| "learning_rate": 2.352e-06, |
| "loss": -14.7264, |
| "step": 784 |
| }, |
| { |
| "completion_length": 252.14583587646484, |
| "epoch": 0.0007853926963481741, |
| "grad_norm": 34.27210998535156, |
| "learning_rate": 2.3550000000000003e-06, |
| "loss": -3.8389, |
| "reward": 0.23557373881340027, |
| "reward_std": 0.1432676911354065, |
| "rewards/sudoku_reward_func": 0.23557373881340027, |
| "step": 785, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0007863931965982992, |
| "grad_norm": 44.486698150634766, |
| "learning_rate": 2.358e-06, |
| "loss": -3.4254, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.0007873936968484242, |
| "grad_norm": 32.860557556152344, |
| "learning_rate": 2.3610000000000003e-06, |
| "loss": -3.6627, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.0007883941970985493, |
| "grad_norm": 29.94903564453125, |
| "learning_rate": 2.364e-06, |
| "loss": -4.305, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.0007893946973486744, |
| "grad_norm": 29.018800735473633, |
| "learning_rate": 2.367e-06, |
| "loss": -4.0097, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.0007903951975987994, |
| "grad_norm": 42.00046920776367, |
| "learning_rate": 2.37e-06, |
| "loss": -3.9625, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.0007913956978489244, |
| "grad_norm": 32.21451950073242, |
| "learning_rate": 2.373e-06, |
| "loss": -4.1312, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.0007923961980990495, |
| "grad_norm": 31.023622512817383, |
| "learning_rate": 2.376e-06, |
| "loss": -4.6582, |
| "step": 792 |
| }, |
| { |
| "completion_length": 246.5416717529297, |
| "epoch": 0.0007933966983491746, |
| "grad_norm": 57.225624084472656, |
| "learning_rate": 2.379e-06, |
| "loss": -2.409, |
| "reward": 0.171502985060215, |
| "reward_std": 0.15528041124343872, |
| "rewards/sudoku_reward_func": 0.1715029776096344, |
| "step": 793, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0007943971985992996, |
| "grad_norm": 54.69853973388672, |
| "learning_rate": 2.3820000000000002e-06, |
| "loss": 0.6507, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.0007953976988494247, |
| "grad_norm": 31.700592041015625, |
| "learning_rate": 2.385e-06, |
| "loss": -0.2206, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.0007963981990995498, |
| "grad_norm": 34.947086334228516, |
| "learning_rate": 2.3880000000000003e-06, |
| "loss": -0.3802, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.0007973986993496748, |
| "grad_norm": 35.65658950805664, |
| "learning_rate": 2.391e-06, |
| "loss": -2.6027, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.0007983991995997999, |
| "grad_norm": 41.322940826416016, |
| "learning_rate": 2.3940000000000003e-06, |
| "loss": 0.2528, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.000799399699849925, |
| "grad_norm": 31.205036163330078, |
| "learning_rate": 2.397e-06, |
| "loss": -0.4945, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.0008004002001000501, |
| "grad_norm": 30.227001190185547, |
| "learning_rate": 2.4000000000000003e-06, |
| "loss": -0.8871, |
| "step": 800 |
| }, |
| { |
| "completion_length": 245.5, |
| "epoch": 0.0008014007003501751, |
| "grad_norm": 33.39512252807617, |
| "learning_rate": 2.403e-06, |
| "loss": -8.9157, |
| "reward": 0.1831597313284874, |
| "reward_std": 0.16883236914873123, |
| "rewards/sudoku_reward_func": 0.1831597313284874, |
| "step": 801, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0008024012006003002, |
| "grad_norm": 34.010154724121094, |
| "learning_rate": 2.4060000000000003e-06, |
| "loss": -10.499, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.0008034017008504252, |
| "grad_norm": 32.17269515991211, |
| "learning_rate": 2.409e-06, |
| "loss": -8.7243, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.0008044022011005502, |
| "grad_norm": 32.730064392089844, |
| "learning_rate": 2.4120000000000004e-06, |
| "loss": -11.5972, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.0008054027013506753, |
| "grad_norm": 32.50396728515625, |
| "learning_rate": 2.415e-06, |
| "loss": -9.1913, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.0008064032016008004, |
| "grad_norm": 35.29380798339844, |
| "learning_rate": 2.4180000000000004e-06, |
| "loss": -11.002, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.0008074037018509255, |
| "grad_norm": 32.23307418823242, |
| "learning_rate": 2.421e-06, |
| "loss": -9.3948, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.0008084042021010505, |
| "grad_norm": 36.28053665161133, |
| "learning_rate": 2.4240000000000004e-06, |
| "loss": -12.0591, |
| "step": 808 |
| }, |
| { |
| "completion_length": 249.56250762939453, |
| "epoch": 0.0008094047023511756, |
| "grad_norm": 42.41727066040039, |
| "learning_rate": 2.4270000000000002e-06, |
| "loss": -10.5996, |
| "reward": 0.2089533880352974, |
| "reward_std": 0.11930705606937408, |
| "rewards/sudoku_reward_func": 0.2089533805847168, |
| "step": 809, |
| "zero_std_ratio": 0.125 |
| }, |
| { |
| "epoch": 0.0008104052026013007, |
| "grad_norm": 34.009498596191406, |
| "learning_rate": 2.43e-06, |
| "loss": -9.9286, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.0008114057028514257, |
| "grad_norm": 30.14313316345215, |
| "learning_rate": 2.4330000000000003e-06, |
| "loss": -12.0992, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.0008124062031015508, |
| "grad_norm": 37.952056884765625, |
| "learning_rate": 2.436e-06, |
| "loss": -10.552, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.0008134067033516759, |
| "grad_norm": 44.20967483520508, |
| "learning_rate": 2.439e-06, |
| "loss": -11.1506, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.0008144072036018008, |
| "grad_norm": 36.3494873046875, |
| "learning_rate": 2.442e-06, |
| "loss": -10.4949, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.0008154077038519259, |
| "grad_norm": 33.20271682739258, |
| "learning_rate": 2.445e-06, |
| "loss": -12.5342, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.000816408204102051, |
| "grad_norm": 39.655853271484375, |
| "learning_rate": 2.448e-06, |
| "loss": -11.3508, |
| "step": 816 |
| }, |
| { |
| "completion_length": 248.2916717529297, |
| "epoch": 0.0008174087043521761, |
| "grad_norm": 40.428096771240234, |
| "learning_rate": 2.451e-06, |
| "loss": -8.2092, |
| "reward": 0.2371031790971756, |
| "reward_std": 0.15679361671209335, |
| "rewards/sudoku_reward_func": 0.237103171646595, |
| "step": 817, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0008184092046023011, |
| "grad_norm": 54.35465621948242, |
| "learning_rate": 2.4539999999999997e-06, |
| "loss": -7.2284, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.0008194097048524262, |
| "grad_norm": 37.36104965209961, |
| "learning_rate": 2.457e-06, |
| "loss": -7.3311, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.0008204102051025513, |
| "grad_norm": 42.316307067871094, |
| "learning_rate": 2.4599999999999997e-06, |
| "loss": -9.6601, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.0008214107053526763, |
| "grad_norm": 41.3543701171875, |
| "learning_rate": 2.463e-06, |
| "loss": -8.9068, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.0008224112056028014, |
| "grad_norm": 57.49537658691406, |
| "learning_rate": 2.4659999999999998e-06, |
| "loss": -7.369, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.0008234117058529265, |
| "grad_norm": 38.88594436645508, |
| "learning_rate": 2.469e-06, |
| "loss": -8.1573, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.0008244122061030516, |
| "grad_norm": 48.43411636352539, |
| "learning_rate": 2.472e-06, |
| "loss": -9.5986, |
| "step": 824 |
| }, |
| { |
| "completion_length": 233.1875, |
| "epoch": 0.0008254127063531766, |
| "grad_norm": 58.4598274230957, |
| "learning_rate": 2.475e-06, |
| "loss": -4.3023, |
| "reward": 0.2301587462425232, |
| "reward_std": 0.14847226440906525, |
| "rewards/sudoku_reward_func": 0.230158731341362, |
| "step": 825, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0008264132066033016, |
| "grad_norm": 48.626136779785156, |
| "learning_rate": 2.478e-06, |
| "loss": -2.8084, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.0008274137068534267, |
| "grad_norm": 56.721961975097656, |
| "learning_rate": 2.481e-06, |
| "loss": -5.9076, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.0008284142071035517, |
| "grad_norm": 61.319942474365234, |
| "learning_rate": 2.484e-06, |
| "loss": -4.8713, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.0008294147073536768, |
| "grad_norm": 70.11338806152344, |
| "learning_rate": 2.487e-06, |
| "loss": -5.3066, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.0008304152076038019, |
| "grad_norm": 48.47807693481445, |
| "learning_rate": 2.49e-06, |
| "loss": -3.7843, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.000831415707853927, |
| "grad_norm": 49.2767333984375, |
| "learning_rate": 2.493e-06, |
| "loss": -7.192, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.000832416208104052, |
| "grad_norm": 65.00503540039062, |
| "learning_rate": 2.496e-06, |
| "loss": -6.7884, |
| "step": 832 |
| }, |
| { |
| "completion_length": 227.06250762939453, |
| "epoch": 0.0008334167083541771, |
| "grad_norm": 60.06196212768555, |
| "learning_rate": 2.499e-06, |
| "loss": 13.0532, |
| "reward": 0.19473380595445633, |
| "reward_std": 0.15059228241443634, |
| "rewards/sudoku_reward_func": 0.19473379850387573, |
| "step": 833, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0008344172086043022, |
| "grad_norm": 88.75370025634766, |
| "learning_rate": 2.502e-06, |
| "loss": 14.0123, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.0008354177088544272, |
| "grad_norm": 64.58737182617188, |
| "learning_rate": 2.505e-06, |
| "loss": 13.2359, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.0008364182091045523, |
| "grad_norm": 74.0726089477539, |
| "learning_rate": 2.508e-06, |
| "loss": 13.5593, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.0008374187093546774, |
| "grad_norm": 65.56517791748047, |
| "learning_rate": 2.5109999999999998e-06, |
| "loss": 13.6362, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.0008384192096048025, |
| "grad_norm": 86.18720245361328, |
| "learning_rate": 2.514e-06, |
| "loss": 14.6564, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.0008394197098549274, |
| "grad_norm": 64.94049835205078, |
| "learning_rate": 2.5169999999999998e-06, |
| "loss": 13.2589, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.0008404202101050525, |
| "grad_norm": 70.9004898071289, |
| "learning_rate": 2.52e-06, |
| "loss": 13.0416, |
| "step": 840 |
| }, |
| { |
| "completion_length": 239.0, |
| "epoch": 0.0008414207103551776, |
| "grad_norm": 54.30680465698242, |
| "learning_rate": 2.523e-06, |
| "loss": 6.8374, |
| "reward": 0.2005208358168602, |
| "reward_std": 0.1658303141593933, |
| "rewards/sudoku_reward_func": 0.2005208358168602, |
| "step": 841, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0008424212106053026, |
| "grad_norm": 52.91537857055664, |
| "learning_rate": 2.526e-06, |
| "loss": 6.5411, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.0008434217108554277, |
| "grad_norm": 68.12520599365234, |
| "learning_rate": 2.529e-06, |
| "loss": 3.6742, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.0008444222111055528, |
| "grad_norm": 64.33242797851562, |
| "learning_rate": 2.532e-06, |
| "loss": 6.8825, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.0008454227113556779, |
| "grad_norm": 54.45749282836914, |
| "learning_rate": 2.535e-06, |
| "loss": 6.8283, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.0008464232116058029, |
| "grad_norm": 48.27708435058594, |
| "learning_rate": 2.538e-06, |
| "loss": 5.9642, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.000847423711855928, |
| "grad_norm": 61.76557540893555, |
| "learning_rate": 2.541e-06, |
| "loss": 3.4451, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.0008484242121060531, |
| "grad_norm": 68.36261749267578, |
| "learning_rate": 2.544e-06, |
| "loss": 6.5138, |
| "step": 848 |
| }, |
| { |
| "completion_length": 243.37500762939453, |
| "epoch": 0.000849424712356178, |
| "grad_norm": 54.307151794433594, |
| "learning_rate": 2.547e-06, |
| "loss": -14.0519, |
| "reward": 0.21928737312555313, |
| "reward_std": 0.1534140184521675, |
| "rewards/sudoku_reward_func": 0.21928736567497253, |
| "step": 849, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0008504252126063031, |
| "grad_norm": 53.22282028198242, |
| "learning_rate": 2.55e-06, |
| "loss": -14.7871, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.0008514257128564282, |
| "grad_norm": 54.85883712768555, |
| "learning_rate": 2.553e-06, |
| "loss": -14.8958, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.0008524262131065532, |
| "grad_norm": 44.00740432739258, |
| "learning_rate": 2.556e-06, |
| "loss": -15.3165, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.0008534267133566783, |
| "grad_norm": 47.5384521484375, |
| "learning_rate": 2.559e-06, |
| "loss": -14.4872, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.0008544272136068034, |
| "grad_norm": 56.67496109008789, |
| "learning_rate": 2.562e-06, |
| "loss": -15.1607, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.0008554277138569285, |
| "grad_norm": 54.683143615722656, |
| "learning_rate": 2.565e-06, |
| "loss": -15.3318, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.0008564282141070535, |
| "grad_norm": 48.874916076660156, |
| "learning_rate": 2.568e-06, |
| "loss": -15.6385, |
| "step": 856 |
| }, |
| { |
| "completion_length": 237.75000762939453, |
| "epoch": 0.0008574287143571786, |
| "grad_norm": 55.119384765625, |
| "learning_rate": 2.571e-06, |
| "loss": 7.5436, |
| "reward": 0.2546972781419754, |
| "reward_std": 0.16300001740455627, |
| "rewards/sudoku_reward_func": 0.2546972706913948, |
| "step": 857, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0008584292146073037, |
| "grad_norm": 51.099952697753906, |
| "learning_rate": 2.574e-06, |
| "loss": 7.3429, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.0008594297148574287, |
| "grad_norm": 59.46302032470703, |
| "learning_rate": 2.577e-06, |
| "loss": 8.0542, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.0008604302151075538, |
| "grad_norm": 47.472633361816406, |
| "learning_rate": 2.58e-06, |
| "loss": 6.304, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.0008614307153576789, |
| "grad_norm": 49.4232063293457, |
| "learning_rate": 2.583e-06, |
| "loss": 7.2967, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.000862431215607804, |
| "grad_norm": 49.1822624206543, |
| "learning_rate": 2.586e-06, |
| "loss": 7.064, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.0008634317158579289, |
| "grad_norm": 56.62918472290039, |
| "learning_rate": 2.589e-06, |
| "loss": 7.7846, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.000864432216108054, |
| "grad_norm": 56.20520782470703, |
| "learning_rate": 2.592e-06, |
| "loss": 5.455, |
| "step": 864 |
| }, |
| { |
| "completion_length": 246.1041717529297, |
| "epoch": 0.0008654327163581791, |
| "grad_norm": 48.346553802490234, |
| "learning_rate": 2.595e-06, |
| "loss": -11.4518, |
| "reward": 0.20085152238607407, |
| "reward_std": 0.13320738822221756, |
| "rewards/sudoku_reward_func": 0.20085152238607407, |
| "step": 865, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0008664332166083041, |
| "grad_norm": 52.97309112548828, |
| "learning_rate": 2.598e-06, |
| "loss": -10.5137, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.0008674337168584292, |
| "grad_norm": 41.53067398071289, |
| "learning_rate": 2.601e-06, |
| "loss": -13.1581, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.0008684342171085543, |
| "grad_norm": 50.84276580810547, |
| "learning_rate": 2.604e-06, |
| "loss": -10.5355, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.0008694347173586794, |
| "grad_norm": 49.1697998046875, |
| "learning_rate": 2.607e-06, |
| "loss": -12.0282, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.0008704352176088044, |
| "grad_norm": 45.84278106689453, |
| "learning_rate": 2.61e-06, |
| "loss": -11.2891, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.0008714357178589295, |
| "grad_norm": 36.355979919433594, |
| "learning_rate": 2.6130000000000002e-06, |
| "loss": -13.4586, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.0008724362181090546, |
| "grad_norm": 46.167972564697266, |
| "learning_rate": 2.616e-06, |
| "loss": -11.315, |
| "step": 872 |
| }, |
| { |
| "completion_length": 235.6041717529297, |
| "epoch": 0.0008734367183591795, |
| "grad_norm": 43.850440979003906, |
| "learning_rate": 2.6190000000000003e-06, |
| "loss": -18.2923, |
| "reward": 0.229662723839283, |
| "reward_std": 0.16215338557958603, |
| "rewards/sudoku_reward_func": 0.2296627163887024, |
| "step": 873, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0008744372186093046, |
| "grad_norm": 51.63322830200195, |
| "learning_rate": 2.622e-06, |
| "loss": -21.9418, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.0008754377188594297, |
| "grad_norm": 44.274967193603516, |
| "learning_rate": 2.6250000000000003e-06, |
| "loss": -19.8107, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.0008764382191095548, |
| "grad_norm": 35.89291763305664, |
| "learning_rate": 2.628e-06, |
| "loss": -19.523, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.0008774387193596798, |
| "grad_norm": 42.89470291137695, |
| "learning_rate": 2.631e-06, |
| "loss": -18.7292, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.0008784392196098049, |
| "grad_norm": 50.21378707885742, |
| "learning_rate": 2.634e-06, |
| "loss": -22.1391, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.00087943971985993, |
| "grad_norm": 41.582706451416016, |
| "learning_rate": 2.637e-06, |
| "loss": -20.1133, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.000880440220110055, |
| "grad_norm": 35.33237838745117, |
| "learning_rate": 2.64e-06, |
| "loss": -19.9634, |
| "step": 880 |
| }, |
| { |
| "completion_length": 244.6666717529297, |
| "epoch": 0.0008814407203601801, |
| "grad_norm": 66.10181427001953, |
| "learning_rate": 2.643e-06, |
| "loss": -11.6534, |
| "reward": 0.2147516831755638, |
| "reward_std": 0.1552412137389183, |
| "rewards/sudoku_reward_func": 0.2147516831755638, |
| "step": 881, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0008824412206103052, |
| "grad_norm": 55.456111907958984, |
| "learning_rate": 2.646e-06, |
| "loss": -13.2263, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.0008834417208604303, |
| "grad_norm": 52.62863540649414, |
| "learning_rate": 2.649e-06, |
| "loss": -12.9538, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.0008844422211105553, |
| "grad_norm": 35.29569625854492, |
| "learning_rate": 2.652e-06, |
| "loss": -10.0486, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.0008854427213606803, |
| "grad_norm": 57.16860580444336, |
| "learning_rate": 2.655e-06, |
| "loss": -11.931, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.0008864432216108054, |
| "grad_norm": 42.70516586303711, |
| "learning_rate": 2.6580000000000002e-06, |
| "loss": -13.5012, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.0008874437218609304, |
| "grad_norm": 48.49374008178711, |
| "learning_rate": 2.661e-06, |
| "loss": -13.4838, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.0008884442221110555, |
| "grad_norm": 37.89622116088867, |
| "learning_rate": 2.6640000000000002e-06, |
| "loss": -10.5131, |
| "step": 888 |
| }, |
| { |
| "completion_length": 252.64583587646484, |
| "epoch": 0.0008894447223611806, |
| "grad_norm": 61.70961380004883, |
| "learning_rate": 2.667e-06, |
| "loss": -9.5585, |
| "reward": 0.2173859253525734, |
| "reward_std": 0.15939748287200928, |
| "rewards/sudoku_reward_func": 0.2173859179019928, |
| "step": 889, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0008904452226113056, |
| "grad_norm": 54.10370635986328, |
| "learning_rate": 2.6700000000000003e-06, |
| "loss": -14.3615, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.0008914457228614307, |
| "grad_norm": 45.82687759399414, |
| "learning_rate": 2.673e-06, |
| "loss": -10.4068, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.0008924462231115558, |
| "grad_norm": 48.732147216796875, |
| "learning_rate": 2.6760000000000003e-06, |
| "loss": -11.3894, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.0008934467233616809, |
| "grad_norm": 89.220458984375, |
| "learning_rate": 2.679e-06, |
| "loss": -10.2901, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.0008944472236118059, |
| "grad_norm": 65.57836151123047, |
| "learning_rate": 2.6820000000000003e-06, |
| "loss": -15.0504, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.000895447723861931, |
| "grad_norm": 52.363243103027344, |
| "learning_rate": 2.685e-06, |
| "loss": -10.9473, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.0008964482241120561, |
| "grad_norm": 58.80600357055664, |
| "learning_rate": 2.688e-06, |
| "loss": -11.7646, |
| "step": 896 |
| }, |
| { |
| "completion_length": 243.89584350585938, |
| "epoch": 0.000897448724362181, |
| "grad_norm": 59.68390655517578, |
| "learning_rate": 2.691e-06, |
| "loss": -18.4827, |
| "reward": 0.243675597012043, |
| "reward_std": 0.1689695119857788, |
| "rewards/sudoku_reward_func": 0.2436755895614624, |
| "step": 897, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0008984492246123061, |
| "grad_norm": 57.939754486083984, |
| "learning_rate": 2.694e-06, |
| "loss": -18.4617, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.0008994497248624312, |
| "grad_norm": 59.48532485961914, |
| "learning_rate": 2.697e-06, |
| "loss": -18.3377, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.0009004502251125563, |
| "grad_norm": 68.90072631835938, |
| "learning_rate": 2.7e-06, |
| "loss": -16.435, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.0009014507253626813, |
| "grad_norm": 62.35285186767578, |
| "learning_rate": 2.703e-06, |
| "loss": -19.3488, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.0009024512256128064, |
| "grad_norm": 65.77997589111328, |
| "learning_rate": 2.706e-06, |
| "loss": -19.3106, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.0009034517258629315, |
| "grad_norm": 60.621700286865234, |
| "learning_rate": 2.7090000000000002e-06, |
| "loss": -19.6105, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.0009044522261130565, |
| "grad_norm": 61.568870544433594, |
| "learning_rate": 2.712e-06, |
| "loss": -17.7438, |
| "step": 904 |
| }, |
| { |
| "completion_length": 247.9791717529297, |
| "epoch": 0.0009054527263631816, |
| "grad_norm": 37.43316650390625, |
| "learning_rate": 2.7150000000000003e-06, |
| "loss": -5.8514, |
| "reward": 0.2286706417798996, |
| "reward_std": 0.1333366557955742, |
| "rewards/sudoku_reward_func": 0.2286706417798996, |
| "step": 905, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0009064532266133067, |
| "grad_norm": 41.17620849609375, |
| "learning_rate": 2.718e-06, |
| "loss": -5.1501, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.0009074537268634318, |
| "grad_norm": 46.76512908935547, |
| "learning_rate": 2.7210000000000003e-06, |
| "loss": -7.0313, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.0009084542271135567, |
| "grad_norm": 44.03168869018555, |
| "learning_rate": 2.724e-06, |
| "loss": -5.5208, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.0009094547273636818, |
| "grad_norm": 40.06308364868164, |
| "learning_rate": 2.7270000000000003e-06, |
| "loss": -6.4912, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.0009104552276138069, |
| "grad_norm": 43.71113204956055, |
| "learning_rate": 2.73e-06, |
| "loss": -5.5134, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.0009114557278639319, |
| "grad_norm": 43.34052276611328, |
| "learning_rate": 2.7330000000000003e-06, |
| "loss": -7.919, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.000912456228114057, |
| "grad_norm": 41.50727844238281, |
| "learning_rate": 2.736e-06, |
| "loss": -6.0544, |
| "step": 912 |
| }, |
| { |
| "completion_length": 243.4791717529297, |
| "epoch": 0.0009134567283641821, |
| "grad_norm": 92.41754913330078, |
| "learning_rate": 2.7390000000000004e-06, |
| "loss": -20.6525, |
| "reward": 0.2719945013523102, |
| "reward_std": 0.1805286407470703, |
| "rewards/sudoku_reward_func": 0.2719945013523102, |
| "step": 913, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0009144572286143072, |
| "grad_norm": 83.57636260986328, |
| "learning_rate": 2.742e-06, |
| "loss": -21.188, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.0009154577288644322, |
| "grad_norm": 75.21762084960938, |
| "learning_rate": 2.745e-06, |
| "loss": -22.7858, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.0009164582291145573, |
| "grad_norm": 78.82347106933594, |
| "learning_rate": 2.748e-06, |
| "loss": -24.0821, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.0009174587293646824, |
| "grad_norm": 87.21110534667969, |
| "learning_rate": 2.751e-06, |
| "loss": -22.2703, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.0009184592296148074, |
| "grad_norm": 84.7657470703125, |
| "learning_rate": 2.7540000000000002e-06, |
| "loss": -22.8605, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.0009194597298649325, |
| "grad_norm": 74.08875274658203, |
| "learning_rate": 2.757e-06, |
| "loss": -24.4158, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.0009204602301150575, |
| "grad_norm": 91.21599578857422, |
| "learning_rate": 2.7600000000000003e-06, |
| "loss": -26.2348, |
| "step": 920 |
| }, |
| { |
| "completion_length": 248.9791717529297, |
| "epoch": 0.0009214607303651826, |
| "grad_norm": 78.67549133300781, |
| "learning_rate": 2.763e-06, |
| "loss": 0.443, |
| "reward": 0.23371364176273346, |
| "reward_std": 0.174501433968544, |
| "rewards/sudoku_reward_func": 0.23371363431215286, |
| "step": 921, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0009224612306153076, |
| "grad_norm": 94.62200927734375, |
| "learning_rate": 2.7660000000000003e-06, |
| "loss": 2.2315, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.0009234617308654327, |
| "grad_norm": 98.74855041503906, |
| "learning_rate": 2.769e-06, |
| "loss": 3.319, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.0009244622311155578, |
| "grad_norm": 105.0526351928711, |
| "learning_rate": 2.7720000000000003e-06, |
| "loss": 0.0095, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.0009254627313656828, |
| "grad_norm": 104.87490844726562, |
| "learning_rate": 2.775e-06, |
| "loss": 0.2426, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.0009264632316158079, |
| "grad_norm": 115.48833465576172, |
| "learning_rate": 2.7780000000000003e-06, |
| "loss": 2.4548, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.000927463731865933, |
| "grad_norm": 100.13760375976562, |
| "learning_rate": 2.781e-06, |
| "loss": 3.4853, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.000928464232116058, |
| "grad_norm": 111.59932708740234, |
| "learning_rate": 2.7840000000000004e-06, |
| "loss": -0.2007, |
| "step": 928 |
| }, |
| { |
| "completion_length": 240.68750762939453, |
| "epoch": 0.0009294647323661831, |
| "grad_norm": 90.83345031738281, |
| "learning_rate": 2.787e-06, |
| "loss": -0.0686, |
| "reward": 0.27306924760341644, |
| "reward_std": 0.14821404218673706, |
| "rewards/sudoku_reward_func": 0.27306922525167465, |
| "step": 929, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0009304652326163082, |
| "grad_norm": 97.31766510009766, |
| "learning_rate": 2.7900000000000004e-06, |
| "loss": 2.2642, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.0009314657328664333, |
| "grad_norm": 74.02940368652344, |
| "learning_rate": 2.793e-06, |
| "loss": -2.8585, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.0009324662331165582, |
| "grad_norm": 110.25257873535156, |
| "learning_rate": 2.7960000000000004e-06, |
| "loss": -4.8888, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.0009334667333666833, |
| "grad_norm": 94.64034271240234, |
| "learning_rate": 2.7990000000000002e-06, |
| "loss": -0.4688, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.0009344672336168084, |
| "grad_norm": 80.84830474853516, |
| "learning_rate": 2.802e-06, |
| "loss": 1.6228, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.0009354677338669334, |
| "grad_norm": 65.42864227294922, |
| "learning_rate": 2.8050000000000002e-06, |
| "loss": -2.8248, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.0009364682341170585, |
| "grad_norm": 126.89608764648438, |
| "learning_rate": 2.808e-06, |
| "loss": -6.1213, |
| "step": 936 |
| }, |
| { |
| "completion_length": 246.75000762939453, |
| "epoch": 0.0009374687343671836, |
| "grad_norm": 88.34729766845703, |
| "learning_rate": 2.8110000000000003e-06, |
| "loss": -12.6919, |
| "reward": 0.21135085821151733, |
| "reward_std": 0.13452807068824768, |
| "rewards/sudoku_reward_func": 0.21135085821151733, |
| "step": 937, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0009384692346173087, |
| "grad_norm": 72.44952392578125, |
| "learning_rate": 2.814e-06, |
| "loss": -10.9051, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.0009394697348674337, |
| "grad_norm": 125.90797424316406, |
| "learning_rate": 2.817e-06, |
| "loss": -14.7098, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.0009404702351175588, |
| "grad_norm": 94.34359741210938, |
| "learning_rate": 2.82e-06, |
| "loss": -12.7713, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.0009414707353676839, |
| "grad_norm": 87.32075500488281, |
| "learning_rate": 2.823e-06, |
| "loss": -13.6036, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.0009424712356178089, |
| "grad_norm": 73.44007873535156, |
| "learning_rate": 2.8259999999999997e-06, |
| "loss": -11.6647, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.000943471735867934, |
| "grad_norm": 153.84591674804688, |
| "learning_rate": 2.829e-06, |
| "loss": -17.473, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.000944472236118059, |
| "grad_norm": 112.31997680664062, |
| "learning_rate": 2.8319999999999997e-06, |
| "loss": -14.8225, |
| "step": 944 |
| }, |
| { |
| "completion_length": 239.00000762939453, |
| "epoch": 0.0009454727363681841, |
| "grad_norm": 105.95699310302734, |
| "learning_rate": 2.835e-06, |
| "loss": -1.1112, |
| "reward": 0.2568768113851547, |
| "reward_std": 0.16827785968780518, |
| "rewards/sudoku_reward_func": 0.25687679648399353, |
| "step": 945, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0009464732366183091, |
| "grad_norm": 122.6570816040039, |
| "learning_rate": 2.8379999999999998e-06, |
| "loss": 0.4598, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.0009474737368684342, |
| "grad_norm": 125.27985382080078, |
| "learning_rate": 2.841e-06, |
| "loss": -1.2065, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.0009484742371185593, |
| "grad_norm": 125.6364974975586, |
| "learning_rate": 2.844e-06, |
| "loss": 7.3372, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.0009494747373686843, |
| "grad_norm": 107.11581420898438, |
| "learning_rate": 2.847e-06, |
| "loss": -1.2886, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.0009504752376188094, |
| "grad_norm": 129.3809051513672, |
| "learning_rate": 2.85e-06, |
| "loss": 0.2694, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.0009514757378689345, |
| "grad_norm": 117.40351867675781, |
| "learning_rate": 2.853e-06, |
| "loss": -1.7224, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.0009524762381190596, |
| "grad_norm": 135.25901794433594, |
| "learning_rate": 2.856e-06, |
| "loss": 6.8023, |
| "step": 952 |
| }, |
| { |
| "completion_length": 247.83334350585938, |
| "epoch": 0.0009534767383691846, |
| "grad_norm": 102.14061737060547, |
| "learning_rate": 2.859e-06, |
| "loss": 8.0822, |
| "reward": 0.227430559694767, |
| "reward_std": 0.14220082387328148, |
| "rewards/sudoku_reward_func": 0.2274305522441864, |
| "step": 953, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0009544772386193097, |
| "grad_norm": 129.8813934326172, |
| "learning_rate": 2.862e-06, |
| "loss": 15.1066, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.0009554777388694348, |
| "grad_norm": 106.110107421875, |
| "learning_rate": 2.865e-06, |
| "loss": 7.7215, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.0009564782391195597, |
| "grad_norm": 69.91547393798828, |
| "learning_rate": 2.868e-06, |
| "loss": 7.6638, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.0009574787393696848, |
| "grad_norm": 109.26384735107422, |
| "learning_rate": 2.871e-06, |
| "loss": 7.1161, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.0009584792396198099, |
| "grad_norm": 130.6197052001953, |
| "learning_rate": 2.874e-06, |
| "loss": 12.9955, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.000959479739869935, |
| "grad_norm": 69.60470581054688, |
| "learning_rate": 2.877e-06, |
| "loss": 6.9533, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.00096048024012006, |
| "grad_norm": 67.70174407958984, |
| "learning_rate": 2.88e-06, |
| "loss": 6.9371, |
| "step": 960 |
| }, |
| { |
| "completion_length": 243.9791717529297, |
| "epoch": 0.0009614807403701851, |
| "grad_norm": 102.05725860595703, |
| "learning_rate": 2.883e-06, |
| "loss": 9.4034, |
| "reward": 0.23429233580827713, |
| "reward_std": 0.1784105822443962, |
| "rewards/sudoku_reward_func": 0.23429233580827713, |
| "step": 961, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0009624812406203102, |
| "grad_norm": 93.39031982421875, |
| "learning_rate": 2.886e-06, |
| "loss": 11.6216, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.0009634817408704352, |
| "grad_norm": 86.00289916992188, |
| "learning_rate": 2.8889999999999998e-06, |
| "loss": 8.0131, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.0009644822411205603, |
| "grad_norm": 88.48316192626953, |
| "learning_rate": 2.892e-06, |
| "loss": 11.9298, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.0009654827413706854, |
| "grad_norm": 91.39781188964844, |
| "learning_rate": 2.895e-06, |
| "loss": 8.7401, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.0009664832416208105, |
| "grad_norm": 85.42029571533203, |
| "learning_rate": 2.898e-06, |
| "loss": 9.4002, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.0009674837418709354, |
| "grad_norm": 67.53469848632812, |
| "learning_rate": 2.901e-06, |
| "loss": 7.4259, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.0009684842421210605, |
| "grad_norm": 83.41094207763672, |
| "learning_rate": 2.904e-06, |
| "loss": 11.5478, |
| "step": 968 |
| }, |
| { |
| "completion_length": 245.27083587646484, |
| "epoch": 0.0009694847423711856, |
| "grad_norm": 246.9258270263672, |
| "learning_rate": 2.907e-06, |
| "loss": 5.9896, |
| "reward": 0.21226026117801666, |
| "reward_std": 0.1643363982439041, |
| "rewards/sudoku_reward_func": 0.21226025372743607, |
| "step": 969, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0009704852426213106, |
| "grad_norm": 98.34902954101562, |
| "learning_rate": 2.91e-06, |
| "loss": 6.1014, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.0009714857428714357, |
| "grad_norm": 109.06088256835938, |
| "learning_rate": 2.913e-06, |
| "loss": 7.1094, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.0009724862431215608, |
| "grad_norm": 79.995849609375, |
| "learning_rate": 2.916e-06, |
| "loss": 3.4938, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.0009734867433716858, |
| "grad_norm": 111.15068054199219, |
| "learning_rate": 2.919e-06, |
| "loss": 3.5674, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.0009744872436218109, |
| "grad_norm": 68.43648529052734, |
| "learning_rate": 2.922e-06, |
| "loss": 4.8717, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.000975487743871936, |
| "grad_norm": 91.81321716308594, |
| "learning_rate": 2.925e-06, |
| "loss": 5.182, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.0009764882441220611, |
| "grad_norm": 70.03031921386719, |
| "learning_rate": 2.928e-06, |
| "loss": 2.105, |
| "step": 976 |
| }, |
| { |
| "completion_length": 248.9166717529297, |
| "epoch": 0.0009774887443721862, |
| "grad_norm": 49.68278503417969, |
| "learning_rate": 2.931e-06, |
| "loss": 2.851, |
| "reward": 0.20291833579540253, |
| "reward_std": 0.1318095251917839, |
| "rewards/sudoku_reward_func": 0.20291832834482193, |
| "step": 977, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0009784892446223113, |
| "grad_norm": 86.14371490478516, |
| "learning_rate": 2.934e-06, |
| "loss": 3.8071, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.0009794897448724361, |
| "grad_norm": 68.95272064208984, |
| "learning_rate": 2.937e-06, |
| "loss": 4.1672, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.0009804902451225612, |
| "grad_norm": 45.35859298706055, |
| "learning_rate": 2.9400000000000002e-06, |
| "loss": 1.9626, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.0009814907453726863, |
| "grad_norm": 44.2899169921875, |
| "learning_rate": 2.943e-06, |
| "loss": 2.4469, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.0009824912456228114, |
| "grad_norm": 62.7624397277832, |
| "learning_rate": 2.946e-06, |
| "loss": 2.4931, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.0009834917458729365, |
| "grad_norm": 63.1762809753418, |
| "learning_rate": 2.949e-06, |
| "loss": 3.0954, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.0009844922461230616, |
| "grad_norm": 41.33958053588867, |
| "learning_rate": 2.952e-06, |
| "loss": 1.4097, |
| "step": 984 |
| }, |
| { |
| "completion_length": 241.83333587646484, |
| "epoch": 0.0009854927463731867, |
| "grad_norm": 83.71294403076172, |
| "learning_rate": 2.955e-06, |
| "loss": 15.8541, |
| "reward": 0.219866082072258, |
| "reward_std": 0.13635045289993286, |
| "rewards/sudoku_reward_func": 0.219866082072258, |
| "step": 985, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0009864932466233116, |
| "grad_norm": 88.49727630615234, |
| "learning_rate": 2.958e-06, |
| "loss": 13.5028, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.0009874937468734367, |
| "grad_norm": 62.60023880004883, |
| "learning_rate": 2.961e-06, |
| "loss": 13.6597, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.0009884942471235618, |
| "grad_norm": 67.28657531738281, |
| "learning_rate": 2.964e-06, |
| "loss": 15.6197, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.0009894947473736869, |
| "grad_norm": 91.06583404541016, |
| "learning_rate": 2.967e-06, |
| "loss": 14.1141, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.000990495247623812, |
| "grad_norm": 70.22718811035156, |
| "learning_rate": 2.97e-06, |
| "loss": 12.7172, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.000991495747873937, |
| "grad_norm": 56.140846252441406, |
| "learning_rate": 2.973e-06, |
| "loss": 12.1588, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.000992496248124062, |
| "grad_norm": 61.37893295288086, |
| "learning_rate": 2.976e-06, |
| "loss": 14.1818, |
| "step": 992 |
| }, |
| { |
| "completion_length": 255.77084350585938, |
| "epoch": 0.000993496748374187, |
| "grad_norm": 62.03451919555664, |
| "learning_rate": 2.979e-06, |
| "loss": 3.4047, |
| "reward": 0.1975446492433548, |
| "reward_std": 0.1418343260884285, |
| "rewards/sudoku_reward_func": 0.1975446492433548, |
| "step": 993, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0009944972486243121, |
| "grad_norm": 52.507904052734375, |
| "learning_rate": 2.982e-06, |
| "loss": 2.0955, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.0009954977488744372, |
| "grad_norm": 50.54158401489258, |
| "learning_rate": 2.9850000000000002e-06, |
| "loss": 4.8062, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.0009964982491245623, |
| "grad_norm": 43.728912353515625, |
| "learning_rate": 2.988e-06, |
| "loss": 3.4528, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.0009974987493746874, |
| "grad_norm": 52.202728271484375, |
| "learning_rate": 2.9910000000000002e-06, |
| "loss": 2.6404, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.0009984992496248125, |
| "grad_norm": 52.02908706665039, |
| "learning_rate": 2.994e-06, |
| "loss": 0.8025, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.0009994997498749374, |
| "grad_norm": 54.37822341918945, |
| "learning_rate": 2.9970000000000003e-06, |
| "loss": 3.864, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.0010005002501250625, |
| "grad_norm": 44.46600341796875, |
| "learning_rate": 3e-06, |
| "loss": 2.609, |
| "step": 1000 |
| }, |
| { |
| "completion_length": 254.37500762939453, |
| "epoch": 0.0010015007503751876, |
| "grad_norm": 39.40637969970703, |
| "learning_rate": 3e-06, |
| "loss": -9.2458, |
| "reward": 0.22263558954000473, |
| "reward_std": 0.14237912744283676, |
| "rewards/sudoku_reward_func": 0.22263558954000473, |
| "step": 1001, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0010025012506253126, |
| "grad_norm": 48.54051208496094, |
| "learning_rate": 3e-06, |
| "loss": -8.9324, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.0010035017508754377, |
| "grad_norm": 43.79606628417969, |
| "learning_rate": 3e-06, |
| "loss": -6.4303, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.0010045022511255628, |
| "grad_norm": 36.05558776855469, |
| "learning_rate": 3e-06, |
| "loss": -9.1181, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.001005502751375688, |
| "grad_norm": 39.247318267822266, |
| "learning_rate": 3e-06, |
| "loss": -9.8013, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.0010065032516258128, |
| "grad_norm": 45.25431442260742, |
| "learning_rate": 3e-06, |
| "loss": -9.7636, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.001007503751875938, |
| "grad_norm": 37.57381820678711, |
| "learning_rate": 3e-06, |
| "loss": -7.187, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.001008504252126063, |
| "grad_norm": 33.81916046142578, |
| "learning_rate": 3e-06, |
| "loss": -9.3165, |
| "step": 1008 |
| }, |
| { |
| "completion_length": 251.7916717529297, |
| "epoch": 0.001009504752376188, |
| "grad_norm": 47.115623474121094, |
| "learning_rate": 3e-06, |
| "loss": -3.2863, |
| "reward": 0.15844081342220306, |
| "reward_std": 0.1177215576171875, |
| "rewards/sudoku_reward_func": 0.15844080597162247, |
| "step": 1009, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0010105052526263132, |
| "grad_norm": 41.37596893310547, |
| "learning_rate": 3e-06, |
| "loss": -3.0274, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.0010115057528764383, |
| "grad_norm": 42.2933464050293, |
| "learning_rate": 3e-06, |
| "loss": -3.8172, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.0010125062531265634, |
| "grad_norm": 35.31851577758789, |
| "learning_rate": 3e-06, |
| "loss": -3.7782, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.0010135067533766882, |
| "grad_norm": 43.999046325683594, |
| "learning_rate": 3e-06, |
| "loss": -3.1858, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.0010145072536268133, |
| "grad_norm": 42.06627655029297, |
| "learning_rate": 3e-06, |
| "loss": -3.3128, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.0010155077538769384, |
| "grad_norm": 43.68073272705078, |
| "learning_rate": 3e-06, |
| "loss": -4.1191, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.0010165082541270635, |
| "grad_norm": 40.35986328125, |
| "learning_rate": 3e-06, |
| "loss": -4.3652, |
| "step": 1016 |
| }, |
| { |
| "completion_length": 251.5, |
| "epoch": 0.0010175087543771886, |
| "grad_norm": 43.43013000488281, |
| "learning_rate": 3e-06, |
| "loss": -5.5952, |
| "reward": 0.21097884327173233, |
| "reward_std": 0.15194199979305267, |
| "rewards/sudoku_reward_func": 0.21097883582115173, |
| "step": 1017, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0010185092546273137, |
| "grad_norm": 43.78632736206055, |
| "learning_rate": 3e-06, |
| "loss": -5.3231, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.0010195097548774388, |
| "grad_norm": 38.955196380615234, |
| "learning_rate": 3e-06, |
| "loss": -6.054, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.0010205102551275637, |
| "grad_norm": 54.36684799194336, |
| "learning_rate": 3e-06, |
| "loss": -3.6614, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.0010215107553776888, |
| "grad_norm": 52.5922966003418, |
| "learning_rate": 3e-06, |
| "loss": -5.9705, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.0010225112556278139, |
| "grad_norm": 43.55205535888672, |
| "learning_rate": 3e-06, |
| "loss": -5.892, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.001023511755877939, |
| "grad_norm": 48.446205139160156, |
| "learning_rate": 3e-06, |
| "loss": -6.5397, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.001024512256128064, |
| "grad_norm": 56.804771423339844, |
| "learning_rate": 3e-06, |
| "loss": -4.1653, |
| "step": 1024 |
| }, |
| { |
| "completion_length": 256.0, |
| "epoch": 0.0010255127563781892, |
| "grad_norm": 48.26102828979492, |
| "learning_rate": 3e-06, |
| "loss": -10.1381, |
| "reward": 0.2154017984867096, |
| "reward_std": 0.13763760775327682, |
| "rewards/sudoku_reward_func": 0.2154017835855484, |
| "step": 1025, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0010265132566283143, |
| "grad_norm": 54.459964752197266, |
| "learning_rate": 3e-06, |
| "loss": -10.2523, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.0010275137568784391, |
| "grad_norm": 66.99800109863281, |
| "learning_rate": 3e-06, |
| "loss": -8.145, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.0010285142571285642, |
| "grad_norm": 40.35697937011719, |
| "learning_rate": 3e-06, |
| "loss": -9.0834, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.0010295147573786893, |
| "grad_norm": 48.033973693847656, |
| "learning_rate": 3e-06, |
| "loss": -10.6708, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.0010305152576288144, |
| "grad_norm": 44.81524658203125, |
| "learning_rate": 3e-06, |
| "loss": -10.8063, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.0010315157578789395, |
| "grad_norm": 58.0858154296875, |
| "learning_rate": 3e-06, |
| "loss": -9.0872, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.0010325162581290646, |
| "grad_norm": 41.79676818847656, |
| "learning_rate": 3e-06, |
| "loss": -9.7248, |
| "step": 1032 |
| }, |
| { |
| "completion_length": 254.9791717529297, |
| "epoch": 0.0010335167583791897, |
| "grad_norm": 61.14440155029297, |
| "learning_rate": 3e-06, |
| "loss": 2.4693, |
| "reward": 0.17128128558397293, |
| "reward_std": 0.11857020482420921, |
| "rewards/sudoku_reward_func": 0.17128127813339233, |
| "step": 1033, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0010345172586293146, |
| "grad_norm": 34.947303771972656, |
| "learning_rate": 3e-06, |
| "loss": 1.9397, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.0010355177588794397, |
| "grad_norm": 42.94151306152344, |
| "learning_rate": 3e-06, |
| "loss": 1.5863, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.0010365182591295648, |
| "grad_norm": 34.938148498535156, |
| "learning_rate": 3e-06, |
| "loss": 2.1598, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.0010375187593796898, |
| "grad_norm": 43.56894302368164, |
| "learning_rate": 3e-06, |
| "loss": 1.5508, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.001038519259629815, |
| "grad_norm": 27.966718673706055, |
| "learning_rate": 3e-06, |
| "loss": 1.1268, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.00103951975987994, |
| "grad_norm": 34.90840530395508, |
| "learning_rate": 3e-06, |
| "loss": 0.9044, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.0010405202601300651, |
| "grad_norm": 32.87578201293945, |
| "learning_rate": 3e-06, |
| "loss": 1.2856, |
| "step": 1040 |
| }, |
| { |
| "completion_length": 251.62500762939453, |
| "epoch": 0.00104152076038019, |
| "grad_norm": 56.22600555419922, |
| "learning_rate": 3e-06, |
| "loss": 0.9322, |
| "reward": 0.18572255223989487, |
| "reward_std": 0.12399409711360931, |
| "rewards/sudoku_reward_func": 0.18572255223989487, |
| "step": 1041, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.001042521260630315, |
| "grad_norm": 33.610408782958984, |
| "learning_rate": 3e-06, |
| "loss": 2.2944, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.0010435217608804402, |
| "grad_norm": 33.504329681396484, |
| "learning_rate": 3e-06, |
| "loss": 1.6829, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.0010445222611305653, |
| "grad_norm": 36.757469177246094, |
| "learning_rate": 3e-06, |
| "loss": 2.9836, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.0010455227613806904, |
| "grad_norm": 30.555280685424805, |
| "learning_rate": 3e-06, |
| "loss": 0.8575, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.0010465232616308155, |
| "grad_norm": 31.86289405822754, |
| "learning_rate": 3e-06, |
| "loss": 1.8835, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.0010475237618809406, |
| "grad_norm": 34.755680084228516, |
| "learning_rate": 3e-06, |
| "loss": 1.1176, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.0010485242621310654, |
| "grad_norm": 40.08235549926758, |
| "learning_rate": 3e-06, |
| "loss": 2.0918, |
| "step": 1048 |
| }, |
| { |
| "completion_length": 253.08333587646484, |
| "epoch": 0.0010495247623811905, |
| "grad_norm": 21.937795639038086, |
| "learning_rate": 3e-06, |
| "loss": -2.3447, |
| "reward": 0.165798619389534, |
| "reward_std": 0.08688194304704666, |
| "rewards/sudoku_reward_func": 0.1657986119389534, |
| "step": 1049, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0010505252626313156, |
| "grad_norm": 25.62210464477539, |
| "learning_rate": 3e-06, |
| "loss": -3.3928, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.0010515257628814407, |
| "grad_norm": 25.852378845214844, |
| "learning_rate": 3e-06, |
| "loss": -2.4151, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.0010525262631315658, |
| "grad_norm": 19.493253707885742, |
| "learning_rate": 3e-06, |
| "loss": -3.1267, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.001053526763381691, |
| "grad_norm": 24.54071807861328, |
| "learning_rate": 3e-06, |
| "loss": -2.3195, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.001054527263631816, |
| "grad_norm": 26.212997436523438, |
| "learning_rate": 3e-06, |
| "loss": -3.525, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.0010555277638819409, |
| "grad_norm": 34.78599166870117, |
| "learning_rate": 3e-06, |
| "loss": -2.55, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.001056528264132066, |
| "grad_norm": 20.467262268066406, |
| "learning_rate": 3e-06, |
| "loss": -3.3289, |
| "step": 1056 |
| }, |
| { |
| "completion_length": 254.58333587646484, |
| "epoch": 0.001057528764382191, |
| "grad_norm": 49.550392150878906, |
| "learning_rate": 3e-06, |
| "loss": -12.7646, |
| "reward": 0.1769593432545662, |
| "reward_std": 0.15440654009580612, |
| "rewards/sudoku_reward_func": 0.1769593358039856, |
| "step": 1057, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0010585292646323162, |
| "grad_norm": 61.013832092285156, |
| "learning_rate": 3e-06, |
| "loss": -11.774, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.0010595297648824413, |
| "grad_norm": 39.312164306640625, |
| "learning_rate": 3e-06, |
| "loss": -12.574, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.0010605302651325664, |
| "grad_norm": 49.83845520019531, |
| "learning_rate": 3e-06, |
| "loss": -14.3973, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.0010615307653826915, |
| "grad_norm": 41.74903106689453, |
| "learning_rate": 3e-06, |
| "loss": -13.5319, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.0010625312656328163, |
| "grad_norm": 63.667415618896484, |
| "learning_rate": 3e-06, |
| "loss": -12.8829, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.0010635317658829414, |
| "grad_norm": 39.259647369384766, |
| "learning_rate": 3e-06, |
| "loss": -13.3493, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.0010645322661330665, |
| "grad_norm": 48.72328186035156, |
| "learning_rate": 3e-06, |
| "loss": -15.4117, |
| "step": 1064 |
| }, |
| { |
| "completion_length": 254.68750762939453, |
| "epoch": 0.0010655327663831916, |
| "grad_norm": 37.72417449951172, |
| "learning_rate": 3e-06, |
| "loss": -1.2786, |
| "reward": 0.2013888955116272, |
| "reward_std": 0.15765716135501862, |
| "rewards/sudoku_reward_func": 0.2013888955116272, |
| "step": 1065, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0010665332666333167, |
| "grad_norm": 50.46000671386719, |
| "learning_rate": 3e-06, |
| "loss": -2.2704, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.0010675337668834418, |
| "grad_norm": 58.26871109008789, |
| "learning_rate": 3e-06, |
| "loss": -2.746, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.0010685342671335667, |
| "grad_norm": 72.73383331298828, |
| "learning_rate": 3e-06, |
| "loss": -1.7437, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.0010695347673836918, |
| "grad_norm": 37.29910659790039, |
| "learning_rate": 3e-06, |
| "loss": -1.6776, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.0010705352676338169, |
| "grad_norm": 44.55699157714844, |
| "learning_rate": 3e-06, |
| "loss": -2.8655, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.001071535767883942, |
| "grad_norm": 46.04912567138672, |
| "learning_rate": 3e-06, |
| "loss": -3.3467, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.001072536268134067, |
| "grad_norm": 47.104732513427734, |
| "learning_rate": 3e-06, |
| "loss": -2.3808, |
| "step": 1072 |
| }, |
| { |
| "completion_length": 255.7916717529297, |
| "epoch": 0.0010735367683841921, |
| "grad_norm": 47.69097137451172, |
| "learning_rate": 3e-06, |
| "loss": -12.6992, |
| "reward": 0.26855987310409546, |
| "reward_std": 0.16888362169265747, |
| "rewards/sudoku_reward_func": 0.26855985820293427, |
| "step": 1073, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0010745372686343172, |
| "grad_norm": 57.80671691894531, |
| "learning_rate": 3e-06, |
| "loss": -14.2458, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.0010755377688844421, |
| "grad_norm": 47.09156799316406, |
| "learning_rate": 3e-06, |
| "loss": -15.3588, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.0010765382691345672, |
| "grad_norm": 59.22261428833008, |
| "learning_rate": 3e-06, |
| "loss": -13.2847, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.0010775387693846923, |
| "grad_norm": 52.122535705566406, |
| "learning_rate": 3e-06, |
| "loss": -12.8921, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.0010785392696348174, |
| "grad_norm": 69.0072250366211, |
| "learning_rate": 3e-06, |
| "loss": -14.7293, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.0010795397698849425, |
| "grad_norm": 44.61542510986328, |
| "learning_rate": 3e-06, |
| "loss": -15.8092, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.0010805402701350676, |
| "grad_norm": 52.3683967590332, |
| "learning_rate": 3e-06, |
| "loss": -14.0392, |
| "step": 1080 |
| }, |
| { |
| "completion_length": 251.6041717529297, |
| "epoch": 0.0010815407703851927, |
| "grad_norm": 47.1711311340332, |
| "learning_rate": 3e-06, |
| "loss": -11.0198, |
| "reward": 0.19797680526971817, |
| "reward_std": 0.16620083153247833, |
| "rewards/sudoku_reward_func": 0.19797679781913757, |
| "step": 1081, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0010825412706353176, |
| "grad_norm": 48.20561981201172, |
| "learning_rate": 3e-06, |
| "loss": -12.7956, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.0010835417708854427, |
| "grad_norm": 48.29547882080078, |
| "learning_rate": 3e-06, |
| "loss": -12.0332, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.0010845422711355677, |
| "grad_norm": 50.09273147583008, |
| "learning_rate": 3e-06, |
| "loss": -11.6346, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.0010855427713856928, |
| "grad_norm": 48.44402313232422, |
| "learning_rate": 3e-06, |
| "loss": -11.6618, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.001086543271635818, |
| "grad_norm": 49.130584716796875, |
| "learning_rate": 3e-06, |
| "loss": -13.341, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.001087543771885943, |
| "grad_norm": 51.19216537475586, |
| "learning_rate": 3e-06, |
| "loss": -12.571, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.0010885442721360681, |
| "grad_norm": 47.3951301574707, |
| "learning_rate": 3e-06, |
| "loss": -12.3729, |
| "step": 1088 |
| }, |
| { |
| "completion_length": 256.0, |
| "epoch": 0.001089544772386193, |
| "grad_norm": 45.89775466918945, |
| "learning_rate": 3e-06, |
| "loss": -9.4464, |
| "reward": 0.2154017984867096, |
| "reward_std": 0.141516774892807, |
| "rewards/sudoku_reward_func": 0.2154017835855484, |
| "step": 1089, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.001090545272636318, |
| "grad_norm": 47.289772033691406, |
| "learning_rate": 3e-06, |
| "loss": -10.7988, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.0010915457728864432, |
| "grad_norm": 59.00400161743164, |
| "learning_rate": 3e-06, |
| "loss": -12.4682, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.0010925462731365683, |
| "grad_norm": 58.620784759521484, |
| "learning_rate": 3e-06, |
| "loss": -12.318, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.0010935467733866934, |
| "grad_norm": 55.14871597290039, |
| "learning_rate": 3e-06, |
| "loss": -9.9248, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.0010945472736368185, |
| "grad_norm": 41.1626091003418, |
| "learning_rate": 3e-06, |
| "loss": -11.404, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.0010955477738869436, |
| "grad_norm": 50.45378875732422, |
| "learning_rate": 3e-06, |
| "loss": -13.1491, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.0010965482741370684, |
| "grad_norm": 37.13240432739258, |
| "learning_rate": 3e-06, |
| "loss": -12.8651, |
| "step": 1096 |
| }, |
| { |
| "completion_length": 256.0, |
| "epoch": 0.0010975487743871935, |
| "grad_norm": 58.38191223144531, |
| "learning_rate": 3e-06, |
| "loss": -3.4455, |
| "reward": 0.22999339550733566, |
| "reward_std": 0.18920356035232544, |
| "rewards/sudoku_reward_func": 0.22999338060617447, |
| "step": 1097, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0010985492746373186, |
| "grad_norm": 58.06026077270508, |
| "learning_rate": 3e-06, |
| "loss": 0.3434, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.0010995497748874437, |
| "grad_norm": 52.834251403808594, |
| "learning_rate": 3e-06, |
| "loss": -4.7924, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.0011005502751375688, |
| "grad_norm": 62.384254455566406, |
| "learning_rate": 3e-06, |
| "loss": -2.7561, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.001101550775387694, |
| "grad_norm": 54.30180358886719, |
| "learning_rate": 3e-06, |
| "loss": -3.8362, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.001102551275637819, |
| "grad_norm": 64.72964477539062, |
| "learning_rate": 3e-06, |
| "loss": -0.2608, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.0011035517758879439, |
| "grad_norm": 59.001922607421875, |
| "learning_rate": 3e-06, |
| "loss": -5.3006, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.001104552276138069, |
| "grad_norm": 60.54927444458008, |
| "learning_rate": 3e-06, |
| "loss": -3.0542, |
| "step": 1104 |
| }, |
| { |
| "completion_length": 253.7916717529297, |
| "epoch": 0.001105552776388194, |
| "grad_norm": 34.46814727783203, |
| "learning_rate": 3e-06, |
| "loss": 1.3126, |
| "reward": 0.20547740161418915, |
| "reward_std": 0.14204465597867966, |
| "rewards/sudoku_reward_func": 0.20547740161418915, |
| "step": 1105, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0011065532766383192, |
| "grad_norm": 38.980445861816406, |
| "learning_rate": 3e-06, |
| "loss": 0.8968, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.0011075537768884443, |
| "grad_norm": 41.922882080078125, |
| "learning_rate": 3e-06, |
| "loss": 0.7973, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.0011085542771385693, |
| "grad_norm": 35.87057113647461, |
| "learning_rate": 3e-06, |
| "loss": 1.8764, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.0011095547773886944, |
| "grad_norm": 37.61541748046875, |
| "learning_rate": 3e-06, |
| "loss": 1.0825, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.0011105552776388193, |
| "grad_norm": 38.23784255981445, |
| "learning_rate": 3e-06, |
| "loss": 0.5459, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.0011115557778889444, |
| "grad_norm": 35.42008590698242, |
| "learning_rate": 3e-06, |
| "loss": 0.5068, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.0011125562781390695, |
| "grad_norm": 34.2984504699707, |
| "learning_rate": 3e-06, |
| "loss": 1.6137, |
| "step": 1112 |
| }, |
| { |
| "completion_length": 253.7291717529297, |
| "epoch": 0.0011135567783891946, |
| "grad_norm": 64.51300048828125, |
| "learning_rate": 3e-06, |
| "loss": -27.9773, |
| "reward": 0.21698381751775742, |
| "reward_std": 0.19872380048036575, |
| "rewards/sudoku_reward_func": 0.21698381751775742, |
| "step": 1113, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0011145572786393197, |
| "grad_norm": 51.81525802612305, |
| "learning_rate": 3e-06, |
| "loss": -26.6845, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.0011155577788894448, |
| "grad_norm": 74.68866729736328, |
| "learning_rate": 3e-06, |
| "loss": -26.7574, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.0011165582791395699, |
| "grad_norm": 91.27005004882812, |
| "learning_rate": 3e-06, |
| "loss": -29.4966, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.0011175587793896948, |
| "grad_norm": 70.424072265625, |
| "learning_rate": 3e-06, |
| "loss": -28.8359, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.0011185592796398199, |
| "grad_norm": 61.16431427001953, |
| "learning_rate": 3e-06, |
| "loss": -27.8278, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.001119559779889945, |
| "grad_norm": 71.94561004638672, |
| "learning_rate": 3e-06, |
| "loss": -28.0802, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.00112056028014007, |
| "grad_norm": 82.03819274902344, |
| "learning_rate": 3e-06, |
| "loss": -31.3154, |
| "step": 1120 |
| }, |
| { |
| "completion_length": 255.52083587646484, |
| "epoch": 0.0011215607803901951, |
| "grad_norm": 62.45363235473633, |
| "learning_rate": 3e-06, |
| "loss": -9.9586, |
| "reward": 0.2145337387919426, |
| "reward_std": 0.16341029852628708, |
| "rewards/sudoku_reward_func": 0.2145337387919426, |
| "step": 1121, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0011225612806403202, |
| "grad_norm": 53.67804718017578, |
| "learning_rate": 3e-06, |
| "loss": -9.8162, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.0011235617808904453, |
| "grad_norm": 57.61661148071289, |
| "learning_rate": 3e-06, |
| "loss": -10.5356, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.0011245622811405702, |
| "grad_norm": 73.10306549072266, |
| "learning_rate": 3e-06, |
| "loss": -11.704, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.0011255627813906953, |
| "grad_norm": 71.7237548828125, |
| "learning_rate": 3e-06, |
| "loss": -10.8365, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.0011265632816408204, |
| "grad_norm": 65.70649719238281, |
| "learning_rate": 3e-06, |
| "loss": -10.4143, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.0011275637818909455, |
| "grad_norm": 61.187156677246094, |
| "learning_rate": 3e-06, |
| "loss": -11.2278, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.0011285642821410706, |
| "grad_norm": 73.90250396728516, |
| "learning_rate": 3e-06, |
| "loss": -12.7642, |
| "step": 1128 |
| }, |
| { |
| "completion_length": 256.0, |
| "epoch": 0.0011295647823911957, |
| "grad_norm": 57.4097785949707, |
| "learning_rate": 3e-06, |
| "loss": -4.7173, |
| "reward": 0.25728265941143036, |
| "reward_std": 0.1203451007604599, |
| "rewards/sudoku_reward_func": 0.25728263705968857, |
| "step": 1129, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0011305652826413208, |
| "grad_norm": 66.7146987915039, |
| "learning_rate": 3e-06, |
| "loss": -4.9575, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.0011315657828914456, |
| "grad_norm": 64.86263275146484, |
| "learning_rate": 3e-06, |
| "loss": -4.5152, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.0011325662831415707, |
| "grad_norm": 62.34523391723633, |
| "learning_rate": 3e-06, |
| "loss": -6.3271, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.0011335667833916958, |
| "grad_norm": 51.701087951660156, |
| "learning_rate": 3e-06, |
| "loss": -5.0724, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.001134567283641821, |
| "grad_norm": 57.253665924072266, |
| "learning_rate": 3e-06, |
| "loss": -5.2917, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.001135567783891946, |
| "grad_norm": 86.78887176513672, |
| "learning_rate": 3e-06, |
| "loss": -5.2676, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.0011365682841420711, |
| "grad_norm": 57.795284271240234, |
| "learning_rate": 3e-06, |
| "loss": -6.9075, |
| "step": 1136 |
| }, |
| { |
| "completion_length": 255.5625, |
| "epoch": 0.0011375687843921962, |
| "grad_norm": 64.22807312011719, |
| "learning_rate": 3e-06, |
| "loss": -7.4062, |
| "reward": 0.1966765895485878, |
| "reward_std": 0.14253421127796173, |
| "rewards/sudoku_reward_func": 0.1966765895485878, |
| "step": 1137, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.001138569284642321, |
| "grad_norm": 76.80013275146484, |
| "learning_rate": 3e-06, |
| "loss": -6.0915, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.0011395697848924462, |
| "grad_norm": 69.39733123779297, |
| "learning_rate": 3e-06, |
| "loss": -5.2055, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.0011405702851425713, |
| "grad_norm": 58.3795166015625, |
| "learning_rate": 3e-06, |
| "loss": -7.0196, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.0011415707853926964, |
| "grad_norm": 48.28700637817383, |
| "learning_rate": 3e-06, |
| "loss": -7.8775, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.0011425712856428215, |
| "grad_norm": 66.8180160522461, |
| "learning_rate": 3e-06, |
| "loss": -7.0476, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.0011435717858929466, |
| "grad_norm": 60.36480712890625, |
| "learning_rate": 3e-06, |
| "loss": -6.1894, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.0011445722861430714, |
| "grad_norm": 46.661373138427734, |
| "learning_rate": 3e-06, |
| "loss": -7.787, |
| "step": 1144 |
| }, |
| { |
| "completion_length": 256.0, |
| "epoch": 0.0011455727863931965, |
| "grad_norm": 63.26940155029297, |
| "learning_rate": 3e-06, |
| "loss": -12.6312, |
| "reward": 0.21070076525211334, |
| "reward_std": 0.16143980622291565, |
| "rewards/sudoku_reward_func": 0.21070076525211334, |
| "step": 1145, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0011465732866433216, |
| "grad_norm": 59.096675872802734, |
| "learning_rate": 3e-06, |
| "loss": -10.4162, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.0011475737868934467, |
| "grad_norm": 68.54641723632812, |
| "learning_rate": 3e-06, |
| "loss": -8.9985, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.0011485742871435718, |
| "grad_norm": 77.7593002319336, |
| "learning_rate": 3e-06, |
| "loss": -10.295, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.001149574787393697, |
| "grad_norm": 52.21497344970703, |
| "learning_rate": 3e-06, |
| "loss": -13.3169, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.001150575287643822, |
| "grad_norm": 44.6879997253418, |
| "learning_rate": 3e-06, |
| "loss": -11.347, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.0011515757878939469, |
| "grad_norm": 49.52695083618164, |
| "learning_rate": 3e-06, |
| "loss": -9.9074, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.001152576288144072, |
| "grad_norm": 52.84867477416992, |
| "learning_rate": 3e-06, |
| "loss": -10.8295, |
| "step": 1152 |
| }, |
| { |
| "completion_length": 256.0, |
| "epoch": 0.001153576788394197, |
| "grad_norm": 47.332157135009766, |
| "learning_rate": 3e-06, |
| "loss": -10.6754, |
| "reward": 0.23607730120420456, |
| "reward_std": 0.15124260634183884, |
| "rewards/sudoku_reward_func": 0.23607730120420456, |
| "step": 1153, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0011545772886443221, |
| "grad_norm": 59.60795211791992, |
| "learning_rate": 3e-06, |
| "loss": -9.6053, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.0011555777888944472, |
| "grad_norm": 58.676998138427734, |
| "learning_rate": 3e-06, |
| "loss": -6.6886, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.0011565782891445723, |
| "grad_norm": 44.790306091308594, |
| "learning_rate": 3e-06, |
| "loss": -7.5866, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.0011575787893946974, |
| "grad_norm": 47.08787536621094, |
| "learning_rate": 3e-06, |
| "loss": -11.3928, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.0011585792896448223, |
| "grad_norm": 62.33521270751953, |
| "learning_rate": 3e-06, |
| "loss": -10.1897, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.0011595797898949474, |
| "grad_norm": 62.34925842285156, |
| "learning_rate": 3e-06, |
| "loss": -6.9214, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.0011605802901450725, |
| "grad_norm": 51.26759719848633, |
| "learning_rate": 3e-06, |
| "loss": -8.3383, |
| "step": 1160 |
| }, |
| { |
| "completion_length": 256.0, |
| "epoch": 0.0011615807903951976, |
| "grad_norm": 71.22803497314453, |
| "learning_rate": 3e-06, |
| "loss": -3.411, |
| "reward": 0.1722470298409462, |
| "reward_std": 0.15417955815792084, |
| "rewards/sudoku_reward_func": 0.1722470298409462, |
| "step": 1161, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0011625812906453227, |
| "grad_norm": 71.8301010131836, |
| "learning_rate": 3e-06, |
| "loss": -6.7294, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.0011635817908954478, |
| "grad_norm": 98.72702026367188, |
| "learning_rate": 3e-06, |
| "loss": -8.4408, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.0011645822911455729, |
| "grad_norm": 48.08713912963867, |
| "learning_rate": 3e-06, |
| "loss": -3.6254, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.0011655827913956977, |
| "grad_norm": 68.0713882446289, |
| "learning_rate": 3e-06, |
| "loss": -4.2022, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.0011665832916458228, |
| "grad_norm": 58.155616760253906, |
| "learning_rate": 3e-06, |
| "loss": -7.46, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.001167583791895948, |
| "grad_norm": 85.09232330322266, |
| "learning_rate": 3e-06, |
| "loss": -9.5867, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.001168584292146073, |
| "grad_norm": 43.816776275634766, |
| "learning_rate": 3e-06, |
| "loss": -3.9229, |
| "step": 1168 |
| }, |
| { |
| "completion_length": 254.2916717529297, |
| "epoch": 0.0011695847923961981, |
| "grad_norm": 51.38243103027344, |
| "learning_rate": 3e-06, |
| "loss": -11.8953, |
| "reward": 0.20840098708868027, |
| "reward_std": 0.1413535289466381, |
| "rewards/sudoku_reward_func": 0.20840098708868027, |
| "step": 1169, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0011705852926463232, |
| "grad_norm": 45.753936767578125, |
| "learning_rate": 3e-06, |
| "loss": -11.8943, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.0011715857928964483, |
| "grad_norm": 49.84098434448242, |
| "learning_rate": 3e-06, |
| "loss": -13.6678, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.0011725862931465732, |
| "grad_norm": 54.85823440551758, |
| "learning_rate": 3e-06, |
| "loss": -13.16, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.0011735867933966983, |
| "grad_norm": 48.858642578125, |
| "learning_rate": 3e-06, |
| "loss": -12.1275, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.0011745872936468234, |
| "grad_norm": 73.31692504882812, |
| "learning_rate": 3e-06, |
| "loss": -12.7694, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.0011755877938969485, |
| "grad_norm": 62.74945831298828, |
| "learning_rate": 3e-06, |
| "loss": -14.5101, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.0011765882941470736, |
| "grad_norm": 46.59790802001953, |
| "learning_rate": 3e-06, |
| "loss": -14.0565, |
| "step": 1176 |
| }, |
| { |
| "completion_length": 255.6875, |
| "epoch": 0.0011775887943971987, |
| "grad_norm": 75.01581573486328, |
| "learning_rate": 3e-06, |
| "loss": -11.7232, |
| "reward": 0.20382773131132126, |
| "reward_std": 0.12743021547794342, |
| "rewards/sudoku_reward_func": 0.20382773131132126, |
| "step": 1177, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0011785892946473238, |
| "grad_norm": 50.770267486572266, |
| "learning_rate": 3e-06, |
| "loss": -11.7161, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.0011795897948974486, |
| "grad_norm": 46.51319122314453, |
| "learning_rate": 3e-06, |
| "loss": -12.1454, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.0011805902951475737, |
| "grad_norm": 66.0778579711914, |
| "learning_rate": 3e-06, |
| "loss": -12.9209, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.0011815907953976988, |
| "grad_norm": 79.279296875, |
| "learning_rate": 3e-06, |
| "loss": -11.9834, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.001182591295647824, |
| "grad_norm": 90.76998138427734, |
| "learning_rate": 3e-06, |
| "loss": -11.7098, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.001183591795897949, |
| "grad_norm": 45.429054260253906, |
| "learning_rate": 3e-06, |
| "loss": -12.816, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.001184592296148074, |
| "grad_norm": 72.65291595458984, |
| "learning_rate": 3e-06, |
| "loss": -13.4472, |
| "step": 1184 |
| }, |
| { |
| "completion_length": 255.0416717529297, |
| "epoch": 0.0011855927963981992, |
| "grad_norm": 51.79472732543945, |
| "learning_rate": 3e-06, |
| "loss": -8.3447, |
| "reward": 0.21494334936141968, |
| "reward_std": 0.18318727612495422, |
| "rewards/sudoku_reward_func": 0.21494334936141968, |
| "step": 1185, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.001186593296648324, |
| "grad_norm": 81.84302520751953, |
| "learning_rate": 3e-06, |
| "loss": -6.9186, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.0011875937968984492, |
| "grad_norm": 65.76778411865234, |
| "learning_rate": 3e-06, |
| "loss": -9.9974, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.0011885942971485743, |
| "grad_norm": 49.771148681640625, |
| "learning_rate": 3e-06, |
| "loss": -6.0897, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.0011895947973986994, |
| "grad_norm": 57.41340255737305, |
| "learning_rate": 3e-06, |
| "loss": -9.1539, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.0011905952976488244, |
| "grad_norm": 84.84779357910156, |
| "learning_rate": 3e-06, |
| "loss": -7.8363, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.0011915957978989495, |
| "grad_norm": 62.59910202026367, |
| "learning_rate": 3e-06, |
| "loss": -10.681, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.0011925962981490746, |
| "grad_norm": 55.14677047729492, |
| "learning_rate": 3e-06, |
| "loss": -6.7142, |
| "step": 1192 |
| }, |
| { |
| "completion_length": 256.0, |
| "epoch": 0.0011935967983991995, |
| "grad_norm": 55.2512321472168, |
| "learning_rate": 3e-06, |
| "loss": -6.7691, |
| "reward": 0.19457221776247025, |
| "reward_std": 0.12072273343801498, |
| "rewards/sudoku_reward_func": 0.19457221776247025, |
| "step": 1193, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0011945972986493246, |
| "grad_norm": 58.17645263671875, |
| "learning_rate": 3e-06, |
| "loss": -7.746, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.0011955977988994497, |
| "grad_norm": 54.57912826538086, |
| "learning_rate": 3e-06, |
| "loss": -6.2499, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.0011965982991495748, |
| "grad_norm": 50.100582122802734, |
| "learning_rate": 3e-06, |
| "loss": -8.7012, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.0011975987993996999, |
| "grad_norm": 58.364891052246094, |
| "learning_rate": 3e-06, |
| "loss": -6.9944, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.001198599299649825, |
| "grad_norm": 63.98237991333008, |
| "learning_rate": 3e-06, |
| "loss": -8.0062, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.00119959979989995, |
| "grad_norm": 55.945518493652344, |
| "learning_rate": 3e-06, |
| "loss": -7.0796, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.001200600300150075, |
| "grad_norm": 43.91253662109375, |
| "learning_rate": 3e-06, |
| "loss": -9.2514, |
| "step": 1200 |
| }, |
| { |
| "completion_length": 255.8541717529297, |
| "epoch": 0.0012016008004002, |
| "grad_norm": 54.789920806884766, |
| "learning_rate": 3e-06, |
| "loss": -17.2824, |
| "reward": 0.22809194028377533, |
| "reward_std": 0.15452970564365387, |
| "rewards/sudoku_reward_func": 0.22809194028377533, |
| "step": 1201, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0012026013006503251, |
| "grad_norm": 75.46862030029297, |
| "learning_rate": 3e-06, |
| "loss": -17.5235, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.0012036018009004502, |
| "grad_norm": 51.897701263427734, |
| "learning_rate": 3e-06, |
| "loss": -17.4954, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.0012046023011505753, |
| "grad_norm": 56.08158874511719, |
| "learning_rate": 3e-06, |
| "loss": -18.0722, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.0012056028014007004, |
| "grad_norm": 44.373287200927734, |
| "learning_rate": 3e-06, |
| "loss": -17.5788, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.0012066033016508255, |
| "grad_norm": 57.38613510131836, |
| "learning_rate": 3e-06, |
| "loss": -18.2698, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.0012076038019009504, |
| "grad_norm": 63.77848434448242, |
| "learning_rate": 3e-06, |
| "loss": -17.855, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.0012086043021510755, |
| "grad_norm": 54.5859375, |
| "learning_rate": 3e-06, |
| "loss": -18.4923, |
| "step": 1208 |
| }, |
| { |
| "completion_length": 252.4791717529297, |
| "epoch": 0.0012096048024012006, |
| "grad_norm": 54.04340744018555, |
| "learning_rate": 3e-06, |
| "loss": -9.2032, |
| "reward": 0.23073744028806686, |
| "reward_std": 0.151360884308815, |
| "rewards/sudoku_reward_func": 0.23073744028806686, |
| "step": 1209, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0012106053026513257, |
| "grad_norm": 68.70407104492188, |
| "learning_rate": 3e-06, |
| "loss": -10.1651, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.0012116058029014508, |
| "grad_norm": 60.362545013427734, |
| "learning_rate": 3e-06, |
| "loss": -9.5503, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.0012126063031515759, |
| "grad_norm": 53.39244842529297, |
| "learning_rate": 3e-06, |
| "loss": -10.7794, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.001213606803401701, |
| "grad_norm": 55.661537170410156, |
| "learning_rate": 3e-06, |
| "loss": -9.8296, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.0012146073036518258, |
| "grad_norm": 68.5494613647461, |
| "learning_rate": 3e-06, |
| "loss": -11.2466, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.001215607803901951, |
| "grad_norm": 80.74198913574219, |
| "learning_rate": 3e-06, |
| "loss": -10.4096, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.001216608304152076, |
| "grad_norm": 63.03583908081055, |
| "learning_rate": 3e-06, |
| "loss": -11.251, |
| "step": 1216 |
| }, |
| { |
| "completion_length": 256.0, |
| "epoch": 0.0012176088044022011, |
| "grad_norm": 54.193382263183594, |
| "learning_rate": 3e-06, |
| "loss": -6.744, |
| "reward": 0.22689320147037506, |
| "reward_std": 0.15158719569444656, |
| "rewards/sudoku_reward_func": 0.22689320147037506, |
| "step": 1217, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0012186093046523262, |
| "grad_norm": 61.95039749145508, |
| "learning_rate": 3e-06, |
| "loss": -8.6779, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.0012196098049024513, |
| "grad_norm": 60.906612396240234, |
| "learning_rate": 3e-06, |
| "loss": -11.6589, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.0012206103051525764, |
| "grad_norm": 64.95401000976562, |
| "learning_rate": 3e-06, |
| "loss": -6.9082, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.0012216108054027013, |
| "grad_norm": 51.398624420166016, |
| "learning_rate": 3e-06, |
| "loss": -6.9897, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.0012226113056528264, |
| "grad_norm": 63.6590690612793, |
| "learning_rate": 3e-06, |
| "loss": -9.0396, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.0012236118059029515, |
| "grad_norm": 56.660369873046875, |
| "learning_rate": 3e-06, |
| "loss": -12.1785, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.0012246123061530766, |
| "grad_norm": 71.9052734375, |
| "learning_rate": 3e-06, |
| "loss": -7.0529, |
| "step": 1224 |
| }, |
| { |
| "completion_length": 256.0, |
| "epoch": 0.0012256128064032016, |
| "grad_norm": 68.56539916992188, |
| "learning_rate": 3e-06, |
| "loss": -8.8322, |
| "reward": 0.1755952499806881, |
| "reward_std": 0.13179953396320343, |
| "rewards/sudoku_reward_func": 0.1755952425301075, |
| "step": 1225, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0012266133066533267, |
| "grad_norm": 76.60578155517578, |
| "learning_rate": 3e-06, |
| "loss": -10.5883, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.0012276138069034516, |
| "grad_norm": 71.67562103271484, |
| "learning_rate": 3e-06, |
| "loss": -7.2645, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.0012286143071535767, |
| "grad_norm": 55.47669982910156, |
| "learning_rate": 3e-06, |
| "loss": -8.8148, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.0012296148074037018, |
| "grad_norm": 89.62641906738281, |
| "learning_rate": 3e-06, |
| "loss": -8.9664, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.001230615307653827, |
| "grad_norm": 67.79340362548828, |
| "learning_rate": 3e-06, |
| "loss": -11.0511, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.001231615807903952, |
| "grad_norm": 68.45556640625, |
| "learning_rate": 3e-06, |
| "loss": -8.3416, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.001232616308154077, |
| "grad_norm": 59.216705322265625, |
| "learning_rate": 3e-06, |
| "loss": -8.9795, |
| "step": 1232 |
| }, |
| { |
| "completion_length": 252.3541717529297, |
| "epoch": 0.0012336168084042022, |
| "grad_norm": 55.95851516723633, |
| "learning_rate": 3e-06, |
| "loss": -7.4608, |
| "reward": 0.18629751354455948, |
| "reward_std": 0.1258675828576088, |
| "rewards/sudoku_reward_func": 0.18629750609397888, |
| "step": 1233, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.001234617308654327, |
| "grad_norm": 47.71657943725586, |
| "learning_rate": 3e-06, |
| "loss": -4.2586, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.0012356178089044522, |
| "grad_norm": 63.48736572265625, |
| "learning_rate": 3e-06, |
| "loss": -7.4636, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.0012366183091545772, |
| "grad_norm": 46.31603240966797, |
| "learning_rate": 3e-06, |
| "loss": -8.7069, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.0012376188094047023, |
| "grad_norm": 57.96319580078125, |
| "learning_rate": 3e-06, |
| "loss": -7.4238, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.0012386193096548274, |
| "grad_norm": 45.64490509033203, |
| "learning_rate": 3e-06, |
| "loss": -4.4522, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.0012396198099049525, |
| "grad_norm": 68.51618957519531, |
| "learning_rate": 3e-06, |
| "loss": -7.908, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.0012406203101550776, |
| "grad_norm": 47.038169860839844, |
| "learning_rate": 3e-06, |
| "loss": -9.3185, |
| "step": 1240 |
| }, |
| { |
| "completion_length": 254.02083587646484, |
| "epoch": 0.0012416208104052025, |
| "grad_norm": 40.134315490722656, |
| "learning_rate": 3e-06, |
| "loss": 11.8948, |
| "reward": 0.19241898506879807, |
| "reward_std": 0.11422308534383774, |
| "rewards/sudoku_reward_func": 0.19241898506879807, |
| "step": 1241, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0012426213106553276, |
| "grad_norm": 44.94253921508789, |
| "learning_rate": 3e-06, |
| "loss": 11.3922, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.0012436218109054527, |
| "grad_norm": 42.705142974853516, |
| "learning_rate": 3e-06, |
| "loss": 10.8539, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.0012446223111555778, |
| "grad_norm": 54.742881774902344, |
| "learning_rate": 3e-06, |
| "loss": 12.6673, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.0012456228114057029, |
| "grad_norm": 38.904541015625, |
| "learning_rate": 3e-06, |
| "loss": 11.573, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.001246623311655828, |
| "grad_norm": 44.749977111816406, |
| "learning_rate": 3e-06, |
| "loss": 11.1428, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.001247623811905953, |
| "grad_norm": 42.752193450927734, |
| "learning_rate": 3e-06, |
| "loss": 10.1868, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.001248624312156078, |
| "grad_norm": 52.36620330810547, |
| "learning_rate": 3e-06, |
| "loss": 12.2819, |
| "step": 1248 |
| }, |
| { |
| "completion_length": 253.8541717529297, |
| "epoch": 0.001249624812406203, |
| "grad_norm": 51.714195251464844, |
| "learning_rate": 3e-06, |
| "loss": -10.4317, |
| "reward": 0.22540509700775146, |
| "reward_std": 0.1407158188521862, |
| "rewards/sudoku_reward_func": 0.22540509700775146, |
| "step": 1249, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0012506253126563281, |
| "grad_norm": 58.5916633605957, |
| "learning_rate": 3e-06, |
| "loss": -10.271, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.0012516258129064532, |
| "grad_norm": 50.90818786621094, |
| "learning_rate": 3e-06, |
| "loss": -15.043, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.0012526263131565783, |
| "grad_norm": 61.283321380615234, |
| "learning_rate": 3e-06, |
| "loss": -15.4831, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.0012536268134067034, |
| "grad_norm": 49.761512756347656, |
| "learning_rate": 3e-06, |
| "loss": -10.8181, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.0012546273136568285, |
| "grad_norm": 65.47783660888672, |
| "learning_rate": 3e-06, |
| "loss": -10.4775, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.0012556278139069534, |
| "grad_norm": 49.240692138671875, |
| "learning_rate": 3e-06, |
| "loss": -15.3536, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.0012566283141570785, |
| "grad_norm": 73.38108825683594, |
| "learning_rate": 3e-06, |
| "loss": -16.2277, |
| "step": 1256 |
| }, |
| { |
| "completion_length": 251.3541717529297, |
| "epoch": 0.0012576288144072036, |
| "grad_norm": 103.87187194824219, |
| "learning_rate": 3e-06, |
| "loss": -9.3538, |
| "reward": 0.21862224489450455, |
| "reward_std": 0.14190439134836197, |
| "rewards/sudoku_reward_func": 0.21862224489450455, |
| "step": 1257, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0012586293146573287, |
| "grad_norm": 79.26856994628906, |
| "learning_rate": 3e-06, |
| "loss": -10.7503, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.0012596298149074538, |
| "grad_norm": 94.93610382080078, |
| "learning_rate": 3e-06, |
| "loss": -9.2469, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.0012606303151575789, |
| "grad_norm": 76.20452117919922, |
| "learning_rate": 3e-06, |
| "loss": -9.0145, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.001261630815407704, |
| "grad_norm": 115.39997100830078, |
| "learning_rate": 3e-06, |
| "loss": -10.6488, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.0012626313156578288, |
| "grad_norm": 77.61318969726562, |
| "learning_rate": 3e-06, |
| "loss": -11.5868, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.001263631815907954, |
| "grad_norm": 101.30606079101562, |
| "learning_rate": 3e-06, |
| "loss": -9.5905, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.001264632316158079, |
| "grad_norm": 79.39830780029297, |
| "learning_rate": 3e-06, |
| "loss": -10.3842, |
| "step": 1264 |
| }, |
| { |
| "completion_length": 253.4791717529297, |
| "epoch": 0.001265632816408204, |
| "grad_norm": 70.97663879394531, |
| "learning_rate": 3e-06, |
| "loss": -3.1305, |
| "reward": 0.23012492060661316, |
| "reward_std": 0.1151810809969902, |
| "rewards/sudoku_reward_func": 0.23012491315603256, |
| "step": 1265, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0012666333166583292, |
| "grad_norm": 62.975276947021484, |
| "learning_rate": 3e-06, |
| "loss": -5.8182, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.0012676338169084543, |
| "grad_norm": 58.47067642211914, |
| "learning_rate": 3e-06, |
| "loss": -3.038, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.0012686343171585794, |
| "grad_norm": 59.895057678222656, |
| "learning_rate": 3e-06, |
| "loss": -2.2264, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.0012696348174087043, |
| "grad_norm": 69.18795013427734, |
| "learning_rate": 3e-06, |
| "loss": -3.2379, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.0012706353176588294, |
| "grad_norm": 60.994815826416016, |
| "learning_rate": 3e-06, |
| "loss": -6.9695, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.0012716358179089544, |
| "grad_norm": 52.13833999633789, |
| "learning_rate": 3e-06, |
| "loss": -3.9799, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.0012726363181590795, |
| "grad_norm": 63.659725189208984, |
| "learning_rate": 3e-06, |
| "loss": -2.0911, |
| "step": 1272 |
| }, |
| { |
| "completion_length": 254.375, |
| "epoch": 0.0012736368184092046, |
| "grad_norm": 115.85366821289062, |
| "learning_rate": 3e-06, |
| "loss": -0.5977, |
| "reward": 0.21176423132419586, |
| "reward_std": 0.14812293648719788, |
| "rewards/sudoku_reward_func": 0.21176422387361526, |
| "step": 1273, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0012746373186593297, |
| "grad_norm": 82.96026611328125, |
| "learning_rate": 3e-06, |
| "loss": -5.8223, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.0012756378189094548, |
| "grad_norm": 101.27959442138672, |
| "learning_rate": 3e-06, |
| "loss": -2.9291, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.0012766383191595797, |
| "grad_norm": 84.06151580810547, |
| "learning_rate": 3e-06, |
| "loss": -3.468, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.0012776388194097048, |
| "grad_norm": 119.58338165283203, |
| "learning_rate": 3e-06, |
| "loss": -0.5509, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.0012786393196598299, |
| "grad_norm": 89.5655746459961, |
| "learning_rate": 3e-06, |
| "loss": -6.6584, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.001279639819909955, |
| "grad_norm": 100.6349105834961, |
| "learning_rate": 3e-06, |
| "loss": -3.7463, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.00128064032016008, |
| "grad_norm": 84.62515258789062, |
| "learning_rate": 3e-06, |
| "loss": -4.8434, |
| "step": 1280 |
| }, |
| { |
| "completion_length": 252.39583587646484, |
| "epoch": 0.0012816408204102052, |
| "grad_norm": 94.54007720947266, |
| "learning_rate": 3e-06, |
| "loss": 5.2631, |
| "reward": 0.26583169400691986, |
| "reward_std": 0.12800082564353943, |
| "rewards/sudoku_reward_func": 0.26583168655633926, |
| "step": 1281, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0012826413206603303, |
| "grad_norm": 98.34558868408203, |
| "learning_rate": 3e-06, |
| "loss": 5.4646, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.0012836418209104551, |
| "grad_norm": 93.9869384765625, |
| "learning_rate": 3e-06, |
| "loss": 5.4419, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.0012846423211605802, |
| "grad_norm": 87.3626708984375, |
| "learning_rate": 3e-06, |
| "loss": 3.0002, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.0012856428214107053, |
| "grad_norm": 90.73570251464844, |
| "learning_rate": 3e-06, |
| "loss": 4.9438, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.0012866433216608304, |
| "grad_norm": 109.58126831054688, |
| "learning_rate": 3e-06, |
| "loss": 5.7411, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.0012876438219109555, |
| "grad_norm": 100.88875579833984, |
| "learning_rate": 3e-06, |
| "loss": 5.1317, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.0012886443221610806, |
| "grad_norm": 87.1065673828125, |
| "learning_rate": 3e-06, |
| "loss": 2.495, |
| "step": 1288 |
| }, |
| { |
| "completion_length": 252.4791717529297, |
| "epoch": 0.0012896448224112057, |
| "grad_norm": 98.72467803955078, |
| "learning_rate": 3e-06, |
| "loss": -12.6574, |
| "reward": 0.22350365668535233, |
| "reward_std": 0.17447489500045776, |
| "rewards/sudoku_reward_func": 0.22350364923477173, |
| "step": 1289, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0012906453226613306, |
| "grad_norm": 128.9749755859375, |
| "learning_rate": 3e-06, |
| "loss": -5.734, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.0012916458229114557, |
| "grad_norm": 102.89720916748047, |
| "learning_rate": 3e-06, |
| "loss": -4.3407, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.0012926463231615808, |
| "grad_norm": 91.6898422241211, |
| "learning_rate": 3e-06, |
| "loss": -9.1816, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.0012936468234117059, |
| "grad_norm": 107.5361557006836, |
| "learning_rate": 3e-06, |
| "loss": -13.5125, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.001294647323661831, |
| "grad_norm": 112.34242248535156, |
| "learning_rate": 3e-06, |
| "loss": -7.4742, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.001295647823911956, |
| "grad_norm": 90.103515625, |
| "learning_rate": 3e-06, |
| "loss": -5.8833, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.0012966483241620811, |
| "grad_norm": 87.0075454711914, |
| "learning_rate": 3e-06, |
| "loss": -10.2467, |
| "step": 1296 |
| }, |
| { |
| "completion_length": 255.27083587646484, |
| "epoch": 0.001297648824412206, |
| "grad_norm": 102.01131439208984, |
| "learning_rate": 3e-06, |
| "loss": -11.5775, |
| "reward": 0.20604482293128967, |
| "reward_std": 0.15800370275974274, |
| "rewards/sudoku_reward_func": 0.20604482293128967, |
| "step": 1297, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0012986493246623311, |
| "grad_norm": 112.05081939697266, |
| "learning_rate": 3e-06, |
| "loss": -8.5013, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.0012996498249124562, |
| "grad_norm": 76.74340057373047, |
| "learning_rate": 3e-06, |
| "loss": -10.8034, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.0013006503251625813, |
| "grad_norm": 92.91266632080078, |
| "learning_rate": 3e-06, |
| "loss": -10.9193, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.0013016508254127064, |
| "grad_norm": 78.14289093017578, |
| "learning_rate": 3e-06, |
| "loss": -12.0713, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.0013026513256628315, |
| "grad_norm": 85.2936019897461, |
| "learning_rate": 3e-06, |
| "loss": -9.2757, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.0013036518259129564, |
| "grad_norm": 71.67143249511719, |
| "learning_rate": 3e-06, |
| "loss": -11.6347, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.0013046523261630815, |
| "grad_norm": 89.5588607788086, |
| "learning_rate": 3e-06, |
| "loss": -10.9821, |
| "step": 1304 |
| }, |
| { |
| "completion_length": 248.5416717529297, |
| "epoch": 0.0013056528264132066, |
| "grad_norm": 61.13591384887695, |
| "learning_rate": 3e-06, |
| "loss": -1.4544, |
| "reward": 0.24454365670681, |
| "reward_std": 0.13666882365942, |
| "rewards/sudoku_reward_func": 0.24454365670681, |
| "step": 1305, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0013066533266633317, |
| "grad_norm": 65.24658966064453, |
| "learning_rate": 3e-06, |
| "loss": -1.3029, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.0013076538269134567, |
| "grad_norm": 67.64944458007812, |
| "learning_rate": 3e-06, |
| "loss": -4.5651, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.0013086543271635818, |
| "grad_norm": 92.36503601074219, |
| "learning_rate": 3e-06, |
| "loss": -2.1237, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.001309654827413707, |
| "grad_norm": 63.10952377319336, |
| "learning_rate": 3e-06, |
| "loss": -1.846, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.0013106553276638318, |
| "grad_norm": 63.06090545654297, |
| "learning_rate": 3e-06, |
| "loss": -1.8813, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.001311655827913957, |
| "grad_norm": 68.20687866210938, |
| "learning_rate": 3e-06, |
| "loss": -5.8478, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.001312656328164082, |
| "grad_norm": 95.44178009033203, |
| "learning_rate": 3e-06, |
| "loss": -3.2949, |
| "step": 1312 |
| }, |
| { |
| "completion_length": 246.5625, |
| "epoch": 0.001313656828414207, |
| "grad_norm": 82.63322448730469, |
| "learning_rate": 3e-06, |
| "loss": -8.5331, |
| "reward": 0.21932870894670486, |
| "reward_std": 0.13176480680704117, |
| "rewards/sudoku_reward_func": 0.21932870149612427, |
| "step": 1313, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0013146573286643322, |
| "grad_norm": 76.15507507324219, |
| "learning_rate": 3e-06, |
| "loss": -10.7565, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.0013156578289144573, |
| "grad_norm": 66.5208969116211, |
| "learning_rate": 3e-06, |
| "loss": -10.1243, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.0013166583291645824, |
| "grad_norm": 63.920650482177734, |
| "learning_rate": 3e-06, |
| "loss": -9.6918, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.0013176588294147073, |
| "grad_norm": 112.46867370605469, |
| "learning_rate": 3e-06, |
| "loss": -8.9053, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.0013186593296648323, |
| "grad_norm": 96.82019805908203, |
| "learning_rate": 3e-06, |
| "loss": -10.5068, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.0013196598299149574, |
| "grad_norm": 78.8730697631836, |
| "learning_rate": 3e-06, |
| "loss": -10.6982, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.0013206603301650825, |
| "grad_norm": 70.09661865234375, |
| "learning_rate": 3e-06, |
| "loss": -10.2522, |
| "step": 1320 |
| }, |
| { |
| "completion_length": 250.83333587646484, |
| "epoch": 0.0013216608304152076, |
| "grad_norm": 86.71615600585938, |
| "learning_rate": 3e-06, |
| "loss": 2.4734, |
| "reward": 0.19102858752012253, |
| "reward_std": 0.14162860810756683, |
| "rewards/sudoku_reward_func": 0.19102858752012253, |
| "step": 1321, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0013226613306653327, |
| "grad_norm": 106.76415252685547, |
| "learning_rate": 3e-06, |
| "loss": -0.4374, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.0013236618309154578, |
| "grad_norm": 185.66705322265625, |
| "learning_rate": 3e-06, |
| "loss": 3.0146, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.0013246623311655827, |
| "grad_norm": 108.97735595703125, |
| "learning_rate": 3e-06, |
| "loss": 2.749, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.0013256628314157078, |
| "grad_norm": 90.60322570800781, |
| "learning_rate": 3e-06, |
| "loss": 2.3065, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.0013266633316658329, |
| "grad_norm": 106.5020751953125, |
| "learning_rate": 3e-06, |
| "loss": -0.4537, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.001327663831915958, |
| "grad_norm": 173.3131866455078, |
| "learning_rate": 3e-06, |
| "loss": 2.367, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.001328664332166083, |
| "grad_norm": 98.478515625, |
| "learning_rate": 3e-06, |
| "loss": 2.0123, |
| "step": 1328 |
| }, |
| { |
| "completion_length": 249.8541717529297, |
| "epoch": 0.0013296648324162082, |
| "grad_norm": 127.2477798461914, |
| "learning_rate": 3e-06, |
| "loss": -3.8098, |
| "reward": 0.2223086580634117, |
| "reward_std": 0.14327675849199295, |
| "rewards/sudoku_reward_func": 0.2223086580634117, |
| "step": 1329, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0013306653326663333, |
| "grad_norm": 75.55018615722656, |
| "learning_rate": 3e-06, |
| "loss": 0.4287, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.0013316658329164581, |
| "grad_norm": 87.05211639404297, |
| "learning_rate": 3e-06, |
| "loss": -0.1133, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.0013326663331665832, |
| "grad_norm": 74.08929443359375, |
| "learning_rate": 3e-06, |
| "loss": -3.0703, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.0013336668334167083, |
| "grad_norm": 145.09320068359375, |
| "learning_rate": 3e-06, |
| "loss": -4.448, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.0013346673336668334, |
| "grad_norm": 79.23677062988281, |
| "learning_rate": 3e-06, |
| "loss": -0.3325, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.0013356678339169585, |
| "grad_norm": 86.81128692626953, |
| "learning_rate": 3e-06, |
| "loss": -0.9774, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.0013366683341670836, |
| "grad_norm": 82.84042358398438, |
| "learning_rate": 3e-06, |
| "loss": -3.4287, |
| "step": 1336 |
| }, |
| { |
| "completion_length": 233.20833587646484, |
| "epoch": 0.0013376688344172087, |
| "grad_norm": 95.14927673339844, |
| "learning_rate": 3e-06, |
| "loss": -7.883, |
| "reward": 0.2618408799171448, |
| "reward_std": 0.12445945292711258, |
| "rewards/sudoku_reward_func": 0.2618408799171448, |
| "step": 1337, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0013386693346673336, |
| "grad_norm": 68.84015655517578, |
| "learning_rate": 3e-06, |
| "loss": -8.7071, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.0013396698349174587, |
| "grad_norm": 127.49705505371094, |
| "learning_rate": 3e-06, |
| "loss": -4.8652, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.0013406703351675838, |
| "grad_norm": 87.26109313964844, |
| "learning_rate": 3e-06, |
| "loss": -4.9352, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.0013416708354177089, |
| "grad_norm": 91.16841888427734, |
| "learning_rate": 3e-06, |
| "loss": -8.759, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.001342671335667834, |
| "grad_norm": 76.6376953125, |
| "learning_rate": 3e-06, |
| "loss": -9.6058, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.001343671835917959, |
| "grad_norm": 90.81112670898438, |
| "learning_rate": 3e-06, |
| "loss": -5.3411, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.0013446723361680841, |
| "grad_norm": 112.43119049072266, |
| "learning_rate": 3e-06, |
| "loss": -4.9454, |
| "step": 1344 |
| }, |
| { |
| "completion_length": 249.6041717529297, |
| "epoch": 0.001345672836418209, |
| "grad_norm": 122.31782531738281, |
| "learning_rate": 3e-06, |
| "loss": 1.6905, |
| "reward": 0.28765709698200226, |
| "reward_std": 0.15683971345424652, |
| "rewards/sudoku_reward_func": 0.28765709698200226, |
| "step": 1345, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.001346673336668334, |
| "grad_norm": 156.43411254882812, |
| "learning_rate": 3e-06, |
| "loss": -5.0148, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.0013476738369184592, |
| "grad_norm": 168.4134521484375, |
| "learning_rate": 3e-06, |
| "loss": 2.5596, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.0013486743371685843, |
| "grad_norm": 177.4627227783203, |
| "learning_rate": 3e-06, |
| "loss": 9.4151, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.0013496748374187094, |
| "grad_norm": 166.94285583496094, |
| "learning_rate": 3e-06, |
| "loss": 0.9821, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.0013506753376688345, |
| "grad_norm": 170.2594757080078, |
| "learning_rate": 3e-06, |
| "loss": -6.3015, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.0013516758379189596, |
| "grad_norm": 163.38580322265625, |
| "learning_rate": 3e-06, |
| "loss": 1.8278, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.0013526763381690845, |
| "grad_norm": 175.76690673828125, |
| "learning_rate": 3e-06, |
| "loss": 9.595, |
| "step": 1352 |
| }, |
| { |
| "completion_length": 244.9791717529297, |
| "epoch": 0.0013536768384192095, |
| "grad_norm": 97.57686614990234, |
| "learning_rate": 3e-06, |
| "loss": 3.503, |
| "reward": 0.27925462275743484, |
| "reward_std": 0.13963400572538376, |
| "rewards/sudoku_reward_func": 0.27925462275743484, |
| "step": 1353, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0013546773386693346, |
| "grad_norm": 103.25208282470703, |
| "learning_rate": 3e-06, |
| "loss": 1.375, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.0013556778389194597, |
| "grad_norm": 120.28804779052734, |
| "learning_rate": 3e-06, |
| "loss": -0.7448, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.0013566783391695848, |
| "grad_norm": 77.3034439086914, |
| "learning_rate": 3e-06, |
| "loss": 5.2681, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.00135767883941971, |
| "grad_norm": 114.31840515136719, |
| "learning_rate": 3e-06, |
| "loss": 2.6711, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.001358679339669835, |
| "grad_norm": 121.68708038330078, |
| "learning_rate": 3e-06, |
| "loss": 1.068, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.00135967983991996, |
| "grad_norm": 183.0277862548828, |
| "learning_rate": 3e-06, |
| "loss": -1.5462, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.001360680340170085, |
| "grad_norm": 79.93998718261719, |
| "learning_rate": 3e-06, |
| "loss": 4.9505, |
| "step": 1360 |
| }, |
| { |
| "completion_length": 248.06250762939453, |
| "epoch": 0.00136168084042021, |
| "grad_norm": 133.2718963623047, |
| "learning_rate": 3e-06, |
| "loss": -6.8724, |
| "reward": 0.2313988208770752, |
| "reward_std": 0.15942735970020294, |
| "rewards/sudoku_reward_func": 0.2313988208770752, |
| "step": 1361, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0013626813406703352, |
| "grad_norm": 143.41510009765625, |
| "learning_rate": 3e-06, |
| "loss": -11.1885, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.0013636818409204603, |
| "grad_norm": 156.24859619140625, |
| "learning_rate": 3e-06, |
| "loss": -4.2768, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.0013646823411705854, |
| "grad_norm": 148.48779296875, |
| "learning_rate": 3e-06, |
| "loss": -6.0221, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.0013656828414207105, |
| "grad_norm": 126.07758331298828, |
| "learning_rate": 3e-06, |
| "loss": -7.2337, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.0013666833416708353, |
| "grad_norm": 146.1149444580078, |
| "learning_rate": 3e-06, |
| "loss": -12.7754, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.0013676838419209604, |
| "grad_norm": 166.75257873535156, |
| "learning_rate": 3e-06, |
| "loss": -5.6501, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.0013686843421710855, |
| "grad_norm": 117.26903533935547, |
| "learning_rate": 3e-06, |
| "loss": -7.462, |
| "step": 1368 |
| }, |
| { |
| "completion_length": 244.37500762939453, |
| "epoch": 0.0013696848424212106, |
| "grad_norm": 155.39373779296875, |
| "learning_rate": 3e-06, |
| "loss": -3.6877, |
| "reward": 0.25125136226415634, |
| "reward_std": 0.15128708630800247, |
| "rewards/sudoku_reward_func": 0.25125134736299515, |
| "step": 1369, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0013706853426713357, |
| "grad_norm": 113.63846588134766, |
| "learning_rate": 3e-06, |
| "loss": -3.9167, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.0013716858429214608, |
| "grad_norm": 72.45719909667969, |
| "learning_rate": 3e-06, |
| "loss": -1.3301, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.001372686343171586, |
| "grad_norm": 78.77584838867188, |
| "learning_rate": 3e-06, |
| "loss": -6.9234, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.0013736868434217108, |
| "grad_norm": 151.5078125, |
| "learning_rate": 3e-06, |
| "loss": -3.098, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.0013746873436718359, |
| "grad_norm": 65.08445739746094, |
| "learning_rate": 3e-06, |
| "loss": -4.1381, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.001375687843921961, |
| "grad_norm": 74.84840393066406, |
| "learning_rate": 3e-06, |
| "loss": -1.6518, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.001376688344172086, |
| "grad_norm": 75.1297836303711, |
| "learning_rate": 3e-06, |
| "loss": -7.4169, |
| "step": 1376 |
| }, |
| { |
| "completion_length": 248.4791717529297, |
| "epoch": 0.0013776888444222112, |
| "grad_norm": 91.91381072998047, |
| "learning_rate": 3e-06, |
| "loss": -6.5628, |
| "reward": 0.2777778059244156, |
| "reward_std": 0.15616532415151596, |
| "rewards/sudoku_reward_func": 0.2777777910232544, |
| "step": 1377, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0013786893446723362, |
| "grad_norm": 86.98223114013672, |
| "learning_rate": 3e-06, |
| "loss": -6.736, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.0013796898449224613, |
| "grad_norm": 119.746337890625, |
| "learning_rate": 3e-06, |
| "loss": -6.1626, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.0013806903451725862, |
| "grad_norm": 102.7596435546875, |
| "learning_rate": 3e-06, |
| "loss": -6.9399, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.0013816908454227113, |
| "grad_norm": 88.32569885253906, |
| "learning_rate": 3e-06, |
| "loss": -6.8012, |
| "step": 1381 |
| }, |
| { |
| "epoch": 0.0013826913456728364, |
| "grad_norm": 63.3649787902832, |
| "learning_rate": 3e-06, |
| "loss": -7.2393, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.0013836918459229615, |
| "grad_norm": 82.19108581542969, |
| "learning_rate": 3e-06, |
| "loss": -6.9574, |
| "step": 1383 |
| }, |
| { |
| "epoch": 0.0013846923461730866, |
| "grad_norm": 74.44951629638672, |
| "learning_rate": 3e-06, |
| "loss": -7.6355, |
| "step": 1384 |
| }, |
| { |
| "completion_length": 251.6666717529297, |
| "epoch": 0.0013856928464232117, |
| "grad_norm": 79.18040466308594, |
| "learning_rate": 3e-06, |
| "loss": -4.2122, |
| "reward": 0.24041006714105606, |
| "reward_std": 0.13425163179636002, |
| "rewards/sudoku_reward_func": 0.24041006714105606, |
| "step": 1385, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0013866933466733366, |
| "grad_norm": 71.49141693115234, |
| "learning_rate": 3e-06, |
| "loss": -5.4048, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.0013876938469234617, |
| "grad_norm": 63.46376419067383, |
| "learning_rate": 3e-06, |
| "loss": -6.3293, |
| "step": 1387 |
| }, |
| { |
| "epoch": 0.0013886943471735867, |
| "grad_norm": 71.38902282714844, |
| "learning_rate": 3e-06, |
| "loss": -7.0593, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.0013896948474237118, |
| "grad_norm": 66.57677459716797, |
| "learning_rate": 3e-06, |
| "loss": -5.1928, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.001390695347673837, |
| "grad_norm": 71.48369598388672, |
| "learning_rate": 3e-06, |
| "loss": -6.1357, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.001391695847923962, |
| "grad_norm": 61.57743453979492, |
| "learning_rate": 3e-06, |
| "loss": -6.5625, |
| "step": 1391 |
| }, |
| { |
| "epoch": 0.0013926963481740871, |
| "grad_norm": 58.65646743774414, |
| "learning_rate": 3e-06, |
| "loss": -7.4159, |
| "step": 1392 |
| }, |
| { |
| "completion_length": 247.06250762939453, |
| "epoch": 0.001393696848424212, |
| "grad_norm": 47.037384033203125, |
| "learning_rate": 3e-06, |
| "loss": -4.6575, |
| "reward": 0.24611443281173706, |
| "reward_std": 0.1536630243062973, |
| "rewards/sudoku_reward_func": 0.24611442536115646, |
| "step": 1393, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.001394697348674337, |
| "grad_norm": 85.59077453613281, |
| "learning_rate": 3e-06, |
| "loss": -3.5807, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.0013956978489244622, |
| "grad_norm": 56.778385162353516, |
| "learning_rate": 3e-06, |
| "loss": -3.5657, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.0013966983491745873, |
| "grad_norm": 56.52222442626953, |
| "learning_rate": 3e-06, |
| "loss": -3.2542, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.0013976988494247124, |
| "grad_norm": 55.23912811279297, |
| "learning_rate": 3e-06, |
| "loss": -5.5549, |
| "step": 1397 |
| }, |
| { |
| "epoch": 0.0013986993496748375, |
| "grad_norm": 76.44470977783203, |
| "learning_rate": 3e-06, |
| "loss": -4.2466, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.0013996998499249626, |
| "grad_norm": 60.509979248046875, |
| "learning_rate": 3e-06, |
| "loss": -4.577, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.0014007003501750874, |
| "grad_norm": 48.10431671142578, |
| "learning_rate": 3e-06, |
| "loss": -3.913, |
| "step": 1400 |
| }, |
| { |
| "completion_length": 255.45833587646484, |
| "epoch": 0.0014017008504252125, |
| "grad_norm": 72.41065216064453, |
| "learning_rate": 3e-06, |
| "loss": -11.1462, |
| "reward": 0.234995037317276, |
| "reward_std": 0.17775796353816986, |
| "rewards/sudoku_reward_func": 0.234995037317276, |
| "step": 1401, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0014027013506753376, |
| "grad_norm": 84.73635864257812, |
| "learning_rate": 3e-06, |
| "loss": -5.2094, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.0014037018509254627, |
| "grad_norm": 76.14923095703125, |
| "learning_rate": 3e-06, |
| "loss": -5.9781, |
| "step": 1403 |
| }, |
| { |
| "epoch": 0.0014047023511755878, |
| "grad_norm": 75.8636245727539, |
| "learning_rate": 3e-06, |
| "loss": -11.0312, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.001405702851425713, |
| "grad_norm": 82.16057586669922, |
| "learning_rate": 3e-06, |
| "loss": -11.7936, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.001406703351675838, |
| "grad_norm": 72.84368896484375, |
| "learning_rate": 3e-06, |
| "loss": -5.8866, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.0014077038519259629, |
| "grad_norm": 76.86295318603516, |
| "learning_rate": 3e-06, |
| "loss": -6.5346, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.001408704352176088, |
| "grad_norm": 75.8145523071289, |
| "learning_rate": 3e-06, |
| "loss": -11.6837, |
| "step": 1408 |
| }, |
| { |
| "completion_length": 255.0416717529297, |
| "epoch": 0.001409704852426213, |
| "grad_norm": 76.15098571777344, |
| "learning_rate": 3e-06, |
| "loss": -3.7352, |
| "reward": 0.2776537910103798, |
| "reward_std": 0.16747048497200012, |
| "rewards/sudoku_reward_func": 0.2776537910103798, |
| "step": 1409, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0014107053526763382, |
| "grad_norm": 106.66970825195312, |
| "learning_rate": 3e-06, |
| "loss": -4.3643, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.0014117058529264633, |
| "grad_norm": 64.76913452148438, |
| "learning_rate": 3e-06, |
| "loss": -6.6609, |
| "step": 1411 |
| }, |
| { |
| "epoch": 0.0014127063531765884, |
| "grad_norm": 113.87123107910156, |
| "learning_rate": 3e-06, |
| "loss": -4.4335, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.0014137068534267134, |
| "grad_norm": 130.8094482421875, |
| "learning_rate": 3e-06, |
| "loss": -4.8342, |
| "step": 1413 |
| }, |
| { |
| "epoch": 0.0014147073536768383, |
| "grad_norm": 85.35004425048828, |
| "learning_rate": 3e-06, |
| "loss": -5.7942, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.0014157078539269634, |
| "grad_norm": 69.62791442871094, |
| "learning_rate": 3e-06, |
| "loss": -8.0415, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.0014167083541770885, |
| "grad_norm": 116.4716567993164, |
| "learning_rate": 3e-06, |
| "loss": -6.9187, |
| "step": 1416 |
| }, |
| { |
| "completion_length": 246.52083587646484, |
| "epoch": 0.0014177088544272136, |
| "grad_norm": 68.39940643310547, |
| "learning_rate": 3e-06, |
| "loss": 12.7806, |
| "reward": 0.2607887014746666, |
| "reward_std": 0.15520118921995163, |
| "rewards/sudoku_reward_func": 0.2607886865735054, |
| "step": 1417, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0014187093546773387, |
| "grad_norm": 65.6619644165039, |
| "learning_rate": 3e-06, |
| "loss": 15.2863, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.0014197098549274638, |
| "grad_norm": 67.66735076904297, |
| "learning_rate": 3e-06, |
| "loss": 12.5973, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.0014207103551775889, |
| "grad_norm": 61.57198715209961, |
| "learning_rate": 3e-06, |
| "loss": 12.9858, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.0014217108554277138, |
| "grad_norm": 95.22171020507812, |
| "learning_rate": 3e-06, |
| "loss": 11.3072, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.0014227113556778389, |
| "grad_norm": 136.36041259765625, |
| "learning_rate": 3e-06, |
| "loss": 15.4914, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.001423711855927964, |
| "grad_norm": 87.2356185913086, |
| "learning_rate": 3e-06, |
| "loss": 12.1819, |
| "step": 1423 |
| }, |
| { |
| "epoch": 0.001424712356178089, |
| "grad_norm": 85.48587036132812, |
| "learning_rate": 3e-06, |
| "loss": 12.1587, |
| "step": 1424 |
| }, |
| { |
| "completion_length": 252.625, |
| "epoch": 0.0014257128564282141, |
| "grad_norm": 124.8813705444336, |
| "learning_rate": 3e-06, |
| "loss": 0.3882, |
| "reward": 0.22999338805675507, |
| "reward_std": 0.13858719915151596, |
| "rewards/sudoku_reward_func": 0.22999338805675507, |
| "step": 1425, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0014267133566783392, |
| "grad_norm": 110.09771728515625, |
| "learning_rate": 3e-06, |
| "loss": 1.7846, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.0014277138569284643, |
| "grad_norm": 120.795654296875, |
| "learning_rate": 3e-06, |
| "loss": 0.6717, |
| "step": 1427 |
| }, |
| { |
| "epoch": 0.0014287143571785892, |
| "grad_norm": 98.05519104003906, |
| "learning_rate": 3e-06, |
| "loss": 0.1646, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.0014297148574287143, |
| "grad_norm": 145.90524291992188, |
| "learning_rate": 3e-06, |
| "loss": -0.9419, |
| "step": 1429 |
| }, |
| { |
| "epoch": 0.0014307153576788394, |
| "grad_norm": 84.8663558959961, |
| "learning_rate": 3e-06, |
| "loss": 0.2669, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.0014317158579289645, |
| "grad_norm": 90.54247283935547, |
| "learning_rate": 3e-06, |
| "loss": -1.3302, |
| "step": 1431 |
| }, |
| { |
| "epoch": 0.0014327163581790896, |
| "grad_norm": 93.9137954711914, |
| "learning_rate": 3e-06, |
| "loss": -1.5561, |
| "step": 1432 |
| }, |
| { |
| "completion_length": 252.20833587646484, |
| "epoch": 0.0014337168584292147, |
| "grad_norm": 91.17662811279297, |
| "learning_rate": 3e-06, |
| "loss": 1.4802, |
| "reward": 0.22301137447357178, |
| "reward_std": 0.13641764968633652, |
| "rewards/sudoku_reward_func": 0.22301135957241058, |
| "step": 1433, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0014347173586793398, |
| "grad_norm": 83.0351791381836, |
| "learning_rate": 3e-06, |
| "loss": 3.8915, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.0014357178589294646, |
| "grad_norm": 101.50720977783203, |
| "learning_rate": 3e-06, |
| "loss": 7.0151, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.0014367183591795897, |
| "grad_norm": 85.99288177490234, |
| "learning_rate": 3e-06, |
| "loss": 0.5616, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.0014377188594297148, |
| "grad_norm": 76.99950408935547, |
| "learning_rate": 3e-06, |
| "loss": 1.1366, |
| "step": 1437 |
| }, |
| { |
| "epoch": 0.00143871935967984, |
| "grad_norm": 80.017333984375, |
| "learning_rate": 3e-06, |
| "loss": 3.0612, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.001439719859929965, |
| "grad_norm": 110.35974884033203, |
| "learning_rate": 3e-06, |
| "loss": 5.9277, |
| "step": 1439 |
| }, |
| { |
| "epoch": 0.0014407203601800901, |
| "grad_norm": 99.30435943603516, |
| "learning_rate": 3e-06, |
| "loss": -1.2894, |
| "step": 1440 |
| }, |
| { |
| "completion_length": 255.9791717529297, |
| "epoch": 0.0014417208604302152, |
| "grad_norm": 106.5459976196289, |
| "learning_rate": 3e-06, |
| "loss": -14.271, |
| "reward": 0.247519850730896, |
| "reward_std": 0.1321713551878929, |
| "rewards/sudoku_reward_func": 0.247519850730896, |
| "step": 1441, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.00144272136068034, |
| "grad_norm": 91.2779769897461, |
| "learning_rate": 3e-06, |
| "loss": -8.3958, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.0014437218609304652, |
| "grad_norm": 116.3575210571289, |
| "learning_rate": 3e-06, |
| "loss": -13.9669, |
| "step": 1443 |
| }, |
| { |
| "epoch": 0.0014447223611805903, |
| "grad_norm": 132.5576171875, |
| "learning_rate": 3e-06, |
| "loss": -8.9636, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.0014457228614307154, |
| "grad_norm": 143.8059844970703, |
| "learning_rate": 3e-06, |
| "loss": -13.8777, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.0014467233616808405, |
| "grad_norm": 85.30062866210938, |
| "learning_rate": 3e-06, |
| "loss": -8.6056, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.0014477238619309656, |
| "grad_norm": 97.61617279052734, |
| "learning_rate": 3e-06, |
| "loss": -14.6353, |
| "step": 1447 |
| }, |
| { |
| "epoch": 0.0014487243621810906, |
| "grad_norm": 147.2740020751953, |
| "learning_rate": 3e-06, |
| "loss": -9.864, |
| "step": 1448 |
| }, |
| { |
| "completion_length": 250.4791717529297, |
| "epoch": 0.0014497248624312155, |
| "grad_norm": 137.6214599609375, |
| "learning_rate": 3e-06, |
| "loss": -6.1127, |
| "reward": 0.25371648371219635, |
| "reward_std": 0.15944789350032806, |
| "rewards/sudoku_reward_func": 0.25371648371219635, |
| "step": 1449, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0014507253626813406, |
| "grad_norm": 106.05636596679688, |
| "learning_rate": 3e-06, |
| "loss": -5.666, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.0014517258629314657, |
| "grad_norm": 110.15316772460938, |
| "learning_rate": 3e-06, |
| "loss": -7.7442, |
| "step": 1451 |
| }, |
| { |
| "epoch": 0.0014527263631815908, |
| "grad_norm": 91.43193054199219, |
| "learning_rate": 3e-06, |
| "loss": -4.0562, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.001453726863431716, |
| "grad_norm": 129.03883361816406, |
| "learning_rate": 3e-06, |
| "loss": -6.7951, |
| "step": 1453 |
| }, |
| { |
| "epoch": 0.001454727363681841, |
| "grad_norm": 131.1297607421875, |
| "learning_rate": 3e-06, |
| "loss": -6.6216, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.001455727863931966, |
| "grad_norm": 117.30534362792969, |
| "learning_rate": 3e-06, |
| "loss": -8.6388, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.001456728364182091, |
| "grad_norm": 103.80106353759766, |
| "learning_rate": 3e-06, |
| "loss": -4.4661, |
| "step": 1456 |
| }, |
| { |
| "completion_length": 246.12500762939453, |
| "epoch": 0.001457728864432216, |
| "grad_norm": 87.62139129638672, |
| "learning_rate": 3e-06, |
| "loss": -6.7951, |
| "reward": 0.233506940305233, |
| "reward_std": 0.13778656721115112, |
| "rewards/sudoku_reward_func": 0.233506940305233, |
| "step": 1457, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0014587293646823412, |
| "grad_norm": 84.07535552978516, |
| "learning_rate": 3e-06, |
| "loss": -7.339, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.0014597298649324662, |
| "grad_norm": 57.86692810058594, |
| "learning_rate": 3e-06, |
| "loss": -6.35, |
| "step": 1459 |
| }, |
| { |
| "epoch": 0.0014607303651825913, |
| "grad_norm": 99.71435546875, |
| "learning_rate": 3e-06, |
| "loss": -6.6156, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.0014617308654327164, |
| "grad_norm": 104.31732940673828, |
| "learning_rate": 3e-06, |
| "loss": -8.1659, |
| "step": 1461 |
| }, |
| { |
| "epoch": 0.0014627313656828413, |
| "grad_norm": 91.96192932128906, |
| "learning_rate": 3e-06, |
| "loss": -7.4639, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.0014637318659329664, |
| "grad_norm": 60.986629486083984, |
| "learning_rate": 3e-06, |
| "loss": -7.0631, |
| "step": 1463 |
| }, |
| { |
| "epoch": 0.0014647323661830915, |
| "grad_norm": 97.76730346679688, |
| "learning_rate": 3e-06, |
| "loss": -7.033, |
| "step": 1464 |
| }, |
| { |
| "completion_length": 245.12500762939453, |
| "epoch": 0.0014657328664332166, |
| "grad_norm": 114.5215835571289, |
| "learning_rate": 3e-06, |
| "loss": 3.3239, |
| "reward": 0.22682179510593414, |
| "reward_std": 0.1532593071460724, |
| "rewards/sudoku_reward_func": 0.22682178765535355, |
| "step": 1465, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0014667333666833417, |
| "grad_norm": 107.46202087402344, |
| "learning_rate": 3e-06, |
| "loss": 6.8997, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.0014677338669334668, |
| "grad_norm": 110.28289031982422, |
| "learning_rate": 3e-06, |
| "loss": 6.234, |
| "step": 1467 |
| }, |
| { |
| "epoch": 0.0014687343671835919, |
| "grad_norm": 98.04695129394531, |
| "learning_rate": 3e-06, |
| "loss": 1.3534, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.0014697348674337168, |
| "grad_norm": 151.7884979248047, |
| "learning_rate": 3e-06, |
| "loss": 2.3593, |
| "step": 1469 |
| }, |
| { |
| "epoch": 0.0014707353676838418, |
| "grad_norm": 92.70248413085938, |
| "learning_rate": 3e-06, |
| "loss": 6.602, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.001471735867933967, |
| "grad_norm": 102.759765625, |
| "learning_rate": 3e-06, |
| "loss": 5.3602, |
| "step": 1471 |
| }, |
| { |
| "epoch": 0.001472736368184092, |
| "grad_norm": 94.43704986572266, |
| "learning_rate": 3e-06, |
| "loss": 0.9451, |
| "step": 1472 |
| }, |
| { |
| "completion_length": 252.6666717529297, |
| "epoch": 0.0014737368684342171, |
| "grad_norm": 188.50820922851562, |
| "learning_rate": 3e-06, |
| "loss": 0.5014, |
| "reward": 0.26963459700345993, |
| "reward_std": 0.14512023329734802, |
| "rewards/sudoku_reward_func": 0.26963459700345993, |
| "step": 1473, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0014747373686843422, |
| "grad_norm": 128.79505920410156, |
| "learning_rate": 3e-06, |
| "loss": -3.2434, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.0014757378689344673, |
| "grad_norm": 123.74315643310547, |
| "learning_rate": 3e-06, |
| "loss": -6.2439, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.0014767383691845922, |
| "grad_norm": 90.2215805053711, |
| "learning_rate": 3e-06, |
| "loss": -2.9395, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.0014777388694347173, |
| "grad_norm": 215.22544860839844, |
| "learning_rate": 3e-06, |
| "loss": -1.6467, |
| "step": 1477 |
| }, |
| { |
| "epoch": 0.0014787393696848424, |
| "grad_norm": 88.53108215332031, |
| "learning_rate": 3e-06, |
| "loss": -4.445, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.0014797398699349675, |
| "grad_norm": 111.55303955078125, |
| "learning_rate": 3e-06, |
| "loss": -6.7549, |
| "step": 1479 |
| }, |
| { |
| "epoch": 0.0014807403701850926, |
| "grad_norm": 111.30841827392578, |
| "learning_rate": 3e-06, |
| "loss": -2.8563, |
| "step": 1480 |
| }, |
| { |
| "completion_length": 253.06250762939453, |
| "epoch": 0.0014817408704352177, |
| "grad_norm": 83.2706069946289, |
| "learning_rate": 3e-06, |
| "loss": 5.2902, |
| "reward": 0.24458499252796173, |
| "reward_std": 0.1385301575064659, |
| "rewards/sudoku_reward_func": 0.24458499252796173, |
| "step": 1481, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0014827413706853428, |
| "grad_norm": 74.04076385498047, |
| "learning_rate": 3e-06, |
| "loss": 7.6081, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.0014837418709354676, |
| "grad_norm": 88.90835571289062, |
| "learning_rate": 3e-06, |
| "loss": 7.3049, |
| "step": 1483 |
| }, |
| { |
| "epoch": 0.0014847423711855927, |
| "grad_norm": 92.47587585449219, |
| "learning_rate": 3e-06, |
| "loss": 9.4078, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.0014857428714357178, |
| "grad_norm": 76.9234848022461, |
| "learning_rate": 3e-06, |
| "loss": 4.3459, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.001486743371685843, |
| "grad_norm": 75.98670959472656, |
| "learning_rate": 3e-06, |
| "loss": 6.8549, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.001487743871935968, |
| "grad_norm": 129.2482452392578, |
| "learning_rate": 3e-06, |
| "loss": 5.5137, |
| "step": 1487 |
| }, |
| { |
| "epoch": 0.001488744372186093, |
| "grad_norm": 79.61399841308594, |
| "learning_rate": 3e-06, |
| "loss": 8.9599, |
| "step": 1488 |
| }, |
| { |
| "completion_length": 251.125, |
| "epoch": 0.0014897448724362182, |
| "grad_norm": 98.78363037109375, |
| "learning_rate": 3e-06, |
| "loss": 2.7972, |
| "reward": 0.25284092128276825, |
| "reward_std": 0.1391737014055252, |
| "rewards/sudoku_reward_func": 0.25284091383218765, |
| "step": 1489, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.001490745372686343, |
| "grad_norm": 85.65895080566406, |
| "learning_rate": 3e-06, |
| "loss": 1.2383, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.0014917458729364682, |
| "grad_norm": 112.22046661376953, |
| "learning_rate": 3e-06, |
| "loss": 5.8015, |
| "step": 1491 |
| }, |
| { |
| "epoch": 0.0014927463731865933, |
| "grad_norm": 67.51846313476562, |
| "learning_rate": 3e-06, |
| "loss": 0.434, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.0014937468734367184, |
| "grad_norm": 83.65746307373047, |
| "learning_rate": 3e-06, |
| "loss": 2.4033, |
| "step": 1493 |
| }, |
| { |
| "epoch": 0.0014947473736868435, |
| "grad_norm": 94.6855697631836, |
| "learning_rate": 3e-06, |
| "loss": -0.0726, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.0014957478739369685, |
| "grad_norm": 76.3798599243164, |
| "learning_rate": 3e-06, |
| "loss": 5.4418, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.0014967483741870936, |
| "grad_norm": 65.88536071777344, |
| "learning_rate": 3e-06, |
| "loss": -0.3561, |
| "step": 1496 |
| }, |
| { |
| "completion_length": 247.64583587646484, |
| "epoch": 0.0014977488744372185, |
| "grad_norm": 81.58489227294922, |
| "learning_rate": 3e-06, |
| "loss": -0.9713, |
| "reward": 0.19385448843240738, |
| "reward_std": 0.13647598028182983, |
| "rewards/sudoku_reward_func": 0.19385448098182678, |
| "step": 1497, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0014987493746873436, |
| "grad_norm": 89.6312026977539, |
| "learning_rate": 3e-06, |
| "loss": -6.1808, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.0014997498749374687, |
| "grad_norm": 100.32272338867188, |
| "learning_rate": 3e-06, |
| "loss": -2.441, |
| "step": 1499 |
| }, |
| { |
| "epoch": 0.0015007503751875938, |
| "grad_norm": 68.68706512451172, |
| "learning_rate": 3e-06, |
| "loss": -3.726, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.0015017508754377189, |
| "grad_norm": 76.70758819580078, |
| "learning_rate": 3e-06, |
| "loss": -1.8507, |
| "step": 1501 |
| }, |
| { |
| "epoch": 0.001502751375687844, |
| "grad_norm": 64.95220184326172, |
| "learning_rate": 3e-06, |
| "loss": -7.1908, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.001503751875937969, |
| "grad_norm": 113.56958770751953, |
| "learning_rate": 3e-06, |
| "loss": -4.5099, |
| "step": 1503 |
| }, |
| { |
| "epoch": 0.001504752376188094, |
| "grad_norm": 61.816226959228516, |
| "learning_rate": 3e-06, |
| "loss": -4.6618, |
| "step": 1504 |
| }, |
| { |
| "completion_length": 249.0625, |
| "epoch": 0.001505752876438219, |
| "grad_norm": 84.65864562988281, |
| "learning_rate": 3e-06, |
| "loss": -3.4845, |
| "reward": 0.21722058206796646, |
| "reward_std": 0.14737464487552643, |
| "rewards/sudoku_reward_func": 0.21722057461738586, |
| "step": 1505, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0015067533766883441, |
| "grad_norm": 114.67820739746094, |
| "learning_rate": 3e-06, |
| "loss": 1.9507, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.0015077538769384692, |
| "grad_norm": 97.06100463867188, |
| "learning_rate": 3e-06, |
| "loss": 0.9134, |
| "step": 1507 |
| }, |
| { |
| "epoch": 0.0015087543771885943, |
| "grad_norm": 85.14713287353516, |
| "learning_rate": 3e-06, |
| "loss": -0.671, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.0015097548774387194, |
| "grad_norm": 75.44242095947266, |
| "learning_rate": 3e-06, |
| "loss": -4.8392, |
| "step": 1509 |
| }, |
| { |
| "epoch": 0.0015107553776888445, |
| "grad_norm": 98.38030242919922, |
| "learning_rate": 3e-06, |
| "loss": 1.2949, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.0015117558779389694, |
| "grad_norm": 67.19142150878906, |
| "learning_rate": 3e-06, |
| "loss": 0.4119, |
| "step": 1511 |
| }, |
| { |
| "epoch": 0.0015127563781890945, |
| "grad_norm": 112.94633483886719, |
| "learning_rate": 3e-06, |
| "loss": -1.4086, |
| "step": 1512 |
| }, |
| { |
| "completion_length": 250.89583587646484, |
| "epoch": 0.0015137568784392196, |
| "grad_norm": 57.52448272705078, |
| "learning_rate": 3e-06, |
| "loss": -5.1907, |
| "reward": 0.23578043282032013, |
| "reward_std": 0.14374738186597824, |
| "rewards/sudoku_reward_func": 0.23578043282032013, |
| "step": 1513, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0015147573786893447, |
| "grad_norm": 65.3283920288086, |
| "learning_rate": 3e-06, |
| "loss": -5.2681, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.0015157578789394698, |
| "grad_norm": 125.39384460449219, |
| "learning_rate": 3e-06, |
| "loss": -6.6646, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.0015167583791895949, |
| "grad_norm": 120.92166900634766, |
| "learning_rate": 3e-06, |
| "loss": -6.0842, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.00151775887943972, |
| "grad_norm": 64.44194793701172, |
| "learning_rate": 3e-06, |
| "loss": -5.4585, |
| "step": 1517 |
| }, |
| { |
| "epoch": 0.0015187593796898448, |
| "grad_norm": 68.04533386230469, |
| "learning_rate": 3e-06, |
| "loss": -5.6854, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.00151975987993997, |
| "grad_norm": 104.33889770507812, |
| "learning_rate": 3e-06, |
| "loss": -6.8329, |
| "step": 1519 |
| }, |
| { |
| "epoch": 0.001520760380190095, |
| "grad_norm": 100.65160369873047, |
| "learning_rate": 3e-06, |
| "loss": -6.3702, |
| "step": 1520 |
| }, |
| { |
| "completion_length": 249.50000762939453, |
| "epoch": 0.0015217608804402201, |
| "grad_norm": 225.0299530029297, |
| "learning_rate": 3e-06, |
| "loss": 2.0878, |
| "reward": 0.293744757771492, |
| "reward_std": 0.19449011981487274, |
| "rewards/sudoku_reward_func": 0.2937447428703308, |
| "step": 1521, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0015227613806903452, |
| "grad_norm": 176.6975555419922, |
| "learning_rate": 3e-06, |
| "loss": -2.4676, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.0015237618809404703, |
| "grad_norm": 84.19757080078125, |
| "learning_rate": 3e-06, |
| "loss": -1.3043, |
| "step": 1523 |
| }, |
| { |
| "epoch": 0.0015247623811905954, |
| "grad_norm": 86.49832153320312, |
| "learning_rate": 3e-06, |
| "loss": -3.4045, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.0015257628814407203, |
| "grad_norm": 169.01022338867188, |
| "learning_rate": 3e-06, |
| "loss": -0.4526, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.0015267633816908454, |
| "grad_norm": 152.21177673339844, |
| "learning_rate": 3e-06, |
| "loss": -4.8907, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.0015277638819409705, |
| "grad_norm": 86.3626708984375, |
| "learning_rate": 3e-06, |
| "loss": -2.5317, |
| "step": 1527 |
| }, |
| { |
| "epoch": 0.0015287643821910956, |
| "grad_norm": 68.27998352050781, |
| "learning_rate": 3e-06, |
| "loss": -4.5565, |
| "step": 1528 |
| }, |
| { |
| "completion_length": 249.5416717529297, |
| "epoch": 0.0015297648824412207, |
| "grad_norm": 123.3398208618164, |
| "learning_rate": 3e-06, |
| "loss": -6.8746, |
| "reward": 0.22935831546783447, |
| "reward_std": 0.1520671397447586, |
| "rewards/sudoku_reward_func": 0.22935831546783447, |
| "step": 1529, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0015307653826913457, |
| "grad_norm": 69.14616394042969, |
| "learning_rate": 3e-06, |
| "loss": -4.4294, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.0015317658829414708, |
| "grad_norm": 90.8684310913086, |
| "learning_rate": 3e-06, |
| "loss": -2.9881, |
| "step": 1531 |
| }, |
| { |
| "epoch": 0.0015327663831915957, |
| "grad_norm": 72.3403091430664, |
| "learning_rate": 3e-06, |
| "loss": -4.3715, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.0015337668834417208, |
| "grad_norm": 135.02798461914062, |
| "learning_rate": 3e-06, |
| "loss": -5.9151, |
| "step": 1533 |
| }, |
| { |
| "epoch": 0.001534767383691846, |
| "grad_norm": 85.45219421386719, |
| "learning_rate": 3e-06, |
| "loss": -4.3543, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.001535767883941971, |
| "grad_norm": 58.03328323364258, |
| "learning_rate": 3e-06, |
| "loss": -3.612, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.001536768384192096, |
| "grad_norm": 56.990333557128906, |
| "learning_rate": 3e-06, |
| "loss": -5.0622, |
| "step": 1536 |
| }, |
| { |
| "completion_length": 254.0, |
| "epoch": 0.0015377688844422212, |
| "grad_norm": 81.3235092163086, |
| "learning_rate": 3e-06, |
| "loss": -10.1589, |
| "reward": 0.29837438464164734, |
| "reward_std": 0.15097320824861526, |
| "rewards/sudoku_reward_func": 0.29837436974048615, |
| "step": 1537, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0015387693846923463, |
| "grad_norm": 106.20044708251953, |
| "learning_rate": 3e-06, |
| "loss": -13.3899, |
| "step": 1538 |
| }, |
| { |
| "epoch": 0.0015397698849424712, |
| "grad_norm": 74.74054718017578, |
| "learning_rate": 3e-06, |
| "loss": -9.4645, |
| "step": 1539 |
| }, |
| { |
| "epoch": 0.0015407703851925963, |
| "grad_norm": 141.25428771972656, |
| "learning_rate": 3e-06, |
| "loss": -12.3875, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.0015417708854427213, |
| "grad_norm": 89.22872924804688, |
| "learning_rate": 3e-06, |
| "loss": -10.428, |
| "step": 1541 |
| }, |
| { |
| "epoch": 0.0015427713856928464, |
| "grad_norm": 90.24919128417969, |
| "learning_rate": 3e-06, |
| "loss": -14.6637, |
| "step": 1542 |
| }, |
| { |
| "epoch": 0.0015437718859429715, |
| "grad_norm": 89.6397933959961, |
| "learning_rate": 3e-06, |
| "loss": -10.8819, |
| "step": 1543 |
| }, |
| { |
| "epoch": 0.0015447723861930966, |
| "grad_norm": 140.13125610351562, |
| "learning_rate": 3e-06, |
| "loss": -14.8474, |
| "step": 1544 |
| }, |
| { |
| "completion_length": 254.2291717529297, |
| "epoch": 0.0015457728864432215, |
| "grad_norm": 83.32096862792969, |
| "learning_rate": 3e-06, |
| "loss": 2.6586, |
| "reward": 0.19969412684440613, |
| "reward_std": 0.1379682719707489, |
| "rewards/sudoku_reward_func": 0.19969411194324493, |
| "step": 1545, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0015467733866933466, |
| "grad_norm": 83.81671905517578, |
| "learning_rate": 3e-06, |
| "loss": 3.664, |
| "step": 1546 |
| }, |
| { |
| "epoch": 0.0015477738869434717, |
| "grad_norm": 58.57638931274414, |
| "learning_rate": 3e-06, |
| "loss": 4.2643, |
| "step": 1547 |
| }, |
| { |
| "epoch": 0.0015487743871935968, |
| "grad_norm": 92.9087905883789, |
| "learning_rate": 3e-06, |
| "loss": 1.3365, |
| "step": 1548 |
| }, |
| { |
| "epoch": 0.0015497748874437219, |
| "grad_norm": 88.11719512939453, |
| "learning_rate": 3e-06, |
| "loss": -0.389, |
| "step": 1549 |
| }, |
| { |
| "epoch": 0.001550775387693847, |
| "grad_norm": 113.19729614257812, |
| "learning_rate": 3e-06, |
| "loss": 1.6035, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.001551775887943972, |
| "grad_norm": 81.20172882080078, |
| "learning_rate": 3e-06, |
| "loss": 4.2581, |
| "step": 1551 |
| }, |
| { |
| "epoch": 0.001552776388194097, |
| "grad_norm": 96.5271987915039, |
| "learning_rate": 3e-06, |
| "loss": -0.6346, |
| "step": 1552 |
| }, |
| { |
| "completion_length": 256.0, |
| "epoch": 0.001553776888444222, |
| "grad_norm": 142.41549682617188, |
| "learning_rate": 3e-06, |
| "loss": -6.5003, |
| "reward": 0.23867394775152206, |
| "reward_std": 0.1645166575908661, |
| "rewards/sudoku_reward_func": 0.23867394775152206, |
| "step": 1553, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0015547773886943471, |
| "grad_norm": 192.58761596679688, |
| "learning_rate": 3e-06, |
| "loss": -4.5212, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.0015557778889444722, |
| "grad_norm": 108.9868392944336, |
| "learning_rate": 3e-06, |
| "loss": -9.6799, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.0015567783891945973, |
| "grad_norm": 92.08625030517578, |
| "learning_rate": 3e-06, |
| "loss": -3.8365, |
| "step": 1556 |
| }, |
| { |
| "epoch": 0.0015577788894447224, |
| "grad_norm": 143.28524780273438, |
| "learning_rate": 3e-06, |
| "loss": -6.7169, |
| "step": 1557 |
| }, |
| { |
| "epoch": 0.0015587793896948475, |
| "grad_norm": 96.56403350830078, |
| "learning_rate": 3e-06, |
| "loss": -5.0673, |
| "step": 1558 |
| }, |
| { |
| "epoch": 0.0015597798899449724, |
| "grad_norm": 109.43830871582031, |
| "learning_rate": 3e-06, |
| "loss": -10.2858, |
| "step": 1559 |
| }, |
| { |
| "epoch": 0.0015607803901950975, |
| "grad_norm": 88.09711456298828, |
| "learning_rate": 3e-06, |
| "loss": -4.9645, |
| "step": 1560 |
| }, |
| { |
| "completion_length": 253.18750762939453, |
| "epoch": 0.0015617808904452226, |
| "grad_norm": 82.52039337158203, |
| "learning_rate": 3e-06, |
| "loss": -4.6242, |
| "reward": 0.2604166865348816, |
| "reward_std": 0.13141649216413498, |
| "rewards/sudoku_reward_func": 0.2604166716337204, |
| "step": 1561, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0015627813906953477, |
| "grad_norm": 62.24523162841797, |
| "learning_rate": 3e-06, |
| "loss": -6.4732, |
| "step": 1562 |
| }, |
| { |
| "epoch": 0.0015637818909454728, |
| "grad_norm": 97.1363525390625, |
| "learning_rate": 3e-06, |
| "loss": -4.7735, |
| "step": 1563 |
| }, |
| { |
| "epoch": 0.0015647823911955979, |
| "grad_norm": 92.79659271240234, |
| "learning_rate": 3e-06, |
| "loss": -5.7465, |
| "step": 1564 |
| }, |
| { |
| "epoch": 0.001565782891445723, |
| "grad_norm": 75.58289337158203, |
| "learning_rate": 3e-06, |
| "loss": -5.6129, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.0015667833916958478, |
| "grad_norm": 79.69831085205078, |
| "learning_rate": 3e-06, |
| "loss": -6.9765, |
| "step": 1566 |
| }, |
| { |
| "epoch": 0.001567783891945973, |
| "grad_norm": 99.76036834716797, |
| "learning_rate": 3e-06, |
| "loss": -5.4218, |
| "step": 1567 |
| }, |
| { |
| "epoch": 0.001568784392196098, |
| "grad_norm": 73.5775146484375, |
| "learning_rate": 3e-06, |
| "loss": -6.1781, |
| "step": 1568 |
| }, |
| { |
| "completion_length": 255.9791717529297, |
| "epoch": 0.001569784892446223, |
| "grad_norm": 69.83203125, |
| "learning_rate": 3e-06, |
| "loss": -3.86, |
| "reward": 0.2596275433897972, |
| "reward_std": 0.1274852231144905, |
| "rewards/sudoku_reward_func": 0.259627528488636, |
| "step": 1569, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0015707853926963482, |
| "grad_norm": 84.85794067382812, |
| "learning_rate": 3e-06, |
| "loss": -1.9833, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.0015717858929464733, |
| "grad_norm": 74.86754608154297, |
| "learning_rate": 3e-06, |
| "loss": -7.1057, |
| "step": 1571 |
| }, |
| { |
| "epoch": 0.0015727863931965984, |
| "grad_norm": 64.1947250366211, |
| "learning_rate": 3e-06, |
| "loss": -2.2024, |
| "step": 1572 |
| }, |
| { |
| "epoch": 0.0015737868934467233, |
| "grad_norm": 63.00054168701172, |
| "learning_rate": 3e-06, |
| "loss": -4.747, |
| "step": 1573 |
| }, |
| { |
| "epoch": 0.0015747873936968484, |
| "grad_norm": 89.75048065185547, |
| "learning_rate": 3e-06, |
| "loss": -2.3625, |
| "step": 1574 |
| }, |
| { |
| "epoch": 0.0015757878939469735, |
| "grad_norm": 82.22975158691406, |
| "learning_rate": 3e-06, |
| "loss": -8.4312, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.0015767883941970985, |
| "grad_norm": 65.96247863769531, |
| "learning_rate": 3e-06, |
| "loss": -2.6335, |
| "step": 1576 |
| }, |
| { |
| "completion_length": 252.08333587646484, |
| "epoch": 0.0015777888944472236, |
| "grad_norm": 116.85649108886719, |
| "learning_rate": 3e-06, |
| "loss": -3.3996, |
| "reward": 0.19888994842767715, |
| "reward_std": 0.13466180860996246, |
| "rewards/sudoku_reward_func": 0.19888994097709656, |
| "step": 1577, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0015787893946973487, |
| "grad_norm": 73.25578308105469, |
| "learning_rate": 3e-06, |
| "loss": -3.9517, |
| "step": 1578 |
| }, |
| { |
| "epoch": 0.0015797898949474738, |
| "grad_norm": 82.38320922851562, |
| "learning_rate": 3e-06, |
| "loss": 0.7611, |
| "step": 1579 |
| }, |
| { |
| "epoch": 0.0015807903951975987, |
| "grad_norm": 77.25627899169922, |
| "learning_rate": 3e-06, |
| "loss": -2.21, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.0015817908954477238, |
| "grad_norm": 82.29784393310547, |
| "learning_rate": 3e-06, |
| "loss": -4.6331, |
| "step": 1581 |
| }, |
| { |
| "epoch": 0.001582791395697849, |
| "grad_norm": 75.16769409179688, |
| "learning_rate": 3e-06, |
| "loss": -4.2407, |
| "step": 1582 |
| }, |
| { |
| "epoch": 0.001583791895947974, |
| "grad_norm": 71.37103271484375, |
| "learning_rate": 3e-06, |
| "loss": 0.0222, |
| "step": 1583 |
| }, |
| { |
| "epoch": 0.001584792396198099, |
| "grad_norm": 87.73910522460938, |
| "learning_rate": 3e-06, |
| "loss": -2.4734, |
| "step": 1584 |
| }, |
| { |
| "completion_length": 251.27084350585938, |
| "epoch": 0.0015857928964482242, |
| "grad_norm": 102.77171325683594, |
| "learning_rate": 3e-06, |
| "loss": -0.3869, |
| "reward": 0.24416036158800125, |
| "reward_std": 0.12802283093333244, |
| "rewards/sudoku_reward_func": 0.24416035413742065, |
| "step": 1585, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0015867933966983493, |
| "grad_norm": 111.1654052734375, |
| "learning_rate": 3e-06, |
| "loss": -1.6993, |
| "step": 1586 |
| }, |
| { |
| "epoch": 0.0015877938969484741, |
| "grad_norm": 98.75373077392578, |
| "learning_rate": 3e-06, |
| "loss": -1.3497, |
| "step": 1587 |
| }, |
| { |
| "epoch": 0.0015887943971985992, |
| "grad_norm": 123.21533203125, |
| "learning_rate": 3e-06, |
| "loss": -0.2909, |
| "step": 1588 |
| }, |
| { |
| "epoch": 0.0015897948974487243, |
| "grad_norm": 115.61258697509766, |
| "learning_rate": 3e-06, |
| "loss": -1.0612, |
| "step": 1589 |
| }, |
| { |
| "epoch": 0.0015907953976988494, |
| "grad_norm": 103.52163696289062, |
| "learning_rate": 3e-06, |
| "loss": -2.3525, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.0015917958979489745, |
| "grad_norm": 119.00145721435547, |
| "learning_rate": 3e-06, |
| "loss": -1.7235, |
| "step": 1591 |
| }, |
| { |
| "epoch": 0.0015927963981990996, |
| "grad_norm": 105.48393249511719, |
| "learning_rate": 3e-06, |
| "loss": -1.6225, |
| "step": 1592 |
| }, |
| { |
| "completion_length": 253.7916717529297, |
| "epoch": 0.0015937968984492247, |
| "grad_norm": 125.09081268310547, |
| "learning_rate": 3e-06, |
| "loss": 5.2883, |
| "reward": 0.250248022377491, |
| "reward_std": 0.12681767344474792, |
| "rewards/sudoku_reward_func": 0.250248022377491, |
| "step": 1593, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0015947973986993496, |
| "grad_norm": 125.6259536743164, |
| "learning_rate": 3e-06, |
| "loss": -1.9275, |
| "step": 1594 |
| }, |
| { |
| "epoch": 0.0015957978989494747, |
| "grad_norm": 103.04039764404297, |
| "learning_rate": 3e-06, |
| "loss": -5.8767, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.0015967983991995998, |
| "grad_norm": 123.74522399902344, |
| "learning_rate": 3e-06, |
| "loss": -1.9645, |
| "step": 1596 |
| }, |
| { |
| "epoch": 0.0015977988994497249, |
| "grad_norm": 121.4541015625, |
| "learning_rate": 3e-06, |
| "loss": 5.1423, |
| "step": 1597 |
| }, |
| { |
| "epoch": 0.00159879939969985, |
| "grad_norm": 133.47821044921875, |
| "learning_rate": 3e-06, |
| "loss": -1.9671, |
| "step": 1598 |
| }, |
| { |
| "epoch": 0.001599799899949975, |
| "grad_norm": 105.77659606933594, |
| "learning_rate": 3e-06, |
| "loss": -6.2592, |
| "step": 1599 |
| }, |
| { |
| "epoch": 0.0016008004002001002, |
| "grad_norm": 93.46845245361328, |
| "learning_rate": 3e-06, |
| "loss": -2.6713, |
| "step": 1600 |
| }, |
| { |
| "completion_length": 254.5416717529297, |
| "epoch": 0.001601800900450225, |
| "grad_norm": 136.07968139648438, |
| "learning_rate": 3e-06, |
| "loss": -7.7159, |
| "reward": 0.2539908140897751, |
| "reward_std": 0.1405995786190033, |
| "rewards/sudoku_reward_func": 0.2539908140897751, |
| "step": 1601, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0016028014007003501, |
| "grad_norm": 160.49053955078125, |
| "learning_rate": 3e-06, |
| "loss": -6.9086, |
| "step": 1602 |
| }, |
| { |
| "epoch": 0.0016038019009504752, |
| "grad_norm": 117.0043716430664, |
| "learning_rate": 3e-06, |
| "loss": -2.7393, |
| "step": 1603 |
| }, |
| { |
| "epoch": 0.0016048024012006003, |
| "grad_norm": 105.5345687866211, |
| "learning_rate": 3e-06, |
| "loss": -6.4129, |
| "step": 1604 |
| }, |
| { |
| "epoch": 0.0016058029014507254, |
| "grad_norm": 108.58619689941406, |
| "learning_rate": 3e-06, |
| "loss": -8.0772, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.0016068034017008505, |
| "grad_norm": 168.36338806152344, |
| "learning_rate": 3e-06, |
| "loss": -9.2435, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.0016078039019509756, |
| "grad_norm": 124.85694122314453, |
| "learning_rate": 3e-06, |
| "loss": -5.0364, |
| "step": 1607 |
| }, |
| { |
| "epoch": 0.0016088044022011005, |
| "grad_norm": 91.1465835571289, |
| "learning_rate": 3e-06, |
| "loss": -7.5496, |
| "step": 1608 |
| }, |
| { |
| "completion_length": 250.6041717529297, |
| "epoch": 0.0016098049024512256, |
| "grad_norm": 158.19471740722656, |
| "learning_rate": 3e-06, |
| "loss": -10.0427, |
| "reward": 0.22151951491832733, |
| "reward_std": 0.14843863993883133, |
| "rewards/sudoku_reward_func": 0.22151951491832733, |
| "step": 1609, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0016108054027013507, |
| "grad_norm": 140.9024658203125, |
| "learning_rate": 3e-06, |
| "loss": -9.1608, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.0016118059029514758, |
| "grad_norm": 106.24528503417969, |
| "learning_rate": 3e-06, |
| "loss": -15.0477, |
| "step": 1611 |
| }, |
| { |
| "epoch": 0.0016128064032016008, |
| "grad_norm": 131.50599670410156, |
| "learning_rate": 3e-06, |
| "loss": -13.0933, |
| "step": 1612 |
| }, |
| { |
| "epoch": 0.001613806903451726, |
| "grad_norm": 134.1167755126953, |
| "learning_rate": 3e-06, |
| "loss": -10.9769, |
| "step": 1613 |
| }, |
| { |
| "epoch": 0.001614807403701851, |
| "grad_norm": 134.45484924316406, |
| "learning_rate": 3e-06, |
| "loss": -10.1419, |
| "step": 1614 |
| }, |
| { |
| "epoch": 0.001615807903951976, |
| "grad_norm": 128.87808227539062, |
| "learning_rate": 3e-06, |
| "loss": -14.6942, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.001616808404202101, |
| "grad_norm": 103.39990997314453, |
| "learning_rate": 3e-06, |
| "loss": -14.5943, |
| "step": 1616 |
| }, |
| { |
| "completion_length": 253.6875, |
| "epoch": 0.001617808904452226, |
| "grad_norm": 117.34486389160156, |
| "learning_rate": 3e-06, |
| "loss": 2.1868, |
| "reward": 0.23395414650440216, |
| "reward_std": 0.1375764161348343, |
| "rewards/sudoku_reward_func": 0.23395412415266037, |
| "step": 1617, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0016188094047023512, |
| "grad_norm": 114.16409301757812, |
| "learning_rate": 3e-06, |
| "loss": -3.7694, |
| "step": 1618 |
| }, |
| { |
| "epoch": 0.0016198099049524763, |
| "grad_norm": 145.38967895507812, |
| "learning_rate": 3e-06, |
| "loss": -2.0291, |
| "step": 1619 |
| }, |
| { |
| "epoch": 0.0016208104052026014, |
| "grad_norm": 120.34522247314453, |
| "learning_rate": 3e-06, |
| "loss": -4.8787, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.0016218109054527263, |
| "grad_norm": 120.42835235595703, |
| "learning_rate": 3e-06, |
| "loss": 2.1333, |
| "step": 1621 |
| }, |
| { |
| "epoch": 0.0016228114057028514, |
| "grad_norm": 104.11591339111328, |
| "learning_rate": 3e-06, |
| "loss": -4.6544, |
| "step": 1622 |
| }, |
| { |
| "epoch": 0.0016238119059529764, |
| "grad_norm": 143.9630889892578, |
| "learning_rate": 3e-06, |
| "loss": -2.2529, |
| "step": 1623 |
| }, |
| { |
| "epoch": 0.0016248124062031015, |
| "grad_norm": 139.08253479003906, |
| "learning_rate": 3e-06, |
| "loss": -5.4911, |
| "step": 1624 |
| }, |
| { |
| "completion_length": 248.95833587646484, |
| "epoch": 0.0016258129064532266, |
| "grad_norm": 106.52497863769531, |
| "learning_rate": 3e-06, |
| "loss": -4.0487, |
| "reward": 0.2699652835726738, |
| "reward_std": 0.13865990936756134, |
| "rewards/sudoku_reward_func": 0.2699652835726738, |
| "step": 1625, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0016268134067033517, |
| "grad_norm": 205.29147338867188, |
| "learning_rate": 3e-06, |
| "loss": -6.9653, |
| "step": 1626 |
| }, |
| { |
| "epoch": 0.0016278139069534768, |
| "grad_norm": 154.57858276367188, |
| "learning_rate": 3e-06, |
| "loss": 1.8228, |
| "step": 1627 |
| }, |
| { |
| "epoch": 0.0016288144072036017, |
| "grad_norm": 107.60708618164062, |
| "learning_rate": 3e-06, |
| "loss": 0.0389, |
| "step": 1628 |
| }, |
| { |
| "epoch": 0.0016298149074537268, |
| "grad_norm": 109.25032043457031, |
| "learning_rate": 3e-06, |
| "loss": -4.4817, |
| "step": 1629 |
| }, |
| { |
| "epoch": 0.0016308154077038519, |
| "grad_norm": 142.53073120117188, |
| "learning_rate": 3e-06, |
| "loss": -7.5816, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.001631815907953977, |
| "grad_norm": 119.20352935791016, |
| "learning_rate": 3e-06, |
| "loss": 1.6034, |
| "step": 1631 |
| }, |
| { |
| "epoch": 0.001632816408204102, |
| "grad_norm": 153.0146484375, |
| "learning_rate": 3e-06, |
| "loss": -1.5781, |
| "step": 1632 |
| }, |
| { |
| "completion_length": 255.9791717529297, |
| "epoch": 0.0016338169084542272, |
| "grad_norm": 196.0118408203125, |
| "learning_rate": 3e-06, |
| "loss": 5.2555, |
| "reward": 0.21412037312984467, |
| "reward_std": 0.16516636312007904, |
| "rewards/sudoku_reward_func": 0.21412037312984467, |
| "step": 1633, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0016348174087043523, |
| "grad_norm": 120.1426773071289, |
| "learning_rate": 3e-06, |
| "loss": 5.5743, |
| "step": 1634 |
| }, |
| { |
| "epoch": 0.0016358179089544771, |
| "grad_norm": 113.17626190185547, |
| "learning_rate": 3e-06, |
| "loss": 2.7044, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.0016368184092046022, |
| "grad_norm": 157.77957153320312, |
| "learning_rate": 3e-06, |
| "loss": 3.0029, |
| "step": 1636 |
| }, |
| { |
| "epoch": 0.0016378189094547273, |
| "grad_norm": 142.81764221191406, |
| "learning_rate": 3e-06, |
| "loss": 3.6418, |
| "step": 1637 |
| }, |
| { |
| "epoch": 0.0016388194097048524, |
| "grad_norm": 151.52130126953125, |
| "learning_rate": 3e-06, |
| "loss": 3.9227, |
| "step": 1638 |
| }, |
| { |
| "epoch": 0.0016398199099549775, |
| "grad_norm": 134.97328186035156, |
| "learning_rate": 3e-06, |
| "loss": 1.3015, |
| "step": 1639 |
| }, |
| { |
| "epoch": 0.0016408204102051026, |
| "grad_norm": 128.61497497558594, |
| "learning_rate": 3e-06, |
| "loss": 2.455, |
| "step": 1640 |
| }, |
| { |
| "completion_length": 251.1041717529297, |
| "epoch": 0.0016418209104552277, |
| "grad_norm": 131.1600799560547, |
| "learning_rate": 3e-06, |
| "loss": 1.0932, |
| "reward": 0.2405754178762436, |
| "reward_std": 0.13715411722660065, |
| "rewards/sudoku_reward_func": 0.2405753955245018, |
| "step": 1641, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0016428214107053526, |
| "grad_norm": 143.12283325195312, |
| "learning_rate": 3e-06, |
| "loss": 4.3751, |
| "step": 1642 |
| }, |
| { |
| "epoch": 0.0016438219109554777, |
| "grad_norm": 156.13925170898438, |
| "learning_rate": 3e-06, |
| "loss": 1.8976, |
| "step": 1643 |
| }, |
| { |
| "epoch": 0.0016448224112056028, |
| "grad_norm": 104.6756591796875, |
| "learning_rate": 3e-06, |
| "loss": 6.0342, |
| "step": 1644 |
| }, |
| { |
| "epoch": 0.0016458229114557279, |
| "grad_norm": 125.25997924804688, |
| "learning_rate": 3e-06, |
| "loss": -0.2949, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.001646823411705853, |
| "grad_norm": 139.3275146484375, |
| "learning_rate": 3e-06, |
| "loss": 2.4151, |
| "step": 1646 |
| }, |
| { |
| "epoch": 0.001647823911955978, |
| "grad_norm": 87.91356658935547, |
| "learning_rate": 3e-06, |
| "loss": 0.6755, |
| "step": 1647 |
| }, |
| { |
| "epoch": 0.0016488244122061031, |
| "grad_norm": 156.1615447998047, |
| "learning_rate": 3e-06, |
| "loss": 3.1565, |
| "step": 1648 |
| }, |
| { |
| "completion_length": 253.87500762939453, |
| "epoch": 0.001649824912456228, |
| "grad_norm": 133.72607421875, |
| "learning_rate": 3e-06, |
| "loss": -0.155, |
| "reward": 0.1815100461244583, |
| "reward_std": 0.16103161871433258, |
| "rewards/sudoku_reward_func": 0.1815100461244583, |
| "step": 1649, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0016508254127063531, |
| "grad_norm": 283.2836608886719, |
| "learning_rate": 3e-06, |
| "loss": -1.3618, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.0016518259129564782, |
| "grad_norm": 167.18289184570312, |
| "learning_rate": 3e-06, |
| "loss": -7.732, |
| "step": 1651 |
| }, |
| { |
| "epoch": 0.0016528264132066033, |
| "grad_norm": 124.50619506835938, |
| "learning_rate": 3e-06, |
| "loss": -7.0091, |
| "step": 1652 |
| }, |
| { |
| "epoch": 0.0016538269134567284, |
| "grad_norm": 177.51052856445312, |
| "learning_rate": 3e-06, |
| "loss": -0.1884, |
| "step": 1653 |
| }, |
| { |
| "epoch": 0.0016548274137068535, |
| "grad_norm": 92.6873779296875, |
| "learning_rate": 3e-06, |
| "loss": -3.7635, |
| "step": 1654 |
| }, |
| { |
| "epoch": 0.0016558279139569786, |
| "grad_norm": 99.19071197509766, |
| "learning_rate": 3e-06, |
| "loss": -7.8231, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.0016568284142071035, |
| "grad_norm": 145.24159240722656, |
| "learning_rate": 3e-06, |
| "loss": -6.9523, |
| "step": 1656 |
| }, |
| { |
| "completion_length": 255.9791717529297, |
| "epoch": 0.0016578289144572286, |
| "grad_norm": 149.080322265625, |
| "learning_rate": 3e-06, |
| "loss": -6.2626, |
| "reward": 0.24648644030094147, |
| "reward_std": 0.16061823815107346, |
| "rewards/sudoku_reward_func": 0.24648643285036087, |
| "step": 1657, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0016588294147073536, |
| "grad_norm": 109.03119659423828, |
| "learning_rate": 3e-06, |
| "loss": -1.8432, |
| "step": 1658 |
| }, |
| { |
| "epoch": 0.0016598299149574787, |
| "grad_norm": 133.4971923828125, |
| "learning_rate": 3e-06, |
| "loss": -3.2182, |
| "step": 1659 |
| }, |
| { |
| "epoch": 0.0016608304152076038, |
| "grad_norm": 101.81431579589844, |
| "learning_rate": 3e-06, |
| "loss": -0.4809, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.001661830915457729, |
| "grad_norm": 148.29449462890625, |
| "learning_rate": 3e-06, |
| "loss": -7.8211, |
| "step": 1661 |
| }, |
| { |
| "epoch": 0.001662831415707854, |
| "grad_norm": 125.23590850830078, |
| "learning_rate": 3e-06, |
| "loss": -3.2888, |
| "step": 1662 |
| }, |
| { |
| "epoch": 0.001663831915957979, |
| "grad_norm": 142.0076904296875, |
| "learning_rate": 3e-06, |
| "loss": -3.8196, |
| "step": 1663 |
| }, |
| { |
| "epoch": 0.001664832416208104, |
| "grad_norm": 107.60643005371094, |
| "learning_rate": 3e-06, |
| "loss": -1.6867, |
| "step": 1664 |
| }, |
| { |
| "completion_length": 254.7291717529297, |
| "epoch": 0.001665832916458229, |
| "grad_norm": 227.44874572753906, |
| "learning_rate": 3e-06, |
| "loss": -9.3651, |
| "reward": 0.2640542536973953, |
| "reward_std": 0.17234958708286285, |
| "rewards/sudoku_reward_func": 0.26405423879623413, |
| "step": 1665, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0016668334167083542, |
| "grad_norm": 145.90663146972656, |
| "learning_rate": 3e-06, |
| "loss": -6.723, |
| "step": 1666 |
| }, |
| { |
| "epoch": 0.0016678339169584793, |
| "grad_norm": 205.7703399658203, |
| "learning_rate": 3e-06, |
| "loss": -11.4297, |
| "step": 1667 |
| }, |
| { |
| "epoch": 0.0016688344172086044, |
| "grad_norm": 128.5513153076172, |
| "learning_rate": 3e-06, |
| "loss": -12.0292, |
| "step": 1668 |
| }, |
| { |
| "epoch": 0.0016698349174587295, |
| "grad_norm": 217.45556640625, |
| "learning_rate": 3e-06, |
| "loss": -8.7485, |
| "step": 1669 |
| }, |
| { |
| "epoch": 0.0016708354177088543, |
| "grad_norm": 123.4818115234375, |
| "learning_rate": 3e-06, |
| "loss": -8.3128, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.0016718359179589794, |
| "grad_norm": 174.58563232421875, |
| "learning_rate": 3e-06, |
| "loss": -11.4492, |
| "step": 1671 |
| }, |
| { |
| "epoch": 0.0016728364182091045, |
| "grad_norm": 139.7147674560547, |
| "learning_rate": 3e-06, |
| "loss": -12.6301, |
| "step": 1672 |
| }, |
| { |
| "completion_length": 251.83334350585938, |
| "epoch": 0.0016738369184592296, |
| "grad_norm": 119.95178985595703, |
| "learning_rate": 3e-06, |
| "loss": 1.2102, |
| "reward": 0.2717013955116272, |
| "reward_std": 0.16230863332748413, |
| "rewards/sudoku_reward_func": 0.2717013955116272, |
| "step": 1673, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0016748374187093547, |
| "grad_norm": 133.8927459716797, |
| "learning_rate": 3e-06, |
| "loss": -2.6016, |
| "step": 1674 |
| }, |
| { |
| "epoch": 0.0016758379189594798, |
| "grad_norm": 154.2207794189453, |
| "learning_rate": 3e-06, |
| "loss": 1.9318, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.001676838419209605, |
| "grad_norm": 114.0203857421875, |
| "learning_rate": 3e-06, |
| "loss": -1.2058, |
| "step": 1676 |
| }, |
| { |
| "epoch": 0.0016778389194597298, |
| "grad_norm": 122.6033706665039, |
| "learning_rate": 3e-06, |
| "loss": 0.6539, |
| "step": 1677 |
| }, |
| { |
| "epoch": 0.0016788394197098549, |
| "grad_norm": 127.75208282470703, |
| "learning_rate": 3e-06, |
| "loss": -3.674, |
| "step": 1678 |
| }, |
| { |
| "epoch": 0.00167983991995998, |
| "grad_norm": 174.46522521972656, |
| "learning_rate": 3e-06, |
| "loss": 0.9551, |
| "step": 1679 |
| }, |
| { |
| "epoch": 0.001680840420210105, |
| "grad_norm": 138.7799835205078, |
| "learning_rate": 3e-06, |
| "loss": -1.7151, |
| "step": 1680 |
| }, |
| { |
| "completion_length": 254.6666717529297, |
| "epoch": 0.0016818409204602302, |
| "grad_norm": 151.94314575195312, |
| "learning_rate": 3e-06, |
| "loss": 6.1425, |
| "reward": 0.22201555222272873, |
| "reward_std": 0.13886961340904236, |
| "rewards/sudoku_reward_func": 0.22201555222272873, |
| "step": 1681, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0016828414207103552, |
| "grad_norm": 143.5550994873047, |
| "learning_rate": 3e-06, |
| "loss": 4.1679, |
| "step": 1682 |
| }, |
| { |
| "epoch": 0.0016838419209604803, |
| "grad_norm": 140.15504455566406, |
| "learning_rate": 3e-06, |
| "loss": 4.78, |
| "step": 1683 |
| }, |
| { |
| "epoch": 0.0016848424212106052, |
| "grad_norm": 191.37742614746094, |
| "learning_rate": 3e-06, |
| "loss": 9.2405, |
| "step": 1684 |
| }, |
| { |
| "epoch": 0.0016858429214607303, |
| "grad_norm": 178.3914337158203, |
| "learning_rate": 3e-06, |
| "loss": 2.8219, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.0016868434217108554, |
| "grad_norm": 74.8600082397461, |
| "learning_rate": 3e-06, |
| "loss": 2.2428, |
| "step": 1686 |
| }, |
| { |
| "epoch": 0.0016878439219609805, |
| "grad_norm": 122.69676971435547, |
| "learning_rate": 3e-06, |
| "loss": 2.1578, |
| "step": 1687 |
| }, |
| { |
| "epoch": 0.0016888444222111056, |
| "grad_norm": 189.11329650878906, |
| "learning_rate": 3e-06, |
| "loss": 5.1728, |
| "step": 1688 |
| }, |
| { |
| "completion_length": 255.9791717529297, |
| "epoch": 0.0016898449224612307, |
| "grad_norm": 102.0486068725586, |
| "learning_rate": 3e-06, |
| "loss": 1.2396, |
| "reward": 0.24384094774723053, |
| "reward_std": 0.13583382219076157, |
| "rewards/sudoku_reward_func": 0.24384094774723053, |
| "step": 1689, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0016908454227113558, |
| "grad_norm": 65.69036102294922, |
| "learning_rate": 3e-06, |
| "loss": 1.7486, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.0016918459229614807, |
| "grad_norm": 91.4066162109375, |
| "learning_rate": 3e-06, |
| "loss": 1.4402, |
| "step": 1691 |
| }, |
| { |
| "epoch": 0.0016928464232116058, |
| "grad_norm": 89.79181671142578, |
| "learning_rate": 3e-06, |
| "loss": 0.494, |
| "step": 1692 |
| }, |
| { |
| "epoch": 0.0016938469234617308, |
| "grad_norm": 75.11154174804688, |
| "learning_rate": 3e-06, |
| "loss": 0.0127, |
| "step": 1693 |
| }, |
| { |
| "epoch": 0.001694847423711856, |
| "grad_norm": 60.495384216308594, |
| "learning_rate": 3e-06, |
| "loss": 1.491, |
| "step": 1694 |
| }, |
| { |
| "epoch": 0.001695847923961981, |
| "grad_norm": 79.2197036743164, |
| "learning_rate": 3e-06, |
| "loss": 1.074, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.0016968484242121061, |
| "grad_norm": 105.94502258300781, |
| "learning_rate": 3e-06, |
| "loss": -0.5889, |
| "step": 1696 |
| }, |
| { |
| "completion_length": 255.8125, |
| "epoch": 0.001697848924462231, |
| "grad_norm": 118.99041748046875, |
| "learning_rate": 3e-06, |
| "loss": -8.0986, |
| "reward": 0.25305887311697006, |
| "reward_std": 0.1613667756319046, |
| "rewards/sudoku_reward_func": 0.25305885821580887, |
| "step": 1697, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.001698849424712356, |
| "grad_norm": 81.08747863769531, |
| "learning_rate": 3e-06, |
| "loss": -9.7944, |
| "step": 1698 |
| }, |
| { |
| "epoch": 0.0016998499249624812, |
| "grad_norm": 76.02010345458984, |
| "learning_rate": 3e-06, |
| "loss": -8.2251, |
| "step": 1699 |
| }, |
| { |
| "epoch": 0.0017008504252126063, |
| "grad_norm": 89.18621063232422, |
| "learning_rate": 3e-06, |
| "loss": -9.8214, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.0017018509254627314, |
| "grad_norm": 112.59184265136719, |
| "learning_rate": 3e-06, |
| "loss": -9.0111, |
| "step": 1701 |
| }, |
| { |
| "epoch": 0.0017028514257128565, |
| "grad_norm": 78.08659362792969, |
| "learning_rate": 3e-06, |
| "loss": -10.6981, |
| "step": 1702 |
| }, |
| { |
| "epoch": 0.0017038519259629816, |
| "grad_norm": 88.7759780883789, |
| "learning_rate": 3e-06, |
| "loss": -9.1064, |
| "step": 1703 |
| }, |
| { |
| "epoch": 0.0017048524262131064, |
| "grad_norm": 80.99433898925781, |
| "learning_rate": 3e-06, |
| "loss": -11.0677, |
| "step": 1704 |
| }, |
| { |
| "completion_length": 253.25, |
| "epoch": 0.0017058529264632315, |
| "grad_norm": 118.51707458496094, |
| "learning_rate": 3e-06, |
| "loss": -0.7235, |
| "reward": 0.2441716343164444, |
| "reward_std": 0.15575607120990753, |
| "rewards/sudoku_reward_func": 0.2441716343164444, |
| "step": 1705, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0017068534267133566, |
| "grad_norm": 87.36502838134766, |
| "learning_rate": 3e-06, |
| "loss": 3.1127, |
| "step": 1706 |
| }, |
| { |
| "epoch": 0.0017078539269634817, |
| "grad_norm": 82.34584045410156, |
| "learning_rate": 3e-06, |
| "loss": -0.7819, |
| "step": 1707 |
| }, |
| { |
| "epoch": 0.0017088544272136068, |
| "grad_norm": 74.79312896728516, |
| "learning_rate": 3e-06, |
| "loss": 2.0932, |
| "step": 1708 |
| }, |
| { |
| "epoch": 0.001709854927463732, |
| "grad_norm": 107.77957153320312, |
| "learning_rate": 3e-06, |
| "loss": -1.5862, |
| "step": 1709 |
| }, |
| { |
| "epoch": 0.001710855427713857, |
| "grad_norm": 78.9430160522461, |
| "learning_rate": 3e-06, |
| "loss": 2.499, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.0017118559279639819, |
| "grad_norm": 88.64921569824219, |
| "learning_rate": 3e-06, |
| "loss": -1.5229, |
| "step": 1711 |
| }, |
| { |
| "epoch": 0.001712856428214107, |
| "grad_norm": 80.30286407470703, |
| "learning_rate": 3e-06, |
| "loss": 1.0471, |
| "step": 1712 |
| }, |
| { |
| "completion_length": 255.9791717529297, |
| "epoch": 0.001713856928464232, |
| "grad_norm": 102.88541412353516, |
| "learning_rate": 3e-06, |
| "loss": 5.9438, |
| "reward": 0.23086145520210266, |
| "reward_std": 0.1465577483177185, |
| "rewards/sudoku_reward_func": 0.23086144030094147, |
| "step": 1713, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0017148574287143572, |
| "grad_norm": 96.9382095336914, |
| "learning_rate": 3e-06, |
| "loss": -1.8535, |
| "step": 1714 |
| }, |
| { |
| "epoch": 0.0017158579289644823, |
| "grad_norm": 93.00968170166016, |
| "learning_rate": 3e-06, |
| "loss": 2.9559, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.0017168584292146074, |
| "grad_norm": 115.1343002319336, |
| "learning_rate": 3e-06, |
| "loss": 5.5838, |
| "step": 1716 |
| }, |
| { |
| "epoch": 0.0017178589294647325, |
| "grad_norm": 117.99451446533203, |
| "learning_rate": 3e-06, |
| "loss": 5.6028, |
| "step": 1717 |
| }, |
| { |
| "epoch": 0.0017188594297148573, |
| "grad_norm": 105.47667694091797, |
| "learning_rate": 3e-06, |
| "loss": -2.0703, |
| "step": 1718 |
| }, |
| { |
| "epoch": 0.0017198599299649824, |
| "grad_norm": 87.09989166259766, |
| "learning_rate": 3e-06, |
| "loss": 1.6817, |
| "step": 1719 |
| }, |
| { |
| "epoch": 0.0017208604302151075, |
| "grad_norm": 111.20448303222656, |
| "learning_rate": 3e-06, |
| "loss": 5.465, |
| "step": 1720 |
| }, |
| { |
| "completion_length": 252.1666717529297, |
| "epoch": 0.0017218609304652326, |
| "grad_norm": 138.27731323242188, |
| "learning_rate": 3e-06, |
| "loss": -15.3219, |
| "reward": 0.23909859359264374, |
| "reward_std": 0.14476260542869568, |
| "rewards/sudoku_reward_func": 0.23909857124090195, |
| "step": 1721, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0017228614307153577, |
| "grad_norm": 109.71159362792969, |
| "learning_rate": 3e-06, |
| "loss": -13.3281, |
| "step": 1722 |
| }, |
| { |
| "epoch": 0.0017238619309654828, |
| "grad_norm": 103.57756042480469, |
| "learning_rate": 3e-06, |
| "loss": -12.6453, |
| "step": 1723 |
| }, |
| { |
| "epoch": 0.001724862431215608, |
| "grad_norm": 126.73169708251953, |
| "learning_rate": 3e-06, |
| "loss": -9.6316, |
| "step": 1724 |
| }, |
| { |
| "epoch": 0.0017258629314657328, |
| "grad_norm": 124.72553253173828, |
| "learning_rate": 3e-06, |
| "loss": -15.9503, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.0017268634317158579, |
| "grad_norm": 105.17279052734375, |
| "learning_rate": 3e-06, |
| "loss": -14.3943, |
| "step": 1726 |
| }, |
| { |
| "epoch": 0.001727863931965983, |
| "grad_norm": 102.4739990234375, |
| "learning_rate": 3e-06, |
| "loss": -13.3378, |
| "step": 1727 |
| }, |
| { |
| "epoch": 0.001728864432216108, |
| "grad_norm": 138.15892028808594, |
| "learning_rate": 3e-06, |
| "loss": -9.9653, |
| "step": 1728 |
| }, |
| { |
| "completion_length": 254.39584350585938, |
| "epoch": 0.0017298649324662331, |
| "grad_norm": 96.33226776123047, |
| "learning_rate": 3e-06, |
| "loss": -11.8497, |
| "reward": 0.2301737666130066, |
| "reward_std": 0.14909013360738754, |
| "rewards/sudoku_reward_func": 0.2301737666130066, |
| "step": 1729, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0017308654327163582, |
| "grad_norm": 86.83834838867188, |
| "learning_rate": 3e-06, |
| "loss": -16.6763, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.0017318659329664833, |
| "grad_norm": 101.6478271484375, |
| "learning_rate": 3e-06, |
| "loss": -10.558, |
| "step": 1731 |
| }, |
| { |
| "epoch": 0.0017328664332166082, |
| "grad_norm": 93.8929214477539, |
| "learning_rate": 3e-06, |
| "loss": -11.1721, |
| "step": 1732 |
| }, |
| { |
| "epoch": 0.0017338669334667333, |
| "grad_norm": 89.75656127929688, |
| "learning_rate": 3e-06, |
| "loss": -12.3965, |
| "step": 1733 |
| }, |
| { |
| "epoch": 0.0017348674337168584, |
| "grad_norm": 97.66020202636719, |
| "learning_rate": 3e-06, |
| "loss": -17.2704, |
| "step": 1734 |
| }, |
| { |
| "epoch": 0.0017358679339669835, |
| "grad_norm": 88.83231353759766, |
| "learning_rate": 3e-06, |
| "loss": -11.6396, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.0017368684342171086, |
| "grad_norm": 119.40660858154297, |
| "learning_rate": 3e-06, |
| "loss": -11.6067, |
| "step": 1736 |
| }, |
| { |
| "completion_length": 252.1666717529297, |
| "epoch": 0.0017378689344672337, |
| "grad_norm": 119.9146957397461, |
| "learning_rate": 3e-06, |
| "loss": -9.2346, |
| "reward": 0.28505294024944305, |
| "reward_std": 0.1518668606877327, |
| "rewards/sudoku_reward_func": 0.28505291789770126, |
| "step": 1737, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0017388694347173588, |
| "grad_norm": 151.11672973632812, |
| "learning_rate": 3e-06, |
| "loss": -5.4549, |
| "step": 1738 |
| }, |
| { |
| "epoch": 0.0017398699349674837, |
| "grad_norm": 101.99298095703125, |
| "learning_rate": 3e-06, |
| "loss": -6.1653, |
| "step": 1739 |
| }, |
| { |
| "epoch": 0.0017408704352176087, |
| "grad_norm": 143.7563018798828, |
| "learning_rate": 3e-06, |
| "loss": -5.4064, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.0017418709354677338, |
| "grad_norm": 130.0487823486328, |
| "learning_rate": 3e-06, |
| "loss": -10.9143, |
| "step": 1741 |
| }, |
| { |
| "epoch": 0.001742871435717859, |
| "grad_norm": 164.75473022460938, |
| "learning_rate": 3e-06, |
| "loss": -7.4136, |
| "step": 1742 |
| }, |
| { |
| "epoch": 0.001743871935967984, |
| "grad_norm": 102.13026428222656, |
| "learning_rate": 3e-06, |
| "loss": -7.0587, |
| "step": 1743 |
| }, |
| { |
| "epoch": 0.0017448724362181091, |
| "grad_norm": 105.52335357666016, |
| "learning_rate": 3e-06, |
| "loss": -6.7822, |
| "step": 1744 |
| }, |
| { |
| "completion_length": 253.58334350585938, |
| "epoch": 0.0017458729364682342, |
| "grad_norm": 89.50044250488281, |
| "learning_rate": 3e-06, |
| "loss": -1.5483, |
| "reward": 0.22809945046901703, |
| "reward_std": 0.12719708681106567, |
| "rewards/sudoku_reward_func": 0.22809944301843643, |
| "step": 1745, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.001746873436718359, |
| "grad_norm": 101.14523315429688, |
| "learning_rate": 3e-06, |
| "loss": -2.5046, |
| "step": 1746 |
| }, |
| { |
| "epoch": 0.0017478739369684842, |
| "grad_norm": 62.526153564453125, |
| "learning_rate": 3e-06, |
| "loss": -0.9477, |
| "step": 1747 |
| }, |
| { |
| "epoch": 0.0017488744372186093, |
| "grad_norm": 67.7402572631836, |
| "learning_rate": 3e-06, |
| "loss": -7.4768, |
| "step": 1748 |
| }, |
| { |
| "epoch": 0.0017498749374687344, |
| "grad_norm": 115.06365203857422, |
| "learning_rate": 3e-06, |
| "loss": -2.7306, |
| "step": 1749 |
| }, |
| { |
| "epoch": 0.0017508754377188595, |
| "grad_norm": 87.61795043945312, |
| "learning_rate": 3e-06, |
| "loss": -2.4428, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.0017518759379689846, |
| "grad_norm": 64.8736801147461, |
| "learning_rate": 3e-06, |
| "loss": -1.5924, |
| "step": 1751 |
| }, |
| { |
| "epoch": 0.0017528764382191097, |
| "grad_norm": 86.32395935058594, |
| "learning_rate": 3e-06, |
| "loss": -7.8121, |
| "step": 1752 |
| }, |
| { |
| "completion_length": 254.3125, |
| "epoch": 0.0017538769384692345, |
| "grad_norm": 124.07573699951172, |
| "learning_rate": 3e-06, |
| "loss": 0.7756, |
| "reward": 0.24261214584112167, |
| "reward_std": 0.15232361108064651, |
| "rewards/sudoku_reward_func": 0.24261213093996048, |
| "step": 1753, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0017548774387193596, |
| "grad_norm": 96.29853820800781, |
| "learning_rate": 3e-06, |
| "loss": -2.1126, |
| "step": 1754 |
| }, |
| { |
| "epoch": 0.0017558779389694847, |
| "grad_norm": 90.17716979980469, |
| "learning_rate": 3e-06, |
| "loss": 3.0019, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.0017568784392196098, |
| "grad_norm": 100.68418884277344, |
| "learning_rate": 3e-06, |
| "loss": -1.2563, |
| "step": 1756 |
| }, |
| { |
| "epoch": 0.001757878939469735, |
| "grad_norm": 115.46549987792969, |
| "learning_rate": 3e-06, |
| "loss": -0.2155, |
| "step": 1757 |
| }, |
| { |
| "epoch": 0.00175887943971986, |
| "grad_norm": 105.72683715820312, |
| "learning_rate": 3e-06, |
| "loss": -2.9011, |
| "step": 1758 |
| }, |
| { |
| "epoch": 0.001759879939969985, |
| "grad_norm": 84.93206787109375, |
| "learning_rate": 3e-06, |
| "loss": 2.8186, |
| "step": 1759 |
| }, |
| { |
| "epoch": 0.00176088044022011, |
| "grad_norm": 107.25251007080078, |
| "learning_rate": 3e-06, |
| "loss": -1.8318, |
| "step": 1760 |
| }, |
| { |
| "completion_length": 252.8125, |
| "epoch": 0.001761880940470235, |
| "grad_norm": 103.16302490234375, |
| "learning_rate": 3e-06, |
| "loss": -3.5031, |
| "reward": 0.20936673879623413, |
| "reward_std": 0.13528703153133392, |
| "rewards/sudoku_reward_func": 0.20936672389507294, |
| "step": 1761, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0017628814407203602, |
| "grad_norm": 148.88758850097656, |
| "learning_rate": 3e-06, |
| "loss": -4.4631, |
| "step": 1762 |
| }, |
| { |
| "epoch": 0.0017638819409704853, |
| "grad_norm": 76.8835678100586, |
| "learning_rate": 3e-06, |
| "loss": -4.734, |
| "step": 1763 |
| }, |
| { |
| "epoch": 0.0017648824412206103, |
| "grad_norm": 178.9423828125, |
| "learning_rate": 3e-06, |
| "loss": -7.8421, |
| "step": 1764 |
| }, |
| { |
| "epoch": 0.0017658829414707354, |
| "grad_norm": 113.23098754882812, |
| "learning_rate": 3e-06, |
| "loss": -4.039, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.0017668834417208605, |
| "grad_norm": 125.62858581542969, |
| "learning_rate": 3e-06, |
| "loss": -5.1876, |
| "step": 1766 |
| }, |
| { |
| "epoch": 0.0017678839419709854, |
| "grad_norm": 81.04549407958984, |
| "learning_rate": 3e-06, |
| "loss": -5.3517, |
| "step": 1767 |
| }, |
| { |
| "epoch": 0.0017688844422211105, |
| "grad_norm": 155.84767150878906, |
| "learning_rate": 3e-06, |
| "loss": -7.7305, |
| "step": 1768 |
| }, |
| { |
| "completion_length": 253.52084350585938, |
| "epoch": 0.0017698849424712356, |
| "grad_norm": 183.12640380859375, |
| "learning_rate": 3e-06, |
| "loss": 5.874, |
| "reward": 0.23503637313842773, |
| "reward_std": 0.15780103206634521, |
| "rewards/sudoku_reward_func": 0.23503635823726654, |
| "step": 1769, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0017708854427213607, |
| "grad_norm": 134.1517333984375, |
| "learning_rate": 3e-06, |
| "loss": 5.0481, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.0017718859429714858, |
| "grad_norm": 200.28854370117188, |
| "learning_rate": 3e-06, |
| "loss": 9.3885, |
| "step": 1771 |
| }, |
| { |
| "epoch": 0.0017728864432216109, |
| "grad_norm": 118.0494155883789, |
| "learning_rate": 3e-06, |
| "loss": 11.8479, |
| "step": 1772 |
| }, |
| { |
| "epoch": 0.001773886943471736, |
| "grad_norm": 153.0768585205078, |
| "learning_rate": 3e-06, |
| "loss": 4.5118, |
| "step": 1773 |
| }, |
| { |
| "epoch": 0.0017748874437218609, |
| "grad_norm": 134.91383361816406, |
| "learning_rate": 3e-06, |
| "loss": 4.0104, |
| "step": 1774 |
| }, |
| { |
| "epoch": 0.001775887943971986, |
| "grad_norm": 102.95285034179688, |
| "learning_rate": 3e-06, |
| "loss": 7.8211, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.001776888444222111, |
| "grad_norm": 161.75311279296875, |
| "learning_rate": 3e-06, |
| "loss": 9.3988, |
| "step": 1776 |
| }, |
| { |
| "completion_length": 256.0, |
| "epoch": 0.0017778889444722361, |
| "grad_norm": 76.7910385131836, |
| "learning_rate": 3e-06, |
| "loss": 2.0049, |
| "reward": 0.24829020351171494, |
| "reward_std": 0.1371561922132969, |
| "rewards/sudoku_reward_func": 0.24829020351171494, |
| "step": 1777, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0017788894447223612, |
| "grad_norm": 115.39936828613281, |
| "learning_rate": 3e-06, |
| "loss": 0.9716, |
| "step": 1778 |
| }, |
| { |
| "epoch": 0.0017798899449724863, |
| "grad_norm": 96.10639953613281, |
| "learning_rate": 3e-06, |
| "loss": -0.8323, |
| "step": 1779 |
| }, |
| { |
| "epoch": 0.0017808904452226112, |
| "grad_norm": 98.98098754882812, |
| "learning_rate": 3e-06, |
| "loss": -3.2226, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.0017818909454727363, |
| "grad_norm": 63.91598129272461, |
| "learning_rate": 3e-06, |
| "loss": 1.5459, |
| "step": 1781 |
| }, |
| { |
| "epoch": 0.0017828914457228614, |
| "grad_norm": 85.2409896850586, |
| "learning_rate": 3e-06, |
| "loss": 0.9568, |
| "step": 1782 |
| }, |
| { |
| "epoch": 0.0017838919459729865, |
| "grad_norm": 92.9672622680664, |
| "learning_rate": 3e-06, |
| "loss": -1.4394, |
| "step": 1783 |
| }, |
| { |
| "epoch": 0.0017848924462231116, |
| "grad_norm": 105.71380615234375, |
| "learning_rate": 3e-06, |
| "loss": -3.4215, |
| "step": 1784 |
| }, |
| { |
| "completion_length": 255.9791717529297, |
| "epoch": 0.0017858929464732367, |
| "grad_norm": 136.5750274658203, |
| "learning_rate": 3e-06, |
| "loss": -7.522, |
| "reward": 0.23626518994569778, |
| "reward_std": 0.1622900366783142, |
| "rewards/sudoku_reward_func": 0.23626518994569778, |
| "step": 1785, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0017868934467233618, |
| "grad_norm": 206.81781005859375, |
| "learning_rate": 3e-06, |
| "loss": -8.6697, |
| "step": 1786 |
| }, |
| { |
| "epoch": 0.0017878939469734866, |
| "grad_norm": 103.22502136230469, |
| "learning_rate": 3e-06, |
| "loss": -13.4631, |
| "step": 1787 |
| }, |
| { |
| "epoch": 0.0017888944472236117, |
| "grad_norm": 160.32032775878906, |
| "learning_rate": 3e-06, |
| "loss": -6.0248, |
| "step": 1788 |
| }, |
| { |
| "epoch": 0.0017898949474737368, |
| "grad_norm": 138.78746032714844, |
| "learning_rate": 3e-06, |
| "loss": -7.9239, |
| "step": 1789 |
| }, |
| { |
| "epoch": 0.001790895447723862, |
| "grad_norm": 199.31930541992188, |
| "learning_rate": 3e-06, |
| "loss": -10.0847, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.001791895947973987, |
| "grad_norm": 106.71943664550781, |
| "learning_rate": 3e-06, |
| "loss": -13.6951, |
| "step": 1791 |
| }, |
| { |
| "epoch": 0.0017928964482241121, |
| "grad_norm": 159.4205322265625, |
| "learning_rate": 3e-06, |
| "loss": -6.4479, |
| "step": 1792 |
| }, |
| { |
| "completion_length": 254.1875, |
| "epoch": 0.0017938969484742372, |
| "grad_norm": 96.4322509765625, |
| "learning_rate": 3e-06, |
| "loss": -9.344, |
| "reward": 0.2364831268787384, |
| "reward_std": 0.15473373234272003, |
| "rewards/sudoku_reward_func": 0.2364831268787384, |
| "step": 1793, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.001794897448724362, |
| "grad_norm": 153.9132537841797, |
| "learning_rate": 3e-06, |
| "loss": -14.0934, |
| "step": 1794 |
| }, |
| { |
| "epoch": 0.0017958979489744872, |
| "grad_norm": 76.87872314453125, |
| "learning_rate": 3e-06, |
| "loss": -13.9281, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.0017968984492246123, |
| "grad_norm": 55.318077087402344, |
| "learning_rate": 3e-06, |
| "loss": -11.7386, |
| "step": 1796 |
| }, |
| { |
| "epoch": 0.0017978989494747374, |
| "grad_norm": 121.17082214355469, |
| "learning_rate": 3e-06, |
| "loss": -9.2901, |
| "step": 1797 |
| }, |
| { |
| "epoch": 0.0017988994497248625, |
| "grad_norm": 131.7681427001953, |
| "learning_rate": 3e-06, |
| "loss": -14.4251, |
| "step": 1798 |
| }, |
| { |
| "epoch": 0.0017998999499749875, |
| "grad_norm": 76.07759857177734, |
| "learning_rate": 3e-06, |
| "loss": -14.1956, |
| "step": 1799 |
| }, |
| { |
| "epoch": 0.0018009004502251126, |
| "grad_norm": 62.16264724731445, |
| "learning_rate": 3e-06, |
| "loss": -12.19, |
| "step": 1800 |
| }, |
| { |
| "completion_length": 252.43750762939453, |
| "epoch": 0.0018019009504752375, |
| "grad_norm": 108.04124450683594, |
| "learning_rate": 3e-06, |
| "loss": 5.9732, |
| "reward": 0.25009771436452866, |
| "reward_std": 0.1837964430451393, |
| "rewards/sudoku_reward_func": 0.25009771436452866, |
| "step": 1801, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0018029014507253626, |
| "grad_norm": 148.05174255371094, |
| "learning_rate": 3e-06, |
| "loss": 9.4788, |
| "step": 1802 |
| }, |
| { |
| "epoch": 0.0018039019509754877, |
| "grad_norm": 158.5712890625, |
| "learning_rate": 3e-06, |
| "loss": 7.4384, |
| "step": 1803 |
| }, |
| { |
| "epoch": 0.0018049024512256128, |
| "grad_norm": 84.07239532470703, |
| "learning_rate": 3e-06, |
| "loss": 9.1441, |
| "step": 1804 |
| }, |
| { |
| "epoch": 0.001805902951475738, |
| "grad_norm": 108.87689971923828, |
| "learning_rate": 3e-06, |
| "loss": 4.8344, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.001806903451725863, |
| "grad_norm": 143.6099853515625, |
| "learning_rate": 3e-06, |
| "loss": 8.816, |
| "step": 1806 |
| }, |
| { |
| "epoch": 0.001807903951975988, |
| "grad_norm": 150.4852752685547, |
| "learning_rate": 3e-06, |
| "loss": 7.8367, |
| "step": 1807 |
| }, |
| { |
| "epoch": 0.001808904452226113, |
| "grad_norm": 149.5937042236328, |
| "learning_rate": 3e-06, |
| "loss": 7.4894, |
| "step": 1808 |
| }, |
| { |
| "completion_length": 254.0416717529297, |
| "epoch": 0.001809904952476238, |
| "grad_norm": 63.49519348144531, |
| "learning_rate": 3e-06, |
| "loss": -1.8099, |
| "reward": 0.23842592537403107, |
| "reward_std": 0.12684232741594315, |
| "rewards/sudoku_reward_func": 0.23842591792345047, |
| "step": 1809, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0018109054527263631, |
| "grad_norm": 75.89506530761719, |
| "learning_rate": 3e-06, |
| "loss": -5.6479, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.0018119059529764882, |
| "grad_norm": 63.72089385986328, |
| "learning_rate": 3e-06, |
| "loss": -3.8267, |
| "step": 1811 |
| }, |
| { |
| "epoch": 0.0018129064532266133, |
| "grad_norm": 119.11578369140625, |
| "learning_rate": 3e-06, |
| "loss": -3.6712, |
| "step": 1812 |
| }, |
| { |
| "epoch": 0.0018139069534767384, |
| "grad_norm": 56.39674377441406, |
| "learning_rate": 3e-06, |
| "loss": -2.3428, |
| "step": 1813 |
| }, |
| { |
| "epoch": 0.0018149074537268635, |
| "grad_norm": 58.924068450927734, |
| "learning_rate": 3e-06, |
| "loss": -5.7407, |
| "step": 1814 |
| }, |
| { |
| "epoch": 0.0018159079539769884, |
| "grad_norm": 52.398563385009766, |
| "learning_rate": 3e-06, |
| "loss": -4.5608, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.0018169084542271135, |
| "grad_norm": 114.20697021484375, |
| "learning_rate": 3e-06, |
| "loss": -4.3273, |
| "step": 1816 |
| }, |
| { |
| "completion_length": 254.875, |
| "epoch": 0.0018179089544772386, |
| "grad_norm": 63.99118423461914, |
| "learning_rate": 3e-06, |
| "loss": -10.1683, |
| "reward": 0.24574241042137146, |
| "reward_std": 0.15981419384479523, |
| "rewards/sudoku_reward_func": 0.24574239552021027, |
| "step": 1817, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0018189094547273637, |
| "grad_norm": 54.27163314819336, |
| "learning_rate": 3e-06, |
| "loss": -7.9241, |
| "step": 1818 |
| }, |
| { |
| "epoch": 0.0018199099549774888, |
| "grad_norm": 98.15486907958984, |
| "learning_rate": 3e-06, |
| "loss": -5.2589, |
| "step": 1819 |
| }, |
| { |
| "epoch": 0.0018209104552276139, |
| "grad_norm": 80.08932495117188, |
| "learning_rate": 3e-06, |
| "loss": -10.737, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.001821910955477739, |
| "grad_norm": 68.28329467773438, |
| "learning_rate": 3e-06, |
| "loss": -10.8951, |
| "step": 1821 |
| }, |
| { |
| "epoch": 0.0018229114557278638, |
| "grad_norm": 53.481849670410156, |
| "learning_rate": 3e-06, |
| "loss": -8.5383, |
| "step": 1822 |
| }, |
| { |
| "epoch": 0.001823911955977989, |
| "grad_norm": 97.5908203125, |
| "learning_rate": 3e-06, |
| "loss": -5.6582, |
| "step": 1823 |
| }, |
| { |
| "epoch": 0.001824912456228114, |
| "grad_norm": 92.74129486083984, |
| "learning_rate": 3e-06, |
| "loss": -11.6666, |
| "step": 1824 |
| }, |
| { |
| "completion_length": 255.45833587646484, |
| "epoch": 0.0018259129564782391, |
| "grad_norm": 68.78041076660156, |
| "learning_rate": 3e-06, |
| "loss": -2.671, |
| "reward": 0.21817130595445633, |
| "reward_std": 0.15033115446567535, |
| "rewards/sudoku_reward_func": 0.21817129850387573, |
| "step": 1825, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0018269134567283642, |
| "grad_norm": 55.90199661254883, |
| "learning_rate": 3e-06, |
| "loss": -0.1168, |
| "step": 1826 |
| }, |
| { |
| "epoch": 0.0018279139569784893, |
| "grad_norm": 116.6128921508789, |
| "learning_rate": 3e-06, |
| "loss": -2.0107, |
| "step": 1827 |
| }, |
| { |
| "epoch": 0.0018289144572286144, |
| "grad_norm": 110.54891967773438, |
| "learning_rate": 3e-06, |
| "loss": -0.969, |
| "step": 1828 |
| }, |
| { |
| "epoch": 0.0018299149574787393, |
| "grad_norm": 94.7934341430664, |
| "learning_rate": 3e-06, |
| "loss": -3.8128, |
| "step": 1829 |
| }, |
| { |
| "epoch": 0.0018309154577288644, |
| "grad_norm": 64.49011993408203, |
| "learning_rate": 3e-06, |
| "loss": -0.8491, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.0018319159579789895, |
| "grad_norm": 158.86155700683594, |
| "learning_rate": 3e-06, |
| "loss": -4.1421, |
| "step": 1831 |
| }, |
| { |
| "epoch": 0.0018329164582291146, |
| "grad_norm": 105.3134536743164, |
| "learning_rate": 3e-06, |
| "loss": -3.6942, |
| "step": 1832 |
| }, |
| { |
| "completion_length": 256.0, |
| "epoch": 0.0018339169584792397, |
| "grad_norm": 83.44578552246094, |
| "learning_rate": 3e-06, |
| "loss": -27.8578, |
| "reward": 0.2110615223646164, |
| "reward_std": 0.14463221281766891, |
| "rewards/sudoku_reward_func": 0.2110615149140358, |
| "step": 1833, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0018349174587293648, |
| "grad_norm": 111.02509307861328, |
| "learning_rate": 3e-06, |
| "loss": -22.4974, |
| "step": 1834 |
| }, |
| { |
| "epoch": 0.0018359179589794898, |
| "grad_norm": 88.58699798583984, |
| "learning_rate": 3e-06, |
| "loss": -21.2492, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.0018369184592296147, |
| "grad_norm": 79.12022399902344, |
| "learning_rate": 3e-06, |
| "loss": -25.1015, |
| "step": 1836 |
| }, |
| { |
| "epoch": 0.0018379189594797398, |
| "grad_norm": 101.9961166381836, |
| "learning_rate": 3e-06, |
| "loss": -28.2455, |
| "step": 1837 |
| }, |
| { |
| "epoch": 0.001838919459729865, |
| "grad_norm": 148.4155731201172, |
| "learning_rate": 3e-06, |
| "loss": -22.2837, |
| "step": 1838 |
| }, |
| { |
| "epoch": 0.00183991995997999, |
| "grad_norm": 168.54803466796875, |
| "learning_rate": 3e-06, |
| "loss": -21.5606, |
| "step": 1839 |
| }, |
| { |
| "epoch": 0.001840920460230115, |
| "grad_norm": 78.82660675048828, |
| "learning_rate": 3e-06, |
| "loss": -26.2552, |
| "step": 1840 |
| }, |
| { |
| "completion_length": 256.0, |
| "epoch": 0.0018419209604802402, |
| "grad_norm": 288.81829833984375, |
| "learning_rate": 3e-06, |
| "loss": -8.0671, |
| "reward": 0.25539247691631317, |
| "reward_std": 0.1576203741133213, |
| "rewards/sudoku_reward_func": 0.2553924694657326, |
| "step": 1841, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0018429214607303653, |
| "grad_norm": 133.83079528808594, |
| "learning_rate": 3e-06, |
| "loss": -5.4173, |
| "step": 1842 |
| }, |
| { |
| "epoch": 0.0018439219609804902, |
| "grad_norm": 207.3365478515625, |
| "learning_rate": 3e-06, |
| "loss": -12.2163, |
| "step": 1843 |
| }, |
| { |
| "epoch": 0.0018449224612306153, |
| "grad_norm": 163.4165496826172, |
| "learning_rate": 3e-06, |
| "loss": -4.0535, |
| "step": 1844 |
| }, |
| { |
| "epoch": 0.0018459229614807404, |
| "grad_norm": 273.75286865234375, |
| "learning_rate": 3e-06, |
| "loss": -11.5609, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.0018469234617308654, |
| "grad_norm": 142.3900146484375, |
| "learning_rate": 3e-06, |
| "loss": -7.2858, |
| "step": 1846 |
| }, |
| { |
| "epoch": 0.0018479239619809905, |
| "grad_norm": 126.44757843017578, |
| "learning_rate": 3e-06, |
| "loss": -15.2951, |
| "step": 1847 |
| }, |
| { |
| "epoch": 0.0018489244622311156, |
| "grad_norm": 206.6569061279297, |
| "learning_rate": 3e-06, |
| "loss": -9.2882, |
| "step": 1848 |
| }, |
| { |
| "completion_length": 254.875, |
| "epoch": 0.0018499249624812407, |
| "grad_norm": 56.17991256713867, |
| "learning_rate": 3e-06, |
| "loss": -5.9357, |
| "reward": 0.2160218358039856, |
| "reward_std": 0.15568187460303307, |
| "rewards/sudoku_reward_func": 0.2160218134522438, |
| "step": 1849, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0018509254627313656, |
| "grad_norm": 121.19395446777344, |
| "learning_rate": 3e-06, |
| "loss": -6.2401, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.0018519259629814907, |
| "grad_norm": 84.88428497314453, |
| "learning_rate": 3e-06, |
| "loss": -3.9807, |
| "step": 1851 |
| }, |
| { |
| "epoch": 0.0018529264632316158, |
| "grad_norm": 151.30287170410156, |
| "learning_rate": 3e-06, |
| "loss": -1.2683, |
| "step": 1852 |
| }, |
| { |
| "epoch": 0.0018539269634817409, |
| "grad_norm": 103.15193939208984, |
| "learning_rate": 3e-06, |
| "loss": -6.3061, |
| "step": 1853 |
| }, |
| { |
| "epoch": 0.001854927463731866, |
| "grad_norm": 95.67821502685547, |
| "learning_rate": 3e-06, |
| "loss": -8.745, |
| "step": 1854 |
| }, |
| { |
| "epoch": 0.001855927963981991, |
| "grad_norm": 158.7672576904297, |
| "learning_rate": 3e-06, |
| "loss": -3.6772, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.001856928464232116, |
| "grad_norm": 132.406982421875, |
| "learning_rate": 3e-06, |
| "loss": -1.2915, |
| "step": 1856 |
| }, |
| { |
| "completion_length": 255.9791717529297, |
| "epoch": 0.001857928964482241, |
| "grad_norm": 215.91055297851562, |
| "learning_rate": 3e-06, |
| "loss": -1.8523, |
| "reward": 0.23044809699058533, |
| "reward_std": 0.14949989318847656, |
| "rewards/sudoku_reward_func": 0.23044808208942413, |
| "step": 1857, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0018589294647323661, |
| "grad_norm": 133.122314453125, |
| "learning_rate": 3e-06, |
| "loss": -3.0281, |
| "step": 1858 |
| }, |
| { |
| "epoch": 0.0018599299649824912, |
| "grad_norm": 162.92601013183594, |
| "learning_rate": 3e-06, |
| "loss": -1.8028, |
| "step": 1859 |
| }, |
| { |
| "epoch": 0.0018609304652326163, |
| "grad_norm": 132.5195770263672, |
| "learning_rate": 3e-06, |
| "loss": 1.1273, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.0018619309654827414, |
| "grad_norm": 143.70330810546875, |
| "learning_rate": 3e-06, |
| "loss": -4.3285, |
| "step": 1861 |
| }, |
| { |
| "epoch": 0.0018629314657328665, |
| "grad_norm": 126.70255279541016, |
| "learning_rate": 3e-06, |
| "loss": -4.0841, |
| "step": 1862 |
| }, |
| { |
| "epoch": 0.0018639319659829914, |
| "grad_norm": 194.76414489746094, |
| "learning_rate": 3e-06, |
| "loss": -2.7091, |
| "step": 1863 |
| }, |
| { |
| "epoch": 0.0018649324662331165, |
| "grad_norm": 186.4334716796875, |
| "learning_rate": 3e-06, |
| "loss": -0.7513, |
| "step": 1864 |
| }, |
| { |
| "completion_length": 249.2916717529297, |
| "epoch": 0.0018659329664832416, |
| "grad_norm": 172.89756774902344, |
| "learning_rate": 3e-06, |
| "loss": -1.016, |
| "reward": 0.24346517026424408, |
| "reward_std": 0.1537521705031395, |
| "rewards/sudoku_reward_func": 0.24346517026424408, |
| "step": 1865, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0018669334667333667, |
| "grad_norm": 140.2328338623047, |
| "learning_rate": 3e-06, |
| "loss": -0.0186, |
| "step": 1866 |
| }, |
| { |
| "epoch": 0.0018679339669834918, |
| "grad_norm": 167.56224060058594, |
| "learning_rate": 3e-06, |
| "loss": -1.1759, |
| "step": 1867 |
| }, |
| { |
| "epoch": 0.0018689344672336169, |
| "grad_norm": 129.8845977783203, |
| "learning_rate": 3e-06, |
| "loss": 4.8728, |
| "step": 1868 |
| }, |
| { |
| "epoch": 0.001869934967483742, |
| "grad_norm": 170.2915802001953, |
| "learning_rate": 3e-06, |
| "loss": -0.5258, |
| "step": 1869 |
| }, |
| { |
| "epoch": 0.0018709354677338668, |
| "grad_norm": 114.28946685791016, |
| "learning_rate": 3e-06, |
| "loss": -0.7656, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.001871935967983992, |
| "grad_norm": 186.74124145507812, |
| "learning_rate": 3e-06, |
| "loss": -2.6374, |
| "step": 1871 |
| }, |
| { |
| "epoch": 0.001872936468234117, |
| "grad_norm": 153.4672393798828, |
| "learning_rate": 3e-06, |
| "loss": 4.74, |
| "step": 1872 |
| }, |
| { |
| "completion_length": 255.9791717529297, |
| "epoch": 0.0018739369684842421, |
| "grad_norm": 164.3131103515625, |
| "learning_rate": 3e-06, |
| "loss": -5.3978, |
| "reward": 0.2078373059630394, |
| "reward_std": 0.15452614426612854, |
| "rewards/sudoku_reward_func": 0.2078373059630394, |
| "step": 1873, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0018749374687343672, |
| "grad_norm": 190.09767150878906, |
| "learning_rate": 3e-06, |
| "loss": -3.6008, |
| "step": 1874 |
| }, |
| { |
| "epoch": 0.0018759379689844923, |
| "grad_norm": 309.4437255859375, |
| "learning_rate": 3e-06, |
| "loss": -1.9107, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.0018769384692346174, |
| "grad_norm": 193.36318969726562, |
| "learning_rate": 3e-06, |
| "loss": 0.3439, |
| "step": 1876 |
| }, |
| { |
| "epoch": 0.0018779389694847423, |
| "grad_norm": 130.9011993408203, |
| "learning_rate": 3e-06, |
| "loss": -6.1956, |
| "step": 1877 |
| }, |
| { |
| "epoch": 0.0018789394697348674, |
| "grad_norm": 190.30758666992188, |
| "learning_rate": 3e-06, |
| "loss": -4.9197, |
| "step": 1878 |
| }, |
| { |
| "epoch": 0.0018799399699849925, |
| "grad_norm": 171.53958129882812, |
| "learning_rate": 3e-06, |
| "loss": -3.2976, |
| "step": 1879 |
| }, |
| { |
| "epoch": 0.0018809404702351176, |
| "grad_norm": 245.52931213378906, |
| "learning_rate": 3e-06, |
| "loss": -3.0038, |
| "step": 1880 |
| }, |
| { |
| "completion_length": 256.0, |
| "epoch": 0.0018819409704852426, |
| "grad_norm": 128.33712768554688, |
| "learning_rate": 3e-06, |
| "loss": -12.8456, |
| "reward": 0.24286392331123352, |
| "reward_std": 0.15595544129610062, |
| "rewards/sudoku_reward_func": 0.24286391586065292, |
| "step": 1881, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0018829414707353677, |
| "grad_norm": 120.86560821533203, |
| "learning_rate": 3e-06, |
| "loss": -10.8183, |
| "step": 1882 |
| }, |
| { |
| "epoch": 0.0018839419709854928, |
| "grad_norm": 223.6404266357422, |
| "learning_rate": 3e-06, |
| "loss": -17.097, |
| "step": 1883 |
| }, |
| { |
| "epoch": 0.0018849424712356177, |
| "grad_norm": 105.8646469116211, |
| "learning_rate": 3e-06, |
| "loss": -11.4175, |
| "step": 1884 |
| }, |
| { |
| "epoch": 0.0018859429714857428, |
| "grad_norm": 121.3120346069336, |
| "learning_rate": 3e-06, |
| "loss": -12.857, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.001886943471735868, |
| "grad_norm": 138.72531127929688, |
| "learning_rate": 3e-06, |
| "loss": -11.7301, |
| "step": 1886 |
| }, |
| { |
| "epoch": 0.001887943971985993, |
| "grad_norm": 215.96949768066406, |
| "learning_rate": 3e-06, |
| "loss": -17.1948, |
| "step": 1887 |
| }, |
| { |
| "epoch": 0.001888944472236118, |
| "grad_norm": 111.08206939697266, |
| "learning_rate": 3e-06, |
| "loss": -12.625, |
| "step": 1888 |
| }, |
| { |
| "completion_length": 255.9791717529297, |
| "epoch": 0.0018899449724862432, |
| "grad_norm": 156.65145874023438, |
| "learning_rate": 3e-06, |
| "loss": 0.5665, |
| "reward": 0.25066138803958893, |
| "reward_std": 0.12561482936143875, |
| "rewards/sudoku_reward_func": 0.25066138803958893, |
| "step": 1889, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0018909454727363683, |
| "grad_norm": 107.05677032470703, |
| "learning_rate": 3e-06, |
| "loss": -1.7127, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.0018919459729864932, |
| "grad_norm": 131.33168029785156, |
| "learning_rate": 3e-06, |
| "loss": 2.0091, |
| "step": 1891 |
| }, |
| { |
| "epoch": 0.0018929464732366182, |
| "grad_norm": 90.71002197265625, |
| "learning_rate": 3e-06, |
| "loss": 4.5182, |
| "step": 1892 |
| }, |
| { |
| "epoch": 0.0018939469734867433, |
| "grad_norm": 138.46646118164062, |
| "learning_rate": 3e-06, |
| "loss": 0.3023, |
| "step": 1893 |
| }, |
| { |
| "epoch": 0.0018949474737368684, |
| "grad_norm": 117.40104675292969, |
| "learning_rate": 3e-06, |
| "loss": -1.9082, |
| "step": 1894 |
| }, |
| { |
| "epoch": 0.0018959479739869935, |
| "grad_norm": 141.14013671875, |
| "learning_rate": 3e-06, |
| "loss": 0.8231, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.0018969484742371186, |
| "grad_norm": 164.23402404785156, |
| "learning_rate": 3e-06, |
| "loss": 4.1673, |
| "step": 1896 |
| }, |
| { |
| "completion_length": 254.20833587646484, |
| "epoch": 0.0018979489744872437, |
| "grad_norm": 177.48680114746094, |
| "learning_rate": 3e-06, |
| "loss": -10.6794, |
| "reward": 0.24317581951618195, |
| "reward_std": 0.14005287736654282, |
| "rewards/sudoku_reward_func": 0.24317579716444016, |
| "step": 1897, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0018989494747373686, |
| "grad_norm": 230.07167053222656, |
| "learning_rate": 3e-06, |
| "loss": -11.7156, |
| "step": 1898 |
| }, |
| { |
| "epoch": 0.0018999499749874937, |
| "grad_norm": 184.52452087402344, |
| "learning_rate": 3e-06, |
| "loss": -2.0613, |
| "step": 1899 |
| }, |
| { |
| "epoch": 0.0019009504752376188, |
| "grad_norm": 199.63613891601562, |
| "learning_rate": 3e-06, |
| "loss": -9.7111, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.0019019509754877439, |
| "grad_norm": 261.729736328125, |
| "learning_rate": 3e-06, |
| "loss": -12.5346, |
| "step": 1901 |
| }, |
| { |
| "epoch": 0.001902951475737869, |
| "grad_norm": 174.4585418701172, |
| "learning_rate": 3e-06, |
| "loss": -14.4528, |
| "step": 1902 |
| }, |
| { |
| "epoch": 0.001903951975987994, |
| "grad_norm": 189.9020233154297, |
| "learning_rate": 3e-06, |
| "loss": -3.4384, |
| "step": 1903 |
| }, |
| { |
| "epoch": 0.0019049524762381192, |
| "grad_norm": 160.3231964111328, |
| "learning_rate": 3e-06, |
| "loss": -11.0285, |
| "step": 1904 |
| }, |
| { |
| "completion_length": 256.0, |
| "epoch": 0.001905952976488244, |
| "grad_norm": 193.59310913085938, |
| "learning_rate": 3e-06, |
| "loss": -6.2462, |
| "reward": 0.25355491042137146, |
| "reward_std": 0.16307833790779114, |
| "rewards/sudoku_reward_func": 0.25355489552021027, |
| "step": 1905, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0019069534767383691, |
| "grad_norm": 207.5780792236328, |
| "learning_rate": 3e-06, |
| "loss": -6.7655, |
| "step": 1906 |
| }, |
| { |
| "epoch": 0.0019079539769884942, |
| "grad_norm": 170.86962890625, |
| "learning_rate": 3e-06, |
| "loss": -10.8921, |
| "step": 1907 |
| }, |
| { |
| "epoch": 0.0019089544772386193, |
| "grad_norm": 140.1287841796875, |
| "learning_rate": 3e-06, |
| "loss": -18.6721, |
| "step": 1908 |
| }, |
| { |
| "epoch": 0.0019099549774887444, |
| "grad_norm": 270.27630615234375, |
| "learning_rate": 3e-06, |
| "loss": -6.2828, |
| "step": 1909 |
| }, |
| { |
| "epoch": 0.0019109554777388695, |
| "grad_norm": 183.57421875, |
| "learning_rate": 3e-06, |
| "loss": -8.3827, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.0019119559779889946, |
| "grad_norm": 210.7086944580078, |
| "learning_rate": 3e-06, |
| "loss": -12.2565, |
| "step": 1911 |
| }, |
| { |
| "epoch": 0.0019129564782391195, |
| "grad_norm": 148.4086456298828, |
| "learning_rate": 3e-06, |
| "loss": -19.4607, |
| "step": 1912 |
| }, |
| { |
| "completion_length": 253.125, |
| "epoch": 0.0019139569784892446, |
| "grad_norm": 140.03799438476562, |
| "learning_rate": 3e-06, |
| "loss": 6.7997, |
| "reward": 0.2821180745959282, |
| "reward_std": 0.16406304389238358, |
| "rewards/sudoku_reward_func": 0.282118059694767, |
| "step": 1913, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0019149574787393697, |
| "grad_norm": 161.60719299316406, |
| "learning_rate": 3e-06, |
| "loss": -3.7872, |
| "step": 1914 |
| }, |
| { |
| "epoch": 0.0019159579789894948, |
| "grad_norm": 165.4402313232422, |
| "learning_rate": 3e-06, |
| "loss": -0.9041, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.0019169584792396198, |
| "grad_norm": 174.52320861816406, |
| "learning_rate": 3e-06, |
| "loss": 5.3668, |
| "step": 1916 |
| }, |
| { |
| "epoch": 0.001917958979489745, |
| "grad_norm": 172.8831329345703, |
| "learning_rate": 3e-06, |
| "loss": 5.6679, |
| "step": 1917 |
| }, |
| { |
| "epoch": 0.00191895947973987, |
| "grad_norm": 207.86276245117188, |
| "learning_rate": 3e-06, |
| "loss": -4.6441, |
| "step": 1918 |
| }, |
| { |
| "epoch": 0.001919959979989995, |
| "grad_norm": 141.2104034423828, |
| "learning_rate": 3e-06, |
| "loss": -2.1364, |
| "step": 1919 |
| }, |
| { |
| "epoch": 0.00192096048024012, |
| "grad_norm": 168.36842346191406, |
| "learning_rate": 3e-06, |
| "loss": 4.9658, |
| "step": 1920 |
| }, |
| { |
| "completion_length": 253.95834350585938, |
| "epoch": 0.001921960980490245, |
| "grad_norm": 89.40079498291016, |
| "learning_rate": 3e-06, |
| "loss": -10.3347, |
| "reward": 0.28163330256938934, |
| "reward_std": 0.11973906680941582, |
| "rewards/sudoku_reward_func": 0.28163329511880875, |
| "step": 1921, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0019229614807403702, |
| "grad_norm": 155.6529083251953, |
| "learning_rate": 3e-06, |
| "loss": -11.0518, |
| "step": 1922 |
| }, |
| { |
| "epoch": 0.0019239619809904953, |
| "grad_norm": 137.2110595703125, |
| "learning_rate": 3e-06, |
| "loss": -12.0976, |
| "step": 1923 |
| }, |
| { |
| "epoch": 0.0019249624812406204, |
| "grad_norm": 133.95614624023438, |
| "learning_rate": 3e-06, |
| "loss": -9.0308, |
| "step": 1924 |
| }, |
| { |
| "epoch": 0.0019259629814907455, |
| "grad_norm": 108.86006927490234, |
| "learning_rate": 3e-06, |
| "loss": -10.1471, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.0019269634817408704, |
| "grad_norm": 147.3195037841797, |
| "learning_rate": 3e-06, |
| "loss": -12.1494, |
| "step": 1926 |
| }, |
| { |
| "epoch": 0.0019279639819909954, |
| "grad_norm": 127.62635803222656, |
| "learning_rate": 3e-06, |
| "loss": -13.4963, |
| "step": 1927 |
| }, |
| { |
| "epoch": 0.0019289644822411205, |
| "grad_norm": 123.51240539550781, |
| "learning_rate": 3e-06, |
| "loss": -10.2573, |
| "step": 1928 |
| }, |
| { |
| "completion_length": 255.9791717529297, |
| "epoch": 0.0019299649824912456, |
| "grad_norm": 151.781982421875, |
| "learning_rate": 3e-06, |
| "loss": -22.1567, |
| "reward": 0.26459161937236786, |
| "reward_std": 0.17789562046527863, |
| "rewards/sudoku_reward_func": 0.26459160447120667, |
| "step": 1929, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0019309654827413707, |
| "grad_norm": 157.08926391601562, |
| "learning_rate": 3e-06, |
| "loss": -19.2895, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.0019319659829914958, |
| "grad_norm": 170.2781219482422, |
| "learning_rate": 3e-06, |
| "loss": -12.0409, |
| "step": 1931 |
| }, |
| { |
| "epoch": 0.001932966483241621, |
| "grad_norm": 162.08419799804688, |
| "learning_rate": 3e-06, |
| "loss": -15.2176, |
| "step": 1932 |
| }, |
| { |
| "epoch": 0.0019339669834917458, |
| "grad_norm": 162.1764373779297, |
| "learning_rate": 3e-06, |
| "loss": -22.3389, |
| "step": 1933 |
| }, |
| { |
| "epoch": 0.0019349674837418709, |
| "grad_norm": 153.0626983642578, |
| "learning_rate": 3e-06, |
| "loss": -20.8159, |
| "step": 1934 |
| }, |
| { |
| "epoch": 0.001935967983991996, |
| "grad_norm": 126.0255126953125, |
| "learning_rate": 3e-06, |
| "loss": -13.5504, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.001936968484242121, |
| "grad_norm": 145.0688934326172, |
| "learning_rate": 3e-06, |
| "loss": -16.9399, |
| "step": 1936 |
| }, |
| { |
| "completion_length": 254.14583587646484, |
| "epoch": 0.0019379689844922462, |
| "grad_norm": 110.93611907958984, |
| "learning_rate": 3e-06, |
| "loss": -9.5221, |
| "reward": 0.24819251149892807, |
| "reward_std": 0.13325944542884827, |
| "rewards/sudoku_reward_func": 0.24819249659776688, |
| "step": 1937, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0019389694847423713, |
| "grad_norm": 197.55638122558594, |
| "learning_rate": 3e-06, |
| "loss": -11.8475, |
| "step": 1938 |
| }, |
| { |
| "epoch": 0.0019399699849924961, |
| "grad_norm": 132.9469757080078, |
| "learning_rate": 3e-06, |
| "loss": -13.7213, |
| "step": 1939 |
| }, |
| { |
| "epoch": 0.0019409704852426212, |
| "grad_norm": 145.57211303710938, |
| "learning_rate": 3e-06, |
| "loss": -10.4007, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.0019419709854927463, |
| "grad_norm": 139.77806091308594, |
| "learning_rate": 3e-06, |
| "loss": -11.1832, |
| "step": 1941 |
| }, |
| { |
| "epoch": 0.0019429714857428714, |
| "grad_norm": 175.5628662109375, |
| "learning_rate": 3e-06, |
| "loss": -12.1468, |
| "step": 1942 |
| }, |
| { |
| "epoch": 0.0019439719859929965, |
| "grad_norm": 179.16539001464844, |
| "learning_rate": 3e-06, |
| "loss": -15.5776, |
| "step": 1943 |
| }, |
| { |
| "epoch": 0.0019449724862431216, |
| "grad_norm": 140.24085998535156, |
| "learning_rate": 3e-06, |
| "loss": -11.1777, |
| "step": 1944 |
| }, |
| { |
| "completion_length": 255.95833587646484, |
| "epoch": 0.0019459729864932467, |
| "grad_norm": 171.1797332763672, |
| "learning_rate": 3e-06, |
| "loss": -2.8557, |
| "reward": 0.24582508206367493, |
| "reward_std": 0.15176919847726822, |
| "rewards/sudoku_reward_func": 0.24582506716251373, |
| "step": 1945, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0019469734867433716, |
| "grad_norm": 192.55026245117188, |
| "learning_rate": 3e-06, |
| "loss": -0.0457, |
| "step": 1946 |
| }, |
| { |
| "epoch": 0.0019479739869934967, |
| "grad_norm": 227.40118408203125, |
| "learning_rate": 3e-06, |
| "loss": -1.2861, |
| "step": 1947 |
| }, |
| { |
| "epoch": 0.0019489744872436218, |
| "grad_norm": 251.78111267089844, |
| "learning_rate": 3e-06, |
| "loss": 0.6302, |
| "step": 1948 |
| }, |
| { |
| "epoch": 0.0019499749874937469, |
| "grad_norm": 199.07493591308594, |
| "learning_rate": 3e-06, |
| "loss": -3.8908, |
| "step": 1949 |
| }, |
| { |
| "epoch": 0.001950975487743872, |
| "grad_norm": 179.95046997070312, |
| "learning_rate": 3e-06, |
| "loss": -1.2324, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.001951975987993997, |
| "grad_norm": 189.5157470703125, |
| "learning_rate": 3e-06, |
| "loss": -3.2241, |
| "step": 1951 |
| }, |
| { |
| "epoch": 0.0019529764882441221, |
| "grad_norm": 246.80743408203125, |
| "learning_rate": 3e-06, |
| "loss": -0.6323, |
| "step": 1952 |
| }, |
| { |
| "completion_length": 255.9166717529297, |
| "epoch": 0.001953976988494247, |
| "grad_norm": 133.2589569091797, |
| "learning_rate": 3e-06, |
| "loss": 2.4421, |
| "reward": 0.2585640847682953, |
| "reward_std": 0.1397685706615448, |
| "rewards/sudoku_reward_func": 0.2585640847682953, |
| "step": 1953, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0019549774887443723, |
| "grad_norm": 144.25059509277344, |
| "learning_rate": 3e-06, |
| "loss": -2.5594, |
| "step": 1954 |
| }, |
| { |
| "epoch": 0.001955977988994497, |
| "grad_norm": 193.25198364257812, |
| "learning_rate": 3e-06, |
| "loss": 1.5943, |
| "step": 1955 |
| }, |
| { |
| "epoch": 0.0019569784892446225, |
| "grad_norm": 146.21253967285156, |
| "learning_rate": 3e-06, |
| "loss": -0.3917, |
| "step": 1956 |
| }, |
| { |
| "epoch": 0.0019579789894947474, |
| "grad_norm": 157.32179260253906, |
| "learning_rate": 3e-06, |
| "loss": 1.1938, |
| "step": 1957 |
| }, |
| { |
| "epoch": 0.0019589794897448723, |
| "grad_norm": 131.586669921875, |
| "learning_rate": 3e-06, |
| "loss": -3.8024, |
| "step": 1958 |
| }, |
| { |
| "epoch": 0.0019599799899949976, |
| "grad_norm": 246.2342071533203, |
| "learning_rate": 3e-06, |
| "loss": 0.7853, |
| "step": 1959 |
| }, |
| { |
| "epoch": 0.0019609804902451225, |
| "grad_norm": 115.52106475830078, |
| "learning_rate": 3e-06, |
| "loss": -2.1448, |
| "step": 1960 |
| }, |
| { |
| "completion_length": 255.83333587646484, |
| "epoch": 0.0019619809904952478, |
| "grad_norm": 189.30178833007812, |
| "learning_rate": 3e-06, |
| "loss": 1.9125, |
| "reward": 0.2099228948354721, |
| "reward_std": 0.13680537045001984, |
| "rewards/sudoku_reward_func": 0.2099228873848915, |
| "step": 1961, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0019629814907453727, |
| "grad_norm": 292.6360168457031, |
| "learning_rate": 3e-06, |
| "loss": 8.2895, |
| "step": 1962 |
| }, |
| { |
| "epoch": 0.001963981990995498, |
| "grad_norm": 222.63095092773438, |
| "learning_rate": 3e-06, |
| "loss": 2.543, |
| "step": 1963 |
| }, |
| { |
| "epoch": 0.001964982491245623, |
| "grad_norm": 154.17628479003906, |
| "learning_rate": 3e-06, |
| "loss": 3.8953, |
| "step": 1964 |
| }, |
| { |
| "epoch": 0.0019659829914957477, |
| "grad_norm": 155.28807067871094, |
| "learning_rate": 3e-06, |
| "loss": 1.0186, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.001966983491745873, |
| "grad_norm": 280.9315185546875, |
| "learning_rate": 3e-06, |
| "loss": 6.3139, |
| "step": 1966 |
| }, |
| { |
| "epoch": 0.001967983991995998, |
| "grad_norm": 150.9969482421875, |
| "learning_rate": 3e-06, |
| "loss": 1.5908, |
| "step": 1967 |
| }, |
| { |
| "epoch": 0.001968984492246123, |
| "grad_norm": 167.98971557617188, |
| "learning_rate": 3e-06, |
| "loss": 2.7659, |
| "step": 1968 |
| }, |
| { |
| "completion_length": 253.33334350585938, |
| "epoch": 0.001969984992496248, |
| "grad_norm": 195.38546752929688, |
| "learning_rate": 3e-06, |
| "loss": -7.7222, |
| "reward": 0.21969321370124817, |
| "reward_std": 0.14372562617063522, |
| "rewards/sudoku_reward_func": 0.21969321370124817, |
| "step": 1969, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0019709854927463734, |
| "grad_norm": 204.2120361328125, |
| "learning_rate": 3e-06, |
| "loss": -23.4097, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.0019719859929964983, |
| "grad_norm": 177.75064086914062, |
| "learning_rate": 3e-06, |
| "loss": -13.5707, |
| "step": 1971 |
| }, |
| { |
| "epoch": 0.001972986493246623, |
| "grad_norm": 199.55172729492188, |
| "learning_rate": 3e-06, |
| "loss": -13.0697, |
| "step": 1972 |
| }, |
| { |
| "epoch": 0.0019739869934967485, |
| "grad_norm": 187.18035888671875, |
| "learning_rate": 3e-06, |
| "loss": -7.5141, |
| "step": 1973 |
| }, |
| { |
| "epoch": 0.0019749874937468733, |
| "grad_norm": 167.5812225341797, |
| "learning_rate": 3e-06, |
| "loss": -23.3993, |
| "step": 1974 |
| }, |
| { |
| "epoch": 0.0019759879939969987, |
| "grad_norm": 222.2601776123047, |
| "learning_rate": 3e-06, |
| "loss": -14.8587, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.0019769884942471235, |
| "grad_norm": 158.96644592285156, |
| "learning_rate": 3e-06, |
| "loss": -11.5531, |
| "step": 1976 |
| }, |
| { |
| "completion_length": 255.9166717529297, |
| "epoch": 0.0019779889944972484, |
| "grad_norm": 211.06991577148438, |
| "learning_rate": 3e-06, |
| "loss": -8.5541, |
| "reward": 0.2523975223302841, |
| "reward_std": 0.17239974439144135, |
| "rewards/sudoku_reward_func": 0.25239749997854233, |
| "step": 1977, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0019789894947473737, |
| "grad_norm": 296.3025817871094, |
| "learning_rate": 3e-06, |
| "loss": -12.2237, |
| "step": 1978 |
| }, |
| { |
| "epoch": 0.0019799899949974986, |
| "grad_norm": 373.58056640625, |
| "learning_rate": 3e-06, |
| "loss": -15.4929, |
| "step": 1979 |
| }, |
| { |
| "epoch": 0.001980990495247624, |
| "grad_norm": 185.84808349609375, |
| "learning_rate": 3e-06, |
| "loss": -11.0735, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.001981990995497749, |
| "grad_norm": 243.6155548095703, |
| "learning_rate": 3e-06, |
| "loss": -8.8876, |
| "step": 1981 |
| }, |
| { |
| "epoch": 0.001982991495747874, |
| "grad_norm": 293.4427185058594, |
| "learning_rate": 3e-06, |
| "loss": -13.6924, |
| "step": 1982 |
| }, |
| { |
| "epoch": 0.001983991995997999, |
| "grad_norm": 197.66085815429688, |
| "learning_rate": 3e-06, |
| "loss": -18.5818, |
| "step": 1983 |
| }, |
| { |
| "epoch": 0.001984992496248124, |
| "grad_norm": 185.08505249023438, |
| "learning_rate": 3e-06, |
| "loss": -13.7895, |
| "step": 1984 |
| }, |
| { |
| "completion_length": 255.9166717529297, |
| "epoch": 0.001985992996498249, |
| "grad_norm": 195.1097412109375, |
| "learning_rate": 3e-06, |
| "loss": -13.2296, |
| "reward": 0.24486306309700012, |
| "reward_std": 0.1268276385962963, |
| "rewards/sudoku_reward_func": 0.24486306309700012, |
| "step": 1985, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.001986993496748374, |
| "grad_norm": 169.93081665039062, |
| "learning_rate": 3e-06, |
| "loss": -8.3141, |
| "step": 1986 |
| }, |
| { |
| "epoch": 0.0019879939969984993, |
| "grad_norm": 173.13099670410156, |
| "learning_rate": 3e-06, |
| "loss": -10.358, |
| "step": 1987 |
| }, |
| { |
| "epoch": 0.0019889944972486242, |
| "grad_norm": 197.01596069335938, |
| "learning_rate": 3e-06, |
| "loss": -4.8024, |
| "step": 1988 |
| }, |
| { |
| "epoch": 0.0019899949974987495, |
| "grad_norm": 211.9609832763672, |
| "learning_rate": 3e-06, |
| "loss": -13.3649, |
| "step": 1989 |
| }, |
| { |
| "epoch": 0.0019909954977488744, |
| "grad_norm": 186.90805053710938, |
| "learning_rate": 3e-06, |
| "loss": -9.3327, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.0019919959979989993, |
| "grad_norm": 78.69964599609375, |
| "learning_rate": 3e-06, |
| "loss": -11.159, |
| "step": 1991 |
| }, |
| { |
| "epoch": 0.0019929964982491246, |
| "grad_norm": 201.1640167236328, |
| "learning_rate": 3e-06, |
| "loss": -5.9724, |
| "step": 1992 |
| }, |
| { |
| "completion_length": 255.9791717529297, |
| "epoch": 0.0019939969984992495, |
| "grad_norm": 125.78178405761719, |
| "learning_rate": 3e-06, |
| "loss": -6.6924, |
| "reward": 0.2843502163887024, |
| "reward_std": 0.13726307824254036, |
| "rewards/sudoku_reward_func": 0.2843502014875412, |
| "step": 1993, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.001994997498749375, |
| "grad_norm": 177.04168701171875, |
| "learning_rate": 3e-06, |
| "loss": -1.6726, |
| "step": 1994 |
| }, |
| { |
| "epoch": 0.0019959979989994997, |
| "grad_norm": 305.504150390625, |
| "learning_rate": 3e-06, |
| "loss": -7.7491, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.001996998499249625, |
| "grad_norm": 113.48680877685547, |
| "learning_rate": 3e-06, |
| "loss": -0.761, |
| "step": 1996 |
| }, |
| { |
| "epoch": 0.00199799899949975, |
| "grad_norm": 108.60614013671875, |
| "learning_rate": 3e-06, |
| "loss": -7.7709, |
| "step": 1997 |
| }, |
| { |
| "epoch": 0.0019989994997498747, |
| "grad_norm": 105.45082092285156, |
| "learning_rate": 3e-06, |
| "loss": -4.1487, |
| "step": 1998 |
| }, |
| { |
| "epoch": 0.002, |
| "grad_norm": 213.99227905273438, |
| "learning_rate": 3e-06, |
| "loss": -9.8322, |
| "step": 1999 |
| }, |
| { |
| "epoch": 0.002001000500250125, |
| "grad_norm": 134.13356018066406, |
| "learning_rate": 3e-06, |
| "loss": -2.2642, |
| "step": 2000 |
| }, |
| { |
| "completion_length": 255.9791717529297, |
| "epoch": 0.0020020010005002502, |
| "grad_norm": 242.3455352783203, |
| "learning_rate": 3e-06, |
| "loss": -2.1762, |
| "reward": 0.2152777761220932, |
| "reward_std": 0.161978080868721, |
| "rewards/sudoku_reward_func": 0.2152777686715126, |
| "step": 2001, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.002003001500750375, |
| "grad_norm": 409.4090576171875, |
| "learning_rate": 3e-06, |
| "loss": -4.2201, |
| "step": 2002 |
| }, |
| { |
| "epoch": 0.0020040020010005004, |
| "grad_norm": 174.99839782714844, |
| "learning_rate": 3e-06, |
| "loss": 1.8727, |
| "step": 2003 |
| }, |
| { |
| "epoch": 0.0020050025012506253, |
| "grad_norm": 214.46160888671875, |
| "learning_rate": 3e-06, |
| "loss": 8.1319, |
| "step": 2004 |
| }, |
| { |
| "epoch": 0.00200600300150075, |
| "grad_norm": 235.8225860595703, |
| "learning_rate": 3e-06, |
| "loss": -2.0186, |
| "step": 2005 |
| }, |
| { |
| "epoch": 0.0020070035017508755, |
| "grad_norm": 435.18310546875, |
| "learning_rate": 3e-06, |
| "loss": -3.7778, |
| "step": 2006 |
| }, |
| { |
| "epoch": 0.0020080040020010004, |
| "grad_norm": 188.9642791748047, |
| "learning_rate": 3e-06, |
| "loss": 1.6308, |
| "step": 2007 |
| }, |
| { |
| "epoch": 0.0020090045022511257, |
| "grad_norm": 254.4283447265625, |
| "learning_rate": 3e-06, |
| "loss": 7.1249, |
| "step": 2008 |
| }, |
| { |
| "completion_length": 254.81250762939453, |
| "epoch": 0.0020100050025012505, |
| "grad_norm": 307.87603759765625, |
| "learning_rate": 3e-06, |
| "loss": 6.6596, |
| "reward": 0.2307787761092186, |
| "reward_std": 0.15682782232761383, |
| "rewards/sudoku_reward_func": 0.2307787761092186, |
| "step": 2009, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.002011005502751376, |
| "grad_norm": 192.25518798828125, |
| "learning_rate": 3e-06, |
| "loss": -0.1586, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.0020120060030015007, |
| "grad_norm": 180.306640625, |
| "learning_rate": 3e-06, |
| "loss": 0.0719, |
| "step": 2011 |
| }, |
| { |
| "epoch": 0.0020130065032516256, |
| "grad_norm": 170.99240112304688, |
| "learning_rate": 3e-06, |
| "loss": 5.0591, |
| "step": 2012 |
| }, |
| { |
| "epoch": 0.002014007003501751, |
| "grad_norm": 346.23883056640625, |
| "learning_rate": 3e-06, |
| "loss": 4.0824, |
| "step": 2013 |
| }, |
| { |
| "epoch": 0.002015007503751876, |
| "grad_norm": 311.57666015625, |
| "learning_rate": 3e-06, |
| "loss": -3.0988, |
| "step": 2014 |
| }, |
| { |
| "epoch": 0.002016008004002001, |
| "grad_norm": 158.6090545654297, |
| "learning_rate": 3e-06, |
| "loss": -4.0589, |
| "step": 2015 |
| }, |
| { |
| "epoch": 0.002017008504252126, |
| "grad_norm": 157.3676300048828, |
| "learning_rate": 3e-06, |
| "loss": 2.9554, |
| "step": 2016 |
| }, |
| { |
| "completion_length": 255.9791717529297, |
| "epoch": 0.0020180090045022513, |
| "grad_norm": 161.91995239257812, |
| "learning_rate": 3e-06, |
| "loss": -4.3946, |
| "reward": 0.30390965938568115, |
| "reward_std": 0.15090640634298325, |
| "rewards/sudoku_reward_func": 0.30390962958335876, |
| "step": 2017, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.002019009504752376, |
| "grad_norm": 165.71839904785156, |
| "learning_rate": 3e-06, |
| "loss": -1.4257, |
| "step": 2018 |
| }, |
| { |
| "epoch": 0.002020010005002501, |
| "grad_norm": 328.2276916503906, |
| "learning_rate": 3e-06, |
| "loss": 5.6413, |
| "step": 2019 |
| }, |
| { |
| "epoch": 0.0020210105052526264, |
| "grad_norm": 206.0242919921875, |
| "learning_rate": 3e-06, |
| "loss": 6.9081, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.0020220110055027512, |
| "grad_norm": 207.88768005371094, |
| "learning_rate": 3e-06, |
| "loss": -1.5048, |
| "step": 2021 |
| }, |
| { |
| "epoch": 0.0020230115057528766, |
| "grad_norm": 157.71121215820312, |
| "learning_rate": 3e-06, |
| "loss": -2.585, |
| "step": 2022 |
| }, |
| { |
| "epoch": 0.0020240120060030014, |
| "grad_norm": 158.0244140625, |
| "learning_rate": 3e-06, |
| "loss": 2.7574, |
| "step": 2023 |
| }, |
| { |
| "epoch": 0.0020250125062531267, |
| "grad_norm": 270.9903564453125, |
| "learning_rate": 3e-06, |
| "loss": 3.6836, |
| "step": 2024 |
| }, |
| { |
| "completion_length": 253.5625, |
| "epoch": 0.0020260130065032516, |
| "grad_norm": 123.82396697998047, |
| "learning_rate": 3e-06, |
| "loss": 8.2977, |
| "reward": 0.20379765331745148, |
| "reward_std": 0.13752370700240135, |
| "rewards/sudoku_reward_func": 0.20379765331745148, |
| "step": 2025, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0020270135067533765, |
| "grad_norm": 147.5820770263672, |
| "learning_rate": 3e-06, |
| "loss": 2.0875, |
| "step": 2026 |
| }, |
| { |
| "epoch": 0.002028014007003502, |
| "grad_norm": 164.91355895996094, |
| "learning_rate": 3e-06, |
| "loss": 4.2679, |
| "step": 2027 |
| }, |
| { |
| "epoch": 0.0020290145072536267, |
| "grad_norm": 204.01260375976562, |
| "learning_rate": 3e-06, |
| "loss": 2.1948, |
| "step": 2028 |
| }, |
| { |
| "epoch": 0.002030015007503752, |
| "grad_norm": 155.6641387939453, |
| "learning_rate": 3e-06, |
| "loss": 8.1561, |
| "step": 2029 |
| }, |
| { |
| "epoch": 0.002031015507753877, |
| "grad_norm": 145.83570861816406, |
| "learning_rate": 3e-06, |
| "loss": 1.8471, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.002032016008004002, |
| "grad_norm": 129.6785888671875, |
| "learning_rate": 3e-06, |
| "loss": 3.1864, |
| "step": 2031 |
| }, |
| { |
| "epoch": 0.002033016508254127, |
| "grad_norm": 136.57089233398438, |
| "learning_rate": 3e-06, |
| "loss": 0.1599, |
| "step": 2032 |
| }, |
| { |
| "completion_length": 255.39584350585938, |
| "epoch": 0.002034017008504252, |
| "grad_norm": 256.8916015625, |
| "learning_rate": 3e-06, |
| "loss": 1.5327, |
| "reward": 0.2647569626569748, |
| "reward_std": 0.15292219817638397, |
| "rewards/sudoku_reward_func": 0.2647569477558136, |
| "step": 2033, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0020350175087543772, |
| "grad_norm": 221.5432586669922, |
| "learning_rate": 3e-06, |
| "loss": -0.6326, |
| "step": 2034 |
| }, |
| { |
| "epoch": 0.002036018009004502, |
| "grad_norm": 115.88921356201172, |
| "learning_rate": 3e-06, |
| "loss": -8.1754, |
| "step": 2035 |
| }, |
| { |
| "epoch": 0.0020370185092546274, |
| "grad_norm": 155.23406982421875, |
| "learning_rate": 3e-06, |
| "loss": -1.9255, |
| "step": 2036 |
| }, |
| { |
| "epoch": 0.0020380190095047523, |
| "grad_norm": 174.22718811035156, |
| "learning_rate": 3e-06, |
| "loss": 0.6805, |
| "step": 2037 |
| }, |
| { |
| "epoch": 0.0020390195097548776, |
| "grad_norm": 271.7087097167969, |
| "learning_rate": 3e-06, |
| "loss": -0.6905, |
| "step": 2038 |
| }, |
| { |
| "epoch": 0.0020400200100050025, |
| "grad_norm": 117.21466827392578, |
| "learning_rate": 3e-06, |
| "loss": -9.2192, |
| "step": 2039 |
| }, |
| { |
| "epoch": 0.0020410205102551274, |
| "grad_norm": 161.93284606933594, |
| "learning_rate": 3e-06, |
| "loss": -3.25, |
| "step": 2040 |
| }, |
| { |
| "completion_length": 253.25, |
| "epoch": 0.0020420210105052527, |
| "grad_norm": 508.3278503417969, |
| "learning_rate": 3e-06, |
| "loss": -4.7108, |
| "reward": 0.26471560448408127, |
| "reward_std": 0.17860908806324005, |
| "rewards/sudoku_reward_func": 0.26471560448408127, |
| "step": 2041, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0020430215107553776, |
| "grad_norm": 267.98602294921875, |
| "learning_rate": 3e-06, |
| "loss": -8.4861, |
| "step": 2042 |
| }, |
| { |
| "epoch": 0.002044022011005503, |
| "grad_norm": 243.9029083251953, |
| "learning_rate": 3e-06, |
| "loss": -3.0436, |
| "step": 2043 |
| }, |
| { |
| "epoch": 0.0020450225112556277, |
| "grad_norm": 202.64193725585938, |
| "learning_rate": 3e-06, |
| "loss": -8.469, |
| "step": 2044 |
| }, |
| { |
| "epoch": 0.002046023011505753, |
| "grad_norm": 304.8101501464844, |
| "learning_rate": 3e-06, |
| "loss": -7.1241, |
| "step": 2045 |
| }, |
| { |
| "epoch": 0.002047023511755878, |
| "grad_norm": 294.1930847167969, |
| "learning_rate": 3e-06, |
| "loss": -7.5035, |
| "step": 2046 |
| }, |
| { |
| "epoch": 0.002048024012006003, |
| "grad_norm": 237.90809631347656, |
| "learning_rate": 3e-06, |
| "loss": -5.4658, |
| "step": 2047 |
| }, |
| { |
| "epoch": 0.002049024512256128, |
| "grad_norm": 154.24185180664062, |
| "learning_rate": 3e-06, |
| "loss": -8.7881, |
| "step": 2048 |
| }, |
| { |
| "completion_length": 253.4375, |
| "epoch": 0.002050025012506253, |
| "grad_norm": 111.57600402832031, |
| "learning_rate": 3e-06, |
| "loss": -13.8521, |
| "reward": 0.22884725779294968, |
| "reward_std": 0.1646890565752983, |
| "rewards/sudoku_reward_func": 0.22884725779294968, |
| "step": 2049, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0020510255127563783, |
| "grad_norm": 177.33627319335938, |
| "learning_rate": 3e-06, |
| "loss": -8.5202, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.002052026013006503, |
| "grad_norm": 189.1322479248047, |
| "learning_rate": 3e-06, |
| "loss": -11.823, |
| "step": 2051 |
| }, |
| { |
| "epoch": 0.0020530265132566285, |
| "grad_norm": 319.8341064453125, |
| "learning_rate": 3e-06, |
| "loss": -13.3699, |
| "step": 2052 |
| }, |
| { |
| "epoch": 0.0020540270135067534, |
| "grad_norm": 97.67252349853516, |
| "learning_rate": 3e-06, |
| "loss": -14.6061, |
| "step": 2053 |
| }, |
| { |
| "epoch": 0.0020550275137568783, |
| "grad_norm": 141.40914916992188, |
| "learning_rate": 3e-06, |
| "loss": -9.0696, |
| "step": 2054 |
| }, |
| { |
| "epoch": 0.0020560280140070036, |
| "grad_norm": 165.0148468017578, |
| "learning_rate": 3e-06, |
| "loss": -13.5339, |
| "step": 2055 |
| }, |
| { |
| "epoch": 0.0020570285142571284, |
| "grad_norm": 279.2270202636719, |
| "learning_rate": 3e-06, |
| "loss": -15.6225, |
| "step": 2056 |
| }, |
| { |
| "completion_length": 254.58333587646484, |
| "epoch": 0.0020580290145072538, |
| "grad_norm": 149.38491821289062, |
| "learning_rate": 3e-06, |
| "loss": 6.731, |
| "reward": 0.2059771865606308, |
| "reward_std": 0.13232478126883507, |
| "rewards/sudoku_reward_func": 0.2059771791100502, |
| "step": 2057, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0020590295147573786, |
| "grad_norm": 115.99948120117188, |
| "learning_rate": 3e-06, |
| "loss": 4.0264, |
| "step": 2058 |
| }, |
| { |
| "epoch": 0.002060030015007504, |
| "grad_norm": 121.04714965820312, |
| "learning_rate": 3e-06, |
| "loss": 2.4383, |
| "step": 2059 |
| }, |
| { |
| "epoch": 0.002061030515257629, |
| "grad_norm": 261.35858154296875, |
| "learning_rate": 3e-06, |
| "loss": 4.7794, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.0020620310155077537, |
| "grad_norm": 161.84022521972656, |
| "learning_rate": 3e-06, |
| "loss": 6.2386, |
| "step": 2061 |
| }, |
| { |
| "epoch": 0.002063031515757879, |
| "grad_norm": 105.91159057617188, |
| "learning_rate": 3e-06, |
| "loss": 3.5483, |
| "step": 2062 |
| }, |
| { |
| "epoch": 0.002064032016008004, |
| "grad_norm": 104.31018829345703, |
| "learning_rate": 3e-06, |
| "loss": 1.5695, |
| "step": 2063 |
| }, |
| { |
| "epoch": 0.002065032516258129, |
| "grad_norm": 207.45989990234375, |
| "learning_rate": 3e-06, |
| "loss": 3.7625, |
| "step": 2064 |
| }, |
| { |
| "completion_length": 254.83333587646484, |
| "epoch": 0.002066033016508254, |
| "grad_norm": 109.15100860595703, |
| "learning_rate": 3e-06, |
| "loss": -15.7269, |
| "reward": 0.26253609359264374, |
| "reward_std": 0.13556809723377228, |
| "rewards/sudoku_reward_func": 0.26253607869148254, |
| "step": 2065, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0020670335167583794, |
| "grad_norm": 360.2762145996094, |
| "learning_rate": 3e-06, |
| "loss": -14.5243, |
| "step": 2066 |
| }, |
| { |
| "epoch": 0.0020680340170085043, |
| "grad_norm": 135.55064392089844, |
| "learning_rate": 3e-06, |
| "loss": -9.1608, |
| "step": 2067 |
| }, |
| { |
| "epoch": 0.002069034517258629, |
| "grad_norm": 163.361328125, |
| "learning_rate": 3e-06, |
| "loss": -8.2301, |
| "step": 2068 |
| }, |
| { |
| "epoch": 0.0020700350175087544, |
| "grad_norm": 125.7318115234375, |
| "learning_rate": 3e-06, |
| "loss": -16.3416, |
| "step": 2069 |
| }, |
| { |
| "epoch": 0.0020710355177588793, |
| "grad_norm": 258.9736633300781, |
| "learning_rate": 3e-06, |
| "loss": -16.9302, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.0020720360180090046, |
| "grad_norm": 140.67019653320312, |
| "learning_rate": 3e-06, |
| "loss": -10.9255, |
| "step": 2071 |
| }, |
| { |
| "epoch": 0.0020730365182591295, |
| "grad_norm": 165.7702178955078, |
| "learning_rate": 3e-06, |
| "loss": -10.728, |
| "step": 2072 |
| }, |
| { |
| "completion_length": 255.9375, |
| "epoch": 0.002074037018509255, |
| "grad_norm": 620.3129272460938, |
| "learning_rate": 3e-06, |
| "loss": 1.4765, |
| "reward": 0.25063884258270264, |
| "reward_std": 0.16608520597219467, |
| "rewards/sudoku_reward_func": 0.25063882768154144, |
| "step": 2073, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0020750375187593797, |
| "grad_norm": 172.11087036132812, |
| "learning_rate": 3e-06, |
| "loss": -14.5391, |
| "step": 2074 |
| }, |
| { |
| "epoch": 0.0020760380190095046, |
| "grad_norm": 191.67532348632812, |
| "learning_rate": 3e-06, |
| "loss": -5.3397, |
| "step": 2075 |
| }, |
| { |
| "epoch": 0.00207703851925963, |
| "grad_norm": 294.1646423339844, |
| "learning_rate": 3e-06, |
| "loss": -8.6712, |
| "step": 2076 |
| }, |
| { |
| "epoch": 0.0020780390195097548, |
| "grad_norm": 597.5181274414062, |
| "learning_rate": 3e-06, |
| "loss": 0.3632, |
| "step": 2077 |
| }, |
| { |
| "epoch": 0.00207903951975988, |
| "grad_norm": 167.82606506347656, |
| "learning_rate": 3e-06, |
| "loss": -16.7106, |
| "step": 2078 |
| }, |
| { |
| "epoch": 0.002080040020010005, |
| "grad_norm": 219.1627960205078, |
| "learning_rate": 3e-06, |
| "loss": -6.4416, |
| "step": 2079 |
| }, |
| { |
| "epoch": 0.0020810405202601303, |
| "grad_norm": 258.1119689941406, |
| "learning_rate": 3e-06, |
| "loss": -12.8399, |
| "step": 2080 |
| }, |
| { |
| "completion_length": 254.9375, |
| "epoch": 0.002082041020510255, |
| "grad_norm": 178.03309631347656, |
| "learning_rate": 3e-06, |
| "loss": -6.8223, |
| "reward": 0.23602844774723053, |
| "reward_std": 0.15177836269140244, |
| "rewards/sudoku_reward_func": 0.23602844029664993, |
| "step": 2081, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.00208304152076038, |
| "grad_norm": 131.65509033203125, |
| "learning_rate": 3e-06, |
| "loss": -4.6811, |
| "step": 2082 |
| }, |
| { |
| "epoch": 0.0020840420210105053, |
| "grad_norm": 318.57958984375, |
| "learning_rate": 3e-06, |
| "loss": 1.6984, |
| "step": 2083 |
| }, |
| { |
| "epoch": 0.00208504252126063, |
| "grad_norm": 277.5106506347656, |
| "learning_rate": 3e-06, |
| "loss": 2.3091, |
| "step": 2084 |
| }, |
| { |
| "epoch": 0.0020860430215107555, |
| "grad_norm": 152.53269958496094, |
| "learning_rate": 3e-06, |
| "loss": -7.6793, |
| "step": 2085 |
| }, |
| { |
| "epoch": 0.0020870435217608804, |
| "grad_norm": 236.90296936035156, |
| "learning_rate": 3e-06, |
| "loss": -5.8063, |
| "step": 2086 |
| }, |
| { |
| "epoch": 0.0020880440220110057, |
| "grad_norm": 309.64208984375, |
| "learning_rate": 3e-06, |
| "loss": -0.1638, |
| "step": 2087 |
| }, |
| { |
| "epoch": 0.0020890445222611306, |
| "grad_norm": 289.9593505859375, |
| "learning_rate": 3e-06, |
| "loss": -0.9137, |
| "step": 2088 |
| }, |
| { |
| "completion_length": 256.0, |
| "epoch": 0.0020900450225112555, |
| "grad_norm": 195.6490478515625, |
| "learning_rate": 3e-06, |
| "loss": 14.6669, |
| "reward": 0.2501089870929718, |
| "reward_std": 0.15444093942642212, |
| "rewards/sudoku_reward_func": 0.2501089796423912, |
| "step": 2089, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0020910455227613808, |
| "grad_norm": 189.63902282714844, |
| "learning_rate": 3e-06, |
| "loss": 20.3839, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.0020920460230115056, |
| "grad_norm": 183.08782958984375, |
| "learning_rate": 3e-06, |
| "loss": 22.8672, |
| "step": 2091 |
| }, |
| { |
| "epoch": 0.002093046523261631, |
| "grad_norm": 167.2587890625, |
| "learning_rate": 3e-06, |
| "loss": 16.6521, |
| "step": 2092 |
| }, |
| { |
| "epoch": 0.002094047023511756, |
| "grad_norm": 142.52476501464844, |
| "learning_rate": 3e-06, |
| "loss": 13.5591, |
| "step": 2093 |
| }, |
| { |
| "epoch": 0.002095047523761881, |
| "grad_norm": 143.21453857421875, |
| "learning_rate": 3e-06, |
| "loss": 17.2013, |
| "step": 2094 |
| }, |
| { |
| "epoch": 0.002096048024012006, |
| "grad_norm": 148.69102478027344, |
| "learning_rate": 3e-06, |
| "loss": 19.4678, |
| "step": 2095 |
| }, |
| { |
| "epoch": 0.002097048524262131, |
| "grad_norm": 120.7786865234375, |
| "learning_rate": 3e-06, |
| "loss": 14.8181, |
| "step": 2096 |
| }, |
| { |
| "completion_length": 253.52084350585938, |
| "epoch": 0.002098049024512256, |
| "grad_norm": 128.90467834472656, |
| "learning_rate": 3e-06, |
| "loss": -18.5774, |
| "reward": 0.24012070894241333, |
| "reward_std": 0.1463899165391922, |
| "rewards/sudoku_reward_func": 0.24012070894241333, |
| "step": 2097, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.002099049524762381, |
| "grad_norm": 162.4427490234375, |
| "learning_rate": 3e-06, |
| "loss": -23.592, |
| "step": 2098 |
| }, |
| { |
| "epoch": 0.0021000500250125064, |
| "grad_norm": 142.89620971679688, |
| "learning_rate": 3e-06, |
| "loss": -22.294, |
| "step": 2099 |
| }, |
| { |
| "epoch": 0.0021010505252626313, |
| "grad_norm": 160.10720825195312, |
| "learning_rate": 3e-06, |
| "loss": -22.4738, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.0021020510255127566, |
| "grad_norm": 125.08181762695312, |
| "learning_rate": 3e-06, |
| "loss": -18.162, |
| "step": 2101 |
| }, |
| { |
| "epoch": 0.0021030515257628815, |
| "grad_norm": 141.7852020263672, |
| "learning_rate": 3e-06, |
| "loss": -23.9315, |
| "step": 2102 |
| }, |
| { |
| "epoch": 0.0021040520260130063, |
| "grad_norm": 120.26238250732422, |
| "learning_rate": 3e-06, |
| "loss": -23.2946, |
| "step": 2103 |
| }, |
| { |
| "epoch": 0.0021050525262631316, |
| "grad_norm": 155.07212829589844, |
| "learning_rate": 3e-06, |
| "loss": -24.0824, |
| "step": 2104 |
| }, |
| { |
| "completion_length": 256.0, |
| "epoch": 0.0021060530265132565, |
| "grad_norm": 159.4878387451172, |
| "learning_rate": 3e-06, |
| "loss": -12.6954, |
| "reward": 0.25752314925193787, |
| "reward_std": 0.1472446396946907, |
| "rewards/sudoku_reward_func": 0.25752314925193787, |
| "step": 2105, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.002107053526763382, |
| "grad_norm": 103.97441101074219, |
| "learning_rate": 3e-06, |
| "loss": -11.32, |
| "step": 2106 |
| }, |
| { |
| "epoch": 0.0021080540270135067, |
| "grad_norm": 144.20181274414062, |
| "learning_rate": 3e-06, |
| "loss": -12.5571, |
| "step": 2107 |
| }, |
| { |
| "epoch": 0.002109054527263632, |
| "grad_norm": 117.40840148925781, |
| "learning_rate": 3e-06, |
| "loss": -11.9496, |
| "step": 2108 |
| }, |
| { |
| "epoch": 0.002110055027513757, |
| "grad_norm": 138.6615447998047, |
| "learning_rate": 3e-06, |
| "loss": -14.2046, |
| "step": 2109 |
| }, |
| { |
| "epoch": 0.0021110555277638818, |
| "grad_norm": 77.72933197021484, |
| "learning_rate": 3e-06, |
| "loss": -12.6948, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.002112056028014007, |
| "grad_norm": 397.400146484375, |
| "learning_rate": 3e-06, |
| "loss": -14.8182, |
| "step": 2111 |
| }, |
| { |
| "epoch": 0.002113056528264132, |
| "grad_norm": 225.19723510742188, |
| "learning_rate": 3e-06, |
| "loss": -15.2176, |
| "step": 2112 |
| }, |
| { |
| "completion_length": 253.6666717529297, |
| "epoch": 0.0021140570285142573, |
| "grad_norm": 125.04232788085938, |
| "learning_rate": 3e-06, |
| "loss": -4.1518, |
| "reward": 0.23790735751390457, |
| "reward_std": 0.13891858607530594, |
| "rewards/sudoku_reward_func": 0.23790735751390457, |
| "step": 2113, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.002115057528764382, |
| "grad_norm": 134.87939453125, |
| "learning_rate": 3e-06, |
| "loss": -13.3869, |
| "step": 2114 |
| }, |
| { |
| "epoch": 0.0021160580290145075, |
| "grad_norm": 135.5575714111328, |
| "learning_rate": 3e-06, |
| "loss": -7.0319, |
| "step": 2115 |
| }, |
| { |
| "epoch": 0.0021170585292646323, |
| "grad_norm": 163.67832946777344, |
| "learning_rate": 3e-06, |
| "loss": -15.5896, |
| "step": 2116 |
| }, |
| { |
| "epoch": 0.0021180590295147572, |
| "grad_norm": 157.14891052246094, |
| "learning_rate": 3e-06, |
| "loss": -5.0652, |
| "step": 2117 |
| }, |
| { |
| "epoch": 0.0021190595297648825, |
| "grad_norm": 382.6764831542969, |
| "learning_rate": 3e-06, |
| "loss": -11.7335, |
| "step": 2118 |
| }, |
| { |
| "epoch": 0.0021200600300150074, |
| "grad_norm": 279.73651123046875, |
| "learning_rate": 3e-06, |
| "loss": -6.3247, |
| "step": 2119 |
| }, |
| { |
| "epoch": 0.0021210605302651327, |
| "grad_norm": 188.39959716796875, |
| "learning_rate": 3e-06, |
| "loss": -15.4458, |
| "step": 2120 |
| }, |
| { |
| "completion_length": 255.95834350585938, |
| "epoch": 0.0021220610305152576, |
| "grad_norm": 232.8545379638672, |
| "learning_rate": 3e-06, |
| "loss": -7.1581, |
| "reward": 0.27688343822956085, |
| "reward_std": 0.1465592235326767, |
| "rewards/sudoku_reward_func": 0.27688342332839966, |
| "step": 2121, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.002123061530765383, |
| "grad_norm": 172.3257598876953, |
| "learning_rate": 3e-06, |
| "loss": -11.4882, |
| "step": 2122 |
| }, |
| { |
| "epoch": 0.0021240620310155078, |
| "grad_norm": 188.06607055664062, |
| "learning_rate": 3e-06, |
| "loss": -13.6338, |
| "step": 2123 |
| }, |
| { |
| "epoch": 0.0021250625312656327, |
| "grad_norm": 204.9393768310547, |
| "learning_rate": 3e-06, |
| "loss": -20.2453, |
| "step": 2124 |
| }, |
| { |
| "epoch": 0.002126063031515758, |
| "grad_norm": 244.41342163085938, |
| "learning_rate": 3e-06, |
| "loss": -9.4582, |
| "step": 2125 |
| }, |
| { |
| "epoch": 0.002127063531765883, |
| "grad_norm": 189.36769104003906, |
| "learning_rate": 3e-06, |
| "loss": -13.526, |
| "step": 2126 |
| }, |
| { |
| "epoch": 0.002128064032016008, |
| "grad_norm": 194.4271697998047, |
| "learning_rate": 3e-06, |
| "loss": -15.8828, |
| "step": 2127 |
| }, |
| { |
| "epoch": 0.002129064532266133, |
| "grad_norm": 203.0721435546875, |
| "learning_rate": 3e-06, |
| "loss": -22.4253, |
| "step": 2128 |
| }, |
| { |
| "completion_length": 255.9791717529297, |
| "epoch": 0.0021300650325162583, |
| "grad_norm": 198.96873474121094, |
| "learning_rate": 3e-06, |
| "loss": 2.4782, |
| "reward": 0.3081071227788925, |
| "reward_std": 0.1305009424686432, |
| "rewards/sudoku_reward_func": 0.3081071227788925, |
| "step": 2129, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0021310655327663832, |
| "grad_norm": 225.70071411132812, |
| "learning_rate": 3e-06, |
| "loss": -3.3406, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.002132066033016508, |
| "grad_norm": 182.7887725830078, |
| "learning_rate": 3e-06, |
| "loss": -6.5365, |
| "step": 2131 |
| }, |
| { |
| "epoch": 0.0021330665332666334, |
| "grad_norm": 256.8357238769531, |
| "learning_rate": 3e-06, |
| "loss": -6.6293, |
| "step": 2132 |
| }, |
| { |
| "epoch": 0.0021340670335167583, |
| "grad_norm": 175.3928680419922, |
| "learning_rate": 3e-06, |
| "loss": 2.0084, |
| "step": 2133 |
| }, |
| { |
| "epoch": 0.0021350675337668836, |
| "grad_norm": 236.942138671875, |
| "learning_rate": 3e-06, |
| "loss": -4.0, |
| "step": 2134 |
| }, |
| { |
| "epoch": 0.0021360680340170085, |
| "grad_norm": 124.70699310302734, |
| "learning_rate": 3e-06, |
| "loss": -8.0161, |
| "step": 2135 |
| }, |
| { |
| "epoch": 0.0021370685342671334, |
| "grad_norm": 208.55020141601562, |
| "learning_rate": 3e-06, |
| "loss": -7.0063, |
| "step": 2136 |
| }, |
| { |
| "completion_length": 256.0, |
| "epoch": 0.0021380690345172587, |
| "grad_norm": 451.7789001464844, |
| "learning_rate": 3e-06, |
| "loss": 23.7947, |
| "reward": 0.22189154475927353, |
| "reward_std": 0.1443362608551979, |
| "rewards/sudoku_reward_func": 0.22189154475927353, |
| "step": 2137, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0021390695347673835, |
| "grad_norm": 239.3140106201172, |
| "learning_rate": 3e-06, |
| "loss": 24.3879, |
| "step": 2138 |
| }, |
| { |
| "epoch": 0.002140070035017509, |
| "grad_norm": 414.748779296875, |
| "learning_rate": 3e-06, |
| "loss": 29.8622, |
| "step": 2139 |
| }, |
| { |
| "epoch": 0.0021410705352676337, |
| "grad_norm": 366.05584716796875, |
| "learning_rate": 3e-06, |
| "loss": 24.7021, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.002142071035517759, |
| "grad_norm": 233.06585693359375, |
| "learning_rate": 3e-06, |
| "loss": 24.4789, |
| "step": 2141 |
| }, |
| { |
| "epoch": 0.002143071535767884, |
| "grad_norm": 322.3249816894531, |
| "learning_rate": 3e-06, |
| "loss": 22.6371, |
| "step": 2142 |
| }, |
| { |
| "epoch": 0.002144072036018009, |
| "grad_norm": 310.0884094238281, |
| "learning_rate": 3e-06, |
| "loss": 27.6157, |
| "step": 2143 |
| }, |
| { |
| "epoch": 0.002145072536268134, |
| "grad_norm": 295.13995361328125, |
| "learning_rate": 3e-06, |
| "loss": 21.8875, |
| "step": 2144 |
| }, |
| { |
| "completion_length": 253.62500762939453, |
| "epoch": 0.002146073036518259, |
| "grad_norm": 271.6358337402344, |
| "learning_rate": 3e-06, |
| "loss": 1.5988, |
| "reward": 0.22103850543498993, |
| "reward_std": 0.17497707903385162, |
| "rewards/sudoku_reward_func": 0.22103849798440933, |
| "step": 2145, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0021470735367683843, |
| "grad_norm": 412.4436950683594, |
| "learning_rate": 3e-06, |
| "loss": 11.7546, |
| "step": 2146 |
| }, |
| { |
| "epoch": 0.002148074037018509, |
| "grad_norm": 187.46017456054688, |
| "learning_rate": 3e-06, |
| "loss": 3.9364, |
| "step": 2147 |
| }, |
| { |
| "epoch": 0.0021490745372686345, |
| "grad_norm": 193.0413055419922, |
| "learning_rate": 3e-06, |
| "loss": -0.1165, |
| "step": 2148 |
| }, |
| { |
| "epoch": 0.0021500750375187594, |
| "grad_norm": 305.0658874511719, |
| "learning_rate": 3e-06, |
| "loss": 1.0991, |
| "step": 2149 |
| }, |
| { |
| "epoch": 0.0021510755377688842, |
| "grad_norm": 441.3357849121094, |
| "learning_rate": 3e-06, |
| "loss": 10.5642, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.0021520760380190095, |
| "grad_norm": 196.80613708496094, |
| "learning_rate": 3e-06, |
| "loss": 2.8893, |
| "step": 2151 |
| }, |
| { |
| "epoch": 0.0021530765382691344, |
| "grad_norm": 197.22515869140625, |
| "learning_rate": 3e-06, |
| "loss": -0.7456, |
| "step": 2152 |
| }, |
| { |
| "completion_length": 250.6666717529297, |
| "epoch": 0.0021540770385192597, |
| "grad_norm": 162.09518432617188, |
| "learning_rate": 3e-06, |
| "loss": -16.1398, |
| "reward": 0.2230752632021904, |
| "reward_std": 0.15181417018175125, |
| "rewards/sudoku_reward_func": 0.2230752483010292, |
| "step": 2153, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0021550775387693846, |
| "grad_norm": 153.7283477783203, |
| "learning_rate": 3e-06, |
| "loss": -15.0541, |
| "step": 2154 |
| }, |
| { |
| "epoch": 0.00215607803901951, |
| "grad_norm": 188.96493530273438, |
| "learning_rate": 3e-06, |
| "loss": -26.0853, |
| "step": 2155 |
| }, |
| { |
| "epoch": 0.002157078539269635, |
| "grad_norm": 177.75912475585938, |
| "learning_rate": 3e-06, |
| "loss": -5.8482, |
| "step": 2156 |
| }, |
| { |
| "epoch": 0.0021580790395197597, |
| "grad_norm": 166.35536193847656, |
| "learning_rate": 3e-06, |
| "loss": -16.6153, |
| "step": 2157 |
| }, |
| { |
| "epoch": 0.002159079539769885, |
| "grad_norm": 130.83131408691406, |
| "learning_rate": 3e-06, |
| "loss": -15.7326, |
| "step": 2158 |
| }, |
| { |
| "epoch": 0.00216008004002001, |
| "grad_norm": 237.99929809570312, |
| "learning_rate": 3e-06, |
| "loss": -24.3056, |
| "step": 2159 |
| }, |
| { |
| "epoch": 0.002161080540270135, |
| "grad_norm": 227.08934020996094, |
| "learning_rate": 3e-06, |
| "loss": -7.8361, |
| "step": 2160 |
| }, |
| { |
| "completion_length": 255.9166717529297, |
| "epoch": 0.00216208104052026, |
| "grad_norm": 372.7931213378906, |
| "learning_rate": 3e-06, |
| "loss": -11.8087, |
| "reward": 0.2559824585914612, |
| "reward_std": 0.12336140125989914, |
| "rewards/sudoku_reward_func": 0.2559824511408806, |
| "step": 2161, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0021630815407703854, |
| "grad_norm": 267.86474609375, |
| "learning_rate": 3e-06, |
| "loss": -11.9906, |
| "step": 2162 |
| }, |
| { |
| "epoch": 0.0021640820410205102, |
| "grad_norm": 111.28511810302734, |
| "learning_rate": 3e-06, |
| "loss": -13.1757, |
| "step": 2163 |
| }, |
| { |
| "epoch": 0.002165082541270635, |
| "grad_norm": 184.33836364746094, |
| "learning_rate": 3e-06, |
| "loss": -12.4034, |
| "step": 2164 |
| }, |
| { |
| "epoch": 0.0021660830415207604, |
| "grad_norm": 167.9713592529297, |
| "learning_rate": 3e-06, |
| "loss": -15.7842, |
| "step": 2165 |
| }, |
| { |
| "epoch": 0.0021670835417708853, |
| "grad_norm": 193.59280395507812, |
| "learning_rate": 3e-06, |
| "loss": -12.9642, |
| "step": 2166 |
| }, |
| { |
| "epoch": 0.0021680840420210106, |
| "grad_norm": 100.986572265625, |
| "learning_rate": 3e-06, |
| "loss": -13.2477, |
| "step": 2167 |
| }, |
| { |
| "epoch": 0.0021690845422711355, |
| "grad_norm": 180.26425170898438, |
| "learning_rate": 3e-06, |
| "loss": -13.685, |
| "step": 2168 |
| }, |
| { |
| "completion_length": 255.81250762939453, |
| "epoch": 0.002170085042521261, |
| "grad_norm": 257.19793701171875, |
| "learning_rate": 3e-06, |
| "loss": 1.9957, |
| "reward": 0.25946594774723053, |
| "reward_std": 0.15108813345432281, |
| "rewards/sudoku_reward_func": 0.25946594774723053, |
| "step": 2169, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0021710855427713857, |
| "grad_norm": 203.61459350585938, |
| "learning_rate": 3e-06, |
| "loss": 8.0578, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.0021720860430215106, |
| "grad_norm": 166.8609619140625, |
| "learning_rate": 3e-06, |
| "loss": -4.9759, |
| "step": 2171 |
| }, |
| { |
| "epoch": 0.002173086543271636, |
| "grad_norm": 230.0701446533203, |
| "learning_rate": 3e-06, |
| "loss": -0.9933, |
| "step": 2172 |
| }, |
| { |
| "epoch": 0.0021740870435217607, |
| "grad_norm": 291.85162353515625, |
| "learning_rate": 3e-06, |
| "loss": 1.5621, |
| "step": 2173 |
| }, |
| { |
| "epoch": 0.002175087543771886, |
| "grad_norm": 176.3345184326172, |
| "learning_rate": 3e-06, |
| "loss": 6.0896, |
| "step": 2174 |
| }, |
| { |
| "epoch": 0.002176088044022011, |
| "grad_norm": 147.5749969482422, |
| "learning_rate": 3e-06, |
| "loss": -6.063, |
| "step": 2175 |
| }, |
| { |
| "epoch": 0.0021770885442721362, |
| "grad_norm": 166.72695922851562, |
| "learning_rate": 3e-06, |
| "loss": -1.8773, |
| "step": 2176 |
| }, |
| { |
| "completion_length": 255.89583587646484, |
| "epoch": 0.002178089044522261, |
| "grad_norm": 123.5069808959961, |
| "learning_rate": 3e-06, |
| "loss": -15.7872, |
| "reward": 0.19102109223604202, |
| "reward_std": 0.13286863267421722, |
| "rewards/sudoku_reward_func": 0.19102108478546143, |
| "step": 2177, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.002179089544772386, |
| "grad_norm": 148.22042846679688, |
| "learning_rate": 3e-06, |
| "loss": -17.1538, |
| "step": 2178 |
| }, |
| { |
| "epoch": 0.0021800900450225113, |
| "grad_norm": 216.6683807373047, |
| "learning_rate": 3e-06, |
| "loss": -19.8152, |
| "step": 2179 |
| }, |
| { |
| "epoch": 0.002181090545272636, |
| "grad_norm": 206.0824737548828, |
| "learning_rate": 3e-06, |
| "loss": -20.6436, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.0021820910455227615, |
| "grad_norm": 155.43673706054688, |
| "learning_rate": 3e-06, |
| "loss": -17.2832, |
| "step": 2181 |
| }, |
| { |
| "epoch": 0.0021830915457728864, |
| "grad_norm": 108.36864471435547, |
| "learning_rate": 3e-06, |
| "loss": -17.956, |
| "step": 2182 |
| }, |
| { |
| "epoch": 0.0021840920460230117, |
| "grad_norm": 176.97877502441406, |
| "learning_rate": 3e-06, |
| "loss": -21.1888, |
| "step": 2183 |
| }, |
| { |
| "epoch": 0.0021850925462731366, |
| "grad_norm": 140.15602111816406, |
| "learning_rate": 3e-06, |
| "loss": -22.7284, |
| "step": 2184 |
| }, |
| { |
| "completion_length": 256.0, |
| "epoch": 0.0021860930465232614, |
| "grad_norm": 201.99246215820312, |
| "learning_rate": 3e-06, |
| "loss": -2.748, |
| "reward": 0.229414701461792, |
| "reward_std": 0.09967641159892082, |
| "rewards/sudoku_reward_func": 0.2294146865606308, |
| "step": 2185, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0021870935467733867, |
| "grad_norm": 114.0577621459961, |
| "learning_rate": 3e-06, |
| "loss": -4.1496, |
| "step": 2186 |
| }, |
| { |
| "epoch": 0.0021880940470235116, |
| "grad_norm": 118.37448120117188, |
| "learning_rate": 3e-06, |
| "loss": -9.0165, |
| "step": 2187 |
| }, |
| { |
| "epoch": 0.002189094547273637, |
| "grad_norm": 160.56996154785156, |
| "learning_rate": 3e-06, |
| "loss": -3.3018, |
| "step": 2188 |
| }, |
| { |
| "epoch": 0.002190095047523762, |
| "grad_norm": 196.15040588378906, |
| "learning_rate": 3e-06, |
| "loss": -5.2191, |
| "step": 2189 |
| }, |
| { |
| "epoch": 0.002191095547773887, |
| "grad_norm": 166.47886657714844, |
| "learning_rate": 3e-06, |
| "loss": -5.5121, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.002192096048024012, |
| "grad_norm": 133.74618530273438, |
| "learning_rate": 3e-06, |
| "loss": -10.0543, |
| "step": 2191 |
| }, |
| { |
| "epoch": 0.002193096548274137, |
| "grad_norm": 275.3774108886719, |
| "learning_rate": 3e-06, |
| "loss": -6.3413, |
| "step": 2192 |
| }, |
| { |
| "completion_length": 255.93750762939453, |
| "epoch": 0.002194097048524262, |
| "grad_norm": 241.90306091308594, |
| "learning_rate": 3e-06, |
| "loss": -25.2367, |
| "reward": 0.2638888955116272, |
| "reward_std": 0.1872684732079506, |
| "rewards/sudoku_reward_func": 0.2638888955116272, |
| "step": 2193, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.002195097548774387, |
| "grad_norm": 348.1951904296875, |
| "learning_rate": 3e-06, |
| "loss": -10.1058, |
| "step": 2194 |
| }, |
| { |
| "epoch": 0.0021960980490245124, |
| "grad_norm": 345.2223205566406, |
| "learning_rate": 3e-06, |
| "loss": -28.1819, |
| "step": 2195 |
| }, |
| { |
| "epoch": 0.0021970985492746373, |
| "grad_norm": 194.86376953125, |
| "learning_rate": 3e-06, |
| "loss": -12.9991, |
| "step": 2196 |
| }, |
| { |
| "epoch": 0.0021980990495247626, |
| "grad_norm": 380.6177978515625, |
| "learning_rate": 3e-06, |
| "loss": -25.8226, |
| "step": 2197 |
| }, |
| { |
| "epoch": 0.0021990995497748874, |
| "grad_norm": 203.87657165527344, |
| "learning_rate": 3e-06, |
| "loss": -11.971, |
| "step": 2198 |
| }, |
| { |
| "epoch": 0.0022001000500250123, |
| "grad_norm": 202.9221649169922, |
| "learning_rate": 3e-06, |
| "loss": -32.1544, |
| "step": 2199 |
| }, |
| { |
| "epoch": 0.0022011005502751376, |
| "grad_norm": 315.40374755859375, |
| "learning_rate": 3e-06, |
| "loss": -13.7635, |
| "step": 2200 |
| }, |
| { |
| "completion_length": 255.9791717529297, |
| "epoch": 0.0022021010505252625, |
| "grad_norm": 392.06121826171875, |
| "learning_rate": 3e-06, |
| "loss": -4.4377, |
| "reward": 0.26651185750961304, |
| "reward_std": 0.11879193782806396, |
| "rewards/sudoku_reward_func": 0.26651184260845184, |
| "step": 2201, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.002203101550775388, |
| "grad_norm": 110.69683837890625, |
| "learning_rate": 3e-06, |
| "loss": -6.0209, |
| "step": 2202 |
| }, |
| { |
| "epoch": 0.0022041020510255127, |
| "grad_norm": 144.53733825683594, |
| "learning_rate": 3e-06, |
| "loss": -7.4633, |
| "step": 2203 |
| }, |
| { |
| "epoch": 0.002205102551275638, |
| "grad_norm": 192.95216369628906, |
| "learning_rate": 3e-06, |
| "loss": -7.0989, |
| "step": 2204 |
| }, |
| { |
| "epoch": 0.002206103051525763, |
| "grad_norm": 206.1815948486328, |
| "learning_rate": 3e-06, |
| "loss": -9.3645, |
| "step": 2205 |
| }, |
| { |
| "epoch": 0.0022071035517758878, |
| "grad_norm": 107.30200958251953, |
| "learning_rate": 3e-06, |
| "loss": -7.0976, |
| "step": 2206 |
| }, |
| { |
| "epoch": 0.002208104052026013, |
| "grad_norm": 149.86318969726562, |
| "learning_rate": 3e-06, |
| "loss": -9.3709, |
| "step": 2207 |
| }, |
| { |
| "epoch": 0.002209104552276138, |
| "grad_norm": 229.2109375, |
| "learning_rate": 3e-06, |
| "loss": -7.4561, |
| "step": 2208 |
| }, |
| { |
| "completion_length": 255.9791717529297, |
| "epoch": 0.0022101050525262633, |
| "grad_norm": 305.0406188964844, |
| "learning_rate": 3e-06, |
| "loss": 2.6266, |
| "reward": 0.2063116356730461, |
| "reward_std": 0.1306929923593998, |
| "rewards/sudoku_reward_func": 0.2063116356730461, |
| "step": 2209, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.002211105552776388, |
| "grad_norm": 203.2418670654297, |
| "learning_rate": 3e-06, |
| "loss": 6.8086, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.0022121060530265134, |
| "grad_norm": 121.47610473632812, |
| "learning_rate": 3e-06, |
| "loss": 6.2834, |
| "step": 2211 |
| }, |
| { |
| "epoch": 0.0022131065532766383, |
| "grad_norm": 227.87969970703125, |
| "learning_rate": 3e-06, |
| "loss": 11.4717, |
| "step": 2212 |
| }, |
| { |
| "epoch": 0.002214107053526763, |
| "grad_norm": 240.4669189453125, |
| "learning_rate": 3e-06, |
| "loss": 1.7777, |
| "step": 2213 |
| }, |
| { |
| "epoch": 0.0022151075537768885, |
| "grad_norm": 134.55894470214844, |
| "learning_rate": 3e-06, |
| "loss": 6.5632, |
| "step": 2214 |
| }, |
| { |
| "epoch": 0.0022161080540270134, |
| "grad_norm": 113.4019546508789, |
| "learning_rate": 3e-06, |
| "loss": 5.987, |
| "step": 2215 |
| }, |
| { |
| "epoch": 0.0022171085542771387, |
| "grad_norm": 127.93946075439453, |
| "learning_rate": 3e-06, |
| "loss": 10.0904, |
| "step": 2216 |
| }, |
| { |
| "completion_length": 255.9791717529297, |
| "epoch": 0.0022181090545272636, |
| "grad_norm": 160.65890502929688, |
| "learning_rate": 3e-06, |
| "loss": -14.7841, |
| "reward": 0.25946594774723053, |
| "reward_std": 0.1456310823559761, |
| "rewards/sudoku_reward_func": 0.25946593284606934, |
| "step": 2217, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.002219109554777389, |
| "grad_norm": 150.86474609375, |
| "learning_rate": 3e-06, |
| "loss": -12.5941, |
| "step": 2218 |
| }, |
| { |
| "epoch": 0.0022201100550275138, |
| "grad_norm": 130.49571228027344, |
| "learning_rate": 3e-06, |
| "loss": -6.8444, |
| "step": 2219 |
| }, |
| { |
| "epoch": 0.0022211105552776386, |
| "grad_norm": 211.9480743408203, |
| "learning_rate": 3e-06, |
| "loss": -9.5312, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.002222111055527764, |
| "grad_norm": 160.16744995117188, |
| "learning_rate": 3e-06, |
| "loss": -16.0951, |
| "step": 2221 |
| }, |
| { |
| "epoch": 0.002223111555777889, |
| "grad_norm": 140.18873596191406, |
| "learning_rate": 3e-06, |
| "loss": -13.801, |
| "step": 2222 |
| }, |
| { |
| "epoch": 0.002224112056028014, |
| "grad_norm": 130.58078002929688, |
| "learning_rate": 3e-06, |
| "loss": -8.401, |
| "step": 2223 |
| }, |
| { |
| "epoch": 0.002225112556278139, |
| "grad_norm": 153.523193359375, |
| "learning_rate": 3e-06, |
| "loss": -10.8995, |
| "step": 2224 |
| }, |
| { |
| "completion_length": 255.93750762939453, |
| "epoch": 0.0022261130565282643, |
| "grad_norm": 314.0987243652344, |
| "learning_rate": 3e-06, |
| "loss": 56.8432, |
| "reward": 0.2909226268529892, |
| "reward_std": 0.15687301754951477, |
| "rewards/sudoku_reward_func": 0.2909226268529892, |
| "step": 2225, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.002227113556778389, |
| "grad_norm": 327.9870300292969, |
| "learning_rate": 3e-06, |
| "loss": 52.9292, |
| "step": 2226 |
| }, |
| { |
| "epoch": 0.002228114057028514, |
| "grad_norm": 322.2725830078125, |
| "learning_rate": 3e-06, |
| "loss": 50.9084, |
| "step": 2227 |
| }, |
| { |
| "epoch": 0.0022291145572786394, |
| "grad_norm": 303.08941650390625, |
| "learning_rate": 3e-06, |
| "loss": 49.6696, |
| "step": 2228 |
| }, |
| { |
| "epoch": 0.0022301150575287643, |
| "grad_norm": 304.82855224609375, |
| "learning_rate": 3e-06, |
| "loss": 55.3564, |
| "step": 2229 |
| }, |
| { |
| "epoch": 0.0022311155577788896, |
| "grad_norm": 333.8318786621094, |
| "learning_rate": 3e-06, |
| "loss": 49.5088, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.0022321160580290145, |
| "grad_norm": 315.6372375488281, |
| "learning_rate": 3e-06, |
| "loss": 46.0253, |
| "step": 2231 |
| }, |
| { |
| "epoch": 0.0022331165582791398, |
| "grad_norm": 282.1758117675781, |
| "learning_rate": 3e-06, |
| "loss": 43.9942, |
| "step": 2232 |
| }, |
| { |
| "completion_length": 255.9791717529297, |
| "epoch": 0.0022341170585292646, |
| "grad_norm": 149.5388946533203, |
| "learning_rate": 3e-06, |
| "loss": 19.3477, |
| "reward": 0.30179397761821747, |
| "reward_std": 0.1400880292057991, |
| "rewards/sudoku_reward_func": 0.30179397761821747, |
| "step": 2233, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0022351175587793895, |
| "grad_norm": 148.94393920898438, |
| "learning_rate": 3e-06, |
| "loss": 18.7291, |
| "step": 2234 |
| }, |
| { |
| "epoch": 0.002236118059029515, |
| "grad_norm": 110.72798156738281, |
| "learning_rate": 3e-06, |
| "loss": 15.4445, |
| "step": 2235 |
| }, |
| { |
| "epoch": 0.0022371185592796397, |
| "grad_norm": 127.52186584472656, |
| "learning_rate": 3e-06, |
| "loss": 14.8732, |
| "step": 2236 |
| }, |
| { |
| "epoch": 0.002238119059529765, |
| "grad_norm": 142.9965057373047, |
| "learning_rate": 3e-06, |
| "loss": 17.0905, |
| "step": 2237 |
| }, |
| { |
| "epoch": 0.00223911955977989, |
| "grad_norm": 118.69670867919922, |
| "learning_rate": 3e-06, |
| "loss": 16.1556, |
| "step": 2238 |
| }, |
| { |
| "epoch": 0.002240120060030015, |
| "grad_norm": 104.7170639038086, |
| "learning_rate": 3e-06, |
| "loss": 13.162, |
| "step": 2239 |
| }, |
| { |
| "epoch": 0.00224112056028014, |
| "grad_norm": 127.44586944580078, |
| "learning_rate": 3e-06, |
| "loss": 12.5951, |
| "step": 2240 |
| }, |
| { |
| "completion_length": 255.9791717529297, |
| "epoch": 0.002242121060530265, |
| "grad_norm": 173.59176635742188, |
| "learning_rate": 3e-06, |
| "loss": -16.5816, |
| "reward": 0.25268685817718506, |
| "reward_std": 0.145609550178051, |
| "rewards/sudoku_reward_func": 0.25268684327602386, |
| "step": 2241, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0022431215607803903, |
| "grad_norm": 95.00807189941406, |
| "learning_rate": 3e-06, |
| "loss": -11.9075, |
| "step": 2242 |
| }, |
| { |
| "epoch": 0.002244122061030515, |
| "grad_norm": 94.31062316894531, |
| "learning_rate": 3e-06, |
| "loss": -14.3515, |
| "step": 2243 |
| }, |
| { |
| "epoch": 0.0022451225612806405, |
| "grad_norm": 93.2145767211914, |
| "learning_rate": 3e-06, |
| "loss": -17.7514, |
| "step": 2244 |
| }, |
| { |
| "epoch": 0.0022461230615307653, |
| "grad_norm": 176.99551391601562, |
| "learning_rate": 3e-06, |
| "loss": -16.3497, |
| "step": 2245 |
| }, |
| { |
| "epoch": 0.0022471235617808906, |
| "grad_norm": 84.77751159667969, |
| "learning_rate": 3e-06, |
| "loss": -12.8933, |
| "step": 2246 |
| }, |
| { |
| "epoch": 0.0022481240620310155, |
| "grad_norm": 103.69670104980469, |
| "learning_rate": 3e-06, |
| "loss": -15.0246, |
| "step": 2247 |
| }, |
| { |
| "epoch": 0.0022491245622811404, |
| "grad_norm": 102.71160888671875, |
| "learning_rate": 3e-06, |
| "loss": -18.5848, |
| "step": 2248 |
| }, |
| { |
| "completion_length": 256.0, |
| "epoch": 0.0022501250625312657, |
| "grad_norm": 118.6711196899414, |
| "learning_rate": 3e-06, |
| "loss": 23.5176, |
| "reward": 0.27385087311267853, |
| "reward_std": 0.16272394359111786, |
| "rewards/sudoku_reward_func": 0.27385087311267853, |
| "step": 2249, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0022511255627813906, |
| "grad_norm": 122.66015625, |
| "learning_rate": 3e-06, |
| "loss": 23.9114, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.002252126063031516, |
| "grad_norm": 324.5727844238281, |
| "learning_rate": 3e-06, |
| "loss": 23.4655, |
| "step": 2251 |
| }, |
| { |
| "epoch": 0.0022531265632816408, |
| "grad_norm": 139.6287384033203, |
| "learning_rate": 3e-06, |
| "loss": 21.8016, |
| "step": 2252 |
| }, |
| { |
| "epoch": 0.002254127063531766, |
| "grad_norm": 131.1999053955078, |
| "learning_rate": 3e-06, |
| "loss": 22.3795, |
| "step": 2253 |
| }, |
| { |
| "epoch": 0.002255127563781891, |
| "grad_norm": 122.19425964355469, |
| "learning_rate": 3e-06, |
| "loss": 21.7393, |
| "step": 2254 |
| }, |
| { |
| "epoch": 0.002256128064032016, |
| "grad_norm": 217.8507080078125, |
| "learning_rate": 3e-06, |
| "loss": 17.6008, |
| "step": 2255 |
| }, |
| { |
| "epoch": 0.002257128564282141, |
| "grad_norm": 320.115234375, |
| "learning_rate": 3e-06, |
| "loss": 19.0938, |
| "step": 2256 |
| }, |
| { |
| "completion_length": 254.8541717529297, |
| "epoch": 0.002258129064532266, |
| "grad_norm": 109.14167022705078, |
| "learning_rate": 3e-06, |
| "loss": -16.7314, |
| "reward": 0.22406356036663055, |
| "reward_std": 0.14896760880947113, |
| "rewards/sudoku_reward_func": 0.22406355291604996, |
| "step": 2257, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0022591295647823913, |
| "grad_norm": 276.05255126953125, |
| "learning_rate": 3e-06, |
| "loss": -10.0484, |
| "step": 2258 |
| }, |
| { |
| "epoch": 0.002260130065032516, |
| "grad_norm": 165.75491333007812, |
| "learning_rate": 3e-06, |
| "loss": -9.5876, |
| "step": 2259 |
| }, |
| { |
| "epoch": 0.0022611305652826415, |
| "grad_norm": 257.5172424316406, |
| "learning_rate": 3e-06, |
| "loss": -11.2125, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.0022621310655327664, |
| "grad_norm": 170.18350219726562, |
| "learning_rate": 3e-06, |
| "loss": -17.6049, |
| "step": 2261 |
| }, |
| { |
| "epoch": 0.0022631315657828913, |
| "grad_norm": 217.0912628173828, |
| "learning_rate": 3e-06, |
| "loss": -15.0741, |
| "step": 2262 |
| }, |
| { |
| "epoch": 0.0022641320660330166, |
| "grad_norm": 310.0478820800781, |
| "learning_rate": 3e-06, |
| "loss": -11.486, |
| "step": 2263 |
| }, |
| { |
| "epoch": 0.0022651325662831415, |
| "grad_norm": 233.90057373046875, |
| "learning_rate": 3e-06, |
| "loss": -17.2378, |
| "step": 2264 |
| }, |
| { |
| "completion_length": 255.9791717529297, |
| "epoch": 0.0022661330665332668, |
| "grad_norm": 261.5735168457031, |
| "learning_rate": 3e-06, |
| "loss": 1.2377, |
| "reward": 0.2796379029750824, |
| "reward_std": 0.15749312937259674, |
| "rewards/sudoku_reward_func": 0.2796379029750824, |
| "step": 2265, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0022671335667833917, |
| "grad_norm": 186.02745056152344, |
| "learning_rate": 3e-06, |
| "loss": -0.9633, |
| "step": 2266 |
| }, |
| { |
| "epoch": 0.002268134067033517, |
| "grad_norm": 366.9317321777344, |
| "learning_rate": 3e-06, |
| "loss": 5.7376, |
| "step": 2267 |
| }, |
| { |
| "epoch": 0.002269134567283642, |
| "grad_norm": 285.3451843261719, |
| "learning_rate": 3e-06, |
| "loss": 11.1022, |
| "step": 2268 |
| }, |
| { |
| "epoch": 0.0022701350675337667, |
| "grad_norm": 247.7568359375, |
| "learning_rate": 3e-06, |
| "loss": -0.5247, |
| "step": 2269 |
| }, |
| { |
| "epoch": 0.002271135567783892, |
| "grad_norm": 214.0550079345703, |
| "learning_rate": 3e-06, |
| "loss": -4.5155, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.002272136068034017, |
| "grad_norm": 144.87843322753906, |
| "learning_rate": 3e-06, |
| "loss": 4.6787, |
| "step": 2271 |
| }, |
| { |
| "epoch": 0.0022731365682841422, |
| "grad_norm": 312.683837890625, |
| "learning_rate": 3e-06, |
| "loss": 6.7345, |
| "step": 2272 |
| }, |
| { |
| "completion_length": 255.87500762939453, |
| "epoch": 0.002274137068534267, |
| "grad_norm": 90.921630859375, |
| "learning_rate": 3e-06, |
| "loss": -17.0006, |
| "reward": 0.19374174624681473, |
| "reward_std": 0.09137369692325592, |
| "rewards/sudoku_reward_func": 0.19374173879623413, |
| "step": 2273, |
| "zero_std_ratio": 0.125 |
| }, |
| { |
| "epoch": 0.0022751375687843924, |
| "grad_norm": 156.21347045898438, |
| "learning_rate": 3e-06, |
| "loss": -14.9885, |
| "step": 2274 |
| }, |
| { |
| "epoch": 0.0022761380690345173, |
| "grad_norm": 146.11834716796875, |
| "learning_rate": 3e-06, |
| "loss": -11.7141, |
| "step": 2275 |
| }, |
| { |
| "epoch": 0.002277138569284642, |
| "grad_norm": 125.28398895263672, |
| "learning_rate": 3e-06, |
| "loss": -13.9846, |
| "step": 2276 |
| }, |
| { |
| "epoch": 0.0022781390695347675, |
| "grad_norm": 136.9147491455078, |
| "learning_rate": 3e-06, |
| "loss": -17.4603, |
| "step": 2277 |
| }, |
| { |
| "epoch": 0.0022791395697848923, |
| "grad_norm": 116.94140625, |
| "learning_rate": 3e-06, |
| "loss": -14.504, |
| "step": 2278 |
| }, |
| { |
| "epoch": 0.0022801400700350177, |
| "grad_norm": 135.58804321289062, |
| "learning_rate": 3e-06, |
| "loss": -13.5907, |
| "step": 2279 |
| }, |
| { |
| "epoch": 0.0022811405702851425, |
| "grad_norm": 88.2119369506836, |
| "learning_rate": 3e-06, |
| "loss": -14.4023, |
| "step": 2280 |
| }, |
| { |
| "completion_length": 255.45833587646484, |
| "epoch": 0.002282141070535268, |
| "grad_norm": 322.60455322265625, |
| "learning_rate": 3e-06, |
| "loss": 1.4808, |
| "reward": 0.28232476115226746, |
| "reward_std": 0.16080554574728012, |
| "rewards/sudoku_reward_func": 0.28232474625110626, |
| "step": 2281, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0022831415707853927, |
| "grad_norm": 130.08282470703125, |
| "learning_rate": 3e-06, |
| "loss": -8.7648, |
| "step": 2282 |
| }, |
| { |
| "epoch": 0.0022841420710355176, |
| "grad_norm": 200.008056640625, |
| "learning_rate": 3e-06, |
| "loss": 3.0487, |
| "step": 2283 |
| }, |
| { |
| "epoch": 0.002285142571285643, |
| "grad_norm": 159.8165283203125, |
| "learning_rate": 3e-06, |
| "loss": 5.1691, |
| "step": 2284 |
| }, |
| { |
| "epoch": 0.002286143071535768, |
| "grad_norm": 409.3653869628906, |
| "learning_rate": 3e-06, |
| "loss": 2.0149, |
| "step": 2285 |
| }, |
| { |
| "epoch": 0.002287143571785893, |
| "grad_norm": 121.8490219116211, |
| "learning_rate": 3e-06, |
| "loss": -9.2563, |
| "step": 2286 |
| }, |
| { |
| "epoch": 0.002288144072036018, |
| "grad_norm": 172.1815185546875, |
| "learning_rate": 3e-06, |
| "loss": 0.5314, |
| "step": 2287 |
| }, |
| { |
| "epoch": 0.002289144572286143, |
| "grad_norm": 169.503662109375, |
| "learning_rate": 3e-06, |
| "loss": 4.3363, |
| "step": 2288 |
| }, |
| { |
| "completion_length": 255.83333587646484, |
| "epoch": 0.002290145072536268, |
| "grad_norm": 158.5614776611328, |
| "learning_rate": 3e-06, |
| "loss": -3.7874, |
| "reward": 0.23768188804388046, |
| "reward_std": 0.13747499138116837, |
| "rewards/sudoku_reward_func": 0.23768187314271927, |
| "step": 2289, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.002291145572786393, |
| "grad_norm": 243.24232482910156, |
| "learning_rate": 3e-06, |
| "loss": 2.901, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.0022921460730365184, |
| "grad_norm": 171.938720703125, |
| "learning_rate": 3e-06, |
| "loss": 3.3795, |
| "step": 2291 |
| }, |
| { |
| "epoch": 0.0022931465732866432, |
| "grad_norm": 224.5988311767578, |
| "learning_rate": 3e-06, |
| "loss": 3.9502, |
| "step": 2292 |
| }, |
| { |
| "epoch": 0.0022941470735367685, |
| "grad_norm": 158.8858642578125, |
| "learning_rate": 3e-06, |
| "loss": -4.3771, |
| "step": 2293 |
| }, |
| { |
| "epoch": 0.0022951475737868934, |
| "grad_norm": 196.80746459960938, |
| "learning_rate": 3e-06, |
| "loss": 2.5411, |
| "step": 2294 |
| }, |
| { |
| "epoch": 0.0022961480740370183, |
| "grad_norm": 126.51914978027344, |
| "learning_rate": 3e-06, |
| "loss": 4.0453, |
| "step": 2295 |
| }, |
| { |
| "epoch": 0.0022971485742871436, |
| "grad_norm": 195.58306884765625, |
| "learning_rate": 3e-06, |
| "loss": 2.4945, |
| "step": 2296 |
| }, |
| { |
| "completion_length": 255.89583587646484, |
| "epoch": 0.0022981490745372685, |
| "grad_norm": 200.89503479003906, |
| "learning_rate": 3e-06, |
| "loss": -19.6789, |
| "reward": 0.2456597313284874, |
| "reward_std": 0.16119515150785446, |
| "rewards/sudoku_reward_func": 0.2456597313284874, |
| "step": 2297, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.002299149574787394, |
| "grad_norm": 190.7845001220703, |
| "learning_rate": 3e-06, |
| "loss": -20.1583, |
| "step": 2298 |
| }, |
| { |
| "epoch": 0.0023001500750375187, |
| "grad_norm": 211.36935424804688, |
| "learning_rate": 3e-06, |
| "loss": -17.8248, |
| "step": 2299 |
| }, |
| { |
| "epoch": 0.002301150575287644, |
| "grad_norm": 210.21969604492188, |
| "learning_rate": 3e-06, |
| "loss": -21.7427, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.002302151075537769, |
| "grad_norm": 282.9001159667969, |
| "learning_rate": 3e-06, |
| "loss": -21.4653, |
| "step": 2301 |
| }, |
| { |
| "epoch": 0.0023031515757878937, |
| "grad_norm": 310.6829833984375, |
| "learning_rate": 3e-06, |
| "loss": -22.4297, |
| "step": 2302 |
| }, |
| { |
| "epoch": 0.002304152076038019, |
| "grad_norm": 210.1687774658203, |
| "learning_rate": 3e-06, |
| "loss": -18.4755, |
| "step": 2303 |
| }, |
| { |
| "epoch": 0.002305152576288144, |
| "grad_norm": 190.2571258544922, |
| "learning_rate": 3e-06, |
| "loss": -24.6334, |
| "step": 2304 |
| }, |
| { |
| "completion_length": 255.81250762939453, |
| "epoch": 0.0023061530765382692, |
| "grad_norm": 242.8677215576172, |
| "learning_rate": 3e-06, |
| "loss": 8.2897, |
| "reward": 0.23668982088565826, |
| "reward_std": 0.144187830388546, |
| "rewards/sudoku_reward_func": 0.23668982088565826, |
| "step": 2305, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.002307153576788394, |
| "grad_norm": 216.795654296875, |
| "learning_rate": 3e-06, |
| "loss": 3.4369, |
| "step": 2306 |
| }, |
| { |
| "epoch": 0.0023081540770385194, |
| "grad_norm": 218.32254028320312, |
| "learning_rate": 3e-06, |
| "loss": -1.9715, |
| "step": 2307 |
| }, |
| { |
| "epoch": 0.0023091545772886443, |
| "grad_norm": 195.0783233642578, |
| "learning_rate": 3e-06, |
| "loss": 5.2904, |
| "step": 2308 |
| }, |
| { |
| "epoch": 0.002310155077538769, |
| "grad_norm": 202.93618774414062, |
| "learning_rate": 3e-06, |
| "loss": 8.8413, |
| "step": 2309 |
| }, |
| { |
| "epoch": 0.0023111555777888945, |
| "grad_norm": 306.6898498535156, |
| "learning_rate": 3e-06, |
| "loss": 2.429, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.0023121560780390194, |
| "grad_norm": 217.85650634765625, |
| "learning_rate": 3e-06, |
| "loss": -3.4418, |
| "step": 2311 |
| }, |
| { |
| "epoch": 0.0023131565782891447, |
| "grad_norm": 293.9922790527344, |
| "learning_rate": 3e-06, |
| "loss": 4.8448, |
| "step": 2312 |
| }, |
| { |
| "completion_length": 255.7916717529297, |
| "epoch": 0.0023141570785392696, |
| "grad_norm": 298.2754211425781, |
| "learning_rate": 3e-06, |
| "loss": -6.4534, |
| "reward": 0.24974070489406586, |
| "reward_std": 0.1554877981543541, |
| "rewards/sudoku_reward_func": 0.24974069744348526, |
| "step": 2313, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.002315157578789395, |
| "grad_norm": 471.14935302734375, |
| "learning_rate": 3e-06, |
| "loss": -15.3898, |
| "step": 2314 |
| }, |
| { |
| "epoch": 0.0023161580790395197, |
| "grad_norm": 694.3447265625, |
| "learning_rate": 3e-06, |
| "loss": -5.2157, |
| "step": 2315 |
| }, |
| { |
| "epoch": 0.0023171585792896446, |
| "grad_norm": 415.9455261230469, |
| "learning_rate": 3e-06, |
| "loss": -21.4852, |
| "step": 2316 |
| }, |
| { |
| "epoch": 0.00231815907953977, |
| "grad_norm": 309.5008850097656, |
| "learning_rate": 3e-06, |
| "loss": -7.4771, |
| "step": 2317 |
| }, |
| { |
| "epoch": 0.002319159579789895, |
| "grad_norm": 368.5068664550781, |
| "learning_rate": 3e-06, |
| "loss": -14.7027, |
| "step": 2318 |
| }, |
| { |
| "epoch": 0.00232016008004002, |
| "grad_norm": 254.92578125, |
| "learning_rate": 3e-06, |
| "loss": -3.7084, |
| "step": 2319 |
| }, |
| { |
| "epoch": 0.002321160580290145, |
| "grad_norm": 579.2901000976562, |
| "learning_rate": 3e-06, |
| "loss": -19.9798, |
| "step": 2320 |
| }, |
| { |
| "completion_length": 255.87500762939453, |
| "epoch": 0.0023221610805402703, |
| "grad_norm": 389.4748840332031, |
| "learning_rate": 3e-06, |
| "loss": -23.0951, |
| "reward": 0.2786458432674408, |
| "reward_std": 0.18757472187280655, |
| "rewards/sudoku_reward_func": 0.2786458134651184, |
| "step": 2321, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.002323161580790395, |
| "grad_norm": 415.79949951171875, |
| "learning_rate": 3e-06, |
| "loss": -9.136, |
| "step": 2322 |
| }, |
| { |
| "epoch": 0.00232416208104052, |
| "grad_norm": 173.99655151367188, |
| "learning_rate": 3e-06, |
| "loss": -26.185, |
| "step": 2323 |
| }, |
| { |
| "epoch": 0.0023251625812906454, |
| "grad_norm": 273.4259338378906, |
| "learning_rate": 3e-06, |
| "loss": -19.4097, |
| "step": 2324 |
| }, |
| { |
| "epoch": 0.0023261630815407702, |
| "grad_norm": 391.3758239746094, |
| "learning_rate": 3e-06, |
| "loss": -25.6756, |
| "step": 2325 |
| }, |
| { |
| "epoch": 0.0023271635817908956, |
| "grad_norm": 349.4239501953125, |
| "learning_rate": 3e-06, |
| "loss": -12.373, |
| "step": 2326 |
| }, |
| { |
| "epoch": 0.0023281640820410204, |
| "grad_norm": 206.3245849609375, |
| "learning_rate": 3e-06, |
| "loss": -26.9021, |
| "step": 2327 |
| }, |
| { |
| "epoch": 0.0023291645822911457, |
| "grad_norm": 213.48085021972656, |
| "learning_rate": 3e-06, |
| "loss": -21.3958, |
| "step": 2328 |
| }, |
| { |
| "completion_length": 255.83333587646484, |
| "epoch": 0.0023301650825412706, |
| "grad_norm": 269.2904968261719, |
| "learning_rate": 3e-06, |
| "loss": 0.1758, |
| "reward": 0.2615740895271301, |
| "reward_std": 0.14590194076299667, |
| "rewards/sudoku_reward_func": 0.2615740895271301, |
| "step": 2329, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0023311655827913955, |
| "grad_norm": 259.6731262207031, |
| "learning_rate": 3e-06, |
| "loss": 3.9599, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.002332166083041521, |
| "grad_norm": 188.71871948242188, |
| "learning_rate": 3e-06, |
| "loss": 5.147, |
| "step": 2331 |
| }, |
| { |
| "epoch": 0.0023331665832916457, |
| "grad_norm": 129.87255859375, |
| "learning_rate": 3e-06, |
| "loss": -5.0955, |
| "step": 2332 |
| }, |
| { |
| "epoch": 0.002334167083541771, |
| "grad_norm": 193.02645874023438, |
| "learning_rate": 3e-06, |
| "loss": -3.1335, |
| "step": 2333 |
| }, |
| { |
| "epoch": 0.002335167583791896, |
| "grad_norm": 289.1039123535156, |
| "learning_rate": 3e-06, |
| "loss": -0.141, |
| "step": 2334 |
| }, |
| { |
| "epoch": 0.002336168084042021, |
| "grad_norm": 291.5890808105469, |
| "learning_rate": 3e-06, |
| "loss": 0.3956, |
| "step": 2335 |
| }, |
| { |
| "epoch": 0.002337168584292146, |
| "grad_norm": 284.3049011230469, |
| "learning_rate": 3e-06, |
| "loss": -5.9039, |
| "step": 2336 |
| }, |
| { |
| "completion_length": 255.9166717529297, |
| "epoch": 0.002338169084542271, |
| "grad_norm": 174.27735900878906, |
| "learning_rate": 3e-06, |
| "loss": 13.7326, |
| "reward": 0.23280423879623413, |
| "reward_std": 0.12118640542030334, |
| "rewards/sudoku_reward_func": 0.23280422389507294, |
| "step": 2337, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0023391695847923962, |
| "grad_norm": 557.00146484375, |
| "learning_rate": 3e-06, |
| "loss": 7.0329, |
| "step": 2338 |
| }, |
| { |
| "epoch": 0.002340170085042521, |
| "grad_norm": 125.9331283569336, |
| "learning_rate": 3e-06, |
| "loss": 11.8082, |
| "step": 2339 |
| }, |
| { |
| "epoch": 0.0023411705852926464, |
| "grad_norm": 334.607666015625, |
| "learning_rate": 3e-06, |
| "loss": 6.1679, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.0023421710855427713, |
| "grad_norm": 169.49258422851562, |
| "learning_rate": 3e-06, |
| "loss": 12.367, |
| "step": 2341 |
| }, |
| { |
| "epoch": 0.0023431715857928966, |
| "grad_norm": 193.27243041992188, |
| "learning_rate": 3e-06, |
| "loss": 3.7953, |
| "step": 2342 |
| }, |
| { |
| "epoch": 0.0023441720860430215, |
| "grad_norm": 172.27786254882812, |
| "learning_rate": 3e-06, |
| "loss": 10.0385, |
| "step": 2343 |
| }, |
| { |
| "epoch": 0.0023451725862931464, |
| "grad_norm": 208.5091552734375, |
| "learning_rate": 3e-06, |
| "loss": 6.3254, |
| "step": 2344 |
| }, |
| { |
| "completion_length": 255.9791717529297, |
| "epoch": 0.0023461730865432717, |
| "grad_norm": 127.00196838378906, |
| "learning_rate": 3e-06, |
| "loss": -3.2699, |
| "reward": 0.2292906790971756, |
| "reward_std": 0.11905381456017494, |
| "rewards/sudoku_reward_func": 0.2292906790971756, |
| "step": 2345, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0023471735867933966, |
| "grad_norm": 210.29026794433594, |
| "learning_rate": 3e-06, |
| "loss": -4.7211, |
| "step": 2346 |
| }, |
| { |
| "epoch": 0.002348174087043522, |
| "grad_norm": 168.79876708984375, |
| "learning_rate": 3e-06, |
| "loss": -6.6915, |
| "step": 2347 |
| }, |
| { |
| "epoch": 0.0023491745872936468, |
| "grad_norm": 204.80516052246094, |
| "learning_rate": 3e-06, |
| "loss": -9.3385, |
| "step": 2348 |
| }, |
| { |
| "epoch": 0.002350175087543772, |
| "grad_norm": 187.41796875, |
| "learning_rate": 3e-06, |
| "loss": -5.4357, |
| "step": 2349 |
| }, |
| { |
| "epoch": 0.002351175587793897, |
| "grad_norm": 166.33779907226562, |
| "learning_rate": 3e-06, |
| "loss": -6.8552, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.002352176088044022, |
| "grad_norm": 246.25677490234375, |
| "learning_rate": 3e-06, |
| "loss": -9.6765, |
| "step": 2351 |
| }, |
| { |
| "epoch": 0.002353176588294147, |
| "grad_norm": 263.0240783691406, |
| "learning_rate": 3e-06, |
| "loss": -11.8961, |
| "step": 2352 |
| }, |
| { |
| "completion_length": 255.9375, |
| "epoch": 0.002354177088544272, |
| "grad_norm": 303.3554382324219, |
| "learning_rate": 3e-06, |
| "loss": -23.6641, |
| "reward": 0.24605431407690048, |
| "reward_std": 0.13267472386360168, |
| "rewards/sudoku_reward_func": 0.2460542991757393, |
| "step": 2353, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0023551775887943973, |
| "grad_norm": 218.33726501464844, |
| "learning_rate": 3e-06, |
| "loss": -23.8785, |
| "step": 2354 |
| }, |
| { |
| "epoch": 0.002356178089044522, |
| "grad_norm": 204.11302185058594, |
| "learning_rate": 3e-06, |
| "loss": -10.7146, |
| "step": 2355 |
| }, |
| { |
| "epoch": 0.0023571785892946475, |
| "grad_norm": 155.26736450195312, |
| "learning_rate": 3e-06, |
| "loss": -28.534, |
| "step": 2356 |
| }, |
| { |
| "epoch": 0.0023581790895447724, |
| "grad_norm": 188.44581604003906, |
| "learning_rate": 3e-06, |
| "loss": -22.4155, |
| "step": 2357 |
| }, |
| { |
| "epoch": 0.0023591795897948973, |
| "grad_norm": 332.4205017089844, |
| "learning_rate": 3e-06, |
| "loss": -23.8225, |
| "step": 2358 |
| }, |
| { |
| "epoch": 0.0023601800900450226, |
| "grad_norm": 246.54193115234375, |
| "learning_rate": 3e-06, |
| "loss": -13.5497, |
| "step": 2359 |
| }, |
| { |
| "epoch": 0.0023611805902951474, |
| "grad_norm": 184.51052856445312, |
| "learning_rate": 3e-06, |
| "loss": -29.9814, |
| "step": 2360 |
| }, |
| { |
| "completion_length": 256.0, |
| "epoch": 0.0023621810905452728, |
| "grad_norm": 254.96438598632812, |
| "learning_rate": 3e-06, |
| "loss": -9.346, |
| "reward": 0.2498760148882866, |
| "reward_std": 0.16076447814702988, |
| "rewards/sudoku_reward_func": 0.2498759999871254, |
| "step": 2361, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0023631815907953976, |
| "grad_norm": 354.0221252441406, |
| "learning_rate": 3e-06, |
| "loss": -2.763, |
| "step": 2362 |
| }, |
| { |
| "epoch": 0.002364182091045523, |
| "grad_norm": 464.5136413574219, |
| "learning_rate": 3e-06, |
| "loss": -4.9485, |
| "step": 2363 |
| }, |
| { |
| "epoch": 0.002365182591295648, |
| "grad_norm": 256.3653869628906, |
| "learning_rate": 3e-06, |
| "loss": -5.8165, |
| "step": 2364 |
| }, |
| { |
| "epoch": 0.0023661830915457727, |
| "grad_norm": 371.20037841796875, |
| "learning_rate": 3e-06, |
| "loss": -12.4337, |
| "step": 2365 |
| }, |
| { |
| "epoch": 0.002367183591795898, |
| "grad_norm": 299.24871826171875, |
| "learning_rate": 3e-06, |
| "loss": -4.3339, |
| "step": 2366 |
| }, |
| { |
| "epoch": 0.002368184092046023, |
| "grad_norm": 494.33038330078125, |
| "learning_rate": 3e-06, |
| "loss": -6.8858, |
| "step": 2367 |
| }, |
| { |
| "epoch": 0.002369184592296148, |
| "grad_norm": 150.08218383789062, |
| "learning_rate": 3e-06, |
| "loss": -6.5177, |
| "step": 2368 |
| }, |
| { |
| "completion_length": 255.95833587646484, |
| "epoch": 0.002370185092546273, |
| "grad_norm": 261.2372741699219, |
| "learning_rate": 3e-06, |
| "loss": 6.1855, |
| "reward": 0.2589285746216774, |
| "reward_std": 0.1584344282746315, |
| "rewards/sudoku_reward_func": 0.2589285746216774, |
| "step": 2369, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0023711855927963984, |
| "grad_norm": 194.16799926757812, |
| "learning_rate": 3e-06, |
| "loss": 11.0327, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.0023721860930465233, |
| "grad_norm": 173.90184020996094, |
| "learning_rate": 3e-06, |
| "loss": 10.0381, |
| "step": 2371 |
| }, |
| { |
| "epoch": 0.002373186593296648, |
| "grad_norm": 260.4774475097656, |
| "learning_rate": 3e-06, |
| "loss": 9.2442, |
| "step": 2372 |
| }, |
| { |
| "epoch": 0.0023741870935467735, |
| "grad_norm": 268.046630859375, |
| "learning_rate": 3e-06, |
| "loss": 3.7758, |
| "step": 2373 |
| }, |
| { |
| "epoch": 0.0023751875937968983, |
| "grad_norm": 159.11863708496094, |
| "learning_rate": 3e-06, |
| "loss": 9.8232, |
| "step": 2374 |
| }, |
| { |
| "epoch": 0.0023761880940470236, |
| "grad_norm": 267.4617614746094, |
| "learning_rate": 3e-06, |
| "loss": 9.251, |
| "step": 2375 |
| }, |
| { |
| "epoch": 0.0023771885942971485, |
| "grad_norm": 247.35926818847656, |
| "learning_rate": 3e-06, |
| "loss": 6.8656, |
| "step": 2376 |
| }, |
| { |
| "completion_length": 255.87500762939453, |
| "epoch": 0.002378189094547274, |
| "grad_norm": 296.4661560058594, |
| "learning_rate": 3e-06, |
| "loss": -17.1805, |
| "reward": 0.19931082427501678, |
| "reward_std": 0.1540454626083374, |
| "rewards/sudoku_reward_func": 0.1993108168244362, |
| "step": 2377, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0023791895947973987, |
| "grad_norm": 273.8221740722656, |
| "learning_rate": 3e-06, |
| "loss": -11.0449, |
| "step": 2378 |
| }, |
| { |
| "epoch": 0.0023801900950475236, |
| "grad_norm": 287.01287841796875, |
| "learning_rate": 3e-06, |
| "loss": -15.9991, |
| "step": 2379 |
| }, |
| { |
| "epoch": 0.002381190595297649, |
| "grad_norm": 322.6713562011719, |
| "learning_rate": 3e-06, |
| "loss": -13.4008, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.0023821910955477738, |
| "grad_norm": 227.85231018066406, |
| "learning_rate": 3e-06, |
| "loss": -18.8711, |
| "step": 2381 |
| }, |
| { |
| "epoch": 0.002383191595797899, |
| "grad_norm": 209.1007537841797, |
| "learning_rate": 3e-06, |
| "loss": -12.8625, |
| "step": 2382 |
| }, |
| { |
| "epoch": 0.002384192096048024, |
| "grad_norm": 264.0327453613281, |
| "learning_rate": 3e-06, |
| "loss": -15.8456, |
| "step": 2383 |
| }, |
| { |
| "epoch": 0.0023851925962981493, |
| "grad_norm": 170.41293334960938, |
| "learning_rate": 3e-06, |
| "loss": -13.2896, |
| "step": 2384 |
| }, |
| { |
| "completion_length": 256.0, |
| "epoch": 0.002386193096548274, |
| "grad_norm": 219.16714477539062, |
| "learning_rate": 3e-06, |
| "loss": -8.9917, |
| "reward": 0.20667991042137146, |
| "reward_std": 0.14874431490898132, |
| "rewards/sudoku_reward_func": 0.20667989552021027, |
| "step": 2385, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.002387193596798399, |
| "grad_norm": 272.3582763671875, |
| "learning_rate": 3e-06, |
| "loss": 0.2413, |
| "step": 2386 |
| }, |
| { |
| "epoch": 0.0023881940970485243, |
| "grad_norm": 213.18934631347656, |
| "learning_rate": 3e-06, |
| "loss": -3.9258, |
| "step": 2387 |
| }, |
| { |
| "epoch": 0.002389194597298649, |
| "grad_norm": 190.1105194091797, |
| "learning_rate": 3e-06, |
| "loss": -6.8053, |
| "step": 2388 |
| }, |
| { |
| "epoch": 0.0023901950975487745, |
| "grad_norm": 157.9113311767578, |
| "learning_rate": 3e-06, |
| "loss": -11.0441, |
| "step": 2389 |
| }, |
| { |
| "epoch": 0.0023911955977988994, |
| "grad_norm": 364.7066955566406, |
| "learning_rate": 3e-06, |
| "loss": -2.5138, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.0023921960980490247, |
| "grad_norm": 288.8036804199219, |
| "learning_rate": 3e-06, |
| "loss": -4.8312, |
| "step": 2391 |
| }, |
| { |
| "epoch": 0.0023931965982991496, |
| "grad_norm": 206.28895568847656, |
| "learning_rate": 3e-06, |
| "loss": -10.4112, |
| "step": 2392 |
| }, |
| { |
| "completion_length": 256.0, |
| "epoch": 0.0023941970985492745, |
| "grad_norm": 234.87863159179688, |
| "learning_rate": 3e-06, |
| "loss": -29.7829, |
| "reward": 0.23030905425548553, |
| "reward_std": 0.1565355882048607, |
| "rewards/sudoku_reward_func": 0.23030905425548553, |
| "step": 2393, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0023951975987993998, |
| "grad_norm": 225.86874389648438, |
| "learning_rate": 3e-06, |
| "loss": -21.8689, |
| "step": 2394 |
| }, |
| { |
| "epoch": 0.0023961980990495246, |
| "grad_norm": 317.64678955078125, |
| "learning_rate": 3e-06, |
| "loss": -27.3714, |
| "step": 2395 |
| }, |
| { |
| "epoch": 0.00239719859929965, |
| "grad_norm": 273.0003356933594, |
| "learning_rate": 3e-06, |
| "loss": -35.7363, |
| "step": 2396 |
| }, |
| { |
| "epoch": 0.002398199099549775, |
| "grad_norm": 332.5831604003906, |
| "learning_rate": 3e-06, |
| "loss": -28.3564, |
| "step": 2397 |
| }, |
| { |
| "epoch": 0.0023991995997999, |
| "grad_norm": 258.6806640625, |
| "learning_rate": 3e-06, |
| "loss": -24.7617, |
| "step": 2398 |
| }, |
| { |
| "epoch": 0.002400200100050025, |
| "grad_norm": 270.16070556640625, |
| "learning_rate": 3e-06, |
| "loss": -27.9452, |
| "step": 2399 |
| }, |
| { |
| "epoch": 0.00240120060030015, |
| "grad_norm": 352.531494140625, |
| "learning_rate": 3e-06, |
| "loss": -36.7671, |
| "step": 2400 |
| }, |
| { |
| "completion_length": 255.9791717529297, |
| "epoch": 0.002402201100550275, |
| "grad_norm": 202.87521362304688, |
| "learning_rate": 3e-06, |
| "loss": -18.3767, |
| "reward": 0.26331019401550293, |
| "reward_std": 0.14331235736608505, |
| "rewards/sudoku_reward_func": 0.26331018656492233, |
| "step": 2401, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0024032016008004, |
| "grad_norm": 231.7044677734375, |
| "learning_rate": 3e-06, |
| "loss": -13.1758, |
| "step": 2402 |
| }, |
| { |
| "epoch": 0.0024042021010505254, |
| "grad_norm": 230.66302490234375, |
| "learning_rate": 3e-06, |
| "loss": -10.3043, |
| "step": 2403 |
| }, |
| { |
| "epoch": 0.0024052026013006503, |
| "grad_norm": 281.7815856933594, |
| "learning_rate": 3e-06, |
| "loss": -17.6013, |
| "step": 2404 |
| }, |
| { |
| "epoch": 0.0024062031015507756, |
| "grad_norm": 201.96786499023438, |
| "learning_rate": 3e-06, |
| "loss": -20.4518, |
| "step": 2405 |
| }, |
| { |
| "epoch": 0.0024072036018009005, |
| "grad_norm": 299.170166015625, |
| "learning_rate": 3e-06, |
| "loss": -15.459, |
| "step": 2406 |
| }, |
| { |
| "epoch": 0.0024082041020510253, |
| "grad_norm": 265.810546875, |
| "learning_rate": 3e-06, |
| "loss": -12.8288, |
| "step": 2407 |
| }, |
| { |
| "epoch": 0.0024092046023011507, |
| "grad_norm": 261.6772155761719, |
| "learning_rate": 3e-06, |
| "loss": -19.0075, |
| "step": 2408 |
| }, |
| { |
| "completion_length": 256.0, |
| "epoch": 0.0024102051025512755, |
| "grad_norm": 413.2362976074219, |
| "learning_rate": 3e-06, |
| "loss": 20.3793, |
| "reward": 0.232127845287323, |
| "reward_std": 0.146223783493042, |
| "rewards/sudoku_reward_func": 0.232127845287323, |
| "step": 2409, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.002411205602801401, |
| "grad_norm": 314.0757751464844, |
| "learning_rate": 3e-06, |
| "loss": 15.9171, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.0024122061030515257, |
| "grad_norm": 390.8291931152344, |
| "learning_rate": 3e-06, |
| "loss": 16.0378, |
| "step": 2411 |
| }, |
| { |
| "epoch": 0.002413206603301651, |
| "grad_norm": 293.4915466308594, |
| "learning_rate": 3e-06, |
| "loss": 14.1956, |
| "step": 2412 |
| }, |
| { |
| "epoch": 0.002414207103551776, |
| "grad_norm": 233.78207397460938, |
| "learning_rate": 3e-06, |
| "loss": 17.9772, |
| "step": 2413 |
| }, |
| { |
| "epoch": 0.002415207603801901, |
| "grad_norm": 255.8182830810547, |
| "learning_rate": 3e-06, |
| "loss": 13.7614, |
| "step": 2414 |
| }, |
| { |
| "epoch": 0.002416208104052026, |
| "grad_norm": 353.9018859863281, |
| "learning_rate": 3e-06, |
| "loss": 13.5526, |
| "step": 2415 |
| }, |
| { |
| "epoch": 0.002417208604302151, |
| "grad_norm": 179.1558074951172, |
| "learning_rate": 3e-06, |
| "loss": 15.9556, |
| "step": 2416 |
| }, |
| { |
| "completion_length": 256.0, |
| "epoch": 0.0024182091045522763, |
| "grad_norm": 215.33831787109375, |
| "learning_rate": 3e-06, |
| "loss": -3.8544, |
| "reward": 0.25442297756671906, |
| "reward_std": 0.15895532071590424, |
| "rewards/sudoku_reward_func": 0.25442295521497726, |
| "step": 2417, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.002419209604802401, |
| "grad_norm": 478.9371337890625, |
| "learning_rate": 3e-06, |
| "loss": -17.7737, |
| "step": 2418 |
| }, |
| { |
| "epoch": 0.0024202101050525265, |
| "grad_norm": 244.527099609375, |
| "learning_rate": 3e-06, |
| "loss": -0.1481, |
| "step": 2419 |
| }, |
| { |
| "epoch": 0.0024212106053026513, |
| "grad_norm": 146.69509887695312, |
| "learning_rate": 3e-06, |
| "loss": -5.8427, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.0024222111055527762, |
| "grad_norm": 217.09681701660156, |
| "learning_rate": 3e-06, |
| "loss": -4.5632, |
| "step": 2421 |
| }, |
| { |
| "epoch": 0.0024232116058029015, |
| "grad_norm": 437.3373107910156, |
| "learning_rate": 3e-06, |
| "loss": -17.72, |
| "step": 2422 |
| }, |
| { |
| "epoch": 0.0024242121060530264, |
| "grad_norm": 255.216064453125, |
| "learning_rate": 3e-06, |
| "loss": -0.7436, |
| "step": 2423 |
| }, |
| { |
| "epoch": 0.0024252126063031517, |
| "grad_norm": 164.95989990234375, |
| "learning_rate": 3e-06, |
| "loss": -7.5431, |
| "step": 2424 |
| }, |
| { |
| "completion_length": 255.9791717529297, |
| "epoch": 0.0024262131065532766, |
| "grad_norm": 251.02464294433594, |
| "learning_rate": 3e-06, |
| "loss": 29.46, |
| "reward": 0.2371031865477562, |
| "reward_std": 0.13912386447191238, |
| "rewards/sudoku_reward_func": 0.2371031865477562, |
| "step": 2425, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.002427213606803402, |
| "grad_norm": 178.2760467529297, |
| "learning_rate": 3e-06, |
| "loss": 17.2126, |
| "step": 2426 |
| }, |
| { |
| "epoch": 0.002428214107053527, |
| "grad_norm": 188.71414184570312, |
| "learning_rate": 3e-06, |
| "loss": 28.4213, |
| "step": 2427 |
| }, |
| { |
| "epoch": 0.0024292146073036517, |
| "grad_norm": 151.65293884277344, |
| "learning_rate": 3e-06, |
| "loss": 22.4749, |
| "step": 2428 |
| }, |
| { |
| "epoch": 0.002430215107553777, |
| "grad_norm": 320.76190185546875, |
| "learning_rate": 3e-06, |
| "loss": 27.6502, |
| "step": 2429 |
| }, |
| { |
| "epoch": 0.002431215607803902, |
| "grad_norm": 202.29144287109375, |
| "learning_rate": 3e-06, |
| "loss": 15.5383, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.002432216108054027, |
| "grad_norm": 199.7091064453125, |
| "learning_rate": 3e-06, |
| "loss": 25.4369, |
| "step": 2431 |
| }, |
| { |
| "epoch": 0.002433216608304152, |
| "grad_norm": 169.84693908691406, |
| "learning_rate": 3e-06, |
| "loss": 20.8165, |
| "step": 2432 |
| }, |
| { |
| "completion_length": 256.0, |
| "epoch": 0.0024342171085542774, |
| "grad_norm": 86.38954162597656, |
| "learning_rate": 3e-06, |
| "loss": -3.2782, |
| "reward": 0.23937289416790009, |
| "reward_std": 0.10821668431162834, |
| "rewards/sudoku_reward_func": 0.2393728867173195, |
| "step": 2433, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0024352176088044022, |
| "grad_norm": 210.60997009277344, |
| "learning_rate": 3e-06, |
| "loss": -1.2576, |
| "step": 2434 |
| }, |
| { |
| "epoch": 0.002436218109054527, |
| "grad_norm": 105.67581176757812, |
| "learning_rate": 3e-06, |
| "loss": -3.552, |
| "step": 2435 |
| }, |
| { |
| "epoch": 0.0024372186093046524, |
| "grad_norm": 146.38681030273438, |
| "learning_rate": 3e-06, |
| "loss": -2.7907, |
| "step": 2436 |
| }, |
| { |
| "epoch": 0.0024382191095547773, |
| "grad_norm": 103.02706146240234, |
| "learning_rate": 3e-06, |
| "loss": -4.2819, |
| "step": 2437 |
| }, |
| { |
| "epoch": 0.0024392196098049026, |
| "grad_norm": 108.49909973144531, |
| "learning_rate": 3e-06, |
| "loss": -2.6245, |
| "step": 2438 |
| }, |
| { |
| "epoch": 0.0024402201100550275, |
| "grad_norm": 120.60899353027344, |
| "learning_rate": 3e-06, |
| "loss": -3.8004, |
| "step": 2439 |
| }, |
| { |
| "epoch": 0.002441220610305153, |
| "grad_norm": 119.53984832763672, |
| "learning_rate": 3e-06, |
| "loss": -4.4491, |
| "step": 2440 |
| }, |
| { |
| "completion_length": 255.9791717529297, |
| "epoch": 0.0024422211105552777, |
| "grad_norm": 217.67234802246094, |
| "learning_rate": 3e-06, |
| "loss": 24.9726, |
| "reward": 0.22883598506450653, |
| "reward_std": 0.1351177804172039, |
| "rewards/sudoku_reward_func": 0.22883598506450653, |
| "step": 2441, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0024432216108054025, |
| "grad_norm": 234.8048095703125, |
| "learning_rate": 3e-06, |
| "loss": 25.3563, |
| "step": 2442 |
| }, |
| { |
| "epoch": 0.002444222111055528, |
| "grad_norm": 189.23411560058594, |
| "learning_rate": 3e-06, |
| "loss": 21.1638, |
| "step": 2443 |
| }, |
| { |
| "epoch": 0.0024452226113056527, |
| "grad_norm": 263.4746398925781, |
| "learning_rate": 3e-06, |
| "loss": 22.9557, |
| "step": 2444 |
| }, |
| { |
| "epoch": 0.002446223111555778, |
| "grad_norm": 185.4212646484375, |
| "learning_rate": 3e-06, |
| "loss": 22.3036, |
| "step": 2445 |
| }, |
| { |
| "epoch": 0.002447223611805903, |
| "grad_norm": 193.51065063476562, |
| "learning_rate": 3e-06, |
| "loss": 21.5142, |
| "step": 2446 |
| }, |
| { |
| "epoch": 0.002448224112056028, |
| "grad_norm": 157.37619018554688, |
| "learning_rate": 3e-06, |
| "loss": 18.6296, |
| "step": 2447 |
| }, |
| { |
| "epoch": 0.002449224612306153, |
| "grad_norm": 159.09471130371094, |
| "learning_rate": 3e-06, |
| "loss": 18.2331, |
| "step": 2448 |
| }, |
| { |
| "completion_length": 254.9375, |
| "epoch": 0.002450225112556278, |
| "grad_norm": 203.94557189941406, |
| "learning_rate": 3e-06, |
| "loss": -39.4361, |
| "reward": 0.24582882970571518, |
| "reward_std": 0.15585172921419144, |
| "rewards/sudoku_reward_func": 0.24582882970571518, |
| "step": 2449, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0024512256128064033, |
| "grad_norm": 126.65997314453125, |
| "learning_rate": 3e-06, |
| "loss": -35.301, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.002452226113056528, |
| "grad_norm": 237.62548828125, |
| "learning_rate": 3e-06, |
| "loss": -38.4792, |
| "step": 2451 |
| }, |
| { |
| "epoch": 0.0024532266133066535, |
| "grad_norm": 135.26449584960938, |
| "learning_rate": 3e-06, |
| "loss": -37.523, |
| "step": 2452 |
| }, |
| { |
| "epoch": 0.0024542271135567784, |
| "grad_norm": 249.38072204589844, |
| "learning_rate": 3e-06, |
| "loss": -39.7402, |
| "step": 2453 |
| }, |
| { |
| "epoch": 0.0024552276138069032, |
| "grad_norm": 126.46034240722656, |
| "learning_rate": 3e-06, |
| "loss": -36.0648, |
| "step": 2454 |
| }, |
| { |
| "epoch": 0.0024562281140570285, |
| "grad_norm": 189.29177856445312, |
| "learning_rate": 3e-06, |
| "loss": -39.9833, |
| "step": 2455 |
| }, |
| { |
| "epoch": 0.0024572286143071534, |
| "grad_norm": 107.6065902709961, |
| "learning_rate": 3e-06, |
| "loss": -38.7203, |
| "step": 2456 |
| }, |
| { |
| "completion_length": 250.68750762939453, |
| "epoch": 0.0024582291145572787, |
| "grad_norm": 99.97529602050781, |
| "learning_rate": 3e-06, |
| "loss": 15.4324, |
| "reward": 0.20304235816001892, |
| "reward_std": 0.13311458751559258, |
| "rewards/sudoku_reward_func": 0.20304234325885773, |
| "step": 2457, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0024592296148074036, |
| "grad_norm": 188.85643005371094, |
| "learning_rate": 3e-06, |
| "loss": 12.5375, |
| "step": 2458 |
| }, |
| { |
| "epoch": 0.002460230115057529, |
| "grad_norm": 142.94924926757812, |
| "learning_rate": 3e-06, |
| "loss": 9.5263, |
| "step": 2459 |
| }, |
| { |
| "epoch": 0.002461230615307654, |
| "grad_norm": 96.89965057373047, |
| "learning_rate": 3e-06, |
| "loss": 9.125, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.0024622311155577787, |
| "grad_norm": 103.0599136352539, |
| "learning_rate": 3e-06, |
| "loss": 14.582, |
| "step": 2461 |
| }, |
| { |
| "epoch": 0.002463231615807904, |
| "grad_norm": 136.94151306152344, |
| "learning_rate": 3e-06, |
| "loss": 12.7515, |
| "step": 2462 |
| }, |
| { |
| "epoch": 0.002464232116058029, |
| "grad_norm": 113.3371810913086, |
| "learning_rate": 3e-06, |
| "loss": 7.4074, |
| "step": 2463 |
| }, |
| { |
| "epoch": 0.002465232616308154, |
| "grad_norm": 230.5481719970703, |
| "learning_rate": 3e-06, |
| "loss": 7.5585, |
| "step": 2464 |
| }, |
| { |
| "completion_length": 255.9791717529297, |
| "epoch": 0.002466233116558279, |
| "grad_norm": 235.51907348632812, |
| "learning_rate": 3e-06, |
| "loss": -28.1552, |
| "reward": 0.25078538805246353, |
| "reward_std": 0.17135849595069885, |
| "rewards/sudoku_reward_func": 0.25078538805246353, |
| "step": 2465, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0024672336168084044, |
| "grad_norm": 226.1004638671875, |
| "learning_rate": 3e-06, |
| "loss": -22.9924, |
| "step": 2466 |
| }, |
| { |
| "epoch": 0.0024682341170585292, |
| "grad_norm": 418.1987609863281, |
| "learning_rate": 3e-06, |
| "loss": -30.068, |
| "step": 2467 |
| }, |
| { |
| "epoch": 0.002469234617308654, |
| "grad_norm": 180.6246795654297, |
| "learning_rate": 3e-06, |
| "loss": -13.8185, |
| "step": 2468 |
| }, |
| { |
| "epoch": 0.0024702351175587794, |
| "grad_norm": 309.594970703125, |
| "learning_rate": 3e-06, |
| "loss": -30.2354, |
| "step": 2469 |
| }, |
| { |
| "epoch": 0.0024712356178089043, |
| "grad_norm": 376.1004333496094, |
| "learning_rate": 3e-06, |
| "loss": -21.4337, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.0024722361180590296, |
| "grad_norm": 279.6305236816406, |
| "learning_rate": 3e-06, |
| "loss": -32.3534, |
| "step": 2471 |
| }, |
| { |
| "epoch": 0.0024732366183091545, |
| "grad_norm": 546.125732421875, |
| "learning_rate": 3e-06, |
| "loss": -17.4937, |
| "step": 2472 |
| }, |
| { |
| "completion_length": 253.62500762939453, |
| "epoch": 0.00247423711855928, |
| "grad_norm": 273.2940368652344, |
| "learning_rate": 3e-06, |
| "loss": 7.4418, |
| "reward": 0.2406994178891182, |
| "reward_std": 0.1260107159614563, |
| "rewards/sudoku_reward_func": 0.2406994178891182, |
| "step": 2473, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0024752376188094047, |
| "grad_norm": 169.90762329101562, |
| "learning_rate": 3e-06, |
| "loss": 14.9494, |
| "step": 2474 |
| }, |
| { |
| "epoch": 0.0024762381190595296, |
| "grad_norm": 231.994140625, |
| "learning_rate": 3e-06, |
| "loss": 5.9744, |
| "step": 2475 |
| }, |
| { |
| "epoch": 0.002477238619309655, |
| "grad_norm": 154.6354522705078, |
| "learning_rate": 3e-06, |
| "loss": 9.9395, |
| "step": 2476 |
| }, |
| { |
| "epoch": 0.0024782391195597797, |
| "grad_norm": 459.22686767578125, |
| "learning_rate": 3e-06, |
| "loss": 5.8099, |
| "step": 2477 |
| }, |
| { |
| "epoch": 0.002479239619809905, |
| "grad_norm": 220.0775909423828, |
| "learning_rate": 3e-06, |
| "loss": 13.5452, |
| "step": 2478 |
| }, |
| { |
| "epoch": 0.00248024012006003, |
| "grad_norm": 285.4416198730469, |
| "learning_rate": 3e-06, |
| "loss": 5.4125, |
| "step": 2479 |
| }, |
| { |
| "epoch": 0.0024812406203101552, |
| "grad_norm": 148.00022888183594, |
| "learning_rate": 3e-06, |
| "loss": 8.1059, |
| "step": 2480 |
| }, |
| { |
| "completion_length": 255.95834350585938, |
| "epoch": 0.00248224112056028, |
| "grad_norm": 267.9368591308594, |
| "learning_rate": 3e-06, |
| "loss": 7.4214, |
| "reward": 0.23054204881191254, |
| "reward_std": 0.1383199244737625, |
| "rewards/sudoku_reward_func": 0.23054202646017075, |
| "step": 2481, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.002483241620810405, |
| "grad_norm": 284.4049072265625, |
| "learning_rate": 3e-06, |
| "loss": 2.4674, |
| "step": 2482 |
| }, |
| { |
| "epoch": 0.0024842421210605303, |
| "grad_norm": 191.92669677734375, |
| "learning_rate": 3e-06, |
| "loss": 3.3142, |
| "step": 2483 |
| }, |
| { |
| "epoch": 0.002485242621310655, |
| "grad_norm": 337.16033935546875, |
| "learning_rate": 3e-06, |
| "loss": 2.5026, |
| "step": 2484 |
| }, |
| { |
| "epoch": 0.0024862431215607805, |
| "grad_norm": 586.3452758789062, |
| "learning_rate": 3e-06, |
| "loss": 2.5096, |
| "step": 2485 |
| }, |
| { |
| "epoch": 0.0024872436218109054, |
| "grad_norm": 344.7769775390625, |
| "learning_rate": 3e-06, |
| "loss": -1.1625, |
| "step": 2486 |
| }, |
| { |
| "epoch": 0.0024882441220610307, |
| "grad_norm": 227.2752227783203, |
| "learning_rate": 3e-06, |
| "loss": 0.9586, |
| "step": 2487 |
| }, |
| { |
| "epoch": 0.0024892446223111556, |
| "grad_norm": 303.7214050292969, |
| "learning_rate": 3e-06, |
| "loss": -1.7598, |
| "step": 2488 |
| }, |
| { |
| "completion_length": 255.9166717529297, |
| "epoch": 0.0024902451225612804, |
| "grad_norm": 161.27249145507812, |
| "learning_rate": 3e-06, |
| "loss": -5.8149, |
| "reward": 0.2981564402580261, |
| "reward_std": 0.16518419981002808, |
| "rewards/sudoku_reward_func": 0.29815642535686493, |
| "step": 2489, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.0024912456228114058, |
| "grad_norm": 188.5457763671875, |
| "learning_rate": 3e-06, |
| "loss": -6.1626, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.0024922461230615306, |
| "grad_norm": 205.0718231201172, |
| "learning_rate": 3e-06, |
| "loss": -6.9568, |
| "step": 2491 |
| }, |
| { |
| "epoch": 0.002493246623311656, |
| "grad_norm": 155.51025390625, |
| "learning_rate": 3e-06, |
| "loss": -9.4797, |
| "step": 2492 |
| }, |
| { |
| "epoch": 0.002494247123561781, |
| "grad_norm": 152.71153259277344, |
| "learning_rate": 3e-06, |
| "loss": -8.4538, |
| "step": 2493 |
| }, |
| { |
| "epoch": 0.002495247623811906, |
| "grad_norm": 125.3695068359375, |
| "learning_rate": 3e-06, |
| "loss": -6.4118, |
| "step": 2494 |
| }, |
| { |
| "epoch": 0.002496248124062031, |
| "grad_norm": 128.93853759765625, |
| "learning_rate": 3e-06, |
| "loss": -6.2118, |
| "step": 2495 |
| }, |
| { |
| "epoch": 0.002497248624312156, |
| "grad_norm": 181.36378479003906, |
| "learning_rate": 3e-06, |
| "loss": -9.2876, |
| "step": 2496 |
| }, |
| { |
| "completion_length": 255.8541717529297, |
| "epoch": 0.002498249124562281, |
| "grad_norm": 154.56900024414062, |
| "learning_rate": 3e-06, |
| "loss": -5.0903, |
| "reward": 0.29621364921331406, |
| "reward_std": 0.14325331896543503, |
| "rewards/sudoku_reward_func": 0.29621363431215286, |
| "step": 2497, |
| "zero_std_ratio": 0.0 |
| }, |
| { |
| "epoch": 0.002499249624812406, |
| "grad_norm": 141.0087127685547, |
| "learning_rate": 3e-06, |
| "loss": -9.3757, |
| "step": 2498 |
| }, |
| { |
| "epoch": 0.0025002501250625314, |
| "grad_norm": 119.01480102539062, |
| "learning_rate": 3e-06, |
| "loss": -3.5487, |
| "step": 2499 |
| }, |
| { |
| "epoch": 0.0025012506253126563, |
| "grad_norm": 170.59515380859375, |
| "learning_rate": 3e-06, |
| "loss": -3.0218, |
| "step": 2500 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 9995000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 6, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|