diff --git "a/weights/David-decoupled-deep_efficiency/20251012_221046/best_model_acc66.74_metadata.json" "b/weights/David-decoupled-deep_efficiency/20251012_221046/best_model_acc66.74_metadata.json" new file mode 100644--- /dev/null +++ "b/weights/David-decoupled-deep_efficiency/20251012_221046/best_model_acc66.74_metadata.json" @@ -0,0 +1,620 @@ +{ + "epoch": 8, + "optimizer_state_dict": { + "state": { + "0": { + "step": "tensor(33786.)", + "exp_avg": "tensor([[-1.6394e-06, -3.8026e-05, -4.8285e-06, ..., 3.2422e-05,\n 1.0196e-05, -4.4090e-06],\n [-4.0207e-07, 7.2419e-05, -1.3794e-05, ..., -5.0495e-06,\n 3.1762e-06, -1.2588e-05],\n [ 8.8791e-05, -1.2570e-05, -1.9372e-05, ..., 1.4037e-05,\n 3.3276e-06, -1.3505e-05],\n ...,\n [-3.3919e-05, 4.9065e-05, -6.2633e-05, ..., -1.2544e-06,\n -9.7273e-06, -2.5956e-05],\n [ 6.6792e-06, -4.1182e-06, 3.7227e-06, ..., 1.6065e-05,\n -2.5623e-05, 2.8550e-06],\n [-3.3091e-06, -1.4320e-05, 6.8647e-06, ..., -3.0301e-07,\n 1.5650e-05, -3.8918e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[8.0501e-09, 5.8487e-08, 1.7338e-08, ..., 6.6077e-09, 3.7991e-09,\n 5.1225e-09],\n [2.0438e-08, 8.8907e-08, 1.7759e-08, ..., 3.1629e-08, 9.4188e-09,\n 1.0735e-08],\n [4.7898e-08, 3.9938e-08, 6.2454e-09, ..., 1.0952e-08, 5.7753e-09,\n 6.9006e-09],\n ...,\n [1.3479e-08, 4.2680e-08, 2.2714e-08, ..., 1.1680e-08, 4.5987e-09,\n 1.5629e-08],\n [3.4114e-08, 4.1601e-08, 1.8717e-08, ..., 3.1649e-08, 9.3334e-09,\n 8.6726e-09],\n [4.7327e-09, 2.7679e-08, 2.8108e-08, ..., 3.6042e-09, 1.8280e-09,\n 3.9578e-09]], device='cuda:0')" + }, + "1": { + "step": "tensor(33786.)", + "exp_avg": "tensor([-4.0339e-06, 5.9242e-04, 1.0752e-03, -9.5675e-04, -6.6691e-05,\n -1.9037e-03, -2.8900e-04, -9.4883e-04, 9.6531e-04, 2.6507e-04,\n -3.4771e-04, 5.6052e-45, 5.5824e-04, -1.3004e-04, 6.9080e-04,\n 7.7258e-05, 1.3615e-03, 8.1525e-04, 1.7727e-04, -4.3447e-04,\n 3.9531e-04, -3.3021e-04, 6.6750e-04, -7.4431e-04, -2.0141e-04,\n 1.5002e-04, 1.3105e-03, -5.5863e-04, 3.7235e-04, -1.0168e-03,\n 5.1992e-04, -5.2440e-04, -1.7719e-03, 4.1518e-04, 6.3885e-05,\n -1.9159e-03, -4.6765e-04, 4.6535e-04, 3.5217e-04, -7.5159e-04,\n 4.3766e-04, -9.2808e-05, 2.8278e-04, 1.1532e-03, -1.4708e-03,\n -3.4939e-03, 6.8811e-04, 5.9450e-04, 2.8058e-04, 4.1925e-05,\n 3.4359e-06, -1.3315e-04, 2.9793e-04, -1.3120e-04, 5.7296e-04,\n 4.4198e-04, 9.8266e-04, -5.2315e-04, 1.0114e-03, -6.8252e-05,\n -3.6388e-05, 3.1386e-04, 3.1240e-04, -1.2510e-05, 7.1987e-04,\n 2.9083e-04, -2.4639e-04, 5.6052e-45, -2.8306e-04, -8.4460e-04,\n -5.1949e-05, 1.3368e-03, 8.5857e-04, 1.0147e-04, 1.8772e-04,\n 8.9980e-04, -1.8657e-04, -7.3899e-04, -7.1244e-04, 5.9759e-04,\n -1.4636e-04, 1.3304e-03, 1.1787e-03, 2.5968e-04, -1.5194e-03,\n 1.9516e-05, 8.7140e-04, -1.2961e-04, -2.1418e-03, 5.6052e-45,\n 2.0293e-03, 5.5527e-04, -6.2737e-05, -7.2766e-04, 7.4661e-04,\n 1.3575e-03, 7.7071e-04, 6.2842e-05, -8.6233e-05, 2.2487e-05,\n 1.5176e-03, -3.4434e-03, 2.0854e-03, -2.4396e-04, 5.2260e-04,\n 1.1891e-05, 6.2593e-04, 1.0886e-04, -1.0934e-04, 4.2720e-04,\n -4.0998e-04, -5.9052e-04, 1.6056e-04, -1.5818e-04, 3.1156e-04,\n -7.9609e-06, 3.6849e-04, 7.8989e-04, 2.0345e-03, -3.2618e-04,\n -1.5152e-03, 3.4039e-04, 1.0253e-04, 1.5395e-04, -5.4253e-04,\n -6.4730e-04, -1.2357e-03, 1.2562e-04, -4.3661e-04, -1.1204e-03,\n -4.8045e-06, 7.8375e-04, -7.8654e-05, 1.0913e-04, 5.4603e-05,\n 1.0544e-04, -1.0438e-03, -7.6750e-04, -3.3474e-04, 9.4244e-04,\n 6.7824e-05, 4.3745e-04, -4.0682e-05, -7.3643e-04, -1.9351e-04,\n 2.9428e-04, -4.8356e-04, -1.1146e-04, -4.2581e-04, -4.9060e-05,\n -1.7179e-03, -2.8707e-04, -4.6252e-04, 2.6362e-04, 1.3544e-03,\n 2.9169e-04, 1.1523e-04, 6.4400e-04, -7.3098e-04, 1.3910e-03,\n -1.0513e-03, -6.4991e-04, 1.1615e-03, -5.2980e-04, 1.2444e-03,\n 1.2754e-04, -1.5175e-04, 2.1790e-04, 9.7570e-04, -2.0253e-04,\n -4.3171e-04, 4.7728e-04, -4.3448e-05, 1.0635e-03, -7.7924e-04,\n -4.2044e-04, 5.2382e-04, -2.9214e-03, -3.2888e-04, 5.1779e-05,\n 9.6847e-04, -4.1934e-04, 2.8519e-04, 5.1442e-04, -1.5605e-03,\n 7.2590e-04, 8.1123e-04, -4.6783e-04, 1.5748e-05, 9.4646e-04,\n -7.8046e-04, 8.4511e-04, 1.0928e-03, -2.3252e-04, -2.1461e-03,\n 1.5352e-03, 1.0056e-04, 5.9744e-04, 4.7877e-04, 8.1881e-04,\n -1.7217e-03, 7.1903e-05, -1.1564e-03, 5.6423e-04, 1.5858e-04,\n 3.1149e-06, 3.0255e-04, 5.6052e-45, -7.5191e-04, -1.9210e-04,\n 9.8367e-04, 1.3492e-03, 2.0052e-04, -3.4015e-04, 1.7229e-04,\n -5.4486e-04, -2.1596e-04, -3.1453e-04, -4.0070e-04, 1.5173e-03,\n 4.7524e-04, 5.6572e-05, 6.3510e-04, 1.0901e-03, 1.7242e-03,\n -6.3319e-05, 7.4630e-04, -7.0196e-05, -1.4605e-03, 1.0191e-05,\n -1.9708e-04, -2.6454e-04, 5.1317e-04, -5.4319e-04, -5.3863e-04,\n -1.8088e-03, -1.3365e-03, 8.3811e-06, 2.2511e-04, -1.4822e-03,\n -8.6440e-05, -6.1446e-04, 5.3155e-04, -8.2047e-04, 8.3527e-04,\n 4.1679e-05, 2.4471e-04, -1.2676e-03, -4.2869e-04, 3.6270e-04,\n 1.2729e-03, 3.2220e-05, 5.4709e-06, 1.1272e-03, 2.0031e-04,\n -1.1779e-03, -4.3564e-04, -8.4553e-04, -2.6591e-04, 1.4008e-03,\n 1.5146e-04, 3.5410e-05, -1.2375e-03, 3.2360e-04, 1.0627e-04,\n 1.2645e-03, 1.6330e-05, 1.0601e-03, -1.8087e-05, 3.1859e-04,\n 8.9551e-04, -1.2502e-03, 1.3298e-03, -9.6777e-04, -4.2762e-04,\n -1.8953e-04, -6.9712e-04, -2.8317e-04, 2.3365e-04, -1.4715e-03,\n -2.8077e-04, -7.0023e-04, 8.9449e-04, 2.8343e-05, 5.4728e-04,\n 4.9478e-04, 4.2225e-04, 5.4247e-04, -1.1133e-03, -2.8234e-04,\n -6.3094e-04, 4.7133e-04, 3.1595e-04, 9.7451e-05, 1.9941e-03,\n 3.6467e-04, 6.1574e-05, 6.8337e-06, -3.1852e-04, 3.9203e-04,\n 7.3021e-04, -7.3061e-04, 5.3278e-04, 2.0965e-07, 2.8646e-04,\n 9.6758e-06, 2.1013e-04, -2.8458e-04, 7.9361e-04, -2.1819e-03,\n -1.7363e-04, -3.2784e-03, -1.6171e-04, 9.2875e-05, 2.7809e-04,\n -4.9095e-04, 8.9198e-05, 1.4745e-03, 9.5171e-04, 1.5024e-03,\n -9.6656e-05, 1.0135e-03, -3.4317e-04, 5.8850e-04, -1.7785e-05,\n -2.7441e-05, 8.8213e-04, 1.2508e-03, 1.2142e-03, -1.1567e-03,\n -4.6535e-04, -4.1144e-05, -2.6458e-05, -8.5577e-05, 9.7329e-05,\n 4.9858e-04, 3.9194e-03, -1.4818e-03, 1.1733e-03, 5.0527e-04,\n -1.9036e-04, 1.0213e-03, 1.1197e-04, 1.5305e-04, 9.3244e-04,\n 1.2435e-04, -7.1889e-04, 9.0368e-04, 3.8639e-04, -7.5432e-04,\n 2.9649e-04, 1.3189e-03, 1.6637e-03, -6.8998e-05, -2.1871e-03,\n 4.1588e-04, 1.7975e-03, 2.3114e-05, -1.0753e-05, 1.3467e-03,\n 3.3040e-04, 1.4809e-03, -8.6291e-04, -6.2900e-04, 6.1473e-04,\n 4.5672e-05, -3.2129e-04, 2.1930e-04, 8.6363e-04, 1.1410e-03,\n 7.9236e-04, 7.7677e-04, 2.8648e-04, -1.1044e-03, 8.2117e-04,\n 7.7490e-04, -9.5172e-04, -8.4918e-04, -7.3836e-04, 1.2836e-03,\n 3.9567e-04, 6.9437e-05, -3.6026e-05, 2.5980e-04, 1.1529e-04,\n 4.3587e-05, 1.3942e-03, 1.3909e-03, -1.3043e-04, -3.0625e-04,\n -2.6236e-04, 9.0426e-04, 6.0376e-04, -6.5414e-04, 4.9176e-05,\n 7.5315e-04, -3.0633e-04, 1.0614e-04, -3.1491e-04, -8.7845e-04,\n -3.8886e-04, -3.4395e-04, 1.2642e-03, -8.9264e-05, -3.9307e-04,\n 6.6711e-04, 6.3614e-04, -2.1865e-03, 1.0177e-03, 7.9811e-04,\n 3.3554e-04, -8.9764e-04, -1.6820e-03, 5.2156e-04, -1.3191e-04,\n 5.8689e-04, -1.5524e-03, 3.7147e-04, -4.9045e-04, -1.1060e-04,\n -3.0847e-04, -3.0988e-04, -2.1990e-04, 2.7889e-04, -1.4630e-04,\n -5.7518e-04, -7.4132e-04, -4.8705e-04, 1.4009e-03, 5.6627e-04,\n 5.0587e-04, -7.0174e-04, 5.6052e-45, -6.4828e-04, -1.2046e-04,\n -3.6537e-04, 2.8875e-04, 1.2973e-03, -2.4725e-03, 4.2367e-03,\n -1.7669e-04, -5.6205e-04, -3.6037e-04, 7.8473e-04, -1.3725e-03,\n -2.8177e-03, 2.1811e-04, -2.3519e-04, 4.9812e-04, -4.8889e-04,\n -5.4955e-04, -4.8738e-04, 7.7285e-04, 1.4409e-03, 6.8287e-04,\n 1.3471e-03, -6.4234e-04, 1.3929e-03, -3.3691e-05, -8.2486e-04,\n 5.6052e-45, -5.6507e-04, -1.6254e-04, -1.0697e-03, 1.3810e-03,\n 1.2069e-03, -1.0093e-03, -2.0096e-03, -3.4083e-04, 8.0373e-05,\n -5.8986e-04, -1.6488e-04, -3.9414e-04, 7.2422e-04, 7.4750e-04,\n 1.3486e-03, 1.4817e-04, -4.9097e-04, 2.0856e-04, 1.2559e-03,\n 6.2059e-04, 5.0269e-04, -3.5859e-04, -1.2624e-03, -8.0909e-05,\n 5.8094e-04, -5.5847e-04, -1.1054e-03, 5.1668e-04, 4.5760e-04,\n -8.2050e-04, 3.6376e-04, -1.1773e-03, -2.1717e-04, -5.7750e-04,\n -9.6495e-04, -1.2538e-04, -4.2891e-06, -3.3069e-04, 2.0589e-04,\n 6.8622e-04, 2.3542e-04, 3.3869e-05, -1.3100e-03, 8.2859e-04,\n 1.6970e-03, -1.4995e-03, -8.2199e-04, 5.4590e-05, 6.9118e-04,\n -6.6834e-04, -7.7697e-05, -1.0526e-06, 5.2214e-04, -1.6642e-03,\n -3.7569e-04, 6.2321e-04, -2.6433e-04, 4.0100e-04, 5.8378e-04,\n -8.2047e-04, 8.3949e-05, 5.3031e-04, 4.0429e-04, 1.5765e-03,\n 7.5046e-05, -2.6393e-03, 5.3357e-04, -1.1110e-04, 6.8526e-05,\n -1.2480e-03, -1.8364e-03, -1.1646e-03, 1.9122e-04, 4.6967e-04,\n -2.7879e-04, 1.3220e-03, 1.2774e-03, -1.1928e-03, 6.1875e-04,\n 1.8462e-03, 7.2332e-04, 1.5091e-04, -7.0243e-04, 6.4383e-05,\n -2.5307e-03, -2.9026e-04, 4.9441e-05, -7.2691e-04, 1.0037e-03,\n -5.4452e-04, -4.4410e-04, -3.7181e-05, -2.7195e-03, -9.0455e-04,\n -2.6468e-04, -3.5092e-04, 7.0757e-04, 1.8391e-03, -2.7227e-04,\n 6.8014e-04, 1.9233e-03, -1.8151e-04, 7.0822e-04, 5.7628e-04,\n -4.8586e-04, -3.4008e-04, 1.4003e-03, 5.0269e-05, 2.7996e-04,\n 2.6950e-04, -8.1356e-04, -1.0163e-03, 3.0067e-04, -1.2687e-03,\n -1.7655e-03, -8.7908e-04, 3.6038e-05, -1.0393e-03, 9.6364e-04,\n 5.3531e-05, 2.4040e-04, -3.2868e-04, -8.7244e-04, 3.0748e-04,\n 4.5376e-04, 1.0608e-04, -2.0348e-03, -1.8228e-04, -8.6443e-04,\n -3.5765e-04, -7.3776e-04, -3.7436e-04, 2.9605e-04, 9.4826e-04,\n 6.3248e-04, -1.7585e-03, -1.9468e-03, 9.0905e-04, 5.4222e-04,\n 1.1233e-03, -8.4390e-04, -5.1126e-04, 2.2923e-04, -7.0813e-04,\n -7.0047e-04, 9.4897e-04, -2.4089e-04, -2.3130e-04, -1.2010e-03,\n -1.0229e-03, -3.2701e-05, 6.6510e-04, -8.6070e-04, 4.0318e-04,\n 4.4579e-04, -4.6055e-04, -9.2496e-06, 3.9548e-04, -1.7452e-03,\n 3.7614e-04, -9.9871e-04, -1.0969e-03, 7.9111e-05, 1.5769e-04,\n 1.2805e-03, 1.3920e-03, 7.5946e-04, 1.4639e-04, 2.6323e-04,\n -7.0206e-04, -1.5991e-03, -6.7996e-04, 9.7421e-04, 5.9832e-04,\n -4.9618e-04, -4.9720e-04, -6.2379e-05, 5.0708e-05, 1.8249e-04],\n device='cuda:0')", + "exp_avg_sq": "tensor([5.1039e-06, 1.6844e-05, 1.1349e-05, 1.0202e-05, 3.8665e-06, 2.7928e-05,\n 1.1958e-05, 1.7620e-05, 1.3196e-05, 5.0646e-06, 1.6238e-05, 8.7581e-21,\n 2.1789e-05, 7.8492e-06, 6.8444e-05, 5.7388e-06, 1.7487e-05, 1.2856e-05,\n 2.2960e-05, 1.6138e-05, 9.1322e-06, 4.7409e-06, 9.7427e-06, 1.5163e-05,\n 1.2895e-05, 4.6107e-06, 1.3241e-05, 6.6521e-06, 1.6980e-05, 1.0964e-05,\n 1.0168e-05, 2.0315e-05, 1.3815e-05, 6.0580e-06, 1.0229e-05, 8.6864e-06,\n 1.4274e-05, 5.9275e-06, 1.9056e-05, 1.2498e-05, 1.3307e-05, 7.6425e-06,\n 1.0847e-05, 4.3766e-06, 2.1233e-05, 3.0763e-05, 1.4161e-05, 5.8745e-06,\n 7.9466e-06, 1.3048e-05, 1.7543e-05, 1.3689e-05, 4.0187e-06, 2.3271e-05,\n 1.8566e-05, 3.2914e-05, 8.5647e-06, 1.1999e-05, 7.1693e-06, 8.0755e-06,\n 8.8710e-06, 2.5325e-05, 6.4026e-06, 3.9778e-06, 1.6477e-05, 2.1253e-05,\n 4.8416e-06, 3.7069e-21, 9.7922e-06, 1.6911e-05, 7.5696e-06, 1.2292e-05,\n 2.5099e-05, 7.9252e-06, 9.2001e-06, 9.8273e-06, 2.6553e-06, 1.2536e-05,\n 1.3315e-05, 9.2888e-06, 7.5241e-06, 1.2510e-05, 2.7864e-05, 1.7603e-05,\n 1.7755e-05, 9.8679e-06, 1.5049e-05, 1.6848e-05, 2.2070e-05, 5.5911e-22,\n 1.3651e-05, 1.4030e-05, 1.4028e-05, 9.3181e-06, 1.5067e-05, 9.1728e-06,\n 9.7203e-06, 1.6654e-05, 9.2620e-06, 1.3936e-05, 8.0419e-06, 2.2889e-05,\n 4.3298e-05, 7.7326e-06, 8.0109e-06, 1.0855e-05, 1.0380e-05, 1.1583e-05,\n 7.0651e-06, 1.2120e-05, 8.0337e-06, 1.4584e-05, 1.2716e-05, 6.4330e-06,\n 4.6553e-06, 1.6283e-05, 3.4080e-06, 6.3650e-06, 1.4707e-05, 4.1659e-06,\n 1.2648e-05, 9.5571e-06, 3.7583e-06, 4.9270e-06, 8.3064e-06, 1.2938e-05,\n 2.0073e-05, 1.5567e-05, 1.3170e-05, 1.4016e-05, 6.7981e-06, 8.9955e-06,\n 3.5251e-06, 1.2036e-05, 1.1626e-05, 4.0784e-06, 1.4544e-05, 6.7512e-06,\n 1.4595e-05, 1.6444e-05, 1.5149e-05, 3.7404e-06, 5.6182e-06, 1.6103e-05,\n 5.9731e-06, 9.2156e-06, 1.0325e-05, 5.4659e-06, 6.1682e-06, 1.7581e-05,\n 3.2958e-05, 5.2698e-06, 2.8427e-06, 4.4710e-06, 1.4459e-05, 8.2498e-06,\n 8.8987e-06, 3.9310e-06, 1.5486e-05, 2.5017e-05, 1.0641e-05, 1.1028e-05,\n 4.1401e-06, 6.1330e-06, 1.3861e-05, 1.2649e-05, 3.5461e-06, 1.0779e-05,\n 8.9499e-06, 1.7170e-05, 1.1069e-05, 4.8376e-06, 1.3990e-05, 8.0528e-06,\n 2.0158e-05, 1.6413e-05, 3.7774e-06, 1.9547e-05, 1.1732e-05, 8.2701e-06,\n 1.1915e-05, 1.8221e-05, 1.2448e-05, 1.3936e-05, 1.0142e-05, 1.6468e-05,\n 2.2834e-05, 4.5347e-06, 7.1773e-06, 1.3886e-05, 1.8003e-05, 6.7908e-06,\n 4.4669e-06, 9.1040e-06, 1.0427e-05, 1.7811e-05, 3.3568e-05, 1.1405e-05,\n 1.3263e-05, 1.8077e-05, 7.8022e-06, 9.1804e-06, 1.1198e-05, 1.3094e-05,\n 2.2439e-05, 5.9532e-06, 1.3076e-05, 1.0268e-20, 1.0906e-05, 1.2872e-05,\n 8.1847e-06, 1.6282e-05, 1.5435e-05, 4.6707e-06, 7.3460e-06, 1.9579e-05,\n 7.7993e-06, 2.6606e-06, 9.4620e-06, 4.5769e-06, 7.6716e-06, 9.8244e-06,\n 3.9557e-06, 9.9686e-06, 1.4005e-05, 7.2584e-06, 3.4116e-06, 1.3974e-05,\n 1.2488e-05, 5.3437e-06, 2.7865e-06, 8.7117e-06, 1.1266e-05, 9.2185e-06,\n 5.1323e-06, 1.2486e-05, 1.3186e-05, 7.3764e-06, 6.4823e-06, 1.5473e-05,\n 1.5758e-05, 7.3470e-06, 1.4510e-05, 1.4122e-05, 1.1979e-05, 8.4953e-06,\n 1.4092e-05, 1.0035e-05, 4.2370e-06, 2.0717e-05, 2.1907e-05, 1.4445e-05,\n 1.2660e-05, 1.5435e-05, 7.2685e-06, 2.6638e-05, 1.5652e-05, 9.8579e-06,\n 5.9333e-06, 4.2884e-06, 1.2851e-05, 8.4710e-06, 3.8682e-05, 4.5599e-06,\n 9.3781e-06, 2.5364e-05, 1.8260e-05, 9.7996e-06, 7.8641e-06, 4.6240e-06,\n 1.7231e-05, 8.6987e-06, 1.9373e-05, 1.4952e-05, 2.7557e-05, 4.4518e-06,\n 1.1681e-05, 1.1967e-05, 4.0197e-06, 9.9735e-06, 1.6875e-05, 6.9866e-06,\n 1.3057e-05, 1.1615e-05, 1.1458e-05, 6.5119e-06, 1.5066e-05, 8.8979e-06,\n 4.4230e-06, 1.1890e-05, 4.3585e-06, 1.4422e-05, 4.1061e-06, 9.1703e-06,\n 9.6130e-06, 3.4733e-06, 8.8687e-06, 3.4439e-05, 1.3732e-05, 5.6882e-06,\n 1.6301e-05, 1.4485e-05, 8.7020e-06, 1.3974e-05, 8.1184e-06, 5.4976e-06,\n 4.9106e-06, 1.5043e-05, 1.4785e-05, 2.9431e-05, 5.1649e-06, 2.4898e-05,\n 7.8358e-06, 1.2661e-05, 1.3856e-05, 1.6359e-05, 1.4832e-05, 1.0883e-05,\n 1.2211e-05, 2.9718e-05, 2.2722e-05, 1.1062e-05, 8.0810e-06, 7.1401e-06,\n 9.3250e-06, 4.7733e-06, 2.1673e-05, 1.7198e-05, 9.3717e-06, 1.9322e-05,\n 1.5372e-05, 7.6321e-06, 1.1217e-05, 1.0297e-05, 1.8389e-05, 1.5299e-05,\n 2.9040e-05, 1.7535e-05, 1.8973e-05, 1.7679e-05, 1.6797e-05, 4.6352e-06,\n 1.5748e-05, 1.3033e-05, 2.1003e-05, 1.3559e-05, 2.1409e-05, 2.5643e-05,\n 3.0186e-06, 9.0163e-06, 1.6015e-05, 1.6511e-05, 1.5690e-05, 1.3199e-05,\n 5.3416e-05, 1.1792e-05, 3.2892e-05, 5.2823e-06, 1.5155e-05, 1.9167e-05,\n 9.2996e-06, 1.2524e-05, 7.1074e-06, 2.2040e-05, 1.1564e-05, 1.1846e-05,\n 3.0581e-06, 5.9256e-06, 8.1443e-06, 2.1594e-05, 2.6339e-05, 1.4058e-05,\n 6.9488e-06, 1.0019e-05, 5.8573e-06, 1.6678e-05, 1.0988e-05, 4.6071e-06,\n 1.3563e-05, 3.7428e-05, 1.1435e-05, 1.2151e-05, 1.0492e-05, 6.3555e-06,\n 6.6253e-06, 1.0976e-05, 1.5448e-05, 9.7487e-06, 1.1776e-05, 2.0529e-06,\n 6.5797e-06, 1.1337e-05, 6.8409e-06, 6.3109e-06, 8.3024e-06, 1.6525e-05,\n 5.4155e-06, 1.2044e-05, 4.3817e-06, 1.0512e-05, 2.4460e-05, 2.5750e-05,\n 1.2361e-05, 1.4566e-05, 1.1948e-05, 1.7283e-05, 3.2147e-05, 1.8369e-05,\n 9.4166e-06, 8.3549e-06, 1.5286e-05, 7.2048e-06, 1.2996e-05, 1.0670e-05,\n 4.0321e-06, 9.8192e-06, 1.5613e-05, 1.4090e-05, 3.8047e-06, 8.7863e-06,\n 1.9608e-05, 1.0013e-05, 9.8066e-06, 1.0791e-05, 3.2615e-06, 1.2147e-05,\n 1.7457e-05, 1.0502e-05, 1.4863e-05, 5.1899e-06, 1.1839e-05, 9.3048e-06,\n 8.4789e-21, 9.3467e-06, 5.3674e-06, 2.1255e-05, 1.4044e-05, 2.3155e-05,\n 1.4090e-05, 4.1395e-05, 7.2055e-06, 8.1872e-06, 1.7232e-05, 2.0485e-05,\n 1.1469e-05, 1.4775e-05, 1.2535e-05, 1.8492e-05, 1.3837e-05, 1.2076e-05,\n 8.5511e-06, 1.4501e-05, 8.0952e-06, 1.9081e-05, 1.1057e-05, 1.6553e-05,\n 9.4799e-06, 9.8653e-06, 9.1204e-06, 5.0890e-06, 2.9064e-21, 1.1232e-05,\n 5.9084e-06, 5.4480e-06, 1.5955e-05, 2.6550e-05, 1.8315e-05, 1.5123e-05,\n 4.2298e-06, 8.5829e-06, 1.4692e-05, 1.2549e-05, 4.3609e-06, 1.1869e-05,\n 7.5232e-06, 2.8525e-05, 1.0939e-05, 1.3312e-05, 1.3339e-05, 1.5584e-05,\n 1.9812e-05, 1.2272e-05, 1.2921e-05, 2.2517e-05, 1.9562e-05, 7.4217e-06,\n 1.6121e-05, 6.4936e-06, 1.1587e-05, 1.9648e-05, 1.5663e-05, 9.9906e-06,\n 1.1590e-05, 1.2661e-05, 1.0737e-05, 1.2943e-05, 9.1609e-06, 1.3432e-05,\n 3.4771e-06, 2.7022e-05, 9.6360e-06, 1.0424e-05, 5.3539e-06, 2.5467e-05,\n 1.3216e-05, 1.8709e-05, 1.7720e-05, 1.1568e-05, 6.5544e-06, 8.1851e-06,\n 8.2161e-06, 1.0797e-05, 1.4950e-05, 1.1168e-05, 3.4804e-05, 1.7811e-05,\n 1.2106e-05, 1.2128e-05, 1.5853e-05, 6.2445e-06, 4.2327e-05, 1.7157e-05,\n 1.5525e-05, 1.2623e-05, 2.1454e-05, 5.7911e-06, 1.6883e-05, 6.8209e-06,\n 1.5051e-05, 1.5794e-06, 1.7817e-05, 1.6352e-05, 6.4334e-06, 5.7055e-06,\n 1.0604e-05, 2.2507e-05, 1.9606e-05, 1.9684e-05, 4.5792e-05, 3.3550e-05,\n 1.4975e-05, 1.2729e-05, 9.5270e-06, 1.6064e-05, 9.1711e-06, 3.5579e-05,\n 3.2574e-06, 7.3700e-06, 8.1747e-06, 1.0451e-05, 5.9933e-06, 3.9388e-05,\n 5.7528e-06, 1.5800e-05, 2.4416e-05, 7.4117e-06, 8.2154e-06, 2.3299e-05,\n 2.1561e-05, 1.7777e-05, 6.0225e-06, 1.9000e-05, 1.2473e-05, 3.0695e-05,\n 3.2102e-06, 8.6050e-06, 1.2303e-05, 2.0003e-05, 6.2091e-06, 5.3596e-06,\n 7.2047e-06, 7.7767e-06, 1.9062e-05, 1.1425e-05, 1.4523e-05, 1.2683e-05,\n 1.2903e-05, 5.5083e-06, 1.0455e-05, 9.1924e-06, 5.0042e-06, 4.9532e-06,\n 8.8151e-06, 1.6384e-05, 1.3066e-05, 1.7250e-05, 5.1012e-06, 1.5093e-05,\n 2.3492e-05, 2.7971e-05, 6.5206e-06, 5.9623e-06, 9.6332e-06, 1.2835e-05,\n 1.3602e-05, 1.6708e-05, 1.1509e-05, 1.4753e-05, 5.2102e-06, 1.0058e-05,\n 1.1959e-05, 1.5610e-05, 9.8800e-06, 1.2453e-05, 5.0011e-06, 8.6815e-06,\n 6.5719e-06, 4.1686e-05, 1.2278e-05, 1.1322e-05, 1.3211e-05, 8.4233e-06,\n 8.0337e-06, 2.7005e-05, 1.5327e-05, 9.6980e-06, 5.4633e-06, 4.0255e-06,\n 7.7265e-06, 9.9653e-06, 5.1486e-06, 1.5473e-05, 2.4714e-05, 1.5402e-05,\n 1.2884e-05, 1.1174e-05, 1.9810e-05, 1.1660e-05, 5.0689e-06, 6.8094e-06,\n 1.1927e-05, 1.0219e-05, 1.4208e-05, 1.0883e-05, 1.0597e-05, 8.7541e-06,\n 1.4076e-05, 8.0370e-06, 1.5546e-05, 3.5346e-06], device='cuda:0')" + }, + "2": { + "step": "tensor(33786.)", + "exp_avg": "tensor([[ 7.1619e-07, -1.6094e-05, -8.2206e-07, ..., -2.8356e-06,\n 5.6743e-07, 7.3237e-07],\n [ 1.0164e-05, -5.3829e-06, -2.1776e-05, ..., -3.0650e-06,\n -9.4918e-06, 4.8473e-06],\n [-1.3460e-05, -2.9949e-06, 8.1895e-07, ..., -1.2049e-05,\n -6.4027e-06, -1.5268e-05],\n ...,\n [-1.6409e-06, -7.3322e-07, -2.6860e-05, ..., 4.3038e-06,\n -1.5065e-05, -9.1820e-06],\n [ 3.1231e-06, 4.9209e-06, 8.5058e-06, ..., 1.2098e-05,\n 1.5251e-06, -1.2794e-05],\n [-6.6194e-06, -3.0424e-05, -1.6452e-05, ..., -3.6614e-06,\n 8.8612e-06, -2.5940e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[6.4966e-10, 1.2463e-09, 7.9732e-10, ..., 9.4220e-10, 1.8568e-09,\n 3.3089e-10],\n [1.5804e-09, 2.5432e-09, 2.5776e-09, ..., 3.7723e-09, 2.0349e-09,\n 1.1582e-09],\n [1.5606e-09, 1.6883e-09, 2.2721e-09, ..., 2.4781e-09, 1.9057e-09,\n 1.6646e-09],\n ...,\n [1.3175e-09, 2.7645e-09, 6.0784e-09, ..., 2.2398e-09, 2.9048e-09,\n 1.2483e-09],\n [1.7969e-09, 2.7023e-09, 4.6891e-09, ..., 2.3321e-09, 1.7503e-09,\n 1.4869e-09],\n [1.5983e-09, 2.6043e-09, 2.2519e-09, ..., 3.3071e-09, 2.4005e-09,\n 6.3585e-10]], device='cuda:0')" + }, + "3": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[-1.9679e-06, 8.8933e-07, -4.2866e-07, ..., 1.2438e-06,\n -6.1909e-08, -1.4501e-06],\n [-1.0680e-06, 4.4548e-07, -1.6250e-06, ..., 7.5484e-06,\n -7.4237e-07, 4.4129e-06],\n [ 2.7598e-06, 3.8903e-06, -1.3958e-06, ..., 4.0815e-06,\n -1.6781e-07, 4.6982e-06],\n ...,\n [ 5.6831e-07, 1.5876e-06, -6.7157e-07, ..., -6.3311e-07,\n 4.0129e-07, -1.3226e-06],\n [-2.5518e-07, 2.1459e-06, 5.5399e-06, ..., -4.3021e-06,\n -3.0552e-06, -3.6092e-06],\n [-1.0413e-07, -8.9051e-08, -1.0669e-06, ..., 3.8172e-06,\n 3.1807e-07, 9.7355e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.0348e-10, 3.4031e-10, 4.9100e-11, ..., 5.6713e-11, 5.7093e-11,\n 5.6738e-11],\n [4.2553e-10, 4.8233e-10, 2.2655e-10, ..., 8.8152e-10, 2.0563e-10,\n 2.8093e-10],\n [1.4453e-10, 1.1902e-10, 8.8595e-11, ..., 4.6251e-10, 9.5873e-11,\n 1.3474e-10],\n ...,\n [1.8654e-10, 4.0179e-10, 7.0094e-11, ..., 6.1672e-11, 1.1529e-10,\n 6.7821e-11],\n [1.0706e-09, 9.2283e-10, 7.5085e-10, ..., 1.6154e-10, 1.4217e-10,\n 1.7006e-10],\n [1.1749e-10, 9.6106e-11, 6.7534e-11, ..., 3.3452e-10, 5.9535e-11,\n 8.2078e-11]], device='cuda:0')" + }, + "4": { + "step": "tensor(30032.)", + "exp_avg": "tensor([ 5.2193e-05, 1.3959e-04, 1.0600e-04, ..., 1.0483e-05,\n -4.5671e-05, 2.6518e-05], device='cuda:0')", + "exp_avg_sq": "tensor([9.0436e-08, 3.4560e-07, 1.9164e-07, ..., 1.1860e-07, 2.1457e-07,\n 1.1560e-07], device='cuda:0')" + }, + "5": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[ 7.5994e-07, -1.2049e-06, -5.4432e-07, ..., -7.0945e-07,\n 9.3950e-07, 1.3510e-07],\n [-8.3006e-07, -3.4700e-07, -3.0154e-07, ..., -5.2112e-07,\n -1.0819e-06, 3.4717e-07],\n [-9.9258e-08, 5.9564e-07, -1.8814e-07, ..., 1.4109e-07,\n -1.5245e-06, 7.3616e-07],\n ...,\n [-8.0206e-07, -2.6299e-07, 3.4192e-07, ..., 2.0779e-07,\n 7.1647e-07, 2.8544e-07],\n [ 3.1108e-07, 1.3828e-06, 5.4419e-07, ..., -3.5915e-07,\n 7.0185e-07, 3.5279e-07],\n [ 9.8166e-07, -1.0071e-06, -2.2100e-06, ..., 4.7175e-07,\n 6.5054e-07, -2.6204e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.4949e-11, 9.4716e-12, 8.7386e-12, ..., 4.2052e-12, 8.8007e-12,\n 3.3019e-12],\n [2.3192e-11, 1.2090e-11, 1.4451e-11, ..., 7.3714e-12, 9.7599e-12,\n 9.1832e-12],\n [1.0541e-11, 1.1903e-11, 1.7708e-11, ..., 1.1952e-11, 1.3027e-11,\n 8.8280e-12],\n ...,\n [1.5844e-11, 2.1395e-11, 1.4261e-11, ..., 1.0575e-11, 2.1393e-11,\n 8.9496e-12],\n [1.2180e-11, 1.1457e-11, 1.1219e-11, ..., 6.8574e-12, 2.2361e-11,\n 5.6715e-12],\n [2.7808e-11, 2.2153e-11, 1.9759e-11, ..., 1.0218e-11, 1.1666e-11,\n 8.5141e-12]], device='cuda:0')" + }, + "15": { + "step": "tensor(30032.)", + "exp_avg": "tensor([-5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([5.8620e-18], device='cuda:0')" + }, + "16": { + "step": "tensor(30032.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([8.7945e-19, 4.1642e-19, 8.5873e-20], device='cuda:0')" + }, + "17": { + "step": "tensor(30032.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", + "exp_avg_sq": "tensor([2.9698e-16, 1.8356e-17, 1.9719e-17, 1.4451e-17, 2.2153e-17],\n device='cuda:0')" + }, + "19": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.3316e-22, 5.1639e-23, 1.9774e-22, ..., 1.7949e-22, 1.7824e-24,\n 7.6751e-23],\n [1.2594e-24, 1.1309e-24, 3.3854e-26, ..., 6.7019e-25, 9.6440e-27,\n 4.8159e-26],\n [2.3018e-21, 2.2086e-22, 6.9263e-22, ..., 9.2105e-22, 1.0094e-22,\n 3.1125e-22],\n ...,\n [6.8662e-24, 2.7693e-25, 1.8780e-24, ..., 1.5759e-23, 8.4324e-26,\n 7.1880e-27],\n [1.4201e-23, 6.1064e-25, 6.7086e-24, ..., 2.8740e-23, 6.5964e-25,\n 1.8950e-24],\n [4.2500e-23, 7.3307e-26, 5.1959e-23, ..., 3.2979e-23, 4.7065e-25,\n 2.0470e-23]], device='cuda:0')" + }, + "20": { + "step": "tensor(30032.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([4.3981e-19, 1.5312e-22, 3.3692e-18, 1.2567e-21, 1.3744e-22, 2.5620e-19,\n 1.2541e-20, 8.6379e-19, 1.0562e-19, 2.6527e-21, 2.5786e-18, 9.3329e-19,\n 2.4364e-20, 4.5375e-22, 1.5055e-23, 4.6031e-19, 5.3622e-19, 7.3075e-20,\n 9.5263e-20, 6.7324e-22, 5.0693e-19, 3.2978e-20, 4.6554e-19, 5.1703e-21,\n 3.8830e-20, 1.8150e-18, 6.9516e-22, 3.8269e-21, 2.3958e-19, 5.4489e-20,\n 4.1253e-19, 8.4503e-19, 6.9971e-19, 1.5492e-19, 2.3029e-20, 1.7664e-20,\n 6.0770e-19, 2.5269e-19, 3.9040e-20, 5.4663e-20, 7.6947e-21, 3.3646e-19,\n 4.1614e-20, 9.9494e-19, 6.6503e-21, 2.6402e-20, 1.6732e-18, 2.5601e-22,\n 1.9931e-21, 5.2728e-20, 6.0623e-21, 2.5379e-20, 1.7329e-20, 1.1290e-19,\n 1.4661e-20, 8.6726e-22, 2.7272e-20, 6.2704e-21, 4.6114e-19, 1.2881e-18,\n 8.5168e-19, 1.0880e-20, 3.0442e-19, 6.3478e-21, 4.3186e-19, 4.7776e-19,\n 4.0415e-21, 1.2936e-20, 2.9814e-18, 9.8928e-20, 4.2701e-21, 5.0590e-22,\n 2.4950e-19, 1.7852e-18, 1.4997e-19, 3.3716e-19, 1.2750e-20, 1.0983e-19,\n 6.3120e-23, 1.2245e-21, 7.5143e-19, 6.0003e-20, 2.6802e-21, 1.9357e-18,\n 7.2142e-21, 6.0684e-20, 1.8435e-19, 3.1196e-19, 7.6340e-21, 1.8433e-20,\n 1.3538e-18, 3.8700e-21, 6.9092e-19, 2.0762e-19, 2.5589e-19, 2.4394e-21,\n 4.5708e-19, 7.1338e-23, 4.9614e-20, 1.9365e-20, 1.1962e-22, 1.4926e-18,\n 4.7717e-20, 4.9310e-20, 7.3627e-20, 1.4328e-22, 3.7570e-21, 1.3208e-19,\n 5.1681e-19, 2.4177e-19, 7.6274e-23, 4.8240e-22, 5.8468e-19, 1.1724e-20,\n 1.5686e-22, 8.6917e-20, 3.8751e-19, 4.1854e-19, 5.7706e-22, 1.9626e-21,\n 3.4945e-20, 2.1196e-19, 9.5068e-20, 7.1010e-19, 5.1911e-21, 1.4328e-20,\n 6.1232e-18, 2.7559e-22, 6.1377e-20, 5.9481e-21, 2.6198e-19, 3.2125e-21,\n 6.8799e-19, 1.4764e-23, 1.8703e-21, 3.0096e-20, 6.7816e-19, 8.8751e-20,\n 5.6064e-22, 1.1402e-20, 3.0245e-22, 4.4572e-20, 1.3885e-19, 1.9501e-19,\n 4.8923e-21, 8.3593e-20, 7.8589e-22, 5.5789e-22, 2.1765e-20, 5.4657e-20,\n 2.0010e-18, 5.6973e-22, 3.0933e-22, 1.7065e-20, 2.8402e-18, 8.9857e-21,\n 4.2064e-20, 2.3062e-18, 2.1064e-18, 1.9082e-19, 8.9673e-21, 4.8041e-19,\n 5.0564e-21, 5.3524e-20, 7.7520e-19, 1.5756e-19, 1.9045e-20, 6.0615e-19,\n 1.1926e-20, 2.5955e-21, 1.2464e-21, 9.2310e-21, 1.7320e-21, 7.1143e-19,\n 2.0842e-21, 3.3382e-21, 4.4145e-21, 5.5231e-20, 6.2731e-19, 1.2473e-20,\n 1.3261e-20, 1.9867e-19, 1.5763e-20, 7.7765e-19, 8.1610e-19, 9.0665e-20,\n 2.5221e-20, 4.7384e-21, 6.3336e-19, 1.3504e-19, 3.1244e-19, 4.2387e-19,\n 1.0433e-18, 1.8487e-18, 1.8682e-21, 1.9987e-20, 1.5095e-19, 5.0412e-21,\n 2.2426e-20, 1.8216e-22, 3.2331e-22, 2.4946e-21, 8.6757e-20, 2.9204e-19,\n 1.2791e-19, 3.8108e-20, 1.1299e-19, 8.2803e-21, 1.9159e-20, 2.6061e-20,\n 8.0579e-20, 4.6750e-19, 3.2832e-23, 7.8816e-22, 5.7746e-21, 1.7112e-19,\n 3.5754e-21, 3.2837e-19, 8.1136e-20, 1.6031e-19, 2.2910e-20, 1.1046e-18,\n 3.6730e-20, 1.3684e-19, 2.9155e-19, 4.2213e-20, 5.5616e-19, 3.3604e-20,\n 1.7134e-24, 1.3147e-19, 1.3631e-19, 1.2437e-19, 2.3472e-19, 1.1400e-21,\n 2.5175e-19, 5.8102e-20, 1.3366e-21, 2.4274e-21, 8.4565e-20, 1.0036e-18,\n 6.9775e-21, 8.5936e-20, 3.3090e-18, 6.3579e-21, 8.8361e-20, 4.4571e-20,\n 1.9203e-22, 1.8907e-19, 6.3332e-19, 6.3925e-20, 5.5050e-21, 2.2469e-19,\n 6.7788e-20, 3.0986e-20, 4.7535e-20, 1.0903e-19], device='cuda:0')" + }, + "21": { + "step": "tensor(30032.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.4100e-22, 9.3167e-25, 5.5543e-21, 1.0408e-25, 4.3462e-25, 1.6112e-22,\n 2.1191e-24, 2.9459e-21, 3.9075e-23, 1.1717e-24, 3.0016e-21, 6.8706e-22,\n 5.9770e-23, 9.9447e-26, 7.4342e-24, 9.2383e-23, 2.8875e-22, 8.7389e-24,\n 4.0625e-23, 5.9573e-28, 1.2537e-22, 5.8255e-24, 7.2887e-22, 2.9781e-24,\n 5.5614e-23, 1.7178e-21, 6.9587e-24, 1.2524e-24, 3.9281e-23, 1.4622e-23,\n 1.9239e-22, 8.6936e-22, 1.0725e-21, 1.1921e-22, 5.9362e-23, 5.7462e-24,\n 1.1374e-22, 2.5400e-23, 5.6601e-23, 7.1212e-23, 2.0010e-23, 1.6733e-22,\n 3.3070e-23, 9.0257e-22, 3.0335e-24, 1.3180e-23, 2.4029e-21, 6.8318e-26,\n 2.2849e-25, 3.4812e-24, 1.6507e-24, 7.5108e-24, 3.6173e-23, 1.8053e-23,\n 1.6161e-24, 5.3937e-26, 1.6594e-23, 2.4625e-24, 1.1021e-21, 6.6876e-22,\n 1.2629e-21, 1.1658e-24, 6.7944e-22, 1.7605e-24, 1.0551e-21, 1.8819e-22,\n 9.4064e-25, 1.3750e-23, 1.1767e-20, 3.4883e-23, 6.5516e-24, 1.1158e-25,\n 4.1004e-22, 7.6982e-22, 4.7897e-23, 3.0311e-22, 1.5625e-23, 2.1102e-23,\n 1.7722e-23, 1.3284e-24, 8.4397e-23, 1.3904e-23, 2.8336e-25, 1.5780e-21,\n 3.8204e-23, 9.0725e-24, 1.1667e-23, 4.2497e-23, 2.8081e-24, 4.8647e-23,\n 2.1798e-21, 2.2716e-24, 2.5351e-22, 6.0811e-23, 2.3645e-22, 3.9816e-23,\n 6.9720e-22, 1.3168e-24, 4.2807e-24, 2.9109e-24, 1.9872e-25, 8.0916e-22,\n 1.9546e-23, 2.2305e-23, 1.9800e-23, 2.7255e-25, 2.8355e-25, 6.1606e-24,\n 1.1826e-21, 2.6194e-22, 2.0634e-23, 1.1982e-25, 1.2003e-22, 1.2049e-23,\n 4.5636e-24, 2.8818e-23, 3.6268e-22, 1.4547e-22, 2.3127e-24, 9.8489e-25,\n 9.1725e-24, 2.9679e-22, 2.7479e-23, 6.6660e-23, 1.3550e-24, 1.2863e-23,\n 1.1196e-20, 1.3281e-24, 8.1954e-24, 2.2001e-23, 2.3094e-22, 3.5112e-23,\n 1.2260e-21, 1.5981e-24, 8.4881e-26, 1.2434e-23, 3.7006e-22, 4.0795e-23,\n 1.5228e-24, 3.1694e-23, 3.8579e-26, 5.3544e-23, 4.1920e-23, 1.5314e-22,\n 5.1283e-23, 6.1092e-24, 5.1162e-25, 1.5704e-24, 6.7471e-24, 2.2146e-23,\n 8.2350e-21, 2.6742e-25, 1.1057e-25, 2.5068e-24, 6.1183e-21, 8.1726e-25,\n 7.8573e-23, 1.2763e-21, 3.9642e-21, 6.1627e-23, 4.7941e-23, 2.0390e-22,\n 2.4972e-23, 8.0363e-24, 1.5652e-22, 3.9333e-22, 1.6128e-23, 4.1356e-22,\n 3.5359e-23, 9.4550e-25, 1.8929e-24, 1.3581e-24, 2.4851e-25, 2.5634e-22,\n 8.5862e-25, 1.6824e-24, 1.3764e-23, 2.7790e-22, 7.0573e-22, 6.0463e-24,\n 1.2207e-24, 4.1154e-23, 1.1106e-23, 6.8833e-22, 1.7264e-21, 2.3984e-23,\n 2.3590e-23, 6.2854e-23, 5.8934e-22, 1.0380e-22, 9.1971e-23, 3.2072e-22,\n 2.5398e-21, 1.4669e-21, 6.3770e-24, 4.1989e-24, 3.5801e-23, 1.0729e-24,\n 7.8415e-24, 1.3427e-25, 1.1646e-23, 1.0411e-24, 2.7583e-22, 8.8743e-22,\n 1.9167e-23, 2.4483e-23, 7.4575e-23, 1.2795e-24, 1.3371e-24, 3.6201e-23,\n 2.3031e-23, 9.6456e-22, 2.1757e-23, 2.9234e-25, 3.8542e-25, 4.5123e-22,\n 2.8937e-24, 1.0772e-22, 5.9077e-23, 6.1024e-23, 3.8781e-23, 7.6849e-22,\n 1.8042e-23, 3.6586e-24, 5.6350e-23, 7.0417e-24, 3.6479e-22, 2.7319e-24,\n 1.8678e-24, 1.3975e-22, 5.8776e-23, 1.2140e-22, 1.2634e-22, 3.2732e-23,\n 6.5203e-23, 7.6488e-23, 3.6988e-24, 2.0077e-24, 1.7811e-23, 6.0475e-22,\n 2.6773e-24, 7.2092e-23, 5.8626e-21, 6.7130e-25, 2.0464e-23, 4.0751e-23,\n 2.8277e-26, 4.1557e-22, 1.9836e-22, 6.8913e-23, 1.9558e-24, 9.8066e-23,\n 1.1116e-22, 5.6766e-24, 2.6350e-23, 2.6131e-22], device='cuda:0')" + }, + "22": { + "step": "tensor(30032.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([5.5931e-22, 1.2543e-25, 4.7976e-21, 1.4922e-24, 1.0447e-25, 3.5693e-22,\n 1.1651e-23, 1.3569e-21, 1.3196e-22, 4.0281e-25, 3.2998e-21, 1.1729e-21,\n 2.5235e-23, 2.4996e-25, 1.7024e-24, 5.7740e-22, 6.4774e-22, 9.2362e-23,\n 9.6128e-23, 1.8459e-25, 6.6810e-22, 4.4784e-23, 5.2607e-22, 2.2221e-24,\n 2.7351e-23, 2.3003e-21, 2.7979e-26, 3.7249e-24, 2.9353e-22, 6.6832e-23,\n 5.0663e-22, 1.0352e-21, 1.0529e-21, 1.6098e-22, 5.6594e-23, 2.1092e-23,\n 7.8106e-22, 3.3834e-22, 7.5817e-23, 1.0230e-22, 4.2607e-24, 3.9844e-22,\n 3.1363e-23, 1.4131e-21, 6.6511e-24, 3.1668e-23, 2.4160e-21, 8.4657e-25,\n 9.1846e-25, 7.1490e-23, 6.1700e-24, 2.1429e-23, 4.2610e-23, 1.6624e-22,\n 1.8639e-23, 7.5279e-27, 3.1273e-23, 6.5901e-26, 7.3779e-22, 1.6846e-21,\n 1.2711e-21, 1.4957e-23, 3.1341e-22, 1.2795e-23, 6.9911e-22, 6.8249e-22,\n 2.7559e-24, 2.6023e-23, 3.5836e-21, 1.4574e-22, 1.5767e-24, 7.1775e-25,\n 2.7744e-22, 2.4986e-21, 2.2072e-22, 3.7586e-22, 6.9726e-24, 1.4458e-22,\n 1.3803e-24, 2.9209e-24, 1.0295e-21, 7.1340e-23, 4.2347e-25, 2.7525e-21,\n 4.0168e-24, 7.9977e-23, 2.3749e-22, 4.1049e-22, 6.2263e-24, 1.2630e-23,\n 1.9928e-21, 5.9538e-24, 9.7222e-22, 2.5360e-22, 3.8474e-22, 9.0754e-24,\n 7.0364e-22, 3.2796e-25, 6.8511e-23, 2.4490e-23, 1.4373e-26, 1.8864e-21,\n 6.2929e-23, 5.8008e-23, 1.1568e-22, 6.8123e-25, 5.0088e-24, 1.6819e-22,\n 8.2157e-22, 3.5558e-22, 1.2264e-24, 6.4518e-26, 7.6150e-22, 2.6791e-23,\n 6.9474e-25, 1.1325e-22, 4.4504e-22, 5.8323e-22, 1.1710e-25, 5.5548e-24,\n 3.4728e-23, 3.4074e-22, 1.1252e-22, 9.7970e-22, 3.9484e-24, 1.5959e-23,\n 7.7887e-21, 3.7501e-24, 7.8484e-23, 2.9871e-24, 3.0245e-22, 1.9403e-23,\n 1.0389e-21, 4.4782e-25, 2.0377e-24, 3.6380e-23, 8.3769e-22, 1.0561e-22,\n 9.1409e-26, 1.0266e-23, 6.2826e-27, 5.7766e-23, 1.9255e-22, 3.0046e-22,\n 1.4052e-23, 1.1137e-22, 2.1429e-24, 2.4099e-24, 2.4932e-23, 7.3671e-23,\n 3.0017e-21, 7.7599e-26, 2.8542e-27, 2.7489e-23, 4.0969e-21, 1.5046e-23,\n 8.5666e-23, 3.2231e-21, 3.0475e-21, 2.8534e-22, 5.6841e-24, 6.2187e-22,\n 2.6037e-23, 7.5656e-23, 1.0739e-21, 2.7830e-22, 2.7331e-23, 8.8633e-22,\n 3.4968e-23, 4.6636e-24, 1.3712e-25, 2.7857e-24, 1.6130e-24, 9.2635e-22,\n 4.5454e-24, 2.6753e-24, 1.1299e-23, 1.3237e-22, 9.3641e-22, 1.7515e-23,\n 1.6280e-23, 2.6189e-22, 2.0549e-23, 1.1412e-21, 1.2342e-21, 9.6669e-23,\n 4.7656e-23, 1.2670e-23, 9.2324e-22, 1.5697e-22, 4.3494e-22, 6.4146e-22,\n 1.5768e-21, 2.3470e-21, 7.4574e-25, 2.9066e-23, 1.9382e-22, 7.8759e-24,\n 3.2173e-23, 2.0878e-24, 5.5146e-26, 2.1403e-24, 1.7982e-22, 4.9646e-22,\n 1.6663e-22, 3.4497e-23, 1.1624e-22, 1.4128e-23, 2.8929e-23, 1.8911e-23,\n 1.1505e-22, 7.3536e-22, 7.7821e-25, 1.9963e-24, 8.2960e-24, 3.1395e-22,\n 2.9922e-24, 4.0156e-22, 1.2270e-22, 1.9087e-22, 5.5780e-23, 1.3587e-21,\n 5.1801e-23, 1.7738e-22, 3.6966e-22, 4.0941e-23, 8.2287e-22, 4.1673e-23,\n 3.1099e-25, 1.2462e-22, 1.7179e-22, 1.2727e-22, 2.9210e-22, 4.0804e-24,\n 3.6268e-22, 5.8298e-23, 6.6264e-24, 1.6547e-24, 9.5199e-23, 1.4225e-21,\n 8.1799e-24, 1.4140e-22, 4.1577e-21, 6.7293e-24, 1.0843e-22, 4.0498e-23,\n 2.0851e-25, 1.6743e-22, 7.9204e-22, 5.3439e-23, 8.9728e-24, 2.6571e-22,\n 4.8724e-23, 3.9122e-23, 5.0893e-23, 1.0503e-22], device='cuda:0')" + }, + "23": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.0784e-25, 2.0383e-24, 1.5524e-24, ..., 2.6484e-24, 3.7221e-25,\n 3.4375e-25],\n [1.7955e-22, 2.4046e-23, 3.6403e-23, ..., 3.7905e-23, 9.1556e-24,\n 1.7526e-23],\n [1.3176e-21, 8.1245e-23, 4.2687e-22, ..., 5.5051e-22, 5.7431e-23,\n 1.5772e-22],\n ...,\n [2.2760e-22, 2.2276e-23, 1.2088e-22, ..., 1.7854e-22, 1.2505e-23,\n 5.7459e-23],\n [1.5578e-24, 4.6739e-25, 8.0313e-26, ..., 4.1285e-25, 8.0129e-26,\n 1.7564e-25],\n [3.9916e-25, 2.3853e-24, 8.1093e-25, ..., 4.3775e-24, 9.6544e-26,\n 7.7974e-25]], device='cuda:0')" + }, + "24": { + "step": "tensor(30032.)", + "exp_avg": "tensor([ 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([3.3157e-21, 2.1284e-19, 2.1942e-18, 3.7493e-21, 7.4282e-21, 2.5742e-19,\n 1.8251e-21, 3.0661e-21, 1.2372e-19, 2.1021e-20, 3.5262e-21, 6.3523e-19,\n 3.4012e-21, 6.1212e-21, 4.0721e-20, 9.7449e-19, 1.1629e-19, 3.1627e-19,\n 1.9954e-19, 2.1861e-22, 1.8998e-21, 2.8067e-19, 1.1270e-21, 9.8231e-20,\n 4.8290e-20, 2.0973e-19, 2.3591e-19, 7.2173e-20, 4.2475e-19, 4.5281e-19,\n 3.8190e-19, 5.9185e-20, 4.1990e-19, 1.8577e-19, 3.6261e-20, 4.7410e-19,\n 2.3446e-18, 3.9281e-21, 6.9369e-20, 5.6396e-22, 1.0984e-18, 4.2152e-20,\n 6.2098e-23, 7.3245e-19, 1.5932e-21, 7.2822e-20, 2.3370e-18, 9.8077e-21,\n 5.8024e-20, 1.0745e-18, 1.0119e-20, 1.5413e-20, 1.3462e-21, 1.5923e-19,\n 3.8047e-21, 8.2199e-20, 7.5929e-20, 1.5269e-20, 9.3933e-21, 5.8304e-21,\n 2.6068e-20, 6.5037e-22, 4.6999e-21, 4.7406e-20, 1.3284e-20, 2.8020e-21,\n 6.0916e-21, 4.9373e-21, 1.7808e-18, 9.7033e-20, 3.2368e-19, 7.8379e-21,\n 3.0446e-19, 2.3524e-20, 5.8126e-19, 8.4294e-22, 2.9175e-20, 8.6565e-19,\n 5.7264e-19, 5.1560e-21, 3.1110e-20, 2.1085e-19, 6.0153e-20, 6.2711e-19,\n 2.0180e-22, 1.2794e-19, 9.2201e-19, 1.0942e-18, 1.3097e-21, 6.9931e-22,\n 1.4212e-21, 2.6697e-21, 1.0977e-18, 8.2689e-20, 1.1062e-18, 6.0426e-19,\n 3.9833e-19, 5.8748e-23, 6.1393e-19, 1.6461e-19, 3.8857e-20, 2.0056e-18,\n 3.1858e-19, 3.3954e-20, 2.5053e-19, 3.6506e-21, 1.7270e-21, 7.5189e-19,\n 9.7705e-21, 2.4601e-19, 8.6984e-19, 1.0328e-19, 1.0766e-18, 3.5311e-23,\n 8.3123e-21, 4.7923e-19, 2.0168e-20, 7.4370e-19, 1.8627e-19, 2.7741e-20,\n 7.7595e-21, 2.9396e-20, 8.5044e-19, 3.0104e-18, 8.1388e-20, 1.4943e-19,\n 4.9103e-18, 1.1212e-23, 6.5078e-19, 2.7348e-20, 3.4359e-20, 8.3641e-21,\n 1.6521e-19, 1.5848e-19, 8.1206e-20, 1.0703e-19, 6.2292e-20, 2.3219e-19,\n 1.5388e-19, 3.8087e-18, 1.3403e-23, 3.0682e-19, 3.6243e-19, 6.5949e-20,\n 4.3339e-20, 4.7613e-19, 1.2294e-20, 5.1312e-19, 1.4811e-20, 2.5983e-21,\n 8.8014e-19, 8.5519e-22, 8.5900e-22, 5.8733e-21, 4.6929e-19, 3.4604e-21,\n 3.8854e-19, 3.2447e-18, 1.4840e-18, 1.1621e-19, 8.2225e-21, 8.0573e-21,\n 6.8530e-21, 2.5409e-19, 3.9978e-19, 3.5786e-21, 6.4033e-23, 5.7442e-19,\n 2.3925e-22, 7.1856e-19, 1.1483e-19, 1.1851e-20, 1.0534e-20, 1.7579e-18,\n 2.7437e-20, 1.9366e-22, 4.1363e-21, 3.3605e-20, 1.7356e-19, 1.1506e-20,\n 1.5974e-19, 3.9740e-19, 5.3994e-20, 1.4998e-19, 1.2817e-19, 2.6249e-19,\n 1.8502e-21, 2.3251e-18, 6.0891e-19, 7.6629e-22, 5.1184e-19, 3.8952e-21,\n 5.7280e-20, 5.0754e-21, 3.2177e-19, 1.0777e-21, 1.5228e-18, 1.2750e-20,\n 1.5491e-21, 3.9212e-22, 4.2897e-19, 2.5321e-19, 2.1633e-22, 6.0075e-20,\n 2.9475e-23, 6.4133e-21, 4.3150e-20, 4.3777e-21, 4.2419e-21, 3.7969e-20,\n 9.4601e-19, 1.4593e-19, 1.2768e-20, 2.5549e-21, 3.4490e-19, 5.4828e-20,\n 3.7856e-22, 4.4098e-22, 2.5199e-20, 2.9296e-22, 2.4798e-20, 1.1583e-18,\n 1.8599e-19, 1.2874e-20, 1.1121e-18, 2.7508e-20, 1.5796e-22, 1.4797e-19,\n 9.5444e-24, 2.8317e-20, 8.8738e-19, 2.7213e-19, 6.9188e-22, 8.3257e-22,\n 5.2369e-19, 5.6772e-20, 8.6581e-21, 5.4387e-20, 5.3019e-21, 4.2461e-19,\n 3.2306e-22, 5.5029e-20, 3.2527e-19, 7.2577e-23, 1.0249e-21, 1.0659e-20,\n 1.0424e-20, 1.2045e-19, 1.4862e-18, 1.9212e-21, 7.9797e-20, 9.8045e-19,\n 3.6593e-22, 5.3746e-19, 1.6025e-21, 5.2849e-21], device='cuda:0')" + }, + "25": { + "step": "tensor(30032.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.7748e-23, 1.1314e-22, 5.5482e-21, 2.5911e-25, 5.1375e-25, 5.7803e-22,\n 1.3119e-24, 6.1636e-24, 5.5684e-23, 1.2528e-24, 6.1865e-23, 5.0118e-22,\n 7.9618e-24, 1.6256e-25, 3.0509e-23, 6.0100e-22, 1.2321e-23, 1.5500e-22,\n 5.8114e-23, 4.1961e-26, 3.5347e-23, 7.3027e-23, 6.9141e-24, 1.1302e-22,\n 7.8789e-24, 1.5453e-23, 1.6297e-22, 4.2054e-23, 2.2074e-22, 4.8507e-22,\n 1.7193e-22, 3.0239e-24, 3.3161e-22, 4.7975e-23, 8.1494e-24, 1.1173e-21,\n 2.5511e-21, 4.3387e-23, 1.8702e-22, 6.4681e-24, 1.2781e-21, 3.6988e-24,\n 4.6317e-25, 4.3789e-22, 8.0577e-25, 4.6411e-23, 4.7165e-21, 5.3917e-23,\n 2.0820e-23, 8.3975e-22, 3.7257e-24, 3.6372e-25, 8.0728e-26, 3.9272e-23,\n 4.6880e-25, 4.1729e-23, 4.8379e-23, 5.8241e-23, 1.8506e-24, 6.1818e-23,\n 1.0853e-23, 2.4107e-24, 1.0984e-23, 8.9611e-23, 1.5867e-24, 2.7647e-23,\n 2.0602e-24, 8.5177e-23, 1.8575e-21, 1.3666e-22, 1.2340e-22, 6.3402e-25,\n 2.4199e-22, 8.9298e-24, 2.2908e-21, 7.8210e-24, 1.8603e-23, 3.5039e-22,\n 6.8523e-22, 2.5486e-23, 2.6118e-23, 3.9198e-23, 7.6659e-24, 3.6882e-22,\n 2.8276e-23, 7.0766e-24, 5.4868e-22, 1.9435e-22, 5.1728e-24, 2.5946e-23,\n 2.9112e-23, 1.0578e-23, 2.1791e-22, 8.0222e-24, 2.4492e-21, 2.9693e-22,\n 3.9644e-22, 2.7407e-26, 1.2927e-21, 2.8042e-23, 2.9926e-24, 1.9956e-21,\n 4.1285e-22, 1.4361e-23, 3.1765e-22, 3.6859e-25, 5.0182e-24, 1.5035e-21,\n 8.9446e-24, 7.7027e-23, 2.9950e-22, 1.8301e-22, 3.4213e-22, 3.7798e-25,\n 3.8537e-24, 2.2987e-22, 1.6084e-24, 2.3813e-22, 4.5539e-22, 1.6515e-23,\n 1.2949e-24, 2.3226e-23, 3.9022e-22, 2.5323e-21, 2.7314e-22, 5.9993e-23,\n 7.3712e-21, 2.1120e-25, 1.1036e-21, 1.4876e-23, 2.2140e-23, 2.0359e-24,\n 1.2308e-22, 3.8921e-23, 2.8205e-23, 2.5396e-23, 4.9875e-24, 7.6401e-23,\n 1.1159e-22, 8.9476e-21, 6.6327e-26, 1.0274e-22, 3.7881e-22, 6.9472e-23,\n 1.6494e-23, 6.9061e-23, 4.9531e-24, 8.3043e-22, 9.7054e-24, 2.6624e-25,\n 6.4572e-22, 3.1545e-24, 9.4249e-24, 1.5646e-23, 1.5641e-22, 1.3819e-24,\n 6.3585e-22, 1.1603e-20, 9.3104e-22, 8.9246e-24, 3.7344e-23, 1.6529e-23,\n 2.5933e-24, 7.4788e-23, 2.4497e-22, 1.5055e-24, 1.2864e-24, 8.8374e-22,\n 3.0964e-25, 1.1403e-21, 4.1593e-23, 2.2128e-25, 2.1984e-23, 5.8695e-22,\n 5.3384e-24, 3.5172e-24, 1.2540e-24, 1.2973e-23, 2.4311e-22, 1.5794e-24,\n 9.2469e-23, 1.2866e-22, 1.2965e-22, 6.0244e-23, 2.5955e-23, 1.2493e-22,\n 2.4500e-24, 5.1876e-21, 8.5703e-22, 4.0834e-24, 1.8100e-22, 2.5694e-24,\n 2.2818e-23, 4.0793e-23, 1.8944e-22, 1.6978e-24, 1.1348e-21, 4.3534e-24,\n 2.4561e-24, 5.0732e-25, 8.5802e-22, 2.0507e-22, 9.2521e-25, 1.1416e-22,\n 6.8568e-24, 1.8033e-23, 2.5187e-24, 1.6920e-23, 2.0787e-23, 3.4915e-23,\n 1.9435e-22, 2.4953e-22, 4.1819e-23, 1.4589e-23, 1.8300e-21, 7.7961e-23,\n 4.0450e-24, 4.7989e-24, 2.3250e-24, 2.6024e-24, 2.1017e-23, 7.0105e-22,\n 5.4383e-23, 3.8888e-23, 1.2985e-21, 3.1403e-24, 7.1151e-24, 3.3025e-23,\n 9.6870e-25, 9.0610e-24, 9.8643e-22, 1.7334e-22, 4.1344e-24, 2.8887e-23,\n 5.8165e-22, 1.0848e-22, 2.5571e-23, 3.5359e-22, 5.9400e-25, 1.2519e-22,\n 2.9819e-25, 5.2541e-23, 7.0642e-23, 2.0939e-26, 1.3309e-23, 1.6795e-24,\n 3.7574e-24, 4.8833e-23, 5.9583e-22, 1.6487e-24, 5.2032e-23, 1.0615e-21,\n 2.9876e-25, 1.7452e-22, 1.4091e-25, 5.6035e-24], device='cuda:0')" + }, + "26": { + "step": "tensor(30032.)", + "exp_avg": "tensor([ 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([3.3046e-24, 1.7039e-22, 3.2971e-21, 1.7119e-24, 1.3470e-24, 3.9391e-22,\n 6.7631e-25, 7.1780e-24, 1.2060e-22, 1.3271e-23, 5.7460e-24, 6.4651e-22,\n 1.4254e-23, 1.0392e-24, 1.0268e-22, 1.0228e-21, 1.0598e-22, 3.1567e-22,\n 1.7118e-22, 3.3928e-26, 3.3365e-24, 2.6216e-22, 7.8569e-25, 1.8068e-22,\n 3.5660e-23, 2.2228e-22, 3.9407e-22, 1.5500e-22, 4.0245e-22, 4.0466e-22,\n 3.6186e-22, 5.3728e-23, 6.7856e-22, 1.6244e-22, 2.0821e-23, 8.2671e-22,\n 2.5919e-21, 3.0968e-24, 1.6954e-22, 1.2801e-24, 1.1683e-21, 3.5524e-23,\n 2.7264e-25, 1.1376e-21, 2.8969e-24, 2.8409e-23, 3.4490e-21, 4.9876e-23,\n 3.9942e-23, 1.1101e-21, 2.0385e-23, 1.8445e-25, 1.1889e-24, 2.7131e-22,\n 2.6150e-24, 4.2498e-23, 3.9420e-23, 5.5863e-23, 2.4357e-23, 8.6210e-24,\n 5.2493e-23, 3.1864e-25, 2.7969e-24, 1.1950e-22, 3.3379e-23, 3.9750e-24,\n 1.8246e-23, 5.1733e-23, 1.9268e-21, 1.7960e-22, 2.9072e-22, 3.9784e-24,\n 2.7572e-22, 4.3365e-23, 1.0249e-21, 1.2581e-24, 6.8603e-24, 8.9783e-22,\n 9.4046e-22, 3.1122e-23, 5.4171e-23, 1.8898e-22, 4.3784e-23, 9.7787e-22,\n 9.5357e-26, 1.2673e-22, 9.4585e-22, 1.2411e-21, 1.0643e-23, 5.2120e-25,\n 3.1220e-24, 1.5096e-23, 1.6056e-21, 7.8646e-23, 1.8006e-21, 8.6427e-22,\n 6.7102e-22, 4.3379e-27, 1.0292e-21, 1.4963e-22, 4.7619e-24, 2.2197e-21,\n 5.6932e-22, 1.8917e-23, 4.6678e-22, 2.3217e-24, 1.4486e-23, 6.9222e-22,\n 1.8786e-23, 3.9020e-22, 9.2700e-22, 2.2109e-22, 1.1695e-21, 1.2658e-24,\n 6.3071e-24, 7.6046e-22, 1.2451e-23, 1.1018e-21, 3.8755e-22, 1.6381e-23,\n 5.1475e-24, 7.9230e-23, 1.2876e-21, 4.2687e-21, 2.0748e-22, 2.7115e-22,\n 5.5692e-21, 5.2536e-26, 6.0583e-22, 2.4734e-23, 2.6739e-23, 3.4683e-24,\n 2.8416e-22, 1.4641e-22, 4.9095e-23, 8.2870e-23, 6.0178e-23, 2.0067e-22,\n 2.8936e-22, 5.5330e-21, 9.8925e-25, 4.8565e-22, 6.2352e-22, 1.3872e-22,\n 6.4779e-23, 4.9859e-22, 7.9608e-24, 4.3768e-22, 2.5563e-23, 2.4827e-24,\n 1.3163e-21, 3.2291e-25, 1.3785e-23, 5.7436e-24, 6.7669e-22, 3.5851e-24,\n 6.8604e-22, 4.8535e-21, 2.1848e-21, 2.0223e-22, 8.1526e-24, 6.2338e-24,\n 7.5470e-24, 2.6794e-22, 6.0623e-22, 1.1394e-23, 4.1558e-25, 9.3587e-22,\n 4.2045e-24, 6.3693e-22, 9.1391e-23, 1.6490e-24, 4.4964e-23, 1.9599e-21,\n 1.6709e-23, 8.7232e-24, 2.1986e-24, 6.8885e-23, 3.3113e-22, 7.8955e-24,\n 1.1661e-22, 4.2493e-22, 1.5271e-22, 2.5259e-22, 2.2207e-22, 2.1201e-22,\n 5.0043e-24, 3.4830e-21, 1.0018e-21, 7.4826e-25, 7.8123e-22, 1.1889e-23,\n 8.9295e-23, 3.6844e-24, 5.1739e-22, 1.2965e-23, 1.6612e-21, 7.8411e-24,\n 1.0940e-24, 2.4367e-24, 7.5585e-22, 4.4649e-22, 4.8341e-25, 1.5518e-22,\n 9.1257e-26, 8.7544e-24, 2.9099e-23, 3.4162e-23, 4.2740e-24, 1.3880e-23,\n 1.0411e-21, 2.9938e-22, 1.0399e-23, 2.4179e-23, 6.9357e-22, 1.2573e-22,\n 5.9310e-26, 7.6069e-25, 5.4509e-23, 2.6684e-25, 1.0283e-23, 1.2382e-21,\n 3.2193e-22, 1.2195e-23, 1.1522e-21, 1.3649e-23, 1.3662e-24, 1.4566e-22,\n 1.8122e-28, 7.4571e-24, 1.3905e-21, 2.1479e-22, 1.0489e-24, 1.0663e-24,\n 8.4508e-22, 2.6053e-23, 4.5774e-23, 1.8283e-22, 2.5386e-24, 6.6928e-22,\n 5.1308e-25, 1.1442e-22, 3.5682e-22, 6.6307e-26, 1.6211e-24, 3.2414e-24,\n 6.5420e-24, 8.4205e-23, 1.6142e-21, 1.4626e-24, 1.6469e-22, 9.9545e-22,\n 3.9293e-26, 8.4245e-22, 1.1725e-24, 6.4087e-24], device='cuda:0')" + }, + "27": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[8.7624e-24, 1.9488e-24, 3.0749e-23, ..., 1.6501e-25, 8.5118e-25,\n 7.8458e-24],\n [2.0309e-24, 4.6023e-25, 2.9421e-26, ..., 2.3859e-25, 5.4427e-26,\n 8.4103e-26],\n [1.6586e-25, 9.2156e-25, 1.6601e-24, ..., 1.2087e-24, 5.8811e-26,\n 3.2096e-26],\n ...,\n [3.1719e-22, 7.2113e-24, 1.8102e-22, ..., 1.4597e-23, 3.2528e-24,\n 3.0434e-23],\n [3.8201e-22, 9.1035e-23, 6.9973e-23, ..., 1.1027e-22, 2.0750e-23,\n 4.1467e-23],\n [1.8019e-23, 9.0228e-26, 2.5857e-24, ..., 5.5384e-25, 4.1392e-25,\n 2.6018e-25]], device='cuda:0')" + }, + "28": { + "step": "tensor(30032.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([2.7776e-20, 1.3697e-21, 1.2209e-22, 1.8274e-20, 7.4976e-22, 5.5144e-19,\n 6.3033e-20, 4.0665e-20, 9.5141e-22, 2.7237e-22, 1.8948e-21, 4.5311e-20,\n 8.3050e-21, 3.5073e-20, 4.6875e-21, 1.0064e-19, 3.2037e-21, 5.0479e-19,\n 2.4676e-19, 1.2190e-19, 2.3755e-19, 4.5897e-19, 3.3022e-19, 2.2300e-20,\n 8.5964e-20, 1.5882e-18, 7.8920e-20, 1.5569e-19, 9.1133e-21, 1.0494e-20,\n 6.2774e-19, 6.3991e-23, 3.2556e-19, 8.0720e-20, 9.9527e-23, 1.1720e-19,\n 1.8794e-18, 7.8964e-19, 2.9138e-20, 2.0918e-19, 9.2947e-21, 3.1723e-19,\n 8.8286e-20, 2.3762e-19, 3.2966e-22, 2.1680e-22, 2.0430e-19, 7.1946e-21,\n 3.4450e-22, 9.1092e-19, 6.6882e-21, 3.3313e-22, 5.0037e-21, 9.3999e-19,\n 1.6178e-20, 4.9304e-22, 1.4091e-20, 1.6912e-20, 1.2994e-19, 1.6214e-18,\n 4.7267e-19, 1.8513e-22, 2.6770e-19, 7.5409e-21, 1.9968e-21, 1.0573e-20,\n 1.5227e-21, 2.2458e-20, 4.5177e-19, 3.7057e-19, 8.0055e-22, 2.5123e-20,\n 1.0239e-19, 8.6981e-22, 5.0327e-20, 9.7866e-23, 4.1343e-21, 1.4214e-18,\n 3.4702e-19, 6.6198e-22, 3.7996e-20, 1.7532e-20, 6.3846e-20, 1.2893e-21,\n 3.7686e-18, 1.4137e-19, 6.2666e-20, 9.0459e-19, 5.6311e-21, 1.6534e-18,\n 1.0051e-18, 1.4039e-21, 2.5615e-21, 3.6965e-19, 7.5511e-19, 1.7147e-18,\n 1.5411e-19, 3.3881e-23, 2.1129e-22, 6.5836e-19, 5.3862e-22, 1.6715e-18,\n 1.4098e-19, 3.0405e-22, 5.3258e-19, 2.1858e-20, 1.5928e-22, 3.0681e-19,\n 2.4512e-19, 2.9980e-19, 4.3379e-20, 3.8738e-20, 5.2605e-19, 3.0196e-20,\n 1.0507e-20, 2.7850e-19, 2.3630e-20, 2.8188e-21, 1.7640e-21, 1.0176e-21,\n 4.9210e-22, 2.8148e-19, 2.0202e-19, 4.4987e-20, 1.3154e-19, 6.8826e-20,\n 2.1297e-18, 5.1763e-21, 1.3226e-20, 3.4224e-21, 4.9437e-20, 2.9187e-21,\n 4.5463e-19, 6.8114e-21, 1.6229e-19, 1.8911e-20, 9.1739e-20, 3.3145e-24,\n 7.8013e-21, 1.6192e-18, 7.2887e-22, 4.1184e-19, 5.7690e-21, 2.9977e-19,\n 1.1433e-19, 3.2099e-18, 3.0160e-21, 3.2943e-19, 1.2399e-18, 1.5962e-21,\n 3.4882e-22, 1.9467e-20, 8.7574e-21, 4.5710e-21, 3.5773e-18, 3.2350e-19,\n 2.1672e-20, 7.6553e-20, 1.6356e-18, 7.1225e-21, 3.3448e-19, 6.5392e-20,\n 2.6716e-20, 3.7846e-20, 4.4275e-18, 2.7182e-19, 3.0621e-21, 3.5198e-19,\n 7.5902e-21, 2.6832e-20, 4.3337e-19, 9.7758e-21, 1.8690e-20, 9.0542e-19,\n 6.4623e-21, 5.7750e-21, 1.6025e-21, 3.3401e-20, 6.1710e-19, 2.0557e-19,\n 3.8503e-22, 1.5508e-18, 2.6835e-20, 3.7860e-19, 9.8619e-20, 1.0828e-20,\n 2.7998e-19, 2.7478e-18, 3.4097e-19, 1.4351e-20, 5.1613e-19, 2.6750e-19,\n 6.9548e-20, 9.3216e-21, 6.0880e-19, 3.9785e-20, 5.2875e-21, 1.0598e-19,\n 4.0180e-19, 8.3398e-20, 2.2177e-19, 2.7917e-22, 5.0851e-20, 1.1884e-21,\n 3.2280e-20, 1.3778e-22, 3.8548e-21, 3.2413e-22, 1.2726e-19, 1.1401e-19,\n 1.7571e-19, 4.6695e-22, 3.1725e-19, 1.4055e-21, 5.3378e-21, 1.5318e-20,\n 9.9526e-20, 5.6040e-19, 4.6161e-20, 7.0665e-20, 1.6893e-22, 1.8008e-22,\n 2.3957e-19, 9.2048e-22, 1.7311e-19, 4.8158e-20, 4.3245e-19, 4.2343e-21,\n 1.2973e-21, 1.1049e-23, 2.2842e-18, 4.9629e-20, 3.4664e-19, 8.1992e-19,\n 3.3233e-19, 1.6356e-21, 1.1189e-21, 4.0695e-22, 1.4032e-21, 7.5037e-19,\n 1.8253e-21, 1.2803e-19, 1.4904e-21, 2.5716e-19, 2.6928e-19, 9.1716e-21,\n 9.7771e-22, 2.8649e-21, 1.4993e-19, 2.8882e-21, 1.2702e-19, 2.0795e-19,\n 4.8563e-21, 3.1617e-19, 4.5940e-19, 9.4326e-21], device='cuda:0')" + }, + "29": { + "step": "tensor(30032.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([7.4418e-24, 5.0933e-25, 2.8473e-23, 2.9787e-23, 4.7279e-26, 3.0310e-22,\n 5.6400e-23, 6.8464e-24, 3.6765e-24, 8.7453e-25, 3.2297e-23, 6.4139e-24,\n 2.8427e-24, 1.8464e-23, 2.1030e-24, 3.5192e-23, 2.7558e-24, 1.4264e-22,\n 1.6548e-22, 1.4095e-22, 1.7500e-23, 2.5195e-22, 4.0615e-22, 1.8633e-24,\n 1.0824e-22, 2.0662e-21, 2.0500e-23, 2.2696e-22, 1.8700e-23, 1.9274e-24,\n 7.3559e-22, 5.6970e-24, 5.1051e-22, 1.0029e-22, 1.6439e-26, 1.3553e-23,\n 2.7460e-21, 2.2265e-22, 1.9288e-23, 4.1061e-22, 2.3984e-23, 6.0628e-23,\n 2.8915e-22, 1.0238e-22, 1.0468e-24, 5.2052e-24, 9.4993e-24, 3.7046e-24,\n 1.9245e-24, 1.1539e-21, 3.0012e-24, 2.7474e-26, 1.9021e-24, 3.3984e-21,\n 3.7576e-24, 1.3442e-24, 4.0176e-23, 3.3913e-23, 2.2981e-22, 6.5750e-22,\n 1.9419e-22, 2.4173e-26, 4.4748e-22, 5.7732e-24, 1.7375e-24, 1.2482e-23,\n 3.1056e-24, 2.1326e-23, 1.0546e-22, 1.1887e-21, 2.6765e-24, 1.1104e-23,\n 8.1669e-23, 3.6306e-23, 8.7457e-24, 2.3847e-24, 5.1555e-24, 1.5096e-21,\n 2.1511e-22, 3.4866e-25, 1.9347e-23, 2.7979e-24, 9.1715e-23, 1.5800e-23,\n 5.3560e-21, 5.8168e-23, 9.9734e-24, 2.4468e-22, 5.0430e-24, 2.7822e-21,\n 2.1667e-21, 3.4664e-25, 7.5739e-23, 1.8742e-22, 7.1734e-22, 5.1169e-21,\n 3.2455e-23, 2.2944e-25, 1.5220e-23, 4.6796e-22, 9.6901e-27, 2.1870e-21,\n 5.5923e-23, 7.5182e-25, 4.8242e-22, 1.9865e-23, 9.7850e-26, 3.3773e-22,\n 5.7639e-23, 6.0133e-22, 2.5450e-23, 3.1927e-23, 1.8243e-22, 4.0161e-24,\n 1.6543e-24, 7.4069e-23, 6.7246e-24, 5.3016e-23, 2.0691e-24, 1.9637e-25,\n 1.6561e-25, 5.1417e-22, 3.7265e-23, 4.0788e-23, 3.7279e-22, 1.5750e-23,\n 8.8116e-22, 3.6287e-25, 7.0822e-24, 1.3989e-23, 4.6299e-23, 3.1154e-24,\n 1.0161e-21, 2.1026e-24, 4.9892e-23, 7.1896e-24, 1.0585e-23, 8.2411e-27,\n 1.5025e-24, 6.5175e-22, 2.2250e-24, 2.0724e-22, 1.1241e-23, 3.0207e-22,\n 4.9562e-23, 6.0035e-21, 9.2095e-24, 6.6162e-22, 6.0473e-22, 2.0789e-24,\n 2.5170e-23, 8.5165e-23, 2.2628e-24, 9.1875e-24, 6.7265e-21, 3.4184e-22,\n 3.1935e-23, 5.4217e-23, 1.2594e-21, 1.5349e-24, 3.0732e-23, 8.3645e-24,\n 1.5883e-23, 5.0778e-24, 1.6380e-20, 7.0863e-22, 9.9537e-24, 1.9211e-22,\n 2.2380e-23, 2.2784e-24, 4.4721e-22, 2.4333e-24, 7.0178e-23, 5.1791e-22,\n 4.0537e-24, 2.4604e-24, 1.5313e-23, 5.1179e-24, 2.0551e-21, 3.1215e-23,\n 7.0114e-25, 1.5846e-21, 4.9345e-24, 1.9188e-22, 7.6826e-24, 3.9781e-24,\n 2.1878e-22, 6.3691e-21, 2.8852e-22, 4.7577e-24, 2.2242e-22, 1.4693e-22,\n 2.3650e-23, 5.4693e-23, 4.7364e-22, 6.6594e-24, 2.4447e-23, 6.1108e-23,\n 3.6355e-22, 2.9505e-22, 9.7081e-23, 4.9599e-24, 3.5266e-23, 1.2673e-24,\n 1.2691e-23, 3.9594e-25, 5.2776e-24, 5.2924e-25, 1.6647e-23, 7.5595e-23,\n 1.1655e-23, 1.1665e-23, 5.3170e-23, 5.7276e-24, 3.2946e-24, 1.4690e-24,\n 2.5286e-23, 9.4922e-22, 7.4555e-24, 1.1116e-22, 1.2320e-25, 1.0822e-23,\n 2.0237e-22, 1.8352e-23, 2.4797e-23, 3.1295e-23, 2.5593e-22, 6.3534e-24,\n 1.4085e-24, 1.7260e-26, 3.4541e-21, 7.0092e-23, 3.6965e-22, 6.9926e-22,\n 1.1082e-22, 2.5946e-23, 1.8856e-24, 9.0748e-24, 5.1376e-25, 2.5852e-22,\n 5.0918e-25, 7.3510e-23, 1.7076e-23, 3.5569e-22, 1.4757e-22, 1.9344e-23,\n 3.6471e-25, 3.2466e-24, 3.0698e-23, 2.0852e-25, 1.2926e-22, 2.0507e-22,\n 2.3563e-24, 1.4567e-22, 6.2876e-22, 1.2337e-23], device='cuda:0')" + }, + "30": { + "step": "tensor(30032.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([4.3899e-23, 1.1194e-25, 1.0526e-24, 1.7402e-23, 6.0980e-25, 7.7489e-22,\n 6.7857e-23, 5.7512e-23, 2.9511e-24, 1.1440e-24, 1.1689e-24, 6.1134e-23,\n 1.3596e-23, 3.4802e-23, 4.7834e-24, 1.4986e-22, 7.8593e-24, 6.5922e-22,\n 3.0227e-22, 1.3957e-22, 3.2140e-22, 5.9574e-22, 4.2163e-22, 2.8005e-23,\n 9.6396e-23, 2.0361e-21, 1.0957e-22, 2.3605e-22, 5.3284e-24, 1.6294e-23,\n 7.8987e-22, 5.1449e-25, 4.7668e-22, 9.6194e-23, 1.1899e-25, 1.6144e-22,\n 2.4713e-21, 1.0265e-21, 4.4769e-23, 3.2066e-22, 7.0882e-24, 4.1549e-22,\n 1.4906e-22, 3.1040e-22, 7.2344e-25, 4.8472e-24, 2.7729e-22, 9.9724e-24,\n 1.2171e-27, 1.1746e-21, 5.5078e-24, 1.3067e-24, 2.9180e-24, 1.3933e-21,\n 2.2344e-23, 8.8526e-25, 1.6854e-23, 3.1963e-23, 1.8469e-22, 2.1646e-21,\n 6.5803e-22, 4.4868e-27, 3.1476e-22, 1.5555e-23, 1.0543e-24, 1.1217e-23,\n 1.4297e-24, 4.2543e-23, 5.8387e-22, 5.7459e-22, 1.7608e-24, 2.4892e-23,\n 1.4282e-22, 2.8330e-24, 6.5890e-23, 5.2845e-26, 6.5395e-24, 1.8508e-21,\n 4.9116e-22, 1.0316e-24, 4.4628e-23, 2.6768e-23, 7.0979e-23, 4.7911e-25,\n 4.9102e-21, 1.9856e-22, 9.2005e-23, 1.1955e-21, 5.6114e-24, 2.1619e-21,\n 1.4569e-21, 1.4061e-25, 1.3636e-24, 4.6293e-22, 1.0594e-21, 2.4880e-21,\n 2.0891e-22, 1.9585e-25, 1.3709e-24, 8.2961e-22, 5.2891e-25, 2.1869e-21,\n 1.9941e-22, 4.4213e-25, 7.5778e-22, 2.5029e-23, 3.8243e-25, 3.9463e-22,\n 3.3743e-22, 4.6072e-22, 7.1906e-23, 5.7382e-23, 7.2133e-22, 4.1096e-23,\n 1.1278e-23, 3.8674e-22, 2.6376e-23, 9.1469e-24, 1.2944e-24, 4.1704e-25,\n 1.6243e-24, 4.2092e-22, 2.6972e-22, 5.3416e-23, 2.1779e-22, 9.3317e-23,\n 2.8169e-21, 6.0578e-24, 1.9027e-23, 2.0039e-24, 6.0153e-23, 7.1685e-24,\n 6.7296e-22, 1.1688e-23, 2.0234e-22, 2.2098e-23, 1.2144e-22, 2.6126e-25,\n 1.1385e-23, 2.2383e-21, 1.0220e-24, 5.7649e-22, 3.0973e-24, 4.3811e-22,\n 1.5147e-22, 4.2647e-21, 1.2173e-23, 3.7998e-22, 1.7013e-21, 5.3024e-25,\n 2.1113e-24, 1.8776e-23, 1.0743e-23, 2.4543e-24, 4.9867e-21, 4.1244e-22,\n 3.9102e-23, 9.7082e-23, 2.2808e-21, 7.2927e-24, 4.5043e-22, 9.3045e-23,\n 4.1904e-23, 5.4331e-23, 6.2047e-21, 4.1390e-22, 1.3445e-23, 4.9733e-22,\n 1.8865e-23, 3.7001e-23, 5.7564e-22, 9.3432e-24, 4.0139e-23, 1.2334e-21,\n 1.3122e-23, 1.0038e-23, 1.0052e-23, 4.7982e-23, 9.0897e-22, 2.6696e-22,\n 1.9668e-24, 1.9898e-21, 3.3641e-23, 5.2691e-22, 1.2967e-22, 9.1796e-24,\n 4.0286e-22, 3.8539e-21, 4.9346e-22, 2.0131e-23, 7.1791e-22, 3.8316e-22,\n 8.7267e-23, 6.5773e-24, 8.5651e-22, 5.6905e-23, 3.4202e-24, 1.3809e-22,\n 5.0174e-22, 1.4950e-22, 3.1757e-22, 7.5409e-26, 7.4951e-23, 2.9470e-24,\n 5.2008e-23, 7.6897e-25, 8.4272e-24, 1.1997e-25, 1.7459e-22, 1.3991e-22,\n 2.3124e-22, 2.9706e-26, 4.4533e-22, 3.9496e-24, 3.7611e-24, 1.9030e-23,\n 1.4108e-22, 6.8860e-22, 6.0096e-23, 8.2858e-23, 4.0753e-25, 8.8608e-25,\n 3.4987e-22, 1.6272e-25, 2.3945e-22, 5.9224e-23, 6.0721e-22, 1.7854e-24,\n 2.6910e-25, 1.2293e-25, 3.1790e-21, 5.0304e-23, 4.5315e-22, 1.0387e-21,\n 4.6591e-22, 1.3432e-23, 2.7853e-24, 2.9508e-24, 2.6432e-24, 1.0305e-21,\n 6.9563e-25, 1.7003e-22, 4.8811e-24, 3.0133e-22, 3.4660e-22, 2.2145e-23,\n 3.8574e-24, 1.0252e-24, 1.9865e-22, 5.2815e-24, 1.9253e-22, 2.8070e-22,\n 7.6435e-24, 4.3770e-22, 5.7346e-22, 2.1068e-23], device='cuda:0')" + }, + "31": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.5185e-22, 1.6116e-25, 2.5090e-22, ..., 5.0511e-23, 5.5250e-25,\n 4.9011e-23],\n [4.4913e-23, 8.6262e-24, 2.2516e-24, ..., 3.3876e-23, 6.5223e-24,\n 1.7126e-24],\n [1.5144e-21, 8.9544e-23, 4.1510e-22, ..., 8.0227e-22, 6.9298e-23,\n 2.0799e-22],\n ...,\n [3.9409e-23, 7.6998e-24, 1.2861e-23, ..., 8.5966e-25, 7.6879e-25,\n 4.5609e-24],\n [3.3052e-23, 2.7015e-24, 1.0955e-23, ..., 4.7099e-24, 7.9876e-25,\n 3.8008e-24],\n [2.6381e-23, 3.6343e-25, 1.6368e-23, ..., 8.0718e-24, 7.6101e-26,\n 3.5692e-24]], device='cuda:0')" + }, + "32": { + "step": "tensor(30032.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([3.3294e-19, 5.8624e-20, 2.7012e-18, 8.3395e-21, 1.7511e-21, 2.8665e-19,\n 1.0568e-22, 2.1753e-20, 5.9139e-19, 1.3201e-19, 3.0159e-20, 4.7376e-19,\n 6.5490e-22, 9.2655e-22, 1.5895e-19, 4.1423e-19, 3.4326e-19, 8.2352e-22,\n 2.4133e-22, 4.1338e-22, 9.5899e-20, 7.0126e-19, 2.2698e-21, 2.2496e-20,\n 5.3225e-22, 8.6370e-19, 1.1615e-21, 5.8702e-20, 1.8845e-19, 3.6123e-22,\n 9.6425e-20, 4.6365e-19, 3.2572e-19, 1.0703e-20, 2.6376e-20, 6.1223e-20,\n 2.5114e-18, 3.1100e-18, 7.7703e-22, 1.1070e-19, 4.6135e-19, 4.8615e-19,\n 4.8613e-22, 7.9726e-19, 4.5925e-23, 3.3982e-21, 7.3228e-20, 1.0925e-19,\n 6.4020e-22, 6.6614e-20, 1.4153e-20, 8.4850e-21, 1.5398e-21, 8.5085e-19,\n 7.6931e-20, 1.1288e-21, 5.4212e-22, 2.1256e-21, 4.7957e-19, 4.3126e-19,\n 4.0823e-19, 2.3332e-21, 1.7825e-19, 5.5882e-20, 4.4635e-19, 1.5994e-18,\n 1.1165e-19, 7.7956e-21, 9.7692e-20, 1.2679e-20, 6.9808e-20, 1.7275e-21,\n 6.4163e-20, 5.5473e-22, 6.3216e-19, 9.3509e-20, 1.9165e-20, 4.2118e-19,\n 1.0468e-18, 2.4659e-21, 9.9558e-20, 1.6501e-19, 2.7182e-20, 3.0516e-19,\n 1.1023e-18, 2.7073e-19, 3.0198e-20, 1.4427e-18, 5.5153e-21, 5.3402e-19,\n 2.9563e-19, 4.5894e-22, 4.9639e-20, 3.3872e-19, 4.2384e-19, 1.7924e-18,\n 3.1677e-19, 1.2633e-20, 1.0180e-20, 2.3358e-20, 1.9913e-20, 7.8682e-20,\n 3.3875e-19, 9.1376e-21, 3.2035e-19, 8.8462e-22, 5.2052e-20, 5.6166e-20,\n 3.4602e-19, 2.3325e-19, 1.3828e-18, 1.0696e-19, 6.1086e-20, 2.7180e-21,\n 1.7747e-20, 1.7952e-18, 3.0966e-20, 2.6716e-20, 7.3153e-19, 3.1757e-22,\n 6.7039e-20, 3.8423e-21, 5.7550e-19, 3.7515e-18, 4.4786e-20, 1.5421e-19,\n 1.2944e-20, 5.4857e-21, 3.2029e-19, 9.2420e-22, 1.2037e-20, 2.1278e-21,\n 4.0834e-19, 4.8847e-20, 2.1944e-22, 7.2764e-21, 7.8966e-19, 2.0659e-19,\n 1.2493e-19, 3.0470e-18, 5.0140e-21, 1.9035e-21, 7.4310e-21, 1.9905e-21,\n 2.2708e-18, 3.0531e-19, 4.5841e-20, 5.4360e-19, 7.3815e-19, 6.2779e-20,\n 5.6316e-21, 5.9222e-21, 3.5906e-21, 5.1432e-19, 1.7212e-18, 1.1625e-19,\n 3.4383e-20, 2.0971e-18, 1.6033e-18, 3.1046e-22, 9.7449e-19, 2.0260e-18,\n 3.0447e-21, 1.0796e-18, 4.3705e-19, 1.5433e-19, 5.5673e-21, 5.1719e-20,\n 4.6056e-22, 2.4560e-21, 3.3933e-19, 5.7557e-23, 3.2347e-20, 4.1855e-18,\n 6.6571e-23, 5.1044e-22, 7.9096e-21, 2.5802e-22, 2.0650e-20, 2.3515e-21,\n 1.8956e-21, 9.5376e-19, 3.1804e-19, 1.2847e-18, 1.2041e-18, 1.3301e-19,\n 1.0975e-19, 6.7969e-23, 3.2899e-23, 3.0394e-19, 9.1830e-20, 1.3496e-20,\n 4.0454e-22, 3.7726e-19, 2.9495e-22, 1.9878e-19, 2.2956e-19, 2.9136e-19,\n 1.9366e-19, 4.9589e-20, 2.9867e-19, 8.2190e-20, 6.6069e-20, 1.7049e-19,\n 2.6250e-20, 7.2697e-21, 5.0095e-21, 2.6232e-21, 6.8285e-19, 2.2658e-21,\n 3.4094e-18, 5.2881e-20, 2.1931e-18, 1.2988e-21, 7.0956e-19, 1.6112e-19,\n 1.2684e-19, 3.5364e-19, 6.1655e-21, 5.3794e-20, 4.8883e-21, 9.0640e-19,\n 1.5336e-19, 2.4506e-21, 2.7706e-19, 2.4791e-21, 1.1730e-18, 3.1565e-19,\n 1.7299e-19, 4.5898e-22, 2.6313e-18, 1.1978e-20, 7.2828e-20, 2.5564e-19,\n 7.8506e-19, 2.3092e-22, 9.7199e-22, 1.5306e-19, 6.6020e-20, 9.4539e-20,\n 5.7274e-22, 1.9425e-19, 1.3642e-18, 7.0085e-20, 8.4721e-20, 4.4734e-23,\n 3.9278e-21, 1.1420e-21, 4.6775e-22, 1.9558e-21, 2.5627e-19, 1.0254e-18,\n 2.3199e-21, 4.8072e-20, 3.7969e-20, 3.0921e-20], device='cuda:0')" + }, + "33": { + "step": "tensor(30032.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([9.9823e-23, 1.0185e-22, 4.8896e-21, 6.9390e-24, 2.4852e-25, 7.6779e-23,\n 4.9440e-26, 5.7625e-24, 4.4172e-22, 1.2277e-22, 1.1437e-23, 3.6330e-22,\n 3.5836e-25, 3.4094e-26, 3.8330e-23, 1.2479e-22, 2.7247e-22, 6.5225e-24,\n 1.1558e-24, 9.7870e-27, 4.5052e-23, 1.4377e-21, 1.0916e-23, 1.3491e-23,\n 9.7919e-26, 5.3369e-22, 4.7847e-24, 2.2278e-23, 8.0838e-23, 2.4562e-24,\n 2.9228e-23, 1.5052e-22, 2.1208e-22, 9.4214e-25, 2.6266e-23, 3.3202e-23,\n 4.4221e-21, 3.5388e-21, 5.4068e-25, 1.4325e-22, 2.9642e-22, 5.7805e-22,\n 5.2262e-25, 4.8776e-22, 4.4523e-25, 1.1191e-24, 2.3059e-23, 2.2171e-22,\n 2.0043e-26, 1.1029e-23, 6.6138e-24, 3.4716e-24, 1.6017e-26, 9.7848e-22,\n 1.2304e-22, 1.0516e-25, 1.0791e-25, 1.4285e-25, 6.8106e-22, 2.4065e-23,\n 5.5582e-23, 1.5821e-24, 1.9541e-22, 8.7217e-24, 2.4465e-22, 2.3330e-21,\n 1.3220e-22, 3.9372e-24, 1.0750e-22, 5.1862e-24, 1.2475e-23, 2.4331e-25,\n 1.2583e-23, 2.9869e-23, 4.1066e-22, 1.7185e-23, 2.7526e-23, 1.7270e-22,\n 2.0818e-21, 6.7670e-25, 2.4454e-23, 1.8263e-22, 2.5989e-24, 2.2461e-23,\n 7.1975e-22, 1.1984e-22, 9.6344e-24, 6.4098e-22, 2.3675e-24, 8.8900e-23,\n 7.7832e-23, 2.4957e-24, 1.4338e-23, 8.0820e-23, 3.6082e-22, 2.2031e-21,\n 1.5730e-22, 1.7940e-23, 9.0094e-24, 1.2628e-23, 3.6005e-24, 3.4777e-23,\n 2.7415e-22, 3.6183e-23, 2.0966e-22, 2.5388e-26, 5.7394e-23, 4.5327e-24,\n 1.7417e-22, 1.3724e-22, 9.7782e-22, 1.0286e-22, 9.9777e-24, 4.3446e-27,\n 1.1544e-23, 2.3327e-21, 1.5261e-23, 1.2434e-23, 2.1996e-21, 1.6697e-25,\n 2.2887e-23, 8.4245e-25, 2.7260e-22, 7.7032e-21, 3.9359e-23, 1.0282e-22,\n 3.6079e-23, 4.1309e-25, 6.7631e-22, 1.0719e-23, 3.9058e-24, 8.7448e-26,\n 3.6126e-22, 1.1711e-23, 1.2916e-25, 3.0449e-24, 3.4134e-22, 2.1556e-22,\n 8.9348e-23, 6.5826e-21, 4.8333e-23, 2.4726e-23, 2.1203e-23, 3.2218e-24,\n 4.6829e-21, 3.4542e-23, 7.9727e-23, 1.0860e-21, 1.1894e-22, 6.6429e-23,\n 1.7060e-23, 5.8565e-24, 9.6023e-24, 2.1719e-22, 8.0672e-22, 8.6084e-23,\n 2.6077e-23, 1.7872e-21, 7.3225e-22, 1.0778e-23, 3.8337e-22, 2.0171e-21,\n 2.9135e-24, 2.2376e-21, 1.3877e-22, 1.8940e-22, 5.0050e-24, 3.6043e-23,\n 1.0170e-25, 2.6904e-24, 4.3417e-22, 4.2647e-25, 2.3233e-23, 5.8881e-21,\n 5.1703e-25, 1.4459e-25, 1.5160e-23, 2.5437e-24, 7.3225e-24, 6.4412e-24,\n 4.2221e-25, 4.3813e-22, 3.2736e-22, 2.7877e-21, 1.7609e-21, 6.0987e-23,\n 5.3832e-23, 5.7304e-23, 1.1517e-23, 8.4230e-22, 4.1629e-24, 6.8888e-24,\n 1.9347e-23, 1.1878e-22, 1.7917e-23, 4.7790e-22, 8.9694e-23, 4.8243e-22,\n 1.0128e-22, 4.8058e-23, 1.5876e-22, 1.4288e-23, 1.7867e-23, 1.7701e-22,\n 5.1255e-24, 1.7711e-24, 1.5336e-24, 8.3310e-24, 8.7355e-22, 1.4758e-24,\n 5.2089e-21, 2.9603e-24, 2.9376e-21, 1.5780e-25, 1.1228e-21, 1.4468e-22,\n 1.9594e-22, 2.5153e-22, 1.8638e-24, 3.3607e-23, 1.1808e-24, 1.3411e-21,\n 1.1503e-22, 2.2618e-23, 1.3632e-22, 4.4533e-25, 2.2326e-21, 1.5056e-22,\n 4.8331e-22, 8.1771e-27, 1.1293e-20, 3.0858e-24, 2.2068e-23, 2.4155e-23,\n 4.6169e-22, 2.1023e-25, 1.7186e-26, 1.9011e-22, 1.8389e-23, 2.3347e-23,\n 2.6295e-25, 1.4612e-22, 1.0398e-21, 1.0490e-22, 1.7130e-23, 5.6623e-25,\n 6.4097e-24, 1.5630e-24, 1.7827e-23, 2.0654e-25, 3.0640e-22, 2.5039e-21,\n 5.4816e-25, 1.2187e-23, 2.4048e-23, 6.9502e-23], device='cuda:0')" + }, + "34": { + "step": "tensor(30032.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([4.2933e-22, 7.5540e-23, 3.5979e-21, 1.0881e-23, 1.0062e-25, 3.6311e-22,\n 2.1821e-25, 1.9286e-23, 7.4640e-22, 1.6503e-22, 5.0631e-23, 6.3809e-22,\n 2.1938e-24, 3.7287e-25, 2.0324e-22, 5.4457e-22, 4.4497e-22, 3.8012e-24,\n 1.1412e-24, 9.7035e-27, 1.4812e-22, 8.9733e-22, 7.0893e-25, 2.0019e-23,\n 3.7733e-26, 1.1584e-21, 2.6226e-25, 7.5486e-23, 2.5148e-22, 2.0153e-25,\n 1.3374e-22, 6.0229e-22, 4.3328e-22, 1.6452e-23, 3.0238e-23, 7.5709e-23,\n 3.2763e-21, 4.0427e-21, 2.2877e-26, 1.5913e-22, 6.3333e-22, 6.1284e-22,\n 1.1461e-25, 1.0150e-21, 6.2991e-25, 7.1066e-24, 7.9556e-23, 1.5620e-22,\n 8.5021e-26, 9.7653e-23, 1.1117e-23, 9.1921e-24, 1.7439e-25, 1.1363e-21,\n 9.9460e-23, 9.6720e-25, 8.4873e-27, 1.2142e-24, 6.6287e-22, 5.8611e-22,\n 5.2689e-22, 4.4593e-24, 2.2969e-22, 7.1628e-23, 5.9007e-22, 2.1410e-21,\n 1.4827e-22, 3.6239e-24, 1.2818e-22, 1.2828e-23, 9.5252e-23, 4.8792e-26,\n 1.0056e-22, 2.3922e-27, 8.4289e-22, 1.1955e-22, 3.3948e-23, 5.8054e-22,\n 1.3958e-21, 5.5801e-25, 1.1153e-22, 1.9246e-22, 4.0649e-23, 3.8098e-22,\n 1.4057e-21, 3.6268e-22, 4.7238e-23, 1.8909e-21, 6.0213e-24, 7.0634e-22,\n 3.7427e-22, 3.9670e-24, 5.2717e-23, 4.4019e-22, 5.7626e-22, 2.3949e-21,\n 4.2029e-22, 2.0689e-23, 9.4887e-24, 3.6601e-23, 2.5185e-23, 1.0757e-22,\n 4.5254e-22, 1.9925e-23, 4.3531e-22, 3.9204e-27, 7.7769e-23, 8.5768e-23,\n 4.3822e-22, 3.0854e-22, 1.8302e-21, 1.4899e-22, 8.9376e-23, 1.1229e-25,\n 3.1054e-23, 2.3843e-21, 5.1568e-23, 2.5642e-23, 1.0080e-21, 3.0184e-27,\n 9.1274e-23, 2.2523e-24, 7.5183e-22, 5.0280e-21, 6.0773e-23, 2.0710e-22,\n 2.1037e-23, 6.7065e-24, 4.0604e-22, 1.1155e-25, 2.6493e-23, 1.4750e-24,\n 5.4856e-22, 6.3158e-23, 4.6296e-26, 1.6048e-23, 1.0381e-21, 2.7070e-22,\n 1.7245e-22, 4.0692e-21, 1.3239e-23, 6.6588e-24, 5.7551e-24, 3.8508e-25,\n 3.0213e-21, 4.1180e-22, 6.3912e-23, 6.7625e-22, 9.5031e-22, 8.5547e-23,\n 3.9270e-24, 9.2697e-24, 7.7376e-24, 6.8429e-22, 2.2601e-21, 1.5432e-22,\n 5.0324e-23, 2.7732e-21, 2.0876e-21, 3.3183e-24, 1.2957e-21, 2.6418e-21,\n 1.0620e-23, 1.3556e-21, 5.5411e-22, 2.1805e-22, 9.8786e-24, 6.0867e-23,\n 4.1783e-26, 5.5170e-24, 4.3001e-22, 1.3648e-25, 4.7340e-23, 5.3634e-21,\n 2.8995e-25, 1.7515e-25, 1.1689e-23, 2.6190e-25, 2.1812e-23, 3.1619e-25,\n 2.6211e-24, 1.2383e-21, 4.2899e-22, 1.7202e-21, 1.6196e-21, 1.7926e-22,\n 1.4677e-22, 1.8911e-24, 2.2621e-24, 3.6731e-22, 1.0987e-22, 1.0059e-23,\n 5.7167e-24, 4.8406e-22, 2.8728e-24, 2.8107e-22, 3.2462e-22, 3.6707e-22,\n 2.5837e-22, 7.0831e-23, 3.9542e-22, 1.0533e-22, 8.8845e-23, 2.2495e-22,\n 3.8845e-23, 1.4129e-23, 8.7921e-24, 7.0926e-24, 8.8690e-22, 3.4949e-25,\n 4.4034e-21, 6.1831e-23, 2.8123e-21, 1.9975e-24, 9.5269e-22, 2.1055e-22,\n 1.6772e-22, 4.3853e-22, 4.2904e-24, 7.7281e-23, 5.2917e-24, 1.1159e-21,\n 2.0819e-22, 8.3509e-24, 3.5668e-22, 1.7554e-24, 1.5811e-21, 4.0257e-22,\n 1.9554e-22, 7.1531e-26, 3.5539e-21, 1.8753e-23, 1.1604e-22, 3.5618e-22,\n 1.0341e-21, 2.2279e-24, 7.5577e-28, 2.1114e-22, 9.0948e-23, 1.0987e-22,\n 5.6973e-26, 2.6554e-22, 1.7876e-21, 7.5912e-23, 1.2595e-22, 1.8104e-24,\n 1.0594e-23, 2.2199e-25, 1.7047e-26, 1.3295e-24, 3.5384e-22, 1.2649e-21,\n 2.3790e-24, 5.1689e-23, 5.2719e-23, 4.5916e-23], device='cuda:0')" + }, + "35": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[5.1939e-26, 1.9079e-24, 1.7678e-25, ..., 4.0649e-23, 1.1735e-25,\n 2.3645e-25],\n [9.2024e-24, 7.2880e-26, 1.6533e-23, ..., 2.2853e-24, 1.9443e-25,\n 2.1157e-24],\n [2.1202e-21, 5.0942e-22, 5.5233e-22, ..., 3.6965e-22, 1.0307e-22,\n 2.3912e-22],\n ...,\n [4.6034e-23, 1.5002e-23, 1.4576e-24, ..., 2.1080e-24, 1.9784e-24,\n 1.3093e-25],\n [4.4564e-24, 3.7483e-24, 1.8224e-24, ..., 1.1100e-24, 8.6983e-25,\n 1.5792e-25],\n [3.4899e-24, 4.1549e-24, 2.3115e-24, ..., 6.3132e-24, 1.8547e-24,\n 1.1479e-24]], device='cuda:0')" + }, + "36": { + "step": "tensor(30032.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.5575e-20, 1.8272e-20, 2.3649e-18, 2.0960e-21, 1.3403e-21, 4.5190e-22,\n 2.5221e-22, 1.1065e-21, 5.7170e-19, 8.6112e-22, 3.2698e-18, 1.4252e-20,\n 1.1997e-20, 1.2355e-22, 4.6100e-20, 5.7994e-20, 6.2757e-24, 4.3447e-19,\n 2.0250e-22, 1.0978e-21, 1.0843e-18, 1.6950e-20, 1.9967e-20, 2.0091e-19,\n 5.3223e-21, 1.4497e-18, 3.2791e-21, 1.8654e-21, 3.4556e-19, 2.0601e-20,\n 1.8759e-19, 5.8300e-19, 5.2324e-19, 1.4145e-23, 3.4732e-21, 5.4670e-20,\n 2.8005e-18, 4.7757e-18, 8.5174e-20, 1.1100e-19, 6.4581e-19, 6.8315e-21,\n 1.5984e-21, 4.3603e-19, 3.4691e-22, 1.3799e-20, 2.1165e-22, 6.6465e-20,\n 5.3847e-20, 2.3583e-19, 1.3737e-19, 9.1847e-20, 2.4906e-21, 8.0890e-21,\n 1.1793e-19, 2.3114e-20, 1.2011e-21, 1.0969e-19, 5.6183e-19, 1.6370e-19,\n 9.1793e-19, 3.5493e-21, 1.1206e-19, 4.5425e-20, 7.9552e-19, 7.2589e-20,\n 4.4081e-19, 7.0602e-20, 2.3422e-20, 4.6479e-19, 3.6839e-19, 4.8369e-22,\n 3.9128e-19, 1.2938e-21, 2.1310e-21, 2.0216e-20, 3.3536e-22, 5.0941e-19,\n 1.8958e-19, 6.4952e-20, 1.2168e-18, 2.3004e-21, 4.1207e-22, 4.7510e-20,\n 1.0612e-20, 2.4192e-20, 7.9249e-19, 2.8266e-21, 6.9859e-21, 2.4407e-18,\n 3.5537e-20, 7.2002e-22, 1.5822e-19, 2.8953e-20, 9.5252e-19, 1.7919e-18,\n 6.0219e-23, 3.1269e-22, 2.7740e-19, 2.2037e-20, 1.6784e-21, 8.9303e-21,\n 6.8403e-19, 9.4484e-21, 2.3758e-21, 3.4212e-21, 6.0801e-22, 1.2133e-22,\n 7.9906e-19, 3.1921e-20, 1.9955e-18, 1.4489e-19, 1.3370e-19, 3.1157e-21,\n 2.9057e-19, 2.1712e-18, 7.5177e-20, 8.9395e-21, 3.1371e-19, 4.7264e-21,\n 2.5900e-19, 1.7418e-21, 1.7503e-18, 1.0752e-19, 5.1652e-20, 5.7769e-19,\n 1.1475e-18, 4.4071e-22, 1.4948e-19, 1.1940e-18, 1.0601e-20, 6.6133e-23,\n 1.1559e-22, 8.6102e-23, 6.5665e-20, 1.1153e-19, 1.1504e-20, 6.6956e-20,\n 4.4175e-20, 2.0272e-18, 9.7177e-22, 1.8930e-19, 3.6737e-22, 5.6232e-19,\n 1.4513e-21, 3.3263e-18, 5.4228e-21, 6.0008e-20, 3.6727e-19, 3.3126e-20,\n 6.3191e-19, 4.1394e-23, 1.0004e-20, 2.5666e-19, 5.1111e-19, 8.6127e-20,\n 4.2018e-20, 7.5625e-19, 2.3225e-18, 9.1500e-19, 1.9016e-18, 1.9845e-18,\n 2.0802e-21, 1.0704e-20, 1.2929e-18, 1.7771e-20, 3.2787e-21, 1.4343e-22,\n 3.4751e-22, 2.1250e-20, 2.2747e-19, 2.9002e-21, 5.6346e-20, 3.7408e-20,\n 2.4744e-19, 1.1276e-22, 3.1098e-22, 2.6637e-23, 2.9444e-20, 6.7351e-20,\n 1.1746e-22, 5.9004e-19, 4.1624e-19, 3.7641e-19, 2.5427e-19, 4.1761e-21,\n 1.9666e-19, 2.8739e-20, 7.8465e-20, 4.6220e-21, 1.9845e-19, 2.0781e-20,\n 6.1767e-19, 3.2732e-18, 1.6302e-18, 7.9582e-20, 5.2102e-19, 1.1678e-19,\n 1.7165e-20, 1.3769e-21, 2.8530e-21, 7.0107e-19, 1.7581e-19, 8.3306e-20,\n 4.4073e-19, 1.6765e-20, 2.5928e-20, 7.6156e-21, 1.6228e-20, 5.2823e-21,\n 3.7471e-19, 3.1104e-19, 2.6905e-18, 4.7716e-20, 1.0496e-19, 4.1955e-19,\n 1.2478e-20, 1.6349e-20, 4.9996e-19, 2.3505e-22, 4.7021e-20, 7.7940e-20,\n 2.2338e-19, 1.4051e-19, 7.5930e-19, 3.1618e-20, 2.4868e-19, 2.2730e-20,\n 1.1938e-20, 8.7275e-22, 3.7424e-20, 7.5032e-20, 1.2069e-21, 6.4216e-19,\n 1.3662e-18, 2.2208e-21, 6.5889e-21, 2.1263e-21, 3.8657e-20, 1.8849e-18,\n 6.4233e-20, 1.6677e-21, 8.6915e-20, 3.2575e-20, 3.1420e-20, 5.7655e-21,\n 1.9401e-21, 4.8084e-20, 1.8175e-18, 2.3188e-22, 2.4504e-19, 5.2423e-19,\n 9.2183e-23, 1.8778e-20, 9.2195e-22, 1.0462e-20], device='cuda:0')" + }, + "37": { + "step": "tensor(30032.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([2.6453e-23, 8.1312e-24, 2.8406e-21, 3.6342e-24, 2.4111e-25, 1.7153e-23,\n 2.8572e-25, 1.1675e-23, 8.8814e-22, 5.9941e-24, 1.9360e-21, 2.2750e-23,\n 6.4648e-24, 7.4577e-25, 3.0894e-23, 8.1780e-24, 5.1487e-24, 6.0782e-22,\n 5.3543e-25, 1.9172e-25, 2.8072e-22, 9.3482e-24, 1.7665e-24, 9.6258e-23,\n 3.1735e-24, 1.5155e-21, 1.0592e-23, 1.3226e-24, 2.4345e-22, 4.4980e-24,\n 3.5510e-22, 5.6187e-22, 7.0681e-22, 5.4063e-25, 5.0830e-25, 9.5467e-24,\n 5.3842e-21, 9.7120e-21, 8.7943e-23, 5.0110e-23, 3.8667e-22, 1.5107e-24,\n 4.7697e-24, 1.6634e-22, 2.7093e-24, 9.2205e-24, 3.9333e-23, 1.6224e-22,\n 4.0224e-23, 5.4244e-23, 3.4472e-23, 1.0284e-22, 9.1164e-24, 9.2883e-24,\n 7.1112e-23, 7.8956e-23, 1.5558e-25, 3.9185e-23, 1.3956e-21, 4.2085e-23,\n 4.4425e-22, 1.1426e-24, 6.9614e-23, 5.8021e-24, 1.4544e-21, 8.2760e-24,\n 6.8680e-22, 1.6553e-22, 1.4913e-23, 4.2504e-22, 2.9092e-22, 1.1340e-23,\n 5.2724e-22, 2.1755e-23, 1.4222e-23, 1.7319e-24, 3.7579e-25, 9.6999e-23,\n 6.0699e-23, 7.2467e-23, 1.6306e-21, 7.4756e-25, 3.6669e-24, 2.9379e-23,\n 1.3890e-23, 1.5877e-24, 7.9298e-22, 5.5844e-23, 1.8760e-24, 4.4794e-21,\n 1.2572e-23, 3.9699e-24, 5.4110e-24, 3.2954e-24, 2.4238e-21, 8.9958e-22,\n 6.4137e-24, 9.2874e-27, 7.7509e-23, 3.7391e-24, 1.0722e-24, 5.0899e-23,\n 6.1066e-22, 3.4780e-24, 2.7696e-24, 9.9308e-24, 1.8402e-24, 2.3660e-24,\n 6.7846e-22, 3.1021e-24, 4.0124e-21, 1.3114e-22, 2.4081e-23, 2.2346e-25,\n 8.9298e-23, 2.3974e-21, 3.6641e-23, 4.3252e-23, 3.2453e-22, 1.9976e-25,\n 2.1877e-22, 4.8516e-24, 1.4384e-21, 1.2528e-22, 4.1471e-23, 3.9479e-22,\n 1.9443e-22, 1.0932e-26, 3.2785e-22, 2.4832e-21, 1.8244e-23, 5.0169e-25,\n 1.1675e-23, 8.7507e-25, 1.3008e-23, 1.1430e-22, 1.3840e-23, 5.5336e-23,\n 5.0559e-23, 9.7260e-22, 3.4571e-26, 1.4675e-23, 8.7328e-24, 1.4303e-21,\n 3.0259e-23, 4.7160e-21, 1.2856e-24, 1.5864e-23, 6.9875e-23, 5.4725e-23,\n 1.3823e-22, 3.7650e-26, 6.3843e-24, 2.8110e-22, 6.7992e-23, 1.7043e-22,\n 5.8597e-24, 1.4975e-22, 1.7855e-21, 1.2734e-21, 3.5019e-21, 1.8475e-21,\n 1.5470e-25, 1.6171e-23, 4.8186e-22, 1.5048e-23, 8.4897e-24, 5.3983e-24,\n 3.6461e-25, 1.8716e-24, 6.1857e-22, 1.8000e-25, 3.3219e-23, 3.9292e-23,\n 4.5281e-22, 1.4756e-25, 4.0382e-26, 2.2381e-24, 1.0831e-24, 1.1181e-23,\n 9.2775e-25, 7.6551e-22, 3.8434e-22, 8.3363e-23, 6.9986e-23, 1.5292e-24,\n 1.9412e-22, 2.9052e-23, 2.1777e-23, 2.3714e-24, 7.1972e-23, 3.4302e-24,\n 1.7512e-22, 1.7727e-21, 2.9344e-21, 9.9659e-23, 3.9410e-22, 2.4831e-22,\n 6.5075e-24, 1.3842e-26, 8.6923e-24, 5.6719e-22, 2.2433e-22, 4.8952e-23,\n 5.2619e-22, 4.2608e-23, 8.3529e-24, 9.3436e-25, 6.2597e-24, 1.7398e-24,\n 1.0810e-22, 1.2926e-22, 3.8383e-21, 9.2284e-23, 1.1517e-23, 8.1630e-22,\n 2.5811e-24, 8.2460e-24, 4.6539e-22, 4.6263e-25, 7.8100e-24, 1.1219e-23,\n 1.2601e-22, 4.3980e-23, 1.0927e-21, 3.6481e-23, 3.5739e-23, 4.8805e-24,\n 1.8489e-24, 1.6333e-25, 2.1907e-23, 3.1487e-23, 4.5125e-24, 5.3128e-22,\n 1.0050e-21, 2.0354e-23, 8.7346e-25, 1.0402e-23, 5.1863e-23, 2.2286e-21,\n 3.3465e-23, 6.8119e-24, 7.0042e-23, 1.2353e-23, 5.4441e-24, 7.1916e-25,\n 3.6080e-24, 5.4606e-23, 3.8772e-21, 1.2395e-24, 1.4396e-22, 2.4172e-22,\n 8.2828e-26, 7.3232e-24, 9.5492e-24, 7.3966e-24], device='cuda:0')" + }, + "38": { + "step": "tensor(30032.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.9951e-23, 2.5739e-23, 2.8341e-21, 7.2485e-24, 6.8742e-25, 7.0195e-25,\n 4.8549e-26, 9.0857e-25, 7.9269e-22, 5.4776e-25, 4.2135e-21, 1.9003e-23,\n 1.5305e-23, 1.2000e-25, 4.6054e-23, 7.9693e-23, 3.1918e-26, 6.0559e-22,\n 1.1070e-25, 4.2766e-25, 1.4040e-21, 2.3816e-23, 2.7019e-23, 2.2027e-22,\n 1.2040e-23, 1.8944e-21, 2.1442e-24, 2.6555e-24, 4.7167e-22, 3.2687e-23,\n 2.7571e-22, 8.0541e-22, 5.9981e-22, 3.8365e-26, 2.8664e-24, 6.7479e-23,\n 3.6986e-21, 6.2128e-21, 8.6063e-23, 1.2190e-22, 8.4014e-22, 8.2521e-24,\n 5.6280e-24, 5.2772e-22, 2.9726e-24, 1.1289e-23, 6.8851e-26, 6.9325e-23,\n 7.7336e-23, 3.1876e-22, 1.6103e-22, 1.2591e-22, 7.7046e-24, 9.8177e-24,\n 1.6833e-22, 4.3586e-23, 1.7575e-24, 1.2013e-22, 6.5789e-22, 2.0727e-22,\n 1.1137e-21, 3.5598e-24, 1.6498e-22, 5.1933e-23, 9.1870e-22, 8.7260e-23,\n 5.0195e-22, 7.2551e-23, 2.7315e-23, 5.3887e-22, 4.9381e-22, 6.8093e-24,\n 5.5866e-22, 1.7288e-24, 1.5468e-24, 2.4996e-23, 1.3191e-24, 6.7293e-22,\n 2.3155e-22, 5.8970e-23, 1.5324e-21, 2.8961e-24, 1.1692e-24, 5.6082e-23,\n 1.3261e-23, 2.9144e-23, 1.0640e-21, 4.9233e-24, 8.8713e-24, 3.2025e-21,\n 4.6625e-23, 7.9804e-24, 1.9478e-22, 3.6951e-23, 1.1143e-21, 2.1693e-21,\n 2.1611e-26, 5.0352e-26, 3.3294e-22, 2.9807e-23, 2.3636e-24, 1.3407e-23,\n 8.1531e-22, 8.2422e-24, 3.4690e-24, 1.0292e-23, 5.2439e-24, 1.7065e-25,\n 9.4265e-22, 3.8988e-23, 2.6374e-21, 1.5864e-22, 1.7321e-22, 2.5996e-24,\n 3.7909e-22, 2.6316e-21, 1.0094e-22, 8.3417e-24, 3.6589e-22, 4.6285e-24,\n 3.5747e-22, 3.8506e-24, 2.1720e-21, 1.5031e-22, 6.0921e-23, 6.8191e-22,\n 1.4860e-21, 6.7047e-25, 2.4966e-22, 1.6032e-21, 3.1896e-23, 1.4149e-25,\n 6.3096e-26, 3.0346e-27, 8.9147e-23, 1.6087e-22, 1.3816e-23, 9.9488e-23,\n 4.2344e-23, 2.4495e-21, 8.4602e-25, 2.2319e-22, 1.5296e-25, 6.4837e-22,\n 1.6516e-24, 4.3453e-21, 4.7466e-24, 8.3509e-23, 4.4385e-22, 6.4069e-23,\n 7.8662e-22, 1.6625e-25, 8.3733e-24, 3.7586e-22, 6.4141e-22, 1.3614e-22,\n 4.5315e-23, 9.1532e-22, 2.8479e-21, 1.0694e-21, 2.5233e-21, 2.6045e-21,\n 1.7686e-24, 1.3986e-23, 1.5947e-21, 2.4388e-23, 1.2802e-23, 2.6193e-25,\n 2.8694e-25, 2.8823e-23, 3.4881e-22, 3.0246e-24, 6.1510e-23, 4.3493e-23,\n 3.6058e-22, 4.9038e-25, 2.0788e-25, 2.1878e-25, 3.5876e-23, 8.5458e-23,\n 4.0938e-27, 8.1035e-22, 4.9035e-22, 4.4251e-22, 3.0079e-22, 5.2372e-24,\n 2.1466e-22, 4.0470e-23, 9.5982e-23, 7.0622e-24, 2.3289e-22, 2.3227e-23,\n 7.5276e-22, 4.1889e-21, 1.9361e-21, 8.0015e-23, 6.7185e-22, 1.9204e-22,\n 2.3960e-23, 1.1608e-24, 1.9600e-24, 8.3666e-22, 1.9625e-22, 9.0885e-23,\n 5.9428e-22, 3.6590e-23, 3.6448e-23, 6.6169e-24, 2.1140e-23, 8.6402e-24,\n 4.8539e-22, 3.6045e-22, 3.5495e-21, 4.9753e-23, 1.1991e-22, 4.9615e-22,\n 1.4428e-23, 2.1073e-23, 5.8370e-22, 1.1838e-24, 4.9368e-23, 1.0050e-22,\n 2.5186e-22, 1.8500e-22, 1.0301e-21, 2.3122e-23, 3.0441e-22, 2.9484e-23,\n 1.4252e-23, 2.3943e-24, 5.0815e-23, 1.1218e-22, 1.2811e-24, 8.2565e-22,\n 1.6436e-21, 1.2215e-23, 5.5441e-24, 1.9515e-24, 6.2419e-23, 2.2978e-21,\n 7.2728e-23, 1.4463e-24, 1.0536e-22, 4.5406e-23, 4.3566e-23, 4.4535e-24,\n 8.7500e-24, 7.1568e-23, 2.4098e-21, 5.2607e-25, 2.7807e-22, 6.9888e-22,\n 2.6566e-25, 2.3147e-23, 9.3034e-24, 1.0800e-23], device='cuda:0')" + }, + "39": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n ...,\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[8.9670e-26, 3.8231e-24, 1.2879e-23, ..., 7.3239e-25, 2.7638e-24,\n 3.4195e-25],\n [4.6122e-26, 4.3288e-24, 1.4656e-24, ..., 3.0111e-24, 1.8568e-23,\n 6.0645e-24],\n [1.5957e-29, 1.3122e-25, 5.8648e-25, ..., 3.8632e-25, 9.5349e-27,\n 4.7554e-24],\n ...,\n [3.3252e-25, 6.4792e-22, 3.6305e-21, ..., 4.9857e-24, 1.4362e-21,\n 5.2943e-21],\n [1.5867e-24, 1.7883e-21, 1.1599e-20, ..., 5.6415e-24, 4.0833e-21,\n 1.6366e-20],\n [2.3366e-24, 2.4225e-21, 1.5677e-20, ..., 1.3377e-23, 4.7376e-21,\n 2.0169e-20]], device='cuda:0')" + }, + "40": { + "step": "tensor(30032.)", + "exp_avg": "tensor([ 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.9823e-25, 2.7105e-23, 2.9013e-27, 2.2446e-23, 3.0366e-24, 1.7295e-23,\n 2.3387e-22, 4.1699e-23, 3.2780e-23, 7.3250e-24, 2.3509e-22, 4.3783e-23,\n 3.8695e-22, 1.2650e-23, 4.0333e-24, 7.2063e-26, 1.9562e-22, 1.8879e-23,\n 3.7357e-22, 4.6201e-23, 3.5237e-24, 1.3448e-22, 1.0446e-23, 1.2092e-22,\n 5.1455e-23, 4.0591e-23, 1.7868e-23, 5.2194e-24, 2.1972e-24, 8.8641e-24,\n 1.0736e-22, 7.0959e-23, 4.0269e-25, 2.8625e-23, 6.7717e-23, 2.1371e-23,\n 2.5941e-23, 4.2092e-23, 8.9616e-25, 3.0063e-26, 5.5715e-23, 2.0910e-24,\n 4.1719e-23, 2.2521e-24, 1.7073e-24, 8.9492e-25, 1.1640e-24, 1.9440e-23,\n 1.4828e-25, 2.7488e-24, 1.6464e-23, 9.6543e-23, 4.5989e-24, 3.6501e-24,\n 4.7810e-24, 1.0057e-23, 1.1325e-25, 1.0436e-24, 7.6274e-25, 1.7295e-23,\n 6.2380e-23, 1.8165e-23, 4.8940e-23, 3.9015e-24, 3.0202e-24, 3.0522e-22,\n 1.2882e-23, 5.1398e-24, 7.3839e-22, 7.2919e-23, 4.6874e-25, 3.9852e-22,\n 1.2084e-24, 2.9853e-22, 3.9244e-22, 1.4731e-22, 7.0021e-22, 2.8450e-22,\n 5.6954e-23, 1.2514e-22, 5.1040e-24, 8.8189e-22, 2.9205e-22, 2.6768e-22,\n 1.3364e-22, 1.2064e-22, 4.6004e-24, 2.3124e-24, 1.6438e-22, 1.0284e-22,\n 2.6696e-22, 6.6066e-23, 1.9319e-23, 1.5835e-22, 1.3249e-23, 8.4484e-24,\n 4.0017e-23, 7.5071e-23, 1.6215e-24, 4.8410e-23, 1.2152e-24, 3.7022e-25,\n 2.3639e-22, 9.2865e-23, 5.5000e-22, 1.1042e-22, 1.1910e-22, 3.3857e-22,\n 1.1713e-22, 1.5366e-22, 1.0715e-22, 5.6528e-23, 9.6661e-24, 2.0404e-22,\n 1.2722e-25, 1.5688e-22, 1.6461e-25, 1.6199e-22, 5.7208e-22, 5.2367e-22,\n 1.9574e-24, 1.3908e-22, 2.1699e-22, 4.4495e-22, 9.7177e-25, 5.0596e-24,\n 2.5128e-22, 3.2321e-23, 6.6920e-24, 2.7811e-24, 8.5571e-24, 7.1048e-25,\n 3.4757e-23, 6.8559e-24, 7.2842e-24, 7.2871e-23, 3.4743e-23, 4.3144e-24,\n 4.4632e-24, 1.0834e-23, 5.0398e-26, 3.2717e-24, 9.5118e-23, 3.7922e-23,\n 2.5928e-23, 6.4296e-23, 6.3803e-24, 3.9142e-23, 6.2162e-23, 1.3478e-23,\n 2.0705e-23, 2.0769e-23, 2.9123e-23, 1.6717e-22, 9.9855e-25, 8.6819e-23,\n 2.4665e-24, 1.7562e-23, 8.3354e-24, 7.0844e-23, 6.8158e-23, 3.4560e-23,\n 7.1135e-23, 4.2963e-24, 5.6691e-23, 2.8777e-24, 1.4140e-24, 1.2428e-23,\n 3.4032e-24, 1.9664e-23, 1.6519e-23, 2.7013e-22, 5.8226e-24, 1.7925e-23,\n 9.3496e-23, 1.8923e-22, 1.0211e-22, 1.5200e-22, 2.8942e-23, 2.8716e-22,\n 1.9360e-22, 5.9866e-24, 1.0799e-24, 4.3101e-25, 1.6183e-23, 2.6676e-23,\n 1.7168e-23, 3.5842e-23, 9.9020e-23, 1.2577e-22, 6.1393e-23, 7.7298e-23,\n 1.3736e-22, 5.3371e-23, 8.3470e-23, 1.3794e-24, 3.5868e-22, 6.2426e-23,\n 2.5386e-25, 6.8325e-23, 2.8811e-22, 1.4247e-22, 3.0733e-23, 2.2270e-22,\n 1.2253e-22, 1.7269e-23, 4.4189e-23, 2.4717e-22, 8.9410e-22, 6.8430e-23,\n 4.0105e-25, 9.8226e-23, 1.6100e-23, 5.7456e-22, 2.5378e-22, 1.1823e-22,\n 9.9195e-24, 1.2789e-22, 7.0077e-23, 5.5138e-23, 1.3750e-22, 2.1344e-22,\n 9.2955e-23, 2.8894e-23, 2.1330e-24, 5.4442e-24, 3.7442e-23, 1.9356e-24,\n 2.9966e-24, 1.9385e-24, 4.6801e-24, 1.2249e-25, 6.0242e-25, 5.1848e-23,\n 6.6723e-24, 7.2651e-24, 2.2724e-23, 1.7371e-24, 2.2733e-23, 2.3464e-23,\n 3.6993e-25, 3.3802e-24, 8.3387e-25, 7.0037e-25, 8.0780e-25, 7.1200e-23,\n 9.5621e-24, 4.9988e-24, 1.3406e-24, 6.6962e-24, 1.5465e-23, 1.6909e-26,\n 3.4357e-24, 5.6001e-23, 1.7950e-24, 2.2027e-23, 1.6737e-33, 3.5704e-34,\n 3.2114e-34, 9.8827e-33, 5.1806e-33, 3.5531e-32, 1.9250e-33, 1.4119e-34,\n 3.9341e-34, 5.2253e-32, 2.7260e-33, 4.1128e-34, 2.2837e-33, 8.3650e-35,\n 9.1681e-34, 6.6679e-33, 7.6884e-34, 7.4590e-34, 2.1874e-34, 1.3371e-32,\n 6.5718e-33, 4.1701e-33, 1.6670e-33, 1.6371e-32, 4.3880e-33, 1.7727e-33,\n 2.1661e-32, 1.6467e-33, 1.4363e-32, 7.3792e-33, 2.1765e-35, 2.6594e-32,\n 7.8726e-33, 4.3525e-32, 2.1184e-32, 5.3249e-33, 6.4091e-33, 1.0932e-32,\n 2.5548e-32, 1.3434e-33, 8.9637e-34, 3.4411e-33, 1.4972e-32, 1.6658e-32,\n 4.1598e-33, 6.0197e-36, 4.2373e-33, 1.3313e-32, 4.1820e-34, 7.3525e-33,\n 2.7028e-32, 9.3593e-34, 4.1701e-33, 1.3361e-32, 2.7873e-34, 1.3253e-34,\n 1.2036e-33, 3.5404e-32, 9.1487e-33, 1.8466e-32, 9.8272e-34, 2.3565e-33,\n 6.3042e-33, 2.2452e-35, 2.4888e-32, 1.4598e-31, 1.3158e-32, 1.4566e-31,\n 1.1603e-31, 7.2152e-32, 1.5524e-32, 1.0304e-33, 1.6339e-34, 3.4687e-32,\n 3.2024e-32, 2.2078e-32, 1.2645e-31, 2.4449e-32, 2.0891e-31, 6.2009e-32,\n 6.2641e-33, 7.4804e-32, 1.3981e-33, 1.7368e-32, 7.3010e-32, 1.2632e-31,\n 1.2267e-33, 2.9785e-36, 2.6608e-32, 3.5595e-34, 7.5417e-34, 3.6482e-33,\n 1.9578e-32, 1.7310e-33, 2.5169e-33, 1.0716e-34, 1.4255e-32, 7.9432e-33,\n 9.4523e-33, 1.6155e-31, 2.4249e-32, 1.3766e-32, 1.7406e-33, 2.0608e-32,\n 1.8125e-31, 1.8502e-31, 1.5814e-33, 4.4993e-33, 9.5718e-33, 3.8459e-33,\n 1.0740e-32, 2.9676e-33, 3.0930e-32, 4.1259e-32, 5.5056e-34, 1.4135e-31,\n 7.5903e-32, 9.3647e-32, 1.4130e-33, 1.5266e-32, 9.4098e-33, 6.2626e-32,\n 1.1753e-32, 2.8609e-32, 2.7671e-33, 1.1783e-34, 1.2681e-33, 8.5752e-34,\n 7.5580e-34, 7.0854e-33, 3.3619e-33, 4.8166e-34, 1.9855e-32, 1.6927e-32,\n 3.2180e-34, 1.3969e-33, 3.3922e-32, 3.1037e-34, 1.6343e-33, 4.7208e-32,\n 2.3346e-32, 9.3224e-33, 9.1724e-33, 1.1801e-33, 2.1533e-32, 1.9855e-32,\n 1.8332e-33, 8.0880e-33, 1.2617e-33, 1.0128e-32, 4.0402e-32, 4.9266e-33,\n 1.2066e-34, 5.6682e-33, 3.2599e-32, 3.0513e-32, 3.3058e-36, 2.3649e-32,\n 1.7721e-33, 5.6852e-33, 4.1968e-32, 1.7425e-33, 1.6297e-34, 2.3301e-33,\n 2.7507e-33, 8.0460e-34, 7.0701e-33, 1.3566e-33, 8.2487e-33, 5.1279e-33,\n 2.1068e-34, 3.9098e-32, 4.6253e-33, 9.4353e-33, 1.5874e-32, 3.4190e-32,\n 8.2942e-33, 8.2510e-33, 1.3616e-32, 1.2105e-31, 1.3239e-31, 3.2730e-33,\n 1.6644e-32, 6.8869e-33, 1.4505e-33, 4.5233e-32, 2.3468e-32, 2.2326e-32,\n 2.9113e-33, 1.0131e-33, 7.5266e-35, 9.8350e-33, 2.3471e-32, 5.7410e-32,\n 1.5471e-31, 3.9703e-32, 5.0142e-34, 1.6638e-32, 6.7489e-32, 1.0923e-33,\n 8.2275e-33, 2.1963e-32, 3.9393e-33, 3.1944e-34, 3.9591e-32, 8.8446e-33,\n 2.6218e-32, 7.7922e-32, 6.1588e-33, 2.6762e-33, 3.1963e-32, 1.4019e-32,\n 3.6117e-32, 3.4125e-34, 6.6303e-34, 3.5430e-33, 4.5915e-32, 5.1408e-32,\n 4.3973e-32, 1.6789e-33, 3.9815e-32, 1.8610e-33, 5.8746e-33, 5.2899e-33,\n 1.8611e-33, 1.3509e-34, 1.9638e-32, 2.1589e-33, 3.1155e-32, 1.2176e-34,\n 1.3975e-33, 3.9412e-32, 1.4878e-33, 1.8004e-34, 3.2990e-32, 3.7739e-32,\n 7.3891e-35, 4.6546e-34, 1.3378e-33, 1.9053e-32, 5.5726e-33, 1.9623e-32,\n 3.6594e-32, 4.8963e-32, 7.3241e-34, 2.4099e-33, 3.5455e-33, 7.0318e-33,\n 1.1945e-32, 1.4720e-33, 1.8677e-34, 8.2900e-34, 2.3051e-33, 5.2374e-34,\n 2.3826e-32, 2.7424e-33, 3.2610e-21, 3.1630e-19, 1.3120e-19, 5.1104e-20,\n 1.8478e-20, 2.0318e-19, 2.4964e-19, 1.8015e-19, 7.6026e-20, 1.0947e-21,\n 1.8611e-21, 4.1057e-21, 5.7362e-20, 7.3112e-21, 5.6151e-20, 2.0545e-20,\n 9.7224e-20, 1.6067e-23, 7.1629e-20, 1.0059e-20, 2.7668e-20, 7.0878e-21,\n 2.3664e-20, 1.4004e-19, 2.2284e-19, 2.9727e-20, 4.4127e-20, 5.2405e-20,\n 1.2111e-19, 7.7991e-21, 1.2540e-20, 1.4413e-22, 7.3444e-21, 2.3453e-21,\n 2.0183e-20, 2.5411e-19, 5.3052e-22, 4.4799e-20, 6.6058e-21, 2.9457e-20,\n 6.0917e-20, 5.7082e-19, 2.4237e-20, 8.4452e-21, 5.7260e-20, 1.8042e-22,\n 7.3057e-21, 3.9638e-20, 1.1066e-19, 9.9676e-22, 7.1497e-20, 8.4005e-21,\n 3.4641e-20, 4.8476e-20, 9.9200e-21, 1.7325e-19, 1.9529e-20, 6.9173e-20,\n 2.7513e-20, 5.6800e-21, 3.0734e-20, 2.1001e-19, 3.8753e-21, 8.8921e-20,\n 1.0193e-19, 9.9929e-21, 6.3331e-20, 5.9690e-21, 1.5269e-21, 1.7401e-20,\n 4.1437e-20, 1.9401e-20, 6.6391e-20, 1.9845e-20, 1.4911e-21, 2.0816e-20,\n 1.3484e-21, 8.0654e-23, 1.4622e-19, 1.9054e-19, 1.0384e-19, 2.5985e-20,\n 4.9172e-19, 6.9095e-20, 5.9761e-21, 2.3371e-20, 1.0839e-19, 1.2573e-20,\n 8.7853e-20, 3.9407e-19, 5.5356e-20, 2.0034e-19, 2.0496e-20, 1.4885e-19,\n 7.8123e-20, 1.7615e-20, 5.8012e-21, 2.9118e-22, 1.9288e-21, 2.0241e-20,\n 8.0811e-21, 1.1358e-19, 8.4944e-21, 1.3664e-19, 1.3525e-22, 6.5937e-21,\n 2.7097e-19, 4.4143e-20, 2.6022e-21, 1.7238e-20, 2.5243e-19, 5.6019e-21,\n 1.1486e-19, 1.7289e-21, 1.0644e-20, 8.2586e-21, 2.2518e-21, 1.7303e-19,\n 7.1689e-20, 1.1194e-19, 4.8095e-21, 1.4487e-19, 8.9808e-21, 1.5761e-20,\n 1.1474e-20, 2.4656e-19, 5.7951e-20, 4.0961e-20, 2.1919e-20, 2.2648e-20,\n 1.1472e-19, 9.6403e-20, 3.9094e-20, 5.5613e-20, 4.1928e-20, 6.8394e-22,\n 4.2995e-21, 1.2118e-20, 5.8280e-21, 1.7977e-22, 2.7227e-20, 1.6665e-22,\n 1.4094e-19, 2.5257e-22, 1.9001e-20, 1.0897e-20, 6.9233e-20, 1.1045e-21,\n 6.5265e-20, 1.5054e-20, 6.7024e-21, 9.5161e-20, 4.2101e-20, 4.4006e-22,\n 1.2201e-20, 1.8899e-20, 1.3545e-19, 3.9067e-22, 6.8597e-21, 4.2047e-21,\n 8.6984e-21, 1.5052e-19, 8.4448e-21, 2.5757e-20, 1.5937e-22, 2.7601e-21,\n 9.1635e-20, 1.0362e-19, 5.2753e-20, 3.5621e-21, 6.4204e-20, 1.7487e-19,\n 1.3493e-20, 5.7729e-21, 1.8485e-20, 1.6414e-21, 4.1218e-22, 2.7262e-20,\n 4.8729e-20, 1.8929e-20, 4.0162e-20, 6.7238e-20, 2.5080e-21, 1.8411e-21,\n 7.0465e-20, 1.4848e-19, 1.1379e-19, 3.8911e-20, 1.0481e-19, 1.7892e-22,\n 5.3691e-20, 2.5146e-22, 3.7346e-20, 2.5288e-19, 3.6656e-21, 1.4194e-20,\n 2.3712e-22, 8.2812e-20, 7.1615e-20, 5.1989e-20, 5.4278e-21, 1.4172e-19,\n 1.4164e-19, 5.9310e-21, 9.9729e-20, 8.1058e-20, 1.9989e-20, 1.2122e-20,\n 2.5921e-20, 3.5162e-20, 4.6244e-20, 3.0558e-21, 4.4758e-22, 4.0083e-21,\n 6.2635e-21, 5.6728e-20, 7.0417e-20, 5.4618e-20, 8.5967e-20, 7.8143e-21,\n 1.1255e-20, 1.1992e-19, 7.1846e-20, 1.2585e-22, 2.5222e-20, 4.2781e-20,\n 2.4597e-20, 2.1268e-19, 9.3690e-21, 2.2479e-21, 5.9061e-20, 6.1939e-22,\n 3.6052e-20, 1.2306e-20, 1.7861e-20, 2.1979e-21, 3.8451e-20, 3.1765e-20,\n 1.0829e-20, 5.4689e-23, 4.6983e-21, 1.0057e-19, 1.1476e-20, 8.9680e-20,\n 1.7522e-20, 3.0820e-20, 1.4680e-19, 2.4976e-21, 2.1273e-22, 1.0557e-19,\n 6.4347e-22, 1.2743e-20, 4.3068e-21, 7.2939e-21, 2.3291e-20, 2.9159e-20],\n device='cuda:0')" + }, + "41": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.3133e-20, 3.7611e-22, 2.5851e-20, ..., 1.1410e-23, 1.1436e-20,\n 5.7221e-21],\n [4.6513e-21, 1.9740e-22, 9.9501e-21, ..., 5.6458e-24, 4.9785e-21,\n 2.1203e-21],\n [7.9673e-21, 5.0177e-22, 1.4044e-20, ..., 1.3197e-23, 6.7338e-21,\n 3.1230e-21],\n ...,\n [1.3708e-20, 3.0107e-22, 3.0171e-20, ..., 1.6866e-23, 1.6979e-20,\n 6.3997e-21],\n [4.1791e-23, 1.9799e-22, 9.0394e-23, ..., 1.6483e-23, 2.8739e-23,\n 6.2357e-23],\n [2.0552e-22, 1.6293e-22, 7.1649e-22, ..., 1.3492e-23, 3.7973e-22,\n 1.7656e-22]], device='cuda:0')" + }, + "42": { + "step": "tensor(30032.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([2.0069e-19, 8.0928e-20, 1.1254e-19, 2.1407e-20, 1.1044e-19, 9.0028e-20,\n 2.2122e-19, 8.2284e-20, 1.2590e-19, 7.1496e-20, 6.8300e-20, 7.6924e-20,\n 4.7882e-19, 6.1816e-20, 2.1120e-19, 3.4554e-19, 2.3216e-20, 5.8687e-20,\n 1.2343e-20, 2.4849e-19, 3.0600e-21, 5.2218e-20, 3.0532e-20, 1.0969e-20,\n 3.2719e-20, 6.2750e-19, 7.1784e-20, 8.3207e-19, 2.8959e-19, 2.7904e-20,\n 7.6720e-19, 1.6187e-19, 9.9010e-20, 4.8010e-19, 4.3686e-21, 1.3035e-18,\n 2.5626e-19, 1.7477e-19, 4.7616e-20, 3.0850e-20, 5.2967e-22, 6.8691e-20,\n 1.0870e-20, 2.1126e-19, 1.4702e-20, 2.0112e-19, 1.2669e-19, 6.1190e-20,\n 5.8494e-19, 9.2015e-20, 5.9354e-20, 2.5479e-19, 2.8818e-19, 4.7664e-19,\n 2.2496e-19, 5.4642e-22, 2.4078e-20, 3.0347e-20, 5.1005e-21, 1.6393e-20,\n 1.7737e-19, 2.0185e-19, 2.7054e-20, 1.7550e-20, 4.9546e-19, 7.0331e-20,\n 1.3601e-19, 1.8430e-19, 4.2689e-19, 3.4539e-19, 7.8437e-21, 1.4971e-20,\n 1.1655e-19, 2.1947e-19, 8.4504e-20, 1.7794e-20, 6.5113e-20, 7.2240e-20,\n 4.2921e-20, 8.4666e-20, 1.9920e-19, 1.7178e-20, 2.6339e-19, 1.9933e-20,\n 1.6169e-19, 4.2296e-19, 6.4232e-19, 5.3039e-20, 6.6585e-21, 7.6066e-21,\n 4.0767e-19, 4.0355e-19, 4.4385e-19, 1.1466e-19, 5.1988e-19, 8.9073e-20,\n 4.7622e-20, 4.6366e-19, 4.7006e-21, 4.0864e-22, 2.4870e-19, 2.1546e-20,\n 2.4960e-19, 2.7796e-19, 1.1642e-21, 2.3518e-22, 1.9885e-19, 5.8232e-20,\n 1.1946e-19, 1.6688e-20, 6.1591e-20, 2.3512e-19, 2.2530e-19, 3.8837e-19,\n 5.9288e-20, 1.9127e-19, 2.9319e-19, 7.0575e-20, 2.2699e-20, 1.4914e-19,\n 8.8437e-20, 3.9677e-21, 1.0373e-19, 4.8280e-20, 4.3321e-19, 1.4426e-19,\n 1.4239e-19, 2.0509e-19, 1.2401e-19, 2.5792e-22, 1.9889e-21, 4.3852e-19,\n 4.3744e-19, 4.0426e-19, 1.9463e-20, 2.6113e-20, 1.0820e-20, 7.1359e-20,\n 1.2329e-19, 4.5580e-21, 3.2414e-19, 6.9464e-19, 2.4894e-20, 5.8169e-19,\n 1.0998e-19, 2.5038e-19, 1.4816e-19, 4.3750e-20, 7.9333e-19, 1.3830e-19,\n 6.3622e-21, 4.2792e-19, 9.2831e-21, 3.8528e-19, 3.3330e-19, 1.3410e-19,\n 7.6868e-20, 2.2022e-20, 5.9872e-20, 1.1297e-19, 3.6169e-19, 1.0914e-20,\n 3.9312e-19, 5.0400e-19, 5.6032e-20, 4.1043e-21, 3.4047e-20, 2.7288e-22,\n 5.1238e-20, 1.1079e-19, 3.0976e-19, 2.2878e-20, 7.3530e-20, 2.5684e-20,\n 2.4410e-19, 8.4879e-21, 2.8234e-19, 2.9152e-21, 5.2316e-19, 1.8227e-19,\n 1.6747e-20, 1.5994e-20, 1.7604e-19, 5.4286e-19, 4.0209e-20, 6.5194e-21,\n 3.4241e-22, 1.1717e-19, 2.7513e-19, 2.2113e-20, 2.4132e-21, 2.7130e-20,\n 9.4094e-19, 4.4143e-19, 2.5863e-19, 4.0827e-19, 2.4762e-22, 3.4625e-19,\n 1.6423e-20, 5.4020e-19, 6.9828e-21, 1.1548e-19, 4.0071e-19, 1.0129e-18,\n 2.1100e-19, 2.2205e-20, 2.0494e-19, 5.8843e-20, 2.3424e-20, 5.3304e-20,\n 3.2397e-21, 6.9169e-21, 1.5943e-20, 3.2645e-20, 8.2265e-20, 3.3292e-19,\n 5.6103e-20, 3.6441e-19, 1.8790e-19, 4.9300e-20, 2.5862e-19, 1.4727e-20,\n 2.7309e-19, 7.8558e-22, 4.8110e-20, 8.4144e-19, 1.2917e-19, 5.5049e-21,\n 1.8468e-19, 3.3283e-19, 6.6688e-20, 5.2711e-21, 8.8000e-20, 5.0599e-20,\n 3.1509e-19, 3.1435e-19, 2.4581e-19, 7.5665e-20, 2.3872e-20, 5.1224e-19,\n 3.0695e-20, 1.6006e-19, 1.4934e-19, 2.8861e-19, 2.7043e-19, 2.3569e-20,\n 8.7359e-19, 3.5336e-22, 2.2354e-20, 1.0385e-20, 1.1986e-20, 1.6853e-20,\n 9.0157e-20, 2.5167e-19, 4.8195e-22, 6.0052e-21], device='cuda:0')" + }, + "43": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[5.7556e-20, 4.3553e-20, 5.7089e-20, ..., 1.1858e-19, 2.5608e-19,\n 1.3105e-19],\n [3.7305e-21, 2.8779e-21, 3.1840e-21, ..., 7.2590e-21, 1.4640e-20,\n 7.6580e-21],\n [3.7735e-21, 2.8453e-21, 4.0264e-21, ..., 7.9818e-21, 1.7884e-20,\n 9.1167e-21],\n [2.7194e-21, 2.0266e-21, 2.7327e-21, ..., 5.7221e-21, 1.2093e-20,\n 6.1661e-21],\n [4.2787e-21, 3.2613e-21, 4.4669e-21, ..., 8.9128e-21, 1.9989e-20,\n 1.0134e-20]], device='cuda:0')" + }, + "44": { + "step": "tensor(30032.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", + "exp_avg_sq": "tensor([3.2998e-17, 2.0396e-18, 2.1910e-18, 1.6056e-18, 2.4614e-18],\n device='cuda:0')" + }, + "45": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[5.7523e-20, 4.3479e-20, 5.7079e-20, ..., 1.1851e-19, 2.5606e-19,\n 1.3100e-19],\n [3.7276e-21, 2.8714e-21, 3.1831e-21, ..., 7.2531e-21, 1.4639e-20,\n 7.6535e-21],\n [3.7703e-21, 2.8379e-21, 4.0254e-21, ..., 7.9749e-21, 1.7882e-20,\n 9.1115e-21],\n [2.7186e-21, 2.0248e-21, 2.7324e-21, ..., 5.7204e-21, 1.2092e-20,\n 6.1648e-21],\n [4.2770e-21, 3.2573e-21, 4.4663e-21, ..., 8.9090e-21, 1.9988e-20,\n 1.0131e-20]], device='cuda:0')" + }, + "46": { + "step": "tensor(30032.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", + "exp_avg_sq": "tensor([3.2997e-17, 2.0396e-18, 2.1910e-18, 1.6056e-18, 2.4614e-18],\n device='cuda:0')" + }, + "47": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[5.7523e-20, 4.3479e-20, 5.7079e-20, ..., 1.1851e-19, 2.5606e-19,\n 1.3100e-19],\n [3.7276e-21, 2.8714e-21, 3.1831e-21, ..., 7.2531e-21, 1.4639e-20,\n 7.6535e-21],\n [3.7703e-21, 2.8379e-21, 4.0254e-21, ..., 7.9749e-21, 1.7882e-20,\n 9.1115e-21],\n [2.7186e-21, 2.0248e-21, 2.7324e-21, ..., 5.7204e-21, 1.2092e-20,\n 6.1648e-21],\n [4.2770e-21, 3.2573e-21, 4.4663e-21, ..., 8.9090e-21, 1.9988e-20,\n 1.0131e-20]], device='cuda:0')" + }, + "48": { + "step": "tensor(30032.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", + "exp_avg_sq": "tensor([3.2997e-17, 2.0396e-18, 2.1910e-18, 1.6056e-18, 2.4614e-18],\n device='cuda:0')" + }, + "6": { + "step": "tensor(26278.)", + "exp_avg": "tensor([[ 1.0108e-05, 5.2160e-06, 3.1880e-07, ..., 8.8146e-07,\n -5.2219e-06, -1.2114e-05],\n [-1.2103e-07, 8.9993e-06, -6.2633e-07, ..., 2.6394e-06,\n -9.1874e-07, -4.5229e-06],\n [ 5.5225e-06, 1.4815e-06, -2.2806e-06, ..., 3.7808e-06,\n -1.2730e-06, 3.7159e-07],\n ...,\n [-2.3047e-07, 5.0437e-06, -1.8130e-06, ..., 2.9910e-06,\n -1.2031e-06, -2.1820e-06],\n [ 1.1463e-06, 5.5917e-06, 1.8434e-06, ..., 1.5357e-06,\n -1.2164e-06, 1.0239e-06],\n [-3.6810e-07, -1.9909e-06, -2.1953e-06, ..., -5.0264e-07,\n 2.9877e-06, 3.9246e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.9462e-10, 1.3950e-09, 3.2846e-10, ..., 1.3804e-10, 9.1588e-11,\n 1.2309e-10],\n [1.1124e-09, 1.2392e-09, 5.6931e-10, ..., 3.9139e-10, 1.9904e-10,\n 4.3604e-10],\n [7.4550e-11, 6.2686e-11, 4.5122e-11, ..., 4.6881e-10, 5.9127e-11,\n 7.3016e-11],\n ...,\n [9.4128e-11, 6.0456e-10, 1.8005e-10, ..., 6.0673e-11, 5.5595e-11,\n 4.4850e-11],\n [2.2296e-10, 9.7525e-10, 2.4970e-10, ..., 4.5039e-10, 1.2051e-10,\n 1.3829e-10],\n [1.5860e-10, 2.4397e-10, 1.2511e-10, ..., 6.2624e-10, 1.3403e-10,\n 1.7363e-10]], device='cuda:0')" + }, + "7": { + "step": "tensor(26278.)", + "exp_avg": "tensor([-2.1883e-04, 4.2165e-05, 1.0653e-04, ..., -6.7810e-05,\n 8.4017e-05, 5.5078e-05], device='cuda:0')", + "exp_avg_sq": "tensor([2.1733e-07, 3.4900e-07, 1.1711e-07, ..., 6.4956e-08, 1.8902e-07,\n 2.3370e-07], device='cuda:0')" + }, + "8": { + "step": "tensor(26278.)", + "exp_avg": "tensor([[ 5.0761e-08, -2.6830e-08, 3.1207e-07, ..., 3.2752e-07,\n -4.3330e-08, -1.7174e-07],\n [ 5.8610e-07, -3.4468e-07, -4.7010e-07, ..., -2.3026e-07,\n 1.3091e-06, -6.3706e-07],\n [ 9.2108e-07, 9.7140e-08, -3.3638e-07, ..., -1.1359e-08,\n -6.2418e-07, 1.4884e-06],\n ...,\n [-1.4709e-06, 3.4984e-07, -1.3759e-07, ..., -7.9107e-08,\n 2.6313e-06, -3.0071e-07],\n [ 5.4704e-07, -4.5639e-07, 6.3422e-07, ..., -2.1705e-07,\n 3.1137e-07, 8.6171e-07],\n [ 3.1071e-07, -1.1615e-07, 9.3601e-08, ..., 9.3046e-07,\n 4.9762e-07, 9.4355e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.7105e-12, 1.8722e-12, 2.3547e-12, ..., 2.4608e-12, 3.5613e-12,\n 9.6395e-12],\n [4.9235e-12, 2.2658e-12, 2.7337e-12, ..., 4.2249e-12, 6.1203e-12,\n 1.0156e-11],\n [9.2423e-12, 4.1321e-12, 5.3066e-12, ..., 4.2400e-12, 8.2805e-12,\n 1.2468e-11],\n ...,\n [9.4024e-12, 4.0508e-12, 3.5402e-12, ..., 6.7694e-12, 1.0634e-11,\n 1.1639e-11],\n [5.5871e-12, 3.7781e-12, 3.9502e-12, ..., 5.3903e-12, 8.4683e-12,\n 1.4751e-11],\n [5.3075e-12, 4.1199e-12, 3.4743e-12, ..., 4.0161e-12, 9.0629e-12,\n 1.6330e-11]], device='cuda:0')" + }, + "9": { + "step": "tensor(22524.)", + "exp_avg": "tensor([[ 8.4708e-07, 3.3128e-06, 5.0090e-06, ..., 1.7089e-06,\n 1.8813e-06, -1.0913e-06],\n [ 1.7053e-06, -1.4005e-06, -7.0137e-06, ..., 1.1033e-06,\n -1.8878e-06, -6.1049e-07],\n [ 8.0151e-08, -4.1911e-06, -1.8658e-06, ..., 2.0987e-06,\n -1.7232e-06, 1.9671e-06],\n ...,\n [-4.9685e-06, -9.5589e-06, 2.1790e-06, ..., -5.7175e-06,\n -3.8654e-06, -3.4079e-06],\n [-9.2276e-07, -9.5096e-06, 6.3277e-06, ..., -7.6715e-07,\n 2.9900e-06, 3.4643e-07],\n [ 4.6597e-08, -1.4149e-07, -1.2942e-07, ..., 2.6913e-07,\n 4.9392e-08, 1.0139e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.3142e-10, 1.1762e-09, 5.2103e-10, ..., 1.4099e-10, 6.4547e-11,\n 8.3010e-11],\n [2.5607e-09, 2.3538e-09, 8.0960e-10, ..., 5.1431e-10, 6.1891e-10,\n 5.3274e-10],\n [2.2478e-10, 2.5330e-10, 8.3083e-11, ..., 2.1914e-10, 8.8845e-11,\n 7.6695e-11],\n ...,\n [1.2683e-10, 3.7503e-10, 1.6119e-10, ..., 3.5339e-10, 8.1560e-11,\n 5.6869e-11],\n [7.9688e-11, 4.2394e-10, 3.9410e-10, ..., 1.0397e-10, 1.4328e-10,\n 9.5468e-11],\n [4.7026e-12, 1.1955e-11, 4.8804e-12, ..., 4.5336e-12, 3.7001e-12,\n 6.7327e-12]], device='cuda:0')" + }, + "10": { + "step": "tensor(22524.)", + "exp_avg": "tensor([ 2.5781e-05, -1.8436e-04, 3.2953e-05, ..., -1.8008e-04,\n -8.7386e-05, -1.2162e-06], device='cuda:0')", + "exp_avg_sq": "tensor([1.0679e-07, 6.8696e-07, 1.5290e-07, ..., 1.3440e-07, 9.5988e-08,\n 6.5500e-09], device='cuda:0')" + }, + "11": { + "step": "tensor(22524.)", + "exp_avg": "tensor([[-2.1756e-07, 1.3850e-07, -3.6318e-07, ..., 6.8486e-08,\n 3.2838e-07, 3.0914e-08],\n [-2.2627e-07, -9.6406e-07, -1.6263e-07, ..., 9.6899e-08,\n -9.1113e-08, 9.8679e-08],\n [ 1.1231e-07, 8.3263e-07, 1.4422e-07, ..., -6.9373e-07,\n 4.6793e-07, 1.0344e-07],\n ...,\n [-1.2039e-07, 4.2140e-07, -1.1561e-07, ..., -4.6256e-08,\n 6.1612e-07, 4.7108e-08],\n [ 4.1552e-07, 1.0233e-06, -6.7193e-07, ..., -7.0796e-08,\n -1.7949e-06, -2.3367e-09],\n [-7.4346e-07, 3.6132e-07, -3.8054e-07, ..., -2.4294e-07,\n -5.3384e-07, 3.7541e-09]], device='cuda:0')", + "exp_avg_sq": "tensor([[8.6234e-13, 8.4730e-12, 1.6188e-12, ..., 9.6966e-13, 1.3400e-12,\n 1.3942e-13],\n [1.9530e-12, 1.8483e-11, 1.4601e-12, ..., 2.3044e-12, 9.5414e-13,\n 3.4331e-13],\n [2.5098e-12, 1.5167e-11, 2.1613e-12, ..., 1.2038e-12, 7.4424e-13,\n 4.7366e-13],\n ...,\n [1.7878e-12, 1.7904e-11, 1.5893e-12, ..., 1.5196e-12, 6.4476e-12,\n 2.4404e-13],\n [2.5477e-12, 1.2755e-11, 7.5242e-12, ..., 2.1337e-12, 6.0982e-12,\n 2.1012e-13],\n [1.8573e-12, 2.6721e-11, 3.7031e-12, ..., 1.1972e-12, 3.0911e-12,\n 5.5711e-13]], device='cuda:0')" + }, + "12": { + "step": "tensor(18770.)", + "exp_avg": "tensor([[-6.7010e-07, -3.4995e-08, 1.0237e-06, ..., 4.1586e-07,\n 4.3761e-07, -5.4444e-07],\n [ 1.2324e-06, -3.4212e-07, 5.2078e-07, ..., 6.5440e-08,\n -1.0772e-06, -4.3331e-07],\n [-1.0162e-05, 2.2741e-05, 3.8723e-06, ..., -1.6707e-05,\n 1.5655e-07, -6.7539e-06],\n ...,\n [ 1.0879e-05, -9.4347e-06, 2.6170e-06, ..., 1.7440e-06,\n 3.1817e-06, 7.0785e-07],\n [-7.6489e-07, 1.5480e-05, -5.6368e-07, ..., 2.7106e-07,\n 2.6182e-06, 2.5406e-06],\n [ 4.6703e-06, -8.5334e-06, -3.2652e-06, ..., 1.6533e-06,\n 2.1186e-06, 1.6099e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[8.9739e-12, 7.6445e-12, 4.5193e-12, ..., 1.0746e-11, 3.8911e-12,\n 3.0293e-12],\n [7.4811e-11, 1.6118e-10, 2.7705e-11, ..., 1.8590e-10, 2.6656e-11,\n 2.2517e-11],\n [7.3668e-09, 3.2518e-09, 1.7870e-09, ..., 2.2397e-09, 1.0711e-09,\n 1.3612e-09],\n ...,\n [2.3598e-09, 1.5373e-09, 1.8669e-10, ..., 4.2885e-10, 4.0252e-10,\n 2.5151e-10],\n [4.1840e-10, 1.3765e-09, 5.3079e-10, ..., 1.3863e-10, 1.0834e-10,\n 1.0715e-10],\n [2.8942e-10, 7.7109e-10, 2.8934e-10, ..., 1.3008e-10, 1.1241e-10,\n 1.0848e-10]], device='cuda:0')" + }, + "13": { + "step": "tensor(18770.)", + "exp_avg": "tensor([-1.1603e-05, -1.2557e-05, -1.5749e-04, ..., 9.4033e-05,\n 2.0744e-05, 7.7921e-05], device='cuda:0')", + "exp_avg_sq": "tensor([8.3402e-09, 6.4263e-08, 1.5771e-06, ..., 4.3503e-07, 1.4827e-07,\n 1.6352e-07], device='cuda:0')" + }, + "14": { + "step": "tensor(18770.)", + "exp_avg": "tensor([[-1.8362e-08, -2.9194e-07, 4.1247e-07, ..., 3.8050e-07,\n 4.6080e-07, -3.3862e-07],\n [ 5.2519e-08, -4.0178e-07, -1.9896e-06, ..., 1.1194e-07,\n 1.5435e-07, -1.1789e-06],\n [-8.7620e-08, 1.0300e-07, -1.6191e-06, ..., 2.5995e-07,\n 1.4808e-07, 1.4106e-06],\n ...,\n [ 5.7146e-08, -1.2996e-07, 9.3550e-07, ..., 3.0392e-07,\n 1.5655e-07, 1.7764e-06],\n [-1.3914e-08, -8.8252e-08, -1.0394e-06, ..., 1.1590e-07,\n 2.3128e-07, 1.0150e-06],\n [-7.1192e-09, -2.6868e-07, 1.9067e-06, ..., -4.2038e-07,\n -1.9725e-07, 2.8490e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[7.6216e-14, 1.2771e-12, 1.9609e-12, ..., 4.4857e-12, 9.8085e-13,\n 2.1195e-12],\n [1.9371e-13, 1.4795e-12, 9.2724e-12, ..., 3.9260e-12, 1.7307e-12,\n 5.3300e-12],\n [1.7257e-13, 1.6866e-12, 8.3073e-12, ..., 3.6734e-12, 1.2447e-12,\n 6.1768e-12],\n ...,\n [2.7825e-13, 1.0276e-12, 8.2121e-12, ..., 5.4478e-12, 1.3013e-12,\n 5.2394e-12],\n [2.0956e-13, 1.2385e-12, 1.8219e-11, ..., 9.6094e-12, 1.5096e-12,\n 3.7212e-12],\n [2.2328e-13, 6.7638e-13, 1.2262e-11, ..., 1.9239e-12, 1.8828e-12,\n 1.1406e-11]], device='cuda:0')" + } + }, + "param_groups": [ + { + "lr": 0.00024569294678237997, + "name": "scale_256", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 0, + 1, + 2 + ] + }, + { + "lr": 0.00024569294678237997, + "name": "scale_512", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 3, + 4, + 5 + ] + }, + { + "lr": 0.00024569294678237997, + "name": "scale_768", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 6, + 7, + 8 + ] + }, + { + "lr": 0.00024569294678237997, + "name": "scale_1024", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 9, + 10, + 11 + ] + }, + { + "lr": 0.00024569294678237997, + "name": "scale_1280", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 12, + 13, + 14 + ] + }, + { + "lr": 0.00012333423752026375, + "name": "fusion", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.005, + "params": [ + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48 + ] + } + ] + }, + "scheduler_state_dict": { + "T_0": 10, + "T_i": 10, + "T_mult": 2, + "eta_min": 1e-06, + "T_cur": 9, + "base_lrs": [ + 0.01, + 0.01, + 0.01, + 0.01, + 0.01, + 0.005 + ], + "last_epoch": 9, + "_step_count": 0, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 0.00024569294678237997, + 0.00024569294678237997, + 0.00024569294678237997, + 0.00024569294678237997, + 0.00024569294678237997, + 0.00012333423752026375 + ] + }, + "metrics": { + "best_val_acc": 66.74266666666666, + "best_epoch": 8, + "scale_accuracies": { + "256": 66.74266666666666, + "512": 72.66333333333333, + "768": 74.31466666666667, + "1024": 74.97066666666667, + "1280": 75.24 + }, + "training_history": { + "epochs": [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9 + ], + "train_loss": [ + 2.9751985085156605, + 2.963648736889717, + 3.1708614105992834, + 3.378361599522826, + 3.580668337115215, + 3.3858161155505577, + 3.3002336309130165, + 3.234047183662764, + 3.18145428309748 + ], + "train_acc": [ + 56.42811072509152, + 61.669816138983705, + 62.79436378447671, + 63.576593319476174, + 64.19873443508926, + 64.70800970261227, + 65.18647972252381, + 65.5778156425613, + 65.91854665837215 + ], + "val_acc": [ + 62.524, + 64.036, + 64.86466666666666, + 65.18733333333333, + 65.698, + 66.138, + 66.39066666666666, + 66.56066666666666, + 66.74266666666666 + ], + "scale_accs": { + "256": [ + 62.524, + 64.036, + 64.86466666666666, + 65.18733333333333, + 65.698, + 66.138, + 66.39066666666666, + 66.56066666666666, + 66.74266666666666 + ], + "512": [ + 69.152, + 70.62733333333334, + 71.12733333333334, + 71.67266666666667, + 72.07333333333334, + 72.244, + 72.50933333333333, + 72.66333333333333 + ], + "768": [ + 70.71, + 72.20066666666666, + 73.10133333333333, + 73.588, + 73.892, + 74.01266666666666, + 74.31466666666667 + ], + "1024": [ + 71.38866666666667, + 73.18, + 73.954, + 74.42666666666666, + 74.71466666666667, + 74.97066666666667 + ], + "1280": [ + 72.18466666666667, + 73.66933333333333, + 74.454, + 74.934, + 75.24 + ] + }, + "lr": [ + 0.00975530705321762, + 0.00904518046337755, + 0.00793913236883622, + 0.00654543046337755, + 0.005000500000000001, + 0.0034555695366224513, + 0.0020618676311637816, + 0.0009558195366224509, + 0.00024569294678237997 + ] + } + }, + "train_config": { + "name": "david_training", + "run_id": "20251012_221046", + "dataset_name": "AbstractPhil/imagenet-clip-features-orderly", + "model_variant": [ + "clip_vit_b16", + "clip_vit_laion_b32", + "clip_vit_b32" + ], + "num_classes": 1000, + "preset": "high_accuracy", + "custom_config_path": null, + "num_classes_override": null, + "use_belly_override": null, + "belly_expand_override": null, + "progressive_training_override": true, + "scale_warmup_epochs_override": { + "256": 0, + "512": 1, + "768": 2, + "1024": 3, + "1280": 4 + }, + "num_epochs": 10, + "batch_size": 1024, + "learning_rate": 0.01, + "weight_decay": 1e-05, + "warmup_epochs": 3, + "use_rose_loss": true, + "rose_initial_weight": 0.2, + "rose_max_weight": 0.8, + "rose_weight_schedule": "adaptive", + "use_cayley_loss": false, + "cayley_weight": 0.01, + "scale_loss_balance": null, + "use_mixed_precision": false, + "gradient_clip": 10.0, + "scheduler_type": "cosine_restarts", + "min_lr": 1e-06, + "freeze_strategy": "never", + "freeze_threshold": 90.0, + "unfreeze_on_plateau": true, + "patience": 10, + "track_gradients": true, + "gradient_scale_threshold": 1e-05, + "gradient_scale_multiplier": 10.0, + "log_interval": 50, + "val_interval": 1, + "save_interval": 5, + "log_fusion_weights": true, + "log_loss_components": true, + "save_format": "safetensors", + "hf_repo": "AbstractPhil/david-shared-space", + "upload_to_hub": true, + "base_dir": "./david_training", + "num_workers": 10, + "pin_memory": true, + "prefetch_factor": 4, + "persistent_workers": true + } +} \ No newline at end of file