diff --git "a/weights/best_model_metadata.json" "b/weights/best_model_metadata.json" --- "a/weights/best_model_metadata.json" +++ "b/weights/best_model_metadata.json" @@ -1,226 +1,226 @@ { - "epoch": 3, + "epoch": 4, "optimizer_state_dict": { "state": { "0": { - "step": "tensor(5008.)", - "exp_avg": "tensor([[-1.4727e-05, 9.6125e-05, 4.4182e-06, ..., 1.8888e-05,\n 3.2354e-05, -3.2978e-05],\n [ 1.7186e-05, 2.1444e-05, 6.2785e-05, ..., -5.0763e-05,\n -4.9622e-05, -2.0544e-06],\n [-3.5583e-17, -6.3245e-17, 3.9489e-18, ..., -1.4693e-17,\n 8.8409e-18, 1.0104e-16],\n ...,\n [ 5.4912e-05, -1.1023e-05, 8.0683e-06, ..., 5.8102e-05,\n 9.9074e-05, 9.2469e-06],\n [-2.0231e-06, -2.8735e-05, -4.6638e-05, ..., -2.6582e-06,\n 6.9262e-06, 1.7296e-05],\n [ 1.3488e-05, 2.0492e-05, -1.1510e-05, ..., -1.8018e-05,\n 9.9040e-06, 2.5564e-06]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.3783e-08, 1.7052e-08, 7.6170e-09, ..., 9.6923e-09, 8.6143e-09,\n 5.4852e-09],\n [1.2322e-08, 1.1216e-08, 1.1246e-08, ..., 9.0442e-09, 6.9648e-09,\n 5.6959e-09],\n [1.4516e-12, 1.7526e-12, 1.7107e-12, ..., 2.4980e-12, 4.7983e-13,\n 1.2027e-12],\n ...,\n [1.4175e-08, 1.1985e-08, 1.0139e-08, ..., 8.5773e-09, 8.2615e-09,\n 6.2049e-09],\n [1.5163e-08, 1.3115e-08, 9.3555e-09, ..., 1.0157e-08, 8.0871e-09,\n 6.6994e-09],\n [3.3138e-09, 4.9418e-09, 3.8416e-09, ..., 2.1946e-09, 2.6334e-09,\n 2.2108e-09]], device='cuda:0')" + "step": "tensor(6260.)", + "exp_avg": "tensor([[ 7.8277e-05, 4.9578e-06, 2.3352e-05, ..., -1.4571e-05,\n -4.8797e-05, 3.8431e-05],\n [ 8.8573e-07, 2.5922e-05, -5.2547e-05, ..., 2.1645e-05,\n -4.8459e-05, 3.4791e-05],\n [-4.6154e-08, 4.1890e-08, 1.3520e-08, ..., -2.1759e-08,\n 5.8983e-09, 6.1123e-09],\n ...,\n [-5.7400e-05, -1.7486e-05, -1.8404e-05, ..., -6.6152e-06,\n 1.5364e-05, -1.8938e-05],\n [-2.3509e-05, 4.3610e-05, -2.1072e-05, ..., -6.2180e-05,\n -1.5057e-05, 3.1203e-05],\n [ 1.3180e-05, -9.1399e-06, 2.6388e-05, ..., -1.5124e-05,\n -1.3041e-05, 3.6407e-05]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.3581e-08, 1.6482e-08, 7.6694e-09, ..., 9.3753e-09, 9.0039e-09,\n 5.7178e-09],\n [1.3218e-08, 1.1571e-08, 1.1668e-08, ..., 9.4941e-09, 7.0953e-09,\n 5.6897e-09],\n [7.1313e-13, 6.7727e-13, 5.5255e-13, ..., 8.2565e-13, 1.6906e-13,\n 4.4071e-13],\n ...,\n [1.3172e-08, 1.1464e-08, 9.6745e-09, ..., 7.9186e-09, 7.1973e-09,\n 5.6510e-09],\n [1.5851e-08, 1.3371e-08, 9.1543e-09, ..., 1.0205e-08, 8.3490e-09,\n 6.9020e-09],\n [3.4848e-09, 5.3274e-09, 3.8560e-09, ..., 2.3512e-09, 2.4853e-09,\n 2.4110e-09]], device='cuda:0')" }, "1": { - "step": "tensor(5008.)", - "exp_avg": "tensor([ 2.9744e-03, -2.3959e-03, 4.5018e-15, ..., -6.1077e-04,\n 1.2236e-03, 7.8891e-04], device='cuda:0')", - "exp_avg_sq": "tensor([1.7595e-05, 1.5581e-05, 3.1536e-09, ..., 1.7378e-05, 1.7380e-05,\n 5.7453e-06], device='cuda:0')" + "step": "tensor(6260.)", + "exp_avg": "tensor([ 2.4973e-03, -1.4545e-03, 8.9858e-07, ..., -3.1942e-04,\n 8.1322e-04, -1.3398e-03], device='cuda:0')", + "exp_avg_sq": "tensor([1.7700e-05, 1.6147e-05, 1.1544e-09, ..., 1.6169e-05, 1.7467e-05,\n 6.0526e-06], device='cuda:0')" }, "2": { - "step": "tensor(5008.)", - "exp_avg": "tensor([[-6.2350e-06, 6.8298e-06, 5.6052e-45, ..., -9.7226e-06,\n 9.8143e-06, 2.7689e-06],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-1.7465e-07, 8.9511e-07, 0.0000e+00, ..., 2.4717e-06,\n -3.0347e-06, -2.3281e-08],\n ...,\n [ 0.0000e+00, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 0.0000e+00],\n [-4.9470e-06, -5.0006e-06, -5.6052e-45, ..., 4.7047e-06,\n -1.4633e-05, 4.3875e-06],\n [ 1.2712e-06, -3.1989e-07, -5.6052e-45, ..., 1.8128e-06,\n 1.0142e-05, -6.3883e-06]], device='cuda:0')", - "exp_avg_sq": "tensor([[2.8631e-09, 7.4611e-10, 7.9790e-13, ..., 2.6214e-09, 2.3772e-09,\n 5.1907e-10],\n [8.5335e-13, 1.5629e-11, 0.0000e+00, ..., 2.9320e-12, 1.0848e-15,\n 1.0799e-11],\n [1.1750e-10, 4.8641e-10, 0.0000e+00, ..., 2.1420e-10, 5.5720e-10,\n 1.0681e-10],\n ...,\n [0.0000e+00, 3.6588e-18, 0.0000e+00, ..., 1.0719e-19, 1.2801e-19,\n 0.0000e+00],\n [5.3824e-09, 1.0494e-09, 1.1498e-13, ..., 2.7933e-09, 7.2901e-10,\n 2.1291e-09],\n [1.1844e-09, 1.2613e-09, 6.1294e-13, ..., 8.5529e-10, 6.3261e-09,\n 8.3949e-10]], device='cuda:0')" + "step": "tensor(6260.)", + "exp_avg": "tensor([[-1.9668e-06, 1.8664e-06, 5.6052e-45, ..., -9.4905e-06,\n -2.9698e-07, 1.2132e-06],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-7.2952e-08, -2.9029e-06, 0.0000e+00, ..., -9.9339e-07,\n -8.3676e-06, -1.6951e-08],\n ...,\n [ 0.0000e+00, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 0.0000e+00],\n [ 2.0413e-06, -9.1185e-07, -5.6052e-45, ..., 4.2819e-06,\n -1.9084e-06, 4.0816e-06],\n [ 9.7527e-07, 2.1144e-06, -5.6052e-45, ..., -4.1452e-07,\n 1.3256e-05, -5.2286e-08]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.7336e-09, 4.6745e-10, 2.2801e-13, ..., 2.4491e-09, 1.2078e-09,\n 3.7316e-10],\n [2.4385e-13, 4.4660e-12, 0.0000e+00, ..., 8.3785e-13, 3.0998e-16,\n 3.0858e-12],\n [3.2643e-10, 5.8905e-10, 0.0000e+00, ..., 4.1421e-10, 1.6597e-09,\n 3.9613e-11],\n ...,\n [0.0000e+00, 1.0455e-18, 0.0000e+00, ..., 3.0629e-20, 3.6581e-20,\n 0.0000e+00],\n [5.6132e-09, 9.2022e-10, 3.2857e-14, ..., 2.3175e-09, 6.9180e-10,\n 1.4418e-09],\n [1.7004e-09, 1.2668e-09, 1.7515e-13, ..., 6.6202e-10, 5.6460e-09,\n 6.5041e-10]], device='cuda:0')" }, "3": { - "step": "tensor(5008.)", - "exp_avg": "tensor([ 1.3401e-04, 5.6052e-45, -6.2644e-06, -9.7304e-05, 4.3269e-06,\n 5.6052e-45, -2.0040e-04, -8.3697e-05, -7.2948e-06, -1.2949e-04,\n -6.2824e-05, 4.2856e-16, 9.5880e-05, -6.8601e-05, -1.0370e-04,\n 4.3672e-05, 5.7833e-05, 3.2737e-05, -1.1208e-05, -4.5552e-05,\n 8.2982e-37, 7.3079e-05, -1.1610e-05, -3.0190e-05, 1.4747e-04,\n 7.2015e-05, 1.6198e-05, 5.6052e-45, -8.1331e-05, 1.6237e-04,\n -7.9612e-05, 2.6094e-04, -1.5501e-04, 2.3547e-05, -7.1045e-05,\n 3.5669e-05, 5.6052e-45, -9.4261e-05, -1.0254e-04, 3.4336e-05,\n 5.6989e-05, 2.8937e-05, -1.8554e-04, -1.3883e-04, 7.4411e-05,\n -4.5730e-05, -5.9864e-04, -1.4483e-04, 1.4346e-04, 3.9030e-04,\n 1.0786e-34, -2.0056e-04, -2.7017e-04, 2.9667e-04, 5.9601e-05,\n -3.5043e-05, -2.6332e-05, 9.1417e-05, 1.2760e-04, 3.0233e-05,\n -1.5171e-05, -8.3641e-05, -2.4594e-04, 1.6736e-04, -4.0906e-05,\n -6.9888e-05, -4.3127e-05, -2.2948e-05, 5.0552e-13, 1.9833e-12,\n 2.5597e-05, 5.6052e-45, -2.7023e-04, -1.4586e-04, -1.3290e-04,\n -3.2931e-05, 5.6052e-45, 1.7882e-04, -8.5191e-05, 3.4768e-06,\n 3.3717e-05, 2.9722e-05, 4.2705e-05, -5.6052e-45, 1.5721e-04,\n 1.3741e-04, -1.1317e-04, 4.9899e-05, -2.9393e-05, 3.7463e-04,\n -2.0011e-05, 5.7755e-05, 8.8730e-05, 1.2077e-04, -2.7251e-04,\n 9.4886e-05, -2.7866e-06, -4.8543e-05, 3.8890e-05, 2.3308e-05,\n 6.5394e-05, -1.6433e-05, -2.1861e-04, 4.4827e-05, 2.2400e-04,\n 3.6094e-05, 1.2216e-04, 3.2721e-04, -1.3657e-04, -2.3235e-05,\n -1.6783e-04, -1.3674e-04, -3.0435e-04, -3.4201e-05, -1.8890e-04,\n -1.4058e-04, 1.3106e-04, -5.6735e-05, 1.8936e-05, 5.6052e-45,\n 5.4879e-05, 7.0258e-05, 5.8783e-05, 4.5210e-14, -7.5807e-05,\n 1.6036e-04, 3.7173e-06, 3.0738e-07, -9.3992e-05, -1.2458e-07,\n 6.4962e-06, -2.5152e-05, 5.6052e-45, -1.8557e-04, 8.4301e-16,\n 9.0713e-05, 2.1242e-10, 3.5781e-04, 4.9424e-05, 1.0746e-04,\n 1.1300e-20, 5.6052e-45, 7.9660e-05, -2.5780e-20, 5.6052e-45,\n 5.6052e-45, -1.8502e-04, 3.3052e-05, 5.1554e-21, 5.1123e-06,\n -1.1424e-05, 3.6018e-05, 9.1746e-05, -5.6057e-05, -4.7201e-06,\n -6.7439e-05, -8.9118e-06, 1.9876e-04, 7.4944e-10, 1.3363e-04,\n 1.5282e-05, 1.2615e-04, -7.2584e-05, 1.1550e-04, 8.1575e-06,\n -1.4684e-04, 4.2468e-07, 5.6052e-45, 9.0784e-05, -9.2113e-05,\n 2.0557e-04, 4.9073e-05, 1.0615e-04, 8.5641e-05, 5.9585e-05,\n 5.6973e-05, -1.1878e-04, 1.0991e-04, 1.7263e-04, -8.4363e-05,\n -9.6052e-05, 7.2729e-05, -1.2001e-05, 1.5239e-05, 5.6052e-45,\n 5.6052e-45, -1.1592e-04, -6.2606e-06, 5.5454e-05, 2.3503e-05,\n 1.8138e-04, -7.2504e-06, 3.1775e-04, -5.2582e-05, -1.9290e-05,\n -1.0439e-04, 6.1520e-05, 1.6071e-04, 2.3279e-05, 5.6052e-45,\n 1.3808e-05, 1.9843e-04, -1.7049e-04, -2.4280e-04, -1.7729e-04,\n -4.9945e-05, -5.8612e-05, 8.9853e-05, 5.6052e-45, 5.6052e-45,\n 8.5312e-05, 5.6052e-45, 4.1249e-05, 8.2501e-05, -1.0660e-04,\n 1.1107e-04, -8.7361e-05, 8.9445e-06, 1.2801e-04, 2.5612e-04,\n 1.0368e-04, -2.1521e-04, -2.0458e-04, 3.2321e-05, 3.1305e-04,\n -5.6052e-45, -6.3493e-05, -4.0146e-05, -7.6074e-07, 1.2196e-04,\n -1.9392e-04, 1.3858e-04, 9.0847e-05, 1.4484e-04, 5.6052e-45,\n -3.3414e-04, -4.0924e-04, 5.6052e-45, -6.2524e-05, 1.3521e-04,\n -5.4264e-06, 5.6052e-45, -4.2800e-05, 1.9403e-04, -1.8363e-05,\n -5.5718e-05, 5.7732e-05, -9.1581e-05, -7.3404e-06, 1.2515e-29,\n -2.2163e-05, 6.3616e-05, 7.1164e-05, 1.9167e-04, 6.0801e-05,\n 6.2982e-05, 5.6052e-45, 3.6656e-05, 9.9906e-06, 7.8404e-05,\n 5.6052e-45, -2.9096e-05, -2.7413e-05, -3.1304e-05, 1.2835e-04,\n 1.8700e-05, -9.1180e-05, 1.0435e-04, 2.9978e-04, 1.1939e-05,\n -1.2081e-04, -7.6675e-05, 1.0620e-04, 2.2361e-05, 7.3982e-05,\n -1.6413e-06, -5.5709e-05, 4.7025e-39, -7.3152e-05, -1.0914e-04,\n -5.6052e-45, 1.3320e-04, 4.0947e-05, 1.7639e-04, 1.2749e-04,\n -2.2215e-05, -5.6385e-05, -1.4954e-05, 1.8229e-05, -8.7766e-05,\n 5.3702e-05, 2.5272e-05, 2.7594e-05, -1.8341e-04, 5.0277e-05,\n 5.8091e-06, -2.7338e-05, -1.2987e-04, -2.2459e-04, -7.9666e-05,\n -6.8944e-05, -2.2566e-05, 9.8629e-06, -6.5832e-05, 2.4214e-05,\n 5.6052e-45, 7.4029e-15, -3.7809e-05, -5.6630e-05, -1.6457e-04,\n -1.7598e-04, 5.6052e-45, -8.9594e-06, -5.4417e-05, 1.5756e-04,\n 5.6052e-45, 2.9428e-06, 1.1504e-04, -1.1578e-05, 1.6289e-05,\n -8.7762e-05, 4.8958e-05, 3.3315e-05, 1.0445e-04, 1.0805e-05,\n 5.6052e-45, -1.0699e-04, -7.7283e-06, 4.1137e-04, -7.2044e-05,\n 1.9609e-04, -4.0758e-06, 3.8561e-05, -1.0593e-05, 7.6603e-05,\n -4.2874e-06, -2.0838e-04, 5.5386e-05, 5.6052e-45, -2.8946e-09,\n -1.0648e-04, 4.2920e-05, -1.7413e-05, 1.8005e-07, 7.6674e-05,\n -5.6801e-05, -3.8644e-05, -1.0763e-04, -2.1174e-04, 2.1255e-04,\n -4.5556e-05, -1.9779e-04, -3.9448e-05, 9.6265e-31, -2.2905e-04,\n 1.5332e-04, -2.3975e-05, -9.1510e-06, 5.3081e-05, 3.8210e-05,\n -9.8098e-05, -8.6392e-05, 5.6052e-45, -2.3820e-04, -6.5929e-05,\n -1.5640e-05, -5.8210e-06, 4.2587e-05, -1.0191e-04, -8.7200e-05,\n 8.9749e-05, -6.6093e-06, 5.9356e-05, 7.1627e-05, 7.0712e-05,\n 6.5979e-06, -1.1135e-22, -1.0719e-05, 5.6052e-45, 1.0914e-04,\n 2.4111e-04, -2.0877e-04, -1.9686e-05, -1.3509e-05, 5.6052e-45,\n 4.1146e-05, -5.1564e-09, 2.0104e-04, 2.5153e-04, -2.8091e-04,\n -5.0162e-05, 7.9999e-05, 2.4352e-06, 6.1628e-06, -1.4080e-04,\n -5.3284e-05, -4.5643e-05, -1.1892e-30, 5.6052e-45, -4.9967e-06,\n 7.1555e-05, 2.4361e-05, 2.5469e-04, -1.6059e-04, 9.5334e-06,\n 4.4156e-04, 5.6052e-45, 1.0917e-06, -4.0461e-07, -1.3370e-05,\n -1.2816e-04, 1.6562e-20, -5.0005e-05, 1.9150e-04, -3.0641e-05,\n -8.1052e-05, -1.1349e-09, -1.1912e-04, 1.7612e-05, 1.3307e-04,\n -3.1517e-05, -2.4467e-04, 8.8918e-05, 1.1595e-20, 5.6052e-45,\n 6.2538e-05, 3.3598e-05, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 1.4366e-04, 6.0018e-06, 6.1842e-05, 1.4459e-04, 2.5917e-05,\n 5.6052e-45, 2.1359e-04, -1.7919e-04, -1.0860e-04, 5.6052e-45,\n 1.1594e-04, -2.3318e-04, -7.6987e-04, 5.6052e-45, -3.4201e-04,\n 1.1699e-04, 6.3067e-06, 2.2314e-04, 2.2639e-04, -2.9622e-06,\n -1.0983e-04, 3.9249e-05, -3.9467e-05, -5.2579e-05, -1.4382e-04,\n 1.7714e-04, 2.0510e-12, -2.4685e-04, -4.1326e-38, 8.0420e-06,\n 1.7039e-04, -7.5062e-05, 5.6052e-45, 6.0590e-05, 5.6052e-45,\n 1.5977e-04, 2.3573e-05, 3.0964e-04, -5.9194e-05, 1.0892e-04,\n -9.0616e-06, 5.6052e-45, 1.7595e-05, 7.9936e-05, -3.3682e-05,\n 5.6052e-45, -3.1364e-05, -7.3109e-06, 1.3499e-05, -3.6335e-05,\n -2.1982e-04, 6.6899e-06, 1.8728e-04, -6.4828e-05, -1.5052e-04,\n -2.4139e-04, 5.8454e-05, 1.5121e-05, 8.1400e-06, -7.1012e-05,\n -2.1785e-04, 5.0432e-05, -3.9808e-05, 8.7696e-05, -3.7668e-06,\n -3.7282e-05, -1.9960e-04, -1.5776e-04, 8.5493e-05, 3.6515e-05,\n 5.6052e-45, 6.6274e-05, 5.3892e-05, -4.3374e-04, 1.3706e-04,\n 1.7446e-05, 5.2743e-05, 6.3901e-05, -1.6095e-04, -2.2230e-04,\n 2.5615e-05, -1.5683e-04, -2.4848e-04, -2.4546e-04, -1.2350e-04,\n -1.3206e-06, 5.6552e-05, -5.4684e-05, 6.2284e-41, -1.2618e-05,\n 1.1526e-04, -1.5721e-04, -4.6146e-05, 1.0260e-05, 2.3262e-05,\n -5.7413e-05, 5.6052e-45, -1.0545e-04, -4.3232e-04, 1.6360e-04,\n 1.1938e-04, -5.1265e-05, -7.4470e-05, -1.9854e-04, 1.2509e-05,\n -5.4566e-06, 3.2106e-05, 5.6052e-45, -3.2203e-05, -2.5878e-04,\n -1.6058e-04, -6.8822e-05, -1.0292e-04, 1.8069e-04, 1.4212e-04,\n -1.2589e-09, -3.5599e-05, 6.8286e-05, 4.0978e-05, 1.2103e-06,\n -1.4554e-04, 1.0070e-05, 5.5421e-05, 1.7426e-04, 1.7820e-04,\n -3.1262e-05, 2.7132e-05, 8.9497e-06, -2.1564e-05, -1.3612e-04,\n 5.6052e-45, 1.6903e-04, -1.3442e-10, 9.0311e-05, 8.3781e-05,\n 2.3219e-04, -8.2306e-05, 5.6052e-45, 9.4486e-05, 5.6052e-45,\n 1.1960e-04, -1.1955e-05, -6.7594e-05, -1.2846e-31, -1.1176e-04,\n -2.8732e-04, -2.1194e-04, 3.5659e-05, 1.1161e-04, -8.9805e-05,\n 1.8802e-04, 7.5613e-24, 1.0864e-04, 8.3588e-06, 5.6052e-45,\n -1.1543e-04, 6.7276e-05, -4.0843e-04, -3.2496e-05, -3.9364e-06,\n -5.7613e-04, 5.6052e-45, 1.7492e-04, 2.4271e-05, 8.8970e-05,\n -1.8492e-05, 2.7960e-05, -6.2142e-05, 8.2053e-05, -1.5701e-05,\n -5.2802e-05, 4.2292e-04, -3.0062e-05, -5.8785e-05, -9.9055e-05,\n -6.6835e-05, 2.3692e-04, 9.8381e-05, 6.2732e-05, 9.8214e-05,\n -1.1315e-04, 8.5333e-05, -1.0048e-04, 8.8580e-29, -1.7057e-04,\n 5.6052e-45, 2.7111e-04, 1.5613e-33, 3.2786e-05, 8.3056e-05,\n 8.1977e-05, -1.6679e-04, -2.7298e-05, 5.4330e-05, -1.1984e-05,\n 1.3030e-04, 5.6052e-45, -2.9161e-05, -5.8067e-05, 3.7128e-05,\n 2.6681e-04, 4.6839e-33, 5.6052e-45, 5.6052e-45, 1.2124e-05,\n -1.4921e-04, 5.6052e-45, 1.8558e-04, 1.8449e-04, -2.9584e-04,\n 1.7813e-04, 5.6052e-45, 8.9779e-05, -1.0995e-07, -2.9693e-05,\n -2.0330e-05, -1.8277e-05, 4.8589e-05, -2.1496e-05, 1.0102e-04,\n -2.0851e-04, 1.2364e-04, -8.2080e-05, 1.2275e-04, -3.2149e-04,\n 1.7918e-04, -6.8584e-05, -1.2537e-06, 7.2832e-07, 5.6584e-27,\n 2.0016e-04, -6.4768e-05, -6.0360e-06, -4.3189e-05, -3.8007e-04,\n -1.6184e-04, -1.9628e-05, -4.1078e-05, -1.8256e-04, 5.6052e-45,\n -1.5106e-04, 5.6052e-45, -7.7646e-05, 2.2833e-04, -2.9517e-05,\n -2.0022e-04, -1.7055e-04, 4.5815e-23, -4.8939e-14, 3.4503e-17,\n -2.1483e-05, -9.6001e-05, -7.0151e-05, 4.1070e-18, -3.7208e-04,\n -5.6052e-45, 1.7335e-04, 3.0939e-05, 6.4471e-05, -9.1275e-05,\n 4.8288e-06, 1.1388e-05, 5.6052e-45, 5.0152e-05, -6.1304e-17,\n 1.0785e-04, 1.7371e-04, -2.2477e-05, 5.6052e-45, -6.1792e-05,\n -1.6953e-05, 4.7309e-05, 3.5355e-05, 1.0680e-04, 3.7922e-04,\n -3.9507e-05, 5.6052e-45, 9.6949e-05, -7.0336e-05, 5.6052e-45,\n 1.8669e-05, -8.3326e-06, 2.1600e-05, 2.1865e-04, 5.2768e-05,\n 4.2536e-05, -1.0377e-05, -8.6186e-05, -4.0438e-05, -6.3138e-05,\n 2.6625e-44, 3.2045e-04, -4.2511e-04, 5.3401e-05, -1.1481e-05,\n 3.6375e-05, -7.7616e-05, 1.9719e-04, -2.6246e-04, -6.0979e-05,\n 1.4997e-04, -2.0473e-05, -5.4263e-05, 2.4133e-04, 5.6052e-45,\n -1.9760e-05, 5.3072e-06, 3.7807e-04, -7.1875e-05, -1.2391e-05,\n -2.3003e-05, 2.9123e-04, -1.0618e-05, 1.5811e-04, -1.0353e-04,\n 1.2496e-04, 4.6253e-05, 5.6052e-45, 2.6541e-04, -5.6052e-45,\n 2.1772e-04, -5.4464e-05, -2.7460e-05, -7.9709e-05, -4.6111e-05,\n -1.3586e-04, 4.7484e-05, 1.5138e-04, -7.9942e-05, 3.7365e-05,\n -7.7960e-05, 3.8141e-05, -9.5504e-05, 4.1283e-05, 5.6052e-45,\n 5.6052e-45, -1.9719e-04, -3.6908e-05], device='cuda:0')", - "exp_avg_sq": "tensor([3.0380e-07, 1.9233e-08, 1.0556e-07, 2.1013e-07, 6.8335e-07, 1.7923e-07,\n 7.2973e-07, 8.0230e-07, 3.0135e-07, 6.1795e-07, 4.7625e-07, 9.8988e-08,\n 2.7964e-07, 2.7323e-07, 3.3026e-07, 2.9766e-07, 8.0041e-07, 1.1004e-07,\n 6.8968e-08, 5.7496e-07, 2.2461e-09, 1.4262e-07, 2.9527e-07, 4.1855e-07,\n 5.9638e-07, 4.0160e-07, 1.7916e-07, 4.0701e-10, 4.4812e-07, 4.5438e-07,\n 1.2342e-07, 3.0474e-07, 3.3277e-07, 6.0140e-07, 2.5711e-07, 2.0870e-07,\n 2.1858e-07, 4.8595e-07, 3.5936e-07, 3.8917e-07, 7.8102e-07, 2.5362e-07,\n 5.3277e-07, 4.7205e-07, 5.2115e-07, 3.0704e-07, 3.0444e-07, 2.7239e-07,\n 1.2952e-06, 3.6878e-07, 3.4910e-07, 2.8666e-06, 3.3334e-07, 3.0342e-07,\n 4.6782e-07, 3.0882e-07, 6.1505e-07, 1.3318e-07, 1.6384e-07, 3.9241e-07,\n 8.0069e-08, 3.0053e-07, 7.8173e-07, 4.0093e-07, 1.2501e-07, 2.1717e-07,\n 5.8386e-07, 5.7866e-07, 1.8960e-07, 4.5134e-08, 3.7749e-07, 1.5882e-07,\n 1.9642e-07, 7.7855e-07, 3.4774e-07, 3.7408e-07, 1.1023e-09, 4.9544e-07,\n 1.5054e-07, 2.2383e-07, 4.8735e-07, 3.2335e-07, 6.7948e-07, 3.0434e-09,\n 1.1438e-06, 4.0259e-07, 4.6238e-07, 6.6913e-07, 3.1319e-07, 6.6444e-07,\n 2.8941e-07, 2.5493e-07, 1.7481e-07, 4.0561e-07, 1.7040e-07, 7.8234e-07,\n 6.3053e-07, 1.3262e-06, 4.5120e-07, 4.5966e-07, 4.9289e-07, 4.9123e-07,\n 1.2510e-06, 1.1191e-06, 3.5053e-07, 4.8870e-07, 2.2621e-07, 3.0295e-07,\n 3.0211e-07, 1.4257e-07, 3.2761e-07, 8.6332e-07, 4.0771e-07, 1.5286e-07,\n 7.3538e-07, 4.9836e-07, 3.1888e-07, 1.7604e-07, 2.0767e-07, 8.3321e-08,\n 4.4242e-07, 8.2368e-07, 2.5955e-07, 2.0879e-07, 3.3569e-07, 4.6795e-07,\n 5.0674e-07, 6.6572e-09, 5.1733e-07, 9.2935e-07, 3.8101e-07, 3.4569e-07,\n 9.1206e-09, 4.1331e-07, 9.7729e-08, 2.6119e-07, 3.6959e-11, 5.2451e-07,\n 3.0240e-07, 1.5803e-07, 9.1956e-08, 5.0567e-08, 2.1107e-07, 1.4452e-11,\n 1.9591e-10, 2.6059e-07, 5.0701e-07, 4.9784e-07, 7.0014e-07, 1.4566e-07,\n 1.6431e-07, 3.9500e-07, 2.0949e-07, 2.6852e-07, 1.0472e-09, 4.4968e-07,\n 2.7375e-07, 4.5113e-07, 2.8294e-07, 4.8288e-07, 2.9469e-07, 1.9686e-07,\n 4.4421e-07, 5.6312e-07, 2.9010e-07, 3.3266e-07, 1.1093e-07, 3.6837e-13,\n 3.8433e-07, 2.3040e-07, 6.1631e-07, 5.4605e-07, 3.1648e-07, 3.8913e-07,\n 4.2884e-07, 3.0395e-07, 2.0576e-08, 3.9191e-07, 6.4621e-07, 1.0034e-06,\n 7.0445e-07, 4.7306e-07, 4.1150e-07, 2.9840e-08, 1.5472e-11, 8.8957e-18,\n 6.1379e-07, 3.2217e-07, 9.7838e-08, 6.5025e-08, 2.8337e-07, 2.0267e-07,\n 4.8268e-07, 7.1945e-07, 5.4258e-07, 2.3230e-07, 5.1714e-07, 5.5017e-07,\n 2.7430e-07, 2.3575e-18, 2.9973e-07, 2.3375e-07, 5.1576e-07, 5.7059e-07,\n 5.5288e-07, 2.1338e-07, 5.5459e-07, 3.8478e-07, 9.7323e-08, 1.7671e-09,\n 5.9448e-07, 2.4792e-09, 2.4035e-07, 7.1660e-07, 4.0483e-07, 3.3432e-07,\n 3.6109e-07, 1.7698e-07, 4.3250e-07, 3.0066e-07, 1.7002e-07, 5.9540e-07,\n 5.0384e-07, 2.7146e-07, 5.0855e-07, 3.4904e-07, 9.3621e-07, 6.4101e-07,\n 5.8351e-07, 2.1858e-08, 3.5078e-07, 3.9265e-07, 3.3761e-07, 5.5734e-07,\n 2.2086e-07, 4.4415e-07, 8.2942e-07, 4.1882e-11, 2.7328e-07, 3.9960e-07,\n 1.7239e-07, 1.1489e-07, 5.5878e-07, 3.3763e-07, 2.5329e-07, 3.5024e-07,\n 3.2152e-07, 6.4553e-07, 2.4551e-07, 1.4655e-07, 9.6021e-07, 6.9306e-07,\n 5.3421e-07, 7.0191e-07, 2.7405e-07, 5.7646e-07, 4.4792e-11, 2.2125e-07,\n 1.3333e-09, 1.0273e-06, 2.6385e-16, 4.6021e-07, 3.6965e-07, 2.8913e-07,\n 9.5331e-07, 3.8840e-07, 2.5962e-07, 2.9360e-07, 6.9963e-07, 5.2288e-07,\n 5.9592e-07, 3.1082e-07, 2.5969e-07, 5.4306e-07, 6.6744e-07, 3.0148e-07,\n 5.5723e-07, 2.2844e-10, 1.3626e-07, 8.1888e-07, 6.5139e-07, 8.4597e-07,\n 4.2713e-07, 4.1160e-07, 6.4585e-07, 2.1387e-07, 5.5418e-07, 4.2897e-07,\n 9.0262e-08, 1.1844e-06, 2.0439e-07, 5.3059e-07, 8.2696e-07, 1.1408e-07,\n 4.4328e-07, 9.1578e-07, 4.1502e-07, 9.4161e-07, 3.1687e-07, 9.8828e-08,\n 3.8534e-07, 1.0197e-06, 1.2808e-07, 1.2342e-07, 3.4204e-08, 5.2602e-10,\n 6.4733e-09, 1.7292e-07, 4.0258e-07, 5.4018e-07, 5.6576e-07, 3.4226e-11,\n 3.8913e-07, 2.2600e-07, 3.3617e-07, 7.5124e-11, 3.7880e-08, 1.5708e-06,\n 2.2929e-07, 1.9618e-07, 4.4145e-07, 9.5838e-08, 8.9763e-08, 4.2837e-07,\n 1.8630e-07, 2.7255e-07, 7.8292e-07, 3.9285e-07, 3.8545e-07, 3.6836e-07,\n 4.7575e-07, 1.7880e-07, 4.3758e-07, 2.4945e-07, 9.9114e-08, 1.0054e-06,\n 2.8805e-07, 7.7193e-08, 2.0470e-16, 1.0235e-09, 9.1542e-07, 3.5065e-07,\n 1.1617e-06, 1.5697e-07, 6.8922e-07, 2.9627e-07, 4.0131e-07, 2.1285e-06,\n 3.6185e-07, 5.9936e-07, 5.3026e-07, 4.4480e-07, 1.2824e-07, 5.5730e-08,\n 2.6184e-07, 5.8635e-07, 5.7782e-08, 6.3389e-08, 2.8339e-07, 1.2567e-06,\n 4.1085e-07, 3.8948e-07, 2.3112e-07, 7.6905e-07, 2.9469e-07, 9.3211e-08,\n 1.7478e-07, 5.3746e-07, 2.1484e-07, 1.0586e-07, 2.6029e-07, 1.2016e-07,\n 2.0043e-07, 2.3664e-07, 3.1045e-07, 5.4381e-07, 4.0236e-08, 2.4137e-08,\n 1.0973e-07, 2.0398e-07, 6.3087e-07, 4.9828e-07, 1.9658e-07, 4.1973e-07,\n 2.2932e-07, 4.1989e-07, 5.8898e-10, 4.0663e-07, 8.9292e-07, 2.4337e-07,\n 5.2566e-07, 4.9358e-07, 1.4879e-07, 1.0609e-07, 5.3481e-07, 6.6801e-07,\n 1.4215e-08, 9.5981e-08, 7.8868e-07, 1.0743e-07, 5.9881e-07, 4.2796e-07,\n 5.0853e-07, 8.1076e-07, 4.3308e-07, 4.3371e-07, 6.6591e-07, 6.4107e-07,\n 4.7806e-07, 5.4921e-07, 5.5221e-07, 1.4197e-12, 4.3262e-07, 4.0414e-07,\n 9.9625e-08, 3.1448e-07, 1.3464e-10, 5.2623e-07, 2.9090e-07, 2.5351e-07,\n 4.8134e-07, 5.8672e-07, 5.3430e-07, 9.5992e-11, 6.2985e-18, 1.5279e-07,\n 3.0184e-07, 5.2968e-08, 1.4562e-10, 3.0454e-10, 4.4557e-07, 3.1367e-07,\n 5.0715e-07, 2.8853e-07, 7.0969e-07, 3.3753e-08, 4.3424e-07, 2.4365e-07,\n 4.6865e-07, 7.9405e-10, 1.6416e-07, 3.0958e-07, 2.2918e-07, 9.6290e-10,\n 1.8240e-07, 6.5641e-07, 2.7917e-07, 2.9221e-07, 7.7363e-08, 1.0996e-07,\n 5.1921e-07, 2.1606e-07, 4.8488e-07, 4.1493e-07, 4.8223e-07, 5.3809e-07,\n 5.2602e-08, 7.3114e-07, 2.9842e-08, 3.9052e-08, 1.5101e-07, 4.4197e-07,\n 7.2704e-12, 3.7921e-07, 6.2610e-08, 3.7938e-07, 1.6789e-07, 2.1171e-07,\n 5.7963e-07, 1.5091e-07, 2.1527e-07, 2.9432e-09, 2.1664e-08, 4.9649e-07,\n 1.0613e-07, 1.3070e-10, 2.3585e-07, 2.4848e-07, 7.0061e-07, 6.4573e-07,\n 4.6628e-07, 5.9714e-07, 4.1805e-07, 7.1341e-07, 3.1454e-07, 4.0249e-07,\n 7.3150e-07, 6.2744e-07, 3.6217e-07, 3.5176e-07, 4.7318e-07, 1.3775e-07,\n 5.8945e-07, 3.8523e-07, 3.5848e-07, 3.9295e-07, 4.1564e-07, 5.6205e-07,\n 4.6178e-07, 2.3511e-07, 1.5755e-06, 7.3405e-07, 2.8774e-07, 1.7637e-07,\n 5.2392e-07, 2.5125e-07, 3.6837e-07, 4.6389e-07, 2.9226e-07, 5.5101e-07,\n 3.8778e-07, 4.4171e-07, 4.9384e-07, 6.4828e-07, 1.0818e-07, 8.4257e-08,\n 2.3892e-07, 1.9794e-07, 2.1823e-07, 4.6622e-07, 3.9399e-07, 1.2944e-07,\n 3.5331e-07, 3.4420e-07, 2.4789e-07, 3.1963e-07, 6.2532e-12, 5.7142e-07,\n 2.9454e-07, 3.0652e-07, 5.2712e-07, 7.3129e-07, 4.6107e-07, 5.2264e-07,\n 2.4153e-07, 3.2139e-08, 4.8467e-07, 7.2869e-07, 3.2880e-07, 3.2831e-07,\n 6.0122e-07, 3.6744e-07, 6.2868e-07, 7.8528e-07, 1.2375e-06, 2.6220e-09,\n 3.1474e-07, 4.5343e-07, 4.7370e-07, 1.1446e-06, 7.9570e-07, 5.1680e-07,\n 4.6073e-07, 3.9374e-07, 2.2723e-07, 1.6543e-06, 2.8791e-07, 5.0370e-07,\n 2.0832e-07, 4.3860e-07, 4.4276e-11, 8.4545e-07, 2.1916e-08, 2.7474e-07,\n 4.4274e-07, 3.0861e-07, 2.2163e-07, 1.0390e-09, 6.7856e-07, 5.0676e-08,\n 3.2882e-07, 1.7897e-07, 3.7333e-07, 4.3378e-08, 3.7269e-07, 7.2058e-07,\n 4.3799e-07, 1.0235e-07, 3.3094e-07, 3.1783e-07, 5.0619e-07, 1.3178e-10,\n 1.3501e-07, 2.0200e-07, 1.7494e-14, 5.7658e-07, 3.8591e-07, 4.2911e-07,\n 2.4771e-07, 3.5637e-07, 2.9421e-07, 2.0331e-11, 3.7917e-07, 5.1487e-07,\n 2.5705e-07, 2.6928e-07, 2.1682e-07, 2.5868e-07, 2.2750e-07, 3.2723e-07,\n 3.2292e-07, 3.9027e-07, 5.2630e-07, 2.4402e-07, 2.6727e-07, 6.1129e-07,\n 4.3542e-07, 2.8849e-08, 4.0093e-07, 3.1644e-07, 4.9146e-07, 2.7416e-07,\n 3.0059e-07, 3.6327e-07, 5.6547e-07, 9.1957e-11, 4.2031e-07, 1.0670e-07,\n 3.3871e-07, 7.0829e-07, 5.9199e-07, 5.2642e-07, 7.9199e-07, 7.0231e-07,\n 4.8826e-07, 4.1514e-07, 9.1235e-12, 4.6365e-07, 3.7160e-07, 5.4612e-08,\n 1.5087e-07, 3.4358e-08, 2.3952e-09, 7.4654e-11, 2.7093e-07, 9.4191e-07,\n 2.8261e-13, 2.9807e-07, 4.2693e-07, 4.7382e-07, 7.8762e-07, 3.0292e-11,\n 5.6325e-07, 7.9079e-07, 9.0142e-08, 3.4525e-07, 3.5780e-07, 3.3833e-07,\n 8.7611e-07, 1.8663e-07, 3.9496e-07, 4.0111e-07, 4.2342e-07, 4.6491e-07,\n 2.7207e-07, 3.0106e-07, 7.1026e-07, 1.7047e-07, 7.0059e-07, 6.8783e-09,\n 8.9622e-07, 6.9310e-07, 6.2852e-07, 8.3384e-07, 2.6018e-07, 1.0243e-06,\n 5.3992e-07, 1.1786e-07, 6.6475e-07, 1.0875e-08, 3.4351e-07, 2.6625e-12,\n 2.1019e-07, 2.4502e-07, 3.3709e-07, 3.0551e-07, 6.8947e-07, 1.3428e-08,\n 1.1972e-07, 2.8665e-10, 6.3546e-07, 4.5789e-07, 4.9026e-07, 6.7815e-07,\n 3.6684e-07, 1.2523e-07, 5.5975e-07, 2.1415e-07, 3.3469e-07, 7.1102e-07,\n 3.3528e-09, 1.1463e-07, 1.0279e-06, 1.1539e-07, 6.0722e-09, 2.5076e-07,\n 6.8400e-07, 4.0000e-07, 8.1364e-11, 4.1349e-07, 4.3431e-07, 3.2538e-07,\n 5.1001e-07, 6.2983e-07, 5.9404e-07, 3.2815e-07, 6.4774e-10, 1.0648e-06,\n 4.0801e-07, 5.1052e-12, 5.2051e-07, 2.0598e-07, 2.1339e-07, 5.8207e-07,\n 4.8607e-07, 1.0650e-07, 3.5299e-07, 3.6529e-07, 2.0474e-07, 6.3783e-07,\n 4.5560e-07, 8.4534e-07, 3.5970e-07, 4.3624e-07, 7.3694e-07, 1.1698e-07,\n 3.1775e-07, 6.6452e-07, 7.5309e-07, 7.9342e-07, 1.7874e-07, 9.4435e-07,\n 1.4076e-07, 4.9081e-07, 6.1167e-07, 1.3327e-07, 5.9404e-07, 3.4911e-07,\n 2.6819e-07, 1.9796e-07, 3.0262e-07, 7.9704e-07, 1.3227e-06, 3.5589e-07,\n 1.6008e-07, 2.6013e-07, 7.5773e-07, 4.0754e-07, 4.2530e-07, 1.2245e-06,\n 3.3805e-07, 9.9470e-08, 1.2866e-07, 2.7458e-07, 3.3954e-07, 5.9832e-07,\n 1.3696e-07, 2.5196e-07, 3.4275e-07, 3.7349e-07, 1.9722e-07, 1.9988e-07,\n 6.6512e-07, 5.7936e-07, 6.4840e-10, 9.8007e-17, 6.7902e-07, 2.2627e-07],\n device='cuda:0')" + "step": "tensor(6260.)", + "exp_avg": "tensor([ 3.1521e-05, -1.2311e-23, 1.9869e-04, -7.6081e-05, 9.2364e-05,\n 3.6461e-21, 1.5613e-04, -2.0859e-04, -3.8042e-04, 3.2588e-04,\n 1.8848e-04, 1.0349e-36, 9.5781e-05, 2.4403e-05, 7.2596e-05,\n 2.0570e-04, 1.0269e-04, -2.7391e-05, 9.5326e-06, 5.0980e-05,\n -5.2700e-11, 8.5553e-05, -1.4996e-06, -3.7008e-06, -9.6250e-06,\n 9.8987e-05, 7.0346e-05, 5.6052e-45, -8.5273e-05, -1.5279e-05,\n 4.9009e-05, -3.8952e-06, -9.9135e-05, -3.3720e-06, 2.0738e-04,\n 5.9665e-05, 2.4778e-12, 2.9097e-05, -2.2584e-04, -1.2372e-04,\n -2.7847e-04, 7.7207e-05, -1.4023e-04, -1.9990e-04, 3.2947e-05,\n 9.3529e-05, -5.7796e-04, -8.3135e-05, -1.1395e-04, 1.1283e-05,\n 5.6052e-45, 4.5786e-06, 6.0989e-05, 7.4132e-05, 1.6638e-05,\n 1.0488e-04, 3.6284e-06, -4.1525e-05, 6.5765e-05, 3.4452e-05,\n -1.2250e-04, -1.6437e-04, -1.7052e-04, -1.3674e-04, -2.7126e-05,\n 2.8745e-05, 2.6645e-05, 2.6426e-04, -9.0528e-05, -2.3231e-05,\n 1.5187e-04, -9.4069e-27, 1.4530e-04, -1.8770e-05, 3.1416e-05,\n 3.7059e-06, 5.6052e-45, 5.8168e-04, -1.5514e-04, -2.7781e-05,\n -6.9010e-05, -1.8606e-04, -5.1562e-05, -1.3361e-33, -4.8092e-05,\n 4.0571e-05, 3.9434e-05, -3.3354e-04, -6.4586e-05, -6.3396e-05,\n 1.4266e-05, 3.2508e-05, -7.4531e-06, 2.6662e-05, 6.6810e-05,\n -3.4404e-04, -4.5648e-05, -1.1256e-05, -5.0714e-04, 1.7754e-04,\n 4.2586e-05, -9.2744e-05, 2.0982e-04, 3.1810e-05, 1.5493e-04,\n -4.9194e-04, -9.5192e-06, -4.9092e-05, 1.8848e-04, 1.5955e-04,\n -1.8737e-04, -1.4154e-04, 1.8291e-05, -5.5272e-06, 2.6245e-05,\n -2.0715e-04, -4.3237e-04, 1.2184e-04, -3.0999e-05, 5.6052e-45,\n -7.0565e-06, -1.2552e-04, -6.1019e-06, 5.6052e-45, -7.7955e-05,\n -1.7411e-04, 2.6180e-05, -2.3926e-14, -1.5691e-05, -2.9317e-05,\n -7.4496e-05, -2.7922e-05, 5.6052e-45, -4.2639e-05, -6.4004e-08,\n 1.6341e-04, 1.2682e-36, 4.4140e-05, -4.8538e-05, 5.6490e-05,\n 5.6052e-45, 5.6052e-45, -1.1404e-04, -1.7011e-09, 5.6052e-45,\n 5.6052e-45, 1.4518e-05, -1.0252e-04, 5.6052e-45, 7.7463e-05,\n -2.3733e-05, -9.5883e-06, 4.6006e-05, 1.8538e-05, 4.7356e-05,\n -8.8006e-06, 1.7081e-05, 7.8186e-05, 1.9898e-17, -1.0368e-04,\n -4.6848e-05, 8.1403e-05, 1.4906e-04, 3.2709e-05, 9.1180e-07,\n 1.8322e-04, -1.4427e-08, 5.6052e-45, 3.2181e-05, -1.4238e-04,\n 2.7206e-04, 3.6773e-04, 9.1961e-05, -8.8061e-05, 2.8030e-04,\n 2.4392e-05, -1.8160e-05, 2.8750e-05, 3.0516e-04, -2.0983e-04,\n 1.3941e-04, 7.4210e-05, -5.1194e-05, -5.7053e-05, 5.6052e-45,\n 1.2510e-40, 1.5681e-04, 1.4842e-04, 3.7542e-05, 9.3564e-05,\n 2.6983e-05, -7.0105e-05, -8.6893e-05, 5.4492e-05, -6.9348e-05,\n -1.6202e-05, 2.3960e-04, 1.0463e-04, -3.4105e-05, 5.6052e-45,\n 1.9414e-05, 6.7005e-05, 5.8286e-05, 1.7512e-04, 1.6003e-04,\n 1.6040e-05, -3.8507e-04, 4.6186e-05, -8.6852e-07, 5.6052e-45,\n -1.1705e-04, 6.1573e-28, -6.7348e-05, 9.9849e-05, -1.7887e-04,\n 8.5411e-05, -3.1872e-04, 5.7193e-05, -8.4574e-05, 1.1582e-05,\n -7.0984e-05, -4.8982e-05, -2.1179e-05, 6.4510e-05, 1.3626e-04,\n -5.6052e-45, -1.6633e-04, 1.4200e-04, 4.6956e-05, 2.9281e-05,\n -5.5499e-05, 6.6239e-06, -1.2351e-04, -1.0753e-04, 1.0038e-30,\n 2.1582e-04, 3.4709e-04, 5.6052e-45, 1.4857e-04, 1.9096e-04,\n 4.9188e-05, 5.6052e-45, 1.7566e-04, 8.0078e-05, -3.0343e-05,\n 1.8349e-04, -4.2999e-05, -2.2348e-04, -1.4158e-04, 9.6752e-36,\n -1.9542e-04, -2.5392e-04, -1.0916e-04, -1.3869e-04, -2.5880e-05,\n 1.1429e-04, 4.1407e-40, 9.7374e-05, 1.9185e-06, 1.3247e-04,\n 5.6052e-45, 3.0778e-04, 3.9682e-05, 3.3542e-04, -1.8589e-05,\n -4.2212e-05, -1.8472e-04, 9.9252e-06, 3.6182e-04, -4.3417e-05,\n -4.5407e-05, 3.5957e-05, 2.6875e-05, -8.2606e-05, -2.1872e-04,\n -1.3050e-04, -4.2799e-05, 1.2146e-11, 6.1240e-05, -9.0383e-05,\n 1.6296e-23, 1.2110e-05, 4.9330e-05, 9.5203e-05, 1.5581e-04,\n -1.1960e-04, -5.1113e-06, 9.0011e-05, -9.0278e-05, 2.4280e-05,\n 5.2265e-05, -4.0891e-05, 2.2373e-05, -3.3726e-05, -2.3673e-04,\n 4.1865e-05, -2.5736e-04, -1.1135e-04, 2.0706e-04, 7.1923e-05,\n -8.1276e-05, -1.7075e-04, 3.2565e-05, -2.4586e-05, -2.6026e-05,\n 5.6052e-45, -2.0990e-09, -1.1199e-04, -1.8375e-04, 5.6440e-05,\n -1.1075e-04, 5.6052e-45, 1.2897e-04, 1.0463e-04, 3.4039e-04,\n 5.6052e-45, 8.2677e-05, 2.9165e-05, 8.2419e-06, -1.9874e-05,\n 5.6985e-05, -2.3537e-06, 2.9127e-05, -6.5033e-05, -6.7711e-05,\n 5.6052e-45, -2.3083e-04, 1.8713e-04, 2.6040e-04, 1.0097e-04,\n 1.4297e-04, -4.5225e-05, 8.7781e-05, 6.1370e-05, 4.8244e-05,\n -5.0370e-05, -2.7739e-05, -8.9205e-06, 5.6052e-45, -5.1096e-08,\n -1.4312e-04, 1.5349e-04, -1.2869e-05, 6.4124e-05, 8.5775e-07,\n -4.4252e-05, 1.0077e-04, 9.8495e-05, 1.6101e-05, 1.3111e-04,\n -1.2295e-05, -3.1138e-05, 2.0143e-06, 5.6052e-45, -5.1130e-05,\n -1.6264e-04, -8.6253e-06, -6.4645e-05, 2.3816e-05, -1.9372e-05,\n 1.7761e-04, 1.8192e-04, 5.6052e-45, 7.8429e-05, -5.5876e-05,\n -1.8691e-04, -1.0389e-06, -2.8513e-04, 3.8670e-05, -1.4340e-06,\n 4.6800e-05, 5.1832e-05, 4.5576e-06, 1.4786e-05, 5.1367e-05,\n 8.2881e-05, -1.5044e-18, -1.1618e-04, 5.6052e-45, 1.6986e-04,\n 5.3990e-05, 2.2211e-04, -1.2810e-05, 1.3811e-04, 5.6052e-45,\n -8.7641e-05, 8.9677e-06, 4.6456e-05, 1.3708e-04, 9.9589e-05,\n 1.8055e-05, 1.0672e-04, -1.6417e-04, -2.4381e-05, 6.0763e-05,\n 8.5640e-05, 8.0096e-05, -5.8416e-14, 5.6052e-45, 7.8898e-05,\n -4.5476e-05, -1.2014e-04, -3.8826e-04, 9.1430e-05, 6.2766e-05,\n 1.1925e-04, -6.9678e-31, -2.9612e-05, 1.4610e-04, 1.6963e-04,\n -2.6295e-04, 5.6052e-45, 4.7689e-05, -1.7575e-04, 3.6474e-05,\n 5.3510e-05, -5.1009e-06, 9.4082e-05, 1.4956e-04, 6.5515e-05,\n -4.2547e-05, -1.0017e-04, 3.7764e-05, 1.7355e-28, 5.6052e-45,\n -2.5037e-05, 3.7096e-05, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.5992e-05, 2.3257e-06, 3.9828e-05, -4.1754e-05, 3.3585e-05,\n 5.6052e-45, 8.1778e-06, -3.5955e-06, 2.8306e-05, 5.6052e-45,\n -8.1166e-05, 1.1623e-04, -4.8540e-05, 5.6052e-45, -2.2035e-05,\n -3.7871e-05, 1.2515e-04, 9.4850e-05, -5.8377e-05, -5.0248e-05,\n -7.7020e-04, -4.0442e-05, 3.1163e-04, 5.8285e-05, 3.5595e-05,\n -1.1196e-04, 8.8795e-09, -2.3106e-04, 4.9203e-19, 7.4549e-06,\n -2.9382e-05, 1.2463e-04, 5.6052e-45, 6.8643e-05, 5.7257e-06,\n 2.4418e-04, 4.5223e-05, 4.7212e-05, -1.0576e-04, 9.7705e-05,\n -1.0929e-04, 5.6052e-45, 6.1244e-05, 2.1550e-05, -7.7412e-05,\n 5.6052e-45, -1.7830e-04, 1.4335e-04, -1.1816e-04, 1.2455e-04,\n 3.5963e-04, -5.2330e-05, 2.0618e-04, -2.8834e-04, 1.4231e-05,\n 9.5560e-05, -2.3463e-04, 1.3726e-04, -1.9363e-04, -1.1542e-04,\n 5.9719e-05, 1.0063e-04, -1.3939e-05, -5.4109e-05, 5.1683e-05,\n 1.1323e-04, 1.1217e-04, 8.4128e-05, -1.4960e-04, 1.0453e-04,\n 5.6052e-45, 5.4710e-05, -1.4260e-05, 4.4808e-05, 4.0193e-06,\n 6.7333e-05, -1.0266e-04, 2.2544e-04, -2.5572e-04, 1.5214e-04,\n -1.0401e-04, -9.3180e-05, -1.8644e-04, -1.3265e-05, 5.6218e-05,\n -6.8698e-05, -6.4009e-05, -7.5259e-05, 5.6052e-45, 5.5681e-05,\n -6.3020e-05, 7.3126e-05, -2.7347e-05, -2.0996e-04, 7.8351e-05,\n -7.5844e-05, 5.6052e-45, -2.9850e-05, -4.5902e-05, -9.4897e-05,\n 1.4031e-04, -1.8612e-04, 1.1664e-04, -1.2641e-05, -4.4997e-06,\n 1.2655e-04, 1.8093e-05, 5.6052e-45, 2.4961e-05, -4.5604e-04,\n -1.1351e-04, -2.6668e-04, 4.3833e-05, -2.9797e-05, -1.0482e-04,\n -1.7394e-18, -1.0907e-04, -4.1047e-05, 2.0687e-04, 6.5075e-05,\n -1.1219e-04, -6.0677e-05, -6.6832e-05, -9.9832e-05, 1.9359e-04,\n 4.2535e-05, -1.1121e-04, 1.2293e-04, 6.2709e-05, 1.7709e-05,\n 1.2128e-11, -3.2536e-05, 3.2604e-05, -3.6219e-05, 1.8014e-04,\n 1.2130e-04, 7.4951e-05, 5.6052e-45, 1.2233e-04, 5.6052e-45,\n 9.9826e-06, 3.7471e-05, 5.0985e-05, 9.5499e-31, 1.5749e-04,\n -1.2691e-04, -2.4912e-05, 1.2256e-05, -2.9885e-04, -5.6501e-05,\n 1.6152e-04, 5.6052e-45, 4.7886e-05, -3.1392e-05, 5.6052e-45,\n -3.0492e-04, 1.2449e-05, 1.7495e-04, -1.2557e-04, -6.6562e-05,\n -1.2466e-04, 5.3562e-41, 7.5010e-05, 1.0619e-04, -6.9164e-06,\n 7.9408e-05, 2.1803e-05, 2.1552e-05, -5.3166e-05, -1.1018e-05,\n -1.0263e-04, 6.3220e-05, -4.5029e-04, -1.7432e-05, -3.5489e-04,\n 1.9478e-04, -3.9741e-05, -3.7136e-06, 2.1685e-04, 2.1252e-05,\n 3.0014e-04, 1.7813e-04, 1.5587e-04, 2.4100e-19, -1.4856e-05,\n 5.6052e-45, 2.1316e-04, -2.5933e-35, -6.4867e-05, 8.8672e-05,\n 2.7863e-05, -1.9007e-04, -1.1425e-05, 1.5760e-04, 3.1228e-04,\n -2.7241e-04, 5.6052e-45, 6.6249e-05, 1.4112e-05, 1.8199e-05,\n 9.0675e-06, 5.6052e-45, 5.6052e-45, 5.6052e-45, 1.0783e-04,\n -2.0323e-05, 5.6052e-45, 1.3441e-04, 2.0043e-04, 6.8588e-05,\n 7.1324e-05, 5.6052e-45, -2.0448e-04, -6.6880e-05, 1.1991e-05,\n -2.2671e-04, -2.2429e-04, -4.4375e-04, -2.0082e-06, -1.5322e-05,\n 8.8474e-05, -1.5098e-05, -9.4342e-05, -9.2000e-05, 1.1421e-04,\n 8.1657e-05, 9.4784e-05, -2.4266e-05, -1.2188e-05, -7.4300e-06,\n -4.2374e-04, -3.5490e-05, -2.5278e-05, 2.5053e-05, -3.2132e-04,\n 3.1103e-04, -4.0482e-05, -1.7681e-05, 1.2131e-05, -1.0499e-17,\n 1.4120e-04, 5.6052e-45, -2.2712e-04, -2.8662e-04, -2.7610e-04,\n 1.4849e-05, 1.9036e-04, -5.6577e-08, -5.6052e-45, 5.6052e-45,\n 3.5628e-05, -2.1885e-04, -1.8366e-04, 5.6052e-45, -2.8512e-06,\n 8.1273e-27, 1.3676e-04, 1.9340e-04, -6.2535e-05, -2.0510e-05,\n -3.6166e-05, -1.4578e-04, 5.6052e-45, -8.7900e-05, -4.6426e-05,\n -1.0444e-04, -5.7292e-05, -1.2850e-05, 4.7803e-37, -2.4854e-05,\n -1.0326e-04, -1.1522e-04, -1.5457e-06, 1.5733e-04, -3.5736e-05,\n 4.3578e-06, 5.6052e-45, 3.1331e-04, -1.4163e-05, 5.6052e-45,\n 1.8118e-05, -1.6546e-04, -1.0439e-04, 6.6036e-05, 9.8913e-05,\n 5.7397e-05, 8.3186e-05, 6.0364e-05, -1.2176e-04, 1.7343e-04,\n 5.6052e-45, -2.2644e-04, -9.2222e-05, 8.9380e-05, -1.3487e-04,\n 5.0878e-05, -7.2112e-05, 7.9752e-05, 3.3456e-05, -3.7188e-05,\n 2.7403e-05, 2.0137e-04, 1.7045e-04, 1.6360e-04, 5.6052e-45,\n 4.5360e-05, 5.2170e-04, -2.3817e-04, -6.5981e-05, -4.7639e-05,\n 8.2872e-05, 2.8165e-05, -6.6102e-06, -3.9644e-05, 4.8232e-05,\n 6.3479e-05, -8.7849e-05, 6.2002e-13, 8.0774e-05, -5.6052e-45,\n -1.1994e-04, -5.1305e-05, 7.6211e-05, 2.3491e-05, 2.3746e-04,\n 4.0031e-06, -3.8844e-05, 2.6814e-04, -9.4837e-06, -2.1366e-04,\n -3.0659e-05, -9.7009e-05, 1.2654e-04, -9.0477e-05, 5.6052e-45,\n 5.6052e-45, -2.6142e-04, 8.2354e-05], device='cuda:0')", + "exp_avg_sq": "tensor([1.9787e-07, 5.4960e-09, 1.1300e-07, 1.6804e-07, 4.0614e-07, 5.1216e-08,\n 4.6980e-07, 3.4866e-07, 1.7439e-07, 4.0015e-07, 2.8661e-07, 2.8287e-08,\n 2.2284e-07, 1.1822e-07, 2.2500e-07, 2.4259e-07, 4.1483e-07, 6.1545e-08,\n 9.5374e-08, 2.7792e-07, 6.4184e-10, 1.1385e-07, 1.5269e-07, 2.8641e-07,\n 4.0428e-07, 3.1632e-07, 1.2460e-07, 1.1631e-10, 2.8061e-07, 2.8552e-07,\n 1.0643e-07, 2.1704e-07, 2.0715e-07, 2.9785e-07, 1.7895e-07, 1.7890e-07,\n 6.2461e-08, 2.7321e-07, 2.5917e-07, 2.4279e-07, 4.0728e-07, 1.9662e-07,\n 3.7759e-07, 3.2161e-07, 3.0701e-07, 1.6605e-07, 2.5025e-07, 2.1285e-07,\n 6.8579e-07, 2.5682e-07, 9.9758e-08, 1.1159e-06, 2.2410e-07, 1.8671e-07,\n 2.5009e-07, 1.5559e-07, 2.9956e-07, 1.1061e-07, 1.0357e-07, 2.4951e-07,\n 7.8470e-08, 1.8998e-07, 4.8784e-07, 2.3819e-07, 8.7685e-08, 1.6258e-07,\n 3.9505e-07, 2.9135e-07, 6.3024e-08, 1.7952e-08, 2.5887e-07, 4.5384e-08,\n 1.6177e-07, 6.0441e-07, 2.4969e-07, 2.8148e-07, 3.1500e-10, 4.1651e-07,\n 1.4168e-07, 1.9136e-07, 1.7970e-07, 2.5519e-07, 3.7545e-07, 8.6969e-10,\n 4.1920e-07, 3.2175e-07, 2.9942e-07, 4.1721e-07, 2.1455e-07, 4.0023e-07,\n 2.3185e-07, 1.6560e-07, 1.7416e-07, 2.8483e-07, 1.4649e-07, 4.3482e-07,\n 3.2429e-07, 4.0879e-07, 2.6879e-07, 3.0304e-07, 2.8274e-07, 3.0158e-07,\n 6.6636e-07, 4.3836e-07, 2.4077e-07, 3.1853e-07, 1.7897e-07, 2.2143e-07,\n 1.9495e-07, 1.1852e-07, 2.2477e-07, 4.4604e-07, 2.4669e-07, 7.5318e-08,\n 4.2339e-07, 2.7609e-07, 2.2352e-07, 1.4964e-07, 1.1498e-07, 2.3810e-08,\n 2.5876e-07, 4.5794e-07, 1.5655e-07, 5.9664e-08, 2.2381e-07, 3.1641e-07,\n 2.5708e-07, 1.9063e-09, 3.5785e-07, 2.9290e-07, 2.3824e-07, 1.9994e-07,\n 2.6063e-09, 2.8204e-07, 2.8045e-08, 2.2149e-07, 1.0561e-11, 2.9762e-07,\n 2.1885e-07, 8.3143e-08, 2.6277e-08, 1.4450e-08, 1.6451e-07, 4.1534e-12,\n 5.5984e-11, 7.4466e-08, 3.3294e-07, 3.0760e-07, 2.0007e-07, 1.2725e-07,\n 1.8955e-07, 3.1000e-07, 1.6888e-07, 2.2580e-07, 2.4992e-08, 2.7353e-07,\n 1.2937e-07, 2.6569e-07, 8.0852e-08, 2.2294e-07, 2.7319e-07, 1.6697e-07,\n 3.2299e-07, 4.1591e-07, 1.7182e-07, 2.3193e-07, 3.1699e-08, 1.0526e-13,\n 2.4305e-07, 1.8340e-07, 3.5148e-07, 3.2496e-07, 2.3349e-07, 2.9714e-07,\n 2.8921e-07, 2.4326e-07, 6.3664e-08, 2.9252e-07, 3.6861e-07, 4.8608e-07,\n 4.1012e-07, 2.6704e-07, 2.9031e-07, 8.4731e-08, 4.4213e-12, 6.1502e-17,\n 4.0849e-07, 2.4099e-07, 8.6675e-08, 8.9670e-08, 1.8568e-07, 1.7040e-07,\n 3.1860e-07, 4.0422e-07, 3.1660e-07, 1.5315e-07, 3.8357e-07, 3.3134e-07,\n 2.2169e-07, 6.7367e-19, 1.1606e-07, 1.4955e-07, 3.5342e-07, 3.3610e-07,\n 4.2231e-07, 1.9406e-07, 3.4893e-07, 2.1355e-07, 3.3644e-08, 5.0495e-10,\n 4.1043e-07, 7.0846e-10, 1.5706e-07, 4.4913e-07, 2.6942e-07, 2.5480e-07,\n 2.8717e-07, 1.4769e-07, 2.6766e-07, 2.1021e-07, 1.0581e-07, 3.8328e-07,\n 3.3786e-07, 2.0272e-07, 1.9837e-07, 9.9742e-08, 3.9717e-07, 4.3938e-07,\n 3.9772e-07, 3.0652e-08, 2.3608e-07, 2.4613e-07, 2.4020e-07, 4.3484e-07,\n 6.3114e-08, 3.4000e-07, 4.3882e-07, 1.1968e-11, 1.8443e-07, 2.7445e-07,\n 7.3912e-08, 3.2830e-08, 3.6227e-07, 2.3162e-07, 1.9343e-07, 2.6808e-07,\n 2.6612e-07, 3.7563e-07, 2.0068e-07, 4.1878e-08, 4.2593e-07, 5.0508e-07,\n 2.9274e-07, 3.9036e-07, 2.0392e-07, 3.9488e-07, 1.2804e-11, 1.4010e-07,\n 3.2694e-08, 5.5286e-07, 7.5396e-17, 3.0049e-07, 2.5423e-07, 1.8087e-07,\n 4.6354e-07, 2.0251e-07, 1.9328e-07, 1.8850e-07, 3.8694e-07, 2.1267e-07,\n 3.3415e-07, 2.4537e-07, 1.8378e-07, 3.1171e-07, 4.4294e-07, 2.1939e-07,\n 3.5608e-07, 6.5279e-11, 1.0816e-07, 4.7690e-07, 1.8614e-07, 4.9006e-07,\n 1.6984e-07, 2.8415e-07, 3.9908e-07, 1.7342e-07, 3.7931e-07, 3.0388e-07,\n 1.1356e-07, 4.8912e-07, 1.6712e-07, 2.7119e-07, 2.7078e-07, 1.1328e-07,\n 3.2003e-07, 3.2696e-07, 2.2132e-07, 4.2784e-07, 2.1075e-07, 9.7866e-08,\n 3.1647e-07, 4.4262e-07, 1.1758e-07, 1.2251e-07, 8.0221e-08, 1.5031e-10,\n 1.8498e-09, 1.2419e-07, 2.7385e-07, 2.9803e-07, 2.8875e-07, 9.7804e-12,\n 2.4710e-07, 1.7429e-07, 2.8125e-07, 2.1467e-11, 4.8201e-08, 6.4668e-07,\n 1.2869e-07, 1.4276e-07, 2.8334e-07, 6.6329e-08, 1.0736e-07, 2.8329e-07,\n 1.4484e-07, 7.7883e-08, 3.6745e-07, 2.8226e-07, 2.5990e-07, 2.6925e-07,\n 3.1118e-07, 1.3543e-07, 2.8249e-07, 1.7772e-07, 1.0112e-07, 4.9530e-07,\n 2.1232e-07, 1.0651e-07, 5.8496e-17, 3.0583e-10, 5.2431e-07, 2.2759e-07,\n 4.9460e-07, 1.0667e-07, 4.1261e-07, 2.1008e-07, 2.6537e-07, 6.6856e-07,\n 2.7858e-07, 3.2998e-07, 3.2988e-07, 2.8305e-07, 1.0520e-07, 1.5925e-08,\n 2.0489e-07, 3.9704e-07, 7.3340e-08, 4.6286e-08, 2.1087e-07, 4.2721e-07,\n 2.6884e-07, 2.6139e-07, 6.6044e-08, 4.0949e-07, 1.9711e-07, 7.6486e-08,\n 1.4768e-07, 3.6390e-07, 1.7253e-07, 9.1067e-08, 1.5973e-07, 5.7074e-08,\n 1.6770e-07, 1.8363e-07, 2.1989e-07, 2.0705e-07, 1.1498e-08, 6.0450e-08,\n 3.1357e-08, 1.4296e-07, 4.5719e-07, 3.2743e-07, 1.3067e-07, 2.8867e-07,\n 6.5530e-08, 3.4019e-07, 2.5523e-08, 2.9716e-07, 5.2069e-07, 1.5840e-07,\n 2.9442e-07, 3.4228e-07, 1.7393e-07, 8.7288e-08, 3.4933e-07, 4.2848e-07,\n 4.7862e-08, 2.7427e-08, 2.2537e-07, 7.1154e-08, 2.7043e-07, 3.2720e-07,\n 3.6741e-07, 5.1447e-07, 2.8477e-07, 3.1218e-07, 1.9029e-07, 3.8031e-07,\n 2.9843e-07, 3.6367e-07, 3.2553e-07, 4.0569e-13, 2.0285e-07, 2.8167e-07,\n 9.3794e-08, 2.2244e-07, 9.6574e-11, 3.5288e-07, 2.2628e-07, 1.8566e-07,\n 2.5817e-07, 3.7823e-07, 3.7120e-07, 2.7439e-11, 1.7998e-18, 1.3752e-07,\n 1.3430e-07, 1.5136e-08, 4.1611e-11, 8.7024e-11, 3.0947e-07, 2.2442e-07,\n 2.5766e-07, 2.1140e-07, 4.1888e-07, 9.6453e-09, 3.1538e-07, 2.1064e-07,\n 3.0039e-07, 2.2691e-10, 1.1761e-07, 2.0779e-07, 1.7273e-07, 2.7516e-10,\n 1.4389e-07, 4.6454e-07, 1.8539e-07, 2.1644e-07, 9.7542e-08, 9.3139e-08,\n 4.5987e-07, 1.4690e-07, 2.8715e-07, 2.8575e-07, 3.3087e-07, 2.8683e-07,\n 1.5031e-08, 5.1000e-07, 8.5275e-09, 6.4609e-08, 1.4915e-07, 1.7750e-07,\n 2.0776e-12, 2.5730e-07, 1.8606e-08, 2.5418e-07, 1.4350e-07, 1.4683e-07,\n 3.2795e-07, 1.3224e-07, 1.3860e-07, 8.4104e-10, 4.8989e-08, 3.5556e-07,\n 9.8230e-08, 3.7350e-11, 2.1002e-07, 2.1275e-07, 2.4015e-07, 4.3612e-07,\n 2.7251e-07, 3.6574e-07, 2.8081e-07, 4.2960e-07, 2.4329e-07, 2.5733e-07,\n 4.6028e-07, 3.6361e-07, 2.7757e-07, 2.4876e-07, 2.6837e-07, 1.1946e-07,\n 3.8260e-07, 1.8528e-07, 2.7602e-07, 1.5977e-07, 2.6357e-07, 3.0740e-07,\n 3.3309e-07, 1.2267e-07, 4.5022e-07, 5.1684e-07, 2.0089e-07, 1.6249e-07,\n 3.2634e-07, 1.4522e-07, 2.8576e-07, 3.2658e-07, 2.3547e-07, 3.4754e-07,\n 2.0625e-07, 3.2378e-07, 4.1414e-07, 3.8249e-07, 1.2767e-07, 5.3386e-08,\n 1.8244e-07, 1.3534e-07, 6.2360e-08, 3.2263e-07, 2.4196e-07, 1.1394e-07,\n 2.4923e-07, 2.3806e-07, 1.9702e-07, 2.6968e-07, 1.7869e-12, 3.7659e-07,\n 1.8706e-07, 2.2615e-07, 3.1584e-07, 4.0406e-07, 2.8103e-07, 3.2139e-07,\n 2.0145e-07, 5.7128e-08, 2.7118e-07, 2.0823e-07, 1.6825e-07, 2.7683e-07,\n 3.9637e-07, 3.0334e-07, 4.4156e-07, 5.2473e-07, 5.2753e-07, 7.4925e-10,\n 2.3900e-07, 2.9518e-07, 3.5120e-07, 3.4552e-07, 4.1989e-07, 3.6406e-07,\n 2.5464e-07, 2.6091e-07, 1.9751e-07, 4.8832e-07, 2.3650e-07, 3.2886e-07,\n 1.7686e-07, 2.6034e-07, 1.4182e-11, 4.1228e-07, 6.4464e-08, 1.8793e-07,\n 2.7504e-07, 2.5095e-07, 1.6190e-07, 2.9690e-10, 3.5443e-07, 1.4481e-08,\n 2.5138e-07, 1.3949e-07, 2.7057e-07, 1.2396e-08, 1.9369e-07, 3.1879e-07,\n 2.9602e-07, 8.4515e-08, 1.9143e-07, 2.0396e-07, 3.3760e-07, 3.7658e-11,\n 1.1979e-07, 9.7749e-08, 4.9991e-15, 3.7056e-07, 2.7733e-07, 2.6132e-07,\n 1.9219e-07, 1.8769e-07, 2.0875e-07, 5.8114e-12, 2.0218e-07, 2.8349e-07,\n 1.5433e-07, 1.7063e-07, 1.3770e-07, 2.0156e-07, 1.6915e-07, 1.5519e-07,\n 2.6530e-07, 1.9047e-07, 3.4263e-07, 1.8360e-07, 2.3873e-07, 3.8015e-07,\n 3.5195e-07, 4.6418e-08, 2.7560e-07, 2.2705e-07, 3.1265e-07, 2.1213e-07,\n 2.2522e-07, 1.0381e-07, 3.1899e-07, 2.6277e-11, 2.9310e-07, 3.0489e-08,\n 2.5398e-07, 4.7390e-07, 3.9462e-07, 3.5780e-07, 4.4137e-07, 4.4500e-07,\n 2.2057e-07, 2.8115e-07, 2.6071e-12, 2.9250e-07, 2.7853e-07, 5.4884e-08,\n 8.6325e-08, 9.8180e-09, 6.8446e-10, 2.1333e-11, 2.2386e-07, 4.4458e-07,\n 8.0758e-14, 1.9700e-07, 2.9701e-07, 3.4827e-07, 4.6211e-07, 8.6561e-12,\n 3.3383e-07, 2.4823e-07, 8.7855e-08, 2.2781e-07, 2.4956e-07, 2.6614e-07,\n 4.8229e-07, 1.1767e-07, 2.5167e-07, 2.5303e-07, 3.5467e-07, 3.1926e-07,\n 1.8272e-07, 2.1954e-07, 4.5158e-07, 1.1523e-07, 3.1622e-07, 9.1926e-09,\n 6.0841e-07, 2.4912e-07, 3.9583e-07, 2.9193e-07, 1.7358e-07, 5.4395e-07,\n 3.4455e-07, 1.0588e-07, 3.2158e-07, 3.1075e-09, 2.2421e-07, 7.6082e-13,\n 1.4163e-07, 1.9026e-07, 2.2687e-07, 2.6986e-07, 4.8505e-07, 3.8383e-09,\n 3.4211e-08, 8.1914e-11, 4.3863e-07, 2.9797e-07, 2.9739e-07, 1.9379e-07,\n 2.5495e-07, 3.5784e-08, 3.6520e-07, 1.6319e-07, 2.2808e-07, 3.7866e-07,\n 3.4859e-08, 1.1638e-07, 2.9372e-07, 1.0222e-07, 2.6341e-08, 1.7009e-07,\n 3.9296e-07, 2.4202e-07, 2.3250e-11, 1.9694e-07, 2.9199e-07, 2.6650e-07,\n 3.2000e-07, 3.9770e-07, 3.7168e-07, 2.1164e-07, 1.8510e-10, 8.0523e-07,\n 3.5398e-07, 1.4589e-12, 3.5360e-07, 1.7886e-07, 1.3013e-07, 3.3589e-07,\n 3.0434e-07, 1.0719e-07, 2.5082e-07, 2.9619e-07, 1.5261e-07, 3.7090e-07,\n 1.3019e-07, 5.0792e-07, 2.3075e-07, 3.0127e-07, 2.9691e-07, 1.0198e-07,\n 2.2873e-07, 4.0267e-07, 5.7454e-07, 4.4278e-07, 1.1760e-07, 4.0341e-07,\n 1.6072e-07, 3.4070e-07, 1.7479e-07, 1.0532e-07, 3.6764e-07, 2.6853e-07,\n 1.9878e-07, 1.5118e-07, 1.9043e-07, 4.3375e-07, 4.2423e-07, 2.2043e-07,\n 1.2792e-07, 2.2036e-07, 3.2919e-07, 1.1646e-07, 2.3857e-07, 3.4992e-07,\n 2.0766e-07, 9.7829e-08, 1.3398e-07, 1.9801e-07, 2.5945e-07, 3.9670e-07,\n 9.5537e-08, 1.7171e-07, 2.4698e-07, 2.7714e-07, 1.1461e-07, 1.6431e-07,\n 3.7028e-07, 1.9785e-07, 1.8528e-10, 2.8006e-17, 3.5061e-07, 1.6847e-07],\n device='cuda:0')" }, "4": { - "step": "tensor(5008.)", - "exp_avg": "tensor([[ 1.4316e-06, -5.6052e-45, -2.0064e-06, ..., 5.6052e-45,\n 1.7084e-06, -3.0339e-07],\n [-6.7878e-06, -5.6052e-45, 6.7377e-06, ..., -5.6052e-45,\n -2.4318e-05, 4.1811e-06],\n [ 1.8044e-05, -5.6052e-45, -4.1622e-06, ..., -5.6052e-45,\n 1.5543e-05, -5.2742e-06],\n ...,\n [ 9.2628e-08, -5.6052e-45, -6.4906e-07, ..., -5.6052e-45,\n -9.4916e-06, -4.6556e-06],\n [-1.9014e-06, 5.6052e-45, 9.9980e-07, ..., -5.6052e-45,\n -8.8697e-06, -6.8626e-06],\n [-5.4511e-06, 5.6052e-45, 1.0360e-05, ..., -5.6052e-45,\n 8.2508e-06, 3.0087e-06]], device='cuda:0')", - "exp_avg_sq": "tensor([[2.4567e-09, 2.3978e-13, 1.4296e-10, ..., 2.7470e-19, 2.2476e-09,\n 4.8657e-10],\n [4.4572e-09, 4.4215e-12, 2.5097e-10, ..., 1.4039e-17, 3.3906e-09,\n 2.2608e-09],\n [6.2393e-09, 2.6919e-14, 2.9377e-10, ..., 6.1229e-18, 3.4560e-09,\n 2.3802e-09],\n ...,\n [5.8083e-09, 4.3764e-12, 3.1514e-10, ..., 5.3568e-18, 1.9122e-09,\n 1.4335e-09],\n [7.5745e-09, 3.6680e-13, 2.8010e-10, ..., 7.9834e-20, 3.2970e-09,\n 3.3057e-09],\n [5.9110e-09, 5.6333e-13, 4.8798e-10, ..., 5.1164e-18, 2.7308e-09,\n 2.6614e-09]], device='cuda:0')" + "step": "tensor(6260.)", + "exp_avg": "tensor([[-5.8735e-07, 4.8569e-25, 2.0672e-05, ..., 5.6052e-45,\n -4.0880e-05, 7.9777e-06],\n [ 9.2971e-06, 1.5598e-25, 1.5837e-06, ..., -5.6052e-45,\n -2.5973e-06, -5.9872e-06],\n [-1.1278e-05, 1.6732e-25, 2.5069e-05, ..., -5.6052e-45,\n 5.0401e-05, 2.4138e-05],\n ...,\n [ 1.6577e-06, 1.8463e-25, -2.1244e-05, ..., -5.6052e-45,\n 5.0692e-05, -7.5169e-06],\n [-8.4020e-06, 4.3994e-25, 1.0613e-05, ..., -5.6052e-45,\n -8.8597e-06, 1.5104e-05],\n [ 2.0186e-06, 3.7173e-25, -2.4462e-05, ..., -5.6052e-45,\n 5.2665e-05, 1.3639e-05]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.5131e-09, 6.8519e-14, 2.4536e-10, ..., 7.8498e-20, 1.7754e-09,\n 3.8327e-10],\n [2.9172e-09, 1.2635e-12, 5.1498e-10, ..., 4.0118e-18, 2.8007e-09,\n 1.8120e-09],\n [4.2391e-09, 7.6922e-15, 9.4877e-10, ..., 1.7497e-18, 3.0958e-09,\n 1.9701e-09],\n ...,\n [3.9652e-09, 1.2506e-12, 7.4609e-10, ..., 1.5308e-18, 1.7783e-09,\n 1.1084e-09],\n [4.7277e-09, 1.0482e-13, 6.8378e-10, ..., 2.2813e-20, 2.5905e-09,\n 2.7353e-09],\n [3.8885e-09, 1.6098e-13, 1.2032e-09, ..., 1.4620e-18, 2.5534e-09,\n 2.0446e-09]], device='cuda:0')" }, "5": { - "step": "tensor(3756.)", - "exp_avg": "tensor([[-6.6167e-19, -5.5776e-11, 0.0000e+00, ..., -2.1565e-10,\n -2.7090e-19, -5.8115e-24],\n [ 1.7766e-06, 1.7971e-06, -5.6052e-45, ..., 2.7743e-06,\n -7.4871e-06, -4.4757e-07],\n [-6.9732e-08, -8.8972e-07, -5.6052e-45, ..., -2.2696e-07,\n 4.7031e-07, -8.5477e-07],\n ...,\n [ 2.3844e-06, -1.5031e-06, 5.6052e-45, ..., -9.9324e-08,\n -1.6296e-07, -1.8572e-05],\n [ 7.6787e-08, 1.2739e-06, -5.6052e-45, ..., 3.3988e-06,\n 3.0498e-06, 3.7120e-08],\n [ 2.5113e-06, 1.5274e-06, -5.6052e-45, ..., 1.4171e-06,\n -1.0460e-07, 4.7918e-07]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.3051e-12, 1.2449e-12, 0.0000e+00, ..., 2.3701e-13, 6.3638e-12,\n 1.6822e-13],\n [2.6424e-10, 1.1281e-10, 2.9522e-13, ..., 4.1773e-10, 2.3751e-10,\n 3.1496e-11],\n [1.1785e-11, 1.2350e-11, 1.9559e-14, ..., 2.4892e-11, 1.1284e-10,\n 1.7720e-11],\n ...,\n [3.8727e-10, 1.4767e-10, 4.2815e-13, ..., 4.3400e-11, 1.0708e-10,\n 2.1363e-09],\n [3.3517e-12, 2.5129e-10, 4.1120e-15, ..., 1.5983e-10, 2.1311e-10,\n 1.8791e-11],\n [1.4092e-10, 5.6812e-11, 7.3950e-14, ..., 2.6643e-10, 7.2655e-11,\n 5.8548e-10]], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([[ 4.1077e-08, -1.3984e-08, 0.0000e+00, ..., 1.0988e-07,\n 4.0628e-09, 9.9676e-13],\n [-5.4944e-06, -6.3427e-06, -5.6052e-45, ..., 1.8983e-06,\n 2.2179e-07, 6.9046e-09],\n [ 1.2553e-07, -8.2448e-07, -2.8306e-43, ..., -8.5227e-08,\n 2.0630e-06, -1.0564e-08],\n ...,\n [ 7.8341e-07, 6.8223e-07, 5.6052e-45, ..., -4.3376e-07,\n 9.5179e-07, -3.3700e-06],\n [ 1.0092e-07, -8.9138e-07, -5.6052e-45, ..., 9.4612e-08,\n 1.6499e-06, 3.1060e-08],\n [ 1.8421e-06, 1.3908e-06, 5.6052e-45, ..., -4.0586e-06,\n -2.3236e-06, -2.7607e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[7.5349e-13, 7.0549e-13, 0.0000e+00, ..., 9.0973e-13, 1.8718e-12,\n 4.9006e-14],\n [2.7152e-10, 7.4804e-11, 8.4360e-14, ..., 3.1960e-10, 1.2772e-10,\n 1.9254e-11],\n [6.6905e-12, 8.3255e-12, 5.5893e-15, ..., 1.2933e-11, 7.6728e-11,\n 1.2273e-11],\n ...,\n [4.4578e-10, 1.3013e-10, 1.2235e-13, ..., 2.7417e-11, 8.1413e-11,\n 2.1616e-09],\n [1.7065e-11, 1.6678e-10, 1.1750e-15, ..., 1.4255e-10, 1.2765e-10,\n 7.0486e-12],\n [7.8672e-11, 3.3206e-11, 2.1205e-14, ..., 2.0737e-10, 4.8531e-11,\n 6.5964e-10]], device='cuda:0')" }, "6": { - "step": "tensor(3756.)", - "exp_avg": "tensor([-2.9739e-08, -9.6404e-06, -2.4299e-05, ..., 4.5289e-06,\n 3.8405e-06, 3.5178e-06], device='cuda:0')", - "exp_avg_sq": "tensor([1.3030e-10, 2.7239e-08, 7.4001e-09, ..., 2.0782e-08, 8.5761e-09,\n 1.6234e-08], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([-7.2008e-07, -3.0324e-05, -1.2121e-05, ..., 4.6873e-06,\n 5.6040e-06, 1.8653e-05], device='cuda:0')", + "exp_avg_sq": "tensor([6.4102e-11, 1.6039e-08, 4.9925e-09, ..., 1.4547e-08, 6.3299e-09,\n 9.7342e-09], device='cuda:0')" }, "7": { - "step": "tensor(3756.)", - "exp_avg": "tensor([[ 6.0665e-11, 5.0995e-07, 3.5536e-07, ..., -2.0912e-08,\n 1.1108e-06, 9.4519e-09],\n [ 5.1774e-10, 1.0021e-07, -3.0247e-08, ..., -1.3988e-06,\n -7.6887e-07, -1.2255e-07],\n [-1.6020e-10, 7.1003e-07, 3.6876e-07, ..., -2.2104e-07,\n -4.4563e-07, -6.5591e-08],\n ...,\n [-1.7519e-09, 5.9486e-07, -6.1973e-07, ..., 5.2647e-07,\n -7.6905e-07, -2.5892e-07],\n [-2.1909e-09, 2.0069e-08, 3.5291e-07, ..., -1.4907e-06,\n -9.7642e-07, -5.1371e-08],\n [-3.9906e-13, 2.3625e-07, -7.6400e-07, ..., 1.1686e-06,\n 1.6430e-07, 6.5122e-07]], device='cuda:0')", - "exp_avg_sq": "tensor([[4.9655e-12, 1.6720e-11, 3.5387e-11, ..., 1.6368e-11, 1.4670e-11,\n 1.8055e-11],\n [2.4545e-11, 4.3705e-11, 3.4227e-11, ..., 3.0594e-11, 1.8008e-11,\n 2.5034e-11],\n [2.2587e-12, 3.3762e-11, 4.4664e-11, ..., 2.8628e-11, 1.5808e-11,\n 3.6068e-11],\n ...,\n [5.7318e-12, 4.1034e-11, 2.1585e-11, ..., 3.3085e-11, 2.2133e-11,\n 4.9021e-11],\n [6.8614e-12, 5.0979e-11, 6.9217e-11, ..., 3.5820e-11, 2.0390e-11,\n 3.5244e-11],\n [3.8234e-12, 3.4248e-11, 1.4577e-10, ..., 2.9569e-11, 1.8069e-11,\n 2.7822e-11]], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([[-4.4023e-08, 7.2592e-08, 4.2675e-07, ..., -1.4111e-07,\n 3.2388e-08, -3.2449e-07],\n [ 3.0744e-08, -9.8427e-07, 2.1796e-07, ..., -2.7479e-07,\n 1.4039e-06, -6.8012e-08],\n [-2.4845e-08, 6.9495e-07, 3.8541e-07, ..., 2.7620e-07,\n 2.3047e-07, 1.5663e-06],\n ...,\n [ 1.5190e-10, -6.3821e-07, -6.1442e-07, ..., 7.7054e-07,\n -1.7749e-07, 1.1047e-07],\n [-1.8115e-08, 1.3022e-06, -3.2499e-07, ..., -1.0946e-07,\n 1.2228e-06, 6.6345e-07],\n [-1.7501e-07, -5.5627e-07, -2.9534e-08, ..., -1.1809e-07,\n -1.7901e-07, 4.4734e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.6072e-12, 9.8176e-12, 2.4876e-11, ..., 1.0221e-11, 8.3117e-12,\n 9.9785e-12],\n [7.3777e-12, 2.5521e-11, 2.1757e-11, ..., 1.8811e-11, 1.0008e-11,\n 1.2684e-11],\n [9.2885e-13, 2.0288e-11, 1.8688e-11, ..., 1.8455e-11, 8.5347e-12,\n 2.0158e-11],\n ...,\n [1.8921e-12, 2.1504e-11, 1.4157e-11, ..., 2.1163e-11, 1.1271e-11,\n 2.5519e-11],\n [2.2871e-12, 2.9846e-11, 4.8118e-11, ..., 2.3456e-11, 9.3664e-12,\n 2.0419e-11],\n [1.6263e-12, 1.9157e-11, 1.0400e-10, ..., 1.7849e-11, 1.0994e-11,\n 1.5045e-11]], device='cuda:0')" }, "14": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([6.1544e-07], device='cuda:0')" + "exp_avg_sq": "tensor([1.7587e-07], device='cuda:0')" }, "15": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([ 5.6052e-45, -5.6052e-45, 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([7.9396e-10, 4.4944e-08, 3.3791e-08], device='cuda:0')" + "exp_avg_sq": "tensor([2.2688e-10, 1.2843e-08, 9.6561e-09], device='cuda:0')" }, "16": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([4.7810e-05, 4.9263e-06, 5.3304e-06, 5.6940e-06], device='cuda:0')" + "exp_avg_sq": "tensor([1.3662e-05, 1.4077e-06, 1.5232e-06, 1.6271e-06], device='cuda:0')" }, "18": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[5.0973e-11, 3.1820e-11, 0.0000e+00, ..., 2.0996e-10, 7.5513e-11,\n 2.4238e-12],\n [1.0875e-11, 2.7956e-11, 0.0000e+00, ..., 2.5702e-11, 9.0376e-11,\n 2.5470e-11],\n [3.2684e-12, 1.1535e-11, 0.0000e+00, ..., 3.8547e-12, 2.4990e-11,\n 8.1845e-12],\n ...,\n [4.1414e-13, 2.2274e-12, 0.0000e+00, ..., 3.1765e-12, 4.3990e-11,\n 2.4782e-13],\n [7.9773e-11, 8.3864e-11, 0.0000e+00, ..., 1.0151e-10, 4.2545e-10,\n 2.5532e-11],\n [2.4918e-13, 1.9365e-13, 0.0000e+00, ..., 3.8328e-13, 3.8018e-12,\n 9.0047e-13]], device='cuda:0')" + "exp_avg_sq": "tensor([[1.4566e-11, 9.0929e-12, 0.0000e+00, ..., 5.9999e-11, 2.1578e-11,\n 6.9262e-13],\n [3.1077e-12, 7.9886e-12, 0.0000e+00, ..., 7.3445e-12, 2.5826e-11,\n 7.2784e-12],\n [9.3398e-13, 3.2961e-12, 0.0000e+00, ..., 1.1015e-12, 7.1412e-12,\n 2.3388e-12],\n ...,\n [1.1834e-13, 6.3651e-13, 0.0000e+00, ..., 9.0771e-13, 1.2570e-11,\n 7.0816e-14],\n [2.2796e-11, 2.3965e-11, 0.0000e+00, ..., 2.9006e-11, 1.2158e-10,\n 7.2961e-12],\n [7.1204e-14, 5.5338e-14, 0.0000e+00, ..., 1.0953e-13, 1.0864e-12,\n 2.5732e-13]], device='cuda:0')" }, "19": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([7.2111e-08, 3.2879e-08, 7.5124e-09, 1.3638e-08, 4.9617e-09, 4.8081e-11,\n 8.5368e-10, 1.3541e-09, 3.4605e-09, 1.9590e-10, 1.0975e-08, 5.5811e-08,\n 2.2998e-08, 2.7859e-08, 4.2059e-09, 2.6230e-08, 6.2517e-08, 5.1822e-09,\n 3.4662e-08, 5.6780e-08, 6.7777e-10, 2.3856e-08, 5.1573e-09, 5.0992e-08,\n 1.0316e-08, 2.0878e-08, 9.3048e-09, 5.7882e-09, 1.5762e-09, 9.7644e-09,\n 2.9211e-08, 1.4639e-08, 5.3193e-10, 1.3428e-07, 6.9624e-08, 3.3850e-08,\n 3.5099e-09, 3.8708e-10, 2.6605e-08, 2.5928e-08, 1.2721e-07, 3.1465e-09,\n 5.6150e-10, 1.7782e-09, 4.4891e-09, 3.2614e-08, 1.4023e-08, 9.1412e-09,\n 8.8950e-09, 1.0427e-08, 4.2656e-10, 1.5015e-08, 3.8137e-08, 2.3267e-08,\n 4.9338e-09, 2.5728e-09, 1.7534e-08, 2.2334e-10, 6.7443e-09, 2.4248e-09,\n 7.5530e-09, 1.6687e-08, 1.0761e-08, 6.2370e-09, 6.1128e-10, 3.9807e-09,\n 1.4475e-08, 8.5150e-10, 1.8245e-08, 1.7173e-08, 1.6425e-10, 5.5442e-09,\n 1.4666e-08, 6.6673e-10, 5.8840e-09, 5.0412e-11, 3.9012e-09, 9.9621e-08,\n 1.9807e-09, 1.8306e-10, 3.0539e-08, 4.0876e-08, 6.6988e-12, 2.2793e-10,\n 7.2817e-09, 2.5292e-08, 7.6099e-10, 1.2944e-08, 1.6594e-09, 2.8781e-08,\n 4.5703e-09, 4.1744e-08, 5.3209e-08, 3.7651e-09, 5.2714e-09, 2.0984e-08,\n 1.3834e-09, 8.2495e-08, 2.7518e-08, 9.8563e-08, 2.8130e-08, 1.9492e-09,\n 3.8896e-10, 1.1903e-08, 9.8927e-10, 9.8263e-08, 2.4666e-09, 6.9886e-09,\n 1.5747e-08, 1.2149e-10, 1.2677e-07, 2.3485e-09, 3.7704e-08, 1.6013e-08,\n 5.4320e-09, 7.4722e-08, 1.8617e-09, 9.0578e-09, 2.5009e-08, 1.1966e-08,\n 1.3053e-08, 2.4153e-08, 2.3984e-09, 4.0316e-10, 1.5722e-10, 8.7827e-09,\n 2.0945e-07, 1.6332e-09, 1.3748e-08, 2.7266e-09, 3.6691e-10, 6.7357e-08,\n 5.6610e-09, 9.4812e-09, 3.8372e-08, 2.7400e-08, 2.3158e-08, 7.0999e-08,\n 3.0614e-08, 6.7351e-12, 2.7926e-10, 4.5310e-10, 2.4655e-08, 1.0118e-07,\n 5.6669e-08, 3.7232e-09, 3.8080e-09, 4.9158e-09, 3.0326e-08, 6.8675e-11,\n 1.5716e-09, 3.1572e-08, 1.0899e-08, 1.2138e-08, 2.7155e-08, 9.3420e-10,\n 8.5297e-09, 2.2411e-08, 4.3538e-09, 7.2589e-08, 3.4418e-08, 5.6478e-09,\n 1.3397e-08, 3.6374e-08, 2.2994e-09, 1.6452e-08, 1.6195e-07, 2.4607e-08,\n 1.7003e-08, 7.8402e-09, 1.1340e-09, 2.7050e-08, 1.1058e-07, 2.7812e-08,\n 3.6550e-08, 2.6624e-10, 3.6853e-09, 1.2725e-08, 1.4501e-08, 1.5547e-08,\n 1.9080e-08, 8.9087e-09, 1.7888e-09, 1.0741e-08, 2.0446e-08, 5.6734e-11,\n 5.3550e-09, 2.1993e-08, 5.5786e-09, 8.8656e-08, 6.6271e-08, 6.8729e-09,\n 4.5497e-09, 1.9707e-09, 1.3152e-11, 3.1757e-07, 3.7217e-09, 6.4078e-09,\n 7.0747e-09, 1.5511e-07, 1.5454e-12, 1.4485e-07, 2.7247e-08, 4.1455e-09,\n 2.6466e-08, 3.2520e-07, 2.9900e-08, 9.3312e-09, 1.3543e-10, 4.1974e-08,\n 3.3888e-09, 6.1255e-08, 9.5377e-09, 1.9433e-08, 1.4876e-10, 2.4746e-08,\n 1.9029e-07, 1.0772e-10, 2.0974e-09, 6.1515e-09, 4.5288e-09, 1.2659e-07,\n 1.5943e-08, 9.1026e-08, 1.2619e-08, 1.5323e-11, 3.2511e-10, 1.8625e-09,\n 6.9031e-10, 2.6577e-08, 2.2722e-10, 2.4286e-10, 4.6132e-08, 1.8042e-08,\n 1.4071e-08, 3.1350e-09, 3.7526e-09, 2.8405e-08, 3.5563e-09, 3.1250e-09,\n 3.6252e-08, 1.4754e-10, 7.9813e-09, 4.8939e-08, 8.5311e-09, 2.4439e-10,\n 4.5672e-10, 2.0537e-08, 2.3187e-09, 7.3865e-08, 3.8509e-08, 3.8693e-09,\n 3.3264e-08, 3.9276e-09, 1.1109e-07, 1.9643e-09], device='cuda:0')" + "exp_avg_sq": "tensor([2.0606e-08, 9.3954e-09, 2.1467e-09, 3.8972e-09, 1.4179e-09, 1.3740e-11,\n 2.4395e-10, 3.8693e-10, 9.8885e-10, 5.5979e-11, 3.1361e-09, 1.5948e-08,\n 6.5719e-09, 7.9609e-09, 1.2019e-09, 7.4954e-09, 1.7865e-08, 1.4808e-09,\n 9.9049e-09, 1.6225e-08, 1.9368e-10, 6.8170e-09, 1.4737e-09, 1.4571e-08,\n 2.9480e-09, 5.9659e-09, 2.6589e-09, 1.6540e-09, 4.5040e-10, 2.7903e-09,\n 8.3472e-09, 4.1833e-09, 1.5200e-10, 3.8372e-08, 1.9896e-08, 9.6730e-09,\n 1.0030e-09, 1.1061e-10, 7.6025e-09, 7.4092e-09, 3.6351e-08, 8.9915e-10,\n 1.6045e-10, 5.0813e-10, 1.2828e-09, 9.3198e-09, 4.0073e-09, 2.6122e-09,\n 2.5418e-09, 2.9797e-09, 1.2189e-10, 4.2907e-09, 1.0898e-08, 6.6486e-09,\n 1.4099e-09, 7.3521e-10, 5.0104e-09, 6.3822e-11, 1.9272e-09, 6.9291e-10,\n 2.1583e-09, 4.7684e-09, 3.0751e-09, 1.7823e-09, 1.7468e-10, 1.1375e-09,\n 4.1364e-09, 2.4332e-10, 5.2138e-09, 4.9074e-09, 4.6936e-11, 1.5843e-09,\n 4.1910e-09, 1.9052e-10, 1.6814e-09, 1.4406e-11, 1.1148e-09, 2.8467e-08,\n 5.6600e-10, 5.2312e-11, 8.7266e-09, 1.1680e-08, 1.9142e-12, 6.5134e-11,\n 2.0808e-09, 7.2274e-09, 2.1746e-10, 3.6989e-09, 4.7418e-10, 8.2244e-09,\n 1.3060e-09, 1.1929e-08, 1.5205e-08, 1.0759e-09, 1.5064e-09, 5.9965e-09,\n 3.9531e-10, 2.3574e-08, 7.8634e-09, 2.8165e-08, 8.0385e-09, 5.5699e-10,\n 1.1115e-10, 3.4015e-09, 2.8269e-10, 2.8079e-08, 7.0484e-10, 1.9970e-09,\n 4.4999e-09, 3.4717e-11, 3.6227e-08, 6.7111e-10, 1.0774e-08, 4.5758e-09,\n 1.5522e-09, 2.1353e-08, 5.3199e-10, 2.5884e-09, 7.1466e-09, 3.4193e-09,\n 3.7299e-09, 6.9018e-09, 6.8537e-10, 1.1521e-10, 4.4927e-11, 2.5097e-09,\n 5.9853e-08, 4.6670e-10, 3.9286e-09, 7.7916e-10, 1.0485e-10, 1.9248e-08,\n 1.6177e-09, 2.7093e-09, 1.0965e-08, 7.8298e-09, 6.6174e-09, 2.0289e-08,\n 8.7483e-09, 1.9246e-12, 7.9800e-11, 1.2948e-10, 7.0455e-09, 2.8913e-08,\n 1.6194e-08, 1.0639e-09, 1.0882e-09, 1.4047e-09, 8.6660e-09, 1.9625e-11,\n 4.4909e-10, 9.0218e-09, 3.1145e-09, 3.4686e-09, 7.7597e-09, 2.6695e-10,\n 2.4374e-09, 6.4040e-09, 1.2441e-09, 2.0743e-08, 9.8351e-09, 1.6139e-09,\n 3.8283e-09, 1.0394e-08, 6.5708e-10, 4.7012e-09, 4.6280e-08, 7.0317e-09,\n 4.8587e-09, 2.2404e-09, 3.2405e-10, 7.7298e-09, 3.1598e-08, 7.9476e-09,\n 1.0445e-08, 7.6081e-11, 1.0531e-09, 3.6363e-09, 4.1439e-09, 4.4426e-09,\n 5.4522e-09, 2.5457e-09, 5.1116e-10, 3.0694e-09, 5.8425e-09, 1.6212e-11,\n 1.5302e-09, 6.2847e-09, 1.5941e-09, 2.5334e-08, 1.8937e-08, 1.9640e-09,\n 1.3001e-09, 5.6315e-10, 3.7583e-12, 9.0749e-08, 1.0635e-09, 1.8311e-09,\n 2.0217e-09, 4.4324e-08, 4.4160e-13, 4.1393e-08, 7.7860e-09, 1.1846e-09,\n 7.5630e-09, 9.2929e-08, 8.5440e-09, 2.6665e-09, 3.8699e-11, 1.1994e-08,\n 9.6838e-10, 1.7504e-08, 2.7255e-09, 5.5532e-09, 4.2509e-11, 7.0713e-09,\n 5.4377e-08, 3.0782e-11, 5.9934e-10, 1.7578e-09, 1.2941e-09, 3.6175e-08,\n 4.5558e-09, 2.6011e-08, 3.6060e-09, 4.3788e-12, 9.2904e-11, 5.3223e-10,\n 1.9726e-10, 7.5946e-09, 6.4931e-11, 6.9400e-11, 1.3183e-08, 5.1555e-09,\n 4.0208e-09, 8.9585e-10, 1.0723e-09, 8.1170e-09, 1.0163e-09, 8.9298e-10,\n 1.0359e-08, 4.2160e-11, 2.2807e-09, 1.3985e-08, 2.4378e-09, 6.9836e-11,\n 1.3051e-10, 5.8687e-09, 6.6259e-10, 2.1107e-08, 1.1004e-08, 1.1057e-09,\n 9.5054e-09, 1.1223e-09, 3.1745e-08, 5.6132e-10], device='cuda:0')" }, "20": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.7790e-10, 1.6269e-10, 1.0941e-11, 7.1396e-11, 8.3760e-12, 6.9079e-13,\n 1.5190e-12, 7.0536e-12, 4.1884e-12, 2.4605e-13, 2.0215e-11, 1.0123e-10,\n 5.8283e-11, 1.3111e-10, 1.3496e-11, 6.1782e-11, 2.0264e-10, 8.9879e-12,\n 1.1212e-10, 1.6155e-10, 3.3478e-13, 1.0034e-10, 1.1346e-11, 2.0811e-10,\n 4.0451e-11, 4.0521e-11, 2.3532e-11, 1.9232e-11, 5.7199e-12, 1.7282e-11,\n 8.9209e-11, 4.3432e-11, 3.3877e-12, 3.6152e-10, 2.6976e-10, 8.4419e-11,\n 6.9684e-12, 7.0523e-15, 5.5283e-11, 4.6872e-11, 5.3808e-10, 7.5062e-12,\n 2.3888e-12, 7.8012e-12, 8.9683e-12, 7.3366e-11, 3.0546e-11, 1.9284e-11,\n 1.3604e-11, 1.6111e-11, 3.3422e-13, 4.1026e-11, 7.9180e-11, 5.0719e-11,\n 6.7584e-12, 1.2133e-11, 5.6724e-11, 4.6730e-13, 2.2259e-11, 6.8681e-12,\n 1.3637e-11, 1.2448e-10, 3.0185e-11, 1.4980e-11, 9.2504e-15, 6.3595e-12,\n 3.1605e-11, 4.6230e-13, 3.3689e-11, 5.5601e-11, 3.8651e-12, 1.2739e-11,\n 1.6284e-11, 2.2385e-12, 1.0761e-11, 3.2580e-12, 2.2748e-11, 5.0582e-10,\n 1.7336e-12, 5.1733e-16, 1.2791e-10, 1.0597e-10, 9.7390e-13, 1.1260e-12,\n 1.4323e-11, 5.6508e-11, 3.0326e-12, 6.0596e-11, 4.8633e-12, 5.3667e-11,\n 1.3843e-11, 9.5388e-11, 1.9342e-10, 4.7739e-12, 1.4584e-11, 7.3961e-11,\n 2.9954e-12, 1.7571e-10, 1.0709e-10, 2.7715e-10, 8.4309e-11, 9.0741e-12,\n 4.7564e-12, 2.1098e-11, 4.1266e-13, 2.3091e-10, 2.5784e-12, 1.6476e-11,\n 3.2788e-11, 1.7170e-13, 3.2358e-10, 7.3207e-12, 1.0685e-10, 3.3622e-11,\n 9.9119e-12, 3.0160e-10, 7.3347e-12, 1.9313e-11, 7.5407e-11, 2.9027e-11,\n 2.4016e-11, 5.0302e-11, 2.0157e-12, 3.8903e-14, 1.6044e-12, 1.3516e-11,\n 9.4712e-10, 6.1938e-12, 4.3911e-11, 4.1586e-12, 1.3445e-13, 2.0088e-10,\n 1.3853e-11, 2.8208e-11, 7.3658e-11, 6.1878e-11, 6.5044e-11, 1.8325e-10,\n 8.3751e-11, 1.1363e-12, 7.1612e-12, 2.5963e-12, 5.4175e-11, 2.9740e-10,\n 3.5609e-10, 1.4738e-11, 1.5297e-11, 1.1507e-11, 6.2821e-11, 1.7417e-14,\n 1.5732e-11, 6.9229e-11, 3.5189e-11, 1.3819e-11, 6.3704e-11, 5.6596e-12,\n 1.2588e-11, 3.8181e-11, 7.1622e-12, 2.8710e-10, 6.7116e-11, 6.3917e-12,\n 3.1212e-11, 1.2125e-10, 6.6820e-12, 2.1380e-11, 5.5671e-10, 4.6670e-11,\n 5.2028e-11, 1.9980e-11, 5.8114e-13, 1.0789e-10, 2.6636e-10, 1.0380e-10,\n 6.8801e-11, 1.0242e-12, 5.7591e-12, 5.6919e-11, 4.4419e-11, 4.0603e-11,\n 4.9273e-11, 1.7240e-11, 2.1723e-12, 1.6127e-11, 5.1182e-11, 3.0596e-17,\n 8.2496e-12, 4.8927e-11, 4.9129e-12, 2.2199e-10, 1.7297e-10, 8.6567e-12,\n 1.3530e-11, 1.0782e-11, 1.2397e-12, 1.2311e-09, 4.7380e-12, 1.0261e-11,\n 2.1412e-11, 4.2631e-10, 3.6725e-13, 3.8920e-10, 4.9167e-11, 5.6627e-12,\n 8.8036e-11, 8.2881e-10, 6.4764e-11, 2.3547e-11, 4.4392e-14, 6.5591e-11,\n 4.6419e-12, 1.5269e-10, 6.4762e-11, 5.9305e-11, 8.8904e-14, 4.2974e-11,\n 9.0533e-10, 3.6934e-14, 7.8743e-12, 7.3688e-12, 5.7845e-12, 5.5685e-10,\n 8.8715e-11, 2.2151e-10, 3.7307e-11, 3.2603e-13, 1.5803e-14, 5.5225e-12,\n 6.8373e-13, 7.4729e-11, 1.8366e-12, 1.7104e-13, 2.7213e-10, 7.9667e-11,\n 4.2585e-11, 7.3028e-12, 4.7998e-12, 8.2617e-11, 9.7606e-12, 3.1201e-12,\n 8.9763e-11, 3.4369e-14, 2.3947e-11, 1.2605e-10, 1.2937e-11, 4.1897e-14,\n 1.0757e-13, 3.9285e-11, 3.6571e-12, 1.9934e-10, 7.8250e-11, 5.4386e-12,\n 7.4508e-11, 7.7728e-12, 3.5577e-10, 1.0344e-11], device='cuda:0')" + "exp_avg_sq": "tensor([5.0837e-11, 4.6489e-11, 3.1264e-12, 2.0402e-11, 2.3935e-12, 1.9740e-13,\n 4.3405e-13, 2.0156e-12, 1.1969e-12, 7.0310e-14, 5.7767e-12, 2.8928e-11,\n 1.6655e-11, 3.7466e-11, 3.8567e-12, 1.7655e-11, 5.7906e-11, 2.5684e-12,\n 3.2040e-11, 4.6163e-11, 9.5665e-14, 2.8674e-11, 3.2421e-12, 5.9469e-11,\n 1.1559e-11, 1.1579e-11, 6.7244e-12, 5.4956e-12, 1.6345e-12, 4.9386e-12,\n 2.5492e-11, 1.2411e-11, 9.6807e-13, 1.0331e-10, 7.7087e-11, 2.4123e-11,\n 1.9913e-12, 2.0152e-15, 1.5798e-11, 1.3394e-11, 1.5376e-10, 2.1449e-12,\n 6.8263e-13, 2.2293e-12, 2.5628e-12, 2.0965e-11, 8.7288e-12, 5.5107e-12,\n 3.8874e-12, 4.6039e-12, 9.5507e-14, 1.1724e-11, 2.2626e-11, 1.4493e-11,\n 1.9313e-12, 3.4671e-12, 1.6209e-11, 1.3354e-13, 6.3606e-12, 1.9626e-12,\n 3.8969e-12, 3.5572e-11, 8.6256e-12, 4.2808e-12, 2.6434e-15, 1.8173e-12,\n 9.0315e-12, 1.3211e-13, 9.6269e-12, 1.5888e-11, 1.1045e-12, 3.6402e-12,\n 4.6533e-12, 6.3967e-13, 3.0752e-12, 9.3100e-13, 6.5003e-12, 1.4454e-10,\n 4.9540e-13, 1.4783e-16, 3.6552e-11, 3.0283e-11, 2.7830e-13, 3.2177e-13,\n 4.0930e-12, 1.6148e-11, 8.6660e-13, 1.7316e-11, 1.3897e-12, 1.5336e-11,\n 3.9559e-12, 2.7258e-11, 5.5273e-11, 1.3642e-12, 4.1675e-12, 2.1135e-11,\n 8.5597e-13, 5.0211e-11, 3.0602e-11, 7.9197e-11, 2.4092e-11, 2.5930e-12,\n 1.3592e-12, 6.0288e-12, 1.1792e-13, 6.5984e-11, 7.3680e-13, 4.7081e-12,\n 9.3694e-12, 4.9065e-14, 9.2464e-11, 2.0920e-12, 3.0535e-11, 9.6079e-12,\n 2.8324e-12, 8.6184e-11, 2.0960e-12, 5.5189e-12, 2.1548e-11, 8.2947e-12,\n 6.8629e-12, 1.4374e-11, 5.7599e-13, 1.1117e-14, 4.5848e-13, 3.8622e-12,\n 2.7065e-10, 1.7699e-12, 1.2548e-11, 1.1884e-12, 3.8420e-14, 5.7402e-11,\n 3.9586e-12, 8.0607e-12, 2.1048e-11, 1.7682e-11, 1.8587e-11, 5.2364e-11,\n 2.3933e-11, 3.2469e-13, 2.0464e-12, 7.4192e-13, 1.5481e-11, 8.4985e-11,\n 1.0176e-10, 4.2116e-12, 4.3713e-12, 3.2883e-12, 1.7952e-11, 4.9772e-15,\n 4.4954e-12, 1.9783e-11, 1.0056e-11, 3.9488e-12, 1.8204e-11, 1.6173e-12,\n 3.5971e-12, 1.0911e-11, 2.0467e-12, 8.2042e-11, 1.9179e-11, 1.8265e-12,\n 8.9190e-12, 3.4649e-11, 1.9094e-12, 6.1096e-12, 1.5909e-10, 1.3336e-11,\n 1.4868e-11, 5.7095e-12, 1.6607e-13, 3.0832e-11, 7.6113e-11, 2.9662e-11,\n 1.9661e-11, 2.9267e-13, 1.6457e-12, 1.6265e-11, 1.2693e-11, 1.1603e-11,\n 1.4080e-11, 4.9266e-12, 6.2074e-13, 4.6085e-12, 1.4626e-11, 8.7429e-18,\n 2.3574e-12, 1.3981e-11, 1.4039e-12, 6.3436e-11, 4.9428e-11, 2.4737e-12,\n 3.8664e-12, 3.0811e-12, 3.5427e-13, 3.5179e-10, 1.3539e-12, 2.9321e-12,\n 6.1185e-12, 1.2182e-10, 1.0495e-13, 1.1122e-10, 1.4050e-11, 1.6182e-12,\n 2.5157e-11, 2.3684e-10, 1.8507e-11, 6.7287e-12, 1.2685e-14, 1.8743e-11,\n 1.3265e-12, 4.3632e-11, 1.8506e-11, 1.6947e-11, 2.5405e-14, 1.2280e-11,\n 2.5870e-10, 1.0554e-14, 2.2501e-12, 2.1057e-12, 1.6530e-12, 1.5912e-10,\n 2.5351e-11, 6.3299e-11, 1.0661e-11, 9.3165e-14, 4.5158e-15, 1.5781e-12,\n 1.9538e-13, 2.1354e-11, 5.2482e-13, 4.8876e-14, 7.7762e-11, 2.2765e-11,\n 1.2169e-11, 2.0868e-12, 1.3716e-12, 2.3608e-11, 2.7892e-12, 8.9159e-13,\n 2.5651e-11, 9.8212e-15, 6.8431e-12, 3.6019e-11, 3.6967e-12, 1.1972e-14,\n 3.0740e-14, 1.1226e-11, 1.0451e-12, 5.6963e-11, 2.2360e-11, 1.5541e-12,\n 2.1291e-11, 2.2211e-12, 1.0166e-10, 2.9559e-12], device='cuda:0')" }, "21": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([3.0513e-10, 1.5575e-10, 2.1444e-11, 7.0091e-11, 1.1022e-11, 4.6821e-13,\n 2.1236e-12, 1.1535e-11, 7.0403e-12, 1.3797e-13, 3.3509e-11, 1.8913e-10,\n 6.6702e-11, 1.2998e-10, 2.2715e-11, 1.1982e-10, 2.7097e-10, 1.4327e-11,\n 9.9581e-11, 2.3511e-10, 3.7849e-13, 1.1235e-10, 2.6005e-11, 2.1952e-10,\n 5.4043e-11, 6.2579e-11, 4.3333e-11, 3.0406e-11, 1.1779e-11, 2.5193e-11,\n 1.3100e-10, 6.8157e-11, 5.2974e-12, 4.6215e-10, 2.9776e-10, 1.4542e-10,\n 5.0357e-12, 1.4637e-15, 1.1742e-10, 8.9180e-11, 4.1383e-10, 1.7460e-11,\n 5.7616e-12, 1.3781e-11, 1.5376e-11, 1.0484e-10, 6.4757e-11, 4.5137e-11,\n 2.2351e-11, 3.3642e-11, 5.4620e-13, 6.6384e-11, 1.2431e-10, 7.2921e-11,\n 1.2303e-11, 1.6644e-11, 8.1039e-11, 4.2135e-13, 3.7581e-11, 1.4433e-11,\n 1.8074e-11, 8.7669e-11, 4.9232e-11, 2.8531e-11, 1.5199e-13, 9.9489e-12,\n 6.8435e-11, 1.2492e-12, 8.1119e-11, 8.3226e-11, 4.2490e-12, 2.8142e-11,\n 5.0979e-11, 4.8307e-12, 1.3991e-11, 2.9860e-12, 2.4227e-11, 4.4023e-10,\n 2.6126e-12, 4.1517e-16, 1.4752e-10, 1.8406e-10, 1.0729e-12, 2.4360e-12,\n 3.7119e-11, 1.1778e-10, 5.3554e-12, 6.8532e-11, 5.3812e-12, 1.2666e-10,\n 2.3924e-11, 1.7967e-10, 1.7135e-10, 9.9855e-12, 2.3722e-11, 5.6677e-11,\n 3.9287e-12, 2.8169e-10, 1.2903e-10, 4.1281e-10, 1.2400e-10, 1.3545e-11,\n 5.4244e-12, 3.2447e-11, 6.0957e-13, 3.2432e-10, 4.3927e-12, 3.6572e-11,\n 4.4313e-11, 1.1394e-13, 4.2573e-10, 1.4291e-11, 1.6960e-10, 5.8996e-11,\n 1.3047e-11, 2.3604e-10, 1.0083e-11, 2.4425e-11, 1.1439e-10, 5.7849e-11,\n 3.6987e-11, 7.1244e-11, 3.7083e-12, 1.0728e-13, 2.9877e-12, 2.5195e-11,\n 8.4202e-10, 9.1658e-12, 6.9414e-11, 7.6778e-12, 1.5522e-13, 2.9171e-10,\n 2.8897e-11, 4.4917e-11, 1.2096e-10, 8.9421e-11, 1.0258e-10, 2.3944e-10,\n 1.3629e-10, 1.3858e-12, 5.4149e-12, 4.2468e-12, 7.8913e-11, 3.4614e-10,\n 2.6335e-10, 2.2066e-11, 2.1842e-11, 2.5692e-11, 9.9432e-11, 3.8885e-14,\n 1.4426e-11, 9.4984e-11, 5.5247e-11, 3.4817e-11, 7.5395e-11, 8.6049e-12,\n 2.1349e-11, 8.1247e-11, 1.1546e-11, 3.0766e-10, 1.1302e-10, 1.5715e-11,\n 3.1514e-11, 1.6358e-10, 1.2356e-11, 4.6988e-11, 5.7828e-10, 7.8436e-11,\n 7.7943e-11, 3.8300e-11, 1.5973e-12, 7.0442e-11, 3.7929e-10, 1.3183e-10,\n 1.0991e-10, 2.0925e-12, 8.5671e-12, 6.8027e-11, 3.3177e-11, 7.4329e-11,\n 5.0393e-11, 2.2260e-11, 3.6225e-12, 2.9345e-11, 8.5717e-11, 8.7955e-15,\n 1.6436e-11, 6.2760e-11, 1.4996e-11, 3.6393e-10, 2.2117e-10, 1.8147e-11,\n 2.4757e-11, 1.3007e-11, 1.6284e-12, 1.3031e-09, 7.5288e-12, 1.3835e-11,\n 3.4351e-11, 5.4659e-10, 9.1853e-13, 6.0308e-10, 8.7673e-11, 9.2035e-12,\n 1.2414e-10, 1.1244e-09, 8.9607e-11, 2.1932e-11, 1.1213e-13, 1.4592e-10,\n 5.6155e-12, 2.6417e-10, 5.4364e-11, 9.3880e-11, 1.0629e-13, 8.2542e-11,\n 6.3900e-10, 2.4898e-14, 1.3253e-11, 1.3728e-11, 8.4119e-12, 5.3086e-10,\n 8.2700e-11, 3.8727e-10, 6.4844e-11, 6.0027e-13, 1.0833e-14, 1.2548e-11,\n 1.7443e-12, 1.1423e-10, 3.6742e-12, 3.6943e-13, 2.0632e-10, 8.5290e-11,\n 6.4915e-11, 1.5698e-11, 9.7177e-12, 1.2743e-10, 2.0249e-11, 7.5757e-12,\n 1.6361e-10, 3.2480e-14, 3.8847e-11, 1.5313e-10, 2.4281e-11, 8.0992e-14,\n 9.9368e-14, 6.5417e-11, 3.6521e-12, 3.1803e-10, 1.2287e-10, 9.0137e-12,\n 9.9869e-11, 1.3188e-11, 3.6839e-10, 1.4414e-11], device='cuda:0')" + "exp_avg_sq": "tensor([8.7193e-11, 4.4508e-11, 6.1277e-12, 2.0029e-11, 3.1496e-12, 1.3380e-13,\n 6.0683e-13, 3.2961e-12, 2.0118e-12, 3.9426e-14, 9.5755e-12, 5.4047e-11,\n 1.9061e-11, 3.7143e-11, 6.4909e-12, 3.4239e-11, 7.7432e-11, 4.0941e-12,\n 2.8456e-11, 6.7186e-11, 1.0816e-13, 3.2105e-11, 7.4311e-12, 6.2731e-11,\n 1.5443e-11, 1.7882e-11, 1.2383e-11, 8.6888e-12, 3.3658e-12, 7.1992e-12,\n 3.7434e-11, 1.9476e-11, 1.5138e-12, 1.3206e-10, 8.5087e-11, 4.1555e-11,\n 1.4390e-12, 4.1827e-16, 3.3554e-11, 2.5484e-11, 1.1825e-10, 4.9894e-12,\n 1.6464e-12, 3.9380e-12, 4.3938e-12, 2.9960e-11, 1.8505e-11, 1.2898e-11,\n 6.3870e-12, 9.6134e-12, 1.5608e-13, 1.8970e-11, 3.5524e-11, 2.0838e-11,\n 3.5157e-12, 4.7561e-12, 2.3158e-11, 1.2040e-13, 1.0739e-11, 4.1243e-12,\n 5.1648e-12, 2.5052e-11, 1.4068e-11, 8.1531e-12, 4.3431e-14, 2.8430e-12,\n 1.9556e-11, 3.5698e-13, 2.3180e-11, 2.3782e-11, 1.2142e-12, 8.0418e-12,\n 1.4568e-11, 1.3804e-12, 3.9982e-12, 8.5327e-13, 6.9230e-12, 1.2580e-10,\n 7.4656e-13, 1.1864e-16, 4.2155e-11, 5.2598e-11, 3.0659e-13, 6.9611e-13,\n 1.0607e-11, 3.3656e-11, 1.5304e-12, 1.9584e-11, 1.5377e-12, 3.6193e-11,\n 6.8366e-12, 5.1342e-11, 4.8966e-11, 2.8534e-12, 6.7788e-12, 1.6196e-11,\n 1.1227e-12, 8.0495e-11, 3.6871e-11, 1.1796e-10, 3.5435e-11, 3.8705e-12,\n 1.5501e-12, 9.2719e-12, 1.7419e-13, 9.2677e-11, 1.2553e-12, 1.0451e-11,\n 1.2663e-11, 3.2559e-14, 1.2166e-10, 4.0837e-12, 4.8465e-11, 1.6859e-11,\n 3.7283e-12, 6.7451e-11, 2.8814e-12, 6.9796e-12, 3.2689e-11, 1.6531e-11,\n 1.0569e-11, 2.0359e-11, 1.0597e-12, 3.0656e-14, 8.5377e-13, 7.1995e-12,\n 2.4062e-10, 2.6192e-12, 1.9836e-11, 2.1940e-12, 4.4356e-14, 8.3359e-11,\n 8.2576e-12, 1.2836e-11, 3.4564e-11, 2.5553e-11, 2.9313e-11, 6.8423e-11,\n 3.8947e-11, 3.9599e-13, 1.5474e-12, 1.2136e-12, 2.2550e-11, 9.8913e-11,\n 7.5255e-11, 6.3054e-12, 6.2416e-12, 7.3417e-12, 2.8414e-11, 1.1112e-14,\n 4.1223e-12, 2.7142e-11, 1.5787e-11, 9.9493e-12, 2.1545e-11, 2.4589e-12,\n 6.1008e-12, 2.3217e-11, 3.2995e-12, 8.7918e-11, 3.2296e-11, 4.4906e-12,\n 9.0052e-12, 4.6745e-11, 3.5309e-12, 1.3427e-11, 1.6525e-10, 2.2414e-11,\n 2.2273e-11, 1.0944e-11, 4.5643e-13, 2.0129e-11, 1.0838e-10, 3.7671e-11,\n 3.1408e-11, 5.9795e-13, 2.4481e-12, 1.9439e-11, 9.4806e-12, 2.1240e-11,\n 1.4400e-11, 6.3610e-12, 1.0352e-12, 8.3857e-12, 2.4494e-11, 2.5134e-15,\n 4.6966e-12, 1.7934e-11, 4.2851e-12, 1.0400e-10, 6.3202e-11, 5.1856e-12,\n 7.0746e-12, 3.7169e-12, 4.6532e-13, 3.7237e-10, 2.1514e-12, 3.9536e-12,\n 9.8160e-12, 1.5619e-10, 2.6248e-13, 1.7233e-10, 2.5053e-11, 2.6300e-12,\n 3.5473e-11, 3.2130e-10, 2.5606e-11, 6.2672e-12, 3.2042e-14, 4.1699e-11,\n 1.6047e-12, 7.5488e-11, 1.5535e-11, 2.6827e-11, 3.0372e-14, 2.3587e-11,\n 1.8260e-10, 7.1149e-15, 3.7872e-12, 3.9228e-12, 2.4038e-12, 1.5170e-10,\n 2.3632e-11, 1.1067e-10, 1.8530e-11, 1.7153e-13, 3.0955e-15, 3.5857e-12,\n 4.9844e-13, 3.2643e-11, 1.0499e-12, 1.0557e-13, 5.8959e-11, 2.4372e-11,\n 1.8550e-11, 4.4858e-12, 2.7769e-12, 3.6414e-11, 5.7864e-12, 2.1648e-12,\n 4.6752e-11, 9.2814e-15, 1.1101e-11, 4.3758e-11, 6.9385e-12, 2.3144e-14,\n 2.8395e-14, 1.8693e-11, 1.0436e-12, 9.0881e-11, 3.5112e-11, 2.5757e-12,\n 2.8538e-11, 3.7685e-12, 1.0527e-10, 4.1190e-12], device='cuda:0')" }, "22": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[2.1560e-11, 8.0988e-11, 0.0000e+00, ..., 1.1793e-10, 1.9284e-10,\n 2.9505e-11],\n [9.8892e-12, 1.1037e-11, 0.0000e+00, ..., 1.8213e-11, 5.6181e-11,\n 3.5712e-14],\n [1.3277e-11, 7.2124e-12, 0.0000e+00, ..., 1.2129e-11, 2.5914e-11,\n 2.0870e-12],\n ...,\n [2.2678e-11, 2.7961e-11, 0.0000e+00, ..., 3.4535e-11, 6.5349e-11,\n 2.0787e-11],\n [6.9048e-11, 2.1923e-11, 0.0000e+00, ..., 2.1793e-11, 1.3829e-10,\n 1.3165e-11],\n [3.3798e-13, 4.7331e-13, 0.0000e+00, ..., 5.1037e-12, 8.1343e-12,\n 1.6603e-12]], device='cuda:0')" + "exp_avg_sq": "tensor([[6.1609e-12, 2.3143e-11, 0.0000e+00, ..., 3.3698e-11, 5.5105e-11,\n 8.4312e-12],\n [2.8259e-12, 3.1539e-12, 0.0000e+00, ..., 5.2046e-12, 1.6054e-11,\n 1.0205e-14],\n [3.7940e-12, 2.0610e-12, 0.0000e+00, ..., 3.4659e-12, 7.4052e-12,\n 5.9638e-13],\n ...,\n [6.4804e-12, 7.9899e-12, 0.0000e+00, ..., 9.8687e-12, 1.8674e-11,\n 5.9402e-12],\n [1.9731e-11, 6.2648e-12, 0.0000e+00, ..., 6.2276e-12, 3.9516e-11,\n 3.7619e-12],\n [9.6579e-14, 1.3525e-13, 0.0000e+00, ..., 1.4584e-12, 2.3245e-12,\n 4.7443e-13]], device='cuda:0')" }, "23": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([8.4225e-08, 6.4827e-09, 1.4318e-08, 1.0776e-08, 3.6744e-09, 1.9709e-09,\n 8.4779e-09, 5.0441e-12, 3.3806e-09, 5.7340e-10, 6.3655e-08, 1.2604e-08,\n 1.6958e-08, 1.1003e-08, 3.2199e-09, 7.9464e-09, 3.3301e-08, 2.1783e-09,\n 6.3045e-09, 6.7154e-08, 2.6508e-10, 2.5495e-08, 1.1098e-08, 6.4117e-09,\n 8.5802e-10, 2.6741e-08, 8.4472e-09, 1.3430e-09, 9.3909e-10, 7.4798e-09,\n 3.6947e-08, 7.6929e-09, 9.4371e-10, 2.7032e-07, 2.6701e-08, 1.8710e-08,\n 3.2041e-09, 2.3221e-13, 2.4967e-08, 1.2592e-07, 1.5836e-08, 9.6666e-09,\n 1.7088e-09, 9.8305e-09, 5.7402e-08, 4.2591e-08, 2.4085e-08, 1.4093e-08,\n 3.5506e-09, 6.1164e-08, 6.8584e-10, 1.1055e-07, 7.6628e-08, 1.7476e-08,\n 1.7733e-09, 3.2039e-10, 4.2040e-08, 1.3366e-09, 1.9196e-08, 3.8704e-09,\n 9.0331e-09, 1.0359e-08, 5.5109e-08, 2.9251e-08, 1.3347e-09, 3.4533e-09,\n 2.0612e-08, 3.6759e-09, 2.8863e-08, 6.9394e-10, 1.0194e-12, 1.1842e-08,\n 2.1779e-08, 3.8259e-10, 9.2368e-09, 1.1288e-09, 3.2011e-09, 6.1437e-08,\n 9.0324e-10, 3.8610e-10, 4.9360e-09, 4.3288e-08, 1.2346e-12, 3.2082e-09,\n 8.9891e-10, 3.2609e-08, 1.3122e-08, 7.3530e-09, 1.2973e-08, 1.9002e-07,\n 2.5455e-09, 4.2844e-08, 5.4640e-08, 8.2637e-09, 1.0368e-07, 7.7469e-09,\n 9.1732e-09, 5.8241e-08, 1.9572e-08, 2.2822e-08, 5.0699e-08, 2.8543e-09,\n 5.4980e-10, 2.0483e-08, 7.5908e-10, 1.0933e-07, 6.1589e-09, 1.2466e-08,\n 3.8080e-09, 1.7609e-11, 8.9409e-08, 6.3975e-09, 8.8797e-08, 2.7106e-08,\n 2.9657e-09, 4.9330e-09, 5.8773e-09, 4.9946e-09, 3.7687e-09, 9.1593e-09,\n 2.7416e-08, 1.6033e-08, 2.7241e-09, 2.1730e-12, 1.6124e-11, 9.8662e-09,\n 5.4893e-08, 3.5593e-08, 2.8801e-09, 4.7822e-08, 3.7524e-11, 2.3633e-07,\n 1.0752e-08, 1.9987e-08, 3.4288e-08, 9.2061e-08, 2.9207e-08, 9.2808e-08,\n 2.5316e-08, 1.1549e-11, 3.3116e-10, 1.7291e-09, 1.1114e-08, 1.0007e-07,\n 1.7450e-08, 4.7252e-09, 4.1815e-09, 1.6207e-09, 9.9479e-08, 2.1361e-09,\n 1.7445e-09, 8.3099e-09, 9.0801e-09, 3.6140e-08, 1.2320e-08, 1.6501e-09,\n 8.6985e-09, 9.2382e-08, 2.6118e-09, 1.2719e-07, 3.2328e-08, 4.6996e-08,\n 4.9061e-09, 4.2766e-08, 3.2712e-08, 2.6953e-08, 2.5970e-07, 3.6534e-08,\n 4.0179e-08, 5.1962e-09, 1.6575e-09, 6.9891e-09, 4.8166e-08, 1.4817e-08,\n 2.7635e-08, 2.9599e-10, 2.4345e-09, 1.6239e-09, 3.2966e-09, 1.1606e-08,\n 1.7048e-09, 9.4042e-09, 6.2063e-09, 8.3183e-09, 6.4176e-08, 2.1812e-10,\n 6.5004e-09, 1.9903e-08, 3.8224e-09, 1.0317e-07, 1.0878e-08, 1.5542e-08,\n 1.6785e-08, 1.3264e-09, 5.6429e-12, 2.9518e-07, 3.5091e-09, 3.6378e-09,\n 2.1716e-08, 5.6544e-08, 2.2925e-12, 1.6052e-08, 6.0830e-08, 7.1972e-10,\n 3.6561e-08, 1.2169e-07, 1.9176e-08, 3.9387e-09, 1.4712e-11, 1.6631e-07,\n 3.4187e-09, 5.7036e-08, 1.7526e-09, 7.8365e-09, 1.3260e-09, 1.5818e-08,\n 2.0912e-08, 7.1910e-11, 1.5913e-09, 2.0598e-09, 6.8790e-09, 2.6727e-08,\n 4.9516e-09, 9.4755e-08, 1.6702e-08, 6.8230e-10, 2.1085e-10, 1.0184e-09,\n 3.7851e-09, 8.6734e-09, 2.6319e-10, 9.2426e-10, 1.5111e-08, 1.2237e-08,\n 4.9158e-08, 1.5402e-08, 3.6000e-09, 2.7453e-08, 4.1064e-09, 7.2027e-09,\n 2.6330e-08, 5.4694e-11, 3.4435e-08, 4.2725e-08, 1.4837e-08, 3.3744e-11,\n 1.1783e-12, 1.4722e-07, 7.1078e-10, 5.4102e-08, 1.8561e-08, 3.1920e-09,\n 2.4371e-08, 2.5498e-08, 5.9184e-08, 1.6690e-09], device='cuda:0')" + "exp_avg_sq": "tensor([2.4068e-08, 1.8525e-09, 4.0916e-09, 3.0792e-09, 1.0500e-09, 5.6321e-10,\n 2.4226e-09, 1.4414e-12, 9.6605e-10, 1.6385e-10, 1.8190e-08, 3.6018e-09,\n 4.8459e-09, 3.1442e-09, 9.2011e-10, 2.2707e-09, 9.5160e-09, 6.2247e-10,\n 1.8016e-09, 1.9190e-08, 7.5750e-11, 7.2855e-09, 3.1714e-09, 1.8322e-09,\n 2.4519e-10, 7.6415e-09, 2.4138e-09, 3.8378e-10, 2.6835e-10, 2.1374e-09,\n 1.0558e-08, 2.1983e-09, 2.6967e-10, 7.7247e-08, 7.6302e-09, 5.3465e-09,\n 9.1561e-10, 6.6355e-14, 7.1345e-09, 3.5982e-08, 4.5253e-09, 2.7623e-09,\n 4.8831e-10, 2.8092e-09, 1.6403e-08, 1.2171e-08, 6.8825e-09, 4.0271e-09,\n 1.0146e-09, 1.7478e-08, 1.9599e-10, 3.1591e-08, 2.1897e-08, 4.9939e-09,\n 5.0674e-10, 9.1554e-11, 1.2013e-08, 3.8195e-10, 5.4853e-09, 1.1060e-09,\n 2.5813e-09, 2.9600e-09, 1.5748e-08, 8.3587e-09, 3.8140e-10, 9.8680e-10,\n 5.8901e-09, 1.0504e-09, 8.2478e-09, 1.9830e-10, 2.9131e-13, 3.3840e-09,\n 6.2236e-09, 1.0933e-10, 2.6395e-09, 3.2257e-10, 9.1474e-10, 1.7556e-08,\n 2.5811e-10, 1.1033e-10, 1.4105e-09, 1.2370e-08, 3.5278e-13, 9.1676e-10,\n 2.5687e-10, 9.3183e-09, 3.7498e-09, 2.1012e-09, 3.7071e-09, 5.4300e-08,\n 7.2739e-10, 1.2243e-08, 1.5614e-08, 2.3614e-09, 2.9628e-08, 2.2137e-09,\n 2.6213e-09, 1.6643e-08, 5.5929e-09, 6.5217e-09, 1.4488e-08, 8.1564e-10,\n 1.5711e-10, 5.8532e-09, 2.1691e-10, 3.1242e-08, 1.7600e-09, 3.5624e-09,\n 1.0882e-09, 5.0319e-12, 2.5549e-08, 1.8281e-09, 2.5374e-08, 7.7458e-09,\n 8.4746e-10, 1.4096e-09, 1.6795e-09, 1.4273e-09, 1.0769e-09, 2.6173e-09,\n 7.8344e-09, 4.5816e-09, 7.7843e-10, 6.2094e-13, 4.6075e-12, 2.8193e-09,\n 1.5686e-08, 1.0171e-08, 8.2302e-10, 1.3666e-08, 1.0723e-11, 6.7534e-08,\n 3.0725e-09, 5.7113e-09, 9.7981e-09, 2.6307e-08, 8.3463e-09, 2.6521e-08,\n 7.2343e-09, 3.3002e-12, 9.4631e-11, 4.9411e-10, 3.1760e-09, 2.8597e-08,\n 4.9864e-09, 1.3503e-09, 1.1949e-09, 4.6312e-10, 2.8427e-08, 6.1040e-10,\n 4.9852e-10, 2.3746e-09, 2.5947e-09, 1.0327e-08, 3.5206e-09, 4.7152e-10,\n 2.4857e-09, 2.6399e-08, 7.4635e-10, 3.6347e-08, 9.2380e-09, 1.3430e-08,\n 1.4019e-09, 1.2221e-08, 9.3478e-09, 7.7021e-09, 7.4211e-08, 1.0440e-08,\n 1.1482e-08, 1.4849e-09, 4.7364e-10, 1.9972e-09, 1.3764e-08, 4.2342e-09,\n 7.8969e-09, 8.4580e-11, 6.9569e-10, 4.6405e-10, 9.4203e-10, 3.3164e-09,\n 4.8717e-10, 2.6873e-09, 1.7735e-09, 2.3770e-09, 1.8339e-08, 6.2330e-11,\n 1.8575e-09, 5.6874e-09, 1.0923e-09, 2.9482e-08, 3.1086e-09, 4.4414e-09,\n 4.7966e-09, 3.7903e-10, 1.6125e-12, 8.4350e-08, 1.0028e-09, 1.0395e-09,\n 6.2056e-09, 1.6158e-08, 6.5511e-13, 4.5870e-09, 1.7383e-08, 2.0567e-10,\n 1.0448e-08, 3.4773e-08, 5.4796e-09, 1.1255e-09, 4.2040e-12, 4.7524e-08,\n 9.7694e-10, 1.6298e-08, 5.0081e-10, 2.2393e-09, 3.7892e-10, 4.5201e-09,\n 5.9757e-09, 2.0549e-11, 4.5472e-10, 5.8861e-10, 1.9657e-09, 7.6375e-09,\n 1.4150e-09, 2.7077e-08, 4.7726e-09, 1.9497e-10, 6.0251e-11, 2.9102e-10,\n 1.0816e-09, 2.4785e-09, 7.5208e-11, 2.6411e-10, 4.3182e-09, 3.4969e-09,\n 1.4047e-08, 4.4011e-09, 1.0287e-09, 7.8449e-09, 1.1734e-09, 2.0582e-09,\n 7.5240e-09, 1.5629e-11, 9.8400e-09, 1.2209e-08, 4.2397e-09, 9.6427e-12,\n 3.3672e-13, 4.2070e-08, 2.0311e-10, 1.5460e-08, 5.3040e-09, 9.1214e-10,\n 6.9643e-09, 7.2863e-09, 1.6912e-08, 4.7694e-10], device='cuda:0')" }, "24": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([2.3607e-10, 1.9945e-11, 2.2618e-11, 4.8943e-11, 5.7245e-12, 7.7296e-12,\n 1.9874e-11, 6.6517e-14, 4.0669e-12, 3.9016e-13, 2.2701e-10, 3.0962e-11,\n 3.5272e-11, 2.6249e-11, 9.0196e-12, 2.1639e-11, 8.0203e-11, 4.4325e-12,\n 1.3834e-11, 1.4930e-10, 3.3716e-15, 8.7484e-11, 3.3982e-11, 1.7694e-11,\n 3.6163e-12, 4.9162e-11, 1.8853e-11, 4.6925e-12, 3.6842e-12, 1.8470e-11,\n 1.4142e-10, 1.4198e-11, 2.3337e-12, 1.0703e-09, 6.1493e-11, 3.9413e-11,\n 7.7897e-12, 6.8290e-14, 6.8748e-11, 2.7973e-10, 3.0704e-11, 3.2334e-11,\n 3.9016e-12, 8.1365e-11, 1.7134e-10, 8.3755e-11, 1.0961e-10, 3.6025e-11,\n 5.6657e-12, 2.3442e-10, 6.0286e-13, 3.3300e-10, 1.9069e-10, 2.9691e-11,\n 1.5149e-12, 1.0691e-12, 1.3799e-10, 8.6448e-13, 5.3552e-11, 9.6509e-12,\n 2.0269e-11, 2.9830e-11, 2.3755e-10, 8.0327e-11, 9.4425e-13, 4.8443e-12,\n 5.4972e-11, 4.0544e-12, 5.9456e-11, 3.4577e-12, 1.2082e-12, 2.3947e-11,\n 4.4412e-11, 2.2857e-12, 1.6562e-11, 7.7383e-12, 1.0240e-11, 1.5831e-10,\n 1.4478e-12, 4.1140e-13, 1.1735e-11, 1.1392e-10, 1.0968e-13, 2.5628e-11,\n 4.1481e-12, 7.9250e-11, 5.8972e-11, 1.6828e-11, 2.3511e-11, 6.7470e-10,\n 6.9766e-12, 1.0222e-10, 1.9119e-10, 1.2247e-11, 6.6787e-10, 1.4874e-11,\n 1.1836e-11, 1.1702e-10, 5.0910e-11, 5.8704e-11, 2.1477e-10, 2.1523e-11,\n 2.7573e-12, 5.3230e-11, 3.1759e-13, 2.0314e-10, 1.2040e-11, 3.1298e-11,\n 5.9693e-12, 7.5840e-13, 1.9457e-10, 2.4290e-11, 5.5529e-10, 5.5362e-11,\n 2.5743e-12, 8.6809e-12, 1.6295e-11, 7.5862e-12, 1.3589e-11, 3.0671e-11,\n 7.8734e-11, 4.0800e-11, 3.0384e-12, 9.6824e-15, 3.7982e-13, 1.6545e-11,\n 1.2079e-10, 2.1141e-10, 6.4276e-12, 1.6092e-10, 4.7817e-14, 7.6573e-10,\n 2.6831e-11, 4.6611e-11, 5.0863e-11, 3.3441e-10, 6.6292e-11, 2.6944e-10,\n 6.0296e-11, 8.5896e-14, 3.3998e-12, 6.3583e-12, 1.8858e-11, 3.1241e-10,\n 3.6802e-11, 1.6015e-11, 1.2990e-11, 4.6177e-12, 3.4670e-10, 3.0521e-12,\n 7.5410e-12, 1.3930e-11, 2.6464e-11, 1.2413e-10, 2.0366e-11, 8.0685e-12,\n 1.2903e-11, 2.7453e-10, 5.8608e-12, 3.9014e-10, 6.7917e-11, 1.8072e-10,\n 6.8885e-12, 1.4483e-10, 1.9892e-10, 6.0304e-11, 8.9032e-10, 6.9247e-11,\n 1.7386e-10, 1.9383e-11, 1.2296e-12, 1.0011e-11, 9.0774e-11, 4.5199e-11,\n 5.0982e-11, 9.7453e-13, 2.7995e-12, 3.8589e-12, 5.8758e-12, 2.0089e-11,\n 3.0966e-12, 2.1989e-11, 1.0508e-11, 1.4288e-11, 1.3746e-10, 1.4013e-15,\n 1.1774e-11, 4.8419e-11, 5.0439e-12, 2.8029e-10, 1.3596e-11, 4.0133e-11,\n 7.6996e-11, 4.7961e-12, 1.3948e-13, 1.1657e-09, 6.4210e-12, 5.6273e-12,\n 8.6420e-11, 8.1251e-11, 3.8309e-13, 2.7230e-11, 2.0780e-10, 8.5418e-13,\n 1.1205e-10, 2.2843e-10, 3.1953e-11, 6.1977e-12, 4.7916e-15, 5.2943e-10,\n 7.1391e-12, 1.2784e-10, 8.3176e-12, 1.7169e-11, 1.5431e-12, 3.0409e-11,\n 3.9176e-11, 2.9241e-14, 4.8892e-12, 2.5186e-12, 1.6747e-11, 7.7124e-11,\n 2.2586e-11, 3.0467e-10, 5.8003e-11, 2.6686e-12, 9.5941e-14, 1.0678e-11,\n 5.9802e-12, 2.4934e-11, 1.3672e-12, 1.5035e-12, 2.5898e-11, 4.9198e-11,\n 1.2395e-10, 5.0263e-11, 5.6412e-12, 6.0251e-11, 1.2638e-11, 1.0800e-11,\n 6.6876e-11, 2.3196e-14, 2.9113e-10, 1.0428e-10, 2.4292e-11, 2.2975e-14,\n 4.1844e-13, 6.9453e-10, 7.5081e-13, 1.6763e-10, 3.5416e-11, 4.7824e-12,\n 5.5815e-11, 5.2864e-11, 1.2906e-10, 6.2596e-12], device='cuda:0')" + "exp_avg_sq": "tensor([6.7459e-11, 5.6995e-12, 6.4632e-12, 1.3986e-11, 1.6358e-12, 2.2088e-12,\n 5.6793e-12, 1.9008e-14, 1.1622e-12, 1.1149e-13, 6.4869e-11, 8.8477e-12,\n 1.0079e-11, 7.5010e-12, 2.5774e-12, 6.1835e-12, 2.2919e-11, 1.2666e-12,\n 3.9531e-12, 4.2664e-11, 9.6346e-16, 2.4999e-11, 9.7106e-12, 5.0563e-12,\n 1.0334e-12, 1.4048e-11, 5.3874e-12, 1.3409e-12, 1.0528e-12, 5.2779e-12,\n 4.0411e-11, 4.0571e-12, 6.6688e-13, 3.0584e-10, 1.7572e-11, 1.1262e-11,\n 2.2260e-12, 1.9514e-14, 1.9645e-11, 7.9935e-11, 8.7740e-12, 9.2396e-12,\n 1.1149e-12, 2.3251e-11, 4.8961e-11, 2.3934e-11, 3.1321e-11, 1.0294e-11,\n 1.6190e-12, 6.6987e-11, 1.7227e-13, 9.5156e-11, 5.4492e-11, 8.4844e-12,\n 4.3288e-13, 3.0550e-13, 3.9433e-11, 2.4703e-13, 1.5303e-11, 2.7578e-12,\n 5.7921e-12, 8.5241e-12, 6.7881e-11, 2.2954e-11, 2.6983e-13, 1.3843e-12,\n 1.5709e-11, 1.1586e-12, 1.6990e-11, 9.8805e-13, 3.4525e-13, 6.8430e-12,\n 1.2691e-11, 6.5315e-13, 4.7326e-12, 2.2113e-12, 2.9261e-12, 4.5239e-11,\n 4.1373e-13, 1.1756e-13, 3.3534e-12, 3.2552e-11, 3.1341e-14, 7.3233e-12,\n 1.1854e-12, 2.2646e-11, 1.6852e-11, 4.8089e-12, 6.7183e-12, 1.9280e-10,\n 1.9936e-12, 2.9210e-11, 5.4635e-11, 3.4997e-12, 1.9085e-10, 4.2505e-12,\n 3.3823e-12, 3.3439e-11, 1.4548e-11, 1.6775e-11, 6.1373e-11, 6.1503e-12,\n 7.8793e-13, 1.5211e-11, 9.0753e-14, 5.8048e-11, 3.4404e-12, 8.9438e-12,\n 1.7058e-12, 2.1672e-13, 5.5600e-11, 6.9412e-12, 1.5868e-10, 1.5820e-11,\n 7.3562e-13, 2.4806e-12, 4.6563e-12, 2.1678e-12, 3.8832e-12, 8.7645e-12,\n 2.2499e-11, 1.1659e-11, 8.6824e-13, 2.7668e-15, 1.0854e-13, 4.7279e-12,\n 3.4516e-11, 6.0411e-11, 1.8367e-12, 4.5985e-11, 1.3664e-14, 2.1881e-10,\n 7.6672e-12, 1.3320e-11, 1.4535e-11, 9.5560e-11, 1.8944e-11, 7.6995e-11,\n 1.7230e-11, 2.4545e-14, 9.7151e-13, 1.8169e-12, 5.3888e-12, 8.9275e-11,\n 1.0517e-11, 4.5763e-12, 3.7119e-12, 1.3196e-12, 9.9073e-11, 8.7216e-13,\n 2.1549e-12, 3.9805e-12, 7.5624e-12, 3.5470e-11, 5.8198e-12, 2.3056e-12,\n 3.6871e-12, 7.8449e-11, 1.6748e-12, 1.1149e-10, 1.9408e-11, 5.1643e-11,\n 1.9684e-12, 4.1386e-11, 5.6843e-11, 1.7232e-11, 2.5442e-10, 1.9788e-11,\n 4.9682e-11, 5.5388e-12, 3.5136e-13, 2.8608e-12, 2.5939e-11, 1.2916e-11,\n 1.4569e-11, 2.7848e-13, 7.9999e-13, 1.1027e-12, 1.6791e-12, 5.7407e-12,\n 8.8488e-13, 6.2834e-12, 3.0028e-12, 4.0829e-12, 3.9279e-11, 4.0042e-16,\n 3.3644e-12, 1.3836e-11, 1.4413e-12, 8.0094e-11, 3.8851e-12, 1.1468e-11,\n 2.2002e-11, 1.3705e-12, 3.9858e-14, 3.3310e-10, 1.8349e-12, 1.6081e-12,\n 2.4695e-11, 2.3218e-11, 1.0947e-13, 7.7812e-12, 5.9381e-11, 2.4409e-13,\n 3.2020e-11, 6.5276e-11, 9.1309e-12, 1.7710e-12, 1.3692e-15, 1.5129e-10,\n 2.0401e-12, 3.6533e-11, 2.3768e-12, 4.9061e-12, 4.4097e-13, 8.6896e-12,\n 1.1195e-11, 8.3558e-15, 1.3971e-12, 7.1971e-13, 4.7857e-12, 2.2039e-11,\n 6.4540e-12, 8.7061e-11, 1.6575e-11, 7.6258e-13, 2.7416e-14, 3.0512e-12,\n 1.7089e-12, 7.1250e-12, 3.9067e-13, 4.2965e-13, 7.4007e-12, 1.4059e-11,\n 3.5419e-11, 1.4363e-11, 1.6120e-12, 1.7217e-11, 3.6114e-12, 3.0862e-12,\n 1.9110e-11, 6.6285e-15, 8.3193e-11, 2.9799e-11, 6.9415e-12, 6.5653e-15,\n 1.1957e-13, 1.9847e-10, 2.1455e-13, 4.7901e-11, 1.0120e-11, 1.3666e-12,\n 1.5950e-11, 1.5106e-11, 3.6879e-11, 1.7887e-12], device='cuda:0')" }, "25": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([3.4265e-10, 3.0212e-11, 4.0765e-11, 5.5289e-11, 8.5776e-12, 1.2280e-11,\n 2.4862e-11, 7.8080e-14, 7.3564e-12, 6.8002e-13, 2.0306e-10, 4.5677e-11,\n 5.1027e-11, 5.0980e-11, 1.6319e-11, 3.9740e-11, 1.4519e-10, 6.1712e-12,\n 2.0181e-11, 2.9025e-10, 5.2942e-14, 1.1621e-10, 5.5338e-11, 2.9994e-11,\n 4.5531e-12, 8.7310e-11, 4.0065e-11, 8.2425e-12, 6.5946e-12, 2.2147e-11,\n 1.6741e-10, 3.4238e-11, 5.8441e-12, 9.5292e-10, 1.1681e-10, 8.1433e-11,\n 6.0592e-12, 5.1639e-14, 1.1207e-10, 4.4267e-10, 5.5458e-11, 4.6713e-11,\n 9.7270e-12, 5.4094e-11, 1.8771e-10, 1.4890e-10, 1.1069e-10, 6.8041e-11,\n 9.1984e-12, 1.9708e-10, 6.1039e-13, 4.5144e-10, 2.4894e-10, 5.5605e-11,\n 3.1983e-12, 2.6059e-12, 1.8749e-10, 2.3920e-12, 8.2471e-11, 2.1571e-11,\n 2.3275e-11, 4.8595e-11, 2.3680e-10, 1.2706e-10, 1.6388e-12, 9.1917e-12,\n 9.1630e-11, 8.5510e-12, 1.2267e-10, 4.2707e-12, 9.9833e-13, 5.6501e-11,\n 7.0037e-11, 2.4135e-12, 2.6429e-11, 8.7272e-12, 1.8807e-11, 2.5466e-10,\n 2.3747e-12, 3.6465e-13, 2.6136e-11, 1.9395e-10, 2.9274e-13, 2.1699e-11,\n 5.4411e-12, 1.3870e-10, 6.5897e-11, 3.3245e-11, 4.0413e-11, 7.6364e-10,\n 1.2573e-11, 1.8567e-10, 1.7401e-10, 2.5691e-11, 4.4432e-10, 2.3881e-11,\n 2.8973e-11, 2.0585e-10, 8.7235e-11, 1.0670e-10, 2.1822e-10, 1.9181e-11,\n 5.2261e-12, 5.9551e-11, 9.2023e-13, 3.7351e-10, 1.5057e-11, 6.0347e-11,\n 1.0925e-11, 1.3533e-12, 3.0698e-10, 3.2875e-11, 3.7894e-10, 9.3377e-11,\n 7.8337e-12, 1.6029e-11, 3.0287e-11, 1.5123e-11, 1.8403e-11, 4.7361e-11,\n 8.1076e-11, 4.8846e-11, 6.1148e-12, 6.3400e-15, 4.0194e-13, 3.0156e-11,\n 2.3402e-10, 1.6195e-10, 1.4564e-11, 1.4565e-10, 6.3462e-14, 9.5830e-10,\n 4.8729e-11, 8.5856e-11, 1.0933e-10, 3.0841e-10, 1.3009e-10, 3.1804e-10,\n 1.0899e-10, 1.8693e-13, 4.3121e-12, 1.1590e-11, 3.4595e-11, 3.3676e-10,\n 7.4044e-11, 2.5527e-11, 2.1033e-11, 8.4884e-12, 3.3721e-10, 3.4511e-12,\n 1.2265e-11, 2.7525e-11, 4.3966e-11, 1.0703e-10, 3.5519e-11, 1.1748e-11,\n 2.3389e-11, 3.1297e-10, 7.7766e-12, 5.1856e-10, 1.0745e-10, 1.4155e-10,\n 1.1620e-11, 1.9107e-10, 1.5184e-10, 8.4683e-11, 9.0645e-10, 1.2405e-10,\n 1.7090e-10, 2.5404e-11, 3.1714e-12, 1.9841e-11, 1.6683e-10, 6.9547e-11,\n 8.9782e-11, 2.2877e-12, 5.4094e-12, 7.8971e-12, 7.6062e-12, 5.3234e-11,\n 4.5422e-12, 2.3344e-11, 1.7005e-11, 2.5748e-11, 2.6816e-10, 2.3603e-14,\n 2.1114e-11, 5.8993e-11, 1.0038e-11, 4.2875e-10, 3.6536e-11, 4.5192e-11,\n 7.9710e-11, 7.6203e-12, 3.6740e-13, 1.1842e-09, 8.3702e-12, 9.6403e-12,\n 1.0096e-10, 1.9400e-10, 9.2688e-13, 7.1295e-11, 1.9138e-10, 1.6145e-12,\n 1.5139e-10, 4.2722e-10, 6.3078e-11, 1.0156e-11, 8.4990e-15, 5.9950e-10,\n 6.6542e-12, 2.3953e-10, 1.2381e-11, 3.8197e-11, 2.0847e-12, 5.3041e-11,\n 6.9748e-11, 2.8708e-13, 9.6897e-12, 4.6694e-12, 1.3999e-11, 1.1688e-10,\n 2.6434e-11, 3.9690e-10, 7.9003e-11, 5.4838e-12, 7.6108e-14, 9.4221e-12,\n 7.9402e-12, 4.0666e-11, 2.9221e-12, 1.6195e-12, 6.7952e-11, 6.2351e-11,\n 2.1121e-10, 7.2599e-11, 8.3638e-12, 1.1610e-10, 2.3893e-11, 2.0477e-11,\n 1.1678e-10, 5.0899e-14, 1.6209e-10, 1.4210e-10, 4.3411e-11, 1.7286e-13,\n 8.6543e-13, 5.0095e-10, 1.2069e-12, 2.1956e-10, 5.6707e-11, 7.3629e-12,\n 7.5372e-11, 7.6170e-11, 1.9247e-10, 1.1305e-11], device='cuda:0')" + "exp_avg_sq": "tensor([9.7916e-11, 8.6332e-12, 1.1649e-11, 1.5799e-11, 2.4511e-12, 3.5090e-12,\n 7.1045e-12, 2.2312e-14, 2.1021e-12, 1.9432e-13, 5.8027e-11, 1.3053e-11,\n 1.4581e-11, 1.4568e-11, 4.6632e-12, 1.1356e-11, 4.1490e-11, 1.7635e-12,\n 5.7668e-12, 8.2942e-11, 1.5129e-14, 3.3209e-11, 1.5813e-11, 8.5709e-12,\n 1.3011e-12, 2.4950e-11, 1.1449e-11, 2.3554e-12, 1.8845e-12, 6.3287e-12,\n 4.7840e-11, 9.7838e-12, 1.6700e-12, 2.7230e-10, 3.3379e-11, 2.3270e-11,\n 1.7315e-12, 1.4756e-14, 3.2026e-11, 1.2650e-10, 1.5848e-11, 1.3349e-11,\n 2.7796e-12, 1.5458e-11, 5.3639e-11, 4.2551e-11, 3.1631e-11, 1.9443e-11,\n 2.6285e-12, 5.6316e-11, 1.7442e-13, 1.2900e-10, 7.1136e-11, 1.5890e-11,\n 9.1393e-13, 7.4466e-13, 5.3577e-11, 6.8353e-13, 2.3567e-11, 6.1642e-12,\n 6.6510e-12, 1.3886e-11, 6.7668e-11, 3.6309e-11, 4.6829e-13, 2.6266e-12,\n 2.6184e-11, 2.4435e-12, 3.5054e-11, 1.2204e-12, 2.8528e-13, 1.6146e-11,\n 2.0014e-11, 6.8967e-13, 7.5523e-12, 2.4939e-12, 5.3743e-12, 7.2772e-11,\n 6.7860e-13, 1.0420e-13, 7.4685e-12, 5.5422e-11, 8.3653e-14, 6.2007e-12,\n 1.5548e-12, 3.9635e-11, 1.8831e-11, 9.5001e-12, 1.1548e-11, 2.1822e-10,\n 3.5930e-12, 5.3056e-11, 4.9723e-11, 7.3413e-12, 1.2697e-10, 6.8243e-12,\n 8.2792e-12, 5.8824e-11, 2.4928e-11, 3.0490e-11, 6.2359e-11, 5.4812e-12,\n 1.4934e-12, 1.7017e-11, 2.6296e-13, 1.0673e-10, 4.3026e-12, 1.7245e-11,\n 3.1220e-12, 3.8673e-13, 8.7722e-11, 9.3942e-12, 1.0829e-10, 2.6683e-11,\n 2.2385e-12, 4.5805e-12, 8.6547e-12, 4.3216e-12, 5.2588e-12, 1.3534e-11,\n 2.3168e-11, 1.3958e-11, 1.7474e-12, 1.8117e-15, 1.1486e-13, 8.6173e-12,\n 6.6872e-11, 4.6279e-11, 4.1619e-12, 4.1620e-11, 1.8135e-14, 2.7384e-10,\n 1.3925e-11, 2.4534e-11, 3.1241e-11, 8.8130e-11, 3.7173e-11, 9.0881e-11,\n 3.1144e-11, 5.3417e-14, 1.2322e-12, 3.3120e-12, 9.8857e-12, 9.6233e-11,\n 2.1159e-11, 7.2945e-12, 6.0104e-12, 2.4256e-12, 9.6361e-11, 9.8618e-13,\n 3.5047e-12, 7.8655e-12, 1.2564e-11, 3.0585e-11, 1.0150e-11, 3.3570e-12,\n 6.6837e-12, 8.9433e-11, 2.2222e-12, 1.4818e-10, 3.0704e-11, 4.0448e-11,\n 3.3205e-12, 5.4601e-11, 4.3388e-11, 2.4199e-11, 2.5903e-10, 3.5448e-11,\n 4.8836e-11, 7.2593e-12, 9.0626e-13, 5.6698e-12, 4.7673e-11, 1.9874e-11,\n 2.5656e-11, 6.5372e-13, 1.5458e-12, 2.2567e-12, 2.1735e-12, 1.5212e-11,\n 1.2980e-12, 6.6707e-12, 4.8594e-12, 7.3577e-12, 7.6629e-11, 6.7447e-15,\n 6.0334e-12, 1.6858e-11, 2.8684e-12, 1.2252e-10, 1.0440e-11, 1.2914e-11,\n 2.2778e-11, 2.1776e-12, 1.0499e-13, 3.3840e-10, 2.3918e-12, 2.7548e-12,\n 2.8851e-11, 5.5438e-11, 2.6486e-13, 2.0373e-11, 5.4687e-11, 4.6136e-13,\n 4.3260e-11, 1.2208e-10, 1.8025e-11, 2.9023e-12, 2.4287e-15, 1.7131e-10,\n 1.9015e-12, 6.8448e-11, 3.5380e-12, 1.0915e-11, 5.9572e-13, 1.5157e-11,\n 1.9931e-11, 8.2035e-14, 2.7689e-12, 1.3343e-12, 4.0005e-12, 3.3400e-11,\n 7.5538e-12, 1.1342e-10, 2.2576e-11, 1.5670e-12, 2.1748e-14, 2.6924e-12,\n 2.2690e-12, 1.1621e-11, 8.3502e-13, 4.6279e-13, 1.9418e-11, 1.7817e-11,\n 6.0354e-11, 2.0746e-11, 2.3900e-12, 3.3176e-11, 6.8275e-12, 5.8514e-12,\n 3.3369e-11, 1.4545e-14, 4.6317e-11, 4.0607e-11, 1.2405e-11, 4.9396e-14,\n 2.4730e-13, 1.4315e-10, 3.4487e-13, 6.2741e-11, 1.6204e-11, 2.1040e-12,\n 2.1538e-11, 2.1766e-11, 5.4999e-11, 3.2305e-12], device='cuda:0')" }, "26": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.6523e-11, 2.2839e-11, 0.0000e+00, ..., 4.8343e-11, 7.7953e-11,\n 9.5223e-12],\n [1.5275e-12, 2.1236e-12, 0.0000e+00, ..., 5.9349e-14, 1.9132e-12,\n 6.1098e-14],\n [1.2346e-11, 9.6482e-12, 0.0000e+00, ..., 9.9309e-12, 1.0726e-11,\n 6.9763e-12],\n ...,\n [6.1480e-12, 5.0524e-12, 0.0000e+00, ..., 3.5641e-12, 3.4809e-11,\n 1.0046e-12],\n [3.6078e-11, 1.1165e-11, 0.0000e+00, ..., 9.6220e-12, 7.8859e-11,\n 1.7464e-11],\n [7.2961e-12, 1.2827e-12, 0.0000e+00, ..., 1.7698e-12, 1.5203e-11,\n 1.5983e-13]], device='cuda:0')" + "exp_avg_sq": "tensor([[4.7214e-12, 6.5265e-12, 0.0000e+00, ..., 1.3814e-11, 2.2276e-11,\n 2.7211e-12],\n [4.3650e-13, 6.0684e-13, 0.0000e+00, ..., 1.6960e-14, 5.4672e-13,\n 1.7459e-14],\n [3.5281e-12, 2.7571e-12, 0.0000e+00, ..., 2.8378e-12, 3.0650e-12,\n 1.9935e-12],\n ...,\n [1.7568e-12, 1.4438e-12, 0.0000e+00, ..., 1.0185e-12, 9.9470e-12,\n 2.8708e-13],\n [1.0310e-11, 3.1905e-12, 0.0000e+00, ..., 2.7496e-12, 2.2534e-11,\n 4.9905e-12],\n [2.0849e-12, 3.6655e-13, 0.0000e+00, ..., 5.0572e-13, 4.3443e-12,\n 4.5673e-14]], device='cuda:0')" }, "27": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([2.8002e-08, 1.2558e-09, 9.9677e-09, 1.2952e-08, 3.9440e-09, 9.9414e-10,\n 1.3083e-08, 3.1839e-10, 5.8711e-09, 8.9604e-10, 2.9766e-08, 1.0302e-07,\n 1.7539e-08, 2.0625e-08, 2.1519e-08, 8.9075e-08, 9.9828e-09, 9.8969e-09,\n 3.3406e-08, 3.8840e-08, 1.2455e-10, 4.1729e-09, 8.4165e-09, 1.0435e-08,\n 9.6646e-09, 1.7917e-08, 1.4243e-08, 2.7976e-09, 4.0881e-10, 4.6564e-09,\n 2.4745e-08, 1.6348e-08, 2.5635e-09, 1.2574e-07, 4.5543e-08, 2.7914e-08,\n 8.3039e-10, 1.5738e-11, 1.1709e-08, 3.7811e-07, 5.6406e-08, 7.9301e-09,\n 2.4858e-09, 1.5558e-09, 6.5213e-08, 1.1484e-07, 1.7889e-08, 3.9629e-08,\n 4.9369e-09, 3.1725e-08, 6.1180e-10, 6.7542e-08, 1.7221e-08, 1.5514e-08,\n 2.4656e-09, 2.1507e-10, 2.9410e-08, 1.0914e-09, 1.9547e-08, 1.7106e-09,\n 1.0045e-08, 1.9230e-08, 4.8478e-08, 2.5836e-08, 7.2870e-10, 1.8030e-09,\n 1.9384e-08, 1.2619e-09, 1.2291e-08, 2.3355e-09, 4.2637e-11, 1.1912e-08,\n 5.0345e-08, 3.6976e-09, 1.0193e-08, 6.8137e-11, 6.6193e-10, 4.1121e-08,\n 2.6537e-09, 8.7627e-13, 2.4340e-08, 2.9292e-08, 4.1947e-11, 2.4865e-09,\n 1.7354e-08, 7.4127e-08, 5.9587e-09, 2.5594e-08, 5.0701e-09, 4.2279e-08,\n 2.9941e-08, 1.8416e-08, 4.6809e-08, 8.5408e-09, 1.5082e-08, 2.3117e-09,\n 7.8632e-09, 5.3833e-08, 1.3062e-08, 1.0872e-07, 9.6308e-08, 2.3564e-09,\n 4.1886e-11, 1.2935e-08, 7.2694e-10, 8.5288e-08, 6.8354e-09, 6.2425e-09,\n 1.8601e-08, 1.1600e-10, 9.2201e-09, 6.0165e-11, 6.0091e-08, 5.8863e-08,\n 4.6401e-09, 5.1457e-08, 4.3939e-09, 8.3924e-09, 1.2434e-08, 1.2771e-08,\n 4.4508e-09, 1.0127e-09, 1.8261e-09, 2.4225e-12, 2.3730e-10, 1.1429e-08,\n 9.7132e-08, 1.6043e-08, 4.0566e-08, 1.8446e-08, 1.0186e-10, 1.1705e-07,\n 1.8872e-08, 1.7443e-08, 4.2597e-08, 2.4855e-08, 2.7699e-08, 3.7168e-08,\n 2.5247e-08, 9.7272e-11, 7.3678e-10, 1.3575e-09, 9.9954e-09, 1.2719e-07,\n 2.4302e-08, 1.4846e-09, 2.2919e-08, 2.7686e-09, 2.3400e-08, 5.7885e-10,\n 1.1277e-09, 3.9652e-08, 2.8927e-08, 3.6201e-08, 7.6870e-09, 1.2959e-09,\n 2.3050e-08, 6.8239e-08, 1.1971e-08, 1.5084e-08, 1.8636e-08, 2.1397e-08,\n 1.1616e-08, 1.5510e-08, 1.4522e-08, 1.3669e-08, 9.8119e-08, 7.5573e-08,\n 1.1031e-08, 4.1622e-08, 4.1439e-09, 2.9903e-09, 6.1755e-08, 5.8756e-09,\n 4.4103e-08, 3.2804e-09, 1.2622e-09, 3.9822e-09, 4.2587e-09, 4.7354e-09,\n 5.5817e-09, 4.8043e-09, 1.7500e-09, 1.2467e-08, 1.9183e-07, 2.9304e-11,\n 9.5105e-09, 7.9750e-09, 5.6301e-09, 5.7538e-08, 1.6915e-08, 2.0743e-09,\n 8.5543e-09, 1.1794e-08, 8.9580e-11, 1.0450e-07, 5.4986e-09, 8.3739e-10,\n 2.6117e-08, 1.1589e-07, 9.7045e-14, 1.1333e-08, 5.6249e-08, 1.2518e-08,\n 1.0453e-08, 8.1609e-08, 2.0183e-08, 4.3484e-09, 4.3344e-12, 9.7874e-08,\n 1.9881e-09, 9.8071e-08, 6.0255e-11, 2.1747e-08, 3.1212e-10, 2.6130e-08,\n 3.6780e-08, 6.2525e-13, 3.1092e-09, 4.3233e-09, 3.6802e-09, 1.1969e-07,\n 6.3275e-09, 2.5105e-08, 5.6523e-09, 2.3257e-10, 1.2948e-10, 4.6126e-10,\n 4.2289e-09, 8.4262e-09, 7.0643e-10, 1.0365e-10, 4.4089e-08, 3.1469e-09,\n 2.3133e-09, 1.6132e-08, 1.3722e-08, 2.0191e-08, 8.3504e-09, 6.4792e-09,\n 2.1896e-09, 4.9600e-12, 1.6624e-08, 1.0780e-08, 4.8227e-09, 1.5405e-10,\n 9.9534e-11, 8.2353e-08, 3.8264e-09, 9.4948e-08, 3.0183e-08, 3.2947e-09,\n 4.5319e-08, 1.2809e-08, 2.6757e-08, 5.0855e-09], device='cuda:0')" + "exp_avg_sq": "tensor([8.0017e-09, 3.5886e-10, 2.8484e-09, 3.7011e-09, 1.1270e-09, 2.8408e-10,\n 3.7384e-09, 9.0982e-11, 1.6777e-09, 2.5605e-10, 8.5059e-09, 2.9439e-08,\n 5.0118e-09, 5.8939e-09, 6.1493e-09, 2.5454e-08, 2.8527e-09, 2.8281e-09,\n 9.5461e-09, 1.1099e-08, 3.5591e-11, 1.1925e-09, 2.4051e-09, 2.9819e-09,\n 2.7617e-09, 5.1198e-09, 4.0701e-09, 7.9944e-10, 1.1682e-10, 1.3306e-09,\n 7.0711e-09, 4.6717e-09, 7.3253e-10, 3.5931e-08, 1.3014e-08, 7.9766e-09,\n 2.3729e-10, 4.4974e-12, 3.3460e-09, 1.0805e-07, 1.6118e-08, 2.2661e-09,\n 7.1033e-10, 4.4458e-10, 1.8635e-08, 3.2817e-08, 5.1118e-09, 1.1324e-08,\n 1.4108e-09, 9.0657e-09, 1.7483e-10, 1.9301e-08, 4.9210e-09, 4.4334e-09,\n 7.0456e-10, 6.1458e-11, 8.4043e-09, 3.1188e-10, 5.5857e-09, 4.8882e-10,\n 2.8705e-09, 5.4953e-09, 1.3853e-08, 7.3829e-09, 2.0823e-10, 5.1522e-10,\n 5.5390e-09, 3.6060e-10, 3.5124e-09, 6.6737e-10, 1.2184e-11, 3.4039e-09,\n 1.4386e-08, 1.0566e-09, 2.9127e-09, 1.9471e-11, 1.8915e-10, 1.1751e-08,\n 7.5831e-10, 2.5040e-13, 6.9552e-09, 8.3705e-09, 1.1987e-11, 7.1052e-10,\n 4.9591e-09, 2.1182e-08, 1.7027e-09, 7.3138e-09, 1.4488e-09, 1.2081e-08,\n 8.5557e-09, 5.2625e-09, 1.3376e-08, 2.4406e-09, 4.3098e-09, 6.6058e-10,\n 2.2470e-09, 1.5383e-08, 3.7325e-09, 3.1067e-08, 2.7521e-08, 6.7337e-10,\n 1.1969e-11, 3.6962e-09, 2.0773e-10, 2.4372e-08, 1.9533e-09, 1.7839e-09,\n 5.3154e-09, 3.3147e-11, 2.6347e-09, 1.7193e-11, 1.7172e-08, 1.6821e-08,\n 1.3259e-09, 1.4704e-08, 1.2556e-09, 2.3982e-09, 3.5531e-09, 3.6494e-09,\n 1.2719e-09, 2.8937e-10, 5.2182e-10, 6.9224e-13, 6.7809e-11, 3.2660e-09,\n 2.7756e-08, 4.5843e-09, 1.1592e-08, 5.2709e-09, 2.9109e-11, 3.3448e-08,\n 5.3927e-09, 4.9845e-09, 1.2172e-08, 7.1024e-09, 7.9153e-09, 1.0621e-08,\n 7.2145e-09, 2.7796e-11, 2.1054e-10, 3.8792e-10, 2.8563e-09, 3.6347e-08,\n 6.9445e-09, 4.2423e-10, 6.5492e-09, 7.9116e-10, 6.6866e-09, 1.6541e-10,\n 3.2224e-10, 1.1331e-08, 8.2662e-09, 1.0345e-08, 2.1966e-09, 3.7033e-10,\n 6.5868e-09, 1.9500e-08, 3.4209e-09, 4.3105e-09, 5.3255e-09, 6.1143e-09,\n 3.3194e-09, 4.4322e-09, 4.1498e-09, 3.9059e-09, 2.8038e-08, 2.1595e-08,\n 3.1523e-09, 1.1894e-08, 1.1842e-09, 8.5449e-10, 1.7647e-08, 1.6790e-09,\n 1.2603e-08, 9.3739e-10, 3.6067e-10, 1.1379e-09, 1.2170e-09, 1.3532e-09,\n 1.5950e-09, 1.3729e-09, 5.0009e-10, 3.5625e-09, 5.4818e-08, 8.3738e-12,\n 2.7177e-09, 2.2789e-09, 1.6088e-09, 1.6442e-08, 4.8335e-09, 5.9276e-10,\n 2.4445e-09, 3.3702e-09, 2.5598e-11, 2.9862e-08, 1.5713e-09, 2.3929e-10,\n 7.4631e-09, 3.3116e-08, 2.7731e-14, 3.2384e-09, 1.6074e-08, 3.5772e-09,\n 2.9871e-09, 2.3320e-08, 5.7675e-09, 1.2426e-09, 1.2386e-12, 2.7968e-08,\n 5.6812e-10, 2.8025e-08, 1.7218e-11, 6.2144e-09, 8.9191e-11, 7.4668e-09,\n 1.0510e-08, 1.7867e-13, 8.8848e-10, 1.2354e-09, 1.0516e-09, 3.4202e-08,\n 1.8081e-09, 7.1740e-09, 1.6152e-09, 6.6459e-11, 3.6999e-11, 1.3181e-10,\n 1.2085e-09, 2.4079e-09, 2.0187e-10, 2.9620e-11, 1.2599e-08, 8.9925e-10,\n 6.6103e-10, 4.6097e-09, 3.9212e-09, 5.7696e-09, 2.3862e-09, 1.8515e-09,\n 6.2569e-10, 1.4174e-12, 4.7505e-09, 3.0804e-09, 1.3781e-09, 4.4022e-11,\n 2.8443e-11, 2.3533e-08, 1.0934e-09, 2.7132e-08, 8.6251e-09, 9.4150e-10,\n 1.2950e-08, 3.6602e-09, 7.6460e-09, 1.4532e-09], device='cuda:0')" }, "28": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([5.0103e-11, 6.2154e-12, 1.8249e-11, 4.2320e-11, 7.5490e-12, 4.6646e-12,\n 2.6931e-11, 1.7845e-12, 9.1293e-12, 1.1108e-12, 5.5828e-11, 2.7234e-10,\n 3.8285e-11, 5.1594e-11, 8.2755e-11, 3.6930e-10, 3.0121e-11, 2.4332e-11,\n 1.1263e-10, 7.7013e-11, 1.3621e-14, 1.1050e-11, 3.1324e-11, 2.1238e-11,\n 3.4462e-11, 4.3214e-11, 2.9650e-11, 8.8257e-12, 1.0504e-12, 8.1513e-12,\n 6.6982e-11, 3.8900e-11, 6.0331e-12, 3.1551e-10, 1.1914e-10, 8.4267e-11,\n 1.7790e-12, 6.7210e-15, 2.8409e-11, 2.3114e-09, 1.2657e-10, 2.5368e-11,\n 8.3013e-12, 4.6423e-12, 2.2076e-10, 5.0676e-10, 5.5228e-11, 1.8052e-10,\n 5.3616e-12, 6.5799e-11, 6.0960e-13, 1.7059e-10, 3.3054e-11, 3.4281e-11,\n 3.4894e-12, 8.1512e-13, 9.4252e-11, 9.2770e-13, 7.9857e-11, 5.9414e-12,\n 2.3268e-11, 1.3808e-10, 2.3114e-10, 9.0488e-11, 3.4094e-13, 2.0861e-12,\n 5.2482e-11, 1.1142e-12, 2.0124e-11, 7.5775e-12, 1.6710e-12, 3.5926e-11,\n 1.1706e-10, 8.7250e-12, 2.4436e-11, 7.1509e-13, 2.3790e-12, 1.1794e-10,\n 3.7635e-12, 6.7018e-14, 9.0448e-11, 6.3031e-11, 7.3212e-14, 1.2079e-11,\n 6.8769e-11, 2.1570e-10, 1.7254e-11, 1.0922e-10, 1.2408e-11, 9.6877e-11,\n 1.6967e-10, 4.6125e-11, 1.1565e-10, 1.2507e-11, 3.2623e-11, 2.2587e-12,\n 1.5874e-11, 9.5619e-11, 3.0741e-11, 3.0424e-10, 6.3055e-10, 1.0752e-11,\n 2.4050e-13, 2.9488e-11, 3.8529e-13, 1.8861e-10, 2.0842e-11, 1.7004e-11,\n 5.5658e-11, 4.8095e-16, 2.4236e-11, 1.2164e-12, 1.8386e-10, 1.1601e-10,\n 6.4813e-12, 1.3548e-10, 1.4700e-11, 1.6099e-11, 3.1189e-11, 3.3599e-11,\n 5.8390e-12, 4.1564e-12, 3.3805e-12, 1.4334e-14, 9.4504e-13, 2.1549e-11,\n 2.4737e-10, 4.9821e-11, 2.0644e-10, 4.1812e-11, 2.1978e-14, 3.0566e-10,\n 8.7576e-11, 3.3899e-11, 1.3279e-10, 4.5112e-11, 4.7403e-11, 6.3141e-11,\n 5.4405e-11, 5.3466e-15, 3.6731e-12, 8.2674e-12, 1.1890e-11, 3.6325e-10,\n 6.2392e-11, 4.3899e-12, 1.0395e-10, 7.2831e-12, 4.1386e-11, 1.1723e-13,\n 3.0863e-12, 1.0303e-10, 1.7837e-10, 1.4669e-10, 7.7417e-12, 1.1120e-11,\n 1.0917e-10, 1.8194e-10, 2.6319e-11, 3.2440e-11, 2.5378e-11, 3.5772e-11,\n 5.6969e-11, 2.8598e-11, 4.5177e-11, 3.1989e-11, 1.7773e-10, 2.4926e-10,\n 2.2696e-11, 9.8017e-11, 7.6389e-12, 4.3522e-12, 1.3552e-10, 1.1560e-11,\n 1.2898e-10, 1.9743e-11, 2.7364e-12, 9.2525e-12, 8.3060e-12, 1.6641e-11,\n 7.5195e-12, 1.0479e-11, 1.9730e-12, 2.3169e-11, 5.8990e-10, 1.9942e-13,\n 1.5106e-11, 1.4495e-11, 6.2707e-12, 1.2622e-10, 2.4988e-11, 3.4482e-12,\n 3.0059e-11, 5.4250e-11, 3.7490e-13, 2.3208e-10, 1.8594e-11, 1.2860e-12,\n 1.2123e-10, 2.8710e-10, 3.8324e-14, 2.5816e-11, 2.0515e-10, 4.1194e-11,\n 3.8656e-11, 1.6229e-10, 3.9285e-11, 1.1292e-11, 1.5844e-14, 2.0926e-10,\n 3.1335e-12, 2.6612e-10, 6.9423e-13, 5.8171e-11, 3.2665e-13, 5.8957e-11,\n 5.1828e-11, 2.8132e-13, 1.0423e-11, 7.6097e-12, 9.3058e-12, 5.4207e-10,\n 2.2946e-11, 6.5704e-11, 1.5700e-11, 2.5184e-12, 3.5781e-15, 2.6457e-12,\n 5.8231e-12, 1.9231e-11, 2.7204e-12, 1.9752e-15, 1.6429e-10, 1.0628e-11,\n 8.4059e-12, 5.8283e-11, 4.9868e-11, 4.1743e-11, 3.7634e-11, 1.4522e-11,\n 1.1301e-11, 1.0126e-13, 5.5800e-11, 1.8047e-11, 8.7960e-12, 1.5485e-13,\n 1.7935e-13, 2.7379e-10, 8.7622e-12, 3.1001e-10, 5.1382e-11, 5.6802e-12,\n 1.8443e-10, 2.3225e-11, 4.8267e-11, 3.2875e-11], device='cuda:0')" + "exp_avg_sq": "tensor([1.4317e-11, 1.7761e-12, 5.2149e-12, 1.2093e-11, 2.1572e-12, 1.3330e-12,\n 7.6957e-12, 5.0994e-13, 2.6088e-12, 3.1743e-13, 1.5953e-11, 7.7825e-11,\n 1.0940e-11, 1.4743e-11, 2.3648e-11, 1.0553e-10, 8.6074e-12, 6.9530e-12,\n 3.2184e-11, 2.2007e-11, 3.8922e-15, 3.1577e-12, 8.9512e-12, 6.0688e-12,\n 9.8478e-12, 1.2349e-11, 8.4726e-12, 2.5220e-12, 3.0015e-13, 2.3293e-12,\n 1.9141e-11, 1.1116e-11, 1.7240e-12, 9.0160e-11, 3.4044e-11, 2.4080e-11,\n 5.0835e-13, 1.9206e-15, 8.1180e-12, 6.6052e-10, 3.6168e-11, 7.2491e-12,\n 2.3722e-12, 1.3266e-12, 6.3084e-11, 1.4481e-10, 1.5782e-11, 5.1584e-11,\n 1.5321e-12, 1.8803e-11, 1.7420e-13, 4.8747e-11, 9.4453e-12, 9.7962e-12,\n 9.9713e-13, 2.3293e-13, 2.6933e-11, 2.6510e-13, 2.2820e-11, 1.6978e-12,\n 6.6490e-12, 3.9456e-11, 6.6051e-11, 2.5858e-11, 9.7426e-14, 5.9612e-13,\n 1.4997e-11, 3.1839e-13, 5.7506e-12, 2.1653e-12, 4.7749e-13, 1.0266e-11,\n 3.3451e-11, 2.4932e-12, 6.9827e-12, 2.0434e-13, 6.7983e-13, 3.3701e-11,\n 1.0754e-12, 1.9151e-14, 2.5846e-11, 1.8012e-11, 2.0921e-14, 3.4518e-12,\n 1.9651e-11, 6.1637e-11, 4.9304e-12, 3.1211e-11, 3.5457e-12, 2.7683e-11,\n 4.8484e-11, 1.3181e-11, 3.3047e-11, 3.5740e-12, 9.3222e-12, 6.4545e-13,\n 4.5360e-12, 2.7324e-11, 8.7845e-12, 8.6939e-11, 1.8019e-10, 3.0724e-12,\n 6.8726e-14, 8.4265e-12, 1.1010e-13, 5.3897e-11, 5.9558e-12, 4.8591e-12,\n 1.5905e-11, 1.3743e-16, 6.9257e-12, 3.4760e-13, 5.2540e-11, 3.3151e-11,\n 1.8521e-12, 3.8715e-11, 4.2007e-12, 4.6004e-12, 8.9126e-12, 9.6011e-12,\n 1.6685e-12, 1.1877e-12, 9.6600e-13, 4.0961e-15, 2.7005e-13, 6.1579e-12,\n 7.0688e-11, 1.4237e-11, 5.8992e-11, 1.1948e-11, 6.2804e-15, 8.7344e-11,\n 2.5026e-11, 9.6870e-12, 3.7946e-11, 1.2891e-11, 1.3546e-11, 1.8043e-11,\n 1.5547e-11, 1.5278e-15, 1.0496e-12, 2.3625e-12, 3.3976e-12, 1.0380e-10,\n 1.7829e-11, 1.2544e-12, 2.9706e-11, 2.0812e-12, 1.1826e-11, 3.3501e-14,\n 8.8193e-13, 2.9442e-11, 5.0972e-11, 4.1918e-11, 2.2123e-12, 3.1777e-12,\n 3.1197e-11, 5.1991e-11, 7.5208e-12, 9.2699e-12, 7.2519e-12, 1.0222e-11,\n 1.6279e-11, 8.1721e-12, 1.2910e-11, 9.1411e-12, 5.0787e-11, 7.1228e-11,\n 6.4857e-12, 2.8009e-11, 2.1829e-12, 1.2437e-12, 3.8725e-11, 3.3033e-12,\n 3.6856e-11, 5.6416e-12, 7.8195e-13, 2.6440e-12, 2.3735e-12, 4.7552e-12,\n 2.1488e-12, 2.9943e-12, 5.6379e-13, 6.6208e-12, 1.6857e-10, 5.6987e-14,\n 4.3166e-12, 4.1422e-12, 1.7919e-12, 3.6067e-11, 7.1404e-12, 9.8535e-13,\n 8.5896e-12, 1.5502e-11, 1.0713e-13, 6.6319e-11, 5.3133e-12, 3.6748e-13,\n 3.4643e-11, 8.2040e-11, 1.0951e-14, 7.3772e-12, 5.8623e-11, 1.1771e-11,\n 1.1046e-11, 4.6375e-11, 1.1226e-11, 3.2267e-12, 4.5276e-15, 5.9797e-11,\n 8.9542e-13, 7.6046e-11, 1.9838e-13, 1.6623e-11, 9.3342e-14, 1.6848e-11,\n 1.4810e-11, 8.0388e-14, 2.9784e-12, 2.1745e-12, 2.6592e-12, 1.5490e-10,\n 6.5571e-12, 1.8776e-11, 4.4865e-12, 7.1965e-13, 1.0225e-15, 7.5602e-13,\n 1.6640e-12, 5.4954e-12, 7.7738e-13, 5.6442e-16, 4.6946e-11, 3.0372e-12,\n 2.4021e-12, 1.6655e-11, 1.4250e-11, 1.1929e-11, 1.0754e-11, 4.1497e-12,\n 3.2295e-12, 2.8935e-14, 1.5945e-11, 5.1569e-12, 2.5135e-12, 4.4248e-14,\n 5.1250e-14, 7.8238e-11, 2.5039e-12, 8.8587e-11, 1.4683e-11, 1.6232e-12,\n 5.2703e-11, 6.6369e-12, 1.3793e-11, 9.3942e-12], device='cuda:0')" }, "29": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.1175e-10, 6.1407e-12, 3.3378e-11, 5.6844e-11, 9.1420e-12, 6.3499e-12,\n 4.0327e-11, 2.8890e-12, 1.6648e-11, 1.8200e-12, 9.8243e-11, 3.7597e-10,\n 5.8565e-11, 8.3940e-11, 9.7887e-11, 3.8261e-10, 5.1423e-11, 3.3398e-11,\n 1.0983e-10, 1.6758e-10, 1.8884e-15, 1.9684e-11, 4.0781e-11, 4.8221e-11,\n 4.7061e-11, 5.9480e-11, 6.3597e-11, 1.4986e-11, 2.5902e-12, 1.4524e-11,\n 1.1187e-10, 7.0319e-11, 1.3500e-11, 4.5642e-10, 1.9698e-10, 1.2633e-10,\n 1.4273e-12, 3.0880e-14, 5.2089e-11, 1.3758e-09, 1.9682e-10, 3.9989e-11,\n 1.3496e-11, 9.1634e-12, 2.2685e-10, 4.0273e-10, 8.2201e-11, 1.7915e-10,\n 1.3391e-11, 1.1071e-10, 1.0643e-12, 2.8714e-10, 5.6662e-11, 5.3231e-11,\n 6.3519e-12, 2.0968e-12, 1.2804e-10, 2.8405e-12, 9.2080e-11, 9.6447e-12,\n 2.9401e-11, 9.4403e-11, 2.1007e-10, 1.1503e-10, 1.2016e-12, 5.1792e-12,\n 8.7571e-11, 2.7338e-12, 5.4149e-11, 1.1532e-11, 1.0791e-12, 5.7065e-11,\n 1.6817e-10, 2.0037e-11, 3.1307e-11, 6.0083e-13, 3.8449e-12, 1.7947e-10,\n 5.5588e-12, 7.7973e-14, 1.0776e-10, 1.3110e-10, 2.8111e-13, 1.4070e-11,\n 8.2813e-11, 3.2079e-10, 2.7051e-11, 1.1155e-10, 1.7438e-11, 1.8643e-10,\n 1.3818e-10, 8.2095e-11, 1.6401e-10, 2.7676e-11, 6.6159e-11, 6.7613e-12,\n 2.5738e-11, 1.9841e-10, 6.2294e-11, 4.3802e-10, 4.1583e-10, 1.4146e-11,\n 4.1309e-13, 3.9840e-11, 5.7572e-13, 3.1159e-10, 1.7532e-11, 3.0061e-11,\n 5.9316e-11, 1.1653e-14, 3.2842e-11, 3.3119e-13, 2.5079e-10, 2.0994e-10,\n 1.4808e-11, 1.7953e-10, 2.1373e-11, 2.6550e-11, 5.6491e-11, 5.3923e-11,\n 1.4152e-11, 3.3621e-12, 3.8950e-12, 3.7379e-14, 2.2298e-12, 3.6134e-11,\n 4.1212e-10, 7.4117e-11, 1.7857e-10, 5.9771e-11, 1.3366e-15, 4.7123e-10,\n 8.9461e-11, 7.8504e-11, 1.4510e-10, 8.5226e-11, 1.2061e-10, 1.3687e-10,\n 1.1076e-10, 1.1503e-14, 5.4741e-12, 9.6672e-12, 3.4396e-11, 4.4545e-10,\n 1.0231e-10, 8.3022e-12, 1.0472e-10, 1.4587e-11, 7.8876e-11, 9.5617e-13,\n 6.7576e-12, 1.3398e-10, 1.3141e-10, 1.2017e-10, 2.3463e-11, 9.4206e-12,\n 7.0559e-11, 2.4921e-10, 3.7013e-11, 6.9580e-11, 6.6123e-11, 7.2590e-11,\n 3.1459e-11, 6.6347e-11, 6.5372e-11, 4.4564e-11, 3.6050e-10, 2.6163e-10,\n 5.3350e-11, 1.8586e-10, 1.0327e-11, 1.0183e-11, 2.2064e-10, 2.7180e-11,\n 1.4784e-10, 2.0191e-11, 3.6389e-12, 2.0067e-11, 1.0944e-11, 2.3900e-11,\n 1.6584e-11, 1.3991e-11, 4.6731e-12, 3.8772e-11, 7.5244e-10, 3.7384e-13,\n 3.1793e-11, 2.5018e-11, 1.6271e-11, 2.3746e-10, 6.1987e-11, 6.4278e-12,\n 4.0324e-11, 5.9276e-11, 1.3750e-12, 4.5249e-10, 1.4872e-11, 2.2041e-12,\n 1.2164e-10, 4.3315e-10, 3.5175e-13, 5.1927e-11, 1.9405e-10, 3.4431e-11,\n 5.1244e-11, 3.1820e-10, 6.7868e-11, 1.3274e-11, 1.3502e-13, 3.5655e-10,\n 4.3476e-12, 3.9484e-10, 6.5469e-13, 9.5634e-11, 2.5881e-13, 9.6803e-11,\n 1.2709e-10, 3.3063e-13, 1.7259e-11, 1.3803e-11, 7.7530e-12, 4.9663e-10,\n 3.1444e-11, 1.1619e-10, 2.9543e-11, 3.3107e-12, 5.5745e-15, 4.2977e-12,\n 1.1121e-11, 3.7511e-11, 4.8570e-12, 5.8807e-14, 1.9445e-10, 1.5678e-11,\n 1.1263e-11, 7.4685e-11, 3.9949e-11, 9.1436e-11, 4.1807e-11, 1.9073e-11,\n 8.8981e-12, 2.3744e-13, 7.6405e-11, 3.7500e-11, 1.6108e-11, 1.8837e-13,\n 7.8244e-14, 2.8531e-10, 9.0973e-12, 3.9205e-10, 1.0172e-10, 8.9423e-12,\n 1.4901e-10, 4.1223e-11, 1.0190e-10, 2.8246e-11], device='cuda:0')" + "exp_avg_sq": "tensor([3.1934e-11, 1.7548e-12, 9.5380e-12, 1.6244e-11, 2.6124e-12, 1.8145e-12,\n 1.1524e-11, 8.2556e-13, 4.7574e-12, 5.2007e-13, 2.8074e-11, 1.0744e-10,\n 1.6735e-11, 2.3987e-11, 2.7972e-11, 1.0933e-10, 1.4694e-11, 9.5436e-12,\n 3.1386e-11, 4.7888e-11, 5.3961e-16, 5.6248e-12, 1.1653e-11, 1.3780e-11,\n 1.3448e-11, 1.6997e-11, 1.8173e-11, 4.2824e-12, 7.4018e-13, 4.1504e-12,\n 3.1968e-11, 2.0094e-11, 3.8576e-12, 1.3043e-10, 5.6288e-11, 3.6099e-11,\n 4.0786e-13, 8.8243e-15, 1.4885e-11, 3.9315e-10, 5.6244e-11, 1.1427e-11,\n 3.8567e-12, 2.6185e-12, 6.4825e-11, 1.1508e-10, 2.3489e-11, 5.1193e-11,\n 3.8265e-12, 3.1635e-11, 3.0413e-13, 8.2052e-11, 1.6192e-11, 1.5211e-11,\n 1.8151e-12, 5.9917e-13, 3.6588e-11, 8.1168e-13, 2.6313e-11, 2.7560e-12,\n 8.4016e-12, 2.6976e-11, 6.0028e-11, 3.2869e-11, 3.4338e-13, 1.4800e-12,\n 2.5024e-11, 7.8122e-13, 1.5474e-11, 3.2953e-12, 3.0835e-13, 1.6307e-11,\n 4.8055e-11, 5.7258e-12, 8.9462e-12, 1.7169e-13, 1.0987e-12, 5.1285e-11,\n 1.5885e-12, 2.2281e-14, 3.0793e-11, 3.7463e-11, 8.0329e-14, 4.0207e-12,\n 2.3664e-11, 9.1668e-11, 7.7300e-12, 3.1877e-11, 4.9830e-12, 5.3273e-11,\n 3.9486e-11, 2.3459e-11, 4.6866e-11, 7.9087e-12, 1.8906e-11, 1.9321e-12,\n 7.3549e-12, 5.6698e-11, 1.7801e-11, 1.2517e-10, 1.1883e-10, 4.0422e-12,\n 1.1804e-13, 1.1385e-11, 1.6452e-13, 8.9039e-11, 5.0101e-12, 8.5903e-12,\n 1.6950e-11, 3.3300e-15, 9.3849e-12, 9.4640e-14, 7.1665e-11, 5.9992e-11,\n 4.2314e-12, 5.1302e-11, 6.1076e-12, 7.5868e-12, 1.6143e-11, 1.5409e-11,\n 4.0441e-12, 9.6075e-13, 1.1130e-12, 1.0681e-14, 6.3718e-13, 1.0326e-11,\n 1.1777e-10, 2.1180e-11, 5.1027e-11, 1.7080e-11, 3.8194e-16, 1.3466e-10,\n 2.5564e-11, 2.2433e-11, 4.1464e-11, 2.4354e-11, 3.4466e-11, 3.9113e-11,\n 3.1651e-11, 3.2870e-15, 1.5643e-12, 2.7625e-12, 9.8289e-12, 1.2729e-10,\n 2.9236e-11, 2.3724e-12, 2.9924e-11, 4.1682e-12, 2.2539e-11, 2.7323e-13,\n 1.9310e-12, 3.8285e-11, 3.7551e-11, 3.4339e-11, 6.7048e-12, 2.6920e-12,\n 2.0163e-11, 7.1214e-11, 1.0577e-11, 1.9883e-11, 1.8895e-11, 2.0743e-11,\n 8.9897e-12, 1.8959e-11, 1.8681e-11, 1.2735e-11, 1.0302e-10, 7.4763e-11,\n 1.5245e-11, 5.3110e-11, 2.9510e-12, 2.9100e-12, 6.3050e-11, 7.7669e-12,\n 4.2246e-11, 5.7697e-12, 1.0398e-12, 5.7344e-12, 3.1274e-12, 6.8295e-12,\n 4.7389e-12, 3.9979e-12, 1.3354e-12, 1.1079e-11, 2.1501e-10, 1.0683e-13,\n 9.0852e-12, 7.1491e-12, 4.6496e-12, 6.7857e-11, 1.7713e-11, 1.8368e-12,\n 1.1523e-11, 1.6939e-11, 3.9290e-13, 1.2930e-10, 4.2499e-12, 6.2984e-13,\n 3.4761e-11, 1.2378e-10, 1.0052e-13, 1.4839e-11, 5.5452e-11, 9.8390e-12,\n 1.4643e-11, 9.0929e-11, 1.9394e-11, 3.7931e-12, 3.8584e-14, 1.0189e-10,\n 1.2424e-12, 1.1283e-10, 1.8708e-13, 2.7328e-11, 7.3957e-14, 2.7662e-11,\n 3.6318e-11, 9.4482e-14, 4.9319e-12, 3.9444e-12, 2.2155e-12, 1.4192e-10,\n 8.9853e-12, 3.3201e-11, 8.4421e-12, 9.4607e-13, 1.5929e-15, 1.2281e-12,\n 3.1780e-12, 1.0719e-11, 1.3879e-12, 1.6804e-14, 5.5566e-11, 4.4800e-12,\n 3.2184e-12, 2.1342e-11, 1.1416e-11, 2.6129e-11, 1.1947e-11, 5.4501e-12,\n 2.5427e-12, 6.7851e-14, 2.1833e-11, 1.0716e-11, 4.6031e-12, 5.3829e-14,\n 2.2359e-14, 8.1531e-11, 2.5996e-12, 1.1203e-10, 2.9068e-11, 2.5553e-12,\n 4.2582e-11, 1.1780e-11, 2.9119e-11, 8.0715e-12], device='cuda:0')" }, "30": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 0.0000e+00, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.0290e-11, 3.4752e-11, 0.0000e+00, ..., 1.1954e-10, 6.8025e-11,\n 7.2722e-11],\n [1.6438e-11, 6.1398e-12, 0.0000e+00, ..., 5.1351e-12, 5.9480e-11,\n 2.6822e-12],\n [1.3388e-11, 2.3625e-11, 0.0000e+00, ..., 1.6434e-11, 1.1148e-10,\n 6.5913e-12],\n ...,\n [3.2368e-12, 8.8122e-12, 0.0000e+00, ..., 1.1208e-11, 1.6658e-11,\n 1.0284e-13],\n [7.3905e-11, 6.5699e-11, 0.0000e+00, ..., 9.5535e-11, 4.0751e-10,\n 8.3076e-11],\n [5.9663e-13, 7.9775e-13, 0.0000e+00, ..., 3.0771e-12, 7.1962e-12,\n 7.4882e-14]], device='cuda:0')" + "exp_avg_sq": "tensor([[2.9404e-12, 9.9306e-12, 0.0000e+00, ..., 3.4161e-11, 1.9439e-11,\n 2.0781e-11],\n [4.6973e-12, 1.7545e-12, 0.0000e+00, ..., 1.4674e-12, 1.6997e-11,\n 7.6646e-13],\n [3.8258e-12, 6.7511e-12, 0.0000e+00, ..., 4.6961e-12, 3.1858e-11,\n 1.8835e-12],\n ...,\n [9.2494e-13, 2.5181e-12, 0.0000e+00, ..., 3.2028e-12, 4.7602e-12,\n 2.9387e-14],\n [2.1119e-11, 1.8774e-11, 0.0000e+00, ..., 2.7300e-11, 1.1645e-10,\n 2.3740e-11],\n [1.7049e-13, 2.2796e-13, 0.0000e+00, ..., 8.7930e-13, 2.0564e-12,\n 2.1398e-14]], device='cuda:0')" }, "31": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([6.3764e-08, 1.9821e-08, 2.5203e-08, 7.8433e-10, 7.1265e-09, 7.5381e-10,\n 7.7316e-09, 6.6121e-10, 4.5179e-09, 2.8956e-09, 5.9486e-08, 5.8634e-08,\n 8.0472e-09, 5.8094e-09, 1.1367e-08, 1.5378e-09, 2.8869e-08, 8.5876e-09,\n 2.8837e-08, 8.0288e-08, 1.3206e-10, 2.3015e-08, 5.0714e-09, 4.7115e-08,\n 3.4751e-09, 7.6315e-09, 5.0460e-08, 8.1573e-10, 2.8650e-09, 8.7480e-09,\n 1.5175e-08, 4.9953e-08, 6.6016e-11, 8.9904e-08, 5.1727e-08, 6.1407e-08,\n 2.3292e-09, 8.3018e-11, 3.3369e-08, 2.2033e-07, 9.2600e-08, 2.6077e-09,\n 2.3699e-09, 2.3888e-09, 1.6839e-08, 3.8438e-08, 2.3427e-08, 1.0827e-08,\n 2.3583e-09, 3.2571e-08, 8.7300e-10, 8.1564e-08, 4.0880e-08, 2.7237e-08,\n 1.2057e-09, 2.5041e-09, 1.8331e-08, 2.1446e-09, 1.3591e-08, 4.2970e-09,\n 5.2508e-09, 1.3590e-08, 2.2369e-08, 5.5023e-08, 5.7644e-10, 1.3715e-08,\n 1.3209e-08, 3.1935e-09, 2.6313e-08, 3.8202e-09, 2.6792e-11, 1.0646e-08,\n 4.3691e-08, 4.2580e-09, 2.6074e-09, 3.3750e-11, 6.8538e-09, 7.6681e-08,\n 3.0003e-09, 2.1532e-11, 3.1244e-08, 9.9781e-08, 4.4596e-11, 7.3391e-10,\n 1.2392e-08, 7.3869e-08, 3.3352e-09, 1.5151e-08, 1.1084e-08, 2.8372e-09,\n 1.5695e-08, 5.6593e-08, 1.7239e-08, 2.0001e-08, 6.4141e-08, 1.5987e-08,\n 2.8347e-08, 9.6179e-08, 1.4992e-08, 1.1634e-07, 1.0838e-08, 2.2195e-09,\n 4.3241e-12, 1.1476e-08, 1.2597e-10, 1.3868e-07, 3.9696e-09, 1.5688e-08,\n 1.1929e-08, 8.2922e-11, 1.0515e-07, 4.6514e-09, 3.2663e-08, 4.9191e-08,\n 8.5946e-09, 6.8487e-08, 1.4763e-09, 5.4382e-09, 8.5083e-09, 4.5663e-08,\n 4.7620e-09, 3.6462e-09, 2.0300e-09, 3.4089e-11, 3.3745e-10, 2.4535e-08,\n 4.1057e-08, 6.0488e-09, 2.0346e-08, 4.1697e-08, 4.5526e-10, 9.3491e-08,\n 2.2832e-08, 3.4181e-09, 3.9576e-08, 1.4973e-08, 7.4919e-08, 1.4518e-08,\n 1.1441e-08, 8.2310e-12, 2.9175e-11, 2.8201e-10, 5.1309e-08, 1.9023e-07,\n 3.3310e-08, 6.5578e-09, 8.0474e-09, 1.2731e-08, 1.1243e-08, 9.6075e-10,\n 2.9414e-09, 2.6057e-08, 1.8896e-08, 4.6248e-09, 5.6601e-09, 3.3480e-09,\n 9.0034e-09, 1.0133e-07, 4.4839e-09, 4.8574e-09, 1.6052e-08, 5.0253e-09,\n 4.7850e-09, 1.4336e-08, 4.5499e-09, 2.1351e-08, 9.7902e-08, 7.0829e-09,\n 3.3436e-08, 6.0626e-09, 3.0731e-10, 1.1103e-08, 2.0520e-08, 3.2099e-09,\n 4.1534e-08, 7.0607e-09, 4.9028e-09, 4.6829e-09, 5.2200e-09, 1.4113e-08,\n 1.7042e-08, 5.0846e-09, 7.6073e-09, 1.2634e-08, 1.1163e-07, 6.5236e-11,\n 8.7523e-09, 3.2901e-08, 5.8424e-09, 1.0721e-07, 1.8625e-08, 1.0966e-08,\n 6.3051e-09, 2.1986e-09, 9.6458e-12, 2.9731e-07, 4.1541e-09, 8.8677e-10,\n 8.7141e-09, 4.0768e-08, 4.6791e-11, 1.2094e-07, 8.3366e-09, 1.0403e-08,\n 1.0221e-08, 2.8085e-08, 5.0987e-08, 1.0383e-09, 2.3746e-11, 1.9463e-08,\n 5.6225e-09, 1.4293e-08, 5.3268e-10, 3.5572e-08, 7.0444e-10, 7.7462e-08,\n 8.9879e-08, 3.5378e-10, 8.8513e-10, 9.0044e-09, 3.6677e-09, 5.0185e-08,\n 4.6670e-09, 6.0762e-08, 8.1954e-09, 1.1250e-12, 1.0422e-11, 8.0350e-10,\n 1.2471e-09, 1.3132e-08, 3.4566e-10, 1.1660e-09, 1.3567e-08, 7.8708e-09,\n 5.3712e-08, 4.1341e-10, 3.7748e-09, 2.5146e-08, 7.0253e-09, 8.2296e-09,\n 1.4923e-08, 6.0857e-11, 5.9091e-09, 2.1082e-08, 8.6632e-09, 2.0902e-10,\n 4.8714e-10, 4.0266e-08, 2.7908e-09, 9.0114e-09, 7.2471e-08, 1.1101e-09,\n 4.5198e-08, 3.9964e-09, 1.0761e-07, 2.5738e-09], device='cuda:0')" + "exp_avg_sq": "tensor([1.8221e-08, 5.6640e-09, 7.2018e-09, 2.2413e-10, 2.0364e-09, 2.1541e-10,\n 2.2094e-09, 1.8894e-10, 1.2910e-09, 8.2745e-10, 1.6998e-08, 1.6755e-08,\n 2.2996e-09, 1.6601e-09, 3.2482e-09, 4.3943e-10, 8.2496e-09, 2.4540e-09,\n 8.2403e-09, 2.2943e-08, 3.7738e-11, 6.5768e-09, 1.4492e-09, 1.3464e-08,\n 9.9305e-10, 2.1807e-09, 1.4419e-08, 2.3310e-10, 8.1870e-10, 2.4998e-09,\n 4.3365e-09, 1.4275e-08, 1.8865e-11, 2.5691e-08, 1.4782e-08, 1.7548e-08,\n 6.6557e-10, 2.3723e-11, 9.5354e-09, 6.2961e-08, 2.6461e-08, 7.4517e-10,\n 6.7723e-10, 6.8263e-10, 4.8119e-09, 1.0984e-08, 6.6944e-09, 3.0939e-09,\n 6.7389e-10, 9.3074e-09, 2.4947e-10, 2.3308e-08, 1.1682e-08, 7.7831e-09,\n 3.4453e-10, 7.1556e-10, 5.2383e-09, 6.1283e-10, 3.8836e-09, 1.2279e-09,\n 1.5005e-09, 3.8834e-09, 6.3922e-09, 1.5723e-08, 1.6472e-10, 3.9191e-09,\n 3.7746e-09, 9.1257e-10, 7.5192e-09, 1.0917e-09, 7.6560e-12, 3.0421e-09,\n 1.2485e-08, 1.2168e-09, 7.4507e-10, 9.6445e-12, 1.9585e-09, 2.1912e-08,\n 8.5736e-10, 6.1528e-12, 8.9283e-09, 2.8513e-08, 1.2744e-11, 2.0972e-10,\n 3.5412e-09, 2.1109e-08, 9.5305e-10, 4.3294e-09, 3.1672e-09, 8.1074e-10,\n 4.4851e-09, 1.6172e-08, 4.9262e-09, 5.7154e-09, 1.8329e-08, 4.5684e-09,\n 8.1005e-09, 2.7484e-08, 4.2840e-09, 3.3246e-08, 3.0969e-09, 6.3425e-10,\n 1.2356e-12, 3.2793e-09, 3.5997e-11, 3.9629e-08, 1.1344e-09, 4.4831e-09,\n 3.4088e-09, 2.3696e-11, 3.0048e-08, 1.3292e-09, 9.3338e-09, 1.4057e-08,\n 2.4560e-09, 1.9571e-08, 4.2185e-10, 1.5540e-09, 2.4313e-09, 1.3049e-08,\n 1.3608e-09, 1.0419e-09, 5.8009e-10, 9.7413e-12, 9.6428e-11, 7.0110e-09,\n 1.1732e-08, 1.7285e-09, 5.8141e-09, 1.1915e-08, 1.3010e-10, 2.6716e-08,\n 6.5243e-09, 9.7674e-10, 1.1309e-08, 4.2786e-09, 2.1409e-08, 4.1486e-09,\n 3.2693e-09, 2.3521e-12, 8.3370e-12, 8.0587e-11, 1.4662e-08, 5.4359e-08,\n 9.5185e-09, 1.8740e-09, 2.2996e-09, 3.6381e-09, 3.2128e-09, 2.7454e-10,\n 8.4052e-10, 7.4460e-09, 5.3998e-09, 1.3216e-09, 1.6174e-09, 9.5671e-10,\n 2.5728e-09, 2.8957e-08, 1.2813e-09, 1.3880e-09, 4.5871e-09, 1.4360e-09,\n 1.3674e-09, 4.0966e-09, 1.3002e-09, 6.1012e-09, 2.7976e-08, 2.0240e-09,\n 9.5547e-09, 1.7324e-09, 8.7816e-11, 3.1728e-09, 5.8639e-09, 9.1727e-10,\n 1.1869e-08, 2.0177e-09, 1.4010e-09, 1.3382e-09, 1.4917e-09, 4.0330e-09,\n 4.8699e-09, 1.4530e-09, 2.1739e-09, 3.6102e-09, 3.1899e-08, 1.8642e-11,\n 2.5011e-09, 9.4017e-09, 1.6695e-09, 3.0636e-08, 5.3221e-09, 3.1337e-09,\n 1.8017e-09, 6.2825e-10, 2.7564e-12, 8.4957e-08, 1.1871e-09, 2.5340e-10,\n 2.4901e-09, 1.1650e-08, 1.3371e-11, 3.4560e-08, 2.3823e-09, 2.9728e-09,\n 2.9207e-09, 8.0255e-09, 1.4570e-08, 2.9670e-10, 6.7857e-12, 5.5618e-09,\n 1.6067e-09, 4.0845e-09, 1.5222e-10, 1.0165e-08, 2.0130e-10, 2.2135e-08,\n 2.5684e-08, 1.0110e-10, 2.5293e-10, 2.5731e-09, 1.0481e-09, 1.4341e-08,\n 1.3336e-09, 1.7363e-08, 2.3419e-09, 3.2148e-13, 2.9780e-12, 2.2961e-10,\n 3.5637e-10, 3.7527e-09, 9.8776e-11, 3.3320e-10, 3.8770e-09, 2.2491e-09,\n 1.5349e-08, 1.1814e-10, 1.0787e-09, 7.1858e-09, 2.0075e-09, 2.3517e-09,\n 4.2644e-09, 1.7390e-11, 1.6886e-09, 6.0244e-09, 2.4756e-09, 5.9730e-11,\n 1.3920e-10, 1.1506e-08, 7.9750e-10, 2.5751e-09, 2.0709e-08, 3.1722e-10,\n 1.2916e-08, 1.1420e-09, 3.0749e-08, 7.3548e-10], device='cuda:0')" }, "32": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.3744e-10, 6.3530e-11, 7.0398e-11, 4.1665e-12, 1.5786e-11, 2.2377e-12,\n 1.1277e-11, 5.3811e-12, 6.1644e-12, 6.9503e-12, 1.0951e-10, 9.6403e-11,\n 1.6656e-11, 1.4721e-11, 3.7533e-11, 1.1166e-11, 6.8892e-11, 1.4945e-11,\n 7.0558e-11, 1.9091e-10, 6.3541e-14, 8.0797e-11, 2.4171e-11, 9.9242e-11,\n 1.1665e-11, 8.8314e-12, 2.4096e-10, 2.5116e-12, 9.9642e-12, 1.6790e-11,\n 2.2960e-11, 3.6385e-10, 6.2080e-13, 1.5372e-10, 1.5384e-10, 2.3387e-10,\n 2.9168e-12, 9.4051e-14, 9.7350e-11, 5.5770e-10, 2.7387e-10, 4.3368e-12,\n 6.3147e-12, 1.0875e-11, 2.1830e-11, 6.8942e-11, 9.6639e-11, 2.6071e-11,\n 3.0100e-12, 7.5640e-11, 9.5345e-13, 2.4988e-10, 8.4960e-11, 5.9330e-11,\n 1.4108e-12, 1.7856e-11, 3.9493e-11, 2.3733e-12, 6.8804e-11, 1.4816e-11,\n 1.2075e-11, 5.5737e-11, 7.1020e-11, 2.4709e-10, 6.9496e-13, 3.9911e-11,\n 3.5991e-11, 5.1208e-12, 6.1118e-11, 8.2823e-12, 7.7604e-15, 3.8825e-11,\n 1.1168e-10, 1.4080e-11, 3.9419e-12, 1.2763e-13, 4.8721e-11, 2.2290e-10,\n 6.2475e-12, 1.1533e-15, 1.8239e-10, 5.4918e-10, 2.7082e-13, 4.1221e-12,\n 5.5762e-11, 2.1572e-10, 1.2771e-11, 5.9119e-11, 2.3608e-11, 1.4899e-11,\n 4.2714e-11, 1.3561e-10, 2.4963e-11, 5.2821e-11, 2.1926e-10, 3.1899e-11,\n 7.5819e-11, 2.2013e-10, 3.8214e-11, 3.3698e-10, 2.3963e-11, 1.3450e-11,\n 6.1698e-13, 2.1983e-11, 6.3942e-14, 4.2697e-10, 7.8871e-12, 7.9499e-11,\n 1.9172e-11, 3.0502e-17, 3.2205e-10, 1.3696e-11, 8.2310e-11, 8.5610e-11,\n 1.5303e-11, 2.1515e-10, 4.6201e-12, 8.8981e-12, 1.6949e-11, 1.9737e-10,\n 5.8030e-12, 5.2242e-12, 2.1539e-12, 1.5949e-12, 3.1322e-12, 8.1223e-11,\n 7.3516e-11, 1.5219e-11, 6.2618e-11, 1.5721e-10, 2.3725e-14, 1.7546e-10,\n 8.8099e-11, 1.0463e-11, 6.5597e-11, 2.1547e-11, 2.5569e-10, 3.2752e-11,\n 2.8977e-11, 1.2675e-15, 1.1988e-14, 1.3880e-12, 1.2330e-10, 7.8728e-10,\n 1.1544e-10, 2.4932e-11, 1.9749e-11, 7.6027e-11, 1.3307e-11, 9.5751e-13,\n 1.9285e-11, 5.3204e-11, 7.4776e-11, 7.5444e-12, 7.4633e-12, 2.6754e-11,\n 1.5933e-11, 2.5745e-10, 6.7742e-12, 2.1145e-11, 3.5574e-11, 7.4715e-12,\n 5.7909e-12, 4.8856e-11, 1.0913e-11, 4.6359e-11, 1.8348e-10, 1.3572e-11,\n 1.6100e-10, 1.9969e-11, 9.6820e-14, 2.0839e-11, 3.3341e-11, 9.7979e-12,\n 1.1789e-10, 4.8948e-11, 7.2222e-12, 1.5115e-11, 1.0905e-11, 3.0175e-11,\n 3.8603e-11, 7.8104e-12, 1.6229e-11, 2.0702e-11, 2.5917e-10, 4.2093e-15,\n 1.4752e-11, 1.2001e-10, 6.6833e-12, 2.3390e-10, 3.1349e-11, 2.6997e-11,\n 2.4396e-11, 6.9103e-12, 2.4885e-14, 1.2180e-09, 4.4625e-12, 6.5711e-13,\n 1.8184e-11, 7.8135e-11, 1.6123e-12, 5.3109e-10, 1.4627e-11, 2.8714e-11,\n 2.0383e-11, 6.2688e-11, 1.3153e-10, 1.8246e-12, 1.7132e-15, 3.3872e-11,\n 1.1596e-11, 3.2383e-11, 1.6811e-12, 1.2458e-10, 5.0868e-13, 2.2452e-10,\n 2.1178e-10, 4.2867e-14, 2.4637e-12, 1.9301e-11, 7.1466e-12, 1.2200e-10,\n 1.5776e-11, 1.2922e-10, 2.2457e-11, 1.0202e-13, 7.5649e-14, 2.9396e-12,\n 1.7718e-12, 3.9649e-11, 1.0350e-12, 6.7008e-13, 4.6952e-11, 2.7535e-11,\n 1.6931e-10, 2.8040e-12, 5.5776e-12, 5.3708e-11, 3.0018e-11, 1.5073e-11,\n 2.9314e-11, 3.3129e-14, 1.6777e-11, 6.0101e-11, 1.4166e-11, 1.6508e-14,\n 2.0936e-13, 6.5881e-11, 4.4100e-12, 3.0788e-11, 2.1312e-10, 1.0377e-12,\n 1.6207e-10, 7.5313e-12, 2.8696e-10, 8.1122e-12], device='cuda:0')" + "exp_avg_sq": "tensor([3.9274e-11, 1.8154e-11, 2.0117e-11, 1.1906e-12, 4.5109e-12, 6.3944e-13,\n 3.2225e-12, 1.5377e-12, 1.7615e-12, 1.9861e-12, 3.1294e-11, 2.7548e-11,\n 4.7595e-12, 4.2067e-12, 1.0725e-11, 3.1908e-12, 1.9686e-11, 4.2706e-12,\n 2.0162e-11, 5.4554e-11, 1.8157e-14, 2.3088e-11, 6.9070e-12, 2.8359e-11,\n 3.3333e-12, 2.5237e-12, 6.8857e-11, 7.1771e-13, 2.8473e-12, 4.7980e-12,\n 6.5610e-12, 1.0397e-10, 1.7740e-13, 4.3926e-11, 4.3960e-11, 6.6829e-11,\n 8.3350e-13, 2.6876e-14, 2.7819e-11, 1.5937e-10, 7.8260e-11, 1.2393e-12,\n 1.8045e-12, 3.1076e-12, 6.2382e-12, 1.9701e-11, 2.7615e-11, 7.4501e-12,\n 8.6012e-13, 2.1615e-11, 2.7246e-13, 7.1405e-11, 2.4278e-11, 1.6954e-11,\n 4.0315e-13, 5.1025e-12, 1.1286e-11, 6.7818e-13, 1.9661e-11, 4.2338e-12,\n 3.4507e-12, 1.5927e-11, 2.0294e-11, 7.0608e-11, 1.9859e-13, 1.1405e-11,\n 1.0285e-11, 1.4633e-12, 1.7465e-11, 2.3667e-12, 2.2176e-15, 1.1095e-11,\n 3.1915e-11, 4.0233e-12, 1.1264e-12, 3.6471e-14, 1.3923e-11, 6.3695e-11,\n 1.7853e-12, 3.2955e-16, 5.2120e-11, 1.5693e-10, 7.7389e-14, 1.1779e-12,\n 1.5935e-11, 6.1642e-11, 3.6494e-12, 1.6894e-11, 6.7462e-12, 4.2576e-12,\n 1.2206e-11, 3.8752e-11, 7.1334e-12, 1.5094e-11, 6.2655e-11, 9.1153e-12,\n 2.1666e-11, 6.2905e-11, 1.0920e-11, 9.6296e-11, 6.8475e-12, 3.8434e-12,\n 1.7631e-13, 6.2818e-12, 1.8272e-14, 1.2201e-10, 2.2538e-12, 2.2718e-11,\n 5.4785e-12, 8.7161e-18, 9.2027e-11, 3.9136e-12, 2.3521e-11, 2.4464e-11,\n 4.3730e-12, 6.1481e-11, 1.3202e-12, 2.5427e-12, 4.8434e-12, 5.6399e-11,\n 1.6583e-12, 1.4929e-12, 6.1549e-13, 4.5575e-13, 8.9504e-13, 2.3210e-11,\n 2.1008e-11, 4.3488e-12, 1.7894e-11, 4.4923e-11, 6.7795e-15, 5.0140e-11,\n 2.5175e-11, 2.9899e-12, 1.8745e-11, 6.1572e-12, 7.3065e-11, 9.3592e-12,\n 8.2805e-12, 3.6218e-16, 3.4257e-15, 3.9662e-13, 3.5235e-11, 2.2497e-10,\n 3.2989e-11, 7.1245e-12, 5.6435e-12, 2.1725e-11, 3.8026e-12, 2.7362e-13,\n 5.5107e-12, 1.5203e-11, 2.1368e-11, 2.1559e-12, 2.1327e-12, 7.6453e-12,\n 4.5530e-12, 7.3567e-11, 1.9358e-12, 6.0424e-12, 1.0166e-11, 2.1350e-12,\n 1.6548e-12, 1.3961e-11, 3.1183e-12, 1.3247e-11, 5.2430e-11, 3.8783e-12,\n 4.6006e-11, 5.7063e-12, 2.7667e-14, 5.9548e-12, 9.5274e-12, 2.7998e-12,\n 3.3688e-11, 1.3987e-11, 2.0638e-12, 4.3191e-12, 3.1162e-12, 8.6227e-12,\n 1.1031e-11, 2.2319e-12, 4.6375e-12, 5.9159e-12, 7.4060e-11, 1.2028e-15,\n 4.2156e-12, 3.4294e-11, 1.9098e-12, 6.6839e-11, 8.9582e-12, 7.7147e-12,\n 6.9713e-12, 1.9747e-12, 7.1110e-15, 3.4806e-10, 1.2752e-12, 1.8777e-13,\n 5.1962e-12, 2.2328e-11, 4.6073e-13, 1.5176e-10, 4.1798e-12, 8.2054e-12,\n 5.8246e-12, 1.7914e-11, 3.7585e-11, 5.2139e-13, 4.8957e-16, 9.6792e-12,\n 3.3138e-12, 9.2538e-12, 4.8039e-13, 3.5599e-11, 1.4536e-13, 6.4158e-11,\n 6.0519e-11, 1.2249e-14, 7.0403e-13, 5.5154e-12, 2.0422e-12, 3.4862e-11,\n 4.5081e-12, 3.6924e-11, 6.4172e-12, 2.9153e-14, 2.1617e-14, 8.4001e-13,\n 5.0630e-13, 1.1330e-11, 2.9576e-13, 1.9148e-13, 1.3417e-11, 7.8683e-12,\n 4.8381e-11, 8.0127e-13, 1.5938e-12, 1.5348e-11, 8.5778e-12, 4.3071e-12,\n 8.3768e-12, 9.4670e-15, 4.7940e-12, 1.7174e-11, 4.0482e-12, 4.7172e-15,\n 5.9826e-14, 1.8826e-11, 1.2602e-12, 8.7980e-12, 6.0900e-11, 2.9653e-13,\n 4.6311e-11, 2.1521e-12, 8.2001e-11, 2.3181e-12], device='cuda:0')" }, "33": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([2.6360e-10, 9.2939e-11, 7.3853e-11, 4.7984e-12, 1.5413e-11, 5.9927e-12,\n 2.1221e-11, 6.9494e-12, 1.0395e-11, 4.8349e-12, 1.9421e-10, 2.0866e-10,\n 2.5053e-11, 2.7767e-11, 5.9042e-11, 6.3912e-12, 1.2938e-10, 2.2011e-11,\n 9.2735e-11, 3.3475e-10, 5.2375e-14, 1.0673e-10, 2.9837e-11, 1.9798e-10,\n 1.9332e-11, 2.4125e-11, 2.2648e-10, 4.7826e-12, 1.7350e-11, 2.2936e-11,\n 7.2053e-11, 2.3387e-10, 6.9818e-13, 3.0788e-10, 2.1935e-10, 2.7153e-10,\n 4.9021e-12, 1.6882e-13, 1.4872e-10, 7.7158e-10, 3.1002e-10, 1.5357e-11,\n 1.4050e-11, 1.4896e-11, 5.6786e-11, 1.2830e-10, 1.1311e-10, 5.2841e-11,\n 6.2552e-12, 1.1027e-10, 8.2097e-13, 3.4682e-10, 1.3895e-10, 8.2626e-11,\n 2.3986e-12, 1.9143e-11, 8.1395e-11, 5.2705e-12, 7.0198e-11, 2.3547e-11,\n 1.1688e-11, 6.9415e-11, 1.0662e-10, 2.4491e-10, 9.5876e-13, 3.5882e-11,\n 6.5687e-11, 6.8441e-12, 1.1379e-10, 1.9462e-11, 1.9415e-16, 5.3342e-11,\n 1.4096e-10, 2.4188e-11, 7.2361e-12, 5.8660e-13, 3.9710e-11, 3.1694e-10,\n 4.5761e-12, 3.1429e-14, 1.4798e-10, 4.3358e-10, 3.6980e-13, 6.6146e-12,\n 6.2917e-11, 3.0869e-10, 1.9074e-11, 7.5018e-11, 3.6114e-11, 1.2506e-11,\n 7.2208e-11, 2.5004e-10, 5.6117e-11, 6.3773e-11, 2.7980e-10, 4.6031e-11,\n 8.6662e-11, 3.2859e-10, 7.4888e-11, 4.8878e-10, 5.2257e-11, 1.4549e-11,\n 1.3075e-12, 3.2073e-11, 3.8748e-14, 4.6796e-10, 8.1365e-12, 7.8105e-11,\n 3.4891e-11, 4.0404e-14, 3.5398e-10, 2.5288e-11, 1.4714e-10, 1.6913e-10,\n 2.1369e-11, 2.2728e-10, 7.8915e-12, 1.5367e-11, 3.8422e-11, 2.0996e-10,\n 1.4474e-11, 1.0430e-11, 3.8241e-12, 8.4413e-13, 3.8923e-12, 7.2130e-11,\n 1.7275e-10, 3.0480e-11, 9.4208e-11, 1.2566e-10, 1.2805e-13, 3.8305e-10,\n 1.0725e-10, 1.7918e-11, 1.2992e-10, 4.8392e-11, 3.1695e-10, 5.0084e-11,\n 5.3773e-11, 5.1615e-14, 1.1176e-13, 2.4511e-12, 1.5747e-10, 6.4113e-10,\n 1.4634e-10, 3.4679e-11, 3.8355e-11, 6.8156e-11, 3.7340e-11, 1.0187e-12,\n 1.9843e-11, 8.2853e-11, 9.1927e-11, 1.3982e-11, 1.7356e-11, 2.4218e-11,\n 2.5300e-11, 3.4611e-10, 1.2792e-11, 2.1794e-11, 5.3633e-11, 1.4741e-11,\n 1.1765e-11, 6.5881e-11, 2.3392e-11, 6.6885e-11, 3.4475e-10, 2.3423e-11,\n 1.5886e-10, 2.9277e-11, 5.3438e-13, 3.1662e-11, 7.2386e-11, 1.7528e-11,\n 1.3442e-10, 4.3150e-11, 1.2338e-11, 2.6690e-11, 1.0975e-11, 6.4125e-11,\n 4.6690e-11, 1.3386e-11, 1.8233e-11, 3.6301e-11, 4.7146e-10, 3.4831e-14,\n 2.7540e-11, 9.4828e-11, 1.6067e-11, 4.3476e-10, 6.2788e-11, 3.1833e-11,\n 3.2373e-11, 1.4074e-11, 2.3614e-14, 1.2010e-09, 9.7706e-12, 1.6303e-12,\n 4.3188e-11, 1.4622e-10, 2.8316e-12, 5.2067e-10, 2.9550e-11, 2.6245e-11,\n 4.8572e-11, 1.0593e-10, 1.5673e-10, 2.9676e-12, 8.1213e-14, 6.9776e-11,\n 1.2185e-11, 5.9873e-11, 4.0374e-12, 1.5661e-10, 5.7107e-13, 2.5561e-10,\n 3.0422e-10, 5.2449e-14, 5.0222e-12, 2.2505e-11, 5.6157e-12, 2.1887e-10,\n 2.6266e-11, 2.6026e-10, 3.9286e-11, 2.7037e-13, 6.1939e-14, 6.2063e-12,\n 2.9635e-12, 6.3547e-11, 2.2986e-12, 1.3641e-12, 6.1611e-11, 4.0654e-11,\n 2.3326e-10, 2.1189e-12, 9.0830e-12, 1.1284e-10, 3.8420e-11, 2.1788e-11,\n 6.7063e-11, 3.4594e-14, 3.1557e-11, 7.4428e-11, 2.5783e-11, 7.4963e-16,\n 2.9105e-13, 1.3076e-10, 6.2234e-12, 4.1436e-11, 2.4361e-10, 2.2581e-12,\n 1.3500e-10, 1.2405e-11, 3.7407e-10, 1.5844e-11], device='cuda:0')" + "exp_avg_sq": "tensor([7.5326e-11, 2.6558e-11, 2.1104e-11, 1.3712e-12, 4.4045e-12, 1.7125e-12,\n 6.0641e-12, 1.9858e-12, 2.9704e-12, 1.3816e-12, 5.5497e-11, 5.9627e-11,\n 7.1590e-12, 7.9347e-12, 1.6872e-11, 1.8263e-12, 3.6970e-11, 6.2897e-12,\n 2.6500e-11, 9.5658e-11, 1.4967e-14, 3.0500e-11, 8.5262e-12, 5.6574e-11,\n 5.5242e-12, 6.8938e-12, 6.4719e-11, 1.3667e-12, 4.9579e-12, 6.5542e-12,\n 2.0590e-11, 6.6831e-11, 1.9951e-13, 8.7980e-11, 6.2681e-11, 7.7593e-11,\n 1.4008e-12, 4.8241e-14, 4.2497e-11, 2.2049e-10, 8.8591e-11, 4.3884e-12,\n 4.0150e-12, 4.2567e-12, 1.6227e-11, 3.6663e-11, 3.2323e-11, 1.5100e-11,\n 1.7875e-12, 3.1510e-11, 2.3460e-13, 9.9106e-11, 3.9705e-11, 2.3611e-11,\n 6.8542e-13, 5.4702e-12, 2.3259e-11, 1.5061e-12, 2.0060e-11, 6.7288e-12,\n 3.3398e-12, 1.9836e-11, 3.0468e-11, 6.9986e-11, 2.7397e-13, 1.0254e-11,\n 1.8771e-11, 1.9558e-12, 3.2517e-11, 5.5613e-12, 5.5478e-17, 1.5243e-11,\n 4.0280e-11, 6.9120e-12, 2.0678e-12, 1.6763e-13, 1.1348e-11, 9.0567e-11,\n 1.3077e-12, 8.9810e-15, 4.2286e-11, 1.2390e-10, 1.0567e-13, 1.8902e-12,\n 1.7979e-11, 8.8212e-11, 5.4504e-12, 2.1437e-11, 1.0320e-11, 3.5736e-12,\n 2.0634e-11, 7.1451e-11, 1.6036e-11, 1.8224e-11, 7.9955e-11, 1.3154e-11,\n 2.4764e-11, 9.3898e-11, 2.1400e-11, 1.3967e-10, 1.4933e-11, 4.1574e-12,\n 3.7364e-13, 9.1650e-12, 1.1073e-14, 1.3372e-10, 2.3251e-12, 2.2319e-11,\n 9.9705e-12, 1.1546e-14, 1.0115e-10, 7.2264e-12, 4.2047e-11, 4.8330e-11,\n 6.1062e-12, 6.4947e-11, 2.2551e-12, 4.3913e-12, 1.0979e-11, 5.9999e-11,\n 4.1362e-12, 2.9805e-12, 1.0928e-12, 2.4122e-13, 1.1122e-12, 2.0612e-11,\n 4.9365e-11, 8.7100e-12, 2.6921e-11, 3.5909e-11, 3.6592e-14, 1.0946e-10,\n 3.0648e-11, 5.1202e-12, 3.7125e-11, 1.3828e-11, 9.0570e-11, 1.4312e-11,\n 1.5366e-11, 1.4749e-14, 3.1936e-14, 7.0042e-13, 4.4997e-11, 1.8321e-10,\n 4.1819e-11, 9.9099e-12, 1.0960e-11, 1.9476e-11, 1.0670e-11, 2.9111e-13,\n 5.6703e-12, 2.3676e-11, 2.6269e-11, 3.9955e-12, 4.9595e-12, 6.9205e-12,\n 7.2297e-12, 9.8903e-11, 3.6554e-12, 6.2277e-12, 1.5326e-11, 4.2122e-12,\n 3.3620e-12, 1.8826e-11, 6.6846e-12, 1.9113e-11, 9.8514e-11, 6.6932e-12,\n 4.5396e-11, 8.3662e-12, 1.5270e-13, 9.0478e-12, 2.0685e-11, 5.0089e-12,\n 3.8412e-11, 1.2330e-11, 3.5258e-12, 7.6270e-12, 3.1362e-12, 1.8324e-11,\n 1.3342e-11, 3.8250e-12, 5.2101e-12, 1.0373e-11, 1.3472e-10, 9.9532e-15,\n 7.8698e-12, 2.7098e-11, 4.5912e-12, 1.2424e-10, 1.7942e-11, 9.0964e-12,\n 9.2508e-12, 4.0216e-12, 6.7478e-15, 3.4320e-10, 2.7920e-12, 4.6587e-13,\n 1.2341e-11, 4.1783e-11, 8.0916e-13, 1.4878e-10, 8.4440e-12, 7.4997e-12,\n 1.3880e-11, 3.0270e-11, 4.4786e-11, 8.4800e-13, 2.3207e-14, 1.9939e-11,\n 3.4819e-12, 1.7109e-11, 1.1537e-12, 4.4752e-11, 1.6319e-13, 7.3044e-11,\n 8.6934e-11, 1.4988e-14, 1.4351e-12, 6.4310e-12, 1.6047e-12, 6.2543e-11,\n 7.5057e-12, 7.4372e-11, 1.1226e-11, 7.7261e-14, 1.7699e-14, 1.7735e-12,\n 8.4685e-13, 1.8159e-11, 6.5684e-13, 3.8980e-13, 1.7606e-11, 1.1617e-11,\n 6.6657e-11, 6.0551e-13, 2.5955e-12, 3.2245e-11, 1.0979e-11, 6.2260e-12,\n 1.9164e-11, 9.8857e-15, 9.0175e-12, 2.1268e-11, 7.3676e-12, 2.1421e-16,\n 8.3170e-14, 3.7367e-11, 1.7784e-12, 1.1841e-11, 6.9613e-11, 6.4526e-13,\n 3.8576e-11, 3.5449e-12, 1.0689e-10, 4.5275e-12], device='cuda:0')" }, "34": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[9.3956e-13, 3.3927e-12, 2.0697e-12, ..., 4.6844e-13, 2.5647e-12,\n 1.5412e-12],\n [6.6873e-14, 1.0004e-14, 1.0904e-13, ..., 2.9099e-13, 3.3827e-13,\n 1.2056e-14],\n [3.2596e-13, 1.4336e-12, 4.5203e-13, ..., 2.3131e-13, 1.3929e-12,\n 2.6653e-13],\n ...,\n [8.1566e-12, 1.9348e-11, 2.0051e-11, ..., 3.5628e-12, 3.0083e-11,\n 4.3538e-11],\n [5.4466e-11, 8.6148e-11, 1.3296e-10, ..., 2.9526e-11, 1.3010e-10,\n 2.3886e-10],\n [7.4728e-10, 1.6575e-09, 1.9807e-09, ..., 4.2166e-10, 2.4948e-09,\n 3.9141e-09]], device='cuda:0')" + "exp_avg_sq": "tensor([[2.6849e-13, 9.6948e-13, 5.9143e-13, ..., 1.3386e-13, 7.3290e-13,\n 4.4041e-13],\n [1.9109e-14, 2.8588e-15, 3.1159e-14, ..., 8.3152e-14, 9.6665e-14,\n 3.4452e-15],\n [9.3147e-14, 4.0966e-13, 1.2917e-13, ..., 6.6098e-14, 3.9804e-13,\n 7.6162e-14],\n ...,\n [2.3308e-12, 5.5287e-12, 5.7296e-12, ..., 1.0181e-12, 8.5963e-12,\n 1.2441e-11],\n [1.5564e-11, 2.4617e-11, 3.7995e-11, ..., 8.4373e-12, 3.7176e-11,\n 6.8257e-11],\n [2.1354e-10, 4.7365e-10, 5.6600e-10, ..., 1.2049e-10, 7.1290e-10,\n 1.1185e-09]], device='cuda:0')" }, "35": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.8943e-11, 6.0880e-13, 5.4349e-12, 3.5765e-13, 2.3948e-12, 3.5404e-12,\n 5.2278e-13, 4.5224e-13, 2.2031e-14, 3.6910e-12, 2.9938e-12, 5.1779e-12,\n 1.2861e-13, 1.0887e-13, 1.3246e-12, 8.6880e-13, 2.8659e-13, 1.1343e-11,\n 3.8710e-12, 4.9580e-14, 1.9520e-13, 1.5840e-12, 3.6573e-12, 1.4108e-13,\n 6.8328e-12, 8.0135e-13, 1.1346e-12, 1.1569e-13, 2.6369e-13, 5.3940e-13,\n 5.3837e-13, 8.9543e-12, 1.2976e-12, 6.9462e-14, 2.6203e-15, 2.6771e-12,\n 9.5264e-13, 3.8380e-13, 1.1476e-14, 5.1169e-16, 1.5547e-12, 1.9455e-13,\n 9.5000e-13, 4.4856e-13, 7.6735e-14, 8.7357e-13, 1.1061e-12, 1.1223e-12,\n 5.9256e-13, 2.5180e-12, 9.7715e-13, 2.7792e-13, 3.6265e-13, 2.1356e-13,\n 8.0111e-15, 1.0892e-14, 2.4930e-13, 2.0010e-14, 8.0212e-13, 3.4114e-14,\n 2.8329e-12, 9.4482e-13, 2.2296e-15, 1.7188e-12, 3.3241e-12, 1.1322e-12,\n 1.6975e-12, 3.3612e-13, 4.3049e-14, 2.6535e-14, 1.7393e-12, 1.8373e-13,\n 1.4619e-14, 7.1826e-13, 4.1851e-13, 1.1281e-12, 8.2974e-13, 2.1488e-13,\n 3.5587e-18, 7.7924e-13, 5.9344e-13, 5.2234e-12, 7.9930e-14, 7.3847e-13,\n 1.0046e-13, 5.0687e-13, 2.9547e-13, 8.1760e-16, 5.0275e-13, 6.5252e-14,\n 2.3339e-15, 3.7867e-13, 3.3861e-13, 9.6499e-12, 3.1794e-12, 2.2110e-12,\n 2.4088e-13, 1.1366e-11, 7.8515e-13, 1.4561e-12, 1.3533e-13, 1.5568e-13,\n 7.6674e-14, 8.3274e-13, 1.0122e-14, 2.9040e-12, 4.7688e-12, 5.3007e-13,\n 6.0189e-13, 9.0486e-14, 9.1532e-13, 1.0394e-11, 7.8628e-14, 1.1908e-11,\n 1.6517e-12, 7.3584e-12, 7.2209e-19, 6.7498e-12, 4.9169e-12, 2.4334e-12,\n 9.3127e-12, 4.1663e-14, 4.8620e-12, 1.8333e-14, 2.6535e-12, 4.4197e-12,\n 4.0054e-12, 2.9011e-12, 5.3551e-12, 8.1039e-12, 9.3198e-13, 1.8177e-12,\n 5.4742e-14, 3.1236e-12, 8.9741e-13, 8.2082e-13, 1.8979e-12, 2.5698e-12,\n 2.1592e-15, 5.0137e-13, 6.3034e-12, 2.3197e-14, 1.3258e-12, 4.8916e-13,\n 3.0621e-12, 1.2738e-11, 2.5328e-12, 3.2529e-14, 8.0335e-12, 1.2183e-13,\n 2.5910e-13, 2.1048e-12, 8.7397e-12, 7.1834e-13, 3.0863e-12, 4.6303e-12,\n 3.4096e-14, 8.5983e-15, 3.1518e-13, 1.5495e-12, 1.4783e-12, 4.0998e-13,\n 6.1718e-12, 3.1924e-12, 3.5549e-13, 3.4301e-15, 6.8498e-14, 9.7045e-15,\n 1.9378e-13, 8.7261e-13, 1.7645e-13, 1.0704e-13, 7.3898e-13, 8.9519e-13,\n 8.1484e-14, 2.2862e-14, 8.5662e-13, 1.9778e-13, 1.2283e-13, 1.7506e-14,\n 1.4273e-12, 4.1516e-15, 3.0870e-13, 1.3326e-12, 3.7939e-13, 1.7690e-13,\n 1.3220e-12, 4.4381e-13, 4.4533e-12, 1.4540e-13, 1.2418e-12, 1.0236e-13,\n 1.1248e-12, 1.1169e-11, 1.5764e-12, 3.3593e-12, 1.7183e-12, 5.0227e-12,\n 5.7447e-16, 6.0995e-14, 2.9216e-13, 4.3378e-13, 7.4785e-13, 6.1606e-12,\n 1.5466e-13, 4.6308e-13, 3.4629e-14, 5.2157e-13, 6.0078e-14, 1.3951e-12,\n 2.3874e-16, 9.3814e-13, 6.3384e-14, 6.9099e-13, 6.3577e-12, 3.2230e-12,\n 4.7347e-12, 1.2805e-12, 1.6204e-12, 5.0408e-12, 1.2801e-12, 2.4445e-14,\n 6.4812e-13, 5.7651e-12, 3.6876e-14, 7.6110e-13, 4.6088e-12, 1.6386e-12,\n 1.4455e-12, 1.3985e-12, 1.2104e-12, 3.6037e-14, 5.3157e-13, 2.8080e-13,\n 1.3027e-12, 3.5753e-13, 9.0030e-15, 8.2592e-14, 3.3352e-13, 3.5124e-14,\n 3.2309e-12, 8.9114e-13, 1.3643e-13, 1.0800e-12, 3.3078e-14, 1.2351e-12,\n 3.3174e-12, 4.2567e-13, 1.4104e-12, 4.3967e-13, 3.3542e-13, 3.4495e-12,\n 1.3397e-12, 3.8170e-12, 3.5707e-12, 7.7399e-13, 1.2279e-28, 5.4281e-30,\n 2.2146e-27, 1.5571e-28, 2.4794e-27, 9.7447e-29, 6.3920e-29, 1.3541e-27,\n 8.8735e-28, 1.2916e-27, 3.8992e-29, 4.8296e-29, 1.3545e-27, 1.2173e-27,\n 6.7107e-29, 1.4787e-28, 5.4476e-28, 3.4706e-31, 3.4437e-27, 5.4508e-28,\n 1.7937e-28, 1.2252e-27, 5.4490e-28, 5.5938e-28, 4.0901e-28, 1.3064e-28,\n 3.2096e-28, 3.6733e-29, 2.7755e-27, 5.7045e-28, 4.9553e-28, 4.4134e-28,\n 2.3889e-27, 7.2364e-28, 7.8060e-29, 2.7661e-28, 4.3956e-28, 6.2721e-30,\n 6.0376e-29, 1.8939e-29, 1.3039e-30, 5.8274e-29, 2.8857e-28, 4.1426e-30,\n 4.7328e-28, 1.3914e-28, 7.3526e-28, 3.7991e-30, 6.0136e-28, 5.1804e-30,\n 1.5853e-28, 1.8108e-32, 2.2263e-28, 1.0598e-27, 3.0751e-28, 5.9020e-29,\n 2.3024e-27, 2.9620e-28, 4.2783e-27, 3.4426e-28, 9.8902e-30, 5.5013e-29,\n 4.9942e-28, 2.9611e-28, 7.6291e-29, 3.9760e-28, 1.4552e-27, 1.9068e-27,\n 1.0155e-27, 4.1728e-29, 3.5528e-27, 2.5897e-27, 2.7216e-27, 4.5425e-27,\n 9.3976e-27, 2.9550e-28, 5.8564e-27, 1.2455e-28, 4.3133e-28, 8.6016e-29,\n 1.2676e-27, 5.0211e-27, 3.0000e-27, 1.9547e-27, 5.2008e-28, 9.4065e-28,\n 4.5777e-27, 1.0295e-26, 5.7657e-28, 1.5792e-27, 9.0902e-28, 4.7601e-27,\n 4.9945e-28, 2.9185e-29, 3.3120e-28, 3.1620e-28, 2.9406e-27, 9.9974e-30,\n 5.9955e-28, 4.3356e-30, 3.4088e-28, 1.6096e-27, 2.3664e-27, 2.0281e-28,\n 5.3500e-29, 2.3370e-28, 4.0437e-28, 2.3191e-29, 1.4244e-29, 6.6585e-29,\n 7.6972e-29, 1.3162e-29, 3.0464e-28, 3.8078e-28, 5.3862e-28, 4.8851e-28,\n 3.4742e-28, 4.5877e-30, 1.5496e-31, 8.9275e-30, 6.3231e-30, 6.5683e-28,\n 9.1070e-28, 6.6657e-28, 3.1473e-28, 8.5394e-28, 4.2325e-30, 1.4310e-27,\n 1.5236e-29, 1.7500e-28, 7.1282e-29, 2.2714e-27, 9.0791e-28, 1.4196e-27,\n 1.1810e-27, 2.5015e-29, 2.2250e-29, 2.6264e-28, 5.2583e-28, 1.6275e-27,\n 1.7203e-28, 8.1616e-29, 4.6830e-29, 7.0932e-30, 2.6699e-27, 2.5112e-28,\n 7.5474e-28, 5.6867e-29, 8.9415e-29, 5.7835e-27, 9.8314e-28, 1.1179e-29,\n 7.9791e-28, 9.4715e-28, 2.7491e-27, 1.6342e-27, 2.0932e-27, 1.0013e-27,\n 1.5111e-29, 1.1784e-26, 2.6698e-27, 3.2743e-29, 3.2294e-27, 8.2041e-28,\n 7.1402e-28, 4.2047e-29, 4.8475e-28, 2.3107e-28, 2.4069e-28, 9.4603e-28,\n 7.5661e-28, 9.0036e-32, 7.0199e-28, 7.0955e-29, 1.0924e-29, 1.6696e-28,\n 1.7096e-27, 1.2098e-27, 6.2551e-30, 9.5288e-28, 2.7661e-28, 1.9867e-28,\n 3.3293e-27, 3.3345e-27, 3.5294e-28, 1.8138e-29, 3.4073e-27, 3.1570e-27,\n 1.4721e-27, 1.8723e-28, 1.9106e-28, 7.5879e-28, 8.8983e-28, 9.9653e-28,\n 4.9038e-27, 3.4517e-28, 1.0812e-28, 1.7071e-27, 3.2952e-28, 1.6469e-27,\n 2.5991e-28, 2.8173e-28, 2.6719e-28, 1.2727e-27, 2.4833e-27, 1.7908e-27,\n 1.1929e-29, 4.5231e-29, 5.8124e-28, 2.4015e-31, 1.7628e-28, 1.1060e-27,\n 1.1805e-28, 3.4253e-28, 5.0537e-28, 1.0087e-27, 3.1515e-28, 5.0143e-28,\n 1.0378e-27, 5.6795e-28, 4.9043e-29, 1.1371e-28, 3.6224e-29, 6.5833e-29,\n 4.4744e-28, 2.9672e-28, 8.3693e-28, 2.8744e-27, 1.8124e-27, 1.2437e-27,\n 8.9012e-28, 1.0631e-27, 3.8396e-29, 2.2022e-27, 6.6647e-29, 1.0043e-28,\n 4.7376e-28, 2.7471e-28, 5.0053e-28, 1.0111e-28, 7.7574e-29, 1.5571e-32,\n 4.6990e-29, 3.8884e-29, 7.2331e-28, 1.2859e-27, 1.0003e-28, 8.6424e-28,\n 3.8025e-28, 1.1365e-27, 9.3906e-30, 3.1913e-28, 9.0955e-29, 1.3464e-27,\n 2.0880e-28, 1.2040e-28, 2.4986e-09, 3.9114e-08, 6.8851e-09, 6.5659e-09,\n 5.1950e-08, 4.6159e-08, 6.2966e-10, 3.3561e-10, 8.4139e-10, 8.4212e-09,\n 1.1154e-11, 2.2451e-09, 4.0754e-08, 1.3709e-09, 2.2557e-09, 1.0328e-08,\n 1.5163e-08, 3.1146e-09, 2.7762e-08, 1.0475e-08, 1.1618e-11, 3.2626e-11,\n 5.6344e-09, 3.7119e-08, 3.1271e-08, 1.0894e-10, 1.4086e-08, 7.5759e-10,\n 3.9633e-09, 8.0407e-10, 1.7528e-08, 1.4636e-08, 5.6730e-08, 6.0895e-10,\n 1.1606e-08, 1.6661e-08, 9.8282e-09, 9.9473e-09, 8.7729e-09, 3.4675e-10,\n 5.3726e-08, 1.7594e-10, 8.6912e-10, 1.5302e-09, 2.6018e-08, 2.2249e-09,\n 4.8656e-09, 2.0304e-09, 3.9053e-10, 3.7186e-09, 7.4541e-09, 1.3315e-09,\n 2.1742e-08, 2.0520e-08, 2.2115e-09, 1.5498e-09, 2.3981e-09, 1.7381e-10,\n 1.4041e-08, 5.3266e-09, 1.1354e-08, 1.1237e-08, 1.1831e-09, 3.0636e-10,\n 6.3250e-13, 3.0836e-11, 9.5246e-11, 2.8485e-10, 4.1080e-09, 2.9276e-10,\n 3.6582e-09, 9.5400e-09, 1.3730e-08, 2.1885e-09, 4.9595e-09, 6.2621e-11,\n 2.7969e-09, 8.2821e-08, 4.3456e-10, 4.1751e-09, 5.6054e-09, 3.8335e-08,\n 1.4070e-11, 1.0873e-08, 3.1329e-09, 1.5132e-08, 9.6606e-09, 1.1218e-08,\n 9.1663e-13, 3.5782e-08, 2.4054e-08, 2.1898e-11, 1.0773e-09, 2.5912e-08,\n 1.2036e-09, 5.0268e-09, 7.1047e-11, 1.7855e-08, 3.4377e-09, 1.6709e-08,\n 8.7089e-09, 3.0200e-08, 3.8035e-10, 7.3023e-09, 1.4972e-08, 5.4558e-08,\n 2.4126e-08, 4.2581e-11, 9.1474e-10, 9.1602e-09, 5.7255e-09, 1.0573e-09,\n 1.6131e-09, 1.9070e-09, 5.6556e-09, 8.4113e-10, 7.1263e-09, 1.4143e-10,\n 2.1889e-08, 2.7665e-10, 2.1447e-08, 2.4569e-08, 7.5702e-09, 2.2309e-10,\n 2.5872e-09, 3.0288e-10, 7.0827e-09, 1.6787e-08, 1.6762e-09, 4.3483e-10,\n 1.7453e-08, 8.6602e-10, 1.0193e-08, 5.7436e-09, 2.9590e-09, 2.0153e-11,\n 4.1118e-10, 4.3550e-08, 5.6122e-10, 3.0214e-08, 5.2171e-12, 1.4955e-08,\n 5.6666e-09, 5.6606e-09, 1.6707e-09, 2.0355e-08, 7.6573e-09, 2.9739e-09,\n 1.2344e-08, 3.8526e-11, 2.8055e-08, 1.7671e-09, 2.1757e-10, 2.9173e-09,\n 5.7679e-09, 2.2622e-08, 4.3595e-09, 1.0362e-08, 1.6073e-10, 2.6063e-08,\n 7.6202e-09, 7.7877e-09, 6.1113e-09, 4.7795e-08, 4.3530e-11, 1.3522e-09,\n 4.1473e-08, 3.2369e-09, 3.8239e-09, 1.9893e-10, 6.6263e-09, 1.7144e-08,\n 1.4897e-08, 2.2655e-08, 5.4972e-10, 6.7426e-09, 3.9347e-09, 3.5210e-09,\n 3.5985e-09, 2.2669e-09, 5.7359e-09, 1.4390e-08, 1.1052e-09, 2.4539e-08,\n 2.7404e-08, 6.6588e-09, 6.5398e-09, 6.2245e-09, 4.5019e-11, 1.3320e-09,\n 1.3565e-09, 2.9256e-09, 1.3235e-08, 1.6763e-09, 7.2938e-10, 1.0089e-08,\n 2.7841e-12, 2.4527e-08, 2.0740e-10, 2.6874e-08, 1.1433e-08, 3.2139e-08,\n 8.2959e-10, 5.4973e-09, 2.1587e-08, 8.1227e-09, 7.8862e-10, 2.0835e-08,\n 1.5874e-09, 1.0099e-08, 5.1302e-11, 1.5560e-09, 4.5448e-08, 9.9150e-09,\n 4.5614e-08, 5.1323e-09, 7.3280e-09, 1.8151e-11, 2.5346e-09, 6.3917e-09,\n 5.8774e-11, 4.3025e-10, 5.5214e-09, 1.7679e-09, 2.5401e-09, 1.4469e-09,\n 1.6393e-08, 1.6421e-09, 4.1467e-10, 1.3688e-10, 4.5325e-11, 7.8938e-11,\n 9.8910e-10, 1.3557e-08, 9.4581e-10, 1.9339e-09, 2.4170e-10, 2.3081e-08,\n 1.1318e-08, 2.1947e-08, 4.9937e-08, 1.0343e-09, 2.7029e-08, 1.1031e-08,\n 2.4495e-09, 9.1746e-08, 1.9799e-09, 9.4413e-09, 3.1655e-09, 2.2715e-09,\n 1.1817e-09, 1.0151e-08, 5.4682e-09, 2.6411e-10, 1.3679e-09, 2.3504e-08],\n device='cuda:0')" + "exp_avg_sq": "tensor([5.4132e-12, 1.7397e-13, 1.5531e-12, 1.0220e-13, 6.8432e-13, 1.0117e-12,\n 1.4939e-13, 1.2923e-13, 6.2956e-15, 1.0547e-12, 8.5550e-13, 1.4796e-12,\n 3.6750e-14, 3.1111e-14, 3.7853e-13, 2.4827e-13, 8.1897e-14, 3.2414e-12,\n 1.1062e-12, 1.4168e-14, 5.5779e-14, 4.5263e-13, 1.0451e-12, 4.0314e-14,\n 1.9525e-12, 2.2899e-13, 3.2423e-13, 3.3060e-14, 7.5353e-14, 1.5414e-13,\n 1.5384e-13, 2.5588e-12, 3.7081e-13, 1.9849e-14, 7.4876e-16, 7.6500e-13,\n 2.7223e-13, 1.0967e-13, 3.2793e-15, 1.4622e-16, 4.4427e-13, 5.5595e-14,\n 2.7147e-13, 1.2818e-13, 2.1928e-14, 2.4963e-13, 3.1609e-13, 3.2070e-13,\n 1.6933e-13, 7.1953e-13, 2.7923e-13, 7.9419e-14, 1.0363e-13, 6.1026e-14,\n 2.2892e-15, 3.1126e-15, 7.1239e-14, 5.7180e-15, 2.2921e-13, 9.7482e-15,\n 8.0954e-13, 2.6999e-13, 6.3713e-16, 4.9117e-13, 9.4990e-13, 3.2353e-13,\n 4.8509e-13, 9.6050e-14, 1.2301e-14, 7.5826e-15, 4.9701e-13, 5.2501e-14,\n 4.1775e-15, 2.0525e-13, 1.1959e-13, 3.2236e-13, 2.3710e-13, 6.1403e-14,\n 1.0169e-18, 2.2267e-13, 1.6958e-13, 1.4926e-12, 2.2841e-14, 2.1102e-13,\n 2.8707e-14, 1.4484e-13, 8.4433e-14, 2.3364e-16, 1.4367e-13, 1.8646e-14,\n 6.6693e-16, 1.0821e-13, 9.6760e-14, 2.7575e-12, 9.0854e-13, 6.3181e-13,\n 6.8834e-14, 3.2481e-12, 2.2436e-13, 4.1608e-13, 3.8672e-14, 4.4486e-14,\n 2.1910e-14, 2.3796e-13, 2.8925e-15, 8.2985e-13, 1.3627e-12, 1.5147e-13,\n 1.7199e-13, 2.5857e-14, 2.6156e-13, 2.9702e-12, 2.2469e-14, 3.4028e-12,\n 4.7200e-13, 2.1027e-12, 2.0634e-19, 1.9288e-12, 1.4050e-12, 6.9537e-13,\n 2.6612e-12, 1.1906e-14, 1.3893e-12, 5.2388e-15, 7.5825e-13, 1.2630e-12,\n 1.1446e-12, 8.2902e-13, 1.5303e-12, 2.3157e-12, 2.6632e-13, 5.1942e-13,\n 1.5643e-14, 8.9259e-13, 2.5644e-13, 2.3456e-13, 5.4234e-13, 7.3434e-13,\n 6.1700e-16, 1.4327e-13, 1.8013e-12, 6.6288e-15, 3.7886e-13, 1.3978e-13,\n 8.7503e-13, 3.6400e-12, 7.2377e-13, 9.2955e-15, 2.2956e-12, 3.4813e-14,\n 7.4039e-14, 6.0147e-13, 2.4974e-12, 2.0527e-13, 8.8194e-13, 1.3231e-12,\n 9.7432e-15, 2.4570e-15, 9.0065e-14, 4.4279e-13, 4.2243e-13, 1.1716e-13,\n 1.7636e-12, 9.1224e-13, 1.0158e-13, 9.8018e-16, 1.9574e-14, 2.7731e-15,\n 5.5374e-14, 2.4936e-13, 5.0422e-14, 3.0588e-14, 2.1117e-13, 2.5581e-13,\n 2.3285e-14, 6.5329e-15, 2.4479e-13, 5.6516e-14, 3.5099e-14, 5.0023e-15,\n 4.0785e-13, 1.1863e-15, 8.8212e-14, 3.8081e-13, 1.0841e-13, 5.0552e-14,\n 3.7777e-13, 1.2682e-13, 1.2726e-12, 4.1548e-14, 3.5486e-13, 2.9250e-14,\n 3.2142e-13, 3.1917e-12, 4.5048e-13, 9.5994e-13, 4.9102e-13, 1.4353e-12,\n 1.6416e-16, 1.7430e-14, 8.3488e-14, 1.2396e-13, 2.1370e-13, 1.7604e-12,\n 4.4196e-14, 1.3233e-13, 9.8956e-15, 1.4904e-13, 1.7168e-14, 3.9867e-13,\n 6.8222e-17, 2.6808e-13, 1.8113e-14, 1.9746e-13, 1.8168e-12, 9.2099e-13,\n 1.3530e-12, 3.6590e-13, 4.6303e-13, 1.4405e-12, 3.6581e-13, 6.9853e-15,\n 1.8520e-13, 1.6474e-12, 1.0538e-14, 2.1749e-13, 1.3170e-12, 4.6824e-13,\n 4.1306e-13, 3.9962e-13, 3.4587e-13, 1.0298e-14, 1.5190e-13, 8.0241e-14,\n 3.7225e-13, 1.0217e-13, 2.5727e-15, 2.3601e-14, 9.5305e-14, 1.0037e-14,\n 9.2326e-13, 2.5465e-13, 3.8985e-14, 3.0861e-13, 9.4522e-15, 3.5294e-13,\n 9.4798e-13, 1.2164e-13, 4.0304e-13, 1.2564e-13, 9.5848e-14, 9.8571e-13,\n 3.8284e-13, 1.0907e-12, 1.0203e-12, 2.2117e-13, 3.5089e-29, 1.5511e-30,\n 6.3283e-28, 4.4494e-29, 7.0852e-28, 2.7846e-29, 1.8266e-29, 3.8694e-28,\n 2.5357e-28, 3.6908e-28, 1.1142e-29, 1.3801e-29, 3.8705e-28, 3.4785e-28,\n 1.9176e-29, 4.2256e-29, 1.5567e-28, 9.9175e-32, 9.8407e-28, 1.5576e-28,\n 5.1256e-29, 3.5011e-28, 1.5571e-28, 1.5985e-28, 1.1688e-28, 3.7331e-29,\n 9.1718e-29, 1.0497e-29, 7.9312e-28, 1.6301e-28, 1.4160e-28, 1.2612e-28,\n 6.8264e-28, 2.0679e-28, 2.2306e-29, 7.9042e-29, 1.2561e-28, 1.7923e-30,\n 1.7253e-29, 5.4119e-30, 3.7259e-31, 1.6652e-29, 8.2462e-29, 1.1838e-30,\n 1.3524e-28, 3.9761e-29, 2.1011e-28, 1.0856e-30, 1.7184e-28, 1.4803e-30,\n 4.5301e-29, 5.1744e-33, 6.3617e-29, 3.0284e-28, 8.7874e-29, 1.6866e-29,\n 6.5793e-28, 8.4642e-29, 1.2226e-27, 9.8374e-29, 2.8262e-30, 1.5720e-29,\n 1.4271e-28, 8.4614e-29, 2.1801e-29, 1.1362e-28, 4.1582e-28, 5.4487e-28,\n 2.9017e-28, 1.1924e-29, 1.0152e-27, 7.4004e-28, 7.7771e-28, 1.2980e-27,\n 2.6854e-27, 8.4441e-29, 1.6735e-27, 3.5590e-29, 1.2326e-28, 2.4580e-29,\n 3.6223e-28, 1.4348e-27, 8.5728e-28, 5.5858e-28, 1.4862e-28, 2.6880e-28,\n 1.3081e-27, 2.9420e-27, 1.6476e-28, 4.5126e-28, 2.5976e-28, 1.3602e-27,\n 1.4272e-28, 8.3397e-30, 9.4642e-29, 9.0357e-29, 8.4031e-28, 2.8568e-30,\n 1.7133e-28, 1.2389e-30, 9.7410e-29, 4.5995e-28, 6.7622e-28, 5.7953e-29,\n 1.5288e-29, 6.6781e-29, 1.1555e-28, 6.6269e-30, 4.0705e-30, 1.9027e-29,\n 2.1995e-29, 3.7612e-30, 8.7054e-29, 1.0881e-28, 1.5392e-28, 1.3960e-28,\n 9.9277e-29, 1.3110e-30, 4.4281e-32, 2.5511e-30, 1.8069e-30, 1.8770e-28,\n 2.6024e-28, 1.9048e-28, 8.9937e-29, 2.4402e-28, 1.2095e-30, 4.0893e-28,\n 4.3537e-30, 5.0007e-29, 2.0369e-29, 6.4908e-28, 2.5944e-28, 4.0565e-28,\n 3.3748e-28, 7.1481e-30, 6.3580e-30, 7.5053e-29, 1.5026e-28, 4.6507e-28,\n 4.9160e-29, 2.3322e-29, 1.3382e-29, 2.0269e-30, 7.6294e-28, 7.1760e-29,\n 2.1567e-28, 1.6250e-29, 2.5551e-29, 1.6527e-27, 2.8094e-28, 3.1944e-30,\n 2.2801e-28, 2.7066e-28, 7.8557e-28, 4.6698e-28, 5.9814e-28, 2.8614e-28,\n 4.3182e-30, 3.3673e-27, 7.6292e-28, 9.3566e-30, 9.2283e-28, 2.3444e-28,\n 2.0404e-28, 1.2015e-29, 1.3852e-28, 6.6030e-29, 6.8780e-29, 2.7034e-28,\n 2.1621e-28, 2.5729e-32, 2.0060e-28, 2.0276e-29, 3.1217e-30, 4.7710e-29,\n 4.8852e-28, 3.4571e-28, 1.7875e-30, 2.7229e-28, 7.9044e-29, 5.6772e-29,\n 9.5136e-28, 9.5287e-28, 1.0085e-28, 5.1830e-30, 9.7366e-28, 9.0213e-28,\n 4.2066e-28, 5.3503e-29, 5.4597e-29, 2.1683e-28, 2.5428e-28, 2.8477e-28,\n 1.4013e-27, 9.8635e-29, 3.0897e-29, 4.8783e-28, 9.4163e-29, 4.7062e-28,\n 7.4272e-29, 8.0507e-29, 7.6351e-29, 3.6369e-28, 7.0961e-28, 5.1172e-28,\n 3.4087e-30, 1.2925e-29, 1.6609e-28, 6.8624e-32, 5.0372e-29, 3.1605e-28,\n 3.3733e-29, 9.7882e-29, 1.4441e-28, 2.8824e-28, 9.0057e-29, 1.4329e-28,\n 2.9657e-28, 1.6230e-28, 1.4014e-29, 3.2494e-29, 1.0351e-29, 1.8812e-29,\n 1.2786e-28, 8.4791e-29, 2.3916e-28, 8.2138e-28, 5.1789e-28, 3.5540e-28,\n 2.5436e-28, 3.0378e-28, 1.0972e-29, 6.2930e-28, 1.9045e-29, 2.8699e-29,\n 1.3538e-28, 7.8502e-29, 1.4303e-28, 2.8894e-29, 2.2167e-29, 4.4495e-33,\n 1.3428e-29, 1.1111e-29, 2.0669e-28, 3.6744e-28, 2.8585e-29, 2.4696e-28,\n 1.0866e-28, 3.2475e-28, 2.6835e-30, 9.1194e-29, 2.5991e-29, 3.8474e-28,\n 5.9666e-29, 3.4405e-29, 7.1399e-10, 1.1177e-08, 1.9675e-09, 1.8763e-09,\n 1.4845e-08, 1.3190e-08, 1.7993e-10, 9.5904e-11, 2.4043e-10, 2.4064e-09,\n 3.1874e-12, 6.4155e-10, 1.1646e-08, 3.9173e-10, 6.4458e-10, 2.9514e-09,\n 4.3329e-09, 8.9002e-10, 7.9333e-09, 2.9932e-09, 3.3198e-12, 9.3232e-12,\n 1.6101e-09, 1.0607e-08, 8.9361e-09, 3.1132e-11, 4.0251e-09, 2.1649e-10,\n 1.1326e-09, 2.2977e-10, 5.0089e-09, 4.1825e-09, 1.6211e-08, 1.7401e-10,\n 3.3165e-09, 4.7609e-09, 2.8085e-09, 2.8425e-09, 2.5069e-09, 9.9086e-11,\n 1.5353e-08, 5.0276e-11, 2.4836e-10, 4.3728e-10, 7.4349e-09, 6.3579e-10,\n 1.3904e-09, 5.8019e-10, 1.1160e-10, 1.0626e-09, 2.1301e-09, 3.8048e-10,\n 6.2130e-09, 5.8638e-09, 6.3195e-10, 4.4286e-10, 6.8526e-10, 4.9667e-11,\n 4.0125e-09, 1.5221e-09, 3.2444e-09, 3.2109e-09, 3.3807e-10, 8.7544e-11,\n 1.8074e-13, 8.8117e-12, 2.7217e-11, 8.1398e-11, 1.1739e-09, 8.3657e-11,\n 1.0454e-09, 2.7261e-09, 3.9234e-09, 6.2538e-10, 1.4172e-09, 1.7894e-11,\n 7.9923e-10, 2.3667e-08, 1.2418e-10, 1.1931e-09, 1.6018e-09, 1.0955e-08,\n 4.0205e-12, 3.1071e-09, 8.9526e-10, 4.3239e-09, 2.7606e-09, 3.2057e-09,\n 2.6194e-13, 1.0225e-08, 6.8735e-09, 6.2574e-12, 3.0784e-10, 7.4047e-09,\n 3.4392e-10, 1.4365e-09, 2.0302e-11, 5.1021e-09, 9.8236e-10, 4.7747e-09,\n 2.4886e-09, 8.6299e-09, 1.0869e-10, 2.0867e-09, 4.2783e-09, 1.5590e-08,\n 6.8941e-09, 1.2168e-11, 2.6139e-10, 2.6176e-09, 1.6361e-09, 3.0214e-10,\n 4.6096e-10, 5.4494e-10, 1.6161e-09, 2.4036e-10, 2.0364e-09, 4.0415e-11,\n 6.2549e-09, 7.9055e-11, 6.1285e-09, 7.0209e-09, 2.1632e-09, 6.3750e-11,\n 7.3932e-10, 8.6549e-11, 2.0239e-09, 4.7969e-09, 4.7898e-10, 1.2426e-10,\n 4.9874e-09, 2.4747e-10, 2.9128e-09, 1.6413e-09, 8.4555e-10, 5.7589e-12,\n 1.1750e-10, 1.2445e-08, 1.6037e-10, 8.6340e-09, 1.4908e-12, 4.2736e-09,\n 1.6193e-09, 1.6175e-09, 4.7741e-10, 5.8166e-09, 2.1881e-09, 8.4980e-10,\n 3.5274e-09, 1.1009e-11, 8.0170e-09, 5.0497e-10, 6.2172e-11, 8.3364e-10,\n 1.6482e-09, 6.4645e-09, 1.2458e-09, 2.9611e-09, 4.5930e-11, 7.4478e-09,\n 2.1775e-09, 2.2254e-09, 1.7463e-09, 1.3658e-08, 1.2439e-11, 3.8641e-10,\n 1.1851e-08, 9.2497e-10, 1.0927e-09, 5.6846e-11, 1.8935e-09, 4.8989e-09,\n 4.2568e-09, 6.4737e-09, 1.5709e-10, 1.9267e-09, 1.1244e-09, 1.0062e-09,\n 1.0283e-09, 6.4778e-10, 1.6391e-09, 4.1121e-09, 3.1583e-10, 7.0122e-09,\n 7.8309e-09, 1.9028e-09, 1.8688e-09, 1.7787e-09, 1.2865e-11, 3.8062e-10,\n 3.8764e-10, 8.3601e-10, 3.7819e-09, 4.7902e-10, 2.0843e-10, 2.8830e-09,\n 7.9559e-13, 7.0089e-09, 5.9267e-11, 7.6795e-09, 3.2672e-09, 9.1839e-09,\n 2.3706e-10, 1.5709e-09, 6.1686e-09, 2.3211e-09, 2.2535e-10, 5.9539e-09,\n 4.5362e-10, 2.8859e-09, 1.4660e-11, 4.4464e-10, 1.2987e-08, 2.8333e-09,\n 1.3035e-08, 1.4666e-09, 2.0940e-09, 5.1867e-12, 7.2429e-10, 1.8265e-09,\n 1.6795e-11, 1.2295e-10, 1.5778e-09, 5.0519e-10, 7.2586e-10, 4.1345e-10,\n 4.6845e-09, 4.6924e-10, 1.1850e-10, 3.9115e-11, 1.2952e-11, 2.2557e-11,\n 2.8264e-10, 3.8740e-09, 2.7027e-10, 5.5263e-10, 6.9069e-11, 6.5955e-09,\n 3.2343e-09, 6.2715e-09, 1.4270e-08, 2.9555e-10, 7.7238e-09, 3.1521e-09,\n 6.9997e-10, 2.6217e-08, 5.6577e-10, 2.6979e-09, 9.0457e-10, 6.4911e-10,\n 3.3768e-10, 2.9007e-09, 1.5626e-09, 7.5473e-11, 3.9088e-10, 6.7166e-09],\n device='cuda:0')" }, "36": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[8.3694e-10, 4.6735e-13, 4.6736e-09, ..., 1.2103e-11, 1.2184e-10,\n 3.3681e-10],\n [1.1344e-09, 1.1823e-13, 6.6290e-09, ..., 1.8972e-11, 1.8005e-10,\n 4.7550e-10],\n [6.4060e-10, 8.1485e-14, 3.7466e-09, ..., 9.8993e-12, 1.0663e-10,\n 2.4227e-10],\n ...,\n [4.8473e-10, 6.9188e-14, 2.9398e-09, ..., 4.5469e-12, 7.3345e-11,\n 1.9500e-10],\n [6.3681e-11, 1.7031e-13, 3.5078e-10, ..., 1.2200e-12, 6.8951e-12,\n 2.6436e-11],\n [3.3694e-10, 4.0601e-13, 1.9772e-09, ..., 5.7542e-12, 4.6046e-11,\n 1.3530e-10]], device='cuda:0')" + "exp_avg_sq": "tensor([[2.3916e-10, 1.3355e-13, 1.3355e-09, ..., 3.4586e-12, 3.4818e-11,\n 9.6246e-11],\n [3.2417e-10, 3.3785e-14, 1.8943e-09, ..., 5.4215e-12, 5.1450e-11,\n 1.3588e-10],\n [1.8306e-10, 2.3285e-14, 1.0706e-09, ..., 2.8288e-12, 3.0470e-11,\n 6.9231e-11],\n ...,\n [1.3852e-10, 1.9771e-14, 8.4007e-10, ..., 1.2993e-12, 2.0959e-11,\n 5.5722e-11],\n [1.8197e-11, 4.8669e-14, 1.0024e-10, ..., 3.4863e-13, 1.9703e-12,\n 7.5544e-12],\n [9.6284e-11, 1.1602e-13, 5.6499e-10, ..., 1.6443e-12, 1.3158e-11,\n 3.8664e-11]], device='cuda:0')" }, "37": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([3.0278e-08, 4.2122e-08, 2.2526e-08, 1.1960e-07, 3.6297e-10, 3.4689e-09,\n 6.3526e-08, 1.1928e-07, 9.6481e-08, 4.8045e-08, 2.0824e-09, 1.0032e-07,\n 4.9560e-09, 2.4715e-10, 1.3045e-08, 1.2464e-10, 5.4488e-08, 1.0319e-07,\n 2.8858e-08, 6.1650e-09, 1.1236e-08, 8.2696e-09, 3.9366e-08, 5.0464e-10,\n 1.3117e-09, 1.1131e-08, 1.1477e-09, 1.6780e-08, 7.9520e-09, 2.4966e-08,\n 5.4590e-11, 1.3397e-08, 1.3556e-08, 4.7212e-09, 6.0783e-10, 2.6578e-08,\n 9.1005e-10, 1.5282e-07, 2.5627e-08, 1.8239e-09, 5.7572e-08, 3.7503e-09,\n 1.1020e-08, 9.9646e-10, 2.0108e-08, 3.4486e-08, 4.3303e-10, 4.5049e-11,\n 2.8289e-08, 2.5703e-08, 3.6717e-08, 1.5303e-09, 1.5958e-08, 5.9812e-08,\n 7.6409e-10, 6.6833e-09, 6.0457e-08, 2.3985e-10, 2.7744e-09, 4.9809e-09,\n 2.0908e-08, 1.6365e-08, 3.3667e-09, 3.1989e-08, 2.0111e-10, 5.0423e-08,\n 6.8678e-09, 1.3130e-08, 1.4785e-08, 5.4138e-09, 2.5434e-09, 4.9828e-08,\n 6.1051e-08, 1.2791e-07, 8.2542e-10, 9.9865e-11, 5.0368e-09, 4.3005e-08,\n 6.5166e-10, 1.2368e-10, 1.0109e-08, 6.4100e-10, 2.5630e-08, 4.4027e-09,\n 4.8446e-09, 2.5042e-09, 4.8932e-09, 7.0352e-09, 7.2524e-09, 8.4610e-08,\n 7.3736e-11, 5.0303e-08, 2.0860e-08, 5.6048e-08, 1.3659e-07, 8.7062e-08,\n 8.8410e-09, 1.1919e-07, 4.2847e-09, 1.5065e-09, 5.3054e-08, 1.8238e-11,\n 2.3128e-08, 1.3973e-08, 1.3172e-09, 3.6967e-08, 2.2218e-11, 2.1771e-07,\n 4.5104e-08, 4.6792e-08, 1.6537e-10, 1.3535e-09, 4.1915e-09, 4.1814e-08,\n 1.3181e-08, 3.5137e-08, 7.4303e-11, 4.3430e-08, 5.4611e-08, 2.1893e-08,\n 7.3014e-08, 2.8844e-08, 2.2545e-08, 3.2309e-09, 1.5275e-07, 8.0100e-08,\n 2.0359e-08, 8.4137e-09, 1.7443e-08, 6.2083e-11, 1.8164e-07, 1.7653e-08,\n 2.2931e-10, 6.7719e-11, 1.1798e-07, 4.5510e-08, 6.8527e-09, 3.0112e-09,\n 2.7846e-10, 9.1177e-11, 7.5215e-08, 1.5644e-10, 1.1286e-07, 2.1960e-09,\n 2.5478e-09, 5.4009e-09, 4.4940e-08, 3.7695e-08, 1.0947e-08, 1.3183e-10,\n 2.1706e-09, 6.9260e-10, 8.2188e-11, 7.4310e-08, 6.5752e-09, 9.7488e-08,\n 2.3390e-09, 5.4649e-08, 7.3940e-09, 7.4058e-08, 1.9360e-08, 3.1512e-09,\n 3.3246e-08, 2.8923e-08, 4.0128e-08, 8.8438e-08, 1.8581e-07, 8.7791e-11,\n 3.4864e-10, 1.6908e-08, 9.7773e-09, 1.4886e-10, 5.9959e-08, 7.7176e-09,\n 8.6685e-08, 1.0810e-10, 4.0510e-08, 4.5204e-09, 1.9503e-08, 1.9573e-08,\n 2.0868e-07, 2.0778e-08, 1.3417e-08, 1.9946e-11, 3.9092e-09, 6.0593e-08,\n 2.4750e-10, 2.2137e-08, 5.6398e-08, 5.0250e-09, 5.7622e-08, 5.0104e-09,\n 1.4526e-08, 1.3296e-08, 8.7857e-08, 4.7014e-08, 5.5317e-08, 4.0607e-10,\n 1.7622e-08, 1.1286e-07, 1.2067e-10, 1.0468e-08, 5.3961e-09, 7.9129e-08,\n 7.7309e-08, 1.1058e-08, 3.6575e-08, 7.5450e-09, 5.4045e-10, 2.0616e-10,\n 2.4483e-10, 1.2312e-08, 6.3438e-08, 5.6463e-08, 2.0375e-08, 7.0879e-08,\n 8.6092e-09, 4.2586e-08, 3.1023e-10, 1.1175e-09, 2.1339e-11, 6.0146e-09,\n 6.6716e-08, 4.9719e-09, 1.6767e-09, 3.4338e-08, 9.6663e-09, 3.0184e-08,\n 2.1465e-09, 4.2577e-08, 5.9586e-10, 2.3298e-08, 7.2417e-08, 3.0546e-10,\n 6.0301e-08, 1.3531e-09, 1.4318e-08, 5.3745e-11, 1.1369e-07, 2.2915e-09,\n 1.0421e-08, 2.3526e-09, 2.7524e-09, 1.6122e-08, 2.3726e-08, 1.6441e-08,\n 5.9434e-08, 2.2601e-08, 9.6215e-08, 9.2450e-09, 5.0041e-08, 1.0848e-12,\n 3.9442e-09, 1.8584e-08, 2.4022e-09, 1.2787e-08], device='cuda:0')" + "exp_avg_sq": "tensor([8.6521e-09, 1.2037e-08, 6.4371e-09, 3.4176e-08, 1.0372e-10, 9.9126e-10,\n 1.8153e-08, 3.4086e-08, 2.7570e-08, 1.3729e-08, 5.9507e-10, 2.8667e-08,\n 1.4162e-09, 7.0624e-11, 3.7278e-09, 3.5617e-11, 1.5570e-08, 2.9486e-08,\n 8.2463e-09, 1.7617e-09, 3.2107e-09, 2.3631e-09, 1.1249e-08, 1.4420e-10,\n 3.7484e-10, 3.1807e-09, 3.2796e-10, 4.7950e-09, 2.2723e-09, 7.1341e-09,\n 1.5600e-11, 3.8282e-09, 3.8737e-09, 1.3491e-09, 1.7369e-10, 7.5948e-09,\n 2.6005e-10, 4.3670e-08, 7.3232e-09, 5.2118e-10, 1.6452e-08, 1.0717e-09,\n 3.1490e-09, 2.8475e-10, 5.7460e-09, 9.8548e-09, 1.2374e-10, 1.2873e-11,\n 8.0839e-09, 7.3448e-09, 1.0492e-08, 4.3729e-10, 4.5600e-09, 1.7092e-08,\n 2.1834e-10, 1.9098e-09, 1.7276e-08, 6.8540e-11, 7.9279e-10, 1.4233e-09,\n 5.9747e-09, 4.6765e-09, 9.6207e-10, 9.1411e-09, 5.7467e-11, 1.4409e-08,\n 1.9625e-09, 3.7520e-09, 4.2249e-09, 1.5470e-09, 7.2679e-10, 1.4239e-08,\n 1.7446e-08, 3.6551e-08, 2.3587e-10, 2.8537e-11, 1.4393e-09, 1.2289e-08,\n 1.8622e-10, 3.5342e-11, 2.8888e-09, 1.8317e-10, 7.3241e-09, 1.2581e-09,\n 1.3844e-09, 7.1560e-10, 1.3983e-09, 2.0104e-09, 2.0724e-09, 2.4178e-08,\n 2.1071e-11, 1.4375e-08, 5.9609e-09, 1.6016e-08, 3.9033e-08, 2.4879e-08,\n 2.5264e-09, 3.4061e-08, 1.2244e-09, 4.3048e-10, 1.5161e-08, 5.2118e-12,\n 6.6090e-09, 3.9928e-09, 3.7640e-10, 1.0564e-08, 6.3490e-12, 6.2212e-08,\n 1.2889e-08, 1.3371e-08, 4.7254e-11, 3.8677e-10, 1.1978e-09, 1.1949e-08,\n 3.7665e-09, 1.0041e-08, 2.1233e-11, 1.2410e-08, 1.5605e-08, 6.2562e-09,\n 2.0864e-08, 8.2424e-09, 6.4423e-09, 9.2326e-10, 4.3649e-08, 2.2889e-08,\n 5.8177e-09, 2.4043e-09, 4.9843e-09, 1.7741e-11, 5.1904e-08, 5.0444e-09,\n 6.5528e-11, 1.9351e-11, 3.3714e-08, 1.3005e-08, 1.9582e-09, 8.6047e-10,\n 7.9572e-11, 2.6055e-11, 2.1493e-08, 4.4705e-11, 3.2250e-08, 6.2753e-10,\n 7.2805e-10, 1.5434e-09, 1.2842e-08, 1.0772e-08, 3.1283e-09, 3.7672e-11,\n 6.2025e-10, 1.9792e-10, 2.3486e-11, 2.1235e-08, 1.8789e-09, 2.7858e-08,\n 6.6839e-10, 1.5616e-08, 2.1129e-09, 2.1163e-08, 5.5321e-09, 9.0048e-10,\n 9.5002e-09, 8.2650e-09, 1.1467e-08, 2.5272e-08, 5.3098e-08, 2.5087e-11,\n 9.9628e-11, 4.8316e-09, 2.7939e-09, 4.2537e-11, 1.7134e-08, 2.2054e-09,\n 2.4771e-08, 3.0890e-11, 1.1576e-08, 1.2917e-09, 5.5733e-09, 5.5932e-09,\n 5.9631e-08, 5.9374e-09, 3.8339e-09, 5.6999e-12, 1.1171e-09, 1.7315e-08,\n 7.0726e-11, 6.3258e-09, 1.6116e-08, 1.4359e-09, 1.6466e-08, 1.4318e-09,\n 4.1509e-09, 3.7994e-09, 2.5106e-08, 1.3435e-08, 1.5807e-08, 1.1604e-10,\n 5.0356e-09, 3.2251e-08, 3.4481e-11, 2.9914e-09, 1.5420e-09, 2.2612e-08,\n 2.2092e-08, 3.1598e-09, 1.0452e-08, 2.1560e-09, 1.5444e-10, 5.8911e-11,\n 6.9963e-11, 3.5183e-09, 1.8128e-08, 1.6135e-08, 5.8224e-09, 2.0254e-08,\n 2.4601e-09, 1.2169e-08, 8.8650e-11, 3.1932e-10, 6.0976e-12, 1.7187e-09,\n 1.9065e-08, 1.4207e-09, 4.7914e-10, 9.8124e-09, 2.7622e-09, 8.6252e-09,\n 6.1337e-10, 1.2167e-08, 1.7027e-10, 6.6576e-09, 2.0694e-08, 8.7289e-11,\n 1.7231e-08, 3.8666e-10, 4.0916e-09, 1.5358e-11, 3.2489e-08, 6.5481e-10,\n 2.9780e-09, 6.7227e-10, 7.8653e-10, 4.6069e-09, 6.7799e-09, 4.6983e-09,\n 1.6984e-08, 6.4585e-09, 2.7494e-08, 2.6418e-09, 1.4300e-08, 3.0999e-13,\n 1.1271e-09, 5.3104e-09, 6.8644e-10, 3.6539e-09], device='cuda:0')" }, "38": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[2.1184e-07, 5.6076e-08, 8.0267e-08, ..., 1.0224e-08, 6.2404e-08,\n 1.1523e-07],\n [2.1874e-08, 5.6856e-09, 8.5121e-09, ..., 1.1335e-09, 6.3006e-09,\n 1.1424e-08],\n [2.3828e-08, 6.1433e-09, 8.8690e-09, ..., 1.0915e-09, 7.0728e-09,\n 1.3213e-08],\n [2.4965e-08, 6.8926e-09, 9.3854e-09, ..., 1.1838e-09, 7.4531e-09,\n 1.3838e-08]], device='cuda:0')" + "exp_avg_sq": "tensor([[6.0536e-08, 1.6024e-08, 2.2937e-08, ..., 2.9215e-09, 1.7832e-08,\n 3.2929e-08],\n [6.2506e-09, 1.6247e-09, 2.4324e-09, ..., 3.2392e-10, 1.8005e-09,\n 3.2645e-09],\n [6.8091e-09, 1.7555e-09, 2.5344e-09, ..., 3.1189e-10, 2.0211e-09,\n 3.7756e-09],\n [7.1340e-09, 1.9696e-09, 2.6820e-09, ..., 3.3829e-10, 2.1298e-09,\n 3.9542e-09]], device='cuda:0')" }, "39": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([5.3123e-06, 5.4737e-07, 5.9227e-07, 6.3267e-07], device='cuda:0')" + "exp_avg_sq": "tensor([1.5180e-06, 1.5641e-07, 1.6925e-07, 1.8079e-07], device='cuda:0')" }, "40": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[2.1184e-07, 5.6076e-08, 8.0267e-08, ..., 1.0224e-08, 6.2404e-08,\n 1.1523e-07],\n [2.1874e-08, 5.6856e-09, 8.5121e-09, ..., 1.1335e-09, 6.3006e-09,\n 1.1424e-08],\n [2.3828e-08, 6.1433e-09, 8.8690e-09, ..., 1.0915e-09, 7.0728e-09,\n 1.3213e-08],\n [2.4965e-08, 6.8926e-09, 9.3854e-09, ..., 1.1839e-09, 7.4531e-09,\n 1.3838e-08]], device='cuda:0')" + "exp_avg_sq": "tensor([[6.0536e-08, 1.6024e-08, 2.2937e-08, ..., 2.9215e-09, 1.7832e-08,\n 3.2929e-08],\n [6.2506e-09, 1.6247e-09, 2.4324e-09, ..., 3.2392e-10, 1.8005e-09,\n 3.2645e-09],\n [6.8091e-09, 1.7555e-09, 2.5344e-09, ..., 3.1189e-10, 2.0211e-09,\n 3.7756e-09],\n [7.1340e-09, 1.9696e-09, 2.6820e-09, ..., 3.3829e-10, 2.1298e-09,\n 3.9542e-09]], device='cuda:0')" }, "41": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([5.3123e-06, 5.4737e-07, 5.9227e-07, 6.3267e-07], device='cuda:0')" + "exp_avg_sq": "tensor([1.5180e-06, 1.5641e-07, 1.6925e-07, 1.8079e-07], device='cuda:0')" }, "42": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[2.1184e-07, 5.6076e-08, 8.0267e-08, ..., 1.0224e-08, 6.2404e-08,\n 1.1523e-07],\n [2.1874e-08, 5.6856e-09, 8.5121e-09, ..., 1.1335e-09, 6.3006e-09,\n 1.1424e-08],\n [2.3828e-08, 6.1433e-09, 8.8690e-09, ..., 1.0915e-09, 7.0728e-09,\n 1.3213e-08],\n [2.4965e-08, 6.8926e-09, 9.3854e-09, ..., 1.1838e-09, 7.4531e-09,\n 1.3838e-08]], device='cuda:0')" + "exp_avg_sq": "tensor([[6.0536e-08, 1.6024e-08, 2.2937e-08, ..., 2.9215e-09, 1.7832e-08,\n 3.2929e-08],\n [6.2506e-09, 1.6247e-09, 2.4324e-09, ..., 3.2392e-10, 1.8005e-09,\n 3.2645e-09],\n [6.8091e-09, 1.7555e-09, 2.5344e-09, ..., 3.1189e-10, 2.0211e-09,\n 3.7756e-09],\n [7.1340e-09, 1.9696e-09, 2.6820e-09, ..., 3.3829e-10, 2.1298e-09,\n 3.9542e-09]], device='cuda:0')" }, "43": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([5.3123e-06, 5.4737e-07, 5.9227e-07, 6.3267e-07], device='cuda:0')" + "exp_avg_sq": "tensor([1.5180e-06, 1.5641e-07, 1.6925e-07, 1.8079e-07], device='cuda:0')" }, "8": { - "step": "tensor(2504.)", - "exp_avg": "tensor([[-4.4523e-06, 1.1523e-06, 0.0000e+00, ..., 3.5463e-07,\n -1.4896e-06, 6.8977e-07],\n [-2.0151e-07, 1.6960e-06, 0.0000e+00, ..., -2.3096e-07,\n -6.8775e-07, -3.3926e-07],\n [ 1.2007e-06, 7.4930e-07, 0.0000e+00, ..., -7.8574e-09,\n -5.8041e-08, 1.2709e-06],\n ...,\n [ 6.1018e-07, 1.3851e-06, 0.0000e+00, ..., -1.8327e-07,\n -1.4359e-06, 1.8392e-09],\n [-5.3767e-07, 9.5174e-07, 0.0000e+00, ..., 1.5572e-07,\n -1.4112e-06, 1.0046e-07],\n [-1.2861e-07, 1.6370e-07, 0.0000e+00, ..., -1.8408e-07,\n 2.3764e-07, 3.4351e-07]], device='cuda:0')", - "exp_avg_sq": "tensor([[5.2666e-11, 2.4534e-11, 0.0000e+00, ..., 1.0180e-10, 1.3348e-10,\n 9.1413e-11],\n [5.9084e-11, 7.7246e-11, 0.0000e+00, ..., 8.7205e-11, 8.4473e-10,\n 1.9648e-11],\n [1.9848e-10, 2.7334e-11, 0.0000e+00, ..., 5.7313e-11, 3.1346e-10,\n 1.1452e-10],\n ...,\n [9.5413e-11, 5.8716e-11, 0.0000e+00, ..., 3.6664e-11, 1.8609e-10,\n 2.3802e-11],\n [1.1595e-10, 1.2614e-10, 0.0000e+00, ..., 1.6966e-10, 2.0447e-10,\n 7.5163e-12],\n [6.5990e-11, 2.3642e-11, 0.0000e+00, ..., 4.4316e-11, 3.2325e-11,\n 3.8416e-11]], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([[-4.8903e-07, -1.4045e-07, 0.0000e+00, ..., -7.3340e-07,\n 1.4998e-05, 2.1285e-07],\n [-1.9784e-08, 1.2568e-06, 0.0000e+00, ..., -1.4949e-07,\n -3.2219e-06, -4.5879e-07],\n [ 1.6308e-07, 5.0047e-07, 5.6052e-45, ..., -1.2668e-06,\n -2.1327e-06, -3.3276e-07],\n ...,\n [ 3.7087e-07, 2.1190e-06, 0.0000e+00, ..., 2.7206e-06,\n 3.8326e-06, -1.6441e-08],\n [-1.9052e-07, 3.0003e-07, 4.2088e-11, ..., 5.3748e-07,\n 9.0616e-06, 2.4403e-07],\n [ 1.0096e-07, 3.6192e-08, 0.0000e+00, ..., -4.0276e-08,\n -8.6625e-08, 8.4811e-09]], device='cuda:0')", + "exp_avg_sq": "tensor([[5.7180e-11, 9.6704e-12, 0.0000e+00, ..., 6.1756e-11, 8.3537e-11,\n 7.5953e-11],\n [3.4743e-11, 5.2152e-11, 0.0000e+00, ..., 3.9450e-11, 6.6994e-10,\n 2.2686e-11],\n [1.5295e-10, 1.7517e-11, 9.5494e-17, ..., 3.3178e-11, 2.2306e-10,\n 4.8251e-11],\n ...,\n [6.5790e-11, 6.3602e-11, 0.0000e+00, ..., 2.4873e-11, 2.1711e-10,\n 9.2870e-12],\n [7.1215e-11, 7.2712e-11, 3.3815e-18, ..., 1.1008e-10, 1.3688e-10,\n 1.2221e-11],\n [8.9506e-11, 3.5121e-11, 0.0000e+00, ..., 3.3409e-11, 1.1314e-11,\n 1.1286e-11]], device='cuda:0')" }, "9": { - "step": "tensor(2504.)", - "exp_avg": "tensor([-1.9333e-06, -4.2817e-06, 1.0080e-05, ..., -4.0209e-06,\n -9.4793e-06, 1.8024e-06], device='cuda:0')", - "exp_avg_sq": "tensor([8.8601e-09, 2.0776e-08, 1.4356e-08, ..., 1.3059e-08, 1.3017e-08,\n 4.9354e-09], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([ 2.4792e-05, 1.3956e-05, -4.1845e-06, ..., 4.0973e-05,\n -1.0948e-05, 3.6630e-08], device='cuda:0')", + "exp_avg_sq": "tensor([6.9141e-09, 1.2114e-08, 8.5247e-09, ..., 9.6235e-09, 8.3769e-09,\n 5.0240e-09], device='cuda:0')" }, "10": { - "step": "tensor(2504.)", - "exp_avg": "tensor([[-6.4523e-07, 3.1645e-07, -4.4169e-07, ..., 3.6505e-08,\n 5.2030e-07, 5.2288e-10],\n [-7.5371e-08, 5.2285e-07, 1.9666e-07, ..., -1.6403e-06,\n -6.7216e-07, 3.5034e-08],\n [-7.4451e-07, -8.2593e-07, -4.5041e-07, ..., -3.8111e-07,\n -2.1297e-07, -4.1838e-07],\n ...,\n [-1.9955e-06, -1.9980e-08, 6.7451e-07, ..., 6.9857e-07,\n 1.1548e-06, 2.4766e-07],\n [ 1.5569e-06, 5.5988e-07, 2.6527e-07, ..., 1.7804e-06,\n -7.9711e-08, 5.6187e-07],\n [ 2.0127e-07, 1.6426e-06, -8.8116e-07, ..., 1.2192e-06,\n -3.6353e-07, 4.3646e-07]], device='cuda:0')", - "exp_avg_sq": "tensor([[2.2961e-11, 2.8509e-11, 2.0315e-11, ..., 9.4200e-12, 1.9704e-11,\n 3.5367e-11],\n [1.0941e-11, 5.4378e-11, 2.0592e-11, ..., 2.0660e-11, 1.7938e-11,\n 1.1181e-11],\n [1.6481e-11, 4.8725e-11, 2.3653e-11, ..., 2.5206e-11, 2.5710e-11,\n 2.2670e-11],\n ...,\n [3.8730e-11, 5.8155e-11, 2.8541e-11, ..., 2.6765e-11, 2.1327e-11,\n 7.1015e-11],\n [2.3826e-11, 4.7132e-11, 2.2704e-11, ..., 2.4364e-11, 1.8158e-11,\n 1.4312e-11],\n [1.7663e-11, 4.7892e-11, 2.9680e-11, ..., 4.1836e-11, 2.4641e-11,\n 1.7300e-11]], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([[-7.2394e-07, 9.9176e-07, 3.3189e-07, ..., 1.7271e-07,\n -1.6048e-07, 1.9659e-07],\n [ 2.8910e-07, 2.1317e-06, 3.0884e-07, ..., -6.8876e-07,\n -9.0864e-08, 3.8183e-07],\n [ 5.6754e-07, -1.7926e-06, 1.3411e-06, ..., -3.3674e-07,\n 2.2415e-07, -2.7177e-07],\n ...,\n [-8.5051e-07, 3.0009e-06, -1.7829e-07, ..., -7.6412e-07,\n 5.3012e-07, -1.4112e-07],\n [ 5.1514e-07, 4.0788e-07, -6.8600e-07, ..., -5.6059e-07,\n -1.2135e-07, -6.5150e-08],\n [-1.0949e-06, 3.6378e-07, -7.0776e-08, ..., 6.4191e-08,\n 5.4157e-07, -3.2797e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[8.4067e-12, 1.2408e-11, 1.0855e-11, ..., 5.0959e-12, 8.8449e-12,\n 1.4629e-11],\n [6.8501e-12, 2.5491e-11, 1.0946e-11, ..., 1.3110e-11, 9.4138e-12,\n 8.2610e-12],\n [1.0422e-11, 2.5428e-11, 1.3102e-11, ..., 1.3734e-11, 1.3396e-11,\n 1.5140e-11],\n ...,\n [1.7972e-11, 3.0262e-11, 1.4742e-11, ..., 1.4682e-11, 1.1497e-11,\n 3.0129e-11],\n [1.2878e-11, 2.3289e-11, 1.1450e-11, ..., 1.6605e-11, 9.6001e-12,\n 9.2498e-12],\n [8.9619e-12, 2.5655e-11, 1.4756e-11, ..., 2.5977e-11, 1.3260e-11,\n 9.4459e-12]], device='cuda:0')" }, "11": { - "step": "tensor(1252.)", - "exp_avg": "tensor([[-9.5452e-07, -1.3101e-06, 0.0000e+00, ..., -3.3857e-06,\n 5.9020e-06, -1.4618e-07],\n [ 3.7597e-06, -3.0803e-06, 6.4780e-21, ..., -9.9085e-07,\n 5.6761e-07, 1.9089e-06],\n [-1.7598e-06, 1.0675e-07, 0.0000e+00, ..., 2.9983e-07,\n 7.6658e-07, -2.6469e-08],\n ...,\n [-1.4907e-05, -2.7848e-08, 5.6052e-45, ..., -1.9234e-07,\n 4.3957e-07, -3.8273e-07],\n [-2.3764e-06, 3.1821e-07, 0.0000e+00, ..., 1.0096e-06,\n -9.1933e-07, -3.8665e-08],\n [ 3.4229e-07, 1.3795e-06, -2.6979e-20, ..., -3.6340e-06,\n 3.2684e-07, 1.6937e-06]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.3583e-10, 2.2402e-10, 0.0000e+00, ..., 1.3020e-09, 5.5486e-10,\n 1.4219e-10],\n [4.4909e-10, 3.4460e-11, 1.0823e-16, ..., 6.0535e-11, 2.2124e-10,\n 2.3886e-10],\n [1.6617e-10, 8.5684e-11, 0.0000e+00, ..., 4.4212e-11, 2.7147e-10,\n 8.9163e-12],\n ...,\n [1.2742e-10, 7.2834e-12, 5.0207e-17, ..., 1.2851e-11, 6.7411e-11,\n 1.8696e-11],\n [1.6373e-10, 4.2294e-11, 0.0000e+00, ..., 4.6947e-10, 1.2421e-09,\n 2.3724e-11],\n [3.5465e-11, 1.3497e-10, 8.1546e-15, ..., 7.5636e-10, 1.3995e-10,\n 1.6871e-11]], device='cuda:0')" + "step": "tensor(2504.)", + "exp_avg": "tensor([[ 1.2778e-06, 1.0016e-06, 1.4296e-09, ..., 1.3046e-06,\n 3.0553e-06, 5.1079e-06],\n [ 8.0536e-07, -5.7379e-08, 6.2426e-23, ..., 1.0864e-07,\n -5.8014e-07, 2.4104e-06],\n [ 4.7670e-07, 6.8543e-09, 0.0000e+00, ..., 2.0272e-07,\n -4.9184e-06, 3.7464e-08],\n ...,\n [ 3.7641e-07, 3.5784e-09, 5.6052e-45, ..., 3.5863e-09,\n 2.7128e-08, 3.5433e-09],\n [ 2.3868e-07, 7.5190e-08, 0.0000e+00, ..., 2.8456e-06,\n 7.6243e-06, -1.0378e-07],\n [-4.4136e-07, 2.0859e-07, 5.1894e-12, ..., -6.9046e-07,\n -4.3875e-07, 1.5809e-09]], device='cuda:0')", + "exp_avg_sq": "tensor([[6.1974e-11, 1.0273e-10, 3.3333e-15, ..., 7.2439e-10, 2.1659e-10,\n 6.2048e-11],\n [1.9480e-10, 1.6404e-11, 3.0942e-17, ..., 3.5723e-11, 1.8729e-10,\n 1.4771e-10],\n [1.1885e-10, 3.8970e-11, 0.0000e+00, ..., 1.9061e-11, 2.5919e-10,\n 1.5255e-11],\n ...,\n [1.1889e-10, 1.8331e-11, 1.4347e-17, ..., 6.5394e-12, 3.5567e-11,\n 8.2972e-12],\n [7.3297e-11, 3.0906e-11, 0.0000e+00, ..., 2.4757e-10, 1.0390e-09,\n 8.8339e-12],\n [3.3567e-11, 9.7932e-11, 2.4873e-15, ..., 6.1352e-10, 8.3351e-11,\n 1.6817e-11]], device='cuda:0')" }, "12": { - "step": "tensor(1252.)", - "exp_avg": "tensor([ 7.3165e-07, -1.4954e-05, 1.8879e-05, ..., -5.3266e-05,\n -5.9604e-05, -4.2511e-06], device='cuda:0')", - "exp_avg_sq": "tensor([2.4369e-08, 1.8732e-08, 1.3634e-08, ..., 8.3645e-09, 1.9563e-08,\n 1.7500e-08], device='cuda:0')" + "step": "tensor(2504.)", + "exp_avg": "tensor([ 2.4118e-05, 4.5042e-05, -3.2372e-06, ..., -8.2233e-07,\n 4.6033e-05, -2.9744e-06], device='cuda:0')", + "exp_avg_sq": "tensor([1.1918e-08, 1.0005e-08, 9.7556e-09, ..., 6.0090e-09, 1.3704e-08,\n 1.0873e-08], device='cuda:0')" }, "13": { - "step": "tensor(1252.)", - "exp_avg": "tensor([[-3.2940e-07, -2.4490e-07, -1.0957e-06, ..., 1.3607e-07,\n -1.1163e-07, -5.4119e-07],\n [ 1.5030e-08, 4.0016e-07, 3.9324e-07, ..., 4.1304e-07,\n 6.4674e-07, 1.1241e-07],\n [-1.4720e-06, 2.2739e-06, -4.3752e-07, ..., 4.0518e-06,\n -1.0429e-08, -4.5754e-07],\n ...,\n [-5.5502e-07, 1.6260e-08, -9.2345e-07, ..., 7.5728e-07,\n 8.5722e-07, 5.5773e-09],\n [-1.7185e-06, 6.2725e-07, 8.6740e-07, ..., -6.7087e-07,\n -6.7863e-08, 8.3310e-07],\n [ 3.6407e-07, -2.9836e-07, -3.4236e-08, ..., -3.5914e-07,\n 5.4213e-07, -9.2315e-07]], device='cuda:0')", - "exp_avg_sq": "tensor([[4.2573e-11, 1.4459e-11, 4.4655e-11, ..., 1.2465e-11, 8.6706e-12,\n 1.8555e-11],\n [3.4603e-11, 2.2091e-11, 5.5515e-11, ..., 2.2348e-11, 1.1782e-11,\n 4.8839e-11],\n [3.4585e-11, 2.2773e-11, 5.4309e-11, ..., 3.2049e-11, 1.9993e-11,\n 2.8135e-11],\n ...,\n [4.5234e-11, 5.5801e-11, 4.9056e-11, ..., 1.5769e-11, 1.7477e-11,\n 4.8252e-11],\n [5.2270e-11, 4.6982e-11, 2.3132e-11, ..., 2.4825e-11, 1.5397e-11,\n 3.4846e-11],\n [3.6778e-11, 3.0157e-11, 1.4188e-11, ..., 1.4417e-11, 1.2249e-11,\n 7.6116e-11]], device='cuda:0')" + "step": "tensor(2504.)", + "exp_avg": "tensor([[-8.3479e-07, 5.4764e-07, -1.0775e-06, ..., 1.1893e-07,\n 4.6882e-07, -8.5413e-07],\n [-3.7459e-08, -3.5614e-07, 7.4919e-07, ..., -8.6935e-07,\n 3.3207e-07, -7.6068e-07],\n [-5.8790e-07, 1.2587e-07, -1.2202e-06, ..., -8.2444e-07,\n -2.7528e-07, -3.6729e-07],\n ...,\n [-1.3363e-06, -1.0929e-06, -4.5763e-07, ..., 4.2793e-07,\n -7.8387e-07, 6.9590e-08],\n [-4.0714e-07, -1.3150e-06, -1.2504e-07, ..., -4.0317e-07,\n 4.5231e-07, -2.0854e-07],\n [ 1.0503e-06, -5.4846e-07, -1.1344e-07, ..., -1.7724e-07,\n -2.3885e-07, -6.0194e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.5478e-11, 7.0544e-12, 3.1112e-11, ..., 5.3519e-12, 5.3167e-12,\n 8.8194e-12],\n [1.5035e-11, 9.8603e-12, 4.0443e-11, ..., 1.0026e-11, 6.5719e-12,\n 1.8348e-11],\n [1.5861e-11, 1.0703e-11, 4.2777e-11, ..., 2.0316e-11, 1.0770e-11,\n 1.3061e-11],\n ...,\n [2.0773e-11, 2.2542e-11, 2.2820e-11, ..., 7.9171e-12, 1.1264e-11,\n 1.9694e-11],\n [2.3086e-11, 2.1862e-11, 1.4965e-11, ..., 1.5300e-11, 8.7356e-12,\n 1.6244e-11],\n [1.7134e-11, 1.4242e-11, 9.0803e-12, ..., 8.5805e-12, 7.6396e-12,\n 2.8204e-11]], device='cuda:0')" } }, "param_groups": [ { - "lr": 0.00654543046337755, + "lr": 0.005000500000000001, "name": "shared", "betas": [ 0.9, @@ -242,7 +242,7 @@ ] }, { - "lr": 0.00654543046337755, + "lr": 0.005000500000000001, "name": "scale_384", "betas": [ 0.9, @@ -265,7 +265,7 @@ ] }, { - "lr": 0.00654543046337755, + "lr": 0.005000500000000001, "name": "scale_768", "betas": [ 0.9, @@ -288,7 +288,7 @@ ] }, { - "lr": 0.00654543046337755, + "lr": 0.005000500000000001, "name": "scale_1024", "betas": [ 0.9, @@ -311,7 +311,7 @@ ] }, { - "lr": 0.00654543046337755, + "lr": 0.005000500000000001, "name": "scale_1280", "betas": [ 0.9, @@ -334,7 +334,7 @@ ] }, { - "lr": 0.0032728879774401812, + "lr": 0.0025005, "name": "fusion", "betas": [ 0.9, @@ -390,7 +390,7 @@ "T_i": 10, "T_mult": 2, "eta_min": 1e-06, - "T_cur": 4, + "T_cur": 5, "base_lrs": [ 0.01, 0.01, @@ -399,27 +399,27 @@ 0.01, 0.005 ], - "last_epoch": 4, + "last_epoch": 5, "_step_count": 0, "_is_initial": false, "_get_lr_called_within_step": false, "_last_lr": [ - 0.00654543046337755, - 0.00654543046337755, - 0.00654543046337755, - 0.00654543046337755, - 0.00654543046337755, - 0.0032728879774401812 + 0.005000500000000001, + 0.005000500000000001, + 0.005000500000000001, + 0.005000500000000001, + 0.005000500000000001, + 0.0025005 ] }, "metrics": { - "best_val_acc": 81.84, - "best_epoch": 3, + "best_val_acc": 82.08, + "best_epoch": 4, "scale_accuracies": { - "384": 81.84, - "768": 81.864, - "1024": 81.866, - "1280": 81.398 + "384": 82.08, + "768": 82.112, + "1024": 82.248, + "1280": 81.954 } }, "train_config": {