diff --git "a/weights/best_model_metadata.json" "b/weights/best_model_metadata.json" --- "a/weights/best_model_metadata.json" +++ "b/weights/best_model_metadata.json" @@ -1,196 +1,211 @@ { - "epoch": 1, + "epoch": 2, "optimizer_state_dict": { "state": { "0": { - "step": "tensor(2504.)", - "exp_avg": "tensor([[-6.5487e-06, 2.5376e-05, -1.5614e-05, ..., 2.9876e-05,\n -3.7149e-05, 2.6294e-05],\n [ 1.2961e-05, -1.9146e-06, 3.7802e-05, ..., 3.0303e-05,\n 4.4067e-05, -7.5155e-06],\n [-3.5080e-05, -1.4147e-05, -2.7791e-06, ..., 2.4522e-05,\n 2.8205e-05, 1.9891e-05],\n ...,\n [ 3.4402e-06, -2.0476e-05, 4.0793e-05, ..., 2.4010e-05,\n -3.1729e-05, 7.9134e-06],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-4.1767e-05, -7.7922e-05, 5.1855e-05, ..., -2.1684e-05,\n -2.3477e-05, -6.0733e-06]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.3577e-08, 1.1693e-08, 6.7396e-09, ..., 8.5626e-09, 7.6407e-09,\n 6.8233e-09],\n [1.0395e-08, 1.0609e-08, 6.7780e-09, ..., 6.2896e-09, 7.6135e-09,\n 5.3988e-09],\n [1.6860e-08, 1.5571e-08, 1.8368e-08, ..., 1.1968e-08, 9.9803e-09,\n 8.9247e-09],\n ...,\n [1.5494e-08, 1.7835e-08, 2.1564e-08, ..., 1.2079e-08, 1.0224e-08,\n 1.0152e-08],\n [1.3452e-11, 7.0176e-11, 2.1326e-11, ..., 1.2502e-13, 6.7292e-11,\n 3.9654e-13],\n [1.2289e-08, 1.3047e-08, 1.4017e-08, ..., 1.0277e-08, 7.2716e-09,\n 6.7571e-09]], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([[ 2.0200e-05, 6.9546e-06, -4.4373e-06, ..., -5.1469e-07,\n -2.5300e-05, 3.7132e-06],\n [ 2.5399e-05, 3.9343e-06, 2.0262e-05, ..., 8.2956e-06,\n -3.6518e-05, 1.0635e-05],\n [-4.1251e-05, -4.3051e-05, -2.7402e-05, ..., -4.3821e-05,\n 6.9783e-05, 1.2444e-04],\n ...,\n [-4.7492e-05, -2.5534e-05, 6.6693e-06, ..., -9.5359e-05,\n 4.9265e-05, 5.3679e-05],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 3.2520e-05, 3.7310e-05, 1.0988e-05, ..., 2.7937e-05,\n -2.8946e-05, -8.9554e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.1972e-08, 9.8755e-09, 5.8050e-09, ..., 7.0010e-09, 6.9905e-09,\n 5.9818e-09],\n [7.3475e-09, 8.0042e-09, 4.8599e-09, ..., 4.4245e-09, 5.4986e-09,\n 4.2453e-09],\n [1.3247e-08, 1.2973e-08, 1.5058e-08, ..., 1.0874e-08, 8.5007e-09,\n 8.9039e-09],\n ...,\n [1.4224e-08, 1.5884e-08, 1.9922e-08, ..., 1.0331e-08, 9.2969e-09,\n 9.4526e-09],\n [3.8440e-12, 2.0053e-11, 6.0940e-12, ..., 3.5726e-14, 1.9229e-11,\n 1.1331e-13],\n [1.0925e-08, 1.1112e-08, 1.1635e-08, ..., 9.0384e-09, 6.4266e-09,\n 5.6635e-09]], device='cuda:0')" }, "1": { - "step": "tensor(2504.)", - "exp_avg": "tensor([ 7.0269e-04, -4.3829e-04, 6.4416e-04, ..., 4.0348e-04,\n 5.6052e-45, 1.4129e-03], device='cuda:0')", - "exp_avg_sq": "tensor([1.7100e-05, 1.4015e-05, 2.7751e-05, ..., 2.6989e-05, 4.5548e-07,\n 2.1548e-05], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([ 1.4248e-04, 9.8034e-05, 6.0917e-04, ..., -2.0007e-03,\n 5.6052e-45, 1.5087e-03], device='cuda:0')", + "exp_avg_sq": "tensor([1.4772e-05, 9.7933e-06, 2.1549e-05, ..., 2.3464e-05, 1.3016e-07,\n 1.6867e-05], device='cuda:0')" }, "2": { - "step": "tensor(2504.)", - "exp_avg": "tensor([[ 8.5754e-07, -1.2882e-06, 1.7601e-05, ..., 7.4367e-05,\n 5.6052e-45, 2.1378e-05],\n [ 1.2624e-05, -2.4106e-05, -4.0388e-05, ..., 2.1485e-07,\n -5.6052e-45, -3.7897e-06],\n [-1.4413e-06, -1.7281e-05, 4.5049e-06, ..., 7.7915e-07,\n -5.6052e-45, -1.8508e-05],\n ...,\n [ 5.6052e-45, 3.0647e-20, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 2.4805e-06, -9.2363e-10, 7.3470e-07, ..., 1.4883e-05,\n -5.6052e-45, 4.3061e-07],\n [-3.4704e-06, -2.2494e-05, 8.1961e-05, ..., -1.8339e-06,\n 5.6052e-45, 3.5895e-06]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.6828e-08, 6.3362e-10, 4.2841e-09, ..., 2.7095e-08, 1.0725e-11,\n 6.9180e-09],\n [7.5494e-09, 2.3728e-09, 4.8358e-09, ..., 1.8989e-09, 5.9508e-10,\n 8.8431e-09],\n [1.2735e-08, 3.1009e-09, 4.3913e-09, ..., 3.6356e-09, 4.1009e-09,\n 6.4680e-09],\n ...,\n [3.8527e-10, 1.6064e-11, 1.2190e-12, ..., 1.5355e-11, 4.1181e-10,\n 6.6941e-12],\n [1.5946e-09, 5.7076e-11, 4.2836e-10, ..., 4.9555e-10, 8.3073e-13,\n 2.7276e-10],\n [9.3836e-09, 8.1502e-09, 4.3388e-08, ..., 5.0469e-09, 7.3599e-11,\n 1.2771e-08]], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([[-1.4940e-06, -2.7771e-06, 1.2737e-05, ..., 7.9829e-06,\n 5.6052e-45, 4.7992e-06],\n [ 9.7114e-06, -1.2580e-06, 9.7320e-06, ..., 2.3625e-06,\n -5.6052e-45, 2.4328e-06],\n [ 4.4615e-06, 2.7206e-05, 1.0038e-05, ..., 6.4543e-07,\n -5.6052e-45, -1.3107e-05],\n ...,\n [ 7.2919e-08, 2.5010e-08, 1.4688e-06, ..., -1.1694e-08,\n 5.6052e-45, -3.5726e-08],\n [ 2.0505e-06, 2.0283e-08, 4.6289e-06, ..., -2.5378e-06,\n -5.6052e-45, -3.5847e-08],\n [-1.0648e-05, 4.6832e-06, 1.7744e-05, ..., -1.0498e-06,\n 5.6052e-45, 5.2346e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[7.4221e-09, 3.1795e-10, 3.1735e-09, ..., 1.9677e-08, 3.0648e-12,\n 3.4460e-09],\n [5.7773e-09, 1.8901e-09, 2.2236e-09, ..., 1.0204e-09, 1.7005e-10,\n 6.5865e-09],\n [4.6247e-09, 1.8038e-09, 2.1060e-09, ..., 1.7610e-09, 1.1719e-09,\n 4.1075e-09],\n ...,\n [1.1772e-10, 4.6982e-12, 2.4893e-11, ..., 2.7241e-11, 1.1768e-10,\n 5.1512e-11],\n [1.8076e-09, 3.3614e-11, 2.9277e-10, ..., 7.3601e-10, 2.3739e-13,\n 7.5951e-10],\n [6.7744e-09, 3.6211e-09, 2.4603e-08, ..., 3.6917e-09, 2.1032e-11,\n 7.0957e-09]], device='cuda:0')" }, "3": { - "step": "tensor(2504.)", - "exp_avg": "tensor([ 4.8291e-04, -2.5564e-04, -4.3181e-04, 2.1158e-04, -3.0085e-04,\n -4.3630e-04, 1.2255e-04, -1.7331e-04, 4.0720e-05, 3.0700e-04,\n -2.8976e-04, -2.0572e-04, -1.2855e-04, 8.4173e-05, 5.6052e-45,\n 1.3033e-04, 1.0698e-04, 4.2868e-04, 1.3082e-04, 5.9594e-05,\n 2.2269e-10, -2.5721e-04, -2.1352e-04, 2.9803e-04, 5.8206e-04,\n -1.6162e-04, 6.6934e-04, 3.3385e-04, -1.7816e-05, 1.6846e-04,\n -5.9458e-04, -8.4374e-05, -3.4366e-04, 1.1044e-04, 1.5403e-04,\n 1.8146e-04, -2.5653e-05, -2.4346e-04, 2.1387e-04, -3.1512e-04,\n 5.2261e-04, -2.2226e-04, -2.6270e-04, 4.8686e-04, -6.0873e-05,\n -1.1042e-04, 1.3970e-11, -3.0145e-05, 1.4523e-04, 2.3919e-04,\n -1.4231e-04, -7.1367e-06, 1.6169e-04, -1.5712e-04, -6.3746e-05,\n 3.6516e-04, -8.7714e-05, -1.1106e-04, 6.9032e-04, 7.0514e-05,\n -1.6933e-04, 2.7264e-05, -1.8100e-04, -1.2855e-04, -5.1260e-05,\n 2.2919e-04, 2.9277e-04, -2.7643e-04, 2.0755e-04, -3.0680e-05,\n 2.3027e-04, 5.6052e-45, 2.1486e-04, -1.1510e-05, 2.0711e-04,\n 1.7116e-05, -3.8606e-04, 5.1995e-05, 3.4637e-04, 7.9762e-05,\n -4.8078e-05, 2.8860e-04, -4.6137e-05, 2.5023e-04, 1.2823e-04,\n 7.1506e-04, -3.5781e-04, -7.2730e-04, -1.3969e-04, -2.4508e-04,\n 3.1036e-04, -1.1096e-03, -2.1758e-04, 8.5803e-05, -7.0037e-05,\n -1.3117e-04, -2.5449e-04, -4.8244e-04, -2.2335e-05, -1.1209e-04,\n -3.0958e-04, 2.7037e-04, -2.6344e-04, 1.3473e-04, -7.5282e-05,\n -2.3703e-04, -4.4581e-05, -2.2416e-05, 6.9121e-05, -7.5774e-05,\n 5.6052e-45, -3.8188e-04, 1.5103e-03, 8.5651e-05, 1.4353e-04,\n -2.5883e-04, 4.8807e-05, -1.9233e-04, 1.5809e-04, 4.6912e-04,\n 1.1051e-04, 4.2287e-04, -7.7485e-05, 1.7526e-04, -3.9805e-04,\n 6.3991e-05, 6.9549e-05, 6.7361e-05, -3.3535e-04, -1.7842e-05,\n 5.6052e-45, -3.6844e-04, -3.5981e-04, 1.2043e-04, -5.2232e-05,\n 2.4118e-04, 3.0183e-04, 5.0486e-04, -7.6292e-05, 3.7732e-04,\n -1.8886e-04, -6.0142e-05, -4.5446e-34, 1.3062e-04, -1.0633e-04,\n -2.0652e-05, -2.9445e-06, 2.3875e-04, 4.1429e-04, 3.3621e-04,\n -5.2464e-05, -1.6028e-04, 3.2129e-10, 2.7243e-04, -6.8243e-05,\n 5.6052e-45, 1.6547e-04, -2.5309e-04, 9.4012e-05, -3.2854e-04,\n 1.2941e-04, -1.0363e-04, 1.4340e-04, -4.6644e-05, 2.2398e-04,\n -7.3773e-05, 1.0335e-04, 1.5786e-04, 1.8143e-04, -9.0634e-05,\n 6.4002e-06, -1.1434e-10, 5.6052e-45, -4.1033e-04, 3.0408e-04,\n -2.1363e-04, 8.8088e-05, -1.2317e-05, -1.3269e-04, 3.9362e-05,\n -1.1127e-04, 2.6498e-04, -3.5545e-04, 7.3270e-05, 1.0562e-04,\n 3.1790e-04, 5.5133e-05, -1.8820e-04, -3.5068e-04, 1.6522e-04,\n 3.3838e-04, 3.2116e-04, -5.0605e-04, 4.2653e-05, 7.6346e-04,\n 1.3238e-05, -6.1535e-04, -3.5559e-05, 5.6052e-45, -2.0554e-05,\n -2.1058e-04, 2.9927e-04, -2.0218e-05, 7.8256e-06, 1.1005e-04,\n 1.2626e-04, 7.3845e-16, 6.1334e-05, 1.1194e-04, -1.0006e-04,\n 3.0287e-04, -4.1067e-04, 5.3052e-09, -1.1673e-04, 1.0101e-04,\n -8.9416e-05, -4.5582e-04, -4.6005e-04, 2.8221e-04, -5.5363e-05,\n -1.9462e-05, 3.5281e-15, -4.2685e-05, -1.4782e-04, -1.8144e-05,\n -3.4890e-04, 2.7212e-04, 1.1848e-04, -1.8938e-04, -1.0332e-05,\n -1.8632e-04, -3.7595e-04, 1.5851e-04, 9.6373e-05, -3.2823e-04,\n -1.5103e-04, 7.7956e-05, 7.7259e-06, 1.1799e-04, 8.2638e-04,\n 8.7609e-05, -4.6048e-04, -4.3501e-04, 9.8771e-05, -7.7503e-05,\n 6.0935e-04, 5.2890e-05, 1.8882e-04, 2.8896e-04, 4.2721e-04,\n 5.1860e-06, 4.1138e-04, -4.0457e-04, 1.1343e-04, 5.2320e-04,\n 1.1119e-04, -6.6602e-05, -2.8657e-05, -7.4795e-04, 5.4243e-05,\n -2.4777e-04, -4.4339e-05, -1.8332e-05, 2.7390e-05, -1.3787e-05,\n 2.0493e-04, -3.4339e-04, -3.2810e-04, -2.4531e-04, -3.2918e-04,\n -1.7741e-04, -3.0680e-05, -4.9926e-04, 1.7674e-04, -2.6172e-06,\n -1.5041e-04, -2.3448e-05, 1.5938e-04, -5.2957e-05, 4.7453e-06,\n 1.2788e-04, -1.1425e-05, 5.9837e-05, 4.0198e-04, 2.6523e-04,\n 3.3642e-04, -5.6052e-45, -4.4810e-04, 1.3778e-05, -1.1877e-05,\n -3.8714e-04, -1.1944e-04, 1.7534e-04, 3.7224e-05, 8.0351e-13,\n -2.9278e-04, 4.2262e-04, -1.6328e-04, 9.9815e-05, 1.9199e-04,\n 2.0700e-04, -4.5008e-05, 1.8056e-05, 3.3261e-04, -3.6401e-04,\n 2.8145e-05, -8.1369e-05, 3.7911e-05, -8.6648e-05, 2.7017e-04,\n 7.8279e-05, 2.8385e-04, 3.6892e-05, 1.0750e-04, 3.1548e-04,\n -1.6263e-05, 4.6995e-05, -2.8492e-04, -9.8753e-10, -3.2162e-04,\n -9.3303e-05, 3.0079e-04, -1.6723e-04, -3.4915e-04, -1.3853e-05,\n -5.9657e-04, -3.9228e-04, 1.2713e-04, -1.8899e-04, -5.1591e-04,\n 2.0431e-04, 3.7862e-05, 8.7674e-05, -3.3424e-04, -6.5328e-05,\n 1.7870e-04, -6.1974e-04, -1.4127e-04, 5.6052e-45, 1.5290e-05,\n 1.7084e-04, -2.5020e-04, 1.2879e-04, 1.6466e-31, -2.0900e-04,\n 3.9337e-04, -1.0026e-04, -7.7029e-07, -4.2284e-05, 6.6850e-06,\n -1.4822e-04, 6.6300e-05, 1.5027e-04, 1.1608e-04, 2.1946e-04,\n 1.8761e-05, -2.9173e-05, -3.0694e-05, 1.1066e-04, -2.6597e-06,\n 4.0898e-04, -9.4613e-05, 4.3921e-04, 1.1763e-04, -5.5133e-05,\n 2.8915e-04, 5.6052e-45, 1.5755e-04, 2.1505e-04, 2.8558e-04,\n -2.3842e-04, 6.5126e-05, 1.6964e-04, 1.5295e-04, 1.8430e-04,\n 6.4572e-05, 6.7611e-05, 3.0185e-04, -1.9374e-16, -2.9602e-05,\n 5.6052e-45, 1.8956e-04, -1.3602e-04, 1.9873e-04, -9.2877e-05,\n -2.0909e-04, -4.7458e-04, -2.0321e-04, -2.9955e-04, -1.9161e-04,\n -2.0671e-04, -2.3793e-04, 3.0047e-05, 1.8534e-04, -1.6973e-05,\n -5.3114e-05, -2.3462e-05, 1.3416e-04, 1.5063e-05, 1.4101e-05,\n 1.7726e-04, 4.3427e-11, 7.0808e-05, 3.6130e-04, -4.9985e-04,\n 2.7836e-04, -8.5321e-05, -1.2437e-05, 2.2005e-04, -8.3558e-05,\n 2.4206e-04, -1.5784e-03, -4.7367e-04, 4.2100e-04, -4.2546e-04,\n -6.9676e-05, 1.0802e-04, -2.0906e-04, -2.8870e-04, -9.8914e-05,\n 1.1694e-35, -2.4278e-04, 2.4333e-04, 8.2304e-05, 5.5331e-04,\n 3.0735e-04, 2.4192e-04, -1.4057e-04, 3.1486e-05, 7.3920e-04,\n -3.0131e-04, 6.4959e-04, 3.0990e-04, -5.0570e-05, 1.5650e-04,\n -1.8757e-04, 4.9364e-04, 5.5478e-05, 9.3269e-17, 4.6102e-05,\n -1.0786e-04, -5.3178e-04, 4.4132e-06, 3.4059e-05, 7.3637e-05,\n -6.6301e-04, 5.6052e-45, 7.6759e-12, 2.2653e-11, -3.7650e-04,\n 2.2187e-05, 2.0610e-04, 3.5358e-04, -1.2537e-04, -9.5501e-05,\n -7.8345e-04, 1.5879e-05, 3.7188e-04, 5.1549e-30, 2.3533e-04,\n 2.9753e-05, 6.4350e-12, 1.3436e-04, -3.1901e-04, 3.6983e-04,\n 4.0607e-05, 2.0178e-04, -1.0115e-04, 6.8562e-05, 1.7706e-04,\n -2.3557e-04, 3.0862e-05, -3.8017e-04, 3.5232e-04, 4.6563e-05,\n 8.4701e-05, -1.1805e-04, -1.7671e-04, -5.0819e-04, 2.6310e-06,\n -5.5245e-04, 2.8356e-05, -8.2160e-05, -1.6378e-05, -7.7622e-05,\n -2.2861e-04, 6.2837e-06, -3.8526e-04, 3.5571e-04, -5.0082e-04,\n 2.0735e-04, 2.4880e-05, 1.8058e-04, -1.0186e-04, 7.3934e-05,\n 5.6052e-45, -1.8872e-04, -2.4286e-04, 9.6475e-05, 5.0200e-04,\n 2.2723e-04, -4.3321e-04, 3.1933e-04, -1.0678e-04, -1.0349e-04,\n 4.4266e-04, 6.0360e-05, 7.9237e-04, -1.0873e-04, 6.8859e-15,\n -1.4409e-05, -1.1024e-04], device='cuda:0')", - "exp_avg_sq": "tensor([3.9896e-06, 7.5239e-06, 7.9639e-06, 1.6433e-06, 6.3922e-06, 3.4436e-06,\n 9.8297e-07, 4.0627e-06, 5.4213e-06, 8.6105e-07, 7.7692e-06, 1.4785e-06,\n 3.0740e-06, 1.5375e-06, 4.7801e-17, 2.2869e-06, 1.1345e-05, 4.9883e-06,\n 9.6034e-07, 3.7881e-06, 1.3495e-07, 1.9154e-06, 7.4161e-06, 9.0188e-07,\n 1.6585e-06, 2.3075e-06, 4.0416e-06, 2.4903e-06, 5.0178e-07, 4.2812e-07,\n 6.2065e-06, 5.8321e-06, 4.8366e-06, 5.3284e-06, 8.4287e-07, 1.0477e-06,\n 1.8842e-06, 3.6888e-06, 8.5693e-06, 6.6826e-07, 3.5616e-06, 5.1926e-06,\n 5.7897e-06, 2.5477e-07, 7.0840e-06, 4.2692e-06, 2.0025e-05, 9.6372e-08,\n 3.0595e-06, 1.1710e-06, 2.1831e-06, 2.3525e-06, 2.1864e-06, 1.1829e-06,\n 1.9764e-06, 2.8575e-06, 2.2550e-06, 1.2867e-06, 1.6381e-06, 2.8839e-06,\n 2.1710e-06, 1.9398e-06, 2.1995e-06, 4.2982e-06, 2.2450e-06, 1.7098e-07,\n 4.2707e-06, 6.4152e-06, 1.4496e-06, 1.0326e-06, 2.6124e-06, 9.2378e-14,\n 2.0461e-06, 3.0031e-06, 2.5085e-07, 1.7877e-06, 2.0705e-06, 4.2785e-06,\n 2.9079e-06, 1.9977e-06, 1.0710e-06, 8.6923e-06, 1.2193e-06, 1.6034e-06,\n 8.5171e-07, 4.4000e-07, 1.7231e-06, 5.9634e-06, 6.5855e-07, 3.4327e-06,\n 2.1731e-06, 5.8914e-06, 6.3589e-06, 1.0862e-05, 4.8623e-07, 3.9726e-06,\n 3.2038e-06, 5.0853e-06, 7.1915e-06, 2.9196e-06, 2.1784e-06, 2.7809e-06,\n 1.9895e-06, 1.8209e-05, 1.2181e-06, 4.6158e-06, 2.6063e-06, 2.3693e-06,\n 3.0493e-06, 5.5274e-07, 1.4051e-10, 1.5788e-06, 4.7022e-07, 5.7959e-06,\n 4.3986e-06, 2.9514e-06, 1.4703e-06, 4.8755e-06, 1.3748e-06, 1.6845e-06,\n 2.5174e-06, 2.1577e-06, 3.1676e-06, 3.2486e-06, 2.7285e-06, 2.6541e-06,\n 1.4464e-06, 5.7576e-06, 1.2252e-06, 2.2612e-06, 1.8943e-09, 1.2079e-06,\n 1.6734e-06, 1.1217e-06, 2.0025e-06, 1.1009e-06, 5.5733e-06, 2.2683e-06,\n 4.0396e-06, 2.9978e-06, 4.9764e-06, 1.9954e-06, 4.5119e-07, 5.0830e-06,\n 5.0591e-06, 3.1787e-06, 6.1178e-06, 1.8077e-06, 1.0388e-06, 6.5480e-06,\n 4.1302e-06, 2.0099e-06, 4.2343e-06, 8.2210e-06, 1.7033e-06, 4.4688e-08,\n 1.0941e-06, 2.9944e-06, 1.8909e-06, 9.8260e-07, 6.9394e-07, 2.3830e-06,\n 1.2400e-06, 6.7745e-06, 2.3055e-06, 7.6112e-07, 2.6990e-06, 3.3100e-06,\n 2.7546e-06, 9.8988e-07, 1.4851e-06, 3.7457e-06, 4.4796e-07, 7.9835e-06,\n 2.0622e-06, 1.1313e-06, 3.3808e-06, 2.2618e-06, 8.8259e-07, 3.0024e-07,\n 1.4767e-06, 6.9091e-07, 3.9290e-06, 2.0474e-06, 2.0032e-07, 2.0592e-05,\n 8.6954e-07, 1.7096e-06, 5.7384e-06, 2.6765e-06, 9.4508e-06, 2.0357e-06,\n 4.3685e-06, 4.8244e-06, 6.5279e-06, 5.3856e-06, 3.4892e-06, 5.1663e-07,\n 7.1123e-15, 5.1385e-06, 2.3259e-06, 1.1826e-06, 1.2718e-06, 2.2320e-06,\n 4.3948e-06, 1.3297e-06, 8.7965e-06, 2.1525e-06, 2.2698e-07, 1.0078e-07,\n 3.9990e-06, 1.3277e-05, 1.4527e-05, 3.7113e-06, 4.7110e-06, 4.0241e-06,\n 2.2627e-06, 4.5358e-07, 3.5602e-06, 6.1550e-06, 1.1840e-06, 4.2067e-06,\n 2.4423e-06, 3.6417e-06, 4.0634e-06, 4.0652e-06, 3.5629e-06, 2.4816e-05,\n 3.1304e-06, 3.7137e-08, 1.7859e-06, 4.0646e-06, 3.7811e-06, 3.1756e-06,\n 2.2575e-06, 3.9166e-07, 3.2261e-06, 3.4727e-06, 2.9968e-06, 1.8894e-05,\n 1.7357e-06, 2.1319e-06, 7.1791e-07, 1.8786e-06, 7.5709e-07, 1.5452e-06,\n 1.1327e-06, 3.0156e-06, 3.3402e-06, 5.9001e-06, 3.2954e-06, 2.8084e-06,\n 2.3975e-06, 4.7194e-06, 1.8270e-06, 1.9746e-06, 1.3654e-06, 2.1741e-06,\n 2.6169e-06, 3.3020e-07, 1.5069e-06, 1.0339e-06, 2.6893e-06, 1.5916e-06,\n 8.1606e-09, 6.9078e-07, 8.0266e-06, 1.9265e-06, 4.6486e-06, 1.6766e-06,\n 2.0741e-06, 2.4987e-06, 8.2676e-07, 3.6847e-06, 3.2538e-07, 2.0592e-06,\n 5.5140e-06, 7.7698e-06, 1.4958e-06, 1.4576e-06, 1.5332e-06, 7.5373e-06,\n 3.0060e-06, 1.0311e-06, 4.8499e-06, 3.8247e-06, 6.1329e-07, 2.0824e-06,\n 1.2495e-06, 2.5490e-06, 7.1466e-06, 5.6165e-06, 8.0272e-06, 8.8825e-08,\n 2.7826e-06, 2.2634e-06, 3.2044e-06, 2.1830e-06, 2.1512e-06, 1.4450e-06,\n 2.0576e-07, 1.7389e-06, 3.0312e-06, 4.8065e-06, 6.5037e-06, 9.5130e-07,\n 1.6794e-06, 4.7446e-06, 1.8124e-06, 2.6443e-06, 1.9910e-06, 6.7251e-07,\n 3.6012e-06, 1.7359e-06, 5.1801e-06, 2.2244e-06, 5.4730e-07, 5.6501e-06,\n 6.7547e-06, 1.9276e-06, 5.7703e-06, 2.1108e-06, 7.1595e-06, 2.4202e-06,\n 2.5887e-07, 2.5562e-06, 3.8686e-06, 7.5490e-07, 3.7486e-06, 3.9485e-06,\n 4.2257e-06, 3.5658e-06, 4.6477e-06, 6.8250e-07, 2.0143e-06, 4.0064e-06,\n 2.8325e-06, 5.9433e-07, 1.8962e-16, 1.6756e-06, 5.1432e-06, 1.6700e-06,\n 8.4100e-06, 1.2270e-09, 4.8930e-06, 3.0121e-05, 1.3328e-06, 2.2728e-06,\n 9.7845e-07, 3.5324e-06, 1.5843e-06, 1.3657e-05, 1.8971e-06, 2.1689e-06,\n 6.3947e-06, 9.3440e-07, 8.2979e-07, 1.7361e-06, 2.4282e-06, 1.7145e-06,\n 2.9677e-06, 4.3123e-07, 4.3169e-06, 6.0273e-06, 2.3104e-06, 6.8735e-06,\n 6.3245e-10, 2.0070e-06, 5.3693e-07, 3.4809e-06, 2.3608e-06, 2.8884e-06,\n 1.8522e-06, 6.4250e-06, 1.9642e-08, 1.2474e-06, 2.0066e-06, 2.6481e-06,\n 1.5626e-06, 1.0489e-06, 2.3322e-07, 3.2182e-06, 4.1447e-06, 3.2395e-06,\n 1.3825e-06, 5.3317e-06, 2.5010e-06, 2.6091e-07, 3.6701e-06, 6.1508e-06,\n 3.2606e-06, 4.3809e-06, 8.5496e-06, 2.9267e-06, 6.0393e-06, 7.4129e-07,\n 7.4056e-07, 4.4162e-06, 1.1795e-08, 5.4210e-06, 1.5646e-06, 7.1567e-06,\n 4.3184e-06, 2.9652e-06, 6.1824e-06, 2.1889e-06, 2.8854e-06, 6.6331e-06,\n 2.2245e-06, 2.3057e-06, 6.5047e-06, 2.3783e-06, 1.0951e-05, 5.0573e-06,\n 3.1538e-06, 1.3562e-06, 9.1222e-06, 2.8989e-07, 1.5558e-06, 8.6860e-07,\n 1.0014e-05, 2.3144e-06, 1.4919e-06, 6.0428e-06, 5.6467e-06, 2.9347e-06,\n 8.0428e-07, 4.6789e-06, 7.7919e-07, 8.5051e-06, 6.3417e-06, 4.8177e-06,\n 1.0312e-06, 1.5377e-06, 2.4740e-06, 5.7683e-07, 1.9854e-06, 4.5343e-07,\n 7.3856e-06, 7.3306e-06, 3.0349e-06, 2.0796e-06, 5.3854e-07, 3.4951e-06,\n 5.0104e-06, 3.2180e-06, 9.0164e-12, 4.1122e-07, 9.9064e-07, 4.5467e-06,\n 2.5611e-06, 8.9574e-07, 1.6890e-06, 2.6067e-06, 4.6340e-06, 2.2322e-06,\n 1.0704e-06, 1.1332e-06, 1.5119e-07, 5.3338e-06, 2.2163e-06, 1.4951e-06,\n 4.2938e-07, 5.1469e-06, 4.9264e-06, 1.0663e-06, 4.5091e-06, 4.8379e-06,\n 2.8278e-07, 8.3315e-06, 7.9743e-06, 5.0650e-06, 5.4380e-06, 3.4426e-06,\n 9.5527e-07, 1.5880e-06, 2.3695e-06, 2.5021e-06, 3.7635e-07, 1.8286e-07,\n 3.7947e-06, 1.5111e-06, 1.4507e-06, 1.8654e-06, 2.4062e-06, 3.8474e-06,\n 3.5159e-07, 2.7651e-06, 2.7836e-06, 6.1307e-06, 8.4544e-06, 3.4597e-06,\n 1.5840e-06, 4.7137e-06, 1.1930e-06, 2.4725e-16, 1.7560e-06, 6.4081e-06,\n 2.0739e-06, 4.1012e-06, 1.6733e-06, 4.2465e-06, 1.4267e-06, 4.0660e-06,\n 1.0331e-06, 2.6052e-05, 9.6603e-08, 4.8336e-06, 2.8538e-06, 4.3294e-07,\n 4.3020e-07, 3.2071e-06], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([-4.2354e-06, 1.3448e-04, -5.9424e-05, -1.4676e-04, 1.3375e-04,\n 1.3534e-04, 6.8031e-05, 1.0383e-04, 1.7101e-04, 2.1226e-04,\n -4.1089e-04, -3.2337e-05, 5.8954e-05, -1.7273e-04, -4.7637e-05,\n -2.6821e-04, 2.7133e-04, -4.4195e-05, -9.8756e-05, 4.2214e-05,\n 3.4375e-05, 5.0467e-04, 1.4436e-04, 2.3770e-04, 3.0763e-04,\n -5.4243e-05, -2.0677e-04, 2.8820e-05, 3.5699e-05, 3.0537e-05,\n -1.9370e-04, 1.9151e-05, -5.5251e-05, 4.9451e-04, 1.4622e-04,\n -3.1571e-05, 2.0731e-04, 1.9890e-04, -2.3882e-04, 2.4290e-05,\n 7.6714e-05, -9.2404e-05, -2.4822e-04, -2.1697e-04, 2.6723e-04,\n 2.6225e-04, 5.6052e-45, -6.5067e-05, -4.7104e-04, -2.1265e-04,\n -9.6759e-05, 8.4967e-05, -3.7053e-04, -1.7717e-05, -1.3804e-04,\n -1.1851e-04, -1.8507e-04, 2.0417e-04, 1.2220e-04, 1.6588e-04,\n 2.6629e-05, 2.2281e-05, -1.7709e-04, -2.5598e-04, 3.7906e-04,\n -7.5635e-07, 3.4796e-07, 4.5234e-04, -5.2854e-05, -1.4958e-04,\n 9.2461e-05, 5.6052e-45, -2.3730e-04, 1.2000e-05, -1.3854e-04,\n -1.3954e-04, -7.3927e-05, -5.1092e-04, 4.6618e-04, -3.4842e-05,\n 1.7187e-05, -6.0548e-05, -7.2412e-05, 1.3329e-04, 4.0596e-05,\n -8.9056e-05, 1.6277e-04, -4.0762e-04, 1.0798e-04, -1.0760e-04,\n 1.9640e-04, -1.5285e-04, 1.0596e-04, 5.2607e-04, 1.5921e-04,\n -8.7192e-05, 9.4661e-05, 2.7345e-05, -2.6171e-05, 1.2558e-04,\n 1.7883e-04, -1.5030e-04, 3.3634e-04, 1.8439e-04, 2.8033e-04,\n 3.7320e-05, -1.7267e-04, -5.1119e-05, 2.8148e-05, 1.4167e-04,\n 2.9538e-05, -1.7093e-04, -2.5089e-06, -1.8960e-04, -2.3239e-04,\n -4.3403e-05, -1.2090e-04, -1.8278e-04, 8.1240e-05, 1.1309e-04,\n -1.6033e-04, -3.9163e-04, 5.4308e-05, -1.3685e-04, 1.6986e-04,\n 6.9067e-05, 2.7893e-04, 2.2753e-05, 1.6989e-04, 5.6084e-05,\n 3.0745e-05, 1.2574e-06, -4.1006e-05, -2.5382e-05, 2.1100e-04,\n 1.9422e-05, -1.7200e-04, 2.2103e-04, 1.3905e-04, -1.7439e-04,\n -2.5989e-04, 2.3274e-04, -1.0612e-09, 1.5557e-04, -4.9147e-04,\n -9.0219e-05, 7.5693e-04, 2.2738e-04, 1.1517e-04, 2.2906e-04,\n -2.9798e-04, 4.5840e-05, 1.6273e-04, -2.0640e-04, -7.4069e-05,\n 1.5970e-23, -1.0709e-04, -2.5168e-04, -6.8605e-06, 3.2401e-04,\n 2.7275e-06, 1.3247e-04, -1.2913e-04, 3.8019e-05, -5.8873e-05,\n 1.4864e-04, 3.2900e-04, 1.2371e-04, 2.2854e-04, -7.1115e-05,\n -3.4770e-04, 5.5523e-05, 2.4515e-09, 1.7212e-05, -1.9593e-04,\n -4.5119e-06, -1.4417e-04, 6.8145e-05, -2.0669e-05, -2.0610e-04,\n -1.3443e-04, 3.6675e-05, 4.6967e-05, 1.1396e-04, 2.2813e-05,\n -3.4212e-04, -1.4196e-05, 9.6005e-05, -3.1541e-04, -4.0779e-04,\n -6.9823e-05, -1.1949e-04, -2.2425e-04, -1.9160e-06, -6.0634e-04,\n -1.3737e-04, 2.2862e-04, -6.6388e-05, 5.6052e-45, 6.9460e-04,\n -6.5316e-04, 3.5569e-04, 1.6868e-04, -6.7405e-05, -8.2623e-05,\n -6.3719e-05, 6.1423e-06, 1.3086e-04, -1.4459e-04, 2.9688e-05,\n 1.4315e-04, -2.6242e-04, 1.2983e-10, -1.9497e-04, 2.7965e-04,\n -2.6015e-05, 1.5390e-04, -1.1775e-04, 1.8142e-05, 2.9505e-05,\n -2.0350e-04, -3.1178e-05, -2.2314e-04, 3.1495e-05, 8.6018e-05,\n 5.6545e-05, 3.0073e-04, -1.9920e-04, -6.2118e-05, 7.0131e-06,\n -6.1069e-05, 1.6774e-04, -1.6492e-04, -2.2286e-04, -4.9063e-04,\n 7.6867e-05, -1.6470e-04, -1.8087e-04, -6.1079e-05, 1.5219e-04,\n -2.7600e-04, -2.3856e-04, 9.7170e-05, 1.2598e-04, -2.8327e-05,\n -3.1162e-04, 2.1665e-04, 1.5048e-04, 1.1819e-05, 3.0029e-04,\n -2.3063e-04, -1.1239e-04, -9.0509e-05, -7.0209e-05, -1.5892e-04,\n 2.7323e-04, -1.3264e-04, -1.1719e-06, -1.4420e-04, -3.6471e-05,\n -2.3004e-04, 2.1942e-04, 1.3559e-04, -7.7497e-05, -1.1237e-04,\n -2.7543e-04, -7.3248e-05, -1.5809e-04, 1.0603e-04, 4.7414e-05,\n -5.1807e-04, -3.3485e-05, -1.7661e-04, 2.2631e-04, -1.4340e-05,\n 3.2403e-05, -3.6270e-07, 1.9481e-05, 4.8682e-05, 1.5153e-04,\n -4.9188e-05, -8.3502e-05, 3.9726e-04, 2.8676e-05, 1.5178e-05,\n 6.8513e-05, 5.6052e-45, -1.2744e-04, -5.5835e-04, 1.9644e-05,\n -2.1151e-04, 4.1278e-04, -7.6977e-05, -1.4815e-04, 2.0830e-09,\n 1.8047e-04, -5.0000e-04, 1.1269e-04, -2.3457e-04, 5.5554e-04,\n 1.5227e-04, -1.9988e-04, 6.9627e-05, 1.1868e-04, -2.0665e-05,\n 1.5917e-04, 2.2842e-05, 5.9253e-05, 1.5904e-05, -1.4405e-05,\n 1.8588e-04, 7.5580e-05, -1.0213e-04, 2.3788e-04, -1.9104e-04,\n -5.6973e-05, 1.7629e-05, -1.5695e-04, 8.7595e-09, 4.1259e-04,\n 5.4443e-05, -2.1727e-04, 6.0220e-04, -5.7139e-04, -5.9646e-05,\n 4.2906e-06, -1.3247e-05, 1.6984e-04, -1.7933e-04, -3.3608e-04,\n -3.6630e-04, 2.6951e-04, 7.3664e-05, 9.8586e-05, 1.6354e-04,\n 3.9515e-04, 2.3590e-04, 8.5189e-05, 1.0193e-05, -6.0491e-05,\n 2.9599e-04, 1.3484e-04, -2.5004e-04, 5.6052e-45, 1.5051e-04,\n 8.1851e-05, 3.7550e-05, 1.2592e-04, -2.9566e-04, -1.0266e-04,\n 5.8379e-05, -1.0645e-04, -7.2468e-05, -2.0963e-04, -6.5886e-05,\n 6.5956e-05, 5.9764e-05, -2.0087e-04, -3.2889e-04, 8.5220e-07,\n -1.1671e-05, 1.4584e-05, -5.4969e-04, 1.7199e-04, -2.7161e-05,\n -8.2776e-04, 5.6052e-45, 3.7136e-04, 1.7297e-04, 2.1373e-04,\n -1.3089e-04, 8.1643e-05, -9.2219e-05, -1.8388e-04, 4.3712e-05,\n 3.7698e-05, -2.5810e-05, -1.9006e-04, 2.3750e-09, -4.9211e-05,\n 5.6052e-45, -1.8265e-04, -3.5422e-04, -1.0468e-06, 4.4950e-04,\n -3.1649e-04, 2.0444e-04, -3.6391e-04, 1.2175e-05, -5.0211e-04,\n 5.2784e-05, -5.2293e-04, -4.2740e-04, 1.6705e-04, 1.0219e-04,\n 4.7815e-05, -6.6331e-05, -4.5611e-04, 1.5246e-04, 1.5739e-04,\n 4.7733e-04, 3.9272e-07, 2.3830e-04, 5.9319e-05, 4.5270e-04,\n -3.5245e-04, 9.5081e-05, -1.6726e-04, 4.2264e-05, -8.9374e-05,\n 2.0626e-07, -1.4427e-04, 4.0503e-05, 2.0860e-04, 2.8218e-05,\n -5.7122e-08, 1.0053e-04, 4.0819e-04, 1.0712e-04, 2.7747e-04,\n -9.0054e-19, -7.9078e-05, 4.0387e-04, -3.4760e-04, -5.1501e-04,\n -5.9287e-05, -1.2007e-04, 6.5789e-05, 6.6710e-05, -1.1485e-04,\n -5.7748e-05, 1.8013e-04, -2.5347e-04, 2.4699e-04, -1.3127e-04,\n 9.5733e-05, -1.0475e-04, 1.7956e-04, 4.7747e-19, -1.2689e-05,\n 3.3522e-04, -2.1153e-05, -9.8155e-05, -2.8657e-05, -4.5075e-05,\n 3.6810e-05, 5.6052e-45, 4.5932e-05, 6.3551e-06, 6.4844e-04,\n -5.9835e-05, -1.0900e-04, -5.8369e-05, 4.1225e-07, 4.4265e-04,\n 3.4683e-04, -1.0606e-04, -2.2460e-04, 5.6052e-45, 3.7994e-05,\n -1.0121e-04, -5.6745e-05, 1.3552e-04, -7.0956e-04, -8.3731e-05,\n 2.2150e-04, -1.1114e-04, -2.6300e-04, -6.0898e-05, -1.6076e-04,\n 1.6677e-04, 2.7173e-05, -1.8271e-04, 2.8918e-04, 4.9967e-05,\n -1.5533e-04, 2.3383e-04, 4.0336e-04, 2.6907e-04, 2.1664e-04,\n -4.6610e-04, -2.6643e-04, -3.0407e-04, -2.3704e-04, 5.1528e-04,\n 2.7558e-04, -7.5780e-05, -1.0639e-05, -2.2184e-04, 1.5145e-04,\n 2.4171e-04, 2.1529e-04, -3.1804e-04, 1.0446e-04, 3.8818e-04,\n 5.6052e-45, -7.7565e-05, -4.7026e-04, 1.4259e-05, -2.6511e-04,\n -2.2909e-05, -1.8242e-05, -9.3722e-05, -2.9263e-04, -1.0762e-04,\n -1.0074e-04, 3.0688e-04, 1.4594e-05, 1.2267e-04, 2.6901e-06,\n -2.7051e-04, 1.3666e-05], device='cuda:0')", + "exp_avg_sq": "tensor([1.4554e-06, 2.5213e-06, 2.6140e-06, 9.6287e-07, 1.9939e-06, 1.3888e-06,\n 4.8780e-07, 1.6520e-06, 1.8723e-06, 5.3239e-07, 2.9290e-06, 8.4858e-07,\n 1.0530e-06, 8.5005e-07, 4.3285e-08, 1.1720e-06, 3.5344e-06, 2.0516e-06,\n 5.4965e-07, 1.3278e-06, 4.0704e-08, 9.5410e-07, 2.9679e-06, 5.7598e-07,\n 9.1441e-07, 1.0929e-06, 1.7075e-06, 1.2874e-06, 3.3173e-07, 3.0796e-07,\n 2.1996e-06, 2.2915e-06, 2.0893e-06, 2.2674e-06, 4.3131e-07, 6.3833e-07,\n 8.8796e-07, 1.6836e-06, 3.0731e-06, 5.4268e-07, 1.6485e-06, 2.1920e-06,\n 2.1725e-06, 2.9815e-07, 2.5128e-06, 1.8611e-06, 5.7224e-06, 2.4344e-07,\n 1.5748e-06, 6.8503e-07, 9.1802e-07, 7.6462e-07, 1.2126e-06, 7.1692e-07,\n 8.9730e-07, 1.3914e-06, 1.2044e-06, 7.3877e-07, 1.0242e-06, 1.4200e-06,\n 1.0163e-06, 1.1552e-06, 1.0452e-06, 1.7410e-06, 9.8761e-07, 2.3690e-07,\n 1.5812e-06, 2.1680e-06, 6.9427e-07, 4.9753e-07, 1.1573e-06, 2.6398e-14,\n 1.0114e-06, 1.2315e-06, 2.3449e-07, 9.5379e-07, 1.1662e-06, 1.8036e-06,\n 1.4246e-06, 1.0727e-06, 5.1481e-07, 2.7327e-06, 5.9584e-07, 9.1145e-07,\n 6.1327e-07, 3.1167e-07, 9.5529e-07, 2.5192e-06, 4.4056e-07, 1.5582e-06,\n 9.7772e-07, 2.3035e-06, 2.0995e-06, 3.5747e-06, 3.0550e-07, 1.7864e-06,\n 1.5714e-06, 2.1560e-06, 2.8325e-06, 1.1355e-06, 1.1979e-06, 1.2010e-06,\n 1.0322e-06, 5.5199e-06, 6.6592e-07, 1.6128e-06, 1.3012e-06, 1.0009e-06,\n 1.1363e-06, 3.9673e-07, 1.4860e-08, 1.0012e-06, 3.4161e-07, 2.1508e-06,\n 1.7157e-06, 1.3608e-06, 7.5625e-07, 2.0877e-06, 6.6858e-07, 7.8183e-07,\n 1.2075e-06, 1.0971e-06, 1.5552e-06, 1.4649e-06, 1.3107e-06, 9.4821e-07,\n 9.6256e-07, 1.9361e-06, 6.6079e-07, 8.0378e-07, 1.2021e-07, 5.6519e-07,\n 8.2638e-07, 5.1987e-07, 8.5929e-07, 7.2588e-07, 2.2949e-06, 1.1535e-06,\n 1.5613e-06, 1.5151e-06, 2.0835e-06, 1.0284e-06, 1.2893e-07, 2.3955e-06,\n 1.8081e-06, 1.3427e-06, 2.3867e-06, 8.7616e-07, 6.3760e-07, 2.3951e-06,\n 1.5932e-06, 1.0788e-06, 1.2585e-06, 3.2115e-06, 8.0055e-07, 1.2770e-08,\n 7.5840e-07, 1.2285e-06, 7.5801e-07, 5.8737e-07, 3.3137e-07, 9.6173e-07,\n 7.2072e-07, 2.1865e-06, 1.1399e-06, 4.8044e-07, 1.3596e-06, 1.4820e-06,\n 1.3579e-06, 6.1702e-07, 9.6522e-07, 1.1021e-06, 1.2801e-07, 2.7462e-06,\n 9.7674e-07, 6.1156e-07, 1.4827e-06, 1.0420e-06, 5.3013e-07, 3.0434e-07,\n 7.0086e-07, 4.2572e-07, 1.3754e-06, 7.1364e-07, 2.0647e-07, 6.3612e-06,\n 5.8871e-07, 9.5986e-07, 2.1640e-06, 1.3111e-06, 3.2259e-06, 1.0973e-06,\n 1.8200e-06, 1.8279e-06, 2.7027e-06, 1.9997e-06, 1.6459e-06, 3.6693e-07,\n 2.0324e-15, 2.1965e-06, 1.2540e-06, 6.5980e-07, 7.2036e-07, 1.2319e-06,\n 1.5178e-06, 7.4510e-07, 2.5187e-06, 1.3337e-06, 1.8312e-07, 1.6070e-07,\n 1.7423e-06, 4.1123e-06, 4.1512e-06, 1.4561e-06, 1.5808e-06, 1.5276e-06,\n 1.0949e-06, 3.2871e-07, 1.5649e-06, 2.2665e-06, 6.3539e-07, 1.2285e-06,\n 1.0122e-06, 1.4994e-06, 1.7693e-06, 1.8090e-06, 1.7563e-06, 7.3680e-06,\n 1.4275e-06, 1.2975e-07, 8.3446e-07, 1.6383e-06, 1.6524e-06, 1.2932e-06,\n 1.2199e-06, 3.7305e-07, 1.4101e-06, 1.3046e-06, 1.4285e-06, 5.9457e-06,\n 7.8896e-07, 1.1602e-06, 4.7797e-07, 8.8140e-07, 4.0100e-07, 6.3900e-07,\n 6.6551e-07, 1.1825e-06, 1.2251e-06, 2.1468e-06, 1.2484e-06, 1.3097e-06,\n 1.1517e-06, 2.0049e-06, 1.1220e-06, 1.1125e-06, 7.3190e-07, 9.0837e-07,\n 1.2688e-06, 3.3259e-07, 9.7679e-07, 7.1192e-07, 1.2320e-06, 9.0563e-07,\n 6.6419e-08, 4.5639e-07, 2.7854e-06, 1.0197e-06, 2.1106e-06, 9.6874e-07,\n 1.0663e-06, 1.2440e-06, 5.3861e-07, 1.9891e-06, 2.4900e-07, 8.7615e-07,\n 1.7587e-06, 2.8108e-06, 8.2325e-07, 5.8260e-07, 8.9142e-07, 2.4374e-06,\n 1.2917e-06, 6.7416e-07, 1.8113e-06, 1.6508e-06, 1.7525e-07, 1.1282e-06,\n 6.6859e-07, 1.2907e-06, 2.6217e-06, 2.0227e-06, 3.0455e-06, 2.3084e-07,\n 7.9516e-07, 1.2609e-06, 1.2418e-06, 1.0237e-06, 1.1812e-06, 8.1825e-07,\n 2.4435e-07, 7.8511e-07, 1.4067e-06, 2.1176e-06, 2.1928e-06, 5.7255e-07,\n 8.4722e-07, 1.8728e-06, 7.5107e-07, 1.2659e-06, 9.9225e-07, 4.0839e-07,\n 1.1764e-06, 8.7107e-07, 2.1875e-06, 1.1139e-06, 3.5296e-07, 2.2436e-06,\n 1.9302e-06, 9.7751e-07, 1.9048e-06, 9.7792e-07, 2.8772e-06, 1.0048e-06,\n 1.8059e-07, 1.2470e-06, 1.6680e-06, 4.4279e-07, 1.6855e-06, 1.4359e-06,\n 1.7988e-06, 1.6785e-06, 1.8209e-06, 4.6506e-07, 1.0879e-06, 1.6263e-06,\n 1.3544e-06, 4.4004e-07, 9.9119e-09, 9.3768e-07, 1.9232e-06, 8.8857e-07,\n 2.6297e-06, 3.5073e-10, 1.6386e-06, 8.9089e-06, 5.8988e-07, 6.9263e-07,\n 5.9933e-07, 1.6123e-06, 8.1002e-07, 4.1808e-06, 9.0652e-07, 1.0890e-06,\n 2.5160e-06, 5.4631e-07, 3.8423e-07, 9.5356e-07, 1.1906e-06, 8.5488e-07,\n 1.3432e-06, 2.7385e-07, 1.6533e-06, 2.3954e-06, 1.1258e-06, 2.3577e-06,\n 1.8073e-10, 1.1278e-06, 4.3261e-07, 1.4697e-06, 1.0457e-06, 1.3122e-06,\n 1.1417e-06, 2.0615e-06, 8.7217e-08, 7.2067e-07, 8.4989e-07, 1.0815e-06,\n 4.4653e-07, 5.5057e-07, 6.6644e-08, 1.2776e-06, 1.8736e-06, 1.5206e-06,\n 8.9888e-07, 2.1476e-06, 1.2283e-06, 2.5265e-07, 1.3937e-06, 2.4745e-06,\n 1.3811e-06, 1.6669e-06, 2.8776e-06, 1.4860e-06, 1.8525e-06, 4.4091e-07,\n 5.0093e-07, 1.6629e-06, 1.4297e-07, 2.1538e-06, 9.5211e-07, 2.0451e-06,\n 1.9846e-06, 1.2225e-06, 2.5995e-06, 1.1852e-06, 1.3022e-06, 2.6559e-06,\n 1.0329e-06, 1.1867e-06, 2.4668e-06, 9.7227e-07, 3.4997e-06, 2.0995e-06,\n 1.2795e-06, 6.9701e-07, 3.2912e-06, 3.7561e-07, 7.8532e-07, 5.2881e-07,\n 2.8615e-06, 1.0604e-06, 7.8574e-07, 1.8737e-06, 2.4743e-06, 1.1524e-06,\n 4.2787e-07, 1.7682e-06, 4.5772e-07, 3.2040e-06, 2.4344e-06, 2.0015e-06,\n 6.0110e-07, 8.2637e-07, 9.0659e-07, 3.8323e-07, 1.0880e-06, 2.8759e-07,\n 2.1105e-06, 2.2732e-06, 1.4480e-06, 1.0381e-06, 3.5559e-07, 1.2118e-06,\n 2.2585e-06, 1.3119e-06, 2.5765e-12, 2.5062e-07, 2.9086e-07, 2.0739e-06,\n 1.2995e-06, 4.3895e-07, 8.7124e-07, 1.3497e-06, 1.9873e-06, 1.1674e-06,\n 4.3733e-07, 6.6190e-07, 4.3203e-08, 1.9006e-06, 1.1874e-06, 4.7626e-07,\n 4.0287e-07, 2.0183e-06, 2.1260e-06, 5.2739e-07, 1.5424e-06, 1.9125e-06,\n 3.0119e-07, 2.7369e-06, 3.1310e-06, 1.6176e-06, 2.0674e-06, 1.4429e-06,\n 6.8457e-07, 7.7333e-07, 1.0593e-06, 1.2266e-06, 3.4016e-07, 3.3616e-07,\n 1.8557e-06, 8.3727e-07, 6.9007e-07, 9.9125e-07, 1.2053e-06, 1.6995e-06,\n 2.6207e-07, 1.3920e-06, 1.3232e-06, 2.4398e-06, 3.2136e-06, 1.4705e-06,\n 7.8275e-07, 2.0293e-06, 7.2521e-07, 7.0652e-17, 8.8540e-07, 2.6564e-06,\n 8.4161e-07, 1.7714e-06, 8.7464e-07, 1.7277e-06, 7.2035e-07, 1.7894e-06,\n 6.8101e-07, 7.7902e-06, 1.4491e-07, 1.7492e-06, 9.3984e-07, 1.2646e-07,\n 3.5874e-07, 1.4109e-06], device='cuda:0')" }, "4": { - "step": "tensor(2504.)", - "exp_avg": "tensor([[-4.7434e-07, 5.5272e-06, -2.1584e-05, ..., -1.4595e-17,\n 2.2460e-06, -1.8307e-05],\n [-9.1374e-06, 2.8713e-05, 4.3151e-05, ..., 7.6944e-18,\n -3.3722e-07, -5.5412e-05],\n [-3.6589e-05, 3.7703e-06, 1.7371e-05, ..., 4.0278e-19,\n -9.7063e-06, 7.0266e-06],\n ...,\n [-3.3800e-05, -5.3896e-06, -2.0663e-05, ..., 3.0230e-17,\n -4.1607e-06, 6.1071e-06],\n [-9.1887e-05, -2.6352e-05, 3.2071e-05, ..., -1.9443e-17,\n -1.3118e-05, -3.5037e-05],\n [ 1.0257e-05, 2.1846e-05, -4.8494e-05, ..., -3.1201e-18,\n -5.1401e-07, 3.9305e-05]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.4190e-08, 8.3515e-09, 7.6031e-09, ..., 2.8315e-11, 9.6867e-10,\n 1.2486e-08],\n [2.6961e-08, 2.8445e-08, 2.8901e-08, ..., 4.1825e-12, 1.2572e-09,\n 3.0186e-08],\n [1.7654e-08, 2.1029e-08, 1.4906e-08, ..., 5.9217e-11, 2.5814e-09,\n 1.4448e-08],\n ...,\n [9.6995e-08, 1.8377e-08, 2.6906e-08, ..., 4.8164e-11, 6.2385e-09,\n 2.2655e-08],\n [2.8616e-08, 2.5150e-08, 2.9667e-08, ..., 2.8299e-11, 1.1291e-08,\n 2.6722e-08],\n [3.0176e-08, 2.6521e-08, 3.6977e-08, ..., 2.5931e-10, 1.0656e-09,\n 2.9561e-08]], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([[-4.7321e-06, -2.1011e-05, 4.2604e-06, ..., -2.2388e-06,\n 2.4603e-05, 1.2164e-05],\n [ 1.4467e-05, 2.0076e-05, 8.7436e-06, ..., 5.2824e-07,\n -2.5103e-06, 4.4747e-05],\n [-2.2753e-06, -1.6327e-05, 9.4067e-06, ..., -1.4927e-06,\n -9.3528e-06, -1.1774e-05],\n ...,\n [-4.6699e-06, -1.7926e-05, 3.0818e-05, ..., -1.1351e-07,\n 1.7310e-05, 3.3637e-05],\n [-5.1734e-06, -2.5513e-05, -4.5289e-06, ..., 7.2830e-07,\n -2.8206e-05, 3.5093e-05],\n [-3.4571e-06, -3.5165e-06, 1.6682e-05, ..., 5.2553e-07,\n 1.2418e-05, -6.9892e-05]], device='cuda:0')", + "exp_avg_sq": "tensor([[8.3919e-09, 4.3173e-09, 4.1285e-09, ..., 1.5485e-11, 1.0645e-09,\n 7.8018e-09],\n [1.5999e-08, 1.4190e-08, 1.5870e-08, ..., 2.2582e-11, 1.5371e-09,\n 1.7056e-08],\n [1.0366e-08, 1.1089e-08, 7.6525e-09, ..., 3.2699e-11, 2.8370e-09,\n 8.3681e-09],\n ...,\n [6.4379e-08, 9.4463e-09, 1.6002e-08, ..., 2.7769e-11, 6.0283e-09,\n 1.3994e-08],\n [1.6412e-08, 1.1209e-08, 1.4943e-08, ..., 2.6561e-11, 1.0656e-08,\n 1.8085e-08],\n [1.8190e-08, 1.3872e-08, 2.2112e-08, ..., 9.0720e-11, 1.4063e-09,\n 1.9662e-08]], device='cuda:0')" }, "5": { - "step": "tensor(1252.)", - "exp_avg": "tensor([[ 3.0101e-06, 2.5657e-06, 4.0959e-06, ..., -6.9569e-07,\n 0.0000e+00, 3.3073e-07],\n [ 5.6511e-06, 8.4860e-07, -2.4813e-07, ..., -3.9613e-06,\n 0.0000e+00, 2.0307e-06],\n [ 6.1239e-07, -5.1405e-07, 3.1662e-06, ..., 4.7834e-06,\n 0.0000e+00, 1.1445e-05],\n ...,\n [-3.1599e-07, -1.0996e-06, 2.2961e-08, ..., -1.9261e-07,\n 0.0000e+00, 6.9726e-08],\n [-1.7232e-07, 2.9206e-07, 4.6680e-06, ..., 2.7917e-06,\n 0.0000e+00, 1.3129e-05],\n [-7.0413e-07, -4.6396e-06, -1.0318e-06, ..., 7.6837e-07,\n 0.0000e+00, -5.4133e-07]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.1560e-10, 9.4684e-11, 6.5009e-11, ..., 9.2650e-11, 0.0000e+00,\n 7.3883e-11],\n [2.5016e-10, 3.4571e-11, 1.1843e-10, ..., 1.0969e-10, 0.0000e+00,\n 8.7191e-11],\n [1.7990e-10, 7.6386e-11, 2.0102e-10, ..., 3.0328e-10, 0.0000e+00,\n 4.6720e-10],\n ...,\n [3.0435e-10, 3.7310e-11, 6.5483e-11, ..., 4.5357e-11, 0.0000e+00,\n 5.3669e-11],\n [2.1396e-11, 3.7439e-11, 3.4767e-10, ..., 3.8044e-10, 0.0000e+00,\n 1.3308e-10],\n [1.5595e-10, 3.4240e-11, 8.3075e-11, ..., 6.8885e-10, 0.0000e+00,\n 1.1229e-10]], device='cuda:0')" + "step": "tensor(2504.)", + "exp_avg": "tensor([[-4.7012e-07, -7.3777e-07, -2.0913e-08, ..., 2.9709e-06,\n 0.0000e+00, 1.8065e-07],\n [ 4.7888e-07, -5.2783e-07, -5.4394e-06, ..., 7.1456e-07,\n 0.0000e+00, -5.7333e-07],\n [-1.7245e-06, 2.8285e-08, -1.2192e-06, ..., -1.1390e-06,\n 0.0000e+00, 1.1182e-06],\n ...,\n [ 9.1075e-07, -2.6976e-09, -5.1482e-07, ..., 1.2119e-06,\n 0.0000e+00, -1.2295e-06],\n [ 1.7053e-07, 1.9637e-07, 5.5900e-06, ..., 1.5029e-07,\n 0.0000e+00, 2.1391e-06],\n [-1.5078e-05, -5.7391e-07, -2.3026e-06, ..., 4.9890e-06,\n 0.0000e+00, 1.3733e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[6.9760e-11, 3.4901e-11, 3.4804e-11, ..., 3.4289e-11, 0.0000e+00,\n 3.9011e-11],\n [1.1854e-10, 1.9802e-11, 6.0283e-11, ..., 7.4011e-11, 0.0000e+00,\n 5.0569e-11],\n [8.2026e-11, 3.9707e-11, 9.6103e-11, ..., 1.7762e-10, 0.0000e+00,\n 2.5030e-10],\n ...,\n [1.2526e-10, 2.7859e-11, 3.1889e-11, ..., 2.1602e-11, 0.0000e+00,\n 2.8700e-11],\n [8.7372e-12, 1.6425e-11, 2.0967e-10, ..., 2.2563e-10, 0.0000e+00,\n 5.9174e-11],\n [7.4851e-11, 2.1077e-11, 4.6558e-11, ..., 5.8076e-10, 0.0000e+00,\n 8.4299e-11]], device='cuda:0')" }, "6": { - "step": "tensor(1252.)", - "exp_avg": "tensor([-1.2679e-05, 1.8230e-05, 9.1510e-05, ..., -1.5824e-05,\n 5.8570e-05, -9.8966e-06], device='cuda:0')", - "exp_avg_sq": "tensor([3.2873e-08, 1.9230e-08, 2.6594e-08, ..., 3.9394e-08, 1.5814e-08,\n 1.3749e-08], device='cuda:0')" + "step": "tensor(2504.)", + "exp_avg": "tensor([-3.8286e-05, -5.1168e-05, -1.8607e-05, ..., 4.4341e-06,\n 1.3603e-05, -7.9370e-05], device='cuda:0')", + "exp_avg_sq": "tensor([1.4733e-08, 9.8558e-09, 1.2989e-08, ..., 1.3383e-08, 8.0348e-09,\n 8.1342e-09], device='cuda:0')" }, "7": { - "step": "tensor(1252.)", - "exp_avg": "tensor([[-4.1048e-06, -9.7021e-07, 2.0351e-06, ..., -1.1260e-07,\n 3.7870e-06, 1.1030e-07],\n [-4.0283e-06, 8.0687e-07, 3.4043e-06, ..., -1.3372e-06,\n -1.7005e-06, -1.7600e-07],\n [ 3.8263e-06, 1.5942e-06, -8.8412e-07, ..., 4.4283e-07,\n -1.4096e-07, 2.6081e-06],\n ...,\n [ 1.7094e-06, 5.9732e-08, 5.5631e-06, ..., 1.1461e-06,\n 4.7124e-06, -4.2240e-07],\n [ 2.2764e-06, -1.9859e-06, -1.4960e-06, ..., -1.7104e-07,\n -5.4290e-07, 1.2185e-06],\n [ 2.7370e-06, 1.0855e-06, 3.2234e-08, ..., -4.4717e-06,\n -1.3623e-06, -3.7549e-08]], device='cuda:0')", - "exp_avg_sq": "tensor([[9.5813e-11, 3.9480e-11, 5.4657e-11, ..., 1.5098e-10, 5.0947e-11,\n 9.6082e-11],\n [8.6957e-11, 6.1018e-11, 1.0126e-10, ..., 2.3654e-10, 5.3615e-11,\n 7.7247e-11],\n [1.1279e-10, 8.1611e-11, 7.4024e-11, ..., 1.7155e-10, 4.5052e-11,\n 7.5932e-11],\n ...,\n [1.5090e-10, 9.9205e-11, 1.4221e-10, ..., 1.5250e-10, 6.8065e-11,\n 1.0326e-10],\n [1.3339e-10, 1.1160e-10, 1.0131e-10, ..., 1.3085e-10, 7.5745e-11,\n 9.7685e-11],\n [1.1225e-10, 8.5538e-11, 9.5493e-11, ..., 1.1275e-10, 8.4256e-11,\n 1.4121e-10]], device='cuda:0')" + "step": "tensor(2504.)", + "exp_avg": "tensor([[ 3.8474e-06, -1.9643e-06, 3.9881e-07, ..., -4.5422e-07,\n -4.6485e-08, -1.4101e-06],\n [-2.6359e-06, 5.0941e-06, 1.4407e-07, ..., -6.3996e-07,\n 1.3961e-06, -9.6175e-07],\n [-1.4232e-06, -9.3329e-07, 8.8541e-07, ..., 5.8504e-07,\n 1.7551e-06, 1.6989e-06],\n ...,\n [ 2.4495e-06, -2.7868e-06, 1.0619e-06, ..., 2.1478e-07,\n 1.1004e-07, 8.4253e-07],\n [-3.4984e-06, -2.7277e-06, -6.0601e-07, ..., 2.0281e-07,\n 2.2607e-06, -1.9553e-06],\n [ 2.9739e-06, -5.3207e-06, -1.2438e-06, ..., -1.8337e-06,\n 6.6011e-07, 1.5386e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[4.5035e-11, 1.9958e-11, 2.5617e-11, ..., 4.8452e-11, 2.2268e-11,\n 3.5659e-11],\n [4.7549e-11, 3.3696e-11, 4.4745e-11, ..., 8.3269e-11, 3.1257e-11,\n 3.8746e-11],\n [5.3500e-11, 4.2032e-11, 3.7786e-11, ..., 6.0638e-11, 2.6572e-11,\n 4.4568e-11],\n ...,\n [6.2969e-11, 5.7636e-11, 6.5114e-11, ..., 5.8026e-11, 3.9667e-11,\n 4.7584e-11],\n [5.7187e-11, 4.8551e-11, 4.6471e-11, ..., 4.7439e-11, 3.5216e-11,\n 4.8510e-11],\n [6.0510e-11, 4.4427e-11, 4.9550e-11, ..., 1.1570e-10, 3.9470e-11,\n 5.3078e-11]], device='cuda:0')" }, "32": { - "step": "tensor(1252.)", + "step": "tensor(2504.)", "exp_avg": "tensor([5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.0205e-05], device='cuda:0')" + "exp_avg_sq": "tensor([2.9162e-06], device='cuda:0')" }, "33": { - "step": "tensor(1252.)", + "step": "tensor(2504.)", "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([9.2899e-08, 4.5158e-07, 1.3751e-07], device='cuda:0')" + "exp_avg_sq": "tensor([2.6547e-08, 1.2904e-07, 3.9296e-08], device='cuda:0')" }, "34": { - "step": "tensor(1252.)", + "step": "tensor(2504.)", "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", - "exp_avg_sq": "tensor([5.5438e-04, 5.0262e-06, 6.7886e-06, 7.0446e-06, 8.0768e-06, 9.2780e-06,\n 7.3764e-06, 5.4626e-06, 6.8998e-06, 6.1315e-06], device='cuda:0')" + "exp_avg_sq": "tensor([1.5842e-04, 1.4363e-06, 1.9399e-06, 2.0131e-06, 2.3080e-06, 2.6513e-06,\n 2.1079e-06, 1.5610e-06, 1.9717e-06, 1.7521e-06], device='cuda:0')" }, "36": { - "step": "tensor(1252.)", + "step": "tensor(2504.)", "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[8.5617e-13, 3.8580e-13, 5.7321e-13, ..., 2.5057e-12, 0.0000e+00,\n 8.4567e-13],\n [1.5412e-13, 3.7260e-13, 1.1207e-12, ..., 1.2390e-12, 0.0000e+00,\n 4.9739e-13],\n [1.3831e-10, 3.2724e-11, 1.8183e-09, ..., 5.8295e-10, 0.0000e+00,\n 1.6266e-09],\n ...,\n [3.8547e-11, 1.6031e-12, 3.0360e-10, ..., 1.6942e-10, 0.0000e+00,\n 4.5586e-10],\n [1.0390e-12, 3.7387e-13, 1.0988e-13, ..., 4.0797e-12, 0.0000e+00,\n 2.1036e-12],\n [3.5053e-14, 6.5772e-13, 8.5922e-14, ..., 2.2030e-13, 0.0000e+00,\n 8.9740e-13]], device='cuda:0')" + "exp_avg_sq": "tensor([[2.4466e-13, 1.1025e-13, 1.6380e-13, ..., 7.1602e-13, 0.0000e+00,\n 2.4166e-13],\n [4.4041e-14, 1.0647e-13, 3.2025e-13, ..., 3.5405e-13, 0.0000e+00,\n 1.4213e-13],\n [3.9524e-11, 9.3513e-12, 5.1959e-10, ..., 1.6658e-10, 0.0000e+00,\n 4.6482e-10],\n ...,\n [1.1015e-11, 4.5811e-13, 8.6757e-11, ..., 4.8414e-11, 0.0000e+00,\n 1.3027e-10],\n [2.9690e-13, 1.0684e-13, 3.1399e-14, ..., 1.1658e-12, 0.0000e+00,\n 6.0113e-13],\n [1.0017e-14, 1.8795e-13, 2.4553e-14, ..., 6.2953e-14, 0.0000e+00,\n 2.5644e-13]], device='cuda:0')" }, "37": { - "step": "tensor(1252.)", + "step": "tensor(2504.)", "exp_avg": "tensor([ 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.5205e-09, 5.2476e-10, 1.9788e-06, 2.6257e-09, 9.5513e-07, 3.2895e-07,\n 6.2935e-08, 7.2219e-08, 1.8250e-06, 6.6781e-09, 9.2785e-07, 1.1290e-07,\n 1.0008e-09, 2.0875e-08, 1.4738e-10, 1.0586e-08, 1.5102e-07, 1.7103e-07,\n 1.1058e-08, 1.3280e-08, 1.6198e-07, 7.4729e-09, 1.1264e-07, 6.7097e-08,\n 5.6708e-08, 5.6498e-07, 1.0739e-07, 9.3509e-08, 7.1160e-08, 1.4183e-06,\n 5.2107e-07, 3.2244e-07, 3.7562e-06, 6.1085e-09, 5.0327e-08, 1.1257e-08,\n 1.5649e-07, 6.9550e-09, 6.3168e-07, 1.6684e-08, 6.2554e-07, 1.0668e-08,\n 3.3229e-08, 5.9226e-07, 9.8328e-10, 3.5888e-08, 5.5521e-08, 5.5358e-08,\n 1.1306e-08, 3.9873e-07, 1.3778e-07, 1.4774e-07, 9.8972e-07, 6.4728e-08,\n 1.0997e-08, 7.9250e-08, 1.5804e-11, 3.8596e-08, 1.7001e-07, 1.6666e-06,\n 2.0548e-07, 1.8230e-08, 5.8109e-10, 6.2061e-08, 7.0521e-08, 9.3277e-07,\n 8.5117e-09, 6.2495e-08, 2.0511e-08, 1.4336e-07, 8.7757e-08, 7.6543e-08,\n 2.3618e-08, 3.4935e-07, 4.9107e-07, 2.7730e-07, 3.3645e-07, 8.1236e-08,\n 2.4295e-08, 6.7176e-10, 8.3200e-08, 3.2324e-08, 8.3135e-09, 4.6814e-08,\n 6.5912e-08, 2.0713e-10, 6.8789e-07, 1.1038e-07, 2.0139e-08, 9.0013e-10,\n 6.4538e-09, 6.5871e-08, 3.7654e-07, 5.8949e-07, 5.5232e-07, 4.1942e-08,\n 4.9454e-07, 2.0732e-07, 1.4283e-07, 2.0586e-08, 6.0639e-08, 2.5450e-09,\n 1.0838e-08, 6.4979e-07, 2.6111e-07, 4.5610e-07, 1.4863e-07, 1.4253e-09,\n 6.1144e-07, 5.1083e-08, 8.8646e-08, 1.2258e-08, 1.2984e-07, 6.3116e-08,\n 5.8429e-08, 2.7626e-06, 2.0175e-09, 9.3236e-08, 1.7960e-08, 8.6325e-07,\n 8.7920e-07, 1.1998e-07, 2.7457e-07, 1.0817e-08, 1.3559e-08, 2.3689e-07,\n 1.2796e-07, 1.4455e-07, 1.9353e-08, 4.5338e-07, 3.5780e-10, 1.0223e-08,\n 2.4646e-06, 2.2470e-07, 4.2334e-09, 4.8762e-08, 1.5270e-07, 6.0775e-08,\n 1.0001e-06, 1.9263e-06, 4.5023e-08, 1.3929e-07, 1.1827e-08, 1.9517e-09,\n 2.1555e-08, 9.7660e-08, 1.0275e-07, 1.9374e-07, 7.3002e-09, 4.1298e-07,\n 3.6959e-09, 1.0168e-08, 3.3923e-07, 1.5128e-07, 1.2772e-07, 7.3781e-07,\n 1.0652e-08, 5.6608e-07, 4.9001e-09, 2.6453e-08, 6.3476e-10, 4.5639e-07,\n 4.2358e-10, 3.4460e-08, 5.8766e-07, 3.3355e-07, 1.5710e-08, 2.0631e-07,\n 1.7561e-09, 1.0282e-08, 4.4072e-08, 2.4453e-07, 5.8039e-07, 2.0436e-07,\n 1.2344e-07, 4.2002e-08, 6.0749e-07, 7.3601e-07, 1.7637e-09, 1.3493e-10,\n 1.3402e-09, 2.1931e-07, 1.7672e-08, 6.6855e-09, 2.2969e-08, 8.5756e-07,\n 5.3387e-08, 1.0521e-09, 1.9118e-08, 3.1670e-08, 3.7381e-07, 2.2098e-09,\n 3.4964e-07, 6.9833e-07, 2.0766e-07, 5.6554e-08, 1.6939e-08, 1.9129e-07,\n 5.1738e-08, 2.1650e-07, 5.7837e-08, 3.4784e-09, 1.8107e-06, 3.5660e-09,\n 1.2412e-07, 2.3246e-10, 1.3995e-07, 4.6555e-07, 1.2239e-09, 3.1817e-08,\n 5.3514e-07, 3.2345e-08, 2.4670e-07, 1.6605e-06, 1.5308e-06, 1.6875e-07,\n 2.5189e-09, 7.6853e-08, 4.4765e-07, 9.2023e-10, 1.0565e-07, 1.8615e-06,\n 5.7766e-08, 1.0114e-07, 3.3489e-07, 4.3080e-08, 1.6483e-08, 1.5048e-07,\n 9.0664e-08, 7.5595e-08, 4.2811e-08, 1.7689e-07, 7.9431e-08, 4.2311e-08,\n 1.8373e-07, 2.9094e-08, 5.8919e-08, 7.3794e-08, 1.7942e-07, 3.1607e-09,\n 8.8820e-07, 3.0696e-08, 8.3345e-09, 3.0296e-07, 1.0395e-07, 1.5553e-09,\n 1.6195e-07, 1.0773e-06, 8.0162e-07, 1.1748e-06, 3.9445e-08, 1.6555e-07,\n 1.2588e-07, 5.4843e-07, 4.3440e-09, 5.4639e-10], device='cuda:0')" + "exp_avg_sq": "tensor([4.3449e-10, 1.4995e-10, 5.6545e-07, 7.5030e-10, 2.7294e-07, 9.4000e-08,\n 1.7984e-08, 2.0637e-08, 5.2152e-07, 1.9083e-09, 2.6514e-07, 3.2263e-08,\n 2.8600e-10, 5.9653e-09, 4.2114e-11, 3.0251e-09, 4.3156e-08, 4.8874e-08,\n 3.1599e-09, 3.7950e-09, 4.6286e-08, 2.1354e-09, 3.2188e-08, 1.9173e-08,\n 1.6205e-08, 1.6145e-07, 3.0688e-08, 2.6721e-08, 2.0334e-08, 4.0529e-07,\n 1.4890e-07, 9.2141e-08, 1.0734e-06, 1.7456e-09, 1.4381e-08, 3.2167e-09,\n 4.4719e-08, 1.9875e-09, 1.8051e-07, 4.7677e-09, 1.7875e-07, 3.0483e-09,\n 9.4954e-09, 1.6924e-07, 2.8098e-10, 1.0255e-08, 1.5865e-08, 1.5819e-08,\n 3.2308e-09, 1.1394e-07, 3.9371e-08, 4.2218e-08, 2.8282e-07, 1.8497e-08,\n 3.1426e-09, 2.2646e-08, 4.5160e-12, 1.1029e-08, 4.8581e-08, 4.7625e-07,\n 5.8718e-08, 5.2094e-09, 1.6605e-10, 1.7734e-08, 2.0152e-08, 2.6655e-07,\n 2.4323e-09, 1.7859e-08, 5.8611e-09, 4.0966e-08, 2.5077e-08, 2.1873e-08,\n 6.7490e-09, 9.9829e-08, 1.4033e-07, 7.9241e-08, 9.6144e-08, 2.3214e-08,\n 6.9424e-09, 1.9196e-10, 2.3775e-08, 9.2369e-09, 2.3756e-09, 1.3377e-08,\n 1.8835e-08, 5.9188e-11, 1.9657e-07, 3.1543e-08, 5.7549e-09, 2.5722e-10,\n 1.8442e-09, 1.8823e-08, 1.0760e-07, 1.6845e-07, 1.5783e-07, 1.1985e-08,\n 1.4132e-07, 5.9245e-08, 4.0814e-08, 5.8825e-09, 1.7328e-08, 7.2725e-10,\n 3.0969e-09, 1.8568e-07, 7.4615e-08, 1.3033e-07, 4.2473e-08, 4.0730e-10,\n 1.7472e-07, 1.4597e-08, 2.5331e-08, 3.5029e-09, 3.7103e-08, 1.8036e-08,\n 1.6697e-08, 7.8944e-07, 5.7653e-10, 2.6643e-08, 5.1322e-09, 2.4668e-07,\n 2.5124e-07, 3.4285e-08, 7.8462e-08, 3.0910e-09, 3.8747e-09, 6.7693e-08,\n 3.6566e-08, 4.1307e-08, 5.5303e-09, 1.2956e-07, 1.0224e-10, 2.9212e-09,\n 7.0427e-07, 6.4210e-08, 1.2097e-09, 1.3934e-08, 4.3636e-08, 1.7367e-08,\n 2.8578e-07, 5.5045e-07, 1.2866e-08, 3.9804e-08, 3.3798e-09, 5.5772e-10,\n 6.1596e-09, 2.7907e-08, 2.9360e-08, 5.5362e-08, 2.0861e-09, 1.1801e-07,\n 1.0561e-09, 2.9056e-09, 9.6938e-08, 4.3230e-08, 3.6498e-08, 2.1084e-07,\n 3.0440e-09, 1.6176e-07, 1.4002e-09, 7.5592e-09, 1.8139e-10, 1.3042e-07,\n 1.2104e-10, 9.8472e-09, 1.6793e-07, 9.5314e-08, 4.4893e-09, 5.8954e-08,\n 5.0181e-10, 2.9382e-09, 1.2594e-08, 6.9876e-08, 1.6585e-07, 5.8398e-08,\n 3.5273e-08, 1.2002e-08, 1.7360e-07, 2.1032e-07, 5.0398e-10, 3.8558e-11,\n 3.8297e-10, 6.2670e-08, 5.0500e-09, 1.9104e-09, 6.5636e-09, 2.4505e-07,\n 1.5256e-08, 3.0066e-10, 5.4633e-09, 9.0498e-09, 1.0682e-07, 6.3146e-10,\n 9.9912e-08, 1.9955e-07, 5.9340e-08, 1.6161e-08, 4.8405e-09, 5.4663e-08,\n 1.4785e-08, 6.1865e-08, 1.6527e-08, 9.9398e-10, 5.1742e-07, 1.0190e-09,\n 3.5468e-08, 6.6426e-11, 3.9993e-08, 1.3303e-07, 3.4973e-10, 9.0920e-09,\n 1.5292e-07, 9.2427e-09, 7.0496e-08, 4.7450e-07, 4.3742e-07, 4.8221e-08,\n 7.1978e-10, 2.1961e-08, 1.2792e-07, 2.6296e-10, 3.0190e-08, 5.3195e-07,\n 1.6507e-08, 2.8903e-08, 9.5696e-08, 1.2311e-08, 4.7102e-09, 4.3001e-08,\n 2.5908e-08, 2.1602e-08, 1.2233e-08, 5.0547e-08, 2.2698e-08, 1.2091e-08,\n 5.2502e-08, 8.3139e-09, 1.6837e-08, 2.1087e-08, 5.1270e-08, 9.0318e-10,\n 2.5381e-07, 8.7717e-09, 2.3816e-09, 8.6574e-08, 2.9703e-08, 4.4443e-10,\n 4.6280e-08, 3.0786e-07, 2.2907e-07, 3.3571e-07, 1.1272e-08, 4.7306e-08,\n 3.5970e-08, 1.5672e-07, 1.2413e-09, 1.5613e-10], device='cuda:0')" }, "38": { - "step": "tensor(1252.)", + "step": "tensor(2504.)", "exp_avg": "tensor([ 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([3.4157e-12, 3.9175e-15, 3.8014e-09, 1.2986e-11, 2.0221e-09, 6.3644e-10,\n 1.1092e-10, 1.6195e-10, 4.2003e-09, 1.6614e-11, 3.3201e-09, 1.9342e-10,\n 6.0522e-12, 2.6083e-11, 4.7363e-13, 3.8343e-11, 3.3533e-10, 4.7056e-10,\n 4.0119e-11, 2.8996e-11, 2.6340e-10, 1.8367e-11, 1.4920e-10, 1.1537e-10,\n 1.0618e-10, 7.0336e-10, 1.6471e-10, 1.6737e-10, 4.0719e-10, 4.1164e-09,\n 6.4315e-10, 5.6599e-10, 7.0984e-09, 8.9658e-12, 8.7550e-11, 4.2654e-11,\n 1.9901e-10, 2.0929e-11, 1.6008e-09, 2.3433e-11, 1.7631e-09, 3.6338e-11,\n 9.7853e-11, 7.8672e-10, 9.8603e-13, 3.9510e-11, 8.7327e-11, 7.9479e-11,\n 3.0895e-11, 9.4936e-10, 1.8199e-10, 3.8837e-10, 2.6778e-09, 9.1703e-11,\n 1.7982e-11, 1.1757e-10, 9.5042e-13, 4.9615e-11, 2.3878e-10, 4.8454e-09,\n 3.3764e-10, 2.8562e-11, 5.7210e-15, 1.3884e-10, 1.2678e-10, 2.5839e-09,\n 2.1543e-11, 8.6786e-11, 4.1759e-11, 1.8520e-10, 1.4771e-10, 1.3911e-10,\n 2.0522e-11, 7.0095e-10, 8.4641e-10, 4.2222e-10, 9.3172e-10, 1.2408e-10,\n 6.4008e-11, 1.7457e-13, 1.4692e-10, 4.6667e-11, 1.6082e-11, 5.8736e-11,\n 9.6476e-11, 6.1533e-13, 1.0480e-09, 2.6875e-10, 4.5418e-11, 1.6203e-13,\n 1.5658e-11, 1.1045e-10, 8.4748e-10, 1.6157e-09, 6.6712e-10, 5.2171e-11,\n 6.9246e-10, 3.0430e-10, 2.0328e-10, 9.0462e-11, 8.4162e-11, 3.6697e-12,\n 1.8055e-11, 1.2998e-09, 7.5954e-10, 8.8037e-10, 2.1494e-10, 4.8062e-12,\n 2.1931e-09, 9.3395e-11, 1.9854e-10, 2.5918e-11, 2.0794e-10, 7.9095e-11,\n 1.0910e-10, 7.1152e-09, 4.8052e-12, 9.4377e-11, 9.4015e-11, 2.7182e-09,\n 1.5178e-09, 1.6508e-10, 3.4776e-10, 9.2241e-11, 3.8677e-11, 5.0806e-10,\n 2.6096e-10, 3.4038e-10, 3.7559e-11, 1.4779e-09, 5.9446e-12, 2.0393e-11,\n 6.6907e-09, 2.0418e-10, 6.0045e-12, 6.6667e-11, 5.2233e-10, 8.6762e-11,\n 2.8422e-09, 6.1952e-09, 8.9529e-11, 2.9690e-10, 4.5075e-11, 8.2334e-12,\n 3.6488e-11, 1.4942e-10, 2.0272e-10, 3.6326e-10, 2.7427e-11, 1.4682e-09,\n 5.7553e-12, 1.1859e-11, 7.2607e-10, 3.0026e-10, 2.7933e-10, 1.3090e-09,\n 1.5469e-11, 6.5654e-10, 7.1626e-12, 3.9689e-11, 2.4035e-12, 7.5481e-10,\n 1.6289e-12, 7.8572e-11, 1.0086e-09, 5.2377e-10, 1.9441e-11, 4.3057e-10,\n 6.8905e-12, 3.4349e-11, 4.1038e-11, 1.4303e-09, 1.4340e-09, 6.0753e-10,\n 3.5909e-10, 9.7467e-11, 1.1975e-09, 1.8871e-09, 7.5118e-12, 1.1516e-13,\n 2.8150e-12, 6.0070e-10, 5.4452e-11, 1.2162e-11, 8.0961e-11, 1.1150e-09,\n 8.8337e-11, 2.2162e-12, 3.7477e-11, 5.6633e-11, 8.3693e-10, 1.4018e-11,\n 6.5256e-10, 2.5398e-09, 3.0423e-10, 1.6468e-10, 3.0411e-11, 3.2888e-10,\n 7.3046e-11, 4.0883e-10, 9.4185e-11, 2.5207e-11, 2.9326e-09, 1.2155e-11,\n 1.9035e-10, 4.4863e-13, 3.2079e-10, 1.0658e-09, 4.1982e-12, 3.5389e-11,\n 9.4323e-10, 4.3934e-11, 4.2428e-10, 3.0721e-09, 2.4557e-09, 3.1735e-10,\n 4.2134e-12, 1.2434e-10, 7.3293e-10, 8.1065e-13, 1.8698e-10, 3.1921e-09,\n 1.3736e-10, 1.8609e-10, 4.7737e-10, 9.8716e-11, 3.8515e-11, 2.4644e-10,\n 9.6719e-11, 1.7344e-10, 8.1548e-11, 4.8666e-10, 1.1880e-10, 7.7604e-11,\n 2.8153e-10, 6.4333e-11, 7.4233e-11, 1.0618e-10, 6.2621e-10, 6.7338e-12,\n 2.4572e-09, 4.9382e-11, 8.0881e-11, 3.2516e-10, 1.4567e-10, 1.1589e-11,\n 3.4705e-10, 3.3458e-09, 1.3086e-09, 4.1087e-09, 6.4370e-11, 3.4161e-10,\n 2.0284e-10, 9.4924e-10, 7.5082e-12, 2.0389e-12], device='cuda:0')" + "exp_avg_sq": "tensor([9.7608e-13, 1.1194e-15, 1.0863e-09, 3.7109e-12, 5.7782e-10, 1.8187e-10,\n 3.1695e-11, 4.6279e-11, 1.2003e-09, 4.7476e-12, 9.4873e-10, 5.5272e-11,\n 1.7295e-12, 7.4535e-12, 1.3534e-13, 1.0957e-11, 9.5824e-11, 1.3447e-10,\n 1.1464e-11, 8.2857e-12, 7.5268e-11, 5.2486e-12, 4.2635e-11, 3.2969e-11,\n 3.0343e-11, 2.0099e-10, 4.7067e-11, 4.7828e-11, 1.1636e-10, 1.1763e-09,\n 1.8379e-10, 1.6174e-10, 2.0284e-09, 2.5620e-12, 2.5018e-11, 1.2189e-11,\n 5.6868e-11, 5.9805e-12, 4.5743e-10, 6.6962e-12, 5.0382e-10, 1.0384e-11,\n 2.7962e-11, 2.2481e-10, 2.8177e-13, 1.1290e-11, 2.4955e-11, 2.2712e-11,\n 8.8284e-12, 2.7129e-10, 5.2006e-11, 1.1098e-10, 7.6520e-10, 2.6205e-11,\n 5.1385e-12, 3.3596e-11, 2.7159e-13, 1.4178e-11, 6.8232e-11, 1.3846e-09,\n 9.6483e-11, 8.1619e-12, 1.6348e-15, 3.9674e-11, 3.6228e-11, 7.3838e-10,\n 6.1560e-12, 2.4800e-11, 1.1933e-11, 5.2921e-11, 4.2210e-11, 3.9752e-11,\n 5.8645e-12, 2.0030e-10, 2.4187e-10, 1.2065e-10, 2.6624e-10, 3.5456e-11,\n 1.8291e-11, 4.9886e-14, 4.1983e-11, 1.3335e-11, 4.5955e-12, 1.6784e-11,\n 2.7569e-11, 1.7583e-13, 2.9947e-10, 7.6798e-11, 1.2979e-11, 4.6303e-14,\n 4.4743e-12, 3.1561e-11, 2.4217e-10, 4.6169e-10, 1.9063e-10, 1.4908e-11,\n 1.9788e-10, 8.6956e-11, 5.8089e-11, 2.5850e-11, 2.4050e-11, 1.0486e-12,\n 5.1594e-12, 3.7143e-10, 2.1704e-10, 2.5157e-10, 6.1422e-11, 1.3734e-12,\n 6.2670e-10, 2.6688e-11, 5.6735e-11, 7.4063e-12, 5.9422e-11, 2.2602e-11,\n 3.1177e-11, 2.0332e-09, 1.3731e-12, 2.6969e-11, 2.6865e-11, 7.7675e-10,\n 4.3372e-10, 4.7172e-11, 9.9376e-11, 2.6359e-11, 1.1052e-11, 1.4518e-10,\n 7.4572e-11, 9.7267e-11, 1.0733e-11, 4.2233e-10, 1.6987e-12, 5.8276e-12,\n 1.9119e-09, 5.8346e-11, 1.7158e-12, 1.9051e-11, 1.4926e-10, 2.4793e-11,\n 8.1218e-10, 1.7703e-09, 2.5584e-11, 8.4842e-11, 1.2880e-11, 2.3528e-12,\n 1.0427e-11, 4.2697e-11, 5.7930e-11, 1.0380e-10, 7.8375e-12, 4.1956e-10,\n 1.6446e-12, 3.3888e-12, 2.0748e-10, 8.5803e-11, 7.9821e-11, 3.7406e-10,\n 4.4205e-12, 1.8761e-10, 2.0468e-12, 1.1341e-11, 6.8681e-13, 2.1569e-10,\n 4.6548e-13, 2.2453e-11, 2.8821e-10, 1.4967e-10, 5.5556e-12, 1.2304e-10,\n 1.9690e-12, 9.8156e-12, 1.1727e-11, 4.0871e-10, 4.0978e-10, 1.7361e-10,\n 1.0261e-10, 2.7852e-11, 3.4219e-10, 5.3926e-10, 2.1466e-12, 3.2907e-14,\n 8.0440e-13, 1.7165e-10, 1.5560e-11, 3.4755e-12, 2.3135e-11, 3.1862e-10,\n 2.5243e-11, 6.3331e-13, 1.0709e-11, 1.6183e-11, 2.3916e-10, 4.0058e-12,\n 1.8647e-10, 7.2576e-10, 8.6935e-11, 4.7059e-11, 8.6901e-12, 9.3981e-11,\n 2.0874e-11, 1.1683e-10, 2.6914e-11, 7.2031e-12, 8.3801e-10, 3.4734e-12,\n 5.4395e-11, 1.2820e-13, 9.1668e-11, 3.0457e-10, 1.1997e-12, 1.0113e-11,\n 2.6954e-10, 1.2555e-11, 1.2124e-10, 8.7787e-10, 7.0173e-10, 9.0684e-11,\n 1.2040e-12, 3.5532e-11, 2.0944e-10, 2.3165e-13, 5.3430e-11, 9.1217e-10,\n 3.9251e-11, 5.3177e-11, 1.3641e-10, 2.8209e-11, 1.1006e-11, 7.0422e-11,\n 2.7638e-11, 4.9561e-11, 2.3303e-11, 1.3907e-10, 3.3947e-11, 2.2176e-11,\n 8.0450e-11, 1.8384e-11, 2.1213e-11, 3.0342e-11, 1.7894e-10, 1.9242e-12,\n 7.0216e-10, 1.4111e-11, 2.3112e-11, 9.2916e-11, 4.1625e-11, 3.3116e-12,\n 9.9173e-11, 9.5607e-10, 3.7395e-10, 1.1741e-09, 1.8394e-11, 9.7617e-11,\n 5.7963e-11, 2.7125e-10, 2.1455e-12, 5.8264e-13], device='cuda:0')" }, "39": { - "step": "tensor(1252.)", + "step": "tensor(2504.)", "exp_avg": "tensor([ 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([6.9625e-12, 2.6584e-13, 5.4413e-09, 1.1720e-11, 2.4811e-09, 8.3259e-10,\n 1.9729e-10, 2.1200e-10, 5.0888e-09, 3.5403e-11, 2.6697e-09, 3.2240e-10,\n 9.5191e-12, 6.3828e-11, 1.9153e-12, 4.8473e-11, 4.5348e-10, 5.1036e-10,\n 3.5939e-11, 4.6322e-11, 4.6564e-10, 2.5564e-11, 3.2743e-10, 2.0313e-10,\n 1.6881e-10, 1.5379e-09, 3.2514e-10, 2.5083e-10, 2.7322e-10, 3.9363e-09,\n 1.3524e-09, 8.9747e-10, 1.0097e-08, 1.5780e-11, 1.5113e-10, 4.4392e-11,\n 4.0343e-10, 3.8043e-11, 1.7132e-09, 5.5931e-11, 1.6136e-09, 4.0068e-11,\n 1.1101e-10, 1.7430e-09, 6.9943e-13, 8.1120e-11, 1.6293e-10, 1.7230e-10,\n 3.3755e-11, 1.1027e-09, 4.1043e-10, 3.4183e-10, 2.5918e-09, 1.8110e-10,\n 3.7334e-11, 2.4240e-10, 2.3300e-12, 1.0319e-10, 4.5769e-10, 4.5333e-09,\n 5.4628e-10, 5.4070e-11, 5.3420e-13, 2.0255e-10, 2.1867e-10, 2.3965e-09,\n 2.9215e-11, 1.7849e-10, 6.5387e-11, 4.1233e-10, 2.6592e-10, 2.3482e-10,\n 6.1665e-11, 1.0026e-09, 1.4129e-09, 7.6142e-10, 9.4839e-10, 2.4134e-10,\n 8.3165e-11, 1.5867e-12, 2.4586e-10, 8.5004e-11, 3.2816e-11, 1.4624e-10,\n 1.6686e-10, 2.7381e-12, 1.8644e-09, 3.3153e-10, 9.1590e-11, 1.7293e-13,\n 2.0175e-11, 1.8498e-10, 9.6604e-10, 1.5311e-09, 1.3791e-09, 9.6520e-11,\n 1.3164e-09, 6.0654e-10, 3.7163e-10, 1.0252e-10, 1.8143e-10, 8.8390e-12,\n 3.7042e-11, 1.7032e-09, 7.6663e-10, 1.0737e-09, 4.4356e-10, 8.4501e-12,\n 1.5182e-09, 1.5344e-10, 2.6895e-10, 4.6287e-11, 3.3242e-10, 1.4669e-10,\n 1.5943e-10, 7.4431e-09, 1.0765e-11, 2.5183e-10, 8.3451e-11, 2.4550e-09,\n 2.3553e-09, 3.4171e-10, 7.7141e-10, 9.5666e-11, 4.5866e-11, 7.0394e-10,\n 3.1980e-10, 4.0994e-10, 6.7436e-11, 1.2758e-09, 8.0501e-12, 3.4567e-11,\n 6.6127e-09, 5.9586e-10, 1.3141e-11, 1.5270e-10, 4.5395e-10, 1.4594e-10,\n 2.6189e-09, 5.1143e-09, 1.4644e-10, 4.2293e-10, 3.5195e-11, 1.0058e-11,\n 6.8708e-11, 2.8024e-10, 2.6205e-10, 5.4021e-10, 3.4839e-11, 1.1890e-09,\n 1.2374e-11, 2.5128e-11, 8.8740e-10, 3.7570e-10, 3.6628e-10, 2.0232e-09,\n 3.3047e-11, 1.5030e-09, 1.5486e-11, 7.3335e-11, 1.2151e-11, 1.3046e-09,\n 5.9214e-12, 1.0406e-10, 1.6584e-09, 8.6535e-10, 4.8357e-11, 5.6139e-10,\n 9.4154e-12, 3.5384e-11, 1.1043e-10, 7.5018e-10, 1.4876e-09, 6.3438e-10,\n 3.3111e-10, 1.3638e-10, 1.7152e-09, 2.0975e-09, 2.1112e-11, 4.1099e-13,\n 4.6604e-12, 5.9061e-10, 6.1238e-11, 2.3696e-11, 6.9740e-11, 2.3387e-09,\n 1.4756e-10, 4.6172e-12, 6.7654e-11, 9.5395e-11, 1.0171e-09, 1.8665e-11,\n 8.9227e-10, 1.9591e-09, 5.6243e-10, 1.7268e-10, 5.2882e-11, 4.6707e-10,\n 1.3986e-10, 6.2074e-10, 1.7294e-10, 2.8221e-11, 4.8531e-09, 2.0365e-11,\n 3.3428e-10, 5.8793e-13, 4.0158e-10, 1.2186e-09, 1.4407e-11, 1.0370e-10,\n 1.5123e-09, 7.7660e-11, 7.0503e-10, 4.3489e-09, 4.0868e-09, 4.5354e-10,\n 1.0561e-11, 2.0806e-10, 1.1926e-09, 3.0604e-12, 2.5689e-10, 5.0849e-09,\n 1.6846e-10, 2.8563e-10, 9.8041e-10, 1.0404e-10, 5.8027e-11, 4.2691e-10,\n 2.5815e-10, 2.2213e-10, 1.3868e-10, 4.7920e-10, 1.8562e-10, 1.2570e-10,\n 5.2652e-10, 9.2961e-11, 1.5038e-10, 2.1929e-10, 4.5148e-10, 1.4908e-11,\n 2.3761e-09, 9.2375e-11, 6.2388e-11, 8.7565e-10, 2.8528e-10, 1.5453e-11,\n 4.8870e-10, 2.7393e-09, 2.3294e-09, 3.3327e-09, 1.2180e-10, 4.2462e-10,\n 2.9872e-10, 1.4971e-09, 1.8397e-11, 1.9902e-12], device='cuda:0')" + "exp_avg_sq": "tensor([1.9896e-12, 7.5965e-14, 1.5549e-09, 3.3491e-12, 7.0899e-10, 2.3792e-10,\n 5.6378e-11, 6.0581e-11, 1.4542e-09, 1.0117e-11, 7.6288e-10, 9.2129e-11,\n 2.7202e-12, 1.8239e-11, 5.4731e-13, 1.3852e-11, 1.2958e-10, 1.4584e-10,\n 1.0270e-11, 1.3237e-11, 1.3306e-10, 7.3050e-12, 9.3567e-11, 5.8046e-11,\n 4.8238e-11, 4.3946e-10, 9.2911e-11, 7.1677e-11, 7.8074e-11, 1.1248e-09,\n 3.8646e-10, 2.5646e-10, 2.8853e-09, 4.5091e-12, 4.3186e-11, 1.2685e-11,\n 1.1528e-10, 1.0871e-11, 4.8956e-10, 1.5983e-11, 4.6110e-10, 1.1450e-11,\n 3.1721e-11, 4.9808e-10, 1.9987e-13, 2.3181e-11, 4.6557e-11, 4.9235e-11,\n 9.6458e-12, 3.1509e-10, 1.1728e-10, 9.7681e-11, 7.4063e-10, 5.1752e-11,\n 1.0669e-11, 6.9267e-11, 6.6581e-13, 2.9486e-11, 1.3079e-10, 1.2954e-09,\n 1.5610e-10, 1.5451e-11, 1.5265e-13, 5.7880e-11, 6.2487e-11, 6.8481e-10,\n 8.3485e-12, 5.1004e-11, 1.8685e-11, 1.1783e-10, 7.5989e-11, 6.7101e-11,\n 1.7621e-11, 2.8650e-10, 4.0375e-10, 2.1758e-10, 2.7101e-10, 6.8966e-11,\n 2.3765e-11, 4.5341e-13, 7.0256e-11, 2.4291e-11, 9.3773e-12, 4.1789e-11,\n 4.7683e-11, 7.8242e-13, 5.3276e-10, 9.4739e-11, 2.6172e-11, 4.9416e-14,\n 5.7652e-12, 5.2859e-11, 2.7605e-10, 4.3752e-10, 3.9409e-10, 2.7581e-11,\n 3.7616e-10, 1.7332e-10, 1.0620e-10, 2.9295e-11, 5.1844e-11, 2.5258e-12,\n 1.0585e-11, 4.8670e-10, 2.1907e-10, 3.0682e-10, 1.2675e-10, 2.4147e-12,\n 4.3385e-10, 4.3847e-11, 7.6855e-11, 1.3227e-11, 9.4992e-11, 4.1919e-11,\n 4.5560e-11, 2.1269e-09, 3.0762e-12, 7.1961e-11, 2.3847e-11, 7.0154e-10,\n 6.7304e-10, 9.7647e-11, 2.2043e-10, 2.7337e-11, 1.3107e-11, 2.0116e-10,\n 9.1385e-11, 1.1714e-10, 1.9270e-11, 3.6458e-10, 2.3004e-12, 9.8777e-12,\n 1.8896e-09, 1.7027e-10, 3.7551e-12, 4.3634e-11, 1.2972e-10, 4.1704e-11,\n 7.4838e-10, 1.4614e-09, 4.1845e-11, 1.2085e-10, 1.0057e-11, 2.8742e-12,\n 1.9634e-11, 8.0082e-11, 7.4882e-11, 1.5437e-10, 9.9555e-12, 3.3976e-10,\n 3.5359e-12, 7.1806e-12, 2.5358e-10, 1.0736e-10, 1.0467e-10, 5.7816e-10,\n 9.4434e-12, 4.2948e-10, 4.4254e-12, 2.0956e-11, 3.4721e-12, 3.7280e-10,\n 1.6921e-12, 2.9737e-11, 4.7389e-10, 2.4728e-10, 1.3818e-11, 1.6042e-10,\n 2.6905e-12, 1.0111e-11, 3.1557e-11, 2.1437e-10, 4.2510e-10, 1.8128e-10,\n 9.4618e-11, 3.8971e-11, 4.9013e-10, 5.9938e-10, 6.0330e-12, 1.1744e-13,\n 1.3317e-12, 1.6877e-10, 1.7499e-11, 6.7712e-12, 1.9929e-11, 6.6830e-10,\n 4.2167e-11, 1.3194e-12, 1.9333e-11, 2.7260e-11, 2.9066e-10, 5.3338e-12,\n 2.5497e-10, 5.5984e-10, 1.6072e-10, 4.9344e-11, 1.5111e-11, 1.3347e-10,\n 3.9965e-11, 1.7738e-10, 4.9419e-11, 8.0643e-12, 1.3868e-09, 5.8194e-12,\n 9.5523e-11, 1.6801e-13, 1.1475e-10, 3.4822e-10, 4.1171e-12, 2.9632e-11,\n 4.3216e-10, 2.2192e-11, 2.0147e-10, 1.2427e-09, 1.1678e-09, 1.2960e-10,\n 3.0178e-12, 5.9456e-11, 3.4078e-10, 8.7455e-13, 7.3409e-11, 1.4530e-09,\n 4.8139e-11, 8.1621e-11, 2.8016e-10, 2.9731e-11, 1.6582e-11, 1.2199e-10,\n 7.3767e-11, 6.3476e-11, 3.9628e-11, 1.3693e-10, 5.3042e-11, 3.5920e-11,\n 1.5046e-10, 2.6564e-11, 4.2972e-11, 6.2663e-11, 1.2901e-10, 4.2601e-12,\n 6.7899e-10, 2.6397e-11, 1.7828e-11, 2.5023e-10, 8.1521e-11, 4.4157e-12,\n 1.3965e-10, 7.8277e-10, 6.6564e-10, 9.5235e-10, 3.4804e-11, 1.2134e-10,\n 8.5361e-11, 4.2782e-10, 5.2570e-12, 5.6871e-13], device='cuda:0')" }, "40": { - "step": "tensor(1252.)", + "step": "tensor(2504.)", "exp_avg": "tensor([[-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.5302e-16, 6.2628e-15, 4.2924e-13, ..., 1.7690e-13, 0.0000e+00,\n 1.7843e-12],\n [4.0761e-13, 4.7288e-14, 4.2443e-12, ..., 2.6116e-12, 0.0000e+00,\n 1.1343e-12],\n [1.3045e-10, 2.0125e-12, 1.7471e-09, ..., 2.7354e-10, 0.0000e+00,\n 1.2553e-09],\n ...,\n [3.0265e-11, 8.4253e-12, 5.3383e-10, ..., 3.0336e-10, 0.0000e+00,\n 2.7306e-10],\n [1.7447e-13, 3.8504e-13, 6.4586e-12, ..., 5.6001e-13, 0.0000e+00,\n 3.0927e-12],\n [3.2159e-13, 4.2120e-12, 2.8661e-11, ..., 3.3525e-12, 0.0000e+00,\n 2.8187e-11]], device='cuda:0')" + "exp_avg_sq": "tensor([[4.3727e-17, 1.7896e-15, 1.2266e-13, ..., 5.0551e-14, 0.0000e+00,\n 5.0988e-13],\n [1.1648e-13, 1.3513e-14, 1.2128e-12, ..., 7.4628e-13, 0.0000e+00,\n 3.2413e-13],\n [3.7276e-11, 5.7508e-13, 4.9925e-10, ..., 7.8167e-11, 0.0000e+00,\n 3.5871e-10],\n ...,\n [8.6486e-12, 2.4076e-12, 1.5255e-10, ..., 8.6689e-11, 0.0000e+00,\n 7.8028e-11],\n [4.9856e-14, 1.1003e-13, 1.8456e-12, ..., 1.6003e-13, 0.0000e+00,\n 8.8376e-13],\n [9.1895e-14, 1.2036e-12, 8.1900e-12, ..., 9.5800e-13, 0.0000e+00,\n 8.0546e-12]], device='cuda:0')" }, "41": { - "step": "tensor(1252.)", + "step": "tensor(2504.)", "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.4500e-11, 3.0960e-10, 5.8807e-07, 3.4283e-10, 1.2918e-07, 1.8692e-07,\n 9.7052e-08, 1.2508e-08, 1.9835e-06, 5.5876e-08, 4.5920e-07, 4.1178e-07,\n 1.2332e-09, 3.8563e-08, 8.0065e-10, 1.6007e-09, 1.8632e-07, 1.0440e-07,\n 2.2309e-08, 5.6634e-09, 3.2874e-08, 3.2752e-09, 2.0936e-07, 9.9332e-08,\n 2.6486e-07, 3.5583e-07, 1.1404e-07, 1.6544e-07, 6.4761e-08, 9.2388e-07,\n 9.7580e-07, 3.3532e-08, 3.9054e-06, 3.4369e-08, 4.1347e-08, 3.7331e-09,\n 1.4272e-07, 3.0301e-09, 9.3137e-08, 3.2977e-08, 3.1842e-07, 3.0186e-09,\n 7.6151e-10, 1.9364e-06, 4.8367e-10, 8.7125e-08, 2.0556e-07, 1.6527e-07,\n 1.8701e-09, 1.0829e-07, 1.6241e-07, 2.1101e-08, 6.8796e-07, 4.5137e-08,\n 2.2346e-08, 4.6204e-07, 1.4795e-08, 4.2186e-08, 8.5427e-07, 8.4059e-07,\n 1.7995e-07, 2.3648e-07, 5.6839e-09, 1.1424e-07, 1.4030e-07, 7.8387e-07,\n 4.7427e-09, 2.0615e-07, 4.3437e-08, 9.9338e-08, 3.6234e-08, 3.8659e-08,\n 1.0360e-07, 1.1112e-06, 3.3588e-07, 9.8226e-07, 2.0825e-07, 2.9627e-08,\n 3.1239e-08, 1.4278e-09, 9.6419e-08, 1.3819e-08, 8.8938e-09, 2.5157e-07,\n 7.3054e-08, 7.4006e-09, 1.4662e-06, 1.4752e-07, 4.2348e-08, 3.5063e-10,\n 2.6564e-09, 9.3940e-08, 1.6522e-07, 4.5484e-08, 1.5795e-06, 6.0359e-09,\n 1.4745e-06, 1.0199e-06, 1.7728e-07, 1.1725e-08, 3.9841e-08, 2.3938e-08,\n 2.6475e-09, 7.4751e-07, 8.7026e-09, 1.2386e-07, 9.8182e-08, 1.6555e-08,\n 1.0797e-07, 4.8907e-07, 1.2602e-07, 8.4716e-09, 5.2544e-07, 5.3712e-08,\n 4.1240e-08, 1.5781e-07, 1.4271e-08, 1.2202e-07, 1.5161e-08, 4.1434e-07,\n 1.1392e-06, 1.4457e-07, 1.2933e-07, 1.6854e-09, 2.9352e-08, 1.6609e-06,\n 1.2190e-08, 9.1147e-08, 3.4512e-08, 1.0923e-07, 1.2111e-09, 3.4709e-08,\n 1.6283e-06, 7.3864e-08, 3.2583e-09, 1.3790e-07, 8.7629e-08, 1.3515e-08,\n 4.5121e-07, 8.0147e-07, 1.4454e-08, 4.0128e-08, 5.8417e-07, 9.7694e-10,\n 2.1115e-08, 1.2766e-07, 4.0851e-08, 4.8071e-07, 8.7073e-09, 2.8140e-07,\n 2.6125e-08, 1.5874e-09, 4.2443e-07, 1.1387e-07, 2.9232e-08, 4.1343e-07,\n 1.4944e-07, 5.6797e-07, 8.2764e-10, 7.3083e-08, 3.2165e-09, 3.9211e-07,\n 3.5880e-10, 6.2788e-07, 8.3299e-08, 4.1736e-07, 4.2689e-08, 1.4791e-07,\n 3.0218e-09, 1.0683e-10, 3.5737e-08, 1.3549e-07, 1.2271e-07, 1.5255e-07,\n 5.6751e-08, 4.7979e-08, 6.7625e-07, 1.5238e-07, 1.2629e-09, 2.2444e-10,\n 1.4564e-08, 1.5476e-07, 1.8288e-09, 1.6791e-08, 8.6563e-09, 1.9039e-06,\n 1.8063e-07, 2.5896e-09, 3.5770e-08, 4.2600e-08, 5.9752e-07, 1.7094e-09,\n 2.5354e-07, 6.4618e-07, 4.2274e-07, 5.0062e-09, 5.9634e-09, 1.8653e-08,\n 1.8532e-07, 5.2283e-07, 2.6762e-07, 3.1563e-09, 1.1658e-06, 1.4536e-10,\n 7.7433e-08, 5.2290e-10, 4.4760e-08, 3.8129e-07, 1.0689e-08, 1.2044e-08,\n 7.1760e-07, 2.2552e-08, 2.7894e-07, 1.5366e-06, 5.1693e-06, 3.8285e-08,\n 6.3293e-09, 2.3702e-07, 1.1737e-07, 1.5678e-10, 9.2035e-08, 2.1233e-06,\n 2.9787e-08, 1.9468e-07, 1.7569e-07, 2.1984e-08, 1.2298e-08, 2.9168e-07,\n 3.6018e-07, 1.8169e-08, 3.5005e-07, 5.2962e-08, 1.6852e-07, 2.2558e-07,\n 2.0079e-07, 1.0210e-07, 5.8506e-08, 1.1377e-07, 3.4350e-08, 6.6163e-09,\n 5.9188e-07, 1.2391e-07, 3.8333e-09, 4.9244e-07, 5.0556e-08, 1.0141e-09,\n 4.6247e-07, 5.6265e-07, 1.4163e-06, 4.3693e-07, 1.3582e-08, 3.3684e-07,\n 3.0680e-08, 4.4998e-07, 5.3412e-09, 3.8550e-08], device='cuda:0')" + "exp_avg_sq": "tensor([4.1435e-12, 8.8471e-11, 1.6805e-07, 9.7967e-11, 3.6913e-08, 5.3415e-08,\n 2.7733e-08, 3.5743e-09, 5.6681e-07, 1.5967e-08, 1.3122e-07, 1.1767e-07,\n 3.5239e-10, 1.1020e-08, 2.2879e-10, 4.5740e-10, 5.3242e-08, 2.9833e-08,\n 6.3751e-09, 1.6184e-09, 9.3940e-09, 9.3591e-10, 5.9826e-08, 2.8385e-08,\n 7.5685e-08, 1.0168e-07, 3.2587e-08, 4.7275e-08, 1.8506e-08, 2.6400e-07,\n 2.7884e-07, 9.5819e-09, 1.1160e-06, 9.8213e-09, 1.1815e-08, 1.0668e-09,\n 4.0782e-08, 8.6586e-10, 2.6615e-08, 9.4234e-09, 9.0991e-08, 8.6258e-10,\n 2.1761e-10, 5.5334e-07, 1.3821e-10, 2.4897e-08, 5.8740e-08, 4.7227e-08,\n 5.3439e-10, 3.0944e-08, 4.6410e-08, 6.0299e-09, 1.9659e-07, 1.2898e-08,\n 6.3854e-09, 1.3203e-07, 4.2277e-09, 1.2055e-08, 2.4411e-07, 2.4021e-07,\n 5.1423e-08, 6.7575e-08, 1.6242e-09, 3.2644e-08, 4.0092e-08, 2.2400e-07,\n 1.3553e-09, 5.8910e-08, 1.2413e-08, 2.8387e-08, 1.0354e-08, 1.1047e-08,\n 2.9606e-08, 3.1754e-07, 9.5979e-08, 2.8069e-07, 5.9508e-08, 8.4661e-09,\n 8.9267e-09, 4.0799e-10, 2.7552e-08, 3.9489e-09, 2.5415e-09, 7.1887e-08,\n 2.0876e-08, 2.1148e-09, 4.1897e-07, 4.2155e-08, 1.2101e-08, 1.0019e-10,\n 7.5908e-10, 2.6844e-08, 4.7212e-08, 1.2997e-08, 4.5136e-07, 1.7248e-09,\n 4.2134e-07, 2.9144e-07, 5.0660e-08, 3.3505e-09, 1.1385e-08, 6.8405e-09,\n 7.5654e-10, 2.1361e-07, 2.4868e-09, 3.5393e-08, 2.8056e-08, 4.7307e-09,\n 3.0854e-08, 1.3976e-07, 3.6011e-08, 2.4208e-09, 1.5015e-07, 1.5349e-08,\n 1.1785e-08, 4.5095e-08, 4.0780e-09, 3.4869e-08, 4.3325e-09, 1.1840e-07,\n 3.2553e-07, 4.1313e-08, 3.6957e-08, 4.8162e-10, 8.3875e-09, 4.7461e-07,\n 3.4835e-09, 2.6046e-08, 9.8621e-09, 3.1214e-08, 3.4608e-10, 9.9184e-09,\n 4.6529e-07, 2.1107e-08, 9.3108e-10, 3.9406e-08, 2.5041e-08, 3.8622e-09,\n 1.2894e-07, 2.2903e-07, 4.1305e-09, 1.1467e-08, 1.6693e-07, 2.7917e-10,\n 6.0337e-09, 3.6480e-08, 1.1674e-08, 1.3737e-07, 2.4882e-09, 8.0412e-08,\n 7.4655e-09, 4.5361e-10, 1.2128e-07, 3.2538e-08, 8.3534e-09, 1.1814e-07,\n 4.2702e-08, 1.6230e-07, 2.3650e-10, 2.0884e-08, 9.1913e-10, 1.1205e-07,\n 1.0253e-10, 1.7942e-07, 2.3803e-08, 1.1926e-07, 1.2199e-08, 4.2265e-08,\n 8.6351e-10, 3.0527e-11, 1.0212e-08, 3.8718e-08, 3.5067e-08, 4.3592e-08,\n 1.6217e-08, 1.3710e-08, 1.9324e-07, 4.3543e-08, 3.6089e-10, 6.4134e-11,\n 4.1617e-09, 4.4225e-08, 5.2260e-10, 4.7983e-09, 2.4736e-09, 5.4404e-07,\n 5.1616e-08, 7.4001e-10, 1.0222e-08, 1.2173e-08, 1.7075e-07, 4.8847e-10,\n 7.2451e-08, 1.8465e-07, 1.2080e-07, 1.4306e-09, 1.7041e-09, 5.3302e-09,\n 5.2956e-08, 1.4940e-07, 7.6474e-08, 9.0193e-10, 3.3315e-07, 4.1539e-11,\n 2.2127e-08, 1.4942e-10, 1.2791e-08, 1.0896e-07, 3.0543e-09, 3.4418e-09,\n 2.0506e-07, 6.4443e-09, 7.9709e-08, 4.3909e-07, 1.4772e-06, 1.0940e-08,\n 1.8086e-09, 6.7729e-08, 3.3539e-08, 4.4802e-11, 2.6300e-08, 6.0676e-07,\n 8.5119e-09, 5.5632e-08, 5.0205e-08, 6.2822e-09, 3.5143e-09, 8.3351e-08,\n 1.0292e-07, 5.1919e-09, 1.0003e-07, 1.5134e-08, 4.8156e-08, 6.4461e-08,\n 5.7378e-08, 2.9177e-08, 1.6719e-08, 3.2510e-08, 9.8158e-09, 1.8907e-09,\n 1.6913e-07, 3.5408e-08, 1.0954e-09, 1.4072e-07, 1.4447e-08, 2.8978e-10,\n 1.3215e-07, 1.6078e-07, 4.0471e-07, 1.2486e-07, 3.8812e-09, 9.6254e-08,\n 8.7670e-09, 1.2858e-07, 1.5263e-09, 1.1016e-08], device='cuda:0')" }, "42": { - "step": "tensor(1252.)", + "step": "tensor(2504.)", "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([4.7758e-13, 6.7627e-13, 1.0244e-09, 2.8893e-12, 2.2874e-10, 3.1590e-10,\n 1.6345e-10, 2.0454e-11, 3.5764e-09, 1.2008e-10, 7.3341e-10, 6.6727e-10,\n 3.6766e-12, 5.4483e-11, 2.4884e-12, 4.2326e-12, 3.5146e-10, 4.3603e-10,\n 5.8293e-11, 1.8885e-11, 7.7874e-11, 8.6643e-12, 5.4478e-10, 2.0369e-10,\n 9.4863e-10, 5.5546e-10, 2.3038e-10, 4.9937e-10, 2.5076e-10, 1.5294e-09,\n 1.8032e-09, 8.2423e-11, 7.8735e-09, 3.8330e-11, 6.8699e-11, 5.5716e-12,\n 2.2577e-10, 1.6134e-11, 1.2749e-10, 7.1089e-11, 4.1874e-10, 3.8218e-11,\n 1.0113e-12, 6.4663e-09, 1.9817e-12, 1.5347e-10, 5.6523e-10, 6.5425e-10,\n 5.6975e-12, 1.7205e-10, 3.1560e-10, 2.6428e-11, 1.2018e-09, 7.2751e-11,\n 4.7804e-11, 7.1777e-10, 5.3112e-11, 5.6978e-11, 2.0242e-09, 1.1161e-09,\n 2.6445e-10, 6.4692e-10, 1.5260e-11, 1.8200e-10, 3.5962e-10, 1.7148e-09,\n 1.3985e-11, 3.0361e-10, 8.0425e-11, 1.1110e-10, 5.9136e-11, 8.5164e-11,\n 1.6879e-10, 2.6885e-09, 4.6515e-10, 2.1264e-09, 4.1409e-10, 5.2063e-11,\n 6.1575e-11, 1.5220e-12, 1.1806e-10, 2.0703e-11, 2.7195e-11, 9.8999e-10,\n 9.4948e-11, 4.0241e-11, 3.9001e-09, 5.1563e-10, 1.0666e-10, 2.1048e-12,\n 1.2865e-11, 1.5340e-10, 2.1503e-10, 7.2810e-11, 4.2096e-09, 1.4299e-11,\n 3.7640e-09, 2.6111e-09, 3.3116e-10, 4.6603e-11, 7.7067e-11, 4.9500e-11,\n 8.1435e-12, 1.3909e-09, 3.1118e-11, 1.7159e-10, 1.1510e-10, 4.2622e-11,\n 1.0853e-10, 1.1302e-09, 2.8188e-10, 1.1287e-11, 8.0459e-10, 6.7604e-11,\n 1.0363e-10, 2.6460e-10, 5.2444e-11, 1.3670e-10, 5.6605e-11, 5.8429e-10,\n 2.1661e-09, 4.2879e-10, 1.4291e-10, 3.8703e-12, 8.9625e-11, 3.3754e-09,\n 2.3777e-11, 2.5283e-10, 7.9176e-11, 1.9232e-10, 6.9353e-12, 4.6387e-11,\n 3.0844e-09, 1.1973e-10, 8.3659e-12, 2.7960e-10, 2.3634e-10, 2.9958e-11,\n 7.2383e-10, 1.2712e-09, 2.9041e-11, 6.0597e-11, 1.1167e-09, 6.5230e-12,\n 5.4907e-11, 2.4928e-10, 5.7217e-11, 1.5603e-09, 3.6638e-11, 9.2969e-10,\n 5.1252e-11, 9.8327e-13, 8.7172e-10, 2.1687e-10, 5.4633e-11, 4.2906e-10,\n 2.3614e-10, 7.2816e-10, 1.2193e-12, 1.5464e-10, 3.7974e-11, 8.5350e-10,\n 2.0100e-12, 2.0278e-09, 1.4532e-10, 7.1479e-10, 1.0346e-10, 3.2136e-10,\n 6.9291e-12, 3.1105e-13, 4.9956e-11, 3.3686e-10, 1.5449e-10, 2.1240e-10,\n 1.2696e-10, 1.0608e-10, 1.4905e-09, 2.8398e-10, 4.4651e-12, 5.3673e-13,\n 2.3865e-11, 4.1056e-10, 1.7048e-12, 3.2531e-11, 2.1767e-11, 3.2969e-09,\n 2.2932e-10, 1.4121e-11, 1.5964e-10, 1.1336e-10, 1.8350e-09, 1.2668e-11,\n 3.7981e-10, 1.3961e-09, 7.4075e-10, 1.5013e-11, 5.7722e-12, 3.5599e-11,\n 2.9092e-10, 1.2402e-09, 6.2753e-10, 1.7736e-11, 1.9900e-09, 3.3760e-12,\n 8.5459e-11, 9.8909e-13, 5.7070e-11, 8.0964e-10, 4.3801e-11, 1.8781e-11,\n 2.1782e-09, 2.3474e-11, 3.7081e-10, 3.1376e-09, 1.4944e-08, 5.6278e-11,\n 1.1124e-11, 4.2123e-10, 1.7074e-10, 1.4187e-13, 9.1275e-11, 3.8016e-09,\n 7.8244e-11, 6.0852e-10, 3.1766e-10, 3.5844e-11, 3.2091e-11, 6.0993e-10,\n 6.2294e-10, 2.8173e-11, 4.4473e-10, 1.0925e-10, 3.5011e-10, 4.6640e-10,\n 3.2127e-10, 1.8803e-10, 9.5348e-11, 2.2374e-10, 5.2797e-11, 1.3258e-11,\n 1.2191e-09, 3.1201e-10, 3.7378e-11, 6.8411e-10, 5.2262e-11, 6.7156e-12,\n 1.2593e-09, 1.3536e-09, 3.3407e-09, 7.1341e-10, 2.1970e-11, 7.2621e-10,\n 4.6304e-11, 1.0235e-09, 1.3830e-11, 1.6391e-10], device='cuda:0')" + "exp_avg_sq": "tensor([1.3647e-13, 1.9325e-13, 2.9273e-10, 8.2563e-13, 6.5363e-11, 9.0272e-11,\n 4.6708e-11, 5.8450e-12, 1.0220e-09, 3.4314e-11, 2.0958e-10, 1.9068e-10,\n 1.0506e-12, 1.5569e-11, 7.1107e-13, 1.2095e-12, 1.0043e-10, 1.2460e-10,\n 1.6658e-11, 5.3964e-12, 2.2253e-11, 2.4759e-12, 1.5567e-10, 5.8207e-11,\n 2.7108e-10, 1.5873e-10, 6.5832e-11, 1.4270e-10, 7.1656e-11, 4.3703e-10,\n 5.1528e-10, 2.3553e-11, 2.2499e-09, 1.0953e-11, 1.9631e-11, 1.5921e-12,\n 6.4515e-11, 4.6103e-12, 3.6432e-11, 2.0314e-11, 1.1966e-10, 1.0921e-11,\n 2.8899e-13, 1.8478e-09, 5.6628e-13, 4.3854e-11, 1.6152e-10, 1.8696e-10,\n 1.6281e-12, 4.9165e-11, 9.0185e-11, 7.5521e-12, 3.4342e-10, 2.0789e-11,\n 1.3660e-11, 2.0511e-10, 1.5177e-11, 1.6282e-11, 5.7843e-10, 3.1893e-10,\n 7.5569e-11, 1.8486e-10, 4.3607e-12, 5.2007e-11, 1.0276e-10, 4.9002e-10,\n 3.9964e-12, 8.6758e-11, 2.2982e-11, 3.1748e-11, 1.6898e-11, 2.4336e-11,\n 4.8232e-11, 7.6825e-10, 1.3292e-10, 6.0765e-10, 1.1833e-10, 1.4877e-11,\n 1.7596e-11, 4.3494e-13, 3.3737e-11, 5.9161e-12, 7.7713e-12, 2.8290e-10,\n 2.7132e-11, 1.1499e-11, 1.1145e-09, 1.4734e-10, 3.0478e-11, 6.0147e-13,\n 3.6764e-12, 4.3836e-11, 6.1445e-11, 2.0806e-11, 1.2029e-09, 4.0861e-12,\n 1.0756e-09, 7.4613e-10, 9.4631e-11, 1.3317e-11, 2.2022e-11, 1.4145e-11,\n 2.3271e-12, 3.9747e-10, 8.8923e-12, 4.9034e-11, 3.2892e-11, 1.2180e-11,\n 3.1013e-11, 3.2295e-10, 8.0548e-11, 3.2254e-12, 2.2992e-10, 1.9318e-11,\n 2.9614e-11, 7.5611e-11, 1.4986e-11, 3.9062e-11, 1.6175e-11, 1.6697e-10,\n 6.1897e-10, 1.2253e-10, 4.0839e-11, 1.1060e-12, 2.5611e-11, 9.6454e-10,\n 6.7945e-12, 7.2247e-11, 2.2625e-11, 5.4957e-11, 1.9818e-12, 1.3255e-11,\n 8.8138e-10, 3.4214e-11, 2.3906e-12, 7.9897e-11, 6.7536e-11, 8.5608e-12,\n 2.0684e-10, 3.6324e-10, 8.2986e-12, 1.7316e-11, 3.1910e-10, 1.8640e-12,\n 1.5690e-11, 7.1234e-11, 1.6350e-11, 4.4587e-10, 1.0470e-11, 2.6567e-10,\n 1.4646e-11, 2.8098e-13, 2.4910e-10, 6.1972e-11, 1.5612e-11, 1.2261e-10,\n 6.7480e-11, 2.0808e-10, 3.4842e-13, 4.4190e-11, 1.0851e-11, 2.4389e-10,\n 5.7437e-13, 5.7945e-10, 4.1526e-11, 2.0426e-10, 2.9564e-11, 9.1831e-11,\n 1.9800e-12, 8.8884e-14, 1.4275e-11, 9.6261e-11, 4.4146e-11, 6.0694e-11,\n 3.6279e-11, 3.0314e-11, 4.2592e-10, 8.1149e-11, 1.2759e-12, 1.5337e-13,\n 6.8195e-12, 1.1732e-10, 4.8715e-13, 9.2960e-12, 6.2201e-12, 9.4211e-10,\n 6.5529e-11, 4.0351e-12, 4.5618e-11, 3.2395e-11, 5.2436e-10, 3.6201e-12,\n 1.0853e-10, 3.9896e-10, 2.1167e-10, 4.2900e-12, 1.6494e-12, 1.0173e-11,\n 8.3132e-11, 3.5440e-10, 1.7932e-10, 5.0682e-12, 5.6866e-10, 9.6471e-13,\n 2.4421e-11, 2.8264e-13, 1.6308e-11, 2.3136e-10, 1.2517e-11, 5.3668e-12,\n 6.2242e-10, 6.7078e-12, 1.0596e-10, 8.9660e-10, 4.2702e-09, 1.6082e-11,\n 3.1789e-12, 1.2037e-10, 4.8791e-11, 4.0539e-14, 2.6083e-11, 1.0863e-09,\n 2.2359e-11, 1.7389e-10, 9.0773e-11, 1.0243e-11, 9.1704e-12, 1.7429e-10,\n 1.7801e-10, 8.0506e-12, 1.2709e-10, 3.1220e-11, 1.0005e-10, 1.3328e-10,\n 9.1804e-11, 5.3731e-11, 2.7246e-11, 6.3937e-11, 1.5087e-11, 3.7885e-12,\n 3.4838e-10, 8.9159e-11, 1.0681e-11, 1.9549e-10, 1.4934e-11, 1.9190e-12,\n 3.5986e-10, 3.8681e-10, 9.5462e-10, 2.0386e-10, 6.2782e-12, 2.0752e-10,\n 1.3232e-11, 2.9248e-10, 3.9519e-12, 4.6839e-11], device='cuda:0')" }, "43": { - "step": "tensor(1252.)", + "step": "tensor(2504.)", "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.6445e-12, 1.2249e-12, 1.6986e-09, 4.7860e-12, 3.5241e-10, 4.6337e-10,\n 2.7442e-10, 4.4795e-11, 5.4882e-09, 1.6236e-10, 1.3027e-09, 1.0037e-09,\n 3.6731e-12, 1.2044e-10, 3.6955e-12, 1.2153e-11, 5.5990e-10, 3.3967e-10,\n 5.3713e-11, 2.5794e-11, 1.1227e-10, 1.5596e-11, 5.4659e-10, 2.9518e-10,\n 7.8614e-10, 9.1692e-10, 3.3578e-10, 4.2629e-10, 2.4423e-10, 2.5627e-09,\n 2.4175e-09, 1.1207e-10, 1.0882e-08, 8.3509e-11, 1.4087e-10, 1.8742e-11,\n 3.6708e-10, 2.5692e-11, 2.8237e-10, 9.8792e-11, 7.6604e-10, 2.3932e-11,\n 4.5759e-12, 5.4281e-09, 2.1528e-12, 2.0570e-10, 5.1990e-10, 5.0765e-10,\n 1.0298e-11, 3.1958e-10, 5.0515e-10, 5.3534e-11, 1.7806e-09, 1.1701e-10,\n 7.6047e-11, 1.2309e-09, 5.8852e-11, 1.0750e-10, 2.2041e-09, 2.4190e-09,\n 4.3760e-10, 5.5049e-10, 2.6427e-11, 3.3089e-10, 4.0586e-10, 1.9473e-09,\n 1.7345e-11, 5.5045e-10, 1.3033e-10, 2.9230e-10, 1.0948e-10, 1.1423e-10,\n 2.1780e-10, 3.0580e-09, 9.6768e-10, 2.4995e-09, 6.0191e-10, 9.4243e-11,\n 1.0678e-10, 3.2738e-12, 2.6670e-10, 4.2775e-11, 3.8174e-11, 7.2452e-10,\n 1.7887e-10, 3.6003e-11, 4.0476e-09, 4.6466e-10, 1.4902e-10, 2.9878e-12,\n 1.3651e-11, 2.4228e-10, 4.3454e-10, 1.3684e-10, 3.9621e-09, 1.5589e-11,\n 3.7399e-09, 2.5888e-09, 4.0079e-10, 7.4724e-11, 1.2032e-10, 8.6943e-11,\n 7.7332e-12, 1.9371e-09, 2.5914e-11, 3.2453e-10, 2.7778e-10, 6.3679e-11,\n 2.7851e-10, 1.1998e-09, 3.6876e-10, 2.8963e-11, 1.4010e-09, 1.2074e-10,\n 1.3386e-10, 4.5977e-10, 5.5935e-11, 3.0822e-10, 7.5750e-11, 1.1885e-09,\n 3.1113e-09, 4.5325e-10, 3.7331e-10, 2.7075e-11, 1.0793e-10, 4.2376e-09,\n 3.3019e-11, 2.8366e-10, 1.0479e-10, 3.1525e-10, 1.0598e-11, 1.1341e-10,\n 4.1847e-09, 1.9941e-10, 1.4394e-11, 3.6188e-10, 2.8055e-10, 3.3389e-11,\n 1.1318e-09, 2.0211e-09, 5.4310e-11, 1.2792e-10, 1.4403e-09, 1.1939e-11,\n 7.3864e-11, 3.7376e-10, 9.5369e-11, 1.2492e-09, 3.3421e-11, 8.1694e-10,\n 8.6153e-11, 1.9288e-12, 1.0883e-09, 2.6979e-10, 1.0039e-10, 1.0742e-09,\n 3.6303e-10, 1.3748e-09, 4.5390e-12, 2.1547e-10, 5.1819e-11, 1.1462e-09,\n 3.9891e-12, 1.7652e-09, 2.3859e-10, 1.0644e-09, 1.0574e-10, 3.7797e-10,\n 1.1437e-11, 8.5478e-13, 9.3218e-11, 4.1405e-10, 3.4413e-10, 4.5205e-10,\n 1.6082e-10, 1.4070e-10, 1.9017e-09, 4.2417e-10, 1.7604e-11, 1.1617e-12,\n 5.4046e-11, 4.2079e-10, 7.1078e-12, 5.8924e-11, 3.5249e-11, 5.2411e-09,\n 4.3736e-10, 1.3052e-11, 1.3156e-10, 1.4843e-10, 1.4738e-09, 2.2839e-11,\n 6.3863e-10, 1.7215e-09, 1.0293e-09, 1.4004e-11, 2.2158e-11, 6.4037e-11,\n 4.9465e-10, 1.4978e-09, 6.7643e-10, 2.8910e-11, 3.0409e-09, 7.2722e-12,\n 2.0067e-10, 2.1542e-12, 1.4275e-10, 9.4018e-10, 5.3982e-11, 3.8925e-11,\n 2.0367e-09, 5.1902e-11, 8.0363e-10, 4.1232e-09, 1.4020e-08, 1.0292e-10,\n 2.0971e-11, 5.5926e-10, 3.2990e-10, 7.5276e-13, 2.1162e-10, 5.8156e-09,\n 1.0059e-10, 5.7838e-10, 5.3634e-10, 4.9068e-11, 4.3483e-11, 8.1174e-10,\n 1.0043e-09, 5.8581e-11, 9.5466e-10, 1.6777e-10, 3.9938e-10, 6.6900e-10,\n 5.7567e-10, 3.2335e-10, 1.5514e-10, 3.5882e-10, 8.9085e-11, 2.0760e-11,\n 1.5311e-09, 3.7412e-10, 4.9570e-11, 1.3592e-09, 1.3662e-10, 1.6030e-11,\n 1.2789e-09, 1.4291e-09, 3.8967e-09, 1.2846e-09, 4.0862e-11, 8.1536e-10,\n 7.2967e-11, 1.3209e-09, 2.6654e-11, 1.3283e-10], device='cuda:0')" + "exp_avg_sq": "tensor([4.6992e-13, 3.5003e-13, 4.8538e-10, 1.3676e-12, 1.0071e-10, 1.3241e-10,\n 7.8417e-11, 1.2800e-11, 1.5683e-09, 4.6396e-11, 3.7225e-10, 2.8681e-10,\n 1.0496e-12, 3.4415e-11, 1.0560e-12, 3.4727e-12, 1.6000e-10, 9.7064e-11,\n 1.5349e-11, 7.3708e-12, 3.2081e-11, 4.4567e-12, 1.5619e-10, 8.4351e-11,\n 2.2465e-10, 2.6202e-10, 9.5953e-11, 1.2181e-10, 6.9791e-11, 7.3231e-10,\n 6.9082e-10, 3.2024e-11, 3.1097e-09, 2.3863e-11, 4.0254e-11, 5.3557e-12,\n 1.0489e-10, 7.3418e-12, 8.0690e-11, 2.8230e-11, 2.1890e-10, 6.8388e-12,\n 1.3076e-12, 1.5511e-09, 6.1519e-13, 5.8779e-11, 1.4857e-10, 1.4506e-10,\n 2.9426e-12, 9.1324e-11, 1.4435e-10, 1.5298e-11, 5.0882e-10, 3.3437e-11,\n 2.1731e-11, 3.5174e-10, 1.6818e-11, 3.0719e-11, 6.2984e-10, 6.9125e-10,\n 1.2505e-10, 1.5731e-10, 7.5518e-12, 9.4554e-11, 1.1598e-10, 5.5647e-10,\n 4.9565e-12, 1.5730e-10, 3.7242e-11, 8.3527e-11, 3.1284e-11, 3.2642e-11,\n 6.2239e-11, 8.7385e-10, 2.7652e-10, 7.1426e-10, 1.7200e-10, 2.6931e-11,\n 3.0514e-11, 9.3550e-13, 7.6212e-11, 1.2223e-11, 1.0908e-11, 2.0704e-10,\n 5.1114e-11, 1.0288e-11, 1.1566e-09, 1.3278e-10, 4.2583e-11, 8.5378e-13,\n 3.9008e-12, 6.9233e-11, 1.2417e-10, 3.9102e-11, 1.1322e-09, 4.4546e-12,\n 1.0687e-09, 7.3978e-10, 1.1453e-10, 2.1353e-11, 3.4384e-11, 2.4845e-11,\n 2.2098e-12, 5.5353e-10, 7.4050e-12, 9.2737e-11, 7.9379e-11, 1.8197e-11,\n 7.9588e-11, 3.4285e-10, 1.0538e-10, 8.2763e-12, 4.0034e-10, 3.4503e-11,\n 3.8251e-11, 1.3138e-10, 1.5984e-11, 8.8077e-11, 2.1646e-11, 3.3964e-10,\n 8.8907e-10, 1.2952e-10, 1.0668e-10, 7.7369e-12, 3.0841e-11, 1.2109e-09,\n 9.4353e-12, 8.1058e-11, 2.9945e-11, 9.0084e-11, 3.0283e-12, 3.2408e-11,\n 1.1958e-09, 5.6983e-11, 4.1133e-12, 1.0341e-10, 8.0168e-11, 9.5412e-12,\n 3.2342e-10, 5.7753e-10, 1.5520e-11, 3.6555e-11, 4.1156e-10, 3.4117e-12,\n 2.1107e-11, 1.0680e-10, 2.7252e-11, 3.5697e-10, 9.5505e-12, 2.3345e-10,\n 2.4619e-11, 5.5116e-13, 3.1098e-10, 7.7093e-11, 2.8687e-11, 3.0695e-10,\n 1.0374e-10, 3.9287e-10, 1.2971e-12, 6.1574e-11, 1.4808e-11, 3.2753e-10,\n 1.1399e-12, 5.0441e-10, 6.8178e-11, 3.0417e-10, 3.0216e-11, 1.0801e-10,\n 3.2683e-12, 2.4426e-13, 2.6638e-11, 1.1832e-10, 9.8339e-11, 1.2918e-10,\n 4.5957e-11, 4.0205e-11, 5.4341e-10, 1.2121e-10, 5.0304e-12, 3.3196e-13,\n 1.5444e-11, 1.2025e-10, 2.0311e-12, 1.6838e-11, 1.0073e-11, 1.4977e-09,\n 1.2498e-10, 3.7298e-12, 3.7595e-11, 4.2414e-11, 4.2114e-10, 6.5263e-12,\n 1.8249e-10, 4.9194e-10, 2.9412e-10, 4.0016e-12, 6.3318e-12, 1.8299e-11,\n 1.4135e-10, 4.2799e-10, 1.9329e-10, 8.2611e-12, 8.6897e-10, 2.0781e-12,\n 5.7344e-11, 6.1559e-13, 4.0791e-11, 2.6866e-10, 1.5426e-11, 1.1123e-11,\n 5.8200e-10, 1.4832e-11, 2.2964e-10, 1.1782e-09, 4.0065e-09, 2.9411e-11,\n 5.9926e-12, 1.5981e-10, 9.4270e-11, 2.1511e-13, 6.0472e-11, 1.6619e-09,\n 2.8745e-11, 1.6528e-10, 1.5326e-10, 1.4022e-11, 1.2426e-11, 2.3196e-10,\n 2.8698e-10, 1.6740e-11, 2.7280e-10, 4.7941e-11, 1.1412e-10, 1.9117e-10,\n 1.6450e-10, 9.2400e-11, 4.4333e-11, 1.0254e-10, 2.5457e-11, 5.9324e-12,\n 4.3751e-10, 1.0691e-10, 1.4165e-11, 3.8841e-10, 3.9041e-11, 4.5808e-12,\n 3.6545e-10, 4.0837e-10, 1.1135e-09, 3.6709e-10, 1.1677e-11, 2.3300e-10,\n 2.0851e-11, 3.7745e-10, 7.6166e-12, 3.7956e-11], device='cuda:0')" }, "44": { - "step": "tensor(1252.)", + "step": "tensor(2504.)", "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 0.0000e+00, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[4.2366e-14, 7.1849e-14, 2.8644e-13, ..., 5.1413e-14, 0.0000e+00,\n 1.1358e-12],\n [1.8429e-13, 2.2770e-14, 2.0824e-12, ..., 1.3564e-12, 0.0000e+00,\n 6.2287e-12],\n [3.5905e-10, 4.2827e-11, 2.7295e-09, ..., 1.7021e-09, 0.0000e+00,\n 4.4988e-09],\n ...,\n [9.5712e-11, 1.1608e-11, 6.5441e-10, ..., 3.0899e-10, 0.0000e+00,\n 3.3665e-10],\n [4.1620e-13, 2.5252e-13, 9.2285e-13, ..., 6.9060e-12, 0.0000e+00,\n 4.1653e-12],\n [9.1600e-14, 3.8876e-14, 3.4638e-12, ..., 2.6049e-12, 0.0000e+00,\n 1.7723e-13]], device='cuda:0')" + "exp_avg_sq": "tensor([[1.2106e-14, 2.0532e-14, 8.1853e-14, ..., 1.4692e-14, 0.0000e+00,\n 3.2458e-13],\n [5.2662e-14, 6.5066e-15, 5.9507e-13, ..., 3.8760e-13, 0.0000e+00,\n 1.7799e-12],\n [1.0260e-10, 1.2238e-11, 7.7998e-10, ..., 4.8639e-10, 0.0000e+00,\n 1.2856e-09],\n ...,\n [2.7350e-11, 3.3171e-12, 1.8700e-10, ..., 8.8297e-11, 0.0000e+00,\n 9.6199e-11],\n [1.1893e-13, 7.2161e-14, 2.6371e-13, ..., 1.9735e-12, 0.0000e+00,\n 1.1903e-12],\n [2.6176e-14, 1.1109e-14, 9.8980e-13, ..., 7.4437e-13, 0.0000e+00,\n 5.0645e-14]], device='cuda:0')" }, "45": { - "step": "tensor(1252.)", + "step": "tensor(2504.)", "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.7822e-10, 4.3962e-10, 2.2380e-06, 7.2782e-11, 7.4231e-07, 4.6350e-07,\n 1.1025e-07, 6.4957e-08, 2.2610e-06, 4.6852e-08, 5.9819e-08, 2.1331e-07,\n 2.5848e-09, 4.8413e-08, 1.0354e-08, 1.8771e-08, 1.6925e-07, 4.1632e-08,\n 5.8250e-09, 1.8614e-09, 1.5942e-07, 7.7077e-09, 2.1464e-07, 2.2812e-08,\n 1.9567e-07, 6.9815e-07, 8.5342e-08, 8.7704e-08, 3.1293e-08, 7.0103e-07,\n 2.4387e-08, 9.4520e-08, 3.6804e-06, 4.4702e-08, 2.4053e-08, 1.3793e-09,\n 2.4134e-07, 1.3956e-09, 1.4870e-07, 4.7371e-08, 1.1263e-07, 4.1976e-09,\n 1.2556e-09, 3.4910e-07, 1.1355e-10, 1.2997e-07, 5.7475e-08, 7.7207e-08,\n 1.4688e-08, 5.5853e-08, 4.6973e-07, 2.9783e-08, 1.8344e-07, 3.4029e-08,\n 1.8918e-08, 9.2629e-07, 5.4673e-09, 7.8669e-08, 9.8041e-08, 1.6858e-06,\n 1.7401e-07, 1.6473e-07, 8.7526e-09, 2.6315e-07, 1.5336e-07, 5.8494e-07,\n 1.6944e-09, 3.1845e-07, 2.3134e-08, 7.7322e-08, 7.8907e-08, 2.3187e-07,\n 9.4872e-09, 6.2340e-07, 6.6821e-07, 5.4466e-07, 3.8194e-08, 4.0183e-08,\n 1.1585e-08, 3.6347e-09, 1.6055e-07, 1.1375e-08, 1.7652e-08, 1.2683e-08,\n 1.5983e-07, 4.0654e-10, 4.9459e-07, 6.9810e-08, 3.7828e-08, 1.4971e-10,\n 1.0133e-09, 8.6661e-08, 2.5717e-07, 1.7454e-07, 5.1027e-07, 2.9203e-08,\n 8.1160e-07, 5.3754e-07, 2.5459e-08, 2.7982e-08, 9.0308e-08, 3.1885e-08,\n 5.7641e-08, 8.3784e-07, 1.2258e-07, 1.1459e-07, 1.8015e-07, 1.2470e-08,\n 1.2531e-07, 1.8878e-08, 4.8252e-09, 3.0474e-08, 3.0502e-08, 7.6278e-08,\n 1.2492e-08, 1.6953e-07, 2.0031e-08, 9.1512e-07, 8.5962e-09, 5.3044e-07,\n 9.8175e-07, 7.7765e-08, 6.9584e-07, 4.0787e-09, 3.4640e-08, 1.2769e-06,\n 6.9507e-08, 3.6511e-08, 3.7161e-08, 1.1397e-07, 3.1569e-09, 5.4550e-08,\n 2.8864e-06, 2.5569e-07, 3.9701e-09, 1.7313e-07, 1.0175e-07, 1.0821e-07,\n 9.3230e-07, 3.9580e-07, 8.0902e-08, 1.7947e-08, 2.5266e-07, 1.4815e-09,\n 1.0242e-09, 1.8366e-07, 1.3172e-07, 1.5720e-07, 2.5250e-09, 4.0184e-08,\n 5.3119e-08, 2.7187e-11, 2.0524e-07, 4.9719e-08, 9.4860e-09, 1.0410e-06,\n 6.2306e-08, 2.5967e-07, 7.3887e-10, 3.0867e-08, 7.0863e-09, 6.3195e-07,\n 1.1806e-09, 3.1385e-07, 1.0960e-06, 1.9992e-07, 3.1737e-08, 1.6833e-07,\n 2.6665e-09, 5.8074e-11, 1.4581e-07, 3.3608e-07, 8.1434e-07, 9.3510e-08,\n 1.1281e-07, 2.1190e-09, 2.9283e-07, 7.0428e-08, 5.9039e-09, 7.1441e-09,\n 1.7536e-08, 1.7571e-07, 7.8326e-10, 5.4185e-09, 2.4292e-08, 3.2018e-06,\n 5.0425e-08, 6.9466e-10, 8.0443e-09, 2.5542e-08, 5.1511e-07, 2.1792e-09,\n 3.8034e-07, 1.2212e-07, 6.6683e-08, 7.8117e-08, 6.7330e-09, 1.9359e-07,\n 1.3612e-07, 2.2249e-07, 2.0226e-07, 1.0008e-09, 1.0191e-06, 3.2110e-10,\n 1.3164e-07, 5.9543e-10, 7.0565e-08, 3.0861e-07, 7.8355e-09, 6.1556e-08,\n 2.1222e-07, 7.4815e-08, 7.0431e-07, 1.0200e-06, 1.8333e-06, 2.6011e-07,\n 3.3532e-08, 4.3626e-08, 1.2882e-06, 6.9494e-09, 1.2412e-07, 1.7991e-06,\n 1.0051e-08, 1.3913e-07, 1.6559e-06, 2.5578e-08, 3.7195e-08, 1.7340e-07,\n 2.7597e-07, 9.4673e-08, 1.0661e-07, 7.9912e-08, 1.6176e-07, 5.9674e-08,\n 1.8085e-07, 2.3706e-07, 2.6956e-08, 2.0045e-08, 1.2359e-07, 3.7067e-09,\n 1.1294e-06, 7.9139e-08, 4.8813e-09, 5.9585e-07, 1.8351e-07, 8.5000e-10,\n 2.2074e-07, 8.9261e-08, 1.9929e-06, 1.4112e-06, 3.0480e-08, 8.6920e-08,\n 3.5152e-08, 5.5037e-07, 1.1214e-08, 3.0625e-09], device='cuda:0')" + "exp_avg_sq": "tensor([5.0929e-11, 1.2562e-10, 6.3953e-07, 2.0798e-11, 2.1212e-07, 1.3245e-07,\n 3.1504e-08, 1.8562e-08, 6.4610e-07, 1.3388e-08, 1.7094e-08, 6.0955e-08,\n 7.3864e-10, 1.3834e-08, 2.9586e-09, 5.3640e-09, 4.8364e-08, 1.1897e-08,\n 1.6646e-09, 5.3191e-10, 4.5554e-08, 2.2025e-09, 6.1336e-08, 6.5187e-09,\n 5.5913e-08, 1.9950e-07, 2.4387e-08, 2.5062e-08, 8.9422e-09, 2.0032e-07,\n 6.9687e-09, 2.7010e-08, 1.0517e-06, 1.2774e-08, 6.8732e-09, 3.9415e-10,\n 6.8966e-08, 3.9880e-10, 4.2493e-08, 1.3537e-08, 3.2184e-08, 1.1995e-09,\n 3.5878e-10, 9.9759e-08, 3.2447e-11, 3.7140e-08, 1.6424e-08, 2.2063e-08,\n 4.1971e-09, 1.5961e-08, 1.3423e-07, 8.5107e-09, 5.2419e-08, 9.7240e-09,\n 5.4060e-09, 2.6469e-07, 1.5623e-09, 2.2480e-08, 2.8016e-08, 4.8174e-07,\n 4.9724e-08, 4.7073e-08, 2.5011e-09, 7.5196e-08, 4.3823e-08, 1.6715e-07,\n 4.8419e-10, 9.1000e-08, 6.6108e-09, 2.2095e-08, 2.2548e-08, 6.6258e-08,\n 2.7110e-09, 1.7814e-07, 1.9095e-07, 1.5564e-07, 1.0914e-08, 1.1483e-08,\n 3.3106e-09, 1.0386e-09, 4.5879e-08, 3.2506e-09, 5.0442e-09, 3.6242e-09,\n 4.5672e-08, 1.1617e-10, 1.4133e-07, 1.9949e-08, 1.0810e-08, 4.2780e-11,\n 2.8955e-10, 2.4764e-08, 7.3487e-08, 4.9877e-08, 1.4581e-07, 8.3450e-09,\n 2.3192e-07, 1.5361e-07, 7.2750e-09, 7.9960e-09, 2.5806e-08, 9.1113e-09,\n 1.6471e-08, 2.3942e-07, 3.5028e-08, 3.2745e-08, 5.1480e-08, 3.5633e-09,\n 3.5809e-08, 5.3944e-09, 1.3788e-09, 8.7081e-09, 8.7161e-09, 2.1797e-08,\n 3.5697e-09, 4.8445e-08, 5.7240e-09, 2.6150e-07, 2.4564e-09, 1.5158e-07,\n 2.8054e-07, 2.2222e-08, 1.9884e-07, 1.1655e-09, 9.8988e-09, 3.6489e-07,\n 1.9862e-08, 1.0433e-08, 1.0619e-08, 3.2568e-08, 9.0212e-10, 1.5588e-08,\n 8.2482e-07, 7.3066e-08, 1.1345e-09, 4.9472e-08, 2.9077e-08, 3.0921e-08,\n 2.6641e-07, 1.1310e-07, 2.3118e-08, 5.1284e-09, 7.2200e-08, 4.2334e-10,\n 2.9268e-10, 5.2481e-08, 3.7639e-08, 4.4921e-08, 7.2153e-10, 1.1483e-08,\n 1.5179e-08, 7.7690e-12, 5.8648e-08, 1.4207e-08, 2.7107e-09, 2.9746e-07,\n 1.7804e-08, 7.4202e-08, 2.1114e-10, 8.8206e-09, 2.0250e-09, 1.8058e-07,\n 3.3736e-10, 8.9686e-08, 3.1320e-07, 5.7130e-08, 9.0692e-09, 4.8102e-08,\n 7.6198e-10, 1.6595e-11, 4.1666e-08, 9.6038e-08, 2.3271e-07, 2.6721e-08,\n 3.2236e-08, 6.0552e-10, 8.3679e-08, 2.0125e-08, 1.6871e-09, 2.0415e-09,\n 5.0111e-09, 5.0211e-08, 2.2382e-10, 1.5484e-09, 6.9417e-09, 9.1493e-07,\n 1.4409e-08, 1.9850e-10, 2.2987e-09, 7.2989e-09, 1.4720e-07, 6.2273e-10,\n 1.0868e-07, 3.4898e-08, 1.9055e-08, 2.2322e-08, 1.9240e-09, 5.5321e-08,\n 3.8897e-08, 6.3577e-08, 5.7797e-08, 2.8599e-10, 2.9121e-07, 9.1756e-11,\n 3.7616e-08, 1.7015e-10, 2.0164e-08, 8.8187e-08, 2.2390e-09, 1.7590e-08,\n 6.0643e-08, 2.1379e-08, 2.0126e-07, 2.9146e-07, 5.2387e-07, 7.4330e-08,\n 9.5821e-09, 1.2467e-08, 3.6812e-07, 1.9859e-09, 3.5469e-08, 5.1412e-07,\n 2.8722e-09, 3.9758e-08, 4.7319e-07, 7.3090e-09, 1.0629e-08, 4.9551e-08,\n 7.8860e-08, 2.7054e-08, 3.0466e-08, 2.2836e-08, 4.6225e-08, 1.7052e-08,\n 5.1679e-08, 6.7742e-08, 7.7028e-09, 5.7280e-09, 3.5317e-08, 1.0592e-09,\n 3.2274e-07, 2.2615e-08, 1.3949e-09, 1.7027e-07, 5.2440e-08, 2.4289e-10,\n 6.3077e-08, 2.5507e-08, 5.6949e-07, 4.0325e-07, 8.7100e-09, 2.4838e-08,\n 1.0045e-08, 1.5727e-07, 3.2044e-09, 8.7514e-10], device='cuda:0')" }, "46": { - "step": "tensor(1252.)", + "step": "tensor(2504.)", "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([7.2470e-13, 5.3485e-12, 4.9718e-09, 1.0009e-13, 1.2861e-09, 8.6632e-10,\n 1.9196e-10, 2.0632e-10, 4.9250e-09, 1.1343e-10, 1.1564e-10, 3.6552e-10,\n 4.9127e-12, 8.0061e-11, 2.3923e-11, 6.8734e-11, 3.3643e-10, 8.6356e-11,\n 1.4389e-11, 9.7967e-12, 2.4418e-10, 3.9699e-11, 6.1055e-10, 3.7170e-11,\n 6.6144e-10, 8.5142e-10, 1.7472e-10, 1.6607e-10, 1.6681e-10, 1.3529e-09,\n 7.7451e-11, 1.8343e-10, 7.5449e-09, 6.2109e-11, 3.8231e-11, 6.7371e-12,\n 3.9375e-10, 2.5595e-11, 2.2362e-10, 1.2142e-10, 1.6345e-10, 6.3411e-12,\n 2.9482e-12, 6.7447e-10, 2.1906e-13, 2.7818e-10, 7.6798e-11, 2.2797e-10,\n 4.8352e-11, 7.5894e-11, 8.3580e-10, 3.7373e-11, 2.1633e-10, 5.2001e-11,\n 9.7746e-11, 2.5812e-09, 1.7783e-11, 1.1992e-10, 1.5949e-10, 3.4143e-09,\n 2.4611e-10, 2.6931e-10, 2.8777e-11, 2.7663e-10, 3.0707e-10, 9.3445e-10,\n 3.7733e-12, 5.2034e-10, 3.4302e-11, 1.1316e-10, 1.4315e-10, 5.7199e-10,\n 1.1031e-11, 1.0752e-09, 1.6882e-09, 8.1056e-10, 5.2305e-11, 5.3914e-11,\n 1.5247e-11, 2.4763e-12, 2.2498e-10, 1.5741e-11, 3.4142e-11, 1.8224e-11,\n 4.0901e-10, 2.9509e-12, 6.8980e-10, 2.9667e-10, 8.4653e-11, 3.9098e-13,\n 1.4309e-13, 1.5941e-10, 4.6685e-10, 2.5482e-10, 6.2940e-10, 2.7321e-11,\n 1.5163e-09, 8.4336e-10, 5.0199e-11, 1.0830e-10, 1.8255e-10, 6.2676e-11,\n 1.3385e-10, 1.9740e-09, 2.2513e-10, 1.5329e-10, 2.6464e-10, 2.5308e-11,\n 1.1295e-10, 4.5210e-11, 1.5300e-11, 1.2202e-10, 1.0924e-10, 1.0265e-10,\n 1.5704e-11, 4.6926e-10, 5.7819e-11, 3.1370e-09, 3.5587e-11, 1.2408e-09,\n 1.7719e-09, 1.6966e-10, 1.6531e-09, 4.6239e-11, 1.0237e-10, 2.7849e-09,\n 6.2776e-11, 4.1654e-11, 8.2411e-11, 2.2394e-10, 9.8595e-12, 8.6640e-11,\n 1.0394e-08, 2.8953e-10, 1.0354e-11, 3.8732e-10, 3.8160e-10, 2.2133e-10,\n 2.2645e-09, 4.1176e-10, 1.6619e-10, 4.3052e-11, 4.0508e-10, 4.8288e-12,\n 3.4468e-12, 6.1923e-10, 2.1036e-10, 2.4739e-10, 7.0967e-12, 5.9943e-11,\n 1.7700e-10, 3.3839e-13, 2.6335e-10, 7.7264e-11, 1.4828e-11, 1.4848e-09,\n 8.4506e-11, 3.4337e-10, 7.4052e-13, 5.8367e-11, 1.5617e-11, 1.3250e-09,\n 1.2375e-11, 7.0201e-10, 2.6343e-09, 2.6205e-10, 6.7592e-11, 3.7208e-10,\n 1.1240e-11, 2.0303e-12, 2.8769e-10, 1.4783e-09, 1.8350e-09, 1.2964e-10,\n 3.1039e-10, 5.7327e-12, 3.9825e-10, 1.0810e-10, 3.8624e-11, 4.0534e-11,\n 4.3598e-11, 4.4263e-10, 2.6337e-12, 1.3595e-11, 1.1019e-10, 8.9206e-09,\n 6.0882e-11, 2.3838e-12, 1.2525e-11, 8.4632e-11, 1.5660e-09, 9.3568e-12,\n 5.9281e-10, 1.7346e-10, 1.4699e-10, 1.9416e-10, 1.0082e-11, 2.7228e-10,\n 1.8644e-10, 3.0982e-10, 3.9166e-10, 4.9989e-12, 1.1778e-09, 2.0979e-12,\n 2.1174e-10, 1.8027e-12, 1.0445e-10, 5.2898e-10, 5.7535e-11, 1.3479e-10,\n 3.2701e-10, 9.8005e-11, 1.5655e-09, 1.5691e-09, 2.8197e-09, 6.8945e-10,\n 7.7763e-11, 6.0101e-11, 2.5555e-09, 2.5001e-11, 1.8007e-10, 4.3475e-09,\n 2.4506e-11, 3.4930e-10, 4.5788e-09, 4.4223e-11, 1.7852e-10, 2.0406e-10,\n 4.5536e-10, 1.9099e-10, 1.9442e-10, 1.1055e-10, 4.1720e-10, 1.0492e-10,\n 2.5410e-10, 4.9390e-10, 4.2624e-11, 3.1611e-11, 2.4183e-10, 9.9917e-12,\n 3.6151e-09, 1.3068e-10, 2.0369e-11, 8.7565e-10, 2.0189e-10, 2.0843e-12,\n 4.2299e-10, 1.0378e-10, 5.0481e-09, 5.7914e-09, 4.5684e-11, 1.0009e-10,\n 5.9640e-11, 1.3588e-09, 2.7853e-11, 6.0281e-12], device='cuda:0')" + "exp_avg_sq": "tensor([2.0709e-13, 1.5284e-12, 1.4207e-09, 2.8602e-14, 3.6751e-10, 2.4756e-10,\n 5.4854e-11, 5.8958e-11, 1.4073e-09, 3.2415e-11, 3.3046e-11, 1.0445e-10,\n 1.4038e-12, 2.2878e-11, 6.8362e-12, 1.9641e-11, 9.6137e-11, 2.4677e-11,\n 4.1118e-12, 2.7995e-12, 6.9778e-11, 1.1344e-11, 1.7447e-10, 1.0622e-11,\n 1.8901e-10, 2.4330e-10, 4.9928e-11, 4.7455e-11, 4.7668e-11, 3.8660e-10,\n 2.2132e-11, 5.2415e-11, 2.1560e-09, 1.7748e-11, 1.0925e-11, 1.9252e-12,\n 1.1252e-10, 7.3140e-12, 6.3900e-11, 3.4697e-11, 4.6708e-11, 1.8120e-12,\n 8.4246e-13, 1.9274e-10, 6.2597e-14, 7.9493e-11, 2.1946e-11, 6.5144e-11,\n 1.3817e-11, 2.1687e-11, 2.3884e-10, 1.0680e-11, 6.1817e-11, 1.4860e-11,\n 2.7932e-11, 7.3760e-10, 5.0816e-12, 3.4268e-11, 4.5576e-11, 9.7565e-10,\n 7.0328e-11, 7.6956e-11, 8.2233e-12, 7.9050e-11, 8.7749e-11, 2.6703e-10,\n 1.0782e-12, 1.4869e-10, 9.8022e-12, 3.2336e-11, 4.0908e-11, 1.6345e-10,\n 3.1522e-12, 3.0726e-10, 4.8242e-10, 2.3163e-10, 1.4946e-11, 1.5406e-11,\n 4.3571e-12, 7.0761e-13, 6.4290e-11, 4.4980e-12, 9.7563e-12, 5.2077e-12,\n 1.1688e-10, 8.4324e-13, 1.9712e-10, 8.4777e-11, 2.4190e-11, 1.1173e-13,\n 4.0890e-14, 4.5553e-11, 1.3341e-10, 7.2817e-11, 1.7986e-10, 7.8072e-12,\n 4.3330e-10, 2.4100e-10, 1.4345e-11, 3.0948e-11, 5.2165e-11, 1.7910e-11,\n 3.8250e-11, 5.6409e-10, 6.4333e-11, 4.3805e-11, 7.5622e-11, 7.2321e-12,\n 3.2276e-11, 1.2919e-11, 4.3722e-12, 3.4869e-11, 3.1216e-11, 2.9333e-11,\n 4.4876e-12, 1.3410e-10, 1.6522e-11, 8.9643e-10, 1.0169e-11, 3.5456e-10,\n 5.0633e-10, 4.8483e-11, 4.7240e-10, 1.3213e-11, 2.9254e-11, 7.9580e-10,\n 1.7939e-11, 1.1903e-11, 2.3550e-11, 6.3993e-11, 2.8174e-12, 2.4758e-11,\n 2.9703e-09, 8.2736e-11, 2.9587e-12, 1.1068e-10, 1.0904e-10, 6.3247e-11,\n 6.4709e-10, 1.1766e-10, 4.7490e-11, 1.2303e-11, 1.1575e-10, 1.3799e-12,\n 9.8496e-13, 1.7695e-10, 6.0111e-11, 7.0693e-11, 2.0279e-12, 1.7129e-11,\n 5.0578e-11, 9.6699e-14, 7.5254e-11, 2.2079e-11, 4.2372e-12, 4.2428e-10,\n 2.4148e-11, 9.8121e-11, 2.1161e-13, 1.6679e-11, 4.4626e-12, 3.7864e-10,\n 3.5362e-12, 2.0061e-10, 7.5276e-10, 7.4883e-11, 1.9315e-11, 1.0632e-10,\n 3.2121e-12, 5.8017e-13, 8.2209e-11, 4.2245e-10, 5.2436e-10, 3.7047e-11,\n 8.8697e-11, 1.6382e-12, 1.1380e-10, 3.0892e-11, 1.1037e-11, 1.1583e-11,\n 1.2458e-11, 1.2649e-10, 7.5260e-13, 3.8848e-12, 3.1489e-11, 2.5491e-09,\n 1.7397e-11, 6.8119e-13, 3.5791e-12, 2.4184e-11, 4.4751e-10, 2.6738e-12,\n 1.6940e-10, 4.9568e-11, 4.2003e-11, 5.5484e-11, 2.8810e-12, 7.7807e-11,\n 5.3277e-11, 8.8533e-11, 1.1192e-10, 1.4285e-12, 3.3657e-10, 5.9950e-13,\n 6.0507e-11, 5.1514e-13, 2.9848e-11, 1.5116e-10, 1.6441e-11, 3.8518e-11,\n 9.3446e-11, 2.8006e-11, 4.4735e-10, 4.4838e-10, 8.0577e-10, 1.9701e-10,\n 2.2221e-11, 1.7174e-11, 7.3024e-10, 7.1442e-12, 5.1457e-11, 1.2423e-09,\n 7.0027e-12, 9.9814e-11, 1.3084e-09, 1.2637e-11, 5.1014e-11, 5.8311e-11,\n 1.3012e-10, 5.4578e-11, 5.5556e-11, 3.1591e-11, 1.1922e-10, 2.9983e-11,\n 7.2612e-11, 1.4114e-10, 1.2180e-11, 9.0331e-12, 6.9106e-11, 2.8552e-12,\n 1.0330e-09, 3.7343e-11, 5.8205e-12, 2.5022e-10, 5.7691e-11, 5.9559e-13,\n 1.2087e-10, 2.9655e-11, 1.4425e-09, 1.6549e-09, 1.3055e-11, 2.8602e-11,\n 1.7043e-11, 3.8830e-10, 7.9593e-12, 1.7226e-12], device='cuda:0')" }, "47": { - "step": "tensor(1252.)", + "step": "tensor(2504.)", "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.8411e-12, 3.7422e-12, 6.1501e-09, 9.0648e-13, 1.8196e-09, 1.1343e-09,\n 3.1610e-10, 2.1773e-10, 6.0568e-09, 1.3680e-10, 1.8995e-10, 5.3274e-10,\n 1.1130e-11, 1.5981e-10, 4.5594e-11, 6.0320e-11, 4.8867e-10, 1.3550e-10,\n 2.1762e-11, 1.3088e-11, 4.6512e-10, 3.9230e-11, 5.5316e-10, 7.8453e-11,\n 6.1277e-10, 1.7551e-09, 2.6763e-10, 2.3784e-10, 1.3746e-10, 1.9862e-09,\n 6.9778e-11, 2.4795e-10, 1.0023e-08, 1.0764e-10, 7.4832e-11, 9.8143e-12,\n 5.6172e-10, 2.6403e-11, 4.5621e-10, 1.5368e-10, 2.8615e-10, 1.7626e-11,\n 5.3461e-12, 1.0621e-09, 5.6323e-13, 2.9149e-10, 1.6602e-10, 2.7095e-10,\n 5.4456e-11, 1.7333e-10, 1.4171e-09, 6.2775e-11, 4.5837e-10, 9.3126e-11,\n 7.8869e-11, 2.3446e-09, 2.6361e-11, 1.8121e-10, 2.5587e-10, 4.7339e-09,\n 4.0092e-10, 3.8160e-10, 3.4907e-11, 7.4397e-10, 4.4438e-10, 1.3785e-09,\n 8.3280e-12, 7.8685e-10, 7.1460e-11, 2.3979e-10, 2.5299e-10, 7.1679e-10,\n 2.0088e-11, 1.7633e-09, 1.9091e-09, 1.4147e-09, 1.2347e-10, 1.3174e-10,\n 3.9452e-11, 4.0321e-12, 4.8931e-10, 3.2512e-11, 5.4960e-11, 4.1590e-11,\n 3.6315e-10, 8.9155e-12, 1.4453e-09, 2.5343e-10, 1.3630e-10, 5.4409e-13,\n 1.4136e-12, 2.2076e-10, 6.3117e-10, 4.6141e-10, 1.3106e-09, 6.6336e-11,\n 2.1094e-09, 1.3700e-09, 6.7975e-11, 1.1569e-10, 2.8009e-10, 1.0128e-10,\n 1.9212e-10, 2.1462e-09, 3.6092e-10, 2.8683e-10, 5.1785e-10, 4.6920e-11,\n 2.8242e-10, 5.5369e-11, 1.5171e-11, 1.1689e-10, 8.3802e-11, 1.7344e-10,\n 4.4499e-11, 4.7693e-10, 7.9943e-11, 2.2314e-09, 4.2977e-11, 1.5121e-09,\n 2.7670e-09, 2.5618e-10, 1.7903e-09, 7.0186e-11, 1.2733e-10, 3.4119e-09,\n 1.6229e-10, 1.1107e-10, 1.2282e-10, 3.3876e-10, 1.6535e-11, 1.7465e-10,\n 7.3580e-09, 6.4095e-10, 1.6522e-11, 4.1842e-10, 3.0378e-10, 2.4732e-10,\n 2.3062e-09, 1.0851e-09, 2.3842e-10, 5.5610e-11, 6.2447e-10, 1.0406e-11,\n 4.8762e-12, 5.5664e-10, 2.7518e-10, 4.4490e-10, 1.5564e-11, 1.3197e-10,\n 1.8440e-10, 1.3077e-14, 5.3439e-10, 1.2036e-10, 2.9629e-11, 2.5747e-09,\n 1.6547e-10, 6.6792e-10, 1.0015e-12, 1.0835e-10, 3.0259e-11, 1.7671e-09,\n 1.5535e-11, 8.9524e-10, 3.0939e-09, 5.1128e-10, 7.5500e-11, 4.2195e-10,\n 1.6969e-11, 2.2606e-12, 3.3918e-10, 9.9845e-10, 2.0221e-09, 2.8622e-10,\n 2.8020e-10, 8.4099e-12, 8.3529e-10, 2.1504e-10, 3.9834e-11, 3.5319e-11,\n 7.0413e-11, 4.7174e-10, 3.5363e-12, 2.1594e-11, 9.7161e-11, 8.7371e-09,\n 1.2447e-10, 4.9374e-12, 2.8993e-11, 1.0062e-10, 1.2946e-09, 1.9195e-11,\n 9.4857e-10, 3.8607e-10, 1.7260e-10, 2.5397e-10, 2.2514e-11, 4.8734e-10,\n 3.5630e-10, 6.5545e-10, 5.0235e-10, 1.6209e-11, 2.7298e-09, 6.6706e-12,\n 3.0855e-10, 1.3488e-12, 2.1847e-10, 8.2380e-10, 4.9272e-11, 1.8937e-10,\n 5.9272e-10, 1.6740e-10, 1.9638e-09, 2.6743e-09, 5.1661e-09, 6.2150e-10,\n 1.2199e-10, 1.0347e-10, 3.2855e-09, 3.1234e-11, 2.9124e-10, 4.8010e-09,\n 3.7948e-11, 4.3074e-10, 4.5872e-09, 4.4771e-11, 1.3553e-10, 4.9994e-10,\n 8.0495e-10, 2.5657e-10, 3.1752e-10, 2.2855e-10, 3.6502e-10, 1.8279e-10,\n 5.0842e-10, 6.8130e-10, 7.8568e-11, 6.2396e-11, 2.8934e-10, 1.4746e-11,\n 2.8127e-09, 2.2328e-10, 3.6025e-11, 1.6845e-09, 4.3489e-10, 5.2344e-12,\n 6.0528e-10, 2.1946e-10, 5.4647e-09, 3.9838e-09, 9.9467e-11, 2.1404e-10,\n 7.9761e-11, 1.5300e-09, 4.1305e-11, 1.3643e-11], device='cuda:0')" + "exp_avg_sq": "tensor([5.2611e-13, 1.0694e-12, 1.7574e-09, 2.5903e-13, 5.1998e-10, 3.2414e-10,\n 9.0328e-11, 6.2218e-11, 1.7308e-09, 3.9091e-11, 5.4279e-11, 1.5223e-10,\n 3.1804e-12, 4.5667e-11, 1.3029e-11, 1.7237e-11, 1.3964e-10, 3.8720e-11,\n 6.2188e-12, 3.7401e-12, 1.3291e-10, 1.1210e-11, 1.5807e-10, 2.2419e-11,\n 1.7510e-10, 5.0154e-10, 7.6477e-11, 6.7966e-11, 3.9279e-11, 5.6758e-10,\n 1.9939e-11, 7.0852e-11, 2.8642e-09, 3.0760e-11, 2.1384e-11, 2.8045e-12,\n 1.6052e-10, 7.5449e-12, 1.3037e-10, 4.3915e-11, 8.1770e-11, 5.0368e-12,\n 1.5277e-12, 3.0349e-10, 1.6095e-13, 8.3296e-11, 4.7442e-11, 7.7426e-11,\n 1.5561e-11, 4.9531e-11, 4.0496e-10, 1.7938e-11, 1.3098e-10, 2.6612e-11,\n 2.2537e-11, 6.6998e-10, 7.5328e-12, 5.1782e-11, 7.3117e-11, 1.3528e-09,\n 1.1457e-10, 1.0905e-10, 9.9748e-12, 2.1259e-10, 1.2699e-10, 3.9391e-10,\n 2.3798e-12, 2.2485e-10, 2.0420e-11, 6.8522e-11, 7.2295e-11, 2.0483e-10,\n 5.7402e-12, 5.0387e-10, 5.4554e-10, 4.0426e-10, 3.5284e-11, 3.7646e-11,\n 1.1274e-11, 1.1522e-12, 1.3983e-10, 9.2905e-12, 1.5705e-11, 1.1885e-11,\n 1.0377e-10, 2.5477e-12, 4.1300e-10, 7.2419e-11, 3.8948e-11, 1.5548e-13,\n 4.0396e-13, 6.3083e-11, 1.8036e-10, 1.3185e-10, 3.7451e-10, 1.8956e-11,\n 6.0277e-10, 3.9149e-10, 1.9424e-11, 3.3059e-11, 8.0039e-11, 2.8940e-11,\n 5.4900e-11, 6.1330e-10, 1.0313e-10, 8.1963e-11, 1.4798e-10, 1.3408e-11,\n 8.0703e-11, 1.5822e-11, 4.3353e-12, 3.3402e-11, 2.3947e-11, 4.9562e-11,\n 1.2716e-11, 1.3629e-10, 2.2844e-11, 6.3763e-10, 1.2281e-11, 4.3210e-10,\n 7.9070e-10, 7.3206e-11, 5.1161e-10, 2.0056e-11, 3.6385e-11, 9.7498e-10,\n 4.6376e-11, 3.1740e-11, 3.5098e-11, 9.6802e-11, 4.7251e-12, 4.9908e-11,\n 2.1026e-09, 1.8316e-10, 4.7214e-12, 1.1957e-10, 8.6809e-11, 7.0675e-11,\n 6.5901e-10, 3.1007e-10, 6.8129e-11, 1.5891e-11, 1.7845e-10, 2.9737e-12,\n 1.3934e-12, 1.5907e-10, 7.8634e-11, 1.2713e-10, 4.4477e-12, 3.7710e-11,\n 5.2694e-11, 3.7368e-15, 1.5270e-10, 3.4394e-11, 8.4666e-12, 7.3573e-10,\n 4.7284e-11, 1.9086e-10, 2.8617e-13, 3.0961e-11, 8.6467e-12, 5.0497e-10,\n 4.4393e-12, 2.5582e-10, 8.8411e-10, 1.4610e-10, 2.1575e-11, 1.2057e-10,\n 4.8491e-12, 6.4597e-13, 9.6923e-11, 2.8531e-10, 5.7783e-10, 8.1790e-11,\n 8.0069e-11, 2.4032e-12, 2.3869e-10, 6.1449e-11, 1.1383e-11, 1.0093e-11,\n 2.0121e-11, 1.3480e-10, 1.0105e-12, 6.1707e-12, 2.7765e-11, 2.4967e-09,\n 3.5569e-11, 1.4109e-12, 8.2850e-12, 2.8754e-11, 3.6994e-10, 5.4850e-12,\n 2.7106e-10, 1.1032e-10, 4.9323e-11, 7.2575e-11, 6.4336e-12, 1.3926e-10,\n 1.0182e-10, 1.8730e-10, 1.4355e-10, 4.6318e-12, 7.8007e-10, 1.9062e-12,\n 8.8170e-11, 3.8544e-13, 6.2430e-11, 2.3541e-10, 1.4080e-11, 5.4115e-11,\n 1.6938e-10, 4.7837e-11, 5.6117e-10, 7.6421e-10, 1.4763e-09, 1.7760e-10,\n 3.4860e-11, 2.9567e-11, 9.3884e-10, 8.9252e-12, 8.3224e-11, 1.3719e-09,\n 1.0844e-11, 1.2309e-10, 1.3108e-09, 1.2794e-11, 3.8730e-11, 1.4286e-10,\n 2.3002e-10, 7.3317e-11, 9.0733e-11, 6.5311e-11, 1.0431e-10, 5.2233e-11,\n 1.4528e-10, 1.9469e-10, 2.2451e-11, 1.7830e-11, 8.2681e-11, 4.2138e-12,\n 8.0376e-10, 6.3804e-11, 1.0294e-11, 4.8136e-10, 1.2427e-10, 1.4958e-12,\n 1.7296e-10, 6.2714e-11, 1.5616e-09, 1.1384e-09, 2.8424e-11, 6.1164e-11,\n 2.2792e-11, 4.3720e-10, 1.1803e-11, 3.8987e-12], device='cuda:0')" }, "48": { - "step": "tensor(1252.)", + "step": "tensor(2504.)", "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[2.5531e-13, 4.6390e-14, 7.2510e-13, ..., 8.4498e-13, 0.0000e+00,\n 1.1458e-13],\n [5.1462e-14, 2.1132e-13, 2.6714e-12, ..., 3.6115e-12, 0.0000e+00,\n 6.3134e-12],\n [4.2688e-10, 8.3459e-12, 1.7847e-09, ..., 4.5341e-10, 0.0000e+00,\n 2.5601e-09],\n ...,\n [5.6349e-11, 2.3332e-12, 6.4918e-10, ..., 1.9218e-10, 0.0000e+00,\n 2.0660e-10],\n [7.0121e-12, 8.1777e-15, 2.1286e-13, ..., 8.9361e-13, 0.0000e+00,\n 9.1599e-13],\n [2.6331e-14, 1.5641e-14, 5.8247e-12, ..., 4.9471e-13, 0.0000e+00,\n 3.9035e-12]], device='cuda:0')" + "exp_avg_sq": "tensor([[7.2958e-14, 1.3256e-14, 2.0720e-13, ..., 2.4146e-13, 0.0000e+00,\n 3.2743e-14],\n [1.4706e-14, 6.0386e-14, 7.6336e-13, ..., 1.0320e-12, 0.0000e+00,\n 1.8041e-12],\n [1.2199e-10, 2.3849e-12, 5.1000e-10, ..., 1.2957e-10, 0.0000e+00,\n 7.3157e-10],\n ...,\n [1.6102e-11, 6.6673e-13, 1.8551e-10, ..., 5.4916e-11, 0.0000e+00,\n 5.9036e-11],\n [2.0038e-12, 2.3368e-15, 6.0825e-14, ..., 2.5535e-13, 0.0000e+00,\n 2.6175e-13],\n [7.5244e-15, 4.4695e-15, 1.6644e-12, ..., 1.4137e-13, 0.0000e+00,\n 1.1155e-12]], device='cuda:0')" }, "49": { - "step": "tensor(1252.)", + "step": "tensor(2504.)", "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([2.1833e-10, 1.9059e-09, 1.0224e-06, 1.0618e-09, 1.6677e-07, 5.3742e-07,\n 2.5208e-07, 5.6792e-08, 2.6834e-08, 5.4854e-08, 7.0797e-07, 2.0849e-07,\n 3.8925e-08, 1.3915e-07, 6.0414e-10, 3.9464e-08, 1.0493e-07, 4.9635e-08,\n 7.9301e-09, 1.7895e-08, 2.5655e-07, 4.9556e-09, 2.0942e-08, 7.8984e-08,\n 1.3229e-07, 6.9070e-08, 1.0948e-07, 2.2267e-07, 3.9730e-08, 4.0138e-07,\n 5.4401e-07, 6.3541e-07, 3.4504e-06, 1.0877e-08, 1.5046e-07, 2.9956e-09,\n 3.0807e-07, 5.0870e-09, 1.1365e-06, 3.2973e-08, 1.8146e-07, 4.2475e-09,\n 1.8805e-08, 7.4021e-07, 3.3700e-10, 3.7232e-08, 1.5870e-07, 1.0960e-07,\n 2.1539e-08, 2.0879e-07, 3.8528e-07, 1.2870e-07, 4.2039e-07, 3.4275e-08,\n 4.3575e-08, 9.6536e-07, 3.3405e-09, 1.4733e-07, 7.7006e-07, 9.5572e-07,\n 1.2220e-07, 1.0898e-07, 4.3357e-09, 7.5369e-07, 3.5981e-08, 1.0013e-06,\n 1.2166e-09, 2.5423e-07, 1.4312e-07, 1.4374e-07, 2.9543e-07, 4.7605e-07,\n 1.2706e-07, 3.5102e-07, 1.5511e-07, 5.9388e-07, 3.4037e-07, 1.3418e-07,\n 2.0661e-08, 7.9652e-10, 7.0736e-07, 5.1190e-08, 4.0809e-08, 2.7452e-07,\n 6.4623e-08, 9.4834e-09, 1.5329e-06, 4.7214e-08, 5.4048e-08, 2.2999e-09,\n 5.6987e-10, 5.8103e-08, 2.1761e-07, 4.7846e-08, 1.3356e-06, 4.1223e-08,\n 7.9052e-07, 1.0924e-06, 1.5747e-07, 9.5177e-08, 9.6253e-08, 4.0825e-08,\n 1.8209e-08, 3.1546e-07, 1.8230e-07, 1.2890e-07, 1.8624e-07, 1.4362e-08,\n 1.2106e-07, 1.4589e-07, 6.8944e-08, 1.7278e-08, 8.4535e-07, 2.1236e-08,\n 2.6463e-08, 8.4260e-07, 2.7270e-08, 1.7521e-07, 3.1853e-08, 5.3969e-08,\n 1.8239e-06, 1.5129e-07, 6.7972e-07, 1.2140e-08, 7.2223e-09, 2.1322e-06,\n 2.7827e-07, 1.4535e-08, 7.3210e-09, 3.1073e-07, 4.5182e-11, 6.3602e-08,\n 1.1174e-06, 7.0615e-07, 2.8712e-09, 7.1425e-08, 1.7269e-07, 5.8442e-08,\n 6.3197e-07, 4.6225e-07, 7.3896e-08, 6.7360e-08, 3.1401e-07, 4.8572e-10,\n 2.4054e-08, 3.6276e-08, 2.3277e-08, 3.8438e-07, 2.2381e-08, 3.6557e-08,\n 5.0654e-08, 6.6071e-08, 1.4935e-07, 1.1270e-07, 4.3305e-08, 5.1441e-07,\n 1.1440e-08, 1.6864e-06, 6.0739e-10, 3.1206e-08, 1.2656e-08, 4.3360e-08,\n 6.5124e-10, 7.8884e-08, 4.8748e-07, 3.1792e-07, 1.9543e-08, 9.0684e-08,\n 9.1522e-11, 1.5823e-08, 7.3504e-09, 1.3544e-07, 1.0870e-07, 1.3440e-07,\n 7.0785e-08, 1.7730e-08, 5.8003e-07, 3.3806e-07, 3.3361e-09, 8.5282e-10,\n 9.2109e-08, 2.4965e-07, 6.9916e-10, 5.3545e-08, 3.7317e-08, 7.3253e-07,\n 1.4774e-08, 3.7850e-09, 3.9765e-08, 1.7274e-08, 7.3699e-08, 8.5863e-10,\n 1.7438e-07, 5.7051e-08, 4.0328e-07, 4.9120e-08, 2.2512e-08, 1.6312e-07,\n 7.9262e-07, 7.6470e-08, 1.1224e-07, 2.3798e-09, 1.3540e-06, 3.0167e-10,\n 1.6765e-07, 6.8146e-10, 1.8443e-07, 1.8331e-07, 7.7284e-09, 1.1346e-07,\n 1.0695e-06, 6.3408e-08, 7.5411e-08, 9.0167e-07, 3.5745e-06, 4.5401e-08,\n 3.7260e-08, 3.7750e-08, 1.8856e-06, 2.0683e-09, 3.5004e-08, 3.5966e-06,\n 4.8595e-09, 2.7395e-07, 1.2885e-06, 2.2373e-08, 3.8550e-08, 5.6633e-07,\n 1.3789e-07, 4.1079e-08, 7.5170e-07, 1.6199e-07, 9.2727e-09, 5.9861e-08,\n 2.5748e-07, 2.5551e-07, 3.8266e-08, 4.7660e-08, 2.7831e-08, 1.7497e-08,\n 1.5822e-07, 1.1832e-07, 9.6653e-09, 1.6321e-06, 2.1811e-07, 2.0026e-09,\n 1.7894e-07, 7.3657e-07, 9.2054e-07, 8.0237e-07, 2.5349e-08, 8.7621e-08,\n 3.5018e-08, 3.3352e-07, 3.6205e-09, 9.2716e-10], device='cuda:0')" + "exp_avg_sq": "tensor([6.2390e-11, 5.4462e-10, 2.9217e-07, 3.0341e-10, 4.7655e-08, 1.5357e-07,\n 7.2034e-08, 1.6229e-08, 7.6680e-09, 1.5675e-08, 2.0231e-07, 5.9578e-08,\n 1.1123e-08, 3.9763e-08, 1.7264e-10, 1.1277e-08, 2.9985e-08, 1.4183e-08,\n 2.2661e-09, 5.1135e-09, 7.3310e-08, 1.4161e-09, 5.9843e-09, 2.2570e-08,\n 3.7802e-08, 1.9737e-08, 3.1284e-08, 6.3630e-08, 1.1353e-08, 1.1470e-07,\n 1.5546e-07, 1.8157e-07, 9.8599e-07, 3.1081e-09, 4.2994e-08, 8.5602e-10,\n 8.8033e-08, 1.4536e-09, 3.2477e-07, 9.4221e-09, 5.1853e-08, 1.2138e-09,\n 5.3736e-09, 2.1152e-07, 9.6300e-11, 1.0639e-08, 4.5348e-08, 3.1318e-08,\n 6.1548e-09, 5.9665e-08, 1.1010e-07, 3.6776e-08, 1.2013e-07, 9.7944e-09,\n 1.2452e-08, 2.7586e-07, 9.5456e-10, 4.2102e-08, 2.2005e-07, 2.7310e-07,\n 3.4919e-08, 3.1142e-08, 1.2390e-09, 2.1537e-07, 1.0282e-08, 2.8614e-07,\n 3.4765e-10, 7.2648e-08, 4.0897e-08, 4.1076e-08, 8.4422e-08, 1.3603e-07,\n 3.6307e-08, 1.0031e-07, 4.4324e-08, 1.6970e-07, 9.7262e-08, 3.8344e-08,\n 5.9041e-09, 2.2761e-10, 2.0213e-07, 1.4628e-08, 1.1662e-08, 7.8446e-08,\n 1.8466e-08, 2.7099e-09, 4.3804e-07, 1.3492e-08, 1.5445e-08, 6.5721e-10,\n 1.6285e-10, 1.6603e-08, 6.2185e-08, 1.3672e-08, 3.8167e-07, 1.1780e-08,\n 2.2590e-07, 3.1217e-07, 4.4999e-08, 2.7197e-08, 2.7505e-08, 1.1666e-08,\n 5.2033e-09, 9.0144e-08, 5.2093e-08, 3.6835e-08, 5.3219e-08, 4.1042e-09,\n 3.4595e-08, 4.1689e-08, 1.9701e-08, 4.9373e-09, 2.4157e-07, 6.0685e-09,\n 7.5620e-09, 2.4078e-07, 7.7927e-09, 5.0066e-08, 9.1023e-09, 1.5422e-08,\n 5.2120e-07, 4.3232e-08, 1.9424e-07, 3.4690e-09, 2.0638e-09, 6.0929e-07,\n 7.9518e-08, 4.1536e-09, 2.0920e-09, 8.8793e-08, 1.2911e-11, 1.8175e-08,\n 3.1931e-07, 2.0179e-07, 8.2045e-10, 2.0410e-08, 4.9349e-08, 1.6700e-08,\n 1.8059e-07, 1.3209e-07, 2.1116e-08, 1.9249e-08, 8.9731e-08, 1.3880e-10,\n 6.8736e-09, 1.0366e-08, 6.6517e-09, 1.0984e-07, 6.3954e-09, 1.0446e-08,\n 1.4475e-08, 1.8880e-08, 4.2678e-08, 3.2204e-08, 1.2375e-08, 1.4700e-07,\n 3.2691e-09, 4.8191e-07, 1.7357e-10, 8.9174e-09, 3.6165e-09, 1.2390e-08,\n 1.8610e-10, 2.2542e-08, 1.3930e-07, 9.0849e-08, 5.5846e-09, 2.5914e-08,\n 2.6153e-11, 4.5215e-09, 2.1004e-09, 3.8704e-08, 3.1063e-08, 3.8407e-08,\n 2.0227e-08, 5.0665e-09, 1.6575e-07, 9.6604e-08, 9.5331e-10, 2.4370e-10,\n 2.6321e-08, 7.1340e-08, 1.9979e-10, 1.5301e-08, 1.0664e-08, 2.0933e-07,\n 4.2217e-09, 1.0816e-09, 1.1363e-08, 4.9363e-09, 2.1060e-08, 2.4536e-10,\n 4.9831e-08, 1.6303e-08, 1.1524e-07, 1.4036e-08, 6.4329e-09, 4.6614e-08,\n 2.2650e-07, 2.1852e-08, 3.2073e-08, 6.8005e-10, 3.8691e-07, 8.6204e-11,\n 4.7909e-08, 1.9473e-10, 5.2701e-08, 5.2383e-08, 2.2084e-09, 3.2422e-08,\n 3.0561e-07, 1.8119e-08, 2.1549e-08, 2.5766e-07, 1.0214e-06, 1.2974e-08,\n 1.0647e-08, 1.0787e-08, 5.3882e-07, 5.9102e-10, 1.0003e-08, 1.0277e-06,\n 1.3887e-09, 7.8284e-08, 3.6820e-07, 6.3934e-09, 1.1016e-08, 1.6183e-07,\n 3.9403e-08, 1.1739e-08, 2.1480e-07, 4.6290e-08, 2.6497e-09, 1.7106e-08,\n 7.3577e-08, 7.3015e-08, 1.0935e-08, 1.3619e-08, 7.9529e-09, 5.0000e-09,\n 4.5211e-08, 3.3810e-08, 2.7619e-09, 4.6639e-07, 6.2327e-08, 5.7226e-10,\n 5.1135e-08, 2.1048e-07, 2.6305e-07, 2.2928e-07, 7.2436e-09, 2.5038e-08,\n 1.0007e-08, 9.5307e-08, 1.0346e-09, 2.6494e-10], device='cuda:0')" }, "50": { - "step": "tensor(1252.)", + "step": "tensor(2504.)", "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.6905e-13, 7.2910e-12, 1.5155e-09, 1.6067e-11, 1.4434e-10, 1.2255e-09,\n 7.0053e-10, 1.2580e-10, 2.6235e-10, 9.4654e-11, 1.5125e-09, 3.7089e-10,\n 1.6932e-10, 2.5306e-10, 2.4741e-12, 1.4116e-10, 1.7504e-10, 9.8628e-11,\n 3.0490e-11, 3.2783e-11, 4.7852e-10, 8.0924e-12, 4.0363e-11, 1.1170e-10,\n 2.5066e-10, 1.4251e-10, 1.3989e-10, 9.7118e-10, 2.5911e-10, 4.8855e-10,\n 7.2456e-10, 1.4078e-09, 4.8700e-09, 1.2826e-11, 5.8534e-10, 4.1581e-12,\n 6.1651e-10, 3.8089e-11, 3.5873e-09, 4.5192e-11, 1.8683e-10, 1.2279e-11,\n 3.3646e-11, 1.1132e-09, 3.0171e-14, 4.3275e-11, 3.7943e-10, 2.3837e-10,\n 1.0933e-10, 3.4858e-10, 7.6420e-10, 2.4677e-10, 7.4998e-10, 4.3550e-11,\n 9.2647e-11, 2.2404e-09, 8.6699e-12, 4.2198e-10, 1.1057e-09, 1.2353e-09,\n 1.3085e-10, 1.5795e-10, 7.1820e-12, 1.3985e-09, 5.6212e-11, 2.9202e-09,\n 3.4097e-12, 3.2269e-10, 3.6299e-10, 1.7072e-10, 6.9807e-10, 1.1138e-09,\n 3.1177e-10, 4.7172e-10, 2.0328e-10, 9.0003e-10, 7.0837e-10, 1.9364e-10,\n 2.8883e-11, 2.4103e-12, 1.1895e-09, 1.4742e-10, 1.0916e-10, 9.9416e-10,\n 8.1885e-11, 2.5191e-11, 3.2665e-09, 5.2382e-11, 1.0302e-10, 3.5613e-12,\n 3.5952e-14, 1.0871e-10, 3.4934e-10, 9.2027e-11, 2.7776e-09, 5.9734e-11,\n 1.0518e-09, 2.3789e-09, 2.1994e-10, 5.5199e-10, 1.8208e-10, 7.1780e-11,\n 3.1180e-11, 5.5157e-10, 3.3081e-10, 1.8969e-10, 3.1221e-10, 4.3541e-11,\n 1.5900e-10, 1.7290e-10, 1.1027e-10, 2.3648e-11, 1.5029e-09, 1.9059e-11,\n 2.9193e-11, 1.2322e-09, 6.8483e-11, 3.0002e-10, 1.1459e-10, 8.9695e-11,\n 4.4962e-09, 2.1741e-10, 1.5631e-09, 1.0193e-10, 2.1470e-11, 6.0887e-09,\n 7.7667e-10, 2.7926e-11, 2.0440e-11, 7.9264e-10, 1.7660e-13, 1.3539e-10,\n 1.5569e-09, 1.2480e-09, 7.6222e-12, 1.0804e-10, 5.8446e-10, 6.7984e-11,\n 1.1404e-09, 6.0543e-10, 1.4714e-10, 1.1541e-10, 4.0932e-10, 3.4612e-12,\n 3.3890e-11, 5.5459e-11, 1.8387e-11, 1.2344e-09, 7.4442e-11, 5.9922e-11,\n 1.5009e-10, 1.5257e-10, 2.4164e-10, 1.9975e-10, 9.5529e-11, 1.0656e-09,\n 1.9492e-11, 3.9764e-09, 8.3201e-13, 4.3370e-11, 2.8432e-11, 9.9510e-11,\n 2.7417e-12, 1.3370e-10, 1.0196e-09, 5.6609e-10, 2.0421e-11, 1.3816e-10,\n 1.2007e-12, 3.8889e-11, 1.1650e-11, 3.0210e-10, 1.5791e-10, 2.1108e-10,\n 1.3247e-10, 2.5909e-11, 8.9319e-10, 5.4677e-10, 3.7238e-11, 3.1055e-12,\n 3.3295e-10, 7.0058e-10, 1.2629e-12, 1.0333e-10, 1.4545e-10, 8.9407e-10,\n 3.4076e-11, 8.3891e-12, 1.2428e-10, 3.0150e-11, 1.1227e-10, 4.3421e-12,\n 2.0763e-10, 8.5591e-11, 7.5009e-10, 9.0297e-11, 3.2866e-11, 2.3724e-10,\n 2.5953e-09, 1.1966e-10, 2.3486e-10, 1.4452e-11, 1.5982e-09, 4.3280e-12,\n 4.1226e-10, 5.7468e-13, 6.0109e-10, 2.0973e-10, 5.1485e-11, 3.3391e-10,\n 3.1920e-09, 9.6117e-11, 1.8347e-10, 9.3581e-10, 5.9203e-09, 7.7667e-11,\n 7.9610e-11, 3.4329e-11, 5.6625e-09, 6.5028e-12, 2.8417e-11, 7.9057e-09,\n 1.2669e-11, 9.1391e-10, 2.7608e-09, 3.6215e-11, 7.3023e-11, 1.4705e-09,\n 1.3585e-10, 5.8832e-11, 1.4617e-09, 3.2594e-10, 1.6061e-11, 7.9389e-11,\n 4.2372e-10, 6.2512e-10, 7.6596e-11, 7.9474e-11, 4.3523e-11, 4.1201e-11,\n 2.0376e-10, 2.4167e-10, 4.4176e-11, 3.6978e-09, 3.4152e-10, 1.8055e-11,\n 2.6974e-10, 1.6258e-09, 1.2714e-09, 1.6451e-09, 4.0357e-11, 1.4538e-10,\n 3.0051e-11, 4.6796e-10, 7.7214e-12, 5.1225e-12], device='cuda:0')" + "exp_avg_sq": "tensor([4.8306e-14, 2.0835e-12, 4.3307e-10, 4.5913e-12, 4.1247e-11, 3.5021e-10,\n 2.0018e-10, 3.5948e-11, 7.4968e-11, 2.7048e-11, 4.3220e-10, 1.0599e-10,\n 4.8384e-11, 7.2312e-11, 7.0699e-13, 4.0339e-11, 5.0020e-11, 2.8184e-11,\n 8.7129e-12, 9.3681e-12, 1.3674e-10, 2.3125e-12, 1.1534e-11, 3.1919e-11,\n 7.1628e-11, 4.0723e-11, 3.9974e-11, 2.7752e-10, 7.4042e-11, 1.3961e-10,\n 2.0705e-10, 4.0230e-10, 1.3916e-09, 3.6652e-12, 1.6727e-10, 1.1882e-12,\n 1.7617e-10, 1.0884e-11, 1.0251e-09, 1.2914e-11, 5.3387e-11, 3.5089e-12,\n 9.6147e-12, 3.1812e-10, 8.6216e-15, 1.2366e-11, 1.0842e-10, 6.8116e-11,\n 3.1243e-11, 9.9608e-11, 2.1838e-10, 7.0517e-11, 2.1431e-10, 1.2445e-11,\n 2.6475e-11, 6.4020e-10, 2.4775e-12, 1.2058e-10, 3.1595e-10, 3.5299e-10,\n 3.7392e-11, 4.5136e-11, 2.0523e-12, 3.9962e-10, 1.6063e-11, 8.3447e-10,\n 9.7436e-13, 9.2211e-11, 1.0373e-10, 4.8785e-11, 1.9948e-10, 3.1826e-10,\n 8.9091e-11, 1.3480e-10, 5.8089e-11, 2.5719e-10, 2.0242e-10, 5.5334e-11,\n 8.2534e-12, 6.8878e-13, 3.3990e-10, 4.2125e-11, 3.1194e-11, 2.8409e-10,\n 2.3399e-11, 7.1984e-12, 9.3342e-10, 1.4969e-11, 2.9438e-11, 1.0177e-12,\n 1.0274e-14, 3.1065e-11, 9.9826e-11, 2.6297e-11, 7.9372e-10, 1.7069e-11,\n 3.0055e-10, 6.7979e-10, 6.2848e-11, 1.5774e-10, 5.2031e-11, 2.0512e-11,\n 8.9098e-12, 1.5762e-10, 9.4530e-11, 5.4205e-11, 8.9215e-11, 1.2442e-11,\n 4.5435e-11, 4.9408e-11, 3.1510e-11, 6.7576e-12, 4.2945e-10, 5.4464e-12,\n 8.3420e-12, 3.5212e-10, 1.9570e-11, 8.5732e-11, 3.2745e-11, 2.5631e-11,\n 1.2848e-09, 6.2126e-11, 4.4666e-10, 2.9127e-11, 6.1353e-12, 1.7399e-09,\n 2.2194e-10, 7.9801e-12, 5.8408e-12, 2.2650e-10, 5.0465e-14, 3.8690e-11,\n 4.4491e-10, 3.5664e-10, 2.1781e-12, 3.0872e-11, 1.6701e-10, 1.9427e-11,\n 3.2586e-10, 1.7301e-10, 4.2048e-11, 3.2980e-11, 1.1697e-10, 9.8907e-13,\n 9.6844e-12, 1.5848e-11, 5.2543e-12, 3.5275e-10, 2.1272e-11, 1.7123e-11,\n 4.2890e-11, 4.3598e-11, 6.9051e-11, 5.7079e-11, 2.7298e-11, 3.0451e-10,\n 5.5699e-12, 1.1363e-09, 2.3775e-13, 1.2393e-11, 8.1246e-12, 2.8436e-11,\n 7.8345e-13, 3.8206e-11, 2.9136e-10, 1.6176e-10, 5.8354e-12, 3.9479e-11,\n 3.4312e-13, 1.1113e-11, 3.3290e-12, 8.6327e-11, 4.5124e-11, 6.0319e-11,\n 3.7854e-11, 7.4038e-12, 2.5524e-10, 1.5624e-10, 1.0641e-11, 8.8743e-13,\n 9.5143e-11, 2.0020e-10, 3.6089e-13, 2.9529e-11, 4.1563e-11, 2.5549e-10,\n 9.7375e-12, 2.3972e-12, 3.5515e-11, 8.6157e-12, 3.2083e-11, 1.2408e-12,\n 5.9331e-11, 2.4458e-11, 2.1434e-10, 2.5803e-11, 9.3918e-12, 6.7793e-11,\n 7.4163e-10, 3.4193e-11, 6.7113e-11, 4.1298e-12, 4.5669e-10, 1.2368e-12,\n 1.1781e-10, 1.6422e-13, 1.7177e-10, 5.9933e-11, 1.4712e-11, 9.5416e-11,\n 9.1213e-10, 2.7466e-11, 5.2429e-11, 2.6741e-10, 1.6918e-09, 2.2194e-11,\n 2.2749e-11, 9.8098e-12, 1.6181e-09, 1.8582e-12, 8.1203e-12, 2.2591e-09,\n 3.6203e-12, 2.6116e-10, 7.8891e-10, 1.0349e-11, 2.0867e-11, 4.2022e-10,\n 3.8819e-11, 1.6812e-11, 4.1769e-10, 9.3139e-11, 4.5895e-12, 2.2686e-11,\n 1.2108e-10, 1.7863e-10, 2.1888e-11, 2.2710e-11, 1.2437e-11, 1.1774e-11,\n 5.8225e-11, 6.9059e-11, 1.2624e-11, 1.0567e-09, 9.7592e-11, 5.1594e-12,\n 7.7079e-11, 4.6457e-10, 3.6330e-10, 4.7010e-10, 1.1532e-11, 4.1545e-11,\n 8.5872e-12, 1.3372e-10, 2.2064e-12, 1.4638e-12], device='cuda:0')" }, "51": { - "step": "tensor(1252.)", + "step": "tensor(2504.)", "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.7178e-12, 8.5265e-12, 2.7230e-09, 9.0802e-12, 4.4355e-10, 1.3681e-09,\n 6.8832e-10, 1.7993e-10, 8.6404e-11, 1.6716e-10, 1.9648e-09, 5.0991e-10,\n 1.0670e-10, 4.0451e-10, 3.1395e-12, 1.2328e-10, 3.0426e-10, 1.4355e-10,\n 3.0835e-11, 5.6005e-11, 7.2453e-10, 1.5176e-11, 8.1496e-11, 2.3947e-10,\n 3.8240e-10, 1.9729e-10, 2.9935e-10, 5.9354e-10, 2.0248e-10, 1.0554e-09,\n 1.4364e-09, 1.6602e-09, 9.3225e-09, 2.7308e-11, 4.3885e-10, 9.4605e-12,\n 7.8873e-10, 4.3053e-11, 3.1078e-09, 9.8792e-11, 4.8653e-10, 1.5057e-11,\n 5.5194e-11, 2.0068e-09, 9.3192e-13, 9.1429e-11, 4.3268e-10, 3.2496e-10,\n 7.6510e-11, 5.8554e-10, 1.0578e-09, 2.9689e-10, 1.0971e-09, 9.2427e-11,\n 1.1893e-10, 2.4268e-09, 1.5338e-11, 3.7551e-10, 1.9740e-09, 2.4938e-09,\n 3.2297e-10, 2.6856e-10, 1.3809e-11, 1.9893e-09, 1.0802e-10, 2.5727e-09,\n 4.1349e-12, 6.5460e-10, 3.9878e-10, 3.8649e-10, 8.2354e-10, 1.2885e-09,\n 3.0501e-10, 9.4850e-10, 4.5367e-10, 1.5164e-09, 9.2259e-10, 3.7710e-10,\n 6.0742e-11, 2.3957e-12, 1.8626e-09, 1.4106e-10, 1.1968e-10, 7.6211e-10,\n 1.5652e-10, 3.2405e-11, 4.0715e-09, 1.2859e-10, 1.7743e-10, 2.7171e-12,\n 2.3741e-12, 1.5879e-10, 5.7714e-10, 1.3833e-10, 3.4349e-09, 1.0097e-10,\n 2.0439e-09, 2.8046e-09, 3.7170e-10, 3.1138e-10, 2.7889e-10, 1.2737e-10,\n 5.7255e-11, 8.6450e-10, 5.2544e-10, 3.4150e-10, 5.2092e-10, 5.5831e-11,\n 3.0370e-10, 3.8706e-10, 2.0374e-10, 4.8747e-11, 2.2136e-09, 5.1074e-11,\n 8.0188e-11, 2.3445e-09, 9.2418e-11, 4.5967e-10, 1.2323e-10, 1.5894e-10,\n 4.9430e-09, 4.3881e-10, 1.7555e-09, 1.2100e-10, 2.8193e-11, 5.4534e-09,\n 6.8518e-10, 4.5434e-11, 2.6544e-11, 8.4947e-10, 5.8973e-13, 1.9438e-10,\n 2.7977e-09, 1.7679e-09, 1.0930e-11, 1.9385e-10, 5.0925e-10, 1.4146e-10,\n 1.6126e-09, 1.1959e-09, 2.1198e-10, 2.0124e-10, 8.0479e-10, 1.4477e-11,\n 7.4819e-11, 1.0212e-10, 5.8684e-11, 1.0197e-09, 6.5932e-11, 1.0437e-10,\n 1.4809e-10, 1.5294e-10, 4.0620e-10, 2.7899e-10, 1.4049e-10, 1.4176e-09,\n 3.4189e-11, 4.4444e-09, 1.7086e-12, 9.8562e-11, 4.2098e-11, 1.3539e-10,\n 6.3049e-12, 2.4731e-10, 1.2886e-09, 8.2312e-10, 5.1710e-11, 2.8357e-10,\n 4.3008e-12, 5.2946e-11, 1.7800e-11, 3.9488e-10, 3.0107e-10, 3.9571e-10,\n 2.0675e-10, 5.4496e-11, 1.5194e-09, 9.3989e-10, 4.6521e-11, 4.2340e-12,\n 2.6005e-10, 7.2639e-10, 2.4738e-12, 1.5415e-10, 1.2594e-10, 2.0360e-09,\n 3.9123e-11, 1.5619e-11, 1.2692e-10, 5.1148e-11, 2.4046e-10, 1.3289e-11,\n 4.2778e-10, 1.6748e-10, 9.9478e-10, 1.5486e-10, 7.4036e-11, 4.2597e-10,\n 1.9826e-09, 2.3842e-10, 3.1979e-10, 2.6726e-11, 3.3673e-09, 1.0519e-11,\n 4.0055e-10, 8.0965e-13, 5.1333e-10, 4.5662e-10, 5.5969e-11, 3.4369e-10,\n 2.9001e-09, 1.3291e-10, 2.3321e-10, 2.5594e-09, 9.4386e-09, 1.4279e-10,\n 1.0247e-10, 8.6839e-11, 5.0045e-09, 1.0161e-11, 8.1889e-11, 9.6081e-09,\n 1.7302e-11, 7.3192e-10, 3.4326e-09, 4.9832e-11, 1.0961e-10, 1.5495e-09,\n 4.0267e-10, 1.1999e-10, 2.0568e-09, 4.3483e-10, 2.0238e-11, 1.6793e-10,\n 7.3553e-10, 6.9368e-10, 1.1577e-10, 1.5129e-10, 7.7977e-11, 4.8862e-11,\n 4.4279e-10, 3.4491e-10, 5.9811e-11, 4.2917e-09, 5.9998e-10, 2.5645e-11,\n 4.9311e-10, 1.9843e-09, 2.4717e-09, 2.2407e-09, 7.1018e-11, 2.3691e-10,\n 8.5265e-11, 9.1337e-10, 1.1109e-11, 3.8461e-12], device='cuda:0')" + "exp_avg_sq": "tensor([4.9089e-13, 2.4365e-12, 7.7813e-10, 2.5947e-12, 1.2675e-10, 3.9093e-10,\n 1.9669e-10, 5.1417e-11, 2.4691e-11, 4.7766e-11, 5.6145e-10, 1.4571e-10,\n 3.0490e-11, 1.1559e-10, 8.9714e-13, 3.5228e-11, 8.6944e-11, 4.1020e-11,\n 8.8113e-12, 1.6004e-11, 2.0704e-10, 4.3367e-12, 2.3288e-11, 6.8431e-11,\n 1.0927e-10, 5.6378e-11, 8.5542e-11, 1.6961e-10, 5.7860e-11, 3.0159e-10,\n 4.1047e-10, 4.7443e-10, 2.6640e-09, 7.8035e-12, 1.2540e-10, 2.7034e-12,\n 2.2539e-10, 1.2303e-11, 8.8808e-10, 2.8231e-11, 1.3903e-10, 4.3027e-12,\n 1.5772e-11, 5.7346e-10, 2.6630e-13, 2.6127e-11, 1.2364e-10, 9.2861e-11,\n 2.1863e-11, 1.6732e-10, 3.0227e-10, 8.4838e-11, 3.1349e-10, 2.6412e-11,\n 3.3984e-11, 6.9348e-10, 4.3829e-12, 1.0731e-10, 5.6408e-10, 7.1262e-10,\n 9.2290e-11, 7.6742e-11, 3.9461e-12, 5.6847e-10, 3.0868e-11, 7.3516e-10,\n 1.1816e-12, 1.8706e-10, 1.1395e-10, 1.1044e-10, 2.3533e-10, 3.6819e-10,\n 8.7160e-11, 2.7104e-10, 1.2964e-10, 4.3334e-10, 2.6364e-10, 1.0776e-10,\n 1.7357e-11, 6.8460e-13, 5.3226e-10, 4.0309e-11, 3.4200e-11, 2.1778e-10,\n 4.4728e-11, 9.2600e-12, 1.1635e-09, 3.6746e-11, 5.0701e-11, 7.7643e-13,\n 6.7843e-13, 4.5374e-11, 1.6492e-10, 3.9530e-11, 9.8154e-10, 2.8854e-11,\n 5.8406e-10, 8.0145e-10, 1.0621e-10, 8.8981e-11, 7.9695e-11, 3.6396e-11,\n 1.6361e-11, 2.4704e-10, 1.5015e-10, 9.7586e-11, 1.4886e-10, 1.5954e-11,\n 8.6786e-11, 1.1061e-10, 5.8220e-11, 1.3930e-11, 6.3256e-10, 1.4595e-11,\n 2.2914e-11, 6.6995e-10, 2.6409e-11, 1.3135e-10, 3.5213e-11, 4.5418e-11,\n 1.4125e-09, 1.2539e-10, 5.0166e-10, 3.4576e-11, 8.0564e-12, 1.5583e-09,\n 1.9580e-10, 1.2983e-11, 7.5851e-12, 2.4274e-10, 1.6852e-13, 5.5544e-11,\n 7.9947e-10, 5.0520e-10, 3.1233e-12, 5.5395e-11, 1.4552e-10, 4.0425e-11,\n 4.6082e-10, 3.4174e-10, 6.0575e-11, 5.7507e-11, 2.2998e-10, 4.1370e-12,\n 2.1380e-11, 2.9182e-11, 1.6769e-11, 2.9139e-10, 1.8841e-11, 2.9826e-11,\n 4.2317e-11, 4.3703e-11, 1.1608e-10, 7.9723e-11, 4.0146e-11, 4.0508e-10,\n 9.7699e-12, 1.2700e-09, 4.8824e-13, 2.8165e-11, 1.2030e-11, 3.8689e-11,\n 1.8017e-12, 7.0670e-11, 3.6823e-10, 2.3521e-10, 1.4777e-11, 8.1032e-11,\n 1.2290e-12, 1.5130e-11, 5.0865e-12, 1.1284e-10, 8.6032e-11, 1.1308e-10,\n 5.9079e-11, 1.5573e-11, 4.3417e-10, 2.6858e-10, 1.3294e-11, 1.2099e-12,\n 7.4310e-11, 2.0757e-10, 7.0692e-13, 4.4050e-11, 3.5988e-11, 5.8181e-10,\n 1.1180e-11, 4.4631e-12, 3.6267e-11, 1.4616e-11, 6.8715e-11, 3.7975e-12,\n 1.2224e-10, 4.7858e-11, 2.8427e-10, 4.4253e-11, 2.1156e-11, 1.2172e-10,\n 5.6654e-10, 6.8130e-11, 9.1381e-11, 7.6373e-12, 9.6225e-10, 3.0059e-12,\n 1.1446e-10, 2.3136e-13, 1.4669e-10, 1.3048e-10, 1.5994e-11, 9.8211e-11,\n 8.2874e-10, 3.7980e-11, 6.6640e-11, 7.3136e-10, 2.6972e-09, 4.0804e-11,\n 2.9282e-11, 2.4815e-11, 1.4301e-09, 2.9035e-12, 2.3400e-11, 2.7456e-09,\n 4.9441e-12, 2.0915e-10, 9.8090e-10, 1.4240e-11, 3.1322e-11, 4.4277e-10,\n 1.1507e-10, 3.4287e-11, 5.8775e-10, 1.2426e-10, 5.7832e-12, 4.7988e-11,\n 2.1018e-10, 1.9822e-10, 3.3083e-11, 4.3231e-11, 2.2282e-11, 1.3963e-11,\n 1.2653e-10, 9.8560e-11, 1.7091e-11, 1.2264e-09, 1.7145e-10, 7.3281e-12,\n 1.4091e-10, 5.6703e-10, 7.0631e-10, 6.4030e-10, 2.0294e-11, 6.7700e-11,\n 2.4365e-11, 2.6100e-10, 3.1744e-12, 1.0991e-12], device='cuda:0')" }, "52": { - "step": "tensor(1252.)", + "step": "tensor(2504.)", "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[4.2930e-13, 3.3677e-13, 6.2536e-13, ..., 4.0701e-14, 1.9296e-14,\n 3.0661e-12],\n [9.8088e-14, 2.8970e-13, 9.6089e-13, ..., 3.7956e-12, 2.2670e-13,\n 5.0259e-13],\n [5.9773e-13, 2.1491e-14, 3.1756e-13, ..., 1.8341e-12, 1.8942e-14,\n 2.6669e-14],\n ...,\n [8.3221e-10, 1.3545e-08, 1.8412e-08, ..., 3.1511e-08, 1.6084e-08,\n 7.9560e-09],\n [2.4794e-10, 4.1078e-09, 5.6095e-09, ..., 9.0654e-09, 5.1054e-09,\n 2.6476e-09],\n [1.0193e-10, 1.1980e-09, 1.7813e-09, ..., 2.9303e-09, 1.2655e-09,\n 6.2704e-10]], device='cuda:0')" + "exp_avg_sq": "tensor([[1.2268e-13, 9.6233e-14, 1.7870e-13, ..., 1.1631e-14, 5.5140e-15,\n 8.7616e-13],\n [2.8029e-14, 8.2784e-14, 2.7458e-13, ..., 1.0846e-12, 6.4782e-14,\n 1.4362e-13],\n [1.7080e-13, 6.1412e-15, 9.0746e-14, ..., 5.2410e-13, 5.4130e-15,\n 7.6207e-15],\n ...,\n [2.3781e-10, 3.8705e-09, 5.2613e-09, ..., 9.0047e-09, 4.5962e-09,\n 2.2735e-09],\n [7.0851e-11, 1.1738e-09, 1.6030e-09, ..., 2.5905e-09, 1.4589e-09,\n 7.5656e-10],\n [2.9127e-11, 3.4235e-10, 5.0902e-10, ..., 8.3736e-10, 3.6162e-10,\n 1.7918e-10]], device='cuda:0')" }, "53": { - "step": "tensor(1252.)", + "step": "tensor(2504.)", "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([3.3805e-12, 2.7064e-12, 8.1324e-12, 9.8798e-13, 4.0378e-12, 6.4448e-12,\n 1.8372e-12, 7.8436e-12, 1.9193e-11, 6.4870e-11, 2.3451e-12, 9.7381e-13,\n 2.7040e-11, 7.3874e-12, 5.0047e-11, 2.9547e-14, 5.0341e-12, 1.1304e-12,\n 1.3282e-11, 3.6631e-11, 3.1298e-12, 8.1735e-15, 1.4013e-13, 7.7884e-13,\n 4.7357e-12, 3.2885e-12, 7.0766e-12, 9.9049e-12, 9.1632e-13, 7.5312e-12,\n 2.0696e-11, 2.3994e-12, 1.4298e-11, 2.2629e-11, 5.0797e-11, 3.1224e-11,\n 8.0226e-12, 8.5366e-12, 1.5683e-11, 3.5205e-12, 6.2587e-13, 1.0876e-11,\n 1.1373e-11, 5.4916e-14, 6.8664e-11, 3.2665e-11, 3.1085e-13, 9.7862e-14,\n 3.7047e-12, 1.1685e-11, 1.2746e-11, 7.9052e-12, 9.0455e-12, 4.1092e-12,\n 3.7447e-11, 1.3988e-11, 2.0786e-12, 3.8740e-11, 2.9684e-13, 7.3815e-12,\n 1.0934e-11, 2.7555e-12, 2.3017e-13, 7.1222e-11, 1.7930e-12, 4.2777e-11,\n 2.6527e-13, 4.4786e-11, 8.2884e-12, 8.4211e-11, 9.9418e-11, 2.4095e-12,\n 2.3981e-11, 6.6218e-11, 4.7202e-11, 4.3890e-12, 1.1675e-10, 4.4706e-11,\n 1.6385e-10, 4.4130e-13, 4.6774e-12, 1.4029e-11, 5.5540e-12, 1.8742e-11,\n 5.9249e-11, 2.1807e-12, 4.4125e-12, 1.4295e-11, 1.7046e-10, 4.0927e-12,\n 3.2325e-11, 2.3006e-13, 1.3414e-10, 1.2896e-10, 2.2121e-11, 1.8650e-11,\n 1.4741e-13, 2.3861e-11, 2.4508e-11, 1.8647e-11, 5.4359e-11, 1.3267e-13,\n 7.0374e-13, 2.7764e-13, 1.4327e-13, 2.2681e-12, 5.7716e-13, 3.3009e-12,\n 1.3414e-11, 4.2548e-11, 6.5330e-12, 3.4117e-12, 1.9561e-12, 1.0763e-11,\n 2.9615e-12, 4.1206e-11, 1.5385e-11, 3.0028e-11, 4.6874e-11, 1.5840e-11,\n 6.3584e-13, 1.5031e-13, 2.9929e-11, 4.0423e-12, 2.7707e-12, 1.5955e-12,\n 5.1955e-11, 8.3817e-12, 1.2892e-12, 4.7587e-11, 2.0306e-12, 5.7446e-12,\n 8.1333e-12, 3.1507e-11, 5.9600e-11, 5.4952e-13, 2.2838e-12, 4.5355e-12,\n 3.2540e-12, 8.5390e-14, 1.3468e-11, 3.7458e-12, 1.0957e-12, 4.9242e-14,\n 1.0857e-11, 1.7258e-11, 2.5736e-11, 1.1282e-11, 2.0005e-12, 8.2152e-12,\n 1.9262e-13, 2.7312e-11, 2.8379e-13, 8.2323e-13, 4.1002e-11, 2.0227e-11,\n 6.5927e-14, 4.0680e-14, 2.7177e-12, 1.8191e-11, 8.1059e-11, 5.0280e-12,\n 4.8703e-12, 2.5765e-12, 7.1337e-12, 8.4138e-12, 6.8731e-12, 1.3435e-11,\n 1.5574e-13, 1.6823e-12, 1.9154e-11, 3.8573e-12, 4.8624e-12, 2.8448e-11,\n 1.7878e-14, 2.0560e-11, 2.6943e-11, 2.4516e-11, 2.0808e-13, 1.3652e-12,\n 1.5618e-12, 9.9253e-12, 2.5418e-14, 6.4303e-13, 4.6334e-12, 5.4118e-11,\n 5.1712e-12, 8.4464e-13, 2.2737e-11, 2.5778e-12, 2.2218e-12, 1.8644e-11,\n 1.3129e-11, 1.5637e-11, 8.5005e-12, 2.4176e-11, 2.9179e-14, 1.7593e-12,\n 1.3120e-11, 1.4922e-11, 2.4809e-13, 3.8441e-11, 1.8365e-12, 2.2219e-11,\n 1.2066e-12, 9.4618e-13, 3.6978e-13, 7.2779e-12, 4.6827e-11, 1.5022e-12,\n 7.6516e-12, 1.2361e-11, 2.9612e-11, 3.1475e-12, 1.6360e-13, 3.5257e-12,\n 2.0567e-12, 1.9968e-12, 9.9869e-13, 2.2313e-12, 5.6911e-13, 1.5478e-10,\n 4.1095e-11, 2.5544e-11, 7.7141e-14, 8.4773e-13, 8.4603e-12, 2.7331e-11,\n 8.1049e-11, 2.5866e-11, 9.1206e-13, 4.9672e-12, 2.8738e-11, 2.5146e-13,\n 1.6463e-11, 1.2550e-11, 3.3951e-11, 7.4178e-12, 5.3552e-12, 1.1049e-12,\n 2.2133e-12, 9.8487e-13, 2.6168e-11, 6.7452e-12, 6.4937e-12, 4.5877e-11,\n 8.0871e-12, 7.7435e-14, 1.7692e-11, 1.7946e-11, 7.4812e-11, 1.5348e-11,\n 2.2026e-11, 1.2046e-11, 1.8886e-14, 9.5404e-12, 5.1681e-26, 1.2656e-27,\n 8.0507e-27, 1.4645e-27, 8.2712e-27, 2.4624e-27, 1.3296e-26, 3.2054e-28,\n 7.2611e-27, 1.1293e-26, 1.0567e-26, 2.7145e-28, 6.8606e-30, 5.7677e-28,\n 3.2438e-27, 2.4242e-28, 5.6323e-27, 4.3143e-28, 5.1368e-27, 1.2361e-27,\n 7.9424e-29, 3.9836e-27, 2.2530e-28, 4.6423e-27, 6.5725e-29, 3.9724e-27,\n 8.7763e-27, 3.7954e-28, 2.2908e-26, 7.8134e-27, 1.2710e-27, 1.8308e-27,\n 2.3449e-27, 1.4032e-26, 1.3099e-27, 1.0797e-27, 4.3889e-28, 2.3786e-27,\n 6.4068e-27, 1.2668e-27, 2.2466e-28, 2.6049e-27, 5.2390e-28, 1.1586e-27,\n 1.0950e-27, 9.8759e-27, 4.0423e-27, 1.3542e-27, 1.4519e-27, 9.0156e-28,\n 1.8049e-27, 5.9806e-27, 6.0322e-27, 7.5603e-27, 3.0791e-27, 7.0809e-28,\n 6.2953e-27, 1.0836e-26, 1.4852e-26, 6.4825e-27, 5.2357e-27, 3.6288e-27,\n 2.4965e-28, 2.2470e-27, 1.6932e-29, 8.7699e-27, 2.3221e-26, 7.2386e-27,\n 1.9557e-28, 1.1622e-27, 7.0664e-27, 2.3630e-27, 3.8323e-27, 2.3050e-28,\n 1.9741e-27, 1.0676e-27, 3.7560e-27, 1.0581e-26, 3.6450e-26, 1.3353e-26,\n 1.4464e-27, 1.0958e-26, 1.3274e-27, 1.7711e-26, 2.1670e-27, 2.0000e-27,\n 1.7805e-26, 6.1904e-27, 3.2077e-27, 4.1045e-28, 7.1691e-28, 3.9027e-27,\n 4.0934e-27, 8.6136e-28, 3.3280e-27, 2.3665e-28, 3.1419e-28, 1.0285e-26,\n 5.4030e-28, 3.0961e-27, 4.6359e-27, 3.2337e-28, 2.8248e-27, 2.6693e-27,\n 1.0714e-27, 1.4701e-27, 9.1743e-28, 1.5314e-26, 6.2206e-27, 1.1076e-27,\n 8.4715e-28, 4.5054e-27, 4.8786e-27, 9.1623e-27, 3.8589e-27, 1.8057e-26,\n 4.9588e-27, 6.7179e-27, 1.5622e-26, 1.9181e-26, 1.1606e-26, 6.1133e-27,\n 7.2123e-27, 3.5318e-27, 8.0869e-28, 7.2445e-28, 4.0231e-26, 4.7263e-29,\n 7.6419e-27, 4.5496e-28, 1.7874e-27, 2.1785e-27, 5.6086e-27, 1.8123e-27,\n 4.7531e-28, 5.3379e-31, 1.2205e-28, 7.1795e-28, 1.6115e-27, 4.1880e-27,\n 3.9828e-28, 9.4616e-27, 3.6250e-26, 4.0921e-27, 1.1708e-26, 1.5601e-27,\n 6.4014e-27, 1.7656e-26, 8.1355e-27, 5.4522e-28, 5.8711e-28, 2.3778e-26,\n 4.0940e-27, 4.6026e-27, 2.1346e-28, 2.9286e-27, 1.0138e-26, 3.3198e-27,\n 1.7396e-29, 4.5788e-27, 1.3476e-27, 1.3117e-27, 7.4461e-27, 6.8829e-28,\n 4.8115e-28, 4.1762e-27, 2.7617e-27, 1.7685e-28, 1.6260e-27, 1.8931e-26,\n 1.1181e-27, 3.8852e-26, 1.9767e-26, 2.7854e-28, 6.4402e-27, 2.0423e-26,\n 2.0209e-26, 7.6392e-28, 9.5860e-28, 1.6744e-27, 1.1754e-27, 5.0564e-27,\n 1.0015e-28, 5.0989e-27, 2.6084e-27, 6.5613e-28, 2.3808e-27, 4.4037e-29,\n 8.2445e-27, 5.0491e-29, 1.0140e-26, 5.6604e-27, 6.2445e-27, 1.4185e-26,\n 1.2930e-26, 4.6870e-28, 2.6421e-26, 2.3999e-27, 6.1156e-27, 1.3206e-27,\n 3.9070e-29, 2.2141e-27, 3.0468e-27, 5.0823e-27, 5.8683e-28, 1.8215e-27,\n 1.1972e-28, 2.7628e-27, 4.1555e-27, 1.8203e-28, 8.1477e-27, 1.5472e-28,\n 4.6718e-27, 1.7416e-27, 8.9128e-27, 4.0794e-27, 2.3235e-28, 1.8459e-27,\n 4.4075e-27, 1.2022e-27, 1.3331e-27, 2.4289e-27, 4.7527e-27, 2.7902e-27,\n 3.6761e-27, 1.0633e-28, 4.7637e-27, 5.1222e-27, 1.3682e-26, 1.3890e-26,\n 3.4540e-28, 2.5426e-27, 3.6268e-28, 2.4244e-27, 5.1891e-29, 6.6877e-28,\n 8.0262e-28, 4.2827e-27, 1.4736e-28, 2.1720e-28, 9.5469e-28, 1.0670e-26,\n 7.6173e-28, 2.1880e-27, 5.8210e-28, 7.2911e-27, 1.5042e-26, 1.5661e-30,\n 1.4317e-26, 1.0076e-26, 1.3788e-26, 6.4891e-29, 2.2694e-27, 1.5552e-26,\n 6.8510e-28, 3.0688e-28, 1.9741e-08, 2.9818e-08, 3.1371e-10, 2.5055e-07,\n 6.8171e-08, 1.7102e-08, 4.3110e-08, 1.2146e-07, 2.4175e-09, 3.0194e-08,\n 1.9971e-07, 4.7390e-10, 1.4042e-08, 5.0886e-08, 2.5256e-07, 5.7205e-09,\n 7.4770e-08, 2.5121e-07, 1.7260e-08, 2.8902e-08, 8.0456e-08, 1.4231e-10,\n 1.0581e-07, 3.2324e-07, 2.0374e-08, 3.5230e-07, 7.4534e-09, 5.6349e-08,\n 4.6325e-08, 2.1224e-07, 4.5310e-08, 4.9152e-10, 7.9918e-09, 5.2018e-10,\n 9.7323e-08, 1.3495e-07, 1.3520e-07, 2.0762e-08, 3.9543e-09, 1.3219e-07,\n 2.5071e-07, 1.2593e-07, 1.4904e-07, 1.1630e-07, 5.0517e-08, 3.3111e-10,\n 9.0524e-08, 2.5779e-08, 1.2357e-07, 8.8062e-08, 4.9287e-07, 9.5858e-08,\n 1.6336e-08, 7.5934e-10, 4.2182e-08, 4.2073e-08, 2.7014e-07, 2.3054e-08,\n 6.4615e-08, 6.5285e-08, 6.6946e-08, 2.5697e-08, 2.4719e-08, 1.8158e-08,\n 7.0205e-09, 5.6560e-10, 1.4686e-07, 5.4765e-08, 7.4472e-08, 9.6591e-08,\n 6.9832e-09, 2.2654e-09, 1.3796e-09, 3.2762e-07, 1.0606e-08, 4.4741e-07,\n 8.5709e-08, 2.4843e-07, 1.1399e-07, 1.1534e-08, 7.2232e-08, 8.4328e-07,\n 2.9504e-08, 1.8642e-07, 6.7726e-09, 2.5339e-07, 9.1557e-08, 5.8028e-09,\n 1.6255e-09, 1.0142e-07, 1.4751e-07, 6.8805e-08, 4.9797e-10, 1.4124e-07,\n 3.6170e-07, 1.7826e-08, 7.6169e-09, 1.4783e-07, 1.3918e-08, 1.3992e-08,\n 1.8731e-08, 8.8032e-08, 2.3569e-09, 1.1555e-08, 3.1221e-08, 3.1174e-07,\n 1.8865e-07, 2.6857e-08, 2.8579e-09, 3.3495e-08, 1.5101e-08, 5.0373e-08,\n 9.4129e-08, 5.2151e-09, 7.2276e-08, 1.0059e-07, 2.6601e-08, 1.2566e-08,\n 1.7757e-07, 3.4807e-09, 1.2390e-08, 6.8112e-07, 1.2461e-07, 4.3027e-07,\n 1.5525e-07, 5.1489e-08, 3.8283e-09, 6.2970e-09, 7.7801e-08, 8.7216e-08,\n 4.8663e-08, 6.8801e-08, 1.7221e-08, 2.9136e-10, 7.4458e-08, 2.7850e-08,\n 3.9992e-08, 1.8442e-07, 2.0811e-08, 4.0683e-09, 3.2918e-11, 1.5251e-07,\n 6.6011e-08, 1.5385e-09, 4.5343e-08, 3.6653e-08, 9.5636e-09, 3.2614e-07,\n 1.3928e-08, 2.5167e-10, 4.3062e-08, 4.8713e-07, 2.5298e-08, 2.0659e-08,\n 1.9318e-07, 5.7944e-09, 2.1956e-09, 3.6506e-08, 3.1759e-11, 6.1904e-08,\n 1.5583e-07, 2.1993e-07, 6.3584e-09, 4.1581e-08, 5.7952e-08, 1.0425e-07,\n 4.9477e-08, 2.4017e-08, 3.9915e-09, 2.6075e-07, 1.6078e-08, 3.3641e-07,\n 2.1281e-07, 1.6057e-08, 5.4207e-08, 1.4079e-08, 8.6897e-10, 1.7836e-08,\n 1.4545e-07, 5.6102e-10, 1.0116e-07, 2.8908e-07, 7.6346e-10, 6.8759e-08,\n 1.9102e-07, 1.7971e-07, 4.8584e-08, 2.8078e-08, 2.1932e-09, 1.8486e-07,\n 3.3701e-09, 6.2589e-08, 2.8368e-07, 2.9363e-07, 5.1111e-08, 8.6089e-08,\n 1.5358e-07, 3.7125e-07, 3.5849e-09, 1.6467e-07, 2.0462e-08, 1.0312e-08,\n 3.8234e-07, 2.6229e-08, 1.2117e-08, 4.6776e-08, 1.5164e-08, 2.7510e-08,\n 4.2749e-08, 1.4014e-08, 8.0728e-08, 6.0557e-08, 1.9434e-07, 1.5068e-08,\n 2.2478e-09, 6.1159e-07, 1.9252e-08, 3.6212e-07, 7.3469e-10, 3.9592e-08,\n 3.0981e-08, 1.3762e-07, 1.5711e-07, 9.2762e-08, 2.0082e-07, 7.1155e-09,\n 1.2734e-07, 1.6874e-08, 7.8473e-07, 5.0801e-09, 1.8532e-08, 7.7144e-08,\n 7.8833e-08, 4.6066e-08, 1.6102e-07, 5.8267e-08, 1.6340e-08, 1.4501e-08,\n 9.8361e-08, 5.8929e-10, 1.4328e-07, 5.4075e-09, 1.3292e-11, 2.6820e-08,\n 1.3641e-07, 1.8011e-09, 3.6048e-08, 3.5134e-09, 4.7454e-09, 3.1854e-08,\n 1.1403e-07, 8.2362e-08, 6.1980e-08, 2.6902e-07, 8.1296e-08, 2.3720e-08],\n device='cuda:0')" + "exp_avg_sq": "tensor([9.6599e-13, 7.7337e-13, 2.3239e-12, 2.8232e-13, 1.1538e-12, 1.8417e-12,\n 5.2498e-13, 2.2414e-12, 5.4845e-12, 1.8537e-11, 6.7013e-13, 2.7827e-13,\n 7.7269e-12, 2.1110e-12, 1.4301e-11, 8.4434e-15, 1.4385e-12, 3.2301e-13,\n 3.7953e-12, 1.0468e-11, 8.9435e-13, 2.3356e-15, 4.0043e-14, 2.2256e-13,\n 1.3533e-12, 9.3972e-13, 2.0222e-12, 2.8304e-12, 2.6185e-13, 2.1521e-12,\n 5.9141e-12, 6.8564e-13, 4.0859e-12, 6.4664e-12, 1.4516e-11, 8.9225e-12,\n 2.2925e-12, 2.4394e-12, 4.4815e-12, 1.0060e-12, 1.7885e-13, 3.1079e-12,\n 3.2498e-12, 1.5693e-14, 1.9621e-11, 9.3342e-12, 8.8827e-14, 2.7965e-14,\n 1.0587e-12, 3.3391e-12, 3.6423e-12, 2.2590e-12, 2.5848e-12, 1.1742e-12,\n 1.0701e-11, 3.9971e-12, 5.9399e-13, 1.1070e-11, 8.4825e-14, 2.1093e-12,\n 3.1245e-12, 7.8742e-13, 6.5772e-14, 2.0352e-11, 5.1237e-13, 1.2224e-11,\n 7.5804e-14, 1.2798e-11, 2.3685e-12, 2.4064e-11, 2.8409e-11, 6.8853e-13,\n 6.8526e-12, 1.8922e-11, 1.3488e-11, 1.2542e-12, 3.3363e-11, 1.2775e-11,\n 4.6821e-11, 1.2610e-13, 1.3366e-12, 4.0089e-12, 1.5871e-12, 5.3558e-12,\n 1.6931e-11, 6.2315e-13, 1.2609e-12, 4.0849e-12, 4.8711e-11, 1.1695e-12,\n 9.2371e-12, 6.5742e-14, 3.8331e-11, 3.6851e-11, 6.3212e-12, 5.3293e-12,\n 4.2124e-14, 6.8184e-12, 7.0034e-12, 5.3285e-12, 1.5534e-11, 3.7913e-14,\n 2.0110e-13, 7.9337e-14, 4.0942e-14, 6.4814e-13, 1.6493e-13, 9.4325e-13,\n 3.8332e-12, 1.2158e-11, 1.8669e-12, 9.7492e-13, 5.5897e-13, 3.0755e-12,\n 8.4626e-13, 1.1775e-11, 4.3964e-12, 8.5807e-12, 1.3395e-11, 4.5265e-12,\n 1.8170e-13, 4.2953e-14, 8.5525e-12, 1.1551e-12, 7.9174e-13, 4.5594e-13,\n 1.4847e-11, 2.3952e-12, 3.6840e-13, 1.3598e-11, 5.8025e-13, 1.6416e-12,\n 2.3241e-12, 9.0034e-12, 1.7031e-11, 1.5703e-13, 6.5262e-13, 1.2961e-12,\n 9.2986e-13, 2.4401e-14, 3.8486e-12, 1.0704e-12, 3.1311e-13, 1.4071e-14,\n 3.1025e-12, 4.9315e-12, 7.3543e-12, 3.2241e-12, 5.7166e-13, 2.3476e-12,\n 5.5043e-14, 7.8045e-12, 8.1097e-14, 2.3525e-13, 1.1717e-11, 5.7800e-12,\n 1.8839e-14, 1.1625e-14, 7.7660e-13, 5.1983e-12, 2.3163e-11, 1.4368e-12,\n 1.3917e-12, 7.3625e-13, 2.0385e-12, 2.4043e-12, 1.9640e-12, 3.8391e-12,\n 4.4503e-14, 4.8072e-13, 5.4733e-12, 1.1023e-12, 1.3895e-12, 8.1293e-12,\n 5.1086e-15, 5.8752e-12, 7.6991e-12, 7.0058e-12, 5.9461e-14, 3.9010e-13,\n 4.4629e-13, 2.8362e-12, 7.2634e-15, 1.8375e-13, 1.3240e-12, 1.5465e-11,\n 1.4777e-12, 2.4136e-13, 6.4973e-12, 7.3662e-13, 6.3489e-13, 5.3276e-12,\n 3.7518e-12, 4.4684e-12, 2.4291e-12, 6.9084e-12, 8.3382e-15, 5.0274e-13,\n 3.7492e-12, 4.2641e-12, 7.0893e-14, 1.0985e-11, 5.2478e-13, 6.3493e-12,\n 3.4481e-13, 2.7038e-13, 1.0567e-13, 2.0797e-12, 1.3381e-11, 4.2926e-13,\n 2.1865e-12, 3.5322e-12, 8.4619e-12, 8.9942e-13, 4.6750e-14, 1.0075e-12,\n 5.8771e-13, 5.7060e-13, 2.8538e-13, 6.3761e-13, 1.6263e-13, 4.4229e-11,\n 1.1743e-11, 7.2994e-12, 2.2044e-14, 2.4224e-13, 2.4176e-12, 7.8100e-12,\n 2.3160e-11, 7.3913e-12, 2.6063e-13, 1.4194e-12, 8.2121e-12, 7.1858e-14,\n 4.7045e-12, 3.5864e-12, 9.7017e-12, 2.1197e-12, 1.5303e-12, 3.1574e-13,\n 6.3246e-13, 2.8143e-13, 7.4777e-12, 1.9275e-12, 1.8556e-12, 1.3110e-11,\n 2.3109e-12, 2.2128e-14, 5.0556e-12, 5.1281e-12, 2.1378e-11, 4.3857e-12,\n 6.2941e-12, 3.4421e-12, 5.3969e-15, 2.7262e-12, 1.4768e-26, 3.6166e-28,\n 2.3005e-27, 4.1848e-28, 2.3636e-27, 7.0365e-28, 3.7993e-27, 9.1597e-29,\n 2.0749e-27, 3.2270e-27, 3.0197e-27, 7.7570e-29, 1.9605e-30, 1.6482e-28,\n 9.2693e-28, 6.9273e-29, 1.6095e-27, 1.2328e-28, 1.4679e-27, 3.5322e-28,\n 2.2696e-29, 1.1383e-27, 6.4381e-29, 1.3266e-27, 1.8781e-29, 1.1351e-27,\n 2.5079e-27, 1.0846e-28, 6.5462e-27, 2.2327e-27, 3.6319e-28, 5.2318e-28,\n 6.7009e-28, 4.0099e-27, 3.7432e-28, 3.0853e-28, 1.2542e-28, 6.7971e-28,\n 1.8308e-27, 3.6199e-28, 6.4199e-29, 7.4438e-28, 1.4971e-28, 3.3108e-28,\n 3.1291e-28, 2.8221e-27, 1.1551e-27, 3.8699e-28, 4.1489e-28, 2.5763e-28,\n 5.1577e-28, 1.7090e-27, 1.7237e-27, 2.1604e-27, 8.7987e-28, 2.0234e-28,\n 1.7989e-27, 3.0965e-27, 4.2440e-27, 1.8524e-27, 1.4961e-27, 1.0369e-27,\n 7.1340e-29, 6.4209e-28, 4.8386e-30, 2.5061e-27, 6.6355e-27, 2.0685e-27,\n 5.5885e-29, 3.3210e-28, 2.0193e-27, 6.7523e-28, 1.0951e-27, 6.5867e-29,\n 5.6411e-28, 3.0509e-28, 1.0733e-27, 3.0235e-27, 1.0416e-26, 3.8156e-27,\n 4.1333e-28, 3.1313e-27, 3.7932e-28, 5.0612e-27, 6.1922e-28, 5.7150e-28,\n 5.0879e-27, 1.7690e-27, 9.1663e-28, 1.1729e-28, 2.0486e-28, 1.1152e-27,\n 1.1697e-27, 2.4614e-28, 9.5100e-28, 6.7625e-29, 8.9783e-29, 2.9391e-27,\n 1.5440e-28, 8.8474e-28, 1.3247e-27, 9.2406e-29, 8.0721e-28, 7.6277e-28,\n 3.0616e-28, 4.2008e-28, 2.6216e-28, 4.3762e-27, 1.7776e-27, 3.1651e-28,\n 2.4208e-28, 1.2875e-27, 1.3941e-27, 2.6182e-27, 1.1027e-27, 5.1599e-27,\n 1.4170e-27, 1.9197e-27, 4.4641e-27, 5.4811e-27, 3.3166e-27, 1.7469e-27,\n 2.0610e-27, 1.0092e-27, 2.3109e-28, 2.0702e-28, 1.1496e-26, 1.3506e-29,\n 2.1837e-27, 1.3001e-28, 5.1075e-28, 6.2252e-28, 1.6027e-27, 5.1788e-28,\n 1.3582e-28, 1.5254e-31, 3.4878e-29, 2.0516e-28, 4.6051e-28, 1.1967e-27,\n 1.1381e-28, 2.7037e-27, 1.0359e-26, 1.1694e-27, 3.3457e-27, 4.4580e-28,\n 1.8292e-27, 5.0454e-27, 2.3248e-27, 1.5580e-28, 1.6777e-28, 6.7946e-27,\n 1.1699e-27, 1.3152e-27, 6.0998e-29, 8.3686e-28, 2.8971e-27, 9.4865e-28,\n 4.9710e-30, 1.3084e-27, 3.8510e-28, 3.7484e-28, 2.1278e-27, 1.9669e-28,\n 1.3749e-28, 1.1934e-27, 7.8919e-28, 5.0536e-29, 4.6463e-28, 5.4097e-27,\n 3.1949e-28, 1.1102e-26, 5.6487e-27, 7.9594e-29, 1.8403e-27, 5.8360e-27,\n 5.7748e-27, 2.1830e-28, 2.7393e-28, 4.7848e-28, 3.3589e-28, 1.4449e-27,\n 2.8619e-29, 1.4570e-27, 7.4537e-28, 1.8749e-28, 6.8033e-28, 1.2584e-29,\n 2.3559e-27, 1.4428e-29, 2.8977e-27, 1.6175e-27, 1.7844e-27, 4.0535e-27,\n 3.6950e-27, 1.3393e-28, 7.5500e-27, 6.8578e-28, 1.7476e-27, 3.7738e-28,\n 1.1165e-29, 6.3270e-28, 8.7065e-28, 1.4523e-27, 1.6769e-28, 5.2050e-28,\n 3.4211e-29, 7.8949e-28, 1.1875e-27, 5.2015e-29, 2.3283e-27, 4.4213e-29,\n 1.3350e-27, 4.9767e-28, 2.5469e-27, 1.1657e-27, 6.6396e-29, 5.2748e-28,\n 1.2595e-27, 3.4355e-28, 3.8094e-28, 6.9408e-28, 1.3581e-27, 7.9731e-28,\n 1.0505e-27, 3.0384e-29, 1.3613e-27, 1.4637e-27, 3.9097e-27, 3.9691e-27,\n 9.8702e-29, 7.2656e-28, 1.0364e-28, 6.9280e-28, 1.4828e-29, 1.9111e-28,\n 2.2935e-28, 1.2238e-27, 4.2109e-29, 6.2066e-29, 2.7281e-28, 3.0489e-27,\n 2.1767e-28, 6.2523e-28, 1.6634e-28, 2.0835e-27, 4.2983e-27, 4.4752e-31,\n 4.0911e-27, 2.8794e-27, 3.9400e-27, 1.8543e-29, 6.4850e-28, 4.4441e-27,\n 1.9577e-28, 8.7693e-29, 5.6413e-09, 8.5206e-09, 8.9646e-11, 7.1598e-08,\n 1.9480e-08, 4.8869e-09, 1.2319e-08, 3.4708e-08, 6.9081e-10, 8.6282e-09,\n 5.7069e-08, 1.3542e-10, 4.0128e-09, 1.4541e-08, 7.2172e-08, 1.6347e-09,\n 2.1366e-08, 7.1786e-08, 4.9321e-09, 8.2589e-09, 2.2991e-08, 4.0668e-11,\n 3.0236e-08, 9.2369e-08, 5.8221e-09, 1.0067e-07, 2.1299e-09, 1.6102e-08,\n 1.3238e-08, 6.0649e-08, 1.2948e-08, 1.4045e-10, 2.2837e-09, 1.4865e-10,\n 2.7811e-08, 3.8563e-08, 3.8636e-08, 5.9329e-09, 1.1300e-09, 3.7776e-08,\n 7.1642e-08, 3.5985e-08, 4.2588e-08, 3.3233e-08, 1.4436e-08, 9.4618e-11,\n 2.5868e-08, 7.3666e-09, 3.5312e-08, 2.5164e-08, 1.4084e-07, 2.7392e-08,\n 4.6680e-09, 2.1699e-10, 1.2054e-08, 1.2023e-08, 7.7194e-08, 6.5879e-09,\n 1.8464e-08, 1.8656e-08, 1.9130e-08, 7.3431e-09, 7.0636e-09, 5.1888e-09,\n 2.0062e-09, 1.6162e-10, 4.1966e-08, 1.5650e-08, 2.1281e-08, 2.7602e-08,\n 1.9955e-09, 6.4735e-10, 3.9423e-10, 9.3620e-08, 3.0307e-09, 1.2785e-07,\n 2.4492e-08, 7.0990e-08, 3.2573e-08, 3.2960e-09, 2.0641e-08, 2.4098e-07,\n 8.4311e-09, 5.3271e-08, 1.9353e-09, 7.2408e-08, 2.6163e-08, 1.6582e-09,\n 4.6449e-10, 2.8980e-08, 4.2153e-08, 1.9662e-08, 1.4230e-10, 4.0361e-08,\n 1.0336e-07, 5.0938e-09, 2.1766e-09, 4.2244e-08, 3.9771e-09, 3.9983e-09,\n 5.3525e-09, 2.5156e-08, 6.7352e-10, 3.3019e-09, 8.9217e-09, 8.9081e-08,\n 5.3908e-08, 7.6747e-09, 8.1665e-10, 9.5714e-09, 4.3151e-09, 1.4395e-08,\n 2.6898e-08, 1.4903e-09, 2.0653e-08, 2.8744e-08, 7.6015e-09, 3.5908e-09,\n 5.0741e-08, 9.9465e-10, 3.5407e-09, 1.9464e-07, 3.5608e-08, 1.2295e-07,\n 4.4363e-08, 1.4713e-08, 1.0940e-09, 1.7994e-09, 2.2232e-08, 2.4923e-08,\n 1.3906e-08, 1.9661e-08, 4.9210e-09, 8.3257e-11, 2.1277e-08, 7.9584e-09,\n 1.1428e-08, 5.2699e-08, 5.9468e-09, 1.1626e-09, 9.4067e-12, 4.3580e-08,\n 1.8863e-08, 4.3963e-10, 1.2957e-08, 1.0474e-08, 2.7329e-09, 9.3196e-08,\n 3.9799e-09, 7.1916e-11, 1.2305e-08, 1.3920e-07, 7.2292e-09, 5.9036e-09,\n 5.5204e-08, 1.6558e-09, 6.2742e-10, 1.0432e-08, 9.0754e-12, 1.7690e-08,\n 4.4528e-08, 6.2847e-08, 1.8170e-09, 1.1882e-08, 1.6560e-08, 2.9789e-08,\n 1.4138e-08, 6.8629e-09, 1.1406e-09, 7.4511e-08, 4.5943e-09, 9.6131e-08,\n 6.0814e-08, 4.5883e-09, 1.5490e-08, 4.0232e-09, 2.4832e-10, 5.0967e-09,\n 4.1563e-08, 1.6032e-10, 2.8908e-08, 8.2607e-08, 2.1816e-10, 1.9648e-08,\n 5.4585e-08, 5.1354e-08, 1.3883e-08, 8.0234e-09, 6.2674e-10, 5.2826e-08,\n 9.6304e-10, 1.7885e-08, 8.1065e-08, 8.3908e-08, 1.4606e-08, 2.4601e-08,\n 4.3888e-08, 1.0609e-07, 1.0244e-09, 4.7056e-08, 5.8473e-09, 2.9468e-09,\n 1.0926e-07, 7.4952e-09, 3.4626e-09, 1.3367e-08, 4.3331e-09, 7.8613e-09,\n 1.2216e-08, 4.0047e-09, 2.3069e-08, 1.7305e-08, 5.5534e-08, 4.3059e-09,\n 6.4233e-10, 1.7477e-07, 5.5015e-09, 1.0348e-07, 2.0994e-10, 1.1314e-08,\n 8.8530e-09, 3.9325e-08, 4.4897e-08, 2.6507e-08, 5.7387e-08, 2.0333e-09,\n 3.6388e-08, 4.8218e-09, 2.2424e-07, 1.4517e-09, 5.2956e-09, 2.2044e-08,\n 2.2527e-08, 1.3164e-08, 4.6014e-08, 1.6650e-08, 4.6693e-09, 4.1437e-09,\n 2.8107e-08, 1.6839e-10, 4.0942e-08, 1.5452e-09, 3.7984e-12, 7.6640e-09,\n 3.8981e-08, 5.1469e-10, 1.0301e-08, 1.0040e-09, 1.3560e-09, 9.1027e-09,\n 3.2586e-08, 2.3536e-08, 1.7711e-08, 7.6874e-08, 2.3231e-08, 6.7780e-09],\n device='cuda:0')" }, "54": { - "step": "tensor(1252.)", + "step": "tensor(2504.)", "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[5.4434e-09, 1.7839e-10, 1.2424e-10, ..., 4.1215e-10, 2.1877e-09,\n 2.8533e-09],\n [2.8695e-09, 1.3143e-10, 9.4008e-11, ..., 3.4807e-10, 1.2431e-09,\n 1.6278e-09],\n [5.2489e-08, 1.1901e-09, 1.0407e-09, ..., 1.7223e-09, 2.0985e-08,\n 2.6356e-08],\n ...,\n [1.6510e-08, 4.1339e-10, 3.0003e-10, ..., 7.5960e-10, 6.6014e-09,\n 8.7650e-09],\n [4.0952e-10, 3.1015e-11, 9.5972e-12, ..., 1.5631e-10, 1.6103e-10,\n 1.5033e-10],\n [9.2564e-10, 7.4983e-11, 5.8380e-11, ..., 2.5131e-10, 4.5530e-10,\n 4.7888e-10]], device='cuda:0')" + "exp_avg_sq": "tensor([[1.5555e-09, 5.0975e-11, 3.5502e-11, ..., 1.1778e-10, 6.2514e-10,\n 8.1535e-10],\n [8.1997e-10, 3.7558e-11, 2.6864e-11, ..., 9.9465e-11, 3.5523e-10,\n 4.6516e-10],\n [1.4999e-08, 3.4008e-10, 2.9738e-10, ..., 4.9217e-10, 5.9968e-09,\n 7.5315e-09],\n ...,\n [4.7178e-09, 1.1813e-10, 8.5736e-11, ..., 2.1706e-10, 1.8864e-09,\n 2.5047e-09],\n [1.1702e-10, 8.8628e-12, 2.7425e-12, ..., 4.4666e-11, 4.6017e-11,\n 4.2958e-11],\n [2.6451e-10, 2.1427e-11, 1.6683e-11, ..., 7.1814e-11, 1.3010e-10,\n 1.3684e-10]], device='cuda:0')" }, "55": { - "step": "tensor(1252.)", + "step": "tensor(2504.)", "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([9.7427e-08, 5.2193e-08, 9.2730e-07, 4.9945e-07, 3.7437e-07, 5.1105e-10,\n 1.2446e-07, 1.2918e-07, 2.2319e-07, 2.2275e-08, 4.1187e-07, 5.4207e-09,\n 3.2647e-07, 1.9129e-10, 3.2127e-07, 7.8444e-10, 1.7226e-08, 1.5221e-07,\n 1.4243e-07, 2.4401e-07, 3.4944e-07, 5.3406e-08, 4.0863e-07, 1.8428e-07,\n 1.0291e-06, 2.7800e-07, 2.2149e-07, 5.1656e-08, 2.0655e-07, 4.9759e-07,\n 2.6058e-07, 9.0863e-07, 1.0113e-07, 1.1706e-08, 4.1967e-08, 4.5034e-08,\n 1.1552e-07, 3.8261e-07, 2.3706e-07, 4.8787e-08, 1.3213e-07, 3.7046e-07,\n 3.8959e-09, 1.4381e-07, 2.5622e-07, 9.0186e-08, 9.4469e-08, 2.9799e-07,\n 1.1134e-06, 5.5526e-08, 4.2286e-07, 2.1584e-07, 1.0534e-07, 8.9367e-08,\n 2.3222e-09, 8.1666e-07, 1.5420e-08, 3.4296e-07, 8.3493e-08, 8.0518e-07,\n 4.5733e-07, 3.1176e-07, 9.8574e-09, 1.6608e-07, 7.1366e-08, 7.8576e-08,\n 8.6404e-08, 8.1865e-07, 8.1955e-09, 3.8843e-07, 2.9373e-09, 3.1463e-07,\n 5.2072e-09, 3.2246e-07, 1.9626e-09, 8.6147e-08, 7.1818e-07, 4.1084e-08,\n 3.0371e-07, 5.7687e-07, 2.6702e-07, 1.8878e-07, 2.1761e-07, 3.0014e-07,\n 2.7184e-07, 3.8336e-07, 1.3160e-07, 1.8474e-07, 2.2536e-07, 1.2369e-08,\n 6.4542e-08, 8.1075e-08, 1.4338e-06, 3.9101e-08, 1.9254e-07, 2.0125e-10,\n 1.0323e-08, 3.2797e-09, 7.2564e-10, 2.2313e-08, 1.4333e-06, 1.0201e-08,\n 1.3574e-07, 1.1914e-06, 9.3816e-08, 2.4867e-08, 3.9060e-08, 1.0036e-08,\n 2.7437e-07, 6.5249e-07, 3.6990e-09, 1.9181e-07, 7.6413e-08, 7.4743e-08,\n 2.4378e-07, 5.7536e-08, 1.4273e-08, 8.0696e-09, 1.8574e-07, 1.4480e-07,\n 2.8497e-09, 2.0361e-07, 1.4403e-08, 1.4520e-06, 5.9560e-07, 1.4949e-06,\n 5.5694e-07, 3.5024e-08, 3.0969e-08, 8.6664e-08, 6.7288e-07, 1.1024e-08,\n 6.9186e-08, 3.1050e-07, 1.0874e-09, 4.3597e-09, 1.9325e-07, 4.5478e-07,\n 2.2255e-07, 5.3648e-08, 9.9846e-08, 2.6217e-07, 1.3643e-07, 1.2648e-07,\n 1.3721e-07, 3.0364e-07, 1.1325e-07, 1.7292e-07, 1.2241e-07, 7.9339e-07,\n 4.4155e-10, 9.0914e-08, 1.1112e-08, 9.1860e-09, 1.9179e-06, 8.3493e-07,\n 4.1598e-08, 7.7987e-07, 1.7706e-07, 1.0881e-06, 2.1953e-08, 2.2485e-08,\n 1.4442e-07, 6.1340e-10, 1.0600e-06, 1.7781e-07, 2.3401e-07, 3.7440e-08,\n 1.6731e-07, 7.6755e-07, 1.0512e-08, 1.8175e-07, 2.4728e-07, 6.7934e-08,\n 7.5378e-07, 1.8483e-07, 1.4735e-07, 5.1871e-09, 1.3245e-07, 3.9901e-08,\n 3.5192e-07, 1.6310e-07, 2.4625e-08, 5.0259e-08, 4.8644e-09, 9.7327e-08,\n 4.2051e-08, 6.2123e-07, 1.5360e-08, 5.8888e-08, 5.0887e-08, 1.1234e-08,\n 3.6767e-08, 3.9913e-07, 1.4469e-07, 1.2697e-07, 3.8717e-07, 2.1117e-07,\n 2.2724e-07, 1.0568e-07, 1.1303e-07, 4.7550e-07, 2.9045e-07, 7.8405e-09,\n 4.9996e-07, 1.7004e-08, 3.8506e-07, 5.0939e-07, 5.2427e-07, 5.0409e-07,\n 1.4442e-07, 1.0930e-07, 4.6672e-07, 1.8724e-08, 4.2709e-07, 2.0230e-07,\n 2.7237e-10, 2.9442e-07, 7.0228e-10, 2.1976e-07, 1.2667e-07, 3.1634e-06,\n 3.6989e-07, 1.1303e-06, 1.0309e-08, 5.1111e-08, 2.3843e-07, 1.8287e-08,\n 6.5854e-08, 3.6715e-09, 1.1337e-07, 2.8966e-07, 3.0414e-07, 4.5633e-08,\n 1.0813e-06, 1.4162e-07, 6.2555e-08, 6.8277e-09, 6.1479e-08, 7.9651e-10,\n 3.4042e-08, 2.5733e-07, 1.8850e-08, 1.7226e-08, 1.2391e-07, 7.7653e-07,\n 1.2969e-07, 4.0378e-07, 1.9765e-07, 9.6381e-08, 8.1458e-07, 1.8939e-08,\n 6.3938e-07, 2.9462e-07, 5.5584e-09, 1.6220e-08], device='cuda:0')" + "exp_avg_sq": "tensor([2.7840e-08, 1.4914e-08, 2.6498e-07, 1.4272e-07, 1.0698e-07, 1.4604e-10,\n 3.5565e-08, 3.6914e-08, 6.3778e-08, 6.3653e-09, 1.1769e-07, 1.5490e-09,\n 9.3293e-08, 5.4663e-11, 9.1807e-08, 2.2416e-10, 4.9224e-09, 4.3495e-08,\n 4.0701e-08, 6.9728e-08, 9.9854e-08, 1.5261e-08, 1.1677e-07, 5.2660e-08,\n 2.9408e-07, 7.9440e-08, 6.3294e-08, 1.4761e-08, 5.9022e-08, 1.4219e-07,\n 7.4463e-08, 2.5965e-07, 2.8899e-08, 3.3452e-09, 1.1992e-08, 1.2869e-08,\n 3.3009e-08, 1.0934e-07, 6.7741e-08, 1.3941e-08, 3.7757e-08, 1.0586e-07,\n 1.1133e-09, 4.1094e-08, 7.3216e-08, 2.5771e-08, 2.6995e-08, 8.5153e-08,\n 3.1818e-07, 1.5867e-08, 1.2083e-07, 6.1679e-08, 3.0101e-08, 2.5537e-08,\n 6.6358e-10, 2.3337e-07, 4.4064e-09, 9.8003e-08, 2.3859e-08, 2.3009e-07,\n 1.3069e-07, 8.9088e-08, 2.8168e-09, 4.7459e-08, 2.0393e-08, 2.2454e-08,\n 2.4691e-08, 2.3393e-07, 2.3419e-09, 1.1100e-07, 8.3937e-10, 8.9908e-08,\n 1.4880e-09, 9.2145e-08, 5.6082e-10, 2.4617e-08, 2.0522e-07, 1.1740e-08,\n 8.6787e-08, 1.6485e-07, 7.6304e-08, 5.3946e-08, 6.2183e-08, 8.5768e-08,\n 7.7680e-08, 1.0955e-07, 3.7605e-08, 5.2790e-08, 6.4399e-08, 3.5345e-09,\n 1.8443e-08, 2.3168e-08, 4.0973e-07, 1.1173e-08, 5.5019e-08, 5.7509e-11,\n 2.9498e-09, 9.3719e-10, 2.0736e-10, 6.3760e-09, 4.0957e-07, 2.9149e-09,\n 3.8789e-08, 3.4046e-07, 2.6809e-08, 7.1059e-09, 1.1162e-08, 2.8679e-09,\n 7.8404e-08, 1.8645e-07, 1.0570e-09, 5.4811e-08, 2.1836e-08, 2.1358e-08,\n 6.9662e-08, 1.6441e-08, 4.0785e-09, 2.3060e-09, 5.3077e-08, 4.1377e-08,\n 8.1432e-10, 5.8184e-08, 4.1158e-09, 4.1491e-07, 1.7020e-07, 4.2717e-07,\n 1.5915e-07, 1.0008e-08, 8.8496e-09, 2.4765e-08, 1.9228e-07, 3.1502e-09,\n 1.9770e-08, 8.8728e-08, 3.1074e-10, 1.2458e-09, 5.5222e-08, 1.2996e-07,\n 6.3596e-08, 1.5330e-08, 2.8532e-08, 7.4917e-08, 3.8985e-08, 3.6143e-08,\n 3.9208e-08, 8.6769e-08, 3.2363e-08, 4.9413e-08, 3.4979e-08, 2.2672e-07,\n 1.2618e-10, 2.5979e-08, 3.1752e-09, 2.6250e-09, 5.4804e-07, 2.3859e-07,\n 1.1887e-08, 2.2285e-07, 5.0597e-08, 3.1095e-07, 6.2732e-09, 6.4251e-09,\n 4.1269e-08, 1.7529e-10, 3.0291e-07, 5.0812e-08, 6.6871e-08, 1.0699e-08,\n 4.7810e-08, 2.1933e-07, 3.0039e-09, 5.1935e-08, 7.0662e-08, 1.9413e-08,\n 2.1540e-07, 5.2816e-08, 4.2105e-08, 1.4822e-09, 3.7850e-08, 1.1402e-08,\n 1.0056e-07, 4.6608e-08, 7.0368e-09, 1.4362e-08, 1.3900e-09, 2.7812e-08,\n 1.2016e-08, 1.7752e-07, 4.3894e-09, 1.6828e-08, 1.4541e-08, 3.2103e-09,\n 1.0507e-08, 1.1406e-07, 4.1346e-08, 3.6283e-08, 1.1064e-07, 6.0342e-08,\n 6.4936e-08, 3.0198e-08, 3.2300e-08, 1.3588e-07, 8.3000e-08, 2.2405e-09,\n 1.4287e-07, 4.8590e-09, 1.1003e-07, 1.4556e-07, 1.4981e-07, 1.4405e-07,\n 4.1268e-08, 3.1234e-08, 1.3337e-07, 5.3506e-09, 1.2204e-07, 5.7810e-08,\n 7.7831e-11, 8.4132e-08, 2.0068e-10, 6.2798e-08, 3.6198e-08, 9.0396e-07,\n 1.0570e-07, 3.2299e-07, 2.9459e-09, 1.4605e-08, 6.8133e-08, 5.2256e-09,\n 1.8818e-08, 1.0492e-09, 3.2396e-08, 8.2772e-08, 8.6912e-08, 1.3040e-08,\n 3.0898e-07, 4.0469e-08, 1.7876e-08, 1.9511e-09, 1.7568e-08, 2.2761e-10,\n 9.7278e-09, 7.3534e-08, 5.3865e-09, 4.9226e-09, 3.5408e-08, 2.2190e-07,\n 3.7059e-08, 1.1538e-07, 5.6480e-08, 2.7542e-08, 2.3277e-07, 5.4118e-09,\n 1.8271e-07, 8.4190e-08, 1.5884e-09, 4.6350e-09], device='cuda:0')" }, "56": { - "step": "tensor(1252.)", + "step": "tensor(2504.)", "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[2.3897e-06, 5.1840e-07, 1.4343e-06, ..., 6.2110e-07, 5.3174e-08,\n 8.1584e-09],\n [2.1533e-08, 5.6116e-09, 1.2739e-08, ..., 5.1202e-09, 5.0719e-10,\n 1.0081e-10],\n [2.9327e-08, 6.1882e-09, 1.7499e-08, ..., 7.7531e-09, 6.5603e-10,\n 9.4048e-11],\n ...,\n [2.3976e-08, 5.0555e-09, 1.4473e-08, ..., 6.1616e-09, 5.6814e-10,\n 1.1808e-10],\n [2.9702e-08, 6.4139e-09, 1.7854e-08, ..., 7.7272e-09, 5.5072e-10,\n 5.5245e-11],\n [2.6357e-08, 5.5610e-09, 1.6484e-08, ..., 7.0557e-09, 6.3659e-10,\n 1.6132e-10]], device='cuda:0')" + "exp_avg_sq": "tensor([[6.8286e-07, 1.4814e-07, 4.0987e-07, ..., 1.7749e-07, 1.5195e-08,\n 2.3313e-09],\n [6.1532e-09, 1.6036e-09, 3.6404e-09, ..., 1.4631e-09, 1.4493e-10,\n 2.8807e-11],\n [8.3805e-09, 1.7683e-09, 5.0006e-09, ..., 2.2155e-09, 1.8747e-10,\n 2.6875e-11],\n ...,\n [6.8514e-09, 1.4446e-09, 4.1357e-09, ..., 1.7607e-09, 1.6235e-10,\n 3.3743e-11],\n [8.4876e-09, 1.8328e-09, 5.1018e-09, ..., 2.2081e-09, 1.5737e-10,\n 1.5787e-11],\n [7.5318e-09, 1.5891e-09, 4.7106e-09, ..., 2.0162e-09, 1.8191e-10,\n 4.6098e-11]], device='cuda:0')" }, "57": { - "step": "tensor(1252.)", + "step": "tensor(2504.)", "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", - "exp_avg_sq": "tensor([6.1598e-05, 5.5846e-07, 7.5429e-07, 7.8274e-07, 8.9742e-07, 1.0309e-06,\n 8.1960e-07, 6.0696e-07, 7.6664e-07, 6.8128e-07], device='cuda:0')" + "exp_avg_sq": "tensor([1.7602e-05, 1.5959e-07, 2.1555e-07, 2.2367e-07, 2.5645e-07, 2.9458e-07,\n 2.3421e-07, 1.7344e-07, 2.1907e-07, 1.9468e-07], device='cuda:0')" }, "58": { - "step": "tensor(1252.)", + "step": "tensor(2504.)", "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[2.3894e-06, 5.1831e-07, 1.4339e-06, ..., 6.2110e-07, 5.3053e-08,\n 8.0760e-09],\n [2.1529e-08, 5.6102e-09, 1.2733e-08, ..., 5.1202e-09, 5.0541e-10,\n 9.9605e-11],\n [2.9324e-08, 6.1871e-09, 1.7495e-08, ..., 7.7530e-09, 6.5469e-10,\n 9.3129e-11],\n ...,\n [2.3972e-08, 5.0539e-09, 1.4466e-08, ..., 6.1616e-09, 5.6614e-10,\n 1.1671e-10],\n [2.9701e-08, 6.4135e-09, 1.7852e-08, ..., 7.7272e-09, 5.5023e-10,\n 5.4919e-11],\n [2.6350e-08, 5.5581e-09, 1.6472e-08, ..., 7.0557e-09, 6.3303e-10,\n 1.5891e-10]], device='cuda:0')" + "exp_avg_sq": "tensor([[6.8279e-07, 1.4811e-07, 4.0975e-07, ..., 1.7748e-07, 1.5160e-08,\n 2.3078e-09],\n [6.1521e-09, 1.6032e-09, 3.6387e-09, ..., 1.4631e-09, 1.4443e-10,\n 2.8463e-11],\n [8.3797e-09, 1.7680e-09, 4.9992e-09, ..., 2.2155e-09, 1.8708e-10,\n 2.6612e-11],\n ...,\n [6.8502e-09, 1.4442e-09, 4.1338e-09, ..., 1.7607e-09, 1.6178e-10,\n 3.3352e-11],\n [8.4873e-09, 1.8327e-09, 5.1014e-09, ..., 2.2081e-09, 1.5723e-10,\n 1.5693e-11],\n [7.5298e-09, 1.5883e-09, 4.7071e-09, ..., 2.0162e-09, 1.8089e-10,\n 4.5409e-11]], device='cuda:0')" }, "59": { - "step": "tensor(1252.)", + "step": "tensor(2504.)", "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", - "exp_avg_sq": "tensor([6.1598e-05, 5.5846e-07, 7.5429e-07, 7.8273e-07, 8.9742e-07, 1.0309e-06,\n 8.1960e-07, 6.0696e-07, 7.6664e-07, 6.8127e-07], device='cuda:0')" + "exp_avg_sq": "tensor([1.7602e-05, 1.5958e-07, 2.1554e-07, 2.2367e-07, 2.5644e-07, 2.9458e-07,\n 2.3421e-07, 1.7344e-07, 2.1907e-07, 1.9468e-07], device='cuda:0')" }, "60": { - "step": "tensor(1252.)", + "step": "tensor(2504.)", "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[2.3897e-06, 5.1840e-07, 1.4343e-06, ..., 6.2110e-07, 5.3174e-08,\n 8.1584e-09],\n [2.1533e-08, 5.6116e-09, 1.2739e-08, ..., 5.1202e-09, 5.0719e-10,\n 1.0081e-10],\n [2.9327e-08, 6.1882e-09, 1.7499e-08, ..., 7.7531e-09, 6.5603e-10,\n 9.4048e-11],\n ...,\n [2.3976e-08, 5.0555e-09, 1.4473e-08, ..., 6.1616e-09, 5.6814e-10,\n 1.1808e-10],\n [2.9702e-08, 6.4139e-09, 1.7854e-08, ..., 7.7272e-09, 5.5072e-10,\n 5.5245e-11],\n [2.6357e-08, 5.5610e-09, 1.6484e-08, ..., 7.0557e-09, 6.3659e-10,\n 1.6132e-10]], device='cuda:0')" + "exp_avg_sq": "tensor([[6.8286e-07, 1.4814e-07, 4.0987e-07, ..., 1.7749e-07, 1.5195e-08,\n 2.3313e-09],\n [6.1532e-09, 1.6036e-09, 3.6404e-09, ..., 1.4631e-09, 1.4493e-10,\n 2.8807e-11],\n [8.3805e-09, 1.7683e-09, 5.0006e-09, ..., 2.2155e-09, 1.8747e-10,\n 2.6875e-11],\n ...,\n [6.8514e-09, 1.4446e-09, 4.1357e-09, ..., 1.7607e-09, 1.6235e-10,\n 3.3743e-11],\n [8.4876e-09, 1.8328e-09, 5.1018e-09, ..., 2.2081e-09, 1.5737e-10,\n 1.5787e-11],\n [7.5318e-09, 1.5891e-09, 4.7106e-09, ..., 2.0162e-09, 1.8191e-10,\n 4.6098e-11]], device='cuda:0')" }, "61": { - "step": "tensor(1252.)", + "step": "tensor(2504.)", "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", - "exp_avg_sq": "tensor([6.1598e-05, 5.5846e-07, 7.5429e-07, 7.8274e-07, 8.9742e-07, 1.0309e-06,\n 8.1960e-07, 6.0696e-07, 7.6664e-07, 6.8128e-07], device='cuda:0')" + "exp_avg_sq": "tensor([1.7602e-05, 1.5959e-07, 2.1555e-07, 2.2367e-07, 2.5645e-07, 2.9458e-07,\n 2.3421e-07, 1.7344e-07, 2.1907e-07, 1.9468e-07], device='cuda:0')" + }, + "8": { + "step": "tensor(1252.)", + "exp_avg": "tensor([[ 5.6940e-08, 1.0421e-07, -1.3288e-06, ..., -5.9247e-07,\n 0.0000e+00, 8.0896e-08],\n [-7.7066e-07, 4.9276e-07, -5.0040e-06, ..., 1.7518e-07,\n 0.0000e+00, -8.5854e-07],\n [-3.6757e-07, -6.2345e-08, -4.6319e-07, ..., -1.1075e-06,\n 0.0000e+00, -7.0942e-08],\n ...,\n [ 4.8650e-07, 2.2015e-07, 5.6982e-07, ..., -3.1488e-07,\n 0.0000e+00, -1.3362e-06],\n [-4.5949e-08, 3.4063e-07, -4.5927e-06, ..., -3.7472e-06,\n 0.0000e+00, 2.6777e-06],\n [ 1.2183e-06, -8.2704e-08, 2.4157e-07, ..., 4.4974e-07,\n 0.0000e+00, 1.8756e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[3.4401e-11, 6.3512e-12, 9.0515e-11, ..., 1.5715e-10, 0.0000e+00,\n 1.1136e-11],\n [8.3869e-11, 4.1723e-12, 2.9103e-10, ..., 1.2710e-10, 0.0000e+00,\n 1.6049e-10],\n [8.3632e-12, 3.2500e-11, 1.8402e-10, ..., 2.9239e-11, 0.0000e+00,\n 5.7891e-11],\n ...,\n [4.2487e-11, 2.3882e-11, 8.9071e-11, ..., 1.5775e-10, 0.0000e+00,\n 1.1755e-10],\n [3.2484e-11, 1.2114e-11, 4.5235e-10, ..., 1.8298e-10, 0.0000e+00,\n 1.6563e-10],\n [1.3594e-10, 6.2787e-11, 1.3780e-10, ..., 8.7576e-11, 0.0000e+00,\n 3.4097e-11]], device='cuda:0')" + }, + "9": { + "step": "tensor(1252.)", + "exp_avg": "tensor([-3.2845e-05, -1.5522e-05, -2.0349e-05, ..., 1.0252e-05,\n 4.2399e-06, -5.6976e-06], device='cuda:0')", + "exp_avg_sq": "tensor([7.8435e-09, 8.2531e-09, 6.5519e-09, ..., 9.3517e-09, 8.9199e-09,\n 9.4827e-09], device='cuda:0')" + }, + "10": { + "step": "tensor(1252.)", + "exp_avg": "tensor([[ 5.0901e-07, 1.0410e-06, -4.9172e-07, ..., -1.4800e-07,\n -7.1067e-07, -6.9847e-07],\n [-1.6265e-06, -1.1797e-06, -1.4047e-07, ..., -2.9216e-07,\n -1.0706e-06, 4.2325e-07],\n [-1.4608e-06, 9.6109e-07, 6.2948e-07, ..., 6.1738e-07,\n -5.7938e-07, -7.8541e-07],\n ...,\n [-1.1353e-07, -1.9804e-06, -1.8893e-07, ..., 1.3498e-06,\n 1.0685e-06, -4.0433e-07],\n [ 8.0074e-07, 1.2154e-06, 6.4108e-07, ..., -3.4137e-08,\n 4.8190e-06, 3.3035e-07],\n [-4.6718e-07, -3.9547e-07, 7.0761e-07, ..., 5.4978e-07,\n 1.2743e-06, -1.9790e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[8.3773e-12, 1.0407e-11, 9.7621e-12, ..., 2.8393e-11, 1.6891e-11,\n 2.1111e-11],\n [3.5501e-11, 3.6343e-11, 1.9952e-11, ..., 3.4184e-11, 2.4156e-11,\n 2.2737e-11],\n [2.3849e-11, 2.8548e-11, 2.3079e-11, ..., 3.6115e-11, 2.6304e-11,\n 3.2381e-11],\n ...,\n [4.0467e-11, 2.8116e-11, 3.1167e-11, ..., 6.6901e-11, 3.0531e-11,\n 2.2274e-11],\n [2.4032e-11, 2.3324e-11, 2.4323e-11, ..., 3.2283e-11, 3.5228e-11,\n 2.6384e-11],\n [3.7279e-11, 2.3102e-11, 1.9188e-11, ..., 3.3174e-11, 3.3645e-11,\n 3.9917e-11]], device='cuda:0')" } }, "param_groups": [ { - "lr": 0.00904518046337755, + "lr": 0.00793913236883622, "name": "shared", "betas": [ 0.9, @@ -212,7 +227,7 @@ ] }, { - "lr": 0.00904518046337755, + "lr": 0.00793913236883622, "name": "scale_256", "betas": [ 0.9, @@ -235,7 +250,7 @@ ] }, { - "lr": 0.00904518046337755, + "lr": 0.00793913236883622, "name": "scale_512", "betas": [ 0.9, @@ -258,7 +273,7 @@ ] }, { - "lr": 0.00904518046337755, + "lr": 0.00793913236883622, "name": "scale_768", "betas": [ 0.9, @@ -281,7 +296,7 @@ ] }, { - "lr": 0.00904518046337755, + "lr": 0.00793913236883622, "name": "scale_1024", "betas": [ 0.9, @@ -304,7 +319,7 @@ ] }, { - "lr": 0.00904518046337755, + "lr": 0.00793913236883622, "name": "scale_1280", "betas": [ 0.9, @@ -327,7 +342,7 @@ ] }, { - "lr": 0.00904518046337755, + "lr": 0.00793913236883622, "name": "scale_1536", "betas": [ 0.9, @@ -350,7 +365,7 @@ ] }, { - "lr": 0.00904518046337755, + "lr": 0.00793913236883622, "name": "scale_1792", "betas": [ 0.9, @@ -373,7 +388,7 @@ ] }, { - "lr": 0.00904518046337755, + "lr": 0.00793913236883622, "name": "scale_2048", "betas": [ 0.9, @@ -396,7 +411,7 @@ ] }, { - "lr": 0.00904518046337755, + "lr": 0.00793913236883622, "name": "scale_2304", "betas": [ 0.9, @@ -419,7 +434,7 @@ ] }, { - "lr": 0.00904518046337755, + "lr": 0.00793913236883622, "name": "scale_2560", "betas": [ 0.9, @@ -442,7 +457,7 @@ ] }, { - "lr": 0.004522637977440181, + "lr": 0.003969669238105037, "name": "fusion", "betas": [ 0.9, @@ -498,7 +513,7 @@ "T_i": 10, "T_mult": 2, "eta_min": 1e-06, - "T_cur": 2, + "T_cur": 3, "base_lrs": [ 0.01, 0.01, @@ -513,31 +528,32 @@ 0.01, 0.005 ], - "last_epoch": 2, + "last_epoch": 3, "_step_count": 0, "_is_initial": false, "_get_lr_called_within_step": false, "_last_lr": [ - 0.00904518046337755, - 0.00904518046337755, - 0.00904518046337755, - 0.00904518046337755, - 0.00904518046337755, - 0.00904518046337755, - 0.00904518046337755, - 0.00904518046337755, - 0.00904518046337755, - 0.00904518046337755, - 0.00904518046337755, - 0.004522637977440181 + 0.00793913236883622, + 0.00793913236883622, + 0.00793913236883622, + 0.00793913236883622, + 0.00793913236883622, + 0.00793913236883622, + 0.00793913236883622, + 0.00793913236883622, + 0.00793913236883622, + 0.00793913236883622, + 0.00793913236883622, + 0.003969669238105037 ] }, "metrics": { - "best_val_acc": 80.816, - "best_epoch": 1, + "best_val_acc": 81.35, + "best_epoch": 2, "scale_accuracies": { - "256": 80.816, - "512": 80.742 + "256": 81.35, + "512": 81.248, + "768": 80.87 } }, "train_config": {