| { | |
| "epoch": 9, | |
| "optimizer_state_dict": { | |
| "state": { | |
| "0": { | |
| "step": "tensor(12520.)", | |
| "exp_avg": "tensor([[-9.9270e-05, -2.4159e-04, 5.0129e-05, ..., 1.2280e-04,\n 3.7515e-06, 1.4372e-04],\n [-3.1994e-05, 1.9035e-05, 2.1199e-04, ..., -8.2258e-07,\n 6.6183e-05, -4.8447e-05],\n [ 1.7236e-04, -6.5137e-05, 1.9107e-04, ..., -1.4268e-04,\n -1.3081e-05, 3.8953e-05],\n ...,\n [-7.0740e-05, 1.9976e-04, 1.4614e-04, ..., 6.5639e-05,\n -4.1633e-05, -8.3606e-05],\n [-8.9631e-06, -6.7986e-05, -7.9535e-05, ..., 8.7796e-05,\n 2.0017e-05, 2.2045e-05],\n [-7.8553e-05, 2.0722e-04, -1.8843e-04, ..., -1.4236e-05,\n 5.1226e-05, -5.1407e-05]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[6.1228e-07, 1.1706e-06, 5.9148e-07, ..., 4.9620e-07, 4.1757e-07,\n 2.9897e-07],\n [4.3065e-07, 5.0649e-07, 6.6805e-07, ..., 3.0059e-07, 2.7858e-07,\n 2.8537e-07],\n [3.7073e-07, 3.8674e-07, 2.8546e-07, ..., 2.1470e-07, 2.9530e-07,\n 2.1305e-07],\n ...,\n [7.0661e-07, 4.5202e-07, 3.4550e-07, ..., 3.9981e-07, 3.0807e-07,\n 3.4916e-07],\n [2.8896e-07, 3.8617e-07, 1.7456e-07, ..., 2.2695e-07, 1.5475e-07,\n 1.1588e-07],\n [8.0215e-07, 7.6109e-07, 3.9632e-07, ..., 4.5643e-07, 3.8443e-07,\n 3.1862e-07]], device='cuda:0')" | |
| }, | |
| "1": { | |
| "step": "tensor(12520.)", | |
| "exp_avg": "tensor([ 0.0127, -0.0075, -0.0049, ..., -0.0037, 0.0028, -0.0004],\n device='cuda:0')", | |
| "exp_avg_sq": "tensor([0.0009, 0.0008, 0.0005, ..., 0.0008, 0.0004, 0.0009], device='cuda:0')" | |
| }, | |
| "2": { | |
| "step": "tensor(12520.)", | |
| "exp_avg": "tensor([[ 1.1740e-04, 1.1428e-04, 1.5765e-05, ..., -2.7922e-06,\n 5.4216e-05, -2.7883e-05],\n [ 1.1946e-04, 1.4588e-05, -1.1028e-05, ..., 8.9411e-08,\n -2.6733e-05, 1.7444e-05],\n [ 6.7180e-06, -7.8208e-08, -9.1261e-06, ..., 1.6132e-06,\n -7.8087e-06, 1.8373e-05],\n ...,\n [ 1.2968e-06, 6.9628e-06, 5.3621e-05, ..., 1.0807e-06,\n -1.3602e-06, 1.5340e-07],\n [-1.4027e-05, -6.4610e-06, -1.8955e-06, ..., 2.9326e-05,\n 1.9285e-05, 1.7961e-06],\n [-1.8629e-05, 2.6219e-06, 1.3074e-08, ..., -5.8890e-05,\n 1.5220e-09, 3.2119e-06]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[2.2875e-07, 6.5746e-07, 1.0195e-08, ..., 1.7812e-07, 2.1200e-08,\n 7.2220e-08],\n [2.3325e-07, 2.2450e-07, 6.8626e-08, ..., 2.9336e-07, 2.4918e-08,\n 9.7669e-08],\n [2.6953e-08, 2.3108e-08, 5.5293e-08, ..., 2.5770e-09, 8.3951e-09,\n 2.9685e-08],\n ...,\n [2.1509e-09, 2.8648e-08, 5.2156e-09, ..., 2.7779e-08, 3.5858e-09,\n 1.0932e-09],\n [1.2592e-08, 1.0065e-08, 7.7965e-09, ..., 2.8223e-08, 3.0333e-08,\n 1.2154e-08],\n [6.5655e-08, 1.1390e-09, 1.4646e-10, ..., 1.0094e-07, 1.3983e-10,\n 7.6486e-09]], device='cuda:0')" | |
| }, | |
| "3": { | |
| "step": "tensor(12520.)", | |
| "exp_avg": "tensor([ 1.9511e-02, 6.9506e-03, -1.0083e-02, 1.8968e-02, 2.7172e-03,\n 4.2991e-03, -2.1185e-02, 2.0312e-03, -1.4169e-02, 1.3313e-03,\n 1.0365e-02, 2.6231e-02, -1.5332e-02, -1.7445e-02, -2.4997e-02,\n 2.5412e-03, -1.4228e-02, 1.9739e-02, -1.6964e-02, -8.3868e-03,\n 1.2759e-02, 1.1132e-02, 1.7047e-02, -9.9902e-04, -2.9909e-05,\n -2.2733e-02, -2.2926e-02, -5.1793e-03, -9.7092e-04, -1.2994e-02,\n -2.6224e-02, 8.3910e-03, 1.0435e-02, -3.5318e-04, -6.7253e-03,\n -1.4003e-03, 7.9698e-04, -1.8165e-02, 1.2187e-02, -1.5747e-03,\n -1.5473e-02, 7.1278e-03, 1.3785e-03, 5.6052e-45, 1.8208e-03,\n 1.2375e-02, -9.6852e-03, 4.7330e-03, 1.7919e-02, -1.9750e-02,\n 8.2322e-03, -8.9247e-03, 1.2063e-02, 5.2242e-03, 4.9939e-03,\n 5.9130e-03, 5.9265e-03, -1.1081e-02, 1.0252e-02, -3.5098e-03,\n 7.1533e-03, 2.2522e-03, -2.0357e-02, 1.0258e-02, 2.0550e-03,\n 9.7590e-03, 7.1399e-03, 2.0357e-02, 5.1226e-03, -5.3975e-03,\n -9.1847e-05, -1.3358e-03, 2.0687e-03, 9.7278e-03, -2.6908e-03,\n 2.6422e-03, 2.9596e-03, -6.1153e-03, 1.0208e-02, -7.3911e-03,\n -4.8404e-04, 2.0282e-02, -6.8465e-04, 9.8752e-03, -1.6894e-02,\n 1.0097e-02, 2.4636e-03, 5.7274e-03, -1.6817e-02, -7.1810e-03,\n 7.7824e-03, -1.2886e-02, 4.7244e-03, -1.4434e-03, 8.7350e-03,\n 2.4552e-03, -1.7357e-03, -1.5775e-02, -6.7411e-03, 6.9729e-04,\n 2.5053e-03, -1.2245e-02, 7.4715e-03, 5.6052e-45, -2.3240e-04,\n 3.0097e-02, -6.6223e-03, -3.4722e-03, 4.3258e-03, 4.6504e-03,\n 5.6052e-45, 1.7863e-03, 2.0902e-03, 1.9052e-03, -2.2135e-02,\n -1.1976e-03, -1.9605e-02, 6.8637e-03, -6.0429e-03, -1.8129e-03,\n 1.5759e-03, 4.1232e-03, -1.0530e-02, 5.1245e-03, 3.1680e-03,\n -1.4429e-03, -4.9818e-03, 9.9603e-03, -1.7723e-02, 2.7255e-04,\n -6.3307e-03, -1.2619e-02, 1.8492e-03, -1.1470e-02, 6.1388e-03,\n 8.7766e-03, -4.0099e-03, -6.5934e-03, -7.3675e-03, 1.3096e-02,\n 3.8895e-03, 3.6916e-03, -1.0381e-03, -2.6459e-02, 1.2630e-03,\n -2.6433e-03, 2.3003e-02, -5.3292e-03, 1.0025e-02, 1.5430e-03,\n 9.3603e-03, -5.6313e-03, 1.1296e-03, 3.0704e-03, 1.0329e-02,\n -1.5601e-02, 5.6052e-45, -1.2625e-03, -1.0682e-02, -4.7921e-03,\n -4.0649e-03, -9.1546e-03, -3.7482e-03, 2.0107e-02, -1.9207e-03,\n 1.2206e-02, -4.6081e-03, -6.8359e-03, 4.4501e-03, 1.3183e-03,\n 3.4806e-03, -8.0362e-03, -5.5216e-03, 2.0154e-02, 1.2750e-02,\n 5.1766e-03, 7.9484e-03, -6.6957e-03, -6.1670e-03, -5.8658e-03,\n 1.0105e-02, 3.2644e-03, 8.7908e-04, 9.9866e-03, -7.0735e-03,\n -1.6462e-02, 8.8396e-03, 4.6199e-02, 6.0348e-03, -1.6540e-02,\n -3.7193e-03, 6.3901e-03, -2.0036e-02, -1.5393e-02, -6.6192e-03,\n -3.7508e-03, 3.6231e-03, -3.2163e-03, -2.6943e-03, 1.4017e-02,\n -1.0177e-02, 1.0017e-02, 1.7620e-04, -9.0455e-03, -4.0209e-03,\n -1.2687e-02, 9.8769e-04, 1.2066e-02, -8.0107e-03, -1.6378e-02,\n -7.2718e-03, 1.9488e-02, 1.8630e-02, 1.3925e-02, -1.5034e-02,\n -2.3681e-03, -2.8835e-03, -1.7536e-02, 2.3173e-02, -2.9053e-03,\n -1.5442e-02, 5.6052e-45, -1.7673e-02, 3.4239e-03, -1.5946e-03,\n -1.8756e-03, 1.7871e-03, 3.8133e-03, 7.6637e-03, 1.3388e-02,\n 7.3654e-03, 2.3220e-02, -2.2526e-02, -9.5972e-03, 1.0807e-02,\n 1.2816e-03, -6.4188e-03, -7.9562e-04, -1.9612e-02, 1.2111e-02,\n 2.9505e-02, 5.3345e-03, -6.5112e-04, -4.7001e-03, 5.8181e-03,\n 1.1021e-02, 1.0632e-02, 1.7870e-02, 1.0780e-02, 8.5290e-03,\n -3.6194e-02, 1.3034e-02, -1.3721e-02, -1.7554e-03, 1.2523e-02,\n -2.9715e-03, -3.5666e-02, 7.3016e-03, 9.9066e-03, 1.4977e-02,\n 5.6052e-45, -7.1411e-03, 5.4084e-04, 1.1133e-03, 5.7220e-03,\n 3.0104e-02, -5.9067e-03, 2.8195e-02, 1.8974e-02, 6.8588e-03,\n -5.0651e-03, -9.3586e-03, 7.1638e-03, -1.1426e-02, 6.5563e-03,\n -7.2979e-03, -3.0505e-03, 5.9289e-03, -5.1571e-03, 1.0104e-02,\n 4.8856e-03, 4.8481e-03, -5.3020e-03, -8.2236e-03, 6.2136e-03,\n 1.0321e-03, 7.5762e-03, -3.9628e-03, 1.2479e-02, 1.8122e-02,\n -8.4003e-04, 3.5918e-02, -7.8490e-03, -4.9391e-03, 1.7735e-02,\n -7.2713e-03, -5.4344e-03, 6.6030e-03, 5.6052e-45, -1.2105e-02,\n -1.3944e-02, -6.2862e-04, -7.4790e-03, -5.3877e-03, 2.2794e-02,\n 5.5870e-03, 1.0276e-02, -3.4782e-02, 1.8983e-02, 1.9802e-02,\n -5.4836e-04, -6.4412e-04, -8.7856e-03, -6.2564e-03, 1.2325e-02,\n -3.9062e-04, 1.2209e-02, 2.1885e-03, -8.4182e-03, 3.7524e-03,\n -1.0901e-02, 1.1504e-02, -1.5609e-02, -6.8256e-03, -2.9743e-03,\n 5.6052e-45, 5.6052e-45, 2.0755e-02, -6.5098e-03, -7.9869e-03,\n -1.9020e-03, -1.1333e-02, 5.9622e-03, -1.4227e-02, 2.8746e-03,\n 8.0759e-03, 3.9429e-04, 1.5446e-03, 1.0568e-02, -3.8484e-02,\n -1.2999e-02, 8.9322e-03, 1.0768e-02, 1.2503e-02, 7.7764e-03,\n -4.2929e-03, -8.4506e-03, -2.4353e-03, 8.0296e-03, 8.3179e-08,\n -1.2680e-02, 9.3506e-03, -1.0811e-02, 9.4139e-03, 5.6052e-45,\n 4.6249e-03, -3.6066e-03, -7.6927e-03, 6.5655e-03, -1.6889e-02,\n -4.5433e-02, -4.2138e-03, -8.5030e-03, -1.2861e-03, -1.6391e-02,\n 1.1901e-02, 1.5896e-02, 9.0422e-03, -1.6679e-02, 6.6491e-03,\n 6.2940e-03, 8.2020e-03, 8.1818e-04, -5.3519e-04, -4.8851e-03,\n -9.2474e-04, 8.3279e-03, -8.3323e-04, -2.5214e-03, 4.0896e-03,\n -2.6112e-02, -2.3106e-03, 1.4733e-02, -6.0277e-03, 8.6462e-03,\n 8.0762e-03, -6.1178e-03, -1.5022e-03, 5.7271e-03, 4.7776e-03,\n 2.6046e-03, -1.8930e-02, -9.1204e-03, 2.7954e-02, -4.7649e-03,\n -3.3943e-03, 1.4724e-02, 6.1523e-03, -7.6239e-03, -1.9867e-02,\n 1.9270e-03, 9.2355e-03, -1.1944e-02, -1.9281e-02, -8.2742e-03,\n -4.6794e-03, 4.2691e-03, 2.9848e-02, 1.8356e-02, -1.6839e-02,\n 5.8125e-04, -7.7729e-03, 1.3712e-02, 2.2432e-02, 7.0487e-03,\n 2.6313e-03, -2.6713e-02, -7.4824e-03, -1.3847e-02, 8.1582e-03,\n 1.2721e-02, -1.0112e-02, 1.6946e-02, 5.2969e-03, -3.7987e-03,\n 1.3897e-02, 1.7389e-02, -1.7821e-03, -3.2921e-02, -7.6704e-03,\n -4.2462e-03, 8.9521e-03, 2.1380e-02, 5.6052e-45, 4.8339e-03,\n -2.3215e-03, 1.9080e-02, -4.3955e-02, 1.9365e-02, -4.3743e-03,\n 2.4216e-03, -1.9831e-02, -2.9650e-03, -1.8534e-03, 1.7396e-02,\n -1.4643e-02, -5.2335e-03, -1.2750e-02, -9.3254e-03, 7.3426e-03,\n 1.4724e-02, 1.8236e-02, 8.4386e-03, 1.4849e-02, 7.5800e-03,\n -1.8906e-02, -1.0455e-02, -6.6064e-03, -5.4638e-04, -8.1686e-03,\n 1.5641e-02, 3.1144e-02, -6.7198e-03, 2.3705e-02, -7.8522e-03,\n -7.9433e-03, -2.2116e-03, -2.9622e-02, -8.1604e-03, 3.0510e-02,\n -3.4050e-03, 2.5203e-03, -8.7269e-03, 9.6401e-03, -5.6829e-03,\n 4.2790e-03, 8.5569e-03, -7.6691e-04, -1.1263e-03, 1.0956e-02,\n -2.8839e-03, -3.0111e-03, 5.4021e-03, 1.1000e-02, -1.2246e-02,\n -8.5044e-03, -4.7140e-03, 5.6052e-45, 1.2741e-04, -7.3998e-03,\n 1.3998e-02, 1.7721e-03, 1.8246e-02, 3.2299e-02, 5.9573e-03,\n -7.8546e-03, -2.1181e-02, -1.0059e-02, 1.2162e-03, 5.6052e-45,\n -2.9884e-03, -7.7489e-03, 5.6052e-45, -1.3646e-02, -9.4834e-04,\n -1.1811e-03, 2.4833e-02, 3.2547e-03, 4.5740e-03, -5.1036e-03,\n -2.7761e-03, 3.3953e-03], device='cuda:0')", | |
| "exp_avg_sq": "tensor([1.9896e-03, 1.6192e-03, 7.4309e-04, 1.5102e-03, 1.0414e-03, 1.5385e-03,\n 1.5499e-03, 1.4326e-03, 1.5702e-03, 1.8818e-03, 7.7917e-04, 1.7349e-03,\n 8.6239e-04, 1.5607e-03, 1.5937e-03, 4.1046e-04, 1.1052e-03, 1.7060e-03,\n 1.0990e-03, 9.0839e-04, 1.5332e-03, 1.8795e-03, 1.3294e-03, 1.5896e-03,\n 1.7486e-03, 6.1301e-04, 1.7422e-03, 1.3872e-03, 1.5030e-03, 1.5249e-03,\n 1.3365e-03, 1.6960e-03, 1.4856e-03, 1.6635e-03, 1.7254e-03, 1.5323e-03,\n 8.4151e-04, 1.7102e-03, 1.4396e-03, 1.7202e-03, 1.2194e-03, 1.6074e-03,\n 6.6563e-04, 1.3162e-13, 1.6458e-03, 7.1545e-04, 1.8411e-03, 1.4364e-03,\n 1.8122e-03, 2.2166e-03, 8.2868e-04, 1.7638e-03, 9.7079e-04, 1.7346e-03,\n 1.6962e-03, 1.3235e-03, 1.5914e-03, 2.2199e-03, 1.7727e-03, 1.3655e-03,\n 1.7134e-03, 1.9694e-03, 1.6105e-03, 1.6026e-03, 2.0514e-03, 1.5601e-03,\n 1.8594e-03, 1.5347e-03, 1.7137e-03, 7.1645e-04, 1.7469e-03, 4.4093e-04,\n 1.5091e-03, 1.6848e-03, 1.5299e-03, 2.4852e-04, 1.6083e-03, 1.8678e-03,\n 1.6137e-03, 1.5280e-03, 1.4816e-03, 1.6377e-03, 1.6676e-03, 1.6202e-03,\n 1.8443e-03, 1.5514e-03, 1.6315e-03, 1.1119e-03, 1.6622e-03, 6.6609e-04,\n 1.2977e-03, 1.6610e-03, 4.4669e-04, 3.1037e-04, 1.8028e-03, 1.7914e-03,\n 8.4542e-04, 1.6510e-03, 1.4541e-03, 1.3524e-03, 1.4468e-03, 1.1896e-03,\n 1.3671e-03, 7.7775e-11, 1.5418e-03, 1.6798e-03, 1.8564e-03, 1.2269e-03,\n 1.7508e-03, 1.0712e-03, 7.1554e-11, 1.5194e-03, 1.3303e-03, 1.2711e-03,\n 2.0901e-03, 1.9539e-03, 2.0112e-03, 1.5830e-03, 1.5138e-03, 3.2581e-04,\n 2.8823e-04, 1.8132e-03, 1.5059e-03, 1.6714e-03, 1.6732e-03, 1.1213e-03,\n 1.8621e-03, 1.6757e-03, 2.1940e-03, 1.5679e-03, 1.7734e-03, 2.0268e-03,\n 9.7985e-04, 1.9139e-03, 1.5971e-03, 1.4592e-03, 1.8716e-03, 1.4804e-03,\n 1.5999e-03, 1.8288e-03, 1.4723e-03, 1.8832e-04, 2.5900e-03, 1.5124e-03,\n 1.5024e-03, 1.7353e-03, 1.4986e-03, 1.3284e-03, 1.5987e-03, 1.4843e-03,\n 1.4012e-03, 4.0973e-04, 1.9557e-03, 1.4501e-03, 1.5300e-03, 1.5627e-03,\n 6.5983e-13, 1.8356e-03, 9.4995e-04, 7.8635e-04, 1.7807e-03, 1.7338e-03,\n 1.6129e-03, 1.5308e-03, 1.5207e-03, 1.6042e-03, 1.6115e-03, 7.0294e-04,\n 1.5006e-03, 1.5877e-03, 1.7181e-03, 1.4232e-03, 1.4140e-03, 1.7054e-03,\n 1.7119e-03, 5.3149e-04, 1.5331e-03, 1.8610e-03, 1.4752e-03, 1.9198e-03,\n 1.6150e-03, 1.4040e-03, 6.3307e-04, 1.1120e-03, 1.7848e-03, 1.7432e-03,\n 1.3748e-03, 1.7353e-03, 1.2088e-03, 1.3254e-03, 1.3866e-03, 8.9734e-04,\n 1.5382e-03, 1.6883e-03, 3.2250e-04, 6.9678e-04, 4.5001e-04, 9.9714e-04,\n 1.2799e-03, 1.7289e-03, 7.6059e-04, 1.4583e-03, 1.2813e-03, 1.2508e-03,\n 1.8127e-03, 1.5135e-03, 6.2436e-04, 1.0572e-03, 1.2193e-03, 2.2651e-03,\n 2.3411e-04, 1.6194e-03, 1.5719e-03, 1.7740e-03, 1.6460e-03, 2.1988e-03,\n 1.8898e-03, 1.9214e-03, 1.3094e-03, 1.9908e-03, 1.6468e-03, 1.6575e-11,\n 1.1757e-03, 1.5421e-03, 1.1047e-03, 1.5493e-03, 5.5532e-04, 1.3568e-03,\n 1.8085e-03, 1.4448e-03, 5.2646e-04, 1.7468e-03, 1.8274e-03, 1.5903e-03,\n 1.6459e-03, 1.2114e-03, 1.6377e-03, 1.1897e-03, 1.5624e-03, 1.2867e-03,\n 1.5166e-03, 1.5157e-03, 2.0304e-03, 9.8375e-04, 1.6894e-03, 8.4983e-04,\n 1.5900e-03, 1.5337e-03, 1.9980e-03, 1.8152e-03, 1.6455e-03, 9.6780e-04,\n 1.6649e-03, 6.4285e-04, 1.7108e-03, 1.7033e-03, 2.0798e-03, 3.7678e-04,\n 1.7541e-03, 1.3402e-03, 8.0994e-12, 5.6565e-04, 3.8858e-04, 8.5621e-04,\n 1.4645e-03, 1.6301e-03, 1.7321e-03, 1.9630e-03, 1.8748e-03, 1.3112e-03,\n 1.6498e-03, 1.0007e-03, 1.6931e-03, 1.6932e-03, 1.5890e-03, 1.2656e-03,\n 1.8171e-03, 1.2263e-03, 9.8429e-04, 1.0777e-03, 1.7521e-03, 1.9884e-03,\n 1.5321e-03, 1.2063e-03, 1.0167e-03, 1.7760e-03, 1.2175e-03, 1.8351e-03,\n 1.5830e-03, 1.2125e-03, 1.2547e-03, 1.8881e-03, 1.7250e-03, 1.6912e-03,\n 1.2681e-03, 1.5853e-03, 1.6489e-03, 7.6288e-04, 2.4593e-10, 1.1393e-03,\n 1.4007e-03, 1.6305e-03, 1.6597e-03, 1.4911e-03, 1.6373e-03, 1.3250e-03,\n 1.7393e-03, 1.7903e-03, 1.7310e-03, 8.9286e-04, 1.6431e-03, 1.7217e-03,\n 1.8578e-03, 1.3348e-03, 1.7713e-03, 1.6791e-03, 7.8365e-04, 1.6678e-03,\n 6.0249e-04, 1.6983e-03, 1.5241e-03, 1.7047e-03, 1.6089e-03, 1.6932e-03,\n 1.5808e-03, 3.7612e-11, 1.6564e-10, 1.4567e-03, 1.5184e-03, 1.6825e-03,\n 1.7988e-03, 5.3254e-04, 1.7048e-03, 1.9162e-03, 4.5807e-04, 1.7627e-03,\n 6.5470e-04, 4.0264e-04, 1.0476e-03, 2.1538e-03, 1.7512e-03, 1.8366e-03,\n 1.9461e-03, 1.6308e-03, 1.2636e-03, 1.9132e-03, 1.6611e-03, 1.7682e-03,\n 1.3740e-03, 2.1431e-08, 1.0546e-03, 1.5010e-03, 1.6539e-03, 2.1214e-03,\n 3.4100e-11, 7.0415e-04, 1.7981e-03, 1.5853e-03, 1.4968e-03, 1.7685e-03,\n 1.8504e-03, 1.6906e-03, 1.9932e-03, 1.8469e-03, 1.8458e-03, 1.8708e-03,\n 1.6662e-03, 1.5943e-03, 1.5650e-03, 1.2555e-03, 1.0854e-03, 1.2981e-03,\n 2.2623e-04, 2.9869e-04, 5.0872e-04, 4.2771e-04, 1.3344e-03, 1.1791e-03,\n 1.5040e-03, 8.3522e-04, 1.7688e-03, 1.8772e-03, 1.2795e-03, 1.5983e-03,\n 1.0435e-03, 1.4906e-03, 1.8219e-03, 1.6843e-03, 1.6499e-03, 9.6398e-04,\n 1.7376e-03, 1.6524e-03, 1.7063e-03, 1.5947e-03, 1.7915e-03, 1.3036e-03,\n 1.8816e-03, 5.5209e-04, 1.0257e-03, 1.9770e-03, 1.4345e-03, 1.8096e-03,\n 1.6920e-03, 1.2044e-03, 1.1979e-03, 2.0302e-03, 1.4457e-03, 9.7419e-04,\n 1.4598e-03, 5.4142e-04, 8.2084e-04, 1.7955e-03, 1.8036e-03, 1.5516e-03,\n 1.3249e-03, 1.7100e-03, 1.8453e-03, 1.3643e-03, 1.1943e-03, 7.5515e-04,\n 1.5640e-03, 1.2113e-03, 1.7688e-03, 1.6305e-03, 1.9388e-03, 7.1474e-04,\n 1.6248e-03, 1.6367e-03, 1.5523e-03, 2.0517e-03, 1.4580e-03, 1.2400e-03,\n 1.5988e-03, 6.7588e-11, 1.0059e-03, 1.4931e-03, 1.5865e-03, 1.5422e-03,\n 1.9085e-03, 1.9287e-03, 9.6084e-04, 1.7747e-03, 1.6481e-03, 9.4520e-04,\n 1.6151e-03, 2.2480e-03, 1.6226e-03, 1.1736e-03, 1.6929e-03, 1.1907e-03,\n 1.7150e-03, 6.7656e-04, 1.4553e-03, 1.6348e-03, 9.6784e-04, 1.8114e-03,\n 1.7104e-03, 1.2591e-03, 1.4348e-03, 1.3408e-03, 1.8407e-03, 1.7110e-03,\n 3.6782e-04, 1.0991e-03, 1.0196e-03, 1.8113e-03, 1.4975e-03, 2.3875e-03,\n 1.4370e-03, 1.7985e-03, 2.2634e-03, 1.0219e-04, 1.6914e-03, 1.7864e-03,\n 5.9631e-04, 1.6480e-03, 1.7542e-03, 1.1329e-03, 1.6150e-03, 1.2689e-03,\n 1.7163e-03, 2.1372e-04, 8.6340e-04, 1.4387e-03, 1.6399e-03, 9.9038e-04,\n 1.5827e-03, 2.7331e-10, 1.6860e-03, 1.5329e-03, 1.2306e-03, 1.5966e-03,\n 1.8651e-03, 1.8576e-03, 1.4777e-03, 1.1299e-03, 1.5408e-03, 1.9145e-03,\n 6.7806e-04, 2.5355e-11, 7.3496e-04, 1.5753e-03, 4.0440e-12, 1.4262e-03,\n 1.7346e-03, 4.9467e-04, 1.7059e-03, 1.5636e-03, 1.2211e-03, 5.1138e-04,\n 9.9606e-04, 4.4755e-04], device='cuda:0')" | |
| }, | |
| "4": { | |
| "step": "tensor(12520.)", | |
| "exp_avg": "tensor([[-1.1603e-04, -7.2180e-05, -6.8200e-06, ..., -3.7957e-05,\n -3.6474e-05, 8.3883e-06],\n [-3.1862e-04, -2.9079e-04, -1.1923e-04, ..., 5.9291e-06,\n -2.6353e-05, -1.1167e-05],\n [ 9.0245e-05, -1.1618e-04, -1.3191e-04, ..., -3.7056e-05,\n -3.2090e-05, -5.9773e-06],\n ...,\n [ 2.0464e-05, 2.5113e-04, 7.8686e-05, ..., -3.5638e-05,\n 5.6761e-05, 2.1299e-06],\n [ 2.1851e-04, -8.2745e-05, -9.7305e-05, ..., -3.2420e-05,\n -3.5891e-05, -9.4916e-06],\n [-6.1973e-05, 1.4258e-04, 3.6584e-05, ..., 6.0151e-06,\n 1.5815e-05, -1.2339e-05]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[1.8436e-07, 1.9019e-07, 6.4558e-08, ..., 7.2467e-08, 2.8769e-08,\n 2.0301e-09],\n [2.7110e-07, 4.2105e-07, 6.8834e-08, ..., 6.5734e-09, 5.3442e-08,\n 9.3025e-09],\n [2.5400e-07, 3.3724e-07, 3.1337e-08, ..., 2.2257e-08, 2.5362e-08,\n 6.2967e-09],\n ...,\n [2.2145e-07, 4.0158e-07, 4.5172e-08, ..., 1.4928e-08, 4.0852e-08,\n 6.3123e-09],\n [2.4068e-07, 4.1265e-07, 6.1488e-08, ..., 3.3132e-08, 3.5791e-08,\n 6.5386e-09],\n [2.7841e-07, 4.7630e-07, 9.9658e-08, ..., 1.7437e-08, 4.3402e-08,\n 3.7219e-09]], device='cuda:0')" | |
| }, | |
| "5": { | |
| "step": "tensor(11268.)", | |
| "exp_avg": "tensor([[ 3.6006e-07, 9.7012e-06, -2.1446e-06, ..., -1.4667e-06,\n -3.2192e-06, -1.8183e-06],\n [-2.1102e-06, 4.5407e-06, 1.8034e-07, ..., 6.9103e-06,\n -1.1861e-06, 9.6395e-07],\n [ 8.9797e-06, 3.7855e-07, 1.9977e-07, ..., 9.6130e-07,\n -4.5049e-06, -1.3059e-06],\n ...,\n [ 9.8283e-07, 8.3796e-07, 3.7906e-07, ..., -3.4531e-06,\n 3.2448e-08, 7.5696e-07],\n [ 2.0982e-06, -8.8663e-07, -2.1483e-06, ..., -2.6246e-06,\n -3.8057e-06, -7.2866e-07],\n [ 2.0941e-06, -6.4180e-07, 1.4211e-06, ..., 1.0729e-06,\n 1.4869e-06, -4.0201e-06]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[2.8778e-09, 2.8365e-09, 2.3096e-10, ..., 1.9583e-10, 3.1291e-10,\n 3.1409e-10],\n [6.9860e-11, 5.7301e-10, 1.5734e-10, ..., 1.6924e-09, 6.0537e-11,\n 3.1012e-10],\n [1.0276e-09, 1.2304e-10, 1.1813e-10, ..., 1.0910e-10, 1.3679e-10,\n 2.6847e-10],\n ...,\n [7.3490e-10, 4.3244e-10, 3.3003e-10, ..., 4.1564e-10, 4.1223e-10,\n 2.5278e-10],\n [1.5688e-10, 2.6137e-09, 1.9009e-10, ..., 6.4373e-10, 2.5419e-10,\n 8.3016e-10],\n [2.1526e-09, 6.4689e-11, 7.2514e-11, ..., 1.2361e-10, 5.9988e-10,\n 1.0668e-10]], device='cuda:0')" | |
| }, | |
| "6": { | |
| "step": "tensor(11268.)", | |
| "exp_avg": "tensor([-0.0003, 0.0010, 0.0004, ..., 0.0009, -0.0010, -0.0002],\n device='cuda:0')", | |
| "exp_avg_sq": "tensor([8.0420e-06, 5.1718e-06, 7.8633e-06, ..., 6.3100e-06, 7.2935e-06,\n 6.1704e-06], device='cuda:0')" | |
| }, | |
| "7": { | |
| "step": "tensor(11268.)", | |
| "exp_avg": "tensor([[-3.9468e-07, -1.4398e-06, -1.6093e-06, ..., -6.7405e-06,\n -1.2354e-06, -6.2667e-06],\n [ 8.0402e-06, -3.1995e-06, 2.3087e-06, ..., -1.0365e-05,\n 5.3376e-06, 1.3790e-06],\n [-1.9451e-05, -5.7234e-07, 5.2295e-07, ..., 5.4512e-06,\n -6.9806e-06, 7.3281e-06],\n ...,\n [-1.9768e-05, 8.0208e-06, -7.7789e-06, ..., -1.3025e-05,\n 2.1635e-07, 3.8415e-06],\n [-2.4233e-05, 6.1694e-07, 7.6695e-06, ..., 1.9476e-05,\n -3.6363e-06, -4.7517e-06],\n [ 1.2602e-05, -1.8574e-06, -1.0812e-07, ..., 7.8476e-06,\n 6.7473e-06, -1.9763e-05]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[2.2644e-10, 1.2184e-10, 2.2076e-10, ..., 1.4659e-10, 3.9367e-10,\n 2.7063e-10],\n [4.7394e-10, 3.0027e-10, 3.0030e-10, ..., 3.8746e-10, 6.8016e-10,\n 4.2141e-10],\n [5.0821e-10, 2.8190e-10, 4.0303e-10, ..., 3.5680e-10, 5.8227e-10,\n 3.2101e-10],\n ...,\n [5.3783e-10, 4.7337e-10, 4.8334e-10, ..., 5.3744e-10, 7.3464e-10,\n 5.4990e-10],\n [5.6432e-10, 4.5099e-10, 4.0309e-10, ..., 4.6259e-10, 6.0267e-10,\n 3.6671e-10],\n [5.2303e-10, 2.7361e-10, 5.4626e-10, ..., 4.1392e-10, 5.9129e-10,\n 5.0162e-10]], device='cuda:0')" | |
| }, | |
| "32": { | |
| "step": "tensor(11268.)", | |
| "exp_avg": "tensor([-9.8809e-15], device='cuda:0')", | |
| "exp_avg_sq": "tensor([1.0089e-07], device='cuda:0')" | |
| }, | |
| "33": { | |
| "step": "tensor(11268.)", | |
| "exp_avg": "tensor([-6.9767e-17, -6.0682e-17, 1.3045e-16], device='cuda:0')", | |
| "exp_avg_sq": "tensor([2.9329e-11, 4.9022e-11, 4.5787e-12], device='cuda:0')" | |
| }, | |
| "34": { | |
| "step": "tensor(11268.)", | |
| "exp_avg": "tensor([-5.6052e-45, -1.9693e-16, -8.8140e-17, -5.4565e-17, -6.4385e-17,\n -1.0633e-16, -1.0633e-16, -7.5543e-17, -1.1859e-16, 3.9511e-17],\n device='cuda:0')", | |
| "exp_avg_sq": "tensor([3.8710e-08, 4.6507e-10, 4.4744e-10, 3.5208e-10, 4.2302e-10, 3.9256e-10,\n 6.5811e-10, 5.8856e-10, 6.3561e-10, 4.1392e-10], device='cuda:0')" | |
| }, | |
| "36": { | |
| "step": "tensor(11268.)", | |
| "exp_avg": "tensor([[-2.2811e-19, -6.2928e-20, -2.2522e-19, ..., -3.8903e-19,\n -1.8255e-19, -6.1764e-20],\n [-3.5223e-20, -2.3537e-20, -1.6878e-20, ..., 4.2459e-20,\n 9.5294e-21, 4.9724e-21],\n [-1.8845e-19, -6.7964e-20, 8.0461e-21, ..., -9.8934e-22,\n -9.8233e-20, -3.0501e-20],\n ...,\n [-5.4423e-20, 2.9470e-20, -3.0945e-20, ..., -8.7855e-20,\n -4.8698e-20, -1.4449e-20],\n [ 4.6955e-19, 2.4615e-19, -6.9280e-20, ..., 8.7456e-19,\n 2.7888e-19, 1.3837e-19],\n [-1.4807e-19, -1.5970e-19, 2.1102e-21, ..., 1.2720e-19,\n -3.8428e-20, -4.6390e-20]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[5.2844e-15, 1.1646e-15, 3.1471e-15, ..., 3.6161e-15, 4.5604e-15,\n 6.1537e-15],\n [1.4604e-17, 2.6199e-17, 3.7012e-17, ..., 2.1258e-17, 1.8111e-17,\n 3.5155e-17],\n [1.0675e-16, 5.6349e-17, 1.3809e-16, ..., 5.9093e-17, 2.1745e-16,\n 1.1457e-16],\n ...,\n [1.2508e-16, 8.9547e-17, 8.3195e-17, ..., 1.4170e-16, 9.5707e-17,\n 2.0503e-16],\n [2.7190e-15, 6.9546e-16, 2.1594e-15, ..., 1.4440e-15, 2.7171e-15,\n 2.9578e-15],\n [1.8732e-15, 1.0061e-15, 2.2750e-15, ..., 8.5497e-16, 2.3959e-15,\n 3.0116e-15]], device='cuda:0')" | |
| }, | |
| "37": { | |
| "step": "tensor(11268.)", | |
| "exp_avg": "tensor([-1.6262e-16, 6.0433e-18, -8.2259e-17, 1.6614e-19, 3.3757e-18,\n -2.2074e-16, 5.3763e-17, 9.1245e-17, 1.8588e-16, 2.7694e-16,\n 3.4477e-16, -3.0135e-16, 3.6651e-16, -8.7458e-17, -1.1426e-16,\n -6.9822e-17, 1.9374e-16, -2.6170e-16, -2.7377e-16, 2.1062e-17,\n 5.2166e-17, -3.6185e-17, -1.5226e-17, -1.8062e-16, -1.1222e-18,\n 5.0073e-16, 4.8599e-19, 6.4632e-16, 1.2524e-16, -2.9913e-17,\n 5.2510e-17, 5.4280e-16, -1.7451e-16, -4.0161e-16, 9.8711e-17,\n -4.0710e-18, -2.3226e-17, -1.3019e-16, 1.1279e-17, -9.8196e-17,\n -2.3856e-17, 1.2043e-16, -1.3722e-16, -1.6768e-16, 2.9319e-16,\n -1.1535e-16, -3.7301e-16, 3.6753e-16, -2.0173e-16, -5.0327e-17,\n -3.6106e-16, -5.5307e-17, 1.6505e-17, -5.1545e-17, -7.2445e-17,\n -3.7323e-17, -8.2989e-17, 7.8776e-17, 1.1338e-16, -1.5941e-17,\n -9.6479e-17, -4.5454e-18, 5.4048e-16, 6.2554e-16, 1.7787e-16,\n -5.7563e-17, -1.8633e-17, -1.4489e-16, 1.6623e-16, -1.4744e-16,\n 9.2268e-17, -2.5249e-16, -7.2703e-18, 4.7925e-18, -1.6506e-16,\n -4.6148e-17, 4.5609e-17, 1.0102e-16, -8.8676e-17, 4.5970e-17,\n 6.1784e-17, 2.0719e-16, 4.1404e-17, 5.7611e-18, 2.3886e-16,\n -4.5346e-17, 2.9594e-17, -4.1335e-16, 1.1111e-16, 2.1332e-16,\n -1.5669e-16, -2.5630e-16, 1.2908e-17, 3.6764e-17, 5.2881e-17,\n 1.4046e-17, -2.9302e-16, 3.2821e-16, 1.4377e-17, -2.7327e-17,\n -2.6461e-16, -1.9510e-16, -3.1546e-16, -3.5870e-17, -1.5355e-16,\n -4.2275e-16, 2.3228e-16, 8.2933e-17, -2.1519e-17, -1.4664e-16,\n -2.7548e-16, -7.2352e-17, 7.4988e-17, -2.7605e-16, -2.9730e-17,\n 5.2956e-17, 3.6418e-16, -9.3431e-17, 1.5488e-17, -8.6781e-17,\n 1.7221e-16, -8.4299e-17, 6.1897e-17, 1.5567e-16, -2.6173e-16,\n -9.1255e-17, 4.6248e-17, -2.5615e-16, -2.3591e-17, -2.0257e-16,\n -1.8834e-17, 3.9036e-17, 8.3897e-17, -1.3045e-16, -3.2079e-17,\n 4.7749e-16, 9.4537e-19, 1.9513e-18, -5.3330e-17, -2.8023e-16,\n 9.9639e-20, 2.6216e-16, 1.8005e-16, -5.4181e-17, 2.9915e-17,\n 9.7983e-18, 2.0706e-16, 6.7825e-17, 7.6737e-18, -1.7325e-16,\n -9.4070e-18, 4.6624e-16, 1.1661e-16, -3.2957e-16, 4.1066e-17,\n -8.8897e-17, 6.4248e-16, -1.4195e-16, 1.0377e-16, 3.8938e-16,\n 1.9523e-16, 2.4904e-17, 2.8712e-18, 2.2639e-16, -2.3107e-16,\n -7.8867e-17, 1.4098e-16, -1.9886e-16, -4.1944e-16, -6.4249e-17,\n 2.1881e-16, -2.0770e-16, -3.4876e-16, -1.3906e-16, -9.8134e-17,\n 4.0223e-17, -6.6810e-17, -1.1572e-16, -1.6922e-16, -2.8170e-16,\n -2.1848e-16, -3.9249e-17, -2.4401e-16, 7.6934e-17, 6.7183e-17,\n 1.9073e-16, 6.4646e-17, 1.3662e-17, -3.4925e-17, -1.5088e-17,\n 6.8297e-17, -5.1983e-17, 2.2687e-17, -1.3197e-16, -9.9831e-17,\n -2.2799e-17, 1.8489e-18, -1.6105e-16, 4.9987e-16, -1.3800e-16,\n -8.3858e-17, 1.1353e-16, -1.4406e-16, 4.2932e-16, 3.9131e-16,\n -2.1257e-16, 4.2413e-16, -3.0710e-17, -1.7617e-18, 8.1662e-17,\n -4.3138e-16, -7.5447e-17, 2.0426e-17, -1.6393e-17, -6.6477e-17,\n -5.3541e-16, -2.2055e-16, 1.5032e-16, -5.8478e-17, 3.2860e-17,\n -1.9161e-16, 1.5039e-16, -7.7325e-17, 2.6530e-16, 1.3330e-16,\n 3.2809e-16, 2.1136e-17, -1.3872e-16, 3.6597e-17, -1.1536e-16,\n -1.8474e-16, -2.8729e-17, 9.5825e-17, 3.7739e-16, 6.9798e-17,\n 6.2243e-17, -1.4814e-16, -2.0348e-16, 2.5197e-16, 9.9891e-17,\n -1.2075e-17, -1.6650e-16, 1.9949e-16, -1.6888e-16, -1.4488e-16,\n -9.0350e-17, 4.3299e-16, -9.2803e-17, 2.5260e-16, 5.0769e-17,\n 5.4877e-19, -4.3928e-17, -2.6548e-17, -2.2461e-17, 2.6454e-16,\n -1.4196e-16], device='cuda:0')", | |
| "exp_avg_sq": "tensor([1.0121e-09, 9.7657e-12, 2.5370e-11, 1.9799e-10, 4.5044e-13, 3.8857e-10,\n 2.8234e-12, 1.0712e-10, 1.0656e-11, 3.7820e-10, 9.2003e-11, 1.6082e-09,\n 7.9957e-10, 4.2885e-11, 7.3281e-11, 2.2161e-10, 3.8829e-11, 4.7285e-11,\n 3.2084e-10, 6.5370e-12, 2.6847e-13, 1.4970e-13, 5.6751e-11, 1.6186e-10,\n 1.5701e-13, 4.1439e-09, 7.7457e-11, 2.5316e-09, 8.1811e-11, 2.0206e-10,\n 6.0820e-11, 6.6350e-10, 1.3345e-10, 4.2954e-10, 1.1042e-10, 2.1872e-10,\n 4.3185e-11, 2.3533e-09, 2.6152e-13, 2.6124e-11, 2.9639e-13, 6.3858e-12,\n 1.4050e-12, 2.2332e-13, 2.2560e-10, 1.8176e-09, 1.2042e-11, 1.7365e-09,\n 1.6287e-09, 2.1098e-11, 4.0802e-10, 1.9074e-10, 4.5435e-12, 4.1615e-11,\n 1.0281e-10, 2.1947e-10, 2.0284e-12, 9.8219e-12, 2.7875e-10, 1.8328e-10,\n 3.0254e-10, 1.7206e-12, 1.2999e-09, 1.4221e-09, 1.1831e-12, 2.3375e-12,\n 2.4882e-11, 9.6716e-10, 2.0239e-11, 4.4560e-10, 8.6781e-12, 2.6239e-11,\n 2.0304e-11, 1.0498e-10, 1.2951e-09, 4.5213e-14, 1.7317e-12, 4.0697e-11,\n 4.3188e-10, 3.5294e-11, 1.1105e-11, 6.2804e-12, 8.1821e-10, 7.6098e-13,\n 1.9165e-11, 5.3378e-11, 2.6488e-10, 1.3077e-09, 2.5490e-10, 1.1912e-11,\n 1.8460e-09, 4.0074e-10, 1.3147e-12, 7.6483e-11, 7.0964e-12, 1.0117e-11,\n 1.6621e-09, 1.7678e-10, 1.3634e-11, 3.3397e-12, 2.7781e-10, 1.0803e-10,\n 1.4552e-09, 3.0631e-11, 1.2416e-09, 1.3378e-09, 1.3758e-12, 1.6054e-10,\n 1.7475e-11, 2.8712e-11, 3.4249e-10, 1.6538e-13, 2.5048e-12, 3.5461e-10,\n 1.4624e-12, 3.2421e-10, 8.8728e-10, 1.2849e-12, 8.3220e-11, 4.3370e-11,\n 2.7671e-10, 4.4438e-11, 4.2294e-12, 9.7908e-12, 2.0363e-10, 8.1840e-10,\n 1.6314e-10, 1.0763e-09, 2.5978e-10, 3.0731e-09, 4.7495e-11, 5.1931e-12,\n 4.1479e-11, 6.8058e-10, 1.1773e-13, 6.0664e-10, 1.7273e-11, 1.8592e-13,\n 1.8611e-10, 2.5785e-10, 4.5298e-10, 1.4976e-09, 4.5083e-12, 2.7971e-12,\n 6.2027e-13, 1.4123e-11, 9.9921e-10, 7.1461e-11, 9.1874e-11, 1.2682e-10,\n 3.4096e-13, 4.5201e-09, 1.0467e-11, 3.3029e-11, 1.8677e-13, 7.7636e-10,\n 3.5069e-09, 1.7742e-11, 2.2942e-11, 2.8840e-10, 5.3200e-10, 1.4565e-12,\n 1.9793e-13, 1.5858e-09, 3.9648e-10, 4.6188e-11, 1.5748e-11, 7.1833e-10,\n 4.0026e-10, 3.6494e-10, 7.9356e-10, 2.7604e-10, 1.5511e-09, 1.2969e-11,\n 9.3039e-11, 4.3199e-11, 1.3951e-11, 4.1539e-10, 3.3086e-11, 4.2876e-10,\n 1.9573e-09, 5.4406e-13, 5.1076e-10, 1.4620e-10, 1.2578e-10, 9.2760e-10,\n 1.2249e-10, 6.5451e-11, 1.0353e-10, 9.5091e-10, 3.2867e-11, 6.5296e-11,\n 9.4922e-11, 1.0344e-09, 7.2438e-10, 7.6307e-12, 2.5407e-12, 1.8170e-10,\n 1.0498e-10, 2.3986e-10, 8.8957e-11, 5.4303e-09, 8.9065e-11, 7.7159e-10,\n 3.7957e-10, 1.1996e-09, 6.0959e-10, 4.2093e-13, 3.6763e-10, 7.1489e-10,\n 2.2952e-09, 6.2794e-13, 4.2829e-11, 1.1420e-12, 1.7413e-11, 3.7316e-10,\n 6.8375e-11, 3.2924e-12, 5.8888e-13, 1.5387e-12, 1.0486e-09, 5.7420e-10,\n 1.4762e-10, 1.5669e-09, 1.7816e-10, 6.4987e-10, 7.1731e-12, 1.8621e-10,\n 3.7785e-11, 1.5192e-10, 1.5285e-09, 7.9705e-10, 1.8736e-10, 2.4750e-10,\n 3.6251e-11, 7.8743e-11, 9.9094e-12, 3.4575e-09, 9.5228e-11, 1.4066e-10,\n 1.8097e-10, 2.2788e-09, 1.6999e-10, 9.6931e-12, 2.1960e-11, 5.0160e-12,\n 2.2753e-10, 1.0020e-11, 1.0371e-11, 7.6928e-11, 3.4785e-11, 7.9588e-10,\n 1.3083e-11, 4.1607e-11, 5.2109e-10, 5.9114e-10], device='cuda:0')" | |
| }, | |
| "38": { | |
| "step": "tensor(11268.)", | |
| "exp_avg": "tensor([ 1.5967e-18, 2.6979e-18, -1.9916e-20, 3.4365e-17, 3.2890e-19,\n 4.6077e-20, 1.3192e-17, 2.5904e-19, 1.8686e-18, 4.3822e-17,\n 1.0315e-17, 3.2737e-21, 5.6400e-17, 7.5394e-18, -1.3699e-19,\n 2.2720e-17, 3.2465e-18, 1.5379e-19, 4.2496e-19, 1.1063e-19,\n 5.4800e-19, -1.2990e-20, 1.0599e-19, 1.6714e-17, 5.6989e-19,\n 6.7384e-17, -5.3199e-19, 5.2324e-17, 3.5095e-17, 1.0226e-19,\n 7.7347e-19, 3.6509e-17, 2.0556e-18, 4.5799e-19, -7.6678e-19,\n 3.9141e-21, 1.7318e-19, 2.3649e-19, 2.5574e-19, -1.6458e-20,\n -9.1563e-21, 5.6615e-18, 2.5108e-19, 3.2882e-19, 6.6902e-18,\n -2.8884e-19, 2.1334e-17, 5.2703e-17, 1.9145e-18, 1.3184e-17,\n 2.2069e-17, 2.9953e-20, 2.6580e-18, 7.5889e-21, 1.5842e-17,\n 5.9208e-18, 9.8947e-21, 5.4185e-19, 1.8389e-17, 4.7170e-17,\n -2.3180e-20, -7.6825e-20, 2.5161e-17, 5.1298e-17, 1.2141e-18,\n -4.2905e-20, 2.5059e-20, 4.7758e-20, 1.0497e-17, 6.7885e-20,\n 2.7640e-18, 2.2748e-19, 3.5124e-20, 3.0880e-19, 3.7246e-17,\n -1.9483e-19, 1.2235e-18, -8.7355e-19, -2.9075e-21, 1.8682e-18,\n 1.1244e-18, 1.8058e-18, 2.3037e-18, 5.3757e-19, 1.0932e-17,\n -3.4822e-19, 7.1403e-18, -3.5306e-19, -1.3984e-18, 1.5599e-17,\n 1.0971e-17, -3.5764e-20, 1.7271e-18, 2.2453e-19, 9.2442e-19,\n 4.0687e-19, 6.1673e-20, 2.6296e-17, 5.7993e-19, -4.8348e-20,\n 4.1642e-18, 1.2889e-18, 2.7854e-17, 1.7806e-19, -5.9758e-21,\n 2.6886e-17, 6.5852e-18, -3.2760e-19, -3.8220e-20, 4.8755e-20,\n 2.1512e-17, -5.0247e-20, 6.6118e-19, 1.2062e-17, -2.0217e-19,\n 2.6281e-18, 7.1460e-17, -3.1175e-21, 7.6034e-18, 2.7368e-17,\n 1.9216e-17, -7.4413e-20, 1.5789e-18, 2.0375e-18, 7.7500e-20,\n 3.0953e-17, -1.8258e-19, 7.1116e-19, -2.8347e-21, 1.6097e-19,\n -2.5202e-19, 1.1714e-20, 2.6191e-19, -9.3285e-20, -8.5734e-20,\n 4.6538e-17, -2.8944e-19, -3.8108e-19, -2.8041e-19, -2.6826e-20,\n 1.6737e-18, 3.3393e-17, -1.3474e-18, -1.5907e-19, 8.7254e-20,\n 3.6441e-19, 4.1471e-17, 2.1679e-18, 1.3259e-19, 5.6835e-21,\n 1.9313e-19, 6.7008e-17, -1.5669e-19, 2.2752e-19, 1.4092e-18,\n 9.2822e-21, 4.9126e-17, -7.1353e-20, -1.1061e-18, 3.6707e-17,\n 9.5472e-18, 2.5796e-18, 2.5921e-19, 5.6466e-17, 6.4938e-18,\n 1.6799e-20, 1.7061e-17, 4.5042e-18, 2.4739e-17, -1.2503e-19,\n 3.1374e-17, 1.9816e-17, 8.4606e-19, 1.9971e-20, 6.5224e-18,\n -4.8839e-19, -6.7754e-20, 1.5654e-17, 6.9551e-20, 3.0959e-18,\n 1.1470e-18, -8.7068e-20, 2.5018e-19, 6.7200e-18, -3.8930e-19,\n 3.3902e-18, 7.0719e-19, 2.4460e-18, -1.7638e-19, 1.2326e-19,\n -1.3233e-19, -5.6809e-20, 6.3055e-20, 1.9787e-17, -1.3141e-19,\n 9.4371e-20, 2.9743e-19, 2.6133e-17, 1.2048e-17, 2.4076e-17,\n -6.1821e-20, 4.4927e-17, 5.7303e-18, 2.7925e-17, 2.4716e-17,\n 1.3137e-19, 4.7715e-17, 8.6779e-21, 2.5952e-17, 1.1788e-17,\n 3.1697e-19, -1.7042e-19, 8.7179e-19, 6.9560e-20, 9.5808e-19,\n 2.5079e-18, 8.0637e-18, -1.5363e-18, -3.8587e-22, 1.1299e-19,\n -2.3029e-20, 1.2781e-18, 4.2588e-17, 3.4158e-17, 6.6685e-18,\n 8.6500e-18, 2.3560e-19, 4.4456e-20, -5.5133e-19, 1.3445e-19,\n 1.3504e-19, 2.2518e-17, -7.4567e-19, 1.2890e-17, 7.3732e-19,\n 9.7422e-18, 2.0733e-19, 5.4712e-18, 7.5361e-18, 9.9516e-20,\n -2.2520e-19, 4.0435e-20, -2.4279e-18, -9.9662e-21, 3.7129e-22,\n 5.1173e-20, 1.3896e-17, -8.5699e-20, 1.2341e-17, 1.1081e-18,\n 4.3426e-19, -7.0881e-20, -2.0267e-19, 3.0909e-19, 1.2580e-17,\n -8.5686e-21], device='cuda:0')", | |
| "exp_avg_sq": "tensor([2.1656e-13, 2.7018e-14, 2.5511e-16, 8.7914e-13, 2.6748e-15, 2.2630e-15,\n 1.0681e-13, 5.5224e-16, 2.3703e-16, 1.5327e-12, 2.1339e-14, 1.1645e-13,\n 2.7979e-12, 9.4915e-14, 4.6448e-16, 3.7531e-13, 3.3017e-16, 5.2201e-16,\n 5.7920e-16, 2.4099e-15, 6.3105e-17, 2.0469e-17, 1.6353e-15, 2.0116e-13,\n 3.3323e-16, 7.7665e-12, 1.2424e-16, 2.7099e-12, 1.0616e-12, 1.9985e-15,\n 1.1247e-16, 7.4871e-13, 7.4047e-16, 2.1966e-14, 2.4762e-17, 6.0417e-16,\n 2.2719e-15, 2.8926e-13, 3.5600e-16, 3.3077e-15, 1.6840e-16, 4.9818e-16,\n 3.1636e-16, 7.7707e-17, 1.1157e-13, 1.1096e-14, 1.1880e-12, 3.0062e-12,\n 2.1713e-13, 9.4566e-14, 1.1215e-12, 8.2969e-16, 1.0308e-16, 6.7550e-15,\n 3.8249e-13, 7.9538e-14, 8.6749e-16, 3.0359e-17, 1.5750e-13, 1.8198e-12,\n 6.5825e-15, 6.5924e-17, 2.4079e-13, 1.7273e-12, 5.8457e-16, 8.0033e-16,\n 3.5135e-15, 2.6296e-14, 2.3987e-13, 7.6491e-15, 2.4823e-16, 1.3018e-15,\n 6.9329e-18, 3.8176e-16, 3.1688e-12, 1.0582e-16, 1.0438e-17, 3.1057e-17,\n 9.0160e-16, 3.0639e-14, 1.2753e-17, 8.1735e-17, 1.7772e-13, 1.3831e-17,\n 5.8437e-15, 5.7972e-16, 2.1766e-13, 4.4185e-14, 1.2435e-14, 4.3112e-13,\n 3.0580e-13, 3.1575e-14, 2.9337e-16, 8.1306e-17, 1.2244e-14, 8.9363e-15,\n 1.0515e-13, 1.8746e-13, 4.1729e-17, 7.3598e-17, 5.5653e-14, 3.5371e-16,\n 2.9349e-12, 6.0687e-16, 4.1020e-14, 2.9723e-12, 2.8179e-16, 3.8104e-17,\n 3.1141e-15, 1.8837e-15, 8.3416e-13, 5.9172e-16, 3.7600e-18, 2.7591e-13,\n 4.7870e-15, 1.2684e-13, 5.0289e-12, 3.0719e-15, 1.4354e-13, 5.1664e-13,\n 8.4344e-13, 5.0286e-16, 1.9934e-17, 1.5226e-16, 2.7530e-15, 1.6747e-12,\n 8.1070e-17, 1.0511e-13, 3.2929e-16, 3.2505e-13, 4.9168e-16, 2.4106e-15,\n 1.5335e-17, 4.4982e-14, 2.9394e-16, 1.3878e-12, 5.2950e-15, 1.6474e-16,\n 2.7354e-17, 2.4434e-16, 2.2877e-14, 1.1930e-12, 1.6760e-17, 8.2250e-16,\n 1.1913e-15, 3.7719e-15, 1.3944e-12, 1.4939e-14, 8.3698e-15, 3.5651e-15,\n 4.6446e-16, 8.3344e-12, 2.2317e-18, 1.0349e-14, 1.1208e-16, 9.2932e-15,\n 2.4586e-12, 9.7988e-17, 1.3193e-15, 5.2147e-13, 2.7841e-13, 4.0645e-16,\n 2.6186e-16, 4.1989e-12, 8.8084e-14, 9.8974e-16, 7.6912e-14, 3.0411e-13,\n 1.3955e-12, 1.2970e-14, 8.1974e-13, 4.9996e-13, 9.8514e-14, 2.4903e-15,\n 8.9340e-14, 2.4526e-18, 9.1159e-16, 2.6559e-13, 3.0611e-15, 5.8743e-14,\n 3.6570e-13, 3.1558e-16, 1.7678e-15, 1.9204e-13, 5.2728e-17, 1.9480e-13,\n 1.4242e-15, 3.6021e-14, 5.4829e-15, 1.2926e-14, 1.9618e-17, 4.7272e-18,\n 1.1220e-16, 6.4961e-13, 9.0060e-15, 1.3949e-15, 4.3298e-15, 6.8547e-13,\n 1.7720e-14, 8.7492e-13, 3.0591e-18, 6.8968e-12, 2.4777e-13, 2.8624e-13,\n 1.2024e-13, 8.1850e-14, 1.2214e-12, 2.3753e-17, 4.9065e-13, 7.9828e-14,\n 2.9369e-13, 3.9567e-16, 4.7594e-18, 3.7541e-17, 5.5810e-15, 7.0499e-14,\n 7.2329e-14, 7.6489e-17, 7.1230e-16, 9.1802e-18, 3.9247e-14, 5.8167e-14,\n 2.0062e-12, 1.1736e-12, 1.7835e-13, 3.1260e-13, 2.4668e-17, 4.1221e-15,\n 2.2674e-16, 1.2430e-14, 1.5216e-13, 7.0315e-13, 4.1927e-17, 6.9606e-15,\n 1.1729e-18, 2.5788e-13, 4.8983e-15, 1.5246e-12, 4.7336e-16, 4.6290e-15,\n 5.7024e-15, 1.1212e-13, 3.4463e-16, 2.9761e-15, 4.3947e-15, 1.0787e-15,\n 6.4933e-15, 5.7832e-16, 1.4817e-14, 1.0140e-14, 6.5315e-18, 9.6972e-15,\n 4.0143e-15, 8.5015e-16, 2.6538e-14, 9.2595e-15], device='cuda:0')" | |
| }, | |
| "39": { | |
| "step": "tensor(11268.)", | |
| "exp_avg": "tensor([ 5.0238e-18, -1.7341e-18, 1.3535e-19, 2.0285e-17, 9.0156e-19,\n 1.6319e-18, 1.4498e-17, -1.2159e-19, 1.0622e-17, 2.6107e-17,\n 1.7831e-17, -1.2791e-18, 2.9521e-17, 1.0428e-17, 1.1980e-18,\n 1.6296e-17, 1.1732e-17, -7.8801e-19, -2.5163e-18, 2.8406e-18,\n -3.1311e-19, 3.6706e-19, 8.8284e-19, 1.3128e-17, -2.3714e-19,\n 3.3057e-17, 4.9863e-20, 3.2492e-17, 2.2038e-17, 7.3555e-19,\n -5.9746e-19, 2.7722e-17, 5.6662e-18, 1.0522e-18, 5.2447e-19,\n 2.1495e-20, 8.1850e-19, 3.3463e-18, 3.2792e-20, 1.0250e-19,\n 6.6209e-19, 1.2280e-17, -1.1338e-18, -1.2825e-18, 1.5070e-17,\n -1.1209e-18, 1.2817e-17, 2.8896e-17, 4.4569e-18, 1.3322e-17,\n 1.3175e-17, -5.4164e-20, 8.4508e-18, 1.2207e-18, 1.4193e-17,\n 9.8458e-18, 1.0350e-19, -3.6249e-19, 1.7287e-17, 2.3067e-17,\n 1.3854e-18, -4.3640e-20, 2.5197e-17, 3.1809e-17, 8.8773e-18,\n 1.3355e-19, 6.4377e-19, -3.4504e-19, 1.5183e-17, -7.6866e-19,\n 9.7165e-18, -2.2355e-18, 5.3919e-19, 2.1271e-19, 1.9133e-17,\n 7.8581e-19, -8.4114e-19, 6.0925e-19, -2.4229e-19, -1.3703e-18,\n -8.4407e-19, 1.0687e-17, 8.5368e-18, -3.2375e-19, 1.6313e-17,\n 2.0246e-18, 1.1458e-17, -1.3047e-18, 4.8885e-19, 1.7681e-17,\n 1.1221e-17, -2.2616e-19, -1.1141e-18, 2.0447e-19, -5.4574e-19,\n 1.2585e-18, -1.7479e-18, 2.2283e-17, 2.8236e-19, 4.0922e-18,\n 6.4766e-18, 4.4965e-18, 1.5185e-17, 4.4436e-19, 4.4425e-19,\n 1.4067e-17, 1.4097e-17, 1.6950e-19, 1.7534e-18, -6.4620e-19,\n 1.3842e-17, -6.5768e-19, -4.9258e-19, 1.0464e-17, 1.7405e-18,\n 8.8375e-18, 3.1898e-17, -9.5106e-19, 1.1481e-17, 1.7632e-17,\n 1.8313e-17, 8.2745e-19, -1.1520e-18, 1.0124e-17, -1.1884e-18,\n 1.8425e-17, 9.8469e-20, 2.3385e-18, 1.7864e-20, 1.8033e-18,\n -2.5215e-19, 3.5137e-18, -1.7814e-19, 1.4730e-18, 7.3123e-19,\n 2.9133e-17, 3.4550e-18, -3.3360e-20, -4.1375e-19, -1.0264e-18,\n 6.6470e-18, 2.3099e-17, 1.0566e-18, 1.9570e-18, -7.4915e-20,\n 1.0986e-18, 2.4420e-17, -1.2024e-18, 2.3030e-19, 1.4167e-18,\n 1.1064e-19, 3.2482e-17, 1.2742e-19, -3.8711e-18, -9.7260e-19,\n -3.0615e-19, 3.1759e-17, 1.4274e-18, 8.8693e-19, 2.5815e-17,\n 1.5021e-17, -1.8758e-18, 2.3285e-19, 2.7449e-17, 8.2699e-18,\n 2.9700e-19, 1.7331e-17, 6.9811e-18, 1.3228e-17, 1.4675e-18,\n 2.2394e-17, 1.4036e-17, 1.8363e-18, 1.6285e-19, 9.7996e-18,\n 2.2302e-19, 4.2689e-19, 1.3499e-17, -1.6818e-18, 5.4027e-18,\n 3.8119e-18, 5.1772e-19, -2.0931e-18, 1.1786e-17, 2.5291e-19,\n 1.1748e-17, -2.9098e-19, 8.6467e-18, -3.4257e-19, 5.4116e-19,\n 1.0332e-19, 5.2548e-19, -2.2608e-20, 1.4712e-17, -5.4114e-19,\n 1.1377e-18, 1.0071e-18, 1.6348e-17, 2.0895e-17, 1.5964e-17,\n -4.6813e-19, 2.4035e-17, 8.4196e-18, 2.4332e-17, 2.2837e-17,\n 1.1525e-19, 2.8472e-17, 4.7111e-20, 1.8054e-17, 1.4667e-17,\n 7.8245e-19, 1.0607e-18, -3.4754e-19, 8.2598e-20, 5.6017e-18,\n 2.8149e-18, 9.1467e-18, 1.2630e-18, -3.4265e-19, -2.4521e-20,\n 9.1875e-20, 8.9775e-18, 2.1205e-17, 2.3630e-17, 1.2585e-17,\n 1.6531e-17, -3.4730e-20, 1.1025e-20, 3.0892e-18, 2.5060e-18,\n 1.6575e-18, 1.6907e-17, 4.9715e-19, 1.9020e-17, -5.6757e-19,\n 1.3169e-17, -1.0520e-18, 8.0226e-18, 1.4950e-17, -6.1887e-20,\n 2.0888e-18, -1.1222e-19, 1.8419e-18, -3.7532e-19, -7.8763e-19,\n 8.3139e-19, 2.0307e-17, 9.6365e-19, 1.7064e-17, -5.2789e-19,\n -1.4197e-19, 5.6690e-19, 2.3021e-18, 5.6027e-19, 1.7352e-17,\n 3.3916e-19], device='cuda:0')", | |
| "exp_avg_sq": "tensor([2.1964e-13, 1.1759e-14, 1.3371e-14, 5.4760e-13, 9.1747e-16, 5.8751e-15,\n 1.6538e-13, 1.9893e-16, 1.8262e-14, 7.9380e-13, 1.8491e-13, 2.5666e-13,\n 1.1980e-12, 1.3747e-13, 7.2658e-15, 4.4401e-13, 4.6661e-14, 2.0726e-14,\n 1.3411e-14, 1.2145e-15, 4.3724e-16, 5.2456e-18, 6.7649e-16, 3.3701e-13,\n 8.5398e-17, 3.0328e-12, 5.9996e-15, 1.9342e-12, 4.9635e-13, 2.6909e-14,\n 1.6744e-14, 8.1114e-13, 9.9549e-14, 9.5966e-14, 1.1696e-14, 1.7348e-14,\n 9.8427e-16, 5.1242e-13, 2.0978e-16, 1.5503e-15, 4.3652e-17, 5.2463e-14,\n 6.6255e-17, 5.3771e-18, 1.7197e-13, 3.9228e-13, 4.0481e-13, 1.6548e-12,\n 1.5505e-13, 1.7678e-13, 5.0811e-13, 4.7305e-14, 2.4460e-14, 3.2154e-15,\n 2.3130e-13, 5.4677e-14, 2.8292e-16, 1.8896e-15, 3.6700e-13, 6.5836e-13,\n 2.6476e-14, 2.4534e-16, 9.0411e-13, 1.3179e-12, 2.4182e-15, 5.1017e-16,\n 1.6670e-15, 1.6939e-13, 2.0674e-13, 5.8907e-14, 2.5198e-14, 6.8053e-16,\n 7.1772e-15, 2.6024e-14, 1.2983e-12, 7.0622e-18, 1.3523e-15, 1.4656e-15,\n 2.7669e-14, 1.1921e-14, 2.0570e-15, 1.2710e-14, 1.9331e-13, 1.1999e-16,\n 8.9871e-14, 1.0524e-14, 1.4928e-13, 7.7202e-14, 4.2040e-15, 2.4431e-13,\n 9.7269e-13, 7.8959e-14, 1.0695e-15, 3.5954e-17, 6.2810e-15, 4.1023e-15,\n 3.0678e-13, 3.6337e-13, 1.0374e-14, 2.6886e-14, 1.8319e-13, 8.3932e-14,\n 1.4293e-12, 2.9279e-16, 1.7657e-13, 1.3253e-12, 3.6033e-14, 1.2930e-17,\n 1.5569e-15, 9.6140e-16, 5.3960e-13, 2.5786e-17, 6.1181e-17, 3.2384e-13,\n 2.5443e-15, 1.6413e-13, 1.4465e-12, 2.9534e-16, 1.1281e-13, 3.7213e-13,\n 3.7396e-13, 1.0862e-14, 1.3684e-15, 1.8355e-14, 7.9240e-15, 1.0343e-12,\n 1.6530e-14, 1.1113e-13, 2.0198e-14, 6.0369e-13, 2.3624e-16, 1.1949e-15,\n 5.7497e-15, 1.3433e-13, 8.0671e-17, 9.0337e-13, 5.0069e-14, 1.0351e-17,\n 1.0271e-14, 1.0098e-15, 2.3545e-14, 1.1452e-12, 8.8082e-17, 3.7835e-16,\n 1.0344e-16, 1.9324e-15, 1.0330e-12, 5.1166e-15, 3.3290e-15, 1.5899e-14,\n 6.4197e-17, 3.1699e-12, 9.0791e-17, 4.7586e-15, 2.9360e-16, 1.2425e-13,\n 2.2685e-12, 1.7020e-14, 4.8784e-15, 4.9544e-13, 1.5663e-13, 9.2547e-16,\n 1.4743e-16, 1.6766e-12, 2.9183e-13, 4.7847e-16, 1.6138e-13, 1.6097e-13,\n 5.7104e-13, 6.4764e-14, 8.3998e-13, 4.8342e-13, 8.2662e-14, 1.3061e-15,\n 1.2358e-13, 2.5521e-16, 4.5782e-16, 4.3904e-13, 1.4972e-15, 2.3593e-13,\n 3.4035e-13, 1.0791e-16, 3.2289e-14, 1.4541e-13, 9.8710e-15, 3.0913e-13,\n 4.8592e-16, 6.4963e-14, 2.3714e-15, 2.2236e-13, 1.3929e-15, 4.8991e-15,\n 5.8178e-15, 9.0381e-13, 1.0817e-13, 6.7833e-16, 1.7925e-15, 4.6907e-13,\n 1.8476e-13, 3.8168e-13, 8.0325e-16, 3.2358e-12, 1.3837e-13, 7.1137e-13,\n 4.0826e-13, 1.9476e-13, 8.6136e-13, 1.7601e-18, 5.5550e-13, 5.2321e-13,\n 2.2095e-13, 1.4593e-16, 4.5127e-16, 6.6862e-17, 5.1202e-14, 3.2863e-13,\n 1.7065e-13, 7.6285e-17, 1.3964e-16, 5.7870e-17, 1.6083e-13, 1.0995e-13,\n 6.9800e-13, 1.1720e-12, 1.7324e-13, 3.4477e-13, 3.6979e-15, 2.5770e-14,\n 2.9402e-14, 3.7137e-14, 2.7731e-13, 7.1210e-13, 4.0079e-14, 2.4101e-13,\n 7.8215e-16, 1.4458e-13, 2.0337e-15, 6.3661e-13, 1.0050e-13, 1.4686e-15,\n 2.9054e-14, 4.7978e-13, 2.8108e-14, 1.5636e-15, 2.0846e-15, 4.9837e-16,\n 2.2780e-13, 2.6849e-16, 1.0940e-13, 3.4731e-15, 7.1850e-16, 1.3752e-13,\n 4.2244e-14, 3.9929e-16, 3.8167e-13, 4.8346e-14], device='cuda:0')" | |
| }, | |
| "40": { | |
| "step": "tensor(11268.)", | |
| "exp_avg": "tensor([[ 1.3897e-20, -5.1581e-21, 2.4950e-20, ..., 1.1185e-21,\n 1.5859e-20, 5.3446e-22],\n [ 3.2329e-20, 3.4154e-20, 3.5930e-20, ..., -2.1796e-20,\n 1.4656e-20, 4.8783e-21],\n [-1.4470e-19, -1.6699e-19, -6.5196e-20, ..., -2.7737e-19,\n -8.6337e-20, -6.3772e-20],\n ...,\n [ 1.3396e-20, -4.8136e-20, -8.3993e-21, ..., -4.9968e-20,\n -7.6083e-21, -2.5942e-20],\n [-1.1857e-19, -2.1069e-20, 1.6033e-20, ..., -3.0433e-19,\n -1.3869e-20, -2.8400e-20],\n [-5.9315e-20, -1.8313e-20, 3.8895e-20, ..., -1.5971e-20,\n -5.2056e-20, 3.7403e-21]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[4.0958e-16, 1.1391e-16, 3.8145e-16, ..., 4.5885e-16, 4.8204e-16,\n 6.3393e-16],\n [1.4828e-17, 2.3527e-17, 1.2238e-17, ..., 1.3775e-17, 7.8538e-17,\n 3.3161e-17],\n [2.2404e-17, 3.0888e-18, 1.0437e-17, ..., 3.4478e-18, 2.1482e-17,\n 9.6781e-18],\n ...,\n [1.6072e-17, 1.7605e-18, 1.0460e-17, ..., 2.3448e-18, 1.2585e-17,\n 4.7835e-18],\n [1.2649e-15, 3.4399e-16, 9.0695e-16, ..., 1.0139e-15, 1.5554e-15,\n 1.7392e-15],\n [2.7183e-16, 8.0608e-17, 2.6901e-16, ..., 1.0048e-16, 3.6329e-16,\n 2.8371e-16]], device='cuda:0')" | |
| }, | |
| "41": { | |
| "step": "tensor(11268.)", | |
| "exp_avg": "tensor([ 2.1935e-18, 2.4371e-18, -9.1740e-17, 4.3076e-18, 7.3993e-17,\n -9.6844e-18, -2.5657e-17, -1.2283e-16, 1.6964e-16, 2.0604e-16,\n 1.4103e-17, -6.4167e-19, 1.6137e-16, 2.9079e-18, -1.8674e-16,\n 2.8460e-16, 3.6892e-16, 1.4869e-17, -4.1067e-17, 3.3325e-18,\n 1.1546e-16, 5.7009e-18, -3.9867e-17, 1.0080e-16, 6.1045e-17,\n 6.3604e-16, 6.0328e-17, 5.3646e-16, 4.1686e-16, 1.2464e-16,\n -1.6916e-16, 5.2131e-16, -2.7864e-16, -6.9987e-18, -1.4901e-17,\n -9.2730e-17, 3.9670e-17, -8.8136e-18, -2.4802e-16, 6.7692e-17,\n -2.8557e-17, 1.4906e-16, -7.4122e-17, -1.1500e-16, 5.1557e-17,\n -2.8991e-16, 1.4747e-19, 3.5718e-16, -1.9492e-16, 9.4188e-17,\n 1.9202e-17, 2.1627e-17, 2.2233e-16, -3.2334e-16, -3.1111e-18,\n -6.4947e-17, -3.8709e-16, 2.6800e-16, 5.1686e-17, -1.7197e-16,\n -7.5244e-17, -1.0517e-16, 2.9914e-16, 3.2384e-17, 5.5793e-18,\n -7.2773e-17, 2.2481e-17, -2.7819e-17, -9.2439e-18, 4.5088e-17,\n 1.4231e-18, 5.2152e-18, -3.2798e-16, 5.1646e-17, -3.6065e-16,\n 3.2183e-17, -1.1382e-18, -5.8276e-17, -3.7068e-16, 3.0287e-16,\n -4.5904e-17, -1.8293e-17, -3.1120e-17, 1.1834e-16, 1.1251e-16,\n 4.7196e-17, -2.9440e-17, 1.0942e-16, -1.8208e-16, 1.0729e-16,\n -3.8454e-16, 4.6293e-17, 2.6110e-16, -4.3427e-17, -4.8414e-17,\n 2.2553e-17, 2.2819e-17, -1.2845e-16, 2.8961e-16, -5.9871e-17,\n 3.3738e-18, 4.0068e-17, 1.8130e-17, -9.5899e-17, -1.6078e-17,\n 1.0101e-16, -3.9186e-17, 5.7499e-17, 3.7498e-16, 4.5765e-17,\n -5.4084e-17, 1.5651e-16, -4.2442e-17, -1.3168e-17, 1.4410e-17,\n 9.0110e-17, 1.6557e-16, -1.5352e-16, -2.1413e-17, 4.1431e-18,\n -7.8633e-18, -8.2939e-17, -1.6585e-16, 8.8545e-17, -2.2133e-17,\n 6.0181e-17, -3.2621e-16, 2.5414e-17, -1.1798e-16, -2.3810e-16,\n 1.4482e-17, 2.7004e-16, -4.1625e-17, 2.4880e-17, 5.7444e-17,\n 4.9891e-16, -2.1024e-16, 2.3371e-17, 5.0962e-17, 8.8830e-17,\n -1.9631e-16, -3.0466e-16, -1.4732e-16, 4.3203e-18, 4.1829e-17,\n 5.2808e-17, 1.8780e-16, 1.8331e-16, -3.5461e-17, -5.0920e-17,\n -4.4388e-17, 4.4321e-16, -1.3214e-16, -2.7403e-17, 8.6408e-17,\n -1.8750e-16, -2.2036e-17, -1.8805e-16, -2.1008e-16, 2.5340e-17,\n -9.1612e-17, 5.2282e-16, -2.1899e-17, 4.4340e-18, 5.2307e-17,\n -9.2037e-17, 6.1449e-18, -6.8137e-17, -2.5193e-17, -5.3587e-18,\n 3.4532e-17, 1.1365e-16, -1.3630e-16, -1.4879e-16, -1.1258e-16,\n -2.2229e-17, 4.7273e-19, 2.0892e-17, -3.5979e-17, -8.8450e-17,\n -3.8022e-17, -2.2427e-17, 6.7759e-17, 1.9687e-18, 4.0541e-17,\n -5.2827e-17, -1.8003e-17, -1.8754e-16, 2.5053e-17, -1.0752e-16,\n -2.4430e-16, -2.6775e-16, -1.2157e-16, 1.9445e-17, -2.1136e-17,\n 1.2491e-17, -7.7301e-17, -7.7090e-17, 4.4441e-16, -3.6240e-16,\n 9.0491e-17, -1.2096e-18, 3.2751e-17, 3.7590e-16, -8.6303e-17,\n 3.8664e-17, 3.0342e-16, 7.8975e-18, -6.9699e-17, -3.4801e-17,\n -1.6562e-17, -4.8118e-17, -2.7389e-16, 5.9889e-17, 2.1803e-19,\n -8.7477e-17, -6.6840e-17, -2.4859e-16, -1.4073e-16, -8.6907e-17,\n -4.0153e-17, -2.8038e-17, 1.7190e-17, 2.2907e-17, -1.8084e-17,\n -2.1819e-17, -2.4711e-17, -1.8443e-17, 3.2737e-16, -5.6667e-18,\n -1.1414e-16, 2.7278e-17, 8.0256e-17, -1.4893e-17, -6.7308e-18,\n -4.5506e-17, -1.2001e-16, 6.5552e-18, -1.1755e-16, -1.9857e-16,\n 4.0848e-17, -2.0041e-16, 1.3710e-16, -8.3504e-18, 2.3915e-17,\n -3.8849e-17, 3.1536e-17, -2.8959e-17, -1.3280e-16, 2.4129e-17,\n -4.4575e-17, -4.5748e-17, 5.2093e-17, -3.8637e-17, -2.4193e-17,\n -2.1470e-17], device='cuda:0')", | |
| "exp_avg_sq": "tensor([9.4996e-11, 7.5769e-12, 3.6706e-12, 2.1233e-12, 2.5804e-10, 5.9092e-11,\n 5.9533e-12, 2.2520e-10, 6.7360e-10, 1.2736e-10, 8.7820e-14, 4.9800e-10,\n 6.8883e-10, 1.8200e-11, 2.2385e-11, 9.2562e-10, 2.0795e-09, 6.3584e-13,\n 6.9524e-11, 1.6844e-12, 8.2374e-10, 5.1995e-12, 8.5453e-11, 1.7078e-11,\n 2.4814e-11, 3.7412e-09, 1.1330e-10, 1.7749e-10, 1.8646e-10, 8.1517e-11,\n 4.1836e-09, 8.6247e-10, 6.1126e-10, 3.2126e-11, 2.0133e-10, 9.6009e-10,\n 1.4197e-11, 2.0101e-09, 3.3631e-10, 6.7620e-11, 5.1657e-11, 1.6805e-11,\n 8.7024e-11, 1.0766e-11, 4.8780e-10, 4.6218e-09, 4.1446e-13, 9.2650e-10,\n 4.2745e-10, 5.6265e-10, 2.8902e-11, 1.3180e-12, 1.0766e-09, 2.6140e-09,\n 1.3039e-12, 1.4712e-10, 7.1593e-11, 1.4865e-10, 1.0248e-11, 5.9279e-10,\n 1.8466e-11, 3.5504e-11, 1.4027e-09, 1.4305e-11, 6.0993e-12, 1.4519e-09,\n 6.0651e-11, 2.1497e-10, 5.9009e-12, 1.9933e-10, 1.4279e-11, 1.0633e-10,\n 4.6061e-10, 1.1053e-11, 1.7665e-09, 2.7960e-11, 2.8210e-12, 5.2425e-11,\n 1.6183e-09, 1.0792e-09, 4.2064e-12, 1.8716e-12, 1.0197e-10, 1.9479e-11,\n 1.6181e-10, 2.8855e-11, 7.6803e-12, 5.2759e-12, 3.1738e-11, 3.5833e-11,\n 2.5678e-09, 7.2342e-13, 2.7026e-10, 1.9184e-12, 1.7652e-09, 4.1966e-11,\n 1.4348e-10, 6.1789e-11, 3.2469e-10, 1.3920e-10, 2.1006e-11, 1.1640e-11,\n 1.2147e-12, 3.7078e-11, 2.7413e-10, 3.2354e-10, 1.5830e-10, 1.1420e-11,\n 4.6984e-10, 2.8054e-11, 8.7594e-11, 1.4962e-11, 7.2365e-11, 5.2885e-13,\n 1.5343e-11, 1.0533e-10, 1.8438e-10, 2.4931e-11, 6.2691e-11, 8.6766e-12,\n 1.2155e-11, 3.3278e-10, 7.8852e-11, 4.7094e-10, 3.1269e-11, 5.0204e-13,\n 1.2744e-09, 1.1966e-12, 6.0913e-10, 2.7169e-09, 4.9025e-11, 4.6770e-10,\n 2.4233e-11, 5.3172e-11, 4.5347e-10, 1.3567e-10, 1.7647e-11, 3.5868e-11,\n 1.5371e-11, 1.3513e-11, 2.4088e-10, 6.8288e-10, 8.9511e-12, 9.6847e-13,\n 8.5667e-12, 1.5134e-12, 5.0066e-10, 7.0874e-10, 6.3044e-12, 1.0133e-11,\n 2.9886e-10, 1.4491e-09, 8.9208e-11, 2.5700e-12, 6.3060e-10, 6.0739e-10,\n 3.0142e-09, 1.3504e-10, 2.5979e-09, 1.8156e-12, 2.4989e-10, 1.2030e-09,\n 1.1643e-11, 1.4033e-12, 3.9159e-10, 3.3196e-10, 1.9987e-11, 2.0169e-10,\n 2.5072e-11, 7.1008e-13, 1.5744e-11, 1.3808e-10, 5.1479e-10, 8.1375e-10,\n 9.1554e-11, 3.3933e-11, 6.5603e-13, 3.2952e-11, 1.4943e-12, 6.7853e-10,\n 8.8313e-12, 6.9232e-12, 1.0047e-10, 2.7973e-12, 1.0828e-12, 3.2439e-10,\n 1.0209e-11, 1.4679e-10, 1.2395e-10, 1.8038e-09, 1.0314e-09, 4.1838e-10,\n 2.7220e-11, 9.3940e-12, 3.9699e-13, 1.4233e-10, 4.5748e-10, 2.1408e-10,\n 4.8606e-10, 1.8525e-10, 2.2425e-11, 6.1953e-11, 7.4251e-13, 1.4330e-09,\n 5.3358e-11, 1.0479e-12, 5.5299e-10, 7.7087e-13, 1.4478e-11, 3.6525e-11,\n 7.5149e-13, 9.3159e-12, 2.9166e-10, 4.8678e-11, 5.9533e-13, 2.2806e-12,\n 6.1629e-11, 6.8759e-10, 2.0959e-11, 6.0776e-12, 6.2146e-10, 8.1743e-10,\n 4.9028e-13, 1.6072e-09, 1.5704e-12, 2.0746e-10, 2.8848e-11, 6.1008e-12,\n 4.2115e-10, 7.9604e-13, 7.8716e-10, 5.2075e-12, 3.0648e-11, 1.5632e-12,\n 4.2095e-11, 6.1887e-11, 1.7045e-09, 2.1721e-09, 4.2194e-10, 1.0245e-10,\n 2.1375e-11, 2.7183e-09, 3.6235e-11, 2.0817e-13, 7.4806e-11, 1.0848e-12,\n 1.4355e-11, 2.5433e-11, 2.1413e-10, 2.6480e-12, 1.0605e-11, 4.3615e-10,\n 2.7646e-11, 1.1608e-12, 2.7390e-10, 4.6686e-11], device='cuda:0')" | |
| }, | |
| "42": { | |
| "step": "tensor(11268.)", | |
| "exp_avg": "tensor([-1.6887e-19, 1.3600e-18, -2.7512e-20, 6.9022e-19, 2.9635e-17,\n 2.4584e-19, 7.3960e-21, 2.0741e-18, 2.7333e-17, 3.3951e-17,\n 1.2700e-19, 3.5882e-21, 1.3168e-17, 1.8047e-19, 2.5683e-18,\n 3.9744e-17, 1.8857e-17, -1.4418e-19, -2.3337e-19, -3.3237e-19,\n 3.3015e-17, -6.0888e-20, -2.9422e-20, 5.2773e-18, 7.2350e-19,\n 3.9220e-17, -8.4690e-19, 9.5606e-18, 1.2830e-17, 1.4340e-17,\n 2.6445e-18, 2.6474e-17, 4.7005e-18, 1.5366e-19, -5.0160e-20,\n -8.5298e-22, 6.4906e-18, 1.5080e-18, 1.0778e-17, 2.5740e-19,\n -1.7819e-19, 8.4148e-18, 1.1742e-20, 8.2459e-20, 1.9560e-18,\n 3.0030e-19, 3.1850e-19, 4.6390e-17, 1.6279e-19, 2.9299e-17,\n 3.8633e-19, 1.7479e-19, 3.4871e-17, 1.0480e-18, -8.7943e-20,\n -1.7955e-19, -3.1815e-19, 1.5581e-17, 1.6333e-18, 2.7993e-17,\n 2.8091e-20, 3.8499e-20, 2.9977e-17, 1.6127e-18, 4.4398e-19,\n 1.2437e-18, 1.1291e-17, -5.2524e-20, 3.9274e-18, -2.7235e-19,\n 3.3405e-19, -2.2175e-19, 2.1879e-18, 6.4142e-19, 1.9616e-17,\n 1.4873e-19, 1.5163e-19, -7.7430e-21, 1.9516e-19, 2.2326e-17,\n -1.7490e-19, 1.4416e-19, -1.2592e-19, -6.5650e-19, 3.7470e-17,\n 5.8309e-19, -9.3113e-20, -7.1144e-19, 4.1452e-19, 1.8411e-17,\n 1.3818e-17, 8.7793e-20, 1.8441e-17, -1.7664e-19, 3.5786e-17,\n 5.2109e-18, 3.0695e-20, 2.5852e-17, 1.3976e-17, 2.0503e-17,\n 2.8438e-19, 6.9468e-19, 5.2330e-19, 8.0125e-18, 4.2395e-20,\n 9.8878e-18, 3.1906e-17, -2.2430e-19, 1.0699e-17, 4.0728e-19,\n 4.9727e-18, -1.7537e-18, -1.0877e-19, 6.8916e-20, 3.7072e-19,\n 7.2088e-18, 4.1948e-17, 2.2759e-19, 6.5045e-20, 1.6422e-19,\n -9.9429e-20, 3.2763e-19, 1.2500e-17, 1.1830e-17, 2.0643e-20,\n -9.2436e-20, -3.5667e-19, -5.6642e-20, -1.7593e-21, -1.9676e-20,\n -2.1182e-19, 4.0045e-17, -4.4124e-20, 4.7857e-19, 5.3402e-18,\n 2.2743e-17, 3.7897e-18, 5.0730e-20, -4.1618e-19, -6.4730e-19,\n -9.0365e-21, 2.0176e-17, 1.2356e-19, 8.3420e-20, 1.4714e-19,\n 1.5987e-18, 1.1405e-17, 3.6382e-17, -2.0054e-19, -2.0442e-19,\n -1.4843e-19, 2.0132e-17, 2.5271e-21, -2.0586e-19, 2.8198e-17,\n 4.8871e-21, 3.4292e-17, 1.4053e-18, 2.6708e-19, -1.6374e-19,\n -9.8136e-20, 4.3498e-17, 3.3735e-19, 3.8617e-19, 8.4199e-18,\n -2.7175e-20, 4.9685e-19, 4.8601e-19, 6.2093e-19, 1.2140e-19,\n 1.0449e-18, 9.2871e-18, 3.8958e-20, 5.9189e-20, 1.8833e-17,\n 2.1434e-20, -2.9024e-19, 7.9209e-19, 1.0170e-21, 8.7524e-18,\n -2.9720e-19, -2.1443e-19, -6.2806e-19, -2.6291e-19, -2.2753e-19,\n -9.1495e-20, -3.4524e-19, 5.3990e-18, 6.2578e-20, 9.5924e-20,\n 1.3622e-18, 8.0444e-18, -1.7394e-20, 1.7218e-18, -1.2642e-19,\n 8.9306e-18, 1.0185e-17, 1.1599e-17, 3.7519e-17, 1.6764e-17,\n -8.7591e-19, 1.1616e-18, 5.0298e-19, 5.1256e-17, 1.4167e-17,\n -4.3199e-19, 5.0771e-17, 6.8023e-20, -2.0987e-19, 1.8693e-18,\n 6.2798e-20, 4.0174e-18, 1.8459e-19, 9.5958e-20, 2.4510e-19,\n -8.4252e-20, -1.7936e-19, 2.7677e-20, 1.7012e-19, 2.3416e-20,\n -1.7167e-19, -2.1565e-19, 7.9444e-19, 1.8059e-17, -1.6912e-19,\n -1.2607e-19, -5.7606e-20, -1.3312e-19, 3.7372e-17, -2.1189e-19,\n -3.1870e-20, 1.7638e-19, -2.3952e-19, -1.7737e-19, 1.1257e-19,\n -3.3188e-20, 7.8494e-20, 4.5763e-18, 2.1236e-17, 7.7357e-20,\n 5.0233e-19, 7.5551e-20, -1.2868e-18, 6.0996e-20, -1.1286e-19,\n 9.6776e-22, 9.0079e-19, 8.4333e-20, 2.4976e-17, 1.1983e-18,\n -7.4418e-20, 1.8266e-20, 1.3301e-17, -1.1725e-19, 5.6889e-18,\n 3.7626e-20], device='cuda:0')", | |
| "exp_avg_sq": "tensor([7.1914e-16, 2.3819e-14, 3.1998e-17, 1.1253e-14, 1.1244e-12, 3.6406e-16,\n 4.9342e-16, 5.2957e-14, 1.2935e-12, 1.0801e-12, 2.3845e-16, 8.1864e-16,\n 1.5223e-13, 6.5550e-15, 1.8285e-13, 2.1454e-12, 3.5154e-13, 2.9410e-17,\n 1.8517e-15, 2.4977e-15, 2.0506e-12, 4.7242e-16, 2.4002e-14, 9.8235e-16,\n 2.4340e-14, 2.5972e-12, 7.6620e-17, 8.2695e-16, 2.2621e-14, 4.0227e-13,\n 1.8724e-12, 5.9922e-13, 1.4277e-13, 2.1257e-15, 4.9857e-16, 7.5791e-14,\n 2.3329e-13, 4.7537e-13, 5.7531e-13, 1.2333e-16, 1.3205e-15, 6.4056e-15,\n 1.0629e-14, 3.6241e-16, 1.4520e-13, 3.2904e-13, 7.0973e-17, 3.4028e-12,\n 1.5103e-14, 1.1553e-12, 5.8573e-15, 2.4102e-17, 1.8199e-12, 9.1333e-13,\n 3.5223e-15, 5.0369e-17, 2.1119e-14, 6.7916e-14, 2.5480e-14, 2.8285e-12,\n 1.2968e-15, 3.1034e-15, 1.3194e-12, 2.7513e-14, 1.5135e-14, 3.5364e-13,\n 5.4799e-14, 1.4662e-15, 2.5295e-16, 1.8490e-16, 1.0877e-14, 2.0757e-17,\n 2.4260e-14, 3.9594e-17, 4.6464e-12, 4.3668e-18, 5.3099e-17, 1.2712e-15,\n 1.6502e-13, 3.6057e-13, 6.2973e-16, 6.0945e-15, 4.1866e-15, 1.7724e-16,\n 1.4082e-12, 1.8186e-17, 3.0787e-16, 9.5593e-18, 3.1945e-15, 1.8461e-13,\n 3.2720e-12, 3.8430e-17, 7.1518e-13, 3.2616e-17, 5.2212e-12, 8.5226e-14,\n 7.9405e-17, 1.0559e-12, 5.4211e-13, 6.3320e-13, 5.7035e-15, 1.3711e-14,\n 8.6781e-15, 8.8993e-14, 1.5068e-15, 3.7970e-14, 1.5489e-12, 4.8655e-19,\n 3.4559e-13, 1.2588e-17, 1.0525e-14, 5.6628e-17, 2.3095e-15, 1.5857e-16,\n 1.0628e-14, 1.5815e-13, 1.7690e-12, 8.1469e-15, 4.5183e-18, 4.0651e-15,\n 1.9349e-15, 1.4493e-14, 5.5708e-13, 1.4302e-13, 4.2621e-16, 4.2284e-16,\n 8.5692e-14, 1.5703e-16, 1.7559e-14, 5.6720e-13, 1.5808e-15, 2.0777e-12,\n 3.2186e-15, 1.7768e-17, 1.9499e-13, 1.2623e-13, 1.5767e-13, 5.4662e-15,\n 1.3750e-17, 4.9923e-18, 4.0536e-15, 2.0344e-12, 3.4997e-16, 1.0486e-16,\n 1.4378e-16, 9.6124e-17, 5.3906e-14, 1.8725e-12, 1.5165e-15, 1.9161e-17,\n 1.4355e-14, 2.7550e-13, 1.0605e-16, 1.3519e-15, 1.6082e-12, 3.8251e-14,\n 6.1930e-12, 1.6487e-14, 8.3722e-14, 5.5571e-17, 2.0135e-14, 2.2133e-12,\n 3.7731e-16, 1.1015e-14, 5.2900e-14, 2.2293e-15, 5.5691e-15, 4.5493e-14,\n 1.2745e-16, 1.0488e-16, 3.0607e-14, 2.6877e-14, 1.2328e-14, 5.9030e-14,\n 5.5117e-13, 4.5065e-18, 7.5596e-16, 1.2951e-14, 4.3565e-16, 2.0592e-13,\n 2.9328e-15, 5.9428e-16, 3.1589e-16, 4.8516e-16, 9.5883e-19, 8.7057e-15,\n 6.4439e-18, 1.9739e-13, 7.8212e-17, 2.3121e-13, 5.0725e-14, 3.1780e-13,\n 2.0701e-15, 4.0906e-14, 3.9503e-17, 1.4369e-13, 1.9730e-13, 2.6546e-13,\n 1.0832e-12, 1.4001e-12, 2.7846e-17, 9.1207e-16, 1.3683e-17, 4.4412e-12,\n 3.6795e-13, 1.7072e-16, 2.8381e-12, 9.3156e-18, 9.4549e-17, 1.6022e-16,\n 1.3875e-16, 1.9793e-13, 3.6047e-15, 2.8124e-17, 1.2891e-17, 4.9386e-17,\n 1.1026e-16, 1.6105e-14, 3.3814e-15, 1.2064e-15, 2.2461e-14, 3.9035e-14,\n 1.5388e-17, 9.4098e-13, 3.5753e-16, 1.4287e-15, 3.5002e-18, 7.2735e-16,\n 1.7081e-12, 7.3686e-17, 5.2319e-14, 1.5824e-16, 2.5573e-16, 8.9910e-17,\n 3.1927e-16, 1.4953e-17, 2.0108e-13, 9.2863e-13, 1.0758e-12, 2.3355e-15,\n 3.5477e-17, 4.2429e-13, 2.9272e-16, 9.2810e-18, 5.8546e-17, 1.1424e-16,\n 1.9200e-14, 4.3378e-16, 1.3461e-12, 2.9432e-16, 2.4225e-16, 2.7780e-15,\n 6.7908e-14, 1.2658e-16, 1.6885e-14, 4.4287e-15], device='cuda:0')" | |
| }, | |
| "43": { | |
| "step": "tensor(11268.)", | |
| "exp_avg": "tensor([ 1.2427e-18, -4.5071e-19, -1.5182e-19, -1.0762e-19, 1.7328e-17,\n -4.8811e-20, 7.8141e-19, 4.4998e-18, 1.8204e-17, 2.0103e-17,\n 4.6409e-18, 8.8088e-21, 1.4138e-17, 5.0873e-20, 4.4208e-18,\n 2.2280e-17, 1.8896e-17, 1.4775e-20, -5.6981e-19, 3.0396e-18,\n 1.8678e-17, 2.0125e-18, 2.9321e-18, 1.0077e-17, 5.7559e-18,\n 2.7437e-17, 4.4025e-19, 1.9340e-17, 1.8129e-17, 1.4004e-17,\n 4.4413e-18, 2.3225e-17, 4.9533e-18, -4.2594e-20, -1.2531e-19,\n 1.6503e-18, 9.5625e-18, 5.5921e-18, 8.1731e-18, -1.8624e-19,\n 9.9633e-19, 1.2233e-17, -6.5180e-19, -1.0926e-18, 7.0380e-18,\n -3.1683e-18, 1.0462e-21, 2.4666e-17, 1.2513e-18, 1.7483e-17,\n -3.3993e-20, -1.3744e-19, 2.0422e-17, 1.4475e-18, 8.6416e-19,\n 9.3958e-19, -5.5770e-19, 1.6542e-17, -1.2586e-18, 1.3879e-17,\n -1.8643e-19, -7.8209e-19, 2.0527e-17, -1.1566e-18, -2.7829e-21,\n 4.4381e-18, 1.1479e-17, -5.4950e-20, 7.5339e-18, 1.8526e-19,\n -5.8929e-20, 1.5273e-20, 2.7982e-18, -4.8980e-19, 9.8562e-18,\n -6.1370e-20, 1.1386e-19, -6.1768e-19, 4.3192e-19, 1.8817e-17,\n 9.4396e-19, 5.0415e-19, 1.3637e-18, 4.9393e-19, 1.9439e-17,\n -4.3229e-19, 1.0304e-18, 5.7742e-19, -2.4159e-18, 1.4890e-17,\n 7.8028e-18, -6.3046e-20, 1.7329e-17, 5.7684e-19, 1.7124e-17,\n 8.8703e-18, 3.6855e-21, 1.3954e-17, 1.6241e-17, 1.3339e-17,\n 1.1950e-19, -4.9925e-19, 1.0499e-18, 8.6362e-18, 4.8713e-19,\n 1.2085e-17, 1.6330e-17, 1.6396e-19, 1.6674e-17, -2.9613e-19,\n 7.6947e-18, 1.4124e-18, 8.9168e-19, 2.2499e-19, 2.2563e-19,\n 1.0825e-17, 2.0964e-17, -1.5990e-18, 2.7986e-19, 1.2232e-18,\n 1.7963e-18, 2.8972e-18, 9.6404e-18, 1.2666e-17, 2.6346e-19,\n 4.4519e-18, -9.8212e-19, 4.1461e-20, 1.8444e-19, 5.2566e-19,\n -3.2124e-20, 2.2251e-17, 5.4680e-19, -2.4719e-19, 9.2216e-18,\n 2.2003e-17, 5.0306e-18, -2.7846e-20, 2.1818e-19, 5.1925e-19,\n -6.7338e-19, 1.0435e-17, -1.2945e-18, 6.4860e-19, -1.2302e-19,\n 6.7534e-18, 1.3940e-17, 2.0234e-17, 1.3672e-18, 9.9201e-19,\n 1.6216e-18, 2.0562e-17, -7.6615e-20, -3.2615e-19, 1.7153e-17,\n -3.2965e-19, 1.7121e-17, 3.2007e-18, -1.8089e-18, 3.4455e-18,\n 1.3854e-18, 2.6573e-17, 3.8283e-18, 3.3449e-19, 1.0727e-17,\n 3.4747e-19, 2.9470e-19, 3.5530e-18, 4.6644e-18, 1.7113e-19,\n -6.8624e-19, 1.1994e-17, 8.4669e-19, 8.8707e-19, 1.2257e-17,\n 8.3150e-20, 2.1065e-18, -5.3620e-19, -9.0046e-20, 9.0221e-18,\n 1.2438e-18, 1.6093e-18, 4.6736e-19, 2.2922e-18, 1.2201e-19,\n 6.8337e-19, 1.5858e-18, 6.1218e-18, -2.9566e-20, 1.6379e-18,\n 2.5298e-18, 6.7615e-18, -1.1614e-18, -1.1266e-18, -1.0225e-19,\n 1.0441e-17, 9.6989e-18, 1.0247e-17, 2.4247e-17, 8.9139e-18,\n 6.0120e-19, 5.6155e-18, -3.8341e-19, 2.5662e-17, 1.1163e-17,\n 3.1442e-19, 2.4505e-17, 4.3773e-21, 1.7560e-18, 5.6998e-18,\n -3.2623e-22, 7.1675e-18, 6.8464e-19, -5.8184e-20, 1.3494e-19,\n 1.2786e-18, 1.3816e-18, -2.1511e-18, -9.9601e-19, -1.0262e-19,\n 1.4079e-18, 1.9565e-18, -6.3358e-19, 1.3711e-17, 5.8501e-19,\n 6.9512e-19, 3.6229e-19, 1.4071e-18, 2.2284e-17, 9.9410e-19,\n 3.9408e-19, 4.3903e-18, 1.8627e-19, 2.4158e-18, 3.3733e-19,\n 3.7569e-19, 1.4711e-18, 8.0524e-18, 1.2847e-17, 5.3632e-19,\n -3.6532e-19, 4.6113e-19, 9.6309e-19, 2.2482e-20, 6.3676e-20,\n -3.2621e-19, -5.4877e-19, 1.6150e-19, 1.3664e-17, 6.0171e-18,\n 1.1738e-18, -4.6913e-20, 1.2547e-17, 8.2083e-19, 8.3250e-18,\n 2.9908e-18], device='cuda:0')", | |
| "exp_avg_sq": "tensor([3.6134e-14, 1.0898e-14, 1.9157e-17, 6.1989e-15, 4.6074e-13, 1.6127e-16,\n 2.8316e-16, 4.0509e-14, 9.4072e-13, 5.3790e-13, 1.0886e-14, 1.3160e-13,\n 6.3349e-13, 3.0694e-15, 1.3578e-13, 1.1527e-12, 1.3582e-12, 5.7353e-18,\n 8.8421e-16, 3.9400e-14, 1.1084e-12, 2.1376e-16, 6.0472e-14, 6.1291e-14,\n 7.8392e-14, 2.5703e-12, 1.1551e-14, 1.8445e-13, 2.5081e-13, 2.8135e-13,\n 1.0645e-12, 9.0263e-13, 4.9096e-13, 1.1195e-15, 2.5868e-14, 1.6989e-13,\n 1.7662e-13, 5.8168e-13, 4.4460e-13, 2.0763e-14, 1.2515e-14, 9.3403e-14,\n 4.8678e-15, 1.5348e-16, 2.0419e-13, 1.2914e-12, 2.4268e-15, 1.3444e-12,\n 1.8565e-14, 8.0256e-13, 2.6954e-15, 9.4345e-16, 1.1615e-12, 4.2162e-13,\n 2.0652e-15, 3.9760e-16, 9.1205e-14, 2.6261e-13, 1.3089e-14, 9.9100e-13,\n 6.8769e-16, 1.6359e-15, 1.2874e-12, 1.1936e-14, 7.7276e-15, 3.3683e-13,\n 1.8723e-13, 4.2697e-14, 4.5788e-14, 4.6468e-14, 5.4063e-15, 6.6925e-15,\n 1.2551e-14, 2.8551e-15, 1.6689e-12, 2.7387e-15, 2.0837e-15, 6.6149e-16,\n 1.0476e-13, 8.8228e-13, 3.3481e-16, 3.0176e-15, 3.8969e-14, 5.0704e-15,\n 5.8746e-13, 4.9134e-15, 1.5493e-16, 4.0480e-16, 1.7583e-15, 2.3408e-13,\n 1.6053e-12, 6.5632e-16, 4.4142e-13, 1.3536e-17, 1.7577e-12, 1.7464e-13,\n 4.1144e-14, 4.5438e-13, 2.8341e-13, 4.3880e-13, 2.9365e-15, 6.8832e-15,\n 4.2186e-15, 1.5901e-13, 4.5991e-14, 3.2800e-13, 6.1708e-13, 1.4866e-17,\n 3.0081e-13, 2.1479e-15, 1.3936e-13, 2.1288e-15, 2.0609e-14, 3.9578e-17,\n 4.8667e-15, 2.2612e-13, 6.7441e-13, 4.2011e-15, 6.8098e-16, 2.1296e-15,\n 1.0647e-15, 2.0844e-14, 2.3373e-13, 4.7292e-13, 1.2066e-14, 5.2152e-16,\n 1.0659e-13, 8.8981e-17, 7.2295e-14, 5.9218e-13, 7.1427e-16, 9.2574e-13,\n 1.6790e-15, 1.4401e-14, 1.1399e-13, 3.0643e-13, 1.7958e-13, 2.5939e-15,\n 6.8139e-16, 2.1558e-16, 8.4165e-15, 8.3276e-13, 1.5694e-16, 1.5259e-14,\n 4.4811e-15, 2.8623e-14, 4.4626e-13, 9.9721e-13, 8.2265e-16, 8.9902e-15,\n 6.8185e-14, 1.0534e-12, 9.9835e-15, 2.7065e-16, 9.0188e-13, 8.0982e-14,\n 2.4488e-12, 1.0327e-13, 6.5599e-13, 3.1213e-15, 6.7150e-14, 1.3619e-12,\n 1.6247e-14, 5.9151e-15, 3.4204e-13, 2.0376e-14, 2.6201e-15, 6.5503e-14,\n 3.5712e-14, 3.4395e-17, 1.3600e-14, 2.1538e-13, 3.2574e-14, 1.0629e-13,\n 3.4100e-13, 1.0468e-15, 3.0208e-16, 5.2605e-15, 9.7331e-17, 4.9104e-13,\n 1.5217e-15, 3.2662e-16, 1.2905e-14, 2.0288e-16, 4.9241e-19, 9.4849e-14,\n 1.1123e-14, 9.1501e-14, 1.7155e-14, 4.6939e-13, 4.0071e-14, 1.2185e-13,\n 1.0597e-15, 1.6828e-14, 2.3866e-16, 1.4068e-13, 4.8266e-13, 3.7010e-13,\n 7.9597e-13, 5.2832e-13, 1.0353e-15, 5.0699e-14, 1.9759e-16, 1.6625e-12,\n 2.4968e-13, 6.9045e-17, 1.0543e-12, 3.2506e-18, 1.4036e-14, 5.6108e-14,\n 1.8717e-17, 1.5773e-13, 3.9344e-15, 9.1345e-15, 1.1373e-16, 4.3913e-15,\n 1.2503e-14, 7.0777e-14, 1.8645e-15, 5.1248e-16, 1.3724e-13, 1.9906e-13,\n 1.9854e-16, 1.2053e-12, 1.1987e-16, 7.0801e-14, 1.7309e-15, 3.2990e-16,\n 8.2807e-13, 6.9812e-17, 1.8020e-13, 7.5480e-15, 9.4433e-15, 4.0991e-15,\n 1.3931e-16, 8.3856e-16, 3.9370e-13, 5.0031e-13, 6.2557e-13, 2.1146e-14,\n 4.2369e-15, 6.4665e-13, 8.6270e-15, 2.0273e-16, 1.0962e-14, 3.6944e-17,\n 8.7281e-15, 2.3281e-16, 6.3344e-13, 1.1757e-14, 1.2641e-16, 8.5553e-14,\n 1.7647e-13, 3.1550e-17, 2.4777e-13, 2.2517e-14], device='cuda:0')" | |
| }, | |
| "44": { | |
| "step": "tensor(11268.)", | |
| "exp_avg": "tensor([[ 4.7223e-19, 2.7764e-19, 2.6759e-19, ..., 3.7111e-19,\n 1.8113e-19, 1.0892e-19],\n [-1.0262e-19, -1.0239e-19, 1.8850e-20, ..., 1.1319e-20,\n -3.5369e-20, -2.2764e-20],\n [-4.3228e-20, -2.1313e-20, 5.9858e-21, ..., -1.4232e-19,\n 2.3925e-20, -1.1264e-20],\n ...,\n [ 2.0976e-20, 2.2411e-20, -1.1458e-20, ..., -3.0719e-21,\n 1.2795e-20, 1.1000e-22],\n [-5.5341e-20, -4.7743e-21, -1.5562e-19, ..., -1.0769e-19,\n 1.0990e-20, 3.9360e-21],\n [-4.3868e-19, -2.0866e-19, 5.9240e-20, ..., -4.9556e-19,\n -2.9081e-19, -1.4601e-19]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[1.6455e-15, 5.2247e-16, 1.0786e-15, ..., 2.0194e-15, 1.5721e-15,\n 2.2057e-15],\n [3.6121e-17, 1.9157e-17, 6.9249e-18, ..., 5.7346e-17, 2.0906e-17,\n 5.1030e-17],\n [2.3892e-18, 4.3165e-18, 4.6274e-18, ..., 7.7974e-18, 2.8096e-18,\n 1.4195e-18],\n ...,\n [2.0567e-16, 6.2262e-17, 1.3843e-16, ..., 1.1925e-16, 2.2242e-16,\n 2.8358e-16],\n [7.5583e-18, 1.0159e-17, 3.6289e-17, ..., 4.7343e-17, 7.9216e-17,\n 1.5122e-16],\n [4.8236e-16, 2.4613e-16, 3.4620e-16, ..., 4.1368e-16, 6.5577e-16,\n 9.1928e-16]], device='cuda:0')" | |
| }, | |
| "45": { | |
| "step": "tensor(11268.)", | |
| "exp_avg": "tensor([ 2.6103e-16, -4.5251e-17, -7.6156e-18, 7.9594e-17, -1.6740e-16,\n -3.4543e-17, 4.0689e-17, -5.8184e-17, 2.5062e-16, 5.8163e-17,\n -2.1345e-17, -7.4002e-17, 1.1228e-16, 1.7702e-16, 1.4494e-17,\n 1.2036e-16, 1.6421e-16, -2.6153e-17, 6.1234e-17, -3.4264e-17,\n -1.6666e-17, -2.8895e-16, -2.9942e-17, -8.3594e-17, 2.0728e-16,\n 2.6700e-17, 1.8480e-17, 1.6380e-16, 1.2645e-16, 5.5510e-17,\n -1.1447e-16, 4.0199e-17, 9.4426e-17, -1.9952e-17, -2.3893e-17,\n -4.3072e-17, 2.5043e-17, -2.8057e-17, -2.6924e-16, 2.8118e-17,\n -1.6322e-16, -6.0795e-17, -9.6533e-17, -5.5495e-17, -6.3985e-17,\n -1.0015e-16, 4.6395e-17, 1.9091e-16, 4.9512e-17, 4.7906e-17,\n 1.2109e-16, -2.2627e-16, 1.2933e-16, 4.4481e-17, 1.5781e-16,\n -4.6750e-17, -1.1745e-16, -1.2035e-16, 1.0171e-16, -2.2037e-17,\n -7.2189e-17, -2.6564e-17, 4.1353e-16, 1.5567e-16, 2.4229e-16,\n -5.8563e-17, -8.6583e-18, 2.3381e-17, -8.0337e-18, -1.5015e-17,\n 1.9125e-16, -1.4355e-16, -8.9520e-17, -8.4905e-18, 4.3621e-16,\n 5.0794e-17, 1.0082e-16, 4.5367e-17, -1.8592e-16, 2.7290e-16,\n 6.4336e-18, -4.0163e-18, -3.1095e-17, -3.2836e-17, -2.2834e-16,\n -4.4528e-17, 1.1038e-17, 3.5711e-17, -2.6870e-16, -2.2775e-17,\n 8.4196e-17, -2.3823e-17, 1.0406e-16, -2.5047e-16, 4.5249e-17,\n -6.6439e-19, 8.8493e-17, -1.0474e-16, -3.1845e-17, 9.7154e-17,\n -2.5764e-17, -9.4552e-17, 2.8098e-16, 1.2471e-17, -1.4946e-16,\n -5.1584e-17, 2.2842e-16, -1.8275e-16, 5.8484e-18, -2.8095e-16,\n 1.2115e-16, -1.2818e-16, 2.3683e-18, 9.6343e-18, 4.8032e-16,\n 4.1232e-17, -4.9970e-18, 9.1808e-17, -3.8912e-17, 1.7908e-16,\n -2.8546e-17, -1.6757e-17, -2.2345e-17, -4.1744e-16, 5.6424e-17,\n 2.4377e-16, 2.9155e-17, -9.4168e-17, -1.1881e-17, -1.2193e-16,\n -1.0091e-16, 4.1441e-17, 1.0424e-16, -1.7823e-17, 3.1771e-17,\n -4.8821e-17, 1.7730e-18, -1.1640e-16, -4.6198e-17, 8.3681e-17,\n -1.0544e-16, 1.0686e-16, -8.6839e-17, 2.4285e-16, 1.4370e-16,\n 1.4017e-17, 3.5821e-17, 1.7301e-16, -2.6964e-16, 1.5426e-17,\n 1.5781e-16, 9.8733e-17, -1.2671e-17, -1.6228e-16, 3.4489e-17,\n 1.4361e-17, 4.0205e-17, 4.6320e-17, -1.0866e-16, 3.9224e-17,\n -8.9016e-17, 3.6497e-16, 2.3270e-17, 2.5566e-16, 1.2191e-16,\n -2.0124e-16, 1.4158e-16, 5.3695e-17, 1.9852e-16, -8.4909e-17,\n -2.5216e-16, -1.7145e-18, -3.9083e-18, 2.5254e-17, 1.3925e-17,\n -1.3038e-17, 3.7903e-19, 2.6253e-17, 4.6944e-17, -5.2509e-17,\n -2.0305e-18, -4.7112e-17, -1.2488e-16, 1.1717e-17, 4.9950e-17,\n 1.6353e-18, -1.1628e-17, -4.7972e-17, -5.6161e-17, -7.9779e-18,\n -1.8611e-16, -2.0306e-17, 4.4456e-17, 3.9415e-16, -8.7940e-17,\n -1.0721e-16, -3.4335e-18, -1.1606e-16, 5.9445e-17, -9.0137e-17,\n -1.0566e-16, 1.2969e-16, 3.8971e-17, 3.9964e-18, -1.2649e-16,\n -3.7615e-17, 2.4289e-16, -2.4656e-16, 2.1084e-17, 6.1353e-17,\n -7.0217e-17, 1.2055e-17, -4.9752e-17, -4.6188e-17, -3.2635e-17,\n 1.9663e-17, 2.2989e-17, -7.6138e-17, -2.8887e-16, -5.6769e-17,\n -2.3712e-16, -8.3923e-17, 3.2830e-17, 2.0085e-16, 3.4694e-17,\n -2.1337e-18, -2.2277e-17, 1.6596e-16, -4.0889e-18, -8.5110e-17,\n 1.6939e-17, -6.2282e-17, -1.6142e-16, 6.6407e-18, -7.6783e-17,\n 3.9284e-17, 2.0343e-17, -2.4830e-16, 4.8207e-18, 1.2119e-17,\n 4.7886e-17, -1.2965e-16, -7.8437e-17, -1.0675e-16, -1.6053e-16,\n -6.7849e-17, 4.2892e-17, -1.1119e-16, 1.7816e-16, 2.9133e-18,\n -1.4810e-16, -1.4494e-16, 1.7370e-17, 2.2952e-17, -2.8924e-17,\n -2.9081e-16], device='cuda:0')", | |
| "exp_avg_sq": "tensor([3.6617e-10, 9.3975e-12, 2.8272e-13, 1.0328e-09, 6.7947e-10, 1.4540e-11,\n 1.0585e-12, 9.1469e-11, 9.8133e-10, 1.4914e-10, 1.5122e-10, 1.1084e-09,\n 1.0495e-09, 1.3127e-11, 3.5029e-12, 4.2171e-12, 4.9898e-12, 6.1356e-12,\n 8.4408e-12, 1.3433e-10, 6.6478e-13, 9.5917e-11, 2.1354e-11, 3.2578e-11,\n 2.8420e-11, 9.1112e-12, 4.1899e-11, 1.1839e-09, 1.7334e-10, 2.4791e-12,\n 3.1255e-09, 1.0788e-12, 7.2588e-10, 2.5626e-11, 1.6001e-10, 2.1111e-09,\n 1.6172e-11, 2.2567e-11, 3.3058e-10, 6.2868e-13, 1.2479e-10, 2.5440e-10,\n 4.4146e-12, 3.5062e-11, 2.5488e-12, 1.6818e-12, 5.3374e-12, 7.0419e-10,\n 1.0716e-12, 2.1258e-10, 1.3736e-10, 1.1456e-10, 1.0428e-10, 1.9518e-11,\n 2.3042e-11, 9.3280e-11, 1.7931e-12, 1.6990e-11, 1.6906e-09, 2.6435e-11,\n 3.4043e-10, 1.9183e-10, 6.4777e-10, 2.2584e-09, 1.0019e-09, 9.9075e-10,\n 6.3638e-14, 7.1226e-13, 4.8414e-12, 2.7641e-10, 1.9500e-09, 2.0109e-10,\n 5.3256e-11, 2.6044e-11, 2.8458e-10, 1.0340e-11, 8.1876e-12, 2.5664e-11,\n 7.9093e-10, 1.1820e-09, 3.2163e-13, 2.2548e-12, 8.0364e-11, 2.9559e-10,\n 7.1518e-10, 3.1502e-11, 1.0047e-13, 3.1307e-13, 1.4128e-11, 2.7130e-10,\n 5.2084e-10, 6.7106e-11, 3.9160e-10, 7.5564e-10, 1.7311e-12, 1.0200e-12,\n 1.7296e-11, 3.7755e-10, 1.9465e-10, 8.4737e-11, 2.2347e-10, 1.2585e-09,\n 7.7497e-10, 2.9774e-11, 2.7784e-10, 2.8899e-10, 4.2474e-11, 4.0061e-11,\n 1.1799e-12, 1.0395e-09, 3.2837e-10, 1.9320e-11, 1.3857e-10, 1.9734e-11,\n 5.0194e-10, 1.5667e-11, 3.4266e-13, 9.1899e-12, 1.1028e-12, 7.5168e-12,\n 7.0300e-13, 2.4096e-11, 2.9338e-12, 1.1292e-09, 3.8496e-12, 2.9219e-10,\n 1.0464e-10, 6.7024e-10, 6.8095e-11, 3.4225e-09, 8.9912e-11, 8.3718e-13,\n 9.6682e-11, 5.5273e-12, 5.4184e-12, 7.4369e-10, 1.6160e-12, 1.8997e-11,\n 2.4693e-11, 8.8954e-13, 2.8252e-10, 2.0118e-11, 1.0189e-12, 4.6862e-11,\n 1.2136e-09, 4.0977e-13, 2.6645e-11, 4.5410e-11, 6.4675e-10, 1.9284e-11,\n 1.5261e-09, 1.2664e-11, 6.7058e-11, 7.2196e-12, 1.6310e-10, 1.1994e-10,\n 5.8873e-12, 3.2090e-10, 2.1906e-09, 1.9173e-11, 1.9002e-10, 2.7330e-10,\n 3.8846e-10, 1.6006e-09, 8.8592e-11, 5.3136e-10, 4.5309e-10, 5.9672e-12,\n 5.7758e-10, 5.3060e-10, 1.2087e-09, 2.4063e-12, 4.7021e-11, 3.6406e-11,\n 9.0106e-12, 6.9066e-11, 5.4677e-13, 1.3127e-11, 5.6425e-11, 3.7687e-11,\n 5.8943e-10, 8.2304e-10, 2.4459e-11, 3.4302e-12, 1.8354e-11, 2.2654e-09,\n 1.2334e-11, 7.5592e-12, 6.1041e-10, 4.2583e-10, 6.1336e-10, 4.3176e-12,\n 6.5853e-13, 1.0356e-09, 5.7683e-11, 2.0923e-10, 2.0482e-11, 2.3421e-10,\n 1.2112e-09, 1.6705e-10, 2.0490e-12, 6.4075e-09, 4.1740e-12, 7.9771e-10,\n 5.7591e-10, 2.4322e-10, 4.5573e-10, 2.7144e-10, 2.1168e-11, 9.8014e-12,\n 9.0292e-10, 3.7026e-13, 2.2052e-10, 4.1796e-13, 6.0915e-11, 5.6946e-12,\n 2.7874e-12, 4.1684e-11, 7.1054e-10, 3.8439e-12, 1.5577e-09, 3.2081e-11,\n 5.1020e-12, 1.8279e-09, 4.8339e-13, 2.1426e-11, 2.1879e-10, 9.9246e-11,\n 1.6386e-11, 1.5444e-11, 1.5636e-12, 1.2969e-10, 3.5948e-11, 7.8461e-12,\n 5.7705e-10, 5.7472e-11, 6.6026e-13, 3.5979e-09, 2.6052e-12, 2.9326e-12,\n 2.0767e-10, 3.7142e-09, 3.2467e-12, 2.5113e-10, 4.8615e-10, 3.2402e-12,\n 1.4346e-09, 2.4301e-10, 4.6794e-10, 1.7528e-13, 4.9930e-11, 1.8659e-09,\n 1.5883e-11, 5.7732e-11, 8.7531e-12, 1.7155e-10], device='cuda:0')" | |
| }, | |
| "46": { | |
| "step": "tensor(11268.)", | |
| "exp_avg": "tensor([ 5.4985e-18, 7.5099e-19, -7.1761e-20, 1.7417e-17, 1.0680e-17,\n -1.4582e-20, 7.1492e-19, 2.3326e-18, 2.0752e-17, 9.7786e-18,\n 1.1812e-17, 5.2469e-20, 3.4303e-17, 1.3055e-17, -3.7379e-19,\n 1.4686e-18, 8.4971e-19, -1.1549e-20, -7.1837e-19, 6.5995e-19,\n 2.7548e-18, -2.6989e-19, 2.1055e-18, 2.2298e-18, 1.3400e-17,\n 3.7725e-18, -5.1959e-19, 3.0446e-17, 2.4762e-17, 4.7966e-19,\n 1.5060e-18, 1.7634e-18, 9.4321e-18, -2.0854e-20, -7.8881e-20,\n 3.1134e-18, 6.9915e-20, 4.0379e-20, 1.1272e-17, -4.8957e-20,\n 4.9264e-19, 2.0839e-17, 1.1152e-19, 3.7561e-20, -8.7760e-20,\n 3.4160e-19, 3.2080e-18, 1.4565e-17, -8.3034e-20, 1.1326e-17,\n 1.2753e-17, -5.1264e-20, 4.2452e-18, 1.6133e-19, 6.5751e-18,\n 3.0887e-18, 1.1988e-19, 1.1859e-18, 2.1199e-17, 1.4755e-18,\n -1.0185e-19, -1.1652e-19, 1.4030e-17, 1.7596e-17, 2.2994e-17,\n 2.5548e-19, 6.0443e-19, -2.2678e-19, 7.7654e-20, -8.6024e-20,\n 3.0250e-17, 2.6081e-19, 1.1747e-19, 1.4850e-19, 1.5119e-17,\n 1.2531e-17, 2.9655e-18, -2.4620e-19, 1.3751e-18, 1.8301e-17,\n 7.0804e-19, 4.6735e-19, -5.0898e-20, 4.1067e-18, 1.9459e-17,\n 8.0453e-21, 3.7855e-19, -2.7747e-19, 7.2738e-19, 2.0378e-17,\n 5.1207e-18, -2.0123e-19, 1.1510e-17, 4.1665e-20, 1.6288e-18,\n -1.1190e-19, -6.2655e-19, 2.5639e-17, 5.2140e-18, 8.3259e-18,\n 4.3309e-18, 2.2248e-17, 2.9402e-17, 9.7717e-19, -1.0246e-20,\n 7.4784e-18, 8.3625e-18, 3.6268e-20, 6.3321e-19, -3.9379e-19,\n 8.1253e-18, 1.5405e-19, -2.4833e-19, 1.9584e-19, 2.5220e-17,\n 2.3822e-18, 1.0611e-19, -1.3177e-18, -5.5227e-21, 3.9095e-18,\n -6.6834e-20, 2.4233e-20, -9.4602e-20, 9.6675e-18, -1.8678e-20,\n 2.1549e-17, -2.9232e-19, -7.3672e-22, -6.6123e-20, 1.3074e-18,\n 9.6904e-20, 1.7874e-18, 1.9298e-18, -1.2973e-19, 9.7134e-19,\n 2.7693e-17, 1.4943e-21, 1.8874e-19, -1.7084e-19, -9.1735e-19,\n -1.1716e-20, 1.4547e-18, 5.9919e-20, 2.6134e-17, 6.1303e-18,\n 5.1127e-19, 2.5929e-18, 3.9788e-18, -1.9016e-19, 3.9793e-20,\n 5.4066e-18, 2.3539e-18, 8.7365e-20, 3.3831e-19, 3.0702e-17,\n -3.8221e-20, 3.7604e-18, 6.1478e-18, 7.3990e-20, 3.0938e-19,\n 3.4813e-18, 1.2054e-17, 9.4812e-18, 3.5938e-17, 2.8729e-18,\n -4.0633e-20, 3.1372e-17, 6.6109e-18, 1.5437e-17, 4.1067e-19,\n 9.3329e-18, 2.3310e-19, 1.6370e-19, 7.1433e-19, 4.8504e-19,\n -3.9564e-20, 1.0358e-18, 1.0389e-18, -7.3568e-19, 3.5570e-19,\n 5.5390e-18, 4.5111e-18, 2.1826e-19, 1.6320e-19, -8.1299e-20,\n 5.1084e-18, 5.6564e-20, 7.8232e-19, 3.1436e-21, 2.3758e-19,\n 3.9373e-18, 1.0577e-20, -8.9380e-20, 2.2845e-17, 1.8437e-20,\n 1.3796e-17, 1.2314e-18, 1.2642e-17, 2.6725e-17, 5.2216e-18,\n 1.2744e-19, 2.8630e-17, 8.4340e-19, 3.0906e-17, 1.4136e-17,\n -6.8122e-20, 1.4074e-17, 2.9087e-18, 3.4254e-19, 1.6124e-18,\n 6.7993e-20, 9.1071e-19, -1.7133e-19, -4.5095e-21, 9.2296e-20,\n 6.2784e-19, 3.7708e-19, -1.3549e-19, 2.6644e-19, -3.9821e-20,\n -1.3674e-19, -7.8012e-21, 8.7941e-20, 1.6105e-17, 5.0687e-19,\n 1.0038e-18, 9.7630e-18, 1.8372e-18, -3.1371e-19, 1.7628e-18,\n 3.8750e-19, 2.4226e-18, 1.5668e-21, 7.7759e-19, 3.8303e-18,\n 2.5651e-18, -2.5030e-19, 3.7462e-19, 2.9622e-19, 2.1014e-19,\n 1.1623e-18, 1.4547e-18, 1.9174e-19, 6.9991e-19, 3.5451e-19,\n -2.3565e-20, 2.1081e-17, 1.8733e-18, 3.2692e-17, 1.1067e-19,\n 4.5087e-20, 2.3927e-18, 1.2048e-17, -2.0704e-19, 6.4385e-19,\n 1.7224e-18], device='cuda:0')", | |
| "exp_avg_sq": "tensor([2.0387e-13, 2.3195e-16, 2.0198e-16, 6.9582e-13, 6.0734e-13, 5.7586e-16,\n 1.7772e-17, 1.0205e-13, 8.4935e-13, 4.0113e-13, 6.0359e-13, 1.2646e-14,\n 3.4922e-12, 3.1860e-13, 4.9027e-17, 4.1572e-16, 2.5159e-16, 2.0705e-15,\n 5.1188e-18, 3.3735e-14, 9.9331e-15, 1.3226e-14, 1.0414e-13, 5.1501e-15,\n 1.1863e-13, 5.5282e-14, 4.7658e-15, 2.8163e-12, 6.2726e-13, 2.0481e-17,\n 9.9298e-13, 5.4618e-15, 1.3945e-13, 4.0749e-17, 1.8068e-16, 6.8011e-13,\n 1.0386e-17, 1.1890e-14, 9.2969e-13, 2.7282e-16, 6.2943e-14, 1.0532e-12,\n 2.8824e-15, 7.3068e-18, 1.7507e-15, 3.7786e-15, 4.4093e-17, 2.3570e-13,\n 3.1553e-17, 1.3281e-13, 9.8294e-14, 1.6597e-14, 4.0117e-16, 1.5277e-16,\n 5.0792e-15, 6.1473e-14, 1.1017e-16, 5.6107e-15, 1.6471e-12, 3.2799e-16,\n 1.8801e-14, 1.4699e-16, 7.9284e-14, 8.8317e-13, 7.7329e-13, 1.0625e-13,\n 1.2932e-16, 1.6234e-16, 2.7049e-17, 3.5226e-16, 2.3541e-12, 4.2977e-17,\n 2.6933e-15, 2.8107e-17, 3.4801e-14, 3.5846e-13, 8.3896e-17, 4.2509e-17,\n 3.4509e-14, 4.3776e-13, 6.3079e-17, 4.6416e-15, 3.2029e-15, 1.2601e-13,\n 2.3929e-12, 3.6075e-15, 1.4641e-16, 2.4573e-16, 3.7437e-15, 8.5014e-13,\n 3.7269e-15, 6.0602e-16, 2.1388e-13, 3.9803e-14, 5.9960e-15, 1.6170e-15,\n 1.2128e-16, 2.2698e-12, 3.1791e-13, 1.8140e-14, 9.3068e-15, 2.5426e-12,\n 1.1321e-12, 4.9941e-15, 1.0996e-14, 8.9320e-14, 3.5675e-15, 1.6032e-16,\n 3.3210e-17, 1.2230e-13, 2.8502e-14, 4.6590e-15, 2.3910e-15, 2.3091e-15,\n 4.2319e-13, 2.3722e-15, 3.5355e-15, 8.3905e-16, 5.9942e-17, 4.5827e-16,\n 8.2035e-16, 1.5512e-15, 6.9296e-17, 2.9020e-12, 5.2312e-18, 5.4909e-13,\n 3.3894e-16, 1.6380e-14, 4.7779e-15, 9.6523e-13, 2.0668e-18, 4.4081e-15,\n 4.5529e-14, 2.1668e-15, 7.5573e-17, 2.7266e-12, 1.3649e-16, 1.0515e-16,\n 1.6961e-15, 4.7519e-17, 1.4037e-14, 3.5897e-14, 2.2578e-16, 6.8189e-13,\n 6.2771e-13, 8.0755e-17, 4.3228e-14, 1.5226e-16, 1.1279e-13, 1.1483e-17,\n 6.6100e-13, 2.6235e-14, 2.1529e-17, 6.9494e-15, 1.5924e-12, 2.8050e-16,\n 2.1359e-14, 5.5374e-14, 3.9470e-14, 9.3109e-15, 2.5005e-13, 3.2596e-14,\n 1.3261e-13, 3.1712e-12, 9.6781e-16, 1.3207e-14, 1.5768e-12, 1.6234e-13,\n 2.2216e-13, 9.0874e-14, 1.1748e-12, 3.9961e-15, 1.7194e-17, 1.5953e-16,\n 6.2392e-15, 9.2996e-17, 3.0958e-15, 1.2483e-14, 4.4227e-17, 4.4125e-16,\n 4.9874e-13, 3.5757e-13, 1.8138e-15, 9.1472e-17, 1.9540e-17, 1.2872e-12,\n 9.9969e-16, 2.4383e-14, 4.1593e-14, 2.5716e-16, 3.7569e-14, 4.9950e-17,\n 1.7220e-16, 5.1443e-13, 1.9297e-14, 4.1644e-13, 9.3681e-15, 4.2983e-13,\n 3.0953e-12, 9.2157e-14, 1.6416e-16, 6.1167e-12, 1.1603e-17, 3.2051e-12,\n 8.4174e-13, 2.0119e-16, 1.4767e-13, 6.5596e-14, 5.6200e-15, 2.0649e-14,\n 5.5769e-14, 8.3769e-17, 2.2418e-15, 1.6825e-16, 1.8425e-15, 7.5456e-15,\n 1.2103e-17, 6.0689e-15, 2.8520e-14, 5.8881e-17, 2.3352e-13, 1.1678e-14,\n 9.1142e-18, 3.9039e-13, 1.2691e-16, 1.4144e-16, 1.1781e-13, 2.5736e-14,\n 1.6056e-16, 4.5484e-14, 7.9788e-17, 5.7755e-15, 1.3043e-14, 9.5001e-15,\n 1.3086e-13, 2.3826e-14, 2.2776e-16, 1.6269e-12, 2.9994e-15, 1.8049e-17,\n 6.8552e-14, 1.2611e-12, 2.7576e-15, 6.8074e-14, 1.2068e-14, 1.7843e-15,\n 1.4548e-12, 9.8821e-15, 1.6879e-12, 5.8807e-17, 8.9935e-15, 7.5358e-13,\n 3.5444e-13, 4.4148e-16, 3.2091e-16, 9.0406e-14], device='cuda:0')" | |
| }, | |
| "47": { | |
| "step": "tensor(11268.)", | |
| "exp_avg": "tensor([ 1.1779e-17, 3.4275e-18, -3.1097e-20, 1.2733e-17, 7.5127e-18,\n 4.2730e-20, -5.3629e-19, 4.8940e-18, 1.6253e-17, 1.0283e-17,\n 9.7704e-18, -2.3403e-19, 1.6932e-17, 1.3164e-17, 1.8623e-18,\n 7.2711e-18, 7.5982e-18, 5.9289e-20, 5.1563e-19, 3.7246e-18,\n -2.1648e-18, -7.2280e-19, 5.0016e-18, 4.5848e-18, 1.3701e-17,\n -2.5351e-18, 1.8951e-19, 1.6860e-17, 1.5111e-17, -3.7838e-19,\n 3.3725e-18, -1.2411e-18, 1.0554e-17, -2.7590e-20, -1.0035e-19,\n 5.4722e-18, -4.8836e-20, 6.1601e-19, 6.7351e-18, 4.1318e-20,\n 1.6286e-18, 1.1813e-17, -5.2036e-19, -1.5909e-19, 4.9329e-19,\n -1.3445e-18, 7.1748e-18, 1.3731e-17, 6.9990e-20, 1.0620e-17,\n 1.2097e-17, -1.4797e-18, 9.0538e-18, -1.2564e-19, 1.0480e-17,\n 5.6038e-18, -8.6545e-19, 3.0485e-18, 1.4007e-17, 4.9435e-18,\n 1.0867e-18, -1.7247e-19, 1.7344e-17, 1.3870e-17, 1.6611e-17,\n 2.1519e-18, -5.2674e-19, 1.4383e-19, 2.6286e-19, -2.6796e-20,\n 1.7404e-17, -1.5991e-18, 1.9004e-18, 6.0784e-20, 1.7844e-17,\n 1.0920e-17, 7.7616e-18, 1.9325e-19, 2.4643e-18, 1.5949e-17,\n -5.8273e-19, -9.4691e-20, 1.3846e-18, 6.3356e-18, 9.5106e-18,\n 1.3817e-19, -2.4159e-19, 2.2053e-19, -4.2612e-18, 1.2080e-17,\n 8.7685e-18, 9.5371e-19, 1.1399e-17, 1.1634e-19, -1.0559e-18,\n 1.3679e-18, 4.6681e-19, 1.2371e-17, 6.9625e-18, 1.0246e-17,\n 6.6304e-18, 1.1566e-17, 1.8416e-17, -6.7469e-19, 1.9903e-20,\n 7.7134e-18, 1.2413e-17, -5.4620e-19, 2.3812e-19, -1.1845e-18,\n 1.0613e-17, -1.6920e-18, 1.5687e-18, -7.8002e-20, 2.0910e-17,\n 6.6317e-18, 2.7054e-19, 1.0976e-18, 6.9499e-20, 9.7771e-18,\n 2.4334e-19, 1.2222e-19, 5.4216e-19, 4.9415e-18, 1.1638e-20,\n 1.6311e-17, 2.0332e-19, -3.5008e-21, -1.3341e-19, 3.0530e-18,\n -5.7090e-19, -1.3094e-18, 7.2758e-18, 1.0555e-18, -7.6072e-19,\n 1.3477e-17, 7.7291e-19, -1.1803e-18, -5.7563e-19, 7.3282e-19,\n 8.4789e-19, -9.7034e-19, -4.3218e-19, 1.7207e-17, 1.0212e-17,\n -1.8263e-19, -1.8321e-18, 9.7047e-18, -6.5474e-19, -2.4866e-20,\n 1.0118e-17, -1.5238e-18, 7.5838e-20, -2.1666e-18, 1.5139e-17,\n 2.8834e-20, -2.4746e-18, 8.7005e-18, -6.5658e-19, -2.1786e-19,\n 5.2714e-18, 1.5920e-17, 9.5259e-18, 1.9180e-17, 8.3451e-18,\n -2.5675e-19, 1.6772e-17, 8.8947e-18, 1.3919e-17, 2.3516e-18,\n 6.1839e-18, 3.6456e-19, -8.0035e-20, -5.3594e-19, -3.4723e-19,\n 4.4310e-19, -7.8235e-19, -8.2028e-19, 4.7365e-19, 2.8427e-18,\n 7.5509e-18, 6.4177e-18, -1.5432e-18, 3.9171e-18, 5.6094e-20,\n 7.2790e-18, 6.4715e-20, 3.7107e-18, 2.0032e-18, 1.3599e-20,\n 4.5072e-18, 2.0195e-19, 7.2629e-20, 1.8957e-17, -8.8536e-19,\n 9.3209e-18, -8.5712e-19, 8.8104e-18, 1.4710e-17, 6.3661e-18,\n -7.2631e-19, 1.6100e-17, -6.5743e-19, 1.4737e-17, 9.1453e-18,\n -1.8332e-19, 1.4349e-17, 3.2049e-18, -1.9896e-19, -1.1706e-18,\n 1.9616e-18, -7.2016e-19, 8.0909e-19, 8.2203e-20, 2.7792e-18,\n -4.5447e-19, -2.5750e-19, -9.1538e-19, -3.7228e-18, 3.0527e-19,\n -8.1509e-19, -2.1142e-19, -7.0198e-20, 1.4111e-17, -4.1529e-19,\n -7.6280e-19, 9.0033e-18, 8.3246e-18, 1.7947e-18, 4.0987e-18,\n -3.1896e-19, 4.8773e-18, -1.8717e-18, -4.3172e-19, 5.6055e-18,\n 6.6293e-18, 1.8638e-19, 9.2306e-19, -1.0520e-19, -7.1427e-20,\n 5.5032e-18, 3.1313e-18, -7.0360e-19, 2.5960e-18, -1.7307e-18,\n -4.8663e-20, 1.3194e-17, 3.7841e-18, 1.7541e-17, 1.4175e-18,\n 7.0886e-19, 3.7685e-18, 1.0386e-17, 2.1346e-18, 3.5864e-18,\n 1.8352e-18], device='cuda:0')", | |
| "exp_avg_sq": "tensor([2.2504e-13, 2.1218e-14, 1.1072e-16, 8.8268e-13, 6.0964e-13, 3.2931e-16,\n 5.7222e-17, 8.3647e-14, 9.9311e-13, 3.4220e-13, 3.8567e-13, 3.1115e-13,\n 1.3148e-12, 2.0017e-13, 9.9152e-15, 1.4691e-14, 1.0716e-14, 1.0633e-15,\n 2.1847e-16, 5.8959e-14, 5.3637e-15, 2.4967e-14, 1.0283e-13, 6.2850e-14,\n 1.8971e-13, 2.0093e-14, 2.4218e-15, 1.3406e-12, 4.4683e-13, 4.4252e-16,\n 9.0964e-13, 3.1619e-15, 4.8374e-13, 8.8016e-16, 3.3617e-14, 4.2102e-13,\n 1.2713e-15, 5.7762e-15, 4.1987e-13, 7.8681e-17, 7.7068e-14, 5.8712e-13,\n 1.4751e-15, 1.7579e-15, 9.3882e-16, 1.0077e-15, 4.2884e-14, 6.6850e-13,\n 3.5396e-18, 3.3083e-13, 2.2348e-13, 7.5604e-14, 1.1328e-13, 5.2184e-15,\n 7.6665e-14, 5.1590e-14, 4.7575e-17, 5.0463e-14, 1.2524e-12, 4.3162e-14,\n 8.7585e-14, 3.6307e-14, 5.3738e-13, 1.5366e-12, 9.3049e-13, 2.6871e-13,\n 4.4904e-17, 2.1773e-16, 7.1694e-15, 6.4218e-14, 1.5651e-12, 1.8345e-14,\n 1.5276e-14, 4.2944e-15, 2.9170e-13, 1.9466e-13, 2.9503e-14, 1.5501e-15,\n 2.6413e-14, 9.0710e-13, 5.7707e-17, 2.5521e-15, 4.2823e-14, 7.3208e-14,\n 9.3990e-13, 1.6185e-15, 5.0464e-17, 3.2707e-17, 1.9734e-15, 5.2263e-13,\n 3.3178e-13, 3.7952e-14, 4.9559e-13, 3.8467e-14, 2.0446e-15, 7.6691e-16,\n 4.2050e-15, 7.6187e-13, 1.8782e-13, 1.6465e-13, 1.8231e-13, 9.2134e-13,\n 8.6688e-13, 1.9634e-15, 3.4878e-14, 2.9351e-13, 1.0304e-13, 1.2969e-14,\n 4.0707e-15, 1.2723e-13, 2.8603e-13, 2.4892e-15, 2.1976e-14, 1.1576e-15,\n 6.2084e-13, 7.0463e-14, 1.8961e-15, 2.1660e-15, 2.1986e-17, 2.8463e-14,\n 4.1944e-16, 6.7959e-16, 4.0733e-15, 1.0523e-12, 3.9318e-17, 5.3485e-13,\n 1.9631e-14, 1.4493e-13, 2.0428e-15, 8.8586e-13, 1.9466e-15, 2.4478e-15,\n 1.0156e-13, 1.0776e-15, 1.6050e-15, 1.0478e-12, 1.1295e-14, 5.2289e-15,\n 9.4863e-16, 7.8058e-18, 5.3465e-14, 1.7781e-14, 1.1956e-16, 3.5142e-13,\n 4.0619e-13, 5.5607e-16, 1.5530e-14, 7.2152e-14, 1.1877e-13, 4.6273e-16,\n 4.1229e-13, 1.4799e-14, 3.5619e-15, 3.0192e-15, 5.7154e-13, 2.4748e-14,\n 1.1081e-14, 1.1204e-13, 5.9338e-13, 4.0620e-15, 1.6860e-13, 3.1674e-13,\n 3.5209e-13, 1.5346e-12, 7.0237e-14, 3.3862e-14, 7.1791e-13, 1.5043e-13,\n 4.6300e-13, 1.3637e-13, 1.0032e-12, 2.1744e-15, 4.1426e-15, 6.9416e-15,\n 3.2528e-15, 7.0335e-15, 1.5838e-15, 6.0645e-15, 6.5894e-15, 2.6830e-14,\n 2.5568e-13, 1.5806e-13, 9.9837e-16, 3.2106e-14, 8.9145e-16, 6.2731e-13,\n 5.1006e-16, 6.2359e-14, 8.9880e-14, 1.2574e-13, 2.1799e-14, 2.2255e-15,\n 4.5526e-17, 9.1077e-13, 9.1855e-15, 3.2736e-13, 3.9206e-15, 3.0600e-13,\n 1.3828e-12, 1.4510e-13, 7.6644e-17, 3.5973e-12, 3.3204e-16, 1.0651e-12,\n 6.0441e-13, 4.6092e-14, 4.7334e-13, 3.1544e-14, 2.5341e-15, 1.0024e-14,\n 1.7331e-13, 2.6595e-17, 2.7886e-14, 8.7754e-17, 1.3559e-14, 3.8576e-15,\n 3.7908e-16, 3.0418e-15, 1.3555e-13, 3.5602e-17, 3.6169e-13, 5.5520e-15,\n 4.8989e-16, 1.1603e-12, 1.0658e-17, 7.3306e-15, 3.0087e-13, 1.1777e-13,\n 2.8620e-14, 1.0579e-13, 3.9357e-16, 1.0258e-13, 6.6897e-15, 4.2594e-15,\n 6.5678e-14, 4.2522e-14, 2.0355e-16, 9.9366e-13, 1.7353e-15, 1.2900e-15,\n 1.0172e-13, 9.6875e-13, 1.2108e-15, 1.0768e-13, 1.0487e-13, 8.9908e-16,\n 1.1325e-12, 1.6601e-14, 8.1502e-13, 4.9161e-15, 2.5504e-14, 3.4735e-13,\n 2.3972e-13, 7.6660e-15, 1.6566e-14, 4.7048e-14], device='cuda:0')" | |
| }, | |
| "48": { | |
| "step": "tensor(11268.)", | |
| "exp_avg": "tensor([[ 9.3519e-20, 5.0462e-21, -2.6107e-20, ..., 1.0409e-19,\n 2.3295e-20, 1.8624e-21],\n [-1.7548e-19, 1.2053e-20, 5.1671e-20, ..., 2.2006e-19,\n -7.4855e-20, -2.5719e-20],\n [-1.8075e-19, -1.3344e-19, -8.2590e-20, ..., -9.7240e-20,\n -7.7039e-20, -6.0814e-20],\n ...,\n [-1.1443e-19, -3.9936e-20, 3.7008e-21, ..., -1.0363e-19,\n -4.2080e-20, -3.4864e-20],\n [ 3.0713e-20, -5.4085e-20, -1.3722e-19, ..., -4.6571e-20,\n -1.2354e-19, -1.8233e-20],\n [ 2.7598e-20, -4.5913e-21, 2.9648e-20, ..., 1.0621e-19,\n 2.7535e-21, -1.4184e-21]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[5.0015e-16, 1.3490e-16, 3.1659e-16, ..., 3.2384e-16, 3.5463e-16,\n 5.0151e-16],\n [8.1454e-15, 2.3894e-15, 6.1127e-15, ..., 3.6791e-15, 8.6182e-15,\n 9.8296e-15],\n [4.2944e-16, 8.0911e-17, 1.9348e-16, ..., 2.6503e-16, 3.2263e-16,\n 5.7013e-16],\n ...,\n [2.1533e-16, 1.1220e-16, 1.6937e-16, ..., 8.2980e-17, 2.7606e-16,\n 1.4249e-16],\n [9.8541e-15, 2.9033e-15, 6.3484e-15, ..., 6.0652e-15, 9.5171e-15,\n 1.1100e-14],\n [1.5268e-16, 8.3415e-17, 1.4222e-16, ..., 8.4888e-17, 3.2047e-16,\n 2.9654e-16]], device='cuda:0')" | |
| }, | |
| "49": { | |
| "step": "tensor(11268.)", | |
| "exp_avg": "tensor([ 4.2972e-17, -2.0209e-17, -1.2506e-16, 2.3776e-16, -1.5726e-19,\n -5.0209e-17, 6.6287e-17, -2.9741e-17, 3.6545e-16, 5.0255e-17,\n -6.5718e-17, -1.0131e-16, 1.3718e-16, -7.0054e-18, -1.4160e-16,\n 4.1844e-16, 2.4302e-16, -2.6578e-17, 2.4442e-17, -2.4774e-17,\n 8.4520e-17, 7.5104e-17, -6.4881e-17, -4.1679e-18, -3.8800e-17,\n 3.6045e-16, 3.9022e-17, 3.9905e-16, 3.1755e-16, 7.8111e-17,\n -1.1286e-17, 1.9281e-16, 1.1495e-17, -3.7015e-17, -6.7442e-17,\n -7.8517e-18, -6.2088e-17, 2.0371e-16, -2.8217e-17, -7.6799e-17,\n -8.8473e-17, -4.9774e-18, -1.1000e-16, 1.3866e-17, 2.8963e-17,\n -1.1997e-16, -1.0892e-18, 2.9639e-16, -2.3318e-17, 1.8798e-16,\n -1.4088e-17, 5.6382e-17, 3.3575e-17, -2.4405e-16, -1.0407e-16,\n -3.4317e-16, -1.9931e-18, 1.5597e-16, -1.4022e-17, 2.7958e-18,\n -3.0705e-17, 5.2199e-17, 2.7264e-16, 9.8342e-17, -2.3141e-18,\n -1.2735e-16, 8.6884e-17, -9.3547e-17, 2.4096e-17, -6.4618e-17,\n 1.8074e-16, 5.5795e-17, -1.1145e-16, -2.0617e-16, 2.1695e-17,\n -2.7999e-16, -1.1172e-16, 5.7939e-17, -5.7005e-17, 3.1039e-16,\n -9.6634e-17, -4.5028e-16, -2.3488e-17, -1.5724e-16, 2.1413e-16,\n -1.9536e-16, -1.4584e-16, -3.8402e-20, -1.7667e-16, 6.1641e-17,\n 6.4324e-17, 4.5622e-17, 1.0305e-16, 3.6852e-17, 4.1957e-17,\n 1.8503e-16, -6.7285e-17, -1.4875e-17, -9.0845e-17, -2.8447e-18,\n -1.2374e-16, -2.4572e-17, 9.1831e-17, 4.2648e-17, -4.3180e-17,\n 3.7457e-17, 5.5843e-17, 6.2598e-17, 4.3527e-16, -3.8598e-17,\n 4.6978e-17, -1.8809e-16, -1.0445e-16, -1.2476e-16, 2.7144e-17,\n 1.0937e-16, 2.2188e-16, -7.2499e-17, 7.2032e-18, 2.7954e-16,\n -6.6887e-17, 4.7114e-17, -1.7564e-17, 8.6357e-17, 5.4002e-17,\n 7.9386e-17, -1.7632e-16, -1.5074e-16, -7.0694e-17, -1.6645e-16,\n -5.9654e-17, 1.9489e-17, -1.4477e-16, 5.3351e-17, 5.6368e-17,\n 3.3985e-17, -1.3816e-17, -7.6782e-18, -2.7664e-18, 2.0976e-17,\n -8.6296e-17, 1.0393e-16, -5.5593e-17, 3.0597e-17, -8.4204e-17,\n -5.8951e-17, -9.8480e-17, 3.1367e-16, -6.1540e-17, -5.8593e-17,\n 9.1150e-17, 2.2651e-16, -1.9579e-16, -3.9020e-17, 1.4993e-16,\n 5.9089e-17, 1.0335e-16, 3.0026e-18, -1.3656e-16, -5.4572e-17,\n -1.8174e-18, -1.0433e-17, -2.2272e-16, 2.8946e-17, -4.0109e-17,\n 1.4139e-18, 8.5634e-17, -7.7137e-18, -2.3382e-17, 2.2307e-17,\n 4.6686e-17, 3.0216e-18, -2.2190e-16, -1.2472e-16, -1.0493e-18,\n -1.0266e-16, 3.1279e-17, 2.0478e-17, 1.9868e-17, -2.0088e-16,\n -1.0869e-18, -7.6241e-17, 8.6177e-17, -2.5589e-18, 6.5149e-17,\n -1.5773e-16, -7.2758e-17, -1.5441e-18, -9.7853e-17, -2.7602e-16,\n -9.4238e-18, -1.5781e-16, -5.9963e-17, 2.8285e-16, 5.3272e-17,\n -2.0511e-17, 1.7728e-17, 2.9603e-17, 3.4274e-16, 2.2893e-17,\n -1.5098e-17, 7.0498e-17, -5.1143e-18, 2.5869e-16, 4.8694e-18,\n 4.9861e-17, 2.8116e-16, 2.2843e-17, -2.2160e-17, -1.3250e-16,\n -2.2056e-17, -1.6927e-17, -2.0198e-16, -1.0486e-16, -3.4095e-17,\n 9.0047e-17, -2.2854e-16, 6.6288e-17, -1.0038e-16, -2.4884e-16,\n -6.3564e-17, -1.1641e-16, 4.9008e-17, -3.5069e-17, -2.1239e-17,\n 5.5800e-18, 6.0488e-18, 2.5335e-16, 1.7583e-16, -1.5492e-17,\n -2.5463e-17, 2.1204e-17, -1.3165e-16, 5.0741e-17, -5.6985e-17,\n -1.2915e-17, -1.2264e-17, -3.0370e-17, 5.9703e-17, -4.8047e-16,\n 1.7735e-17, -7.3288e-17, 7.5999e-17, 2.2953e-17, 5.0060e-17,\n 7.4198e-19, 4.5324e-17, -9.1798e-17, 3.6572e-18, -2.4156e-16,\n -1.8321e-16, 2.6515e-17, -1.5536e-17, -6.4679e-17, -6.4076e-17,\n -2.9413e-18], device='cuda:0')", | |
| "exp_avg_sq": "tensor([1.1016e-10, 1.8472e-09, 8.3803e-11, 1.2364e-09, 1.9034e-10, 6.6079e-12,\n 8.3711e-12, 1.2420e-10, 6.5565e-10, 2.8613e-10, 3.5844e-11, 4.5756e-12,\n 1.2378e-10, 1.6076e-11, 7.0230e-11, 9.1903e-10, 1.9401e-09, 1.3114e-11,\n 5.3580e-13, 1.9861e-11, 1.6631e-10, 2.1455e-11, 2.3557e-11, 4.2057e-11,\n 1.1412e-11, 7.7243e-11, 4.0362e-11, 2.8942e-09, 7.3864e-11, 3.9697e-11,\n 3.4725e-10, 7.4171e-10, 4.3589e-11, 1.7269e-11, 1.9994e-10, 1.1693e-10,\n 9.0539e-11, 1.1925e-09, 6.0186e-12, 4.6064e-10, 4.1208e-11, 8.3232e-12,\n 9.9418e-12, 7.8993e-11, 2.3197e-11, 8.8664e-12, 3.9193e-12, 5.1060e-10,\n 1.0586e-10, 1.0676e-10, 2.3817e-12, 2.9696e-12, 2.7384e-09, 2.5410e-09,\n 1.3175e-10, 3.0842e-10, 3.7474e-13, 1.0181e-10, 5.6539e-11, 1.8747e-11,\n 2.2593e-10, 3.6985e-11, 1.5735e-09, 1.0607e-09, 1.0156e-09, 1.5464e-09,\n 2.2901e-11, 6.5652e-12, 5.0787e-13, 4.0077e-10, 1.3160e-09, 4.2124e-13,\n 9.7253e-11, 2.5006e-10, 2.0789e-11, 4.9591e-11, 3.6197e-11, 4.8479e-12,\n 2.2741e-10, 3.5274e-10, 1.2016e-10, 1.0897e-09, 1.1413e-10, 4.0655e-10,\n 7.0302e-12, 2.6119e-10, 1.1399e-10, 8.1058e-11, 5.0173e-10, 1.7848e-12,\n 1.4624e-11, 7.7736e-13, 2.7045e-10, 3.3688e-12, 5.4353e-12, 1.1579e-10,\n 5.6215e-12, 2.0103e-12, 5.0837e-11, 1.4520e-12, 9.3848e-10, 3.7877e-12,\n 2.0773e-13, 4.3769e-11, 3.8409e-10, 6.2162e-13, 8.1050e-11, 1.0342e-12,\n 4.1976e-10, 8.9416e-12, 6.4584e-13, 5.0936e-12, 2.5609e-10, 4.0937e-11,\n 4.5918e-10, 5.0140e-11, 4.1669e-10, 1.0167e-09, 2.2326e-11, 4.5650e-11,\n 3.0062e-10, 1.7042e-12, 7.4501e-12, 3.4045e-10, 2.9396e-12, 7.5061e-10,\n 1.4546e-09, 1.9261e-10, 2.4137e-10, 3.5164e-09, 6.7670e-11, 5.6997e-13,\n 1.8991e-10, 4.5061e-11, 3.8927e-10, 4.2517e-12, 1.9921e-12, 7.3868e-12,\n 4.8491e-11, 5.7796e-13, 4.6773e-12, 2.5076e-11, 1.1996e-12, 2.2662e-13,\n 1.9170e-09, 1.6158e-10, 1.7547e-09, 4.9786e-10, 5.5455e-10, 1.6765e-11,\n 7.8359e-10, 4.2250e-09, 1.9946e-10, 4.2175e-12, 2.1312e-10, 4.2489e-12,\n 2.0657e-09, 1.1021e-11, 1.6985e-09, 7.7820e-12, 1.1512e-10, 5.4088e-12,\n 3.4614e-10, 3.0337e-12, 1.1182e-11, 2.6092e-11, 2.9921e-11, 1.2253e-11,\n 1.4610e-11, 8.0310e-12, 2.0602e-12, 9.8876e-11, 1.0518e-09, 1.1137e-09,\n 8.0451e-13, 1.1932e-11, 6.2784e-10, 1.1696e-09, 1.3931e-13, 3.1572e-10,\n 6.6788e-10, 4.2085e-10, 3.1622e-11, 7.2562e-14, 4.6914e-12, 2.1507e-09,\n 3.3748e-10, 1.7682e-11, 9.6962e-10, 4.4823e-09, 2.0866e-11, 3.2438e-10,\n 7.6039e-11, 8.2182e-10, 1.4483e-10, 7.6462e-12, 1.1433e-11, 2.8733e-11,\n 6.5636e-10, 2.0198e-11, 2.3770e-13, 1.8989e-10, 3.1081e-12, 1.0075e-09,\n 3.9221e-13, 4.8054e-11, 1.2871e-10, 1.1593e-11, 1.0969e-11, 9.1633e-11,\n 2.4373e-11, 3.2088e-12, 5.5027e-10, 1.5603e-10, 5.6143e-11, 4.5222e-12,\n 1.1108e-10, 1.9882e-12, 9.7009e-12, 6.9105e-10, 5.8073e-10, 9.9201e-10,\n 3.3721e-11, 9.8036e-11, 1.4890e-11, 4.9955e-10, 1.3178e-11, 1.0578e-10,\n 1.2049e-10, 1.4934e-11, 2.0772e-10, 4.2361e-12, 1.8565e-11, 2.7873e-10,\n 1.4075e-10, 5.0724e-11, 2.5832e-10, 2.6458e-11, 3.5871e-10, 4.5298e-10,\n 1.5299e-10, 3.3903e-11, 2.4484e-11, 1.8966e-12, 3.2451e-12, 2.0558e-13,\n 3.1849e-12, 2.2685e-10, 1.6405e-11, 2.2558e-10, 5.6251e-11, 8.5142e-13,\n 3.3090e-12, 6.4797e-11, 2.0362e-09, 5.1029e-11], device='cuda:0')" | |
| }, | |
| "50": { | |
| "step": "tensor(11268.)", | |
| "exp_avg": "tensor([ 6.2376e-20, 1.8446e-17, -2.9771e-20, 1.8988e-17, 6.0303e-18,\n -1.8040e-20, 7.5048e-18, 1.4313e-18, 2.7289e-17, 3.0338e-17,\n -8.0056e-20, 1.3870e-19, 4.7696e-18, 1.1824e-18, 4.7231e-18,\n 2.5679e-17, 1.4565e-17, -9.7961e-20, -1.6905e-19, 2.2506e-20,\n 1.2786e-17, 8.3486e-18, 1.7877e-18, 3.7686e-18, 1.6972e-17,\n 5.3007e-18, -5.5153e-19, 4.2372e-17, 2.4029e-17, 8.0480e-20,\n 9.5282e-20, 2.9830e-17, 1.9414e-18, -1.7216e-21, 3.5702e-20,\n 1.2501e-19, 7.5660e-19, 3.8989e-18, -6.0244e-20, 4.9802e-19,\n 1.0118e-18, 5.1757e-19, 5.0002e-20, -1.6646e-19, 2.6160e-19,\n 1.9143e-19, 2.6311e-19, 3.7957e-17, -1.4693e-19, 1.1830e-17,\n 6.3144e-20, -2.5308e-20, 2.8549e-17, -2.7012e-19, 4.6466e-18,\n -7.1383e-19, -1.1032e-19, 1.2259e-17, 2.7860e-18, 1.8669e-19,\n -7.4585e-20, -3.7889e-19, 2.3508e-17, 1.2940e-17, 1.5834e-17,\n 1.6393e-18, 7.9984e-18, 5.0947e-20, 8.3005e-19, 2.4124e-20,\n 1.5705e-17, -5.8124e-19, 3.3752e-19, 7.1493e-19, 1.4398e-18,\n 1.7384e-18, 1.3516e-17, -4.5108e-19, -1.6133e-19, 9.3773e-18,\n 3.6023e-18, 6.3631e-18, -6.0928e-20, 1.2981e-18, 3.3077e-18,\n 1.8331e-18, 1.7305e-19, -1.0125e-19, 3.6110e-19, 1.7733e-18,\n 7.1614e-19, 7.7105e-19, 2.4027e-17, 2.8142e-19, 1.9803e-18,\n 9.2870e-18, 2.5826e-20, 1.6666e-18, 5.2096e-18, -2.8537e-19,\n 1.1767e-17, 6.9016e-20, 2.7681e-19, 1.0935e-17, -9.2493e-20,\n 3.2383e-19, 5.8367e-18, -2.8874e-19, 2.3068e-17, 5.4697e-20,\n 1.2825e-18, 3.8950e-19, -4.5557e-21, -2.7925e-20, 2.1701e-17,\n 6.6084e-18, 1.6076e-17, 7.4797e-20, 1.2018e-19, 9.0350e-18,\n 6.3742e-19, 7.1973e-19, 1.0187e-20, 8.4740e-18, 1.5102e-20,\n 2.3127e-17, -3.6734e-20, 8.8026e-20, 4.8662e-20, 9.7701e-19,\n 4.6374e-20, 1.9682e-18, 1.4644e-18, 4.8266e-18, 7.5595e-18,\n 2.4380e-18, -4.9178e-21, 7.6415e-20, -2.4451e-19, -3.3767e-19,\n 2.8174e-20, 1.6513e-18, 3.0074e-22, 1.2765e-18, 2.9605e-18,\n 2.6841e-17, 1.9724e-17, 1.4402e-17, -3.0666e-20, 2.1767e-20,\n 3.6236e-18, 3.0875e-17, -1.1332e-20, -4.1472e-20, 1.6513e-17,\n -9.4327e-20, 2.0762e-17, 1.6978e-19, 2.7269e-19, -6.6163e-20,\n 9.9907e-20, 8.7711e-19, 1.3104e-17, 1.0977e-18, 3.4371e-20,\n 6.7736e-20, 5.2319e-18, 2.9442e-19, 1.4653e-18, 5.3981e-19,\n 1.9552e-18, 5.3208e-18, 1.8104e-18, 4.5591e-19, 8.1472e-19,\n 1.7024e-18, 3.0072e-17, 2.9873e-17, -6.7036e-20, 1.1206e-18,\n 1.3476e-19, 1.8420e-18, -1.1044e-18, 1.7676e-19, -4.3839e-19,\n 2.6321e-18, 2.0514e-18, 3.2338e-19, 4.3332e-19, -2.0580e-19,\n 8.5001e-20, 3.9192e-18, 4.9718e-20, 2.1110e-17, -7.0067e-19,\n -3.6005e-20, 4.7125e-19, 6.4211e-19, 2.9983e-17, 4.5860e-19,\n -1.0102e-19, 2.9227e-18, 2.1796e-19, 2.7646e-17, 9.8034e-19,\n -5.1468e-19, 8.7831e-18, 5.4231e-19, -4.6064e-20, 1.5508e-18,\n 5.0545e-20, -1.4655e-19, 5.7798e-20, 3.8827e-18, 5.1904e-18,\n 1.4946e-18, 2.5426e-18, -5.9539e-19, 5.6959e-20, 1.0743e-18,\n -8.5566e-20, 1.9199e-19, 3.9808e-18, 2.0013e-18, 4.2540e-20,\n 2.4121e-18, 1.4651e-19, 7.9655e-18, 7.6580e-18, 5.3465e-20,\n -6.3778e-23, 5.8318e-19, 3.7873e-21, 1.5474e-17, -1.5357e-19,\n 1.3829e-19, 3.0556e-20, -6.9622e-20, 1.1123e-17, -1.9579e-18,\n 5.5287e-18, -1.9952e-20, -6.9401e-19, 3.4802e-19, -5.3414e-20,\n -2.1164e-19, 1.0584e-18, 1.9007e-19, 9.3979e-19, 1.5262e-17,\n 4.2082e-20, -1.3212e-20, -8.3410e-20, 6.0342e-19, 1.7732e-17,\n 8.7654e-20], device='cuda:0')", | |
| "exp_avg_sq": "tensor([1.1599e-14, 1.7099e-12, 8.4818e-15, 4.9837e-13, 1.8892e-14, 1.2642e-16,\n 5.7020e-14, 3.6097e-14, 6.9016e-13, 2.1909e-12, 8.1152e-15, 3.6111e-15,\n 3.3505e-16, 3.4177e-15, 1.6625e-13, 5.3354e-13, 3.5002e-13, 3.2412e-17,\n 3.2232e-17, 8.6831e-15, 2.3202e-13, 9.5329e-14, 7.8326e-14, 1.6130e-15,\n 3.7801e-13, 6.4174e-16, 4.1443e-15, 5.5640e-12, 5.4836e-13, 4.5658e-15,\n 4.5680e-16, 1.8534e-12, 2.3184e-16, 3.7644e-16, 1.9977e-15, 3.9722e-17,\n 1.9543e-14, 3.8829e-13, 6.1532e-17, 7.9149e-14, 4.7081e-14, 6.4655e-15,\n 3.0298e-15, 3.9405e-15, 9.3613e-17, 8.2019e-15, 3.4943e-15, 2.2854e-12,\n 1.0412e-15, 5.4949e-14, 8.2108e-16, 3.0049e-17, 4.9924e-12, 4.3946e-13,\n 6.2942e-14, 2.3807e-15, 9.0778e-17, 6.1558e-14, 1.9432e-16, 2.8972e-15,\n 1.4020e-15, 1.3414e-16, 1.0341e-12, 2.9771e-13, 8.9187e-13, 5.4999e-13,\n 1.2578e-14, 2.8519e-15, 2.3581e-16, 3.8613e-15, 3.1940e-13, 1.0750e-16,\n 1.3614e-15, 7.6057e-14, 1.8706e-14, 1.9603e-13, 4.2659e-13, 2.8703e-17,\n 3.0542e-16, 6.5049e-15, 1.9882e-13, 2.3002e-12, 9.3243e-16, 9.3145e-14,\n 3.1677e-16, 5.9232e-14, 1.5347e-14, 8.7107e-15, 2.6806e-15, 8.5964e-17,\n 1.3190e-14, 3.8737e-17, 1.3378e-12, 2.2929e-19, 1.3432e-14, 2.1252e-13,\n 4.5380e-15, 1.4088e-16, 2.0454e-13, 3.0321e-17, 7.6052e-13, 1.0231e-16,\n 1.8714e-16, 1.0319e-13, 3.3723e-15, 1.5847e-16, 6.4369e-15, 1.7426e-17,\n 8.6896e-13, 4.9439e-16, 1.5289e-15, 1.6786e-15, 9.4676e-15, 5.1671e-17,\n 9.9884e-13, 2.1574e-14, 2.1940e-13, 4.9226e-15, 1.0415e-15, 6.3390e-15,\n 7.5965e-14, 6.8139e-18, 3.7542e-16, 3.7931e-14, 2.9961e-18, 1.5049e-12,\n 7.1973e-14, 1.5490e-15, 9.8371e-16, 1.2998e-12, 3.2679e-18, 1.3759e-15,\n 1.2827e-13, 2.4667e-14, 3.0836e-13, 2.5001e-14, 2.0367e-15, 1.7245e-15,\n 1.6362e-15, 2.3615e-16, 8.9597e-17, 4.1868e-14, 1.3711e-16, 7.8609e-16,\n 9.1863e-13, 1.3976e-12, 2.6099e-12, 6.1996e-14, 4.3513e-14, 8.5484e-16,\n 2.3800e-13, 4.4681e-12, 5.4116e-16, 5.1355e-15, 2.5965e-13, 1.4244e-16,\n 1.5964e-12, 3.2068e-15, 2.5396e-14, 1.4511e-16, 7.1330e-17, 1.4641e-14,\n 9.5530e-13, 1.0249e-14, 2.3286e-16, 4.6677e-18, 2.0738e-15, 2.6735e-15,\n 1.5462e-16, 4.6478e-17, 4.8490e-15, 9.9357e-15, 2.0856e-13, 1.4410e-13,\n 1.6920e-15, 5.4772e-15, 2.2264e-12, 3.0530e-12, 1.5111e-16, 6.9966e-15,\n 6.9102e-14, 1.1380e-13, 4.9810e-17, 6.8155e-16, 6.0561e-18, 1.4100e-12,\n 3.1291e-14, 4.5229e-18, 1.3729e-13, 1.7537e-12, 1.2086e-15, 9.9373e-14,\n 2.5999e-16, 5.9343e-13, 2.6631e-16, 9.7640e-17, 5.3048e-15, 1.0785e-14,\n 1.1144e-12, 8.0573e-15, 1.2693e-16, 1.7323e-15, 5.7768e-17, 1.4997e-12,\n 2.0346e-16, 1.6798e-16, 3.9071e-15, 6.4494e-18, 8.0209e-17, 3.3542e-16,\n 6.3095e-15, 2.8906e-17, 3.9590e-14, 8.6004e-14, 3.5663e-14, 1.3702e-14,\n 8.8370e-14, 3.1704e-16, 3.7411e-15, 2.3340e-14, 4.6773e-14, 1.5764e-13,\n 8.8206e-16, 4.2311e-16, 3.1389e-15, 2.1494e-13, 4.5511e-18, 2.4078e-13,\n 6.0419e-14, 4.8303e-17, 5.5834e-16, 5.7064e-15, 8.4249e-15, 2.7174e-13,\n 1.1541e-15, 3.7551e-17, 2.4897e-16, 1.5583e-14, 1.4012e-13, 9.9210e-14,\n 2.2814e-13, 1.6676e-14, 5.2801e-17, 1.4969e-17, 6.3282e-17, 7.1023e-17,\n 8.5346e-15, 2.9245e-16, 1.0125e-14, 1.0828e-12, 5.4494e-15, 7.6673e-16,\n 6.4407e-17, 2.4143e-14, 2.3266e-12, 2.5989e-17], device='cuda:0')" | |
| }, | |
| "51": { | |
| "step": "tensor(11268.)", | |
| "exp_avg": "tensor([ 4.1163e-18, 1.1108e-17, 2.2251e-19, 1.4863e-17, 7.4277e-18,\n 5.9114e-21, 9.0091e-18, 4.2197e-18, 1.8489e-17, 1.4623e-17,\n 1.7384e-18, -6.3661e-19, 9.0691e-18, 4.6038e-18, 4.9301e-18,\n 1.8983e-17, 1.3732e-17, -1.8646e-19, 1.1691e-19, 2.7250e-18,\n 1.0999e-17, 9.5107e-18, 4.0613e-18, 6.2941e-18, 1.0555e-17,\n 1.3233e-17, 2.7534e-19, 2.1661e-17, 1.7221e-17, 4.7005e-18,\n 9.9840e-20, 1.6399e-17, 5.4273e-18, -2.6190e-20, -3.3459e-19,\n -1.5970e-20, 3.0800e-18, 9.5168e-18, 4.7092e-19, 2.3061e-18,\n 2.9999e-18, -4.9881e-20, -9.4393e-19, -1.5981e-21, -1.9943e-19,\n -1.3373e-18, 2.6578e-19, 1.9293e-17, 1.8651e-18, 1.2264e-17,\n 3.6287e-19, 2.0483e-20, 1.4062e-17, -5.7654e-19, 5.4900e-18,\n -1.3625e-18, -2.3764e-21, 1.1844e-17, 5.5686e-18, -1.8594e-20,\n 3.4139e-19, 3.1237e-19, 1.6359e-17, 1.1319e-17, 1.0689e-17,\n 3.1556e-18, 9.4993e-18, -4.9922e-19, -5.9992e-19, -2.4931e-19,\n 1.3176e-17, 4.3904e-19, 1.8264e-18, 1.5044e-18, -1.1307e-18,\n 1.8812e-18, 8.6962e-18, 3.5379e-19, 6.2141e-19, 1.3361e-17,\n 4.8988e-18, 3.2717e-18, 4.3165e-19, 2.4753e-18, 9.5614e-18,\n 2.5595e-18, 1.1414e-18, -1.0053e-19, -2.0915e-18, 6.1394e-18,\n -5.5048e-19, -5.9025e-19, 1.4107e-17, -2.1810e-19, -1.4171e-18,\n 1.1359e-17, 6.1155e-20, 4.8797e-18, 5.9001e-18, 1.7269e-18,\n 8.0252e-18, 2.0484e-18, 4.5311e-18, 9.8428e-18, 5.2083e-19,\n 3.3425e-18, 8.2160e-18, 2.2003e-19, 1.8685e-17, 4.5647e-19,\n -9.2080e-19, -2.2588e-18, 3.7436e-19, 1.1987e-19, 1.2454e-17,\n 9.2969e-18, 1.3943e-17, -6.6387e-19, -1.2602e-20, 1.2879e-17,\n 2.8859e-18, -5.4935e-19, 7.2136e-19, 9.6536e-18, -1.8552e-20,\n 1.3618e-17, -5.8145e-19, -1.0501e-18, -1.0744e-19, 2.0805e-18,\n -3.6405e-19, -1.4690e-18, 2.8288e-18, 7.7877e-18, 8.7957e-18,\n -1.7126e-18, 5.4634e-19, -5.3179e-21, -1.1352e-19, 1.4755e-19,\n -1.8755e-19, -1.0634e-18, -1.4749e-19, -9.3921e-19, 4.7355e-18,\n 1.2559e-17, 1.0479e-17, 1.4870e-17, 1.4617e-18, -1.0103e-19,\n 7.4740e-18, 1.7084e-17, -8.3721e-20, -3.6636e-19, 1.2931e-17,\n 7.3098e-20, 1.3296e-17, -1.2628e-19, -1.3913e-18, 1.1548e-18,\n -3.4599e-20, -2.0720e-19, 7.3721e-18, -8.1212e-19, 2.4062e-18,\n -3.0782e-21, 8.3807e-18, 1.1576e-19, 4.4447e-18, -3.7060e-19,\n -1.3695e-18, 7.1818e-18, 2.2672e-18, 1.7356e-18, -5.0130e-19,\n 3.6347e-18, 1.4183e-17, 1.4131e-17, 6.1895e-20, 1.9089e-18,\n 3.1728e-18, 3.8878e-18, 8.3030e-19, 1.6612e-20, 3.6624e-19,\n 3.8650e-18, 4.3523e-18, -1.7976e-19, 2.0448e-18, -2.3597e-19,\n 4.4057e-21, 4.4988e-18, -2.7299e-19, 1.5914e-17, 5.1261e-19,\n 3.9071e-19, -2.8912e-19, -4.6044e-19, 1.8740e-17, -3.2218e-19,\n -3.8288e-20, 7.0508e-18, -5.2201e-20, 1.6909e-17, -4.7793e-19,\n 3.9295e-19, 1.2748e-17, -4.2077e-19, 4.1911e-19, 3.0450e-18,\n 5.3035e-19, 8.6087e-19, 2.9338e-20, 5.0669e-18, 6.6676e-18,\n -1.0106e-18, 2.8071e-18, 4.8313e-19, -5.9746e-19, 1.5549e-18,\n 1.4454e-18, 1.5292e-18, 7.0218e-18, 4.6899e-18, 5.8210e-19,\n 5.5201e-18, -5.5763e-20, 1.1956e-17, 1.0740e-17, 2.7657e-19,\n 1.4653e-21, -4.4375e-19, -1.0620e-18, 1.1260e-17, 6.7899e-19,\n 1.3104e-19, 4.0731e-20, 1.0829e-18, 1.0113e-17, -2.1532e-18,\n 7.2980e-18, 2.7611e-19, 5.5391e-19, -2.8965e-19, 4.4133e-20,\n 4.6977e-20, -7.8955e-19, 1.9978e-18, -4.2554e-19, 7.8001e-18,\n -1.5824e-19, 7.2115e-21, 5.4218e-19, 2.9287e-18, 1.0420e-17,\n 2.0629e-20], device='cuda:0')", | |
| "exp_avg_sq": "tensor([7.8107e-14, 1.2745e-12, 2.8326e-14, 8.6165e-13, 1.8348e-13, 6.9607e-17,\n 9.4710e-14, 4.3225e-14, 7.1345e-13, 6.8446e-13, 4.5899e-14, 1.7897e-15,\n 1.2501e-13, 3.2633e-14, 8.1482e-14, 7.4300e-13, 1.1267e-12, 1.3423e-15,\n 5.0484e-18, 3.3346e-14, 3.2480e-13, 1.0541e-13, 8.5794e-14, 6.8664e-14,\n 2.2368e-13, 7.4404e-14, 1.9805e-15, 2.1831e-12, 3.1285e-13, 4.8550e-14,\n 9.7307e-14, 9.2567e-13, 4.4776e-14, 2.0658e-16, 3.5736e-14, 1.9492e-14,\n 2.8933e-14, 4.2339e-13, 4.1145e-15, 1.1283e-13, 6.0537e-14, 2.8854e-15,\n 1.4809e-15, 1.5023e-15, 6.3441e-15, 3.4481e-15, 1.8018e-15, 8.4802e-13,\n 4.6188e-15, 1.9624e-13, 4.4897e-16, 5.9815e-16, 1.9160e-12, 5.6519e-13,\n 1.0066e-13, 1.9215e-15, 2.0182e-17, 1.8959e-13, 6.2688e-14, 1.2682e-15,\n 5.3360e-14, 6.3653e-15, 1.1396e-12, 7.4438e-13, 8.3280e-13, 3.4353e-13,\n 9.2354e-14, 1.4771e-15, 6.9274e-17, 8.3390e-14, 8.5860e-13, 9.7983e-18,\n 2.2225e-14, 4.6831e-14, 7.4793e-15, 1.1187e-13, 2.2863e-13, 1.5080e-16,\n 1.2496e-14, 2.6335e-13, 1.0581e-13, 9.0607e-13, 3.0957e-14, 5.8427e-14,\n 2.0432e-14, 3.1692e-14, 2.1339e-14, 3.5741e-15, 5.7608e-14, 1.8199e-14,\n 7.2951e-15, 5.7423e-16, 5.8992e-13, 7.5145e-18, 6.8064e-15, 2.2635e-13,\n 2.3968e-15, 2.7644e-14, 1.8871e-13, 1.5168e-14, 6.1424e-13, 8.4979e-15,\n 2.2455e-15, 1.4384e-13, 6.3255e-14, 3.6627e-15, 1.1737e-13, 1.1700e-17,\n 5.2910e-13, 2.3638e-16, 4.1768e-16, 7.8039e-16, 3.4585e-14, 1.1463e-14,\n 6.4008e-13, 1.2789e-13, 4.5486e-13, 2.5510e-13, 4.7236e-16, 1.0467e-13,\n 1.1582e-13, 9.4652e-17, 2.0094e-16, 3.0049e-13, 4.3548e-16, 8.8255e-13,\n 2.6350e-13, 1.1429e-14, 2.5110e-14, 8.8365e-13, 1.6811e-15, 5.7698e-16,\n 8.7486e-14, 1.2638e-13, 1.2894e-13, 1.0726e-14, 1.1327e-15, 8.9491e-16,\n 7.2535e-16, 7.0793e-17, 4.5803e-17, 1.7076e-14, 5.3259e-17, 2.0922e-16,\n 5.3229e-13, 4.6110e-13, 1.3086e-12, 4.0515e-13, 1.3519e-13, 4.2535e-16,\n 2.0698e-13, 2.5448e-12, 1.7022e-15, 1.7033e-15, 3.2427e-13, 9.2728e-16,\n 1.3794e-12, 1.6963e-15, 4.0001e-13, 7.9918e-15, 2.1899e-14, 6.0532e-15,\n 3.8710e-13, 5.0580e-15, 1.4803e-14, 2.5249e-16, 6.7159e-14, 1.1679e-15,\n 3.0200e-14, 2.4870e-15, 2.1191e-15, 1.2103e-13, 9.3510e-14, 1.7286e-13,\n 7.4172e-16, 5.0668e-14, 7.4465e-13, 1.0176e-12, 1.1905e-17, 1.9142e-13,\n 1.8125e-13, 9.5430e-14, 2.9375e-15, 3.3653e-16, 2.9517e-16, 5.7053e-13,\n 2.2983e-14, 8.8005e-17, 1.7535e-13, 1.2070e-12, 6.1864e-16, 1.3897e-13,\n 6.6221e-15, 7.2764e-13, 2.9061e-14, 2.6344e-15, 2.5333e-15, 4.2198e-15,\n 7.8633e-13, 3.4486e-15, 2.2362e-17, 1.3027e-13, 2.8670e-15, 9.6159e-13,\n 2.4096e-16, 1.1093e-14, 1.5202e-13, 3.1662e-16, 3.1903e-15, 8.5265e-14,\n 3.0744e-15, 8.5967e-15, 6.1489e-14, 4.2452e-14, 8.9301e-14, 7.2986e-15,\n 7.4999e-14, 2.7784e-16, 1.9454e-15, 1.1513e-14, 1.2507e-13, 2.4293e-13,\n 6.2024e-14, 8.3903e-14, 1.3508e-15, 2.0621e-13, 3.5585e-16, 1.6558e-13,\n 2.0718e-13, 7.1188e-15, 4.5676e-14, 2.7019e-15, 4.4104e-15, 3.4328e-13,\n 9.1913e-15, 3.0881e-15, 5.4542e-14, 8.2604e-15, 3.2305e-13, 1.5771e-13,\n 1.6578e-13, 8.5870e-15, 2.4340e-15, 4.5875e-16, 9.5438e-16, 8.7357e-19,\n 4.6105e-15, 6.8563e-16, 3.8778e-15, 4.7712e-13, 1.6904e-14, 7.2798e-17,\n 1.4053e-14, 4.8281e-14, 1.3195e-12, 4.4480e-15], device='cuda:0')" | |
| }, | |
| "52": { | |
| "step": "tensor(11268.)", | |
| "exp_avg": "tensor([[ 4.1478e-19, -4.5751e-19, -7.9602e-20, ..., -2.4848e-19,\n -1.4291e-19, -6.9824e-21],\n [ 1.3577e-19, -1.1775e-19, -4.7374e-20, ..., -8.8068e-20,\n -4.1327e-20, -4.6673e-20],\n [-2.0762e-19, 8.5875e-19, -4.0641e-20, ..., 7.4351e-20,\n 9.1060e-19, 4.7975e-21],\n ...,\n [-1.4597e-19, 2.1064e-17, -2.8408e-18, ..., -5.7316e-19,\n 2.8811e-17, -1.3147e-19],\n [ 3.4877e-20, 6.6023e-18, -1.1443e-18, ..., -4.0005e-19,\n 9.5091e-18, 3.9121e-19],\n [ 7.6115e-20, -1.8495e-17, 2.7099e-18, ..., 6.7525e-19,\n -2.5836e-17, -1.7326e-19]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[2.4525e-14, 4.2337e-14, 1.3856e-14, ..., 5.5712e-15, 5.1855e-14,\n 1.6473e-13],\n [1.1028e-15, 8.3874e-16, 9.4927e-17, ..., 9.8196e-16, 1.1406e-15,\n 5.0620e-15],\n [4.5982e-15, 3.1281e-15, 2.4253e-17, ..., 2.4698e-15, 6.5441e-15,\n 1.3863e-14],\n ...,\n [5.9592e-14, 9.5897e-15, 8.3519e-16, ..., 1.5692e-15, 5.8812e-14,\n 6.8000e-14],\n [3.8348e-12, 4.7650e-13, 1.3347e-14, ..., 2.8036e-13, 1.6933e-12,\n 4.7828e-12],\n [8.5048e-13, 3.0914e-13, 2.5127e-15, ..., 7.6231e-14, 9.0608e-13,\n 1.4690e-12]], device='cuda:0')" | |
| }, | |
| "53": { | |
| "step": "tensor(11268.)", | |
| "exp_avg": "tensor([ 2.6761e-19, 2.5861e-19, 7.4647e-19, -3.9514e-19, -2.7857e-18,\n 1.5997e-19, -2.0442e-18, 1.4400e-18, -1.5334e-18, -9.5001e-19,\n -4.2706e-19, -4.9663e-19, 2.3497e-18, 2.6341e-19, 8.1499e-19,\n 2.6080e-18, -1.0344e-18, 1.4514e-18, 2.0018e-18, 3.7250e-19,\n 1.8832e-18, 3.8996e-19, -1.3935e-18, -4.4217e-19, 6.9282e-19,\n 1.0950e-18, -2.6213e-18, 3.3064e-18, 8.2660e-19, 2.3649e-19,\n -1.1734e-18, -1.1267e-18, 4.3554e-19, -1.0981e-18, 2.1981e-19,\n 1.1525e-19, 1.0751e-18, 3.7592e-19, 1.2024e-18, 1.9871e-19,\n 5.6136e-19, -1.4416e-18, 1.7491e-19, -8.1167e-19, 9.8144e-19,\n 2.4153e-19, -1.7665e-18, 9.6958e-20, 6.5534e-19, -6.8519e-19,\n 7.7972e-19, 8.7344e-19, 4.8866e-19, 5.4247e-20, 7.4582e-19,\n -1.9856e-19, 1.8852e-19, -2.9427e-19, 7.8384e-19, -4.5869e-19,\n -1.2595e-18, -1.4369e-18, -5.1973e-19, -8.4623e-19, 1.4464e-18,\n 1.3832e-19, -1.6062e-18, 1.5288e-18, -8.5593e-20, -1.7421e-18,\n -1.9510e-18, 1.0295e-18, 2.5789e-18, -3.6621e-19, 5.6770e-19,\n 7.2789e-20, 5.2417e-19, -1.2558e-18, -1.5914e-18, -6.9277e-19,\n -7.2078e-20, -7.0288e-19, 1.3473e-18, -1.6905e-18, -1.3055e-18,\n 1.4015e-18, 7.2599e-19, -1.7105e-18, 1.3225e-18, 6.2649e-19,\n -1.7388e-18, 1.4138e-18, -2.6362e-19, 3.6949e-19, 2.5942e-18,\n 1.6476e-18, 1.0162e-18, -1.6453e-19, 2.8693e-19, -7.4632e-19,\n -1.4103e-19, -3.6307e-19, -1.9372e-20, -5.0395e-19, 7.6446e-21,\n 5.1150e-19, 1.3155e-19, -1.9558e-19, -3.5436e-20, 6.8613e-21,\n 6.7308e-20, -8.0714e-20, -2.2332e-19, 7.7540e-19, -6.0590e-20,\n -3.8145e-19, 4.5701e-19, -6.6640e-19, 1.5730e-19, 1.1711e-18,\n 8.4697e-20, -6.2729e-22, 2.1204e-19, 9.4240e-19, -8.6153e-19,\n 2.1065e-19, 2.9393e-19, -7.7487e-19, -1.5014e-18, -1.2319e-18,\n 8.1346e-19, -8.7188e-19, -2.2591e-18, 5.3341e-19, 4.0882e-19,\n -6.5522e-19, -4.5450e-19, 1.4779e-18, -5.7753e-19, 7.2202e-19,\n 7.9044e-19, 4.2932e-20, -3.9483e-19, -1.4767e-19, -8.6421e-19,\n -1.8732e-18, 9.9599e-19, 5.1998e-19, -1.7458e-18, 9.3410e-19,\n -1.6299e-18, -6.4317e-19, -6.2012e-19, -1.3713e-19, -7.7235e-20,\n -7.9739e-19, -2.2490e-18, 1.0938e-18, 5.5397e-19, -2.3469e-18,\n -1.0190e-18, 1.3440e-18, 1.5352e-18, 1.2175e-19, -1.3060e-19,\n -7.8297e-19, 1.5425e-19, -2.9090e-18, -1.1346e-18, 2.0385e-18,\n 1.0909e-18, 1.4131e-18, -1.6322e-19, 1.3291e-18, 2.7612e-18,\n 2.3848e-18, 1.1090e-18, 3.5221e-19, 2.4952e-18, -2.4981e-18,\n -1.1135e-18, 2.7619e-18, -1.3989e-19, -1.8762e-18, -6.8325e-20,\n 2.0195e-18, -7.4921e-19, -1.2584e-18, 1.2511e-18, -5.6031e-19,\n -2.8798e-19, -9.9180e-19, -4.7979e-20, 1.7502e-20, -2.2418e-19,\n 4.7230e-19, 7.5516e-20, -5.9482e-19, -5.2875e-19, 4.1214e-19,\n -1.9636e-20, 1.7545e-19, -1.9516e-21, -5.2638e-19, -3.3608e-19,\n 4.1931e-19, -1.3817e-19, 2.5175e-19, -1.2880e-19, -5.6140e-19,\n 4.6597e-20, 3.9905e-19, -3.2568e-19, 1.6867e-19, 2.2272e-19,\n 7.4078e-19, -3.8712e-19, -1.5930e-19, 4.3387e-19, 2.1328e-19,\n -2.8433e-20, -5.8232e-19, -9.6584e-20, -2.0642e-19, -1.4004e-18,\n 7.7002e-19, -1.6992e-18, 8.2251e-19, 1.0249e-18, 1.2143e-18,\n 1.9430e-18, -4.7534e-19, 2.7695e-18, 9.4577e-19, -6.1627e-20,\n 8.3151e-19, -3.1162e-18, 1.0450e-18, 2.4484e-20, -5.4648e-19,\n -5.9830e-19, 9.4161e-19, 8.4204e-19, 1.8196e-19, 5.9129e-19,\n 2.7971e-18, 2.6853e-18, -1.1717e-18, -4.1480e-19, -2.6977e-18,\n -1.9525e-18, -5.4210e-19, 3.5436e-19, 6.2924e-19, -1.2867e-18,\n -6.8140e-19, 5.2967e-26, 2.3275e-26, 4.5496e-26, 1.8624e-25,\n 2.0925e-25, 1.5335e-25, 2.6922e-25, -1.2636e-25, -8.4306e-26,\n 6.1487e-26, -1.7149e-25, 1.8808e-25, -6.6682e-25, -2.1579e-25,\n -2.2222e-25, -2.3859e-25, -5.7684e-26, -2.1900e-25, -1.3111e-25,\n -8.2714e-26, -8.2026e-26, -2.4927e-27, -2.7134e-26, 1.5883e-25,\n -2.3426e-25, -8.7231e-26, 1.8670e-25, -3.7305e-25, -5.5336e-26,\n 2.8131e-25, 1.5093e-25, 8.3668e-26, 1.2159e-25, -9.4063e-26,\n 1.7707e-25, -1.3573e-25, 2.5586e-25, 5.3187e-26, 1.8760e-25,\n -8.2201e-26, 2.2079e-26, -2.5713e-25, -1.0468e-26, -1.8647e-25,\n 4.6056e-26, -4.8721e-26, -4.2023e-26, 1.3577e-25, 1.3631e-25,\n -3.1377e-26, 2.0681e-25, -9.8957e-26, 8.8930e-26, -1.8530e-25,\n 8.9536e-26, 1.1377e-25, -1.3591e-25, 6.6037e-26, -2.7074e-26,\n -9.2175e-26, -8.8350e-26, -1.5131e-25, -1.9844e-26, -2.3213e-25,\n -2.2623e-25, 8.4792e-26, -1.8322e-25, -1.4291e-25, -2.0091e-25,\n -1.6045e-25, -7.8014e-27, 2.5138e-26, -2.0571e-25, -1.6183e-27,\n 1.1729e-25, 6.2185e-26, -1.2369e-25, -1.4291e-26, 7.2013e-26,\n -1.1134e-25, -3.5245e-26, 1.1561e-25, 3.1020e-25, -2.3991e-26,\n -7.6003e-26, 7.4056e-26, -1.3517e-26, 1.0498e-25, 1.8329e-25,\n 5.9882e-26, 2.3935e-26, 5.9205e-26, -3.4656e-26, 2.2411e-26,\n 2.6236e-25, 7.0755e-26, -1.5823e-25, 3.0371e-26, -3.0243e-26,\n 1.4768e-25, 1.8944e-25, -4.7039e-26, 6.0758e-26, 8.5768e-26,\n -4.5045e-26, -8.2475e-26, 7.4602e-26, -3.3364e-26, 6.7207e-27,\n 5.1775e-26, -6.4378e-26, -2.2127e-25, 1.2299e-25, -6.8030e-26,\n -1.6345e-25, -9.5251e-26, 9.9127e-26, 2.5350e-25, -7.7568e-26,\n 8.3467e-26, 2.7606e-26, 2.0462e-26, 7.0853e-26, 2.0536e-25,\n 1.7822e-25, 5.6269e-27, -1.0899e-25, 9.7207e-26, 1.8567e-25,\n 1.1753e-25, 1.0632e-25, 1.9781e-25, 1.2223e-25, 1.8928e-27,\n -3.8312e-26, 8.8336e-26, -2.1886e-26, -1.6257e-25, 2.2104e-25,\n -1.1626e-25, 1.2778e-25, -8.6999e-26, 6.1748e-26, -6.8031e-26,\n 2.5599e-26, 1.0229e-25, -1.6927e-25, 2.0946e-25, 1.4314e-25,\n -1.0777e-25, 1.7350e-25, 1.0941e-25, 1.1681e-26, 6.0916e-26,\n 1.8917e-25, -1.5198e-25, 1.3824e-25, -1.1101e-25, -8.8919e-26,\n 1.6532e-25, 9.8353e-26, 1.3696e-26, 4.3011e-26, -5.0086e-26,\n 1.0589e-25, 1.5718e-25, -8.2621e-26, 1.9385e-25, 1.4844e-26,\n -1.0261e-25, 1.3809e-25, -6.4010e-26, 1.3820e-25, -1.3150e-25,\n -7.1980e-26, 5.6015e-26, 7.7230e-26, 1.4051e-26, -3.9472e-26,\n -2.7598e-26, -1.8280e-25, -1.9554e-25, -7.8289e-27, -6.9060e-26,\n -7.8848e-26, -5.8782e-26, -9.3492e-26, -8.8829e-26, 6.9442e-26,\n 1.1852e-26, -1.2764e-26, 1.1064e-26, 1.9273e-26, 1.3440e-25,\n -6.6792e-26, 5.0660e-26, -1.8089e-25, 6.6102e-26, -8.1113e-26,\n -1.0957e-25, -9.3380e-26, 1.0583e-25, 1.4257e-25, -1.8359e-25,\n 3.8411e-26, 2.2015e-26, 5.8754e-26, -7.7034e-26, 1.1599e-25,\n 3.5389e-25, -1.3130e-25, 6.6803e-26, 1.6398e-25, -1.5367e-25,\n 5.5188e-26, -1.6934e-25, -1.8692e-26, 3.1850e-25, -1.3711e-25,\n 1.9986e-25, -2.9565e-25, -5.4891e-26, -1.6334e-25, -7.6675e-26,\n 1.8772e-25, -3.7190e-25, 2.1493e-25, 2.5694e-25, -8.2941e-26,\n -1.4049e-25, 4.4968e-26, 6.1223e-25, -2.8243e-25, 1.9338e-25,\n 7.5803e-26, 3.6118e-25, -5.6884e-26, -1.1635e-25, 2.5336e-25,\n -2.4712e-25, 2.1513e-25, -2.9160e-25, -6.2304e-26, -7.7086e-26,\n 1.8346e-25, 1.4768e-25, -3.2938e-25, -1.4861e-25, -1.7107e-25,\n -7.5270e-26, 1.1291e-25, -2.7495e-26, -6.3406e-26, 1.7980e-25,\n 1.8341e-25, 3.1557e-26, 1.9207e-18, -1.3739e-17, 1.1803e-17,\n 8.7053e-18, -4.4180e-17, 2.8176e-17, 3.4005e-17, -7.2045e-18,\n 1.0349e-17, 1.2839e-17, 1.8153e-17, -2.5981e-17, 1.3878e-17,\n -2.0911e-17, -2.8069e-17, -8.4580e-18, 2.7611e-17, -2.3677e-17,\n 7.7190e-18, -2.1097e-17, 2.6240e-17, -1.9657e-17, 4.0304e-17,\n 2.5857e-17, 2.6500e-17, 2.3283e-18, 4.6097e-17, 2.7597e-17,\n -1.5415e-17, 2.6213e-17, -3.5412e-17, -2.5438e-17, 2.4244e-17,\n -2.7393e-17, 2.2867e-17, 9.3039e-18, -4.3743e-17, -8.8440e-18,\n 4.4520e-18, 2.5126e-17, -7.8056e-18, -4.5192e-17, -3.0817e-17,\n -2.3414e-17, 1.3686e-17, 2.1870e-17, -4.6317e-17, 4.2805e-17,\n 2.8809e-17, 2.2584e-17, -1.3645e-17, 1.3945e-17, -1.1356e-17,\n 2.7191e-17, 1.2127e-17, -2.9503e-17, 2.5081e-17, -2.5217e-17,\n 2.3919e-17, 9.7352e-18, -4.3279e-17, 2.4849e-17, -3.5690e-17,\n 9.7758e-20, -3.6045e-17, -3.7418e-18, 2.7342e-17, -2.4427e-17,\n 3.4712e-17, -4.3274e-17, -5.8204e-18, 3.4964e-17, -2.7286e-17,\n 1.6866e-17, -3.2482e-17, 1.0307e-17, -9.1433e-18, 6.4767e-18,\n -4.3145e-18, 1.1213e-17, -8.3644e-18, 2.8962e-17, -2.3423e-17,\n -2.4028e-17, -1.5478e-17, -4.1254e-18, 8.8289e-18, 1.2767e-17,\n -1.9842e-17, 1.9771e-17, 2.7262e-17, 2.3958e-17, -1.9269e-17,\n -3.0927e-17, 1.0519e-17, -7.9541e-18, 1.4115e-17, -3.5068e-17,\n 7.0867e-18, -1.9688e-17, -8.3140e-18, -1.6412e-17, -1.2060e-17,\n 4.1472e-17, -1.8940e-17, 5.0296e-18, -2.2354e-17, 5.8782e-18,\n 2.4691e-17, 1.6031e-17, -3.8009e-17, -1.5983e-17, -1.5016e-17,\n -3.1463e-17, -2.2225e-17, -1.2135e-17, -2.6240e-17, 4.4835e-17,\n -2.9325e-17, 1.7988e-17, -3.9498e-17, -2.0047e-17, -4.0009e-17,\n -1.0295e-17, -3.2800e-17, 3.8227e-17, 1.7306e-17, -7.0677e-19,\n 4.1648e-17, -7.9851e-18, 2.5051e-17, -2.8199e-17, -1.1787e-17,\n -2.6638e-17, 3.0440e-18, 3.6171e-17, -7.8373e-18, 3.0611e-17,\n -3.7488e-17, 2.2951e-17, 4.3522e-17, -6.2113e-18, 1.0843e-17,\n 1.9459e-17, 1.5580e-17, 2.6164e-17, -2.8435e-17, -1.0027e-17,\n 1.4239e-17, -1.0881e-17, -1.3968e-17, -2.1907e-17, 2.9322e-17,\n -3.3005e-18, 2.4804e-17, 1.8789e-17, 3.2092e-17, 1.7631e-17,\n 2.6953e-17, 3.1162e-17, -3.4447e-17, -1.8631e-17, 1.9202e-17,\n -1.5439e-17, 3.1770e-17, -3.2389e-18, 1.5613e-17, 1.5508e-17,\n -2.6029e-17, -4.2012e-17, 3.1549e-17, 1.5720e-18, -1.0094e-17,\n 2.7037e-17, 1.3859e-17, -5.4697e-18, 4.6209e-17, 2.6979e-17,\n -1.9248e-17, 2.6764e-17, 1.2160e-17, 8.4420e-18, -8.1433e-18,\n 2.7092e-17, -9.1335e-18, 8.2371e-18, -2.6672e-17, -1.0806e-17,\n 1.9344e-17, 3.0276e-17, -1.0147e-17, 1.3138e-17, 8.4893e-18,\n 2.8660e-17, -2.3515e-17, 2.5064e-17, -1.4490e-17, 3.7750e-17,\n -1.1053e-17, -5.7687e-18, -5.5597e-18, -3.7537e-17, -3.9107e-17,\n -2.8662e-17, 6.5651e-18, 2.1648e-17, 1.1812e-17, -3.3637e-17,\n 1.0313e-17, 2.8138e-17, -1.7022e-17, -2.8123e-17, 3.8136e-17,\n -1.1304e-17, -1.5705e-17, 2.5958e-17, -4.9114e-18, 1.1871e-17,\n 1.2897e-17, -1.7652e-18, -2.9136e-18, 2.3226e-17, -2.0121e-17,\n -3.5170e-17, 4.0475e-17, 2.9658e-18, 3.1105e-17, -2.1140e-17,\n -3.6790e-17, -1.1635e-17, 1.2998e-17, 3.8349e-17, 3.8826e-17,\n 2.9052e-17, -2.1500e-17, 1.4066e-17, -2.7046e-17, 2.5518e-17,\n -1.3206e-17, 3.0421e-17, 1.1027e-17, 2.8452e-17, 1.5212e-17,\n 3.4899e-17, 4.0723e-17, 6.3761e-19, -1.9616e-17, 3.5082e-17,\n 2.7912e-17, -1.2385e-17, 1.1662e-17, 3.4956e-17, -3.2490e-17,\n 2.9588e-17, 1.1436e-17, -2.7904e-17], device='cuda:0')", | |
| "exp_avg_sq": "tensor([5.9335e-14, 4.5659e-15, 3.5366e-14, 2.4446e-14, 3.0921e-14, 5.0523e-14,\n 5.6265e-15, 6.5207e-14, 8.4403e-14, 5.9571e-15, 3.3114e-14, 1.1645e-14,\n 8.3403e-14, 1.8835e-15, 9.6047e-14, 1.3368e-14, 1.9055e-14, 1.6796e-14,\n 1.9176e-14, 9.6094e-15, 1.1148e-14, 2.2637e-14, 2.0246e-15, 2.6779e-14,\n 3.5840e-14, 2.4047e-15, 3.6743e-14, 1.2301e-13, 7.1642e-15, 1.2259e-15,\n 3.4974e-14, 4.0234e-14, 1.5252e-13, 5.8705e-16, 1.2987e-14, 1.7060e-14,\n 1.1016e-13, 4.4028e-16, 2.2724e-14, 7.4952e-15, 1.0861e-14, 1.6391e-15,\n 1.5319e-13, 4.3085e-14, 9.5338e-14, 4.3728e-15, 2.3704e-13, 9.3472e-14,\n 2.6998e-14, 2.6805e-15, 7.4326e-15, 1.3903e-13, 3.8343e-15, 2.6129e-14,\n 1.0677e-15, 4.8395e-15, 9.1078e-14, 2.1316e-14, 1.1902e-14, 8.5831e-14,\n 6.7670e-14, 1.7325e-13, 1.4776e-13, 3.7837e-14, 8.7831e-14, 6.3995e-15,\n 1.6441e-14, 2.4120e-14, 1.9463e-15, 1.2160e-13, 1.2957e-13, 9.7977e-15,\n 1.0618e-13, 4.2050e-15, 9.9581e-15, 1.3749e-13, 1.6072e-14, 4.7697e-14,\n 2.0499e-14, 6.3614e-14, 2.9306e-14, 4.6659e-15, 2.6761e-14, 1.1927e-13,\n 1.0839e-13, 3.0471e-14, 1.5235e-14, 6.5313e-14, 2.8312e-14, 8.3429e-15,\n 5.0398e-14, 3.1588e-14, 3.3802e-14, 1.5718e-15, 9.3074e-14, 2.1906e-14,\n 1.0727e-14, 3.4241e-15, 3.4181e-14, 1.1473e-14, 3.8875e-15, 4.1570e-15,\n 1.3339e-14, 1.9262e-14, 8.5034e-15, 1.0208e-14, 4.4854e-15, 3.2325e-14,\n 7.6295e-15, 1.4646e-14, 9.2864e-16, 4.1974e-14, 6.0469e-14, 7.0696e-15,\n 1.1084e-14, 4.9030e-15, 2.0438e-14, 2.1314e-15, 2.2893e-15, 7.5848e-15,\n 2.3753e-15, 3.0826e-15, 9.5138e-15, 1.7349e-14, 1.2560e-14, 5.7294e-16,\n 2.0279e-14, 2.7265e-15, 9.7896e-15, 1.8186e-14, 1.1043e-13, 1.0015e-13,\n 7.0296e-14, 3.1670e-14, 3.7313e-14, 2.7001e-15, 1.6238e-13, 5.1117e-14,\n 2.9450e-14, 1.8335e-14, 4.1895e-14, 5.0173e-15, 6.3538e-15, 6.3322e-15,\n 4.3524e-15, 1.1641e-13, 9.2496e-15, 4.9361e-15, 3.1826e-13, 1.1315e-14,\n 3.0439e-14, 1.5301e-13, 7.4942e-14, 2.0962e-15, 9.7774e-15, 5.1073e-16,\n 1.6522e-13, 6.6906e-15, 5.6734e-14, 9.1171e-14, 2.7002e-16, 3.3173e-16,\n 3.1894e-15, 8.8865e-15, 8.5823e-16, 5.9091e-15, 1.0667e-15, 2.5022e-14,\n 4.3235e-15, 5.0213e-15, 2.9652e-15, 1.1201e-15, 6.3218e-15, 8.6803e-16,\n 6.5652e-16, 2.7175e-15, 2.7750e-16, 9.5788e-15, 2.2065e-15, 4.9606e-15,\n 1.1050e-14, 6.8325e-15, 1.9170e-15, 1.9261e-15, 3.8395e-16, 1.5787e-15,\n 1.0919e-14, 5.8386e-15, 4.1540e-15, 4.9516e-15, 7.7928e-15, 8.0797e-15,\n 1.1585e-14, 1.0973e-13, 2.6122e-13, 2.1213e-13, 1.0503e-13, 3.7969e-15,\n 3.1518e-14, 1.7062e-15, 5.3888e-14, 2.1604e-13, 2.9807e-14, 4.0644e-14,\n 4.0850e-14, 1.1372e-14, 3.9315e-14, 1.5126e-15, 1.6910e-13, 3.9940e-14,\n 1.2652e-13, 4.5601e-15, 6.7947e-14, 1.4525e-15, 3.0382e-16, 2.7987e-13,\n 2.1922e-14, 3.0106e-14, 4.9168e-15, 2.9003e-13, 6.6736e-15, 1.8257e-13,\n 8.7812e-14, 5.6686e-16, 2.0714e-14, 1.1790e-14, 6.3804e-15, 2.7585e-14,\n 6.8005e-15, 3.7437e-16, 4.5018e-16, 1.4405e-14, 1.0095e-14, 7.5653e-15,\n 4.4487e-14, 2.7761e-15, 2.2173e-14, 1.5094e-14, 5.7008e-16, 3.0475e-15,\n 2.2083e-14, 4.5327e-14, 1.4287e-14, 6.4441e-14, 3.3061e-15, 1.0540e-14,\n 1.0160e-13, 2.7975e-14, 2.2608e-15, 8.3423e-15, 1.7925e-14, 8.2158e-15,\n 9.3093e-15, 1.0032e-14, 7.1738e-16, 1.9721e-15, 4.0593e-31, 9.4210e-30,\n 5.7776e-30, 1.0757e-30, 1.5627e-30, 2.8909e-30, 1.7354e-30, 5.0189e-31,\n 1.0248e-30, 5.5932e-31, 3.7312e-30, 4.4284e-31, 3.5163e-30, 4.6962e-31,\n 6.9450e-31, 3.1645e-30, 1.3145e-30, 1.7121e-30, 1.6889e-30, 2.7597e-31,\n 8.1035e-30, 1.7435e-30, 7.7724e-30, 6.6581e-30, 6.2186e-30, 9.5049e-31,\n 4.9666e-30, 2.0841e-30, 4.5777e-30, 1.2941e-30, 6.3208e-31, 9.4209e-30,\n 3.7116e-30, 5.5193e-31, 2.8991e-30, 5.6576e-30, 9.4317e-30, 1.7009e-30,\n 4.1287e-30, 8.5542e-30, 1.3108e-30, 1.1905e-30, 6.2372e-30, 4.5326e-30,\n 3.1862e-30, 4.1420e-30, 7.7894e-30, 1.7685e-30, 3.1729e-30, 1.4243e-30,\n 9.0599e-31, 2.6116e-30, 7.4002e-31, 1.4601e-30, 3.7271e-30, 5.1487e-30,\n 1.0679e-29, 3.6637e-30, 4.9852e-30, 3.8656e-30, 6.2046e-30, 7.0402e-30,\n 1.9491e-30, 3.0271e-30, 1.6070e-29, 1.2479e-30, 9.3843e-30, 5.7457e-30,\n 4.2706e-30, 2.3979e-30, 1.6373e-30, 3.6676e-30, 5.3700e-30, 2.3199e-29,\n 8.5376e-31, 6.0949e-31, 6.2859e-31, 1.0704e-31, 1.6949e-30, 1.3694e-30,\n 7.1602e-31, 1.8904e-30, 7.5619e-30, 3.0590e-30, 1.2524e-30, 1.7604e-30,\n 2.6615e-30, 2.5914e-30, 1.1849e-30, 2.2157e-30, 2.9891e-31, 1.6098e-30,\n 4.1199e-31, 1.5430e-30, 2.1662e-30, 1.8596e-30, 2.4233e-30, 5.2618e-31,\n 1.3385e-30, 2.3804e-30, 1.2654e-30, 4.1448e-31, 6.0096e-31, 1.2183e-30,\n 2.5080e-30, 1.2577e-30, 3.6148e-31, 2.2066e-30, 5.4703e-31, 9.1367e-31,\n 1.2309e-30, 1.7432e-30, 1.8111e-30, 5.6030e-31, 4.7521e-30, 1.7451e-30,\n 8.2927e-31, 1.1860e-30, 8.3312e-31, 2.4130e-30, 1.0632e-30, 1.6367e-30,\n 9.9378e-31, 8.6291e-31, 9.0274e-31, 1.1216e-30, 6.5964e-31, 4.6906e-31,\n 9.9412e-30, 5.3781e-30, 2.4690e-30, 5.8673e-30, 1.4073e-29, 7.0354e-30,\n 1.4551e-30, 1.1125e-29, 1.6655e-31, 1.2701e-30, 3.3003e-30, 1.8518e-29,\n 1.7526e-30, 2.2795e-30, 7.9451e-30, 2.0661e-30, 8.3797e-30, 9.0717e-30,\n 1.8004e-30, 7.0820e-31, 6.1605e-30, 2.2148e-30, 4.5461e-30, 2.1100e-29,\n 1.4586e-30, 3.0042e-30, 2.3700e-30, 8.5689e-30, 1.5212e-29, 1.5497e-29,\n 6.0526e-30, 6.7678e-31, 1.2238e-30, 1.8202e-30, 6.1477e-31, 5.0648e-31,\n 6.7468e-31, 2.0910e-30, 1.4013e-31, 2.2709e-30, 8.1507e-31, 6.9976e-31,\n 1.3924e-31, 1.1317e-30, 3.7304e-31, 1.5528e-30, 8.7345e-31, 2.6898e-30,\n 6.3109e-31, 1.2501e-30, 3.0667e-31, 2.8563e-31, 1.0204e-31, 2.4954e-31,\n 7.5181e-31, 1.6874e-31, 1.4475e-30, 1.0183e-30, 3.0631e-30, 6.0021e-31,\n 3.3330e-30, 2.6073e-31, 3.0975e-31, 6.6294e-31, 4.7617e-31, 1.0825e-29,\n 6.8114e-31, 2.0770e-30, 1.6620e-30, 6.1660e-30, 1.3919e-30, 6.0182e-31,\n 1.9883e-30, 3.9105e-30, 7.3991e-31, 3.2377e-31, 7.3490e-30, 2.5961e-31,\n 7.9655e-31, 1.1805e-30, 8.2928e-31, 4.9991e-30, 3.8137e-30, 4.7926e-30,\n 4.0282e-31, 1.2792e-30, 2.5839e-31, 1.0273e-30, 8.9487e-31, 2.7934e-30,\n 1.5186e-30, 2.8874e-30, 1.2830e-30, 1.2794e-30, 1.3337e-30, 3.2474e-31,\n 5.4497e-30, 1.0239e-29, 5.6781e-30, 2.1364e-30, 1.2188e-29, 1.0730e-29,\n 4.1801e-30, 4.8401e-30, 1.6918e-30, 8.5119e-30, 6.6696e-30, 3.3669e-30,\n 2.4608e-30, 3.7165e-30, 4.7281e-30, 9.1094e-31, 5.4290e-30, 2.1423e-30,\n 3.2786e-30, 1.0778e-30, 1.3351e-30, 8.8779e-30, 3.0671e-30, 1.3974e-29,\n 2.1368e-30, 8.7274e-31, 6.2334e-30, 7.6972e-30, 6.9575e-30, 2.4894e-30,\n 7.3632e-31, 7.0313e-30, 1.1481e-11, 1.7940e-12, 4.8464e-12, 2.0796e-12,\n 2.9223e-11, 2.4150e-12, 1.9556e-11, 2.9966e-12, 5.2900e-12, 2.9798e-13,\n 3.7148e-13, 5.7438e-12, 5.4855e-12, 7.5507e-13, 2.0792e-12, 2.2798e-11,\n 6.8904e-12, 1.7824e-12, 1.2799e-11, 2.7805e-12, 1.2712e-11, 1.9304e-12,\n 3.1819e-12, 7.2568e-13, 1.6178e-12, 1.6648e-12, 1.8215e-11, 2.0382e-11,\n 1.3421e-11, 1.4083e-11, 4.9709e-12, 2.6817e-12, 4.2181e-12, 6.4188e-12,\n 1.0050e-11, 3.3879e-13, 4.2080e-11, 4.4219e-12, 7.5063e-13, 8.2971e-12,\n 1.5847e-12, 9.3427e-12, 2.0265e-11, 2.3818e-11, 1.0825e-11, 9.9105e-13,\n 2.8670e-11, 6.6457e-12, 9.1221e-12, 2.0828e-12, 2.0576e-12, 1.3073e-11,\n 2.5279e-11, 1.1508e-11, 4.6502e-12, 3.7543e-12, 6.5768e-12, 1.2554e-12,\n 2.3568e-12, 2.7098e-12, 4.1615e-11, 4.1394e-12, 2.9937e-12, 4.3352e-13,\n 4.7475e-12, 4.6502e-13, 8.4905e-12, 2.0242e-12, 1.0295e-11, 4.2750e-11,\n 1.8729e-12, 1.3409e-11, 1.8656e-11, 3.0489e-12, 1.8969e-11, 2.3343e-12,\n 2.5338e-13, 6.6690e-13, 1.9096e-13, 2.9282e-12, 2.7176e-12, 2.1761e-11,\n 1.0965e-12, 1.3282e-11, 4.5671e-12, 2.3417e-12, 3.3332e-12, 2.4356e-13,\n 5.5775e-12, 3.1636e-12, 2.8718e-12, 1.7392e-11, 2.4994e-12, 2.3615e-11,\n 2.0463e-11, 1.8157e-12, 7.3768e-12, 2.3369e-11, 1.8019e-11, 6.9399e-12,\n 1.0255e-13, 7.6951e-12, 2.0336e-12, 1.2931e-11, 3.3819e-11, 9.6010e-13,\n 5.4694e-13, 1.9258e-12, 1.1008e-12, 2.0583e-12, 3.7056e-11, 1.4739e-12,\n 7.1146e-13, 3.8129e-12, 7.4482e-12, 8.1890e-12, 2.0246e-11, 1.5505e-11,\n 2.3246e-12, 2.2134e-12, 2.1496e-11, 5.7237e-13, 2.4467e-11, 9.5215e-13,\n 1.9491e-12, 8.8229e-12, 1.7435e-12, 3.3686e-13, 1.8132e-11, 4.5359e-12,\n 1.1280e-11, 1.4686e-11, 5.7701e-13, 7.3036e-12, 6.6200e-13, 5.6026e-12,\n 4.0108e-12, 1.8562e-11, 1.4234e-11, 9.5347e-13, 1.5994e-11, 7.1786e-12,\n 7.6142e-13, 1.0632e-12, 1.7348e-11, 9.1034e-12, 1.5604e-12, 4.5821e-12,\n 7.3434e-12, 2.3364e-12, 4.9727e-13, 8.8169e-13, 2.4218e-11, 4.7283e-13,\n 5.3854e-12, 1.3553e-12, 2.0924e-11, 5.3003e-12, 1.9939e-12, 1.2092e-11,\n 7.7613e-12, 2.7261e-12, 1.4186e-11, 7.5054e-13, 1.1917e-11, 5.3641e-13,\n 2.2325e-12, 2.9559e-11, 1.5924e-12, 8.3113e-12, 1.5561e-12, 6.8127e-12,\n 3.7893e-12, 5.2333e-12, 8.6028e-12, 2.1591e-12, 1.1816e-11, 2.8746e-12,\n 1.8316e-12, 3.0081e-12, 1.3598e-12, 3.4302e-13, 3.7997e-14, 3.0912e-12,\n 7.4409e-12, 1.3586e-13, 5.9953e-12, 1.7178e-12, 1.1777e-11, 6.0148e-12,\n 1.0218e-11, 2.9694e-12, 2.6011e-12, 6.3945e-12, 1.4234e-12, 1.1386e-11,\n 1.1752e-12, 3.7346e-12, 2.0838e-13, 1.6627e-11, 1.2954e-12, 4.6795e-12,\n 9.3576e-12, 5.1063e-12, 6.8601e-12, 1.0431e-11, 1.3997e-11, 8.6353e-12,\n 4.9794e-12, 3.1946e-12, 4.5722e-12, 1.0911e-11, 4.0033e-11, 6.6826e-12,\n 4.0834e-12, 1.6480e-11, 7.9940e-12, 4.8775e-12, 7.3548e-12, 1.8822e-12,\n 3.1027e-11, 1.6193e-12, 7.7251e-12, 2.2813e-11, 6.7606e-12, 2.0708e-13,\n 4.0597e-12, 3.5501e-12, 3.6969e-11, 8.7220e-12, 2.2401e-13, 8.4889e-12,\n 2.0486e-11, 1.3180e-11, 7.5685e-12, 1.2739e-11, 1.8584e-12, 4.0581e-11,\n 1.1421e-12, 1.9518e-11, 4.5798e-12, 2.4051e-12, 5.1415e-13, 5.0127e-11,\n 4.2322e-12, 4.6025e-12, 1.4500e-11, 7.1411e-12, 1.8671e-11, 9.7725e-13,\n 1.6774e-11, 1.0260e-11, 1.5097e-11, 5.6443e-13, 1.8369e-11, 6.3043e-12],\n device='cuda:0')" | |
| }, | |
| "54": { | |
| "step": "tensor(11268.)", | |
| "exp_avg": "tensor([[ 1.5525e-17, -1.3945e-17, 7.2692e-18, ..., 1.0364e-17,\n 1.8924e-17, -1.9596e-17],\n [ 5.8580e-18, -4.7421e-18, 1.8714e-18, ..., 2.7328e-18,\n 5.0105e-18, -6.6429e-18],\n [-1.2379e-17, 1.0920e-17, -4.8538e-18, ..., -7.9190e-18,\n -1.4099e-17, 1.4689e-17],\n ...,\n [-1.7189e-17, 1.5367e-17, -7.2762e-18, ..., -1.1673e-17,\n -2.1891e-17, 2.0675e-17],\n [-2.0625e-17, 1.8782e-17, -1.0709e-17, ..., -1.5108e-17,\n -2.8248e-17, 2.6592e-17],\n [-1.5750e-18, 1.1507e-18, -1.9518e-19, ..., -1.2606e-18,\n -2.1031e-18, 1.9422e-18]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[1.7853e-13, 1.0938e-12, 2.9403e-12, ..., 1.4900e-14, 1.7034e-12,\n 1.9082e-12],\n [2.0826e-13, 1.5721e-12, 3.5368e-12, ..., 2.2702e-14, 1.1003e-12,\n 1.8369e-12],\n [1.4757e-13, 6.0119e-13, 8.7105e-13, ..., 1.4159e-14, 5.0470e-13,\n 7.4490e-13],\n ...,\n [2.3599e-13, 1.1594e-12, 4.1060e-12, ..., 2.7645e-14, 2.9316e-12,\n 2.2774e-12],\n [3.4152e-14, 2.2290e-13, 5.9565e-13, ..., 4.0742e-15, 4.8181e-13,\n 5.6945e-13],\n [1.5637e-13, 7.9045e-13, 2.2659e-12, ..., 4.0144e-15, 7.7423e-13,\n 1.0550e-12]], device='cuda:0')" | |
| }, | |
| "55": { | |
| "step": "tensor(11268.)", | |
| "exp_avg": "tensor([ 2.1277e-17, 6.2090e-18, -1.6098e-17, -1.3543e-17, -2.7214e-17,\n 1.3776e-17, -1.4997e-17, 2.9677e-17, -8.9127e-18, 5.4868e-18,\n -4.8728e-18, 1.2053e-17, 2.3123e-17, 1.1195e-18, 1.6330e-17,\n 1.7144e-17, 1.1087e-17, 1.4858e-17, -5.7154e-18, -1.3971e-17,\n 2.4173e-17, -2.8891e-18, 1.5782e-17, -2.5609e-17, 1.9812e-17,\n 1.9053e-17, -1.5367e-17, 6.6085e-18, -1.2933e-17, -1.5460e-17,\n -1.4472e-17, -1.8777e-17, -5.7074e-18, 2.4164e-17, -2.2798e-17,\n -2.9970e-17, -2.8893e-17, -1.3117e-17, -1.0539e-17, -2.8209e-17,\n -2.7212e-17, -2.8871e-17, -1.8720e-17, 2.2333e-17, -2.1120e-17,\n 1.7242e-17, -1.8794e-17, 1.8231e-17, 7.7991e-18, -1.6628e-17,\n 1.5937e-17, 1.0493e-18, -1.5996e-17, -9.6546e-18, -2.7347e-17,\n 2.9655e-17, 1.3615e-17, -1.6372e-17, 2.3276e-17, -1.4922e-17,\n -1.8975e-17, 1.4615e-17, -1.2138e-17, -1.8334e-17, 2.8754e-17,\n 2.1801e-17, 2.7956e-17, 1.8056e-17, 1.7989e-17, 1.7544e-17,\n 1.6169e-17, -2.0708e-17, -2.0956e-17, -1.6670e-17, 7.5446e-18,\n 1.6311e-17, 2.1726e-17, 2.1515e-17, 2.8920e-17, -4.5764e-18,\n 2.9805e-17, 8.5604e-18, 6.6689e-18, -1.3300e-17, -2.1054e-17,\n 1.1250e-17, -1.4445e-18, -1.9278e-17, -6.7872e-18, 8.4593e-18,\n -1.8909e-17, -2.5156e-17, 7.4039e-19, 2.6601e-17, -1.6478e-17,\n 2.5315e-17, -2.3907e-17, 1.6189e-17, -3.0433e-17, -1.9078e-17,\n -2.6531e-17, 2.8953e-17, 1.1449e-17, -2.1012e-17, -1.9225e-17,\n -1.1799e-17, -1.3781e-17, 6.4349e-18, 1.1810e-17, -2.2181e-17,\n 1.1783e-17, 1.7921e-17, -2.7900e-17, 2.5472e-17, -1.6786e-17,\n 1.5075e-17, 2.8205e-18, 8.7890e-18, -2.3032e-17, -1.2233e-17,\n -6.2135e-19, 1.0076e-17, 1.9643e-17, -3.9525e-18, 3.1365e-17,\n 1.7118e-17, -3.7808e-17, 3.0684e-18, 2.5497e-17, -6.5524e-18,\n -1.8373e-17, 5.0990e-18, 1.2793e-17, 1.3639e-17, 1.8831e-17,\n -2.0837e-17, -1.1555e-17, 1.1319e-17, -1.0416e-17, -2.4379e-18,\n 2.7368e-17, -1.3750e-17, 2.4901e-17, 2.1363e-17, 5.8823e-18,\n -8.1904e-18, 1.2496e-17, -1.2872e-17, -1.0890e-17, -1.4924e-17,\n 4.6922e-18, 1.1799e-17, 2.6137e-17, -1.1892e-17, -2.2261e-17,\n -1.1834e-17, 1.4119e-18, 2.5620e-17, -1.0104e-18, -1.5882e-17,\n -2.2684e-17, 2.4433e-17, 6.4467e-18, -2.6730e-17, 1.3017e-17,\n -1.5630e-18, 2.2319e-17, -2.5554e-17, -1.4116e-17, -2.4234e-17,\n -1.7471e-18, 1.2022e-17, 3.0848e-17, -2.2147e-17, 1.2640e-17,\n -2.9667e-17, -1.0986e-17, -1.9616e-17, -2.1947e-17, 2.8199e-17,\n 2.0826e-17, 1.2378e-17, -2.1436e-17, -1.1317e-17, 1.9404e-17,\n -9.3148e-18, 2.2380e-17, -1.0833e-17, -2.7960e-17, 1.4544e-17,\n 1.5750e-17, 2.4585e-17, -7.8683e-18, 1.3904e-17, 2.3991e-17,\n -1.9341e-17, -1.8294e-18, 9.0071e-18, -1.3472e-17, -1.2437e-17,\n -1.4336e-17, -1.4217e-17, -2.3352e-17, 2.4465e-17, -1.9098e-17,\n -9.4184e-18, -3.1321e-17, -1.9442e-17, -1.3504e-17, -1.0207e-17,\n 1.4621e-17, 2.6676e-17, -8.3351e-18, -3.2085e-17, -2.8578e-17,\n -1.2243e-17, 1.4347e-17, -1.3352e-17, 7.8160e-18, -2.4277e-17,\n 1.6964e-17, -2.8854e-17, -2.4234e-17, -2.1356e-17, 1.8327e-17,\n -9.0986e-18, 2.2274e-17, 3.0628e-17, -2.7638e-17, -2.1708e-17,\n 1.3045e-17, 1.8286e-17, 2.7970e-17, 1.4122e-17, -2.3284e-17,\n 2.0778e-17, 8.5519e-19, 4.9221e-18, 1.5505e-17, 1.8121e-17,\n 2.2302e-17, 1.2901e-17, -1.2592e-18, 1.6797e-17, 1.0979e-17,\n 1.6511e-17, 9.9846e-18, -2.1081e-17, -1.8047e-17, -2.4234e-17,\n -1.9169e-17, -2.4225e-17, 9.9202e-18, -2.3842e-17, -3.0585e-17,\n -2.2085e-18], device='cuda:0')", | |
| "exp_avg_sq": "tensor([1.4809e-11, 1.9969e-11, 4.8687e-12, 8.2918e-11, 1.0478e-10, 3.7133e-12,\n 4.6042e-12, 1.1238e-12, 5.1887e-12, 5.9708e-13, 1.1537e-12, 4.5608e-12,\n 6.2213e-12, 2.2324e-11, 5.2120e-12, 1.1921e-12, 2.5194e-11, 2.6131e-11,\n 1.0898e-11, 1.5615e-11, 9.3943e-11, 6.3398e-11, 1.1541e-11, 4.8650e-11,\n 2.4942e-11, 6.0803e-11, 6.2559e-13, 3.5669e-11, 2.8526e-12, 3.0439e-12,\n 9.8194e-12, 6.6593e-12, 3.4277e-12, 3.4470e-12, 6.4045e-11, 3.6994e-12,\n 1.7490e-10, 1.5926e-11, 2.4166e-11, 9.5127e-12, 2.3182e-12, 4.7197e-12,\n 4.1186e-12, 1.8392e-11, 2.7079e-11, 4.3257e-11, 1.8101e-12, 1.1108e-11,\n 2.5151e-12, 5.2129e-12, 9.8792e-12, 2.7518e-12, 3.0331e-11, 4.1765e-12,\n 1.1731e-10, 1.0726e-11, 5.2067e-12, 7.7201e-11, 4.3610e-11, 9.0502e-13,\n 8.3068e-11, 4.8856e-11, 5.2514e-11, 1.6090e-11, 9.7700e-12, 6.5077e-11,\n 5.9942e-11, 9.1304e-13, 1.1564e-12, 5.1901e-11, 8.1627e-11, 1.1849e-11,\n 1.5289e-11, 2.6732e-12, 4.4331e-12, 4.3237e-11, 1.0892e-12, 1.4322e-11,\n 4.4128e-11, 2.7217e-12, 6.8809e-12, 4.8977e-12, 2.4572e-11, 1.0533e-11,\n 1.2149e-11, 2.7178e-12, 6.9388e-11, 8.8317e-13, 1.0332e-11, 5.2452e-12,\n 3.6242e-12, 2.1598e-11, 8.6078e-11, 1.2038e-11, 1.1256e-12, 9.8686e-12,\n 3.6469e-12, 8.7301e-12, 2.1445e-11, 7.0000e-13, 2.5291e-11, 1.1471e-12,\n 9.7081e-12, 4.1111e-11, 5.2351e-12, 1.0884e-11, 1.1596e-12, 1.0733e-11,\n 2.8661e-11, 7.8567e-12, 1.3275e-11, 8.6181e-12, 6.3490e-12, 7.9244e-13,\n 3.7177e-12, 5.9114e-12, 7.0354e-12, 2.3342e-11, 1.9920e-11, 1.2718e-11,\n 1.6904e-11, 4.8513e-12, 1.2015e-11, 2.8069e-12, 8.0302e-11, 5.4461e-12,\n 3.8780e-12, 4.5909e-12, 1.9358e-11, 2.1503e-11, 2.0812e-11, 6.2432e-11,\n 8.5198e-13, 4.6105e-11, 4.1415e-11, 3.6906e-12, 3.3851e-11, 4.6848e-12,\n 8.6141e-12, 1.5392e-11, 1.1533e-11, 1.6163e-11, 3.0682e-11, 4.0923e-11,\n 1.6923e-12, 7.9892e-12, 2.4830e-12, 3.0034e-13, 1.5660e-12, 5.5185e-12,\n 6.3920e-13, 6.1451e-11, 7.9381e-12, 1.7220e-11, 6.2697e-11, 1.1608e-11,\n 1.4566e-11, 7.8096e-11, 5.1472e-13, 4.1773e-12, 5.1702e-12, 1.7899e-12,\n 5.1336e-12, 5.1933e-11, 3.6837e-11, 1.7790e-11, 2.8353e-11, 4.4289e-11,\n 4.5683e-12, 1.2191e-11, 1.0725e-12, 1.4024e-11, 2.4003e-11, 1.1205e-11,\n 1.7314e-11, 1.4785e-11, 1.1502e-12, 2.2936e-11, 1.9377e-11, 1.0962e-12,\n 1.3354e-11, 8.2187e-12, 2.5580e-12, 4.5390e-11, 3.4518e-12, 8.1549e-11,\n 3.4286e-12, 1.0283e-12, 2.2271e-12, 1.7122e-12, 3.1862e-11, 2.2411e-11,\n 1.1358e-11, 9.4711e-12, 6.6072e-11, 8.8949e-12, 1.6775e-11, 5.7923e-13,\n 7.5824e-11, 4.5316e-12, 3.1308e-11, 1.1600e-11, 2.2417e-11, 1.3666e-11,\n 1.1663e-11, 8.6776e-13, 2.8955e-12, 3.6868e-11, 2.0821e-11, 5.4422e-11,\n 4.9803e-11, 5.3806e-11, 7.8004e-12, 1.7325e-11, 7.9522e-12, 5.8263e-11,\n 3.7803e-11, 3.5876e-11, 5.2193e-11, 9.4936e-13, 1.5882e-12, 2.6694e-12,\n 1.5705e-11, 5.4735e-11, 2.0234e-11, 4.3359e-12, 1.0595e-11, 7.8007e-11,\n 1.0033e-11, 3.2646e-11, 1.7131e-12, 2.5507e-11, 3.8247e-11, 2.8966e-12,\n 5.4108e-12, 1.1303e-11, 1.6130e-11, 5.1610e-13, 2.5464e-11, 8.5046e-13,\n 1.8851e-12, 5.6572e-12, 2.6620e-11, 1.0697e-12, 1.8517e-11, 5.7095e-12,\n 6.5114e-13, 7.9954e-13, 4.1929e-11, 9.3870e-13, 1.0345e-11, 3.8609e-11,\n 5.7544e-12, 2.1306e-11, 2.8330e-12, 1.1945e-11], device='cuda:0')" | |
| }, | |
| "56": { | |
| "step": "tensor(11268.)", | |
| "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.7718e-17, -9.1275e-17, 7.0865e-17, ..., 1.4171e-16,\n 8.7987e-17, -5.7328e-17],\n [-2.5558e-17, -4.0896e-17, 3.1379e-17, ..., 6.3195e-17,\n 3.9377e-17, -2.5222e-17],\n ...,\n [-2.2612e-17, -3.6089e-17, 2.7469e-17, ..., 5.4076e-17,\n 3.3486e-17, -2.1284e-17],\n [-3.6650e-17, -5.5304e-17, 4.1982e-17, ..., 8.8621e-17,\n 5.6532e-17, -3.6744e-17],\n [ 1.0868e-17, 1.8612e-17, -1.4662e-17, ..., -2.6661e-17,\n -1.5778e-17, 9.4757e-18]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[1.4590e-11, 5.4878e-11, 1.3733e-10, ..., 4.8345e-10, 2.1764e-10,\n 3.2864e-11],\n [1.8320e-13, 7.8730e-13, 1.6968e-12, ..., 5.9591e-12, 2.6710e-12,\n 4.4729e-13],\n [1.6813e-13, 6.1341e-13, 1.5780e-12, ..., 5.5929e-12, 2.5129e-12,\n 3.6784e-13],\n ...,\n [2.0766e-13, 7.9377e-13, 2.0467e-12, ..., 7.0320e-12, 3.2206e-12,\n 5.1007e-13],\n [3.2517e-13, 1.1197e-12, 2.4165e-12, ..., 1.0143e-11, 4.0976e-12,\n 4.5260e-13],\n [1.7131e-13, 5.9762e-13, 1.4904e-12, ..., 5.4609e-12, 2.4029e-12,\n 3.3057e-13]], device='cuda:0')" | |
| }, | |
| "57": { | |
| "step": "tensor(11268.)", | |
| "exp_avg": "tensor([-5.6052e-45, -6.5856e-17, -2.9474e-17, -1.8247e-17, -2.1531e-17,\n -3.5558e-17, -3.5558e-17, -2.5262e-17, -3.9658e-17, 1.3213e-17],\n device='cuda:0')", | |
| "exp_avg_sq": "tensor([4.3046e-09, 5.1715e-11, 4.9756e-11, 3.9149e-11, 4.7036e-11, 4.3653e-11,\n 7.3179e-11, 6.5447e-11, 7.0697e-11, 4.6031e-11], device='cuda:0')" | |
| }, | |
| "58": { | |
| "step": "tensor(11268.)", | |
| "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.7079e-17, -9.0265e-17, 7.0081e-17, ..., 1.4014e-16,\n 8.7013e-17, -5.6694e-17],\n [-2.5276e-17, -4.0444e-17, 3.1032e-17, ..., 6.2496e-17,\n 3.8941e-17, -2.4943e-17],\n ...,\n [-2.2362e-17, -3.5690e-17, 2.7165e-17, ..., 5.3478e-17,\n 3.3116e-17, -2.1049e-17],\n [-3.6244e-17, -5.4692e-17, 4.1518e-17, ..., 8.7641e-17,\n 5.5906e-17, -3.6338e-17],\n [ 1.0748e-17, 1.8406e-17, -1.4499e-17, ..., -2.6366e-17,\n -1.5603e-17, 9.3708e-18]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[1.4516e-11, 5.4654e-11, 1.3693e-10, ..., 4.8129e-10, 2.1688e-10,\n 3.2827e-11],\n [1.8219e-13, 7.8411e-13, 1.6918e-12, ..., 5.9308e-12, 2.6613e-12,\n 4.4681e-13],\n [1.6728e-13, 6.1089e-13, 1.5733e-12, ..., 5.5681e-12, 2.5042e-12,\n 3.6741e-13],\n ...,\n [2.0662e-13, 7.9067e-13, 2.0409e-12, ..., 7.0015e-12, 3.2098e-12,\n 5.0955e-13],\n [3.2338e-13, 1.1139e-12, 2.4074e-12, ..., 1.0092e-11, 4.0804e-12,\n 4.5171e-13],\n [1.7043e-13, 5.9502e-13, 1.4857e-12, ..., 5.4360e-12, 2.3941e-12,\n 3.3014e-13]], device='cuda:0')" | |
| }, | |
| "59": { | |
| "step": "tensor(11268.)", | |
| "exp_avg": "tensor([-5.6052e-45, -6.5127e-17, -2.9148e-17, -1.8045e-17, -2.1292e-17,\n -3.5164e-17, -3.5164e-17, -2.4982e-17, -3.9219e-17, 1.3067e-17],\n device='cuda:0')", | |
| "exp_avg_sq": "tensor([4.2940e-09, 5.1592e-11, 4.9633e-11, 3.9062e-11, 4.6935e-11, 4.3547e-11,\n 7.3012e-11, 6.5292e-11, 7.0476e-11, 4.5910e-11], device='cuda:0')" | |
| }, | |
| "60": { | |
| "step": "tensor(11268.)", | |
| "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.7801e-17, -9.1407e-17, 7.0967e-17, ..., 1.4191e-16,\n 8.8114e-17, -5.7411e-17],\n [-2.5595e-17, -4.0955e-17, 3.1425e-17, ..., 6.3287e-17,\n 3.9433e-17, -2.5259e-17],\n ...,\n [-2.2645e-17, -3.6141e-17, 2.7509e-17, ..., 5.4154e-17,\n 3.3535e-17, -2.1315e-17],\n [-3.6702e-17, -5.5383e-17, 4.2043e-17, ..., 8.8749e-17,\n 5.6613e-17, -3.6797e-17],\n [ 1.0884e-17, 1.8639e-17, -1.4683e-17, ..., -2.6700e-17,\n -1.5801e-17, 9.4894e-18]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[1.4591e-11, 5.4881e-11, 1.3734e-10, ..., 4.8347e-10, 2.1764e-10,\n 3.2865e-11],\n [1.8321e-13, 7.8735e-13, 1.6968e-12, ..., 5.9594e-12, 2.6710e-12,\n 4.4729e-13],\n [1.6814e-13, 6.1344e-13, 1.5780e-12, ..., 5.5930e-12, 2.5130e-12,\n 3.6784e-13],\n ...,\n [2.0767e-13, 7.9381e-13, 2.0468e-12, ..., 7.0323e-12, 3.2207e-12,\n 5.1008e-13],\n [3.2519e-13, 1.1197e-12, 2.4165e-12, ..., 1.0144e-11, 4.0978e-12,\n 4.5261e-13],\n [1.7132e-13, 5.9766e-13, 1.4904e-12, ..., 5.4611e-12, 2.4029e-12,\n 3.3058e-13]], device='cuda:0')" | |
| }, | |
| "61": { | |
| "step": "tensor(11268.)", | |
| "exp_avg": "tensor([-5.6052e-45, -6.5950e-17, -2.9517e-17, -1.8273e-17, -2.1562e-17,\n -3.5609e-17, -3.5609e-17, -2.5298e-17, -3.9715e-17, 1.3232e-17],\n device='cuda:0')", | |
| "exp_avg_sq": "tensor([4.3047e-09, 5.1716e-11, 4.9757e-11, 3.9149e-11, 4.7036e-11, 4.3654e-11,\n 7.3179e-11, 6.5447e-11, 7.0698e-11, 4.6032e-11], device='cuda:0')" | |
| }, | |
| "8": { | |
| "step": "tensor(10016.)", | |
| "exp_avg": "tensor([[-2.9819e-06, -1.2593e-06, 2.7079e-06, ..., 1.2533e-06,\n 1.2662e-07, -8.1620e-08],\n [-1.0440e-06, 2.2405e-07, -2.1519e-07, ..., -1.7915e-06,\n -1.1876e-06, -8.4086e-07],\n [ 4.1764e-06, 2.1153e-06, 2.9915e-07, ..., -7.3533e-07,\n 7.6024e-06, 3.7155e-07],\n ...,\n [-5.2413e-06, 5.9172e-07, -2.6044e-06, ..., 1.0206e-07,\n 1.8111e-06, 4.3652e-07],\n [-2.5892e-06, 1.4727e-05, 4.3999e-06, ..., 1.6250e-07,\n 2.0857e-07, 8.8153e-07],\n [ 2.5106e-06, 9.8568e-07, 2.1669e-06, ..., 7.5256e-07,\n 8.2572e-07, 4.7254e-07]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[2.7658e-10, 5.5254e-10, 1.3281e-10, ..., 8.9302e-11, 1.5872e-10,\n 1.5663e-10],\n [1.5277e-09, 1.5524e-09, 2.1873e-11, ..., 2.8095e-10, 7.9601e-11,\n 1.4127e-10],\n [1.2165e-09, 5.0074e-10, 3.7017e-11, ..., 4.3918e-11, 6.6387e-11,\n 3.6932e-10],\n ...,\n [4.9806e-10, 1.4632e-10, 9.8791e-11, ..., 5.7790e-11, 7.9320e-11,\n 3.9874e-11],\n [5.0193e-10, 8.6651e-09, 3.1123e-10, ..., 1.2630e-09, 3.6616e-10,\n 6.1277e-11],\n [1.0821e-10, 2.2826e-11, 1.7232e-10, ..., 2.0606e-10, 1.2895e-10,\n 1.9788e-10]], device='cuda:0')" | |
| }, | |
| "9": { | |
| "step": "tensor(10016.)", | |
| "exp_avg": "tensor([-3.9326e-05, -2.6494e-03, -6.3762e-04, ..., -4.9168e-04,\n -8.5423e-04, 6.7660e-05], device='cuda:0')", | |
| "exp_avg_sq": "tensor([3.5181e-06, 7.7087e-06, 5.5251e-06, ..., 4.4513e-06, 8.0965e-06,\n 3.6392e-06], device='cuda:0')" | |
| }, | |
| "10": { | |
| "step": "tensor(10016.)", | |
| "exp_avg": "tensor([[ 6.3976e-06, 3.1728e-06, 4.5112e-06, ..., -2.4899e-06,\n -2.5282e-06, 4.7272e-06],\n [-6.4280e-07, -9.0884e-06, -9.8977e-06, ..., -2.5351e-06,\n 3.9169e-07, -1.2254e-07],\n [-7.6188e-07, 1.8653e-06, -8.4535e-06, ..., -5.9199e-06,\n 1.3951e-06, -2.3003e-06],\n ...,\n [ 2.5076e-06, -9.1081e-06, -1.2055e-06, ..., -4.4919e-06,\n -2.1039e-08, 3.5736e-07],\n [-3.0880e-06, 2.7794e-06, 2.6415e-06, ..., -1.4591e-06,\n -2.8293e-06, -8.0489e-07],\n [ 6.5080e-06, -2.7963e-06, 1.3997e-05, ..., 2.5035e-06,\n 6.8457e-07, -3.3145e-06]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[5.2496e-11, 8.0007e-11, 8.6462e-11, ..., 5.6155e-11, 4.4942e-11,\n 7.0316e-11],\n [1.1454e-10, 1.3229e-10, 1.6491e-10, ..., 8.4326e-11, 7.6892e-11,\n 7.2812e-11],\n [1.1476e-10, 1.2579e-10, 2.4293e-10, ..., 1.9542e-10, 1.2330e-10,\n 5.8327e-11],\n ...,\n [1.0588e-10, 1.4958e-10, 1.5320e-10, ..., 9.5125e-11, 9.8005e-11,\n 7.8764e-11],\n [1.4147e-10, 2.0768e-10, 2.2889e-10, ..., 1.4059e-10, 1.4687e-10,\n 1.3168e-10],\n [1.1536e-10, 2.1086e-10, 2.4998e-10, ..., 1.0897e-10, 7.3044e-11,\n 1.5817e-10]], device='cuda:0')" | |
| }, | |
| "11": { | |
| "step": "tensor(8764.)", | |
| "exp_avg": "tensor([[ 2.0081e-06, -2.1347e-06, 1.3081e-06, ..., -5.9117e-07,\n -9.6834e-07, -1.7816e-06],\n [-1.0807e-06, -5.6924e-07, -1.4492e-07, ..., 5.7069e-07,\n -1.2322e-06, -5.5887e-06],\n [-3.8371e-06, 4.7876e-07, -5.0085e-06, ..., -7.6486e-07,\n 1.4883e-07, 1.5666e-06],\n ...,\n [-4.1793e-06, 1.7283e-07, 1.7835e-06, ..., -4.3896e-06,\n 7.1816e-07, 8.4211e-07],\n [-1.1123e-05, -5.5336e-06, 1.9128e-06, ..., 2.4932e-07,\n 2.8451e-08, 7.2564e-07],\n [ 5.0460e-07, 1.4879e-06, -1.0357e-06, ..., -2.0064e-07,\n 1.2113e-07, 2.0969e-06]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[1.2405e-10, 9.6256e-11, 4.7586e-11, ..., 5.2996e-11, 6.8279e-11,\n 2.5138e-10],\n [8.0325e-10, 4.1979e-10, 3.7742e-11, ..., 1.5681e-10, 6.9634e-11,\n 1.1671e-10],\n [4.5432e-10, 4.4832e-10, 1.1756e-10, ..., 2.7097e-10, 4.6419e-11,\n 2.0284e-11],\n ...,\n [2.4634e-10, 5.6369e-11, 3.3814e-10, ..., 7.7170e-11, 1.1222e-10,\n 1.2655e-09],\n [7.0358e-10, 1.7394e-10, 1.2082e-10, ..., 1.6077e-10, 5.1030e-11,\n 6.6610e-10],\n [9.3055e-10, 3.8880e-10, 7.3437e-11, ..., 4.1563e-11, 6.9317e-11,\n 3.4114e-10]], device='cuda:0')" | |
| }, | |
| "12": { | |
| "step": "tensor(8764.)", | |
| "exp_avg": "tensor([ 0.0006, -0.0006, -0.0002, ..., 0.0007, 0.0008, 0.0003],\n device='cuda:0')", | |
| "exp_avg_sq": "tensor([3.0542e-06, 8.9968e-06, 3.7732e-06, ..., 5.3096e-06, 3.6205e-06,\n 5.5853e-06], device='cuda:0')" | |
| }, | |
| "13": { | |
| "step": "tensor(8764.)", | |
| "exp_avg": "tensor([[ 4.0503e-07, 2.7375e-07, -2.5211e-06, ..., -1.0494e-06,\n 1.5410e-06, -2.2484e-07],\n [-1.7681e-08, 3.4097e-07, -2.0796e-06, ..., -1.1335e-06,\n -2.0673e-06, -8.6576e-07],\n [ 1.6182e-06, -2.7016e-08, 1.4637e-06, ..., -1.7681e-06,\n 1.6668e-06, 4.0200e-08],\n ...,\n [ 6.2015e-07, 3.7422e-07, 2.5837e-07, ..., 6.3971e-07,\n 2.4660e-06, 5.3395e-07],\n [ 2.0843e-06, -7.8787e-07, -4.4302e-06, ..., -1.0452e-07,\n 1.5874e-06, -5.1988e-07],\n [-2.0093e-06, -2.2083e-07, -8.6085e-06, ..., -2.1191e-06,\n -1.6469e-06, -6.1518e-07]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[2.4983e-11, 2.9884e-11, 3.6494e-11, ..., 3.2233e-11, 2.8253e-11,\n 3.1578e-11],\n [5.4898e-11, 6.1188e-11, 6.1811e-11, ..., 4.9708e-11, 5.0031e-11,\n 5.7662e-11],\n [5.4607e-11, 4.9759e-11, 7.9794e-11, ..., 4.9807e-11, 4.7579e-11,\n 8.3576e-11],\n ...,\n [6.0843e-11, 4.8247e-11, 6.1840e-11, ..., 6.0363e-11, 6.0263e-11,\n 9.0621e-11],\n [5.1453e-11, 5.7305e-11, 6.0144e-11, ..., 5.5547e-11, 4.7824e-11,\n 6.7695e-11],\n [7.3951e-11, 7.5231e-11, 6.0667e-11, ..., 6.2371e-11, 6.0319e-11,\n 9.0164e-11]], device='cuda:0')" | |
| }, | |
| "14": { | |
| "step": "tensor(7512.)", | |
| "exp_avg": "tensor([[ 1.9048e-06, 2.0496e-07, -1.9633e-06, ..., 1.4763e-06,\n -3.4654e-06, 2.3949e-06],\n [-1.0047e-06, 1.1970e-06, 4.7875e-09, ..., 1.8258e-06,\n -3.6695e-08, -2.3304e-08],\n [ 6.4248e-07, -1.3588e-05, -6.3729e-07, ..., -1.3410e-06,\n 4.0935e-06, 1.1278e-06],\n ...,\n [ 1.6923e-07, 3.0294e-06, 7.3120e-07, ..., -1.7978e-06,\n 1.9902e-07, -9.9793e-06],\n [-6.9270e-06, -1.7624e-08, 4.0203e-06, ..., 2.1830e-07,\n -5.6415e-07, 1.2364e-06],\n [-1.7836e-06, -1.1704e-07, 9.7023e-07, ..., -1.2790e-06,\n 2.1659e-06, 1.8867e-06]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[3.6579e-10, 7.5284e-12, 7.0700e-11, ..., 1.1730e-10, 5.4518e-10,\n 8.7805e-11],\n [1.2107e-10, 2.1897e-10, 7.9887e-11, ..., 2.6745e-10, 4.7375e-12,\n 2.1307e-10],\n [1.1585e-10, 3.2036e-09, 6.2928e-11, ..., 1.1376e-10, 4.7436e-10,\n 2.4478e-10],\n ...,\n [1.9680e-11, 7.8861e-10, 3.2198e-10, ..., 5.3343e-10, 2.2183e-11,\n 4.2572e-10],\n [2.6233e-09, 8.8065e-11, 2.8610e-10, ..., 4.0427e-11, 1.2777e-10,\n 2.5922e-11],\n [1.0295e-10, 4.2779e-11, 3.6109e-10, ..., 1.7293e-10, 7.9280e-11,\n 4.7117e-10]], device='cuda:0')" | |
| }, | |
| "15": { | |
| "step": "tensor(7512.)", | |
| "exp_avg": "tensor([ 0.0006, 0.0002, -0.0009, ..., -0.0009, -0.0010, -0.0013],\n device='cuda:0')", | |
| "exp_avg_sq": "tensor([3.5147e-06, 4.5258e-06, 6.0365e-06, ..., 3.5091e-06, 7.4168e-06,\n 4.6976e-06], device='cuda:0')" | |
| }, | |
| "16": { | |
| "step": "tensor(7512.)", | |
| "exp_avg": "tensor([[ 1.3237e-06, -4.1868e-07, 2.1795e-06, ..., 1.1839e-06,\n -1.1295e-06, 6.3024e-07],\n [-2.5105e-06, 5.3425e-07, -2.3553e-06, ..., -1.9468e-06,\n -4.6531e-06, -3.1055e-07],\n [ 1.6713e-06, 1.5696e-07, 3.8628e-06, ..., 1.4365e-06,\n -1.0560e-06, -3.5584e-06],\n ...,\n [ 4.0267e-06, 1.8755e-06, 9.5928e-08, ..., 9.8512e-07,\n 1.7034e-06, 4.2816e-06],\n [-2.5556e-06, -4.3512e-07, -7.4822e-07, ..., 1.5350e-06,\n -3.6387e-06, -4.9596e-07],\n [-1.0612e-06, 1.7309e-06, 9.9800e-07, ..., -3.6121e-07,\n -6.2912e-07, 2.1769e-06]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[2.0789e-11, 4.4123e-11, 4.6035e-11, ..., 2.0206e-11, 3.0834e-11,\n 3.8941e-11],\n [3.3402e-11, 5.8188e-11, 4.2249e-11, ..., 5.3732e-11, 4.3940e-11,\n 4.2419e-11],\n [5.6982e-11, 4.5994e-11, 6.1296e-11, ..., 6.6524e-11, 4.9431e-11,\n 4.6511e-11],\n ...,\n [5.7894e-11, 7.2095e-11, 5.2603e-11, ..., 4.3395e-11, 7.7582e-11,\n 5.4136e-11],\n [4.1576e-11, 5.7716e-11, 5.9587e-11, ..., 5.8579e-11, 6.1621e-11,\n 5.9843e-11],\n [5.1010e-11, 6.3166e-11, 4.8250e-11, ..., 3.5492e-11, 7.2295e-11,\n 5.9348e-11]], device='cuda:0')" | |
| }, | |
| "17": { | |
| "step": "tensor(6260.)", | |
| "exp_avg": "tensor([[ 6.3019e-07, 8.4683e-07, 2.0280e-07, ..., -3.9916e-07,\n -2.5804e-07, 1.8680e-06],\n [-3.2087e-06, -4.1874e-07, -7.9302e-08, ..., 4.1375e-06,\n 4.1160e-07, -2.5924e-06],\n [ 3.6980e-06, -2.4480e-06, -4.0711e-06, ..., 3.4043e-06,\n 1.5972e-06, 3.2606e-06],\n ...,\n [-5.0528e-08, -2.1556e-06, 3.3564e-06, ..., -3.6791e-07,\n 1.6712e-07, 3.0307e-07],\n [ 1.9198e-07, -1.3557e-06, 1.3584e-06, ..., 7.9242e-07,\n 4.6035e-07, 1.8441e-06],\n [ 1.3207e-06, -8.9052e-07, -2.6311e-06, ..., -1.4624e-06,\n -4.4931e-06, -9.7901e-08]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[5.8148e-11, 6.2179e-11, 4.4763e-11, ..., 8.1257e-10, 6.5449e-11,\n 1.7387e-10],\n [5.6877e-10, 1.1387e-09, 6.1322e-11, ..., 1.8890e-09, 2.2111e-11,\n 3.5966e-10],\n [3.0462e-10, 1.1343e-09, 1.0292e-10, ..., 1.6972e-09, 1.3426e-10,\n 1.7321e-10],\n ...,\n [8.0437e-11, 1.2848e-10, 3.7045e-10, ..., 3.2727e-11, 5.6868e-11,\n 5.4607e-11],\n [3.0404e-10, 1.3487e-10, 7.3900e-11, ..., 4.5239e-11, 5.8117e-11,\n 8.7631e-11],\n [6.7747e-10, 5.7039e-10, 1.3959e-09, ..., 2.8098e-10, 2.0881e-10,\n 1.7794e-10]], device='cuda:0')" | |
| }, | |
| "18": { | |
| "step": "tensor(6260.)", | |
| "exp_avg": "tensor([ 2.0619e-04, -2.0953e-05, -1.7464e-03, ..., 1.0895e-04,\n -6.2601e-05, 2.6825e-04], device='cuda:0')", | |
| "exp_avg_sq": "tensor([4.0329e-06, 8.2879e-06, 5.0801e-06, ..., 3.3066e-06, 5.3262e-06,\n 5.6625e-06], device='cuda:0')" | |
| }, | |
| "19": { | |
| "step": "tensor(6260.)", | |
| "exp_avg": "tensor([[-2.1086e-08, 4.0762e-07, 1.0251e-06, ..., 9.8694e-07,\n 3.3293e-06, 8.1154e-07],\n [ 1.4937e-07, -8.7782e-07, -1.0028e-06, ..., -1.7608e-06,\n -1.4217e-06, -4.0655e-09],\n [-7.0338e-07, 6.1001e-08, 2.1111e-06, ..., 1.3383e-06,\n 3.2359e-06, 8.4152e-06],\n ...,\n [-1.2632e-06, 6.7354e-07, 3.6083e-07, ..., -5.8423e-07,\n -7.5468e-07, 1.2504e-06],\n [ 2.5051e-07, -3.0058e-06, 4.4030e-07, ..., -1.6040e-06,\n -2.5607e-06, -1.4616e-06],\n [ 2.5601e-07, 5.2906e-07, -3.1702e-06, ..., -1.2607e-07,\n 2.9962e-06, 3.4762e-06]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[2.1617e-11, 3.2573e-11, 2.4863e-11, ..., 2.4352e-11, 2.9161e-11,\n 1.6177e-11],\n [3.8401e-11, 5.7815e-11, 3.8148e-11, ..., 3.0578e-11, 4.7141e-11,\n 3.1313e-11],\n [4.4365e-11, 8.9344e-11, 6.1073e-11, ..., 5.6434e-11, 5.8817e-11,\n 4.3919e-11],\n ...,\n [5.0888e-11, 7.6667e-11, 6.4355e-11, ..., 4.4528e-11, 5.1380e-11,\n 3.9897e-11],\n [3.6534e-11, 8.8003e-11, 6.6215e-11, ..., 4.4633e-11, 6.0796e-11,\n 4.3696e-11],\n [4.1784e-11, 6.5144e-11, 5.4649e-11, ..., 5.4530e-11, 4.4183e-11,\n 4.0323e-11]], device='cuda:0')" | |
| }, | |
| "20": { | |
| "step": "tensor(5008.)", | |
| "exp_avg": "tensor([[-2.0048e-06, -1.4706e-06, 2.7805e-06, ..., 5.7209e-06,\n 3.0937e-06, 3.8294e-06],\n [ 3.1100e-06, -2.1000e-07, -4.9577e-07, ..., 1.5889e-06,\n 6.4337e-06, -2.2196e-06],\n [ 2.5495e-07, 3.5757e-06, 1.7661e-07, ..., 3.9668e-06,\n 9.2233e-08, 7.8831e-07],\n ...,\n [-1.6909e-05, 2.3850e-06, -2.0554e-06, ..., 8.3286e-06,\n -1.0186e-06, -1.8644e-06],\n [ 6.3395e-06, -9.7787e-07, -2.2742e-06, ..., -7.5363e-06,\n 4.3637e-07, -3.2972e-06],\n [ 5.5377e-06, 5.2233e-06, -1.8986e-07, ..., -4.1585e-06,\n 2.7170e-07, 5.2437e-06]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[2.4887e-10, 2.3144e-09, 3.9113e-10, ..., 2.1599e-09, 1.4411e-10,\n 9.4567e-10],\n [7.5313e-10, 2.9662e-10, 8.4360e-11, ..., 5.8207e-11, 4.4449e-10,\n 3.9437e-10],\n [4.0306e-10, 3.7801e-10, 1.0923e-10, ..., 6.0319e-10, 6.4834e-11,\n 4.1043e-11],\n ...,\n [1.6942e-09, 2.6975e-10, 7.8301e-11, ..., 2.6968e-10, 2.6602e-10,\n 2.7674e-10],\n [5.7131e-10, 1.4413e-10, 2.5212e-10, ..., 7.8205e-10, 1.4395e-10,\n 4.0134e-10],\n [3.3096e-10, 8.2589e-10, 4.3690e-11, ..., 2.2680e-09, 1.0820e-10,\n 4.8958e-10]], device='cuda:0')" | |
| }, | |
| "21": { | |
| "step": "tensor(5008.)", | |
| "exp_avg": "tensor([-1.6399e-04, -1.1485e-03, 1.2898e-03, ..., -1.1177e-03,\n -7.3017e-05, 3.9362e-04], device='cuda:0')", | |
| "exp_avg_sq": "tensor([7.0807e-06, 1.0713e-05, 5.5961e-06, ..., 9.5194e-06, 6.7277e-06,\n 6.5718e-06], device='cuda:0')" | |
| }, | |
| "22": { | |
| "step": "tensor(5008.)", | |
| "exp_avg": "tensor([[ 4.5019e-06, -2.1875e-07, -4.0267e-07, ..., 9.7608e-07,\n -4.3911e-07, -9.5252e-07],\n [-2.6711e-06, -1.4974e-06, -2.9197e-06, ..., -3.9099e-07,\n 1.7802e-07, 2.5120e-06],\n [ 1.4916e-06, -2.0382e-06, 1.6976e-06, ..., 3.4246e-07,\n 2.0549e-06, -3.0434e-06],\n ...,\n [ 6.3813e-08, -4.9036e-07, -1.4810e-06, ..., -5.6770e-06,\n -4.1303e-06, -1.0845e-06],\n [-8.6403e-07, 2.2121e-07, -8.8813e-07, ..., -8.2742e-07,\n 1.5048e-06, 7.4062e-07],\n [ 9.7497e-07, 3.8290e-07, 1.8565e-07, ..., -6.9877e-06,\n 2.7093e-06, 2.5223e-06]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[6.1645e-11, 3.8608e-11, 3.6425e-11, ..., 3.5735e-11, 4.5517e-11,\n 6.8801e-11],\n [6.5077e-11, 5.8707e-11, 5.7128e-11, ..., 6.6685e-11, 4.9969e-11,\n 9.1782e-11],\n [1.3781e-10, 7.9505e-11, 6.7430e-11, ..., 7.3173e-11, 6.3379e-11,\n 1.1423e-10],\n ...,\n [8.6550e-11, 5.8967e-11, 6.7493e-11, ..., 1.2418e-10, 7.5120e-11,\n 7.9235e-11],\n [7.9565e-11, 9.1800e-11, 6.0551e-11, ..., 7.8031e-11, 6.0280e-11,\n 7.4257e-11],\n [6.1798e-11, 6.4527e-11, 7.7270e-11, ..., 5.9252e-11, 7.5657e-11,\n 7.2519e-11]], device='cuda:0')" | |
| }, | |
| "23": { | |
| "step": "tensor(3756.)", | |
| "exp_avg": "tensor([[-6.3595e-07, -4.4322e-06, -5.6657e-06, ..., -1.2967e-05,\n 9.3519e-07, 8.1898e-08],\n [ 9.9535e-06, 7.2941e-06, -2.3732e-06, ..., -2.4254e-06,\n 2.3701e-06, 1.3436e-06],\n [ 5.0576e-06, -1.4622e-06, 6.1296e-08, ..., 1.1814e-06,\n -2.1217e-06, -5.3394e-07],\n ...,\n [ 3.8829e-08, 1.6256e-06, 5.1584e-07, ..., -3.8633e-07,\n 4.9759e-07, 2.4139e-07],\n [-8.0929e-06, -1.6384e-07, -2.3783e-06, ..., 2.1166e-06,\n 5.3096e-07, -1.6760e-06],\n [-6.3199e-06, -6.0744e-06, -1.4808e-06, ..., 1.7258e-06,\n -1.0114e-08, 5.4522e-06]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[1.2819e-09, 5.7283e-10, 1.8987e-10, ..., 1.3798e-09, 1.3258e-09,\n 2.3369e-10],\n [2.3088e-09, 1.5385e-09, 7.1584e-11, ..., 3.2202e-10, 2.3734e-10,\n 3.2062e-10],\n [7.5190e-10, 3.8474e-09, 9.2571e-11, ..., 1.0295e-09, 1.3936e-10,\n 1.4480e-10],\n ...,\n [8.8327e-10, 2.9406e-10, 2.3873e-10, ..., 7.3744e-11, 1.1347e-10,\n 7.3792e-10],\n [1.8262e-09, 3.1621e-10, 1.6411e-10, ..., 2.1299e-10, 1.1328e-10,\n 2.9924e-10],\n [2.4306e-10, 2.5811e-10, 7.0808e-11, ..., 6.6061e-10, 2.1775e-10,\n 5.7477e-10]], device='cuda:0')" | |
| }, | |
| "24": { | |
| "step": "tensor(3756.)", | |
| "exp_avg": "tensor([ 0.0004, 0.0007, 0.0007, ..., -0.0011, 0.0002, 0.0004],\n device='cuda:0')", | |
| "exp_avg_sq": "tensor([8.3869e-06, 8.7267e-06, 1.0933e-05, ..., 1.1978e-05, 8.6893e-06,\n 6.8412e-06], device='cuda:0')" | |
| }, | |
| "25": { | |
| "step": "tensor(3756.)", | |
| "exp_avg": "tensor([[ 1.7762e-06, 7.1575e-07, 2.1035e-07, ..., 1.5922e-06,\n 1.9023e-06, -1.0994e-06],\n [-1.4937e-07, -1.2259e-06, -2.5734e-07, ..., -6.8691e-07,\n 2.5514e-06, -4.2756e-06],\n [ 1.4863e-06, -3.2982e-06, -4.9254e-07, ..., 1.4742e-06,\n 2.9405e-08, -7.3828e-06],\n ...,\n [ 1.0603e-06, -8.6805e-07, 2.9784e-06, ..., -1.7944e-06,\n 1.0168e-06, -7.6501e-07],\n [-2.7603e-06, -6.8997e-07, 7.7679e-07, ..., -4.3055e-07,\n -1.7447e-07, -3.4795e-06],\n [ 1.7761e-06, 4.6724e-06, 3.3050e-06, ..., 1.9734e-06,\n -3.5385e-06, -4.0831e-07]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[5.2072e-11, 4.8198e-11, 6.0200e-11, ..., 5.4607e-11, 4.1525e-11,\n 5.5222e-11],\n [1.0486e-10, 7.9214e-11, 7.2516e-11, ..., 1.0718e-10, 7.1313e-11,\n 8.4669e-11],\n [1.2700e-10, 9.5137e-11, 9.7201e-11, ..., 1.3265e-10, 9.3126e-11,\n 1.0907e-10],\n ...,\n [1.1192e-10, 1.4214e-10, 1.1846e-10, ..., 1.1509e-10, 1.0447e-10,\n 9.0819e-11],\n [1.2580e-10, 1.2638e-10, 1.1187e-10, ..., 1.3113e-10, 9.0700e-11,\n 9.6040e-11],\n [1.0805e-10, 9.1509e-11, 1.2124e-10, ..., 1.0665e-10, 7.9828e-11,\n 1.0059e-10]], device='cuda:0')" | |
| }, | |
| "26": { | |
| "step": "tensor(2504.)", | |
| "exp_avg": "tensor([[ 5.4596e-06, 8.8343e-07, -6.7653e-06, ..., -1.1465e-06,\n -3.6330e-06, -1.2710e-07],\n [ 8.9846e-06, 5.1116e-07, 4.4069e-06, ..., 1.3666e-06,\n -5.9930e-06, 2.6100e-06],\n [-1.0072e-05, -4.9547e-06, -2.5590e-06, ..., -7.9646e-07,\n 4.5969e-07, 3.1333e-06],\n ...,\n [ 1.4135e-06, 9.0012e-07, 7.6890e-07, ..., -1.2531e-06,\n -1.8312e-06, 3.1819e-06],\n [-4.0206e-06, 7.3293e-06, 2.4445e-06, ..., 8.0319e-06,\n 3.3986e-06, -1.1267e-05],\n [ 7.4903e-06, -7.4428e-06, -1.0024e-06, ..., 3.2456e-06,\n -4.7811e-06, 1.2378e-07]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[5.3456e-10, 6.5472e-10, 2.4026e-10, ..., 8.3078e-10, 7.3683e-10,\n 5.8153e-10],\n [1.1806e-09, 5.1644e-10, 2.7715e-10, ..., 4.0262e-10, 6.1515e-10,\n 5.6363e-10],\n [1.6786e-09, 3.0437e-10, 1.3866e-10, ..., 1.6388e-10, 2.6775e-10,\n 2.6987e-10],\n ...,\n [1.0638e-09, 1.7631e-10, 2.6976e-10, ..., 2.5441e-10, 6.9735e-10,\n 1.4919e-10],\n [1.1316e-09, 4.7505e-10, 3.2824e-10, ..., 5.8279e-10, 3.3860e-10,\n 2.3329e-10],\n [1.1610e-09, 5.3798e-10, 1.4324e-10, ..., 2.9321e-10, 4.4957e-10,\n 2.0921e-10]], device='cuda:0')" | |
| }, | |
| "27": { | |
| "step": "tensor(2504.)", | |
| "exp_avg": "tensor([-1.9442e-04, 9.8447e-04, 4.8903e-04, ..., 7.6089e-05,\n 1.3082e-03, 2.1109e-03], device='cuda:0')", | |
| "exp_avg_sq": "tensor([9.6080e-06, 9.3145e-06, 7.9120e-06, ..., 6.6135e-06, 8.9451e-06,\n 7.6912e-06], device='cuda:0')" | |
| }, | |
| "28": { | |
| "step": "tensor(2504.)", | |
| "exp_avg": "tensor([[ 3.0228e-06, 6.7502e-06, 4.4905e-06, ..., 2.7596e-06,\n 2.0397e-06, -1.2951e-06],\n [-8.1769e-07, -5.3249e-07, -1.4128e-06, ..., 3.2264e-06,\n -1.0062e-06, 1.5665e-06],\n [ 3.4709e-06, 2.2590e-06, -2.3121e-06, ..., -5.1713e-06,\n 1.0008e-05, 8.7895e-07],\n ...,\n [ 4.1428e-06, 6.3215e-06, 2.6624e-06, ..., 1.4151e-06,\n -2.8532e-06, 3.9339e-06],\n [-1.5110e-06, -5.4941e-06, 2.6538e-06, ..., -1.1846e-06,\n 3.4043e-06, -6.4283e-06],\n [ 2.8687e-06, 1.2324e-06, 4.1226e-06, ..., -4.2158e-06,\n 1.5495e-06, -3.5180e-06]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[1.1249e-10, 9.7055e-11, 1.1881e-10, ..., 7.6389e-11, 1.0429e-10,\n 1.0683e-10],\n [2.2343e-10, 1.7838e-10, 1.4235e-10, ..., 1.2899e-10, 1.7503e-10,\n 1.6242e-10],\n [2.1327e-10, 1.7810e-10, 1.7901e-10, ..., 1.2291e-10, 1.6446e-10,\n 1.9718e-10],\n ...,\n [2.1270e-10, 2.1879e-10, 1.7868e-10, ..., 1.2274e-10, 2.0256e-10,\n 2.0010e-10],\n [2.2719e-10, 1.8997e-10, 1.5297e-10, ..., 1.5550e-10, 1.8401e-10,\n 2.5032e-10],\n [2.4548e-10, 1.8822e-10, 1.6542e-10, ..., 1.4661e-10, 1.9475e-10,\n 1.9219e-10]], device='cuda:0')" | |
| }, | |
| "29": { | |
| "step": "tensor(1252.)", | |
| "exp_avg": "tensor([[ 1.7396e-05, 2.8889e-06, 6.3008e-07, ..., 3.5397e-06,\n -7.7587e-07, -1.7636e-06],\n [ 1.5565e-06, -1.0606e-05, -1.8372e-06, ..., 4.0004e-06,\n 2.2710e-06, -9.0266e-06],\n [-4.6712e-06, 7.0731e-07, -5.0852e-07, ..., 4.2067e-06,\n -6.7368e-06, -4.2794e-06],\n ...,\n [ 1.0258e-05, -2.0727e-06, -5.1611e-06, ..., 4.2656e-06,\n 5.2878e-06, -3.7740e-06],\n [-1.1450e-05, 1.1451e-05, 1.9242e-06, ..., -3.3060e-06,\n -5.9104e-07, 4.1646e-06],\n [ 4.3536e-06, 4.0074e-06, 2.4896e-06, ..., 2.7030e-06,\n 1.2243e-05, -3.6140e-07]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[9.7973e-10, 3.9808e-10, 2.2814e-10, ..., 1.0998e-09, 2.3596e-10,\n 3.5226e-10],\n [1.2002e-09, 8.2512e-10, 3.8301e-10, ..., 4.0786e-10, 2.9416e-10,\n 4.1961e-10],\n [5.6497e-10, 5.4139e-10, 3.1238e-10, ..., 4.9484e-10, 7.4444e-10,\n 2.9050e-10],\n ...,\n [7.2456e-10, 6.3203e-10, 2.8997e-10, ..., 3.5307e-10, 1.9483e-10,\n 3.8901e-10],\n [2.0645e-09, 8.0739e-10, 1.9461e-10, ..., 7.7152e-10, 4.7335e-10,\n 4.1782e-10],\n [9.2669e-10, 1.3762e-09, 4.4013e-10, ..., 6.1449e-10, 3.7630e-10,\n 3.2102e-10]], device='cuda:0')" | |
| }, | |
| "30": { | |
| "step": "tensor(1252.)", | |
| "exp_avg": "tensor([ 1.0550e-03, 4.3304e-04, 2.0965e-03, ..., -7.3293e-04,\n -2.0526e-05, -9.9024e-04], device='cuda:0')", | |
| "exp_avg_sq": "tensor([6.9264e-06, 8.2350e-06, 9.0953e-06, ..., 7.1879e-06, 8.1070e-06,\n 9.1265e-06], device='cuda:0')" | |
| }, | |
| "31": { | |
| "step": "tensor(1252.)", | |
| "exp_avg": "tensor([[-1.0186e-06, 4.7824e-07, -2.7244e-07, ..., 6.4243e-07,\n 5.0615e-07, 4.4039e-06],\n [-6.3994e-06, 1.9167e-06, -9.3942e-06, ..., -5.0595e-06,\n -4.8789e-06, 1.1779e-06],\n [ 1.0014e-05, 5.4978e-06, 6.8451e-06, ..., 8.1586e-06,\n -1.7879e-06, 1.2506e-05],\n ...,\n [ 2.2308e-07, -1.2268e-06, -3.3500e-06, ..., 1.3749e-06,\n 3.0473e-06, 2.1178e-06],\n [-1.3359e-06, -4.8953e-06, -3.7229e-06, ..., 4.7893e-06,\n -1.9948e-06, -3.2672e-06],\n [-9.4335e-06, -1.5596e-06, -7.2374e-06, ..., -2.4386e-06,\n -2.5867e-06, -2.9986e-06]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[1.4699e-10, 1.4953e-10, 1.8075e-10, ..., 1.6474e-10, 1.6583e-10,\n 2.0680e-10],\n [2.4661e-10, 2.5946e-10, 3.3705e-10, ..., 2.4060e-10, 3.0579e-10,\n 3.3672e-10],\n [2.6317e-10, 3.3013e-10, 3.5582e-10, ..., 2.4121e-10, 3.3068e-10,\n 4.3207e-10],\n ...,\n [2.3559e-10, 3.1269e-10, 3.1436e-10, ..., 1.9887e-10, 3.0976e-10,\n 3.3347e-10],\n [2.3840e-10, 3.7776e-10, 3.5535e-10, ..., 2.8536e-10, 2.9344e-10,\n 3.8079e-10],\n [2.4509e-10, 2.8058e-10, 3.6491e-10, ..., 2.1353e-10, 3.3156e-10,\n 4.1660e-10]], device='cuda:0')" | |
| } | |
| }, | |
| "param_groups": [ | |
| { | |
| "lr": 0.001, | |
| "name": "shared", | |
| "betas": [ | |
| 0.9, | |
| 0.999 | |
| ], | |
| "eps": 1e-08, | |
| "weight_decay": 1e-05, | |
| "amsgrad": false, | |
| "maximize": false, | |
| "foreach": null, | |
| "capturable": false, | |
| "differentiable": false, | |
| "fused": null, | |
| "decoupled_weight_decay": true, | |
| "initial_lr": 0.001, | |
| "params": [ | |
| 0, | |
| 1 | |
| ] | |
| }, | |
| { | |
| "lr": 0.001, | |
| "name": "scale_256", | |
| "betas": [ | |
| 0.9, | |
| 0.999 | |
| ], | |
| "eps": 1e-08, | |
| "weight_decay": 1e-05, | |
| "amsgrad": false, | |
| "maximize": false, | |
| "foreach": null, | |
| "capturable": false, | |
| "differentiable": false, | |
| "fused": null, | |
| "decoupled_weight_decay": true, | |
| "initial_lr": 0.001, | |
| "params": [ | |
| 2, | |
| 3, | |
| 4 | |
| ] | |
| }, | |
| { | |
| "lr": 0.001, | |
| "name": "scale_512", | |
| "betas": [ | |
| 0.9, | |
| 0.999 | |
| ], | |
| "eps": 1e-08, | |
| "weight_decay": 1e-05, | |
| "amsgrad": false, | |
| "maximize": false, | |
| "foreach": null, | |
| "capturable": false, | |
| "differentiable": false, | |
| "fused": null, | |
| "decoupled_weight_decay": true, | |
| "initial_lr": 0.001, | |
| "params": [ | |
| 5, | |
| 6, | |
| 7 | |
| ] | |
| }, | |
| { | |
| "lr": 0.001, | |
| "name": "scale_768", | |
| "betas": [ | |
| 0.9, | |
| 0.999 | |
| ], | |
| "eps": 1e-08, | |
| "weight_decay": 1e-05, | |
| "amsgrad": false, | |
| "maximize": false, | |
| "foreach": null, | |
| "capturable": false, | |
| "differentiable": false, | |
| "fused": null, | |
| "decoupled_weight_decay": true, | |
| "initial_lr": 0.001, | |
| "params": [ | |
| 8, | |
| 9, | |
| 10 | |
| ] | |
| }, | |
| { | |
| "lr": 0.001, | |
| "name": "scale_1024", | |
| "betas": [ | |
| 0.9, | |
| 0.999 | |
| ], | |
| "eps": 1e-08, | |
| "weight_decay": 1e-05, | |
| "amsgrad": false, | |
| "maximize": false, | |
| "foreach": null, | |
| "capturable": false, | |
| "differentiable": false, | |
| "fused": null, | |
| "decoupled_weight_decay": true, | |
| "initial_lr": 0.001, | |
| "params": [ | |
| 11, | |
| 12, | |
| 13 | |
| ] | |
| }, | |
| { | |
| "lr": 0.001, | |
| "name": "scale_1280", | |
| "betas": [ | |
| 0.9, | |
| 0.999 | |
| ], | |
| "eps": 1e-08, | |
| "weight_decay": 1e-05, | |
| "amsgrad": false, | |
| "maximize": false, | |
| "foreach": null, | |
| "capturable": false, | |
| "differentiable": false, | |
| "fused": null, | |
| "decoupled_weight_decay": true, | |
| "initial_lr": 0.001, | |
| "params": [ | |
| 14, | |
| 15, | |
| 16 | |
| ] | |
| }, | |
| { | |
| "lr": 0.001, | |
| "name": "scale_1536", | |
| "betas": [ | |
| 0.9, | |
| 0.999 | |
| ], | |
| "eps": 1e-08, | |
| "weight_decay": 1e-05, | |
| "amsgrad": false, | |
| "maximize": false, | |
| "foreach": null, | |
| "capturable": false, | |
| "differentiable": false, | |
| "fused": null, | |
| "decoupled_weight_decay": true, | |
| "initial_lr": 0.001, | |
| "params": [ | |
| 17, | |
| 18, | |
| 19 | |
| ] | |
| }, | |
| { | |
| "lr": 0.001, | |
| "name": "scale_1792", | |
| "betas": [ | |
| 0.9, | |
| 0.999 | |
| ], | |
| "eps": 1e-08, | |
| "weight_decay": 1e-05, | |
| "amsgrad": false, | |
| "maximize": false, | |
| "foreach": null, | |
| "capturable": false, | |
| "differentiable": false, | |
| "fused": null, | |
| "decoupled_weight_decay": true, | |
| "initial_lr": 0.001, | |
| "params": [ | |
| 20, | |
| 21, | |
| 22 | |
| ] | |
| }, | |
| { | |
| "lr": 0.001, | |
| "name": "scale_2048", | |
| "betas": [ | |
| 0.9, | |
| 0.999 | |
| ], | |
| "eps": 1e-08, | |
| "weight_decay": 1e-05, | |
| "amsgrad": false, | |
| "maximize": false, | |
| "foreach": null, | |
| "capturable": false, | |
| "differentiable": false, | |
| "fused": null, | |
| "decoupled_weight_decay": true, | |
| "initial_lr": 0.001, | |
| "params": [ | |
| 23, | |
| 24, | |
| 25 | |
| ] | |
| }, | |
| { | |
| "lr": 0.001, | |
| "name": "scale_2304", | |
| "betas": [ | |
| 0.9, | |
| 0.999 | |
| ], | |
| "eps": 1e-08, | |
| "weight_decay": 1e-05, | |
| "amsgrad": false, | |
| "maximize": false, | |
| "foreach": null, | |
| "capturable": false, | |
| "differentiable": false, | |
| "fused": null, | |
| "decoupled_weight_decay": true, | |
| "initial_lr": 0.001, | |
| "params": [ | |
| 26, | |
| 27, | |
| 28 | |
| ] | |
| }, | |
| { | |
| "lr": 0.001, | |
| "name": "scale_2560", | |
| "betas": [ | |
| 0.9, | |
| 0.999 | |
| ], | |
| "eps": 1e-08, | |
| "weight_decay": 1e-05, | |
| "amsgrad": false, | |
| "maximize": false, | |
| "foreach": null, | |
| "capturable": false, | |
| "differentiable": false, | |
| "fused": null, | |
| "decoupled_weight_decay": true, | |
| "initial_lr": 0.001, | |
| "params": [ | |
| 29, | |
| 30, | |
| 31 | |
| ] | |
| }, | |
| { | |
| "lr": 0.0005, | |
| "name": "fusion", | |
| "betas": [ | |
| 0.9, | |
| 0.999 | |
| ], | |
| "eps": 1e-08, | |
| "weight_decay": 1e-05, | |
| "amsgrad": false, | |
| "maximize": false, | |
| "foreach": null, | |
| "capturable": false, | |
| "differentiable": false, | |
| "fused": null, | |
| "decoupled_weight_decay": true, | |
| "initial_lr": 0.0005, | |
| "params": [ | |
| 32, | |
| 33, | |
| 34, | |
| 35, | |
| 36, | |
| 37, | |
| 38, | |
| 39, | |
| 40, | |
| 41, | |
| 42, | |
| 43, | |
| 44, | |
| 45, | |
| 46, | |
| 47, | |
| 48, | |
| 49, | |
| 50, | |
| 51, | |
| 52, | |
| 53, | |
| 54, | |
| 55, | |
| 56, | |
| 57, | |
| 58, | |
| 59, | |
| 60, | |
| 61 | |
| ] | |
| } | |
| ] | |
| }, | |
| "scheduler_state_dict": { | |
| "T_0": 10, | |
| "T_i": 20, | |
| "T_mult": 2, | |
| "eta_min": 1e-06, | |
| "T_cur": 0, | |
| "base_lrs": [ | |
| 0.001, | |
| 0.001, | |
| 0.001, | |
| 0.001, | |
| 0.001, | |
| 0.001, | |
| 0.001, | |
| 0.001, | |
| 0.001, | |
| 0.001, | |
| 0.001, | |
| 0.0005 | |
| ], | |
| "last_epoch": 10, | |
| "_step_count": 0, | |
| "_is_initial": false, | |
| "_get_lr_called_within_step": false, | |
| "_last_lr": [ | |
| 0.001, | |
| 0.001, | |
| 0.001, | |
| 0.001, | |
| 0.001, | |
| 0.001, | |
| 0.001, | |
| 0.001, | |
| 0.001, | |
| 0.001, | |
| 0.001, | |
| 0.0005 | |
| ] | |
| }, | |
| "metrics": { | |
| "final_val_acc": 83.04 | |
| }, | |
| "train_config": { | |
| "name": "david_training", | |
| "run_id": "20251012_060013", | |
| "dataset_name": "AbstractPhil/imagenet-clip-features-orderly", | |
| "model_variant": "clip_vit_l14", | |
| "num_classes": 1000, | |
| "preset": "clip_vit_l14_deep", | |
| "custom_config_path": null, | |
| "num_classes_override": null, | |
| "use_belly_override": null, | |
| "belly_expand_override": null, | |
| "progressive_training_override": true, | |
| "scale_warmup_epochs_override": { | |
| "256": 0, | |
| "512": 1, | |
| "768": 2, | |
| "1024": 3, | |
| "1280": 4, | |
| "1536": 5, | |
| "1792": 6, | |
| "2048": 7, | |
| "2304": 8, | |
| "2560": 9 | |
| }, | |
| "num_epochs": 10, | |
| "batch_size": 1024, | |
| "learning_rate": 0.001, | |
| "weight_decay": 1e-05, | |
| "warmup_epochs": 0, | |
| "use_rose_loss": true, | |
| "rose_initial_weight": 0.1, | |
| "rose_max_weight": 0.5, | |
| "rose_weight_schedule": "adaptive", | |
| "use_cayley_loss": false, | |
| "cayley_weight": 0.001, | |
| "scale_loss_balance": null, | |
| "use_mixed_precision": false, | |
| "gradient_clip": 5.0, | |
| "scheduler_type": "cosine_restarts", | |
| "min_lr": 1e-06, | |
| "freeze_strategy": "never", | |
| "freeze_threshold": 90.0, | |
| "unfreeze_on_plateau": true, | |
| "patience": 10, | |
| "track_gradients": true, | |
| "gradient_scale_threshold": 1e-07, | |
| "gradient_scale_multiplier": 5.0, | |
| "log_interval": 50, | |
| "val_interval": 1, | |
| "save_interval": 5, | |
| "log_fusion_weights": true, | |
| "log_loss_components": true, | |
| "save_format": "safetensors", | |
| "hf_repo": "AbstractPhil/gated-david", | |
| "upload_to_hub": true, | |
| "base_dir": "./david_training", | |
| "num_workers": 10, | |
| "pin_memory": true, | |
| "prefetch_factor": 4, | |
| "persistent_workers": true | |
| } | |
| } |