david-shared-space / weights /David-decoupled-deep_efficiency /20251012_221046 /best_model_acc66.14_metadata.json
| { | |
| "epoch": 5, | |
| "optimizer_state_dict": { | |
| "state": { | |
| "0": { | |
| "step": "tensor(22524.)", | |
| "exp_avg": "tensor([[-8.6509e-06, 1.4315e-05, 1.7819e-05, ..., -6.3765e-06,\n -4.2225e-06, 1.0202e-05],\n [ 2.0678e-05, 5.9889e-05, 7.4268e-06, ..., 3.5957e-05,\n 2.7878e-05, 1.9444e-05],\n [ 2.6985e-05, -8.9259e-06, -2.3418e-06, ..., 2.0111e-06,\n 1.3668e-05, -1.5354e-05],\n ...,\n [ 2.0144e-05, -6.5389e-05, -1.2010e-05, ..., 3.6561e-05,\n -2.0349e-06, 2.9103e-05],\n [-1.3964e-05, -8.7595e-06, 1.6698e-05, ..., -1.3900e-05,\n 7.7694e-06, 1.6151e-05],\n [ 1.2662e-05, 4.2877e-05, -2.1730e-05, ..., 5.2258e-06,\n 8.9826e-07, 2.5583e-07]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[6.5850e-09, 5.0963e-08, 1.5801e-08, ..., 4.8489e-09, 2.6209e-09,\n 3.4168e-09],\n [1.9692e-08, 8.5263e-08, 1.9033e-08, ..., 3.2395e-08, 9.1604e-09,\n 1.0283e-08],\n [4.2930e-08, 2.7212e-08, 5.8191e-09, ..., 1.1222e-08, 6.4048e-09,\n 7.3821e-09],\n ...,\n [1.2315e-08, 4.4164e-08, 2.1333e-08, ..., 1.1370e-08, 3.9813e-09,\n 1.4617e-08],\n [3.3301e-08, 4.0149e-08, 1.6848e-08, ..., 3.0424e-08, 8.9654e-09,\n 9.5108e-09],\n [4.9815e-09, 2.8874e-08, 3.1220e-08, ..., 3.7437e-09, 1.8111e-09,\n 4.8346e-09]], device='cuda:0')" | |
| }, | |
| "1": { | |
| "step": "tensor(22524.)", | |
| "exp_avg": "tensor([-4.7902e-04, 1.2943e-03, 3.8765e-04, -1.3115e-03, -4.1507e-04,\n 2.2538e-04, 7.5913e-04, 2.0305e-03, -6.0261e-04, 5.4091e-04,\n -5.1329e-04, 5.6052e-45, -3.9789e-04, 4.8129e-04, 1.4700e-03,\n 1.2565e-03, -6.3375e-04, -6.6590e-04, -2.5548e-04, 1.1545e-03,\n 1.4219e-04, 2.4551e-04, -1.1627e-03, -7.2287e-05, -5.0733e-04,\n 4.1479e-04, 4.6079e-05, -1.2255e-03, 1.0311e-03, 8.5369e-04,\n 7.7395e-04, -1.8197e-03, -2.7467e-03, -1.2016e-04, 8.6066e-04,\n 4.6543e-04, -3.6773e-04, -3.4520e-04, 1.7480e-03, 8.3374e-04,\n 1.1204e-03, -5.7888e-04, -8.9921e-04, 5.7656e-04, 1.4032e-03,\n -4.4105e-04, 7.4108e-04, 7.8547e-04, -1.0756e-03, -1.7675e-04,\n -1.0195e-03, -2.4514e-04, 1.8437e-04, -3.4294e-04, 5.1221e-04,\n -5.6808e-04, 5.4712e-04, -1.4776e-03, 5.6456e-05, -5.0071e-04,\n -2.0029e-04, 2.3126e-03, -2.5012e-04, 3.6695e-05, -6.0432e-05,\n 1.9479e-04, -6.3050e-05, 5.6052e-45, -3.7091e-05, -3.4872e-04,\n 1.7411e-04, -9.4025e-04, 1.1695e-03, 5.6861e-04, -2.4160e-04,\n 5.9464e-04, -9.5776e-04, -9.5064e-04, -2.4585e-04, -1.9847e-03,\n 1.2038e-04, 5.0442e-04, -7.8487e-04, 4.3050e-04, 2.9113e-03,\n -6.9185e-04, -4.0704e-04, -1.6041e-03, 8.8090e-04, 5.6052e-45,\n -1.3426e-03, -1.5176e-03, -2.6245e-04, 6.4159e-05, 4.2912e-04,\n -1.5245e-03, 7.7238e-04, -8.5658e-04, 1.4268e-04, 6.3112e-04,\n -5.8964e-04, 1.5381e-03, -1.1738e-03, -8.0462e-04, -1.9623e-03,\n -1.3333e-03, -2.2537e-03, 9.3422e-04, 1.3077e-03, -2.3486e-04,\n -9.4483e-06, 1.7415e-03, -1.0077e-04, -1.4252e-03, 2.7778e-04,\n 6.9536e-04, 3.4755e-04, 5.3184e-04, 3.5007e-04, -6.2910e-04,\n 4.8062e-04, 2.2584e-04, 3.9012e-04, -5.0704e-04, -5.1204e-04,\n 4.3255e-04, 7.0598e-04, -7.5457e-04, 8.5115e-04, -8.8282e-04,\n 8.9724e-04, -7.5740e-04, 6.8768e-04, -1.1882e-03, 9.7841e-04,\n -2.1960e-04, 2.0471e-03, -2.9127e-04, 2.5747e-04, -1.0464e-03,\n -1.1357e-03, 1.8251e-04, 1.3784e-03, -2.6689e-04, -6.8215e-04,\n -3.2360e-04, 1.4855e-03, -4.2901e-04, 6.5791e-04, 2.9026e-04,\n -6.9223e-04, 7.0697e-05, 4.1292e-04, 1.8217e-04, -6.7467e-04,\n -2.6649e-04, 5.0052e-04, -6.1994e-04, 9.5421e-04, -2.5146e-04,\n -4.5031e-04, -1.5121e-03, 3.2586e-05, 3.1753e-04, 6.8736e-04,\n 9.0753e-05, 2.4258e-05, 5.6464e-04, -3.7818e-04, -1.0727e-03,\n 8.3873e-04, 5.1768e-04, -1.9438e-04, 1.1386e-03, 6.9781e-06,\n -2.0084e-04, -1.8183e-04, 1.5689e-03, 5.4110e-04, 1.3751e-03,\n -6.0531e-05, 4.3466e-04, -3.7863e-04, -4.0120e-04, 5.6112e-04,\n -2.3013e-04, -1.7111e-03, 6.2577e-05, -3.7015e-04, 1.3642e-03,\n -7.5253e-04, 7.1803e-04, 1.3151e-04, 2.7399e-04, -1.0643e-04,\n 1.0911e-04, -4.4625e-04, 4.5505e-04, 8.8642e-04, 8.7511e-04,\n -1.8867e-04, -3.0135e-04, 4.8795e-04, -1.2674e-03, -2.1905e-04,\n -1.3967e-04, 2.5057e-03, 5.6052e-45, -5.4697e-05, -3.6409e-03,\n 6.6305e-04, -6.8425e-04, 2.0704e-04, 3.6627e-04, 3.6842e-04,\n 2.4542e-05, -3.6124e-04, -4.7427e-04, -5.6677e-04, 2.8851e-04,\n -2.0747e-04, -2.7737e-04, -1.7162e-04, 4.6788e-04, 1.0692e-03,\n 1.2748e-03, -3.9702e-04, -3.8328e-04, 9.3525e-04, -1.1696e-04,\n -2.0235e-04, 2.4746e-04, 4.3073e-04, 6.1824e-04, -6.6287e-06,\n -5.8560e-04, 1.1792e-03, -8.1596e-04, 5.3704e-04, -4.1328e-04,\n -2.4250e-03, -2.1968e-04, 5.7797e-04, 2.8291e-04, 8.4739e-04,\n 4.1948e-04, 1.6463e-03, 4.3554e-04, 2.4244e-04, 6.7497e-04,\n -7.9792e-04, 8.5332e-05, 1.1794e-03, 1.2124e-04, -6.8492e-05,\n -1.0865e-03, -1.0116e-04, 2.6218e-04, -6.6596e-04, 7.0522e-04,\n -7.3213e-04, -6.5532e-04, 7.9657e-04, -5.6573e-04, 7.1365e-04,\n -9.2735e-04, 7.6646e-04, -2.1137e-04, 6.7723e-04, -9.0416e-04,\n -5.6598e-04, 1.5523e-03, 2.9115e-04, -1.1477e-03, -1.4831e-03,\n -6.8547e-04, -8.9196e-04, 7.7568e-04, -8.0964e-04, -3.6915e-04,\n -7.8520e-04, -1.1471e-03, -5.5946e-04, 5.0019e-04, -3.8736e-04,\n 4.6060e-04, 2.5198e-03, 3.3492e-04, 2.9495e-04, 1.1661e-03,\n 8.9269e-05, 6.6174e-04, 1.6825e-04, -2.6949e-04, -1.8090e-04,\n 7.0622e-06, -4.8242e-04, 1.6308e-04, -5.2388e-04, -9.0862e-05,\n -1.3015e-03, 9.8372e-04, 3.7699e-04, 2.1572e-04, -1.4615e-03,\n 8.1436e-04, 3.4181e-04, -1.3247e-03, -1.1078e-05, 2.7469e-04,\n -5.5602e-04, -6.4787e-05, 2.2316e-04, -8.7062e-04, 8.9595e-04,\n 9.9387e-04, -3.6264e-03, 1.6302e-04, 9.1054e-04, 3.8473e-03,\n 1.2588e-03, -3.5592e-04, 1.9358e-04, -4.8570e-04, 4.0183e-04,\n 4.6761e-04, 7.3203e-04, 1.5505e-03, 1.1792e-03, 1.4632e-03,\n 4.7079e-04, -3.2892e-04, -1.4679e-04, 2.4864e-04, 6.0998e-04,\n -2.6093e-04, 1.9975e-03, -5.2780e-04, -3.5323e-04, 1.2101e-03,\n -4.1488e-05, -8.1641e-04, 3.0619e-04, 9.9673e-04, -9.2556e-04,\n -8.5753e-05, -9.0175e-04, -6.0418e-04, -9.2282e-04, 1.5459e-03,\n -5.2211e-04, 4.8508e-04, 5.5216e-05, -3.8463e-04, -1.5522e-03,\n -4.0794e-04, -2.3967e-03, 5.9443e-04, -2.0100e-03, -1.0725e-03,\n -5.9909e-04, 1.5566e-03, 5.4841e-04, -1.4437e-03, 9.4991e-04,\n 5.3963e-04, 1.1779e-05, -2.1925e-04, -5.1585e-04, 4.2467e-04,\n -1.1262e-03, -9.6048e-04, -3.5033e-04, -9.7954e-04, -3.4724e-04,\n -3.5882e-04, 8.7680e-04, 2.3914e-04, -1.5372e-03, -1.2846e-03,\n -3.2208e-04, 6.7397e-04, -3.2628e-04, -2.5471e-04, 7.0052e-05,\n -9.6946e-04, -5.2720e-04, -1.0390e-03, 3.1722e-04, -4.1668e-04,\n -9.3798e-05, -1.4807e-03, -6.9398e-04, -9.3855e-04, -1.8308e-03,\n 1.2780e-03, 2.7221e-04, 5.0826e-04, -1.6393e-04, 6.2440e-05,\n -5.8101e-04, 5.6126e-04, -2.5655e-03, -1.2927e-03, -5.3095e-04,\n -2.6307e-03, 7.9990e-04, -1.2779e-03, -1.1657e-03, 8.3550e-05,\n 2.1737e-04, 3.8525e-04, 1.2673e-03, 5.7553e-04, 2.0100e-04,\n -1.3547e-03, -3.0961e-04, 6.8611e-04, 3.1475e-04, -6.2740e-05,\n 2.1930e-04, 8.3096e-04, 1.9103e-04, -1.1484e-04, 6.2800e-04,\n 1.1945e-04, -8.3465e-04, 1.1941e-04, -5.4514e-04, 3.2466e-04,\n 7.0409e-04, 2.0101e-03, 5.6052e-45, -7.9048e-04, -1.4154e-04,\n -3.9189e-04, 2.9441e-04, -1.0515e-03, -3.8168e-04, 9.5836e-04,\n 2.4882e-04, -8.5945e-05, -9.3926e-04, 1.1226e-03, 7.7664e-04,\n -1.7744e-03, -1.8769e-03, 3.5597e-05, 5.9827e-04, -2.3864e-04,\n 2.0068e-04, 2.2694e-04, -8.9781e-05, -2.1318e-04, -1.0491e-04,\n 4.6384e-04, -6.1889e-04, 1.5499e-03, 5.5488e-04, 9.0216e-04,\n 5.6052e-45, 1.6907e-04, -1.0542e-03, 6.8936e-04, -7.9909e-04,\n -1.2688e-03, -2.2392e-03, -5.8348e-04, 1.1690e-04, -5.2274e-04,\n -9.9879e-04, 7.5928e-04, -8.0799e-05, -8.4941e-04, 2.1589e-04,\n -2.3785e-03, -6.0274e-04, -1.0683e-03, -1.4757e-03, 9.4299e-04,\n 1.8436e-03, 4.9271e-04, -8.9028e-04, -7.5754e-04, -1.0078e-03,\n 9.9474e-05, 6.0844e-04, -2.8729e-04, -8.8998e-05, 6.0926e-04,\n 6.5180e-04, 8.0907e-04, -1.2635e-04, 3.3929e-04, 7.6076e-04,\n -5.4685e-04, -1.0856e-04, 9.1429e-06, 4.0691e-04, -1.7218e-03,\n -7.8494e-04, 4.1160e-04, 5.4589e-04, 1.5147e-03, -2.6253e-04,\n 2.3065e-04, 3.5583e-04, -1.0285e-03, -5.5397e-04, -4.9730e-04,\n 1.2192e-03, -1.0513e-04, -1.5843e-03, 5.9282e-04, -3.0626e-04,\n 6.1722e-04, 1.8160e-04, 3.1551e-04, -1.7087e-04, -9.0489e-04,\n 1.8807e-03, -9.7246e-04, -8.3955e-04, -5.3498e-05, 1.1800e-04,\n 5.4144e-04, 3.8296e-04, 5.0246e-04, -1.1214e-03, 4.1081e-05,\n -1.2340e-04, -4.7785e-05, -6.4439e-04, -2.1082e-03, -8.1617e-04,\n 1.8629e-03, 1.4622e-03, -7.3303e-04, -1.0139e-03, 1.4167e-03,\n 1.9127e-03, 2.3051e-03, 7.9306e-04, -6.1075e-05, 4.2339e-04,\n -2.3520e-03, -1.6010e-05, -1.7688e-04, -1.1749e-03, 1.5803e-04,\n 8.3028e-05, 1.9208e-04, -6.7542e-05, 4.1920e-05, 5.6367e-04,\n 5.6793e-04, 1.4860e-03, -2.6938e-05, -7.2006e-04, 5.7836e-04,\n -6.9443e-05, -6.1467e-04, 5.2861e-04, -1.9001e-03, 5.8810e-04,\n -1.0543e-03, 1.0086e-03, -8.1662e-04, -2.4007e-04, -7.5105e-04,\n 6.4235e-05, 3.7598e-04, -1.1683e-03, -6.9048e-05, 8.4846e-04,\n -3.5674e-04, 1.4217e-03, -9.5682e-05, -1.1723e-03, 2.4853e-04,\n 1.9329e-04, 1.4067e-04, 2.5128e-04, -1.5640e-03, 1.2988e-04,\n 1.3712e-03, 6.6985e-04, 5.3442e-04, 1.7928e-03, 1.1577e-03,\n 8.2740e-05, 2.7683e-04, 9.3657e-04, -3.9372e-04, -2.5532e-05,\n 1.3204e-04, -4.0164e-04, -7.1305e-05, 8.4184e-07, 1.6085e-04,\n 1.2489e-03, -1.7079e-05, -3.3681e-04, -6.9054e-04, -1.2794e-03,\n 9.3389e-04, -4.6663e-04, -1.1079e-03, -1.3854e-03, -1.1441e-04,\n 2.0744e-04, 7.0822e-05, 1.4165e-03, 5.3848e-04, 6.7991e-04,\n 2.9557e-04, -2.2325e-04, 1.2343e-04, 4.4152e-04, 4.2183e-04,\n -8.4073e-04, 1.0957e-03, -1.1252e-03, 1.3218e-03, 4.9169e-04,\n -2.0541e-04, 5.3188e-05, -4.1109e-04, 8.5805e-04, -9.9477e-04,\n 1.9325e-04, 2.5531e-04, 1.3708e-03, -8.3464e-04, -2.1673e-04,\n -3.5631e-04, 6.1267e-04, 5.9986e-04, -3.0903e-04, 1.8987e-04],\n device='cuda:0')", | |
| "exp_avg_sq": "tensor([4.6767e-06, 1.6528e-05, 1.0794e-05, 1.1140e-05, 4.1509e-06, 2.8383e-05,\n 1.2104e-05, 1.8470e-05, 1.2783e-05, 5.0832e-06, 1.5150e-05, 6.8520e-16,\n 2.0483e-05, 6.9041e-06, 2.1428e-05, 6.1626e-06, 1.4822e-05, 1.4114e-05,\n 2.4964e-05, 1.6722e-05, 9.3715e-06, 4.9216e-06, 1.0887e-05, 1.3885e-05,\n 1.6779e-05, 4.2594e-06, 1.4291e-05, 6.8912e-06, 1.8889e-05, 1.1227e-05,\n 1.0050e-05, 2.0557e-05, 1.4487e-05, 6.5328e-06, 1.0779e-05, 8.7164e-06,\n 1.2474e-05, 5.8475e-06, 1.9493e-05, 1.3952e-05, 1.2391e-05, 7.3194e-06,\n 1.0699e-05, 4.4522e-06, 2.2552e-05, 2.9074e-05, 1.4350e-05, 5.6969e-06,\n 7.8356e-06, 1.4140e-05, 1.6632e-05, 9.1006e-06, 4.3323e-06, 2.1218e-05,\n 1.8611e-05, 3.1139e-05, 9.3026e-06, 1.2135e-05, 7.0125e-06, 8.4515e-06,\n 8.6665e-06, 2.0092e-05, 7.2080e-06, 2.3250e-06, 1.5025e-05, 2.0966e-05,\n 5.1531e-06, 2.9001e-16, 1.0974e-05, 1.7447e-05, 6.3264e-06, 1.3577e-05,\n 2.4672e-05, 7.6615e-06, 9.9958e-06, 8.9400e-06, 2.8359e-06, 1.2857e-05,\n 1.2635e-05, 9.3112e-06, 7.2826e-06, 1.7186e-05, 3.0259e-05, 1.6307e-05,\n 1.8302e-05, 8.8068e-06, 1.5084e-05, 1.5439e-05, 2.2071e-05, 4.3743e-17,\n 1.3127e-05, 1.4746e-05, 1.5058e-05, 8.9002e-06, 1.5746e-05, 9.1525e-06,\n 8.3895e-06, 1.5310e-05, 8.8368e-06, 1.3602e-05, 7.6442e-06, 2.3014e-05,\n 4.9923e-05, 7.8114e-06, 6.2415e-06, 1.1076e-05, 1.0341e-05, 1.1874e-05,\n 7.4763e-06, 1.2041e-05, 8.2443e-06, 1.5174e-05, 1.2597e-05, 5.8154e-06,\n 4.1384e-06, 1.8159e-05, 3.6818e-06, 6.5049e-06, 1.5087e-05, 4.0357e-06,\n 1.1716e-05, 1.0535e-05, 3.7123e-06, 5.2108e-06, 8.2770e-06, 1.4251e-05,\n 1.9437e-05, 1.6591e-05, 1.3263e-05, 1.3848e-05, 6.8797e-06, 9.0862e-06,\n 3.4421e-06, 1.1448e-05, 1.1308e-05, 4.5663e-06, 1.4264e-05, 6.8762e-06,\n 1.4889e-05, 1.6745e-05, 1.5468e-05, 3.5001e-06, 6.1055e-06, 1.0604e-05,\n 5.7990e-06, 9.8751e-06, 9.0821e-06, 6.0395e-06, 6.3517e-06, 1.6565e-05,\n 3.3111e-05, 5.3459e-06, 3.5059e-06, 4.3457e-06, 1.4284e-05, 8.7724e-06,\n 8.6308e-06, 3.9970e-06, 1.5762e-05, 3.3928e-05, 9.9485e-06, 1.1589e-05,\n 4.0277e-06, 6.0445e-06, 1.3573e-05, 1.3933e-05, 3.1760e-06, 1.1491e-05,\n 8.4940e-06, 9.0985e-06, 1.0739e-05, 4.8505e-06, 1.4046e-05, 6.7646e-06,\n 2.0259e-05, 1.5564e-05, 3.9291e-06, 2.0320e-05, 1.0350e-05, 9.1194e-06,\n 1.1677e-05, 2.0942e-05, 1.3937e-05, 1.5230e-05, 1.0317e-05, 1.7099e-05,\n 1.7625e-05, 4.6591e-06, 7.0066e-06, 1.4674e-05, 1.8538e-05, 6.1245e-06,\n 4.1978e-06, 8.3382e-06, 1.0991e-05, 1.6398e-05, 3.0191e-05, 1.0231e-05,\n 1.3168e-05, 1.9108e-05, 7.9940e-06, 9.1785e-06, 1.4809e-05, 1.3283e-05,\n 2.3365e-05, 5.3736e-06, 1.4960e-05, 8.0333e-16, 1.0682e-05, 1.3537e-05,\n 8.2758e-06, 1.5158e-05, 1.5257e-05, 4.7817e-06, 6.7812e-06, 1.7703e-05,\n 8.0204e-06, 2.3462e-06, 1.0417e-05, 4.5896e-06, 7.0083e-06, 9.9476e-06,\n 4.0513e-06, 7.9999e-06, 1.4163e-05, 6.9775e-06, 3.8204e-06, 1.2765e-05,\n 1.2345e-05, 5.7900e-06, 2.5943e-06, 8.4414e-06, 1.2164e-05, 8.8961e-06,\n 5.3360e-06, 1.3887e-05, 1.5211e-05, 7.6178e-06, 6.8909e-06, 1.6442e-05,\n 1.5346e-05, 8.8686e-06, 1.4501e-05, 1.4585e-05, 1.0783e-05, 8.4106e-06,\n 1.4196e-05, 8.5893e-06, 3.8075e-06, 2.0521e-05, 2.1880e-05, 1.3570e-05,\n 1.1752e-05, 1.3728e-05, 6.8586e-06, 2.4599e-05, 1.5419e-05, 9.3222e-06,\n 6.1331e-06, 4.3916e-06, 1.3806e-05, 8.5546e-06, 4.1456e-05, 4.5743e-06,\n 1.0084e-05, 2.3065e-05, 1.8982e-05, 9.2811e-06, 7.6503e-06, 4.2722e-06,\n 1.6299e-05, 8.2718e-06, 1.9653e-05, 1.7952e-05, 2.8516e-05, 4.4306e-06,\n 7.9708e-06, 1.2775e-05, 4.3587e-06, 1.0271e-05, 1.8136e-05, 6.3731e-06,\n 1.3811e-05, 1.0910e-05, 1.3164e-05, 6.8872e-06, 8.1055e-06, 7.2992e-06,\n 3.9983e-06, 1.2089e-05, 4.4398e-06, 1.3714e-05, 3.7936e-06, 9.0386e-06,\n 8.6107e-06, 3.5278e-06, 1.1870e-05, 3.4522e-05, 1.3635e-05, 5.5816e-06,\n 1.9624e-05, 1.5085e-05, 8.9578e-06, 1.2206e-05, 8.5508e-06, 5.9190e-06,\n 5.2361e-06, 1.4307e-05, 1.4272e-05, 2.6902e-05, 5.5074e-06, 2.5968e-05,\n 7.4086e-06, 1.2807e-05, 1.3684e-05, 1.8261e-05, 1.6504e-05, 1.0348e-05,\n 1.2037e-05, 3.0778e-05, 2.4134e-05, 1.2024e-05, 7.9634e-06, 6.8098e-06,\n 9.6573e-06, 4.1516e-06, 2.2433e-05, 1.7015e-05, 9.9350e-06, 1.9387e-05,\n 1.2294e-05, 7.4499e-06, 9.3659e-06, 1.2923e-05, 1.8923e-05, 1.8292e-05,\n 2.0948e-05, 1.5334e-05, 1.9118e-05, 2.4341e-05, 1.7992e-05, 4.8579e-06,\n 1.6159e-05, 1.2738e-05, 1.9203e-05, 1.4965e-05, 2.1682e-05, 3.4071e-05,\n 2.8952e-06, 8.8746e-06, 1.5773e-05, 1.6655e-05, 1.3895e-05, 1.3025e-05,\n 5.2214e-05, 1.2669e-05, 4.1866e-05, 6.0847e-06, 1.5166e-05, 1.9589e-05,\n 9.5149e-06, 1.1650e-05, 8.3154e-06, 4.0169e-05, 1.0931e-05, 1.2072e-05,\n 3.0029e-06, 6.2362e-06, 8.5054e-06, 2.0589e-05, 2.4902e-05, 1.5773e-05,\n 6.5409e-06, 9.9724e-06, 5.9191e-06, 1.6257e-05, 1.1881e-05, 4.4288e-06,\n 1.3176e-05, 4.0353e-05, 1.0964e-05, 1.1498e-05, 1.0773e-05, 6.1223e-06,\n 6.8575e-06, 1.1621e-05, 1.5363e-05, 1.0222e-05, 1.0864e-05, 2.0054e-06,\n 6.9499e-06, 1.2780e-05, 8.9564e-06, 6.2457e-06, 8.8115e-06, 1.6528e-05,\n 5.3420e-06, 1.1415e-05, 3.9167e-06, 1.0053e-05, 2.2807e-05, 2.5587e-05,\n 1.2946e-05, 1.4647e-05, 1.1142e-05, 1.6622e-05, 3.2545e-05, 1.8671e-05,\n 1.0088e-05, 8.9694e-06, 1.6505e-05, 7.2239e-06, 1.3323e-05, 9.5571e-06,\n 3.4788e-06, 9.5469e-06, 1.8961e-05, 1.3772e-05, 3.3244e-06, 9.6223e-06,\n 2.0105e-05, 1.0234e-05, 1.0126e-05, 1.1204e-05, 3.1499e-06, 1.2399e-05,\n 1.8639e-05, 1.7403e-05, 1.3560e-05, 4.7670e-06, 1.0715e-05, 1.0637e-05,\n 6.6336e-16, 9.3421e-06, 4.8423e-06, 2.1540e-05, 1.5031e-05, 2.2374e-05,\n 1.4916e-05, 4.0317e-05, 7.1067e-06, 7.5261e-06, 1.5338e-05, 1.9317e-05,\n 1.1614e-05, 1.5365e-05, 1.3069e-05, 1.8785e-05, 1.3499e-05, 1.0522e-05,\n 8.5364e-06, 1.5496e-05, 8.3840e-06, 2.1526e-05, 1.2325e-05, 1.6292e-05,\n 9.5968e-06, 1.0082e-05, 9.7543e-06, 5.1364e-06, 2.2738e-16, 1.1697e-05,\n 6.3120e-06, 5.4001e-06, 1.4628e-05, 1.6258e-05, 2.0220e-05, 1.5880e-05,\n 4.3253e-06, 8.8865e-06, 9.0071e-06, 1.2237e-05, 3.8585e-06, 1.2725e-05,\n 7.9443e-06, 3.5481e-05, 1.2050e-05, 1.4226e-05, 1.4911e-05, 1.5727e-05,\n 1.9883e-05, 1.2791e-05, 7.9024e-06, 2.2474e-05, 1.8020e-05, 7.8447e-06,\n 1.4376e-05, 6.3294e-06, 1.2383e-05, 1.9577e-05, 1.6966e-05, 9.5428e-06,\n 1.1641e-05, 1.5072e-05, 1.0872e-05, 1.4192e-05, 8.4525e-06, 1.4292e-05,\n 3.2911e-06, 2.9145e-05, 1.0012e-05, 1.1183e-05, 5.5619e-06, 2.7209e-05,\n 1.3038e-05, 2.0425e-05, 1.5636e-05, 1.0907e-05, 7.0734e-06, 6.8893e-06,\n 1.9514e-05, 1.1137e-05, 1.5193e-05, 1.1171e-05, 3.6924e-05, 1.6003e-05,\n 1.2508e-05, 1.2724e-05, 1.6245e-05, 5.9375e-06, 2.5058e-05, 1.7718e-05,\n 1.7504e-05, 3.0778e-05, 2.0465e-05, 5.8754e-06, 1.7310e-05, 6.2988e-06,\n 1.5245e-05, 1.5622e-06, 1.8960e-05, 1.6622e-05, 6.5572e-06, 6.8325e-06,\n 1.1101e-05, 2.1969e-05, 1.9704e-05, 1.8398e-05, 4.6926e-05, 3.1353e-05,\n 1.4957e-05, 1.4468e-05, 9.7816e-06, 1.6592e-05, 8.8114e-06, 4.3858e-05,\n 3.2096e-06, 7.1258e-06, 8.3968e-06, 1.0646e-05, 5.5195e-06, 1.8033e-05,\n 4.8645e-06, 1.5098e-05, 2.2369e-05, 8.0387e-06, 9.0622e-06, 3.0362e-05,\n 2.1088e-05, 1.9809e-05, 6.9650e-06, 2.0328e-05, 1.3262e-05, 3.1453e-05,\n 3.0931e-06, 9.3037e-06, 1.1330e-05, 1.5642e-05, 5.8846e-06, 5.2859e-06,\n 7.0920e-06, 8.1546e-06, 2.1747e-05, 1.0250e-05, 1.3924e-05, 1.3442e-05,\n 1.0632e-05, 5.6320e-06, 1.0203e-05, 9.1722e-06, 4.6934e-06, 4.9000e-06,\n 8.0601e-06, 1.6676e-05, 1.1803e-05, 1.9953e-05, 5.3813e-06, 1.4848e-05,\n 2.3791e-05, 2.7906e-05, 6.4938e-06, 5.2782e-06, 1.0871e-05, 9.1961e-06,\n 1.4097e-05, 1.5956e-05, 1.3502e-05, 1.6420e-05, 5.3347e-06, 9.9802e-06,\n 1.3116e-05, 1.5667e-05, 9.5766e-06, 5.3143e-06, 4.6929e-06, 8.4527e-06,\n 6.7724e-06, 4.5652e-05, 1.1804e-05, 1.1844e-05, 1.4785e-05, 8.2764e-06,\n 7.9880e-06, 2.8193e-05, 1.4949e-05, 9.3526e-06, 4.8368e-06, 4.1956e-06,\n 6.4077e-06, 8.7678e-06, 5.7180e-06, 1.5137e-05, 2.5474e-05, 1.6246e-05,\n 1.4753e-05, 1.1158e-05, 1.9502e-05, 1.2867e-05, 5.0250e-06, 6.9415e-06,\n 1.1949e-05, 1.0559e-05, 1.4843e-05, 1.4635e-05, 1.0129e-05, 9.3061e-06,\n 1.3085e-05, 7.5861e-06, 1.5700e-05, 4.0174e-06], device='cuda:0')" | |
| }, | |
| "2": { | |
| "step": "tensor(22524.)", | |
| "exp_avg": "tensor([[ 4.1103e-06, 4.2056e-06, -2.6698e-06, ..., 9.9210e-06,\n -4.8017e-06, -3.1378e-06],\n [-7.4533e-06, 2.0835e-06, -1.0217e-06, ..., 1.7796e-05,\n 3.2568e-06, 2.1404e-06],\n [ 1.9778e-05, 2.5416e-06, -1.2145e-05, ..., -1.8165e-05,\n -4.1530e-06, 2.3002e-06],\n ...,\n [-7.0231e-06, 4.9178e-07, 2.7755e-07, ..., -3.8870e-06,\n 4.9990e-06, -1.6229e-05],\n [-2.0597e-06, 1.4713e-05, -1.7459e-05, ..., 1.4009e-05,\n -3.4754e-06, -8.0748e-06],\n [-7.3191e-06, 8.9195e-06, -1.4922e-05, ..., 1.1042e-05,\n -1.7559e-05, -4.4753e-06]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[7.7095e-10, 1.3916e-09, 8.7947e-10, ..., 1.1578e-09, 2.0012e-09,\n 4.0926e-10],\n [1.8507e-09, 3.0810e-09, 2.7911e-09, ..., 4.1519e-09, 2.2733e-09,\n 1.3899e-09],\n [1.9141e-09, 1.8626e-09, 2.7825e-09, ..., 2.8466e-09, 2.1864e-09,\n 1.8411e-09],\n ...,\n [1.4381e-09, 2.8964e-09, 6.7759e-09, ..., 2.5818e-09, 3.1158e-09,\n 1.6589e-09],\n [1.9354e-09, 2.8925e-09, 5.1713e-09, ..., 2.6985e-09, 1.9441e-09,\n 1.8255e-09],\n [1.8910e-09, 2.8970e-09, 2.3336e-09, ..., 3.7552e-09, 2.6155e-09,\n 7.4279e-10]], device='cuda:0')" | |
| }, | |
| "3": { | |
| "step": "tensor(18770.)", | |
| "exp_avg": "tensor([[ 1.5135e-05, -1.4839e-05, -1.1829e-06, ..., -3.4931e-06,\n 1.9051e-06, 7.5098e-06],\n [ 5.5517e-07, -5.2816e-06, -3.6629e-06, ..., -2.5302e-06,\n -4.0189e-06, 7.8595e-07],\n [ 2.6635e-06, -3.7355e-07, -2.9183e-06, ..., -2.3819e-06,\n 2.2176e-06, -1.3135e-06],\n ...,\n [-2.8400e-06, 1.4441e-06, -3.3233e-07, ..., -3.2915e-06,\n -1.6418e-06, 4.4173e-06],\n [-3.0892e-06, 3.5271e-06, -2.0365e-06, ..., -2.6424e-06,\n 3.2887e-06, 3.7480e-06],\n [-1.1514e-06, -1.1868e-06, 2.0897e-06, ..., 1.3855e-06,\n -1.1567e-06, 6.9637e-08]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[2.0446e-10, 2.9312e-10, 5.1783e-11, ..., 5.2173e-11, 5.8100e-11,\n 4.9395e-11],\n [4.4647e-10, 4.6557e-10, 2.0628e-10, ..., 8.8552e-10, 2.0280e-10,\n 2.7636e-10],\n [1.6825e-10, 2.4880e-10, 1.4648e-10, ..., 6.7026e-10, 1.3805e-10,\n 1.4825e-10],\n ...,\n [2.0685e-10, 5.4330e-10, 6.8533e-11, ..., 6.9460e-11, 1.1671e-10,\n 6.7621e-11],\n [1.0861e-09, 8.9004e-10, 7.6169e-10, ..., 1.6336e-10, 1.5857e-10,\n 1.7800e-10],\n [9.8868e-11, 8.8481e-11, 5.9541e-11, ..., 3.1189e-10, 5.5704e-11,\n 7.3388e-11]], device='cuda:0')" | |
| }, | |
| "4": { | |
| "step": "tensor(18770.)", | |
| "exp_avg": "tensor([ 2.4904e-04, -5.4819e-05, -8.0356e-05, ..., -9.0444e-05,\n 7.2425e-05, 1.3514e-05], device='cuda:0')", | |
| "exp_avg_sq": "tensor([8.4278e-08, 3.4102e-07, 3.9303e-07, ..., 1.3699e-07, 2.1551e-07,\n 1.0358e-07], device='cuda:0')" | |
| }, | |
| "5": { | |
| "step": "tensor(18770.)", | |
| "exp_avg": "tensor([[ 1.1923e-06, -2.7873e-07, -1.1859e-06, ..., 1.6586e-07,\n 3.0941e-07, 2.4495e-07],\n [-2.4577e-06, -1.2766e-06, -8.1279e-07, ..., -2.9816e-08,\n 1.1089e-06, 2.8121e-07],\n [ 2.0680e-06, -9.6654e-07, -1.3403e-06, ..., -3.0553e-07,\n 9.4190e-07, -2.1023e-07],\n ...,\n [-1.4901e-06, -2.1700e-07, 1.0011e-06, ..., -2.0885e-06,\n 1.4330e-06, -2.8291e-08],\n [ 5.7843e-07, -4.1884e-07, 8.8222e-07, ..., 1.0577e-08,\n 6.5771e-07, 8.0727e-08],\n [-3.7981e-07, -1.8789e-07, -4.1925e-06, ..., -4.1264e-07,\n -1.0725e-06, 1.2110e-07]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[1.7192e-11, 1.1352e-11, 8.6392e-12, ..., 4.8547e-12, 1.0056e-11,\n 4.2380e-12],\n [2.6631e-11, 1.3132e-11, 1.5137e-11, ..., 8.1191e-12, 1.0967e-11,\n 1.1314e-11],\n [1.2774e-11, 1.3585e-11, 1.8600e-11, ..., 1.2810e-11, 1.4818e-11,\n 1.1020e-11],\n ...,\n [1.7405e-11, 2.3901e-11, 1.8315e-11, ..., 1.2677e-11, 2.3009e-11,\n 1.2258e-11],\n [1.3256e-11, 1.3579e-11, 1.4650e-11, ..., 7.3735e-12, 2.4098e-11,\n 7.8302e-12],\n [3.0348e-11, 2.5824e-11, 2.1426e-11, ..., 1.1702e-11, 1.3567e-11,\n 1.1289e-11]], device='cuda:0')" | |
| }, | |
| "15": { | |
| "step": "tensor(18770.)", | |
| "exp_avg": "tensor([-5.6052e-45], device='cuda:0')", | |
| "exp_avg_sq": "tensor([4.5862e-13], device='cuda:0')" | |
| }, | |
| "16": { | |
| "step": "tensor(18770.)", | |
| "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')", | |
| "exp_avg_sq": "tensor([6.8805e-14, 3.2579e-14, 6.7184e-15], device='cuda:0')" | |
| }, | |
| "17": { | |
| "step": "tensor(18770.)", | |
| "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", | |
| "exp_avg_sq": "tensor([2.3234e-11, 1.4361e-12, 1.5427e-12, 1.1306e-12, 1.7332e-12],\n device='cuda:0')" | |
| }, | |
| "19": { | |
| "step": "tensor(18770.)", | |
| "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[1.0418e-17, 4.0401e-18, 1.5471e-17, ..., 1.4043e-17, 1.3945e-19,\n 6.0047e-18],\n [9.8529e-20, 8.8481e-20, 2.6486e-21, ..., 5.2433e-20, 7.5451e-22,\n 3.7677e-21],\n [1.8009e-16, 1.7279e-17, 5.4189e-17, ..., 7.2060e-17, 7.8972e-18,\n 2.4351e-17],\n ...,\n [5.3718e-19, 2.1666e-20, 1.4693e-19, ..., 1.2329e-18, 6.5972e-21,\n 5.6236e-22],\n [1.1110e-18, 4.7774e-20, 5.2486e-19, ..., 2.2485e-18, 5.1608e-20,\n 1.4826e-19],\n [3.3250e-18, 5.7353e-21, 4.0651e-18, ..., 2.5802e-18, 3.6822e-20,\n 1.6015e-18]], device='cuda:0')" | |
| }, | |
| "20": { | |
| "step": "tensor(18770.)", | |
| "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45], device='cuda:0')", | |
| "exp_avg_sq": "tensor([3.4409e-14, 1.1979e-17, 2.6359e-13, 9.8319e-17, 1.0753e-17, 2.0045e-14,\n 9.8114e-16, 6.7580e-14, 8.2629e-15, 2.0754e-16, 2.0174e-13, 7.3018e-14,\n 1.9062e-15, 3.5500e-17, 1.1778e-18, 3.6013e-14, 4.1952e-14, 5.7171e-15,\n 7.4530e-15, 5.2671e-17, 3.9660e-14, 2.5801e-15, 3.6422e-14, 4.0450e-16,\n 3.0379e-15, 1.4200e-13, 5.4387e-17, 2.9940e-16, 1.8744e-14, 4.2630e-15,\n 3.2275e-14, 6.6112e-14, 5.4743e-14, 1.2121e-14, 1.8017e-15, 1.3820e-15,\n 4.7544e-14, 1.9769e-14, 3.0543e-15, 4.2766e-15, 6.0200e-16, 2.6323e-14,\n 3.2557e-15, 7.7840e-14, 5.2029e-16, 2.0656e-15, 1.3091e-13, 2.0029e-17,\n 1.5593e-16, 4.1252e-15, 4.7429e-16, 1.9856e-15, 1.3557e-15, 8.8333e-15,\n 1.1470e-15, 6.7851e-17, 2.1337e-15, 4.9057e-16, 3.6078e-14, 1.0077e-13,\n 6.6632e-14, 8.5123e-16, 2.3817e-14, 4.9663e-16, 3.3787e-14, 3.7378e-14,\n 3.1620e-16, 1.0121e-15, 2.3326e-13, 7.7397e-15, 3.3408e-16, 3.9580e-17,\n 1.9520e-14, 1.3967e-13, 1.1733e-14, 2.6378e-14, 9.9753e-16, 8.5924e-15,\n 4.9383e-18, 9.5798e-17, 5.8789e-14, 4.6944e-15, 2.0969e-16, 1.5144e-13,\n 5.6441e-16, 4.7477e-15, 1.4423e-14, 2.4407e-14, 5.9726e-16, 1.4421e-15,\n 1.0592e-13, 3.0278e-16, 5.4055e-14, 1.6244e-14, 2.0020e-14, 1.9085e-16,\n 3.5760e-14, 5.5812e-18, 3.8816e-15, 1.5151e-15, 9.3585e-18, 1.1677e-13,\n 3.7332e-15, 3.8578e-15, 5.7603e-15, 1.1210e-17, 2.9394e-16, 1.0334e-14,\n 4.0433e-14, 1.8915e-14, 5.9674e-18, 3.7741e-17, 4.5743e-14, 9.1723e-16,\n 1.2272e-17, 6.8001e-15, 3.0317e-14, 3.2745e-14, 4.5147e-17, 1.5355e-16,\n 2.7340e-15, 1.6583e-14, 7.4378e-15, 5.5556e-14, 4.0613e-16, 1.1209e-15,\n 4.7905e-13, 2.1561e-17, 4.8019e-15, 4.6535e-16, 2.0496e-14, 2.5133e-16,\n 5.3826e-14, 1.1551e-18, 1.4632e-16, 2.3546e-15, 5.3057e-14, 6.9436e-15,\n 4.3863e-17, 8.9207e-16, 2.3662e-17, 3.4872e-15, 1.0863e-14, 1.5257e-14,\n 3.8276e-16, 6.5400e-15, 6.1485e-17, 4.3647e-17, 1.7028e-15, 4.2761e-15,\n 1.5655e-13, 4.4574e-17, 2.4201e-17, 1.3351e-15, 2.2221e-13, 7.0300e-16,\n 3.2909e-15, 1.8043e-13, 1.6480e-13, 1.4929e-14, 7.0157e-16, 3.7586e-14,\n 3.9559e-16, 4.1875e-15, 6.0649e-14, 1.2327e-14, 1.4900e-15, 4.7423e-14,\n 9.3304e-16, 2.0306e-16, 9.7516e-17, 7.2220e-16, 1.3551e-16, 5.5659e-14,\n 1.6306e-16, 2.6117e-16, 3.4537e-16, 4.3211e-15, 4.9079e-14, 9.7588e-16,\n 1.0375e-15, 1.5543e-14, 1.2332e-15, 6.0841e-14, 6.3848e-14, 7.0933e-15,\n 1.9732e-15, 3.7071e-16, 4.9552e-14, 1.0565e-14, 2.4444e-14, 3.3162e-14,\n 8.1623e-14, 1.4463e-13, 1.4616e-16, 1.5637e-15, 1.1810e-14, 3.9440e-16,\n 1.7545e-15, 1.4251e-17, 2.5295e-17, 1.9517e-16, 6.7875e-15, 2.2848e-14,\n 1.0007e-14, 2.9815e-15, 8.8403e-15, 6.4782e-16, 1.4989e-15, 2.0389e-15,\n 6.3042e-15, 3.6575e-14, 2.5686e-18, 6.1662e-17, 4.5178e-16, 1.3388e-14,\n 2.7973e-16, 2.5690e-14, 6.3478e-15, 1.2542e-14, 1.7924e-15, 8.6422e-14,\n 2.8736e-15, 1.0706e-14, 2.2810e-14, 3.3026e-15, 4.3512e-14, 2.6291e-15,\n 1.3405e-19, 1.0286e-14, 1.0664e-14, 9.7301e-15, 1.8363e-14, 8.9192e-17,\n 1.9696e-14, 4.5457e-15, 1.0457e-16, 1.8991e-16, 6.6160e-15, 7.8518e-14,\n 5.4589e-16, 6.7233e-15, 2.5888e-13, 4.9742e-16, 6.9130e-15, 3.4871e-15,\n 1.5024e-17, 1.4792e-14, 4.9549e-14, 5.0012e-15, 4.3069e-16, 1.7579e-14,\n 5.3035e-15, 2.4243e-15, 3.7189e-15, 8.5304e-15], device='cuda:0')" | |
| }, | |
| "21": { | |
| "step": "tensor(18770.)", | |
| "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45], device='cuda:0')", | |
| "exp_avg_sq": "tensor([1.1032e-17, 7.2890e-20, 4.3455e-16, 8.1431e-21, 3.4003e-20, 1.2605e-17,\n 1.6579e-19, 2.3048e-16, 3.0571e-18, 9.1667e-20, 2.3484e-16, 5.3753e-17,\n 4.6762e-18, 7.7803e-21, 5.8163e-19, 7.2277e-18, 2.2591e-17, 6.8370e-19,\n 3.1784e-18, 4.6608e-23, 9.8083e-18, 4.5576e-19, 5.7024e-17, 2.3300e-19,\n 4.3510e-18, 1.3440e-16, 5.4442e-19, 9.7982e-20, 3.0732e-18, 1.1440e-18,\n 1.5052e-17, 6.8016e-17, 8.3910e-17, 9.3269e-18, 4.6442e-18, 4.4956e-19,\n 8.8984e-18, 1.9872e-18, 4.4283e-18, 5.5714e-18, 1.5655e-18, 1.3091e-17,\n 2.5873e-18, 7.0614e-17, 2.3733e-19, 1.0311e-18, 1.8800e-16, 5.3449e-21,\n 1.7877e-20, 2.7235e-19, 1.2914e-19, 5.8762e-19, 2.8300e-18, 1.4124e-18,\n 1.2643e-19, 4.2198e-21, 1.2982e-18, 1.9266e-19, 8.6227e-17, 5.2321e-17,\n 9.8804e-17, 9.1206e-20, 5.3157e-17, 1.3773e-19, 8.2545e-17, 1.4724e-17,\n 7.3592e-20, 1.0757e-18, 9.2059e-16, 2.7291e-18, 5.1258e-19, 8.7299e-21,\n 3.2080e-17, 6.0228e-17, 3.7473e-18, 2.3714e-17, 1.2224e-18, 1.6509e-18,\n 1.3865e-18, 1.0393e-19, 6.6029e-18, 1.0878e-18, 2.2169e-20, 1.2346e-16,\n 2.9890e-18, 7.0980e-19, 9.1277e-19, 3.3248e-18, 2.1969e-19, 3.8059e-18,\n 1.7054e-16, 1.7772e-19, 1.9834e-17, 4.7576e-18, 1.8499e-17, 3.1150e-18,\n 5.4547e-17, 1.0302e-19, 3.3491e-19, 2.2773e-19, 1.5547e-20, 6.3306e-17,\n 1.5292e-18, 1.7451e-18, 1.5491e-18, 2.1324e-20, 2.2184e-20, 4.8198e-19,\n 9.2519e-17, 2.0493e-17, 1.6143e-18, 9.3746e-21, 9.3905e-18, 9.4271e-19,\n 3.5704e-19, 2.2546e-18, 2.8375e-17, 1.1381e-17, 1.8094e-19, 7.7054e-20,\n 7.1762e-19, 2.3219e-17, 2.1498e-18, 5.2152e-18, 1.0601e-19, 1.0063e-18,\n 8.7597e-16, 1.0390e-19, 6.4118e-19, 1.7213e-18, 1.8068e-17, 2.7470e-18,\n 9.5921e-17, 1.2503e-19, 6.6408e-21, 9.7276e-19, 2.8952e-17, 3.1916e-18,\n 1.1914e-19, 2.4797e-18, 3.0183e-21, 4.1891e-18, 3.2797e-18, 1.1981e-17,\n 4.0122e-18, 4.7796e-19, 4.0027e-20, 1.2286e-19, 5.2787e-19, 1.7327e-18,\n 6.4428e-16, 2.0922e-20, 8.6505e-21, 1.9613e-19, 4.7867e-16, 6.3939e-20,\n 6.1473e-18, 9.9850e-17, 3.1014e-16, 4.8214e-18, 3.7507e-18, 1.5952e-17,\n 1.9537e-18, 6.2873e-19, 1.2245e-17, 3.0773e-17, 1.2618e-18, 3.2355e-17,\n 2.7664e-18, 7.3972e-20, 1.4809e-19, 1.0625e-19, 1.9443e-20, 2.0055e-17,\n 6.7175e-20, 1.3163e-19, 1.0768e-18, 2.1742e-17, 5.5213e-17, 4.7304e-19,\n 9.5505e-20, 3.2198e-18, 8.6893e-19, 5.3853e-17, 1.3506e-16, 1.8764e-18,\n 1.8456e-18, 4.9174e-18, 4.6107e-17, 8.1212e-18, 7.1955e-18, 2.5092e-17,\n 1.9871e-16, 1.1477e-16, 4.9891e-19, 3.2850e-19, 2.8010e-18, 8.3940e-20,\n 6.1349e-19, 1.0505e-20, 9.1112e-19, 8.1453e-20, 2.1580e-17, 6.9429e-17,\n 1.4996e-18, 1.9155e-18, 5.8345e-18, 1.0010e-19, 1.0461e-19, 2.8322e-18,\n 1.8019e-18, 7.5464e-17, 1.7022e-18, 2.2872e-20, 3.0154e-20, 3.5303e-17,\n 2.2639e-19, 8.4278e-18, 4.6220e-18, 4.7743e-18, 3.0341e-18, 6.0124e-17,\n 1.4115e-18, 2.8624e-19, 4.4086e-18, 5.5092e-19, 2.8540e-17, 2.1374e-19,\n 1.4613e-19, 1.0934e-17, 4.5984e-18, 9.4980e-18, 9.8844e-18, 2.5609e-18,\n 5.1013e-18, 5.9841e-18, 2.8938e-19, 1.5708e-19, 1.3935e-18, 4.7313e-17,\n 2.0946e-19, 5.6402e-18, 4.5867e-16, 5.2520e-20, 1.6010e-18, 3.1882e-18,\n 2.2123e-21, 3.2513e-17, 1.5519e-17, 5.3915e-18, 1.5301e-19, 7.6723e-18,\n 8.6966e-18, 4.4412e-19, 2.0615e-18, 2.0444e-17], device='cuda:0')" | |
| }, | |
| "22": { | |
| "step": "tensor(18770.)", | |
| "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45], device='cuda:0')", | |
| "exp_avg_sq": "tensor([4.3758e-17, 9.8128e-21, 3.7535e-16, 1.1674e-19, 8.1736e-21, 2.7925e-17,\n 9.1156e-19, 1.0615e-16, 1.0324e-17, 3.1515e-20, 2.5816e-16, 9.1763e-17,\n 1.9743e-18, 1.9556e-20, 1.3319e-19, 4.5173e-17, 5.0677e-17, 7.2260e-18,\n 7.5207e-18, 1.4442e-20, 5.2269e-17, 3.5037e-18, 4.1158e-17, 1.7385e-19,\n 2.1399e-18, 1.7997e-16, 2.1890e-21, 2.9142e-19, 2.2965e-17, 5.2287e-18,\n 3.9637e-17, 8.0988e-17, 8.2377e-17, 1.2594e-17, 4.4277e-18, 1.6502e-18,\n 6.1108e-17, 2.6471e-17, 5.9317e-18, 8.0033e-18, 3.3334e-19, 3.1172e-17,\n 2.4537e-18, 1.1056e-16, 5.2035e-19, 2.4776e-18, 1.8902e-16, 6.6232e-20,\n 7.1857e-20, 5.5931e-18, 4.8272e-19, 1.6765e-18, 3.3337e-18, 1.3006e-17,\n 1.4583e-18, 5.8895e-22, 2.4467e-18, 5.1558e-21, 5.7722e-17, 1.3179e-16,\n 9.9450e-17, 1.1702e-18, 2.4520e-17, 1.0010e-18, 5.4696e-17, 5.3396e-17,\n 2.1561e-19, 2.0360e-18, 2.8037e-16, 1.1402e-17, 1.2335e-19, 5.6154e-20,\n 2.1706e-17, 1.9548e-16, 1.7269e-17, 2.9406e-17, 5.4551e-19, 1.1312e-17,\n 1.0799e-19, 2.2852e-19, 8.0548e-17, 5.5814e-18, 3.3131e-20, 2.1534e-16,\n 3.1426e-19, 6.2571e-18, 1.8580e-17, 3.2115e-17, 4.8712e-19, 9.8810e-19,\n 1.5591e-16, 4.6580e-19, 7.6063e-17, 1.9841e-17, 3.0101e-17, 7.1002e-19,\n 5.5051e-17, 2.5658e-20, 5.3600e-18, 1.9160e-18, 1.1245e-21, 1.4759e-16,\n 4.9233e-18, 4.5383e-18, 9.0500e-18, 5.3296e-20, 3.9187e-19, 1.3158e-17,\n 6.4277e-17, 2.7820e-17, 9.5948e-20, 5.0476e-21, 5.9577e-17, 2.0960e-18,\n 5.4354e-20, 8.8602e-18, 3.4818e-17, 4.5630e-17, 9.1612e-21, 4.3459e-19,\n 2.7170e-18, 2.6658e-17, 8.8034e-18, 7.6648e-17, 3.0891e-19, 1.2486e-18,\n 6.0936e-16, 2.9339e-19, 6.1403e-18, 2.3370e-19, 2.3662e-17, 1.5181e-18,\n 8.1277e-17, 3.5035e-20, 1.5942e-19, 2.8462e-18, 6.5538e-17, 8.2623e-18,\n 7.1515e-21, 8.0316e-19, 4.9153e-22, 4.5194e-18, 1.5064e-17, 2.3507e-17,\n 1.0994e-18, 8.7128e-18, 1.6766e-19, 1.8854e-19, 1.9506e-18, 5.7638e-18,\n 2.3484e-16, 6.0711e-21, 2.2330e-22, 2.1506e-18, 3.2053e-16, 1.1771e-18,\n 6.7022e-18, 2.5217e-16, 2.3842e-16, 2.2324e-17, 4.4470e-19, 4.8653e-17,\n 2.0370e-18, 5.9191e-18, 8.4014e-17, 2.1773e-17, 2.1383e-18, 6.9343e-17,\n 2.7358e-18, 3.6486e-19, 1.0728e-20, 2.1794e-19, 1.2619e-19, 7.2474e-17,\n 3.5561e-19, 2.0930e-19, 8.8402e-19, 1.0356e-17, 7.3261e-17, 1.3703e-18,\n 1.2737e-18, 2.0490e-17, 1.6077e-18, 8.9284e-17, 9.6563e-17, 7.5631e-18,\n 3.7284e-18, 9.9126e-19, 7.2231e-17, 1.2281e-17, 3.4028e-17, 5.0186e-17,\n 1.2336e-16, 1.8362e-16, 5.8344e-20, 2.2740e-18, 1.5164e-17, 6.1618e-19,\n 2.5171e-18, 1.6334e-19, 4.3144e-21, 1.6745e-19, 1.4068e-17, 3.8841e-17,\n 1.3036e-17, 2.6990e-18, 9.0941e-18, 1.1053e-18, 2.2633e-18, 1.4796e-18,\n 9.0014e-18, 5.7532e-17, 6.0884e-20, 1.5618e-19, 6.4905e-19, 2.4562e-17,\n 2.3410e-19, 3.1416e-17, 9.5999e-18, 1.4933e-17, 4.3640e-18, 1.0630e-16,\n 4.0527e-18, 1.3877e-17, 2.8921e-17, 3.2031e-18, 6.4378e-17, 3.2603e-18,\n 2.4331e-20, 9.7501e-18, 1.3440e-17, 9.9572e-18, 2.2853e-17, 3.1923e-19,\n 2.8375e-17, 4.5610e-18, 5.1842e-19, 1.2945e-19, 7.4480e-18, 1.1129e-16,\n 6.3997e-19, 1.1062e-17, 3.2528e-16, 5.2648e-19, 8.4833e-18, 3.1684e-18,\n 1.6313e-20, 1.3099e-17, 6.1966e-17, 4.1809e-18, 7.0200e-19, 2.0788e-17,\n 3.8120e-18, 3.0608e-18, 3.9817e-18, 8.2171e-18], device='cuda:0')" | |
| }, | |
| "23": { | |
| "step": "tensor(18770.)", | |
| "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[8.4370e-21, 1.5947e-19, 1.2145e-19, ..., 2.0720e-19, 2.9121e-20,\n 2.6894e-20],\n [1.4048e-17, 1.8813e-18, 2.8480e-18, ..., 2.9655e-18, 7.1631e-19,\n 1.3712e-18],\n [1.0309e-16, 6.3563e-18, 3.3396e-17, ..., 4.3070e-17, 4.4931e-18,\n 1.2340e-17],\n ...,\n [1.7807e-17, 1.7428e-18, 9.4571e-18, ..., 1.3968e-17, 9.7834e-19,\n 4.4954e-18],\n [1.2187e-19, 3.6567e-20, 6.2834e-21, ..., 3.2300e-20, 6.2690e-21,\n 1.3741e-20],\n [3.1229e-20, 1.8662e-19, 6.3444e-20, ..., 3.4248e-19, 7.5532e-21,\n 6.1004e-20]], device='cuda:0')" | |
| }, | |
| "24": { | |
| "step": "tensor(18770.)", | |
| "exp_avg": "tensor([ 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45], device='cuda:0')", | |
| "exp_avg_sq": "tensor([2.5941e-16, 1.6652e-14, 1.7167e-13, 2.9333e-16, 5.8116e-16, 2.0140e-14,\n 1.4279e-16, 2.3988e-16, 9.6798e-15, 1.6446e-15, 2.7588e-16, 4.9698e-14,\n 2.6609e-16, 4.7890e-16, 3.1859e-15, 7.6240e-14, 9.0983e-15, 2.4744e-14,\n 1.5611e-14, 1.7104e-17, 1.4863e-16, 2.1959e-14, 8.8174e-17, 7.6852e-15,\n 3.7781e-15, 1.6409e-14, 1.8457e-14, 5.6465e-15, 3.3231e-14, 3.5426e-14,\n 2.9878e-14, 4.6304e-15, 3.2852e-14, 1.4534e-14, 2.8369e-15, 3.7092e-14,\n 1.8343e-13, 3.0732e-16, 5.4272e-15, 4.4123e-17, 8.5931e-14, 3.2978e-15,\n 4.8583e-18, 5.7305e-14, 1.2465e-16, 5.6973e-15, 1.8284e-13, 7.6732e-16,\n 4.5396e-15, 8.4062e-14, 7.9165e-16, 1.2058e-15, 1.0533e-16, 1.2458e-14,\n 2.9767e-16, 6.4309e-15, 5.9404e-15, 1.1946e-15, 7.3490e-16, 4.5615e-16,\n 2.0395e-15, 5.0882e-17, 3.6771e-16, 3.7089e-15, 1.0393e-15, 2.1922e-16,\n 4.7659e-16, 3.8628e-16, 1.3932e-13, 7.5915e-15, 2.5323e-14, 6.1320e-16,\n 2.3820e-14, 1.8405e-15, 4.5475e-14, 6.5949e-17, 2.2825e-15, 6.7726e-14,\n 4.4802e-14, 4.0339e-16, 2.4339e-15, 1.6496e-14, 4.7061e-15, 4.9063e-14,\n 1.5788e-17, 1.0010e-14, 7.2135e-14, 8.5605e-14, 1.0247e-16, 5.4711e-17,\n 1.1119e-16, 2.0887e-16, 8.5877e-14, 6.4693e-15, 8.6543e-14, 4.7275e-14,\n 3.1164e-14, 4.5962e-18, 4.8031e-14, 1.2879e-14, 3.0400e-15, 1.5691e-13,\n 2.4925e-14, 2.6564e-15, 1.9601e-14, 2.8561e-16, 1.3512e-16, 5.8825e-14,\n 7.6441e-16, 1.9247e-14, 6.8053e-14, 8.0801e-15, 8.4231e-14, 2.7626e-18,\n 6.5032e-16, 3.7493e-14, 1.5779e-15, 5.8184e-14, 1.4573e-14, 2.1704e-15,\n 6.0707e-16, 2.2999e-15, 6.6535e-14, 2.3552e-13, 6.3675e-15, 1.1691e-14,\n 3.8416e-13, 8.7719e-19, 5.0915e-14, 2.1396e-15, 2.6881e-15, 6.5438e-16,\n 1.2925e-14, 1.2399e-14, 6.3533e-15, 8.3736e-15, 4.8735e-15, 1.8166e-14,\n 1.2039e-14, 2.9798e-13, 1.0486e-18, 2.4004e-14, 2.8356e-14, 5.1596e-15,\n 3.3907e-15, 3.7251e-14, 9.6182e-16, 4.0145e-14, 1.1588e-15, 2.0328e-16,\n 6.8859e-14, 6.6907e-17, 6.7206e-17, 4.5950e-16, 3.6716e-14, 2.7073e-16,\n 3.0398e-14, 2.5385e-13, 1.1611e-13, 9.0919e-15, 6.4330e-16, 6.3037e-16,\n 5.3615e-16, 1.9879e-14, 3.1277e-14, 2.7998e-16, 5.0097e-18, 4.4941e-14,\n 1.8718e-17, 5.6217e-14, 8.9840e-15, 9.2718e-16, 8.2413e-16, 1.3753e-13,\n 2.1466e-15, 1.5151e-17, 3.2361e-16, 2.6292e-15, 1.3579e-14, 9.0021e-16,\n 1.2497e-14, 3.1091e-14, 4.2243e-15, 1.1734e-14, 1.0028e-14, 2.0536e-14,\n 1.4476e-16, 1.8191e-13, 4.7639e-14, 5.9952e-17, 4.0045e-14, 3.0475e-16,\n 4.4814e-15, 3.9708e-16, 2.5174e-14, 8.4312e-17, 1.1914e-13, 9.9748e-16,\n 1.2120e-16, 3.0678e-17, 3.3561e-14, 1.9810e-14, 1.6925e-17, 4.7001e-15,\n 2.3060e-18, 5.0175e-16, 3.3759e-15, 3.4249e-16, 3.3187e-16, 2.9705e-15,\n 7.4013e-14, 1.1417e-14, 9.9895e-16, 1.9988e-16, 2.6983e-14, 4.2895e-15,\n 2.9617e-17, 3.4500e-17, 1.9715e-15, 2.2920e-17, 1.9401e-15, 9.0623e-14,\n 1.4551e-14, 1.0072e-15, 8.7006e-14, 2.1522e-15, 1.2358e-17, 1.1576e-14,\n 7.4672e-19, 2.2154e-15, 6.9426e-14, 2.1290e-14, 5.4130e-17, 6.5137e-17,\n 4.0971e-14, 4.4416e-15, 6.7738e-16, 4.2550e-15, 4.1480e-16, 3.3220e-14,\n 2.5275e-17, 4.3052e-15, 2.5448e-14, 5.6781e-18, 8.0182e-17, 8.3394e-16,\n 8.1551e-16, 9.4235e-15, 1.1628e-13, 1.5030e-16, 6.2430e-15, 7.6707e-14,\n 2.8629e-17, 4.2049e-14, 1.2537e-16, 4.1347e-16], device='cuda:0')" | |
| }, | |
| "25": { | |
| "step": "tensor(18770.)", | |
| "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45], device='cuda:0')", | |
| "exp_avg_sq": "tensor([1.3885e-18, 8.8519e-18, 4.3407e-16, 2.0272e-20, 4.0194e-20, 4.5223e-17,\n 1.0264e-19, 4.8222e-19, 4.3565e-18, 9.8014e-20, 4.8400e-18, 3.9210e-17,\n 6.2290e-19, 1.2718e-20, 2.3869e-18, 4.7020e-17, 9.6395e-19, 1.2127e-17,\n 4.5467e-18, 3.2829e-21, 2.7654e-18, 5.7133e-18, 5.4094e-19, 8.8420e-18,\n 6.1642e-19, 1.2090e-18, 1.2750e-17, 3.2902e-18, 1.7270e-17, 3.7950e-17,\n 1.3451e-17, 2.3658e-19, 2.5944e-17, 3.7533e-18, 6.3758e-19, 8.7412e-17,\n 1.9959e-16, 3.3945e-18, 1.4632e-17, 5.0604e-19, 9.9995e-17, 2.8938e-19,\n 3.6236e-20, 3.4259e-17, 6.3040e-20, 3.6310e-18, 3.6901e-16, 4.2182e-18,\n 1.6289e-18, 6.5699e-17, 2.9149e-19, 2.8456e-20, 6.3159e-21, 3.0725e-18,\n 3.6677e-20, 3.2647e-18, 3.7850e-18, 4.5566e-18, 1.4478e-19, 4.8364e-18,\n 8.4909e-19, 1.8861e-19, 8.5935e-19, 7.0108e-18, 1.2414e-19, 2.1630e-18,\n 1.6119e-19, 6.6640e-18, 1.4532e-16, 1.0692e-17, 9.6540e-18, 4.9603e-20,\n 1.8932e-17, 6.9864e-19, 1.7923e-16, 6.1189e-19, 1.4554e-18, 2.7413e-17,\n 5.3610e-17, 1.9939e-18, 2.0434e-18, 3.0667e-18, 5.9975e-19, 2.8855e-17,\n 2.2122e-18, 5.5365e-19, 4.2927e-17, 1.5205e-17, 4.0470e-19, 2.0299e-18,\n 2.2776e-18, 8.2760e-19, 1.7048e-17, 6.2763e-19, 1.9162e-16, 2.3231e-17,\n 3.1016e-17, 2.1443e-21, 1.0113e-16, 2.1939e-18, 2.3413e-19, 1.5613e-16,\n 3.2300e-17, 1.1236e-18, 2.4852e-17, 2.8837e-20, 3.9260e-19, 1.1763e-16,\n 6.9979e-19, 6.0263e-18, 2.3432e-17, 1.4318e-17, 2.6767e-17, 2.9572e-20,\n 3.0150e-19, 1.7984e-17, 1.2583e-19, 1.8631e-17, 3.5628e-17, 1.2921e-18,\n 1.0131e-19, 1.8171e-18, 3.0529e-17, 1.9811e-16, 2.1370e-17, 4.6937e-18,\n 5.7670e-16, 1.6523e-20, 8.6341e-17, 1.1639e-18, 1.7322e-18, 1.5928e-19,\n 9.6297e-18, 3.0450e-18, 2.2067e-18, 1.9869e-18, 3.9021e-19, 5.9774e-18,\n 8.7306e-18, 7.0003e-16, 5.1892e-21, 8.0381e-18, 2.9637e-17, 5.4352e-18,\n 1.2904e-18, 5.4030e-18, 3.8751e-19, 6.4970e-17, 7.5931e-19, 2.0830e-20,\n 5.0519e-17, 2.4680e-19, 7.3737e-19, 1.2241e-18, 1.2237e-17, 1.0811e-19,\n 4.9746e-17, 9.0777e-16, 7.2841e-17, 6.9823e-19, 2.9216e-18, 1.2932e-18,\n 2.0289e-19, 5.8511e-18, 1.9166e-17, 1.1778e-19, 1.0065e-19, 6.9140e-17,\n 2.4225e-20, 8.9211e-17, 3.2541e-18, 1.7312e-20, 1.7199e-18, 4.5921e-17,\n 4.1766e-19, 2.7517e-19, 9.8107e-20, 1.0150e-18, 1.9020e-17, 1.2357e-19,\n 7.2345e-18, 1.0066e-17, 1.0144e-17, 4.7133e-18, 2.0306e-18, 9.7742e-18,\n 1.9168e-19, 4.0586e-16, 6.7051e-17, 3.1947e-19, 1.4161e-17, 2.0102e-19,\n 1.7852e-18, 3.1915e-18, 1.4821e-17, 1.3283e-19, 8.8781e-17, 3.4059e-19,\n 1.9216e-19, 3.9691e-20, 6.7128e-17, 1.6044e-17, 7.2385e-20, 8.9314e-18,\n 5.3645e-19, 1.4108e-18, 1.9706e-19, 1.3238e-18, 1.6263e-18, 2.7317e-18,\n 1.5205e-17, 1.9522e-17, 3.2718e-18, 1.1414e-18, 1.4317e-16, 6.0994e-18,\n 3.1647e-19, 3.7545e-19, 1.8190e-19, 2.0360e-19, 1.6443e-18, 5.4847e-17,\n 4.2548e-18, 3.0424e-18, 1.0159e-16, 2.4568e-19, 5.5666e-19, 2.5838e-18,\n 7.5788e-20, 7.0890e-19, 7.7175e-17, 1.3561e-17, 3.2346e-19, 2.2600e-18,\n 4.5506e-17, 8.4871e-18, 2.0006e-18, 2.7664e-17, 4.6472e-20, 9.7947e-18,\n 2.3329e-20, 4.1106e-18, 5.5268e-18, 1.6382e-21, 1.0413e-18, 1.3140e-19,\n 2.9397e-19, 3.8205e-18, 4.6616e-17, 1.2899e-19, 4.0708e-18, 8.3047e-17,\n 2.3374e-20, 1.3654e-17, 1.1024e-20, 4.3839e-19], device='cuda:0')" | |
| }, | |
| "26": { | |
| "step": "tensor(18770.)", | |
| "exp_avg": "tensor([ 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45], device='cuda:0')", | |
| "exp_avg_sq": "tensor([2.5854e-19, 1.3331e-17, 2.5795e-16, 1.3393e-19, 1.0539e-19, 3.0818e-17,\n 5.2912e-20, 5.6158e-19, 9.4350e-18, 1.0383e-18, 4.4955e-19, 5.0580e-17,\n 1.1152e-18, 8.1300e-20, 8.0335e-18, 8.0019e-17, 8.2917e-18, 2.4697e-17,\n 1.3393e-17, 2.6544e-21, 2.6103e-19, 2.0510e-17, 6.1470e-20, 1.4135e-17,\n 2.7899e-18, 1.7390e-17, 3.0831e-17, 1.2127e-17, 3.1486e-17, 3.1659e-17,\n 2.8310e-17, 4.2034e-18, 5.3088e-17, 1.2709e-17, 1.6290e-18, 6.4679e-17,\n 2.0278e-16, 2.4228e-19, 1.3264e-17, 1.0015e-19, 9.1403e-17, 2.7792e-18,\n 2.1330e-20, 8.8999e-17, 2.2664e-19, 2.2226e-18, 2.6983e-16, 3.9022e-18,\n 3.1249e-18, 8.6848e-17, 1.5948e-18, 1.4431e-20, 9.3013e-20, 2.1226e-17,\n 2.0459e-19, 3.3249e-18, 3.0840e-18, 4.3705e-18, 1.9056e-18, 6.7447e-19,\n 4.1069e-18, 2.4930e-20, 2.1882e-19, 9.3495e-18, 2.6115e-18, 3.1099e-19,\n 1.4275e-18, 4.0474e-18, 1.5074e-16, 1.4051e-17, 2.2745e-17, 3.1125e-19,\n 2.1571e-17, 3.3927e-18, 8.0185e-17, 9.8431e-20, 5.3672e-19, 7.0243e-17,\n 7.3578e-17, 2.4349e-18, 4.2381e-18, 1.4785e-17, 3.4255e-18, 7.6505e-17,\n 7.4604e-21, 9.9145e-18, 7.3999e-17, 9.7099e-17, 8.3265e-19, 4.0777e-20,\n 2.4425e-19, 1.1811e-18, 1.2562e-16, 6.1530e-18, 1.4087e-16, 6.7617e-17,\n 5.2498e-17, 3.3938e-22, 8.0517e-17, 1.1706e-17, 3.7255e-19, 1.7366e-16,\n 4.4541e-17, 1.4800e-18, 3.6520e-17, 1.8164e-19, 1.1334e-18, 5.4156e-17,\n 1.4698e-18, 3.0528e-17, 7.2525e-17, 1.7297e-17, 9.1499e-17, 9.9032e-20,\n 4.9344e-19, 5.9496e-17, 9.7413e-19, 8.6204e-17, 3.0320e-17, 1.2816e-18,\n 4.0272e-19, 6.1986e-18, 1.0074e-16, 3.3397e-16, 1.6232e-17, 2.1214e-17,\n 4.3571e-16, 4.1102e-21, 4.7398e-17, 1.9351e-18, 2.0920e-18, 2.7135e-19,\n 2.2232e-17, 1.1455e-17, 3.8410e-18, 6.4834e-18, 4.7081e-18, 1.5699e-17,\n 2.2638e-17, 4.3288e-16, 7.7395e-20, 3.7996e-17, 4.8782e-17, 1.0853e-17,\n 5.0681e-18, 3.9008e-17, 6.2283e-19, 3.4242e-17, 2.0000e-18, 1.9424e-19,\n 1.0298e-16, 2.5263e-20, 1.0785e-18, 4.4936e-19, 5.2942e-17, 2.8049e-19,\n 5.3674e-17, 3.7972e-16, 1.7093e-16, 1.5821e-17, 6.3782e-19, 4.8771e-19,\n 5.9045e-19, 2.0963e-17, 4.7429e-17, 8.9141e-19, 3.2513e-20, 7.3220e-17,\n 3.2894e-19, 4.9831e-17, 7.1502e-18, 1.2901e-19, 3.5178e-18, 1.5333e-16,\n 1.3073e-18, 6.8247e-19, 1.7201e-19, 5.3893e-18, 2.5906e-17, 6.1772e-19,\n 9.1228e-18, 3.3245e-17, 1.1948e-17, 1.9762e-17, 1.7374e-17, 1.6587e-17,\n 3.9152e-19, 2.7249e-16, 7.8374e-17, 5.8541e-20, 6.1120e-17, 9.3012e-19,\n 6.9862e-18, 2.8826e-19, 4.0479e-17, 1.0143e-18, 1.2997e-16, 6.1346e-19,\n 8.5590e-20, 1.9064e-19, 5.9135e-17, 3.4932e-17, 3.7821e-20, 1.2141e-17,\n 7.1396e-21, 6.8491e-19, 2.2766e-18, 2.6727e-18, 3.3438e-19, 1.0859e-18,\n 8.1456e-17, 2.3422e-17, 8.1360e-19, 1.8917e-18, 5.4263e-17, 9.8368e-18,\n 4.6402e-21, 5.9514e-20, 4.2646e-18, 2.0877e-20, 8.0451e-19, 9.6872e-17,\n 2.5186e-17, 9.5411e-19, 9.0146e-17, 1.0679e-18, 1.0689e-19, 1.1396e-17,\n 1.4178e-23, 5.8341e-19, 1.0879e-16, 1.6804e-17, 8.2065e-20, 8.3425e-20,\n 6.6116e-17, 2.0383e-18, 3.5812e-18, 1.4304e-17, 1.9861e-19, 5.2362e-17,\n 4.0141e-20, 8.9516e-18, 2.7916e-17, 5.1876e-21, 1.2683e-19, 2.5359e-19,\n 5.1182e-19, 6.5879e-18, 1.2629e-16, 1.1443e-19, 1.2885e-17, 7.7881e-17,\n 3.0741e-21, 6.5910e-17, 9.1734e-20, 5.0139e-19], device='cuda:0')" | |
| }, | |
| "27": { | |
| "step": "tensor(18770.)", | |
| "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[6.8553e-19, 1.5247e-19, 2.4057e-18, ..., 1.2910e-20, 6.6593e-20,\n 6.1383e-19],\n [1.5889e-19, 3.6007e-20, 2.3018e-21, ..., 1.8666e-20, 4.2581e-21,\n 6.5799e-21],\n [1.2976e-20, 7.2099e-20, 1.2988e-19, ..., 9.4560e-20, 4.6011e-21,\n 2.5111e-21],\n ...,\n [2.4816e-17, 5.6418e-19, 1.4163e-17, ..., 1.1420e-18, 2.5449e-19,\n 2.3811e-18],\n [2.9887e-17, 7.1223e-18, 5.4744e-18, ..., 8.6268e-18, 1.6234e-18,\n 3.2442e-18],\n [1.4098e-18, 7.0591e-21, 2.0230e-19, ..., 4.3330e-20, 3.2384e-20,\n 2.0356e-20]], device='cuda:0')" | |
| }, | |
| "28": { | |
| "step": "tensor(18770.)", | |
| "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45], device='cuda:0')", | |
| "exp_avg_sq": "tensor([2.1731e-15, 1.0716e-16, 9.5522e-18, 1.4297e-15, 5.8658e-17, 4.3143e-14,\n 4.9315e-15, 3.1815e-15, 7.4435e-17, 2.1309e-17, 1.4824e-16, 3.5450e-15,\n 6.4976e-16, 2.7440e-15, 3.6673e-16, 7.8734e-15, 2.5065e-16, 3.9493e-14,\n 1.9306e-14, 9.5367e-15, 1.8585e-14, 3.5908e-14, 2.5835e-14, 1.7447e-15,\n 6.7255e-15, 1.2425e-13, 6.1744e-15, 1.2181e-14, 7.1299e-16, 8.2100e-16,\n 4.9112e-14, 5.0064e-18, 2.5471e-14, 6.3153e-15, 7.7866e-18, 9.1691e-15,\n 1.4703e-13, 6.1778e-14, 2.2797e-15, 1.6366e-14, 7.2718e-16, 2.4819e-14,\n 6.9072e-15, 1.8591e-14, 2.5792e-17, 1.6961e-17, 1.5983e-14, 5.6288e-16,\n 2.6952e-17, 7.1267e-14, 5.2326e-16, 2.6063e-17, 3.9147e-16, 7.3541e-14,\n 1.2657e-15, 3.8574e-17, 1.1024e-15, 1.3231e-15, 1.0166e-14, 1.2686e-13,\n 3.6980e-14, 1.4484e-17, 2.0944e-14, 5.8997e-16, 1.5622e-16, 8.2720e-16,\n 1.1913e-16, 1.7571e-15, 3.5345e-14, 2.8992e-14, 6.2632e-17, 1.9656e-15,\n 8.0108e-15, 6.8051e-17, 3.9374e-15, 7.6567e-18, 3.2345e-16, 1.1120e-13,\n 2.7150e-14, 5.1791e-17, 2.9726e-15, 1.3716e-15, 4.9951e-15, 1.0087e-16,\n 2.9484e-13, 1.1060e-14, 4.9028e-15, 7.0772e-14, 4.4056e-16, 1.2936e-13,\n 7.8636e-14, 1.0984e-16, 2.0040e-16, 2.8920e-14, 5.9077e-14, 1.3415e-13,\n 1.2057e-14, 2.6507e-18, 1.6531e-17, 5.1507e-14, 4.2140e-17, 1.3077e-13,\n 1.1030e-14, 2.3788e-17, 4.1667e-14, 1.7101e-15, 1.2461e-17, 2.4003e-14,\n 1.9178e-14, 2.3455e-14, 3.3938e-15, 3.0307e-15, 4.1157e-14, 2.3624e-15,\n 8.2204e-16, 2.1789e-14, 1.8487e-15, 2.2054e-16, 1.3801e-16, 7.9614e-17,\n 3.8500e-17, 2.2022e-14, 1.5805e-14, 3.5196e-15, 1.0291e-14, 5.3846e-15,\n 1.6662e-13, 4.0497e-16, 1.0348e-15, 2.6775e-16, 3.8678e-15, 2.2835e-16,\n 3.5568e-14, 5.3289e-16, 1.2697e-14, 1.4795e-15, 7.1774e-15, 2.5931e-19,\n 6.1035e-16, 1.2668e-13, 5.7024e-17, 3.2221e-14, 4.5134e-16, 2.3453e-14,\n 8.9450e-15, 2.5113e-13, 2.3596e-16, 2.5774e-14, 9.7006e-14, 1.2488e-16,\n 2.7291e-17, 1.5230e-15, 6.8515e-16, 3.5762e-16, 2.7988e-13, 2.5309e-14,\n 1.6955e-15, 5.9892e-15, 1.2797e-13, 5.5724e-16, 2.6168e-14, 5.1160e-15,\n 2.0902e-15, 2.9609e-15, 3.4639e-13, 2.1266e-14, 2.3957e-16, 2.7538e-14,\n 5.9383e-16, 2.0992e-15, 3.3905e-14, 7.6482e-16, 1.4622e-15, 7.0837e-14,\n 5.0558e-16, 4.5182e-16, 1.2537e-16, 2.6131e-15, 4.8279e-14, 1.6083e-14,\n 3.0123e-17, 1.2133e-13, 2.0995e-15, 2.9620e-14, 7.7155e-15, 8.4711e-16,\n 2.1905e-14, 2.1498e-13, 2.6676e-14, 1.1228e-15, 4.0380e-14, 2.0928e-14,\n 5.4412e-15, 7.2929e-16, 4.7631e-14, 3.1127e-15, 4.1368e-16, 8.2914e-15,\n 3.1436e-14, 6.5247e-15, 1.7351e-14, 2.1841e-17, 3.9784e-15, 9.2974e-17,\n 2.5254e-15, 1.0779e-17, 3.0158e-16, 2.5359e-17, 9.9561e-15, 8.9197e-15,\n 1.3747e-14, 3.6532e-17, 2.4821e-14, 1.0996e-16, 4.1761e-16, 1.1984e-15,\n 7.7866e-15, 4.3844e-14, 3.6115e-15, 5.5285e-15, 1.3217e-17, 1.4089e-17,\n 1.8743e-14, 7.2015e-17, 1.3543e-14, 3.7677e-15, 3.3833e-14, 3.3128e-16,\n 1.0150e-16, 8.6447e-19, 1.7870e-13, 3.8828e-15, 2.7120e-14, 6.4148e-14,\n 2.6000e-14, 1.2797e-16, 8.7537e-17, 3.1838e-17, 1.0978e-16, 5.8706e-14,\n 1.4280e-16, 1.0017e-14, 1.1660e-16, 2.0119e-14, 2.1067e-14, 7.1756e-16,\n 7.6492e-17, 2.2414e-16, 1.1730e-14, 2.2596e-16, 9.9373e-15, 1.6269e-14,\n 3.7994e-16, 2.4736e-14, 3.5942e-14, 7.3797e-16], device='cuda:0')" | |
| }, | |
| "29": { | |
| "step": "tensor(18770.)", | |
| "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45], device='cuda:0')", | |
| "exp_avg_sq": "tensor([5.8222e-19, 3.9848e-20, 2.2276e-18, 2.3305e-18, 3.6990e-21, 2.3713e-17,\n 4.4125e-18, 5.3564e-19, 2.8763e-19, 6.8421e-20, 2.5268e-18, 5.0180e-19,\n 2.2240e-19, 1.4445e-18, 1.6453e-19, 2.7533e-18, 2.1561e-19, 1.1159e-17,\n 1.2947e-17, 1.1027e-17, 1.3691e-18, 1.9712e-17, 3.1776e-17, 1.4578e-19,\n 8.4682e-18, 1.6165e-16, 1.6039e-18, 1.7756e-17, 1.4630e-18, 1.5079e-19,\n 5.7550e-17, 4.4571e-19, 3.9940e-17, 7.8460e-18, 1.2862e-21, 1.0604e-18,\n 2.1484e-16, 1.7419e-17, 1.5091e-18, 3.2124e-17, 1.8764e-18, 4.7433e-18,\n 2.2622e-17, 8.0102e-18, 8.1895e-20, 4.0724e-19, 7.4319e-19, 2.8983e-19,\n 1.5056e-19, 9.0280e-17, 2.3480e-19, 2.1495e-21, 1.4881e-19, 2.6588e-16,\n 2.9398e-19, 1.0517e-19, 3.1432e-18, 2.6532e-18, 1.7979e-17, 5.1441e-17,\n 1.5192e-17, 1.8912e-21, 3.5009e-17, 4.5167e-19, 1.3594e-19, 9.7655e-19,\n 2.4297e-19, 1.6685e-18, 8.2506e-18, 9.2997e-17, 2.0940e-19, 8.6870e-19,\n 6.3895e-18, 2.8405e-18, 6.8423e-19, 1.8657e-19, 4.0335e-19, 1.1811e-16,\n 1.6829e-17, 2.7278e-20, 1.5136e-18, 2.1890e-19, 7.1754e-18, 1.2361e-18,\n 4.1903e-16, 4.5509e-18, 7.8028e-19, 1.9143e-17, 3.9455e-19, 2.1767e-16,\n 1.6951e-16, 2.7120e-20, 5.9256e-18, 1.4663e-17, 5.6122e-17, 4.0032e-16,\n 2.5392e-18, 1.7950e-20, 1.1908e-18, 3.6611e-17, 7.5811e-22, 1.7111e-16,\n 4.3752e-18, 5.8819e-20, 3.7743e-17, 1.5542e-18, 7.6554e-21, 2.6423e-17,\n 4.5095e-18, 4.7046e-17, 1.9912e-18, 2.4979e-18, 1.4272e-17, 3.1421e-19,\n 1.2942e-19, 5.7949e-18, 5.2611e-19, 4.1478e-18, 1.6188e-19, 1.5364e-20,\n 1.2957e-20, 4.0227e-17, 2.9155e-18, 3.1911e-18, 2.9166e-17, 1.2322e-18,\n 6.8939e-17, 2.8390e-20, 5.5408e-19, 1.0944e-18, 3.6222e-18, 2.4374e-19,\n 7.9496e-17, 1.6450e-19, 3.9034e-18, 5.6249e-19, 8.2815e-19, 6.4475e-22,\n 1.1755e-19, 5.0991e-17, 1.7408e-19, 1.6213e-17, 8.7947e-19, 2.3633e-17,\n 3.8775e-18, 4.6969e-16, 7.2052e-19, 5.1762e-17, 4.7312e-17, 1.6265e-19,\n 1.9692e-18, 6.6630e-18, 1.7704e-19, 7.1879e-19, 5.2626e-16, 2.6744e-17,\n 2.4985e-18, 4.2418e-18, 9.8527e-17, 1.2008e-19, 2.4044e-18, 6.5441e-19,\n 1.2426e-18, 3.9727e-19, 1.2815e-15, 5.5441e-17, 7.7874e-19, 1.5030e-17,\n 1.7509e-18, 1.7826e-19, 3.4988e-17, 1.9037e-19, 5.4905e-18, 4.0519e-17,\n 3.1714e-19, 1.9249e-19, 1.1980e-18, 4.0041e-19, 1.6079e-16, 2.4421e-18,\n 5.4855e-20, 1.2398e-16, 3.8606e-19, 1.5012e-17, 6.0105e-19, 3.1123e-19,\n 1.7116e-17, 4.9830e-16, 2.2573e-17, 3.7222e-19, 1.7401e-17, 1.1496e-17,\n 1.8503e-18, 4.2790e-18, 3.7056e-17, 5.2101e-19, 1.9126e-18, 4.7809e-18,\n 2.8443e-17, 2.3084e-17, 7.5953e-18, 3.8805e-19, 2.7591e-18, 9.9152e-20,\n 9.9289e-19, 3.0976e-20, 4.1290e-19, 4.1406e-20, 1.3024e-18, 5.9143e-18,\n 9.1181e-19, 9.1260e-19, 4.1598e-18, 4.4811e-19, 2.5776e-19, 1.1493e-19,\n 1.9783e-18, 7.4264e-17, 5.8329e-19, 8.6965e-18, 9.6386e-21, 8.4668e-19,\n 1.5832e-17, 1.4358e-18, 1.9400e-18, 2.4484e-18, 2.0023e-17, 4.9707e-19,\n 1.1019e-19, 1.3503e-21, 2.7023e-16, 5.4837e-18, 2.8920e-17, 5.4707e-17,\n 8.6702e-18, 2.0299e-18, 1.4752e-19, 7.0998e-19, 4.0194e-20, 2.0226e-17,\n 3.9836e-20, 5.7512e-18, 1.3360e-18, 2.7828e-17, 1.1546e-17, 1.5134e-18,\n 2.8533e-20, 2.5400e-19, 2.4017e-18, 1.6314e-20, 1.0113e-17, 1.6044e-17,\n 1.8435e-19, 1.1397e-17, 4.9191e-17, 9.6519e-19], device='cuda:0')" | |
| }, | |
| "30": { | |
| "step": "tensor(18770.)", | |
| "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45], device='cuda:0')", | |
| "exp_avg_sq": "tensor([3.4345e-18, 8.7580e-21, 8.2350e-20, 1.3615e-18, 4.7708e-20, 6.0624e-17,\n 5.3089e-18, 4.4995e-18, 2.3088e-19, 8.9506e-20, 9.1454e-20, 4.7829e-18,\n 1.0637e-18, 2.7228e-18, 3.7423e-19, 1.1724e-17, 6.1488e-19, 5.1575e-17,\n 2.3649e-17, 1.0919e-17, 2.5146e-17, 4.6609e-17, 3.2987e-17, 2.1910e-18,\n 7.5417e-18, 1.5930e-16, 8.5720e-18, 1.8467e-17, 4.1687e-19, 1.2748e-18,\n 6.1796e-17, 4.0251e-20, 3.7294e-17, 7.5259e-18, 9.3097e-21, 1.2630e-17,\n 1.9334e-16, 8.0313e-17, 3.5026e-18, 2.5087e-17, 5.5456e-19, 3.2506e-17,\n 1.1662e-17, 2.4285e-17, 5.6599e-20, 3.7922e-19, 2.1694e-17, 7.8020e-19,\n 9.5220e-23, 9.1895e-17, 4.3091e-19, 1.0223e-19, 2.2829e-19, 1.0901e-16,\n 1.7481e-18, 6.9259e-20, 1.3186e-18, 2.5006e-18, 1.4450e-17, 1.6935e-16,\n 5.1482e-17, 3.5103e-22, 2.4626e-17, 1.2170e-18, 8.2485e-20, 8.7758e-19,\n 1.1186e-19, 3.3284e-18, 4.5680e-17, 4.4954e-17, 1.3776e-19, 1.9475e-18,\n 1.1174e-17, 2.2164e-19, 5.1550e-18, 4.1344e-21, 5.1163e-19, 1.4480e-16,\n 3.8426e-17, 8.0709e-20, 3.4915e-18, 2.0942e-18, 5.5532e-18, 3.7483e-20,\n 3.8416e-16, 1.5535e-17, 7.1981e-18, 9.3532e-17, 4.3902e-19, 1.6914e-16,\n 1.1398e-16, 1.1001e-20, 1.0668e-19, 3.6218e-17, 8.2882e-17, 1.9465e-16,\n 1.6345e-17, 1.5323e-20, 1.0725e-19, 6.4906e-17, 4.1380e-20, 1.7110e-16,\n 1.5601e-17, 3.4591e-20, 5.9286e-17, 1.9582e-18, 2.9920e-20, 3.0875e-17,\n 2.6399e-17, 3.6045e-17, 5.6257e-18, 4.4893e-18, 5.6434e-17, 3.2152e-18,\n 8.8233e-19, 3.0257e-17, 2.0635e-18, 7.1562e-19, 1.0127e-19, 3.2628e-20,\n 1.2708e-19, 3.2932e-17, 2.1102e-17, 4.1790e-18, 1.7039e-17, 7.3008e-18,\n 2.2038e-16, 4.7394e-19, 1.4886e-18, 1.5678e-19, 4.7061e-18, 5.6083e-19,\n 5.2650e-17, 9.1441e-19, 1.5831e-17, 1.7289e-18, 9.5008e-18, 2.0440e-20,\n 8.9075e-19, 1.7512e-16, 7.9958e-20, 4.5103e-17, 2.4232e-19, 3.4276e-17,\n 1.1851e-17, 3.3365e-16, 9.5236e-19, 2.9728e-17, 1.3310e-16, 4.1484e-20,\n 1.6518e-19, 1.4689e-18, 8.4049e-19, 1.9202e-19, 3.9014e-16, 3.2268e-17,\n 3.0592e-18, 7.5954e-18, 1.7844e-16, 5.7055e-19, 3.5240e-17, 7.2795e-18,\n 3.2784e-18, 4.2506e-18, 4.8543e-16, 3.2382e-17, 1.0519e-18, 3.8910e-17,\n 1.4759e-18, 2.8948e-18, 4.5036e-17, 7.3098e-19, 3.1403e-18, 9.6493e-17,\n 1.0266e-18, 7.8535e-19, 7.8641e-19, 3.7539e-18, 7.1114e-17, 2.0886e-17,\n 1.5387e-19, 1.5568e-16, 2.6320e-18, 4.1223e-17, 1.0145e-17, 7.1819e-19,\n 3.1518e-17, 3.0151e-16, 3.8606e-17, 1.5750e-18, 5.6167e-17, 2.9977e-17,\n 6.8274e-18, 5.1459e-19, 6.7010e-17, 4.4520e-18, 2.6759e-19, 1.0804e-17,\n 3.9254e-17, 1.1696e-17, 2.4845e-17, 5.8997e-21, 5.8639e-18, 2.3056e-19,\n 4.0689e-18, 6.0161e-20, 6.5931e-19, 9.3863e-21, 1.3659e-17, 1.0946e-17,\n 1.8092e-17, 2.3241e-21, 3.4841e-17, 3.0900e-19, 2.9425e-19, 1.4889e-18,\n 1.1038e-17, 5.3873e-17, 4.7017e-18, 6.4825e-18, 3.1883e-20, 6.9324e-20,\n 2.7372e-17, 1.2731e-20, 1.8734e-17, 4.6335e-18, 4.7505e-17, 1.3969e-19,\n 2.1053e-20, 9.6175e-21, 2.4872e-16, 3.9356e-18, 3.5452e-17, 8.1262e-17,\n 3.6451e-17, 1.0509e-18, 2.1791e-19, 2.3086e-19, 2.0679e-19, 8.0620e-17,\n 5.4423e-20, 1.3303e-17, 3.8188e-19, 2.3575e-17, 2.7117e-17, 1.7325e-18,\n 3.0179e-19, 8.0204e-20, 1.5541e-17, 4.1321e-19, 1.5062e-17, 2.1961e-17,\n 5.9800e-19, 3.4244e-17, 4.4866e-17, 1.6483e-18], device='cuda:0')" | |
| }, | |
| "31": { | |
| "step": "tensor(18770.)", | |
| "exp_avg": "tensor([[-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[1.1880e-17, 1.2609e-20, 1.9629e-17, ..., 3.9518e-18, 4.3225e-20,\n 3.8344e-18],\n [3.5138e-18, 6.7488e-19, 1.7616e-19, ..., 2.6503e-18, 5.1028e-19,\n 1.3399e-19],\n [1.1848e-16, 7.0056e-18, 3.2476e-17, ..., 6.2767e-17, 5.4216e-18,\n 1.6272e-17],\n ...,\n [3.0832e-18, 6.0241e-19, 1.0062e-18, ..., 6.7257e-20, 6.0147e-20,\n 3.5683e-19],\n [2.5858e-18, 2.1135e-19, 8.5711e-19, ..., 3.6848e-19, 6.2492e-20,\n 2.9736e-19],\n [2.0639e-18, 2.8433e-20, 1.2806e-18, ..., 6.3151e-19, 5.9539e-21,\n 2.7924e-19]], device='cuda:0')" | |
| }, | |
| "32": { | |
| "step": "tensor(18770.)", | |
| "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45], device='cuda:0')", | |
| "exp_avg_sq": "tensor([2.6048e-14, 4.5865e-15, 2.1133e-13, 6.5245e-16, 1.3700e-16, 2.2427e-14,\n 8.2677e-18, 1.7019e-15, 4.6269e-14, 1.0328e-14, 2.3595e-15, 3.7065e-14,\n 5.1237e-17, 7.2490e-17, 1.2436e-14, 3.2408e-14, 2.6855e-14, 6.4429e-17,\n 1.8881e-17, 3.2341e-17, 7.5028e-15, 5.4864e-14, 1.7758e-16, 1.7600e-15,\n 4.1642e-17, 6.7573e-14, 9.0873e-17, 4.5927e-15, 1.4744e-14, 2.8262e-17,\n 7.5439e-15, 3.6274e-14, 2.5483e-14, 8.3738e-16, 2.0635e-15, 4.7898e-15,\n 1.9648e-13, 2.4332e-13, 6.0792e-17, 8.6611e-15, 3.6095e-14, 3.8035e-14,\n 3.8033e-17, 6.2375e-14, 3.5930e-18, 2.6587e-16, 5.7291e-15, 8.5470e-15,\n 5.0087e-17, 5.2116e-15, 1.1073e-15, 6.6384e-16, 1.2047e-16, 6.6567e-14,\n 6.0188e-15, 8.8312e-17, 4.2413e-17, 1.6630e-16, 3.7520e-14, 3.3740e-14,\n 3.1938e-14, 1.8254e-16, 1.3946e-14, 4.3720e-15, 3.4921e-14, 1.2513e-13,\n 8.7352e-15, 6.0990e-16, 7.6431e-15, 9.9195e-16, 5.4615e-15, 1.3515e-16,\n 5.0199e-15, 4.3400e-17, 4.9458e-14, 7.3158e-15, 1.4994e-15, 3.2952e-14,\n 8.1900e-14, 1.9292e-16, 7.7890e-15, 1.2910e-14, 2.1266e-15, 2.3874e-14,\n 8.6242e-14, 2.1181e-14, 2.3626e-15, 1.1287e-13, 4.3150e-16, 4.1780e-14,\n 2.3129e-14, 3.5905e-17, 3.8836e-15, 2.6500e-14, 3.3160e-14, 1.4023e-13,\n 2.4783e-14, 9.8836e-16, 7.9641e-16, 1.8275e-15, 1.5579e-15, 6.1558e-15,\n 2.6502e-14, 7.1490e-16, 2.5063e-14, 6.9209e-17, 4.0724e-15, 4.3942e-15,\n 2.7071e-14, 1.8249e-14, 1.0819e-13, 8.3680e-15, 4.7791e-15, 2.1265e-16,\n 1.3885e-15, 1.4045e-13, 2.4227e-15, 2.0901e-15, 5.7232e-14, 2.4846e-17,\n 5.2449e-15, 3.0061e-16, 4.5025e-14, 2.9351e-13, 3.5039e-15, 1.2065e-14,\n 1.0127e-15, 4.2918e-16, 2.5058e-14, 7.2306e-17, 9.4170e-16, 1.6647e-16,\n 3.1947e-14, 3.8216e-15, 1.7168e-17, 5.6928e-16, 6.1780e-14, 1.6163e-14,\n 9.7743e-15, 2.3839e-13, 3.9228e-16, 1.4893e-16, 5.8137e-16, 1.5573e-16,\n 1.7766e-13, 2.3886e-14, 3.5864e-15, 4.2529e-14, 5.7750e-14, 4.9116e-15,\n 4.4060e-16, 4.6333e-16, 2.8092e-16, 4.0238e-14, 1.3466e-13, 9.0948e-15,\n 2.6900e-15, 1.6407e-13, 1.2544e-13, 2.4290e-17, 7.6240e-14, 1.5851e-13,\n 2.3820e-16, 8.4462e-14, 3.4193e-14, 1.2074e-14, 4.3557e-16, 4.0463e-15,\n 3.6033e-17, 1.9215e-16, 2.6548e-14, 4.5031e-18, 2.5307e-15, 3.2746e-13,\n 5.2083e-18, 3.9935e-17, 6.1882e-16, 2.0187e-17, 1.6156e-15, 1.8397e-16,\n 1.4831e-16, 7.4618e-14, 2.4882e-14, 1.0051e-13, 9.4205e-14, 1.0406e-14,\n 8.5861e-15, 5.3176e-18, 2.5739e-18, 2.3779e-14, 7.1844e-15, 1.0559e-15,\n 3.1650e-17, 2.9515e-14, 2.3076e-17, 1.5552e-14, 1.7960e-14, 2.2795e-14,\n 1.5151e-14, 3.8797e-15, 2.3367e-14, 6.4302e-15, 5.1690e-15, 1.3339e-14,\n 2.0537e-15, 5.6876e-16, 3.9193e-16, 2.0523e-16, 5.3424e-14, 1.7726e-16,\n 2.6674e-13, 4.1372e-15, 1.7158e-13, 1.0161e-16, 5.5513e-14, 1.2605e-14,\n 9.9234e-15, 2.7668e-14, 4.8237e-16, 4.2087e-15, 3.8244e-16, 7.0914e-14,\n 1.1998e-14, 1.9173e-16, 2.1676e-14, 1.9396e-16, 9.1768e-14, 2.4696e-14,\n 1.3534e-14, 3.5909e-17, 2.0586e-13, 9.3716e-16, 5.6978e-15, 2.0001e-14,\n 6.1420e-14, 1.8066e-17, 7.6045e-17, 1.1975e-14, 5.1651e-15, 7.3964e-15,\n 4.4809e-17, 1.5198e-14, 1.0673e-13, 5.4832e-15, 6.6282e-15, 3.4998e-18,\n 3.0730e-16, 8.9350e-17, 3.6595e-17, 1.5302e-16, 2.0049e-14, 8.0224e-14,\n 1.8150e-16, 3.7610e-15, 2.9705e-15, 2.4192e-15], device='cuda:0')" | |
| }, | |
| "33": { | |
| "step": "tensor(18770.)", | |
| "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45], device='cuda:0')", | |
| "exp_avg_sq": "tensor([7.8098e-18, 7.9686e-18, 3.8255e-16, 5.4289e-19, 1.9443e-20, 6.0069e-18,\n 3.8680e-21, 4.5084e-19, 3.4558e-17, 9.6051e-18, 8.9480e-19, 2.8423e-17,\n 2.8037e-20, 2.6674e-21, 2.9988e-18, 9.7632e-18, 2.1317e-17, 5.1029e-19,\n 9.0423e-20, 7.6570e-22, 3.5247e-18, 1.1248e-16, 8.5404e-19, 1.0555e-18,\n 7.6608e-21, 4.1754e-17, 3.7434e-19, 1.7429e-18, 6.3245e-18, 1.9217e-19,\n 2.2867e-18, 1.1776e-17, 1.6593e-17, 7.3709e-20, 2.0550e-18, 2.5976e-18,\n 3.4597e-16, 2.7686e-16, 4.2301e-20, 1.1207e-17, 2.3191e-17, 4.5225e-17,\n 4.0888e-20, 3.8161e-17, 3.4834e-20, 8.7557e-20, 1.8041e-18, 1.7346e-17,\n 1.5681e-21, 8.6285e-19, 5.1744e-19, 2.7161e-19, 1.2531e-21, 7.6553e-17,\n 9.6258e-18, 8.2274e-21, 8.4425e-21, 1.1176e-20, 5.3284e-17, 1.8828e-18,\n 4.3485e-18, 1.2378e-19, 1.5288e-17, 6.8235e-19, 1.9141e-17, 1.8252e-16,\n 1.0343e-17, 3.0803e-19, 8.4103e-18, 4.0575e-19, 9.7601e-19, 1.9036e-20,\n 9.8443e-19, 2.3369e-18, 3.2129e-17, 1.3445e-18, 2.1536e-18, 1.3511e-17,\n 1.6287e-16, 5.2943e-20, 1.9132e-18, 1.4288e-17, 2.0333e-19, 1.7573e-18,\n 5.6311e-17, 9.3755e-18, 7.5376e-19, 5.0148e-17, 1.8522e-19, 6.9552e-18,\n 6.0893e-18, 1.9526e-19, 1.1218e-18, 6.3231e-18, 2.8230e-17, 1.7236e-16,\n 1.2307e-17, 1.4036e-18, 7.0486e-19, 9.8799e-19, 2.8169e-19, 2.7208e-18,\n 2.1448e-17, 2.8308e-18, 1.6403e-17, 1.9863e-21, 4.4903e-18, 3.5462e-19,\n 1.3627e-17, 1.0737e-17, 7.6501e-17, 8.0478e-18, 7.8062e-19, 3.3991e-22,\n 9.0314e-19, 1.8250e-16, 1.1939e-18, 9.7277e-19, 1.7209e-16, 1.3063e-20,\n 1.7906e-18, 6.5910e-20, 2.1327e-17, 6.0267e-16, 3.0793e-18, 8.0443e-18,\n 2.8227e-18, 3.2318e-20, 5.2912e-17, 8.3864e-19, 3.0558e-19, 6.8416e-21,\n 2.8264e-17, 9.1626e-19, 1.0105e-20, 2.3822e-19, 2.6705e-17, 1.6865e-17,\n 6.9903e-18, 5.1500e-16, 3.7814e-18, 1.9345e-18, 1.6588e-18, 2.5206e-19,\n 3.6637e-16, 2.7024e-18, 6.2376e-18, 8.4962e-17, 9.3052e-18, 5.1972e-18,\n 1.3347e-18, 4.5819e-19, 7.5125e-19, 1.6992e-17, 6.3114e-17, 6.7349e-18,\n 2.0401e-18, 1.3982e-16, 5.7289e-17, 8.4326e-19, 2.9994e-17, 1.5781e-16,\n 2.2794e-19, 1.7506e-16, 1.0857e-17, 1.4818e-17, 3.9157e-19, 2.8199e-18,\n 7.9564e-21, 2.1049e-19, 3.3968e-17, 3.3365e-20, 1.8177e-18, 4.6066e-16,\n 4.0450e-20, 1.1312e-20, 1.1861e-18, 1.9901e-19, 5.7289e-19, 5.0394e-19,\n 3.3032e-20, 3.4278e-17, 2.5612e-17, 2.1810e-16, 1.3777e-16, 4.7714e-18,\n 4.2117e-18, 4.4833e-18, 9.0102e-19, 6.5899e-17, 3.2569e-19, 5.3895e-19,\n 1.5136e-18, 9.2928e-18, 1.4018e-18, 3.7389e-17, 7.0173e-18, 3.7744e-17,\n 7.9235e-18, 3.7599e-18, 1.2421e-17, 1.1178e-18, 1.3978e-18, 1.3849e-17,\n 4.0100e-19, 1.3856e-19, 1.1999e-19, 6.5179e-19, 6.8344e-17, 1.1546e-19,\n 4.0752e-16, 2.3160e-19, 2.2982e-16, 1.2346e-20, 8.7844e-17, 1.1319e-17,\n 1.5330e-17, 1.9679e-17, 1.4582e-19, 2.6293e-18, 9.2384e-20, 1.0493e-16,\n 8.9995e-18, 1.7695e-18, 1.0665e-17, 3.4841e-20, 1.7467e-16, 1.1779e-17,\n 3.7813e-17, 6.3975e-22, 8.8355e-16, 2.4142e-19, 1.7265e-18, 1.8898e-18,\n 3.6121e-17, 1.6448e-20, 1.3446e-21, 1.4873e-17, 1.4387e-18, 1.8266e-18,\n 2.0572e-20, 1.1432e-17, 8.1352e-17, 8.2073e-18, 1.3402e-18, 4.4300e-20,\n 5.0147e-19, 1.2229e-19, 1.3947e-18, 1.6159e-20, 2.3972e-17, 1.9589e-16,\n 4.2886e-20, 9.5348e-19, 1.8814e-18, 5.4375e-18], device='cuda:0')" | |
| }, | |
| "34": { | |
| "step": "tensor(18770.)", | |
| "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45], device='cuda:0')", | |
| "exp_avg_sq": "tensor([3.3589e-17, 5.9099e-18, 2.8148e-16, 8.5128e-19, 7.8720e-21, 2.8408e-17,\n 1.7072e-20, 1.5089e-18, 5.8396e-17, 1.2912e-17, 3.9612e-18, 4.9922e-17,\n 1.7164e-19, 2.9172e-20, 1.5901e-17, 4.2605e-17, 3.4813e-17, 2.9739e-19,\n 8.9286e-20, 7.5917e-22, 1.1589e-17, 7.0204e-17, 5.5464e-20, 1.5662e-18,\n 2.9521e-21, 9.0633e-17, 2.0518e-20, 5.9058e-18, 1.9675e-17, 1.5767e-20,\n 1.0463e-17, 4.7121e-17, 3.3898e-17, 1.2872e-18, 2.3657e-18, 5.9232e-18,\n 2.5632e-16, 3.1629e-16, 1.7899e-21, 1.2450e-17, 4.9549e-17, 4.7947e-17,\n 8.9669e-21, 7.9411e-17, 4.9282e-20, 5.5599e-19, 6.2242e-18, 1.2221e-17,\n 6.6517e-21, 7.6400e-18, 8.6972e-19, 7.1916e-19, 1.3643e-20, 8.8904e-17,\n 7.7814e-18, 7.5670e-20, 6.6402e-22, 9.4995e-20, 5.1860e-17, 4.5855e-17,\n 4.1222e-17, 3.4888e-19, 1.7970e-17, 5.6039e-18, 4.6165e-17, 1.6751e-16,\n 1.1600e-17, 2.8352e-19, 1.0028e-17, 1.0036e-18, 7.4521e-18, 3.8173e-21,\n 7.8676e-18, 1.8716e-22, 6.5945e-17, 9.3528e-18, 2.6559e-18, 4.5419e-17,\n 1.0920e-16, 4.3656e-20, 8.7257e-18, 1.5057e-17, 3.1802e-18, 2.9807e-17,\n 1.0997e-16, 2.8374e-17, 3.6958e-18, 1.4794e-16, 4.7108e-19, 5.5262e-17,\n 2.9281e-17, 3.1037e-19, 4.1244e-18, 3.4439e-17, 4.5084e-17, 1.8737e-16,\n 3.2882e-17, 1.6187e-18, 7.4237e-19, 2.8635e-18, 1.9704e-18, 8.4158e-18,\n 3.5405e-17, 1.5589e-18, 3.4057e-17, 3.0672e-22, 6.0844e-18, 6.7101e-18,\n 3.4285e-17, 2.4139e-17, 1.4319e-16, 1.1656e-17, 6.9924e-18, 8.7850e-21,\n 2.4295e-18, 1.8654e-16, 4.0345e-18, 2.0061e-18, 7.8866e-17, 2.3614e-22,\n 7.1409e-18, 1.7621e-19, 5.8820e-17, 3.9337e-16, 4.7546e-18, 1.6202e-17,\n 1.6458e-18, 5.2469e-19, 3.1767e-17, 8.7270e-21, 2.0727e-18, 1.1540e-19,\n 4.2917e-17, 4.9413e-18, 3.6220e-21, 1.2556e-18, 8.1219e-17, 2.1179e-17,\n 1.3492e-17, 3.1836e-16, 1.0358e-18, 5.2096e-19, 4.5026e-19, 3.0127e-20,\n 2.3638e-16, 3.2218e-17, 5.0002e-18, 5.2907e-17, 7.4349e-17, 6.6929e-18,\n 3.0724e-19, 7.2523e-19, 6.0536e-19, 5.3536e-17, 1.7682e-16, 1.2074e-17,\n 3.9372e-18, 2.1696e-16, 1.6332e-16, 2.5961e-19, 1.0137e-16, 2.0668e-16,\n 8.3085e-19, 1.0606e-16, 4.3351e-17, 1.7059e-17, 7.7286e-19, 4.7620e-18,\n 3.2690e-21, 4.3163e-19, 3.3643e-17, 1.0678e-20, 3.7037e-18, 4.1961e-16,\n 2.2685e-20, 1.3703e-20, 9.1448e-19, 2.0490e-20, 1.7065e-18, 2.4738e-20,\n 2.0507e-19, 9.6880e-17, 3.3563e-17, 1.3459e-16, 1.2671e-16, 1.4025e-17,\n 1.1483e-17, 1.4796e-19, 1.7698e-19, 2.8737e-17, 8.5961e-18, 7.8700e-19,\n 4.4726e-19, 3.7871e-17, 2.2475e-19, 2.1990e-17, 2.5397e-17, 2.8718e-17,\n 2.0214e-17, 5.5415e-18, 3.0936e-17, 8.2409e-18, 6.9509e-18, 1.7599e-17,\n 3.0391e-18, 1.1054e-18, 6.8786e-19, 5.5490e-19, 6.9388e-17, 2.7343e-20,\n 3.4451e-16, 4.8374e-18, 2.2003e-16, 1.5628e-19, 7.4535e-17, 1.6472e-17,\n 1.3122e-17, 3.4309e-17, 3.3567e-19, 6.0462e-18, 4.1400e-19, 8.7300e-17,\n 1.6288e-17, 6.5335e-19, 2.7905e-17, 1.3734e-19, 1.2370e-16, 3.1495e-17,\n 1.5298e-17, 5.5964e-21, 2.7805e-16, 1.4671e-18, 9.0783e-18, 2.7866e-17,\n 8.0902e-17, 1.7431e-19, 5.9129e-23, 1.6518e-17, 7.1154e-18, 8.5957e-18,\n 4.4574e-21, 2.0775e-17, 1.3986e-16, 5.9391e-18, 9.8538e-18, 1.4164e-19,\n 8.2887e-19, 1.7368e-20, 1.3337e-21, 1.0402e-19, 2.7683e-17, 9.8961e-17,\n 1.8612e-19, 4.0439e-18, 4.1245e-18, 3.5923e-18], device='cuda:0')" | |
| }, | |
| "35": { | |
| "step": "tensor(18770.)", | |
| "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[4.0636e-21, 1.4927e-19, 1.3830e-20, ..., 3.1802e-18, 9.1811e-21,\n 1.8499e-20],\n [7.1996e-19, 5.7019e-21, 1.2935e-18, ..., 1.7880e-19, 1.5212e-20,\n 1.6552e-19],\n [1.6588e-16, 3.9855e-17, 4.3213e-17, ..., 2.8920e-17, 8.0640e-18,\n 1.8708e-17],\n ...,\n [3.6016e-18, 1.1737e-18, 1.1404e-19, ..., 1.6492e-19, 1.5478e-19,\n 1.0244e-20],\n [3.4865e-19, 2.9325e-19, 1.4258e-19, ..., 8.6844e-20, 6.8052e-20,\n 1.2355e-20],\n [2.7304e-19, 3.2506e-19, 1.8084e-19, ..., 4.9392e-19, 1.4510e-19,\n 8.9805e-20]], device='cuda:0')" | |
| }, | |
| "36": { | |
| "step": "tensor(18770.)", | |
| "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45], device='cuda:0')", | |
| "exp_avg_sq": "tensor([1.2185e-15, 1.4296e-15, 1.8502e-13, 1.6398e-16, 1.0486e-16, 3.5355e-17,\n 1.9732e-17, 8.6567e-17, 4.4728e-14, 6.7370e-17, 2.5581e-13, 1.1150e-15,\n 9.3861e-16, 9.6663e-18, 3.6067e-15, 4.5373e-15, 4.9099e-19, 3.3992e-14,\n 1.5843e-17, 8.5886e-17, 8.4833e-14, 1.3261e-15, 1.5621e-15, 1.5718e-14,\n 4.1640e-16, 1.1342e-13, 2.5654e-16, 1.4594e-16, 2.7035e-14, 1.6118e-15,\n 1.4677e-14, 4.5612e-14, 4.0936e-14, 1.1066e-18, 2.7173e-16, 4.2772e-15,\n 2.1910e-13, 3.7363e-13, 6.6637e-15, 8.6841e-15, 5.0526e-14, 5.3447e-16,\n 1.2505e-16, 3.4114e-14, 2.7141e-17, 1.0796e-15, 1.6559e-17, 5.1999e-15,\n 4.2128e-15, 1.8451e-14, 1.0748e-14, 7.1858e-15, 1.9485e-16, 6.3285e-16,\n 9.2261e-15, 1.8084e-15, 9.3968e-17, 8.5818e-15, 4.3956e-14, 1.2807e-14,\n 7.1816e-14, 2.7768e-16, 8.7670e-15, 3.5539e-15, 6.2239e-14, 5.6791e-15,\n 3.4488e-14, 5.5236e-15, 1.8325e-15, 3.6363e-14, 2.8822e-14, 3.7842e-17,\n 3.0612e-14, 1.0122e-16, 1.6672e-16, 1.5816e-15, 2.6238e-17, 3.9854e-14,\n 1.4832e-14, 5.0816e-15, 9.5197e-14, 1.7998e-16, 3.2239e-17, 3.7170e-15,\n 8.3028e-16, 1.8927e-15, 6.2001e-14, 2.2114e-16, 5.4655e-16, 1.9095e-13,\n 2.7803e-15, 5.6332e-17, 1.2379e-14, 2.2651e-15, 7.4522e-14, 1.4019e-13,\n 4.7113e-18, 2.4464e-17, 2.1703e-14, 1.7241e-15, 1.3132e-16, 6.9867e-16,\n 5.3516e-14, 7.3921e-16, 1.8587e-16, 2.6766e-16, 4.7568e-17, 9.4923e-18,\n 6.2516e-14, 2.4974e-15, 1.5612e-13, 1.1336e-14, 1.0460e-14, 2.4376e-16,\n 2.2733e-14, 1.6987e-13, 5.8816e-15, 6.9939e-16, 2.4544e-14, 3.6978e-16,\n 2.0264e-14, 1.3627e-16, 1.3694e-13, 8.4120e-15, 4.0411e-15, 4.5196e-14,\n 8.9778e-14, 3.4479e-17, 1.1695e-14, 9.3415e-14, 8.2941e-16, 5.1740e-18,\n 9.0432e-18, 6.7363e-18, 5.1374e-15, 8.7258e-15, 9.0004e-16, 5.2384e-15,\n 3.4561e-15, 1.5860e-13, 7.6028e-17, 1.4810e-14, 2.8742e-17, 4.3994e-14,\n 1.1354e-16, 2.6024e-13, 4.2426e-16, 4.6948e-15, 2.8734e-14, 2.5917e-15,\n 4.9438e-14, 3.2385e-18, 7.8268e-16, 2.0081e-14, 3.9988e-14, 6.7383e-15,\n 3.2873e-15, 5.9166e-14, 1.8170e-13, 7.1586e-14, 1.4878e-13, 1.5526e-13,\n 1.6275e-16, 8.3740e-16, 1.0115e-13, 1.3903e-15, 2.5652e-16, 1.1221e-17,\n 2.7188e-17, 1.6625e-15, 1.7796e-14, 2.2690e-16, 4.4083e-15, 2.9267e-15,\n 1.9359e-14, 8.8218e-18, 2.4330e-17, 2.0840e-18, 2.3036e-15, 5.2693e-15,\n 9.1900e-18, 4.6163e-14, 3.2565e-14, 2.9449e-14, 1.9893e-14, 3.2672e-16,\n 1.5386e-14, 2.2485e-15, 6.1388e-15, 3.6161e-16, 1.5526e-14, 1.6258e-15,\n 4.8324e-14, 2.5608e-13, 1.2754e-13, 6.2262e-15, 4.0762e-14, 9.1362e-15,\n 1.3429e-15, 1.0773e-16, 2.2320e-16, 5.4849e-14, 1.3755e-14, 6.5176e-15,\n 3.4481e-14, 1.3116e-15, 2.0285e-15, 5.9582e-16, 1.2696e-15, 4.1327e-16,\n 2.9316e-14, 2.4335e-14, 2.1049e-13, 3.7331e-15, 8.2116e-15, 3.2824e-14,\n 9.7627e-16, 1.2791e-15, 3.9115e-14, 1.8389e-17, 3.6788e-15, 6.0978e-15,\n 1.7476e-14, 1.0993e-14, 5.9405e-14, 2.4737e-15, 1.9456e-14, 1.7783e-15,\n 9.3399e-16, 6.8281e-17, 2.9279e-15, 5.8702e-15, 9.4426e-17, 5.0240e-14,\n 1.0688e-13, 1.7375e-16, 5.1549e-16, 1.6636e-16, 3.0244e-15, 1.4747e-13,\n 5.0254e-15, 1.3047e-16, 6.7999e-15, 2.5486e-15, 2.4582e-15, 4.5107e-16,\n 1.5178e-16, 3.7619e-15, 1.4219e-13, 1.8141e-17, 1.9171e-14, 4.1014e-14,\n 7.2121e-18, 1.4691e-15, 7.2130e-17, 8.1853e-16], device='cuda:0')" | |
| }, | |
| "37": { | |
| "step": "tensor(18770.)", | |
| "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45], device='cuda:0')", | |
| "exp_avg_sq": "tensor([2.0696e-18, 6.3615e-19, 2.2224e-16, 2.8433e-19, 1.8863e-20, 1.3419e-18,\n 2.2353e-20, 9.1338e-19, 6.9485e-17, 4.6896e-19, 1.5147e-16, 1.7799e-18,\n 5.0579e-19, 5.8346e-20, 2.4170e-18, 6.3982e-19, 4.0282e-19, 4.7554e-17,\n 4.1890e-20, 1.5000e-20, 2.1963e-17, 7.3137e-19, 1.3821e-19, 7.5309e-18,\n 2.4828e-19, 1.1857e-16, 8.2872e-19, 1.0347e-19, 1.9047e-17, 3.5191e-19,\n 2.7782e-17, 4.3959e-17, 5.5298e-17, 4.2297e-20, 3.9767e-20, 7.4690e-19,\n 4.2124e-16, 7.5983e-16, 6.8803e-18, 3.9205e-18, 3.0251e-17, 1.1819e-19,\n 3.7316e-19, 1.3014e-17, 2.1196e-19, 7.2138e-19, 3.0773e-18, 1.2693e-17,\n 3.1470e-18, 4.2438e-18, 2.6970e-18, 8.0461e-18, 7.1324e-19, 7.2668e-19,\n 5.5636e-18, 6.1772e-18, 1.2172e-20, 3.0657e-18, 1.0919e-16, 3.2926e-18,\n 3.4757e-17, 8.9395e-20, 5.4463e-18, 4.5394e-19, 1.1379e-16, 6.4749e-19,\n 5.3732e-17, 1.2950e-17, 1.1667e-18, 3.3253e-17, 2.2760e-17, 8.8718e-19,\n 4.1249e-17, 1.7020e-18, 1.1127e-18, 1.3550e-19, 2.9400e-20, 7.5889e-18,\n 4.7489e-18, 5.6696e-18, 1.2757e-16, 5.8487e-20, 2.8689e-19, 2.2985e-18,\n 1.0867e-18, 1.2422e-19, 6.2039e-17, 4.3690e-18, 1.4677e-19, 3.5045e-16,\n 9.8362e-19, 3.1059e-19, 4.2333e-19, 2.5782e-19, 1.8963e-16, 7.0380e-17,\n 5.0178e-19, 7.2662e-22, 6.0640e-18, 2.9253e-19, 8.3887e-20, 3.9822e-18,\n 4.7776e-17, 2.7211e-19, 2.1669e-19, 7.7696e-19, 1.4397e-19, 1.8510e-19,\n 5.3080e-17, 2.4270e-19, 3.1391e-16, 1.0260e-17, 1.8840e-18, 1.7483e-20,\n 6.9863e-18, 1.8756e-16, 2.8667e-18, 3.3839e-18, 2.5390e-17, 1.5628e-20,\n 1.7116e-17, 3.7957e-19, 1.1253e-16, 9.8011e-18, 3.2445e-18, 3.0887e-17,\n 1.5211e-17, 8.5530e-22, 2.5650e-17, 1.9428e-16, 1.4274e-18, 3.9251e-20,\n 9.1341e-19, 6.8462e-20, 1.0177e-18, 8.9425e-18, 1.0828e-18, 4.3292e-18,\n 3.9556e-18, 7.6092e-17, 2.7047e-21, 1.1481e-18, 6.8322e-19, 1.1190e-16,\n 2.3673e-18, 3.6896e-16, 1.0058e-19, 1.2412e-18, 5.4668e-18, 4.2815e-18,\n 1.0815e-17, 2.9456e-21, 4.9948e-19, 2.1992e-17, 5.3195e-18, 1.3333e-17,\n 4.5844e-19, 1.1716e-17, 1.3969e-16, 9.9623e-17, 2.7397e-16, 1.4455e-16,\n 1.2103e-20, 1.2652e-18, 3.7699e-17, 1.1773e-18, 6.6420e-19, 4.2235e-19,\n 2.8526e-20, 1.4643e-19, 4.8394e-17, 1.4082e-20, 2.5990e-18, 3.0740e-18,\n 3.5427e-17, 1.1545e-20, 3.1593e-21, 1.7510e-19, 8.4736e-20, 8.7475e-19,\n 7.2584e-20, 5.9891e-17, 3.0070e-17, 6.5220e-18, 5.4754e-18, 1.1964e-19,\n 1.5187e-17, 2.2729e-18, 1.7037e-18, 1.8553e-19, 5.6308e-18, 2.6837e-19,\n 1.3701e-17, 1.3869e-16, 2.2958e-16, 7.7969e-18, 3.0833e-17, 1.9427e-17,\n 5.0912e-19, 1.0829e-21, 6.8005e-19, 4.4375e-17, 1.7551e-17, 3.8299e-18,\n 4.1167e-17, 3.3335e-18, 6.5350e-19, 7.3101e-20, 4.8973e-19, 1.3612e-19,\n 8.4576e-18, 1.0113e-17, 3.0029e-16, 7.2200e-18, 9.0105e-19, 6.3865e-17,\n 2.0193e-19, 6.4514e-19, 3.6411e-17, 3.6194e-20, 6.1103e-19, 8.7776e-19,\n 9.8583e-18, 3.4408e-18, 8.5486e-17, 2.8541e-18, 2.7961e-18, 3.8183e-19,\n 1.4465e-19, 1.2779e-20, 1.7139e-18, 2.4634e-18, 3.5304e-19, 4.1566e-17,\n 7.8626e-17, 1.5925e-18, 6.8336e-20, 8.1384e-19, 4.0576e-18, 1.7435e-16,\n 2.6182e-18, 5.3294e-19, 5.4798e-18, 9.6643e-19, 4.2593e-19, 5.6265e-20,\n 2.8228e-19, 4.2721e-18, 3.0334e-16, 9.6976e-20, 1.1263e-17, 1.8912e-17,\n 6.4801e-21, 5.7294e-19, 7.4709e-19, 5.7868e-19], device='cuda:0')" | |
| }, | |
| "38": { | |
| "step": "tensor(18770.)", | |
| "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45], device='cuda:0')", | |
| "exp_avg_sq": "tensor([1.5609e-18, 2.0137e-18, 2.2173e-16, 5.6710e-19, 5.3781e-20, 5.4918e-20,\n 3.7983e-21, 7.1083e-20, 6.2017e-17, 4.2855e-20, 3.2965e-16, 1.4867e-18,\n 1.1974e-18, 9.3885e-21, 3.6031e-18, 6.2349e-18, 2.4971e-21, 4.7379e-17,\n 8.6610e-21, 3.3459e-20, 1.0985e-16, 1.8633e-18, 2.1139e-18, 1.7233e-17,\n 9.4199e-19, 1.4821e-16, 1.6775e-19, 2.0775e-19, 3.6902e-17, 2.5573e-18,\n 2.1571e-17, 6.3012e-17, 4.6927e-17, 3.0016e-21, 2.2426e-19, 5.2793e-18,\n 2.8937e-16, 4.8607e-16, 6.7333e-18, 9.5369e-18, 6.5729e-17, 6.4562e-19,\n 4.4031e-19, 4.1287e-17, 2.3256e-19, 8.8318e-19, 5.3867e-21, 5.4238e-18,\n 6.0505e-18, 2.4939e-17, 1.2598e-17, 9.8504e-18, 6.0278e-19, 7.6810e-19,\n 1.3169e-17, 3.4100e-18, 1.3750e-19, 9.3989e-18, 5.1471e-17, 1.6216e-17,\n 8.7133e-17, 2.7851e-19, 1.2908e-17, 4.0630e-18, 7.1876e-17, 6.8269e-18,\n 3.9271e-17, 5.6761e-18, 2.1370e-18, 4.2159e-17, 3.8634e-17, 5.3274e-19,\n 4.3708e-17, 1.3525e-19, 1.2102e-19, 1.9556e-18, 1.0320e-19, 5.2648e-17,\n 1.8116e-17, 4.6136e-18, 1.1989e-16, 2.2658e-19, 9.1472e-20, 4.3877e-18,\n 1.0375e-18, 2.2801e-18, 8.3241e-17, 3.8518e-19, 6.9406e-19, 2.5056e-16,\n 3.6477e-18, 6.2435e-19, 1.5239e-17, 2.8909e-18, 8.7175e-17, 1.6972e-16,\n 1.6907e-21, 3.9394e-21, 2.6048e-17, 2.3320e-18, 1.8492e-19, 1.0489e-18,\n 6.3787e-17, 6.4484e-19, 2.7140e-19, 8.0517e-19, 4.1026e-19, 1.3351e-20,\n 7.3749e-17, 3.0502e-18, 2.0634e-16, 1.2411e-17, 1.3551e-17, 2.0339e-19,\n 2.9659e-17, 2.0589e-16, 7.8973e-18, 6.5263e-19, 2.8626e-17, 3.6212e-19,\n 2.7967e-17, 3.0126e-19, 1.6993e-16, 1.1760e-17, 4.7662e-18, 5.3350e-17,\n 1.1626e-16, 5.2455e-20, 1.9532e-17, 1.2543e-16, 2.4954e-18, 1.1070e-20,\n 4.9364e-21, 2.3742e-22, 6.9745e-18, 1.2586e-17, 1.0809e-18, 7.7836e-18,\n 3.3128e-18, 1.9164e-16, 6.6190e-20, 1.7461e-17, 1.1967e-20, 5.0726e-17,\n 1.2922e-19, 3.3996e-16, 3.7135e-19, 6.5334e-18, 3.4725e-17, 5.0125e-18,\n 6.1542e-17, 1.3007e-20, 6.5510e-19, 2.9406e-17, 5.0182e-17, 1.0651e-17,\n 3.5453e-18, 7.1611e-17, 2.2281e-16, 8.3663e-17, 1.9741e-16, 2.0376e-16,\n 1.3837e-19, 1.0942e-18, 1.2476e-16, 1.9080e-18, 1.0016e-18, 2.0492e-20,\n 2.2449e-20, 2.2550e-18, 2.7289e-17, 2.3663e-19, 4.8123e-18, 3.4027e-18,\n 2.8211e-17, 3.8366e-20, 1.6264e-20, 1.7117e-20, 2.8068e-18, 6.6859e-18,\n 3.2029e-22, 6.3399e-17, 3.8363e-17, 3.4620e-17, 2.3532e-17, 4.0974e-19,\n 1.6794e-17, 3.1662e-18, 7.5093e-18, 5.5252e-19, 1.8220e-17, 1.8172e-18,\n 5.8893e-17, 3.2772e-16, 1.5148e-16, 6.2601e-18, 5.2563e-17, 1.5025e-17,\n 1.8745e-18, 9.0814e-20, 1.5334e-19, 6.5458e-17, 1.5354e-17, 7.1105e-18,\n 4.6494e-17, 2.8626e-18, 2.8516e-18, 5.1769e-19, 1.6539e-18, 6.7598e-19,\n 3.7976e-17, 2.8200e-17, 2.7770e-16, 3.8925e-18, 9.3812e-18, 3.8817e-17,\n 1.1288e-18, 1.6487e-18, 4.5666e-17, 9.2618e-20, 3.8623e-18, 7.8625e-18,\n 1.9705e-17, 1.4474e-17, 8.0590e-17, 1.8090e-18, 2.3816e-17, 2.3067e-18,\n 1.1150e-18, 1.8732e-19, 3.9756e-18, 8.7769e-18, 1.0023e-19, 6.4596e-17,\n 1.2859e-16, 9.5564e-19, 4.3375e-19, 1.5268e-19, 4.8835e-18, 1.7977e-16,\n 5.6900e-18, 1.1315e-19, 8.2434e-18, 3.5524e-18, 3.4085e-18, 3.4842e-19,\n 6.8457e-19, 5.5992e-18, 1.8854e-16, 4.1158e-20, 2.1755e-17, 5.4678e-17,\n 2.0784e-20, 1.8109e-18, 7.2786e-19, 8.4492e-19], device='cuda:0')" | |
| }, | |
| "39": { | |
| "step": "tensor(18770.)", | |
| "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n ...,\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[7.0155e-21, 2.9911e-19, 1.0076e-18, ..., 5.7300e-20, 2.1623e-19,\n 2.6753e-20],\n [3.6084e-21, 3.3867e-19, 1.1466e-19, ..., 2.3557e-19, 1.4527e-18,\n 4.7447e-19],\n [1.2484e-24, 1.0266e-20, 4.5884e-20, ..., 3.0224e-20, 7.4598e-22,\n 3.7205e-19],\n ...,\n [2.6015e-20, 5.0691e-17, 2.8404e-16, ..., 3.9006e-19, 1.1237e-16,\n 4.1421e-16],\n [1.2414e-19, 1.3991e-16, 9.0743e-16, ..., 4.4137e-19, 3.1946e-16,\n 1.2804e-15],\n [1.8281e-19, 1.8952e-16, 1.2265e-15, ..., 1.0466e-18, 3.7065e-16,\n 1.5780e-15]], device='cuda:0')" | |
| }, | |
| "40": { | |
| "step": "tensor(18770.)", | |
| "exp_avg": "tensor([ 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45], device='cuda:0')", | |
| "exp_avg_sq": "tensor([1.5509e-20, 2.1206e-18, 2.2698e-22, 1.7561e-18, 2.3757e-19, 1.3531e-18,\n 1.8297e-17, 3.2624e-18, 2.5646e-18, 5.7308e-19, 1.8393e-17, 3.4254e-18,\n 3.0273e-17, 9.8966e-19, 3.1555e-19, 5.6379e-21, 1.5305e-17, 1.4770e-18,\n 2.9227e-17, 3.6146e-18, 2.7568e-19, 1.0521e-17, 8.1722e-19, 9.4600e-18,\n 4.0257e-18, 3.1757e-18, 1.3979e-18, 4.0835e-19, 1.7190e-19, 6.9350e-19,\n 8.3998e-18, 5.5516e-18, 3.1505e-20, 2.2395e-18, 5.2979e-18, 1.6720e-18,\n 2.0296e-18, 3.2931e-18, 7.0112e-20, 2.3520e-21, 4.3589e-18, 1.6359e-19,\n 3.2640e-18, 1.7619e-19, 1.3358e-19, 7.0015e-20, 9.1069e-20, 1.5209e-18,\n 1.1601e-20, 2.1506e-19, 1.2881e-18, 7.5532e-18, 3.5980e-19, 2.8557e-19,\n 3.7405e-19, 7.8682e-19, 8.8606e-21, 8.1643e-20, 5.9674e-20, 1.3531e-18,\n 4.8804e-18, 1.4212e-18, 3.8289e-18, 3.0524e-19, 2.3629e-19, 2.3879e-17,\n 1.0078e-18, 4.0212e-19, 5.7769e-17, 5.7049e-18, 3.6672e-20, 3.1179e-17,\n 9.4542e-20, 2.3356e-17, 3.0703e-17, 1.1525e-17, 5.4782e-17, 2.2258e-17,\n 4.4559e-18, 9.7906e-18, 3.9932e-19, 6.8996e-17, 2.2849e-17, 2.0942e-17,\n 1.0456e-17, 9.4381e-18, 3.5992e-19, 1.8091e-19, 1.2860e-17, 8.0461e-18,\n 2.0886e-17, 5.1688e-18, 1.5114e-18, 1.2388e-17, 1.0365e-18, 6.6097e-19,\n 3.1307e-18, 5.8733e-18, 1.2686e-19, 3.7874e-18, 9.5075e-20, 2.8965e-20,\n 1.8494e-17, 7.2654e-18, 4.3030e-17, 8.6388e-18, 9.3183e-18, 2.6488e-17,\n 9.1641e-18, 1.2022e-17, 8.3830e-18, 4.4225e-18, 7.5625e-19, 1.5963e-17,\n 9.9535e-21, 1.2274e-17, 1.2879e-20, 1.2673e-17, 4.4757e-17, 4.0970e-17,\n 1.5314e-19, 1.0881e-17, 1.6977e-17, 3.4811e-17, 7.6028e-20, 3.9585e-19,\n 1.9659e-17, 2.5287e-18, 5.2356e-19, 2.1758e-19, 6.6948e-19, 5.5585e-20,\n 2.7192e-18, 5.3638e-19, 5.6989e-19, 5.7011e-18, 2.7181e-18, 3.3754e-19,\n 3.4918e-19, 8.4761e-19, 3.9430e-21, 2.5597e-19, 7.4416e-18, 2.9669e-18,\n 2.0285e-18, 5.0303e-18, 4.9917e-19, 3.0623e-18, 4.8633e-18, 1.0545e-18,\n 1.6199e-18, 1.6249e-18, 2.2785e-18, 1.3079e-17, 7.8123e-20, 6.7924e-18,\n 1.9297e-19, 1.3740e-18, 6.5213e-19, 5.5426e-18, 5.3325e-18, 2.7039e-18,\n 5.5653e-18, 3.3613e-19, 4.4353e-18, 2.2514e-19, 1.1063e-19, 9.7236e-19,\n 2.6626e-19, 1.5385e-18, 1.2924e-18, 2.1134e-17, 4.5554e-19, 1.4024e-18,\n 7.3148e-18, 1.4805e-17, 7.9887e-18, 1.1892e-17, 2.2643e-18, 2.2466e-17,\n 1.5147e-17, 4.6837e-19, 8.4490e-20, 3.3721e-20, 1.2661e-18, 2.0870e-18,\n 1.3431e-18, 2.8042e-18, 7.7469e-18, 9.8400e-18, 4.8031e-18, 6.0475e-18,\n 1.0747e-17, 4.1755e-18, 6.5304e-18, 1.0792e-19, 2.8062e-17, 4.8840e-18,\n 1.9861e-20, 5.3455e-18, 2.2540e-17, 1.1146e-17, 2.4045e-18, 1.7423e-17,\n 9.5866e-18, 1.3511e-18, 3.4572e-18, 1.9338e-17, 6.9951e-17, 5.3538e-18,\n 3.1377e-20, 7.6848e-18, 1.2596e-18, 4.4951e-17, 1.9855e-17, 9.2496e-18,\n 7.7606e-19, 1.0005e-17, 5.4826e-18, 4.3138e-18, 1.0757e-17, 1.6699e-17,\n 7.2724e-18, 2.2605e-18, 1.6687e-19, 4.2594e-19, 2.9293e-18, 1.5143e-19,\n 2.3444e-19, 1.5166e-19, 3.6615e-19, 9.5834e-21, 4.7131e-20, 4.0564e-18,\n 5.2201e-19, 5.6839e-19, 1.7778e-18, 1.3590e-19, 1.7785e-18, 1.8358e-18,\n 2.8942e-20, 2.6445e-19, 6.5239e-20, 5.4795e-20, 6.3199e-20, 5.5704e-18,\n 7.4811e-19, 3.9109e-19, 1.0488e-19, 5.2388e-19, 1.2099e-18, 1.3229e-21,\n 2.6880e-19, 4.3813e-18, 1.4043e-19, 1.7233e-18, 1.3094e-28, 2.7934e-29,\n 2.5125e-29, 7.7318e-28, 4.0531e-28, 2.7798e-27, 1.5061e-28, 1.1046e-29,\n 3.0779e-29, 4.0881e-27, 2.1327e-28, 3.2177e-29, 1.7867e-28, 6.5445e-30,\n 7.1728e-29, 5.2167e-28, 6.0151e-29, 5.8356e-29, 1.7113e-29, 1.0461e-27,\n 5.1416e-28, 3.2626e-28, 1.3042e-28, 1.2808e-27, 3.4330e-28, 1.3869e-28,\n 1.6947e-27, 1.2883e-28, 1.1237e-27, 5.7732e-28, 1.7028e-30, 2.0806e-27,\n 6.1592e-28, 3.4053e-27, 1.6574e-27, 4.1660e-28, 5.0143e-28, 8.5530e-28,\n 1.9988e-27, 1.0510e-28, 7.0128e-29, 2.6922e-28, 1.1713e-27, 1.3032e-27,\n 3.2544e-28, 4.7096e-31, 3.3151e-28, 1.0416e-27, 3.2718e-29, 5.7523e-28,\n 2.1146e-27, 7.3224e-29, 3.2625e-28, 1.0453e-27, 2.1806e-29, 1.0368e-29,\n 9.4163e-29, 2.7699e-27, 7.1576e-28, 1.4447e-27, 7.6884e-29, 1.8436e-28,\n 4.9322e-28, 1.7566e-30, 1.9472e-27, 1.1421e-26, 1.0294e-27, 1.1396e-26,\n 9.0774e-27, 5.6449e-27, 1.2145e-27, 8.0611e-29, 1.2783e-29, 2.7138e-27,\n 2.5054e-27, 1.7273e-27, 9.8931e-27, 1.9128e-27, 1.6344e-26, 4.8513e-27,\n 4.9008e-28, 5.8524e-27, 1.0938e-28, 1.3588e-27, 5.7120e-27, 9.8830e-27,\n 9.5975e-29, 2.3302e-31, 2.0817e-27, 2.7848e-29, 5.9004e-29, 2.8542e-28,\n 1.5317e-27, 1.3543e-28, 1.9692e-28, 8.3838e-30, 1.1153e-27, 6.2145e-28,\n 7.3952e-28, 1.2639e-26, 1.8972e-27, 1.0770e-27, 1.3618e-28, 1.6123e-27,\n 1.4180e-26, 1.4475e-26, 1.2372e-28, 3.5201e-28, 7.4887e-28, 3.0089e-28,\n 8.4026e-28, 2.3217e-28, 2.4198e-27, 3.2280e-27, 4.3074e-29, 1.1059e-26,\n 5.9384e-27, 7.3266e-27, 1.1055e-28, 1.1944e-27, 7.3619e-28, 4.8996e-27,\n 9.1954e-28, 2.2382e-27, 2.1649e-28, 9.2183e-30, 9.9215e-29, 6.7089e-29,\n 5.9131e-29, 5.5433e-28, 2.6302e-28, 3.7683e-29, 1.5534e-27, 1.3243e-27,\n 2.5176e-29, 1.0929e-28, 2.6539e-27, 2.4282e-29, 1.2786e-28, 3.6933e-27,\n 1.8265e-27, 7.2935e-28, 7.1762e-28, 9.2327e-29, 1.6847e-27, 1.5534e-27,\n 1.4342e-28, 6.3277e-28, 9.8710e-29, 7.9238e-28, 3.1609e-27, 3.8544e-28,\n 9.4396e-30, 4.4346e-28, 2.5504e-27, 2.3872e-27, 2.5863e-31, 1.8502e-27,\n 1.3865e-28, 4.4479e-28, 3.2834e-27, 1.3633e-28, 1.2750e-29, 1.8230e-28,\n 2.1520e-28, 6.2949e-29, 5.5314e-28, 1.0614e-28, 6.4535e-28, 4.0119e-28,\n 1.6483e-29, 3.0589e-27, 3.6187e-28, 7.3818e-28, 1.2419e-27, 2.6749e-27,\n 6.4891e-28, 6.4553e-28, 1.0653e-27, 9.4708e-27, 1.0358e-26, 2.5606e-28,\n 1.3022e-27, 5.3881e-28, 1.1348e-28, 3.5389e-27, 1.8361e-27, 1.7467e-27,\n 2.2777e-28, 7.9259e-29, 5.8885e-30, 7.6945e-28, 1.8363e-27, 4.4916e-27,\n 1.2104e-26, 3.1063e-27, 3.9229e-29, 1.3017e-27, 5.2801e-27, 8.5459e-29,\n 6.4369e-28, 1.7183e-27, 3.0820e-28, 2.4992e-29, 3.0975e-27, 6.9197e-28,\n 2.0512e-27, 6.0963e-27, 4.8184e-28, 2.0938e-28, 2.5006e-27, 1.0968e-27,\n 2.8257e-27, 2.6698e-29, 5.1873e-29, 2.7719e-28, 3.5922e-27, 4.0219e-27,\n 3.4403e-27, 1.3135e-28, 3.1150e-27, 1.4560e-28, 4.5961e-28, 4.1386e-28,\n 1.4561e-28, 1.0569e-29, 1.5364e-27, 1.6890e-28, 2.4375e-27, 9.5259e-30,\n 1.0934e-28, 3.0835e-27, 1.1640e-28, 1.4085e-29, 2.5810e-27, 2.9525e-27,\n 5.7809e-30, 3.6416e-29, 1.0467e-28, 1.4906e-27, 4.3598e-28, 1.5353e-27,\n 2.8630e-27, 3.8307e-27, 5.7301e-29, 1.8854e-28, 2.7739e-28, 5.5014e-28,\n 9.3457e-28, 1.1516e-28, 1.4612e-29, 6.4858e-29, 1.8035e-28, 4.0975e-29,\n 1.8640e-27, 2.1456e-28, 2.5513e-16, 2.4747e-14, 1.0265e-14, 3.9982e-15,\n 1.4457e-15, 1.5896e-14, 1.9531e-14, 1.4094e-14, 5.9480e-15, 8.5649e-17,\n 1.4560e-16, 3.2122e-16, 4.4878e-15, 5.7200e-16, 4.3930e-15, 1.6074e-15,\n 7.6064e-15, 1.2570e-18, 5.6040e-15, 7.8699e-16, 2.1647e-15, 5.5453e-16,\n 1.8514e-15, 1.0956e-14, 1.7434e-14, 2.3257e-15, 3.4523e-15, 4.0999e-15,\n 9.4750e-15, 6.1017e-16, 9.8107e-16, 1.1277e-17, 5.7460e-16, 1.8349e-16,\n 1.5790e-15, 1.9881e-14, 4.1506e-17, 3.5049e-15, 5.1681e-16, 2.3046e-15,\n 4.7659e-15, 4.4659e-14, 1.8962e-15, 6.6072e-16, 4.4798e-15, 1.4116e-17,\n 5.7157e-16, 3.1011e-15, 8.6576e-15, 7.7983e-17, 5.5937e-15, 6.5722e-16,\n 2.7102e-15, 3.7926e-15, 7.7611e-16, 1.3554e-14, 1.5279e-15, 5.4118e-15,\n 2.1525e-15, 4.4438e-16, 2.4045e-15, 1.6431e-14, 3.0319e-16, 6.9568e-15,\n 7.9748e-15, 7.8181e-16, 4.9548e-15, 4.6699e-16, 1.1946e-16, 1.3614e-15,\n 3.2419e-15, 1.5179e-15, 5.1942e-15, 1.5526e-15, 1.1666e-16, 1.6286e-15,\n 1.0550e-16, 6.3101e-18, 1.1440e-14, 1.4907e-14, 8.1240e-15, 2.0330e-15,\n 3.8471e-14, 5.4058e-15, 4.6755e-16, 1.8284e-15, 8.4803e-15, 9.8367e-16,\n 6.8733e-15, 3.0831e-14, 4.3308e-15, 1.5674e-14, 1.6036e-15, 1.1646e-14,\n 6.1121e-15, 1.3782e-15, 4.5387e-16, 2.2781e-17, 1.5090e-16, 1.5836e-15,\n 6.3224e-16, 8.8861e-15, 6.6457e-16, 1.0691e-14, 1.0582e-17, 5.1587e-16,\n 2.1200e-14, 3.4536e-15, 2.0359e-16, 1.3486e-15, 1.9749e-14, 4.3827e-16,\n 8.9864e-15, 1.3526e-16, 8.3275e-16, 6.4613e-16, 1.7617e-16, 1.3537e-14,\n 5.6087e-15, 8.7581e-15, 3.7628e-16, 1.1334e-14, 7.0262e-16, 1.2331e-15,\n 8.9768e-16, 1.9290e-14, 4.5339e-15, 3.2047e-15, 1.7148e-15, 1.7719e-15,\n 8.9755e-15, 7.5422e-15, 3.0586e-15, 4.3510e-15, 3.2803e-15, 5.3509e-17,\n 3.3638e-16, 9.4804e-16, 4.5596e-16, 1.4065e-17, 2.1302e-15, 1.3038e-17,\n 1.1027e-14, 1.9760e-17, 1.4865e-15, 8.5258e-16, 5.4166e-15, 8.6410e-17,\n 5.1061e-15, 1.1778e-15, 5.2438e-16, 7.4451e-15, 3.2938e-15, 3.4429e-17,\n 9.5453e-16, 1.4786e-15, 1.0597e-14, 3.0564e-17, 5.3668e-16, 3.2896e-16,\n 6.8053e-16, 1.1776e-14, 6.6069e-16, 2.0151e-15, 1.2469e-17, 2.1594e-16,\n 7.1691e-15, 8.1069e-15, 4.1272e-15, 2.7869e-16, 5.0231e-15, 1.3681e-14,\n 1.0557e-15, 4.5165e-16, 1.4462e-15, 1.2842e-16, 3.2247e-17, 2.1329e-15,\n 3.8124e-15, 1.4809e-15, 3.1422e-15, 5.2605e-15, 1.9622e-16, 1.4404e-16,\n 5.5129e-15, 1.1616e-14, 8.9021e-15, 3.0442e-15, 8.2002e-15, 1.3998e-17,\n 4.2006e-15, 1.9673e-17, 2.9218e-15, 1.9784e-14, 2.8679e-16, 1.1105e-15,\n 1.8551e-17, 6.4789e-15, 5.6029e-15, 4.0674e-15, 4.2465e-16, 1.1088e-14,\n 1.1081e-14, 4.6402e-16, 7.8024e-15, 6.3416e-15, 1.5638e-15, 9.4835e-16,\n 2.0280e-15, 2.7510e-15, 3.6179e-15, 2.3908e-16, 3.5017e-17, 3.1360e-16,\n 4.9004e-16, 4.4382e-15, 5.5092e-15, 4.2731e-15, 6.7257e-15, 6.1136e-16,\n 8.8054e-16, 9.3825e-15, 5.6209e-15, 9.8457e-18, 1.9733e-15, 3.3470e-15,\n 1.9244e-15, 1.6639e-14, 7.3300e-16, 1.7587e-16, 4.6207e-15, 4.8459e-17,\n 2.8206e-15, 9.6275e-16, 1.3973e-15, 1.7195e-16, 3.0083e-15, 2.4852e-15,\n 8.4723e-16, 4.2787e-18, 3.6758e-16, 7.8686e-15, 8.9783e-16, 7.0162e-15,\n 1.3709e-15, 2.4112e-15, 1.1485e-14, 1.9540e-16, 1.6643e-17, 8.2591e-15,\n 5.0343e-17, 9.9693e-16, 3.3695e-16, 5.7065e-16, 1.8222e-15, 2.2813e-15],\n device='cuda:0')" | |
| }, | |
| "41": { | |
| "step": "tensor(18770.)", | |
| "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[1.0275e-15, 2.9426e-17, 2.0225e-15, ..., 8.9272e-19, 8.9469e-16,\n 4.4768e-16],\n [3.6390e-16, 1.5444e-17, 7.7846e-16, ..., 4.4171e-19, 3.8950e-16,\n 1.6588e-16],\n [6.2333e-16, 3.9257e-17, 1.0987e-15, ..., 1.0325e-18, 5.2683e-16,\n 2.4433e-16],\n ...,\n [1.0724e-15, 2.3554e-17, 2.3605e-15, ..., 1.3196e-18, 1.3283e-15,\n 5.0069e-16],\n [3.2696e-18, 1.5490e-17, 7.0721e-18, ..., 1.2896e-18, 2.2485e-18,\n 4.8786e-18],\n [1.6079e-17, 1.2747e-17, 5.6055e-17, ..., 1.0556e-18, 2.9708e-17,\n 1.3814e-17]], device='cuda:0')" | |
| }, | |
| "42": { | |
| "step": "tensor(18770.)", | |
| "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", | |
| "exp_avg_sq": "tensor([1.5701e-14, 6.3315e-15, 8.8044e-15, 1.6748e-15, 8.6407e-15, 7.0435e-15,\n 1.7307e-14, 6.4376e-15, 9.8499e-15, 5.5936e-15, 5.3436e-15, 6.0182e-15,\n 3.7461e-14, 4.8363e-15, 1.6524e-14, 2.7033e-14, 1.8163e-15, 4.5914e-15,\n 9.6564e-16, 1.9441e-14, 2.3941e-16, 4.0853e-15, 2.3887e-15, 8.5821e-16,\n 2.5598e-15, 4.9093e-14, 5.6161e-15, 6.5098e-14, 2.2656e-14, 2.1831e-15,\n 6.0023e-14, 1.2664e-14, 7.7462e-15, 3.7562e-14, 3.4178e-16, 1.0198e-13,\n 2.0049e-14, 1.3674e-14, 3.7253e-15, 2.4136e-15, 4.1439e-17, 5.3741e-15,\n 8.5043e-16, 1.6528e-14, 1.1502e-15, 1.5735e-14, 9.9118e-15, 4.7873e-15,\n 4.5763e-14, 7.1990e-15, 4.6436e-15, 1.9934e-14, 2.2546e-14, 3.7290e-14,\n 1.7600e-14, 4.2750e-17, 1.8838e-15, 2.3742e-15, 3.9905e-16, 1.2825e-15,\n 1.3877e-14, 1.5792e-14, 2.1166e-15, 1.3730e-15, 3.8763e-14, 5.5024e-15,\n 1.0641e-14, 1.4419e-14, 3.3399e-14, 2.7022e-14, 6.1366e-16, 1.1713e-15,\n 9.1185e-15, 1.7171e-14, 6.6113e-15, 1.3921e-15, 5.0942e-15, 5.6518e-15,\n 3.3580e-15, 6.6240e-15, 1.5585e-14, 1.3439e-15, 2.0606e-14, 1.5595e-15,\n 1.2650e-14, 3.3091e-14, 5.0253e-14, 4.1496e-15, 5.2094e-16, 5.9511e-16,\n 3.1895e-14, 3.1572e-14, 3.4725e-14, 8.9707e-15, 4.0674e-14, 6.9688e-15,\n 3.7257e-15, 3.6275e-14, 3.6776e-16, 3.1970e-17, 1.9457e-14, 1.6857e-15,\n 1.9528e-14, 2.1746e-14, 9.1086e-17, 1.8400e-17, 1.5558e-14, 4.5558e-15,\n 9.3458e-15, 1.3056e-15, 4.8187e-15, 1.8395e-14, 1.7627e-14, 3.0385e-14,\n 4.6385e-15, 1.4964e-14, 2.2938e-14, 5.5215e-15, 1.7759e-15, 1.1668e-14,\n 6.9190e-15, 3.1042e-16, 8.1153e-15, 3.7772e-15, 3.3893e-14, 1.1287e-14,\n 1.1140e-14, 1.6046e-14, 9.7025e-15, 2.0179e-17, 1.5561e-16, 3.4308e-14,\n 3.4224e-14, 3.1627e-14, 1.5227e-15, 2.0430e-15, 8.4654e-16, 5.5828e-15,\n 9.6460e-15, 3.5660e-16, 2.5359e-14, 5.4346e-14, 1.9476e-15, 4.5509e-14,\n 8.6047e-15, 1.9589e-14, 1.1592e-14, 3.4228e-15, 6.2067e-14, 1.0820e-14,\n 4.9775e-16, 3.3479e-14, 7.2627e-16, 3.0143e-14, 2.6077e-14, 1.0492e-14,\n 6.0139e-15, 1.7229e-15, 4.6841e-15, 8.8386e-15, 2.8297e-14, 8.5391e-16,\n 3.0756e-14, 3.9431e-14, 4.3837e-15, 3.2110e-16, 2.6637e-15, 2.1349e-17,\n 4.0087e-15, 8.6674e-15, 2.4234e-14, 1.7899e-15, 5.7527e-15, 2.0094e-15,\n 1.9097e-14, 6.6406e-16, 2.2090e-14, 2.2808e-16, 4.0930e-14, 1.4260e-14,\n 1.3102e-15, 1.2513e-15, 1.3773e-14, 4.2471e-14, 3.1458e-15, 5.1006e-16,\n 2.6789e-17, 9.1669e-15, 2.1525e-14, 1.7301e-15, 1.8880e-16, 2.1226e-15,\n 7.3616e-14, 3.4536e-14, 2.0234e-14, 3.1941e-14, 1.9372e-17, 2.7089e-14,\n 1.2849e-15, 4.2263e-14, 5.4631e-16, 9.0347e-15, 3.1350e-14, 7.9242e-14,\n 1.6508e-14, 1.7372e-15, 1.6034e-14, 4.6037e-15, 1.8326e-15, 4.1703e-15,\n 2.5346e-16, 5.4115e-16, 1.2473e-15, 2.5540e-15, 6.4361e-15, 2.6047e-14,\n 4.3893e-15, 2.8510e-14, 1.4701e-14, 3.8571e-15, 2.0234e-14, 1.1522e-15,\n 2.1366e-14, 6.1460e-17, 3.7640e-15, 6.5831e-14, 1.0106e-14, 4.3068e-16,\n 1.4448e-14, 2.6039e-14, 5.2174e-15, 4.1239e-16, 6.8848e-15, 3.9586e-15,\n 2.4652e-14, 2.4594e-14, 1.9232e-14, 5.9198e-15, 1.8677e-15, 4.0075e-14,\n 2.4015e-15, 1.2522e-14, 1.1684e-14, 2.2580e-14, 2.1158e-14, 1.8440e-15,\n 6.8346e-14, 2.7645e-17, 1.7489e-15, 8.1248e-16, 9.3772e-16, 1.3185e-15,\n 7.0535e-15, 1.9689e-14, 3.7706e-17, 4.6982e-16], device='cuda:0')" | |
| }, | |
| "43": { | |
| "step": "tensor(18770.)", | |
| "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[4.5030e-15, 3.4074e-15, 4.4664e-15, ..., 9.2774e-15, 2.0035e-14,\n 1.0253e-14],\n [2.9186e-16, 2.2516e-16, 2.4911e-16, ..., 5.6792e-16, 1.1454e-15,\n 5.9913e-16],\n [2.9523e-16, 2.2261e-16, 3.1501e-16, ..., 6.2446e-16, 1.3992e-15,\n 7.1326e-16],\n [2.1276e-16, 1.5855e-16, 2.1379e-16, ..., 4.4768e-16, 9.4612e-16,\n 4.8241e-16],\n [3.3475e-16, 2.5515e-16, 3.4948e-16, ..., 6.9730e-16, 1.5638e-15,\n 7.9287e-16]], device='cuda:0')" | |
| }, | |
| "44": { | |
| "step": "tensor(18770.)", | |
| "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", | |
| "exp_avg_sq": "tensor([2.5816e-12, 1.5957e-13, 1.7142e-13, 1.2562e-13, 1.9257e-13],\n device='cuda:0')" | |
| }, | |
| "45": { | |
| "step": "tensor(18770.)", | |
| "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[4.5004e-15, 3.4016e-15, 4.4656e-15, ..., 9.2721e-15, 2.0033e-14,\n 1.0249e-14],\n [2.9163e-16, 2.2465e-16, 2.4903e-16, ..., 5.6746e-16, 1.1453e-15,\n 5.9879e-16],\n [2.9498e-16, 2.2203e-16, 3.1493e-16, ..., 6.2393e-16, 1.3991e-15,\n 7.1285e-16],\n [2.1269e-16, 1.5841e-16, 2.1377e-16, ..., 4.4754e-16, 9.4607e-16,\n 4.8231e-16],\n [3.3462e-16, 2.5484e-16, 3.4943e-16, ..., 6.9701e-16, 1.5638e-15,\n 7.9265e-16]], device='cuda:0')" | |
| }, | |
| "46": { | |
| "step": "tensor(18770.)", | |
| "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", | |
| "exp_avg_sq": "tensor([2.5816e-12, 1.5957e-13, 1.7142e-13, 1.2562e-13, 1.9257e-13],\n device='cuda:0')" | |
| }, | |
| "47": { | |
| "step": "tensor(18770.)", | |
| "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[4.5004e-15, 3.4016e-15, 4.4656e-15, ..., 9.2721e-15, 2.0033e-14,\n 1.0249e-14],\n [2.9163e-16, 2.2465e-16, 2.4903e-16, ..., 5.6746e-16, 1.1453e-15,\n 5.9879e-16],\n [2.9498e-16, 2.2203e-16, 3.1493e-16, ..., 6.2393e-16, 1.3991e-15,\n 7.1285e-16],\n [2.1269e-16, 1.5841e-16, 2.1377e-16, ..., 4.4754e-16, 9.4607e-16,\n 4.8231e-16],\n [3.3462e-16, 2.5484e-16, 3.4943e-16, ..., 6.9701e-16, 1.5638e-15,\n 7.9265e-16]], device='cuda:0')" | |
| }, | |
| "48": { | |
| "step": "tensor(18770.)", | |
| "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", | |
| "exp_avg_sq": "tensor([2.5816e-12, 1.5957e-13, 1.7142e-13, 1.2562e-13, 1.9257e-13],\n device='cuda:0')" | |
| }, | |
| "6": { | |
| "step": "tensor(15016.)", | |
| "exp_avg": "tensor([[-1.6302e-06, -4.5350e-06, 2.1801e-06, ..., -2.2391e-06,\n 2.4646e-06, 3.3358e-06],\n [-1.1118e-05, 4.3680e-06, 2.3517e-06, ..., -2.5745e-06,\n -1.0988e-06, -2.4019e-06],\n [ 2.5380e-07, 8.3255e-07, -5.2510e-07, ..., -5.0277e-06,\n 3.7620e-06, 6.3077e-08],\n ...,\n [ 4.2114e-07, -3.6432e-06, -2.3881e-06, ..., -2.2234e-06,\n -1.7216e-06, 1.1993e-06],\n [ 4.7321e-07, 5.0207e-06, -1.2276e-06, ..., -1.2605e-06,\n 1.2542e-06, 5.0596e-07],\n [-1.0439e-06, 1.1530e-06, 1.2166e-06, ..., -5.4925e-06,\n 4.4815e-07, -4.4797e-06]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[1.8599e-10, 1.2259e-09, 3.1179e-10, ..., 1.2879e-10, 8.3796e-11,\n 1.0245e-10],\n [1.1761e-09, 1.2423e-09, 6.2865e-10, ..., 3.2809e-10, 1.7829e-10,\n 4.2935e-10],\n [5.0793e-11, 5.1140e-11, 3.8649e-11, ..., 3.7740e-10, 4.3268e-11,\n 5.5937e-11],\n ...,\n [8.0779e-11, 5.4614e-10, 1.8421e-10, ..., 5.3503e-11, 4.4454e-11,\n 4.3336e-11],\n [1.9839e-10, 8.9571e-10, 2.3831e-10, ..., 4.3276e-10, 1.1747e-10,\n 1.3308e-10],\n [1.4325e-10, 2.6212e-10, 1.0821e-10, ..., 6.0953e-10, 1.0689e-10,\n 1.5073e-10]], device='cuda:0')" | |
| }, | |
| "7": { | |
| "step": "tensor(15016.)", | |
| "exp_avg": "tensor([ 2.6010e-06, -4.2203e-05, -8.0148e-05, ..., 2.8197e-05,\n 6.7093e-06, -2.8023e-05], device='cuda:0')", | |
| "exp_avg_sq": "tensor([1.9859e-07, 3.3634e-07, 8.6075e-08, ..., 5.9119e-08, 1.8655e-07,\n 2.1565e-07], device='cuda:0')" | |
| }, | |
| "8": { | |
| "step": "tensor(15016.)", | |
| "exp_avg": "tensor([[-1.1029e-07, 3.9164e-07, 3.8312e-08, ..., -8.7341e-08,\n 1.4338e-07, -1.0622e-06],\n [-2.2516e-08, 4.1503e-07, 3.8693e-07, ..., -1.9794e-07,\n -4.4087e-07, -6.4369e-07],\n [-1.6957e-07, 7.8163e-08, 9.4868e-07, ..., 8.6282e-08,\n -6.6079e-08, -1.1127e-06],\n ...,\n [ 1.0762e-07, 1.0932e-06, -3.9344e-07, ..., 1.7869e-08,\n 6.4267e-08, -4.2874e-07],\n [-7.9043e-07, -2.1801e-07, -8.8986e-08, ..., 1.4928e-07,\n 4.1134e-07, 1.0387e-06],\n [-5.0556e-08, 7.2894e-07, -1.3578e-07, ..., -5.1933e-07,\n -2.5557e-07, -6.4283e-08]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[3.4455e-12, 2.6312e-12, 2.9703e-12, ..., 2.6208e-12, 4.0244e-12,\n 1.1289e-11],\n [6.4985e-12, 3.2859e-12, 3.2184e-12, ..., 4.8477e-12, 7.1465e-12,\n 1.1394e-11],\n [1.1542e-11, 5.2242e-12, 6.1049e-12, ..., 4.7915e-12, 1.0432e-11,\n 1.3460e-11],\n ...,\n [1.1971e-11, 4.8625e-12, 4.1274e-12, ..., 7.3374e-12, 1.1599e-11,\n 1.4106e-11],\n [7.3280e-12, 4.3459e-12, 4.1509e-12, ..., 6.5644e-12, 1.1402e-11,\n 1.5646e-11],\n [6.8605e-12, 5.1162e-12, 3.8124e-12, ..., 4.5774e-12, 1.0219e-11,\n 1.7690e-11]], device='cuda:0')" | |
| }, | |
| "9": { | |
| "step": "tensor(11262.)", | |
| "exp_avg": "tensor([[ 1.5931e-06, -2.4084e-06, -7.1674e-07, ..., 2.0872e-07,\n 6.5121e-07, 1.4585e-06],\n [ 7.6111e-06, -5.2483e-06, 4.8074e-06, ..., 2.2638e-06,\n -1.6397e-06, -1.5331e-06],\n [-2.0538e-06, 4.1970e-06, -1.0324e-06, ..., -2.5775e-06,\n -5.3749e-06, -3.9641e-07],\n ...,\n [-5.6855e-06, -2.0225e-05, -7.8764e-07, ..., 5.1956e-07,\n -3.1044e-06, -6.4914e-06],\n [-1.6226e-06, -3.1049e-06, 7.7294e-06, ..., 5.7325e-06,\n 6.7029e-06, -3.9664e-06],\n [ 4.2792e-12, 5.3706e-12, 6.3832e-12, ..., -9.0619e-13,\n 3.9004e-12, 5.4147e-13]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[2.3465e-10, 1.3334e-09, 5.0540e-10, ..., 1.5254e-10, 6.7733e-11,\n 7.5522e-11],\n [2.5283e-09, 2.3851e-09, 6.7921e-10, ..., 4.7407e-10, 5.7040e-10,\n 5.0531e-10],\n [1.7930e-10, 1.9766e-10, 6.7182e-11, ..., 2.2307e-10, 7.2038e-11,\n 6.7174e-11],\n ...,\n [1.1405e-10, 3.4602e-10, 1.4476e-10, ..., 3.2397e-10, 9.3825e-11,\n 5.3209e-11],\n [5.7687e-11, 3.9183e-10, 3.4268e-10, ..., 9.6829e-11, 1.2924e-10,\n 9.3670e-11],\n [5.5762e-15, 1.7475e-14, 8.4052e-15, ..., 2.5379e-14, 3.0142e-15,\n 3.0846e-15]], device='cuda:0')" | |
| }, | |
| "10": { | |
| "step": "tensor(11262.)", | |
| "exp_avg": "tensor([ 4.5440e-05, 4.2635e-05, -6.6603e-05, ..., -2.1868e-04,\n -1.2481e-04, -1.2286e-10], device='cuda:0')", | |
| "exp_avg_sq": "tensor([1.1416e-07, 7.1433e-07, 1.1597e-07, ..., 1.2130e-07, 8.0618e-08,\n 6.8392e-12], device='cuda:0')" | |
| }, | |
| "11": { | |
| "step": "tensor(11262.)", | |
| "exp_avg": "tensor([[ 6.5045e-08, 5.0119e-07, -5.5808e-08, ..., -3.3542e-07,\n 1.4987e-07, -1.7017e-13],\n [-6.9171e-07, -1.2392e-06, -1.2418e-07, ..., 5.8712e-07,\n -3.0513e-07, 3.0010e-13],\n [ 2.4994e-07, -5.8445e-07, 3.0398e-07, ..., -3.0869e-07,\n -4.8065e-07, -1.4186e-13],\n ...,\n [ 5.9527e-08, 6.4861e-07, -2.4925e-08, ..., 2.5305e-07,\n -4.1683e-07, -1.7462e-13],\n [ 4.4098e-07, 6.0046e-07, -4.1722e-07, ..., 1.9197e-07,\n -1.9090e-07, -3.6515e-13],\n [ 4.1966e-07, -1.7437e-06, 5.6899e-08, ..., 2.5689e-08,\n 2.0374e-07, -4.6566e-13]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[1.3168e-12, 1.0102e-11, 2.0154e-12, ..., 1.3671e-12, 1.7616e-12,\n 2.5104e-15],\n [3.4247e-12, 2.0407e-11, 1.9454e-12, ..., 3.7238e-12, 1.3617e-12,\n 3.3653e-15],\n [3.7968e-12, 1.7255e-11, 3.4185e-12, ..., 1.9073e-12, 1.1868e-12,\n 3.6974e-15],\n ...,\n [2.4582e-12, 2.0923e-11, 2.2639e-12, ..., 2.3769e-12, 7.4596e-12,\n 6.0388e-15],\n [3.9691e-12, 1.5325e-11, 8.3536e-12, ..., 2.9142e-12, 7.3388e-12,\n 5.1552e-15],\n [2.8154e-12, 2.9106e-11, 4.3475e-12, ..., 2.0745e-12, 4.0066e-12,\n 7.8089e-15]], device='cuda:0')" | |
| }, | |
| "12": { | |
| "step": "tensor(7508.)", | |
| "exp_avg": "tensor([[-4.6442e-10, -1.3258e-08, -5.0809e-09, ..., -9.6916e-09,\n -3.8558e-09, -6.5775e-09],\n [ 1.3938e-06, 1.1436e-06, -9.2802e-07, ..., 1.4986e-06,\n 2.9954e-07, -3.2979e-07],\n [ 1.0684e-05, 1.4580e-05, -4.5091e-06, ..., 4.8941e-06,\n 2.4724e-06, 7.4807e-06],\n ...,\n [-2.4408e-05, 1.1365e-05, -1.9250e-07, ..., 1.5690e-07,\n -4.1343e-06, -4.0017e-06],\n [-2.3652e-06, 3.2302e-06, -7.6947e-07, ..., -5.6160e-07,\n 4.2710e-06, -1.0157e-06],\n [ 1.8202e-07, 1.2720e-05, 3.6747e-06, ..., 6.6209e-06,\n -1.8927e-07, -1.0631e-07]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[4.7213e-13, 4.2397e-13, 2.5026e-13, ..., 6.2300e-13, 3.4222e-13,\n 2.6248e-13],\n [7.2100e-11, 1.4272e-10, 2.8322e-11, ..., 1.8754e-10, 2.5290e-11,\n 2.3643e-11],\n [7.5008e-09, 3.4294e-09, 1.6687e-09, ..., 2.2735e-09, 1.0473e-09,\n 1.4504e-09],\n ...,\n [3.0743e-09, 1.8166e-09, 2.4643e-10, ..., 4.3031e-10, 4.4230e-10,\n 2.6186e-10],\n [4.0503e-10, 1.5815e-09, 5.0458e-10, ..., 1.6884e-10, 1.2383e-10,\n 1.1049e-10],\n [3.9690e-10, 1.0272e-09, 4.1019e-10, ..., 1.9862e-10, 1.4071e-10,\n 1.5221e-10]], device='cuda:0')" | |
| }, | |
| "13": { | |
| "step": "tensor(7508.)", | |
| "exp_avg": "tensor([-3.0234e-07, 3.3832e-05, 1.2223e-04, ..., -2.2206e-04,\n -6.7167e-05, -8.6418e-05], device='cuda:0')", | |
| "exp_avg_sq": "tensor([4.5545e-10, 6.2291e-08, 1.5553e-06, ..., 5.4690e-07, 1.5529e-07,\n 2.1756e-07], device='cuda:0')" | |
| }, | |
| "14": { | |
| "step": "tensor(7508.)", | |
| "exp_avg": "tensor([[ 9.7029e-10, 2.5063e-08, 9.5710e-07, ..., 4.2250e-07,\n -1.4229e-07, -1.3046e-07],\n [-2.2946e-09, -8.7224e-09, 3.1436e-07, ..., 3.0158e-07,\n -1.5663e-07, -5.5361e-07],\n [ 6.4916e-10, -1.2457e-08, 1.8484e-07, ..., 7.0871e-08,\n -1.1740e-08, -4.4540e-07],\n ...,\n [-7.6472e-11, 1.9498e-07, 9.9544e-07, ..., 9.7222e-07,\n -2.1919e-07, 1.1285e-07],\n [ 1.3952e-09, -1.0700e-07, 1.0839e-06, ..., -6.7729e-07,\n -3.0913e-08, -8.9876e-07],\n [ 2.2422e-09, 7.4842e-08, -9.2544e-08, ..., -1.5864e-08,\n 1.5720e-07, 1.1612e-06]], device='cuda:0')", | |
| "exp_avg_sq": "tensor([[1.8309e-14, 1.9814e-12, 5.0981e-12, ..., 6.5333e-12, 1.9289e-12,\n 3.6275e-12],\n [2.8337e-14, 2.5060e-12, 1.7889e-11, ..., 5.9021e-12, 4.0405e-12,\n 9.8750e-12],\n [6.2022e-14, 2.9356e-12, 1.9607e-11, ..., 6.4311e-12, 3.0571e-12,\n 9.6555e-12],\n ...,\n [8.5995e-14, 1.7454e-12, 1.7597e-11, ..., 9.3300e-12, 3.0233e-12,\n 9.7887e-12],\n [3.4675e-14, 2.3668e-12, 3.6334e-11, ..., 1.6075e-11, 3.3758e-12,\n 7.5260e-12],\n [5.6179e-14, 1.1251e-12, 2.7939e-11, ..., 3.2958e-12, 4.4396e-12,\n 1.9826e-11]], device='cuda:0')" | |
| } | |
| }, | |
| "param_groups": [ | |
| { | |
| "lr": 0.0034555695366224513, | |
| "name": "scale_256", | |
| "betas": [ | |
| 0.9, | |
| 0.999 | |
| ], | |
| "eps": 1e-08, | |
| "weight_decay": 1e-05, | |
| "amsgrad": false, | |
| "maximize": false, | |
| "foreach": null, | |
| "capturable": false, | |
| "differentiable": false, | |
| "fused": null, | |
| "decoupled_weight_decay": true, | |
| "initial_lr": 0.01, | |
| "params": [ | |
| 0, | |
| 1, | |
| 2 | |
| ] | |
| }, | |
| { | |
| "lr": 0.0034555695366224513, | |
| "name": "scale_512", | |
| "betas": [ | |
| 0.9, | |
| 0.999 | |
| ], | |
| "eps": 1e-08, | |
| "weight_decay": 1e-05, | |
| "amsgrad": false, | |
| "maximize": false, | |
| "foreach": null, | |
| "capturable": false, | |
| "differentiable": false, | |
| "fused": null, | |
| "decoupled_weight_decay": true, | |
| "initial_lr": 0.01, | |
| "params": [ | |
| 3, | |
| 4, | |
| 5 | |
| ] | |
| }, | |
| { | |
| "lr": 0.0034555695366224513, | |
| "name": "scale_768", | |
| "betas": [ | |
| 0.9, | |
| 0.999 | |
| ], | |
| "eps": 1e-08, | |
| "weight_decay": 1e-05, | |
| "amsgrad": false, | |
| "maximize": false, | |
| "foreach": null, | |
| "capturable": false, | |
| "differentiable": false, | |
| "fused": null, | |
| "decoupled_weight_decay": true, | |
| "initial_lr": 0.01, | |
| "params": [ | |
| 6, | |
| 7, | |
| 8 | |
| ] | |
| }, | |
| { | |
| "lr": 0.0034555695366224513, | |
| "name": "scale_1024", | |
| "betas": [ | |
| 0.9, | |
| 0.999 | |
| ], | |
| "eps": 1e-08, | |
| "weight_decay": 1e-05, | |
| "amsgrad": false, | |
| "maximize": false, | |
| "foreach": null, | |
| "capturable": false, | |
| "differentiable": false, | |
| "fused": null, | |
| "decoupled_weight_decay": true, | |
| "initial_lr": 0.01, | |
| "params": [ | |
| 9, | |
| 10, | |
| 11 | |
| ] | |
| }, | |
| { | |
| "lr": 0.0034555695366224513, | |
| "name": "scale_1280", | |
| "betas": [ | |
| 0.9, | |
| 0.999 | |
| ], | |
| "eps": 1e-08, | |
| "weight_decay": 1e-05, | |
| "amsgrad": false, | |
| "maximize": false, | |
| "foreach": null, | |
| "capturable": false, | |
| "differentiable": false, | |
| "fused": null, | |
| "decoupled_weight_decay": true, | |
| "initial_lr": 0.01, | |
| "params": [ | |
| 12, | |
| 13, | |
| 14 | |
| ] | |
| }, | |
| { | |
| "lr": 0.001728112022559819, | |
| "name": "fusion", | |
| "betas": [ | |
| 0.9, | |
| 0.999 | |
| ], | |
| "eps": 1e-08, | |
| "weight_decay": 1e-05, | |
| "amsgrad": false, | |
| "maximize": false, | |
| "foreach": null, | |
| "capturable": false, | |
| "differentiable": false, | |
| "fused": null, | |
| "decoupled_weight_decay": true, | |
| "initial_lr": 0.005, | |
| "params": [ | |
| 15, | |
| 16, | |
| 17, | |
| 18, | |
| 19, | |
| 20, | |
| 21, | |
| 22, | |
| 23, | |
| 24, | |
| 25, | |
| 26, | |
| 27, | |
| 28, | |
| 29, | |
| 30, | |
| 31, | |
| 32, | |
| 33, | |
| 34, | |
| 35, | |
| 36, | |
| 37, | |
| 38, | |
| 39, | |
| 40, | |
| 41, | |
| 42, | |
| 43, | |
| 44, | |
| 45, | |
| 46, | |
| 47, | |
| 48 | |
| ] | |
| } | |
| ] | |
| }, | |
| "scheduler_state_dict": { | |
| "T_0": 10, | |
| "T_i": 10, | |
| "T_mult": 2, | |
| "eta_min": 1e-06, | |
| "T_cur": 6, | |
| "base_lrs": [ | |
| 0.01, | |
| 0.01, | |
| 0.01, | |
| 0.01, | |
| 0.01, | |
| 0.005 | |
| ], | |
| "last_epoch": 6, | |
| "_step_count": 0, | |
| "_is_initial": false, | |
| "_get_lr_called_within_step": false, | |
| "_last_lr": [ | |
| 0.0034555695366224513, | |
| 0.0034555695366224513, | |
| 0.0034555695366224513, | |
| 0.0034555695366224513, | |
| 0.0034555695366224513, | |
| 0.001728112022559819 | |
| ] | |
| }, | |
| "metrics": { | |
| "best_val_acc": 66.138, | |
| "best_epoch": 5, | |
| "scale_accuracies": { | |
| "256": 66.138, | |
| "512": 72.07333333333334, | |
| "768": 73.588, | |
| "1024": 73.954, | |
| "1280": 73.66933333333333 | |
| }, | |
| "training_history": { | |
| "epochs": [ | |
| 1, | |
| 2, | |
| 3, | |
| 4, | |
| 5, | |
| 6 | |
| ], | |
| "train_loss": [ | |
| 2.9751985085156605, | |
| 2.963648736889717, | |
| 3.1708614105992834, | |
| 3.378361599522826, | |
| 3.580668337115215, | |
| 3.3858161155505577 | |
| ], | |
| "train_acc": [ | |
| 56.42811072509152, | |
| 61.669816138983705, | |
| 62.79436378447671, | |
| 63.576593319476174, | |
| 64.19873443508926, | |
| 64.70800970261227 | |
| ], | |
| "val_acc": [ | |
| 62.524, | |
| 64.036, | |
| 64.86466666666666, | |
| 65.18733333333333, | |
| 65.698, | |
| 66.138 | |
| ], | |
| "scale_accs": { | |
| "256": [ | |
| 62.524, | |
| 64.036, | |
| 64.86466666666666, | |
| 65.18733333333333, | |
| 65.698, | |
| 66.138 | |
| ], | |
| "512": [ | |
| 69.152, | |
| 70.62733333333334, | |
| 71.12733333333334, | |
| 71.67266666666667, | |
| 72.07333333333334 | |
| ], | |
| "768": [ | |
| 70.71, | |
| 72.20066666666666, | |
| 73.10133333333333, | |
| 73.588 | |
| ], | |
| "1024": [ | |
| 71.38866666666667, | |
| 73.18, | |
| 73.954 | |
| ], | |
| "1280": [ | |
| 72.18466666666667, | |
| 73.66933333333333 | |
| ] | |
| }, | |
| "lr": [ | |
| 0.00975530705321762, | |
| 0.00904518046337755, | |
| 0.00793913236883622, | |
| 0.00654543046337755, | |
| 0.005000500000000001, | |
| 0.0034555695366224513 | |
| ] | |
| } | |
| }, | |
| "train_config": { | |
| "name": "david_training", | |
| "run_id": "20251012_221046", | |
| "dataset_name": "AbstractPhil/imagenet-clip-features-orderly", | |
| "model_variant": [ | |
| "clip_vit_b16", | |
| "clip_vit_laion_b32", | |
| "clip_vit_b32" | |
| ], | |
| "num_classes": 1000, | |
| "preset": "high_accuracy", | |
| "custom_config_path": null, | |
| "num_classes_override": null, | |
| "use_belly_override": null, | |
| "belly_expand_override": null, | |
| "progressive_training_override": true, | |
| "scale_warmup_epochs_override": { | |
| "256": 0, | |
| "512": 1, | |
| "768": 2, | |
| "1024": 3, | |
| "1280": 4 | |
| }, | |
| "num_epochs": 10, | |
| "batch_size": 1024, | |
| "learning_rate": 0.01, | |
| "weight_decay": 1e-05, | |
| "warmup_epochs": 3, | |
| "use_rose_loss": true, | |
| "rose_initial_weight": 0.2, | |
| "rose_max_weight": 0.8, | |
| "rose_weight_schedule": "adaptive", | |
| "use_cayley_loss": false, | |
| "cayley_weight": 0.01, | |
| "scale_loss_balance": null, | |
| "use_mixed_precision": false, | |
| "gradient_clip": 10.0, | |
| "scheduler_type": "cosine_restarts", | |
| "min_lr": 1e-06, | |
| "freeze_strategy": "never", | |
| "freeze_threshold": 90.0, | |
| "unfreeze_on_plateau": true, | |
| "patience": 10, | |
| "track_gradients": true, | |
| "gradient_scale_threshold": 1e-05, | |
| "gradient_scale_multiplier": 10.0, | |
| "log_interval": 50, | |
| "val_interval": 1, | |
| "save_interval": 5, | |
| "log_fusion_weights": true, | |
| "log_loss_components": true, | |
| "save_format": "safetensors", | |
| "hf_repo": "AbstractPhil/david-shared-space", | |
| "upload_to_hub": true, | |
| "base_dir": "./david_training", | |
| "num_workers": 10, | |
| "pin_memory": true, | |
| "prefetch_factor": 4, | |
| "persistent_workers": true | |
| } | |
| } |