diff --git "a/weights/David-partial_shared-hierarchical_tree/20251012_152245/best_model_acc75.53_metadata.json" "b/weights/David-partial_shared-hierarchical_tree/20251012_152245/best_model_acc75.53_metadata.json" new file mode 100644--- /dev/null +++ "b/weights/David-partial_shared-hierarchical_tree/20251012_152245/best_model_acc75.53_metadata.json" @@ -0,0 +1,709 @@ +{ + "epoch": 8, + "optimizer_state_dict": { + "state": { + "0": { + "step": "tensor(11268.)", + "exp_avg": "tensor([[-6.5022e-04, 1.5770e-03, -6.6278e-04, ..., 1.8166e-04,\n -5.2384e-04, 2.7031e-05],\n [ 2.6736e-05, -1.5627e-03, -8.1470e-04, ..., 2.4641e-05,\n -6.5245e-04, 5.2435e-04],\n [ 7.3952e-04, -1.5534e-03, 7.9026e-04, ..., 4.1978e-04,\n 3.4472e-04, -1.4625e-04],\n ...,\n [ 5.7059e-04, 3.1276e-04, -1.2115e-03, ..., 9.1688e-04,\n -1.0436e-04, -8.4940e-05],\n [ 1.8192e-04, 1.2010e-06, 1.8779e-04, ..., 7.6435e-04,\n 1.8329e-04, -3.0034e-04],\n [ 4.7844e-04, -2.0743e-04, -4.9450e-05, ..., 2.6786e-04,\n -3.9698e-04, -2.7044e-04]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.8777e-06, 9.1921e-06, 6.5507e-06, ..., 1.2831e-06, 8.5892e-07,\n 1.7820e-06],\n [2.2231e-06, 1.3433e-05, 5.8741e-06, ..., 1.5836e-06, 1.0363e-06,\n 1.4760e-06],\n [2.4559e-06, 1.3000e-05, 6.5204e-06, ..., 1.9452e-06, 1.0022e-06,\n 2.7748e-06],\n ...,\n [1.6190e-06, 9.5355e-06, 6.2329e-06, ..., 1.1496e-06, 7.3367e-07,\n 1.1174e-06],\n [1.8958e-06, 8.8206e-06, 6.7114e-06, ..., 1.2877e-06, 8.6049e-07,\n 1.8013e-06],\n [1.4573e-06, 1.1769e-05, 4.3175e-06, ..., 1.0761e-06, 7.2599e-07,\n 1.2416e-06]], device='cuda:0')" + }, + "1": { + "step": "tensor(11268.)", + "exp_avg": "tensor([-4.3592e-03, -5.3801e-03, 1.4889e-02, 2.1632e-02, 4.1060e-03,\n -1.1735e-02, -1.0143e-02, -5.3906e-03, -1.0617e-02, 1.5713e-02,\n -6.4685e-03, 3.7511e-03, 3.4829e-03, -6.0199e-03, 1.9351e-03,\n -1.3678e-02, -1.9010e-02, 1.1044e-02, 1.0258e-02, -1.0289e-02,\n 1.1184e-02, 8.6132e-03, 1.9273e-02, 6.9247e-03, -1.2673e-02,\n 9.1492e-03, 5.7123e-03, 7.0772e-03, 1.5234e-02, 8.7379e-03,\n 1.8594e-02, -1.7516e-03, 4.7484e-03, -1.0071e-02, -1.6076e-02,\n 1.1168e-02, 1.5967e-02, -1.4339e-02, -6.1114e-04, 4.1946e-03,\n 2.8028e-02, 1.6800e-02, -1.1671e-02, 1.5977e-03, -7.5554e-03,\n -6.8839e-04, -1.8699e-02, -9.2160e-03, 4.4636e-03, 1.2485e-02,\n 4.3592e-03, 2.5182e-02, -1.2192e-02, -1.4586e-02, -4.6505e-03,\n -1.4229e-02, -1.0716e-03, 6.1683e-03, -2.1337e-02, 2.9147e-02,\n 5.1907e-03, 1.1912e-02, 1.0126e-02, -5.1108e-03, 1.2264e-02,\n -7.5746e-03, 8.3732e-03, 3.3896e-03, 4.9018e-03, 5.7941e-03,\n 7.6484e-04, -1.6760e-02, -1.7454e-03, 2.7041e-02, -1.6281e-02,\n -1.0438e-02, 1.4248e-03, 3.7283e-04, -5.6799e-03, -7.9266e-04,\n 2.0528e-02, -1.9490e-02, -1.1539e-02, 9.0675e-04, -2.2614e-02,\n -3.9200e-03, 6.6476e-03, 1.6505e-02, -1.4256e-02, -2.6109e-02,\n 1.9933e-02, 8.6720e-03, 3.8639e-03, -5.0561e-03, 9.2281e-03,\n 1.3197e-02, -6.5153e-03, -5.9098e-03, 1.6726e-02, 6.0603e-03,\n -2.1762e-03, -2.1414e-02, 7.3471e-03, 7.5025e-03, -1.5682e-02,\n -1.4318e-03, -2.8364e-02, 2.3019e-03, 1.6346e-02, -1.1115e-02,\n -2.7497e-03, -9.8920e-03, 2.1763e-03, -6.1281e-03, -1.0096e-02,\n 1.3357e-03, 9.0309e-03, -1.1373e-02, -1.5122e-02, -1.1335e-02,\n 1.9269e-03, -5.4961e-03, 1.2799e-02, -2.0952e-02, -4.4260e-03,\n -7.5915e-04, 2.2291e-02, 1.5590e-03, -6.0917e-03, 9.0067e-03,\n -6.0244e-03, 9.0083e-03, 3.9676e-02, 1.7145e-02, 6.3034e-03,\n -1.6166e-02, 1.5751e-02, 1.1998e-02, -7.8836e-03, -3.2655e-04,\n -1.0761e-02, 5.2409e-03, -1.2873e-02, 2.5515e-02, -2.7039e-05,\n -1.0468e-02, -8.3229e-03, 2.7140e-03, 2.7426e-03, 1.7849e-02,\n -2.3055e-03, -1.4689e-02, 7.3948e-03, 5.1254e-03, 2.3870e-02,\n 9.1977e-03, 1.0650e-03, 1.2026e-02, 2.1926e-03, -1.2614e-02,\n -4.4636e-03, 2.0025e-03, 8.0756e-03, -5.6458e-03, -4.7389e-03,\n -5.3021e-03, 3.5814e-03, 7.6325e-03, 1.7047e-02, -1.2360e-02,\n 8.5214e-03, 1.6946e-02, -9.7063e-03, -1.0247e-02, -2.7226e-03,\n -1.3828e-03, 2.4277e-02, -1.6396e-02, -9.0038e-03, 8.5606e-03,\n -9.7080e-03, 9.6226e-03, 2.7896e-03, 1.5534e-02, 6.6527e-03,\n 6.6346e-03, 1.7183e-02, 1.0876e-02, -1.8700e-02, 1.6844e-03,\n -1.7965e-02, -1.7074e-02, -9.6346e-03, 7.6345e-03, -1.2419e-02,\n 1.4804e-03, -1.2494e-02, -2.6413e-03, -1.0301e-02, 2.5054e-02,\n -2.6509e-02, 9.5009e-04, -7.4656e-03, 4.7494e-03, -9.5146e-03,\n -1.0523e-02, -1.8467e-02, 1.0792e-02, 2.7983e-02, 8.3563e-04,\n 1.0290e-02, 1.2387e-02, -7.0519e-03, -6.1180e-03, -1.0480e-02,\n 1.3646e-02, -3.6155e-03, 1.6778e-03, 3.4469e-03, 1.9927e-02,\n -2.0970e-02, 2.9173e-02, -8.9273e-04, -1.5829e-03, 1.4491e-03,\n 7.7528e-03, -2.1108e-02, 2.6747e-03, -1.9581e-02, 2.3121e-02,\n -1.9694e-02, 1.6816e-03, 1.3583e-03, 1.8792e-03, -1.8423e-03,\n -2.3580e-04, -9.7434e-03, -2.4821e-03, 7.3445e-03, 3.1074e-03,\n -1.5002e-03, 2.8866e-02, 8.2805e-03, -1.5145e-02, 8.2824e-03,\n -2.6784e-02, -9.8827e-03, -7.7812e-04, 1.1707e-02, -9.0670e-03,\n -1.1793e-02, -1.6296e-03, 3.9290e-03, -4.0545e-03, 1.4402e-02,\n 4.1092e-03, -3.5177e-03, 2.6588e-03, 1.4170e-02, -8.4696e-04,\n -4.8143e-03, 1.2946e-02, 5.0008e-03, -1.9160e-02, -1.3561e-03,\n 2.8428e-03, 1.2875e-03, 4.6393e-04, 1.5586e-02, 5.3635e-03,\n -1.9804e-02, -1.6819e-02, 1.4976e-02, -1.2569e-02, 5.3349e-02,\n -5.8659e-03, 1.5279e-02, -4.4205e-03, 1.2830e-02, -1.4449e-02,\n -8.1484e-03, 5.0838e-03, -1.6105e-02, 4.0415e-03, 6.3168e-04,\n -1.6240e-02, 2.7188e-03, 1.2472e-03, 2.7239e-03, 3.5246e-02,\n -2.6394e-02, 4.5910e-03, 3.0049e-02, -1.6355e-02, -8.8518e-03,\n 1.6057e-02, 1.3980e-02, 1.5912e-02, 1.5714e-02, -6.1788e-03,\n -8.2276e-04, 2.4597e-03, 2.4931e-02, 1.2587e-02, 1.3842e-02,\n -1.3116e-02, 4.1302e-03, -1.5819e-02, -1.6758e-03, 1.1132e-02,\n -1.5648e-02, 2.6937e-02, 9.1305e-04, -5.0103e-04, 1.1335e-02,\n 5.0775e-03, 4.6712e-03, -1.3064e-02, 3.0731e-03, -2.8344e-03,\n -1.2843e-02, -2.4740e-03, -1.6332e-03, -1.1356e-02, -1.3914e-02,\n -1.1401e-02, 2.0188e-02, -2.5700e-05, 3.7524e-03, 1.9361e-02,\n -8.8435e-04, -3.6011e-03, 1.4842e-02, -3.0346e-02, -1.8485e-02,\n 1.4952e-02, -6.2640e-03, -2.5006e-02, 8.1409e-03, 2.2747e-02,\n 2.2352e-02, 2.0713e-02, 2.7951e-03, 9.5137e-03, -1.1879e-02,\n 8.3737e-03, -1.9201e-02, 1.4976e-02, -2.6143e-03, 7.6015e-03,\n 2.7374e-04, 1.7187e-02, 1.4262e-02, -1.2286e-02, 1.5596e-02,\n 3.3449e-03, -2.4250e-03, -4.0864e-03, 1.5001e-02, 9.7756e-03,\n 1.4120e-02, 2.7356e-02, 1.4689e-02, -5.0592e-03, 1.2632e-02,\n -1.1720e-02, -8.1125e-04, -8.5967e-03, 6.1948e-03, 1.0181e-02,\n 1.5668e-03, 5.4731e-03, 1.5359e-02, 9.7955e-03, 9.3578e-03,\n -1.5793e-02, 2.0113e-02, 2.1433e-02, 3.7431e-03, 1.9637e-02,\n 5.7685e-03, -6.5597e-03, 9.3881e-03, 2.3836e-02, 9.5273e-03,\n -8.3942e-03, 4.9474e-03, -7.6755e-03, 1.0480e-02, 2.1079e-02,\n -1.7088e-03, 2.1848e-02, -9.6280e-03, 7.6798e-03, -7.9147e-03,\n -2.8866e-02, 4.9815e-04, -2.3001e-02, -1.0786e-02, 2.3383e-04,\n -1.7084e-02, -2.6004e-03, 2.5785e-02, 3.5444e-03, -1.6046e-02,\n 2.0822e-03, -6.4960e-05, -2.9657e-02, 2.9094e-03, -1.6106e-02,\n 1.1395e-02, 1.1536e-02, -6.0664e-03, 2.1758e-02, -2.2247e-02,\n -1.2665e-03, 1.6067e-02, 7.1853e-03, 1.6000e-02, 1.5868e-02,\n 3.8757e-03, -9.0095e-03, 1.3272e-02, 2.4061e-03, 8.2671e-03,\n 9.3412e-03, 1.6600e-02, 3.7712e-03, 6.7861e-04, -5.5761e-03,\n -5.5806e-03, -1.2823e-03, 2.2783e-03, -4.8825e-03, -8.8647e-04,\n -4.8080e-03, 8.7815e-03, -1.5967e-02, 1.6466e-03, 1.9031e-02,\n 1.4925e-02, -8.7248e-03, -1.7602e-03, -1.2331e-02, 5.1539e-03,\n -9.3225e-03, 1.0620e-02, -1.8044e-04, 2.4743e-02, -9.8623e-03,\n -1.4465e-02, -2.1303e-03, -3.3406e-03, 1.4935e-02, 1.9352e-03,\n -8.2080e-03, -1.5001e-02, 1.2357e-02, 1.3935e-04, -1.2182e-02,\n 1.8158e-02, 3.0593e-03, 5.4270e-03, 1.6193e-03, -9.0357e-03,\n 1.0114e-03, 6.6873e-03, 2.3458e-02, -6.4276e-03, 2.7784e-02,\n 5.4509e-03, -4.0432e-03, 2.2825e-02, 8.8621e-03, 2.2748e-03,\n 9.8701e-03, 6.8564e-03, -6.9153e-03, -9.0019e-03, 1.4696e-02,\n -5.1600e-03, 6.1489e-03, 2.8824e-03, -1.0082e-02, -1.4136e-03,\n -1.1792e-02, -1.1657e-02, -2.8680e-03, 1.9453e-03, 3.6806e-03,\n -2.9098e-03, -2.9216e-02, 5.1973e-03, -3.5327e-03, -9.6941e-03,\n -2.7223e-04, -1.0915e-02, 1.6123e-03, 1.9267e-02, -1.0112e-02,\n 7.4540e-03, -2.1814e-03, -7.2308e-03, 1.2082e-02, -6.4422e-03,\n -5.6238e-03, -5.2892e-03, -7.1812e-03, -8.5948e-03, 1.4268e-03,\n 1.5008e-03, 1.0757e-02, -1.1057e-02, -1.7623e-02, 1.1345e-02,\n 9.0890e-03, -1.9241e-02, 3.5201e-03, 1.0195e-02, 1.2793e-02,\n 4.5948e-03, 2.3974e-02, -8.6667e-03, 8.5432e-04, -1.1106e-02,\n -1.7670e-02, 4.8602e-03, -4.5581e-03, 1.1588e-02, -2.8848e-04,\n -2.5098e-02, 5.0356e-03, 6.6157e-04, 1.7468e-02, -6.2331e-03,\n 9.0049e-03, -6.3527e-03, -6.5131e-03, -4.1682e-03, -1.1283e-02,\n -1.1496e-02, -8.6733e-03, 1.4731e-02, 9.9925e-03, 1.7593e-02,\n 3.0415e-04, 2.3770e-02, -1.6041e-03, 1.0215e-02, -1.6781e-03,\n -9.0451e-03, 1.3207e-03, -9.1759e-04, -3.4311e-03, 3.9722e-03,\n 2.9096e-02, 9.8304e-03, 4.6787e-03, -2.1090e-02, 1.3799e-02,\n -4.5428e-03, 1.4615e-02, -2.7157e-02, 2.8970e-04, -1.6929e-02,\n -4.5644e-03, 3.5357e-03, -1.0875e-02, 6.6855e-03, -8.2937e-03,\n 9.9056e-03, 2.1008e-02, 3.1089e-02, -2.5563e-02, -2.2594e-03,\n -3.1778e-03, -5.4487e-03, -8.6661e-03, 1.6916e-02, -2.6894e-03,\n -8.1441e-03, 1.6024e-02, 5.7729e-03, -1.8283e-02, 1.0641e-02,\n -6.0487e-03, 1.1601e-02, 1.7287e-03, -2.1074e-02, -3.4586e-03,\n -1.7082e-02, -1.3805e-02, 1.0777e-02, -4.8698e-03, 2.9387e-03,\n 1.0241e-02, 8.3849e-03, 4.4116e-03, 3.6327e-04, -5.4299e-03,\n -2.9430e-03, 6.6090e-03, -2.4220e-02, 5.0538e-03, 5.5911e-04,\n -1.6148e-03, 8.0988e-03, 1.6283e-02, 1.4195e-02, -3.3030e-03,\n 4.2190e-04, -4.2238e-03, -6.6059e-04, -2.9477e-02, -8.0494e-03,\n 2.6974e-03, 2.3479e-02, -2.2995e-02, -4.0042e-03, 1.5353e-02,\n -2.3541e-02, -1.7143e-03, -4.3762e-03, -7.7281e-03, -6.6908e-03,\n -2.0823e-02, 8.3135e-03, -4.4927e-03, 5.3139e-03, -1.4937e-02,\n 1.8800e-02, -2.7363e-03, 6.5992e-03, 1.0823e-02, 7.5192e-03,\n -1.4705e-02, 1.0451e-02, 1.2010e-03, 6.7718e-03, -5.8320e-03,\n -1.8903e-03, -2.4732e-02, 1.0028e-03, -4.5135e-03, -5.6339e-03,\n -9.7450e-03, -1.8885e-02, 2.8112e-03, -4.5751e-04, -2.8403e-02,\n -9.2819e-03, 7.6467e-03, 1.6082e-02, -2.7016e-03, -2.9185e-02,\n -2.8916e-03, 4.5846e-03, -5.6909e-03, 2.3304e-02, 1.8279e-02,\n 1.1208e-02, -5.2695e-03, -1.3584e-02, 1.3582e-02, 1.6721e-02,\n -1.3943e-02, -1.4794e-02, 8.1587e-03, -2.1878e-02, -1.2539e-02,\n -1.3529e-02, 7.3346e-04, 8.6640e-03, 9.9988e-03, -1.2381e-02,\n 1.4123e-02, 5.7351e-03, 1.3424e-02, 1.3136e-02, -1.1994e-02,\n -3.0652e-02, 4.1469e-03, -1.6106e-02, 9.4791e-03, -7.5972e-03,\n 8.4202e-03, -4.0334e-03, -1.4945e-03, -8.5470e-03, 2.0880e-03,\n -1.3676e-02, 7.1067e-03, 2.4966e-02, -3.3294e-04, -1.2086e-02,\n 5.1541e-03, 7.0220e-03, 4.7722e-03, -2.6082e-03, -4.0222e-03,\n -4.7368e-03, 1.5793e-02, -1.3549e-03, 1.7062e-02, -1.6906e-03,\n -1.5669e-03, -8.2518e-03, -2.5416e-03, 2.4360e-02, 6.2872e-03,\n 3.4575e-03, 5.7547e-03, -1.3466e-02, 5.7107e-03, -3.2905e-03,\n -2.4390e-03, -5.1571e-03, 1.7928e-02, -1.1556e-02, 2.9519e-02,\n 3.4166e-03, 6.4680e-03, -6.7423e-04, -2.9247e-02, -8.2965e-03,\n 4.8881e-05, 1.1759e-02, 2.3962e-03, 5.5069e-03, -8.2756e-03,\n -1.5360e-02, 1.2189e-02, 1.1571e-03, -8.1421e-04, 1.5000e-02,\n -1.2094e-02, -2.6234e-03, -9.9061e-03, 9.9498e-03, 1.3462e-02,\n 7.4980e-03, -2.0025e-03, 1.9508e-02, 2.3164e-02, 2.1481e-03,\n 3.0694e-03, -1.6736e-04, -1.7205e-02, 3.8269e-02, -1.0100e-03,\n -1.1926e-02, -7.2411e-03, 1.1266e-02, 2.6120e-03, -5.7277e-03,\n 8.0739e-03, 4.0702e-03, -1.3978e-02, -1.8821e-02, 1.6853e-02,\n -4.4076e-03, 1.6080e-02, 8.9545e-03, 9.7640e-03, 2.5534e-03,\n -3.9381e-03, -4.6300e-03, -1.0879e-03], device='cuda:0')", + "exp_avg_sq": "tensor([1.3396e-03, 1.6978e-03, 1.9072e-03, 1.9401e-03, 1.6503e-03, 2.2629e-03,\n 1.2953e-03, 1.7930e-03, 1.1179e-03, 1.3991e-03, 1.6937e-03, 1.1168e-03,\n 1.7743e-03, 2.1313e-03, 1.1151e-03, 1.3491e-03, 1.6162e-03, 9.7172e-04,\n 1.5945e-03, 7.2781e-04, 1.0283e-03, 1.2964e-03, 1.7457e-03, 1.3290e-03,\n 1.1275e-03, 1.0125e-03, 1.1824e-03, 1.5685e-03, 1.1593e-03, 1.1269e-03,\n 1.2370e-03, 1.6596e-04, 1.4507e-03, 1.5725e-03, 1.5129e-03, 6.4877e-04,\n 1.0431e-03, 2.0646e-03, 1.1104e-03, 1.0578e-03, 1.6913e-03, 1.7163e-03,\n 1.2526e-03, 9.2614e-04, 1.3260e-03, 1.5025e-03, 1.4175e-03, 1.2035e-03,\n 1.0537e-03, 1.7221e-03, 1.4303e-03, 2.5185e-03, 1.0272e-03, 1.2396e-03,\n 1.5704e-03, 9.5381e-04, 1.2956e-03, 1.1805e-03, 1.5462e-03, 2.0555e-03,\n 1.2016e-03, 1.4179e-03, 1.0868e-03, 1.0498e-03, 1.3081e-03, 1.5759e-03,\n 1.7086e-04, 1.0670e-03, 9.6980e-04, 1.4617e-03, 1.3502e-03, 1.6389e-03,\n 1.5291e-03, 1.8689e-03, 2.0393e-03, 1.7467e-03, 1.3504e-03, 1.7472e-03,\n 1.0801e-03, 1.4122e-03, 1.4110e-03, 1.1351e-03, 8.7580e-04, 1.6377e-03,\n 1.7101e-03, 9.5777e-04, 1.8747e-03, 1.3504e-03, 1.8327e-03, 1.6421e-03,\n 1.1435e-03, 1.1058e-03, 7.8286e-04, 1.2169e-03, 1.3224e-03, 1.4043e-03,\n 1.7435e-03, 1.1923e-03, 8.7136e-04, 1.7757e-03, 1.1899e-03, 2.0866e-03,\n 1.3007e-03, 8.3282e-04, 1.2619e-03, 9.9491e-04, 1.6991e-03, 8.6506e-04,\n 1.7563e-03, 1.2895e-03, 1.7133e-03, 1.1086e-03, 1.1367e-03, 1.3321e-03,\n 1.4477e-03, 1.5899e-03, 2.4923e-03, 1.2265e-03, 1.4256e-03, 1.0025e-03,\n 1.1142e-03, 1.3673e-03, 1.4037e-03, 1.6327e-03, 7.4180e-04, 1.0027e-03,\n 1.4169e-03, 1.3550e-03, 1.2791e-03, 1.7171e-03, 1.1561e-03, 6.9666e-04,\n 2.0255e-03, 1.5001e-03, 1.3380e-03, 1.5542e-03, 1.2140e-03, 1.4286e-03,\n 1.3984e-03, 1.2256e-03, 1.8773e-03, 1.2642e-03, 1.5773e-03, 2.3008e-03,\n 2.1917e-03, 9.9241e-04, 1.3667e-03, 9.4681e-04, 1.4196e-03, 1.4488e-03,\n 9.0121e-04, 1.4306e-03, 1.1842e-03, 1.6183e-03, 1.5471e-03, 8.5700e-04,\n 1.5415e-03, 1.6976e-03, 1.4742e-03, 1.3862e-03, 9.2669e-04, 1.7617e-03,\n 1.2438e-03, 1.4951e-03, 1.3046e-03, 1.5303e-03, 1.6318e-03, 1.6481e-03,\n 1.5748e-03, 1.1609e-03, 9.8627e-04, 1.1751e-03, 1.5359e-03, 1.2152e-03,\n 1.6998e-03, 1.2432e-03, 1.6805e-03, 1.7493e-03, 1.5795e-03, 2.2564e-03,\n 1.4297e-03, 1.8109e-03, 1.2273e-03, 1.6397e-03, 1.1870e-03, 8.6225e-04,\n 1.5036e-03, 1.4351e-03, 1.7958e-03, 1.0905e-03, 1.8740e-03, 1.4564e-03,\n 1.4118e-03, 1.1892e-03, 1.8832e-03, 1.2512e-03, 1.4036e-03, 8.2454e-04,\n 1.1918e-03, 1.4445e-03, 1.5966e-03, 1.7116e-03, 4.4187e-04, 1.0699e-03,\n 1.4324e-03, 1.1624e-03, 2.0800e-03, 9.2336e-04, 2.0896e-03, 1.1578e-03,\n 1.7158e-03, 1.3486e-03, 1.5028e-03, 1.7163e-03, 1.6271e-03, 1.7318e-03,\n 1.1693e-03, 1.5307e-03, 1.7226e-03, 1.8608e-03, 1.4099e-03, 1.4987e-03,\n 1.3048e-03, 1.2190e-03, 1.1910e-03, 1.5101e-03, 1.0746e-03, 1.1817e-03,\n 1.3141e-03, 1.7430e-03, 1.6824e-03, 1.0649e-03, 1.3857e-03, 1.2262e-03,\n 1.2252e-03, 1.2099e-03, 1.7792e-03, 1.1068e-03, 1.3548e-03, 8.0514e-04,\n 1.3265e-03, 1.7484e-03, 2.0290e-03, 1.3605e-03, 9.1092e-04, 1.5423e-03,\n 1.3181e-03, 1.2285e-03, 1.5415e-03, 1.3390e-03, 9.4965e-04, 2.0326e-03,\n 1.1467e-03, 1.9520e-03, 1.4543e-03, 1.1600e-03, 1.2739e-03, 7.9812e-04,\n 1.0595e-03, 1.4475e-03, 1.6289e-03, 1.8386e-03, 1.2576e-03, 1.3380e-03,\n 1.8028e-03, 1.3307e-03, 1.4044e-03, 1.4713e-03, 1.2855e-03, 1.7360e-03,\n 1.6665e-03, 1.7987e-03, 1.3090e-03, 1.1174e-03, 2.8156e-03, 1.3035e-03,\n 1.5112e-03, 1.1116e-03, 1.7202e-03, 2.3474e-03, 1.5373e-03, 1.1212e-03,\n 1.2865e-03, 1.2288e-03, 1.3699e-03, 1.4081e-03, 1.2839e-03, 1.2349e-03,\n 1.0345e-03, 2.0163e-03, 1.4180e-03, 1.3696e-03, 1.6680e-03, 1.5164e-03,\n 1.7969e-03, 1.3010e-03, 1.2046e-03, 1.0933e-03, 1.8973e-03, 1.1818e-03,\n 1.6430e-03, 1.3409e-03, 1.8950e-03, 1.1497e-03, 1.8117e-03, 1.3354e-03,\n 1.2914e-03, 1.4100e-03, 8.4636e-04, 1.3500e-03, 1.2538e-03, 1.1594e-03,\n 1.6208e-03, 1.2210e-03, 1.5142e-03, 1.7436e-03, 1.5863e-03, 1.6096e-03,\n 1.1808e-03, 1.0186e-03, 1.4720e-03, 1.4300e-03, 1.7367e-03, 1.7556e-03,\n 7.5424e-04, 1.3436e-03, 2.0599e-03, 6.5757e-04, 1.5113e-03, 1.0896e-03,\n 1.4357e-03, 8.2134e-04, 1.6498e-03, 1.5756e-03, 1.9232e-03, 1.4325e-03,\n 1.5474e-03, 1.1332e-03, 1.3498e-03, 1.8829e-03, 1.4644e-03, 1.1038e-03,\n 1.4859e-03, 1.5216e-03, 1.0077e-03, 1.0590e-03, 1.7625e-03, 1.6252e-03,\n 1.5726e-03, 1.5898e-03, 1.6406e-03, 1.6321e-03, 1.7615e-03, 1.6358e-03,\n 1.4457e-03, 1.0995e-03, 1.3366e-03, 9.6680e-04, 1.3276e-03, 8.2610e-04,\n 7.6255e-04, 1.9967e-03, 1.1988e-03, 7.1521e-04, 1.2690e-03, 1.7627e-03,\n 9.3464e-04, 1.3632e-03, 9.6690e-04, 1.4275e-03, 1.3817e-03, 1.4495e-03,\n 1.5953e-03, 1.2641e-03, 1.1942e-03, 1.9465e-03, 1.7992e-03, 1.1018e-03,\n 1.2970e-03, 1.4228e-03, 1.5691e-03, 1.3861e-03, 2.0202e-03, 1.3611e-03,\n 1.1442e-03, 1.5751e-03, 1.2611e-03, 1.1787e-03, 1.6784e-03, 1.3325e-03,\n 1.5929e-03, 1.6589e-03, 1.2292e-03, 1.4218e-03, 1.4322e-03, 1.6831e-03,\n 1.4941e-03, 1.7842e-03, 1.0519e-03, 1.8356e-03, 1.6214e-03, 1.3629e-03,\n 1.5308e-03, 1.5769e-03, 1.7500e-03, 1.4985e-03, 1.4818e-03, 1.3752e-03,\n 1.3691e-03, 1.4690e-03, 1.9716e-03, 8.4881e-04, 1.7665e-03, 1.5746e-03,\n 1.5104e-03, 1.0908e-03, 1.1770e-03, 1.3740e-03, 1.1607e-03, 1.2838e-03,\n 1.7808e-03, 1.4929e-03, 1.3431e-03, 1.4758e-03, 1.4239e-03, 8.5981e-04,\n 1.4543e-03, 1.1874e-03, 1.4074e-03, 1.8836e-03, 1.2281e-03, 1.2613e-03,\n 1.6206e-03, 1.1774e-03, 1.1129e-03, 1.4825e-03, 1.6267e-03, 6.5395e-04,\n 1.4158e-03, 1.7919e-03, 1.4716e-03, 1.4607e-03, 1.0424e-03, 2.0413e-03,\n 1.3541e-03, 1.4411e-03, 1.6744e-03, 1.2870e-03, 1.5941e-03, 1.4666e-03,\n 1.6002e-03, 1.1107e-03, 1.3692e-03, 1.3111e-03, 1.6235e-03, 1.3163e-03,\n 1.5767e-03, 1.7467e-03, 1.6854e-03, 2.2137e-03, 1.2542e-03, 1.1243e-03,\n 9.7908e-04, 1.0546e-03, 1.9060e-03, 2.1013e-03, 1.3584e-03, 1.6635e-03,\n 1.2723e-03, 1.3390e-03, 1.1354e-03, 1.3007e-03, 1.4076e-03, 9.5181e-04,\n 1.3164e-03, 1.3370e-03, 1.2514e-03, 6.3984e-04, 1.5840e-03, 1.9963e-03,\n 1.4834e-03, 1.5399e-03, 1.1362e-03, 1.1679e-03, 1.2928e-03, 1.4512e-03,\n 6.9825e-04, 1.1513e-03, 1.2202e-03, 1.5384e-03, 7.7611e-04, 1.2066e-03,\n 1.2856e-03, 1.5502e-03, 1.6744e-03, 9.7592e-04, 1.3133e-03, 1.5002e-03,\n 1.4299e-03, 1.0741e-03, 1.9143e-03, 1.4508e-03, 1.6814e-03, 1.3104e-03,\n 1.5165e-03, 5.1976e-04, 1.3481e-03, 1.6817e-03, 9.6902e-04, 1.3521e-03,\n 1.2691e-03, 1.6552e-03, 2.1860e-03, 1.5920e-03, 1.6221e-03, 1.8960e-03,\n 1.4745e-03, 1.9242e-03, 8.0055e-04, 1.3655e-03, 2.2900e-03, 1.9583e-03,\n 1.2677e-03, 1.9724e-03, 1.6456e-03, 1.0229e-03, 1.6946e-03, 6.7396e-04,\n 1.3390e-03, 1.0916e-03, 1.4642e-03, 1.2695e-03, 1.2577e-03, 1.8873e-03,\n 2.2364e-03, 1.6562e-03, 1.2135e-03, 1.3777e-03, 1.5019e-03, 2.1732e-03,\n 1.2048e-03, 1.4388e-03, 1.3662e-03, 2.2164e-03, 1.5813e-03, 9.3993e-04,\n 1.7329e-03, 1.5993e-03, 2.2066e-03, 1.0627e-03, 1.1188e-03, 2.0996e-03,\n 1.1781e-03, 1.1663e-03, 1.2508e-03, 1.5153e-03, 1.3690e-03, 1.7550e-03,\n 1.5950e-03, 1.1991e-03, 1.5734e-03, 1.1529e-03, 1.7807e-03, 7.4863e-04,\n 9.0607e-04, 1.7030e-03, 1.6035e-03, 1.5126e-03, 1.2097e-03, 1.5654e-03,\n 1.5063e-03, 9.3732e-04, 2.1450e-03, 1.2873e-03, 9.4505e-04, 2.1378e-03,\n 1.6774e-03, 1.0525e-03, 1.0200e-03, 1.6562e-03, 1.8018e-03, 2.0228e-03,\n 1.6407e-03, 1.2528e-03, 1.4409e-03, 1.2585e-03, 1.3131e-03, 1.3778e-03,\n 1.7564e-03, 1.3275e-03, 1.8270e-03, 2.0029e-03, 1.1064e-03, 2.0185e-03,\n 8.0182e-04, 1.8515e-03, 1.7729e-03, 1.0842e-03, 1.8694e-03, 1.7064e-03,\n 1.5546e-03, 1.1384e-03, 1.7890e-03, 1.6312e-03, 1.8100e-03, 1.7585e-03,\n 1.3128e-03, 1.5095e-03, 1.3479e-03, 2.0284e-03, 8.6705e-05, 1.4995e-03,\n 5.8790e-04, 1.3152e-03, 1.1839e-03, 1.5551e-03, 1.7334e-03, 1.4135e-03,\n 1.4013e-03, 1.8032e-03, 1.1031e-03, 1.2826e-03, 1.7712e-03, 1.5879e-03,\n 1.7578e-03, 2.0640e-03, 1.3403e-03, 1.1736e-03, 1.0344e-03, 1.1151e-03,\n 1.8105e-03, 9.9996e-04, 1.4360e-03, 1.5081e-03, 1.6453e-03, 1.4026e-03,\n 1.8477e-03, 5.8356e-04, 1.6299e-03, 9.7199e-04, 6.0612e-04, 1.5311e-03,\n 1.2315e-03, 1.5591e-03, 1.2787e-03, 7.7473e-04, 1.5504e-03, 1.5925e-03,\n 1.9728e-03, 1.6944e-03, 1.8423e-03, 1.4908e-03, 1.2151e-03, 1.2817e-03,\n 1.6807e-03, 1.1234e-03, 1.7823e-03, 1.4458e-03, 1.9557e-03, 1.7927e-03,\n 1.3565e-03, 1.3769e-03, 1.4604e-03, 1.5627e-03, 9.9165e-04, 1.4159e-03,\n 1.8520e-03, 1.9573e-03, 1.3143e-03, 2.1035e-03, 1.7496e-03, 1.3027e-03,\n 1.9824e-03, 1.3477e-03, 1.2658e-03, 1.2836e-03, 1.6432e-03, 1.4843e-03,\n 8.6702e-04, 1.0318e-03, 1.2379e-03, 1.5173e-03, 1.5249e-03, 1.3969e-03,\n 1.0181e-03, 1.0994e-03, 1.3243e-03, 1.1203e-03, 1.8262e-03, 1.6779e-03,\n 1.3902e-03, 8.5806e-04, 1.6991e-03, 1.1598e-03, 1.5248e-03, 1.5175e-03,\n 1.3785e-03, 1.9284e-03, 1.1225e-03, 1.8850e-03, 9.2948e-04, 1.7249e-03,\n 1.2294e-03, 1.1583e-03, 1.0890e-03, 1.5903e-03, 1.1188e-03, 1.5510e-03,\n 1.8777e-03, 1.2436e-03, 1.4975e-03, 1.0911e-03, 1.3377e-03, 1.6600e-03,\n 1.9096e-03, 1.3652e-03, 1.3730e-03, 1.9823e-03, 1.3750e-03, 1.5054e-03,\n 1.3496e-03, 1.5837e-03, 2.0364e-03, 1.3644e-03, 2.1633e-03, 1.9283e-03,\n 7.8201e-04, 1.5554e-03, 1.0117e-03, 1.3407e-03, 1.4291e-03, 1.3570e-03,\n 1.1548e-03, 1.3212e-03, 1.2276e-03, 1.1947e-03, 1.9783e-03, 1.5506e-03,\n 1.2821e-03, 1.5341e-03, 1.7277e-03, 1.1096e-03, 2.2528e-03, 1.4134e-03,\n 1.5293e-03, 1.0989e-03, 5.2971e-05, 1.3577e-03, 1.9972e-03, 1.2368e-03,\n 1.7174e-03, 1.7025e-03, 1.0645e-03, 9.5549e-04, 1.4051e-03, 9.0782e-04,\n 1.5205e-03, 1.0899e-03, 1.7959e-03, 1.4988e-03, 1.0512e-03, 1.7385e-03,\n 1.4014e-03, 1.2043e-03, 1.7250e-03, 1.1760e-03, 1.2809e-03, 1.1418e-03],\n device='cuda:0')" + }, + "2": { + "step": "tensor(11268.)", + "exp_avg": "tensor([[ 3.1458e-05, -1.3275e-04, -4.3184e-05, ..., 8.6116e-05,\n 1.5927e-04, -1.2017e-05],\n [ 5.8812e-07, 2.5958e-06, 2.1311e-05, ..., 1.2978e-05,\n -7.3229e-07, 7.2892e-06],\n [ 6.7381e-05, 2.6192e-05, -3.1181e-05, ..., -2.6547e-05,\n -2.9361e-05, 6.5195e-06],\n ...,\n [-2.5747e-05, 3.5342e-05, 5.0380e-05, ..., 7.7215e-05,\n -1.1978e-04, 2.3319e-06],\n [ 1.6600e-05, -6.1636e-05, -2.0119e-05, ..., -6.5922e-05,\n -6.1638e-05, 1.0480e-05],\n [ 5.5518e-05, 1.1993e-05, 1.6468e-05, ..., -1.7581e-05,\n 5.6399e-05, -4.8926e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[4.8957e-08, 3.4471e-08, 3.2386e-08, ..., 3.9059e-08, 4.5854e-08,\n 1.9314e-08],\n [3.2922e-09, 1.0865e-08, 6.2199e-09, ..., 1.2792e-07, 7.3640e-09,\n 9.3366e-09],\n [3.9496e-08, 3.9881e-08, 6.4708e-08, ..., 2.1168e-08, 1.8542e-08,\n 1.8514e-08],\n ...,\n [2.4796e-08, 7.0783e-08, 1.0338e-08, ..., 3.2849e-08, 8.5138e-08,\n 3.9844e-08],\n [1.8797e-08, 3.9520e-08, 3.0932e-08, ..., 3.3242e-08, 3.1708e-08,\n 2.8367e-08],\n [9.5476e-08, 2.7530e-08, 2.0165e-08, ..., 1.4007e-08, 1.4168e-07,\n 5.7688e-09]], device='cuda:0')" + }, + "3": { + "step": "tensor(11268.)", + "exp_avg": "tensor([ 4.8714e-04, 1.4915e-03, -2.1808e-04, -1.1581e-02, 3.0510e-03,\n -1.9676e-03, 8.7195e-04, 1.2592e-03, 1.4351e-03, -2.4071e-04,\n 2.1415e-03, -2.3597e-03, -2.5981e-04, 2.6533e-03, 5.2496e-03,\n -1.8110e-03, -4.4629e-03, 2.0758e-03, 6.4493e-03, 1.1114e-03,\n 1.2190e-04, -3.0217e-03, -2.8637e-04, 2.5434e-03, -3.2326e-03,\n 8.2869e-04, 2.4173e-03, 5.2099e-03, -3.4530e-03, 1.9410e-03,\n -4.2989e-03, 4.6697e-03, 5.4551e-03, 8.6322e-04, 1.1124e-03,\n -1.1432e-04, -2.2243e-03, 9.1047e-04, 1.0716e-03, 2.2587e-03,\n 2.9623e-03, -3.9052e-03, -5.8373e-04, 2.3993e-03, 1.8920e-03,\n -2.1161e-03, 3.7585e-03, -3.3626e-03, 3.1031e-03, -2.6791e-03,\n -3.1460e-03, -4.4548e-03, -4.1833e-03, 1.1795e-03, 3.5318e-03,\n 1.7452e-03, 7.2626e-03, 5.6052e-45, -1.3348e-03, 1.6398e-03,\n -2.3960e-04, -7.7459e-04, 2.2614e-03, -3.0086e-04, -9.4727e-04,\n -4.5303e-03, -3.4521e-03, 5.4957e-03, 3.2602e-04, 9.0288e-04,\n -1.1238e-03, -1.7713e-03, 6.4725e-04, -3.0936e-03, -1.2830e-03,\n 7.0347e-03, 2.6209e-03, -2.1974e-03, 1.4736e-03, -3.8360e-03,\n 5.6052e-45, 1.5306e-03, 3.5106e-03, 3.1252e-03, -1.4949e-03,\n 5.2482e-03, -3.1549e-04, -7.5455e-04, -3.7839e-03, 4.6348e-03,\n -3.5691e-03, -4.7322e-03, -2.7512e-03, -1.7633e-03, -4.5540e-04,\n -1.5489e-03, -2.6025e-03, 2.2022e-03, 1.6825e-03, -1.0799e-03,\n -4.0234e-03, -6.5006e-04, 2.1327e-05, 1.7601e-03, 1.1790e-03,\n -1.4144e-04, 1.5126e-04, -2.7436e-03, -6.7892e-03, 2.1129e-03,\n 2.2869e-03, -3.0220e-03, 1.4936e-03, 8.4463e-04, 2.1203e-03,\n 1.4396e-04, -1.2785e-03, 1.7904e-03, 3.8102e-03, -2.9692e-03,\n 8.0330e-04, 2.6769e-03, 5.6052e-45, 2.8337e-03, -3.2321e-03,\n -2.6401e-03, 2.1765e-03, 3.3057e-03, 1.1033e-03, -3.8829e-04,\n -5.1702e-03, 1.1006e-04, 5.4322e-04, -2.1757e-03, 2.1123e-03,\n 2.9471e-04, -3.5963e-03, 2.0379e-03, -2.2211e-03, 2.8386e-04,\n -4.7380e-04, 5.1024e-03, -2.6472e-03, -2.1043e-04, -2.5808e-03,\n 1.1872e-03, 4.2399e-03, 1.0776e-05, 7.6610e-04, 5.4683e-03,\n -1.4038e-03, 1.5265e-03, -1.0632e-02, 2.8182e-03, 1.0037e-03,\n 3.2360e-04, 4.5050e-03, 1.5600e-03, -2.3958e-03, 1.3754e-03,\n -1.7496e-03, 1.8376e-03, -2.5860e-03, -2.2228e-03, 6.1676e-05,\n 4.0964e-03, 2.8343e-03, 2.3113e-03, -5.8333e-04, 1.0567e-03,\n -3.0331e-03, 6.6938e-04, 2.8819e-03, -5.6951e-04, -3.7525e-03,\n 4.5174e-04, 3.6247e-03, 1.9893e-03, -2.8512e-04, -5.6549e-04,\n 1.0758e-03, 1.3367e-03, 1.3862e-04, 4.1332e-03, 6.2323e-04,\n 3.8096e-03, -1.2147e-03, 1.8083e-03, 5.9475e-04, -3.8029e-03,\n -3.1014e-03, 9.6980e-05, 1.2131e-03, 4.5451e-03, 3.7426e-04,\n -3.0145e-03, 6.9521e-04, -1.6623e-03, 2.2554e-04, -8.8549e-04,\n 2.3493e-03, 2.2430e-03, -1.1764e-03, -1.8621e-03, 3.5304e-03,\n 4.1671e-03, 4.6532e-03, 2.2500e-03, -2.1710e-03, 1.9846e-03,\n -1.4554e-03, -2.2905e-03, -2.7745e-03, 2.0278e-03, 5.8721e-05,\n 1.3666e-03, -1.5119e-03, -3.2799e-03, -1.2058e-04, -2.2267e-03,\n 1.1664e-03, 2.7343e-04, 1.6232e-03, 4.7197e-03, -4.1630e-04,\n -9.6381e-04, -7.7639e-04, -2.5245e-03, -5.2199e-04, -2.1695e-03,\n -2.4308e-03, -1.5194e-03, 5.3409e-03, -3.7700e-04, 3.6622e-03,\n -3.6666e-03, 4.9439e-03, 1.9685e-03, 4.7280e-04, -2.0827e-04,\n -2.4994e-03, -4.5258e-04, -2.7610e-03, 3.5498e-03, -7.8438e-04,\n 1.0562e-03, -2.4758e-03, 3.6745e-04, 3.3158e-03, 7.8462e-04,\n -2.1992e-03, -3.0055e-03, -4.2684e-03, 3.7938e-03, 2.9026e-04,\n -5.5323e-03, -6.4524e-03, 1.6478e-03, 1.2879e-03, 1.3184e-04,\n 2.1478e-04, -2.1681e-03, -3.4470e-03, 5.2541e-03, -5.4631e-03,\n 1.8757e-03, 4.9048e-03, -2.7977e-03, 8.7713e-04, 3.3255e-03,\n 5.6052e-45, 1.4508e-03, 6.8878e-03, -8.6189e-03, 5.0964e-04,\n -1.2502e-03, -4.7526e-03, 4.2673e-03, -1.0144e-03, -3.1023e-03,\n 1.2302e-03, 5.6052e-45, 8.0764e-04, -1.6390e-03, 2.5048e-04,\n 1.9808e-03, 2.2157e-03, -2.5185e-04, 1.1413e-04, 1.2359e-03,\n 2.3052e-03, 8.3977e-04, 1.7103e-03, -1.7840e-03, 2.2796e-03,\n -2.4322e-03, -7.5688e-04, 9.9852e-03, -3.9607e-03, -1.0991e-03,\n -7.6973e-04, -3.2922e-03, 2.8650e-03, -2.9194e-04, 1.7281e-03,\n -2.9706e-04, 4.1766e-03, 6.0316e-03, -8.5028e-04, -5.8636e-04,\n -6.1152e-03, 3.9405e-03, -4.2733e-03, -2.5178e-03, -2.0879e-03,\n -3.6221e-03, -3.3385e-03, 8.2181e-04, -1.3387e-03, -1.8571e-03,\n -2.0711e-05, 2.0192e-03, -2.6557e-03, 2.7420e-03, 5.6052e-45,\n -8.7974e-03, 2.7873e-04, -3.6065e-03, 7.0326e-04, -5.0577e-03,\n -4.5679e-03, 5.2003e-06, 1.4887e-04, -2.5367e-03, -3.2536e-03,\n -1.8523e-03, -8.3013e-04, -3.2035e-03, 1.2579e-03, -2.1922e-03,\n -6.0574e-03, -5.4628e-04, -6.4782e-03, -4.6410e-04, -1.3671e-03,\n -3.1083e-04, 1.5803e-03, 3.8443e-03, -2.8524e-03, 3.6063e-03,\n -1.2152e-02, 3.4860e-03, -2.5193e-03, -3.2590e-05, 5.6052e-45,\n 1.7582e-03, -3.5071e-04, 4.7886e-03, 2.1113e-03, -1.4624e-03,\n -2.8349e-03, 2.8349e-03, 6.9829e-04, -4.5888e-05, 6.7738e-04,\n -3.1485e-03, -2.6609e-03, 3.4553e-03, 6.9521e-04, -1.5720e-04,\n 3.1479e-03, -8.7344e-04, 1.9058e-03, 5.0957e-05, 1.0989e-03,\n 3.6641e-04, 2.9010e-03, 5.1473e-03, -1.3084e-05, 4.1827e-03,\n 4.9603e-03, -4.1026e-04, -3.0694e-03, 2.6351e-04, 8.2134e-04,\n 3.8328e-04, -4.1264e-03, 2.5533e-03, -1.6623e-03, 5.6052e-45,\n -1.2611e-03, -5.4195e-04, 3.2880e-03, 2.6779e-03, -1.6310e-03,\n 2.1278e-03, -4.4435e-03, 6.4794e-04, -5.6059e-03, -3.9586e-03,\n 1.4383e-03, 6.8738e-04, -6.1305e-03, -3.0912e-03, 2.5748e-03,\n 4.9123e-03, -1.9156e-03, 2.6003e-03, 2.2004e-03, 1.6268e-03,\n 3.9284e-04, 2.0876e-03, 3.6088e-03, 3.0230e-03, -9.7488e-03,\n -1.9682e-03, 1.4957e-03, 5.6052e-45, 4.0213e-03, -1.1612e-03,\n 3.2127e-04, -1.3501e-03, 1.5663e-03, 5.7471e-03, -5.8552e-03,\n 2.9384e-04, 6.4221e-03, -1.8089e-03, -1.1789e-03, 6.3509e-04,\n -2.4816e-03, -3.3594e-04, 3.9959e-03, 1.9574e-03, 1.5090e-03,\n 3.1636e-03, 2.0909e-03, -1.0593e-03, -1.8386e-03, 9.8139e-04,\n -9.1999e-04, -1.7380e-03, -4.8609e-04, -2.6982e-03, 7.3804e-04,\n -3.5340e-03, -1.0547e-03, -1.0997e-02, 6.0130e-04, 5.6052e-45,\n 1.1357e-03, -2.4936e-03, -2.2772e-03, -8.6641e-05, -2.4324e-03,\n -3.2804e-03, 2.3277e-03, 3.5269e-03, -5.1847e-03, 2.2490e-03,\n -5.1347e-04, -2.8623e-03, 3.8751e-04, 4.0653e-03, 5.6052e-45,\n -9.7672e-04, -2.8155e-03, -3.9660e-03, -8.7322e-03, 2.8416e-03,\n -3.4873e-03, -1.5873e-04, 1.8035e-03, -1.0237e-03, 2.2528e-03,\n -4.5672e-05, 2.5705e-03, -2.7841e-03, -2.1905e-03, 2.5078e-03,\n -5.0935e-03, 2.2287e-03, 5.6052e-45, -1.3829e-03, 2.5789e-03,\n -8.8649e-04, -1.9870e-05, 1.7515e-03, -8.1349e-05, -5.3044e-05,\n 2.1659e-03, 4.4694e-03, -4.5481e-04, -3.6368e-03, -2.5643e-03,\n 3.0570e-03, -2.9139e-04, -3.5912e-03, 2.7873e-03, 6.6725e-03,\n -1.0138e-03, -3.7777e-03, -3.0940e-03, 1.7339e-03, -4.5837e-04,\n 8.0919e-04, 3.1860e-05, 1.7560e-03, -1.9487e-03, 6.2506e-04,\n 5.8704e-04, 1.6821e-03], device='cuda:0')", + "exp_avg_sq": "tensor([7.2877e-05, 2.5662e-05, 7.0001e-05, 1.1198e-04, 8.2379e-05, 8.3520e-05,\n 8.7492e-05, 9.7527e-05, 8.4228e-05, 7.4133e-05, 7.3917e-05, 8.3582e-05,\n 8.0112e-05, 8.6192e-05, 8.6092e-05, 7.4569e-05, 8.5994e-05, 5.1325e-05,\n 8.6619e-05, 7.2217e-05, 7.8151e-05, 8.5391e-05, 5.6938e-05, 8.9564e-05,\n 7.3053e-05, 8.8217e-05, 9.5074e-05, 8.1814e-05, 7.5471e-05, 8.2298e-05,\n 7.1211e-05, 8.8497e-05, 9.0822e-05, 8.6704e-05, 7.9948e-05, 7.9131e-05,\n 8.1880e-05, 8.4153e-05, 9.4745e-05, 9.1457e-05, 7.9298e-05, 7.9260e-05,\n 8.7372e-05, 6.5593e-05, 8.4375e-05, 7.0020e-05, 7.3542e-05, 7.6433e-05,\n 7.9231e-05, 5.7766e-05, 3.2905e-05, 8.0116e-05, 8.3278e-05, 6.6567e-05,\n 7.6619e-05, 7.3695e-05, 9.4033e-05, 3.0310e-13, 8.5234e-05, 7.2253e-05,\n 7.9290e-05, 9.0559e-05, 7.4380e-05, 8.9326e-05, 8.0975e-05, 8.2235e-05,\n 7.8763e-05, 9.5024e-05, 3.4956e-05, 8.3023e-05, 7.4949e-05, 8.4541e-05,\n 8.7503e-05, 8.1078e-05, 7.9872e-05, 9.8198e-05, 6.1561e-05, 7.2086e-05,\n 9.1595e-05, 8.3578e-05, 1.2762e-12, 1.0795e-04, 8.0750e-05, 8.5240e-05,\n 6.9228e-05, 8.9914e-05, 6.7122e-05, 8.9099e-05, 7.4652e-05, 7.7430e-05,\n 8.4498e-05, 8.4443e-05, 8.1605e-05, 7.2126e-05, 6.2048e-05, 9.6031e-05,\n 8.4050e-05, 6.4578e-05, 6.3319e-05, 8.3867e-05, 7.8654e-05, 3.9354e-05,\n 7.5389e-05, 8.1457e-05, 7.2736e-05, 8.3403e-05, 7.1251e-05, 6.5866e-05,\n 7.6476e-05, 7.4760e-05, 9.9943e-05, 1.0139e-04, 6.8307e-05, 6.4954e-05,\n 6.2712e-05, 7.0005e-05, 7.6996e-05, 6.6277e-05, 6.0417e-05, 7.8280e-05,\n 8.1205e-05, 1.0826e-04, 1.6166e-12, 8.0736e-05, 8.9948e-05, 6.2343e-05,\n 7.8083e-05, 8.0480e-05, 8.7801e-05, 8.5108e-05, 8.4700e-05, 9.2869e-05,\n 2.3945e-05, 5.8284e-05, 7.7094e-05, 7.6929e-05, 9.0168e-05, 1.1568e-04,\n 7.3846e-05, 7.2935e-05, 8.7956e-05, 7.9435e-05, 7.1495e-05, 8.0947e-05,\n 7.6034e-05, 9.1511e-05, 9.9559e-05, 8.5298e-05, 9.0978e-05, 6.9905e-05,\n 8.0821e-05, 6.4602e-05, 4.4298e-05, 1.0168e-04, 8.5103e-05, 7.9869e-05,\n 8.8767e-05, 7.7360e-05, 9.3263e-05, 7.3636e-05, 6.4622e-05, 8.6839e-05,\n 9.9165e-05, 1.1475e-04, 2.4958e-05, 7.4973e-05, 7.7416e-05, 6.8436e-05,\n 9.0662e-05, 8.1030e-05, 5.7155e-05, 7.5475e-05, 9.5263e-05, 8.0579e-05,\n 8.2217e-05, 8.4340e-05, 7.8797e-05, 2.9137e-05, 6.6358e-05, 8.3063e-05,\n 6.9589e-05, 6.7667e-05, 8.4214e-05, 7.3061e-05, 7.8763e-05, 5.3446e-05,\n 1.0879e-04, 8.4744e-05, 6.8479e-05, 8.2616e-05, 9.4834e-05, 7.5376e-05,\n 7.1312e-05, 1.0851e-04, 7.0479e-05, 8.5252e-05, 6.2903e-05, 7.2912e-05,\n 6.4528e-05, 5.6212e-05, 1.1602e-04, 7.5339e-05, 9.7055e-05, 7.2403e-05,\n 7.2679e-05, 8.0649e-05, 3.5525e-05, 1.0390e-04, 7.7426e-05, 9.2271e-05,\n 7.4310e-05, 8.8216e-05, 8.1010e-05, 9.6835e-05, 1.0121e-04, 9.0233e-05,\n 1.0774e-04, 8.3420e-05, 7.4985e-05, 9.1506e-05, 7.6558e-05, 7.5232e-05,\n 7.8104e-05, 1.0730e-04, 2.3837e-05, 7.0366e-05, 6.6384e-05, 7.2833e-05,\n 8.7555e-05, 6.9340e-05, 6.9780e-05, 8.4719e-05, 9.8497e-05, 1.1746e-04,\n 8.7839e-05, 6.9407e-05, 7.9619e-05, 6.5434e-05, 7.7440e-05, 7.4464e-05,\n 7.5666e-05, 8.3376e-05, 7.9342e-05, 7.8229e-05, 7.6517e-05, 8.2818e-05,\n 7.5302e-05, 6.6928e-05, 9.0425e-05, 7.5678e-05, 7.8442e-05, 7.3386e-05,\n 7.2410e-05, 9.3027e-05, 1.0812e-04, 8.9182e-05, 7.2660e-05, 9.3359e-05,\n 1.0328e-04, 9.8948e-05, 7.6524e-05, 7.8736e-05, 7.9610e-05, 8.3522e-05,\n 8.7691e-05, 7.0879e-05, 6.8426e-05, 7.9372e-05, 8.9317e-05, 6.9603e-05,\n 1.6629e-11, 7.2650e-05, 9.1412e-05, 9.2896e-05, 8.8118e-05, 7.8421e-05,\n 8.0394e-05, 9.7456e-05, 8.3608e-05, 8.8037e-05, 1.1219e-04, 8.7194e-11,\n 8.2486e-05, 9.6145e-05, 8.3118e-05, 7.3136e-05, 6.1869e-05, 8.3052e-05,\n 8.1809e-05, 8.5322e-05, 7.4470e-05, 1.0751e-04, 7.7672e-05, 8.1237e-05,\n 8.1297e-05, 8.3519e-05, 7.5980e-05, 6.6506e-05, 7.8773e-05, 6.8919e-05,\n 8.4556e-05, 1.2544e-04, 5.9842e-05, 7.7261e-05, 8.2791e-05, 8.1699e-05,\n 8.2389e-05, 7.0064e-05, 3.4132e-05, 6.9202e-05, 6.5703e-05, 1.0552e-04,\n 8.3326e-05, 7.1939e-05, 7.5213e-05, 7.9672e-05, 8.4076e-05, 7.0127e-05,\n 6.0615e-05, 7.7878e-05, 8.1299e-05, 8.5987e-05, 7.4142e-05, 7.7573e-05,\n 1.2485e-11, 7.0240e-05, 8.7195e-05, 7.9418e-05, 8.3064e-05, 7.6208e-05,\n 8.5713e-05, 9.6442e-05, 1.2576e-05, 7.5352e-05, 8.4254e-05, 8.5608e-05,\n 8.5950e-05, 7.6800e-05, 8.9594e-05, 1.1878e-04, 6.2109e-05, 7.6326e-05,\n 8.6570e-05, 8.1110e-05, 8.0229e-05, 6.8502e-05, 7.8208e-05, 9.0350e-05,\n 8.1054e-05, 9.5227e-05, 9.4565e-05, 7.7765e-05, 6.7583e-05, 6.8927e-05,\n 7.2341e-11, 9.5174e-05, 2.4255e-05, 1.5493e-04, 6.6110e-05, 8.1355e-05,\n 7.9130e-05, 7.0734e-05, 8.2182e-05, 9.2395e-05, 7.4017e-05, 7.3810e-05,\n 6.9221e-05, 7.1233e-05, 7.1967e-05, 8.6586e-05, 8.6282e-05, 8.0943e-05,\n 8.4346e-05, 7.8825e-05, 7.5518e-05, 8.3786e-05, 9.2454e-05, 6.5878e-05,\n 6.0888e-05, 8.2126e-05, 8.7910e-05, 8.0437e-05, 4.8345e-05, 7.4606e-05,\n 8.8071e-05, 8.3246e-05, 1.0232e-04, 5.3362e-05, 6.2040e-05, 2.0615e-11,\n 6.4188e-05, 8.0088e-05, 8.5745e-05, 7.0169e-05, 7.1650e-05, 7.6095e-05,\n 8.8714e-05, 8.5686e-05, 7.3486e-05, 9.3028e-05, 7.7979e-05, 7.8476e-05,\n 9.7647e-05, 6.6479e-05, 9.1889e-05, 7.6249e-05, 7.4812e-05, 8.2796e-05,\n 7.1174e-05, 8.4053e-05, 4.1766e-05, 7.1547e-05, 8.0197e-05, 8.5604e-05,\n 7.4236e-05, 6.4072e-05, 7.3591e-05, 3.3021e-12, 8.9562e-05, 7.5848e-05,\n 7.3837e-05, 4.5218e-05, 6.7958e-05, 8.1995e-05, 1.0043e-04, 2.0290e-05,\n 8.8572e-05, 7.2025e-05, 7.5784e-05, 9.6547e-05, 7.3290e-05, 7.9337e-05,\n 8.5258e-05, 6.5098e-05, 7.3362e-05, 7.9494e-05, 9.6147e-05, 9.1708e-05,\n 7.7788e-05, 4.5608e-05, 7.8009e-05, 7.2768e-05, 7.7390e-05, 5.9370e-05,\n 8.9284e-05, 7.0085e-05, 1.0315e-04, 8.9038e-05, 6.6502e-05, 7.1273e-13,\n 9.2081e-05, 7.8800e-05, 8.2293e-05, 8.2314e-05, 7.2343e-05, 8.9660e-05,\n 4.1248e-05, 9.3638e-05, 8.8667e-05, 4.8034e-05, 9.6770e-05, 7.9257e-05,\n 1.0145e-04, 8.5358e-05, 2.7075e-11, 5.0332e-05, 6.7898e-05, 7.5864e-05,\n 6.8297e-05, 8.5401e-05, 5.2666e-05, 7.1429e-05, 8.5442e-05, 8.9918e-05,\n 7.9219e-05, 7.7802e-05, 8.0853e-05, 7.1871e-05, 7.4019e-05, 6.4004e-05,\n 7.7800e-05, 1.0073e-04, 1.9812e-12, 1.2949e-04, 9.2913e-05, 5.0629e-05,\n 7.0287e-05, 8.4101e-05, 7.1896e-05, 8.7598e-05, 7.5016e-05, 6.6757e-05,\n 9.9793e-05, 1.1575e-04, 6.8737e-05, 8.6562e-05, 8.1516e-05, 8.7055e-05,\n 7.8401e-05, 6.6895e-05, 9.4630e-05, 8.3914e-05, 9.6484e-05, 9.6789e-05,\n 8.9948e-05, 7.9402e-05, 6.8099e-05, 6.8612e-05, 6.8653e-05, 8.5746e-05,\n 9.2077e-05, 7.4285e-05], device='cuda:0')" + }, + "4": { + "step": "tensor(11268.)", + "exp_avg": "tensor([[ 9.2937e-07, 2.7675e-05, -5.5165e-06, ..., -8.2682e-05,\n -8.6031e-05, -1.1608e-05],\n [ 1.7933e-05, -2.9051e-05, 6.8543e-05, ..., -2.3178e-05,\n -1.8109e-05, -1.2097e-05],\n [-8.5371e-05, 3.8149e-05, -9.7465e-05, ..., -2.1260e-05,\n -5.0112e-05, -9.8845e-06],\n ...,\n [ 5.9036e-05, 9.7459e-05, -7.3575e-05, ..., -1.3723e-05,\n 4.7492e-05, -1.1322e-05],\n [ 2.2431e-05, -5.4813e-05, -2.9737e-05, ..., 2.8822e-05,\n -5.4490e-05, 8.3492e-06],\n [ 3.7304e-05, 3.2135e-05, 4.9455e-05, ..., 1.2097e-04,\n -1.7789e-07, -4.5678e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.7439e-08, 2.4407e-09, 2.3420e-08, ..., 2.4880e-08, 4.6061e-08,\n 6.4400e-09],\n [5.2405e-08, 4.0098e-09, 4.1117e-08, ..., 2.2621e-08, 6.0552e-08,\n 7.8459e-09],\n [5.2005e-08, 1.9211e-09, 4.3889e-08, ..., 1.5584e-08, 5.6755e-08,\n 6.5574e-09],\n ...,\n [6.4215e-08, 6.7740e-09, 5.4347e-08, ..., 3.1836e-08, 6.1386e-08,\n 7.9509e-09],\n [5.1728e-08, 5.7171e-09, 6.5206e-08, ..., 3.1805e-08, 6.3249e-08,\n 4.3303e-09],\n [8.4929e-08, 2.6738e-09, 6.3238e-08, ..., 2.8480e-08, 6.6253e-08,\n 6.2320e-09]], device='cuda:0')" + }, + "5": { + "step": "tensor(8764.)", + "exp_avg": "tensor([[ 5.8484e-06, -1.6451e-06, -1.4407e-05, ..., 3.3881e-05,\n 1.5271e-06, 7.9127e-05],\n [-4.6644e-06, 2.6126e-05, 2.3333e-05, ..., -2.6553e-05,\n -2.4295e-05, 4.5421e-06],\n [ 2.9654e-05, -1.9396e-06, 3.6644e-05, ..., 3.0885e-05,\n 6.0555e-05, 1.9452e-05],\n ...,\n [ 9.8575e-06, 7.0037e-06, 6.5734e-06, ..., -3.4068e-06,\n -1.8624e-05, 1.7485e-05],\n [-8.6021e-06, -2.4303e-05, 2.7908e-06, ..., 3.7704e-06,\n -5.3788e-06, 1.6231e-06],\n [-2.3330e-06, 3.4910e-07, -4.8869e-06, ..., 1.1359e-05,\n 1.1438e-05, 5.4632e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.4311e-08, 1.1422e-08, 1.0039e-08, ..., 1.3702e-08, 2.1733e-08,\n 3.5091e-08],\n [3.2322e-08, 4.7468e-09, 1.7182e-08, ..., 5.9430e-09, 1.0674e-08,\n 2.7102e-09],\n [8.4790e-09, 1.7652e-08, 1.2217e-08, ..., 3.2595e-08, 7.7963e-08,\n 3.1688e-08],\n ...,\n [5.2871e-09, 9.2007e-09, 2.5483e-09, ..., 4.8574e-09, 1.2213e-08,\n 1.1695e-08],\n [1.1672e-08, 9.9084e-09, 2.1533e-09, ..., 8.7406e-09, 1.2190e-08,\n 4.7204e-09],\n [4.3649e-09, 5.0642e-09, 5.8383e-09, ..., 6.2639e-09, 1.5820e-08,\n 1.3648e-08]], device='cuda:0')" + }, + "6": { + "step": "tensor(8764.)", + "exp_avg": "tensor([ 3.1953e-03, -2.4521e-05, 9.0409e-04, ..., 1.1546e-03,\n 1.8510e-04, -2.9770e-05], device='cuda:0')", + "exp_avg_sq": "tensor([3.0889e-05, 3.3386e-05, 3.8822e-05, ..., 4.2283e-05, 6.4657e-05,\n 3.8022e-05], device='cuda:0')" + }, + "7": { + "step": "tensor(8764.)", + "exp_avg": "tensor([[-7.0349e-06, 1.0341e-05, 2.1934e-06, ..., 1.8260e-05,\n -2.3212e-06, 1.0508e-05],\n [-4.2665e-05, 2.2843e-05, 1.2043e-05, ..., 2.6321e-05,\n -1.8646e-05, 3.2924e-06],\n [ 3.6194e-05, -9.9767e-06, -4.5363e-06, ..., 4.1938e-06,\n -6.3688e-06, -3.7745e-06],\n ...,\n [-1.9537e-05, 6.8065e-06, -1.1680e-05, ..., -1.7956e-06,\n 1.3672e-05, -1.5532e-05],\n [ 3.6049e-05, -2.9251e-05, 2.5607e-05, ..., -2.0040e-05,\n 2.1984e-06, -7.6561e-06],\n [ 6.3848e-05, 1.4119e-05, 6.3780e-06, ..., 1.6914e-05,\n -4.7727e-06, 3.9572e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.7227e-09, 3.5294e-09, 2.4194e-09, ..., 2.7554e-09, 2.0406e-09,\n 4.3406e-09],\n [4.2787e-09, 8.8244e-09, 2.8409e-09, ..., 4.4348e-09, 2.6112e-09,\n 5.4138e-09],\n [5.4059e-09, 9.1327e-09, 4.7488e-09, ..., 5.4087e-09, 4.5791e-09,\n 6.8710e-09],\n ...,\n [5.5416e-09, 9.9453e-09, 5.4368e-09, ..., 5.9358e-09, 2.2787e-09,\n 4.5163e-09],\n [4.3679e-09, 5.7437e-09, 4.2314e-09, ..., 4.6554e-09, 2.5141e-09,\n 3.0246e-09],\n [4.9163e-09, 7.1616e-09, 6.1130e-09, ..., 5.9990e-09, 3.2892e-09,\n 4.7567e-09]], device='cuda:0')" + }, + "8": { + "step": "tensor(11268.)", + "exp_avg": "tensor([[ 2.4422e-07, 1.1533e-05, 4.7097e-06, ..., 8.6454e-07,\n 2.3018e-05, -1.5999e-04],\n [-9.6299e-06, 1.3208e-05, 2.7779e-05, ..., 1.6057e-05,\n -4.1802e-05, 4.4871e-05],\n [ 2.4589e-05, 2.1977e-05, 9.9058e-06, ..., 1.0300e-04,\n 1.3128e-05, 1.8370e-04],\n ...,\n [-6.9322e-06, -1.4581e-04, -6.5504e-05, ..., 2.9642e-05,\n -6.1395e-05, -1.2374e-05],\n [ 1.2275e-05, -2.0572e-05, 1.9280e-05, ..., 1.2169e-05,\n -8.2692e-06, -5.1635e-06],\n [ 4.2933e-05, 1.7730e-06, -2.1902e-05, ..., 9.1069e-06,\n 4.9584e-05, 3.0050e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[3.8772e-09, 2.6407e-08, 9.1252e-09, ..., 6.5870e-08, 1.2909e-08,\n 1.7276e-08],\n [2.0792e-07, 7.2835e-08, 1.0656e-07, ..., 4.7952e-08, 1.1865e-08,\n 1.1455e-07],\n [3.4668e-08, 1.1415e-07, 7.3395e-08, ..., 2.3928e-07, 6.2344e-09,\n 2.7618e-07],\n ...,\n [5.8341e-08, 2.4515e-08, 2.1928e-08, ..., 1.8286e-08, 1.8022e-08,\n 3.5143e-08],\n [2.5402e-08, 4.1698e-08, 2.6212e-08, ..., 2.2182e-08, 1.2202e-08,\n 1.5125e-08],\n [5.3168e-08, 6.0736e-08, 3.8780e-08, ..., 8.1060e-08, 2.1801e-08,\n 7.4378e-08]], device='cuda:0')" + }, + "9": { + "step": "tensor(11268.)", + "exp_avg": "tensor([-0.0053, -0.0004, 0.0025, ..., -0.0021, -0.0003, -0.0012],\n device='cuda:0')", + "exp_avg_sq": "tensor([1.1416e-04, 8.6485e-05, 1.8180e-04, ..., 1.0636e-04, 1.1776e-04,\n 1.0683e-04], device='cuda:0')" + }, + "10": { + "step": "tensor(11268.)", + "exp_avg": "tensor([[ 3.6594e-07, 3.4075e-06, 1.4040e-05, ..., 2.1579e-05,\n -4.8809e-06, 4.8804e-06],\n [ 7.7297e-07, 6.2212e-06, -1.3209e-07, ..., 5.9609e-06,\n 2.1853e-05, -2.0799e-05],\n [-3.7169e-06, -7.0003e-06, -1.2425e-05, ..., -2.7103e-06,\n -5.5974e-06, 1.3382e-05],\n ...,\n [-5.6875e-06, 6.8317e-07, 7.6243e-06, ..., 1.9538e-05,\n -8.0045e-05, -5.8038e-06],\n [-3.9773e-06, 8.0043e-07, 2.1711e-05, ..., -2.0914e-05,\n -2.0182e-05, -5.9941e-06],\n [ 5.5623e-06, -5.8575e-07, -4.6670e-06, ..., 3.2872e-05,\n 8.0582e-06, -1.1763e-05]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.0703e-09, 1.1068e-09, 4.0675e-09, ..., 2.3427e-09, 1.8595e-09,\n 2.9425e-09],\n [1.2285e-09, 2.6060e-09, 3.5506e-09, ..., 5.9431e-09, 3.7249e-09,\n 9.3998e-09],\n [1.5821e-09, 1.9568e-09, 4.0242e-09, ..., 3.9370e-09, 5.1451e-09,\n 1.3709e-08],\n ...,\n [1.1694e-09, 2.1067e-09, 9.7023e-09, ..., 5.9493e-09, 7.7592e-09,\n 1.4660e-08],\n [1.4022e-09, 2.7464e-09, 8.9993e-09, ..., 4.3452e-09, 6.5215e-09,\n 7.5893e-09],\n [3.1905e-09, 2.2732e-09, 5.3864e-09, ..., 4.9093e-09, 5.2800e-09,\n 6.9543e-09]], device='cuda:0')" + }, + "11": { + "step": "tensor(11268.)", + "exp_avg": "tensor([[ 2.4466e-04, 3.3776e-05, 1.4526e-05, ..., -1.6792e-05,\n 1.6994e-06, -4.0528e-05],\n [ 4.9451e-06, 4.2561e-06, -9.7707e-05, ..., -4.0373e-04,\n -7.6999e-06, 3.2616e-05],\n [ 2.9790e-05, -2.0413e-05, 4.3289e-06, ..., 2.0201e-05,\n 8.3653e-06, 2.8919e-05],\n ...,\n [ 5.6052e-45, 5.6052e-45, 0.0000e+00, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-6.4426e-06, 1.1309e-04, 1.4193e-06, ..., 2.9966e-05,\n 2.4559e-05, -1.2922e-04],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.0446e-07, 1.3593e-07, 6.4680e-08, ..., 3.4491e-08, 1.3946e-08,\n 3.6817e-08],\n [1.6021e-09, 1.9585e-08, 2.5993e-07, ..., 1.4630e-06, 7.8401e-09,\n 2.0891e-08],\n [2.0659e-07, 3.0022e-07, 9.3825e-09, ..., 2.0093e-08, 2.0115e-07,\n 6.0559e-08],\n ...,\n [7.1778e-17, 4.3796e-20, 0.0000e+00, ..., 5.3102e-17, 1.5332e-21,\n 1.4793e-17],\n [6.0463e-08, 2.1588e-06, 1.8495e-08, ..., 6.3987e-08, 8.0948e-08,\n 1.0711e-07],\n [3.6486e-15, 2.9544e-15, 3.3737e-16, ..., 8.4125e-14, 5.2729e-15,\n 4.5633e-14]], device='cuda:0')" + }, + "12": { + "step": "tensor(11268.)", + "exp_avg": "tensor([ 3.6191e-03, -4.2981e-03, 3.7659e-03, ..., 5.6052e-45,\n -2.7776e-03, 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([2.3870e-04, 3.3148e-04, 3.8900e-04, ..., 1.2460e-13, 3.4574e-04,\n 9.7985e-11], device='cuda:0')" + }, + "13": { + "step": "tensor(11268.)", + "exp_avg": "tensor([[ 2.5711e-05, -2.4878e-07, 8.3848e-06, ..., 5.6052e-45,\n 6.5913e-06, -5.6052e-45],\n [ 2.2018e-05, -5.3922e-06, 9.8501e-06, ..., -5.6052e-45,\n 2.5939e-05, 5.6052e-45],\n [-8.8973e-06, -3.1305e-06, 1.2453e-05, ..., -5.6052e-45,\n -1.2431e-05, 5.6052e-45],\n ...,\n [ 1.4037e-05, 2.3886e-06, 1.0850e-05, ..., 5.6052e-45,\n 1.2603e-06, 5.6052e-45],\n [-1.0570e-05, -1.3528e-06, -6.6395e-06, ..., 5.6052e-45,\n -1.9774e-05, 5.6052e-45],\n [ 3.5292e-05, -4.4219e-06, -1.6989e-05, ..., 5.6052e-45,\n 7.7919e-06, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.5746e-09, 4.5356e-10, 1.8610e-09, ..., 5.3496e-18, 9.6945e-10,\n 1.0108e-15],\n [3.5806e-09, 9.7198e-10, 3.4649e-09, ..., 4.4327e-18, 1.2689e-09,\n 1.2006e-15],\n [3.4079e-09, 2.8709e-09, 3.0984e-09, ..., 3.2865e-18, 1.7144e-09,\n 1.6905e-15],\n ...,\n [3.1932e-09, 1.0491e-09, 3.1862e-09, ..., 3.1285e-19, 1.2729e-09,\n 1.1700e-15],\n [2.8781e-09, 1.2536e-09, 3.3328e-09, ..., 7.3675e-19, 1.2820e-09,\n 2.8269e-15],\n [3.3842e-09, 6.9551e-10, 3.2171e-09, ..., 2.2086e-20, 1.2353e-09,\n 5.9147e-15]], device='cuda:0')" + }, + "14": { + "step": "tensor(11268.)", + "exp_avg": "tensor(-0.0007, device='cuda:0')", + "exp_avg_sq": "tensor(1.2044e-05, device='cuda:0')" + }, + "15": { + "step": "tensor(11268.)", + "exp_avg": "tensor([[-8.9226e-11, 2.1522e-09, 3.6454e-10, ..., -1.0754e-10,\n -8.5746e-10, -2.7812e-11],\n [ 5.9135e-10, -1.0352e-10, 1.8356e-09, ..., 6.7308e-11,\n -4.7717e-10, 1.0061e-10],\n [ 1.1673e-09, 1.3810e-08, 2.1887e-09, ..., 1.2716e-09,\n 2.2559e-11, 5.9305e-11],\n ...,\n [-5.7835e-10, -4.4085e-09, 8.0287e-10, ..., 4.6155e-10,\n -4.1990e-10, 1.7873e-10],\n [-1.0454e-09, -4.3036e-09, -9.2974e-10, ..., 4.9743e-11,\n -5.0557e-10, -1.4900e-10],\n [-3.5341e-09, -2.5066e-08, -2.0747e-09, ..., -1.1294e-09,\n 6.0790e-10, -8.8939e-10]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.9600e-16, 9.0683e-17, 2.9448e-16, ..., 6.1778e-17, 5.9064e-17,\n 1.3559e-16],\n [5.5976e-17, 5.1370e-17, 9.3359e-17, ..., 2.4562e-17, 2.2414e-17,\n 4.5235e-17],\n [2.0968e-16, 4.5976e-16, 4.4203e-16, ..., 3.6141e-16, 2.4962e-16,\n 3.0034e-16],\n ...,\n [7.3841e-17, 5.6092e-17, 6.3683e-17, ..., 7.3018e-17, 5.3695e-17,\n 3.4220e-17],\n [2.8565e-16, 7.4844e-17, 2.3404e-16, ..., 5.5809e-17, 5.0528e-17,\n 9.0165e-17],\n [2.0907e-15, 1.3547e-15, 1.9896e-15, ..., 1.0906e-15, 6.7687e-16,\n 1.0710e-15]], device='cuda:0')" + }, + "16": { + "step": "tensor(11268.)", + "exp_avg": "tensor([ 3.2132e-08, 2.6112e-08, 4.2058e-07, 1.1746e-07, 9.7456e-08,\n 1.3688e-07, 6.9743e-08, 1.2884e-07, 2.1761e-06, 8.1220e-08,\n 7.8583e-08, -7.2763e-08, 4.8075e-08, -5.2520e-07, 2.2836e-07,\n 6.5425e-08, 1.2156e-07, -1.6487e-07, -1.4995e-07, 6.2826e-08,\n 1.4438e-07, -1.6066e-07, 7.0488e-09, -1.6754e-07, -4.1956e-07,\n -4.6073e-07, 2.7846e-08, -7.7395e-08, -3.5427e-07, -5.2901e-08,\n -2.0446e-07, -1.8618e-07, -5.1898e-08, 2.7430e-07, -5.8291e-07,\n 5.1333e-08, 1.4671e-07, 1.3932e-07, -1.3644e-08, 6.9298e-08,\n 1.7888e-07, 1.3412e-07, 3.1098e-08, -4.0435e-09, 1.4007e-06,\n 2.1409e-07, -1.6006e-06, -2.0438e-07, 4.4557e-08, -1.2734e-06,\n 2.3081e-07, -2.7563e-07, 3.2015e-07, 1.1064e-07, 1.0045e-07,\n -1.8683e-07, 1.3717e-07, -5.4533e-07, 1.6633e-06, 8.9665e-08,\n -7.8867e-07, -6.7498e-08, -1.0891e-07, -7.0696e-07], device='cuda:0')", + "exp_avg_sq": "tensor([9.5571e-13, 2.0319e-13, 9.7161e-13, 8.3739e-14, 2.4294e-13, 3.2306e-13,\n 2.0689e-12, 1.5185e-11, 1.9124e-11, 6.9448e-14, 4.9626e-13, 1.3705e-13,\n 9.2770e-14, 1.9147e-11, 4.6981e-13, 5.8654e-14, 3.7215e-13, 4.4040e-13,\n 2.5916e-13, 2.0825e-13, 2.4715e-12, 7.3785e-13, 5.9507e-13, 1.1317e-12,\n 2.7818e-12, 1.1071e-11, 1.4159e-13, 1.9935e-13, 1.0127e-11, 1.1920e-13,\n 9.1698e-13, 2.8435e-12, 3.6314e-13, 6.8495e-12, 1.4536e-11, 1.1142e-12,\n 1.7364e-11, 3.7527e-12, 6.5653e-13, 4.7598e-12, 1.2246e-13, 1.0956e-13,\n 2.6750e-12, 1.9745e-12, 8.6939e-12, 5.9341e-13, 1.2402e-11, 3.0028e-13,\n 9.7906e-13, 9.6640e-12, 2.9654e-11, 3.0964e-12, 9.8479e-13, 1.1214e-13,\n 8.3856e-14, 1.1824e-12, 4.0629e-13, 1.0603e-11, 1.3857e-11, 2.7280e-12,\n 5.1949e-11, 2.5202e-13, 6.7267e-13, 6.1961e-12], device='cuda:0')" + }, + "17": { + "step": "tensor(11268.)", + "exp_avg": "tensor([ 3.0797e-10, 1.1932e-09, 2.8893e-08, -1.2612e-09, 1.5263e-09,\n -2.7739e-10, -5.7574e-10, 8.0192e-09, 5.2081e-07, 6.6080e-10,\n -3.7483e-10, 2.3686e-09, -3.8692e-10, 6.1724e-08, 4.4320e-09,\n 4.2013e-10, 1.0511e-10, 7.4412e-09, 1.7042e-09, 5.6507e-10,\n -2.3290e-09, 1.6134e-10, 4.5185e-10, 1.3778e-09, -3.5318e-09,\n 3.1078e-08, 1.3034e-09, 4.8423e-09, 5.6007e-09, 3.7647e-10,\n 1.5718e-09, 1.1757e-08, 1.0382e-09, -8.8661e-09, 1.0689e-09,\n -4.3156e-12, 1.7430e-07, -2.6781e-09, 2.7883e-09, -2.1992e-09,\n -9.7376e-10, 2.0991e-09, -2.8336e-10, -5.9796e-10, 4.3436e-07,\n -2.8923e-09, -3.3670e-08, 1.1252e-09, -1.6946e-11, 4.0196e-08,\n -6.8514e-09, 1.0392e-09, 1.8290e-09, 5.3721e-09, 4.6612e-09,\n 1.8151e-09, 8.4963e-10, 6.3929e-10, 6.3513e-07, -2.3531e-09,\n 5.3507e-09, 1.7874e-09, 9.2299e-10, -1.0730e-08], device='cuda:0')", + "exp_avg_sq": "tensor([8.3077e-16, 2.2449e-17, 8.0289e-15, 6.8738e-17, 1.0068e-15, 4.7042e-16,\n 2.9101e-15, 4.8841e-15, 1.9126e-12, 2.2911e-17, 1.5968e-16, 5.1928e-17,\n 1.8886e-17, 9.7663e-13, 3.7090e-15, 6.2781e-18, 5.7118e-16, 1.0146e-15,\n 2.5821e-16, 1.3428e-17, 1.1494e-15, 9.8067e-16, 3.2155e-16, 2.1453e-15,\n 3.8353e-15, 6.7256e-13, 4.1231e-17, 5.4212e-16, 1.3364e-14, 3.5683e-17,\n 5.8647e-16, 8.3840e-14, 6.3972e-17, 2.5010e-15, 2.6890e-14, 9.8921e-16,\n 1.7837e-12, 1.8706e-15, 1.1294e-16, 7.2757e-15, 8.2424e-18, 8.9344e-17,\n 2.6982e-15, 2.3855e-15, 1.8559e-12, 2.6093e-15, 3.1749e-13, 3.3253e-16,\n 8.9221e-16, 2.2367e-12, 3.0435e-14, 4.7635e-15, 1.7290e-15, 7.0689e-16,\n 2.4939e-16, 2.5032e-15, 4.6053e-16, 1.5127e-14, 8.1248e-13, 1.5360e-15,\n 9.8002e-14, 3.2292e-16, 5.7950e-16, 2.6218e-14], device='cuda:0')" + }, + "18": { + "step": "tensor(11268.)", + "exp_avg": "tensor([-6.8079e-10, -3.1203e-09, 6.4882e-08, 2.4137e-09, -2.8598e-09,\n 1.4749e-10, 1.5239e-09, -7.3812e-09, 2.3327e-07, -1.4172e-09,\n 1.2067e-09, -7.4631e-09, 9.3327e-10, 3.3954e-08, 3.4812e-08,\n -1.1400e-09, -4.3012e-10, -1.8055e-08, -8.3421e-09, -1.1785e-09,\n 4.9209e-09, -1.4779e-09, -1.2396e-09, -8.1390e-09, -1.9828e-08,\n 1.9422e-08, -2.5342e-09, -1.2512e-08, -2.3933e-08, -1.4032e-09,\n -1.1366e-08, 1.6654e-08, -3.7071e-09, 2.1274e-08, -3.6107e-08,\n 6.5407e-12, 7.4159e-08, 5.5189e-09, -6.1587e-09, 7.9714e-09,\n 1.2141e-09, -2.9246e-09, 6.4547e-10, 2.5734e-09, 1.8891e-07,\n 2.8909e-09, -1.0943e-08, -1.1106e-08, 4.2701e-11, 1.2285e-08,\n 1.7285e-08, -1.4284e-08, 4.2772e-08, -7.4441e-09, -6.2414e-09,\n -1.1360e-08, -1.6462e-09, -3.5007e-08, 2.3112e-07, 6.0912e-09,\n -6.0331e-08, -6.6285e-09, -4.6469e-09, -3.7543e-08], device='cuda:0')", + "exp_avg_sq": "tensor([4.4852e-16, 2.4281e-16, 4.2878e-14, 6.7943e-16, 1.3574e-15, 1.1209e-15,\n 1.7431e-15, 2.4512e-14, 3.2622e-13, 1.8635e-16, 1.4895e-15, 5.5491e-16,\n 1.2735e-16, 2.1214e-13, 2.0720e-14, 5.7826e-17, 1.2022e-15, 2.8039e-15,\n 8.9439e-16, 6.6136e-17, 3.5515e-15, 2.2462e-15, 4.0316e-16, 3.3310e-15,\n 7.0735e-15, 2.4203e-13, 1.7968e-16, 1.1193e-15, 3.4151e-14, 3.6203e-16,\n 3.1850e-15, 1.1199e-13, 1.5238e-16, 1.4705e-14, 4.5256e-14, 8.1421e-16,\n 3.8389e-13, 5.9224e-15, 8.0885e-16, 5.2041e-15, 3.5147e-17, 3.0244e-16,\n 2.9264e-15, 1.6438e-15, 3.0544e-13, 2.1311e-15, 3.9275e-14, 1.2077e-15,\n 7.7497e-16, 2.7947e-13, 4.6654e-14, 7.5518e-15, 3.1401e-14, 1.0484e-15,\n 3.9511e-16, 3.7293e-15, 1.3519e-15, 3.3254e-14, 1.0977e-13, 3.6853e-15,\n 1.4761e-13, 8.1914e-16, 1.9314e-15, 2.3399e-14], device='cuda:0')" + }, + "19": { + "step": "tensor(11268.)", + "exp_avg": "tensor([[-1.3049e-07, -1.1250e-07, 2.8857e-07, -1.2879e-07, -1.3519e-07,\n -1.4168e-07, -1.2911e-07, -1.1484e-07, 2.2284e-06, -1.4212e-07,\n -2.0462e-08, -9.9650e-08, -1.1482e-07, 2.5657e-07, 1.1390e-07,\n -1.2876e-07, -1.2396e-07, -1.2072e-07, -7.5673e-08, -1.3344e-07,\n -1.3331e-07, -1.2687e-08, -1.0961e-07, -6.8413e-08, 6.4067e-08,\n 1.9403e-07, -1.3423e-07, -1.3283e-07, -7.7406e-08, -1.9482e-08,\n -4.6080e-08, 1.1497e-07, -1.0016e-07, -1.1223e-07, -1.5899e-08,\n -1.3836e-07, 7.5859e-07, -1.2746e-07, -1.2954e-07, -1.0049e-07,\n -1.2668e-07, -1.3336e-07, -2.5334e-08, -7.5334e-08, 2.2610e-06,\n -1.3028e-07, -1.1073e-07, -4.9455e-08, -1.1737e-07, 1.8151e-07,\n -1.0897e-07, -4.0111e-08, 2.8223e-08, -1.4329e-07, -1.3139e-07,\n -6.2231e-08, -1.2594e-07, -9.6589e-09, 2.1174e-06, -1.1706e-07,\n -5.2853e-08, -1.0198e-07, -6.6453e-08, 1.3157e-07],\n [ 1.3044e-07, 1.1237e-07, -2.8829e-07, 1.2866e-07, 1.3510e-07,\n 1.4154e-07, 1.2907e-07, 1.1480e-07, -2.2270e-06, 1.4204e-07,\n 2.0394e-08, 9.9559e-08, 1.1476e-07, -2.5525e-07, -1.1368e-07,\n 1.2873e-07, 1.2385e-07, 1.2064e-07, 7.5590e-08, 1.3334e-07,\n 1.3323e-07, 1.2618e-08, 1.0956e-07, 6.8412e-08, -6.4101e-08,\n -1.9246e-07, 1.3418e-07, 1.3272e-07, 7.7367e-08, 1.9426e-08,\n 4.5990e-08, -1.1442e-07, 1.0007e-07, 1.1216e-07, 1.5882e-08,\n 1.3823e-07, -7.5691e-07, 1.2742e-07, 1.2946e-07, 1.0045e-07,\n 1.2664e-07, 1.3325e-07, 2.5279e-08, 7.5280e-08, -2.2595e-06,\n 1.3028e-07, 1.1355e-07, 4.9327e-08, 1.1728e-07, -1.7944e-07,\n 1.0893e-07, 4.0077e-08, -2.8140e-08, 1.4323e-07, 1.3130e-07,\n 6.2156e-08, 1.2584e-07, 9.6616e-09, -2.1152e-06, 1.1696e-07,\n 5.2891e-08, 1.0194e-07, 6.6385e-08, -1.3156e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.1531e-12, 8.5500e-14, 3.7259e-13, 1.0170e-13, 8.2299e-14, 8.3307e-14,\n 1.9310e-12, 1.2581e-13, 1.3822e-11, 8.2681e-14, 7.9507e-14, 7.4762e-14,\n 9.6725e-14, 1.1119e-11, 3.6250e-13, 8.3991e-14, 5.0275e-13, 9.2017e-14,\n 5.2718e-13, 7.0276e-13, 1.9836e-13, 3.9733e-13, 7.2715e-13, 8.5831e-13,\n 5.8212e-13, 1.0217e-11, 5.0048e-13, 9.0236e-14, 9.4957e-13, 1.3472e-13,\n 9.8515e-14, 2.5257e-12, 2.3342e-12, 1.0536e-13, 1.4062e-12, 1.1854e-12,\n 1.3167e-11, 2.1712e-13, 7.6875e-14, 1.7154e-12, 1.5703e-13, 1.1203e-13,\n 8.6008e-13, 1.5657e-12, 1.5349e-11, 9.7112e-14, 3.4790e-12, 4.5064e-13,\n 1.0662e-12, 1.4655e-11, 8.1494e-13, 8.0510e-13, 8.0881e-14, 1.0604e-13,\n 1.1480e-13, 1.4781e-12, 1.0700e-13, 1.0753e-12, 6.5249e-12, 2.8749e-13,\n 1.6520e-12, 8.8677e-13, 1.0231e-13, 4.0868e-12],\n [2.1531e-12, 8.5498e-14, 3.7259e-13, 1.0170e-13, 8.2294e-14, 8.3304e-14,\n 1.9310e-12, 1.2581e-13, 1.3822e-11, 8.2676e-14, 7.9503e-14, 7.4758e-14,\n 9.6723e-14, 1.1119e-11, 3.6249e-13, 8.3990e-14, 5.0274e-13, 9.2014e-14,\n 5.2718e-13, 7.0276e-13, 1.9836e-13, 3.9733e-13, 7.2715e-13, 8.5831e-13,\n 5.8212e-13, 1.0216e-11, 5.0048e-13, 9.0234e-14, 9.4957e-13, 1.3471e-13,\n 9.8515e-14, 2.5257e-12, 2.3342e-12, 1.0536e-13, 1.4062e-12, 1.1854e-12,\n 1.3166e-11, 2.1711e-13, 7.6869e-14, 1.7154e-12, 1.5703e-13, 1.1203e-13,\n 8.6007e-13, 1.5657e-12, 1.5348e-11, 9.7108e-14, 3.4780e-12, 4.5063e-13,\n 1.0662e-12, 1.4654e-11, 8.1494e-13, 8.0510e-13, 8.0881e-14, 1.0604e-13,\n 1.1479e-13, 1.4781e-12, 1.0700e-13, 1.0753e-12, 6.5240e-12, 2.8749e-13,\n 1.6520e-12, 8.8677e-13, 1.0231e-13, 4.0868e-12]], device='cuda:0')" + }, + "20": { + "step": "tensor(11268.)", + "exp_avg": "tensor([ 7.5113e-07, -7.5058e-07], device='cuda:0')", + "exp_avg_sq": "tensor([8.6350e-12, 8.6348e-12], device='cuda:0')" + }, + "21": { + "step": "tensor(11268.)", + "exp_avg": "tensor([[-1.4937e-12, -9.3306e-12, -1.2820e-12, ..., 5.4941e-12,\n -2.2011e-11, 6.6938e-12],\n [-1.5869e-12, -1.0731e-11, 1.4214e-12, ..., 3.2038e-12,\n -2.2345e-11, 2.4835e-11],\n [ 1.4109e-11, 2.3122e-11, 7.1240e-11, ..., -4.8743e-11,\n 1.0910e-10, -2.3038e-10],\n ...,\n [ 2.7266e-12, -2.8022e-12, 4.5875e-12, ..., 3.0198e-12,\n 2.5916e-12, 2.3355e-11],\n [ 1.1739e-11, 3.8071e-11, 1.4239e-10, ..., 1.3533e-10,\n 1.1976e-10, 4.7461e-10],\n [ 3.2001e-12, -1.1270e-12, -1.1600e-11, ..., 1.0005e-12,\n -3.0328e-12, 6.5593e-12]], device='cuda:0')", + "exp_avg_sq": "tensor([[9.5771e-19, 5.6323e-19, 1.6802e-19, ..., 3.6432e-17, 9.7150e-20,\n 7.8240e-19],\n [1.7288e-18, 7.8123e-19, 5.9162e-19, ..., 4.3007e-17, 1.5363e-19,\n 5.6823e-19],\n [8.7309e-18, 2.7540e-18, 7.0989e-19, ..., 1.3348e-18, 7.7566e-19,\n 3.1840e-18],\n ...,\n [6.7447e-20, 2.0565e-20, 1.3160e-20, ..., 3.3647e-20, 1.6730e-20,\n 2.4792e-20],\n [3.7960e-18, 4.1626e-18, 1.0576e-18, ..., 5.0244e-17, 1.4905e-18,\n 1.5634e-17],\n [6.6109e-20, 4.3137e-20, 7.5338e-21, ..., 1.6854e-19, 1.2923e-20,\n 1.6982e-20]], device='cuda:0')" + }, + "22": { + "step": "tensor(11268.)", + "exp_avg": "tensor([-5.4079e-10, 1.1434e-09, -1.1479e-09, 1.3759e-09, 2.8258e-10,\n 6.8102e-10, 7.0025e-10, -8.5146e-10, -1.9307e-11, 3.5236e-10,\n -9.6917e-11, 1.5636e-09, 1.8412e-09, -4.0990e-10, -1.2206e-09,\n 1.1855e-09, 1.4463e-09, 9.1900e-10, 8.9359e-10, -5.0312e-09,\n 5.6834e-10, 2.1468e-10, -8.4452e-10, 7.4329e-10, 1.7041e-09,\n 9.7491e-10, 2.7158e-10, -8.1122e-10, -1.2249e-09, -1.9893e-09,\n 3.3288e-10, 6.2718e-09, 6.5266e-10, 4.3438e-10, 3.2378e-10,\n -5.7971e-09, 8.7579e-10, 7.7782e-10, 7.7830e-10, 1.4993e-10,\n 5.8822e-10, -1.8965e-10, 7.2318e-10, -2.1526e-10, 3.6035e-11,\n -2.0592e-11, -3.0540e-09, -1.9302e-09, -1.5702e-10, 5.6345e-10,\n 3.6614e-10, -2.9661e-10, -4.4644e-10, -6.2098e-10, 8.5374e-10,\n -1.6108e-09, 2.3215e-10, -8.1293e-10, -1.4293e-08, 5.3084e-09,\n -1.7855e-09, 8.9635e-10, 7.6351e-09, 7.5589e-10], device='cuda:0')", + "exp_avg_sq": "tensor([1.0940e-14, 1.1226e-14, 1.5500e-14, 1.1146e-15, 4.7065e-15, 1.1774e-16,\n 9.1057e-16, 5.8305e-15, 1.6932e-14, 3.2497e-16, 1.6484e-16, 3.6404e-14,\n 1.1962e-14, 1.8293e-14, 3.6730e-15, 7.4845e-15, 2.0625e-14, 1.5751e-16,\n 3.0085e-15, 5.8873e-15, 7.8893e-15, 1.5862e-16, 2.8374e-15, 4.0620e-16,\n 1.7089e-14, 1.8308e-14, 3.8406e-16, 2.0405e-16, 1.1235e-14, 1.0643e-15,\n 1.0221e-14, 8.8356e-15, 3.6685e-15, 6.8377e-14, 1.5010e-16, 3.1527e-14,\n 3.2372e-15, 1.0039e-16, 2.5999e-14, 4.9044e-16, 1.1900e-16, 3.4453e-16,\n 1.4180e-16, 7.9408e-15, 8.9346e-17, 1.5432e-15, 8.4569e-15, 2.2916e-14,\n 2.7128e-16, 9.6282e-15, 6.1089e-16, 1.7337e-14, 4.2314e-16, 4.4067e-14,\n 6.3774e-15, 1.6892e-16, 2.1284e-16, 3.5512e-15, 1.3393e-14, 8.5367e-15,\n 7.4660e-15, 1.1641e-16, 2.7225e-14, 1.7094e-16], device='cuda:0')" + }, + "23": { + "step": "tensor(11268.)", + "exp_avg": "tensor([ 4.7344e-12, -1.8094e-11, -9.5870e-10, -8.2460e-12, -6.6109e-12,\n -2.5230e-12, 2.4908e-13, 1.1305e-11, -7.3142e-12, -6.1356e-15,\n 4.0816e-12, -1.2778e-11, -6.1319e-10, -3.9902e-12, 1.1015e-11,\n -1.3645e-11, -3.8765e-10, -1.9698e-11, -6.1660e-12, -4.7269e-10,\n -1.3241e-11, 1.9895e-12, 6.0891e-12, -2.6490e-11, 3.4704e-13,\n 2.7046e-13, -2.3336e-11, -7.5849e-13, -9.5981e-10, 2.1308e-11,\n -1.0611e-11, -2.6572e-10, -5.9357e-12, -6.0643e-11, 7.2963e-13,\n -5.8661e-10, -3.9549e-13, -3.7452e-14, -1.8677e-11, 3.5666e-12,\n -3.5483e-12, 2.9898e-12, -1.8836e-12, -6.3705e-11, 3.4009e-13,\n -2.4603e-12, -1.0142e-09, -6.9283e-13, -2.5277e-12, -8.5289e-11,\n -6.2823e-13, -3.5849e-12, 5.7719e-12, -2.6301e-10, 3.8805e-13,\n 1.2793e-11, 4.9333e-12, 3.9679e-13, -1.8573e-09, 3.1496e-11,\n -8.3425e-10, -1.2840e-11, 6.3873e-10, -6.3784e-12], device='cuda:0')", + "exp_avg_sq": "tensor([3.9599e-17, 2.7663e-17, 2.7630e-16, 8.5985e-19, 6.8701e-18, 1.1801e-20,\n 2.9210e-19, 1.5864e-17, 5.4678e-17, 5.7349e-21, 3.4022e-19, 9.1160e-17,\n 2.3874e-16, 5.7833e-17, 7.6694e-18, 4.3780e-18, 3.5419e-16, 7.5530e-20,\n 1.8106e-17, 1.1674e-16, 1.9276e-17, 2.3904e-19, 3.6981e-18, 1.1873e-19,\n 3.1390e-17, 1.7086e-18, 2.3055e-19, 4.2941e-21, 1.0046e-16, 7.3507e-19,\n 1.4603e-17, 1.5403e-16, 4.4731e-18, 5.9566e-17, 6.7766e-21, 3.3533e-17,\n 1.9822e-18, 3.9326e-20, 9.2010e-17, 3.1319e-19, 4.7892e-21, 8.5344e-20,\n 3.4082e-20, 4.8820e-18, 3.1921e-20, 1.6344e-18, 2.1719e-16, 6.0771e-17,\n 3.4943e-20, 5.0999e-18, 9.6291e-19, 4.0812e-17, 7.3379e-19, 9.4941e-17,\n 6.8496e-18, 6.3467e-20, 6.6997e-19, 2.2539e-17, 6.7764e-17, 2.5548e-16,\n 2.3026e-16, 2.0424e-20, 1.7817e-16, 1.1236e-19], device='cuda:0')" + }, + "24": { + "step": "tensor(11268.)", + "exp_avg": "tensor([-3.9046e-12, 4.7945e-11, -3.8784e-10, 3.9906e-11, 9.4159e-12,\n 8.4363e-12, -7.7876e-13, -6.8494e-12, 1.2624e-11, 7.0978e-13,\n 1.5401e-11, 4.7165e-11, -2.9573e-10, -1.9455e-12, -1.3788e-11,\n -4.5923e-11, -2.6810e-10, 3.2761e-11, 2.1420e-11, -4.7156e-10,\n 2.6023e-11, -7.5735e-12, -1.6329e-11, -7.6522e-11, 1.2174e-11,\n 1.4042e-12, 2.7925e-11, -1.8034e-11, -4.5134e-10, -7.6290e-11,\n 1.9312e-11, -1.2971e-10, 2.6792e-11, -9.4110e-11, -1.0790e-11,\n -5.7407e-10, -5.0345e-12, 5.3417e-13, 3.0523e-11, -8.6920e-12,\n 8.3362e-12, 1.0262e-11, 6.0084e-12, -1.3556e-10, -1.4255e-12,\n 8.5128e-12, -4.8558e-10, -8.3782e-11, -6.7396e-12, -1.5592e-10,\n 4.6962e-12, 6.1960e-12, -1.0674e-11, -2.4672e-10, 4.9058e-12,\n -2.9266e-11, -3.2731e-11, -1.6868e-11, -9.8456e-10, -2.3855e-13,\n -4.2662e-10, 2.2514e-11, 2.4324e-10, 2.3463e-11], device='cuda:0')", + "exp_avg_sq": "tensor([5.5146e-17, 6.2555e-17, 6.9163e-17, 1.3614e-18, 1.2745e-17, 2.3206e-20,\n 4.1729e-19, 5.6391e-17, 9.7742e-17, 1.4906e-19, 2.7720e-18, 1.0349e-16,\n 5.1487e-17, 4.2925e-17, 9.8055e-18, 7.1591e-18, 1.0996e-16, 6.0926e-19,\n 9.8281e-18, 4.5872e-17, 2.7101e-17, 2.4723e-19, 1.4147e-17, 1.8518e-18,\n 2.4778e-17, 1.2073e-17, 1.6152e-18, 8.4428e-20, 3.7564e-17, 1.0420e-18,\n 4.0896e-17, 3.0585e-17, 2.2606e-17, 9.5547e-17, 4.3056e-20, 7.1039e-17,\n 2.2435e-18, 4.5907e-20, 6.8879e-17, 1.7383e-18, 2.4391e-20, 2.1939e-19,\n 8.3361e-20, 1.7028e-17, 3.2735e-20, 6.1189e-18, 4.9033e-17, 8.8587e-17,\n 1.4801e-19, 1.1941e-17, 1.2049e-18, 3.6030e-17, 2.9446e-18, 6.4166e-17,\n 5.8459e-18, 6.0809e-19, 2.8823e-18, 5.3716e-17, 3.5066e-17, 3.7727e-17,\n 4.9275e-17, 1.4525e-19, 6.9851e-17, 1.4228e-18], device='cuda:0')" + }, + "25": { + "step": "tensor(11268.)", + "exp_avg": "tensor([[-1.9020e-10, -7.9209e-11, 3.0429e-09, -1.8249e-10, -1.0900e-10,\n 2.4309e-12, -5.5517e-11, -2.2327e-10, -1.5152e-10, -1.2519e-10,\n -3.5970e-10, -1.3463e-10, 2.1343e-09, -1.3644e-10, -2.0571e-10,\n 2.2623e-10, 1.2243e-09, -1.8414e-10, -1.4495e-10, 1.3361e-09,\n -2.4341e-10, -2.3092e-10, 9.0411e-11, 3.7831e-10, 2.0187e-11,\n -3.2756e-11, -1.1366e-10, -3.0022e-10, 3.4850e-09, -2.7003e-10,\n -1.9447e-10, 8.8873e-10, -1.9038e-10, 2.6784e-10, 7.9533e-12,\n 2.1445e-09, 4.1860e-11, -8.9858e-11, -2.2245e-10, -1.4084e-10,\n -1.8348e-10, -3.4866e-10, -1.3219e-10, 3.8650e-10, -1.3274e-10,\n -2.0504e-10, 3.5857e-09, -3.2813e-10, -2.6609e-10, 9.9231e-10,\n 6.1696e-11, -1.6531e-10, -2.1788e-10, 1.4118e-09, -7.9675e-11,\n -7.7106e-11, -1.1727e-10, -3.2602e-11, 4.6482e-09, -6.1676e-11,\n 2.8563e-09, -1.9272e-10, -1.7233e-09, -1.3244e-10],\n [ 1.8893e-10, 7.7176e-11, -3.1412e-09, 1.8277e-10, 9.6974e-11,\n -8.8702e-12, 4.9061e-11, 2.2059e-10, 1.3673e-10, 1.2165e-10,\n 3.5575e-10, 1.2754e-10, -1.9722e-09, 1.3892e-10, 2.0942e-10,\n -2.3838e-10, -1.1143e-09, 1.7840e-10, 1.3768e-10, -1.2735e-09,\n 2.3768e-10, 2.2109e-10, -9.1364e-11, -3.8460e-10, -7.1486e-12,\n 2.7464e-11, 1.1795e-10, 2.9660e-10, -3.2824e-09, 2.6751e-10,\n 1.9573e-10, -8.3978e-10, 1.8767e-10, -2.3194e-10, -1.3240e-11,\n -2.1507e-09, -4.3024e-11, 8.4235e-11, 2.1949e-10, 1.2613e-10,\n 1.7869e-10, 3.4432e-10, 1.3050e-10, -3.8340e-10, 1.3835e-10,\n 2.0129e-10, -3.5286e-09, 3.2691e-10, 2.5793e-10, -9.9224e-10,\n -6.7410e-11, 1.5653e-10, 2.0696e-10, -1.3881e-09, 8.7915e-11,\n 7.1222e-11, 1.1147e-10, 2.8196e-11, -4.4566e-09, 1.9948e-10,\n -2.8665e-09, 1.9263e-10, 1.7609e-09, 1.2127e-10]], device='cuda:0')", + "exp_avg_sq": "tensor([[6.7528e-16, 2.7794e-16, 2.8021e-15, 3.4087e-17, 9.0579e-16, 3.9372e-17,\n 4.9023e-16, 1.6000e-16, 2.8102e-16, 2.2045e-17, 3.9699e-17, 9.4402e-16,\n 2.6899e-15, 2.3803e-15, 1.2680e-15, 8.8317e-16, 3.4711e-15, 4.1851e-17,\n 4.4917e-15, 1.2484e-15, 8.4438e-16, 4.9356e-17, 1.9396e-16, 2.8033e-17,\n 2.3987e-15, 7.9078e-17, 2.8559e-17, 4.4655e-17, 1.2271e-15, 3.7679e-17,\n 2.7467e-16, 1.7475e-15, 7.7443e-17, 1.0873e-15, 5.0510e-17, 4.9268e-16,\n 7.1792e-16, 4.0375e-17, 2.4241e-15, 3.3200e-17, 5.2780e-17, 3.7510e-17,\n 1.1810e-15, 1.8587e-16, 4.6415e-17, 2.5753e-16, 2.3767e-15, 4.7184e-16,\n 3.9195e-17, 5.8080e-16, 1.6504e-15, 2.1845e-15, 9.6192e-17, 3.3213e-15,\n 1.7934e-15, 3.4813e-17, 3.5392e-17, 2.1912e-16, 4.3540e-16, 1.5680e-15,\n 2.3728e-15, 4.3353e-17, 1.1936e-15, 3.9369e-17],\n [6.7528e-16, 2.7796e-16, 2.8025e-15, 3.4095e-17, 9.0579e-16, 3.9382e-17,\n 4.9024e-16, 1.6001e-16, 2.8102e-16, 2.2046e-17, 3.9698e-17, 9.4402e-16,\n 2.6908e-15, 2.3803e-15, 1.2680e-15, 8.8318e-16, 3.4721e-15, 4.1862e-17,\n 4.4917e-15, 1.2481e-15, 8.4439e-16, 4.9363e-17, 1.9397e-16, 2.8027e-17,\n 2.3987e-15, 7.9083e-17, 2.8569e-17, 4.4663e-17, 1.2287e-15, 3.7695e-17,\n 2.7468e-16, 1.7500e-15, 7.7457e-17, 1.0873e-15, 5.0518e-17, 4.9278e-16,\n 7.1791e-16, 4.0389e-17, 2.4241e-15, 3.3207e-17, 5.2797e-17, 3.7519e-17,\n 1.1810e-15, 1.8596e-16, 4.6421e-17, 2.5754e-16, 2.3751e-15, 4.7185e-16,\n 3.9197e-17, 5.8083e-16, 1.6504e-15, 2.1845e-15, 9.6199e-17, 3.3213e-15,\n 1.7934e-15, 3.4827e-17, 3.5399e-17, 2.1913e-16, 4.3528e-16, 1.5681e-15,\n 2.3731e-15, 4.3369e-17, 1.1946e-15, 3.9378e-17]], device='cuda:0')" + }, + "26": { + "step": "tensor(11268.)", + "exp_avg": "tensor([ 1.1050e-09, -1.0748e-09], device='cuda:0')", + "exp_avg_sq": "tensor([2.8275e-15, 2.8280e-15], device='cuda:0')" + }, + "27": { + "step": "tensor(11268.)", + "exp_avg": "tensor([[-1.3392e-13, -2.3423e-13, -1.7069e-13, ..., -1.4470e-13,\n 6.6700e-14, -4.4748e-14],\n [ 1.1991e-14, 4.3024e-14, -4.7757e-14, ..., 9.5843e-15,\n -1.0815e-15, 2.7840e-15],\n [ 5.4183e-15, -6.4167e-14, 3.5726e-14, ..., 3.5060e-14,\n 1.6555e-14, -1.3630e-14],\n ...,\n [-2.9552e-14, 6.2897e-14, 1.2483e-13, ..., -1.0195e-14,\n 1.0999e-14, -4.4025e-15],\n [ 3.0498e-14, 1.0204e-13, -2.3116e-14, ..., -2.8654e-14,\n -2.4286e-15, 1.1372e-14],\n [ 3.6306e-14, 2.7900e-14, -8.3961e-15, ..., 1.6839e-14,\n 1.2069e-14, 2.0061e-15]], device='cuda:0')", + "exp_avg_sq": "tensor([[7.8582e-18, 2.3367e-18, 1.1490e-17, ..., 1.7369e-18, 1.3586e-18,\n 5.0896e-18],\n [4.0589e-18, 1.7202e-19, 1.1416e-17, ..., 9.8226e-20, 3.8264e-19,\n 3.9838e-19],\n [2.1996e-16, 2.6339e-17, 3.5284e-16, ..., 2.2731e-17, 2.7433e-17,\n 4.7496e-17],\n ...,\n [3.7493e-16, 3.0039e-17, 5.2967e-16, ..., 3.8282e-17, 3.8027e-17,\n 6.3969e-17],\n [4.7298e-18, 5.6001e-19, 6.5864e-18, ..., 5.7452e-19, 5.8040e-19,\n 1.6208e-18],\n [4.2854e-17, 4.3691e-18, 8.2413e-17, ..., 6.0125e-18, 3.5768e-18,\n 8.7650e-18]], device='cuda:0')" + }, + "28": { + "step": "tensor(11268.)", + "exp_avg": "tensor([-1.4583e-11, 2.8232e-12, -1.8004e-12, -6.4578e-13, 3.4431e-13,\n 1.4798e-12, 2.9945e-12, 9.1927e-13, -4.3404e-12, 8.7966e-13,\n 2.1613e-12, 5.0943e-13, -9.3070e-14, -3.0481e-12, -3.2515e-12,\n -3.0786e-12, 3.2291e-13, 2.5540e-12, 4.6035e-13, 2.0259e-12,\n 9.4196e-13, 9.3663e-13, -1.4325e-12, 8.2729e-13, 1.3966e-12,\n -1.1921e-12, -1.1652e-11, 2.7740e-12, -1.8774e-12, 6.2910e-12,\n -2.5329e-12, 3.4538e-12, 2.5111e-12, -1.1012e-12, -2.4962e-13,\n -8.1373e-12, 1.0374e-12, -4.9609e-13, 5.1228e-12, 1.5141e-14,\n -3.1227e-14, 3.3262e-12, 1.8344e-12, -1.7842e-12, 5.0114e-12,\n -1.1347e-12, 2.2371e-12, 4.0570e-13, 2.3343e-12, -1.2991e-12,\n -4.1186e-12, -3.4290e-12, 1.2632e-12, 8.7990e-13, -7.3167e-13,\n -5.4596e-13, 1.4024e-12, -5.9648e-13, 4.1280e-12, -4.8501e-13,\n 1.0409e-12, 1.4741e-12, 2.1284e-12, 3.4199e-12], device='cuda:0')", + "exp_avg_sq": "tensor([3.0390e-14, 5.2319e-15, 5.0843e-13, 1.2507e-12, 3.1343e-15, 1.5498e-15,\n 5.9557e-14, 1.0056e-14, 1.2160e-12, 1.7781e-13, 4.9233e-15, 5.4533e-15,\n 2.5898e-14, 1.4355e-13, 1.3221e-12, 6.5775e-13, 3.4021e-14, 3.4059e-15,\n 2.5007e-14, 1.9616e-14, 7.0306e-14, 6.0074e-13, 1.4321e-14, 1.0525e-13,\n 4.4918e-14, 6.6374e-15, 3.5346e-12, 4.9452e-14, 7.6478e-14, 3.3415e-12,\n 2.9339e-15, 2.1384e-14, 6.2206e-13, 2.5088e-13, 8.6557e-15, 2.6356e-12,\n 5.9241e-14, 2.7504e-15, 1.1021e-16, 6.0168e-15, 2.6289e-13, 1.0869e-15,\n 9.6293e-13, 2.6316e-15, 2.8443e-15, 1.5335e-12, 1.0074e-14, 5.7443e-15,\n 1.7780e-14, 1.0095e-13, 5.7109e-13, 3.0941e-13, 1.6059e-14, 1.5653e-14,\n 2.8916e-13, 1.9737e-13, 2.6663e-14, 1.8710e-13, 3.1516e-14, 8.9552e-13,\n 1.9790e-14, 7.0387e-13, 1.1576e-14, 9.1348e-14], device='cuda:0')" + }, + "29": { + "step": "tensor(11268.)", + "exp_avg": "tensor([-2.6939e-12, -2.0035e-14, 8.6244e-15, -2.6797e-14, -2.6716e-17,\n -1.2747e-14, 1.6059e-14, -1.4568e-14, -1.4990e-12, -5.3340e-15,\n -4.6090e-15, 5.6299e-15, -1.3655e-14, 2.7189e-14, 2.9239e-14,\n -1.0139e-16, 2.3734e-15, -4.8895e-15, -7.7386e-16, -1.6108e-13,\n -1.4240e-14, -1.7537e-14, -4.4584e-17, -2.1507e-13, -4.3233e-15,\n 1.8895e-15, -1.8693e-12, -5.4914e-15, 1.0775e-14, -9.5883e-13,\n 1.6010e-14, -4.4713e-13, -9.0485e-13, -4.9939e-13, -1.0753e-14,\n -2.1755e-12, -1.1846e-13, 2.4310e-15, -1.0572e-14, 1.4405e-15,\n 4.2944e-16, -4.0883e-14, -2.4488e-14, 1.6997e-14, -2.6242e-14,\n 4.6575e-15, -3.1010e-14, -3.5881e-15, 3.0683e-16, -2.2218e-16,\n 1.9362e-14, -8.5021e-13, -1.8183e-14, -1.4598e-12, -3.8456e-15,\n -5.5438e-16, -1.2869e-12, -9.0418e-18, -1.0200e-13, -9.6775e-13,\n -1.3140e-14, -1.6864e-14, -5.6638e-16, -8.6734e-14], device='cuda:0')", + "exp_avg_sq": "tensor([5.3237e-17, 4.2475e-19, 1.5848e-16, 1.0969e-16, 8.3161e-21, 1.2310e-20,\n 2.5780e-17, 9.6296e-19, 1.1289e-15, 1.9426e-17, 1.1629e-21, 8.7107e-19,\n 2.1177e-18, 4.9502e-18, 2.1284e-16, 3.4121e-16, 3.9969e-18, 7.2137e-21,\n 1.4024e-20, 1.5096e-17, 3.6984e-18, 1.4795e-16, 1.4495e-19, 7.5653e-16,\n 1.5443e-18, 7.4579e-19, 7.7138e-15, 3.1930e-18, 7.3834e-18, 7.2533e-15,\n 1.4869e-19, 3.7722e-17, 6.8882e-16, 1.4415e-15, 8.1661e-19, 4.1502e-15,\n 1.8406e-16, 5.4407e-20, 4.6929e-19, 3.3143e-18, 1.4119e-16, 3.2069e-19,\n 3.0752e-16, 6.5322e-19, 5.6079e-19, 6.5165e-16, 2.7968e-18, 4.6563e-19,\n 6.1025e-19, 2.6138e-17, 4.6091e-16, 4.0395e-16, 3.1534e-18, 9.3353e-17,\n 2.0029e-16, 1.2569e-16, 1.2637e-16, 9.7236e-17, 2.2941e-17, 4.2899e-15,\n 5.1640e-18, 1.2886e-16, 1.8836e-19, 4.7903e-16], device='cuda:0')" + }, + "30": { + "step": "tensor(11268.)", + "exp_avg": "tensor([-1.3770e-12, 7.4043e-14, -2.0988e-15, 2.5724e-14, 3.3778e-17,\n 3.8014e-14, -1.4114e-13, -1.2976e-13, -7.7728e-13, 4.7564e-14,\n 1.2770e-14, -1.7786e-14, 5.5229e-14, -4.0764e-14, -1.8077e-14,\n 1.3182e-14, -4.5899e-15, 1.5966e-14, 8.6988e-15, -2.3110e-13,\n 4.0710e-14, 4.6050e-14, 1.4929e-15, -3.0918e-13, 1.8226e-15,\n -3.8355e-14, -1.0315e-12, 6.4783e-14, -1.2887e-14, -4.3024e-13,\n -2.0840e-14, -3.8129e-13, -4.9827e-13, -4.7467e-13, 2.3583e-14,\n -1.0029e-12, -2.5262e-13, 1.5332e-15, 1.2262e-13, -3.9563e-15,\n 1.8521e-14, 1.1268e-13, 7.5205e-14, -1.6391e-14, 1.3055e-13,\n -4.6501e-15, -1.6440e-14, 2.2217e-14, 5.9929e-15, -5.8502e-16,\n -1.3310e-14, -6.2105e-13, 4.9412e-14, -6.6345e-13, 5.8827e-15,\n 1.6572e-15, -5.7607e-13, 5.7117e-17, -2.0030e-13, -5.6111e-13,\n 2.8105e-14, 5.6558e-14, 4.0483e-15, -1.5710e-13], device='cuda:0')", + "exp_avg_sq": "tensor([3.7287e-17, 1.7419e-17, 4.9089e-16, 1.4024e-15, 2.2811e-19, 1.6793e-18,\n 1.3984e-17, 3.1483e-17, 2.2473e-15, 2.0106e-16, 2.7398e-19, 5.3253e-18,\n 5.8663e-17, 2.0240e-16, 1.4140e-15, 5.8724e-16, 4.1274e-17, 3.7930e-19,\n 1.0598e-18, 1.0048e-16, 1.1589e-16, 6.3685e-16, 3.9757e-19, 4.0472e-16,\n 5.3654e-19, 4.0110e-18, 5.4640e-15, 4.7061e-18, 1.1340e-16, 5.2355e-15,\n 9.3719e-18, 4.2959e-17, 1.2644e-15, 7.2934e-16, 2.6485e-17, 4.1933e-15,\n 2.4670e-16, 6.9284e-18, 6.8858e-18, 8.9437e-18, 1.8937e-16, 8.2625e-18,\n 9.5999e-16, 1.2342e-17, 6.1306e-18, 1.5371e-15, 2.4975e-17, 4.5482e-18,\n 4.3747e-19, 1.2076e-17, 4.2369e-16, 7.5781e-16, 3.6503e-17, 1.6612e-16,\n 1.7584e-16, 4.9046e-17, 2.3611e-16, 2.5810e-17, 1.3982e-16, 1.8416e-15,\n 1.0097e-17, 7.9662e-16, 1.7311e-19, 3.4863e-16], device='cuda:0')" + }, + "31": { + "step": "tensor(11268.)", + "exp_avg": "tensor([[ 1.1854e-11, -4.4977e-13, -4.7234e-13, -2.2299e-13, -5.0487e-14,\n -5.5336e-13, -1.9906e-13, 3.8811e-13, 6.7015e-12, -4.1131e-13,\n -3.8413e-13, -3.6962e-13, -5.6712e-13, -4.4985e-13, -3.4750e-13,\n -2.1574e-13, -1.5383e-13, -3.4079e-13, -4.7071e-13, 2.0961e-12,\n -5.0848e-13, -4.7024e-13, -5.7220e-13, 2.2718e-12, -3.1514e-13,\n -5.2926e-13, 6.6608e-12, -3.7072e-13, -6.2264e-13, 3.6882e-12,\n -5.6360e-13, 2.2760e-12, 5.7411e-12, 4.1077e-12, 3.8639e-14,\n 8.7372e-12, 1.5391e-12, -4.7690e-13, -1.8312e-13, -3.1482e-13,\n -3.4141e-13, -6.7768e-13, -5.8382e-13, -5.1737e-13, -3.4294e-13,\n 3.0696e-14, -3.0722e-13, -5.6274e-13, -8.6144e-14, -4.7757e-13,\n -5.7500e-13, 5.4889e-12, -4.6683e-13, 8.6034e-12, -4.7037e-13,\n -5.2994e-13, 7.3668e-12, -7.9221e-13, 8.9421e-13, 5.6085e-12,\n -5.8766e-13, -3.6844e-13, -2.6121e-13, 9.9775e-13],\n [-1.1896e-11, 4.5228e-13, 4.7557e-13, 2.2302e-13, 5.0475e-14,\n 5.5649e-13, 2.0134e-13, -3.8939e-13, -6.7401e-12, 4.1288e-13,\n 3.8642e-13, 3.7180e-13, 5.6991e-13, 4.5002e-13, 3.5001e-13,\n 2.1606e-13, 1.5405e-13, 3.4226e-13, 4.7336e-13, -2.1081e-12,\n 5.1046e-13, 4.7320e-13, 5.7536e-13, -2.2779e-12, 3.1621e-13,\n 5.3247e-13, -6.6953e-12, 3.7280e-13, 6.2570e-13, -3.7221e-12,\n 5.6659e-13, -2.2971e-12, -5.7812e-12, -4.1173e-12, -3.8969e-14,\n -8.7836e-12, -1.5430e-12, 4.7963e-13, 1.8365e-13, 3.1723e-13,\n 3.4398e-13, 6.8066e-13, 5.8671e-13, 5.2027e-13, 3.4460e-13,\n -3.0969e-14, 3.0736e-13, 5.6574e-13, 8.6801e-14, 4.8061e-13,\n 5.7734e-13, -5.5139e-12, 4.6984e-13, -8.6583e-12, 4.7318e-13,\n 5.3295e-13, -7.4173e-12, 7.9548e-13, -9.0488e-13, -5.6372e-12,\n 5.8972e-13, 3.7095e-13, 2.6134e-13, -1.0000e-12]], device='cuda:0')", + "exp_avg_sq": "tensor([[3.9329e-16, 9.8833e-16, 1.0357e-14, 1.2265e-15, 7.7151e-16, 3.8320e-16,\n 1.0515e-16, 2.0296e-15, 1.6126e-14, 1.7252e-15, 1.5599e-15, 4.8256e-15,\n 1.5803e-15, 5.8224e-16, 3.5448e-15, 2.5803e-14, 1.6797e-15, 1.6819e-15,\n 1.2623e-15, 6.9728e-15, 9.2855e-16, 5.8891e-15, 1.5788e-14, 1.7184e-13,\n 4.4255e-16, 1.9776e-15, 8.8779e-14, 9.5059e-16, 5.9656e-16, 8.8227e-14,\n 4.1422e-16, 5.4042e-16, 2.8290e-14, 1.6840e-13, 1.4790e-15, 5.2728e-14,\n 5.7723e-14, 4.5613e-16, 1.7501e-15, 1.6780e-14, 3.6087e-14, 1.9833e-15,\n 1.0885e-14, 1.6105e-15, 2.0133e-15, 1.6252e-14, 1.7718e-15, 1.9356e-15,\n 1.4570e-15, 1.4650e-13, 6.3697e-14, 2.0055e-14, 1.6239e-15, 1.9231e-15,\n 6.7173e-14, 1.8356e-13, 2.2615e-15, 2.8778e-13, 3.9700e-15, 1.8667e-13,\n 1.7262e-15, 3.1531e-15, 1.8213e-15, 1.2283e-13],\n [3.9330e-16, 9.8833e-16, 1.0357e-14, 1.2265e-15, 7.7151e-16, 3.8321e-16,\n 1.0515e-16, 2.0296e-15, 1.6126e-14, 1.7252e-15, 1.5599e-15, 4.8256e-15,\n 1.5803e-15, 5.8224e-16, 3.5448e-15, 2.5803e-14, 1.6797e-15, 1.6819e-15,\n 1.2623e-15, 6.9728e-15, 9.2855e-16, 5.8891e-15, 1.5788e-14, 1.7184e-13,\n 4.4255e-16, 1.9776e-15, 8.8779e-14, 9.5059e-16, 5.9656e-16, 8.8227e-14,\n 4.1422e-16, 5.4043e-16, 2.8290e-14, 1.6840e-13, 1.4790e-15, 5.2728e-14,\n 5.7723e-14, 4.5613e-16, 1.7501e-15, 1.6780e-14, 3.6087e-14, 1.9833e-15,\n 1.0885e-14, 1.6105e-15, 2.0133e-15, 1.6252e-14, 1.7718e-15, 1.9356e-15,\n 1.4570e-15, 1.4650e-13, 6.3697e-14, 2.0056e-14, 1.6239e-15, 1.9231e-15,\n 6.7173e-14, 1.8356e-13, 2.2616e-15, 2.8778e-13, 3.9700e-15, 1.8667e-13,\n 1.7262e-15, 3.1531e-15, 1.8213e-15, 1.2283e-13]], device='cuda:0')" + }, + "32": { + "step": "tensor(11268.)", + "exp_avg": "tensor([ 2.9754e-12, -2.9912e-12], device='cuda:0')", + "exp_avg_sq": "tensor([8.5089e-14, 8.5089e-14], device='cuda:0')" + }, + "33": { + "step": "tensor(11268.)", + "exp_avg": "tensor([[-2.7414e-14, -1.6534e-13, -2.9021e-13, ..., 9.8142e-14,\n -3.5709e-13, 4.3306e-13],\n [-1.6210e-14, -1.9106e-14, 2.1316e-14, ..., 7.2145e-14,\n -4.1890e-14, 3.5219e-13],\n [-3.2398e-14, -4.9888e-14, -5.1046e-14, ..., 2.5658e-14,\n -9.9216e-14, 1.1553e-13],\n ...,\n [ 5.6385e-15, -4.1649e-15, -5.1800e-14, ..., 2.0786e-14,\n 1.0494e-14, 1.0290e-13],\n [-3.0222e-14, -1.9377e-14, -4.3533e-14, ..., 8.7452e-14,\n -1.0253e-13, 4.6415e-13],\n [-7.4124e-14, -8.3076e-15, 1.0836e-13, ..., 1.5625e-14,\n -7.2108e-14, 1.0196e-13]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.8489e-17, 2.7364e-18, 1.3740e-17, ..., 6.7732e-18, 5.2107e-19,\n 1.1066e-18],\n [2.7573e-16, 5.8404e-17, 1.6737e-16, ..., 1.1459e-16, 9.5838e-18,\n 1.8816e-17],\n [1.6816e-16, 5.0648e-17, 7.7100e-17, ..., 9.3290e-17, 3.2656e-18,\n 7.3605e-18],\n ...,\n [3.3610e-18, 4.4424e-19, 1.7179e-18, ..., 6.6458e-19, 7.9520e-20,\n 1.6955e-19],\n [1.6121e-17, 4.9336e-18, 1.1141e-17, ..., 1.0646e-17, 4.5839e-19,\n 1.4019e-18],\n [4.6588e-15, 9.9768e-16, 2.4336e-15, ..., 1.7424e-15, 1.1879e-16,\n 1.8956e-16]], device='cuda:0')" + }, + "34": { + "step": "tensor(11268.)", + "exp_avg": "tensor([-1.3376e-11, 7.6965e-13, -6.3276e-12, 3.8129e-12, 3.3583e-12,\n 1.7620e-12, -1.2031e-12, -8.3857e-12, -8.3893e-12, 8.7525e-13,\n 5.8673e-12, 9.1319e-12, 1.4779e-11, -4.4098e-12, -4.1653e-12,\n -8.0195e-13, 9.9112e-13, -1.1966e-11, -3.4992e-12, -9.1047e-12,\n -1.5251e-11, -4.6546e-12, 1.1318e-11, -1.2763e-12, 3.2251e-12,\n 4.9500e-11, 1.9525e-12, 4.5675e-13, 5.7338e-12, -2.2593e-12,\n -1.7092e-12, -3.3873e-12, -1.5073e-11, 2.3635e-12, -7.7572e-12,\n -6.2116e-14, 1.3740e-12, 5.6008e-12, -3.6751e-12, 2.3370e-12,\n 4.0263e-12, -9.5674e-12, -2.5300e-12, -4.4860e-12, 8.0583e-12,\n -2.2715e-12, -1.1042e-14, 2.3109e-12, 5.6680e-12, 1.3551e-11,\n -4.5087e-12, 1.3632e-12, -3.4212e-12, -2.1087e-12, 5.6622e-12,\n -7.2521e-12, 1.8487e-12, -1.3790e-12, 8.7675e-12, -6.0567e-12,\n -6.1191e-12, 2.4111e-13, 1.1720e-12, -1.4315e-12], device='cuda:0')", + "exp_avg_sq": "tensor([2.8417e-14, 4.9318e-13, 2.5713e-13, 6.0724e-12, 6.9668e-15, 1.9683e-14,\n 3.9476e-12, 2.9403e-13, 8.4525e-12, 2.1544e-13, 1.9733e-13, 7.4517e-12,\n 2.6055e-11, 1.1859e-11, 1.0654e-14, 2.3380e-13, 5.7817e-12, 6.6081e-15,\n 3.8996e-14, 9.4777e-14, 2.3480e-11, 1.0366e-13, 3.9349e-14, 7.4674e-12,\n 5.6573e-14, 2.3208e-11, 4.8232e-14, 7.4974e-12, 1.5078e-12, 1.8262e-12,\n 3.5620e-12, 1.1348e-12, 5.1434e-12, 8.3663e-14, 1.5027e-12, 3.2807e-14,\n 5.6905e-14, 2.4370e-12, 5.5357e-13, 2.3672e-13, 5.2959e-14, 1.1927e-12,\n 5.3101e-14, 1.1002e-13, 1.0897e-11, 4.3302e-14, 5.9992e-14, 4.2835e-14,\n 3.1056e-13, 1.0359e-11, 9.8212e-14, 1.5905e-12, 1.5898e-14, 6.7506e-12,\n 3.6987e-13, 1.2663e-12, 1.2366e-12, 3.5077e-12, 2.5877e-12, 7.3099e-13,\n 3.8743e-13, 4.1153e-15, 3.4315e-14, 6.7165e-12], device='cuda:0')" + }, + "35": { + "step": "tensor(11268.)", + "exp_avg": "tensor([-9.1553e-14, 1.5068e-14, 3.1286e-13, 9.7446e-13, -8.2552e-14,\n 1.1937e-15, 2.5176e-14, 3.7410e-13, -2.9186e-13, -3.7876e-14,\n 4.4748e-14, 1.0553e-12, -7.9308e-14, 3.4599e-14, 9.6100e-15,\n 1.3920e-14, -2.2268e-14, 1.2334e-13, 1.2654e-14, 1.1426e-13,\n -4.8180e-14, 1.7426e-13, 3.1616e-13, 1.2725e-13, 2.6234e-14,\n 2.8487e-12, -1.8081e-14, 5.9798e-14, 8.5185e-14, 6.6616e-13,\n 1.3248e-14, 5.5931e-14, 1.1431e-13, -3.2255e-14, -2.9421e-15,\n -6.3329e-16, -1.2027e-14, 8.6036e-13, -3.1349e-15, 1.0993e-14,\n 4.9945e-16, 5.8051e-13, -1.6710e-14, 9.0461e-14, 9.6317e-14,\n -6.0821e-14, -3.7609e-15, 6.0024e-14, 4.3249e-13, 9.8962e-13,\n 1.2741e-13, -1.6976e-14, 3.1247e-15, 1.9686e-13, -1.0292e-13,\n 5.3243e-13, 9.3292e-14, 3.4079e-14, 3.8493e-13, 1.6829e-13,\n -1.2489e-14, 6.3839e-16, 3.3674e-14, -2.1942e-15], device='cuda:0')", + "exp_avg_sq": "tensor([8.5818e-17, 6.9142e-16, 3.5958e-16, 7.1276e-15, 3.2358e-17, 5.0505e-18,\n 1.1388e-14, 5.6674e-16, 1.1962e-14, 6.6107e-16, 4.8986e-16, 5.1657e-15,\n 2.7743e-14, 1.3064e-14, 1.5265e-18, 5.6089e-16, 3.6130e-15, 2.4197e-17,\n 1.2734e-17, 3.2274e-16, 2.4566e-14, 9.2164e-17, 7.1981e-17, 9.6484e-15,\n 5.1466e-17, 1.7472e-14, 1.8613e-18, 1.1054e-15, 3.3727e-16, 3.0416e-15,\n 3.8891e-15, 4.0617e-16, 2.0310e-15, 1.5401e-17, 4.3445e-15, 7.1928e-19,\n 1.0443e-16, 3.6208e-16, 1.4613e-15, 6.6850e-18, 7.1219e-18, 2.9770e-15,\n 3.2864e-17, 2.5885e-18, 4.2509e-15, 7.0553e-18, 1.8817e-18, 6.8957e-17,\n 1.3369e-16, 1.3288e-14, 4.4675e-17, 1.1669e-15, 3.5645e-19, 2.0758e-15,\n 9.8142e-18, 2.8069e-15, 6.7362e-16, 1.5967e-15, 2.0277e-16, 3.6197e-17,\n 2.3809e-16, 1.4853e-18, 6.5961e-18, 3.9883e-15], device='cuda:0')" + }, + "36": { + "step": "tensor(11268.)", + "exp_avg": "tensor([-3.2749e-13, 3.1690e-14, 1.8143e-13, 4.9501e-13, 5.8189e-14,\n -1.5052e-15, 1.9395e-14, 1.6615e-13, -1.0902e-13, -7.1033e-14,\n -1.4776e-14, 6.6351e-13, 4.9575e-13, -1.0833e-13, -5.1468e-14,\n -5.2168e-14, -1.8283e-14, -3.8541e-13, 5.3623e-14, -1.3880e-13,\n -4.5479e-13, 1.7037e-13, 4.4958e-13, 8.5929e-14, -1.6384e-14,\n 2.6183e-12, 2.3159e-14, -1.8057e-14, 3.8185e-13, 3.4217e-13,\n -2.1653e-14, -4.5881e-14, -4.0451e-13, -1.3313e-14, -4.3443e-14,\n -2.5647e-15, 2.6942e-14, 8.8404e-13, -1.2388e-15, -9.8575e-15,\n -3.3139e-16, 1.8051e-13, 1.4859e-14, 1.4051e-13, 4.2340e-13,\n 2.9373e-14, -3.4967e-15, -4.1255e-14, 5.0967e-13, 8.1994e-13,\n 1.2418e-13, 1.9663e-14, -3.0228e-14, 7.2976e-14, 5.8610e-14,\n 2.1022e-13, 2.2963e-13, -4.6651e-14, 5.0781e-13, 7.5121e-14,\n 1.1495e-14, -1.0757e-14, 4.9250e-14, -5.2341e-14], device='cuda:0')", + "exp_avg_sq": "tensor([5.4741e-17, 6.7190e-16, 4.0907e-16, 9.8134e-15, 2.8200e-17, 3.3356e-18,\n 5.8170e-15, 4.3412e-16, 1.3408e-14, 1.9057e-16, 2.5206e-16, 1.2011e-14,\n 4.0112e-14, 1.8133e-14, 8.6834e-19, 3.3049e-16, 8.9095e-15, 1.4092e-17,\n 5.0818e-17, 1.8220e-16, 3.5601e-14, 1.7020e-16, 4.2897e-17, 1.1357e-14,\n 2.3752e-17, 3.6022e-14, 7.8300e-19, 1.1461e-14, 2.5742e-15, 2.9794e-15,\n 5.4195e-15, 1.7758e-15, 7.9461e-15, 1.0390e-16, 2.0599e-15, 5.3646e-18,\n 7.3540e-17, 4.1410e-15, 5.1529e-16, 3.9986e-16, 2.9726e-18, 1.8735e-15,\n 1.4784e-17, 1.9190e-18, 1.6633e-14, 8.6004e-17, 6.9137e-19, 4.4654e-17,\n 5.9339e-16, 1.6204e-14, 1.6532e-16, 2.4454e-15, 2.4250e-19, 1.1065e-14,\n 6.1190e-16, 1.9594e-15, 2.0216e-15, 5.4573e-15, 4.2630e-15, 1.3566e-15,\n 6.4880e-16, 8.9272e-19, 3.4000e-18, 1.0324e-14], device='cuda:0')" + }, + "37": { + "step": "tensor(11268.)", + "exp_avg": "tensor([[-7.8510e-13, 1.2235e-12, -7.3768e-12, -7.4330e-12, 7.0205e-13,\n 8.4167e-13, 9.2840e-13, -9.3755e-12, 2.1081e-12, 1.5090e-12,\n 1.5560e-14, -6.3871e-12, 8.6212e-13, 4.3841e-13, 2.7016e-13,\n 6.4691e-13, 4.2054e-13, 3.9725e-13, -4.6903e-13, 7.5020e-13,\n -3.8362e-13, -4.6478e-12, 3.4558e-12, 1.5766e-12, 4.6619e-13,\n -1.0324e-11, 4.1657e-14, 8.3792e-13, -4.7688e-13, -8.4603e-12,\n 8.1708e-14, 1.0414e-12, -2.1711e-13, -1.3956e-13, -4.1700e-13,\n 9.0345e-13, 8.4532e-13, -5.6398e-12, -3.1030e-13, 3.8149e-14,\n 1.0253e-13, -1.0329e-11, 1.2237e-12, -4.8039e-12, 1.8651e-12,\n 1.3595e-12, 6.4183e-13, 5.5559e-13, -6.8682e-12, -5.4203e-12,\n -3.2907e-12, -1.8278e-13, -1.0690e-14, -8.6939e-13, 3.2276e-13,\n -8.7516e-12, -1.3684e-12, 7.7028e-13, -2.2868e-12, -1.5889e-12,\n 3.2628e-13, 9.4024e-13, 1.7955e-12, 6.4523e-13],\n [ 7.8510e-13, -1.2235e-12, 7.3766e-12, 7.4329e-12, -7.0202e-13,\n -8.4164e-13, -9.2842e-13, 9.3753e-12, -2.1081e-12, -1.5090e-12,\n -1.5561e-14, 6.3869e-12, -8.6229e-13, -4.3840e-13, -2.7014e-13,\n -6.4692e-13, -4.2052e-13, -3.9723e-13, 4.6888e-13, -7.5022e-13,\n 3.8364e-13, 4.6477e-12, -3.4558e-12, -1.5766e-12, -4.6617e-13,\n 1.0324e-11, -4.1633e-14, -8.3790e-13, 4.7676e-13, 8.4601e-12,\n -8.1731e-14, -1.0414e-12, 2.1712e-13, 1.3958e-13, 4.1702e-13,\n -9.0342e-13, -8.4531e-13, 5.6398e-12, 3.1028e-13, -3.8129e-14,\n -1.0251e-13, 1.0329e-11, -1.2236e-12, 4.8039e-12, -1.8651e-12,\n -1.3596e-12, -6.4181e-13, -5.5557e-13, 6.8681e-12, 5.4201e-12,\n 3.2906e-12, 1.8280e-13, 1.0708e-14, 8.6921e-13, -3.2274e-13,\n 8.7513e-12, 1.3683e-12, -7.7026e-13, 2.2868e-12, 1.5888e-12,\n -3.2637e-13, -9.4022e-13, -1.7954e-12, -6.4521e-13]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.9272e-14, 6.3744e-13, 5.0255e-13, 3.9981e-13, 2.3169e-14, 1.7688e-14,\n 1.4857e-12, 8.7294e-13, 5.4457e-13, 2.8660e-15, 1.1795e-14, 1.9207e-13,\n 3.9639e-13, 3.9641e-13, 2.1630e-14, 1.4814e-14, 1.6412e-13, 2.2599e-14,\n 7.8405e-14, 1.6762e-14, 3.7729e-13, 2.4816e-13, 1.6990e-14, 5.0310e-13,\n 9.8918e-15, 2.4825e-13, 8.5209e-15, 2.1128e-14, 3.8332e-14, 6.3689e-13,\n 3.8490e-13, 6.9987e-14, 8.7784e-14, 2.3178e-14, 1.6182e-12, 5.1270e-14,\n 1.8829e-14, 1.9048e-14, 2.2917e-12, 3.9474e-15, 7.5174e-15, 1.1539e-12,\n 8.8015e-15, 1.0645e-12, 8.4317e-14, 1.5955e-14, 6.0481e-15, 1.7297e-14,\n 6.0889e-14, 4.7338e-13, 1.0500e-13, 2.1983e-13, 1.9278e-14, 5.5708e-14,\n 9.5455e-15, 1.0407e-12, 1.4159e-13, 9.4263e-14, 8.0890e-15, 5.8697e-15,\n 1.5776e-13, 2.3130e-14, 1.2351e-14, 1.5819e-13],\n [1.9272e-14, 6.3744e-13, 5.0255e-13, 3.9981e-13, 2.3169e-14, 1.7688e-14,\n 1.4857e-12, 8.7294e-13, 5.4457e-13, 2.8660e-15, 1.1795e-14, 1.9207e-13,\n 3.9639e-13, 3.9641e-13, 2.1630e-14, 1.4814e-14, 1.6412e-13, 2.2599e-14,\n 7.8405e-14, 1.6762e-14, 3.7729e-13, 2.4816e-13, 1.6990e-14, 5.0310e-13,\n 9.8918e-15, 2.4825e-13, 8.5209e-15, 2.1128e-14, 3.8332e-14, 6.3689e-13,\n 3.8490e-13, 6.9987e-14, 8.7784e-14, 2.3178e-14, 1.6182e-12, 5.1270e-14,\n 1.8829e-14, 1.9048e-14, 2.2917e-12, 3.9474e-15, 7.5174e-15, 1.1539e-12,\n 8.8015e-15, 1.0645e-12, 8.4317e-14, 1.5955e-14, 6.0481e-15, 1.7297e-14,\n 6.0889e-14, 4.7338e-13, 1.0500e-13, 2.1983e-13, 1.9278e-14, 5.5708e-14,\n 9.5455e-15, 1.0407e-12, 1.4159e-13, 9.4263e-14, 8.0890e-15, 5.8697e-15,\n 1.5776e-13, 2.3130e-14, 1.2351e-14, 1.5819e-13]], device='cuda:0')" + }, + "38": { + "step": "tensor(11268.)", + "exp_avg": "tensor([-3.9168e-12, 3.9168e-12], device='cuda:0')", + "exp_avg_sq": "tensor([9.2389e-13, 9.2389e-13], device='cuda:0')" + }, + "39": { + "step": "tensor(11268.)", + "exp_avg": "tensor([[ 4.1794e-11, -1.2084e-11, -6.5474e-12, ..., 9.6320e-11,\n 4.1017e-11, -6.1934e-12],\n [-4.0312e-11, 6.8803e-11, 4.7219e-12, ..., 7.7185e-11,\n -2.2058e-11, 4.0484e-12],\n [ 3.7311e-11, 4.3853e-11, -1.8513e-11, ..., 4.8850e-11,\n 1.8262e-11, -5.8993e-12],\n ...,\n [ 1.4600e-10, 9.7619e-11, 1.7239e-11, ..., 6.7674e-11,\n 1.2370e-10, -8.9870e-12],\n [ 3.3975e-10, 6.0489e-10, -1.3907e-10, ..., 3.8937e-10,\n 3.8701e-10, 8.1228e-11],\n [-1.8888e-10, 4.0238e-11, 1.2986e-10, ..., -1.4071e-10,\n -2.0864e-10, 3.3177e-11]], device='cuda:0')", + "exp_avg_sq": "tensor([[8.4974e-19, 4.2658e-19, 2.3918e-19, ..., 2.0600e-18, 3.4156e-19,\n 1.3070e-19],\n [4.1083e-19, 1.8046e-19, 3.8154e-20, ..., 4.0310e-18, 2.2753e-20,\n 2.4105e-19],\n [1.1228e-19, 7.2639e-20, 1.6455e-20, ..., 7.5230e-19, 5.2073e-20,\n 5.4139e-20],\n ...,\n [4.8022e-19, 3.8663e-19, 1.8697e-19, ..., 2.5761e-18, 8.6656e-19,\n 3.5970e-18],\n [1.4251e-17, 1.6299e-17, 4.7234e-18, ..., 1.2641e-17, 8.2647e-18,\n 1.0080e-17],\n [4.5314e-18, 4.4367e-18, 1.3665e-18, ..., 1.4415e-17, 3.6199e-18,\n 7.4517e-18]], device='cuda:0')" + }, + "40": { + "step": "tensor(11268.)", + "exp_avg": "tensor([ 2.2304e-09, -1.1727e-09, 7.3547e-10, -2.5667e-09, -5.5192e-09,\n -1.1144e-08, -8.6898e-09, 9.2596e-10, 1.3142e-09, 3.6619e-09,\n 3.4333e-09, 1.3615e-09, -3.4656e-10, 8.8451e-09, -1.4251e-09,\n -8.1247e-10, -6.5318e-10, 1.0394e-09, -1.1421e-08, 8.7725e-10,\n 1.0666e-09, -7.4910e-10, 4.4038e-09, 7.0243e-10, -1.1237e-08,\n 1.4505e-09, 2.4806e-09, 6.9476e-10, -3.1208e-09, 1.4657e-09,\n 6.7867e-10, 2.8004e-09, -7.3520e-09, 1.4152e-09, 1.9475e-09,\n 8.4273e-10, -7.3084e-10, -4.1749e-09, -1.5246e-08, 3.0979e-09,\n 1.2597e-09, 1.4031e-10, -1.2743e-09, 5.0247e-10, -6.6556e-10,\n 1.7525e-09, -2.2771e-09, 3.3248e-09, -7.6558e-10, -7.7430e-11,\n -5.9663e-11, 8.2136e-10, -4.0060e-09, -9.2691e-10, -4.3786e-09,\n -2.1551e-09, 7.6953e-10, 2.5642e-08, 1.7277e-09, -1.1283e-09,\n 7.4557e-09, 6.2950e-09, 9.8572e-09, -2.9424e-09], device='cuda:0')", + "exp_avg_sq": "tensor([1.2535e-15, 2.1969e-15, 3.3893e-16, 2.6411e-15, 1.0438e-14, 1.2109e-14,\n 1.5302e-13, 5.8177e-15, 2.2183e-14, 2.9137e-14, 2.2920e-16, 2.0956e-16,\n 1.7803e-15, 3.0912e-15, 3.1900e-16, 1.0014e-13, 1.1666e-16, 3.2402e-16,\n 1.3852e-14, 5.2764e-16, 3.7740e-15, 2.0383e-14, 1.6783e-14, 1.8092e-15,\n 1.5760e-14, 1.3108e-14, 3.5974e-16, 1.4698e-16, 1.2732e-14, 1.3563e-14,\n 8.4238e-16, 1.2609e-15, 2.6476e-14, 2.1320e-16, 1.5058e-15, 1.3228e-14,\n 1.1226e-15, 1.9679e-14, 2.9938e-14, 2.3379e-15, 6.7906e-16, 5.3175e-15,\n 3.4858e-16, 1.5814e-14, 1.3099e-14, 6.1665e-16, 5.1588e-16, 5.8027e-16,\n 9.2175e-15, 8.1851e-16, 2.4187e-14, 2.6426e-15, 1.4503e-15, 1.3664e-14,\n 4.5340e-14, 7.8495e-14, 3.5690e-15, 1.8604e-14, 4.4168e-15, 1.0483e-14,\n 7.0465e-16, 7.9565e-16, 2.0790e-14, 9.3181e-15], device='cuda:0')" + }, + "41": { + "step": "tensor(11268.)", + "exp_avg": "tensor([-2.8652e-11, 1.6249e-12, 1.3028e-11, 1.3987e-11, -1.1699e-09,\n -1.4539e-09, -1.5476e-09, 3.3539e-11, 6.5598e-12, -9.2168e-10,\n -2.3101e-11, -1.0022e-11, -5.1036e-11, -3.1901e-11, 1.4512e-11,\n 3.2044e-11, -4.3433e-13, -1.6119e-11, -1.6288e-09, -1.5862e-11,\n 4.7324e-11, -2.7060e-10, -1.4426e-09, -1.3006e-11, -1.3291e-09,\n -1.2272e-09, -9.5433e-12, 7.0958e-14, -5.7654e-10, -2.1523e-10,\n -2.5897e-12, 4.8168e-12, -1.2014e-09, 7.5725e-13, 6.7365e-13,\n -3.0540e-10, 6.6471e-13, -1.7918e-09, -2.6080e-09, -6.7274e-12,\n 7.1514e-12, -3.9773e-12, 1.6578e-12, -3.1630e-11, 6.0360e-12,\n -7.1803e-13, 2.0185e-12, -3.4594e-11, 1.1994e-11, 2.3537e-12,\n 1.8992e-11, 1.5835e-11, 1.1301e-11, 1.3600e-11, 4.5124e-11,\n 6.3951e-12, 5.8130e-12, 5.8120e-10, 1.0328e-11, 2.3537e-12,\n -5.7007e-12, -9.6447e-11, -9.7393e-10, -6.5661e-10], device='cuda:0')", + "exp_avg_sq": "tensor([9.0272e-20, 6.4440e-19, 1.8424e-19, 5.6240e-18, 5.9430e-17, 1.1541e-16,\n 2.5779e-16, 2.2500e-18, 3.8197e-18, 1.5519e-16, 1.0466e-20, 4.6725e-21,\n 3.0039e-19, 2.5167e-19, 1.4642e-19, 1.3554e-16, 1.5859e-20, 2.9409e-20,\n 1.2958e-16, 2.7418e-19, 6.7036e-19, 2.5483e-17, 1.0407e-16, 1.1259e-19,\n 1.1894e-16, 9.5105e-17, 1.1032e-20, 5.5047e-21, 2.9063e-17, 1.1722e-17,\n 2.7882e-19, 5.6157e-19, 8.6355e-17, 2.0606e-21, 8.7019e-20, 1.4680e-17,\n 1.2118e-19, 1.9485e-16, 2.3791e-16, 1.2028e-19, 2.5473e-19, 9.3502e-19,\n 1.8072e-18, 1.8098e-17, 2.3717e-17, 2.4162e-19, 3.5301e-19, 4.8673e-20,\n 1.2929e-18, 3.0371e-20, 2.2709e-18, 4.6901e-19, 1.7681e-19, 5.8180e-18,\n 3.4964e-17, 9.7938e-17, 1.5725e-18, 1.5622e-16, 1.5793e-18, 3.2825e-17,\n 2.2692e-19, 1.1423e-19, 1.0591e-16, 3.5625e-17], device='cuda:0')" + }, + "42": { + "step": "tensor(11268.)", + "exp_avg": "tensor([ 5.3006e-11, -1.9108e-11, -1.9386e-12, -1.2008e-11, -8.5141e-10,\n -1.0689e-09, -1.0608e-09, -6.2843e-11, 4.6628e-11, -5.7189e-10,\n 3.7294e-11, 2.1620e-11, 1.4193e-11, 2.9201e-10, 8.0298e-12,\n 1.7432e-11, 3.2765e-12, 2.3169e-11, -1.1170e-09, 3.1965e-11,\n -6.6729e-11, -4.2938e-10, -5.4648e-10, 3.2746e-11, -1.0436e-09,\n -6.3267e-10, 3.5012e-11, -3.8492e-12, -6.3989e-10, -3.4468e-10,\n -3.8518e-12, -1.9975e-11, -9.3906e-10, 3.4925e-12, 2.4612e-12,\n -3.9568e-10, 8.7263e-12, -9.1407e-10, -1.4298e-09, 8.6373e-11,\n -4.6168e-11, 1.3971e-11, -3.1875e-12, -2.1807e-10, -5.9078e-12,\n -8.0245e-11, -9.6605e-12, 7.0337e-11, -1.9519e-12, -5.2195e-12,\n 6.8263e-12, -1.3843e-10, -3.2737e-11, -9.0846e-12, -5.5092e-11,\n -1.2520e-11, -2.2036e-11, 2.9458e-10, -4.3288e-11, -6.7505e-13,\n 1.9528e-10, 2.0442e-10, -4.8240e-10, -6.4690e-10], device='cuda:0')", + "exp_avg_sq": "tensor([8.4684e-19, 3.2875e-19, 2.9419e-19, 3.3868e-18, 4.6176e-17, 4.7815e-17,\n 1.4187e-16, 3.0823e-18, 1.5965e-17, 7.6319e-17, 2.9805e-20, 2.6022e-20,\n 2.3303e-18, 2.9219e-18, 1.6528e-19, 7.3828e-17, 8.4739e-20, 2.2761e-19,\n 3.7879e-17, 3.6439e-19, 5.8749e-18, 2.0128e-17, 2.3758e-17, 1.3315e-18,\n 5.4120e-17, 2.4581e-17, 9.1599e-20, 2.0746e-20, 3.7646e-17, 1.4601e-17,\n 3.0642e-19, 3.3874e-19, 5.9547e-17, 1.5183e-20, 9.6769e-19, 2.2945e-17,\n 7.4966e-19, 8.3591e-17, 8.9391e-17, 1.9546e-18, 2.3578e-19, 4.4526e-18,\n 5.9625e-19, 9.2797e-18, 1.1967e-17, 4.1404e-19, 7.5036e-19, 3.5911e-19,\n 6.8154e-18, 1.6800e-20, 1.7076e-17, 3.6058e-18, 1.2420e-18, 1.0754e-17,\n 3.3004e-17, 5.7033e-17, 1.0451e-18, 4.1089e-17, 2.0908e-18, 1.0868e-17,\n 6.1541e-19, 6.8474e-19, 3.1586e-17, 3.6050e-17], device='cuda:0')" + }, + "43": { + "step": "tensor(11268.)", + "exp_avg": "tensor([[-4.4266e-10, -3.2387e-10, -4.4326e-10, -3.7717e-10, 4.9150e-09,\n 5.0334e-09, 4.4820e-09, -5.4061e-10, -3.1370e-10, 2.2708e-09,\n -4.8348e-10, -4.7505e-10, -5.8576e-10, -2.4775e-10, -4.8776e-10,\n -4.8585e-10, -4.5330e-10, -6.1806e-10, 5.2890e-09, -5.2192e-10,\n -5.4009e-10, 1.8757e-09, 3.8225e-09, -3.4620e-10, 4.2816e-09,\n 4.4041e-09, -3.3560e-10, -1.1411e-10, 2.4234e-09, 1.2998e-09,\n -3.0227e-10, -2.8922e-12, 3.9983e-09, -1.8506e-10, -5.3386e-10,\n 1.6322e-09, -6.4711e-10, 5.8064e-09, 7.5014e-09, -2.0936e-10,\n -2.5527e-10, -5.1312e-10, -6.4918e-10, 2.3219e-10, -5.0874e-10,\n -1.4093e-10, -6.5830e-10, -5.4237e-10, -5.9091e-10, -4.5424e-10,\n -5.1435e-10, -2.3190e-10, -2.3156e-10, -4.2193e-10, -3.2819e-10,\n -5.4787e-10, -4.4867e-10, -2.1688e-09, -2.1301e-10, -6.3739e-10,\n -4.7792e-11, -5.2658e-10, 2.0914e-09, 2.6175e-09],\n [ 5.2996e-10, 3.5089e-10, 5.1296e-10, 4.3477e-10, -5.5144e-09,\n -5.6186e-09, -4.9470e-09, 6.0176e-10, 3.8555e-10, -2.9327e-09,\n 5.5335e-10, 5.3075e-10, 6.4072e-10, 2.7584e-10, 5.3093e-10,\n 5.4529e-10, 5.1932e-10, 7.1525e-10, -6.0043e-09, 5.8572e-10,\n 5.4615e-10, -1.9744e-09, -4.8724e-09, 4.0133e-10, -4.8632e-09,\n -5.2359e-09, 4.1018e-10, 1.7548e-10, -2.8386e-09, -1.5498e-09,\n 3.6268e-10, 4.0036e-11, -4.6501e-09, 2.2403e-10, 5.9907e-10,\n -1.8454e-09, 7.0114e-10, -6.4419e-09, -8.0287e-09, 2.5251e-10,\n 2.9894e-10, 5.7921e-10, 7.0705e-10, -3.3059e-10, 5.7275e-10,\n 1.7938e-10, 7.1843e-10, 5.9335e-10, 6.4648e-10, 5.2316e-10,\n 5.8183e-10, 2.1181e-10, 3.1046e-10, 4.7906e-10, 3.9730e-10,\n 6.3012e-10, 4.6612e-10, 1.7455e-09, 2.4262e-10, 7.0069e-10,\n 9.8545e-11, 5.9139e-10, -2.8618e-09, -2.8956e-09]], device='cuda:0')", + "exp_avg_sq": "tensor([[3.3532e-17, 2.2453e-15, 3.9840e-17, 1.9295e-15, 1.1614e-15, 1.6814e-15,\n 2.5928e-15, 7.9274e-16, 1.2422e-16, 1.1821e-15, 4.1380e-17, 4.4046e-17,\n 3.3780e-17, 3.4125e-17, 3.4478e-17, 2.0858e-15, 1.0596e-16, 2.9604e-17,\n 1.8253e-15, 4.0542e-17, 3.2982e-17, 1.9276e-15, 9.2396e-16, 3.1808e-17,\n 1.4657e-15, 1.4846e-15, 3.8303e-17, 5.1096e-17, 5.7284e-16, 1.0557e-15,\n 2.9005e-17, 2.0739e-17, 1.1853e-15, 5.4247e-17, 2.0769e-17, 6.2065e-16,\n 4.7298e-17, 2.1837e-15, 2.0203e-15, 3.9344e-17, 3.2354e-17, 9.7853e-17,\n 2.8929e-15, 3.4866e-15, 2.3821e-15, 3.3941e-17, 2.7869e-16, 4.3113e-17,\n 1.2161e-16, 3.1755e-15, 6.2354e-17, 2.0031e-17, 4.0579e-17, 5.1461e-16,\n 9.8580e-16, 1.7008e-15, 1.6253e-15, 1.7440e-15, 6.2789e-16, 3.2733e-15,\n 3.6943e-17, 5.3248e-17, 7.0057e-16, 6.8427e-16],\n [3.3527e-17, 2.2453e-15, 3.9836e-17, 1.9295e-15, 1.1614e-15, 1.6840e-15,\n 2.5951e-15, 7.9278e-16, 1.2423e-16, 1.1814e-15, 4.1386e-17, 4.4048e-17,\n 3.3781e-17, 3.4148e-17, 3.4486e-17, 2.0858e-15, 1.0597e-16, 2.9612e-17,\n 1.8278e-15, 4.0544e-17, 3.2978e-17, 1.9279e-15, 9.2762e-16, 3.1808e-17,\n 1.4659e-15, 1.4843e-15, 3.8314e-17, 5.1096e-17, 5.7329e-16, 1.0558e-15,\n 2.9005e-17, 2.0747e-17, 1.1866e-15, 5.4256e-17, 2.0772e-17, 6.2111e-16,\n 4.7306e-17, 2.1845e-15, 2.0196e-15, 3.9342e-17, 3.2355e-17, 9.7846e-17,\n 2.8930e-15, 3.4865e-15, 2.3821e-15, 3.3931e-17, 2.7871e-16, 4.3133e-17,\n 1.2164e-16, 3.1756e-15, 6.2363e-17, 2.0043e-17, 4.0577e-17, 5.1465e-16,\n 9.8583e-16, 1.7009e-15, 1.6253e-15, 1.7456e-15, 6.2791e-16, 3.2734e-15,\n 3.6964e-17, 5.3244e-17, 6.9905e-16, 6.8494e-16]], device='cuda:0')" + }, + "44": { + "step": "tensor(11268.)", + "exp_avg": "tensor([ 2.9780e-09, -3.3785e-09], device='cuda:0')", + "exp_avg_sq": "tensor([2.0935e-15, 2.0938e-15], device='cuda:0')" + }, + "45": { + "step": "tensor(11268.)", + "exp_avg": "tensor([[ 6.6525e-18, 2.1467e-17, 1.8901e-17, ..., -1.2101e-17,\n 1.2539e-17, 3.6834e-18],\n [ 8.1374e-18, 1.3233e-17, 1.6078e-17, ..., -1.0975e-17,\n 7.5217e-19, -3.0181e-18],\n [-7.0175e-18, 1.1869e-16, -2.2582e-17, ..., 2.6857e-18,\n -7.3476e-18, 1.1536e-17],\n ...,\n [ 2.5633e-19, -1.1594e-17, 1.0838e-17, ..., -2.7876e-17,\n -1.3801e-18, 2.2188e-18],\n [ 2.4468e-18, -8.0541e-17, 2.4951e-18, ..., 1.1503e-17,\n -4.8957e-18, -4.4723e-18],\n [ 4.5594e-19, 8.1047e-17, 4.6410e-18, ..., -2.0896e-17,\n -3.4238e-18, 5.4857e-18]], device='cuda:0')", + "exp_avg_sq": "tensor([[6.5450e-20, 1.0163e-20, 3.5136e-20, ..., 2.9354e-20, 1.7305e-21,\n 7.0984e-21],\n [1.0643e-19, 1.7825e-20, 1.0878e-19, ..., 2.9280e-20, 4.5829e-21,\n 6.9138e-21],\n [1.4340e-18, 1.6831e-19, 2.8231e-18, ..., 2.8607e-19, 7.0139e-20,\n 1.3082e-19],\n ...,\n [1.3532e-19, 2.9278e-20, 2.2252e-19, ..., 6.7846e-20, 4.5982e-21,\n 1.3527e-20],\n [1.2509e-18, 3.8812e-19, 7.8844e-19, ..., 5.7530e-19, 3.1525e-20,\n 1.0813e-19],\n [1.4529e-19, 2.3406e-20, 2.5427e-19, ..., 2.6045e-20, 8.5708e-21,\n 1.5874e-20]], device='cuda:0')" + }, + "46": { + "step": "tensor(11268.)", + "exp_avg": "tensor([ 6.0402e-16, 9.4856e-16, 2.1086e-15, -7.1676e-16, -2.1872e-15,\n 5.9078e-17, -5.6913e-16, 3.9115e-17, -4.8052e-17, 2.8271e-17,\n 1.7096e-15, -5.7427e-15, -4.5067e-17, -4.7156e-15, -2.5101e-17,\n 2.0914e-15, 1.5681e-15, -9.7528e-16, -2.1629e-15, -9.5595e-16,\n -1.1656e-15, 3.4927e-15, -9.6535e-17, 2.8927e-15, 5.7006e-16,\n 8.7532e-16, -5.7613e-15, -2.1903e-15, -7.0403e-15, -3.1654e-16,\n 1.0165e-15, -7.4043e-16, -6.9083e-16, 1.8532e-15, -3.7860e-15,\n 5.1422e-16, 7.0651e-16, -1.2580e-15, 3.6912e-16, -8.5858e-16,\n 4.8195e-15, -1.8189e-16, 3.5215e-16, 3.8048e-15, 2.0426e-15,\n 9.9470e-16, -8.0297e-16, 2.0965e-15, 2.3978e-16, -3.2856e-16,\n 2.2282e-15, 1.1289e-15, -9.0627e-16, -7.0543e-16, -2.6678e-16,\n 8.7200e-16, 3.6976e-15, 8.5230e-17, 1.0832e-15, -5.6967e-16,\n 1.1838e-15, -4.3387e-16, -2.3832e-15, 2.5506e-15], device='cuda:0')", + "exp_avg_sq": "tensor([7.3618e-17, 1.4328e-16, 2.6106e-15, 4.0918e-18, 1.0650e-14, 1.4698e-15,\n 2.5114e-15, 1.9895e-17, 4.3690e-16, 1.1112e-16, 2.1001e-15, 1.1225e-14,\n 4.3381e-15, 1.7339e-14, 2.3027e-16, 1.7083e-17, 1.3993e-15, 1.3230e-15,\n 1.6493e-14, 8.5612e-16, 5.0070e-16, 1.5603e-16, 1.6422e-17, 1.3610e-14,\n 1.1409e-16, 1.2556e-15, 2.7180e-14, 2.4291e-15, 2.5126e-14, 8.0477e-16,\n 5.2428e-15, 3.1721e-17, 6.4448e-17, 1.5545e-16, 6.4024e-15, 1.6467e-16,\n 1.8727e-17, 6.6121e-17, 3.8449e-15, 8.1889e-16, 3.9697e-14, 7.6488e-17,\n 4.5271e-16, 1.5090e-15, 9.4234e-17, 2.1333e-16, 1.4186e-16, 4.5440e-16,\n 4.6246e-16, 5.3624e-17, 2.5631e-15, 2.3840e-15, 1.5296e-15, 9.4748e-15,\n 6.6302e-17, 5.8538e-15, 2.8048e-15, 8.9928e-17, 7.6764e-16, 2.0952e-15,\n 3.3445e-15, 3.2875e-16, 2.3020e-15, 2.9109e-16], device='cuda:0')" + }, + "47": { + "step": "tensor(11268.)", + "exp_avg": "tensor([ 3.8366e-18, -4.6205e-17, -8.9578e-17, 3.3176e-18, -4.2758e-16,\n -1.4555e-19, 6.8954e-18, 2.1334e-18, -1.3532e-16, -1.9169e-18,\n -2.7754e-16, -5.9871e-16, -3.6096e-18, -4.4869e-16, 2.4174e-19,\n -2.0878e-17, -1.4501e-18, 9.6613e-18, -5.9611e-16, 1.9138e-17,\n -6.1673e-17, 1.2086e-17, 2.4361e-19, 7.3668e-18, -7.8130e-18,\n -1.2967e-18, -6.6484e-16, 6.1775e-17, -4.9976e-16, 1.5536e-19,\n -6.4168e-18, 1.3799e-18, -1.3799e-17, -1.0354e-16, -1.8794e-16,\n -2.6020e-17, -5.6848e-18, -3.3414e-17, -3.3178e-16, -1.4075e-18,\n 2.5148e-17, -6.7150e-21, -1.9118e-18, -5.6992e-17, 2.6372e-18,\n -1.2433e-16, 4.9551e-19, -2.8897e-18, 1.2663e-18, -3.7757e-18,\n -2.7815e-19, 4.2640e-19, -2.8064e-16, 9.5630e-18, 4.8079e-19,\n -5.7557e-18, -1.5445e-17, -3.3956e-17, -1.0170e-17, 4.7186e-18,\n -7.0212e-19, -1.1005e-16, 5.7722e-17, 6.5106e-18], device='cuda:0')", + "exp_avg_sq": "tensor([1.5577e-19, 2.3773e-18, 9.1144e-19, 1.7305e-19, 6.4671e-17, 1.0727e-18,\n 6.1602e-19, 1.1486e-20, 6.6087e-19, 1.6461e-18, 1.6434e-17, 1.3316e-17,\n 3.5361e-18, 2.6951e-17, 1.2082e-20, 8.1282e-20, 3.9939e-19, 6.6812e-19,\n 7.9000e-17, 4.1341e-19, 2.8103e-18, 1.4598e-18, 2.3535e-20, 3.8864e-17,\n 7.6829e-20, 1.5389e-19, 3.7755e-17, 3.5229e-18, 2.6159e-17, 2.2993e-19,\n 6.3762e-18, 6.5095e-23, 3.1185e-19, 1.7924e-18, 4.0895e-18, 1.7267e-19,\n 4.7408e-20, 1.7084e-19, 2.5145e-17, 1.6486e-19, 1.1274e-16, 1.0892e-19,\n 4.3269e-20, 1.6301e-17, 6.1210e-20, 2.5707e-18, 2.9345e-23, 1.0681e-18,\n 1.9729e-19, 3.4811e-19, 2.8390e-18, 2.5562e-18, 1.0205e-18, 7.3348e-18,\n 5.5082e-19, 1.0813e-17, 1.2407e-17, 2.4529e-20, 1.2005e-18, 4.6951e-19,\n 5.9985e-18, 2.8918e-19, 4.2782e-19, 4.7966e-20], device='cuda:0')" + }, + "48": { + "step": "tensor(11268.)", + "exp_avg": "tensor([-1.1006e-17, -6.5571e-17, -6.6691e-17, -2.2591e-17, -2.7360e-16,\n 1.5546e-19, -1.3787e-17, -8.9707e-19, -1.3123e-16, 6.8624e-18,\n -1.3762e-16, -4.1545e-16, 9.1020e-19, -3.6996e-16, -3.0788e-19,\n 5.8673e-17, -1.0758e-17, -1.3442e-17, -2.9677e-16, -1.9211e-17,\n 3.7364e-17, 1.1201e-16, -9.2777e-18, 6.0473e-17, 7.8659e-18,\n -1.5233e-17, -4.1832e-16, -4.7248e-17, -4.3454e-16, -9.5694e-20,\n 1.8714e-17, -1.3072e-18, 1.3704e-17, -6.8552e-17, -2.6175e-16,\n -7.3036e-17, 1.8150e-17, 2.2014e-17, -1.8173e-16, 1.3297e-18,\n 1.4838e-16, 3.7346e-20, 1.8556e-17, -1.1799e-17, 4.0035e-17,\n -1.0442e-16, -6.2679e-19, 5.0957e-17, -6.7430e-18, 4.5750e-18,\n -7.5742e-19, -2.6927e-18, -2.0122e-16, -9.1064e-18, -1.6672e-18,\n 1.3947e-17, -8.3081e-18, -8.2902e-17, -3.4231e-17, -5.9217e-18,\n 5.4691e-18, -1.3712e-16, -3.8769e-17, 4.5465e-17], device='cuda:0')", + "exp_avg_sq": "tensor([1.0107e-18, 2.1315e-18, 1.0643e-17, 5.6911e-20, 3.9134e-17, 1.0566e-18,\n 1.4861e-17, 3.8487e-19, 3.1527e-18, 5.1337e-19, 1.1421e-17, 3.5432e-17,\n 6.9049e-18, 4.5378e-17, 4.2099e-20, 1.1922e-19, 1.1352e-19, 2.4844e-18,\n 5.4859e-17, 5.1534e-18, 1.2296e-18, 4.8109e-19, 2.0280e-20, 1.7029e-17,\n 2.2536e-20, 3.2014e-20, 6.7948e-17, 1.5841e-17, 6.5171e-17, 4.1242e-19,\n 6.1565e-18, 2.0995e-21, 5.2088e-19, 2.2432e-18, 1.9162e-17, 1.7224e-18,\n 1.2711e-19, 1.1073e-19, 1.9730e-17, 2.4280e-19, 7.2690e-17, 2.5830e-20,\n 1.0090e-18, 1.1000e-17, 2.1039e-20, 2.8792e-18, 3.2523e-21, 2.7360e-19,\n 1.6551e-19, 1.9868e-19, 7.6529e-19, 5.4854e-19, 6.6227e-18, 2.7193e-17,\n 2.4130e-19, 7.7474e-18, 1.2423e-17, 8.9420e-19, 3.8069e-19, 4.9944e-18,\n 1.8551e-18, 2.3496e-18, 7.5980e-18, 1.0880e-20], device='cuda:0')" + }, + "49": { + "step": "tensor(11268.)", + "exp_avg": "tensor([[ 7.6648e-18, 8.7514e-16, 5.5845e-16, -3.2123e-16, 2.8531e-15,\n -3.2034e-16, -2.8119e-16, -4.0022e-16, 1.6537e-15, -2.9179e-16,\n 3.2017e-15, 2.9161e-15, -3.5047e-16, 2.2267e-15, -3.8110e-16,\n -1.9485e-16, 1.5047e-16, -3.1449e-16, 3.5896e-15, -2.4706e-16,\n -1.3014e-16, 1.4401e-17, -1.5582e-16, 1.7129e-16, -3.5403e-16,\n 7.0106e-17, 2.7638e-15, -2.3994e-16, 1.7675e-15, -1.8957e-16,\n -2.3903e-16, -2.7195e-16, -2.6013e-16, 1.8898e-15, 1.0425e-15,\n 3.2163e-16, -2.1533e-16, -1.3587e-16, 3.0952e-15, -2.3572e-16,\n 1.1923e-16, -4.0662e-16, -3.3632e-16, 7.9761e-16, 2.5528e-17,\n 1.9778e-15, -2.2139e-16, -1.7127e-16, -1.8237e-16, -3.5350e-16,\n 2.5995e-16, -6.8583e-17, 2.3916e-15, -2.7366e-16, -2.7670e-16,\n -2.0203e-16, 1.7170e-16, 4.5595e-16, 3.6457e-16, -2.9827e-16,\n -1.0614e-16, 1.2214e-15, -1.4440e-16, 1.5658e-16],\n [-7.6596e-18, -8.7518e-16, -5.5852e-16, 3.2123e-16, -2.8531e-15,\n 3.2034e-16, 2.8120e-16, 4.0023e-16, -1.6538e-15, 2.9180e-16,\n -3.2018e-15, -2.9162e-15, 3.5048e-16, -2.2268e-15, 3.8111e-16,\n 1.9486e-16, -1.5048e-16, 3.1450e-16, -3.5897e-15, 2.4706e-16,\n 1.3015e-16, -1.4395e-17, 1.5583e-16, -1.7130e-16, 3.5403e-16,\n -7.0111e-17, -2.7639e-15, 2.3995e-16, -1.7676e-15, 1.8958e-16,\n 2.3903e-16, 2.7196e-16, 2.6013e-16, -1.8898e-15, -1.0425e-15,\n -3.2168e-16, 2.1534e-16, 1.3588e-16, -3.0954e-15, 2.3572e-16,\n -1.1923e-16, 4.0662e-16, 3.3633e-16, -7.9770e-16, -2.5525e-17,\n -1.9779e-15, 2.2140e-16, 1.7128e-16, 1.8237e-16, 3.5350e-16,\n -2.5995e-16, 6.8591e-17, -2.3917e-15, 2.7367e-16, 2.7671e-16,\n 2.0203e-16, -1.7178e-16, -4.5596e-16, -3.6457e-16, 2.9828e-16,\n 1.0614e-16, -1.2215e-15, 1.4441e-16, -1.5657e-16]], device='cuda:0')", + "exp_avg_sq": "tensor([[4.7282e-17, 1.4346e-15, 1.3928e-17, 4.8390e-17, 2.1196e-15, 2.6509e-16,\n 3.4420e-17, 4.5318e-18, 1.0875e-16, 4.0995e-17, 1.9123e-15, 1.1378e-16,\n 6.0767e-17, 5.2852e-16, 2.5065e-16, 4.4450e-17, 2.2951e-15, 1.3842e-17,\n 1.5832e-15, 3.7787e-17, 2.6814e-17, 4.0769e-17, 4.8557e-17, 1.6085e-15,\n 1.5666e-17, 2.6292e-15, 4.5539e-16, 3.4946e-17, 1.6387e-16, 6.9630e-17,\n 3.9499e-16, 3.4775e-17, 2.3592e-17, 8.4581e-16, 1.2078e-16, 1.5053e-17,\n 4.4655e-17, 4.4440e-17, 1.4862e-15, 2.0387e-16, 6.3749e-16, 1.1792e-17,\n 3.0143e-17, 2.1031e-15, 1.9090e-17, 9.6358e-16, 4.5616e-18, 1.1634e-17,\n 1.6598e-15, 2.4713e-17, 3.1175e-15, 3.2862e-15, 5.0719e-17, 1.2113e-17,\n 1.4405e-17, 3.2294e-16, 1.1030e-15, 1.8569e-17, 5.1629e-15, 1.0854e-17,\n 2.0789e-15, 2.2628e-17, 2.3628e-17, 4.1238e-18],\n [4.7282e-17, 1.4346e-15, 1.3928e-17, 4.8390e-17, 2.1196e-15, 2.6509e-16,\n 3.4420e-17, 4.5318e-18, 1.0875e-16, 4.0995e-17, 1.9123e-15, 1.1378e-16,\n 6.0767e-17, 5.2852e-16, 2.5065e-16, 4.4450e-17, 2.2951e-15, 1.3842e-17,\n 1.5832e-15, 3.7787e-17, 2.6814e-17, 4.0769e-17, 4.8557e-17, 1.6085e-15,\n 1.5666e-17, 2.6292e-15, 4.5539e-16, 3.4946e-17, 1.6387e-16, 6.9630e-17,\n 3.9499e-16, 3.4775e-17, 2.3592e-17, 8.4581e-16, 1.2078e-16, 1.5053e-17,\n 4.4655e-17, 4.4440e-17, 1.4862e-15, 2.0387e-16, 6.3749e-16, 1.1792e-17,\n 3.0143e-17, 2.1031e-15, 1.9090e-17, 9.6358e-16, 4.5616e-18, 1.1634e-17,\n 1.6598e-15, 2.4713e-17, 3.1175e-15, 3.2862e-15, 5.0719e-17, 1.2113e-17,\n 1.4405e-17, 3.2294e-16, 1.1030e-15, 1.8569e-17, 5.1629e-15, 1.0854e-17,\n 2.0789e-15, 2.2628e-17, 2.3628e-17, 4.1238e-18]], device='cuda:0')" + }, + "50": { + "step": "tensor(11268.)", + "exp_avg": "tensor([ 1.6395e-15, -1.6395e-15], device='cuda:0')", + "exp_avg_sq": "tensor([1.9604e-15, 1.9604e-15], device='cuda:0')" + }, + "51": { + "step": "tensor(11268.)", + "exp_avg": "tensor([[ 2.5741e-15, 5.7734e-14, 3.5312e-16, ..., -2.2861e-15,\n -2.0857e-15, -2.0914e-15],\n [-2.0138e-15, 5.9671e-15, -1.3863e-15, ..., 3.2455e-15,\n -1.8973e-15, 1.5982e-15],\n [ 6.9635e-16, -6.7150e-16, -1.7685e-16, ..., 1.4295e-16,\n 1.7870e-15, -5.1302e-16],\n ...,\n [ 1.6774e-15, 1.2923e-14, 1.7087e-15, ..., 5.3801e-16,\n 1.0783e-15, -1.1220e-15],\n [ 1.5646e-17, 2.3805e-14, 9.9662e-16, ..., -1.8503e-15,\n -7.7949e-17, 9.9791e-16],\n [-2.8013e-16, -4.0093e-14, 3.0509e-15, ..., -2.0391e-15,\n 9.2896e-15, 2.7685e-15]], device='cuda:0')", + "exp_avg_sq": "tensor([[9.7732e-18, 1.6325e-18, 9.6642e-18, ..., 1.1930e-18, 1.3141e-18,\n 4.4986e-18],\n [1.3793e-16, 1.8801e-17, 1.6896e-16, ..., 1.7570e-17, 1.9799e-17,\n 4.9866e-17],\n [6.9563e-19, 1.3568e-19, 9.3720e-19, ..., 6.1138e-20, 1.0552e-19,\n 3.6355e-19],\n ...,\n [4.3226e-17, 6.9236e-18, 5.6153e-17, ..., 8.7352e-18, 5.7281e-18,\n 1.5612e-17],\n [1.4639e-19, 3.2325e-20, 1.2694e-19, ..., 7.3542e-20, 6.7138e-20,\n 7.0153e-20],\n [4.5682e-16, 9.5555e-17, 5.7733e-16, ..., 8.1402e-17, 6.8702e-17,\n 1.9723e-16]], device='cuda:0')" + }, + "52": { + "step": "tensor(11268.)", + "exp_avg": "tensor([ 1.7244e-12, -9.7171e-14, 5.6454e-14, -5.6118e-13, 7.2431e-14,\n 3.0940e-13, 4.8484e-12, 6.1218e-13, 2.8811e-15, -2.6485e-13,\n -1.7376e-12, 6.0587e-13, -8.8285e-13, 1.8182e-13, -2.8642e-13,\n 1.8016e-13, -3.2217e-13, -1.0768e-12, -1.2284e-12, -2.4404e-12,\n 6.2550e-13, -3.2529e-13, -3.1505e-13, 2.4253e-13, 4.8161e-13,\n 5.3273e-13, -5.5239e-13, 1.6863e-12, 1.2599e-12, 7.5652e-13,\n -9.2783e-13, -4.9087e-14, -4.6688e-13, 1.0141e-12, 3.1796e-13,\n -2.8190e-13, -2.9889e-13, -1.5502e-12, -9.6949e-13, -3.9961e-13,\n -2.0562e-12, -1.1905e-12, -1.1406e-12, 4.9948e-13, -2.4956e-13,\n -1.3623e-12, 2.3899e-13, 7.3866e-13, 1.4783e-12, 1.5401e-13,\n 1.3299e-12, 1.2413e-12, 4.1352e-13, -2.1664e-13, 1.1600e-14,\n 4.9490e-14, 1.7071e-13, 4.8785e-13, 4.0048e-13, 4.0315e-14,\n -1.8374e-12, 2.9952e-13, 7.1123e-13, -6.8872e-13], device='cuda:0')", + "exp_avg_sq": "tensor([2.7101e-14, 3.8319e-13, 2.2642e-15, 8.2311e-13, 1.8303e-14, 3.3778e-13,\n 3.4808e-13, 2.6820e-13, 2.3511e-15, 1.7622e-14, 1.6065e-14, 7.7775e-14,\n 1.1184e-12, 1.0857e-13, 3.9367e-14, 1.0229e-13, 3.0583e-15, 3.0941e-12,\n 8.8696e-13, 2.6433e-13, 6.0005e-16, 2.8523e-13, 3.0639e-12, 1.6017e-14,\n 1.4897e-14, 3.0085e-15, 6.5752e-14, 5.5637e-15, 4.4948e-15, 1.5688e-14,\n 3.1077e-15, 4.8386e-13, 9.6273e-14, 6.9596e-15, 9.3568e-15, 4.1695e-12,\n 1.4139e-14, 1.6159e-14, 3.2467e-12, 1.0068e-13, 6.2127e-15, 7.9681e-13,\n 5.9340e-13, 1.3718e-13, 5.0938e-13, 2.9643e-13, 3.4506e-13, 5.5901e-15,\n 1.8194e-15, 1.9934e-13, 2.3337e-14, 1.1713e-13, 3.3334e-15, 1.3031e-13,\n 1.5671e-13, 8.6441e-14, 7.1386e-16, 1.9905e-15, 2.5387e-14, 4.1053e-14,\n 2.8593e-12, 1.2895e-13, 3.4196e-16, 1.4666e-12], device='cuda:0')" + }, + "53": { + "step": "tensor(11268.)", + "exp_avg": "tensor([-4.4825e-15, 4.1343e-17, 3.2183e-16, -3.2131e-13, 1.4348e-15,\n -8.5046e-14, -9.6207e-14, -1.6698e-13, -3.8691e-13, -8.1344e-16,\n -7.7759e-13, -5.3789e-15, 1.2430e-14, -1.8507e-15, 1.4621e-15,\n 3.3880e-16, -6.3528e-16, -5.9874e-13, -2.3260e-13, -6.1513e-13,\n -3.9859e-15, -3.5325e-13, 3.4725e-15, 9.7014e-16, -2.0204e-13,\n 2.4671e-15, -2.3115e-15, -1.2732e-14, -3.7042e-15, -5.3462e-15,\n 1.6517e-14, -7.2001e-17, 3.1912e-15, -2.1662e-15, -7.5275e-16,\n 8.4300e-15, -1.9471e-13, -4.5505e-13, -6.6124e-13, 4.5574e-16,\n -3.2139e-13, 3.1228e-15, 8.6964e-15, -2.5331e-14, -1.5081e-13,\n 2.4413e-14, -3.3082e-14, -7.2171e-14, -7.5825e-15, -6.1233e-16,\n -5.4074e-15, -2.4992e-15, 3.8512e-15, 3.7426e-15, -2.3413e-13,\n 3.0871e-16, -4.1078e-16, -1.9681e-15, 1.1362e-15, -2.0750e-17,\n -6.8316e-13, -7.7385e-15, -5.4029e-15, 1.0539e-14], device='cuda:0')", + "exp_avg_sq": "tensor([2.0389e-18, 1.2058e-16, 5.3263e-19, 8.3462e-16, 1.1027e-17, 7.1080e-16,\n 2.6974e-16, 6.5321e-16, 3.9020e-17, 3.5669e-18, 8.0539e-17, 6.0629e-16,\n 1.5704e-16, 4.1217e-18, 1.1284e-18, 4.5628e-18, 9.3555e-19, 4.3554e-15,\n 7.8882e-16, 2.3036e-16, 1.7494e-19, 1.4732e-16, 1.2406e-15, 8.7388e-19,\n 1.2635e-17, 2.1416e-18, 1.0216e-17, 2.9383e-18, 3.7706e-21, 3.4096e-20,\n 1.2939e-18, 2.3104e-16, 4.1798e-18, 2.4255e-17, 1.1106e-19, 1.0539e-15,\n 3.7589e-17, 6.9623e-17, 4.4624e-15, 7.8724e-18, 4.0122e-17, 3.1852e-16,\n 1.6783e-16, 6.0452e-16, 6.9559e-16, 2.2178e-18, 1.0818e-15, 2.5172e-17,\n 2.2610e-19, 2.4606e-17, 1.3481e-18, 1.9733e-18, 4.7578e-18, 2.3762e-18,\n 1.1319e-16, 2.4544e-19, 2.4767e-20, 1.3022e-19, 1.2731e-18, 1.2334e-18,\n 3.1934e-15, 4.2262e-16, 8.4196e-20, 5.4694e-16], device='cuda:0')" + }, + "54": { + "step": "tensor(11268.)", + "exp_avg": "tensor([ 2.9654e-14, 1.5316e-16, -6.4813e-16, -2.4308e-13, -5.9581e-14,\n -1.3060e-13, -5.7466e-14, -1.6084e-13, -2.3665e-13, 8.6436e-16,\n -3.7497e-13, -5.4124e-14, -1.0376e-14, 3.4525e-15, -9.6965e-16,\n -7.1680e-16, 5.8280e-16, -3.2927e-13, -2.3759e-13, -3.4916e-13,\n 1.2701e-14, -2.3580e-13, -4.1998e-15, -1.8414e-15, -1.6752e-13,\n -6.7330e-15, 2.0263e-15, 4.4080e-14, 1.6654e-14, 1.3074e-14,\n -1.5069e-14, 6.6418e-16, -3.1045e-15, -2.4671e-14, 2.4472e-15,\n -4.5669e-15, -1.9738e-13, -3.1076e-13, -3.3337e-13, -3.8030e-16,\n -2.9494e-13, -2.0060e-15, -6.6135e-15, -8.4952e-14, -1.7816e-13,\n -2.6568e-14, -9.5762e-14, -1.0536e-13, 3.1903e-14, 1.3174e-15,\n 2.1194e-14, 6.7117e-15, -9.2971e-15, -4.0493e-15, -1.9618e-13,\n -6.4950e-16, 7.8614e-16, 6.4695e-15, -3.3995e-15, 1.2059e-16,\n -3.6021e-13, -6.5528e-14, 1.1323e-14, -6.1992e-15], device='cuda:0')", + "exp_avg_sq": "tensor([1.4138e-18, 1.2622e-16, 6.7591e-19, 1.1194e-15, 6.6250e-17, 6.1401e-16,\n 5.4082e-16, 5.3050e-16, 3.1375e-17, 5.9213e-18, 3.1878e-17, 2.7522e-16,\n 7.0287e-16, 5.1012e-17, 1.1080e-17, 3.1140e-18, 9.8348e-19, 3.4767e-15,\n 1.1846e-15, 4.3389e-16, 5.7927e-19, 4.5060e-16, 1.9641e-15, 2.2611e-18,\n 4.6071e-17, 1.7022e-18, 5.3382e-18, 4.3812e-18, 3.2781e-19, 7.4912e-18,\n 4.1950e-18, 1.4877e-16, 4.2543e-17, 1.6023e-17, 7.0016e-20, 2.8718e-15,\n 1.8338e-17, 2.9062e-17, 3.6090e-15, 1.8057e-17, 2.5127e-17, 3.7880e-16,\n 2.8124e-16, 3.7083e-16, 7.9488e-16, 2.1360e-16, 6.6165e-16, 1.4049e-17,\n 1.0409e-18, 6.8822e-17, 1.2300e-18, 7.0141e-17, 4.0248e-18, 1.0132e-16,\n 3.0295e-16, 2.2390e-19, 2.0013e-18, 7.8002e-19, 2.1026e-18, 5.5255e-18,\n 3.2120e-15, 3.3289e-16, 4.4717e-19, 8.4652e-16], device='cuda:0')" + }, + "55": { + "step": "tensor(11268.)", + "exp_avg": "tensor([[-1.1464e-13, -2.2972e-13, -2.3819e-13, 1.7995e-12, 9.0416e-16,\n 6.5669e-13, 3.4991e-13, 1.3764e-12, 2.3756e-12, -2.2814e-13,\n 3.1967e-12, 8.7027e-14, -2.3551e-13, -2.2488e-13, -2.1411e-13,\n -2.3381e-13, -2.2639e-13, 2.4651e-12, 1.2187e-12, 2.1984e-12,\n -2.1336e-13, 2.0691e-12, -2.3885e-13, -2.4861e-13, 1.7788e-12,\n -1.7352e-13, -2.2728e-13, -1.8463e-13, -1.1777e-13, -2.2706e-13,\n -2.3007e-13, -2.2767e-13, -1.9444e-13, 2.7669e-14, -2.1684e-13,\n -2.2146e-13, 1.2990e-12, 2.1052e-12, 2.7404e-12, -2.2256e-13,\n 1.3546e-12, -5.6898e-14, -1.8862e-13, 2.8291e-13, 9.8870e-13,\n -1.8112e-13, 2.8607e-13, 6.9400e-13, -1.7801e-13, -2.3405e-13,\n -1.5992e-13, -6.1960e-14, -1.6953e-13, -2.3264e-13, 1.7263e-12,\n -2.4956e-13, -2.2678e-13, -4.2590e-14, -1.9871e-13, -7.0712e-14,\n 2.8309e-12, 9.5673e-14, -2.0413e-13, -1.8995e-13],\n [ 1.1422e-13, 2.2869e-13, 2.3698e-13, -1.7900e-12, -1.1088e-15,\n -6.5317e-13, -3.5121e-13, -1.3682e-12, -2.3641e-12, 2.2697e-13,\n -3.1813e-12, -8.6434e-14, 2.3466e-13, 2.2362e-13, 2.1314e-13,\n 2.3223e-13, 2.2533e-13, -2.4498e-12, -1.2138e-12, -2.1844e-12,\n 2.1208e-13, -2.0576e-12, 2.3765e-13, 2.4739e-13, -1.7676e-12,\n 1.7239e-13, 2.2614e-13, 1.8391e-13, 1.1700e-13, 2.2604e-13,\n 2.2884e-13, 2.2680e-13, 1.9348e-13, -2.6886e-14, 2.1559e-13,\n 2.2024e-13, -1.2935e-12, -2.0956e-12, -2.7250e-12, 2.2135e-13,\n -1.3489e-12, 5.6910e-14, 1.8758e-13, -2.8087e-13, -9.8333e-13,\n 1.7988e-13, -2.8376e-13, -6.8925e-13, 1.7750e-13, 2.3306e-13,\n 1.5915e-13, 6.1911e-14, 1.6832e-13, 2.3142e-13, -1.7181e-12,\n 2.4837e-13, 2.2568e-13, 4.2516e-14, 1.9754e-13, 7.0713e-14,\n -2.8175e-12, -9.5801e-14, 2.0300e-13, 1.8911e-13]], device='cuda:0')", + "exp_avg_sq": "tensor([[9.9245e-16, 5.0331e-14, 1.7249e-15, 2.7985e-14, 1.3081e-15, 6.2136e-14,\n 1.1794e-15, 6.5179e-14, 1.6540e-15, 2.9808e-14, 4.0846e-16, 1.6504e-13,\n 5.5366e-15, 1.1284e-15, 1.2271e-15, 7.7769e-14, 1.6215e-15, 6.4341e-14,\n 2.3665e-14, 8.7421e-16, 1.9506e-15, 4.1629e-15, 2.8957e-14, 1.7038e-14,\n 1.3252e-15, 1.6881e-15, 1.5694e-13, 1.8433e-15, 3.2351e-16, 7.8289e-16,\n 1.8522e-15, 9.6448e-14, 1.2510e-15, 9.2681e-16, 1.4616e-15, 1.3196e-14,\n 3.9015e-16, 4.9020e-16, 6.3694e-14, 1.5703e-14, 1.6273e-15, 4.2480e-14,\n 2.5488e-14, 1.0194e-13, 3.6821e-14, 5.9605e-16, 9.8108e-14, 8.7333e-16,\n 1.8622e-15, 1.1964e-14, 1.0488e-15, 4.4408e-16, 1.5253e-15, 1.2463e-15,\n 6.9624e-15, 1.1694e-13, 8.4310e-16, 1.8314e-15, 3.3345e-14, 4.6623e-15,\n 4.4008e-14, 7.3539e-14, 1.9751e-15, 2.9038e-14],\n [9.9245e-16, 5.0331e-14, 1.7249e-15, 2.7985e-14, 1.3082e-15, 6.2136e-14,\n 1.1794e-15, 6.5179e-14, 1.6541e-15, 2.9808e-14, 4.0848e-16, 1.6504e-13,\n 5.5366e-15, 1.1284e-15, 1.2271e-15, 7.7769e-14, 1.6215e-15, 6.4341e-14,\n 2.3665e-14, 8.7427e-16, 1.9506e-15, 4.1629e-15, 2.8957e-14, 1.7038e-14,\n 1.3252e-15, 1.6881e-15, 1.5694e-13, 1.8433e-15, 3.2351e-16, 7.8289e-16,\n 1.8522e-15, 9.6448e-14, 1.2510e-15, 9.2681e-16, 1.4616e-15, 1.3196e-14,\n 3.9016e-16, 4.9020e-16, 6.3694e-14, 1.5703e-14, 1.6273e-15, 4.2480e-14,\n 2.5488e-14, 1.0194e-13, 3.6821e-14, 5.9605e-16, 9.8108e-14, 8.7333e-16,\n 1.8622e-15, 1.1964e-14, 1.0488e-15, 4.4408e-16, 1.5253e-15, 1.2463e-15,\n 6.9624e-15, 1.1694e-13, 8.4310e-16, 1.8314e-15, 3.3345e-14, 4.6623e-15,\n 4.4008e-14, 7.3539e-14, 1.9751e-15, 2.9038e-14]], device='cuda:0')" + }, + "56": { + "step": "tensor(11268.)", + "exp_avg": "tensor([ 1.2975e-12, -1.2913e-12], device='cuda:0')", + "exp_avg_sq": "tensor([8.6638e-14, 8.6638e-14], device='cuda:0')" + }, + "57": { + "step": "tensor(11268.)", + "exp_avg": "tensor([[ 2.4365e-13, 4.8438e-11, 1.7095e-10, 2.9320e-04, -5.1731e-16,\n -2.0761e-13, -6.3461e-13, -2.7386e-08],\n [-1.3451e-16, -2.7026e-14, -9.2854e-14, -1.1903e-07, 3.2723e-19,\n 1.3037e-16, 3.8849e-16, 1.6905e-11],\n [ 8.0859e-17, 1.0265e-14, 2.4268e-15, 9.6852e-09, 2.5438e-19,\n 9.4599e-17, 1.9824e-16, 9.6609e-12],\n [ 1.5060e-17, 2.0755e-15, 1.9124e-15, 5.2436e-09, 8.6941e-20,\n 3.2322e-17, 6.7195e-17, 3.2924e-12],\n [ 8.1218e-18, 1.9077e-15, 8.7271e-15, 9.5388e-09, -3.5479e-19,\n -1.3354e-16, -2.9761e-16, -1.4236e-11],\n [-2.9519e-17, -5.8588e-15, -2.0645e-14, -3.5700e-08, 1.1489e-19,\n 4.4587e-17, 1.1748e-16, 5.2900e-12],\n [-2.5979e-13, -5.1554e-11, -1.8561e-10, -3.5543e-04, 5.3607e-16,\n 2.1538e-13, 6.6251e-13, 2.8523e-08],\n [-6.5850e-14, -1.3051e-11, -4.7735e-11, -9.9074e-05, 1.3270e-16,\n 5.3367e-14, 1.6498e-13, 7.0912e-09]], device='cuda:0')", + "exp_avg_sq": "tensor([[9.7224e-13, 9.3443e-13, 7.6396e-16, 2.4641e-06, 1.3131e-15, 2.6373e-14,\n 1.3969e-14, 1.2397e-09],\n [4.0663e-12, 8.4804e-12, 3.8556e-15, 1.8682e-12, 7.5258e-15, 3.8273e-13,\n 4.1261e-13, 2.4663e-10],\n [2.3078e-12, 1.6026e-12, 1.5836e-15, 1.6479e-12, 2.6095e-15, 4.5152e-14,\n 2.0793e-14, 3.0854e-10],\n [3.5853e-13, 5.3761e-13, 9.6896e-17, 7.2557e-14, 5.4989e-16, 2.2697e-14,\n 2.3006e-14, 1.2130e-10],\n [2.5531e-12, 2.3305e-12, 1.5855e-15, 6.1927e-13, 1.7311e-15, 1.0540e-13,\n 2.1944e-13, 5.0626e-11],\n [2.0618e-13, 6.5133e-14, 3.4251e-16, 9.0059e-13, 1.4209e-16, 1.8335e-15,\n 2.0857e-15, 2.4437e-11],\n [1.6785e-11, 2.1701e-11, 8.5161e-15, 2.7048e-06, 2.1113e-14, 1.0089e-12,\n 1.0880e-12, 1.1521e-10],\n [3.6333e-12, 2.0976e-12, 8.1756e-16, 1.6899e-07, 3.1449e-15, 7.2506e-14,\n 6.1273e-14, 2.8816e-11]], device='cuda:0')" + }, + "58": { + "step": "tensor(11268.)", + "exp_avg": "tensor([ 2.9309e-04, -1.1902e-07, 9.6962e-09, 5.2461e-09, 9.5265e-09,\n -3.5685e-08, -3.5541e-04, -9.9053e-05], device='cuda:0')", + "exp_avg_sq": "tensor([2.4654e-06, 2.8798e-10, 3.2391e-10, 1.2578e-10, 7.1879e-11, 2.6444e-11,\n 2.7051e-06, 1.6904e-07], device='cuda:0')" + }, + "59": { + "step": "tensor(11268.)", + "exp_avg": "tensor([[ 9.9391e-06, -1.3314e-06, -1.3324e-06, -1.3324e-06, -1.3324e-06,\n -1.3324e-06, 1.5682e-05, 1.4587e-05],\n [ 7.9350e-06, -1.0629e-06, -1.0637e-06, -1.0637e-06, -1.0637e-06,\n -1.0637e-06, 1.2520e-05, 1.1646e-05],\n [-1.0054e-03, 1.3467e-04, 1.3477e-04, 1.3477e-04, 1.3477e-04,\n 1.3477e-04, -1.5864e-03, -1.4756e-03],\n [ 9.8765e-04, -1.3230e-04, -1.3239e-04, -1.3239e-04, -1.3239e-04,\n -1.3239e-04, 1.5584e-03, 1.4496e-03]], device='cuda:0')", + "exp_avg_sq": "tensor([[5.9471e-10, 1.2513e-10, 4.1307e-11, 6.9818e-11, 4.2213e-11, 1.5317e-10,\n 7.0240e-10, 5.1677e-10],\n [1.8214e-10, 1.3096e-11, 5.9053e-12, 8.6571e-12, 5.5764e-12, 1.3488e-11,\n 3.9572e-10, 3.0024e-10],\n [2.4712e-05, 4.4482e-07, 4.4549e-07, 4.4550e-07, 4.4550e-07, 4.4551e-07,\n 6.1490e-05, 5.2917e-05],\n [2.4713e-05, 4.4491e-07, 4.4552e-07, 4.4557e-07, 4.4551e-07, 4.4565e-07,\n 6.1492e-05, 5.2918e-05]], device='cuda:0')" + }, + "60": { + "step": "tensor(11268.)", + "exp_avg": "tensor([ 7.8410e-06, 6.2601e-06, -7.9294e-04, 7.7884e-04], device='cuda:0')", + "exp_avg_sq": "tensor([8.7579e-09, 7.2877e-10, 1.5431e-05, 1.5438e-05], device='cuda:0')" + }, + "61": { + "step": "tensor(11268.)", + "exp_avg": "tensor([[ 2.1137e-09, -1.6311e-10, 5.0911e-10, ..., 2.2412e-09,\n -4.5010e-11, -8.1892e-11],\n [-1.4820e-09, -2.0098e-11, -3.2620e-10, ..., -1.1697e-09,\n 1.1870e-11, -1.1650e-12],\n [ 6.3234e-09, -7.1289e-10, 9.4110e-10, ..., 5.8062e-09,\n -2.2166e-10, -2.5973e-10],\n ...,\n [ 5.9795e-09, -5.5121e-10, 1.2820e-09, ..., 6.3391e-09,\n -1.5152e-10, -2.4684e-10],\n [-2.3752e-09, 8.1985e-11, -5.5048e-10, ..., -2.1868e-09,\n 3.1666e-11, 4.0959e-11],\n [ 6.3168e-09, -6.0671e-10, 1.3145e-09, ..., 6.5497e-09,\n -1.6851e-10, -2.6406e-10]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.4148e-16, 9.1555e-17, 2.0039e-16, ..., 9.7762e-17, 3.8181e-17,\n 7.1182e-17],\n [6.7477e-17, 8.3449e-17, 6.8172e-17, ..., 6.6767e-17, 3.2296e-17,\n 3.9505e-17],\n [1.9567e-16, 2.4613e-16, 1.3798e-16, ..., 1.3001e-16, 9.9448e-17,\n 9.0551e-17],\n ...,\n [4.0246e-16, 4.4616e-16, 3.4824e-16, ..., 3.4631e-16, 1.8054e-16,\n 1.9474e-16],\n [5.3403e-16, 2.1298e-16, 3.7789e-16, ..., 2.7586e-16, 7.8349e-17,\n 1.2698e-16],\n [3.6727e-16, 4.2108e-16, 2.8610e-16, ..., 3.2064e-16, 1.6553e-16,\n 1.6291e-16]], device='cuda:0')" + }, + "62": { + "step": "tensor(11268.)", + "exp_avg": "tensor([ 5.7547e-08, -2.8581e-08, 8.1831e-08, -3.0462e-08, 9.7263e-08,\n -2.8625e-08, 1.5865e-07, 2.2095e-09, 1.6837e-07, -3.9768e-08,\n 9.4284e-08, 1.2064e-07, -1.9768e-08, 8.1500e-08, 6.8490e-08,\n -2.6336e-08, -1.5187e-08, 1.2758e-07, 1.6109e-07, 8.8651e-08,\n 1.5425e-07, 1.9714e-07, -3.3400e-08, 1.3270e-07, 1.3039e-07,\n 1.6690e-07, 2.7881e-08, -3.2370e-08, -2.6446e-08, -2.9275e-08,\n -3.5811e-09, -1.1416e-08, 6.9585e-08, -3.0082e-08, -2.9037e-08,\n -2.4619e-09, 7.7886e-08, 9.4069e-08, -3.5257e-08, -3.1595e-08,\n -3.8760e-08, 9.3159e-09, -1.5087e-08, 1.7031e-07, -4.3735e-08,\n 1.9856e-08, -2.6503e-08, -2.5676e-08, -1.4269e-08, -3.8673e-08,\n 1.3746e-07, 9.2764e-08, -1.6558e-08, -4.9004e-08, -1.7077e-08,\n -2.2540e-08, 1.0944e-07, -1.3581e-08, 9.6197e-08, 5.2775e-08,\n 3.1284e-08, 1.1119e-07, -4.2888e-08, 1.3734e-07, -2.9463e-08,\n 1.0088e-07, -2.4961e-08, 9.5664e-08, 1.9722e-08, -2.5648e-08,\n -3.9399e-08, -3.7327e-08, 1.6004e-07, -3.4668e-08, 1.0633e-07,\n 7.9898e-08, 5.3984e-08, 1.3627e-07, -4.8435e-08, 1.2735e-07,\n -4.3618e-08, 1.8150e-07, -4.3233e-10, -4.1812e-08, 1.1347e-07,\n -4.8506e-09, -3.5792e-08, 1.5334e-07, -3.6207e-08, 4.1559e-08,\n -3.4471e-08, 1.7839e-07, -4.5479e-08, 1.5289e-07, 2.6533e-08,\n -4.9806e-08, -1.3814e-08, -5.5011e-08, -3.3482e-08, 1.7964e-07,\n 3.4333e-08, 1.5339e-07, 1.5266e-07, 1.6836e-07, -4.3678e-08,\n -4.4278e-08, -3.6847e-08, -2.3542e-08, -1.2725e-08, -3.3481e-08,\n -3.6027e-08, 6.8142e-08, 6.3777e-08, -3.4244e-08, 6.4273e-08,\n 1.4114e-07, 1.7741e-08, -3.0439e-08, 1.8479e-07, -3.7921e-08,\n 6.4084e-08, -3.0580e-08, -3.4831e-08, 1.3666e-07, 1.5344e-07,\n -4.4628e-08, -4.3020e-08, -4.2978e-08, 8.3282e-09, 1.4518e-07,\n 7.6916e-08, 1.5639e-07, 7.1106e-08, -7.6092e-09, 1.4319e-07,\n 1.8680e-07, -2.2594e-08, 1.0195e-07, -1.8824e-08, -3.3169e-08,\n -3.5010e-08, -5.2765e-08, 2.0352e-08, 5.5549e-08, 1.4914e-07,\n 1.6983e-08, 6.8218e-08, -2.9977e-08, 8.3716e-08, -2.2831e-08,\n -4.0870e-08, 4.0317e-08, 3.8099e-08, 7.4008e-08, 2.9964e-08,\n -3.0809e-08, 5.1723e-08, 2.9469e-08, 1.6004e-07, -3.9937e-08,\n 6.8086e-08, -2.6513e-08, 1.8106e-07, -2.1772e-08, -3.9272e-08,\n -4.0841e-08, -4.0437e-08, -2.3286e-08, -1.5199e-08, 1.5589e-07,\n -3.2857e-08, -3.3346e-08, -4.8489e-08, -2.1205e-08, -2.3284e-08,\n -3.2898e-08, -5.0642e-08, -2.2152e-08, 1.5959e-07, 1.3538e-07,\n -3.1464e-08, -2.0471e-08, 1.4668e-07, 4.0227e-08, -3.1715e-08,\n -5.3855e-08, 1.5409e-07, -5.5710e-08, 9.9153e-08, 1.4721e-07,\n -6.1406e-08, 1.4879e-07], device='cuda:0')", + "exp_avg_sq": "tensor([1.0266e-12, 1.6176e-12, 1.7759e-12, 1.3376e-12, 5.0796e-12, 1.3650e-12,\n 1.5113e-11, 1.4509e-12, 7.7673e-12, 1.1092e-12, 9.9674e-13, 2.2002e-12,\n 1.5375e-12, 7.0270e-13, 3.4760e-12, 1.3381e-12, 1.5274e-12, 1.2891e-12,\n 5.2686e-12, 9.3233e-13, 1.0360e-12, 1.1634e-11, 1.4746e-12, 3.3170e-12,\n 1.2486e-11, 5.6898e-12, 1.3912e-12, 7.7585e-13, 8.6114e-13, 4.7617e-13,\n 3.4657e-12, 1.0819e-12, 5.6374e-13, 4.3710e-12, 3.3098e-12, 1.7451e-12,\n 4.0554e-12, 2.0563e-12, 1.4894e-12, 6.4803e-13, 1.6255e-12, 3.5841e-13,\n 2.5962e-12, 8.6061e-12, 1.6838e-12, 1.4355e-12, 1.0967e-12, 1.0408e-12,\n 2.1079e-12, 2.0570e-12, 2.7933e-12, 9.3239e-13, 5.1485e-12, 1.4736e-12,\n 1.3552e-12, 2.5222e-12, 9.3246e-12, 1.8216e-12, 5.4830e-12, 9.7847e-13,\n 8.1896e-13, 1.6900e-12, 2.5806e-12, 5.8367e-12, 1.5220e-12, 1.1056e-12,\n 8.0975e-13, 6.5902e-12, 2.1373e-12, 1.0102e-12, 4.8963e-12, 5.3951e-12,\n 1.2851e-11, 1.8829e-12, 1.0210e-12, 3.0945e-12, 6.5641e-13, 2.5082e-12,\n 1.5628e-12, 2.4394e-12, 3.9900e-12, 2.1969e-11, 4.2877e-13, 2.3025e-12,\n 3.6502e-12, 1.5648e-12, 5.3760e-13, 4.8777e-12, 1.1895e-12, 6.0391e-12,\n 1.1888e-12, 1.2836e-11, 3.0630e-12, 7.8168e-12, 1.1896e-12, 1.5188e-12,\n 1.1184e-12, 3.6396e-12, 7.8092e-13, 7.0154e-12, 7.3820e-13, 8.3888e-12,\n 4.5137e-12, 1.0689e-11, 2.1030e-12, 1.3701e-12, 1.6903e-12, 5.5402e-13,\n 5.6398e-12, 5.6950e-13, 5.5172e-12, 2.2360e-12, 1.2824e-12, 1.9372e-12,\n 2.0762e-12, 2.8394e-12, 2.6029e-12, 5.0374e-13, 2.0206e-11, 2.6468e-12,\n 2.8569e-12, 2.9813e-12, 7.2909e-12, 2.2534e-12, 7.2415e-12, 1.0085e-12,\n 1.0370e-12, 1.3295e-12, 2.3698e-12, 5.5585e-12, 8.4256e-13, 5.4068e-12,\n 1.1377e-12, 4.1026e-13, 6.3611e-12, 1.2432e-11, 3.8544e-13, 7.3126e-12,\n 2.3785e-12, 1.3153e-12, 1.0395e-12, 2.0989e-12, 1.8803e-12, 3.3852e-12,\n 1.6006e-11, 1.1896e-12, 1.5893e-12, 1.0484e-12, 1.6398e-12, 3.5202e-13,\n 3.5494e-12, 2.6065e-12, 1.4230e-12, 3.3749e-12, 1.1141e-12, 5.6433e-13,\n 1.6234e-12, 2.5682e-12, 4.4544e-12, 5.1635e-12, 5.2484e-13, 3.0431e-12,\n 9.4405e-12, 4.1432e-12, 1.6261e-12, 1.5809e-12, 2.1491e-12, 2.3521e-12,\n 9.6402e-13, 4.2744e-12, 6.5539e-13, 4.1418e-12, 3.3642e-12, 6.3230e-13,\n 2.5919e-12, 4.6617e-12, 2.3478e-12, 1.0795e-12, 9.7083e-12, 3.5577e-12,\n 2.4359e-12, 1.1413e-12, 6.8434e-12, 1.9965e-12, 2.5862e-12, 1.5203e-12,\n 3.1243e-12, 1.7917e-12, 1.7800e-12, 5.6778e-12, 2.5415e-12, 4.7800e-12],\n device='cuda:0')" + }, + "63": { + "step": "tensor(11268.)", + "exp_avg": "tensor([[ 2.4231e-08, -3.1387e-08, 1.2465e-07, -3.2446e-08, 9.6253e-08,\n -3.2949e-08, 9.1772e-08, -1.1641e-08, 1.0091e-07, -2.7690e-08,\n 6.9391e-08, 7.1803e-08, -3.1076e-08, 6.5194e-08, 6.0426e-08,\n -2.9670e-08, -2.2828e-08, 1.4707e-07, 7.8231e-08, 5.7612e-08,\n 1.5415e-07, 7.8713e-08, -3.2334e-08, 9.1975e-08, 9.5182e-08,\n 6.8169e-08, 4.6512e-08, -2.9584e-08, -3.0999e-08, -2.6112e-08,\n -1.7473e-08, -1.9551e-08, 5.5562e-08, -2.6193e-08, -3.1636e-08,\n 2.6950e-08, 8.0902e-08, 3.6389e-08, -2.9976e-08, -2.6373e-08,\n -3.1736e-08, 3.1283e-08, -2.3410e-08, 9.6042e-08, -2.8617e-08,\n -4.1340e-09, -3.3761e-08, -3.2010e-08, -1.8856e-08, -3.1199e-08,\n 7.9867e-08, 7.8962e-08, 1.7001e-08, -2.3875e-08, -2.7902e-08,\n -2.2742e-08, 7.5119e-08, -2.0470e-08, 8.9182e-08, 1.2621e-08,\n 1.2680e-08, 5.9261e-08, 2.2159e-09, 1.1285e-07, -3.2518e-08,\n 1.0384e-07, -3.2193e-08, 8.1027e-08, -7.0662e-09, -3.1329e-08,\n -3.0619e-08, -2.9841e-08, 9.1420e-08, -3.3313e-08, 7.6972e-08,\n 9.0534e-08, 3.2386e-08, 7.6466e-08, -1.1926e-08, 7.1488e-08,\n -3.1222e-08, 9.3493e-08, -1.2791e-08, -2.8153e-08, 1.0025e-07,\n 2.0236e-08, -1.7026e-08, 8.5527e-08, -2.9004e-08, 4.9633e-08,\n -2.5953e-08, 1.0115e-07, -3.0495e-08, 1.0693e-07, -2.7155e-09,\n -9.6064e-09, -3.0971e-08, -2.6670e-08, -2.5908e-08, 7.1505e-08,\n 4.8004e-08, 1.0255e-07, 9.4113e-08, 1.0244e-07, -3.0076e-08,\n -8.8889e-09, -3.1956e-08, -3.2617e-08, -1.6035e-08, -1.6981e-08,\n -2.9237e-08, 1.6412e-08, 8.6723e-08, -3.1597e-08, 8.1792e-09,\n 1.1883e-07, -4.2347e-09, -2.4764e-08, 8.2402e-08, -3.1639e-08,\n 6.5265e-08, -3.0696e-08, -2.3299e-08, 1.1843e-07, 9.8621e-08,\n -2.2154e-08, -2.4552e-08, -2.2936e-08, -1.1162e-08, 1.0434e-07,\n 4.0748e-08, 8.3607e-08, 3.6055e-08, -2.0575e-08, 1.0754e-07,\n 9.0397e-08, -3.4176e-09, 8.6990e-08, -2.6495e-08, -2.8560e-08,\n -2.7108e-08, -5.7135e-09, -5.1742e-09, 4.2803e-09, 8.8365e-08,\n -7.4230e-09, 9.5745e-08, -3.1171e-08, 2.7769e-08, -2.9134e-08,\n -3.1558e-08, 2.1229e-09, 4.5708e-09, 7.3803e-08, 5.1743e-08,\n -2.6946e-08, 6.7626e-08, 6.8085e-10, 5.8943e-08, -3.0516e-08,\n 4.7971e-08, -2.8491e-08, 8.6213e-08, -1.8234e-08, -3.1330e-08,\n -2.9828e-08, -3.1195e-08, -2.4260e-08, -3.0714e-08, 8.1555e-08,\n -2.3563e-08, -2.9525e-08, -2.9106e-08, -3.2690e-08, -3.0505e-08,\n -3.1010e-08, -2.6969e-08, 5.4741e-09, 1.0120e-07, 1.0650e-07,\n -3.2269e-08, -3.1092e-08, 1.1066e-07, 4.7689e-08, 1.3366e-08,\n -2.1744e-08, 1.1066e-07, -2.0743e-08, 4.3715e-08, 8.6423e-08,\n -2.1594e-08, 8.3696e-08],\n [ 6.3275e-09, -8.0410e-09, 3.3250e-08, -8.3960e-09, 2.6160e-08,\n -8.3978e-09, 2.4337e-08, -3.2190e-09, 2.6798e-08, -7.0740e-09,\n 1.8285e-08, 1.8647e-08, -8.0744e-09, 1.7112e-08, 1.5904e-08,\n -7.5571e-09, -5.9399e-09, 3.9664e-08, 2.0842e-08, 1.5646e-08,\n 4.1265e-08, 2.1409e-08, -8.3149e-09, 2.4398e-08, 2.5730e-08,\n 1.8322e-08, 1.2610e-08, -7.6108e-09, -7.8961e-09, -6.6546e-09,\n -4.6658e-09, -5.1071e-09, 1.4695e-08, -6.7879e-09, -8.1690e-09,\n 7.2689e-09, 2.1614e-08, 9.2911e-09, -7.7321e-09, -6.7591e-09,\n -8.2400e-09, 8.2482e-09, -6.1323e-09, 2.5810e-08, -7.3511e-09,\n -9.7846e-10, -8.6558e-09, -8.2074e-09, -5.0216e-09, -8.0950e-09,\n 2.1233e-08, 2.0907e-08, 4.3941e-09, -5.9795e-09, -7.2811e-09,\n -5.8338e-09, 2.0379e-08, -5.2753e-09, 2.3986e-08, 3.0936e-09,\n 3.6937e-09, 1.5828e-08, 5.3223e-10, 3.0342e-08, -8.3137e-09,\n 2.7125e-08, -8.2239e-09, 2.1661e-08, -1.9623e-09, -8.0188e-09,\n -7.8889e-09, -7.7035e-09, 2.4486e-08, -8.5627e-09, 2.0450e-08,\n 2.3785e-08, 8.6878e-09, 2.0308e-08, -3.3337e-09, 1.8732e-08,\n -8.1042e-09, 2.5252e-08, -3.2317e-09, -7.2381e-09, 2.7099e-08,\n 5.3209e-09, -4.3412e-09, 2.2867e-08, -7.4510e-09, 1.3233e-08,\n -6.5113e-09, 2.7521e-08, -7.9184e-09, 2.8751e-08, -7.9421e-10,\n -2.3365e-09, -7.9681e-09, -6.7826e-09, -6.5479e-09, 1.8834e-08,\n 1.2692e-08, 2.7540e-08, 2.5307e-08, 2.7617e-08, -7.6561e-09,\n -2.3215e-09, -8.2352e-09, -8.4304e-09, -4.2401e-09, -4.2590e-09,\n -7.4936e-09, 4.1215e-09, 2.3444e-08, -8.0588e-09, 2.2436e-09,\n 3.2067e-08, -1.3776e-09, -6.4496e-09, 2.1924e-08, -8.1219e-09,\n 1.7061e-08, -7.9415e-09, -6.1552e-09, 3.1383e-08, 2.6277e-08,\n -5.7277e-09, -6.5124e-09, -5.7350e-09, -2.9136e-09, 2.7856e-08,\n 1.0759e-08, 2.2222e-08, 9.8275e-09, -5.3745e-09, 2.9148e-08,\n 2.4036e-08, -1.0772e-09, 2.3185e-08, -6.8073e-09, -7.2510e-09,\n -6.8881e-09, -1.6767e-09, -1.1319e-09, 1.0111e-09, 2.3435e-08,\n -1.9475e-09, 2.5584e-08, -7.9664e-09, 7.3793e-09, -7.5772e-09,\n -8.0365e-09, 3.7874e-10, 1.2025e-09, 1.9742e-08, 1.3581e-08,\n -7.0198e-09, 1.8043e-08, 2.3181e-10, 1.5655e-08, -7.8571e-09,\n 1.2958e-08, -7.4816e-09, 2.3060e-08, -4.7368e-09, -8.0902e-09,\n -7.6168e-09, -7.9453e-09, -6.1972e-09, -7.8988e-09, 2.1480e-08,\n -5.9571e-09, -7.5944e-09, -7.5089e-09, -8.4387e-09, -7.7460e-09,\n -8.1312e-09, -6.9560e-09, 1.6206e-09, 2.6776e-08, 2.8679e-08,\n -8.2970e-09, -8.0106e-09, 2.9454e-08, 1.2849e-08, 3.2231e-09,\n -5.7038e-09, 2.9306e-08, -5.3487e-09, 1.1440e-08, 2.2919e-08,\n -5.4406e-09, 2.2177e-08],\n [-1.6596e-08, 1.8975e-08, -9.2873e-08, 1.6263e-08, -7.0194e-08,\n 1.7780e-08, -7.2984e-08, 5.1140e-09, -7.3887e-08, 1.3914e-08,\n -4.8106e-08, -5.5676e-08, 1.6474e-08, -4.8004e-08, -4.7941e-08,\n 1.8441e-08, 1.5074e-08, -1.0817e-07, -5.7653e-08, -3.8081e-08,\n -1.1076e-07, -5.9921e-08, 1.5749e-08, -6.6211e-08, -6.9250e-08,\n -5.1890e-08, -2.7665e-08, 1.5281e-08, 1.7448e-08, 1.4037e-08,\n 9.8427e-09, 1.1138e-08, -3.6406e-08, 1.7692e-08, 1.5565e-08,\n -1.7446e-08, -6.3117e-08, -2.5468e-08, 1.6868e-08, 1.5841e-08,\n 1.4474e-08, -2.0853e-08, 1.1938e-08, -6.9593e-08, 1.5580e-08,\n 6.3151e-09, 1.6727e-08, 1.6956e-08, 1.1640e-08, 1.6083e-08,\n -6.1154e-08, -6.0065e-08, -1.3480e-08, 1.3314e-08, 1.4924e-08,\n 1.5062e-08, -5.8352e-08, 1.2304e-08, -6.8575e-08, -1.1538e-08,\n -1.0971e-09, -3.8370e-08, -1.9852e-09, -7.6452e-08, 1.8514e-08,\n -8.3791e-08, 1.7046e-08, -5.9516e-08, 2.4071e-09, 1.8910e-08,\n 1.5542e-08, 1.6775e-08, -6.7912e-08, 1.5673e-08, -5.9176e-08,\n -7.1333e-08, -1.7415e-08, -6.0411e-08, 7.8998e-09, -5.7119e-08,\n 1.5917e-08, -6.6792e-08, 9.0214e-09, 1.8372e-08, -6.9668e-08,\n -1.6150e-08, 1.2169e-08, -6.5465e-08, 1.6773e-08, -3.0927e-08,\n 1.7160e-08, -6.8979e-08, 1.6681e-08, -7.7794e-08, 7.9322e-10,\n 8.9267e-09, 1.7211e-08, 1.7682e-08, 1.4737e-08, -5.8409e-08,\n -3.7786e-08, -8.3648e-08, -7.6077e-08, -7.2205e-08, 1.5889e-08,\n 4.7937e-09, 1.5545e-08, 1.5248e-08, 9.4599e-09, 1.2725e-08,\n 1.5607e-08, -1.1046e-08, -6.6152e-08, 1.9187e-08, -8.6956e-09,\n -9.1863e-08, 3.3578e-09, 1.3196e-08, -6.1717e-08, 1.5884e-08,\n -5.3091e-08, 1.4662e-08, 1.4004e-08, -9.8927e-08, -7.5960e-08,\n 1.1309e-08, 1.0923e-08, 1.6502e-08, 6.4082e-09, -7.8811e-08,\n -3.0476e-08, -6.2428e-08, -2.2270e-08, 1.0545e-08, -7.8890e-08,\n -7.3597e-08, 1.3926e-09, -6.9633e-08, 1.3815e-08, 1.8515e-08,\n 1.7656e-08, 3.0889e-09, 4.7121e-09, -2.9209e-09, -6.7665e-08,\n 2.1840e-09, -7.2507e-08, 1.7463e-08, -2.0870e-08, 1.5061e-08,\n 1.7806e-08, -1.7029e-09, -4.0332e-10, -5.3209e-08, -3.5750e-08,\n 1.4047e-08, -4.9734e-08, 4.7382e-09, -4.9795e-08, 1.5544e-08,\n -2.9589e-08, 1.3977e-08, -7.0263e-08, 1.2033e-08, 1.4790e-08,\n 1.6001e-08, 1.6534e-08, 1.6868e-08, 1.5379e-08, -6.6656e-08,\n 1.4941e-08, 1.6604e-08, 1.7814e-08, 1.6593e-08, 1.7940e-08,\n 1.4715e-08, 1.5489e-08, -1.7271e-09, -8.1909e-08, -7.8587e-08,\n 1.6936e-08, 1.6143e-08, -9.1238e-08, -3.6339e-08, -8.1361e-09,\n 1.0133e-08, -8.7258e-08, 1.1626e-08, -3.2654e-08, -6.0493e-08,\n 1.5365e-08, -6.9067e-08],\n [-1.4045e-08, 2.0534e-08, -6.5533e-08, 2.4659e-08, -5.2608e-08,\n 2.3647e-08, -4.3510e-08, 9.7829e-09, -5.4225e-08, 2.0922e-08,\n -3.9836e-08, -3.5060e-08, 2.2756e-08, -3.4550e-08, -2.8631e-08,\n 1.8864e-08, 1.3762e-08, -7.9163e-08, -4.1741e-08, -3.5419e-08,\n -8.5272e-08, -4.0543e-08, 2.4978e-08, -5.0527e-08, -5.2045e-08,\n -3.4885e-08, -3.1632e-08, 2.1992e-08, 2.1526e-08, 1.8798e-08,\n 1.2344e-08, 1.3582e-08, -3.4070e-08, 1.5366e-08, 2.4319e-08,\n -1.6862e-08, -3.9739e-08, -2.0353e-08, 2.0918e-08, 1.7364e-08,\n 2.5580e-08, -1.8790e-08, 1.7667e-08, -5.2642e-08, 2.0464e-08,\n -1.1877e-09, 2.5770e-08, 2.3341e-08, 1.2294e-08, 2.3291e-08,\n -4.0277e-08, -4.0124e-08, -7.9833e-09, 1.6605e-08, 2.0333e-08,\n 1.3582e-08, -3.7471e-08, 1.3506e-08, -4.4970e-08, -4.2281e-09,\n -1.5312e-08, -3.6955e-08, -7.6911e-10, -6.7196e-08, 2.2398e-08,\n -4.7589e-08, 2.3450e-08, -4.3498e-08, 6.6414e-09, 2.0519e-08,\n 2.3044e-08, 2.0848e-08, -4.8374e-08, 2.6281e-08, -3.8572e-08,\n -4.3346e-08, -2.3776e-08, -3.6686e-08, 7.4018e-09, -3.3394e-08,\n 2.3489e-08, -5.2343e-08, 7.0427e-09, 1.7096e-08, -5.8089e-08,\n -9.4854e-09, 9.2519e-09, -4.3294e-08, 1.9761e-08, -3.2113e-08,\n 1.5378e-08, -6.0095e-08, 2.1813e-08, -5.8337e-08, 2.7161e-09,\n 3.0523e-09, 2.1807e-08, 1.5843e-08, 1.7788e-08, -3.2224e-08,\n -2.3103e-08, -4.6896e-08, -4.3747e-08, -5.8264e-08, 2.1920e-08,\n 6.4414e-09, 2.4725e-08, 2.5877e-08, 1.0866e-08, 8.5710e-09,\n 2.1200e-08, -9.5390e-09, -4.4374e-08, 2.0550e-08, -1.7782e-09,\n -5.9554e-08, 2.2727e-09, 1.8085e-08, -4.2943e-08, 2.3955e-08,\n -2.9482e-08, 2.4052e-08, 1.5519e-08, -5.1397e-08, -4.9353e-08,\n 1.6634e-08, 2.0208e-08, 1.2238e-08, 7.7010e-09, -5.3828e-08,\n -2.1188e-08, -4.3755e-08, -2.3748e-08, 1.5467e-08, -5.8257e-08,\n -4.1228e-08, 3.1164e-09, -4.0906e-08, 1.9560e-08, 1.7374e-08,\n 1.6416e-08, 4.3173e-09, 1.6113e-09, -2.3812e-09, -4.4496e-08,\n 7.2031e-09, -4.9216e-08, 2.1754e-08, -1.4378e-08, 2.1726e-08,\n 2.1869e-08, -8.0272e-10, -5.3762e-09, -4.0640e-08, -2.9747e-08,\n 1.9993e-08, -3.6186e-08, -5.6442e-09, -2.5051e-08, 2.2907e-08,\n -3.1520e-08, 2.2072e-08, -3.9379e-08, 1.0997e-08, 2.4709e-08,\n 2.1520e-08, 2.2685e-08, 1.3662e-08, 2.3311e-08, -3.6734e-08,\n 1.4646e-08, 2.0594e-08, 1.8881e-08, 2.4614e-08, 2.0390e-08,\n 2.4505e-08, 1.8509e-08, -5.3805e-09, -4.6481e-08, -5.7035e-08,\n 2.3709e-08, 2.3037e-08, -4.9351e-08, -2.4393e-08, -8.4864e-09,\n 1.7373e-08, -5.3182e-08, 1.4530e-08, -2.2666e-08, -4.9193e-08,\n 1.1733e-08, -3.7148e-08]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.6022e-12, 4.0605e-12, 1.0033e-12, 4.8390e-12, 2.9099e-12, 5.2461e-12,\n 1.0918e-11, 7.9587e-13, 1.1448e-11, 3.5577e-12, 7.6519e-13, 4.3483e-12,\n 2.1004e-12, 1.7180e-12, 8.3312e-13, 1.3258e-12, 7.3415e-13, 1.7974e-12,\n 5.6084e-12, 9.9699e-13, 3.8546e-13, 7.3713e-12, 5.1351e-12, 9.2551e-12,\n 9.1061e-12, 3.2357e-12, 3.7415e-13, 2.8086e-12, 2.7590e-12, 1.5445e-12,\n 2.1503e-13, 7.1992e-13, 1.1873e-12, 1.6151e-12, 3.6450e-12, 2.6904e-13,\n 1.3346e-12, 3.3119e-12, 3.1414e-12, 1.6041e-12, 4.7766e-12, 4.4162e-13,\n 4.3239e-13, 8.4234e-12, 4.0945e-12, 6.5316e-13, 5.4688e-12, 3.9872e-12,\n 8.2810e-13, 3.7306e-12, 4.0755e-12, 2.6331e-12, 7.5179e-13, 3.0146e-12,\n 8.6246e-13, 7.6632e-13, 2.7036e-12, 5.5165e-13, 2.5648e-12, 1.3120e-13,\n 1.9955e-12, 2.0820e-12, 6.3812e-13, 1.5083e-11, 4.8369e-12, 3.0532e-12,\n 4.7911e-12, 1.8524e-12, 6.2210e-13, 3.1519e-12, 3.3668e-12, 3.3893e-12,\n 9.4774e-12, 5.4065e-12, 7.6861e-13, 1.2748e-12, 1.2591e-12, 2.4589e-12,\n 2.1597e-13, 3.8395e-12, 4.2534e-12, 1.4865e-11, 6.3565e-13, 2.5677e-12,\n 1.9218e-12, 4.9575e-13, 2.5896e-13, 4.8415e-12, 2.6418e-12, 4.5053e-13,\n 7.3618e-13, 1.5156e-11, 4.0418e-12, 1.5753e-11, 3.0899e-13, 8.6113e-13,\n 1.5417e-12, 2.2713e-12, 2.5702e-12, 4.2059e-12, 6.2080e-13, 1.3500e-11,\n 5.8669e-12, 1.5679e-11, 4.4483e-12, 1.2933e-12, 4.9263e-12, 4.3816e-12,\n 8.0672e-13, 4.3980e-13, 2.5045e-12, 2.3329e-12, 8.8284e-13, 4.8969e-12,\n 2.3362e-13, 4.0490e-12, 1.4795e-12, 8.0121e-13, 7.5492e-12, 3.6697e-12,\n 6.6955e-13, 2.5045e-12, 1.2193e-12, 2.1161e-12, 1.2552e-11, 1.3839e-12,\n 1.6391e-12, 9.0572e-13, 6.0020e-13, 1.1014e-11, 2.3393e-13, 6.3762e-12,\n 3.0400e-12, 4.4502e-13, 1.1390e-11, 1.1103e-11, 3.3883e-13, 3.5021e-12,\n 7.8503e-13, 1.4260e-12, 2.0716e-12, 7.9177e-13, 8.3087e-13, 1.1931e-12,\n 7.6539e-12, 2.6472e-13, 8.5821e-13, 4.0354e-12, 8.5839e-13, 1.4583e-12,\n 5.1918e-12, 1.2946e-12, 6.8526e-13, 8.2868e-13, 1.7646e-13, 1.3411e-12,\n 3.4769e-13, 1.3953e-12, 2.2112e-12, 3.2126e-12, 4.3226e-13, 1.7597e-12,\n 9.7095e-12, 1.1055e-12, 5.4933e-12, 4.6177e-12, 3.7254e-12, 1.1346e-12,\n 1.0162e-12, 5.2413e-12, 1.4530e-12, 3.0599e-12, 3.5021e-12, 2.8935e-12,\n 2.7833e-12, 3.4385e-12, 3.5045e-12, 6.1767e-13, 1.4518e-11, 7.3068e-12,\n 4.4518e-12, 1.4571e-12, 1.5508e-11, 7.9361e-13, 2.5927e-13, 2.3630e-12,\n 3.2935e-12, 2.3059e-12, 2.4646e-12, 8.8984e-12, 1.2516e-12, 3.7974e-12],\n [1.7293e-13, 7.1388e-13, 2.2198e-13, 8.0790e-13, 5.8737e-13, 8.9685e-13,\n 2.0624e-12, 6.8004e-14, 2.0989e-12, 5.1468e-13, 1.1658e-13, 6.7937e-13,\n 3.9680e-13, 2.1305e-13, 1.3400e-13, 2.6520e-13, 9.8463e-14, 3.3857e-13,\n 1.0048e-12, 1.3714e-13, 8.8813e-14, 1.3695e-12, 8.3357e-13, 1.4935e-12,\n 1.7498e-12, 5.9665e-13, 4.9599e-14, 4.4441e-13, 4.7872e-13, 2.1146e-13,\n 2.9422e-14, 7.5360e-14, 1.3527e-13, 2.8567e-13, 6.5214e-13, 3.0133e-14,\n 2.5592e-13, 4.3210e-13, 5.3372e-13, 2.4038e-13, 7.6249e-13, 4.0532e-14,\n 7.4383e-14, 1.5748e-12, 6.2908e-13, 5.5759e-14, 9.1651e-13, 6.6416e-13,\n 8.7463e-14, 6.5720e-13, 6.9868e-13, 3.6496e-13, 6.7464e-14, 4.1431e-13,\n 1.5443e-13, 1.1523e-13, 5.1387e-13, 7.1125e-14, 5.0666e-13, 7.4581e-15,\n 1.8013e-13, 3.1818e-13, 6.0533e-14, 2.7004e-12, 8.4526e-13, 4.6319e-13,\n 7.5485e-13, 3.8558e-13, 5.5426e-14, 5.5663e-13, 6.0309e-13, 6.0107e-13,\n 1.7896e-12, 9.1629e-13, 1.2093e-13, 2.5239e-13, 1.3257e-13, 4.3584e-13,\n 2.4282e-14, 6.1835e-13, 7.4146e-13, 2.7342e-12, 5.5503e-14, 4.5567e-13,\n 3.9482e-13, 4.8131e-14, 3.0013e-14, 9.0595e-13, 4.4575e-13, 8.5510e-14,\n 1.3366e-13, 2.8029e-12, 6.9904e-13, 2.8543e-12, 2.7156e-14, 9.8393e-14,\n 2.9875e-13, 4.0077e-13, 3.5926e-13, 7.8241e-13, 6.6006e-14, 2.5204e-12,\n 1.1009e-12, 2.8536e-12, 7.1020e-13, 1.3728e-13, 8.0834e-13, 6.6490e-13,\n 7.9779e-14, 5.1850e-14, 4.6171e-13, 2.6071e-13, 1.3437e-13, 8.3772e-13,\n 2.4822e-14, 8.1405e-13, 1.2455e-13, 1.1133e-13, 1.4387e-12, 6.4520e-13,\n 1.1707e-13, 4.6488e-13, 1.8309e-13, 4.3615e-13, 2.2454e-12, 1.9503e-13,\n 2.4442e-13, 1.4869e-13, 5.4964e-14, 2.0256e-12, 2.8244e-14, 1.1396e-12,\n 3.5741e-13, 4.5198e-14, 2.1542e-12, 2.0659e-12, 2.9901e-14, 6.8137e-13,\n 1.4486e-13, 2.7186e-13, 3.3513e-13, 8.4689e-14, 7.2881e-14, 1.2089e-13,\n 1.4770e-12, 2.3517e-14, 1.5317e-13, 6.6327e-13, 1.1245e-13, 2.0521e-13,\n 8.8367e-13, 1.2571e-13, 6.4709e-14, 1.5923e-13, 2.7245e-14, 1.9916e-13,\n 6.3078e-14, 1.2718e-13, 4.0122e-13, 5.7878e-13, 4.9428e-14, 3.1613e-13,\n 1.7665e-12, 1.1349e-13, 8.3868e-13, 7.0039e-13, 6.5436e-13, 1.6894e-13,\n 1.9757e-13, 9.3674e-13, 2.0211e-13, 5.4914e-13, 6.0625e-13, 5.0908e-13,\n 5.1788e-13, 6.2059e-13, 5.4550e-13, 5.6111e-14, 2.6780e-12, 1.3351e-12,\n 7.8131e-13, 2.8370e-13, 2.8773e-12, 9.2512e-14, 2.4374e-14, 3.1841e-13,\n 6.5727e-13, 3.1466e-13, 3.4432e-13, 1.5376e-12, 2.0350e-13, 7.2565e-13],\n [6.4618e-13, 1.6792e-13, 1.1743e-12, 3.5623e-13, 9.1974e-13, 2.1698e-13,\n 8.0832e-13, 5.2697e-13, 1.0629e-12, 7.1560e-13, 5.2853e-13, 1.0279e-12,\n 2.4020e-13, 8.6553e-13, 5.9856e-13, 1.0425e-13, 3.8054e-13, 1.7702e-12,\n 7.4296e-13, 4.8333e-13, 1.7170e-12, 6.4818e-13, 3.9694e-13, 1.7003e-12,\n 8.4147e-13, 4.5762e-13, 3.4252e-13, 3.9916e-13, 2.5674e-13, 4.1446e-13,\n 8.2126e-14, 4.9326e-13, 5.8017e-13, 4.1878e-13, 2.0897e-13, 1.7305e-13,\n 7.3159e-13, 8.7606e-13, 2.5623e-13, 3.4650e-13, 4.4874e-13, 3.5852e-13,\n 1.5964e-13, 8.4601e-13, 5.7242e-13, 3.3291e-13, 2.9348e-13, 4.5314e-13,\n 5.6206e-13, 1.4593e-13, 8.2260e-13, 1.1262e-12, 4.2330e-13, 6.7381e-13,\n 2.8488e-13, 3.9483e-13, 8.8729e-13, 3.4162e-13, 8.9433e-13, 5.5475e-14,\n 9.2778e-13, 5.4621e-13, 2.8491e-13, 1.6428e-12, 1.6367e-13, 1.5885e-12,\n 6.2437e-13, 5.5618e-13, 3.3490e-13, 2.1432e-13, 1.3733e-13, 2.2325e-13,\n 7.8655e-13, 2.0070e-13, 6.1102e-13, 8.0639e-13, 5.9876e-13, 6.0520e-13,\n 6.1146e-14, 8.5303e-13, 1.5101e-13, 8.8139e-13, 4.6861e-13, 1.4845e-13,\n 7.6216e-13, 3.1553e-13, 1.3635e-13, 7.1373e-13, 2.6190e-13, 2.3469e-13,\n 1.0351e-13, 9.9021e-13, 1.6687e-13, 1.3683e-12, 1.3576e-13, 2.7879e-13,\n 2.1577e-13, 1.2123e-13, 6.2059e-13, 4.9557e-13, 5.5795e-13, 1.0817e-12,\n 8.7861e-13, 1.1748e-12, 4.6633e-13, 4.9218e-13, 3.6036e-13, 7.4642e-13,\n 5.4388e-13, 1.6063e-13, 2.4847e-13, 7.7479e-13, 9.0781e-13, 2.0615e-13,\n 8.5274e-14, 1.1733e-12, 8.0722e-13, 2.6357e-13, 6.2828e-13, 1.3922e-13,\n 5.2844e-13, 1.3311e-13, 6.1249e-13, 1.1272e-12, 1.3643e-12, 2.9514e-13,\n 3.1044e-13, 1.2019e-13, 3.3785e-13, 1.2519e-12, 1.8119e-13, 8.5294e-13,\n 1.0721e-12, 3.3946e-13, 1.1267e-12, 8.3143e-13, 1.9822e-13, 9.7151e-13,\n 2.1146e-13, 1.2121e-13, 3.0111e-13, 2.9312e-13, 4.2334e-13, 4.6238e-13,\n 7.1139e-13, 1.3134e-13, 9.0706e-13, 4.0536e-13, 2.6485e-13, 4.6437e-13,\n 1.7459e-13, 5.3784e-13, 3.2122e-13, 5.5806e-13, 2.2287e-13, 3.3413e-13,\n 4.0549e-13, 6.7926e-13, 3.6358e-13, 1.3045e-13, 3.5079e-13, 4.2935e-13,\n 9.1719e-13, 7.7078e-13, 7.1053e-13, 6.3027e-13, 1.5023e-13, 5.9638e-13,\n 1.8972e-13, 7.9523e-13, 3.8756e-13, 2.1122e-13, 1.7561e-13, 3.3668e-13,\n 1.9293e-13, 1.8222e-13, 4.5371e-13, 3.1673e-13, 1.1013e-12, 1.2307e-12,\n 1.6073e-13, 1.2329e-13, 1.2835e-12, 5.8260e-13, 1.3473e-13, 5.5903e-13,\n 9.5621e-13, 5.2832e-13, 6.3162e-13, 1.2441e-12, 1.4188e-13, 6.4617e-13],\n [2.4887e-12, 9.4514e-12, 3.8715e-12, 1.0886e-11, 7.8299e-12, 1.2092e-11,\n 2.7069e-11, 8.7841e-13, 2.8078e-11, 6.9443e-12, 1.9348e-12, 9.4437e-12,\n 5.0095e-12, 3.2424e-12, 1.8023e-12, 3.3629e-12, 1.2604e-12, 5.9882e-12,\n 1.3640e-11, 2.1653e-12, 2.7796e-12, 1.8202e-11, 1.1371e-11, 2.0368e-11,\n 2.2889e-11, 8.1195e-12, 7.2964e-13, 6.0425e-12, 6.3835e-12, 2.9556e-12,\n 4.4699e-13, 9.5675e-13, 2.2551e-12, 3.4054e-12, 8.4731e-12, 4.8788e-13,\n 3.5679e-12, 6.1260e-12, 7.1776e-12, 3.2992e-12, 1.0397e-11, 5.5702e-13,\n 9.7609e-13, 2.1024e-11, 8.5302e-12, 8.6576e-13, 1.2418e-11, 8.7665e-12,\n 1.0597e-12, 8.7606e-12, 9.7303e-12, 5.4146e-12, 9.2885e-13, 5.6499e-12,\n 1.9211e-12, 1.3283e-12, 6.5476e-12, 8.8126e-13, 6.6512e-12, 1.4937e-13,\n 2.5645e-12, 4.6332e-12, 8.7394e-13, 3.6114e-11, 1.1274e-11, 6.7360e-12,\n 1.0144e-11, 5.2409e-12, 8.1377e-13, 7.3873e-12, 7.9141e-12, 7.7907e-12,\n 2.3566e-11, 1.2393e-11, 2.0342e-12, 3.6444e-12, 1.9588e-12, 6.2131e-12,\n 3.8892e-13, 8.6536e-12, 9.8851e-12, 3.6151e-11, 6.9108e-13, 6.0352e-12,\n 5.6490e-12, 6.6806e-13, 4.3873e-13, 1.2286e-11, 5.9994e-12, 1.2597e-12,\n 1.7747e-12, 3.7091e-11, 9.3700e-12, 3.8099e-11, 4.8026e-13, 1.4311e-12,\n 3.7709e-12, 5.3307e-12, 4.8440e-12, 1.0539e-11, 9.1628e-13, 3.3340e-11,\n 1.4876e-11, 3.7978e-11, 9.6422e-12, 1.9099e-12, 1.0979e-11, 8.9328e-12,\n 9.6593e-13, 8.1133e-13, 5.8237e-12, 3.7368e-12, 2.1438e-12, 1.1297e-11,\n 3.9784e-13, 1.1390e-11, 1.6919e-12, 1.5703e-12, 1.8850e-11, 8.5788e-12,\n 1.6806e-12, 6.0015e-12, 2.0183e-12, 6.5621e-12, 2.9975e-11, 2.7544e-12,\n 3.3836e-12, 2.0619e-12, 8.2008e-13, 2.7066e-11, 5.4764e-13, 1.5439e-11,\n 5.0071e-12, 5.5552e-13, 2.8546e-11, 2.7364e-11, 4.2739e-13, 8.8077e-12,\n 1.8049e-12, 3.5175e-12, 4.5301e-12, 1.2222e-12, 1.0930e-12, 1.7784e-12,\n 1.9327e-11, 4.0985e-13, 2.4889e-12, 8.9187e-12, 1.6791e-12, 2.7813e-12,\n 1.1926e-11, 1.8640e-12, 9.4852e-13, 2.3024e-12, 5.5955e-13, 2.7231e-12,\n 1.1273e-12, 1.8070e-12, 5.5669e-12, 7.5745e-12, 8.8203e-13, 3.8072e-12,\n 2.3562e-11, 1.2542e-12, 1.1401e-11, 9.5538e-12, 8.7378e-12, 1.9194e-12,\n 2.5023e-12, 1.2784e-11, 2.7788e-12, 7.0700e-12, 8.1159e-12, 6.6632e-12,\n 6.6067e-12, 8.0511e-12, 7.4239e-12, 8.1564e-13, 3.5513e-11, 1.8156e-11,\n 1.0366e-11, 3.6152e-12, 3.8184e-11, 1.3122e-12, 3.8093e-13, 4.3520e-12,\n 9.2970e-12, 4.3045e-12, 4.9198e-12, 2.0736e-11, 2.8105e-12, 9.8627e-12]],\n device='cuda:0')" + }, + "64": { + "step": "tensor(11268.)", + "exp_avg": "tensor([ 2.1732e-07, -1.0985e-08, -8.6903e-07, 6.6217e-07], device='cuda:0')", + "exp_avg_sq": "tensor([1.1918e-09, 1.2852e-10, 4.1884e-10, 1.6155e-09], device='cuda:0')" + } + }, + "param_groups": [ + { + "lr": 2.5447270110570814e-05, + "name": "shared", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.001, + "params": [ + 0, + 1 + ] + }, + { + "lr": 2.5447270110570814e-05, + "name": "scale_256", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.001, + "params": [ + 2, + 3, + 4 + ] + }, + { + "lr": 2.5447270110570814e-05, + "name": "scale_512", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.001, + "params": [ + 5, + 6, + 7 + ] + }, + { + "lr": 2.5447270110570814e-05, + "name": "scale_768", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.001, + "params": [ + 8, + 9, + 10 + ] + }, + { + "lr": 2.5447270110570814e-05, + "name": "scale_1024", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.001, + "params": [ + 11, + 12, + 13 + ] + }, + { + "lr": 1.3211399184359193e-05, + "name": "fusion", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.0005, + "params": [ + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64 + ] + } + ] + }, + "scheduler_state_dict": { + "T_0": 10, + "T_i": 10, + "T_mult": 2, + "eta_min": 1e-06, + "T_cur": 9, + "base_lrs": [ + 0.001, + 0.001, + 0.001, + 0.001, + 0.001, + 0.0005 + ], + "last_epoch": 9, + "_step_count": 0, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 2.5447270110570814e-05, + 2.5447270110570814e-05, + 2.5447270110570814e-05, + 2.5447270110570814e-05, + 2.5447270110570814e-05, + 1.3211399184359193e-05 + ] + }, + "metrics": { + "best_val_acc": 75.532, + "best_epoch": 8, + "scale_accuracies": { + "256": 74.356, + "512": 75.256, + "768": 74.766, + "1024": 74.7 + }, + "training_history": { + "epochs": [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9 + ], + "train_loss": [ + 3.4434366774635192, + 2.323611595760138, + 2.0886680400028776, + 1.9145580669180653, + 1.7639687670686375, + 1.6240364456900394, + 1.4980749088925676, + 1.389185500506776, + 1.318137846339625 + ], + "train_acc": [ + 70.1198204449537, + 77.61954530517879, + 79.95913101102354, + 81.81056802118694, + 83.51737127166092, + 85.2142616848545, + 86.77260653763327, + 88.08320851223924, + 89.10883592849332 + ], + "val_acc": [ + 73.288, + 74.412, + 74.762, + 75.256, + 75.22, + 75.508, + 75.524, + 75.5, + 75.532 + ], + "scale_accs": { + "256": [ + 71.326, + 72.25, + 73.056, + 73.482, + 73.684, + 73.908, + 74.202, + 74.376, + 74.356 + ], + "512": [ + 72.45, + 73.416, + 74.002, + 74.604, + 74.692, + 74.846, + 75.12, + 75.296, + 75.256 + ], + "768": [ + 72.264, + 73.582, + 74.158, + 74.68, + 74.654, + 74.718, + 74.98, + 74.83, + 74.766 + ], + "1024": [ + 73.004, + 74.144, + 74.334, + 74.86, + 74.692, + 74.954, + 74.8, + 74.71, + 74.7 + ] + }, + "lr": [ + 0.0009755527298894294, + 0.0009046039886902864, + 0.0007940987335200904, + 0.0006548539886902864, + 0.0005005000000000001, + 0.0003461460113097139, + 0.00020690126647990973, + 9.639601130971382e-05, + 2.5447270110570814e-05 + ] + } + }, + "train_config": { + "name": "david_training", + "run_id": "20251012_152245", + "dataset_name": "AbstractPhil/imagenet-clip-features-orderly", + "model_variant": "clip_vit_laion_b32", + "num_classes": 1000, + "preset": "balanced", + "custom_config_path": null, + "num_classes_override": null, + "use_belly_override": null, + "belly_expand_override": null, + "progressive_training_override": false, + "scale_warmup_epochs_override": null, + "num_epochs": 10, + "batch_size": 1024, + "learning_rate": 0.001, + "weight_decay": 1e-05, + "warmup_epochs": 3, + "use_rose_loss": true, + "rose_initial_weight": 0.1, + "rose_max_weight": 0.5, + "rose_weight_schedule": "adaptive", + "use_cayley_loss": false, + "cayley_weight": 0.001, + "scale_loss_balance": null, + "use_mixed_precision": true, + "gradient_clip": 10.0, + "scheduler_type": "cosine_restarts", + "min_lr": 1e-06, + "freeze_strategy": "performance", + "freeze_threshold": 75.0, + "unfreeze_on_plateau": true, + "patience": 10, + "track_gradients": true, + "gradient_scale_threshold": 1e-05, + "gradient_scale_multiplier": 10.0, + "log_interval": 50, + "val_interval": 1, + "save_interval": 5, + "log_fusion_weights": true, + "log_loss_components": true, + "save_format": "safetensors", + "hf_repo": "AbstractPhil/gated-david", + "upload_to_hub": true, + "base_dir": "./david_training", + "num_workers": 10, + "pin_memory": true, + "prefetch_factor": 4, + "persistent_workers": true + } +} \ No newline at end of file