diff --git "a/weights/David-partial_shared-hierarchical_tree/20251012_191456/best_model_acc72.76_metadata.json" "b/weights/David-partial_shared-hierarchical_tree/20251012_191456/best_model_acc72.76_metadata.json" new file mode 100644--- /dev/null +++ "b/weights/David-partial_shared-hierarchical_tree/20251012_191456/best_model_acc72.76_metadata.json" @@ -0,0 +1,633 @@ +{ + "epoch": 4, + "optimizer_state_dict": { + "state": { + "0": { + "step": "tensor(12515.)", + "exp_avg": "tensor([[ 1.4696e-05, -2.7828e-05, -3.5423e-06, ..., -7.6348e-06,\n -6.0387e-06, 5.2523e-06],\n [-1.1754e-05, -1.0879e-05, 1.5443e-05, ..., -3.6193e-05,\n 3.0602e-06, -7.5859e-07],\n [-3.8365e-05, -4.3743e-05, -4.3301e-05, ..., 6.0891e-06,\n -8.6060e-06, -2.2884e-05],\n ...,\n [-2.5770e-05, -8.7027e-05, -4.9094e-05, ..., -1.0083e-05,\n -1.8324e-05, -9.8804e-07],\n [-1.3846e-05, -5.4129e-05, 2.7136e-05, ..., 4.3186e-06,\n -2.9494e-06, -1.3511e-05],\n [-1.6207e-05, 2.0036e-05, 1.2401e-05, ..., -4.6466e-05,\n -1.0978e-05, -4.7357e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[7.6474e-09, 4.7828e-08, 1.2330e-08, ..., 3.9137e-08, 5.2438e-09,\n 6.6964e-09],\n [3.0686e-09, 2.9346e-09, 3.2723e-09, ..., 1.6143e-08, 2.0444e-09,\n 3.0094e-09],\n [1.2861e-08, 7.1517e-08, 2.4742e-08, ..., 7.8649e-09, 7.0069e-09,\n 8.5374e-09],\n ...,\n [1.3579e-08, 5.7949e-08, 1.3546e-08, ..., 4.1593e-08, 7.5677e-09,\n 9.6574e-09],\n [1.4302e-08, 5.6941e-08, 2.9462e-08, ..., 2.2485e-08, 6.3547e-09,\n 1.3739e-08],\n [1.4872e-08, 1.3853e-08, 8.4946e-09, ..., 4.3783e-08, 7.6830e-09,\n 1.0462e-08]], device='cuda:0')" + }, + "1": { + "step": "tensor(12515.)", + "exp_avg": "tensor([ 2.3481e-04, -7.6586e-04, 2.0336e-03, -9.4608e-04, -2.4625e-04,\n -4.6248e-04, -7.5357e-04, 1.1750e-03, -2.5347e-04, 6.1510e-04,\n 4.5143e-04, 1.0708e-04, -1.8643e-03, -1.5304e-03, -7.9622e-04,\n -1.5288e-04, 5.7960e-04, 1.1429e-03, 7.5297e-04, 5.9599e-05,\n -3.3728e-04, 1.8787e-04, -1.2878e-03, -1.6985e-03, -7.6159e-04,\n -1.3415e-03, 1.6513e-03, -5.6740e-04, -5.6256e-04, -4.7040e-04,\n -1.0648e-03, -6.6215e-04, 1.0058e-03, 5.2296e-04, -1.3087e-03,\n -1.0915e-03, 1.2563e-03, -6.9072e-04, -6.3987e-04, 2.2523e-05,\n -1.1272e-03, -2.8269e-04, -1.1333e-03, 2.2858e-04, -8.0993e-05,\n -5.3912e-05, -3.0503e-04, -3.2844e-04, 1.8806e-04, 1.4214e-03,\n -2.1112e-03, -7.1953e-04, -1.0749e-03, -4.8762e-04, -3.7618e-04,\n -2.5314e-04, -3.5410e-04, -1.1221e-03, 6.0304e-05, -1.2431e-03,\n -9.5404e-04, 1.3670e-04, 1.4845e-03, 6.9498e-04, 7.6231e-04,\n 1.4383e-04, 1.3105e-03, -1.1082e-03, -6.3083e-04, -7.6704e-04,\n 2.8041e-04, 3.3429e-04, 4.7105e-04, -8.0014e-04, -2.7195e-04,\n -7.0406e-05, 3.4946e-05, 8.9719e-04, -1.0822e-03, 3.2482e-04,\n -1.5364e-03, -2.0874e-04, -7.2046e-04, 1.6123e-03, 2.6929e-03,\n -4.6629e-04, 1.5181e-04, 6.7821e-05, -6.6474e-04, -1.0394e-03,\n -1.4486e-05, 1.4378e-04, -1.3534e-04, -9.3992e-04, -5.2241e-04,\n -2.3082e-04, -8.5004e-04, 8.2886e-04, -9.4638e-04, 1.5386e-03,\n 8.2563e-04, -1.3672e-09, 9.5639e-04, -2.3854e-04, -8.3904e-04,\n -1.6462e-03, -6.7491e-06, -1.2086e-03, 9.7223e-05, -1.9843e-04,\n -1.0966e-03, 1.5277e-03, 2.2883e-03, -2.2430e-03, -4.5865e-04,\n 7.5136e-04, 3.4722e-04, -1.1432e-03, -1.7318e-04, -2.8695e-04,\n -8.6066e-04, 1.1230e-03, -1.2985e-03, -1.4514e-03, 1.9565e-03,\n -6.2931e-04, -2.8575e-05, 5.0019e-04, 5.3681e-05, 5.5097e-04,\n 2.5648e-04, -8.0116e-04, 6.5037e-04, -1.6561e-04, -4.5886e-05,\n 6.8655e-04, -1.8302e-03, -1.4125e-04, 9.9134e-04, -3.5626e-04,\n -1.0884e-03, -3.8030e-04, 9.0171e-04, -8.3419e-04, -1.5482e-04,\n 1.0220e-03, -5.8681e-05, -9.2155e-04, -1.3991e-03, 1.7073e-03,\n -2.7325e-03, 2.5789e-04, -6.8452e-04, 2.0148e-03, 5.7246e-04,\n -1.6182e-03, -2.9461e-04, -9.6503e-05, 4.5573e-04, -5.8148e-04,\n -8.6129e-04, -7.4337e-04, 3.7361e-04, -1.3681e-03, -4.2916e-04,\n 3.9926e-04, -1.0277e-03, 5.3387e-04, 5.6934e-04, -9.9050e-04,\n 6.9474e-04, -8.8857e-05, 1.3701e-04, -4.2331e-04, 2.3449e-05,\n -6.2591e-04, 6.0532e-04, 6.2972e-04, 4.0721e-04, 1.0424e-05,\n 1.6680e-03, -6.5670e-04, 7.2181e-04, -4.0792e-04, -2.1574e-04,\n 1.1275e-03, -1.3827e-04, -6.0167e-04, -8.9448e-04, 9.2992e-04,\n 1.7064e-04, -3.5696e-05, 1.0827e-03, -6.9586e-04, 7.2498e-04,\n -3.2435e-04, -6.3674e-04, -2.0529e-04, -8.9314e-04, 2.1192e-04,\n 5.4231e-04, -5.9063e-04, -7.3119e-04, 3.2401e-04, 1.1332e-05,\n 5.1928e-04, 2.2867e-04, 8.8789e-04, -5.5003e-04, -1.3567e-03,\n 4.9610e-04, 1.1571e-03, -1.6378e-03, 7.0790e-04, 1.7927e-04,\n -5.6164e-04, -2.1927e-04, -3.4716e-04, -5.0160e-04, -8.5459e-04,\n 4.4548e-04, -5.3264e-04, -4.4062e-04, 1.4133e-04, -1.5006e-04,\n -1.9780e-03, -1.0126e-03, -6.4218e-04, 8.7164e-05, -6.1931e-04,\n 2.8493e-03, -8.6890e-04, -2.1884e-03, -2.9283e-04, 1.0889e-03,\n 1.0507e-03, -1.6696e-04, 8.4750e-04, -2.0510e-03, -2.7876e-05,\n -1.4364e-03, -1.4585e-04, 9.0192e-04, 1.2251e-04, 6.1047e-04,\n -1.7156e-03, 6.7935e-04, 9.5787e-04, 5.1881e-04, 1.9886e-04,\n 4.7870e-04, 7.5305e-04, 6.4234e-04, 6.2536e-04, -7.2018e-05,\n -1.3964e-04, -3.8003e-03, -1.7555e-03, 2.5479e-04, -1.8361e-03,\n 1.4890e-04, -1.0937e-03, -4.9289e-04, 2.6125e-03, -9.7701e-05,\n 1.0589e-03, -1.6091e-03, -9.6791e-04, 2.8312e-04, 7.5516e-06,\n -2.2443e-04, -9.8288e-04, 1.3492e-03, -1.3728e-03, 2.2028e-04,\n 6.6147e-04, 2.2635e-03, -7.8175e-04, -6.7754e-04, -5.9067e-04,\n -1.4306e-03, 1.5786e-03, -1.6921e-04, -1.2241e-03, -8.9049e-04,\n 3.0862e-04, 1.0128e-03, -4.4250e-04, -1.3429e-03, 2.2693e-04,\n 1.4152e-03, 4.8560e-05, 3.1506e-04, 2.6032e-03, 1.9322e-03,\n -5.0557e-04, 4.1005e-04, 9.9730e-05, 1.5374e-04, -1.8830e-03,\n -3.2544e-04, 9.7247e-05, 1.2581e-03, 1.2436e-04, 2.5119e-03,\n 2.9002e-04, 5.9423e-04, 1.2309e-03, -3.5147e-04, 7.6547e-04,\n -2.0180e-03, 2.0754e-03, 4.0570e-04, -1.2784e-03, -1.3344e-03,\n 4.9119e-04, 4.3040e-04, 1.1188e-03, -1.1046e-04, -6.7138e-05,\n -8.1479e-05, -1.9162e-03, -1.2419e-04, -5.3189e-05, 4.3783e-04,\n 4.2156e-04, -6.7935e-04, -4.6417e-04, -2.2206e-04, -6.4575e-04,\n -7.6940e-04, -5.9956e-04, 2.3352e-04, 8.3622e-04, -6.5510e-07,\n -2.0618e-03, -2.0382e-03, -9.0212e-04, 8.7777e-04, 5.4249e-04,\n -7.5880e-05, 2.9409e-04, 1.6194e-03, 7.1691e-04, -1.3470e-04,\n -6.0308e-04, -1.3457e-03, 1.0638e-03, 1.8126e-04, 7.9574e-04,\n -4.7339e-04, -1.5975e-03, 7.4672e-04, 7.6741e-10, 1.5002e-03,\n -2.3911e-04, -1.8883e-04, 2.3133e-04, 1.2743e-04, 3.4978e-04,\n 4.9955e-04, 1.8125e-04, -1.2746e-03, -1.2978e-04, -6.2607e-04,\n 5.1744e-04, -5.1276e-04, -6.9850e-05, -1.1764e-04, -7.6290e-04,\n 1.3089e-03, 2.1394e-04, 5.8675e-04, -8.2499e-04, 3.1925e-04,\n -1.1273e-03, 1.4131e-03, 5.6052e-45, 3.9360e-04, -2.0092e-04,\n -2.8569e-04, -2.6892e-03, 1.2857e-03, 2.0584e-03, 5.6052e-45,\n 4.0326e-04, -2.0484e-04, -1.0928e-03, 1.9357e-04, 1.0161e-03,\n -1.2654e-03, 6.6752e-04, 1.2053e-03, -9.0019e-04, -5.3128e-04,\n 1.6603e-04, -4.7175e-04, 2.0648e-03, 7.0463e-04, -1.2536e-03,\n -1.0121e-03, 7.8428e-04, -5.6069e-04, 3.1916e-04, -6.7532e-04,\n 5.3005e-04, 5.6052e-45, 1.3700e-03, 1.0765e-04, 8.0748e-05,\n 1.8556e-03, 4.6905e-04, 6.6940e-04, -1.9864e-04, -1.5628e-03,\n 6.6254e-04, 5.6609e-05, -5.5410e-05, 1.5552e-03, 6.1947e-04,\n -6.8421e-04, 5.1638e-04, -5.0162e-04, 2.2049e-03, -9.9567e-04,\n 7.3925e-05, -1.4594e-05, 1.3547e-03, 3.4150e-04, 2.7723e-04,\n -1.3353e-03, 5.7881e-04, -9.4857e-04, 1.0838e-04, -6.9116e-04,\n -1.9356e-05, 3.7112e-04, -3.1010e-04, 1.5042e-03, 2.2478e-03,\n 2.9808e-04, 3.6489e-04, 1.4150e-03, 8.3188e-04, 1.1503e-03,\n -2.1325e-04, -1.3920e-04, 1.7435e-03, -1.6727e-03, -2.1570e-03,\n 2.4031e-03, 8.8449e-04, 7.7435e-04, -1.7979e-04, 9.4207e-04,\n -1.5974e-03, 8.6526e-05, 3.6738e-04, -9.4464e-04, -1.4042e-04,\n 8.5778e-04, 1.2959e-03, 1.0471e-03, -9.8008e-04, 1.0073e-03,\n 2.6898e-03, 8.1609e-04, 8.4079e-04, -5.6198e-04, 5.0291e-04,\n -1.7335e-04, -6.9806e-04, 2.1161e-03, -4.2551e-03, 1.2029e-04,\n 4.5859e-04, -5.4538e-04, 5.2246e-05, -1.2828e-03, 4.1930e-04,\n 1.7813e-03, -8.2656e-04, -1.3078e-04, 5.6364e-04, 4.1007e-04,\n 7.9550e-04, -7.2443e-04, -7.4382e-04, -1.9660e-03, -3.2879e-04,\n 3.0567e-04, 1.2520e-04, -3.3601e-04, -7.0284e-04, 5.1201e-04,\n -1.0475e-03, -5.6587e-04, 1.0691e-03, 3.8810e-04, 1.0544e-03,\n 1.1060e-03, 1.1758e-03, 3.3494e-04, 1.0402e-03, 2.1377e-03,\n -1.0572e-04, 1.1469e-04, 4.1928e-04, -4.2285e-05, 3.6972e-04,\n -9.0903e-04, -3.4698e-04, -2.1795e-04, 1.2735e-03, 3.7889e-04,\n 1.2280e-03, -4.9475e-04, 4.1753e-04, 1.9412e-03, -4.4946e-04,\n -3.8308e-04, 1.6202e-03, 1.6427e-03, -6.3038e-04, -4.4124e-04,\n -5.9519e-04, -3.3190e-04, 9.5384e-04, 6.3763e-04, 3.1461e-04,\n -6.5627e-04, -2.7385e-03, 2.3047e-03, 4.1786e-04, -1.8847e-03,\n -7.7021e-04, -3.9578e-04, -2.1695e-03, -7.5014e-04, -8.5178e-04,\n -1.9819e-03, 1.5514e-03, -1.9631e-04, -2.0909e-04, 1.3606e-03,\n -3.7159e-04, 1.5760e-04, -9.2595e-04, -3.2166e-03, 4.4403e-04,\n -8.3364e-05, 1.5200e-03, -1.5496e-04, -2.8784e-04, 3.3395e-05,\n 5.6052e-45, -2.6158e-04, -1.7590e-03, -1.9792e-04, 1.7720e-03,\n -1.2071e-03, 1.4717e-03, -2.8725e-04, 3.1521e-04, 1.8345e-03,\n -1.6491e-04, -6.5312e-04, 4.4987e-04, 1.4650e-04, -5.6410e-04,\n -5.7155e-04, 1.2267e-04, 4.0375e-04, -1.1377e-03, -1.4015e-03,\n 1.4735e-04, 1.6107e-03, 1.9686e-04, -1.8031e-04, 3.3227e-04,\n 1.1879e-03, -3.1494e-04, 3.2471e-04, 2.8541e-03, 4.2226e-04,\n -5.9238e-04, 1.4254e-03, -3.1331e-04, 4.5432e-04, 5.6052e-45,\n -3.0296e-04, 9.3176e-04, 1.2397e-04, -9.1720e-05, -1.1039e-03,\n -1.4618e-03, -1.8564e-04, -2.8106e-04, 2.6184e-04, 1.2092e-03,\n 3.0014e-03, -3.7707e-04, -8.2614e-04, 2.8087e-04, -7.0579e-04,\n -1.3938e-04, -8.2825e-04, -9.2847e-04, -1.1790e-04, -4.8650e-04,\n -1.6266e-04, 4.8969e-04, 4.2756e-04, -1.1723e-05, 8.4817e-04,\n -9.0519e-04, -1.4770e-03, 1.5030e-03, -7.3021e-04, 2.0178e-03,\n 1.7149e-04, -2.0559e-04, 5.6052e-45, -1.0463e-05, -9.2032e-04,\n -9.3151e-04, 1.0009e-03, -5.0897e-04, 6.8931e-04, -1.3198e-03,\n 2.4937e-04, -8.5834e-05, -2.7343e-04, 3.5089e-03, 2.7082e-04,\n 1.0729e-03, 8.0944e-04, 3.9505e-04, 1.1593e-03, -1.8614e-03,\n 1.2605e-03, 4.7103e-04, -7.9070e-04, 7.7236e-04, 3.0437e-04,\n 8.4395e-04, -6.7415e-04, 1.0763e-03, -1.2220e-03, 3.6118e-04,\n 3.8428e-04, 1.6875e-03, -4.6601e-04, 2.6946e-04, 1.8647e-03,\n -3.0281e-04, -1.5487e-03, 1.1045e-03, -1.3290e-04, -4.6957e-04,\n -4.9074e-04, 3.2405e-04, -8.8678e-04, 1.0035e-03, -9.8286e-04,\n 7.6124e-04, 4.3116e-04, 9.4029e-04, 3.6484e-04, -9.1998e-04,\n 5.4680e-04, -2.6183e-04, -2.2790e-04, 3.6658e-03, 2.3006e-03,\n 2.7389e-03, 1.4259e-04, -1.0646e-03, -9.1226e-04, -1.5702e-03,\n -1.4110e-03, 6.1356e-05, 1.1209e-03, 2.4409e-03, 6.1940e-04,\n 4.8550e-04, -2.0836e-03, 1.1562e-03, 3.8422e-05, -1.0659e-03,\n 6.0231e-04, -2.5637e-05, -1.1059e-03, -6.6833e-04, -1.4765e-03,\n -2.5082e-04, -3.2502e-04, -1.4362e-03, -1.4155e-03, 5.6528e-04,\n -2.7078e-04, 1.3471e-04, 7.6594e-04, 2.0068e-04, -4.8897e-04,\n -7.3992e-05, 9.0217e-04, -1.1192e-03, -1.0666e-03, 3.8544e-04,\n 9.5854e-04, 4.4233e-04, 7.5617e-05, -2.0015e-03, 4.3349e-04,\n 9.2692e-04, 2.2954e-04, 1.0699e-04, -1.7179e-03, -2.9807e-04,\n 2.7968e-03, -5.3732e-04, 5.8447e-05, -7.0142e-04, 1.9833e-03,\n -5.7697e-04, -2.9046e-05, 4.5795e-04, -1.4148e-04, 1.1872e-03,\n -7.9969e-04, -1.0057e-03, -7.3479e-04, 1.0051e-03, 1.1968e-03,\n 2.9738e-04, 3.7980e-05, -1.8441e-03, -2.3386e-06, 1.0287e-03,\n 8.4600e-05, -1.5231e-03, 1.7587e-03, -3.0137e-05, 4.5407e-04,\n 6.6628e-04, -4.9291e-04, -4.4899e-04, 1.7821e-03, -1.3756e-03,\n -6.1398e-04, -2.4534e-04, -7.9952e-05, 9.1188e-06, 2.9800e-04,\n 2.2779e-04, -1.1917e-03, 2.4405e-04, 1.1516e-03, 1.3576e-03,\n 1.9650e-04, -5.4068e-04, -1.5290e-04, 6.0387e-04, -2.6223e-04,\n -1.0244e-03, 2.0828e-04, -4.2724e-04], device='cuda:0')", + "exp_avg_sq": "tensor([9.1810e-06, 4.2630e-06, 8.7619e-06, 1.3286e-05, 1.9481e-05, 7.4961e-06,\n 6.3191e-06, 1.2207e-05, 8.4456e-06, 6.9778e-06, 9.0986e-06, 5.9731e-06,\n 1.0750e-05, 1.5758e-05, 2.1613e-05, 1.2405e-05, 1.5555e-05, 9.4992e-06,\n 1.3225e-05, 4.3211e-06, 1.5548e-05, 5.7002e-06, 1.4610e-05, 1.3819e-05,\n 1.2249e-05, 1.8463e-05, 8.9453e-06, 1.1836e-05, 1.1940e-05, 1.2536e-05,\n 1.8138e-05, 3.4625e-06, 5.4209e-06, 1.8235e-05, 1.8703e-05, 2.4287e-05,\n 6.6783e-06, 1.3364e-05, 1.4820e-05, 1.5773e-05, 1.2750e-05, 2.3513e-05,\n 9.9515e-06, 1.7593e-05, 3.5450e-06, 1.5853e-05, 1.0661e-05, 1.6151e-05,\n 4.3555e-06, 1.6212e-05, 1.6525e-05, 1.1003e-05, 9.9350e-06, 6.1980e-06,\n 3.3807e-06, 1.6803e-05, 8.3770e-06, 9.0658e-06, 5.9611e-06, 2.2648e-05,\n 1.5985e-05, 1.5234e-05, 8.0434e-06, 9.1674e-06, 1.6000e-05, 1.7175e-05,\n 1.4169e-05, 3.1687e-06, 1.2320e-05, 5.8566e-06, 1.8906e-05, 1.3290e-05,\n 1.3251e-05, 1.8069e-05, 7.7446e-06, 1.1208e-05, 1.7295e-05, 8.1036e-06,\n 1.9874e-05, 6.4278e-06, 1.2561e-05, 7.0610e-06, 1.0145e-05, 1.0943e-05,\n 1.5913e-05, 1.5327e-05, 1.6852e-05, 1.8964e-05, 1.1170e-05, 1.3619e-05,\n 2.3200e-05, 1.9590e-05, 1.0638e-05, 1.7232e-05, 1.3104e-05, 1.6894e-05,\n 2.3227e-05, 1.9419e-05, 7.6380e-06, 1.5932e-05, 8.7320e-06, 1.0597e-09,\n 1.1485e-05, 2.3927e-05, 9.4940e-06, 2.2820e-05, 1.1284e-05, 8.1010e-06,\n 7.6805e-06, 1.0207e-05, 1.5840e-05, 1.9385e-05, 1.4847e-05, 1.4763e-05,\n 8.8237e-06, 1.6061e-05, 7.2572e-06, 1.2605e-05, 2.0702e-06, 7.1783e-06,\n 7.9503e-06, 1.2306e-05, 1.3527e-05, 1.4745e-05, 9.0344e-06, 5.9092e-06,\n 7.7106e-06, 1.1983e-05, 5.8674e-07, 6.3117e-06, 9.2170e-06, 5.8656e-06,\n 1.2988e-05, 6.4470e-06, 1.3552e-05, 9.7394e-06, 1.0124e-05, 1.2085e-05,\n 1.9773e-05, 1.3192e-05, 1.8704e-05, 1.3827e-05, 3.5076e-06, 1.0903e-05,\n 2.0192e-07, 2.0609e-05, 1.4272e-05, 3.3100e-05, 1.3154e-05, 1.4686e-05,\n 1.9080e-05, 8.8138e-06, 1.3016e-05, 1.3177e-05, 7.6484e-06, 1.9545e-05,\n 1.0679e-05, 1.2614e-07, 1.3899e-05, 6.8361e-06, 7.8834e-06, 1.3721e-05,\n 1.2608e-05, 2.1074e-05, 1.0799e-05, 7.0119e-06, 1.2498e-05, 1.3842e-05,\n 1.1487e-05, 1.5186e-05, 1.2617e-05, 1.3388e-05, 7.5797e-06, 8.6858e-06,\n 8.8400e-06, 2.0487e-05, 7.9293e-06, 1.7432e-05, 6.2243e-06, 1.6266e-05,\n 1.3375e-05, 1.0094e-05, 1.4377e-05, 9.0109e-06, 1.4927e-05, 1.3839e-05,\n 5.6939e-06, 1.0188e-05, 1.5747e-05, 1.2148e-05, 4.5053e-06, 1.1317e-05,\n 1.5366e-05, 2.7160e-05, 1.6654e-05, 9.4698e-06, 3.6884e-06, 4.6078e-06,\n 1.5996e-05, 1.9360e-05, 1.0991e-05, 1.1494e-05, 1.4263e-05, 4.8173e-06,\n 4.4302e-06, 7.5648e-06, 3.9809e-06, 2.2900e-05, 1.0767e-05, 1.9383e-05,\n 8.6417e-06, 1.1279e-05, 7.3063e-06, 2.2240e-05, 7.2296e-06, 1.7198e-05,\n 1.1544e-05, 1.7063e-05, 5.6763e-06, 1.7587e-05, 4.8315e-06, 1.5882e-05,\n 1.0398e-05, 1.1647e-05, 1.1388e-05, 1.2585e-05, 1.1889e-05, 1.3828e-05,\n 9.5619e-06, 1.4918e-05, 2.1016e-05, 1.2518e-05, 1.9030e-05, 6.2785e-06,\n 1.3559e-05, 1.0565e-05, 4.4616e-06, 1.9859e-05, 2.0237e-05, 6.7250e-06,\n 9.7852e-06, 1.0012e-05, 9.5364e-06, 9.1680e-06, 1.1918e-05, 1.8750e-05,\n 1.5639e-05, 2.1487e-05, 9.8829e-06, 1.8057e-05, 8.7157e-06, 2.1242e-05,\n 1.2292e-05, 1.1185e-05, 1.3493e-05, 1.2855e-05, 8.2843e-06, 1.4954e-05,\n 6.5522e-06, 1.6391e-05, 1.1166e-05, 1.8080e-05, 1.5880e-05, 1.6801e-05,\n 1.0517e-05, 1.3477e-05, 1.4518e-05, 1.1488e-05, 9.4185e-06, 5.6665e-06,\n 1.2206e-05, 2.7454e-05, 1.8178e-05, 1.1780e-05, 5.3401e-06, 1.7635e-05,\n 1.6014e-05, 6.8769e-06, 8.9017e-06, 3.8552e-06, 1.2873e-05, 1.3664e-05,\n 7.2366e-06, 1.7061e-05, 1.8056e-05, 9.7593e-06, 1.0562e-05, 2.5044e-06,\n 1.0788e-05, 5.7108e-06, 9.9560e-06, 1.6172e-05, 1.1113e-05, 1.7290e-05,\n 1.1939e-05, 1.6196e-05, 1.3220e-05, 1.0451e-05, 1.5179e-05, 1.2049e-05,\n 1.7147e-05, 8.6572e-06, 1.7022e-05, 6.3685e-06, 1.7123e-05, 1.5305e-05,\n 1.4196e-05, 1.1301e-05, 1.5744e-05, 6.3885e-06, 1.8108e-05, 1.8097e-05,\n 9.3993e-06, 6.6844e-06, 1.3589e-05, 1.5087e-05, 7.5815e-06, 8.1151e-06,\n 1.4087e-05, 1.4755e-05, 1.3719e-05, 1.5026e-05, 1.3839e-05, 7.9828e-06,\n 8.5647e-06, 7.7336e-06, 7.7879e-06, 1.0458e-05, 1.5944e-05, 1.3407e-05,\n 7.6147e-06, 1.8440e-05, 1.4648e-05, 1.5266e-05, 1.2547e-05, 1.5728e-05,\n 1.6985e-05, 2.0883e-05, 1.4130e-05, 1.5570e-05, 3.0283e-06, 9.6414e-06,\n 8.8581e-06, 2.0357e-05, 9.5146e-06, 1.4636e-05, 1.7250e-05, 7.5392e-06,\n 8.7829e-06, 1.7359e-05, 8.3560e-06, 7.7824e-06, 4.0698e-06, 1.4291e-08,\n 1.0153e-05, 1.0980e-05, 1.6793e-05, 1.5364e-05, 1.4968e-05, 8.7943e-06,\n 3.9665e-06, 4.0742e-06, 1.4914e-05, 8.3191e-06, 1.1745e-05, 2.3655e-05,\n 1.7413e-05, 7.0661e-06, 4.9746e-06, 8.7105e-06, 9.8106e-06, 1.0775e-05,\n 8.7730e-06, 1.0211e-05, 3.8244e-06, 1.1140e-05, 1.6068e-05, 3.7276e-11,\n 1.6003e-05, 8.3897e-06, 6.5005e-06, 1.7380e-05, 1.1702e-05, 1.6559e-05,\n 9.5402e-14, 2.6706e-05, 7.4021e-06, 9.1492e-06, 6.1392e-06, 8.7467e-06,\n 9.0592e-06, 1.5290e-05, 7.7026e-06, 1.7894e-05, 8.3362e-06, 1.6734e-05,\n 1.2229e-05, 1.9600e-05, 7.0676e-06, 9.7543e-06, 1.5730e-05, 8.3744e-06,\n 6.0454e-06, 1.0372e-05, 1.5343e-05, 1.6515e-05, 2.5766e-12, 7.6529e-06,\n 1.3637e-05, 1.2336e-05, 1.0316e-05, 1.2279e-05, 1.1569e-05, 1.1943e-05,\n 6.0633e-06, 7.8073e-06, 6.4334e-06, 7.6654e-06, 2.1511e-05, 8.7144e-06,\n 9.5701e-06, 1.5145e-06, 5.8404e-06, 1.5778e-05, 7.4954e-06, 1.5743e-05,\n 1.0255e-07, 8.9454e-06, 1.1723e-05, 4.7548e-06, 1.2233e-05, 8.9904e-06,\n 4.7709e-06, 4.1437e-06, 8.0841e-06, 8.3910e-08, 1.3186e-05, 1.2549e-05,\n 1.6513e-05, 2.0313e-05, 8.9669e-06, 1.8370e-05, 2.1091e-05, 1.6146e-05,\n 1.5724e-05, 1.4763e-05, 1.7632e-05, 2.5571e-05, 9.9107e-06, 1.7791e-05,\n 9.2498e-06, 1.1838e-05, 1.1039e-05, 8.5740e-06, 1.4658e-05, 1.4039e-05,\n 8.4750e-06, 5.4823e-06, 6.6458e-06, 6.6278e-06, 1.9173e-05, 1.7933e-05,\n 1.8336e-05, 1.2865e-05, 2.0018e-05, 2.1417e-05, 1.3602e-05, 6.8469e-06,\n 1.6643e-05, 9.5462e-06, 1.0238e-05, 9.4713e-06, 1.5402e-05, 2.1223e-05,\n 2.1259e-06, 9.5112e-06, 1.2182e-05, 1.0973e-05, 1.9088e-05, 1.0967e-05,\n 1.7164e-05, 1.3525e-05, 8.9738e-06, 1.6305e-05, 5.7809e-06, 1.5874e-05,\n 1.5320e-05, 1.1443e-05, 1.0146e-05, 4.2203e-06, 1.0728e-05, 1.3650e-05,\n 6.5293e-06, 1.1967e-05, 1.6995e-05, 7.4876e-06, 1.1715e-05, 1.5579e-05,\n 9.3969e-06, 1.2719e-05, 1.2189e-05, 7.3935e-06, 9.9228e-06, 2.0139e-05,\n 1.2184e-05, 6.5690e-06, 8.3696e-06, 1.2037e-05, 8.0074e-06, 2.5196e-05,\n 9.0315e-06, 9.4847e-06, 5.3944e-06, 2.0966e-05, 1.1703e-05, 6.4411e-06,\n 7.4453e-06, 1.0145e-05, 1.9966e-05, 5.0873e-06, 1.5257e-05, 8.9779e-06,\n 1.9249e-05, 1.4551e-05, 9.9192e-06, 1.7386e-05, 2.6646e-06, 1.5391e-05,\n 1.3005e-05, 1.4826e-05, 1.9545e-05, 1.9804e-05, 1.4929e-05, 3.1737e-06,\n 1.8584e-05, 1.1237e-05, 1.2218e-05, 2.4809e-05, 1.3763e-05, 1.6413e-05,\n 1.5611e-05, 1.4588e-05, 1.1981e-05, 5.7770e-06, 7.4703e-06, 1.2344e-05,\n 2.8520e-05, 1.5887e-05, 1.2673e-05, 5.4357e-06, 8.3643e-06, 1.4020e-05,\n 8.2814e-06, 6.3769e-06, 8.2592e-06, 4.0418e-11, 1.5622e-05, 1.3954e-05,\n 1.5695e-05, 8.0601e-06, 1.4785e-05, 1.3455e-05, 1.8146e-05, 1.0843e-05,\n 1.2053e-05, 1.1709e-05, 1.7490e-05, 1.0231e-05, 1.2404e-05, 9.3001e-06,\n 7.3317e-06, 7.8152e-06, 1.7323e-05, 1.3467e-05, 1.0931e-05, 1.1073e-05,\n 2.0017e-05, 5.4514e-06, 1.4659e-05, 2.1881e-05, 8.2623e-06, 1.3720e-05,\n 1.2511e-06, 1.3763e-05, 1.3372e-05, 1.1687e-05, 1.6439e-05, 1.6504e-05,\n 9.1449e-06, 1.0575e-10, 1.3927e-05, 1.1867e-05, 6.1286e-06, 8.6713e-06,\n 5.0122e-06, 1.7258e-05, 5.3762e-06, 9.2008e-06, 1.8438e-05, 1.3859e-05,\n 2.1893e-05, 1.2151e-05, 2.0280e-05, 9.2438e-06, 2.3437e-06, 7.9412e-06,\n 1.9701e-05, 1.6760e-05, 1.9398e-05, 1.0543e-05, 9.4845e-06, 1.0663e-05,\n 1.6920e-05, 1.4871e-05, 1.2855e-05, 1.5729e-05, 2.5261e-05, 6.8853e-06,\n 1.0364e-05, 1.0942e-05, 1.9457e-05, 9.6717e-06, 4.1336e-13, 6.2669e-06,\n 1.5482e-05, 3.1501e-06, 1.4458e-05, 7.3428e-06, 1.0195e-05, 1.7564e-05,\n 5.6166e-06, 2.1714e-06, 1.8022e-05, 1.3075e-05, 6.8206e-06, 9.5887e-06,\n 5.7362e-06, 1.1762e-05, 9.1857e-06, 3.2978e-05, 1.0249e-05, 9.9970e-06,\n 1.3942e-05, 2.0608e-05, 8.0457e-06, 2.5810e-05, 2.0607e-05, 1.6018e-05,\n 1.7536e-05, 1.7477e-05, 6.9232e-06, 1.4406e-05, 4.4112e-06, 1.2072e-05,\n 1.4714e-05, 9.7080e-06, 1.8413e-05, 1.5148e-05, 1.1472e-05, 4.3380e-06,\n 7.3042e-06, 1.2660e-05, 1.6597e-05, 1.4817e-05, 1.1264e-05, 5.4174e-06,\n 7.9083e-06, 1.9056e-05, 8.4311e-06, 1.0474e-05, 2.6221e-06, 3.4326e-06,\n 1.1099e-05, 1.6416e-05, 1.1409e-05, 2.3727e-05, 1.9336e-05, 1.2092e-05,\n 2.1443e-05, 2.5671e-05, 2.0605e-05, 2.7740e-06, 1.1556e-05, 1.1367e-05,\n 8.2342e-06, 1.0770e-05, 1.2378e-05, 2.3161e-05, 7.5937e-06, 7.1647e-06,\n 3.5362e-06, 1.7576e-07, 1.3879e-05, 1.7101e-05, 1.7726e-05, 1.9649e-05,\n 1.2381e-05, 2.3922e-05, 1.9303e-05, 6.4279e-06, 1.5379e-05, 1.2338e-05,\n 2.2201e-05, 9.6558e-06, 1.1006e-05, 2.0627e-05, 1.5745e-05, 1.0209e-05,\n 4.0691e-06, 9.1851e-06, 9.3554e-06, 1.0445e-05, 6.8016e-06, 1.1068e-05,\n 1.5564e-05, 9.8050e-06, 5.3786e-06, 1.7724e-05, 1.7822e-05, 8.4550e-06,\n 1.9869e-05, 1.5324e-05, 1.5608e-05, 1.3862e-05, 1.8484e-05, 1.1367e-05,\n 6.2166e-06, 1.4369e-05, 2.2994e-05, 2.3186e-05, 8.2730e-06, 1.7273e-05,\n 9.2552e-06, 1.7655e-05, 1.7356e-05, 1.7747e-05, 1.9210e-05, 6.3648e-06,\n 3.5413e-08, 1.8682e-05, 8.1422e-06, 9.8862e-06, 1.9691e-05, 1.5258e-05,\n 5.0829e-06, 1.0283e-05, 7.6975e-06, 1.0500e-05, 1.6796e-05, 1.6108e-05,\n 1.7088e-05, 2.2665e-06, 9.0664e-06, 6.8705e-06, 4.5901e-06, 1.5606e-05,\n 1.5704e-05, 1.1676e-05, 1.1221e-05, 7.8107e-06, 1.2165e-05, 1.5751e-05,\n 8.2946e-06, 2.4917e-05, 2.8233e-06, 1.7157e-05, 1.4230e-05, 1.7382e-05],\n device='cuda:0')" + }, + "2": { + "step": "tensor(12515.)", + "exp_avg": "tensor([[-2.2612e-05, -1.0403e-05, -3.3728e-06, ..., 3.7117e-06,\n 4.3310e-07, 1.4648e-05],\n [ 2.1416e-06, 6.5292e-06, 4.9091e-07, ..., -5.1227e-06,\n 3.8868e-05, -3.5006e-05],\n [-6.1199e-07, -3.9187e-07, 1.6154e-06, ..., 2.0596e-06,\n 4.1625e-06, -3.5466e-07],\n ...,\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [ 1.2093e-07, -1.1962e-09, 2.5219e-06, ..., 3.3761e-06,\n -5.4526e-06, -6.4637e-06],\n [-3.6311e-09, -1.5753e-06, 1.8634e-06, ..., 2.0774e-06,\n 2.6803e-06, -1.0261e-05]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.8109e-10, 7.7641e-10, 8.6949e-10, ..., 3.9112e-10, 4.4504e-10,\n 1.6987e-09],\n [1.4137e-09, 6.7959e-10, 5.1895e-10, ..., 2.8497e-10, 1.2724e-09,\n 1.8054e-09],\n [5.8031e-11, 3.9580e-11, 1.9790e-10, ..., 3.6579e-10, 4.3001e-09,\n 2.6146e-09],\n ...,\n [6.0770e-14, 2.0179e-13, 3.7128e-14, ..., 7.7831e-15, 2.8442e-14,\n 1.7203e-16],\n [3.2985e-11, 3.0818e-12, 9.2689e-11, ..., 5.9082e-10, 2.2866e-09,\n 2.6760e-09],\n [1.7335e-11, 2.1778e-10, 2.0002e-10, ..., 7.1365e-11, 1.9970e-10,\n 4.5597e-09]], device='cuda:0')" + }, + "3": { + "step": "tensor(12515.)", + "exp_avg": "tensor([-5.8487e-05, 4.6941e-05, 4.5485e-05, -3.0170e-06, 2.7227e-05,\n 2.5597e-05, 2.9124e-06, -6.3153e-05, -1.3350e-04, -5.9388e-05,\n 5.3960e-05, -1.0426e-04, 6.1102e-05, 5.6052e-45, -1.1492e-05,\n -1.8962e-05, -2.8940e-05, 5.6052e-45, 3.3026e-05, -5.6218e-05,\n -1.1479e-05, -2.6326e-05, 7.9784e-05, -2.5721e-05, 5.4995e-05,\n 5.6052e-45, 6.7965e-05, 3.7896e-05, -7.0111e-05, -1.3111e-06,\n 3.9067e-05, -3.6257e-05, -5.6052e-45, -2.3895e-05, 2.7886e-05,\n -3.2728e-05, 5.6052e-45, -9.7660e-06, -9.1113e-05, 2.9440e-05,\n 2.1223e-05, -1.0141e-05, 1.0592e-05, -7.3837e-05, 5.6052e-45,\n -4.1504e-05, 5.7215e-05, -1.8097e-05, 4.3436e-05, -4.8215e-06,\n 5.6052e-45, 1.6949e-05, -9.7669e-06, 5.6052e-45, 5.6052e-45,\n 2.8144e-05, -3.6992e-05, 8.5727e-06, -1.9740e-05, 1.1845e-06,\n 2.2160e-05, 5.6052e-45, 4.1063e-05, 4.1121e-05, -1.2374e-05,\n -2.4053e-05, 1.5774e-05, -2.5070e-05, 3.1766e-05, -9.1951e-06,\n 4.7656e-05, 8.1042e-05, 6.0367e-05, -8.2176e-06, 2.6555e-05,\n 4.1856e-05, 5.6052e-45, 9.9812e-07, 6.5279e-05, 5.6052e-45,\n -1.1821e-06, 1.6961e-05, 6.5458e-05, 2.3017e-06, 6.7888e-05,\n -3.2645e-06, -1.8003e-05, 2.0180e-05, 3.2053e-05, -4.0061e-05,\n 2.5867e-05, -1.2245e-05, -3.3071e-05, -4.3511e-05, 2.0362e-05,\n -6.3357e-05, -2.5093e-05, 1.4422e-05, 4.6816e-05, -5.1477e-06,\n 2.8947e-05, -5.8804e-06, 7.0369e-22, 5.6052e-45, -3.9772e-05,\n 3.2229e-05, 5.6478e-05, -7.2412e-06, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 2.4706e-05, -3.8859e-06, 3.8942e-05, 3.7770e-06,\n -3.9880e-05, -5.4611e-05, 1.9864e-05, 8.4726e-05, 5.6052e-45,\n 4.8387e-06, 6.5710e-05, -5.5881e-05, 7.4703e-05, -2.9036e-05,\n 5.6052e-45, -8.5442e-05, -1.1163e-05, -1.2276e-05, -7.3463e-05,\n 1.6285e-06, 1.5930e-05, -4.0234e-05, 5.6052e-45, -4.4932e-05,\n -3.4515e-05, 5.6052e-45, 1.9460e-05, -8.7625e-05, -2.0781e-05,\n 5.6052e-45, 2.8763e-05, -2.9611e-05, -1.0104e-05, 5.6052e-45,\n -7.4153e-06, 5.6052e-45, -1.4677e-05, 5.6052e-45, -2.1140e-06,\n -6.6384e-05, -9.0733e-06, 3.7580e-05, 1.0636e-04, 5.6052e-45,\n 2.5008e-07, -6.3649e-05, -7.5563e-05, 1.6019e-32, -9.1207e-06,\n 3.2465e-05, 1.1661e-05, -4.6536e-05, -5.8119e-05, 9.5098e-06,\n 1.8159e-05, 3.2290e-06, -4.3710e-05, -3.3508e-05, -1.8628e-05,\n 1.1242e-05, 4.5945e-05, 9.4119e-05, -4.3491e-05, -3.1752e-06,\n 2.9962e-06, 5.6052e-45, 9.1455e-06, 7.0484e-05, -3.5520e-05,\n 9.0335e-05, 1.0138e-05, 5.6052e-45, -2.4244e-05, -1.9585e-05,\n 5.6052e-45, 1.7995e-05, 8.3856e-05, 5.6052e-45, 5.6052e-45,\n -5.3156e-06, -5.4563e-05, -1.5502e-05, -6.7542e-05, -1.5855e-05,\n 3.8530e-05, -2.0106e-05, -5.3651e-07, 2.9204e-05, -3.3997e-05,\n 5.6052e-45, 9.0113e-05, 1.7667e-05, -1.0696e-05, -2.7110e-05,\n 4.2294e-05, 3.8021e-05, 7.2810e-05, 1.3583e-04, 5.6052e-45,\n 2.0299e-05, 3.3485e-05, -8.9019e-06, 6.4685e-06, 1.3090e-05,\n -6.4742e-06, 1.7093e-06, 3.2983e-05, 3.6029e-05, 1.7597e-05,\n 1.0776e-05, -6.8579e-05, 3.6563e-05, 2.5131e-05, 3.6736e-05,\n -2.5714e-05, 3.7585e-05, -2.9728e-05, -1.6552e-05, 9.0134e-11,\n 3.8936e-05, -1.8597e-05, 3.9236e-05, -4.9997e-05, -3.5752e-05,\n 8.1905e-06, -6.3380e-05, 8.1834e-05, 1.4712e-05, 5.3003e-05,\n 7.3057e-05, -3.3389e-05, -6.4912e-05, 3.8489e-05, -2.8526e-05,\n 4.3424e-05, 4.1658e-05, 1.5871e-05, 6.2593e-05, 5.6052e-45,\n 2.0124e-05, -5.7116e-06, -1.4308e-05, 7.1405e-05, 6.7649e-07,\n -2.0402e-05, 5.6052e-45, -1.3231e-05, 3.4940e-05, 6.1492e-05,\n 5.6052e-45, 7.9386e-07, 1.2856e-05, 1.2422e-05, -1.1150e-05,\n 2.0957e-07, -7.8191e-05, -8.7089e-05, -8.7082e-06, 8.4911e-05,\n 9.2659e-05, 2.1286e-06, 1.6377e-05, -1.3658e-05, 5.6052e-45,\n 1.4929e-05, 5.6052e-45, -7.0237e-05, -6.3833e-05, -3.0331e-05,\n 5.6052e-45, 5.9678e-06, 2.0832e-20, 1.2980e-05, -1.8257e-05,\n -5.9243e-05, -1.7022e-06, -5.1252e-05, 2.3583e-05, 2.3195e-05,\n -9.2920e-05, -1.1523e-05, 5.9414e-05, 6.9422e-05, 8.0372e-05,\n -5.1567e-05, 2.2257e-05, -3.2828e-05, -1.6083e-06, 9.9723e-06,\n 1.3851e-05, 1.9393e-05, -5.6052e-45, 3.5880e-05, 1.5731e-05,\n 5.6052e-45, 9.5218e-06, 2.6732e-05, 2.0687e-07, -4.3331e-05,\n 2.6467e-05, -5.8269e-05, -7.6575e-06, -8.0672e-06, 3.0156e-05,\n -5.9825e-06, -1.1379e-04, -4.9207e-05, -8.2974e-06, -4.2556e-06,\n 1.2679e-05, -2.8370e-05, -6.1890e-05, 3.8570e-05, -9.7722e-05,\n 3.0825e-05, 1.9611e-05, -6.9965e-05, -2.8790e-05, -1.7129e-05,\n 6.8477e-06, -5.1640e-05, -6.7830e-06, -9.7392e-06, 3.1086e-05,\n -2.7516e-05, 2.4314e-05, -6.8467e-05, -1.3668e-06, -4.4413e-05,\n 1.3557e-06, -5.9095e-05, 5.6052e-45, 3.0158e-06, 2.3647e-05,\n -8.0290e-05, 5.6052e-45, 3.7717e-05, -1.4314e-06, 6.4249e-06,\n -2.9692e-05, 3.5684e-05, -2.1166e-05, 4.1844e-05, -4.2267e-05,\n 3.3741e-05, 2.3271e-05, 9.0735e-06, -4.8602e-05, 1.8841e-05,\n 7.0581e-07, -5.6616e-05, -3.2447e-06, -7.7416e-06, -6.8251e-05,\n -2.1870e-05, -2.9262e-05, 6.8994e-06, 7.6563e-06, -1.9608e-05,\n -2.5790e-05, 3.8446e-05, -5.0292e-05, 5.9197e-05, 5.6052e-45,\n 1.0430e-05, -2.7102e-05, 2.2652e-05, 5.6052e-45, 5.6052e-45,\n 1.9514e-05, -2.2643e-05, 5.6052e-45, 3.5496e-05, 1.7037e-06,\n -6.2642e-06, -3.0654e-05, -1.9211e-05, 1.3255e-05, -4.3372e-05,\n -7.6998e-06, 5.6052e-45, 3.2079e-05, 6.5159e-05, -5.6052e-45,\n 1.6739e-05, -1.4895e-05, 5.6052e-45, 3.0916e-05, 6.5326e-05,\n 5.6488e-06, -1.8412e-05, 3.8490e-05, 4.0942e-05, 5.6090e-05,\n -6.8122e-05, 2.1583e-05, 2.7102e-05, -2.4913e-06, 7.1862e-06,\n -4.4290e-07, 7.8046e-05, -6.6645e-06, -2.1840e-05, 2.9047e-05,\n 1.7631e-05, 2.0820e-05, 5.6052e-45, -8.0095e-05, 1.0400e-04,\n 8.4258e-06, 1.8799e-06, 4.3707e-06, -4.3841e-05, 1.9664e-06,\n -6.9107e-06, 1.5423e-05, -4.6447e-05, -2.0394e-05, 1.4743e-05,\n -1.8360e-06, -6.7451e-05, 3.0144e-05, 5.1047e-06, -4.0778e-06,\n 4.3994e-05, 5.6052e-45, 5.0665e-05, -1.2395e-05, -1.2830e-05,\n 5.6052e-45, 5.6052e-45, -7.5006e-05, 2.6216e-06, -4.8476e-06,\n 6.4675e-05, -1.9455e-05, 2.1001e-05, -6.7995e-06, 2.1588e-05,\n 2.8936e-05, 1.2456e-04, 9.2678e-05, -3.2215e-05, 4.6758e-05,\n 5.6052e-45, 4.7073e-05, 5.6052e-45, 7.6974e-05, -5.3778e-06,\n -1.1606e-05, -5.3309e-05, 7.2633e-06, 3.9279e-05, -2.0208e-06,\n -5.0157e-05, 4.4466e-05, 4.9066e-05, 3.2936e-05, -5.6052e-45,\n 2.2873e-05, 2.1052e-05, -5.7766e-05, -2.7732e-05, -4.4231e-05,\n 3.5423e-05, -8.9541e-06, -3.5599e-05, 7.3507e-05, 5.6052e-45,\n 3.2716e-05, 1.2968e-05, 5.6052e-45, 3.1850e-05, -3.3726e-05,\n 1.2440e-05, 5.9060e-05, -2.4414e-05, 4.2286e-05, 2.4138e-05,\n -1.4174e-04, -9.1910e-05, 5.6052e-45, -2.8721e-05, -1.7008e-05,\n 9.6795e-05, -1.7346e-05, 4.2816e-05, -2.4111e-05, 5.6052e-45,\n 5.6052e-45, 1.9593e-05, -2.8106e-05, 5.6052e-45, -4.2192e-05,\n 5.6052e-45, -5.7672e-05, 1.0546e-04, 4.7943e-05, 5.6052e-45,\n 2.7546e-05, -3.1152e-05], device='cuda:0')", + "exp_avg_sq": "tensor([2.1528e-08, 3.4423e-08, 3.1668e-08, 1.7247e-08, 2.9426e-08, 2.8257e-08,\n 2.7099e-08, 2.7877e-08, 4.6102e-08, 1.9936e-08, 3.8360e-08, 4.0215e-08,\n 2.6352e-08, 2.4520e-12, 9.5486e-09, 4.7963e-08, 3.4664e-08, 2.8667e-18,\n 2.4366e-08, 1.3040e-08, 3.8034e-08, 9.9664e-09, 1.1655e-08, 2.5941e-08,\n 2.3139e-08, 1.0068e-09, 3.3193e-08, 3.0674e-08, 2.2740e-08, 2.7089e-08,\n 1.9547e-08, 2.6477e-08, 2.4412e-11, 2.6288e-08, 3.8268e-08, 1.9064e-08,\n 1.4782e-10, 2.3547e-08, 2.4300e-08, 2.0978e-08, 2.8457e-08, 2.3201e-08,\n 1.7373e-08, 2.6686e-08, 5.3295e-10, 1.8485e-08, 4.9089e-08, 2.7434e-08,\n 2.5571e-08, 2.4606e-08, 1.3968e-11, 2.4811e-08, 2.6577e-08, 2.9546e-10,\n 2.5324e-10, 1.2845e-08, 2.8616e-08, 2.5301e-08, 5.2009e-08, 1.8861e-08,\n 3.8745e-08, 3.0692e-11, 3.4548e-08, 2.0982e-08, 2.2303e-08, 2.6965e-08,\n 1.3791e-08, 3.5365e-08, 3.3240e-08, 1.9544e-08, 1.7851e-08, 3.2092e-08,\n 2.4520e-08, 3.0480e-08, 2.3430e-08, 2.5619e-08, 1.2253e-10, 2.4400e-08,\n 3.5415e-08, 2.9482e-14, 3.8608e-08, 2.9810e-08, 2.8131e-08, 1.0742e-08,\n 3.0083e-08, 1.0156e-08, 3.2548e-08, 1.9150e-08, 2.2024e-08, 3.1590e-08,\n 2.3165e-08, 2.3021e-08, 2.7389e-08, 1.5128e-08, 1.7900e-08, 2.4122e-08,\n 2.1997e-08, 2.7599e-08, 2.2216e-08, 3.0780e-08, 2.0060e-08, 2.5670e-08,\n 8.2122e-11, 8.7540e-11, 4.2637e-08, 2.3247e-08, 2.2472e-08, 2.2892e-08,\n 2.2562e-10, 1.6492e-13, 4.0285e-10, 3.1994e-08, 2.8853e-08, 2.5774e-08,\n 2.9750e-08, 1.6834e-08, 3.0418e-08, 1.7674e-08, 3.4827e-08, 9.7418e-10,\n 5.2010e-08, 4.8164e-08, 2.8014e-08, 2.7135e-08, 3.4425e-08, 2.1393e-10,\n 2.7202e-08, 9.8234e-09, 3.3140e-08, 3.8227e-08, 2.6422e-08, 1.4934e-08,\n 1.9944e-08, 6.5467e-10, 2.3171e-08, 2.0353e-08, 6.4738e-11, 2.3669e-08,\n 2.4586e-08, 2.4423e-08, 3.4991e-10, 2.9001e-08, 2.0865e-08, 3.0566e-08,\n 4.0372e-12, 1.8590e-08, 1.8615e-09, 2.7737e-08, 3.5791e-10, 2.3818e-08,\n 2.2724e-08, 1.8489e-08, 2.4308e-08, 4.1973e-08, 2.7427e-11, 2.4481e-08,\n 1.4869e-08, 3.2807e-08, 1.5833e-18, 2.0890e-08, 2.0444e-08, 2.5818e-08,\n 3.1301e-08, 1.3218e-08, 3.7066e-08, 1.6855e-08, 4.0561e-08, 2.8747e-08,\n 2.3239e-08, 1.8047e-08, 4.8652e-08, 1.9033e-08, 3.6118e-08, 3.0975e-08,\n 3.0888e-08, 3.7646e-08, 2.9402e-12, 2.1086e-08, 2.0622e-08, 1.7403e-08,\n 3.7875e-08, 1.9569e-08, 3.2854e-10, 2.4205e-08, 2.5839e-08, 1.9989e-11,\n 2.9631e-08, 1.8934e-08, 2.5510e-10, 8.3471e-10, 1.6988e-08, 3.4275e-08,\n 2.4049e-08, 4.4934e-08, 3.3702e-08, 2.8118e-08, 7.3342e-09, 2.0156e-08,\n 2.7183e-08, 2.2255e-08, 4.5977e-11, 3.2381e-08, 1.4343e-08, 2.1154e-08,\n 3.1714e-08, 2.7213e-08, 1.5735e-08, 3.1435e-08, 2.9829e-08, 2.0818e-11,\n 2.6782e-08, 3.6764e-08, 3.9435e-08, 2.4145e-08, 2.6130e-08, 2.9877e-08,\n 3.0966e-08, 1.5838e-08, 3.0914e-08, 1.9463e-08, 2.0269e-08, 1.7850e-08,\n 1.9672e-08, 2.1852e-08, 2.2593e-08, 2.1927e-08, 2.6743e-08, 2.7298e-08,\n 3.6178e-08, 2.4982e-10, 2.1815e-08, 1.4341e-08, 3.0540e-08, 3.4507e-08,\n 2.9190e-08, 1.8203e-08, 3.2314e-08, 3.2344e-08, 2.7991e-08, 2.4294e-08,\n 3.3355e-08, 4.3626e-08, 2.1795e-08, 6.3897e-09, 2.5548e-08, 1.9679e-08,\n 2.5481e-08, 1.6974e-08, 1.6916e-08, 3.9305e-10, 3.1935e-08, 1.0955e-08,\n 1.0167e-08, 4.5411e-08, 1.8582e-08, 2.7886e-08, 4.6189e-10, 3.0367e-08,\n 3.7021e-08, 2.5946e-08, 6.8602e-10, 3.8247e-08, 2.1635e-08, 2.0838e-08,\n 2.4018e-08, 2.5909e-08, 3.0597e-08, 4.7296e-08, 7.5132e-09, 3.4493e-08,\n 4.8423e-08, 2.1477e-08, 4.0929e-08, 2.2347e-08, 7.7920e-14, 1.7064e-08,\n 3.6727e-10, 3.4369e-08, 2.5898e-08, 1.5052e-08, 4.9150e-10, 1.7877e-08,\n 1.0139e-09, 2.5890e-08, 1.3877e-08, 2.8647e-08, 1.3431e-08, 3.8880e-08,\n 2.6349e-08, 2.2483e-08, 3.1276e-08, 4.3389e-08, 2.1688e-08, 3.4083e-08,\n 3.3107e-08, 2.5056e-08, 1.4788e-08, 2.1302e-08, 2.6904e-08, 2.9012e-08,\n 2.2501e-08, 1.5625e-08, 4.2097e-11, 1.6652e-08, 2.3763e-08, 1.3897e-10,\n 1.7765e-08, 2.9981e-08, 3.5824e-08, 2.3330e-08, 2.4263e-08, 3.8710e-08,\n 2.4194e-08, 1.7747e-08, 2.0261e-08, 3.0466e-08, 2.8449e-08, 3.9492e-08,\n 1.2291e-08, 2.2180e-08, 2.6281e-08, 2.7410e-08, 3.4665e-08, 4.5338e-08,\n 2.0999e-08, 1.9365e-08, 1.0881e-08, 2.6288e-08, 3.2755e-08, 1.9379e-08,\n 3.3895e-08, 1.8376e-08, 1.4776e-08, 2.5090e-08, 2.8170e-08, 3.2072e-08,\n 2.9026e-08, 2.7602e-08, 3.6852e-08, 2.0335e-08, 2.9817e-08, 2.5313e-08,\n 1.5538e-10, 2.7088e-08, 3.5845e-08, 3.5520e-08, 1.1525e-12, 2.3255e-08,\n 3.6564e-08, 2.4737e-08, 1.5966e-08, 2.5055e-08, 3.2404e-08, 2.9677e-08,\n 2.8032e-08, 2.3205e-08, 1.7962e-08, 3.0016e-08, 3.8642e-08, 2.8137e-08,\n 2.2746e-08, 3.0405e-08, 2.8538e-08, 2.2460e-08, 3.5535e-08, 2.6157e-08,\n 2.5209e-08, 2.1529e-08, 2.3749e-08, 3.0773e-08, 1.3697e-08, 2.6788e-08,\n 2.5751e-08, 2.8823e-08, 9.0659e-11, 1.3653e-08, 1.7225e-08, 3.9595e-08,\n 4.4121e-10, 1.8834e-10, 3.4001e-08, 1.5991e-08, 3.0086e-13, 3.3381e-08,\n 2.4923e-08, 2.9696e-08, 1.7181e-08, 2.4297e-08, 3.4086e-08, 3.7474e-08,\n 1.9387e-08, 1.1317e-09, 2.3907e-08, 4.3703e-08, 9.6703e-10, 2.5619e-08,\n 2.2729e-08, 1.4583e-10, 2.1855e-08, 1.9485e-08, 3.8081e-08, 2.1312e-08,\n 2.3423e-08, 2.6208e-08, 3.3648e-08, 3.9673e-08, 4.2661e-08, 2.3117e-08,\n 2.4229e-08, 3.7461e-08, 1.4880e-08, 4.4888e-08, 4.0385e-08, 3.5144e-08,\n 2.6186e-08, 1.9519e-08, 3.7710e-08, 2.9025e-10, 2.6213e-08, 5.0818e-08,\n 4.2989e-08, 1.9658e-08, 1.9435e-08, 4.9577e-08, 1.2111e-08, 3.2816e-08,\n 2.7512e-08, 2.3219e-08, 3.7502e-08, 2.4189e-08, 1.7750e-08, 1.5740e-08,\n 1.2698e-08, 3.2439e-08, 2.9751e-08, 2.6379e-08, 4.4237e-10, 2.8623e-08,\n 3.3791e-08, 3.6017e-08, 1.2549e-10, 3.4204e-10, 2.5025e-08, 1.9553e-08,\n 3.6442e-08, 2.6653e-08, 2.2622e-08, 2.5956e-08, 3.3759e-08, 4.8956e-08,\n 2.2013e-08, 4.5249e-08, 2.6965e-08, 2.8168e-08, 1.9980e-08, 6.8622e-10,\n 2.9376e-08, 6.3100e-11, 3.5010e-08, 1.9897e-08, 2.2332e-08, 3.2265e-08,\n 1.3565e-08, 1.6142e-08, 1.4296e-08, 2.2753e-08, 1.3034e-08, 2.7231e-08,\n 3.1597e-08, 1.0544e-10, 3.5370e-08, 2.8314e-08, 4.2442e-08, 1.3350e-08,\n 2.9668e-08, 1.9872e-08, 1.7109e-08, 1.5549e-08, 2.9964e-08, 1.1470e-12,\n 2.5532e-08, 2.1026e-08, 9.2338e-10, 1.7343e-08, 3.7256e-08, 2.2322e-08,\n 2.3467e-08, 3.0731e-08, 1.8181e-08, 1.0363e-08, 1.5456e-08, 3.2622e-08,\n 1.5465e-12, 2.3934e-08, 3.7208e-08, 2.8078e-08, 3.3348e-08, 3.7370e-08,\n 1.8422e-08, 4.3926e-10, 2.6931e-10, 1.3214e-08, 4.0446e-08, 2.9719e-11,\n 3.0392e-08, 7.2320e-10, 2.5026e-08, 3.8572e-08, 2.2063e-08, 1.2725e-10,\n 2.1951e-08, 1.2976e-08], device='cuda:0')" + }, + "4": { + "step": "tensor(12515.)", + "exp_avg": "tensor([[ 7.8955e-07, -8.6303e-06, -1.1157e-07, ..., -5.6052e-45,\n -4.3009e-07, 1.3995e-06],\n [-5.8505e-06, 7.0357e-06, -8.9880e-06, ..., -5.6052e-45,\n 3.7151e-06, 9.9478e-06],\n [-1.8331e-06, 3.4126e-05, -1.3702e-05, ..., -5.6052e-45,\n 1.5940e-06, 4.2003e-07],\n ...,\n [-7.7493e-06, 1.0953e-05, -3.7122e-06, ..., 5.6052e-45,\n -5.7647e-06, 1.3303e-06],\n [ 4.7050e-06, 8.3085e-06, 7.6515e-06, ..., -5.6052e-45,\n -9.5177e-07, -3.2078e-06],\n [-1.3110e-06, 8.5503e-06, -8.9988e-06, ..., -5.6052e-45,\n -2.7384e-06, 8.6515e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.2252e-10, 1.0215e-09, 1.0111e-09, ..., 2.7584e-15, 7.6989e-11,\n 2.4740e-10],\n [2.9154e-10, 1.7015e-09, 1.1788e-09, ..., 2.5754e-14, 2.8818e-10,\n 6.1720e-10],\n [2.3094e-10, 2.2957e-09, 9.9656e-10, ..., 2.6059e-15, 1.8727e-10,\n 2.3950e-10],\n ...,\n [3.4423e-10, 1.9364e-09, 1.4251e-09, ..., 3.5633e-14, 2.7965e-10,\n 2.4820e-10],\n [3.6861e-10, 2.0932e-09, 1.4035e-09, ..., 3.7391e-14, 3.4963e-10,\n 2.0909e-10],\n [3.3214e-10, 2.5996e-09, 1.2655e-09, ..., 1.3928e-15, 2.1256e-10,\n 9.1439e-10]], device='cuda:0')" + }, + "5": { + "step": "tensor(7509.)", + "exp_avg": "tensor([[ 1.8467e-06, -1.2886e-07, 6.2285e-06, ..., 3.7666e-07,\n -1.2084e-05, 4.7678e-06],\n [ 4.7600e-07, 6.2355e-08, 1.9100e-05, ..., -4.3194e-06,\n 2.2165e-05, -3.2714e-06],\n [-2.5641e-07, 2.4528e-11, -2.0049e-06, ..., -9.3619e-06,\n -3.2207e-06, -4.5081e-06],\n ...,\n [ 4.5013e-07, -9.7735e-08, -4.5565e-06, ..., 2.1607e-06,\n -7.0060e-06, 2.9734e-07],\n [-2.0822e-05, 5.4359e-09, -3.3794e-05, ..., 1.3965e-06,\n 1.8306e-05, -9.7634e-06],\n [ 1.8256e-06, 6.0528e-08, 8.6425e-06, ..., 7.7699e-07,\n 2.6031e-05, 9.9395e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[4.1300e-10, 2.8657e-11, 6.0413e-09, ..., 2.8333e-10, 3.3531e-09,\n 2.5472e-09],\n [1.4633e-10, 1.2577e-10, 2.5502e-09, ..., 4.4402e-09, 5.7652e-09,\n 2.7340e-09],\n [8.1172e-10, 2.0399e-11, 5.5266e-10, ..., 1.7764e-09, 1.8298e-08,\n 1.3474e-08],\n ...,\n [1.9067e-09, 1.0458e-10, 2.4862e-09, ..., 5.7328e-10, 2.1250e-09,\n 1.2832e-09],\n [2.9581e-09, 2.1072e-11, 2.4414e-09, ..., 4.1268e-09, 2.7022e-08,\n 2.7183e-09],\n [2.8810e-10, 2.2853e-09, 1.0781e-08, ..., 3.1795e-09, 1.1968e-08,\n 1.8382e-09]], device='cuda:0')" + }, + "6": { + "step": "tensor(7509.)", + "exp_avg": "tensor([-6.4081e-05, 8.7812e-05, -3.1083e-05, ..., -7.3972e-06,\n -1.3388e-04, 5.9231e-05], device='cuda:0')", + "exp_avg_sq": "tensor([1.5674e-07, 1.2858e-07, 1.9677e-07, ..., 1.3321e-07, 1.9293e-07,\n 1.8066e-07], device='cuda:0')" + }, + "7": { + "step": "tensor(7509.)", + "exp_avg": "tensor([[ 8.0759e-06, -4.6975e-07, 2.1309e-06, ..., 2.3569e-06,\n -3.2560e-06, -5.0040e-06],\n [-3.0101e-07, 2.2126e-06, -2.6306e-06, ..., 4.3860e-06,\n -7.8730e-07, -3.3690e-06],\n [ 3.3928e-06, -4.6126e-06, 2.3213e-06, ..., 2.4017e-05,\n -4.0304e-06, -2.3193e-06],\n ...,\n [ 6.2884e-07, 7.1381e-06, -6.1908e-07, ..., -5.4252e-06,\n 7.6989e-06, -1.3673e-05],\n [ 3.9369e-06, 4.3177e-06, 1.3516e-05, ..., -1.7899e-05,\n 1.1659e-05, 9.1529e-06],\n [ 8.2614e-06, 4.4586e-07, 9.2199e-06, ..., 2.6238e-05,\n 4.9019e-06, 7.2479e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.0673e-09, 6.3157e-10, 7.7905e-10, ..., 7.3313e-10, 7.1261e-10,\n 9.2835e-10],\n [1.0330e-09, 7.0849e-10, 7.3060e-10, ..., 1.2046e-09, 1.1952e-09,\n 1.7075e-09],\n [7.7571e-10, 1.2806e-09, 6.8562e-10, ..., 1.7748e-09, 7.7257e-10,\n 3.5754e-09],\n ...,\n [1.2976e-09, 7.5162e-10, 2.9723e-09, ..., 1.6748e-09, 1.9954e-09,\n 1.9587e-09],\n [1.3412e-09, 1.5549e-09, 1.8847e-09, ..., 1.3666e-09, 1.5357e-09,\n 1.5958e-09],\n [1.6330e-09, 5.8881e-10, 1.2272e-09, ..., 1.6307e-09, 1.2373e-09,\n 1.7876e-09]], device='cuda:0')" + }, + "14": { + "step": "tensor(7509.)", + "exp_avg": "tensor(0.0012, device='cuda:0')", + "exp_avg_sq": "tensor(2.7000e-05, device='cuda:0')" + }, + "15": { + "step": "tensor(7509.)", + "exp_avg": "tensor([[-4.2022e-14, -2.4204e-15, -9.5827e-15, ..., 6.3617e-13,\n -2.0208e-13, 1.9450e-13],\n [ 1.4904e-13, 1.2263e-13, -4.5449e-13, ..., -4.5694e-12,\n 2.1413e-11, -1.0089e-12],\n [-1.0112e-13, 2.1706e-16, 3.0669e-15, ..., -4.2787e-13,\n -1.0257e-12, -1.9375e-13],\n ...,\n [ 1.4669e-13, -3.6781e-12, 1.1023e-11, ..., 5.7561e-12,\n -5.4753e-11, -2.3252e-13],\n [ 7.8646e-14, -6.0094e-15, -3.5154e-13, ..., -9.2108e-14,\n 6.9677e-12, -2.6850e-13],\n [ 1.4297e-13, -1.6484e-14, 2.2436e-13, ..., 1.0261e-12,\n -6.1861e-12, 4.3061e-14]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.6896e-17, 5.2635e-17, 4.5535e-16, ..., 8.4758e-16, 3.3100e-16,\n 8.3747e-16],\n [2.8504e-17, 1.0903e-17, 1.5790e-16, ..., 8.0196e-17, 2.2647e-17,\n 6.3080e-16],\n [1.3636e-15, 3.2674e-17, 3.6252e-14, ..., 9.6234e-15, 1.7428e-14,\n 1.1789e-14],\n ...,\n [1.8268e-15, 3.9455e-16, 1.5169e-15, ..., 4.1058e-15, 2.0956e-14,\n 1.1520e-14],\n [7.9958e-18, 3.2843e-18, 8.5321e-17, ..., 2.9606e-16, 5.0586e-17,\n 2.2284e-16],\n [1.7215e-16, 4.0735e-17, 3.4478e-15, ..., 9.3851e-15, 2.7548e-15,\n 1.0645e-14]], device='cuda:0')" + }, + "16": { + "step": "tensor(7509.)", + "exp_avg": "tensor([ 1.8696e-12, 3.1927e-11, -3.2498e-12, -1.5681e-12, -4.8726e-12,\n -6.0915e-12, 6.0785e-12, 1.2132e-11, -8.9636e-12, 3.4788e-11,\n 8.4143e-14, 8.8085e-14, -8.1293e-11, -3.9934e-12, 4.0115e-12,\n 5.3146e-12, -4.7736e-12, 3.1999e-12, 1.1969e-11, -6.6063e-11,\n -7.4076e-11, -1.0668e-12, -3.1430e-12, 9.3723e-12, -5.4373e-12,\n -6.5309e-11, 5.7326e-11, 4.0742e-11, 1.2206e-12, -1.5520e-11,\n -7.4204e-11, 3.1531e-12, 3.4367e-11, 4.8409e-12, -5.4274e-12,\n -5.3914e-12, -9.6245e-12, 1.3836e-11, 5.4298e-12, -1.9201e-12,\n -2.6529e-12, 1.0713e-10, -7.2377e-11, 1.8692e-12, 6.1892e-11,\n -2.4169e-12, 2.7681e-12, 3.2555e-12, -3.1554e-12, -2.9195e-12,\n 6.6118e-11, -7.4696e-12, -6.2961e-12, 1.5621e-13, -6.8331e-12,\n -5.8819e-11, 1.9754e-12, 3.9658e-11, 3.8154e-11, 8.6444e-11,\n 2.5917e-12, -8.8802e-11, 9.4262e-12, -9.4653e-12], device='cuda:0')", + "exp_avg_sq": "tensor([5.2476e-13, 2.6570e-13, 2.6917e-11, 9.8222e-13, 9.2618e-13, 7.0907e-13,\n 4.4192e-12, 2.7695e-12, 9.4774e-13, 1.4091e-11, 4.3098e-13, 1.7467e-14,\n 4.1390e-12, 1.2832e-12, 6.1561e-12, 1.5468e-11, 9.6884e-13, 2.9611e-12,\n 5.7785e-12, 4.9903e-12, 1.6652e-12, 5.6885e-12, 4.0767e-12, 1.6293e-11,\n 3.9526e-12, 1.7631e-12, 1.0193e-11, 1.0957e-11, 1.4854e-15, 1.4253e-12,\n 1.3868e-11, 5.0138e-15, 6.7169e-12, 2.0547e-12, 8.2848e-12, 1.9439e-11,\n 7.7441e-13, 1.2711e-11, 4.7727e-12, 1.1355e-12, 9.2608e-13, 2.3759e-11,\n 2.3940e-11, 6.2079e-12, 4.9246e-12, 7.4654e-13, 5.0811e-12, 1.8058e-13,\n 3.2143e-12, 3.1267e-11, 3.1458e-11, 1.3569e-12, 4.5806e-13, 7.3705e-12,\n 8.9895e-13, 1.2323e-11, 1.3058e-11, 1.1991e-13, 1.1781e-11, 1.8780e-11,\n 2.8839e-13, 1.1376e-11, 8.3076e-14, 8.4361e-12], device='cuda:0')" + }, + "17": { + "step": "tensor(7509.)", + "exp_avg": "tensor([ 2.3411e-14, 5.2739e-11, -2.8395e-13, -6.8632e-13, 7.0010e-14,\n -9.5574e-15, -2.7517e-12, 1.0197e-10, 1.3485e-12, 1.7187e-10,\n -2.4511e-13, 1.4879e-14, 6.0571e-12, -8.3544e-14, -1.8692e-12,\n -2.1398e-12, 7.9831e-11, -1.5205e-12, -7.6776e-12, 1.2137e-11,\n 7.8540e-12, 1.0474e-10, -3.4198e-13, 1.2055e-10, 2.8298e-14,\n 4.9905e-12, 1.2699e-10, 1.3638e-10, 6.6981e-14, 4.2215e-11,\n 1.4902e-11, 1.6237e-13, 1.3791e-10, 7.6282e-11, -2.1383e-13,\n 2.4282e-13, 3.7864e-13, -3.9696e-12, -3.1938e-13, -1.3321e-13,\n -7.6548e-14, 2.4518e-10, 2.4588e-11, -1.1714e-12, 1.9063e-10,\n 3.4134e-11, -2.4044e-12, 7.4100e-13, -2.6496e-13, -7.6036e-13,\n 1.7377e-10, -4.4700e-14, -5.3023e-14, -8.2440e-13, 2.2851e-14,\n 3.7511e-11, -1.5458e-12, 5.1601e-11, 1.3562e-10, 2.4335e-10,\n -4.2255e-14, 3.5019e-13, 4.5143e-13, 9.4538e-13], device='cuda:0')", + "exp_avg_sq": "tensor([8.4660e-15, 9.1061e-16, 1.9421e-13, 7.1204e-15, 1.0141e-14, 1.2318e-14,\n 2.5485e-14, 1.0360e-14, 8.5629e-15, 1.0252e-13, 4.9707e-15, 8.9865e-16,\n 2.2084e-14, 1.0114e-14, 3.8602e-14, 2.5355e-13, 2.5968e-15, 1.9817e-14,\n 3.7124e-14, 3.9541e-14, 7.4209e-15, 3.4275e-14, 3.3637e-14, 1.2453e-13,\n 3.5657e-14, 8.1215e-15, 5.0014e-14, 6.2351e-14, 1.7121e-16, 5.0741e-15,\n 9.9139e-14, 6.7280e-16, 4.0115e-14, 7.4718e-15, 5.8125e-14, 2.2467e-13,\n 8.3102e-15, 1.3805e-13, 5.0942e-14, 1.3407e-14, 1.0548e-14, 1.3732e-13,\n 1.8388e-13, 5.5419e-14, 2.2297e-14, 1.9909e-15, 2.1876e-14, 4.8265e-15,\n 2.3164e-14, 3.9119e-13, 2.2682e-13, 1.9201e-14, 5.5461e-15, 7.8168e-14,\n 9.6200e-15, 8.0909e-14, 1.3954e-13, 1.8862e-16, 8.4949e-14, 1.0080e-13,\n 7.2941e-15, 4.5100e-14, 3.5334e-17, 6.4935e-14], device='cuda:0')" + }, + "18": { + "step": "tensor(7509.)", + "exp_avg": "tensor([-2.5188e-14, 7.9581e-11, 3.9844e-13, 7.1679e-13, -9.3243e-14,\n 1.1164e-14, 3.1351e-12, 8.4388e-11, -2.3042e-12, 1.1842e-10,\n 2.8610e-13, -5.1069e-14, 5.1083e-12, 7.7125e-14, 2.0674e-12,\n 2.2090e-12, 6.6903e-11, 1.7212e-12, 7.4497e-12, 1.0563e-11,\n 9.6131e-12, 7.8052e-11, 3.9389e-13, 8.8981e-11, -7.3105e-14,\n 5.9058e-12, 1.2100e-10, 1.1310e-10, -6.7181e-14, 5.0059e-11,\n 1.0265e-11, -1.7022e-13, 1.0636e-10, 7.2520e-11, 1.7468e-13,\n -3.1759e-13, -5.9941e-13, 3.5347e-12, 3.4952e-13, 1.5485e-13,\n 8.9895e-14, 1.8201e-10, 1.5293e-11, 1.4237e-12, 1.3688e-10,\n 5.1633e-11, 2.7846e-12, -7.1734e-13, 3.4301e-13, 1.0548e-12,\n 1.3820e-10, 4.0115e-14, 5.9123e-14, 9.1396e-13, -3.0444e-14,\n 2.8305e-11, 1.9081e-12, 7.8286e-11, 1.1213e-10, 1.7071e-10,\n 3.9133e-14, -8.4510e-13, 3.5559e-11, -1.4828e-12], device='cuda:0')", + "exp_avg_sq": "tensor([1.1502e-14, 1.4890e-15, 3.0414e-13, 1.2464e-14, 1.5011e-14, 1.4707e-14,\n 5.1954e-14, 1.8634e-14, 1.1426e-14, 1.0769e-13, 8.3296e-15, 1.4256e-15,\n 3.3794e-14, 1.9038e-14, 7.4010e-14, 1.9064e-13, 4.4814e-15, 3.4906e-14,\n 6.1101e-14, 2.4506e-14, 1.2599e-14, 3.8748e-14, 4.9889e-14, 1.3255e-13,\n 5.3778e-14, 9.9149e-15, 7.9502e-14, 9.1274e-14, 2.8969e-16, 5.6407e-15,\n 1.1580e-13, 1.0537e-15, 5.8375e-14, 1.1199e-14, 9.9641e-14, 2.2881e-13,\n 1.3207e-14, 1.5686e-13, 6.2864e-14, 2.0071e-14, 1.5904e-14, 1.9680e-13,\n 1.8412e-13, 8.2346e-14, 3.9122e-14, 2.8386e-15, 5.4672e-14, 7.0416e-15,\n 4.0423e-14, 3.6260e-13, 2.7167e-13, 2.3434e-14, 9.5651e-15, 1.0052e-13,\n 1.5942e-14, 1.0746e-13, 1.6270e-13, 3.9664e-16, 9.9803e-14, 1.6987e-13,\n 9.0435e-15, 9.9569e-14, 1.0843e-16, 1.0413e-13], device='cuda:0')" + }, + "19": { + "step": "tensor(7509.)", + "exp_avg": "tensor([[ 7.7766e-11, -3.8532e-10, 7.9393e-11, 7.9409e-11, 7.7149e-11,\n 8.2052e-11, 7.6879e-11, -6.0399e-10, 7.8469e-11, -7.7129e-10,\n 8.3797e-11, 1.6419e-11, -2.5226e-11, 8.0358e-11, 7.3468e-11,\n 7.8047e-11, -6.0956e-10, 7.4410e-11, 8.2798e-11, -8.8623e-11,\n -2.1237e-11, -6.8059e-10, 8.0373e-11, -5.6141e-10, 8.8583e-11,\n -3.2975e-11, -4.8559e-10, -5.6322e-10, 7.7034e-11, -3.9452e-10,\n -4.3964e-11, 1.3472e-11, -5.2289e-10, -5.5554e-10, 8.9367e-11,\n 8.4612e-11, 7.4649e-11, 5.9996e-11, 1.5227e-11, 7.6808e-11,\n 8.3781e-11, -7.7826e-10, -1.0906e-10, 8.4625e-11, -7.7042e-10,\n -3.2578e-10, 8.1597e-11, 8.1852e-11, 7.5129e-11, 7.9977e-11,\n -5.4989e-10, 8.1721e-12, 7.4692e-11, 7.1930e-11, 7.2330e-11,\n -1.1767e-10, 6.7388e-11, -4.7464e-10, -5.8549e-10, -7.2210e-10,\n 1.0807e-11, 2.4831e-12, -1.2307e-10, 8.1311e-11],\n [-7.8055e-11, 3.8449e-10, -7.9292e-11, -7.9628e-11, -7.7126e-11,\n -8.1905e-11, -7.7249e-11, 6.0418e-10, -7.8641e-11, 7.7319e-10,\n -8.3442e-11, -1.6799e-11, 2.8656e-11, -8.0486e-11, -7.3257e-11,\n -7.8099e-11, 6.1293e-10, -7.4589e-11, -8.3483e-11, 9.6549e-11,\n 2.7859e-11, 6.7892e-10, -8.0433e-11, 5.6480e-10, -8.8908e-11,\n 3.5681e-11, 4.8535e-10, 5.6727e-10, -7.7046e-11, 3.9891e-10,\n 4.7468e-11, -1.3777e-11, 5.2468e-10, 5.5582e-10, -8.9864e-11,\n -8.4654e-11, -7.5044e-11, -5.9849e-11, -1.5848e-11, -7.6285e-11,\n -8.3700e-11, 7.7707e-10, 1.1467e-10, -8.4564e-11, 7.7080e-10,\n 3.2971e-10, -8.1813e-11, -8.1568e-11, -7.5082e-11, -8.0331e-11,\n 5.5072e-10, -8.8229e-12, -7.4386e-11, -7.2192e-11, -7.2298e-11,\n 1.2022e-10, -6.7687e-11, 4.7117e-10, 5.8685e-10, 7.2759e-10,\n -1.1188e-11, 5.5616e-12, 1.2290e-10, -8.1291e-11]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.6507e-12, 2.2102e-13, 2.3791e-12, 1.1827e-13, 8.7325e-13, 4.2564e-12,\n 3.0200e-13, 1.0890e-12, 6.6943e-15, 3.3618e-12, 3.8629e-13, 6.5199e-13,\n 6.0411e-13, 4.6742e-13, 1.1175e-12, 1.1821e-11, 9.3931e-13, 3.5516e-13,\n 3.3497e-14, 1.6998e-11, 8.9560e-14, 5.0387e-12, 1.9464e-12, 5.5645e-12,\n 2.2165e-12, 1.7617e-12, 1.0944e-12, 1.7242e-12, 2.7361e-14, 4.7099e-12,\n 7.0842e-13, 1.0653e-12, 6.3622e-13, 2.1543e-12, 1.3079e-12, 8.3068e-12,\n 4.3581e-13, 5.9258e-12, 2.0467e-12, 1.8039e-12, 1.5473e-12, 1.4537e-12,\n 7.1953e-12, 2.4900e-12, 4.2420e-13, 2.1121e-12, 6.3852e-14, 2.5556e-12,\n 1.0349e-12, 5.9959e-12, 2.9201e-12, 2.7595e-12, 1.2833e-12, 3.8092e-12,\n 1.6134e-12, 7.3176e-13, 3.4089e-12, 1.5215e-13, 2.6801e-12, 7.3804e-13,\n 3.0179e-12, 1.0609e-13, 5.9312e-13, 1.5648e-12],\n [1.6507e-12, 2.2102e-13, 2.3791e-12, 1.1827e-13, 8.7325e-13, 4.2564e-12,\n 3.0200e-13, 1.0890e-12, 6.6943e-15, 3.3618e-12, 3.8629e-13, 6.5199e-13,\n 6.0411e-13, 4.6742e-13, 1.1175e-12, 1.1821e-11, 9.3931e-13, 3.5516e-13,\n 3.3497e-14, 1.6998e-11, 8.9560e-14, 5.0387e-12, 1.9464e-12, 5.5645e-12,\n 2.2165e-12, 1.7617e-12, 1.0944e-12, 1.7242e-12, 2.7361e-14, 4.7099e-12,\n 7.0842e-13, 1.0653e-12, 6.3622e-13, 2.1543e-12, 1.3079e-12, 8.3068e-12,\n 4.3581e-13, 5.9258e-12, 2.0467e-12, 1.8039e-12, 1.5473e-12, 1.4537e-12,\n 7.1953e-12, 2.4900e-12, 4.2420e-13, 2.1121e-12, 6.3852e-14, 2.5556e-12,\n 1.0349e-12, 5.9959e-12, 2.9201e-12, 2.7595e-12, 1.2833e-12, 3.8092e-12,\n 1.6134e-12, 7.3176e-13, 3.4089e-12, 1.5215e-13, 2.6801e-12, 7.3804e-13,\n 3.0179e-12, 1.0609e-13, 5.9312e-13, 1.5648e-12]], device='cuda:0')" + }, + "20": { + "step": "tensor(7509.)", + "exp_avg": "tensor([-4.2934e-10, 4.3014e-10], device='cuda:0')", + "exp_avg_sq": "tensor([2.0464e-11, 2.0464e-11], device='cuda:0')" + }, + "21": { + "step": "tensor(7509.)", + "exp_avg": "tensor([[-6.9597e-17, 1.2187e-17, 9.2558e-18, ..., 7.0060e-17,\n 7.5670e-17, -1.0354e-18],\n [-1.5724e-16, 1.0214e-17, -1.3911e-17, ..., -1.2506e-17,\n -1.9360e-16, 5.9763e-17],\n [-5.6532e-16, -1.7349e-17, 1.3588e-17, ..., 2.2284e-17,\n -6.5336e-17, 6.9459e-17],\n ...,\n [ 1.5307e-16, -2.0944e-17, -4.3443e-17, ..., 4.6578e-16,\n 1.2250e-15, -1.3987e-16],\n [ 3.5063e-16, 1.3218e-17, -5.5796e-17, ..., -7.0324e-16,\n -1.7782e-15, -2.4868e-16],\n [-1.0317e-16, -2.8522e-18, 1.5046e-17, ..., -1.1829e-16,\n 3.7746e-16, 7.5448e-19]], device='cuda:0')", + "exp_avg_sq": "tensor([[3.1672e-19, 3.5557e-21, 1.0037e-19, ..., 6.8286e-20, 7.2963e-18,\n 2.9213e-18],\n [2.3091e-18, 2.0320e-19, 7.8177e-18, ..., 5.5932e-18, 9.0837e-17,\n 2.2557e-19],\n [5.5386e-17, 6.1698e-18, 7.1387e-20, ..., 7.2108e-17, 6.5031e-16,\n 2.3591e-16],\n ...,\n [4.7613e-17, 2.0987e-18, 2.0381e-17, ..., 6.1395e-16, 9.8094e-16,\n 3.4485e-15],\n [2.4422e-18, 2.6546e-18, 4.1568e-17, ..., 1.1027e-16, 4.7511e-16,\n 1.5448e-16],\n [9.3415e-19, 1.5864e-19, 2.4077e-18, ..., 5.4391e-17, 9.3765e-17,\n 2.7530e-17]], device='cuda:0')" + }, + "22": { + "step": "tensor(7509.)", + "exp_avg": "tensor([-6.2402e-16, -1.1208e-15, -2.9026e-15, -3.5230e-16, 1.9936e-15,\n -3.2329e-15, -1.0976e-15, 1.0532e-15, 1.1333e-15, -9.6597e-16,\n -1.6818e-15, -1.9707e-15, -7.6744e-16, 1.1785e-15, 7.8746e-16,\n 3.0983e-15, 5.2254e-15, -1.9216e-15, 2.4045e-16, 9.7928e-16,\n -4.9238e-15, 6.5509e-16, 1.6541e-16, 1.7739e-15, -6.0911e-15,\n 1.4156e-15, 1.1431e-15, -3.2568e-15, -6.7607e-16, 7.8494e-16,\n 9.4613e-16, 1.9774e-16, -3.9456e-15, -9.0668e-16, -7.4666e-16,\n -1.9275e-16, 3.4791e-15, -4.3441e-16, 2.8955e-16, 1.2036e-15,\n -4.4324e-16, 2.0967e-16, -1.4067e-15, 9.1824e-16, 6.1331e-16,\n -1.2037e-15, -8.8162e-16, -2.5214e-15, -2.2457e-15, 8.0817e-16,\n 2.4244e-15, 9.7527e-16, -2.9743e-16, -1.8219e-15, 5.2482e-15,\n 2.1311e-15, -2.3057e-15, -6.0074e-15, -1.1226e-15, 5.2262e-15,\n -4.3527e-16, 6.8769e-15, 4.7764e-15, 5.5298e-16], device='cuda:0')", + "exp_avg_sq": "tensor([3.9829e-16, 9.2340e-15, 8.2580e-14, 2.8073e-15, 1.5933e-14, 2.5576e-14,\n 8.2238e-16, 2.3674e-14, 7.7007e-15, 1.7823e-15, 2.2847e-15, 1.6578e-14,\n 6.6032e-15, 2.3486e-13, 1.1410e-13, 2.0428e-13, 5.2648e-14, 6.7573e-16,\n 1.8153e-13, 2.0526e-14, 2.4566e-15, 6.2440e-13, 3.0084e-15, 1.9689e-13,\n 2.9707e-14, 2.1139e-14, 6.4075e-14, 6.4133e-14, 3.3436e-14, 1.5041e-14,\n 2.0266e-14, 2.7640e-15, 1.1629e-13, 1.3566e-13, 1.7799e-14, 1.7412e-16,\n 9.7431e-14, 1.2175e-14, 3.5744e-14, 2.1951e-13, 1.9033e-13, 4.4168e-14,\n 5.8521e-14, 3.0244e-14, 1.3928e-14, 1.0034e-14, 6.0975e-14, 1.9547e-14,\n 5.9682e-16, 4.8211e-14, 2.5645e-14, 5.3424e-14, 1.7604e-14, 1.6768e-14,\n 2.1169e-13, 1.4645e-14, 9.5509e-15, 1.2208e-19, 1.0762e-13, 7.1939e-14,\n 3.5657e-14, 1.9627e-13, 1.6428e-13, 1.8144e-14], device='cuda:0')" + }, + "23": { + "step": "tensor(7509.)", + "exp_avg": "tensor([ 3.2314e-16, -8.6159e-17, -1.0140e-16, -1.8715e-16, 2.8555e-15,\n -9.5462e-16, -5.8443e-17, 4.3465e-15, 3.4515e-17, 5.4089e-17,\n 3.8457e-19, -3.2091e-17, 1.2457e-18, 9.7609e-16, 3.0466e-15,\n 2.3024e-15, 4.7217e-15, 1.9779e-18, -5.0971e-17, 4.0503e-15,\n 1.6213e-15, -2.2675e-16, 4.8467e-17, -2.2040e-17, 2.8634e-16,\n -7.5913e-16, 1.8786e-15, 1.2957e-17, 1.7768e-18, 4.3084e-15,\n 2.2526e-15, 1.9341e-15, -2.4531e-16, 2.8503e-15, 3.3876e-15,\n 9.8523e-18, 3.8989e-15, 3.2195e-15, 2.5809e-15, 5.5877e-15,\n -2.8084e-17, -1.5462e-16, -9.4053e-17, -6.3543e-17, 1.4616e-16,\n 1.1026e-15, 5.0758e-18, 2.3340e-17, 1.0032e-15, -1.8222e-17,\n 2.8938e-17, 3.1939e-17, 3.7094e-17, 2.0561e-15, 6.5577e-15,\n 1.4455e-17, -4.6646e-17, -9.2477e-17, 2.0809e-17, 4.4006e-15,\n 1.1251e-18, 4.6477e-15, 2.3351e-15, 4.9679e-17], device='cuda:0')", + "exp_avg_sq": "tensor([2.2324e-19, 4.1664e-17, 2.6186e-16, 7.3001e-18, 9.7297e-17, 1.7220e-16,\n 5.1581e-20, 1.7784e-16, 1.5293e-17, 1.8925e-18, 2.4509e-17, 3.7987e-17,\n 1.2983e-17, 8.9878e-16, 1.5078e-15, 9.1451e-16, 4.5323e-16, 6.7534e-18,\n 1.3809e-15, 1.2915e-16, 1.2139e-17, 3.9693e-15, 2.9252e-18, 3.6447e-16,\n 2.0929e-16, 1.3506e-16, 4.9241e-16, 8.3309e-16, 2.3560e-16, 1.0634e-16,\n 3.1332e-16, 5.1851e-17, 7.1763e-16, 1.1716e-15, 1.2520e-16, 2.6801e-20,\n 1.0661e-16, 5.6110e-17, 1.9696e-16, 1.0379e-15, 1.5717e-15, 1.8011e-16,\n 3.6729e-16, 1.8474e-16, 1.5929e-17, 1.7140e-16, 4.1194e-16, 2.0511e-16,\n 6.4657e-18, 1.7544e-16, 8.8134e-17, 1.7767e-16, 7.0711e-17, 2.6025e-16,\n 1.1371e-15, 7.5282e-17, 3.3367e-17, 3.7806e-23, 1.1584e-15, 4.2074e-16,\n 9.8069e-17, 1.6163e-15, 5.1500e-16, 6.0103e-17], device='cuda:0')" + }, + "24": { + "step": "tensor(7509.)", + "exp_avg": "tensor([ 5.1179e-16, 4.5656e-18, -2.9788e-16, -6.5216e-17, 3.0427e-15,\n -6.5170e-16, 1.0933e-16, 3.0378e-15, -4.1194e-17, -2.2627e-17,\n 7.0053e-18, -3.7053e-17, 3.3066e-17, 2.7124e-15, 2.3877e-15,\n 3.4462e-15, 3.9574e-15, -5.6177e-18, 5.0056e-17, 2.8179e-15,\n 9.1363e-16, 2.4616e-16, -5.8074e-17, 3.7526e-16, -9.1988e-16,\n 1.7429e-15, 2.8594e-15, -4.9079e-16, -1.1605e-18, 2.8673e-15,\n 2.4662e-15, 2.0093e-15, -4.3133e-16, 2.1741e-15, 2.0757e-15,\n 9.5038e-17, 3.6928e-15, 2.4399e-15, 2.3197e-15, 3.4867e-15,\n -1.0047e-16, 9.6269e-17, -1.2516e-16, 6.5927e-17, 9.4472e-18,\n 1.1776e-15, -1.1225e-17, -3.0982e-16, 1.0833e-15, -2.4254e-17,\n 8.1487e-17, 1.8338e-16, 4.3908e-18, 1.3827e-15, 4.5098e-15,\n -1.0296e-17, 1.0737e-16, -4.9492e-16, 5.6188e-18, 4.3337e-15,\n -1.3037e-18, 4.8387e-15, 2.6042e-15, -2.9874e-17], device='cuda:0')", + "exp_avg_sq": "tensor([1.4084e-19, 4.1858e-17, 5.8347e-16, 9.4095e-18, 2.2345e-16, 2.8182e-16,\n 8.2144e-20, 2.3713e-16, 4.0987e-17, 2.6108e-18, 3.0368e-17, 9.2012e-17,\n 2.7068e-17, 2.0814e-15, 1.2177e-15, 1.8175e-15, 5.1727e-16, 1.4388e-17,\n 1.1386e-15, 2.0237e-16, 3.2788e-17, 4.0000e-15, 1.0430e-17, 1.2282e-15,\n 3.5360e-16, 2.4497e-16, 6.2656e-16, 5.0482e-16, 1.8625e-16, 1.4511e-16,\n 2.3548e-16, 3.2765e-17, 8.0369e-16, 1.1974e-15, 2.0182e-16, 2.6011e-19,\n 8.4179e-16, 1.2343e-16, 3.8405e-16, 2.0111e-15, 1.3161e-15, 2.5887e-16,\n 4.5294e-16, 2.0302e-16, 5.0845e-17, 7.8536e-17, 3.3735e-16, 1.6737e-16,\n 1.2115e-17, 3.1923e-16, 1.5487e-16, 3.4746e-16, 9.4911e-17, 1.9660e-16,\n 1.9983e-15, 9.7707e-17, 3.7942e-17, 4.5001e-23, 6.6095e-16, 5.7994e-16,\n 1.6341e-16, 1.8268e-15, 1.3536e-15, 8.2521e-17], device='cuda:0')" + }, + "25": { + "step": "tensor(7509.)", + "exp_avg": "tensor([[ 3.2671e-15, -5.0596e-15, -8.1443e-16, -6.4958e-15, 1.7098e-14,\n -4.6600e-15, -2.4492e-15, 3.4151e-14, -2.5618e-15, -3.2633e-15,\n -4.5091e-16, 7.2934e-16, -2.3551e-15, 3.5879e-15, 1.5747e-14,\n 8.0862e-15, 2.5785e-14, -2.4352e-15, -2.5278e-15, 3.2137e-14,\n 1.5291e-14, -3.3765e-15, -2.4621e-15, -4.2448e-15, 2.6531e-15,\n -4.9809e-15, 5.9827e-15, -1.6912e-15, -2.8324e-15, 2.9763e-14,\n 1.7985e-14, 1.7142e-14, 8.1991e-16, 1.3226e-14, 2.3916e-14,\n -3.8114e-15, 1.4593e-14, 2.2288e-14, 1.4680e-14, 2.7079e-14,\n -1.7224e-15, 1.7100e-15, -1.2243e-15, -1.4288e-15, 7.7979e-16,\n 5.1493e-15, -2.4017e-15, -1.6140e-15, 1.2884e-14, -8.2114e-15,\n -7.4923e-15, -5.1973e-15, -4.6907e-16, 1.9404e-14, 2.6206e-14,\n -1.6770e-15, -1.8562e-15, 1.0663e-15, -5.1571e-15, 1.8935e-14,\n -2.8511e-15, 1.6790e-14, 7.8261e-15, -2.6651e-15],\n [-3.2672e-15, 5.0597e-15, 8.1440e-16, 6.4958e-15, -1.7098e-14,\n 4.6604e-15, 2.4492e-15, -3.4151e-14, 2.5618e-15, 3.2633e-15,\n 4.5098e-16, -7.2933e-16, 2.3551e-15, -3.5876e-15, -1.5747e-14,\n -8.0857e-15, -2.5785e-14, 2.4355e-15, 2.5278e-15, -3.2137e-14,\n -1.5292e-14, 3.3764e-15, 2.4621e-15, 4.2448e-15, -2.6530e-15,\n 4.9811e-15, -5.9826e-15, 1.6912e-15, 2.8324e-15, -2.9763e-14,\n -1.7985e-14, -1.7141e-14, -8.1996e-16, -1.3226e-14, -2.3917e-14,\n 3.8114e-15, -1.4593e-14, -2.2288e-14, -1.4680e-14, -2.7079e-14,\n 1.7223e-15, -1.7100e-15, 1.2243e-15, 1.4287e-15, -7.7985e-16,\n -5.1491e-15, 2.4018e-15, 1.6139e-15, -1.2884e-14, 8.2114e-15,\n 7.4922e-15, 5.1973e-15, 4.6907e-16, -1.9404e-14, -2.6206e-14,\n 1.6770e-15, 1.8562e-15, -1.0662e-15, 5.1571e-15, -1.8935e-14,\n 2.8511e-15, -1.6790e-14, -7.8261e-15, 2.6650e-15]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.6115e-14, 3.8299e-14, 9.3997e-15, 9.0771e-15, 4.7730e-15, 1.3887e-15,\n 3.9595e-14, 3.0426e-14, 6.2932e-16, 4.7920e-15, 3.1641e-18, 2.7571e-15,\n 2.5205e-16, 8.2134e-15, 7.9757e-14, 1.6172e-14, 2.9945e-14, 1.9790e-16,\n 1.1322e-13, 2.1560e-14, 1.5349e-15, 7.1836e-14, 7.6500e-16, 6.5997e-15,\n 1.2307e-14, 1.0315e-15, 1.7018e-15, 9.4309e-15, 4.0155e-14, 6.6582e-15,\n 9.1308e-14, 1.0767e-14, 2.6823e-14, 4.7012e-14, 9.1505e-15, 4.4445e-16,\n 3.3903e-16, 3.9070e-15, 9.2079e-15, 4.3028e-14, 3.2403e-14, 5.4530e-15,\n 1.9091e-14, 6.8707e-15, 7.3803e-15, 3.6173e-15, 5.5666e-14, 6.7230e-16,\n 7.4509e-15, 3.9050e-14, 7.4139e-15, 8.7407e-15, 1.4456e-14, 1.0008e-13,\n 2.7078e-14, 1.6023e-14, 6.6668e-15, 3.9846e-15, 2.3280e-13, 7.2478e-15,\n 1.7950e-14, 2.5442e-14, 4.7883e-15, 2.5598e-14],\n [2.6115e-14, 3.8299e-14, 9.3997e-15, 9.0771e-15, 4.7730e-15, 1.3887e-15,\n 3.9595e-14, 3.0426e-14, 6.2932e-16, 4.7920e-15, 3.1641e-18, 2.7571e-15,\n 2.5205e-16, 8.2134e-15, 7.9757e-14, 1.6172e-14, 2.9945e-14, 1.9790e-16,\n 1.1322e-13, 2.1560e-14, 1.5349e-15, 7.1836e-14, 7.6500e-16, 6.5997e-15,\n 1.2307e-14, 1.0315e-15, 1.7018e-15, 9.4309e-15, 4.0155e-14, 6.6582e-15,\n 9.1308e-14, 1.0767e-14, 2.6823e-14, 4.7012e-14, 9.1505e-15, 4.4445e-16,\n 3.3903e-16, 3.9070e-15, 9.2079e-15, 4.3028e-14, 3.2403e-14, 5.4530e-15,\n 1.9091e-14, 6.8707e-15, 7.3803e-15, 3.6173e-15, 5.5666e-14, 6.7230e-16,\n 7.4509e-15, 3.9050e-14, 7.4139e-15, 8.7407e-15, 1.4456e-14, 1.0008e-13,\n 2.7078e-14, 1.6023e-14, 6.6668e-15, 3.9846e-15, 2.3280e-13, 7.2478e-15,\n 1.7950e-14, 2.5442e-14, 4.7883e-15, 2.5598e-14]], device='cuda:0')" + }, + "26": { + "step": "tensor(7509.)", + "exp_avg": "tensor([ 1.6228e-14, -1.6228e-14], device='cuda:0')", + "exp_avg_sq": "tensor([1.7538e-13, 1.7538e-13], device='cuda:0')" + }, + "27": { + "step": "tensor(7509.)", + "exp_avg": "tensor([[-1.0701e-13, 1.0555e-14, -1.7629e-14, ..., 1.0917e-15,\n -9.5173e-14, -3.2941e-14],\n [ 6.3881e-14, -6.9275e-15, 9.3062e-15, ..., -3.2583e-14,\n 2.9513e-14, 6.2957e-15],\n [-6.2970e-14, -7.7329e-15, -2.2746e-14, ..., -3.0101e-14,\n 4.3739e-14, -5.4984e-14],\n ...,\n [ 5.0885e-16, -9.1228e-14, 5.3362e-14, ..., 1.0487e-14,\n -6.6419e-14, -6.8639e-14],\n [ 1.0677e-12, -1.3602e-13, -8.6144e-14, ..., -2.8575e-13,\n -4.4200e-13, 2.0158e-13],\n [-4.9936e-14, 4.7016e-14, 1.1503e-13, ..., 6.6255e-14,\n 4.3949e-14, 4.9514e-14]], device='cuda:0')", + "exp_avg_sq": "tensor([[7.3862e-17, 7.7498e-18, 1.7720e-15, ..., 1.3378e-15, 4.6601e-15,\n 6.6939e-16],\n [1.6965e-16, 1.6928e-17, 4.7656e-16, ..., 1.9879e-15, 3.9682e-15,\n 5.9247e-16],\n [8.1525e-16, 3.8075e-16, 9.0569e-16, ..., 9.7209e-15, 1.1093e-14,\n 3.4998e-15],\n ...,\n [8.6590e-16, 4.3071e-18, 4.8547e-16, ..., 4.6691e-15, 1.5358e-16,\n 8.7594e-16],\n [4.9385e-16, 2.0267e-17, 2.2956e-15, ..., 4.5229e-15, 3.7751e-15,\n 1.5270e-15],\n [1.0350e-15, 3.9140e-16, 9.0356e-16, ..., 1.4496e-14, 7.0198e-15,\n 7.3352e-15]], device='cuda:0')" + }, + "28": { + "step": "tensor(7509.)", + "exp_avg": "tensor([-2.8999e-13, 5.4555e-13, -6.3391e-14, -1.1086e-13, 2.5360e-13,\n 1.1857e-13, -2.6023e-13, -1.5025e-12, -3.9220e-13, 4.7885e-13,\n -1.0212e-12, -1.3004e-12, 4.9009e-13, 1.8793e-13, 2.4909e-13,\n 4.5946e-12, -2.8028e-13, 2.4197e-13, 4.1738e-12, -1.6537e-12,\n -1.6498e-14, -1.5109e-12, -2.9881e-13, 5.2309e-12, -9.4578e-12,\n -1.3526e-12, 2.6577e-13, -6.3784e-14, -5.3665e-13, 4.5730e-13,\n -1.7597e-13, 4.9219e-13, 3.5412e-13, 6.4850e-13, 1.4979e-13,\n -2.5173e-13, -7.0883e-13, 7.8244e-14, 4.0988e-13, 4.2735e-13,\n 1.2409e-13, 7.4510e-13, -3.1732e-12, -1.6757e-14, -7.4921e-13,\n -4.6363e-14, 1.9912e-12, 1.1610e-14, -3.6963e-13, -2.1455e-13,\n -3.1508e-13, -9.2725e-13, -1.9145e-15, 8.9325e-13, -7.8521e-13,\n -9.9510e-13, 7.6432e-13, 6.2870e-13, 1.0682e-12, 1.3555e-13,\n -3.2711e-13, -6.2090e-13, 3.4518e-12, 1.2876e-13], device='cuda:0')", + "exp_avg_sq": "tensor([1.2979e-12, 2.1160e-12, 4.6262e-12, 6.6147e-13, 3.6106e-13, 2.3200e-14,\n 1.4693e-13, 1.0266e-12, 1.8820e-12, 2.6588e-13, 3.5520e-12, 8.6940e-13,\n 2.7941e-13, 4.5789e-14, 2.2350e-13, 5.0800e-12, 3.1810e-14, 2.1037e-13,\n 1.7460e-12, 1.6300e-13, 1.6346e-13, 1.1155e-12, 6.5326e-12, 6.2393e-13,\n 3.8540e-13, 2.4160e-13, 9.4384e-14, 2.6197e-12, 6.0408e-13, 3.6322e-13,\n 1.3189e-13, 4.5649e-13, 1.9838e-13, 1.8272e-14, 3.9068e-13, 1.8576e-12,\n 4.2745e-13, 1.0392e-12, 5.8009e-13, 1.4735e-12, 3.8741e-13, 4.9584e-12,\n 1.5604e-12, 7.8236e-14, 5.2903e-12, 2.7154e-12, 7.1192e-13, 3.2019e-12,\n 4.5717e-13, 3.3842e-13, 2.4234e-12, 1.8494e-12, 7.1107e-14, 1.9444e-12,\n 8.3904e-12, 1.3111e-13, 3.0608e-13, 3.6910e-15, 8.8168e-13, 1.0035e-14,\n 1.2277e-12, 1.5145e-12, 3.9998e-12, 7.2648e-12], device='cuda:0')" + }, + "29": { + "step": "tensor(7509.)", + "exp_avg": "tensor([ 3.8011e-14, -7.3439e-14, -7.0271e-14, 1.1122e-14, 1.0197e-15,\n -2.5810e-14, 2.5652e-14, 2.6405e-12, 4.4250e-14, -7.8770e-15,\n 1.7685e-13, 1.8685e-12, 2.3213e-14, 1.7256e-13, 1.9909e-14,\n 1.1196e-11, 7.8170e-14, 1.3724e-14, 7.3635e-12, 5.4716e-13,\n -4.3502e-14, 2.1682e-12, 3.7046e-12, 7.1981e-12, -9.5792e-12,\n 1.9811e-12, -9.7593e-14, 7.8194e-15, 2.8130e-12, -8.3812e-14,\n 4.7670e-12, -5.5444e-14, 5.7490e-12, -2.2041e-13, -3.8755e-14,\n 1.4186e-14, 5.9577e-13, 3.3194e-15, 1.4263e-14, -2.3110e-14,\n 2.2476e-12, -1.5656e-13, -3.7618e-13, 5.4492e-14, 1.7170e-12,\n -4.9059e-14, 2.0490e-12, 5.1648e-12, 1.8262e-12, 1.4770e-12,\n 3.7275e-14, 3.6753e-12, 7.6516e-14, 5.7383e-12, 3.3406e-12,\n 2.5655e-13, -3.8968e-14, 1.6713e-13, 4.3212e-12, 4.7300e-14,\n 1.3853e-12, 5.2327e-14, 4.9748e-12, -6.1909e-14], device='cuda:0')", + "exp_avg_sq": "tensor([8.8508e-15, 1.3293e-14, 3.1098e-14, 2.4778e-15, 1.1661e-15, 5.0345e-16,\n 4.2354e-16, 7.6044e-15, 8.5921e-15, 9.9151e-16, 1.6569e-14, 5.0894e-15,\n 7.6902e-16, 5.8770e-19, 6.1296e-16, 4.6014e-14, 3.6321e-18, 3.9662e-16,\n 1.6568e-14, 3.0491e-15, 2.8662e-16, 7.3358e-15, 5.0238e-14, 4.9385e-15,\n 3.6396e-15, 2.7117e-15, 5.7407e-16, 1.2797e-14, 6.8287e-15, 8.7941e-16,\n 1.3258e-15, 1.4349e-15, 3.2195e-15, 1.3706e-15, 1.4862e-15, 1.2126e-14,\n 7.0846e-15, 3.9757e-15, 2.1542e-15, 6.6982e-15, 7.7651e-15, 2.6325e-14,\n 3.9486e-14, 4.6551e-17, 4.8691e-14, 1.9470e-14, 7.3133e-15, 2.5744e-14,\n 8.1329e-15, 4.9812e-15, 1.8289e-14, 2.1763e-14, 3.8120e-17, 1.5074e-14,\n 1.1148e-13, 9.7625e-16, 1.3329e-15, 1.3586e-16, 1.0702e-14, 7.3727e-17,\n 9.5794e-15, 6.6228e-15, 2.1429e-14, 9.6679e-14], device='cuda:0')" + }, + "30": { + "step": "tensor(7509.)", + "exp_avg": "tensor([-3.0644e-14, 8.1570e-14, 9.2449e-14, -1.2486e-14, -3.2396e-16,\n -7.5897e-14, -2.6980e-14, 1.7765e-12, -2.1101e-14, 7.9436e-15,\n -1.6855e-13, 1.5982e-12, -2.4632e-14, -1.6943e-13, -2.0405e-14,\n 8.0607e-12, -8.7450e-14, -1.8802e-14, 6.3759e-12, 8.0467e-13,\n 4.0164e-14, 1.0071e-12, 2.7441e-12, 5.7638e-12, -7.6732e-12,\n 1.8675e-12, 1.5967e-13, -3.3925e-15, 2.2682e-12, 9.8240e-14,\n 3.8336e-12, 5.7266e-14, 4.4830e-12, 3.7933e-13, 4.0617e-14,\n -1.3467e-14, 1.2158e-12, 3.7190e-15, -1.6297e-14, 1.9625e-14,\n 2.2624e-12, 1.3428e-13, -3.0113e-13, -5.7681e-14, 1.1180e-12,\n 5.4535e-14, 3.5477e-12, 4.3943e-12, 1.7655e-12, 2.1770e-12,\n -4.6091e-14, 2.8195e-12, -9.6602e-14, 4.2492e-12, 2.0206e-12,\n -2.6320e-13, 4.1127e-14, -1.9968e-13, 3.8443e-12, -7.5050e-14,\n 1.4345e-12, -5.1763e-14, 3.9533e-12, 8.0122e-14], device='cuda:0')", + "exp_avg_sq": "tensor([1.1069e-14, 1.6917e-14, 3.8031e-14, 4.2510e-15, 1.7524e-15, 1.1779e-15,\n 6.3853e-16, 1.6366e-14, 1.4283e-14, 1.4326e-15, 2.6736e-14, 1.2317e-14,\n 1.2090e-15, 8.0677e-19, 1.0308e-15, 6.1734e-14, 6.2716e-18, 7.3230e-16,\n 2.6293e-14, 4.4015e-15, 2.7809e-16, 1.4846e-14, 7.8468e-14, 1.0987e-14,\n 6.8507e-15, 5.2903e-15, 5.0574e-16, 2.1643e-14, 1.0530e-14, 2.3552e-15,\n 2.8007e-15, 2.9729e-15, 4.4668e-15, 1.6414e-15, 2.3096e-15, 1.3717e-14,\n 8.7353e-15, 7.7116e-15, 3.6408e-15, 9.9314e-15, 8.3880e-15, 3.8712e-14,\n 2.5374e-14, 5.2523e-17, 6.6179e-14, 2.0676e-14, 1.2258e-14, 4.3422e-14,\n 9.1118e-15, 7.3695e-15, 1.9433e-14, 2.7560e-14, 4.8976e-17, 2.5285e-14,\n 1.0250e-13, 8.4658e-16, 2.1852e-15, 2.2446e-16, 1.3731e-14, 1.4501e-16,\n 1.9069e-14, 1.0753e-14, 5.0973e-14, 5.7201e-14], device='cuda:0')" + }, + "31": { + "step": "tensor(7509.)", + "exp_avg": "tensor([[ 3.2364e-12, 4.3976e-12, 5.4026e-12, 3.7049e-12, 2.4649e-12,\n 6.3101e-13, 6.8485e-13, -1.6392e-11, 3.4466e-12, 3.8935e-12,\n 1.7407e-12, -1.0089e-11, 3.4943e-12, 2.5802e-12, 1.5493e-12,\n -4.3640e-11, 5.9225e-12, 2.0796e-12, -3.6420e-11, -8.4537e-12,\n 2.5911e-12, -1.3434e-11, -1.7912e-11, -2.8030e-11, 9.3816e-12,\n -1.4970e-11, 2.9737e-12, 1.5374e-12, -2.3470e-11, 4.4747e-12,\n -1.7602e-11, 1.6368e-12, -3.0172e-11, 3.8786e-12, 6.2770e-12,\n 4.1461e-12, -1.4436e-11, 4.9978e-12, 1.7529e-12, 2.9607e-12,\n -1.5412e-11, 1.2963e-14, -2.5996e-12, 3.3575e-12, -1.2753e-11,\n 5.3773e-12, -1.1725e-11, -2.2566e-11, -1.5864e-11, -1.3832e-11,\n 2.4446e-12, -2.3514e-11, 3.7071e-12, -3.0810e-11, -1.9466e-11,\n 3.5844e-12, 1.7653e-12, 3.7419e-12, -2.5451e-11, 3.2181e-12,\n -1.1526e-11, 2.1437e-12, -2.1606e-11, 1.5213e-12],\n [-1.4897e-12, -3.0261e-12, -4.3552e-12, -2.1135e-12, -7.5330e-13,\n -8.4833e-13, 4.6781e-13, 4.0625e-12, -1.8420e-12, -2.1582e-12,\n -6.3511e-13, -2.0781e-13, -1.8828e-12, -9.1985e-13, 4.8819e-14,\n 3.5899e-11, -4.4818e-12, -1.0157e-12, 2.6187e-11, 2.9679e-12,\n -1.3472e-12, 2.1779e-12, 7.8878e-12, 1.7812e-11, -2.3168e-11,\n 5.5711e-12, -1.3890e-12, -1.0279e-13, 1.3608e-11, -2.9044e-12,\n 9.4913e-12, -4.7050e-13, 1.7278e-11, -3.0183e-12, -4.7269e-12,\n -3.0884e-12, 3.9676e-12, -3.3826e-12, -7.4845e-16, -1.1225e-12,\n 7.3595e-12, 1.5142e-12, -5.6245e-12, -1.7124e-12, 2.0769e-12,\n -3.8095e-12, 1.4610e-12, 1.0226e-11, 5.5335e-12, 5.5668e-12,\n -5.7451e-13, 1.4810e-11, -2.3462e-12, 1.9686e-11, 7.6610e-12,\n -2.0209e-12, -3.6192e-13, -2.3829e-12, 1.2766e-11, -2.2347e-12,\n 1.5950e-12, -8.9060e-13, 8.4308e-12, -3.6290e-15]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.9645e-13, 6.3240e-13, 1.0027e-12, 3.8982e-13, 3.5331e-13, 5.2292e-14,\n 6.1594e-14, 7.9560e-14, 2.4904e-13, 1.9400e-13, 1.6124e-13, 4.7811e-14,\n 1.8424e-13, 6.3157e-13, 1.9591e-13, 6.8794e-13, 2.2356e-13, 7.0512e-14,\n 3.3706e-13, 6.1255e-13, 1.6513e-12, 3.8475e-14, 6.8377e-13, 3.7059e-14,\n 1.3523e-15, 1.8206e-13, 2.3333e-16, 2.7024e-13, 3.2465e-13, 1.9588e-14,\n 7.5838e-15, 8.7289e-15, 6.7582e-14, 1.0749e-12, 1.2972e-13, 9.7988e-13,\n 1.0543e-12, 4.0434e-14, 7.2102e-14, 4.8144e-13, 9.6957e-13, 5.8510e-13,\n 4.3020e-12, 7.9721e-13, 8.5167e-13, 1.2413e-12, 3.2551e-13, 4.1218e-13,\n 1.0412e-12, 3.5223e-13, 6.9500e-13, 9.6016e-13, 2.5737e-13, 4.5475e-13,\n 2.1092e-12, 1.0861e-14, 5.1123e-14, 4.0905e-13, 5.7513e-13, 1.3906e-13,\n 9.7242e-14, 1.3073e-14, 1.8926e-13, 4.2763e-12],\n [1.9645e-13, 6.3240e-13, 1.0027e-12, 3.8982e-13, 3.5331e-13, 5.2292e-14,\n 6.1593e-14, 7.9559e-14, 2.4904e-13, 1.9400e-13, 1.6124e-13, 4.7810e-14,\n 1.8424e-13, 6.3157e-13, 1.9591e-13, 6.8794e-13, 2.2356e-13, 7.0512e-14,\n 3.3706e-13, 6.1255e-13, 1.6513e-12, 3.8474e-14, 6.8377e-13, 3.7058e-14,\n 1.3517e-15, 1.8206e-13, 2.3332e-16, 2.7024e-13, 3.2465e-13, 1.9587e-14,\n 7.5833e-15, 8.7288e-15, 6.7582e-14, 1.0749e-12, 1.2972e-13, 9.7988e-13,\n 1.0543e-12, 4.0434e-14, 7.2102e-14, 4.8144e-13, 9.6958e-13, 5.8510e-13,\n 4.3020e-12, 7.9721e-13, 8.5167e-13, 1.2413e-12, 3.2551e-13, 4.1218e-13,\n 1.0412e-12, 3.5223e-13, 6.9500e-13, 9.6016e-13, 2.5737e-13, 4.5475e-13,\n 2.1093e-12, 1.0861e-14, 5.1123e-14, 4.0905e-13, 5.7513e-13, 1.3906e-13,\n 9.7242e-14, 1.3073e-14, 1.8926e-13, 4.2763e-12]], device='cuda:0')" + }, + "32": { + "step": "tensor(7509.)", + "exp_avg": "tensor([-1.8025e-11, 9.2736e-12], device='cuda:0')", + "exp_avg_sq": "tensor([4.5188e-12, 4.5188e-12], device='cuda:0')" + }, + "33": { + "step": "tensor(7509.)", + "exp_avg": "tensor([[-2.9692e-19, 1.6881e-19, -8.9647e-20, ..., -2.8967e-18,\n 1.2261e-17, -2.6066e-18],\n [-8.5521e-20, 5.1672e-20, 3.2534e-20, ..., 2.1680e-19,\n -3.6502e-18, 1.0118e-19],\n [-1.0977e-20, -2.1699e-19, 8.3865e-19, ..., 9.0121e-19,\n 9.9143e-19, 2.8837e-19],\n ...,\n [ 3.3265e-23, 2.6764e-20, -2.4755e-20, ..., 5.7303e-19,\n -2.0144e-18, -3.7285e-20],\n [-7.6129e-21, 9.7299e-21, -4.8094e-21, ..., 3.8493e-19,\n -1.1276e-18, 4.9499e-20],\n [ 1.9938e-19, 6.6390e-21, -1.2137e-19, ..., 1.1481e-18,\n -3.4200e-18, 5.7951e-19]], device='cuda:0')", + "exp_avg_sq": "tensor([[8.6622e-19, 1.5027e-16, 3.2757e-16, ..., 8.2489e-18, 2.0205e-16,\n 2.7755e-16],\n [1.6939e-18, 4.6720e-17, 2.5594e-17, ..., 9.6472e-17, 4.1684e-17,\n 8.5716e-17],\n [1.9162e-18, 4.5069e-17, 3.0199e-18, ..., 7.2879e-17, 1.1714e-16,\n 1.6727e-16],\n ...,\n [5.9356e-17, 8.2005e-16, 1.6577e-16, ..., 1.8374e-15, 6.4586e-16,\n 1.1173e-15],\n [1.5462e-20, 5.1456e-18, 2.3736e-18, ..., 8.8451e-18, 3.7001e-19,\n 2.1627e-18],\n [4.6562e-21, 1.6025e-19, 2.5280e-18, ..., 1.6437e-16, 3.0130e-17,\n 6.0603e-17]], device='cuda:0')" + }, + "34": { + "step": "tensor(7509.)", + "exp_avg": "tensor([ 2.3579e-17, -4.8754e-18, 3.3159e-18, 4.1487e-18, -7.9707e-19,\n -1.0872e-18, -7.0410e-18, -5.5524e-18, 1.6157e-17, 1.2650e-17,\n -2.0722e-17, 8.5211e-19, -4.1557e-19, -5.8998e-20, 1.8525e-18,\n 8.3254e-18, -2.6885e-18, 8.7022e-18, 1.3974e-17, -1.3230e-17,\n 2.0964e-18, -1.7746e-18, -3.9423e-18, -2.4321e-18, 3.7589e-19,\n 7.4069e-21, -5.1845e-19, 5.8649e-19, -1.2159e-17, 1.3389e-17,\n 6.9646e-18, -8.1213e-18, 1.2545e-18, -1.6164e-18, 8.7150e-18,\n -4.0332e-18, 5.7689e-18, -2.6252e-17, 1.1433e-17, 3.9991e-18,\n 1.5450e-17, -1.1525e-18, 8.3701e-19, 5.2072e-19, -1.3341e-17,\n 1.9542e-18, -1.4596e-18, 7.4699e-20, -2.1216e-19, 3.9834e-18,\n 9.2238e-19, 1.1144e-18, -3.3226e-18, -1.0502e-17, 1.0071e-18,\n -1.7572e-18, -1.1677e-17, -1.0472e-18, -7.7983e-18, 5.4018e-19,\n 1.4882e-18, -2.1992e-18, -1.0862e-18, -3.1672e-18], device='cuda:0')", + "exp_avg_sq": "tensor([3.3837e-13, 6.0405e-14, 1.0168e-13, 7.8403e-14, 9.2762e-14, 6.6573e-14,\n 1.7133e-17, 3.3187e-14, 5.2685e-14, 2.4344e-13, 4.7392e-14, 1.9132e-15,\n 3.9102e-14, 2.2886e-13, 1.4957e-14, 2.6612e-14, 1.4190e-13, 1.7617e-13,\n 1.7429e-13, 2.8842e-16, 9.8133e-14, 1.6506e-14, 1.0043e-14, 1.2139e-14,\n 1.3410e-13, 1.1213e-15, 2.7377e-13, 3.3486e-13, 3.1612e-15, 1.2334e-13,\n 5.8946e-14, 4.7703e-17, 1.1868e-15, 1.1679e-13, 2.3576e-13, 7.7013e-14,\n 1.6169e-13, 5.7736e-14, 1.5048e-13, 3.0991e-13, 3.8282e-13, 2.6766e-14,\n 8.0297e-15, 2.1415e-13, 9.3129e-16, 1.1450e-14, 9.5921e-16, 3.0881e-14,\n 9.9333e-15, 3.7135e-13, 3.9230e-13, 1.5114e-13, 6.0134e-14, 1.6488e-14,\n 1.1152e-14, 1.6481e-14, 7.6871e-15, 1.0342e-13, 9.6071e-15, 3.9708e-14,\n 5.8530e-14, 7.4067e-13, 3.6313e-15, 2.9216e-14], device='cuda:0')" + }, + "35": { + "step": "tensor(7509.)", + "exp_avg": "tensor([ 5.0394e-17, 3.4709e-20, 2.6221e-17, 3.1476e-17, -4.2982e-19,\n 2.4683e-17, -9.5766e-19, 2.7970e-17, 3.8971e-17, 4.5496e-17,\n 6.7717e-18, 8.0520e-19, -4.9027e-20, 2.0561e-19, 2.3039e-19,\n 3.3501e-17, -2.3688e-20, 3.4213e-17, 4.2007e-17, 6.6470e-18,\n -3.5824e-19, 1.2982e-19, 1.2190e-17, 1.2481e-19, -4.6643e-19,\n 1.9833e-19, -6.5234e-19, -7.8760e-19, 1.2128e-17, 3.7785e-17,\n 2.9897e-17, 3.6055e-19, 1.1110e-18, 3.5829e-17, 3.1523e-17,\n 2.8184e-17, 3.6602e-17, 6.6160e-18, 3.5968e-17, -2.1276e-18,\n 4.4623e-17, -2.5437e-20, 2.1712e-19, -8.9066e-19, 1.3071e-17,\n 3.5296e-19, 4.4887e-19, -1.4152e-19, 7.3855e-20, -2.1829e-18,\n 4.0328e-17, -6.3752e-19, 3.0400e-17, 1.9307e-17, 8.1710e-21,\n 6.7184e-20, 1.9206e-17, -1.0177e-19, 1.7788e-17, -1.6212e-19,\n -7.3509e-21, -6.5136e-19, 4.1832e-19, 3.0434e-17], device='cuda:0')", + "exp_avg_sq": "tensor([1.7024e-15, 5.2992e-16, 8.1315e-16, 5.4612e-16, 8.5638e-16, 4.3941e-16,\n 3.6066e-18, 2.9030e-16, 2.8814e-16, 2.3501e-15, 3.4587e-16, 1.4274e-18,\n 1.7125e-16, 7.7615e-16, 4.6892e-17, 1.2689e-16, 1.6937e-15, 1.2612e-15,\n 8.7805e-16, 2.3384e-17, 7.9848e-16, 8.2591e-17, 8.5699e-17, 2.9665e-17,\n 6.8553e-16, 1.0151e-19, 1.7983e-15, 2.1896e-15, 2.9698e-17, 8.3949e-16,\n 5.4673e-16, 2.4417e-18, 2.4300e-20, 8.0764e-16, 1.0037e-15, 6.3475e-16,\n 1.0172e-15, 5.0613e-16, 7.5047e-16, 2.0628e-15, 1.5514e-15, 1.2144e-16,\n 1.5964e-17, 9.4078e-16, 1.7183e-17, 2.3996e-17, 1.0790e-19, 7.4268e-17,\n 3.3531e-17, 2.3519e-15, 3.7416e-15, 1.3012e-15, 3.6145e-16, 2.1510e-16,\n 2.9210e-17, 5.0094e-17, 9.2256e-17, 9.1698e-16, 7.1517e-17, 1.8347e-16,\n 3.5571e-16, 7.8417e-15, 3.2923e-18, 1.8606e-16], device='cuda:0')" + }, + "36": { + "step": "tensor(7509.)", + "exp_avg": "tensor([ 4.7619e-17, -1.2828e-19, 2.8333e-17, 3.0211e-17, 4.6011e-19,\n 2.4577e-17, 3.7548e-18, 2.3584e-17, 4.0599e-17, 3.9254e-17,\n 6.7080e-18, -6.9548e-19, 4.1198e-20, -1.4062e-19, -2.0509e-19,\n 3.4585e-17, 2.7625e-20, 3.4072e-17, 4.0481e-17, 9.2787e-18,\n 3.1779e-19, -1.4919e-19, 1.8765e-17, -1.4052e-19, 4.7667e-19,\n -1.8399e-19, 6.8521e-19, 7.8140e-19, 1.3556e-17, 3.8244e-17,\n 3.3792e-17, 5.1831e-18, -9.7468e-19, 2.8506e-17, 3.2587e-17,\n 2.4557e-17, 3.3126e-17, 5.3821e-18, 3.6840e-17, 1.8563e-18,\n 4.0627e-17, 3.0185e-20, -1.9697e-19, 9.2133e-19, 1.3220e-17,\n -3.0476e-19, -4.5023e-19, 1.4104e-19, -7.1172e-20, 2.0064e-18,\n 3.0385e-17, 6.3512e-19, 2.5900e-17, 1.7554e-17, -8.2335e-21,\n -7.4000e-20, 1.6560e-17, 1.1557e-19, 1.7918e-17, 1.5716e-19,\n 6.4388e-21, 5.6407e-19, -4.2546e-19, 2.5654e-17], device='cuda:0')", + "exp_avg_sq": "tensor([3.1268e-15, 4.2631e-16, 1.1118e-15, 9.0251e-16, 7.6649e-16, 7.7002e-16,\n 7.3614e-18, 4.6829e-16, 6.3036e-16, 2.6134e-15, 5.4176e-16, 2.9804e-18,\n 2.2204e-16, 1.7384e-15, 6.0020e-17, 3.7414e-16, 9.6331e-16, 1.8691e-15,\n 1.7881e-15, 2.9899e-17, 7.4210e-16, 6.8743e-17, 1.6872e-16, 4.5235e-17,\n 9.1272e-16, 1.4902e-19, 2.0873e-15, 2.6326e-15, 7.0110e-17, 1.5438e-15,\n 6.7305e-16, 5.3891e-18, 5.9683e-20, 1.2323e-15, 2.3108e-15, 9.0105e-16,\n 1.8302e-15, 6.7589e-16, 1.5722e-15, 2.7233e-15, 3.7871e-15, 1.6265e-16,\n 2.8698e-17, 1.5510e-15, 3.5382e-17, 4.9540e-17, 2.1198e-19, 1.6526e-16,\n 4.4857e-17, 2.8161e-15, 3.9265e-15, 1.1190e-15, 6.7244e-16, 2.3349e-16,\n 5.7486e-17, 9.5192e-17, 1.3019e-16, 7.2514e-16, 1.3267e-16, 2.7131e-16,\n 3.8870e-16, 6.3521e-15, 6.8441e-18, 4.0785e-16], device='cuda:0')" + }, + "37": { + "step": "tensor(7509.)", + "exp_avg": "tensor([[-1.3888e-16, 2.3833e-17, -1.2763e-16, -1.6670e-16, 2.5743e-17,\n -1.0946e-16, 1.3044e-17, -1.4615e-16, -1.5191e-16, -1.8154e-16,\n -3.8115e-17, 2.6549e-17, 4.6926e-18, 6.7289e-18, 2.4539e-17,\n -1.4171e-16, 2.8234e-17, -1.2301e-16, -1.6196e-16, -7.6597e-17,\n 1.9380e-17, 2.0888e-17, -7.1978e-17, 2.3026e-17, 2.2758e-17,\n 9.7075e-18, 2.7862e-17, 2.4441e-17, -1.1403e-16, -1.2944e-16,\n -1.1237e-16, -5.4107e-18, 2.3605e-17, -1.5742e-16, -9.9955e-17,\n -1.5005e-16, -1.5266e-16, -3.9409e-17, -1.1764e-16, 2.5000e-17,\n -1.3572e-16, 2.5771e-17, 2.0621e-17, 2.5013e-17, -1.3562e-16,\n 2.1360e-17, 2.6577e-17, 2.3765e-17, 2.3415e-17, 2.4428e-17,\n -1.5455e-16, 2.3350e-17, -1.6890e-16, -1.6412e-16, 2.3680e-17,\n 2.5807e-17, -1.8264e-16, 2.5177e-17, -1.1617e-16, 2.3079e-17,\n 5.5555e-18, 2.4172e-17, 2.7431e-17, -1.9832e-16],\n [ 1.3874e-16, -2.3828e-17, 1.2761e-16, 1.6657e-16, -2.5774e-17,\n 1.0927e-16, -1.3117e-17, 1.4609e-16, 1.5185e-16, 1.8174e-16,\n 3.8021e-17, -2.6586e-17, -4.6773e-18, -6.7035e-18, -2.4545e-17,\n 1.4167e-16, -2.8231e-17, 1.2299e-16, 1.6211e-16, 7.6568e-17,\n -1.9352e-17, -2.0896e-17, 7.1942e-17, -2.3027e-17, -2.2781e-17,\n -9.6766e-18, -2.7872e-17, -2.4421e-17, 1.1411e-16, 1.2921e-16,\n 1.1222e-16, 5.3642e-18, -2.3613e-17, 1.5741e-16, 9.9830e-17,\n 1.5000e-16, 1.5259e-16, 3.9149e-17, 1.1772e-16, -2.4999e-17,\n 1.3551e-16, -2.5779e-17, -2.0579e-17, -2.5022e-17, 1.3558e-16,\n -2.1353e-17, -2.6574e-17, -2.3764e-17, -2.3407e-17, -2.4436e-17,\n 1.5450e-16, -2.3350e-17, 1.6894e-16, 1.6418e-16, -2.3725e-17,\n -2.5843e-17, 1.8272e-16, -2.5179e-17, 1.1639e-16, -2.3074e-17,\n -5.5070e-18, -2.4142e-17, -2.7439e-17, 1.9840e-16]], device='cuda:0')", + "exp_avg_sq": "tensor([[8.8138e-15, 6.7423e-14, 3.1927e-14, 2.5324e-14, 3.9852e-14, 7.0873e-15,\n 1.0667e-14, 7.4323e-15, 9.0141e-16, 5.4669e-14, 1.5606e-14, 6.3263e-15,\n 1.5815e-14, 1.1896e-14, 5.8877e-15, 1.6167e-16, 2.9150e-13, 1.6330e-14,\n 1.4669e-14, 4.1134e-14, 7.8303e-14, 1.1866e-13, 9.4897e-16, 3.7298e-14,\n 3.7336e-14, 1.8979e-15, 6.8132e-14, 7.1367e-14, 1.1233e-14, 6.2927e-15,\n 3.2484e-15, 4.1873e-15, 2.2987e-15, 1.7482e-14, 7.9497e-15, 3.0657e-14,\n 2.1486e-14, 3.3234e-14, 4.4367e-15, 4.0255e-14, 1.4318e-14, 1.3038e-14,\n 6.4909e-16, 2.1017e-14, 1.0463e-14, 5.0260e-15, 1.3065e-15, 3.0724e-15,\n 1.5220e-15, 4.3519e-14, 8.4571e-14, 6.7584e-14, 1.6628e-14, 6.8532e-14,\n 6.1170e-16, 8.5160e-15, 4.8268e-14, 1.2489e-13, 2.0655e-15, 1.4927e-14,\n 6.6036e-14, 1.1645e-13, 2.0986e-15, 1.3327e-14],\n [8.8138e-15, 6.7423e-14, 3.1927e-14, 2.5324e-14, 3.9852e-14, 7.0872e-15,\n 1.0667e-14, 7.4323e-15, 9.0141e-16, 5.4669e-14, 1.5606e-14, 6.3263e-15,\n 1.5815e-14, 1.1896e-14, 5.8877e-15, 1.6166e-16, 2.9150e-13, 1.6330e-14,\n 1.4669e-14, 4.1134e-14, 7.8303e-14, 1.1866e-13, 9.4897e-16, 3.7298e-14,\n 3.7335e-14, 1.8979e-15, 6.8132e-14, 7.1367e-14, 1.1233e-14, 6.2927e-15,\n 3.2484e-15, 4.1873e-15, 2.2987e-15, 1.7482e-14, 7.9496e-15, 3.0657e-14,\n 2.1486e-14, 3.3234e-14, 4.4367e-15, 4.0255e-14, 1.4318e-14, 1.3038e-14,\n 6.4909e-16, 2.1017e-14, 1.0463e-14, 5.0260e-15, 1.3065e-15, 3.0724e-15,\n 1.5220e-15, 4.3519e-14, 8.4571e-14, 6.7584e-14, 1.6628e-14, 6.8532e-14,\n 6.1170e-16, 8.5160e-15, 4.8268e-14, 1.2489e-13, 2.0655e-15, 1.4927e-14,\n 6.6036e-14, 1.1645e-13, 2.0986e-15, 1.3327e-14]], device='cuda:0')" + }, + "38": { + "step": "tensor(7509.)", + "exp_avg": "tensor([-1.3956e-16, 1.3952e-16], device='cuda:0')", + "exp_avg_sq": "tensor([2.6663e-13, 2.6663e-13], device='cuda:0')" + }, + "39": { + "step": "tensor(7509.)", + "exp_avg": "tensor([[ 1.2659e-20, 5.5440e-21, -2.1145e-21, ..., 9.8440e-19,\n -1.0633e-18, 1.5540e-19],\n [ 1.1254e-21, 7.1180e-21, -6.5993e-20, ..., 2.1774e-19,\n -1.1022e-19, 2.4430e-20],\n [-3.6377e-20, 1.8319e-22, -1.0213e-21, ..., -3.9442e-20,\n 2.9485e-20, -1.0651e-20],\n ...,\n [-2.2226e-20, -5.5521e-21, -1.2663e-21, ..., -1.7533e-19,\n 2.8502e-19, -4.5213e-20],\n [-9.6510e-21, -1.7676e-21, -4.1632e-21, ..., -7.3176e-20,\n 2.0394e-19, -4.0479e-20],\n [-2.1098e-20, -1.0946e-21, 2.4940e-21, ..., -4.4128e-20,\n 5.7228e-20, -1.2123e-20]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.7813e-17, 2.6962e-17, 1.9167e-16, ..., 2.7566e-15, 3.5299e-15,\n 1.2268e-15],\n [2.2944e-18, 1.6459e-17, 1.6453e-16, ..., 1.9726e-16, 6.8812e-17,\n 2.8677e-16],\n [2.9845e-18, 9.7812e-20, 5.2468e-17, ..., 1.4563e-16, 2.2678e-16,\n 1.3494e-16],\n ...,\n [5.2047e-20, 1.2963e-19, 1.2410e-18, ..., 1.1892e-19, 4.6727e-18,\n 4.1896e-19],\n [1.2463e-19, 1.8063e-18, 1.2731e-17, ..., 1.5249e-18, 6.2694e-17,\n 5.6154e-18],\n [1.4132e-21, 3.9080e-19, 1.7177e-18, ..., 9.3002e-19, 4.9246e-18,\n 2.1224e-21]], device='cuda:0')" + }, + "40": { + "step": "tensor(7509.)", + "exp_avg": "tensor([-1.8115e-18, 6.6196e-20, -1.0983e-19, 6.2365e-19, 5.6768e-20,\n -1.0587e-18, -2.6597e-20, -1.3597e-19, 1.1612e-18, -9.4701e-20,\n -9.5968e-20, -8.5407e-22, 9.7411e-21, -1.7421e-19, 1.7115e-19,\n 8.7526e-20, -9.1407e-20, -2.2630e-18, -1.0228e-18, 8.2495e-19,\n -3.5337e-20, -1.7485e-20, -3.8040e-20, -1.4883e-20, 2.8361e-20,\n -9.0332e-19, -1.2382e-18, 8.2558e-20, -1.2632e-18, 6.5177e-19,\n -5.2214e-20, 1.0740e-18, 1.1973e-19, -8.2716e-20, 1.0407e-18,\n 6.0173e-21, -2.6699e-20, -1.0104e-19, 4.1353e-19, -5.0451e-20,\n 6.8720e-19, 9.6983e-19, 5.9102e-19, -1.5033e-19, 4.8140e-19,\n -7.0745e-20, 7.0259e-20, -4.5099e-20, 6.4661e-19, 1.6005e-19,\n -2.8093e-19, -1.8037e-19, -1.5055e-19, -6.6747e-21, 8.7237e-21,\n -8.6317e-20, 1.2605e-19, -6.9884e-20, 8.9199e-20, 5.0504e-19,\n -6.5400e-20, 5.1666e-19, 5.2110e-19, 2.4431e-20], device='cuda:0')", + "exp_avg_sq": "tensor([1.8466e-12, 1.1820e-13, 1.5840e-13, 1.4076e-15, 2.7613e-13, 7.9234e-13,\n 1.4516e-13, 1.9304e-13, 5.6748e-15, 1.0142e-12, 2.0292e-12, 5.1765e-14,\n 6.2281e-14, 1.6654e-12, 6.7270e-13, 1.8776e-14, 1.2217e-13, 8.2097e-13,\n 2.3213e-12, 7.4731e-15, 3.3342e-13, 9.3215e-14, 2.5017e-13, 2.4640e-13,\n 1.1508e-13, 2.1175e-13, 6.7439e-13, 1.3071e-12, 1.2650e-12, 1.5667e-14,\n 1.9782e-13, 1.0464e-13, 3.3937e-14, 8.0099e-13, 5.2725e-14, 1.0467e-13,\n 7.9384e-14, 6.7859e-13, 1.7186e-14, 1.6406e-13, 2.3313e-15, 3.7883e-13,\n 6.4777e-17, 4.8977e-13, 4.4457e-14, 5.5433e-13, 5.3164e-14, 3.9831e-13,\n 9.1633e-13, 6.0374e-15, 3.2572e-13, 1.4836e-13, 2.1700e-14, 5.2197e-13,\n 2.3021e-14, 6.1252e-13, 2.1986e-12, 6.9889e-13, 6.8177e-14, 3.2354e-14,\n 9.6072e-14, 2.9086e-15, 2.8122e-14, 1.4862e-15], device='cuda:0')" + }, + "41": { + "step": "tensor(7509.)", + "exp_avg": "tensor([-1.7782e-18, -7.3913e-19, 3.3554e-21, -5.0954e-20, -9.4109e-21,\n -2.1934e-18, 7.5431e-21, 4.3277e-20, -1.0094e-19, 9.8920e-20,\n 1.0478e-19, -1.1106e-20, 1.1061e-20, 1.5323e-19, -1.7003e-20,\n -7.6850e-19, -1.2124e-18, -1.9528e-18, -1.8846e-18, -4.8539e-19,\n 4.3484e-20, -1.9954e-21, -9.3578e-19, 2.6336e-20, 1.0527e-20,\n -2.3786e-18, -2.8525e-18, 2.0761e-20, -2.4063e-18, -2.0861e-19,\n 4.4341e-20, 9.5191e-20, -2.4536e-20, -1.2696e-18, 9.9359e-21,\n 2.8770e-21, 1.1603e-20, -5.1577e-21, -7.0244e-19, 2.5057e-20,\n -4.7723e-19, -1.8471e-19, -2.5723e-19, -4.5166e-19, -5.2153e-21,\n 4.9852e-20, -7.4819e-19, 5.4423e-20, -3.7745e-19, -1.5253e-20,\n -1.1135e-18, -1.0412e-18, -1.0218e-18, -1.4558e-18, 5.4445e-21,\n -8.7799e-19, 1.0070e-20, 1.0358e-19, -9.8042e-19, -5.0193e-19,\n 5.8684e-22, -4.4592e-19, -7.4884e-19, -9.8344e-21], device='cuda:0')", + "exp_avg_sq": "tensor([1.1228e-14, 4.8703e-16, 1.9095e-15, 3.2833e-20, 3.0179e-15, 6.1940e-15,\n 1.8731e-15, 1.5134e-15, 4.4283e-18, 6.5506e-15, 3.0315e-14, 5.5391e-16,\n 6.1452e-16, 1.1895e-14, 4.8592e-15, 8.2949e-17, 4.8403e-16, 4.0598e-15,\n 1.2865e-14, 6.1446e-19, 3.9180e-15, 9.1529e-16, 1.3777e-15, 2.2188e-15,\n 1.2434e-15, 9.1328e-16, 4.1685e-15, 3.2619e-14, 7.3168e-15, 9.5072e-18,\n 1.4745e-15, 4.2313e-16, 5.1591e-16, 7.3326e-15, 1.7912e-16, 9.4107e-16,\n 4.6250e-16, 4.3337e-15, 2.6754e-17, 1.3059e-15, 4.6603e-18, 1.2128e-15,\n 1.3589e-17, 2.5786e-15, 6.5972e-16, 3.6272e-15, 1.7856e-16, 2.2964e-15,\n 5.5028e-15, 2.5592e-16, 1.4363e-15, 5.4341e-16, 5.0522e-17, 3.4802e-15,\n 2.0382e-16, 3.9575e-15, 1.6219e-14, 4.2844e-15, 1.9450e-16, 5.3361e-17,\n 6.8690e-16, 2.8814e-18, 6.4401e-17, 4.0257e-17], device='cuda:0')" + }, + "42": { + "step": "tensor(7509.)", + "exp_avg": "tensor([-2.4594e-18, -9.0062e-19, -2.9072e-21, -2.5595e-19, 1.5440e-21,\n -2.1949e-18, -6.3006e-21, -3.3141e-20, -1.4213e-20, -9.6660e-20,\n -9.7138e-20, 9.4663e-21, -9.9964e-21, -1.2126e-19, 2.2095e-21,\n -9.2553e-19, -1.1874e-18, -2.7974e-18, -1.5892e-18, -3.9175e-19,\n -3.9179e-20, 1.5251e-21, -1.0751e-18, -2.2619e-20, -1.0451e-20,\n -2.1674e-18, -2.5033e-18, -2.4919e-20, -2.1932e-18, -3.7553e-19,\n -3.8191e-20, -1.8747e-21, 2.4478e-20, -1.1471e-18, 3.0564e-22,\n -2.6417e-21, -1.0054e-20, 5.5257e-21, -6.9679e-19, -2.1679e-20,\n -4.9245e-19, -1.3712e-19, -4.8540e-19, -9.1495e-19, -2.9882e-19,\n -4.4104e-20, -9.8871e-19, -5.1162e-20, -2.2920e-19, 1.6457e-20,\n -1.2484e-18, -1.1590e-18, -1.1893e-18, -1.2386e-18, -4.1897e-21,\n -9.2087e-19, -4.1345e-20, -1.0145e-19, -1.0359e-18, -5.5817e-19,\n -4.4040e-22, -5.2897e-19, -6.6479e-19, 9.6753e-21], device='cuda:0')", + "exp_avg_sq": "tensor([1.5627e-14, 7.9151e-16, 2.3754e-15, 5.9243e-20, 3.6155e-15, 5.5612e-15,\n 1.9911e-15, 2.5226e-15, 6.4377e-18, 1.1314e-14, 2.2820e-14, 8.1348e-16,\n 9.2096e-16, 1.6893e-14, 8.0363e-15, 1.0979e-16, 7.4004e-16, 7.5353e-15,\n 1.9062e-14, 9.5595e-19, 4.3840e-15, 1.4286e-15, 1.5198e-15, 3.0773e-15,\n 1.5521e-15, 1.5252e-15, 5.5945e-15, 1.5566e-14, 1.0375e-14, 1.1214e-17,\n 2.5955e-15, 5.2458e-16, 7.3262e-16, 6.0275e-15, 3.7335e-16, 1.5230e-15,\n 1.0474e-15, 7.1364e-15, 5.1469e-17, 2.1785e-15, 9.1998e-18, 2.7666e-15,\n 2.5874e-17, 3.8702e-15, 4.0172e-16, 6.7103e-15, 1.8174e-16, 4.6784e-15,\n 7.3774e-15, 2.9871e-16, 2.2354e-15, 9.5273e-16, 1.1196e-16, 3.4778e-15,\n 2.8484e-16, 4.5795e-15, 2.3339e-14, 7.6085e-15, 2.8964e-16, 1.1477e-16,\n 1.4079e-15, 5.1592e-18, 1.0083e-16, 8.6301e-17], device='cuda:0')" + }, + "43": { + "step": "tensor(7509.)", + "exp_avg": "tensor([[-5.8020e-18, -5.0594e-18, 1.0746e-18, -6.4418e-19, 6.3996e-19,\n -1.2220e-17, 1.3557e-18, 9.4690e-19, -1.5533e-18, 1.3452e-18,\n 1.6686e-18, 1.1315e-18, 1.4981e-18, 9.7950e-19, 1.6199e-18,\n -7.3170e-18, -1.0044e-17, -5.7805e-18, -5.9453e-18, -7.1546e-18,\n 1.3070e-18, 7.4215e-19, -7.4966e-18, 1.1616e-18, 1.5625e-18,\n -1.4331e-17, -1.1250e-17, 1.4226e-18, -9.6345e-18, -2.8518e-18,\n 1.4154e-18, 9.8040e-19, 2.0812e-18, -7.9985e-18, 3.2338e-20,\n 6.3283e-19, 1.4675e-18, -1.1973e-19, -8.2965e-18, 2.0312e-18,\n -7.0879e-18, -1.0702e-18, -4.5974e-18, -2.4366e-18, -3.5450e-19,\n 9.3627e-19, -8.5228e-18, 1.4193e-18, -1.6015e-18, 1.5932e-18,\n -7.3115e-18, -7.7122e-18, -9.7936e-18, -1.0538e-17, 2.9632e-19,\n -4.9543e-18, 1.8330e-18, 2.0712e-18, -1.0152e-17, -5.7232e-18,\n 7.1918e-19, -5.9754e-18, -8.7292e-18, 5.7757e-19],\n [ 5.8088e-18, 5.0659e-18, -1.0759e-18, 6.4779e-19, -6.4012e-19,\n 1.2227e-17, -1.3568e-18, -9.4795e-19, 1.5536e-18, -1.3464e-18,\n -1.6698e-18, -1.1327e-18, -1.4992e-18, -9.8040e-19, -1.6212e-18,\n 7.3224e-18, 1.0051e-17, 5.7849e-18, 5.9538e-18, 7.1617e-18,\n -1.3081e-18, -7.4321e-19, 7.5030e-18, -1.1629e-18, -1.5637e-18,\n 1.4339e-17, 1.1259e-17, -1.4238e-18, 9.6411e-18, 2.8566e-18,\n -1.4166e-18, -9.8005e-19, -2.0824e-18, 8.0056e-18, -3.2477e-20,\n -6.3404e-19, -1.4688e-18, 1.1980e-19, 8.3032e-18, -2.0323e-18,\n 7.0935e-18, 1.0711e-18, 4.5994e-18, 2.4423e-18, 3.5525e-19,\n -9.3735e-19, 8.5287e-18, -1.4205e-18, 1.6022e-18, -1.5944e-18,\n 7.3174e-18, 7.7191e-18, 9.8009e-18, 1.0546e-17, -2.9643e-19,\n 4.9594e-18, -1.8344e-18, -2.0724e-18, 1.0159e-17, 5.7288e-18,\n -7.2034e-19, 5.9823e-18, 8.7359e-18, -5.7874e-19]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.3101e-13, 2.3744e-14, 3.4601e-13, 7.6145e-15, 2.2044e-13, 4.1302e-13,\n 2.8699e-13, 6.6538e-14, 2.8237e-13, 9.3529e-14, 1.0139e-12, 1.1864e-13,\n 3.5089e-14, 1.6315e-13, 1.1213e-13, 5.5174e-15, 9.6941e-14, 8.2777e-15,\n 1.2059e-13, 1.3429e-13, 2.4431e-13, 1.7154e-13, 3.6035e-13, 1.6485e-13,\n 1.2048e-13, 3.2196e-14, 4.7617e-14, 2.1165e-12, 1.4071e-13, 3.2665e-13,\n 7.9968e-14, 2.9029e-13, 2.7456e-13, 8.3614e-13, 7.0545e-15, 1.1749e-13,\n 2.8033e-14, 6.7223e-14, 2.5305e-14, 1.2655e-13, 6.2691e-14, 5.3124e-14,\n 1.8516e-14, 1.2236e-13, 4.1886e-16, 1.0885e-13, 3.5885e-13, 5.8510e-14,\n 1.0601e-13, 3.9967e-13, 1.2447e-13, 5.4586e-14, 2.1531e-15, 5.9767e-13,\n 1.2497e-16, 2.5397e-13, 2.3334e-13, 8.8812e-14, 1.4977e-13, 6.9690e-14,\n 1.0366e-13, 2.5241e-15, 9.7083e-14, 2.7907e-14],\n [1.3101e-13, 2.3744e-14, 3.4601e-13, 7.6145e-15, 2.2044e-13, 4.1302e-13,\n 2.8699e-13, 6.6538e-14, 2.8237e-13, 9.3529e-14, 1.0139e-12, 1.1864e-13,\n 3.5089e-14, 1.6315e-13, 1.1213e-13, 5.5174e-15, 9.6941e-14, 8.2777e-15,\n 1.2059e-13, 1.3429e-13, 2.4431e-13, 1.7154e-13, 3.6035e-13, 1.6485e-13,\n 1.2048e-13, 3.2196e-14, 4.7617e-14, 2.1165e-12, 1.4071e-13, 3.2665e-13,\n 7.9968e-14, 2.9029e-13, 2.7456e-13, 8.3614e-13, 7.0545e-15, 1.1749e-13,\n 2.8033e-14, 6.7223e-14, 2.5305e-14, 1.2655e-13, 6.2691e-14, 5.3124e-14,\n 1.8516e-14, 1.2236e-13, 4.1886e-16, 1.0885e-13, 3.5885e-13, 5.8510e-14,\n 1.0601e-13, 3.9967e-13, 1.2447e-13, 5.4586e-14, 2.1531e-15, 5.9767e-13,\n 1.2497e-16, 2.5397e-13, 2.3334e-13, 8.8812e-14, 1.4977e-13, 6.9690e-14,\n 1.0366e-13, 2.5241e-15, 9.7083e-14, 2.7907e-14]], device='cuda:0')" + }, + "44": { + "step": "tensor(7509.)", + "exp_avg": "tensor([-8.1012e-18, 8.1016e-18], device='cuda:0')", + "exp_avg_sq": "tensor([1.6380e-12, 1.6380e-12], device='cuda:0')" + }, + "45": { + "step": "tensor(7509.)", + "exp_avg": "tensor([[ 1.5967e-17, -5.8053e-18, -3.5673e-18, ..., 4.1169e-17,\n -2.9792e-18, 6.3683e-19],\n [ 5.4282e-18, 3.4100e-18, -2.1222e-18, ..., -1.1370e-17,\n -9.2751e-19, -3.2141e-18],\n [-1.2621e-17, 1.0591e-18, 1.5500e-18, ..., -1.1452e-17,\n 1.1332e-18, -9.8279e-19],\n ...,\n [-6.5330e-18, 9.2461e-21, 1.1567e-18, ..., -5.3488e-18,\n -4.1160e-20, 6.9230e-19],\n [-7.9184e-18, 3.2502e-19, -9.8817e-19, ..., -7.1771e-18,\n 4.0450e-19, -1.0019e-18],\n [ 2.7282e-18, -5.6518e-19, -9.9390e-19, ..., -1.9859e-17,\n 2.5178e-20, -2.1055e-18]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.5666e-20, 1.3377e-18, 9.2569e-20, ..., 6.4095e-19, 1.2389e-18,\n 2.1386e-19],\n [3.7880e-19, 9.3009e-21, 7.4228e-20, ..., 4.0170e-20, 6.0498e-20,\n 6.5071e-21],\n [1.3495e-20, 6.1364e-21, 8.0019e-21, ..., 1.6663e-19, 4.5839e-20,\n 2.7708e-20],\n ...,\n [1.1495e-20, 6.5841e-20, 2.3450e-21, ..., 1.0087e-19, 2.0832e-20,\n 2.6495e-21],\n [2.2626e-20, 6.5292e-20, 1.8336e-20, ..., 4.6788e-19, 1.5368e-19,\n 1.4497e-21],\n [1.4856e-18, 1.3050e-19, 5.5518e-19, ..., 3.1155e-18, 1.6840e-21,\n 1.1095e-18]], device='cuda:0')" + }, + "46": { + "step": "tensor(7509.)", + "exp_avg": "tensor([-3.8548e-18, -5.5820e-18, -2.2337e-17, 3.8747e-17, 4.7090e-18,\n 1.2139e-17, 6.2422e-17, -2.8135e-17, 6.1898e-18, -1.6996e-17,\n 2.5315e-17, -2.3235e-17, 9.3139e-17, -5.8309e-17, 3.7083e-17,\n 3.2150e-17, -1.1887e-17, -6.8028e-18, -2.5233e-17, -1.4598e-17,\n -2.5929e-17, 4.5274e-17, 6.9822e-18, 3.0904e-17, -2.7056e-17,\n 9.4553e-17, -7.3471e-17, -1.9019e-17, -1.6921e-17, 1.6506e-18,\n -3.2100e-17, 2.4111e-17, 5.8019e-18, 3.0662e-17, -3.1400e-17,\n 2.4496e-17, 6.7766e-18, -2.8066e-17, -3.5273e-18, -3.2446e-17,\n -6.4182e-18, -4.9677e-17, -2.5115e-18, -2.5276e-17, -3.5232e-17,\n 1.1890e-17, 1.4899e-17, -8.8813e-18, -2.1136e-18, -3.3684e-18,\n 3.0071e-17, 6.3767e-17, -5.5113e-19, -1.1922e-17, 3.9131e-17,\n -1.8969e-17, 4.5204e-18, -2.4183e-18, -9.7907e-18, 2.7113e-17,\n -2.2988e-17, -1.1762e-17, -2.6094e-17, -2.9617e-17], device='cuda:0')", + "exp_avg_sq": "tensor([4.2199e-16, 2.4570e-18, 9.3803e-18, 5.2068e-16, 2.3158e-17, 2.3071e-16,\n 6.1696e-16, 3.8901e-17, 1.7631e-15, 1.9056e-16, 3.5319e-16, 1.3116e-16,\n 1.5313e-15, 2.1228e-16, 2.3652e-15, 1.4325e-16, 5.5016e-16, 2.0472e-16,\n 1.2989e-18, 4.5413e-18, 1.2574e-18, 2.9954e-16, 1.6726e-15, 4.7327e-16,\n 6.9054e-17, 8.1109e-16, 2.7658e-17, 1.2389e-16, 2.8433e-17, 3.1492e-17,\n 8.7040e-16, 3.5120e-19, 1.0151e-19, 5.7181e-16, 6.7909e-17, 2.8774e-16,\n 3.9272e-16, 4.3090e-18, 2.1229e-17, 4.8230e-18, 1.9464e-17, 2.2391e-16,\n 4.6347e-17, 1.4345e-18, 9.7185e-16, 2.4748e-15, 5.8232e-16, 2.2665e-17,\n 1.4830e-16, 1.5635e-18, 2.4954e-16, 1.5932e-16, 1.0943e-16, 2.6531e-17,\n 5.9194e-17, 9.1297e-17, 6.6096e-18, 6.2181e-17, 2.1769e-18, 3.1834e-17,\n 2.4339e-18, 1.4809e-17, 4.3841e-17, 4.7878e-16], device='cuda:0')" + }, + "47": { + "step": "tensor(7509.)", + "exp_avg": "tensor([ 7.7476e-18, -9.4099e-19, -6.0382e-19, 4.6993e-17, 9.9043e-19,\n 3.8840e-17, 8.7664e-17, 3.0002e-17, -2.6352e-18, -2.0314e-19,\n 8.8774e-17, 4.7028e-20, 8.8129e-17, -8.0078e-19, -3.0780e-18,\n 2.1733e-17, 7.0274e-19, 6.9549e-19, -2.3576e-18, -5.4675e-19,\n 9.0878e-19, 7.9155e-18, -9.7373e-19, -1.7633e-18, 1.0616e-18,\n 7.2143e-17, 1.4479e-17, -1.3848e-19, -2.9613e-18, -6.4937e-18,\n 3.9405e-18, 1.1053e-19, 1.0043e-20, 4.6496e-17, 3.9053e-17,\n 5.8420e-17, 1.8003e-17, -2.2520e-18, 7.2624e-19, -9.8124e-19,\n 8.7235e-19, 3.6891e-17, -7.4298e-18, 2.4589e-18, 2.0924e-18,\n -1.8054e-18, 3.0425e-17, -7.3120e-20, 5.5433e-19, 1.1439e-17,\n 3.4904e-18, 2.4446e-17, 9.0207e-17, -1.7855e-19, 3.2237e-17,\n 6.4931e-17, 3.2310e-18, -4.7310e-20, -8.4392e-19, 2.6464e-17,\n 2.2471e-18, -1.0276e-18, -1.3325e-18, -9.5383e-19], device='cuda:0')", + "exp_avg_sq": "tensor([5.7324e-17, 1.4168e-17, 1.8133e-20, 5.7769e-17, 1.2981e-19, 1.6825e-17,\n 1.0514e-16, 4.1700e-18, 1.3871e-17, 4.5328e-19, 3.9212e-17, 6.9143e-18,\n 1.4588e-16, 8.2990e-18, 2.2546e-17, 2.2479e-18, 2.8827e-18, 1.8158e-18,\n 4.3771e-20, 3.2546e-21, 8.2795e-20, 5.1475e-17, 9.1117e-18, 2.7345e-18,\n 3.7212e-19, 3.6358e-17, 2.7358e-19, 8.2245e-19, 2.3099e-18, 1.0011e-19,\n 2.2653e-17, 1.4404e-19, 8.0696e-20, 5.9998e-17, 7.0855e-18, 1.4094e-17,\n 3.4442e-17, 4.1640e-21, 4.4017e-20, 1.1705e-20, 2.4219e-20, 2.6710e-17,\n 6.5124e-19, 1.2103e-18, 1.0039e-17, 5.5243e-17, 6.5296e-17, 4.4828e-20,\n 5.6538e-19, 6.8327e-18, 9.7884e-18, 1.9077e-18, 3.2193e-18, 1.4485e-19,\n 7.8148e-19, 1.6346e-17, 9.3965e-20, 6.0968e-19, 5.9847e-19, 9.8607e-19,\n 6.4708e-20, 5.5956e-21, 1.0926e-19, 3.7018e-18], device='cuda:0')" + }, + "48": { + "step": "tensor(7509.)", + "exp_avg": "tensor([ 1.8230e-17, 1.2021e-17, 2.2754e-18, 4.7354e-17, -4.1267e-19,\n 3.5384e-17, 5.9995e-17, 2.7154e-17, 4.4473e-19, 2.4245e-19,\n 5.3510e-17, 6.9938e-19, 7.9596e-17, -8.0130e-18, 1.5165e-18,\n 3.7789e-17, -7.1108e-19, 2.9069e-18, 5.0226e-18, 2.6399e-18,\n 2.5778e-18, 3.6146e-17, -7.4197e-19, 2.1042e-18, -1.5124e-18,\n 6.7299e-17, 7.3182e-18, 1.8154e-18, 1.0140e-17, -2.1459e-18,\n 1.5965e-17, -6.1222e-19, -3.7908e-21, 4.3619e-17, 2.4204e-17,\n 4.8467e-17, 2.3830e-17, 1.1212e-18, 2.2692e-19, -3.3242e-18,\n 2.3523e-18, 2.2505e-17, 6.4346e-18, 9.7263e-18, -4.7924e-18,\n 1.7500e-18, 3.3573e-17, 8.4630e-19, -3.4283e-19, 1.3720e-17,\n 2.5640e-17, 4.3334e-17, 4.8460e-17, 3.0086e-19, 4.0985e-17,\n 3.4201e-17, -3.7464e-18, 4.0017e-19, 9.2267e-18, 4.0938e-17,\n 9.1710e-18, 7.4485e-19, 4.5038e-18, -1.0551e-18], device='cuda:0')", + "exp_avg_sq": "tensor([6.6543e-17, 9.7733e-18, 1.8658e-20, 6.0666e-17, 1.6478e-19, 2.8745e-17,\n 5.7814e-17, 5.7831e-18, 3.0361e-17, 1.1694e-18, 3.1769e-17, 3.6920e-18,\n 1.9641e-16, 6.8277e-18, 3.0602e-17, 9.8761e-18, 6.7837e-18, 2.7719e-18,\n 9.2856e-20, 1.6607e-20, 3.2989e-19, 2.5600e-17, 1.5632e-17, 4.3199e-18,\n 1.0498e-18, 3.1478e-17, 1.2036e-18, 7.5606e-19, 3.6713e-18, 2.0349e-18,\n 3.6403e-17, 6.1692e-19, 4.2738e-20, 2.7769e-17, 6.2304e-18, 2.1249e-17,\n 4.0801e-17, 2.7518e-21, 1.1239e-19, 3.1462e-20, 7.6232e-20, 1.7805e-17,\n 1.2803e-18, 1.1132e-18, 2.1567e-17, 2.9988e-17, 4.0668e-17, 8.3891e-20,\n 9.5417e-19, 7.2568e-18, 2.3203e-17, 9.9247e-18, 1.0283e-17, 1.8578e-19,\n 6.1384e-18, 1.3881e-17, 1.6597e-19, 4.5083e-19, 7.2370e-19, 8.1196e-18,\n 9.5344e-20, 1.3155e-20, 2.7294e-19, 6.6149e-18], device='cuda:0')" + }, + "49": { + "step": "tensor(7509.)", + "exp_avg": "tensor([[ 4.0605e-17, -1.3465e-17, -3.6292e-17, 1.8534e-16, -3.4972e-17,\n 2.1774e-16, 3.7984e-16, 2.2532e-16, -2.6166e-17, -5.3410e-17,\n 5.6988e-16, -3.7643e-17, 2.8955e-16, 3.4451e-18, -1.6681e-17,\n 8.6284e-17, -5.9812e-17, -4.0419e-17, -1.7441e-17, -4.2461e-17,\n -9.4496e-18, 5.6254e-17, -1.6618e-17, -4.7502e-17, -3.2241e-17,\n 3.3577e-16, 1.4351e-16, -2.5624e-17, -2.2322e-17, -3.9676e-18,\n 1.8507e-17, -4.6725e-17, -7.1267e-17, 2.6111e-16, 3.4900e-16,\n 3.3056e-16, 6.9174e-17, -4.8170e-17, -1.5521e-17, 1.5210e-17,\n -3.7888e-17, 2.7665e-16, -5.7970e-17, 3.2634e-17, -2.0756e-17,\n -2.0488e-17, 1.4057e-16, -3.2107e-17, -5.1154e-17, 6.3874e-17,\n 8.0887e-18, 1.5244e-16, 6.5441e-16, -5.0404e-17, 1.7056e-16,\n 4.2231e-16, -3.9136e-17, -2.9402e-17, -1.0007e-17, 1.6508e-16,\n 1.9974e-17, -2.2926e-17, -1.4744e-17, -2.9732e-17],\n [-4.0237e-17, 1.3551e-17, 3.6254e-17, -1.8502e-16, 3.4942e-17,\n -2.1738e-16, -3.7916e-16, -2.2505e-16, 2.6119e-17, 5.3370e-17,\n -5.6955e-16, 3.7604e-17, -2.8911e-16, -3.4872e-18, 1.6649e-17,\n -8.6221e-17, 5.9759e-17, 4.0366e-17, 1.7399e-17, 4.2418e-17,\n 9.3629e-18, -5.5942e-17, 1.6562e-17, 4.7449e-17, 3.2222e-17,\n -3.3538e-16, -1.4326e-16, 2.5573e-17, 2.2534e-17, 3.9277e-18,\n -1.8441e-17, 4.6693e-17, 7.1201e-17, -2.6063e-16, -3.4878e-16,\n -3.3013e-16, -6.8902e-17, 4.8125e-17, 1.5490e-17, -1.5266e-17,\n 3.7841e-17, -2.7619e-16, 5.8092e-17, -3.2406e-17, 2.0698e-17,\n 2.0447e-17, -1.3998e-16, 3.2072e-17, 5.1091e-17, -6.3799e-17,\n -7.8303e-18, -1.5237e-16, -6.5412e-16, 5.0381e-17, -1.7019e-16,\n -4.2216e-16, 3.9092e-17, 2.9356e-17, 9.9547e-18, -1.6470e-16,\n -2.0034e-17, 2.2871e-17, 1.4682e-17, 2.9684e-17]], device='cuda:0')", + "exp_avg_sq": "tensor([[7.9828e-17, 2.2378e-17, 1.5289e-16, 6.4244e-17, 9.9719e-17, 1.3768e-16,\n 1.9763e-16, 3.3901e-16, 2.0523e-16, 8.2279e-17, 3.4840e-16, 2.8366e-16,\n 1.7803e-16, 6.4958e-17, 8.7705e-16, 3.8210e-18, 1.2518e-16, 5.6741e-17,\n 8.9333e-19, 1.0444e-16, 1.2949e-16, 9.2887e-16, 5.7689e-16, 6.9901e-17,\n 1.2169e-17, 4.3955e-16, 3.1478e-17, 3.2822e-16, 1.4522e-16, 2.0886e-18,\n 3.2739e-16, 1.3490e-17, 1.1556e-17, 9.6389e-16, 8.6003e-16, 3.9318e-16,\n 3.0332e-17, 3.2573e-16, 1.0317e-17, 6.6320e-18, 7.7368e-18, 9.0583e-16,\n 1.0190e-17, 3.5328e-18, 1.7344e-16, 2.3970e-15, 2.9574e-16, 2.7002e-17,\n 2.7979e-16, 5.0156e-17, 7.1477e-17, 9.1143e-17, 1.6306e-16, 1.7980e-17,\n 1.7633e-18, 1.4942e-16, 1.4182e-16, 1.5707e-16, 3.2968e-17, 1.7384e-17,\n 5.9933e-17, 1.1689e-15, 1.6153e-16, 1.6073e-16],\n [7.9828e-17, 2.2378e-17, 1.5289e-16, 6.4244e-17, 9.9719e-17, 1.3768e-16,\n 1.9763e-16, 3.3901e-16, 2.0523e-16, 8.2279e-17, 3.4840e-16, 2.8366e-16,\n 1.7803e-16, 6.4958e-17, 8.7705e-16, 3.8210e-18, 1.2518e-16, 5.6741e-17,\n 8.9333e-19, 1.0444e-16, 1.2949e-16, 9.2887e-16, 5.7689e-16, 6.9901e-17,\n 1.2169e-17, 4.3955e-16, 3.1478e-17, 3.2822e-16, 1.4522e-16, 2.0886e-18,\n 3.2739e-16, 1.3490e-17, 1.1556e-17, 9.6389e-16, 8.6003e-16, 3.9318e-16,\n 3.0332e-17, 3.2573e-16, 1.0317e-17, 6.6320e-18, 7.7368e-18, 9.0583e-16,\n 1.0190e-17, 3.5328e-18, 1.7344e-16, 2.3970e-15, 2.9574e-16, 2.7002e-17,\n 2.7979e-16, 5.0156e-17, 7.1477e-17, 9.1143e-17, 1.6306e-16, 1.7980e-17,\n 1.7633e-18, 1.4942e-16, 1.4182e-16, 1.5707e-16, 3.2968e-17, 1.7384e-17,\n 5.9933e-17, 1.1689e-15, 1.6153e-16, 1.6073e-16]], device='cuda:0')" + }, + "50": { + "step": "tensor(7509.)", + "exp_avg": "tensor([ 2.3972e-16, -2.3943e-16], device='cuda:0')", + "exp_avg_sq": "tensor([1.6456e-15, 1.6456e-15], device='cuda:0')" + }, + "51": { + "step": "tensor(7509.)", + "exp_avg": "tensor([[ 8.9473e-14, 1.8103e-13, -1.0064e-13, ..., -9.1121e-14,\n -1.7295e-13, -8.0893e-14],\n [-1.0287e-13, 5.3911e-14, -5.7316e-13, ..., 2.6439e-13,\n 5.4864e-14, 1.4455e-13],\n [ 5.8113e-14, 2.5653e-14, -9.4545e-14, ..., 3.3778e-14,\n -2.4118e-14, -3.9817e-14],\n ...,\n [-1.1865e-13, 2.5225e-14, 7.3483e-14, ..., -3.3481e-13,\n 1.6205e-12, 1.0930e-13],\n [ 9.2404e-13, 2.9725e-13, -6.4676e-13, ..., -4.3405e-12,\n -2.8359e-12, -7.1783e-13],\n [ 7.9277e-14, -4.6815e-14, -1.2294e-13, ..., -1.1818e-13,\n 7.9142e-14, -6.4225e-14]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.1273e-16, 1.0781e-16, 2.2337e-15, ..., 3.4063e-16, 3.2518e-16,\n 1.0771e-15],\n [4.8117e-18, 3.2304e-17, 3.7518e-17, ..., 1.8848e-17, 2.7335e-17,\n 3.0738e-17],\n [9.9227e-16, 1.3157e-16, 5.5554e-15, ..., 1.1529e-15, 1.2784e-15,\n 2.6243e-15],\n ...,\n [4.2319e-17, 1.2030e-16, 1.6090e-15, ..., 3.5220e-16, 3.5936e-16,\n 6.7322e-16],\n [5.9494e-16, 1.1210e-16, 8.8734e-16, ..., 1.0165e-15, 1.4267e-15,\n 2.6742e-15],\n [3.0416e-16, 2.4060e-16, 9.2691e-16, ..., 4.3603e-16, 5.7360e-16,\n 1.3346e-15]], device='cuda:0')" + }, + "52": { + "step": "tensor(7509.)", + "exp_avg": "tensor([-4.1160e-12, -9.9884e-13, 7.0286e-13, -9.2985e-12, -2.0028e-12,\n 2.4375e-13, -1.7929e-12, -1.0630e-11, -1.8623e-13, 6.9297e-13,\n -4.6295e-12, 9.0385e-13, 1.0183e-12, 3.7831e-12, -8.2866e-13,\n 7.2046e-13, 1.3612e-12, 2.2432e-12, -6.2715e-13, 1.5146e-12,\n 1.7388e-12, -2.6988e-13, -1.0680e-12, -1.0443e-12, 4.4383e-13,\n 1.9153e-13, 7.8028e-13, -6.1536e-14, -2.0418e-12, 1.6842e-11,\n -1.1342e-13, 2.2555e-13, -1.0535e-11, 8.5503e-13, 2.3883e-13,\n 3.3950e-13, -8.5484e-12, -3.3900e-13, 5.5213e-12, -5.9879e-13,\n 7.5496e-13, -9.4635e-13, 2.6564e-13, -1.0236e-13, 1.0476e-11,\n -8.9450e-13, -4.1327e-12, 1.7106e-13, -3.4369e-13, 2.7803e-13,\n -3.8318e-12, 1.6298e-12, 1.1690e-12, 1.0740e-12, 7.6559e-12,\n 4.8226e-12, 1.8233e-12, 3.4854e-12, 2.1943e-12, 2.9753e-13,\n -3.0184e-12, -4.5944e-12, 6.4206e-13, 4.9425e-13], device='cuda:0')", + "exp_avg_sq": "tensor([1.3687e-12, 9.5700e-14, 5.6399e-12, 4.7991e-12, 3.0555e-12, 1.2506e-14,\n 2.0037e-12, 2.2687e-13, 1.1477e-13, 5.8013e-15, 3.4357e-13, 2.8454e-14,\n 1.1856e-12, 1.6320e-13, 5.3885e-12, 2.5301e-14, 5.0494e-13, 8.2873e-14,\n 1.3887e-11, 2.7799e-13, 2.7829e-14, 4.0750e-13, 2.0887e-13, 4.9468e-12,\n 1.8888e-13, 1.0649e-13, 2.2662e-12, 2.3845e-12, 8.3080e-13, 4.8107e-12,\n 2.5522e-13, 1.4060e-11, 2.8544e-12, 5.7076e-12, 5.4522e-12, 2.4661e-15,\n 3.5719e-13, 5.0929e-12, 1.9849e-13, 9.0727e-15, 3.1762e-14, 4.0757e-12,\n 2.1381e-12, 8.0245e-13, 2.1576e-12, 7.1906e-13, 1.6600e-12, 4.4684e-13,\n 3.2319e-13, 3.0998e-13, 1.5287e-12, 2.2845e-14, 2.4002e-12, 1.1680e-12,\n 1.3324e-11, 1.3368e-11, 6.3288e-14, 7.1885e-14, 5.0210e-12, 2.2892e-12,\n 2.1392e-13, 7.8735e-13, 1.4100e-12, 1.7042e-12], device='cuda:0')" + }, + "53": { + "step": "tensor(7509.)", + "exp_avg": "tensor([-1.8798e-11, -8.0148e-12, 1.0825e-13, -3.1389e-11, 6.8382e-13,\n -2.9370e-13, 1.1387e-12, -2.2376e-11, 3.3144e-14, -1.6739e-13,\n -1.9659e-11, 1.4369e-13, -1.0956e-11, -3.0612e-12, -2.4355e-11,\n -7.1236e-14, 1.3848e-14, -2.0639e-13, 4.3803e-13, -4.9762e-12,\n 6.6272e-13, -4.5526e-14, -8.5870e-14, 3.5488e-13, -4.2752e-14,\n -5.3982e-12, 4.7772e-14, 1.2076e-13, -1.4471e-11, 1.1107e-12,\n 2.6557e-14, 3.7294e-13, -3.9909e-11, -2.8602e-11, 3.0403e-13,\n 2.0538e-13, -1.7110e-11, 3.4346e-13, -2.0583e-12, -1.3930e-13,\n 1.8440e-13, -1.7374e-11, 1.8676e-13, 9.8538e-14, -1.1047e-11,\n 1.6950e-13, -2.2237e-11, 7.4447e-14, 8.4850e-14, 3.2900e-14,\n -2.5491e-11, 3.1881e-13, 6.0259e-15, 4.9029e-14, -3.5587e-12,\n -1.4688e-11, -2.5228e-13, -1.2051e-12, -1.7900e-11, 1.2600e-13,\n -8.2221e-12, -2.1137e-11, -2.5823e-11, 6.7254e-14], device='cuda:0')", + "exp_avg_sq": "tensor([1.1637e-14, 8.9267e-16, 2.4535e-14, 3.2491e-14, 1.3206e-14, 1.2412e-17,\n 7.4978e-15, 1.6932e-15, 5.2267e-16, 3.0572e-18, 3.2667e-15, 1.6206e-16,\n 1.4495e-14, 2.3403e-15, 4.3286e-14, 1.0675e-16, 2.3438e-15, 9.1741e-16,\n 1.1934e-13, 3.5300e-15, 1.8078e-16, 2.4990e-15, 1.0930e-15, 3.3266e-14,\n 9.9210e-16, 1.0770e-15, 1.2006e-14, 1.7719e-14, 1.0529e-14, 3.2905e-14,\n 8.1743e-16, 1.2051e-13, 1.7158e-14, 3.9280e-14, 3.4148e-14, 5.1735e-18,\n 3.8176e-15, 3.6274e-14, 2.3979e-15, 3.1424e-18, 1.5795e-16, 4.9434e-14,\n 8.4747e-15, 3.3776e-15, 1.6508e-14, 4.2585e-15, 1.2907e-14, 2.2651e-15,\n 1.3651e-15, 1.1862e-15, 1.1258e-14, 8.9324e-17, 2.1098e-14, 6.3362e-15,\n 1.2551e-13, 1.4017e-13, 5.7416e-16, 7.0953e-16, 4.4282e-14, 1.6090e-14,\n 1.8260e-15, 5.6283e-15, 9.4501e-15, 1.0992e-14], device='cuda:0')" + }, + "54": { + "step": "tensor(7509.)", + "exp_avg": "tensor([-1.6250e-11, -1.0354e-11, -1.1951e-13, -2.3426e-11, -6.6546e-13,\n 2.7314e-13, -1.2063e-12, -1.9810e-11, -2.8402e-14, 1.9554e-13,\n -1.7263e-11, -3.0528e-13, -1.0413e-11, -5.9934e-12, -1.6141e-11,\n 1.8743e-14, -5.6022e-14, -4.2984e-12, -4.8448e-13, -8.3787e-12,\n -1.0445e-12, 4.5581e-14, 7.6292e-14, -3.7007e-13, 4.4048e-14,\n -9.3231e-12, -4.0039e-14, -1.2178e-13, -1.4023e-11, -3.2982e-14,\n -2.1309e-14, -3.9627e-13, -2.5588e-11, -1.7908e-11, -3.1833e-13,\n -2.3011e-13, -1.7850e-11, -3.1214e-13, -4.4439e-12, 1.5602e-13,\n -6.1838e-13, -1.4562e-11, -1.9511e-13, -8.6212e-14, -8.7189e-12,\n -1.5578e-13, -1.6739e-11, -7.7477e-14, -9.5389e-14, -3.4604e-14,\n -1.8059e-11, -5.7311e-13, -1.3471e-14, -4.5444e-14, -4.3761e-12,\n -1.1730e-11, -3.9666e-12, -4.6812e-12, -1.3075e-11, -1.1345e-13,\n -1.2377e-11, -1.7188e-11, -1.8005e-11, -6.2738e-14], device='cuda:0')", + "exp_avg_sq": "tensor([1.6006e-14, 1.6711e-15, 5.0248e-14, 5.4175e-14, 2.7806e-14, 1.7579e-17,\n 1.7835e-14, 3.6485e-15, 8.6187e-16, 6.4641e-18, 5.1305e-15, 2.2733e-16,\n 1.3607e-14, 2.4200e-15, 5.7282e-14, 1.4388e-16, 4.3101e-15, 1.1542e-15,\n 1.2128e-13, 3.4441e-15, 4.4745e-16, 3.2110e-15, 1.4268e-15, 4.4466e-14,\n 1.4473e-15, 1.8102e-15, 1.8688e-14, 2.0118e-14, 9.6852e-15, 5.2129e-14,\n 1.7232e-15, 1.2741e-13, 3.3057e-14, 6.0069e-14, 4.6074e-14, 8.6451e-18,\n 5.0107e-15, 4.2240e-14, 2.7597e-15, 9.5930e-18, 2.4205e-16, 4.4572e-14,\n 1.9345e-14, 6.8905e-15, 2.7288e-14, 6.3629e-15, 2.0999e-14, 3.5629e-15,\n 2.7244e-15, 2.5950e-15, 2.0863e-14, 1.1655e-16, 1.9570e-14, 1.1276e-14,\n 1.3934e-13, 1.3369e-13, 8.6519e-16, 9.7393e-16, 5.2342e-14, 2.0197e-14,\n 3.2907e-15, 9.8248e-15, 1.6404e-14, 1.4233e-14], device='cuda:0')" + }, + "55": { + "step": "tensor(7509.)", + "exp_avg": "tensor([[ 1.0623e-10, 5.3316e-11, -1.1328e-11, 9.9639e-11, -9.5874e-12,\n -1.2679e-11, -1.6024e-11, 9.9681e-11, -1.0999e-11, -1.0784e-11,\n 9.1275e-11, -1.4230e-11, 7.1936e-11, 2.7955e-11, 8.6901e-11,\n -1.2193e-11, -1.7870e-11, 5.3320e-12, -7.6070e-12, 4.1783e-11,\n -9.6906e-12, -1.2976e-11, -7.7767e-12, -6.1124e-12, -1.1203e-11,\n 4.1133e-11, -6.5534e-12, -1.2658e-11, 9.6749e-11, -6.0342e-12,\n -1.3366e-11, -9.5075e-12, 1.1556e-10, 9.5935e-11, -1.3661e-11,\n -1.2075e-11, 9.6533e-11, -1.0674e-11, 1.7033e-11, -1.1160e-11,\n -1.4226e-11, 7.9013e-11, -1.5604e-11, -8.0792e-12, 3.0089e-11,\n -1.2970e-11, 7.3636e-11, -1.6683e-11, -1.4420e-11, -1.0199e-11,\n 7.1369e-11, -1.5851e-11, -1.3729e-11, -1.5603e-11, 4.6772e-12,\n 4.8653e-11, 6.2418e-12, 1.2859e-11, 6.8072e-11, -1.2209e-11,\n 4.6841e-11, 1.0337e-10, 7.4931e-11, -8.0808e-12],\n [-1.0448e-10, -5.5488e-11, 1.1396e-11, -9.9898e-11, 9.4387e-12,\n 1.2984e-11, 1.5900e-11, -1.0367e-10, 1.1157e-11, 1.0622e-11,\n -9.0109e-11, 1.4610e-11, -6.8913e-11, -2.8372e-11, -8.6625e-11,\n 1.2436e-11, 1.8372e-11, -5.3871e-12, 7.4603e-12, -4.0291e-11,\n 1.0820e-11, 1.2906e-11, 8.0439e-12, 6.4013e-12, 1.1174e-11,\n -3.9764e-11, 6.7543e-12, 1.2614e-11, -9.6188e-11, 3.0130e-12,\n 1.3108e-11, 9.6747e-12, -1.1944e-10, -1.0045e-10, 1.3440e-11,\n 1.2279e-11, -9.6637e-11, 1.0227e-11, -1.8002e-11, 1.1502e-11,\n 1.4256e-11, -8.1523e-11, 1.5263e-11, 8.2513e-12, -3.2192e-11,\n 1.3354e-11, -7.4451e-11, 1.6752e-11, 1.4557e-11, 9.8333e-12,\n -7.1476e-11, 1.5939e-11, 1.3621e-11, 1.5785e-11, -1.2713e-11,\n -4.6986e-11, -6.2629e-12, -1.2658e-11, -7.0531e-11, 1.1623e-11,\n -4.7853e-11, -9.6700e-11, -7.2547e-11, 8.2415e-12]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.3670e-12, 2.2028e-13, 2.8918e-13, 4.1798e-13, 1.6434e-13, 1.8740e-12,\n 2.2010e-14, 9.3176e-14, 5.5100e-14, 2.3715e-13, 1.6009e-13, 8.6557e-13,\n 2.8673e-12, 1.8678e-12, 7.7717e-13, 8.1827e-13, 4.0240e-14, 1.0926e-12,\n 1.8843e-12, 2.3293e-12, 1.1161e-13, 8.6044e-13, 7.9243e-13, 8.3294e-13,\n 4.8240e-13, 3.2605e-13, 4.0988e-13, 1.3813e-12, 2.5896e-12, 6.0148e-13,\n 1.5193e-13, 1.5075e-12, 1.2300e-13, 6.0480e-13, 4.9918e-13, 4.4668e-15,\n 4.2242e-13, 1.2871e-12, 1.4240e-12, 8.2557e-15, 5.4497e-13, 2.7259e-12,\n 2.1138e-13, 2.9377e-13, 1.1239e-13, 2.6691e-13, 1.6661e-13, 2.5678e-13,\n 1.1434e-13, 1.0302e-13, 7.2439e-14, 1.2373e-12, 2.3837e-12, 1.0252e-13,\n 1.4158e-12, 2.4691e-12, 7.4660e-13, 5.5520e-13, 1.2506e-12, 1.0254e-12,\n 2.3582e-13, 3.3213e-13, 6.1570e-14, 9.0146e-13],\n [1.3670e-12, 2.2028e-13, 2.8918e-13, 4.1798e-13, 1.6434e-13, 1.8740e-12,\n 2.2010e-14, 9.3176e-14, 5.5100e-14, 2.3715e-13, 1.6009e-13, 8.6557e-13,\n 2.8673e-12, 1.8678e-12, 7.7717e-13, 8.1827e-13, 4.0240e-14, 1.0926e-12,\n 1.8843e-12, 2.3293e-12, 1.1161e-13, 8.6044e-13, 7.9243e-13, 8.3294e-13,\n 4.8240e-13, 3.2605e-13, 4.0988e-13, 1.3813e-12, 2.5896e-12, 6.0148e-13,\n 1.5193e-13, 1.5075e-12, 1.2300e-13, 6.0480e-13, 4.9918e-13, 4.4668e-15,\n 4.2242e-13, 1.2871e-12, 1.4240e-12, 8.2557e-15, 5.4497e-13, 2.7259e-12,\n 2.1138e-13, 2.9377e-13, 1.1239e-13, 2.6691e-13, 1.6661e-13, 2.5678e-13,\n 1.1434e-13, 1.0302e-13, 7.2437e-14, 1.2373e-12, 2.3837e-12, 1.0252e-13,\n 1.4158e-12, 2.4691e-12, 7.4660e-13, 5.5520e-13, 1.2506e-12, 1.0254e-12,\n 2.3582e-13, 3.3213e-13, 6.1568e-14, 9.0146e-13]], device='cuda:0')" + }, + "56": { + "step": "tensor(7509.)", + "exp_avg": "tensor([ 7.8337e-11, -7.8513e-11], device='cuda:0')", + "exp_avg_sq": "tensor([6.9819e-12, 6.9819e-12], device='cuda:0')" + }, + "57": { + "step": "tensor(7509.)", + "exp_avg": "tensor([[ 1.7573e-15, 8.1430e-11, 7.7183e-15, 6.8972e-18, 3.3750e-12,\n 5.5114e-16, -9.4947e-12, 1.0172e-05],\n [-3.8696e-20, -1.8988e-15, -1.5536e-19, -1.3497e-22, -4.4110e-17,\n -9.0928e-21, 3.3231e-16, 3.7346e-10],\n [ 9.2608e-16, 4.3120e-11, 4.0393e-15, 3.6018e-18, 1.7178e-12,\n 2.8437e-16, -5.2481e-12, 4.1734e-06],\n [-1.7155e-17, -7.9928e-13, -7.4721e-17, -6.6595e-20, -3.1653e-14,\n -5.2534e-18, 9.7831e-14, -7.4274e-08],\n [-8.4217e-18, -3.9490e-13, -3.6345e-17, -3.2309e-20, -1.4847e-14,\n -2.5085e-18, 5.0971e-14, -2.2339e-08],\n [ 2.0283e-15, 9.4081e-11, 8.8937e-15, 7.9428e-18, 3.8671e-12,\n 6.3350e-16, -1.1073e-11, 1.1191e-05],\n [-1.7640e-20, 5.6066e-17, -1.9892e-19, -2.0918e-22, -2.8498e-16,\n -3.0798e-20, -9.2234e-16, -5.0441e-09],\n [-4.7737e-18, -2.2500e-13, -2.0437e-17, -1.8121e-20, -8.0714e-15,\n -1.3874e-18, 3.0232e-14, -6.0955e-09]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.0277e-10, 1.7272e-10, 8.2189e-12, 3.4402e-11, 6.2002e-11, 1.9182e-12,\n 2.6078e-11, 5.2307e-07],\n [4.0624e-13, 7.6913e-13, 3.1625e-14, 1.2254e-13, 6.4691e-13, 1.2116e-14,\n 1.3376e-13, 8.8663e-09],\n [8.6564e-12, 1.4133e-11, 6.3334e-13, 2.4789e-12, 5.2778e-12, 1.7021e-13,\n 2.1185e-12, 3.1096e-07],\n [1.5021e-11, 3.0993e-11, 1.3462e-12, 5.3092e-12, 1.1372e-11, 3.4036e-13,\n 5.0413e-12, 2.4164e-08],\n [4.5407e-11, 5.9336e-11, 3.1292e-12, 1.2684e-11, 2.2339e-11, 7.8744e-13,\n 1.0589e-11, 1.5641e-08],\n [4.9061e-11, 6.7839e-11, 3.1601e-12, 1.3100e-11, 3.2278e-11, 7.9198e-13,\n 1.0575e-11, 1.1904e-06],\n [4.4752e-11, 6.2008e-11, 3.6907e-12, 1.4625e-11, 2.4414e-11, 8.6465e-13,\n 1.2212e-11, 2.9511e-08],\n [1.2623e-10, 2.0423e-10, 9.2226e-12, 3.8040e-11, 6.7418e-11, 2.4374e-12,\n 3.0478e-11, 1.2537e-07]], device='cuda:0')" + }, + "58": { + "step": "tensor(7509.)", + "exp_avg": "tensor([ 1.0173e-05, 3.7344e-10, 4.1732e-06, -7.4330e-08, -2.2355e-08,\n 1.1191e-05, -5.0441e-09, -6.1082e-09], device='cuda:0')", + "exp_avg_sq": "tensor([5.2613e-07, 8.8823e-09, 3.1118e-07, 2.4715e-08, 1.6746e-08, 1.1918e-06,\n 3.0752e-08, 1.2862e-07], device='cuda:0')" + }, + "59": { + "step": "tensor(7509.)", + "exp_avg": "tensor([[-5.2739e-06, 4.7472e-07, -5.5567e-06, 4.7268e-07, 4.7419e-07,\n -4.6058e-06, 4.7472e-07, 4.7489e-07],\n [ 2.3212e-06, -2.0884e-07, 2.4464e-06, -2.0795e-07, -2.0873e-07,\n 2.0272e-06, -2.0884e-07, -2.0906e-07],\n [ 1.1776e-06, -1.0604e-07, 1.2404e-06, -1.0558e-07, -1.0587e-07,\n 1.0284e-06, -1.0604e-07, -1.0602e-07],\n [ 1.7732e-06, -1.5967e-07, 1.8678e-06, -1.5898e-07, -1.5942e-07,\n 1.5485e-06, -1.5967e-07, -1.5964e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[5.5670e-07, 8.4827e-08, 1.1577e-06, 3.8723e-08, 3.0593e-08, 3.2667e-07,\n 2.2011e-07, 1.2937e-07],\n [5.1115e-08, 5.7076e-09, 9.5490e-08, 2.9702e-09, 2.0143e-09, 3.3298e-08,\n 1.2819e-08, 7.9852e-09],\n [1.6250e-07, 2.9820e-08, 3.6029e-07, 9.7156e-09, 9.4845e-09, 1.0551e-07,\n 6.6204e-08, 4.3361e-08],\n [1.8779e-07, 3.1822e-08, 4.0767e-07, 1.1071e-08, 1.0859e-08, 1.2858e-07,\n 6.5177e-08, 4.4364e-08]], device='cuda:0')" + }, + "60": { + "step": "tensor(7509.)", + "exp_avg": "tensor([-1.5985e-05, 1.2268e-05, 1.4834e-06, 2.2338e-06], device='cuda:0')", + "exp_avg_sq": "tensor([9.3595e-06, 5.5462e-07, 2.6191e-06, 2.5465e-06], device='cuda:0')" + }, + "61": { + "step": "tensor(7509.)", + "exp_avg": "tensor([[-3.4005e-16, 2.7609e-15, -3.0453e-16, ..., -4.7123e-15,\n -1.7764e-14, 4.9112e-15],\n [-3.6191e-15, -5.3005e-14, -1.8052e-16, ..., -6.0078e-15,\n -8.3084e-15, -1.7077e-14],\n [-1.0386e-14, -6.2388e-14, -6.6833e-16, ..., -1.0956e-14,\n -5.9921e-15, -3.5960e-14],\n ...,\n [-4.6932e-14, -1.5609e-12, -4.9084e-15, ..., -1.5696e-13,\n 7.7704e-13, -1.7273e-12],\n [-1.2270e-15, -9.2132e-14, -2.7350e-16, ..., -5.1448e-15,\n -4.1578e-14, -5.3513e-14],\n [-1.5560e-14, -1.2114e-12, -1.2927e-15, ..., -8.8744e-14,\n 5.9718e-13, -1.3010e-12]], device='cuda:0')", + "exp_avg_sq": "tensor([[7.9670e-15, 7.1439e-15, 9.0319e-14, ..., 1.1990e-13, 7.7132e-14,\n 9.3711e-14],\n [6.9512e-16, 8.5323e-16, 1.1811e-14, ..., 1.2060e-14, 7.6600e-15,\n 1.0430e-14],\n [6.3853e-15, 6.2886e-15, 6.1165e-14, ..., 1.1463e-13, 4.8750e-14,\n 9.3205e-14],\n ...,\n [1.3580e-13, 1.0900e-13, 1.7499e-12, ..., 1.8581e-12, 1.3160e-12,\n 1.8348e-12],\n [5.0213e-15, 3.4005e-15, 5.1115e-14, ..., 9.2322e-14, 3.1836e-14,\n 6.7025e-14],\n [2.3526e-14, 1.9963e-14, 3.4990e-13, ..., 3.4148e-13, 2.1742e-13,\n 3.6098e-13]], device='cuda:0')" + }, + "62": { + "step": "tensor(7509.)", + "exp_avg": "tensor([ 2.5737e-10, 2.4291e-10, 3.2253e-10, 2.5280e-10, 3.8682e-10,\n -2.8615e-09, 2.6302e-10, -3.0437e-09, 2.6984e-10, -2.0463e-09,\n 4.6876e-10, -1.0671e-09, -2.2997e-09, 3.3736e-10, -1.3565e-09,\n -2.7400e-09, -3.5200e-09, -2.4385e-09, 9.3572e-11, 1.3765e-10,\n 2.6940e-10, 3.7346e-10, 3.4622e-10, 2.2433e-10, -2.7069e-09,\n 2.4052e-10, -2.6336e-09, -2.7433e-09, 1.6235e-10, 3.2203e-10,\n 2.4150e-10, -1.1830e-09, -1.9799e-09, -2.8135e-09, -2.3846e-09,\n -2.8381e-09, -2.3495e-09, -1.3906e-09, -1.7446e-09, -1.0928e-09,\n 5.0842e-10, 2.5786e-10, -2.4754e-09, 1.8982e-10, 2.0615e-10,\n 4.2596e-10, -3.8966e-09, 2.5004e-10, 2.1200e-10, 3.3417e-10,\n 2.7273e-10, -2.1079e-09, -2.5066e-09, 1.5367e-10, -2.2363e-09,\n -1.6509e-09, -1.8025e-09, -3.1663e-09, 3.5423e-10, -2.3884e-09,\n -3.7630e-09, -6.1447e-10, -2.2484e-09, 2.8252e-10, 1.5666e-10,\n 3.5544e-10, -2.5305e-09, -2.0605e-09, 2.3873e-10, 1.6628e-10,\n -1.0457e-09, -2.4371e-09, 1.6108e-10, 2.3728e-10, 3.8342e-10,\n -2.7118e-09, -1.4178e-09, 4.6733e-10, -2.6969e-09, -1.0446e-09,\n 2.6278e-10, 1.8958e-10, -1.6270e-09, -7.6610e-10, 3.0787e-10,\n -1.1681e-09, -1.4403e-09, -2.9367e-09, -1.7238e-09, -3.7149e-09,\n -3.2589e-09, -1.6440e-09, 4.7238e-10, 2.5581e-10, -1.3917e-09,\n -2.2504e-09, 4.0627e-10, 2.5762e-10, -2.2705e-09, 3.1300e-10,\n 3.2891e-10, -2.2658e-09, 1.8672e-10, -2.5500e-09, -2.6043e-09,\n -1.6417e-09, -1.3618e-09, 2.6598e-10, 7.7948e-11, -1.8197e-09,\n 1.9635e-10, -1.0878e-09, -5.1654e-10, 1.6002e-10, 1.6540e-10,\n -2.2233e-09, -2.8397e-09, 2.6753e-10, -1.8893e-09, 1.8160e-10,\n -1.7553e-09, -2.8353e-09, 4.2763e-10, -2.0477e-09, 2.2541e-10,\n -3.5017e-10, 2.6463e-10, 3.3619e-10, 3.2710e-10, -3.2549e-09,\n 8.0311e-10, 3.0124e-10, 1.7965e-10, -2.4648e-09, 1.7520e-10,\n -1.3423e-09, 9.2513e-11, -2.3092e-09, 4.1348e-10, 2.4638e-10,\n 1.8983e-10, -1.8705e-09, -2.7492e-09, 3.1023e-10, 4.8061e-10,\n 4.1737e-10, -2.7428e-09, -2.6904e-09, 2.7770e-10, 4.7442e-10,\n -2.3266e-09, -2.8076e-09, -3.3668e-09, 2.4686e-10, 2.9909e-10,\n -1.9607e-09, 3.4289e-10, 3.0501e-10, -9.3963e-10, -2.0282e-09,\n -1.7749e-09, 3.1942e-10, -2.2084e-09, -3.1434e-09, 9.9033e-11,\n 2.6402e-10, -2.2810e-09, -1.8614e-09, 1.2951e-10, -2.6347e-09,\n 2.4920e-10, 1.3251e-10, -1.5902e-09, 9.9320e-11, 3.7852e-10,\n 3.8765e-10, 4.3356e-10, -1.0750e-09, 3.2178e-10, -1.9826e-09,\n -1.8095e-09, -2.0420e-09, 2.1757e-10, 4.9141e-10, 4.6169e-10,\n -2.1907e-09, 4.5889e-10, 2.1651e-10, -2.3603e-09, -2.4785e-09,\n 1.5356e-10, -2.0649e-09], device='cuda:0')", + "exp_avg_sq": "tensor([9.7124e-11, 1.1805e-11, 7.6426e-11, 2.1351e-10, 4.8315e-11, 9.7627e-11,\n 3.4779e-10, 1.1934e-09, 4.0086e-10, 7.7783e-10, 7.7170e-10, 2.1511e-10,\n 1.5928e-10, 2.4455e-10, 3.2721e-10, 3.3359e-10, 4.9056e-10, 2.7400e-10,\n 5.7447e-11, 1.3530e-11, 4.8593e-10, 1.4776e-10, 9.5180e-11, 4.4298e-10,\n 1.9493e-09, 8.2663e-12, 2.8064e-10, 3.5768e-10, 2.7248e-11, 2.9506e-10,\n 9.5667e-11, 5.7953e-10, 1.8302e-09, 5.4567e-10, 7.9745e-10, 1.0578e-09,\n 6.5361e-10, 2.3799e-11, 6.8893e-10, 1.4560e-10, 8.3196e-10, 1.0536e-11,\n 5.2992e-10, 1.3423e-11, 5.8243e-12, 7.1536e-10, 1.3712e-09, 4.5112e-11,\n 2.2682e-11, 2.0571e-10, 3.9270e-10, 5.6946e-10, 1.1954e-09, 4.2160e-11,\n 1.5016e-10, 1.3224e-10, 1.8706e-10, 6.3716e-10, 4.1704e-12, 2.0019e-09,\n 2.4229e-09, 3.5810e-12, 2.0480e-09, 2.0176e-10, 1.3958e-11, 8.4853e-10,\n 1.2468e-09, 2.7471e-10, 1.6417e-10, 9.5847e-11, 7.0243e-11, 2.3032e-09,\n 6.9066e-12, 2.4660e-10, 4.3998e-10, 2.6098e-10, 4.9148e-11, 3.8096e-10,\n 6.3062e-10, 4.3955e-10, 7.8577e-11, 3.9895e-11, 1.1006e-09, 8.0566e-11,\n 2.1200e-11, 1.6968e-10, 5.9399e-10, 1.4661e-09, 2.8749e-10, 2.4873e-09,\n 1.5224e-09, 3.3308e-10, 3.1666e-10, 1.4057e-10, 3.6782e-10, 8.6750e-10,\n 2.1032e-10, 4.4600e-11, 1.5257e-10, 6.3587e-10, 3.5429e-11, 1.5082e-10,\n 4.6142e-12, 9.9139e-11, 7.9775e-10, 6.0532e-10, 2.2822e-10, 5.2109e-11,\n 4.1255e-11, 1.1150e-10, 2.1106e-12, 3.6021e-10, 3.1481e-12, 3.0971e-11,\n 9.0762e-11, 3.1134e-10, 1.0524e-09, 3.3577e-10, 8.0270e-11, 2.7351e-10,\n 1.1368e-09, 8.1901e-10, 1.6204e-10, 6.5529e-10, 1.6002e-11, 1.5938e-10,\n 2.8913e-10, 3.7517e-11, 2.7344e-10, 1.2704e-09, 7.7353e-10, 4.4492e-10,\n 4.7953e-11, 2.4070e-09, 7.0499e-12, 2.1300e-10, 7.9999e-11, 1.7300e-10,\n 2.6348e-10, 1.8781e-11, 2.4015e-10, 5.6751e-10, 2.3285e-09, 3.2244e-11,\n 2.3662e-10, 1.6966e-10, 1.0809e-09, 5.7676e-11, 2.8813e-10, 5.4772e-11,\n 1.1763e-09, 2.7469e-09, 1.7791e-09, 2.5298e-10, 3.3616e-10, 2.5295e-11,\n 4.7587e-10, 2.1833e-10, 3.7431e-10, 1.7493e-09, 9.1093e-10, 5.3219e-10,\n 5.2211e-10, 4.2360e-10, 1.9237e-10, 1.2736e-11, 6.0110e-10, 4.8140e-11,\n 3.5324e-11, 1.2945e-10, 4.3770e-10, 1.2058e-11, 8.7072e-11, 1.7687e-10,\n 8.3415e-11, 1.9953e-10, 3.8136e-11, 3.4588e-10, 1.1040e-10, 1.1078e-10,\n 1.4321e-10, 1.1185e-09, 1.4255e-10, 1.0485e-10, 1.3452e-10, 2.6319e-09,\n 5.6668e-10, 4.0567e-11, 6.1844e-11, 1.6693e-09, 6.0510e-11, 3.2094e-10],\n device='cuda:0')" + }, + "63": { + "step": "tensor(7509.)", + "exp_avg": "tensor([[ 2.0913e-09, 2.2506e-09, 2.2595e-09, 2.1618e-09, 2.1771e-09,\n -6.3634e-09, 2.1349e-09, -7.0536e-09, 2.3001e-09, -5.5141e-09,\n 2.0538e-09, -6.5116e-09, -6.0907e-09, 2.2140e-09, -6.1221e-09,\n -5.9339e-09, -7.0828e-09, -5.6277e-09, 2.3056e-09, 2.2949e-09,\n 2.2493e-09, 2.1961e-09, 2.1936e-09, 2.2420e-09, -6.5483e-09,\n 2.1330e-09, -6.5694e-09, -7.3437e-09, 2.2580e-09, 2.1800e-09,\n 1.7859e-09, -6.3138e-09, -6.1994e-09, -6.8986e-09, -5.6638e-09,\n -6.1068e-09, -5.9233e-09, -6.5280e-09, -6.1313e-09, -5.9609e-09,\n 2.1078e-09, 2.2367e-09, -6.9861e-09, 2.2827e-09, 2.1335e-09,\n 2.0625e-09, -6.6122e-09, 2.2324e-09, 2.3311e-09, 2.2195e-09,\n 2.2570e-09, -5.8441e-09, -6.8303e-09, 2.2035e-09, -6.4160e-09,\n -7.1092e-09, -6.8126e-09, -6.3108e-09, 1.9207e-09, -5.6128e-09,\n -7.0384e-09, -5.6536e-09, -6.9816e-09, 2.2522e-09, 2.3158e-09,\n 2.2393e-09, -6.2402e-09, -6.4448e-09, 2.1518e-09, 2.3090e-09,\n -6.3082e-09, -6.6626e-09, 2.1113e-09, 2.2386e-09, 2.0696e-09,\n -6.0509e-09, -5.7379e-09, 2.1102e-09, -6.5754e-09, -5.9771e-09,\n 2.2604e-09, 2.3074e-09, -6.0936e-09, -6.8337e-09, 2.0725e-09,\n -6.0424e-09, -6.7350e-09, -6.0281e-09, -6.6934e-09, -5.5710e-09,\n -5.7427e-09, -6.2103e-09, 2.1679e-09, 2.1729e-09, -6.1300e-09,\n -6.2037e-09, 2.0990e-09, 2.2822e-09, -7.0173e-09, 2.1465e-09,\n 2.0641e-09, -6.0035e-09, 2.1439e-09, -6.1909e-09, -6.3802e-09,\n -6.2307e-09, -7.3534e-09, 2.1517e-09, 2.3025e-09, -5.9371e-09,\n 4.8980e-10, -6.7675e-09, -4.7946e-09, 2.2743e-09, 2.0609e-09,\n -6.4755e-09, -5.7275e-09, 1.9374e-09, -6.2437e-09, 2.3503e-09,\n -6.2408e-09, -7.3758e-09, 2.1298e-09, -6.3391e-09, 2.2637e-09,\n -6.2040e-09, 2.2593e-09, 2.2050e-09, 2.2531e-09, -6.6459e-09,\n 1.8462e-09, 2.1580e-09, 2.2602e-09, -5.4273e-09, 2.2288e-09,\n -6.2884e-09, 2.2803e-09, -7.1392e-09, 2.2031e-09, 2.2326e-09,\n 2.1349e-09, -5.9667e-09, -5.9045e-09, 2.2659e-09, 2.1056e-09,\n 2.1051e-09, -6.6145e-09, -6.3307e-09, 2.2780e-09, 2.0093e-09,\n -6.1851e-09, -5.1812e-09, -6.0565e-09, 2.2969e-09, 1.9882e-09,\n -7.0762e-09, 2.2820e-09, 2.2359e-09, -5.9092e-09, -6.7533e-09,\n -6.5380e-09, 2.2260e-09, -6.7989e-09, -6.5870e-09, 2.2121e-09,\n 2.1971e-09, -6.0192e-09, -6.5631e-09, 2.1997e-09, -6.1168e-09,\n 2.2756e-09, 2.2606e-09, -6.8092e-09, 2.2739e-09, 2.1582e-09,\n 2.1681e-09, 2.1564e-09, -6.2064e-09, 2.1760e-09, -6.0853e-09,\n -6.2277e-09, -5.9170e-09, 2.2594e-09, 2.1439e-09, 2.1893e-09,\n -6.5396e-09, 2.0690e-09, 2.0897e-09, -6.6359e-09, -6.3361e-09,\n 2.3231e-09, -7.0881e-09],\n [ 1.0869e-09, 1.1885e-09, 1.1920e-09, 1.1311e-09, 1.1414e-09,\n -3.0483e-09, 1.1193e-09, -3.5037e-09, 1.2174e-09, -2.6112e-09,\n 1.0630e-09, -3.0773e-09, -2.8974e-09, 1.1643e-09, -2.9604e-09,\n -2.7136e-09, -3.5282e-09, -2.5750e-09, 1.2207e-09, 1.2151e-09,\n 1.1840e-09, 1.1515e-09, 1.1522e-09, 1.1798e-09, -3.2437e-09,\n 1.1131e-09, -3.2379e-09, -3.6969e-09, 1.1910e-09, 1.1405e-09,\n 8.9192e-10, -3.1237e-09, -2.9705e-09, -3.4075e-09, -2.6877e-09,\n -2.9147e-09, -2.8029e-09, -3.0786e-09, -2.9581e-09, -2.7540e-09,\n 1.0988e-09, 1.1778e-09, -3.5852e-09, 1.2070e-09, 1.1182e-09,\n 1.0711e-09, -3.2516e-09, 1.1753e-09, 1.2374e-09, 1.1659e-09,\n 1.1946e-09, -2.8100e-09, -3.3504e-09, 1.1557e-09, -3.0651e-09,\n -3.5470e-09, -3.2695e-09, -3.1293e-09, 9.7853e-10, -2.6891e-09,\n -3.4934e-09, -2.6391e-09, -3.5008e-09, 1.1881e-09, 1.2285e-09,\n 1.1791e-09, -2.9471e-09, -3.1140e-09, 1.1231e-09, 1.2241e-09,\n -2.8968e-09, -3.2789e-09, 1.0992e-09, 1.1783e-09, 1.0736e-09,\n -2.8141e-09, -2.5999e-09, 1.1009e-09, -3.2854e-09, -2.8260e-09,\n 1.1918e-09, 1.2243e-09, -2.8442e-09, -3.4271e-09, 1.0737e-09,\n -2.8814e-09, -3.3237e-09, -2.8685e-09, -3.3257e-09, -2.6054e-09,\n -2.6529e-09, -2.9615e-09, 1.1334e-09, 1.1380e-09, -2.8517e-09,\n -3.1262e-09, 1.0897e-09, 1.2086e-09, -3.4690e-09, 1.1198e-09,\n 1.0688e-09, -2.7708e-09, 1.1186e-09, -2.8363e-09, -3.0131e-09,\n -3.0635e-09, -3.7286e-09, 1.1258e-09, 1.2190e-09, -2.8331e-09,\n 2.2767e-10, -3.3027e-09, -2.1882e-09, 1.2036e-09, 1.0673e-09,\n -3.1931e-09, -2.7093e-09, 9.8874e-10, -2.9271e-09, 1.2500e-09,\n -3.0417e-09, -3.6320e-09, 1.1113e-09, -3.1733e-09, 1.1954e-09,\n -3.0909e-09, 1.1913e-09, 1.1576e-09, 1.1898e-09, -3.2619e-09,\n 9.3368e-10, 1.1302e-09, 1.1921e-09, -2.6088e-09, 1.1811e-09,\n -2.9875e-09, 1.2051e-09, -3.4851e-09, 1.1583e-09, 1.1779e-09,\n 1.1142e-09, -2.8506e-09, -2.7236e-09, 1.1965e-09, 1.0965e-09,\n 1.0959e-09, -3.2011e-09, -3.0596e-09, 1.2043e-09, 1.0352e-09,\n -2.9937e-09, -2.3359e-09, -2.8481e-09, 1.2151e-09, 1.0346e-09,\n -3.4109e-09, 1.2042e-09, 1.1809e-09, -2.7524e-09, -3.3116e-09,\n -3.2361e-09, 1.1713e-09, -3.2084e-09, -3.2310e-09, 1.1616e-09,\n 1.1590e-09, -2.7705e-09, -3.1270e-09, 1.1550e-09, -2.9045e-09,\n 1.2053e-09, 1.1958e-09, -3.3633e-09, 1.2033e-09, 1.1303e-09,\n 1.1346e-09, 1.1305e-09, -3.0116e-09, 1.1394e-09, -2.8662e-09,\n -2.9990e-09, -2.8456e-09, 1.2007e-09, 1.1206e-09, 1.1486e-09,\n -3.2343e-09, 1.0707e-09, 1.0872e-09, -3.1772e-09, -3.0519e-09,\n 1.2309e-09, -3.5234e-09],\n [-1.3995e-09, -1.5146e-09, -1.5201e-09, -1.4501e-09, -1.4614e-09,\n 4.1414e-09, -1.4331e-09, 4.6473e-09, -1.5492e-09, 3.5758e-09,\n -1.3723e-09, 4.2189e-09, 3.9549e-09, -1.4878e-09, 3.9975e-09,\n 3.8043e-09, 4.6709e-09, 3.6074e-09, -1.5531e-09, -1.5460e-09,\n -1.5120e-09, -1.4742e-09, -1.4734e-09, -1.5069e-09, 4.3101e-09,\n -1.4295e-09, 4.3175e-09, 4.8606e-09, -1.5190e-09, -1.4623e-09,\n -1.1786e-09, 4.1548e-09, 4.0347e-09, 4.5360e-09, 3.6737e-09,\n 3.9698e-09, 3.8385e-09, 4.2263e-09, 4.0014e-09, 3.8341e-09,\n -1.4120e-09, -1.5037e-09, 4.6558e-09, -1.5370e-09, -1.4320e-09,\n -1.3799e-09, 4.3423e-09, -1.5008e-09, -1.5717e-09, -1.4909e-09,\n -1.5201e-09, 3.8084e-09, 4.4808e-09, -1.4794e-09, 4.1713e-09,\n 4.6918e-09, 4.4370e-09, 4.1566e-09, -1.2765e-09, 3.6532e-09,\n 4.6362e-09, 3.6502e-09, 4.6146e-09, -1.5152e-09, -1.5611e-09,\n -1.5055e-09, 4.0431e-09, 4.2073e-09, -1.4421e-09, -1.5561e-09,\n 4.0485e-09, 4.3751e-09, -1.4137e-09, -1.5048e-09, -1.3840e-09,\n 3.8995e-09, 3.6675e-09, -1.4140e-09, 4.3405e-09, 3.8727e-09,\n -1.5204e-09, -1.5555e-09, 3.9319e-09, 4.5180e-09, -1.3854e-09,\n 3.9271e-09, 4.4272e-09, 3.9148e-09, 4.4107e-09, 3.5960e-09,\n 3.6933e-09, 4.0367e-09, -1.4538e-09, -1.4581e-09, 3.9506e-09,\n 4.1088e-09, -1.4041e-09, -1.5374e-09, 4.6170e-09, -1.4383e-09,\n -1.3795e-09, 3.8596e-09, -1.4367e-09, 3.9702e-09, 4.1322e-09,\n 4.0913e-09, 4.8799e-09, -1.4433e-09, -1.5510e-09, 3.8599e-09,\n -3.1593e-10, 4.4318e-09, 3.0728e-09, -1.5318e-09, -1.3774e-09,\n 4.2566e-09, 3.7130e-09, -1.2882e-09, 4.0338e-09, -1.5858e-09,\n 4.0850e-09, 4.8460e-09, -1.4272e-09, 4.1876e-09, -1.5235e-09,\n 4.0912e-09, -1.5197e-09, -1.4809e-09, -1.5163e-09, 4.3602e-09,\n -1.2237e-09, -1.4481e-09, -1.5204e-09, 3.5366e-09, -1.5018e-09,\n 4.0822e-09, -1.5351e-09, 4.6762e-09, -1.4804e-09, -1.5021e-09,\n -1.4307e-09, 3.8790e-09, 3.7941e-09, -1.5249e-09, -1.4100e-09,\n -1.4096e-09, 4.3198e-09, 4.1329e-09, -1.5337e-09, -1.3405e-09,\n 4.0392e-09, 3.3055e-09, 3.9168e-09, -1.5468e-09, -1.3310e-09,\n 4.6150e-09, -1.5354e-09, -1.5048e-09, 3.8102e-09, 4.4308e-09,\n 4.3028e-09, -1.4961e-09, 4.4032e-09, 4.3219e-09, -1.4858e-09,\n -1.4780e-09, 3.8661e-09, 4.2629e-09, -1.4773e-09, 3.9696e-09,\n -1.5332e-09, -1.5223e-09, 4.4784e-09, -1.5315e-09, -1.4482e-09,\n -1.4545e-09, -1.4475e-09, 4.0567e-09, -1.4600e-09, 3.9378e-09,\n 4.0598e-09, 3.8566e-09, -1.5240e-09, -1.4377e-09, -1.4700e-09,\n 4.3015e-09, -1.3824e-09, -1.3989e-09, 4.3187e-09, 4.1322e-09,\n -1.5653e-09, 4.6714e-09],\n [-1.7823e-09, -1.9277e-09, -1.9346e-09, -1.8462e-09, -1.8604e-09,\n 5.2976e-09, -1.8246e-09, 5.9356e-09, -1.9716e-09, 4.5763e-09,\n -1.7479e-09, 5.3958e-09, 5.0576e-09, -1.8939e-09, 5.1108e-09,\n 4.8694e-09, 5.9669e-09, 4.6200e-09, -1.9764e-09, -1.9674e-09,\n -1.9247e-09, -1.8767e-09, -1.8756e-09, -1.9181e-09, 5.5059e-09,\n -1.8201e-09, 5.5168e-09, 6.2072e-09, -1.9333e-09, -1.8617e-09,\n -1.5028e-09, 5.3085e-09, 5.1594e-09, 5.7964e-09, 4.7019e-09,\n 5.0770e-09, 4.9108e-09, 5.4074e-09, 5.1148e-09, 4.9081e-09,\n -1.7981e-09, -1.9139e-09, 5.9399e-09, -1.9559e-09, -1.8231e-09,\n -1.7574e-09, 5.5471e-09, -1.9103e-09, -2.0000e-09, -1.8979e-09,\n -1.9347e-09, 4.8718e-09, 5.7261e-09, -1.8833e-09, 5.3352e-09,\n 5.9895e-09, 5.6721e-09, 5.3104e-09, -1.6265e-09, 4.6722e-09,\n 5.9205e-09, 4.6693e-09, 5.8926e-09, -1.9285e-09, -1.9864e-09,\n -1.9163e-09, 5.1692e-09, 5.3776e-09, -1.8363e-09, -1.9803e-09,\n 5.1820e-09, 5.5920e-09, -1.8002e-09, -1.9154e-09, -1.7627e-09,\n 4.9883e-09, 4.6980e-09, -1.8006e-09, 5.5428e-09, 4.9545e-09,\n -1.9351e-09, -1.9793e-09, 5.0302e-09, 5.7688e-09, -1.7644e-09,\n 5.0260e-09, 5.6557e-09, 5.0061e-09, 5.6359e-09, 4.6043e-09,\n 4.7268e-09, 5.1614e-09, -1.8509e-09, -1.8564e-09, 5.0551e-09,\n 5.2466e-09, -1.7881e-09, -1.9565e-09, 5.8968e-09, -1.8314e-09,\n -1.7569e-09, 4.9407e-09, -1.8293e-09, 5.0831e-09, 5.2861e-09,\n 5.2296e-09, 6.2267e-09, -1.8376e-09, -1.9737e-09, 4.9374e-09,\n -4.0390e-10, 5.6644e-09, 3.9318e-09, -1.9493e-09, -1.7543e-09,\n 5.4386e-09, 4.7486e-09, -1.6414e-09, 5.1644e-09, -2.0178e-09,\n 5.2190e-09, 6.1868e-09, -1.8174e-09, 5.3479e-09, -1.9389e-09,\n 5.2284e-09, -1.9343e-09, -1.8851e-09, -1.9300e-09, 5.5711e-09,\n -1.5599e-09, -1.8435e-09, -1.9351e-09, 4.5238e-09, -1.9114e-09,\n 5.2214e-09, -1.9537e-09, 5.9733e-09, -1.8846e-09, -1.9118e-09,\n -1.8217e-09, 4.9633e-09, 4.8574e-09, -1.9406e-09, -1.7954e-09,\n -1.7949e-09, 5.5212e-09, 5.2850e-09, -1.9520e-09, -1.7076e-09,\n 5.1662e-09, 4.2375e-09, 5.0125e-09, -1.9685e-09, -1.6954e-09,\n 5.9000e-09, -1.9543e-09, -1.9154e-09, 4.8769e-09, 5.6580e-09,\n 5.4969e-09, -1.9045e-09, 5.6317e-09, 5.5223e-09, -1.8913e-09,\n -1.8814e-09, 4.9503e-09, 5.4538e-09, -1.8805e-09, 5.0798e-09,\n -1.9512e-09, -1.9372e-09, 5.7199e-09, -1.9490e-09, -1.8438e-09,\n -1.8517e-09, -1.8429e-09, 5.1863e-09, -1.8588e-09, 5.0402e-09,\n 5.1925e-09, 4.9332e-09, -1.9395e-09, -1.8303e-09, -1.8713e-09,\n 5.4948e-09, -1.7607e-09, -1.7815e-09, 5.5219e-09, 5.2831e-09,\n -1.9920e-09, 5.9663e-09]], device='cuda:0')", + "exp_avg_sq": "tensor([[3.2103e-09, 3.9132e-09, 5.1490e-09, 3.9304e-09, 4.4618e-09, 1.2152e-08,\n 3.8696e-09, 1.5247e-08, 4.5847e-09, 1.6873e-08, 3.9782e-09, 1.3898e-08,\n 8.0886e-09, 5.1426e-09, 1.2347e-08, 1.2424e-08, 1.7164e-08, 1.2982e-08,\n 4.8666e-09, 4.2183e-09, 4.3596e-09, 4.7873e-09, 4.6777e-09, 5.2233e-09,\n 1.6405e-08, 3.7126e-09, 1.1650e-08, 2.3713e-08, 4.2449e-09, 3.8373e-09,\n 2.7106e-09, 1.2022e-08, 1.8035e-08, 1.6220e-08, 1.4438e-08, 1.2331e-08,\n 1.3014e-08, 7.7549e-09, 1.3619e-08, 1.9513e-08, 3.5043e-09, 3.9459e-09,\n 1.5510e-08, 4.7481e-09, 3.2050e-09, 3.2906e-09, 1.0716e-08, 4.6706e-09,\n 4.5680e-09, 4.4030e-09, 5.1120e-09, 1.6072e-08, 2.5679e-08, 3.3551e-09,\n 1.2022e-08, 9.1969e-09, 1.7486e-08, 1.2386e-08, 2.0134e-09, 1.3828e-08,\n 1.6435e-08, 1.0432e-09, 1.6894e-08, 4.5411e-09, 4.8372e-09, 4.2050e-09,\n 1.0127e-08, 1.3746e-08, 3.6661e-09, 4.3969e-09, 1.2577e-08, 2.3046e-08,\n 3.5528e-09, 4.4185e-09, 3.7898e-09, 1.5578e-08, 1.2214e-08, 3.4229e-09,\n 1.2039e-08, 1.2485e-08, 4.7737e-09, 5.1373e-09, 1.7389e-08, 8.6423e-09,\n 3.3350e-09, 9.5215e-09, 1.4200e-08, 1.6608e-08, 1.8038e-08, 1.3994e-08,\n 1.3133e-08, 1.2011e-08, 4.3612e-09, 3.8345e-09, 1.4337e-08, 9.1923e-09,\n 4.8090e-09, 5.0767e-09, 1.5198e-08, 4.0759e-09, 3.5614e-09, 1.3699e-08,\n 2.8612e-09, 8.8628e-09, 1.8172e-08, 1.4935e-08, 1.2910e-08, 3.8124e-09,\n 4.4996e-09, 1.0603e-08, 4.0211e-10, 2.0487e-08, 9.3249e-10, 4.5868e-09,\n 3.1031e-09, 1.6118e-08, 9.5824e-09, 3.4939e-09, 1.1583e-08, 5.5733e-09,\n 1.3746e-08, 1.1922e-08, 4.0309e-09, 1.1033e-08, 4.5500e-09, 1.2171e-08,\n 4.4136e-09, 3.6358e-09, 4.6385e-09, 1.6437e-08, 2.8469e-09, 4.9118e-09,\n 4.4335e-09, 1.3067e-08, 3.2079e-09, 1.3071e-08, 4.6641e-09, 1.2563e-08,\n 3.7143e-09, 4.8214e-09, 4.6085e-09, 1.8757e-08, 1.4962e-08, 4.8845e-09,\n 4.0600e-09, 4.2718e-09, 1.4925e-08, 1.1589e-08, 3.6823e-09, 3.3329e-09,\n 2.0572e-08, 1.7442e-08, 1.3222e-08, 4.7290e-09, 3.1597e-09, 8.3720e-09,\n 4.6761e-09, 3.5046e-09, 1.2424e-08, 1.1569e-08, 1.2136e-08, 5.2255e-09,\n 2.1302e-08, 1.4622e-08, 4.7674e-09, 3.7534e-09, 1.6921e-08, 1.1813e-08,\n 5.0743e-09, 1.2097e-08, 4.4135e-09, 4.2370e-09, 1.2885e-08, 4.4093e-09,\n 3.8184e-09, 4.7711e-09, 3.5888e-09, 8.3540e-09, 3.8590e-09, 1.4477e-08,\n 1.2293e-08, 1.2526e-08, 4.7381e-09, 5.0048e-09, 5.2909e-09, 1.8447e-08,\n 4.9140e-09, 2.6004e-09, 1.1498e-08, 1.7059e-08, 5.7356e-09, 1.7930e-08],\n [3.7253e-10, 4.5174e-10, 5.9428e-10, 4.5523e-10, 5.1681e-10, 1.4292e-09,\n 4.4757e-10, 1.7835e-09, 5.2867e-10, 1.9713e-09, 4.6014e-10, 1.6238e-09,\n 9.5653e-10, 5.9062e-10, 1.4549e-09, 1.4628e-09, 2.0169e-09, 1.5300e-09,\n 5.5849e-10, 4.8812e-10, 5.0341e-10, 5.5372e-10, 5.3843e-10, 6.0008e-10,\n 1.9153e-09, 4.3152e-10, 1.3741e-09, 2.7571e-09, 4.8879e-10, 4.4475e-10,\n 3.1768e-10, 1.4095e-09, 2.1097e-09, 1.9057e-09, 1.6947e-09, 1.4510e-09,\n 1.5391e-09, 9.1416e-10, 1.5958e-09, 2.2739e-09, 4.0822e-10, 4.5470e-10,\n 1.8237e-09, 5.4797e-10, 3.7041e-10, 3.8220e-10, 1.2664e-09, 5.3922e-10,\n 5.2612e-10, 5.0779e-10, 5.8880e-10, 1.8828e-09, 3.0032e-09, 3.8800e-10,\n 1.4177e-09, 1.0825e-09, 2.0405e-09, 1.4567e-09, 2.3345e-10, 1.6227e-09,\n 1.9338e-09, 1.2444e-10, 1.9719e-09, 5.2376e-10, 5.5624e-10, 4.8690e-10,\n 1.1961e-09, 1.6211e-09, 4.2561e-10, 5.0776e-10, 1.4715e-09, 2.6939e-09,\n 4.1015e-10, 5.0995e-10, 4.3814e-10, 1.8330e-09, 1.4378e-09, 3.9927e-10,\n 1.4224e-09, 1.4656e-09, 5.5208e-10, 5.9262e-10, 2.0405e-09, 1.0173e-09,\n 3.8841e-10, 1.1229e-09, 1.6602e-09, 1.9465e-09, 2.1119e-09, 1.6472e-09,\n 1.5447e-09, 1.4104e-09, 5.0306e-10, 4.4395e-10, 1.6714e-09, 1.0833e-09,\n 5.5395e-10, 5.8434e-10, 1.7820e-09, 4.7231e-10, 4.1305e-10, 1.6119e-09,\n 3.3089e-10, 1.0519e-09, 2.1234e-09, 1.7453e-09, 1.5168e-09, 4.4192e-10,\n 5.1999e-10, 1.2503e-09, 4.4773e-11, 2.3895e-09, 1.1023e-10, 5.2887e-10,\n 3.6245e-10, 1.8898e-09, 1.1336e-09, 4.0491e-10, 1.3649e-09, 6.3915e-10,\n 1.6066e-09, 1.4078e-09, 4.6765e-10, 1.2990e-09, 5.2460e-10, 1.4264e-09,\n 5.0911e-10, 4.2152e-10, 5.3473e-10, 1.9300e-09, 3.3192e-10, 5.6601e-10,\n 5.1164e-10, 1.5382e-09, 3.7159e-10, 1.5345e-09, 5.4016e-10, 1.4841e-09,\n 4.3046e-10, 5.5569e-10, 5.3169e-10, 2.2001e-09, 1.7543e-09, 5.6257e-10,\n 4.7015e-10, 4.9476e-10, 1.7564e-09, 1.3635e-09, 4.2633e-10, 3.8707e-10,\n 2.4115e-09, 2.0406e-09, 1.5579e-09, 5.4536e-10, 3.6703e-10, 9.8383e-10,\n 5.3948e-10, 4.0743e-10, 1.4562e-09, 1.3606e-09, 1.4255e-09, 5.9960e-10,\n 2.4999e-09, 1.7135e-09, 5.5101e-10, 4.3388e-10, 1.9749e-09, 1.3933e-09,\n 5.8534e-10, 1.4215e-09, 5.0829e-10, 4.9016e-10, 1.5006e-09, 5.0779e-10,\n 4.4200e-10, 5.4846e-10, 4.1596e-10, 9.8519e-10, 4.4632e-10, 1.6965e-09,\n 1.4492e-09, 1.4707e-09, 5.4608e-10, 5.7701e-10, 6.0914e-10, 2.1478e-09,\n 5.6653e-10, 3.0466e-10, 1.3516e-09, 2.0013e-09, 6.5932e-10, 2.0933e-09],\n [2.7849e-10, 3.4243e-10, 4.5223e-10, 3.4230e-10, 3.8966e-10, 1.0285e-09,\n 3.3745e-10, 1.3057e-09, 4.0215e-10, 1.4413e-09, 3.4696e-10, 1.1910e-09,\n 6.8367e-10, 4.5239e-10, 1.0474e-09, 1.0529e-09, 1.4662e-09, 1.1022e-09,\n 4.2786e-10, 3.6809e-10, 3.8084e-10, 4.1898e-10, 4.1076e-10, 4.5971e-10,\n 1.4090e-09, 3.2313e-10, 9.8820e-10, 2.0507e-09, 3.7218e-10, 3.3409e-10,\n 2.3440e-10, 1.0214e-09, 1.5438e-09, 1.3887e-09, 1.2336e-09, 1.0516e-09,\n 1.1040e-09, 6.5235e-10, 1.1667e-09, 1.6790e-09, 3.0401e-10, 3.4526e-10,\n 1.3228e-09, 4.1716e-10, 2.7937e-10, 2.8564e-10, 9.0960e-10, 4.0904e-10,\n 4.0047e-10, 3.8569e-10, 4.4887e-10, 1.3807e-09, 2.2124e-09, 2.9181e-10,\n 1.0206e-09, 7.7661e-10, 1.5075e-09, 1.0535e-09, 1.7622e-10, 1.1785e-09,\n 1.4002e-09, 8.7814e-11, 1.4523e-09, 3.9817e-10, 4.2560e-10, 3.6636e-10,\n 8.5884e-10, 1.1720e-09, 3.1890e-10, 3.8467e-10, 1.0733e-09, 1.9847e-09,\n 3.1118e-10, 3.8658e-10, 3.3056e-10, 1.3268e-09, 1.0388e-09, 2.9628e-10,\n 1.0214e-09, 1.0636e-09, 4.1805e-10, 4.5163e-10, 1.4899e-09, 7.3031e-10,\n 2.8907e-10, 8.0624e-10, 1.2100e-09, 1.4247e-09, 1.5448e-09, 1.1907e-09,\n 1.1187e-09, 1.0188e-09, 3.8183e-10, 3.3425e-10, 1.2248e-09, 7.7728e-10,\n 4.2250e-10, 4.4680e-10, 1.3005e-09, 3.5491e-10, 3.1001e-10, 1.1650e-09,\n 2.4986e-10, 7.4635e-10, 1.5595e-09, 1.2771e-09, 1.0993e-09, 3.3177e-10,\n 3.9367e-10, 8.9460e-10, 3.7567e-11, 1.7625e-09, 7.8446e-11, 4.0192e-10,\n 2.6902e-10, 1.3807e-09, 8.1198e-10, 3.0423e-10, 9.8074e-10, 4.9208e-10,\n 1.1738e-09, 1.0092e-09, 3.5088e-10, 9.3557e-10, 3.9924e-10, 1.0346e-09,\n 3.8652e-10, 3.1626e-10, 4.0676e-10, 1.4077e-09, 2.4623e-10, 4.3160e-10,\n 3.8801e-10, 1.1114e-09, 2.7931e-10, 1.1138e-09, 4.0760e-10, 1.0651e-09,\n 3.2303e-10, 4.2410e-10, 4.0381e-10, 1.6016e-09, 1.2761e-09, 4.2919e-10,\n 3.5425e-10, 3.7302e-10, 1.2753e-09, 9.8494e-10, 3.2070e-10, 2.8876e-10,\n 1.7625e-09, 1.4966e-09, 1.1251e-09, 4.1528e-10, 2.7386e-10, 7.1577e-10,\n 4.1012e-10, 3.0410e-10, 1.0558e-09, 9.8489e-10, 1.0296e-09, 4.6124e-10,\n 1.8284e-09, 1.2525e-09, 4.1751e-10, 3.2752e-10, 1.4513e-09, 1.0024e-09,\n 4.4595e-10, 1.0389e-09, 3.8686e-10, 3.7007e-10, 1.1044e-09, 3.8658e-10,\n 3.3261e-10, 4.1907e-10, 3.1232e-10, 7.0650e-10, 3.3630e-10, 1.2368e-09,\n 1.0404e-09, 1.0639e-09, 4.1590e-10, 4.3977e-10, 4.6570e-10, 1.5773e-09,\n 4.3159e-10, 2.2484e-10, 9.7629e-10, 1.4622e-09, 5.0652e-10, 1.5412e-09],\n [4.2765e-10, 5.2014e-10, 6.8245e-10, 5.2281e-10, 5.9208e-10, 1.6302e-09,\n 5.1484e-10, 2.0370e-09, 6.0886e-10, 2.2616e-09, 5.2925e-10, 1.8579e-09,\n 1.0809e-09, 6.8392e-10, 1.6505e-09, 1.6635e-09, 2.2881e-09, 1.7340e-09,\n 6.4805e-10, 5.6076e-10, 5.8012e-10, 6.3508e-10, 6.2171e-10, 6.9418e-10,\n 2.1910e-09, 4.9243e-10, 1.5562e-09, 3.1620e-09, 5.6476e-10, 5.1018e-10,\n 3.5872e-10, 1.6128e-09, 2.4103e-09, 2.1587e-09, 1.9265e-09, 1.6439e-09,\n 1.7337e-09, 1.0433e-09, 1.8161e-09, 2.6069e-09, 4.6507e-10, 5.2542e-10,\n 2.0689e-09, 6.2924e-10, 4.2692e-10, 4.3776e-10, 1.4280e-09, 6.2033e-10,\n 6.0763e-10, 5.8532e-10, 6.7898e-10, 2.1394e-09, 3.4155e-09, 4.4745e-10,\n 1.6050e-09, 1.2353e-09, 2.3290e-09, 1.6556e-09, 2.6656e-10, 1.8491e-09,\n 2.1925e-09, 1.3878e-10, 2.2550e-09, 6.0316e-10, 6.4250e-10, 5.5926e-10,\n 1.3510e-09, 1.8289e-09, 4.8702e-10, 5.8433e-10, 1.6850e-09, 3.0677e-09,\n 4.7190e-10, 5.8754e-10, 5.0434e-10, 2.0788e-09, 1.6310e-09, 4.5451e-10,\n 1.6053e-09, 1.6697e-09, 6.3301e-10, 6.8075e-10, 2.3155e-09, 1.1600e-09,\n 4.4302e-10, 1.2737e-09, 1.9055e-09, 2.2118e-09, 2.4078e-09, 1.8685e-09,\n 1.7528e-09, 1.6111e-09, 5.7990e-10, 5.0987e-10, 1.9253e-09, 1.2319e-09,\n 6.3846e-10, 6.7354e-10, 2.0275e-09, 5.4190e-10, 4.7332e-10, 1.8305e-09,\n 3.8035e-10, 1.1837e-09, 2.4263e-09, 1.9997e-09, 1.7256e-09, 5.0700e-10,\n 5.9754e-10, 1.4227e-09, 5.2348e-11, 2.7346e-09, 1.2520e-10, 6.0968e-10,\n 4.1103e-10, 2.1483e-09, 1.2773e-09, 4.6464e-10, 1.5510e-09, 7.4002e-10,\n 1.8424e-09, 1.5933e-09, 5.3551e-10, 1.4764e-09, 6.0419e-10, 1.6328e-09,\n 5.8674e-10, 4.8368e-10, 6.1628e-10, 2.1882e-09, 3.7849e-10, 6.5180e-10,\n 5.8942e-10, 1.7448e-09, 4.2681e-10, 1.7474e-09, 6.1870e-10, 1.6763e-09,\n 4.9435e-10, 6.3916e-10, 6.1224e-10, 2.5053e-09, 2.0010e-09, 6.4846e-10,\n 5.3930e-10, 5.6699e-10, 1.9862e-09, 1.5494e-09, 4.9001e-10, 4.4409e-10,\n 2.7421e-09, 2.3264e-09, 1.7635e-09, 6.2740e-10, 4.2082e-10, 1.1153e-09,\n 6.2074e-10, 4.6592e-10, 1.6673e-09, 1.5450e-09, 1.6272e-09, 6.9375e-10,\n 2.8324e-09, 1.9500e-09, 6.3254e-10, 4.9947e-10, 2.2629e-09, 1.5777e-09,\n 6.7256e-10, 1.6060e-09, 5.8716e-10, 5.6290e-10, 1.7277e-09, 5.8647e-10,\n 5.0814e-10, 6.3476e-10, 4.7742e-10, 1.1187e-09, 5.1371e-10, 1.9347e-09,\n 1.6458e-09, 1.6786e-09, 6.2922e-10, 6.6382e-10, 7.0169e-10, 2.4787e-09,\n 6.5207e-10, 3.4426e-10, 1.5396e-09, 2.2716e-09, 7.5974e-10, 2.3929e-09]],\n device='cuda:0')" + }, + "64": { + "step": "tensor(7509.)", + "exp_avg": "tensor([-1.4249e-08, -7.6011e-09, 9.6253e-09, 1.2251e-08], device='cuda:0')", + "exp_avg_sq": "tensor([5.3373e-07, 6.0649e-08, 4.9007e-08, 6.9224e-08], device='cuda:0')" + } + }, + "param_groups": [ + { + "lr": 0.005000500000000001, + "name": "shared", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 0, + 1 + ] + }, + { + "lr": 0.005000500000000001, + "name": "scale_256", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 2, + 3, + 4 + ] + }, + { + "lr": 0.005000500000000001, + "name": "scale_512", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 5, + 6, + 7 + ] + }, + { + "lr": 0.005000500000000001, + "name": "scale_768", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 8, + 9, + 10 + ] + }, + { + "lr": 0.005000500000000001, + "name": "scale_1024", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 11, + 12, + 13 + ] + }, + { + "lr": 0.0025005, + "name": "fusion", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.005, + "params": [ + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64 + ] + } + ] + }, + "scheduler_state_dict": { + "T_0": 10, + "T_i": 10, + "T_mult": 2, + "eta_min": 1e-06, + "T_cur": 5, + "base_lrs": [ + 0.01, + 0.01, + 0.01, + 0.01, + 0.01, + 0.005 + ], + "last_epoch": 5, + "_step_count": 0, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 0.005000500000000001, + 0.005000500000000001, + 0.005000500000000001, + 0.005000500000000001, + 0.005000500000000001, + 0.0025005 + ] + }, + "metrics": { + "best_val_acc": 72.764, + "best_epoch": 4, + "scale_accuracies": { + "256": 70.902, + "512": 72.613 + }, + "training_history": { + "epochs": [ + 1, + 2, + 3, + 4, + 5 + ], + "train_loss": [ + 3.4310503170769358, + 2.1717353230040883, + 2.280789690497776, + 1.9685389901274355, + 1.842365700580385 + ], + "train_acc": [ + 54.52540535308824, + 69.02296890257085, + 72.83585980594255, + 75.20327170462555, + 76.72934129586541 + ], + "val_acc": [ + 66.689, + 68.889, + 71.268, + 72.266, + 72.764 + ], + "scale_accs": { + "256": [ + 66.689, + 68.889, + 70.197, + 70.459, + 70.902 + ], + "512": [ + 70.734, + 72.095, + 72.613 + ] + }, + "lr": [ + 0.00975530705321762, + 0.00904518046337755, + 0.00793913236883622, + 0.00654543046337755, + 0.005000500000000001 + ] + } + }, + "train_config": { + "name": "david_training", + "run_id": "20251012_191456", + "dataset_name": "AbstractPhil/imagenet-clip-features-orderly", + "model_variant": [ + "clip_vit_b32", + "clip_vit_laion_b32" + ], + "num_classes": 1000, + "preset": "balanced", + "custom_config_path": null, + "num_classes_override": null, + "use_belly_override": null, + "belly_expand_override": null, + "progressive_training_override": true, + "scale_warmup_epochs_override": { + "256": 0, + "512": 2, + "768": 5, + "1024": 8 + }, + "num_epochs": 10, + "batch_size": 1024, + "learning_rate": 0.01, + "weight_decay": 1e-05, + "warmup_epochs": 3, + "use_rose_loss": true, + "rose_initial_weight": 0.2, + "rose_max_weight": 0.8, + "rose_weight_schedule": "adaptive", + "use_cayley_loss": true, + "cayley_weight": 0.01, + "scale_loss_balance": null, + "use_mixed_precision": false, + "gradient_clip": 10.0, + "scheduler_type": "cosine_restarts", + "min_lr": 1e-06, + "freeze_strategy": "never", + "freeze_threshold": 90.0, + "unfreeze_on_plateau": true, + "patience": 10, + "track_gradients": true, + "gradient_scale_threshold": 1e-05, + "gradient_scale_multiplier": 10.0, + "log_interval": 50, + "val_interval": 1, + "save_interval": 5, + "log_fusion_weights": true, + "log_loss_components": true, + "save_format": "safetensors", + "hf_repo": "AbstractPhil/david-shared-space", + "upload_to_hub": true, + "base_dir": "./david_training", + "num_workers": 10, + "pin_memory": true, + "prefetch_factor": 4, + "persistent_workers": true + } +} \ No newline at end of file