diff --git "a/weights/checkpoint_epoch_15_metadata.json" "b/weights/checkpoint_epoch_15_metadata.json" new file mode 100644--- /dev/null +++ "b/weights/checkpoint_epoch_15_metadata.json" @@ -0,0 +1,497 @@ +{ + "epoch": 14, + "optimizer_state_dict": { + "state": { + "0": { + "step": "tensor(7512.)", + "exp_avg": "tensor([[-5.9878e-05, 1.5145e-04, -8.0526e-06, ..., -2.3528e-05,\n -8.7828e-06, -1.7757e-05],\n [ 2.1625e-05, -5.5180e-06, -6.7915e-06, ..., 4.0538e-05,\n -3.8513e-05, -2.6659e-05],\n [-2.6284e-05, 6.7238e-05, -4.8466e-05, ..., 2.9354e-05,\n -1.4989e-05, -3.1615e-05],\n ...,\n [ 4.0357e-05, 9.3667e-06, -3.2310e-05, ..., 2.0847e-05,\n 3.6688e-06, 8.8120e-07],\n [-1.1356e-14, 4.9165e-15, 1.1260e-14, ..., 6.7834e-15,\n 4.8789e-15, -1.7235e-14],\n [-6.6361e-05, 5.0332e-05, -1.1280e-05, ..., -1.4861e-05,\n 4.2881e-05, 2.7634e-05]], device='cuda:0')", + "exp_avg_sq": "tensor([[3.8155e-08, 6.0153e-08, 7.2310e-09, ..., 1.7531e-08, 1.6602e-08,\n 4.3246e-09],\n [1.1362e-07, 7.6477e-08, 2.8919e-08, ..., 5.1424e-08, 2.4834e-08,\n 3.8752e-08],\n [6.0382e-08, 8.7293e-08, 2.0518e-08, ..., 2.3148e-08, 1.5363e-08,\n 1.9589e-08],\n ...,\n [1.5971e-07, 9.2553e-08, 1.8061e-08, ..., 2.2711e-08, 1.7362e-08,\n 1.1314e-08],\n [1.0775e-11, 2.9446e-11, 4.9464e-12, ..., 9.0051e-12, 7.8304e-12,\n 6.5481e-12],\n [1.0519e-07, 7.4661e-08, 1.0791e-08, ..., 1.4872e-08, 2.4900e-08,\n 1.1094e-08]], device='cuda:0')" + }, + "1": { + "step": "tensor(7512.)", + "exp_avg": "tensor([-1.5547e-03, 2.0071e-03, 3.8861e-04, -1.6003e-03, -1.2329e-04,\n -5.0467e-05, -2.0337e-03, 2.4382e-04, 2.0350e-04, -2.1919e-05,\n 9.4562e-04, 4.2298e-04, 5.6052e-45, -3.5132e-04, 5.0270e-04,\n 9.9834e-04, 1.8121e-03, -6.9888e-04, 3.1928e-03, 5.6052e-45,\n -1.0468e-03, -1.2197e-03, 6.0330e-05, -4.6202e-03, 5.6052e-45,\n -1.1990e-03, -7.7044e-04, 1.9845e-03, -3.0753e-03, -9.7927e-04,\n -8.3084e-04, -1.5502e-03, 3.9647e-39, 5.6052e-45, 1.2096e-03,\n 1.2746e-03, 3.8038e-04, -3.0524e-04, 1.3418e-03, -3.0765e-04,\n -2.2403e-03, -4.2497e-04, 3.0549e-03, -5.8180e-04, 7.2136e-04,\n -7.7200e-04, 4.3062e-04, 2.4294e-04, -7.5414e-04, 9.3444e-04,\n 7.2593e-04, 5.6052e-45, 1.4848e-36, 5.6052e-45, 4.3734e-04,\n 2.5002e-03, 1.2859e-03, 5.1210e-04, 9.3318e-04, -2.3428e-03,\n 1.2630e-03, 3.5347e-03, -4.0320e-03, 5.1594e-04, 8.6356e-04,\n 8.7518e-04, 1.0819e-03, 5.6052e-45, 1.4315e-03, -3.6576e-03,\n 1.4177e-04, 5.6052e-45, 2.0659e-03, 2.4878e-03, 5.4197e-05,\n -7.9008e-04, 1.3947e-04, 2.1853e-03, -4.5762e-04, -2.2738e-03,\n -1.3668e-03, -3.8966e-03, 2.0002e-03, 2.1052e-04, -1.8271e-04,\n -1.5527e-03, 1.3430e-03, -1.0526e-03, -2.0740e-03, 2.9635e-04,\n 5.6052e-45, 1.9730e-03, 7.7154e-04, -2.9497e-03, 1.0967e-03,\n 2.3390e-04, -1.6306e-03, 1.8335e-03, 2.3571e-04, 7.4950e-04,\n 2.4118e-03, -1.2847e-05, 5.5689e-04, -1.9352e-03, 3.6302e-03,\n 1.5116e-03, 4.2673e-04, -1.7794e-03, -1.5053e-03, 1.0194e-03,\n 1.7510e-03, -7.3855e-04, 5.6052e-45, 1.4297e-03, -4.4048e-04,\n 2.3473e-03, 4.1661e-04, -8.0947e-04, 3.6507e-12, 7.2896e-13,\n 5.6052e-45, 1.2639e-03, 5.6052e-45, -1.9938e-03, 4.6707e-04,\n 4.4636e-03, 1.5068e-03, 5.6052e-45, 1.2132e-04, -7.4410e-04,\n 6.2267e-04, 2.3974e-03, -1.8230e-03, 7.5307e-08, -6.6941e-04,\n -7.8193e-04, 1.7270e-04, -2.8829e-04, 1.2755e-03, -1.1605e-03,\n -8.5571e-05, -2.1510e-03, -5.6207e-04, 1.5339e-03, -2.8894e-03,\n 5.1715e-06, -5.6052e-45, -1.1987e-04, -1.9933e-03, -6.6907e-04,\n 6.1965e-24, 2.0977e-04, 1.2725e-03, 5.6052e-45, 1.8509e-04,\n -4.2754e-04, 5.6052e-45, 2.1769e-03, 1.0384e-04, 1.6764e-03,\n 6.1372e-04, 4.7413e-04, 2.0586e-03, -3.0762e-03, 3.1514e-04,\n 1.4208e-03, 2.0734e-05, -9.7741e-04, 2.0391e-03, 1.6240e-14,\n 5.6052e-45, -1.8627e-03, -4.6027e-03, 4.1200e-04, 2.7234e-03,\n 1.7108e-03, -3.2127e-04, -9.4812e-04, 5.6052e-45, -3.9411e-03,\n 4.9644e-04, -3.6147e-03, 7.6441e-05, 5.6052e-45, 6.4152e-04,\n 1.4312e-03, -6.1238e-04, 4.7829e-04, 3.2941e-03, 5.6052e-45,\n 5.6052e-45, -1.2758e-03, -8.9139e-04, -3.5562e-04, 1.6980e-04,\n 7.5192e-04, -1.5328e-03, -1.0477e-03, 5.6052e-45, -4.7442e-04,\n -1.0503e-03, -2.0941e-04, -5.1279e-03, 8.2279e-05, 1.9237e-03,\n 5.6052e-45, -1.2216e-04, -1.4893e-03, 1.4088e-03, -1.7684e-04,\n 1.5857e-03, -7.5531e-06, 8.8005e-04, 1.2262e-03, -2.2247e-04,\n 1.1890e-03, 3.6135e-05, -1.3650e-03, 1.1019e-04, -6.6627e-04,\n 4.1960e-04, 2.8608e-03, 3.2458e-03, 4.4798e-04, 5.5125e-05,\n 8.9767e-04, -2.0677e-03, 3.0639e-03, -8.6294e-04, 3.7573e-03,\n -7.2673e-04, 1.3558e-03, 8.0360e-04, 5.6052e-45, -1.1249e-03,\n -1.0344e-03, 6.8387e-04, 5.2949e-04, -2.9708e-03, 3.1472e-03,\n 1.4993e-03, 7.5004e-04, -2.9475e-03, 5.6349e-04, 4.1170e-03,\n -1.9166e-03, -3.0506e-03, -1.4954e-03, 8.3250e-04, 1.7061e-04,\n 4.2885e-04, -9.7707e-04, 2.1697e-03, 1.3977e-04, 2.6650e-03,\n 7.0468e-05, 5.6052e-45, -6.6793e-04, 1.7716e-03, 6.5031e-04,\n -1.2921e-03, 9.2144e-04, 8.7300e-04, -2.5781e-04, 1.1980e-04,\n -1.3322e-04, -5.5072e-04, 5.6052e-45, -4.6666e-03, 9.9524e-04,\n -5.6931e-04, 4.4578e-04, -1.3028e-03, -3.9469e-05, 2.7959e-04,\n -3.4428e-37, 9.3859e-04, 5.6052e-45, 7.5188e-04, -7.9472e-04,\n 3.4993e-03, -1.2679e-03, 9.3277e-04, -2.2040e-03, 3.5127e-04,\n 4.9395e-04, 1.2180e-03, -3.1301e-03, -1.1774e-03, -1.2080e-03,\n -3.8436e-04, 2.8282e-04, 6.7124e-04, 1.4642e-03, 5.4226e-04,\n -1.4612e-04, -1.2069e-03, -2.0727e-03, 2.0539e-06, 9.1835e-04,\n -3.5770e-04, 3.6916e-04, -2.1207e-04, -1.9126e-03, 1.4378e-03,\n 6.6016e-04, 4.3976e-03, 5.6052e-45, 5.6052e-45, 8.1742e-04,\n 3.8007e-04, 2.2362e-03, 1.3500e-03, 2.1797e-03, -8.5256e-04,\n 1.3464e-04, -1.3407e-03, 7.8234e-04, -1.0598e-04, -1.2915e-03,\n 1.9882e-03, 2.4040e-04, 1.1723e-03, 2.8819e-03, -9.9809e-04,\n -8.5895e-04, -1.0056e-04, 4.0674e-04, 2.4588e-03, 1.9608e-04,\n -4.6726e-04, 2.4874e-03, 5.2214e-03, -3.3050e-03, 5.6052e-45,\n -6.1393e-03, 1.2381e-03, -2.0519e-03, -4.9399e-04, -2.0860e-04,\n 4.9380e-04, -7.3929e-04, -1.2120e-04, 1.3940e-03, 4.1112e-04,\n 1.6549e-03, 5.6052e-45, -1.0374e-03, 2.1756e-03, -5.4903e-04,\n 1.0074e-03, -7.2374e-04, 2.6860e-04, -3.0276e-05, 1.0967e-03,\n -1.9524e-03, -1.0634e-03, 9.6021e-04, -4.5806e-03, -9.3920e-04,\n -2.1870e-04, -5.6079e-04, 4.4154e-05, 7.0425e-04, 1.4709e-03,\n 4.2267e-03, -1.2981e-04, 1.3461e-04, 8.6668e-04, -2.7276e-03,\n 4.4787e-04, 1.3476e-03, -3.7332e-04, 1.4239e-03, 1.2915e-03,\n -7.0842e-04, -1.0320e-03, 3.7945e-04, 5.6052e-45, 3.8741e-04,\n 1.2879e-03, 5.6052e-45, -8.9241e-04, -1.3345e-03, -5.3103e-03,\n -2.2584e-03, -1.3471e-03, 1.1087e-03, -8.0159e-04, 2.2230e-03,\n -1.0370e-03, -4.7341e-04, 9.3248e-04, -2.2820e-34, 5.6052e-45,\n 9.5689e-04, 3.0996e-03, 1.0250e-03, 2.5287e-03, 6.3804e-04,\n -1.5270e-03, -2.7615e-04, -2.5267e-04, 6.7697e-04, 2.8193e-03,\n -1.2876e-04, -1.2888e-03, 5.6052e-45, 2.7561e-03, 1.4491e-05,\n 5.6052e-45, 2.1475e-03, 5.6052e-45, 8.4788e-04, -4.7463e-03,\n 1.3473e-03, 8.9887e-04, 5.6052e-45, 1.4217e-03, -4.9090e-03,\n 3.0088e-03, -6.5861e-04, -5.3771e-03, 8.3741e-04, -3.4183e-03,\n 2.4886e-04, 9.8072e-04, 4.5916e-04, -1.0990e-03, 5.6052e-45,\n -5.1696e-04, 4.8440e-04, -4.5510e-03, 5.6052e-45, 1.9971e-03,\n -6.5871e-04, -5.4696e-04, 4.4145e-04, -1.6760e-03, -8.5020e-04,\n -6.2415e-04, -1.8507e-03, 1.4622e-03, -3.2481e-03, -9.2086e-04,\n 3.2666e-04, 8.0873e-04, 5.6052e-45, 2.6666e-04, -2.9023e-03,\n 5.6052e-45, -9.8574e-04, -2.7962e-04, 1.5615e-04, -2.9323e-03,\n 6.7231e-12, 1.8411e-03, -8.3307e-04, 7.1975e-04, 5.6052e-45,\n -9.5483e-05, -7.4895e-04, 5.6052e-45, -1.6758e-04, -5.7952e-04,\n -1.2591e-03, -2.7694e-03, 5.6052e-45, -1.0466e-03, 3.6761e-04,\n -1.5731e-03, 7.2439e-04, -4.0236e-04, 3.7771e-05, 9.5313e-04,\n 2.3660e-03, 5.6052e-45, -1.0273e-03, 5.6052e-45, -1.0929e-03,\n 8.3823e-04, 3.7339e-04, -8.0509e-05, 4.7963e-05, 1.4250e-03,\n -1.1593e-03, -1.6976e-03, -5.5229e-05, -1.5914e-03, -4.2776e-04,\n 5.6052e-45, 2.3358e-04, -4.5627e-03, -3.2662e-03, 1.2686e-04,\n -1.5013e-04, 2.3623e-03, 2.2785e-03, -1.3989e-03, 6.9224e-04,\n -1.1806e-03, 5.6052e-45, 6.8883e-04, -9.5046e-04, -1.2031e-03,\n 5.7518e-05, -8.3869e-03, -2.7011e-03, -1.3202e-03, 5.1939e-04,\n 2.8636e-03, -4.4298e-03, -1.0171e-03, 9.2249e-04, 5.6052e-45,\n 5.6052e-45, -1.1843e-03, 3.6278e-03, -1.4213e-03, 1.7748e-03,\n -2.0510e-03, -1.4381e-03, -1.1727e-03, -1.0068e-03, -5.6593e-04,\n -9.5728e-04, 1.5139e-04, -2.0254e-03, -1.4656e-04, 3.4529e-04,\n 2.1844e-04, -2.2310e-03, 6.5388e-04, 7.3588e-05, 6.4380e-27,\n -2.3054e-03, 1.0189e-03, 5.6052e-45, 5.6052e-45, 9.9006e-04,\n 4.6500e-04, 7.0753e-04, 2.0556e-03, -2.6589e-04, 8.4278e-04,\n 1.4235e-03, -3.4427e-03, 1.2201e-03, -3.8728e-04, 5.5105e-04,\n 1.2138e-03, -2.1050e-03, 1.6906e-03, 2.1028e-03, 6.4352e-04,\n 2.5902e-03, 1.4057e-03, -1.7836e-03, -1.3276e-03, 6.4423e-04,\n 5.5246e-04, -7.6548e-04, -1.1630e-03, -3.0730e-03, 5.2290e-04,\n -2.6136e-04, 2.2622e-04, 1.2981e-03, 7.0619e-04, 8.2525e-04,\n -1.7701e-04, 7.7137e-06, -1.8519e-03, -1.2987e-03, -4.4375e-06,\n 2.4185e-05, 5.6052e-45, -1.1175e-03, 2.8010e-03, -1.5779e-03,\n 8.6225e-04, 5.6052e-45, -1.1465e-03, 7.8957e-04, -7.3214e-04,\n -1.3058e-03, -4.9303e-03, 1.0742e-03, 5.4710e-03, -1.8958e-03,\n 5.6857e-05, -4.5310e-05, 1.5966e-03, 1.0438e-03, -1.7156e-03,\n -1.3517e-03, -1.5308e-03, -2.0269e-03, 2.7084e-03, 1.0559e-05,\n 9.8418e-04, -1.2293e-03, 1.4061e-04, 1.8254e-03, 6.6775e-04,\n -2.9655e-04, 4.3551e-03, 1.7863e-03, -1.8031e-03, -2.1927e-03,\n -1.4684e-03, -3.4495e-04, 1.4723e-04, 2.7523e-04, 4.4844e-04,\n -2.5797e-03, 2.1401e-03, -4.7214e-04, -1.5152e-03, 1.3795e-17,\n 9.1120e-13, -1.4675e-03, 2.3385e-03, -3.2316e-04, 2.5272e-04,\n -8.7744e-04, 7.5234e-05, -5.3239e-04, -4.9507e-04, 2.0739e-03,\n 3.0889e-04, -2.0431e-11, -1.6454e-03, -5.9457e-04, 7.9547e-04,\n -4.1667e-03, -4.7809e-04, 1.6211e-04, 2.9867e-13, -4.0283e-05],\n device='cuda:0')", + "exp_avg_sq": "tensor([1.3021e-05, 5.3796e-05, 2.3192e-05, 1.9576e-05, 1.3870e-05, 4.6743e-06,\n 3.5283e-05, 3.1518e-05, 2.3867e-05, 2.9973e-05, 2.9902e-05, 6.0132e-05,\n 1.2312e-08, 2.8891e-05, 3.1634e-05, 3.4801e-05, 5.2221e-05, 3.4915e-05,\n 2.3299e-05, 3.2055e-09, 2.8631e-05, 1.9722e-05, 2.1528e-05, 3.5841e-05,\n 1.1701e-08, 4.2761e-05, 2.9477e-05, 4.1802e-05, 3.7231e-05, 3.7278e-05,\n 4.3057e-05, 3.2981e-05, 4.1089e-08, 2.0666e-09, 3.4348e-05, 2.8126e-05,\n 3.5782e-05, 2.0518e-05, 2.5256e-05, 3.3177e-05, 3.5363e-05, 2.2497e-05,\n 3.1784e-05, 2.8190e-05, 3.3807e-05, 3.4928e-05, 2.9906e-05, 5.0382e-05,\n 5.8548e-05, 2.6628e-05, 3.6741e-05, 1.2275e-09, 1.9919e-09, 1.8641e-09,\n 4.3087e-05, 6.2912e-05, 2.4602e-05, 2.5460e-05, 3.0583e-06, 3.8701e-05,\n 3.2290e-05, 3.2015e-05, 6.3771e-05, 1.6302e-05, 3.8337e-05, 3.6245e-05,\n 3.1571e-05, 2.2816e-09, 4.9863e-05, 3.5150e-05, 1.2132e-05, 9.3199e-09,\n 3.9410e-05, 2.9904e-05, 1.3435e-05, 4.0259e-05, 4.2287e-05, 4.6600e-05,\n 2.4750e-05, 4.2024e-05, 5.6549e-05, 3.0509e-05, 4.1087e-05, 2.6102e-05,\n 3.1179e-05, 3.3890e-05, 3.0759e-05, 1.9892e-05, 3.3902e-05, 3.4761e-05,\n 1.4549e-08, 4.2006e-05, 2.9632e-05, 2.5834e-05, 5.0605e-05, 1.4532e-05,\n 3.7315e-05, 3.2166e-05, 2.9248e-05, 2.3110e-05, 2.4368e-05, 1.1931e-05,\n 4.6217e-05, 4.6762e-05, 7.6658e-05, 2.5875e-05, 2.9145e-05, 5.0365e-05,\n 1.5352e-05, 2.0027e-05, 4.8282e-05, 4.5413e-05, 9.8368e-10, 2.3563e-05,\n 1.0647e-05, 2.4692e-05, 3.6042e-05, 5.6803e-05, 7.9077e-09, 1.0627e-07,\n 1.5955e-09, 3.0607e-05, 1.2704e-09, 6.2054e-05, 3.1091e-05, 4.3240e-05,\n 4.3617e-05, 2.3898e-10, 1.4798e-05, 3.6089e-05, 3.0454e-05, 3.6172e-05,\n 3.8344e-05, 4.2634e-10, 3.7936e-05, 3.4436e-05, 2.2107e-05, 2.7341e-05,\n 3.2417e-05, 2.6234e-05, 5.8949e-05, 3.3497e-05, 4.8785e-05, 3.3006e-05,\n 3.6422e-05, 3.8835e-05, 1.8814e-08, 3.6080e-05, 3.3121e-05, 2.7388e-05,\n 1.6154e-08, 2.0383e-05, 2.9089e-05, 8.6575e-09, 2.8277e-05, 4.2450e-05,\n 1.7150e-08, 4.9663e-05, 2.8028e-05, 3.6365e-05, 2.6683e-05, 3.3160e-05,\n 9.1455e-05, 3.4862e-05, 3.2651e-05, 2.6349e-05, 5.2580e-05, 2.0083e-05,\n 3.4769e-05, 2.0374e-09, 3.1397e-09, 3.1540e-05, 5.6469e-05, 2.4352e-05,\n 3.0787e-05, 4.6977e-05, 1.4707e-05, 3.9183e-05, 5.6981e-10, 1.9528e-05,\n 3.4749e-05, 3.8754e-05, 8.1521e-06, 4.1926e-09, 3.2684e-05, 3.4385e-05,\n 5.2406e-05, 2.1556e-05, 2.9397e-05, 1.2175e-09, 1.3949e-08, 3.0815e-05,\n 1.5989e-05, 4.0221e-05, 3.0197e-05, 3.4250e-05, 2.0810e-05, 3.7347e-05,\n 2.1987e-09, 2.9400e-05, 2.4720e-05, 1.5223e-05, 4.8300e-05, 1.8443e-05,\n 6.6025e-05, 1.0844e-10, 2.6224e-05, 3.2083e-05, 2.6022e-05, 2.9667e-05,\n 1.7494e-05, 3.5440e-05, 3.9505e-05, 3.2366e-05, 2.4208e-05, 2.7164e-05,\n 2.3098e-05, 3.9995e-05, 3.4779e-05, 2.2863e-05, 2.0063e-05, 3.4085e-05,\n 2.4757e-05, 2.8124e-05, 3.2091e-05, 3.5307e-05, 4.7019e-05, 5.0956e-05,\n 1.9995e-05, 4.4444e-05, 3.8122e-05, 6.3308e-05, 3.8108e-05, 2.6956e-09,\n 3.6300e-05, 1.7641e-05, 2.7193e-05, 1.9847e-05, 7.6086e-05, 3.4214e-05,\n 2.9563e-05, 3.6952e-05, 3.4829e-05, 2.7645e-05, 4.6009e-05, 2.1137e-05,\n 1.9417e-05, 4.6554e-05, 2.1130e-05, 2.2815e-05, 3.5201e-05, 2.8074e-05,\n 2.6856e-05, 2.9291e-05, 3.4602e-05, 2.5958e-05, 1.1615e-08, 2.5153e-05,\n 2.2546e-05, 1.3961e-05, 3.4100e-05, 4.4989e-05, 3.9747e-05, 3.2497e-05,\n 3.3854e-05, 2.7848e-05, 2.6141e-05, 1.7217e-09, 4.4194e-05, 3.4883e-05,\n 2.4571e-05, 4.7906e-05, 2.5940e-05, 2.2224e-05, 3.9696e-05, 3.0047e-09,\n 3.2895e-05, 8.4901e-09, 2.3303e-05, 1.5102e-05, 2.9813e-05, 2.7634e-05,\n 4.5131e-05, 3.8425e-05, 3.1950e-05, 3.9720e-05, 4.8931e-06, 5.1211e-05,\n 4.3416e-05, 2.2144e-05, 8.6966e-06, 3.3139e-05, 4.2042e-05, 3.5721e-05,\n 2.8013e-05, 6.0523e-05, 6.1655e-05, 3.3952e-05, 3.1232e-05, 6.8186e-05,\n 3.3724e-05, 8.9881e-06, 8.5592e-06, 2.6892e-05, 3.2744e-05, 1.3551e-05,\n 3.6961e-05, 3.5738e-10, 7.0405e-09, 3.4512e-05, 4.8325e-05, 2.1467e-05,\n 3.0108e-05, 4.1228e-05, 4.5396e-05, 2.9115e-05, 3.8819e-05, 2.4588e-05,\n 2.3402e-05, 3.8760e-05, 4.6794e-05, 5.1113e-05, 4.1906e-05, 2.0439e-05,\n 1.3448e-05, 2.9455e-05, 2.9132e-05, 1.0425e-05, 2.7550e-05, 3.6300e-05,\n 9.8941e-06, 2.1586e-05, 3.8315e-05, 5.1747e-05, 8.3993e-13, 3.4223e-05,\n 2.2947e-05, 4.7360e-05, 1.9721e-05, 3.3140e-05, 7.7601e-06, 3.0554e-05,\n 2.5828e-05, 2.7024e-05, 2.7939e-05, 4.3084e-05, 1.2292e-08, 1.0357e-05,\n 3.4917e-05, 3.4287e-05, 2.8434e-05, 3.7022e-05, 2.6428e-05, 2.8519e-05,\n 5.3335e-05, 3.4349e-05, 3.2266e-05, 3.0315e-05, 4.2417e-05, 9.8085e-06,\n 1.6924e-05, 2.0284e-05, 4.2453e-05, 5.2236e-05, 3.7341e-05, 8.1510e-05,\n 1.5209e-05, 4.1039e-05, 4.4343e-05, 3.4439e-05, 3.9659e-05, 2.2527e-05,\n 2.7930e-05, 9.7808e-06, 2.8062e-05, 5.0856e-05, 7.0425e-05, 1.1067e-05,\n 5.3510e-09, 3.5086e-05, 2.3238e-05, 2.9196e-08, 2.6405e-05, 5.8847e-05,\n 3.4002e-05, 2.7950e-05, 3.3519e-05, 3.7437e-05, 2.1100e-05, 2.0714e-05,\n 4.5788e-05, 4.2879e-05, 2.9109e-05, 2.9099e-09, 9.6286e-09, 3.0943e-05,\n 3.5061e-05, 3.2220e-05, 5.3933e-05, 3.0235e-05, 3.3810e-05, 2.0114e-05,\n 1.3045e-05, 2.4553e-05, 3.1477e-05, 4.6090e-05, 2.4217e-05, 3.5459e-09,\n 1.4374e-05, 1.0064e-05, 1.0215e-09, 1.2434e-05, 1.7821e-09, 4.3505e-05,\n 4.1417e-05, 1.5926e-05, 2.9558e-05, 4.8114e-09, 1.7379e-05, 4.2585e-05,\n 2.4789e-05, 3.8084e-05, 4.4486e-05, 4.2984e-05, 3.2733e-05, 4.9512e-06,\n 2.9949e-05, 9.1057e-06, 2.0809e-05, 2.4184e-09, 2.0992e-05, 4.1097e-05,\n 3.7315e-05, 3.0906e-09, 2.0551e-05, 2.9652e-05, 4.5617e-05, 3.5368e-05,\n 3.0223e-05, 2.5539e-05, 5.0838e-05, 1.7679e-05, 3.1876e-05, 4.9207e-05,\n 2.6215e-05, 1.1404e-05, 3.4683e-05, 1.0431e-09, 2.7868e-05, 2.3964e-05,\n 3.4609e-08, 2.3299e-05, 2.7027e-05, 6.1198e-06, 3.1471e-05, 1.1836e-08,\n 1.9667e-05, 3.7060e-06, 2.9034e-05, 4.6722e-08, 3.0893e-05, 5.2606e-05,\n 3.0377e-10, 2.6803e-05, 8.8889e-06, 3.3936e-05, 2.7664e-05, 2.4809e-09,\n 4.2794e-05, 2.0791e-05, 5.4504e-05, 4.1544e-05, 2.4931e-05, 1.6742e-05,\n 2.1599e-05, 2.8099e-05, 1.7244e-08, 3.8987e-05, 5.4751e-09, 9.1970e-06,\n 7.5787e-06, 9.8713e-06, 3.3283e-05, 3.3202e-05, 6.5534e-05, 2.2811e-05,\n 4.0472e-05, 2.4975e-05, 3.8881e-05, 2.6558e-05, 2.2972e-09, 3.0746e-05,\n 3.6462e-05, 6.2818e-05, 6.9899e-06, 1.9340e-05, 2.6103e-05, 2.1280e-05,\n 3.5423e-05, 1.6750e-05, 2.8258e-05, 1.7046e-08, 2.1112e-05, 4.2354e-05,\n 3.1138e-05, 5.7836e-05, 4.2856e-05, 3.0896e-05, 3.7811e-05, 3.3821e-05,\n 3.4407e-05, 7.5980e-05, 2.5047e-05, 3.7260e-05, 1.2794e-08, 1.4121e-08,\n 4.0070e-05, 7.8114e-05, 4.7140e-05, 2.9760e-05, 3.6503e-05, 3.3174e-05,\n 3.0037e-05, 4.4816e-05, 2.9107e-05, 6.8634e-06, 2.7027e-05, 3.5038e-05,\n 2.7443e-05, 2.4536e-05, 1.2112e-06, 2.5357e-05, 3.4235e-05, 2.3028e-05,\n 1.2855e-08, 2.6076e-05, 3.4987e-05, 1.0413e-09, 3.1330e-09, 3.4512e-05,\n 3.6148e-05, 3.9249e-05, 2.1503e-05, 3.6551e-05, 1.8796e-05, 3.1502e-05,\n 2.9118e-05, 1.8537e-05, 1.9520e-05, 3.6658e-05, 3.3174e-05, 2.7364e-05,\n 4.3295e-05, 4.4067e-05, 3.7220e-05, 2.3796e-05, 3.1854e-05, 9.6650e-05,\n 2.6350e-05, 3.1836e-05, 1.7949e-05, 3.0459e-05, 3.4244e-05, 6.2472e-05,\n 8.2721e-06, 2.5630e-05, 4.4750e-05, 2.3149e-05, 3.6748e-05, 2.6096e-05,\n 3.8793e-05, 2.5322e-05, 3.3730e-05, 4.4914e-05, 6.3714e-06, 3.5338e-08,\n 1.8693e-08, 2.3696e-05, 2.8240e-05, 3.4460e-05, 3.2027e-05, 3.5579e-08,\n 2.7633e-05, 3.2844e-05, 1.9751e-05, 3.5034e-05, 2.7730e-05, 2.5388e-05,\n 4.8436e-05, 3.9564e-05, 3.1841e-05, 6.3018e-05, 3.7408e-05, 4.8565e-05,\n 2.6360e-05, 3.9256e-05, 3.5083e-05, 3.4454e-05, 4.5758e-05, 6.0806e-05,\n 4.2985e-05, 2.0496e-05, 2.9045e-05, 2.9166e-05, 3.3929e-05, 2.4145e-05,\n 2.7234e-05, 3.2736e-05, 6.2173e-05, 5.3642e-05, 3.6644e-05, 3.8516e-05,\n 3.4835e-05, 1.5030e-05, 1.7896e-05, 3.3204e-05, 2.9165e-05, 3.3108e-05,\n 1.9299e-05, 1.2427e-08, 3.2354e-08, 7.5531e-06, 3.7234e-05, 3.2571e-05,\n 3.3181e-05, 3.3094e-05, 3.0623e-05, 2.4267e-05, 2.4659e-05, 4.7492e-05,\n 2.5689e-05, 5.9926e-09, 8.0428e-06, 3.9438e-05, 2.8938e-05, 4.7946e-05,\n 3.3418e-05, 2.8880e-05, 8.5901e-09, 2.2376e-05], device='cuda:0')" + }, + "2": { + "step": "tensor(7512.)", + "exp_avg": "tensor([[ 5.6198e-06, -1.5067e-05, 3.8120e-05, ..., 3.6708e-06,\n -7.9779e-15, 1.5094e-05],\n [ 7.6120e-07, -2.6588e-05, 9.2524e-05, ..., 1.5356e-05,\n -2.8971e-15, -2.8584e-06],\n [ 4.8004e-06, 3.8724e-05, 2.9184e-05, ..., -6.7134e-06,\n -9.3192e-16, -3.1998e-05],\n ...,\n [ 1.3938e-05, -1.9265e-05, -8.7151e-07, ..., -1.8440e-05,\n 9.5435e-15, -3.3535e-05],\n [ 2.1527e-07, 6.5886e-07, 3.0513e-06, ..., -3.3036e-05,\n -3.9366e-15, -2.0113e-05],\n [ 6.0556e-06, 5.2375e-05, 2.7317e-05, ..., -1.1747e-05,\n 3.7151e-15, 2.9616e-05]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.0550e-09, 4.0851e-09, 5.4619e-09, ..., 5.2441e-09, 4.7423e-12,\n 4.1123e-09],\n [2.4361e-09, 1.3603e-08, 1.0108e-08, ..., 1.1028e-08, 3.5529e-12,\n 5.9625e-09],\n [1.3159e-09, 8.5150e-09, 1.0370e-08, ..., 7.8096e-09, 7.0392e-12,\n 1.0625e-08],\n ...,\n [2.3517e-09, 8.5889e-09, 1.2553e-08, ..., 9.4604e-09, 7.5548e-12,\n 1.3323e-08],\n [2.1186e-09, 8.7608e-09, 9.9121e-09, ..., 9.1695e-09, 5.5073e-12,\n 3.0985e-08],\n [2.3974e-09, 9.1584e-09, 1.2790e-08, ..., 1.4243e-08, 1.0875e-11,\n 6.2020e-09]], device='cuda:0')" + }, + "3": { + "step": "tensor(7512.)", + "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 4.5510e-06, -2.9309e-05, 9.1462e-07, ..., 8.2961e-06,\n 1.2030e-05, -8.5270e-07],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [-1.4736e-05, -1.5622e-06, 7.3517e-06, ..., -8.8944e-06,\n -4.3507e-06, -9.2521e-06],\n [ 3.7801e-06, -1.0272e-05, -7.0826e-06, ..., -5.0855e-06,\n -8.7793e-06, -9.6691e-06],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.1141e-12, 1.4231e-12, 1.1524e-14, ..., 1.4970e-18, 5.3786e-14,\n 4.5932e-14],\n [8.8933e-10, 3.0109e-09, 2.2738e-10, ..., 2.4691e-10, 2.6338e-10,\n 1.8106e-10],\n [9.2400e-14, 1.3934e-13, 5.1435e-14, ..., 8.9105e-15, 1.2641e-14,\n 3.8168e-14],\n ...,\n [3.6251e-09, 5.2148e-09, 7.4398e-10, ..., 9.3777e-10, 1.7260e-09,\n 1.5085e-09],\n [3.9929e-09, 3.1558e-09, 8.3302e-10, ..., 1.3373e-09, 8.3181e-10,\n 8.2235e-10],\n [7.1817e-12, 6.0487e-12, 5.4617e-13, ..., 6.7548e-13, 1.1378e-12,\n 3.7960e-13]], device='cuda:0')" + }, + "4": { + "step": "tensor(7512.)", + "exp_avg": "tensor([ 5.6052e-45, 3.7580e-04, 5.6052e-45, ..., -2.6365e-04,\n -2.7972e-05, 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([2.4824e-10, 4.8990e-07, 4.7533e-11, ..., 1.3344e-06, 1.2133e-06,\n 1.3355e-09], device='cuda:0')" + }, + "5": { + "step": "tensor(7512.)", + "exp_avg": "tensor([[ 5.6052e-45, -7.0514e-07, -5.6052e-45, ..., -3.9847e-06,\n 2.9505e-06, 5.6052e-45],\n [ 5.6052e-45, -2.9139e-06, -5.6052e-45, ..., -1.0544e-06,\n 2.9733e-06, 5.6052e-45],\n [ 5.6052e-45, -2.6041e-06, 5.6052e-45, ..., 4.5052e-06,\n 4.2818e-07, -5.6052e-45],\n ...,\n [-5.6052e-45, 6.3490e-06, 5.6052e-45, ..., -1.0507e-05,\n -3.1109e-06, 5.6052e-45],\n [ 5.6052e-45, 7.7425e-06, 5.6052e-45, ..., -8.9401e-06,\n -6.8942e-06, 5.6052e-45],\n [-5.6052e-45, 2.5644e-06, 5.6052e-45, ..., 9.5593e-06,\n -3.1913e-06, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[4.8401e-15, 1.9471e-11, 8.1013e-15, ..., 1.2060e-10, 8.4974e-11,\n 1.1169e-12],\n [2.3990e-14, 6.0794e-11, 2.1553e-14, ..., 8.6197e-11, 8.0703e-11,\n 1.6499e-12],\n [1.6022e-14, 3.9397e-11, 1.3287e-13, ..., 2.2250e-10, 1.0603e-10,\n 2.9965e-12],\n ...,\n [3.8903e-15, 2.7162e-11, 3.2012e-13, ..., 2.4657e-10, 1.0317e-10,\n 5.0358e-12],\n [8.0342e-14, 1.9714e-10, 3.0357e-14, ..., 2.9656e-10, 1.8254e-10,\n 1.9363e-12],\n [2.4853e-15, 4.0097e-11, 3.0763e-14, ..., 3.0585e-10, 1.2834e-10,\n 2.5422e-12]], device='cuda:0')" + }, + "15": { + "step": "tensor(15024.)", + "exp_avg": "tensor([5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([4.7637e-11], device='cuda:0')" + }, + "16": { + "step": "tensor(15024.)", + "exp_avg": "tensor([ 5.6052e-45, -5.6052e-45, 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([5.3022e-14, 3.0411e-13, 1.1168e-13], device='cuda:0')" + }, + "17": { + "step": "tensor(15024.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", + "exp_avg_sq": "tensor([3.7926e-10, 2.6781e-11, 1.5717e-11, 3.0072e-11, 2.3660e-11],\n device='cuda:0')" + }, + "19": { + "step": "tensor(15024.)", + "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.9593e-19, 1.4236e-18, 2.5371e-19, ..., 5.7390e-20, 6.7952e-19,\n 3.6636e-19],\n [1.5962e-16, 1.6036e-16, 1.2272e-19, ..., 2.9519e-17, 5.9032e-18,\n 1.2578e-17],\n [1.6646e-15, 1.9058e-15, 1.8942e-19, ..., 1.5367e-16, 2.0798e-16,\n 6.3127e-17],\n ...,\n [5.0008e-17, 2.2544e-17, 4.3183e-18, ..., 1.4376e-18, 1.3348e-17,\n 1.6421e-18],\n [6.3261e-18, 5.1634e-18, 1.3171e-19, ..., 1.1792e-19, 6.5490e-19,\n 7.5843e-20],\n [5.7169e-15, 6.6950e-15, 1.0207e-18, ..., 5.1628e-16, 7.3375e-16,\n 2.5059e-16]], device='cuda:0')" + }, + "20": { + "step": "tensor(15024.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([2.2852e-16, 1.4870e-13, 1.0345e-12, 2.6084e-14, 1.7780e-13, 6.9083e-13,\n 7.8153e-15, 1.0620e-12, 8.2967e-14, 1.9280e-15, 4.7902e-12, 1.5264e-14,\n 4.8336e-15, 1.0972e-13, 1.1341e-13, 8.9920e-13, 3.0347e-13, 3.0209e-14,\n 1.5040e-12, 4.5173e-13, 4.5234e-13, 4.3493e-14, 1.5496e-13, 3.1565e-13,\n 1.8825e-13, 3.0590e-14, 1.3971e-14, 2.0875e-12, 1.4028e-15, 4.4135e-14,\n 8.5047e-14, 4.1734e-13, 6.9142e-13, 2.6157e-14, 3.6682e-14, 9.9097e-14,\n 1.9437e-15, 1.2544e-13, 7.5545e-13, 3.9805e-15, 1.6060e-14, 5.1083e-14,\n 1.3700e-12, 2.3431e-13, 1.6197e-14, 1.2688e-15, 5.6227e-15, 4.4955e-14,\n 1.1039e-14, 2.4519e-16, 8.5535e-16, 9.9280e-15, 5.3073e-14, 2.8631e-13,\n 3.7505e-15, 4.2491e-13, 7.4798e-13, 3.3622e-12, 3.8276e-13, 7.1881e-16,\n 5.4848e-12, 1.8573e-16, 1.8386e-14, 2.8402e-13, 8.3258e-13, 1.2221e-12,\n 1.9899e-14, 2.1012e-14, 4.4149e-12, 4.1275e-14, 6.3766e-13, 1.7524e-13,\n 3.2749e-13, 5.7018e-14, 8.6160e-16, 8.2086e-14, 5.9409e-13, 9.6703e-15,\n 8.3201e-18, 3.3436e-16, 8.7657e-16, 1.3055e-14, 1.3779e-12, 5.8029e-13,\n 8.9784e-14, 2.1658e-14, 4.6953e-13, 8.0488e-14, 1.2439e-13, 3.1072e-12,\n 1.7847e-14, 1.1631e-13, 2.1437e-14, 8.8924e-13, 1.3960e-13, 2.5248e-14,\n 1.1740e-16, 3.4896e-14, 1.1273e-12, 2.2392e-13, 6.1005e-16, 8.2787e-13,\n 6.6716e-15, 4.1575e-16, 3.5237e-13, 6.2242e-14, 1.1435e-15, 1.5037e-14,\n 4.6763e-14, 6.1265e-14, 5.8608e-16, 1.2578e-14, 4.9608e-15, 1.4142e-15,\n 7.1572e-14, 3.0275e-14, 8.8532e-13, 2.4495e-14, 5.3540e-13, 2.3444e-13,\n 1.1281e-14, 2.0124e-13, 3.0425e-13, 2.3596e-14, 3.9966e-13, 4.5547e-14,\n 1.7711e-12, 3.0188e-14, 6.9894e-14, 4.8265e-15, 2.0628e-15, 3.2342e-14,\n 1.1373e-14, 1.5020e-13, 2.9737e-14, 4.9428e-13, 1.7884e-15, 1.1466e-14,\n 3.1232e-13, 4.4179e-14, 9.3764e-16, 2.5637e-15, 4.6150e-12, 1.1760e-11,\n 2.8617e-13, 1.2531e-15, 3.7629e-13, 3.2005e-13, 2.4836e-13, 6.6105e-15,\n 1.2444e-15, 2.2342e-13, 8.1537e-13, 2.3776e-13, 4.0200e-13, 1.2056e-13,\n 2.3142e-15, 1.1580e-14, 1.7957e-14, 1.2316e-12, 6.2970e-15, 1.8455e-13,\n 2.3144e-13, 5.8773e-15, 5.9625e-15, 8.4088e-12, 7.7319e-13, 1.8959e-15,\n 2.3901e-12, 1.7376e-16, 2.2403e-13, 1.2074e-14, 2.9702e-13, 6.4112e-14,\n 1.1914e-13, 1.6493e-12, 2.7837e-13, 1.3062e-13, 2.8430e-14, 7.4335e-13,\n 1.1442e-13, 2.7462e-13, 6.9450e-12, 8.8684e-14, 4.4682e-15, 6.9726e-17,\n 1.1557e-14, 1.0675e-12, 5.5199e-14, 2.3333e-13, 1.1707e-12, 4.4623e-13,\n 9.3514e-15, 5.3146e-15, 8.5885e-13, 1.4939e-13, 1.5432e-15, 3.3443e-13,\n 2.4661e-15, 2.8599e-13, 6.3202e-13, 3.5459e-14, 1.5618e-13, 4.4135e-15,\n 7.3416e-14, 2.2626e-12, 3.5088e-14, 2.5662e-13, 7.9283e-16, 1.9430e-12,\n 3.6828e-13, 2.2649e-15, 1.7730e-14, 1.4583e-15, 1.3659e-12, 1.3727e-14,\n 9.7334e-14, 5.7784e-13, 2.9989e-14, 2.5577e-13, 2.0574e-13, 1.5920e-13,\n 9.2272e-15, 5.2066e-13, 3.9374e-14, 1.8241e-13, 2.1233e-12, 1.9069e-14,\n 8.2183e-16, 6.6821e-15, 8.0622e-14, 1.1741e-13, 4.5968e-15, 1.5268e-13,\n 6.3773e-14, 7.3264e-13, 8.8431e-14, 5.1026e-13, 5.4036e-14, 8.9923e-15,\n 3.2418e-14, 5.4481e-15, 1.4752e-12, 9.7676e-14, 1.3632e-14, 2.9044e-13,\n 1.0573e-14, 3.0510e-14, 9.5962e-13, 2.6209e-14, 3.3157e-13, 1.8924e-15,\n 2.1250e-13, 1.4462e-14, 1.8296e-15, 3.5154e-12], device='cuda:0')" + }, + "21": { + "step": "tensor(15024.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.7183e-17, 1.7029e-17, 2.3683e-15, 1.4782e-17, 7.2735e-17, 8.3228e-17,\n 3.9451e-17, 2.7604e-16, 1.7755e-18, 9.5239e-18, 2.9572e-15, 5.8524e-18,\n 8.6232e-19, 4.0574e-17, 2.2219e-16, 7.6175e-16, 3.2770e-16, 2.3491e-18,\n 5.5844e-15, 2.8015e-15, 2.5245e-15, 3.3190e-18, 5.3456e-18, 1.9342e-16,\n 1.5970e-15, 2.2805e-17, 3.6399e-17, 2.0136e-15, 6.6121e-19, 5.0417e-17,\n 1.7127e-17, 3.2819e-16, 2.4038e-15, 6.4426e-18, 3.0315e-17, 7.4479e-16,\n 1.0674e-17, 6.0244e-17, 1.4783e-15, 9.0574e-20, 7.2153e-18, 3.1298e-18,\n 1.5223e-15, 8.4081e-17, 1.2973e-17, 1.0607e-18, 7.5983e-18, 6.2270e-18,\n 3.6324e-18, 1.1132e-17, 1.1259e-18, 1.2354e-17, 8.4135e-17, 6.5227e-17,\n 5.5974e-20, 7.6051e-17, 4.2382e-17, 2.5628e-15, 1.8813e-16, 2.0717e-18,\n 1.1963e-14, 7.6322e-20, 5.3417e-19, 3.5094e-17, 1.6268e-15, 9.9242e-16,\n 1.3312e-17, 2.1474e-17, 1.5581e-14, 5.8302e-18, 1.1613e-15, 3.6724e-18,\n 4.5309e-17, 3.2241e-16, 3.5417e-18, 2.7060e-17, 6.0512e-16, 4.7754e-17,\n 1.0214e-17, 9.0701e-20, 4.7211e-18, 3.0677e-17, 3.4211e-16, 1.7022e-15,\n 8.4345e-17, 1.1763e-16, 6.3430e-17, 4.9008e-18, 3.0639e-17, 8.4722e-15,\n 4.1328e-17, 3.6535e-17, 1.0518e-17, 1.0752e-15, 1.9484e-17, 7.0543e-17,\n 2.1430e-19, 9.7204e-17, 1.9602e-15, 3.9637e-16, 1.4105e-18, 2.2016e-15,\n 1.0358e-18, 3.0380e-18, 9.9415e-17, 1.0879e-16, 4.2913e-18, 1.5014e-17,\n 1.0434e-17, 8.8674e-18, 1.4561e-19, 1.6204e-17, 2.7067e-17, 3.8412e-18,\n 1.7512e-17, 1.9848e-18, 2.3405e-15, 5.7590e-17, 7.3689e-16, 3.3183e-17,\n 2.9613e-17, 1.9836e-16, 2.5042e-16, 1.6615e-17, 8.1596e-16, 3.6500e-19,\n 1.8369e-15, 4.4714e-19, 2.1080e-17, 1.6911e-18, 1.5832e-18, 1.7706e-18,\n 2.4198e-18, 5.9693e-17, 6.2149e-18, 1.2754e-15, 2.9137e-18, 1.5116e-18,\n 1.8963e-16, 6.6576e-18, 2.8171e-18, 1.1681e-17, 2.0396e-14, 2.3151e-14,\n 4.4373e-17, 5.2834e-20, 4.0643e-16, 3.4873e-16, 5.8615e-16, 3.0040e-19,\n 2.3129e-18, 4.4909e-16, 8.6061e-16, 2.4447e-17, 7.5567e-17, 2.2235e-16,\n 1.7480e-20, 3.7707e-17, 8.3780e-18, 8.4851e-17, 1.1338e-18, 3.0248e-16,\n 1.1505e-17, 3.4806e-17, 1.1042e-17, 1.9670e-14, 4.5333e-16, 1.1832e-18,\n 3.1328e-15, 2.2478e-20, 2.4645e-17, 1.8756e-17, 2.0028e-15, 8.5454e-17,\n 5.2559e-17, 9.6685e-16, 8.9453e-17, 6.3869e-18, 4.1098e-17, 8.4217e-16,\n 2.1952e-18, 7.9697e-17, 1.8184e-14, 3.0136e-17, 1.6340e-18, 1.3775e-18,\n 5.2388e-19, 8.5628e-16, 1.2532e-16, 2.3264e-16, 7.3040e-16, 5.9040e-17,\n 3.6388e-18, 4.4981e-19, 5.0370e-16, 2.9848e-18, 2.4479e-19, 1.9687e-17,\n 3.5785e-18, 9.2642e-17, 1.6817e-16, 3.0776e-18, 1.6336e-17, 3.8349e-18,\n 4.9344e-18, 5.3583e-15, 3.7030e-17, 2.2443e-17, 1.2471e-17, 3.1365e-15,\n 1.1639e-16, 1.5947e-18, 1.3356e-18, 2.0717e-17, 1.3356e-16, 3.9489e-19,\n 3.4349e-18, 9.6538e-16, 1.4035e-16, 4.0865e-17, 3.7787e-16, 7.5028e-16,\n 1.1907e-18, 4.2052e-16, 2.1215e-17, 6.0979e-17, 4.2810e-16, 7.5713e-18,\n 2.4321e-19, 6.5392e-19, 9.6426e-18, 2.0801e-16, 6.8229e-19, 3.0094e-16,\n 7.3208e-18, 8.5659e-16, 8.9358e-16, 3.3798e-16, 5.4092e-17, 1.9272e-19,\n 5.0593e-18, 1.0004e-18, 5.4559e-16, 2.2031e-17, 5.1468e-18, 1.4078e-16,\n 1.2290e-18, 1.0008e-18, 5.0629e-15, 2.3820e-17, 3.5157e-17, 3.0961e-18,\n 1.5292e-16, 2.4838e-17, 3.2895e-18, 9.8331e-15], device='cuda:0')" + }, + "22": { + "step": "tensor(15024.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([7.7728e-19, 2.0855e-16, 1.6527e-15, 2.3198e-17, 2.5458e-16, 9.5596e-16,\n 1.1329e-17, 1.4186e-15, 1.2259e-16, 1.6428e-17, 5.7931e-15, 8.1074e-17,\n 2.0834e-17, 1.9072e-16, 2.3440e-16, 9.2636e-16, 4.7735e-16, 5.4128e-17,\n 1.5953e-15, 8.5365e-16, 7.5143e-16, 7.2990e-17, 1.6061e-16, 6.2229e-16,\n 5.9881e-16, 4.3333e-17, 1.2401e-17, 2.3665e-15, 2.6417e-19, 8.4665e-17,\n 3.6699e-17, 5.7575e-16, 1.1063e-15, 8.0002e-17, 6.4466e-17, 4.3123e-16,\n 5.6032e-18, 8.2750e-17, 1.1808e-15, 7.5655e-19, 6.3387e-17, 7.9912e-17,\n 2.0796e-15, 3.5145e-16, 8.9731e-17, 8.3548e-18, 2.1718e-18, 9.0777e-18,\n 2.1393e-17, 8.9560e-19, 5.1636e-19, 1.7220e-17, 1.0745e-16, 2.6012e-16,\n 1.6127e-19, 4.6125e-16, 1.0143e-15, 4.5420e-15, 3.9352e-16, 9.4888e-18,\n 7.3538e-15, 2.6096e-18, 1.0124e-17, 3.6286e-16, 1.2170e-15, 1.7171e-15,\n 4.0395e-17, 6.1529e-17, 6.0137e-15, 7.2757e-17, 9.1356e-16, 1.8141e-16,\n 4.1682e-16, 2.9556e-16, 2.0616e-18, 5.7230e-17, 8.7038e-16, 3.4320e-17,\n 2.2275e-19, 2.1937e-18, 3.1793e-17, 1.0844e-17, 1.8742e-15, 1.0549e-15,\n 1.5965e-16, 1.5406e-16, 5.1049e-16, 1.2458e-16, 2.0238e-16, 3.6837e-15,\n 1.1959e-16, 1.8653e-16, 2.4088e-18, 1.2564e-15, 2.0456e-16, 2.6562e-17,\n 9.6374e-18, 3.4975e-17, 1.6323e-15, 4.3542e-16, 8.0735e-18, 1.2199e-15,\n 1.4215e-17, 5.1709e-18, 3.7381e-16, 2.5528e-16, 2.6470e-18, 9.8451e-17,\n 7.3562e-17, 1.0000e-16, 3.3741e-18, 4.4554e-18, 4.8831e-17, 1.3771e-18,\n 9.8981e-17, 5.2518e-17, 1.3476e-15, 1.7236e-17, 7.7559e-16, 3.4016e-16,\n 9.4283e-18, 2.6004e-16, 4.6997e-16, 5.3881e-17, 7.3821e-16, 3.1776e-18,\n 1.9997e-15, 5.3448e-18, 1.1553e-16, 7.7414e-19, 4.6093e-18, 4.3655e-18,\n 3.7509e-18, 1.8986e-16, 4.4937e-17, 8.2942e-16, 1.4378e-17, 6.7252e-18,\n 4.6459e-16, 6.5560e-17, 2.4833e-18, 2.9516e-19, 5.7446e-15, 1.4817e-14,\n 2.7331e-16, 1.9758e-18, 6.1831e-16, 4.9458e-16, 4.0881e-16, 1.3334e-17,\n 1.2321e-17, 5.2495e-16, 8.5276e-16, 3.4685e-16, 5.7096e-16, 1.8737e-16,\n 7.7251e-21, 8.8900e-18, 3.1603e-17, 1.4904e-15, 4.2984e-19, 2.9876e-16,\n 3.3113e-16, 9.9370e-18, 5.0518e-18, 1.1128e-14, 1.1064e-15, 4.7073e-18,\n 2.8514e-15, 6.7464e-19, 2.2863e-16, 6.6491e-18, 6.6718e-16, 1.2866e-16,\n 2.0193e-16, 1.9461e-15, 2.3144e-16, 1.3290e-16, 1.0736e-17, 1.0709e-15,\n 1.2925e-16, 2.8864e-16, 9.2754e-15, 6.9884e-17, 1.2255e-17, 4.1283e-19,\n 2.2563e-17, 1.1512e-15, 2.3739e-16, 3.7270e-16, 1.6177e-15, 5.0556e-16,\n 1.6308e-17, 1.0779e-17, 9.0519e-16, 2.1001e-16, 2.1052e-19, 4.7185e-16,\n 1.5780e-17, 2.4811e-16, 6.5940e-16, 6.2104e-17, 1.5161e-16, 4.0782e-17,\n 7.1902e-17, 3.1521e-15, 3.0502e-17, 3.7030e-16, 1.4423e-18, 2.6540e-15,\n 3.8437e-16, 7.6943e-21, 1.4520e-18, 3.8028e-18, 1.8413e-15, 5.9694e-18,\n 1.4642e-16, 8.5087e-16, 4.4506e-17, 3.7820e-16, 4.1521e-16, 3.4318e-16,\n 1.9923e-17, 6.0163e-16, 1.1679e-16, 2.1048e-16, 2.8367e-15, 6.3539e-17,\n 2.4410e-18, 2.8859e-18, 1.0495e-16, 2.0768e-16, 7.6818e-19, 3.6231e-16,\n 2.4476e-17, 1.1287e-15, 4.0963e-16, 7.3942e-16, 2.1161e-16, 6.2402e-20,\n 4.9130e-17, 1.2506e-17, 1.7152e-15, 1.8476e-16, 5.5095e-17, 4.3705e-16,\n 1.8157e-17, 5.1497e-17, 1.6171e-15, 5.4078e-17, 4.7310e-16, 1.0912e-18,\n 1.8957e-16, 1.9677e-17, 1.1018e-18, 4.9685e-15], device='cuda:0')" + }, + "23": { + "step": "tensor(15024.)", + "exp_avg": "tensor([[-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[3.6597e-15, 3.8809e-15, 2.7928e-18, ..., 2.7360e-16, 4.9148e-16,\n 9.1046e-17],\n [2.8743e-15, 3.2533e-15, 2.0365e-20, ..., 1.8131e-16, 3.7957e-16,\n 7.8973e-17],\n [8.5812e-16, 1.0088e-15, 2.1220e-18, ..., 7.7261e-17, 9.5102e-17,\n 3.0261e-17],\n ...,\n [8.4557e-17, 5.3081e-17, 4.1269e-19, ..., 5.2408e-18, 1.4605e-17,\n 2.2112e-18],\n [2.8957e-16, 3.0803e-16, 3.9837e-19, ..., 2.9625e-17, 3.6062e-17,\n 1.4313e-17],\n [2.6047e-16, 3.7875e-16, 3.3349e-19, ..., 3.5783e-17, 3.3869e-17,\n 1.9950e-17]], device='cuda:0')" + }, + "24": { + "step": "tensor(15024.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.9492e-12, 1.5273e-12, 5.0091e-13, 3.2029e-13, 3.0459e-14, 2.0195e-12,\n 1.7057e-14, 1.8088e-12, 1.7723e-12, 1.6160e-15, 2.2023e-12, 1.5029e-13,\n 2.0045e-13, 1.4088e-14, 2.1873e-15, 1.2821e-14, 3.1055e-17, 2.2546e-14,\n 4.4215e-13, 5.2443e-13, 1.1868e-13, 2.1848e-13, 2.6248e-13, 2.7287e-15,\n 1.5724e-13, 1.6645e-13, 6.3730e-14, 4.1270e-13, 5.3523e-14, 2.2555e-15,\n 2.6922e-14, 1.5211e-13, 2.4411e-13, 3.1120e-13, 1.8832e-16, 1.2202e-16,\n 1.6371e-13, 2.2863e-15, 1.7053e-13, 1.3221e-13, 3.5394e-16, 8.3121e-13,\n 2.1162e-12, 5.5265e-16, 5.6588e-14, 1.9133e-15, 2.7180e-13, 1.6056e-15,\n 5.9521e-14, 5.5003e-13, 2.9631e-15, 4.3757e-14, 8.9727e-16, 2.5931e-13,\n 1.4716e-14, 6.0909e-15, 3.9894e-12, 3.4813e-12, 1.2075e-14, 4.4043e-16,\n 5.2040e-12, 3.3759e-14, 4.0289e-13, 1.4072e-12, 1.1195e-13, 2.4212e-12,\n 5.6678e-15, 2.1766e-14, 2.5625e-13, 4.2213e-14, 5.0970e-13, 9.5387e-14,\n 8.5544e-14, 3.2676e-14, 1.9283e-14, 5.4473e-14, 2.6574e-13, 1.2280e-16,\n 1.6824e-13, 5.0231e-14, 2.4999e-13, 5.7575e-13, 2.1584e-14, 9.7974e-16,\n 2.9084e-15, 1.5019e-13, 6.0472e-13, 3.4518e-15, 9.2287e-13, 5.6534e-14,\n 2.0644e-14, 1.5181e-15, 2.7439e-14, 5.1781e-13, 1.1420e-12, 6.0607e-13,\n 3.9361e-15, 2.9574e-15, 4.9807e-13, 3.8876e-14, 2.5547e-13, 2.9164e-15,\n 6.4819e-14, 6.2879e-16, 3.5148e-14, 1.2062e-15, 2.3589e-13, 7.5842e-17,\n 2.7739e-13, 7.1528e-14, 1.3263e-14, 1.2868e-14, 1.2326e-13, 1.3299e-15,\n 3.0648e-13, 2.8734e-13, 2.1683e-14, 2.8635e-12, 9.8166e-17, 1.0743e-12,\n 6.5522e-14, 9.7585e-15, 1.7385e-13, 1.5452e-15, 3.1643e-15, 2.9901e-15,\n 1.8529e-12, 1.1151e-13, 5.3895e-16, 8.4596e-15, 3.7296e-14, 5.9388e-14,\n 1.4165e-13, 2.1808e-13, 7.0508e-15, 1.7563e-13, 3.9206e-13, 3.5369e-14,\n 8.5045e-16, 1.8631e-14, 1.6314e-13, 3.8993e-13, 4.9632e-13, 9.7987e-12,\n 6.8808e-15, 1.8067e-16, 2.8487e-13, 3.7455e-14, 1.9843e-15, 2.5982e-14,\n 6.3712e-14, 1.6882e-14, 2.8983e-14, 1.8397e-13, 1.4866e-14, 1.4571e-14,\n 6.9745e-14, 1.5882e-12, 1.3638e-14, 5.2193e-12, 8.0778e-14, 8.3223e-15,\n 3.6149e-16, 1.1933e-12, 2.9919e-13, 5.9578e-13, 5.9960e-15, 1.1346e-13,\n 2.0822e-12, 4.7343e-15, 1.0917e-12, 2.7480e-13, 1.6164e-13, 2.8665e-14,\n 2.3504e-13, 1.6517e-12, 5.9263e-13, 2.7113e-13, 4.8782e-14, 7.3466e-13,\n 1.8308e-12, 1.7780e-13, 2.0673e-12, 2.0387e-16, 2.3351e-13, 1.2592e-16,\n 3.1422e-13, 1.5782e-14, 4.9943e-14, 5.5174e-15, 1.2473e-12, 8.0861e-16,\n 1.9918e-13, 4.5153e-15, 7.4751e-14, 9.3311e-14, 6.0406e-14, 1.8473e-12,\n 1.0092e-13, 4.5688e-15, 1.7467e-13, 7.9224e-15, 1.0067e-14, 3.8017e-14,\n 7.1362e-14, 3.4301e-13, 3.3171e-14, 3.1430e-14, 2.4627e-14, 4.6043e-15,\n 1.1524e-14, 3.5732e-13, 6.3795e-16, 1.2896e-12, 1.1116e-13, 5.9827e-14,\n 5.6228e-13, 1.2401e-13, 1.2088e-12, 3.4414e-13, 1.2318e-15, 4.8153e-13,\n 1.2605e-14, 6.0890e-16, 5.3977e-14, 1.5592e-14, 2.5675e-13, 1.0029e-12,\n 4.0472e-16, 8.8393e-15, 1.4255e-13, 9.2787e-14, 1.5511e-15, 1.2009e-14,\n 2.9019e-14, 5.8078e-13, 6.6735e-14, 4.3392e-14, 1.5665e-13, 4.0436e-14,\n 8.5674e-13, 7.8168e-14, 9.3158e-13, 3.2551e-13, 3.3073e-14, 1.5009e-14,\n 3.2653e-14, 3.2784e-13, 5.3492e-13, 6.9316e-15, 5.6415e-13, 5.7076e-15,\n 4.4492e-13, 3.4134e-14, 1.9078e-13, 2.3929e-13], device='cuda:0')" + }, + "25": { + "step": "tensor(15024.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.8311e-15, 1.5655e-15, 3.1165e-15, 5.3541e-18, 4.3697e-17, 5.0226e-15,\n 5.0450e-17, 1.9172e-15, 8.2854e-16, 2.7957e-18, 6.5539e-16, 2.1551e-15,\n 2.4598e-16, 9.4006e-19, 2.1985e-18, 2.0380e-17, 6.2390e-18, 1.5896e-18,\n 1.5196e-16, 1.6366e-15, 9.7039e-16, 2.5693e-17, 2.3168e-17, 3.1766e-17,\n 1.5435e-15, 5.5794e-17, 2.9940e-17, 5.4797e-17, 4.3237e-17, 3.5064e-18,\n 8.6389e-19, 1.6629e-16, 3.1499e-17, 4.5374e-16, 9.9050e-19, 1.5213e-18,\n 6.4206e-17, 1.4210e-20, 9.7968e-17, 8.2578e-17, 1.1651e-18, 3.9343e-15,\n 1.1171e-14, 4.4094e-18, 4.6006e-16, 3.0801e-19, 1.1694e-16, 5.9653e-20,\n 1.5677e-17, 1.3213e-16, 5.1736e-19, 4.1215e-18, 1.1358e-19, 2.7258e-16,\n 1.0840e-17, 3.6787e-18, 1.9216e-15, 4.3481e-15, 5.3077e-18, 1.0746e-18,\n 7.1892e-15, 7.6440e-18, 1.2168e-15, 6.8445e-16, 4.0249e-17, 3.5943e-15,\n 4.7151e-18, 2.8836e-17, 3.7681e-18, 1.2188e-17, 1.4925e-16, 1.0257e-17,\n 1.3502e-17, 3.1225e-17, 2.0570e-17, 6.7901e-18, 9.5948e-17, 3.0293e-18,\n 4.6492e-17, 8.7111e-18, 6.8311e-16, 9.2080e-17, 6.0364e-17, 1.5881e-17,\n 2.8765e-18, 1.9414e-15, 2.8944e-16, 1.8113e-18, 1.2626e-15, 6.9821e-18,\n 3.9976e-18, 1.6317e-18, 7.5327e-19, 2.5832e-16, 3.3019e-15, 1.5304e-16,\n 3.4097e-18, 2.4528e-18, 2.8188e-16, 2.5002e-17, 4.1315e-16, 3.6464e-18,\n 2.8607e-17, 3.0635e-18, 1.4061e-18, 2.8731e-19, 2.1495e-16, 8.1154e-19,\n 7.5388e-17, 8.0430e-18, 8.0074e-19, 1.6576e-16, 3.3240e-16, 1.2292e-18,\n 2.1824e-16, 8.7068e-16, 2.1489e-18, 6.6243e-15, 1.0697e-19, 1.6829e-15,\n 2.5186e-18, 1.0434e-18, 7.6844e-17, 2.5374e-20, 2.9362e-18, 1.3804e-19,\n 2.3343e-15, 3.2361e-17, 1.9887e-21, 1.0384e-18, 1.7506e-18, 4.9039e-18,\n 6.5116e-17, 3.2938e-16, 3.3142e-18, 4.0529e-17, 1.4457e-15, 6.5945e-18,\n 9.1846e-19, 5.7067e-17, 1.5336e-17, 8.2408e-17, 8.0231e-17, 2.3878e-14,\n 1.3619e-17, 2.4588e-20, 1.8655e-16, 4.9289e-18, 6.8562e-19, 1.8448e-17,\n 1.4180e-16, 2.1320e-18, 2.5911e-19, 7.1147e-17, 3.3348e-18, 4.3799e-17,\n 1.4036e-17, 1.4523e-15, 1.9768e-18, 4.9598e-15, 1.9262e-17, 3.0620e-19,\n 7.2789e-18, 3.5601e-15, 6.1476e-16, 8.5942e-18, 1.3739e-17, 1.6064e-17,\n 5.9389e-15, 1.0654e-19, 1.7025e-15, 2.7999e-17, 1.5045e-16, 8.8731e-18,\n 1.0941e-16, 1.6540e-15, 2.5505e-16, 6.4362e-17, 1.0992e-17, 5.3656e-16,\n 1.2832e-15, 7.2209e-18, 7.1038e-16, 3.8886e-19, 1.4141e-16, 2.3354e-18,\n 2.3362e-16, 2.0581e-17, 6.5972e-17, 9.7664e-18, 3.5856e-16, 6.6542e-18,\n 1.8797e-17, 1.9279e-18, 4.0897e-18, 9.6629e-18, 3.4414e-17, 5.3064e-15,\n 1.6477e-17, 4.4193e-18, 2.8653e-17, 4.5128e-19, 1.9488e-18, 9.4957e-17,\n 1.7504e-16, 7.3422e-17, 2.7989e-17, 1.4028e-18, 4.9309e-18, 1.0573e-17,\n 9.3064e-18, 4.3041e-16, 7.8412e-19, 4.3535e-15, 5.0394e-18, 5.6886e-18,\n 1.8370e-16, 1.6372e-17, 2.3855e-16, 3.0700e-17, 8.9042e-18, 1.0233e-15,\n 8.6754e-18, 1.5841e-18, 1.7847e-16, 6.7932e-19, 4.7725e-18, 1.8221e-15,\n 2.2780e-19, 6.9371e-19, 1.7238e-17, 7.4576e-17, 1.5979e-19, 3.3618e-18,\n 8.7754e-19, 1.9385e-15, 3.7403e-17, 1.3392e-18, 1.9525e-15, 3.2533e-18,\n 1.2504e-15, 2.2885e-17, 4.6180e-16, 4.5936e-16, 1.3970e-17, 5.9419e-19,\n 3.2494e-19, 5.0461e-16, 5.9599e-16, 5.7377e-17, 1.5127e-16, 2.8066e-18,\n 7.6165e-16, 9.5333e-17, 2.2037e-17, 9.9377e-17], device='cuda:0')" + }, + "26": { + "step": "tensor(15024.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([2.4998e-15, 1.9059e-15, 1.0664e-15, 3.5294e-16, 8.5791e-17, 2.6048e-15,\n 1.6581e-17, 2.4687e-15, 2.2219e-15, 8.1525e-19, 2.7567e-15, 6.4112e-16,\n 3.7078e-16, 1.9837e-17, 8.8381e-19, 7.6047e-18, 2.8145e-19, 3.0912e-17,\n 5.3294e-16, 1.0823e-15, 4.2432e-16, 2.7831e-16, 2.9592e-16, 1.0986e-17,\n 6.1457e-16, 2.0289e-16, 1.6041e-16, 4.5559e-16, 6.7176e-17, 1.2604e-18,\n 7.9511e-18, 3.2540e-16, 3.1123e-16, 7.1221e-16, 1.0710e-19, 1.0330e-18,\n 2.1671e-16, 5.4484e-19, 2.2538e-16, 1.5738e-16, 6.1451e-18, 1.1846e-15,\n 3.2102e-15, 1.5804e-18, 3.1863e-16, 3.0757e-18, 2.3504e-16, 2.3941e-18,\n 7.3777e-17, 6.9220e-16, 4.9564e-18, 5.8983e-17, 1.3547e-18, 2.0348e-16,\n 2.0050e-17, 1.4213e-18, 5.0161e-15, 4.3999e-15, 1.7677e-18, 7.7926e-18,\n 6.4780e-15, 5.0072e-17, 4.9996e-16, 1.8180e-15, 1.3964e-16, 3.1031e-15,\n 1.8418e-18, 3.6909e-17, 3.2536e-16, 5.7056e-17, 6.3861e-16, 9.5571e-17,\n 1.3808e-16, 1.2382e-16, 2.5473e-17, 1.6958e-17, 3.3664e-16, 1.4574e-17,\n 2.6675e-16, 6.6199e-17, 3.6292e-16, 7.2128e-16, 2.0738e-17, 5.6123e-18,\n 2.0200e-17, 4.4352e-16, 7.5991e-16, 4.2122e-18, 1.2467e-15, 8.3487e-17,\n 3.0019e-17, 1.3318e-17, 1.3330e-17, 6.3884e-16, 1.4172e-15, 8.2414e-16,\n 3.6737e-17, 3.1624e-19, 7.6240e-16, 1.3265e-16, 3.9913e-16, 6.6274e-19,\n 1.0968e-16, 3.8654e-18, 1.9203e-17, 1.7482e-17, 3.0784e-16, 3.7091e-20,\n 4.1212e-16, 9.2444e-17, 1.8369e-17, 1.4777e-16, 2.3213e-16, 1.0351e-18,\n 4.2293e-16, 4.1649e-16, 3.0973e-17, 3.8312e-15, 5.1727e-19, 1.2993e-15,\n 5.0101e-17, 1.1267e-17, 2.1584e-16, 1.0974e-18, 2.8456e-17, 2.4072e-20,\n 2.2379e-15, 9.3115e-17, 1.4121e-20, 3.8618e-18, 5.0761e-17, 3.0514e-17,\n 1.1211e-16, 3.8298e-16, 4.8841e-18, 2.2763e-16, 7.2971e-16, 4.1386e-17,\n 8.9699e-19, 2.2269e-17, 2.1105e-16, 4.0350e-16, 7.5981e-16, 1.2724e-14,\n 2.5919e-18, 2.3648e-19, 4.0086e-16, 5.0007e-17, 8.1410e-20, 2.1990e-17,\n 2.5786e-16, 2.6193e-17, 1.9798e-17, 2.3238e-16, 1.9715e-17, 8.5410e-17,\n 1.7533e-17, 1.9704e-15, 1.5794e-17, 6.5674e-15, 3.4179e-17, 1.2330e-17,\n 1.1695e-18, 1.7142e-15, 3.5009e-16, 7.5824e-16, 8.8858e-18, 1.4495e-16,\n 2.6279e-15, 6.8638e-18, 1.3203e-15, 3.1011e-16, 3.8402e-16, 3.6644e-17,\n 3.2700e-16, 2.1151e-15, 6.5215e-16, 3.6477e-16, 3.9405e-17, 9.1274e-16,\n 2.2756e-15, 1.9507e-16, 2.6034e-15, 1.1593e-17, 2.9154e-16, 8.9385e-19,\n 4.1444e-16, 7.1656e-18, 1.6083e-16, 3.9409e-18, 1.5663e-15, 7.0355e-19,\n 2.5305e-16, 5.8775e-18, 5.6115e-17, 1.2187e-16, 4.9031e-17, 2.3170e-15,\n 1.3183e-16, 1.2066e-18, 1.4701e-16, 1.1665e-17, 5.7872e-18, 1.6128e-16,\n 5.8863e-17, 4.3284e-16, 4.9418e-17, 4.1660e-17, 5.7515e-17, 2.2381e-17,\n 3.4338e-18, 4.1088e-16, 9.4044e-19, 1.8794e-15, 1.4360e-16, 5.4685e-17,\n 7.1652e-16, 1.5637e-16, 1.5180e-15, 4.4217e-16, 3.3308e-18, 8.3253e-16,\n 7.4315e-18, 6.0611e-19, 2.3358e-16, 1.4793e-17, 3.3033e-16, 1.4483e-15,\n 1.2194e-19, 1.9221e-18, 2.1433e-16, 1.1477e-16, 2.6905e-19, 5.7903e-17,\n 6.6376e-18, 1.0843e-15, 1.2672e-16, 5.7111e-17, 6.1184e-16, 1.0882e-17,\n 1.0732e-15, 1.0749e-16, 1.0884e-15, 5.9718e-16, 9.7446e-17, 2.0657e-17,\n 4.4281e-17, 4.1560e-16, 7.2387e-16, 9.3609e-17, 7.2314e-16, 9.0727e-19,\n 5.2030e-16, 3.0986e-17, 2.4097e-16, 5.3366e-16], device='cuda:0')" + }, + "27": { + "step": "tensor(15024.)", + "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.7287e-17, 1.0874e-19, 2.1969e-18, ..., 1.6966e-19, 6.1186e-19,\n 3.0017e-20],\n [3.7614e-15, 3.7221e-15, 2.5889e-18, ..., 2.6236e-16, 5.0416e-16,\n 1.0550e-16],\n [3.0143e-16, 3.5346e-16, 1.4374e-19, ..., 2.7881e-17, 4.1356e-17,\n 1.7364e-17],\n ...,\n [3.1177e-15, 3.3216e-15, 2.2773e-18, ..., 1.8137e-16, 4.2989e-16,\n 7.4442e-17],\n [4.3335e-17, 5.9721e-17, 8.5525e-19, ..., 4.8615e-18, 3.8050e-18,\n 1.2151e-18],\n [5.4863e-15, 5.6843e-15, 1.5604e-18, ..., 4.2262e-16, 7.0299e-16,\n 1.7015e-16]], device='cuda:0')" + }, + "28": { + "step": "tensor(15024.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([7.2931e-16, 2.0405e-12, 2.1057e-13, 4.3425e-13, 8.3055e-14, 6.8889e-15,\n 2.1245e-13, 1.0523e-12, 6.1506e-13, 7.6527e-15, 2.4301e-14, 1.4604e-14,\n 3.2910e-14, 3.1632e-14, 6.2628e-15, 3.0811e-14, 1.0928e-15, 1.1743e-13,\n 9.2971e-14, 7.5068e-13, 1.2967e-14, 3.3104e-13, 2.0522e-12, 3.8217e-13,\n 5.5597e-13, 3.9516e-15, 1.1819e-14, 2.2083e-12, 4.9943e-17, 3.6112e-15,\n 1.1601e-14, 4.2681e-13, 2.4462e-13, 1.7532e-13, 3.7984e-14, 1.0126e-13,\n 6.9205e-14, 2.4983e-14, 1.6789e-13, 3.4281e-14, 3.1470e-14, 1.5960e-13,\n 1.4656e-12, 1.3956e-13, 2.4903e-14, 1.3039e-13, 4.9646e-14, 1.0904e-13,\n 2.2787e-15, 2.4953e-13, 1.5082e-13, 3.2214e-12, 2.5220e-14, 5.9036e-14,\n 9.6614e-15, 8.5136e-13, 7.5302e-12, 1.6353e-12, 1.2367e-13, 1.7527e-13,\n 1.4234e-12, 2.4858e-14, 2.8189e-13, 3.0134e-12, 6.8597e-14, 1.1014e-12,\n 7.4657e-14, 7.1010e-14, 2.6861e-12, 5.4884e-13, 3.8630e-16, 3.4499e-14,\n 2.3481e-14, 3.3344e-14, 2.6056e-16, 1.0828e-14, 1.0484e-15, 4.1288e-15,\n 2.1766e-13, 1.8491e-14, 1.9684e-15, 5.6311e-13, 8.4865e-14, 4.1495e-13,\n 8.6598e-16, 6.6872e-14, 4.6583e-13, 7.6836e-15, 1.6670e-12, 9.9768e-15,\n 3.3400e-13, 1.3757e-13, 4.7058e-13, 6.9132e-14, 3.1393e-13, 3.4809e-12,\n 5.3102e-15, 2.3972e-15, 2.0791e-13, 8.5005e-14, 2.4780e-15, 3.5940e-14,\n 1.6147e-14, 1.9044e-13, 4.2053e-15, 5.9094e-14, 2.6458e-13, 1.0366e-16,\n 4.0272e-14, 2.0074e-15, 1.5870e-16, 2.8389e-14, 2.2686e-16, 1.6247e-14,\n 2.7400e-13, 4.7225e-13, 2.9205e-13, 2.2303e-12, 2.2147e-15, 1.3897e-14,\n 9.2194e-13, 1.8366e-15, 3.5575e-15, 2.9001e-16, 3.9203e-15, 3.3425e-15,\n 2.3781e-13, 3.6977e-14, 4.3456e-14, 1.7977e-13, 8.6883e-14, 1.4132e-14,\n 3.5375e-14, 1.0983e-14, 2.3639e-14, 6.7813e-17, 6.0306e-14, 5.0377e-14,\n 1.9196e-13, 3.5268e-12, 2.4492e-13, 1.4117e-15, 1.2234e-13, 6.0030e-12,\n 1.5482e-13, 2.6558e-14, 1.6402e-14, 2.1318e-14, 1.6996e-14, 4.2805e-14,\n 8.3387e-15, 5.8139e-15, 4.5691e-15, 4.4317e-13, 3.7500e-15, 1.8376e-15,\n 1.8839e-15, 8.5251e-13, 2.2469e-14, 1.4450e-13, 1.3117e-13, 1.1489e-16,\n 3.6141e-15, 1.3656e-12, 8.4558e-15, 8.8431e-13, 8.1294e-16, 1.7178e-13,\n 2.5494e-12, 4.0779e-14, 1.3093e-12, 2.8773e-14, 6.1543e-14, 2.2750e-16,\n 3.1303e-15, 3.3521e-14, 1.3284e-13, 1.7459e-16, 1.0948e-12, 2.3344e-14,\n 5.2716e-13, 3.7465e-13, 1.8423e-14, 1.2747e-14, 1.6509e-15, 6.7901e-14,\n 1.8926e-14, 7.7776e-15, 2.7713e-14, 1.2253e-13, 6.5397e-13, 3.8879e-17,\n 2.1636e-13, 3.5928e-13, 2.8315e-13, 8.8666e-12, 2.8392e-15, 1.5439e-13,\n 2.7570e-14, 2.2810e-13, 7.8401e-14, 2.5566e-13, 8.0573e-14, 4.2953e-14,\n 8.7667e-14, 7.8956e-14, 4.5773e-16, 1.6826e-13, 4.0025e-16, 1.9251e-12,\n 1.4644e-14, 3.6014e-15, 2.9648e-15, 1.9403e-12, 6.0338e-13, 1.4426e-13,\n 1.0672e-13, 1.6672e-12, 5.4848e-12, 1.4986e-15, 4.6063e-15, 4.9422e-13,\n 2.2037e-15, 3.2476e-13, 2.5169e-14, 6.8450e-15, 1.5436e-12, 1.2453e-12,\n 1.3791e-15, 5.2254e-15, 1.1539e-12, 6.4021e-14, 9.0518e-14, 4.8376e-14,\n 5.9978e-14, 1.0736e-15, 3.6560e-14, 1.2041e-15, 1.8361e-13, 3.0307e-14,\n 8.8918e-16, 1.5282e-13, 1.2741e-12, 6.2973e-14, 1.9620e-13, 5.0544e-14,\n 1.4701e-12, 1.4105e-14, 5.9116e-13, 1.0322e-13, 1.7840e-14, 7.8568e-17,\n 4.7163e-14, 1.4040e-12, 2.6538e-14, 2.9116e-12], device='cuda:0')" + }, + "29": { + "step": "tensor(15024.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.0787e-17, 3.4276e-15, 8.2893e-17, 9.2334e-18, 1.3908e-17, 3.5783e-17,\n 4.8422e-18, 3.9145e-16, 1.0773e-16, 2.2215e-17, 8.4544e-17, 4.2323e-18,\n 2.0846e-17, 1.7827e-16, 2.2656e-17, 5.3455e-17, 5.0301e-19, 2.0031e-16,\n 3.9775e-18, 1.2066e-15, 2.3957e-17, 8.4969e-17, 9.4229e-15, 1.5699e-16,\n 1.6042e-15, 6.5836e-19, 2.8926e-18, 1.4479e-15, 1.7685e-18, 9.5964e-20,\n 3.1368e-18, 3.3625e-16, 5.4259e-17, 2.9931e-16, 4.2776e-18, 9.2822e-17,\n 6.2645e-18, 4.3409e-18, 1.7069e-16, 2.9910e-18, 6.6052e-17, 3.5751e-17,\n 1.9408e-15, 1.6413e-16, 5.1793e-17, 3.9717e-16, 2.5944e-18, 4.4633e-17,\n 3.6968e-18, 3.0016e-16, 7.9492e-17, 9.5524e-15, 1.4708e-17, 4.3049e-18,\n 2.2288e-17, 6.8027e-16, 1.6595e-14, 7.3661e-16, 4.6326e-17, 1.0108e-16,\n 1.9641e-16, 4.7138e-17, 7.6138e-17, 1.5008e-15, 3.4603e-18, 5.2940e-16,\n 6.7716e-17, 4.1873e-16, 1.0058e-14, 8.7460e-16, 4.5101e-18, 7.0350e-17,\n 9.0493e-18, 8.6474e-17, 3.3545e-17, 1.4830e-18, 1.3889e-17, 4.8920e-19,\n 5.7734e-17, 1.5598e-17, 5.0881e-20, 3.5402e-16, 4.1934e-18, 3.6629e-15,\n 1.1854e-18, 8.0583e-16, 6.5344e-17, 8.4907e-18, 3.4225e-15, 2.9605e-17,\n 8.7038e-16, 1.0507e-17, 1.3142e-16, 4.5063e-18, 6.4851e-17, 3.6926e-15,\n 1.6783e-16, 6.7518e-19, 1.4405e-16, 5.6231e-17, 1.8912e-17, 2.5119e-18,\n 7.2154e-18, 2.7460e-17, 5.6308e-18, 2.0401e-16, 5.6240e-16, 1.4199e-19,\n 8.8668e-18, 8.4564e-18, 4.3822e-18, 7.4117e-17, 4.4497e-17, 9.8335e-19,\n 1.2680e-15, 5.6621e-16, 2.0017e-16, 4.9837e-15, 1.1031e-19, 4.0350e-18,\n 5.2562e-16, 4.6491e-18, 5.7179e-19, 1.1734e-18, 1.8932e-18, 4.2116e-19,\n 4.4503e-18, 1.9819e-18, 2.5090e-18, 1.4495e-17, 1.5699e-17, 2.9388e-19,\n 1.1430e-18, 6.1362e-18, 8.6112e-18, 2.0162e-18, 3.2155e-17, 5.1464e-17,\n 3.9484e-16, 1.1176e-14, 3.6511e-17, 8.8419e-18, 2.3350e-17, 2.6393e-15,\n 2.8577e-17, 3.2031e-17, 1.6876e-18, 4.8815e-19, 2.0049e-18, 1.4570e-17,\n 2.7429e-18, 8.5853e-19, 6.4243e-18, 1.4727e-15, 8.8251e-19, 4.1948e-18,\n 1.7220e-19, 5.6173e-16, 8.3776e-20, 1.0069e-17, 5.4046e-17, 1.1857e-18,\n 3.2927e-17, 4.8691e-15, 4.8883e-19, 8.1479e-17, 8.0398e-18, 9.1787e-17,\n 3.4162e-15, 6.5067e-17, 2.7724e-15, 5.7912e-19, 7.4816e-17, 1.3300e-18,\n 2.3198e-17, 5.6027e-17, 1.6750e-17, 4.9967e-18, 2.4146e-16, 7.5002e-19,\n 5.5345e-17, 3.9196e-16, 1.0202e-16, 7.8617e-17, 9.3379e-18, 4.1571e-17,\n 4.7888e-17, 3.8280e-18, 2.8752e-17, 7.0185e-17, 3.1501e-16, 1.0031e-17,\n 4.0602e-17, 2.8345e-16, 2.6716e-17, 2.5710e-14, 5.3061e-20, 1.1050e-17,\n 1.1397e-18, 3.3643e-17, 4.7332e-18, 3.4064e-16, 6.3199e-18, 1.0741e-16,\n 2.3375e-17, 1.3692e-18, 3.0769e-18, 3.0508e-17, 3.1796e-18, 3.9432e-15,\n 9.7325e-18, 1.0825e-18, 2.2325e-18, 6.5372e-15, 2.4424e-17, 2.3105e-17,\n 1.1648e-17, 3.8382e-15, 1.0482e-14, 1.5144e-17, 2.1330e-18, 1.4348e-15,\n 2.8595e-19, 1.2711e-16, 1.2713e-17, 4.6559e-19, 2.9157e-16, 9.1619e-16,\n 4.7760e-20, 4.4354e-18, 2.0614e-16, 4.9179e-17, 4.4801e-16, 1.2655e-15,\n 4.6405e-18, 6.3162e-18, 4.9976e-17, 4.0665e-18, 3.3186e-16, 2.6987e-19,\n 1.3521e-17, 1.9376e-16, 1.4774e-15, 4.5247e-17, 1.7293e-16, 8.1456e-18,\n 1.2930e-15, 4.6792e-19, 4.3905e-16, 2.4648e-16, 2.4895e-18, 4.9321e-20,\n 5.2966e-18, 9.3272e-17, 2.0392e-18, 5.6909e-15], device='cuda:0')" + }, + "30": { + "step": "tensor(15024.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([2.3846e-18, 2.8094e-15, 3.2838e-16, 4.9867e-16, 1.3020e-16, 1.6672e-17,\n 2.9074e-16, 1.4463e-15, 8.5073e-16, 3.5179e-17, 2.3994e-17, 3.8845e-17,\n 1.1397e-16, 2.1295e-16, 4.9779e-17, 1.8437e-17, 1.8781e-18, 2.1514e-16,\n 1.0923e-16, 1.1096e-15, 8.7530e-18, 4.8110e-16, 2.0432e-15, 6.6615e-16,\n 9.2343e-16, 7.6758e-18, 2.8053e-17, 2.4960e-15, 5.1755e-20, 6.0641e-18,\n 1.0977e-17, 6.4385e-16, 3.5785e-16, 5.5983e-16, 8.9209e-18, 2.5206e-16,\n 1.0844e-16, 2.3962e-17, 2.9529e-16, 1.9524e-17, 1.1466e-16, 2.4473e-16,\n 2.0975e-15, 2.5837e-16, 1.3429e-16, 2.4568e-16, 3.9403e-17, 2.7124e-17,\n 1.3170e-18, 3.0317e-16, 2.2642e-16, 4.6309e-15, 5.2170e-17, 4.1307e-17,\n 2.8201e-17, 9.1487e-16, 1.0093e-14, 2.2274e-15, 1.2482e-16, 2.6024e-16,\n 1.8791e-15, 7.6742e-17, 2.7850e-16, 3.6573e-15, 1.0197e-16, 1.5283e-15,\n 1.1744e-16, 2.2495e-16, 3.8156e-15, 8.2725e-16, 4.1659e-19, 2.5870e-17,\n 5.1021e-17, 1.8761e-16, 4.2636e-17, 3.9183e-18, 4.5670e-18, 1.8876e-19,\n 3.2832e-16, 4.5649e-17, 1.2899e-18, 7.0871e-16, 1.1356e-16, 9.5399e-16,\n 7.0322e-18, 2.6088e-16, 5.3097e-16, 4.3089e-18, 2.4704e-15, 7.7990e-18,\n 6.0764e-16, 1.9726e-16, 4.9130e-16, 9.5034e-17, 4.4494e-16, 4.2320e-15,\n 1.1210e-16, 1.9724e-19, 3.6165e-16, 1.8349e-16, 1.0907e-17, 5.4050e-17,\n 4.7700e-17, 2.3883e-16, 2.0780e-18, 2.8974e-16, 4.4536e-16, 2.5481e-18,\n 1.1439e-16, 1.8086e-18, 1.9742e-18, 8.3107e-17, 4.3147e-17, 2.3182e-17,\n 3.0196e-16, 7.1933e-16, 4.5704e-16, 2.7119e-15, 4.2938e-18, 1.7621e-17,\n 9.8508e-16, 7.6363e-19, 5.8139e-18, 6.2498e-19, 2.4389e-17, 6.9284e-18,\n 2.5377e-16, 2.0053e-17, 2.1981e-17, 1.5739e-16, 1.3125e-16, 3.3563e-18,\n 2.9498e-17, 1.6788e-18, 2.3538e-17, 1.1672e-18, 1.3038e-16, 7.9860e-17,\n 3.2468e-16, 4.9283e-15, 3.4270e-16, 8.5299e-19, 2.4049e-16, 7.5416e-15,\n 1.3592e-16, 1.3380e-17, 3.3121e-17, 3.2882e-17, 3.0503e-17, 7.2119e-17,\n 6.3419e-19, 1.4388e-17, 2.1060e-18, 7.1307e-16, 4.4778e-18, 6.3666e-19,\n 2.0808e-20, 1.1900e-15, 1.5918e-17, 1.6966e-16, 6.1122e-17, 9.7123e-20,\n 1.3619e-17, 1.6561e-15, 1.5360e-17, 1.1739e-15, 5.7872e-18, 2.7750e-16,\n 2.9321e-15, 4.6315e-17, 1.3481e-15, 1.8121e-17, 1.9784e-16, 4.0244e-19,\n 1.1735e-17, 1.6398e-17, 1.0860e-16, 1.4199e-18, 1.2192e-15, 3.4108e-17,\n 6.2278e-16, 3.3472e-16, 2.5800e-17, 3.7648e-17, 3.0986e-18, 1.2504e-16,\n 1.0090e-16, 2.3276e-18, 1.3167e-16, 2.0254e-16, 9.1090e-16, 1.4848e-20,\n 3.1687e-16, 5.1971e-16, 2.7491e-16, 1.1838e-14, 2.3375e-19, 2.2197e-16,\n 4.1848e-17, 1.9919e-16, 6.2471e-17, 4.2102e-16, 6.5650e-17, 1.3115e-16,\n 8.9960e-17, 1.1228e-16, 2.2312e-18, 2.4441e-16, 3.9189e-18, 2.4879e-15,\n 3.5576e-18, 6.2130e-19, 6.5053e-19, 2.4911e-15, 8.0473e-16, 1.3431e-16,\n 1.5378e-16, 2.3687e-15, 7.3690e-15, 1.0455e-17, 8.7289e-18, 7.0679e-16,\n 3.9024e-18, 3.8065e-16, 9.5626e-17, 3.0667e-18, 2.0447e-15, 1.6201e-15,\n 1.4861e-18, 1.2593e-17, 1.4457e-15, 1.0875e-16, 1.4397e-16, 2.9357e-16,\n 2.6132e-17, 8.7299e-18, 1.6471e-16, 4.4829e-19, 4.8156e-16, 1.3674e-17,\n 1.1968e-18, 2.9490e-16, 1.4233e-15, 1.3658e-16, 2.8618e-16, 8.3410e-17,\n 2.0372e-15, 2.4162e-17, 8.7511e-16, 1.6143e-16, 2.6480e-17, 7.1423e-20,\n 5.5956e-17, 1.8514e-15, 3.8546e-17, 4.0383e-15], device='cuda:0')" + }, + "31": { + "step": "tensor(15024.)", + "exp_avg": "tensor([[-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.2296e-15, 1.2808e-15, 6.4745e-19, ..., 8.9767e-17, 1.4683e-16,\n 4.2670e-17],\n [1.6580e-17, 2.1420e-17, 3.8502e-20, ..., 1.5573e-18, 1.9412e-18,\n 4.1946e-19],\n [1.0869e-15, 1.1964e-15, 4.7284e-19, ..., 9.5644e-17, 1.1532e-16,\n 4.1609e-17],\n ...,\n [7.8089e-15, 8.7674e-15, 1.7484e-19, ..., 7.0895e-16, 8.7702e-16,\n 3.9827e-16],\n [5.7711e-16, 6.6082e-16, 1.4115e-18, ..., 4.6321e-17, 7.9577e-17,\n 2.3436e-17],\n [6.6505e-18, 7.0099e-18, 1.2129e-21, ..., 4.6185e-19, 5.5965e-19,\n 1.2744e-19]], device='cuda:0')" + }, + "32": { + "step": "tensor(15024.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([6.4221e-13, 7.7337e-15, 6.1741e-13, 2.4874e-14, 5.2905e-16, 8.4595e-15,\n 1.2429e-13, 1.2047e-12, 4.4880e-13, 2.5413e-14, 4.4566e-12, 3.4839e-14,\n 2.2756e-16, 2.1623e-16, 3.4475e-15, 8.3146e-14, 5.1822e-15, 4.0334e-13,\n 6.6921e-13, 2.5209e-13, 4.5352e-13, 1.2257e-14, 4.0581e-14, 1.7765e-12,\n 1.1764e-14, 1.5690e-13, 5.5795e-13, 1.0063e-14, 7.5946e-16, 2.1434e-13,\n 8.6201e-15, 7.5204e-13, 1.9481e-13, 1.6155e-13, 4.7770e-14, 5.0507e-13,\n 1.1978e-14, 6.4001e-14, 4.5925e-13, 3.4204e-14, 1.5930e-14, 1.3127e-13,\n 4.4159e-13, 1.1119e-14, 8.0377e-16, 4.1409e-15, 2.0828e-13, 3.3606e-15,\n 4.2797e-13, 3.8949e-13, 6.5921e-14, 3.9427e-13, 5.2108e-15, 1.9547e-13,\n 7.3362e-14, 5.6423e-13, 4.4511e-13, 1.6641e-12, 8.8724e-14, 3.1274e-13,\n 5.9542e-12, 4.2729e-15, 5.9328e-15, 5.6439e-13, 7.0960e-15, 1.1786e-12,\n 1.4174e-14, 1.1734e-14, 9.6787e-16, 2.2615e-13, 3.2768e-13, 4.7574e-13,\n 3.9286e-13, 1.0667e-13, 2.6766e-15, 2.7632e-15, 5.1195e-14, 3.6910e-14,\n 4.5227e-13, 1.5065e-14, 2.5423e-16, 1.9467e-12, 1.6451e-12, 4.8392e-16,\n 1.4539e-13, 4.2505e-14, 6.7664e-13, 2.0540e-13, 1.9668e-12, 8.3058e-15,\n 5.1587e-13, 2.3732e-13, 2.9755e-14, 9.6717e-15, 8.4468e-14, 1.4554e-12,\n 2.3675e-13, 3.0444e-15, 1.0937e-12, 8.2439e-14, 5.7917e-13, 1.5453e-15,\n 2.7375e-14, 1.3465e-13, 2.3417e-14, 1.1812e-13, 5.9510e-14, 7.3777e-16,\n 7.9890e-14, 4.9017e-15, 3.1662e-13, 3.0551e-18, 3.0688e-15, 1.4596e-15,\n 9.6637e-13, 1.5414e-13, 3.6410e-13, 8.1916e-13, 1.5408e-14, 1.9094e-12,\n 8.5235e-14, 7.7461e-14, 2.1076e-13, 9.7326e-16, 1.7841e-14, 7.1810e-15,\n 7.2015e-15, 1.1077e-13, 2.1789e-14, 8.6079e-15, 5.2915e-15, 1.5034e-13,\n 6.0234e-15, 1.0081e-16, 2.6516e-15, 1.5688e-14, 5.2218e-15, 2.1510e-14,\n 1.1153e-13, 1.8616e-12, 3.6012e-13, 2.1591e-14, 2.3548e-12, 1.2864e-11,\n 9.6046e-13, 3.2926e-14, 1.1763e-14, 4.7288e-13, 2.6823e-16, 4.4839e-14,\n 5.1900e-14, 2.1202e-15, 1.6256e-14, 4.4279e-13, 4.0739e-13, 1.5773e-13,\n 5.2365e-14, 1.0553e-12, 1.2832e-14, 7.2878e-14, 6.5013e-15, 1.0540e-15,\n 1.9983e-13, 4.4671e-13, 4.8005e-15, 2.9704e-13, 3.2646e-13, 1.0681e-14,\n 1.2109e-12, 3.7161e-15, 1.7241e-12, 4.5515e-13, 3.9857e-13, 8.8506e-16,\n 1.6398e-14, 4.5639e-14, 1.8121e-13, 7.2122e-13, 1.0884e-12, 2.2295e-15,\n 2.9116e-12, 2.3288e-15, 2.1898e-12, 2.3225e-14, 1.4054e-14, 1.4203e-13,\n 6.9231e-15, 1.2535e-14, 1.7073e-13, 3.7662e-14, 6.5743e-14, 2.1637e-14,\n 9.6043e-13, 4.5565e-13, 5.9462e-14, 8.6197e-12, 2.9284e-14, 2.5177e-15,\n 1.6570e-14, 2.2068e-14, 1.3692e-12, 6.9864e-15, 2.7391e-13, 3.4741e-15,\n 1.3459e-14, 2.6235e-13, 8.0671e-15, 9.2974e-13, 8.1771e-15, 3.3491e-12,\n 2.2958e-15, 3.1953e-13, 7.0044e-15, 8.5794e-13, 4.9416e-14, 1.5238e-14,\n 6.6371e-16, 3.5586e-14, 5.7262e-12, 4.7291e-13, 7.6013e-14, 5.0635e-15,\n 1.7732e-15, 8.7317e-14, 4.2224e-14, 1.1582e-13, 1.4911e-12, 6.3209e-14,\n 1.9771e-14, 1.5980e-14, 1.1118e-14, 5.1582e-14, 4.3680e-15, 2.5653e-13,\n 6.9015e-14, 3.5288e-13, 3.4422e-14, 1.9577e-15, 1.9416e-13, 5.3469e-13,\n 7.9815e-13, 5.6838e-15, 4.7495e-13, 3.3295e-13, 3.1126e-15, 4.7497e-15,\n 5.3169e-13, 1.5209e-13, 7.4595e-14, 8.1333e-15, 4.1117e-13, 1.4948e-15,\n 3.5300e-13, 5.0369e-12, 3.7174e-13, 3.3428e-15], device='cuda:0')" + }, + "33": { + "step": "tensor(15024.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([3.3278e-17, 2.7436e-17, 2.2045e-16, 1.6220e-17, 5.7939e-17, 3.7107e-17,\n 5.8951e-18, 1.1106e-15, 5.1795e-17, 3.4918e-17, 6.7086e-15, 1.5157e-17,\n 1.1606e-17, 6.6293e-19, 7.1280e-17, 2.3591e-18, 4.4711e-18, 1.1429e-15,\n 3.6802e-16, 1.5687e-16, 2.8802e-16, 4.4273e-17, 2.0672e-18, 3.4288e-15,\n 3.2028e-18, 4.4386e-17, 3.2280e-16, 5.9490e-18, 6.7064e-18, 2.9815e-16,\n 1.4904e-18, 1.4463e-15, 2.5103e-17, 1.8634e-16, 6.9148e-18, 2.8783e-15,\n 5.9503e-18, 1.1281e-17, 2.6049e-16, 1.0174e-18, 6.9827e-18, 2.3828e-17,\n 1.6300e-16, 2.1701e-16, 9.3421e-19, 3.2619e-19, 7.0603e-17, 5.6141e-20,\n 1.4274e-15, 1.2483e-16, 4.0460e-17, 1.7992e-17, 2.2495e-17, 5.0133e-17,\n 7.5038e-17, 1.1743e-16, 5.5124e-17, 7.2048e-16, 2.0423e-17, 1.5641e-16,\n 7.4798e-15, 1.2932e-18, 2.0393e-17, 4.3031e-17, 1.0317e-18, 6.8231e-16,\n 7.4686e-19, 1.2302e-18, 3.0741e-17, 5.5399e-17, 1.6572e-16, 2.1647e-17,\n 3.5613e-17, 2.8366e-16, 1.7813e-17, 7.1022e-19, 6.9137e-19, 5.6563e-18,\n 1.2594e-16, 2.8112e-16, 4.3230e-20, 5.7095e-15, 1.2968e-15, 1.0679e-17,\n 6.7722e-17, 4.4802e-17, 3.1367e-16, 7.9972e-17, 6.8563e-15, 5.7892e-18,\n 2.4258e-15, 1.6084e-15, 6.5724e-19, 5.2803e-17, 7.2707e-18, 3.3767e-16,\n 8.3710e-16, 6.1834e-18, 4.6350e-16, 3.8035e-17, 6.5461e-16, 4.6639e-18,\n 1.7340e-17, 1.7070e-17, 5.5886e-18, 1.0417e-15, 3.0369e-18, 6.8295e-18,\n 1.3569e-17, 2.0606e-18, 1.3473e-16, 6.1794e-19, 1.3849e-17, 8.1141e-18,\n 1.5454e-15, 7.7205e-16, 3.1937e-16, 2.8449e-16, 3.0631e-18, 2.6066e-15,\n 3.2268e-18, 2.6233e-18, 3.8968e-16, 2.5562e-19, 1.1854e-17, 1.4920e-18,\n 6.4443e-18, 2.2063e-17, 2.3560e-17, 1.3543e-17, 2.8759e-17, 8.0014e-17,\n 3.5317e-18, 8.7987e-20, 1.6447e-18, 3.0441e-18, 1.3283e-17, 7.1035e-18,\n 5.6519e-17, 1.5571e-15, 8.2010e-16, 7.3560e-18, 2.6393e-15, 5.1371e-14,\n 9.8270e-16, 1.0218e-17, 1.4587e-18, 3.0805e-16, 6.0703e-19, 7.5100e-17,\n 2.1108e-16, 2.8969e-18, 2.4505e-17, 3.4580e-16, 6.6374e-17, 1.0295e-15,\n 3.8893e-18, 4.3996e-16, 3.8230e-18, 2.0676e-16, 1.6571e-18, 2.1215e-18,\n 5.4749e-18, 4.5276e-16, 2.7223e-18, 2.7333e-18, 8.0178e-16, 8.8847e-19,\n 1.6479e-15, 2.8886e-18, 3.8029e-15, 1.4073e-16, 5.3536e-16, 4.8278e-19,\n 3.8801e-18, 1.8786e-17, 4.7187e-17, 2.5905e-16, 1.5963e-16, 8.3565e-18,\n 2.0341e-15, 4.8334e-18, 6.2362e-16, 5.8577e-17, 1.3021e-18, 4.5466e-17,\n 3.0644e-18, 1.9109e-17, 3.9393e-16, 1.6665e-18, 3.5295e-18, 5.3028e-18,\n 4.1418e-15, 6.3896e-16, 2.4999e-18, 2.1273e-14, 7.5585e-19, 8.6735e-18,\n 1.7989e-18, 2.7265e-18, 1.0787e-15, 3.3129e-17, 5.5047e-17, 1.3276e-17,\n 1.2314e-17, 5.5283e-17, 7.8173e-17, 4.8068e-16, 4.3228e-18, 1.0516e-14,\n 5.7568e-18, 8.9261e-17, 6.4359e-19, 1.2445e-15, 2.5938e-17, 2.7274e-17,\n 3.4400e-18, 2.2590e-18, 8.8481e-15, 1.3020e-15, 1.4174e-16, 1.2617e-18,\n 1.1694e-19, 1.5296e-16, 1.8987e-17, 2.2893e-17, 2.9925e-16, 7.4168e-18,\n 1.0026e-18, 3.7940e-18, 4.9275e-17, 1.5632e-18, 8.4685e-19, 4.1833e-16,\n 1.2035e-17, 1.0645e-16, 1.0400e-17, 3.8114e-19, 1.2730e-15, 5.4540e-16,\n 1.5462e-15, 1.0169e-18, 4.3533e-17, 1.6090e-16, 1.0936e-18, 1.8904e-19,\n 2.1253e-16, 7.2556e-17, 5.6000e-18, 1.0719e-17, 1.0478e-16, 4.8194e-19,\n 4.4419e-16, 3.8487e-15, 1.7779e-16, 3.5146e-17], device='cuda:0')" + }, + "34": { + "step": "tensor(15024.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([7.8299e-16, 1.0243e-17, 8.8198e-16, 2.1514e-17, 5.8652e-17, 1.3489e-17,\n 1.8015e-16, 1.7208e-15, 6.2247e-16, 5.7332e-17, 5.3259e-15, 1.4243e-16,\n 4.0293e-18, 7.5210e-19, 8.2126e-17, 8.1024e-17, 1.3647e-17, 6.5873e-16,\n 7.4507e-16, 5.0389e-16, 7.1152e-16, 1.8175e-17, 3.3576e-17, 2.4743e-15,\n 5.7805e-17, 2.3264e-16, 7.4327e-16, 3.0533e-18, 2.2598e-18, 3.2990e-16,\n 7.0329e-18, 1.0151e-15, 2.8739e-16, 4.6568e-16, 4.1668e-18, 9.6531e-16,\n 4.1948e-17, 2.8275e-17, 6.6480e-16, 4.3684e-18, 4.6283e-17, 2.0290e-16,\n 7.3675e-16, 1.0402e-16, 9.0182e-18, 1.0090e-17, 1.8616e-16, 6.0644e-19,\n 6.3666e-16, 5.0598e-16, 1.0117e-16, 5.4323e-16, 1.9041e-17, 1.4908e-16,\n 1.2401e-16, 6.3753e-16, 5.9789e-16, 2.2515e-15, 9.9865e-17, 4.0525e-16,\n 7.8448e-15, 7.3331e-18, 5.2296e-18, 6.8922e-16, 1.1934e-17, 1.6281e-15,\n 1.6794e-19, 2.1454e-17, 3.2520e-18, 3.3138e-16, 4.6144e-16, 5.0640e-16,\n 5.1949e-16, 3.1689e-16, 2.0721e-17, 3.9696e-19, 7.6461e-17, 2.5782e-17,\n 6.1363e-16, 1.4397e-16, 4.6311e-19, 2.2511e-15, 2.2250e-15, 4.1979e-18,\n 1.4497e-16, 1.0081e-16, 7.8905e-16, 3.0280e-16, 2.9046e-15, 5.9299e-18,\n 9.3932e-16, 5.6563e-16, 1.4672e-17, 1.5286e-17, 1.2869e-16, 1.8500e-15,\n 4.4579e-16, 1.5623e-17, 1.5165e-15, 2.0847e-16, 8.6986e-16, 1.7664e-18,\n 1.2812e-16, 1.6357e-16, 1.2710e-17, 5.2314e-16, 8.8551e-17, 2.0664e-18,\n 1.5642e-16, 1.0731e-18, 4.6202e-16, 1.0504e-19, 2.7230e-17, 3.6792e-18,\n 1.1704e-15, 3.3634e-16, 5.5776e-16, 1.0448e-15, 2.8892e-17, 2.5916e-15,\n 6.5323e-17, 8.2519e-17, 3.5524e-16, 4.8165e-18, 8.0818e-17, 5.2972e-19,\n 2.5702e-18, 5.7692e-17, 9.5114e-18, 4.5542e-18, 1.0230e-17, 7.1027e-17,\n 5.9786e-18, 5.1729e-18, 7.7228e-18, 2.7027e-17, 5.0454e-18, 3.8995e-17,\n 1.7908e-16, 2.5319e-15, 5.7829e-16, 1.5460e-17, 3.0274e-15, 1.5977e-14,\n 9.5481e-16, 4.1851e-17, 5.0015e-17, 6.8819e-16, 7.5785e-19, 6.4964e-17,\n 2.6174e-16, 1.2245e-17, 8.1537e-18, 6.4933e-16, 5.7461e-16, 2.5181e-16,\n 2.2281e-17, 1.4407e-15, 2.3905e-17, 8.1654e-17, 5.1926e-19, 1.2495e-18,\n 2.7551e-16, 6.1936e-16, 7.0358e-19, 4.0528e-16, 5.5133e-16, 2.1948e-17,\n 1.3172e-15, 9.1864e-19, 1.8732e-15, 5.0781e-16, 6.8201e-16, 1.1528e-18,\n 3.4445e-17, 6.5103e-17, 1.5799e-16, 8.7150e-16, 1.2227e-15, 3.1556e-18,\n 3.4558e-15, 3.7945e-19, 2.9291e-15, 6.5364e-17, 2.6790e-17, 2.2530e-16,\n 3.7684e-17, 8.1331e-18, 4.3602e-16, 6.0720e-17, 9.8149e-17, 3.1167e-17,\n 1.5980e-15, 6.4693e-16, 4.5165e-17, 1.1393e-14, 1.0516e-18, 4.7478e-18,\n 2.8384e-17, 1.3781e-17, 1.4743e-15, 5.2322e-17, 2.5461e-16, 4.5801e-17,\n 1.1123e-17, 3.8151e-16, 3.5640e-17, 1.2736e-15, 2.4621e-17, 4.2167e-15,\n 2.3139e-19, 3.4038e-16, 3.0745e-18, 1.1846e-15, 6.6677e-17, 9.0983e-18,\n 1.1501e-18, 5.5186e-17, 7.6063e-15, 7.9544e-16, 2.0818e-16, 3.0518e-17,\n 5.3015e-18, 1.3305e-16, 1.3885e-16, 9.6504e-17, 2.0037e-15, 1.2617e-16,\n 3.4730e-17, 3.5107e-18, 1.4205e-17, 7.9692e-17, 4.2116e-19, 4.8474e-16,\n 3.1989e-17, 5.2311e-16, 1.3225e-16, 4.7012e-18, 6.4900e-16, 4.9920e-16,\n 1.1678e-15, 1.1876e-17, 5.3274e-16, 5.1506e-16, 2.4728e-17, 9.8327e-18,\n 7.4854e-16, 2.3551e-16, 1.0936e-16, 4.9876e-17, 5.8796e-16, 1.3995e-19,\n 3.9734e-16, 6.6411e-15, 5.2545e-16, 1.0373e-17], device='cuda:0')" + }, + "35": { + "step": "tensor(15024.)", + "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[6.4676e-17, 7.6351e-17, 3.9828e-20, ..., 5.7821e-18, 6.6614e-18,\n 2.8502e-18],\n [3.3143e-18, 1.8893e-18, 1.9339e-19, ..., 8.1606e-20, 9.5223e-19,\n 4.3955e-19],\n [2.2946e-15, 2.6355e-15, 3.2504e-18, ..., 1.8619e-16, 3.1752e-16,\n 8.2547e-17],\n ...,\n [7.6805e-15, 7.9047e-15, 8.6512e-18, ..., 5.2038e-16, 9.3085e-16,\n 2.5598e-16],\n [2.1278e-19, 7.3422e-21, 8.0452e-20, ..., 4.0370e-20, 4.6883e-20,\n 1.3685e-20],\n [4.3037e-15, 4.9013e-15, 3.1293e-18, ..., 3.6676e-16, 5.8842e-16,\n 1.5315e-16]], device='cuda:0')" + }, + "36": { + "step": "tensor(15024.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([3.7404e-14, 1.2253e-16, 1.3467e-12, 6.0767e-12, 1.4845e-13, 3.1678e-15,\n 8.8836e-13, 5.2250e-13, 3.7186e-13, 8.5798e-16, 5.0437e-14, 2.7530e-16,\n 2.8599e-13, 1.7065e-14, 8.8634e-15, 1.1396e-13, 1.5302e-14, 1.4381e-13,\n 5.4671e-15, 1.2298e-13, 3.1275e-14, 7.0254e-16, 9.3110e-13, 1.8855e-12,\n 1.4946e-13, 2.2546e-14, 2.2777e-13, 2.0156e-13, 8.4300e-15, 4.8237e-14,\n 7.3027e-14, 1.1265e-13, 2.2612e-13, 1.8168e-13, 2.0170e-15, 1.5415e-13,\n 5.3489e-13, 7.3244e-14, 4.0258e-13, 3.0692e-15, 8.2029e-13, 4.7747e-15,\n 5.1932e-13, 6.2591e-13, 1.2792e-14, 1.6124e-15, 5.1530e-14, 2.0651e-14,\n 6.3863e-15, 7.9622e-13, 1.6352e-13, 7.7756e-13, 2.4650e-16, 9.8889e-14,\n 3.0413e-14, 9.0796e-15, 2.1616e-12, 3.2293e-14, 1.9506e-13, 3.3329e-16,\n 1.0200e-15, 2.3291e-15, 2.9649e-13, 1.4885e-13, 1.1572e-13, 7.0115e-13,\n 1.4192e-13, 5.1344e-15, 1.6752e-13, 6.7110e-13, 1.3641e-13, 8.2236e-14,\n 5.4982e-14, 8.2386e-16, 1.4777e-16, 7.0829e-14, 1.2559e-13, 1.4610e-14,\n 1.2343e-12, 1.2500e-16, 1.7227e-15, 1.3585e-13, 1.8547e-12, 3.9331e-16,\n 1.5028e-13, 4.5179e-15, 1.5999e-13, 1.9043e-14, 2.3957e-13, 1.6708e-12,\n 2.4381e-13, 5.9379e-14, 1.0026e-14, 1.6869e-13, 1.1154e-12, 5.8851e-12,\n 2.2815e-15, 3.0950e-15, 3.6912e-14, 1.0699e-13, 2.0792e-13, 4.0035e-14,\n 5.3824e-14, 1.8664e-13, 1.1423e-12, 2.7314e-15, 2.3745e-16, 6.7682e-14,\n 8.2891e-14, 6.4110e-13, 6.3984e-13, 8.4808e-16, 5.2204e-16, 6.7125e-15,\n 9.1233e-15, 2.0987e-13, 2.0245e-13, 5.4583e-14, 2.3863e-13, 1.0830e-14,\n 1.8586e-12, 6.5979e-13, 8.0654e-16, 6.0372e-14, 1.8192e-13, 5.2804e-15,\n 1.8442e-15, 4.5419e-14, 1.5578e-15, 1.0715e-13, 1.5039e-14, 6.3261e-14,\n 5.4559e-13, 8.3248e-14, 5.7526e-14, 5.1675e-14, 2.2690e-13, 2.3962e-14,\n 4.5302e-14, 5.4901e-16, 8.4240e-15, 1.2159e-12, 3.5045e-13, 3.8037e-14,\n 1.2326e-12, 6.6674e-16, 2.2344e-16, 2.7732e-13, 3.8410e-14, 1.2391e-14,\n 7.6749e-15, 8.4801e-14, 4.5642e-14, 1.8924e-15, 8.2019e-15, 1.2280e-14,\n 7.7123e-15, 2.0795e-12, 6.1761e-13, 2.1137e-12, 4.7188e-14, 6.4969e-14,\n 2.3755e-12, 6.8147e-17, 4.5123e-14, 6.6413e-12, 7.6254e-13, 1.4191e-13,\n 1.7904e-12, 2.4707e-15, 4.3924e-14, 3.8957e-13, 7.9061e-14, 1.6954e-17,\n 5.2453e-14, 2.0370e-12, 1.1739e-14, 2.5556e-15, 3.0311e-12, 8.9727e-13,\n 1.0699e-15, 9.3506e-15, 1.1514e-15, 5.5140e-14, 2.1965e-13, 1.2426e-16,\n 7.7690e-15, 1.7670e-12, 9.3385e-15, 2.5570e-16, 1.9673e-15, 3.5408e-13,\n 4.1491e-13, 3.4584e-13, 4.5305e-13, 5.0840e-14, 5.6849e-16, 1.4358e-12,\n 1.8914e-13, 7.7792e-14, 1.0736e-13, 4.9515e-14, 4.2468e-14, 8.6472e-15,\n 5.3897e-15, 7.7378e-15, 4.8491e-15, 2.9170e-14, 1.8467e-15, 4.3765e-13,\n 1.5505e-13, 1.2284e-14, 1.4080e-14, 6.3412e-14, 3.8918e-12, 5.1535e-15,\n 2.3381e-14, 2.3904e-15, 1.1215e-12, 1.6660e-12, 1.8800e-16, 8.6931e-14,\n 6.4419e-14, 2.2592e-13, 3.8758e-16, 5.4168e-15, 7.4620e-13, 8.4833e-13,\n 2.3700e-16, 5.2118e-16, 1.2253e-16, 1.3038e-14, 1.2275e-13, 3.6623e-15,\n 2.8260e-14, 3.2702e-13, 5.5565e-14, 7.8568e-17, 4.5280e-13, 5.6570e-15,\n 7.2005e-13, 5.5642e-13, 1.5277e-12, 1.1369e-12, 2.7110e-14, 2.3628e-15,\n 3.6677e-15, 3.9851e-13, 9.0774e-13, 1.1625e-14, 1.1535e-12, 2.9361e-15,\n 1.9169e-13, 3.9360e-12, 4.4135e-17, 2.5207e-12], device='cuda:0')" + }, + "37": { + "step": "tensor(15024.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([1.0567e-16, 1.6933e-17, 3.1715e-15, 1.1284e-14, 2.6763e-17, 3.3289e-17,\n 4.1176e-16, 1.7337e-16, 5.3248e-17, 1.8008e-19, 1.3703e-16, 6.2492e-18,\n 5.2234e-16, 3.0283e-17, 7.0548e-17, 2.5016e-18, 1.0375e-16, 5.8772e-17,\n 4.1467e-18, 9.4714e-17, 1.2894e-17, 5.0716e-18, 3.1831e-16, 9.3260e-15,\n 3.6691e-16, 6.4477e-18, 1.8808e-16, 7.5560e-18, 2.3122e-17, 5.6110e-18,\n 6.6684e-17, 2.2788e-16, 2.5188e-17, 2.4164e-16, 4.9287e-19, 1.0754e-15,\n 1.3632e-15, 3.1812e-17, 6.8073e-16, 1.1813e-17, 2.3924e-15, 1.8157e-18,\n 4.2657e-16, 2.1762e-15, 6.8650e-18, 2.8221e-17, 3.8919e-18, 6.3968e-19,\n 1.9026e-18, 6.2263e-16, 1.5936e-16, 2.1684e-16, 9.3671e-19, 7.3912e-18,\n 3.8299e-18, 1.4094e-18, 5.6272e-16, 1.3621e-16, 1.6487e-17, 4.9295e-18,\n 3.0567e-17, 2.7094e-18, 1.7504e-16, 4.4005e-18, 1.9848e-17, 1.8838e-16,\n 1.6802e-16, 9.4010e-19, 9.9352e-18, 1.4853e-15, 5.9986e-17, 3.9246e-18,\n 1.5024e-17, 4.3620e-18, 6.8659e-19, 4.5345e-18, 3.3146e-17, 4.0518e-18,\n 1.5086e-15, 1.2371e-19, 1.6542e-18, 1.1731e-17, 1.9458e-15, 1.1111e-17,\n 2.5304e-16, 5.3889e-19, 5.2534e-17, 3.5907e-18, 9.3423e-17, 9.8808e-16,\n 2.2188e-16, 1.0455e-17, 8.5026e-18, 7.4690e-18, 1.7727e-15, 2.4871e-14,\n 1.2808e-17, 7.1817e-18, 1.3269e-17, 2.8054e-16, 3.0155e-17, 2.6759e-18,\n 1.4700e-16, 6.0237e-17, 8.7679e-16, 3.0870e-19, 6.8513e-19, 2.3701e-16,\n 1.6918e-17, 8.2819e-16, 2.5098e-15, 8.1547e-19, 2.0514e-19, 6.7305e-19,\n 1.2311e-17, 7.8744e-17, 1.5634e-16, 7.0920e-18, 8.4938e-17, 5.1185e-17,\n 1.3796e-15, 1.4304e-15, 1.9505e-19, 6.8967e-17, 1.3442e-15, 5.2938e-19,\n 4.7262e-18, 5.8375e-18, 8.7739e-19, 2.2951e-17, 1.2505e-18, 2.9313e-17,\n 3.0372e-16, 1.6490e-17, 6.7495e-18, 2.5766e-18, 2.4225e-16, 1.2948e-18,\n 8.5985e-17, 1.8687e-17, 9.9874e-19, 7.8466e-16, 8.4111e-17, 2.2649e-16,\n 3.4645e-15, 8.9660e-19, 9.4929e-19, 2.6218e-16, 7.8288e-17, 2.2485e-18,\n 3.0578e-18, 8.4444e-17, 1.9285e-18, 9.0410e-19, 4.4558e-19, 4.1966e-16,\n 1.2803e-18, 7.7931e-15, 2.7825e-16, 6.3114e-16, 6.0599e-18, 6.1200e-17,\n 1.0017e-14, 4.0236e-18, 3.1031e-18, 1.2299e-14, 1.8771e-15, 3.2520e-16,\n 1.9824e-15, 1.4231e-17, 5.3314e-20, 1.8030e-16, 5.9494e-17, 1.3407e-17,\n 1.5858e-18, 2.1676e-15, 9.7329e-18, 6.1063e-18, 3.4162e-15, 1.7952e-15,\n 1.6300e-17, 2.0350e-17, 5.2651e-17, 4.3184e-17, 4.3274e-17, 1.1318e-18,\n 1.8603e-18, 2.6741e-15, 2.8726e-18, 1.4513e-18, 1.3940e-17, 5.3336e-17,\n 1.3816e-15, 5.8701e-16, 1.6835e-16, 2.2069e-16, 4.0111e-20, 7.7456e-15,\n 2.1779e-16, 2.5970e-18, 1.1058e-17, 1.6053e-18, 4.8848e-18, 1.0073e-16,\n 7.3320e-19, 1.4103e-18, 6.2406e-17, 1.7917e-19, 2.0280e-17, 1.6313e-16,\n 1.0704e-16, 1.0373e-18, 5.4384e-19, 1.8303e-17, 5.2227e-15, 1.6341e-19,\n 4.1597e-19, 1.7446e-18, 1.1835e-16, 3.8012e-15, 2.3380e-18, 2.5798e-17,\n 4.8930e-17, 4.2639e-16, 8.4453e-18, 8.8259e-18, 4.7657e-17, 3.2374e-16,\n 5.1248e-18, 2.9572e-18, 1.9689e-17, 3.0209e-18, 2.6314e-16, 2.4205e-18,\n 9.1819e-19, 5.2766e-16, 5.9112e-17, 3.5644e-19, 1.5909e-15, 1.3628e-18,\n 1.2655e-16, 7.5293e-16, 1.8326e-15, 3.4842e-15, 1.0101e-17, 1.1684e-17,\n 2.1920e-17, 2.7008e-16, 3.1066e-15, 7.1117e-18, 2.7518e-15, 2.1190e-19,\n 1.0408e-16, 2.8630e-15, 1.3892e-18, 1.6694e-14], device='cuda:0')" + }, + "38": { + "step": "tensor(15024.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([3.8724e-17, 5.2119e-20, 2.1313e-15, 7.6025e-15, 2.3417e-16, 7.6431e-18,\n 1.2798e-15, 8.2241e-16, 5.5656e-16, 6.2158e-19, 3.8120e-17, 2.3230e-18,\n 5.8748e-16, 1.1334e-16, 7.8489e-17, 1.0974e-16, 9.0146e-17, 2.3789e-16,\n 4.8198e-18, 3.0413e-16, 1.0325e-16, 2.8979e-18, 1.1602e-15, 2.8518e-15,\n 4.5714e-16, 2.7496e-17, 4.0229e-16, 2.0656e-16, 8.7764e-18, 8.2290e-17,\n 1.1302e-16, 2.9314e-16, 3.4139e-16, 5.4499e-16, 4.5864e-18, 5.1401e-16,\n 9.4148e-16, 2.9679e-17, 6.7218e-16, 1.7554e-17, 1.1517e-15, 9.8753e-18,\n 9.6601e-16, 8.7389e-16, 6.5669e-17, 4.7124e-17, 3.6330e-17, 2.2265e-18,\n 6.3592e-19, 1.0363e-15, 2.5492e-16, 1.1285e-15, 8.5407e-19, 7.3746e-17,\n 1.5834e-17, 6.4053e-18, 3.0127e-15, 5.2858e-17, 2.3914e-16, 4.3606e-18,\n 9.8606e-21, 2.1763e-17, 2.9093e-16, 1.9608e-16, 1.7823e-16, 1.0340e-15,\n 1.9210e-16, 1.0424e-17, 2.4869e-16, 1.0612e-15, 2.1401e-16, 7.0365e-17,\n 1.3832e-16, 1.4278e-18, 4.3718e-19, 5.6073e-17, 2.0472e-16, 3.5944e-18,\n 1.6974e-15, 5.1798e-19, 3.7596e-18, 1.6992e-16, 2.6592e-15, 2.6831e-18,\n 1.8382e-16, 1.2363e-17, 1.4478e-16, 3.4894e-17, 3.9916e-16, 2.1545e-15,\n 4.1321e-16, 1.0590e-16, 3.0832e-18, 2.5261e-16, 1.6478e-15, 7.6493e-15,\n 6.1429e-18, 1.9367e-17, 1.0814e-16, 3.0513e-16, 3.1701e-16, 6.5509e-17,\n 2.5259e-16, 2.0793e-16, 1.3713e-15, 2.2168e-17, 8.3031e-19, 2.6170e-16,\n 1.4473e-16, 9.6056e-16, 1.0688e-15, 1.5839e-17, 5.0140e-18, 1.0651e-17,\n 4.0457e-18, 3.3694e-16, 3.3428e-16, 9.4150e-17, 3.6333e-16, 1.8067e-17,\n 2.1875e-15, 7.9080e-16, 1.8626e-18, 1.2726e-16, 5.9262e-16, 1.1736e-17,\n 4.5179e-20, 3.7082e-18, 2.6850e-21, 7.0343e-17, 2.6998e-17, 1.0288e-16,\n 5.9925e-16, 1.4076e-16, 9.2919e-17, 8.0691e-17, 4.4089e-16, 1.0119e-17,\n 8.9050e-17, 1.0358e-18, 1.4154e-17, 1.4073e-15, 5.7538e-16, 5.3515e-17,\n 1.2529e-15, 1.5942e-17, 1.4495e-19, 4.4215e-16, 9.9316e-17, 7.3865e-18,\n 2.8650e-17, 2.6168e-16, 3.9707e-17, 3.1254e-18, 1.3898e-17, 1.2974e-16,\n 3.7841e-19, 3.0352e-15, 9.0744e-16, 2.6746e-15, 1.5616e-17, 1.1677e-16,\n 3.5636e-15, 4.5911e-20, 7.4998e-17, 9.2680e-15, 1.2147e-15, 2.9211e-16,\n 2.2659e-15, 2.0851e-17, 3.4940e-17, 3.9699e-16, 2.5655e-16, 2.9822e-17,\n 8.5193e-17, 2.5990e-15, 3.6161e-18, 1.2823e-18, 3.6655e-15, 1.3542e-15,\n 1.4003e-18, 6.9404e-18, 1.6808e-18, 7.9506e-17, 3.3395e-16, 5.5723e-19,\n 1.2802e-17, 2.0621e-15, 4.6749e-17, 5.6602e-19, 6.9119e-19, 4.0009e-16,\n 7.8620e-16, 5.2658e-16, 4.8085e-16, 7.9576e-17, 2.0846e-19, 2.2336e-15,\n 3.5262e-16, 6.6161e-17, 8.0213e-17, 8.0261e-17, 3.3348e-17, 1.1205e-16,\n 2.1237e-19, 1.3484e-17, 3.2174e-17, 4.9203e-17, 3.6621e-18, 7.2260e-16,\n 1.5079e-16, 1.6505e-17, 2.5977e-17, 1.4893e-16, 5.4541e-15, 1.3337e-18,\n 3.8002e-17, 4.2656e-18, 1.5766e-15, 2.5149e-15, 1.5257e-19, 1.5748e-16,\n 4.0330e-17, 3.5482e-16, 2.3669e-18, 3.1997e-18, 1.0401e-15, 1.1674e-15,\n 5.9306e-19, 1.2484e-18, 1.7379e-19, 2.2409e-17, 1.9707e-16, 3.8593e-17,\n 1.0877e-17, 6.4220e-16, 1.7665e-16, 1.3785e-20, 8.7327e-16, 5.2567e-19,\n 1.0326e-15, 8.9268e-16, 1.8459e-15, 1.5925e-15, 5.5806e-17, 6.7123e-18,\n 8.6700e-18, 6.1103e-16, 1.5297e-15, 3.0423e-17, 1.8772e-15, 2.6737e-18,\n 2.1775e-16, 5.4708e-15, 2.7923e-19, 4.0522e-15], device='cuda:0')" + }, + "39": { + "step": "tensor(15024.)", + "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.5317e-18, 5.4138e-17, 1.0972e-17, ..., 4.2836e-20, 5.2296e-18,\n 1.6872e-16],\n [2.1395e-17, 3.0482e-18, 8.6978e-19, ..., 2.1201e-17, 2.8831e-20,\n 2.4180e-16],\n [3.4632e-19, 8.7395e-17, 2.2055e-18, ..., 1.4663e-18, 5.4572e-18,\n 1.3583e-16],\n ...,\n [4.1720e-16, 4.4983e-17, 7.2701e-16, ..., 2.9312e-17, 9.2256e-17,\n 7.4115e-15],\n [1.2768e-16, 1.2275e-16, 1.3964e-15, ..., 2.0240e-17, 2.7181e-17,\n 2.2678e-15],\n [3.9928e-16, 9.5726e-15, 1.2082e-13, ..., 2.7755e-15, 8.6156e-17,\n 6.7119e-14]], device='cuda:0')" + }, + "40": { + "step": "tensor(15024.)", + "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([6.6966e-17, 1.6588e-16, 4.4671e-17, 1.6752e-17, 1.3781e-16, 3.8425e-17,\n 1.5381e-17, 5.0193e-17, 1.3287e-17, 3.6006e-17, 1.2185e-16, 3.0228e-17,\n 1.1192e-18, 1.4169e-17, 4.5144e-18, 2.2947e-18, 1.8502e-16, 6.2532e-17,\n 3.1264e-17, 1.6789e-16, 1.4646e-17, 2.7843e-16, 3.8346e-17, 2.1017e-17,\n 6.6676e-17, 1.1835e-16, 8.2837e-19, 3.2010e-18, 8.4754e-17, 1.4442e-16,\n 1.0432e-16, 1.1460e-16, 6.2988e-17, 1.4266e-16, 2.2534e-16, 6.8169e-16,\n 1.8904e-16, 2.8832e-17, 2.1625e-16, 1.4244e-16, 5.7821e-17, 8.9174e-17,\n 1.3664e-16, 7.2575e-17, 1.4118e-16, 5.8840e-17, 1.0767e-16, 4.5888e-17,\n 3.0375e-18, 3.9559e-16, 5.1806e-17, 8.1647e-17, 6.2290e-18, 3.7791e-16,\n 1.0602e-17, 6.5700e-17, 1.6078e-16, 3.3743e-18, 1.9780e-16, 4.0679e-17,\n 6.6051e-16, 3.9832e-16, 1.3779e-17, 4.6062e-17, 2.9912e-18, 1.0877e-17,\n 4.1366e-17, 2.1462e-17, 5.7891e-17, 2.5023e-16, 1.0182e-16, 5.2163e-17,\n 2.7328e-17, 7.6067e-17, 2.7270e-16, 4.0391e-16, 9.2727e-17, 2.2755e-17,\n 1.5150e-16, 1.5385e-17, 3.0178e-16, 3.2557e-17, 8.0388e-18, 5.5322e-18,\n 6.6048e-18, 2.4507e-17, 5.8360e-16, 7.1349e-17, 2.5478e-18, 1.4578e-16,\n 1.8543e-18, 8.7105e-17, 1.1637e-17, 7.7419e-16, 1.3351e-18, 5.7397e-17,\n 1.2498e-16, 9.2144e-19, 2.7399e-18, 7.4327e-17, 2.1730e-16, 6.1486e-19,\n 3.2488e-16, 1.9822e-17, 3.6854e-17, 3.4888e-17, 4.9045e-17, 2.9991e-16,\n 2.0047e-16, 1.1109e-16, 2.7507e-17, 7.7272e-17, 4.8660e-17, 5.8788e-17,\n 3.2716e-17, 7.4858e-17, 6.2680e-17, 1.1234e-16, 1.8198e-16, 7.5195e-16,\n 5.2975e-18, 7.0349e-17, 1.2824e-16, 3.7250e-18, 2.2362e-18, 2.7917e-18,\n 1.0024e-16, 4.3428e-16, 4.2378e-18, 7.3659e-18, 3.9038e-17, 3.4398e-16,\n 4.1664e-19, 2.2176e-16, 5.2582e-17, 8.9654e-17, 1.9192e-16, 6.2420e-18,\n 5.6900e-17, 1.1429e-18, 6.3837e-17, 1.0066e-17, 1.0879e-17, 1.6133e-17,\n 2.6180e-18, 1.2602e-19, 3.6308e-17, 2.2838e-17, 8.2168e-18, 1.1640e-17,\n 2.1602e-18, 9.6170e-18, 6.9722e-17, 1.3024e-16, 9.4283e-18, 9.2242e-18,\n 2.8024e-17, 1.0006e-17, 4.7368e-17, 7.3820e-18, 2.0084e-16, 1.4330e-18,\n 1.8996e-18, 2.7908e-16, 9.8721e-17, 8.0216e-16, 8.3103e-16, 1.2105e-16,\n 1.6555e-16, 9.3311e-16, 2.6750e-17, 3.2923e-17, 5.2761e-17, 3.2379e-16,\n 2.6206e-17, 2.5282e-16, 5.9345e-18, 1.2190e-16, 1.1111e-16, 1.3844e-15,\n 3.2141e-16, 5.2844e-16, 3.6995e-17, 8.0637e-17, 1.0014e-15, 2.0050e-17,\n 2.5917e-19, 9.3833e-19, 3.3751e-16, 8.4058e-17, 3.2237e-16, 1.5052e-16,\n 8.0847e-17, 1.3268e-17, 5.0942e-16, 5.0019e-16, 2.6114e-16, 7.8490e-17,\n 2.8188e-16, 1.8674e-18, 7.8174e-17, 1.8532e-16, 1.1415e-18, 2.4912e-18,\n 4.4936e-17, 3.7551e-16, 2.6215e-16, 4.0749e-17, 7.6331e-18, 3.5979e-19,\n 4.6100e-16, 2.2952e-16, 6.2034e-18, 4.0257e-18, 1.2860e-16, 4.1360e-17,\n 5.5914e-17, 2.3558e-16, 2.1063e-16, 5.1258e-16, 7.6766e-18, 8.7294e-17,\n 2.0032e-18, 5.9397e-16, 3.5808e-17, 2.0392e-17, 1.0384e-17, 7.2548e-18,\n 5.2915e-17, 8.4739e-18, 1.1050e-16, 1.2284e-17, 3.9298e-18, 6.4223e-17,\n 3.3936e-17, 5.4584e-18, 5.6131e-18, 3.1045e-17, 2.5787e-17, 1.1093e-17,\n 9.1999e-18, 1.2364e-17, 1.4595e-17, 2.0006e-17, 6.5579e-17, 1.5190e-17,\n 1.6044e-17, 9.4474e-18, 5.6556e-18, 2.6409e-17, 7.1613e-17, 9.4653e-17,\n 7.9142e-18, 6.2130e-17, 1.0751e-17, 8.1702e-17, 1.6083e-31, 1.3388e-32,\n 3.7503e-32, 8.2900e-33, 8.0968e-34, 2.2523e-32, 1.1336e-32, 3.4485e-32,\n 2.6079e-34, 2.4995e-32, 5.2338e-33, 1.0749e-32, 4.8178e-32, 1.8546e-33,\n 1.0926e-33, 2.8625e-33, 7.0027e-33, 2.3546e-33, 8.7283e-32, 1.7134e-32,\n 1.6439e-33, 1.5098e-34, 3.9211e-33, 2.6510e-32, 2.5388e-33, 5.9079e-34,\n 9.1985e-34, 6.5022e-33, 1.3916e-34, 4.7014e-33, 2.0991e-32, 1.9823e-32,\n 1.2198e-32, 6.8070e-34, 3.9919e-33, 4.6345e-33, 1.4152e-32, 3.1293e-33,\n 9.9585e-33, 2.2070e-32, 1.4005e-32, 8.6225e-33, 2.9880e-33, 2.4926e-32,\n 2.4644e-32, 4.1293e-33, 1.9203e-33, 3.8669e-33, 2.5207e-32, 7.5962e-33,\n 2.8675e-33, 6.7068e-34, 9.3428e-33, 1.0416e-32, 2.4558e-32, 2.5900e-32,\n 1.8471e-32, 1.0747e-31, 1.4019e-32, 1.0807e-32, 3.8348e-32, 1.9836e-32,\n 2.2561e-32, 2.1901e-32, 5.4401e-33, 6.9685e-33, 1.1271e-33, 3.7498e-33,\n 1.3707e-32, 3.7060e-33, 1.8769e-32, 1.2292e-33, 3.1528e-34, 1.1723e-32,\n 3.0592e-33, 1.0146e-32, 6.4828e-33, 5.8939e-33, 1.8038e-32, 7.5828e-33,\n 7.2342e-33, 5.7099e-32, 3.2952e-33, 4.6883e-33, 9.8562e-33, 6.2023e-34,\n 5.7902e-33, 1.6466e-34, 5.5781e-33, 4.6797e-34, 2.2861e-33, 1.4022e-32,\n 7.5280e-34, 6.1506e-33, 1.9526e-33, 5.2267e-33, 8.2570e-33, 4.0035e-33,\n 3.1994e-33, 6.8677e-33, 6.6010e-33, 1.4501e-33, 2.3473e-32, 2.5955e-32,\n 3.7139e-33, 9.4298e-33, 6.4531e-33, 2.7945e-32, 1.1135e-32, 3.7402e-33,\n 1.0933e-33, 5.9796e-33, 1.4568e-32, 1.4228e-33, 6.3385e-33, 4.3767e-34,\n 2.2032e-33, 6.1794e-33, 1.0177e-33, 3.2321e-33, 4.0331e-33, 5.0205e-33,\n 1.8243e-32, 7.3019e-33, 2.9211e-32, 4.1276e-32, 2.1191e-32, 5.8677e-32,\n 1.1416e-33, 1.1358e-33, 1.1397e-32, 4.1951e-33, 4.4069e-34, 1.1051e-34,\n 4.1413e-33, 1.0577e-33, 4.8861e-33, 3.0837e-33, 1.6079e-32, 2.0953e-33,\n 3.9034e-33, 4.2712e-32, 4.1939e-33, 7.3265e-34, 1.8655e-33, 5.7507e-34,\n 5.6648e-34, 6.9657e-33, 1.1938e-32, 1.3230e-32, 7.4248e-34, 1.8903e-32,\n 1.9559e-33, 1.8236e-33, 4.5624e-32, 9.0885e-33, 1.4720e-33, 6.1597e-34,\n 4.0239e-33, 1.7480e-33, 9.0262e-33, 2.8544e-34, 6.2573e-33, 1.4354e-33,\n 2.3581e-32, 2.9306e-33, 5.4753e-33, 2.0638e-32, 8.2916e-33, 4.0796e-33,\n 1.7169e-32, 6.5633e-33, 1.1090e-32, 1.0702e-33, 1.0336e-31, 1.4290e-33,\n 2.1881e-32, 3.4320e-33, 4.5739e-33, 1.1827e-32, 4.2376e-34, 2.7000e-33,\n 1.1441e-32, 7.7833e-33, 6.9742e-33, 6.2573e-33, 1.2045e-32, 2.2775e-33,\n 8.7310e-33, 9.4182e-33, 9.5657e-33, 8.9459e-33, 1.9631e-32, 3.3354e-32,\n 5.3400e-33, 1.4718e-33, 1.6724e-32, 4.6439e-33, 4.0113e-34, 1.0849e-33,\n 1.2692e-32, 2.8912e-32, 1.1660e-32, 5.1016e-33, 1.4064e-33, 2.4596e-34,\n 9.8020e-33, 1.6612e-32, 2.4862e-33, 9.6699e-34, 2.7463e-33, 3.7634e-33,\n 4.7316e-33, 2.7281e-33, 9.8180e-33, 1.3779e-32, 2.8228e-32, 2.8121e-33,\n 2.9537e-33, 7.9298e-33, 1.0029e-32, 6.3365e-33, 6.1498e-34, 1.8913e-32,\n 6.0549e-34, 2.7657e-33, 1.0074e-32, 9.6537e-34, 1.2174e-32, 1.8629e-32,\n 2.9036e-33, 1.8802e-32, 1.8509e-32, 1.0866e-32, 8.9254e-33, 1.1770e-32,\n 2.7740e-34, 8.1361e-33, 2.5167e-33, 6.8772e-33, 1.0289e-33, 3.4355e-33,\n 6.5864e-34, 4.2337e-32, 6.7981e-33, 1.0916e-32, 1.3107e-33, 3.9191e-33,\n 1.1438e-32, 4.3939e-32, 1.0480e-32, 1.8734e-32, 2.2582e-32, 1.7532e-32,\n 2.8138e-33, 3.7370e-33, 6.6286e-13, 2.4674e-14, 1.4530e-13, 9.5486e-15,\n 3.7574e-14, 1.2510e-14, 2.5602e-15, 5.2528e-14, 2.9003e-14, 1.1940e-14,\n 4.1460e-13, 8.0861e-15, 2.4152e-14, 3.2255e-13, 4.7450e-14, 6.4385e-14,\n 1.0707e-14, 3.9714e-16, 2.7190e-14, 3.8208e-14, 8.0428e-14, 5.8218e-14,\n 3.5079e-14, 8.6694e-15, 8.5247e-16, 6.9844e-15, 2.0000e-15, 5.0212e-14,\n 4.5444e-13, 1.3000e-14, 1.4527e-13, 3.4302e-14, 3.2568e-14, 8.2358e-14,\n 2.4153e-16, 1.9879e-13, 1.0179e-13, 3.2042e-14, 2.0812e-13, 4.9997e-15,\n 8.1662e-15, 1.5770e-14, 1.4811e-14, 4.0217e-14, 2.2891e-14, 6.8776e-16,\n 4.8923e-15, 5.2271e-14, 8.6383e-15, 1.0008e-13, 3.4314e-13, 7.2272e-14,\n 7.5964e-15, 1.6565e-13, 2.1202e-14, 1.1541e-13, 1.0886e-13, 1.7656e-14,\n 1.5656e-13, 1.3145e-14, 5.6586e-15, 1.0089e-16, 1.6368e-15, 2.7037e-15,\n 6.7283e-14, 4.0473e-14, 3.0891e-14, 1.2878e-14, 4.1062e-14, 6.0950e-15,\n 8.4267e-14, 1.8199e-13, 1.1101e-13, 2.7940e-15, 4.8161e-15, 1.4575e-13,\n 1.0492e-13, 2.8136e-13, 1.2647e-13, 9.3406e-15, 5.2454e-15, 1.0827e-14,\n 5.1697e-14, 1.9729e-14, 4.8920e-14, 9.0150e-15, 5.9413e-14, 6.8814e-15,\n 1.7527e-14, 1.9057e-13, 2.6496e-13, 2.1579e-14, 9.3692e-14, 6.4214e-14,\n 3.7937e-14, 9.4608e-14, 9.3037e-14, 6.2512e-15, 3.9151e-14, 3.1728e-14,\n 2.6386e-14, 2.5316e-13, 3.4589e-14, 2.1038e-13, 1.0887e-13, 2.0173e-14,\n 4.1976e-14, 4.9233e-15, 1.0403e-14, 1.0096e-15, 1.1278e-13, 1.1733e-14,\n 1.1014e-13, 1.5387e-14, 2.6704e-13, 6.7709e-14, 4.3636e-14, 2.3193e-13,\n 3.7297e-14, 2.2913e-14, 2.7127e-15, 3.6345e-14, 5.4532e-14, 1.3259e-14,\n 1.3403e-14, 5.5905e-14, 1.1113e-14, 5.8148e-15, 1.3471e-13, 5.4671e-15,\n 7.6242e-15, 8.6972e-15, 9.4632e-15, 3.7769e-14, 5.2557e-14, 2.3657e-14,\n 2.3373e-14, 1.5390e-15, 4.9349e-14, 3.4440e-14, 5.9683e-15, 3.0300e-14,\n 1.0621e-14, 6.6430e-15, 6.8680e-14, 1.5882e-14, 1.6931e-16, 4.5751e-14,\n 3.4375e-14, 1.0620e-14, 9.4867e-14, 5.3798e-14, 1.7807e-14, 7.3182e-15,\n 1.7954e-13, 6.3919e-15, 6.5098e-15, 7.3054e-14, 2.7214e-14, 6.5996e-14,\n 6.3313e-14, 2.2623e-14, 9.7414e-14, 5.8489e-14, 4.8636e-15, 9.3510e-15,\n 1.2444e-16, 1.0849e-14, 6.8767e-14, 9.9613e-17, 3.5455e-13, 1.8147e-16,\n 2.1420e-14, 8.5375e-15, 1.9411e-13, 7.4940e-14, 3.3329e-14, 7.0230e-14,\n 1.8022e-13, 2.7443e-14, 3.1021e-15, 1.2850e-14, 2.1357e-14, 1.1475e-14,\n 3.7665e-15, 8.2377e-15, 1.4743e-14, 3.3548e-15, 1.5681e-13, 9.4284e-16,\n 3.4141e-14, 7.0651e-15, 2.4999e-13, 1.2239e-13, 5.1164e-15, 1.7292e-14,\n 9.6007e-14, 4.7877e-15, 4.8039e-15, 1.1341e-13, 2.6893e-14, 5.5490e-14,\n 1.6385e-14, 7.5474e-14, 8.4246e-15, 1.9134e-14, 9.0050e-14, 4.2267e-15,\n 7.3612e-14, 3.4650e-14, 1.2216e-14, 5.5219e-14, 2.6607e-13, 5.4126e-15,\n 2.4734e-14, 4.6023e-14, 2.1064e-16, 3.8325e-14, 3.5099e-14, 8.3042e-14,\n 3.3392e-15, 5.7199e-14, 1.2090e-14, 1.9046e-14, 5.3889e-14, 2.1417e-16,\n 8.5955e-14, 1.4433e-14, 1.1962e-13, 4.0495e-14, 5.5785e-14, 1.0088e-14,\n 2.6292e-14, 9.4973e-15, 1.6467e-13, 3.2897e-14, 2.2399e-14, 1.5670e-13,\n 1.4136e-13, 1.3518e-13, 2.8630e-14, 1.3703e-13, 4.9768e-15, 5.8462e-16,\n 1.3095e-14, 6.1774e-15, 3.8025e-15, 8.5779e-15, 1.0775e-13, 3.5553e-15,\n 9.3325e-15, 4.8846e-14, 1.8036e-14, 6.6844e-15, 2.7958e-15, 1.2582e-13],\n device='cuda:0')" + }, + "41": { + "step": "tensor(15024.)", + "exp_avg": "tensor([[-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.3362e-14, 9.7789e-15, 3.1522e-15, ..., 5.0424e-14, 1.7409e-15,\n 1.9647e-14],\n [6.3028e-15, 2.5797e-15, 1.4856e-15, ..., 1.1854e-14, 5.6312e-16,\n 1.0294e-14],\n [1.4633e-15, 8.6494e-17, 3.6241e-16, ..., 4.6503e-16, 1.1620e-16,\n 4.8688e-16],\n ...,\n [5.3478e-15, 3.8623e-16, 1.4412e-15, ..., 2.3776e-15, 3.3236e-16,\n 1.2122e-15],\n [2.7646e-15, 4.5941e-16, 6.8656e-16, ..., 2.3325e-15, 1.7383e-16,\n 5.6295e-16],\n [6.7096e-15, 2.0109e-15, 1.5540e-15, ..., 8.6975e-15, 5.8815e-16,\n 6.2578e-15]], device='cuda:0')" + }, + "42": { + "step": "tensor(15024.)", + "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([5.9381e-13, 1.5156e-13, 1.3399e-15, 5.8396e-13, 1.8035e-13, 1.5537e-14,\n 2.1147e-14, 3.2561e-14, 3.6870e-15, 8.0496e-14, 1.0349e-13, 1.1830e-13,\n 1.7621e-13, 3.5232e-14, 1.9453e-13, 8.7536e-14, 3.7180e-13, 5.6249e-14,\n 5.7222e-15, 4.5377e-13, 5.7645e-14, 3.9596e-14, 1.1889e-13, 4.7274e-13,\n 2.5836e-14, 6.4481e-13, 1.1542e-13, 1.2006e-13, 6.2787e-15, 3.7353e-14,\n 5.3756e-13, 1.3597e-14, 3.1503e-13, 1.0281e-14, 2.0820e-13, 4.2743e-15,\n 8.0245e-14, 1.7017e-13, 1.0459e-13, 9.8806e-14, 6.5893e-15, 1.3844e-13,\n 4.7636e-13, 1.2810e-14, 2.2394e-13, 1.6641e-14, 1.2776e-14, 4.7796e-15,\n 1.4501e-13, 4.5287e-14, 5.5226e-14, 3.0475e-14, 2.3391e-14, 6.2930e-14,\n 9.6386e-13, 1.0118e-14, 2.8044e-14, 1.4152e-14, 3.0458e-15, 2.4219e-13,\n 7.5356e-14, 2.7358e-15, 1.5814e-13, 6.5105e-13, 2.6669e-14, 4.0481e-14,\n 2.9152e-13, 4.5613e-15, 9.1081e-13, 9.3221e-15, 1.3076e-13, 9.1903e-14,\n 6.2205e-14, 3.2509e-15, 2.1921e-14, 2.8368e-14, 1.5376e-13, 5.2475e-14,\n 8.3884e-14, 1.6189e-13, 1.7557e-14, 1.6779e-14, 1.5072e-12, 5.0548e-14,\n 3.3598e-15, 6.2889e-15, 1.2867e-14, 6.7566e-15, 1.3181e-13, 1.0382e-14,\n 3.5063e-15, 2.6299e-13, 1.7793e-14, 5.0245e-13, 1.1924e-13, 1.5884e-14,\n 3.6842e-13, 1.3314e-13, 6.0866e-13, 4.5519e-15, 6.9226e-14, 6.1695e-13,\n 8.1015e-13, 1.3545e-12, 1.6823e-14, 5.8993e-14, 3.5309e-15, 2.1556e-13,\n 1.0543e-13, 5.3326e-15, 9.4565e-15, 1.0251e-13, 1.1565e-14, 1.7782e-14,\n 1.5628e-14, 8.4123e-14, 8.7636e-15, 6.8210e-14, 7.6818e-15, 6.1388e-14,\n 4.9630e-13, 2.8864e-15, 1.2223e-12, 4.5983e-14, 8.8846e-13, 8.1606e-15,\n 2.9097e-13, 3.5726e-13, 4.1322e-13, 3.4306e-14, 4.3862e-13, 3.3363e-13,\n 5.3444e-14, 4.0217e-15, 5.3448e-15, 4.7245e-13, 4.1278e-13, 2.8735e-13,\n 5.0912e-14, 7.9680e-15, 8.6560e-14, 5.7018e-13, 1.4800e-13, 2.2134e-13,\n 1.1574e-13, 2.5815e-15, 1.4533e-13, 1.0446e-13, 9.8505e-15, 4.2214e-15,\n 2.2917e-15, 3.8223e-13, 1.4731e-14, 4.6185e-14, 3.0061e-13, 6.3178e-13,\n 2.0223e-13, 4.6102e-13, 5.2917e-14, 3.9037e-13, 4.1579e-13, 3.5450e-13,\n 1.6224e-13, 5.6022e-13, 2.8445e-15, 7.0580e-13, 7.7496e-14, 8.9343e-14,\n 5.1035e-14, 7.1107e-15, 1.4517e-14, 3.3413e-15, 7.4503e-14, 2.1685e-14,\n 1.0512e-13, 3.3109e-13, 1.7753e-13, 5.8317e-15, 1.6792e-13, 3.7423e-15,\n 7.7538e-15, 2.2362e-13, 3.9778e-15, 9.2419e-14, 5.7402e-15, 1.4386e-13,\n 2.3855e-13, 6.6059e-14, 1.0710e-13, 2.3041e-13, 4.1730e-13, 2.5695e-13,\n 2.5218e-13, 4.6049e-15, 3.5097e-14, 5.2765e-15, 7.2809e-15, 5.9550e-14,\n 2.6943e-15, 7.8823e-14, 2.4075e-13, 2.7252e-13, 3.5292e-14, 1.0440e-13,\n 3.3259e-14, 5.4827e-13, 5.7592e-15, 3.9813e-13, 1.6174e-14, 3.7016e-14,\n 1.1361e-14, 9.6007e-14, 1.0427e-14, 5.5777e-14, 4.1733e-13, 6.6476e-14,\n 3.4094e-14, 4.7466e-15, 1.1135e-14, 3.2240e-14, 1.8914e-13, 8.3931e-15,\n 3.8529e-15, 8.6891e-14, 2.2028e-13, 1.1946e-12, 1.3189e-13, 2.7842e-14,\n 2.3149e-14, 6.6671e-13, 2.3307e-13, 4.4445e-14, 7.4037e-13, 2.1791e-14,\n 3.3090e-14, 8.0399e-14, 3.4222e-14, 1.9944e-13, 3.4857e-14, 6.0540e-16,\n 3.1833e-13, 2.0448e-14, 2.8988e-13, 3.6903e-14, 5.6965e-15, 1.0653e-14,\n 3.5634e-13, 1.3597e-14, 5.1568e-13, 6.0085e-13, 3.3606e-13, 6.1075e-14,\n 1.2708e-13, 1.0518e-14, 1.5520e-14, 1.0828e-13], device='cuda:0')" + }, + "43": { + "step": "tensor(15024.)", + "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.1789e-13, 7.4095e-13, 9.2588e-14, ..., 1.9178e-12, 1.8071e-12,\n 8.3559e-13],\n [7.8652e-15, 5.0606e-14, 5.9840e-15, ..., 1.3333e-13, 1.2995e-13,\n 5.5978e-14],\n [5.4892e-15, 3.1846e-14, 4.3003e-15, ..., 8.2943e-14, 7.3756e-14,\n 3.8631e-14],\n [8.6083e-15, 5.7671e-14, 6.8485e-15, ..., 1.4769e-13, 1.4319e-13,\n 6.1172e-14],\n [8.4917e-15, 4.8583e-14, 6.7482e-15, ..., 1.2606e-13, 1.1142e-13,\n 5.9735e-14]], device='cuda:0')" + }, + "44": { + "step": "tensor(15024.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", + "exp_avg_sq": "tensor([4.2139e-11, 2.9756e-12, 1.7463e-12, 3.3413e-12, 2.6287e-12],\n device='cuda:0')" + }, + "45": { + "step": "tensor(15024.)", + "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.1808e-13, 7.4129e-13, 9.2731e-14, ..., 1.9191e-12, 1.8071e-12,\n 8.3686e-13],\n [7.8690e-15, 5.0613e-14, 5.9867e-15, ..., 1.3335e-13, 1.2995e-13,\n 5.6002e-14],\n [5.5078e-15, 3.1877e-14, 4.3140e-15, ..., 8.3064e-14, 7.3756e-14,\n 3.8752e-14],\n [8.6128e-15, 5.7679e-14, 6.8517e-15, ..., 1.4772e-13, 1.4319e-13,\n 6.1201e-14],\n [8.5235e-15, 4.8638e-14, 6.7718e-15, ..., 1.2627e-13, 1.1142e-13,\n 5.9945e-14]], device='cuda:0')" + }, + "46": { + "step": "tensor(15024.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", + "exp_avg_sq": "tensor([4.2142e-11, 2.9757e-12, 1.7466e-12, 3.3414e-12, 2.6292e-12],\n device='cuda:0')" + }, + "47": { + "step": "tensor(15024.)", + "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.1789e-13, 7.4095e-13, 9.2588e-14, ..., 1.9178e-12, 1.8071e-12,\n 8.3559e-13],\n [7.8652e-15, 5.0606e-14, 5.9840e-15, ..., 1.3333e-13, 1.2995e-13,\n 5.5978e-14],\n [5.4892e-15, 3.1846e-14, 4.3003e-15, ..., 8.2943e-14, 7.3756e-14,\n 3.8631e-14],\n [8.6083e-15, 5.7671e-14, 6.8485e-15, ..., 1.4769e-13, 1.4319e-13,\n 6.1172e-14],\n [8.4917e-15, 4.8583e-14, 6.7482e-15, ..., 1.2606e-13, 1.1142e-13,\n 5.9735e-14]], device='cuda:0')" + }, + "48": { + "step": "tensor(15024.)", + "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", + "exp_avg_sq": "tensor([4.2139e-11, 2.9756e-12, 1.7463e-12, 3.3413e-12, 2.6287e-12],\n device='cuda:0')" + }, + "6": { + "step": "tensor(6260.)", + "exp_avg": "tensor([[ 1.3580e-06, -3.5045e-06, 2.5812e-06, ..., -4.4712e-06,\n 3.5317e-06, -2.9484e-06],\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 7.9142e-17, -6.4181e-17, 1.0515e-17, ..., 3.8766e-17,\n 6.9982e-17, 1.3136e-18],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.5620e-08, 1.4753e-08, 2.8627e-09, ..., 1.5496e-09, 1.9261e-09,\n 2.1679e-09],\n [4.1372e-13, 1.5587e-13, 1.6427e-14, ..., 3.3962e-14, 1.1404e-13,\n 1.7047e-14],\n [3.1195e-12, 3.6182e-13, 1.9616e-13, ..., 6.0013e-14, 4.3665e-13,\n 3.3489e-14],\n ...,\n [2.8430e-11, 1.2244e-10, 2.1425e-11, ..., 1.5088e-11, 1.6014e-11,\n 1.2388e-11],\n [1.7339e-12, 3.4031e-13, 2.9272e-13, ..., 3.2417e-14, 9.0515e-13,\n 9.4395e-15],\n [2.4094e-12, 2.8855e-12, 1.0601e-13, ..., 1.2391e-13, 4.0752e-13,\n 3.0613e-14]], device='cuda:0')" + }, + "7": { + "step": "tensor(6260.)", + "exp_avg": "tensor([2.1816e-05, 5.7453e-44, 5.6052e-45, ..., 5.6052e-45, 9.8458e-16,\n 5.6052e-45], device='cuda:0')", + "exp_avg_sq": "tensor([3.0349e-06, 6.5905e-11, 3.6901e-10, ..., 2.7046e-08, 8.7395e-11,\n 6.7145e-10], device='cuda:0')" + }, + "8": { + "step": "tensor(6260.)", + "exp_avg": "tensor([[-1.0743e-07, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 2.8922e-18, -5.6052e-45],\n [-4.3107e-07, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n -3.3245e-17, 5.6052e-45],\n [-3.7153e-07, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 4.2623e-18, -5.6052e-45],\n ...,\n [-1.6203e-06, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 4.0408e-18, -5.6052e-45],\n [ 2.4014e-07, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -3.9954e-18, -5.6052e-45],\n [ 3.5377e-07, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.8931e-18, -5.6052e-45]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.2137e-10, 2.4383e-13, 4.8485e-13, ..., 6.4746e-12, 9.2502e-13,\n 7.2239e-13],\n [1.2870e-10, 2.9037e-13, 5.4296e-13, ..., 7.9502e-12, 1.2171e-12,\n 1.2238e-12],\n [1.1602e-10, 2.4762e-13, 5.0051e-13, ..., 1.4623e-11, 1.3617e-12,\n 1.5762e-12],\n ...,\n [2.3771e-10, 4.2176e-13, 1.0072e-12, ..., 1.0827e-11, 2.1526e-12,\n 2.2893e-12],\n [1.3587e-10, 5.9602e-13, 9.6503e-13, ..., 7.0445e-12, 2.1643e-12,\n 1.7156e-12],\n [1.9252e-10, 2.2450e-13, 5.0488e-13, ..., 9.2692e-12, 1.4715e-12,\n 2.6787e-12]], device='cuda:0')" + }, + "9": { + "step": "tensor(6260.)", + "exp_avg": "tensor([[ 2.1911e-05, -1.6794e-05, 4.9263e-06, ..., 7.0992e-06,\n 1.1760e-06, -1.3618e-06],\n [-8.2102e-08, -4.1431e-08, -6.1162e-08, ..., -1.3579e-08,\n -3.2129e-08, 1.2339e-08],\n [-1.2575e-05, 1.0503e-05, -5.0192e-06, ..., 1.0514e-06,\n -7.2553e-06, 4.0623e-06],\n ...,\n [-3.7279e-06, 3.8383e-06, -1.3771e-06, ..., 3.9692e-06,\n 5.6483e-06, 3.8086e-06],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 2.1790e-05, -1.9865e-07, 2.6076e-06, ..., 7.9457e-06,\n 2.0450e-06, 1.9448e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.0423e-08, 4.5684e-09, 8.0826e-10, ..., 7.7086e-10, 1.2169e-09,\n 5.2207e-10],\n [1.2813e-09, 8.9943e-10, 1.1543e-10, ..., 1.6185e-10, 1.4719e-10,\n 1.3916e-10],\n [4.7626e-09, 4.2152e-09, 6.9804e-10, ..., 4.7971e-10, 1.4073e-09,\n 4.3562e-10],\n ...,\n [1.8905e-09, 4.6136e-09, 4.9647e-10, ..., 6.6191e-10, 6.3644e-10,\n 4.6638e-10],\n [2.1721e-09, 1.4585e-09, 1.6568e-10, ..., 1.9495e-10, 2.3719e-10,\n 1.2959e-10],\n [6.7633e-09, 3.5405e-09, 8.1927e-10, ..., 2.0514e-09, 1.1147e-09,\n 8.5538e-10]], device='cuda:0')" + }, + "10": { + "step": "tensor(6260.)", + "exp_avg": "tensor([ 2.0188e-04, 1.9911e-06, -2.0124e-04, ..., 1.2104e-05,\n 5.6052e-45, 3.8372e-04], device='cuda:0')", + "exp_avg_sq": "tensor([1.4562e-06, 2.9553e-07, 9.7905e-07, ..., 8.9326e-07, 2.9173e-07,\n 2.2973e-06], device='cuda:0')" + }, + "11": { + "step": "tensor(6260.)", + "exp_avg": "tensor([[ 8.8573e-07, 7.7643e-09, 2.0079e-07, ..., -3.7528e-07,\n -5.6052e-45, 2.7903e-07],\n [-1.6968e-06, -2.6760e-09, 1.2471e-06, ..., -8.9085e-07,\n -5.6052e-45, 8.8673e-07],\n [ 1.6751e-06, 2.0632e-09, 7.1978e-07, ..., -4.0868e-08,\n -5.6052e-45, -1.5715e-06],\n ...,\n [-6.3517e-07, 4.2686e-09, 1.8244e-06, ..., -3.1623e-07,\n 5.6052e-45, -3.6818e-07],\n [-6.4864e-07, 1.9920e-09, -1.2747e-06, ..., 6.7823e-08,\n -5.6052e-45, 1.1517e-06],\n [-2.4732e-06, 9.3278e-10, -8.9123e-07, ..., -8.3761e-07,\n -5.6052e-45, -7.1626e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.0093e-10, 1.5431e-11, 3.4014e-11, ..., 2.1362e-11, 1.5190e-11,\n 6.0224e-11],\n [9.8124e-11, 2.7816e-11, 5.5354e-11, ..., 5.9353e-11, 2.7408e-11,\n 5.5761e-11],\n [1.7947e-10, 3.1413e-11, 6.5317e-11, ..., 5.0744e-11, 2.8535e-11,\n 5.9453e-11],\n ...,\n [2.4780e-10, 2.8416e-11, 6.3586e-11, ..., 5.7939e-11, 3.0434e-11,\n 5.2552e-11],\n [1.4494e-10, 3.6085e-11, 6.3743e-11, ..., 4.0601e-11, 3.4507e-11,\n 7.3608e-11],\n [1.2538e-10, 3.3822e-11, 6.4830e-11, ..., 6.5413e-11, 2.9750e-11,\n 6.9547e-11]], device='cuda:0')" + }, + "12": { + "step": "tensor(3756.)", + "exp_avg": "tensor([[-2.7538e-06, 3.6483e-06, 3.9246e-06, ..., 3.3478e-06,\n -7.2741e-08, 8.4253e-08],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-1.7676e-05, 9.5842e-06, -9.4442e-07, ..., -4.2620e-06,\n -8.3650e-06, 1.1469e-06],\n ...,\n [-8.8349e-06, 9.7421e-06, -3.1404e-06, ..., -4.2425e-07,\n -3.7831e-06, -1.5755e-06],\n [ 3.1605e-05, -3.0500e-05, 6.2374e-06, ..., 3.6976e-06,\n 1.4000e-05, -5.2827e-06],\n [-7.5985e-06, 5.4368e-06, -1.2171e-06, ..., 1.1464e-05,\n 6.2879e-07, -9.5634e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[4.0568e-10, 9.0237e-10, 1.3172e-10, ..., 2.3996e-10, 1.7070e-10,\n 1.1732e-10],\n [2.5778e-11, 7.0474e-11, 7.3739e-16, ..., 3.9139e-12, 7.7569e-12,\n 4.2439e-12],\n [1.8447e-09, 7.8232e-10, 1.3948e-10, ..., 2.0912e-10, 2.6828e-10,\n 8.8962e-11],\n ...,\n [1.9542e-09, 7.7890e-10, 8.9360e-11, ..., 1.3177e-10, 4.4129e-10,\n 8.5544e-11],\n [1.1049e-09, 1.2446e-09, 2.1593e-10, ..., 1.3428e-10, 4.3925e-10,\n 8.0107e-11],\n [1.0302e-09, 1.0059e-09, 1.6025e-10, ..., 1.4430e-10, 3.1971e-10,\n 2.1210e-10]], device='cuda:0')" + }, + "13": { + "step": "tensor(3756.)", + "exp_avg": "tensor([ 1.4594e-05, 5.6052e-45, -1.5783e-04, ..., -1.1433e-04,\n 3.5018e-04, -3.1295e-04], device='cuda:0')", + "exp_avg_sq": "tensor([2.5368e-07, 3.4849e-08, 2.3720e-07, ..., 2.3299e-07, 2.4226e-07,\n 4.4061e-07], device='cuda:0')" + }, + "14": { + "step": "tensor(3756.)", + "exp_avg": "tensor([[-4.1473e-07, -5.6052e-45, 1.4058e-07, ..., -1.5282e-07,\n -4.1470e-07, 5.2540e-07],\n [-6.8656e-07, -5.6052e-45, -3.5424e-07, ..., -3.2235e-08,\n -2.1843e-07, 6.5575e-09],\n [-3.4350e-08, -5.6052e-45, 1.3350e-07, ..., -4.7222e-07,\n -1.0342e-08, 1.6750e-07],\n ...,\n [-4.2450e-07, -5.6052e-45, -1.8081e-07, ..., 2.2132e-07,\n -2.8695e-07, -1.7869e-07],\n [-7.7851e-08, 5.6052e-45, 3.5046e-07, ..., -2.0683e-07,\n -3.6419e-07, 7.5492e-07],\n [ 5.7120e-08, -5.6052e-45, 1.8796e-07, ..., -2.0358e-07,\n -5.2259e-07, -3.7004e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[5.1651e-12, 1.1987e-11, 3.8784e-12, ..., 2.4096e-12, 2.2528e-12,\n 2.6398e-12],\n [6.7580e-12, 1.2098e-12, 8.5860e-12, ..., 5.9261e-12, 3.1785e-12,\n 9.3709e-12],\n [9.7870e-12, 1.8606e-11, 1.0684e-11, ..., 5.1725e-12, 4.9137e-12,\n 2.4106e-11],\n ...,\n [1.0626e-11, 2.0825e-11, 1.0238e-11, ..., 1.6180e-12, 4.0025e-12,\n 5.3788e-12],\n [1.1614e-11, 2.0569e-11, 1.2569e-11, ..., 5.4767e-12, 4.4078e-12,\n 1.0310e-11],\n [1.2487e-11, 1.9828e-13, 9.9231e-12, ..., 5.6168e-12, 3.2918e-12,\n 1.0860e-11]], device='cuda:0')" + } + }, + "param_groups": [ + { + "lr": 0.008535680352542143, + "name": "scale_256", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 0, + 1, + 2 + ] + }, + { + "lr": 0.008535680352542143, + "name": "scale_512", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 3, + 4, + 5 + ] + }, + { + "lr": 0.008535680352542143, + "name": "scale_768", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 6, + 7, + 8 + ] + }, + { + "lr": 0.008535680352542143, + "name": "scale_1024", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 9, + 10, + 11 + ] + }, + { + "lr": 0.008535680352542143, + "name": "scale_1280", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 12, + 13, + 14 + ] + }, + { + "lr": 0.004267913399575775, + "name": "fusion", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.005, + "params": [ + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48 + ] + } + ] + }, + "scheduler_state_dict": { + "T_0": 10, + "T_i": 20, + "T_mult": 2, + "eta_min": 1e-06, + "T_cur": 5, + "base_lrs": [ + 0.01, + 0.01, + 0.01, + 0.01, + 0.01, + 0.005 + ], + "last_epoch": 15, + "_step_count": 0, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 0.008535680352542143, + 0.008535680352542143, + 0.008535680352542143, + 0.008535680352542143, + 0.008535680352542143, + 0.004267913399575775 + ] + }, + "metrics": { + "val_acc": 74.622 + }, + "train_config": { + "name": "david_training", + "run_id": "20251012_032356", + "dataset_name": "AbstractPhil/imagenet-clip-features-orderly", + "model_variant": "clip_vit_b16", + "num_classes": 1000, + "preset": "high_accuracy", + "custom_config_path": null, + "num_classes_override": null, + "use_belly_override": null, + "belly_expand_override": null, + "progressive_training_override": true, + "num_epochs": 20, + "batch_size": 1024, + "learning_rate": 0.01, + "weight_decay": 1e-05, + "warmup_epochs": 3, + "use_rose_loss": true, + "rose_initial_weight": 0.1, + "rose_max_weight": 0.5, + "rose_weight_schedule": "adaptive", + "use_cayley_loss": false, + "cayley_weight": 0.001, + "scale_loss_balance": null, + "use_mixed_precision": false, + "gradient_clip": 5.0, + "scheduler_type": "cosine_restarts", + "min_lr": 1e-06, + "freeze_strategy": "performance", + "freeze_threshold": 70.0, + "unfreeze_on_plateau": true, + "patience": 10, + "track_gradients": true, + "gradient_scale_threshold": 1e-07, + "gradient_scale_multiplier": 5.0, + "log_interval": 50, + "val_interval": 1, + "save_interval": 5, + "log_fusion_weights": true, + "log_loss_components": true, + "save_format": "safetensors", + "hf_repo": "AbstractPhil/gated-david", + "upload_to_hub": true, + "base_dir": "./david_training", + "num_workers": 10, + "pin_memory": true, + "prefetch_factor": 4, + "persistent_workers": true + } +} \ No newline at end of file