diff --git "a/weights/best_model_metadata.json" "b/weights/best_model_metadata.json" --- "a/weights/best_model_metadata.json" +++ "b/weights/best_model_metadata.json" @@ -1,226 +1,241 @@ { - "epoch": 3, + "epoch": 4, "optimizer_state_dict": { "state": { "0": { - "step": "tensor(5008.)", - "exp_avg": "tensor([[-1.2734e-05, 6.8667e-06, 7.2091e-07, ..., -1.2979e-05,\n 1.7175e-05, -1.8715e-06],\n [-1.6867e-05, 7.3590e-07, -4.2806e-06, ..., -6.2592e-06,\n -2.3327e-06, 4.3109e-06],\n [ 2.1554e-05, 1.1715e-06, 4.2643e-05, ..., 2.1010e-05,\n 5.0385e-06, -1.0707e-05],\n ...,\n [ 3.5506e-05, 1.5977e-05, -1.2724e-05, ..., -9.0560e-07,\n 1.1678e-05, -5.0471e-05],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [ 4.7094e-06, 2.5706e-05, -2.5467e-05, ..., 4.2338e-05,\n 8.4632e-06, -2.5780e-05]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.1459e-08, 1.0555e-08, 5.5806e-09, ..., 7.1333e-09, 6.8371e-09,\n 6.1065e-09],\n [6.4251e-09, 7.0470e-09, 3.8616e-09, ..., 3.7677e-09, 4.0096e-09,\n 3.1010e-09],\n [1.1300e-08, 1.1217e-08, 1.3248e-08, ..., 9.1039e-09, 7.6861e-09,\n 6.9019e-09],\n ...,\n [1.1919e-08, 1.2621e-08, 1.7811e-08, ..., 9.1560e-09, 7.9031e-09,\n 8.4781e-09],\n [1.0985e-12, 5.7303e-12, 1.7414e-12, ..., 1.0209e-14, 5.4949e-12,\n 3.2381e-14],\n [1.0395e-08, 9.6481e-09, 1.0166e-08, ..., 8.5132e-09, 5.4347e-09,\n 4.9482e-09]], device='cuda:0')" + "step": "tensor(6260.)", + "exp_avg": "tensor([[ 7.7419e-05, -7.2244e-06, 3.0531e-05, ..., 2.5845e-05,\n 1.8728e-05, 3.9241e-06],\n [ 3.3904e-06, -1.4326e-05, 4.1395e-06, ..., 4.7042e-06,\n 1.7799e-05, -3.5841e-06],\n [-2.3233e-06, 2.1983e-06, 1.2519e-05, ..., -6.5935e-07,\n -8.1431e-07, -1.6264e-06],\n ...,\n [-3.6582e-06, 4.0445e-05, -2.2387e-05, ..., 4.7334e-05,\n -1.9004e-06, 2.3796e-05],\n [-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., -5.6052e-45,\n 5.6052e-45, -5.6052e-45],\n [-3.9960e-05, -3.0237e-05, 2.3135e-05, ..., -2.7614e-05,\n 1.2621e-05, -4.3781e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.2214e-08, 9.8888e-09, 5.6333e-09, ..., 6.1682e-09, 6.1113e-09,\n 6.5305e-09],\n [5.4207e-09, 5.4562e-09, 3.2772e-09, ..., 3.2730e-09, 3.7076e-09,\n 2.4386e-09],\n [1.1351e-08, 1.0384e-08, 1.3721e-08, ..., 8.7800e-09, 7.4439e-09,\n 6.3112e-09],\n ...,\n [1.1335e-08, 1.3139e-08, 1.9069e-08, ..., 8.3830e-09, 7.8853e-09,\n 8.1240e-09],\n [3.1389e-13, 1.6375e-12, 4.9763e-13, ..., 2.9173e-15, 1.5702e-12,\n 9.2530e-15],\n [1.0876e-08, 1.0017e-08, 1.0210e-08, ..., 8.8808e-09, 5.4898e-09,\n 4.7628e-09]], device='cuda:0')" }, "1": { - "step": "tensor(5008.)", - "exp_avg": "tensor([ 3.1157e-04, 3.8629e-05, -5.8131e-04, ..., 1.8193e-03,\n 5.6052e-45, 4.2265e-04], device='cuda:0')", - "exp_avg_sq": "tensor([1.4465e-05, 8.1649e-06, 1.9182e-05, ..., 2.0674e-05, 3.7193e-08,\n 1.5208e-05], device='cuda:0')" + "step": "tensor(6260.)", + "exp_avg": "tensor([ 1.5406e-03, -1.1476e-03, -2.6409e-04, ..., 2.9145e-04,\n 5.6052e-45, -1.4334e-03], device='cuda:0')", + "exp_avg_sq": "tensor([1.3140e-05, 7.1135e-06, 1.9650e-05, ..., 2.1019e-05, 1.0628e-08,\n 1.4784e-05], device='cuda:0')" }, "2": { - "step": "tensor(5008.)", - "exp_avg": "tensor([[-3.6451e-06, -1.6667e-07, -2.4451e-06, ..., -3.0371e-05,\n 5.6052e-45, 4.7662e-06],\n [ 3.7248e-06, -6.5078e-07, -2.8587e-07, ..., -5.1080e-07,\n -5.6052e-45, 1.6882e-05],\n [ 2.5296e-06, 4.7254e-06, 7.6713e-07, ..., 3.0621e-06,\n -5.6052e-45, -9.0665e-06],\n ...,\n [ 8.3743e-08, 6.2282e-09, -3.3314e-07, ..., -8.4207e-09,\n 5.6052e-45, -1.4176e-07],\n [ 1.2875e-06, -9.8009e-07, -1.8227e-06, ..., -2.6610e-06,\n -5.6052e-45, -3.6278e-07],\n [-1.1027e-05, 4.9459e-06, 7.5636e-06, ..., 6.4726e-06,\n 5.6052e-45, 5.1197e-06]], device='cuda:0')", - "exp_avg_sq": "tensor([[3.9164e-09, 3.6642e-10, 2.6644e-09, ..., 1.3507e-08, 8.7579e-13,\n 1.9032e-09],\n [4.4228e-09, 1.4217e-09, 1.1549e-09, ..., 5.7510e-10, 4.8593e-11,\n 5.3753e-09],\n [1.9197e-09, 1.2928e-09, 1.1750e-09, ..., 1.1938e-09, 3.3487e-10,\n 2.7616e-09],\n ...,\n [4.6921e-11, 6.9855e-12, 1.7850e-10, ..., 4.0480e-11, 3.3627e-11,\n 2.2743e-11],\n [1.6785e-09, 2.7664e-11, 3.6481e-10, ..., 5.7069e-10, 6.7835e-14,\n 7.1781e-10],\n [6.5096e-09, 1.7869e-09, 1.4764e-08, ..., 2.9821e-09, 6.0099e-12,\n 3.9506e-09]], device='cuda:0')" + "step": "tensor(6260.)", + "exp_avg": "tensor([[-1.1485e-06, -5.7111e-07, 2.8125e-06, ..., -1.6099e-06,\n 5.6052e-45, -6.3541e-06],\n [-5.3498e-06, 5.3070e-06, -2.1827e-06, ..., 7.7663e-06,\n -5.6052e-45, -6.0954e-06],\n [-4.6320e-06, 1.8319e-06, -1.0043e-06, ..., 6.1784e-06,\n -5.6052e-45, 5.3614e-06],\n ...,\n [-2.2621e-08, 2.2287e-09, -1.9373e-07, ..., 4.6302e-07,\n 5.6052e-45, 1.4883e-07],\n [-1.2227e-06, -9.5979e-09, 1.4337e-07, ..., -1.5761e-08,\n -5.6052e-45, 1.0289e-05],\n [-2.3983e-05, 8.2098e-06, -1.4137e-05, ..., -2.6232e-05,\n 5.6052e-45, -6.9302e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.3307e-09, 3.9215e-10, 2.4352e-09, ..., 9.7499e-09, 2.5026e-13,\n 1.1423e-09],\n [3.0932e-09, 1.1406e-09, 8.4448e-10, ..., 6.5127e-10, 1.3886e-11,\n 4.0199e-09],\n [8.5765e-10, 8.2466e-10, 8.1234e-10, ..., 9.9855e-10, 9.5692e-11,\n 2.0900e-09],\n ...,\n [4.1790e-11, 2.6058e-11, 3.9500e-10, ..., 2.8262e-10, 9.6092e-12,\n 2.8260e-10],\n [1.6500e-09, 2.2122e-11, 4.5411e-10, ..., 3.4966e-10, 1.9384e-14,\n 1.0797e-09],\n [4.3279e-09, 9.7757e-10, 1.0829e-08, ..., 2.3307e-09, 1.7174e-12,\n 3.1455e-09]], device='cuda:0')" }, "3": { - "step": "tensor(5008.)", - "exp_avg": "tensor([ 1.0524e-04, 6.1300e-05, 1.7525e-04, -5.9965e-05, -5.1446e-04,\n 4.6025e-05, -3.5289e-05, 2.6264e-04, 1.8340e-04, -1.6293e-04,\n -5.5372e-04, -1.7627e-04, 5.2063e-05, -1.1186e-04, 1.0396e-05,\n 1.0095e-04, 8.8436e-06, 3.5614e-04, -2.8247e-04, 1.3035e-04,\n -1.4628e-04, -1.3555e-04, 8.4699e-05, 1.7117e-04, -1.0549e-04,\n 3.3265e-04, 1.6409e-04, -3.5745e-04, -2.8924e-04, -5.6805e-04,\n -1.7486e-05, -6.6800e-05, -1.9104e-04, -5.1038e-04, -2.3632e-04,\n 2.4621e-04, -1.0267e-05, 1.1626e-04, 2.6665e-05, -2.7442e-05,\n 4.1686e-06, -1.2692e-04, 4.9734e-05, 1.1376e-05, -4.5588e-05,\n -2.0517e-04, 1.0800e-21, -1.5950e-04, -2.0798e-04, 2.8199e-04,\n -6.7903e-06, 1.2974e-04, 1.0175e-04, -1.0722e-05, 2.0409e-04,\n -1.6284e-04, -1.7491e-04, 1.5504e-05, -1.7279e-05, -2.5228e-04,\n -1.2089e-04, -1.4503e-04, 1.1286e-04, 3.7329e-04, 9.4139e-05,\n 8.7216e-05, -3.7385e-06, -2.0794e-04, -1.8078e-04, 1.2992e-04,\n 2.8858e-05, 5.6052e-45, -1.1378e-04, -2.4071e-04, -1.5574e-04,\n -6.7744e-06, -3.8157e-05, 1.4097e-04, 2.5193e-04, 3.3739e-05,\n -9.3671e-05, 1.3167e-04, -4.0563e-04, -1.4730e-04, 1.9859e-04,\n -3.3244e-05, 1.9350e-04, 1.8134e-04, -1.5075e-05, -1.7259e-04,\n 3.1044e-04, 2.3485e-04, -3.0811e-04, -6.6752e-05, -6.4698e-05,\n -1.1759e-04, -2.8369e-04, 1.3854e-04, -2.4433e-04, 7.9351e-05,\n -3.1159e-04, -3.8603e-04, 1.1226e-04, 5.5523e-05, 7.5864e-05,\n -1.1631e-04, 3.4450e-04, -3.3212e-04, 1.3290e-04, -3.9131e-05,\n 3.7120e-05, 1.3501e-05, -5.8789e-05, -2.8282e-04, 2.6929e-04,\n 1.7545e-05, -1.0015e-04, 3.8813e-04, 5.0390e-05, -2.7069e-04,\n -1.0132e-05, -1.4554e-04, -1.0016e-04, 5.9922e-05, 5.2139e-04,\n -2.0021e-04, 8.1962e-05, 1.6005e-04, 5.3875e-05, -1.4836e-05,\n -1.5154e-05, -1.9287e-04, -1.3325e-04, 2.7008e-04, -8.3692e-05,\n 2.3526e-04, 3.0681e-05, 3.6428e-05, -8.5520e-05, 2.8669e-04,\n -5.1247e-04, 4.6252e-05, -6.3971e-05, -7.7816e-05, 2.1307e-04,\n -1.5039e-05, -1.4355e-04, 1.5798e-04, 2.2894e-04, -2.3109e-04,\n 2.7371e-04, 5.3722e-05, -1.8996e-05, 8.0504e-05, 1.0540e-04,\n 1.0118e-32, 4.3335e-05, -1.5032e-04, -6.1229e-05, 2.6403e-04,\n -1.2959e-05, -8.6999e-05, -1.2690e-04, 2.5147e-04, -1.1522e-04,\n 8.4401e-05, -7.5066e-05, 9.2998e-05, 7.6863e-05, 2.4434e-04,\n -2.3307e-04, -1.2651e-04, -1.0109e-08, 4.2658e-04, 9.4966e-05,\n 1.7514e-04, 1.9025e-04, -1.6024e-05, -9.9438e-05, 1.2486e-05,\n -1.9994e-04, 1.4981e-04, 1.2898e-04, -3.8849e-05, -9.9648e-05,\n 3.5331e-04, -2.1529e-04, 2.9356e-05, -1.1948e-05, -1.2406e-04,\n -2.5384e-04, -8.2081e-06, 2.8809e-04, -4.3127e-04, -2.9741e-04,\n 1.9566e-05, 1.7613e-05, -1.2802e-04, 5.6052e-45, -6.0463e-05,\n 2.4197e-04, 3.6962e-04, -6.7562e-05, -2.5305e-04, -1.2610e-04,\n 1.4379e-04, -1.3079e-04, -1.3645e-04, 6.1236e-05, -9.5312e-05,\n 2.1206e-04, -1.5187e-04, 5.6052e-45, 1.5648e-04, 4.8153e-04,\n 6.9655e-05, -2.8859e-04, -3.3648e-05, 1.0893e-04, 4.7440e-05,\n 3.7600e-05, 1.4911e-04, -2.7755e-04, 1.4567e-04, 2.3525e-04,\n -2.9182e-04, 1.4707e-04, 8.7702e-05, -1.0623e-05, 1.6941e-04,\n 3.7385e-05, -2.2579e-05, 2.1270e-04, 3.5696e-04, 1.4117e-04,\n -1.5809e-04, 6.2583e-05, 2.6934e-05, 4.3156e-05, 1.0619e-04,\n -5.6755e-05, 1.2007e-04, 8.8185e-05, -4.6226e-05, 5.2028e-05,\n -8.2449e-05, -2.5994e-04, -1.0182e-04, 1.3677e-04, 1.3611e-04,\n -4.6495e-05, -8.0587e-05, -7.2279e-05, -2.0518e-04, 2.3924e-04,\n 2.0234e-04, -8.5577e-05, 6.8788e-05, -2.2797e-04, 6.3856e-05,\n -1.7138e-05, 5.5240e-05, 1.0598e-04, -4.3519e-04, 2.6824e-05,\n 9.4748e-05, 3.3905e-04, 1.9563e-04, 2.2928e-04, -1.3490e-05,\n 1.2803e-04, 1.5333e-04, 3.1317e-05, -9.5770e-05, 1.0565e-04,\n 3.6688e-05, 2.8872e-05, -3.5629e-05, 6.1853e-04, 4.3311e-05,\n 1.1152e-04, -1.0609e-05, -2.5742e-04, 6.9119e-05, -1.5129e-05,\n -9.6895e-05, 2.3849e-05, 2.2376e-05, -2.3959e-04, 1.4706e-04,\n -3.5845e-04, 3.4059e-05, -4.2426e-04, 2.1481e-04, -9.3931e-11,\n 2.4337e-04, 1.3652e-04, -9.4028e-05, 1.0401e-04, 7.3040e-06,\n -8.2594e-04, -2.4534e-04, -3.1632e-05, -1.9554e-04, 2.6291e-04,\n -4.8142e-05, -1.4679e-04, 2.3119e-04, 1.3203e-04, -1.7782e-04,\n 2.1980e-04, 3.4091e-05, 2.7042e-04, -4.7652e-05, -1.8151e-04,\n 2.4120e-05, 1.0074e-04, -3.3731e-05, 1.0439e-05, 1.5479e-05,\n -1.9460e-04, 7.2228e-05, 3.6471e-05, -8.7919e-05, 1.3076e-05,\n -3.5044e-05, -2.8874e-05, -4.3670e-05, 4.4372e-05, -4.6814e-04,\n -2.2179e-04, -1.0013e-04, 4.0259e-04, 9.7545e-05, 4.8478e-05,\n -5.5253e-05, 2.3340e-04, -1.3454e-04, -1.0119e-05, -1.5862e-04,\n 3.3096e-04, -2.1455e-04, 7.9894e-05, 5.6052e-45, -1.7685e-04,\n -2.3232e-04, 4.2864e-04, -6.6742e-05, 1.8147e-04, 2.4994e-04,\n -9.6265e-05, -2.8534e-05, 1.4455e-04, 2.8408e-04, -2.1918e-04,\n 2.9929e-05, 1.3081e-04, -2.5807e-04, 4.8016e-04, 4.7468e-05,\n 1.0751e-04, 1.4297e-04, 8.1040e-05, -1.8498e-05, 9.3412e-05,\n -1.5029e-04, 7.1403e-21, -1.3656e-04, -4.1134e-04, -2.2631e-04,\n -2.5775e-04, -9.5014e-05, -3.7456e-04, -2.4558e-04, 4.1844e-05,\n -1.7963e-04, -3.5012e-05, -1.6348e-04, -1.5550e-14, 2.2185e-05,\n 1.5979e-15, -1.2668e-04, -1.9439e-04, -8.0028e-05, 3.7994e-04,\n -1.8114e-04, -1.6690e-05, 4.4489e-05, -1.4502e-06, -2.5554e-04,\n 3.5010e-04, 1.0673e-05, 4.3758e-05, 1.9235e-04, 4.2556e-05,\n -2.7503e-04, 3.8263e-05, 3.0572e-05, 1.3500e-04, 2.8905e-04,\n -3.2795e-04, -3.0505e-05, -9.4881e-06, -2.8356e-04, -2.1375e-04,\n -3.2394e-04, 7.4025e-05, -2.0911e-04, 4.3555e-05, -1.4122e-05,\n 2.9913e-04, 1.1042e-04, -4.8724e-04, 9.3339e-05, 2.4643e-04,\n -8.9602e-05, -1.4023e-04, -1.3892e-04, 1.9918e-04, -3.8559e-06,\n 1.0573e-19, -1.4259e-04, -2.9753e-04, 1.0701e-04, -4.9936e-04,\n 3.5452e-05, 1.9047e-05, 6.7956e-06, -3.4003e-06, 7.6820e-05,\n 1.7123e-04, 2.3651e-04, -9.0106e-06, 3.8639e-06, 2.2243e-04,\n 1.8564e-04, -1.7284e-04, 1.6076e-04, 4.9109e-28, -1.0387e-04,\n -3.1029e-04, -2.5354e-05, -6.0780e-05, -3.4500e-04, -3.9737e-05,\n 4.0157e-04, 5.6052e-45, -1.0184e-04, -3.8064e-04, 2.3010e-04,\n 9.2931e-05, -2.1418e-05, 7.7454e-05, 3.8316e-04, -1.0242e-04,\n 2.9471e-05, 4.1425e-05, 1.0681e-04, -2.6770e-05, -2.0651e-04,\n -2.6803e-04, 7.9215e-05, -1.8659e-04, -8.2190e-05, -1.1825e-04,\n -8.1230e-05, 8.9073e-05, -1.5398e-05, 1.4908e-04, 1.8627e-04,\n 1.6125e-04, -3.8152e-04, -6.2918e-05, 1.5101e-04, -1.1105e-04,\n -6.6683e-05, -1.0275e-05, 5.3733e-06, 1.0849e-04, 9.7083e-05,\n -3.9591e-04, -1.1922e-04, 3.4695e-05, 1.2192e-04, -2.2069e-04,\n 2.0329e-04, -1.3018e-04, 3.6613e-04, 7.3012e-05, 3.1173e-04,\n -5.2725e-05, -2.9675e-05, 2.0933e-04, -2.8284e-04, 5.1408e-05,\n 5.6052e-45, -1.5508e-04, 8.4226e-05, -2.6109e-04, 2.0390e-04,\n -5.0532e-04, 9.0625e-05, -1.6268e-04, -1.2284e-05, 3.3122e-04,\n -1.0398e-04, -5.5870e-06, 1.1848e-04, 2.2363e-05, 1.2507e-04,\n -3.6885e-05, 5.8294e-05], device='cuda:0')", - "exp_avg_sq": "tensor([6.3084e-07, 9.7418e-07, 9.6836e-07, 6.2409e-07, 7.5082e-07, 7.0385e-07,\n 2.8110e-07, 8.0162e-07, 7.4309e-07, 3.3444e-07, 1.2907e-06, 5.2186e-07,\n 4.0586e-07, 5.0316e-07, 7.8456e-08, 6.5972e-07, 1.2762e-06, 9.7714e-07,\n 3.5994e-07, 5.4178e-07, 5.1468e-08, 5.2859e-07, 1.3641e-06, 3.9305e-07,\n 5.6017e-07, 6.3334e-07, 8.4705e-07, 7.3613e-07, 2.3173e-07, 2.4493e-07,\n 9.3624e-07, 1.1017e-06, 1.0114e-06, 1.1981e-06, 2.7020e-07, 4.5341e-07,\n 4.8201e-07, 9.1590e-07, 1.3186e-06, 3.8393e-07, 9.1772e-07, 1.1525e-06,\n 9.6761e-07, 2.5029e-07, 1.0380e-06, 9.3405e-07, 1.6352e-06, 2.8515e-07,\n 9.0808e-07, 4.4831e-07, 4.9684e-07, 3.4413e-07, 7.3914e-07, 4.7266e-07,\n 4.8076e-07, 8.0259e-07, 6.7666e-07, 5.1389e-07, 6.6370e-07, 8.0635e-07,\n 5.8008e-07, 7.0550e-07, 5.7669e-07, 8.4134e-07, 5.1323e-07, 2.1020e-07,\n 6.8045e-07, 8.2684e-07, 4.1109e-07, 3.0982e-07, 6.1685e-07, 7.5433e-15,\n 5.4958e-07, 6.1453e-07, 2.4381e-07, 5.5110e-07, 7.5887e-07, 9.3425e-07,\n 7.7707e-07, 6.0121e-07, 3.5019e-07, 9.7868e-07, 3.8393e-07, 5.6662e-07,\n 4.5488e-07, 2.2549e-07, 5.9979e-07, 1.2385e-06, 3.2350e-07, 8.4144e-07,\n 5.0739e-07, 1.0218e-06, 8.0237e-07, 1.3179e-06, 2.0230e-07, 9.4253e-07,\n 8.9716e-07, 1.1291e-06, 1.3373e-06, 5.4527e-07, 7.3150e-07, 6.2734e-07,\n 5.9885e-07, 1.8035e-06, 3.9011e-07, 6.3503e-07, 7.4371e-07, 4.9578e-07,\n 5.1492e-07, 2.9206e-07, 6.1113e-08, 6.6234e-07, 2.5841e-07, 9.4981e-07,\n 8.0523e-07, 7.6730e-07, 4.5627e-07, 1.0228e-06, 3.8329e-07, 4.3576e-07,\n 6.7862e-07, 6.3576e-07, 8.8091e-07, 7.2389e-07, 7.3507e-07, 4.1235e-07,\n 6.2867e-07, 7.4529e-07, 4.2152e-07, 3.5236e-07, 1.5129e-07, 3.2720e-07,\n 4.7639e-07, 2.8960e-07, 4.3335e-07, 4.8569e-07, 1.1148e-06, 6.5838e-07,\n 7.2185e-07, 8.7610e-07, 1.0414e-06, 6.1254e-07, 7.7200e-08, 1.2337e-06,\n 8.0263e-07, 6.8457e-07, 1.1007e-06, 4.8149e-07, 4.2759e-07, 1.0865e-06,\n 7.6862e-07, 6.6685e-07, 4.4797e-07, 1.4369e-06, 4.6139e-07, 3.6498e-09,\n 5.2715e-07, 6.0256e-07, 3.7707e-07, 3.6493e-07, 2.2886e-07, 4.7550e-07,\n 4.5825e-07, 8.2313e-07, 6.5760e-07, 3.4538e-07, 7.9855e-07, 7.9590e-07,\n 7.8194e-07, 4.1715e-07, 6.3824e-07, 4.1773e-07, 3.6579e-08, 1.1198e-06,\n 5.7586e-07, 3.9466e-07, 7.5851e-07, 5.9580e-07, 3.4696e-07, 2.5181e-07,\n 3.8492e-07, 3.1086e-07, 5.7769e-07, 3.1549e-07, 3.0369e-07, 2.1315e-06,\n 4.4641e-07, 5.6986e-07, 9.5819e-07, 7.4527e-07, 1.2744e-06, 7.1123e-07,\n 9.0986e-07, 8.8265e-07, 1.2693e-06, 8.8994e-07, 8.4892e-07, 2.5914e-07,\n 5.8077e-16, 1.0915e-06, 7.7188e-07, 4.2096e-07, 4.7841e-07, 8.0791e-07,\n 6.4025e-07, 4.6883e-07, 7.8678e-07, 8.2271e-07, 1.7239e-07, 1.4024e-07,\n 8.3670e-07, 1.4024e-06, 1.1862e-06, 6.7654e-07, 6.3695e-07, 7.1597e-07,\n 6.1020e-07, 2.2899e-07, 7.6491e-07, 9.8355e-07, 4.2058e-07, 4.7808e-07,\n 5.0660e-07, 7.2260e-07, 8.9533e-07, 9.4435e-07, 9.4679e-07, 2.3112e-06,\n 7.5687e-07, 1.9256e-07, 4.5783e-07, 7.9936e-07, 8.1275e-07, 6.4406e-07,\n 6.6732e-07, 3.2855e-07, 7.5096e-07, 5.9034e-07, 7.6771e-07, 2.0720e-06,\n 4.0859e-07, 7.1854e-07, 3.4642e-07, 4.8501e-07, 2.5566e-07, 3.5889e-07,\n 4.5058e-07, 5.5048e-07, 5.4398e-07, 8.9890e-07, 5.6632e-07, 7.3559e-07,\n 6.5425e-07, 9.9204e-07, 7.1262e-07, 6.1903e-07, 4.7978e-07, 4.6423e-07,\n 7.0786e-07, 2.8913e-07, 6.3879e-07, 4.8681e-07, 6.9471e-07, 5.7324e-07,\n 9.4482e-08, 3.4427e-07, 1.1498e-06, 6.3791e-07, 1.0917e-06, 6.2625e-07,\n 6.1986e-07, 7.0721e-07, 3.5782e-07, 1.1051e-06, 2.0112e-07, 4.2100e-07,\n 6.4243e-07, 1.2121e-06, 5.2058e-07, 2.9780e-07, 5.6954e-07, 9.0936e-07,\n 6.5945e-07, 4.6469e-07, 8.2029e-07, 8.4116e-07, 1.0926e-07, 6.6219e-07,\n 4.1346e-07, 7.5214e-07, 1.1020e-06, 9.0945e-07, 1.3209e-06, 2.4160e-07,\n 2.2722e-07, 7.4551e-07, 6.0384e-07, 5.6067e-07, 7.3395e-07, 5.0480e-07,\n 2.3883e-07, 4.1634e-07, 7.8138e-07, 1.0716e-06, 8.5707e-07, 3.9069e-07,\n 5.0559e-07, 9.2141e-07, 4.1053e-07, 7.0824e-07, 5.4588e-07, 2.7803e-07,\n 4.7579e-07, 5.1554e-07, 1.0597e-06, 6.2613e-07, 2.4855e-07, 1.0544e-06,\n 5.6302e-07, 5.8304e-07, 7.7543e-07, 5.1799e-07, 1.3729e-06, 4.9558e-07,\n 2.1046e-07, 7.0736e-07, 7.9135e-07, 2.9644e-07, 8.9809e-07, 6.8166e-07,\n 9.1896e-07, 9.3077e-07, 8.3327e-07, 3.2830e-07, 7.0274e-07, 7.9926e-07,\n 7.4213e-07, 3.3247e-07, 6.8778e-08, 5.8664e-07, 8.5008e-07, 5.4242e-07,\n 9.4093e-07, 1.0022e-10, 6.5140e-07, 2.7480e-06, 3.2221e-07, 2.8606e-07,\n 3.8064e-07, 7.8784e-07, 4.6296e-07, 1.3936e-06, 5.3328e-07, 6.0196e-07,\n 1.1591e-06, 3.5130e-07, 2.8120e-07, 6.3578e-07, 7.1317e-07, 4.8608e-07,\n 7.2719e-07, 2.1351e-07, 7.6459e-07, 1.0936e-06, 6.1141e-07, 9.2203e-07,\n 5.1645e-11, 7.1058e-07, 3.0929e-07, 7.8074e-07, 5.6897e-07, 7.3874e-07,\n 7.6218e-07, 7.7155e-07, 2.0184e-07, 4.7312e-07, 4.1066e-07, 5.2921e-07,\n 1.2760e-07, 3.4972e-07, 1.9044e-08, 6.3089e-07, 9.6358e-07, 8.6184e-07,\n 5.8562e-07, 9.9045e-07, 6.5441e-07, 2.0673e-07, 6.0872e-07, 1.1312e-06,\n 6.6408e-07, 7.3556e-07, 1.1226e-06, 8.1345e-07, 6.1514e-07, 3.0961e-07,\n 3.4147e-07, 7.4523e-07, 2.0085e-07, 1.0095e-06, 6.7287e-07, 6.2311e-07,\n 9.9247e-07, 5.9581e-07, 1.2415e-06, 7.0651e-07, 6.5464e-07, 1.2665e-06,\n 5.6958e-07, 6.8164e-07, 1.0975e-06, 4.7964e-07, 1.2999e-06, 1.0496e-06,\n 6.4636e-07, 4.1000e-07, 1.3844e-06, 3.5200e-07, 4.5218e-07, 3.3368e-07,\n 8.1771e-07, 5.7402e-07, 4.3396e-07, 6.4688e-07, 1.2774e-06, 5.6696e-07,\n 2.7885e-07, 7.8217e-07, 2.9606e-07, 1.4341e-06, 1.0943e-06, 9.5153e-07,\n 3.7541e-07, 5.0040e-07, 4.1631e-07, 2.3212e-07, 6.2840e-07, 2.0885e-07,\n 6.0309e-07, 8.0131e-07, 8.1475e-07, 6.3875e-07, 2.4979e-07, 5.2369e-07,\n 1.1556e-06, 6.2302e-07, 7.3626e-13, 3.0015e-07, 1.8195e-07, 1.0237e-06,\n 7.5593e-07, 2.5413e-07, 4.8675e-07, 7.6719e-07, 1.0247e-06, 6.7791e-07,\n 2.4961e-07, 4.6294e-07, 3.6106e-08, 8.0120e-07, 7.5243e-07, 2.7476e-07,\n 3.3807e-07, 9.3026e-07, 1.0662e-06, 2.9385e-07, 6.0751e-07, 9.0723e-07,\n 2.6278e-07, 1.0593e-06, 1.4321e-06, 5.9963e-07, 9.4229e-07, 7.2757e-07,\n 4.8576e-07, 4.3370e-07, 5.6162e-07, 7.1757e-07, 2.7976e-07, 3.2330e-07,\n 1.0970e-06, 5.2661e-07, 4.1234e-07, 5.7917e-07, 7.1767e-07, 8.7538e-07,\n 1.7948e-07, 7.6791e-07, 7.2428e-07, 1.1780e-06, 1.4204e-06, 7.3552e-07,\n 4.4410e-07, 1.0161e-06, 4.9893e-07, 2.0189e-17, 5.3252e-07, 1.2544e-06,\n 4.4264e-07, 8.6851e-07, 5.5307e-07, 8.1829e-07, 4.4973e-07, 8.9137e-07,\n 4.4352e-07, 2.4866e-06, 1.4947e-07, 7.7565e-07, 3.7405e-07, 9.6525e-08,\n 3.0385e-07, 7.2594e-07], device='cuda:0')" + "step": "tensor(6260.)", + "exp_avg": "tensor([ 1.0477e-04, 1.1376e-04, 6.8580e-05, 1.1062e-04, -1.5102e-04,\n 1.8946e-05, 2.2321e-04, 4.4966e-04, -2.1635e-04, 3.6279e-04,\n -4.8620e-05, -1.1527e-06, 7.3181e-05, 9.5280e-05, 1.7739e-04,\n 1.6335e-04, -1.1370e-04, -1.1923e-05, 2.4393e-05, 1.0780e-04,\n -9.4480e-05, 2.5872e-04, 4.0348e-04, -1.9958e-04, -8.5265e-05,\n -1.9338e-04, 1.9970e-04, -8.4242e-05, 2.3576e-05, -3.3020e-05,\n -3.9298e-05, -7.7467e-06, -3.9194e-04, -9.2478e-05, 5.7959e-05,\n 5.9237e-05, 4.3912e-04, -1.0686e-04, 1.5071e-04, 8.6443e-06,\n 7.1097e-04, 1.8752e-04, 2.1190e-05, 4.4484e-05, 4.0833e-05,\n 9.2181e-05, 1.0533e-18, 7.5112e-05, 2.1147e-05, 1.4307e-04,\n 4.7949e-05, -1.8811e-05, 3.2247e-05, -2.0407e-06, -4.8476e-05,\n -2.7448e-04, -2.3936e-04, -9.3065e-05, 4.3918e-05, 8.5998e-05,\n 1.7864e-05, -1.4735e-04, -9.0984e-05, -2.1071e-06, 1.3226e-04,\n 5.5162e-05, -2.7692e-05, 1.8201e-04, 9.0112e-05, -4.0201e-04,\n 1.3602e-04, 5.6052e-45, 6.4871e-06, 2.7337e-04, -1.0230e-04,\n -1.5436e-04, -4.0674e-05, 5.9901e-05, -1.7824e-04, 1.3852e-04,\n 4.4658e-05, -1.3218e-04, -1.4014e-04, 1.7143e-05, -6.1319e-05,\n -1.4133e-04, -3.2296e-04, 3.2819e-04, -1.4037e-04, -3.6881e-04,\n 6.7872e-06, 2.7025e-04, -5.5208e-04, -6.8871e-05, 3.1423e-05,\n 1.5790e-04, 1.0097e-05, 1.3257e-04, -2.4127e-04, 3.0916e-05,\n 1.5010e-05, -2.2331e-04, -7.9740e-05, -2.9505e-06, -1.0984e-05,\n -1.8364e-06, -4.5352e-05, -1.1953e-04, 1.7912e-05, -2.8878e-04,\n 3.9155e-05, 1.4545e-04, -4.1241e-06, -7.3574e-05, 4.0110e-04,\n 5.3793e-05, 1.5396e-04, -2.4731e-04, -3.7000e-05, 1.9019e-04,\n -1.4076e-04, 1.2596e-04, -3.3846e-04, 2.6942e-04, -2.5929e-04,\n -2.7995e-04, 7.5528e-06, 2.9249e-05, -3.6848e-04, -1.2839e-04,\n -7.5530e-05, 1.4641e-04, 1.3239e-05, -7.3279e-05, -1.5624e-04,\n 2.0339e-04, 1.8814e-04, 8.7494e-05, -1.8627e-04, 3.8127e-04,\n 9.6811e-05, -2.5912e-05, 8.9033e-05, 2.4748e-05, 1.6964e-04,\n -1.7393e-04, -2.3904e-05, -1.5047e-04, 1.1144e-04, 2.8452e-05,\n 1.1765e-06, 4.0289e-05, 1.1107e-04, -6.5209e-05, 2.4792e-05,\n 5.6931e-13, 7.5780e-05, 1.4192e-04, 7.0784e-05, -4.5672e-04,\n 1.4378e-04, 7.7271e-06, -2.6640e-04, 8.8398e-05, -2.3331e-04,\n 1.5183e-05, 3.1811e-04, -1.2715e-05, 1.1393e-05, -2.7617e-04,\n -2.7581e-05, 8.1515e-05, 1.2959e-07, -4.6871e-04, 4.8529e-04,\n -3.3767e-04, -2.5262e-04, 1.7203e-04, -1.8694e-04, 1.1845e-04,\n 8.9294e-05, 2.0866e-04, 1.2850e-04, 8.5298e-06, 1.4894e-04,\n -9.5003e-05, 1.9936e-04, -1.3296e-04, -4.4874e-05, 2.0801e-04,\n 1.2607e-04, -1.7662e-06, 9.1341e-05, -1.8251e-04, -2.0454e-04,\n 4.9065e-05, -2.9016e-05, 2.0897e-04, 5.6052e-45, 9.3667e-05,\n -2.2721e-06, 2.3989e-04, 2.0541e-04, 4.6781e-06, 1.2744e-04,\n -4.4005e-05, -4.8953e-06, -2.6030e-04, 1.9106e-04, 9.8366e-05,\n 4.9292e-05, -1.3986e-04, 1.2331e-42, -3.2295e-05, -4.8253e-05,\n -1.3859e-04, 5.3263e-04, 6.0121e-05, -2.2526e-04, -8.2499e-05,\n 4.7832e-05, 2.7444e-05, -1.1168e-04, 1.5333e-04, -8.4024e-06,\n 3.1697e-04, -2.3630e-04, -2.1602e-04, -1.5837e-04, -5.0515e-04,\n -1.1856e-04, 4.3696e-05, 5.7240e-05, -1.5466e-04, -2.6130e-04,\n -2.7558e-05, -5.1042e-05, -5.5390e-05, -1.4133e-04, 1.0975e-05,\n -4.0222e-04, 1.2004e-04, -8.3381e-05, -1.3901e-04, -4.6265e-05,\n 1.5251e-04, 2.1482e-05, -3.0831e-04, 4.1425e-05, -9.0239e-05,\n 1.5997e-04, -1.0658e-04, 1.9397e-04, -8.7239e-05, -8.7222e-05,\n -1.9878e-04, 1.4545e-05, -4.9886e-05, 1.6795e-04, -1.6524e-05,\n -1.7034e-04, -4.8796e-04, 8.6639e-05, 1.0742e-04, 1.3301e-04,\n -7.4432e-05, -3.7230e-04, 8.4317e-05, -2.9158e-04, 1.5610e-04,\n -1.6772e-04, 8.4969e-05, -1.0371e-04, -3.1074e-06, 2.4064e-04,\n -1.6916e-04, 2.6740e-05, 1.1810e-05, 1.1866e-05, -1.3244e-05,\n -1.0962e-04, 6.9882e-05, -3.3924e-04, 1.7443e-04, -5.3820e-05,\n 6.3928e-05, 6.3257e-05, 9.3041e-05, 1.6266e-04, -3.9549e-04,\n -1.3156e-05, -2.1692e-04, -1.1161e-04, -6.8772e-05, 2.7330e-10,\n 1.4159e-04, 6.2701e-05, 3.7193e-04, 2.4729e-06, 1.1071e-04,\n 1.1206e-04, -6.4637e-05, 2.0143e-04, 3.8784e-06, -7.4142e-05,\n 6.1758e-05, -4.0520e-04, -8.6071e-05, 1.0703e-04, 2.0677e-05,\n -2.2336e-05, 2.3289e-05, -8.7161e-05, -1.1777e-04, -2.6742e-04,\n 4.0852e-05, -3.2601e-04, -1.7674e-04, -7.0380e-05, 2.6905e-04,\n -2.4768e-04, 5.2272e-05, 1.4922e-04, 1.9704e-04, -1.0745e-04,\n 9.4971e-05, 3.7686e-05, 1.2015e-04, -4.0044e-04, 2.5644e-04,\n 4.8441e-05, -1.9149e-04, -3.9381e-05, 5.9652e-05, 1.1766e-04,\n 3.9545e-05, 1.2883e-04, 1.3951e-04, 8.3888e-05, 1.5828e-05,\n 1.7781e-04, -1.2960e-04, 7.2166e-05, 1.9132e-06, -1.0379e-04,\n -2.5532e-04, 1.9610e-05, 5.7568e-06, -9.5458e-05, 2.9811e-04,\n -4.2334e-05, -1.7680e-04, 4.2791e-05, 2.5948e-04, 7.9276e-05,\n 2.5633e-05, 3.0468e-05, 2.0320e-04, 8.5785e-05, -1.1651e-04,\n -5.4273e-05, -7.3785e-04, 1.9157e-04, -3.3559e-04, -2.2174e-05,\n 6.2231e-05, 5.6052e-45, -1.7726e-05, -1.7570e-04, 2.0834e-04,\n -7.0150e-05, 1.2011e-04, 2.3193e-05, 2.5905e-04, 4.8156e-05,\n 9.4227e-05, -1.5386e-04, -3.2985e-04, -8.4540e-09, 1.3336e-04,\n 3.2558e-11, 7.8638e-05, 1.3845e-04, 2.0236e-04, 2.7482e-05,\n 1.9566e-04, 7.6982e-05, 7.3572e-05, 1.9124e-05, -4.4878e-04,\n -7.3165e-05, 4.0464e-05, 8.3604e-05, 2.8651e-05, -1.3938e-05,\n -7.2535e-05, -2.8282e-05, -3.0575e-04, 9.7689e-05, 1.9464e-04,\n 3.7703e-05, -6.0119e-05, -2.4153e-04, 8.2015e-05, 1.4069e-04,\n 8.2936e-05, 5.2972e-05, -3.1064e-05, 1.0269e-04, 2.1319e-05,\n 2.9697e-05, 5.1293e-05, -1.4735e-04, -1.1027e-04, -1.0017e-05,\n 3.7506e-05, 1.9099e-04, -2.7635e-04, -1.7746e-04, -1.0333e-05,\n -1.1964e-06, -7.3104e-05, -2.7289e-04, 1.2494e-05, -1.1267e-04,\n 9.3180e-05, 8.4389e-05, 1.2577e-04, -3.1900e-05, 6.9829e-05,\n 4.4992e-04, -2.2451e-05, -6.2085e-05, 1.0990e-04, 1.0630e-04,\n 2.3869e-04, -1.3656e-04, -8.4981e-05, -1.8713e-04, 4.7313e-05,\n 3.7676e-04, 1.2203e-04, 1.5593e-04, 7.6599e-05, -4.5713e-04,\n 3.9790e-04, 5.6052e-45, -1.1085e-04, 6.5030e-05, 2.4468e-04,\n -4.8410e-05, -1.5028e-04, 6.5386e-05, 8.2005e-05, 1.5086e-05,\n 2.5292e-04, 7.3439e-05, -6.3366e-05, 2.4668e-04, -1.3422e-04,\n -1.8001e-04, -6.6304e-05, -6.5024e-06, 6.7363e-05, -9.5922e-05,\n -3.2134e-05, 1.9623e-04, -3.5708e-05, -1.3757e-04, -1.3367e-04,\n -1.0616e-05, 4.4708e-05, 5.9554e-06, 4.1430e-05, 5.7943e-05,\n 3.9001e-05, -1.1295e-04, 5.3587e-05, 1.4498e-04, 1.8632e-04,\n -3.6238e-05, 1.3988e-04, 1.4748e-04, -2.5542e-04, -1.4115e-04,\n -7.5265e-05, 1.2344e-04, -1.2543e-04, 1.4823e-04, 3.9968e-04,\n 3.4148e-04, -8.2524e-05, 2.1892e-04, 2.9281e-04, 1.4586e-04,\n 5.6052e-45, -1.4999e-04, -7.8472e-06, -3.9977e-05, 1.2903e-04,\n 8.7892e-06, -5.1465e-05, 1.0537e-04, 5.8208e-05, 1.5826e-04,\n -1.4232e-04, -1.7625e-05, 8.3911e-05, -2.5242e-05, 3.7977e-05,\n -1.4690e-04, -4.8904e-05], device='cuda:0')", + "exp_avg_sq": "tensor([3.4901e-07, 4.9780e-07, 4.5792e-07, 4.8573e-07, 3.5745e-07, 4.6700e-07,\n 2.1051e-07, 5.0464e-07, 3.7209e-07, 2.4541e-07, 7.1930e-07, 3.6780e-07,\n 1.9960e-07, 3.7045e-07, 9.0267e-08, 4.4603e-07, 6.2253e-07, 5.7818e-07,\n 2.6456e-07, 3.0200e-07, 1.0456e-07, 3.3981e-07, 7.9043e-07, 2.8181e-07,\n 4.2366e-07, 4.3592e-07, 5.2335e-07, 5.1445e-07, 1.7712e-07, 1.8183e-07,\n 5.1255e-07, 6.2132e-07, 6.1939e-07, 7.1401e-07, 1.9657e-07, 3.3129e-07,\n 3.2808e-07, 6.1111e-07, 7.0171e-07, 3.2175e-07, 6.3079e-07, 7.1098e-07,\n 5.5448e-07, 2.0753e-07, 5.6254e-07, 5.8143e-07, 4.6728e-07, 2.4432e-07,\n 6.2282e-07, 3.0151e-07, 3.3118e-07, 2.1630e-07, 4.8689e-07, 3.5626e-07,\n 3.2663e-07, 5.7372e-07, 4.4594e-07, 3.6973e-07, 4.8599e-07, 5.2061e-07,\n 3.8244e-07, 5.1999e-07, 4.0964e-07, 5.2100e-07, 3.4740e-07, 1.8908e-07,\n 3.9346e-07, 4.1781e-07, 3.3507e-07, 2.3654e-07, 4.3065e-07, 2.2398e-15,\n 3.7701e-07, 4.1233e-07, 2.1751e-07, 3.8878e-07, 5.4075e-07, 5.4777e-07,\n 5.2886e-07, 4.0562e-07, 2.7422e-07, 4.5003e-07, 2.8292e-07, 4.2921e-07,\n 3.4534e-07, 2.1345e-07, 4.0643e-07, 7.4621e-07, 2.4236e-07, 5.3800e-07,\n 3.2723e-07, 5.9020e-07, 4.1647e-07, 6.1475e-07, 1.6163e-07, 6.0077e-07,\n 6.0399e-07, 6.7833e-07, 8.2945e-07, 3.5063e-07, 5.0820e-07, 4.1673e-07,\n 4.1482e-07, 7.0023e-07, 2.7133e-07, 3.6670e-07, 4.9629e-07, 3.1619e-07,\n 2.9640e-07, 2.3822e-07, 7.6810e-08, 4.8421e-07, 2.2558e-07, 5.3604e-07,\n 4.7887e-07, 5.1480e-07, 3.2878e-07, 6.5371e-07, 2.9507e-07, 2.9290e-07,\n 4.7715e-07, 4.1540e-07, 6.0668e-07, 4.6661e-07, 4.9506e-07, 2.3042e-07,\n 5.2414e-07, 3.8682e-07, 3.0927e-07, 2.1464e-07, 1.5432e-07, 2.3888e-07,\n 3.0761e-07, 1.9994e-07, 2.6545e-07, 3.9507e-07, 6.6907e-07, 4.2116e-07,\n 4.2793e-07, 5.7846e-07, 6.4881e-07, 4.4181e-07, 1.0540e-07, 7.9226e-07,\n 4.4806e-07, 4.6135e-07, 6.6322e-07, 3.3756e-07, 3.0772e-07, 6.5047e-07,\n 5.1120e-07, 4.5501e-07, 2.2361e-07, 7.9076e-07, 3.2080e-07, 1.0430e-09,\n 3.8042e-07, 4.0912e-07, 2.3389e-07, 2.9209e-07, 2.1960e-07, 3.2842e-07,\n 3.3549e-07, 4.0562e-07, 4.4690e-07, 2.8572e-07, 5.8275e-07, 5.0191e-07,\n 5.4766e-07, 2.8384e-07, 4.5149e-07, 2.3664e-07, 1.0455e-08, 6.0392e-07,\n 4.0145e-07, 2.8710e-07, 5.1689e-07, 4.0183e-07, 2.7466e-07, 1.9466e-07,\n 2.4865e-07, 2.5448e-07, 3.2309e-07, 1.9651e-07, 3.2357e-07, 8.9897e-07,\n 3.8397e-07, 4.2723e-07, 5.3729e-07, 4.8969e-07, 6.5440e-07, 5.0850e-07,\n 5.9515e-07, 5.0035e-07, 7.4258e-07, 5.0683e-07, 5.4641e-07, 2.0189e-07,\n 1.6596e-16, 6.6499e-07, 5.5023e-07, 3.1932e-07, 3.6105e-07, 6.1298e-07,\n 3.3548e-07, 3.3084e-07, 3.1938e-07, 6.1478e-07, 1.6463e-07, 1.2872e-07,\n 4.9612e-07, 5.8799e-07, 3.3898e-07, 4.0943e-07, 3.1607e-07, 4.3309e-07,\n 4.1719e-07, 1.7738e-07, 4.9472e-07, 5.5189e-07, 3.2298e-07, 2.5237e-07,\n 3.2492e-07, 4.5085e-07, 5.4742e-07, 5.8879e-07, 6.1076e-07, 8.3486e-07,\n 4.9658e-07, 2.4045e-07, 3.0293e-07, 4.7960e-07, 4.8984e-07, 4.0100e-07,\n 4.7781e-07, 2.6311e-07, 5.0894e-07, 3.5116e-07, 4.7168e-07, 8.5078e-07,\n 2.8361e-07, 5.2249e-07, 2.8813e-07, 3.2718e-07, 1.8626e-07, 2.5006e-07,\n 3.4277e-07, 3.4120e-07, 3.2144e-07, 5.0437e-07, 3.3093e-07, 5.0051e-07,\n 4.5540e-07, 6.1511e-07, 5.1284e-07, 4.3680e-07, 3.3738e-07, 3.0149e-07,\n 4.7970e-07, 2.5696e-07, 4.6504e-07, 3.9066e-07, 4.6030e-07, 3.8832e-07,\n 9.1776e-08, 2.8071e-07, 6.1818e-07, 4.4937e-07, 7.0595e-07, 4.4950e-07,\n 4.6531e-07, 4.7784e-07, 2.8059e-07, 7.4658e-07, 1.8031e-07, 2.5060e-07,\n 3.1726e-07, 6.5142e-07, 3.3772e-07, 1.8310e-07, 4.6568e-07, 4.4181e-07,\n 4.2281e-07, 3.6107e-07, 5.1441e-07, 5.3050e-07, 1.2671e-07, 4.3878e-07,\n 2.9601e-07, 5.5376e-07, 6.0354e-07, 5.0992e-07, 7.5289e-07, 2.2094e-07,\n 6.4931e-08, 5.4879e-07, 3.6594e-07, 3.7682e-07, 5.2444e-07, 3.6397e-07,\n 1.9318e-07, 2.9806e-07, 5.1638e-07, 6.3940e-07, 4.3617e-07, 3.0835e-07,\n 3.4528e-07, 5.7015e-07, 2.6914e-07, 4.9114e-07, 3.9302e-07, 2.3057e-07,\n 2.5393e-07, 3.7498e-07, 6.3723e-07, 4.4607e-07, 2.0030e-07, 5.8750e-07,\n 2.1584e-07, 4.1554e-07, 4.2400e-07, 3.4224e-07, 8.1866e-07, 2.9809e-07,\n 2.0921e-07, 4.7519e-07, 4.5442e-07, 2.4226e-07, 6.2201e-07, 4.0839e-07,\n 5.8200e-07, 6.4224e-07, 4.7172e-07, 2.5576e-07, 5.0983e-07, 5.0638e-07,\n 4.9579e-07, 2.4395e-07, 1.0644e-07, 4.3306e-07, 4.6720e-07, 3.9897e-07,\n 4.4289e-07, 8.2057e-09, 3.1663e-07, 9.4660e-07, 2.1463e-07, 1.7159e-07,\n 2.9124e-07, 5.2223e-07, 3.1503e-07, 5.6536e-07, 3.6594e-07, 4.0489e-07,\n 6.6517e-07, 2.4098e-07, 2.2393e-07, 4.5338e-07, 4.4010e-07, 3.3143e-07,\n 4.5810e-07, 2.1017e-07, 4.6123e-07, 6.2484e-07, 4.4140e-07, 4.5624e-07,\n 1.4758e-11, 5.0992e-07, 2.3912e-07, 5.1579e-07, 3.7319e-07, 5.0737e-07,\n 5.3290e-07, 3.6051e-07, 2.2441e-07, 3.5299e-07, 2.4764e-07, 3.4130e-07,\n 3.6462e-08, 2.5128e-07, 5.4420e-09, 4.0580e-07, 6.0589e-07, 5.6025e-07,\n 4.2791e-07, 5.6832e-07, 4.4755e-07, 1.6124e-07, 3.6226e-07, 6.3319e-07,\n 3.8873e-07, 4.2324e-07, 5.7647e-07, 5.2951e-07, 2.5311e-07, 2.2406e-07,\n 2.7735e-07, 4.2267e-07, 2.0156e-07, 6.0279e-07, 5.0002e-07, 2.6103e-07,\n 6.4814e-07, 3.5559e-07, 7.1142e-07, 4.5199e-07, 4.3067e-07, 7.7637e-07,\n 3.8712e-07, 4.8204e-07, 6.2978e-07, 3.1425e-07, 5.8953e-07, 6.5175e-07,\n 4.2345e-07, 2.8833e-07, 6.9242e-07, 3.0576e-07, 2.9983e-07, 2.4886e-07,\n 2.5190e-07, 3.7736e-07, 2.8962e-07, 2.9376e-07, 7.8339e-07, 3.6087e-07,\n 2.0723e-07, 4.6125e-07, 2.2941e-07, 7.7926e-07, 6.1360e-07, 5.5141e-07,\n 2.8900e-07, 3.6297e-07, 2.6125e-07, 2.0251e-07, 4.6401e-07, 1.6262e-07,\n 1.8248e-07, 3.6406e-07, 5.5654e-07, 4.4869e-07, 2.0911e-07, 2.9894e-07,\n 7.4050e-07, 3.9037e-07, 2.1039e-13, 3.3572e-07, 1.9306e-07, 6.3397e-07,\n 5.2783e-07, 1.8366e-07, 3.5176e-07, 4.9930e-07, 6.5063e-07, 4.7547e-07,\n 1.8812e-07, 3.3188e-07, 7.3594e-08, 4.3111e-07, 4.8595e-07, 2.3549e-07,\n 2.9108e-07, 5.8303e-07, 6.6009e-07, 1.8412e-07, 3.2794e-07, 5.4452e-07,\n 2.2323e-07, 5.0946e-07, 8.1826e-07, 2.7505e-07, 5.3600e-07, 4.5628e-07,\n 3.8038e-07, 2.9003e-07, 3.7763e-07, 4.8779e-07, 2.2477e-07, 3.1693e-07,\n 6.9090e-07, 3.7828e-07, 3.1496e-07, 3.8370e-07, 4.8291e-07, 5.3087e-07,\n 1.3978e-07, 5.1581e-07, 4.9655e-07, 6.9022e-07, 7.8373e-07, 4.8621e-07,\n 3.0570e-07, 6.3252e-07, 3.7424e-07, 5.7693e-18, 4.0560e-07, 7.3005e-07,\n 2.8521e-07, 5.1881e-07, 4.3237e-07, 5.1581e-07, 3.0775e-07, 5.5621e-07,\n 3.6184e-07, 9.2218e-07, 1.2871e-07, 4.3169e-07, 1.9843e-07, 1.0242e-07,\n 2.5739e-07, 4.5693e-07], device='cuda:0')" }, "4": { - "step": "tensor(5008.)", - "exp_avg": "tensor([[ 2.7610e-05, -3.2288e-06, -2.6303e-05, ..., 9.0364e-07,\n 6.2902e-06, 6.4425e-06],\n [ 2.1396e-05, 3.1786e-05, -2.1532e-05, ..., -1.7418e-06,\n 3.8806e-06, 2.2675e-05],\n [-1.9947e-05, -1.6229e-06, 1.5184e-06, ..., 2.8445e-06,\n -4.6658e-06, 1.1419e-05],\n ...,\n [ 5.1022e-05, -1.9222e-05, 3.9313e-05, ..., 3.8567e-06,\n -1.3248e-05, 2.9828e-05],\n [ 1.1041e-05, 1.4388e-05, -4.6712e-05, ..., 4.8514e-06,\n -1.4362e-05, 2.4878e-05],\n [ 2.1822e-05, -1.2955e-05, -3.5917e-05, ..., -3.8480e-06,\n 5.9862e-06, -4.9455e-05]], device='cuda:0')", - "exp_avg_sq": "tensor([[5.6403e-09, 2.5609e-09, 2.6746e-09, ..., 7.9663e-11, 8.7431e-10,\n 5.3614e-09],\n [1.1346e-08, 8.0873e-09, 9.3156e-09, ..., 9.3375e-11, 1.3926e-09,\n 1.1163e-08],\n [7.1580e-09, 6.7078e-09, 4.5307e-09, ..., 2.1421e-10, 2.2927e-09,\n 5.5355e-09],\n ...,\n [4.4702e-08, 5.6153e-09, 1.0969e-08, ..., 1.4636e-10, 4.9416e-09,\n 9.5192e-09],\n [1.1054e-08, 5.9131e-09, 9.5133e-09, ..., 1.1038e-10, 8.3286e-09,\n 1.3240e-08],\n [1.2318e-08, 7.9863e-09, 1.3718e-08, ..., 2.3861e-10, 1.1383e-09,\n 1.3421e-08]], device='cuda:0')" + "step": "tensor(6260.)", + "exp_avg": "tensor([[ 1.8051e-06, 7.7717e-06, -1.1762e-05, ..., -1.7779e-06,\n -1.6077e-06, -7.2762e-06],\n [ 3.8044e-06, -8.7437e-06, -2.4373e-05, ..., 1.3913e-06,\n 9.0766e-08, -6.5949e-06],\n [ 6.6412e-06, -1.3297e-05, 4.6889e-05, ..., 2.4836e-07,\n 1.4580e-05, 6.9022e-06],\n ...,\n [ 8.5893e-05, 2.2444e-06, -3.4216e-05, ..., -1.5663e-06,\n 1.0801e-05, -4.8861e-06],\n [-5.1267e-06, -1.1652e-05, -2.3581e-05, ..., -1.0031e-07,\n 8.5159e-07, 1.2613e-05],\n [ 4.8896e-05, -1.6472e-05, -3.4303e-05, ..., 9.9761e-07,\n -1.0900e-05, -1.6162e-05]], device='cuda:0')", + "exp_avg_sq": "tensor([[4.1308e-09, 1.6604e-09, 1.8253e-09, ..., 9.7495e-11, 6.8655e-10,\n 4.2496e-09],\n [8.3453e-09, 5.4105e-09, 6.6297e-09, ..., 1.1778e-10, 1.0796e-09,\n 8.1224e-09],\n [5.3479e-09, 4.2595e-09, 3.1662e-09, ..., 2.6425e-10, 1.9064e-09,\n 4.0530e-09],\n ...,\n [3.1641e-08, 3.7727e-09, 7.7409e-09, ..., 1.6950e-10, 3.6582e-09,\n 7.5386e-09],\n [7.7039e-09, 3.5320e-09, 6.5373e-09, ..., 1.2939e-10, 6.4699e-09,\n 9.9044e-09],\n [9.4282e-09, 5.2435e-09, 9.9640e-09, ..., 2.3744e-10, 1.0114e-09,\n 1.0140e-08]], device='cuda:0')" }, "5": { - "step": "tensor(3756.)", - "exp_avg": "tensor([[ 2.7764e-06, 6.2189e-07, -1.6777e-06, ..., -1.1598e-07,\n 0.0000e+00, -8.9813e-08],\n [ 2.7793e-06, -9.6008e-07, 4.4611e-07, ..., -8.0372e-07,\n 0.0000e+00, 7.2770e-07],\n [ 1.7802e-06, 8.1500e-07, -7.5316e-07, ..., 8.6705e-06,\n 0.0000e+00, -1.9213e-06],\n ...,\n [-3.5153e-06, 4.6961e-07, 2.6865e-07, ..., -2.3887e-08,\n 0.0000e+00, -1.4666e-07],\n [-7.7142e-07, -2.2786e-07, 2.0259e-06, ..., 3.3620e-06,\n 0.0000e+00, 1.7600e-07],\n [ 8.3392e-07, 3.3454e-07, 2.4118e-07, ..., -2.6229e-05,\n 0.0000e+00, -1.0306e-06]], device='cuda:0')", - "exp_avg_sq": "tensor([[6.0447e-11, 1.5566e-11, 2.2947e-11, ..., 1.4578e-11, 0.0000e+00,\n 2.1390e-11],\n [6.4223e-11, 1.3833e-11, 3.6363e-11, ..., 4.9165e-11, 0.0000e+00,\n 3.1313e-11],\n [4.3050e-11, 2.3529e-11, 4.6152e-11, ..., 1.1705e-10, 0.0000e+00,\n 1.6487e-10],\n ...,\n [6.5355e-11, 1.9666e-11, 1.3781e-11, ..., 9.6191e-12, 0.0000e+00,\n 1.5356e-11],\n [4.3831e-12, 8.3475e-12, 1.4465e-10, ..., 1.4523e-10, 0.0000e+00,\n 3.4109e-11],\n [2.6617e-11, 1.7481e-11, 2.8127e-11, ..., 4.5950e-10, 0.0000e+00,\n 7.0085e-11]], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([[ 2.9052e-07, -2.2928e-08, -4.3634e-08, ..., -6.0430e-07,\n 0.0000e+00, 9.5430e-07],\n [-2.2505e-07, 4.2085e-07, 5.5051e-07, ..., 1.4380e-06,\n 0.0000e+00, -1.6246e-08],\n [-1.1071e-06, -1.2538e-07, 1.2875e-07, ..., -3.5246e-06,\n 0.0000e+00, -2.3430e-06],\n ...,\n [ 7.2477e-07, 7.6364e-07, 3.4231e-07, ..., -9.2964e-08,\n 0.0000e+00, -4.8257e-07],\n [-8.8239e-08, 1.4731e-07, 1.9609e-06, ..., 1.0204e-06,\n 0.0000e+00, 1.1838e-06],\n [-8.0134e-07, -1.5626e-06, -5.1318e-07, ..., -3.0431e-06,\n 0.0000e+00, 1.0494e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[4.0832e-11, 7.7793e-12, 1.9464e-11, ..., 8.1092e-12, 0.0000e+00,\n 1.2444e-11],\n [4.0658e-11, 8.6062e-12, 2.6192e-11, ..., 3.9069e-11, 0.0000e+00,\n 1.8913e-11],\n [2.4880e-11, 1.2028e-11, 2.6414e-11, ..., 7.8947e-11, 0.0000e+00,\n 1.1963e-10],\n ...,\n [4.8356e-11, 1.9637e-11, 1.0225e-11, ..., 4.4803e-12, 0.0000e+00,\n 8.1128e-12],\n [1.7608e-12, 4.0500e-12, 1.1602e-10, ..., 1.0861e-10, 0.0000e+00,\n 2.0399e-11],\n [1.0155e-11, 1.2418e-11, 2.5896e-11, ..., 3.7060e-10, 0.0000e+00,\n 4.8629e-11]], device='cuda:0')" }, "6": { - "step": "tensor(3756.)", - "exp_avg": "tensor([-3.1306e-05, -1.4238e-05, 3.2017e-05, ..., -6.3748e-06,\n 6.6271e-06, -2.4936e-05], device='cuda:0')", - "exp_avg_sq": "tensor([7.5325e-09, 4.9277e-09, 6.6089e-09, ..., 5.2383e-09, 4.4994e-09,\n 4.4666e-09], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([-2.9050e-05, 3.7621e-06, -5.0623e-05, ..., 3.0815e-06,\n -2.6227e-05, 5.0914e-06], device='cuda:0')", + "exp_avg_sq": "tensor([4.7790e-09, 3.2526e-09, 4.1045e-09, ..., 2.5641e-09, 3.0099e-09,\n 2.8913e-09], device='cuda:0')" }, "7": { - "step": "tensor(3756.)", - "exp_avg": "tensor([[-1.0215e-06, -3.4224e-09, -4.8168e-08, ..., -2.3438e-07,\n 3.6516e-07, 1.6354e-07],\n [ 2.8217e-06, -2.2096e-07, 1.7974e-06, ..., 1.5344e-06,\n 4.7161e-07, 9.8457e-07],\n [ 1.7108e-06, -8.9327e-07, -3.8529e-08, ..., -7.3804e-07,\n -4.3139e-08, 1.4990e-06],\n ...,\n [-1.4872e-06, -2.0452e-06, 3.3108e-07, ..., -7.1227e-07,\n 1.0030e-06, -1.9528e-06],\n [-1.2991e-07, -3.2628e-07, 6.0369e-07, ..., 3.1528e-07,\n 2.1867e-07, 1.1217e-07],\n [ 1.2134e-07, -1.2386e-06, -1.0235e-06, ..., -1.0227e-06,\n -3.6859e-07, 7.9320e-08]], device='cuda:0')", - "exp_avg_sq": "tensor([[2.2084e-11, 1.0017e-11, 1.2695e-11, ..., 1.7350e-11, 1.0800e-11,\n 1.5110e-11],\n [2.8040e-11, 1.7590e-11, 2.1014e-11, ..., 3.3891e-11, 1.7932e-11,\n 2.1610e-11],\n [2.8028e-11, 2.0041e-11, 1.9567e-11, ..., 2.5047e-11, 1.5026e-11,\n 2.5975e-11],\n ...,\n [2.9675e-11, 3.2215e-11, 3.3170e-11, ..., 2.5402e-11, 2.4150e-11,\n 2.5470e-11],\n [2.7242e-11, 2.1471e-11, 2.4098e-11, ..., 1.9445e-11, 1.8598e-11,\n 2.6130e-11],\n [3.3954e-11, 2.0922e-11, 2.6926e-11, ..., 7.5594e-11, 2.0494e-11,\n 2.1542e-11]], device='cuda:0')" + "step": "tensor(5008.)", + "exp_avg": "tensor([[ 2.7085e-07, -4.0476e-07, 5.9547e-07, ..., 5.3408e-07,\n -1.3915e-06, -1.4484e-06],\n [ 5.7098e-08, 1.6263e-06, -1.3432e-06, ..., -8.1884e-07,\n -9.3186e-07, 4.7864e-07],\n [-3.6681e-07, 1.1444e-06, 5.7836e-07, ..., 5.7105e-07,\n -5.0189e-07, 1.1652e-06],\n ...,\n [-9.8853e-07, -3.5932e-06, -2.1081e-06, ..., -1.0942e-06,\n -2.0264e-06, 8.8557e-07],\n [-1.1547e-06, 1.0930e-06, -1.8229e-07, ..., -6.1655e-07,\n 1.6886e-06, 1.7918e-07],\n [ 2.8115e-06, -1.4956e-06, 2.8647e-07, ..., 4.3480e-06,\n 2.1652e-07, 1.1223e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.4066e-11, 6.0948e-12, 7.8347e-12, ..., 7.1691e-12, 6.5819e-12,\n 7.8594e-12],\n [1.8706e-11, 1.1371e-11, 1.3297e-11, ..., 1.5869e-11, 1.2163e-11,\n 1.2811e-11],\n [1.8208e-11, 1.2180e-11, 1.1863e-11, ..., 1.2269e-11, 1.0056e-11,\n 1.6568e-11],\n ...,\n [1.7239e-11, 2.1737e-11, 2.0287e-11, ..., 1.3141e-11, 1.5983e-11,\n 1.4509e-11],\n [1.5653e-11, 1.1913e-11, 1.5217e-11, ..., 9.6066e-12, 1.0947e-11,\n 1.5791e-11],\n [2.1784e-11, 1.2656e-11, 1.6639e-11, ..., 5.4514e-11, 1.2477e-11,\n 1.0659e-11]], device='cuda:0')" }, "32": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([8.3333e-07], device='cuda:0')" + "exp_avg_sq": "tensor([2.3813e-07], device='cuda:0')" }, "33": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([7.5859e-09, 3.6875e-08, 1.1229e-08], device='cuda:0')" + "exp_avg_sq": "tensor([2.1677e-09, 1.0537e-08, 3.2088e-09], device='cuda:0')" }, "34": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", - "exp_avg_sq": "tensor([4.5269e-05, 4.1042e-07, 5.5434e-07, 5.7525e-07, 6.5953e-07, 7.5762e-07,\n 6.0234e-07, 4.4606e-07, 5.6342e-07, 5.0068e-07], device='cuda:0')" + "exp_avg_sq": "tensor([1.2936e-05, 1.1728e-07, 1.5841e-07, 1.6438e-07, 1.8847e-07, 2.1650e-07,\n 1.7212e-07, 1.2747e-07, 1.6100e-07, 1.4307e-07], device='cuda:0')" }, "36": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[6.9913e-14, 3.1504e-14, 4.6807e-14, ..., 2.0461e-13, 0.0000e+00,\n 6.9055e-14],\n [1.2585e-14, 3.0426e-14, 9.1513e-14, ..., 1.0117e-13, 0.0000e+00,\n 4.0616e-14],\n [1.1294e-11, 2.6722e-12, 1.4848e-10, ..., 4.7603e-11, 0.0000e+00,\n 1.3282e-10],\n ...,\n [3.1477e-12, 1.3091e-13, 2.4791e-11, ..., 1.3835e-11, 0.0000e+00,\n 3.7224e-11],\n [8.4842e-14, 3.0529e-14, 8.9726e-15, ..., 3.3314e-13, 0.0000e+00,\n 1.7178e-13],\n [2.8623e-15, 5.3707e-14, 7.0162e-15, ..., 1.7989e-14, 0.0000e+00,\n 7.3279e-14]], device='cuda:0')" + "exp_avg_sq": "tensor([[1.9978e-14, 9.0025e-15, 1.3375e-14, ..., 5.8468e-14, 0.0000e+00,\n 1.9733e-14],\n [3.5963e-15, 8.6944e-15, 2.6151e-14, ..., 2.8911e-14, 0.0000e+00,\n 1.1606e-14],\n [3.2274e-12, 7.6360e-13, 4.2429e-11, ..., 1.3603e-11, 0.0000e+00,\n 3.7956e-11],\n ...,\n [8.9948e-13, 3.7408e-14, 7.0843e-12, ..., 3.9534e-12, 0.0000e+00,\n 1.0637e-11],\n [2.4244e-14, 8.7240e-15, 2.5640e-15, ..., 9.5197e-14, 0.0000e+00,\n 4.9087e-14],\n [8.1793e-16, 1.5347e-14, 2.0049e-15, ..., 5.1406e-15, 0.0000e+00,\n 2.0940e-14]], device='cuda:0')" }, "37": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([ 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.2416e-10, 4.2850e-11, 1.6158e-07, 2.1440e-10, 7.7994e-08, 2.6861e-08,\n 5.1391e-09, 5.8972e-09, 1.4903e-07, 5.4532e-10, 7.5766e-08, 9.2193e-09,\n 8.1727e-11, 1.7046e-09, 1.2034e-11, 8.6443e-10, 1.2332e-08, 1.3966e-08,\n 9.0295e-10, 1.0844e-09, 1.3227e-08, 6.1022e-10, 9.1979e-09, 5.4789e-09,\n 4.6307e-09, 4.6134e-08, 8.7692e-09, 7.6357e-09, 5.8107e-09, 1.1581e-07,\n 4.2550e-08, 2.6330e-08, 3.0672e-07, 4.9881e-10, 4.1095e-09, 9.1919e-10,\n 1.2779e-08, 5.6793e-10, 5.1582e-08, 1.3624e-09, 5.1080e-08, 8.7109e-10,\n 2.7134e-09, 4.8363e-08, 8.0292e-11, 2.9305e-09, 4.5337e-09, 4.5204e-09,\n 9.2322e-10, 3.2560e-08, 1.1251e-08, 1.2064e-08, 8.0818e-08, 5.2855e-09,\n 8.9801e-10, 6.4714e-09, 1.2905e-12, 3.1517e-09, 1.3882e-08, 1.3609e-07,\n 1.6779e-08, 1.4886e-09, 4.7451e-11, 5.0677e-09, 5.7585e-09, 7.6168e-08,\n 6.9504e-10, 5.1032e-09, 1.6749e-09, 1.1706e-08, 7.1660e-09, 6.2503e-09,\n 1.9286e-09, 2.8527e-08, 4.0100e-08, 2.2644e-08, 2.7474e-08, 6.6335e-09,\n 1.9838e-09, 5.4855e-11, 6.7939e-09, 2.6395e-09, 6.7886e-10, 3.8227e-09,\n 5.3822e-09, 1.6913e-11, 5.6171e-08, 9.0136e-09, 1.6445e-09, 7.3502e-11,\n 5.2700e-10, 5.3789e-09, 3.0748e-08, 4.8137e-08, 4.5101e-08, 3.4249e-09,\n 4.0383e-08, 1.6930e-08, 1.1663e-08, 1.6810e-09, 4.9516e-09, 2.0782e-10,\n 8.8497e-10, 5.3060e-08, 2.1322e-08, 3.7244e-08, 1.2137e-08, 1.1639e-10,\n 4.9928e-08, 4.1713e-09, 7.2387e-09, 1.0010e-09, 1.0602e-08, 5.1539e-09,\n 4.7712e-09, 2.2559e-07, 1.6475e-10, 7.6134e-09, 1.4666e-09, 7.0491e-08,\n 7.1794e-08, 9.7973e-09, 2.2421e-08, 8.8327e-10, 1.1072e-09, 1.9344e-08,\n 1.0449e-08, 1.1804e-08, 1.5803e-09, 3.7022e-08, 2.9217e-11, 8.3475e-10,\n 2.0125e-07, 1.8349e-08, 3.4569e-10, 3.9818e-09, 1.2469e-08, 4.9627e-09,\n 8.1663e-08, 1.5729e-07, 3.6765e-09, 1.1374e-08, 9.6580e-10, 1.5937e-10,\n 1.7601e-09, 7.9747e-09, 8.3899e-09, 1.5820e-08, 5.9612e-10, 3.3723e-08,\n 3.0180e-10, 8.3029e-10, 2.7701e-08, 1.2353e-08, 1.0430e-08, 6.0248e-08,\n 8.6984e-10, 4.6225e-08, 4.0013e-10, 2.1601e-09, 5.1833e-11, 3.7268e-08,\n 3.4588e-11, 2.8139e-09, 4.7987e-08, 2.7237e-08, 1.2829e-09, 1.6847e-08,\n 1.4340e-10, 8.3961e-10, 3.5988e-09, 1.9968e-08, 4.7393e-08, 1.6688e-08,\n 1.0080e-08, 3.4298e-09, 4.9606e-08, 6.0101e-08, 1.4402e-10, 1.1018e-11,\n 1.0944e-10, 1.7908e-08, 1.4431e-09, 5.4592e-10, 1.8756e-09, 7.0026e-08,\n 4.3594e-09, 8.5916e-11, 1.5612e-09, 2.5861e-09, 3.0524e-08, 1.8045e-10,\n 2.8551e-08, 5.7024e-08, 1.6957e-08, 4.6180e-09, 1.3832e-09, 1.5620e-08,\n 4.2248e-09, 1.7679e-08, 4.7229e-09, 2.8404e-10, 1.4786e-07, 2.9119e-10,\n 1.0135e-08, 1.8982e-11, 1.1428e-08, 3.8016e-08, 9.9939e-11, 2.5981e-09,\n 4.3698e-08, 2.6412e-09, 2.0145e-08, 1.3559e-07, 1.2500e-07, 1.3780e-08,\n 2.0568e-10, 6.2756e-09, 3.6554e-08, 7.5143e-11, 8.6271e-09, 1.5201e-07,\n 4.7170e-09, 8.2592e-09, 2.7346e-08, 3.5178e-09, 1.3460e-09, 1.2288e-08,\n 7.4034e-09, 6.1729e-09, 3.4958e-09, 1.4444e-08, 6.4861e-09, 3.4551e-09,\n 1.5003e-08, 2.3758e-09, 4.8112e-09, 6.0258e-09, 1.4651e-08, 2.5809e-10,\n 7.2528e-08, 2.5066e-09, 6.8057e-10, 2.4739e-08, 8.4880e-09, 1.2700e-10,\n 1.3225e-08, 8.7973e-08, 6.5459e-08, 9.5933e-08, 3.2210e-09, 1.3518e-08,\n 1.0279e-08, 4.4784e-08, 3.5472e-10, 4.4617e-11], device='cuda:0')" + "exp_avg_sq": "tensor([3.5479e-11, 1.2245e-11, 4.6173e-08, 6.1268e-11, 2.2287e-08, 7.6758e-09,\n 1.4685e-09, 1.6852e-09, 4.2586e-08, 1.5583e-10, 2.1651e-08, 2.6345e-09,\n 2.3354e-11, 4.8711e-10, 3.4389e-12, 2.4702e-10, 3.5240e-09, 3.9909e-09,\n 2.5803e-10, 3.0989e-10, 3.7796e-09, 1.7438e-10, 2.6284e-09, 1.5656e-09,\n 1.3232e-09, 1.3183e-08, 2.5059e-09, 2.1820e-09, 1.6605e-09, 3.3095e-08,\n 1.2159e-08, 7.5240e-09, 8.7648e-08, 1.4254e-10, 1.1743e-09, 2.6267e-10,\n 3.6516e-09, 1.6229e-10, 1.4740e-08, 3.8932e-10, 1.4597e-08, 2.4892e-10,\n 7.7537e-10, 1.3820e-08, 2.2944e-11, 8.3743e-10, 1.2955e-09, 1.2917e-09,\n 2.6382e-10, 9.3042e-09, 3.2149e-09, 3.4474e-09, 2.3094e-08, 1.5104e-09,\n 2.5661e-10, 1.8493e-09, 3.6877e-13, 9.0062e-10, 3.9670e-09, 3.8889e-08,\n 4.7947e-09, 4.2538e-10, 1.3559e-11, 1.4481e-09, 1.6455e-09, 2.1766e-08,\n 1.9861e-10, 1.4583e-09, 4.7860e-10, 3.3452e-09, 2.0477e-09, 1.7861e-09,\n 5.5110e-10, 8.1518e-09, 1.1459e-08, 6.4706e-09, 7.8509e-09, 1.8956e-09,\n 5.6690e-10, 1.5675e-11, 1.9414e-09, 7.5427e-10, 1.9399e-10, 1.0924e-09,\n 1.5380e-09, 4.8332e-12, 1.6051e-08, 2.5757e-09, 4.6993e-10, 2.1004e-11,\n 1.5059e-10, 1.5371e-09, 8.7864e-09, 1.3755e-08, 1.2888e-08, 9.7868e-10,\n 1.1540e-08, 4.8378e-09, 3.3328e-09, 4.8035e-10, 1.4150e-09, 5.9385e-11,\n 2.5289e-10, 1.5162e-08, 6.0929e-09, 1.0643e-08, 3.4683e-09, 3.3259e-11,\n 1.4267e-08, 1.1920e-09, 2.0685e-09, 2.8603e-10, 3.0297e-09, 1.4728e-09,\n 1.3634e-09, 6.4464e-08, 4.7078e-11, 2.1756e-09, 4.1909e-10, 2.0143e-08,\n 2.0516e-08, 2.7997e-09, 6.4070e-09, 2.5240e-10, 3.1640e-10, 5.5276e-09,\n 2.9859e-09, 3.3730e-09, 4.5159e-10, 1.0579e-08, 8.3490e-12, 2.3854e-10,\n 5.7509e-08, 5.2433e-09, 9.8784e-11, 1.1378e-09, 3.5632e-09, 1.4181e-09,\n 2.3336e-08, 4.4948e-08, 1.0506e-09, 3.2503e-09, 2.7599e-10, 4.5542e-11,\n 5.0297e-10, 2.2788e-09, 2.3975e-09, 4.5208e-09, 1.7035e-10, 9.6365e-09,\n 8.6242e-11, 2.3726e-10, 7.9157e-09, 3.5301e-09, 2.9803e-09, 1.7216e-08,\n 2.4856e-10, 1.3209e-08, 1.1434e-10, 6.1727e-10, 1.4812e-11, 1.0650e-08,\n 9.8839e-12, 8.0410e-10, 1.3713e-08, 7.7832e-09, 3.6659e-10, 4.8140e-09,\n 4.0976e-11, 2.3992e-10, 1.0284e-09, 5.7059e-09, 1.3543e-08, 4.7687e-09,\n 2.8803e-09, 9.8009e-10, 1.4175e-08, 1.7174e-08, 4.1154e-11, 3.1486e-12,\n 3.1273e-11, 5.1175e-09, 4.1237e-10, 1.5600e-10, 5.3597e-10, 2.0011e-08,\n 1.2457e-09, 2.4551e-11, 4.4612e-10, 7.3899e-10, 8.7226e-09, 5.1564e-11,\n 8.1586e-09, 1.6295e-08, 4.8456e-09, 1.3196e-09, 3.9527e-10, 4.4637e-09,\n 1.2073e-09, 5.0518e-09, 1.3496e-09, 8.1166e-11, 4.2251e-08, 8.3211e-11,\n 2.8962e-09, 5.4242e-12, 3.2657e-09, 1.0863e-08, 2.8558e-11, 7.4243e-10,\n 1.2487e-08, 7.5474e-10, 5.7565e-09, 3.8747e-08, 3.5719e-08, 3.9376e-09,\n 5.8776e-11, 1.7933e-09, 1.0446e-08, 2.1473e-11, 2.4653e-09, 4.3438e-08,\n 1.3479e-09, 2.3601e-09, 7.8143e-09, 1.0053e-09, 3.8463e-10, 3.5114e-09,\n 2.1156e-09, 1.7640e-09, 9.9896e-10, 4.1275e-09, 1.8535e-09, 9.8731e-10,\n 4.2872e-09, 6.7889e-10, 1.3748e-09, 1.7219e-09, 4.1866e-09, 7.3752e-11,\n 2.0725e-08, 7.1628e-10, 1.9448e-10, 7.0694e-09, 2.4255e-09, 3.6291e-11,\n 3.7791e-09, 2.5139e-08, 1.8705e-08, 2.7413e-08, 9.2042e-10, 3.8629e-09,\n 2.9373e-09, 1.2797e-08, 1.0136e-10, 1.2750e-11], device='cuda:0')" }, "38": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([ 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([2.7892e-13, 3.1989e-16, 3.1042e-10, 1.0604e-12, 1.6512e-10, 5.1970e-11,\n 9.0572e-12, 1.3225e-11, 3.4298e-10, 1.3567e-12, 2.7111e-10, 1.5794e-11,\n 4.9420e-13, 2.1299e-12, 3.8675e-14, 3.1310e-12, 2.7383e-11, 3.8425e-11,\n 3.2760e-12, 2.3677e-12, 2.1508e-11, 1.4998e-12, 1.2183e-11, 9.4212e-12,\n 8.6707e-12, 5.7435e-11, 1.3450e-11, 1.3667e-11, 3.3250e-11, 3.3614e-10,\n 5.2518e-11, 4.6217e-11, 5.7964e-10, 7.3212e-13, 7.1492e-12, 3.4830e-12,\n 1.6250e-11, 1.7090e-12, 1.3072e-10, 1.9135e-12, 1.4397e-10, 2.9672e-12,\n 7.9904e-12, 6.4241e-11, 8.0517e-14, 3.2263e-12, 7.1310e-12, 6.4901e-12,\n 2.5228e-12, 7.7522e-11, 1.4861e-11, 3.1713e-11, 2.1866e-10, 7.4883e-12,\n 1.4684e-12, 9.6002e-12, 7.7609e-14, 4.0514e-12, 1.9498e-11, 3.9567e-10,\n 2.7571e-11, 2.3323e-12, 4.6716e-16, 1.1337e-11, 1.0352e-11, 2.1100e-10,\n 1.7591e-12, 7.0868e-12, 3.4099e-12, 1.5123e-11, 1.2062e-11, 1.1359e-11,\n 1.6758e-12, 5.7238e-11, 6.9116e-11, 3.4478e-11, 7.6082e-11, 1.0132e-11,\n 5.2267e-12, 1.4255e-14, 1.1997e-11, 3.8107e-12, 1.3132e-12, 4.7962e-12,\n 7.8780e-12, 5.0246e-14, 8.5577e-11, 2.1946e-11, 3.7087e-12, 1.3231e-14,\n 1.2786e-12, 9.0187e-12, 6.9203e-11, 1.3193e-10, 5.4475e-11, 4.2602e-12,\n 5.6545e-11, 2.4848e-11, 1.6599e-11, 7.3869e-12, 6.8725e-12, 2.9966e-13,\n 1.4743e-12, 1.0614e-10, 6.2022e-11, 7.1889e-11, 1.7552e-11, 3.9246e-13,\n 1.7909e-10, 7.6264e-12, 1.6212e-11, 2.1164e-12, 1.6980e-11, 6.4587e-12,\n 8.9091e-12, 5.8101e-10, 3.9238e-13, 7.7066e-12, 7.6770e-12, 2.2196e-10,\n 1.2394e-10, 1.3480e-11, 2.8397e-11, 7.5322e-12, 3.1583e-12, 4.1487e-11,\n 2.1310e-11, 2.7795e-11, 3.0670e-12, 1.2069e-10, 4.8543e-13, 1.6653e-12,\n 5.4635e-10, 1.6673e-11, 4.9032e-13, 5.4438e-12, 4.2652e-11, 7.0848e-12,\n 2.3209e-10, 5.0589e-10, 7.3107e-12, 2.4244e-11, 3.6807e-12, 6.7232e-13,\n 2.9795e-12, 1.2201e-11, 1.6554e-11, 2.9663e-11, 2.2396e-12, 1.1989e-10,\n 4.6997e-13, 9.6837e-13, 5.9289e-11, 2.4519e-11, 2.2810e-11, 1.0689e-10,\n 1.2632e-12, 5.3611e-11, 5.8488e-13, 3.2409e-12, 1.9626e-13, 6.1636e-11,\n 1.3301e-13, 6.4160e-12, 8.2358e-11, 4.2770e-11, 1.5875e-12, 3.5160e-11,\n 5.6266e-13, 2.8049e-12, 3.3510e-12, 1.1679e-10, 1.1710e-10, 4.9610e-11,\n 2.9323e-11, 7.9589e-12, 9.7785e-11, 1.5410e-10, 6.1339e-13, 9.4034e-15,\n 2.2986e-13, 4.9051e-11, 4.4464e-12, 9.9314e-13, 6.6111e-12, 9.1047e-11,\n 7.2134e-12, 1.8097e-13, 3.0602e-12, 4.6245e-12, 6.8342e-11, 1.1447e-12,\n 5.3286e-11, 2.0739e-10, 2.4842e-11, 1.3447e-11, 2.4833e-12, 2.6856e-11,\n 5.9648e-12, 3.3384e-11, 7.6909e-12, 2.0583e-12, 2.3947e-10, 9.9255e-13,\n 1.5544e-11, 3.6634e-14, 2.6195e-11, 8.7034e-11, 3.4282e-13, 2.8897e-12,\n 7.7022e-11, 3.5876e-12, 3.4645e-11, 2.5086e-10, 2.0053e-10, 2.5914e-11,\n 3.4406e-13, 1.0153e-11, 5.9849e-11, 6.6195e-14, 1.5268e-11, 2.6066e-10,\n 1.1216e-11, 1.5196e-11, 3.8981e-11, 8.0609e-12, 3.1450e-12, 2.0124e-11,\n 7.8979e-12, 1.4162e-11, 6.6590e-12, 3.9740e-11, 9.7006e-12, 6.3369e-12,\n 2.2989e-11, 5.2533e-12, 6.0617e-12, 8.6704e-12, 5.1135e-11, 5.4987e-13,\n 2.0065e-10, 4.0324e-12, 6.6045e-12, 2.6551e-11, 1.1895e-11, 9.4633e-13,\n 2.8339e-11, 2.7321e-10, 1.0686e-10, 3.3550e-10, 5.2563e-12, 2.7895e-11,\n 1.6563e-11, 7.7513e-11, 6.1310e-13, 1.6649e-13], device='cuda:0')" + "exp_avg_sq": "tensor([7.9704e-14, 9.1411e-17, 8.8704e-11, 3.0303e-13, 4.7183e-11, 1.4851e-11,\n 2.5882e-12, 3.7791e-12, 9.8010e-11, 3.8767e-13, 7.7471e-11, 4.5133e-12,\n 1.4122e-13, 6.0863e-13, 1.1052e-14, 8.9470e-13, 7.8248e-12, 1.0980e-11,\n 9.3614e-13, 6.7659e-13, 6.1462e-12, 4.2859e-13, 3.4815e-12, 2.6922e-12,\n 2.4777e-12, 1.6412e-11, 3.8433e-12, 3.9055e-12, 9.5015e-12, 9.6053e-11,\n 1.5007e-11, 1.3207e-11, 1.6564e-10, 2.0921e-13, 2.0429e-12, 9.9529e-13,\n 4.6437e-12, 4.8835e-13, 3.7353e-11, 5.4679e-13, 4.1141e-11, 8.4792e-13,\n 2.2833e-12, 1.8357e-11, 2.3008e-14, 9.2193e-13, 2.0377e-12, 1.8546e-12,\n 7.2091e-13, 2.2153e-11, 4.2467e-12, 9.0624e-12, 6.2485e-11, 2.1398e-12,\n 4.1960e-13, 2.7433e-12, 2.2177e-14, 1.1577e-12, 5.5717e-12, 1.1307e-10,\n 7.8786e-12, 6.6648e-13, 1.3349e-16, 3.2397e-12, 2.9583e-12, 6.0295e-11,\n 5.0269e-13, 2.0251e-12, 9.7442e-13, 4.3214e-12, 3.4468e-12, 3.2460e-12,\n 4.7888e-13, 1.6356e-11, 1.9750e-11, 9.8523e-12, 2.1741e-11, 2.8953e-12,\n 1.4936e-12, 4.0735e-15, 3.4282e-12, 1.0889e-12, 3.7526e-13, 1.3706e-12,\n 2.2512e-12, 1.4358e-14, 2.4454e-11, 6.2712e-12, 1.0598e-12, 3.7810e-15,\n 3.6536e-13, 2.5772e-12, 1.9775e-11, 3.7701e-11, 1.5567e-11, 1.2174e-12,\n 1.6158e-11, 7.1007e-12, 4.7434e-12, 2.1109e-12, 1.9639e-12, 8.5630e-14,\n 4.2130e-13, 3.0330e-11, 1.7723e-11, 2.0543e-11, 5.0156e-12, 1.1215e-13,\n 5.1175e-11, 2.1793e-12, 4.6328e-12, 6.0478e-13, 4.8523e-12, 1.8456e-12,\n 2.5458e-12, 1.6603e-10, 1.1213e-13, 2.2022e-12, 2.1938e-12, 6.3428e-11,\n 3.5417e-11, 3.8519e-12, 8.1148e-12, 2.1524e-12, 9.0250e-13, 1.1855e-11,\n 6.0894e-12, 7.9426e-12, 8.7641e-13, 3.4487e-11, 1.3871e-13, 4.7587e-13,\n 1.5612e-10, 4.7644e-12, 1.4011e-13, 1.5556e-12, 1.2188e-11, 2.0245e-12,\n 6.6321e-11, 1.4456e-10, 2.0891e-12, 6.9279e-12, 1.0518e-12, 1.9212e-13,\n 8.5141e-13, 3.4865e-12, 4.7304e-12, 8.4764e-12, 6.3999e-13, 3.4260e-11,\n 1.3430e-13, 2.7672e-13, 1.6942e-11, 7.0065e-12, 6.5180e-12, 3.0545e-11,\n 3.6096e-13, 1.5320e-11, 1.6713e-13, 9.2611e-13, 5.6083e-14, 1.7613e-11,\n 3.8010e-14, 1.8334e-12, 2.3534e-11, 1.2222e-11, 4.5365e-13, 1.0047e-11,\n 1.6078e-13, 8.0152e-13, 9.5758e-13, 3.3375e-11, 3.3462e-11, 1.4176e-11,\n 8.3792e-12, 2.2743e-12, 2.7943e-11, 4.4034e-11, 1.7528e-13, 2.6871e-15,\n 6.5686e-14, 1.4017e-11, 1.2706e-12, 2.8380e-13, 1.8892e-12, 2.6017e-11,\n 2.0613e-12, 5.1715e-14, 8.7449e-13, 1.3215e-12, 1.9529e-11, 3.2710e-13,\n 1.5227e-11, 5.9264e-11, 7.0989e-12, 3.8427e-12, 7.0962e-13, 7.6742e-12,\n 1.7045e-12, 9.5399e-12, 2.1977e-12, 5.8818e-13, 6.8430e-11, 2.8363e-13,\n 4.4418e-12, 1.0469e-14, 7.4854e-12, 2.4871e-11, 9.7962e-14, 8.2577e-13,\n 2.2010e-11, 1.0252e-12, 9.9002e-12, 7.1685e-11, 5.7302e-11, 7.4050e-12,\n 9.8317e-14, 2.9014e-12, 1.7102e-11, 1.8916e-14, 4.3630e-12, 7.4485e-11,\n 3.2052e-12, 4.3423e-12, 1.1139e-11, 2.3035e-12, 8.9871e-13, 5.7505e-12,\n 2.2569e-12, 4.0470e-12, 1.9029e-12, 1.1356e-11, 2.7720e-12, 1.8108e-12,\n 6.5694e-12, 1.5012e-12, 1.7322e-12, 2.4776e-12, 1.4612e-11, 1.5713e-13,\n 5.7337e-11, 1.1523e-12, 1.8873e-12, 7.5873e-12, 3.3990e-12, 2.7042e-13,\n 8.0982e-12, 7.8071e-11, 3.0536e-11, 9.5873e-11, 1.5020e-12, 7.9711e-12,\n 4.7331e-12, 2.2150e-11, 1.7520e-13, 4.7577e-14], device='cuda:0')" }, "39": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([ 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([5.6854e-13, 2.1707e-14, 4.4432e-10, 9.5704e-13, 2.0260e-10, 6.7987e-11,\n 1.6110e-11, 1.7311e-11, 4.1554e-10, 2.8909e-12, 2.1800e-10, 2.6327e-11,\n 7.7731e-13, 5.2121e-12, 1.5640e-13, 3.9582e-12, 3.7030e-11, 4.1674e-11,\n 2.9347e-12, 3.7825e-12, 3.8023e-11, 2.0875e-12, 2.6738e-11, 1.6587e-11,\n 1.3784e-11, 1.2558e-10, 2.6550e-11, 2.0482e-11, 2.2310e-11, 3.2143e-10,\n 1.1043e-10, 7.3286e-11, 8.2451e-10, 1.2885e-12, 1.2341e-11, 3.6249e-12,\n 3.2943e-11, 3.1065e-12, 1.3989e-10, 4.5672e-12, 1.3176e-10, 3.2718e-12,\n 9.0645e-12, 1.4233e-10, 5.7114e-14, 6.6240e-12, 1.3304e-11, 1.4069e-11,\n 2.7564e-12, 9.0040e-11, 3.3515e-11, 2.7913e-11, 2.1164e-10, 1.4788e-11,\n 3.0486e-12, 1.9793e-11, 1.9026e-13, 8.4260e-12, 3.7374e-11, 3.7017e-10,\n 4.4608e-11, 4.4152e-12, 4.3622e-14, 1.6540e-11, 1.7856e-11, 1.9569e-10,\n 2.3857e-12, 1.4575e-11, 5.3393e-12, 3.3670e-11, 2.1715e-11, 1.9175e-11,\n 5.0354e-12, 8.1871e-11, 1.1537e-10, 6.2176e-11, 7.7443e-11, 1.9708e-11,\n 6.7911e-12, 1.2957e-13, 2.0076e-11, 6.9412e-12, 2.6796e-12, 1.1942e-11,\n 1.3626e-11, 2.2358e-13, 1.5224e-10, 2.7072e-11, 7.4790e-12, 1.4121e-14,\n 1.6474e-12, 1.5105e-11, 7.8885e-11, 1.2503e-10, 1.1261e-10, 7.8816e-12,\n 1.0749e-10, 4.9529e-11, 3.0346e-11, 8.3714e-12, 1.4815e-11, 7.2177e-13,\n 3.0247e-12, 1.3908e-10, 6.2601e-11, 8.7676e-11, 3.6220e-11, 6.9002e-13,\n 1.2398e-10, 1.2530e-11, 2.1962e-11, 3.7797e-12, 2.7145e-11, 1.1979e-11,\n 1.3019e-11, 6.0779e-10, 8.7905e-13, 2.0563e-11, 6.8144e-12, 2.0047e-10,\n 1.9233e-10, 2.7904e-11, 6.2991e-11, 7.8118e-12, 3.7453e-12, 5.7482e-11,\n 2.6114e-11, 3.3475e-11, 5.5066e-12, 1.0418e-10, 6.5735e-13, 2.8226e-12,\n 5.3997e-10, 4.8657e-11, 1.0731e-12, 1.2469e-11, 3.7069e-11, 1.1917e-11,\n 2.1386e-10, 4.1762e-10, 1.1958e-11, 3.4535e-11, 2.8740e-12, 8.2134e-13,\n 5.6105e-12, 2.2884e-11, 2.1398e-11, 4.4112e-11, 2.8449e-12, 9.7090e-11,\n 1.0104e-12, 2.0519e-12, 7.2463e-11, 3.0679e-11, 2.9909e-11, 1.6521e-10,\n 2.6985e-12, 1.2273e-10, 1.2646e-12, 5.9884e-12, 9.9218e-13, 1.0653e-10,\n 4.8352e-13, 8.4975e-12, 1.3542e-10, 7.0662e-11, 3.9487e-12, 4.5842e-11,\n 7.6884e-13, 2.8894e-12, 9.0176e-12, 6.1258e-11, 1.2148e-10, 5.1802e-11,\n 2.7038e-11, 1.1136e-11, 1.4006e-10, 1.7128e-10, 1.7240e-12, 3.3561e-14,\n 3.8055e-13, 4.8228e-11, 5.0006e-12, 1.9349e-12, 5.6948e-12, 1.9097e-10,\n 1.2050e-11, 3.7703e-13, 5.5245e-12, 7.7897e-12, 8.3057e-11, 1.5242e-12,\n 7.2860e-11, 1.5998e-10, 4.5927e-11, 1.4100e-11, 4.3182e-12, 3.8140e-11,\n 1.1420e-11, 5.0688e-11, 1.4122e-11, 2.3044e-12, 3.9629e-10, 1.6629e-12,\n 2.7296e-11, 4.8009e-14, 3.2792e-11, 9.9507e-11, 1.1765e-12, 8.4677e-12,\n 1.2349e-10, 6.3415e-12, 5.7571e-11, 3.5512e-10, 3.3372e-10, 3.7035e-11,\n 8.6237e-13, 1.6990e-11, 9.7382e-11, 2.4991e-13, 2.0977e-11, 4.1522e-10,\n 1.3756e-11, 2.3324e-11, 8.0058e-11, 8.4960e-12, 4.7383e-12, 3.4860e-11,\n 2.1080e-11, 1.8139e-11, 1.1324e-11, 3.9130e-11, 1.5157e-11, 1.0264e-11,\n 4.2995e-11, 7.5910e-12, 1.2280e-11, 1.7906e-11, 3.6867e-11, 1.2174e-12,\n 1.9403e-10, 7.5432e-12, 5.0945e-12, 7.1504e-11, 2.3295e-11, 1.2618e-12,\n 3.9906e-11, 2.2368e-10, 1.9021e-10, 2.7214e-10, 9.9456e-12, 3.4674e-11,\n 2.4393e-11, 1.2225e-10, 1.5022e-12, 1.6251e-13], device='cuda:0')" + "exp_avg_sq": "tensor([1.6247e-13, 6.2031e-15, 1.2697e-10, 2.7348e-13, 5.7894e-11, 1.9428e-11,\n 4.6037e-12, 4.9469e-12, 1.1874e-10, 8.2611e-13, 6.2295e-11, 7.5230e-12,\n 2.2212e-13, 1.4894e-12, 4.4692e-14, 1.1311e-12, 1.0582e-11, 1.1909e-11,\n 8.3862e-13, 1.0809e-12, 1.0865e-11, 5.9651e-13, 7.6405e-12, 4.7399e-12,\n 3.9390e-12, 3.5885e-11, 7.5869e-12, 5.8530e-12, 6.3753e-12, 9.1851e-11,\n 3.1557e-11, 2.0942e-11, 2.3561e-10, 3.6821e-13, 3.5264e-12, 1.0358e-12,\n 9.4137e-12, 8.8770e-13, 3.9976e-11, 1.3051e-12, 3.7652e-11, 9.3495e-13,\n 2.5902e-12, 4.0672e-11, 1.6321e-14, 1.8929e-12, 3.8018e-12, 4.0204e-12,\n 7.8766e-13, 2.5730e-11, 9.5770e-12, 7.9764e-12, 6.0478e-11, 4.2259e-12,\n 8.7117e-13, 5.6561e-12, 5.4368e-14, 2.4078e-12, 1.0680e-11, 1.0578e-10,\n 1.2747e-11, 1.2617e-12, 1.2465e-14, 4.7263e-12, 5.1025e-12, 5.5920e-11,\n 6.8172e-13, 4.1648e-12, 1.5258e-12, 9.6215e-12, 6.2051e-12, 5.4793e-12,\n 1.4389e-12, 2.3395e-11, 3.2969e-11, 1.7767e-11, 2.2130e-11, 5.6316e-12,\n 1.9406e-12, 3.7025e-14, 5.7369e-12, 1.9835e-12, 7.6573e-13, 3.4124e-12,\n 3.8937e-12, 6.3891e-14, 4.3504e-11, 7.7361e-12, 2.1372e-12, 4.0352e-15,\n 4.7077e-13, 4.3164e-12, 2.2542e-11, 3.5727e-11, 3.2180e-11, 2.2522e-12,\n 3.0717e-11, 1.4153e-11, 8.6717e-12, 2.3922e-12, 4.2334e-12, 2.0625e-13,\n 8.6434e-13, 3.9743e-11, 1.7889e-11, 2.5054e-11, 1.0350e-11, 1.9718e-13,\n 3.5427e-11, 3.5804e-12, 6.2758e-12, 1.0801e-12, 7.7568e-12, 3.4230e-12,\n 3.7203e-12, 1.7368e-10, 2.5120e-13, 5.8762e-12, 1.9473e-12, 5.7286e-11,\n 5.4959e-11, 7.9737e-12, 1.8000e-11, 2.2323e-12, 1.0703e-12, 1.6426e-11,\n 7.4623e-12, 9.5658e-12, 1.5736e-12, 2.9771e-11, 1.8784e-13, 8.0659e-13,\n 1.5430e-10, 1.3904e-11, 3.0663e-13, 3.5631e-12, 1.0593e-11, 3.4054e-12,\n 6.1111e-11, 1.1934e-10, 3.4170e-12, 9.8687e-12, 8.2126e-13, 2.3470e-13,\n 1.6033e-12, 6.5393e-12, 6.1147e-12, 1.2605e-11, 8.1294e-13, 2.7744e-11,\n 2.8873e-13, 5.8635e-13, 2.0707e-11, 8.7667e-12, 8.5469e-12, 4.7211e-11,\n 7.7112e-13, 3.5071e-11, 3.6136e-13, 1.7112e-12, 2.8352e-13, 3.0442e-11,\n 1.3817e-13, 2.4282e-12, 3.8697e-11, 2.0192e-11, 1.1284e-12, 1.3100e-11,\n 2.1970e-13, 8.2566e-13, 2.5768e-12, 1.7505e-11, 3.4713e-11, 1.4803e-11,\n 7.7263e-12, 3.1823e-12, 4.0023e-11, 4.8944e-11, 4.9264e-13, 9.5902e-15,\n 1.0875e-13, 1.3781e-11, 1.4290e-12, 5.5292e-13, 1.6273e-12, 5.4572e-11,\n 3.4432e-12, 1.0774e-13, 1.5787e-12, 2.2260e-12, 2.3734e-11, 4.3554e-13,\n 2.0820e-11, 4.5715e-11, 1.3124e-11, 4.0293e-12, 1.2340e-12, 1.0899e-11,\n 3.2635e-12, 1.4484e-11, 4.0354e-12, 6.5851e-13, 1.1324e-10, 4.7519e-13,\n 7.8001e-12, 1.3719e-14, 9.3705e-12, 2.8435e-11, 3.3619e-13, 2.4197e-12,\n 3.5289e-11, 1.8121e-12, 1.6451e-11, 1.0148e-10, 9.5363e-11, 1.0583e-11,\n 2.4643e-13, 4.8550e-12, 2.7828e-11, 7.1413e-14, 5.9944e-12, 1.1865e-10,\n 3.9309e-12, 6.6650e-12, 2.2877e-11, 2.4278e-12, 1.3540e-12, 9.9616e-12,\n 6.0236e-12, 5.1833e-12, 3.2359e-12, 1.1182e-11, 4.3313e-12, 2.9331e-12,\n 1.2286e-11, 2.1692e-12, 3.5090e-12, 5.1169e-12, 1.0535e-11, 3.4787e-13,\n 5.5445e-11, 2.1555e-12, 1.4558e-12, 2.0433e-11, 6.6568e-12, 3.6057e-13,\n 1.1403e-11, 6.3919e-11, 5.4355e-11, 7.7766e-11, 2.8420e-12, 9.9083e-12,\n 6.9704e-12, 3.4935e-11, 4.2927e-13, 4.6440e-14], device='cuda:0')" }, "40": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([[-5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.2495e-17, 5.1140e-16, 3.5051e-14, ..., 1.4445e-14, 0.0000e+00,\n 1.4570e-13],\n [3.3285e-14, 3.8615e-15, 3.4658e-13, ..., 2.1326e-13, 0.0000e+00,\n 9.2621e-14],\n [1.0652e-11, 1.6433e-13, 1.4266e-10, ..., 2.2337e-11, 0.0000e+00,\n 1.0250e-10],\n ...,\n [2.4714e-12, 6.8799e-13, 4.3592e-11, ..., 2.4772e-11, 0.0000e+00,\n 2.2297e-11],\n [1.4247e-14, 3.1442e-14, 5.2739e-13, ..., 4.5729e-14, 0.0000e+00,\n 2.5254e-13],\n [2.6260e-14, 3.4394e-13, 2.3404e-12, ..., 2.7376e-13, 0.0000e+00,\n 2.3017e-12]], device='cuda:0')" + "exp_avg_sq": "tensor([[3.5706e-18, 1.4614e-16, 1.0016e-14, ..., 4.1279e-15, 0.0000e+00,\n 4.1635e-14],\n [9.5113e-15, 1.1034e-15, 9.9037e-14, ..., 6.0940e-14, 0.0000e+00,\n 2.6467e-14],\n [3.0439e-12, 4.6959e-14, 4.0767e-11, ..., 6.3829e-12, 0.0000e+00,\n 2.9291e-11],\n ...,\n [7.0622e-13, 1.9660e-13, 1.2457e-11, ..., 7.0788e-12, 0.0000e+00,\n 6.3716e-12],\n [4.0711e-15, 8.9847e-15, 1.5071e-13, ..., 1.3067e-14, 0.0000e+00,\n 7.2166e-14],\n [7.5040e-15, 9.8285e-14, 6.6878e-13, ..., 7.8228e-14, 0.0000e+00,\n 6.5772e-13]], device='cuda:0')" }, "41": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.1840e-12, 2.5281e-11, 4.8021e-08, 2.7995e-11, 1.0548e-08, 1.5264e-08,\n 7.9251e-09, 1.0214e-09, 1.6197e-07, 4.5627e-09, 3.7497e-08, 3.3625e-08,\n 1.0070e-10, 3.1489e-09, 6.5379e-11, 1.3071e-10, 1.5214e-08, 8.5250e-09,\n 1.8217e-09, 4.6246e-10, 2.6844e-09, 2.6744e-10, 1.7096e-08, 8.1112e-09,\n 2.1628e-08, 2.9056e-08, 9.3119e-09, 1.3509e-08, 5.2882e-09, 7.5441e-08,\n 7.9682e-08, 2.7381e-09, 3.1891e-07, 2.8065e-09, 3.3763e-09, 3.0484e-10,\n 1.1654e-08, 2.4743e-10, 7.6054e-09, 2.6928e-09, 2.6001e-08, 2.4649e-10,\n 6.2183e-11, 1.5812e-07, 3.9496e-11, 7.1145e-09, 1.6785e-08, 1.3496e-08,\n 1.5271e-10, 8.8426e-09, 1.3262e-08, 1.7231e-09, 5.6177e-08, 3.6858e-09,\n 1.8247e-09, 3.7729e-08, 1.2081e-09, 3.4448e-09, 6.9757e-08, 6.8641e-08,\n 1.4694e-08, 1.9310e-08, 4.6413e-10, 9.3284e-09, 1.1457e-08, 6.4009e-08,\n 3.8728e-10, 1.6834e-08, 3.5470e-09, 8.1117e-09, 2.9588e-09, 3.1568e-09,\n 8.4600e-09, 9.0739e-08, 2.7427e-08, 8.0209e-08, 1.7005e-08, 2.4193e-09,\n 2.5509e-09, 1.1659e-10, 7.8733e-09, 1.1284e-09, 7.2624e-10, 2.0542e-08,\n 5.9654e-09, 6.0431e-10, 1.1972e-07, 1.2046e-08, 3.4581e-09, 2.8631e-11,\n 2.1691e-10, 7.6709e-09, 1.3491e-08, 3.7141e-09, 1.2898e-07, 4.9288e-10,\n 1.2040e-07, 8.3283e-08, 1.4477e-08, 9.5742e-10, 3.2533e-09, 1.9547e-09,\n 2.1619e-10, 6.1040e-08, 7.1063e-10, 1.0114e-08, 8.0173e-09, 1.3518e-09,\n 8.8169e-09, 3.9937e-08, 1.0291e-08, 6.9177e-10, 4.2906e-08, 4.3860e-09,\n 3.3675e-09, 1.2886e-08, 1.1653e-09, 9.9642e-09, 1.2380e-09, 3.3834e-08,\n 9.3022e-08, 1.1806e-08, 1.0561e-08, 1.3763e-10, 2.3968e-09, 1.3562e-07,\n 9.9543e-10, 7.4428e-09, 2.8182e-09, 8.9197e-09, 9.8895e-11, 2.8343e-09,\n 1.3296e-07, 6.0315e-09, 2.6606e-10, 1.1261e-08, 7.1556e-09, 1.1036e-09,\n 3.6845e-08, 6.5446e-08, 1.1803e-09, 3.2768e-09, 4.7702e-08, 7.9775e-11,\n 1.7242e-09, 1.0424e-08, 3.3358e-09, 3.9254e-08, 7.1102e-10, 2.2978e-08,\n 2.1333e-09, 1.2962e-10, 3.4658e-08, 9.2980e-09, 2.3870e-09, 3.3759e-08,\n 1.2203e-08, 4.6379e-08, 6.7583e-11, 5.9678e-09, 2.6265e-10, 3.2019e-08,\n 2.9299e-11, 5.1271e-08, 6.8020e-09, 3.4080e-08, 3.4859e-09, 1.2078e-08,\n 2.4676e-10, 8.7233e-12, 2.9182e-09, 1.1064e-08, 1.0021e-08, 1.2457e-08,\n 4.6341e-09, 3.9179e-09, 5.5221e-08, 1.2443e-08, 1.0313e-10, 1.8327e-11,\n 1.1892e-09, 1.2638e-08, 1.4934e-10, 1.3712e-09, 7.0686e-10, 1.5546e-07,\n 1.4750e-08, 2.1146e-10, 2.9209e-09, 3.4786e-09, 4.8792e-08, 1.3958e-10,\n 2.0704e-08, 5.2765e-08, 3.4520e-08, 4.0879e-10, 4.8695e-10, 1.5231e-09,\n 1.5133e-08, 4.2693e-08, 2.1853e-08, 2.5773e-10, 9.5199e-08, 1.1870e-11,\n 6.3230e-09, 4.2699e-11, 3.6550e-09, 3.1135e-08, 8.7280e-10, 9.8352e-10,\n 5.8597e-08, 1.8415e-09, 2.2778e-08, 1.2547e-07, 4.2212e-07, 3.1263e-09,\n 5.1683e-10, 1.9354e-08, 9.5842e-09, 1.2803e-11, 7.5153e-09, 1.7339e-07,\n 2.4323e-09, 1.5897e-08, 1.4346e-08, 1.7952e-09, 1.0043e-09, 2.3818e-08,\n 2.9412e-08, 1.4836e-09, 2.8584e-08, 4.3247e-09, 1.3761e-08, 1.8420e-08,\n 1.6396e-08, 8.3376e-09, 4.7775e-09, 9.2901e-09, 2.8049e-09, 5.4028e-10,\n 4.8332e-08, 1.0118e-08, 3.1301e-10, 4.0212e-08, 4.1283e-09, 8.2808e-11,\n 3.7764e-08, 4.5944e-08, 1.1565e-07, 3.5679e-08, 1.1091e-09, 2.7505e-08,\n 2.5052e-09, 3.6744e-08, 4.3615e-10, 3.1479e-09], device='cuda:0')" + "exp_avg_sq": "tensor([3.3835e-13, 7.2243e-12, 1.3722e-08, 7.9997e-12, 3.0142e-09, 4.3618e-09,\n 2.2646e-09, 2.9187e-10, 4.6284e-08, 1.3038e-09, 1.0715e-08, 9.6087e-09,\n 2.8775e-11, 8.9983e-10, 1.8683e-11, 3.7350e-11, 4.3476e-09, 2.4361e-09,\n 5.2058e-10, 1.3215e-10, 7.6710e-10, 7.6424e-11, 4.8852e-09, 2.3178e-09,\n 6.1803e-09, 8.3029e-09, 2.6610e-09, 3.8604e-09, 1.5112e-09, 2.1558e-08,\n 2.2770e-08, 7.8243e-10, 9.1130e-08, 8.0198e-10, 9.6481e-10, 8.7110e-11,\n 3.3302e-09, 7.0704e-11, 2.1733e-09, 7.6949e-10, 7.4301e-09, 7.0436e-11,\n 1.7769e-11, 4.5184e-08, 1.1286e-11, 2.0330e-09, 4.7965e-09, 3.8565e-09,\n 4.3637e-11, 2.5268e-09, 3.7897e-09, 4.9238e-10, 1.6053e-08, 1.0532e-09,\n 5.2142e-10, 1.0781e-08, 3.4522e-10, 9.8438e-10, 1.9934e-08, 1.9615e-08,\n 4.1991e-09, 5.5180e-09, 1.3263e-10, 2.6657e-09, 3.2738e-09, 1.8291e-08,\n 1.1067e-10, 4.8104e-09, 1.0136e-09, 2.3180e-09, 8.4550e-10, 9.0208e-10,\n 2.4175e-09, 2.5930e-08, 7.8374e-09, 2.2920e-08, 4.8593e-09, 6.9132e-10,\n 7.2894e-10, 3.3316e-11, 2.2499e-09, 3.2246e-10, 2.0753e-10, 5.8701e-09,\n 1.7047e-09, 1.7269e-10, 3.4212e-08, 3.4423e-09, 9.8817e-10, 8.1816e-12,\n 6.1984e-11, 2.1920e-09, 3.8552e-09, 1.0613e-09, 3.6857e-08, 1.4084e-10,\n 3.4405e-08, 2.3799e-08, 4.1368e-09, 2.7359e-10, 9.2966e-10, 5.5858e-10,\n 6.1777e-11, 1.7443e-08, 2.0307e-10, 2.8901e-09, 2.2910e-09, 3.8629e-10,\n 2.5195e-09, 1.1412e-08, 2.9406e-09, 1.9768e-10, 1.2261e-08, 1.2533e-09,\n 9.6230e-10, 3.6824e-09, 3.3300e-10, 2.8474e-09, 3.5378e-10, 9.6683e-09,\n 2.6582e-08, 3.3735e-09, 3.0178e-09, 3.9328e-11, 6.8490e-10, 3.8755e-08,\n 2.8445e-10, 2.1269e-09, 8.0531e-10, 2.5489e-09, 2.8260e-11, 8.0991e-10,\n 3.7994e-08, 1.7236e-09, 7.6030e-11, 3.2178e-09, 2.0448e-09, 3.1537e-10,\n 1.0529e-08, 1.8702e-08, 3.3728e-10, 9.3636e-10, 1.3631e-08, 2.2796e-11,\n 4.9270e-10, 2.9789e-09, 9.5323e-10, 1.1217e-08, 2.0318e-10, 6.5663e-09,\n 6.0961e-10, 3.7041e-11, 9.9037e-09, 2.6570e-09, 6.8212e-10, 9.6470e-09,\n 3.4870e-09, 1.3253e-08, 1.9312e-11, 1.7053e-09, 7.5054e-11, 9.1497e-09,\n 8.3724e-12, 1.4651e-08, 1.9437e-09, 9.7388e-09, 9.9612e-10, 3.4513e-09,\n 7.0512e-11, 2.4928e-12, 8.3391e-10, 3.1616e-09, 2.8635e-09, 3.5596e-09,\n 1.3242e-09, 1.1196e-09, 1.5780e-08, 3.5556e-09, 2.9469e-11, 5.2370e-12,\n 3.3983e-10, 3.6113e-09, 4.2674e-11, 3.9182e-10, 2.0199e-10, 4.4425e-08,\n 4.2148e-09, 6.0427e-11, 8.3467e-10, 9.9403e-10, 1.3943e-08, 3.9887e-11,\n 5.9162e-09, 1.5078e-08, 9.8644e-09, 1.1682e-10, 1.3915e-10, 4.3525e-10,\n 4.3243e-09, 1.2200e-08, 6.2447e-09, 7.3650e-11, 2.7204e-08, 3.3920e-12,\n 1.8069e-09, 1.2202e-11, 1.0444e-09, 8.8971e-09, 2.4941e-10, 2.8105e-10,\n 1.6745e-08, 5.2622e-10, 6.5089e-09, 3.5855e-08, 1.2062e-07, 8.9336e-10,\n 1.4769e-10, 5.5306e-09, 2.7387e-09, 3.6584e-12, 2.1476e-09, 4.9546e-08,\n 6.9506e-10, 4.5427e-09, 4.0996e-09, 5.1299e-10, 2.8697e-10, 6.8062e-09,\n 8.4046e-09, 4.2396e-10, 8.1681e-09, 1.2358e-09, 3.9323e-09, 5.2638e-09,\n 4.6854e-09, 2.3825e-09, 1.3652e-09, 2.6547e-09, 8.0153e-10, 1.5439e-10,\n 1.3811e-08, 2.8913e-09, 8.9446e-11, 1.1491e-08, 1.1797e-09, 2.3663e-11,\n 1.0791e-08, 1.3129e-08, 3.3048e-08, 1.0195e-08, 3.1693e-10, 7.8598e-09,\n 7.1589e-10, 1.0500e-08, 1.2463e-10, 8.9954e-10], device='cuda:0')" }, "42": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([3.8998e-14, 5.5223e-14, 8.3651e-11, 2.3593e-13, 1.8678e-11, 2.5796e-11,\n 1.3347e-11, 1.6703e-12, 2.9204e-10, 9.8056e-12, 5.9888e-11, 5.4488e-11,\n 3.0022e-13, 4.4490e-12, 2.0319e-13, 3.4563e-13, 2.8699e-11, 3.5605e-11,\n 4.7600e-12, 1.5421e-12, 6.3590e-12, 7.0751e-13, 4.4485e-11, 1.6633e-11,\n 7.7463e-11, 4.5357e-11, 1.8812e-11, 4.0777e-11, 2.0476e-11, 1.2488e-10,\n 1.4725e-10, 6.7304e-12, 6.4293e-10, 3.1299e-12, 5.6098e-12, 4.5496e-13,\n 1.8436e-11, 1.3174e-12, 1.0411e-11, 5.8050e-12, 3.4193e-11, 3.1208e-12,\n 8.2582e-14, 5.2802e-10, 1.6182e-13, 1.2532e-11, 4.6156e-11, 5.3424e-11,\n 4.6525e-13, 1.4049e-11, 2.5771e-11, 2.1581e-12, 9.8134e-11, 5.9407e-12,\n 3.9036e-12, 5.8611e-11, 4.3370e-12, 4.6527e-12, 1.6529e-10, 9.1137e-11,\n 2.1594e-11, 5.2826e-11, 1.2461e-12, 1.4862e-11, 2.9365e-11, 1.4003e-10,\n 1.1420e-12, 2.4792e-11, 6.5673e-12, 9.0723e-12, 4.8289e-12, 6.9543e-12,\n 1.3783e-11, 2.1953e-10, 3.7983e-11, 1.7364e-10, 3.3814e-11, 4.2513e-12,\n 5.0281e-12, 1.2429e-13, 9.6405e-12, 1.6906e-12, 2.2207e-12, 8.0840e-11,\n 7.7532e-12, 3.2860e-12, 3.1848e-10, 4.2105e-11, 8.7092e-12, 1.7187e-13,\n 1.0506e-12, 1.2526e-11, 1.7558e-11, 5.9455e-12, 3.4374e-10, 1.1676e-12,\n 3.0736e-10, 2.1321e-10, 2.7042e-11, 3.8055e-12, 6.2931e-12, 4.0421e-12,\n 6.6498e-13, 1.1358e-10, 2.5411e-12, 1.4012e-11, 9.3991e-12, 3.4804e-12,\n 8.8621e-12, 9.2286e-11, 2.3017e-11, 9.2167e-13, 6.5701e-11, 5.5203e-12,\n 8.4625e-12, 2.1606e-11, 4.2824e-12, 1.1162e-11, 4.6222e-12, 4.7712e-11,\n 1.7688e-10, 3.5014e-11, 1.1670e-11, 3.1604e-13, 7.3185e-12, 2.7562e-10,\n 1.9416e-12, 2.0645e-11, 6.4653e-12, 1.5704e-11, 5.6632e-13, 3.7878e-12,\n 2.5186e-10, 9.7768e-12, 6.8314e-13, 2.2831e-11, 1.9299e-11, 2.4463e-12,\n 5.9106e-11, 1.0380e-10, 2.3714e-12, 4.9482e-12, 9.1187e-11, 5.3266e-13,\n 4.4835e-12, 2.0356e-11, 4.6722e-12, 1.2741e-10, 2.9918e-12, 7.5916e-11,\n 4.1851e-12, 8.0291e-14, 7.1183e-11, 1.7709e-11, 4.4612e-12, 3.5036e-11,\n 1.9283e-11, 5.9460e-11, 9.9563e-14, 1.2628e-11, 3.1008e-12, 6.9694e-11,\n 1.6413e-13, 1.6558e-10, 1.1866e-11, 5.8368e-11, 8.4480e-12, 2.6242e-11,\n 5.6581e-13, 2.5399e-14, 4.0793e-12, 2.7507e-11, 1.2615e-11, 1.7344e-11,\n 1.0367e-11, 8.6623e-12, 1.2171e-10, 2.3189e-11, 3.6461e-13, 4.3828e-14,\n 1.9487e-12, 3.3525e-11, 1.3921e-13, 2.6564e-12, 1.7774e-12, 2.6922e-10,\n 1.8726e-11, 1.1531e-12, 1.3036e-11, 9.2570e-12, 1.4984e-10, 1.0345e-12,\n 3.1015e-11, 1.1400e-10, 6.0488e-11, 1.2259e-12, 4.7134e-13, 2.9070e-12,\n 2.3756e-11, 1.0127e-10, 5.1243e-11, 1.4483e-12, 1.6250e-10, 2.7567e-13,\n 6.9784e-12, 8.0767e-14, 4.6602e-12, 6.6113e-11, 3.5767e-12, 1.5336e-12,\n 1.7786e-10, 1.9168e-12, 3.0280e-11, 2.5621e-10, 1.2203e-09, 4.5955e-12,\n 9.0839e-13, 3.4397e-11, 1.3942e-11, 1.1584e-14, 7.4533e-12, 3.1043e-10,\n 6.3892e-12, 4.9690e-11, 2.5939e-11, 2.9269e-12, 2.6205e-12, 4.9805e-11,\n 5.0868e-11, 2.3005e-12, 3.6316e-11, 8.9215e-12, 2.8589e-11, 3.8085e-11,\n 2.6234e-11, 1.5354e-11, 7.7859e-12, 1.8270e-11, 4.3113e-12, 1.0826e-12,\n 9.9551e-11, 2.5478e-11, 3.0522e-12, 5.5862e-11, 4.2676e-12, 5.4838e-13,\n 1.0283e-10, 1.1053e-10, 2.7279e-10, 5.8255e-11, 1.7940e-12, 5.9300e-11,\n 3.7810e-12, 8.3578e-11, 1.1293e-12, 1.3385e-11], device='cuda:0')" + "exp_avg_sq": "tensor([1.1144e-14, 1.5780e-14, 2.3904e-11, 6.7419e-14, 5.3374e-12, 7.3714e-12,\n 3.8141e-12, 4.7729e-13, 8.3453e-11, 2.8020e-12, 1.7114e-11, 1.5570e-11,\n 8.5791e-14, 1.2713e-12, 5.8064e-14, 9.8766e-14, 8.2010e-12, 1.0174e-11,\n 1.3602e-12, 4.4066e-13, 1.8171e-12, 2.0218e-13, 1.2712e-11, 4.7530e-12,\n 2.2136e-11, 1.2961e-11, 5.3757e-12, 1.1652e-11, 5.8513e-12, 3.5687e-11,\n 4.2077e-11, 1.9233e-12, 1.8372e-10, 8.9440e-13, 1.6030e-12, 1.3001e-13,\n 5.2682e-12, 3.7647e-13, 2.9749e-12, 1.6588e-12, 9.7710e-12, 8.9178e-13,\n 2.3599e-14, 1.5089e-10, 4.6241e-14, 3.5810e-12, 1.3189e-11, 1.5266e-11,\n 1.3295e-13, 4.0147e-12, 7.3643e-12, 6.1669e-13, 2.8043e-11, 1.6976e-12,\n 1.1155e-12, 1.6749e-11, 1.2393e-12, 1.3295e-12, 4.7233e-11, 2.6043e-11,\n 6.1707e-12, 1.5095e-11, 3.5608e-13, 4.2468e-12, 8.3914e-12, 4.0014e-11,\n 3.2634e-13, 7.0845e-12, 1.8767e-12, 2.5925e-12, 1.3799e-12, 1.9873e-12,\n 3.9385e-12, 6.2733e-11, 1.0854e-11, 4.9619e-11, 9.6626e-12, 1.2148e-12,\n 1.4368e-12, 3.5516e-14, 2.7548e-12, 4.8309e-13, 6.3459e-13, 2.3101e-11,\n 2.2156e-12, 9.3900e-13, 9.1007e-11, 1.2032e-11, 2.4887e-12, 4.9114e-14,\n 3.0020e-13, 3.5795e-12, 5.0175e-12, 1.6990e-12, 9.8227e-11, 3.3366e-13,\n 8.7830e-11, 6.0927e-11, 7.7274e-12, 1.0875e-12, 1.7983e-12, 1.1551e-12,\n 1.9002e-13, 3.2457e-11, 7.2612e-13, 4.0040e-12, 2.6859e-12, 9.9456e-13,\n 2.5324e-12, 2.6372e-11, 6.5774e-12, 2.6338e-13, 1.8775e-11, 1.5775e-12,\n 2.4182e-12, 6.1742e-12, 1.2237e-12, 3.1897e-12, 1.3208e-12, 1.3634e-11,\n 5.0544e-11, 1.0006e-11, 3.3348e-12, 9.0311e-14, 2.0913e-12, 7.8762e-11,\n 5.5483e-13, 5.8995e-12, 1.8475e-12, 4.4877e-12, 1.6183e-13, 1.0824e-12,\n 7.1972e-11, 2.7938e-12, 1.9521e-13, 6.5242e-12, 5.5148e-12, 6.9905e-13,\n 1.6890e-11, 2.9662e-11, 6.7764e-13, 1.4140e-12, 2.6057e-11, 1.5221e-13,\n 1.2812e-12, 5.8168e-12, 1.3351e-12, 3.6409e-11, 8.5493e-13, 2.1694e-11,\n 1.1959e-12, 2.2944e-14, 2.0341e-11, 5.0605e-12, 1.2748e-12, 1.0012e-11,\n 5.5102e-12, 1.6991e-11, 2.8451e-14, 3.6085e-12, 8.8608e-13, 1.9916e-11,\n 4.6902e-14, 4.7316e-11, 3.3909e-12, 1.6679e-11, 2.4141e-12, 7.4987e-12,\n 1.6169e-13, 7.2581e-15, 1.1657e-12, 7.8604e-12, 3.6049e-12, 4.9561e-12,\n 2.9625e-12, 2.4753e-12, 3.4780e-11, 6.6265e-12, 1.0419e-13, 1.2524e-14,\n 5.5686e-13, 9.5801e-12, 3.9779e-14, 7.5909e-13, 5.0792e-13, 7.6930e-11,\n 5.3510e-12, 3.2950e-13, 3.7250e-12, 2.6453e-12, 4.2818e-11, 2.9560e-13,\n 8.8627e-12, 3.2578e-11, 1.7285e-11, 3.5031e-13, 1.3469e-13, 8.3069e-13,\n 6.7884e-12, 2.8940e-11, 1.4643e-11, 4.1385e-13, 4.6435e-11, 7.8776e-14,\n 1.9941e-12, 2.3080e-14, 1.3317e-12, 1.8892e-11, 1.0221e-12, 4.3824e-13,\n 5.0826e-11, 5.4774e-13, 8.6526e-12, 7.3214e-11, 3.4870e-10, 1.3132e-12,\n 2.5958e-13, 9.8291e-12, 3.9841e-12, 3.3104e-15, 2.1298e-12, 8.8707e-11,\n 1.8258e-12, 1.4199e-11, 7.4123e-12, 8.3640e-13, 7.4883e-13, 1.4232e-11,\n 1.4536e-11, 6.5740e-13, 1.0378e-11, 2.5494e-12, 8.1695e-12, 1.0883e-11,\n 7.4965e-12, 4.3875e-12, 2.2249e-12, 5.2209e-12, 1.2320e-12, 3.0936e-13,\n 2.8448e-11, 7.2805e-12, 8.7219e-13, 1.5963e-11, 1.2195e-12, 1.5670e-13,\n 2.9385e-11, 3.1586e-11, 7.7952e-11, 1.6647e-11, 5.1266e-13, 1.6946e-11,\n 1.0805e-12, 2.3883e-11, 3.2270e-13, 3.8248e-12], device='cuda:0')" }, "43": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.3428e-13, 1.0002e-13, 1.3870e-10, 3.9081e-13, 2.8777e-11, 3.7838e-11,\n 2.2408e-11, 3.6578e-12, 4.4815e-10, 1.3258e-11, 1.0637e-10, 8.1958e-11,\n 2.9994e-13, 9.8345e-12, 3.0177e-13, 9.9235e-13, 4.5720e-11, 2.7737e-11,\n 4.3861e-12, 2.1063e-12, 9.1673e-12, 1.2735e-12, 4.4633e-11, 2.4104e-11,\n 6.4195e-11, 7.4874e-11, 2.7419e-11, 3.4809e-11, 1.9943e-11, 2.0926e-10,\n 1.9741e-10, 9.1512e-12, 8.8863e-10, 6.8191e-12, 1.1503e-11, 1.5304e-12,\n 2.9975e-11, 2.0980e-12, 2.3058e-11, 8.0671e-12, 6.2553e-11, 1.9542e-12,\n 3.7365e-13, 4.4324e-10, 1.7579e-13, 1.6797e-11, 4.2454e-11, 4.1453e-11,\n 8.4087e-13, 2.6096e-11, 4.1250e-11, 4.3715e-12, 1.4540e-10, 9.5550e-12,\n 6.2098e-12, 1.0051e-10, 4.8057e-12, 8.7783e-12, 1.7998e-10, 1.9753e-10,\n 3.5733e-11, 4.4952e-11, 2.1580e-12, 2.7020e-11, 3.3142e-11, 1.5901e-10,\n 1.4164e-12, 4.4948e-11, 1.0642e-11, 2.3869e-11, 8.9397e-12, 9.3278e-12,\n 1.7785e-11, 2.4971e-10, 7.9019e-11, 2.0410e-10, 4.9151e-11, 7.6957e-12,\n 8.7195e-12, 2.6733e-13, 2.1778e-11, 3.4929e-12, 3.1172e-12, 5.9163e-11,\n 1.4606e-11, 2.9400e-12, 3.3052e-10, 3.7943e-11, 1.2168e-11, 2.4397e-13,\n 1.1147e-12, 1.9784e-11, 3.5483e-11, 1.1174e-11, 3.2353e-10, 1.2729e-12,\n 3.0539e-10, 2.1140e-10, 3.2728e-11, 6.1018e-12, 9.8254e-12, 7.0996e-12,\n 6.3148e-13, 1.5818e-10, 2.1160e-12, 2.6500e-11, 2.2683e-11, 5.1998e-12,\n 2.2743e-11, 9.7972e-11, 3.0112e-11, 2.3650e-12, 1.1440e-10, 9.8595e-12,\n 1.0930e-11, 3.7543e-11, 4.5675e-12, 2.5169e-11, 6.1856e-12, 9.7054e-11,\n 2.5406e-10, 3.7012e-11, 3.0484e-11, 2.2109e-12, 8.8131e-12, 3.4603e-10,\n 2.6962e-12, 2.3163e-11, 8.5572e-12, 2.5742e-11, 8.6537e-13, 9.2609e-12,\n 3.4171e-10, 1.6283e-11, 1.1754e-12, 2.9550e-11, 2.2909e-11, 2.7265e-12,\n 9.2421e-11, 1.6503e-10, 4.4349e-12, 1.0446e-11, 1.1761e-10, 9.7492e-13,\n 6.0315e-12, 3.0520e-11, 7.7876e-12, 1.0201e-10, 2.7291e-12, 6.6709e-11,\n 7.0350e-12, 1.5750e-13, 8.8866e-11, 2.2030e-11, 8.1976e-12, 8.7713e-11,\n 2.9644e-11, 1.1227e-10, 3.7065e-13, 1.7595e-11, 4.2314e-12, 9.3594e-11,\n 3.2574e-13, 1.4414e-10, 1.9483e-11, 8.6919e-11, 8.6346e-12, 3.0864e-11,\n 9.3395e-13, 6.9799e-14, 7.6120e-12, 3.3811e-11, 2.8101e-11, 3.6914e-11,\n 1.3132e-11, 1.1489e-11, 1.5528e-10, 3.4637e-11, 1.4375e-12, 9.4859e-14,\n 4.4132e-12, 3.4361e-11, 5.8040e-13, 4.8116e-12, 2.8784e-12, 4.2798e-10,\n 3.5714e-11, 1.0658e-12, 1.0743e-11, 1.2120e-11, 1.2034e-10, 1.8649e-12,\n 5.2149e-11, 1.4057e-10, 8.4047e-11, 1.1435e-12, 1.8093e-12, 5.2291e-12,\n 4.0392e-11, 1.2230e-10, 5.5235e-11, 2.3607e-12, 2.4832e-10, 5.9383e-13,\n 1.6387e-11, 1.7591e-13, 1.1656e-11, 7.6773e-11, 4.4081e-12, 3.1785e-12,\n 1.6631e-10, 4.2382e-12, 6.5623e-11, 3.3669e-10, 1.1449e-09, 8.4044e-12,\n 1.7124e-12, 4.5668e-11, 2.6939e-11, 6.1468e-14, 1.7280e-11, 4.7489e-10,\n 8.2142e-12, 4.7229e-11, 4.3796e-11, 4.0068e-12, 3.5507e-12, 6.6285e-11,\n 8.2007e-11, 4.7836e-12, 7.7956e-11, 1.3700e-11, 3.2612e-11, 5.4629e-11,\n 4.7008e-11, 2.6404e-11, 1.2668e-11, 2.9300e-11, 7.2744e-12, 1.6952e-12,\n 1.2502e-10, 3.0550e-11, 4.0478e-12, 1.1099e-10, 1.1156e-11, 1.3090e-12,\n 1.0443e-10, 1.1669e-10, 3.1820e-10, 1.0490e-10, 3.3367e-12, 6.6580e-11,\n 5.9583e-12, 1.0786e-10, 2.1765e-12, 1.0846e-11], device='cuda:0')" + "exp_avg_sq": "tensor([3.8372e-14, 2.8582e-14, 3.9635e-11, 1.1168e-13, 8.2233e-12, 1.0812e-11,\n 6.4034e-12, 1.0453e-12, 1.2806e-10, 3.7886e-12, 3.0397e-11, 2.3420e-11,\n 8.5709e-14, 2.8103e-12, 8.6232e-14, 2.8357e-13, 1.3065e-11, 7.9260e-12,\n 1.2533e-12, 6.0188e-13, 2.6196e-12, 3.6392e-13, 1.2754e-11, 6.8879e-12,\n 1.8344e-11, 2.1396e-11, 7.8352e-12, 9.9471e-12, 5.6990e-12, 5.9798e-11,\n 5.6411e-11, 2.6150e-12, 2.5393e-10, 1.9486e-12, 3.2871e-12, 4.3734e-13,\n 8.5655e-12, 5.9951e-13, 6.5889e-12, 2.3052e-12, 1.7875e-11, 5.5844e-13,\n 1.0677e-13, 1.2666e-10, 5.0235e-14, 4.7998e-12, 1.2132e-11, 1.1846e-11,\n 2.4029e-13, 7.4572e-12, 1.1787e-11, 1.2492e-12, 4.1549e-11, 2.7304e-12,\n 1.7745e-12, 2.8722e-11, 1.3733e-12, 2.5085e-12, 5.1431e-11, 5.6446e-11,\n 1.0211e-11, 1.2845e-11, 6.1667e-13, 7.7211e-12, 9.4705e-12, 4.5440e-11,\n 4.0474e-13, 1.2844e-11, 3.0411e-12, 6.8206e-12, 2.5546e-12, 2.6655e-12,\n 5.0823e-12, 7.1356e-11, 2.2580e-11, 5.8325e-11, 1.4045e-11, 2.1991e-12,\n 2.4917e-12, 7.6391e-14, 6.2233e-12, 9.9813e-13, 8.9076e-13, 1.6906e-11,\n 4.1738e-12, 8.4012e-13, 9.4449e-11, 1.0843e-11, 3.4772e-12, 6.9717e-14,\n 3.1853e-13, 5.6534e-12, 1.0140e-11, 3.1930e-12, 9.2452e-11, 3.6375e-13,\n 8.7267e-11, 6.0409e-11, 9.3522e-12, 1.7436e-12, 2.8077e-12, 2.0288e-12,\n 1.8045e-13, 4.5200e-11, 6.0468e-13, 7.5727e-12, 6.4819e-12, 1.4859e-12,\n 6.4989e-12, 2.7996e-11, 8.6049e-12, 6.7582e-13, 3.2690e-11, 2.8174e-12,\n 3.1235e-12, 1.0728e-11, 1.3052e-12, 7.1921e-12, 1.7676e-12, 2.7734e-11,\n 7.2600e-11, 1.0576e-11, 8.7110e-12, 6.3178e-13, 2.5184e-12, 9.8882e-11,\n 7.7047e-13, 6.6190e-12, 2.4453e-12, 7.3561e-12, 2.4729e-13, 2.6464e-12,\n 9.7647e-11, 4.6531e-12, 3.3588e-13, 8.4442e-12, 6.5463e-12, 7.7911e-13,\n 2.6410e-11, 4.7160e-11, 1.2673e-12, 2.9850e-12, 3.3607e-11, 2.7859e-13,\n 1.7236e-12, 8.7213e-12, 2.2254e-12, 2.9150e-11, 7.7986e-13, 1.9063e-11,\n 2.0103e-12, 4.5006e-14, 2.5394e-11, 6.2953e-12, 2.3425e-12, 2.5065e-11,\n 8.4711e-12, 3.2081e-11, 1.0592e-13, 5.0280e-12, 1.2092e-12, 2.6745e-11,\n 9.3083e-14, 4.1189e-11, 5.5673e-12, 2.4838e-11, 2.4674e-12, 8.8197e-12,\n 2.6688e-13, 1.9946e-14, 2.1752e-12, 9.6616e-12, 8.0301e-12, 1.0548e-11,\n 3.7527e-12, 3.2831e-12, 4.4374e-11, 9.8977e-12, 4.1077e-13, 2.7107e-14,\n 1.2611e-12, 9.8190e-12, 1.6585e-13, 1.3749e-12, 8.2252e-13, 1.2230e-10,\n 1.0205e-11, 3.0457e-13, 3.0699e-12, 3.4634e-12, 3.4389e-11, 5.3292e-13,\n 1.4902e-11, 4.0170e-11, 2.4017e-11, 3.2676e-13, 5.1703e-13, 1.4943e-12,\n 1.1542e-11, 3.4949e-11, 1.5784e-11, 6.7459e-13, 7.0958e-11, 1.6969e-13,\n 4.6826e-12, 5.0268e-14, 3.3309e-12, 2.1938e-11, 1.2596e-12, 9.0829e-13,\n 4.7524e-11, 1.2111e-12, 1.8752e-11, 9.6213e-11, 3.2716e-10, 2.4016e-12,\n 4.8934e-13, 1.3050e-11, 7.6979e-12, 1.7565e-14, 4.9380e-12, 1.3570e-10,\n 2.3473e-12, 1.3496e-11, 1.2515e-11, 1.1450e-12, 1.0147e-12, 1.8941e-11,\n 2.3434e-11, 1.3669e-12, 2.2276e-11, 3.9148e-12, 9.3192e-12, 1.5611e-11,\n 1.3433e-11, 7.5452e-12, 3.6201e-12, 8.3728e-12, 2.0787e-12, 4.8442e-13,\n 3.5726e-11, 8.7299e-12, 1.1567e-12, 3.1717e-11, 3.1880e-12, 3.7406e-13,\n 2.9842e-11, 3.3346e-11, 9.0927e-11, 2.9975e-11, 9.5348e-13, 1.9026e-11,\n 1.7026e-12, 3.0821e-11, 6.2195e-13, 3.0994e-12], device='cuda:0')" }, "44": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n 0.0000e+00, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[3.4595e-15, 5.8671e-15, 2.3390e-14, ..., 4.1983e-15, 0.0000e+00,\n 9.2750e-14],\n [1.5049e-14, 1.8593e-15, 1.7004e-13, ..., 1.1076e-13, 0.0000e+00,\n 5.0862e-13],\n [2.9319e-11, 3.4972e-12, 2.2289e-10, ..., 1.3899e-10, 0.0000e+00,\n 3.6736e-10],\n ...,\n [7.8156e-12, 9.4788e-13, 5.3438e-11, ..., 2.5232e-11, 0.0000e+00,\n 2.7490e-11],\n [3.3986e-14, 2.0621e-14, 7.5358e-14, ..., 5.6393e-13, 0.0000e+00,\n 3.4013e-13],\n [7.4799e-15, 3.1745e-15, 2.8284e-13, ..., 2.1271e-13, 0.0000e+00,\n 1.4472e-14]], device='cuda:0')" + "exp_avg_sq": "tensor([[9.8859e-16, 1.6766e-15, 6.6839e-15, ..., 1.1997e-15, 0.0000e+00,\n 2.6504e-14],\n [4.3003e-15, 5.3131e-16, 4.8592e-14, ..., 3.1650e-14, 0.0000e+00,\n 1.4534e-13],\n [8.3781e-12, 9.9935e-13, 6.3691e-11, ..., 3.9717e-11, 0.0000e+00,\n 1.0498e-10],\n ...,\n [2.2334e-12, 2.7086e-13, 1.5270e-11, ..., 7.2101e-12, 0.0000e+00,\n 7.8554e-12],\n [9.7118e-15, 5.8925e-15, 2.1534e-14, ..., 1.6115e-13, 0.0000e+00,\n 9.7193e-14],\n [2.1374e-15, 9.0714e-16, 8.0825e-14, ..., 6.0784e-14, 0.0000e+00,\n 4.1355e-15]], device='cuda:0')" }, "45": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.4553e-11, 3.5898e-11, 1.8275e-07, 5.9432e-12, 6.0615e-08, 3.7848e-08,\n 9.0025e-09, 5.3042e-09, 1.8463e-07, 3.8258e-09, 4.8847e-09, 1.7418e-08,\n 2.1107e-10, 3.9533e-09, 8.4544e-10, 1.5328e-09, 1.3821e-08, 3.3996e-09,\n 4.7566e-10, 1.5200e-10, 1.3017e-08, 6.2939e-10, 1.7527e-08, 1.8628e-09,\n 1.5978e-08, 5.7009e-08, 6.9689e-09, 7.1617e-09, 2.5553e-09, 5.7244e-08,\n 1.9913e-09, 7.7183e-09, 3.0053e-07, 3.6503e-09, 1.9641e-09, 1.1263e-10,\n 1.9708e-08, 1.1396e-10, 1.2143e-08, 3.8682e-09, 9.1969e-09, 3.4276e-10,\n 1.0253e-10, 2.8507e-08, 9.2720e-12, 1.0613e-08, 4.6932e-09, 6.3045e-09,\n 1.1993e-09, 4.5609e-09, 3.8357e-08, 2.4320e-09, 1.4979e-08, 2.7787e-09,\n 1.5448e-09, 7.5638e-08, 4.4645e-10, 6.4239e-09, 8.0058e-09, 1.3766e-07,\n 1.4209e-08, 1.3452e-08, 7.1471e-10, 2.1488e-08, 1.2523e-08, 4.7765e-08,\n 1.3836e-10, 2.6004e-08, 1.8891e-09, 6.3139e-09, 6.4434e-09, 1.8934e-08,\n 7.7470e-10, 5.0905e-08, 5.4564e-08, 4.4476e-08, 3.1188e-09, 3.2813e-09,\n 9.4603e-10, 2.9680e-10, 1.3110e-08, 9.2888e-10, 1.4414e-09, 1.0357e-09,\n 1.3051e-08, 3.3197e-11, 4.0387e-08, 5.7005e-09, 3.0889e-09, 1.2225e-11,\n 8.2742e-11, 7.0765e-09, 2.0999e-08, 1.4253e-08, 4.1667e-08, 2.3847e-09,\n 6.6273e-08, 4.3895e-08, 2.0789e-09, 2.2849e-09, 7.3743e-09, 2.6036e-09,\n 4.7068e-09, 6.8416e-08, 1.0009e-08, 9.3571e-09, 1.4711e-08, 1.0183e-09,\n 1.0233e-08, 1.5415e-09, 3.9401e-10, 2.4884e-09, 2.4907e-09, 6.2287e-09,\n 1.0201e-09, 1.3844e-08, 1.6357e-09, 7.4727e-08, 7.0194e-10, 4.3315e-08,\n 8.0168e-08, 6.3501e-09, 5.6821e-08, 3.3306e-10, 2.8287e-09, 1.0427e-07,\n 5.6758e-09, 2.9814e-09, 3.0345e-09, 9.3066e-09, 2.5779e-10, 4.4544e-09,\n 2.3570e-07, 2.0879e-08, 3.2419e-10, 1.4137e-08, 8.3089e-09, 8.8359e-09,\n 7.6129e-08, 3.2320e-08, 6.6063e-09, 1.4655e-09, 2.0632e-08, 1.2097e-10,\n 8.3636e-11, 1.4997e-08, 1.0756e-08, 1.2837e-08, 2.0618e-10, 3.2813e-09,\n 4.3376e-09, 2.2200e-12, 1.6759e-08, 4.0599e-09, 7.7461e-10, 8.5002e-08,\n 5.0877e-09, 2.1204e-08, 6.0334e-11, 2.5205e-09, 5.7865e-10, 5.1604e-08,\n 9.6404e-11, 2.5628e-08, 8.9500e-08, 1.6325e-08, 2.5916e-09, 1.3746e-08,\n 2.1774e-10, 4.7422e-12, 1.1906e-08, 2.7444e-08, 6.6497e-08, 7.6358e-09,\n 9.2118e-09, 1.7303e-10, 2.3912e-08, 5.7510e-09, 4.8210e-10, 5.8337e-10,\n 1.4320e-09, 1.4348e-08, 6.3959e-11, 4.4246e-10, 1.9837e-09, 2.6145e-07,\n 4.1176e-09, 5.6724e-11, 6.5688e-10, 2.0857e-09, 4.2063e-08, 1.7795e-10,\n 3.1057e-08, 9.9723e-09, 5.4452e-09, 6.3788e-09, 5.4980e-10, 1.5808e-08,\n 1.1115e-08, 1.8168e-08, 1.6516e-08, 8.1723e-11, 8.3214e-08, 2.6220e-11,\n 1.0749e-08, 4.8621e-11, 5.7621e-09, 2.5200e-08, 6.3982e-10, 5.0265e-09,\n 1.7329e-08, 6.1092e-09, 5.7512e-08, 8.3288e-08, 1.4970e-07, 2.1240e-08,\n 2.7382e-09, 3.5624e-09, 1.0519e-07, 5.6747e-10, 1.0135e-08, 1.4691e-07,\n 8.2076e-10, 1.1361e-08, 1.3522e-07, 2.0886e-09, 3.0373e-09, 1.4160e-08,\n 2.2535e-08, 7.7308e-09, 8.7058e-09, 6.5255e-09, 1.3209e-08, 4.8729e-09,\n 1.4768e-08, 1.9358e-08, 2.2011e-09, 1.6368e-09, 1.0092e-08, 3.0268e-10,\n 9.2226e-08, 6.4623e-09, 3.9859e-10, 4.8656e-08, 1.4985e-08, 6.9409e-11,\n 1.8025e-08, 7.2889e-09, 1.6274e-07, 1.1523e-07, 2.4889e-09, 7.0976e-09,\n 2.8704e-09, 4.4942e-08, 9.1567e-10, 2.5008e-10], device='cuda:0')" + "exp_avg_sq": "tensor([4.1587e-12, 1.0258e-11, 5.2222e-08, 1.6983e-12, 1.7321e-08, 1.0815e-08,\n 2.5725e-09, 1.5157e-09, 5.2759e-08, 1.0933e-09, 1.3958e-09, 4.9774e-09,\n 6.0315e-11, 1.1297e-09, 2.4159e-10, 4.3801e-10, 3.9493e-09, 9.7146e-10,\n 1.3592e-10, 4.3434e-11, 3.7198e-09, 1.7985e-10, 5.0085e-09, 5.3230e-10,\n 4.5657e-09, 1.6291e-08, 1.9914e-09, 2.0465e-09, 7.3020e-10, 1.6358e-08,\n 5.6904e-10, 2.2056e-09, 8.5879e-08, 1.0431e-09, 5.6125e-10, 3.2186e-11,\n 5.6316e-09, 3.2565e-11, 3.4699e-09, 1.1054e-09, 2.6281e-09, 9.7948e-11,\n 2.9297e-11, 8.1461e-09, 2.6495e-12, 3.0328e-09, 1.3411e-09, 1.8016e-09,\n 3.4272e-10, 1.3033e-09, 1.0961e-08, 6.9497e-10, 4.2804e-09, 7.9403e-10,\n 4.4144e-10, 2.1614e-08, 1.2758e-10, 1.8357e-09, 2.2877e-09, 3.9338e-08,\n 4.0604e-09, 3.8439e-09, 2.0423e-10, 6.1403e-09, 3.5784e-09, 1.3649e-08,\n 3.9537e-11, 7.4309e-09, 5.3982e-10, 1.8043e-09, 1.8412e-09, 5.4105e-09,\n 2.2138e-10, 1.4547e-08, 1.5592e-08, 1.2709e-08, 8.9123e-10, 9.3765e-10,\n 2.7034e-10, 8.4813e-11, 3.7463e-09, 2.6543e-10, 4.1189e-10, 2.9595e-10,\n 3.7295e-09, 9.4862e-12, 1.1541e-08, 1.6290e-09, 8.8268e-10, 3.4934e-12,\n 2.3644e-11, 2.0222e-09, 6.0008e-09, 4.0728e-09, 1.1907e-08, 6.8144e-10,\n 1.8938e-08, 1.2543e-08, 5.9406e-10, 6.5294e-10, 2.1073e-09, 7.4401e-10,\n 1.3450e-09, 1.9550e-08, 2.8603e-09, 2.6739e-09, 4.2038e-09, 2.9097e-10,\n 2.9240e-09, 4.4050e-10, 1.1259e-10, 7.1108e-10, 7.1173e-10, 1.7799e-09,\n 2.9149e-10, 3.9559e-09, 4.6741e-10, 2.1354e-08, 2.0059e-10, 1.2378e-08,\n 2.2909e-08, 1.8146e-09, 1.6237e-08, 9.5174e-11, 8.0831e-10, 2.9796e-08,\n 1.6219e-09, 8.5196e-10, 8.6713e-10, 2.6594e-09, 7.3665e-11, 1.2729e-09,\n 6.7353e-08, 5.9664e-09, 9.2641e-11, 4.0398e-09, 2.3743e-09, 2.5249e-09,\n 2.1754e-08, 9.2357e-09, 1.8878e-09, 4.1877e-10, 5.8957e-09, 3.4569e-11,\n 2.3900e-11, 4.2855e-09, 3.0735e-09, 3.6681e-09, 5.8919e-11, 9.3767e-10,\n 1.2395e-09, 6.3439e-13, 4.7891e-09, 1.1601e-09, 2.2135e-10, 2.4290e-08,\n 1.4539e-09, 6.0592e-09, 1.7241e-11, 7.2027e-10, 1.6535e-10, 1.4746e-08,\n 2.7548e-11, 7.3235e-09, 2.5575e-08, 4.6651e-09, 7.4057e-10, 3.9279e-09,\n 6.2221e-11, 1.3551e-12, 3.4023e-09, 7.8422e-09, 1.9002e-08, 2.1820e-09,\n 2.6323e-09, 4.9445e-11, 6.8330e-09, 1.6434e-09, 1.3776e-10, 1.6670e-10,\n 4.0919e-10, 4.1001e-09, 1.8277e-11, 1.2644e-10, 5.6685e-10, 7.4711e-08,\n 1.1766e-09, 1.6209e-11, 1.8771e-10, 5.9601e-10, 1.2020e-08, 5.0851e-11,\n 8.8749e-09, 2.8497e-09, 1.5560e-09, 1.8228e-09, 1.5711e-10, 4.5174e-09,\n 3.1762e-09, 5.1915e-09, 4.7196e-09, 2.3353e-11, 2.3779e-08, 7.4926e-12,\n 3.0716e-09, 1.3894e-11, 1.6466e-09, 7.2011e-09, 1.8283e-10, 1.4364e-09,\n 4.9520e-09, 1.7457e-09, 1.6435e-08, 2.3800e-08, 4.2778e-08, 6.0696e-09,\n 7.8245e-10, 1.0180e-09, 3.0060e-08, 1.6216e-10, 2.8963e-09, 4.1981e-08,\n 2.3454e-10, 3.2466e-09, 3.8639e-08, 5.9683e-10, 8.6793e-10, 4.0462e-09,\n 6.4395e-09, 2.2091e-09, 2.4878e-09, 1.8647e-09, 3.7747e-09, 1.3925e-09,\n 4.2200e-09, 5.5317e-09, 6.2899e-10, 4.6773e-10, 2.8839e-09, 8.6494e-11,\n 2.6354e-08, 1.8467e-09, 1.1390e-10, 1.3904e-08, 4.2821e-09, 1.9834e-11,\n 5.1507e-09, 2.0828e-09, 4.6503e-08, 3.2928e-08, 7.1124e-10, 2.0282e-09,\n 8.2025e-10, 1.2842e-08, 2.6166e-10, 7.1461e-11], device='cuda:0')" }, "46": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([5.9177e-14, 4.3674e-13, 4.0599e-10, 8.1731e-15, 1.0502e-10, 7.0742e-11,\n 1.5675e-11, 1.6848e-11, 4.0216e-10, 9.2628e-12, 9.4432e-12, 2.9848e-11,\n 4.0116e-13, 6.5375e-12, 1.9535e-12, 5.6127e-12, 2.7472e-11, 7.0516e-12,\n 1.1750e-12, 7.9997e-13, 1.9940e-11, 3.2417e-12, 4.9856e-11, 3.0352e-12,\n 5.4011e-11, 6.9525e-11, 1.4267e-11, 1.3561e-11, 1.3622e-11, 1.1047e-10,\n 6.3244e-12, 1.4978e-11, 6.1610e-10, 5.0716e-12, 3.1218e-12, 5.5013e-13,\n 3.2153e-11, 2.0900e-12, 1.8260e-11, 9.9149e-12, 1.3347e-11, 5.1780e-13,\n 2.4074e-13, 5.5076e-11, 1.7888e-14, 2.2716e-11, 6.2712e-12, 1.8615e-11,\n 3.9483e-12, 6.1973e-12, 6.8250e-11, 3.0518e-12, 1.7665e-11, 4.2463e-12,\n 7.9817e-12, 2.1077e-10, 1.4521e-12, 9.7925e-12, 1.3024e-11, 2.7880e-10,\n 2.0097e-11, 2.1991e-11, 2.3499e-12, 2.2589e-11, 2.5075e-11, 7.6305e-11,\n 3.0812e-13, 4.2490e-11, 2.8010e-12, 9.2402e-12, 1.1690e-11, 4.6707e-11,\n 9.0077e-13, 8.7802e-11, 1.3785e-10, 6.6189e-11, 4.2711e-12, 4.4025e-12,\n 1.2451e-12, 2.0221e-13, 1.8371e-11, 1.2853e-12, 2.7879e-12, 1.4881e-12,\n 3.3399e-11, 2.4096e-13, 5.6328e-11, 2.4226e-11, 6.9125e-12, 3.1926e-14,\n 1.1685e-14, 1.3017e-11, 3.8122e-11, 2.0808e-11, 5.1396e-11, 2.2310e-12,\n 1.2382e-10, 6.8867e-11, 4.0992e-12, 8.8435e-12, 1.4906e-11, 5.1180e-12,\n 1.0930e-11, 1.6119e-10, 1.8384e-11, 1.2518e-11, 2.1609e-11, 2.0666e-12,\n 9.2230e-12, 3.6917e-12, 1.2494e-12, 9.9641e-12, 8.9202e-12, 8.3820e-12,\n 1.2824e-12, 3.8319e-11, 4.7213e-12, 2.5616e-10, 2.9060e-12, 1.0132e-10,\n 1.4469e-10, 1.3854e-11, 1.3499e-10, 3.7758e-12, 8.3596e-12, 2.2741e-10,\n 5.1262e-12, 3.4013e-12, 6.7295e-12, 1.8286e-11, 8.0510e-13, 7.0748e-12,\n 8.4878e-10, 2.3642e-11, 8.4548e-13, 3.1628e-11, 3.1160e-11, 1.8073e-11,\n 1.8491e-10, 3.3624e-11, 1.3571e-11, 3.5155e-12, 3.3077e-11, 3.9431e-13,\n 2.8146e-13, 5.0565e-11, 1.7177e-11, 2.0201e-11, 5.7950e-13, 4.8948e-12,\n 1.4453e-11, 2.7632e-14, 2.1504e-11, 6.3091e-12, 1.2108e-12, 1.2124e-10,\n 6.9006e-12, 2.8039e-11, 6.0469e-14, 4.7661e-12, 1.2752e-12, 1.0820e-10,\n 1.0105e-12, 5.7325e-11, 2.1511e-10, 2.1398e-11, 5.5194e-12, 3.0383e-11,\n 9.1787e-13, 1.6579e-13, 2.3492e-11, 1.2072e-10, 1.4984e-10, 1.0586e-11,\n 2.5346e-11, 4.6812e-13, 3.2520e-11, 8.8276e-12, 3.1540e-12, 3.3099e-12,\n 3.5601e-12, 3.6144e-11, 2.1506e-13, 1.1101e-12, 8.9983e-12, 7.2843e-10,\n 4.9715e-12, 1.9465e-13, 1.0227e-12, 6.9108e-12, 1.2788e-10, 7.6405e-13,\n 4.8407e-11, 1.4165e-11, 1.2003e-11, 1.5855e-11, 8.2326e-13, 2.2234e-11,\n 1.5224e-11, 2.5299e-11, 3.1982e-11, 4.0820e-13, 9.6178e-11, 1.7131e-13,\n 1.7290e-11, 1.4721e-13, 8.5293e-12, 4.3195e-11, 4.6982e-12, 1.1007e-11,\n 2.6703e-11, 8.0029e-12, 1.2783e-10, 1.2813e-10, 2.3025e-10, 5.6298e-11,\n 6.3499e-12, 4.9077e-12, 2.0867e-10, 2.0415e-12, 1.4704e-11, 3.5500e-10,\n 2.0011e-12, 2.8523e-11, 3.7389e-10, 3.6111e-12, 1.4578e-11, 1.6663e-11,\n 3.7184e-11, 1.5596e-11, 1.5875e-11, 9.0274e-12, 3.4068e-11, 8.5678e-12,\n 2.0749e-11, 4.0331e-11, 3.4806e-12, 2.5813e-12, 1.9748e-11, 8.1590e-13,\n 2.9520e-10, 1.0671e-11, 1.6633e-12, 7.1504e-11, 1.6486e-11, 1.7020e-13,\n 3.4540e-11, 8.4741e-12, 4.1221e-10, 4.7291e-10, 3.7304e-12, 8.1733e-12,\n 4.8701e-12, 1.1096e-10, 2.2744e-12, 4.9224e-13], device='cuda:0')" + "exp_avg_sq": "tensor([1.6910e-14, 1.2480e-13, 1.1601e-10, 2.3355e-15, 3.0010e-11, 2.0215e-11,\n 4.4793e-12, 4.8143e-12, 1.1492e-10, 2.6469e-12, 2.6985e-12, 8.5292e-12,\n 1.1463e-13, 1.8682e-12, 5.5823e-13, 1.6039e-12, 7.8503e-12, 2.0151e-12,\n 3.3576e-13, 2.2860e-13, 5.6979e-12, 9.2634e-13, 1.4247e-11, 8.6733e-13,\n 1.5434e-11, 1.9867e-11, 4.0770e-12, 3.8751e-12, 3.8925e-12, 3.1569e-11,\n 1.8073e-12, 4.2801e-12, 1.7605e-10, 1.4493e-12, 8.9209e-13, 1.5721e-13,\n 9.1879e-12, 5.9724e-13, 5.2179e-12, 2.8333e-12, 3.8141e-12, 1.4796e-13,\n 6.8793e-14, 1.5738e-11, 5.1115e-15, 6.4912e-12, 1.7920e-12, 5.3195e-12,\n 1.1283e-12, 1.7709e-12, 1.9503e-11, 8.7206e-13, 5.0479e-12, 1.2134e-12,\n 2.2808e-12, 6.0230e-11, 4.1495e-13, 2.7983e-12, 3.7216e-12, 7.9669e-11,\n 5.7428e-12, 6.2840e-12, 6.7150e-13, 6.4550e-12, 7.1654e-12, 2.1805e-11,\n 8.8047e-14, 1.2142e-11, 8.0042e-13, 2.6405e-12, 3.3404e-12, 1.3347e-11,\n 2.5740e-13, 2.5090e-11, 3.9393e-11, 1.8914e-11, 1.2205e-12, 1.2580e-12,\n 3.5579e-13, 5.7782e-14, 5.2497e-12, 3.6730e-13, 7.9667e-13, 4.2524e-13,\n 9.5440e-12, 6.8857e-14, 1.6096e-11, 6.9227e-12, 1.9753e-12, 9.1232e-15,\n 3.3390e-15, 3.7197e-12, 1.0894e-11, 5.9461e-12, 1.4687e-11, 6.3752e-13,\n 3.5383e-11, 1.9679e-11, 1.1714e-12, 2.5271e-12, 4.2596e-12, 1.4625e-12,\n 3.1234e-12, 4.6063e-11, 5.2533e-12, 3.5770e-12, 6.1751e-12, 5.9055e-13,\n 2.6355e-12, 1.0549e-12, 3.5702e-13, 2.8473e-12, 2.5490e-12, 2.3952e-12,\n 3.6645e-13, 1.0950e-11, 1.3492e-12, 7.3201e-11, 8.3040e-13, 2.8953e-11,\n 4.1346e-11, 3.9590e-12, 3.8575e-11, 1.0790e-12, 2.3888e-12, 6.4983e-11,\n 1.4648e-12, 9.7196e-13, 1.9230e-12, 5.2255e-12, 2.3006e-13, 2.0217e-12,\n 2.4254e-10, 6.7560e-12, 2.4160e-13, 9.0379e-12, 8.9043e-12, 5.1645e-12,\n 5.2840e-11, 9.6082e-12, 3.8779e-12, 1.0046e-12, 9.4522e-12, 1.1268e-13,\n 8.0429e-14, 1.4449e-11, 4.9085e-12, 5.7726e-12, 1.6560e-13, 1.3987e-12,\n 4.1301e-12, 7.8962e-15, 6.1451e-12, 1.8029e-12, 3.4600e-13, 3.4646e-11,\n 1.9719e-12, 8.0123e-12, 1.7280e-14, 1.3620e-12, 3.6441e-13, 3.0919e-11,\n 2.8876e-13, 1.6381e-11, 6.1469e-11, 6.1147e-12, 1.5772e-12, 8.6822e-12,\n 2.6229e-13, 4.7375e-14, 6.7130e-12, 3.4496e-11, 4.2818e-11, 3.0251e-12,\n 7.2428e-12, 1.3377e-13, 9.2929e-12, 2.5225e-12, 9.0127e-13, 9.4583e-13,\n 1.0173e-12, 1.0329e-11, 6.1456e-14, 3.1722e-13, 2.5713e-12, 2.0816e-10,\n 1.4206e-12, 5.5624e-14, 2.9226e-13, 1.9748e-12, 3.6542e-11, 2.1833e-13,\n 1.3833e-11, 4.0476e-12, 3.4298e-12, 4.5307e-12, 2.3525e-13, 6.3536e-12,\n 4.3504e-12, 7.2294e-12, 9.1392e-12, 1.1665e-13, 2.7484e-11, 4.8954e-14,\n 4.9409e-12, 4.2065e-14, 2.4373e-12, 1.2343e-11, 1.3425e-12, 3.1453e-12,\n 7.6306e-12, 2.2869e-12, 3.6529e-11, 3.6614e-11, 6.5797e-11, 1.6088e-11,\n 1.8145e-12, 1.4024e-12, 5.9630e-11, 5.8338e-13, 4.2019e-12, 1.0144e-10,\n 5.7183e-13, 8.1506e-12, 1.0684e-10, 1.0319e-12, 4.1657e-12, 4.7616e-12,\n 1.0626e-11, 4.4567e-12, 4.5365e-12, 2.5797e-12, 9.7351e-12, 2.4483e-12,\n 5.9293e-12, 1.1525e-11, 9.9460e-13, 7.3762e-13, 5.6430e-12, 2.3315e-13,\n 8.4356e-11, 3.0493e-12, 4.7529e-13, 2.0433e-11, 4.7109e-12, 4.8635e-14,\n 9.8702e-12, 2.4215e-12, 1.1779e-10, 1.3514e-10, 1.0660e-12, 2.3356e-12,\n 1.3917e-12, 3.1707e-11, 6.4994e-13, 1.4066e-13], device='cuda:0')" }, "47": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.5034e-13, 3.0558e-13, 5.0220e-10, 7.4021e-14, 1.4859e-10, 9.2627e-11,\n 2.5812e-11, 1.7779e-11, 4.9459e-10, 1.1171e-11, 1.5511e-11, 4.3502e-11,\n 9.0883e-13, 1.3050e-11, 3.7231e-12, 4.9256e-12, 3.9903e-11, 1.1064e-11,\n 1.7771e-12, 1.0688e-12, 3.7981e-11, 3.2034e-12, 4.5169e-11, 6.4063e-12,\n 5.0037e-11, 1.4332e-10, 2.1854e-11, 1.9422e-11, 1.1224e-11, 1.6219e-10,\n 5.6979e-12, 2.0247e-11, 8.1846e-10, 8.7898e-12, 6.1106e-12, 8.0141e-13,\n 4.5869e-11, 2.1560e-12, 3.7253e-11, 1.2549e-11, 2.3366e-11, 1.4393e-12,\n 4.3655e-13, 8.6725e-11, 4.5992e-14, 2.3802e-11, 1.3557e-11, 2.2125e-11,\n 4.4467e-12, 1.4154e-11, 1.1572e-10, 5.1261e-12, 3.7429e-11, 7.6045e-12,\n 6.4402e-12, 1.9145e-10, 2.1526e-12, 1.4797e-11, 2.0894e-11, 3.8656e-10,\n 3.2738e-11, 3.1161e-11, 2.8504e-12, 6.0751e-11, 3.6287e-11, 1.1256e-10,\n 6.8005e-13, 6.4252e-11, 5.8352e-12, 1.9581e-11, 2.0659e-11, 5.8531e-11,\n 1.6403e-12, 1.4399e-10, 1.5589e-10, 1.1552e-10, 1.0083e-11, 1.0758e-11,\n 3.2215e-12, 3.2926e-13, 3.9956e-11, 2.6548e-12, 4.4879e-12, 3.3961e-12,\n 2.9654e-11, 7.2802e-13, 1.1802e-10, 2.0694e-11, 1.1130e-11, 4.4429e-14,\n 1.1543e-13, 1.8026e-11, 5.1540e-11, 3.7677e-11, 1.0702e-10, 5.4169e-12,\n 1.7225e-10, 1.1187e-10, 5.5506e-12, 9.4470e-12, 2.2872e-11, 8.2700e-12,\n 1.5688e-11, 1.7525e-10, 2.9472e-11, 2.3421e-11, 4.2287e-11, 3.8314e-12,\n 2.3061e-11, 4.5213e-12, 1.2388e-12, 9.5449e-12, 6.8431e-12, 1.4163e-11,\n 3.6336e-12, 3.8945e-11, 6.5280e-12, 1.8221e-10, 3.5094e-12, 1.2347e-10,\n 2.2595e-10, 2.0919e-11, 1.4620e-10, 5.7312e-12, 1.0397e-11, 2.7861e-10,\n 1.3252e-11, 9.0699e-12, 1.0030e-11, 2.7662e-11, 1.3502e-12, 1.4261e-11,\n 6.0084e-10, 5.2339e-11, 1.3492e-12, 3.4167e-11, 2.4806e-11, 2.0196e-11,\n 1.8832e-10, 8.8604e-11, 1.9468e-11, 4.5410e-12, 5.0993e-11, 8.4975e-13,\n 3.9818e-13, 4.5454e-11, 2.2470e-11, 3.6330e-11, 1.2710e-12, 1.0776e-11,\n 1.5058e-11, 1.0678e-15, 4.3637e-11, 9.8284e-12, 2.4194e-12, 2.1024e-10,\n 1.3512e-11, 5.4541e-11, 8.1776e-14, 8.8474e-12, 2.4709e-12, 1.4430e-10,\n 1.2686e-12, 7.3103e-11, 2.5264e-10, 4.1750e-11, 6.1652e-12, 3.4455e-11,\n 1.3857e-12, 1.8459e-13, 2.7697e-11, 8.1531e-11, 1.6512e-10, 2.3372e-11,\n 2.2880e-11, 6.8673e-13, 6.8208e-11, 1.7559e-11, 3.2527e-12, 2.8840e-12,\n 5.7498e-12, 3.8521e-11, 2.8877e-13, 1.7633e-12, 7.9339e-12, 7.1345e-10,\n 1.0164e-11, 4.0318e-13, 2.3675e-12, 8.2168e-12, 1.0571e-10, 1.5674e-12,\n 7.7458e-11, 3.1525e-11, 1.4094e-11, 2.0739e-11, 1.8384e-12, 3.9795e-11,\n 2.9095e-11, 5.3522e-11, 4.1020e-11, 1.3236e-12, 2.2291e-10, 5.4470e-13,\n 2.5195e-11, 1.1014e-13, 1.7840e-11, 6.7270e-11, 4.0235e-12, 1.5464e-11,\n 4.8400e-11, 1.3670e-11, 1.6036e-10, 2.1838e-10, 4.2185e-10, 5.0750e-11,\n 9.9615e-12, 8.4489e-12, 2.6828e-10, 2.5505e-12, 2.3782e-11, 3.9203e-10,\n 3.0988e-12, 3.5173e-11, 3.7458e-10, 3.6559e-12, 1.1067e-11, 4.0824e-11,\n 6.5731e-11, 2.0951e-11, 2.5928e-11, 1.8663e-11, 2.9807e-11, 1.4926e-11,\n 4.1516e-11, 5.5633e-11, 6.4157e-12, 5.0951e-12, 2.3627e-11, 1.2041e-12,\n 2.2968e-10, 1.8233e-11, 2.9417e-12, 1.3755e-10, 3.5512e-11, 4.2743e-13,\n 4.9426e-11, 1.7921e-11, 4.4623e-10, 3.2531e-10, 8.1223e-12, 1.7478e-11,\n 6.5131e-12, 1.2493e-10, 3.3728e-12, 1.1141e-12], device='cuda:0')" + "exp_avg_sq": "tensor([4.2961e-14, 8.7322e-14, 1.4351e-10, 2.1152e-14, 4.2460e-11, 2.6469e-11,\n 7.3760e-12, 5.0806e-12, 1.4133e-10, 3.1921e-12, 4.4323e-12, 1.2431e-11,\n 2.5971e-13, 3.7291e-12, 1.0639e-12, 1.4075e-12, 1.1403e-11, 3.1617e-12,\n 5.0781e-13, 3.0541e-13, 1.0853e-11, 9.1541e-13, 1.2907e-11, 1.8306e-12,\n 1.4299e-11, 4.0954e-11, 6.2449e-12, 5.5499e-12, 3.2075e-12, 4.6348e-11,\n 1.6282e-12, 5.7856e-12, 2.3388e-10, 2.5118e-12, 1.7461e-12, 2.2901e-13,\n 1.3107e-11, 6.1610e-13, 1.0645e-11, 3.5860e-12, 6.6771e-12, 4.1129e-13,\n 1.2475e-13, 2.4782e-11, 1.3143e-14, 6.8017e-12, 3.8740e-12, 6.3224e-12,\n 1.2707e-12, 4.0446e-12, 3.3068e-11, 1.4648e-12, 1.0696e-11, 2.1730e-12,\n 1.8403e-12, 5.4709e-11, 6.1511e-13, 4.2284e-12, 5.9706e-12, 1.1046e-10,\n 9.3553e-12, 8.9044e-12, 8.1452e-13, 1.7360e-11, 1.0369e-11, 3.2166e-11,\n 1.9433e-13, 1.8360e-11, 1.6675e-12, 5.5954e-12, 5.9034e-12, 1.6726e-11,\n 4.6873e-13, 4.1145e-11, 4.4548e-11, 3.3011e-11, 2.8812e-12, 3.0741e-12,\n 9.2058e-13, 9.4088e-14, 1.1418e-11, 7.5864e-13, 1.2825e-12, 9.7047e-13,\n 8.4739e-12, 2.0804e-13, 3.3725e-11, 5.9136e-12, 3.1804e-12, 1.2696e-14,\n 3.2986e-14, 5.1512e-12, 1.4728e-11, 1.0767e-11, 3.0582e-11, 1.5479e-12,\n 4.9221e-11, 3.1968e-11, 1.5861e-12, 2.6995e-12, 6.5358e-12, 2.3632e-12,\n 4.4830e-12, 5.0080e-11, 8.4217e-12, 6.6929e-12, 1.2084e-11, 1.0948e-12,\n 6.5900e-12, 1.2920e-12, 3.5401e-13, 2.7275e-12, 1.9555e-12, 4.0471e-12,\n 1.0383e-12, 1.1129e-11, 1.8654e-12, 5.2068e-11, 1.0028e-12, 3.5284e-11,\n 6.4567e-11, 5.9778e-12, 4.1777e-11, 1.6377e-12, 2.9711e-12, 7.9615e-11,\n 3.7869e-12, 2.5918e-12, 2.8660e-12, 7.9046e-12, 3.8584e-13, 4.0753e-12,\n 1.7169e-10, 1.4956e-11, 3.8554e-13, 9.7635e-12, 7.0886e-12, 5.7711e-12,\n 5.3813e-11, 2.5319e-11, 5.5632e-12, 1.2976e-12, 1.4572e-11, 2.4282e-13,\n 1.1378e-13, 1.2989e-11, 6.4210e-12, 1.0382e-11, 3.6319e-13, 3.0793e-12,\n 4.3029e-12, 3.0514e-16, 1.2469e-11, 2.8085e-12, 6.9136e-13, 6.0078e-11,\n 3.8611e-12, 1.5585e-11, 2.3368e-14, 2.5282e-12, 7.0607e-13, 4.1235e-11,\n 3.6250e-13, 2.0890e-11, 7.2194e-11, 1.1930e-11, 1.7617e-12, 9.8458e-12,\n 3.9597e-13, 5.2749e-14, 7.9145e-12, 2.3298e-11, 4.7184e-11, 6.6788e-12,\n 6.5383e-12, 1.9624e-13, 1.9491e-11, 5.0178e-12, 9.2949e-13, 8.2414e-13,\n 1.6430e-12, 1.1008e-11, 8.2518e-14, 5.0388e-13, 2.2672e-12, 2.0388e-10,\n 2.9045e-12, 1.1521e-13, 6.7653e-13, 2.3480e-12, 3.0208e-11, 4.4789e-13,\n 2.2134e-11, 9.0086e-12, 4.0276e-12, 5.9263e-12, 5.2535e-13, 1.1372e-11,\n 8.3141e-12, 1.5294e-11, 1.1722e-11, 3.7822e-13, 6.3699e-11, 1.5565e-13,\n 7.1998e-12, 3.1474e-14, 5.0979e-12, 1.9223e-11, 1.1497e-12, 4.4189e-12,\n 1.3831e-11, 3.9063e-12, 4.5823e-11, 6.2403e-11, 1.2055e-10, 1.4502e-11,\n 2.8466e-12, 2.4143e-12, 7.6664e-11, 7.2881e-13, 6.7959e-12, 1.1203e-10,\n 8.8550e-13, 1.0051e-11, 1.0704e-10, 1.0447e-12, 3.1626e-12, 1.1666e-11,\n 1.8783e-11, 5.9869e-12, 7.4090e-12, 5.3331e-12, 8.5175e-12, 4.2652e-12,\n 1.1864e-11, 1.5898e-11, 1.8333e-12, 1.4560e-12, 6.7515e-12, 3.4409e-13,\n 6.5633e-11, 5.2101e-12, 8.4061e-13, 3.9307e-11, 1.0148e-11, 1.2214e-13,\n 1.4124e-11, 5.1210e-12, 1.2752e-10, 9.2959e-11, 2.3210e-12, 4.9945e-12,\n 1.8612e-12, 3.5700e-11, 9.6382e-13, 3.1836e-13], device='cuda:0')" }, "48": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([[ 5.6052e-45, -5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 0.0000e+00, 5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[2.0848e-14, 3.7881e-15, 5.9209e-14, ..., 6.8999e-14, 0.0000e+00,\n 9.3566e-15],\n [4.2023e-15, 1.7256e-14, 2.1814e-13, ..., 2.9491e-13, 0.0000e+00,\n 5.1554e-13],\n [3.4858e-11, 6.8150e-13, 1.4574e-10, ..., 3.7024e-11, 0.0000e+00,\n 2.0905e-10],\n ...,\n [4.6014e-12, 1.9052e-13, 5.3011e-11, ..., 1.5693e-11, 0.0000e+00,\n 1.6870e-11],\n [5.7259e-13, 6.6777e-16, 1.7381e-14, ..., 7.2970e-14, 0.0000e+00,\n 7.4797e-14],\n [2.1502e-15, 1.2772e-15, 4.7563e-13, ..., 4.0397e-14, 0.0000e+00,\n 3.1875e-13]], device='cuda:0')" + "exp_avg_sq": "tensor([[5.9575e-15, 1.0825e-15, 1.6920e-14, ..., 1.9717e-14, 0.0000e+00,\n 2.6737e-15],\n [1.2008e-15, 4.9309e-15, 6.2334e-14, ..., 8.4273e-14, 0.0000e+00,\n 1.4732e-13],\n [9.9611e-12, 1.9475e-13, 4.1646e-11, ..., 1.0580e-11, 0.0000e+00,\n 5.9738e-11],\n ...,\n [1.3149e-12, 5.4443e-14, 1.5148e-11, ..., 4.4843e-12, 0.0000e+00,\n 4.8208e-12],\n [1.6362e-13, 1.9082e-16, 4.9669e-15, ..., 2.0852e-14, 0.0000e+00,\n 2.1374e-14],\n [6.1442e-16, 3.6497e-16, 1.3591e-13, ..., 1.1544e-14, 0.0000e+00,\n 9.1086e-14]], device='cuda:0')" }, "49": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.7829e-11, 1.5563e-10, 8.3490e-08, 8.6703e-11, 1.3618e-08, 4.3884e-08,\n 2.0584e-08, 4.6375e-09, 2.1912e-09, 4.4793e-09, 5.7811e-08, 1.7025e-08,\n 3.1786e-09, 1.1363e-08, 4.9332e-11, 3.2226e-09, 8.5686e-09, 4.0530e-09,\n 6.4755e-10, 1.4612e-09, 2.0949e-08, 4.0466e-10, 1.7101e-09, 6.4497e-09,\n 1.0802e-08, 5.6401e-09, 8.9396e-09, 1.8183e-08, 3.2443e-09, 3.2776e-08,\n 4.4423e-08, 5.1886e-08, 2.8175e-07, 8.8816e-10, 1.2286e-08, 2.4462e-10,\n 2.5156e-08, 4.1539e-10, 9.2806e-08, 2.6925e-09, 1.4818e-08, 3.4684e-10,\n 1.5355e-09, 6.0444e-08, 2.7519e-11, 3.0403e-09, 1.2959e-08, 8.9495e-09,\n 1.7588e-09, 1.7050e-08, 3.1461e-08, 1.0509e-08, 3.4328e-08, 2.7988e-09,\n 3.5582e-09, 7.8829e-08, 2.7277e-10, 1.2031e-08, 6.2881e-08, 7.8042e-08,\n 9.9784e-09, 8.8990e-09, 3.5404e-10, 6.1545e-08, 2.9381e-09, 8.1768e-08,\n 9.9343e-11, 2.0760e-08, 1.1687e-08, 1.1738e-08, 2.4124e-08, 3.8873e-08,\n 1.0375e-08, 2.8663e-08, 1.2666e-08, 4.8494e-08, 2.7793e-08, 1.0957e-08,\n 1.6871e-09, 6.5042e-11, 5.7761e-08, 4.1801e-09, 3.3324e-09, 2.2417e-08,\n 5.2769e-09, 7.7439e-10, 1.2517e-07, 3.8554e-09, 4.4134e-09, 1.8780e-10,\n 4.6534e-11, 4.7445e-09, 1.7770e-08, 3.9070e-09, 1.0907e-07, 3.3661e-09,\n 6.4552e-08, 8.9206e-08, 1.2859e-08, 7.7719e-09, 7.8598e-09, 3.3337e-09,\n 1.4869e-09, 2.5759e-08, 1.4886e-08, 1.0526e-08, 1.5208e-08, 1.1728e-09,\n 9.8857e-09, 1.1913e-08, 5.6298e-09, 1.4109e-09, 6.9029e-08, 1.7341e-09,\n 2.1609e-09, 6.8805e-08, 2.2268e-09, 1.4307e-08, 2.6011e-09, 4.4070e-09,\n 1.4894e-07, 1.2354e-08, 5.5504e-08, 9.9131e-10, 5.8975e-10, 1.7411e-07,\n 2.2723e-08, 1.1869e-09, 5.9781e-10, 2.5373e-08, 3.6894e-12, 5.1936e-09,\n 9.1244e-08, 5.7662e-08, 2.3445e-10, 5.8324e-09, 1.4102e-08, 4.7722e-09,\n 5.1605e-08, 3.7746e-08, 6.0342e-09, 5.5005e-09, 2.5641e-08, 3.9663e-11,\n 1.9642e-09, 2.9622e-09, 1.9008e-09, 3.1387e-08, 1.8275e-09, 2.9851e-09,\n 4.1363e-09, 5.3952e-09, 1.2196e-08, 9.2026e-09, 3.5362e-09, 4.2005e-08,\n 9.3417e-10, 1.3771e-07, 4.9598e-11, 2.5482e-09, 1.0334e-09, 3.5406e-09,\n 5.3178e-11, 6.4415e-09, 3.9806e-08, 2.5961e-08, 1.5958e-09, 7.4050e-09,\n 7.4735e-12, 1.2920e-09, 6.0022e-10, 1.1060e-08, 8.8765e-09, 1.0975e-08,\n 5.7802e-09, 1.4478e-09, 4.7364e-08, 2.7605e-08, 2.7242e-10, 6.9639e-11,\n 7.5214e-09, 2.0386e-08, 5.7092e-11, 4.3724e-09, 3.0472e-09, 5.9817e-08,\n 1.2064e-09, 3.0907e-10, 3.2471e-09, 1.4106e-09, 6.0181e-09, 7.0113e-11,\n 1.4240e-08, 4.6586e-09, 3.2931e-08, 4.0110e-09, 1.8383e-09, 1.3320e-08,\n 6.4724e-08, 6.2443e-09, 9.1652e-09, 1.9433e-10, 1.1056e-07, 2.4633e-11,\n 1.3690e-08, 5.5646e-11, 1.5060e-08, 1.4969e-08, 6.3108e-10, 9.2650e-09,\n 8.7331e-08, 5.1777e-09, 6.1579e-09, 7.3628e-08, 2.9189e-07, 3.7073e-09,\n 3.0425e-09, 3.0826e-09, 1.5397e-07, 1.6889e-10, 2.8583e-09, 2.9369e-07,\n 3.9682e-10, 2.2370e-08, 1.0521e-07, 1.8270e-09, 3.1479e-09, 4.6245e-08,\n 1.1260e-08, 3.3544e-09, 6.1382e-08, 1.3228e-08, 7.5718e-10, 4.8881e-09,\n 2.1025e-08, 2.0865e-08, 3.1247e-09, 3.8918e-09, 2.2726e-09, 1.4288e-09,\n 1.2920e-08, 9.6615e-09, 7.8925e-10, 1.3327e-07, 1.7810e-08, 1.6353e-10,\n 1.4612e-08, 6.0146e-08, 7.5169e-08, 6.5520e-08, 2.0699e-09, 7.1549e-09,\n 2.8595e-09, 2.7235e-08, 2.9564e-10, 7.5709e-11], device='cuda:0')" + "exp_avg_sq": "tensor([5.0947e-12, 4.4473e-11, 2.3858e-08, 2.4776e-11, 3.8914e-09, 1.2540e-08,\n 5.8821e-09, 1.3252e-09, 6.2616e-10, 1.2800e-09, 1.6520e-08, 4.8650e-09,\n 9.0830e-10, 3.2470e-09, 1.4097e-11, 9.2087e-10, 2.4485e-09, 1.1582e-09,\n 1.8504e-10, 4.1756e-10, 5.9863e-09, 1.1563e-10, 4.8867e-10, 1.8430e-09,\n 3.0868e-09, 1.6117e-09, 2.5546e-09, 5.1959e-09, 9.2708e-10, 9.3659e-09,\n 1.2694e-08, 1.4827e-08, 8.0513e-08, 2.5380e-10, 3.5108e-09, 6.9901e-11,\n 7.1885e-09, 1.1870e-10, 2.6520e-08, 7.6939e-10, 4.2342e-09, 9.9113e-11,\n 4.3879e-10, 1.7272e-08, 7.8637e-12, 8.6879e-10, 3.7030e-09, 2.5574e-09,\n 5.0259e-10, 4.8721e-09, 8.9902e-09, 3.0031e-09, 9.8094e-09, 7.9978e-10,\n 1.0168e-09, 2.2526e-08, 7.7947e-11, 3.4379e-09, 1.7969e-08, 2.2301e-08,\n 2.8514e-09, 2.5430e-09, 1.0117e-10, 1.7587e-08, 8.3959e-10, 2.3366e-08,\n 2.8388e-11, 5.9323e-09, 3.3395e-09, 3.3541e-09, 6.8937e-09, 1.1108e-08,\n 2.9647e-09, 8.1908e-09, 3.6194e-09, 1.3858e-08, 7.9422e-09, 3.1311e-09,\n 4.8211e-10, 1.8586e-11, 1.6506e-08, 1.1945e-09, 9.5226e-10, 6.4057e-09,\n 1.5079e-09, 2.2129e-10, 3.5769e-08, 1.1017e-09, 1.2612e-09, 5.3666e-11,\n 1.3298e-11, 1.3558e-09, 5.0779e-09, 1.1164e-09, 3.1166e-08, 9.6190e-10,\n 1.8446e-08, 2.5491e-08, 3.6745e-09, 2.2209e-09, 2.2460e-09, 9.5263e-10,\n 4.2489e-10, 7.3610e-09, 4.2538e-09, 3.0078e-09, 4.3458e-09, 3.3514e-10,\n 2.8249e-09, 3.4042e-09, 1.6088e-09, 4.0317e-10, 1.9726e-08, 4.9554e-10,\n 6.1749e-10, 1.9661e-08, 6.3633e-10, 4.0883e-09, 7.4328e-10, 1.2593e-09,\n 4.2560e-08, 3.5303e-09, 1.5861e-08, 2.8327e-10, 1.6853e-10, 4.9753e-08,\n 6.4933e-09, 3.3917e-10, 1.7083e-10, 7.2506e-09, 1.0543e-12, 1.4841e-09,\n 2.6074e-08, 1.6477e-08, 6.6996e-11, 1.6666e-09, 4.0297e-09, 1.3637e-09,\n 1.4747e-08, 1.0786e-08, 1.7243e-09, 1.5718e-09, 7.3272e-09, 1.1334e-11,\n 5.6128e-10, 8.4648e-10, 5.4316e-10, 8.9691e-09, 5.2223e-10, 8.5303e-10,\n 1.1820e-09, 1.5417e-09, 3.4850e-09, 2.6297e-09, 1.0105e-09, 1.2003e-08,\n 2.6695e-10, 3.9351e-08, 1.4173e-11, 7.2817e-10, 2.9532e-10, 1.0118e-09,\n 1.5196e-11, 1.8407e-09, 1.1375e-08, 7.4185e-09, 4.5602e-10, 2.1160e-09,\n 2.1356e-12, 3.6921e-10, 1.7152e-10, 3.1605e-09, 2.5365e-09, 3.1362e-09,\n 1.6517e-09, 4.1372e-10, 1.3535e-08, 7.8884e-09, 7.7845e-11, 1.9900e-11,\n 2.1493e-09, 5.8254e-09, 1.6315e-11, 1.2494e-09, 8.7076e-10, 1.7093e-08,\n 3.4473e-10, 8.8320e-11, 9.2788e-10, 4.0309e-10, 1.7197e-09, 2.0035e-11,\n 4.0691e-09, 1.3312e-09, 9.4103e-09, 1.1462e-09, 5.2530e-10, 3.8064e-09,\n 1.8495e-08, 1.7844e-09, 2.6190e-09, 5.5531e-11, 3.1594e-08, 7.0392e-12,\n 3.9121e-09, 1.5901e-11, 4.3035e-09, 4.2775e-09, 1.8034e-10, 2.6475e-09,\n 2.4955e-08, 1.4796e-09, 1.7597e-09, 2.1040e-08, 8.3409e-08, 1.0594e-09,\n 8.6943e-10, 8.8088e-10, 4.3999e-08, 4.8261e-11, 8.1679e-10, 8.3923e-08,\n 1.1339e-10, 6.3925e-09, 3.0066e-08, 5.2207e-10, 8.9954e-10, 1.3215e-08,\n 3.2176e-09, 9.5854e-10, 1.7540e-08, 3.7799e-09, 2.1637e-10, 1.3968e-09,\n 6.0081e-09, 5.9623e-09, 8.9291e-10, 1.1121e-09, 6.4941e-10, 4.0829e-10,\n 3.6919e-09, 2.7609e-09, 2.2553e-10, 3.8084e-08, 5.0894e-09, 4.6730e-11,\n 4.1755e-09, 1.7187e-08, 2.1480e-08, 1.8723e-08, 5.9150e-10, 2.0446e-09,\n 8.1711e-10, 7.7826e-09, 8.4481e-11, 2.1635e-11], device='cuda:0')" }, "50": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.3804e-14, 5.9536e-13, 1.2375e-10, 1.3120e-12, 1.1787e-11, 1.0007e-10,\n 5.7203e-11, 1.0272e-11, 2.1423e-11, 7.7292e-12, 1.2350e-10, 3.0286e-11,\n 1.3826e-11, 2.0664e-11, 2.0203e-13, 1.1527e-11, 1.4294e-11, 8.0537e-12,\n 2.4898e-12, 2.6770e-12, 3.9075e-11, 6.6080e-13, 3.2959e-12, 9.1213e-12,\n 2.0468e-11, 1.1637e-11, 1.1423e-11, 7.9304e-11, 2.1158e-11, 3.9894e-11,\n 5.9166e-11, 1.1496e-10, 3.9767e-10, 1.0474e-12, 4.7797e-11, 3.3954e-13,\n 5.0343e-11, 3.1102e-12, 2.9293e-10, 3.6903e-12, 1.5256e-11, 1.0027e-12,\n 2.7475e-12, 9.0904e-11, 2.4637e-15, 3.5337e-12, 3.0983e-11, 1.9465e-11,\n 8.9280e-12, 2.8464e-11, 6.2403e-11, 2.0151e-11, 6.1242e-11, 3.5562e-12,\n 7.5653e-12, 1.8294e-10, 7.0797e-13, 3.4458e-11, 9.0286e-11, 1.0087e-10,\n 1.0685e-11, 1.2898e-11, 5.8646e-13, 1.1419e-10, 4.5902e-12, 2.3846e-10,\n 2.7843e-13, 2.6350e-11, 2.9641e-11, 1.3941e-11, 5.7002e-11, 9.0947e-11,\n 2.5459e-11, 3.8520e-11, 1.6600e-11, 7.3495e-11, 5.7844e-11, 1.5812e-11,\n 2.3585e-12, 1.9682e-13, 9.7130e-11, 1.2038e-11, 8.9140e-12, 8.1180e-11,\n 6.6865e-12, 2.0570e-12, 2.6673e-10, 4.2774e-12, 8.4123e-12, 2.9081e-13,\n 2.9358e-15, 8.8771e-12, 2.8526e-11, 7.5147e-12, 2.2681e-10, 4.8777e-12,\n 8.5885e-11, 1.9426e-10, 1.7959e-11, 4.5074e-11, 1.4868e-11, 5.8614e-12,\n 2.5461e-12, 4.5040e-11, 2.7013e-11, 1.5489e-11, 2.5494e-11, 3.5555e-12,\n 1.2983e-11, 1.4119e-11, 9.0043e-12, 1.9310e-12, 1.2272e-10, 1.5564e-12,\n 2.3838e-12, 1.0062e-10, 5.5922e-12, 2.4499e-11, 9.3571e-12, 7.3243e-12,\n 3.6715e-10, 1.7753e-11, 1.2764e-10, 8.3233e-12, 1.7532e-12, 4.9719e-10,\n 6.3421e-11, 2.2804e-12, 1.6690e-12, 6.4726e-11, 1.4421e-14, 1.1056e-11,\n 1.2714e-10, 1.0191e-10, 6.2241e-13, 8.8220e-12, 4.7726e-11, 5.5514e-12,\n 9.3118e-11, 4.9438e-11, 1.2015e-11, 9.4242e-12, 3.3424e-11, 2.8263e-13,\n 2.7674e-12, 4.5286e-12, 1.5015e-12, 1.0080e-10, 6.0788e-12, 4.8931e-12,\n 1.2256e-11, 1.2458e-11, 1.9732e-11, 1.6311e-11, 7.8006e-12, 8.7016e-11,\n 1.5917e-12, 3.2470e-10, 6.7940e-14, 3.5415e-12, 2.3217e-12, 8.1257e-12,\n 2.2388e-13, 1.0918e-11, 8.3259e-11, 4.6225e-11, 1.6675e-12, 1.1282e-11,\n 9.8048e-14, 3.1756e-12, 9.5130e-13, 2.4669e-11, 1.2894e-11, 1.7237e-11,\n 1.0817e-11, 2.1157e-12, 7.2936e-11, 4.4648e-11, 3.0407e-12, 2.5359e-13,\n 2.7188e-11, 5.7208e-11, 1.0313e-13, 8.4380e-12, 1.1877e-11, 7.3008e-11,\n 2.7826e-12, 6.8503e-13, 1.0149e-11, 2.4620e-12, 9.1678e-12, 3.5457e-13,\n 1.6954e-11, 6.9891e-12, 6.1251e-11, 7.3734e-12, 2.6838e-12, 1.9372e-11,\n 2.1193e-10, 9.7710e-12, 1.9178e-11, 1.1801e-12, 1.3050e-10, 3.5342e-13,\n 3.3664e-11, 4.6927e-14, 4.9084e-11, 1.7126e-11, 4.2041e-12, 2.7266e-11,\n 2.6065e-10, 7.8487e-12, 1.4982e-11, 7.6416e-11, 4.8344e-10, 6.3421e-12,\n 6.5008e-12, 2.8032e-12, 4.6238e-10, 5.3100e-13, 2.3204e-12, 6.4556e-10,\n 1.0345e-12, 7.4627e-11, 2.2544e-10, 2.9572e-12, 5.9629e-12, 1.2008e-10,\n 1.1093e-11, 4.8041e-12, 1.1936e-10, 2.6615e-11, 1.3115e-12, 6.4827e-12,\n 3.4600e-11, 5.1046e-11, 6.2546e-12, 6.4896e-12, 3.5540e-12, 3.3644e-12,\n 1.6638e-11, 1.9734e-11, 3.6073e-12, 3.0195e-10, 2.7888e-11, 1.4744e-12,\n 2.2026e-11, 1.3276e-10, 1.0382e-10, 1.3434e-10, 3.2955e-12, 1.1872e-11,\n 2.4539e-12, 3.8213e-11, 6.3051e-13, 4.1829e-13], device='cuda:0')" + "exp_avg_sq": "tensor([3.9446e-15, 1.7013e-13, 3.5363e-11, 3.7492e-13, 3.3681e-12, 2.8597e-11,\n 1.6346e-11, 2.9354e-12, 6.1217e-12, 2.2087e-12, 3.5292e-11, 8.6545e-12,\n 3.9509e-12, 5.9049e-12, 5.7731e-14, 3.2940e-12, 4.0845e-12, 2.3014e-12,\n 7.1147e-13, 7.6498e-13, 1.1166e-11, 1.8883e-13, 9.4184e-13, 2.6065e-12,\n 5.8489e-12, 3.3253e-12, 3.2642e-12, 2.2662e-11, 6.0461e-12, 1.1400e-11,\n 1.6907e-11, 3.2851e-11, 1.1364e-10, 2.9929e-13, 1.3659e-11, 9.7027e-14,\n 1.4386e-11, 8.8877e-13, 8.3707e-11, 1.0545e-12, 4.3594e-12, 2.8653e-13,\n 7.8511e-13, 2.5977e-11, 7.0402e-16, 1.0098e-12, 8.8536e-12, 5.5622e-12,\n 2.5512e-12, 8.1338e-12, 1.7832e-11, 5.7583e-12, 1.7500e-11, 1.0162e-12,\n 2.1618e-12, 5.2277e-11, 2.0231e-13, 9.8465e-12, 2.5800e-11, 2.8825e-11,\n 3.0533e-12, 3.6857e-12, 1.6759e-13, 3.2632e-11, 1.3117e-12, 6.8141e-11,\n 7.9564e-14, 7.5297e-12, 8.4702e-12, 3.9836e-12, 1.6289e-11, 2.5989e-11,\n 7.2750e-12, 1.1007e-11, 4.7434e-12, 2.1002e-11, 1.6529e-11, 4.5184e-12,\n 6.7395e-13, 5.6244e-14, 2.7756e-11, 3.4398e-12, 2.5473e-12, 2.3198e-11,\n 1.9107e-12, 5.8781e-13, 7.6221e-11, 1.2223e-12, 2.4039e-12, 8.3100e-14,\n 8.3892e-16, 2.5367e-12, 8.1515e-12, 2.1474e-12, 6.4814e-11, 1.3938e-12,\n 2.4542e-11, 5.5510e-11, 5.1320e-12, 1.2880e-11, 4.2488e-12, 1.6749e-12,\n 7.2756e-13, 1.2870e-11, 7.7191e-12, 4.4262e-12, 7.2851e-12, 1.0160e-12,\n 3.7101e-12, 4.0345e-12, 2.5730e-12, 5.5181e-13, 3.5068e-11, 4.4474e-13,\n 6.8119e-13, 2.8753e-11, 1.5980e-12, 7.0007e-12, 2.6739e-12, 2.0930e-12,\n 1.0492e-10, 5.0731e-12, 3.6473e-11, 2.3785e-12, 5.0099e-13, 1.4207e-10,\n 1.8123e-11, 6.5164e-13, 4.7694e-13, 1.8496e-11, 4.1208e-15, 3.1593e-12,\n 3.6330e-11, 2.9122e-11, 1.7786e-13, 2.5210e-12, 1.3638e-11, 1.5864e-12,\n 2.6609e-11, 1.4127e-11, 3.4335e-12, 2.6931e-12, 9.5511e-12, 8.0765e-14,\n 7.9081e-13, 1.2941e-12, 4.2906e-13, 2.8804e-11, 1.7371e-12, 1.3982e-12,\n 3.5023e-12, 3.5601e-12, 5.6386e-12, 4.6609e-12, 2.2291e-12, 2.4865e-11,\n 4.5483e-13, 9.2786e-11, 1.9414e-14, 1.0120e-12, 6.6343e-13, 2.3220e-12,\n 6.3974e-14, 3.1198e-12, 2.3792e-11, 1.3209e-11, 4.7651e-13, 3.2238e-12,\n 2.8018e-14, 9.0746e-13, 2.7184e-13, 7.0493e-12, 3.6847e-12, 4.9255e-12,\n 3.0911e-12, 6.0458e-13, 2.0842e-11, 1.2759e-11, 8.6891e-13, 7.2465e-14,\n 7.7692e-12, 1.6348e-11, 2.9469e-14, 2.4112e-12, 3.3939e-12, 2.0863e-11,\n 7.9514e-13, 1.9575e-13, 2.9000e-12, 7.0354e-13, 2.6198e-12, 1.0132e-13,\n 4.8448e-12, 1.9972e-12, 1.7503e-11, 2.1070e-12, 7.6691e-13, 5.5358e-12,\n 6.0560e-11, 2.7921e-12, 5.4803e-12, 3.3723e-13, 3.7292e-11, 1.0099e-13,\n 9.6197e-12, 1.3410e-14, 1.4026e-11, 4.8940e-12, 1.2014e-12, 7.7915e-12,\n 7.4482e-11, 2.2428e-12, 4.2812e-12, 2.1836e-11, 1.3815e-10, 1.8123e-12,\n 1.8576e-12, 8.0104e-13, 1.3213e-10, 1.5174e-13, 6.6309e-13, 1.8447e-10,\n 2.9563e-13, 2.1325e-11, 6.4421e-11, 8.4506e-13, 1.7039e-12, 3.4314e-11,\n 3.1699e-12, 1.3728e-12, 3.4108e-11, 7.6055e-12, 3.7477e-13, 1.8525e-12,\n 9.8873e-12, 1.4587e-11, 1.7873e-12, 1.8545e-12, 1.0156e-12, 9.6140e-13,\n 4.7545e-12, 5.6392e-12, 1.0308e-12, 8.6286e-11, 7.9691e-12, 4.2131e-13,\n 6.2941e-12, 3.7936e-11, 2.9666e-11, 3.8387e-11, 9.4171e-13, 3.3925e-12,\n 7.0121e-13, 1.0920e-11, 1.8017e-13, 1.1953e-13], device='cuda:0')" }, "51": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([ 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([1.4028e-13, 6.9625e-13, 2.2236e-10, 7.4147e-13, 3.6220e-11, 1.1171e-10,\n 5.6207e-11, 1.4693e-11, 7.0555e-12, 1.3650e-11, 1.6044e-10, 4.1638e-11,\n 8.7128e-12, 3.3031e-11, 2.5636e-13, 1.0067e-11, 2.4845e-11, 1.1722e-11,\n 2.5179e-12, 4.5733e-12, 5.9163e-11, 1.2392e-12, 6.6548e-12, 1.9555e-11,\n 3.1226e-11, 1.6110e-11, 2.4444e-11, 4.8467e-11, 1.6534e-11, 8.6182e-11,\n 1.1729e-10, 1.3557e-10, 7.6126e-10, 2.2299e-12, 3.5835e-11, 7.7252e-13,\n 6.4406e-11, 3.5156e-12, 2.5378e-10, 8.0671e-12, 3.9729e-11, 1.2295e-12,\n 4.5070e-12, 1.6387e-10, 7.6098e-14, 7.4659e-12, 3.5331e-11, 2.6536e-11,\n 6.2476e-12, 4.7814e-11, 8.6377e-11, 2.4243e-11, 8.9583e-11, 7.5474e-12,\n 9.7113e-12, 1.9817e-10, 1.2524e-12, 3.0664e-11, 1.6119e-10, 2.0364e-10,\n 2.6373e-11, 2.1930e-11, 1.1276e-12, 1.6244e-10, 8.8207e-12, 2.1008e-10,\n 3.3765e-13, 5.3453e-11, 3.2563e-11, 3.1560e-11, 6.7248e-11, 1.0521e-10,\n 2.4907e-11, 7.7452e-11, 3.7046e-11, 1.2383e-10, 7.5336e-11, 3.0793e-11,\n 4.9600e-12, 1.9563e-13, 1.5210e-10, 1.1519e-11, 9.7729e-12, 6.2232e-11,\n 1.2781e-11, 2.6461e-12, 3.3247e-10, 1.0501e-11, 1.4488e-11, 2.2187e-13,\n 1.9387e-13, 1.2966e-11, 4.7128e-11, 1.1296e-11, 2.8048e-10, 8.2453e-12,\n 1.6690e-10, 2.2902e-10, 3.0352e-11, 2.5427e-11, 2.2773e-11, 1.0401e-11,\n 4.6753e-12, 7.0593e-11, 4.2906e-11, 2.7886e-11, 4.2537e-11, 4.5590e-12,\n 2.4800e-11, 3.1606e-11, 1.6637e-11, 3.9806e-12, 1.8076e-10, 4.1706e-12,\n 6.5479e-12, 1.9144e-10, 7.5466e-12, 3.7536e-11, 1.0062e-11, 1.2979e-11,\n 4.0363e-10, 3.5832e-11, 1.4335e-10, 9.8803e-12, 2.3022e-12, 4.4531e-10,\n 5.5950e-11, 3.7101e-12, 2.1675e-12, 6.9366e-11, 4.8156e-14, 1.5872e-11,\n 2.2845e-10, 1.4436e-10, 8.9250e-13, 1.5829e-11, 4.1584e-11, 1.1552e-11,\n 1.3168e-10, 9.7655e-11, 1.7310e-11, 1.6433e-11, 6.5718e-11, 1.1822e-12,\n 6.1096e-12, 8.3390e-12, 4.7920e-12, 8.3268e-11, 5.3839e-12, 8.5229e-12,\n 1.2092e-11, 1.2489e-11, 3.3169e-11, 2.2782e-11, 1.1472e-11, 1.1576e-10,\n 2.7918e-12, 3.6292e-10, 1.3952e-13, 8.0484e-12, 3.4376e-12, 1.1056e-11,\n 5.1484e-13, 2.0195e-11, 1.0522e-10, 6.7214e-11, 4.2225e-12, 2.3155e-11,\n 3.5119e-13, 4.3234e-12, 1.4535e-12, 3.2245e-11, 2.4584e-11, 3.2313e-11,\n 1.6882e-11, 4.4500e-12, 1.2407e-10, 7.6749e-11, 3.7988e-12, 3.4573e-13,\n 2.1235e-11, 5.9316e-11, 2.0201e-13, 1.2588e-11, 1.0284e-11, 1.6626e-10,\n 3.1947e-12, 1.2754e-12, 1.0364e-11, 4.1766e-12, 1.9636e-11, 1.0852e-12,\n 3.4932e-11, 1.3676e-11, 8.1231e-11, 1.2646e-11, 6.0456e-12, 3.4784e-11,\n 1.6189e-10, 1.9469e-11, 2.6113e-11, 2.1824e-12, 2.7497e-10, 8.5897e-13,\n 3.2708e-11, 6.6114e-14, 4.1918e-11, 3.7286e-11, 4.5703e-12, 2.8065e-11,\n 2.3682e-10, 1.0853e-11, 1.9043e-11, 2.0899e-10, 7.7074e-10, 1.1660e-11,\n 8.3675e-12, 7.0911e-12, 4.0866e-10, 8.2969e-13, 6.6868e-12, 7.8458e-10,\n 1.4128e-12, 5.9766e-11, 2.8030e-10, 4.0692e-12, 8.9506e-12, 1.2653e-10,\n 3.2881e-11, 9.7978e-12, 1.6795e-10, 3.5508e-11, 1.6526e-12, 1.3713e-11,\n 6.0062e-11, 5.6644e-11, 9.4538e-12, 1.2354e-11, 6.3674e-12, 3.9899e-12,\n 3.6157e-11, 2.8164e-11, 4.8840e-12, 3.5045e-10, 4.8993e-11, 2.0941e-12,\n 4.0267e-11, 1.6203e-10, 2.0183e-10, 1.8297e-10, 5.7992e-12, 1.9346e-11,\n 6.9625e-12, 7.4584e-11, 9.0710e-13, 3.1406e-13], device='cuda:0')" + "exp_avg_sq": "tensor([4.0085e-14, 1.9896e-13, 6.3540e-11, 2.1188e-13, 1.0350e-11, 3.1923e-11,\n 1.6062e-11, 4.1986e-12, 2.0162e-12, 3.9005e-12, 4.5847e-11, 1.1898e-11,\n 2.4898e-12, 9.4389e-12, 7.3258e-14, 2.8766e-12, 7.0997e-12, 3.3496e-12,\n 7.1951e-13, 1.3068e-12, 1.6906e-11, 3.5412e-13, 1.9017e-12, 5.5879e-12,\n 8.9230e-12, 4.6037e-12, 6.9851e-12, 1.3850e-11, 4.7247e-12, 2.4627e-11,\n 3.3518e-11, 3.8740e-11, 2.1753e-10, 6.3721e-13, 1.0240e-11, 2.2075e-13,\n 1.8404e-11, 1.0046e-12, 7.2519e-11, 2.3052e-12, 1.1353e-11, 3.5135e-13,\n 1.2879e-12, 4.6827e-11, 2.1746e-14, 2.1334e-12, 1.0096e-11, 7.5828e-12,\n 1.7853e-12, 1.3663e-11, 2.4683e-11, 6.9277e-12, 2.5599e-11, 2.1567e-12,\n 2.7751e-12, 5.6628e-11, 3.5789e-13, 8.7624e-12, 4.6061e-11, 5.8191e-11,\n 7.5362e-12, 6.2666e-12, 3.2223e-13, 4.6420e-11, 2.5206e-12, 6.0031e-11,\n 9.6485e-14, 1.5275e-11, 9.3053e-12, 9.0184e-12, 1.9217e-11, 3.0065e-11,\n 7.1173e-12, 2.2132e-11, 1.0586e-11, 3.5385e-11, 2.1528e-11, 8.7994e-12,\n 1.4174e-12, 5.5902e-14, 4.3463e-11, 3.2915e-12, 2.7927e-12, 1.7783e-11,\n 3.6524e-12, 7.5615e-13, 9.5006e-11, 3.0006e-12, 4.1401e-12, 6.3401e-14,\n 5.5399e-14, 3.7052e-12, 1.3467e-11, 3.2279e-12, 8.0151e-11, 2.3562e-12,\n 4.7693e-11, 6.5444e-11, 8.6732e-12, 7.2659e-12, 6.5077e-12, 2.9720e-12,\n 1.3360e-12, 2.0173e-11, 1.2261e-11, 7.9687e-12, 1.2155e-11, 1.3028e-12,\n 7.0867e-12, 9.0318e-12, 4.7541e-12, 1.1375e-12, 5.1654e-11, 1.1918e-12,\n 1.8711e-12, 5.4707e-11, 2.1565e-12, 1.0726e-11, 2.8754e-12, 3.7087e-12,\n 1.1534e-10, 1.0239e-11, 4.0964e-11, 2.8234e-12, 6.5787e-13, 1.2725e-10,\n 1.5988e-11, 1.0602e-12, 6.1938e-13, 1.9822e-11, 1.3761e-14, 4.5356e-12,\n 6.5283e-11, 4.1253e-11, 2.5504e-13, 4.5234e-12, 1.1883e-11, 3.3010e-12,\n 3.7629e-11, 2.7906e-11, 4.9464e-12, 4.6959e-12, 1.8779e-11, 3.3781e-13,\n 1.7459e-12, 2.3829e-12, 1.3694e-12, 2.3795e-11, 1.5385e-12, 2.4355e-12,\n 3.4555e-12, 3.5687e-12, 9.4784e-12, 6.5100e-12, 3.2782e-12, 3.3078e-11,\n 7.9779e-13, 1.0371e-10, 3.9868e-14, 2.2999e-12, 9.8232e-13, 3.1593e-12,\n 1.4712e-13, 5.7708e-12, 3.0069e-11, 1.9207e-11, 1.2066e-12, 6.6168e-12,\n 1.0036e-13, 1.2355e-12, 4.1535e-13, 9.2143e-12, 7.0252e-12, 9.2337e-12,\n 4.8243e-12, 1.2716e-12, 3.5453e-11, 2.1932e-11, 1.0855e-12, 9.8796e-14,\n 6.0680e-12, 1.6950e-11, 5.7725e-14, 3.5970e-12, 2.9387e-12, 4.7509e-11,\n 9.1291e-13, 3.6445e-13, 2.9615e-12, 1.1935e-12, 5.6111e-12, 3.1009e-13,\n 9.9820e-12, 3.9080e-12, 2.3213e-11, 3.6136e-12, 1.7276e-12, 9.9397e-12,\n 4.6262e-11, 5.5634e-12, 7.4620e-12, 6.2364e-13, 7.8575e-11, 2.4546e-13,\n 9.3465e-12, 1.8893e-14, 1.1978e-11, 1.0655e-11, 1.3060e-12, 8.0197e-12,\n 6.7673e-11, 3.1013e-12, 5.4417e-12, 5.9721e-11, 2.2024e-10, 3.3319e-12,\n 2.3911e-12, 2.0263e-12, 1.1678e-10, 2.3709e-13, 1.9108e-12, 2.2420e-10,\n 4.0373e-13, 1.7079e-11, 8.0098e-11, 1.1628e-12, 2.5577e-12, 3.6156e-11,\n 9.3960e-12, 2.7998e-12, 4.7994e-11, 1.0147e-11, 4.7224e-13, 3.9185e-12,\n 1.7163e-11, 1.6187e-11, 2.7015e-12, 3.5302e-12, 1.8195e-12, 1.1401e-12,\n 1.0332e-11, 8.0481e-12, 1.3956e-12, 1.0014e-10, 1.4000e-11, 5.9840e-13,\n 1.1506e-11, 4.6302e-11, 5.7676e-11, 5.2285e-11, 1.6572e-12, 5.5282e-12,\n 1.9896e-12, 2.1313e-11, 2.5921e-13, 8.9746e-14], device='cuda:0')" }, "52": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, -5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[3.5056e-14, 2.7499e-14, 5.1066e-14, ..., 3.3236e-15, 1.5757e-15,\n 2.5037e-13],\n [8.0096e-15, 2.3656e-14, 7.8464e-14, ..., 3.0994e-13, 1.8512e-14,\n 4.1041e-14],\n [4.8809e-14, 1.7549e-15, 2.5931e-14, ..., 1.4976e-13, 1.5468e-15,\n 2.1777e-15],\n ...,\n [6.7956e-11, 1.1060e-09, 1.5035e-09, ..., 2.5732e-09, 1.3134e-09,\n 6.4967e-10],\n [2.0246e-11, 3.3543e-10, 4.5806e-10, ..., 7.4026e-10, 4.1690e-10,\n 2.1619e-10],\n [8.3234e-12, 9.7828e-11, 1.4546e-10, ..., 2.3928e-10, 1.0334e-10,\n 5.1203e-11]], device='cuda:0')" + "exp_avg_sq": "tensor([[1.0017e-14, 7.8582e-15, 1.4592e-14, ..., 9.4973e-16, 4.5026e-16,\n 7.1545e-14],\n [2.2888e-15, 6.7599e-15, 2.2422e-14, ..., 8.8568e-14, 5.2899e-15,\n 1.1728e-14],\n [1.3947e-14, 5.0148e-16, 7.4101e-15, ..., 4.2796e-14, 4.4201e-16,\n 6.2229e-16],\n ...,\n [1.9419e-11, 3.1606e-10, 4.2962e-10, ..., 7.3530e-10, 3.7532e-10,\n 1.8565e-10],\n [5.7855e-12, 9.5852e-11, 1.3089e-10, ..., 2.1153e-10, 1.1913e-10,\n 6.1779e-11],\n [2.3785e-12, 2.7955e-11, 4.1566e-11, ..., 6.8377e-11, 2.9529e-11,\n 1.4632e-11]], device='cuda:0')" }, "53": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([2.7604e-13, 2.2100e-13, 6.6408e-13, 8.0676e-14, 3.2972e-13, 5.2627e-13,\n 1.5002e-13, 6.4049e-13, 1.5672e-12, 5.2971e-12, 1.9149e-13, 7.9519e-14,\n 2.2080e-12, 6.0324e-13, 4.0867e-12, 2.4128e-15, 4.1107e-13, 9.2302e-14,\n 1.0845e-12, 2.9912e-12, 2.5557e-13, 6.6743e-16, 1.1443e-14, 6.3598e-14,\n 3.8671e-13, 2.6853e-13, 5.7786e-13, 8.0881e-13, 7.4825e-14, 6.1498e-13,\n 1.6900e-12, 1.9593e-13, 1.1676e-12, 1.8478e-12, 4.1479e-12, 2.5497e-12,\n 6.5510e-13, 6.9707e-13, 1.2806e-12, 2.8747e-13, 5.1107e-14, 8.8812e-13,\n 9.2866e-13, 4.4843e-15, 5.6070e-12, 2.6673e-12, 2.5383e-14, 7.9912e-15,\n 3.0252e-13, 9.5418e-13, 1.0408e-12, 6.4552e-13, 7.3863e-13, 3.3554e-13,\n 3.0578e-12, 1.1422e-12, 1.6974e-13, 3.1635e-12, 2.4239e-14, 6.0276e-13,\n 8.9284e-13, 2.2501e-13, 1.8795e-14, 5.8158e-12, 1.4641e-13, 3.4931e-12,\n 2.1661e-14, 3.6571e-12, 6.7681e-13, 6.8765e-12, 8.1182e-12, 1.9675e-13,\n 1.9582e-12, 5.4072e-12, 3.8544e-12, 3.5840e-13, 9.5339e-12, 3.6506e-12,\n 1.3379e-11, 3.6035e-14, 3.8195e-13, 1.1456e-12, 4.5353e-13, 1.5305e-12,\n 4.8382e-12, 1.7807e-13, 3.6031e-13, 1.1673e-12, 1.3920e-11, 3.3420e-13,\n 2.6396e-12, 1.8786e-14, 1.0953e-11, 1.0530e-11, 1.8063e-12, 1.5229e-12,\n 1.2037e-14, 1.9484e-12, 2.0013e-12, 1.5227e-12, 4.4389e-12, 1.0834e-14,\n 5.7466e-14, 2.2671e-14, 1.1699e-14, 1.8521e-13, 4.7130e-14, 2.6954e-13,\n 1.0954e-12, 3.4744e-12, 5.3347e-13, 2.7859e-13, 1.5973e-13, 8.7885e-13,\n 2.4183e-13, 3.3648e-12, 1.2563e-12, 2.4520e-12, 3.8276e-12, 1.2935e-12,\n 5.1921e-14, 1.2274e-14, 2.4439e-12, 3.3008e-13, 2.2625e-13, 1.3029e-13,\n 4.2425e-12, 6.8443e-13, 1.0527e-13, 3.8859e-12, 1.6581e-13, 4.6909e-13,\n 6.6414e-13, 2.5728e-12, 4.8668e-12, 4.4872e-14, 1.8649e-13, 3.7036e-13,\n 2.6572e-13, 6.9728e-15, 1.0998e-12, 3.0588e-13, 8.9475e-14, 4.0210e-15,\n 8.8657e-13, 1.4092e-12, 2.1016e-12, 9.2130e-13, 1.6336e-13, 6.7083e-13,\n 1.5729e-14, 2.2302e-12, 2.3174e-14, 6.7223e-14, 3.3481e-12, 1.6517e-12,\n 5.3834e-15, 3.3219e-15, 2.2192e-13, 1.4854e-12, 6.6191e-12, 4.1058e-13,\n 3.9769e-13, 2.1039e-13, 5.8252e-13, 6.8705e-13, 5.6124e-13, 1.0970e-12,\n 1.2717e-14, 1.3737e-13, 1.5640e-12, 3.1498e-13, 3.9705e-13, 2.3230e-12,\n 1.4598e-15, 1.6789e-12, 2.2001e-12, 2.0020e-12, 1.6991e-14, 1.1148e-13,\n 1.2753e-13, 8.1048e-13, 2.0756e-15, 5.2508e-14, 3.7836e-13, 4.4192e-12,\n 4.2227e-13, 6.8971e-14, 1.8566e-12, 2.1049e-13, 1.8143e-13, 1.5224e-12,\n 1.0721e-12, 1.2769e-12, 6.9413e-13, 1.9741e-12, 2.3827e-15, 1.4366e-13,\n 1.0714e-12, 1.2185e-12, 2.0258e-14, 3.1390e-12, 1.4996e-13, 1.8144e-12,\n 9.8531e-14, 7.7262e-14, 3.0195e-14, 5.9430e-13, 3.8237e-12, 1.2266e-13,\n 6.2481e-13, 1.0094e-12, 2.4181e-12, 2.5702e-13, 1.3359e-14, 2.8790e-13,\n 1.6794e-13, 1.6305e-13, 8.1550e-14, 1.8220e-13, 4.6472e-14, 1.2639e-11,\n 3.3557e-12, 2.0859e-12, 6.2991e-15, 6.9223e-14, 6.9085e-13, 2.2318e-12,\n 6.6183e-12, 2.1121e-12, 7.4477e-14, 4.0561e-13, 2.3467e-12, 2.0534e-14,\n 1.3443e-12, 1.0248e-12, 2.7723e-12, 6.0572e-13, 4.3729e-13, 9.0227e-14,\n 1.8073e-13, 8.0422e-14, 2.1368e-12, 5.5080e-13, 5.3026e-13, 3.7462e-12,\n 6.6037e-13, 6.3232e-15, 1.4447e-12, 1.4654e-12, 6.1090e-12, 1.2533e-12,\n 1.7986e-12, 9.8361e-13, 1.5422e-15, 7.7905e-13, 4.2201e-27, 1.0335e-28,\n 6.5740e-28, 1.1958e-28, 6.7541e-28, 2.0107e-28, 1.0857e-27, 2.6175e-29,\n 5.9292e-28, 9.2213e-28, 8.6289e-28, 2.2166e-29, 5.6022e-31, 4.7097e-29,\n 2.6488e-28, 1.9795e-29, 4.5992e-28, 3.5230e-29, 4.1946e-28, 1.0093e-28,\n 6.4855e-30, 3.2529e-28, 1.8397e-29, 3.7908e-28, 5.3669e-30, 3.2438e-28,\n 7.1665e-28, 3.0992e-29, 1.8706e-27, 6.3802e-28, 1.0379e-28, 1.4950e-28,\n 1.9148e-28, 1.1459e-27, 1.0696e-28, 8.8165e-29, 3.5839e-29, 1.9423e-28,\n 5.2317e-28, 1.0344e-28, 1.8346e-29, 2.1271e-28, 4.2781e-29, 9.4610e-29,\n 8.9417e-29, 8.0645e-28, 3.3008e-28, 1.1058e-28, 1.1856e-28, 7.3619e-29,\n 1.4739e-28, 4.8836e-28, 4.9257e-28, 6.1736e-28, 2.5143e-28, 5.7821e-29,\n 5.1406e-28, 8.8484e-28, 1.2127e-27, 5.2934e-28, 4.2753e-28, 2.9632e-28,\n 2.0386e-29, 1.8348e-28, 1.3827e-30, 7.1612e-28, 1.8961e-27, 5.9109e-28,\n 1.5970e-29, 9.4900e-29, 5.7703e-28, 1.9295e-28, 3.1294e-28, 1.8822e-29,\n 1.6120e-28, 8.7181e-29, 3.0670e-28, 8.6399e-28, 2.9764e-27, 1.0903e-27,\n 1.1811e-28, 8.9480e-28, 1.0839e-28, 1.4463e-27, 1.7695e-28, 1.6331e-28,\n 1.4539e-27, 5.0550e-28, 2.6194e-28, 3.3517e-29, 5.8541e-29, 3.1869e-28,\n 3.3425e-28, 7.0337e-29, 2.7175e-28, 1.9324e-29, 2.5656e-29, 8.3987e-28,\n 4.4120e-29, 2.5282e-28, 3.7855e-28, 2.6406e-29, 2.3067e-28, 2.1797e-28,\n 8.7487e-29, 1.2004e-28, 7.4915e-29, 1.2505e-27, 5.0796e-28, 9.0446e-29,\n 6.9176e-29, 3.6790e-28, 3.9838e-28, 7.4817e-28, 3.1511e-28, 1.4745e-27,\n 4.0493e-28, 5.4856e-28, 1.2756e-27, 1.5663e-27, 9.4773e-28, 4.9920e-28,\n 5.8894e-28, 2.8840e-28, 6.6035e-29, 5.9157e-29, 3.2851e-27, 3.8594e-30,\n 6.2402e-28, 3.7151e-29, 1.4595e-28, 1.7789e-28, 4.5798e-28, 1.4799e-28,\n 3.8813e-29, 4.3588e-32, 9.9666e-30, 5.8626e-29, 1.3159e-28, 3.4198e-28,\n 3.2523e-29, 7.7261e-28, 2.9601e-27, 3.3415e-28, 9.5605e-28, 1.2739e-28,\n 5.2272e-28, 1.4418e-27, 6.6433e-28, 4.4521e-29, 4.7942e-29, 1.9416e-27,\n 3.3431e-28, 3.7584e-28, 1.7431e-29, 2.3914e-28, 8.2788e-28, 2.7109e-28,\n 1.4205e-30, 3.7389e-28, 1.1004e-28, 1.0711e-28, 6.0803e-28, 5.6204e-29,\n 3.9290e-29, 3.4102e-28, 2.2552e-28, 1.4441e-29, 1.3277e-28, 1.5459e-27,\n 9.1298e-29, 3.1725e-27, 1.6142e-27, 2.2745e-29, 5.2589e-28, 1.6677e-27,\n 1.6502e-27, 6.2380e-29, 7.8277e-29, 1.3673e-28, 9.5984e-29, 4.1289e-28,\n 8.1781e-30, 4.1636e-28, 2.1300e-28, 5.3578e-29, 1.9441e-28, 3.5960e-30,\n 6.7323e-28, 4.1230e-30, 8.2804e-28, 4.6221e-28, 5.0991e-28, 1.1583e-27,\n 1.0559e-27, 3.8273e-29, 2.1575e-27, 1.9597e-28, 4.9939e-28, 1.0784e-28,\n 3.1904e-30, 1.8080e-28, 2.4879e-28, 4.1501e-28, 4.7919e-29, 1.4874e-28,\n 9.7762e-30, 2.2560e-28, 3.3933e-28, 1.4864e-29, 6.6533e-28, 1.2634e-29,\n 3.8149e-28, 1.4221e-28, 7.2780e-28, 3.3311e-28, 1.8973e-29, 1.5073e-28,\n 3.5990e-28, 9.8172e-29, 1.0886e-28, 1.9834e-28, 3.8810e-28, 2.2784e-28,\n 3.0018e-28, 8.6825e-30, 3.8899e-28, 4.1827e-28, 1.1172e-27, 1.1342e-27,\n 2.8205e-29, 2.0762e-28, 2.9615e-29, 1.9797e-28, 4.2373e-30, 5.4610e-29,\n 6.5540e-29, 3.4971e-28, 1.2033e-29, 1.7736e-29, 7.7958e-29, 8.7125e-28,\n 6.2201e-29, 1.7866e-28, 4.7533e-29, 5.9537e-28, 1.2283e-27, 1.2788e-31,\n 1.1691e-27, 8.2280e-28, 1.1259e-27, 5.2989e-30, 1.8531e-28, 1.2699e-27,\n 5.5944e-29, 2.5059e-29, 1.6120e-09, 2.4348e-09, 2.5617e-11, 2.0460e-08,\n 5.5667e-09, 1.3965e-09, 3.5202e-09, 9.9180e-09, 1.9740e-10, 2.4656e-09,\n 1.6308e-08, 3.8698e-11, 1.1467e-09, 4.1553e-09, 2.0624e-08, 4.6712e-10,\n 6.1055e-09, 2.0514e-08, 1.4094e-09, 2.3600e-09, 6.5698e-09, 1.1621e-11,\n 8.6402e-09, 2.6395e-08, 1.6637e-09, 2.8768e-08, 6.0863e-10, 4.6013e-09,\n 3.7828e-09, 1.7331e-08, 3.6999e-09, 4.0136e-11, 6.5259e-10, 4.2477e-11,\n 7.9472e-09, 1.1020e-08, 1.1040e-08, 1.6954e-09, 3.2290e-10, 1.0795e-08,\n 2.0472e-08, 1.0283e-08, 1.2170e-08, 9.4965e-09, 4.1251e-09, 2.7038e-11,\n 7.3920e-09, 2.1051e-09, 1.0091e-08, 7.1909e-09, 4.0247e-08, 7.8275e-09,\n 1.3339e-09, 6.2006e-11, 3.4445e-09, 3.4355e-09, 2.2059e-08, 1.8825e-09,\n 5.2763e-09, 5.3310e-09, 5.4667e-09, 2.0983e-09, 2.0185e-09, 1.4827e-09,\n 5.7328e-10, 4.6185e-11, 1.1992e-08, 4.4720e-09, 6.0812e-09, 7.8874e-09,\n 5.7023e-10, 1.8498e-10, 1.1265e-10, 2.6753e-08, 8.6604e-10, 3.6534e-08,\n 6.9988e-09, 2.0286e-08, 9.3079e-09, 9.4187e-10, 5.8983e-09, 6.8861e-08,\n 2.4093e-09, 1.5223e-08, 5.5303e-10, 2.0691e-08, 7.4763e-09, 4.7384e-10,\n 1.3273e-10, 8.2813e-09, 1.2046e-08, 5.6184e-09, 4.0663e-11, 1.1534e-08,\n 2.9535e-08, 1.4556e-09, 6.2198e-10, 1.2071e-08, 1.1365e-09, 1.1425e-09,\n 1.5295e-09, 7.1885e-09, 1.9246e-10, 9.4355e-10, 2.5494e-09, 2.5456e-08,\n 1.5405e-08, 2.1931e-09, 2.3337e-10, 2.7351e-09, 1.2331e-09, 4.1134e-09,\n 7.6864e-09, 4.2585e-10, 5.9019e-09, 8.2139e-09, 2.1722e-09, 1.0261e-09,\n 1.4500e-08, 2.8423e-10, 1.0118e-09, 5.5619e-08, 1.0175e-08, 3.5135e-08,\n 1.2677e-08, 4.2044e-09, 3.1261e-10, 5.1420e-10, 6.3530e-09, 7.1219e-09,\n 3.9737e-09, 5.6181e-09, 1.4062e-09, 2.3791e-11, 6.0801e-09, 2.2742e-09,\n 3.2656e-09, 1.5059e-08, 1.6993e-09, 3.3221e-10, 2.6880e-12, 1.2453e-08,\n 5.3903e-09, 1.2563e-10, 3.7026e-09, 2.9930e-09, 7.8094e-10, 2.6632e-08,\n 1.1373e-09, 2.0551e-11, 3.5164e-09, 3.9778e-08, 2.0658e-09, 1.6870e-09,\n 1.5775e-08, 4.7316e-10, 1.7929e-10, 2.9810e-09, 2.5934e-12, 5.0549e-09,\n 1.2724e-08, 1.7959e-08, 5.1922e-10, 3.3954e-09, 4.7322e-09, 8.5126e-09,\n 4.0402e-09, 1.9611e-09, 3.2594e-10, 2.1292e-08, 1.3129e-09, 2.7470e-08,\n 1.7378e-08, 1.3112e-09, 4.4264e-09, 1.1496e-09, 7.0958e-11, 1.4564e-09,\n 1.1877e-08, 4.5812e-11, 8.2608e-09, 2.3605e-08, 6.2342e-11, 5.6147e-09,\n 1.5598e-08, 1.4675e-08, 3.9673e-09, 2.2928e-09, 1.7910e-10, 1.5095e-08,\n 2.7520e-10, 5.1108e-09, 2.3165e-08, 2.3977e-08, 4.1736e-09, 7.0298e-09,\n 1.2541e-08, 3.0315e-08, 2.9274e-10, 1.3447e-08, 1.6709e-09, 8.4208e-10,\n 3.1221e-08, 2.1418e-09, 9.8947e-10, 3.8196e-09, 1.2382e-09, 2.2464e-09,\n 3.4908e-09, 1.1444e-09, 6.5920e-09, 4.9450e-09, 1.5869e-08, 1.2304e-09,\n 1.8355e-10, 4.9941e-08, 1.5721e-09, 2.9570e-08, 5.9993e-11, 3.2329e-09,\n 2.5298e-09, 1.1238e-08, 1.2830e-08, 7.5747e-09, 1.6399e-08, 5.8104e-10,\n 1.0398e-08, 1.3779e-09, 6.4080e-08, 4.1483e-10, 1.5133e-09, 6.2994e-09,\n 6.4373e-09, 3.7616e-09, 1.3149e-08, 4.7580e-09, 1.3343e-09, 1.1841e-09,\n 8.0319e-09, 4.8120e-11, 1.1699e-08, 4.4156e-10, 1.0854e-12, 2.1900e-09,\n 1.1139e-08, 1.4708e-10, 2.9436e-09, 2.8689e-10, 3.8750e-10, 2.6012e-09,\n 9.3118e-09, 6.7255e-09, 5.0611e-09, 2.1967e-08, 6.6385e-09, 1.9369e-09],\n device='cuda:0')" + "exp_avg_sq": "tensor([7.8881e-14, 6.3152e-14, 1.8977e-13, 2.3054e-14, 9.4219e-14, 1.5038e-13,\n 4.2869e-14, 1.8302e-13, 4.4785e-13, 1.5137e-12, 5.4721e-14, 2.2723e-14,\n 6.3096e-13, 1.7238e-13, 1.1678e-12, 6.8947e-16, 1.1747e-13, 2.6376e-14,\n 3.0992e-13, 8.5476e-13, 7.3031e-14, 1.9072e-16, 3.2698e-15, 1.8174e-14,\n 1.1050e-13, 7.6735e-14, 1.6513e-13, 2.3113e-13, 2.1382e-14, 1.7573e-13,\n 4.8293e-13, 5.5988e-14, 3.3364e-13, 5.2803e-13, 1.1853e-12, 7.2859e-13,\n 1.8720e-13, 1.9919e-13, 3.6595e-13, 8.2147e-14, 1.4604e-14, 2.5379e-13,\n 2.6537e-13, 1.2814e-15, 1.6022e-12, 7.6221e-13, 7.2534e-15, 2.2835e-15,\n 8.6447e-14, 2.7267e-13, 2.9742e-13, 1.8446e-13, 2.1107e-13, 9.5885e-14,\n 8.7379e-13, 3.2639e-13, 4.8503e-14, 9.0398e-13, 6.9266e-15, 1.7224e-13,\n 2.5514e-13, 6.4299e-14, 5.3708e-15, 1.6619e-12, 4.1839e-14, 9.9817e-13,\n 6.1899e-15, 1.0451e-12, 1.9340e-13, 1.9650e-12, 2.3198e-12, 5.6223e-14,\n 5.5957e-13, 1.5452e-12, 1.1014e-12, 1.0241e-13, 2.7244e-12, 1.0432e-12,\n 3.8233e-12, 1.0297e-14, 1.0914e-13, 3.2736e-13, 1.2960e-13, 4.3734e-13,\n 1.3825e-12, 5.0885e-14, 1.0296e-13, 3.3356e-13, 3.9776e-12, 9.5502e-14,\n 7.5428e-13, 5.3684e-15, 3.1300e-12, 3.0092e-12, 5.1618e-13, 4.3518e-13,\n 3.4397e-15, 5.5677e-13, 5.7189e-13, 4.3511e-13, 1.2684e-12, 3.0959e-15,\n 1.6421e-14, 6.4785e-15, 3.3432e-15, 5.2926e-14, 1.3468e-14, 7.7023e-14,\n 3.1301e-13, 9.9282e-13, 1.5244e-13, 7.9610e-14, 4.5644e-14, 2.5114e-13,\n 6.9103e-14, 9.6152e-13, 3.5900e-13, 7.0068e-13, 1.0938e-12, 3.6962e-13,\n 1.4837e-14, 3.5075e-15, 6.9838e-13, 9.4324e-14, 6.4651e-14, 3.7231e-14,\n 1.2123e-12, 1.9558e-13, 3.0082e-14, 1.1104e-12, 4.7382e-14, 1.3405e-13,\n 1.8978e-13, 7.3520e-13, 1.3907e-12, 1.2823e-14, 5.3291e-14, 1.0583e-13,\n 7.5930e-14, 1.9925e-15, 3.1427e-13, 8.7406e-14, 2.5568e-14, 1.1490e-15,\n 2.5335e-13, 4.0270e-13, 6.0054e-13, 2.6327e-13, 4.6681e-14, 1.9170e-13,\n 4.4947e-15, 6.3730e-13, 6.6222e-15, 1.9210e-14, 9.5676e-13, 4.7198e-13,\n 1.5384e-15, 9.4925e-16, 6.3416e-14, 4.2448e-13, 1.8915e-12, 1.1733e-13,\n 1.1364e-13, 6.0120e-14, 1.6646e-13, 1.9633e-13, 1.6038e-13, 3.1349e-13,\n 3.6340e-15, 3.9254e-14, 4.4693e-13, 9.0008e-14, 1.1346e-13, 6.6382e-13,\n 4.1716e-16, 4.7976e-13, 6.2869e-13, 5.7207e-13, 4.8554e-15, 3.1855e-14,\n 3.6443e-14, 2.3160e-13, 5.9311e-16, 1.5005e-14, 1.0812e-13, 1.2628e-12,\n 1.2067e-13, 1.9709e-14, 5.3055e-13, 6.0151e-14, 5.1844e-14, 4.3504e-13,\n 3.0636e-13, 3.6488e-13, 1.9835e-13, 5.6412e-13, 6.8088e-16, 4.1053e-14,\n 3.0615e-13, 3.4820e-13, 5.7890e-15, 8.9699e-13, 4.2852e-14, 5.1847e-13,\n 2.8156e-14, 2.2078e-14, 8.6286e-15, 1.6982e-13, 1.0927e-12, 3.5052e-14,\n 1.7854e-13, 2.8843e-13, 6.9098e-13, 7.3444e-14, 3.8175e-15, 8.2270e-14,\n 4.7991e-14, 4.6594e-14, 2.3304e-14, 5.2066e-14, 1.3280e-14, 3.6116e-12,\n 9.5892e-13, 5.9606e-13, 1.8000e-15, 1.9781e-14, 1.9741e-13, 6.3775e-13,\n 1.8912e-12, 6.0356e-13, 2.1282e-14, 1.1591e-13, 6.7058e-13, 5.8677e-15,\n 3.8415e-13, 2.9286e-13, 7.9222e-13, 1.7309e-13, 1.2496e-13, 2.5783e-14,\n 5.1645e-14, 2.2981e-14, 6.1061e-13, 1.5739e-13, 1.5153e-13, 1.0705e-12,\n 1.8871e-13, 1.8069e-15, 4.1283e-13, 4.1875e-13, 1.7457e-12, 3.5813e-13,\n 5.1397e-13, 2.8108e-13, 4.4070e-16, 2.2262e-13, 1.2059e-27, 2.9533e-29,\n 1.8786e-28, 3.4172e-29, 1.9300e-28, 5.7458e-29, 3.1024e-28, 7.4796e-30,\n 1.6943e-28, 2.6350e-28, 2.4658e-28, 6.3341e-30, 1.6009e-31, 1.3458e-29,\n 7.5691e-29, 5.6567e-30, 1.3143e-28, 1.0067e-29, 1.1986e-28, 2.8843e-29,\n 1.8533e-30, 9.2954e-29, 5.2572e-30, 1.0832e-28, 1.5336e-30, 9.2693e-29,\n 2.0479e-28, 8.8563e-30, 5.3455e-28, 1.8232e-28, 2.9657e-29, 4.2721e-29,\n 5.4718e-29, 3.2744e-28, 3.0566e-29, 2.5194e-29, 1.0241e-29, 5.5503e-29,\n 1.4950e-28, 2.9559e-29, 5.2424e-30, 6.0785e-29, 1.2225e-29, 2.7036e-29,\n 2.5552e-29, 2.3045e-28, 9.4324e-29, 3.1600e-29, 3.3879e-29, 2.1037e-29,\n 4.2117e-29, 1.3955e-28, 1.4076e-28, 1.7641e-28, 7.1848e-29, 1.6523e-29,\n 1.4690e-28, 2.5285e-28, 3.4655e-28, 1.5126e-28, 1.2217e-28, 8.4675e-29,\n 5.8254e-30, 5.2432e-29, 3.9511e-31, 2.0464e-28, 5.4184e-28, 1.6891e-28,\n 4.5634e-30, 2.7119e-29, 1.6489e-28, 5.5138e-29, 8.9425e-29, 5.3785e-30,\n 4.6064e-29, 2.4913e-29, 8.7643e-29, 2.4689e-28, 8.5054e-28, 3.1158e-28,\n 3.3752e-29, 2.5570e-28, 3.0974e-29, 4.1328e-28, 5.0564e-29, 4.6668e-29,\n 4.1546e-28, 1.4445e-28, 7.4850e-29, 9.5776e-30, 1.6729e-29, 9.1067e-29,\n 9.5516e-29, 2.0099e-29, 7.7656e-29, 5.5221e-30, 7.3314e-30, 2.4000e-28,\n 1.2608e-29, 7.2246e-29, 1.0817e-28, 7.5457e-30, 6.5915e-29, 6.2286e-29,\n 2.5000e-29, 3.4303e-29, 2.1408e-29, 3.5735e-28, 1.4515e-28, 2.5846e-29,\n 1.9768e-29, 1.0513e-28, 1.1384e-28, 2.1380e-28, 9.0044e-29, 4.2135e-28,\n 1.1571e-28, 1.5676e-28, 3.6452e-28, 4.4757e-28, 2.7082e-28, 1.4265e-28,\n 1.6829e-28, 8.2413e-29, 1.8870e-29, 1.6904e-29, 9.3875e-28, 1.1028e-30,\n 1.7832e-28, 1.0616e-29, 4.1707e-29, 5.0833e-29, 1.3087e-28, 4.2289e-29,\n 1.1091e-29, 1.2456e-32, 2.8480e-30, 1.6753e-29, 3.7604e-29, 9.7723e-29,\n 9.2936e-30, 2.2078e-28, 8.4587e-28, 9.5487e-29, 2.7320e-28, 3.6403e-29,\n 1.4937e-28, 4.1200e-28, 1.8984e-28, 1.2722e-29, 1.3700e-29, 5.5483e-28,\n 9.5530e-29, 1.0740e-28, 4.9810e-30, 6.8336e-29, 2.3657e-28, 7.7465e-29,\n 4.0592e-31, 1.0684e-28, 3.1446e-29, 3.0608e-29, 1.7375e-28, 1.6061e-29,\n 1.1227e-29, 9.7449e-29, 6.4443e-29, 4.1266e-30, 3.7941e-29, 4.4174e-28,\n 2.6089e-29, 9.0657e-28, 4.6126e-28, 6.4994e-30, 1.5028e-28, 4.7655e-28,\n 4.7156e-28, 1.7825e-29, 2.2368e-29, 3.9071e-29, 2.7428e-29, 1.1799e-28,\n 2.3370e-30, 1.1898e-28, 6.0865e-29, 1.5310e-29, 5.5554e-29, 1.0276e-30,\n 1.9238e-28, 1.1782e-30, 2.3662e-28, 1.3208e-28, 1.4571e-28, 3.3100e-28,\n 3.0172e-28, 1.0937e-29, 6.1652e-28, 5.5999e-29, 1.4270e-28, 3.0816e-29,\n 9.1167e-31, 5.1665e-29, 7.1095e-29, 1.1859e-28, 1.3693e-29, 4.2502e-29,\n 2.7936e-30, 6.4468e-29, 9.6965e-29, 4.2475e-30, 1.9012e-28, 3.6103e-30,\n 1.0901e-28, 4.0638e-29, 2.0797e-28, 9.5189e-29, 5.4217e-30, 4.3073e-29,\n 1.0285e-28, 2.8053e-29, 3.1106e-29, 5.6677e-29, 1.1090e-28, 6.5107e-29,\n 8.5778e-29, 2.4811e-30, 1.1116e-28, 1.1952e-28, 3.1926e-28, 3.2411e-28,\n 8.0598e-30, 5.9329e-29, 8.4628e-30, 5.6572e-29, 1.2108e-30, 1.5605e-29,\n 1.8729e-29, 9.9933e-29, 3.4385e-30, 5.0681e-30, 2.2277e-29, 2.4897e-28,\n 1.7774e-29, 5.1055e-29, 1.3583e-29, 1.7013e-28, 3.5099e-28, 3.6544e-32,\n 3.3407e-28, 2.3512e-28, 3.2173e-28, 1.5142e-30, 5.2955e-29, 3.6289e-28,\n 1.5986e-29, 7.1608e-30, 4.6065e-10, 6.9577e-10, 7.3203e-12, 5.8465e-09,\n 1.5907e-09, 3.9905e-10, 1.0059e-09, 2.8342e-09, 5.6410e-11, 7.0456e-10,\n 4.6601e-09, 1.1058e-11, 3.2767e-10, 1.1874e-09, 5.8934e-09, 1.3348e-10,\n 1.7447e-09, 5.8619e-09, 4.0275e-10, 6.7440e-10, 1.8774e-09, 3.3208e-12,\n 2.4690e-09, 7.5427e-09, 4.7542e-10, 8.2207e-09, 1.7392e-10, 1.3149e-09,\n 1.0810e-09, 4.9524e-09, 1.0573e-09, 1.1469e-11, 1.8648e-10, 1.2138e-11,\n 2.2710e-09, 3.1490e-09, 3.1549e-09, 4.8447e-10, 9.2270e-11, 3.0847e-09,\n 5.8501e-09, 2.9384e-09, 3.4777e-09, 2.7137e-09, 1.1788e-09, 7.7262e-12,\n 2.1123e-09, 6.0154e-10, 2.8835e-09, 2.0549e-09, 1.1501e-08, 2.2368e-09,\n 3.8118e-10, 1.7719e-11, 9.8428e-10, 9.8174e-10, 6.3035e-09, 5.3795e-10,\n 1.5077e-09, 1.5234e-09, 1.5621e-09, 5.9962e-10, 5.7680e-10, 4.2371e-10,\n 1.6382e-10, 1.3198e-11, 3.4268e-09, 1.2779e-09, 1.7378e-09, 2.2539e-09,\n 1.6295e-10, 5.2861e-11, 3.2192e-11, 7.6448e-09, 2.4748e-10, 1.0440e-08,\n 2.0000e-09, 5.7968e-09, 2.6598e-09, 2.6915e-10, 1.6855e-09, 1.9677e-08,\n 6.8846e-10, 4.3500e-09, 1.5803e-10, 5.9127e-09, 2.1364e-09, 1.3540e-10,\n 3.7929e-11, 2.3665e-09, 3.4421e-09, 1.6055e-09, 1.1620e-11, 3.2958e-09,\n 8.4400e-09, 4.1595e-10, 1.7773e-10, 3.4495e-09, 3.2476e-10, 3.2649e-10,\n 4.3707e-10, 2.0542e-09, 5.4998e-11, 2.6963e-10, 7.2852e-10, 7.2741e-09,\n 4.4020e-09, 6.2670e-10, 6.6686e-11, 7.8158e-10, 3.5236e-10, 1.1754e-09,\n 2.1964e-09, 1.2169e-10, 1.6865e-09, 2.3472e-09, 6.2072e-10, 2.9321e-10,\n 4.1434e-09, 8.1221e-11, 2.8912e-10, 1.5894e-08, 2.9077e-09, 1.0040e-08,\n 3.6226e-09, 1.2015e-09, 8.9331e-11, 1.4694e-10, 1.8154e-09, 2.0351e-09,\n 1.1355e-09, 1.6054e-09, 4.0184e-10, 6.7986e-12, 1.7374e-09, 6.4987e-10,\n 9.3318e-10, 4.3033e-09, 4.8560e-10, 9.4932e-11, 7.6813e-13, 3.5586e-09,\n 1.5403e-09, 3.5899e-11, 1.0581e-09, 8.5527e-10, 2.2316e-10, 7.6102e-09,\n 3.2499e-10, 5.8725e-12, 1.0048e-09, 1.1367e-08, 5.9032e-10, 4.8207e-10,\n 4.5078e-09, 1.3521e-10, 5.1234e-11, 8.5184e-10, 7.4108e-13, 1.4445e-09,\n 3.6361e-09, 5.1320e-09, 1.4837e-10, 9.7027e-10, 1.3523e-09, 2.4325e-09,\n 1.1545e-09, 5.6041e-10, 9.3139e-11, 6.0844e-09, 3.7516e-10, 7.8498e-09,\n 4.9659e-09, 3.7467e-10, 1.2649e-09, 3.2852e-10, 2.0277e-11, 4.1618e-10,\n 3.3939e-09, 1.3091e-11, 2.3606e-09, 6.7454e-09, 1.7815e-11, 1.6044e-09,\n 4.4573e-09, 4.1934e-09, 1.1337e-09, 6.5518e-10, 5.1178e-11, 4.3137e-09,\n 7.8639e-11, 1.4605e-09, 6.6196e-09, 6.8517e-09, 1.1926e-09, 2.0088e-09,\n 3.5838e-09, 8.6628e-09, 8.3651e-11, 3.8425e-09, 4.7748e-10, 2.4063e-10,\n 8.9216e-09, 6.1204e-10, 2.8275e-10, 1.0915e-09, 3.5383e-10, 6.4193e-10,\n 9.9752e-10, 3.2701e-10, 1.8837e-09, 1.4131e-09, 4.5348e-09, 3.5161e-10,\n 5.2451e-11, 1.4271e-08, 4.4924e-10, 8.4499e-09, 1.7143e-11, 9.2384e-10,\n 7.2292e-10, 3.2112e-09, 3.6661e-09, 2.1645e-09, 4.6861e-09, 1.6604e-10,\n 2.9713e-09, 3.9374e-10, 1.8311e-08, 1.1854e-10, 4.3243e-10, 1.8001e-09,\n 1.8395e-09, 1.0749e-09, 3.7574e-09, 1.3596e-09, 3.8129e-10, 3.3836e-10,\n 2.2952e-09, 1.3751e-11, 3.3432e-09, 1.2618e-10, 3.1017e-13, 6.2582e-10,\n 3.1831e-09, 4.2028e-11, 8.4115e-10, 8.1981e-11, 1.1073e-10, 7.4330e-10,\n 2.6609e-09, 1.9219e-09, 1.4463e-09, 6.2773e-09, 1.8970e-09, 5.5348e-10],\n device='cuda:0')" }, "54": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([[ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [-5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, -5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[4.4449e-10, 1.4567e-11, 1.0145e-11, ..., 3.3655e-11, 1.7864e-10,\n 2.3299e-10],\n [2.3431e-10, 1.0732e-11, 7.6765e-12, ..., 2.8423e-11, 1.0151e-10,\n 1.3292e-10],\n [4.2862e-09, 9.7182e-11, 8.4980e-11, ..., 1.4064e-10, 1.7136e-09,\n 2.1522e-09],\n ...,\n [1.3482e-09, 3.3756e-11, 2.4500e-11, ..., 6.2027e-11, 5.3906e-10,\n 7.1573e-10],\n [3.3440e-11, 2.5326e-12, 7.8369e-13, ..., 1.2764e-11, 1.3150e-11,\n 1.2276e-11],\n [7.5585e-11, 6.1229e-12, 4.7672e-12, ..., 2.0521e-11, 3.7178e-11,\n 3.9104e-11]], device='cuda:0')" + "exp_avg_sq": "tensor([[1.2702e-10, 4.1625e-12, 2.8990e-12, ..., 9.6173e-12, 5.1048e-11,\n 6.6579e-11],\n [6.6957e-11, 3.0669e-12, 2.1936e-12, ..., 8.1221e-12, 2.9007e-11,\n 3.7984e-11],\n [1.2248e-09, 2.7770e-11, 2.4284e-11, ..., 4.0189e-11, 4.8968e-10,\n 6.1500e-10],\n ...,\n [3.8525e-10, 9.6462e-12, 7.0010e-12, ..., 1.7725e-11, 1.5404e-10,\n 2.0452e-10],\n [9.5558e-12, 7.2371e-13, 2.2394e-13, ..., 3.6473e-12, 3.7576e-12,\n 3.5078e-12],\n [2.1599e-11, 1.7497e-12, 1.3623e-12, ..., 5.8641e-12, 1.0624e-11,\n 1.1174e-11]], device='cuda:0')" }, "55": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([-5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n -5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, -5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45,\n -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45, -5.6052e-45,\n -5.6052e-45, 5.6052e-45, -5.6052e-45, -5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -5.6052e-45, 5.6052e-45,\n -5.6052e-45], device='cuda:0')", - "exp_avg_sq": "tensor([7.9556e-09, 4.2619e-09, 7.5721e-08, 4.0784e-08, 3.0570e-08, 4.1731e-11,\n 1.0163e-08, 1.0549e-08, 1.8225e-08, 1.8189e-09, 3.3632e-08, 4.4264e-10,\n 2.6659e-08, 1.5620e-11, 2.6235e-08, 6.4056e-11, 1.4066e-09, 1.2429e-08,\n 1.1631e-08, 1.9925e-08, 2.8534e-08, 4.3610e-09, 3.3368e-08, 1.5048e-08,\n 8.4037e-08, 2.2701e-08, 1.8087e-08, 4.2181e-09, 1.6866e-08, 4.0632e-08,\n 2.1278e-08, 7.4197e-08, 8.2583e-09, 9.5592e-10, 3.4269e-09, 3.6774e-09,\n 9.4327e-09, 3.1243e-08, 1.9358e-08, 3.9838e-09, 1.0789e-08, 3.0251e-08,\n 3.1813e-10, 1.1743e-08, 2.0922e-08, 7.3644e-09, 7.7141e-09, 2.4333e-08,\n 9.0921e-08, 4.5341e-09, 3.4530e-08, 1.7625e-08, 8.6016e-09, 7.2975e-09,\n 1.8962e-10, 6.6686e-08, 1.2592e-09, 2.8005e-08, 6.8178e-09, 6.5749e-08,\n 3.7345e-08, 2.5458e-08, 8.0493e-10, 1.3562e-08, 5.8276e-09, 6.4163e-09,\n 7.0556e-09, 6.6849e-08, 6.6923e-10, 3.1718e-08, 2.3986e-10, 2.5692e-08,\n 4.2521e-10, 2.6331e-08, 1.6026e-10, 7.0345e-09, 5.8645e-08, 3.3548e-09,\n 2.4800e-08, 4.7106e-08, 2.1804e-08, 1.5416e-08, 1.7769e-08, 2.4509e-08,\n 2.2198e-08, 3.1304e-08, 1.0746e-08, 1.5085e-08, 1.8402e-08, 1.0100e-09,\n 5.2703e-09, 6.6204e-09, 1.1708e-07, 3.1929e-09, 1.5722e-08, 1.6434e-11,\n 8.4293e-10, 2.6781e-10, 5.9254e-11, 1.8220e-09, 1.1704e-07, 8.3296e-10,\n 1.1084e-08, 9.7290e-08, 7.6608e-09, 2.0306e-09, 3.1896e-09, 8.1952e-10,\n 2.2405e-08, 5.3281e-08, 3.0205e-10, 1.5663e-08, 6.2397e-09, 6.1033e-09,\n 1.9907e-08, 4.6983e-09, 1.1655e-09, 6.5894e-10, 1.5167e-08, 1.1824e-08,\n 2.3270e-10, 1.6626e-08, 1.1761e-09, 1.1856e-07, 4.8636e-08, 1.2207e-07,\n 4.5479e-08, 2.8599e-09, 2.5288e-09, 7.0768e-09, 5.4946e-08, 9.0020e-10,\n 5.6496e-09, 2.5355e-08, 8.8796e-11, 3.5600e-10, 1.5780e-08, 3.7136e-08,\n 1.8173e-08, 4.3808e-09, 8.1532e-09, 2.1408e-08, 1.1140e-08, 1.0328e-08,\n 1.1204e-08, 2.4795e-08, 9.2479e-09, 1.4120e-08, 9.9956e-09, 6.4786e-08,\n 3.6056e-11, 7.4238e-09, 9.0734e-10, 7.5011e-10, 1.5661e-07, 6.8178e-08,\n 3.3968e-09, 6.3683e-08, 1.4459e-08, 8.8856e-08, 1.7926e-09, 1.8360e-09,\n 1.1793e-08, 5.0089e-11, 8.6560e-08, 1.4520e-08, 1.9109e-08, 3.0573e-09,\n 1.3662e-08, 6.2676e-08, 8.5839e-10, 1.4841e-08, 2.0192e-08, 5.5474e-09,\n 6.1552e-08, 1.5093e-08, 1.2032e-08, 4.2356e-10, 1.0816e-08, 3.2582e-09,\n 2.8737e-08, 1.3319e-08, 2.0108e-09, 4.1040e-09, 3.9721e-10, 7.9475e-09,\n 3.4338e-09, 5.0728e-08, 1.2543e-09, 4.8086e-09, 4.1553e-09, 9.1738e-10,\n 3.0023e-09, 3.2592e-08, 1.1815e-08, 1.0368e-08, 3.1615e-08, 1.7243e-08,\n 1.8556e-08, 8.6293e-09, 9.2299e-09, 3.8829e-08, 2.3718e-08, 6.4023e-10,\n 4.0826e-08, 1.3885e-09, 3.1443e-08, 4.1596e-08, 4.2811e-08, 4.1163e-08,\n 1.1793e-08, 8.9255e-09, 3.8111e-08, 1.5290e-09, 3.4875e-08, 1.6520e-08,\n 2.2241e-11, 2.4041e-08, 5.7346e-11, 1.7945e-08, 1.0344e-08, 2.5831e-07,\n 3.0204e-08, 9.2297e-08, 8.4181e-10, 4.1736e-09, 1.9470e-08, 1.4933e-09,\n 5.3775e-09, 2.9981e-10, 9.2575e-09, 2.3653e-08, 2.4836e-08, 3.7263e-09,\n 8.8294e-08, 1.1564e-08, 5.1081e-09, 5.5753e-10, 5.0202e-09, 6.5041e-11,\n 2.7798e-09, 2.1013e-08, 1.5392e-09, 1.4067e-09, 1.0118e-08, 6.3409e-08,\n 1.0590e-08, 3.2971e-08, 1.6140e-08, 7.8703e-09, 6.6517e-08, 1.5465e-09,\n 5.2210e-08, 2.4058e-08, 4.5388e-10, 1.3245e-09], device='cuda:0')" + "exp_avg_sq": "tensor([2.2734e-09, 1.2179e-09, 2.1638e-08, 1.1654e-08, 8.7356e-09, 1.1925e-11,\n 2.9041e-09, 3.0143e-09, 5.2079e-09, 5.1978e-10, 9.6106e-09, 1.2649e-10,\n 7.6180e-09, 4.4636e-12, 7.4967e-09, 1.8304e-11, 4.0195e-10, 3.5517e-09,\n 3.3235e-09, 5.6938e-09, 8.1539e-09, 1.2462e-09, 9.5351e-09, 4.3001e-09,\n 2.4014e-08, 6.4869e-09, 5.1684e-09, 1.2054e-09, 4.8196e-09, 1.1611e-08,\n 6.0805e-09, 2.1202e-08, 2.3599e-09, 2.7316e-10, 9.7926e-10, 1.0508e-09,\n 2.6955e-09, 8.9280e-09, 5.5316e-09, 1.1384e-09, 3.0832e-09, 8.6444e-09,\n 9.0908e-11, 3.3556e-09, 5.9787e-09, 2.1044e-09, 2.2044e-09, 6.9534e-09,\n 2.5981e-08, 1.2957e-09, 9.8671e-09, 5.0366e-09, 2.4580e-09, 2.0853e-09,\n 5.4186e-11, 1.9056e-08, 3.5981e-10, 8.0027e-09, 1.9482e-09, 1.8788e-08,\n 1.0672e-08, 7.2747e-09, 2.3001e-10, 3.8754e-09, 1.6653e-09, 1.8335e-09,\n 2.0162e-09, 1.9103e-08, 1.9124e-10, 9.0638e-09, 6.8541e-11, 7.3417e-09,\n 1.2151e-10, 7.5243e-09, 4.5795e-11, 2.0102e-09, 1.6758e-08, 9.5867e-10,\n 7.0868e-09, 1.3461e-08, 6.2308e-09, 4.4051e-09, 5.0777e-09, 7.0036e-09,\n 6.3432e-09, 8.9455e-09, 3.0707e-09, 4.3107e-09, 5.2586e-09, 2.8862e-10,\n 1.5060e-09, 1.8918e-09, 3.3457e-08, 9.1239e-10, 4.4927e-09, 4.6961e-12,\n 2.4087e-10, 7.6529e-11, 1.6932e-11, 5.2065e-10, 3.3444e-08, 2.3802e-10,\n 3.1675e-09, 2.7801e-08, 2.1891e-09, 5.8025e-10, 9.1144e-10, 2.3418e-10,\n 6.4023e-09, 1.5225e-08, 8.6314e-11, 4.4757e-09, 1.7830e-09, 1.7441e-09,\n 5.6885e-09, 1.3426e-09, 3.3304e-10, 1.8830e-10, 4.3342e-09, 3.3787e-09,\n 6.6496e-11, 4.7511e-09, 3.3609e-10, 3.3881e-08, 1.3898e-08, 3.4882e-08,\n 1.2996e-08, 8.1725e-10, 7.2263e-10, 2.0222e-09, 1.5701e-08, 2.5724e-10,\n 1.6144e-09, 7.2453e-09, 2.5374e-11, 1.0173e-10, 4.5093e-09, 1.0612e-08,\n 5.1931e-09, 1.2518e-09, 2.3298e-09, 6.1176e-09, 3.1834e-09, 2.9513e-09,\n 3.2016e-09, 7.0853e-09, 2.6427e-09, 4.0349e-09, 2.8563e-09, 1.8513e-08,\n 1.0303e-11, 2.1214e-09, 2.5928e-10, 2.1435e-10, 4.4752e-08, 1.9482e-08,\n 9.7065e-10, 1.8198e-08, 4.1317e-09, 2.5391e-08, 5.1226e-10, 5.2466e-10,\n 3.3700e-09, 1.4313e-11, 2.4735e-08, 4.1492e-09, 5.4605e-09, 8.7364e-10,\n 3.9040e-09, 1.7910e-08, 2.4529e-10, 4.2409e-09, 5.7701e-09, 1.5852e-09,\n 1.7589e-08, 4.3128e-09, 3.4382e-09, 1.2104e-10, 3.0907e-09, 9.3106e-10,\n 8.2117e-09, 3.8059e-09, 5.7461e-10, 1.1728e-09, 1.1351e-10, 2.2711e-09,\n 9.8124e-10, 1.4496e-08, 3.5843e-10, 1.3741e-09, 1.1874e-09, 2.6215e-10,\n 8.5794e-10, 9.3135e-09, 3.3762e-09, 2.9627e-09, 9.0343e-09, 4.9274e-09,\n 5.3025e-09, 2.4659e-09, 2.6375e-09, 1.1096e-08, 6.7776e-09, 1.8295e-10,\n 1.1666e-08, 3.9677e-10, 8.9851e-09, 1.1886e-08, 1.2233e-08, 1.1763e-08,\n 3.3699e-09, 2.5505e-09, 1.0891e-08, 4.3692e-10, 9.9659e-09, 4.7206e-09,\n 6.3555e-12, 6.8700e-09, 1.6387e-11, 5.1280e-09, 2.9559e-09, 7.3815e-08,\n 8.6311e-09, 2.6375e-08, 2.4055e-10, 1.1926e-09, 5.5636e-09, 4.2671e-10,\n 1.5367e-09, 8.5672e-11, 2.6454e-09, 6.7589e-09, 7.0970e-09, 1.0648e-09,\n 2.5231e-08, 3.3046e-09, 1.4597e-09, 1.5932e-10, 1.4346e-09, 1.8586e-11,\n 7.9435e-10, 6.0046e-09, 4.3985e-10, 4.0197e-10, 2.8913e-09, 1.8120e-08,\n 3.0262e-09, 9.4218e-09, 4.6120e-09, 2.2490e-09, 1.9008e-08, 4.4192e-10,\n 1.4920e-08, 6.8747e-09, 1.2970e-10, 3.7848e-10], device='cuda:0')" }, "56": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.9513e-07, 4.2331e-08, 1.1712e-07, ..., 5.0718e-08, 4.3420e-09,\n 6.6620e-10],\n [1.7583e-09, 4.5823e-10, 1.0403e-09, ..., 4.1811e-10, 4.1416e-11,\n 8.2318e-12],\n [2.3948e-09, 5.0531e-10, 1.4289e-09, ..., 6.3309e-10, 5.3570e-11,\n 7.6798e-12],\n ...,\n [1.9578e-09, 4.1282e-10, 1.1818e-09, ..., 5.0315e-10, 4.6392e-11,\n 9.6424e-12],\n [2.4254e-09, 5.2375e-10, 1.4579e-09, ..., 6.3099e-10, 4.4970e-11,\n 4.5112e-12],\n [2.1523e-09, 4.5410e-10, 1.3461e-09, ..., 5.7615e-10, 5.1982e-11,\n 1.3173e-11]], device='cuda:0')" + "exp_avg_sq": "tensor([[5.5761e-08, 1.2097e-08, 3.3469e-08, ..., 1.4493e-08, 1.2408e-09,\n 1.9037e-10],\n [5.0245e-10, 1.3094e-10, 2.9727e-10, ..., 1.1948e-10, 1.1835e-11,\n 2.3523e-12],\n [6.8433e-10, 1.4440e-10, 4.0833e-10, ..., 1.8091e-10, 1.5308e-11,\n 2.1946e-12],\n ...,\n [5.5947e-10, 1.1797e-10, 3.3771e-10, ..., 1.4378e-10, 1.3257e-11,\n 2.7554e-12],\n [6.9308e-10, 1.4966e-10, 4.1660e-10, ..., 1.8031e-10, 1.2851e-11,\n 1.2891e-12],\n [6.1503e-10, 1.2976e-10, 3.8465e-10, ..., 1.6464e-10, 1.4854e-11,\n 3.7643e-12]], device='cuda:0')" }, "57": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", - "exp_avg_sq": "tensor([5.0299e-06, 4.5603e-08, 6.1594e-08, 6.3917e-08, 7.3281e-08, 8.4180e-08,\n 6.6927e-08, 4.9563e-08, 6.2602e-08, 5.5632e-08], device='cuda:0')" + "exp_avg_sq": "tensor([1.4373e-06, 1.3031e-08, 1.7601e-08, 1.8265e-08, 2.0941e-08, 2.4055e-08,\n 1.9125e-08, 1.4163e-08, 1.7889e-08, 1.5897e-08], device='cuda:0')" }, "58": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.9511e-07, 4.2324e-08, 1.1709e-07, ..., 5.0718e-08, 4.3322e-09,\n 6.5947e-10],\n [1.7580e-09, 4.5812e-10, 1.0398e-09, ..., 4.1810e-10, 4.1271e-11,\n 8.1335e-12],\n [2.3946e-09, 5.0522e-10, 1.4286e-09, ..., 6.3309e-10, 5.3460e-11,\n 7.6047e-12],\n ...,\n [1.9575e-09, 4.1269e-10, 1.1813e-09, ..., 5.0314e-10, 4.6230e-11,\n 9.5305e-12],\n [2.4253e-09, 5.2371e-10, 1.4578e-09, ..., 6.3099e-10, 4.4931e-11,\n 4.4845e-12],\n [2.1517e-09, 4.5387e-10, 1.3451e-09, ..., 5.7615e-10, 5.1692e-11,\n 1.2976e-11]], device='cuda:0')" + "exp_avg_sq": "tensor([[5.5755e-08, 1.2094e-08, 3.3459e-08, ..., 1.4493e-08, 1.2380e-09,\n 1.8845e-10],\n [5.0237e-10, 1.3091e-10, 2.9713e-10, ..., 1.1948e-10, 1.1793e-11,\n 2.3242e-12],\n [6.8427e-10, 1.4437e-10, 4.0823e-10, ..., 1.8091e-10, 1.5277e-11,\n 2.1731e-12],\n ...,\n [5.5938e-10, 1.1793e-10, 3.3755e-10, ..., 1.4378e-10, 1.3210e-11,\n 2.7234e-12],\n [6.9305e-10, 1.4966e-10, 4.1656e-10, ..., 1.8031e-10, 1.2839e-11,\n 1.2815e-12],\n [6.1486e-10, 1.2970e-10, 3.8437e-10, ..., 1.6464e-10, 1.4771e-11,\n 3.7080e-12]], device='cuda:0')" }, "59": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", - "exp_avg_sq": "tensor([5.0299e-06, 4.5603e-08, 6.1594e-08, 6.3916e-08, 7.3281e-08, 8.4180e-08,\n 6.6926e-08, 4.9562e-08, 6.2602e-08, 5.5631e-08], device='cuda:0')" + "exp_avg_sq": "tensor([1.4373e-06, 1.3031e-08, 1.7601e-08, 1.8265e-08, 2.0941e-08, 2.4055e-08,\n 1.9125e-08, 1.4163e-08, 1.7889e-08, 1.5897e-08], device='cuda:0')" }, "60": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([[-5.6052e-45, -5.6052e-45, -5.6052e-45, ..., -5.6052e-45,\n -5.6052e-45, -5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n ...,\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45],\n [ 5.6052e-45, 5.6052e-45, 5.6052e-45, ..., 5.6052e-45,\n 5.6052e-45, 5.6052e-45]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.9513e-07, 4.2331e-08, 1.1712e-07, ..., 5.0718e-08, 4.3420e-09,\n 6.6620e-10],\n [1.7583e-09, 4.5823e-10, 1.0403e-09, ..., 4.1811e-10, 4.1416e-11,\n 8.2318e-12],\n [2.3948e-09, 5.0531e-10, 1.4289e-09, ..., 6.3309e-10, 5.3570e-11,\n 7.6798e-12],\n ...,\n [1.9578e-09, 4.1282e-10, 1.1818e-09, ..., 5.0315e-10, 4.6392e-11,\n 9.6424e-12],\n [2.4254e-09, 5.2375e-10, 1.4579e-09, ..., 6.3099e-10, 4.4970e-11,\n 4.5112e-12],\n [2.1523e-09, 4.5410e-10, 1.3461e-09, ..., 5.7615e-10, 5.1982e-11,\n 1.3173e-11]], device='cuda:0')" + "exp_avg_sq": "tensor([[5.5761e-08, 1.2097e-08, 3.3469e-08, ..., 1.4493e-08, 1.2408e-09,\n 1.9037e-10],\n [5.0245e-10, 1.3094e-10, 2.9727e-10, ..., 1.1948e-10, 1.1835e-11,\n 2.3523e-12],\n [6.8433e-10, 1.4440e-10, 4.0833e-10, ..., 1.8091e-10, 1.5308e-11,\n 2.1946e-12],\n ...,\n [5.5947e-10, 1.1797e-10, 3.3771e-10, ..., 1.4378e-10, 1.3257e-11,\n 2.7554e-12],\n [6.9308e-10, 1.4966e-10, 4.1660e-10, ..., 1.8031e-10, 1.2851e-11,\n 1.2891e-12],\n [6.1503e-10, 1.2976e-10, 3.8465e-10, ..., 1.6464e-10, 1.4854e-11,\n 3.7643e-12]], device='cuda:0')" }, "61": { - "step": "tensor(3756.)", + "step": "tensor(5008.)", "exp_avg": "tensor([-5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45, 5.6052e-45],\n device='cuda:0')", - "exp_avg_sq": "tensor([5.0299e-06, 4.5603e-08, 6.1594e-08, 6.3917e-08, 7.3281e-08, 8.4180e-08,\n 6.6927e-08, 4.9563e-08, 6.2602e-08, 5.5632e-08], device='cuda:0')" + "exp_avg_sq": "tensor([1.4373e-06, 1.3031e-08, 1.7601e-08, 1.8265e-08, 2.0941e-08, 2.4055e-08,\n 1.9125e-08, 1.4163e-08, 1.7889e-08, 1.5897e-08], device='cuda:0')" }, "8": { - "step": "tensor(2504.)", - "exp_avg": "tensor([[ 4.7413e-07, 1.4679e-07, 6.8218e-07, ..., -5.6910e-07,\n 0.0000e+00, 3.0936e-08],\n [ 2.3633e-07, 2.0342e-08, -4.8619e-07, ..., 3.4207e-07,\n 0.0000e+00, -4.1459e-06],\n [-5.3007e-07, 4.2938e-07, 1.0919e-06, ..., 5.5119e-07,\n 0.0000e+00, 5.1723e-07],\n ...,\n [-1.4529e-07, -6.7043e-08, -5.6330e-07, ..., 6.0936e-06,\n 0.0000e+00, -3.9499e-08],\n [ 9.2444e-09, 3.5904e-08, 3.9622e-06, ..., -5.7482e-06,\n 0.0000e+00, -1.9460e-06],\n [ 1.5768e-06, 1.6032e-06, -2.5874e-07, ..., 1.9872e-07,\n 0.0000e+00, 1.0289e-06]], device='cuda:0')", - "exp_avg_sq": "tensor([[1.2762e-11, 3.3459e-12, 4.8222e-11, ..., 7.9286e-11, 0.0000e+00,\n 7.0599e-12],\n [3.8611e-11, 1.6312e-12, 1.2670e-10, ..., 4.5336e-11, 0.0000e+00,\n 7.4880e-11],\n [7.5478e-12, 1.2109e-11, 8.8250e-11, ..., 1.8030e-11, 0.0000e+00,\n 2.1671e-11],\n ...,\n [2.6999e-11, 8.6625e-12, 4.2868e-11, ..., 9.2326e-11, 0.0000e+00,\n 5.2896e-11],\n [2.5328e-11, 5.7971e-12, 2.8048e-10, ..., 1.1511e-10, 0.0000e+00,\n 1.1650e-10],\n [7.0066e-11, 2.6651e-11, 7.1672e-11, ..., 5.8657e-11, 0.0000e+00,\n 2.1032e-11]], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([[ 3.3050e-06, -1.4826e-06, -2.4800e-07, ..., -2.0102e-07,\n 0.0000e+00, -1.5793e-08],\n [-1.4022e-06, 2.0576e-08, -5.3183e-07, ..., -7.3770e-07,\n 0.0000e+00, 2.0642e-06],\n [-1.6411e-07, 9.4075e-09, -6.2795e-07, ..., -4.2783e-07,\n 0.0000e+00, 1.3053e-07],\n ...,\n [-5.7752e-07, 3.2708e-08, 1.2804e-06, ..., -2.7579e-06,\n 0.0000e+00, 1.0007e-06],\n [ 8.3237e-08, -8.3440e-09, 4.8899e-07, ..., -5.5080e-07,\n 0.0000e+00, 1.0731e-06],\n [-5.8672e-07, 2.2360e-08, -6.9772e-08, ..., 8.9757e-07,\n 0.0000e+00, -3.7203e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.3802e-11, 2.4263e-12, 3.1701e-11, ..., 3.9639e-11, 0.0000e+00,\n 4.7108e-12],\n [2.2565e-11, 6.6864e-13, 6.1874e-11, ..., 1.6449e-11, 0.0000e+00,\n 3.5699e-11],\n [5.6193e-12, 4.7289e-12, 5.1355e-11, ..., 1.0132e-11, 0.0000e+00,\n 9.5389e-12],\n ...,\n [1.2220e-11, 2.8527e-12, 2.1157e-11, ..., 4.9039e-11, 0.0000e+00,\n 2.5976e-11],\n [1.2748e-11, 2.3249e-12, 1.7470e-10, ..., 8.4237e-11, 0.0000e+00,\n 8.7184e-11],\n [4.4334e-11, 1.4753e-11, 5.2298e-11, ..., 4.7643e-11, 0.0000e+00,\n 1.5019e-11]], device='cuda:0')" }, "9": { - "step": "tensor(2504.)", - "exp_avg": "tensor([ 2.4022e-05, -1.6155e-05, 6.5420e-06, ..., 1.4850e-05,\n -3.2241e-05, -4.9677e-06], device='cuda:0')", - "exp_avg_sq": "tensor([4.2370e-09, 4.3161e-09, 3.4345e-09, ..., 4.4670e-09, 4.9698e-09,\n 4.6233e-09], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([-7.2511e-06, 1.2043e-05, 9.9536e-07, ..., -1.9313e-06,\n -1.7364e-06, 1.1259e-06], device='cuda:0')", + "exp_avg_sq": "tensor([2.5431e-09, 2.8160e-09, 2.0961e-09, ..., 2.1836e-09, 2.7892e-09,\n 2.6763e-09], device='cuda:0')" }, "10": { - "step": "tensor(2504.)", - "exp_avg": "tensor([[-1.3149e-06, 2.0302e-07, -4.1142e-07, ..., -8.1504e-07,\n -5.4186e-07, 2.2176e-07],\n [ 5.0310e-07, -9.4568e-07, 5.5883e-07, ..., 4.2668e-07,\n 1.3221e-06, -1.7775e-07],\n [ 8.8880e-07, -1.2933e-07, -6.4918e-07, ..., -8.4446e-07,\n -5.8922e-07, -4.7077e-07],\n ...,\n [ 8.2344e-07, 5.5906e-08, -2.0955e-08, ..., -7.4981e-07,\n -5.6376e-08, -3.8988e-07],\n [ 1.3428e-07, -3.4760e-07, 7.9987e-07, ..., 4.1981e-07,\n 1.4329e-07, -2.0778e-07],\n [-1.7248e-06, 1.4727e-06, 8.2178e-07, ..., -1.8555e-06,\n 2.9878e-07, 1.1374e-06]], device='cuda:0')", - "exp_avg_sq": "tensor([[4.3134e-12, 5.1578e-12, 5.3716e-12, ..., 1.2667e-11, 7.2813e-12,\n 7.8468e-12],\n [1.4183e-11, 1.5321e-11, 9.2701e-12, ..., 1.5488e-11, 1.2312e-11,\n 1.0203e-11],\n [1.1023e-11, 1.3767e-11, 1.1316e-11, ..., 1.5953e-11, 1.3933e-11,\n 1.2779e-11],\n ...,\n [1.7372e-11, 1.1640e-11, 1.3308e-11, ..., 2.7208e-11, 1.6782e-11,\n 1.0306e-11],\n [1.1348e-11, 1.0429e-11, 1.3006e-11, ..., 1.5852e-11, 1.9021e-11,\n 1.1274e-11],\n [1.8771e-11, 1.2043e-11, 9.5919e-12, ..., 1.5492e-11, 1.4667e-11,\n 1.7422e-11]], device='cuda:0')" + "step": "tensor(3756.)", + "exp_avg": "tensor([[-1.4582e-07, -3.2010e-08, 8.8345e-08, ..., -8.3714e-07,\n 2.0524e-07, -1.4161e-07],\n [ 2.5680e-07, -6.5429e-07, -1.0099e-06, ..., 5.4951e-08,\n 2.9744e-07, -5.2585e-07],\n [ 2.2452e-08, 3.6399e-07, -1.0430e-06, ..., 2.9101e-07,\n -3.9084e-08, 2.8615e-07],\n ...,\n [ 1.3794e-07, 4.5533e-07, -2.2350e-07, ..., -1.5740e-06,\n -4.9998e-07, -2.8954e-07],\n [-1.9641e-07, 4.6601e-07, 1.3403e-06, ..., 8.0811e-08,\n -3.8842e-07, 1.8245e-07],\n [-4.6505e-08, -2.4896e-07, 1.7009e-07, ..., -4.5994e-07,\n 2.9078e-07, 4.7989e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.2996e-12, 2.7972e-12, 3.1599e-12, ..., 6.2085e-12, 3.3779e-12,\n 3.2575e-12],\n [6.1283e-12, 7.4758e-12, 4.7177e-12, ..., 7.6014e-12, 5.9618e-12,\n 5.2781e-12],\n [5.7199e-12, 7.3769e-12, 6.4712e-12, ..., 7.2539e-12, 7.4178e-12,\n 5.9844e-12],\n ...,\n [7.8544e-12, 5.5159e-12, 6.4635e-12, ..., 1.2175e-11, 9.2106e-12,\n 5.3115e-12],\n [5.7846e-12, 5.2673e-12, 7.7382e-12, ..., 7.8881e-12, 1.0459e-11,\n 5.6582e-12],\n [8.8593e-12, 7.0071e-12, 5.0220e-12, ..., 7.6055e-12, 6.9697e-12,\n 8.3465e-12]], device='cuda:0')" }, "11": { - "step": "tensor(1252.)", - "exp_avg": "tensor([[-6.8897e-07, -4.6710e-06, 4.8876e-07, ..., -2.9600e-07,\n 0.0000e+00, -1.1542e-06],\n [ 2.8595e-07, 1.5491e-06, 5.7516e-08, ..., 7.6243e-07,\n 0.0000e+00, -7.9567e-07],\n [ 2.0503e-07, 4.0376e-07, 1.4319e-06, ..., 7.9911e-07,\n 0.0000e+00, -1.8536e-07],\n ...,\n [ 1.8748e-07, 2.9464e-07, -1.0529e-07, ..., 2.6952e-08,\n 0.0000e+00, -1.2540e-06],\n [-6.4378e-07, 1.9082e-06, 1.5614e-07, ..., 6.1223e-06,\n 0.0000e+00, 4.7339e-06],\n [ 3.1858e-07, 1.0188e-07, -4.1999e-08, ..., 4.4435e-06,\n 0.0000e+00, 1.2459e-07]], device='cuda:0')", - "exp_avg_sq": "tensor([[5.8567e-11, 1.0354e-10, 2.4364e-11, ..., 1.9493e-11, 0.0000e+00,\n 8.6566e-11],\n [2.7991e-11, 5.3123e-11, 1.7145e-11, ..., 2.1725e-11, 0.0000e+00,\n 1.7234e-10],\n [1.7207e-11, 1.2691e-11, 1.0054e-10, ..., 1.3540e-10, 0.0000e+00,\n 5.0290e-11],\n ...,\n [2.8249e-12, 8.9585e-12, 2.8347e-11, ..., 1.6698e-11, 0.0000e+00,\n 2.0179e-11],\n [5.0556e-11, 9.0987e-11, 1.3793e-10, ..., 1.2227e-10, 0.0000e+00,\n 1.5447e-10],\n [4.3362e-11, 1.0137e-12, 2.3495e-11, ..., 8.1452e-11, 0.0000e+00,\n 4.6459e-11]], device='cuda:0')" + "step": "tensor(2504.)", + "exp_avg": "tensor([[-8.8237e-07, -2.4691e-06, 8.1761e-07, ..., 3.4484e-06,\n 0.0000e+00, -1.1416e-06],\n [-6.1348e-07, -1.0298e-06, -1.4337e-07, ..., 3.6510e-07,\n 0.0000e+00, -1.6920e-06],\n [ 3.2471e-07, -9.0841e-08, -6.9405e-07, ..., 5.1907e-06,\n 0.0000e+00, -1.2068e-07],\n ...,\n [ 1.7956e-07, 2.7996e-07, 1.6133e-06, ..., 1.5855e-07,\n 0.0000e+00, 2.8758e-07],\n [ 7.8083e-07, -9.2970e-07, -2.8678e-07, ..., 5.4130e-08,\n 0.0000e+00, -9.7476e-07],\n [-3.9340e-07, -1.8520e-10, 1.7226e-07, ..., -1.1919e-06,\n 0.0000e+00, -7.6001e-08]], device='cuda:0')", + "exp_avg_sq": "tensor([[4.6084e-11, 5.9483e-11, 1.5442e-11, ..., 1.3043e-11, 0.0000e+00,\n 4.6604e-11],\n [1.6724e-11, 3.6630e-11, 8.9965e-12, ..., 2.2404e-11, 0.0000e+00,\n 1.1194e-10],\n [9.5505e-12, 4.8172e-12, 5.9238e-11, ..., 8.9772e-11, 0.0000e+00,\n 2.8023e-11],\n ...,\n [1.8681e-12, 4.6385e-12, 1.7741e-11, ..., 8.7402e-12, 0.0000e+00,\n 1.5037e-11],\n [2.3605e-11, 4.3548e-11, 8.2598e-11, ..., 9.4976e-11, 0.0000e+00,\n 1.0488e-10],\n [1.8798e-11, 3.3586e-13, 1.0003e-11, ..., 4.4677e-11, 0.0000e+00,\n 2.4950e-11]], device='cuda:0')" }, "12": { - "step": "tensor(1252.)", - "exp_avg": "tensor([-9.3791e-06, 1.8833e-06, 4.6128e-06, ..., -5.9642e-06,\n 2.2697e-07, 2.0803e-05], device='cuda:0')", - "exp_avg_sq": "tensor([6.1170e-09, 3.8040e-09, 5.0233e-09, ..., 2.7439e-09, 8.0797e-09,\n 5.1469e-09], device='cuda:0')" + "step": "tensor(2504.)", + "exp_avg": "tensor([ 2.9945e-05, -5.4609e-06, 9.1374e-06, ..., -2.6447e-05,\n -2.7284e-06, 3.6155e-06], device='cuda:0')", + "exp_avg_sq": "tensor([3.4838e-09, 2.6143e-09, 2.7865e-09, ..., 2.2366e-09, 4.0454e-09,\n 2.6550e-09], device='cuda:0')" }, "13": { + "step": "tensor(2504.)", + "exp_avg": "tensor([[-2.7803e-07, -3.7306e-08, 1.1099e-06, ..., 4.3446e-07,\n -2.4451e-07, 5.3858e-07],\n [ 1.9896e-06, -2.9311e-07, -1.4487e-06, ..., -7.3788e-08,\n 1.1764e-07, 3.2649e-07],\n [-2.2956e-07, -4.2627e-08, 5.5378e-07, ..., -1.2870e-07,\n 1.8180e-07, 5.0446e-07],\n ...,\n [-4.1148e-07, -4.8784e-07, -1.2887e-06, ..., -7.5846e-07,\n -1.7294e-07, 2.4146e-07],\n [ 3.0531e-07, -1.0245e-06, 3.2995e-07, ..., -8.1471e-08,\n -2.0297e-07, -7.3061e-07],\n [-1.2335e-06, 4.5136e-07, 5.9361e-08, ..., 1.0099e-07,\n 3.9649e-07, 2.4582e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.1799e-12, 1.9794e-12, 2.9180e-12, ..., 3.1936e-12, 4.0457e-12,\n 6.2768e-12],\n [6.2242e-12, 3.3000e-12, 8.1354e-12, ..., 7.6155e-12, 8.7068e-12,\n 1.2365e-11],\n [5.2473e-12, 5.0067e-12, 7.2966e-12, ..., 8.2098e-12, 8.7280e-12,\n 3.6841e-12],\n ...,\n [6.8874e-12, 5.9269e-12, 9.9386e-12, ..., 1.0752e-11, 9.0643e-12,\n 1.1276e-11],\n [4.8607e-12, 8.6427e-12, 4.2208e-12, ..., 3.9808e-12, 8.7024e-12,\n 1.2994e-11],\n [5.0820e-12, 3.4031e-12, 7.2314e-12, ..., 8.0791e-12, 9.0745e-12,\n 5.7353e-12]], device='cuda:0')" + }, + "14": { + "step": "tensor(1252.)", + "exp_avg": "tensor([[-5.5836e-08, 5.9145e-08, 3.1468e-06, ..., 5.0826e-06,\n 0.0000e+00, 1.1467e-06],\n [-1.2405e-06, 1.2115e-08, 3.8742e-07, ..., 8.6341e-07,\n 0.0000e+00, 2.7410e-06],\n [ 1.2883e-06, 2.8321e-07, 1.7832e-07, ..., -3.5196e-07,\n 0.0000e+00, 8.1146e-08],\n ...,\n [-7.3957e-06, 3.4333e-07, 1.1083e-06, ..., -1.6878e-07,\n 0.0000e+00, 8.5872e-08],\n [ 3.9460e-08, -1.4093e-07, -1.3078e-08, ..., 1.3118e-06,\n 0.0000e+00, -1.2148e-06],\n [-6.4453e-07, 2.9878e-08, -1.1689e-06, ..., 6.4297e-06,\n 0.0000e+00, 1.5700e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[9.2732e-12, 2.8539e-12, 2.8385e-10, ..., 7.6557e-10, 0.0000e+00,\n 1.0338e-10],\n [3.4943e-11, 2.6239e-12, 1.9723e-10, ..., 5.9832e-11, 0.0000e+00,\n 5.7539e-11],\n [5.1052e-11, 9.2555e-12, 1.2074e-11, ..., 1.0524e-11, 0.0000e+00,\n 1.0740e-11],\n ...,\n [3.2242e-11, 5.1022e-12, 3.0993e-11, ..., 1.1228e-11, 0.0000e+00,\n 1.1824e-11],\n [8.6551e-12, 7.5511e-12, 9.6267e-12, ..., 6.9740e-11, 0.0000e+00,\n 3.0929e-10],\n [6.4763e-11, 2.8280e-12, 3.2130e-10, ..., 1.6215e-09, 0.0000e+00,\n 3.4952e-11]], device='cuda:0')" + }, + "15": { + "step": "tensor(1252.)", + "exp_avg": "tensor([-8.1167e-06, 1.7618e-05, -1.1373e-05, ..., 1.3225e-05,\n -2.5681e-06, 9.6634e-06], device='cuda:0')", + "exp_avg_sq": "tensor([8.3071e-09, 5.5925e-09, 6.2431e-09, ..., 3.4018e-09, 4.5848e-09,\n 7.2586e-09], device='cuda:0')" + }, + "16": { "step": "tensor(1252.)", - "exp_avg": "tensor([[ 1.5962e-07, 2.5146e-07, 3.1250e-07, ..., -7.0280e-07,\n 1.0569e-07, 1.3038e-06],\n [-2.9781e-08, 3.7757e-07, 1.4836e-06, ..., 1.4712e-06,\n -1.8213e-07, -5.4158e-07],\n [ 1.8238e-07, -3.2871e-07, -2.0087e-07, ..., -5.7187e-07,\n -1.1890e-06, -8.4515e-07],\n ...,\n [-2.3393e-08, 1.9342e-07, 2.9656e-07, ..., 3.3764e-07,\n -1.5531e-06, 6.3582e-07],\n [ 5.0156e-07, -1.3230e-07, -8.1853e-07, ..., 1.5280e-06,\n -2.1728e-07, -1.3523e-06],\n [-2.5958e-07, -3.4229e-07, 1.5821e-07, ..., -3.3576e-07,\n 7.4057e-07, -4.1413e-07]], device='cuda:0')", - "exp_avg_sq": "tensor([[3.8725e-12, 3.7632e-12, 5.5531e-12, ..., 5.3796e-12, 9.4119e-12,\n 1.0596e-11],\n [1.3108e-11, 6.9166e-12, 1.4761e-11, ..., 1.2494e-11, 1.7901e-11,\n 2.9108e-11],\n [1.0348e-11, 1.0691e-11, 1.3125e-11, ..., 1.4415e-11, 1.6390e-11,\n 7.6432e-12],\n ...,\n [1.4102e-11, 1.1160e-11, 1.9705e-11, ..., 2.8389e-11, 2.0457e-11,\n 2.0704e-11],\n [9.0765e-12, 2.1475e-11, 8.8586e-12, ..., 6.3389e-12, 1.9230e-11,\n 2.1308e-11],\n [8.8466e-12, 6.1225e-12, 1.6023e-11, ..., 1.1453e-11, 1.9112e-11,\n 1.1100e-11]], device='cuda:0')" + "exp_avg": "tensor([[-1.5481e-07, 2.8678e-07, 5.5108e-07, ..., -2.0205e-07,\n 1.5189e-08, 2.9531e-07],\n [-3.4451e-08, 6.0454e-08, 6.9110e-07, ..., -2.5115e-07,\n -2.3807e-07, -1.4570e-07],\n [ 5.8860e-09, 3.2474e-07, -5.4785e-07, ..., 3.3104e-08,\n 1.4735e-08, 3.6657e-07],\n ...,\n [ 9.7897e-07, -7.7870e-07, 1.5313e-07, ..., -5.1950e-07,\n 9.6189e-08, 4.9757e-07],\n [-1.3136e-06, -5.9382e-07, 3.5333e-07, ..., -1.2665e-08,\n 2.4294e-07, 3.7891e-07],\n [-1.3422e-07, 1.2921e-06, -6.8474e-07, ..., 1.9939e-07,\n 2.1495e-07, -3.0081e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[5.1165e-12, 4.8524e-12, 4.6993e-12, ..., 2.6035e-12, 3.6581e-12,\n 3.5904e-12],\n [6.9779e-12, 6.6144e-12, 1.1463e-11, ..., 6.5572e-12, 1.0694e-11,\n 5.1223e-12],\n [6.9161e-12, 7.6088e-12, 7.9918e-12, ..., 8.8075e-12, 3.1320e-11,\n 4.0475e-12],\n ...,\n [9.1454e-12, 9.5437e-12, 5.1974e-12, ..., 6.5395e-12, 5.4946e-12,\n 4.7305e-12],\n [7.3494e-12, 6.6373e-12, 8.6532e-12, ..., 5.7239e-12, 2.3864e-11,\n 3.8321e-12],\n [8.5844e-12, 1.5904e-11, 6.9594e-12, ..., 7.6194e-12, 8.6619e-12,\n 6.7029e-12]], device='cuda:0')" } }, "param_groups": [ { - "lr": 0.00654543046337755, + "lr": 0.005000500000000001, "name": "shared", "betas": [ 0.9, @@ -242,7 +257,7 @@ ] }, { - "lr": 0.00654543046337755, + "lr": 0.005000500000000001, "name": "scale_256", "betas": [ 0.9, @@ -265,7 +280,7 @@ ] }, { - "lr": 0.00654543046337755, + "lr": 0.005000500000000001, "name": "scale_512", "betas": [ 0.9, @@ -288,7 +303,7 @@ ] }, { - "lr": 0.00654543046337755, + "lr": 0.005000500000000001, "name": "scale_768", "betas": [ 0.9, @@ -311,7 +326,7 @@ ] }, { - "lr": 0.00654543046337755, + "lr": 0.005000500000000001, "name": "scale_1024", "betas": [ 0.9, @@ -334,7 +349,7 @@ ] }, { - "lr": 0.00654543046337755, + "lr": 0.005000500000000001, "name": "scale_1280", "betas": [ 0.9, @@ -357,7 +372,7 @@ ] }, { - "lr": 0.00654543046337755, + "lr": 0.005000500000000001, "name": "scale_1536", "betas": [ 0.9, @@ -380,7 +395,7 @@ ] }, { - "lr": 0.00654543046337755, + "lr": 0.005000500000000001, "name": "scale_1792", "betas": [ 0.9, @@ -403,7 +418,7 @@ ] }, { - "lr": 0.00654543046337755, + "lr": 0.005000500000000001, "name": "scale_2048", "betas": [ 0.9, @@ -426,7 +441,7 @@ ] }, { - "lr": 0.00654543046337755, + "lr": 0.005000500000000001, "name": "scale_2304", "betas": [ 0.9, @@ -449,7 +464,7 @@ ] }, { - "lr": 0.00654543046337755, + "lr": 0.005000500000000001, "name": "scale_2560", "betas": [ 0.9, @@ -472,7 +487,7 @@ ] }, { - "lr": 0.0032728879774401812, + "lr": 0.0025005, "name": "fusion", "betas": [ 0.9, @@ -528,7 +543,7 @@ "T_i": 10, "T_mult": 2, "eta_min": 1e-06, - "T_cur": 4, + "T_cur": 5, "base_lrs": [ 0.01, 0.01, @@ -543,33 +558,34 @@ 0.01, 0.005 ], - "last_epoch": 4, + "last_epoch": 5, "_step_count": 0, "_is_initial": false, "_get_lr_called_within_step": false, "_last_lr": [ - 0.00654543046337755, - 0.00654543046337755, - 0.00654543046337755, - 0.00654543046337755, - 0.00654543046337755, - 0.00654543046337755, - 0.00654543046337755, - 0.00654543046337755, - 0.00654543046337755, - 0.00654543046337755, - 0.00654543046337755, - 0.0032728879774401812 + 0.005000500000000001, + 0.005000500000000001, + 0.005000500000000001, + 0.005000500000000001, + 0.005000500000000001, + 0.005000500000000001, + 0.005000500000000001, + 0.005000500000000001, + 0.005000500000000001, + 0.005000500000000001, + 0.005000500000000001, + 0.0025005 ] }, "metrics": { - "best_val_acc": 81.546, - "best_epoch": 3, + "best_val_acc": 81.822, + "best_epoch": 4, "scale_accuracies": { - "256": 81.546, - "512": 81.488, - "768": 81.392, - "1024": 80.922 + "256": 81.822, + "512": 81.94, + "768": 81.974, + "1024": 81.794, + "1280": 81.448 } }, "train_config": {