diff --git "a/weights/David-partial_shared-hierarchical_tree/20251012_210041/best_model_acc74.23_metadata.json" "b/weights/David-partial_shared-hierarchical_tree/20251012_210041/best_model_acc74.23_metadata.json" new file mode 100644--- /dev/null +++ "b/weights/David-partial_shared-hierarchical_tree/20251012_210041/best_model_acc74.23_metadata.json" @@ -0,0 +1,676 @@ +{ + "epoch": 7, + "optimizer_state_dict": { + "state": { + "0": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[ 2.8436e-05, -2.0844e-05, -6.8647e-06, ..., 1.4959e-05,\n -4.5391e-06, 4.2515e-06],\n [-3.0293e-05, 8.6739e-06, -2.3393e-05, ..., 1.0169e-05,\n 4.4260e-05, -2.7436e-05],\n [-4.3454e-05, 3.7610e-05, -9.4659e-06, ..., -3.6255e-05,\n 7.1751e-06, -4.6767e-05],\n ...,\n [ 8.6523e-06, 1.1321e-05, -5.7048e-06, ..., 4.2056e-05,\n -5.6781e-06, 5.1038e-06],\n [-8.8911e-05, -2.1400e-05, -9.4092e-06, ..., -3.5393e-05,\n -3.3014e-05, -2.6304e-05],\n [ 3.7192e-05, -6.6854e-06, -3.7410e-05, ..., 2.7210e-05,\n -1.9966e-05, 5.3704e-05]], device='cuda:0')", + "exp_avg_sq": "tensor([[8.4267e-09, 9.7604e-09, 6.6581e-09, ..., 3.9750e-08, 6.1885e-09,\n 8.6298e-09],\n [1.1609e-08, 1.1756e-08, 7.3072e-09, ..., 5.5889e-08, 8.1253e-09,\n 1.0234e-08],\n [5.4793e-08, 4.7551e-08, 9.5146e-09, ..., 9.0926e-09, 1.1273e-08,\n 9.2751e-09],\n ...,\n [1.1352e-08, 1.1444e-08, 8.4213e-09, ..., 5.0455e-08, 7.7940e-09,\n 9.6162e-09],\n [3.9356e-08, 8.6287e-08, 3.0646e-08, ..., 1.2317e-08, 1.3564e-08,\n 1.0295e-08],\n [3.1317e-08, 3.8971e-08, 1.9045e-08, ..., 2.0900e-08, 8.9590e-09,\n 8.8654e-09]], device='cuda:0')" + }, + "1": { + "step": "tensor(30032.)", + "exp_avg": "tensor([-2.3531e-04, -8.5092e-05, -1.3049e-03, -3.9166e-04, -1.4233e-03,\n 9.2084e-05, 1.4120e-03, -1.3294e-03, 4.3381e-04, -4.2304e-04,\n -8.2397e-04, 1.4027e-03, 1.0739e-03, -5.5838e-04, 7.4799e-04,\n -6.7221e-04, 1.7539e-03, 2.5288e-04, -1.1397e-03, 2.0394e-03,\n 1.2426e-03, -8.9272e-05, 8.0990e-04, 4.1536e-04, 2.0452e-03,\n -6.8604e-04, 2.1527e-04, 5.6430e-04, 1.3658e-04, 2.1837e-03,\n 6.6113e-04, 1.0577e-03, -3.1814e-04, -3.4402e-04, 1.1155e-03,\n 7.6188e-04, 2.6361e-04, -3.7639e-04, 6.5120e-04, 6.1957e-04,\n 8.7849e-05, -5.9616e-04, 1.4627e-03, 1.0667e-03, 1.7960e-04,\n -4.5681e-04, 3.3070e-04, -4.7990e-04, -1.2819e-03, 7.4897e-05,\n 1.8497e-03, 9.2621e-04, 1.2873e-03, 1.1879e-03, 1.3904e-04,\n -5.0098e-04, 1.8666e-04, 9.5358e-04, 8.4574e-04, 4.2192e-04,\n 6.6706e-05, 9.2502e-04, -5.7518e-04, -6.8212e-04, -1.0704e-03,\n 1.8203e-03, -4.2834e-04, 8.0957e-04, 5.5036e-04, 6.9388e-04,\n -1.1655e-03, 1.2525e-03, -1.4603e-03, 7.4398e-04, -3.5624e-04,\n 8.1251e-04, -1.3990e-03, 6.4236e-05, 1.4579e-03, -5.4095e-04,\n -8.1761e-04, 4.9139e-04, 9.6514e-04, -4.9174e-04, 1.4780e-03,\n 6.8703e-04, -9.2758e-04, -1.1540e-04, 5.7156e-04, -9.6071e-04,\n 1.5457e-04, 8.2963e-04, -7.2407e-04, -1.5813e-03, -6.0705e-04,\n -9.9574e-04, 5.4044e-04, 7.1384e-04, -3.7506e-04, -6.2528e-04,\n -5.3653e-04, -7.9211e-04, 5.5039e-04, -8.1339e-06, 7.3536e-05,\n 1.2442e-03, -3.5534e-04, -4.3967e-04, -6.2745e-04, 1.8512e-03,\n 2.3811e-04, -5.3133e-04, -1.0655e-03, 2.5902e-04, -1.8109e-04,\n 2.2053e-03, -2.7888e-04, 5.1894e-04, 3.6681e-04, 1.1535e-03,\n -6.3082e-04, -6.1725e-04, -6.3445e-04, 6.4492e-05, 4.9739e-04,\n 3.3181e-04, 1.8535e-04, 1.4551e-04, -7.9330e-06, 1.4955e-03,\n -8.3849e-04, -1.2467e-03, -6.6123e-04, -6.0099e-04, 7.1728e-04,\n -1.5933e-03, 7.6459e-04, 4.7172e-04, -2.8054e-04, -8.8876e-04,\n -1.3703e-03, -2.1887e-04, -4.7991e-04, 5.3538e-05, -8.9208e-04,\n -1.0268e-03, -3.7557e-04, 2.3099e-04, -7.8624e-04, 3.2553e-04,\n -3.3595e-04, -3.9247e-04, -1.0707e-03, 4.2575e-04, -2.7163e-04,\n 2.2376e-04, -1.1982e-03, -4.7801e-04, 7.1039e-04, 1.4558e-05,\n 2.1389e-03, 1.3320e-03, 1.2807e-03, 1.0920e-04, 2.0420e-04,\n 1.1603e-03, 1.6496e-03, -1.2631e-03, -8.0450e-04, 6.3438e-04,\n -1.2414e-03, -3.5252e-04, 2.6517e-04, -1.0096e-05, 2.9660e-04,\n -6.3261e-04, 6.0811e-04, 1.2766e-03, 1.5303e-03, -1.4906e-04,\n 1.2527e-03, 9.3317e-04, -2.2586e-04, 7.6439e-04, 1.0009e-03,\n 7.7143e-04, -1.8811e-07, 8.7844e-04, -1.1875e-03, -3.6862e-04,\n 2.1585e-05, -2.6923e-04, -6.0968e-04, 1.4245e-03, -8.0292e-04,\n -1.2099e-03, 8.1358e-04, -9.4245e-04, -2.1105e-03, -1.0938e-03,\n -1.2714e-03, -1.8415e-04, 1.1911e-04, -1.1026e-04, -2.7443e-03,\n -1.3661e-03, -6.6099e-04, -1.5084e-03, -1.4635e-03, 6.2326e-04,\n -1.5235e-04, -1.3890e-03, 2.7469e-04, 2.0141e-04, -3.6426e-04,\n -1.0681e-03, -9.6454e-04, 1.0222e-04, 2.2044e-03, 3.4797e-04,\n 1.0732e-03, 1.0096e-03, 4.4591e-04, -1.0537e-03, 1.3172e-04,\n -8.2005e-04, -3.1847e-05, 2.2461e-04, -1.7456e-03, 7.7212e-05,\n -2.0964e-05, 2.7077e-04, 2.5120e-04, -1.0689e-05, -1.8708e-04,\n -2.1285e-03, 4.1156e-04, 1.9163e-04, 3.1240e-04, 1.1379e-03,\n -2.6903e-04, 3.5524e-04, 1.0495e-03, 5.7125e-04, -6.5027e-04,\n 6.7555e-04, 5.1156e-05, 3.4730e-05, 3.3980e-04, -6.2387e-04,\n 1.2750e-03, 3.9565e-04, -1.6926e-04, 1.0939e-03, -5.2860e-04,\n -9.6870e-04, 3.5412e-04, 1.3194e-03, 1.1062e-03, 6.7727e-05,\n -1.0613e-03, -1.1884e-03, 8.6768e-04, 3.3945e-04, 3.8478e-04,\n 3.4608e-04, 4.7805e-04, -5.0559e-04, -7.9799e-04, 1.3257e-04,\n -1.0867e-03, 1.0634e-03, -1.0319e-03, 2.2388e-04, -5.4202e-04,\n -1.2328e-03, 3.1825e-05, -8.7607e-04, 7.4675e-04, 6.3202e-04,\n -8.4213e-04, 2.1319e-04, 1.3586e-03, -1.3321e-03, 6.0000e-04,\n 1.1495e-03, -2.6169e-04, 5.9048e-05, 8.2769e-05, 1.2171e-03,\n -1.8146e-04, -1.3547e-03, -1.6545e-03, -5.8510e-04, 2.1268e-04,\n 8.5829e-05, -7.9845e-05, 8.2397e-04, -9.0596e-05, -1.5474e-03,\n 5.9016e-05, -8.5878e-04, -6.5746e-05, -4.7798e-04, -7.5429e-04,\n 4.4252e-04, 1.0405e-04, 2.8523e-04, -7.6584e-04, -2.5119e-04,\n -6.8307e-04, 6.2505e-04, -3.7285e-04, -1.6287e-03, 5.1493e-04,\n -2.1993e-03, 3.4838e-04, -5.7719e-04, 1.0665e-03, -3.1808e-04,\n -1.4373e-03, 3.3014e-04, -5.7009e-04, 3.5192e-04, -6.7012e-05,\n 7.2989e-04, -3.8190e-04, -9.5418e-04, -2.5684e-04, 8.8845e-04,\n 6.6831e-04, -4.7133e-05, -1.1387e-03, -1.3988e-03, -3.9346e-04,\n -3.7035e-04, 2.5845e-04, -1.2196e-03, -9.4388e-04, 1.1696e-04,\n -2.5033e-03, -3.1286e-04, 4.0962e-04, 2.5670e-04, -7.7411e-04,\n -1.1148e-03, -5.2607e-04, 4.7276e-05, -2.4496e-04, -7.9986e-05,\n 5.5807e-04, 1.6615e-04, 1.8544e-04, 4.1731e-04, 1.4093e-04,\n -1.7543e-03, -2.0082e-04, -3.4457e-04, 5.6259e-04, -4.5451e-04,\n 9.1978e-04, 1.1650e-03, -8.1237e-04, -1.8392e-03, -1.7468e-04,\n -9.6330e-04, -9.8324e-04, -2.3652e-03, 4.4408e-04, 7.6789e-04,\n 1.1466e-03, -2.8057e-05, 9.7105e-04, -1.4020e-05, -7.4847e-04,\n -1.4994e-03, 1.0294e-03, 1.8521e-03, -2.9469e-04, 6.5439e-04,\n 4.6147e-04, 3.8232e-05, 1.5916e-06, -1.5970e-03, -1.1953e-03,\n 1.1851e-03, -4.5104e-04, 6.7667e-04, 5.5024e-04, 4.5942e-05,\n 2.8373e-04, -1.0709e-04, 3.6653e-04, -4.1563e-04, 4.3581e-04,\n -8.3032e-06, -6.9012e-04, -2.2000e-03, 1.5284e-04, -9.2112e-04,\n -6.7741e-04, -6.0491e-04, 1.8751e-04, -1.0519e-03, 7.5981e-04,\n 2.3821e-04, 1.5266e-03, 5.5249e-04, -1.3510e-03, -3.0616e-04,\n 9.9257e-04, -9.3037e-04, 1.3838e-03, -1.7894e-04, 8.6561e-04,\n -9.0249e-04, -1.4184e-03, 1.1562e-03, -4.1399e-04, -1.7463e-04,\n 2.5493e-05, 7.3334e-04, -9.1425e-04, 7.0559e-04, -2.5794e-05,\n 9.7053e-04, -2.0818e-04, 8.2867e-05, 1.5693e-03, 5.4056e-05,\n 1.1177e-03, -2.0568e-04, 7.0246e-05, 2.6989e-04, 2.4109e-04,\n 2.7942e-06, -1.2214e-04, -2.2632e-04, 1.0017e-03, -4.0746e-04,\n -3.6714e-04, 1.1720e-03, -7.7488e-04, 2.6101e-04, -2.3451e-04,\n 9.5971e-04, 2.0821e-03, -9.1986e-04, -3.7037e-05, 8.0834e-04,\n -7.9154e-04, -7.0664e-04, 1.5616e-04, 7.8260e-04, 1.3995e-03,\n -7.7392e-04, 1.0056e-04, 1.8198e-05, -1.5176e-04, -6.9037e-04,\n 9.8909e-04, -6.5965e-04, 1.3001e-05, 7.2011e-07, -3.0836e-04,\n -1.2858e-05, -1.2912e-03, -4.1303e-05, -1.3001e-03, -2.1506e-03,\n 1.3020e-03, 5.9519e-04, -4.2532e-04, -6.0401e-04, -7.1775e-04,\n -1.4514e-04, 2.0784e-03, -7.3460e-04, 1.0652e-03, -6.2200e-04,\n 1.3194e-04, 6.9270e-04, -4.3214e-04, 2.7344e-03, -6.0125e-04,\n -1.9556e-04, 1.3352e-03, 1.2809e-03, -1.0306e-03, -2.5775e-03,\n -2.0561e-04, 1.0682e-03, -1.3235e-04, 4.1065e-04, 1.2579e-03,\n -8.8245e-04, 2.3170e-04, 2.9517e-04, 8.6360e-04, 6.7293e-04,\n -1.6918e-03, -5.0334e-04, 2.9731e-04, 1.5306e-03, 6.7362e-04,\n -2.5439e-04, -3.4196e-04, 4.4977e-04, 3.1037e-04, -3.5334e-04,\n 7.6062e-04, -4.9634e-04, 4.3631e-04, 9.6186e-04, 1.0998e-03,\n 1.2872e-04, 6.6106e-04, 6.2523e-04, -1.3083e-05, -7.0657e-06,\n -2.6429e-04, 1.5687e-04, 2.2654e-04, 2.6370e-04, -3.0799e-04,\n 1.0908e-03, 5.0359e-04, 1.7007e-04, -2.8483e-04, 3.7224e-04,\n -1.0178e-04, 1.9835e-05, 1.5201e-04, 1.2172e-03, -8.8194e-05,\n 4.5654e-04, 6.6540e-06, 1.8648e-03, -1.1758e-03, 2.1527e-04,\n -1.0004e-03, -1.2583e-03, 1.2750e-03, 4.2239e-04, 1.4830e-03,\n 9.4566e-05, -7.0297e-05, -1.4804e-03, 1.9343e-03, 4.9578e-04,\n -1.8803e-03, 4.6883e-04, 8.5269e-04, -6.5539e-04, -8.0726e-04,\n -5.4157e-04, 1.2512e-03, 1.4137e-03, -2.2939e-03, 1.9929e-03,\n 3.9961e-04, -2.9182e-04, 1.3773e-04, 1.1005e-03, -1.5458e-03,\n -6.3252e-05, -7.9134e-04, -2.7255e-04, 1.4663e-04, -3.7366e-04,\n -1.8032e-03, -9.8696e-04, -1.1400e-03, 4.2530e-04, -1.8300e-03,\n -8.5868e-04, -7.6117e-05, 7.0967e-05, -7.7752e-04, -1.1329e-04,\n 1.2282e-03, 4.1525e-04, -1.1630e-03, -1.8505e-04, 1.4442e-03,\n 3.1265e-04, 4.5485e-04, 8.1459e-04, 6.3996e-04, 3.5823e-04,\n -8.9365e-04, 1.0343e-03, -4.3655e-05, 4.8092e-04, -8.3151e-04,\n 9.4158e-04, -6.3014e-04, -1.4399e-03, 9.1333e-04, -1.5921e-04,\n 1.3048e-03, -2.3595e-04, -3.4485e-04, -8.5019e-04, -7.4959e-04,\n -2.8470e-04, -1.0692e-03, -2.7111e-04, 1.1078e-04, 7.4358e-06,\n -1.1150e-04, -8.9779e-04, 4.5591e-04, 4.0147e-04, -7.8615e-04,\n -5.1326e-04, -5.4208e-04, -1.1286e-03, 8.2924e-05, -2.1640e-04,\n -1.9137e-04, 1.6969e-04, 2.8777e-04, -7.3734e-04, 6.3834e-04,\n 8.5354e-04, -6.4722e-04, 8.0796e-05, -8.2827e-04, 6.4095e-04,\n 9.9824e-04, 1.5110e-03, -5.3486e-04, -2.0554e-04, -2.1208e-03,\n -1.6176e-03, -1.2847e-03, -3.3398e-04, 9.0826e-04, 1.2098e-04,\n 5.3467e-04, -1.9611e-05, -5.5451e-04, 6.1409e-05, -1.1539e-03,\n 1.7225e-03, 1.0841e-03, -8.2542e-04, 9.0927e-04, -4.4407e-04,\n 8.4581e-04, -9.9915e-04, 6.1977e-05, -1.1868e-03, 3.1352e-05,\n -5.8290e-04, 5.9337e-04, -6.1979e-04, 7.7391e-04, 1.1399e-03,\n 1.9222e-04, 1.5402e-04, 2.2301e-04, -2.1195e-04, -1.6684e-04,\n 1.8437e-03, 1.3633e-03, -1.2484e-03, -5.2309e-04, -3.7690e-04,\n -1.1817e-04, 6.4354e-06, -3.6020e-04, -2.4504e-04, 8.2444e-04,\n -5.5931e-04, 7.2053e-04, -2.3442e-03, 4.1940e-04, -1.4365e-03,\n 3.5580e-04, 5.8096e-04, 1.1580e-03, 1.1936e-04, -4.5867e-04,\n 5.3128e-04, 1.0677e-03, -1.5501e-03, -2.1060e-03, 5.6285e-04,\n 7.2581e-04, -1.0009e-03, -5.5603e-04, -5.2208e-04, 8.0468e-04,\n -2.8423e-04, -8.1658e-04, -1.6897e-04, 3.4217e-04, -3.7162e-04,\n -2.9995e-04, 5.1954e-04, 1.9482e-05, 2.3326e-04, -1.2293e-03,\n -6.0580e-04, -6.4359e-04, -2.0847e-05, -2.4707e-04, 9.0104e-04,\n -2.0699e-03, -1.7999e-05, 1.1558e-03, 1.0668e-04, -1.1694e-04,\n -4.5208e-04, -1.1158e-03, -1.9665e-04, 4.2533e-05, -8.0345e-05,\n 8.2950e-04, -1.9892e-03, 2.0334e-04, 9.1050e-04, -1.0069e-03,\n -1.4210e-03, -7.9349e-04, 1.5187e-03, 4.8385e-04, -3.8217e-04,\n 1.1114e-03, -3.9161e-04, 1.5911e-04, 1.1760e-04, -7.6005e-04,\n -1.1312e-03, -2.2257e-05, -4.8715e-04, -1.1729e-03, 1.6034e-03,\n -9.5871e-04, -7.0442e-04, -9.2348e-05, -2.0390e-04, 2.4251e-03,\n 8.0759e-04, -6.9010e-04, -9.2300e-04, -9.6265e-04, 5.6052e-45,\n 1.8040e-04, 3.3503e-04, 2.8366e-04, -5.5017e-04, -9.2055e-04,\n -1.0049e-03, -1.9086e-03, -5.7627e-04, 8.4208e-04, -6.7889e-04,\n 1.0802e-03, -2.0042e-04, 5.6432e-04, -1.3253e-03, -5.4894e-04,\n 8.6448e-04, -1.3841e-03, 1.0523e-03], device='cuda:0')", + "exp_avg_sq": "tensor([1.2779e-05, 1.5518e-05, 1.3091e-05, 8.8461e-06, 1.0083e-05, 8.2451e-06,\n 1.5678e-05, 1.4411e-05, 1.1315e-05, 5.0269e-06, 3.1745e-06, 5.2019e-06,\n 1.0690e-05, 8.1270e-06, 6.7337e-06, 2.9842e-05, 1.3605e-05, 1.3723e-05,\n 1.0854e-05, 1.7340e-05, 1.3348e-05, 1.4505e-05, 2.4797e-05, 7.0327e-06,\n 2.0923e-05, 8.5820e-06, 9.8609e-06, 8.9507e-06, 1.6967e-05, 9.0771e-06,\n 1.2511e-05, 9.6777e-06, 7.1208e-06, 6.0438e-06, 8.7780e-06, 7.7329e-06,\n 8.8116e-06, 1.1717e-05, 1.9829e-05, 1.1940e-05, 1.9634e-05, 6.0237e-06,\n 1.4398e-05, 1.3230e-05, 7.5808e-06, 1.4306e-05, 8.1352e-06, 1.9526e-05,\n 2.4683e-05, 1.9029e-05, 1.5818e-05, 1.4367e-05, 1.5440e-05, 1.2862e-05,\n 7.9167e-06, 1.2328e-05, 1.3065e-05, 1.7456e-05, 1.2065e-05, 6.0813e-06,\n 1.1848e-05, 6.6375e-06, 1.1918e-05, 1.3666e-05, 1.2960e-05, 1.3647e-05,\n 1.0771e-05, 1.5815e-05, 1.5173e-05, 1.2980e-05, 1.2420e-05, 1.1898e-05,\n 9.6776e-06, 1.1284e-05, 1.5290e-05, 9.3842e-06, 9.4359e-06, 1.5100e-05,\n 1.1406e-05, 1.1325e-05, 1.7764e-05, 1.2957e-05, 1.1909e-05, 6.7205e-06,\n 1.5802e-05, 9.8329e-06, 1.7682e-05, 1.7176e-05, 9.0433e-06, 4.5625e-06,\n 5.7166e-06, 1.2243e-05, 1.2261e-05, 2.1528e-05, 1.0152e-05, 1.0893e-05,\n 8.0847e-06, 7.5497e-06, 7.5011e-06, 7.0012e-06, 1.0218e-05, 1.5342e-05,\n 8.0806e-06, 8.1666e-06, 5.4114e-06, 6.7083e-06, 1.3288e-05, 1.7805e-05,\n 4.3851e-06, 1.0346e-05, 1.9240e-05, 1.9834e-05, 1.6138e-05, 1.3922e-05,\n 8.7037e-06, 2.0910e-05, 1.0706e-05, 1.2092e-05, 1.3492e-05, 1.3056e-05,\n 8.2256e-06, 1.0767e-05, 5.1066e-06, 1.4659e-05, 1.9804e-05, 1.3697e-05,\n 2.3512e-06, 1.1793e-05, 7.1833e-06, 5.2619e-06, 7.2303e-06, 1.6235e-05,\n 1.2783e-05, 6.2564e-06, 7.0408e-06, 1.8012e-05, 1.2430e-05, 1.1075e-05,\n 1.7894e-05, 7.0002e-06, 1.2981e-05, 1.4634e-05, 9.6133e-06, 1.2810e-05,\n 5.1005e-06, 1.4239e-05, 9.4345e-06, 6.3245e-06, 1.4647e-05, 1.5756e-05,\n 1.4363e-05, 1.2526e-05, 1.2622e-05, 1.5385e-05, 7.8207e-06, 2.5319e-05,\n 1.8576e-05, 1.5305e-05, 1.6750e-05, 7.0813e-06, 1.4466e-05, 1.1219e-05,\n 8.0591e-06, 1.4364e-05, 1.9003e-05, 8.7286e-06, 1.3350e-05, 1.5086e-05,\n 1.3898e-05, 2.0769e-05, 1.3239e-05, 1.8678e-05, 5.9915e-06, 7.5642e-06,\n 8.3068e-06, 1.7835e-05, 1.7316e-05, 9.9004e-06, 6.1023e-06, 1.2772e-05,\n 1.6195e-05, 1.2317e-05, 6.1222e-06, 7.3869e-06, 1.6656e-05, 1.9963e-05,\n 1.2682e-05, 7.4112e-06, 1.5283e-05, 1.4559e-05, 6.5226e-06, 5.2116e-06,\n 6.9370e-06, 1.6738e-05, 1.4700e-05, 1.4100e-05, 9.0684e-06, 1.6221e-05,\n 2.0399e-05, 1.2916e-05, 7.7904e-06, 3.8511e-06, 8.3096e-06, 1.4075e-05,\n 1.2774e-05, 1.2838e-05, 1.7093e-05, 1.3451e-05, 1.2882e-05, 1.6282e-05,\n 8.0400e-06, 1.8155e-05, 9.3186e-06, 1.1440e-05, 1.5846e-05, 7.6066e-06,\n 1.2617e-05, 1.5891e-05, 1.5162e-05, 5.8974e-06, 1.3081e-05, 1.2096e-05,\n 2.0480e-05, 1.3250e-05, 1.3899e-05, 1.4159e-05, 1.1451e-05, 7.0712e-06,\n 6.9724e-06, 6.0452e-06, 7.0141e-06, 7.1423e-06, 8.8317e-06, 8.1501e-06,\n 1.7888e-05, 1.9058e-05, 1.0782e-05, 1.0612e-05, 8.0427e-06, 1.3342e-05,\n 1.4329e-05, 3.7046e-06, 1.1943e-05, 1.4734e-05, 8.9544e-06, 7.2552e-06,\n 6.3135e-06, 7.6934e-06, 1.1804e-05, 6.1614e-06, 9.3594e-06, 6.5346e-06,\n 1.1194e-05, 1.0424e-05, 1.1265e-05, 1.5860e-05, 1.0056e-05, 1.4488e-05,\n 1.5611e-05, 6.0610e-06, 1.4012e-05, 1.1785e-05, 7.9360e-06, 2.7579e-06,\n 1.1681e-05, 1.0737e-05, 1.2565e-05, 1.2206e-05, 8.7502e-06, 5.8149e-06,\n 1.4607e-05, 1.4581e-05, 1.3677e-05, 5.9243e-06, 6.7009e-06, 7.8231e-06,\n 6.9118e-06, 2.0175e-05, 1.2138e-05, 1.4438e-05, 1.1595e-05, 6.5140e-06,\n 1.4981e-05, 1.7408e-05, 1.4209e-05, 1.2247e-05, 1.3982e-05, 6.8970e-06,\n 9.6828e-06, 1.6622e-05, 5.1451e-06, 1.4815e-05, 1.1565e-05, 1.0951e-05,\n 1.4014e-05, 1.5312e-05, 1.0610e-05, 8.0692e-06, 1.1511e-05, 1.6410e-05,\n 4.7069e-06, 1.2155e-05, 2.0322e-05, 1.0616e-05, 1.5710e-05, 5.4798e-06,\n 6.5667e-06, 7.4206e-06, 7.4074e-06, 8.9281e-06, 1.3620e-05, 1.2284e-05,\n 1.1057e-05, 1.3977e-05, 6.5920e-06, 1.7034e-05, 1.3910e-05, 7.2696e-06,\n 1.1712e-05, 8.9830e-06, 2.4473e-05, 1.4430e-05, 8.4232e-06, 1.4053e-05,\n 1.2430e-05, 1.8243e-05, 1.5477e-05, 1.0919e-05, 6.0899e-06, 1.6648e-05,\n 9.3445e-06, 1.5693e-05, 1.6944e-05, 1.1802e-05, 1.7297e-05, 1.5728e-06,\n 1.3679e-05, 1.5860e-05, 6.8224e-06, 1.6461e-05, 2.0571e-05, 8.9240e-06,\n 7.2581e-06, 1.0042e-05, 2.0191e-05, 1.7525e-06, 1.0481e-05, 8.2164e-06,\n 1.8298e-05, 7.7035e-06, 7.8060e-06, 7.4209e-06, 1.9479e-05, 1.1444e-05,\n 1.5409e-05, 1.2868e-05, 2.2819e-05, 1.2008e-05, 1.4917e-05, 1.5015e-05,\n 6.9146e-06, 1.1481e-05, 1.2958e-05, 1.1292e-05, 1.2513e-05, 1.2832e-05,\n 8.1600e-06, 1.1388e-05, 8.2566e-06, 1.3124e-05, 1.2009e-05, 1.1066e-05,\n 1.9609e-05, 1.2541e-05, 1.2131e-05, 1.5116e-05, 1.0082e-05, 1.0773e-05,\n 8.8707e-06, 1.4514e-05, 6.9133e-06, 1.5494e-05, 5.3709e-06, 1.2986e-05,\n 7.4710e-06, 2.9514e-05, 8.5088e-06, 1.2943e-05, 1.2404e-05, 1.2534e-05,\n 1.2643e-05, 9.8158e-06, 1.1219e-05, 1.3687e-05, 1.6834e-05, 1.1802e-05,\n 4.2445e-06, 1.7291e-05, 1.1429e-05, 1.4673e-05, 1.7020e-05, 6.8688e-06,\n 1.5635e-05, 1.6553e-05, 6.9944e-06, 1.4844e-05, 9.8727e-06, 1.6243e-05,\n 1.3328e-05, 9.0338e-06, 1.0431e-05, 1.1869e-05, 1.4533e-05, 9.6161e-06,\n 5.9369e-06, 1.9177e-05, 1.7807e-05, 1.4078e-05, 1.6580e-05, 5.5720e-06,\n 1.0929e-05, 1.0708e-05, 1.2030e-05, 1.8870e-05, 1.4297e-05, 1.3041e-05,\n 1.5149e-05, 1.4846e-05, 1.0483e-05, 9.8028e-06, 7.1599e-06, 4.4661e-06,\n 7.4326e-06, 6.9197e-06, 1.4483e-05, 1.0348e-05, 2.9571e-06, 6.4094e-06,\n 1.3834e-05, 1.1399e-05, 1.3931e-05, 1.5427e-05, 5.4113e-06, 1.2169e-05,\n 1.4006e-05, 6.5694e-06, 1.7645e-05, 1.3251e-05, 7.5953e-06, 6.6638e-06,\n 9.4411e-06, 1.0912e-05, 1.7464e-05, 1.8039e-05, 1.3073e-05, 1.1623e-05,\n 6.5197e-06, 1.5549e-05, 9.4015e-06, 5.6218e-06, 1.7605e-05, 1.3647e-05,\n 1.3742e-05, 1.3554e-05, 1.2784e-05, 6.8355e-06, 1.5621e-05, 1.4509e-05,\n 1.2817e-05, 1.1052e-05, 1.0459e-05, 8.3004e-06, 8.4860e-06, 1.1968e-05,\n 1.3490e-05, 8.7028e-06, 2.3231e-05, 1.0041e-05, 1.3470e-05, 5.5963e-06,\n 1.3359e-05, 1.6636e-05, 1.4108e-05, 1.8851e-05, 1.9517e-05, 8.7462e-06,\n 1.2004e-05, 1.3448e-05, 1.4783e-05, 1.5980e-05, 1.3830e-05, 1.6203e-05,\n 3.8467e-06, 7.4897e-06, 1.3583e-05, 1.1536e-05, 1.7536e-05, 1.7992e-05,\n 1.9293e-05, 7.0384e-06, 1.5968e-05, 1.7008e-05, 1.2156e-05, 1.4687e-05,\n 1.2902e-05, 7.9855e-06, 5.4303e-06, 1.5422e-05, 1.0397e-05, 9.5969e-06,\n 1.6300e-05, 5.6362e-06, 3.1398e-06, 1.6095e-05, 9.2264e-06, 5.2862e-06,\n 1.5024e-05, 1.3818e-05, 9.0787e-06, 1.4773e-05, 1.5596e-05, 1.1390e-05,\n 1.4012e-05, 1.3440e-05, 1.4954e-05, 6.3588e-06, 9.0750e-06, 1.4654e-05,\n 1.2943e-05, 1.3964e-05, 1.1763e-05, 6.9395e-06, 5.9657e-06, 1.3817e-05,\n 2.0902e-05, 1.3629e-05, 1.1465e-05, 2.0264e-05, 5.9803e-06, 1.2871e-05,\n 2.1107e-05, 1.5741e-05, 1.2864e-05, 7.4639e-06, 1.0287e-05, 1.2991e-05,\n 8.7493e-06, 1.6591e-05, 1.5566e-05, 1.1909e-05, 1.4306e-05, 1.8397e-05,\n 1.2028e-05, 6.1061e-06, 1.5451e-05, 8.5908e-06, 1.3165e-05, 1.6935e-05,\n 2.5684e-05, 2.3007e-05, 1.1900e-05, 6.3031e-06, 9.2003e-06, 1.5796e-05,\n 1.0882e-05, 5.9015e-06, 1.4773e-05, 2.0823e-05, 9.2456e-06, 8.5573e-06,\n 9.9292e-06, 8.1119e-06, 2.4336e-06, 1.3799e-05, 1.8123e-05, 8.3602e-06,\n 8.5886e-06, 9.2636e-06, 7.0546e-06, 1.0844e-05, 7.5572e-06, 9.8114e-06,\n 1.4855e-05, 1.2995e-05, 1.3255e-05, 3.9886e-06, 1.6290e-05, 1.3164e-05,\n 1.5341e-05, 5.3712e-06, 8.0414e-06, 8.0718e-06, 6.3563e-06, 1.5197e-05,\n 1.1872e-05, 1.5835e-05, 9.3038e-06, 1.4544e-05, 7.4984e-06, 1.4166e-05,\n 1.9148e-05, 1.5744e-05, 7.5501e-06, 1.5598e-05, 1.7157e-05, 8.3323e-06,\n 1.3265e-05, 2.1265e-05, 1.4518e-05, 5.5407e-06, 1.4219e-05, 1.3816e-05,\n 6.6737e-06, 1.4082e-05, 2.2280e-05, 1.3667e-05, 8.4599e-06, 1.0253e-05,\n 1.4788e-05, 1.4116e-05, 1.2820e-05, 8.9670e-06, 9.5077e-06, 6.3968e-06,\n 1.3059e-05, 6.2442e-06, 5.5687e-06, 7.5974e-06, 1.1604e-05, 1.4185e-05,\n 7.7196e-06, 8.8800e-06, 1.3570e-05, 1.0136e-05, 1.5859e-05, 1.5568e-05,\n 1.3061e-05, 1.1497e-05, 9.7984e-06, 1.4092e-05, 1.4219e-05, 1.3974e-05,\n 1.8653e-05, 1.0295e-05, 7.1287e-06, 2.1625e-05, 1.8097e-05, 1.5327e-05,\n 1.8256e-05, 1.1577e-05, 9.0554e-06, 1.7667e-05, 5.4255e-06, 1.5311e-05,\n 1.7257e-05, 1.1372e-05, 7.6842e-06, 1.1470e-05, 2.0169e-05, 6.0284e-06,\n 1.6083e-05, 1.3255e-05, 1.6123e-05, 1.3086e-05, 7.4360e-06, 1.6762e-05,\n 1.1061e-05, 1.6082e-05, 1.4405e-05, 1.5326e-05, 1.4299e-05, 1.1182e-05,\n 4.3846e-06, 9.5350e-06, 2.5652e-05, 1.4763e-05, 1.2758e-05, 1.3926e-05,\n 1.1033e-05, 1.1404e-05, 6.6769e-06, 6.0506e-06, 1.8239e-05, 1.4698e-05,\n 1.0288e-05, 5.8118e-06, 1.1975e-05, 1.5009e-05, 1.1455e-05, 1.0342e-05,\n 1.8307e-05, 1.3385e-05, 1.1578e-05, 7.5790e-06, 1.1631e-05, 1.1193e-05,\n 6.8614e-06, 8.3936e-06, 1.3616e-05, 8.9205e-06, 7.6681e-06, 1.3877e-05,\n 1.5237e-05, 1.1465e-05, 7.6650e-06, 1.2626e-05, 9.5564e-06, 1.4859e-05,\n 7.6275e-06, 8.9245e-06, 1.6516e-05, 1.2809e-05, 6.8117e-06, 2.3767e-05,\n 8.6083e-06, 8.7406e-06, 1.3960e-05, 5.8701e-06, 9.3552e-06, 5.7810e-06,\n 1.2477e-05, 1.5490e-05, 1.0277e-05, 7.6899e-06, 7.2831e-06, 1.4971e-05,\n 1.0659e-05, 2.1851e-05, 1.1036e-05, 9.7358e-06, 1.7251e-05, 8.5844e-06,\n 1.3852e-05, 9.2273e-06, 1.2973e-05, 1.2619e-05, 1.0401e-05, 5.6381e-06,\n 1.7228e-05, 7.8059e-06, 8.0680e-06, 1.2282e-05, 1.1478e-05, 6.4729e-06,\n 1.1411e-05, 1.1657e-05, 1.3757e-05, 1.0801e-05, 1.2479e-05, 5.8147e-20,\n 1.0241e-05, 1.9992e-05, 1.5662e-05, 1.0506e-05, 1.2961e-05, 1.4830e-05,\n 1.4559e-05, 7.0212e-06, 1.0488e-05, 1.0638e-05, 1.4064e-05, 1.2250e-05,\n 1.2899e-05, 1.2373e-05, 1.4576e-05, 1.6324e-05, 1.4943e-05, 1.3137e-05],\n device='cuda:0')" + }, + "2": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[ 1.2263e-06, 7.5129e-06, -1.8477e-09, ..., 2.6607e-06,\n -3.5730e-06, 1.4814e-06],\n [ 4.4245e-06, -1.0461e-05, -9.6432e-07, ..., -1.1214e-05,\n 2.3826e-06, 4.1668e-07],\n [ 2.9403e-06, -5.8026e-06, 1.1679e-06, ..., -7.9139e-06,\n -5.6718e-06, -1.3808e-06],\n ...,\n [-1.5232e-06, 6.1880e-07, -5.9085e-07, ..., -7.8607e-06,\n 4.4687e-06, -1.2803e-05],\n [-5.2860e-07, -6.8519e-07, -5.8831e-06, ..., -5.3692e-07,\n -7.6871e-06, -2.1340e-05],\n [ 2.5678e-06, 1.4279e-09, -7.0940e-07, ..., 1.0100e-06,\n 6.4774e-06, 2.3135e-05]], device='cuda:0')", + "exp_avg_sq": "tensor([[4.0098e-10, 1.0729e-09, 1.4895e-10, ..., 3.3293e-10, 1.3942e-10,\n 1.0282e-10],\n [5.7111e-10, 1.7239e-09, 1.0617e-09, ..., 2.8312e-08, 1.2201e-10,\n 3.2325e-11],\n [4.2947e-10, 1.0562e-09, 1.9531e-09, ..., 8.2781e-10, 2.2827e-10,\n 8.5661e-11],\n ...,\n [1.2291e-10, 3.1741e-10, 2.0929e-10, ..., 3.7059e-10, 2.7937e-10,\n 7.8237e-10],\n [2.6132e-10, 3.1039e-10, 1.0532e-09, ..., 1.2884e-10, 2.5617e-10,\n 1.2051e-08],\n [1.7752e-10, 6.5337e-11, 9.8271e-10, ..., 2.6311e-10, 1.0989e-09,\n 1.7886e-08]], device='cuda:0')" + }, + "3": { + "step": "tensor(30032.)", + "exp_avg": "tensor([-1.8377e-05, -1.5799e-05, -4.3638e-06, -2.7303e-06, 5.6052e-45,\n -2.6411e-05, 5.6052e-45, 1.1752e-05, 3.9213e-06, -3.0958e-05,\n 1.3499e-06, 4.3601e-06, -2.1172e-05, -1.5864e-05, -1.0512e-05,\n 2.6258e-05, 5.6052e-45, 1.4668e-05, 2.5929e-06, -1.1365e-05,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -8.1135e-07, -2.5470e-05,\n -6.8919e-06, 3.9070e-05, -1.5074e-05, 8.5762e-06, 7.6833e-07,\n -2.5919e-05, 5.6052e-45, -3.0531e-05, -5.4559e-05, 4.5204e-05,\n -2.1358e-05, -1.4149e-05, 5.6052e-45, -1.5528e-06, 8.6480e-06,\n 8.4709e-06, 5.6052e-45, 7.7461e-06, 5.6052e-45, 2.4471e-05,\n 3.9818e-06, -1.3346e-05, 1.1744e-05, 4.5475e-06, -2.9511e-05,\n 5.6052e-45, 1.7431e-05, 5.6052e-45, 1.3580e-05, 8.3451e-06,\n -2.0786e-06, -2.8636e-05, 5.6052e-45, -2.1546e-06, 1.0089e-05,\n -1.6244e-06, -6.7500e-06, -1.0056e-05, -1.8643e-05, -1.2147e-05,\n 1.7238e-05, 1.1402e-06, -1.8129e-05, 3.6356e-07, 5.0564e-06,\n 8.2186e-06, 1.7864e-05, 5.5935e-06, -4.5083e-06, 9.0673e-06,\n 4.6427e-05, 3.2497e-05, 5.6052e-45, 5.6052e-45, 4.7641e-05,\n -8.5699e-06, 1.9067e-06, 1.2314e-05, 2.6369e-05, -1.0677e-05,\n 8.1405e-06, -2.8178e-05, -8.2700e-06, -2.0527e-05, 5.6052e-45,\n -2.2894e-07, 7.7301e-07, 4.8632e-06, 3.6559e-06, -1.7399e-05,\n 5.6052e-45, -2.2859e-05, -7.8739e-06, 2.0295e-05, 1.1538e-05,\n 1.6602e-05, 6.0520e-06, -4.5833e-05, 3.4570e-05, -7.9477e-06,\n 5.0866e-05, 1.6526e-05, -5.8253e-06, 1.1815e-05, -3.8941e-05,\n -2.0174e-05, 2.1560e-05, -5.5638e-06, -1.0587e-05, 7.5060e-05,\n -4.9122e-06, -2.0937e-05, -5.4214e-06, 5.6052e-45, 5.9592e-06,\n 2.2897e-05, -1.7942e-05, -9.9517e-06, -3.0993e-05, 2.0384e-05,\n -1.8472e-05, -3.6956e-07, -1.8486e-05, 3.8493e-06, -2.0187e-05,\n -4.1029e-06, 5.6052e-45, 4.6031e-06, -2.7911e-05, 5.6052e-45,\n 2.2838e-05, 7.2184e-06, -3.2228e-05, 1.5698e-05, 1.6886e-05,\n 4.7634e-05, 2.6491e-05, 4.9852e-06, -8.7044e-06, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -2.0161e-05, 5.6052e-45, 2.8608e-05,\n -2.4143e-05, 4.5945e-05, -1.4097e-05, 5.6052e-45, -8.6297e-06,\n -1.1106e-06, 4.9659e-06, 4.0713e-06, 1.4831e-05, -1.6460e-06,\n -1.2357e-05, 1.5057e-05, -4.1634e-05, 3.2783e-05, -2.6352e-05,\n -1.7742e-05, 5.6052e-45, 5.6052e-45, -1.4218e-05, 5.6052e-45,\n -7.2005e-06, 3.7389e-05, 5.6052e-45, -1.7995e-05, 1.4929e-05,\n -2.0143e-05, 1.2445e-05, 5.6052e-45, -1.6601e-05, -2.2392e-06,\n 5.6052e-45, -1.0547e-05, -3.6199e-06, 1.8352e-05, 1.9860e-05,\n 1.2921e-05, 7.4497e-07, 5.6052e-45, 1.5144e-05, -1.3558e-05,\n 1.5674e-05, -7.7680e-06, -4.3861e-05, 5.6052e-45, -9.1045e-07,\n -4.7491e-05, -2.5843e-05, 2.2513e-05, 1.1780e-05, 5.6052e-45,\n 5.6052e-45, -6.2066e-06, -6.5765e-06, 1.6484e-07, -2.6169e-06,\n -5.6209e-06, 5.6052e-45, 5.6052e-45, -7.9735e-06, -2.9133e-05,\n -8.8470e-06, 3.4311e-05, 1.1872e-05, 3.9129e-05, -1.5314e-05,\n 3.4834e-05, 1.1805e-05, 1.7229e-05, 5.6052e-45, 5.6052e-45,\n -7.6232e-06, 6.7393e-07, 5.6052e-45, -1.8059e-05, 2.6588e-05,\n 1.2262e-06, -1.5807e-05, -1.5241e-05, -3.6687e-05, 2.6759e-05,\n 3.4378e-05, -1.1409e-05, 5.6052e-45, 3.6279e-05, 1.4916e-05,\n -2.1729e-05, 1.1890e-05, -1.4515e-05, 5.6052e-45, -5.2225e-05,\n 5.6052e-45, 1.5156e-05, -3.9536e-06, 2.6398e-05, 2.2367e-05,\n 5.6052e-45, -1.7621e-05, 2.8761e-05, 5.9195e-06, -3.1804e-05,\n 1.2797e-05, 8.1023e-06, -2.2358e-05, -5.6252e-06, 7.1020e-06,\n 1.1728e-05, -8.2458e-06, 5.6052e-45, 1.1238e-05, -9.7338e-06,\n 7.2094e-06, 3.1435e-05, -1.2139e-05, 1.0921e-05, -1.3672e-05,\n 6.2852e-06, -5.1464e-06, -7.4295e-06, 5.6052e-45, 2.0723e-05,\n 6.7670e-06, 5.6052e-45, -1.7085e-05, 6.3054e-06, 1.5859e-05,\n 1.5945e-05, -1.2630e-05, 4.3994e-05, -3.3297e-06, -4.1740e-05,\n 1.7758e-05, 3.5944e-06, 5.6052e-45, 5.6052e-45, -5.1496e-05,\n 3.0579e-05, 4.8990e-05, -2.5918e-06, -5.1054e-06, 2.7593e-05,\n 2.4863e-05, 1.6691e-05, 1.9403e-05, 7.8080e-06, -2.9160e-05,\n 5.6052e-45, 6.5220e-06, 1.0653e-05, -1.3712e-05, -2.4620e-05,\n -2.9998e-05, -1.0610e-05, -2.1986e-05, -1.8185e-05, 5.6052e-45,\n -2.3714e-05, 4.7229e-05, 5.6052e-45, -1.4511e-05, 1.5600e-05,\n 6.8063e-06, 2.3929e-05, 1.4547e-05, 4.0946e-06, -3.5810e-06,\n -6.0392e-06, -9.1309e-06, -1.7762e-05, 5.6052e-45, 1.9914e-05,\n 4.4799e-05, -8.2513e-06, 2.8359e-05, -1.8172e-05, 5.6052e-45,\n 1.8269e-05, -4.7434e-06, -1.2134e-05, -7.0960e-06, -3.1051e-05,\n -1.0977e-05, 2.9564e-06, -8.4571e-06, 2.4805e-05, -6.2117e-06,\n -1.0065e-05, 2.6353e-05, 5.6052e-45, 1.2509e-05, 1.3311e-05,\n -3.6147e-06, 2.3412e-05, -1.9191e-05, -4.9170e-05, 5.6052e-45,\n -7.2716e-05, -2.1518e-05, 8.7398e-06, 1.5316e-06, 5.6052e-45,\n 1.2381e-05, -2.8398e-05, 5.6052e-45, 5.6052e-45, 1.0468e-06,\n 1.1621e-05, 2.8780e-05, -5.2179e-05, 9.2898e-06, -2.1593e-05,\n -3.0256e-05, 5.6052e-45, -8.5768e-06, -3.4400e-05, 2.6995e-07,\n -6.0815e-07, -1.0067e-05, 1.2952e-05, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 3.0145e-05, -1.8930e-05, 5.6052e-45, 2.3519e-05,\n -2.0357e-05, 2.3638e-05, -4.3075e-05, 5.6051e-06, 2.7404e-05,\n 8.8246e-06, 5.6052e-45, -1.4035e-05, 5.6052e-45, -6.4291e-06,\n -6.3596e-07, 2.0620e-05, -1.1134e-05, -2.9758e-05, -2.1962e-05,\n 1.7360e-05, 2.9508e-05, -1.2188e-07, 5.6052e-45, 2.7295e-05,\n -1.6908e-05, 1.9987e-05, 1.2083e-05, 5.6052e-45, 5.6052e-45,\n 2.2810e-05, 1.0168e-05, 5.1090e-06, 2.6683e-06, -1.2901e-05,\n -7.8128e-06, 1.5200e-07, -5.1403e-05, 1.0092e-05, -1.2359e-05,\n -2.1434e-05, 1.6553e-05, -1.4782e-05, 5.6052e-45, 7.2295e-06,\n -3.8503e-05, 8.3334e-06, -4.0147e-06, -3.8935e-05, 2.5628e-07,\n -1.0170e-05, 4.0526e-05, -2.5181e-05, -2.8977e-05, 5.6052e-45,\n -6.4800e-06, -1.3132e-05, -1.4452e-05, 2.4807e-06, 2.3826e-05,\n -4.5530e-07, -1.4824e-06, -1.0222e-05, 5.6052e-45, 1.9450e-05,\n -4.0395e-05, -4.2515e-06, 1.8499e-05, 1.8040e-05, 9.3367e-06,\n 1.4009e-05, 1.1019e-05, 1.0481e-05, -3.7723e-05, 5.6052e-45,\n -7.9129e-06, -1.1157e-05, -1.0151e-05, 2.3459e-05, 1.0496e-05,\n 2.6805e-05, 5.6052e-45, 3.8552e-05, 1.2492e-05, -2.6858e-05,\n -1.0019e-06, 5.6052e-45, 1.0777e-06, 3.8013e-06, 2.0634e-05,\n -2.3012e-05, 2.4371e-05, 5.6052e-45, -2.6902e-07, 5.6052e-45,\n -3.8016e-05, 4.9304e-06, -5.6052e-45, 2.7440e-05, -1.0137e-05,\n -1.6375e-05, -8.0981e-06, -3.5183e-07, -3.5594e-06, -6.3603e-06,\n -1.7976e-05, 2.7907e-05, 1.3168e-05, 2.7116e-05, 5.6052e-45,\n -1.5109e-05, -4.5125e-06, 1.1046e-05, -1.3840e-05, 4.4802e-06,\n -1.9071e-05, -1.4020e-05, 4.1009e-06, -2.4928e-05, 2.2739e-05,\n -1.8781e-05, 5.6052e-45, -3.5622e-07, 3.2939e-05, 5.6052e-45,\n 1.3629e-05, -3.4001e-05, 5.6052e-45, 3.0306e-05, 4.2684e-06,\n 5.6052e-45, 2.5757e-05, -2.8230e-06, -4.5037e-06, -4.0895e-06,\n -2.0809e-05, -1.4039e-06, 5.6052e-45, -4.1374e-05, 1.3583e-05,\n -4.4237e-05, 1.9922e-05], device='cuda:0')", + "exp_avg_sq": "tensor([5.3571e-09, 1.2290e-08, 7.7800e-09, 7.6543e-09, 1.0792e-17, 4.8847e-09,\n 6.5712e-19, 5.5974e-09, 3.2494e-09, 7.6020e-09, 8.5873e-09, 5.7991e-09,\n 3.5553e-09, 9.1810e-09, 8.5735e-09, 7.2715e-09, 8.3922e-18, 8.5902e-09,\n 4.6434e-09, 4.8837e-09, 9.7399e-18, 1.1316e-19, 4.0609e-18, 7.4315e-09,\n 7.3593e-09, 5.4376e-09, 3.7578e-09, 7.9901e-09, 6.3819e-09, 5.7795e-09,\n 6.9770e-09, 1.1366e-17, 5.8704e-09, 9.9536e-09, 8.1679e-09, 4.6726e-09,\n 2.6150e-09, 1.9343e-18, 8.0201e-09, 6.9411e-09, 3.9345e-09, 4.4929e-21,\n 7.5936e-09, 7.0318e-19, 7.4987e-09, 6.1075e-09, 7.5094e-09, 4.1398e-09,\n 5.6307e-09, 6.2878e-09, 4.8299e-19, 6.5266e-09, 1.5280e-18, 7.8960e-09,\n 5.5350e-09, 5.4130e-09, 4.8033e-09, 3.3329e-22, 4.6277e-09, 6.2071e-09,\n 6.7645e-09, 5.3941e-09, 1.0372e-08, 7.3523e-09, 6.5577e-09, 4.0335e-09,\n 5.6011e-09, 4.6474e-09, 6.7814e-09, 7.4208e-09, 5.7222e-09, 5.8923e-09,\n 3.0405e-09, 9.4961e-09, 7.8700e-09, 9.6343e-09, 7.6575e-09, 4.8748e-23,\n 6.1499e-19, 5.8926e-09, 6.5258e-09, 6.7675e-09, 5.5365e-09, 4.7777e-09,\n 5.6718e-09, 5.2424e-09, 7.2990e-09, 5.9199e-09, 4.2699e-09, 1.2566e-18,\n 4.9464e-09, 4.3037e-09, 6.4529e-09, 7.8362e-09, 6.8610e-09, 8.5196e-20,\n 5.1301e-09, 4.6410e-09, 4.3429e-09, 4.6666e-09, 5.8456e-09, 3.2037e-09,\n 6.7976e-09, 6.6121e-09, 3.6815e-09, 6.0733e-09, 6.1978e-09, 6.1409e-09,\n 4.0688e-09, 7.3319e-09, 8.5501e-09, 7.8717e-09, 6.4049e-09, 4.3251e-09,\n 1.2471e-08, 4.9897e-09, 8.1524e-09, 5.6562e-09, 9.3332e-19, 4.8723e-09,\n 3.9000e-09, 7.0516e-09, 3.8599e-09, 5.8676e-09, 5.9227e-09, 2.6963e-09,\n 4.5460e-09, 8.3988e-09, 4.3485e-09, 6.3048e-09, 6.6859e-09, 2.9330e-20,\n 7.2500e-09, 7.0765e-09, 9.3345e-18, 6.2334e-09, 3.4276e-09, 6.6976e-09,\n 6.1383e-09, 6.5112e-09, 7.6289e-09, 4.9260e-09, 6.1340e-09, 3.8617e-09,\n 1.0632e-18, 5.2310e-18, 1.6280e-20, 4.8897e-09, 6.8893e-20, 6.6226e-09,\n 5.2906e-09, 7.8069e-09, 5.9628e-09, 3.0738e-18, 5.2643e-09, 3.6735e-09,\n 5.1907e-09, 3.5880e-09, 4.0152e-09, 5.1717e-09, 4.9759e-09, 5.4717e-09,\n 8.1932e-09, 4.8294e-09, 7.0244e-09, 4.5779e-09, 1.9664e-17, 2.0256e-19,\n 4.8663e-09, 1.2910e-18, 6.9885e-09, 4.8194e-09, 1.5902e-18, 5.7321e-09,\n 4.2833e-09, 4.8135e-09, 4.6739e-09, 8.8786e-18, 3.8431e-09, 4.4533e-09,\n 3.9394e-18, 6.6412e-09, 6.4990e-09, 7.4764e-09, 9.4470e-09, 9.8203e-09,\n 2.7671e-09, 2.9531e-19, 6.1857e-09, 4.9829e-09, 4.9001e-09, 6.7961e-09,\n 6.2896e-09, 5.2859e-18, 6.9588e-09, 8.8581e-09, 4.5423e-09, 6.0991e-09,\n 1.0926e-08, 7.8425e-18, 1.9812e-18, 5.8402e-09, 9.3616e-09, 6.6529e-09,\n 6.3531e-09, 4.4309e-09, 4.9095e-18, 8.2606e-19, 7.0113e-09, 5.7739e-09,\n 8.7003e-09, 5.7852e-09, 5.1423e-09, 6.8231e-09, 4.3997e-09, 5.1541e-09,\n 5.7706e-09, 6.6484e-09, 9.5232e-19, 1.4583e-17, 7.7235e-09, 4.1920e-09,\n 1.6466e-17, 5.9382e-09, 6.5582e-09, 4.2041e-09, 5.8426e-09, 7.4086e-09,\n 6.6394e-09, 6.0498e-09, 6.9287e-09, 6.0343e-09, 2.6907e-18, 6.5264e-09,\n 5.9515e-09, 7.1086e-09, 8.6207e-09, 5.6406e-09, 2.1685e-18, 7.3842e-09,\n 3.6179e-18, 5.8567e-09, 4.3670e-09, 4.1861e-09, 3.3038e-09, 7.4718e-18,\n 6.0260e-09, 9.3273e-09, 4.9648e-09, 4.4195e-09, 8.1198e-09, 7.9990e-09,\n 7.5967e-09, 6.6494e-09, 7.3951e-09, 9.8470e-09, 6.4946e-09, 1.9622e-18,\n 9.8061e-09, 4.9283e-09, 5.7718e-09, 8.2400e-09, 5.7933e-09, 4.6265e-09,\n 4.4264e-09, 7.1037e-09, 7.9795e-09, 4.9143e-09, 1.8148e-19, 6.7575e-09,\n 4.8096e-09, 2.6897e-20, 6.4839e-09, 3.6078e-09, 4.3570e-09, 9.1744e-09,\n 4.9748e-09, 8.3108e-09, 9.0195e-09, 1.2804e-08, 7.0299e-09, 5.1512e-09,\n 6.2269e-18, 6.7514e-20, 8.5090e-09, 7.0280e-09, 1.4058e-08, 9.6686e-09,\n 6.7424e-09, 7.3516e-09, 5.3590e-09, 7.5532e-09, 2.9816e-09, 7.7086e-09,\n 6.4948e-09, 2.3516e-18, 6.1269e-09, 5.6391e-09, 5.9915e-09, 8.7114e-09,\n 6.4154e-09, 7.3904e-09, 5.5079e-09, 7.0599e-09, 1.5923e-20, 5.5504e-09,\n 5.2872e-09, 3.7316e-19, 6.4589e-09, 4.6142e-09, 5.9330e-09, 9.6741e-09,\n 7.8818e-09, 5.6980e-09, 3.6649e-09, 5.3307e-09, 6.5251e-09, 2.5981e-09,\n 4.9747e-18, 1.1732e-08, 8.1408e-09, 6.1516e-09, 7.0705e-09, 7.2840e-09,\n 6.3345e-18, 7.5082e-09, 5.8798e-09, 5.9542e-09, 6.8258e-09, 6.6855e-09,\n 6.1598e-09, 2.7471e-09, 4.5218e-09, 4.6941e-09, 4.9400e-09, 5.7272e-09,\n 5.6096e-09, 6.4592e-19, 4.0462e-09, 6.6324e-09, 6.4055e-09, 5.2401e-09,\n 4.3688e-09, 1.0020e-08, 1.1243e-18, 5.8590e-09, 7.1982e-09, 4.8191e-09,\n 7.7684e-09, 8.0093e-19, 2.5718e-09, 1.0281e-08, 1.5533e-19, 3.9599e-19,\n 5.8398e-09, 9.2681e-09, 6.2778e-09, 5.9974e-09, 6.4435e-09, 7.7240e-09,\n 3.2933e-09, 4.9104e-19, 7.9758e-09, 6.3283e-09, 3.9061e-09, 4.5786e-09,\n 3.8672e-09, 6.9058e-09, 6.3261e-19, 4.2376e-18, 7.7881e-18, 5.6943e-09,\n 4.7610e-09, 3.2672e-18, 4.4598e-09, 7.1485e-09, 5.2416e-09, 6.4933e-09,\n 8.8063e-09, 7.0627e-09, 6.9176e-09, 6.8502e-20, 9.0081e-09, 6.3491e-18,\n 6.4278e-09, 5.2789e-09, 4.6732e-09, 4.9489e-09, 1.0021e-08, 8.1961e-09,\n 6.7842e-09, 5.1632e-09, 6.5560e-09, 8.8805e-19, 1.1476e-08, 6.2492e-09,\n 6.9569e-09, 5.8372e-09, 3.3447e-19, 8.3368e-18, 6.0958e-09, 4.7943e-09,\n 7.2124e-09, 5.7089e-09, 4.8672e-09, 5.7539e-09, 5.6247e-09, 7.6629e-09,\n 5.2327e-09, 5.9792e-09, 5.0897e-09, 5.5713e-09, 1.1865e-08, 2.3304e-17,\n 3.4368e-09, 7.3549e-09, 6.8083e-09, 3.8363e-09, 5.2040e-09, 5.7738e-09,\n 8.1349e-09, 3.8974e-09, 6.7337e-09, 3.3527e-09, 4.0494e-21, 7.6197e-09,\n 6.5346e-09, 3.1484e-09, 4.9274e-09, 6.1788e-09, 3.5128e-09, 5.8745e-09,\n 3.1546e-09, 8.9697e-19, 7.3117e-09, 7.0475e-09, 6.4410e-09, 6.3288e-09,\n 9.0605e-09, 9.8198e-09, 4.5137e-09, 8.1104e-09, 4.8983e-09, 7.1181e-09,\n 2.8662e-17, 4.4686e-09, 8.4722e-09, 8.2159e-09, 5.9937e-09, 5.3557e-09,\n 4.8091e-09, 1.3955e-18, 7.5777e-09, 7.1598e-09, 7.2002e-09, 4.8065e-09,\n 1.1284e-18, 7.9850e-09, 1.0533e-08, 3.8759e-09, 7.1418e-09, 6.1697e-09,\n 8.4455e-20, 5.2236e-09, 6.1410e-18, 6.1980e-09, 5.7044e-09, 2.6746e-18,\n 4.8753e-09, 9.1995e-09, 4.7000e-09, 3.1418e-09, 7.8173e-09, 5.7981e-09,\n 4.9109e-09, 3.6547e-09, 6.3667e-09, 8.2506e-09, 5.6169e-09, 3.0277e-18,\n 5.0249e-09, 5.4593e-09, 6.6284e-09, 5.6626e-09, 4.8938e-09, 6.4983e-09,\n 6.7456e-09, 6.0422e-09, 4.6965e-09, 6.9043e-09, 8.2109e-09, 2.2587e-17,\n 5.6775e-09, 5.1304e-09, 1.1627e-18, 7.7212e-09, 4.4234e-09, 4.0509e-18,\n 7.4382e-09, 4.5935e-09, 1.2792e-17, 9.5566e-09, 7.9412e-09, 1.0371e-08,\n 7.1611e-09, 5.5916e-09, 7.4115e-09, 7.3840e-19, 8.3096e-09, 8.1453e-09,\n 6.9732e-09, 7.6190e-09], device='cuda:0')" + }, + "4": { + "step": "tensor(30032.)", + "exp_avg": "tensor([[-1.6727e-06, -8.4867e-06, -1.2097e-06, ..., 2.7858e-06,\n -1.7058e-06, -4.6196e-06],\n [ 1.0202e-06, 1.2665e-05, -8.7681e-06, ..., 1.6087e-06,\n -2.2337e-07, 2.4791e-06],\n [ 2.6614e-06, -8.0979e-06, -2.4995e-07, ..., -2.4458e-06,\n 4.5711e-07, 4.7839e-07],\n ...,\n [ 2.1336e-06, -3.2299e-06, 3.7774e-06, ..., -5.4101e-06,\n 2.8661e-07, -2.0233e-06],\n [-2.1062e-06, -2.9647e-06, 8.7869e-08, ..., 4.1420e-06,\n -4.4431e-07, 3.6986e-06],\n [-4.4574e-06, 2.0421e-06, 9.0949e-06, ..., -6.9819e-06,\n -8.8445e-07, -2.3594e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[7.5314e-11, 4.1862e-10, 3.2556e-10, ..., 8.8326e-11, 3.1127e-11,\n 1.9112e-10],\n [1.0082e-10, 8.1232e-10, 3.4759e-10, ..., 1.9797e-10, 9.1283e-11,\n 1.6310e-10],\n [1.0638e-10, 1.0733e-09, 3.8496e-10, ..., 3.3782e-10, 4.7254e-11,\n 1.1323e-10],\n ...,\n [1.1600e-10, 1.4185e-09, 5.0940e-10, ..., 6.6151e-10, 1.5846e-10,\n 5.9474e-10],\n [1.0731e-10, 1.0599e-09, 7.3047e-10, ..., 5.4847e-10, 8.1353e-11,\n 2.5806e-10],\n [1.6292e-10, 8.8204e-10, 5.0571e-10, ..., 2.8522e-10, 9.4671e-11,\n 4.1044e-10]], device='cuda:0')" + }, + "5": { + "step": "tensor(22524.)", + "exp_avg": "tensor([[-1.1800e-05, -1.2332e-05, 5.1062e-05, ..., 1.5866e-06,\n 1.5571e-05, 1.0575e-04],\n [ 7.9496e-06, -2.7005e-05, 9.4272e-06, ..., -2.5355e-05,\n 6.5999e-06, 5.1856e-08],\n [ 6.2242e-07, -6.7566e-06, -3.4771e-05, ..., -8.5531e-07,\n -3.9625e-05, -3.2678e-07],\n ...,\n [ 7.8104e-06, -4.4141e-07, 1.5388e-06, ..., -2.1645e-06,\n 3.0149e-06, 3.6804e-06],\n [-5.8548e-05, -2.6332e-05, 2.0884e-05, ..., 1.3412e-05,\n -8.5254e-06, 5.4730e-07],\n [ 9.8122e-06, 1.9021e-05, -5.7476e-05, ..., -1.0351e-06,\n 1.1627e-06, 1.2207e-07]], device='cuda:0')", + "exp_avg_sq": "tensor([[9.9413e-10, 4.5552e-09, 6.4620e-09, ..., 5.9710e-10, 6.8254e-09,\n 1.3179e-07],\n [2.9466e-09, 8.3770e-09, 9.6786e-09, ..., 2.6749e-08, 8.8587e-10,\n 9.3752e-11],\n [3.2893e-09, 5.6325e-09, 6.6328e-09, ..., 5.4098e-09, 8.2591e-09,\n 1.3334e-10],\n ...,\n [2.0557e-09, 1.2760e-09, 2.3268e-09, ..., 8.7918e-10, 1.7125e-09,\n 1.5263e-09],\n [9.4433e-09, 8.1332e-09, 1.6952e-08, ..., 2.6913e-09, 3.1980e-09,\n 7.9100e-11],\n [8.9857e-09, 4.0336e-08, 3.5247e-08, ..., 3.4742e-10, 1.6030e-09,\n 8.2132e-11]], device='cuda:0')" + }, + "6": { + "step": "tensor(22524.)", + "exp_avg": "tensor([ 3.5591e-05, 8.2962e-05, -5.7704e-05, ..., 3.6422e-05,\n 7.3672e-05, -5.3455e-05], device='cuda:0')", + "exp_avg_sq": "tensor([8.3090e-08, 1.0091e-07, 6.2620e-08, ..., 5.4792e-08, 6.1477e-08,\n 6.4376e-08], device='cuda:0')" + }, + "7": { + "step": "tensor(22524.)", + "exp_avg": "tensor([[ 3.8131e-06, 5.4324e-06, -1.2096e-06, ..., -8.8930e-06,\n -2.0362e-06, 1.9055e-06],\n [ 7.3546e-06, -1.2127e-05, -1.5991e-07, ..., -1.5145e-05,\n -8.6575e-07, 7.1125e-07],\n [-1.5716e-06, 1.7965e-05, 3.1174e-06, ..., 1.2045e-06,\n -3.3890e-06, 3.2567e-06],\n ...,\n [ 9.0568e-06, 2.5449e-05, -5.0377e-06, ..., -8.1655e-08,\n 8.5506e-06, 5.1479e-06],\n [-4.6419e-06, -2.5953e-06, 2.6777e-06, ..., 2.1916e-06,\n 7.4886e-06, -3.0564e-06],\n [-2.8062e-06, -2.2754e-05, -1.2967e-08, ..., 2.0853e-06,\n 4.2127e-06, 3.4526e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.1083e-10, 3.9453e-10, 6.0642e-11, ..., 2.0860e-10, 2.7301e-10,\n 1.5129e-10],\n [3.4616e-10, 8.3799e-10, 2.8388e-10, ..., 3.8263e-10, 9.7671e-10,\n 4.1696e-10],\n [3.3432e-10, 9.7170e-10, 2.9903e-10, ..., 3.1454e-10, 9.2204e-10,\n 3.8833e-10],\n ...,\n [8.5856e-10, 1.1592e-09, 2.3206e-10, ..., 3.5289e-10, 8.1570e-10,\n 5.3645e-10],\n [4.7062e-10, 7.1373e-10, 2.1747e-10, ..., 3.0313e-10, 7.4191e-10,\n 2.9482e-10],\n [5.0762e-10, 8.6730e-10, 1.8494e-10, ..., 3.5282e-10, 6.5174e-10,\n 3.1555e-10]], device='cuda:0')" + }, + "14": { + "step": "tensor(22524.)", + "exp_avg": "tensor(-1.0023e-05, device='cuda:0')", + "exp_avg_sq": "tensor(5.7211e-07, device='cuda:0')" + }, + "15": { + "step": "tensor(22524.)", + "exp_avg": "tensor([[ 8.8631e-16, -3.0500e-15, 2.5783e-16, ..., 9.4451e-17,\n -1.9159e-16, 2.2952e-16],\n [ 2.9983e-16, 4.1347e-17, -1.5009e-16, ..., 3.6417e-16,\n 1.4859e-18, -6.1633e-17],\n [-1.6734e-17, 3.1349e-16, -1.3017e-17, ..., -1.2097e-16,\n 1.0033e-16, 6.5181e-18],\n ...,\n [-2.5852e-16, 4.5405e-16, 1.0426e-16, ..., -2.0365e-16,\n 1.2058e-16, 8.8172e-17],\n [-1.1926e-16, -1.0562e-16, 3.7630e-17, ..., -1.7616e-16,\n 1.1623e-16, 1.5903e-16],\n [-5.1792e-17, -1.1546e-16, -1.6387e-16, ..., -5.3013e-17,\n -1.4899e-16, -2.2373e-16]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.4978e-22, 8.6989e-22, 3.9315e-24, ..., 2.5377e-23, 1.9023e-21,\n 5.2074e-22],\n [6.1173e-24, 2.8801e-23, 3.2737e-24, ..., 4.2132e-24, 2.3590e-23,\n 4.8145e-24],\n [4.1727e-25, 1.1624e-24, 7.5426e-25, ..., 2.0644e-24, 4.4322e-24,\n 8.6653e-24],\n ...,\n [1.0090e-21, 4.1748e-21, 1.1322e-22, ..., 2.0126e-22, 5.1255e-22,\n 3.4228e-21],\n [1.0606e-23, 3.9120e-23, 9.2083e-25, ..., 3.2976e-24, 3.1483e-23,\n 2.1847e-23],\n [8.5914e-23, 6.5809e-22, 2.5045e-24, ..., 4.6639e-23, 8.6621e-22,\n 3.2143e-22]], device='cuda:0')" + }, + "16": { + "step": "tensor(22524.)", + "exp_avg": "tensor([ 1.1771e-15, -1.5678e-16, -2.4875e-15, -2.0511e-14, 7.8711e-15,\n -1.0594e-15, 1.9159e-15, -4.6152e-15, -3.4124e-16, 3.0902e-15,\n 2.3009e-14, -3.3017e-14, 1.9752e-15, -9.7242e-16, 6.1227e-16,\n 1.1293e-15, 4.3766e-15, 2.8289e-16, 8.2481e-16, -9.7259e-15,\n 1.9454e-15, -3.0919e-14, -2.3151e-15, -2.9248e-14, -5.2354e-15,\n -2.6142e-16, 2.2410e-15, 9.9244e-15, 8.6411e-16, -1.2163e-15,\n 1.3348e-15, 5.7526e-15, 9.9338e-17, 4.4581e-16, -1.6736e-16,\n -6.6987e-16, -1.0179e-16, -1.9655e-15, 2.8743e-15, 3.8710e-14,\n -8.0116e-16, 3.0538e-14, -1.8000e-15, 4.9678e-15, -3.4327e-17,\n -2.2081e-16, 3.3844e-16, -2.2975e-16, -1.5866e-15, 2.4082e-14,\n -2.9255e-15, -2.4192e-15, -1.2828e-15, -1.0623e-14, -1.0105e-15,\n 2.4663e-15, -2.9208e-15, 2.2310e-15, -1.9686e-15, -1.0822e-14,\n 7.8161e-15, 4.2800e-16, 4.5691e-16, -1.4734e-16], device='cuda:0')", + "exp_avg_sq": "tensor([8.5746e-20, 3.0690e-21, 2.5398e-22, 5.2667e-19, 5.1146e-20, 3.5185e-21,\n 1.8655e-20, 1.1023e-21, 3.8676e-22, 8.2329e-21, 2.2687e-19, 2.8511e-19,\n 1.8501e-19, 6.4957e-21, 7.8970e-20, 6.3068e-19, 4.5126e-21, 1.1609e-19,\n 1.9859e-22, 4.3037e-21, 5.9199e-19, 4.8640e-19, 1.7719e-19, 1.3577e-19,\n 1.0202e-19, 1.0859e-22, 2.7782e-19, 2.0689e-19, 3.0978e-19, 1.8411e-19,\n 3.7329e-21, 5.5059e-19, 9.8833e-23, 1.0435e-20, 9.2316e-19, 1.3520e-19,\n 1.4973e-20, 2.7172e-19, 3.6992e-19, 8.8398e-19, 4.9776e-19, 3.1903e-20,\n 3.5178e-22, 8.7092e-20, 1.8760e-20, 7.9630e-21, 1.7838e-19, 3.3657e-20,\n 4.7383e-22, 5.0076e-19, 3.3741e-21, 3.4656e-20, 1.5750e-21, 7.5374e-20,\n 2.0821e-20, 3.6512e-19, 4.8522e-19, 4.3622e-20, 3.9426e-22, 5.8048e-20,\n 4.4178e-20, 3.8914e-19, 2.2818e-21, 6.2067e-20], device='cuda:0')" + }, + "17": { + "step": "tensor(22524.)", + "exp_avg": "tensor([ 1.5263e-14, -1.0401e-15, -3.5224e-16, 1.1999e-15, 4.2447e-14,\n 5.5429e-17, 3.9160e-15, 6.8486e-16, 1.7992e-16, 6.8413e-15,\n 9.4812e-14, -2.7951e-14, -1.4734e-15, 6.0181e-18, -2.2958e-16,\n -1.1561e-15, -1.7325e-16, -8.5384e-16, 3.9009e-16, 1.1690e-15,\n 6.3646e-14, -2.2432e-14, 4.3741e-16, -1.6947e-14, 2.1340e-14,\n 1.2707e-16, 5.8724e-14, 5.3942e-14, -5.9148e-16, -1.2657e-16,\n -2.3907e-15, 7.3457e-14, 2.3873e-16, 9.4459e-15, -3.7944e-16,\n -1.7053e-16, 3.9901e-15, 3.6897e-16, -2.0819e-15, 1.3810e-13,\n -1.7881e-16, 7.6375e-14, -2.3604e-16, 3.1547e-14, -1.9106e-16,\n -8.3103e-17, -6.6763e-16, -1.3158e-17, 7.4482e-17, 1.1716e-13,\n -3.4685e-15, 1.7724e-16, -2.6913e-17, 4.5705e-15, -4.0496e-17,\n -1.6491e-15, 1.1560e-15, 2.0958e-14, -6.6133e-17, 1.0129e-14,\n 3.3422e-14, -1.2054e-15, -1.7398e-16, -1.1955e-16], device='cuda:0')", + "exp_avg_sq": "tensor([5.9703e-22, 6.8190e-24, 3.2156e-23, 6.5867e-21, 4.2431e-22, 2.6976e-22,\n 1.3453e-22, 6.1362e-24, 2.8801e-23, 4.8852e-23, 2.0259e-21, 2.0450e-21,\n 2.3726e-21, 2.2432e-22, 2.7045e-21, 8.5617e-21, 2.0281e-23, 1.0440e-21,\n 4.1266e-23, 2.3487e-23, 7.6773e-21, 3.8787e-21, 3.2946e-21, 1.7174e-21,\n 9.5001e-22, 2.8171e-23, 2.5493e-21, 1.7305e-21, 7.2368e-21, 4.4673e-21,\n 7.9557e-24, 6.4102e-21, 6.9638e-24, 4.4951e-23, 1.1845e-20, 2.3767e-21,\n 7.7282e-23, 4.1932e-21, 3.7839e-21, 5.2788e-21, 5.9918e-21, 3.3976e-22,\n 3.0938e-24, 5.4827e-22, 2.2844e-22, 1.3394e-22, 1.8884e-21, 8.6454e-22,\n 1.7199e-23, 4.7109e-21, 1.2130e-23, 3.3099e-22, 9.6419e-23, 6.8888e-22,\n 6.0805e-22, 4.4282e-21, 4.9542e-21, 3.4740e-22, 2.7172e-23, 5.5555e-22,\n 3.6724e-22, 4.2520e-21, 6.8261e-23, 8.9268e-22], device='cuda:0')" + }, + "18": { + "step": "tensor(22524.)", + "exp_avg": "tensor([ 2.2217e-14, 6.6855e-15, 4.4615e-16, -3.0086e-15, 3.6579e-14,\n -3.8673e-17, 1.7396e-14, 5.5936e-15, -1.5315e-16, 1.8403e-14,\n 6.3498e-14, -1.6486e-14, 1.2335e-15, -2.6600e-18, 2.2656e-16,\n 1.2270e-15, 1.3573e-14, 3.6991e-16, -3.8410e-16, 1.4118e-15,\n 4.0108e-14, -1.3838e-14, -9.1799e-16, -1.1546e-14, 1.7795e-14,\n -2.0885e-16, 3.7079e-14, 4.0083e-14, 6.3484e-16, 1.5938e-16,\n 5.6582e-15, 4.2982e-14, -2.9791e-16, 1.7515e-14, 4.0524e-16,\n 1.2255e-16, 1.5174e-14, -4.7232e-16, 1.9893e-15, 8.9500e-14,\n 2.9137e-16, 6.5831e-14, 5.0969e-16, 3.1939e-14, 2.1029e-16,\n 9.5622e-17, 6.5324e-16, 2.6918e-17, -9.1966e-17, 7.2318e-14,\n 2.2376e-15, -4.0758e-16, 1.0014e-16, 7.4168e-15, 4.3983e-17,\n 1.6795e-15, -1.7824e-15, 2.6524e-14, 1.9908e-16, 6.6593e-15,\n 3.4440e-14, 8.9383e-16, 1.6873e-16, 1.3983e-16], device='cuda:0')", + "exp_avg_sq": "tensor([7.6787e-22, 3.4075e-23, 5.5108e-23, 6.1199e-21, 5.7015e-22, 3.0476e-22,\n 1.8128e-22, 1.0005e-23, 3.8945e-23, 8.8815e-23, 2.9978e-21, 3.5942e-21,\n 3.4003e-21, 2.9351e-22, 1.9113e-21, 1.0596e-20, 6.4772e-23, 1.9846e-21,\n 6.9076e-23, 6.5722e-23, 6.7654e-21, 6.6419e-21, 3.1087e-21, 2.2179e-21,\n 7.0948e-22, 5.6938e-23, 3.0125e-21, 2.3758e-21, 6.0464e-21, 3.7983e-21,\n 3.7317e-23, 6.5331e-21, 1.0504e-23, 7.3930e-23, 1.4662e-20, 2.5699e-21,\n 1.2230e-22, 4.9829e-21, 6.2512e-21, 1.1348e-20, 8.2248e-21, 4.8717e-22,\n 6.9424e-24, 7.6291e-22, 4.2224e-22, 2.7030e-22, 3.1253e-21, 9.3345e-22,\n 3.6890e-23, 6.8505e-21, 4.1714e-23, 7.1942e-22, 1.4479e-22, 7.3724e-22,\n 6.3985e-22, 5.7672e-21, 6.7251e-21, 4.1813e-22, 4.6116e-23, 6.8726e-22,\n 4.7694e-22, 5.8935e-21, 8.4052e-23, 1.3451e-21], device='cuda:0')" + }, + "19": { + "step": "tensor(22524.)", + "exp_avg": "tensor([[ 1.0238e-13, -6.0074e-15, -3.4930e-14, 1.8848e-14, 2.5501e-13,\n -3.3224e-14, 4.6874e-14, 1.1019e-14, -3.0329e-14, 7.0786e-14,\n 3.6593e-13, -6.1102e-14, -2.5734e-14, -2.8473e-14, -2.8067e-14,\n -2.6772e-14, 1.2635e-14, -2.6294e-14, -2.5353e-14, 8.4470e-15,\n 2.6555e-13, -6.1649e-14, -3.1561e-14, -3.2392e-14, 1.7845e-13,\n -3.7338e-14, 3.1089e-13, 2.5269e-13, -3.1993e-14, -2.7977e-14,\n -2.2338e-14, 3.2094e-13, -2.6677e-14, 1.0917e-13, -3.1834e-14,\n -2.8907e-14, 5.5706e-14, -2.8499e-14, -2.7783e-14, 3.8491e-13,\n -2.2615e-14, 2.9871e-13, -2.8220e-14, 1.9536e-13, -3.3193e-14,\n -3.2140e-14, -3.0119e-14, -2.8776e-14, -2.5127e-14, 3.3308e-13,\n -3.4167e-14, -3.1153e-14, -3.1590e-14, 3.0268e-14, -3.2259e-14,\n -2.7052e-14, -3.0861e-14, 1.5700e-13, -3.5326e-14, 6.4918e-14,\n 2.0897e-13, -3.0307e-14, -3.0322e-14, -1.9269e-14],\n [-8.5243e-14, 7.2490e-15, 3.2921e-14, -5.5306e-15, -2.3827e-13,\n 3.1537e-14, -4.0550e-14, -1.2523e-14, 2.7651e-14, -6.1817e-14,\n -3.4277e-13, 7.9606e-14, 2.3835e-14, 2.6411e-14, 2.5254e-14,\n 2.5607e-14, -1.0655e-14, 2.3700e-14, 2.3264e-14, -4.6816e-15,\n -2.5376e-13, 7.4005e-14, 2.9677e-14, 5.2370e-14, -1.6720e-13,\n 3.5073e-14, -2.9440e-13, -2.3672e-13, 2.9655e-14, 2.6571e-14,\n 2.3585e-14, -2.9572e-13, 2.5064e-14, -1.0366e-13, 2.9500e-14,\n 2.7744e-14, -4.7859e-14, 2.6177e-14, 2.5816e-14, -3.6281e-13,\n 2.1380e-14, -2.8937e-13, 2.6116e-14, -1.8099e-13, 3.1412e-14,\n 3.0263e-14, 2.7945e-14, 2.6924e-14, 2.2459e-14, -3.1831e-13,\n 3.5335e-14, 2.9287e-14, 2.9833e-14, -2.4546e-14, 3.1081e-14,\n 2.6097e-14, 2.9632e-14, -1.4392e-13, 3.3205e-14, -5.4757e-14,\n -1.9374e-13, 2.8573e-14, 2.8555e-14, 1.7255e-14]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.3211e-19, 2.8628e-20, 5.9986e-20, 1.5423e-19, 4.5852e-20, 1.9747e-19,\n 5.9928e-20, 5.2531e-20, 7.0353e-20, 2.3230e-20, 3.3111e-20, 5.8082e-21,\n 6.6091e-20, 4.8390e-20, 3.8287e-19, 9.2932e-20, 1.1881e-20, 3.3151e-20,\n 3.7975e-20, 1.6922e-20, 2.0359e-19, 3.2008e-20, 8.6078e-20, 5.1406e-21,\n 4.3396e-19, 2.7569e-20, 1.3784e-19, 6.9267e-20, 4.1853e-19, 3.1080e-19,\n 1.9188e-20, 1.6315e-19, 7.4601e-21, 6.4457e-20, 1.8739e-19, 1.3338e-19,\n 4.5736e-20, 1.6476e-19, 7.9268e-20, 3.4150e-20, 8.4494e-20, 7.5735e-21,\n 5.2440e-21, 6.6286e-20, 1.5973e-20, 1.4078e-20, 4.4251e-20, 1.5524e-19,\n 4.3930e-21, 2.5639e-20, 1.8212e-20, 1.3934e-20, 6.0696e-20, 1.2195e-19,\n 1.7056e-19, 8.5782e-20, 3.7051e-20, 6.6604e-20, 4.3754e-20, 5.2088e-20,\n 3.7933e-20, 3.3802e-20, 5.4448e-21, 6.8954e-20],\n [1.3211e-19, 2.8628e-20, 5.9986e-20, 1.5424e-19, 4.5853e-20, 1.9747e-19,\n 5.9928e-20, 5.2531e-20, 7.0353e-20, 2.3230e-20, 3.3114e-20, 5.8115e-21,\n 6.6091e-20, 4.8390e-20, 3.8287e-19, 9.2932e-20, 1.1881e-20, 3.3151e-20,\n 3.7975e-20, 1.6922e-20, 2.0359e-19, 3.2012e-20, 8.6078e-20, 5.1443e-21,\n 4.3397e-19, 2.7569e-20, 1.3784e-19, 6.9269e-20, 4.1853e-19, 3.1080e-19,\n 1.9188e-20, 1.6316e-19, 7.4601e-21, 6.4457e-20, 1.8739e-19, 1.3338e-19,\n 4.5737e-20, 1.6476e-19, 7.9268e-20, 3.4153e-20, 8.4494e-20, 7.5760e-21,\n 5.2440e-21, 6.6289e-20, 1.5973e-20, 1.4079e-20, 4.4251e-20, 1.5524e-19,\n 4.3930e-21, 2.5641e-20, 1.8212e-20, 1.3934e-20, 6.0696e-20, 1.2195e-19,\n 1.7056e-19, 8.5782e-20, 3.7051e-20, 6.6605e-20, 4.3754e-20, 5.2090e-20,\n 3.7934e-20, 3.3802e-20, 5.4449e-21, 6.8954e-20]], device='cuda:0')" + }, + "20": { + "step": "tensor(22524.)", + "exp_avg": "tensor([ 1.6271e-13, -1.5077e-13], device='cuda:0')", + "exp_avg_sq": "tensor([7.8425e-19, 7.8425e-19], device='cuda:0')" + }, + "21": { + "step": "tensor(22524.)", + "exp_avg": "tensor([[-2.9872e-16, 1.3878e-16, -5.8555e-16, ..., 5.6685e-16,\n -1.1162e-15, -6.2583e-16],\n [ 5.6821e-16, 9.1665e-16, 5.2298e-16, ..., 1.5489e-16,\n -3.5465e-16, -7.6703e-17],\n [-4.0186e-16, -3.9447e-16, -1.1785e-16, ..., 2.3337e-16,\n -2.5557e-16, -4.2485e-16],\n ...,\n [ 2.7777e-15, 9.5709e-16, 3.1989e-16, ..., -7.5430e-16,\n 6.4442e-16, 9.5902e-16],\n [ 4.3139e-15, -1.5018e-15, -2.3025e-15, ..., -4.5432e-16,\n 4.5302e-15, 7.6654e-15],\n [ 1.1009e-15, 1.4039e-16, -2.5666e-16, ..., -6.6990e-16,\n 7.4369e-16, 1.3595e-15]], device='cuda:0')", + "exp_avg_sq": "tensor([[5.8482e-23, 3.9289e-22, 4.2232e-23, ..., 1.6642e-22, 2.7776e-22,\n 9.5333e-22],\n [3.6053e-24, 1.5257e-23, 1.7326e-23, ..., 2.2380e-23, 6.4122e-23,\n 1.7843e-22],\n [3.0921e-22, 2.3947e-22, 2.4384e-22, ..., 9.5578e-22, 4.0967e-22,\n 1.1355e-21],\n ...,\n [6.4737e-24, 5.9606e-23, 2.5521e-23, ..., 1.4147e-22, 4.6393e-22,\n 4.5612e-22],\n [8.4291e-24, 2.6167e-23, 4.3144e-23, ..., 6.8762e-24, 8.6071e-23,\n 1.8931e-23],\n [1.5544e-23, 5.0275e-23, 6.0638e-24, ..., 6.5685e-24, 2.9624e-24,\n 4.9959e-23]], device='cuda:0')" + }, + "22": { + "step": "tensor(22524.)", + "exp_avg": "tensor([-2.4696e-15, -1.1334e-16, 9.3767e-16, -1.4465e-15, 1.0210e-15,\n 2.1311e-15, 3.6259e-16, 1.7281e-15, 7.2159e-15, -7.3386e-15,\n -3.6507e-17, 2.3762e-14, -5.9596e-15, -7.9353e-16, 4.6180e-16,\n -2.0535e-14, 1.1049e-15, 1.1685e-15, 3.2337e-16, 3.1880e-15,\n 3.3388e-15, 3.0807e-15, -7.4163e-16, 5.0859e-15, -2.9533e-15,\n -7.5427e-17, -2.4300e-14, 1.6884e-15, 1.5833e-14, -2.0762e-15,\n -1.0307e-15, 2.4014e-15, -7.2955e-15, 4.5782e-16, -3.2752e-16,\n 4.6269e-16, -1.2465e-14, -1.6267e-16, -4.2075e-15, 7.2908e-16,\n -1.5306e-14, 4.3612e-15, 1.6447e-15, 5.7260e-15, 1.5003e-15,\n 1.1176e-14, 2.6897e-16, -7.2853e-16, -2.2605e-15, -2.0848e-15,\n 3.1633e-15, 2.0996e-17, -2.7873e-15, 7.5511e-15, -1.8303e-14,\n 3.5699e-15, 8.8576e-15, -1.2376e-15, -1.2949e-16, -1.1473e-16,\n -1.4496e-15, 3.8634e-15, 7.6984e-15, 2.8459e-15], device='cuda:0')", + "exp_avg_sq": "tensor([6.2740e-21, 8.1498e-21, 3.1633e-20, 1.1430e-19, 7.1825e-20, 3.3778e-20,\n 1.2182e-20, 1.5830e-20, 1.5082e-20, 2.2367e-20, 1.6117e-20, 1.1890e-20,\n 3.9259e-20, 5.5449e-21, 9.9749e-20, 4.2886e-21, 2.5726e-20, 2.7545e-20,\n 2.0748e-21, 1.4383e-20, 2.7424e-20, 1.7368e-19, 7.6275e-20, 3.1535e-20,\n 2.8769e-20, 5.9045e-21, 1.6425e-20, 1.4589e-19, 1.5143e-20, 6.4583e-21,\n 2.4266e-20, 1.3560e-20, 3.0318e-20, 4.3869e-20, 5.9862e-20, 6.3322e-20,\n 5.9356e-21, 3.5729e-19, 3.0135e-21, 4.8827e-20, 2.0362e-20, 3.5564e-20,\n 1.8547e-20, 1.7668e-20, 1.0110e-20, 2.1504e-20, 9.9429e-21, 3.4166e-21,\n 2.0147e-20, 6.7842e-21, 1.8361e-20, 5.8467e-21, 1.2651e-19, 3.7498e-20,\n 3.6690e-20, 1.4035e-20, 4.5470e-20, 2.0864e-21, 1.3211e-20, 2.3895e-20,\n 3.7646e-20, 4.9249e-20, 9.7164e-21, 1.0039e-21], device='cuda:0')" + }, + "23": { + "step": "tensor(22524.)", + "exp_avg": "tensor([ 1.2071e-15, 5.8272e-16, -3.4569e-16, 1.0512e-15, 2.7626e-17,\n -1.8817e-15, 1.3160e-17, 7.6069e-16, 1.1704e-14, 5.8494e-15,\n 5.9876e-15, 5.1368e-14, -3.0947e-15, 9.7159e-16, 3.4103e-16,\n -3.9661e-14, -5.8716e-16, -5.8987e-16, -1.3487e-16, -1.3967e-15,\n -9.0416e-16, -1.7195e-15, 4.8765e-17, -5.4112e-15, 1.0508e-15,\n 7.0577e-16, -5.2708e-14, -8.0968e-17, 3.2757e-14, 5.3178e-16,\n 1.2828e-15, -1.5977e-15, -4.1367e-15, 5.5490e-16, 1.8524e-15,\n 5.0125e-16, -2.6741e-14, -4.7942e-17, 3.0182e-15, 1.4767e-15,\n -5.0235e-14, 7.8198e-17, 3.0708e-17, 3.5110e-15, -1.1337e-15,\n 4.2944e-14, -2.7215e-16, 1.4857e-15, 4.2308e-16, -8.0354e-15,\n -2.1568e-15, -1.1238e-16, 6.8732e-16, 1.4316e-15, -4.4375e-14,\n -8.6980e-16, 1.0660e-14, 5.5105e-16, -2.2032e-15, 2.9425e-16,\n 2.3865e-15, -1.3188e-15, 1.8112e-14, -1.2789e-15], device='cuda:0')", + "exp_avg_sq": "tensor([6.2970e-22, 1.8741e-23, 1.3329e-21, 2.8573e-21, 3.3324e-21, 6.5067e-22,\n 6.6076e-22, 1.4421e-21, 5.1367e-21, 2.2389e-21, 8.9506e-22, 1.3241e-20,\n 3.8799e-21, 5.3683e-22, 7.4001e-21, 1.2906e-21, 3.7770e-22, 2.8466e-21,\n 9.2598e-23, 1.5577e-22, 3.3289e-21, 3.7461e-21, 1.3756e-21, 2.5933e-20,\n 1.0970e-21, 9.0481e-23, 1.1775e-20, 6.4852e-21, 1.0731e-20, 4.6135e-22,\n 4.7658e-22, 1.6142e-22, 2.7037e-21, 5.2392e-22, 1.4360e-21, 1.0214e-21,\n 1.3743e-21, 9.9071e-21, 5.8036e-23, 1.1669e-21, 2.2360e-20, 1.3940e-21,\n 2.3906e-21, 9.3147e-21, 3.4179e-22, 1.0162e-20, 5.6893e-22, 1.5962e-22,\n 1.5545e-21, 1.3618e-21, 1.0520e-22, 3.0516e-22, 8.3377e-21, 3.7900e-21,\n 6.5155e-21, 2.7377e-22, 3.7673e-21, 9.3390e-23, 2.0122e-22, 6.6986e-22,\n 9.5582e-22, 7.3767e-22, 3.4328e-21, 5.4538e-23], device='cuda:0')" + }, + "24": { + "step": "tensor(22524.)", + "exp_avg": "tensor([-1.9131e-15, -7.3834e-16, 5.7895e-16, -7.9597e-16, 1.8118e-16,\n 1.9212e-15, -4.0627e-16, -9.5984e-16, 3.5523e-15, -6.3913e-15,\n 3.5557e-15, 2.7556e-14, -6.4301e-15, -6.0276e-16, -2.0179e-15,\n -2.7054e-14, 5.2605e-16, 7.1820e-16, 8.0489e-16, 1.7736e-15,\n 1.4565e-15, 2.5452e-15, 6.8751e-17, -3.5652e-15, -1.8110e-15,\n -8.1269e-16, -3.4743e-14, -4.0806e-16, 2.0510e-14, -1.2162e-15,\n -6.8909e-16, 1.8419e-15, -8.4515e-15, -2.9704e-16, -1.7434e-15,\n -1.5601e-16, -2.3849e-14, 1.2023e-16, -4.3711e-15, -6.3804e-16,\n -2.5437e-14, -9.8014e-16, -1.0711e-17, -2.8884e-16, 2.7122e-16,\n 2.3136e-14, -9.3932e-16, -1.7047e-15, -7.7998e-16, 2.0895e-15,\n 3.1536e-15, 1.4503e-16, -3.4226e-15, 9.3705e-15, -3.3383e-14,\n 1.4255e-15, 1.5440e-14, -4.7432e-16, -1.6583e-15, -4.4522e-17,\n -4.5100e-15, 2.5048e-15, 1.0296e-14, 1.5840e-15], device='cuda:0')", + "exp_avg_sq": "tensor([1.1456e-21, 7.6668e-24, 2.2679e-21, 3.1215e-21, 4.7098e-21, 7.1089e-22,\n 1.6545e-21, 2.5126e-21, 4.3212e-21, 4.0594e-21, 1.4408e-21, 6.0232e-21,\n 2.0901e-21, 5.6552e-22, 8.4526e-21, 2.0966e-21, 4.0605e-22, 2.8832e-21,\n 3.8801e-22, 1.8870e-22, 3.7877e-21, 3.1490e-21, 3.9445e-21, 8.5688e-21,\n 2.1261e-21, 3.8255e-22, 4.8768e-21, 7.5998e-21, 4.4483e-21, 8.8050e-22,\n 1.0512e-21, 5.6481e-22, 3.8852e-21, 6.7108e-22, 9.6908e-22, 1.5148e-21,\n 2.4128e-21, 1.4178e-20, 7.3294e-23, 1.3985e-21, 7.5625e-21, 1.4264e-21,\n 2.6083e-21, 3.7437e-21, 1.3275e-21, 6.1013e-21, 1.5188e-21, 5.8776e-22,\n 2.2682e-21, 2.8889e-21, 2.6027e-22, 3.5833e-22, 8.7640e-21, 2.4649e-21,\n 2.6145e-21, 2.0275e-22, 2.0512e-21, 2.0315e-22, 9.5685e-22, 1.0688e-21,\n 1.0095e-21, 9.6369e-22, 2.7614e-21, 6.4320e-23], device='cuda:0')" + }, + "25": { + "step": "tensor(22524.)", + "exp_avg": "tensor([[-4.3769e-15, 1.1174e-14, -1.8450e-14, -9.8389e-15, -1.5082e-14,\n -4.4355e-15, -2.0800e-15, 3.8372e-15, 7.5015e-14, 3.3178e-14,\n 3.4623e-14, 2.3911e-13, -8.2357e-15, -2.0152e-15, 4.8297e-15,\n -2.0758e-13, -2.0480e-15, 1.2145e-15, -1.2456e-14, -2.2003e-14,\n -1.4721e-14, 4.6042e-15, -3.2846e-15, -1.9366e-14, -6.6796e-15,\n -4.7991e-15, -2.6717e-13, -6.4655e-15, 1.8533e-13, -1.1806e-14,\n -1.0932e-14, -1.8053e-14, 2.5592e-15, -1.3179e-14, 1.1461e-14,\n 1.1502e-14, -1.3915e-13, -2.7981e-16, -1.6962e-14, 7.3612e-15,\n -2.0009e-13, -2.6016e-14, -3.3168e-15, 3.2814e-14, -3.1410e-15,\n 2.3716e-13, -9.8921e-16, 3.6801e-15, 5.9552e-15, -4.0860e-14,\n 6.0291e-16, -1.0827e-14, 1.5325e-14, 1.1665e-14, -2.7710e-13,\n 9.1936e-15, 7.1794e-14, -1.1360e-14, -1.1902e-14, 8.4902e-16,\n 1.0244e-14, -1.6203e-14, 1.0602e-13, -8.4785e-15],\n [ 1.6030e-15, -1.1612e-14, 1.6185e-14, 9.0877e-15, 1.4013e-14,\n 3.8817e-15, 2.8943e-16, 7.9440e-16, -5.5204e-14, -2.4394e-14,\n -3.5571e-14, -2.2574e-13, 2.0105e-14, 2.2355e-16, -3.8247e-15,\n 2.1961e-13, 5.5683e-16, -3.0713e-15, 1.0953e-14, 2.0525e-14,\n 1.3969e-14, -6.7080e-15, 2.0605e-15, 4.3493e-14, 5.5165e-15,\n 3.4838e-15, 2.6984e-13, 4.8570e-15, -1.6566e-13, 9.9921e-15,\n 1.0143e-14, 1.5209e-14, 2.0797e-14, 1.1513e-14, -1.1159e-14,\n -1.3220e-14, 1.4722e-13, -1.6536e-15, 1.5947e-14, -8.4875e-15,\n 2.3646e-13, 2.5103e-14, 2.0663e-15, -1.5168e-14, 7.7332e-15,\n -2.1225e-13, -4.9801e-16, -4.6836e-15, -7.3622e-15, 5.7317e-14,\n -2.4085e-15, 9.6312e-15, -1.6540e-14, 3.6286e-15, 2.9155e-13,\n -1.0177e-14, -6.2479e-14, 9.8529e-15, 1.3653e-14, -1.2627e-15,\n -1.0713e-14, 1.5814e-14, -9.0658e-14, 8.1793e-15]], device='cuda:0')", + "exp_avg_sq": "tensor([[7.1018e-20, 4.2283e-21, 1.1623e-19, 1.1413e-19, 2.3821e-19, 6.1464e-20,\n 6.7571e-20, 2.0219e-19, 1.3150e-18, 3.3214e-19, 8.1236e-19, 2.0529e-18,\n 3.2678e-18, 1.4133e-19, 3.2977e-19, 4.7519e-19, 8.5871e-20, 2.7293e-19,\n 5.1076e-20, 8.8882e-20, 3.0826e-19, 1.0510e-19, 7.2167e-20, 3.3830e-18,\n 1.2666e-19, 4.8723e-20, 3.1016e-18, 2.6789e-19, 3.2215e-18, 1.0553e-19,\n 7.9694e-20, 5.8899e-20, 7.6915e-19, 6.5075e-20, 2.6400e-18, 9.6184e-20,\n 5.0702e-19, 1.9299e-19, 4.0034e-20, 6.9669e-20, 3.1042e-18, 3.0916e-19,\n 2.3755e-19, 3.4879e-18, 4.3779e-19, 1.9174e-18, 6.0727e-20, 4.7008e-20,\n 2.0623e-19, 2.5982e-19, 4.9274e-20, 1.9989e-19, 2.3089e-19, 2.5442e-18,\n 4.8781e-18, 1.2215e-19, 3.5967e-18, 2.8196e-20, 1.1506e-19, 1.2710e-19,\n 1.4588e-18, 3.8127e-20, 1.4147e-18, 7.3636e-20],\n [7.1018e-20, 4.2283e-21, 1.1623e-19, 1.1413e-19, 2.3821e-19, 6.1464e-20,\n 6.7571e-20, 2.0219e-19, 1.3150e-18, 3.3214e-19, 8.1236e-19, 2.0529e-18,\n 3.2678e-18, 1.4133e-19, 3.2977e-19, 4.7519e-19, 8.5871e-20, 2.7292e-19,\n 5.1076e-20, 8.8881e-20, 3.0826e-19, 1.0510e-19, 7.2167e-20, 3.3830e-18,\n 1.2666e-19, 4.8723e-20, 3.1016e-18, 2.6789e-19, 3.2215e-18, 1.0553e-19,\n 7.9694e-20, 5.8899e-20, 7.6915e-19, 6.5075e-20, 2.6400e-18, 9.6184e-20,\n 5.0702e-19, 1.9299e-19, 4.0034e-20, 6.9669e-20, 3.1042e-18, 3.0916e-19,\n 2.3755e-19, 3.4879e-18, 4.3779e-19, 1.9174e-18, 6.0727e-20, 4.7008e-20,\n 2.0623e-19, 2.5982e-19, 4.9274e-20, 1.9989e-19, 2.3089e-19, 2.5442e-18,\n 4.8781e-18, 1.2215e-19, 3.5967e-18, 2.8196e-20, 1.1506e-19, 1.2710e-19,\n 1.4588e-18, 3.8127e-20, 1.4147e-18, 7.3636e-20]], device='cuda:0')" + }, + "26": { + "step": "tensor(22524.)", + "exp_avg": "tensor([ 2.3594e-14, -1.5205e-14], device='cuda:0')", + "exp_avg_sq": "tensor([1.7517e-18, 1.7517e-18], device='cuda:0')" + }, + "27": { + "step": "tensor(22524.)", + "exp_avg": "tensor([[ 7.2347e-21, -7.4777e-21, 4.5251e-21, ..., 1.5915e-20,\n -1.0425e-20, -4.9967e-21],\n [ 4.1363e-20, -1.9432e-20, -5.7130e-20, ..., 5.0639e-20,\n 1.2878e-19, 9.2883e-21],\n [ 2.2669e-20, -1.5782e-20, -4.7383e-20, ..., 8.7184e-21,\n -3.5994e-20, -1.5225e-19],\n ...,\n [-2.5186e-20, 1.9455e-21, 1.0570e-20, ..., -8.4657e-21,\n 2.3650e-20, 3.3166e-20],\n [-1.2624e-19, 5.3965e-20, -1.3189e-20, ..., -8.0822e-20,\n 6.8716e-20, 5.4343e-20],\n [ 3.5318e-20, -3.1775e-20, 4.4737e-21, ..., 6.2223e-21,\n -2.5697e-20, -2.7747e-20]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.4773e-24, 7.0076e-24, 2.0141e-24, ..., 2.2178e-24, 6.9690e-24,\n 9.1193e-24],\n [6.6769e-23, 1.2502e-22, 2.5013e-23, ..., 2.1029e-23, 3.3186e-23,\n 4.7866e-23],\n [1.1249e-23, 4.7503e-23, 1.2888e-23, ..., 2.7688e-23, 1.3658e-23,\n 3.6425e-23],\n ...,\n [1.1355e-25, 1.1967e-25, 3.3212e-27, ..., 1.9952e-25, 1.0233e-25,\n 2.1543e-26],\n [8.5614e-29, 1.7842e-28, 4.3678e-25, ..., 4.6082e-24, 7.6743e-25,\n 6.7035e-24],\n [2.6488e-22, 2.6548e-22, 5.4878e-23, ..., 3.8653e-23, 9.7648e-23,\n 6.8004e-22]], device='cuda:0')" + }, + "28": { + "step": "tensor(22524.)", + "exp_avg": "tensor([-1.0689e-20, 9.6049e-19, -3.8056e-19, 3.9971e-19, 1.6782e-19,\n 8.7946e-20, 1.4433e-19, -1.4256e-18, 1.0665e-19, -2.8913e-21,\n -6.3992e-19, -8.2559e-20, 2.4549e-19, 2.6932e-19, -9.5527e-20,\n 3.7433e-19, -6.1954e-20, -3.7276e-19, 1.3957e-20, -1.0142e-19,\n 2.4653e-19, 5.2826e-19, -7.6968e-19, 2.5789e-19, 9.8455e-19,\n 4.3014e-20, 2.2133e-19, 3.8773e-19, -4.5328e-19, 6.1768e-20,\n 1.5450e-18, 6.0831e-20, 3.0910e-20, -4.6480e-20, -4.7367e-19,\n -4.1143e-20, -2.7794e-19, -2.1103e-20, -5.8945e-19, -2.3641e-19,\n -1.0983e-19, 6.6872e-20, -5.0917e-19, -8.0117e-19, -4.1257e-20,\n 5.3091e-20, 3.3746e-19, -2.9413e-20, 2.8179e-19, 8.9834e-20,\n -1.2224e-18, -4.7801e-20, 6.4750e-19, -1.2350e-19, 4.2342e-21,\n -2.0382e-19, 1.1524e-19, -5.3287e-20, 2.2508e-19, -1.1809e-19,\n -1.7393e-20, 9.4436e-20, 5.1521e-19, -2.0851e-19], device='cuda:0')", + "exp_avg_sq": "tensor([1.2834e-21, 2.0568e-20, 9.4951e-21, 8.9285e-23, 1.7701e-21, 2.3565e-23,\n 3.5055e-21, 1.3831e-20, 2.8061e-23, 4.0248e-21, 1.6005e-20, 1.7314e-21,\n 1.0669e-21, 6.9344e-22, 6.5312e-22, 3.8485e-22, 1.0817e-20, 4.8395e-21,\n 2.7713e-21, 1.6895e-20, 1.2673e-23, 4.7272e-22, 7.3610e-21, 7.4270e-22,\n 3.3056e-21, 1.2051e-21, 1.3239e-23, 6.8706e-22, 1.7483e-20, 3.9490e-21,\n 1.0008e-20, 4.5374e-23, 3.7455e-22, 3.1397e-20, 4.8933e-20, 6.2009e-20,\n 1.3028e-20, 5.2197e-22, 5.0766e-21, 3.3917e-21, 1.4423e-20, 6.3876e-21,\n 3.6059e-20, 1.0653e-20, 1.1305e-20, 2.5829e-21, 2.6637e-22, 4.6194e-20,\n 4.7071e-21, 6.0485e-23, 3.1793e-20, 1.5580e-21, 2.4971e-22, 5.8459e-21,\n 3.6135e-20, 2.1469e-20, 2.3856e-21, 4.0475e-21, 2.6089e-22, 1.7212e-20,\n 3.6473e-22, 8.9756e-23, 4.8188e-22, 6.8273e-20], device='cuda:0')" + }, + "29": { + "step": "tensor(22524.)", + "exp_avg": "tensor([-3.9306e-22, 2.1336e-19, -2.5191e-18, -3.1224e-19, -9.0802e-19,\n -1.2439e-20, -3.0188e-21, -3.3791e-18, -4.4856e-21, 2.1810e-21,\n -2.6235e-18, -1.4435e-20, -7.2095e-19, -8.9347e-19, -4.9087e-20,\n -9.1803e-19, 7.4428e-20, -1.9855e-18, 2.3592e-24, 1.0701e-19,\n 1.5022e-20, -7.8308e-19, -2.2604e-18, -1.4067e-18, 3.2415e-19,\n -1.5790e-20, -6.1175e-21, -7.9600e-19, -2.8776e-18, 5.6588e-21,\n 1.1118e-18, -9.9044e-21, -3.0092e-21, 1.1649e-19, -3.1321e-18,\n 1.3556e-19, -1.9297e-18, -2.4560e-20, -1.7665e-18, -2.0119e-18,\n 1.6879e-19, -9.6167e-19, -3.0305e-18, -3.8493e-18, -2.1450e-18,\n 8.5497e-21, -9.0819e-19, 1.4620e-19, -3.9874e-19, -1.1313e-20,\n -3.8274e-18, -1.4455e-18, 1.1805e-19, 3.3934e-21, 4.1178e-20,\n 1.4665e-19, -2.5431e-22, 1.2088e-20, -7.7097e-19, 6.8768e-20,\n -2.8477e-20, -1.3760e-20, -1.2516e-19, 2.9939e-19], device='cuda:0')", + "exp_avg_sq": "tensor([1.7141e-23, 1.4721e-22, 5.2570e-23, 1.8301e-24, 1.1172e-23, 2.8559e-24,\n 4.0747e-23, 1.3004e-22, 1.0206e-24, 9.5883e-23, 9.8416e-23, 2.7412e-23,\n 3.4582e-24, 2.0025e-24, 2.6472e-23, 8.7571e-25, 1.6346e-22, 2.9510e-23,\n 4.7081e-23, 2.2623e-22, 1.5140e-24, 2.5321e-25, 5.7261e-23, 3.1124e-24,\n 1.1936e-23, 2.3931e-23, 4.5949e-26, 1.2297e-24, 1.0463e-22, 4.7018e-23,\n 4.0286e-23, 1.4918e-24, 4.9912e-24, 2.7864e-22, 2.7109e-22, 7.3601e-22,\n 9.1913e-23, 8.2010e-24, 2.5356e-23, 3.1889e-23, 1.6296e-22, 5.0316e-23,\n 1.0356e-22, 1.0321e-22, 9.9689e-23, 2.7084e-23, 2.3861e-25, 6.9827e-22,\n 3.9734e-23, 1.3839e-24, 2.0462e-22, 9.9936e-24, 2.0498e-25, 1.3105e-22,\n 4.7840e-22, 4.0887e-22, 3.9856e-23, 6.9563e-23, 6.4187e-25, 3.1689e-22,\n 2.4811e-24, 3.8525e-24, 5.0062e-24, 5.2930e-22], device='cuda:0')" + }, + "30": { + "step": "tensor(22524.)", + "exp_avg": "tensor([ 3.7261e-22, 2.8175e-19, -2.1718e-18, -5.0387e-19, -1.0475e-18,\n -4.5111e-21, 3.3330e-21, -3.7840e-18, 1.0560e-20, -1.6949e-21,\n -2.6456e-18, 1.2858e-20, -8.3353e-19, -8.9446e-19, 3.9835e-20,\n -8.0315e-19, -6.7168e-20, -2.0334e-18, 1.6490e-24, -9.1931e-20,\n -1.3254e-19, -6.6069e-19, -2.6083e-18, -1.1334e-18, 3.0322e-19,\n 1.5598e-20, -1.9333e-19, -7.6669e-19, -2.4838e-18, -7.8898e-21,\n 1.1034e-18, 1.0958e-20, 3.0985e-21, -1.1605e-19, -2.6899e-18,\n -1.3236e-19, -1.9578e-18, 2.2265e-20, -2.2601e-18, -1.8446e-18,\n -1.3565e-19, -1.2582e-18, -2.8056e-18, -3.2037e-18, -1.6797e-18,\n -1.0099e-20, -8.5314e-19, -1.4288e-19, -8.3996e-19, 1.0939e-20,\n -3.5265e-18, -1.4852e-18, 2.1618e-20, -2.8544e-21, -3.9500e-20,\n -1.0828e-19, -5.7041e-23, -1.0532e-20, -8.9805e-19, -5.0041e-20,\n 2.3948e-20, 1.3757e-20, -2.0467e-19, -2.4303e-19], device='cuda:0')", + "exp_avg_sq": "tensor([2.5043e-23, 2.0389e-22, 9.0129e-23, 1.7397e-24, 1.3864e-23, 4.0047e-24,\n 6.7466e-23, 1.5730e-22, 2.0065e-24, 8.2803e-23, 1.7515e-22, 3.9154e-23,\n 3.9301e-24, 2.5927e-24, 2.5874e-23, 1.6613e-24, 1.9661e-22, 5.4023e-23,\n 6.2653e-23, 2.7958e-22, 3.4720e-24, 2.7964e-25, 8.8003e-23, 6.1436e-24,\n 2.3817e-23, 3.2905e-23, 7.3175e-26, 1.6734e-24, 1.8334e-22, 7.4855e-23,\n 1.2623e-22, 3.0082e-24, 5.8207e-24, 4.5395e-22, 6.0126e-22, 9.4539e-22,\n 1.3802e-22, 1.4870e-23, 5.6900e-23, 4.1473e-23, 2.2718e-22, 7.1568e-23,\n 4.3114e-22, 1.3252e-22, 1.0424e-22, 4.9281e-23, 3.1564e-25, 6.7811e-22,\n 3.9636e-23, 3.0900e-24, 3.5134e-22, 1.3599e-23, 1.7901e-25, 1.2243e-22,\n 5.8493e-22, 3.5480e-22, 5.5731e-23, 8.7034e-23, 1.3258e-24, 2.9516e-22,\n 6.0172e-24, 5.8263e-24, 5.4981e-24, 9.8890e-22], device='cuda:0')" + }, + "31": { + "step": "tensor(22524.)", + "exp_avg": "tensor([[-1.8234e-18, -1.0711e-18, 1.5521e-17, 4.8511e-18, 7.9155e-18,\n -1.8220e-18, -1.8166e-18, 1.0417e-17, -2.1101e-18, -1.5449e-18,\n 1.1235e-17, -2.0185e-18, 8.5912e-18, 1.1006e-17, -1.6184e-18,\n 1.1330e-17, -1.7394e-18, 1.1945e-17, -2.9477e-19, -1.4434e-18,\n -6.5127e-19, 1.1358e-17, 9.9364e-18, 1.4219e-17, -2.7890e-18,\n -1.8799e-18, -4.7048e-19, 1.0319e-17, 1.3007e-17, -1.7017e-18,\n -4.3336e-18, -1.9813e-18, -1.9307e-18, -1.9579e-18, 1.0466e-17,\n -1.2162e-18, 1.0824e-17, -1.6223e-18, 9.4350e-18, 1.3366e-17,\n -1.5014e-18, 5.7019e-18, 8.3833e-18, 1.3771e-17, 1.4611e-17,\n -1.6204e-18, 1.2505e-17, -2.0258e-18, 3.1110e-18, -2.2007e-18,\n 1.4640e-17, 1.1635e-17, -1.6500e-18, -2.6474e-19, -4.3437e-19,\n -1.2348e-18, -1.5868e-18, -1.3257e-18, 8.7971e-18, -9.1017e-19,\n -1.7293e-18, -2.5337e-18, 4.4228e-18, -1.6680e-18],\n [ 1.8236e-18, 1.0696e-18, -1.5522e-17, -4.8517e-18, -7.9159e-18,\n 1.8222e-18, 1.8168e-18, -1.0418e-17, 2.1103e-18, 1.5451e-18,\n -1.1236e-17, 2.0188e-18, -8.5932e-18, -1.1007e-17, 1.6187e-18,\n -1.1332e-17, 1.7396e-18, -1.1946e-17, 2.9496e-19, 1.4436e-18,\n 6.5132e-19, -1.1358e-17, -9.9372e-18, -1.4220e-17, 2.7875e-18,\n 1.8801e-18, 4.7015e-19, -1.0321e-17, -1.3009e-17, 1.7020e-18,\n 4.3320e-18, 1.9815e-18, 1.9309e-18, 1.9582e-18, -1.0468e-17,\n 1.2164e-18, -1.0825e-17, 1.6226e-18, -9.4367e-18, -1.3368e-17,\n 1.5016e-18, -5.7035e-18, -8.3840e-18, -1.3771e-17, -1.4613e-17,\n 1.6207e-18, -1.2506e-17, 2.0260e-18, -3.1129e-18, 2.2010e-18,\n -1.4642e-17, -1.1636e-17, 1.6492e-18, 2.6501e-19, 4.3460e-19,\n 1.2351e-18, 1.5871e-18, 1.3259e-18, -8.7986e-18, 9.1039e-19,\n 1.7295e-18, 2.5339e-18, -4.4228e-18, 1.6683e-18]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.8981e-21, 6.1492e-21, 3.1887e-21, 2.0168e-20, 2.5882e-21, 5.5890e-21,\n 2.5361e-21, 3.0040e-22, 8.5141e-22, 1.8570e-20, 1.4339e-21, 6.5116e-21,\n 8.5627e-21, 7.2406e-21, 2.3128e-20, 2.1703e-21, 6.1987e-21, 1.0964e-21,\n 9.7786e-21, 5.3302e-21, 3.8244e-21, 1.3053e-20, 4.2272e-22, 1.2809e-21,\n 3.0792e-21, 1.0360e-20, 1.1356e-22, 8.4649e-21, 2.2562e-21, 5.0690e-21,\n 1.5804e-22, 1.4477e-22, 1.0848e-22, 4.8180e-21, 3.0159e-21, 7.6927e-21,\n 5.3072e-21, 2.8538e-21, 3.8104e-22, 2.0218e-21, 1.0341e-21, 2.8931e-21,\n 1.6011e-22, 3.4019e-22, 1.2328e-20, 2.0479e-21, 3.9176e-21, 1.1122e-20,\n 9.6798e-21, 1.1376e-21, 3.0638e-21, 1.1264e-21, 2.7547e-20, 2.3581e-20,\n 9.2703e-21, 1.3397e-20, 5.7206e-21, 9.9349e-21, 3.9739e-22, 1.9069e-20,\n 2.5510e-22, 3.5764e-21, 4.9156e-23, 4.3155e-21],\n [1.8981e-21, 6.1492e-21, 3.1887e-21, 2.0168e-20, 2.5882e-21, 5.5890e-21,\n 2.5361e-21, 3.0040e-22, 8.5141e-22, 1.8570e-20, 1.4339e-21, 6.5116e-21,\n 8.5627e-21, 7.2406e-21, 2.3128e-20, 2.1703e-21, 6.1987e-21, 1.0964e-21,\n 9.7786e-21, 5.3302e-21, 3.8244e-21, 1.3053e-20, 4.2272e-22, 1.2809e-21,\n 3.0792e-21, 1.0360e-20, 1.1356e-22, 8.4649e-21, 2.2562e-21, 5.0690e-21,\n 1.5804e-22, 1.4477e-22, 1.0848e-22, 4.8180e-21, 3.0159e-21, 7.6927e-21,\n 5.3072e-21, 2.8538e-21, 3.8104e-22, 2.0218e-21, 1.0341e-21, 2.8931e-21,\n 1.6011e-22, 3.4019e-22, 1.2328e-20, 2.0479e-21, 3.9176e-21, 1.1122e-20,\n 9.6798e-21, 1.1376e-21, 3.0638e-21, 1.1264e-21, 2.7547e-20, 2.3581e-20,\n 9.2703e-21, 1.3397e-20, 5.7206e-21, 9.9349e-21, 3.9739e-22, 1.9069e-20,\n 2.5510e-22, 3.5764e-21, 4.9156e-23, 4.3155e-21]], device='cuda:0')" + }, + "32": { + "step": "tensor(22524.)", + "exp_avg": "tensor([ 1.0161e-17, -1.0161e-17], device='cuda:0')", + "exp_avg_sq": "tensor([5.2032e-20, 5.2032e-20], device='cuda:0')" + }, + "33": { + "step": "tensor(22524.)", + "exp_avg": "tensor([[ 5.2378e-17, 2.2163e-16, -2.3372e-16, ..., 1.7526e-16,\n -2.6105e-16, -1.6602e-16],\n [ 7.7613e-17, 2.9342e-15, 8.2001e-16, ..., 2.7763e-15,\n 1.0182e-15, -3.6033e-15],\n [ 1.6819e-16, 1.0338e-16, -5.6863e-16, ..., -3.3331e-15,\n 5.9927e-16, 1.3550e-15],\n ...,\n [ 4.1647e-16, 9.0256e-16, 7.3101e-17, ..., 3.8036e-16,\n -6.3315e-16, -9.6005e-17],\n [ 6.1915e-17, -3.0695e-16, 9.6393e-17, ..., 5.0263e-17,\n 1.5261e-16, 4.6222e-16],\n [ 1.0050e-15, -8.7667e-16, 1.1624e-15, ..., -2.1123e-15,\n 1.6036e-15, -1.8986e-15]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.3012e-21, 7.0315e-21, 3.7986e-21, ..., 1.5722e-21, 2.2510e-20,\n 6.2939e-20],\n [1.2526e-21, 1.7362e-21, 1.0140e-21, ..., 5.2468e-21, 1.4427e-20,\n 7.4575e-20],\n [1.3542e-22, 1.0879e-21, 6.3343e-22, ..., 5.4426e-23, 8.9578e-22,\n 5.6421e-22],\n ...,\n [7.6413e-22, 2.0714e-22, 2.6656e-23, ..., 1.2231e-21, 3.4951e-21,\n 2.3756e-20],\n [1.0590e-21, 1.9331e-21, 5.9973e-22, ..., 8.9838e-22, 5.8179e-22,\n 1.4905e-21],\n [1.7147e-22, 1.4163e-21, 2.5769e-23, ..., 1.0501e-21, 8.2199e-22,\n 3.6566e-21]], device='cuda:0')" + }, + "34": { + "step": "tensor(22524.)", + "exp_avg": "tensor([-1.5550e-15, 1.1694e-14, 3.8975e-15, 8.3177e-15, 4.9369e-17,\n 2.5761e-16, -1.3593e-14, 1.3234e-15, -5.0766e-16, 1.0278e-15,\n 9.5889e-16, -8.1305e-15, 1.0376e-14, -1.7591e-15, -1.2718e-14,\n -1.0032e-14, 4.3865e-16, 1.0017e-14, 8.1917e-15, 4.7795e-15,\n -1.2482e-15, 8.4109e-15, -2.7401e-16, 6.3988e-16, -2.3064e-15,\n 3.2096e-16, 1.4549e-15, 1.0145e-16, 4.2105e-15, -2.9480e-16,\n 3.0760e-17, -8.4657e-16, 2.6890e-15, 1.2298e-15, -1.1898e-14,\n -2.0916e-15, -7.4931e-17, 2.2929e-16, -1.0413e-15, 1.6408e-16,\n -6.8765e-17, -1.2813e-14, -1.2014e-14, 9.8477e-16, -8.7506e-15,\n 4.2881e-15, -1.3632e-15, 5.4400e-16, 1.2764e-15, 9.3627e-15,\n -2.5719e-15, -7.4061e-16, -4.3840e-16, 1.5650e-15, 5.0968e-15,\n -4.2723e-16, -4.1304e-16, 6.1151e-15, -1.2582e-15, 2.2452e-16,\n 2.1573e-16, 7.5154e-16, 3.7914e-16, -2.3845e-15], device='cuda:0')", + "exp_avg_sq": "tensor([2.6145e-18, 2.1066e-18, 2.1101e-19, 5.2680e-20, 5.0216e-18, 2.2578e-19,\n 3.7795e-20, 9.1579e-19, 1.9488e-18, 2.2206e-19, 9.9118e-19, 2.6812e-19,\n 3.4993e-19, 1.7192e-18, 1.7846e-18, 2.8008e-19, 1.7520e-19, 1.9821e-19,\n 6.0898e-19, 6.7810e-20, 2.9517e-19, 2.9737e-19, 7.3933e-20, 9.6414e-19,\n 6.9098e-18, 1.0251e-18, 1.6711e-19, 4.5169e-19, 8.2306e-20, 2.3636e-19,\n 3.6824e-19, 1.3045e-18, 2.2746e-19, 1.7221e-18, 6.7222e-20, 1.5838e-18,\n 1.5986e-18, 9.1805e-20, 2.0225e-21, 8.2129e-19, 2.5102e-18, 7.1864e-18,\n 2.2343e-19, 1.7388e-18, 1.3501e-19, 1.7499e-18, 2.2358e-21, 6.9359e-21,\n 7.9532e-21, 2.0731e-18, 1.7631e-18, 6.1040e-20, 7.7203e-20, 2.5557e-18,\n 2.9686e-18, 1.4148e-19, 3.5531e-20, 2.2386e-19, 3.4125e-18, 2.7362e-20,\n 5.4083e-21, 8.5070e-19, 3.8919e-19, 2.3521e-19], device='cuda:0')" + }, + "35": { + "step": "tensor(22524.)", + "exp_avg": "tensor([ 1.4828e-15, -1.9398e-14, -2.0362e-14, -5.8847e-16, 5.9860e-16,\n 8.8254e-17, -3.4798e-14, -3.1088e-14, 6.1669e-16, 4.1957e-17,\n 1.5766e-16, -3.3735e-14, -1.2905e-14, 1.3464e-15, -5.5744e-14,\n -4.9639e-14, -1.1417e-16, -7.7692e-15, -1.8462e-14, -1.2704e-14,\n -2.5736e-14, -1.6011e-14, -4.4301e-16, 9.1867e-17, -4.6524e-14,\n 3.0562e-16, -1.9518e-14, 3.5849e-16, -8.2167e-15, 4.0476e-16,\n 1.3806e-16, 7.2736e-16, -2.1012e-14, -3.1485e-14, -2.7098e-14,\n -5.0466e-14, 3.8274e-16, -9.5953e-17, -9.4373e-16, 3.6785e-16,\n 6.9396e-16, -6.0495e-14, -5.2879e-14, 1.1902e-16, -4.5720e-14,\n -3.3628e-14, -1.2424e-15, -4.5872e-16, 3.7885e-16, -1.4753e-14,\n -3.4136e-14, -2.6377e-16, -1.7738e-16, 1.6495e-16, -2.8953e-14,\n -1.1174e-16, -2.3253e-16, -1.2075e-14, 9.6691e-16, 1.7294e-17,\n -1.5352e-16, 4.2624e-16, 2.6458e-17, -2.4100e-14], device='cuda:0')", + "exp_avg_sq": "tensor([1.5961e-20, 1.9656e-20, 2.7795e-21, 5.7878e-22, 8.8920e-20, 1.7093e-21,\n 4.6093e-22, 7.6971e-21, 2.4534e-20, 1.8772e-21, 9.5035e-21, 2.7739e-21,\n 4.0478e-21, 1.3094e-20, 1.6583e-20, 3.2371e-21, 1.8854e-21, 2.2249e-21,\n 7.4491e-21, 8.8590e-22, 3.0342e-21, 3.9485e-21, 7.3725e-22, 9.3404e-21,\n 8.2123e-20, 9.2137e-21, 1.5898e-21, 4.5331e-21, 2.1108e-21, 1.3520e-21,\n 4.1661e-21, 1.3793e-20, 2.5176e-21, 2.1780e-20, 7.9330e-22, 1.4981e-20,\n 1.9105e-20, 6.6828e-22, 4.4703e-23, 8.6018e-21, 2.3815e-20, 8.3274e-20,\n 3.6772e-21, 1.8166e-20, 2.1107e-21, 1.4050e-20, 1.8283e-23, 1.9231e-23,\n 1.5790e-22, 1.6877e-20, 1.9247e-20, 4.9783e-22, 6.8647e-22, 1.7723e-20,\n 3.7541e-20, 1.3871e-21, 3.8925e-22, 2.8802e-21, 4.4302e-20, 1.6590e-22,\n 2.1035e-23, 7.7007e-21, 3.5309e-21, 3.2619e-21], device='cuda:0')" + }, + "36": { + "step": "tensor(22524.)", + "exp_avg": "tensor([-1.4532e-15, -1.2922e-14, -1.9604e-14, -5.8165e-15, -6.7403e-16,\n -9.7281e-17, -3.9622e-14, -2.4890e-14, -6.1487e-16, -3.0283e-17,\n -3.4180e-16, -3.6938e-14, -1.3888e-14, -1.2544e-15, -4.2368e-14,\n -4.1745e-14, 1.2758e-16, -1.1107e-14, -1.6136e-14, -1.3908e-14,\n -2.5473e-14, -1.5874e-14, 4.3880e-16, -1.1700e-16, -3.6482e-14,\n -3.2525e-16, -2.3111e-14, -3.7408e-16, -1.4504e-14, -4.1058e-16,\n -1.2555e-16, -6.9748e-16, -2.1614e-14, -2.6631e-14, -3.5789e-14,\n -3.9090e-14, -3.5829e-16, 1.1035e-16, 1.0594e-15, -3.5150e-16,\n -7.6825e-16, -4.8116e-14, -4.4376e-14, -1.9122e-16, -3.7916e-14,\n -2.5741e-14, 1.4209e-15, 4.9352e-16, -1.4827e-15, -1.5459e-14,\n -2.7724e-14, 2.8152e-16, 1.7979e-16, -2.0891e-16, -2.1099e-14,\n 1.0846e-16, 2.6190e-16, -1.5459e-14, -1.0116e-15, -1.3244e-17,\n 1.6952e-16, -5.3086e-16, -2.5542e-17, -2.5517e-14], device='cuda:0')", + "exp_avg_sq": "tensor([3.6308e-20, 3.2943e-20, 3.5535e-21, 9.4581e-22, 7.3352e-20, 2.6525e-21,\n 1.0020e-21, 1.5439e-20, 2.5918e-20, 2.8208e-21, 1.3876e-20, 4.4979e-21,\n 5.6548e-21, 2.3165e-20, 2.6870e-20, 5.3979e-21, 2.3314e-21, 3.3518e-21,\n 9.5578e-21, 1.6046e-21, 4.8416e-21, 4.8694e-21, 8.4745e-22, 1.3097e-20,\n 1.0208e-19, 1.4793e-20, 2.9255e-21, 6.5160e-21, 1.5247e-21, 3.3208e-21,\n 5.1447e-21, 1.7687e-20, 3.8355e-21, 2.5835e-20, 1.4042e-21, 2.3328e-20,\n 2.0556e-20, 1.0482e-21, 5.4878e-23, 1.1419e-20, 3.4961e-20, 1.0173e-19,\n 4.4904e-21, 2.3993e-20, 2.8991e-21, 2.6939e-20, 3.7269e-23, 2.3337e-23,\n 2.2724e-22, 3.0229e-20, 2.6119e-20, 6.4560e-22, 8.6873e-22, 3.4081e-20,\n 4.4540e-20, 1.8202e-21, 4.3462e-22, 3.6803e-21, 4.8815e-20, 2.4193e-22,\n 4.2006e-23, 1.1445e-20, 4.9392e-21, 4.3837e-21], device='cuda:0')" + }, + "37": { + "step": "tensor(22524.)", + "exp_avg": "tensor([[ 2.0284e-14, -6.6867e-14, -1.2868e-13, -1.1128e-14, 3.1773e-14,\n 1.7687e-14, -1.6871e-13, -9.7659e-14, 2.7635e-14, 3.0138e-14,\n 2.4349e-14, -1.8489e-13, -7.5326e-14, 1.2724e-14, -2.1603e-13,\n -1.8676e-13, 3.5464e-14, -5.6969e-14, -1.0282e-13, -7.5091e-14,\n -1.4919e-13, -1.1112e-13, 3.2241e-14, 2.0586e-14, -1.5398e-13,\n 2.5544e-14, -1.2026e-13, 2.2645e-14, -9.2831e-14, 2.6700e-14,\n 1.4869e-14, 1.7004e-14, -1.4440e-13, -1.4131e-13, -1.5867e-13,\n -1.6536e-13, 2.3727e-14, 2.0855e-14, 2.1308e-14, 1.9654e-14,\n 2.6581e-14, -1.9264e-13, -1.9721e-13, 2.2098e-14, -1.9017e-13,\n -1.2110e-13, 2.0088e-14, 1.9570e-14, 2.5155e-14, -5.1055e-14,\n -1.4910e-13, 2.7820e-14, 2.3405e-14, 2.4139e-14, -1.1989e-13,\n 2.2266e-14, 1.5448e-14, -9.0441e-14, 2.6930e-14, 2.7875e-14,\n 1.6930e-14, 2.2545e-14, 1.9743e-14, -1.3528e-13],\n [-2.1234e-14, 7.1406e-14, 1.1883e-13, 9.4737e-15, -3.0840e-14,\n -1.8490e-14, 1.7047e-13, 9.1186e-14, -2.7894e-14, -2.9507e-14,\n -2.3543e-14, 1.7417e-13, 7.8202e-14, -1.3173e-14, 2.2131e-13,\n 1.8209e-13, -3.4269e-14, 5.4149e-14, 9.9625e-14, 8.0498e-14,\n 1.4892e-13, 1.1231e-13, -3.2176e-14, -2.0925e-14, 1.5613e-13,\n -2.5226e-14, 1.0957e-13, -2.1940e-14, 9.4729e-14, -2.5691e-14,\n -1.4194e-14, -1.6605e-14, 1.4432e-13, 1.4215e-13, 1.5602e-13,\n 1.7299e-13, -2.3139e-14, -2.0075e-14, -2.0160e-14, -2.0708e-14,\n -2.6159e-14, 1.9083e-13, 1.9725e-13, -2.2036e-14, 1.9175e-13,\n 1.1663e-13, -1.9562e-14, -1.9748e-14, -2.4472e-14, 5.9324e-14,\n 1.4035e-13, -2.7620e-14, -2.3077e-14, -2.3389e-14, 1.2086e-13,\n -2.1918e-14, -1.6040e-14, 9.2230e-14, -2.7864e-14, -2.6952e-14,\n -1.6968e-14, -2.1855e-14, -1.9258e-14, 1.3092e-13]], device='cuda:0')", + "exp_avg_sq": "tensor([[9.8095e-20, 3.0251e-19, 3.1564e-19, 6.5680e-19, 2.5097e-18, 1.0725e-19,\n 2.7343e-20, 5.9945e-20, 1.1844e-18, 4.5638e-19, 3.1793e-20, 2.2544e-19,\n 4.8818e-19, 1.2280e-19, 3.7511e-19, 5.3530e-20, 6.8625e-19, 5.0746e-19,\n 6.0102e-19, 1.2302e-19, 3.0815e-19, 9.5301e-19, 1.1425e-18, 6.0090e-19,\n 1.3773e-18, 5.6216e-19, 2.0700e-19, 1.6909e-19, 3.8965e-18, 3.7545e-20,\n 3.4357e-19, 4.2596e-19, 5.7081e-19, 8.9445e-19, 7.5035e-20, 1.6893e-19,\n 1.2854e-18, 3.2481e-19, 1.2167e-18, 4.6087e-19, 5.4202e-19, 1.2096e-18,\n 5.2794e-20, 4.8998e-19, 5.0055e-20, 2.1101e-19, 1.8672e-19, 9.2363e-19,\n 5.5877e-19, 3.8054e-19, 6.1644e-19, 3.3654e-19, 4.5445e-19, 2.7422e-19,\n 1.1419e-18, 5.1183e-19, 1.1150e-18, 1.0151e-18, 1.1400e-18, 3.5734e-20,\n 6.3791e-20, 7.0346e-20, 5.2504e-19, 2.6084e-19],\n [9.8095e-20, 3.0251e-19, 3.1564e-19, 6.5680e-19, 2.5097e-18, 1.0725e-19,\n 2.7343e-20, 5.9945e-20, 1.1844e-18, 4.5638e-19, 3.1793e-20, 2.2544e-19,\n 4.8818e-19, 1.2280e-19, 3.7511e-19, 5.3530e-20, 6.8625e-19, 5.0746e-19,\n 6.0102e-19, 1.2302e-19, 3.0815e-19, 9.5301e-19, 1.1425e-18, 6.0090e-19,\n 1.3773e-18, 5.6216e-19, 2.0700e-19, 1.6909e-19, 3.8965e-18, 3.7545e-20,\n 3.4357e-19, 4.2596e-19, 5.7081e-19, 8.9445e-19, 7.5035e-20, 1.6893e-19,\n 1.2854e-18, 3.2481e-19, 1.2167e-18, 4.6087e-19, 5.4202e-19, 1.2096e-18,\n 5.2794e-20, 4.8998e-19, 5.0055e-20, 2.1101e-19, 1.8672e-19, 9.2363e-19,\n 5.5877e-19, 3.8054e-19, 6.1644e-19, 3.3654e-19, 4.5445e-19, 2.7422e-19,\n 1.1419e-18, 5.1183e-19, 1.1150e-18, 1.0150e-18, 1.1400e-18, 3.5734e-20,\n 6.3791e-20, 7.0346e-20, 5.2504e-19, 2.6084e-19]], device='cuda:0')" + }, + "38": { + "step": "tensor(22524.)", + "exp_avg": "tensor([-1.3865e-13, 1.3796e-13], device='cuda:0')", + "exp_avg_sq": "tensor([5.4622e-18, 5.4622e-18], device='cuda:0')" + }, + "39": { + "step": "tensor(22524.)", + "exp_avg": "tensor([[-1.2679e-22, -6.8773e-22, 9.7837e-21, ..., -2.1059e-21,\n -2.3094e-21, -1.6375e-20],\n [ 1.4401e-21, 5.3062e-23, 3.4133e-22, ..., -8.0500e-22,\n -2.5477e-22, 2.2398e-21],\n [ 7.9848e-22, -2.7939e-21, 4.6676e-21, ..., -2.4251e-21,\n -2.4770e-22, -6.0122e-21],\n ...,\n [ 2.9020e-22, 1.1789e-22, 8.6478e-22, ..., 3.9932e-22,\n 4.3132e-23, 4.7385e-22],\n [-1.2458e-22, 4.5356e-22, -9.5148e-22, ..., 1.4125e-21,\n 4.2884e-22, -1.4028e-21],\n [-1.2473e-22, -1.1017e-21, 8.6652e-22, ..., -3.0505e-21,\n -3.1972e-22, 1.3355e-21]], device='cuda:0')", + "exp_avg_sq": "tensor([[9.4783e-21, 1.7320e-20, 8.3178e-21, ..., 2.7933e-20, 4.8030e-20,\n 1.1044e-19],\n [1.7436e-21, 3.9616e-21, 4.6634e-22, ..., 3.7200e-21, 6.4129e-21,\n 6.3642e-21],\n [3.8557e-21, 6.6242e-21, 5.5795e-21, ..., 1.2974e-20, 2.3165e-20,\n 1.4344e-20],\n ...,\n [3.9477e-22, 9.1541e-22, 6.3819e-23, ..., 2.0242e-21, 8.8983e-22,\n 2.3319e-21],\n [5.3342e-24, 1.8692e-23, 3.2059e-24, ..., 1.2402e-23, 6.0704e-24,\n 8.5102e-23],\n [2.9198e-21, 1.3573e-21, 3.4959e-22, ..., 6.3070e-23, 3.0969e-21,\n 1.0912e-20]], device='cuda:0')" + }, + "40": { + "step": "tensor(22524.)", + "exp_avg": "tensor([ 1.5281e-20, 2.4146e-22, -1.0981e-20, -1.5446e-20, -3.8964e-21,\n -2.5540e-21, -6.7043e-21, -6.0139e-22, 2.6593e-21, 2.1517e-20,\n 3.5213e-21, -2.2899e-20, -6.4839e-21, 4.4669e-22, -2.0434e-20,\n 1.1947e-21, 5.7431e-21, 3.3989e-21, 8.7706e-21, 1.1434e-21,\n -4.2670e-22, -6.7600e-22, 8.6387e-22, 1.5355e-21, 4.5876e-21,\n 6.3329e-21, -2.3610e-20, 2.3847e-21, 1.0701e-21, -5.8155e-22,\n -1.7143e-20, -1.4012e-21, -1.4963e-21, -2.0694e-21, -6.5102e-23,\n 8.4246e-21, -1.5223e-21, -6.9451e-21, 1.2791e-20, -2.3191e-21,\n 8.9449e-21, -1.8281e-21, 2.3307e-21, -8.0938e-21, -7.8446e-23,\n 1.9276e-21, -3.6699e-22, -9.7947e-23, -6.2352e-23, -1.0350e-21,\n 1.4039e-21, 1.8871e-21, 8.1616e-21, -1.6129e-21, -4.2006e-21,\n 3.1057e-21, -2.2436e-21, 5.8161e-21, 1.0789e-21, 2.8785e-20,\n 1.0334e-21, -3.1361e-22, 8.2365e-22, 9.8249e-22], device='cuda:0')", + "exp_avg_sq": "tensor([8.4293e-18, 1.3428e-18, 3.6301e-18, 5.3876e-19, 3.8029e-19, 3.1609e-20,\n 6.0192e-19, 2.2655e-18, 1.3295e-19, 3.3858e-18, 1.9145e-18, 4.8172e-18,\n 6.3941e-19, 1.1104e-22, 3.3441e-19, 1.5561e-19, 6.2468e-18, 1.5786e-18,\n 2.0036e-19, 6.9089e-19, 5.1983e-20, 3.4738e-20, 1.1809e-18, 5.5687e-19,\n 3.8030e-18, 1.5996e-18, 1.9725e-18, 1.1357e-19, 5.9957e-19, 1.8592e-18,\n 1.3058e-19, 1.4433e-19, 2.8591e-19, 3.3419e-19, 2.8571e-19, 5.9657e-18,\n 7.0495e-20, 4.2774e-19, 2.0701e-18, 3.8238e-20, 2.4837e-19, 5.2104e-19,\n 3.7291e-18, 6.4535e-19, 2.3320e-18, 3.8558e-22, 7.7227e-19, 9.4900e-19,\n 8.1416e-21, 1.4070e-18, 1.4403e-19, 1.5118e-19, 5.1800e-19, 8.4003e-18,\n 2.8611e-20, 5.4923e-18, 2.5258e-20, 5.6586e-20, 4.5085e-19, 8.6453e-19,\n 6.3669e-19, 2.9966e-19, 3.7737e-21, 1.1224e-18], device='cuda:0')" + }, + "41": { + "step": "tensor(22524.)", + "exp_avg": "tensor([ 5.2407e-20, 2.5138e-22, -1.1998e-20, -2.3027e-21, 1.8976e-20,\n -2.7358e-22, 1.9551e-20, -3.6770e-22, 3.6342e-20, 7.9782e-20,\n -9.4257e-22, 9.8175e-21, 1.4287e-20, -1.2776e-21, -7.8421e-21,\n 3.6988e-22, -3.3585e-21, -1.5842e-21, 3.1924e-20, 1.6237e-22,\n 5.3364e-22, -2.9779e-21, -2.4861e-23, 2.6256e-22, 3.9218e-20,\n 5.4149e-20, -2.5953e-20, 1.9566e-20, 4.0495e-22, 3.1187e-22,\n -1.5753e-21, -2.1969e-22, -8.2046e-22, -3.5284e-23, 8.9394e-24,\n 8.1398e-20, 3.7370e-23, 1.7378e-20, 4.8786e-20, 4.4085e-22,\n 3.7396e-20, -4.3295e-23, 4.2063e-20, 3.1180e-20, -8.5148e-23,\n -1.9046e-21, 2.9046e-20, 1.0815e-22, 3.1933e-23, -1.7612e-22,\n 4.6915e-24, 7.3311e-23, 3.9482e-20, -3.8202e-22, 1.0043e-20,\n -3.2908e-21, -2.2371e-21, 2.6068e-20, -9.8613e-23, 9.9836e-20,\n -1.0807e-21, 3.9989e-23, 1.6950e-21, -9.1041e-22], device='cuda:0')", + "exp_avg_sq": "tensor([1.5104e-19, 7.9830e-21, 4.8434e-20, 1.8086e-20, 3.7685e-21, 2.7393e-24,\n 6.0457e-21, 1.5331e-20, 1.9227e-21, 3.9423e-20, 1.6509e-20, 4.9979e-20,\n 6.4394e-21, 4.7011e-22, 4.5078e-21, 4.3564e-22, 7.6598e-20, 1.2888e-20,\n 6.6957e-21, 4.8312e-21, 1.5270e-23, 2.7847e-21, 8.7012e-21, 3.6187e-21,\n 2.7688e-20, 3.1482e-20, 1.6511e-20, 3.1016e-21, 3.7866e-21, 1.8913e-20,\n 1.9874e-21, 3.6962e-22, 1.2701e-21, 1.3741e-21, 1.0326e-21, 6.2984e-20,\n 1.1439e-22, 8.1301e-21, 2.2698e-20, 3.3041e-22, 3.5563e-21, 2.8844e-21,\n 5.4682e-20, 7.7940e-21, 1.6049e-20, 2.2573e-22, 7.7415e-21, 4.9122e-21,\n 3.9161e-24, 1.1112e-20, 7.0217e-22, 6.2604e-22, 1.5567e-20, 1.0529e-19,\n 5.6624e-22, 4.7257e-20, 1.3277e-21, 1.0542e-21, 2.5406e-21, 8.9583e-21,\n 4.5999e-21, 1.3625e-21, 1.4421e-22, 1.0971e-20], device='cuda:0')" + }, + "42": { + "step": "tensor(22524.)", + "exp_avg": "tensor([ 5.0152e-20, -2.1987e-22, -1.8591e-21, -4.8679e-21, 2.0364e-20,\n 5.3660e-23, 2.1183e-20, 5.9640e-22, 3.0119e-20, 6.1015e-20,\n 9.7398e-22, 4.3303e-21, 1.4783e-20, -2.8921e-21, -1.2202e-20,\n -4.2695e-22, 3.3208e-21, 1.4872e-21, 3.2348e-20, -1.8753e-22,\n -5.9463e-22, 4.6923e-21, 2.6772e-23, -3.1102e-22, 3.1285e-20,\n 4.3702e-20, -2.5638e-20, 2.0514e-20, -4.3242e-22, -3.5830e-22,\n -4.1841e-21, 1.0325e-22, 9.5516e-22, 2.3730e-23, -5.7363e-23,\n 5.3950e-20, -8.5669e-23, 1.2819e-20, 4.1830e-20, -2.9049e-22,\n 3.2686e-20, 3.3261e-23, 3.6671e-20, 1.6853e-20, 2.1563e-22,\n -6.2423e-21, 2.0489e-20, -1.1095e-22, -1.4272e-22, 2.3882e-22,\n 9.3243e-25, -5.5021e-23, 3.3977e-20, 1.6213e-22, 1.0719e-20,\n 3.1631e-21, 4.2593e-21, 2.9938e-20, 8.0753e-23, 7.8318e-20,\n 1.1037e-21, -7.5634e-23, -1.5252e-21, 8.2226e-22], device='cuda:0')", + "exp_avg_sq": "tensor([1.4687e-19, 1.3522e-20, 6.5404e-20, 1.4679e-20, 7.9761e-21, 3.8558e-24,\n 1.2183e-20, 2.6251e-20, 4.1156e-21, 5.9397e-20, 2.0609e-20, 7.9385e-20,\n 1.3594e-20, 5.2936e-22, 7.8033e-21, 6.1590e-22, 7.6169e-20, 1.8996e-20,\n 6.4778e-21, 5.6251e-21, 2.5828e-23, 2.6887e-21, 1.0698e-20, 3.9368e-21,\n 6.0302e-20, 3.2089e-20, 3.7649e-20, 4.1648e-21, 4.4671e-21, 1.8334e-20,\n 4.1756e-21, 6.7874e-22, 9.1366e-22, 2.4385e-21, 2.0567e-21, 1.0523e-19,\n 2.0238e-22, 1.1092e-20, 3.9057e-20, 2.6033e-22, 6.7912e-21, 4.2797e-21,\n 6.7713e-20, 1.2953e-20, 2.4951e-20, 4.3359e-22, 1.6647e-20, 9.3773e-21,\n 7.5338e-24, 1.3881e-20, 1.1748e-21, 1.0884e-21, 1.4179e-20, 1.0205e-19,\n 1.1944e-21, 7.2635e-20, 1.9368e-21, 2.3352e-21, 4.3274e-21, 1.6899e-20,\n 7.7791e-21, 1.8600e-21, 2.2192e-22, 1.4044e-20], device='cuda:0')" + }, + "43": { + "step": "tensor(22524.)", + "exp_avg": "tensor([[ 1.8039e-19, -1.3295e-21, -5.6416e-20, -1.3762e-20, 1.0145e-19,\n -5.2164e-20, 8.3013e-20, -2.6574e-20, 2.4098e-19, 3.2524e-19,\n -2.7698e-20, 3.4462e-20, 7.2891e-20, -7.5259e-20, -4.5376e-20,\n -2.5789e-20, -4.4680e-20, -2.9620e-20, 2.6932e-19, -3.3849e-20,\n -4.4433e-20, -2.8688e-20, -5.4351e-20, -1.3675e-20, 1.2478e-19,\n 2.9318e-19, -8.6512e-20, 1.6043e-19, -5.5716e-20, 2.5022e-20,\n -1.0724e-20, -2.3980e-20, 8.4066e-21, -3.7730e-20, -2.1269e-20,\n 2.5626e-19, -2.8862e-20, 1.2482e-19, 1.9414e-19, -4.6410e-20,\n 2.5947e-19, -9.6630e-21, 1.7825e-19, 1.7139e-19, -1.7482e-20,\n -7.8069e-20, 1.5588e-19, 1.4068e-20, -2.5699e-20, -5.1150e-20,\n 3.2949e-22, 1.1784e-20, 2.7343e-19, -5.0476e-20, 7.7999e-20,\n -4.1051e-20, -2.0129e-20, 2.1241e-19, 5.1157e-21, 3.7302e-19,\n -3.9430e-20, -3.1626e-20, -1.9521e-20, -2.5430e-20],\n [-1.8028e-19, 1.3152e-21, 5.6429e-20, 1.3809e-20, -1.0137e-19,\n 5.2152e-20, -8.3013e-20, 2.6559e-20, -2.4086e-19, -3.2519e-19,\n 2.7682e-20, -3.4379e-20, -7.2797e-20, 7.5254e-20, 4.5438e-20,\n 2.5775e-20, 4.4668e-20, 2.9606e-20, -2.6922e-19, 3.3828e-20,\n 4.4427e-20, 2.8763e-20, 5.4336e-20, 1.3667e-20, -1.2471e-19,\n -2.9311e-19, 8.6607e-20, -1.6036e-19, 5.5698e-20, -2.5033e-20,\n 1.0798e-20, 2.3962e-20, -8.4300e-21, 3.7703e-20, 2.1248e-20,\n -2.5621e-19, 2.8851e-20, -1.2475e-19, -1.9406e-19, 4.6398e-20,\n -2.5933e-19, 9.6558e-21, -1.7822e-19, -1.7128e-19, 1.7469e-20,\n 7.8066e-20, -1.5577e-19, -1.4080e-20, 2.5684e-20, 5.1133e-20,\n -3.4592e-22, -1.1791e-20, -2.7334e-19, 5.0476e-20, -7.7906e-20,\n 4.1037e-20, 2.0178e-20, -2.1234e-19, -5.1319e-21, -3.7294e-19,\n 3.9415e-20, 3.1609e-20, 1.9511e-20, 2.5422e-20]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.9450e-18, 5.1250e-19, 1.4125e-18, 4.7904e-18, 1.1577e-19, 9.9575e-19,\n 6.5277e-20, 3.0276e-19, 1.2604e-19, 8.3777e-19, 1.0854e-18, 6.1627e-19,\n 1.9415e-19, 1.3887e-18, 2.4281e-19, 1.1289e-18, 2.3132e-18, 4.3294e-19,\n 2.5202e-18, 1.4200e-18, 8.8361e-19, 2.4415e-18, 1.6814e-18, 2.4768e-18,\n 1.9540e-19, 1.9502e-18, 9.0106e-20, 9.6511e-19, 1.9821e-18, 1.5238e-18,\n 3.9825e-19, 3.4595e-19, 5.2858e-18, 2.5246e-19, 2.0351e-19, 5.9329e-19,\n 3.7085e-19, 1.3679e-18, 3.7086e-19, 7.8176e-21, 4.2648e-19, 4.4546e-19,\n 1.7425e-18, 2.5208e-19, 5.2625e-19, 1.5320e-19, 3.0782e-19, 4.0979e-19,\n 3.4150e-20, 8.7420e-19, 7.3616e-21, 3.5486e-20, 2.4484e-18, 2.6619e-18,\n 5.1189e-20, 5.3607e-19, 8.0124e-19, 2.6261e-19, 1.7945e-19, 3.8530e-20,\n 5.2858e-21, 7.1079e-19, 6.6397e-19, 6.3481e-20],\n [2.9450e-18, 5.1250e-19, 1.4125e-18, 4.7904e-18, 1.1577e-19, 9.9575e-19,\n 6.5277e-20, 3.0276e-19, 1.2604e-19, 8.3777e-19, 1.0854e-18, 6.1627e-19,\n 1.9415e-19, 1.3887e-18, 2.4281e-19, 1.1289e-18, 2.3132e-18, 4.3294e-19,\n 2.5202e-18, 1.4200e-18, 8.8361e-19, 2.4415e-18, 1.6814e-18, 2.4768e-18,\n 1.9540e-19, 1.9502e-18, 9.0106e-20, 9.6511e-19, 1.9821e-18, 1.5238e-18,\n 3.9825e-19, 3.4595e-19, 5.2858e-18, 2.5246e-19, 2.0351e-19, 5.9329e-19,\n 3.7085e-19, 1.3679e-18, 3.7086e-19, 7.8176e-21, 4.2648e-19, 4.4546e-19,\n 1.7425e-18, 2.5208e-19, 5.2625e-19, 1.5320e-19, 3.0782e-19, 4.0979e-19,\n 3.4150e-20, 8.7420e-19, 7.3616e-21, 3.5486e-20, 2.4484e-18, 2.6619e-18,\n 5.1189e-20, 5.3607e-19, 8.0124e-19, 2.6261e-19, 1.7945e-19, 3.8530e-20,\n 5.2858e-21, 7.1079e-19, 6.6397e-19, 6.3481e-20]], device='cuda:0')" + }, + "44": { + "step": "tensor(22524.)", + "exp_avg": "tensor([ 1.6203e-19, -1.6210e-19], device='cuda:0')", + "exp_avg_sq": "tensor([8.4794e-18, 8.4794e-18], device='cuda:0')" + }, + "45": { + "step": "tensor(22524.)", + "exp_avg": "tensor([[ 2.5985e-24, -5.2585e-25, -5.2695e-25, ..., 5.1738e-25,\n -8.4484e-25, 2.9632e-25],\n [-6.0659e-23, 1.1349e-22, 5.0589e-23, ..., -7.6808e-24,\n -7.9452e-24, -1.5343e-23],\n [-8.2286e-23, 1.1802e-22, 6.3004e-23, ..., -2.0395e-23,\n 2.0273e-23, 4.3563e-24],\n ...,\n [-6.8982e-24, 1.6345e-24, 5.8385e-24, ..., 2.8303e-24,\n -1.0135e-25, 1.3465e-24],\n [ 2.4163e-23, -2.3351e-23, -2.0011e-24, ..., 1.1605e-23,\n -4.9455e-24, 2.9243e-24],\n [-1.8267e-25, 4.7730e-25, -2.3346e-24, ..., -1.0283e-24,\n -1.6670e-24, -2.5999e-24]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.9728e-23, 1.9008e-23, 3.0989e-24, ..., 3.8949e-23, 7.4818e-23,\n 6.6482e-23],\n [1.2002e-21, 2.1297e-21, 1.0175e-21, ..., 1.4034e-21, 3.6745e-21,\n 5.7293e-21],\n [6.9955e-22, 9.6007e-22, 1.2245e-21, ..., 2.6849e-21, 3.6489e-21,\n 5.1839e-21],\n ...,\n [2.6931e-21, 3.8310e-21, 1.0629e-21, ..., 5.8613e-21, 5.2913e-21,\n 9.8270e-21],\n [3.3183e-24, 9.1109e-24, 3.1990e-25, ..., 1.3806e-24, 1.5968e-24,\n 4.2569e-25],\n [1.2866e-22, 3.7995e-22, 3.9367e-23, ..., 2.5394e-22, 1.9661e-22,\n 2.6050e-22]], device='cuda:0')" + }, + "46": { + "step": "tensor(22524.)", + "exp_avg": "tensor([-7.0129e-24, 9.3794e-23, 2.9728e-22, -5.7833e-24, 4.0789e-23,\n 3.9875e-24, -4.6783e-24, 2.4461e-23, -1.2952e-23, -1.0345e-22,\n 1.7624e-23, 1.6399e-23, 6.1090e-23, 3.7237e-22, 1.7279e-23,\n 8.6985e-25, -7.7501e-24, -7.1858e-23, -6.3630e-24, -1.0095e-23,\n -4.4136e-23, 2.7063e-22, 7.1873e-24, -8.1309e-24, -2.6809e-22,\n 3.8641e-23, 1.0973e-23, -6.8228e-23, -3.3189e-23, -2.7070e-22,\n 2.3941e-23, 4.8460e-24, 3.9537e-23, -6.4176e-23, 3.5243e-23,\n 8.9891e-24, 2.2437e-23, 6.3268e-24, -2.6462e-22, -6.1432e-23,\n 3.5958e-23, -1.2126e-23, -7.9693e-24, -2.2587e-22, -9.6005e-24,\n -4.7345e-26, -7.2799e-23, -3.9071e-23, 4.1380e-22, -2.1629e-23,\n 1.3886e-23, -1.2852e-23, -3.2993e-23, 1.0332e-23, -2.9792e-23,\n -2.5681e-23, 1.3446e-22, 1.5538e-22, 5.2824e-23, -1.7318e-22,\n 9.8881e-24, 4.1195e-23, -3.3058e-22, 2.4413e-23], device='cuda:0')", + "exp_avg_sq": "tensor([1.2313e-20, 8.9036e-19, 7.9205e-19, 3.4658e-20, 4.5987e-19, 6.1598e-21,\n 1.2327e-21, 1.6923e-20, 4.1738e-19, 2.3780e-21, 7.1136e-20, 2.8356e-20,\n 2.1119e-20, 2.9308e-19, 9.7128e-20, 8.9971e-21, 3.4692e-20, 4.7598e-20,\n 1.2922e-20, 3.2586e-19, 8.8192e-21, 1.6970e-19, 4.4519e-21, 3.4664e-21,\n 7.3369e-21, 2.2644e-19, 1.0847e-19, 9.1710e-21, 3.9440e-20, 6.7526e-21,\n 9.0834e-21, 1.0837e-19, 4.1973e-19, 1.1491e-24, 1.3752e-20, 8.7363e-21,\n 2.1451e-20, 1.0344e-20, 2.9655e-19, 1.4506e-22, 2.9693e-20, 4.3468e-19,\n 1.4776e-19, 5.5507e-19, 1.0885e-19, 6.7248e-21, 2.3014e-22, 3.2779e-19,\n 4.2831e-19, 2.1822e-19, 1.0429e-20, 2.1153e-20, 2.1210e-20, 7.6068e-21,\n 3.0317e-20, 1.5899e-20, 3.1623e-20, 3.5750e-19, 8.8430e-20, 1.3131e-19,\n 1.4214e-20, 1.1977e-18, 5.3298e-22, 9.1587e-20], device='cuda:0')" + }, + "47": { + "step": "tensor(22524.)", + "exp_avg": "tensor([ 1.9509e-23, 1.1713e-21, 1.5992e-21, 1.3852e-24, -2.8614e-23,\n 1.0428e-23, 2.3222e-23, 1.8484e-23, 1.0472e-21, 5.6668e-22,\n 1.5685e-24, 2.5177e-23, 9.3191e-22, 2.0491e-21, -1.2791e-23,\n 2.4329e-23, 1.1527e-23, 7.8752e-22, 1.4269e-23, -1.6337e-24,\n 7.3021e-22, 1.5287e-21, 3.2739e-23, 6.8563e-22, 3.7765e-22,\n -4.1073e-23, -4.2366e-24, 7.9060e-22, 1.1135e-25, 2.9505e-22,\n 1.9485e-24, -2.0039e-24, 7.0637e-22, 1.5105e-22, 6.7256e-22,\n 2.6742e-23, 1.5348e-23, 8.0812e-22, 3.7125e-22, -3.0734e-23,\n 1.2445e-23, -1.2838e-23, 3.6285e-25, 4.7434e-22, 2.3603e-24,\n 2.2714e-23, 5.1804e-22, 4.8332e-24, 1.7546e-21, -1.8115e-24,\n 9.1514e-22, 8.3688e-22, -2.9364e-24, 7.1509e-24, 7.1553e-22,\n 7.1884e-22, 1.0659e-21, 1.3622e-21, 9.5582e-22, 6.4523e-22,\n -3.8049e-23, -7.1248e-23, 9.5089e-23, -1.9024e-24], device='cuda:0')", + "exp_avg_sq": "tensor([2.6117e-23, 1.1961e-20, 1.2993e-20, 1.7611e-22, 3.4916e-21, 6.6206e-24,\n 1.3544e-24, 4.0058e-23, 5.4455e-21, 2.3408e-22, 3.8681e-22, 9.6633e-23,\n 3.7535e-22, 2.6364e-21, 7.8713e-22, 8.7966e-24, 1.7258e-22, 5.7361e-22,\n 3.7524e-23, 4.7800e-21, 2.4279e-22, 1.8069e-21, 5.5569e-26, 1.1336e-22,\n 4.1018e-22, 1.3857e-21, 8.7578e-22, 4.4348e-22, 2.6623e-22, 1.3264e-22,\n 8.7913e-24, 5.6083e-22, 5.0761e-21, 3.2938e-23, 2.0300e-22, 1.4172e-23,\n 7.0141e-23, 2.6295e-22, 3.2317e-21, 1.0240e-23, 1.1917e-22, 3.7520e-21,\n 1.1073e-21, 1.1080e-20, 6.9511e-22, 1.2152e-23, 1.6294e-23, 2.1195e-21,\n 3.2499e-21, 1.2095e-21, 1.5543e-22, 4.3182e-22, 4.7290e-23, 2.1831e-24,\n 5.3265e-22, 3.3759e-22, 3.0368e-22, 3.4067e-21, 1.8273e-21, 1.6052e-21,\n 2.8367e-22, 1.3166e-20, 7.3212e-23, 4.8333e-22], device='cuda:0')" + }, + "48": { + "step": "tensor(22524.)", + "exp_avg": "tensor([-2.0440e-23, 9.8694e-22, 1.4510e-21, -1.5598e-24, 2.7740e-23,\n -1.1523e-23, -2.5251e-23, -1.7503e-23, 8.8302e-22, 5.6099e-22,\n -1.5333e-24, -2.3742e-23, 8.8620e-22, 1.5771e-21, 1.3860e-23,\n -2.3150e-23, -1.2376e-23, 6.6143e-22, -1.5303e-23, 1.7022e-24,\n 6.8567e-22, 1.3227e-21, -2.9891e-23, 7.0602e-22, 3.3487e-22,\n 3.8985e-23, 4.4807e-24, 6.8131e-22, 2.6806e-26, 3.1239e-22,\n -1.9647e-24, 2.2024e-24, 7.4755e-22, 3.8958e-22, 7.5040e-22,\n -2.5710e-23, -1.4670e-23, 7.5859e-22, 3.5100e-22, 9.7504e-23,\n -1.1564e-23, 1.2672e-23, -3.5241e-25, 3.4492e-22, -2.2503e-24,\n -2.2792e-23, 5.7359e-22, -1.2510e-24, 1.5747e-21, 2.2160e-24,\n 8.2459e-22, 7.3951e-22, 2.1369e-24, -7.4656e-24, 6.8761e-22,\n 6.9626e-22, 1.0148e-21, 1.1753e-21, 8.5481e-22, 5.6340e-22,\n 4.4043e-23, 7.0892e-23, 1.0387e-22, 1.7283e-24], device='cuda:0')", + "exp_avg_sq": "tensor([4.0230e-23, 1.4501e-20, 1.4030e-20, 2.9088e-22, 5.2115e-21, 1.5938e-23,\n 2.7570e-24, 8.1803e-23, 7.3965e-21, 2.6042e-22, 6.3984e-22, 1.1441e-22,\n 5.9266e-22, 5.0724e-21, 1.1231e-21, 1.0099e-23, 2.3337e-22, 8.8931e-22,\n 6.4621e-23, 3.1412e-21, 3.6773e-22, 3.2734e-21, 8.5271e-26, 1.6602e-22,\n 3.9908e-22, 2.3481e-21, 1.0767e-21, 4.5866e-22, 2.7896e-22, 2.3077e-22,\n 1.2234e-23, 9.9917e-22, 7.2768e-21, 4.6565e-23, 3.6234e-22, 2.3206e-23,\n 1.5113e-22, 3.6253e-22, 4.9895e-21, 1.5798e-23, 2.2376e-22, 4.9813e-21,\n 1.3568e-21, 9.8415e-21, 8.7701e-22, 1.9634e-23, 3.4986e-23, 3.7294e-21,\n 7.1896e-21, 2.1895e-21, 2.5989e-22, 6.2132e-22, 8.1345e-23, 2.7255e-24,\n 7.7580e-22, 5.0664e-22, 6.7789e-22, 5.7285e-21, 1.9960e-21, 2.6233e-21,\n 1.8608e-22, 1.5962e-20, 1.1388e-22, 8.9318e-22], device='cuda:0')" + }, + "49": { + "step": "tensor(22524.)", + "exp_avg": "tensor([[ 1.1632e-21, -4.3413e-21, -5.4023e-21, 1.1494e-21, 6.1930e-22,\n 6.6444e-22, 1.0569e-21, 7.4300e-22, -5.0405e-21, -6.2158e-21,\n 1.1241e-21, 1.1001e-21, -7.5091e-21, -8.5506e-21, 1.0995e-21,\n 1.0924e-21, 1.3038e-21, -4.8128e-21, 1.0759e-21, 1.1872e-21,\n -6.7927e-21, -7.7992e-21, 1.1145e-21, -6.8624e-21, -3.7853e-21,\n 9.8962e-22, 1.3451e-21, -7.8125e-21, 1.0609e-21, -2.4321e-21,\n 1.9228e-23, 7.3756e-22, -3.0307e-21, -1.9968e-21, -4.9458e-21,\n 1.0381e-21, 1.0769e-21, -7.4557e-21, -1.6852e-21, 7.1252e-22,\n 9.7727e-22, 1.2075e-21, 6.6120e-22, -2.1536e-21, 1.1237e-21,\n 1.1291e-21, -5.6885e-21, 7.9443e-22, -6.5190e-21, 7.9628e-22,\n -7.3817e-21, -7.0308e-21, 2.7030e-22, 6.5413e-22, -5.7770e-21,\n -6.2824e-21, -6.9820e-21, -4.9718e-21, -6.9479e-21, -3.8168e-21,\n 1.1339e-21, 1.1134e-21, -1.0787e-21, 6.6346e-22],\n [-1.1632e-21, 4.3410e-21, 5.4021e-21, -1.1494e-21, -6.1926e-22,\n -6.6443e-22, -1.0569e-21, -7.4297e-22, 5.0403e-21, 6.2157e-21,\n -1.1241e-21, -1.1001e-21, 7.5089e-21, 8.5505e-21, -1.0995e-21,\n -1.0924e-21, -1.3038e-21, 4.8126e-21, -1.0759e-21, -1.1872e-21,\n 6.7927e-21, 7.7991e-21, -1.1145e-21, 6.8621e-21, 3.7852e-21,\n -9.8962e-22, -1.3451e-21, 7.8123e-21, -1.0609e-21, 2.4319e-21,\n -1.9210e-23, -7.3753e-22, 3.0306e-21, 1.9967e-21, 4.9458e-21,\n -1.0381e-21, -1.0768e-21, 7.4556e-21, 1.6850e-21, -7.1248e-22,\n -9.7724e-22, -1.2075e-21, -6.6116e-22, 2.1535e-21, -1.1236e-21,\n -1.1291e-21, 5.6884e-21, -7.9443e-22, 6.5189e-21, -7.9624e-22,\n 7.3815e-21, 7.0306e-21, -2.7025e-22, -6.5411e-22, 5.7768e-21,\n 6.2823e-21, 6.9818e-21, 4.9718e-21, 6.9478e-21, 3.8166e-21,\n -1.1339e-21, -1.1134e-21, 1.0786e-21, -6.6344e-22]], device='cuda:0')", + "exp_avg_sq": "tensor([[7.2020e-20, 2.3792e-19, 1.8914e-19, 1.0701e-20, 1.0947e-19, 2.8760e-21,\n 1.3230e-20, 4.0789e-20, 2.2717e-19, 3.2765e-19, 5.6460e-20, 2.3475e-19,\n 6.7428e-20, 4.2345e-20, 3.1039e-20, 1.9218e-19, 1.1029e-19, 1.8138e-20,\n 4.1480e-20, 9.6216e-19, 9.7105e-20, 6.1395e-20, 1.0064e-19, 5.2077e-20,\n 2.9514e-19, 1.5484e-20, 1.0474e-19, 2.9434e-19, 1.5404e-19, 9.9189e-21,\n 1.5530e-19, 5.7416e-20, 1.4320e-19, 8.7235e-20, 9.4258e-21, 4.8582e-20,\n 1.2265e-20, 9.4315e-20, 8.7655e-20, 2.0782e-20, 2.0091e-20, 1.7804e-19,\n 2.2841e-19, 4.0389e-19, 1.7767e-19, 2.1463e-20, 2.2737e-21, 6.3291e-20,\n 4.0926e-20, 6.3817e-20, 9.6173e-21, 1.0037e-19, 1.2244e-19, 1.4783e-19,\n 1.0898e-19, 9.8940e-20, 9.7518e-21, 3.3849e-20, 3.1877e-19, 1.1127e-19,\n 1.9948e-21, 1.6430e-19, 8.7785e-20, 5.2731e-20],\n [7.2020e-20, 2.3792e-19, 1.8914e-19, 1.0701e-20, 1.0947e-19, 2.8760e-21,\n 1.3230e-20, 4.0789e-20, 2.2717e-19, 3.2765e-19, 5.6460e-20, 2.3475e-19,\n 6.7428e-20, 4.2345e-20, 3.1039e-20, 1.9218e-19, 1.1029e-19, 1.8138e-20,\n 4.1480e-20, 9.6216e-19, 9.7105e-20, 6.1395e-20, 1.0064e-19, 5.2077e-20,\n 2.9514e-19, 1.5484e-20, 1.0474e-19, 2.9434e-19, 1.5404e-19, 9.9189e-21,\n 1.5530e-19, 5.7416e-20, 1.4320e-19, 8.7235e-20, 9.4258e-21, 4.8582e-20,\n 1.2265e-20, 9.4315e-20, 8.7655e-20, 2.0782e-20, 2.0091e-20, 1.7804e-19,\n 2.2841e-19, 4.0389e-19, 1.7767e-19, 2.1463e-20, 2.2737e-21, 6.3291e-20,\n 4.0926e-20, 6.3817e-20, 9.6173e-21, 1.0037e-19, 1.2244e-19, 1.4783e-19,\n 1.0898e-19, 9.8940e-20, 9.7518e-21, 3.3849e-20, 3.1877e-19, 1.1127e-19,\n 1.9948e-21, 1.6430e-19, 8.7785e-20, 5.2731e-20]], device='cuda:0')" + }, + "50": { + "step": "tensor(22524.)", + "exp_avg": "tensor([-6.1623e-21, 6.1618e-21], device='cuda:0')", + "exp_avg_sq": "tensor([1.1153e-18, 1.1153e-18], device='cuda:0')" + }, + "51": { + "step": "tensor(22524.)", + "exp_avg": "tensor([[-4.3000e-22, 1.5245e-21, 2.0189e-21, ..., 5.8843e-22,\n 2.7403e-22, -2.1059e-21],\n [ 7.5134e-23, -1.3697e-22, -1.1972e-22, ..., 1.5312e-22,\n -6.2588e-23, -2.9083e-22],\n [ 4.3595e-22, -2.4411e-22, 5.6417e-22, ..., 1.6997e-21,\n 3.5800e-22, -6.0946e-22],\n ...,\n [ 2.3691e-22, -2.4927e-23, -1.4652e-22, ..., 2.4140e-22,\n 2.5480e-23, 2.7874e-22],\n [ 6.6766e-23, -1.6937e-25, -6.2912e-23, ..., -4.5850e-23,\n -2.6814e-23, -2.2330e-22],\n [ 1.2973e-22, -2.6190e-22, 2.4956e-22, ..., 2.8260e-22,\n -2.4458e-22, 4.3174e-22]], device='cuda:0')", + "exp_avg_sq": "tensor([[8.6572e-24, 1.0416e-23, 8.1429e-24, ..., 9.9785e-24, 4.7236e-23,\n 4.6056e-23],\n [6.6229e-26, 2.8925e-25, 2.8067e-25, ..., 5.1219e-25, 6.6372e-24,\n 1.0421e-23],\n [1.5172e-24, 5.8604e-24, 8.2087e-24, ..., 1.1226e-25, 3.3359e-23,\n 5.9074e-23],\n ...,\n [5.0965e-25, 1.3438e-24, 2.4089e-25, ..., 1.6196e-25, 3.8398e-24,\n 7.9642e-24],\n [5.0168e-23, 7.0861e-23, 2.1072e-23, ..., 5.4213e-23, 5.7388e-23,\n 1.4032e-22],\n [2.9604e-24, 6.2042e-24, 2.4240e-24, ..., 3.4574e-24, 5.7485e-24,\n 3.3640e-23]], device='cuda:0')" + }, + "52": { + "step": "tensor(22524.)", + "exp_avg": "tensor([ 1.7936e-21, 8.6812e-22, 9.3860e-21, 7.6408e-23, -3.2053e-21,\n 6.6562e-23, 6.9010e-22, 5.3616e-21, -7.2428e-21, 6.4116e-21,\n 1.1466e-21, -4.9173e-22, -3.1477e-21, -1.6934e-21, -4.1470e-21,\n -1.5252e-21, 2.3697e-21, 2.0077e-21, -1.1208e-21, 1.3758e-21,\n -3.2135e-21, 9.4490e-21, -2.7961e-21, 5.7386e-22, 6.5381e-22,\n -1.0973e-21, 2.2750e-21, 6.4467e-21, -5.5555e-22, 9.9018e-22,\n -2.5003e-21, 2.6168e-21, 1.1042e-20, 5.1651e-22, 5.2545e-23,\n -9.1460e-22, -9.3470e-21, 8.1043e-21, -1.0541e-20, -8.7457e-21,\n 2.7618e-21, 3.8739e-21, 2.9694e-22, 3.4840e-21, -1.1199e-22,\n -1.6649e-21, -1.1973e-21, -3.5467e-22, -7.9107e-21, -1.6983e-20,\n 5.9469e-21, -2.9186e-21, -4.2107e-22, 2.2908e-21, 4.3690e-22,\n -1.4023e-21, -5.9318e-22, -3.4785e-21, -9.6820e-23, 1.3405e-21,\n 5.7649e-21, 9.9821e-22, -1.6559e-21, -3.9526e-22], device='cuda:0')", + "exp_avg_sq": "tensor([6.8996e-21, 8.2682e-22, 4.0452e-21, 2.3389e-22, 2.4353e-21, 1.5781e-21,\n 6.0566e-22, 1.0621e-20, 1.8443e-21, 3.6500e-22, 9.3811e-24, 2.2540e-21,\n 2.3143e-21, 8.2684e-21, 2.9721e-21, 8.5070e-21, 1.7700e-22, 8.1979e-23,\n 1.2256e-23, 8.4547e-22, 1.0869e-21, 1.0626e-21, 3.5101e-21, 2.1024e-22,\n 3.2387e-21, 6.4862e-22, 8.1106e-24, 3.4901e-21, 1.8916e-22, 4.3033e-21,\n 1.5224e-20, 2.0542e-22, 7.7409e-22, 8.8310e-22, 4.2001e-23, 3.8262e-21,\n 8.9676e-21, 1.6181e-22, 3.8708e-21, 4.2047e-21, 1.1712e-20, 7.0051e-22,\n 1.9063e-21, 2.6755e-21, 1.9081e-21, 5.6435e-21, 5.6078e-21, 1.3498e-21,\n 3.0991e-20, 2.9821e-20, 4.2677e-23, 2.0813e-21, 9.8864e-22, 2.5392e-20,\n 8.5487e-21, 8.4846e-22, 1.0755e-21, 5.7164e-21, 1.3647e-20, 3.0563e-22,\n 1.4320e-22, 6.1452e-22, 2.1160e-20, 1.9317e-21], device='cuda:0')" + }, + "53": { + "step": "tensor(22524.)", + "exp_avg": "tensor([-1.3640e-20, -2.4579e-22, -1.2382e-20, 3.6413e-23, -4.3314e-20,\n -3.1996e-20, -2.2515e-20, -3.2111e-20, -4.5472e-20, -6.9928e-21,\n -2.3120e-22, -2.5761e-20, -3.5040e-20, 2.0816e-21, -3.4637e-20,\n 8.5106e-22, -1.4592e-20, -1.3486e-20, -3.5842e-22, -2.3381e-23,\n -2.9426e-20, -1.3068e-20, 3.0490e-21, 1.4027e-23, 6.5923e-22,\n -3.8542e-23, -5.7609e-22, -2.2052e-20, -5.4860e-23, 2.4488e-22,\n 2.5839e-21, -6.4753e-21, -1.0241e-20, -5.5967e-24, -4.8449e-22,\n 5.3827e-22, -5.9956e-20, -8.3108e-21, -3.2116e-20, -4.3553e-20,\n -1.6242e-21, -2.5380e-20, 9.3008e-23, -1.9465e-20, 9.6764e-23,\n 1.3186e-21, 1.3084e-21, -1.2157e-23, -5.4301e-20, -7.5029e-20,\n 6.4276e-22, -3.3235e-20, -9.8454e-23, -1.5672e-21, 5.3550e-22,\n -4.8161e-23, 4.0721e-22, -3.7244e-20, -2.5279e-20, 5.6641e-23,\n -1.1560e-20, -6.7710e-23, 2.6396e-21, 2.3208e-22], device='cuda:0')", + "exp_avg_sq": "tensor([7.6701e-23, 8.5266e-24, 3.4389e-23, 3.0204e-24, 1.7703e-23, 1.2519e-23,\n 5.3126e-24, 1.2098e-22, 1.7681e-23, 1.9321e-24, 8.5345e-25, 1.7426e-23,\n 9.7547e-24, 9.0763e-23, 1.9521e-23, 1.3466e-22, 7.8605e-25, 3.7019e-25,\n 8.2896e-25, 8.9464e-24, 8.9753e-24, 6.6099e-24, 3.5023e-23, 4.1436e-24,\n 2.0615e-23, 8.6156e-24, 1.1211e-25, 2.3840e-23, 3.1034e-24, 6.1822e-23,\n 1.7890e-22, 8.2668e-25, 5.5649e-24, 8.9548e-24, 4.1014e-24, 6.3828e-23,\n 6.0266e-23, 4.3158e-25, 3.2035e-23, 2.8906e-23, 8.7824e-23, 3.7810e-24,\n 2.3975e-23, 2.2724e-23, 2.8675e-23, 6.5538e-23, 5.8768e-23, 2.0324e-23,\n 3.3954e-22, 2.8182e-22, 1.4840e-26, 1.7132e-23, 2.6371e-23, 5.9061e-22,\n 6.7720e-23, 1.5452e-23, 8.9838e-24, 5.2523e-23, 1.7025e-22, 8.4591e-24,\n 3.4684e-25, 1.5409e-23, 3.3075e-22, 2.0410e-23], device='cuda:0')" + }, + "54": { + "step": "tensor(22524.)", + "exp_avg": "tensor([-1.7260e-20, 2.0558e-22, -1.0216e-20, -3.5648e-23, -3.4871e-20,\n -2.7468e-20, -2.1682e-20, -2.4086e-20, -4.2226e-20, -8.4060e-21,\n 1.5463e-22, -2.4842e-20, -3.3044e-20, -2.0062e-21, -3.3666e-20,\n -7.8659e-22, -1.6113e-20, -1.6806e-20, 3.2690e-22, 2.8768e-24,\n -3.0744e-20, -1.0148e-20, -2.7191e-21, 2.0763e-23, -8.3380e-22,\n 3.5928e-23, 1.2657e-21, -1.9451e-20, 5.9491e-23, -3.2872e-22,\n -2.2272e-21, -1.2058e-20, -6.9419e-21, 6.0717e-24, 4.3480e-22,\n -5.1969e-22, -5.0743e-20, -7.1785e-21, -3.9029e-20, -4.3162e-20,\n 1.5272e-21, -1.9936e-20, -8.8285e-23, -2.0018e-20, -8.3877e-23,\n -1.2770e-21, -1.1985e-21, 5.3194e-24, -4.4887e-20, -6.5410e-20,\n -2.1921e-21, -2.9541e-20, 9.6445e-23, 1.5861e-21, -6.2873e-22,\n 4.1056e-23, -4.3036e-22, -3.4130e-20, -2.7016e-20, -1.2850e-23,\n -1.2726e-20, 9.1374e-23, -2.5546e-21, -2.4023e-22], device='cuda:0')", + "exp_avg_sq": "tensor([8.7485e-23, 1.3799e-23, 4.9078e-23, 4.2112e-24, 3.2763e-23, 1.8694e-23,\n 7.5989e-24, 1.5694e-22, 2.6975e-23, 1.7937e-24, 1.1204e-24, 2.3964e-23,\n 2.7378e-23, 1.3450e-22, 3.7900e-23, 1.4408e-22, 1.1773e-24, 5.9542e-25,\n 1.1952e-24, 1.5591e-23, 1.5101e-23, 1.4010e-23, 5.8187e-23, 5.8122e-24,\n 6.0920e-23, 1.3947e-23, 1.4098e-25, 4.2667e-23, 5.1209e-24, 7.7079e-23,\n 2.4698e-22, 1.1245e-24, 7.8844e-24, 1.8079e-23, 3.8715e-24, 7.3574e-23,\n 1.2184e-22, 7.8259e-25, 4.5424e-23, 5.9834e-23, 1.9439e-22, 7.4503e-24,\n 3.6939e-23, 3.0648e-23, 3.7075e-23, 9.5323e-23, 8.9456e-23, 2.8887e-23,\n 4.2846e-22, 4.4274e-22, 2.1457e-26, 2.4860e-23, 2.3654e-23, 4.3492e-22,\n 1.5323e-22, 1.8379e-23, 1.9015e-23, 7.7521e-23, 1.7679e-22, 8.7619e-24,\n 8.1883e-25, 1.5315e-23, 3.4297e-22, 3.6085e-23], device='cuda:0')" + }, + "55": { + "step": "tensor(22524.)", + "exp_avg": "tensor([[-7.2958e-20, -1.6498e-21, -7.1252e-20, 1.6090e-20, -2.3402e-19,\n -1.8241e-19, -1.6303e-19, -1.2973e-19, -1.5011e-19, -8.1166e-20,\n 2.8280e-20, -2.0691e-19, -1.9777e-19, 3.1510e-20, -1.8564e-19,\n 1.0472e-20, -1.6259e-19, -1.3267e-19, -2.9329e-21, 1.9868e-20,\n -1.6747e-19, -6.9832e-20, 2.2131e-20, 1.6908e-20, 2.9252e-20,\n 2.1061e-20, 6.0798e-21, -1.2978e-19, 1.0526e-20, 2.8897e-20,\n 1.1104e-20, -7.3409e-20, -9.0648e-20, 2.6627e-20, 2.6058e-20,\n 2.9227e-20, -2.0936e-19, -9.5514e-20, -1.7625e-19, -1.8342e-19,\n -8.1173e-21, -2.1083e-19, 2.5067e-20, -1.3806e-19, 1.9283e-20,\n 2.6778e-20, 2.9223e-20, 6.5278e-21, -1.6545e-19, -2.1780e-19,\n 6.3514e-21, -2.4057e-19, 6.4085e-21, -1.6506e-20, 2.3864e-20,\n 7.1480e-22, 3.5093e-20, -1.7908e-19, -1.2324e-19, 2.3319e-20,\n -1.1805e-19, 2.9582e-20, 3.1840e-20, 1.9271e-20],\n [ 7.2748e-20, 1.6979e-21, 7.1081e-20, -1.6012e-20, 2.3358e-19,\n 1.8211e-19, 1.6258e-19, 1.2939e-19, 1.4967e-19, 8.1048e-20,\n -2.8216e-20, 2.0660e-19, 1.9736e-19, -3.1437e-20, 1.8516e-19,\n -1.0448e-20, 1.6227e-19, 1.3235e-19, 2.9813e-21, -1.9803e-20,\n 1.6709e-19, 6.9636e-20, -2.2064e-20, -1.6844e-20, -2.9177e-20,\n -2.0999e-20, -6.0502e-21, 1.2940e-19, -1.0458e-20, -2.8831e-20,\n -1.1040e-20, 7.3038e-20, 9.0103e-20, -2.6548e-20, -2.5996e-20,\n -2.9156e-20, 2.0887e-19, 9.5243e-20, 1.7595e-19, 1.8303e-19,\n 8.1394e-21, 2.1032e-19, -2.5007e-20, 1.3765e-19, -1.9223e-20,\n -2.6708e-20, -2.9160e-20, -6.4583e-21, 1.6504e-19, 2.1737e-19,\n -6.5651e-21, 2.4013e-19, -6.3337e-21, 1.6530e-20, -2.3803e-20,\n -7.0855e-22, -3.5010e-20, 1.7858e-19, 1.2286e-19, -2.3254e-20,\n 1.1772e-19, -2.9516e-20, -3.1784e-20, -1.9202e-20]], device='cuda:0')", + "exp_avg_sq": "tensor([[5.5480e-21, 4.5217e-23, 2.2426e-21, 5.7414e-23, 5.3930e-22, 3.6243e-22,\n 4.3111e-22, 2.4200e-21, 5.7814e-24, 8.7289e-21, 1.5382e-21, 5.1477e-21,\n 3.5772e-22, 1.9894e-21, 7.8032e-22, 5.4062e-21, 2.0487e-21, 1.9128e-22,\n 2.6352e-21, 8.1654e-22, 2.1983e-22, 6.1258e-23, 7.7585e-23, 1.9747e-21,\n 1.7629e-22, 1.3284e-21, 3.0266e-23, 1.6185e-21, 1.5972e-21, 3.2341e-21,\n 2.4846e-21, 2.4758e-21, 2.0398e-21, 1.3584e-21, 8.2438e-21, 5.4986e-21,\n 4.9501e-22, 1.5143e-21, 1.6602e-21, 3.2772e-22, 1.5627e-21, 8.2813e-22,\n 2.5875e-21, 3.9452e-21, 3.6146e-21, 2.2484e-21, 1.2274e-21, 3.0277e-21,\n 3.9638e-21, 2.6956e-21, 1.2707e-21, 2.8683e-21, 1.0634e-20, 1.4796e-20,\n 1.0732e-21, 3.3255e-21, 3.0335e-22, 1.8338e-21, 9.2066e-21, 5.7086e-21,\n 3.1470e-22, 7.7888e-21, 5.8307e-21, 6.3071e-22],\n [5.5480e-21, 4.5219e-23, 2.2426e-21, 5.7414e-23, 5.3930e-22, 3.6243e-22,\n 4.3111e-22, 2.4200e-21, 5.7812e-24, 8.7290e-21, 1.5382e-21, 5.1477e-21,\n 3.5772e-22, 1.9894e-21, 7.8035e-22, 5.4062e-21, 2.0487e-21, 1.9128e-22,\n 2.6352e-21, 8.1654e-22, 2.1982e-22, 6.1257e-23, 7.7584e-23, 1.9747e-21,\n 1.7628e-22, 1.3284e-21, 3.0263e-23, 1.6185e-21, 1.5972e-21, 3.2341e-21,\n 2.4845e-21, 2.4758e-21, 2.0398e-21, 1.3584e-21, 8.2438e-21, 5.4986e-21,\n 4.9500e-22, 1.5143e-21, 1.6602e-21, 3.2773e-22, 1.5628e-21, 8.2813e-22,\n 2.5875e-21, 3.9453e-21, 3.6146e-21, 2.2485e-21, 1.2274e-21, 3.0277e-21,\n 3.9638e-21, 2.6957e-21, 1.2707e-21, 2.8684e-21, 1.0634e-20, 1.4797e-20,\n 1.0732e-21, 3.3255e-21, 3.0334e-22, 1.8338e-21, 9.2067e-21, 5.7086e-21,\n 3.1471e-22, 7.7888e-21, 5.8307e-21, 6.3071e-22]], device='cuda:0')" + }, + "56": { + "step": "tensor(22524.)", + "exp_avg": "tensor([-1.3717e-19, 1.3671e-19], device='cuda:0')", + "exp_avg_sq": "tensor([2.4046e-20, 2.4046e-20], device='cuda:0')" + }, + "57": { + "step": "tensor(22524.)", + "exp_avg": "tensor([[ 1.0650e-11, 8.0393e-17, -7.6091e-18, -2.7564e-21, -1.7761e-23,\n -2.4596e-20, -7.0507e-17, -3.0598e-21],\n [-2.5740e-09, -2.4528e-14, 2.4028e-15, 8.5189e-19, 5.4756e-21,\n 7.5886e-18, 2.1751e-14, 9.4797e-19],\n [-7.7906e-14, -5.0729e-19, 3.1686e-20, 1.7066e-23, 1.1444e-25,\n 1.5804e-22, 4.5333e-19, 1.9922e-23],\n [-3.7507e-14, -3.4950e-19, 3.3935e-20, 1.2051e-23, 7.8232e-26,\n 1.0798e-22, 3.0962e-19, 1.3630e-23],\n [-1.4192e-09, -1.4105e-14, 1.4167e-15, 4.9085e-19, 3.1468e-21,\n 4.3606e-18, 1.2499e-14, 5.4477e-19],\n [ 8.3611e-13, 7.1632e-18, -7.3106e-19, -2.4793e-22, -1.5837e-24,\n -2.1950e-21, -6.2893e-18, -2.7278e-22],\n [-1.4312e-13, 1.6097e-18, -3.3748e-19, -6.1093e-23, -3.5170e-25,\n -4.8783e-22, -1.3943e-18, -6.0436e-23],\n [-1.5237e-09, -1.5491e-14, 1.5490e-15, 5.3895e-19, 3.4585e-21,\n 4.7931e-18, 1.3738e-14, 5.9876e-19]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.9321e-14, 1.8922e-18, 2.8550e-16, 1.4502e-18, 1.3539e-18, 5.9549e-18,\n 1.4595e-18, 2.1815e-18],\n [5.7830e-13, 1.4300e-18, 2.7190e-17, 1.1066e-18, 8.0121e-19, 3.7997e-18,\n 9.3411e-19, 1.4198e-18],\n [1.4388e-15, 3.8102e-18, 1.0330e-16, 3.0745e-18, 2.9676e-18, 1.2901e-17,\n 4.4780e-18, 6.1849e-18],\n [1.2852e-15, 5.1477e-20, 4.5739e-17, 5.2070e-20, 3.2581e-20, 1.3940e-19,\n 3.8695e-20, 5.5785e-20],\n [4.1119e-13, 2.7043e-18, 5.8097e-16, 2.1533e-18, 1.7083e-18, 7.7837e-18,\n 2.0466e-18, 3.0680e-18],\n [8.0272e-15, 5.5557e-18, 2.8943e-16, 4.9800e-18, 3.3433e-18, 1.4963e-17,\n 3.9048e-18, 5.7722e-18],\n [7.2689e-15, 1.0256e-18, 1.2996e-16, 9.2529e-19, 7.2594e-19, 3.2947e-18,\n 7.4902e-19, 1.1731e-18],\n [4.0183e-13, 2.8314e-18, 4.8699e-16, 2.5374e-18, 2.0880e-18, 9.5488e-18,\n 2.3453e-18, 3.3248e-18]], device='cuda:0')" + }, + "58": { + "step": "tensor(22524.)", + "exp_avg": "tensor([ 1.0671e-11, -2.5764e-09, -7.7717e-14, -3.7621e-14, -1.4181e-09,\n 8.3819e-13, -1.4332e-13, -1.5272e-09], device='cuda:0')", + "exp_avg_sq": "tensor([3.1836e-14, 5.7865e-13, 2.4500e-15, 1.5972e-15, 4.1631e-13, 1.0137e-14,\n 8.3074e-15, 4.0613e-13], device='cuda:0')" + }, + "59": { + "step": "tensor(22524.)", + "exp_avg": "tensor([[-1.6190e-10, 3.0677e-09, -1.6240e-10, -1.6240e-10, 1.8426e-09,\n -1.6239e-10, -1.6237e-10, 1.9722e-09],\n [ 1.4307e-10, -2.7109e-09, 1.4351e-10, 1.4351e-10, -1.6282e-09,\n 1.4351e-10, 1.4349e-10, -1.7428e-09],\n [ 3.0096e-11, -5.7035e-10, 3.0184e-11, 3.0184e-11, -3.4259e-10,\n 3.0184e-11, 3.0183e-11, -3.6671e-10],\n [-5.5214e-11, 1.0464e-09, -5.5375e-11, -5.5375e-11, 6.2851e-10,\n -5.5375e-11, -5.5373e-11, 6.7276e-10]], device='cuda:0')", + "exp_avg_sq": "tensor([[5.4931e-14, 7.1851e-13, 1.3489e-13, 1.1369e-13, 5.0625e-13, 7.4716e-14,\n 8.2916e-14, 4.8789e-13],\n [3.3248e-15, 4.8555e-13, 7.1724e-15, 5.8224e-15, 1.8401e-13, 3.7628e-15,\n 4.3311e-15, 2.0455e-13],\n [1.4492e-14, 7.1143e-14, 3.3721e-14, 2.9036e-14, 9.5402e-14, 1.9897e-14,\n 2.1661e-14, 8.4322e-14],\n [6.3979e-15, 3.6529e-14, 1.3750e-14, 1.2090e-14, 4.6765e-14, 8.6802e-15,\n 9.2874e-15, 4.1725e-14]], device='cuda:0')" + }, + "60": { + "step": "tensor(22524.)", + "exp_avg": "tensor([ 9.5327e-10, -8.3965e-10, -1.7763e-10, 3.2587e-10], device='cuda:0')", + "exp_avg_sq": "tensor([5.0044e-12, 2.9505e-13, 1.2343e-12, 4.9908e-13], device='cuda:0')" + }, + "61": { + "step": "tensor(22524.)", + "exp_avg": "tensor([[-2.0985e-16, 2.0838e-16, -5.5699e-16, ..., 2.8412e-17,\n -1.1342e-16, 2.4764e-19],\n [-2.0194e-15, -2.6683e-15, -3.9835e-15, ..., -7.9389e-17,\n -2.2949e-16, 3.8271e-18],\n [-1.6445e-15, 3.8803e-15, -1.2329e-14, ..., 3.4924e-16,\n -1.9617e-15, -1.1212e-18],\n ...,\n [-1.8651e-16, 2.7619e-16, 3.5843e-17, ..., 3.5220e-17,\n -1.0500e-16, 2.7432e-19],\n [-1.3473e-16, 3.4531e-16, -6.9668e-16, ..., 4.2755e-17,\n -1.6104e-16, 2.0428e-19],\n [-3.8905e-15, 4.4491e-15, -2.0771e-14, ..., 4.3029e-16,\n -2.2606e-15, 3.4288e-19]], device='cuda:0')", + "exp_avg_sq": "tensor([[8.2277e-21, 3.4339e-20, 1.3571e-20, ..., 1.8665e-20, 3.4474e-20,\n 8.3182e-20],\n [3.6178e-20, 1.0944e-19, 5.4499e-20, ..., 6.7482e-20, 2.3982e-19,\n 3.4274e-19],\n [1.3707e-21, 2.8908e-21, 1.5672e-21, ..., 1.7309e-21, 5.9208e-21,\n 7.5522e-21],\n ...,\n [4.1644e-20, 1.2968e-19, 9.2367e-20, ..., 5.7097e-20, 3.0299e-19,\n 5.4845e-19],\n [1.2491e-20, 3.6178e-20, 2.2680e-20, ..., 2.6247e-20, 8.2248e-20,\n 1.3213e-19],\n [3.5139e-19, 1.0906e-18, 5.8612e-19, ..., 6.2907e-19, 2.0650e-18,\n 3.5769e-18]], device='cuda:0')" + }, + "62": { + "step": "tensor(22524.)", + "exp_avg": "tensor([-4.1503e-15, -3.9269e-14, -8.4521e-14, -7.7338e-14, -8.8656e-14,\n -3.5559e-15, -1.6451e-13, -3.9759e-15, -7.6761e-14, -2.8910e-15,\n -1.6074e-13, 5.0982e-19, -1.1652e-13, -2.3170e-15, -1.1809e-14,\n -1.5683e-13, -1.2162e-13, -4.0857e-14, -1.2772e-13, -5.2713e-14,\n -4.7454e-14, -5.5695e-14, -2.5658e-15, -2.2848e-15, -1.2851e-13,\n -7.8378e-14, -1.8067e-15, -4.8057e-15, -7.7111e-14, -8.8352e-15,\n 6.8884e-15, -1.0047e-13, -6.4412e-15, -4.6512e-15, -2.1792e-15,\n -4.2766e-15, 1.4456e-14, -7.6584e-14, 1.6295e-15, -2.4312e-15,\n -4.2698e-15, -8.5651e-14, -1.1465e-13, -1.0768e-13, -6.4054e-15,\n -1.5615e-15, -4.8988e-15, -1.1707e-13, -7.0275e-15, -1.4503e-13,\n -5.9034e-14, -2.5699e-16, -2.6624e-15, -5.0081e-15, -5.8832e-14,\n -5.1225e-14, -7.8019e-15, -4.4701e-15, 6.3011e-15, -2.6778e-15,\n -3.3583e-15, -3.5820e-14, -1.3726e-13, -8.9168e-14, -1.3697e-13,\n -1.0159e-13, -7.6119e-14, -2.7385e-15, 6.6468e-16, -6.5372e-15,\n -9.8794e-14, -5.4729e-15, -7.5343e-15, -8.6286e-14, -1.2073e-15,\n 8.7883e-16, -8.1375e-14, -1.0622e-15, -1.6612e-13, -1.2768e-13,\n -1.2421e-13, -1.0791e-13, -2.6746e-15, -1.0475e-13, -1.0153e-13,\n -1.4342e-13, -1.4176e-15, -7.8864e-14, -1.4911e-15, -1.1640e-13,\n 7.6120e-15, -1.0621e-13, -3.9403e-15, -1.5927e-13, -5.4789e-14,\n -7.1054e-14, -1.1193e-13, 6.1858e-16, -8.3356e-14, -8.1838e-15,\n -1.5219e-13, -1.1550e-13, -5.5956e-15, -6.6734e-15, -1.0493e-13,\n -7.7609e-14, -5.9129e-14, -5.5572e-15, -1.4571e-15, -1.8996e-15,\n -4.3777e-15, -2.9629e-14, -8.6497e-14, -7.0395e-14, -1.7369e-13,\n -1.7590e-13, -2.0504e-15, -1.7565e-15, -3.6285e-15, -5.7845e-15,\n -8.0853e-16, -2.7359e-14, -9.2865e-14, -6.9398e-14, 9.5782e-16,\n -5.6651e-16, -1.6329e-13, -8.7306e-14, -8.2045e-15, 5.1912e-16,\n -3.8050e-15, -1.2123e-13, -8.5014e-14, -1.5155e-15, -1.1537e-13,\n -5.6989e-14, -1.1162e-13, -1.2795e-13, -1.1349e-13, -4.3411e-15,\n -4.0355e-15, -7.5971e-14, -1.4565e-13, -9.3657e-14, -5.1978e-15,\n -1.1560e-14, -3.7780e-15, -1.3761e-13, 1.8981e-14, -1.2925e-13,\n -8.1340e-14, -5.2165e-15, -1.1717e-13, -9.9151e-14, -2.1030e-15,\n -6.4075e-15, -3.6960e-15, -1.2357e-13, -6.4857e-16, -3.1235e-15,\n -6.2525e-15, -1.2288e-13, -6.3571e-14, -1.1407e-13, -1.4762e-15,\n -9.1609e-15, -1.1720e-13, 3.9821e-16, -6.8485e-14, -1.0759e-13,\n -1.2223e-13, -8.1331e-14, -1.1863e-13, 6.8131e-15, -4.4652e-15,\n -1.2046e-13, -2.8013e-15, -8.5296e-15, -1.3407e-13, -3.6475e-15,\n -7.7482e-14, -1.2770e-13, -8.0492e-15, -8.2936e-14, 7.2218e-15,\n -5.8969e-14, -1.4157e-13, -4.4463e-15, -7.1749e-15, -3.3445e-17,\n -4.6472e-15, -1.4586e-13], device='cuda:0')", + "exp_avg_sq": "tensor([9.5006e-18, 4.4153e-17, 1.3578e-18, 1.9873e-17, 1.0164e-16, 8.7982e-19,\n 2.4717e-16, 1.3767e-17, 1.0412e-16, 2.4302e-18, 6.5683e-16, 1.2253e-18,\n 4.4191e-17, 3.4381e-17, 9.9961e-17, 2.1761e-16, 7.6773e-17, 8.3857e-17,\n 3.3644e-16, 7.1708e-17, 1.0965e-16, 5.0254e-17, 9.0393e-19, 1.9790e-18,\n 5.2947e-16, 3.0091e-17, 1.0261e-17, 1.2775e-18, 1.3827e-18, 3.4325e-17,\n 4.0863e-19, 1.9385e-16, 4.5797e-17, 7.9737e-17, 1.3777e-17, 7.5414e-17,\n 5.2134e-19, 6.7245e-17, 5.2524e-19, 2.7326e-18, 5.6404e-18, 6.2820e-17,\n 2.3035e-16, 3.1821e-17, 1.0365e-17, 1.2191e-17, 3.9033e-17, 4.3435e-18,\n 4.5620e-18, 3.6013e-16, 1.9244e-17, 2.2028e-17, 7.6951e-17, 1.0040e-17,\n 2.7691e-17, 1.9709e-17, 7.6990e-17, 4.7642e-17, 3.7514e-19, 3.7031e-17,\n 9.7607e-18, 3.0172e-17, 2.5698e-16, 4.0540e-17, 1.9058e-16, 1.6970e-16,\n 3.6197e-17, 5.3214e-17, 5.5736e-19, 7.9563e-17, 2.6375e-16, 4.9930e-17,\n 4.7285e-17, 6.6312e-17, 1.3673e-17, 6.3491e-19, 2.6211e-17, 1.7183e-17,\n 1.8636e-16, 1.0052e-16, 4.1591e-17, 3.8459e-16, 5.5531e-17, 1.1365e-16,\n 7.4941e-17, 3.6422e-16, 2.0327e-17, 7.6104e-17, 3.9342e-17, 6.3172e-16,\n 4.0204e-19, 3.8894e-16, 2.7708e-17, 1.1715e-16, 1.0566e-16, 1.1259e-16,\n 2.3465e-17, 4.1713e-19, 1.4834e-16, 8.8368e-17, 2.7156e-16, 1.2799e-16,\n 8.3233e-17, 3.4088e-17, 8.7357e-17, 1.2777e-17, 9.2575e-17, 4.5890e-17,\n 9.7847e-18, 8.6989e-19, 2.2140e-17, 8.4245e-17, 2.6182e-16, 3.1746e-17,\n 8.6364e-16, 5.3480e-16, 4.5224e-17, 3.8587e-17, 1.9196e-17, 3.7579e-17,\n 1.3465e-18, 2.7692e-18, 3.6382e-17, 2.8398e-17, 1.3322e-18, 1.7218e-17,\n 2.4097e-16, 1.1295e-16, 2.3239e-17, 2.7630e-17, 4.7119e-17, 1.7435e-16,\n 3.8963e-16, 7.4893e-17, 3.1780e-16, 1.9311e-16, 4.6042e-16, 2.8441e-16,\n 1.0064e-16, 5.9645e-17, 3.2295e-17, 5.7601e-17, 2.8953e-16, 8.2472e-17,\n 3.0567e-17, 5.6814e-17, 3.0448e-17, 7.9909e-17, 7.0373e-19, 2.8520e-17,\n 1.4028e-16, 2.1825e-18, 3.2864e-17, 7.1089e-17, 9.4490e-19, 3.2868e-17,\n 6.8020e-17, 1.3286e-16, 2.1496e-17, 4.2830e-17, 1.1133e-16, 1.4624e-16,\n 9.4061e-18, 3.3598e-16, 2.7771e-17, 4.3674e-17, 2.2612e-17, 5.3139e-18,\n 7.1412e-17, 2.4541e-17, 5.0865e-16, 8.3561e-18, 2.8222e-16, 4.4543e-19,\n 1.4418e-16, 1.1874e-16, 2.7293e-17, 1.0334e-16, 3.1926e-16, 3.7676e-17,\n 3.2960e-17, 8.9244e-17, 2.5178e-17, 6.0658e-17, 4.9194e-19, 8.2456e-17,\n 2.8083e-17, 1.0199e-16, 7.6947e-18, 5.6222e-17, 1.5589e-17, 4.2320e-16],\n device='cuda:0')" + }, + "63": { + "step": "tensor(22524.)", + "exp_avg": "tensor([[ 1.0543e-13, -5.4844e-13, -4.9447e-13, -5.8341e-13, -5.6858e-13,\n 1.0595e-13, -6.1716e-13, 1.0626e-13, -5.7192e-13, 1.0593e-13,\n -5.5676e-13, 1.0691e-13, -5.9010e-13, 1.0625e-13, 1.0305e-13,\n -6.4484e-13, -6.6107e-13, -5.9018e-13, -4.8893e-13, -5.2654e-13,\n -5.7090e-13, -6.5038e-13, 9.9667e-14, 1.0249e-13, -5.8839e-13,\n -6.3342e-13, 1.0387e-13, 1.0013e-13, -3.0099e-13, 1.0071e-13,\n 8.7984e-14, -6.1159e-13, 1.0367e-13, 1.0540e-13, 1.0607e-13,\n 1.0613e-13, -2.8521e-15, -5.8795e-13, 7.1624e-14, 1.0646e-13,\n 1.0134e-13, -5.7163e-13, -5.8814e-13, -5.4612e-13, 1.0271e-13,\n 1.0535e-13, 1.0588e-13, -5.7967e-13, 1.0274e-13, -5.7059e-13,\n -5.3886e-13, 1.0658e-13, 1.0627e-13, 1.0528e-13, -5.9482e-13,\n -5.8177e-13, 1.0410e-13, 1.0417e-13, 1.0239e-13, 1.0692e-13,\n 1.0517e-13, -5.3541e-13, -5.7056e-13, -6.4980e-13, -5.5253e-13,\n -5.8238e-13, -6.3803e-13, 1.0669e-13, 1.0726e-13, 1.0630e-13,\n -5.4239e-13, 1.0575e-13, 1.0414e-13, -6.1083e-13, 1.0618e-13,\n 1.0681e-13, -6.1968e-13, 1.0635e-13, -5.8792e-13, -5.6248e-13,\n -5.5129e-13, -5.8345e-13, 1.0642e-13, -5.1050e-13, -6.1567e-13,\n -5.6467e-13, 1.0551e-13, -5.9885e-13, 1.0671e-13, -5.0052e-13,\n 1.0044e-13, -5.4655e-13, 1.0605e-13, -5.1792e-13, -5.4907e-13,\n -5.1342e-13, -6.5931e-13, 1.0724e-13, -5.8854e-13, 1.0422e-13,\n -5.9923e-13, -5.4996e-13, 1.0601e-13, 1.0261e-13, -5.9939e-13,\n -5.6843e-13, -5.8750e-13, 1.0508e-13, 1.0571e-13, 3.0382e-14,\n 1.0647e-13, -5.9466e-13, -5.7887e-13, -5.4213e-13, -5.5342e-13,\n -5.7021e-13, 1.0671e-13, 1.0672e-13, 1.0422e-13, 1.0313e-13,\n 1.0691e-13, -6.5608e-13, -6.0698e-13, -5.4908e-13, 1.0572e-13,\n 1.0719e-13, -5.9940e-13, -5.3618e-13, 9.9239e-14, 1.0671e-13,\n 1.0679e-13, -6.2025e-13, -5.5649e-13, 1.0623e-13, -5.0387e-13,\n -4.7823e-13, -5.4679e-13, -5.2086e-13, -5.2216e-13, 1.0639e-13,\n 1.0414e-13, -6.0885e-13, -6.3303e-13, -5.4861e-13, 1.0488e-13,\n 1.0118e-13, 1.0668e-13, -5.9778e-13, -1.0248e-13, -5.8187e-13,\n -6.0918e-13, 1.0456e-13, -5.9149e-13, -5.6412e-13, 2.2596e-14,\n 1.0600e-13, 1.0620e-13, -5.8579e-13, 1.0696e-13, 1.0583e-13,\n 1.0560e-13, -5.7226e-13, -6.5180e-13, -5.6653e-13, 1.0663e-13,\n 1.0394e-13, -5.6994e-13, 1.0754e-13, -5.1268e-13, -6.1760e-13,\n -5.4820e-13, -6.7245e-13, -4.8156e-13, 8.6392e-14, 1.0656e-13,\n -5.2829e-13, 1.0681e-13, 1.0518e-13, -4.8851e-13, 1.0453e-13,\n -5.8078e-13, -6.3146e-13, 1.0436e-13, -6.2234e-13, 9.8526e-14,\n -6.0770e-13, -6.6579e-13, 1.0576e-13, 1.0377e-13, 1.0710e-13,\n 1.0511e-13, -5.5703e-13],\n [ 1.0181e-14, -6.7225e-14, -5.1929e-14, -6.3328e-14, -6.9985e-14,\n 1.0781e-14, -7.1563e-14, 1.1564e-14, -5.7657e-14, 1.1159e-14,\n -6.5229e-14, 1.0572e-14, -7.2760e-14, 1.0806e-14, 1.0413e-14,\n -7.7706e-14, -6.2363e-14, -5.6629e-14, -5.9903e-14, -5.5308e-14,\n -5.6952e-14, -7.7150e-14, 1.0007e-14, 9.6373e-15, -6.5212e-14,\n -6.8633e-14, 1.0401e-14, 1.1180e-14, -1.9682e-14, 9.5801e-15,\n 9.6978e-15, -5.9730e-14, 9.7979e-15, 1.1169e-14, 1.0518e-14,\n 1.0903e-14, 6.1507e-15, -6.6485e-14, 8.2625e-15, 1.1086e-14,\n 1.0472e-14, -7.1218e-14, -7.9725e-14, -5.7689e-14, 1.0644e-14,\n 1.0792e-14, 9.7927e-15, -7.7359e-14, 1.1108e-14, -6.1868e-14,\n -5.5632e-14, 9.5719e-15, 1.0836e-14, 1.1586e-14, -5.9972e-14,\n -6.4985e-14, 9.7815e-15, 9.7453e-15, 9.9887e-15, 1.1062e-14,\n 9.5733e-15, -6.2283e-14, -5.1509e-14, -7.6000e-14, -5.4475e-14,\n -6.2916e-14, -7.4055e-14, 1.1249e-14, 1.0817e-14, 1.0090e-14,\n -4.0080e-14, 1.0227e-14, 9.7238e-15, -5.6182e-14, 1.1120e-14,\n 1.1065e-14, -7.1097e-14, 9.9942e-15, -5.2626e-14, -4.4074e-14,\n -6.7837e-14, -5.2363e-14, 1.0704e-14, -4.1757e-14, -6.5753e-14,\n -6.7150e-14, 1.1127e-14, -7.4369e-14, 1.1691e-14, -5.1991e-14,\n 1.0305e-14, -5.9761e-14, 1.1231e-14, -6.6756e-14, -5.3175e-14,\n -5.2305e-14, -6.6378e-14, 1.0754e-14, -7.1911e-14, 9.9238e-15,\n -6.7596e-14, -5.6149e-14, 1.1078e-14, 1.0507e-14, -6.4502e-14,\n -7.1588e-14, -6.5822e-14, 1.0862e-14, 1.1726e-14, 7.3803e-15,\n 1.0933e-14, -7.1930e-14, -6.1152e-14, -6.2492e-14, -4.8948e-14,\n -5.1743e-14, 1.0317e-14, 1.0095e-14, 9.4012e-15, 1.0102e-14,\n 1.0750e-14, -6.9342e-14, -5.9125e-14, -6.9970e-14, 1.0990e-14,\n 1.1130e-14, -6.6198e-14, -5.9591e-14, 1.0469e-14, 1.0241e-14,\n 1.0710e-14, -6.9961e-14, -5.2769e-14, 1.0760e-14, -5.4943e-14,\n -4.5820e-14, -6.1841e-14, -4.6255e-14, -5.3889e-14, 1.0128e-14,\n 1.1901e-14, -4.7509e-14, -6.2595e-14, -5.2964e-14, 1.0969e-14,\n 1.0009e-14, 1.0890e-14, -6.3846e-14, -1.6784e-14, -7.5287e-14,\n -6.1510e-14, 1.0546e-14, -5.9542e-14, -6.6430e-14, 8.8495e-15,\n 1.0267e-14, 1.0478e-14, -6.5902e-14, 1.0554e-14, 1.0396e-14,\n 1.0650e-14, -5.9989e-14, -7.0398e-14, -5.6921e-14, 1.1167e-14,\n 1.0271e-14, -6.3657e-14, 1.0592e-14, -5.6426e-14, -6.7090e-14,\n -4.5521e-14, -8.2781e-14, -5.2032e-14, 8.3624e-15, 1.0351e-14,\n -4.8609e-14, 1.1651e-14, 9.4922e-15, -4.4052e-14, 1.0015e-14,\n -6.9535e-14, -6.9000e-14, 1.0995e-14, -6.7976e-14, 1.0093e-14,\n -6.2335e-14, -7.5019e-14, 1.1030e-14, 1.0560e-14, 1.1019e-14,\n 1.0048e-14, -6.3344e-14],\n [ 7.0299e-15, -6.4487e-14, -4.6235e-14, -5.9822e-14, -6.4802e-14,\n 7.9721e-15, -6.3226e-14, 8.0770e-15, -5.3494e-14, 7.2438e-15,\n -6.3038e-14, 6.8335e-15, -6.4451e-14, 7.7045e-15, 7.2579e-15,\n -7.0796e-14, -5.9028e-14, -4.9895e-14, -6.5650e-14, -6.1392e-14,\n -5.8276e-14, -6.3630e-14, 6.5180e-15, 5.9803e-15, -6.2122e-14,\n -5.7308e-14, 7.8256e-15, 7.9784e-15, -2.7838e-14, 6.9188e-15,\n 7.8290e-15, -5.3050e-14, 7.2090e-15, 7.9432e-15, 7.3012e-15,\n 7.8328e-15, 4.3928e-15, -6.2155e-14, 5.4528e-15, 7.8565e-15,\n 6.9183e-15, -6.9021e-14, -8.0207e-14, -6.3114e-14, 7.1850e-15,\n 7.8426e-15, 6.4719e-15, -6.9577e-14, 7.4938e-15, -6.3540e-14,\n -6.0732e-14, 6.6151e-15, 8.0985e-15, 8.1664e-15, -5.6108e-14,\n -6.1425e-14, 7.1042e-15, 6.7624e-15, 7.5548e-15, 7.3968e-15,\n 5.7669e-15, -6.5404e-14, -5.6049e-14, -6.7805e-14, -5.1690e-14,\n -5.4434e-14, -6.3218e-14, 8.3417e-15, 8.2942e-15, 7.2153e-15,\n -4.5827e-14, 7.0650e-15, 6.4653e-15, -5.9949e-14, 8.2527e-15,\n 8.3821e-15, -6.4528e-14, 6.6225e-15, -5.4476e-14, -4.2595e-14,\n -6.5754e-14, -4.6183e-14, 7.0020e-15, -4.9051e-14, -6.9143e-14,\n -6.5702e-14, 7.6075e-15, -6.8487e-14, 8.4215e-15, -5.5791e-14,\n 8.4265e-15, -6.5689e-14, 7.2351e-15, -6.9313e-14, -5.7510e-14,\n -5.0110e-14, -6.0512e-14, 7.7711e-15, -6.9608e-14, 6.8033e-15,\n -5.8218e-14, -5.5267e-14, 7.7271e-15, 7.5225e-15, -5.9759e-14,\n -7.3134e-14, -6.3766e-14, 7.9208e-15, 7.2857e-15, 5.2238e-15,\n 7.9281e-15, -6.2777e-14, -5.8179e-14, -5.9787e-14, -4.9787e-14,\n -5.2872e-14, 6.5321e-15, 7.1615e-15, 5.8100e-15, 6.7097e-15,\n 7.4266e-15, -5.8189e-14, -6.0406e-14, -7.4434e-14, 8.4660e-15,\n 7.3994e-15, -6.0917e-14, -6.7837e-14, 7.6547e-15, 6.4825e-15,\n 7.5871e-15, -6.3664e-14, -4.8793e-14, 7.8752e-15, -5.5046e-14,\n -5.7043e-14, -6.4389e-14, -4.9771e-14, -6.1931e-14, 7.0683e-15,\n 8.0611e-15, -5.3189e-14, -6.2395e-14, -5.5865e-14, 7.6283e-15,\n 6.8670e-15, 8.0695e-15, -5.9229e-14, -1.2903e-14, -7.1017e-14,\n -6.1524e-14, 7.3066e-15, -5.7872e-14, -5.2713e-14, 5.9271e-15,\n 7.1002e-15, 7.6286e-15, -6.7067e-14, 6.7569e-15, 7.3509e-15,\n 7.6464e-15, -5.9735e-14, -7.2268e-14, -5.2918e-14, 8.2945e-15,\n 7.2946e-15, -7.0149e-14, 7.6008e-15, -5.4669e-14, -6.2068e-14,\n -4.8660e-14, -7.6666e-14, -5.6984e-14, 5.0899e-15, 7.6339e-15,\n -5.6130e-14, 7.9365e-15, 6.4214e-15, -5.9629e-14, 6.4558e-15,\n -6.9813e-14, -5.6708e-14, 7.8609e-15, -5.9225e-14, 7.9887e-15,\n -5.2894e-14, -7.1212e-14, 8.0854e-15, 7.2012e-15, 7.4430e-15,\n 6.4107e-15, -6.0201e-14],\n [-1.2435e-13, 7.4229e-13, 6.2810e-13, 7.5544e-13, 7.5470e-13,\n -1.2698e-13, 7.9423e-13, -1.3073e-13, 7.3153e-13, -1.2792e-13,\n 7.4388e-13, -1.2903e-13, 7.8713e-13, -1.2768e-13, -1.2480e-13,\n 8.3946e-13, 8.2870e-13, 7.5399e-13, 6.6550e-13, 6.6381e-13,\n 7.1710e-13, 8.8540e-13, -1.1827e-13, -1.2067e-13, 7.4074e-13,\n 8.2395e-13, -1.2556e-13, -1.2101e-13, 3.6909e-13, -1.2000e-13,\n -1.1285e-13, 7.8191e-13, -1.2127e-13, -1.2748e-13, -1.2813e-13,\n -1.2841e-13, -1.1841e-14, 7.8897e-13, -9.2239e-14, -1.2805e-13,\n -1.2292e-13, 7.7334e-13, 7.9465e-13, 7.1015e-13, -1.2219e-13,\n -1.2854e-13, -1.2462e-13, 7.9252e-13, -1.2378e-13, 7.3821e-13,\n 6.8082e-13, -1.2452e-13, -1.2913e-13, -1.2781e-13, 7.5732e-13,\n 7.4883e-13, -1.2181e-13, -1.2143e-13, -1.2333e-13, -1.3099e-13,\n -1.2302e-13, 7.1088e-13, 7.0218e-13, 8.5353e-13, 6.8750e-13,\n 7.4081e-13, 8.3890e-13, -1.2815e-13, -1.2993e-13, -1.2537e-13,\n 6.4873e-13, -1.2697e-13, -1.2262e-13, 7.4158e-13, -1.2798e-13,\n -1.3121e-13, 8.1883e-13, -1.2619e-13, 7.3793e-13, 6.8855e-13,\n 7.4210e-13, 7.0800e-13, -1.2713e-13, 6.1019e-13, 7.8946e-13,\n 7.2692e-13, -1.2502e-13, 8.0199e-13, -1.3135e-13, 6.5666e-13,\n -1.2339e-13, 7.0786e-13, -1.2889e-13, 7.1112e-13, 6.8668e-13,\n 6.8684e-13, 8.3064e-13, -1.2824e-13, 7.8060e-13, -1.2457e-13,\n 7.7278e-13, 6.9392e-13, -1.2823e-13, -1.2426e-13, 7.6856e-13,\n 7.7794e-13, 7.5843e-13, -1.2827e-13, -1.2931e-13, -5.3628e-14,\n -1.2786e-13, 8.0228e-13, 7.4480e-13, 7.3277e-13, 6.7936e-13,\n 7.0804e-13, -1.2783e-13, -1.2571e-13, -1.2241e-13, -1.2530e-13,\n -1.2852e-13, 8.3164e-13, 7.5865e-13, 7.6872e-13, -1.2872e-13,\n -1.3176e-13, 7.7288e-13, 6.8885e-13, -1.2103e-13, -1.2722e-13,\n -1.2640e-13, 7.9197e-13, 6.9230e-13, -1.2737e-13, 6.4853e-13,\n 6.1884e-13, 6.9989e-13, 6.5360e-13, 6.6211e-13, -1.2636e-13,\n -1.3057e-13, 6.8730e-13, 7.8689e-13, 6.8320e-13, -1.2854e-13,\n -1.2103e-13, -1.2708e-13, 7.6373e-13, 1.4539e-13, 7.8185e-13,\n 7.5566e-13, -1.2402e-13, 7.4834e-13, 7.6586e-13, -5.4061e-14,\n -1.2571e-13, -1.2704e-13, 7.6704e-13, -1.2803e-13, -1.2576e-13,\n -1.2664e-13, 7.3788e-13, 8.2883e-13, 7.2020e-13, -1.2783e-13,\n -1.2413e-13, 7.3572e-13, -1.2978e-13, 6.7316e-13, 8.0016e-13,\n 6.6943e-13, 8.8691e-13, 6.2757e-13, -1.0617e-13, -1.2578e-13,\n 6.3849e-13, -1.3176e-13, -1.2258e-13, 5.8783e-13, -1.2528e-13,\n 7.6605e-13, 8.0716e-13, -1.2650e-13, 7.9415e-13, -1.2146e-13,\n 7.7325e-13, 8.4676e-13, -1.2738e-13, -1.2430e-13, -1.3082e-13,\n -1.2508e-13, 7.2641e-13]], device='cuda:0')", + "exp_avg_sq": "tensor([[1.2500e-15, 3.0575e-15, 7.2989e-16, 4.3435e-15, 2.7673e-15, 7.4207e-16,\n 3.8136e-15, 9.7802e-16, 3.3871e-15, 1.2810e-15, 5.2416e-15, 9.3896e-16,\n 5.3788e-15, 9.4824e-16, 1.3476e-15, 5.2839e-15, 8.1454e-15, 3.6333e-15,\n 3.8415e-15, 4.7767e-15, 5.5193e-15, 4.5640e-15, 1.1305e-15, 1.1912e-15,\n 4.4994e-15, 3.3659e-15, 7.8472e-16, 1.3399e-15, 6.9872e-16, 1.2071e-15,\n 3.7429e-16, 5.3670e-15, 9.8358e-16, 1.0075e-15, 7.9013e-16, 1.0678e-15,\n 2.5769e-16, 4.0288e-15, 3.2597e-16, 9.6589e-16, 1.0868e-15, 3.8637e-15,\n 5.0754e-15, 3.7872e-15, 1.0463e-15, 9.1110e-16, 1.0396e-15, 2.3529e-15,\n 1.4551e-15, 3.6313e-15, 4.4542e-15, 1.0728e-15, 9.1083e-16, 1.2432e-15,\n 3.6833e-15, 3.0309e-15, 1.2015e-15, 1.1172e-15, 2.7961e-16, 9.2970e-16,\n 1.1820e-15, 4.1096e-15, 4.5508e-15, 5.3869e-15, 3.0357e-15, 3.6951e-15,\n 4.1142e-15, 1.1209e-15, 6.6311e-16, 8.7794e-16, 4.6342e-15, 9.0420e-16,\n 1.2498e-15, 4.9812e-15, 9.1855e-16, 5.1049e-16, 3.0365e-15, 1.1024e-15,\n 4.4493e-15, 4.5285e-15, 3.7599e-15, 5.2050e-15, 1.2084e-15, 3.7212e-15,\n 4.1440e-15, 3.6949e-15, 1.2359e-15, 4.9351e-15, 8.3993e-16, 4.2707e-15,\n 2.9575e-16, 4.0081e-15, 1.1118e-15, 3.4019e-15, 4.6481e-15, 3.8104e-15,\n 5.7011e-15, 5.9151e-16, 3.7148e-15, 8.7858e-16, 3.4112e-15, 3.2153e-15,\n 1.1124e-15, 1.0778e-15, 4.8713e-15, 3.5495e-15, 3.0727e-15, 1.0268e-15,\n 1.3269e-15, 4.0363e-16, 8.3591e-16, 3.2502e-15, 5.1688e-15, 2.9645e-15,\n 4.6442e-15, 3.5984e-15, 1.2139e-15, 9.0911e-16, 1.1456e-15, 1.2418e-15,\n 8.2319e-16, 2.3845e-15, 3.0827e-15, 3.2211e-15, 6.4110e-16, 7.1936e-16,\n 4.5019e-15, 5.2426e-15, 1.1079e-15, 1.0898e-15, 1.1911e-15, 4.9103e-15,\n 4.3785e-15, 1.0835e-15, 3.4368e-15, 3.2833e-15, 4.1876e-15, 3.1726e-15,\n 3.2356e-15, 7.8532e-16, 1.3879e-15, 3.0385e-15, 3.9868e-15, 5.5942e-15,\n 1.4531e-15, 1.2366e-15, 7.4994e-16, 3.6650e-15, 1.7824e-16, 3.1557e-15,\n 4.8692e-15, 1.0761e-15, 4.7806e-15, 4.4780e-15, 4.2243e-16, 1.0363e-15,\n 8.8656e-16, 6.9174e-15, 1.0906e-15, 1.1294e-15, 9.1204e-16, 2.6809e-15,\n 3.7067e-15, 3.8357e-15, 7.9716e-16, 1.1554e-15, 5.1331e-15, 6.7959e-16,\n 2.3031e-15, 3.2956e-15, 5.9582e-15, 3.4528e-15, 5.5894e-15, 2.7474e-16,\n 8.5225e-16, 4.9441e-15, 9.2523e-16, 1.0830e-15, 3.7409e-15, 1.1413e-15,\n 4.6680e-15, 4.5739e-15, 9.2070e-16, 5.2565e-15, 4.2717e-16, 3.5597e-15,\n 3.6204e-15, 9.9832e-16, 1.4309e-15, 9.5275e-16, 1.0028e-15, 4.2799e-15],\n [9.8715e-17, 2.2976e-16, 5.7440e-17, 3.3371e-16, 2.0940e-16, 5.7960e-17,\n 2.8994e-16, 7.6519e-17, 2.5744e-16, 1.0095e-16, 4.0072e-16, 7.3563e-17,\n 4.1187e-16, 7.4359e-17, 1.0649e-16, 4.0287e-16, 6.3207e-16, 2.7586e-16,\n 2.9256e-16, 3.6558e-16, 4.2127e-16, 3.4955e-16, 8.8221e-17, 9.3214e-17,\n 3.4251e-16, 2.5420e-16, 6.1801e-17, 1.0566e-16, 5.5402e-17, 9.4807e-17,\n 3.0480e-17, 4.1007e-16, 7.7085e-17, 7.9155e-17, 6.2011e-17, 8.3468e-17,\n 2.0995e-17, 3.0424e-16, 2.6157e-17, 7.5541e-17, 8.4667e-17, 2.9456e-16,\n 3.8754e-16, 2.8889e-16, 8.1733e-17, 7.0810e-17, 8.1075e-17, 1.7949e-16,\n 1.1533e-16, 2.7421e-16, 3.4048e-16, 8.3902e-17, 7.1272e-17, 9.8231e-17,\n 2.7848e-16, 2.2983e-16, 9.4353e-17, 8.7523e-17, 2.2511e-17, 7.2899e-17,\n 9.2553e-17, 3.1171e-16, 3.5078e-16, 4.1373e-16, 2.3067e-16, 2.8131e-16,\n 3.1052e-16, 8.8105e-17, 5.2465e-17, 6.8543e-17, 3.5257e-16, 7.0854e-17,\n 9.8245e-17, 3.8140e-16, 7.1956e-17, 4.1029e-17, 2.3162e-16, 8.5974e-17,\n 3.3699e-16, 3.4786e-16, 2.8773e-16, 3.9673e-16, 9.4705e-17, 2.8027e-16,\n 3.1743e-16, 2.8147e-16, 9.7319e-17, 3.7863e-16, 6.6074e-17, 3.2388e-16,\n 2.4535e-17, 3.0452e-16, 8.6701e-17, 2.5707e-16, 3.5176e-16, 2.9000e-16,\n 4.3899e-16, 4.6618e-17, 2.8311e-16, 6.9153e-17, 2.5877e-16, 2.4601e-16,\n 8.7757e-17, 8.4301e-17, 3.7158e-16, 2.6957e-16, 2.3141e-16, 8.0522e-17,\n 1.0454e-16, 3.2340e-17, 6.5928e-17, 2.4712e-16, 3.9158e-16, 2.2368e-16,\n 3.5325e-16, 2.7384e-16, 9.5470e-17, 7.1036e-17, 8.9853e-17, 9.7968e-17,\n 6.5314e-17, 1.8076e-16, 2.3435e-16, 2.4299e-16, 5.1372e-17, 5.7604e-17,\n 3.4586e-16, 4.0030e-16, 8.6946e-17, 8.4870e-17, 9.3076e-17, 3.7839e-16,\n 3.3215e-16, 8.4503e-17, 2.6072e-16, 2.4736e-16, 3.1773e-16, 2.4075e-16,\n 2.4418e-16, 6.1205e-17, 1.1000e-16, 2.3045e-16, 3.0347e-16, 4.2655e-16,\n 1.1573e-16, 9.6764e-17, 5.9415e-17, 2.7756e-16, 1.4463e-17, 2.4018e-16,\n 3.7226e-16, 8.4099e-17, 3.6287e-16, 3.4137e-16, 3.3836e-17, 8.1092e-17,\n 6.8699e-17, 5.3386e-16, 8.5183e-17, 8.7934e-17, 7.1549e-17, 2.0409e-16,\n 2.8212e-16, 2.9256e-16, 6.2499e-17, 9.0432e-17, 3.9795e-16, 5.4024e-17,\n 1.7551e-16, 2.5107e-16, 4.5562e-16, 2.6070e-16, 4.2605e-16, 2.2210e-17,\n 6.6805e-17, 3.7902e-16, 7.1957e-17, 8.4955e-17, 2.8259e-16, 8.9059e-17,\n 3.5495e-16, 3.4945e-16, 7.1866e-17, 4.0161e-16, 3.3848e-17, 2.7016e-16,\n 2.7404e-16, 7.8176e-17, 1.1350e-16, 7.4415e-17, 7.8316e-17, 3.2328e-16],\n [1.4130e-16, 3.3957e-16, 8.3873e-17, 4.8466e-16, 3.0593e-16, 8.3340e-17,\n 4.2592e-16, 1.1016e-16, 3.7410e-16, 1.4513e-16, 5.8699e-16, 1.0600e-16,\n 6.0314e-16, 1.0657e-16, 1.5293e-16, 5.9330e-16, 9.1614e-16, 4.0205e-16,\n 4.2548e-16, 5.2898e-16, 6.1563e-16, 5.0763e-16, 1.2787e-16, 1.3489e-16,\n 5.0169e-16, 3.7335e-16, 8.8151e-17, 1.5188e-16, 8.0986e-17, 1.3647e-16,\n 4.3470e-17, 6.0006e-16, 1.1060e-16, 1.1343e-16, 8.8599e-17, 1.2053e-16,\n 3.0342e-17, 4.4919e-16, 3.7394e-17, 1.0864e-16, 1.2297e-16, 4.2875e-16,\n 5.6742e-16, 4.2139e-16, 1.1820e-16, 1.0254e-16, 1.1751e-16, 2.6261e-16,\n 1.6539e-16, 4.0386e-16, 4.9611e-16, 1.2112e-16, 1.0232e-16, 1.4069e-16,\n 4.1005e-16, 3.3447e-16, 1.3596e-16, 1.2632e-16, 3.2009e-17, 1.0465e-16,\n 1.3386e-16, 4.5886e-16, 5.0664e-16, 6.0254e-16, 3.3727e-16, 4.1007e-16,\n 4.6043e-16, 1.2641e-16, 7.4526e-17, 9.8733e-17, 5.1756e-16, 1.0179e-16,\n 1.4141e-16, 5.5423e-16, 1.0317e-16, 5.7304e-17, 3.3484e-16, 1.2484e-16,\n 4.9950e-16, 5.0342e-16, 4.1982e-16, 5.8107e-16, 1.3676e-16, 4.1414e-16,\n 4.6079e-16, 4.0953e-16, 1.3972e-16, 5.4923e-16, 9.4340e-17, 4.7518e-16,\n 3.3544e-17, 4.4586e-16, 1.2573e-16, 3.8013e-16, 5.1665e-16, 4.2136e-16,\n 6.3932e-16, 6.7454e-17, 4.1147e-16, 9.8610e-17, 3.7993e-16, 3.5565e-16,\n 1.2552e-16, 1.2183e-16, 5.4316e-16, 3.9447e-16, 3.4039e-16, 1.1581e-16,\n 1.5029e-16, 4.7718e-17, 9.4000e-17, 3.5737e-16, 5.7935e-16, 3.3033e-16,\n 5.2008e-16, 4.0143e-16, 1.3772e-16, 1.0228e-16, 1.2929e-16, 1.4039e-16,\n 9.2341e-17, 2.6430e-16, 3.4139e-16, 3.5864e-16, 7.1888e-17, 8.0723e-17,\n 5.0246e-16, 5.8526e-16, 1.2499e-16, 1.2303e-16, 1.3492e-16, 5.4673e-16,\n 4.8782e-16, 1.2254e-16, 3.8145e-16, 3.6389e-16, 4.6662e-16, 3.5143e-16,\n 3.6035e-16, 8.8312e-17, 1.5728e-16, 3.3775e-16, 4.4319e-16, 6.2424e-16,\n 1.6474e-16, 1.4027e-16, 8.3701e-17, 4.0711e-16, 2.0825e-17, 3.4937e-16,\n 5.4245e-16, 1.2174e-16, 5.3757e-16, 4.9903e-16, 5.0177e-17, 1.1703e-16,\n 9.9837e-17, 7.7422e-16, 1.2326e-16, 1.2787e-16, 1.0243e-16, 2.9649e-16,\n 4.1251e-16, 4.2667e-16, 8.9304e-17, 1.3091e-16, 5.7602e-16, 7.6075e-17,\n 2.5231e-16, 3.6538e-16, 6.6473e-16, 3.8519e-16, 6.2618e-16, 3.1917e-17,\n 9.5671e-17, 5.5143e-16, 1.0431e-16, 1.2216e-16, 4.1800e-16, 1.2907e-16,\n 5.2147e-16, 5.0960e-16, 1.0327e-16, 5.8641e-16, 4.9111e-17, 3.9307e-16,\n 4.0494e-16, 1.1234e-16, 1.6249e-16, 1.0725e-16, 1.1320e-16, 4.7923e-16],\n [1.8347e-16, 4.7215e-16, 1.0596e-16, 6.5784e-16, 4.2699e-16, 1.1048e-16,\n 5.8140e-16, 1.4500e-16, 5.2139e-16, 1.8792e-16, 7.9430e-16, 1.3876e-16,\n 8.1322e-16, 1.4060e-16, 1.9701e-16, 8.0032e-16, 1.2171e-15, 5.5876e-16,\n 5.8911e-16, 7.3008e-16, 8.4001e-16, 6.9458e-16, 1.6725e-16, 1.7567e-16,\n 6.8640e-16, 5.1846e-16, 1.1618e-16, 1.9639e-16, 1.0003e-16, 1.7783e-16,\n 5.2373e-17, 8.1467e-16, 1.4581e-16, 1.4894e-16, 1.1739e-16, 1.5805e-16,\n 3.5617e-17, 6.1788e-16, 4.6703e-17, 1.4338e-16, 1.6093e-16, 5.9112e-16,\n 7.7082e-16, 5.7794e-16, 1.5482e-16, 1.3578e-16, 1.5399e-16, 3.5814e-16,\n 2.1203e-16, 5.5815e-16, 6.7812e-16, 1.5871e-16, 1.3538e-16, 1.8224e-16,\n 5.6517e-16, 4.6770e-16, 1.7687e-16, 1.6487e-16, 4.0076e-17, 1.3769e-16,\n 1.7423e-16, 6.2762e-16, 6.8903e-16, 8.1440e-16, 4.6513e-16, 5.6586e-16,\n 6.2921e-16, 1.6539e-16, 9.7797e-17, 1.3056e-16, 7.0613e-16, 1.3399e-16,\n 1.8387e-16, 7.5812e-16, 1.3641e-16, 7.4625e-17, 4.6699e-16, 1.6299e-16,\n 6.7703e-16, 6.8815e-16, 5.7077e-16, 7.9243e-16, 1.7814e-16, 5.7261e-16,\n 6.3087e-16, 5.6614e-16, 1.8173e-16, 7.4996e-16, 1.2439e-16, 6.5421e-16,\n 4.1858e-17, 6.1340e-16, 1.6456e-16, 5.2049e-16, 7.1364e-16, 5.8534e-16,\n 8.5849e-16, 8.6314e-17, 5.6936e-16, 1.3015e-16, 5.2207e-16, 4.9208e-16,\n 1.6367e-16, 1.5926e-16, 7.4202e-16, 5.4383e-16, 4.7478e-16, 1.5176e-16,\n 1.9476e-16, 5.6221e-17, 1.2353e-16, 5.0212e-16, 7.8749e-16, 4.5532e-16,\n 7.0614e-16, 5.4882e-16, 1.7813e-16, 1.3506e-16, 1.6919e-16, 1.8234e-16,\n 1.2132e-16, 3.6675e-16, 4.7341e-16, 4.9510e-16, 9.4015e-17, 1.0552e-16,\n 6.8172e-16, 7.9709e-16, 1.6359e-16, 1.6171e-16, 1.7581e-16, 7.4363e-16,\n 6.6981e-16, 1.6036e-16, 5.2754e-16, 5.0695e-16, 6.4048e-16, 4.8771e-16,\n 4.9695e-16, 1.1698e-16, 2.0278e-16, 4.6597e-16, 6.0971e-16, 8.5177e-16,\n 2.1148e-16, 1.8212e-16, 1.1129e-16, 5.6277e-16, 2.4919e-17, 4.8436e-16,\n 7.4107e-16, 1.5899e-16, 7.2538e-16, 6.8260e-16, 5.8600e-17, 1.5323e-16,\n 1.3235e-16, 1.0418e-15, 1.6135e-16, 1.6717e-16, 1.3531e-16, 4.1193e-16,\n 5.6638e-16, 5.8548e-16, 1.1866e-16, 1.7030e-16, 7.6887e-16, 1.0040e-16,\n 3.5655e-16, 5.0495e-16, 9.0541e-16, 5.2939e-16, 8.4839e-16, 3.8673e-17,\n 1.2659e-16, 7.5021e-16, 1.3761e-16, 1.6000e-16, 5.7257e-16, 1.6885e-16,\n 7.1136e-16, 6.9630e-16, 1.3721e-16, 7.9949e-16, 6.1635e-17, 5.4857e-16,\n 5.5295e-16, 1.4801e-16, 2.0854e-16, 1.4146e-16, 1.4851e-16, 6.5399e-16]],\n device='cuda:0')" + }, + "64": { + "step": "tensor(22524.)", + "exp_avg": "tensor([-6.3504e-13, -6.4065e-14, -4.9204e-14, 7.7365e-13], device='cuda:0')", + "exp_avg_sq": "tensor([1.1801e-13, 1.0123e-14, 1.3533e-14, 1.6114e-14], device='cuda:0')" + }, + "8": { + "step": "tensor(11262.)", + "exp_avg": "tensor([[ 4.1148e-06, 5.0409e-06, 2.2192e-06, ..., 1.4995e-06,\n 2.9470e-07, 9.5579e-07],\n [-2.9969e-07, -1.0803e-05, -7.2103e-06, ..., -1.0135e-05,\n 1.4524e-06, 1.0800e-06],\n [ 1.9782e-06, 1.7884e-05, 1.1596e-06, ..., 3.2237e-05,\n 4.1673e-06, 7.4336e-06],\n ...,\n [ 5.6052e-45, 5.6052e-45, -5.6052e-45, ..., 5.6052e-45,\n -5.6052e-45, 5.6052e-45],\n [ 6.4386e-06, 7.9587e-06, 7.0499e-07, ..., -1.2457e-06,\n -1.8644e-05, 3.4096e-06],\n [-5.6222e-06, 1.4350e-06, -6.0391e-08, ..., 4.0527e-07,\n 1.9992e-07, -2.4202e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[7.7222e-10, 1.0502e-09, 1.7650e-10, ..., 1.2581e-09, 7.8659e-10,\n 4.2575e-10],\n [5.5954e-10, 2.0348e-09, 1.3370e-09, ..., 1.1437e-09, 2.0469e-09,\n 6.3502e-10],\n [5.2381e-10, 1.7474e-09, 1.3546e-09, ..., 7.6185e-09, 5.0066e-09,\n 8.4179e-10],\n ...,\n [3.6952e-15, 4.9016e-15, 9.8248e-16, ..., 2.8012e-15, 6.5628e-15,\n 5.5719e-15],\n [3.9974e-09, 1.3475e-09, 1.1784e-10, ..., 1.5375e-10, 2.0754e-09,\n 4.6254e-10],\n [3.3551e-10, 2.3381e-10, 1.2702e-10, ..., 6.9784e-11, 5.0759e-10,\n 1.9866e-10]], device='cuda:0')" + }, + "9": { + "step": "tensor(11262.)", + "exp_avg": "tensor([ 2.6263e-05, -4.4019e-05, 3.1383e-05, ..., 5.6052e-45,\n -1.5744e-05, -1.3290e-05], device='cuda:0')", + "exp_avg_sq": "tensor([1.2636e-08, 1.4767e-08, 1.8088e-08, ..., 9.6743e-14, 1.7401e-08,\n 4.1625e-09], device='cuda:0')" + }, + "10": { + "step": "tensor(11262.)", + "exp_avg": "tensor([[ 4.8899e-07, 2.8116e-07, 4.6558e-07, ..., 5.6052e-45,\n -4.6029e-07, -4.7403e-07],\n [-1.5631e-06, 3.0303e-06, -2.1532e-09, ..., 5.6052e-45,\n -1.2112e-06, -7.5074e-07],\n [-2.9675e-07, 1.9649e-06, 1.6637e-07, ..., -5.6052e-45,\n -2.6110e-06, -6.9995e-08],\n ...,\n [ 1.7862e-07, 5.4317e-07, -1.7440e-07, ..., 5.6052e-45,\n 2.0127e-07, -6.4174e-07],\n [-2.9934e-07, 8.3334e-07, 9.2660e-07, ..., 5.6052e-45,\n -3.4967e-06, -6.2819e-07],\n [-4.8880e-07, 3.8920e-07, 6.0187e-07, ..., -5.6052e-45,\n 7.0389e-07, -1.3817e-06]], device='cuda:0')", + "exp_avg_sq": "tensor([[2.4938e-11, 2.2336e-11, 4.0081e-11, ..., 1.2552e-14, 1.0077e-10,\n 1.5502e-11],\n [4.4895e-11, 6.4313e-11, 5.8245e-11, ..., 1.7679e-14, 8.6667e-11,\n 2.6057e-11],\n [3.8414e-11, 5.0655e-11, 5.3433e-11, ..., 2.2061e-14, 1.1996e-10,\n 2.8419e-11],\n ...,\n [3.7778e-11, 5.0080e-11, 1.0509e-10, ..., 2.3813e-14, 1.9412e-10,\n 2.2650e-11],\n [9.9519e-11, 7.0762e-11, 6.3256e-11, ..., 1.4688e-14, 1.3320e-10,\n 2.5291e-11],\n [5.0701e-11, 6.8278e-11, 3.5445e-11, ..., 9.0143e-15, 1.9506e-10,\n 3.2089e-11]], device='cuda:0')" + } + }, + "param_groups": [ + { + "lr": 0.0009558195366224509, + "name": "shared", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 0, + 1 + ] + }, + { + "lr": 0.0009558195366224509, + "name": "scale_256", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 2, + 3, + 4 + ] + }, + { + "lr": 0.0009558195366224509, + "name": "scale_512", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 5, + 6, + 7 + ] + }, + { + "lr": 0.0009558195366224509, + "name": "scale_768", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 8, + 9, + 10 + ] + }, + { + "lr": 0.0009558195366224509, + "name": "scale_1024", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.01, + "params": [ + 11, + 12, + 13 + ] + }, + { + "lr": 0.00047836202255981916, + "name": "fusion", + "betas": [ + 0.9, + 0.999 + ], + "eps": 1e-08, + "weight_decay": 1e-05, + "amsgrad": false, + "maximize": false, + "foreach": null, + "capturable": false, + "differentiable": false, + "fused": null, + "decoupled_weight_decay": true, + "initial_lr": 0.005, + "params": [ + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64 + ] + } + ] + }, + "scheduler_state_dict": { + "T_0": 10, + "T_i": 10, + "T_mult": 2, + "eta_min": 1e-06, + "T_cur": 8, + "base_lrs": [ + 0.01, + 0.01, + 0.01, + 0.01, + 0.01, + 0.005 + ], + "last_epoch": 8, + "_step_count": 0, + "_is_initial": false, + "_get_lr_called_within_step": false, + "_last_lr": [ + 0.0009558195366224509, + 0.0009558195366224509, + 0.0009558195366224509, + 0.0009558195366224509, + 0.0009558195366224509, + 0.00047836202255981916 + ] + }, + "metrics": { + "best_val_acc": 74.22733333333333, + "best_epoch": 7, + "scale_accuracies": { + "256": 71.19866666666667, + "512": 74.214, + "768": 74.30666666666667 + }, + "training_history": { + "epochs": [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8 + ], + "train_loss": [ + 3.2117288923670015, + 2.2284906192478755, + 2.2844439983431433, + 1.963918379697002, + 1.8515142379254181, + 2.160790376670829, + 1.9999209507767928, + 1.912309366207397 + ], + "train_acc": [ + 56.118471154293964, + 67.94222246852544, + 72.3915513486272, + 75.14292307976504, + 76.58606046934813, + 77.70493620269644, + 78.737952715506, + 79.63731504167684 + ], + "val_acc": [ + 66.428, + 68.56466666666667, + 71.96066666666667, + 72.78933333333333, + 73.42733333333334, + 73.728, + 73.962, + 74.22733333333333 + ], + "scale_accs": { + "256": [ + 66.428, + 68.56466666666667, + 69.77133333333333, + 70.08533333333334, + 70.59733333333334, + 70.75866666666667, + 71.02266666666667, + 71.19866666666667 + ], + "512": [ + 71.56466666666667, + 72.59866666666667, + 73.31533333333333, + 73.668, + 73.888, + 74.214 + ], + "768": [ + 72.736, + 73.73333333333333, + 74.30666666666667 + ] + }, + "lr": [ + 0.00975530705321762, + 0.00904518046337755, + 0.00793913236883622, + 0.00654543046337755, + 0.005000500000000001, + 0.0034555695366224513, + 0.0020618676311637816, + 0.0009558195366224509 + ] + } + }, + "train_config": { + "name": "david_training", + "run_id": "20251012_210041", + "dataset_name": "AbstractPhil/imagenet-clip-features-orderly", + "model_variant": [ + "clip_vit_b16", + "clip_vit_laion_b32", + "clip_vit_b32" + ], + "num_classes": 1000, + "preset": "balanced", + "custom_config_path": null, + "num_classes_override": null, + "use_belly_override": null, + "belly_expand_override": null, + "progressive_training_override": true, + "scale_warmup_epochs_override": { + "256": 0, + "512": 2, + "768": 5, + "1024": 8 + }, + "num_epochs": 10, + "batch_size": 1024, + "learning_rate": 0.01, + "weight_decay": 1e-05, + "warmup_epochs": 3, + "use_rose_loss": true, + "rose_initial_weight": 0.2, + "rose_max_weight": 0.8, + "rose_weight_schedule": "adaptive", + "use_cayley_loss": false, + "cayley_weight": 0.01, + "scale_loss_balance": null, + "use_mixed_precision": false, + "gradient_clip": 10.0, + "scheduler_type": "cosine_restarts", + "min_lr": 1e-06, + "freeze_strategy": "never", + "freeze_threshold": 90.0, + "unfreeze_on_plateau": true, + "patience": 10, + "track_gradients": true, + "gradient_scale_threshold": 1e-05, + "gradient_scale_multiplier": 10.0, + "log_interval": 50, + "val_interval": 1, + "save_interval": 5, + "log_fusion_weights": true, + "log_loss_components": true, + "save_format": "safetensors", + "hf_repo": "AbstractPhil/david-shared-space", + "upload_to_hub": true, + "base_dir": "./david_training", + "num_workers": 10, + "pin_memory": true, + "prefetch_factor": 4, + "persistent_workers": true + } +} \ No newline at end of file