AbstractPhil commited on
Commit
c964bf3
·
verified ·
1 Parent(s): d2776b1

Update best_model_acc66.03_metadata.json - Run 20251012_235237

Browse files
weights/David-fully_shared-weighted_sum/20251012_235237/best_model_acc66.03_metadata.json ADDED
@@ -0,0 +1,284 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 6,
3
+ "optimizer_state_dict": {
4
+ "state": {
5
+ "0": {
6
+ "step": "tensor(26278.)",
7
+ "exp_avg": "tensor([[ 8.3711e-05, 2.0331e-05, -5.6411e-05, ..., 9.5971e-05,\n 5.0885e-05, -5.7167e-06],\n [-1.3849e-05, 8.4896e-05, -4.8868e-05, ..., 3.1496e-05,\n -3.1875e-05, 1.3238e-06],\n [-1.6474e-05, -1.9805e-05, -3.1234e-06, ..., -3.5699e-05,\n 1.0399e-05, -5.7470e-06],\n ...,\n [-3.3450e-05, 1.4565e-04, -5.5183e-05, ..., 7.5640e-05,\n -2.8098e-05, -2.7881e-05],\n [-1.2390e-05, -1.3744e-05, -2.2372e-05, ..., -2.6885e-05,\n 3.0334e-05, 1.8931e-05],\n [ 2.3681e-05, 3.1564e-06, -2.4347e-05, ..., 4.7182e-06,\n 2.2697e-06, 8.5454e-06]], device='cuda:0')",
8
+ "exp_avg_sq": "tensor([[1.3352e-07, 1.2410e-07, 5.4974e-08, ..., 5.9262e-08, 3.4331e-08,\n 2.7945e-08],\n [4.7416e-08, 1.7523e-07, 7.7728e-08, ..., 6.1428e-08, 2.3948e-08,\n 2.8370e-08],\n [2.3038e-08, 3.9503e-08, 2.5185e-08, ..., 5.2952e-08, 1.2944e-08,\n 1.5630e-08],\n ...,\n [3.3443e-08, 3.0588e-07, 5.3442e-08, ..., 7.4914e-08, 1.9913e-08,\n 3.2831e-08],\n [6.5654e-08, 1.3555e-07, 5.4028e-08, ..., 5.7991e-08, 2.4256e-08,\n 2.9700e-08],\n [4.6786e-09, 1.9452e-08, 9.9581e-09, ..., 4.4657e-09, 2.1661e-09,\n 3.9454e-09]], device='cuda:0')"
9
+ },
10
+ "1": {
11
+ "step": "tensor(26278.)",
12
+ "exp_avg": "tensor([ 3.9362e-03, 8.5389e-04, -2.8412e-04, 1.3020e-03, 1.1635e-03,\n 1.1517e-03, 1.8925e-03, 1.6248e-03, 2.5223e-04, -1.8735e-03,\n 3.4774e-04, 2.1331e-03, 1.9109e-03, -1.0481e-04, -1.2093e-03,\n 1.8706e-03, -2.4770e-03, 1.9536e-03, 2.3766e-03, -1.0645e-03,\n -3.4408e-05, 1.4405e-03, 1.1278e-03, 2.8854e-04, 1.6173e-03,\n 4.0259e-04, -1.5663e-04, 2.6594e-04, 8.3385e-04, 1.8592e-03,\n 6.6836e-05, 2.1431e-03, -8.5731e-04, -1.3937e-03, -7.2692e-04,\n 1.8799e-04, 4.1675e-04, 1.5205e-03, 2.4813e-04, -8.8499e-04,\n -1.7589e-03, -1.9190e-03, 6.6844e-04, 1.1679e-03, 2.0023e-03,\n -1.5402e-03, 3.9618e-04, -1.3447e-03, 8.0791e-04, 4.0755e-04,\n 1.5590e-04, 2.7393e-04, -9.2056e-04, -1.7702e-04, -4.6269e-04,\n 1.5487e-03, 1.2917e-03, 1.6332e-03, 1.8634e-03, -9.1738e-04,\n -5.8015e-03, -1.9172e-03, 2.0263e-03, -4.6106e-04, 2.2479e-03,\n -3.0015e-03, -6.3343e-04, 2.5530e-03, 5.1387e-04, -1.3913e-03,\n -3.3148e-03, -1.3373e-03, -1.0839e-04, -1.2368e-03, 9.0359e-04,\n -3.6436e-04, -3.2300e-05, -3.2879e-03, 1.2027e-04, -1.4234e-03,\n -3.8060e-03, 1.8575e-03, 1.2925e-03, -1.0395e-04, 4.5413e-04,\n 5.2050e-05, -2.3138e-03, 7.5907e-04, 2.7855e-04, 2.7518e-05,\n -6.8747e-04, -1.9378e-03, 7.8354e-05, 1.2590e-03, -6.7709e-04,\n -1.4988e-03, 2.7026e-03, 1.3873e-03, -1.6142e-03, -2.3800e-03,\n -1.2856e-03, 1.6853e-03, 1.6000e-03, 1.0836e-03, 2.6634e-04,\n -1.1529e-03, 5.1707e-05, 3.0459e-03, -6.0144e-04, -8.9609e-04,\n 1.2335e-03, -1.7322e-04, 3.0390e-04, 3.0196e-03, 1.4742e-03,\n -1.9413e-04, 1.5410e-03, -5.7915e-04, -1.6419e-03, 1.1598e-03,\n -6.8808e-04, 2.5572e-03, -3.6919e-03, -9.4477e-04, -5.0219e-04,\n 2.6653e-03, 1.5893e-03, 1.5692e-03, 2.4898e-03, -1.7329e-03,\n -3.3645e-03, 1.0859e-03, -1.5660e-03, -4.4262e-04, 3.2662e-03,\n 1.7684e-03, -5.6288e-04, 8.0225e-04, 8.7492e-04, 1.9146e-03,\n 9.5810e-04, 1.9537e-03, 1.2952e-04, -3.8511e-03, -1.1563e-03,\n 1.5604e-04, 1.8981e-03, -2.8366e-04, 3.8715e-04, -4.6900e-03,\n -9.7259e-04, -1.2921e-03, 1.2265e-03, 7.7363e-05, -2.4034e-04,\n -1.7563e-04, 1.6700e-03, -3.0549e-04, -6.5340e-04, -1.0150e-03,\n 6.2468e-05, -6.5436e-04, -2.4572e-04, 1.6288e-03, 1.2787e-03,\n -3.8387e-03, -6.1587e-04, -9.7367e-04, -4.1378e-04, 1.2440e-04,\n -1.7492e-03, 7.0123e-04, 7.8555e-04, 5.3374e-04, 3.8011e-04,\n 1.0873e-03, 5.6674e-04, 3.1758e-04, 2.1792e-03, -2.4728e-04,\n 2.4717e-03, -1.3304e-04, -9.6367e-04, -7.0249e-04, -1.2287e-04,\n -1.4423e-04, -7.7936e-04, 9.2286e-04, -1.6494e-04, 7.0027e-04,\n -4.5247e-04, -1.2228e-03, 7.2445e-04, -6.7521e-04, 2.3149e-03,\n -5.9305e-04, 1.2245e-05, -1.1143e-03, 1.2877e-03, 5.5658e-04,\n -6.4202e-04, -2.7646e-03, 3.5184e-04, 2.2476e-04, 1.3870e-03,\n -4.9863e-03, -8.7877e-04, 7.0098e-04, 9.5861e-04, 2.8126e-04,\n -1.2770e-04, 1.6769e-03, -9.8223e-04, 7.3162e-04, -3.3161e-04,\n -1.0775e-03, 1.3580e-03, -1.0085e-03, -9.7296e-04, -1.1029e-05,\n 8.2128e-04, 1.0468e-04, 6.6270e-04, -6.3429e-04, -5.2254e-04,\n -6.4033e-04, -6.6678e-04, -6.9937e-04, 2.4879e-03, 6.7648e-04,\n 1.3911e-03, 6.3035e-04, -6.6928e-04, 1.5751e-03, 4.4608e-04,\n 2.5500e-04, -1.9245e-03, 2.9434e-04, 4.0523e-04, -1.1004e-03,\n 5.9233e-04, 2.3457e-03, 3.1384e-04, -8.7693e-04, -4.7057e-03,\n 1.6024e-03, -7.4103e-04, 1.2672e-03, -2.8023e-03, 1.4404e-03,\n -2.3548e-03, 1.6210e-03, -1.1381e-03, 2.4468e-03, -1.0241e-03,\n 1.2330e-04, 1.8948e-03, 1.9479e-03, 2.1602e-03, 3.9066e-04,\n 2.1248e-03, 6.5294e-04, 6.5484e-04, -2.9342e-03, 2.0922e-03,\n 1.1850e-03, 9.3662e-04, 4.7256e-04, 3.4604e-04, -7.3017e-05,\n 2.0778e-03, -4.0735e-03, 8.2714e-04, 1.5462e-03, -5.9846e-04,\n 1.8415e-03, -1.3406e-03, 6.4734e-05, -1.6119e-03, 1.2612e-04,\n 1.0974e-03, -1.8053e-03, 2.1888e-03, 5.3026e-05, 3.2580e-04,\n -1.7281e-04, -5.2343e-04, -4.7150e-04, -1.1927e-03, 8.2387e-04,\n -9.9447e-04, 3.8949e-04, 1.3678e-03, 9.1029e-04, -4.1772e-03,\n -1.5056e-03, -4.7458e-04, -8.8205e-04, -5.2454e-04, 3.6711e-04,\n 3.2557e-03, -1.9814e-04, -2.7768e-04, 1.3687e-03, 1.7788e-03,\n -3.3777e-03, 4.9262e-04, -1.4391e-04, -1.2458e-03, 1.0543e-04,\n -6.1598e-04, 5.1789e-03, 2.6322e-03, -1.3172e-03, 8.3436e-04,\n 7.0186e-04, -2.0828e-04, 6.6521e-04, 2.1509e-03, -2.3691e-03,\n -2.4271e-03, -2.1758e-03, 2.1477e-03, -1.4120e-03, -1.6231e-03,\n 5.3259e-03, -7.4936e-04, -7.4575e-04, 2.5676e-03, 1.5253e-03,\n 1.2636e-03, -8.6750e-04, -6.4518e-04, -1.8005e-04, -3.6883e-04,\n 3.0050e-03, -5.6479e-03, 2.5701e-04, -1.6651e-04, 3.1992e-04,\n -3.5200e-04, -4.6906e-04, -2.2544e-03, 4.2214e-04, 2.5949e-04,\n 1.6802e-03, -7.5077e-04, -1.3043e-03, -6.2441e-04, -1.1476e-03,\n -6.4569e-04, -5.6151e-04, 1.0502e-03, -5.4028e-04, -9.1460e-04,\n 2.1740e-03, 3.8731e-04, 7.8421e-04, 6.0347e-05, -1.7110e-03,\n 3.0098e-04, 1.9970e-03, 9.7392e-04, -6.9595e-04, -1.9125e-03,\n -4.8373e-04, 1.2816e-05, -9.6798e-04, 2.1613e-04, 4.5486e-04,\n -3.1119e-03, -7.0546e-04, 5.4460e-05, -4.6104e-04, 1.2980e-03,\n 8.3596e-04, -6.8471e-05, -1.5356e-03, 1.6908e-03, 1.6060e-03,\n 1.5063e-03, 1.5853e-03, 1.5432e-04, 6.1606e-04, 3.9107e-04,\n 7.7901e-04, 2.8052e-03, -1.5753e-03, -5.0385e-04, 1.0755e-03,\n 5.4688e-04, 1.9286e-03, 9.5686e-04, -1.4878e-03, 1.7072e-03,\n -6.8566e-04, 3.3886e-04, 1.4758e-03, 3.2803e-04, -2.0065e-03,\n 1.7501e-03, -4.7692e-04, -3.4246e-03, 3.8692e-03, -1.7683e-03,\n -8.6192e-04, -2.8909e-03, -9.4383e-04, -8.4068e-04, 2.8610e-05,\n 2.6847e-03, 2.4369e-03, 6.1600e-04, 1.1525e-03, -1.4353e-03,\n 8.7871e-04, 1.7157e-03, 5.7825e-04, 2.0574e-03, 3.0142e-03,\n 8.1741e-04, -2.9200e-04, 7.2283e-04, 2.1601e-04, -1.6514e-03,\n -1.7965e-03, -1.0453e-03, -1.8698e-03, 1.7633e-03, -1.3180e-03,\n 1.1226e-03, -6.2126e-05, 9.8789e-04, 6.4937e-04, -1.0816e-03,\n -8.6751e-04, 1.1170e-03, -5.8840e-04, 3.5980e-04, -1.9442e-03,\n -1.2264e-03, 1.0646e-03, -5.9388e-05, -1.3001e-03, -3.0079e-03,\n 1.2337e-04, -1.7343e-03, -9.7861e-04, 1.4835e-04, 1.1501e-03,\n -8.8553e-04, 2.0378e-04, -7.6030e-04, -2.6553e-03, -4.4576e-04,\n -1.0311e-03, 2.0269e-03, 1.1808e-03, 1.0597e-03, 1.6575e-03,\n -3.6297e-03, -9.1135e-04, 4.2017e-05, 3.7170e-04, -3.4186e-03,\n 4.1293e-04, -1.7330e-03, -4.5695e-05, -3.6367e-03, 6.2674e-04,\n -4.5245e-04, 7.2162e-04, -2.5370e-03, -2.9442e-03, 6.6551e-04,\n 1.4677e-03, -1.3829e-04, 1.0142e-03, -1.3090e-03, 6.6943e-04,\n 8.2432e-05, 3.7515e-04, -1.1089e-03, -1.8207e-03, 1.0985e-03,\n -3.4025e-03, -1.7509e-03, -3.0395e-03, 1.2554e-03, -1.3193e-04,\n -6.0126e-04, -1.3089e-03, -8.6985e-04, -3.0846e-04, -9.3765e-05,\n -2.8948e-04, -4.8228e-04, -1.7813e-03, -1.1372e-03, -5.9052e-04,\n 1.4742e-03, 4.0638e-04, -2.6089e-04, -1.1930e-03, -4.6763e-04,\n 8.8972e-04, 1.7558e-05, -2.8029e-03, -1.4430e-03, 6.7496e-04,\n 5.6188e-04, 5.5327e-04], device='cuda:0')",
13
+ "exp_avg_sq": "tensor([4.3536e-05, 3.9192e-05, 2.7508e-05, 3.1177e-05, 2.9260e-05, 5.9203e-05,\n 2.7207e-05, 5.0924e-05, 4.1931e-05, 4.1989e-05, 6.0760e-05, 5.9435e-05,\n 8.6974e-05, 2.0597e-05, 5.7295e-05, 4.6856e-05, 4.1404e-05, 3.8893e-05,\n 4.6367e-05, 4.7653e-05, 3.5576e-05, 2.6398e-05, 3.0260e-05, 5.7439e-05,\n 7.7144e-05, 3.1142e-05, 2.7784e-05, 5.8816e-05, 5.6151e-05, 4.0954e-05,\n 1.7610e-05, 2.9986e-05, 4.8205e-05, 3.7960e-05, 2.1242e-05, 4.3576e-05,\n 3.2640e-05, 2.5032e-05, 2.4854e-05, 2.3620e-05, 3.2385e-05, 2.6624e-05,\n 5.4303e-06, 6.4613e-05, 1.0963e-04, 3.1697e-05, 2.3218e-05, 2.6766e-05,\n 4.2243e-05, 7.0767e-05, 7.9388e-05, 3.1363e-05, 2.9385e-05, 4.1524e-05,\n 5.1428e-05, 3.7010e-05, 3.2956e-05, 9.2487e-05, 3.2464e-05, 3.6454e-05,\n 1.0132e-04, 2.5040e-05, 2.5931e-05, 3.0737e-05, 3.4075e-05, 2.7230e-05,\n 3.2142e-05, 2.9617e-05, 5.4795e-05, 3.4883e-05, 3.3777e-05, 3.2756e-05,\n 3.0044e-05, 3.3515e-05, 2.7472e-05, 3.5513e-05, 4.5464e-05, 3.9558e-05,\n 4.9437e-05, 3.1717e-05, 5.5069e-05, 3.5750e-05, 2.9497e-05, 1.8076e-05,\n 3.6817e-05, 4.1013e-05, 4.5234e-05, 2.8951e-05, 2.4694e-05, 4.2395e-05,\n 2.7735e-05, 5.8241e-05, 3.4910e-05, 3.7520e-05, 3.5719e-05, 4.0876e-05,\n 5.4799e-05, 3.5441e-05, 3.1765e-05, 1.8319e-05, 4.4993e-05, 6.3440e-05,\n 5.9627e-05, 3.3260e-05, 6.4418e-05, 5.2807e-05, 4.0812e-05, 8.5466e-05,\n 3.2890e-05, 3.9019e-05, 5.3977e-05, 5.1710e-05, 3.4807e-05, 4.8864e-05,\n 3.8565e-05, 4.6703e-05, 5.1060e-05, 4.6872e-05, 2.9211e-05, 3.7972e-05,\n 1.5846e-05, 4.9133e-05, 4.5798e-05, 5.4643e-05, 2.9053e-05, 3.9972e-05,\n 3.0610e-05, 4.4321e-05, 3.9754e-05, 7.2900e-05, 4.3665e-05, 3.9038e-05,\n 4.2882e-05, 4.0151e-05, 8.6429e-05, 3.5550e-05, 2.7014e-05, 3.8876e-05,\n 2.4792e-05, 4.6694e-05, 1.6604e-05, 2.8888e-05, 4.1464e-05, 8.8001e-05,\n 4.0273e-05, 3.8343e-05, 2.6049e-05, 5.2512e-05, 4.2750e-05, 8.0583e-05,\n 2.0124e-05, 4.5730e-05, 2.4645e-05, 6.4349e-05, 2.3640e-05, 3.3376e-05,\n 4.3074e-05, 3.2952e-05, 3.1092e-05, 2.6864e-05, 4.8479e-05, 1.2561e-05,\n 3.7398e-05, 7.0481e-05, 1.7140e-05, 5.8651e-05, 3.4394e-05, 1.9081e-05,\n 1.5691e-05, 3.9272e-05, 3.2311e-05, 4.9827e-05, 3.4907e-05, 3.3084e-05,\n 3.4129e-05, 2.4808e-05, 7.1855e-05, 2.5611e-05, 2.0313e-05, 3.6186e-05,\n 3.0974e-05, 2.9310e-05, 7.0854e-05, 3.4130e-05, 3.0892e-05, 3.6578e-05,\n 9.6462e-05, 2.0076e-05, 2.0593e-05, 1.6291e-05, 3.8691e-05, 4.7538e-05,\n 6.0940e-05, 5.7629e-05, 2.3939e-05, 1.7711e-05, 4.2671e-05, 2.6098e-05,\n 1.8945e-05, 1.8968e-05, 2.3428e-05, 4.3881e-05, 2.8497e-05, 1.9375e-05,\n 3.3581e-05, 3.9035e-05, 3.0864e-05, 2.4609e-05, 4.9128e-05, 1.8821e-05,\n 3.5921e-05, 5.4417e-05, 3.6394e-05, 2.6024e-05, 5.2678e-05, 1.5244e-05,\n 2.4767e-05, 2.4183e-05, 5.2878e-05, 4.8513e-05, 2.2642e-05, 1.9363e-05,\n 3.0443e-05, 1.6037e-05, 2.5480e-05, 5.8216e-05, 2.4496e-05, 2.1859e-05,\n 2.2861e-05, 4.8239e-05, 4.4961e-05, 1.9982e-05, 5.6496e-05, 3.3132e-05,\n 3.1687e-05, 3.2449e-05, 3.2132e-05, 5.6885e-06, 4.1585e-05, 1.1181e-04,\n 2.8751e-05, 6.9439e-05, 3.1285e-05, 4.9438e-05, 7.4297e-05, 5.0369e-05,\n 4.1125e-05, 3.0028e-05, 4.4652e-05, 3.2525e-05, 5.6218e-05, 2.6815e-05,\n 4.2206e-05, 4.4187e-05, 4.2330e-05, 3.2468e-05, 2.6493e-05, 4.7644e-05,\n 5.2818e-05, 2.9041e-05, 3.1635e-05, 6.5447e-05, 2.4893e-05, 4.8579e-05,\n 4.4575e-05, 2.5170e-05, 2.2749e-05, 3.4557e-05, 2.6624e-05, 3.4758e-05,\n 4.0122e-05, 7.6250e-05, 2.9649e-05, 3.1879e-05, 2.1678e-05, 4.9898e-05,\n 1.9739e-05, 6.5436e-05, 3.3062e-05, 2.4074e-05, 2.7804e-05, 2.4204e-05,\n 3.3107e-05, 4.5168e-05, 2.0051e-05, 6.0976e-05, 3.3214e-05, 3.2637e-05,\n 6.1338e-05, 4.7766e-05, 2.5993e-05, 3.0718e-05, 5.3690e-05, 2.7078e-05,\n 4.9701e-05, 2.3184e-05, 2.7404e-05, 5.1454e-05, 9.5913e-05, 3.5225e-05,\n 2.7800e-05, 3.0254e-05, 3.2567e-05, 4.5391e-05, 5.6206e-05, 2.1626e-05,\n 4.2105e-05, 3.6429e-05, 7.7518e-05, 4.0622e-05, 4.6062e-05, 5.5754e-05,\n 2.5351e-05, 2.5290e-05, 5.0490e-05, 2.7350e-05, 3.6913e-05, 1.5419e-05,\n 3.7346e-05, 4.7707e-05, 8.0281e-05, 3.3811e-05, 3.9531e-05, 4.3370e-05,\n 6.0996e-05, 9.6384e-05, 2.8759e-05, 4.8487e-05, 3.8545e-05, 3.0818e-05,\n 3.4141e-05, 5.3290e-05, 1.6574e-05, 2.4187e-05, 2.9451e-05, 3.2121e-05,\n 5.4885e-05, 2.5658e-05, 5.7920e-05, 4.4210e-05, 3.2297e-05, 3.0254e-05,\n 4.1670e-05, 2.4019e-05, 5.4060e-05, 5.5981e-05, 3.8969e-05, 2.2952e-05,\n 3.9194e-05, 4.1080e-05, 2.9252e-05, 3.7416e-05, 3.1515e-05, 2.9957e-05,\n 3.2519e-05, 2.5477e-05, 2.6336e-05, 3.2608e-05, 2.6681e-05, 2.4757e-05,\n 1.9343e-05, 7.7811e-05, 1.0919e-04, 3.1671e-05, 3.7263e-05, 2.5811e-05,\n 3.9475e-05, 4.6486e-05, 2.3471e-05, 3.0221e-05, 2.1364e-05, 3.4847e-05,\n 1.9906e-05, 3.7566e-05, 6.2915e-05, 3.5673e-05, 2.0971e-05, 1.3775e-05,\n 2.5886e-05, 3.2966e-05, 3.7085e-05, 4.2224e-05, 4.9304e-05, 3.1674e-05,\n 3.3310e-05, 9.7900e-05, 3.3880e-05, 5.3077e-05, 3.4089e-05, 3.8364e-05,\n 4.3064e-05, 5.2832e-05, 2.9283e-05, 3.1662e-05, 3.7609e-05, 2.3761e-05,\n 1.9821e-05, 5.9086e-05, 4.4422e-05, 7.3055e-05, 4.2371e-05, 2.9435e-05,\n 4.7732e-05, 3.3243e-05, 4.1571e-05, 3.4502e-05, 7.2742e-05, 2.0847e-05,\n 3.0374e-05, 4.6656e-05, 4.8340e-05, 5.3380e-05, 1.0037e-04, 4.6474e-05,\n 2.8220e-05, 2.4303e-05, 5.6743e-05, 2.0559e-05, 4.9331e-05, 7.3352e-05,\n 3.8659e-05, 2.9178e-05, 4.7245e-05, 4.6233e-05, 3.7560e-05, 3.7634e-05,\n 6.8641e-05, 3.7014e-05, 2.6410e-05, 3.3064e-05, 3.0036e-05, 1.7175e-05,\n 5.4557e-05, 3.8638e-05, 3.0077e-05, 2.7356e-05, 6.0739e-05, 3.9685e-05,\n 3.4580e-05, 3.7602e-05, 4.1025e-05, 6.3562e-05, 3.4838e-05, 1.1622e-04,\n 5.4118e-05, 3.6174e-05, 2.5391e-05, 3.7970e-05, 3.6874e-05, 3.5833e-05,\n 4.0191e-05, 3.5443e-05, 4.2251e-05, 6.1310e-05, 3.0435e-05, 2.1132e-05,\n 7.5010e-05, 3.9117e-05, 1.9904e-05, 4.6380e-05, 4.2154e-05, 2.9135e-05,\n 3.3584e-05, 2.9814e-05, 7.4191e-05, 2.9481e-05, 3.2056e-05, 3.6697e-05,\n 5.8467e-05, 4.3930e-05, 4.4858e-05, 6.5659e-05, 5.3989e-05, 3.6605e-05,\n 3.8286e-05, 4.5257e-05, 4.0054e-05, 3.1642e-05, 3.7238e-05, 2.7781e-05,\n 2.0797e-05, 3.1943e-05, 5.0454e-05, 3.5317e-05, 2.9891e-05, 3.5385e-05,\n 5.5088e-05, 3.6074e-05, 6.7875e-05, 3.7448e-05, 3.0007e-05, 3.8545e-05,\n 5.0295e-05, 2.4881e-05, 2.0104e-05, 3.4975e-05, 4.5873e-05, 2.2738e-05,\n 2.3076e-05, 2.6647e-05, 4.0715e-05, 2.3116e-05, 4.1577e-05, 2.7681e-05,\n 3.0646e-05, 1.9237e-05, 3.2768e-05, 4.3323e-05, 3.3892e-05, 3.7011e-05,\n 3.7811e-05, 3.3281e-06], device='cuda:0')"
14
+ },
15
+ "2": {
16
+ "step": "tensor(26278.)",
17
+ "exp_avg": "tensor([ 1.1057e-02, 2.9721e-03, -1.2478e-03, 3.7059e-03, 1.4765e-03,\n 1.3749e-03, 4.0834e-03, 2.2043e-03, -8.9226e-04, -5.2326e-03,\n 8.5300e-04, 4.9781e-03, 4.1976e-03, -9.8771e-04, -2.1235e-03,\n 3.9776e-03, -6.9704e-03, 4.4735e-03, 5.4240e-03, -2.3764e-03,\n 1.0043e-03, 2.2200e-03, 3.2584e-03, -2.1388e-04, 4.6028e-03,\n 5.1042e-04, 5.4424e-04, -1.9946e-04, 1.9860e-03, 3.7000e-03,\n 3.9763e-04, 6.2039e-03, -1.7839e-03, -9.8892e-04, -9.9517e-04,\n -3.8247e-04, 3.9788e-04, 3.3618e-03, 4.4031e-04, -2.5119e-03,\n -3.0337e-03, -4.5581e-03, -5.6052e-45, 1.2962e-03, 5.8907e-03,\n -3.6785e-03, 6.1044e-04, -2.0085e-03, 2.7697e-03, 8.7796e-04,\n -1.3635e-04, 8.4242e-04, -2.6731e-03, -3.8472e-04, -6.1227e-04,\n 3.0793e-03, 3.4143e-03, 2.6914e-03, 3.9450e-03, -4.3486e-04,\n -8.7012e-03, -3.6767e-03, 4.0644e-03, -4.6036e-04, 4.0752e-03,\n -7.7079e-03, -2.1514e-03, 5.4845e-03, 1.4485e-04, -2.6602e-03,\n -8.9122e-03, -1.1262e-03, -1.3087e-03, -5.7018e-03, 2.4599e-03,\n -1.1356e-03, 6.6582e-04, -3.6985e-03, 7.0949e-04, -2.8624e-03,\n -8.8852e-03, 3.8411e-03, 3.4398e-03, -2.4528e-04, 9.0155e-04,\n 1.3311e-04, -7.8088e-03, 1.5787e-03, 6.7566e-04, 1.9242e-03,\n -2.5012e-03, -2.9035e-03, -7.7015e-04, 3.1640e-03, -2.2379e-03,\n -2.6981e-03, 6.0848e-03, 3.5083e-03, -3.2507e-03, -9.1761e-03,\n -4.1584e-03, 2.5944e-03, 2.9905e-03, 1.0695e-03, 1.0729e-03,\n -2.9294e-04, -7.7984e-06, 6.8819e-03, -1.4001e-03, -3.4940e-04,\n 1.6516e-03, -2.1631e-03, 1.2170e-03, 5.1698e-03, 2.4042e-03,\n -1.8996e-04, 3.0311e-03, -1.9555e-03, -4.1904e-03, 2.6819e-03,\n -3.5851e-03, 6.5712e-03, -1.0170e-02, -3.9758e-03, -1.9747e-03,\n 8.9447e-03, 2.6499e-03, 3.4893e-03, 7.9454e-03, -3.8864e-03,\n -7.6000e-03, 3.1029e-03, -5.1611e-03, -1.7219e-03, 8.0004e-03,\n 4.6570e-03, -2.2431e-03, 3.9238e-03, 2.5132e-03, 2.9512e-03,\n 2.8715e-03, 5.2816e-03, -9.8351e-05, -5.9136e-03, -3.7155e-03,\n -3.0509e-04, 4.8988e-03, -2.1712e-03, 3.5699e-04, -9.1173e-03,\n -2.6986e-03, -2.3809e-03, 2.7967e-03, -2.5008e-04, -8.9120e-04,\n -5.2748e-05, 3.2859e-03, -3.2641e-03, -2.0989e-03, -2.7166e-03,\n -4.4774e-04, -2.8424e-03, -1.3174e-03, 4.4307e-03, 3.4785e-03,\n -1.0928e-02, -2.0453e-03, -2.4476e-03, -1.0350e-03, 1.0999e-03,\n -3.2746e-03, 8.3149e-04, 9.6136e-04, 2.0536e-03, 2.6458e-04,\n 2.2282e-03, 7.9615e-04, -2.1618e-04, 4.4196e-03, 9.9605e-04,\n 6.4842e-03, -7.3625e-05, -3.0407e-03, -2.2598e-03, -3.6101e-04,\n 8.0223e-04, -2.8889e-03, 1.6844e-03, -8.2075e-04, 1.8380e-03,\n -9.2578e-04, -2.9649e-03, 1.4492e-03, -6.5268e-04, 5.9824e-03,\n -4.5731e-04, -4.6494e-04, -2.6587e-03, 2.6374e-03, 1.1573e-03,\n -1.1945e-03, -6.4956e-03, 6.5093e-04, 4.7847e-04, 1.0639e-03,\n -1.1541e-02, -2.2123e-03, 1.8119e-03, 2.4732e-03, 1.1275e-03,\n -7.1928e-04, 2.9060e-03, -3.8094e-04, 1.3791e-03, -1.3385e-03,\n -3.6465e-03, 4.0370e-03, -5.3788e-03, 1.5600e-03, -2.8533e-06,\n 2.0756e-03, -3.3264e-04, 1.2237e-03, -2.0239e-03, -3.0464e-03,\n -1.3153e-03, -8.9657e-04, -2.0026e-03, 4.1261e-03, 1.2354e-03,\n 2.5486e-03, 8.7515e-04, -1.6101e-03, 4.2298e-03, 2.6803e-04,\n -7.3476e-04, -4.9191e-03, 5.6052e-45, 7.0194e-04, -3.6668e-03,\n 1.2493e-03, 5.7303e-03, 6.8796e-04, -2.7137e-03, -6.9155e-03,\n 4.1582e-03, -1.8916e-03, 2.9130e-03, -6.6771e-03, 3.5858e-03,\n -4.9511e-03, 2.2843e-03, -6.8449e-03, 6.0237e-03, -1.4312e-03,\n -2.5641e-04, 4.5481e-03, 3.9534e-03, 3.3352e-03, 2.0644e-03,\n 6.4294e-03, 1.4889e-03, 8.9333e-04, -3.7269e-03, 4.7972e-03,\n 4.1914e-03, 2.7661e-03, 4.3122e-04, 6.3283e-05, -1.4200e-03,\n 6.1560e-03, -9.5318e-03, 1.5898e-03, 3.2328e-03, -2.2822e-03,\n 3.9110e-03, -4.4006e-03, -5.8025e-04, -5.1800e-03, 7.0996e-04,\n 2.2589e-03, -4.4590e-03, 4.3160e-03, 4.8433e-04, 2.0226e-03,\n 1.3353e-03, -1.5241e-03, -4.8530e-04, -2.4350e-03, 1.3482e-03,\n -2.1060e-03, 6.6149e-04, 2.1692e-03, 1.8330e-03, -9.9173e-03,\n -3.5078e-03, -4.1088e-04, -1.4799e-03, -1.5199e-03, 6.6988e-04,\n 7.8652e-03, -8.2355e-04, -8.9547e-04, 3.1213e-03, 1.9013e-03,\n -1.0853e-02, 1.1609e-03, -9.2284e-04, -5.6786e-03, -1.2890e-03,\n -1.4580e-03, 1.0513e-02, 6.8722e-03, -4.2812e-03, 1.9843e-03,\n 3.1991e-03, -3.7263e-04, 2.9112e-04, 5.3564e-03, -6.7354e-03,\n -7.8477e-03, -6.8849e-03, 4.2474e-03, -3.8156e-03, -3.8035e-03,\n 1.1610e-02, -2.5544e-03, -2.0941e-03, 5.9960e-03, 5.6078e-03,\n 3.4711e-03, -2.3638e-03, -1.3246e-03, 5.3682e-04, -9.2226e-04,\n 7.5835e-03, -6.1002e-03, 1.0370e-03, -7.4184e-04, 5.0754e-05,\n 6.9722e-06, -4.8993e-04, -5.0537e-03, 8.4007e-04, 7.8576e-04,\n 4.3101e-03, -1.0744e-04, -2.6777e-03, -1.0397e-03, -4.1480e-03,\n -3.4216e-03, -5.8708e-04, 2.6121e-03, -1.2183e-03, -2.3422e-03,\n 5.0907e-03, 3.5773e-03, 1.4732e-03, -4.5489e-04, -5.1333e-03,\n 3.6476e-04, 4.7972e-03, 1.9193e-03, -2.3289e-03, -5.3360e-03,\n -8.9331e-04, 2.0117e-04, -2.7782e-03, 7.8395e-04, 1.1013e-03,\n -6.7689e-03, -2.0982e-03, 9.0271e-04, -1.0797e-03, 2.7051e-03,\n 2.2805e-03, -9.2531e-05, -4.3955e-03, 5.2031e-03, 2.6116e-03,\n 4.6513e-03, 3.5368e-03, 2.5872e-04, 2.3014e-03, 4.4183e-04,\n -4.8670e-04, 6.0738e-03, -2.7835e-03, -3.3018e-03, 1.7224e-03,\n 1.4324e-03, 3.1655e-03, 1.9730e-03, -5.8064e-03, 3.0950e-03,\n -7.5152e-04, -5.2950e-04, 3.4411e-03, 3.6647e-04, -4.4902e-03,\n 3.0496e-03, -4.3884e-04, -1.1618e-02, 9.6117e-03, -2.2574e-03,\n -2.0683e-03, -6.2008e-03, -1.5997e-03, 1.1445e-04, -7.2797e-04,\n 6.2600e-03, 3.1176e-03, 2.0882e-03, 2.4649e-03, -3.2683e-03,\n 3.5847e-03, 5.1332e-03, 7.1893e-04, 5.6223e-03, 5.0460e-03,\n 1.5626e-03, -1.1822e-03, 2.9342e-03, 4.9869e-04, -7.5901e-03,\n -5.0928e-03, -1.7860e-03, -4.6200e-03, 5.1951e-03, -4.0044e-03,\n 1.4131e-03, -2.9549e-04, 6.8734e-04, 1.2553e-03, -2.6254e-03,\n -2.5762e-03, 1.3990e-03, -8.8470e-04, 1.0827e-05, -2.9487e-03,\n -3.7730e-03, 1.9905e-03, 1.4838e-03, -1.2726e-03, -7.6005e-03,\n -2.5853e-04, -6.8019e-03, -1.3626e-03, -2.5327e-04, 2.6381e-03,\n -2.5138e-03, 4.2260e-04, -1.4189e-03, -6.7779e-03, -1.4525e-03,\n -2.6068e-03, 3.5431e-03, 1.4900e-03, 2.6778e-03, 4.0317e-03,\n -8.4442e-03, -1.2846e-03, 4.4539e-04, 9.5605e-04, -5.9367e-03,\n 9.3550e-04, -4.0031e-03, 7.7888e-04, -6.4482e-03, 1.7599e-03,\n -2.5790e-03, -1.0833e-03, -5.3003e-03, -5.5724e-03, 9.6619e-04,\n 2.5952e-03, -9.2388e-04, 2.1753e-03, -4.0842e-03, 8.4569e-04,\n -4.1886e-04, 3.7846e-04, -1.4389e-03, -3.3955e-03, 2.7418e-03,\n -6.4486e-03, -3.0865e-03, -7.1147e-03, 2.5566e-03, 4.5752e-04,\n -1.4884e-03, -2.5238e-03, -2.0323e-03, -5.3152e-04, -6.0265e-04,\n -1.9145e-03, -1.1614e-03, -7.2407e-03, -1.3863e-03, -1.8758e-03,\n 4.0432e-03, 5.6177e-04, -2.0020e-04, -3.9575e-03, -9.9080e-04,\n 2.0824e-03, -5.3066e-04, -7.6417e-03, -2.3263e-03, 8.2097e-05,\n 5.5762e-04, -5.6052e-45], device='cuda:0')",
18
+ "exp_avg_sq": "tensor([3.7272e-04, 2.3569e-04, 1.1073e-04, 1.9594e-04, 1.2175e-04, 3.0850e-04,\n 1.8129e-04, 2.7352e-04, 2.6310e-04, 3.3524e-04, 3.9032e-04, 3.5395e-04,\n 3.3698e-04, 1.8173e-04, 1.9316e-04, 2.0722e-04, 2.0938e-04, 2.0020e-04,\n 2.2015e-04, 2.5990e-04, 2.6112e-04, 9.5391e-05, 1.3399e-04, 3.2602e-04,\n 4.8135e-04, 2.1332e-04, 1.8190e-04, 3.6229e-04, 2.3292e-04, 2.6685e-04,\n 1.7875e-04, 2.4346e-04, 2.0237e-04, 1.7026e-04, 1.3712e-04, 1.3816e-04,\n 1.3580e-04, 1.6976e-04, 1.3705e-04, 1.4829e-04, 1.2350e-04, 1.0019e-04,\n 2.8879e-15, 2.1334e-04, 4.9762e-04, 1.5530e-04, 1.6735e-04, 7.1235e-05,\n 4.2976e-04, 2.8983e-04, 3.4979e-04, 1.4587e-04, 1.4345e-04, 3.0479e-04,\n 1.9658e-04, 2.0371e-04, 2.5596e-04, 1.2929e-04, 1.6363e-04, 1.0977e-04,\n 2.0965e-04, 2.0025e-04, 1.9573e-04, 1.4422e-04, 1.2483e-04, 1.4239e-04,\n 2.4541e-04, 1.4258e-04, 2.6357e-04, 1.2783e-04, 2.2661e-04, 1.0660e-04,\n 2.4364e-04, 2.9084e-04, 1.4980e-04, 2.2186e-04, 2.2718e-04, 1.8747e-04,\n 2.3284e-04, 2.4402e-04, 2.5142e-04, 1.5408e-04, 1.4917e-04, 8.2921e-05,\n 1.5225e-04, 3.4118e-04, 4.2835e-04, 1.8289e-04, 9.2570e-05, 5.5540e-04,\n 2.2752e-04, 1.7143e-04, 2.4959e-04, 2.7937e-04, 2.7872e-04, 2.3593e-04,\n 2.3436e-04, 2.0578e-04, 2.5927e-04, 3.4075e-04, 2.6275e-04, 2.6464e-04,\n 2.5228e-04, 2.1630e-04, 2.9006e-04, 4.2559e-04, 1.7239e-04, 4.7324e-04,\n 2.2525e-04, 1.8371e-04, 1.8902e-04, 3.7064e-04, 2.4699e-04, 1.6792e-04,\n 2.0210e-04, 1.3333e-04, 2.0397e-04, 2.7647e-04, 1.5742e-04, 3.1788e-04,\n 1.6324e-04, 2.9103e-04, 4.5806e-04, 4.1530e-04, 3.1649e-04, 3.9629e-04,\n 2.0873e-04, 3.2603e-04, 3.8211e-04, 4.6564e-04, 2.5392e-04, 2.6083e-04,\n 3.1880e-04, 2.3426e-04, 3.8797e-04, 1.3777e-04, 1.9851e-04, 3.2335e-04,\n 1.5788e-04, 1.1760e-04, 1.7339e-04, 2.0892e-04, 2.5360e-04, 2.4590e-04,\n 1.9467e-04, 1.7455e-04, 1.0620e-04, 3.8733e-04, 2.2754e-04, 3.6499e-04,\n 1.0720e-04, 2.7501e-04, 1.4279e-04, 2.9292e-04, 9.5532e-05, 9.9032e-05,\n 3.7619e-04, 2.6755e-04, 2.4669e-04, 1.5689e-04, 3.3054e-04, 1.3495e-04,\n 3.9819e-04, 2.0218e-04, 1.0243e-04, 4.1373e-04, 2.4172e-04, 1.0502e-04,\n 8.0785e-05, 2.0109e-04, 1.0702e-04, 2.6341e-04, 2.0426e-04, 4.2571e-04,\n 2.5234e-04, 1.5143e-04, 2.3525e-04, 1.2639e-04, 1.4603e-04, 2.1983e-04,\n 1.9628e-04, 2.1861e-04, 5.9511e-04, 2.2679e-04, 5.7669e-04, 1.4614e-04,\n 6.4085e-04, 1.2604e-04, 1.8115e-04, 1.0307e-04, 1.8792e-04, 2.6863e-04,\n 1.6408e-04, 1.1844e-04, 1.4463e-04, 9.6153e-05, 2.2835e-04, 1.3589e-04,\n 8.1554e-05, 1.0077e-04, 1.2486e-04, 2.1277e-04, 1.4780e-04, 1.0421e-04,\n 5.7787e-05, 2.0666e-04, 1.5332e-04, 1.3051e-04, 3.3109e-04, 9.6194e-05,\n 2.5955e-04, 2.4839e-04, 3.3186e-05, 1.2464e-04, 2.7197e-04, 2.0132e-04,\n 1.0098e-04, 5.1513e-04, 2.6305e-04, 3.7280e-04, 1.6754e-04, 1.7121e-04,\n 1.9580e-04, 1.5898e-04, 2.1556e-04, 3.2501e-04, 1.0225e-04, 1.3204e-04,\n 8.3328e-05, 1.7111e-04, 2.0642e-04, 1.2740e-04, 7.1473e-04, 2.8521e-04,\n 1.0915e-04, 1.5643e-04, 1.7865e-04, 4.0963e-16, 1.5873e-04, 4.2883e-04,\n 1.2388e-04, 3.1115e-04, 4.4600e-04, 2.8450e-04, 1.8723e-04, 3.6179e-04,\n 1.3814e-04, 1.2138e-04, 2.5919e-04, 1.6279e-04, 3.2413e-04, 1.0395e-04,\n 4.5013e-04, 2.6109e-04, 1.4606e-04, 2.2744e-04, 1.8456e-04, 1.7723e-04,\n 1.1324e-04, 1.8725e-04, 2.6407e-04, 2.6536e-04, 1.6959e-04, 1.7151e-04,\n 2.0964e-04, 2.2139e-04, 2.0561e-04, 2.2360e-04, 2.2215e-04, 3.7400e-04,\n 3.5945e-04, 4.2968e-04, 6.4775e-05, 2.3642e-04, 1.1377e-04, 1.5115e-04,\n 1.7154e-04, 2.7332e-04, 2.3472e-04, 2.2051e-04, 2.0309e-04, 8.1920e-05,\n 1.4647e-04, 1.4318e-04, 1.4708e-04, 2.5374e-04, 2.8452e-04, 1.2055e-04,\n 5.2685e-04, 2.1034e-04, 1.2949e-04, 8.0732e-05, 1.7337e-04, 1.0565e-04,\n 2.3332e-04, 1.4127e-04, 1.9939e-04, 1.5479e-04, 4.5548e-04, 1.9700e-04,\n 1.3210e-04, 1.3359e-04, 3.3658e-04, 2.2081e-04, 2.0701e-04, 1.8896e-04,\n 1.9052e-04, 1.8273e-04, 7.3202e-04, 4.8659e-04, 4.4470e-04, 2.3243e-04,\n 1.7674e-04, 1.9172e-04, 1.7976e-04, 2.8841e-04, 1.6620e-04, 1.0234e-04,\n 2.1820e-04, 2.6779e-04, 5.7742e-04, 3.8944e-04, 2.0077e-04, 2.9705e-04,\n 3.8408e-04, 4.2418e-04, 2.3040e-04, 3.3882e-04, 1.7182e-04, 2.8088e-04,\n 3.9296e-04, 3.6788e-04, 7.7483e-05, 1.5052e-04, 1.7666e-04, 1.7876e-04,\n 8.9891e-05, 1.4782e-04, 2.4637e-04, 2.4164e-04, 1.8011e-04, 1.3249e-04,\n 2.0672e-04, 3.4759e-04, 1.8522e-04, 2.6587e-04, 2.0357e-04, 1.0572e-04,\n 1.5897e-04, 2.5154e-04, 2.3093e-04, 2.0713e-04, 2.1163e-04, 2.3698e-04,\n 2.5316e-04, 1.5513e-04, 2.2081e-04, 1.4663e-04, 1.2361e-04, 1.4135e-04,\n 1.0441e-04, 4.5714e-04, 4.0428e-04, 3.7437e-04, 1.9909e-04, 1.7879e-04,\n 7.8571e-05, 3.4144e-04, 9.9521e-05, 1.7189e-04, 1.2310e-04, 2.1429e-04,\n 3.1011e-04, 2.1662e-04, 1.5775e-04, 1.9690e-04, 1.3198e-04, 1.2681e-04,\n 2.4762e-04, 1.0713e-04, 2.5181e-04, 1.8309e-04, 3.0123e-04, 2.5259e-04,\n 2.0671e-04, 3.7878e-04, 1.7512e-04, 1.8069e-04, 2.2715e-04, 1.4659e-04,\n 1.4762e-04, 1.4509e-04, 1.7325e-04, 4.7425e-04, 1.5085e-04, 9.9037e-05,\n 1.5957e-04, 4.9074e-04, 2.1839e-04, 3.3899e-04, 1.9784e-04, 1.1490e-04,\n 4.4081e-04, 1.8734e-04, 2.1199e-04, 1.3620e-04, 5.4520e-04, 5.1681e-05,\n 3.0102e-04, 2.7572e-04, 2.9587e-04, 6.3352e-05, 4.8584e-04, 1.9398e-04,\n 2.0137e-04, 5.5925e-04, 4.4765e-04, 2.8280e-04, 2.4867e-04, 1.7519e-04,\n 2.1496e-04, 1.7398e-04, 4.0588e-04, 2.8534e-04, 6.0899e-04, 2.3097e-04,\n 2.1303e-04, 1.9871e-04, 2.8279e-04, 1.5779e-04, 1.9294e-04, 1.0869e-04,\n 1.6951e-04, 2.1782e-04, 3.1986e-04, 2.1189e-04, 2.8926e-04, 4.0111e-04,\n 2.2489e-04, 1.1405e-04, 2.8224e-04, 2.1638e-04, 1.9164e-04, 6.9341e-04,\n 2.0047e-04, 2.8912e-04, 2.9877e-04, 1.4236e-04, 2.4314e-04, 2.7697e-04,\n 2.9927e-04, 1.3976e-04, 1.8271e-04, 4.9780e-04, 2.7049e-04, 1.2427e-04,\n 3.0442e-04, 1.7494e-04, 1.1187e-04, 1.8238e-04, 2.2294e-04, 1.4365e-04,\n 2.3263e-04, 1.5694e-04, 1.8973e-04, 1.4452e-04, 1.2705e-04, 2.3750e-04,\n 3.8404e-04, 2.8048e-04, 2.8880e-04, 4.0094e-04, 2.4995e-04, 1.4645e-04,\n 2.1934e-04, 2.4298e-04, 2.1931e-04, 2.0374e-04, 3.9145e-04, 1.4511e-04,\n 8.5923e-05, 4.9213e-04, 2.4424e-04, 1.4925e-04, 1.8684e-04, 1.3183e-04,\n 1.2674e-04, 2.0915e-04, 3.6676e-04, 1.1056e-04, 1.5157e-04, 1.2556e-04,\n 2.8443e-04, 1.5460e-04, 1.6598e-04, 2.8442e-04, 2.2107e-04, 1.9120e-04,\n 1.1982e-04, 1.7666e-04, 1.8381e-04, 1.3060e-04, 1.7243e-04, 1.8268e-04,\n 1.1523e-04, 2.3315e-04, 1.1742e-04, 1.8343e-04, 8.3238e-05, 1.0353e-04,\n 1.0205e-04, 3.1576e-12], device='cuda:0')"
19
+ },
20
+ "3": {
21
+ "step": "tensor(26278.)",
22
+ "exp_avg": "tensor([ 4.6579e-03, 9.3006e-04, -2.7380e-04, 1.3315e-03, 1.1086e-03,\n 8.9403e-04, 1.9528e-03, 2.2440e-03, 5.7100e-05, -2.0300e-03,\n 4.2107e-04, 2.3251e-03, 2.0217e-03, -2.1078e-04, -1.1552e-03,\n 2.1792e-03, -3.2555e-03, 2.4898e-03, 2.2379e-03, -1.0236e-03,\n 1.4660e-04, 1.1224e-03, 1.3776e-03, 3.1584e-04, 2.5621e-03,\n 5.2904e-04, -3.7146e-05, 2.5094e-04, 1.0523e-03, 1.5538e-03,\n 1.2619e-04, 2.2750e-03, -5.3559e-04, -1.1203e-03, -6.7421e-04,\n 1.6342e-04, 4.7851e-04, 1.5821e-03, 2.8061e-04, -7.6273e-04,\n -1.5845e-03, -2.2697e-03, 5.6052e-45, 1.1185e-03, 2.3525e-03,\n -1.7009e-03, 4.9870e-04, -1.1146e-03, 9.0475e-04, 8.0811e-04,\n -1.4177e-04, 2.5699e-04, -1.3419e-03, 5.2890e-05, -7.7844e-04,\n 1.3341e-03, 1.5467e-03, 1.4999e-03, 1.9392e-03, -4.6178e-04,\n -5.4534e-03, -2.0713e-03, 2.1832e-03, -5.1702e-04, 2.0874e-03,\n -3.3909e-03, -8.3530e-04, 2.6613e-03, 4.2542e-04, -1.2263e-03,\n -4.1428e-03, -1.2245e-03, -4.5375e-04, -1.9097e-03, 1.3477e-03,\n -1.5284e-04, 1.5180e-05, -2.5760e-03, 3.3822e-05, -1.9700e-03,\n -3.9286e-03, 2.2585e-03, 1.5274e-03, -1.5841e-04, 5.5408e-04,\n -2.8314e-04, -2.9718e-03, 8.5245e-04, 1.5403e-04, -1.0235e-04,\n -7.0212e-04, -1.3414e-03, -2.4927e-04, 1.1328e-03, -8.0754e-04,\n -1.4141e-03, 3.2036e-03, 1.8069e-03, -2.1012e-03, -3.3246e-03,\n -1.6199e-03, 1.4966e-03, 1.6755e-03, 1.0236e-03, 3.6924e-04,\n -1.3027e-03, 1.4951e-04, 3.1482e-03, -8.9914e-04, -5.8282e-04,\n 1.3956e-03, -3.4212e-04, 4.4301e-04, 2.7572e-03, 1.5405e-03,\n 2.1232e-05, 1.7863e-03, -7.5445e-04, -1.6657e-03, 1.1039e-03,\n -9.1108e-04, 3.2598e-03, -3.6422e-03, -1.3543e-03, -8.4076e-04,\n 3.5588e-03, 1.5780e-03, 1.9629e-03, 3.2942e-03, -2.5572e-03,\n -3.7754e-03, 1.1644e-03, -2.1604e-03, -6.5469e-04, 4.4583e-03,\n 1.6403e-03, -8.1737e-04, 1.3172e-03, 9.8360e-04, 1.5086e-03,\n 1.1777e-03, 2.3461e-03, 2.8974e-04, -3.1872e-03, -1.5071e-03,\n -1.1982e-04, 2.2010e-03, -6.7399e-04, 2.1898e-04, -5.3037e-03,\n -9.6700e-04, -1.2861e-03, 1.1977e-03, 9.5214e-05, -4.7040e-04,\n -1.7868e-04, 1.7103e-03, -8.9754e-04, -8.2662e-04, -1.3771e-03,\n -9.6063e-05, -9.8948e-04, -2.2757e-04, 1.6991e-03, 1.4114e-03,\n -5.0553e-03, -6.8149e-04, -1.0825e-03, -3.4918e-04, 1.2514e-04,\n -1.7706e-03, 4.7774e-04, 5.8100e-04, 1.1825e-03, 1.7527e-04,\n 9.7035e-04, 4.5683e-04, 2.6436e-04, 1.9063e-03, -3.7777e-04,\n 2.9076e-03, -2.6081e-04, -1.6010e-03, -8.1349e-04, 1.8754e-04,\n 2.0890e-04, -1.4208e-03, 8.8059e-04, -1.6455e-04, 8.0921e-04,\n -6.8854e-04, -1.2316e-03, 7.0616e-04, -6.0584e-04, 2.4194e-03,\n -4.3896e-04, -4.3918e-04, -1.3543e-03, 1.1644e-03, 5.9253e-04,\n -6.6813e-04, -2.7691e-03, 2.0572e-04, 2.3317e-04, 9.2298e-04,\n -4.9371e-03, -1.0809e-03, 8.2263e-04, 1.0917e-03, 4.6845e-04,\n -3.2437e-04, 1.9614e-03, -5.4253e-04, 8.5057e-04, -3.9627e-04,\n -1.6867e-03, 1.8392e-03, -1.5345e-03, -3.1594e-04, 1.6698e-04,\n 1.0919e-03, 2.0186e-04, 5.9777e-04, -6.2514e-04, -1.1590e-03,\n -4.8890e-04, -5.9085e-04, -1.0084e-03, 2.2432e-03, 6.2393e-04,\n 1.2176e-03, 3.8285e-04, -1.0920e-03, 1.7585e-03, 2.9298e-04,\n 1.8577e-04, -1.9986e-03, 5.6052e-45, 4.0409e-04, -1.6281e-03,\n 7.4827e-04, 2.8240e-03, 3.0716e-04, -1.1135e-03, -5.1661e-03,\n 1.6749e-03, -9.4808e-04, 1.4132e-03, -3.1486e-03, 1.5961e-03,\n -2.5754e-03, 1.4814e-03, -2.0622e-03, 2.9958e-03, -9.6756e-04,\n 2.2454e-04, 2.6481e-03, 2.0143e-03, 1.9394e-03, 7.4916e-04,\n 2.7130e-03, 6.4239e-04, 4.5333e-04, -2.4958e-03, 2.1212e-03,\n 1.9207e-03, 1.1421e-03, 3.0106e-04, 1.4404e-04, -2.3920e-04,\n 2.9871e-03, -4.0385e-03, 7.2901e-04, 1.8725e-03, -6.1725e-04,\n 1.8520e-03, -1.9851e-03, 7.2036e-05, -1.9807e-03, 3.8990e-04,\n 1.0464e-03, -1.6936e-03, 1.7828e-03, 4.4735e-04, 5.4971e-04,\n 4.9659e-04, -5.5566e-04, -1.1745e-04, -1.1063e-03, 6.7595e-04,\n -1.3216e-03, -5.5191e-05, 1.1561e-03, 7.8845e-04, -3.7742e-03,\n -1.3959e-03, -2.4944e-04, -8.1318e-04, -7.3114e-04, 3.1288e-04,\n 3.4324e-03, -2.7702e-04, -4.6170e-04, 1.4969e-03, 1.5684e-03,\n -4.2380e-03, 3.9193e-04, -1.6641e-04, -2.1461e-03, -1.0431e-05,\n -5.3000e-04, 4.4557e-03, 2.6627e-03, -1.6985e-03, 9.2023e-04,\n 9.0653e-04, -3.4751e-04, 5.7367e-04, 2.0726e-03, -3.0940e-03,\n -3.5137e-03, -3.2008e-03, 2.2456e-03, -1.6815e-03, -1.8480e-03,\n 5.2562e-03, -1.2705e-03, -8.9445e-04, 3.1038e-03, 2.0554e-03,\n 2.1381e-03, -7.3888e-04, -7.9342e-04, 4.7037e-05, -3.0678e-04,\n 3.5362e-03, -4.2393e-03, 1.0852e-04, -5.2668e-04, -1.4750e-04,\n -1.2383e-04, -6.2252e-04, -2.6280e-03, 8.8988e-04, 1.4052e-04,\n 2.2314e-03, -3.5103e-04, -1.0741e-03, -8.3359e-04, -1.7158e-03,\n -1.1431e-03, -4.5850e-04, 1.2137e-03, -7.8640e-04, -1.3837e-03,\n 2.1628e-03, 1.2245e-03, 6.1093e-04, -1.2782e-05, -1.9238e-03,\n 1.3611e-04, 2.8768e-03, 9.7239e-04, -6.4529e-04, -2.1146e-03,\n -4.3846e-04, 6.6090e-05, -9.4547e-04, 9.4551e-05, 5.8804e-04,\n -3.7065e-03, -7.4420e-04, -1.3705e-04, -3.9680e-04, 1.1804e-03,\n 7.6823e-04, -2.5164e-05, -1.8298e-03, 2.0020e-03, 1.4096e-03,\n 1.9074e-03, 1.6705e-03, 1.0556e-04, 8.7170e-04, -1.7034e-05,\n 3.9387e-04, 2.3874e-03, -1.4929e-03, -7.2842e-04, 1.1296e-03,\n 4.8481e-04, 1.3317e-03, 1.0142e-03, -2.1709e-03, 1.6552e-03,\n -4.6592e-04, 1.2495e-04, 1.8086e-03, 5.4203e-04, -2.2706e-03,\n 1.7329e-03, -2.9541e-04, -4.5858e-03, 3.8114e-03, -1.7670e-03,\n -1.3912e-03, -3.4096e-03, -7.8475e-04, 2.3799e-04, -5.5521e-04,\n 2.5218e-03, 1.9827e-03, 9.9776e-04, 1.3925e-03, -1.2406e-03,\n 9.8116e-04, 2.3418e-03, 6.8598e-04, 2.5622e-03, 2.6748e-03,\n 1.2138e-03, -4.6284e-04, 8.7793e-04, 2.5191e-04, -2.7517e-03,\n -2.1153e-03, -9.2584e-04, -1.8188e-03, 2.1980e-03, -1.5118e-03,\n 1.1280e-03, 4.7404e-05, 7.4107e-04, 4.8121e-04, -1.3312e-03,\n -9.4865e-04, 8.1532e-04, -2.0728e-04, 3.5429e-04, -1.6217e-03,\n -1.7964e-03, 1.2782e-03, 1.2161e-04, -1.3693e-03, -4.3738e-03,\n -1.1137e-04, -3.2083e-03, -1.2700e-03, 8.3624e-05, 1.2684e-03,\n -9.8444e-04, 2.3650e-04, -1.1570e-03, -3.4743e-03, -7.2717e-04,\n -1.2442e-03, 2.0714e-03, 7.9864e-04, 1.2385e-03, 1.7462e-03,\n -4.3320e-03, -6.9085e-04, -8.6968e-05, 4.9494e-04, -2.8313e-03,\n 5.3581e-04, -2.0169e-03, -2.4391e-06, -3.1638e-03, 3.4887e-04,\n -6.5849e-04, 3.6187e-04, -2.8838e-03, -3.6467e-03, 8.5886e-04,\n 1.4394e-03, -2.2336e-04, 8.6763e-04, -1.4563e-03, 6.8975e-04,\n -3.5840e-04, 3.9347e-04, -9.6115e-04, -1.7010e-03, 1.1748e-03,\n -3.8397e-03, -1.4861e-03, -3.0709e-03, 1.4061e-03, -2.9395e-04,\n -6.8180e-04, -1.1635e-03, -1.1597e-03, -3.5765e-04, -2.1866e-04,\n -3.2447e-04, -3.0845e-04, -2.6755e-03, -1.2917e-03, -9.7330e-04,\n 1.4199e-03, 2.2018e-04, -1.8392e-04, -1.8402e-03, -4.2491e-04,\n 9.5400e-04, -9.9118e-05, -3.8281e-03, -1.6551e-03, 5.3894e-04,\n 4.9178e-04, 5.6052e-45], device='cuda:0')",
23
+ "exp_avg_sq": "tensor([6.4936e-05, 4.1908e-05, 3.3773e-05, 3.6295e-05, 2.4578e-05, 6.4922e-05,\n 3.6820e-05, 7.4176e-05, 5.4360e-05, 6.3829e-05, 8.1492e-05, 7.0440e-05,\n 9.5632e-05, 3.5547e-05, 5.7478e-05, 5.4977e-05, 4.4256e-05, 5.1265e-05,\n 4.5829e-05, 5.5493e-05, 5.3279e-05, 2.3520e-05, 2.9384e-05, 6.5999e-05,\n 1.2443e-04, 5.8657e-05, 3.7070e-05, 7.0891e-05, 5.4689e-05, 5.2680e-05,\n 3.0794e-05, 3.7235e-05, 6.2681e-05, 4.4760e-05, 2.8458e-05, 4.4351e-05,\n 3.4179e-05, 2.8761e-05, 3.1825e-05, 2.7677e-05, 2.9013e-05, 3.0464e-05,\n 5.1653e-17, 7.5146e-05, 1.3105e-04, 3.1391e-05, 4.3025e-05, 1.6543e-05,\n 6.5583e-05, 1.0521e-04, 1.1392e-04, 3.0881e-05, 3.3028e-05, 5.2281e-05,\n 6.9797e-05, 3.8209e-05, 4.5727e-05, 5.1948e-05, 3.9335e-05, 2.9940e-05,\n 7.7314e-05, 3.4976e-05, 4.3478e-05, 3.4856e-05, 3.0097e-05, 3.2734e-05,\n 4.5488e-05, 3.0865e-05, 5.3038e-05, 3.4702e-05, 4.6014e-05, 4.3705e-05,\n 3.6110e-05, 4.2870e-05, 3.7560e-05, 4.2952e-05, 3.7684e-05, 4.1044e-05,\n 5.6076e-05, 5.7679e-05, 5.3714e-05, 4.0320e-05, 3.4160e-05, 2.0016e-05,\n 3.6377e-05, 6.5157e-05, 6.6962e-05, 3.6385e-05, 2.2195e-05, 6.7180e-05,\n 3.7233e-05, 4.5545e-05, 6.8704e-05, 5.2909e-05, 5.0960e-05, 4.2211e-05,\n 6.9053e-05, 4.1722e-05, 5.3083e-05, 3.7797e-05, 4.1522e-05, 5.4966e-05,\n 6.8020e-05, 4.6813e-05, 6.8173e-05, 9.7193e-05, 4.4258e-05, 1.3781e-04,\n 4.2964e-05, 3.6888e-05, 6.0335e-05, 6.6881e-05, 4.9243e-05, 4.6883e-05,\n 5.3482e-05, 3.9550e-05, 5.3224e-05, 7.4827e-05, 2.8098e-05, 5.0440e-05,\n 2.4750e-05, 6.0530e-05, 6.3471e-05, 1.0541e-04, 4.8400e-05, 6.4652e-05,\n 4.0542e-05, 6.5358e-05, 7.2562e-05, 1.2856e-04, 5.9737e-05, 3.9797e-05,\n 6.1417e-05, 4.7410e-05, 1.3483e-04, 3.6159e-05, 5.4047e-05, 5.3456e-05,\n 3.0422e-05, 3.0362e-05, 3.0297e-05, 4.3537e-05, 4.3563e-05, 8.3469e-05,\n 4.0205e-05, 4.4386e-05, 2.6543e-05, 7.8602e-05, 4.7056e-05, 1.0866e-04,\n 2.3953e-05, 6.5810e-05, 3.0502e-05, 7.3825e-05, 2.4507e-05, 3.6172e-05,\n 6.5036e-05, 4.7036e-05, 3.8723e-05, 3.6891e-05, 6.5339e-05, 1.8822e-05,\n 6.9946e-05, 4.7868e-05, 1.8515e-05, 8.9815e-05, 4.1933e-05, 2.3454e-05,\n 1.6387e-05, 4.9758e-05, 3.2918e-05, 6.7350e-05, 3.9074e-05, 9.8024e-05,\n 4.7712e-05, 2.4431e-05, 8.9320e-05, 2.8597e-05, 2.6436e-05, 6.2063e-05,\n 4.1986e-05, 3.9808e-05, 1.1703e-04, 4.4468e-05, 8.4071e-05, 3.4425e-05,\n 1.5109e-04, 3.4509e-05, 4.3226e-05, 2.0260e-05, 4.3681e-05, 5.2778e-05,\n 4.9641e-05, 3.7639e-05, 2.9447e-05, 2.0338e-05, 7.0197e-05, 3.0487e-05,\n 1.6891e-05, 2.2665e-05, 3.3459e-05, 5.0094e-05, 3.9169e-05, 2.3436e-05,\n 2.1885e-05, 4.2930e-05, 2.9471e-05, 2.7040e-05, 6.2604e-05, 2.4209e-05,\n 5.0994e-05, 6.8276e-05, 2.2660e-05, 3.0760e-05, 5.3601e-05, 2.6552e-05,\n 2.5876e-05, 5.2921e-05, 4.6979e-05, 6.8033e-05, 3.0750e-05, 3.3008e-05,\n 3.3317e-05, 2.6285e-05, 4.0090e-05, 8.0580e-05, 2.4867e-05, 2.6464e-05,\n 2.1879e-05, 3.7311e-05, 3.8962e-05, 2.8987e-05, 1.1392e-04, 5.0192e-05,\n 3.0212e-05, 3.6906e-05, 3.8935e-05, 1.4343e-17, 3.8905e-05, 1.0916e-04,\n 3.1901e-05, 8.4289e-05, 6.0238e-05, 6.7966e-05, 8.4817e-05, 6.2076e-05,\n 4.5270e-05, 3.2078e-05, 5.5340e-05, 3.5753e-05, 8.9923e-05, 2.5438e-05,\n 7.4402e-05, 6.4992e-05, 3.4344e-05, 4.6833e-05, 5.6714e-05, 5.2709e-05,\n 3.7919e-05, 4.7415e-05, 4.9810e-05, 6.3425e-05, 3.8774e-05, 6.0024e-05,\n 4.8698e-05, 4.7083e-05, 2.8926e-05, 5.0122e-05, 3.8830e-05, 6.6568e-05,\n 7.0909e-05, 9.9247e-05, 2.2910e-05, 5.2301e-05, 2.6094e-05, 3.6833e-05,\n 4.6622e-05, 7.9351e-05, 4.0795e-05, 4.0574e-05, 5.0628e-05, 2.5585e-05,\n 2.9967e-05, 4.5776e-05, 3.1382e-05, 5.6739e-05, 4.8898e-05, 2.7477e-05,\n 9.2491e-05, 4.6815e-05, 2.7525e-05, 2.7277e-05, 4.6295e-05, 2.6127e-05,\n 4.2878e-05, 2.8501e-05, 4.3199e-05, 4.7407e-05, 1.5222e-04, 4.5523e-05,\n 3.1584e-05, 2.9307e-05, 4.8886e-05, 5.2576e-05, 5.8749e-05, 3.0340e-05,\n 3.9917e-05, 4.7712e-05, 1.2472e-04, 6.4785e-05, 7.0918e-05, 4.8873e-05,\n 3.1095e-05, 3.0669e-05, 4.8408e-05, 4.2666e-05, 4.1266e-05, 2.0179e-05,\n 3.8676e-05, 6.9587e-05, 1.3264e-04, 8.1165e-05, 4.9978e-05, 6.2627e-05,\n 8.1537e-05, 1.0440e-04, 5.8176e-05, 8.0663e-05, 4.6861e-05, 4.6418e-05,\n 9.4679e-05, 6.3050e-05, 1.6090e-05, 3.5980e-05, 3.5768e-05, 4.1938e-05,\n 3.1841e-05, 2.6093e-05, 6.5784e-05, 4.3629e-05, 4.2730e-05, 3.1054e-05,\n 4.1690e-05, 7.4488e-05, 6.2601e-05, 8.9002e-05, 6.2703e-05, 2.8922e-05,\n 4.6182e-05, 6.6998e-05, 4.5192e-05, 4.2928e-05, 5.0011e-05, 3.3298e-05,\n 4.7587e-05, 3.5337e-05, 4.3817e-05, 2.7872e-05, 3.3382e-05, 2.6016e-05,\n 1.9980e-05, 1.1272e-04, 1.0222e-04, 5.8426e-05, 4.6087e-05, 3.2879e-05,\n 2.3226e-05, 6.3672e-05, 2.2521e-05, 2.8899e-05, 2.8299e-05, 4.5317e-05,\n 5.1832e-05, 5.1913e-05, 5.0176e-05, 3.8017e-05, 2.8831e-05, 2.3087e-05,\n 3.8875e-05, 3.0645e-05, 4.8965e-05, 4.2827e-05, 7.1025e-05, 3.5812e-05,\n 4.3130e-05, 1.2384e-04, 3.8498e-05, 4.8197e-05, 4.1300e-05, 4.0162e-05,\n 3.9973e-05, 3.7778e-05, 4.8106e-05, 7.0635e-05, 3.7690e-05, 2.0530e-05,\n 2.6027e-05, 7.7550e-05, 5.7928e-05, 8.3323e-05, 3.9568e-05, 2.6807e-05,\n 7.8413e-05, 3.2967e-05, 4.4409e-05, 3.7665e-05, 1.0108e-04, 1.4613e-05,\n 6.5950e-05, 7.4220e-05, 5.5296e-05, 3.2726e-05, 1.6598e-04, 6.0798e-05,\n 3.7292e-05, 4.9947e-05, 8.9942e-05, 7.0340e-05, 5.5518e-05, 5.1537e-05,\n 5.1874e-05, 3.5887e-05, 6.5016e-05, 5.4985e-05, 7.6993e-05, 4.6175e-05,\n 6.6764e-05, 4.0459e-05, 4.6748e-05, 3.4225e-05, 4.0310e-05, 2.6898e-05,\n 4.6202e-05, 4.8260e-05, 4.8505e-05, 4.1522e-05, 6.0426e-05, 6.5042e-05,\n 4.4922e-05, 2.8807e-05, 5.7193e-05, 5.4244e-05, 4.4435e-05, 1.9774e-04,\n 6.8829e-05, 4.5718e-05, 6.2617e-05, 4.4810e-05, 6.5267e-05, 4.7888e-05,\n 5.0868e-05, 3.2780e-05, 4.5831e-05, 1.1779e-04, 5.6610e-05, 2.5109e-05,\n 9.4573e-05, 3.9459e-05, 2.4801e-05, 4.4948e-05, 5.1316e-05, 3.8886e-05,\n 3.7501e-05, 4.2581e-05, 5.0414e-05, 3.1988e-05, 3.0826e-05, 5.7551e-05,\n 6.1397e-05, 5.7160e-05, 5.0481e-05, 1.4256e-04, 6.3968e-05, 5.0918e-05,\n 6.2180e-05, 4.9525e-05, 5.3867e-05, 4.3014e-05, 6.1173e-05, 2.9736e-05,\n 2.5868e-05, 6.9898e-05, 5.3354e-05, 2.8508e-05, 3.7713e-05, 4.5177e-05,\n 4.1725e-05, 4.2151e-05, 7.9248e-05, 3.8758e-05, 3.3300e-05, 3.6293e-05,\n 5.0968e-05, 4.1371e-05, 2.9390e-05, 4.1875e-05, 4.9736e-05, 4.1545e-05,\n 3.1014e-05, 3.7659e-05, 3.9993e-05, 2.6229e-05, 3.8060e-05, 3.6831e-05,\n 2.7940e-05, 4.4228e-05, 3.0476e-05, 4.8356e-05, 3.4565e-05, 4.6450e-05,\n 2.9776e-05, 5.0322e-14], device='cuda:0')"
24
+ },
25
+ "4": {
26
+ "step": "tensor(26278.)",
27
+ "exp_avg": "tensor([[ 4.6059e-06, 6.8515e-06, -3.1801e-06, ..., 3.0888e-06,\n 1.0737e-05, 5.6052e-45],\n [-3.4468e-06, 7.6127e-06, -2.2470e-07, ..., 2.0734e-05,\n -3.2006e-06, 5.6052e-45],\n [-7.4183e-06, -1.4955e-05, 2.4982e-06, ..., 3.0288e-08,\n 8.8312e-06, -5.6052e-45],\n ...,\n [-2.2202e-05, 1.2453e-05, 1.4053e-05, ..., 4.9678e-06,\n 9.5985e-07, 5.6052e-45],\n [ 1.8454e-05, -1.0810e-05, 1.4333e-06, ..., 1.6876e-05,\n -2.7159e-06, 5.6052e-45],\n [-2.4832e-05, 2.2070e-06, 9.7547e-06, ..., 5.3251e-06,\n -5.1385e-06, 5.6052e-45]], device='cuda:0')",
28
+ "exp_avg_sq": "tensor([[7.5320e-10, 1.3742e-09, 6.9011e-10, ..., 1.4464e-09, 5.0516e-10,\n 1.3470e-18],\n [1.1823e-09, 2.4506e-09, 1.6387e-09, ..., 4.3133e-09, 2.3760e-09,\n 7.5747e-18],\n [9.3899e-10, 1.4606e-09, 2.0187e-09, ..., 3.6185e-09, 8.4672e-10,\n 6.8959e-18],\n ...,\n [1.6533e-09, 3.2532e-09, 1.3644e-09, ..., 5.0743e-09, 1.1455e-09,\n 7.7309e-18],\n [1.7982e-09, 4.5648e-09, 1.3665e-09, ..., 3.1752e-09, 2.0450e-09,\n 1.3759e-17],\n [2.1715e-09, 2.0649e-09, 1.1585e-09, ..., 3.5198e-09, 1.8234e-09,\n 7.9439e-18]], device='cuda:0')"
29
+ },
30
+ "5": {
31
+ "step": "tensor(18770.)",
32
+ "exp_avg": "tensor([[-3.0043e-06, 1.0618e-06, -1.3340e-05, ..., 1.0328e-05,\n 4.0544e-06, 5.6052e-45],\n [-4.9781e-06, 1.3001e-05, -8.2443e-07, ..., 1.5149e-05,\n -2.1287e-06, 5.6052e-45],\n [-1.2022e-05, -1.7145e-05, 1.2638e-06, ..., 7.5476e-06,\n -6.3028e-06, -5.6052e-45],\n ...,\n [ 2.4165e-05, 5.6837e-06, 1.8984e-05, ..., -1.7171e-05,\n 1.5838e-05, 5.6052e-45],\n [-1.7129e-05, -3.3812e-06, -1.0679e-05, ..., -7.6476e-06,\n 4.4477e-06, 5.6052e-45],\n [ 5.2217e-06, 7.5797e-06, -4.4868e-06, ..., -1.6563e-05,\n 5.9140e-06, 5.6052e-45]], device='cuda:0')",
33
+ "exp_avg_sq": "tensor([[9.3447e-10, 1.1413e-09, 5.0160e-10, ..., 1.0569e-09, 7.2055e-10,\n 5.8908e-18],\n [7.7286e-10, 2.3228e-09, 1.5487e-09, ..., 4.8466e-09, 2.0346e-09,\n 3.8528e-17],\n [8.9575e-10, 1.8410e-09, 1.4173e-09, ..., 4.2610e-09, 1.1309e-09,\n 1.1266e-17],\n ...,\n [1.9351e-09, 1.4721e-09, 1.9577e-09, ..., 6.7987e-09, 1.3092e-09,\n 2.3410e-18],\n [1.1253e-09, 2.3569e-09, 1.1041e-09, ..., 2.4164e-09, 1.1318e-09,\n 8.4931e-17],\n [2.0530e-09, 1.4335e-09, 1.0749e-09, ..., 2.4967e-09, 2.0305e-09,\n 8.1254e-19]], device='cuda:0')"
34
+ },
35
+ "6": {
36
+ "step": "tensor(18770.)",
37
+ "exp_avg": "tensor([ 0.0012, -0.0012], device='cuda:0')",
38
+ "exp_avg_sq": "tensor([7.1097e-06, 7.1097e-06], device='cuda:0')"
39
+ }
40
+ },
41
+ "param_groups": [
42
+ {
43
+ "lr": 0.0020618676311637816,
44
+ "name": "shared",
45
+ "betas": [
46
+ 0.9,
47
+ 0.999
48
+ ],
49
+ "eps": 1e-08,
50
+ "weight_decay": 1e-05,
51
+ "amsgrad": false,
52
+ "maximize": false,
53
+ "foreach": null,
54
+ "capturable": false,
55
+ "differentiable": false,
56
+ "fused": null,
57
+ "decoupled_weight_decay": true,
58
+ "initial_lr": 0.01,
59
+ "params": [
60
+ 0,
61
+ 1,
62
+ 2,
63
+ 3
64
+ ]
65
+ },
66
+ {
67
+ "lr": 0.0020618676311637816,
68
+ "name": "scale_256",
69
+ "betas": [
70
+ 0.9,
71
+ 0.999
72
+ ],
73
+ "eps": 1e-08,
74
+ "weight_decay": 1e-05,
75
+ "amsgrad": false,
76
+ "maximize": false,
77
+ "foreach": null,
78
+ "capturable": false,
79
+ "differentiable": false,
80
+ "fused": null,
81
+ "decoupled_weight_decay": true,
82
+ "initial_lr": 0.01,
83
+ "params": [
84
+ 4
85
+ ]
86
+ },
87
+ {
88
+ "lr": 0.0020618676311637816,
89
+ "name": "scale_512",
90
+ "betas": [
91
+ 0.9,
92
+ 0.999
93
+ ],
94
+ "eps": 1e-08,
95
+ "weight_decay": 1e-05,
96
+ "amsgrad": false,
97
+ "maximize": false,
98
+ "foreach": null,
99
+ "capturable": false,
100
+ "differentiable": false,
101
+ "fused": null,
102
+ "decoupled_weight_decay": true,
103
+ "initial_lr": 0.01,
104
+ "params": [
105
+ 5
106
+ ]
107
+ },
108
+ {
109
+ "lr": 0.0010313307618949636,
110
+ "name": "fusion",
111
+ "betas": [
112
+ 0.9,
113
+ 0.999
114
+ ],
115
+ "eps": 1e-08,
116
+ "weight_decay": 1e-05,
117
+ "amsgrad": false,
118
+ "maximize": false,
119
+ "foreach": null,
120
+ "capturable": false,
121
+ "differentiable": false,
122
+ "fused": null,
123
+ "decoupled_weight_decay": true,
124
+ "initial_lr": 0.005,
125
+ "params": [
126
+ 6
127
+ ]
128
+ }
129
+ ]
130
+ },
131
+ "scheduler_state_dict": {
132
+ "T_0": 10,
133
+ "T_i": 10,
134
+ "T_mult": 2,
135
+ "eta_min": 1e-06,
136
+ "T_cur": 7,
137
+ "base_lrs": [
138
+ 0.01,
139
+ 0.01,
140
+ 0.01,
141
+ 0.005
142
+ ],
143
+ "last_epoch": 7,
144
+ "_step_count": 0,
145
+ "_is_initial": false,
146
+ "_get_lr_called_within_step": false,
147
+ "_last_lr": [
148
+ 0.0020618676311637816,
149
+ 0.0020618676311637816,
150
+ 0.0020618676311637816,
151
+ 0.0010313307618949636
152
+ ]
153
+ },
154
+ "metrics": {
155
+ "best_val_acc": 66.03133333333334,
156
+ "best_epoch": 6,
157
+ "scale_accuracies": {
158
+ "256": 65.49933333333334,
159
+ "512": 66.02266666666667
160
+ },
161
+ "training_history": {
162
+ "epochs": [
163
+ 1,
164
+ 2,
165
+ 3,
166
+ 4,
167
+ 5,
168
+ 6,
169
+ 7
170
+ ],
171
+ "train_loss": [
172
+ 3.9435249049420933,
173
+ 3.3040703793567867,
174
+ 4.3101251841734625,
175
+ 4.185147669827233,
176
+ 4.123004540650211,
177
+ 4.076372152195373,
178
+ 4.03838544134517
179
+ ],
180
+ "train_acc": [
181
+ 54.38726307083047,
182
+ 59.31631083223343,
183
+ 60.291879721118846,
184
+ 61.30111583163371,
185
+ 61.94625681117294,
186
+ 62.46739626189768,
187
+ 62.918183187671865
188
+ ],
189
+ "val_acc": [
190
+ 61.635333333333335,
191
+ 62.978,
192
+ 64.12,
193
+ 64.73133333333334,
194
+ 65.312,
195
+ 65.66133333333333,
196
+ 66.03133333333334
197
+ ],
198
+ "scale_accs": {
199
+ "256": [
200
+ 61.635333333333335,
201
+ 62.978,
202
+ 63.782,
203
+ 64.34866666666667,
204
+ 64.754,
205
+ 65.17733333333334,
206
+ 65.49933333333334
207
+ ],
208
+ "512": [
209
+ 63.839333333333336,
210
+ 64.522,
211
+ 65.18466666666667,
212
+ 65.52333333333333,
213
+ 66.02266666666667
214
+ ]
215
+ },
216
+ "lr": [
217
+ 0.00975530705321762,
218
+ 0.00904518046337755,
219
+ 0.00793913236883622,
220
+ 0.00654543046337755,
221
+ 0.005000500000000001,
222
+ 0.0034555695366224513,
223
+ 0.0020618676311637816
224
+ ]
225
+ }
226
+ },
227
+ "train_config": {
228
+ "name": "david_training",
229
+ "run_id": "20251012_235237",
230
+ "dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
231
+ "model_variant": [
232
+ "clip_vit_b16",
233
+ "clip_vit_laion_b32",
234
+ "clip_vit_b32"
235
+ ],
236
+ "num_classes": 1000,
237
+ "preset": "small_fast",
238
+ "custom_config_path": null,
239
+ "num_classes_override": null,
240
+ "use_belly_override": null,
241
+ "belly_expand_override": null,
242
+ "progressive_training_override": true,
243
+ "scale_warmup_epochs_override": {
244
+ "256": 0,
245
+ "512": 2
246
+ },
247
+ "num_epochs": 10,
248
+ "batch_size": 1024,
249
+ "learning_rate": 0.01,
250
+ "weight_decay": 1e-05,
251
+ "warmup_epochs": 3,
252
+ "use_rose_loss": true,
253
+ "rose_initial_weight": 0.1,
254
+ "rose_max_weight": 0.8,
255
+ "rose_weight_schedule": "adaptive",
256
+ "use_cayley_loss": false,
257
+ "cayley_weight": 0.01,
258
+ "scale_loss_balance": null,
259
+ "use_mixed_precision": false,
260
+ "gradient_clip": 15.0,
261
+ "scheduler_type": "cosine_restarts",
262
+ "min_lr": 1e-06,
263
+ "freeze_strategy": "never",
264
+ "freeze_threshold": 90.0,
265
+ "unfreeze_on_plateau": true,
266
+ "patience": 10,
267
+ "track_gradients": true,
268
+ "gradient_scale_threshold": 1e-05,
269
+ "gradient_scale_multiplier": 10.0,
270
+ "log_interval": 50,
271
+ "val_interval": 1,
272
+ "save_interval": 5,
273
+ "log_fusion_weights": true,
274
+ "log_loss_components": true,
275
+ "save_format": "safetensors",
276
+ "hf_repo": "AbstractPhil/david-shared-space",
277
+ "upload_to_hub": true,
278
+ "base_dir": "./david_training",
279
+ "num_workers": 10,
280
+ "pin_memory": true,
281
+ "prefetch_factor": 4,
282
+ "persistent_workers": true
283
+ }
284
+ }