AbstractPhil commited on
Commit
6ecbdd9
·
verified ·
1 Parent(s): 85dea3a

Update best_model_acc66.39_metadata.json - Run 20251012_235237

Browse files
weights/David-fully_shared-weighted_sum/20251012_235237/best_model_acc66.39_metadata.json ADDED
@@ -0,0 +1,298 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 8,
3
+ "optimizer_state_dict": {
4
+ "state": {
5
+ "0": {
6
+ "step": "tensor(33786.)",
7
+ "exp_avg": "tensor([[ 4.8147e-05, -8.3186e-05, -7.0035e-05, ..., 5.7512e-05,\n 2.6287e-05, 6.7521e-05],\n [-3.4752e-05, -4.0599e-06, 7.3444e-05, ..., -6.6701e-05,\n -6.0121e-05, -4.6441e-05],\n [-1.0008e-05, 5.2972e-05, 6.8685e-06, ..., -5.6289e-05,\n 1.1144e-05, -1.1350e-06],\n ...,\n [ 1.9848e-05, 2.6379e-05, 1.5499e-05, ..., -7.5853e-05,\n -1.4292e-05, -2.9186e-06],\n [ 4.3666e-05, -4.4148e-05, 1.3915e-05, ..., 1.6668e-05,\n 2.0697e-05, 2.9403e-05],\n [ 1.4400e-05, 5.6524e-05, -3.2244e-05, ..., 1.7298e-05,\n -1.5852e-05, 3.4920e-06]], device='cuda:0')",
8
+ "exp_avg_sq": "tensor([[1.3603e-07, 1.2865e-07, 5.5040e-08, ..., 6.2294e-08, 4.0020e-08,\n 2.8956e-08],\n [5.1712e-08, 1.8223e-07, 8.2570e-08, ..., 5.6692e-08, 2.4728e-08,\n 2.5263e-08],\n [2.7429e-08, 5.0032e-08, 3.0049e-08, ..., 5.7063e-08, 1.5310e-08,\n 1.7125e-08],\n ...,\n [3.7490e-08, 2.9889e-07, 4.9826e-08, ..., 7.6243e-08, 2.0602e-08,\n 3.2019e-08],\n [7.8809e-08, 1.2773e-07, 4.8823e-08, ..., 5.5924e-08, 4.0997e-08,\n 2.6997e-08],\n [5.3377e-09, 2.0808e-08, 1.2133e-08, ..., 5.0806e-09, 2.5691e-09,\n 4.0011e-09]], device='cuda:0')"
9
+ },
10
+ "1": {
11
+ "step": "tensor(33786.)",
12
+ "exp_avg": "tensor([-3.6794e-04, -1.2300e-03, -4.8485e-04, -1.1745e-03, 9.2061e-04,\n -2.0311e-04, 1.9505e-03, 1.5513e-03, -1.5334e-03, 1.2738e-03,\n -3.2904e-04, -2.2887e-03, -1.3025e-03, 1.2906e-03, -1.5889e-03,\n 1.5787e-03, 1.7037e-03, 7.8665e-04, -2.4058e-03, -1.4134e-03,\n -4.3962e-04, 7.1143e-04, 1.8773e-03, 1.3606e-03, -2.0870e-03,\n -1.4756e-03, 2.5319e-04, 1.0392e-03, -1.9356e-04, -1.1951e-03,\n 3.4740e-04, -1.8319e-04, -4.4989e-03, 8.7356e-04, -9.5172e-04,\n 1.0779e-04, -2.3524e-04, 1.2233e-03, 1.5904e-03, -7.8214e-04,\n -8.5300e-04, -3.6157e-03, -1.9459e-04, 1.7094e-03, -2.5238e-03,\n -9.6311e-05, -2.7176e-04, 3.6674e-04, -5.7872e-04, -2.7524e-03,\n 3.3464e-03, 7.4986e-04, -1.3080e-03, -1.4779e-03, -2.3748e-04,\n -1.3913e-03, -1.4880e-03, 1.1327e-03, 3.5838e-04, -5.0724e-04,\n -1.0686e-03, -5.7886e-04, 1.8067e-03, 1.0865e-03, 1.2962e-03,\n 2.2025e-03, -8.2493e-04, 7.3944e-04, 1.9201e-03, 1.5250e-03,\n 2.4516e-03, -8.1672e-04, 1.5756e-03, 1.3153e-04, 7.8063e-04,\n 1.6335e-04, 3.1799e-04, -1.7326e-03, -1.0917e-03, -2.7833e-03,\n -5.7801e-05, -8.6690e-04, -1.4853e-03, -6.2748e-04, -3.6019e-03,\n -2.1091e-03, 6.0563e-04, -5.2016e-03, -1.5709e-03, 1.8940e-03,\n 3.6264e-03, -9.5648e-04, 1.0935e-03, 2.7706e-04, -1.1048e-03,\n 5.1152e-04, 1.9442e-03, 6.6058e-04, 2.4123e-03, 5.8022e-04,\n 2.1812e-04, 2.8803e-03, 2.3681e-03, -2.0324e-03, -1.7051e-04,\n 2.1177e-03, -2.9629e-03, -1.0473e-05, -4.3649e-04, -7.8830e-04,\n -4.6836e-03, -2.9915e-03, -8.0232e-04, 6.4043e-05, 2.7644e-03,\n -9.1294e-04, -5.4345e-04, -2.7834e-03, -1.2655e-03, -9.7020e-04,\n 1.8036e-03, 3.2508e-03, 6.6336e-05, 1.8349e-03, 9.6591e-04,\n -3.2090e-04, 6.1328e-04, 1.5236e-03, -1.7424e-03, 4.9396e-04,\n 9.0372e-04, 6.1785e-04, 2.9409e-03, -6.0068e-05, 1.1562e-03,\n -8.7721e-04, 7.9119e-04, -5.3409e-04, 4.5888e-04, 2.1273e-03,\n 6.7312e-04, 1.6370e-03, 2.0059e-03, -8.9793e-04, -5.2019e-03,\n 1.8344e-03, -1.0533e-04, 1.1810e-03, -1.5956e-03, 1.5224e-03,\n 9.9976e-04, -2.5865e-04, 9.6202e-04, -2.1660e-04, 1.4698e-03,\n -1.0381e-03, -2.3549e-03, 1.0070e-03, 7.9520e-05, 1.9783e-04,\n -6.6811e-04, 3.0838e-04, 1.5582e-03, 2.2661e-03, 1.4614e-04,\n 1.0749e-03, -5.2224e-04, 2.0865e-03, -1.6686e-04, 1.3186e-04,\n -2.8336e-04, 1.0245e-03, -1.7414e-03, -9.2805e-04, 2.6747e-03,\n 2.9964e-04, -3.0109e-03, 1.5085e-03, -2.8441e-03, 1.6872e-03,\n 2.5280e-04, 4.9261e-04, 2.1785e-03, 3.5660e-04, -6.6347e-04,\n -5.0094e-04, 1.2504e-03, -3.1142e-04, 9.2183e-04, 1.4598e-03,\n 8.2119e-04, -1.8999e-04, -3.4901e-03, 1.7294e-04, -1.6112e-03,\n 4.0464e-04, -6.1310e-04, 7.8661e-04, 1.4419e-03, -5.3727e-04,\n 1.4051e-03, -1.4212e-03, -1.1030e-03, 1.7367e-04, -6.8381e-04,\n -2.8908e-03, -3.9383e-04, -7.7298e-04, -1.9304e-03, -1.7369e-03,\n -6.1974e-04, -8.0539e-04, -1.3034e-03, -5.8550e-04, 1.4376e-03,\n -6.4315e-04, -2.2104e-03, -1.8444e-04, 2.8374e-04, 5.0619e-04,\n -5.9703e-04, -4.1775e-04, 1.0568e-03, 1.1393e-03, 2.1197e-03,\n -1.2327e-03, -3.8130e-04, 1.7756e-03, -7.6584e-04, 5.7618e-04,\n 9.2248e-04, -2.6207e-03, -2.2857e-03, 2.1912e-03, -6.8507e-04,\n 3.5278e-04, 1.4350e-03, -2.1437e-04, 2.2682e-03, -5.3711e-05,\n -2.5187e-03, -1.9548e-04, 1.7150e-03, -3.6543e-04, -1.4743e-03,\n 2.7775e-03, -1.4887e-03, 1.8342e-03, 1.8382e-04, -4.2730e-04,\n -7.0642e-04, 1.1317e-03, -1.5966e-03, -9.0392e-04, 3.4441e-04,\n 8.7638e-04, 9.5791e-04, -8.9721e-04, -7.7765e-04, 6.7565e-04,\n 2.4271e-04, -1.2898e-03, -2.1818e-04, 4.4599e-03, -1.0566e-03,\n -8.5318e-04, -2.6317e-04, 1.9170e-04, -1.1511e-03, 2.5952e-03,\n -1.3320e-03, -3.4376e-04, -5.8618e-04, -8.7696e-04, 1.7143e-03,\n -6.9870e-04, -5.4638e-04, 1.9265e-03, 3.4632e-03, -5.2842e-04,\n -4.2234e-04, -2.5893e-04, -2.7058e-03, 1.4777e-03, -3.8584e-04,\n 1.1091e-03, -1.6666e-03, 2.1849e-03, 2.7441e-03, -1.3426e-03,\n 1.9712e-03, 1.0956e-03, 4.4670e-04, -3.3251e-04, -1.1621e-04,\n -2.7582e-04, -1.1778e-03, 1.3370e-03, 3.9119e-03, 2.9013e-04,\n -2.1034e-03, 7.6543e-05, -1.3367e-04, -9.4714e-05, 1.8596e-03,\n -1.5717e-03, 1.2141e-03, -1.5051e-03, -2.5721e-03, 2.8805e-03,\n -1.2608e-03, 2.9328e-03, -1.6737e-03, -4.2656e-04, 2.8595e-03,\n 1.4876e-03, 1.7805e-03, 1.0341e-03, -3.0919e-04, 5.0888e-04,\n -1.7373e-03, -2.3768e-03, -1.9429e-03, 2.5333e-03, 1.4275e-04,\n -1.0304e-03, 9.6739e-04, 1.4949e-03, 1.0786e-03, 2.9730e-04,\n 3.7247e-04, -7.3322e-05, -8.1649e-04, -1.9256e-03, -8.3127e-04,\n 5.5995e-04, -9.6350e-04, 1.1676e-03, 2.2581e-03, -1.4291e-03,\n 1.2134e-03, -6.7407e-04, 3.5458e-04, -1.1892e-04, 6.8681e-04,\n 2.8936e-03, 2.2916e-03, 1.6839e-03, -2.5735e-04, 6.2672e-04,\n 4.8122e-04, -1.1575e-03, -1.1197e-03, -2.0544e-03, 1.7142e-03,\n -1.0422e-03, 1.4205e-03, -2.1495e-03, -3.3274e-03, 4.3756e-04,\n 8.5005e-04, 1.4082e-03, -3.1031e-03, 6.8399e-04, 2.2598e-05,\n -8.3151e-04, 1.8572e-03, -3.0006e-03, -4.7884e-03, -1.0931e-03,\n -4.3238e-04, 8.1551e-04, -7.6897e-04, -3.8781e-03, -1.0797e-03,\n 1.9890e-03, 3.7999e-04, -3.8495e-04, -2.9855e-04, 4.0042e-04,\n -2.2643e-03, 3.6521e-04, -3.5612e-03, -1.6364e-03, -3.0584e-04,\n 3.3922e-04, -2.0951e-03, -3.0491e-03, -2.5652e-03, -9.6807e-04,\n -5.3499e-04, 2.7661e-03, 2.2654e-03, 1.1554e-03, -6.8748e-04,\n -1.5041e-04, -5.2043e-04, 1.1395e-03, 3.8705e-04, -1.9850e-04,\n -2.5859e-03, 2.7421e-03, -3.0930e-04, -6.0061e-04, -2.0923e-04,\n -2.5691e-04, -1.7412e-03, 8.6021e-04, 1.3119e-03, 3.8857e-06,\n -1.7173e-04, -5.7369e-05, 1.0160e-03, -1.6157e-03, -3.5152e-04,\n -1.5089e-03, -1.0385e-03, -8.6843e-05, 3.0845e-03, 8.2899e-04,\n -1.1395e-04, -2.2823e-03, 1.2407e-03, 1.3767e-03, -5.7634e-04,\n -1.3370e-03, 1.6879e-03, 1.5548e-03, 1.2411e-03, -4.9360e-04,\n -5.3312e-04, -1.3481e-04, -4.3577e-04, 2.7060e-03, -2.0963e-03,\n 3.4601e-04, 1.4556e-03, 2.1278e-04, -3.5778e-04, -4.6176e-04,\n -7.0518e-04, 1.4880e-03, 2.4727e-03, -2.3817e-03, 1.6780e-03,\n 3.9164e-04, 1.0458e-03, 2.3445e-03, 5.9402e-04, -5.5383e-04,\n 1.0809e-03, 3.3727e-04, -2.9880e-04, 1.1196e-03, -8.1334e-04,\n 8.7434e-05, 3.2383e-04, -3.6598e-03, 2.7211e-04, -1.0079e-03,\n 2.1191e-03, -1.0020e-03, 1.1452e-03, 3.6476e-05, -3.3872e-04,\n 7.4811e-04, 1.9676e-03, 7.1897e-04, -1.4067e-03, -5.2078e-04,\n 3.1388e-04, 9.1802e-04, 3.9846e-03, 1.3361e-03, -1.5128e-03,\n -1.6229e-03, 1.2735e-03, -9.7674e-04, -2.5680e-04, 6.6813e-04,\n 1.1684e-03, -7.1409e-05, 6.1703e-04, -1.3099e-03, 3.2990e-04,\n -1.1070e-04, -3.9755e-03, -1.1333e-03, -7.8977e-04, 2.9397e-03,\n -2.2536e-04, 1.0731e-03, -2.6824e-03, -2.1390e-03, 2.1438e-03,\n 1.4045e-03, 4.3717e-04, 4.7331e-04, -1.8443e-04, 7.2770e-05,\n 7.6113e-04, -2.4996e-03, 1.3164e-03, 2.1215e-03, -1.2972e-03,\n -1.4874e-03, -1.0938e-04, 7.6072e-04, 1.0493e-03, -3.9281e-04,\n -4.1761e-04, 4.3044e-04], device='cuda:0')",
13
+ "exp_avg_sq": "tensor([4.3673e-05, 3.8491e-05, 2.9143e-05, 2.9453e-05, 2.9911e-05, 5.8914e-05,\n 3.0439e-05, 5.1364e-05, 3.8997e-05, 4.4126e-05, 6.0747e-05, 6.1318e-05,\n 8.3092e-05, 2.0454e-05, 6.4205e-05, 5.1827e-05, 4.3819e-05, 3.7464e-05,\n 4.6329e-05, 5.3256e-05, 3.7557e-05, 3.2502e-05, 3.1430e-05, 5.6277e-05,\n 7.6488e-05, 3.2339e-05, 2.8574e-05, 6.4447e-05, 5.7559e-05, 4.0270e-05,\n 1.7713e-05, 2.7169e-05, 4.8543e-05, 4.6718e-05, 2.4287e-05, 4.7154e-05,\n 3.2098e-05, 2.6228e-05, 2.6854e-05, 2.3734e-05, 3.1501e-05, 2.4326e-05,\n 5.8741e-06, 6.5806e-05, 1.0577e-04, 3.1293e-05, 2.2170e-05, 2.3957e-05,\n 4.1332e-05, 6.8208e-05, 8.5000e-05, 2.6103e-05, 2.7169e-05, 3.9101e-05,\n 4.6037e-05, 3.5644e-05, 3.2778e-05, 9.0211e-05, 3.1883e-05, 3.9197e-05,\n 1.0174e-04, 2.6088e-05, 2.9343e-05, 3.4486e-05, 3.3470e-05, 2.4578e-05,\n 3.3046e-05, 2.9502e-05, 5.4725e-05, 3.5462e-05, 3.6084e-05, 6.2266e-05,\n 2.8470e-05, 3.0798e-05, 2.7240e-05, 3.3760e-05, 4.4353e-05, 3.8262e-05,\n 5.1991e-05, 3.5878e-05, 5.9505e-05, 3.7160e-05, 2.8038e-05, 1.9247e-05,\n 3.7780e-05, 4.0750e-05, 4.5813e-05, 3.3878e-05, 2.4419e-05, 3.8363e-05,\n 2.8905e-05, 6.1623e-05, 3.3141e-05, 3.7231e-05, 3.7270e-05, 4.4566e-05,\n 5.4656e-05, 3.7854e-05, 3.6934e-05, 2.3192e-05, 4.5716e-05, 6.7654e-05,\n 6.3904e-05, 3.3396e-05, 6.6582e-05, 5.6066e-05, 3.9971e-05, 8.9140e-05,\n 3.0792e-05, 4.3789e-05, 5.5618e-05, 5.0535e-05, 3.6382e-05, 4.7687e-05,\n 3.7845e-05, 4.4668e-05, 4.8593e-05, 4.4173e-05, 3.0943e-05, 4.0710e-05,\n 1.6888e-05, 5.2024e-05, 4.4411e-05, 5.9567e-05, 3.7881e-05, 3.9396e-05,\n 3.2781e-05, 4.3183e-05, 3.8757e-05, 7.0398e-05, 4.0232e-05, 3.6303e-05,\n 5.1801e-05, 3.6887e-05, 9.8451e-05, 3.3265e-05, 2.5268e-05, 4.2344e-05,\n 2.7152e-05, 4.6199e-05, 1.6648e-05, 2.7340e-05, 4.1812e-05, 9.2046e-05,\n 4.5920e-05, 4.0432e-05, 2.6569e-05, 5.5260e-05, 4.2908e-05, 8.2255e-05,\n 1.8199e-05, 1.0164e-04, 2.5027e-05, 1.4777e-04, 2.7218e-05, 3.2609e-05,\n 4.4667e-05, 3.5953e-05, 3.2906e-05, 2.6938e-05, 4.9716e-05, 1.2508e-05,\n 3.7497e-05, 7.2468e-05, 1.7471e-05, 5.4711e-05, 3.6436e-05, 1.8371e-05,\n 1.8075e-05, 4.4501e-05, 3.0548e-05, 4.8198e-05, 3.6464e-05, 3.1710e-05,\n 3.4402e-05, 2.4282e-05, 7.6058e-05, 2.2712e-05, 1.9348e-05, 3.8367e-05,\n 3.1759e-05, 2.8524e-05, 7.0677e-05, 3.3632e-05, 3.1805e-05, 3.9231e-05,\n 9.0034e-05, 2.3506e-05, 1.9574e-05, 1.5395e-05, 3.2762e-05, 4.8678e-05,\n 5.8162e-05, 5.7248e-05, 2.2554e-05, 1.7789e-05, 4.2479e-05, 2.6968e-05,\n 1.9181e-05, 1.9443e-05, 2.4576e-05, 4.0802e-05, 2.6253e-05, 1.9311e-05,\n 3.4090e-05, 4.0617e-05, 3.2598e-05, 2.5434e-05, 4.9055e-05, 1.8963e-05,\n 3.8158e-05, 5.6339e-05, 3.7135e-05, 2.8260e-05, 5.5457e-05, 1.4409e-05,\n 2.0603e-05, 2.1779e-05, 5.6536e-05, 5.3702e-05, 2.2282e-05, 1.9102e-05,\n 3.0332e-05, 1.7065e-05, 2.6160e-05, 5.4723e-05, 2.7349e-05, 1.9786e-05,\n 2.2619e-05, 5.5226e-05, 4.0246e-05, 2.4034e-05, 4.8179e-05, 3.2426e-05,\n 3.0387e-05, 3.3322e-05, 3.4287e-05, 6.1282e-06, 4.0770e-05, 1.1482e-04,\n 2.7944e-05, 7.1332e-05, 3.2253e-05, 5.4513e-05, 7.4662e-05, 5.1427e-05,\n 4.1796e-05, 2.9748e-05, 4.6917e-05, 3.2488e-05, 5.6468e-05, 2.7449e-05,\n 4.2148e-05, 4.4168e-05, 3.6762e-05, 3.1815e-05, 2.6212e-05, 4.9604e-05,\n 5.3690e-05, 3.1974e-05, 3.0501e-05, 6.9820e-05, 2.5012e-05, 4.6320e-05,\n 4.1562e-05, 2.5773e-05, 2.3886e-05, 3.5276e-05, 2.6385e-05, 3.4438e-05,\n 3.9327e-05, 4.0468e-05, 3.2468e-05, 3.1601e-05, 2.3632e-05, 5.2326e-05,\n 2.0558e-05, 6.0439e-05, 3.3260e-05, 2.4340e-05, 3.1006e-05, 2.4220e-05,\n 3.2043e-05, 4.7019e-05, 2.2929e-05, 5.2950e-05, 3.4786e-05, 3.6018e-05,\n 6.2803e-05, 4.4521e-05, 2.5529e-05, 3.1515e-05, 5.1899e-05, 2.7186e-05,\n 4.7310e-05, 2.4033e-05, 2.7820e-05, 5.6687e-05, 8.6596e-05, 3.4128e-05,\n 2.5286e-05, 4.4796e-05, 3.4304e-05, 3.2939e-05, 5.6873e-05, 2.2018e-05,\n 3.8613e-05, 3.8217e-05, 7.0425e-05, 3.9993e-05, 4.9432e-05, 5.7644e-05,\n 2.4623e-05, 2.5870e-05, 4.8040e-05, 2.5914e-05, 3.8019e-05, 1.5977e-05,\n 3.7966e-05, 4.9974e-05, 8.4612e-05, 3.3772e-05, 4.1001e-05, 4.0669e-05,\n 5.8239e-05, 9.9043e-05, 2.7117e-05, 4.8184e-05, 3.1500e-05, 3.0232e-05,\n 3.1847e-05, 4.6881e-05, 1.6447e-05, 2.5531e-05, 2.9917e-05, 3.3516e-05,\n 5.4967e-05, 2.5126e-05, 5.4745e-05, 4.6476e-05, 3.3481e-05, 3.1747e-05,\n 4.0320e-05, 2.3106e-05, 5.5699e-05, 5.5671e-05, 3.6743e-05, 2.3194e-05,\n 4.0317e-05, 4.3079e-05, 2.9289e-05, 4.0866e-05, 3.0422e-05, 3.0677e-05,\n 3.2070e-05, 2.4641e-05, 2.6130e-05, 3.0794e-05, 2.7487e-05, 2.3938e-05,\n 2.3490e-05, 7.7276e-05, 1.0791e-04, 3.2783e-05, 3.8810e-05, 3.8999e-05,\n 3.3771e-05, 4.8128e-05, 2.4712e-05, 3.0277e-05, 2.0322e-05, 3.7948e-05,\n 2.1032e-05, 3.7567e-05, 6.1411e-05, 3.5823e-05, 2.0010e-05, 1.4081e-05,\n 2.5152e-05, 3.2398e-05, 3.6208e-05, 4.3121e-05, 4.6700e-05, 3.2425e-05,\n 3.5476e-05, 1.0818e-04, 3.0257e-05, 5.0716e-05, 3.6007e-05, 3.7406e-05,\n 4.8472e-05, 5.5363e-05, 3.0451e-05, 3.1612e-05, 3.7603e-05, 2.4273e-05,\n 2.4023e-05, 5.4084e-05, 4.3297e-05, 7.4203e-05, 4.0510e-05, 2.8985e-05,\n 4.6581e-05, 3.2491e-05, 4.5245e-05, 3.4630e-05, 7.4938e-05, 2.1313e-05,\n 3.2213e-05, 4.5857e-05, 5.1657e-05, 4.9079e-05, 4.6666e-05, 4.7677e-05,\n 2.7703e-05, 2.3545e-05, 6.0017e-05, 2.2074e-05, 5.0821e-05, 6.3094e-05,\n 3.7496e-05, 2.9903e-05, 4.9253e-05, 4.4335e-05, 3.5837e-05, 3.7946e-05,\n 6.8808e-05, 3.7112e-05, 2.7310e-05, 3.4914e-05, 3.0845e-05, 1.8769e-05,\n 5.3495e-05, 4.2078e-05, 3.2204e-05, 2.8964e-05, 5.6471e-05, 4.0601e-05,\n 3.7390e-05, 3.8461e-05, 3.9098e-05, 6.3768e-05, 3.5389e-05, 1.1647e-04,\n 5.0117e-05, 4.4133e-05, 2.4905e-05, 3.5573e-05, 3.3923e-05, 3.5924e-05,\n 3.8528e-05, 3.4575e-05, 5.7792e-05, 6.5856e-05, 3.3194e-05, 1.9389e-05,\n 7.1899e-05, 4.0832e-05, 2.0061e-05, 4.3040e-05, 4.0327e-05, 3.0808e-05,\n 3.2640e-05, 3.1220e-05, 6.4329e-05, 3.0634e-05, 3.2343e-05, 3.5100e-05,\n 6.0196e-05, 4.4152e-05, 4.1170e-05, 6.4853e-05, 5.1989e-05, 3.5338e-05,\n 3.7096e-05, 4.1035e-05, 4.0931e-05, 3.1642e-05, 3.5573e-05, 2.9296e-05,\n 2.0354e-05, 3.3999e-05, 4.5529e-05, 3.6987e-05, 2.9358e-05, 3.4570e-05,\n 5.3321e-05, 3.6979e-05, 7.8052e-05, 3.7314e-05, 3.6504e-05, 3.8302e-05,\n 6.6283e-05, 2.7797e-05, 2.2336e-05, 3.2768e-05, 4.5686e-05, 2.2881e-05,\n 2.3388e-05, 2.9245e-05, 4.5418e-05, 2.4719e-05, 4.0572e-05, 3.0097e-05,\n 3.6141e-05, 2.0446e-05, 3.5067e-05, 3.8878e-05, 3.6408e-05, 3.8236e-05,\n 4.0287e-05, 3.6660e-06], device='cuda:0')"
14
+ },
15
+ "2": {
16
+ "step": "tensor(33786.)",
17
+ "exp_avg": "tensor([-6.5717e-04, -2.7940e-03, -2.2017e-04, -2.9694e-03, 2.1555e-03,\n -4.9024e-05, 5.8688e-03, 4.0256e-03, -4.8915e-03, 4.3122e-03,\n 9.9433e-04, -4.7252e-03, -1.5315e-03, 4.0009e-03, -3.6026e-03,\n 2.6282e-03, 3.5389e-03, 1.5753e-03, -4.9396e-03, -2.9654e-03,\n -1.0300e-03, 1.7895e-03, 4.0733e-03, 3.8169e-03, -5.5863e-03,\n -4.9477e-03, 2.1834e-04, 3.7806e-03, 4.5194e-04, -3.8660e-03,\n 6.7774e-04, -2.0817e-04, -8.4781e-03, 2.5279e-03, -2.5020e-03,\n 1.2513e-03, -8.9791e-04, 2.8137e-03, 3.8251e-03, -1.8723e-03,\n -1.5897e-03, -5.5273e-03, -5.6052e-45, 3.2851e-03, -7.7513e-03,\n -8.6322e-04, -1.7871e-03, 6.3944e-04, -2.7256e-03, -6.0083e-03,\n 6.0178e-03, 2.2051e-03, -1.6459e-03, -2.9862e-03, -1.3240e-03,\n -3.5713e-03, -3.5608e-03, 3.7972e-04, 4.6647e-04, -2.1127e-03,\n -1.5372e-03, -1.1842e-03, 4.5357e-03, 2.8495e-03, 2.5907e-03,\n 6.3857e-03, -1.1564e-03, 1.9037e-03, 4.0332e-03, 2.2696e-03,\n 6.2176e-03, -2.2702e-04, 4.5850e-03, 7.6597e-04, 9.4669e-04,\n -1.1024e-04, 4.0769e-04, -4.1541e-03, -1.2839e-03, -6.8712e-03,\n -4.6571e-04, -2.4211e-03, -2.9598e-03, -8.7631e-04, -6.1048e-03,\n -6.8165e-03, 1.0808e-03, -1.2700e-02, -4.4225e-03, 6.1460e-03,\n 1.0579e-02, -1.8109e-03, 2.2637e-03, 1.6899e-04, -3.5663e-03,\n 1.4800e-03, 3.2580e-03, 1.7748e-03, 8.9426e-03, 3.0093e-03,\n 3.6681e-04, 6.1694e-03, 4.8504e-03, -3.3909e-03, -1.2699e-04,\n 6.2014e-03, -5.5733e-03, 7.5208e-04, -1.3582e-03, -2.0973e-03,\n -8.4694e-03, -5.3365e-03, -1.9499e-03, 5.8492e-05, 7.4504e-03,\n -1.8507e-03, -1.5614e-03, -7.9707e-03, -2.3101e-03, -4.0343e-03,\n 4.9484e-03, 7.7342e-03, -2.4605e-04, 5.2559e-03, 3.0887e-03,\n 8.3443e-05, 1.4959e-03, 3.2481e-03, -7.9262e-03, -8.5330e-05,\n 2.4891e-03, 8.2055e-04, 7.1905e-03, 9.6975e-04, 1.2895e-04,\n -1.8810e-03, 1.1564e-03, -2.0146e-03, 1.0313e-03, 5.3426e-03,\n 1.4069e-03, 4.9430e-03, 5.7087e-03, -1.8572e-03, -1.2117e-02,\n 3.8765e-03, -3.3009e-04, 1.9042e-03, -4.5237e-03, 2.3251e-03,\n 2.0547e-03, 4.0952e-04, 2.6425e-03, 2.6832e-04, 3.1994e-03,\n -2.3221e-03, -6.9143e-03, 3.3947e-03, 1.6444e-03, 1.2647e-03,\n -4.3410e-04, 7.3818e-04, 4.8697e-03, 4.3951e-03, 8.8060e-04,\n 2.4892e-03, -6.9117e-04, 4.7901e-03, 7.4892e-05, 2.3764e-04,\n -6.2338e-04, 1.6010e-03, -2.0060e-03, -2.6850e-03, 7.3776e-03,\n 1.0461e-03, -5.4463e-03, 3.5965e-03, -4.6495e-03, 4.1646e-03,\n 4.2866e-05, 2.3737e-03, 6.6765e-03, 7.3603e-04, -1.2914e-03,\n -6.1797e-04, 3.5135e-03, -5.5162e-04, 2.6346e-03, 4.1069e-03,\n 2.5539e-03, -1.2934e-03, -5.0605e-03, -5.8740e-04, -3.9509e-03,\n 5.8398e-04, -1.4364e-03, 8.3062e-04, 3.1212e-03, -8.1413e-04,\n 3.3369e-03, -4.0919e-03, -1.8032e-03, 1.6149e-04, -9.5470e-04,\n -5.2607e-03, -2.4175e-04, -1.8946e-03, -5.7767e-03, -3.7504e-03,\n -1.2469e-03, -2.5190e-03, -1.5875e-03, -1.1330e-03, 3.7709e-03,\n -2.3420e-03, -5.2117e-03, -1.5157e-03, 1.3730e-03, 1.3470e-03,\n -1.5943e-03, -1.3623e-03, 2.9961e-03, 3.1443e-03, 7.3412e-03,\n -2.8759e-03, -8.6861e-04, 4.1069e-03, -1.5279e-03, 8.0162e-05,\n 1.4829e-03, -6.7355e-03, -8.8078e-03, 6.0523e-03, -1.1701e-03,\n 5.9685e-04, 4.0475e-03, 5.6052e-45, 4.6661e-03, -7.9275e-05,\n -5.6302e-03, -2.0874e-04, 6.6235e-03, -2.0976e-04, -3.3536e-03,\n 7.5190e-03, -3.3811e-03, 2.8835e-03, 2.3256e-04, 3.7235e-05,\n -2.9414e-03, 2.2007e-03, -6.8712e-03, -1.7693e-03, 6.1567e-04,\n 1.8875e-03, 3.5931e-03, -2.4673e-03, -5.6110e-04, 2.0298e-03,\n 5.1943e-04, -1.8044e-03, 2.6817e-04, 8.8301e-03, -2.6236e-03,\n -2.5089e-03, -1.9350e-03, -2.9620e-04, -2.5717e-03, 9.3351e-03,\n -4.3509e-03, -1.0976e-03, -1.1548e-03, -3.2089e-03, 3.4919e-03,\n 5.9738e-04, -1.7291e-03, 4.2512e-03, 9.6272e-03, -1.5789e-03,\n -2.2874e-03, -4.2612e-04, -5.6131e-03, 2.3565e-03, -1.3682e-03,\n 2.3767e-03, -5.6995e-03, 4.7370e-03, 9.0363e-03, -2.4804e-03,\n 3.9747e-03, 1.6626e-03, 1.1815e-03, -5.7573e-04, -1.2549e-03,\n -1.6632e-03, -2.5812e-03, 2.0371e-03, 9.4315e-03, 9.7276e-04,\n -3.8903e-03, 1.4682e-03, 1.0085e-03, -2.1484e-04, 3.2489e-03,\n -4.6255e-03, 2.7441e-03, -3.9551e-03, -7.2200e-03, 1.0879e-02,\n -3.5588e-03, 6.2576e-03, -4.9052e-03, -1.2541e-03, 7.2481e-03,\n 4.4254e-03, 3.3091e-03, 2.3658e-03, -1.0566e-03, 8.2554e-04,\n -5.4680e-03, -7.1394e-03, -3.8967e-03, 7.0076e-03, -8.9836e-04,\n -2.1421e-03, 2.5237e-03, 3.8933e-03, 2.9008e-03, 4.4238e-04,\n 1.1739e-03, 1.3093e-03, -1.6229e-03, -3.7722e-03, -2.7400e-03,\n 1.6808e-03, -1.6662e-03, 3.1946e-03, 4.4793e-03, -5.2520e-03,\n 2.2419e-03, -1.6956e-03, 7.8937e-04, -1.1779e-03, 7.7809e-04,\n 7.4021e-03, 5.1331e-03, 3.7180e-03, -1.1992e-03, 7.8845e-04,\n 1.4067e-03, -2.4852e-03, -2.6607e-03, -4.5949e-03, 4.4674e-03,\n -2.3863e-03, 4.4413e-03, -4.4423e-03, -5.9404e-03, 1.4085e-03,\n 1.5829e-03, 3.2499e-03, -7.1011e-03, 2.6142e-03, 2.8360e-04,\n -1.3879e-03, 3.2671e-03, -8.5057e-03, -8.1070e-03, -1.9651e-03,\n -2.0524e-03, 2.3683e-03, -2.5778e-03, -1.0283e-02, -1.5734e-03,\n 3.8888e-03, -1.4845e-04, -5.0199e-04, -9.5095e-04, 1.5587e-04,\n -5.9045e-03, 1.0887e-03, -8.0646e-03, -4.7298e-03, 2.9666e-04,\n -1.7044e-04, -5.3247e-03, -5.5400e-03, -7.3157e-03, -1.5130e-03,\n -2.9101e-03, 5.0138e-03, 5.2270e-03, 3.6707e-03, -9.6334e-04,\n -5.1775e-04, -1.5238e-03, 1.5919e-03, -7.4701e-04, 6.5073e-04,\n -4.5016e-03, 5.1904e-03, -1.0366e-03, -1.6416e-03, 6.0728e-04,\n -1.0645e-03, -5.3113e-03, 2.0185e-03, 5.0073e-03, -9.5980e-04,\n 5.6252e-04, -7.8147e-05, 4.3469e-03, -4.1121e-03, -1.6132e-03,\n -8.9609e-03, -3.6481e-03, -1.3875e-03, 6.4510e-03, 1.9335e-03,\n -7.2241e-04, -5.0857e-03, 4.7985e-03, 3.5282e-03, -7.4015e-04,\n -2.8698e-03, 3.1757e-03, 3.2963e-03, 4.0980e-03, -1.5951e-03,\n -1.0304e-03, -4.1050e-04, -2.0293e-03, 6.7205e-03, -5.8441e-03,\n 7.4232e-04, 3.1269e-03, 1.3113e-03, -2.1254e-04, -2.2896e-04,\n -1.4022e-03, 2.4935e-03, 5.5221e-03, -8.6807e-03, 3.1945e-03,\n -5.5099e-04, 3.1659e-03, 4.8062e-03, 2.5105e-03, -1.2977e-03,\n 3.0159e-03, -3.1306e-05, -1.9931e-03, 5.0833e-03, 1.4089e-03,\n 5.0543e-04, -6.0436e-04, -5.7340e-03, 9.0368e-04, -2.1454e-03,\n 5.1670e-03, -2.7892e-03, 3.3144e-03, 2.9466e-04, -1.3325e-03,\n 2.1512e-03, 3.6460e-03, 9.7078e-04, -4.3674e-03, -2.2156e-03,\n 1.0430e-03, 3.3188e-04, 8.6878e-03, 3.5818e-03, -3.3451e-03,\n -3.6370e-03, 1.9204e-03, -1.2300e-03, -2.6380e-03, 1.7101e-03,\n 2.2692e-03, -7.5329e-05, 1.8771e-03, -1.8963e-03, 2.9655e-04,\n -1.1224e-03, -5.2508e-03, -2.3701e-03, -2.5511e-03, 5.3509e-03,\n -1.3419e-03, 2.3433e-03, -6.7808e-03, -3.8937e-03, 7.7232e-03,\n 4.5273e-03, 3.4372e-04, 1.1526e-03, -4.1765e-04, 4.0345e-04,\n 2.4832e-03, -7.9734e-03, 3.5306e-03, 4.7581e-03, -2.9306e-03,\n -5.2180e-03, 5.7224e-04, 2.5728e-03, 6.9855e-04, -4.9693e-04,\n -8.8126e-04, -5.6052e-45], device='cuda:0')",
18
+ "exp_avg_sq": "tensor([3.8318e-04, 2.6239e-04, 1.0912e-04, 2.0105e-04, 1.2721e-04, 3.2086e-04,\n 2.1043e-04, 2.9916e-04, 2.5453e-04, 3.6106e-04, 4.1981e-04, 3.8882e-04,\n 3.4067e-04, 1.8691e-04, 2.3059e-04, 2.2264e-04, 2.3165e-04, 2.0442e-04,\n 2.2254e-04, 2.9697e-04, 2.7467e-04, 1.1582e-04, 1.3839e-04, 3.3458e-04,\n 5.1416e-04, 2.3786e-04, 1.8191e-04, 3.9957e-04, 2.4595e-04, 2.7300e-04,\n 1.7535e-04, 2.2917e-04, 2.0295e-04, 2.1481e-04, 1.4796e-04, 1.4627e-04,\n 1.4087e-04, 1.9017e-04, 1.5702e-04, 1.5096e-04, 1.2451e-04, 9.3812e-05,\n 1.5787e-18, 2.2161e-04, 5.0696e-04, 1.6852e-04, 1.5833e-04, 6.8805e-05,\n 4.3426e-04, 3.0037e-04, 3.8563e-04, 1.3859e-04, 1.3631e-04, 3.0543e-04,\n 1.8408e-04, 2.0316e-04, 2.6006e-04, 1.2775e-04, 1.6322e-04, 1.1697e-04,\n 2.1584e-04, 2.0911e-04, 2.2156e-04, 1.5239e-04, 1.2706e-04, 1.3196e-04,\n 2.5467e-04, 1.4219e-04, 2.7228e-04, 1.3676e-04, 2.4642e-04, 1.6190e-04,\n 2.4407e-04, 2.7295e-04, 1.5701e-04, 2.1729e-04, 2.2186e-04, 1.9600e-04,\n 2.5242e-04, 2.6663e-04, 2.7963e-04, 1.5638e-04, 1.5330e-04, 9.1184e-05,\n 1.5362e-04, 3.4145e-04, 4.7367e-04, 2.1483e-04, 9.6318e-05, 5.2313e-04,\n 2.4721e-04, 1.8105e-04, 2.4239e-04, 2.9231e-04, 2.9242e-04, 2.7460e-04,\n 2.4325e-04, 2.1934e-04, 3.2261e-04, 3.8188e-04, 2.7613e-04, 2.9343e-04,\n 2.7457e-04, 2.2051e-04, 2.9992e-04, 4.6299e-04, 1.7668e-04, 5.0856e-04,\n 2.1867e-04, 1.9025e-04, 1.8888e-04, 3.5636e-04, 2.6574e-04, 1.7341e-04,\n 2.0768e-04, 1.2687e-04, 2.0253e-04, 2.6497e-04, 1.6943e-04, 3.3187e-04,\n 1.8007e-04, 3.2212e-04, 4.5750e-04, 4.7688e-04, 4.0581e-04, 3.8271e-04,\n 2.3536e-04, 3.4515e-04, 3.7978e-04, 4.7588e-04, 2.5553e-04, 2.5232e-04,\n 3.8279e-04, 2.3289e-04, 5.0822e-04, 1.3218e-04, 1.9811e-04, 3.5882e-04,\n 1.7321e-04, 1.2532e-04, 1.7874e-04, 2.0575e-04, 2.5759e-04, 2.7606e-04,\n 2.1158e-04, 1.8670e-04, 1.1101e-04, 4.1218e-04, 2.4123e-04, 3.8740e-04,\n 9.7309e-05, 7.9573e-04, 1.4668e-04, 5.5370e-04, 1.0598e-04, 1.0291e-04,\n 4.1549e-04, 2.9654e-04, 2.5979e-04, 1.5348e-04, 3.4878e-04, 1.4252e-04,\n 4.0972e-04, 2.2934e-04, 1.1217e-04, 4.1552e-04, 2.6497e-04, 1.0258e-04,\n 8.7501e-05, 2.1905e-04, 1.0341e-04, 2.6891e-04, 2.2489e-04, 4.2864e-04,\n 2.5388e-04, 1.5066e-04, 2.6652e-04, 1.1390e-04, 1.3843e-04, 2.3560e-04,\n 1.9933e-04, 2.1241e-04, 6.4854e-04, 2.2841e-04, 5.8574e-04, 1.6184e-04,\n 6.5656e-04, 1.5005e-04, 1.8122e-04, 9.9073e-05, 1.7361e-04, 2.9154e-04,\n 1.5384e-04, 1.2095e-04, 1.3549e-04, 9.6821e-05, 2.3507e-04, 1.4401e-04,\n 8.8195e-05, 1.0288e-04, 1.3056e-04, 2.0559e-04, 1.4172e-04, 1.0757e-04,\n 6.7450e-05, 2.1946e-04, 1.6669e-04, 1.3640e-04, 3.3827e-04, 1.0230e-04,\n 2.8423e-04, 2.7225e-04, 3.4165e-05, 1.4238e-04, 3.1215e-04, 2.0665e-04,\n 8.6453e-05, 4.6312e-04, 2.6889e-04, 4.2793e-04, 1.5890e-04, 1.6901e-04,\n 2.0107e-04, 1.7126e-04, 2.2681e-04, 3.3800e-04, 1.1438e-04, 1.1597e-04,\n 8.3076e-05, 2.1046e-04, 1.9495e-04, 1.4282e-04, 6.6002e-04, 2.7272e-04,\n 1.0516e-04, 1.6130e-04, 1.8541e-04, 2.2393e-19, 1.6104e-04, 4.8705e-04,\n 1.2642e-04, 3.3901e-04, 4.4997e-04, 3.2671e-04, 1.9617e-04, 3.9716e-04,\n 1.4681e-04, 1.1894e-04, 2.8683e-04, 1.6465e-04, 3.3411e-04, 9.4497e-05,\n 4.7364e-04, 2.7090e-04, 1.4494e-04, 2.2594e-04, 1.9322e-04, 1.8861e-04,\n 1.2148e-04, 2.0115e-04, 2.5918e-04, 2.8832e-04, 1.7027e-04, 1.7186e-04,\n 2.0695e-04, 2.1534e-04, 2.2217e-04, 2.3591e-04, 2.2456e-04, 3.9010e-04,\n 3.6906e-04, 3.1773e-04, 6.8390e-05, 2.3398e-04, 1.2748e-04, 1.6276e-04,\n 1.9064e-04, 2.6569e-04, 2.3610e-04, 2.2862e-04, 2.1882e-04, 8.3687e-05,\n 1.5013e-04, 1.5551e-04, 1.6617e-04, 2.4407e-04, 3.0803e-04, 1.3514e-04,\n 5.5903e-04, 1.9416e-04, 1.3696e-04, 8.1813e-05, 1.8028e-04, 1.0737e-04,\n 2.3430e-04, 1.5372e-04, 2.0857e-04, 1.7829e-04, 4.5659e-04, 1.9205e-04,\n 1.2972e-04, 2.0214e-04, 3.5305e-04, 1.9170e-04, 2.2668e-04, 1.9419e-04,\n 1.8632e-04, 1.8691e-04, 7.3114e-04, 4.9573e-04, 4.8634e-04, 2.3758e-04,\n 1.6709e-04, 1.9643e-04, 1.8555e-04, 2.8530e-04, 1.7234e-04, 9.9767e-05,\n 2.2877e-04, 2.8056e-04, 6.7004e-04, 4.2710e-04, 2.2078e-04, 3.0046e-04,\n 3.9907e-04, 4.7010e-04, 2.1702e-04, 3.6857e-04, 1.5267e-04, 2.8290e-04,\n 3.9688e-04, 3.7438e-04, 7.7597e-05, 1.5391e-04, 1.8315e-04, 1.7905e-04,\n 9.3642e-05, 1.4411e-04, 2.4578e-04, 2.6642e-04, 1.8762e-04, 1.4567e-04,\n 2.0394e-04, 3.4803e-04, 2.0443e-04, 2.7572e-04, 1.9687e-04, 1.1064e-04,\n 1.7293e-04, 2.6810e-04, 2.4691e-04, 2.3051e-04, 2.2843e-04, 2.4539e-04,\n 2.5532e-04, 1.6099e-04, 2.2828e-04, 1.4664e-04, 1.2585e-04, 1.3975e-04,\n 1.2246e-04, 4.7174e-04, 4.3732e-04, 3.8610e-04, 2.0793e-04, 2.2324e-04,\n 7.3827e-05, 3.6569e-04, 1.0795e-04, 1.8247e-04, 1.1416e-04, 2.5211e-04,\n 3.2219e-04, 2.3450e-04, 1.6445e-04, 2.0039e-04, 1.2339e-04, 1.2621e-04,\n 2.4620e-04, 1.0342e-04, 2.5802e-04, 1.9386e-04, 2.9862e-04, 2.6900e-04,\n 2.3302e-04, 4.4692e-04, 1.6601e-04, 1.7985e-04, 2.5287e-04, 1.4937e-04,\n 1.6121e-04, 1.5798e-04, 1.8088e-04, 4.8286e-04, 1.6206e-04, 1.0688e-04,\n 1.8233e-04, 4.7897e-04, 2.2529e-04, 3.6076e-04, 1.9246e-04, 1.1333e-04,\n 4.6564e-04, 1.9516e-04, 2.3808e-04, 1.4462e-04, 5.9781e-04, 5.6240e-05,\n 3.2794e-04, 2.8069e-04, 3.2848e-04, 6.2395e-05, 3.3778e-04, 2.1290e-04,\n 1.9971e-04, 5.2238e-04, 5.0082e-04, 3.0295e-04, 2.5750e-04, 1.6190e-04,\n 2.1261e-04, 1.8032e-04, 4.5700e-04, 2.8562e-04, 5.8630e-04, 2.3287e-04,\n 2.2984e-04, 2.1298e-04, 3.0230e-04, 1.6935e-04, 1.9341e-04, 1.1116e-04,\n 1.7192e-04, 2.4421e-04, 3.3205e-04, 2.2631e-04, 2.7806e-04, 4.2914e-04,\n 2.4683e-04, 1.2642e-04, 2.9488e-04, 2.3598e-04, 1.9111e-04, 7.6046e-04,\n 1.9875e-04, 3.3033e-04, 2.9834e-04, 1.2958e-04, 2.4052e-04, 2.9400e-04,\n 2.9294e-04, 1.4525e-04, 2.3617e-04, 5.6576e-04, 2.9540e-04, 1.1426e-04,\n 3.0814e-04, 1.8310e-04, 1.1156e-04, 1.7535e-04, 2.1770e-04, 1.4967e-04,\n 2.3846e-04, 1.5565e-04, 1.8694e-04, 1.5249e-04, 1.3204e-04, 2.3436e-04,\n 4.0989e-04, 2.8727e-04, 2.8602e-04, 4.2500e-04, 2.4182e-04, 1.5525e-04,\n 2.2153e-04, 2.2336e-04, 2.3856e-04, 2.0530e-04, 3.8430e-04, 1.5607e-04,\n 8.8844e-05, 5.0932e-04, 2.3518e-04, 1.6738e-04, 1.8660e-04, 1.3752e-04,\n 1.3213e-04, 2.1500e-04, 4.3794e-04, 1.0709e-04, 1.8436e-04, 1.2613e-04,\n 3.6344e-04, 1.6848e-04, 1.8614e-04, 2.6874e-04, 2.2690e-04, 1.9681e-04,\n 1.2327e-04, 1.9341e-04, 2.0584e-04, 1.3991e-04, 1.8155e-04, 2.1926e-04,\n 1.4031e-04, 2.4096e-04, 1.2949e-04, 1.6654e-04, 9.0166e-05, 1.0871e-04,\n 1.0447e-04, 1.7262e-15], device='cuda:0')"
19
+ },
20
+ "3": {
21
+ "step": "tensor(33786.)",
22
+ "exp_avg": "tensor([-3.3279e-04, -1.0597e-03, -2.5859e-04, -1.2833e-03, 9.3163e-04,\n -1.5813e-05, 2.4990e-03, 2.1171e-03, -1.9629e-03, 1.8787e-03,\n -3.4636e-04, -2.1274e-03, -8.1621e-04, 1.8384e-03, -1.4522e-03,\n 1.4199e-03, 1.7861e-03, 6.6394e-04, -2.2498e-03, -1.3635e-03,\n -6.4393e-04, 6.6040e-04, 1.6461e-03, 1.3942e-03, -2.7430e-03,\n -2.0864e-03, 2.1844e-04, 1.6205e-03, -6.6254e-05, -1.5677e-03,\n 4.0642e-04, 1.9686e-05, -4.7964e-03, 1.0339e-03, -1.0643e-03,\n 4.4470e-04, -3.7466e-04, 1.0769e-03, 1.8200e-03, -7.9994e-04,\n -7.4419e-04, -3.4621e-03, 5.6052e-45, 2.0476e-03, -3.4557e-03,\n -1.3963e-04, -4.7333e-04, 4.9394e-04, -1.0130e-03, -3.0114e-03,\n 3.7808e-03, 1.0013e-03, -8.8420e-04, -1.7372e-03, -5.4263e-04,\n -1.4868e-03, -1.2616e-03, 6.2421e-04, 5.1222e-04, -6.7908e-04,\n -7.1210e-04, -3.6268e-04, 2.2161e-03, 1.4457e-03, 1.4206e-03,\n 2.7738e-03, -7.4872e-04, 6.0113e-04, 1.8825e-03, 1.3278e-03,\n 3.0882e-03, -3.9327e-04, 1.8025e-03, 3.2988e-04, 8.1745e-04,\n 1.3136e-04, 1.6329e-04, -1.7318e-03, -1.1455e-03, -3.8174e-03,\n -1.7043e-04, -9.4439e-04, -1.4184e-03, -7.6649e-04, -2.9839e-03,\n -2.8994e-03, 7.5506e-04, -5.3296e-03, -1.8493e-03, 2.2285e-03,\n 4.1237e-03, -9.0881e-04, 1.5775e-03, 4.3136e-04, -1.7189e-03,\n 8.9071e-04, 1.7861e-03, 9.6422e-04, 3.4010e-03, 8.9791e-04,\n 2.8716e-04, 2.8269e-03, 2.8205e-03, -2.1034e-03, -2.2068e-04,\n 3.0500e-03, -2.8011e-03, -1.5949e-04, -3.1659e-04, -1.1244e-03,\n -5.0784e-03, -2.5609e-03, -9.1347e-04, 1.1636e-04, 3.3695e-03,\n -8.9110e-04, -6.6268e-04, -3.9785e-03, -1.1130e-03, -1.1422e-03,\n 2.0088e-03, 3.5547e-03, 1.0227e-04, 2.4055e-03, 1.2435e-03,\n -2.6943e-04, 5.6134e-04, 2.0395e-03, -3.5165e-03, 3.3470e-04,\n 1.2450e-03, 6.9380e-04, 3.1312e-03, 3.1100e-04, 6.2462e-04,\n -9.3954e-04, 1.0538e-03, -5.3342e-04, 6.3466e-04, 2.0352e-03,\n 9.5023e-04, 2.0548e-03, 2.1894e-03, -1.3350e-03, -4.9047e-03,\n 1.9900e-03, -6.4725e-05, 1.0022e-03, -1.3421e-03, 1.3547e-03,\n 9.8762e-04, -2.2277e-04, 9.9476e-04, 5.8482e-04, 1.5200e-03,\n -9.0580e-04, -2.7948e-03, 1.0063e-03, 5.3908e-04, 4.8812e-04,\n -5.3148e-04, 4.7560e-04, 1.7905e-03, 1.8594e-03, 2.9355e-04,\n 1.1813e-03, -5.6289e-04, 2.3252e-03, 8.5905e-05, 3.5569e-04,\n -4.0454e-04, 1.0119e-03, -1.6496e-03, -1.6074e-03, 3.1952e-03,\n 4.3365e-04, -2.9612e-03, 1.6075e-03, -2.5167e-03, 2.1435e-03,\n 1.2433e-04, 9.2633e-04, 2.5839e-03, 5.0839e-04, 2.6874e-05,\n -4.5193e-04, 1.4535e-03, -4.2117e-04, 1.3198e-03, 1.6278e-03,\n 9.7827e-04, -1.0906e-04, -2.9283e-03, -1.6979e-04, -1.8377e-03,\n 4.2543e-04, -8.8731e-04, 5.7618e-04, 1.2317e-03, -6.6211e-04,\n 1.4341e-03, -1.5565e-03, -9.7435e-04, 1.1048e-05, -3.9391e-04,\n -2.6094e-03, -1.8834e-04, -8.5813e-04, -2.2470e-03, -2.0339e-03,\n -4.5588e-04, -1.1902e-03, -9.0308e-04, -5.2026e-04, 1.6996e-03,\n -8.7634e-04, -2.4437e-03, -5.3384e-04, 2.3023e-04, 5.0528e-04,\n -9.7525e-04, -8.7930e-04, 1.1750e-03, 1.1878e-03, 3.1101e-03,\n -1.2037e-03, -3.6700e-04, 2.0515e-03, -8.3650e-04, 4.1305e-04,\n 8.8932e-04, -3.1630e-03, -2.9355e-03, 2.8497e-03, -5.5448e-04,\n 3.5172e-04, 1.6862e-03, 5.6052e-45, 2.2334e-03, -2.4839e-05,\n -2.7272e-03, -1.7457e-04, 2.5125e-03, -2.5375e-04, -2.1203e-03,\n 3.0576e-03, -1.4312e-03, 1.6282e-03, 4.1400e-04, -1.8250e-04,\n -1.4479e-03, 1.2723e-03, -2.6014e-03, -1.0144e-03, 4.4436e-04,\n 1.0763e-03, 1.3126e-03, -9.4914e-04, -6.4021e-04, 7.9008e-04,\n 1.0724e-04, -1.4161e-03, -2.1945e-05, 4.8063e-03, -1.1636e-03,\n -1.1295e-03, -4.1809e-04, 3.7793e-04, -1.0903e-03, 4.1531e-03,\n -1.5955e-03, -6.7415e-04, -5.3504e-04, -1.1914e-03, 1.9585e-03,\n -3.2446e-04, -9.8567e-04, 2.3097e-03, 3.9189e-03, -8.9597e-04,\n -7.4549e-04, -2.5415e-04, -2.7581e-03, 1.4484e-03, -3.6337e-04,\n 9.2442e-04, -1.8823e-03, 2.1307e-03, 3.5029e-03, -1.2673e-03,\n 2.2015e-03, 8.4123e-04, 5.5887e-04, -2.7555e-04, -2.6788e-04,\n -2.8653e-04, -1.3068e-03, 1.2928e-03, 4.4357e-03, 3.5733e-04,\n -2.0288e-03, 1.1465e-04, -1.2784e-04, -1.5071e-04, 1.6221e-03,\n -1.7772e-03, 1.2772e-03, -1.6121e-03, -2.6474e-03, 3.9361e-03,\n -1.5099e-03, 2.8346e-03, -1.9671e-03, -3.9762e-04, 3.0985e-03,\n 1.6454e-03, 1.8125e-03, 1.1562e-03, -4.9818e-04, 5.6465e-04,\n -2.6978e-03, -2.9189e-03, -2.0374e-03, 3.1564e-03, -3.1068e-04,\n -8.2319e-04, 1.3739e-03, 2.0743e-03, 1.6215e-03, 3.3594e-04,\n 8.0072e-05, 2.8403e-04, -6.4317e-04, -1.9779e-03, -9.3284e-04,\n 6.5223e-04, -7.8817e-04, 1.1788e-03, 2.1699e-03, -1.6471e-03,\n 1.1768e-03, -6.2059e-04, 3.7919e-04, -4.4896e-04, 5.0649e-04,\n 3.7622e-03, 2.9485e-03, 1.9965e-03, -5.9569e-04, 4.8832e-04,\n 4.7023e-04, -1.2395e-03, -1.3929e-03, -1.8348e-03, 1.9830e-03,\n -7.6642e-04, 1.9453e-03, -1.9711e-03, -3.0374e-03, 5.2914e-04,\n 5.9716e-04, 1.6366e-03, -3.7403e-03, 1.1292e-03, -1.9638e-04,\n -5.9520e-04, 1.4739e-03, -3.5750e-03, -4.1887e-03, -8.5936e-04,\n -6.4417e-04, 1.1820e-03, -7.3529e-04, -4.7861e-03, -9.9776e-04,\n 2.2607e-03, 2.2306e-04, -3.1330e-04, -2.1546e-04, 4.1124e-04,\n -2.7176e-03, 5.7918e-04, -4.4128e-03, -2.0297e-03, 2.5342e-04,\n 3.6419e-05, -1.9922e-03, -2.4001e-03, -2.9271e-03, -9.4626e-04,\n -8.9317e-04, 2.4210e-03, 2.5318e-03, 1.5106e-03, -7.1311e-05,\n -1.7083e-04, -7.0803e-04, 9.4234e-04, 1.2080e-06, -1.3762e-04,\n -2.2200e-03, 2.7265e-03, 1.7720e-05, -7.8270e-04, -5.5130e-04,\n -4.9180e-04, -2.3918e-03, 8.5722e-04, 1.9168e-03, 1.3816e-04,\n 3.1509e-04, -1.0114e-04, 2.1237e-03, -1.9205e-03, -7.4738e-04,\n -2.3916e-03, -1.2097e-03, -1.2344e-04, 2.8174e-03, 6.4609e-04,\n 1.8453e-04, -2.5646e-03, 1.9061e-03, 1.2537e-03, -2.8866e-04,\n -1.2626e-03, 1.6689e-03, 1.6033e-03, 1.8075e-03, -5.5487e-04,\n -3.8532e-04, 1.6391e-05, -7.0286e-04, 2.7701e-03, -2.6359e-03,\n 2.2969e-04, 1.3829e-03, 7.2024e-04, -2.5696e-04, -2.4859e-04,\n -3.8842e-04, 1.3661e-03, 2.6077e-03, -4.5137e-03, 1.7076e-03,\n -4.9623e-05, 1.4353e-03, 2.6706e-03, 1.2531e-03, -4.4211e-04,\n 1.3484e-03, 2.5076e-04, -7.0974e-04, 1.3869e-03, -4.2019e-04,\n 3.2804e-04, -8.7216e-04, -3.0217e-03, 4.3498e-04, -1.3306e-03,\n 2.7053e-03, -1.3033e-03, 1.3192e-03, 1.2092e-04, -3.4289e-04,\n 9.2060e-04, 1.9389e-03, 1.0108e-03, -1.5533e-03, -4.9388e-04,\n 3.6353e-04, 1.0232e-03, 4.2737e-03, 1.5600e-03, -1.8705e-03,\n -1.6505e-03, 6.9375e-04, -9.9822e-04, -1.0224e-03, 7.6707e-04,\n 1.2328e-03, -4.6974e-04, 8.9333e-04, -1.0307e-03, 5.0452e-04,\n -1.1920e-04, -3.3833e-03, -1.3219e-03, -9.7030e-04, 3.1755e-03,\n -1.8273e-04, 9.9352e-04, -3.0239e-03, -2.4323e-03, 2.6475e-03,\n 1.6612e-03, 2.0134e-04, 3.9166e-04, -2.2391e-04, 3.1888e-04,\n 1.2840e-03, -3.3484e-03, 1.3525e-03, 2.4825e-03, -1.2371e-03,\n -2.0933e-03, -6.2974e-05, 1.1197e-03, 6.0984e-04, 9.5509e-07,\n -2.1335e-04, 5.6052e-45], device='cuda:0')",
23
+ "exp_avg_sq": "tensor([6.4806e-05, 4.3480e-05, 3.5192e-05, 3.5335e-05, 2.5271e-05, 6.5255e-05,\n 4.3338e-05, 7.6170e-05, 5.0814e-05, 6.7398e-05, 8.3245e-05, 7.5811e-05,\n 9.2656e-05, 3.6594e-05, 6.5682e-05, 6.6398e-05, 4.6893e-05, 4.9612e-05,\n 4.5021e-05, 6.6261e-05, 5.4968e-05, 2.8410e-05, 3.0432e-05, 6.5059e-05,\n 1.2749e-04, 6.2530e-05, 3.7457e-05, 7.6805e-05, 5.4900e-05, 5.3086e-05,\n 3.0750e-05, 3.4319e-05, 6.3123e-05, 5.9890e-05, 3.0847e-05, 4.7393e-05,\n 3.4124e-05, 3.0579e-05, 3.4262e-05, 2.7607e-05, 2.7998e-05, 2.8418e-05,\n 2.8237e-20, 7.7438e-05, 1.2972e-04, 3.2290e-05, 4.0348e-05, 1.5280e-05,\n 6.5263e-05, 1.0435e-04, 1.2447e-04, 2.8034e-05, 3.0540e-05, 5.0855e-05,\n 6.3546e-05, 3.7134e-05, 4.5853e-05, 5.0561e-05, 3.8518e-05, 3.1814e-05,\n 7.7304e-05, 3.6304e-05, 4.9270e-05, 3.8020e-05, 2.9470e-05, 2.9376e-05,\n 4.6853e-05, 3.0879e-05, 5.3720e-05, 3.5002e-05, 4.9490e-05, 9.2424e-05,\n 3.5198e-05, 3.9427e-05, 3.7218e-05, 4.1293e-05, 3.6343e-05, 3.9728e-05,\n 5.9582e-05, 6.2460e-05, 5.9556e-05, 4.1734e-05, 3.3025e-05, 2.1699e-05,\n 3.6440e-05, 6.4977e-05, 7.0434e-05, 4.2340e-05, 2.2439e-05, 6.1327e-05,\n 3.9324e-05, 4.7037e-05, 6.7325e-05, 5.2946e-05, 5.3169e-05, 4.7648e-05,\n 7.0279e-05, 4.4195e-05, 6.2814e-05, 4.5161e-05, 4.2218e-05, 5.9686e-05,\n 7.3268e-05, 4.6761e-05, 6.9398e-05, 1.0803e-04, 4.3570e-05, 1.4613e-04,\n 4.0902e-05, 3.9662e-05, 6.2201e-05, 6.3798e-05, 5.0931e-05, 4.5924e-05,\n 5.3194e-05, 3.7371e-05, 5.0632e-05, 7.0283e-05, 2.9640e-05, 5.3727e-05,\n 2.6413e-05, 6.4170e-05, 6.2464e-05, 1.1433e-04, 6.3520e-05, 6.3589e-05,\n 4.3862e-05, 6.6214e-05, 7.1162e-05, 1.3000e-04, 5.6419e-05, 3.7384e-05,\n 7.1354e-05, 4.5134e-05, 1.6520e-04, 3.3752e-05, 5.1104e-05, 5.7983e-05,\n 3.3000e-05, 3.0285e-05, 3.0459e-05, 4.0884e-05, 4.3378e-05, 9.0222e-05,\n 4.4468e-05, 4.7194e-05, 2.7466e-05, 8.3951e-05, 4.7241e-05, 1.0918e-04,\n 2.1505e-05, 2.3639e-04, 3.1028e-05, 1.8137e-04, 2.7889e-05, 3.5759e-05,\n 6.6827e-05, 5.1228e-05, 4.0077e-05, 3.6288e-05, 6.7405e-05, 1.9098e-05,\n 7.0039e-05, 5.0725e-05, 2.0037e-05, 8.6179e-05, 4.5357e-05, 2.3037e-05,\n 1.8764e-05, 5.4263e-05, 3.1090e-05, 6.5867e-05, 4.1979e-05, 9.5810e-05,\n 4.7867e-05, 2.3638e-05, 9.6070e-05, 2.5313e-05, 2.4722e-05, 6.5456e-05,\n 4.2466e-05, 3.8362e-05, 1.2099e-04, 4.3845e-05, 8.6464e-05, 3.7173e-05,\n 1.4561e-04, 3.9891e-05, 4.1949e-05, 1.8793e-05, 3.6665e-05, 5.4839e-05,\n 4.5672e-05, 3.7155e-05, 2.7148e-05, 2.0044e-05, 6.9953e-05, 3.1177e-05,\n 1.8031e-05, 2.2872e-05, 3.4766e-05, 4.7032e-05, 3.6639e-05, 2.3255e-05,\n 2.3757e-05, 4.5472e-05, 3.0891e-05, 2.7542e-05, 6.2121e-05, 2.5337e-05,\n 5.4207e-05, 7.2276e-05, 2.3639e-05, 3.4250e-05, 5.7860e-05, 2.6144e-05,\n 2.1666e-05, 4.6958e-05, 4.7763e-05, 7.7295e-05, 2.9207e-05, 3.2532e-05,\n 3.4164e-05, 2.7856e-05, 4.0847e-05, 7.9518e-05, 2.8176e-05, 2.2870e-05,\n 2.1696e-05, 4.4458e-05, 3.5885e-05, 3.3629e-05, 9.9216e-05, 4.9536e-05,\n 2.8766e-05, 3.8240e-05, 4.0613e-05, 7.8410e-21, 3.8749e-05, 1.1537e-04,\n 3.2919e-05, 8.8917e-05, 6.0590e-05, 7.5031e-05, 8.6656e-05, 6.4765e-05,\n 4.7092e-05, 3.1633e-05, 5.9537e-05, 3.4867e-05, 9.1267e-05, 2.4185e-05,\n 7.4472e-05, 6.6069e-05, 3.0786e-05, 4.5269e-05, 5.9238e-05, 5.4980e-05,\n 4.0353e-05, 5.0959e-05, 4.4615e-05, 6.7119e-05, 3.9159e-05, 5.8476e-05,\n 4.6246e-05, 4.7014e-05, 3.0675e-05, 5.2099e-05, 3.9015e-05, 6.7392e-05,\n 7.3602e-05, 5.6864e-05, 2.5072e-05, 5.1555e-05, 2.9009e-05, 3.9384e-05,\n 5.0101e-05, 7.4945e-05, 4.0414e-05, 4.1621e-05, 5.5686e-05, 2.6355e-05,\n 2.9506e-05, 4.8381e-05, 3.5133e-05, 5.1399e-05, 5.2345e-05, 3.0222e-05,\n 9.5692e-05, 4.2889e-05, 2.7861e-05, 2.7992e-05, 4.4932e-05, 2.6583e-05,\n 4.1154e-05, 3.0135e-05, 4.3828e-05, 5.2873e-05, 1.4352e-04, 4.3699e-05,\n 2.9690e-05, 5.0594e-05, 5.1894e-05, 4.0252e-05, 6.0497e-05, 3.0294e-05,\n 3.6561e-05, 4.9276e-05, 1.1663e-04, 6.3940e-05, 7.5781e-05, 4.9744e-05,\n 2.9482e-05, 3.1275e-05, 4.8491e-05, 4.1429e-05, 4.2653e-05, 2.0444e-05,\n 3.9068e-05, 7.1896e-05, 1.4489e-04, 8.4950e-05, 5.2902e-05, 5.9046e-05,\n 8.0318e-05, 1.1070e-04, 5.3866e-05, 8.1568e-05, 3.9970e-05, 4.6435e-05,\n 9.1255e-05, 5.8962e-05, 1.6177e-05, 3.6684e-05, 3.6135e-05, 4.1342e-05,\n 3.1935e-05, 2.5850e-05, 6.3300e-05, 4.6223e-05, 4.4343e-05, 3.3005e-05,\n 4.0174e-05, 7.4234e-05, 6.5627e-05, 8.8413e-05, 6.0526e-05, 2.8383e-05,\n 4.8775e-05, 7.0203e-05, 4.7168e-05, 4.6913e-05, 4.9886e-05, 3.3963e-05,\n 4.6901e-05, 3.4719e-05, 4.4223e-05, 2.6880e-05, 3.2837e-05, 2.4786e-05,\n 2.3029e-05, 1.1336e-04, 1.0570e-04, 5.9320e-05, 4.8908e-05, 4.5736e-05,\n 2.0099e-05, 6.6545e-05, 2.3845e-05, 2.9542e-05, 2.6321e-05, 5.0067e-05,\n 5.5006e-05, 5.3926e-05, 4.9688e-05, 3.7495e-05, 2.6862e-05, 2.3063e-05,\n 3.7916e-05, 3.0487e-05, 4.8934e-05, 4.3315e-05, 6.8586e-05, 3.7822e-05,\n 4.6759e-05, 1.4043e-04, 3.4510e-05, 4.5980e-05, 4.4416e-05, 4.0072e-05,\n 4.3655e-05, 4.0147e-05, 5.0221e-05, 6.8663e-05, 3.9068e-05, 2.1226e-05,\n 2.9911e-05, 7.4104e-05, 5.6761e-05, 8.6577e-05, 3.7688e-05, 2.6706e-05,\n 7.7720e-05, 3.2907e-05, 4.8580e-05, 3.8524e-05, 1.0651e-04, 1.5477e-05,\n 7.1968e-05, 7.2710e-05, 6.0289e-05, 3.1135e-05, 9.0334e-05, 6.3668e-05,\n 3.6951e-05, 4.7306e-05, 9.8739e-05, 7.5145e-05, 5.6328e-05, 4.4764e-05,\n 4.9762e-05, 3.6579e-05, 6.8990e-05, 5.5827e-05, 7.3073e-05, 4.6379e-05,\n 6.7584e-05, 4.1478e-05, 4.9229e-05, 3.6551e-05, 4.1130e-05, 2.9273e-05,\n 4.5097e-05, 5.2561e-05, 5.2508e-05, 4.3513e-05, 5.5147e-05, 6.7980e-05,\n 4.9210e-05, 2.9870e-05, 5.6859e-05, 5.5917e-05, 4.3939e-05, 2.0522e-04,\n 6.6670e-05, 5.3651e-05, 6.1366e-05, 4.0138e-05, 6.3458e-05, 5.0054e-05,\n 4.8590e-05, 3.2070e-05, 6.1344e-05, 1.2882e-04, 6.1859e-05, 2.3080e-05,\n 9.2569e-05, 4.0763e-05, 2.4809e-05, 4.1907e-05, 4.8302e-05, 4.0621e-05,\n 3.7213e-05, 4.3323e-05, 4.5604e-05, 3.3011e-05, 3.1869e-05, 5.4907e-05,\n 6.4699e-05, 5.8574e-05, 4.8590e-05, 1.4694e-04, 6.0913e-05, 5.1251e-05,\n 6.1774e-05, 4.5858e-05, 5.8171e-05, 4.3448e-05, 5.8174e-05, 3.1237e-05,\n 2.5073e-05, 7.2967e-05, 4.8866e-05, 3.0463e-05, 3.7426e-05, 4.4904e-05,\n 4.0396e-05, 4.3549e-05, 9.4353e-05, 3.9245e-05, 4.0306e-05, 3.6145e-05,\n 6.8128e-05, 4.4907e-05, 3.2179e-05, 3.9520e-05, 5.0208e-05, 4.2354e-05,\n 3.1725e-05, 4.1203e-05, 4.4977e-05, 2.7590e-05, 3.8122e-05, 4.1651e-05,\n 3.3428e-05, 4.5989e-05, 3.2643e-05, 4.2445e-05, 3.7140e-05, 4.8971e-05,\n 3.2995e-05, 2.7510e-17], device='cuda:0')"
24
+ },
25
+ "4": {
26
+ "step": "tensor(33786.)",
27
+ "exp_avg": "tensor([[ 4.4424e-06, 6.9913e-07, 4.3340e-06, ..., -5.9821e-06,\n 2.3295e-06, 5.6052e-45],\n [ 1.1622e-06, -1.2881e-05, -5.0208e-07, ..., -3.8914e-06,\n -1.1468e-05, 5.6052e-45],\n [-4.8950e-06, 1.0618e-05, -6.5939e-06, ..., 3.9576e-06,\n -5.8825e-06, -5.6052e-45],\n ...,\n [ 7.2706e-07, 1.9114e-05, 1.7315e-06, ..., -1.1658e-05,\n -8.7889e-06, 5.6052e-45],\n [-6.2016e-06, 7.8928e-06, -6.3559e-06, ..., -4.1526e-06,\n 2.4529e-06, 5.6052e-45],\n [ 1.0339e-05, 2.5878e-05, 1.2719e-05, ..., 3.5464e-06,\n 1.0684e-05, 5.6052e-45]], device='cuda:0')",
28
+ "exp_avg_sq": "tensor([[6.9047e-10, 1.2330e-09, 6.5321e-10, ..., 1.5000e-09, 4.7258e-10,\n 7.3638e-22],\n [1.0611e-09, 2.3515e-09, 1.5323e-09, ..., 4.5662e-09, 2.2906e-09,\n 4.1409e-21],\n [9.2914e-10, 1.5004e-09, 1.9908e-09, ..., 3.6808e-09, 7.0208e-10,\n 3.7698e-21],\n ...,\n [1.5196e-09, 3.4233e-09, 1.2084e-09, ..., 5.4725e-09, 1.0361e-09,\n 4.2263e-21],\n [1.6989e-09, 4.1254e-09, 1.1584e-09, ..., 3.4083e-09, 1.7616e-09,\n 7.5219e-21],\n [1.9343e-09, 1.9773e-09, 1.0668e-09, ..., 3.5208e-09, 1.5189e-09,\n 4.3427e-21]], device='cuda:0')"
29
+ },
30
+ "5": {
31
+ "step": "tensor(26278.)",
32
+ "exp_avg": "tensor([[-2.0537e-06, -9.5453e-06, -1.6251e-06, ..., -1.3359e-06,\n -2.3479e-06, 5.6052e-45],\n [ 4.6257e-06, -1.2685e-05, -7.0234e-06, ..., 7.8879e-06,\n -9.8097e-06, 5.6052e-45],\n [-3.0923e-06, -5.9677e-06, -1.5499e-06, ..., 8.6730e-06,\n -5.3720e-06, -5.6052e-45],\n ...,\n [-8.4723e-06, -2.9412e-06, -1.2756e-05, ..., -1.3509e-05,\n 6.0123e-07, 5.6052e-45],\n [-1.1595e-06, 2.0304e-05, 6.6591e-08, ..., -5.3187e-06,\n -5.4853e-06, 5.6052e-45],\n [ 1.5452e-05, 1.7914e-05, -1.8065e-06, ..., 4.7031e-06,\n -5.6449e-06, 5.6052e-45]], device='cuda:0')",
33
+ "exp_avg_sq": "tensor([[8.5266e-10, 1.0792e-09, 5.2682e-10, ..., 1.1036e-09, 6.5896e-10,\n 3.2203e-21],\n [7.1707e-10, 2.2862e-09, 1.4177e-09, ..., 5.3031e-09, 1.9734e-09,\n 2.1062e-20],\n [8.6544e-10, 1.8433e-09, 1.4162e-09, ..., 4.4353e-09, 1.0025e-09,\n 6.1589e-21],\n ...,\n [1.7796e-09, 1.4581e-09, 1.7876e-09, ..., 7.4523e-09, 1.1706e-09,\n 1.2798e-21],\n [1.0140e-09, 2.3840e-09, 9.8166e-10, ..., 2.4948e-09, 1.0021e-09,\n 4.6430e-20],\n [1.8991e-09, 1.4536e-09, 1.0014e-09, ..., 2.5188e-09, 1.8613e-09,\n 4.4419e-22]], device='cuda:0')"
34
+ },
35
+ "6": {
36
+ "step": "tensor(26278.)",
37
+ "exp_avg": "tensor([ 0.0003, -0.0003], device='cuda:0')",
38
+ "exp_avg_sq": "tensor([6.4452e-06, 6.4452e-06], device='cuda:0')"
39
+ }
40
+ },
41
+ "param_groups": [
42
+ {
43
+ "lr": 0.00024569294678237997,
44
+ "name": "shared",
45
+ "betas": [
46
+ 0.9,
47
+ 0.999
48
+ ],
49
+ "eps": 1e-08,
50
+ "weight_decay": 1e-05,
51
+ "amsgrad": false,
52
+ "maximize": false,
53
+ "foreach": null,
54
+ "capturable": false,
55
+ "differentiable": false,
56
+ "fused": null,
57
+ "decoupled_weight_decay": true,
58
+ "initial_lr": 0.01,
59
+ "params": [
60
+ 0,
61
+ 1,
62
+ 2,
63
+ 3
64
+ ]
65
+ },
66
+ {
67
+ "lr": 0.00024569294678237997,
68
+ "name": "scale_256",
69
+ "betas": [
70
+ 0.9,
71
+ 0.999
72
+ ],
73
+ "eps": 1e-08,
74
+ "weight_decay": 1e-05,
75
+ "amsgrad": false,
76
+ "maximize": false,
77
+ "foreach": null,
78
+ "capturable": false,
79
+ "differentiable": false,
80
+ "fused": null,
81
+ "decoupled_weight_decay": true,
82
+ "initial_lr": 0.01,
83
+ "params": [
84
+ 4
85
+ ]
86
+ },
87
+ {
88
+ "lr": 0.00024569294678237997,
89
+ "name": "scale_512",
90
+ "betas": [
91
+ 0.9,
92
+ 0.999
93
+ ],
94
+ "eps": 1e-08,
95
+ "weight_decay": 1e-05,
96
+ "amsgrad": false,
97
+ "maximize": false,
98
+ "foreach": null,
99
+ "capturable": false,
100
+ "differentiable": false,
101
+ "fused": null,
102
+ "decoupled_weight_decay": true,
103
+ "initial_lr": 0.01,
104
+ "params": [
105
+ 5
106
+ ]
107
+ },
108
+ {
109
+ "lr": 0.00012333423752026375,
110
+ "name": "fusion",
111
+ "betas": [
112
+ 0.9,
113
+ 0.999
114
+ ],
115
+ "eps": 1e-08,
116
+ "weight_decay": 1e-05,
117
+ "amsgrad": false,
118
+ "maximize": false,
119
+ "foreach": null,
120
+ "capturable": false,
121
+ "differentiable": false,
122
+ "fused": null,
123
+ "decoupled_weight_decay": true,
124
+ "initial_lr": 0.005,
125
+ "params": [
126
+ 6
127
+ ]
128
+ }
129
+ ]
130
+ },
131
+ "scheduler_state_dict": {
132
+ "T_0": 10,
133
+ "T_i": 10,
134
+ "T_mult": 2,
135
+ "eta_min": 1e-06,
136
+ "T_cur": 9,
137
+ "base_lrs": [
138
+ 0.01,
139
+ 0.01,
140
+ 0.01,
141
+ 0.005
142
+ ],
143
+ "last_epoch": 9,
144
+ "_step_count": 0,
145
+ "_is_initial": false,
146
+ "_get_lr_called_within_step": false,
147
+ "_last_lr": [
148
+ 0.00024569294678237997,
149
+ 0.00024569294678237997,
150
+ 0.00024569294678237997,
151
+ 0.00012333423752026375
152
+ ]
153
+ },
154
+ "metrics": {
155
+ "best_val_acc": 66.38533333333334,
156
+ "best_epoch": 8,
157
+ "scale_accuracies": {
158
+ "256": 65.89133333333334,
159
+ "512": 66.39533333333334
160
+ },
161
+ "training_history": {
162
+ "epochs": [
163
+ 1,
164
+ 2,
165
+ 3,
166
+ 4,
167
+ 5,
168
+ 6,
169
+ 7,
170
+ 8,
171
+ 9
172
+ ],
173
+ "train_loss": [
174
+ 3.9435249049420933,
175
+ 3.3040703793567867,
176
+ 4.3101251841734625,
177
+ 4.185147669827233,
178
+ 4.123004540650211,
179
+ 4.076372152195373,
180
+ 4.03838544134517,
181
+ 4.0064857600531685,
182
+ 3.9818663271297847
183
+ ],
184
+ "train_acc": [
185
+ 54.38726307083047,
186
+ 59.31631083223343,
187
+ 60.291879721118846,
188
+ 61.30111583163371,
189
+ 61.94625681117294,
190
+ 62.46739626189768,
191
+ 62.918183187671865,
192
+ 63.32416721109218,
193
+ 63.64023321445734
194
+ ],
195
+ "val_acc": [
196
+ 61.635333333333335,
197
+ 62.978,
198
+ 64.12,
199
+ 64.73133333333334,
200
+ 65.312,
201
+ 65.66133333333333,
202
+ 66.03133333333334,
203
+ 66.252,
204
+ 66.38533333333334
205
+ ],
206
+ "scale_accs": {
207
+ "256": [
208
+ 61.635333333333335,
209
+ 62.978,
210
+ 63.782,
211
+ 64.34866666666667,
212
+ 64.754,
213
+ 65.17733333333334,
214
+ 65.49933333333334,
215
+ 65.74333333333334,
216
+ 65.89133333333334
217
+ ],
218
+ "512": [
219
+ 63.839333333333336,
220
+ 64.522,
221
+ 65.18466666666667,
222
+ 65.52333333333333,
223
+ 66.02266666666667,
224
+ 66.17,
225
+ 66.39533333333334
226
+ ]
227
+ },
228
+ "lr": [
229
+ 0.00975530705321762,
230
+ 0.00904518046337755,
231
+ 0.00793913236883622,
232
+ 0.00654543046337755,
233
+ 0.005000500000000001,
234
+ 0.0034555695366224513,
235
+ 0.0020618676311637816,
236
+ 0.0009558195366224509,
237
+ 0.00024569294678237997
238
+ ]
239
+ }
240
+ },
241
+ "train_config": {
242
+ "name": "david_training",
243
+ "run_id": "20251012_235237",
244
+ "dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
245
+ "model_variant": [
246
+ "clip_vit_b16",
247
+ "clip_vit_laion_b32",
248
+ "clip_vit_b32"
249
+ ],
250
+ "num_classes": 1000,
251
+ "preset": "small_fast",
252
+ "custom_config_path": null,
253
+ "num_classes_override": null,
254
+ "use_belly_override": null,
255
+ "belly_expand_override": null,
256
+ "progressive_training_override": true,
257
+ "scale_warmup_epochs_override": {
258
+ "256": 0,
259
+ "512": 2
260
+ },
261
+ "num_epochs": 10,
262
+ "batch_size": 1024,
263
+ "learning_rate": 0.01,
264
+ "weight_decay": 1e-05,
265
+ "warmup_epochs": 3,
266
+ "use_rose_loss": true,
267
+ "rose_initial_weight": 0.1,
268
+ "rose_max_weight": 0.8,
269
+ "rose_weight_schedule": "adaptive",
270
+ "use_cayley_loss": false,
271
+ "cayley_weight": 0.01,
272
+ "scale_loss_balance": null,
273
+ "use_mixed_precision": false,
274
+ "gradient_clip": 15.0,
275
+ "scheduler_type": "cosine_restarts",
276
+ "min_lr": 1e-06,
277
+ "freeze_strategy": "never",
278
+ "freeze_threshold": 90.0,
279
+ "unfreeze_on_plateau": true,
280
+ "patience": 10,
281
+ "track_gradients": true,
282
+ "gradient_scale_threshold": 1e-05,
283
+ "gradient_scale_multiplier": 10.0,
284
+ "log_interval": 50,
285
+ "val_interval": 1,
286
+ "save_interval": 5,
287
+ "log_fusion_weights": true,
288
+ "log_loss_components": true,
289
+ "save_format": "safetensors",
290
+ "hf_repo": "AbstractPhil/david-shared-space",
291
+ "upload_to_hub": true,
292
+ "base_dir": "./david_training",
293
+ "num_workers": 10,
294
+ "pin_memory": true,
295
+ "prefetch_factor": 4,
296
+ "persistent_workers": true
297
+ }
298
+ }