Update best_model_acc65.31_metadata.json - Run 20251012_235237
Browse files
weights/David-fully_shared-weighted_sum/20251012_235237/best_model_acc65.31_metadata.json
ADDED
|
@@ -0,0 +1,270 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 4,
|
| 3 |
+
"optimizer_state_dict": {
|
| 4 |
+
"state": {
|
| 5 |
+
"0": {
|
| 6 |
+
"step": "tensor(18770.)",
|
| 7 |
+
"exp_avg": "tensor([[ 1.1670e-04, -2.7209e-05, -2.0383e-05, ..., 3.3975e-05,\n 6.8323e-06, -5.2677e-05],\n [-3.5774e-05, 1.2501e-04, -1.3141e-05, ..., -6.3927e-05,\n -3.1095e-05, -1.1841e-05],\n [-2.3813e-05, -3.3120e-05, 6.5585e-05, ..., 4.7654e-06,\n -1.0026e-05, 1.6326e-06],\n ...,\n [-1.5197e-05, -5.3045e-05, -1.8704e-05, ..., 3.4067e-05,\n -1.7358e-05, 3.4572e-05],\n [ 4.5181e-05, -9.2853e-05, 7.9375e-05, ..., -1.4000e-05,\n -8.7274e-06, 1.0942e-06],\n [ 5.6316e-06, 6.8053e-05, -2.1784e-05, ..., -7.3016e-06,\n -3.5426e-07, 5.8325e-06]], device='cuda:0')",
|
| 8 |
+
"exp_avg_sq": "tensor([[1.3601e-07, 1.2344e-07, 5.4351e-08, ..., 5.8804e-08, 3.1725e-08,\n 2.7670e-08],\n [5.5360e-08, 2.1679e-07, 8.3248e-08, ..., 6.1213e-08, 2.7403e-08,\n 2.7557e-08],\n [2.5135e-08, 3.7140e-08, 2.7207e-08, ..., 5.9421e-08, 1.4152e-08,\n 1.5807e-08],\n ...,\n [3.8743e-08, 3.5310e-07, 4.8375e-08, ..., 8.1358e-08, 2.4638e-08,\n 3.7413e-08],\n [7.0078e-08, 1.3988e-07, 6.6127e-08, ..., 5.7854e-08, 2.9637e-08,\n 3.2361e-08],\n [5.8216e-09, 2.0864e-08, 1.0414e-08, ..., 5.7655e-09, 2.5222e-09,\n 4.2160e-09]], device='cuda:0')"
|
| 9 |
+
},
|
| 10 |
+
"1": {
|
| 11 |
+
"step": "tensor(18770.)",
|
| 12 |
+
"exp_avg": "tensor([-8.7114e-05, -1.0671e-03, -5.9770e-04, 4.4297e-04, 1.1371e-03,\n -2.6867e-03, -1.4777e-04, 7.9944e-04, -2.7325e-03, -8.0165e-04,\n 4.8585e-03, -2.0923e-03, -5.2535e-04, -1.8068e-03, 8.9153e-04,\n 1.7946e-03, 1.3469e-03, 2.5790e-04, 7.2194e-04, 1.6209e-04,\n -4.5186e-04, -4.8122e-04, -6.9842e-05, -1.3003e-03, -7.2484e-04,\n 2.0380e-03, -7.7850e-04, -6.8494e-05, -2.2625e-04, 6.1100e-04,\n -3.5591e-04, -2.0754e-05, -1.1110e-03, 2.0911e-03, 2.3545e-03,\n 1.3282e-03, 1.8799e-03, 1.4013e-03, -1.7487e-03, -1.4708e-03,\n -1.0386e-03, 1.0415e-03, -3.1989e-04, 1.3782e-03, -3.6218e-03,\n -1.0446e-04, -8.1872e-04, -6.3205e-04, 2.4303e-03, 1.8721e-03,\n -1.5025e-03, -1.2830e-03, -6.3791e-04, -4.0306e-03, 3.5478e-03,\n 7.8553e-04, 2.4549e-03, -6.2769e-04, 2.5188e-04, -2.5523e-03,\n -1.3315e-03, 1.0440e-03, -9.0034e-04, 1.7092e-03, -2.0730e-03,\n -2.6747e-03, 2.6960e-03, -1.1541e-03, -4.4711e-04, 4.2348e-04,\n 2.0411e-03, 5.4930e-04, -9.2698e-04, 2.3569e-03, 2.2975e-03,\n 1.3919e-03, -1.8671e-03, 9.7326e-04, -4.3026e-04, -5.8683e-04,\n 1.1419e-03, -4.6305e-03, -5.1766e-04, 1.9889e-03, -1.8061e-03,\n -9.1163e-04, 7.5826e-04, 6.4798e-04, -1.7126e-03, 2.0327e-03,\n 7.7093e-04, 2.0129e-03, 1.5037e-03, -1.0086e-03, 2.8177e-03,\n -3.2447e-04, -3.4606e-03, 9.3328e-04, 1.8499e-03, -8.1479e-04,\n -1.2676e-04, 5.9705e-04, -5.0897e-04, 1.1093e-03, -8.8533e-04,\n 1.3984e-03, -1.6115e-03, 1.8578e-03, 1.1144e-03, 2.0511e-03,\n -4.4371e-03, -1.3880e-03, 3.0332e-03, -2.7970e-03, -2.4407e-04,\n -1.3822e-03, -7.0175e-04, 4.3340e-04, 1.0294e-03, 1.4333e-03,\n 9.6525e-05, -1.8095e-03, 5.7985e-05, 3.8496e-04, -1.0216e-03,\n -1.6363e-04, -7.8637e-04, -8.1473e-04, -1.4170e-04, 1.1744e-03,\n -3.5600e-03, 1.9474e-03, 1.0081e-03, -1.2663e-03, 2.0946e-03,\n -3.0687e-03, -8.6380e-04, -1.7192e-05, 2.1045e-03, 1.5639e-04,\n -2.8941e-04, -6.5655e-04, 2.3248e-03, 2.0626e-03, -2.6768e-03,\n 7.8914e-04, -1.3990e-04, -1.4783e-03, -1.4784e-03, -2.5981e-03,\n -6.1377e-04, -1.4867e-03, -1.5474e-03, 8.1107e-04, -5.2134e-04,\n 2.7955e-03, -9.0896e-04, -2.1407e-03, 1.0430e-03, -4.9742e-04,\n 5.5373e-04, -3.7892e-03, -2.0581e-04, 3.0295e-03, 2.8193e-04,\n 4.6840e-03, -4.0096e-03, -7.2779e-04, 8.8777e-04, -8.2696e-04,\n 1.6930e-03, -1.5556e-04, 8.2315e-04, 7.3977e-04, -4.1547e-04,\n -3.1975e-03, -1.8502e-03, 1.3620e-03, -6.6333e-04, 2.9344e-03,\n -3.4915e-04, -5.5147e-04, -1.0284e-04, 3.6782e-03, 1.2041e-03,\n -2.5119e-04, 2.4818e-03, -1.3543e-04, -1.1512e-03, -3.2622e-05,\n -2.3225e-04, -5.9698e-04, 3.7978e-03, 6.4166e-04, -2.7230e-04,\n 5.1193e-04, 1.9635e-04, -1.0264e-03, -1.2978e-03, -1.5340e-04,\n 2.0176e-03, -5.7440e-04, -2.1778e-04, -4.0735e-04, 1.6509e-04,\n -7.3806e-06, 2.6577e-03, -1.4410e-03, 2.3415e-04, 4.3100e-04,\n -5.3372e-04, 1.9784e-04, -2.8977e-03, 8.1260e-04, -1.7063e-03,\n 8.1576e-04, -1.4069e-03, 5.5053e-04, 1.0569e-03, 2.0052e-03,\n 4.6305e-04, -2.1633e-03, 2.2135e-03, -8.4999e-05, -1.3704e-03,\n -4.3528e-03, 2.6774e-03, -3.3057e-03, 1.0360e-03, -1.1391e-03,\n 2.8348e-04, 2.7494e-03, -1.2331e-03, 1.2742e-03, -1.1065e-03,\n 1.5601e-03, 2.0791e-03, 6.1973e-04, 4.8743e-04, 4.1067e-04,\n -8.1124e-04, 9.7425e-04, -1.3204e-04, -1.4745e-03, 1.3149e-03,\n -1.6826e-04, 5.7581e-04, 1.2348e-03, -1.0848e-03, -2.7405e-04,\n -2.3924e-04, -6.4631e-04, -7.4192e-04, 1.1146e-04, -3.0110e-04,\n -1.5015e-03, 2.2488e-04, -7.4490e-04, -1.6838e-03, -1.6911e-03,\n 1.4764e-03, -2.0289e-04, -2.5809e-03, -2.7158e-03, 1.6070e-03,\n -1.4105e-03, 3.6011e-04, 1.5683e-03, 1.5275e-04, -1.8426e-03,\n -2.8383e-04, 1.0685e-03, -1.3169e-03, 8.6504e-04, -4.8456e-04,\n -4.5773e-04, -5.0025e-04, 2.1435e-03, 1.2601e-03, 9.4405e-05,\n -5.2945e-04, 7.9902e-04, -7.2858e-04, -3.2680e-03, -7.0442e-04,\n 2.0522e-03, -1.1665e-03, 1.2132e-04, -1.3856e-03, 5.9692e-05,\n -4.6359e-04, 1.2629e-03, 9.8488e-04, 6.4743e-04, 3.1004e-03,\n -7.4975e-05, 1.5029e-03, -6.3181e-04, 4.3397e-04, 5.3480e-04,\n -1.8790e-04, -1.7018e-03, -9.1772e-04, -9.9635e-04, 2.7431e-03,\n 9.0998e-04, -1.5125e-03, 2.8346e-03, 3.4903e-03, 1.6485e-04,\n 2.3381e-03, -2.9366e-04, -1.1572e-03, 9.4745e-04, -1.6023e-03,\n -6.4641e-04, -5.6718e-05, -5.1001e-04, -4.8117e-04, -1.0363e-03,\n -1.0549e-03, 2.2251e-04, -6.6669e-04, -2.5281e-03, -8.9181e-04,\n -3.7092e-03, -4.4747e-04, 2.3508e-03, -1.0893e-03, 1.3922e-03,\n -9.4625e-04, -1.4334e-03, 1.1131e-03, 4.5168e-04, 9.9355e-04,\n -1.1855e-03, 4.2115e-03, 7.6823e-04, 3.2923e-04, -4.9417e-04,\n 3.9203e-04, -4.8724e-05, -1.3906e-04, -1.1709e-03, -4.6085e-03,\n 2.2029e-03, -2.5025e-04, -1.3295e-03, 1.2292e-03, -8.3139e-04,\n 3.4813e-04, -1.0049e-04, 1.9925e-03, -9.5156e-05, 1.0348e-03,\n 8.8039e-04, 1.1125e-03, -1.4958e-03, 2.3632e-03, 1.0440e-03,\n 1.8746e-03, 2.1114e-03, 9.8671e-04, 2.7809e-04, -6.3371e-04,\n 5.2318e-04, 3.3256e-03, 1.9659e-03, -8.8919e-04, 9.3690e-04,\n -2.4826e-05, -2.0109e-03, 3.2422e-04, -7.6314e-04, -3.7248e-04,\n -1.7189e-03, -1.9445e-03, 5.2567e-04, 2.9138e-04, -3.6436e-03,\n 5.7268e-04, -1.7486e-03, -4.4197e-04, -2.2207e-03, 1.4421e-03,\n -1.6903e-03, 1.4496e-03, -1.5129e-03, -1.1904e-03, -1.2569e-03,\n -4.0172e-05, -2.3737e-03, -3.0015e-03, 2.2099e-03, -3.6720e-03,\n 1.6649e-03, -3.4957e-04, 2.7790e-03, 1.2495e-04, -1.2046e-03,\n -3.8337e-04, 9.8383e-04, 2.9468e-04, 1.1953e-03, 9.4996e-04,\n -4.3448e-04, 1.7607e-04, 1.3189e-03, -3.5588e-04, -3.8327e-04,\n 2.6508e-04, -2.2680e-03, 4.2400e-05, 2.0437e-03, -8.5429e-04,\n -6.5838e-04, -1.6694e-03, -8.0171e-04, 1.5630e-03, 1.6335e-03,\n 1.8829e-03, -1.1960e-03, 1.7664e-03, -5.5749e-04, 1.9340e-03,\n 3.1618e-03, 9.1274e-05, -2.6503e-03, 2.1695e-04, -3.3101e-04,\n 1.5715e-03, -6.7334e-04, 2.2635e-03, 4.7405e-04, 2.0058e-03,\n 4.5084e-04, 9.1772e-04, 1.6450e-03, -4.3551e-04, -1.2727e-03,\n 8.7450e-04, -5.8248e-04, 3.2004e-04, -1.5004e-04, -1.2935e-03,\n 1.6481e-04, 6.2143e-05, -5.8269e-05, 2.1367e-04, -2.3519e-03,\n -5.9157e-04, -1.1257e-03, -4.0488e-05, 3.2874e-05, -8.5714e-04,\n -5.0191e-04, -2.7826e-03, 1.2522e-03, 4.9352e-04, -1.1335e-03,\n -1.1432e-03, 5.5174e-04, 1.3369e-03, 1.6404e-04, -3.6526e-04,\n 1.5371e-03, 2.1475e-04, 1.7333e-03, 5.6394e-04, -2.3304e-03,\n 1.6476e-03, -3.3416e-03, 1.9070e-03, -2.6396e-03, 9.8974e-04,\n 2.0802e-03, 1.7136e-03, -7.5578e-04, 5.4716e-04, 1.4881e-04,\n 1.4052e-03, 1.0005e-04, 6.2443e-05, 3.8118e-04, 1.8223e-03,\n -1.3691e-03, 2.4477e-04, -1.3063e-03, -1.0064e-03, -1.9149e-03,\n 3.0326e-04, -3.0468e-04, -8.6823e-06, -4.9310e-04, 6.4170e-04,\n 1.6046e-03, 9.0267e-04, 1.5799e-03, 5.8483e-04, -1.1301e-03,\n -1.1726e-04, 1.4626e-03, -9.8653e-04, 2.1993e-03, -1.7290e-04,\n -5.7371e-04, 1.2332e-04, -3.1937e-03, -2.2325e-03, 7.4068e-04,\n 2.5310e-04, 4.0245e-04], device='cuda:0')",
|
| 13 |
+
"exp_avg_sq": "tensor([4.5000e-05, 4.2282e-05, 2.7863e-05, 3.4922e-05, 4.0149e-05, 6.5842e-05,\n 3.0686e-05, 5.8672e-05, 4.4570e-05, 4.6022e-05, 6.1752e-05, 6.6774e-05,\n 9.2845e-05, 2.1055e-05, 7.5870e-05, 9.2654e-05, 4.3562e-05, 3.7609e-05,\n 4.8855e-05, 5.7775e-05, 3.8818e-05, 3.1604e-05, 3.1395e-05, 6.3339e-05,\n 8.8227e-05, 3.4597e-05, 3.4777e-05, 7.1331e-05, 6.0292e-05, 4.3961e-05,\n 1.9483e-05, 3.4955e-05, 5.4042e-05, 4.0132e-05, 2.5551e-05, 5.8432e-05,\n 3.4190e-05, 2.8843e-05, 2.7133e-05, 2.5608e-05, 3.6912e-05, 2.6678e-05,\n 6.2753e-06, 7.0264e-05, 1.2720e-04, 3.3361e-05, 2.7272e-05, 2.5516e-05,\n 5.0868e-05, 7.5774e-05, 8.1073e-05, 2.9176e-05, 3.2950e-05, 4.6554e-05,\n 5.8438e-05, 3.8028e-05, 3.7875e-05, 9.9667e-05, 3.1473e-05, 3.9962e-05,\n 1.2603e-04, 2.8810e-05, 3.2364e-05, 3.5215e-05, 3.8706e-05, 3.1152e-05,\n 3.3369e-05, 3.4112e-05, 6.0166e-05, 3.8412e-05, 4.0757e-05, 3.5891e-05,\n 3.2531e-05, 3.2896e-05, 3.4562e-05, 3.6247e-05, 4.8380e-05, 3.8759e-05,\n 5.8191e-05, 3.5823e-05, 6.3749e-05, 3.8415e-05, 3.2603e-05, 2.1159e-05,\n 3.8423e-05, 4.4163e-05, 4.8444e-05, 3.2846e-05, 2.6236e-05, 4.4977e-05,\n 2.8291e-05, 6.3991e-05, 3.7552e-05, 4.0643e-05, 4.2125e-05, 4.9002e-05,\n 5.9792e-05, 3.8219e-05, 3.8511e-05, 2.0663e-05, 4.7026e-05, 7.3988e-05,\n 5.8908e-05, 3.4363e-05, 6.8136e-05, 6.3323e-05, 4.0708e-05, 9.8518e-05,\n 3.3611e-05, 4.5643e-05, 5.9627e-05, 4.7983e-05, 3.9139e-05, 4.8987e-05,\n 4.8243e-05, 4.3451e-05, 5.7381e-05, 4.6760e-05, 3.7500e-05, 3.8501e-05,\n 1.6146e-05, 6.1097e-05, 4.4404e-05, 5.6476e-05, 3.1531e-05, 4.8250e-05,\n 3.5694e-05, 5.0482e-05, 4.8215e-05, 7.1420e-05, 4.4727e-05, 4.0175e-05,\n 4.9905e-05, 4.1862e-05, 9.6770e-05, 3.5746e-05, 2.6631e-05, 4.1634e-05,\n 3.4585e-05, 4.6316e-05, 1.7487e-05, 3.4440e-05, 4.7031e-05, 9.8270e-05,\n 4.9996e-05, 4.2569e-05, 2.4528e-05, 5.0911e-05, 4.8964e-05, 9.2989e-05,\n 2.1317e-05, 4.9513e-05, 2.6876e-05, 7.4590e-05, 2.8217e-05, 3.5741e-05,\n 5.6205e-05, 4.2081e-05, 3.2223e-05, 3.1095e-05, 5.1205e-05, 1.4715e-05,\n 4.0076e-05, 7.1116e-05, 1.7308e-05, 6.5235e-05, 3.9449e-05, 1.9551e-05,\n 1.7582e-05, 3.9291e-05, 3.0888e-05, 5.2939e-05, 4.4203e-05, 3.6261e-05,\n 3.6714e-05, 2.6531e-05, 8.0492e-05, 2.5993e-05, 2.1065e-05, 4.0199e-05,\n 3.1703e-05, 2.9895e-05, 7.4487e-05, 3.9784e-05, 3.4234e-05, 3.6625e-05,\n 1.0234e-04, 2.2981e-05, 2.1133e-05, 1.8179e-05, 3.6311e-05, 5.3139e-05,\n 6.4117e-05, 6.1238e-05, 2.3202e-05, 1.8225e-05, 4.5993e-05, 2.6890e-05,\n 1.9548e-05, 2.1890e-05, 2.4629e-05, 4.4456e-05, 3.3115e-05, 2.0791e-05,\n 4.2468e-05, 4.3928e-05, 3.4961e-05, 7.7769e-05, 5.5419e-05, 2.2784e-05,\n 3.7333e-05, 6.4549e-05, 4.0372e-05, 3.1140e-05, 6.0728e-05, 1.7484e-05,\n 3.0677e-05, 2.4681e-05, 6.4035e-05, 5.2191e-05, 2.5391e-05, 1.8936e-05,\n 3.5888e-05, 1.6704e-05, 2.7824e-05, 5.8820e-05, 2.8567e-05, 2.2089e-05,\n 2.2039e-05, 5.8401e-05, 4.3764e-05, 2.0530e-05, 5.1411e-05, 3.8651e-05,\n 3.6060e-05, 3.4763e-05, 3.6229e-05, 7.1046e-06, 4.4441e-05, 1.0848e-04,\n 3.5328e-05, 7.8436e-05, 3.0536e-05, 5.4793e-05, 8.0590e-05, 5.2585e-05,\n 4.4664e-05, 3.0867e-05, 5.2296e-05, 3.3320e-05, 5.6544e-05, 2.7988e-05,\n 4.3006e-05, 4.7466e-05, 3.8491e-05, 3.5771e-05, 2.8269e-05, 5.5127e-05,\n 5.6030e-05, 2.8340e-05, 2.8805e-05, 7.4845e-05, 2.8050e-05, 5.0373e-05,\n 4.1885e-05, 2.7164e-05, 2.3839e-05, 3.7684e-05, 2.6961e-05, 4.3150e-05,\n 3.9786e-05, 4.1899e-05, 3.4790e-05, 3.1269e-05, 2.3288e-05, 5.1281e-05,\n 2.0493e-05, 6.9801e-05, 3.8501e-05, 2.5935e-05, 3.2291e-05, 2.7665e-05,\n 3.5320e-05, 4.9541e-05, 2.4596e-05, 5.7366e-05, 3.8196e-05, 3.8728e-05,\n 7.1311e-05, 5.0301e-05, 2.6496e-05, 3.4454e-05, 6.5052e-05, 3.3190e-05,\n 5.5089e-05, 2.5445e-05, 2.8218e-05, 5.7068e-05, 1.0408e-04, 4.0579e-05,\n 2.7218e-05, 3.1941e-05, 3.6415e-05, 3.5356e-05, 6.3635e-05, 2.5075e-05,\n 3.8401e-05, 4.0607e-05, 7.8869e-05, 4.0426e-05, 4.9209e-05, 5.2149e-05,\n 3.1405e-05, 2.8046e-05, 5.3106e-05, 3.1438e-05, 3.9627e-05, 1.6635e-05,\n 3.7859e-05, 5.8677e-05, 8.9502e-05, 3.7847e-05, 4.4330e-05, 4.8664e-05,\n 6.2849e-05, 9.7527e-05, 2.9408e-05, 5.7285e-05, 3.7165e-05, 3.6531e-05,\n 3.5392e-05, 5.4713e-05, 1.7073e-05, 2.4382e-05, 3.0614e-05, 3.4934e-05,\n 5.7453e-05, 2.6683e-05, 6.5157e-05, 4.7377e-05, 3.6816e-05, 3.6338e-05,\n 4.4056e-05, 2.6675e-05, 6.4656e-05, 5.4569e-05, 3.9755e-05, 2.5613e-05,\n 4.7705e-05, 4.1896e-05, 3.4630e-05, 4.1750e-05, 3.7088e-05, 3.0822e-05,\n 3.4953e-05, 2.6173e-05, 2.7644e-05, 3.3835e-05, 3.1176e-05, 2.9612e-05,\n 2.3693e-05, 9.6559e-05, 1.1769e-04, 3.8997e-05, 3.8327e-05, 2.6469e-05,\n 4.0355e-05, 5.3881e-05, 2.5225e-05, 3.0099e-05, 2.2620e-05, 4.2656e-05,\n 2.0440e-05, 3.6665e-05, 6.8352e-05, 3.9869e-05, 2.1406e-05, 1.5337e-05,\n 3.0395e-05, 3.1368e-05, 3.5805e-05, 4.9521e-05, 4.9244e-05, 3.2451e-05,\n 3.7380e-05, 9.7539e-05, 3.8251e-05, 5.5611e-05, 4.1999e-05, 4.6016e-05,\n 4.9943e-05, 5.2201e-05, 3.0363e-05, 3.6398e-05, 4.2420e-05, 2.3860e-05,\n 2.1433e-05, 6.9190e-05, 4.5829e-05, 7.1658e-05, 4.6069e-05, 3.1461e-05,\n 5.5582e-05, 3.8300e-05, 4.6061e-05, 3.8053e-05, 8.1406e-05, 2.0948e-05,\n 3.0899e-05, 4.9767e-05, 5.2409e-05, 5.9478e-05, 4.8696e-05, 5.3688e-05,\n 2.8934e-05, 2.5665e-05, 6.0650e-05, 2.1949e-05, 5.6205e-05, 6.9782e-05,\n 4.1736e-05, 3.1846e-05, 5.6679e-05, 5.0731e-05, 4.1420e-05, 4.3384e-05,\n 7.4041e-05, 4.0075e-05, 3.1257e-05, 3.7689e-05, 3.0162e-05, 1.9529e-05,\n 5.9871e-05, 4.4170e-05, 3.3875e-05, 3.3342e-05, 5.9975e-05, 4.6380e-05,\n 4.0336e-05, 4.1431e-05, 4.6622e-05, 6.8852e-05, 3.5951e-05, 1.3069e-04,\n 5.1298e-05, 4.8626e-05, 2.6749e-05, 3.9951e-05, 3.4089e-05, 4.4607e-05,\n 4.0001e-05, 4.0609e-05, 4.5979e-05, 6.8458e-05, 3.2512e-05, 1.9742e-05,\n 7.9694e-05, 4.3847e-05, 2.0786e-05, 5.1880e-05, 4.2846e-05, 3.0079e-05,\n 3.5209e-05, 3.1507e-05, 7.7864e-05, 3.1980e-05, 3.4484e-05, 3.9419e-05,\n 5.9365e-05, 5.2315e-05, 4.6967e-05, 7.4267e-05, 5.2325e-05, 4.9220e-05,\n 4.5037e-05, 5.0081e-05, 4.4680e-05, 3.8713e-05, 3.7466e-05, 3.1465e-05,\n 2.3324e-05, 3.4035e-05, 4.9091e-05, 4.0949e-05, 3.3359e-05, 4.0017e-05,\n 5.9872e-05, 3.8569e-05, 7.9766e-05, 4.0301e-05, 3.7522e-05, 4.1185e-05,\n 5.6083e-05, 2.6982e-05, 2.1813e-05, 3.5390e-05, 4.6537e-05, 2.4316e-05,\n 2.6375e-05, 3.0492e-05, 4.8153e-05, 2.7536e-05, 4.3996e-05, 3.2686e-05,\n 3.3091e-05, 1.9479e-05, 3.3499e-05, 4.5411e-05, 3.7737e-05, 4.1878e-05,\n 4.1193e-05, 3.9989e-06], device='cuda:0')"
|
| 14 |
+
},
|
| 15 |
+
"2": {
|
| 16 |
+
"step": "tensor(18770.)",
|
| 17 |
+
"exp_avg": "tensor([-6.2841e-04, -2.0829e-03, -1.0760e-03, 1.9077e-03, 2.2635e-03,\n -5.4141e-03, 1.2664e-04, 2.9474e-03, -6.5296e-03, -3.0293e-03,\n 1.2228e-02, -3.6974e-03, -1.4171e-03, -5.6383e-03, 1.3007e-03,\n 6.2755e-03, 1.9756e-03, 4.1319e-04, 2.5784e-03, -4.7915e-04,\n -9.0285e-04, -8.5306e-04, -1.2527e-03, -2.3325e-03, -2.3879e-03,\n 4.6125e-03, -3.1698e-03, 5.6987e-04, -1.1644e-03, 1.1570e-03,\n -5.2069e-04, -3.8061e-04, -2.5037e-03, 4.3506e-03, 5.7330e-03,\n 2.1819e-03, 4.3934e-03, 3.0179e-03, -4.8161e-03, -3.1370e-03,\n -2.0526e-03, 2.2723e-03, -5.6052e-45, 2.1100e-03, -6.2575e-03,\n -3.4335e-04, -6.4094e-04, -1.6879e-03, 6.6019e-03, 3.1002e-03,\n -4.3202e-03, -1.6735e-03, -5.8708e-04, -1.2103e-02, 6.4458e-03,\n 2.0334e-03, 6.7194e-03, -6.2277e-05, 1.0551e-03, -3.7956e-03,\n -1.5626e-03, 1.9797e-03, -2.4924e-03, 2.8876e-03, -4.7105e-03,\n -5.5492e-03, 5.9549e-03, -1.9542e-03, -1.3351e-03, 3.6893e-04,\n 5.9851e-03, -2.6477e-04, -2.9165e-03, 7.3338e-03, 4.1124e-03,\n 2.4193e-03, -4.0481e-03, 2.7782e-03, -2.6270e-03, -2.9838e-03,\n 1.6586e-03, -9.3845e-03, -9.5092e-04, 3.7454e-03, -3.1646e-03,\n -2.0701e-03, 2.4131e-03, 1.3568e-03, -3.7781e-03, 6.4435e-03,\n 1.1486e-03, 3.4667e-03, 4.0578e-03, -1.4234e-03, 8.6508e-03,\n -1.4710e-03, -7.5184e-03, 2.8998e-03, 4.3350e-03, -3.5797e-03,\n -3.0646e-04, 9.2565e-04, -1.2954e-03, 1.9977e-03, -2.3137e-03,\n 4.2899e-03, -3.7108e-03, 4.2645e-03, 3.3982e-03, 4.3333e-03,\n -8.4574e-03, -3.1707e-03, 8.0974e-03, -4.8368e-03, 3.5872e-04,\n -1.5774e-03, -7.4741e-04, 1.9281e-04, 1.8868e-03, 4.2759e-03,\n -1.8797e-04, -1.8613e-03, 7.2391e-04, -3.2510e-04, -3.2871e-03,\n -1.2571e-03, -2.5407e-03, -1.0478e-03, -1.3597e-03, 2.4007e-03,\n -9.2510e-03, 4.9015e-03, 3.0066e-03, -2.5871e-03, 4.7178e-03,\n -5.8226e-03, -2.8077e-03, -5.9951e-04, 5.5224e-03, -4.0413e-04,\n 1.2691e-03, -6.5186e-04, 4.8248e-03, 3.6607e-03, -5.9795e-03,\n 2.0323e-03, 1.7845e-04, -3.2025e-03, -5.1540e-03, -4.7960e-03,\n -1.6131e-03, -3.5588e-03, -3.4318e-03, 2.3010e-03, 1.5096e-04,\n 5.4803e-03, -5.7340e-03, -6.2222e-03, 2.4732e-03, -1.6431e-03,\n -1.2502e-04, -1.0981e-02, -5.0865e-04, 3.8500e-03, 6.7573e-04,\n 1.1605e-02, -1.1136e-02, -1.2940e-03, 1.8649e-03, -3.1956e-03,\n 3.0406e-03, 7.8509e-04, 8.9908e-04, 3.1252e-03, -1.6358e-03,\n -7.8784e-03, -3.1100e-03, 3.3129e-03, -2.0464e-03, 5.7848e-03,\n -8.9584e-04, -1.3203e-03, -1.6253e-03, 8.1897e-03, 3.1027e-03,\n -4.8704e-04, 6.6517e-03, -1.4172e-03, -3.8520e-03, -9.2818e-06,\n -1.1637e-04, -5.2525e-04, 4.9206e-03, 1.2675e-03, -1.2561e-03,\n 6.8915e-04, -1.3682e-04, -3.3271e-03, -2.7129e-03, -1.0387e-03,\n 4.0559e-03, -1.5872e-03, 6.4306e-04, -4.4376e-04, -5.5410e-04,\n -9.0505e-04, 5.6500e-03, -3.6111e-03, -1.1394e-03, 1.6022e-03,\n -1.1447e-03, 2.5182e-03, -1.8952e-03, 2.3398e-03, -3.8124e-03,\n 3.4184e-03, -2.7500e-03, 6.1242e-04, 3.5523e-03, 5.2258e-03,\n 1.1060e-03, -5.8090e-03, 5.6315e-03, -8.4894e-04, -4.6263e-03,\n -9.8869e-03, 5.1261e-03, -7.9740e-03, 1.8628e-03, -1.9387e-03,\n 1.3107e-03, 6.4216e-03, -5.1872e-03, 2.8228e-03, -2.1626e-03,\n 3.6899e-03, 3.7955e-03, 5.6052e-45, 9.0640e-04, 1.7051e-03,\n -2.0203e-03, 1.7477e-03, -4.4129e-04, -2.4483e-03, 4.2207e-04,\n -1.1038e-03, 1.4895e-03, 3.1063e-03, -1.4445e-03, -1.0456e-03,\n -1.1671e-03, -1.6265e-03, -2.6594e-03, 5.8470e-04, -1.0621e-03,\n -2.8048e-03, 9.9045e-04, -9.4656e-04, -2.1199e-03, -5.1353e-03,\n 2.8383e-03, -1.3512e-03, -6.1278e-03, -6.8262e-03, 2.6968e-03,\n -4.8839e-03, 2.1357e-03, 5.0698e-03, -3.3768e-04, -4.5862e-03,\n -1.8821e-03, 3.7222e-03, -1.8631e-03, 1.3804e-03, -7.9416e-04,\n -7.7628e-04, -2.7369e-03, 2.3431e-03, 4.1405e-03, -4.9810e-04,\n -2.6644e-03, 1.9732e-03, -1.9554e-03, -5.8542e-03, -1.8981e-03,\n 4.6116e-03, -4.9970e-03, -2.3072e-04, -4.3257e-03, -4.3848e-04,\n 5.7797e-04, 2.1206e-03, 3.0486e-03, 7.9081e-04, 6.5707e-03,\n 3.8961e-04, 3.6683e-03, -1.2425e-03, 3.8045e-04, 1.3202e-03,\n -6.0321e-05, -3.9443e-03, -2.4617e-03, -3.2283e-03, 5.2759e-03,\n 2.9133e-03, -3.0354e-03, 7.3569e-03, 7.8139e-03, 7.2476e-04,\n 5.6421e-03, -3.2443e-04, -3.1666e-03, 3.0268e-03, -2.3363e-03,\n -8.7422e-04, -8.1137e-04, -1.4393e-03, -1.6162e-03, -2.1127e-03,\n -3.2356e-03, -2.8186e-04, -1.7129e-03, -5.2649e-03, -2.9009e-03,\n -7.3042e-03, -1.8399e-03, 5.2425e-03, -3.2323e-03, 2.6229e-03,\n -2.9864e-03, -4.7965e-03, 2.0635e-03, 1.2600e-03, 2.1387e-03,\n -2.3442e-03, 4.7770e-03, 1.6736e-03, 8.3106e-04, -2.1671e-03,\n 1.3850e-03, 1.0172e-04, -5.1001e-04, -4.2929e-03, -6.3930e-03,\n 4.4945e-03, -1.5526e-03, -2.0451e-03, 1.9502e-03, -3.3331e-03,\n 1.3604e-03, -1.5199e-04, 5.4064e-03, 3.5972e-04, 2.1504e-03,\n 2.4307e-03, 2.6996e-03, -2.5325e-03, 4.9571e-03, 1.2664e-03,\n 5.0832e-03, 4.8683e-03, 2.7553e-03, 4.8577e-04, -1.9661e-03,\n 1.0447e-03, 4.8182e-03, 4.4421e-03, -1.6832e-03, 1.9181e-03,\n -1.9234e-04, -3.3499e-03, -4.1859e-05, 8.4319e-05, -3.5456e-04,\n -3.9364e-03, -5.2115e-03, 2.4594e-03, 1.8926e-03, -5.4021e-03,\n 2.6326e-03, -3.7358e-03, -3.0804e-03, -6.0810e-03, 3.8908e-03,\n -3.1871e-03, 4.3075e-03, -2.9882e-03, -4.4668e-03, -4.2002e-03,\n 3.8008e-04, -4.0944e-03, -7.1189e-03, 6.6650e-03, -8.8251e-03,\n 3.9926e-03, -1.2077e-03, 6.9052e-03, 5.1424e-04, -3.4388e-03,\n -2.4134e-03, 2.8272e-03, -4.2394e-04, 1.4677e-03, 2.3561e-03,\n -9.2097e-04, 8.5265e-04, 1.8332e-03, -1.4288e-03, -7.0048e-04,\n 1.1288e-03, -3.0452e-03, -1.8375e-06, 4.2444e-03, -2.4634e-03,\n -5.1649e-03, -4.9072e-03, -1.8505e-03, 3.3574e-03, 3.0541e-03,\n 4.9673e-03, -1.3854e-03, 6.1860e-03, -2.3990e-03, 8.8950e-03,\n 7.8265e-03, 2.6216e-04, -5.8476e-03, 6.5103e-04, -1.4859e-03,\n 4.1992e-03, -2.0193e-03, 3.2821e-03, -7.4273e-06, 6.7962e-03,\n 7.5516e-04, 2.2679e-03, 3.9021e-03, -2.9506e-03, -1.6173e-03,\n 7.5083e-04, -1.4537e-03, 2.6100e-03, 4.8147e-04, -1.2627e-03,\n 1.0316e-03, 5.4393e-04, 2.5565e-04, 1.0080e-03, -6.3647e-03,\n -2.9111e-03, -2.2477e-03, 2.1716e-03, -5.8308e-04, -3.6707e-03,\n -9.9847e-04, -4.7107e-03, 2.3459e-03, 1.5116e-03, -1.8147e-03,\n -2.6726e-03, 2.0824e-03, 3.5394e-03, 1.3903e-03, -5.8787e-04,\n 3.2703e-03, 1.3235e-03, 3.4369e-03, 1.3303e-03, -4.8695e-03,\n 3.8146e-03, -6.4818e-03, 3.3836e-03, -6.3889e-03, 1.8709e-03,\n 4.9566e-03, 4.5791e-03, -2.2093e-03, 1.2893e-03, 7.7031e-04,\n 2.8337e-03, 8.1085e-04, 1.0222e-03, 6.5198e-04, 5.1953e-03,\n -3.0684e-03, 3.6728e-04, -2.3919e-03, -2.0579e-03, -2.9052e-03,\n 1.0557e-03, -8.1136e-04, -9.9663e-04, -1.1591e-03, 2.1529e-03,\n 4.4585e-03, 9.8930e-04, 4.1659e-03, 1.8648e-03, -3.3042e-03,\n -5.0491e-06, 3.7388e-03, -1.6110e-03, 5.4227e-03, -4.9827e-04,\n -2.1937e-03, -4.1189e-04, -6.7681e-03, -4.1381e-03, 1.6468e-03,\n -2.8481e-04, 5.6052e-45], device='cuda:0')",
|
| 18 |
+
"exp_avg_sq": "tensor([3.5835e-04, 2.0901e-04, 1.0839e-04, 1.8525e-04, 1.2499e-04, 2.8433e-04,\n 1.8117e-04, 2.6305e-04, 2.5035e-04, 3.1592e-04, 3.5600e-04, 3.6734e-04,\n 3.0066e-04, 1.6872e-04, 2.0356e-04, 2.9880e-04, 1.9198e-04, 1.9454e-04,\n 1.9778e-04, 2.7805e-04, 2.5688e-04, 1.0477e-04, 1.2980e-04, 2.8419e-04,\n 4.3711e-04, 2.0977e-04, 1.9988e-04, 3.6763e-04, 2.3764e-04, 2.4425e-04,\n 1.9130e-04, 2.6554e-04, 2.3250e-04, 1.6741e-04, 1.5091e-04, 1.4094e-04,\n 1.2648e-04, 1.5854e-04, 1.2424e-04, 1.6418e-04, 1.1845e-04, 8.9444e-05,\n 5.2826e-12, 2.0488e-04, 4.7626e-04, 1.4564e-04, 1.6777e-04, 6.3598e-05,\n 5.0330e-04, 2.4299e-04, 3.1817e-04, 1.3344e-04, 1.4714e-04, 2.9146e-04,\n 2.1140e-04, 1.8488e-04, 2.9250e-04, 1.1924e-04, 1.4979e-04, 1.1330e-04,\n 2.0891e-04, 2.0178e-04, 2.2558e-04, 1.5031e-04, 1.2747e-04, 1.5508e-04,\n 2.3947e-04, 1.4525e-04, 2.2529e-04, 1.2431e-04, 2.4667e-04, 1.1516e-04,\n 2.5620e-04, 2.6069e-04, 1.6157e-04, 2.1479e-04, 2.3023e-04, 1.6484e-04,\n 2.4444e-04, 2.5793e-04, 2.4780e-04, 1.5461e-04, 1.3586e-04, 9.6088e-05,\n 1.4705e-04, 3.5829e-04, 3.6694e-04, 2.0221e-04, 9.0456e-05, 5.3607e-04,\n 1.9749e-04, 1.7125e-04, 2.3921e-04, 2.7694e-04, 3.3568e-04, 2.5223e-04,\n 2.3986e-04, 2.0774e-04, 2.5159e-04, 4.3541e-04, 2.2705e-04, 2.8419e-04,\n 2.1366e-04, 2.0192e-04, 2.6291e-04, 4.0154e-04, 1.6132e-04, 4.4915e-04,\n 1.9603e-04, 2.1501e-04, 2.1516e-04, 3.1024e-04, 2.4169e-04, 1.4617e-04,\n 2.0294e-04, 1.1381e-04, 1.8166e-04, 2.4911e-04, 1.6148e-04, 2.8437e-04,\n 1.6648e-04, 3.0631e-04, 4.2862e-04, 3.7056e-04, 3.3016e-04, 4.6163e-04,\n 2.2204e-04, 2.9168e-04, 4.5207e-04, 4.3799e-04, 2.2167e-04, 2.4826e-04,\n 3.5686e-04, 2.2128e-04, 3.5190e-04, 1.3402e-04, 1.9161e-04, 2.9785e-04,\n 1.8262e-04, 1.0470e-04, 1.7089e-04, 2.1365e-04, 2.5807e-04, 2.2004e-04,\n 2.1022e-04, 1.7735e-04, 9.2964e-05, 3.3826e-04, 2.2992e-04, 3.3065e-04,\n 1.0108e-04, 2.7200e-04, 1.4497e-04, 2.7648e-04, 9.9736e-05, 9.3156e-05,\n 4.3029e-04, 3.0646e-04, 2.6340e-04, 1.6751e-04, 3.2103e-04, 1.3796e-04,\n 3.9730e-04, 1.6301e-04, 1.1043e-04, 4.1733e-04, 2.5419e-04, 1.0113e-04,\n 9.8476e-05, 1.8646e-04, 8.7634e-05, 2.4821e-04, 2.1906e-04, 4.3276e-04,\n 2.4355e-04, 1.4307e-04, 2.1958e-04, 1.2568e-04, 1.4785e-04, 2.2957e-04,\n 1.8911e-04, 2.0768e-04, 4.8470e-04, 2.3258e-04, 6.8501e-04, 1.3374e-04,\n 5.3299e-04, 1.5216e-04, 1.8315e-04, 9.4559e-05, 1.5317e-04, 2.6569e-04,\n 1.5049e-04, 1.1166e-04, 1.3525e-04, 9.3533e-05, 2.3671e-04, 1.2794e-04,\n 8.7860e-05, 1.0791e-04, 1.2977e-04, 1.8532e-04, 1.4630e-04, 1.0006e-04,\n 5.7824e-05, 2.0504e-04, 1.6248e-04, 3.4048e-04, 3.0034e-04, 9.8055e-05,\n 2.6131e-04, 2.6812e-04, 3.3847e-05, 1.3199e-04, 2.5768e-04, 2.1787e-04,\n 1.0783e-04, 5.6195e-04, 2.7414e-04, 3.9242e-04, 1.6765e-04, 1.4670e-04,\n 2.0773e-04, 1.5645e-04, 2.1401e-04, 2.9806e-04, 1.0265e-04, 1.2148e-04,\n 7.4493e-05, 1.7657e-04, 1.9203e-04, 1.3102e-04, 6.2866e-04, 2.9921e-04,\n 1.0935e-04, 1.5290e-04, 1.9206e-04, 7.4931e-13, 1.5078e-04, 3.3693e-04,\n 1.3114e-04, 3.1089e-04, 4.2397e-04, 2.7033e-04, 1.8059e-04, 3.3198e-04,\n 1.2866e-04, 1.2140e-04, 2.8686e-04, 1.5590e-04, 3.0828e-04, 1.1495e-04,\n 4.1415e-04, 2.5354e-04, 1.3350e-04, 2.2149e-04, 1.8599e-04, 1.7085e-04,\n 1.1042e-04, 1.8495e-04, 2.1299e-04, 2.6826e-04, 1.7347e-04, 1.6441e-04,\n 1.9881e-04, 2.6029e-04, 1.9673e-04, 2.2316e-04, 2.0460e-04, 4.0155e-04,\n 3.5592e-04, 2.8484e-04, 8.0134e-05, 2.2510e-04, 1.1186e-04, 1.4015e-04,\n 1.6728e-04, 2.5433e-04, 2.3089e-04, 2.3168e-04, 2.2769e-04, 9.6902e-05,\n 1.3212e-04, 1.5081e-04, 1.5770e-04, 2.3098e-04, 2.8145e-04, 1.1760e-04,\n 5.3523e-04, 2.0714e-04, 1.0541e-04, 8.0848e-05, 1.7949e-04, 1.0507e-04,\n 2.4323e-04, 1.4988e-04, 1.7469e-04, 1.4953e-04, 3.9894e-04, 2.4091e-04,\n 1.2089e-04, 1.2783e-04, 3.2905e-04, 1.7454e-04, 1.8962e-04, 2.0515e-04,\n 1.5567e-04, 1.9430e-04, 6.4220e-04, 4.3475e-04, 4.1416e-04, 2.1608e-04,\n 1.9919e-04, 2.1418e-04, 1.5170e-04, 3.1241e-04, 1.7873e-04, 1.0847e-04,\n 1.8823e-04, 2.7889e-04, 5.3112e-04, 3.8244e-04, 1.9483e-04, 2.9833e-04,\n 3.7673e-04, 3.5391e-04, 2.2636e-04, 3.4170e-04, 1.4661e-04, 2.8017e-04,\n 3.5962e-04, 3.2007e-04, 7.4466e-05, 1.4539e-04, 1.7342e-04, 1.8269e-04,\n 8.1633e-05, 1.5160e-04, 2.6276e-04, 2.3455e-04, 1.8902e-04, 1.3593e-04,\n 2.0167e-04, 3.3195e-04, 1.9385e-04, 2.4317e-04, 1.9848e-04, 1.1390e-04,\n 1.8136e-04, 2.3235e-04, 2.4961e-04, 2.0862e-04, 2.2535e-04, 2.0510e-04,\n 2.3816e-04, 1.5563e-04, 2.2228e-04, 1.3372e-04, 1.2553e-04, 1.4936e-04,\n 1.1788e-04, 5.0661e-04, 3.4819e-04, 4.4462e-04, 1.9524e-04, 1.9369e-04,\n 7.4189e-05, 3.5295e-04, 1.0439e-04, 1.5445e-04, 1.2949e-04, 2.2460e-04,\n 3.0996e-04, 1.7509e-04, 1.5816e-04, 2.0199e-04, 1.3057e-04, 1.2838e-04,\n 2.6624e-04, 1.0341e-04, 2.2829e-04, 1.9466e-04, 2.6732e-04, 2.2679e-04,\n 2.2014e-04, 3.2712e-04, 1.7112e-04, 1.5781e-04, 2.3022e-04, 1.6057e-04,\n 1.3268e-04, 1.2966e-04, 1.7941e-04, 4.7403e-04, 1.5330e-04, 8.6057e-05,\n 1.6316e-04, 5.0949e-04, 1.9790e-04, 3.0470e-04, 1.9598e-04, 1.2130e-04,\n 4.3425e-04, 1.7321e-04, 1.9688e-04, 1.3621e-04, 5.1765e-04, 5.0178e-05,\n 2.9546e-04, 2.7433e-04, 2.8332e-04, 5.9599e-05, 3.2997e-04, 2.0331e-04,\n 1.8600e-04, 6.3895e-04, 4.4143e-04, 3.1153e-04, 2.5182e-04, 1.4338e-04,\n 2.1246e-04, 1.7992e-04, 3.6382e-04, 2.8680e-04, 6.1213e-04, 2.5699e-04,\n 1.8514e-04, 1.7975e-04, 2.9969e-04, 1.5668e-04, 1.9300e-04, 1.1604e-04,\n 1.6225e-04, 2.2533e-04, 3.5870e-04, 2.3634e-04, 2.5467e-04, 4.1154e-04,\n 2.2770e-04, 1.0881e-04, 2.9866e-04, 2.0337e-04, 1.9833e-04, 6.2885e-04,\n 1.8566e-04, 3.1637e-04, 3.1432e-04, 1.4269e-04, 2.1766e-04, 2.8870e-04,\n 2.9719e-04, 1.2830e-04, 1.9005e-04, 4.4648e-04, 2.5519e-04, 1.1981e-04,\n 2.7398e-04, 1.7360e-04, 1.1596e-04, 1.6766e-04, 2.1772e-04, 1.4679e-04,\n 2.1605e-04, 1.6738e-04, 1.7262e-04, 1.4183e-04, 1.4183e-04, 2.4224e-04,\n 3.5953e-04, 2.9868e-04, 2.4564e-04, 3.7365e-04, 2.1509e-04, 1.6374e-04,\n 2.5870e-04, 2.4659e-04, 2.3338e-04, 2.3113e-04, 4.1401e-04, 1.4700e-04,\n 9.5151e-05, 5.6506e-04, 1.9547e-04, 1.3775e-04, 1.8671e-04, 1.1889e-04,\n 1.1460e-04, 2.1253e-04, 4.0677e-04, 1.2093e-04, 1.5621e-04, 1.2172e-04,\n 2.8250e-04, 1.5973e-04, 1.6353e-04, 2.7804e-04, 2.0601e-04, 2.0640e-04,\n 1.2629e-04, 1.8742e-04, 1.8881e-04, 1.4564e-04, 1.5637e-04, 1.8592e-04,\n 1.0113e-04, 2.3695e-04, 1.0491e-04, 1.7526e-04, 8.4479e-05, 1.0149e-04,\n 1.0050e-04, 1.9475e-10], device='cuda:0')"
|
| 19 |
+
},
|
| 20 |
+
"3": {
|
| 21 |
+
"step": "tensor(18770.)",
|
| 22 |
+
"exp_avg": "tensor([-1.0038e-04, -1.0789e-03, -6.0273e-04, 4.9233e-04, 1.1096e-03,\n -2.4024e-03, -8.3872e-05, 1.3018e-03, -3.3792e-03, -1.1040e-03,\n 5.5250e-03, -1.8057e-03, -3.9928e-04, -2.3456e-03, 8.6953e-04,\n 1.7119e-03, 1.0277e-03, 3.2732e-04, 1.0922e-03, -1.1180e-04,\n -2.3557e-04, -3.4027e-04, -2.8370e-04, -1.1135e-03, -1.0707e-03,\n 2.8103e-03, -9.9949e-04, -1.4578e-04, -6.8237e-04, 6.0917e-04,\n -3.5362e-04, 5.6011e-05, -1.3081e-03, 2.4582e-03, 2.5736e-03,\n 1.5566e-03, 1.9983e-03, 1.6401e-03, -2.1768e-03, -1.6480e-03,\n -1.0121e-03, 8.5758e-04, 5.6052e-45, 1.2657e-03, -3.8065e-03,\n -2.3597e-04, -7.1465e-04, -6.2167e-04, 2.7176e-03, 2.0974e-03,\n -1.5818e-03, -1.0940e-03, -1.9742e-04, -4.6016e-03, 3.6624e-03,\n 9.0172e-04, 3.3272e-03, -2.8224e-04, 3.5167e-04, -2.0955e-03,\n -1.0258e-03, 1.1737e-03, -1.3203e-03, 1.6026e-03, -1.8062e-03,\n -2.5733e-03, 3.1600e-03, -1.0222e-03, -7.5245e-04, 2.9571e-04,\n 2.5205e-03, 3.4863e-04, -7.6960e-04, 2.7616e-03, 2.1448e-03,\n 1.2688e-03, -1.8586e-03, 1.1891e-03, -8.6989e-04, -1.1606e-03,\n 7.7813e-04, -4.9122e-03, -4.8401e-04, 1.7633e-03, -1.5911e-03,\n -9.4638e-04, 1.0589e-03, 8.0915e-04, -1.5127e-03, 2.6014e-03,\n 4.9265e-04, 1.5095e-03, 2.3331e-03, -9.3580e-04, 3.4184e-03,\n -2.9075e-04, -3.8104e-03, 1.1812e-03, 2.0569e-03, -1.0889e-03,\n -2.0070e-05, 7.4634e-04, -6.6163e-04, 1.1064e-03, -1.0674e-03,\n 2.0389e-03, -2.0056e-03, 2.5776e-03, 1.3743e-03, 2.0520e-03,\n -5.5340e-03, -1.6420e-03, 3.7326e-03, -2.8786e-03, -1.0838e-04,\n -1.0219e-03, -6.3952e-04, 2.3752e-04, 1.1064e-03, 1.8325e-03,\n 7.9432e-05, -1.4260e-03, -1.7791e-04, 2.2048e-04, -1.0191e-03,\n -1.0796e-04, -9.8968e-04, -7.5894e-04, -2.2089e-04, 1.5689e-03,\n -3.8955e-03, 1.9588e-03, 1.5122e-03, -1.2104e-03, 2.7123e-03,\n -3.1173e-03, -1.2847e-03, -1.5313e-04, 2.5892e-03, -5.0584e-05,\n -5.5173e-05, -5.4346e-04, 2.0904e-03, 2.1431e-03, -2.8077e-03,\n 1.0050e-03, -1.8454e-04, -1.3277e-03, -1.7277e-03, -2.7967e-03,\n -4.9860e-04, -1.4144e-03, -1.8408e-03, 5.8548e-04, -4.5771e-04,\n 3.3320e-03, -8.0153e-04, -2.6047e-03, 1.1267e-03, -8.7290e-04,\n 3.9139e-04, -5.0012e-03, -2.8253e-04, 2.3652e-03, 9.0009e-05,\n 5.4159e-03, -4.3045e-03, -8.2320e-04, 6.9243e-04, -1.0566e-03,\n 1.6035e-03, 1.4129e-04, 4.7055e-04, 1.3356e-03, -1.1196e-03,\n -3.1103e-03, -2.2455e-03, 1.4833e-03, -7.2509e-04, 3.0840e-03,\n -4.1582e-04, -8.8318e-04, -5.1005e-04, 3.6904e-03, 1.6462e-03,\n -3.0252e-04, 3.6286e-03, -1.6697e-04, -1.9183e-03, 1.1630e-04,\n -9.4837e-05, -3.0852e-04, 3.2751e-03, 6.6402e-04, -5.4494e-04,\n 2.9924e-04, 1.3165e-04, -1.2441e-03, -1.1870e-03, -2.2613e-04,\n 2.3546e-03, -4.5713e-04, 1.5621e-05, -4.2508e-04, 2.1003e-04,\n 4.8967e-05, 2.3480e-03, -1.7555e-03, 2.7781e-04, 6.0581e-04,\n -6.6347e-04, 7.8744e-04, -2.0868e-03, 7.5839e-04, -1.6264e-03,\n 1.1795e-03, -1.6229e-03, 3.4950e-04, 1.2533e-03, 2.3049e-03,\n 4.2422e-04, -2.9314e-03, 2.4544e-03, -4.2486e-04, -1.8879e-03,\n -4.5923e-03, 2.6831e-03, -4.3377e-03, 1.1040e-03, -8.9663e-04,\n 2.4137e-04, 2.9846e-03, -2.0656e-03, 1.5057e-03, -1.2856e-03,\n 1.8567e-03, 2.0434e-03, 5.6052e-45, 3.0828e-04, 9.6630e-04,\n -7.7575e-04, 8.4025e-04, -1.2732e-04, -1.7242e-03, 1.0073e-03,\n -2.5302e-04, 6.1464e-04, 1.2191e-03, -1.0325e-03, -3.4756e-04,\n -6.2541e-04, -4.9865e-04, -7.1436e-04, -1.4233e-04, -4.0801e-04,\n -1.7865e-03, 4.9394e-04, -6.8246e-04, -1.3219e-03, -2.0853e-03,\n 1.4361e-03, -2.2024e-04, -2.9675e-03, -3.2253e-03, 1.6172e-03,\n -1.9093e-03, 5.7643e-04, 2.1975e-03, 2.9032e-04, -2.9655e-03,\n -5.5607e-04, 1.5198e-03, -1.5989e-03, 7.5135e-04, -6.0479e-04,\n -3.6004e-04, -1.1624e-03, 1.7172e-03, 1.5934e-03, -2.1846e-05,\n -1.0439e-03, 1.0426e-03, -7.9761e-04, -3.5590e-03, -1.2792e-03,\n 2.4207e-03, -1.5997e-03, -3.3403e-05, -1.8980e-03, -4.6617e-05,\n -8.0500e-05, 1.3333e-03, 9.7924e-04, 4.1886e-04, 2.9444e-03,\n 1.8670e-04, 1.7547e-03, -6.1312e-04, 3.9567e-04, 5.2662e-04,\n 8.8409e-05, -1.3283e-03, -1.1582e-03, -1.2145e-03, 2.7646e-03,\n 9.9399e-04, -1.3866e-03, 3.8611e-03, 3.7875e-03, 3.5412e-05,\n 2.5986e-03, 8.9967e-05, -1.3629e-03, 1.0918e-03, -1.3292e-03,\n -6.7192e-04, -3.3394e-04, -6.0709e-04, -5.5886e-04, -1.3831e-03,\n -1.7379e-03, 1.8602e-04, -9.1342e-04, -2.4618e-03, -1.4006e-03,\n -3.5055e-03, -5.6362e-04, 3.3142e-03, -1.0680e-03, 1.1780e-03,\n -1.2378e-03, -1.5785e-03, 1.0345e-03, 4.9033e-04, 7.1581e-04,\n -1.0366e-03, 3.0887e-03, 7.1580e-04, 1.3546e-04, -4.7055e-04,\n 7.6497e-04, 5.5480e-05, -1.0313e-04, -2.0962e-03, -4.2355e-03,\n 2.4121e-03, -5.8006e-04, -1.3368e-03, 1.3186e-03, -1.5046e-03,\n 5.1611e-04, -2.3644e-04, 2.7536e-03, 2.3123e-04, 1.1309e-03,\n 1.1124e-03, 1.2797e-03, -1.3769e-03, 2.5138e-03, 5.8588e-04,\n 1.9552e-03, 2.6691e-03, 1.1807e-03, 3.4006e-04, -4.2425e-04,\n 6.7485e-04, 2.7211e-03, 2.2994e-03, -8.1059e-04, 9.3222e-04,\n 7.2737e-07, -1.8708e-03, 2.4427e-04, -1.5825e-04, -2.6497e-04,\n -1.8099e-03, -2.3600e-03, 8.0110e-04, 6.9159e-04, -3.3848e-03,\n 9.2087e-04, -2.0309e-03, -7.0922e-04, -2.0228e-03, 1.5091e-03,\n -1.6898e-03, 1.5863e-03, -1.2489e-03, -1.4141e-03, -1.4496e-03,\n 8.7871e-05, -2.0960e-03, -3.6138e-03, 3.2008e-03, -4.2019e-03,\n 1.6412e-03, -5.9618e-04, 3.1681e-03, 1.6519e-04, -1.1365e-03,\n -6.2298e-04, 1.1317e-03, -4.6402e-04, 1.0425e-03, 1.2814e-03,\n -4.1252e-04, 6.1329e-04, 8.9066e-04, -6.2203e-04, -2.8837e-04,\n 1.7260e-04, -1.9999e-03, -3.6073e-04, 2.2087e-03, -7.9948e-04,\n -8.6062e-04, -2.3394e-03, -1.3352e-03, 1.7751e-03, 1.4841e-03,\n 2.3931e-03, -1.1019e-03, 2.2809e-03, -9.1630e-04, 2.8788e-03,\n 3.3407e-03, 2.0606e-04, -2.4302e-03, 3.4908e-04, -7.6125e-04,\n 1.9736e-03, -1.1961e-03, 1.9227e-03, 4.6030e-04, 2.6774e-03,\n 5.4139e-04, 7.6972e-04, 1.9545e-03, -7.0724e-04, -9.7648e-04,\n 9.4853e-04, -4.4035e-04, 7.6157e-04, 2.0967e-04, -1.4132e-03,\n 1.5035e-04, 3.1145e-04, -1.5963e-04, 5.6412e-04, -2.7077e-03,\n -8.7246e-04, -9.6211e-04, 4.6338e-04, -1.9306e-04, -1.2981e-03,\n -2.8415e-04, -3.4020e-03, 1.1476e-03, 5.3742e-04, -7.9628e-04,\n -1.0536e-03, 7.8208e-04, 1.5782e-03, 1.8483e-04, -3.2604e-04,\n 1.5701e-03, 2.8227e-04, 1.9290e-03, 6.2370e-04, -2.2101e-03,\n 1.4501e-03, -4.5004e-03, 1.9643e-03, -3.7530e-03, 1.1259e-03,\n 1.9719e-03, 2.5730e-03, -9.9589e-04, 1.0325e-03, 1.2226e-04,\n 1.6029e-03, 4.4747e-04, 2.7438e-05, 6.2867e-04, 2.2525e-03,\n -1.4602e-03, 9.9320e-05, -1.4191e-03, -9.7642e-04, -1.8313e-03,\n 4.1792e-04, -3.0211e-04, -1.7855e-04, -6.9703e-04, 7.6306e-04,\n 1.8334e-03, 8.6231e-04, 1.8778e-03, 8.4627e-04, -1.3987e-03,\n -6.6437e-06, 1.8756e-03, -9.5231e-04, 2.5710e-03, -8.7125e-05,\n -1.2125e-03, 1.7757e-04, -3.3911e-03, -2.3870e-03, 6.4629e-04,\n -1.3648e-04, 5.6052e-45], device='cuda:0')",
|
| 23 |
+
"exp_avg_sq": "tensor([6.7415e-05, 4.2451e-05, 3.4058e-05, 3.8490e-05, 3.0408e-05, 6.7981e-05,\n 3.9387e-05, 8.1924e-05, 5.6945e-05, 6.7653e-05, 8.0714e-05, 7.9529e-05,\n 9.7310e-05, 3.5259e-05, 6.9572e-05, 1.0843e-04, 4.4473e-05, 5.1562e-05,\n 4.6263e-05, 6.4073e-05, 5.7511e-05, 2.8223e-05, 3.0996e-05, 6.6883e-05,\n 1.2795e-04, 6.1616e-05, 4.4398e-05, 8.4094e-05, 5.8863e-05, 5.4011e-05,\n 3.3929e-05, 4.2421e-05, 7.1176e-05, 4.7089e-05, 3.3530e-05, 5.2657e-05,\n 3.3279e-05, 3.1396e-05, 3.2429e-05, 3.1470e-05, 3.1345e-05, 2.8803e-05,\n 9.4486e-14, 7.7771e-05, 1.3976e-04, 3.1618e-05, 4.8813e-05, 1.5586e-05,\n 7.9343e-05, 1.0065e-04, 1.1022e-04, 2.9961e-05, 3.4991e-05, 5.5757e-05,\n 7.8670e-05, 3.7850e-05, 5.2833e-05, 5.2661e-05, 3.8345e-05, 3.1891e-05,\n 9.0015e-05, 3.8757e-05, 5.2691e-05, 3.9702e-05, 3.3400e-05, 3.6343e-05,\n 4.6004e-05, 3.4360e-05, 5.3430e-05, 3.6420e-05, 5.3621e-05, 4.7335e-05,\n 3.8818e-05, 4.2066e-05, 4.4308e-05, 4.2438e-05, 3.9876e-05, 3.9157e-05,\n 6.3035e-05, 6.4844e-05, 5.9514e-05, 4.3732e-05, 3.5383e-05, 2.3416e-05,\n 3.8354e-05, 6.8604e-05, 6.6699e-05, 4.2646e-05, 2.3356e-05, 6.9082e-05,\n 3.7420e-05, 4.9590e-05, 6.9622e-05, 5.5393e-05, 6.3805e-05, 4.9485e-05,\n 7.1529e-05, 4.4590e-05, 5.8830e-05, 4.7049e-05, 4.0243e-05, 6.4503e-05,\n 6.4090e-05, 4.6429e-05, 6.8789e-05, 1.0510e-04, 4.3140e-05, 1.4490e-04,\n 4.2059e-05, 4.2924e-05, 7.0382e-05, 6.0775e-05, 5.2036e-05, 4.4711e-05,\n 6.0582e-05, 3.6234e-05, 5.5066e-05, 7.2397e-05, 3.4290e-05, 5.1862e-05,\n 2.5802e-05, 6.9898e-05, 6.2789e-05, 1.0515e-04, 5.4722e-05, 7.8307e-05,\n 4.5767e-05, 6.7441e-05, 8.8859e-05, 1.2314e-04, 5.8072e-05, 4.0387e-05,\n 7.2585e-05, 4.9183e-05, 1.4052e-04, 3.5429e-05, 5.5311e-05, 5.3151e-05,\n 4.0167e-05, 2.9036e-05, 3.1455e-05, 4.9379e-05, 4.8266e-05, 8.4545e-05,\n 4.8438e-05, 5.0443e-05, 2.4405e-05, 7.4833e-05, 5.1645e-05, 1.1068e-04,\n 2.4268e-05, 6.8607e-05, 3.3949e-05, 8.0473e-05, 3.1409e-05, 3.6389e-05,\n 9.7137e-05, 5.8865e-05, 4.1924e-05, 4.1944e-05, 6.7554e-05, 2.1870e-05,\n 7.5469e-05, 4.4693e-05, 2.0111e-05, 9.7340e-05, 4.7329e-05, 2.3413e-05,\n 1.9218e-05, 4.8116e-05, 2.9557e-05, 6.8436e-05, 4.6483e-05, 1.0417e-04,\n 5.0998e-05, 2.4892e-05, 9.3307e-05, 3.0167e-05, 2.7462e-05, 6.8611e-05,\n 4.4132e-05, 4.0732e-05, 1.1414e-04, 4.9890e-05, 9.4665e-05, 3.4126e-05,\n 1.4554e-04, 4.1667e-05, 4.4377e-05, 2.0607e-05, 3.9020e-05, 5.5920e-05,\n 5.0645e-05, 3.8846e-05, 2.7935e-05, 2.0279e-05, 7.4891e-05, 3.2229e-05,\n 1.7588e-05, 2.5891e-05, 3.4906e-05, 4.8987e-05, 4.1829e-05, 2.4352e-05,\n 2.5503e-05, 4.7512e-05, 3.3270e-05, 8.7432e-05, 6.6480e-05, 2.7094e-05,\n 5.3862e-05, 7.8399e-05, 2.3965e-05, 3.6309e-05, 5.7523e-05, 3.0587e-05,\n 3.0672e-05, 5.6146e-05, 5.3922e-05, 7.2821e-05, 3.3643e-05, 3.1527e-05,\n 3.9873e-05, 2.6798e-05, 4.2129e-05, 7.9343e-05, 2.7590e-05, 2.7089e-05,\n 2.1045e-05, 4.3584e-05, 3.7693e-05, 3.0673e-05, 1.0279e-04, 5.9251e-05,\n 3.2157e-05, 3.9452e-05, 4.2654e-05, 2.6237e-14, 4.1006e-05, 1.0193e-04,\n 3.8688e-05, 9.2514e-05, 5.9250e-05, 7.1832e-05, 8.6462e-05, 6.5541e-05,\n 4.5699e-05, 3.3684e-05, 6.4015e-05, 3.6877e-05, 9.0651e-05, 2.7879e-05,\n 7.4799e-05, 6.9357e-05, 3.2142e-05, 4.8964e-05, 5.8060e-05, 5.7312e-05,\n 3.9873e-05, 4.7740e-05, 4.0357e-05, 7.0803e-05, 4.0485e-05, 5.9362e-05,\n 4.6846e-05, 5.3974e-05, 2.9727e-05, 5.4325e-05, 3.8881e-05, 7.5531e-05,\n 7.0445e-05, 5.6811e-05, 2.7876e-05, 5.2983e-05, 2.7798e-05, 3.7436e-05,\n 4.7861e-05, 8.1619e-05, 4.4740e-05, 4.4535e-05, 5.7597e-05, 2.9408e-05,\n 3.0473e-05, 4.9105e-05, 3.5608e-05, 5.5259e-05, 5.3886e-05, 2.9981e-05,\n 1.0357e-04, 4.8578e-05, 2.6474e-05, 2.8134e-05, 5.4308e-05, 3.2456e-05,\n 4.6429e-05, 3.1167e-05, 4.2408e-05, 5.1607e-05, 1.5306e-04, 5.7975e-05,\n 3.0550e-05, 2.9081e-05, 5.1949e-05, 4.1993e-05, 6.2186e-05, 3.4952e-05,\n 3.5318e-05, 5.3122e-05, 1.2448e-04, 6.3403e-05, 7.4794e-05, 4.6969e-05,\n 3.9011e-05, 3.5467e-05, 4.6781e-05, 4.8693e-05, 4.6198e-05, 2.1911e-05,\n 3.6875e-05, 8.0806e-05, 1.3984e-04, 8.6525e-05, 5.2921e-05, 6.8190e-05,\n 8.4923e-05, 1.0026e-04, 5.9782e-05, 8.7925e-05, 4.3642e-05, 5.1088e-05,\n 9.2895e-05, 6.1382e-05, 1.6234e-05, 3.6197e-05, 3.7985e-05, 4.4302e-05,\n 3.0827e-05, 2.7490e-05, 7.4541e-05, 4.6283e-05, 4.8563e-05, 3.6260e-05,\n 4.3686e-05, 7.8453e-05, 7.0466e-05, 8.3991e-05, 6.3626e-05, 3.1679e-05,\n 5.5390e-05, 6.7623e-05, 5.4016e-05, 4.5911e-05, 5.8420e-05, 3.2479e-05,\n 4.8872e-05, 3.6986e-05, 4.6125e-05, 2.7524e-05, 3.6657e-05, 2.9395e-05,\n 2.4362e-05, 1.3858e-04, 1.0319e-04, 7.3434e-05, 4.8439e-05, 3.8465e-05,\n 2.3240e-05, 7.2634e-05, 2.5118e-05, 2.8618e-05, 3.0721e-05, 5.2670e-05,\n 5.2533e-05, 4.7770e-05, 5.2739e-05, 4.1996e-05, 3.0512e-05, 2.5072e-05,\n 4.5651e-05, 3.0002e-05, 4.6438e-05, 4.9155e-05, 6.7791e-05, 3.5502e-05,\n 4.9828e-05, 1.1908e-04, 4.1180e-05, 4.8316e-05, 4.6939e-05, 4.7142e-05,\n 4.3417e-05, 3.7295e-05, 4.9618e-05, 7.6206e-05, 4.2217e-05, 1.9849e-05,\n 2.7676e-05, 8.8421e-05, 5.7495e-05, 8.1875e-05, 4.2959e-05, 2.9390e-05,\n 8.6623e-05, 3.5108e-05, 4.6410e-05, 3.9628e-05, 1.0563e-04, 1.4473e-05,\n 6.7889e-05, 7.7354e-05, 5.9465e-05, 3.4886e-05, 9.6726e-05, 6.9662e-05,\n 3.6899e-05, 5.4176e-05, 9.6159e-05, 7.5106e-05, 6.0543e-05, 4.7518e-05,\n 5.3709e-05, 3.8738e-05, 7.1094e-05, 6.0425e-05, 8.3863e-05, 5.0698e-05,\n 6.5600e-05, 4.1340e-05, 5.4069e-05, 3.7596e-05, 4.1281e-05, 2.9546e-05,\n 4.9770e-05, 5.2523e-05, 5.5689e-05, 5.1054e-05, 5.5998e-05, 7.4075e-05,\n 4.9837e-05, 2.9961e-05, 6.5067e-05, 5.7396e-05, 4.7630e-05, 2.0839e-04,\n 6.4920e-05, 5.8520e-05, 6.5124e-05, 4.6973e-05, 5.9842e-05, 5.8021e-05,\n 5.1931e-05, 3.4251e-05, 4.9738e-05, 1.2140e-04, 5.8834e-05, 2.3965e-05,\n 9.4901e-05, 4.3625e-05, 2.5481e-05, 4.6345e-05, 5.3284e-05, 4.0828e-05,\n 3.9475e-05, 4.7362e-05, 5.0013e-05, 3.2770e-05, 3.5335e-05, 5.9627e-05,\n 6.1371e-05, 6.5057e-05, 4.9874e-05, 1.4597e-04, 5.8982e-05, 6.2492e-05,\n 7.5172e-05, 5.5383e-05, 6.3737e-05, 5.2254e-05, 6.4594e-05, 3.3185e-05,\n 2.8968e-05, 7.7490e-05, 4.9472e-05, 3.1301e-05, 4.1097e-05, 4.7536e-05,\n 4.3821e-05, 4.6733e-05, 9.3347e-05, 4.1533e-05, 3.9534e-05, 3.7074e-05,\n 5.5976e-05, 4.4186e-05, 3.2457e-05, 4.5428e-05, 5.0245e-05, 4.5683e-05,\n 3.4421e-05, 4.1646e-05, 4.5108e-05, 3.1564e-05, 3.8743e-05, 4.1915e-05,\n 2.8037e-05, 4.6392e-05, 3.0622e-05, 4.8096e-05, 3.7795e-05, 4.8614e-05,\n 3.2194e-05, 9.8495e-13], device='cuda:0')"
|
| 24 |
+
},
|
| 25 |
+
"4": {
|
| 26 |
+
"step": "tensor(18770.)",
|
| 27 |
+
"exp_avg": "tensor([[ 9.0583e-06, -1.1037e-05, 4.0036e-06, ..., 1.9638e-05,\n -1.3904e-06, 5.6052e-45],\n [-1.0590e-05, -2.0471e-05, 1.0162e-05, ..., 7.5376e-06,\n 8.0906e-06, 5.6052e-45],\n [ 3.2989e-06, 1.3567e-05, 3.3122e-06, ..., -1.0453e-07,\n -6.3082e-06, -5.6052e-45],\n ...,\n [-3.9455e-06, 1.0250e-05, 7.7541e-06, ..., 7.5924e-06,\n 7.0687e-06, 5.6052e-45],\n [ 3.4379e-06, 1.3468e-05, -6.4598e-06, ..., -2.7081e-05,\n -1.1931e-05, 5.6052e-45],\n [-2.0803e-06, 9.2738e-06, 8.6664e-06, ..., -6.9591e-06,\n -1.9746e-06, -5.6052e-45]], device='cuda:0')",
|
| 28 |
+
"exp_avg_sq": "tensor([[8.5762e-10, 1.7275e-09, 8.0449e-10, ..., 1.5730e-09, 6.0173e-10,\n 1.8719e-15],\n [1.5027e-09, 2.9246e-09, 1.7315e-09, ..., 4.7525e-09, 2.6941e-09,\n 8.3357e-15],\n [1.1640e-09, 1.7037e-09, 1.9069e-09, ..., 4.0590e-09, 9.6368e-10,\n 1.1195e-14],\n ...,\n [1.9383e-09, 3.9870e-09, 1.5730e-09, ..., 5.5723e-09, 1.2755e-09,\n 3.7441e-15],\n [2.1879e-09, 5.2905e-09, 1.5285e-09, ..., 3.4636e-09, 2.5486e-09,\n 2.0184e-14],\n [2.4352e-09, 2.4773e-09, 1.4015e-09, ..., 3.7556e-09, 2.1221e-09,\n 6.1539e-15]], device='cuda:0')"
|
| 29 |
+
},
|
| 30 |
+
"5": {
|
| 31 |
+
"step": "tensor(11262.)",
|
| 32 |
+
"exp_avg": "tensor([[ 6.2253e-06, -1.1301e-05, 3.4716e-06, ..., 2.5214e-05,\n -3.7108e-07, 5.6052e-45],\n [-1.1359e-05, -1.2720e-05, 1.5423e-05, ..., 7.7210e-06,\n 4.4133e-06, 5.6052e-45],\n [-4.1540e-06, 1.4574e-05, 5.5210e-06, ..., 1.1473e-05,\n -1.0587e-05, -5.6052e-45],\n ...,\n [-2.8643e-05, 1.1451e-05, 2.1390e-06, ..., 7.6926e-06,\n 2.5662e-06, 5.6052e-45],\n [ 1.4982e-05, 1.4639e-05, -5.2742e-06, ..., -2.5635e-06,\n -3.5384e-07, -5.6052e-45],\n [ 8.0856e-06, -4.1647e-06, 8.7697e-06, ..., 1.4256e-05,\n 1.3278e-05, -5.6052e-45]], device='cuda:0')",
|
| 33 |
+
"exp_avg_sq": "tensor([[1.2251e-09, 1.5130e-09, 6.3398e-10, ..., 1.2964e-09, 8.9243e-10,\n 4.2639e-15],\n [1.0577e-09, 3.1997e-09, 1.8746e-09, ..., 6.2810e-09, 2.5699e-09,\n 6.4975e-14],\n [1.2697e-09, 2.3662e-09, 1.6851e-09, ..., 5.3554e-09, 1.4104e-09,\n 1.8342e-14],\n ...,\n [2.7714e-09, 1.9577e-09, 2.6183e-09, ..., 9.0490e-09, 1.6099e-09,\n 1.7930e-16],\n [1.3943e-09, 3.0500e-09, 1.3723e-09, ..., 2.9417e-09, 1.4977e-09,\n 1.0239e-13],\n [2.5991e-09, 2.1032e-09, 1.3242e-09, ..., 3.1109e-09, 2.5661e-09,\n 6.6879e-16]], device='cuda:0')"
|
| 34 |
+
},
|
| 35 |
+
"6": {
|
| 36 |
+
"step": "tensor(11262.)",
|
| 37 |
+
"exp_avg": "tensor([ 0.0002, -0.0002], device='cuda:0')",
|
| 38 |
+
"exp_avg_sq": "tensor([7.9835e-06, 7.9835e-06], device='cuda:0')"
|
| 39 |
+
}
|
| 40 |
+
},
|
| 41 |
+
"param_groups": [
|
| 42 |
+
{
|
| 43 |
+
"lr": 0.005000500000000001,
|
| 44 |
+
"name": "shared",
|
| 45 |
+
"betas": [
|
| 46 |
+
0.9,
|
| 47 |
+
0.999
|
| 48 |
+
],
|
| 49 |
+
"eps": 1e-08,
|
| 50 |
+
"weight_decay": 1e-05,
|
| 51 |
+
"amsgrad": false,
|
| 52 |
+
"maximize": false,
|
| 53 |
+
"foreach": null,
|
| 54 |
+
"capturable": false,
|
| 55 |
+
"differentiable": false,
|
| 56 |
+
"fused": null,
|
| 57 |
+
"decoupled_weight_decay": true,
|
| 58 |
+
"initial_lr": 0.01,
|
| 59 |
+
"params": [
|
| 60 |
+
0,
|
| 61 |
+
1,
|
| 62 |
+
2,
|
| 63 |
+
3
|
| 64 |
+
]
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"lr": 0.005000500000000001,
|
| 68 |
+
"name": "scale_256",
|
| 69 |
+
"betas": [
|
| 70 |
+
0.9,
|
| 71 |
+
0.999
|
| 72 |
+
],
|
| 73 |
+
"eps": 1e-08,
|
| 74 |
+
"weight_decay": 1e-05,
|
| 75 |
+
"amsgrad": false,
|
| 76 |
+
"maximize": false,
|
| 77 |
+
"foreach": null,
|
| 78 |
+
"capturable": false,
|
| 79 |
+
"differentiable": false,
|
| 80 |
+
"fused": null,
|
| 81 |
+
"decoupled_weight_decay": true,
|
| 82 |
+
"initial_lr": 0.01,
|
| 83 |
+
"params": [
|
| 84 |
+
4
|
| 85 |
+
]
|
| 86 |
+
},
|
| 87 |
+
{
|
| 88 |
+
"lr": 0.005000500000000001,
|
| 89 |
+
"name": "scale_512",
|
| 90 |
+
"betas": [
|
| 91 |
+
0.9,
|
| 92 |
+
0.999
|
| 93 |
+
],
|
| 94 |
+
"eps": 1e-08,
|
| 95 |
+
"weight_decay": 1e-05,
|
| 96 |
+
"amsgrad": false,
|
| 97 |
+
"maximize": false,
|
| 98 |
+
"foreach": null,
|
| 99 |
+
"capturable": false,
|
| 100 |
+
"differentiable": false,
|
| 101 |
+
"fused": null,
|
| 102 |
+
"decoupled_weight_decay": true,
|
| 103 |
+
"initial_lr": 0.01,
|
| 104 |
+
"params": [
|
| 105 |
+
5
|
| 106 |
+
]
|
| 107 |
+
},
|
| 108 |
+
{
|
| 109 |
+
"lr": 0.0025005,
|
| 110 |
+
"name": "fusion",
|
| 111 |
+
"betas": [
|
| 112 |
+
0.9,
|
| 113 |
+
0.999
|
| 114 |
+
],
|
| 115 |
+
"eps": 1e-08,
|
| 116 |
+
"weight_decay": 1e-05,
|
| 117 |
+
"amsgrad": false,
|
| 118 |
+
"maximize": false,
|
| 119 |
+
"foreach": null,
|
| 120 |
+
"capturable": false,
|
| 121 |
+
"differentiable": false,
|
| 122 |
+
"fused": null,
|
| 123 |
+
"decoupled_weight_decay": true,
|
| 124 |
+
"initial_lr": 0.005,
|
| 125 |
+
"params": [
|
| 126 |
+
6
|
| 127 |
+
]
|
| 128 |
+
}
|
| 129 |
+
]
|
| 130 |
+
},
|
| 131 |
+
"scheduler_state_dict": {
|
| 132 |
+
"T_0": 10,
|
| 133 |
+
"T_i": 10,
|
| 134 |
+
"T_mult": 2,
|
| 135 |
+
"eta_min": 1e-06,
|
| 136 |
+
"T_cur": 5,
|
| 137 |
+
"base_lrs": [
|
| 138 |
+
0.01,
|
| 139 |
+
0.01,
|
| 140 |
+
0.01,
|
| 141 |
+
0.005
|
| 142 |
+
],
|
| 143 |
+
"last_epoch": 5,
|
| 144 |
+
"_step_count": 0,
|
| 145 |
+
"_is_initial": false,
|
| 146 |
+
"_get_lr_called_within_step": false,
|
| 147 |
+
"_last_lr": [
|
| 148 |
+
0.005000500000000001,
|
| 149 |
+
0.005000500000000001,
|
| 150 |
+
0.005000500000000001,
|
| 151 |
+
0.0025005
|
| 152 |
+
]
|
| 153 |
+
},
|
| 154 |
+
"metrics": {
|
| 155 |
+
"best_val_acc": 65.312,
|
| 156 |
+
"best_epoch": 4,
|
| 157 |
+
"scale_accuracies": {
|
| 158 |
+
"256": 64.754,
|
| 159 |
+
"512": 65.18466666666667
|
| 160 |
+
},
|
| 161 |
+
"training_history": {
|
| 162 |
+
"epochs": [
|
| 163 |
+
1,
|
| 164 |
+
2,
|
| 165 |
+
3,
|
| 166 |
+
4,
|
| 167 |
+
5
|
| 168 |
+
],
|
| 169 |
+
"train_loss": [
|
| 170 |
+
3.9435249049420933,
|
| 171 |
+
3.3040703793567867,
|
| 172 |
+
4.3101251841734625,
|
| 173 |
+
4.185147669827233,
|
| 174 |
+
4.123004540650211
|
| 175 |
+
],
|
| 176 |
+
"train_acc": [
|
| 177 |
+
54.38726307083047,
|
| 178 |
+
59.31631083223343,
|
| 179 |
+
60.291879721118846,
|
| 180 |
+
61.30111583163371,
|
| 181 |
+
61.94625681117294
|
| 182 |
+
],
|
| 183 |
+
"val_acc": [
|
| 184 |
+
61.635333333333335,
|
| 185 |
+
62.978,
|
| 186 |
+
64.12,
|
| 187 |
+
64.73133333333334,
|
| 188 |
+
65.312
|
| 189 |
+
],
|
| 190 |
+
"scale_accs": {
|
| 191 |
+
"256": [
|
| 192 |
+
61.635333333333335,
|
| 193 |
+
62.978,
|
| 194 |
+
63.782,
|
| 195 |
+
64.34866666666667,
|
| 196 |
+
64.754
|
| 197 |
+
],
|
| 198 |
+
"512": [
|
| 199 |
+
63.839333333333336,
|
| 200 |
+
64.522,
|
| 201 |
+
65.18466666666667
|
| 202 |
+
]
|
| 203 |
+
},
|
| 204 |
+
"lr": [
|
| 205 |
+
0.00975530705321762,
|
| 206 |
+
0.00904518046337755,
|
| 207 |
+
0.00793913236883622,
|
| 208 |
+
0.00654543046337755,
|
| 209 |
+
0.005000500000000001
|
| 210 |
+
]
|
| 211 |
+
}
|
| 212 |
+
},
|
| 213 |
+
"train_config": {
|
| 214 |
+
"name": "david_training",
|
| 215 |
+
"run_id": "20251012_235237",
|
| 216 |
+
"dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
|
| 217 |
+
"model_variant": [
|
| 218 |
+
"clip_vit_b16",
|
| 219 |
+
"clip_vit_laion_b32",
|
| 220 |
+
"clip_vit_b32"
|
| 221 |
+
],
|
| 222 |
+
"num_classes": 1000,
|
| 223 |
+
"preset": "small_fast",
|
| 224 |
+
"custom_config_path": null,
|
| 225 |
+
"num_classes_override": null,
|
| 226 |
+
"use_belly_override": null,
|
| 227 |
+
"belly_expand_override": null,
|
| 228 |
+
"progressive_training_override": true,
|
| 229 |
+
"scale_warmup_epochs_override": {
|
| 230 |
+
"256": 0,
|
| 231 |
+
"512": 2
|
| 232 |
+
},
|
| 233 |
+
"num_epochs": 10,
|
| 234 |
+
"batch_size": 1024,
|
| 235 |
+
"learning_rate": 0.01,
|
| 236 |
+
"weight_decay": 1e-05,
|
| 237 |
+
"warmup_epochs": 3,
|
| 238 |
+
"use_rose_loss": true,
|
| 239 |
+
"rose_initial_weight": 0.1,
|
| 240 |
+
"rose_max_weight": 0.8,
|
| 241 |
+
"rose_weight_schedule": "adaptive",
|
| 242 |
+
"use_cayley_loss": false,
|
| 243 |
+
"cayley_weight": 0.01,
|
| 244 |
+
"scale_loss_balance": null,
|
| 245 |
+
"use_mixed_precision": false,
|
| 246 |
+
"gradient_clip": 15.0,
|
| 247 |
+
"scheduler_type": "cosine_restarts",
|
| 248 |
+
"min_lr": 1e-06,
|
| 249 |
+
"freeze_strategy": "never",
|
| 250 |
+
"freeze_threshold": 90.0,
|
| 251 |
+
"unfreeze_on_plateau": true,
|
| 252 |
+
"patience": 10,
|
| 253 |
+
"track_gradients": true,
|
| 254 |
+
"gradient_scale_threshold": 1e-05,
|
| 255 |
+
"gradient_scale_multiplier": 10.0,
|
| 256 |
+
"log_interval": 50,
|
| 257 |
+
"val_interval": 1,
|
| 258 |
+
"save_interval": 5,
|
| 259 |
+
"log_fusion_weights": true,
|
| 260 |
+
"log_loss_components": true,
|
| 261 |
+
"save_format": "safetensors",
|
| 262 |
+
"hf_repo": "AbstractPhil/david-shared-space",
|
| 263 |
+
"upload_to_hub": true,
|
| 264 |
+
"base_dir": "./david_training",
|
| 265 |
+
"num_workers": 10,
|
| 266 |
+
"pin_memory": true,
|
| 267 |
+
"prefetch_factor": 4,
|
| 268 |
+
"persistent_workers": true
|
| 269 |
+
}
|
| 270 |
+
}
|