Update best_model_acc66.50_metadata.json - Run 20251012_231445
Browse files
weights/David-fully_shared-weighted_sum/20251012_231445/best_model_acc66.50_metadata.json
ADDED
|
@@ -0,0 +1,293 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 7,
|
| 3 |
+
"optimizer_state_dict": {
|
| 4 |
+
"state": {
|
| 5 |
+
"0": {
|
| 6 |
+
"step": "tensor(30032.)",
|
| 7 |
+
"exp_avg": "tensor([[ 4.7726e-05, -6.2857e-05, -1.4752e-05, ..., -3.4455e-05,\n -4.7871e-05, 1.8786e-06],\n [ 3.7341e-05, -4.3514e-05, 3.7145e-05, ..., 5.4946e-05,\n 5.1166e-05, -6.4977e-05],\n [ 8.3701e-05, -9.0343e-05, 2.7573e-05, ..., 7.3650e-05,\n -1.0325e-05, -5.1072e-05],\n ...,\n [ 2.0104e-05, 2.4889e-06, 9.1569e-05, ..., -3.2536e-05,\n -1.4902e-06, 2.5105e-05],\n [-5.8237e-05, -6.9328e-05, -1.5916e-05, ..., -6.7836e-05,\n -1.8618e-05, -1.8809e-05],\n [ 2.1039e-06, -1.9841e-05, -6.0444e-05, ..., -2.0303e-05,\n -7.6297e-05, -4.2238e-05]], device='cuda:0')",
|
| 8 |
+
"exp_avg_sq": "tensor([[2.4177e-08, 5.1576e-08, 3.0018e-08, ..., 3.6028e-08, 2.5463e-08,\n 1.3620e-08],\n [7.3300e-08, 1.1052e-07, 4.6090e-08, ..., 4.5024e-08, 5.8511e-08,\n 2.1904e-08],\n [2.0666e-07, 1.8412e-07, 1.1604e-07, ..., 8.4827e-08, 4.1834e-08,\n 6.6682e-08],\n ...,\n [3.5244e-08, 1.1751e-07, 2.6688e-08, ..., 2.2216e-08, 1.5440e-08,\n 1.3173e-08],\n [3.5206e-08, 4.3014e-07, 4.1562e-08, ..., 7.8022e-08, 2.0624e-08,\n 2.7263e-08],\n [5.0633e-08, 7.6129e-08, 2.6479e-08, ..., 8.3474e-08, 2.3791e-08,\n 2.6975e-08]], device='cuda:0')"
|
| 9 |
+
},
|
| 10 |
+
"1": {
|
| 11 |
+
"step": "tensor(30032.)",
|
| 12 |
+
"exp_avg": "tensor([ 1.4106e-03, 8.1873e-04, 2.8454e-04, 3.4300e-04, 2.1118e-04,\n 2.4556e-04, -2.8130e-03, -2.3481e-04, 9.4691e-04, -1.1904e-03,\n 1.2778e-03, 1.7868e-03, 8.5774e-04, 1.8583e-03, 5.6964e-04,\n -2.1919e-03, 3.2967e-05, 1.9961e-03, 1.6545e-04, -2.4182e-03,\n 1.9884e-03, 4.4165e-04, -1.8589e-03, -1.3754e-03, 1.3151e-04,\n 1.8081e-03, 3.0598e-03, 7.8937e-04, -1.0689e-03, 1.0367e-03,\n -1.0278e-03, 3.1597e-04, 1.4810e-03, -4.6118e-03, 1.9876e-03,\n -2.1323e-03, 5.4252e-04, -1.0104e-04, -4.6964e-04, 6.2164e-04,\n -3.1810e-04, 1.0332e-03, 1.6525e-04, -1.1876e-03, -2.0335e-03,\n -3.9736e-04, 1.1547e-04, 1.3114e-04, 1.3023e-03, -5.2056e-04,\n 1.4632e-03, 2.1764e-03, -1.8150e-03, 2.6491e-03, -3.8700e-03,\n 4.3123e-04, 1.2348e-03, -1.1698e-03, 6.2565e-04, -2.4264e-03,\n -2.1929e-04, -3.8605e-03, 1.5012e-03, 5.0693e-04, 2.6589e-03,\n 4.8393e-04, 9.1439e-04, 1.3606e-03, -5.3121e-05, -1.4049e-04,\n -4.1423e-04, -1.5084e-03, -4.3318e-04, -1.4636e-03, -5.7634e-04,\n -5.8724e-05, 1.1380e-03, 4.2799e-04, -1.9236e-03, 1.2177e-03,\n 8.8521e-04, -1.8878e-04, -1.3859e-05, -5.1169e-04, -2.0188e-03,\n -2.0196e-03, 1.8716e-03, -2.7471e-04, 1.1943e-03, 4.3090e-04,\n -9.9022e-04, -1.8846e-04, -1.3821e-03, -9.3869e-04, -5.4592e-04,\n 1.3192e-03, 1.0235e-03, -1.8957e-03, -1.9894e-04, -3.5049e-04,\n -1.4825e-03, 6.6286e-04, 2.3466e-04, 1.0027e-05, 1.2919e-03,\n -5.8763e-04, -2.4531e-03, 4.9929e-04, 1.8687e-03, -9.6265e-04,\n 1.9373e-03, -1.7798e-03, -4.1277e-03, 2.7319e-03, 2.1932e-04,\n 8.6317e-04, -8.6069e-04, 7.6673e-04, -2.7271e-04, -1.4901e-05,\n 2.8631e-03, 6.7408e-04, -3.9262e-03, 1.8624e-03, 1.0465e-03,\n -3.0812e-04, -8.0830e-04, 1.4599e-03, -1.2482e-03, -6.3361e-04,\n 1.0361e-03, 1.9973e-03, -9.2798e-04, -4.1670e-03, -8.7477e-04,\n -2.8532e-03, 2.6840e-03, -1.1399e-03, 3.3730e-04, 5.4425e-04,\n -1.5251e-03, 3.3101e-04, -3.9210e-04, 1.3398e-04, 1.0898e-03,\n -3.3896e-03, 2.7347e-03, -8.7768e-05, 1.9565e-03, -1.3537e-03,\n -9.3055e-04, -8.1678e-04, -1.0414e-03, 3.5274e-03, -2.1010e-03,\n 1.9163e-03, 2.7620e-04, 9.7271e-04, -5.9795e-04, 7.9674e-05,\n -1.5584e-03, -5.5970e-04, 3.1029e-03, -2.8743e-03, -1.4734e-03,\n -1.5222e-03, 1.5856e-03, -2.6737e-04, 2.5831e-04, 1.2956e-03,\n 1.6506e-03, -1.7115e-04, 1.3251e-03, -1.3884e-03, 5.3491e-05,\n 1.0830e-04, 2.2386e-04, 1.1860e-03, -6.1136e-04, -2.3926e-03,\n -3.6602e-04, -3.1457e-03, 2.7624e-03, 1.1525e-03, -1.5546e-03,\n 7.5272e-04, -2.3334e-04, -2.0018e-04, -5.9752e-03, 1.8863e-03,\n -1.5506e-04, 8.8660e-04, -2.1735e-05, 7.2664e-04, -1.7500e-03,\n -8.4732e-04, 1.9039e-04, -6.5888e-04, -8.9068e-04, 9.0388e-04,\n 9.4224e-04, 5.6647e-04, -1.1230e-03, -1.5140e-03, -3.6294e-04,\n 1.6272e-04, 1.9630e-03, 1.4625e-03, -2.3495e-04, 1.6889e-03,\n 1.0717e-03, 3.2803e-04, 2.4808e-04, 2.0049e-03, 1.0521e-03,\n -7.2475e-04, 2.6832e-03, -1.7649e-03, -7.4115e-04, -9.3428e-04,\n 1.0501e-03, -1.5708e-03, -1.8262e-04, -3.9823e-04, -1.8076e-03,\n 1.8510e-04, -1.5726e-03, -2.1941e-03, -3.6067e-03, 6.8847e-04,\n 1.8937e-03, -1.2050e-03, -1.1728e-03, -1.6911e-03, 7.6307e-04,\n 5.1417e-05, 1.2117e-03, -2.3437e-03, -8.7540e-04, 2.6479e-03,\n 6.0026e-04, -1.0415e-04, 1.2142e-03, -1.8749e-03, -7.7331e-04,\n -8.3722e-04, 2.3541e-03, -6.2794e-04, 2.1742e-04, 1.6563e-04,\n 1.6504e-03, -1.0422e-03, 2.7333e-04, -3.9045e-04, -1.2579e-04,\n -2.4482e-04, 5.8640e-04, -4.6337e-03, 2.7451e-04, 1.7741e-03,\n 1.2756e-03, -1.8851e-03, -2.2197e-03, -1.1332e-04, 3.2810e-04,\n -1.3800e-03, -9.1273e-05, -2.2873e-03, 1.9134e-03, 1.2783e-03,\n -1.9523e-03, 1.7205e-03, -4.4359e-04, 2.7089e-03, -4.5358e-04,\n 6.0505e-04, -5.1661e-04, 8.2156e-04, 2.3082e-03, -9.0454e-04,\n -1.7448e-03, 2.0969e-03, -2.0227e-04, -9.1959e-04, 2.4469e-04,\n 2.7262e-04, 3.0291e-04, -1.3828e-03, -1.4472e-04, 1.0156e-03,\n 6.4715e-04, -2.7190e-03, 3.6117e-05, 1.9247e-03, 2.9657e-05,\n 1.8798e-04, 1.1288e-03, -1.6569e-03, -1.9769e-04, 3.5293e-04,\n -2.7792e-03, -6.2034e-03, 1.2571e-03, 1.1997e-04, 5.4414e-05,\n -8.7238e-04, -2.6280e-03, -6.5219e-05, -1.1714e-03, 1.4311e-03,\n 2.7411e-03, -3.7023e-03, -5.0041e-04, 1.4765e-03, 1.5509e-03,\n -5.5148e-04, 2.4834e-03, -1.8533e-03, 2.1965e-04, 1.5062e-03,\n 1.1564e-03, -4.7374e-04, -9.6760e-04, 2.2324e-03, -4.2956e-03,\n 2.1599e-03, -2.3864e-03, -3.8059e-04, -9.9850e-04, -1.1875e-04,\n 1.9455e-04, -3.0398e-03, -4.3592e-04, 4.5769e-04, -1.6214e-03,\n -7.9755e-04, -4.1966e-04, 9.3999e-04, 9.6026e-04, 1.5656e-03,\n 1.6876e-03, -5.3360e-04, 1.6850e-03, 1.4737e-03, 6.1209e-04,\n 2.2843e-04, 1.3752e-03, -3.6061e-04, 2.3291e-03, -3.0002e-04,\n -3.2148e-04, -1.2024e-03, -1.5816e-03, -2.2552e-04, 5.8338e-03,\n 2.5511e-04, 1.5753e-03, -6.4847e-04, -6.9831e-05, 7.5574e-04,\n 1.0655e-03, 1.4152e-03, 5.8881e-04, -5.8039e-04, -3.3873e-04,\n -3.1370e-04, 9.0675e-04, 5.6650e-04, 2.0003e-03, -1.9136e-04,\n -1.9526e-03, -3.1867e-04, -8.8097e-04, 2.2154e-03, 3.7947e-04,\n -1.5730e-03, -8.0996e-04, 3.4519e-03, 1.0636e-03, 1.1559e-03,\n 1.3063e-03, -4.2810e-04, 3.4032e-04, -8.0775e-04, 4.1229e-04,\n 1.8404e-03, 1.8699e-03, -1.8907e-03, -1.5664e-04, 3.1092e-04,\n -1.7967e-04, -3.9105e-04, 3.6467e-03, -3.6112e-04, -1.3400e-03,\n 1.2914e-03, 8.1190e-04, 7.3444e-04, -3.7557e-04, -1.3602e-03,\n -1.1490e-03, 2.2700e-03, -1.8437e-03, -4.7159e-04, -5.9964e-04,\n -3.4340e-04, -1.2922e-03, -1.5015e-03, -1.9549e-04, -3.0699e-03,\n -5.5757e-04, 4.5066e-05, 1.8272e-03, 1.9226e-03, -3.5682e-03,\n -6.6471e-04, 2.6589e-03, 2.1520e-03, 4.9191e-04, 6.3996e-04,\n 1.1875e-03, 4.0246e-03, -5.2893e-05, 1.9051e-03, 4.0679e-06,\n 1.6701e-03, -8.0262e-05, -1.7132e-03, 1.9380e-03, 1.2959e-04,\n -1.4271e-03, 1.7811e-03, 4.0925e-03, 9.2347e-04, 1.3108e-03,\n -3.5839e-04, 2.6580e-03, -2.6571e-03, 2.6476e-04, -7.9960e-04,\n -9.3483e-04, 1.3326e-03, 1.3926e-03, 2.0054e-03, 2.5792e-04,\n -1.4398e-03, 1.6427e-03, -5.7682e-05, -6.5730e-04, 7.7907e-04,\n 5.9413e-04, 1.5467e-03, -5.8613e-04, -9.0642e-04, -2.2094e-03,\n -1.0410e-03, 3.8760e-04, -2.5006e-04, 8.8922e-04, 1.8332e-05,\n -5.2983e-05, 3.3036e-03, -1.1690e-03, 9.2154e-04, 5.6041e-04,\n 2.5364e-03, 4.3054e-03, 2.3881e-03, 1.6439e-03, 1.0802e-03,\n 3.2381e-03, -9.3581e-05, 2.9285e-03, -9.7042e-04, 1.3038e-03,\n -5.0193e-04, 1.9483e-03, -2.7088e-03, 6.4556e-04, 1.9695e-03,\n -3.2232e-03, 2.7615e-04, 3.7480e-04, -2.5918e-04, -7.3768e-04,\n -1.9478e-04, -6.0084e-06, -2.6002e-03, -2.3943e-04, -2.3738e-03,\n -1.3700e-04, -8.4054e-04, -4.2332e-03, -1.0302e-03, -1.0734e-04,\n -3.5346e-04, -1.6052e-03, 2.7595e-03, -5.4486e-04, -7.0890e-04,\n -1.8395e-03, -9.5462e-04, -1.5158e-03, 1.0111e-03, -3.1203e-03,\n 1.1744e-03, -2.9719e-03, -1.3995e-03, -1.0547e-03, -1.1897e-03,\n -2.0950e-03, -1.1202e-03], device='cuda:0')",
|
| 13 |
+
"exp_avg_sq": "tensor([2.2886e-05, 3.8641e-05, 6.9345e-05, 4.8359e-05, 2.6824e-05, 8.0984e-05,\n 6.6846e-05, 1.9844e-05, 2.9340e-05, 3.9777e-05, 5.1504e-05, 5.5183e-05,\n 2.8009e-05, 3.5433e-05, 8.5925e-05, 2.5008e-05, 4.7949e-05, 9.2129e-05,\n 1.3270e-05, 3.7761e-05, 2.7953e-05, 4.0796e-05, 6.2245e-05, 3.2455e-05,\n 2.1909e-05, 2.5031e-05, 6.8650e-05, 4.7207e-05, 4.3214e-05, 3.1479e-05,\n 3.7390e-05, 3.8593e-05, 1.7877e-05, 1.0321e-04, 4.8360e-05, 3.2415e-05,\n 2.4313e-05, 4.9671e-05, 5.6270e-05, 4.8616e-05, 3.1180e-05, 3.7607e-05,\n 4.3590e-05, 2.5976e-05, 3.9335e-05, 4.1368e-05, 2.7585e-05, 2.0719e-05,\n 2.3562e-05, 2.3013e-05, 2.8430e-05, 3.2703e-05, 8.9323e-05, 3.7775e-05,\n 5.6992e-05, 3.3767e-05, 3.8433e-05, 2.8509e-05, 3.8880e-05, 3.9436e-05,\n 2.0239e-05, 3.4182e-05, 1.8342e-05, 7.4124e-05, 8.2129e-05, 5.9716e-05,\n 3.9872e-05, 6.8127e-05, 3.0080e-05, 4.6983e-05, 2.8668e-05, 7.5304e-05,\n 3.8129e-05, 9.3926e-05, 3.3650e-05, 4.5614e-05, 5.8670e-05, 2.6249e-05,\n 3.3915e-05, 4.7991e-05, 3.5112e-05, 3.0105e-05, 3.0711e-05, 5.2456e-05,\n 4.1481e-05, 3.9707e-05, 1.0651e-04, 2.3958e-05, 3.5569e-05, 4.6860e-05,\n 3.5870e-05, 2.6166e-05, 7.5622e-05, 1.2693e-04, 4.3446e-05, 5.1633e-05,\n 4.6372e-05, 6.2417e-05, 4.0843e-05, 3.3039e-05, 3.5775e-05, 3.5231e-05,\n 2.0848e-05, 3.1432e-05, 2.5842e-05, 2.7675e-05, 3.4237e-05, 2.0050e-05,\n 4.0777e-05, 1.9125e-05, 7.7176e-05, 1.8863e-05, 6.0606e-05, 3.6891e-05,\n 3.5299e-05, 1.7946e-05, 3.3686e-05, 6.5787e-05, 2.9824e-05, 4.0486e-05,\n 3.8758e-05, 4.0220e-05, 6.2174e-05, 5.5754e-05, 3.5636e-05, 5.4820e-05,\n 4.0695e-05, 6.5440e-05, 6.1700e-05, 4.1426e-05, 3.8131e-05, 8.7383e-05,\n 5.2478e-05, 4.0136e-05, 6.4495e-05, 2.4293e-05, 2.7237e-05, 1.9883e-05,\n 5.7222e-05, 4.2594e-05, 2.8040e-05, 5.7058e-05, 4.1171e-05, 1.0977e-04,\n 3.5318e-05, 4.7398e-05, 4.5713e-05, 3.4972e-05, 8.8816e-05, 4.2135e-05,\n 4.8711e-05, 4.6351e-05, 4.4892e-05, 4.8399e-05, 4.0777e-05, 5.9707e-05,\n 2.1165e-05, 2.1831e-05, 4.6629e-05, 2.3227e-05, 4.7374e-05, 5.2183e-05,\n 5.1661e-05, 3.9023e-05, 4.7266e-05, 2.8224e-05, 5.0491e-05, 3.1128e-05,\n 5.9452e-05, 7.7483e-05, 2.7693e-05, 4.1078e-05, 2.7077e-05, 3.7669e-05,\n 2.7060e-05, 1.6371e-05, 3.6196e-05, 3.8954e-05, 3.2608e-05, 3.6892e-05,\n 4.3179e-05, 5.5065e-05, 1.1981e-04, 3.7205e-05, 1.6230e-05, 4.9518e-05,\n 5.6056e-05, 5.7502e-05, 5.7348e-05, 4.6915e-05, 3.5171e-06, 3.4290e-05,\n 1.4444e-05, 1.9955e-05, 2.5380e-05, 3.2877e-05, 2.7658e-05, 3.7376e-05,\n 3.5662e-05, 2.6823e-05, 4.5040e-05, 3.4080e-05, 3.2219e-05, 1.9682e-05,\n 3.7232e-05, 8.3582e-05, 3.1207e-05, 2.7343e-05, 2.8686e-05, 3.0175e-05,\n 3.7859e-05, 3.3769e-05, 3.3654e-05, 4.1089e-05, 3.7740e-05, 4.4408e-05,\n 4.6060e-05, 3.1392e-05, 3.5659e-05, 3.1782e-05, 4.7196e-05, 7.4091e-05,\n 2.6564e-05, 3.9652e-05, 2.8976e-05, 3.2447e-05, 3.8755e-05, 3.4832e-05,\n 5.5479e-05, 7.3154e-05, 4.7641e-05, 2.9244e-05, 3.1009e-05, 2.9326e-05,\n 2.3846e-05, 3.8067e-05, 1.9076e-05, 3.5752e-05, 4.1803e-05, 4.1896e-05,\n 2.6994e-05, 3.0408e-05, 2.1844e-05, 3.9983e-05, 1.2116e-05, 3.2167e-05,\n 7.0421e-05, 4.1823e-05, 3.0392e-05, 4.4973e-05, 6.1643e-05, 5.5038e-05,\n 2.3419e-05, 3.7743e-05, 3.2016e-05, 2.8338e-05, 3.3256e-05, 3.6003e-05,\n 3.4249e-05, 7.1349e-05, 3.6505e-05, 2.7839e-05, 3.2660e-05, 1.7308e-05,\n 4.3372e-05, 4.7771e-05, 5.8849e-05, 2.7918e-05, 2.1302e-05, 1.6799e-05,\n 3.0644e-05, 3.5614e-05, 2.4581e-05, 5.3979e-05, 2.4257e-05, 4.2087e-05,\n 8.0116e-05, 2.9937e-05, 4.6392e-05, 2.3040e-05, 3.7270e-05, 3.9417e-05,\n 3.3991e-05, 3.8494e-05, 3.2202e-05, 4.1516e-05, 6.1689e-05, 4.2772e-05,\n 3.2463e-06, 7.6006e-05, 4.7257e-05, 2.4836e-05, 4.5594e-05, 5.3143e-05,\n 4.0529e-05, 5.6010e-06, 4.0809e-05, 5.8284e-05, 4.4442e-05, 1.0819e-04,\n 5.4055e-05, 5.3367e-05, 4.2531e-05, 3.0399e-05, 5.7151e-05, 4.3111e-05,\n 2.5420e-05, 3.2122e-05, 5.1539e-05, 3.2421e-05, 4.9744e-05, 5.4457e-05,\n 7.9338e-05, 2.6393e-05, 3.2135e-05, 4.2351e-05, 3.3412e-05, 3.7806e-05,\n 1.5504e-05, 3.7189e-05, 4.8293e-05, 4.7187e-05, 2.8701e-05, 4.6981e-05,\n 6.0419e-05, 8.2313e-05, 3.0176e-05, 4.0159e-05, 1.8965e-05, 3.0554e-05,\n 3.6376e-05, 6.5126e-05, 3.5536e-05, 2.1464e-05, 8.2617e-05, 5.1182e-05,\n 1.0587e-05, 2.9009e-05, 2.9358e-05, 7.4722e-05, 2.2767e-05, 4.9114e-05,\n 4.1664e-05, 3.3219e-05, 4.9281e-05, 3.6106e-05, 3.0874e-05, 3.4131e-05,\n 4.3116e-05, 7.0187e-05, 2.2127e-05, 6.7530e-05, 2.1617e-05, 5.6814e-05,\n 8.5169e-05, 4.6761e-05, 3.8717e-05, 4.1888e-05, 2.6559e-05, 3.1581e-05,\n 6.5100e-05, 3.3980e-05, 2.5880e-05, 1.9216e-05, 2.7864e-05, 3.8880e-05,\n 2.0679e-05, 2.9762e-05, 6.5688e-05, 2.7317e-05, 3.6022e-05, 4.4627e-06,\n 2.6535e-05, 3.6302e-05, 4.0385e-05, 3.9835e-05, 7.9937e-05, 5.0869e-05,\n 4.1690e-05, 3.7664e-05, 5.0601e-05, 5.4365e-05, 3.1869e-05, 2.7019e-05,\n 3.2860e-05, 5.2029e-05, 2.7657e-05, 2.5872e-05, 3.1178e-05, 2.2790e-05,\n 2.0218e-05, 2.9183e-05, 2.9689e-05, 3.2180e-05, 3.8985e-05, 4.7810e-05,\n 3.5118e-05, 4.2227e-05, 2.8017e-05, 6.8675e-05, 3.0241e-05, 3.2184e-05,\n 3.3247e-05, 2.8637e-05, 3.5614e-05, 1.7443e-05, 1.5771e-05, 4.6998e-05,\n 5.7556e-05, 5.3960e-05, 3.1644e-05, 5.2987e-05, 4.7134e-05, 2.8621e-05,\n 3.3756e-05, 4.0192e-05, 5.4269e-05, 4.1185e-05, 4.0862e-05, 3.4924e-05,\n 3.1408e-05, 3.3850e-05, 3.7507e-05, 8.2925e-05, 2.5823e-05, 5.7697e-05,\n 2.5885e-05, 3.5019e-05, 4.1194e-05, 2.2645e-05, 3.9558e-05, 6.6050e-05,\n 4.0798e-05, 4.4115e-05, 4.0800e-05, 4.0840e-05, 4.4500e-05, 5.1746e-05,\n 4.6822e-05, 4.8416e-05, 1.8897e-05, 8.5614e-05, 4.6424e-05, 4.6017e-05,\n 2.5748e-05, 4.3829e-05, 3.8184e-05, 2.8087e-05, 5.0892e-05, 4.4135e-05,\n 5.0831e-05, 5.2862e-05, 2.7947e-05, 4.4906e-05, 1.2518e-04, 2.6364e-05,\n 4.3057e-05, 3.3701e-05, 3.7998e-05, 3.9847e-05, 4.9116e-05, 4.0573e-05,\n 3.8296e-05, 3.9932e-05, 2.1591e-05, 2.7256e-05, 3.4946e-05, 6.0729e-05,\n 2.9027e-05, 5.6018e-05, 6.3417e-05, 1.9591e-05, 2.6368e-05, 3.9499e-05,\n 5.1451e-05, 2.5577e-05, 3.3685e-05, 5.8088e-05, 2.8822e-05, 5.9485e-05,\n 5.8089e-05, 2.7552e-05, 2.2947e-05, 3.4554e-05, 6.1876e-05, 2.7442e-05,\n 2.0139e-05, 3.2293e-05, 4.9322e-05, 3.3103e-05, 3.2979e-05, 4.0569e-05,\n 1.1442e-04, 3.6962e-05, 3.8044e-05, 2.1297e-05, 6.4847e-05, 3.4182e-05,\n 3.6676e-05, 4.5293e-05, 3.3419e-05, 3.7788e-05, 4.5641e-05, 1.7267e-05,\n 5.1644e-05, 3.4983e-05, 5.2556e-05, 2.9653e-05, 6.1090e-05, 1.8544e-05,\n 3.7645e-05, 4.0450e-05], device='cuda:0')"
|
| 14 |
+
},
|
| 15 |
+
"2": {
|
| 16 |
+
"step": "tensor(30032.)",
|
| 17 |
+
"exp_avg": "tensor([ 6.6199e-03, 1.0710e-03, 2.7178e-04, 8.1019e-04, 1.0499e-03,\n -6.2358e-04, -2.7068e-03, -2.4824e-04, 2.1372e-03, -1.9423e-03,\n 1.8377e-03, 4.7663e-03, 3.3615e-03, 5.2777e-03, 3.6742e-04,\n -7.9479e-03, 5.5007e-04, 4.9487e-03, -2.2922e-05, -6.0870e-03,\n 5.2302e-03, 1.4799e-03, -4.2429e-03, -3.6549e-03, 2.1021e-03,\n 4.3205e-03, 7.8681e-03, 8.9033e-04, -2.2408e-03, 1.7871e-03,\n -2.0238e-03, 8.3739e-04, 3.3555e-03, -1.1124e-02, 3.2199e-03,\n -5.2349e-03, 9.4953e-04, -3.6186e-04, -3.3989e-04, 1.9918e-03,\n 3.2392e-04, 6.2131e-03, 1.3978e-03, -3.3349e-03, -4.4049e-03,\n -1.0610e-03, 2.0660e-03, 9.3430e-04, 3.6064e-03, -1.7310e-03,\n 4.0215e-03, 7.2764e-03, -3.2337e-03, 6.8654e-03, -1.0752e-02,\n 1.2019e-03, 3.3453e-03, -3.1437e-03, 7.5893e-04, -5.2031e-03,\n -7.2590e-04, -6.9352e-03, 4.7534e-03, 8.5462e-04, 4.3108e-03,\n 1.3717e-03, 3.2301e-03, 2.2582e-03, 2.6047e-04, 6.7339e-04,\n -2.7311e-04, -2.2361e-03, 4.3638e-04, -4.3466e-03, -1.6365e-03,\n 8.0138e-05, 2.1599e-03, 1.0593e-03, -4.0540e-03, 2.4277e-03,\n 2.1383e-03, -8.4879e-04, 5.0467e-04, -4.6855e-05, -3.5688e-03,\n -3.9085e-03, 2.0019e-03, -6.3958e-04, 3.9461e-03, 3.7031e-04,\n -2.4671e-03, -1.6980e-04, -3.9268e-03, -1.7705e-03, -1.0624e-03,\n 3.2799e-03, 1.6480e-03, -3.7582e-03, 2.0416e-03, -7.5112e-04,\n -4.4121e-03, 1.1218e-03, 1.2100e-03, 6.8655e-04, 1.9147e-03,\n -3.4471e-04, -6.8315e-03, 1.4386e-03, 3.3008e-03, -1.7327e-03,\n 5.6084e-03, -4.1534e-03, -1.2098e-02, 7.1510e-03, 4.4852e-04,\n 2.2259e-03, -9.3877e-04, 6.5902e-04, -7.8867e-04, -5.8594e-04,\n 5.3375e-03, 6.3207e-04, -9.7303e-03, 4.3632e-03, 1.6354e-03,\n -1.0339e-04, -1.6585e-03, 4.6208e-03, -2.7615e-03, -1.7678e-03,\n 1.3844e-03, 3.2284e-03, -2.2196e-03, -7.7253e-03, -3.3033e-03,\n -6.1984e-03, 5.0637e-03, -3.0133e-03, 5.5561e-04, 9.3494e-04,\n -2.8223e-03, 1.3211e-03, -4.9075e-04, 8.4292e-04, 3.1915e-03,\n -6.6115e-03, 7.7010e-03, 2.0187e-04, 4.6973e-03, -4.5166e-03,\n -1.1989e-03, -1.5842e-03, 1.6214e-04, 5.2572e-03, -4.9185e-03,\n 3.7486e-03, 4.6837e-04, 2.9156e-03, -2.1420e-03, -9.0477e-04,\n -1.6840e-03, -1.4284e-04, 1.0557e-02, -6.7146e-03, -3.4864e-03,\n -3.8811e-03, 4.3772e-03, 4.3045e-04, 4.8288e-04, 3.0692e-03,\n 5.2452e-03, -1.0726e-03, 3.2086e-03, -2.3396e-03, -4.0832e-04,\n 9.0925e-04, 2.3706e-04, 1.3124e-03, -1.6813e-03, -6.0765e-03,\n -1.1561e-04, -7.4344e-03, 4.4001e-03, 5.5434e-03, -4.0877e-03,\n 1.4133e-03, 8.7637e-04, -6.6298e-04, -1.4631e-02, 3.6995e-03,\n -5.6052e-45, 1.3594e-03, -5.0522e-04, 2.3833e-04, -4.0163e-03,\n -2.7591e-03, 6.3674e-04, -4.0186e-04, -7.0086e-04, 2.6294e-03,\n 2.2587e-03, 2.1621e-03, -2.1999e-03, -5.3210e-03, 8.5888e-04,\n 1.1471e-03, 4.7909e-03, 2.1255e-03, -1.1644e-03, 4.1508e-03,\n 2.0549e-03, -2.2769e-04, 4.7186e-04, 4.0363e-03, 1.1524e-03,\n -1.8473e-03, 6.3971e-03, -3.1802e-03, -1.2014e-03, -3.2005e-03,\n 3.0214e-03, -2.4332e-03, -6.5894e-04, -1.1667e-03, -4.4415e-03,\n 4.9815e-04, -4.0621e-03, -4.2093e-03, -1.1218e-02, 2.0215e-03,\n 3.0188e-03, -2.2370e-03, -2.1334e-03, -3.2805e-03, 2.0214e-03,\n 9.0936e-04, 3.2095e-03, -6.3936e-03, -2.2352e-03, 6.7096e-03,\n 2.0220e-03, 2.7953e-04, 4.3871e-03, -6.3008e-03, -2.3800e-03,\n -3.7843e-03, 3.6340e-03, -1.1288e-03, 5.1682e-05, -4.2390e-04,\n 4.3254e-03, -2.3842e-03, 4.4087e-04, -9.5754e-04, -9.3797e-04,\n -6.6310e-04, 8.4286e-04, -1.1117e-02, 4.5126e-04, 4.2179e-03,\n 3.6591e-03, -4.1457e-03, -5.2797e-03, 1.2469e-04, 5.1401e-05,\n -4.4262e-03, -8.6467e-04, -4.2543e-03, 6.8146e-03, 3.8279e-03,\n -5.5889e-03, 3.4974e-03, -1.1394e-03, 6.3459e-03, -1.8219e-03,\n 1.2981e-03, -3.6652e-05, 1.0312e-03, 4.6149e-03, -3.2797e-03,\n -4.5719e-03, 5.7115e-03, 1.5783e-04, -2.4610e-03, 4.7381e-04,\n 9.1657e-04, 3.9443e-04, -1.9181e-03, 5.6052e-45, 1.7521e-03,\n 1.4046e-03, -4.9883e-03, -2.3849e-04, 2.5232e-03, 4.3999e-04,\n 5.6052e-45, 4.1037e-03, -2.7998e-03, -4.6121e-04, -2.4911e-04,\n -7.9419e-03, -1.5852e-02, 1.0566e-03, 2.5896e-04, 2.7031e-04,\n -2.7654e-03, -7.5775e-03, 1.7877e-03, -2.6926e-03, 3.3924e-03,\n 6.4683e-03, -9.0310e-03, -3.9493e-04, 2.8628e-03, 2.7485e-03,\n -1.8330e-03, 5.9238e-03, -4.7199e-03, 7.6752e-05, 2.8920e-03,\n 5.1964e-04, -3.7424e-03, -2.1420e-03, 3.5281e-03, -8.4832e-03,\n 6.1146e-03, -4.7748e-03, -6.1781e-04, -2.8082e-03, 1.0974e-04,\n 1.9392e-03, -3.4436e-03, -7.7203e-04, 5.2442e-04, -3.3074e-03,\n -2.5102e-03, -1.7195e-03, 1.6276e-03, 2.6476e-03, 3.3043e-03,\n 5.0130e-03, -1.7838e-03, 4.1822e-03, 4.3034e-03, 8.5622e-04,\n 2.5191e-03, 2.3661e-03, -1.1208e-03, 4.4443e-03, -1.0722e-03,\n -4.7046e-04, -4.0700e-03, -3.9193e-03, 5.8100e-04, 1.8937e-02,\n 1.7424e-03, 2.4605e-03, -4.5481e-04, -6.7494e-04, 1.2737e-03,\n 3.9602e-03, 1.8640e-03, 6.3922e-04, -3.1212e-03, -1.8075e-03,\n -8.1986e-04, 2.2854e-03, 8.2082e-04, 4.3570e-03, 2.1369e-03,\n -4.2366e-03, 5.6052e-45, -2.4186e-03, 4.8824e-03, 2.1703e-04,\n -3.5659e-03, -3.3841e-03, 6.6446e-03, 3.0400e-03, 2.1492e-03,\n 2.3974e-03, -6.3204e-04, 8.2934e-04, -2.1701e-04, 4.2369e-04,\n 5.6906e-03, 7.3026e-03, -4.5913e-03, -1.2882e-03, 1.2811e-03,\n -1.4126e-03, -9.0614e-04, 7.1842e-03, 4.5028e-05, -3.4665e-03,\n 3.6117e-03, 2.5484e-03, 1.4763e-03, -9.7412e-04, -3.1613e-03,\n -2.4404e-03, 5.9403e-03, -3.4142e-03, -1.0661e-04, -3.5556e-04,\n -2.0975e-04, -2.7692e-03, -2.4605e-03, 2.8459e-04, -7.2163e-03,\n -2.4680e-03, -8.3999e-04, 5.3675e-03, 9.3668e-03, -7.0366e-03,\n -2.6085e-03, 7.0631e-03, 6.4826e-03, 2.3174e-03, 1.3604e-03,\n 2.8704e-03, 8.0392e-03, -9.2867e-05, 3.9570e-03, -1.5171e-03,\n 3.4501e-03, 3.9984e-04, -4.3948e-03, 4.1777e-03, 1.0186e-04,\n -3.2478e-03, 4.7042e-03, 1.1096e-02, 1.9999e-03, 3.3780e-03,\n -1.2037e-03, 9.1888e-03, -6.1971e-03, 7.6433e-04, -2.3877e-03,\n -3.8223e-03, 3.4805e-03, 2.6356e-03, 3.6718e-03, 1.2473e-03,\n -2.7607e-03, 4.4426e-03, -1.0550e-03, -1.6891e-03, 1.3323e-03,\n 1.1716e-03, 2.7203e-03, -9.3133e-04, -4.0515e-03, -4.8008e-03,\n -2.0996e-03, -8.6329e-05, 7.3614e-05, 1.8899e-03, -2.1987e-04,\n 4.8825e-04, 6.6271e-03, -2.0499e-03, 2.7183e-03, 1.3481e-03,\n 6.5833e-03, 1.3706e-02, 5.1452e-03, 3.1522e-03, 2.2838e-03,\n 8.3258e-03, -1.6169e-05, 6.1072e-03, -1.7251e-03, 2.7425e-03,\n -2.4815e-03, 5.7065e-03, -7.2011e-03, 1.0400e-03, 3.6452e-03,\n -8.3963e-03, -1.4410e-04, 1.6916e-03, -1.2686e-03, -2.4937e-03,\n -1.0372e-03, -1.9464e-03, -7.0577e-03, 9.9757e-05, -7.5937e-03,\n -6.9134e-06, -1.4091e-03, -7.5651e-03, -1.7978e-03, -1.1489e-03,\n -1.3051e-03, -2.9198e-03, 6.5932e-03, -6.2182e-04, -7.3180e-04,\n -6.1267e-03, -3.7212e-04, -3.0686e-03, 3.4818e-03, -8.8690e-03,\n 2.4837e-03, -6.4683e-03, -3.3298e-03, -8.4670e-04, -4.1323e-03,\n -3.2466e-03, -2.5568e-03], device='cuda:0')",
|
| 18 |
+
"exp_avg_sq": "tensor([4.7109e-04, 2.2093e-04, 2.6362e-04, 2.6312e-04, 1.5085e-04, 3.2912e-04,\n 2.5218e-04, 1.8733e-04, 1.2404e-04, 1.5455e-04, 2.7007e-04, 5.1412e-04,\n 1.7055e-04, 2.4544e-04, 3.2785e-04, 3.2941e-04, 1.1156e-04, 4.8844e-04,\n 5.0602e-05, 1.8728e-04, 1.9788e-04, 2.0227e-04, 2.4382e-04, 1.6259e-04,\n 2.7933e-04, 1.6095e-04, 5.1934e-04, 1.4764e-04, 2.2009e-04, 1.1955e-04,\n 2.5287e-04, 2.7476e-04, 1.0073e-04, 5.2573e-04, 1.4730e-04, 1.6741e-04,\n 1.3503e-04, 1.6758e-04, 2.5707e-04, 2.4003e-04, 2.9491e-04, 1.1509e-03,\n 1.1971e-04, 1.5513e-04, 2.4018e-04, 3.9844e-04, 2.9865e-04, 1.2169e-04,\n 1.9211e-04, 1.7860e-04, 1.7028e-04, 4.7588e-04, 3.1035e-04, 2.7205e-04,\n 4.4903e-04, 1.7834e-04, 2.1856e-04, 1.1944e-04, 1.7947e-04, 1.7856e-04,\n 1.5301e-04, 1.1315e-04, 2.3485e-04, 2.1398e-04, 2.7890e-04, 2.6273e-04,\n 3.0458e-04, 3.6264e-04, 1.7632e-04, 2.6366e-04, 1.1884e-04, 2.3264e-04,\n 1.4813e-04, 4.6676e-04, 1.7797e-04, 1.2992e-04, 4.8010e-04, 2.6648e-04,\n 1.9379e-04, 2.1943e-04, 1.5209e-04, 1.3825e-04, 1.4948e-04, 3.7728e-04,\n 1.6456e-04, 3.4445e-04, 2.8166e-04, 9.4881e-05, 2.5637e-04, 3.7841e-04,\n 1.3346e-04, 2.9226e-04, 7.0049e-04, 4.7097e-04, 1.9283e-04, 2.6310e-04,\n 1.3938e-04, 2.3491e-04, 2.8712e-04, 2.6436e-04, 2.4032e-04, 1.2036e-04,\n 1.4358e-04, 2.6365e-04, 7.7338e-05, 1.9122e-04, 1.6610e-04, 1.5104e-04,\n 9.9360e-05, 1.2348e-04, 4.4473e-04, 1.2547e-04, 8.0236e-04, 2.0955e-04,\n 1.1316e-04, 5.8833e-05, 2.9234e-04, 3.1145e-04, 1.2628e-04, 2.0705e-04,\n 1.3840e-04, 9.9973e-05, 3.4273e-04, 4.4335e-04, 2.1995e-04, 2.0641e-04,\n 1.0541e-04, 2.6326e-04, 4.6112e-04, 2.1280e-04, 1.5757e-04, 3.0476e-04,\n 3.2360e-04, 1.4427e-04, 3.3821e-04, 2.4900e-04, 1.1076e-04, 8.6411e-05,\n 4.6460e-04, 2.3398e-04, 1.1736e-04, 7.8465e-05, 1.8877e-04, 7.9554e-04,\n 2.7813e-04, 2.0513e-04, 2.4291e-04, 1.2109e-04, 3.7177e-04, 6.0753e-04,\n 1.1465e-04, 1.5039e-04, 6.0427e-05, 1.2700e-04, 2.7422e-04, 2.8611e-04,\n 1.6016e-04, 1.9554e-04, 3.3084e-04, 1.9159e-04, 1.5378e-04, 2.9259e-04,\n 5.2142e-04, 1.8400e-04, 2.4333e-04, 2.2562e-04, 3.4029e-04, 1.0560e-04,\n 2.0266e-04, 4.5493e-04, 2.2006e-04, 2.0035e-04, 1.3658e-04, 1.8423e-04,\n 2.6471e-04, 1.5258e-04, 1.4401e-04, 9.4920e-05, 9.9653e-05, 3.2906e-04,\n 1.8409e-04, 2.8736e-04, 3.7703e-04, 2.3740e-04, 1.1329e-04, 2.2170e-04,\n 4.0676e-04, 2.3696e-04, 2.8092e-04, 1.3922e-04, 4.0923e-16, 1.8938e-04,\n 1.1225e-04, 9.6961e-05, 1.5403e-04, 1.4529e-04, 1.3313e-04, 1.1081e-04,\n 1.3457e-04, 1.7155e-04, 1.4501e-04, 1.8811e-04, 2.4804e-04, 2.0227e-04,\n 1.2273e-04, 3.3312e-04, 1.7892e-04, 9.6188e-05, 1.5414e-04, 1.2521e-04,\n 2.5780e-04, 2.6688e-04, 2.2484e-04, 1.9881e-04, 2.7940e-05, 2.9580e-04,\n 1.4115e-04, 1.3909e-04, 2.3385e-04, 2.7035e-04, 3.3301e-04, 3.6867e-04,\n 1.6779e-04, 8.7281e-05, 9.7460e-05, 1.3464e-04, 1.6908e-04, 9.4397e-05,\n 4.6962e-04, 1.8339e-04, 2.4657e-04, 2.0645e-04, 1.8059e-04, 2.9300e-04,\n 1.8365e-04, 1.8137e-04, 1.8829e-04, 2.5688e-04, 1.5207e-04, 2.3284e-04,\n 1.2193e-04, 1.3312e-04, 2.0671e-04, 3.6204e-04, 1.2682e-04, 2.3067e-04,\n 1.7929e-04, 1.0126e-04, 1.4469e-04, 2.5346e-04, 2.0702e-04, 2.0427e-04,\n 1.3743e-04, 8.8087e-05, 8.5159e-05, 1.6102e-04, 1.5184e-04, 2.6056e-04,\n 1.7152e-04, 3.1223e-04, 4.2016e-04, 1.2368e-04, 1.9963e-04, 1.0848e-04,\n 2.6067e-04, 4.4037e-04, 8.6402e-04, 1.5204e-04, 1.7532e-04, 1.3270e-04,\n 1.6085e-04, 1.7565e-04, 1.0049e-04, 2.8590e-04, 1.6881e-04, 2.3519e-04,\n 3.8647e-04, 8.2212e-05, 1.7101e-04, 1.8164e-04, 1.2957e-04, 1.4645e-04,\n 2.0254e-04, 1.7683e-04, 1.8255e-04, 2.3976e-04, 1.9491e-04, 1.7912e-04,\n 2.5858e-19, 3.1667e-04, 1.4696e-04, 1.3571e-04, 2.7745e-04, 2.3949e-04,\n 2.5901e-04, 2.5351e-18, 3.1214e-04, 2.0961e-04, 2.6194e-04, 5.5673e-04,\n 4.1204e-04, 3.7161e-04, 1.6567e-04, 1.1258e-04, 2.6035e-04, 2.2000e-04,\n 2.0622e-04, 2.9739e-04, 4.1245e-04, 1.9704e-04, 1.8424e-04, 3.6042e-04,\n 4.4839e-04, 6.1942e-05, 7.6088e-05, 2.5839e-04, 1.3164e-04, 2.0466e-04,\n 5.0556e-05, 1.9568e-04, 1.5870e-04, 4.4955e-04, 9.2947e-05, 2.0816e-04,\n 2.0796e-04, 4.9622e-04, 1.4558e-04, 2.1941e-04, 1.5536e-04, 2.3810e-04,\n 3.0976e-04, 1.4003e-04, 1.3804e-04, 1.3349e-04, 3.7804e-04, 2.5280e-04,\n 1.1682e-04, 1.0336e-04, 2.1862e-04, 4.7806e-04, 1.5162e-04, 1.4015e-04,\n 1.8014e-04, 3.3438e-04, 2.3130e-04, 2.7788e-04, 8.9085e-05, 9.3574e-05,\n 1.8133e-04, 2.2195e-04, 1.1945e-04, 5.5864e-04, 9.6183e-05, 2.3197e-04,\n 9.3299e-04, 2.4918e-04, 2.1293e-04, 2.2592e-04, 1.5383e-04, 9.7161e-05,\n 4.4528e-04, 7.8784e-05, 1.5331e-04, 1.9761e-04, 1.0892e-04, 1.3223e-04,\n 1.8588e-04, 1.7524e-04, 2.8001e-04, 1.8597e-04, 1.6225e-04, 5.3305e-19,\n 2.6776e-04, 1.3138e-04, 1.5217e-04, 1.5262e-04, 3.1920e-04, 1.8125e-04,\n 2.3305e-04, 1.6645e-04, 2.0547e-04, 2.6547e-04, 2.3983e-04, 2.2890e-04,\n 1.3998e-04, 4.0904e-04, 2.2867e-04, 1.5274e-04, 1.9599e-04, 1.1314e-04,\n 1.8535e-04, 9.2325e-05, 1.3380e-04, 3.0623e-04, 2.4284e-04, 3.0703e-04,\n 1.2808e-04, 3.4976e-04, 1.6162e-04, 2.9844e-04, 1.2389e-04, 1.6813e-04,\n 1.3281e-04, 1.5366e-04, 3.4331e-04, 6.3745e-05, 6.4459e-05, 2.6816e-04,\n 2.0864e-04, 2.4778e-04, 1.4608e-04, 1.7335e-04, 2.8940e-04, 5.4966e-04,\n 1.1928e-04, 1.8952e-04, 3.0067e-04, 3.5028e-04, 4.2314e-04, 1.4020e-04,\n 1.9178e-04, 1.0709e-04, 4.5356e-04, 1.9775e-04, 1.6011e-04, 1.3387e-04,\n 1.3125e-04, 1.7420e-04, 2.0069e-04, 1.0454e-04, 1.6108e-04, 3.4079e-04,\n 2.8957e-04, 2.3130e-04, 2.2319e-04, 3.2552e-04, 4.3564e-04, 3.4298e-04,\n 3.0356e-04, 2.3417e-04, 2.1025e-04, 4.1145e-04, 1.5853e-04, 2.1980e-04,\n 1.8321e-04, 2.0901e-04, 2.8245e-04, 1.8662e-04, 2.6783e-04, 1.5923e-04,\n 1.1056e-04, 2.8797e-04, 2.2845e-04, 4.0581e-04, 5.6571e-04, 1.2745e-04,\n 4.6977e-04, 1.4583e-04, 1.2662e-04, 1.5774e-04, 1.6687e-04, 1.7564e-04,\n 1.5060e-04, 3.1487e-04, 1.5210e-04, 1.2805e-04, 3.4357e-04, 2.7378e-04,\n 1.3599e-04, 2.5615e-04, 3.2677e-04, 1.5359e-04, 1.3774e-04, 2.1686e-04,\n 1.7902e-04, 1.7622e-04, 1.9091e-04, 4.1362e-04, 1.0410e-04, 2.0014e-04,\n 3.9060e-04, 2.1141e-04, 4.8093e-04, 1.8110e-04, 3.7452e-04, 8.8334e-05,\n 8.9684e-05, 1.9162e-04, 2.5195e-04, 2.4921e-04, 3.3120e-04, 9.1958e-05,\n 4.9986e-04, 1.2860e-04, 1.6798e-04, 1.3691e-04, 4.0981e-04, 2.4095e-04,\n 1.9320e-04, 1.6372e-04, 3.6771e-04, 1.8588e-04, 3.2245e-04, 1.6629e-04,\n 4.5038e-04, 1.7740e-04, 2.5415e-04, 2.4096e-04, 1.2609e-04, 6.1853e-04,\n 1.8594e-04, 1.9563e-04], device='cuda:0')"
|
| 19 |
+
},
|
| 20 |
+
"3": {
|
| 21 |
+
"step": "tensor(30032.)",
|
| 22 |
+
"exp_avg": "tensor([ 2.4982e-03, 6.1627e-04, -2.0623e-04, 4.6319e-04, 2.4867e-04,\n -7.8548e-05, -2.4533e-03, -2.8443e-04, 1.0478e-03, -1.0875e-03,\n 1.2429e-03, 2.2388e-03, 1.5269e-03, 2.8070e-03, 3.2221e-04,\n -3.2556e-03, 4.4403e-04, 2.5241e-03, 2.2758e-04, -3.1393e-03,\n 2.4765e-03, 1.1208e-03, -2.0707e-03, -1.7918e-03, 1.0907e-03,\n 2.2800e-03, 3.7425e-03, 8.1427e-04, -1.0898e-03, 1.2257e-03,\n -1.0072e-03, 3.5334e-04, 2.0242e-03, -4.9630e-03, 1.7341e-03,\n -2.6811e-03, 7.4268e-04, -4.9028e-04, -5.4526e-04, 7.9143e-04,\n -1.3506e-04, 2.1725e-03, 3.6059e-04, -1.4391e-03, -2.6537e-03,\n -5.4395e-04, 6.8582e-04, 3.2958e-04, 1.5946e-03, -5.6508e-04,\n 1.7365e-03, 2.5033e-03, -1.7332e-03, 3.0432e-03, -4.2925e-03,\n 7.3234e-04, 1.6544e-03, -1.3144e-03, 5.4879e-04, -2.7806e-03,\n -5.0591e-04, -3.9376e-03, 1.8631e-03, 5.3458e-04, 2.5249e-03,\n 8.5545e-04, 1.2669e-03, 1.5020e-03, 4.4110e-04, 3.6616e-04,\n -1.0035e-04, -1.2282e-03, -2.3762e-04, -1.8081e-03, -9.0377e-04,\n 2.4254e-04, 2.0077e-03, 1.7643e-04, -2.1047e-03, 1.1335e-03,\n 7.3750e-04, -3.4442e-05, 2.4501e-04, -4.8968e-04, -2.3617e-03,\n -1.9339e-03, 1.9197e-03, -1.5730e-04, 1.9833e-03, 4.1916e-04,\n -1.0632e-03, 5.7697e-06, -1.1194e-03, -6.8892e-04, -6.9378e-04,\n 1.4646e-03, 1.0549e-03, -2.1478e-03, 5.6616e-05, -3.3749e-04,\n -2.0361e-03, 7.4163e-04, 1.2121e-04, 2.1185e-04, 1.6620e-03,\n -9.1678e-04, -2.6115e-03, 4.1310e-04, 1.9683e-03, -8.2291e-04,\n 2.3927e-03, -1.9902e-03, -5.7097e-03, 3.0174e-03, 4.6089e-04,\n 1.0059e-03, -9.9067e-04, 7.7875e-04, -2.2949e-04, -9.1248e-05,\n 3.0152e-03, 5.4272e-04, -4.9020e-03, 2.0032e-03, 1.0461e-03,\n -4.1666e-05, -8.4221e-04, 1.4540e-03, -9.6638e-04, -7.1537e-04,\n 1.1346e-03, 2.0291e-03, -8.3433e-04, -4.3391e-03, -1.1577e-03,\n -3.4552e-03, 2.7424e-03, -1.7260e-03, 7.6552e-04, 6.4110e-04,\n -1.5675e-03, 6.5970e-04, -8.8549e-05, 2.9128e-04, 1.1473e-03,\n -3.6375e-03, 3.0458e-03, 1.0704e-04, 2.3846e-03, -2.0319e-03,\n -9.8750e-04, -7.5543e-04, -1.6698e-04, 3.5754e-03, -1.8488e-03,\n 2.0113e-03, -9.8259e-05, 1.2932e-03, -6.0312e-04, -1.7176e-04,\n -1.1399e-03, -2.7017e-04, 4.0758e-03, -3.4397e-03, -1.8174e-03,\n -1.6386e-03, 1.9730e-03, -3.0894e-06, 1.6316e-04, 1.3561e-03,\n 2.4821e-03, -6.1883e-04, 1.4316e-03, -1.3367e-03, -3.5309e-04,\n 3.6383e-04, 2.5708e-04, 9.8254e-04, -4.3477e-04, -3.0385e-03,\n -4.3206e-04, -3.2717e-03, 2.9222e-03, 1.7114e-03, -1.6164e-03,\n 7.3902e-04, -1.9078e-04, -2.8926e-05, -7.6049e-03, 1.8416e-03,\n 5.6052e-45, 9.2444e-04, 9.7324e-05, 4.9180e-04, -2.3111e-03,\n -1.2096e-03, 3.7365e-04, -4.0892e-04, -4.6250e-04, 1.1733e-03,\n 6.5122e-04, 9.5088e-04, -1.2510e-03, -1.6539e-03, -8.3123e-05,\n 5.8481e-04, 2.3695e-03, 1.5214e-03, -2.8716e-04, 2.2981e-03,\n 1.1871e-03, 2.7129e-04, 4.7994e-04, 2.5834e-03, 8.1966e-04,\n -7.4539e-04, 3.1962e-03, -1.4195e-03, -2.7389e-04, -1.1510e-03,\n 1.2109e-03, -1.1306e-03, -2.5233e-04, -3.1354e-04, -1.9947e-03,\n 4.9727e-04, -1.9093e-03, -2.2682e-03, -4.3358e-03, 1.6595e-03,\n 1.8671e-03, -8.4950e-04, -9.5894e-04, -2.0069e-03, 9.8908e-04,\n 1.7114e-04, 1.5307e-03, -2.5780e-03, -1.0993e-03, 3.1895e-03,\n 6.7026e-04, -2.2820e-05, 1.5161e-03, -3.0588e-03, -9.0776e-04,\n -1.3186e-03, 2.2185e-03, -3.8845e-04, 1.7326e-04, -2.8349e-05,\n 2.2409e-03, -1.4380e-03, 2.8191e-04, -4.3911e-04, -3.4090e-05,\n -2.3670e-04, 6.4409e-04, -5.4245e-03, 6.0748e-04, 1.7225e-03,\n 1.7128e-03, -2.1608e-03, -2.8759e-03, 3.0396e-05, 1.5738e-04,\n -1.7690e-03, 1.3743e-04, -2.0805e-03, 2.7782e-03, 1.8223e-03,\n -2.6795e-03, 2.1265e-03, -2.1989e-04, 3.5706e-03, -6.1191e-04,\n 7.0007e-04, -8.5295e-05, 7.1750e-04, 2.4690e-03, -1.1411e-03,\n -2.1833e-03, 2.7619e-03, 1.5926e-04, -1.0331e-03, 1.9374e-04,\n 3.2707e-04, 4.0895e-04, -1.0277e-03, 5.6052e-45, 1.2451e-03,\n 5.6116e-04, -2.7091e-03, -1.8142e-05, 1.5254e-03, -4.6367e-05,\n 5.6052e-45, 1.9645e-03, -1.6838e-03, -1.0888e-05, 2.5386e-04,\n -3.3553e-03, -6.3843e-03, 7.5821e-04, 2.6211e-04, 3.0299e-04,\n -9.9416e-04, -2.9413e-03, 2.7383e-04, -1.1533e-03, 1.4223e-03,\n 3.0780e-03, -4.8914e-03, 3.9278e-04, 1.7757e-03, 1.6893e-03,\n -6.5273e-04, 2.7004e-03, -2.0889e-03, 1.0186e-04, 1.5475e-03,\n 1.2969e-03, -6.5610e-04, -5.6445e-04, 2.4274e-03, -4.4692e-03,\n 3.2858e-03, -2.8923e-03, -3.5945e-04, -1.4664e-03, -1.1006e-04,\n 4.4456e-04, -2.4121e-03, -3.6479e-04, 4.4057e-04, -1.8531e-03,\n -1.1181e-03, -5.0811e-04, 1.0630e-03, 9.9768e-04, 2.2280e-03,\n 1.9504e-03, -8.5201e-04, 2.0583e-03, 1.7916e-03, 4.4320e-04,\n 8.1611e-04, 1.4317e-03, -3.1536e-04, 2.2805e-03, -1.2392e-04,\n -2.9487e-04, -2.1170e-03, -1.6208e-03, 2.0504e-05, 7.5356e-03,\n 2.0455e-04, 1.3925e-03, -3.0898e-04, -3.1238e-04, 8.4356e-04,\n 2.1281e-03, 1.0628e-03, 5.8374e-04, -1.2129e-03, -4.1417e-04,\n -8.0890e-04, 1.0989e-03, 6.3754e-04, 2.0528e-03, 2.5512e-04,\n -2.1619e-03, 5.6052e-45, -7.8101e-04, 2.0268e-03, 2.4844e-04,\n -1.6296e-03, -8.5934e-04, 3.3705e-03, 1.8400e-03, 1.1330e-03,\n 1.0875e-03, -3.8178e-04, 3.2032e-04, -9.6089e-04, 6.0526e-04,\n 2.2142e-03, 2.7609e-03, -1.9053e-03, -5.0529e-04, 6.3960e-04,\n -1.7793e-04, -3.4428e-04, 4.3345e-03, 2.5965e-04, -2.1553e-03,\n 1.2954e-03, 1.4051e-03, 4.8943e-04, -4.8353e-04, -1.5182e-03,\n -1.3294e-03, 2.6344e-03, -1.7920e-03, -2.0415e-04, 1.4929e-05,\n -1.0097e-04, -1.2418e-03, -1.1801e-03, 5.7275e-05, -3.3535e-03,\n -6.6787e-04, 7.0758e-05, 1.9615e-03, 3.4368e-03, -3.3271e-03,\n -1.2832e-03, 3.3814e-03, 3.3099e-03, 1.3104e-03, 7.4327e-04,\n 1.4344e-03, 4.1439e-03, 2.0972e-04, 2.0924e-03, -4.6420e-04,\n 1.9899e-03, -2.0843e-04, -2.3399e-03, 2.4674e-03, 2.0207e-04,\n -1.4421e-03, 2.0712e-03, 5.4760e-03, 1.0074e-03, 1.3758e-03,\n -4.7859e-04, 3.7511e-03, -3.3669e-03, 2.5747e-04, -1.2989e-03,\n -1.3335e-03, 1.9269e-03, 1.7283e-03, 2.6138e-03, 2.7811e-04,\n -1.4233e-03, 2.0522e-03, -6.6500e-06, -8.4773e-04, 8.8442e-04,\n 8.6226e-04, 1.7162e-03, -5.1240e-04, -1.4325e-03, -2.0460e-03,\n -1.2909e-03, 4.9261e-04, -2.2012e-04, 1.0991e-03, 4.7020e-05,\n -3.0462e-04, 3.1847e-03, -9.0657e-04, 1.1424e-03, 7.4633e-04,\n 3.2753e-03, 5.7089e-03, 2.4264e-03, 2.0378e-03, 1.1970e-03,\n 3.4972e-03, -2.0251e-04, 3.1509e-03, -1.0577e-03, 1.8148e-03,\n -8.7868e-04, 2.4793e-03, -3.2938e-03, 6.5667e-04, 2.2712e-03,\n -3.1748e-03, -1.3194e-04, 3.7297e-04, -6.6585e-05, -1.1274e-03,\n -3.4844e-04, -5.1486e-04, -2.8607e-03, -2.4809e-04, -3.1566e-03,\n -3.0066e-04, -6.3515e-04, -3.7345e-03, -9.3692e-04, -2.4081e-04,\n -2.8334e-04, -1.5559e-03, 2.8577e-03, -4.7350e-04, -5.7351e-07,\n -2.2945e-03, -1.0790e-03, -1.2445e-03, 1.3256e-03, -3.6768e-03,\n 1.2418e-03, -3.5852e-03, -1.4151e-03, -5.6570e-04, -1.0806e-03,\n -1.6784e-03, -1.6616e-03], device='cuda:0')",
|
| 23 |
+
"exp_avg_sq": "tensor([6.0376e-05, 7.1728e-05, 9.5333e-05, 6.0174e-05, 3.8539e-05, 9.7718e-05,\n 7.5142e-05, 3.2808e-05, 3.3726e-05, 4.0789e-05, 7.1195e-05, 9.8797e-05,\n 3.9697e-05, 6.6109e-05, 9.7494e-05, 5.0021e-05, 4.5596e-05, 1.3984e-04,\n 1.3673e-05, 5.8114e-05, 3.8747e-05, 5.1195e-05, 6.6296e-05, 4.4302e-05,\n 4.1252e-05, 3.9602e-05, 1.0518e-04, 5.1835e-05, 5.4242e-05, 3.4141e-05,\n 4.9822e-05, 6.2666e-05, 2.6503e-05, 1.1811e-04, 5.0035e-05, 3.9446e-05,\n 2.9303e-05, 5.6907e-05, 8.9297e-05, 5.5074e-05, 6.1021e-05, 1.4663e-04,\n 3.9620e-05, 3.3948e-05, 6.9976e-05, 7.4541e-05, 6.5029e-05, 2.6413e-05,\n 3.6450e-05, 3.3911e-05, 3.6440e-05, 5.8981e-05, 1.1776e-04, 5.5238e-05,\n 7.2967e-05, 3.8512e-05, 5.6501e-05, 3.4272e-05, 5.3984e-05, 4.7784e-05,\n 2.8358e-05, 3.0913e-05, 3.9983e-05, 6.5864e-05, 8.9931e-05, 8.0916e-05,\n 6.1634e-05, 7.8414e-05, 4.9296e-05, 5.3522e-05, 3.1255e-05, 6.7282e-05,\n 4.1424e-05, 1.1906e-04, 4.6387e-05, 3.6667e-05, 1.4613e-04, 3.8493e-05,\n 4.4908e-05, 5.2044e-05, 4.7385e-05, 3.1631e-05, 3.8442e-05, 6.8104e-05,\n 4.9852e-05, 6.0380e-05, 1.0005e-04, 2.9549e-05, 6.9930e-05, 6.6264e-05,\n 4.6222e-05, 4.1396e-05, 1.4453e-04, 1.4739e-04, 4.5788e-05, 5.0349e-05,\n 3.5117e-05, 6.3773e-05, 7.4198e-05, 4.8438e-05, 5.0239e-05, 4.1138e-05,\n 3.1568e-05, 5.9850e-05, 3.2785e-05, 4.6357e-05, 3.6760e-05, 2.8237e-05,\n 4.1272e-05, 3.2321e-05, 1.1454e-04, 3.3177e-05, 1.4163e-04, 4.1757e-05,\n 3.5901e-05, 1.9623e-05, 7.6125e-05, 1.0762e-04, 3.9342e-05, 5.9851e-05,\n 4.3971e-05, 3.1735e-05, 8.0484e-05, 8.7452e-05, 4.7699e-05, 6.0476e-05,\n 3.3952e-05, 7.3850e-05, 8.0492e-05, 6.6151e-05, 4.8504e-05, 9.9252e-05,\n 6.1609e-05, 3.4219e-05, 9.3445e-05, 4.1496e-05, 3.1409e-05, 2.6970e-05,\n 1.2910e-04, 6.3956e-05, 3.2708e-05, 3.8699e-05, 4.7377e-05, 1.8376e-04,\n 4.7112e-05, 6.2851e-05, 4.9987e-05, 3.0398e-05, 1.1013e-04, 1.1071e-04,\n 4.6781e-05, 4.1165e-05, 3.4726e-05, 5.3266e-05, 4.8080e-05, 6.6510e-05,\n 3.6069e-05, 3.7018e-05, 7.1001e-05, 3.7586e-05, 5.1466e-05, 5.2434e-05,\n 8.7984e-05, 4.7424e-05, 6.0408e-05, 5.7612e-05, 1.0178e-04, 3.3009e-05,\n 5.6878e-05, 1.3950e-04, 5.3703e-05, 6.4439e-05, 3.0233e-05, 4.6023e-05,\n 3.9731e-05, 2.9705e-05, 3.4718e-05, 3.8967e-05, 3.5053e-05, 7.5905e-05,\n 4.1485e-05, 6.7750e-05, 1.3148e-04, 4.2246e-05, 1.8523e-05, 6.7319e-05,\n 7.7889e-05, 4.6590e-05, 7.8364e-05, 4.8061e-05, 7.0872e-18, 4.8478e-05,\n 2.4035e-05, 2.2341e-05, 4.6221e-05, 4.2179e-05, 3.1141e-05, 3.2018e-05,\n 3.4216e-05, 4.0334e-05, 4.3283e-05, 4.6755e-05, 4.1867e-05, 2.8593e-05,\n 4.0862e-05, 1.2054e-04, 3.9102e-05, 3.1350e-05, 3.5172e-05, 3.9075e-05,\n 6.4315e-05, 5.4912e-05, 5.1094e-05, 4.9778e-05, 2.1044e-05, 7.8886e-05,\n 4.7181e-05, 2.9331e-05, 5.5740e-05, 5.5866e-05, 6.1354e-05, 9.8324e-05,\n 3.3840e-05, 3.4283e-05, 2.9450e-05, 4.0684e-05, 4.7147e-05, 3.0553e-05,\n 9.4413e-05, 8.7464e-05, 7.3362e-05, 5.1716e-05, 5.0919e-05, 4.7787e-05,\n 3.7585e-05, 5.6162e-05, 3.2437e-05, 4.8082e-05, 5.0663e-05, 5.3159e-05,\n 2.7703e-05, 3.6606e-05, 3.8122e-05, 7.2178e-05, 1.9528e-05, 6.1760e-05,\n 6.0279e-05, 4.8416e-05, 3.4984e-05, 5.4647e-05, 7.3942e-05, 6.9402e-05,\n 3.2740e-05, 3.0025e-05, 3.0062e-05, 4.0601e-05, 4.4374e-05, 6.0543e-05,\n 4.9378e-05, 8.8444e-05, 6.8945e-05, 3.6634e-05, 4.7565e-05, 2.6303e-05,\n 5.5715e-05, 7.3328e-05, 1.3593e-04, 3.5087e-05, 3.1946e-05, 2.8797e-05,\n 3.3753e-05, 4.9577e-05, 2.8368e-05, 8.1395e-05, 3.4054e-05, 5.9789e-05,\n 1.0975e-04, 3.4906e-05, 5.6095e-05, 4.5413e-05, 3.7618e-05, 4.2360e-05,\n 5.5448e-05, 4.4521e-05, 4.7499e-05, 6.9713e-05, 5.1930e-05, 6.7445e-05,\n 2.1565e-20, 1.0762e-04, 4.3017e-05, 3.1984e-05, 7.7387e-05, 5.9358e-05,\n 6.3937e-05, 1.1344e-19, 7.1030e-05, 8.4444e-05, 5.1504e-05, 1.6636e-04,\n 6.9758e-05, 6.7380e-05, 5.6007e-05, 2.8680e-05, 7.8857e-05, 5.4014e-05,\n 3.8850e-05, 4.5277e-05, 7.9366e-05, 4.2565e-05, 4.7989e-05, 9.0458e-05,\n 1.1320e-04, 2.9794e-05, 3.5628e-05, 6.3961e-05, 3.0245e-05, 5.0738e-05,\n 1.4215e-05, 4.5048e-05, 5.2381e-05, 9.1992e-05, 3.1094e-05, 6.4513e-05,\n 6.1642e-05, 1.4829e-04, 4.5499e-05, 5.1046e-05, 3.1406e-05, 4.1578e-05,\n 6.7119e-05, 6.2354e-05, 3.3499e-05, 2.4009e-05, 1.0182e-04, 7.2898e-05,\n 1.5148e-05, 3.2723e-05, 4.1826e-05, 1.3790e-04, 3.1824e-05, 4.1896e-05,\n 4.9452e-05, 4.9499e-05, 5.8153e-05, 6.5699e-05, 2.9400e-05, 3.3417e-05,\n 4.8017e-05, 6.2833e-05, 2.6163e-05, 1.2570e-04, 2.7998e-05, 6.2619e-05,\n 1.4370e-04, 6.0676e-05, 4.1301e-05, 7.4978e-05, 3.5596e-05, 2.7489e-05,\n 1.2750e-04, 3.1342e-05, 3.6149e-05, 3.2866e-05, 3.3653e-05, 5.5877e-05,\n 2.9980e-05, 4.6728e-05, 8.3113e-05, 4.5702e-05, 4.6902e-05, 2.5816e-20,\n 4.4311e-05, 2.6801e-05, 3.9933e-05, 4.5228e-05, 9.7694e-05, 4.8613e-05,\n 5.8356e-05, 3.9464e-05, 5.1654e-05, 6.7303e-05, 6.5534e-05, 6.1240e-05,\n 4.5092e-05, 1.0042e-04, 4.3991e-05, 2.9004e-05, 4.9550e-05, 2.8893e-05,\n 4.0645e-05, 2.8680e-05, 4.1346e-05, 7.2684e-05, 6.6276e-05, 6.1755e-05,\n 3.8473e-05, 7.7372e-05, 4.2332e-05, 7.1335e-05, 3.3846e-05, 3.8814e-05,\n 3.4068e-05, 3.6524e-05, 5.4693e-05, 1.5466e-05, 1.7108e-05, 5.9837e-05,\n 6.0860e-05, 6.2173e-05, 3.5886e-05, 6.3564e-05, 5.3786e-05, 7.7229e-05,\n 3.0400e-05, 3.9616e-05, 7.3895e-05, 7.3692e-05, 1.2883e-04, 3.8383e-05,\n 3.6862e-05, 3.2574e-05, 8.3239e-05, 7.2363e-05, 3.7617e-05, 6.1062e-05,\n 3.7646e-05, 5.3784e-05, 5.4069e-05, 2.6429e-05, 4.0972e-05, 9.5671e-05,\n 6.8802e-05, 6.1131e-05, 5.2416e-05, 6.1094e-05, 9.2584e-05, 8.1326e-05,\n 7.2858e-05, 6.5159e-05, 4.3252e-05, 9.8420e-05, 6.2254e-05, 6.0854e-05,\n 3.0432e-05, 5.0285e-05, 5.8543e-05, 3.4855e-05, 7.7990e-05, 5.1016e-05,\n 4.3724e-05, 7.2350e-05, 4.3359e-05, 9.2313e-05, 1.6087e-04, 3.6972e-05,\n 6.5769e-05, 2.8484e-05, 4.1275e-05, 4.1712e-05, 5.6537e-05, 3.8305e-05,\n 4.6921e-05, 5.9158e-05, 2.5190e-05, 3.5123e-05, 5.7678e-05, 6.3332e-05,\n 3.9649e-05, 7.1348e-05, 8.3103e-05, 3.4155e-05, 3.5187e-05, 5.6153e-05,\n 6.7337e-05, 4.0753e-05, 4.4714e-05, 8.4480e-05, 3.0729e-05, 6.4894e-05,\n 6.3037e-05, 4.2886e-05, 5.5117e-05, 4.6614e-05, 1.0259e-04, 3.3228e-05,\n 1.9311e-05, 4.4978e-05, 5.2134e-05, 3.7496e-05, 6.3260e-05, 2.7371e-05,\n 1.2201e-04, 3.6647e-05, 4.8711e-05, 3.5466e-05, 9.3131e-05, 4.9191e-05,\n 4.5615e-05, 4.7891e-05, 6.4475e-05, 4.9266e-05, 5.5734e-05, 2.7344e-05,\n 7.9881e-05, 4.2728e-05, 5.9071e-05, 3.9969e-05, 5.1264e-05, 4.7543e-05,\n 5.1607e-05, 5.0523e-05], device='cuda:0')"
|
| 24 |
+
},
|
| 25 |
+
"4": {
|
| 26 |
+
"step": "tensor(30032.)",
|
| 27 |
+
"exp_avg": "tensor([[-2.0789e-06, -5.3879e-06, -6.8699e-06, ..., 2.3091e-06,\n -3.6172e-06, 5.7089e-06],\n [ 3.7351e-07, 3.8695e-06, -1.5964e-06, ..., -3.7311e-08,\n 2.0191e-05, 4.4751e-06],\n [ 4.9546e-06, 6.6000e-06, -1.0607e-05, ..., 4.1582e-06,\n -4.3951e-06, 1.3560e-05],\n ...,\n [ 7.2913e-06, 1.0294e-05, -1.5797e-05, ..., 2.2576e-06,\n 1.7060e-05, -1.8540e-05],\n [ 5.1995e-06, -2.0595e-05, -1.5364e-05, ..., 4.7798e-06,\n -8.6481e-06, -2.9143e-06],\n [ 1.3168e-05, 9.7069e-06, 2.0731e-06, ..., 6.9789e-06,\n -1.0090e-05, 1.9781e-05]], device='cuda:0')",
|
| 28 |
+
"exp_avg_sq": "tensor([[6.7872e-10, 1.9010e-09, 5.0240e-10, ..., 4.9454e-10, 8.8252e-10,\n 1.2385e-09],\n [1.0476e-09, 1.4444e-09, 2.0272e-09, ..., 1.0033e-09, 2.4204e-09,\n 1.9980e-09],\n [1.1085e-09, 1.7872e-09, 1.8585e-09, ..., 9.1953e-10, 2.0559e-09,\n 1.7236e-09],\n ...,\n [1.5790e-09, 2.8993e-09, 1.4283e-09, ..., 5.9930e-10, 3.5616e-09,\n 2.3048e-09],\n [9.1421e-10, 2.8500e-09, 2.2147e-09, ..., 8.8820e-10, 1.7611e-09,\n 1.9334e-09],\n [2.9775e-09, 2.8052e-09, 1.5273e-09, ..., 8.3597e-10, 1.8154e-09,\n 2.2910e-09]], device='cuda:0')"
|
| 29 |
+
},
|
| 30 |
+
"5": {
|
| 31 |
+
"step": "tensor(30032.)",
|
| 32 |
+
"exp_avg": "tensor([[-3.5115e-06, 3.4964e-06, -2.2963e-06, ..., -4.1247e-06,\n 1.0792e-06, -8.8067e-07],\n [ 1.1748e-06, 7.8915e-06, 1.8904e-06, ..., 4.0855e-06,\n 1.0777e-05, 9.0819e-07],\n [ 9.2108e-06, 2.3984e-06, -6.0366e-06, ..., 1.5464e-05,\n -2.2737e-05, 3.9211e-06],\n ...,\n [ 2.4366e-06, -8.2600e-06, -9.4242e-06, ..., 1.4360e-06,\n 1.1118e-05, -4.2899e-06],\n [ 2.0901e-06, 1.1320e-05, -7.3478e-06, ..., -3.6016e-06,\n 5.6853e-06, -1.3643e-06],\n [ 9.2954e-06, 2.6024e-05, 3.4454e-06, ..., 1.0583e-05,\n 3.3111e-06, -3.6401e-06]], device='cuda:0')",
|
| 33 |
+
"exp_avg_sq": "tensor([[4.1817e-10, 1.1208e-09, 4.7565e-10, ..., 2.3288e-10, 4.2342e-10,\n 8.6795e-10],\n [1.0600e-09, 2.4227e-09, 7.5300e-10, ..., 7.2518e-10, 2.2157e-09,\n 1.4975e-09],\n [8.0725e-10, 1.1938e-09, 1.0953e-09, ..., 8.4497e-10, 1.8122e-09,\n 1.5325e-09],\n ...,\n [1.0182e-09, 1.1246e-09, 1.9306e-09, ..., 4.5097e-10, 3.5353e-09,\n 1.2599e-09],\n [1.3674e-09, 2.0845e-09, 2.1645e-09, ..., 9.8524e-10, 8.5484e-10,\n 1.1723e-09],\n [7.4560e-10, 2.5175e-09, 9.2982e-10, ..., 5.8797e-10, 7.4371e-10,\n 1.6503e-09]], device='cuda:0')"
|
| 34 |
+
},
|
| 35 |
+
"6": {
|
| 36 |
+
"step": "tensor(30032.)",
|
| 37 |
+
"exp_avg": "tensor([-0.0004, 0.0004], device='cuda:0')",
|
| 38 |
+
"exp_avg_sq": "tensor([3.3157e-06, 3.3157e-06], device='cuda:0')"
|
| 39 |
+
}
|
| 40 |
+
},
|
| 41 |
+
"param_groups": [
|
| 42 |
+
{
|
| 43 |
+
"lr": 0.0009558195366224509,
|
| 44 |
+
"name": "shared",
|
| 45 |
+
"betas": [
|
| 46 |
+
0.9,
|
| 47 |
+
0.999
|
| 48 |
+
],
|
| 49 |
+
"eps": 1e-08,
|
| 50 |
+
"weight_decay": 1e-05,
|
| 51 |
+
"amsgrad": false,
|
| 52 |
+
"maximize": false,
|
| 53 |
+
"foreach": null,
|
| 54 |
+
"capturable": false,
|
| 55 |
+
"differentiable": false,
|
| 56 |
+
"fused": null,
|
| 57 |
+
"decoupled_weight_decay": true,
|
| 58 |
+
"initial_lr": 0.01,
|
| 59 |
+
"params": [
|
| 60 |
+
0,
|
| 61 |
+
1,
|
| 62 |
+
2,
|
| 63 |
+
3
|
| 64 |
+
]
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"lr": 0.0009558195366224509,
|
| 68 |
+
"name": "scale_256",
|
| 69 |
+
"betas": [
|
| 70 |
+
0.9,
|
| 71 |
+
0.999
|
| 72 |
+
],
|
| 73 |
+
"eps": 1e-08,
|
| 74 |
+
"weight_decay": 1e-05,
|
| 75 |
+
"amsgrad": false,
|
| 76 |
+
"maximize": false,
|
| 77 |
+
"foreach": null,
|
| 78 |
+
"capturable": false,
|
| 79 |
+
"differentiable": false,
|
| 80 |
+
"fused": null,
|
| 81 |
+
"decoupled_weight_decay": true,
|
| 82 |
+
"initial_lr": 0.01,
|
| 83 |
+
"params": [
|
| 84 |
+
4
|
| 85 |
+
]
|
| 86 |
+
},
|
| 87 |
+
{
|
| 88 |
+
"lr": 0.0009558195366224509,
|
| 89 |
+
"name": "scale_512",
|
| 90 |
+
"betas": [
|
| 91 |
+
0.9,
|
| 92 |
+
0.999
|
| 93 |
+
],
|
| 94 |
+
"eps": 1e-08,
|
| 95 |
+
"weight_decay": 1e-05,
|
| 96 |
+
"amsgrad": false,
|
| 97 |
+
"maximize": false,
|
| 98 |
+
"foreach": null,
|
| 99 |
+
"capturable": false,
|
| 100 |
+
"differentiable": false,
|
| 101 |
+
"fused": null,
|
| 102 |
+
"decoupled_weight_decay": true,
|
| 103 |
+
"initial_lr": 0.01,
|
| 104 |
+
"params": [
|
| 105 |
+
5
|
| 106 |
+
]
|
| 107 |
+
},
|
| 108 |
+
{
|
| 109 |
+
"lr": 0.00047836202255981916,
|
| 110 |
+
"name": "fusion",
|
| 111 |
+
"betas": [
|
| 112 |
+
0.9,
|
| 113 |
+
0.999
|
| 114 |
+
],
|
| 115 |
+
"eps": 1e-08,
|
| 116 |
+
"weight_decay": 1e-05,
|
| 117 |
+
"amsgrad": false,
|
| 118 |
+
"maximize": false,
|
| 119 |
+
"foreach": null,
|
| 120 |
+
"capturable": false,
|
| 121 |
+
"differentiable": false,
|
| 122 |
+
"fused": null,
|
| 123 |
+
"decoupled_weight_decay": true,
|
| 124 |
+
"initial_lr": 0.005,
|
| 125 |
+
"params": [
|
| 126 |
+
6
|
| 127 |
+
]
|
| 128 |
+
}
|
| 129 |
+
]
|
| 130 |
+
},
|
| 131 |
+
"scheduler_state_dict": {
|
| 132 |
+
"T_0": 10,
|
| 133 |
+
"T_i": 10,
|
| 134 |
+
"T_mult": 2,
|
| 135 |
+
"eta_min": 1e-06,
|
| 136 |
+
"T_cur": 8,
|
| 137 |
+
"base_lrs": [
|
| 138 |
+
0.01,
|
| 139 |
+
0.01,
|
| 140 |
+
0.01,
|
| 141 |
+
0.005
|
| 142 |
+
],
|
| 143 |
+
"last_epoch": 8,
|
| 144 |
+
"_step_count": 0,
|
| 145 |
+
"_is_initial": false,
|
| 146 |
+
"_get_lr_called_within_step": false,
|
| 147 |
+
"_last_lr": [
|
| 148 |
+
0.0009558195366224509,
|
| 149 |
+
0.0009558195366224509,
|
| 150 |
+
0.0009558195366224509,
|
| 151 |
+
0.00047836202255981916
|
| 152 |
+
]
|
| 153 |
+
},
|
| 154 |
+
"metrics": {
|
| 155 |
+
"best_val_acc": 66.5,
|
| 156 |
+
"best_epoch": 7,
|
| 157 |
+
"scale_accuracies": {
|
| 158 |
+
"256": 65.83333333333333,
|
| 159 |
+
"512": 66.44466666666666
|
| 160 |
+
},
|
| 161 |
+
"training_history": {
|
| 162 |
+
"epochs": [
|
| 163 |
+
1,
|
| 164 |
+
2,
|
| 165 |
+
3,
|
| 166 |
+
4,
|
| 167 |
+
5,
|
| 168 |
+
6,
|
| 169 |
+
7,
|
| 170 |
+
8
|
| 171 |
+
],
|
| 172 |
+
"train_loss": [
|
| 173 |
+
5.311051666323785,
|
| 174 |
+
4.462767010682684,
|
| 175 |
+
4.340839946911445,
|
| 176 |
+
4.262519323832187,
|
| 177 |
+
4.204208532545754,
|
| 178 |
+
4.159249462977202,
|
| 179 |
+
4.114808493176228,
|
| 180 |
+
4.081520883974777
|
| 181 |
+
],
|
| 182 |
+
"train_acc": [
|
| 183 |
+
54.91727464101089,
|
| 184 |
+
60.04988680892759,
|
| 185 |
+
61.02839572566782,
|
| 186 |
+
61.696614622970046,
|
| 187 |
+
62.27501957199959,
|
| 188 |
+
62.741287175416375,
|
| 189 |
+
63.16837175273273,
|
| 190 |
+
63.52585832552145
|
| 191 |
+
],
|
| 192 |
+
"val_acc": [
|
| 193 |
+
63.041333333333334,
|
| 194 |
+
64.17333333333333,
|
| 195 |
+
64.75866666666667,
|
| 196 |
+
65.36133333333333,
|
| 197 |
+
65.65466666666667,
|
| 198 |
+
66.04266666666666,
|
| 199 |
+
66.282,
|
| 200 |
+
66.5
|
| 201 |
+
],
|
| 202 |
+
"scale_accs": {
|
| 203 |
+
"256": [
|
| 204 |
+
62.11666666666667,
|
| 205 |
+
63.38733333333333,
|
| 206 |
+
63.992666666666665,
|
| 207 |
+
64.614,
|
| 208 |
+
64.958,
|
| 209 |
+
65.344,
|
| 210 |
+
65.70466666666667,
|
| 211 |
+
65.83333333333333
|
| 212 |
+
],
|
| 213 |
+
"512": [
|
| 214 |
+
62.967333333333336,
|
| 215 |
+
64.19266666666667,
|
| 216 |
+
64.73066666666666,
|
| 217 |
+
65.34666666666666,
|
| 218 |
+
65.59266666666667,
|
| 219 |
+
65.97266666666667,
|
| 220 |
+
66.24,
|
| 221 |
+
66.44466666666666
|
| 222 |
+
]
|
| 223 |
+
},
|
| 224 |
+
"lr": [
|
| 225 |
+
0.00975530705321762,
|
| 226 |
+
0.00904518046337755,
|
| 227 |
+
0.00793913236883622,
|
| 228 |
+
0.00654543046337755,
|
| 229 |
+
0.005000500000000001,
|
| 230 |
+
0.0034555695366224513,
|
| 231 |
+
0.0020618676311637816,
|
| 232 |
+
0.0009558195366224509
|
| 233 |
+
]
|
| 234 |
+
}
|
| 235 |
+
},
|
| 236 |
+
"train_config": {
|
| 237 |
+
"name": "david_training",
|
| 238 |
+
"run_id": "20251012_231445",
|
| 239 |
+
"dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
|
| 240 |
+
"model_variant": [
|
| 241 |
+
"clip_vit_b16",
|
| 242 |
+
"clip_vit_laion_b32",
|
| 243 |
+
"clip_vit_b32"
|
| 244 |
+
],
|
| 245 |
+
"num_classes": 1000,
|
| 246 |
+
"preset": "small_fast",
|
| 247 |
+
"custom_config_path": null,
|
| 248 |
+
"num_classes_override": null,
|
| 249 |
+
"use_belly_override": null,
|
| 250 |
+
"belly_expand_override": null,
|
| 251 |
+
"progressive_training_override": true,
|
| 252 |
+
"scale_warmup_epochs_override": {
|
| 253 |
+
"256": 0,
|
| 254 |
+
"512": 0
|
| 255 |
+
},
|
| 256 |
+
"num_epochs": 10,
|
| 257 |
+
"batch_size": 1024,
|
| 258 |
+
"learning_rate": 0.01,
|
| 259 |
+
"weight_decay": 1e-05,
|
| 260 |
+
"warmup_epochs": 3,
|
| 261 |
+
"use_rose_loss": true,
|
| 262 |
+
"rose_initial_weight": 0.2,
|
| 263 |
+
"rose_max_weight": 0.6,
|
| 264 |
+
"rose_weight_schedule": "adaptive",
|
| 265 |
+
"use_cayley_loss": false,
|
| 266 |
+
"cayley_weight": 0.01,
|
| 267 |
+
"scale_loss_balance": null,
|
| 268 |
+
"use_mixed_precision": false,
|
| 269 |
+
"gradient_clip": 5.0,
|
| 270 |
+
"scheduler_type": "cosine_restarts",
|
| 271 |
+
"min_lr": 1e-06,
|
| 272 |
+
"freeze_strategy": "never",
|
| 273 |
+
"freeze_threshold": 90.0,
|
| 274 |
+
"unfreeze_on_plateau": true,
|
| 275 |
+
"patience": 10,
|
| 276 |
+
"track_gradients": true,
|
| 277 |
+
"gradient_scale_threshold": 1e-05,
|
| 278 |
+
"gradient_scale_multiplier": 10.0,
|
| 279 |
+
"log_interval": 50,
|
| 280 |
+
"val_interval": 1,
|
| 281 |
+
"save_interval": 5,
|
| 282 |
+
"log_fusion_weights": true,
|
| 283 |
+
"log_loss_components": true,
|
| 284 |
+
"save_format": "safetensors",
|
| 285 |
+
"hf_repo": "AbstractPhil/david-shared-space",
|
| 286 |
+
"upload_to_hub": true,
|
| 287 |
+
"base_dir": "./david_training",
|
| 288 |
+
"num_workers": 10,
|
| 289 |
+
"pin_memory": true,
|
| 290 |
+
"prefetch_factor": 4,
|
| 291 |
+
"persistent_workers": true
|
| 292 |
+
}
|
| 293 |
+
}
|