Update best_model_acc66.25_metadata.json - Run 20251012_235237
Browse files
weights/David-fully_shared-weighted_sum/20251012_235237/best_model_acc66.25_metadata.json
ADDED
|
@@ -0,0 +1,291 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 7,
|
| 3 |
+
"optimizer_state_dict": {
|
| 4 |
+
"state": {
|
| 5 |
+
"0": {
|
| 6 |
+
"step": "tensor(30032.)",
|
| 7 |
+
"exp_avg": "tensor([[ 1.9121e-04, -6.1769e-06, -4.7019e-05, ..., 4.2886e-05,\n -1.2914e-05, 4.3987e-07],\n [ 6.1686e-05, 3.9755e-05, 1.1639e-04, ..., -8.0078e-05,\n -7.5848e-05, -4.1464e-05],\n [-1.8210e-05, 1.8427e-06, -7.7101e-05, ..., 6.0653e-06,\n -1.4543e-05, -4.7383e-08],\n ...,\n [ 2.4642e-05, -1.1125e-04, 8.5469e-05, ..., -7.9092e-05,\n -1.9059e-06, 2.7797e-05],\n [ 8.4715e-05, 3.6784e-05, -1.8912e-05, ..., 9.6558e-05,\n 1.1393e-05, 2.4064e-05],\n [-2.3824e-05, -1.1016e-05, 8.0697e-06, ..., 1.1739e-05,\n 1.0194e-05, 5.1344e-06]], device='cuda:0')",
|
| 8 |
+
"exp_avg_sq": "tensor([[1.2660e-07, 1.2424e-07, 6.8022e-08, ..., 6.8633e-08, 3.6580e-08,\n 2.7904e-08],\n [4.7784e-08, 1.6851e-07, 9.3317e-08, ..., 5.9139e-08, 2.4234e-08,\n 2.6423e-08],\n [2.4266e-08, 4.5004e-08, 2.7181e-08, ..., 5.4700e-08, 1.3714e-08,\n 1.6082e-08],\n ...,\n [3.3421e-08, 2.8291e-07, 4.3643e-08, ..., 7.1806e-08, 1.9678e-08,\n 3.4737e-08],\n [6.3284e-08, 1.4145e-07, 5.6443e-08, ..., 5.7211e-08, 2.5548e-08,\n 3.1476e-08],\n [4.8973e-09, 2.1165e-08, 1.1199e-08, ..., 4.3199e-09, 2.1737e-09,\n 3.8120e-09]], device='cuda:0')"
|
| 9 |
+
},
|
| 10 |
+
"1": {
|
| 11 |
+
"step": "tensor(30032.)",
|
| 12 |
+
"exp_avg": "tensor([ 1.3976e-03, -9.0008e-04, 5.9929e-05, -2.9844e-04, 1.4660e-03,\n 1.7655e-03, -7.6492e-04, -1.2706e-03, -1.0936e-03, -2.2800e-04,\n 8.0010e-04, -1.5185e-03, -6.6338e-04, -7.1089e-04, -7.2412e-03,\n -5.4169e-04, -8.2339e-04, -1.8376e-03, -1.7509e-03, 1.4488e-03,\n -2.8592e-03, -2.1191e-03, -3.7807e-04, 6.0571e-04, -2.2881e-03,\n 1.1774e-03, 3.4166e-04, -1.5901e-03, -1.8246e-03, 5.3697e-04,\n 1.4186e-03, 2.3382e-04, -2.2616e-03, -2.5743e-04, -1.2507e-03,\n 2.2414e-03, -1.5292e-03, -6.4132e-04, -9.5039e-05, -2.3601e-03,\n -5.9544e-04, 3.2103e-04, 4.7269e-04, 7.9418e-04, -7.5132e-03,\n 1.2388e-03, 1.2659e-03, 2.1790e-03, -1.2142e-04, 1.7951e-03,\n 3.6505e-03, 1.1189e-03, 7.8414e-04, -1.3356e-04, 1.2242e-04,\n -5.9096e-04, 1.0208e-03, -4.6982e-03, -1.9632e-03, 2.1092e-03,\n 7.0808e-03, 1.5049e-03, -3.2619e-04, 9.3580e-04, 1.0592e-03,\n -2.8275e-03, 8.0354e-04, -3.0072e-04, -4.7659e-05, 3.9554e-04,\n 2.6175e-03, 3.2058e-04, -8.7856e-04, 6.1762e-04, 1.1191e-03,\n 1.0013e-03, -2.4916e-03, 1.2270e-03, -1.2992e-03, 4.4485e-04,\n -1.5631e-03, -1.3416e-03, 4.2383e-04, -1.1016e-03, -1.5187e-03,\n 9.6622e-04, -5.7913e-04, -2.5753e-04, -3.2143e-04, 2.7166e-03,\n -2.5032e-03, -8.0894e-04, -1.6299e-03, 1.2189e-03, -2.5648e-05,\n 2.8704e-03, 3.4750e-04, 3.4385e-04, -6.2592e-04, 1.2894e-03,\n -4.8128e-03, -1.5124e-03, -1.1611e-04, -1.1093e-03, -2.3157e-04,\n 3.3227e-05, -3.6942e-05, 1.0539e-04, -2.9716e-03, -3.2371e-04,\n 2.5709e-03, -2.2288e-03, 1.7253e-03, 1.3787e-03, -1.1437e-03,\n 3.9255e-03, -1.2492e-03, -8.3124e-04, -5.4130e-04, 1.7354e-04,\n 1.4267e-04, 2.8970e-04, 8.0326e-04, 1.1589e-03, -1.5564e-03,\n -1.4258e-03, -2.9281e-04, -1.4534e-03, 1.1187e-03, 1.1260e-03,\n 1.2746e-03, 5.5971e-05, 2.9051e-04, 1.5920e-03, -1.9151e-03,\n -3.5502e-03, 9.1333e-04, 2.1814e-04, -3.6097e-04, -9.8453e-04,\n -6.5844e-05, 2.5975e-04, -2.2952e-03, -2.0202e-03, -1.8729e-03,\n 3.9578e-03, -1.0732e-03, -2.1230e-03, 7.7133e-04, 4.2152e-03,\n 5.8506e-04, 2.4751e-03, -8.4501e-04, -8.6186e-04, 3.5048e-04,\n 1.5437e-03, 1.5013e-03, -2.4081e-04, 3.3467e-04, 4.0081e-04,\n -1.5990e-04, 5.7512e-04, 5.0828e-04, -1.3352e-03, -1.3539e-03,\n 4.7812e-04, 2.3161e-03, 1.5737e-03, 3.0703e-04, 1.2465e-03,\n -7.2366e-04, 2.8585e-03, 3.1951e-03, 7.9242e-04, 2.4814e-03,\n -4.8151e-04, 3.4796e-03, -1.9870e-03, -4.9703e-04, -2.5826e-03,\n -1.2649e-03, -1.1736e-03, 2.8165e-03, 4.1245e-04, 4.3330e-04,\n -4.5014e-03, 3.2134e-04, -1.4756e-03, 1.0973e-03, -3.5798e-04,\n -6.1200e-04, -1.8107e-03, -1.1183e-03, -1.6321e-03, 4.1719e-04,\n 1.9499e-05, 3.6209e-04, -3.3026e-03, -3.3821e-04, -2.0537e-04,\n 2.5878e-03, -9.4069e-04, -1.2949e-03, -7.7067e-05, -4.3715e-04,\n 8.2951e-04, 2.4541e-04, 4.5172e-05, 8.9136e-04, -1.3955e-04,\n 2.2222e-03, -2.8751e-04, 1.5979e-03, 6.3063e-04, -1.3649e-03,\n -2.5227e-03, 1.3379e-04, 4.9125e-04, 2.2992e-03, 2.1762e-03,\n -1.2178e-03, 2.7335e-04, 2.0170e-03, -4.3534e-04, 8.4426e-04,\n -5.9032e-04, -2.0934e-03, -1.0438e-03, -1.7372e-03, 5.4005e-04,\n 4.9343e-04, -9.6333e-04, -9.9057e-04, 1.8370e-03, -5.3000e-04,\n -2.1318e-03, 6.9046e-04, 1.3175e-03, -3.8210e-03, 7.0648e-04,\n -2.2301e-03, -2.8084e-04, 1.7935e-05, -7.2747e-05, 1.3667e-03,\n 1.2315e-03, 6.3152e-04, -1.5094e-03, 2.2745e-04, 1.8302e-04,\n 5.2319e-04, 7.1285e-04, 7.3754e-04, -1.2056e-03, -1.4112e-03,\n -4.7288e-04, 6.2339e-04, 2.2447e-04, -3.7326e-04, -1.9394e-03,\n 2.6604e-04, -7.5413e-05, 8.7707e-04, 3.0478e-03, -2.1081e-03,\n 1.0613e-03, 1.3480e-03, -1.1656e-03, 8.8642e-04, -5.0363e-04,\n 5.0799e-04, 5.9112e-04, -1.8133e-05, 3.0772e-03, 2.1231e-03,\n 2.2435e-04, -1.4013e-03, 1.5174e-03, -2.6130e-03, 2.4575e-03,\n 1.3322e-03, -2.2001e-03, 2.2237e-03, 2.2868e-04, -1.3658e-03,\n 1.7143e-03, 4.0969e-04, 1.0965e-03, -5.4392e-04, -1.9293e-03,\n -1.7876e-03, 1.6137e-03, -1.7676e-03, -2.7279e-03, 1.0017e-03,\n -3.1890e-04, 2.9709e-04, -1.3467e-03, 3.0219e-03, 1.6459e-03,\n -4.4857e-04, 1.0890e-04, 8.8081e-04, -1.5070e-03, 1.0191e-03,\n 2.0847e-03, -4.5553e-03, -6.9984e-04, -4.0213e-04, 1.9820e-03,\n -1.2237e-03, 3.9627e-03, 1.2970e-03, -4.1281e-04, -1.1729e-03,\n 5.1648e-04, -2.0814e-03, -2.6363e-03, 1.6087e-03, 7.3786e-04,\n 8.6695e-05, 6.3214e-04, 1.7841e-04, 3.8265e-04, 6.5360e-04,\n 2.6641e-05, -7.7373e-04, -9.5098e-04, -7.6302e-04, 3.8916e-04,\n -6.0746e-04, -3.0668e-04, -4.9488e-04, 4.6856e-04, -1.6014e-03,\n -9.5451e-04, 2.0919e-03, 2.7273e-04, -1.2095e-03, 1.1442e-03,\n 5.0831e-06, 1.6929e-03, 1.2681e-03, 2.5665e-03, -1.4867e-03,\n 5.0445e-04, 9.9089e-04, 4.3523e-04, -1.6191e-03, 1.2706e-03,\n 1.1920e-03, 3.3356e-03, -6.0983e-04, -2.1432e-03, 1.1373e-04,\n -1.5701e-03, -1.4739e-03, 7.6827e-04, 6.6323e-04, -1.4709e-03,\n 1.1611e-03, -9.4412e-04, -2.4561e-03, -2.3193e-03, -1.1191e-04,\n 8.3460e-04, 1.9064e-03, -3.5786e-03, 4.9882e-04, -5.6440e-04,\n 2.3928e-03, 6.6263e-04, -7.9212e-06, 7.6716e-04, -6.0029e-04,\n -7.2743e-04, 3.3835e-04, 1.3470e-03, -3.2128e-04, 3.7703e-04,\n -7.2465e-04, 2.8103e-03, 2.3903e-03, 1.0215e-03, 1.5374e-03,\n -3.9366e-03, 2.1571e-04, -1.0483e-04, -7.4089e-04, 2.3560e-03,\n -6.7515e-04, 3.0427e-03, -1.8250e-03, -1.8150e-03, 6.1298e-04,\n -1.0394e-03, -8.6074e-04, 9.1729e-04, 7.9261e-05, 7.5256e-04,\n -1.7578e-03, 2.3300e-04, -2.6700e-04, 2.3346e-04, -1.8498e-03,\n -1.0263e-03, -6.0467e-04, 2.7327e-05, -6.5012e-04, 3.1479e-03,\n -2.7738e-04, -1.1980e-03, 3.2354e-03, -1.8736e-03, -6.5609e-04,\n -2.3134e-04, 8.7252e-04, 1.3047e-04, -2.3970e-03, -1.4681e-03,\n -2.0033e-04, -1.9923e-04, 1.3538e-03, -1.6718e-03, -3.2249e-03,\n 6.6711e-05, 1.5097e-03, 4.6077e-04, -1.1135e-03, -1.7839e-03,\n 2.9124e-03, -6.4932e-05, -1.0602e-03, 9.1930e-04, 4.6286e-04,\n -1.8161e-03, -1.4641e-03, 1.9139e-03, 5.3651e-04, 1.9982e-03,\n 6.2716e-04, 1.2294e-03, 3.0628e-03, 1.4195e-03, -1.3966e-03,\n 9.9413e-04, 1.5310e-03, 2.1274e-03, -1.2057e-03, 1.1302e-03,\n 6.8858e-05, -1.0079e-03, -2.9143e-03, 1.3701e-03, -3.4676e-04,\n -5.9775e-04, -2.1635e-03, -2.3908e-03, -2.2556e-04, 3.7219e-04,\n -2.2890e-04, -8.3498e-04, 9.5836e-04, -5.1227e-04, 2.6494e-03,\n -1.3769e-03, -1.0025e-03, 5.4721e-05, -1.5924e-03, -3.8856e-05,\n 1.4700e-03, 1.3215e-04, 1.6584e-03, 2.6918e-04, 6.4052e-04,\n 1.4897e-03, 2.3048e-04, -9.4935e-04, 5.3625e-04, -6.9964e-04,\n -1.3463e-03, -7.3773e-04, 5.6004e-04, 1.4637e-03, 8.4027e-04,\n -2.0804e-03, -1.0328e-03, 5.6807e-04, 7.7861e-05, -2.0803e-03,\n -2.4974e-05, 2.6445e-04, 1.0653e-03, -1.6285e-03, 1.0312e-03,\n 8.1555e-04, 1.4499e-03, 1.3016e-03, 5.1379e-04, 1.9153e-03,\n -6.4469e-04, 9.4310e-04, -6.2303e-04, -1.7856e-03, -4.2027e-05,\n 2.3730e-03, 1.3717e-03, 1.1357e-03, 1.6657e-03, 9.1863e-04,\n 2.9841e-03, -2.1655e-04], device='cuda:0')",
|
| 13 |
+
"exp_avg_sq": "tensor([4.1525e-05, 4.0125e-05, 2.7505e-05, 3.0923e-05, 2.9596e-05, 5.8295e-05,\n 3.0021e-05, 5.0972e-05, 4.1160e-05, 4.7291e-05, 5.5052e-05, 6.3129e-05,\n 8.8584e-05, 2.1343e-05, 7.2833e-05, 4.3974e-05, 4.1696e-05, 3.7648e-05,\n 4.8280e-05, 5.6558e-05, 3.9193e-05, 2.7101e-05, 3.1055e-05, 6.8750e-05,\n 7.2141e-05, 3.2970e-05, 2.7233e-05, 6.4352e-05, 5.5192e-05, 4.3014e-05,\n 1.7239e-05, 3.5278e-05, 4.7538e-05, 3.5439e-05, 2.2277e-05, 5.1326e-05,\n 3.4277e-05, 2.5720e-05, 2.5706e-05, 2.4325e-05, 3.6244e-05, 2.6486e-05,\n 5.9277e-06, 6.5738e-05, 1.0109e-04, 3.0675e-05, 2.3733e-05, 2.4523e-05,\n 4.5055e-05, 7.1697e-05, 7.7240e-05, 2.4412e-05, 2.7321e-05, 3.6106e-05,\n 5.0944e-05, 3.6446e-05, 3.1605e-05, 9.8687e-05, 3.3376e-05, 3.3895e-05,\n 1.2896e-04, 2.3415e-05, 2.8069e-05, 2.8760e-05, 3.3352e-05, 2.5516e-05,\n 3.3280e-05, 3.0623e-05, 5.1272e-05, 3.5484e-05, 3.7451e-05, 3.0431e-05,\n 2.9417e-05, 3.1558e-05, 2.6493e-05, 3.2647e-05, 4.4837e-05, 4.0192e-05,\n 4.8240e-05, 3.5206e-05, 5.4303e-05, 3.5680e-05, 3.0441e-05, 1.8376e-05,\n 3.4962e-05, 3.9686e-05, 4.4103e-05, 2.9943e-05, 2.6331e-05, 4.3671e-05,\n 2.7920e-05, 6.0273e-05, 3.4936e-05, 3.7027e-05, 3.5415e-05, 4.6264e-05,\n 5.5532e-05, 3.3227e-05, 3.2985e-05, 1.8688e-05, 4.1062e-05, 6.7112e-05,\n 5.7382e-05, 3.2817e-05, 6.6844e-05, 4.9165e-05, 3.8341e-05, 8.7700e-05,\n 3.0207e-05, 4.2484e-05, 5.8517e-05, 5.0149e-05, 3.6327e-05, 5.0545e-05,\n 3.8244e-05, 4.3898e-05, 5.1128e-05, 4.8691e-05, 2.9846e-05, 3.9659e-05,\n 1.5921e-05, 4.8105e-05, 4.4619e-05, 6.1389e-05, 3.0443e-05, 4.0898e-05,\n 3.1706e-05, 4.2871e-05, 4.2036e-05, 8.6877e-05, 3.9896e-05, 3.5843e-05,\n 4.6064e-05, 3.7925e-05, 8.4510e-05, 3.4056e-05, 2.5781e-05, 3.6712e-05,\n 2.8228e-05, 4.1690e-05, 1.5587e-05, 2.8159e-05, 3.9802e-05, 8.7649e-05,\n 4.6773e-05, 4.1742e-05, 2.6394e-05, 4.9207e-05, 4.3661e-05, 7.5248e-05,\n 1.9324e-05, 4.3267e-05, 2.4248e-05, 6.4425e-05, 2.5768e-05, 3.3982e-05,\n 4.7652e-05, 3.3728e-05, 3.2425e-05, 2.6223e-05, 4.7082e-05, 1.3601e-05,\n 3.6257e-05, 6.6604e-05, 1.7458e-05, 5.3923e-05, 3.6659e-05, 1.8983e-05,\n 1.6390e-05, 4.0909e-05, 2.8268e-05, 4.8672e-05, 3.8804e-05, 3.1771e-05,\n 3.4853e-05, 2.7787e-05, 7.5120e-05, 2.3272e-05, 2.0770e-05, 3.6571e-05,\n 3.0726e-05, 2.7203e-05, 6.9057e-05, 3.6250e-05, 3.2287e-05, 4.1953e-05,\n 9.2197e-05, 2.1591e-05, 1.9461e-05, 1.4882e-05, 3.1981e-05, 4.9477e-05,\n 5.7241e-05, 5.0447e-05, 2.2299e-05, 1.6791e-05, 4.2243e-05, 2.6466e-05,\n 1.7821e-05, 1.9606e-05, 2.1680e-05, 4.2634e-05, 2.7543e-05, 1.9699e-05,\n 3.7169e-05, 3.7405e-05, 3.0925e-05, 2.5709e-05, 5.0734e-05, 1.9455e-05,\n 3.9061e-05, 5.7461e-05, 3.6053e-05, 2.7942e-05, 5.3141e-05, 1.3620e-05,\n 2.4424e-05, 2.2692e-05, 4.9145e-05, 5.3041e-05, 2.1402e-05, 2.0430e-05,\n 3.0909e-05, 1.4856e-05, 2.4463e-05, 5.2776e-05, 2.8091e-05, 1.9065e-05,\n 2.1448e-05, 4.7405e-05, 3.8415e-05, 2.1349e-05, 4.6212e-05, 3.3234e-05,\n 3.0555e-05, 3.2803e-05, 3.2969e-05, 6.1286e-06, 4.2240e-05, 1.0637e-04,\n 2.8222e-05, 6.8639e-05, 3.1325e-05, 5.2312e-05, 7.2549e-05, 5.3605e-05,\n 3.9175e-05, 2.7625e-05, 4.8172e-05, 3.0883e-05, 5.1548e-05, 2.6323e-05,\n 4.4315e-05, 4.4720e-05, 3.5241e-05, 3.7074e-05, 2.6218e-05, 4.6043e-05,\n 5.3910e-05, 2.9143e-05, 2.7884e-05, 6.3652e-05, 2.5364e-05, 5.6098e-05,\n 4.2507e-05, 2.5840e-05, 2.3562e-05, 3.6024e-05, 2.4913e-05, 3.5603e-05,\n 3.9479e-05, 4.4752e-05, 3.1995e-05, 3.5559e-05, 2.4347e-05, 5.3539e-05,\n 2.1610e-05, 6.0620e-05, 3.6007e-05, 2.2126e-05, 3.0434e-05, 2.4624e-05,\n 3.3556e-05, 4.5945e-05, 2.0914e-05, 5.1581e-05, 3.3315e-05, 3.5229e-05,\n 6.2149e-05, 4.6065e-05, 2.4826e-05, 3.3315e-05, 5.0098e-05, 2.9532e-05,\n 4.9994e-05, 2.1949e-05, 2.8973e-05, 5.3573e-05, 9.2737e-05, 3.6030e-05,\n 2.5440e-05, 3.1265e-05, 3.4656e-05, 3.2512e-05, 5.5769e-05, 2.0440e-05,\n 3.8815e-05, 3.2841e-05, 7.3178e-05, 3.8954e-05, 4.6750e-05, 5.2722e-05,\n 2.3227e-05, 2.6295e-05, 4.9158e-05, 2.8007e-05, 3.9335e-05, 1.6216e-05,\n 3.4313e-05, 4.4644e-05, 8.9512e-05, 3.3085e-05, 4.1181e-05, 4.0901e-05,\n 5.8093e-05, 8.5644e-05, 2.7841e-05, 4.8390e-05, 3.1554e-05, 3.1188e-05,\n 3.4562e-05, 4.7890e-05, 1.6341e-05, 2.3150e-05, 2.8062e-05, 3.3418e-05,\n 5.2005e-05, 2.5817e-05, 5.5619e-05, 4.7059e-05, 3.1219e-05, 3.0538e-05,\n 4.1633e-05, 2.4119e-05, 5.1206e-05, 5.2451e-05, 4.1754e-05, 2.1882e-05,\n 4.1051e-05, 3.9788e-05, 2.9266e-05, 3.7973e-05, 3.1894e-05, 3.1712e-05,\n 3.4300e-05, 2.6290e-05, 2.7138e-05, 3.0196e-05, 2.6585e-05, 2.4691e-05,\n 2.2561e-05, 8.5922e-05, 1.1331e-04, 3.0540e-05, 3.5876e-05, 2.8566e-05,\n 3.5948e-05, 4.7336e-05, 2.4392e-05, 3.0965e-05, 2.2290e-05, 3.6504e-05,\n 1.9839e-05, 3.6012e-05, 6.2009e-05, 3.5697e-05, 1.9320e-05, 1.4233e-05,\n 2.9181e-05, 3.3937e-05, 3.2227e-05, 4.3488e-05, 4.7889e-05, 3.1750e-05,\n 3.5512e-05, 9.7204e-05, 2.9663e-05, 5.5106e-05, 3.5848e-05, 3.4750e-05,\n 4.5417e-05, 4.9605e-05, 3.0882e-05, 3.0510e-05, 3.8687e-05, 2.6961e-05,\n 2.0184e-05, 5.4840e-05, 4.5141e-05, 7.3215e-05, 3.9184e-05, 2.9728e-05,\n 5.3000e-05, 3.5155e-05, 4.2297e-05, 3.1613e-05, 6.9574e-05, 2.0589e-05,\n 3.1215e-05, 4.6902e-05, 4.8517e-05, 4.9781e-05, 4.6083e-05, 4.9186e-05,\n 2.6407e-05, 2.4489e-05, 5.6075e-05, 2.5353e-05, 5.0363e-05, 7.3636e-05,\n 4.0709e-05, 3.3338e-05, 4.9702e-05, 4.3197e-05, 3.6255e-05, 3.8494e-05,\n 6.9511e-05, 3.5807e-05, 2.5711e-05, 3.6541e-05, 2.9661e-05, 1.7592e-05,\n 5.1888e-05, 4.0186e-05, 3.1938e-05, 2.8327e-05, 5.7229e-05, 3.9158e-05,\n 3.6435e-05, 3.8272e-05, 3.9444e-05, 6.6928e-05, 3.7363e-05, 1.1334e-04,\n 4.4379e-05, 3.9459e-05, 2.4768e-05, 3.4311e-05, 3.2211e-05, 3.5073e-05,\n 4.0413e-05, 3.6907e-05, 4.0808e-05, 6.4190e-05, 2.9120e-05, 1.8956e-05,\n 7.7054e-05, 3.8927e-05, 2.0430e-05, 4.5333e-05, 4.3300e-05, 2.9155e-05,\n 3.6351e-05, 2.9084e-05, 6.7985e-05, 2.8826e-05, 3.3668e-05, 3.3390e-05,\n 6.0817e-05, 4.3288e-05, 3.7642e-05, 6.1696e-05, 5.5088e-05, 3.6471e-05,\n 3.8560e-05, 4.6324e-05, 4.4420e-05, 3.6921e-05, 3.6393e-05, 2.5160e-05,\n 1.8762e-05, 3.2931e-05, 5.0166e-05, 3.7013e-05, 3.0553e-05, 3.7847e-05,\n 5.0403e-05, 3.7737e-05, 7.2785e-05, 3.4116e-05, 3.3279e-05, 3.6605e-05,\n 4.7804e-05, 2.6482e-05, 2.1068e-05, 3.3516e-05, 4.4738e-05, 2.1748e-05,\n 2.3457e-05, 2.7977e-05, 4.2651e-05, 2.7365e-05, 3.9255e-05, 2.9701e-05,\n 3.4302e-05, 1.9266e-05, 3.6508e-05, 3.9675e-05, 3.3801e-05, 3.6210e-05,\n 3.9493e-05, 3.2643e-06], device='cuda:0')"
|
| 14 |
+
},
|
| 15 |
+
"2": {
|
| 16 |
+
"step": "tensor(30032.)",
|
| 17 |
+
"exp_avg": "tensor([ 5.1551e-03, -2.9218e-03, -2.1077e-05, -1.0952e-03, 2.4132e-03,\n 3.5127e-03, -2.0708e-03, -3.4523e-03, -2.4667e-03, -5.0501e-04,\n 1.7043e-03, -4.5479e-03, -1.7720e-03, -1.4213e-03, -1.2041e-02,\n -3.5846e-04, -3.0472e-03, -5.1372e-03, -5.0839e-03, 3.6527e-03,\n -8.2647e-03, -4.4952e-03, -9.9628e-04, 1.1351e-03, -5.3637e-03,\n 3.0129e-03, 6.7170e-04, -3.5746e-03, -3.1902e-03, 6.4597e-04,\n 5.0983e-03, 4.6840e-04, -4.3739e-03, -3.4182e-04, -4.0925e-03,\n 5.6827e-03, -4.7192e-03, -2.0831e-03, -1.8947e-03, -4.3000e-03,\n -1.2397e-03, -1.3056e-04, -5.6052e-45, 1.3491e-03, -1.4681e-02,\n 2.9312e-03, 3.3286e-03, 3.8380e-03, -3.6711e-04, 3.5067e-03,\n 8.1223e-03, 3.4550e-03, 8.5509e-04, -3.2333e-04, -2.7449e-04,\n -4.9308e-04, 2.2873e-03, -6.6043e-03, -3.2181e-03, 3.6653e-03,\n 1.0319e-02, 4.2230e-03, -1.6486e-03, 2.2974e-03, 7.5731e-04,\n -7.0272e-03, 5.4133e-04, -1.0615e-03, -1.1956e-04, 2.3642e-04,\n 7.9030e-03, 1.1709e-03, -3.8319e-03, 2.2302e-03, 2.5656e-03,\n 4.8126e-03, -6.8412e-03, 2.8818e-03, -4.0633e-03, 1.4434e-03,\n -4.7846e-03, -3.4458e-03, 9.2282e-04, -2.5106e-03, -3.1823e-03,\n 2.7735e-03, -2.7859e-03, -5.2892e-04, -8.0112e-04, 8.3107e-03,\n -7.0120e-03, -1.8156e-03, -4.9905e-03, 3.8657e-03, -2.4251e-04,\n 5.2464e-03, 1.1178e-03, 5.1506e-04, -1.9679e-03, 3.5118e-03,\n -1.1380e-02, -2.0386e-03, 3.1470e-04, -2.1954e-03, 8.4313e-04,\n 4.1311e-04, -2.5865e-04, -1.6050e-03, -7.0686e-03, -8.7002e-04,\n 4.0168e-03, -6.7552e-03, 4.8085e-03, 1.2758e-03, -1.3343e-03,\n 7.1272e-03, -3.3743e-03, -1.4242e-03, -1.0479e-03, -1.4052e-03,\n 6.7696e-04, -2.9398e-04, 2.8993e-03, 1.4697e-03, -6.4227e-03,\n -5.3848e-03, -4.3868e-04, -4.6438e-03, 1.6953e-03, 3.6931e-03,\n 2.5184e-03, 7.7379e-04, 9.0035e-04, 4.6880e-03, -5.0317e-03,\n -4.7538e-03, 2.3809e-03, 2.2817e-05, -2.2244e-03, -4.0700e-04,\n -7.3060e-04, 1.2474e-03, -6.4598e-03, -3.9127e-03, -4.3105e-03,\n 8.5119e-03, -3.6508e-03, -3.4972e-03, 1.6720e-03, 9.8726e-03,\n 1.3726e-03, 5.8958e-03, -2.7296e-03, -9.2579e-04, 9.3507e-04,\n 2.3732e-03, 2.7147e-03, -2.2204e-04, 4.0856e-04, 1.3120e-03,\n 8.9673e-04, 2.5500e-03, 2.1091e-03, -3.3939e-03, -3.1655e-03,\n 1.7964e-03, 6.1728e-03, 4.0047e-03, 4.8197e-04, 2.9802e-03,\n -1.0468e-03, 6.3960e-03, 8.8999e-03, 1.0147e-03, 6.3925e-03,\n -2.1523e-03, 5.6609e-03, -3.5395e-03, -1.1019e-03, -6.7776e-03,\n -2.8455e-03, -3.1568e-03, 5.9994e-03, 5.9866e-04, 1.2821e-03,\n -8.5681e-03, 1.7032e-03, -5.2726e-03, 4.1810e-03, -8.2897e-04,\n -1.2438e-03, -4.3252e-03, -2.2254e-03, -3.1620e-03, 1.0181e-03,\n -3.3042e-04, 7.5418e-04, -8.4107e-03, -4.9832e-04, 1.2611e-04,\n 5.3633e-03, -2.3920e-03, -4.0485e-03, -1.1480e-03, -2.1580e-04,\n 2.3706e-03, 5.7318e-04, 2.6658e-04, 1.7063e-03, 9.8014e-05,\n 5.2969e-03, -1.0865e-03, 2.1749e-03, 2.3414e-03, -3.9363e-03,\n -9.9703e-03, -5.1277e-04, 1.5952e-03, 4.6329e-03, 6.2869e-03,\n -3.5264e-03, 2.3173e-03, 5.5449e-03, -2.6831e-03, 3.1072e-03,\n -1.9337e-03, -4.3685e-03, -2.3087e-03, -2.7491e-03, 3.5556e-03,\n 6.6144e-04, -8.4785e-04, -4.1629e-03, 5.0582e-03, -1.0301e-03,\n -4.5527e-03, 1.2417e-03, 5.6052e-45, -6.2756e-03, 3.5761e-04,\n -4.5570e-03, -1.7316e-03, -7.8804e-04, 4.7733e-04, 2.2956e-03,\n 3.0217e-03, 3.1777e-03, -2.8706e-03, -4.5222e-04, 9.4599e-04,\n 2.0339e-03, 1.0284e-03, 1.9130e-03, -1.9721e-03, -2.7754e-03,\n -1.5244e-03, 1.8619e-03, 6.8111e-04, -2.7197e-04, -5.2653e-03,\n 5.2948e-06, -1.2831e-03, 1.8697e-03, 5.6684e-03, -5.0459e-03,\n 3.8885e-03, 4.0740e-03, -2.3548e-03, 1.5009e-03, 1.0866e-04,\n 3.1075e-03, 2.8312e-03, -2.2927e-04, 7.0082e-03, 6.2846e-03,\n 4.9137e-04, -4.3774e-03, 2.1463e-03, -4.6428e-03, 7.9594e-03,\n 3.1819e-03, -3.6839e-03, 4.4187e-03, 6.9148e-04, -4.1544e-03,\n 4.1842e-03, 2.2930e-03, 1.6532e-03, -2.8146e-03, -3.9081e-03,\n -4.4307e-03, 2.6536e-03, -3.4152e-03, -4.6826e-03, 1.5647e-03,\n -1.4396e-03, 4.2295e-04, -2.2224e-03, 8.0803e-03, 4.1832e-03,\n -9.4164e-04, -5.6427e-04, 1.9523e-03, -4.1956e-03, 1.5242e-03,\n 5.8719e-03, -7.7876e-03, -6.4869e-04, -2.2171e-03, 6.7980e-03,\n -5.2435e-03, 9.2890e-03, 4.4465e-03, -2.0765e-03, -3.1597e-03,\n 8.5648e-04, -5.4718e-03, -6.3868e-03, 3.5961e-03, 1.9124e-03,\n -1.6003e-03, 2.2972e-03, 2.2117e-04, 4.6629e-04, 1.4582e-03,\n -1.1594e-03, -3.8606e-03, -1.8886e-03, -6.8422e-04, 6.5760e-04,\n -2.1629e-03, 1.1016e-03, -1.3155e-03, 1.5862e-03, -2.5420e-03,\n -3.2354e-03, 1.3513e-03, 1.7350e-04, -2.7453e-03, 3.4928e-03,\n -1.1837e-04, 3.3217e-03, 3.0234e-03, 8.8523e-03, -3.5442e-03,\n 2.6784e-03, 2.8209e-03, 7.4207e-04, -2.5451e-03, 2.5846e-03,\n 5.3954e-03, 7.6260e-03, -1.1921e-03, -5.0637e-03, 4.2208e-04,\n -3.3567e-03, -3.2344e-03, 1.4500e-03, 1.8792e-03, -3.9549e-03,\n 2.2970e-03, -2.1439e-03, -4.8246e-03, -7.1369e-03, -2.5972e-04,\n 1.7433e-03, 3.9085e-03, -9.1773e-03, 7.8856e-04, -9.7194e-04,\n 5.1246e-03, 2.1452e-03, 2.0144e-03, 1.4859e-03, -1.5491e-03,\n -2.0419e-03, 1.1363e-03, 3.4261e-03, -2.1603e-03, -1.9643e-04,\n -1.2891e-03, 5.7897e-03, 7.2323e-03, 3.6648e-03, 3.8894e-03,\n -8.6754e-03, -6.5034e-05, 4.6069e-04, -1.3805e-03, 3.1547e-03,\n -1.0946e-03, 4.9913e-03, -5.2455e-03, -8.2861e-03, 1.7777e-03,\n -1.5739e-03, -3.1845e-03, 3.7778e-03, -5.1382e-04, 3.1097e-03,\n -4.1989e-03, 9.0270e-04, 2.8792e-04, -3.4553e-04, -2.0521e-03,\n -2.3518e-03, -1.0566e-03, -3.8891e-04, -2.0803e-03, 6.1044e-03,\n -2.9662e-03, -1.1129e-03, 8.9447e-03, -3.5674e-03, -5.8848e-04,\n -2.3000e-03, 2.9655e-03, -6.0513e-04, -5.0703e-03, -1.6281e-03,\n 3.2793e-04, -1.7524e-03, 4.1242e-03, -4.5335e-03, -1.1956e-02,\n 4.9370e-04, 3.7301e-03, 3.0208e-04, -4.8824e-03, -4.6313e-03,\n 6.9105e-03, -3.6112e-04, -1.1618e-03, 2.5618e-03, 1.9263e-03,\n -5.4399e-03, -4.3253e-03, 3.9238e-03, 8.2192e-04, 3.5928e-03,\n 1.9734e-03, 3.5728e-03, 7.0657e-03, 3.7480e-03, -3.0321e-03,\n 2.3373e-03, 6.3569e-03, 4.2543e-03, -3.9096e-03, 2.8445e-03,\n -8.8602e-04, -1.2467e-03, -5.8942e-03, 5.0935e-03, -1.6568e-03,\n -1.3625e-03, -2.5430e-03, -5.4916e-03, -6.5379e-04, 1.2625e-03,\n -1.5874e-03, -2.3788e-03, 3.5868e-04, -1.4623e-03, 4.0068e-03,\n -3.7762e-03, -9.9324e-04, 4.3548e-04, -4.1802e-03, -1.0326e-03,\n 3.3396e-03, -2.0740e-03, 3.6209e-03, 2.3718e-05, 1.2336e-03,\n 2.2308e-03, 3.8307e-04, -2.0840e-03, 1.4359e-03, -1.1226e-03,\n -2.6780e-03, -3.6906e-03, 1.0293e-03, 2.5251e-03, 2.1090e-03,\n -3.5158e-03, -2.5184e-03, 8.2109e-04, -1.1507e-04, -3.3841e-03,\n -1.0113e-04, 1.3117e-03, 1.2708e-03, -4.4564e-03, 3.7156e-03,\n 1.1934e-03, 4.0693e-03, 3.4811e-03, 5.1033e-04, 4.5790e-03,\n -1.2290e-03, 1.7773e-03, -1.7603e-03, -5.0485e-03, -1.5351e-04,\n 8.3181e-03, 2.4821e-03, 2.8880e-03, 2.5159e-03, 2.0859e-03,\n 5.2034e-03, -5.6052e-45], device='cuda:0')",
|
| 18 |
+
"exp_avg_sq": "tensor([3.6659e-04, 2.6302e-04, 1.0746e-04, 2.1092e-04, 1.2703e-04, 3.1144e-04,\n 2.0002e-04, 2.8439e-04, 2.5987e-04, 3.7728e-04, 3.7676e-04, 3.9766e-04,\n 3.5513e-04, 1.8874e-04, 2.3995e-04, 2.0612e-04, 2.2059e-04, 2.0682e-04,\n 2.3353e-04, 2.9704e-04, 2.8953e-04, 9.9988e-05, 1.3333e-04, 3.5939e-04,\n 4.7778e-04, 2.3674e-04, 1.7634e-04, 3.8688e-04, 2.3231e-04, 2.8633e-04,\n 1.7615e-04, 2.7397e-04, 1.9679e-04, 1.6784e-04, 1.4147e-04, 1.5543e-04,\n 1.4730e-04, 1.8475e-04, 1.4913e-04, 1.5228e-04, 1.3373e-04, 1.0386e-04,\n 6.7521e-17, 2.2611e-04, 4.7512e-04, 1.5645e-04, 1.7007e-04, 6.8834e-05,\n 4.7330e-04, 3.1073e-04, 3.5768e-04, 1.3109e-04, 1.3357e-04, 2.8014e-04,\n 1.9491e-04, 2.0330e-04, 2.4170e-04, 1.3738e-04, 1.6651e-04, 1.0670e-04,\n 2.6431e-04, 1.8910e-04, 2.1139e-04, 1.3476e-04, 1.2407e-04, 1.3412e-04,\n 2.5464e-04, 1.4749e-04, 2.5307e-04, 1.3455e-04, 2.5933e-04, 9.9353e-05,\n 2.4278e-04, 2.7997e-04, 1.5087e-04, 2.1420e-04, 2.2264e-04, 1.9916e-04,\n 2.3562e-04, 2.6710e-04, 2.5120e-04, 1.5278e-04, 1.6054e-04, 8.5469e-05,\n 1.4673e-04, 3.3026e-04, 4.3944e-04, 1.9265e-04, 9.8188e-05, 5.8896e-04,\n 2.3875e-04, 1.7638e-04, 2.5270e-04, 2.9110e-04, 2.7899e-04, 2.7343e-04,\n 2.4270e-04, 1.9968e-04, 2.7696e-04, 3.4388e-04, 2.5160e-04, 2.8322e-04,\n 2.5040e-04, 2.1757e-04, 3.1234e-04, 4.0210e-04, 1.6661e-04, 4.9380e-04,\n 2.1174e-04, 1.8826e-04, 2.0401e-04, 3.7110e-04, 2.5612e-04, 1.7520e-04,\n 2.1257e-04, 1.2475e-04, 2.0727e-04, 2.7888e-04, 1.6277e-04, 3.2023e-04,\n 1.6759e-04, 3.0154e-04, 4.4551e-04, 4.8904e-04, 3.3588e-04, 3.9563e-04,\n 2.2642e-04, 3.2670e-04, 4.0452e-04, 5.4698e-04, 2.3872e-04, 2.4762e-04,\n 3.3462e-04, 2.3127e-04, 4.2054e-04, 1.3175e-04, 1.9318e-04, 3.1167e-04,\n 1.7985e-04, 1.1071e-04, 1.6778e-04, 2.0763e-04, 2.5228e-04, 2.6107e-04,\n 2.1407e-04, 1.9117e-04, 1.1296e-04, 3.5467e-04, 2.3553e-04, 3.4983e-04,\n 1.0202e-04, 2.7771e-04, 1.3908e-04, 3.0735e-04, 1.0189e-04, 1.0347e-04,\n 4.3526e-04, 2.7790e-04, 2.5114e-04, 1.5041e-04, 3.3265e-04, 1.4908e-04,\n 3.9470e-04, 2.0641e-04, 1.1055e-04, 3.9381e-04, 2.6428e-04, 1.0713e-04,\n 8.4281e-05, 2.0720e-04, 9.8325e-05, 2.7158e-04, 2.3124e-04, 4.2076e-04,\n 2.5511e-04, 1.6217e-04, 2.5670e-04, 1.1436e-04, 1.4996e-04, 2.2734e-04,\n 1.9609e-04, 2.0095e-04, 6.3005e-04, 2.4067e-04, 6.0809e-04, 1.6396e-04,\n 6.5085e-04, 1.3964e-04, 1.7642e-04, 9.2386e-05, 1.6796e-04, 2.8650e-04,\n 1.5454e-04, 1.0841e-04, 1.3310e-04, 9.0219e-05, 2.3237e-04, 1.3960e-04,\n 8.1292e-05, 1.0201e-04, 1.1774e-04, 2.1376e-04, 1.4984e-04, 1.0829e-04,\n 6.8918e-05, 2.0227e-04, 1.5618e-04, 1.3333e-04, 3.4165e-04, 9.9440e-05,\n 2.8152e-04, 2.6366e-04, 3.4185e-05, 1.3949e-04, 2.8959e-04, 1.9096e-04,\n 9.5920e-05, 4.7265e-04, 2.5204e-04, 4.0569e-04, 1.5659e-04, 1.7951e-04,\n 2.0213e-04, 1.5284e-04, 2.1082e-04, 3.1526e-04, 1.1704e-04, 1.1417e-04,\n 7.9274e-05, 1.7965e-04, 1.8207e-04, 1.3220e-04, 6.3227e-04, 2.8183e-04,\n 1.0712e-04, 1.6218e-04, 1.7964e-04, 9.5775e-18, 1.6082e-04, 4.2670e-04,\n 1.2249e-04, 3.2216e-04, 4.4371e-04, 3.2026e-04, 1.8798e-04, 4.1424e-04,\n 1.3566e-04, 1.1032e-04, 2.8548e-04, 1.6124e-04, 2.9936e-04, 9.7582e-05,\n 4.8575e-04, 2.7198e-04, 1.3514e-04, 2.5122e-04, 1.8696e-04, 1.7943e-04,\n 1.2054e-04, 1.8935e-04, 2.3302e-04, 2.7137e-04, 1.7229e-04, 1.8999e-04,\n 2.1120e-04, 2.1836e-04, 2.2038e-04, 2.2536e-04, 2.1305e-04, 3.8708e-04,\n 3.5787e-04, 3.4331e-04, 6.8665e-05, 2.4414e-04, 1.2843e-04, 1.6776e-04,\n 1.9225e-04, 2.6023e-04, 2.5072e-04, 2.1805e-04, 2.0912e-04, 8.4664e-05,\n 1.5148e-04, 1.5387e-04, 1.6074e-04, 2.3806e-04, 2.9224e-04, 1.3373e-04,\n 5.5440e-04, 2.0165e-04, 1.2851e-04, 8.7070e-05, 1.6673e-04, 1.1217e-04,\n 2.3817e-04, 1.3802e-04, 2.1371e-04, 1.6528e-04, 4.6351e-04, 1.9558e-04,\n 1.2359e-04, 1.4325e-04, 3.4711e-04, 1.9097e-04, 2.1656e-04, 1.8127e-04,\n 1.8231e-04, 1.6580e-04, 7.5518e-04, 4.7172e-04, 4.4249e-04, 2.1959e-04,\n 1.6175e-04, 2.0523e-04, 1.8522e-04, 3.0394e-04, 1.7921e-04, 1.0350e-04,\n 2.1098e-04, 2.5781e-04, 6.7277e-04, 4.1820e-04, 2.2003e-04, 2.9845e-04,\n 3.9249e-04, 4.0507e-04, 2.2338e-04, 3.6455e-04, 1.4894e-04, 2.8447e-04,\n 4.1757e-04, 3.5878e-04, 7.6296e-05, 1.4404e-04, 1.6890e-04, 1.8029e-04,\n 8.8632e-05, 1.4923e-04, 2.5266e-04, 2.6457e-04, 1.7585e-04, 1.3742e-04,\n 2.1261e-04, 3.4933e-04, 1.8937e-04, 2.6132e-04, 2.1568e-04, 1.0414e-04,\n 1.7046e-04, 2.4955e-04, 2.4338e-04, 2.1278e-04, 2.3663e-04, 2.5128e-04,\n 2.7090e-04, 1.6986e-04, 2.3229e-04, 1.4046e-04, 1.2638e-04, 1.4036e-04,\n 1.1844e-04, 5.0942e-04, 4.3069e-04, 3.5470e-04, 1.9145e-04, 1.9599e-04,\n 7.6122e-05, 3.5244e-04, 1.1106e-04, 1.8489e-04, 1.2538e-04, 2.3306e-04,\n 3.0683e-04, 2.2310e-04, 1.6196e-04, 1.9915e-04, 1.2111e-04, 1.2686e-04,\n 2.7950e-04, 1.0587e-04, 2.2118e-04, 1.9335e-04, 3.0423e-04, 2.5868e-04,\n 2.2930e-04, 3.9338e-04, 1.6440e-04, 1.9065e-04, 2.4469e-04, 1.4033e-04,\n 1.5356e-04, 1.4479e-04, 1.8533e-04, 4.6682e-04, 1.5653e-04, 1.1397e-04,\n 1.5806e-04, 4.7508e-04, 2.2787e-04, 3.3641e-04, 1.8515e-04, 1.1548e-04,\n 4.9232e-04, 1.9853e-04, 2.1638e-04, 1.3505e-04, 5.2746e-04, 5.4550e-05,\n 3.1732e-04, 2.8079e-04, 3.0592e-04, 6.0697e-05, 3.2721e-04, 2.1343e-04,\n 1.9378e-04, 5.4077e-04, 4.4714e-04, 3.3692e-04, 2.5407e-04, 1.8262e-04,\n 2.2425e-04, 1.9275e-04, 4.3355e-04, 2.7276e-04, 5.9592e-04, 2.2796e-04,\n 2.1910e-04, 1.9987e-04, 2.7695e-04, 1.8005e-04, 1.8629e-04, 1.1099e-04,\n 1.6522e-04, 2.2540e-04, 3.3533e-04, 2.1915e-04, 2.7693e-04, 4.0817e-04,\n 2.5053e-04, 1.2375e-04, 2.8671e-04, 2.3756e-04, 2.0466e-04, 7.2571e-04,\n 1.7884e-04, 3.1321e-04, 2.9845e-04, 1.2737e-04, 2.1905e-04, 2.7874e-04,\n 2.9924e-04, 1.4687e-04, 1.7572e-04, 5.4336e-04, 2.6775e-04, 1.1056e-04,\n 3.2181e-04, 1.7934e-04, 1.1022e-04, 1.7776e-04, 2.2590e-04, 1.4572e-04,\n 2.5751e-04, 1.4791e-04, 1.8549e-04, 1.4457e-04, 1.3249e-04, 2.1822e-04,\n 4.2838e-04, 2.8283e-04, 2.5130e-04, 3.9938e-04, 2.4941e-04, 1.5813e-04,\n 2.2419e-04, 2.5003e-04, 2.4689e-04, 2.2415e-04, 3.9666e-04, 1.3569e-04,\n 8.0416e-05, 5.0071e-04, 2.5255e-04, 1.6048e-04, 1.9194e-04, 1.4177e-04,\n 1.2415e-04, 2.2102e-04, 4.1081e-04, 1.0030e-04, 1.6593e-04, 1.2035e-04,\n 2.7604e-04, 1.6041e-04, 1.7756e-04, 2.7133e-04, 2.2015e-04, 1.8741e-04,\n 1.2373e-04, 1.8070e-04, 1.9114e-04, 1.4989e-04, 1.7614e-04, 2.1297e-04,\n 1.3175e-04, 2.2333e-04, 1.2615e-04, 1.6894e-04, 8.3008e-05, 1.0507e-04,\n 1.0657e-04, 7.3827e-14], device='cuda:0')"
|
| 19 |
+
},
|
| 20 |
+
"3": {
|
| 21 |
+
"step": "tensor(30032.)",
|
| 22 |
+
"exp_avg": "tensor([ 2.1367e-03, -7.0935e-04, 1.8648e-04, -4.1322e-04, 1.4005e-03,\n 1.9311e-03, -1.0177e-03, -1.6054e-03, -1.2730e-03, -6.1607e-05,\n 8.8806e-04, -1.8000e-03, -7.4086e-04, -5.4276e-04, -7.0110e-03,\n -6.3259e-04, -9.9368e-04, -2.1014e-03, -2.0596e-03, 1.7026e-03,\n -3.5970e-03, -2.0474e-03, -6.0600e-04, 4.6766e-04, -2.4735e-03,\n 1.7797e-03, 2.7612e-04, -1.5684e-03, -1.5980e-03, 8.9498e-04,\n 1.7534e-03, 1.8591e-04, -2.2898e-03, -3.1145e-04, -1.7450e-03,\n 2.6636e-03, -1.9563e-03, -7.0424e-04, -3.9529e-04, -2.5903e-03,\n -5.3634e-04, 2.7621e-04, 5.6052e-45, 7.3156e-04, -8.6179e-03,\n 1.1468e-03, 1.9567e-03, 1.6750e-03, -1.1714e-04, 2.2359e-03,\n 4.4262e-03, 1.3309e-03, 4.4008e-04, -2.4752e-04, -1.8007e-04,\n -3.1645e-04, 8.9891e-04, -3.7039e-03, -1.8958e-03, 1.7431e-03,\n 5.7762e-03, 2.0297e-03, -6.1663e-04, 9.2578e-04, 1.0600e-03,\n -3.4024e-03, 5.6814e-04, -7.5354e-05, -6.1741e-05, 3.4714e-04,\n 3.3050e-03, 4.9001e-04, -1.0261e-03, 9.3216e-04, 1.0379e-03,\n 1.3639e-03, -2.8007e-03, 9.2874e-04, -1.4716e-03, 1.0458e-03,\n -1.6408e-03, -1.2412e-03, 4.1701e-04, -1.1268e-03, -1.5862e-03,\n 1.0096e-03, -1.1910e-03, -1.7287e-04, -5.2967e-04, 2.9127e-03,\n -2.7621e-03, -6.7717e-04, -2.4686e-03, 1.5474e-03, -3.0040e-04,\n 2.6869e-03, 2.6285e-04, 1.4271e-04, -6.9103e-04, 1.2300e-03,\n -4.9220e-03, -7.9691e-04, 1.6457e-05, -1.3775e-03, -7.6409e-05,\n 6.8942e-06, 1.3799e-04, -6.8195e-04, -3.0845e-03, -2.9651e-04,\n 2.8860e-03, -3.1765e-03, 1.7175e-03, 1.2516e-03, -9.9642e-04,\n 3.7938e-03, -1.5714e-03, -1.1517e-03, -5.8751e-04, -4.9686e-04,\n 2.7598e-04, 1.0173e-04, 1.1030e-03, 1.3501e-03, -2.5960e-03,\n -1.7665e-03, -2.7359e-04, -1.6792e-03, 1.0125e-03, 1.5845e-03,\n 1.3100e-03, 2.3287e-04, 4.4052e-04, 1.9401e-03, -2.5499e-03,\n -2.9807e-03, 1.0555e-03, -3.4847e-05, -6.5749e-04, -6.9289e-04,\n -8.1876e-05, 2.7170e-05, -2.4820e-03, -2.1442e-03, -1.7952e-03,\n 4.2048e-03, -1.4147e-03, -1.7805e-03, 5.6817e-04, 5.2038e-03,\n 6.3093e-04, 3.0176e-03, -1.0896e-03, -2.2342e-04, 4.4852e-04,\n 1.4492e-03, 1.2338e-03, -2.1700e-04, 4.4047e-04, 4.6898e-04,\n 7.2611e-05, 8.1698e-04, 6.6433e-04, -1.2288e-03, -1.4249e-03,\n 8.3652e-04, 2.2497e-03, 1.9965e-03, 2.3169e-04, 1.3422e-03,\n -7.2470e-04, 3.2273e-03, 3.4204e-03, 1.0960e-03, 2.7140e-03,\n -6.5727e-04, 3.9007e-03, -1.9290e-03, -4.8990e-04, -3.5950e-03,\n -1.4765e-03, -1.3317e-03, 3.2112e-03, 2.4952e-04, -1.8841e-04,\n -3.9033e-03, 8.5616e-04, -2.6363e-03, 1.3683e-03, -3.4432e-04,\n -4.1131e-04, -1.9696e-03, -1.3346e-03, -1.5194e-03, 3.1526e-04,\n 8.0519e-05, 3.3815e-04, -3.9407e-03, -3.0033e-04, -5.0077e-05,\n 2.9509e-03, -1.0263e-03, -1.6986e-03, -4.0631e-04, -2.0487e-04,\n 1.0856e-03, 2.1249e-04, 1.2171e-04, 8.5760e-04, -1.4589e-04,\n 2.4225e-03, -5.5207e-04, 1.4739e-03, 7.9260e-04, -1.3536e-03,\n -3.1558e-03, -5.2427e-05, 6.1590e-04, 1.8846e-03, 2.2342e-03,\n -1.8651e-03, 5.9600e-04, 2.0269e-03, -6.0417e-04, 1.5400e-03,\n -8.9142e-04, -1.9504e-03, -1.0289e-03, -1.5392e-03, 1.2653e-03,\n 2.6315e-04, -6.3203e-04, -1.5615e-03, 2.1412e-03, -5.4815e-04,\n -2.1924e-03, 7.0446e-04, 5.6052e-45, -3.5613e-03, 5.9584e-04,\n -2.4972e-03, -6.8219e-05, -1.2355e-04, 7.2637e-04, 1.4702e-03,\n 1.3668e-03, 1.1582e-03, -1.4847e-03, -5.5625e-05, 3.1081e-04,\n 7.9208e-04, 6.4152e-04, 9.6740e-04, -1.0746e-03, -1.2811e-03,\n -5.2131e-04, 1.0900e-03, 1.2346e-04, 2.3175e-05, -2.4348e-03,\n 1.3701e-04, -6.0296e-04, 7.8861e-04, 3.1478e-03, -2.3352e-03,\n 1.7021e-03, 1.6030e-03, -1.0891e-03, 9.7296e-04, -6.3793e-04,\n 1.2855e-03, 6.2511e-04, -2.9083e-05, 3.5871e-03, 2.6585e-03,\n 3.7861e-04, -2.2216e-03, 1.2300e-03, -2.0345e-03, 3.1298e-03,\n 1.9586e-03, -2.2568e-03, 1.7123e-03, 4.1607e-04, -1.9724e-03,\n 2.0350e-03, 8.1493e-04, 9.1047e-04, -3.1971e-04, -1.8770e-03,\n -1.8558e-03, 1.7860e-03, -1.5147e-03, -2.6884e-03, 8.9980e-04,\n -6.7296e-04, 5.2811e-04, -1.3302e-03, 3.9636e-03, 1.8402e-03,\n -2.9066e-04, -8.8779e-05, 6.6868e-04, -2.1001e-03, 9.6098e-04,\n 2.3095e-03, -3.5544e-03, -9.0714e-04, -5.8582e-04, 2.7022e-03,\n -1.9506e-03, 3.4150e-03, 1.3893e-03, -3.1623e-04, -1.3812e-03,\n 1.5541e-04, -2.3833e-03, -3.6804e-03, 1.6392e-03, 7.1138e-04,\n -7.3018e-04, 6.5815e-04, 3.5335e-04, 2.9080e-04, 8.1726e-04,\n -3.7092e-04, -2.0969e-03, -1.1335e-03, -6.7586e-04, 1.8288e-04,\n -1.0197e-03, 3.1147e-04, -5.1049e-04, 4.5522e-04, -1.5245e-03,\n -1.4726e-03, 1.4753e-03, 2.1386e-04, -1.2639e-03, 1.2484e-03,\n -1.3188e-04, 1.5765e-03, 1.5441e-03, 4.7318e-03, -1.6959e-03,\n 9.4489e-04, 1.4926e-03, 7.7930e-05, -1.9315e-03, 1.4150e-03,\n 1.5465e-03, 3.5439e-03, -4.5828e-04, -2.1406e-03, 2.0976e-06,\n -1.9753e-03, -1.8985e-03, 5.8453e-04, 7.7552e-04, -1.8143e-03,\n 9.0224e-04, -8.9238e-04, -2.3837e-03, -3.0797e-03, 2.6480e-05,\n 9.4380e-04, 1.5284e-03, -3.8381e-03, 3.2390e-04, -6.2257e-04,\n 2.5879e-03, 1.2181e-03, 2.2092e-04, 9.1847e-04, -4.7132e-04,\n -1.1129e-03, 3.6304e-04, 1.8310e-03, -9.1195e-04, 1.3472e-04,\n -3.3474e-04, 2.3616e-03, 3.3493e-03, 9.4259e-04, 2.0148e-03,\n -4.4603e-03, 2.3906e-04, 2.4708e-04, -9.5338e-04, 1.9478e-03,\n -6.4414e-04, 2.6900e-03, -2.4188e-03, -2.7789e-03, 7.8899e-04,\n -8.6939e-04, -1.0127e-03, 1.2294e-03, 3.7272e-05, 1.4255e-03,\n -1.8162e-03, 4.0381e-04, 1.3583e-04, 2.5170e-04, -1.5338e-03,\n -9.9580e-04, -3.9327e-04, -6.2284e-05, -7.1829e-04, 3.2940e-03,\n -7.9169e-04, -7.7774e-04, 4.3536e-03, -2.0267e-03, -2.9555e-04,\n -4.8375e-04, 8.1415e-04, 5.2368e-05, -2.3947e-03, -1.0616e-03,\n -2.4358e-04, -4.5774e-04, 1.6243e-03, -2.1672e-03, -4.2433e-03,\n 1.2420e-05, 2.1729e-03, 6.9347e-05, -1.8602e-03, -2.1623e-03,\n 3.3170e-03, -2.5340e-04, -1.0480e-03, 1.1446e-03, 7.3025e-04,\n -1.9849e-03, -1.6934e-03, 2.1231e-03, 2.8915e-04, 1.5779e-03,\n 5.9109e-04, 1.0807e-03, 3.3621e-03, 2.0240e-03, -1.4201e-03,\n 6.6580e-04, 2.6034e-03, 2.1888e-03, -1.6834e-03, 1.2971e-03,\n -6.6146e-05, -7.4696e-04, -2.8885e-03, 2.3470e-03, -6.3959e-04,\n -6.6703e-04, -1.8895e-03, -2.6845e-03, -3.3803e-04, 5.0336e-04,\n -8.0913e-04, -8.4269e-04, 6.4030e-04, -3.9341e-04, 2.0815e-03,\n -1.6967e-03, -9.4585e-04, 1.3859e-04, -1.8894e-03, 8.1247e-05,\n 1.3758e-03, -9.2719e-04, 1.5821e-03, -1.9963e-04, 4.4613e-04,\n 1.2897e-03, 3.9149e-04, -8.9392e-04, 7.0982e-04, -5.9035e-04,\n -1.4745e-03, -1.3703e-03, 7.7506e-04, 1.3467e-03, 1.1728e-03,\n -2.4041e-03, -1.2586e-03, 3.0013e-04, -3.1711e-06, -2.0241e-03,\n -8.0618e-05, 4.2179e-04, 7.5090e-04, -1.9503e-03, 1.4318e-03,\n 7.6369e-04, 1.4854e-03, 1.9710e-03, 5.7782e-04, 1.9246e-03,\n -6.8177e-04, 7.3320e-04, -6.9063e-04, -2.2104e-03, -2.0011e-04,\n 3.5428e-03, 1.4228e-03, 1.4516e-03, 1.6856e-03, 1.2969e-03,\n 2.4919e-03, 5.6052e-45], device='cuda:0')",
|
| 23 |
+
"exp_avg_sq": "tensor([6.2762e-05, 4.4328e-05, 3.3658e-05, 3.6509e-05, 2.4979e-05, 6.3077e-05,\n 4.0537e-05, 7.4150e-05, 5.2910e-05, 7.2754e-05, 7.6178e-05, 7.7626e-05,\n 9.9738e-05, 3.6066e-05, 7.1417e-05, 5.3116e-05, 4.5051e-05, 5.0372e-05,\n 4.8487e-05, 6.5995e-05, 5.8261e-05, 2.4009e-05, 2.9838e-05, 7.6740e-05,\n 1.2018e-04, 6.2931e-05, 3.6232e-05, 7.6078e-05, 5.3054e-05, 5.6101e-05,\n 3.0894e-05, 4.4753e-05, 6.1615e-05, 4.3388e-05, 2.8861e-05, 5.0425e-05,\n 3.5931e-05, 3.0284e-05, 3.3011e-05, 2.8638e-05, 3.2266e-05, 3.0836e-05,\n 1.2077e-18, 7.6888e-05, 1.2198e-04, 3.0788e-05, 4.3851e-05, 1.5705e-05,\n 7.0996e-05, 1.0788e-04, 1.1348e-04, 2.5875e-05, 3.0102e-05, 4.5945e-05,\n 6.9479e-05, 3.7683e-05, 4.3730e-05, 5.4706e-05, 3.9738e-05, 2.7981e-05,\n 1.0044e-04, 3.2665e-05, 4.6411e-05, 3.2399e-05, 2.9440e-05, 3.0158e-05,\n 4.6813e-05, 3.1653e-05, 4.9962e-05, 3.5054e-05, 5.1274e-05, 4.1337e-05,\n 3.5259e-05, 4.0216e-05, 3.6251e-05, 4.0877e-05, 3.6665e-05, 4.2212e-05,\n 5.4990e-05, 6.1766e-05, 5.2697e-05, 3.9543e-05, 3.5686e-05, 2.0553e-05,\n 3.4654e-05, 6.2882e-05, 6.6929e-05, 3.8090e-05, 2.3918e-05, 7.0659e-05,\n 3.8358e-05, 4.6493e-05, 6.9510e-05, 5.2658e-05, 5.0416e-05, 4.7712e-05,\n 7.0231e-05, 3.9526e-05, 5.4537e-05, 3.8903e-05, 3.8076e-05, 5.8954e-05,\n 6.6571e-05, 4.6941e-05, 7.0908e-05, 9.1715e-05, 4.1829e-05, 1.4628e-04,\n 4.0168e-05, 3.8433e-05, 6.6645e-05, 6.5180e-05, 4.9715e-05, 4.8232e-05,\n 5.3432e-05, 3.6830e-05, 5.2795e-05, 7.6435e-05, 2.8660e-05, 5.1575e-05,\n 2.4764e-05, 6.0398e-05, 6.1442e-05, 1.1938e-04, 5.0810e-05, 6.5743e-05,\n 4.2487e-05, 6.4115e-05, 7.5421e-05, 1.5694e-04, 5.4480e-05, 3.6552e-05,\n 6.4023e-05, 4.4448e-05, 1.3886e-04, 3.4606e-05, 5.1928e-05, 4.9441e-05,\n 3.4720e-05, 2.7347e-05, 2.8771e-05, 4.2482e-05, 4.2340e-05, 8.4416e-05,\n 4.5888e-05, 4.8129e-05, 2.7042e-05, 7.2731e-05, 4.7338e-05, 9.9409e-05,\n 2.2817e-05, 6.4696e-05, 2.9662e-05, 7.5351e-05, 2.6407e-05, 3.6800e-05,\n 7.2377e-05, 4.8429e-05, 3.9588e-05, 3.5691e-05, 6.4286e-05, 2.0576e-05,\n 6.8320e-05, 4.5620e-05, 1.9706e-05, 8.3314e-05, 4.4911e-05, 2.3536e-05,\n 1.7025e-05, 5.0900e-05, 2.9098e-05, 6.7091e-05, 4.3508e-05, 9.5267e-05,\n 4.8522e-05, 2.5813e-05, 9.3669e-05, 2.5321e-05, 2.6758e-05, 6.2508e-05,\n 4.1274e-05, 3.6740e-05, 1.1824e-04, 4.7365e-05, 8.8155e-05, 3.8353e-05,\n 1.4730e-04, 3.6405e-05, 4.1134e-05, 1.7885e-05, 3.6451e-05, 5.6763e-05,\n 4.5913e-05, 3.3051e-05, 2.6710e-05, 1.8632e-05, 7.0334e-05, 3.0891e-05,\n 1.6996e-05, 2.3023e-05, 3.0318e-05, 4.9992e-05, 3.8667e-05, 2.3709e-05,\n 2.5015e-05, 4.2092e-05, 3.0130e-05, 2.8331e-05, 6.4687e-05, 2.5232e-05,\n 5.4805e-05, 7.2120e-05, 2.2641e-05, 3.3496e-05, 5.5066e-05, 2.4547e-05,\n 2.4916e-05, 4.9124e-05, 4.4036e-05, 7.4341e-05, 2.8486e-05, 3.4028e-05,\n 3.3948e-05, 2.5012e-05, 3.7879e-05, 7.5533e-05, 2.9213e-05, 2.2848e-05,\n 2.0675e-05, 3.7254e-05, 3.3251e-05, 3.0631e-05, 9.6726e-05, 4.9521e-05,\n 2.9307e-05, 3.7808e-05, 3.9140e-05, 3.3536e-19, 3.9389e-05, 1.0317e-04,\n 3.1323e-05, 8.5611e-05, 5.9618e-05, 7.2604e-05, 8.4022e-05, 6.8520e-05,\n 4.2968e-05, 2.9432e-05, 6.0608e-05, 3.4362e-05, 8.2135e-05, 2.4186e-05,\n 7.8624e-05, 6.6017e-05, 2.9370e-05, 5.2128e-05, 5.7349e-05, 5.1457e-05,\n 3.9929e-05, 4.7593e-05, 4.0146e-05, 6.2474e-05, 4.0134e-05, 6.7651e-05,\n 4.8595e-05, 4.6876e-05, 3.0528e-05, 5.1283e-05, 3.6093e-05, 6.8321e-05,\n 7.1150e-05, 6.1890e-05, 2.4135e-05, 5.8510e-05, 3.0022e-05, 4.0347e-05,\n 5.1013e-05, 7.3702e-05, 4.2856e-05, 3.8056e-05, 5.3095e-05, 2.6644e-05,\n 3.0590e-05, 4.7392e-05, 3.3658e-05, 4.9865e-05, 4.9825e-05, 2.9899e-05,\n 9.4024e-05, 4.4996e-05, 2.6686e-05, 2.9284e-05, 4.3372e-05, 2.8683e-05,\n 4.3075e-05, 2.7396e-05, 4.5275e-05, 4.9278e-05, 1.5055e-04, 4.6191e-05,\n 2.9069e-05, 3.0485e-05, 5.0787e-05, 4.0735e-05, 5.8825e-05, 2.8642e-05,\n 3.6769e-05, 4.2537e-05, 1.2236e-04, 6.2875e-05, 7.1221e-05, 4.5559e-05,\n 2.8383e-05, 3.2985e-05, 4.8986e-05, 4.3846e-05, 4.4397e-05, 2.1610e-05,\n 3.6809e-05, 6.5923e-05, 1.5139e-04, 8.1624e-05, 5.3506e-05, 6.0006e-05,\n 7.9578e-05, 9.6199e-05, 5.6005e-05, 8.1443e-05, 3.9660e-05, 4.7857e-05,\n 9.8532e-05, 5.8360e-05, 1.5759e-05, 3.4067e-05, 3.3419e-05, 4.2382e-05,\n 3.0209e-05, 2.6288e-05, 6.4962e-05, 4.6473e-05, 4.1383e-05, 3.1290e-05,\n 4.1644e-05, 7.4785e-05, 6.0154e-05, 8.4737e-05, 6.7010e-05, 2.7185e-05,\n 4.9364e-05, 6.5348e-05, 4.6194e-05, 4.3678e-05, 5.2212e-05, 3.5116e-05,\n 5.0098e-05, 3.7085e-05, 4.5695e-05, 2.5592e-05, 3.3152e-05, 2.5629e-05,\n 2.2691e-05, 1.2499e-04, 1.0828e-04, 5.4598e-05, 4.4278e-05, 3.6374e-05,\n 2.1185e-05, 6.6043e-05, 2.4623e-05, 3.0018e-05, 2.9531e-05, 4.7327e-05,\n 5.0682e-05, 5.1311e-05, 4.9978e-05, 3.7575e-05, 2.6362e-05, 2.2880e-05,\n 4.3704e-05, 3.1432e-05, 4.2434e-05, 4.3653e-05, 6.9956e-05, 3.6516e-05,\n 4.6608e-05, 1.2592e-04, 3.4435e-05, 5.0037e-05, 4.3734e-05, 3.7418e-05,\n 4.1454e-05, 3.6354e-05, 5.0377e-05, 6.6829e-05, 3.9237e-05, 2.3111e-05,\n 2.5806e-05, 7.3779e-05, 5.8942e-05, 8.1666e-05, 3.6647e-05, 2.7109e-05,\n 8.6295e-05, 3.5257e-05, 4.4773e-05, 3.5175e-05, 9.7134e-05, 1.5052e-05,\n 6.9635e-05, 7.3700e-05, 5.6566e-05, 3.1596e-05, 8.9215e-05, 6.5457e-05,\n 3.5178e-05, 4.8918e-05, 8.8779e-05, 8.6889e-05, 5.5980e-05, 5.2580e-05,\n 5.3323e-05, 4.0320e-05, 6.8495e-05, 5.1841e-05, 7.3916e-05, 4.5560e-05,\n 6.7026e-05, 3.9722e-05, 4.5479e-05, 3.8447e-05, 3.9538e-05, 2.7711e-05,\n 4.3739e-05, 4.9334e-05, 5.2044e-05, 4.2902e-05, 5.5160e-05, 6.4499e-05,\n 4.8690e-05, 2.9636e-05, 5.5876e-05, 5.6847e-05, 4.6952e-05, 1.9879e-04,\n 5.8801e-05, 4.9085e-05, 6.2172e-05, 3.9285e-05, 5.8093e-05, 4.7650e-05,\n 5.0229e-05, 3.3075e-05, 4.2966e-05, 1.2442e-04, 5.5051e-05, 2.2461e-05,\n 9.8150e-05, 3.9481e-05, 2.4932e-05, 4.4029e-05, 5.1487e-05, 3.8815e-05,\n 4.1331e-05, 4.1430e-05, 4.6658e-05, 3.1334e-05, 3.2766e-05, 5.1242e-05,\n 6.6178e-05, 5.7499e-05, 4.3083e-05, 1.3754e-04, 6.3366e-05, 5.2248e-05,\n 6.3163e-05, 5.1069e-05, 6.2036e-05, 5.0043e-05, 5.9547e-05, 2.7478e-05,\n 2.2718e-05, 7.2263e-05, 5.3414e-05, 3.0236e-05, 3.8856e-05, 4.7570e-05,\n 3.8841e-05, 4.4395e-05, 8.6259e-05, 3.5759e-05, 3.6686e-05, 3.4290e-05,\n 4.8470e-05, 4.2871e-05, 3.0773e-05, 4.0507e-05, 4.8807e-05, 3.9797e-05,\n 3.1364e-05, 3.8527e-05, 4.2337e-05, 3.0068e-05, 3.7598e-05, 4.0253e-05,\n 3.2400e-05, 4.3128e-05, 3.2939e-05, 4.3435e-05, 3.4353e-05, 4.6307e-05,\n 3.1516e-05, 1.1766e-15], device='cuda:0')"
|
| 24 |
+
},
|
| 25 |
+
"4": {
|
| 26 |
+
"step": "tensor(30032.)",
|
| 27 |
+
"exp_avg": "tensor([[-5.2708e-06, -2.5403e-05, 8.9942e-06, ..., 3.8447e-06,\n -9.9860e-06, 5.6052e-45],\n [-8.6702e-06, -3.5106e-06, -1.0925e-06, ..., 3.2660e-05,\n 1.0863e-07, 5.6052e-45],\n [-9.5587e-06, -3.1710e-05, 1.2700e-05, ..., 2.2788e-06,\n -1.7314e-05, -5.6052e-45],\n ...,\n [ 1.8341e-05, -1.7316e-05, -4.9865e-06, ..., 1.4680e-05,\n -8.8622e-06, 5.6052e-45],\n [-1.6117e-05, -1.6569e-05, 5.5801e-06, ..., -8.8825e-07,\n 1.9060e-06, 5.6052e-45],\n [-5.5058e-06, 1.0297e-06, -2.6418e-06, ..., 1.2793e-06,\n -2.6657e-06, 5.6052e-45]], device='cuda:0')",
|
| 28 |
+
"exp_avg_sq": "tensor([[6.5238e-10, 1.3040e-09, 6.4818e-10, ..., 1.4283e-09, 5.2246e-10,\n 3.1495e-20],\n [1.0874e-09, 2.4259e-09, 1.5050e-09, ..., 4.3118e-09, 2.2477e-09,\n 1.7710e-19],\n [8.8159e-10, 1.5172e-09, 2.1656e-09, ..., 3.6600e-09, 8.8765e-10,\n 1.6123e-19],\n ...,\n [1.4673e-09, 3.2065e-09, 1.2202e-09, ..., 5.1323e-09, 1.0172e-09,\n 1.8076e-19],\n [1.7288e-09, 4.4286e-09, 1.2676e-09, ..., 3.1381e-09, 1.8795e-09,\n 3.2171e-19],\n [1.9164e-09, 2.0406e-09, 1.0817e-09, ..., 3.6361e-09, 1.6447e-09,\n 1.8574e-19]], device='cuda:0')"
|
| 29 |
+
},
|
| 30 |
+
"5": {
|
| 31 |
+
"step": "tensor(22524.)",
|
| 32 |
+
"exp_avg": "tensor([[-4.6570e-06, -9.5121e-06, 3.1315e-06, ..., 5.3001e-06,\n -5.9384e-06, 5.6052e-45],\n [-3.3572e-06, -1.3279e-05, 3.0202e-06, ..., 2.7749e-05,\n 2.3155e-06, 5.6052e-45],\n [-8.0309e-06, -3.6581e-05, 6.0706e-06, ..., -4.3819e-06,\n 1.1238e-05, -5.6052e-45],\n ...,\n [ 1.1122e-05, 1.2285e-06, 3.9079e-06, ..., 2.2121e-05,\n 5.4371e-06, 5.6052e-45],\n [-4.2549e-06, 9.2396e-06, 3.6358e-06, ..., -1.9654e-05,\n 2.6248e-06, 5.6052e-45],\n [ 5.3912e-06, -4.9359e-06, -7.0072e-07, ..., 1.4846e-05,\n 7.5506e-06, 5.6052e-45]], device='cuda:0')",
|
| 33 |
+
"exp_avg_sq": "tensor([[7.7559e-10, 1.0872e-09, 5.2255e-10, ..., 1.0115e-09, 7.2519e-10,\n 1.3773e-19],\n [7.0976e-10, 2.3297e-09, 1.3818e-09, ..., 4.9031e-09, 1.9874e-09,\n 9.0083e-19],\n [8.0906e-10, 1.8744e-09, 1.4111e-09, ..., 4.2285e-09, 1.0743e-09,\n 2.6341e-19],\n ...,\n [1.9487e-09, 1.3959e-09, 1.8794e-09, ..., 7.1082e-09, 1.1236e-09,\n 5.4735e-20],\n [9.4972e-10, 2.3303e-09, 9.7787e-10, ..., 2.4434e-09, 1.0978e-09,\n 1.9858e-18],\n [1.8853e-09, 1.4149e-09, 1.0017e-09, ..., 2.5492e-09, 2.0146e-09,\n 1.8998e-20]], device='cuda:0')"
|
| 34 |
+
},
|
| 35 |
+
"6": {
|
| 36 |
+
"step": "tensor(22524.)",
|
| 37 |
+
"exp_avg": "tensor([-0.0006, 0.0006], device='cuda:0')",
|
| 38 |
+
"exp_avg_sq": "tensor([6.6441e-06, 6.6441e-06], device='cuda:0')"
|
| 39 |
+
}
|
| 40 |
+
},
|
| 41 |
+
"param_groups": [
|
| 42 |
+
{
|
| 43 |
+
"lr": 0.0009558195366224509,
|
| 44 |
+
"name": "shared",
|
| 45 |
+
"betas": [
|
| 46 |
+
0.9,
|
| 47 |
+
0.999
|
| 48 |
+
],
|
| 49 |
+
"eps": 1e-08,
|
| 50 |
+
"weight_decay": 1e-05,
|
| 51 |
+
"amsgrad": false,
|
| 52 |
+
"maximize": false,
|
| 53 |
+
"foreach": null,
|
| 54 |
+
"capturable": false,
|
| 55 |
+
"differentiable": false,
|
| 56 |
+
"fused": null,
|
| 57 |
+
"decoupled_weight_decay": true,
|
| 58 |
+
"initial_lr": 0.01,
|
| 59 |
+
"params": [
|
| 60 |
+
0,
|
| 61 |
+
1,
|
| 62 |
+
2,
|
| 63 |
+
3
|
| 64 |
+
]
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"lr": 0.0009558195366224509,
|
| 68 |
+
"name": "scale_256",
|
| 69 |
+
"betas": [
|
| 70 |
+
0.9,
|
| 71 |
+
0.999
|
| 72 |
+
],
|
| 73 |
+
"eps": 1e-08,
|
| 74 |
+
"weight_decay": 1e-05,
|
| 75 |
+
"amsgrad": false,
|
| 76 |
+
"maximize": false,
|
| 77 |
+
"foreach": null,
|
| 78 |
+
"capturable": false,
|
| 79 |
+
"differentiable": false,
|
| 80 |
+
"fused": null,
|
| 81 |
+
"decoupled_weight_decay": true,
|
| 82 |
+
"initial_lr": 0.01,
|
| 83 |
+
"params": [
|
| 84 |
+
4
|
| 85 |
+
]
|
| 86 |
+
},
|
| 87 |
+
{
|
| 88 |
+
"lr": 0.0009558195366224509,
|
| 89 |
+
"name": "scale_512",
|
| 90 |
+
"betas": [
|
| 91 |
+
0.9,
|
| 92 |
+
0.999
|
| 93 |
+
],
|
| 94 |
+
"eps": 1e-08,
|
| 95 |
+
"weight_decay": 1e-05,
|
| 96 |
+
"amsgrad": false,
|
| 97 |
+
"maximize": false,
|
| 98 |
+
"foreach": null,
|
| 99 |
+
"capturable": false,
|
| 100 |
+
"differentiable": false,
|
| 101 |
+
"fused": null,
|
| 102 |
+
"decoupled_weight_decay": true,
|
| 103 |
+
"initial_lr": 0.01,
|
| 104 |
+
"params": [
|
| 105 |
+
5
|
| 106 |
+
]
|
| 107 |
+
},
|
| 108 |
+
{
|
| 109 |
+
"lr": 0.00047836202255981916,
|
| 110 |
+
"name": "fusion",
|
| 111 |
+
"betas": [
|
| 112 |
+
0.9,
|
| 113 |
+
0.999
|
| 114 |
+
],
|
| 115 |
+
"eps": 1e-08,
|
| 116 |
+
"weight_decay": 1e-05,
|
| 117 |
+
"amsgrad": false,
|
| 118 |
+
"maximize": false,
|
| 119 |
+
"foreach": null,
|
| 120 |
+
"capturable": false,
|
| 121 |
+
"differentiable": false,
|
| 122 |
+
"fused": null,
|
| 123 |
+
"decoupled_weight_decay": true,
|
| 124 |
+
"initial_lr": 0.005,
|
| 125 |
+
"params": [
|
| 126 |
+
6
|
| 127 |
+
]
|
| 128 |
+
}
|
| 129 |
+
]
|
| 130 |
+
},
|
| 131 |
+
"scheduler_state_dict": {
|
| 132 |
+
"T_0": 10,
|
| 133 |
+
"T_i": 10,
|
| 134 |
+
"T_mult": 2,
|
| 135 |
+
"eta_min": 1e-06,
|
| 136 |
+
"T_cur": 8,
|
| 137 |
+
"base_lrs": [
|
| 138 |
+
0.01,
|
| 139 |
+
0.01,
|
| 140 |
+
0.01,
|
| 141 |
+
0.005
|
| 142 |
+
],
|
| 143 |
+
"last_epoch": 8,
|
| 144 |
+
"_step_count": 0,
|
| 145 |
+
"_is_initial": false,
|
| 146 |
+
"_get_lr_called_within_step": false,
|
| 147 |
+
"_last_lr": [
|
| 148 |
+
0.0009558195366224509,
|
| 149 |
+
0.0009558195366224509,
|
| 150 |
+
0.0009558195366224509,
|
| 151 |
+
0.00047836202255981916
|
| 152 |
+
]
|
| 153 |
+
},
|
| 154 |
+
"metrics": {
|
| 155 |
+
"best_val_acc": 66.252,
|
| 156 |
+
"best_epoch": 7,
|
| 157 |
+
"scale_accuracies": {
|
| 158 |
+
"256": 65.74333333333334,
|
| 159 |
+
"512": 66.17
|
| 160 |
+
},
|
| 161 |
+
"training_history": {
|
| 162 |
+
"epochs": [
|
| 163 |
+
1,
|
| 164 |
+
2,
|
| 165 |
+
3,
|
| 166 |
+
4,
|
| 167 |
+
5,
|
| 168 |
+
6,
|
| 169 |
+
7,
|
| 170 |
+
8
|
| 171 |
+
],
|
| 172 |
+
"train_loss": [
|
| 173 |
+
3.9435249049420933,
|
| 174 |
+
3.3040703793567867,
|
| 175 |
+
4.3101251841734625,
|
| 176 |
+
4.185147669827233,
|
| 177 |
+
4.123004540650211,
|
| 178 |
+
4.076372152195373,
|
| 179 |
+
4.03838544134517,
|
| 180 |
+
4.0064857600531685
|
| 181 |
+
],
|
| 182 |
+
"train_acc": [
|
| 183 |
+
54.38726307083047,
|
| 184 |
+
59.31631083223343,
|
| 185 |
+
60.291879721118846,
|
| 186 |
+
61.30111583163371,
|
| 187 |
+
61.94625681117294,
|
| 188 |
+
62.46739626189768,
|
| 189 |
+
62.918183187671865,
|
| 190 |
+
63.32416721109218
|
| 191 |
+
],
|
| 192 |
+
"val_acc": [
|
| 193 |
+
61.635333333333335,
|
| 194 |
+
62.978,
|
| 195 |
+
64.12,
|
| 196 |
+
64.73133333333334,
|
| 197 |
+
65.312,
|
| 198 |
+
65.66133333333333,
|
| 199 |
+
66.03133333333334,
|
| 200 |
+
66.252
|
| 201 |
+
],
|
| 202 |
+
"scale_accs": {
|
| 203 |
+
"256": [
|
| 204 |
+
61.635333333333335,
|
| 205 |
+
62.978,
|
| 206 |
+
63.782,
|
| 207 |
+
64.34866666666667,
|
| 208 |
+
64.754,
|
| 209 |
+
65.17733333333334,
|
| 210 |
+
65.49933333333334,
|
| 211 |
+
65.74333333333334
|
| 212 |
+
],
|
| 213 |
+
"512": [
|
| 214 |
+
63.839333333333336,
|
| 215 |
+
64.522,
|
| 216 |
+
65.18466666666667,
|
| 217 |
+
65.52333333333333,
|
| 218 |
+
66.02266666666667,
|
| 219 |
+
66.17
|
| 220 |
+
]
|
| 221 |
+
},
|
| 222 |
+
"lr": [
|
| 223 |
+
0.00975530705321762,
|
| 224 |
+
0.00904518046337755,
|
| 225 |
+
0.00793913236883622,
|
| 226 |
+
0.00654543046337755,
|
| 227 |
+
0.005000500000000001,
|
| 228 |
+
0.0034555695366224513,
|
| 229 |
+
0.0020618676311637816,
|
| 230 |
+
0.0009558195366224509
|
| 231 |
+
]
|
| 232 |
+
}
|
| 233 |
+
},
|
| 234 |
+
"train_config": {
|
| 235 |
+
"name": "david_training",
|
| 236 |
+
"run_id": "20251012_235237",
|
| 237 |
+
"dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
|
| 238 |
+
"model_variant": [
|
| 239 |
+
"clip_vit_b16",
|
| 240 |
+
"clip_vit_laion_b32",
|
| 241 |
+
"clip_vit_b32"
|
| 242 |
+
],
|
| 243 |
+
"num_classes": 1000,
|
| 244 |
+
"preset": "small_fast",
|
| 245 |
+
"custom_config_path": null,
|
| 246 |
+
"num_classes_override": null,
|
| 247 |
+
"use_belly_override": null,
|
| 248 |
+
"belly_expand_override": null,
|
| 249 |
+
"progressive_training_override": true,
|
| 250 |
+
"scale_warmup_epochs_override": {
|
| 251 |
+
"256": 0,
|
| 252 |
+
"512": 2
|
| 253 |
+
},
|
| 254 |
+
"num_epochs": 10,
|
| 255 |
+
"batch_size": 1024,
|
| 256 |
+
"learning_rate": 0.01,
|
| 257 |
+
"weight_decay": 1e-05,
|
| 258 |
+
"warmup_epochs": 3,
|
| 259 |
+
"use_rose_loss": true,
|
| 260 |
+
"rose_initial_weight": 0.1,
|
| 261 |
+
"rose_max_weight": 0.8,
|
| 262 |
+
"rose_weight_schedule": "adaptive",
|
| 263 |
+
"use_cayley_loss": false,
|
| 264 |
+
"cayley_weight": 0.01,
|
| 265 |
+
"scale_loss_balance": null,
|
| 266 |
+
"use_mixed_precision": false,
|
| 267 |
+
"gradient_clip": 15.0,
|
| 268 |
+
"scheduler_type": "cosine_restarts",
|
| 269 |
+
"min_lr": 1e-06,
|
| 270 |
+
"freeze_strategy": "never",
|
| 271 |
+
"freeze_threshold": 90.0,
|
| 272 |
+
"unfreeze_on_plateau": true,
|
| 273 |
+
"patience": 10,
|
| 274 |
+
"track_gradients": true,
|
| 275 |
+
"gradient_scale_threshold": 1e-05,
|
| 276 |
+
"gradient_scale_multiplier": 10.0,
|
| 277 |
+
"log_interval": 50,
|
| 278 |
+
"val_interval": 1,
|
| 279 |
+
"save_interval": 5,
|
| 280 |
+
"log_fusion_weights": true,
|
| 281 |
+
"log_loss_components": true,
|
| 282 |
+
"save_format": "safetensors",
|
| 283 |
+
"hf_repo": "AbstractPhil/david-shared-space",
|
| 284 |
+
"upload_to_hub": true,
|
| 285 |
+
"base_dir": "./david_training",
|
| 286 |
+
"num_workers": 10,
|
| 287 |
+
"pin_memory": true,
|
| 288 |
+
"prefetch_factor": 4,
|
| 289 |
+
"persistent_workers": true
|
| 290 |
+
}
|
| 291 |
+
}
|