Update best_model_acc71.39_metadata.json - Run 20251012_141246
Browse files
weights/David-fully_shared-weighted_sum/20251012_141246/best_model_acc71.39_metadata.json
ADDED
|
@@ -0,0 +1,279 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 6,
|
| 3 |
+
"optimizer_state_dict": {
|
| 4 |
+
"state": {
|
| 5 |
+
"0": {
|
| 6 |
+
"step": "tensor(8764.)",
|
| 7 |
+
"exp_avg": "tensor([[-2.4349e-04, -2.2654e-03, -5.1946e-04, ..., -9.8929e-04,\n -2.0279e-04, 7.3567e-04],\n [ 2.4006e-04, 7.9619e-04, -5.7415e-04, ..., -2.8090e-04,\n -7.1119e-04, 2.3983e-04],\n [ 2.1843e-04, -1.5389e-05, 1.2614e-03, ..., -1.0665e-04,\n 4.0351e-04, -3.8134e-04],\n ...,\n [-7.3398e-04, 6.3932e-04, 6.7723e-04, ..., -1.4967e-03,\n 1.9048e-04, -1.4507e-04],\n [-8.9451e-04, -1.9885e-03, -3.1425e-03, ..., -1.1181e-03,\n -3.3708e-04, -1.0747e-04],\n [ 5.3654e-05, -3.6505e-03, 1.5602e-03, ..., 2.2699e-04,\n -2.0568e-04, 1.0763e-04]], device='cuda:0')",
|
| 8 |
+
"exp_avg_sq": "tensor([[4.4030e-06, 2.9651e-05, 1.2428e-05, ..., 3.1467e-06, 2.2760e-06,\n 2.7260e-06],\n [8.5751e-06, 4.5559e-05, 1.6291e-05, ..., 4.2304e-06, 3.6570e-06,\n 3.2979e-06],\n [3.7953e-06, 2.1540e-05, 1.1333e-05, ..., 2.4287e-06, 1.9110e-06,\n 2.5416e-06],\n ...,\n [7.8280e-06, 3.9403e-05, 2.3881e-05, ..., 8.4606e-06, 3.0742e-06,\n 8.5154e-06],\n [3.4287e-06, 2.3086e-05, 1.5647e-05, ..., 3.0539e-06, 1.8344e-06,\n 2.1556e-06],\n [9.3343e-06, 4.7331e-05, 1.9886e-05, ..., 4.6899e-06, 2.9090e-06,\n 3.3902e-06]], device='cuda:0')"
|
| 9 |
+
},
|
| 10 |
+
"1": {
|
| 11 |
+
"step": "tensor(8764.)",
|
| 12 |
+
"exp_avg": "tensor([-1.3266e-02, 4.6368e-02, -1.4410e-02, -8.0130e-04, 3.4629e-02,\n 1.8597e-02, 1.6029e-02, 1.6799e-02, -1.5680e-02, 2.4054e-02,\n -1.0313e-02, 5.0305e-03, -1.3031e-02, -1.0714e-02, -3.1066e-03,\n -1.3067e-02, 5.0217e-03, 3.1027e-02, -4.0027e-03, 9.9396e-03,\n -2.2293e-02, -1.5414e-02, 3.1446e-02, 6.5082e-03, -9.3904e-03,\n -5.9201e-02, -5.4351e-03, -1.8529e-02, 1.7064e-02, 1.6005e-03,\n 1.8375e-02, -1.5180e-03, 3.8236e-03, 2.7619e-02, -2.8372e-03,\n -2.3844e-02, -1.7933e-02, 5.1760e-03, 8.5523e-03, -2.3008e-04,\n -2.4925e-02, -1.7885e-02, 1.7173e-02, -3.7611e-02, 1.4634e-02,\n -1.0286e-02, -7.1116e-03, -3.1664e-03, -5.1471e-02, -3.4602e-02,\n -3.6279e-02, 1.7073e-02, -1.1842e-02, 1.3990e-02, 1.0182e-02,\n 3.8959e-02, -1.2633e-02, 4.9395e-03, 2.5306e-02, 6.9891e-03,\n 1.3758e-02, -1.9255e-02, 2.2844e-02, 1.8344e-02, 2.5398e-02,\n -2.5935e-04, 3.3369e-03, 1.0681e-02, -2.8162e-02, 5.1564e-03,\n -1.3558e-02, -5.6830e-03, -1.4059e-03, 2.1158e-02, 6.1993e-03,\n -3.2072e-02, 1.6100e-03, -3.4827e-02, 5.8532e-03, -2.7191e-03,\n 2.0613e-02, -6.2200e-02, 2.0570e-02, 1.6299e-02, -2.0505e-03,\n 7.2863e-03, -1.8630e-02, 2.7141e-02, 1.3834e-02, -1.1187e-02,\n 2.2176e-02, 2.6541e-02, 2.0181e-02, 2.1463e-02, 9.2602e-04,\n 5.9035e-03, 1.5825e-02, 3.8033e-03, -2.0652e-03, -3.4295e-02,\n -1.0180e-02, -1.9484e-02, -3.5160e-02, -1.5408e-02, 2.5276e-02,\n -1.0491e-04, -1.5353e-02, -1.9503e-02, 8.4460e-03, 3.0589e-02,\n 8.4181e-03, 1.9395e-04, 8.3626e-03, 2.4194e-02, 1.8381e-02,\n 4.4691e-03, -6.1328e-03, -2.6310e-03, 2.6272e-02, -1.9824e-02,\n 3.0019e-03, -1.0430e-02, 6.5874e-03, 2.3970e-02, -1.0405e-02,\n 2.2647e-02, -1.7021e-03, 1.3471e-02, 2.2348e-02, 4.3384e-02,\n -1.2274e-01, -2.9953e-03, 1.4057e-02, 6.8967e-03, 7.9560e-03,\n 1.1019e-02, -6.4201e-03, 1.2988e-02, -7.5804e-03, -7.9639e-02,\n -1.1527e-03, 2.4051e-02, 2.0664e-02, 5.2265e-03, -3.0153e-02,\n -3.8700e-02, -2.3249e-03, -7.1461e-03, 4.3387e-03, 1.7179e-02,\n -5.4997e-03, -1.0846e-02, 1.9678e-02, 9.2209e-03, -7.8679e-03,\n 6.9929e-03, -1.6440e-02, -5.9395e-03, -5.1184e-02, 1.1917e-02,\n 1.5143e-02, -2.0551e-02, -6.7452e-03, 2.6084e-03, 8.2201e-04,\n -8.6758e-03, 3.9120e-03, 2.2292e-02, -6.8428e-03, -1.2297e-02,\n -9.3508e-03, -2.8993e-03, 2.4048e-02, -6.6249e-03, 2.0808e-02,\n -1.6845e-02, -2.6173e-02, 9.7305e-03, -1.4662e-02, -6.3159e-05,\n 6.5500e-03, -1.1503e-02, 1.4655e-02, -9.5546e-03, -1.8014e-03,\n 2.1940e-02, 6.5762e-03, 3.0693e-02, -8.1404e-03, 1.6534e-02,\n -5.0555e-03, 1.3242e-04, -1.2293e-02, 2.5320e-03, 3.6563e-02,\n -1.5691e-02, 1.5916e-02, -3.1807e-03, 1.1081e-02, -1.9375e-02,\n -3.4324e-02, -2.1375e-03, -1.2351e-02, 1.6443e-02, -1.7255e-03,\n -1.3641e-03, 1.7480e-02, -2.7258e-03, 1.6903e-03, -3.1614e-02,\n -1.0654e-03, -2.4611e-03, 2.2055e-02, 9.6748e-03, -6.1015e-02,\n 5.3117e-02, 9.9686e-03, 2.5463e-03, 3.0223e-02, 5.9010e-03,\n -9.8381e-03, 1.4253e-02, -1.2060e-02, -1.1842e-02, 1.1036e-02,\n 9.5148e-03, 3.2977e-03, -8.0912e-03, 7.9043e-03, 8.5553e-03,\n 8.1267e-03, 1.9581e-02, 1.2066e-02, -1.8155e-03, 5.7337e-03,\n -5.1651e-04, -1.9279e-03, -7.8752e-03, -9.5507e-03, -4.6476e-02,\n 3.6448e-03, -2.5378e-02, -1.4638e-03, -1.7535e-02, 1.5403e-02,\n -9.7958e-03, -1.1820e-02, -1.0140e-02, -8.2914e-03, -7.4264e-03,\n -8.1822e-03, -5.7826e-03, -3.0630e-02, -2.6352e-03, -5.5908e-03,\n 3.1479e-02, -1.2862e-02, 1.2913e-03, 3.8341e-02, 9.4214e-02,\n -1.0325e-02, -2.5101e-02, 3.4513e-02, -1.0033e-02, -3.6334e-03,\n -2.4741e-03, 1.6916e-02, -1.9613e-02, -2.1182e-03, 1.3065e-02,\n 1.9524e-02, 5.3455e-03, 9.6963e-03, 1.4832e-02, 3.0316e-03,\n 8.2094e-03, -3.1812e-03, 1.1447e-02, -3.6455e-02, -2.9528e-03,\n 7.4051e-03, 5.4755e-02, -3.8315e-02, 9.1093e-03, -6.0375e-03,\n 3.8294e-03, -2.3290e-02, 8.5290e-03, -8.0257e-03, 1.3777e-02,\n 1.6122e-02, 1.0892e-02, -1.9209e-02, -2.5388e-02, 2.3471e-02,\n 1.5145e-02, -1.1434e-02, 7.3134e-03, -1.0674e-02, 3.8342e-02,\n 9.1741e-03, -5.6958e-03, 1.9142e-02, -2.0360e-02, 2.1511e-03,\n 4.9162e-02, 1.7578e-02, 1.0036e-02, -6.2794e-03, 1.0488e-02,\n 7.8993e-03, 3.9587e-03, -1.1643e-02, 2.0737e-02, -5.1006e-03,\n 2.9339e-03, -1.7836e-02, -2.2618e-03, 1.5866e-02, 5.8052e-03,\n 1.7592e-03, 2.4958e-03, -1.6302e-02, -1.9906e-02, -4.1008e-03,\n 2.0094e-02, 1.9165e-02, 4.3300e-03, 4.1939e-03, 4.4354e-02,\n 1.2441e-02, -4.8007e-02, -6.8367e-04, 2.1502e-02, 4.3819e-03,\n 2.1816e-02, -2.4664e-02, -5.0012e-03, 6.0463e-03, -1.9299e-02,\n -3.3159e-05, 5.1808e-02, 1.2659e-02, -1.6734e-02, -2.2232e-03,\n -1.4807e-02, -9.2214e-03, -1.9594e-02, -4.1933e-02, 1.5130e-02,\n 1.2258e-02, 2.0433e-02, -2.4992e-03, -1.1822e-03, -2.3413e-03,\n -1.7221e-02, -1.2752e-02, 1.3921e-02, -3.1829e-02, 2.8033e-02,\n -2.4539e-03, -5.5763e-03, 3.9301e-04, -7.1273e-03, -7.3812e-03,\n 1.2650e-02, -1.0270e-02, -3.8789e-03, 6.3937e-03, 2.3637e-02,\n 9.6949e-03, 4.4828e-03, 3.7177e-03, -2.5413e-02, -1.6845e-02,\n -2.3029e-02, 4.0649e-02, 8.0362e-03, 4.1396e-03, 1.5328e-02,\n 1.5110e-02, -1.0880e-02, -2.4461e-02, 6.8456e-03, 1.2230e-02,\n -7.2559e-02, -1.7523e-02, -1.0857e-02, -6.5230e-04, 2.1013e-02,\n 3.0748e-02, 5.9746e-05, -2.0030e-03, -2.3714e-02, 4.6671e-03,\n -2.2411e-02, -3.0160e-03, -2.3568e-02, 2.8145e-02, -2.6480e-02,\n -1.2636e-03, 2.7422e-03, 1.7341e-04, 6.7531e-04, -7.5763e-02,\n -4.0634e-02, 1.3994e-02, -1.7911e-02, -1.2567e-02, -3.8146e-03,\n 5.7541e-03, 7.7368e-03, -6.1076e-03, 3.8420e-02, 6.2463e-03,\n -2.9781e-02, 1.0699e-02, -1.6593e-02, -1.4810e-02, 9.4360e-03,\n 1.1887e-02, 8.5622e-03, -3.9596e-03, 1.1328e-02, 2.0925e-02,\n 2.9720e-03, 1.7929e-03, -3.5401e-03, 1.8729e-02, 5.6526e-03,\n 3.5945e-03, 5.3934e-02, 3.6562e-03, -6.5805e-03, -5.6410e-04,\n 7.5692e-03, 1.3622e-02, -6.6663e-02, 1.4986e-02, 3.1639e-02,\n 2.3035e-03, 1.7730e-02, -1.9697e-03, -1.7727e-02, -2.6900e-03,\n -4.1229e-03, 6.6485e-03, -3.1195e-03, -5.5053e-02, 4.3392e-03,\n -1.2986e-02, 4.4622e-03, -8.3546e-03, -5.5100e-03, -5.5153e-02,\n 8.5290e-03, -1.2746e-02, -9.8903e-03, -5.6863e-03, -7.9286e-03,\n -1.1952e-02, -1.1257e-02, 7.9037e-03, 1.7481e-02, 4.8889e-02,\n 6.9915e-03, -5.8151e-03, -4.4454e-02, 6.1563e-03, 2.1193e-02,\n 7.4344e-03, 1.2264e-02, -4.1606e-03, -2.0928e-02, 5.9082e-03,\n -5.1830e-04, 3.3835e-02, 2.3677e-02, 8.7015e-03, 2.4980e-03,\n 4.2729e-03, 2.3274e-02, -4.0700e-03, 1.3568e-02, -1.1782e-03,\n -3.0646e-02, -1.7604e-02, -1.0127e-01, 8.7966e-03, -7.2682e-03,\n 1.3777e-03, 2.8607e-02, -1.5031e-02, 2.2580e-02, -2.0546e-02,\n -1.5335e-02, -5.1929e-03, 2.0696e-02, 3.5286e-02, 2.5347e-03,\n -2.6295e-02, 1.8686e-02, -2.3784e-02, -1.7755e-02, 2.6031e-02,\n 1.3636e-02, 8.6087e-03, 2.2042e-03, -1.9776e-02, -2.2316e-02,\n 1.3013e-02, 5.4038e-03], device='cuda:0')",
|
| 13 |
+
"exp_avg_sq": "tensor([0.0034, 0.0052, 0.0026, 0.0023, 0.0035, 0.0054, 0.0020, 0.0037, 0.0039,\n 0.0041, 0.0025, 0.0032, 0.0067, 0.0022, 0.0057, 0.0062, 0.0039, 0.0036,\n 0.0030, 0.0044, 0.0033, 0.0070, 0.0046, 0.0033, 0.0019, 0.0068, 0.0038,\n 0.0050, 0.0053, 0.0040, 0.0036, 0.0084, 0.0027, 0.0099, 0.0032, 0.0059,\n 0.0035, 0.0033, 0.0035, 0.0053, 0.0037, 0.0043, 0.0038, 0.0037, 0.0062,\n 0.0037, 0.0026, 0.0031, 0.0043, 0.0045, 0.0051, 0.0030, 0.0031, 0.0034,\n 0.0035, 0.0032, 0.0034, 0.0020, 0.0026, 0.0034, 0.0051, 0.0027, 0.0066,\n 0.0043, 0.0054, 0.0063, 0.0029, 0.0034, 0.0042, 0.0031, 0.0046, 0.0024,\n 0.0032, 0.0076, 0.0050, 0.0021, 0.0064, 0.0031, 0.0038, 0.0055, 0.0046,\n 0.0064, 0.0023, 0.0059, 0.0042, 0.0051, 0.0034, 0.0066, 0.0053, 0.0028,\n 0.0081, 0.0049, 0.0032, 0.0035, 0.0049, 0.0020, 0.0027, 0.0043, 0.0023,\n 0.0033, 0.0038, 0.0046, 0.0040, 0.0030, 0.0027, 0.0058, 0.0060, 0.0024,\n 0.0042, 0.0050, 0.0038, 0.0044, 0.0056, 0.0027, 0.0039, 0.0027, 0.0019,\n 0.0029, 0.0067, 0.0050, 0.0046, 0.0097, 0.0063, 0.0050, 0.0068, 0.0051,\n 0.0027, 0.0018, 0.0057, 0.0057, 0.0071, 0.0034, 0.0044, 0.0033, 0.0022,\n 0.0013, 0.0029, 0.0042, 0.0014, 0.0056, 0.0047, 0.0035, 0.0047, 0.0036,\n 0.0048, 0.0037, 0.0033, 0.0028, 0.0020, 0.0039, 0.0041, 0.0028, 0.0040,\n 0.0032, 0.0032, 0.0024, 0.0037, 0.0036, 0.0055, 0.0021, 0.0034, 0.0025,\n 0.0031, 0.0034, 0.0037, 0.0032, 0.0028, 0.0031, 0.0054, 0.0062, 0.0059,\n 0.0052, 0.0058, 0.0039, 0.0040, 0.0024, 0.0052, 0.0036, 0.0042, 0.0039,\n 0.0047, 0.0029, 0.0023, 0.0042, 0.0043, 0.0056, 0.0053, 0.0051, 0.0051,\n 0.0044, 0.0041, 0.0027, 0.0035, 0.0039, 0.0037, 0.0067, 0.0024, 0.0032,\n 0.0025, 0.0035, 0.0024, 0.0022, 0.0038, 0.0044, 0.0048, 0.0041, 0.0091,\n 0.0039, 0.0016, 0.0036, 0.0034, 0.0021, 0.0063, 0.0026, 0.0042, 0.0051,\n 0.0035, 0.0044, 0.0028, 0.0045, 0.0029, 0.0046, 0.0050, 0.0041, 0.0046,\n 0.0026, 0.0016, 0.0038, 0.0061, 0.0027, 0.0029, 0.0030, 0.0041, 0.0057,\n 0.0043, 0.0018, 0.0027, 0.0030, 0.0043, 0.0041, 0.0013, 0.0032, 0.0045,\n 0.0050, 0.0043, 0.0032, 0.0033, 0.0038, 0.0036, 0.0054, 0.0015, 0.0029,\n 0.0050, 0.0043, 0.0039, 0.0065, 0.0030, 0.0043, 0.0028, 0.0068, 0.0037,\n 0.0048, 0.0044, 0.0047, 0.0033, 0.0023, 0.0033, 0.0025, 0.0029, 0.0066,\n 0.0044, 0.0034, 0.0036, 0.0057, 0.0047, 0.0020, 0.0029, 0.0035, 0.0055,\n 0.0039, 0.0028, 0.0061, 0.0056, 0.0018, 0.0012, 0.0034, 0.0031, 0.0045,\n 0.0026, 0.0040, 0.0025, 0.0019, 0.0037, 0.0037, 0.0035, 0.0044, 0.0031,\n 0.0017, 0.0046, 0.0084, 0.0034, 0.0023, 0.0030, 0.0038, 0.0045, 0.0050,\n 0.0046, 0.0035, 0.0060, 0.0033, 0.0065, 0.0039, 0.0055, 0.0045, 0.0046,\n 0.0039, 0.0041, 0.0045, 0.0036, 0.0042, 0.0034, 0.0041, 0.0030, 0.0040,\n 0.0027, 0.0057, 0.0041, 0.0034, 0.0034, 0.0037, 0.0032, 0.0042, 0.0032,\n 0.0036, 0.0061, 0.0042, 0.0025, 0.0017, 0.0040, 0.0044, 0.0055, 0.0080,\n 0.0022, 0.0032, 0.0037, 0.0055, 0.0031, 0.0024, 0.0047, 0.0026, 0.0021,\n 0.0047, 0.0031, 0.0029, 0.0028, 0.0028, 0.0022, 0.0038, 0.0041, 0.0026,\n 0.0031, 0.0087, 0.0024, 0.0110, 0.0041, 0.0042, 0.0015, 0.0030, 0.0028,\n 0.0029, 0.0043, 0.0040, 0.0039, 0.0027, 0.0022, 0.0056, 0.0047, 0.0041,\n 0.0061, 0.0038, 0.0032, 0.0014, 0.0024, 0.0021, 0.0043, 0.0071, 0.0047,\n 0.0028, 0.0030, 0.0072, 0.0029, 0.0030, 0.0021, 0.0037, 0.0048, 0.0036,\n 0.0033, 0.0036, 0.0058, 0.0099, 0.0023, 0.0030, 0.0028, 0.0030, 0.0038,\n 0.0067, 0.0032, 0.0021, 0.0034, 0.0036, 0.0030, 0.0028, 0.0018, 0.0052,\n 0.0045, 0.0057, 0.0037, 0.0090, 0.0033, 0.0064, 0.0053, 0.0032, 0.0070,\n 0.0028, 0.0051, 0.0026, 0.0028, 0.0041, 0.0025, 0.0030, 0.0035, 0.0091,\n 0.0059, 0.0031, 0.0037, 0.0026, 0.0063, 0.0079, 0.0027, 0.0041, 0.0064,\n 0.0037, 0.0022, 0.0037, 0.0070, 0.0028, 0.0020, 0.0029, 0.0055, 0.0029,\n 0.0036, 0.0046, 0.0034, 0.0037, 0.0042, 0.0057, 0.0051, 0.0023, 0.0038,\n 0.0038, 0.0022, 0.0037, 0.0040, 0.0038, 0.0061, 0.0022, 0.0031, 0.0071,\n 0.0041, 0.0067, 0.0032, 0.0037, 0.0033, 0.0032, 0.0054, 0.0018, 0.0065,\n 0.0021, 0.0029, 0.0038, 0.0041, 0.0047, 0.0041, 0.0080, 0.0055, 0.0034,\n 0.0033, 0.0077, 0.0045, 0.0042, 0.0028, 0.0042, 0.0031, 0.0033, 0.0037,\n 0.0066, 0.0031, 0.0065, 0.0034, 0.0037, 0.0037, 0.0017, 0.0023, 0.0033,\n 0.0022, 0.0026, 0.0039, 0.0006, 0.0042, 0.0068, 0.0029, 0.0045],\n device='cuda:0')"
|
| 14 |
+
},
|
| 15 |
+
"2": {
|
| 16 |
+
"step": "tensor(8764.)",
|
| 17 |
+
"exp_avg": "tensor([-4.0680e-03, 6.6829e-03, -4.0787e-03, -1.8016e-04, 8.4884e-03,\n 1.5624e-03, 7.2976e-03, 2.0502e-03, -2.1841e-03, 3.0244e-03,\n -4.3089e-03, 1.7236e-03, -5.1342e-04, -2.1223e-03, -1.5256e-04,\n -1.5778e-03, -2.8637e-04, 4.8969e-03, -7.4922e-04, 1.3567e-03,\n -2.8959e-03, -2.3165e-03, 4.1959e-03, 1.4452e-03, -4.2157e-03,\n -8.3434e-03, -2.1071e-03, -2.1094e-03, 2.4163e-03, -1.0830e-03,\n 3.7457e-03, 2.6193e-04, -3.7832e-05, 2.7292e-03, -6.9472e-04,\n -1.9404e-03, -2.7867e-03, 1.0868e-03, 1.5758e-03, 9.4112e-04,\n -5.1353e-03, -4.9524e-03, 4.6131e-03, -8.7678e-03, 2.3880e-03,\n -2.2376e-03, -1.3755e-03, -1.1606e-03, -6.4396e-03, -6.8079e-03,\n -6.1988e-03, 4.3236e-03, -7.0828e-03, 2.0882e-03, 1.1895e-03,\n 1.3711e-02, -8.9461e-04, 2.6716e-03, 1.0253e-02, 1.8433e-03,\n 1.9521e-03, -5.7145e-03, 3.9475e-03, 2.9740e-03, 6.5288e-03,\n -3.4333e-03, 3.6066e-04, 2.0653e-03, -3.5932e-03, 1.6256e-03,\n -2.2062e-03, -2.2798e-03, 2.8133e-04, 2.8591e-03, 9.7867e-05,\n -1.2068e-02, 1.0360e-03, -1.0464e-02, 1.4009e-03, -1.0464e-03,\n 3.3302e-03, -1.1588e-02, 6.7534e-03, 2.0216e-03, -1.4075e-03,\n 6.2614e-04, -6.3607e-03, 3.6016e-03, 2.1350e-03, -2.4688e-03,\n 2.1657e-03, 3.9185e-03, 1.8532e-03, 5.2265e-03, -2.0087e-03,\n 2.3121e-03, 3.0012e-03, -1.1035e-04, -5.4227e-05, -4.9050e-03,\n -8.2653e-04, -6.5472e-03, -3.7804e-03, -4.4695e-03, 5.0242e-03,\n -1.1241e-03, -1.2884e-03, -4.5279e-03, 1.3628e-03, 3.7526e-03,\n 2.3020e-03, 3.5751e-04, 4.4867e-04, 5.6730e-03, 3.8009e-03,\n 6.2181e-04, -2.5449e-03, -1.7293e-03, 3.8671e-03, -4.7071e-03,\n 1.1478e-03, -1.7092e-03, 1.4690e-03, 4.5357e-03, -9.7741e-04,\n 2.4749e-03, 1.4821e-04, 4.2575e-03, 2.9339e-03, 5.9005e-03,\n -1.1294e-02, -1.8975e-03, 1.6888e-03, 1.5766e-03, 2.4795e-03,\n 1.2842e-02, -2.2842e-03, 1.3989e-03, -6.2393e-03, -1.3558e-02,\n 5.2529e-05, 2.5652e-03, 3.2737e-03, 1.6620e-03, -6.5159e-03,\n -7.9261e-03, -7.9064e-04, -4.6334e-03, -7.1653e-04, 2.9948e-03,\n -2.6628e-03, -6.5526e-03, 4.2495e-03, 2.0598e-03, -9.5028e-04,\n 3.4877e-03, -3.2299e-03, -2.6964e-03, -9.1063e-03, 2.7588e-03,\n 3.4673e-03, -7.1249e-03, -4.0270e-03, 1.2985e-03, -2.6483e-04,\n -2.0158e-03, 1.4436e-03, 6.2287e-03, -1.3672e-03, -8.1019e-04,\n -1.4853e-03, -1.1854e-03, 3.8812e-03, -2.3983e-03, 2.2840e-03,\n -4.1452e-03, -2.8599e-03, 9.5202e-04, -2.6844e-03, 3.3114e-04,\n 1.5023e-03, -3.9557e-03, 5.0164e-03, -9.9764e-04, 1.5500e-03,\n 1.8694e-03, 4.9472e-05, 4.3471e-03, -1.3762e-03, 2.4468e-03,\n -2.1882e-03, 3.3970e-04, -1.0092e-03, 3.3181e-04, 9.0714e-03,\n -2.8398e-03, 7.1258e-03, -2.1074e-03, 4.8543e-03, -1.0519e-03,\n -1.0214e-02, -5.5461e-04, -3.4376e-03, 1.9021e-03, -4.0464e-04,\n -7.9887e-04, 2.2734e-03, -4.8803e-04, 1.0294e-03, -4.3881e-03,\n -7.2529e-04, -1.3958e-03, 2.6024e-03, 2.1258e-03, -1.3034e-02,\n 9.3739e-03, 1.3062e-03, -2.1675e-04, 1.1523e-02, 1.2196e-03,\n -3.0982e-03, 2.1356e-03, -9.5575e-04, -1.0264e-03, 2.6563e-03,\n 3.7361e-03, -7.9376e-04, -1.9063e-03, 1.0342e-03, 3.6307e-03,\n 6.3311e-04, 3.5945e-03, 2.2012e-03, 1.1716e-03, 7.0318e-04,\n 4.8124e-04, -7.0214e-05, -4.2546e-04, -5.8128e-04, -8.6485e-03,\n 2.2620e-03, -6.3424e-03, -2.8102e-04, -2.5668e-03, 3.6279e-03,\n -2.1638e-03, -9.8701e-04, -3.8217e-03, -2.2617e-03, -1.1474e-03,\n -4.1215e-03, -1.8431e-03, -2.2027e-03, 1.1814e-04, -9.7352e-04,\n 4.1784e-03, -3.8049e-03, -2.6742e-04, 1.1670e-02, 1.4902e-02,\n -2.1628e-03, -5.0245e-03, 5.3476e-03, 3.6585e-04, -1.8104e-03,\n -1.4506e-03, 5.4125e-03, -7.6390e-03, -2.2429e-04, 1.7042e-03,\n 3.2557e-03, 9.8123e-05, 1.9050e-03, 1.5403e-03, 1.9840e-04,\n 2.6833e-03, -4.6434e-04, 1.6439e-03, -4.6395e-03, 1.0075e-03,\n 1.4114e-03, 8.2409e-03, -4.2906e-03, 7.9231e-03, -5.8610e-03,\n 2.3018e-04, -2.0794e-03, 1.8896e-03, -3.0763e-03, 3.3588e-03,\n 5.2528e-03, 5.0565e-03, -2.4846e-03, -4.3998e-03, 2.9396e-03,\n 2.2370e-03, -1.5469e-03, 4.8048e-03, -2.6728e-03, 5.9849e-03,\n 1.9466e-03, -1.5960e-03, 6.0741e-03, -3.3812e-03, 9.2527e-05,\n 6.2893e-03, 3.4826e-03, 1.5398e-03, -9.1201e-04, 2.5528e-03,\n 9.8967e-04, 9.7769e-04, -1.7335e-03, 6.2394e-03, -1.4147e-03,\n 2.6331e-04, -3.2694e-03, -1.0400e-03, 3.5490e-03, 7.1708e-04,\n -1.0278e-03, 6.2343e-04, -1.5566e-03, -2.4536e-03, -5.5344e-04,\n 3.1793e-03, 6.1909e-03, 1.5933e-03, 1.5963e-03, 1.0084e-02,\n 2.5273e-03, -8.4816e-03, -4.3671e-04, 4.3394e-03, 7.6875e-04,\n 1.6751e-03, -7.8131e-03, -3.6579e-03, 4.2378e-04, -4.1793e-03,\n 1.4563e-03, 6.5346e-03, 7.0314e-03, -2.6943e-03, -1.3790e-04,\n -1.6595e-03, -1.1661e-03, -4.4729e-03, -5.4177e-03, 5.2611e-03,\n 5.7537e-03, 3.5971e-03, 7.1272e-06, -3.9306e-04, -8.7059e-04,\n -5.7978e-03, -3.6959e-03, 2.5597e-03, -5.0909e-03, 7.4778e-03,\n -7.1718e-04, -9.0106e-04, -4.9618e-04, -1.4329e-03, -1.2762e-03,\n 2.0640e-03, -1.0938e-02, -2.6342e-04, -3.1378e-05, 8.2004e-03,\n 2.1231e-03, -7.6530e-06, 3.0492e-05, -9.6797e-03, -5.6723e-03,\n -1.1710e-03, 6.8684e-03, 4.4332e-04, -2.0038e-04, 2.8719e-03,\n 3.1773e-03, -6.3351e-03, -9.8713e-03, 2.6320e-03, 2.7958e-03,\n -8.1069e-03, -2.6043e-03, -2.1093e-03, -7.7829e-04, 2.9886e-03,\n 6.4509e-03, -9.3528e-04, -1.2716e-03, -4.2661e-03, 8.9020e-05,\n -4.7339e-03, -1.6677e-03, -4.6050e-03, 4.4192e-03, -4.9084e-03,\n -5.9069e-04, 8.8436e-04, -8.8658e-04, -1.1028e-03, -1.7460e-02,\n -5.0507e-03, 1.6745e-03, -6.4552e-03, -3.2592e-03, -1.0059e-03,\n 2.0340e-03, 1.2811e-03, -1.9346e-03, 5.8313e-03, 2.9926e-05,\n -3.1227e-03, 2.6390e-03, -1.7706e-03, -2.8282e-03, 8.3224e-04,\n 1.5629e-03, 1.8875e-03, -9.4519e-04, 3.4580e-03, 3.5690e-03,\n 1.9048e-04, 4.1451e-04, -8.0689e-04, 3.3160e-03, 1.7355e-03,\n 1.5005e-03, 6.1917e-03, -7.5888e-04, -1.4643e-03, -5.5097e-04,\n 5.8780e-04, 2.3577e-03, -7.0522e-03, 4.0002e-03, 4.4550e-03,\n 7.3450e-04, 4.3019e-03, -3.5777e-03, -2.7456e-03, -1.2502e-03,\n -1.8686e-03, 1.5768e-03, 3.6493e-04, -4.9217e-03, 1.2380e-03,\n -3.4452e-03, -5.0734e-04, -1.5954e-03, 1.4434e-03, -1.1108e-02,\n 1.6037e-03, -3.9104e-03, -3.0820e-03, -1.1715e-03, -2.8504e-03,\n -2.3969e-03, -4.3910e-03, 1.7937e-03, 2.7345e-03, 7.6946e-03,\n 1.1213e-03, -9.0114e-04, -6.0254e-03, 1.6156e-03, 2.4866e-03,\n 2.1938e-03, 1.5718e-03, -9.9729e-04, -3.4472e-03, 2.7188e-03,\n 4.8034e-04, 3.7786e-03, 6.2040e-03, 1.3255e-03, 1.4229e-03,\n 1.3139e-03, 2.5176e-03, -1.8530e-03, 2.0506e-03, -2.2643e-04,\n -8.9930e-03, -4.5082e-03, -9.7199e-03, 1.7994e-03, -5.3257e-04,\n -5.0382e-04, 3.9093e-03, -5.5638e-03, 4.4178e-03, -3.6450e-03,\n -2.4753e-03, -2.7799e-03, 1.6480e-03, 8.7352e-03, 1.6124e-05,\n -3.6298e-03, 7.1878e-03, -6.3305e-03, -3.9841e-03, 5.4878e-03,\n 4.6766e-03, 2.3168e-03, -1.4455e-01, -3.5436e-03, -3.2556e-03,\n 6.4060e-03, 2.3144e-03], device='cuda:0')",
|
| 18 |
+
"exp_avg_sq": "tensor([2.1025e-04, 8.7411e-05, 1.7924e-04, 1.4429e-04, 2.0654e-04, 1.2018e-04,\n 3.7673e-04, 5.9840e-05, 5.7073e-05, 9.7911e-05, 3.1859e-04, 8.0786e-05,\n 1.0257e-04, 1.6294e-04, 6.1875e-05, 1.0636e-04, 1.4079e-04, 9.6362e-05,\n 9.3796e-05, 1.5696e-04, 5.3682e-05, 8.7110e-05, 8.6687e-05, 2.8604e-04,\n 2.7608e-04, 1.1627e-04, 1.6130e-04, 1.3550e-04, 1.1808e-04, 1.2803e-04,\n 1.3589e-04, 1.2237e-04, 1.5558e-04, 1.2602e-04, 1.8509e-04, 6.5029e-05,\n 1.0768e-04, 1.1301e-04, 1.7861e-04, 2.6032e-04, 2.0977e-04, 1.9632e-04,\n 2.8918e-04, 1.3710e-04, 6.4941e-05, 2.2480e-04, 2.0865e-04, 1.5848e-04,\n 7.7382e-05, 1.2218e-04, 1.2277e-04, 1.9869e-04, 2.0547e-04, 7.6832e-05,\n 1.9724e-04, 2.3235e-04, 5.5203e-05, 2.6824e-04, 4.2097e-04, 2.1260e-04,\n 1.1507e-04, 2.1045e-04, 2.1765e-04, 1.1671e-04, 2.3176e-04, 1.9440e-04,\n 1.0187e-04, 1.1065e-04, 9.1788e-05, 1.1103e-04, 1.0417e-04, 1.7801e-04,\n 8.8741e-05, 8.0917e-05, 1.4228e-04, 3.2109e-04, 9.0006e-05, 3.2619e-04,\n 7.4125e-05, 6.3685e-05, 1.2444e-04, 2.0297e-04, 1.8925e-04, 5.4315e-05,\n 2.5201e-04, 1.3618e-04, 1.9670e-04, 8.8947e-05, 5.1160e-05, 1.6713e-04,\n 8.5189e-05, 1.9347e-04, 6.5999e-05, 1.6257e-04, 1.2605e-04, 3.6724e-04,\n 1.2145e-04, 9.2588e-05, 2.3121e-04, 5.5395e-05, 5.7084e-05, 1.2494e-04,\n 7.4406e-05, 1.8037e-04, 1.2969e-04, 8.0106e-05, 1.5997e-04, 1.0908e-04,\n 1.3435e-04, 9.1733e-05, 1.6036e-04, 6.3087e-05, 1.0416e-04, 7.2124e-05,\n 9.8972e-05, 3.7707e-04, 2.6897e-04, 1.1354e-04, 1.7782e-04, 1.8461e-04,\n 8.8950e-05, 8.5247e-05, 1.8298e-04, 2.9014e-04, 1.0301e-04, 1.3190e-04,\n 8.2512e-05, 2.1707e-04, 8.7892e-05, 1.3351e-04, 1.0253e-04, 1.2652e-04,\n 1.1383e-04, 1.9378e-04, 2.2316e-04, 9.2016e-04, 1.1873e-04, 9.0543e-05,\n 5.3516e-04, 2.0310e-04, 1.3806e-04, 6.4406e-05, 5.9716e-05, 1.3861e-04,\n 2.2753e-04, 2.1408e-04, 1.1946e-04, 2.3487e-04, 5.1592e-04, 1.7329e-04,\n 1.1437e-04, 9.6918e-04, 1.9854e-04, 2.3933e-04, 9.7221e-05, 5.4439e-04,\n 1.0052e-04, 3.1909e-04, 1.3719e-04, 1.0128e-04, 9.8727e-05, 4.0485e-04,\n 2.4252e-04, 1.8288e-04, 1.2445e-04, 1.2917e-04, 4.3657e-04, 1.6622e-04,\n 7.9802e-05, 9.6619e-05, 9.5552e-05, 7.8573e-05, 1.1118e-04, 2.0060e-04,\n 9.9065e-05, 1.9583e-04, 1.1147e-04, 7.3112e-05, 1.0321e-04, 9.8594e-05,\n 2.4663e-04, 2.4598e-04, 4.0772e-04, 9.9268e-05, 1.8248e-04, 6.9313e-05,\n 1.1379e-04, 7.1711e-05, 1.7627e-04, 1.1872e-04, 1.6852e-04, 7.0406e-05,\n 8.7358e-05, 2.3674e-04, 1.5343e-04, 6.1271e-05, 4.7741e-04, 7.7236e-05,\n 4.6214e-04, 1.3541e-04, 2.1793e-04, 1.6930e-04, 2.1504e-04, 8.9881e-05,\n 9.3763e-05, 9.4924e-05, 1.0339e-04, 7.7618e-05, 3.0463e-04, 7.5740e-05,\n 3.6668e-04, 4.0490e-04, 1.1781e-04, 2.1447e-04, 2.1658e-04, 1.8518e-04,\n 7.3888e-05, 2.5723e-04, 3.6766e-04, 6.1637e-05, 2.3390e-04, 2.4576e-04,\n 6.9650e-05, 6.0344e-05, 1.6126e-04, 5.0821e-04, 5.6174e-04, 9.9798e-05,\n 8.9209e-05, 1.3210e-04, 1.0979e-04, 1.1444e-04, 1.0168e-04, 7.5269e-05,\n 1.0564e-04, 2.9496e-04, 3.1386e-04, 1.8358e-04, 6.6414e-05, 1.5119e-04,\n 1.2409e-03, 1.4062e-04, 1.7512e-04, 2.4908e-04, 2.1004e-04, 2.0707e-04,\n 2.1887e-04, 2.3764e-04, 1.3090e-04, 1.3041e-04, 6.6097e-04, 3.3459e-04,\n 8.2570e-05, 9.3883e-05, 1.8240e-04, 1.0595e-04, 2.6103e-04, 2.7093e-04,\n 2.6510e-04, 1.7051e-04, 1.7994e-04, 1.4801e-04, 7.6559e-05, 2.0037e-04,\n 2.1445e-04, 2.4924e-04, 2.9812e-04, 3.3543e-04, 2.5849e-04, 6.2746e-05,\n 1.3177e-04, 2.1197e-04, 2.4829e-04, 5.4273e-05, 1.0560e-04, 2.0449e-04,\n 2.1835e-04, 1.0353e-04, 7.9134e-05, 3.4156e-04, 1.1306e-04, 1.1294e-04,\n 9.5661e-05, 6.9279e-04, 4.5332e-04, 8.2891e-05, 9.1189e-05, 9.3172e-05,\n 2.2766e-04, 3.6631e-04, 2.7715e-04, 5.0063e-04, 1.0936e-04, 1.1700e-04,\n 1.0269e-04, 1.3588e-04, 1.0972e-04, 7.1013e-04, 1.9231e-04, 1.3123e-04,\n 1.0980e-04, 1.8728e-04, 1.7838e-04, 1.3814e-04, 2.0683e-04, 6.4718e-05,\n 1.6672e-04, 1.0639e-04, 1.4202e-04, 1.1064e-04, 1.6677e-04, 2.1521e-04,\n 1.0703e-04, 2.9808e-04, 7.8335e-05, 1.8881e-04, 1.3495e-04, 1.1232e-04,\n 1.8389e-04, 6.4927e-05, 1.2400e-04, 1.6511e-04, 1.2086e-04, 1.6998e-04,\n 1.1152e-04, 1.0526e-04, 3.5814e-04, 3.4395e-04, 2.0965e-04, 2.4283e-04,\n 9.4004e-05, 1.2789e-04, 8.5052e-05, 1.3743e-04, 1.6823e-04, 4.6968e-05,\n 1.3806e-04, 3.2699e-04, 7.5141e-05, 1.1951e-04, 8.6793e-05, 9.8576e-05,\n 7.1066e-04, 1.1094e-04, 5.9652e-05, 8.0657e-05, 1.1781e-04, 1.3616e-04,\n 1.1865e-04, 4.2777e-04, 2.6487e-04, 9.5215e-05, 1.1157e-04, 1.3377e-04,\n 1.0910e-04, 2.0210e-04, 2.6841e-04, 1.7799e-04, 5.5520e-05, 2.7397e-04,\n 2.5837e-04, 1.5563e-04, 1.7979e-04, 1.2383e-04, 9.3696e-05, 7.2004e-05,\n 1.2970e-03, 6.8119e-05, 1.5436e-04, 2.8731e-04, 8.6846e-05, 7.3584e-05,\n 6.5582e-05, 2.1448e-04, 1.9694e-04, 9.8653e-05, 7.4587e-05, 1.5967e-04,\n 1.0897e-04, 9.8348e-05, 6.3517e-05, 5.4152e-04, 2.7431e-04, 1.8886e-04,\n 1.0729e-04, 7.0896e-05, 6.7577e-05, 2.8418e-04, 2.4551e-04, 1.2420e-04,\n 2.0584e-04, 1.1189e-04, 2.2779e-04, 1.2513e-04, 9.9707e-05, 1.3764e-04,\n 8.9317e-05, 1.2196e-04, 1.0455e-04, 1.7773e-04, 2.4484e-04, 1.0559e-04,\n 2.8015e-04, 6.9825e-05, 2.6680e-04, 9.5509e-05, 1.2276e-04, 2.8149e-04,\n 1.2438e-04, 1.4398e-04, 1.2841e-04, 1.3106e-04, 1.8445e-04, 9.3691e-05,\n 8.2548e-05, 6.5640e-05, 1.6902e-04, 1.0488e-04, 1.5004e-04, 1.0753e-04,\n 7.0599e-05, 1.0770e-04, 8.9417e-05, 3.9647e-04, 1.2246e-04, 2.9822e-04,\n 2.1650e-04, 1.7541e-04, 6.3975e-05, 3.0636e-04, 1.6520e-04, 7.9707e-05,\n 1.6228e-04, 1.4587e-04, 1.1975e-04, 8.9246e-05, 8.7703e-05, 1.5295e-04,\n 1.8199e-04, 1.3077e-04, 1.8871e-04, 1.4023e-04, 4.6005e-04, 1.4552e-04,\n 1.0183e-04, 1.1331e-04, 1.7047e-04, 1.3946e-04, 5.2619e-05, 2.5633e-04,\n 1.0036e-04, 7.8853e-05, 1.3289e-04, 1.8456e-04, 2.2933e-04, 5.6752e-05,\n 2.0271e-04, 2.2780e-04, 1.4331e-04, 2.3097e-04, 2.9699e-04, 2.6847e-04,\n 2.8697e-04, 8.2176e-05, 1.3481e-04, 9.2793e-05, 1.0390e-04, 8.7775e-05,\n 3.0503e-04, 1.0818e-04, 1.5296e-04, 8.6271e-05, 2.3226e-04, 1.6166e-04,\n 1.0820e-04, 5.7114e-04, 1.1568e-04, 1.1255e-04, 1.9537e-04, 2.0264e-04,\n 9.3698e-05, 9.4242e-05, 2.5837e-04, 2.7005e-04, 1.0311e-04, 3.4433e-04,\n 2.3452e-04, 1.3983e-04, 7.2770e-05, 1.3800e-04, 1.4788e-04, 7.6760e-05,\n 2.6855e-04, 1.4813e-04, 1.2082e-04, 2.1239e-04, 3.0557e-04, 6.5108e-05,\n 3.0793e-04, 6.6862e-05, 7.0979e-05, 3.8564e-04, 1.6169e-04, 1.8404e-04,\n 1.6753e-04, 2.7472e-04, 2.7524e-04, 2.7890e-01, 1.4457e-04, 1.0759e-04,\n 2.9196e-04, 1.0324e-04], device='cuda:0')"
|
| 19 |
+
},
|
| 20 |
+
"3": {
|
| 21 |
+
"step": "tensor(8764.)",
|
| 22 |
+
"exp_avg": "tensor([-3.1446e-03, 7.2933e-03, -3.6360e-03, -5.5043e-04, 7.9501e-03,\n 3.3994e-03, 5.0660e-03, 3.6845e-03, -2.4208e-03, 4.1630e-03,\n -2.9973e-03, 1.3232e-03, -2.6776e-03, -2.2305e-03, -4.4358e-04,\n -2.5890e-03, 2.9634e-04, 5.2427e-03, -5.5870e-04, 1.4315e-03,\n -3.6585e-03, -2.1247e-03, 4.9263e-03, 2.0698e-03, -2.6113e-03,\n -9.5978e-03, -1.1320e-03, -2.9902e-03, 3.4552e-03, 3.0080e-04,\n 3.2955e-03, -3.2033e-04, 6.3931e-04, 4.2508e-03, -8.8640e-04,\n -3.0292e-03, -3.7909e-03, 7.4300e-04, 1.7475e-03, 7.3000e-04,\n -5.3478e-03, -5.0915e-03, 4.2397e-03, -7.8890e-03, 2.6963e-03,\n -2.3622e-03, -1.6556e-03, -6.5232e-04, -8.0798e-03, -7.9774e-03,\n -7.3410e-03, 4.2161e-03, -4.1827e-03, 2.4598e-03, 1.1289e-03,\n 1.0072e-02, -1.9242e-03, 1.6081e-03, 6.4573e-03, 1.7735e-03,\n 2.7680e-03, -4.0351e-03, 4.9482e-03, 3.5980e-03, 5.6934e-03,\n -1.1951e-03, 7.5877e-04, 2.2049e-03, -5.0941e-03, 1.0087e-03,\n -2.4129e-03, -1.3436e-03, -1.6936e-05, 2.9676e-03, 9.2000e-04,\n -9.3412e-03, 5.3564e-04, -6.7859e-03, 1.4453e-03, -6.7259e-04,\n 3.6275e-03, -1.2426e-02, 5.4652e-03, 2.5446e-03, -1.1578e-03,\n 1.8533e-03, -5.2025e-03, 4.5618e-03, 2.5702e-03, -2.6664e-03,\n 3.2512e-03, 5.1869e-03, 3.1100e-03, 4.7962e-03, -1.3784e-03,\n 2.1698e-03, 2.7964e-03, 5.2959e-04, 3.3660e-04, -5.8887e-03,\n -1.3166e-03, -5.0593e-03, -5.8572e-03, -3.9161e-03, 5.2531e-03,\n -6.8710e-04, -2.1595e-03, -4.5833e-03, 1.7187e-03, 5.2974e-03,\n 1.5051e-03, 3.7649e-05, 1.7947e-03, 4.4269e-03, 3.3978e-03,\n 9.5620e-04, -2.6730e-03, -1.2593e-03, 4.8152e-03, -4.6381e-03,\n 9.4316e-06, -1.3703e-03, 7.5958e-04, 4.0114e-03, -1.2299e-03,\n 3.8145e-03, -2.8943e-04, 3.3981e-03, 3.6474e-03, 8.4447e-03,\n -2.0105e-02, -1.1732e-03, 2.6113e-03, 1.1334e-03, 1.8108e-03,\n 4.9357e-03, -1.5265e-03, 1.6796e-03, -3.1251e-03, -1.6140e-02,\n -1.2659e-04, 3.9482e-03, 3.9825e-03, 1.3466e-03, -6.7201e-03,\n -7.6116e-03, -4.6559e-04, -2.5185e-03, -4.5415e-04, 3.7115e-03,\n -1.5391e-03, -3.2643e-03, 4.2852e-03, 1.9565e-03, -1.7033e-03,\n 1.8455e-03, -2.7869e-03, -2.5938e-03, -9.6308e-03, 2.3731e-03,\n 3.2551e-03, -5.5294e-03, -2.5105e-03, 6.8156e-04, 1.7218e-04,\n -1.5073e-03, 9.5231e-04, 4.1080e-03, -1.3634e-03, -1.7384e-03,\n -1.7523e-03, -6.8153e-04, 4.4099e-03, -1.6776e-03, 3.6447e-03,\n -3.8985e-03, -3.7700e-03, 1.6515e-03, -2.8655e-03, 4.0447e-04,\n 8.3385e-04, -3.1374e-03, 3.4825e-03, -1.5632e-03, -7.3805e-04,\n 3.7093e-03, 2.6932e-04, 4.7526e-03, -3.0150e-03, 3.1336e-03,\n -1.7488e-03, -2.0113e-04, -1.8180e-03, 8.7812e-04, 8.1475e-03,\n -2.5080e-03, 4.6059e-03, -5.4518e-04, 2.7018e-03, -2.2743e-03,\n -9.4340e-03, -1.0520e-03, -3.0576e-03, 2.8620e-03, -1.5518e-04,\n -5.0430e-04, 2.6915e-03, -4.9127e-04, 1.0237e-03, -5.8297e-03,\n -4.1706e-04, -5.8308e-04, 4.2837e-03, 2.0417e-03, -1.2079e-02,\n 1.0709e-02, 2.2942e-03, 8.0430e-04, 9.7957e-03, 1.2909e-03,\n -2.9868e-03, 2.1702e-03, -1.9085e-03, -2.0753e-03, 2.7650e-03,\n 2.1198e-03, -1.9608e-04, -1.8632e-03, 1.1559e-03, 1.7268e-03,\n 1.5351e-03, 3.7574e-03, 2.6944e-03, 3.1048e-04, 1.0002e-03,\n -4.6006e-04, -2.6163e-04, -1.6350e-03, -1.3288e-03, -8.6811e-03,\n 6.6436e-04, -5.6426e-03, -6.1352e-04, -3.2720e-03, 3.4481e-03,\n -2.1031e-03, -2.2311e-03, -2.3900e-03, -2.4803e-03, -1.3355e-03,\n -2.2686e-03, -1.5767e-03, -4.0264e-03, -6.1216e-04, -1.1988e-03,\n 5.9192e-03, -3.1476e-03, -8.9213e-05, 9.6932e-03, 1.7681e-02,\n -2.4735e-03, -5.0010e-03, 5.8883e-03, -1.4241e-03, -1.2863e-03,\n -8.6098e-04, 4.4312e-03, -4.9726e-03, -7.2715e-04, 2.6836e-03,\n 3.6694e-03, 8.2808e-04, 1.5438e-03, 2.5508e-03, 1.7133e-04,\n 1.6932e-03, -4.8619e-04, 1.9194e-03, -6.1105e-03, -4.6731e-04,\n 1.1502e-03, 1.0128e-02, -5.8111e-03, 2.6912e-03, -2.5747e-03,\n 3.2649e-04, -3.9803e-03, 4.0906e-03, -2.1288e-03, 3.2421e-03,\n 4.3815e-03, 2.6020e-03, -3.4147e-03, -4.3381e-03, 4.2671e-03,\n 2.5306e-03, -1.6100e-03, 2.0495e-03, -3.0127e-03, 7.0217e-03,\n 1.4119e-03, -1.3028e-03, 4.9828e-03, -4.1564e-03, 1.3247e-04,\n 7.3577e-03, 3.3376e-03, 1.8557e-03, -9.0874e-04, 2.6453e-03,\n 1.7241e-03, 9.7723e-04, -2.2301e-03, 5.3625e-03, -1.4090e-03,\n 3.3062e-04, -3.3852e-03, -6.0372e-04, 2.9167e-03, 8.8756e-04,\n -4.1515e-05, 6.3665e-04, -2.3088e-03, -3.5783e-03, -1.0706e-03,\n 3.8297e-03, 4.8427e-03, 8.7286e-04, 5.9140e-04, 8.8872e-03,\n 2.3900e-03, -9.9958e-03, -8.2983e-05, 4.0397e-03, -8.1742e-05,\n 3.4927e-03, -5.6883e-03, -1.6874e-03, 1.1287e-03, -3.6805e-03,\n -1.0888e-04, 8.2711e-03, 3.6576e-03, -3.2588e-03, 1.8947e-04,\n -2.2228e-03, -1.5215e-03, -4.1886e-03, -7.2437e-03, 3.5025e-03,\n 3.7035e-03, 4.1268e-03, -7.6701e-04, -5.3507e-04, -4.7338e-04,\n -4.6714e-03, -2.6919e-03, 2.4037e-03, -5.7021e-03, 6.8276e-03,\n -5.3647e-04, -1.7868e-03, 2.7702e-04, -1.5746e-04, -1.4305e-03,\n 2.7887e-03, -4.3401e-03, -3.2743e-05, 7.9468e-04, 6.4274e-03,\n 1.7991e-03, 8.2104e-04, 7.6998e-04, -6.4594e-03, -4.4808e-03,\n -1.4609e-03, 6.8097e-03, 8.5248e-04, 3.3817e-04, 3.1288e-03,\n 3.0143e-03, -3.1792e-03, -5.5076e-03, 1.9110e-03, 2.5362e-03,\n -1.0477e-02, -3.2953e-03, -2.8363e-03, 2.2285e-04, 3.8721e-03,\n 5.9143e-03, -6.5470e-04, -8.7221e-04, -4.3774e-03, 3.6811e-04,\n -4.7164e-03, -8.3085e-04, -4.0271e-03, 4.7526e-03, -4.3012e-03,\n -1.0821e-04, 6.4022e-04, -1.9982e-04, 2.9075e-04, -1.7577e-02,\n -7.2443e-03, 2.3861e-03, -4.7381e-03, -2.6560e-03, -1.1689e-03,\n 8.5051e-04, 2.4693e-03, -1.4923e-03, 7.3821e-03, 8.5348e-04,\n -4.7757e-03, 2.2719e-03, -2.0648e-03, -3.1369e-03, 1.6757e-03,\n 1.5529e-03, 1.6316e-03, -1.1256e-03, 2.5562e-03, 4.2267e-03,\n 1.2561e-03, 6.2268e-04, -8.5897e-04, 3.5806e-03, 1.6331e-03,\n 8.7567e-04, 8.3248e-03, 2.6736e-04, -1.2706e-03, -3.3870e-04,\n 1.0436e-03, 2.7583e-03, -9.5377e-03, 3.8534e-03, 6.0397e-03,\n 2.8330e-04, 3.4345e-03, -1.2712e-03, -3.2025e-03, -1.7153e-03,\n -9.8518e-04, 1.5891e-03, -3.8019e-05, -9.2993e-03, 1.0303e-03,\n -2.7051e-03, 9.1814e-04, -1.4196e-03, -4.2011e-04, -1.2204e-02,\n 1.8552e-03, -3.0702e-03, -2.2601e-03, -1.0440e-03, -2.5456e-03,\n -2.7605e-03, -2.5043e-03, 1.3742e-03, 2.6961e-03, 9.5814e-03,\n 1.5934e-03, -1.3852e-03, -7.1916e-03, 1.1167e-03, 3.1565e-03,\n 1.7819e-03, 2.5412e-03, -1.3181e-03, -2.7144e-03, 1.8645e-03,\n 2.3536e-05, 5.9239e-03, 5.2027e-03, 1.7900e-03, 8.5180e-04,\n 9.3379e-04, 5.3928e-03, -1.2857e-03, 2.6850e-03, -2.5968e-04,\n -6.7700e-03, -3.7196e-03, -1.8319e-02, 1.8433e-03, -1.1957e-03,\n -1.8376e-04, 4.3780e-03, -4.2845e-03, 5.2930e-03, -4.4159e-03,\n -3.0084e-03, -1.4557e-03, 3.0372e-03, 9.1479e-03, 5.0410e-04,\n -4.4764e-03, 4.4296e-03, -5.1427e-03, -4.5408e-03, 5.8491e-03,\n 3.0098e-03, 8.7181e-04, -7.0911e-03, -4.2284e-03, -4.8313e-03,\n 5.1256e-03, 1.3487e-03], device='cuda:0')",
|
| 23 |
+
"exp_avg_sq": "tensor([1.4951e-04, 1.2417e-04, 1.3256e-04, 1.1880e-04, 1.8261e-04, 1.5533e-04,\n 1.5635e-04, 1.1017e-04, 1.0049e-04, 1.1810e-04, 1.9068e-04, 1.0522e-04,\n 1.9567e-04, 1.1518e-04, 1.3605e-04, 1.8386e-04, 1.4474e-04, 1.0706e-04,\n 9.9004e-05, 1.7301e-04, 9.1516e-05, 1.6736e-04, 1.2421e-04, 1.9060e-04,\n 1.3009e-04, 2.0397e-04, 1.5551e-04, 1.9857e-04, 1.6789e-04, 1.7103e-04,\n 1.3900e-04, 2.5816e-04, 1.2806e-04, 2.7294e-04, 1.6654e-04, 1.3232e-04,\n 1.3682e-04, 1.2776e-04, 1.4622e-04, 2.8015e-04, 1.7851e-04, 2.0786e-04,\n 2.2310e-04, 1.3957e-04, 1.5098e-04, 1.7536e-04, 1.4374e-04, 1.3174e-04,\n 1.2781e-04, 1.8680e-04, 1.7687e-04, 1.5419e-04, 1.5145e-04, 1.2197e-04,\n 1.4591e-04, 1.6843e-04, 8.6294e-05, 1.3549e-04, 2.0891e-04, 1.5055e-04,\n 1.6393e-04, 1.3663e-04, 2.6403e-04, 1.4736e-04, 2.3842e-04, 2.6353e-04,\n 1.1394e-04, 1.1886e-04, 1.3480e-04, 1.2179e-04, 1.7795e-04, 1.3760e-04,\n 1.1900e-04, 1.8269e-04, 1.6610e-04, 1.6300e-04, 1.6559e-04, 1.5151e-04,\n 1.2229e-04, 1.2078e-04, 1.4206e-04, 2.2428e-04, 1.2780e-04, 1.3610e-04,\n 2.1421e-04, 1.6533e-04, 1.9770e-04, 1.6056e-04, 1.1496e-04, 1.6044e-04,\n 2.0646e-04, 2.0518e-04, 8.8223e-05, 1.6550e-04, 1.8856e-04, 1.5186e-04,\n 1.1795e-04, 1.5648e-04, 1.3967e-04, 8.5380e-05, 1.0203e-04, 1.5205e-04,\n 1.1625e-04, 1.4005e-04, 1.1226e-04, 1.3882e-04, 1.8893e-04, 1.0779e-04,\n 1.6491e-04, 1.5471e-04, 1.7617e-04, 9.6146e-05, 1.9401e-04, 9.6919e-05,\n 1.1786e-04, 2.0484e-04, 1.3799e-04, 1.2258e-04, 2.4420e-04, 2.1799e-04,\n 1.3772e-04, 1.6259e-04, 2.2660e-04, 2.0117e-04, 2.0765e-04, 1.8494e-04,\n 9.3000e-05, 1.0886e-04, 1.5614e-04, 2.1098e-04, 1.8057e-04, 1.5171e-04,\n 1.4678e-04, 1.4165e-04, 1.1524e-04, 1.6837e-04, 1.1760e-04, 1.3127e-04,\n 1.4772e-04, 2.6083e-04, 1.9130e-04, 9.3701e-05, 1.2379e-04, 1.4675e-04,\n 2.0968e-04, 1.6211e-04, 1.2671e-04, 1.5161e-04, 2.0436e-04, 1.6552e-04,\n 1.4084e-04, 2.8387e-04, 1.7063e-04, 2.0548e-04, 1.1044e-04, 2.2705e-04,\n 1.1796e-04, 2.1247e-04, 1.7623e-04, 8.2831e-05, 1.1911e-04, 1.8331e-04,\n 1.7443e-04, 1.5166e-04, 1.4416e-04, 1.2083e-04, 2.0132e-04, 1.2401e-04,\n 1.2570e-04, 1.7555e-04, 1.7570e-04, 1.4748e-04, 1.6446e-04, 1.7463e-04,\n 1.3881e-04, 1.2407e-04, 1.6068e-04, 1.0443e-04, 1.6108e-04, 1.2866e-04,\n 2.4413e-04, 1.7859e-04, 1.7494e-04, 1.3440e-04, 1.9232e-04, 1.3397e-04,\n 1.4154e-04, 1.2463e-04, 1.9993e-04, 1.5906e-04, 1.7024e-04, 9.0636e-05,\n 1.1121e-04, 1.6937e-04, 1.5444e-04, 1.3759e-04, 1.9989e-04, 1.0055e-04,\n 1.6864e-04, 1.3352e-04, 1.3923e-04, 1.1733e-04, 1.5736e-04, 1.3028e-04,\n 1.6356e-04, 1.4840e-04, 2.3378e-04, 1.0797e-04, 1.2473e-04, 1.1720e-04,\n 2.2477e-04, 1.7334e-04, 2.0589e-04, 1.2859e-04, 1.6602e-04, 1.8422e-04,\n 1.2161e-04, 2.2013e-04, 1.8838e-04, 9.5960e-05, 1.5999e-04, 1.9466e-04,\n 1.2022e-04, 9.8841e-05, 1.9230e-04, 1.9393e-04, 1.6291e-04, 1.2775e-04,\n 1.5497e-04, 1.2346e-04, 1.1040e-04, 1.0934e-04, 1.4302e-04, 1.4002e-04,\n 1.5876e-04, 1.3085e-04, 1.9501e-04, 1.5526e-04, 1.2103e-04, 1.5735e-04,\n 2.2249e-04, 1.3724e-04, 1.8190e-04, 2.2224e-04, 2.1039e-04, 1.8571e-04,\n 1.6311e-04, 1.7256e-04, 1.3613e-04, 1.9072e-04, 1.7501e-04, 2.1730e-04,\n 1.2482e-04, 1.4494e-04, 1.5915e-04, 1.6764e-04, 1.6897e-04, 2.0893e-04,\n 1.6981e-04, 2.2233e-04, 1.8780e-04, 1.7801e-04, 1.1092e-04, 2.4186e-04,\n 1.5685e-04, 1.3077e-04, 2.0040e-04, 1.8374e-04, 1.6802e-04, 1.3855e-04,\n 1.7335e-04, 1.4691e-04, 1.7182e-04, 1.0819e-04, 1.4960e-04, 1.1528e-04,\n 1.3824e-04, 1.1997e-04, 1.4517e-04, 2.3381e-04, 1.1770e-04, 2.0339e-04,\n 1.6395e-04, 1.8140e-04, 1.2501e-04, 1.2141e-04, 1.1416e-04, 1.5182e-04,\n 1.4954e-04, 2.4941e-04, 1.6321e-04, 1.5291e-04, 1.3410e-04, 1.4105e-04,\n 1.2162e-04, 1.7491e-04, 1.2833e-04, 1.9112e-04, 2.1492e-04, 2.2857e-04,\n 1.1045e-04, 1.3318e-04, 1.4244e-04, 1.5475e-04, 2.1058e-04, 1.2155e-04,\n 1.5600e-04, 1.2830e-04, 2.0719e-04, 1.2330e-04, 2.4019e-04, 1.9335e-04,\n 1.8080e-04, 2.3367e-04, 1.3107e-04, 1.7079e-04, 1.5392e-04, 1.6459e-04,\n 1.5001e-04, 1.0854e-04, 1.4432e-04, 1.4654e-04, 1.2493e-04, 1.5824e-04,\n 1.0227e-04, 1.8343e-04, 2.1392e-04, 2.1630e-04, 1.8433e-04, 1.9890e-04,\n 1.0257e-04, 1.4553e-04, 1.0982e-04, 1.3995e-04, 2.2483e-04, 9.2097e-05,\n 1.1076e-04, 1.1525e-04, 1.1757e-04, 1.4289e-04, 1.4294e-04, 1.7324e-04,\n 2.2296e-04, 1.3099e-04, 1.0255e-04, 1.3952e-04, 1.0769e-04, 1.1781e-04,\n 1.5157e-04, 1.9112e-04, 1.4304e-04, 1.6158e-04, 1.1650e-04, 1.1154e-04,\n 1.0667e-04, 1.6313e-04, 1.4382e-04, 1.5889e-04, 9.6664e-05, 1.5806e-04,\n 1.6275e-04, 2.8851e-04, 1.3887e-04, 2.7533e-04, 1.2962e-04, 1.3936e-04,\n 2.2533e-04, 9.8582e-05, 1.3237e-04, 1.7192e-04, 9.9232e-05, 1.2116e-04,\n 1.0625e-04, 1.5114e-04, 1.3122e-04, 1.7989e-04, 1.1016e-04, 1.6376e-04,\n 2.2518e-04, 1.5014e-04, 9.3642e-05, 1.4590e-04, 1.3862e-04, 1.3066e-04,\n 1.3334e-04, 1.5750e-04, 1.3901e-04, 1.8410e-04, 1.4487e-04, 2.3963e-04,\n 1.3478e-04, 1.2370e-04, 1.2994e-04, 1.2650e-04, 1.5655e-04, 1.5748e-04,\n 1.1146e-04, 1.2743e-04, 1.8274e-04, 3.0630e-04, 1.4759e-04, 1.1622e-04,\n 1.9713e-04, 7.7582e-05, 2.1238e-04, 1.7757e-04, 1.2431e-04, 1.3829e-04,\n 1.3110e-04, 1.7559e-04, 1.1864e-04, 1.2152e-04, 1.1934e-04, 1.7129e-04,\n 1.2262e-04, 1.3210e-04, 1.3506e-04, 2.2913e-04, 1.5721e-04, 2.1491e-04,\n 1.1708e-04, 1.1170e-04, 1.7757e-04, 2.0435e-04, 1.7986e-04, 1.5396e-04,\n 1.5735e-04, 1.5206e-04, 8.4866e-05, 1.6899e-04, 1.5037e-04, 1.9118e-04,\n 2.3234e-04, 1.3136e-04, 1.2420e-04, 9.8726e-05, 1.5058e-04, 2.4058e-04,\n 1.4161e-04, 1.3011e-04, 2.7214e-04, 1.4362e-04, 1.9755e-04, 1.6991e-04,\n 2.0786e-04, 1.1024e-04, 1.0053e-04, 1.1742e-04, 1.2793e-04, 1.5209e-04,\n 1.2403e-04, 1.0650e-04, 1.3632e-04, 1.6772e-04, 1.9917e-04, 1.0615e-04,\n 2.3838e-04, 1.3123e-04, 1.4172e-04, 1.8000e-04, 1.3807e-04, 2.0109e-04,\n 2.1590e-04, 1.1687e-04, 2.1654e-04, 9.0364e-05, 1.1420e-04, 1.5900e-04,\n 2.3061e-04, 1.8224e-04, 1.3567e-04, 1.1439e-04, 1.6744e-04, 1.5007e-04,\n 1.8272e-04, 1.7939e-04, 1.8589e-04, 9.6710e-05, 1.5773e-04, 1.9358e-04,\n 1.4120e-04, 1.6503e-04, 1.8861e-04, 3.3446e-04, 1.4215e-04, 1.9782e-04,\n 1.5315e-04, 2.3290e-04, 1.1217e-04, 1.4542e-04, 1.2742e-04, 1.0549e-04,\n 1.7977e-04, 1.4195e-04, 1.4274e-04, 2.2228e-04, 1.5881e-04, 1.4202e-04,\n 1.9495e-04, 1.0777e-04, 1.1108e-04, 1.3659e-04, 1.0844e-04, 1.4602e-04,\n 1.1792e-04, 1.7261e-04, 2.0726e-04, 7.4009e-04, 1.7590e-04, 2.1265e-04,\n 1.8630e-04, 1.4100e-04], device='cuda:0')"
|
| 24 |
+
},
|
| 25 |
+
"4": {
|
| 26 |
+
"step": "tensor(8764.)",
|
| 27 |
+
"exp_avg": "tensor([[-4.8066e-05, -1.0682e-05, -1.9323e-04, ..., -9.6700e-05,\n -6.5513e-05, -3.7167e-04],\n [-4.3237e-05, 5.0704e-05, -1.1253e-05, ..., -1.4590e-04,\n 6.5564e-05, -3.7635e-04],\n [-3.0534e-05, -9.7792e-05, -1.2204e-04, ..., 1.2374e-04,\n -2.2176e-04, 1.5218e-04],\n ...,\n [-1.9999e-04, 2.7830e-05, -5.4196e-05, ..., -1.4007e-04,\n 2.3173e-04, -4.5252e-05],\n [-2.7541e-04, -2.2288e-05, 4.3935e-05, ..., -2.4087e-04,\n 6.5591e-04, 4.9404e-05],\n [-1.3029e-05, 1.6541e-05, -8.6760e-05, ..., 3.2268e-05,\n 1.2635e-04, -5.0726e-05]], device='cuda:0')",
|
| 28 |
+
"exp_avg_sq": "tensor([[3.0278e-07, 1.4209e-07, 1.7544e-07, ..., 2.1762e-07, 2.8576e-07,\n 2.1128e-07],\n [4.9573e-07, 2.6927e-07, 2.5560e-07, ..., 5.9682e-07, 5.3662e-07,\n 8.0307e-07],\n [4.3362e-07, 4.0839e-07, 3.0769e-07, ..., 5.8190e-07, 5.5083e-07,\n 6.4389e-07],\n ...,\n [4.6890e-07, 6.3947e-07, 3.2498e-07, ..., 4.7834e-07, 6.3968e-07,\n 5.7895e-07],\n [3.2429e-07, 4.4385e-07, 3.5511e-07, ..., 7.0202e-07, 7.0056e-07,\n 6.1159e-07],\n [5.4645e-07, 4.7699e-07, 3.0392e-07, ..., 5.3801e-07, 6.1423e-07,\n 6.1334e-07]], device='cuda:0')"
|
| 29 |
+
},
|
| 30 |
+
"5": {
|
| 31 |
+
"step": "tensor(8764.)",
|
| 32 |
+
"exp_avg": "tensor([[-8.3619e-06, -1.4720e-05, -1.3382e-05, ..., 4.2089e-06,\n -6.4236e-05, -2.5919e-05],\n [ 3.5252e-05, 8.1874e-06, 4.0514e-05, ..., -2.8238e-05,\n -1.7980e-05, -2.0631e-04],\n [ 3.4605e-05, -1.8041e-04, -3.3197e-05, ..., 7.0006e-05,\n -1.9703e-04, 6.2779e-05],\n ...,\n [-2.0820e-04, -6.8090e-05, 1.2461e-04, ..., 8.5330e-05,\n -1.3491e-04, -5.9078e-05],\n [ 1.4419e-04, 4.0308e-05, -3.7566e-05, ..., -1.7161e-04,\n 2.0185e-04, -7.9278e-05],\n [-6.7564e-06, 6.7827e-05, 1.4839e-04, ..., -8.2880e-05,\n -2.0097e-05, 9.5238e-05]], device='cuda:0')",
|
| 33 |
+
"exp_avg_sq": "tensor([[7.1426e-08, 4.7483e-08, 6.5020e-08, ..., 5.0662e-08, 1.0453e-07,\n 7.3694e-08],\n [1.4727e-07, 8.6845e-08, 1.1394e-07, ..., 1.6895e-07, 1.8979e-07,\n 2.6245e-07],\n [1.4573e-07, 1.3843e-07, 1.1743e-07, ..., 2.4185e-07, 1.9821e-07,\n 2.7111e-07],\n ...,\n [1.4543e-07, 2.3135e-07, 2.1944e-07, ..., 2.0407e-07, 2.8540e-07,\n 1.6630e-07],\n [1.4841e-07, 1.4078e-07, 1.1995e-07, ..., 2.3602e-07, 2.7503e-07,\n 2.2500e-07],\n [1.3654e-07, 1.4647e-07, 1.5267e-07, ..., 1.9147e-07, 2.4958e-07,\n 3.2951e-07]], device='cuda:0')"
|
| 34 |
+
},
|
| 35 |
+
"6": {
|
| 36 |
+
"step": "tensor(8764.)",
|
| 37 |
+
"exp_avg": "tensor([ 0.0002, -0.0002], device='cuda:0')",
|
| 38 |
+
"exp_avg_sq": "tensor([7.8441e-06, 7.8441e-06], device='cuda:0')"
|
| 39 |
+
}
|
| 40 |
+
},
|
| 41 |
+
"param_groups": [
|
| 42 |
+
{
|
| 43 |
+
"lr": 0.00020690126647990973,
|
| 44 |
+
"name": "shared",
|
| 45 |
+
"betas": [
|
| 46 |
+
0.9,
|
| 47 |
+
0.999
|
| 48 |
+
],
|
| 49 |
+
"eps": 1e-08,
|
| 50 |
+
"weight_decay": 1e-05,
|
| 51 |
+
"amsgrad": false,
|
| 52 |
+
"maximize": false,
|
| 53 |
+
"foreach": null,
|
| 54 |
+
"capturable": false,
|
| 55 |
+
"differentiable": false,
|
| 56 |
+
"fused": null,
|
| 57 |
+
"decoupled_weight_decay": true,
|
| 58 |
+
"initial_lr": 0.001,
|
| 59 |
+
"params": [
|
| 60 |
+
0,
|
| 61 |
+
1,
|
| 62 |
+
2,
|
| 63 |
+
3
|
| 64 |
+
]
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"lr": 0.00020690126647990973,
|
| 68 |
+
"name": "scale_256",
|
| 69 |
+
"betas": [
|
| 70 |
+
0.9,
|
| 71 |
+
0.999
|
| 72 |
+
],
|
| 73 |
+
"eps": 1e-08,
|
| 74 |
+
"weight_decay": 1e-05,
|
| 75 |
+
"amsgrad": false,
|
| 76 |
+
"maximize": false,
|
| 77 |
+
"foreach": null,
|
| 78 |
+
"capturable": false,
|
| 79 |
+
"differentiable": false,
|
| 80 |
+
"fused": null,
|
| 81 |
+
"decoupled_weight_decay": true,
|
| 82 |
+
"initial_lr": 0.001,
|
| 83 |
+
"params": [
|
| 84 |
+
4
|
| 85 |
+
]
|
| 86 |
+
},
|
| 87 |
+
{
|
| 88 |
+
"lr": 0.00020690126647990973,
|
| 89 |
+
"name": "scale_512",
|
| 90 |
+
"betas": [
|
| 91 |
+
0.9,
|
| 92 |
+
0.999
|
| 93 |
+
],
|
| 94 |
+
"eps": 1e-08,
|
| 95 |
+
"weight_decay": 1e-05,
|
| 96 |
+
"amsgrad": false,
|
| 97 |
+
"maximize": false,
|
| 98 |
+
"foreach": null,
|
| 99 |
+
"capturable": false,
|
| 100 |
+
"differentiable": false,
|
| 101 |
+
"fused": null,
|
| 102 |
+
"decoupled_weight_decay": true,
|
| 103 |
+
"initial_lr": 0.001,
|
| 104 |
+
"params": [
|
| 105 |
+
5
|
| 106 |
+
]
|
| 107 |
+
},
|
| 108 |
+
{
|
| 109 |
+
"lr": 0.00010384757955302797,
|
| 110 |
+
"name": "fusion",
|
| 111 |
+
"betas": [
|
| 112 |
+
0.9,
|
| 113 |
+
0.999
|
| 114 |
+
],
|
| 115 |
+
"eps": 1e-08,
|
| 116 |
+
"weight_decay": 1e-05,
|
| 117 |
+
"amsgrad": false,
|
| 118 |
+
"maximize": false,
|
| 119 |
+
"foreach": null,
|
| 120 |
+
"capturable": false,
|
| 121 |
+
"differentiable": false,
|
| 122 |
+
"fused": null,
|
| 123 |
+
"decoupled_weight_decay": true,
|
| 124 |
+
"initial_lr": 0.0005,
|
| 125 |
+
"params": [
|
| 126 |
+
6
|
| 127 |
+
]
|
| 128 |
+
}
|
| 129 |
+
]
|
| 130 |
+
},
|
| 131 |
+
"scheduler_state_dict": {
|
| 132 |
+
"T_0": 10,
|
| 133 |
+
"T_i": 10,
|
| 134 |
+
"T_mult": 2,
|
| 135 |
+
"eta_min": 1e-06,
|
| 136 |
+
"T_cur": 7,
|
| 137 |
+
"base_lrs": [
|
| 138 |
+
0.001,
|
| 139 |
+
0.001,
|
| 140 |
+
0.001,
|
| 141 |
+
0.0005
|
| 142 |
+
],
|
| 143 |
+
"last_epoch": 7,
|
| 144 |
+
"_step_count": 0,
|
| 145 |
+
"_is_initial": false,
|
| 146 |
+
"_get_lr_called_within_step": false,
|
| 147 |
+
"_last_lr": [
|
| 148 |
+
0.00020690126647990973,
|
| 149 |
+
0.00020690126647990973,
|
| 150 |
+
0.00020690126647990973,
|
| 151 |
+
0.00010384757955302797
|
| 152 |
+
]
|
| 153 |
+
},
|
| 154 |
+
"metrics": {
|
| 155 |
+
"best_val_acc": 71.388,
|
| 156 |
+
"best_epoch": 6,
|
| 157 |
+
"scale_accuracies": {
|
| 158 |
+
"256": 70.794,
|
| 159 |
+
"512": 71.292
|
| 160 |
+
},
|
| 161 |
+
"training_history": {
|
| 162 |
+
"epochs": [
|
| 163 |
+
1,
|
| 164 |
+
2,
|
| 165 |
+
3,
|
| 166 |
+
4,
|
| 167 |
+
5,
|
| 168 |
+
6,
|
| 169 |
+
7
|
| 170 |
+
],
|
| 171 |
+
"train_loss": [
|
| 172 |
+
5.60248446921571,
|
| 173 |
+
4.156974341351384,
|
| 174 |
+
3.7702821485531595,
|
| 175 |
+
3.570641661223512,
|
| 176 |
+
3.4472002215659656,
|
| 177 |
+
3.3609565016560663,
|
| 178 |
+
3.300025675433893
|
| 179 |
+
],
|
| 180 |
+
"train_acc": [
|
| 181 |
+
63.38018384800733,
|
| 182 |
+
69.48813074329888,
|
| 183 |
+
70.23393515443342,
|
| 184 |
+
70.76774534467404,
|
| 185 |
+
71.32536195515495,
|
| 186 |
+
71.77877669343653,
|
| 187 |
+
72.23593801588707
|
| 188 |
+
],
|
| 189 |
+
"val_acc": [
|
| 190 |
+
67.966,
|
| 191 |
+
69.586,
|
| 192 |
+
69.866,
|
| 193 |
+
70.47,
|
| 194 |
+
70.854,
|
| 195 |
+
71.1,
|
| 196 |
+
71.388
|
| 197 |
+
],
|
| 198 |
+
"scale_accs": {
|
| 199 |
+
"256": [
|
| 200 |
+
66.908,
|
| 201 |
+
68.868,
|
| 202 |
+
69.194,
|
| 203 |
+
69.78,
|
| 204 |
+
70.214,
|
| 205 |
+
70.592,
|
| 206 |
+
70.794
|
| 207 |
+
],
|
| 208 |
+
"512": [
|
| 209 |
+
67.774,
|
| 210 |
+
69.268,
|
| 211 |
+
69.844,
|
| 212 |
+
70.366,
|
| 213 |
+
70.82,
|
| 214 |
+
71.088,
|
| 215 |
+
71.292
|
| 216 |
+
]
|
| 217 |
+
},
|
| 218 |
+
"lr": [
|
| 219 |
+
0.0009755527298894294,
|
| 220 |
+
0.0009046039886902864,
|
| 221 |
+
0.0007940987335200904,
|
| 222 |
+
0.0006548539886902864,
|
| 223 |
+
0.0005005000000000001,
|
| 224 |
+
0.0003461460113097139,
|
| 225 |
+
0.00020690126647990973
|
| 226 |
+
]
|
| 227 |
+
}
|
| 228 |
+
},
|
| 229 |
+
"train_config": {
|
| 230 |
+
"name": "david_training",
|
| 231 |
+
"run_id": "20251012_141246",
|
| 232 |
+
"dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
|
| 233 |
+
"model_variant": "clip_vit_laion_b32",
|
| 234 |
+
"num_classes": 1000,
|
| 235 |
+
"preset": "small_fast",
|
| 236 |
+
"custom_config_path": null,
|
| 237 |
+
"num_classes_override": null,
|
| 238 |
+
"use_belly_override": null,
|
| 239 |
+
"belly_expand_override": null,
|
| 240 |
+
"progressive_training_override": false,
|
| 241 |
+
"scale_warmup_epochs_override": null,
|
| 242 |
+
"num_epochs": 10,
|
| 243 |
+
"batch_size": 1024,
|
| 244 |
+
"learning_rate": 0.001,
|
| 245 |
+
"weight_decay": 1e-05,
|
| 246 |
+
"warmup_epochs": 3,
|
| 247 |
+
"use_rose_loss": true,
|
| 248 |
+
"rose_initial_weight": 0.1,
|
| 249 |
+
"rose_max_weight": 0.5,
|
| 250 |
+
"rose_weight_schedule": "adaptive",
|
| 251 |
+
"use_cayley_loss": false,
|
| 252 |
+
"cayley_weight": 0.001,
|
| 253 |
+
"scale_loss_balance": null,
|
| 254 |
+
"use_mixed_precision": true,
|
| 255 |
+
"gradient_clip": 10.0,
|
| 256 |
+
"scheduler_type": "cosine_restarts",
|
| 257 |
+
"min_lr": 1e-06,
|
| 258 |
+
"freeze_strategy": "never",
|
| 259 |
+
"freeze_threshold": 90.0,
|
| 260 |
+
"unfreeze_on_plateau": true,
|
| 261 |
+
"patience": 10,
|
| 262 |
+
"track_gradients": true,
|
| 263 |
+
"gradient_scale_threshold": 1e-05,
|
| 264 |
+
"gradient_scale_multiplier": 10.0,
|
| 265 |
+
"log_interval": 50,
|
| 266 |
+
"val_interval": 1,
|
| 267 |
+
"save_interval": 5,
|
| 268 |
+
"log_fusion_weights": true,
|
| 269 |
+
"log_loss_components": true,
|
| 270 |
+
"save_format": "safetensors",
|
| 271 |
+
"hf_repo": "AbstractPhil/gated-david",
|
| 272 |
+
"upload_to_hub": true,
|
| 273 |
+
"base_dir": "./david_training",
|
| 274 |
+
"num_workers": 10,
|
| 275 |
+
"pin_memory": true,
|
| 276 |
+
"prefetch_factor": 4,
|
| 277 |
+
"persistent_workers": true
|
| 278 |
+
}
|
| 279 |
+
}
|