Update best_model_acc63.04_metadata.json - Run 20251012_231445
Browse files
weights/David-fully_shared-weighted_sum/20251012_231445/best_model_acc63.04_metadata.json
ADDED
|
@@ -0,0 +1,244 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 0,
|
| 3 |
+
"optimizer_state_dict": {
|
| 4 |
+
"state": {
|
| 5 |
+
"0": {
|
| 6 |
+
"step": "tensor(3754.)",
|
| 7 |
+
"exp_avg": "tensor([[-7.2354e-05, -6.4286e-05, -2.3396e-05, ..., -7.4869e-05,\n -1.1145e-04, 2.9268e-05],\n [-2.0049e-05, 4.4729e-06, 3.3404e-05, ..., -8.9010e-06,\n -3.1663e-05, -3.1633e-05],\n [-5.4587e-05, -3.1029e-05, -4.9208e-05, ..., 3.4661e-05,\n 1.8054e-05, -7.8119e-05],\n ...,\n [ 4.3341e-06, 6.3042e-05, -2.9829e-05, ..., -3.9245e-05,\n -2.6280e-05, 6.0246e-07],\n [ 1.5795e-05, 1.9187e-04, -1.9030e-04, ..., 9.5873e-05,\n 9.1547e-05, -1.9468e-04],\n [ 2.3775e-05, 1.1544e-04, 2.0107e-05, ..., 5.6798e-05,\n 6.8590e-05, -5.1135e-06]], device='cuda:0')",
|
| 8 |
+
"exp_avg_sq": "tensor([[3.7260e-08, 9.5780e-08, 5.1665e-08, ..., 6.2371e-08, 4.0984e-08,\n 2.2801e-08],\n [2.4923e-07, 4.0484e-07, 1.5717e-07, ..., 1.3940e-07, 1.7980e-07,\n 8.4954e-08],\n [4.4872e-07, 5.9915e-07, 4.0638e-07, ..., 2.3920e-07, 1.2624e-07,\n 2.1056e-07],\n ...,\n [5.3376e-08, 1.4786e-07, 3.3030e-08, ..., 2.9804e-08, 2.0662e-08,\n 1.8713e-08],\n [1.0604e-07, 1.6222e-06, 1.5255e-07, ..., 2.9504e-07, 7.0019e-08,\n 9.8463e-08],\n [3.5325e-07, 4.4776e-07, 1.6270e-07, ..., 4.3948e-07, 1.3270e-07,\n 1.3775e-07]], device='cuda:0')"
|
| 9 |
+
},
|
| 10 |
+
"1": {
|
| 11 |
+
"step": "tensor(3754.)",
|
| 12 |
+
"exp_avg": "tensor([-7.4694e-04, -1.8854e-03, 6.4286e-04, 2.4508e-03, 2.4702e-03,\n -4.4378e-03, 9.4159e-04, 4.1660e-03, 4.4945e-04, 7.3576e-04,\n -2.0539e-03, -2.2433e-03, 2.3815e-03, 5.8064e-03, -4.0864e-03,\n -4.2362e-04, 1.3323e-03, 2.9970e-03, 3.8786e-04, -1.5035e-03,\n 3.4663e-03, 7.7550e-04, -9.1650e-04, 2.7834e-03, -3.5398e-03,\n 3.0187e-03, 3.9582e-03, -4.7403e-04, 9.8748e-04, 2.8343e-04,\n -8.3944e-04, -2.6337e-03, -6.5530e-04, 3.4943e-03, 1.9313e-03,\n 2.3554e-03, -4.6680e-04, -1.5787e-03, 4.6821e-03, 8.8973e-04,\n 3.7084e-03, 5.3840e-04, -4.2972e-04, -1.0919e-03, 3.1032e-04,\n -1.0693e-03, -5.8211e-03, -1.2884e-05, 1.2900e-03, 6.3695e-05,\n -1.6127e-03, -1.9020e-03, -4.5896e-03, -6.1325e-04, -5.5176e-03,\n -2.1044e-03, -8.2202e-04, -8.7492e-04, 1.1453e-03, 1.6601e-03,\n 1.6680e-03, 4.6909e-03, -1.1764e-03, 8.7526e-04, -5.4243e-03,\n 2.1636e-03, 3.5105e-03, -1.3637e-03, -2.9842e-03, 6.1040e-03,\n -8.3799e-04, -7.7987e-04, 4.4456e-04, 1.3383e-04, -2.2568e-03,\n 2.5282e-03, 8.8292e-03, -5.4406e-03, -3.1000e-03, -6.6494e-04,\n 1.8741e-03, -3.2414e-03, 3.8894e-03, -1.5239e-03, -5.2431e-03,\n 5.1293e-03, -1.6384e-03, -1.4408e-03, 1.8991e-03, -3.3997e-03,\n -1.8784e-03, 3.8146e-04, -3.4037e-03, -8.6275e-04, -2.2784e-04,\n -2.1998e-03, -8.9559e-03, 7.3645e-06, 5.0222e-03, -1.5464e-03,\n -1.8769e-03, 4.6629e-04, 1.4802e-04, 3.3993e-03, -2.8500e-04,\n -2.8801e-03, 1.3355e-04, 1.7332e-03, -1.3615e-03, -1.2785e-04,\n 2.0490e-03, -4.8874e-03, 3.6412e-03, -1.2456e-03, 1.9463e-03,\n 2.3852e-03, -7.8131e-04, -1.3544e-03, -3.7376e-03, 3.1133e-03,\n 6.1187e-03, 1.6319e-03, -1.2988e-03, -3.5480e-04, 5.4413e-04,\n -5.1694e-03, 3.9637e-04, 2.5300e-03, 5.0214e-03, 3.4440e-03,\n -1.8358e-03, 5.0253e-03, -6.5769e-04, 1.8519e-03, -4.8862e-03,\n -2.9660e-04, 1.7705e-03, -1.9225e-03, 1.5343e-03, -1.7688e-03,\n -1.4007e-03, 1.7188e-03, 3.7779e-03, 1.9724e-03, 2.1922e-03,\n -3.6869e-03, 1.0900e-03, -3.0829e-03, 3.8106e-03, -3.9192e-04,\n -5.8543e-03, 3.4945e-03, -7.4982e-04, 4.1542e-03, -3.6999e-03,\n 1.5299e-03, 1.9316e-03, -1.9402e-03, 7.8178e-04, -2.7585e-03,\n -1.1956e-03, 1.9709e-03, -4.8795e-03, -1.3219e-03, -4.9874e-04,\n -2.0221e-03, 2.7406e-03, -1.4590e-03, 6.0888e-03, -8.8876e-04,\n 1.6323e-03, 1.6663e-03, -1.0529e-03, 3.6704e-03, 5.5299e-04,\n 5.0657e-04, 1.5852e-04, -1.1170e-03, -3.4064e-03, 3.4358e-03,\n -1.1723e-03, 5.7062e-03, 2.9113e-04, -1.1692e-04, 1.2727e-03,\n 5.4762e-04, -2.2104e-03, 5.9736e-04, -3.2695e-03, -1.5829e-04,\n -9.5352e-04, -2.6392e-03, 6.8787e-04, -2.9923e-03, -2.0952e-03,\n 1.8715e-03, -1.0900e-03, -6.4409e-03, -4.0520e-03, 8.6837e-04,\n 1.7472e-03, -3.2015e-03, -2.7875e-03, 2.3379e-04, -8.6413e-04,\n -1.3030e-03, -7.9724e-04, 1.2198e-03, 2.5908e-03, 2.6362e-03,\n -3.3555e-04, -1.7064e-03, 1.2471e-03, -3.8099e-04, -1.0232e-03,\n 1.7787e-03, 3.6412e-03, 4.9021e-04, -1.0015e-03, -2.1289e-03,\n 6.2512e-03, -3.6754e-03, -2.2907e-03, -2.5184e-03, 1.2248e-04,\n -4.0704e-03, -1.9282e-03, -1.3853e-04, -2.6307e-03, -3.7599e-03,\n -3.2025e-03, -3.3750e-03, 6.2407e-04, -1.9586e-03, -2.7374e-03,\n -9.4793e-04, 1.1216e-03, -8.0621e-04, -1.0127e-03, 2.2832e-04,\n -2.4168e-03, -4.2152e-04, 1.5586e-03, -7.3051e-03, 2.0433e-04,\n -2.4658e-03, -1.5663e-03, 2.6252e-03, 3.0229e-03, 1.2499e-03,\n -8.8238e-05, -6.1493e-03, 1.0903e-03, 2.6188e-03, 4.2742e-03,\n -2.5588e-03, -1.1711e-03, -2.6356e-03, 1.4234e-04, -8.7264e-04,\n -1.8551e-03, -5.5853e-05, 2.3291e-03, 1.0430e-03, -1.4976e-03,\n 3.3810e-03, 6.1596e-04, 1.8770e-04, 7.3659e-04, -3.2966e-04,\n 2.1309e-03, 2.7374e-03, 4.8025e-04, 6.1352e-03, 2.8743e-03,\n -1.9054e-03, -4.5550e-04, -5.0876e-03, -2.3511e-03, -1.5933e-03,\n 1.0687e-04, 2.8330e-03, 9.1963e-04, 6.2050e-03, 1.1286e-03,\n -2.8647e-03, -1.7058e-03, -2.4978e-03, -9.3395e-04, -2.0385e-03,\n -2.8224e-03, -2.1238e-03, 6.6332e-04, -1.1424e-02, -4.4332e-03,\n -7.7984e-04, -1.3967e-03, -2.8143e-03, 1.1470e-03, 2.4622e-03,\n -9.6972e-04, -1.5281e-03, 7.4986e-03, -1.2426e-03, 1.6148e-03,\n -1.8027e-03, -3.0833e-03, 9.0789e-04, 3.1055e-03, 2.8347e-03,\n 4.9362e-04, -1.5213e-03, 6.7606e-03, -2.4927e-03, 1.3228e-04,\n -5.9060e-03, -1.5623e-04, -9.2347e-04, -3.5972e-04, 4.7530e-04,\n 3.4956e-03, 3.1981e-03, -9.9783e-05, -1.2248e-03, 2.5349e-03,\n 5.5484e-03, -1.0852e-03, -8.9385e-04, -1.6231e-03, 1.5568e-03,\n -6.7539e-03, -3.8967e-03, -1.2148e-03, -4.4025e-03, -1.9438e-03,\n -4.6529e-04, 7.5988e-05, 2.1016e-03, 2.1690e-03, -1.3680e-03,\n 1.6991e-03, 2.3287e-03, -2.0257e-03, -7.9087e-04, -4.9874e-04,\n -2.9218e-03, -1.9820e-03, -4.5141e-03, -1.3238e-03, 7.2791e-03,\n 8.3832e-04, 4.7109e-03, -1.0197e-03, -3.0687e-03, -2.8481e-03,\n 2.8211e-03, 1.5780e-03, 3.5104e-03, -1.1236e-03, -6.6493e-04,\n -1.9122e-03, -3.9563e-04, -1.7791e-03, -2.1975e-03, -2.3131e-03,\n 5.6247e-04, 7.5055e-04, -2.9730e-03, -6.0020e-04, 6.1145e-03,\n -5.8696e-04, -7.7924e-04, 2.9766e-03, 3.5574e-03, -5.4198e-03,\n 1.4053e-03, 4.3297e-03, -4.0532e-03, -6.8256e-03, 1.3382e-03,\n -1.3129e-03, 1.0446e-03, -3.1759e-03, 3.0873e-03, 1.3546e-04,\n -7.4001e-04, 8.1028e-04, 2.0531e-03, 1.7742e-03, -4.7989e-04,\n -5.4175e-04, 4.5436e-04, -3.7469e-03, -2.9908e-03, 7.1776e-04,\n 1.1322e-03, -4.5424e-04, 3.0122e-04, 7.2778e-04, -5.5906e-04,\n 4.5228e-03, 4.3530e-05, -4.4601e-03, 4.1903e-03, -3.3006e-05,\n 1.3963e-04, 2.0600e-03, -4.6705e-03, -3.6625e-03, -6.3307e-05,\n 1.1196e-03, 1.8770e-03, -1.1955e-03, 4.1922e-03, 5.6525e-03,\n -3.0654e-04, 4.4502e-03, -1.1090e-02, -2.1486e-03, -6.1124e-04,\n -7.5762e-04, 1.7326e-03, -9.1900e-04, 7.9343e-03, 3.6244e-03,\n 8.9290e-05, -1.9178e-04, 3.4626e-03, 8.3931e-04, 1.9274e-03,\n 6.4167e-03, -1.9996e-03, 1.6645e-03, -3.8767e-03, 2.8634e-04,\n -2.9078e-03, -1.1337e-03, 2.8414e-03, -3.2912e-03, 2.9846e-03,\n -1.0800e-03, -1.9853e-03, 2.0112e-03, 9.9751e-04, 2.5994e-03,\n -1.8780e-03, 3.0010e-03, 1.7698e-03, -4.5803e-03, -3.7054e-03,\n -4.6714e-04, 4.1066e-03, 1.8244e-03, 1.8000e-03, 6.0085e-03,\n 2.0967e-03, -1.3409e-03, -2.6004e-03, 4.2883e-03, -3.8427e-04,\n 3.4389e-03, -5.3578e-03, 1.7998e-03, 3.4548e-03, -1.8643e-03,\n 1.6837e-03, 1.2631e-03, 3.4992e-03, 2.3892e-03, 2.7743e-03,\n -3.4071e-03, 6.6438e-04, 2.4370e-03, 2.5256e-03, -4.7778e-03,\n -2.4122e-03, 1.3776e-03, -5.7360e-04, -2.0574e-03, 3.5375e-03,\n 4.4453e-03, 4.9028e-04, 9.4920e-04, -2.0866e-03, -2.7016e-04,\n -1.9334e-03, 5.8010e-04, 4.3880e-03, -6.2957e-04, -2.5803e-03,\n -1.3842e-03, -3.0487e-04, -1.1014e-03, 1.4077e-03, 2.4392e-03,\n -6.9983e-04, 1.8933e-03, 6.5714e-04, -2.6230e-03, -1.3126e-03,\n -2.0840e-04, 7.5329e-03, -2.5672e-03, 1.1949e-04, 2.5239e-03,\n 4.3178e-04, -8.1677e-04, 3.3945e-03, 6.4255e-04, 7.0411e-04,\n 8.4216e-04, 1.8698e-03], device='cuda:0')",
|
| 13 |
+
"exp_avg_sq": "tensor([3.9580e-05, 1.4573e-04, 1.9525e-04, 2.1761e-04, 1.0925e-04, 3.2656e-04,\n 2.6181e-04, 1.1504e-04, 1.5358e-04, 1.3930e-04, 2.0814e-04, 1.3503e-04,\n 1.0719e-04, 1.3707e-04, 3.5046e-04, 7.6724e-05, 1.7543e-04, 2.3621e-04,\n 6.4244e-05, 1.2932e-04, 1.1417e-04, 1.7524e-04, 2.6691e-04, 1.7640e-04,\n 8.2294e-05, 1.1563e-04, 2.1574e-04, 2.5255e-04, 1.5854e-04, 1.9049e-04,\n 1.7276e-04, 2.0522e-04, 7.0531e-05, 3.6507e-04, 2.4686e-04, 1.3923e-04,\n 1.2685e-04, 1.8399e-04, 2.1238e-04, 2.3072e-04, 1.0859e-04, 7.2823e-05,\n 2.0212e-04, 8.6798e-05, 1.6325e-04, 1.3869e-04, 1.2498e-04, 9.1140e-05,\n 9.3330e-05, 1.1777e-04, 1.5073e-04, 1.2488e-04, 3.6929e-04, 1.5653e-04,\n 2.2337e-04, 1.7893e-04, 1.4323e-04, 1.3225e-04, 1.8051e-04, 1.4167e-04,\n 7.8831e-05, 1.6683e-04, 9.9835e-05, 3.3808e-04, 2.2553e-04, 2.7497e-04,\n 2.1231e-04, 2.3227e-04, 1.3292e-04, 2.3606e-04, 1.3174e-04, 2.6829e-04,\n 2.2305e-04, 2.3082e-04, 1.6634e-04, 1.6873e-04, 3.0263e-04, 9.7722e-05,\n 1.2254e-04, 2.8399e-04, 1.5289e-04, 1.4631e-04, 1.1871e-04, 1.7995e-04,\n 2.2778e-04, 1.6899e-04, 2.9110e-04, 8.6954e-05, 1.7072e-04, 1.3276e-04,\n 1.5083e-04, 6.2353e-05, 1.5636e-04, 6.1455e-04, 1.9374e-04, 2.1486e-04,\n 1.9803e-04, 2.5353e-04, 2.1090e-04, 1.4718e-04, 1.1512e-04, 1.4077e-04,\n 1.2144e-04, 9.9694e-05, 1.3141e-04, 1.0102e-04, 1.7896e-04, 9.6238e-05,\n 1.9308e-04, 6.8624e-05, 3.8440e-04, 6.6086e-05, 1.8813e-04, 1.1272e-04,\n 1.9027e-04, 1.0111e-04, 1.0357e-04, 3.2750e-04, 1.8922e-04, 1.1096e-04,\n 1.3921e-04, 2.1477e-04, 2.9440e-04, 1.7645e-04, 2.1777e-04, 1.6115e-04,\n 9.6899e-05, 2.3615e-04, 2.4596e-04, 2.0575e-04, 1.9231e-04, 3.4021e-04,\n 1.4721e-04, 1.8355e-04, 2.6938e-04, 8.1464e-05, 7.1452e-05, 1.1355e-04,\n 1.9305e-04, 1.1567e-04, 1.4918e-04, 2.5000e-04, 1.9050e-04, 2.6035e-04,\n 1.6531e-04, 2.2872e-04, 1.1759e-04, 1.5251e-04, 4.3588e-04, 9.3211e-05,\n 1.3208e-04, 2.7472e-04, 2.5055e-04, 2.1278e-04, 1.2256e-04, 1.6070e-04,\n 9.8579e-05, 9.3350e-05, 1.6191e-04, 1.1261e-04, 2.5063e-04, 1.7917e-04,\n 2.1064e-04, 1.2400e-04, 1.8241e-04, 1.0413e-04, 2.0252e-04, 1.9032e-04,\n 2.1953e-04, 2.5579e-04, 1.1449e-04, 2.1244e-04, 1.3946e-04, 1.3630e-04,\n 8.3813e-05, 7.9570e-05, 1.4667e-04, 1.7330e-04, 1.3167e-04, 1.3544e-04,\n 1.5924e-04, 1.8544e-04, 5.0649e-04, 1.4569e-04, 6.8415e-05, 2.2498e-04,\n 2.4325e-04, 2.2475e-04, 3.0557e-04, 1.9246e-04, 1.3699e-05, 1.3601e-04,\n 6.5031e-05, 9.1897e-05, 1.4472e-04, 2.0701e-04, 9.2776e-05, 9.4943e-05,\n 1.8145e-04, 1.0690e-04, 2.1160e-04, 1.0185e-04, 1.6619e-04, 6.6340e-05,\n 9.4333e-05, 3.5354e-04, 1.5518e-04, 1.1057e-04, 1.3259e-04, 1.2509e-04,\n 1.2542e-04, 1.2472e-04, 1.2812e-04, 1.7526e-04, 1.6126e-04, 1.9294e-04,\n 2.0154e-04, 1.3200e-04, 1.7420e-04, 1.8875e-04, 1.7489e-04, 2.6434e-04,\n 9.0441e-05, 1.6814e-04, 1.2016e-04, 1.6545e-04, 1.1213e-04, 1.6174e-04,\n 1.3191e-04, 3.0184e-04, 1.9457e-04, 2.2462e-04, 1.4175e-04, 1.0245e-04,\n 1.0437e-04, 9.3696e-05, 8.6675e-05, 1.1649e-04, 1.6108e-04, 1.8009e-04,\n 9.4897e-05, 1.6744e-04, 1.1453e-04, 2.3519e-04, 5.9175e-05, 1.6106e-04,\n 2.4631e-04, 1.5065e-04, 1.3058e-04, 1.5854e-04, 2.2209e-04, 2.0896e-04,\n 9.2085e-05, 1.5845e-04, 1.9781e-04, 1.5109e-04, 1.1682e-04, 1.7359e-04,\n 1.5194e-04, 2.3285e-04, 1.5615e-04, 8.3650e-05, 1.4226e-04, 1.0657e-04,\n 1.6818e-04, 1.8424e-04, 1.0554e-04, 1.3755e-04, 1.1385e-04, 8.4061e-05,\n 9.8724e-05, 1.7224e-04, 1.1273e-04, 2.4146e-04, 1.0049e-04, 1.9750e-04,\n 2.2684e-04, 1.1199e-04, 2.1350e-04, 1.1349e-04, 1.1351e-04, 1.4654e-04,\n 1.4720e-04, 1.7307e-04, 1.7796e-04, 1.5613e-04, 2.9459e-04, 1.6749e-04,\n 1.2703e-05, 2.4267e-04, 1.6418e-04, 8.9549e-05, 2.0229e-04, 3.2403e-04,\n 2.2184e-04, 1.3965e-05, 1.4163e-04, 2.9894e-04, 1.1313e-04, 3.3625e-04,\n 1.8446e-04, 2.3056e-04, 1.8734e-04, 1.2444e-04, 2.2768e-04, 1.2658e-04,\n 9.8068e-05, 9.2742e-05, 1.7799e-04, 1.3232e-04, 2.4321e-04, 2.5457e-04,\n 3.0479e-04, 1.8330e-04, 1.9426e-04, 1.7227e-04, 1.7216e-04, 1.7389e-04,\n 9.1604e-05, 1.1852e-04, 2.7790e-04, 2.5261e-04, 1.2512e-04, 2.3921e-04,\n 1.9241e-04, 2.2489e-04, 1.6382e-04, 1.5453e-04, 6.8845e-05, 1.2863e-04,\n 1.8345e-04, 2.3605e-04, 1.0006e-04, 1.0167e-04, 2.6062e-04, 2.2722e-04,\n 4.3494e-05, 9.2343e-05, 1.3445e-04, 3.6931e-04, 7.8193e-05, 2.0200e-04,\n 2.3983e-04, 8.2945e-05, 1.6290e-04, 1.5460e-04, 1.4777e-04, 1.5330e-04,\n 1.7176e-04, 2.1355e-04, 9.9718e-05, 2.4342e-04, 9.6084e-05, 1.1338e-04,\n 2.4615e-04, 2.1225e-04, 1.4786e-04, 1.9118e-04, 1.0346e-04, 1.6692e-04,\n 2.3532e-04, 1.4719e-04, 1.1618e-04, 7.8781e-05, 1.4614e-04, 1.3583e-04,\n 7.6553e-05, 9.9334e-05, 2.7284e-04, 1.3559e-04, 1.5362e-04, 1.0318e-05,\n 7.1115e-05, 1.2140e-04, 1.6274e-04, 1.4023e-04, 3.0912e-04, 2.1811e-04,\n 2.1118e-04, 1.5916e-04, 5.1196e-04, 2.1245e-04, 1.2052e-04, 1.1415e-04,\n 1.6300e-04, 2.0708e-04, 1.5043e-04, 8.3094e-05, 1.4875e-04, 7.0299e-05,\n 9.6857e-05, 1.0545e-04, 1.1373e-04, 1.7515e-04, 1.6152e-04, 2.1669e-04,\n 1.2106e-04, 2.2236e-04, 1.1197e-04, 2.1907e-04, 1.0982e-04, 1.2585e-04,\n 1.6252e-04, 9.9611e-05, 1.3240e-04, 6.4679e-05, 7.8128e-05, 2.0650e-04,\n 1.7112e-04, 1.7041e-04, 1.3634e-04, 1.9261e-04, 2.0769e-04, 8.6754e-05,\n 1.8438e-04, 1.5344e-04, 1.5724e-04, 2.4949e-04, 2.2573e-04, 1.3595e-04,\n 1.3677e-04, 2.0219e-04, 1.2219e-04, 3.7145e-04, 1.0378e-04, 1.8392e-04,\n 1.0518e-04, 1.4153e-04, 2.0256e-04, 1.2213e-04, 2.1290e-04, 3.5128e-04,\n 1.9764e-04, 1.7055e-04, 1.7250e-04, 1.4218e-04, 1.7572e-04, 1.6331e-04,\n 2.2609e-04, 2.4648e-04, 8.1689e-05, 3.0863e-04, 2.4544e-04, 2.5962e-04,\n 9.8918e-05, 1.4535e-04, 2.3528e-04, 1.2681e-04, 2.2662e-04, 2.5970e-04,\n 1.6561e-04, 2.7986e-04, 1.2388e-04, 2.0757e-04, 5.5303e-04, 1.0018e-04,\n 1.4743e-04, 1.2686e-04, 1.3737e-04, 1.3739e-04, 2.6879e-04, 1.8491e-04,\n 1.3543e-04, 1.8425e-04, 7.8322e-05, 8.7102e-05, 1.6875e-04, 2.1523e-04,\n 1.1828e-04, 2.6345e-04, 2.2817e-04, 9.4096e-05, 1.0682e-04, 1.3989e-04,\n 3.1330e-04, 8.8217e-05, 1.3949e-04, 2.1332e-04, 1.4801e-04, 2.1541e-04,\n 2.2870e-04, 1.3335e-04, 5.9164e-05, 1.6452e-04, 3.0500e-04, 1.3147e-04,\n 6.5223e-05, 1.1539e-04, 2.3683e-04, 1.0606e-04, 1.4133e-04, 2.1825e-04,\n 4.4378e-04, 1.8223e-04, 1.7469e-04, 1.0525e-04, 3.0025e-04, 1.6830e-04,\n 1.7680e-04, 1.4264e-04, 1.8489e-04, 2.1031e-04, 1.1897e-04, 4.9972e-05,\n 2.2349e-04, 1.6746e-04, 2.6275e-04, 1.5006e-04, 2.2386e-04, 3.3769e-05,\n 1.3311e-04, 2.4531e-04], device='cuda:0')"
|
| 14 |
+
},
|
| 15 |
+
"2": {
|
| 16 |
+
"step": "tensor(3754.)",
|
| 17 |
+
"exp_avg": "tensor([-1.3719e-03, -2.0500e-03, 1.9767e-04, 4.8321e-03, 2.7397e-03,\n -2.2437e-03, 7.0454e-04, 5.4498e-03, 6.3850e-04, 5.4943e-04,\n -2.0336e-03, -3.4387e-03, 2.6600e-03, 8.3353e-03, -3.2104e-03,\n -1.2033e-03, 1.0222e-03, 3.8084e-03, 2.0157e-04, -1.3417e-03,\n 4.4240e-03, 1.2131e-03, -1.1689e-03, 2.3903e-03, -8.9493e-03,\n 3.7973e-03, 6.8942e-03, -7.3185e-04, 1.5300e-03, -8.9937e-04,\n -1.6351e-03, -4.8014e-03, -1.5292e-03, 4.9955e-03, 9.7128e-04,\n 1.7580e-03, -1.3376e-03, -2.2609e-03, 3.5169e-03, 1.1361e-03,\n 7.6220e-03, 2.7065e-03, -3.7115e-05, -1.4843e-03, 9.2169e-05,\n -3.2745e-03, -6.2576e-03, 9.8429e-04, 3.4935e-03, 2.1272e-04,\n -2.6416e-03, -4.7172e-03, -4.0176e-03, -1.5630e-03, -7.0226e-03,\n -1.5251e-03, -1.6509e-03, -1.7042e-04, 1.8498e-03, 2.8207e-03,\n 3.2392e-03, 3.9742e-03, -1.7332e-03, 2.1212e-04, -8.0723e-03,\n 3.5745e-03, 6.1053e-03, -2.2802e-03, -4.2194e-03, 4.7331e-03,\n -6.3274e-04, -3.9962e-04, 9.4534e-04, -1.3328e-04, -1.0699e-03,\n 2.2504e-03, 1.5502e-02, -1.6117e-02, -6.2887e-03, -8.7188e-04,\n 3.1398e-03, -2.3369e-03, 4.6835e-03, -2.0712e-03, -4.5536e-03,\n 6.8595e-03, -1.7618e-03, -1.4461e-03, 1.9409e-03, -6.0204e-03,\n -1.1736e-03, 2.7584e-03, -7.8350e-03, -1.2656e-03, -6.9653e-04,\n -2.7044e-03, -7.9280e-03, -1.8900e-04, 6.9229e-03, -3.2926e-03,\n -4.2349e-03, 5.0916e-04, 1.1347e-03, 6.5064e-03, 4.1473e-04,\n -5.1135e-03, 9.3562e-04, 1.6578e-03, -1.1119e-03, -3.0729e-05,\n 1.7587e-03, -6.0359e-03, 1.4171e-02, -1.8703e-03, 1.7619e-03,\n 2.3906e-03, -1.5154e-03, -1.1050e-03, -3.0043e-03, 5.4402e-03,\n 6.7512e-03, 5.8188e-04, -7.2460e-04, -9.9300e-04, 1.1500e-05,\n -5.8476e-03, 1.1694e-04, 4.2084e-03, 6.3540e-03, 2.6811e-03,\n -1.0809e-03, 4.6241e-03, -4.7478e-04, 1.4791e-03, -7.5009e-03,\n -2.0332e-03, 3.7923e-03, -3.1869e-03, 1.6696e-03, -1.4813e-03,\n -1.3933e-03, 1.8279e-03, 4.7297e-03, 2.8584e-03, 3.4396e-03,\n -4.1479e-03, 1.6013e-03, -3.5751e-03, 2.1509e-03, -9.7534e-04,\n -8.2669e-03, 2.8510e-03, -7.1100e-04, 2.8338e-03, -6.1971e-03,\n 4.0565e-03, 2.3599e-03, -3.7253e-03, 2.3684e-03, -4.2298e-03,\n -2.4903e-03, 2.0611e-03, -8.5711e-03, -1.4749e-03, 1.9298e-04,\n -2.6741e-03, 3.6321e-03, -1.2352e-03, 6.6707e-03, -7.1817e-04,\n 3.5911e-03, 2.4586e-03, -5.7307e-04, 4.3893e-03, 1.9448e-03,\n 3.2219e-04, -3.6677e-04, 3.4401e-05, -3.5946e-03, 7.6635e-03,\n -1.0496e-03, 7.6160e-03, -1.5811e-03, -1.3128e-03, 2.4496e-03,\n 7.8393e-04, -2.3294e-03, 8.6624e-04, -3.8759e-03, -1.5546e-04,\n 5.6052e-45, -2.4608e-03, 6.6220e-04, -3.0858e-03, -3.6475e-03,\n 1.7749e-03, -2.4538e-03, -7.1193e-03, -3.8348e-03, 1.0132e-03,\n 1.7283e-03, -5.4700e-03, -3.9496e-03, 8.4109e-04, -1.5787e-03,\n -2.7327e-03, -6.3661e-04, 1.7234e-03, 2.6650e-03, 3.1973e-03,\n -8.7308e-04, -1.3060e-03, 2.2832e-03, -1.5349e-04, -6.3979e-04,\n 3.4811e-03, 2.3990e-03, 3.6056e-04, -8.0078e-04, -2.1169e-03,\n 6.8689e-03, -5.7097e-03, -3.0185e-03, -3.5278e-03, 1.0896e-03,\n -6.4248e-03, -1.1783e-03, 1.7921e-05, -4.1728e-03, -3.5969e-03,\n -5.0277e-03, -2.9768e-03, 2.6593e-03, -5.3467e-03, -5.0875e-03,\n -1.4797e-03, 2.3504e-03, -8.5540e-04, -2.0839e-03, 2.4656e-04,\n -2.9842e-03, -2.6683e-04, 2.6014e-03, -8.9039e-03, 7.6566e-04,\n -4.6567e-03, -1.1042e-03, 3.0265e-03, 2.2202e-03, 1.6322e-03,\n 1.8939e-03, -4.8255e-03, 1.3510e-03, 2.0129e-03, 2.9672e-03,\n -2.3568e-03, -1.1355e-03, -3.7459e-03, -4.4464e-04, -1.3732e-03,\n -5.4503e-03, -4.2559e-04, 3.4751e-03, 9.5452e-04, -2.7296e-03,\n 6.3456e-03, 2.5452e-03, 7.0415e-06, 1.6246e-03, -2.6599e-04,\n 4.0974e-03, 3.4126e-03, 2.2551e-04, 7.1736e-03, 3.9940e-03,\n -4.7477e-03, -6.9284e-04, -4.5033e-03, -2.9448e-03, -2.1929e-03,\n -3.6686e-04, 3.6867e-03, 2.6906e-03, 5.6718e-03, 2.9289e-03,\n -2.9982e-03, -2.5126e-03, -2.5718e-03, 5.6052e-45, -4.3894e-03,\n -3.1275e-03, -3.3664e-03, 1.6527e-03, -1.0360e-02, -5.1515e-03,\n 5.6052e-45, -2.4321e-03, -1.9274e-03, 2.2455e-03, 3.1221e-03,\n -9.0518e-04, -4.8290e-04, 5.3521e-03, -1.2330e-03, 1.5473e-03,\n -2.4620e-03, -6.3240e-03, 2.0754e-03, 6.3765e-03, 2.7270e-03,\n 8.7065e-04, -2.4552e-03, 9.5269e-03, -1.6520e-03, 9.9524e-04,\n -5.8835e-03, 8.1140e-04, -8.7747e-04, -7.0069e-04, 5.4370e-04,\n 2.4407e-03, 7.1916e-03, -2.9477e-04, -1.1874e-03, 2.8071e-03,\n 5.9958e-03, -1.8503e-03, -1.0712e-03, -3.4946e-03, 2.6927e-03,\n -9.5229e-03, -2.3001e-03, -1.3243e-03, -4.4701e-03, -1.3686e-03,\n -2.4609e-03, 9.4936e-04, 1.5041e-03, 4.2753e-03, -1.0049e-03,\n 3.1013e-03, 1.9815e-03, -8.3938e-04, -2.1227e-03, -1.7472e-03,\n -3.2248e-03, -1.2407e-03, -2.6450e-03, -1.6973e-03, 8.6027e-03,\n 8.5959e-04, 1.1799e-02, -8.0883e-04, -3.4364e-03, -6.8581e-03,\n 4.4990e-03, 1.1929e-03, 3.7268e-03, -1.2716e-03, -4.0251e-04,\n -3.1434e-03, -4.2422e-04, -3.4776e-03, -2.0542e-03, -1.8227e-03,\n -3.3823e-05, 2.3837e-04, -4.7382e-03, -4.9632e-04, 6.1268e-03,\n -1.4165e-03, 5.6052e-45, 8.2372e-03, 3.2994e-03, -5.6951e-03,\n 1.6807e-03, 5.0476e-03, -3.4850e-03, -8.2292e-03, 1.0402e-03,\n -1.2604e-03, 1.6033e-03, -5.8185e-03, 7.0421e-03, -3.0010e-05,\n -1.9091e-03, 1.8232e-03, 2.3912e-03, 3.3916e-03, 6.7348e-04,\n -1.5505e-03, 1.9295e-04, -5.6878e-03, -5.1406e-03, 1.1523e-03,\n 3.1227e-03, -4.9939e-04, 1.7581e-03, 1.6834e-03, -7.0039e-04,\n 4.0287e-03, 8.3342e-04, -4.3347e-03, 5.1219e-03, -5.8664e-04,\n 9.1535e-04, 2.5057e-03, -6.8348e-03, -2.9741e-03, -8.8763e-04,\n 1.3416e-03, 2.8614e-03, -1.9057e-03, 2.2747e-02, 6.0790e-03,\n -2.7620e-03, 6.1199e-03, -1.2110e-02, -3.0207e-03, 5.7203e-04,\n -1.0282e-03, 1.2997e-03, -2.8916e-03, 7.0600e-03, 5.7062e-03,\n 7.2394e-04, -2.3383e-04, 7.4035e-03, 8.3399e-04, 1.7240e-03,\n 6.2622e-03, -3.2879e-03, 3.0298e-04, -4.6004e-03, 2.5102e-03,\n -4.3810e-03, -2.1094e-03, 4.0704e-03, -4.2255e-03, 5.2591e-03,\n -2.5103e-03, -3.3373e-03, 1.0151e-03, 1.0912e-03, 4.4999e-03,\n -2.5893e-03, 1.8178e-03, 3.1483e-03, -6.0817e-03, -2.6212e-03,\n -2.6566e-04, 3.8066e-03, 3.5681e-03, 3.6578e-03, 4.2109e-03,\n 2.3723e-03, -1.0823e-03, -3.8644e-03, 3.7440e-03, 1.6508e-04,\n 1.4626e-03, -5.1783e-03, 2.2464e-03, 5.1227e-03, -3.3208e-03,\n 1.7018e-03, 3.7861e-03, 4.7806e-03, 3.6857e-03, 3.5169e-03,\n -6.1522e-03, 8.3321e-04, 3.4366e-03, 3.9894e-03, -3.9909e-03,\n -3.5723e-03, 2.8367e-03, -6.9980e-04, -1.7069e-03, 4.6010e-03,\n 7.5646e-03, 8.8311e-04, 1.1128e-02, -3.9325e-03, -7.7033e-04,\n -2.2018e-03, 5.0996e-04, 6.7734e-03, -1.6890e-03, -3.9625e-03,\n -3.3608e-03, -1.1282e-03, -2.9917e-03, 1.3233e-03, 2.5721e-03,\n -1.5152e-03, 3.4936e-03, 3.9144e-04, -4.0170e-03, -2.0180e-03,\n -2.8471e-03, 7.4206e-03, -4.7447e-03, 7.1115e-05, 4.3443e-03,\n 2.1609e-03, -1.2057e-03, 4.3278e-03, -7.3956e-04, 7.9911e-03,\n -7.4285e-04, 1.4023e-03], device='cuda:0')",
|
| 18 |
+
"exp_avg_sq": "tensor([7.0580e-04, 1.2074e-04, 1.3505e-04, 2.6201e-04, 1.6057e-04, 2.0415e-04,\n 2.0353e-04, 1.9596e-04, 1.0147e-04, 1.1522e-04, 3.4943e-04, 1.6257e-04,\n 1.2803e-04, 2.2736e-04, 1.7206e-04, 9.2292e-04, 1.2161e-04, 4.8825e-04,\n 4.0284e-05, 1.6936e-04, 2.5678e-04, 2.1355e-04, 1.3676e-04, 9.5494e-05,\n 2.5940e-04, 1.0327e-04, 6.4010e-04, 8.4730e-05, 1.6908e-04, 9.7296e-05,\n 1.9617e-04, 2.8177e-04, 1.1320e-04, 4.1035e-04, 9.8593e-05, 1.6040e-04,\n 1.4161e-04, 2.1678e-04, 1.8941e-04, 4.2543e-04, 3.1917e-04, 2.6329e-03,\n 9.8131e-05, 3.8892e-04, 3.2583e-04, 5.2595e-04, 3.8837e-04, 1.2655e-04,\n 3.3746e-04, 2.1901e-04, 1.1430e-04, 1.2777e-03, 3.2535e-04, 1.8876e-04,\n 2.9157e-04, 1.3428e-04, 2.4435e-04, 5.7089e-05, 1.8462e-04, 2.5305e-04,\n 2.6928e-04, 1.2023e-04, 2.4419e-04, 1.9343e-04, 3.7530e-04, 2.5849e-04,\n 3.4691e-04, 2.4171e-04, 1.2047e-04, 1.4452e-04, 1.2317e-04, 1.2471e-04,\n 1.1146e-04, 7.9223e-04, 1.2170e-04, 1.2414e-04, 6.8294e-04, 8.1833e-04,\n 3.4788e-04, 1.6501e-04, 1.8332e-04, 6.9679e-05, 1.5198e-04, 5.7796e-04,\n 1.4324e-04, 3.8498e-04, 2.2327e-04, 6.1022e-05, 3.1909e-04, 4.1102e-04,\n 1.4156e-04, 1.1974e-03, 6.8321e-04, 2.4513e-04, 8.8538e-05, 3.3242e-04,\n 1.4801e-04, 3.0661e-04, 1.7247e-04, 3.2521e-04, 3.6901e-04, 1.7652e-04,\n 1.0815e-04, 2.8099e-04, 6.3611e-05, 1.8322e-04, 1.4025e-04, 1.1066e-04,\n 9.3173e-05, 9.5348e-05, 3.6061e-04, 6.4505e-05, 3.1096e-03, 2.1271e-04,\n 6.7848e-05, 7.2426e-05, 3.8534e-04, 1.9813e-04, 1.0854e-04, 3.1194e-04,\n 1.4759e-04, 8.2308e-05, 1.9876e-04, 3.9714e-04, 2.1490e-04, 1.8462e-04,\n 1.6654e-04, 3.1972e-04, 2.8894e-04, 1.3020e-04, 1.4870e-04, 3.5833e-04,\n 1.4562e-04, 1.1527e-04, 3.8820e-04, 7.9937e-04, 1.5982e-04, 1.0106e-04,\n 6.7860e-04, 1.4764e-04, 1.2140e-04, 5.1308e-05, 1.8934e-04, 8.3349e-04,\n 2.7567e-04, 1.9414e-04, 2.7632e-04, 8.4680e-05, 2.4894e-04, 1.6187e-03,\n 1.9929e-04, 2.0182e-04, 7.8461e-05, 1.0134e-04, 4.4915e-04, 4.7481e-04,\n 2.3482e-04, 1.4775e-04, 8.1032e-04, 2.6930e-04, 1.7026e-04, 2.7751e-04,\n 5.6187e-04, 1.0107e-04, 2.4882e-04, 3.0808e-04, 4.3959e-04, 9.2932e-05,\n 2.3755e-04, 1.5132e-03, 4.5760e-04, 1.9379e-04, 9.2675e-05, 1.0560e-04,\n 5.1885e-04, 6.3178e-05, 1.1741e-04, 1.4698e-04, 1.1189e-04, 5.2644e-04,\n 1.1269e-04, 2.9166e-04, 2.3022e-04, 2.0156e-04, 1.0610e-04, 2.3213e-04,\n 2.5239e-04, 1.9805e-04, 2.0361e-04, 1.9933e-04, 3.5321e-06, 1.1707e-04,\n 1.6005e-04, 8.4821e-05, 1.5355e-04, 1.5891e-04, 1.0414e-04, 1.0225e-04,\n 9.1765e-05, 1.6152e-04, 7.9942e-05, 2.0709e-04, 2.7680e-04, 3.7795e-04,\n 1.1768e-04, 2.5876e-04, 1.6952e-04, 1.4383e-04, 1.3195e-04, 1.4554e-04,\n 7.5846e-04, 2.1259e-04, 3.4873e-04, 1.0064e-04, 3.4922e-05, 5.9832e-04,\n 8.5836e-05, 1.3869e-04, 2.4107e-04, 2.5534e-04, 1.6849e-04, 4.6021e-04,\n 2.2086e-04, 1.4392e-04, 1.1312e-04, 1.0729e-04, 2.5346e-04, 6.7460e-05,\n 7.1101e-04, 1.3046e-04, 4.1665e-04, 1.8440e-04, 4.6335e-04, 7.1557e-04,\n 2.6296e-04, 4.8031e-04, 3.1822e-04, 2.4090e-04, 1.7305e-04, 1.3837e-04,\n 1.9333e-04, 1.4944e-04, 2.5028e-04, 3.2208e-04, 6.4019e-05, 3.8915e-04,\n 1.5617e-04, 2.1905e-04, 8.9168e-05, 2.1575e-04, 3.6226e-04, 1.4213e-04,\n 1.3774e-04, 9.1685e-05, 6.1675e-05, 1.6831e-04, 7.0232e-05, 1.6688e-04,\n 2.0681e-04, 3.9716e-04, 4.5447e-04, 7.8539e-05, 1.6692e-04, 8.9858e-05,\n 3.8373e-04, 5.5777e-04, 2.0097e-03, 2.1486e-04, 1.1815e-04, 7.4996e-05,\n 1.3228e-04, 2.1836e-04, 8.9824e-05, 1.8424e-04, 1.2886e-04, 2.1842e-04,\n 4.1097e-04, 5.4911e-05, 1.4161e-04, 1.1131e-04, 1.7117e-04, 1.4373e-04,\n 2.8519e-04, 1.2997e-04, 1.5736e-04, 1.9189e-04, 1.6235e-04, 1.6901e-04,\n 6.6847e-08, 2.2798e-04, 2.7574e-04, 1.2337e-04, 1.9786e-04, 1.2653e-04,\n 2.1704e-04, 6.6367e-07, 1.8873e-04, 1.6334e-04, 2.5669e-04, 3.6645e-04,\n 4.5605e-04, 3.5419e-04, 1.1345e-04, 1.5790e-04, 1.2358e-04, 2.4171e-04,\n 3.0205e-04, 4.0105e-04, 4.4714e-04, 9.6770e-05, 1.1202e-04, 1.9776e-04,\n 4.1520e-04, 8.0005e-05, 8.9585e-05, 1.5427e-04, 9.6959e-05, 3.0546e-04,\n 3.1054e-05, 1.4869e-04, 1.2593e-04, 6.4348e-04, 5.1209e-05, 2.0239e-04,\n 3.3681e-04, 3.0150e-04, 1.0713e-04, 1.4735e-04, 2.0172e-04, 2.7885e-04,\n 2.0059e-04, 8.7734e-05, 8.6923e-05, 8.1599e-05, 4.0381e-04, 1.4380e-04,\n 2.1010e-04, 5.5019e-05, 2.9645e-04, 3.1914e-04, 1.2576e-04, 1.4720e-04,\n 1.6616e-04, 4.2782e-04, 3.2363e-04, 3.6379e-04, 8.7692e-05, 5.2217e-05,\n 1.9258e-04, 2.2155e-04, 1.0986e-04, 1.3375e-03, 1.6238e-04, 9.7684e-05,\n 1.8371e-03, 3.2841e-04, 1.3339e-04, 3.7877e-04, 1.5791e-04, 8.1279e-05,\n 4.0521e-04, 7.2467e-05, 4.3222e-04, 1.2578e-04, 1.2843e-04, 2.0542e-04,\n 2.4059e-04, 1.7328e-04, 1.8245e-04, 1.0754e-04, 2.2800e-04, 1.3955e-07,\n 5.0650e-04, 7.9706e-05, 1.1941e-04, 1.3653e-04, 2.9076e-04, 8.4456e-05,\n 1.5339e-04, 1.4316e-04, 1.8465e-04, 4.0724e-04, 3.6063e-04, 4.1117e-04,\n 8.9349e-05, 3.7133e-04, 2.3752e-04, 8.3894e-05, 3.1068e-04, 9.7324e-05,\n 1.9410e-04, 6.0496e-05, 1.8042e-04, 4.0277e-04, 2.0096e-04, 2.5690e-04,\n 1.1057e-04, 3.8812e-04, 1.1666e-04, 2.8401e-04, 7.9382e-05, 3.1845e-04,\n 1.1985e-04, 1.0218e-04, 4.0784e-04, 9.2370e-05, 7.5471e-05, 2.7112e-04,\n 2.5836e-04, 1.7334e-04, 1.4519e-04, 1.6404e-04, 1.5578e-04, 1.6231e-03,\n 9.2317e-05, 3.3163e-04, 2.8117e-04, 2.6234e-04, 2.0671e-04, 1.7064e-04,\n 1.8417e-04, 6.5265e-05, 7.4625e-04, 1.3781e-04, 1.9217e-04, 1.4462e-04,\n 1.1276e-04, 2.8254e-04, 1.2756e-04, 1.2041e-04, 1.3712e-04, 2.3698e-04,\n 1.6220e-04, 2.5672e-04, 4.1030e-04, 6.8473e-04, 6.9293e-04, 2.7651e-04,\n 2.2386e-04, 2.4393e-04, 4.1452e-04, 3.0018e-04, 1.4920e-04, 1.4608e-04,\n 3.0380e-04, 2.0502e-04, 1.9003e-04, 2.8929e-04, 2.6973e-04, 1.6549e-04,\n 7.8286e-05, 1.6115e-04, 3.2018e-04, 6.5010e-04, 2.7126e-04, 1.0676e-04,\n 4.3518e-04, 1.0337e-04, 9.8222e-05, 2.3327e-04, 1.5362e-04, 1.5634e-04,\n 2.0346e-04, 3.8069e-04, 1.8856e-04, 6.6551e-05, 4.0388e-04, 2.3872e-04,\n 3.1805e-04, 2.4911e-04, 2.7111e-04, 7.6182e-05, 1.7139e-04, 3.0696e-04,\n 2.1520e-04, 1.5325e-04, 3.3753e-04, 4.5031e-04, 1.3807e-04, 1.3997e-04,\n 3.5214e-04, 2.1478e-04, 3.3550e-03, 1.6307e-04, 2.5697e-04, 6.1223e-05,\n 6.9478e-05, 2.4941e-04, 3.0103e-04, 1.9512e-04, 4.4844e-04, 9.1715e-05,\n 4.6155e-04, 1.0889e-04, 8.4545e-05, 1.6298e-04, 4.6693e-04, 1.8639e-04,\n 2.2615e-04, 1.2105e-04, 3.8348e-04, 1.0200e-04, 3.2915e-04, 1.6562e-04,\n 4.4909e-04, 2.5553e-04, 1.5807e-04, 2.4131e-04, 1.4003e-04, 2.4866e-03,\n 1.6998e-04, 1.4101e-04], device='cuda:0')"
|
| 19 |
+
},
|
| 20 |
+
"3": {
|
| 21 |
+
"step": "tensor(3754.)",
|
| 22 |
+
"exp_avg": "tensor([-4.4047e-04, -1.6539e-03, 4.7333e-04, 3.1365e-03, 1.9440e-03,\n -3.2075e-03, 2.1878e-04, 3.8182e-03, 3.4011e-04, 2.2675e-04,\n -1.4460e-03, -1.7237e-03, 2.1929e-03, 6.5514e-03, -2.8706e-03,\n -3.9118e-04, 1.2447e-03, 3.0955e-03, 4.4587e-04, -1.4378e-03,\n 3.2296e-03, 7.5075e-04, -6.0264e-04, 2.5440e-03, -3.8316e-03,\n 1.8425e-03, 4.1411e-03, -3.8555e-04, 1.5195e-03, -1.0913e-04,\n -8.0810e-04, -3.0381e-03, -5.8225e-04, 3.5472e-03, 1.2735e-03,\n 1.8858e-03, -6.5384e-04, -1.3253e-03, 3.6555e-03, 9.8600e-04,\n 4.5198e-03, 1.6751e-03, -1.4533e-04, -5.1313e-04, -1.2750e-05,\n -1.1935e-03, -5.9371e-03, 5.7720e-04, 1.8498e-03, 8.4818e-05,\n -1.1184e-03, -1.8889e-03, -3.0648e-03, -7.0514e-04, -4.2672e-03,\n -1.6229e-03, -7.9513e-04, -5.4965e-04, 1.3211e-03, 1.6321e-03,\n 2.2160e-03, 3.5554e-03, -1.1072e-03, 8.2200e-04, -6.1147e-03,\n 2.3396e-03, 3.6442e-03, -1.6308e-03, -3.0500e-03, 4.5324e-03,\n -9.2677e-04, -5.5055e-04, 7.4072e-04, 5.7465e-05, -1.3444e-03,\n 1.8997e-03, 9.0477e-03, -6.6309e-03, -2.8898e-03, -9.4071e-04,\n 1.7319e-03, -2.1697e-03, 3.1646e-03, -9.7059e-04, -4.0998e-03,\n 5.2493e-03, -1.4087e-03, -1.2181e-03, 2.1037e-03, -3.1504e-03,\n -1.4243e-03, 1.0854e-03, -4.9873e-03, -9.5268e-04, -2.7572e-04,\n -1.7116e-03, -6.8997e-03, 1.0940e-04, 4.8411e-03, -1.4201e-03,\n -1.5782e-03, 4.1189e-04, 5.7127e-04, 4.0234e-03, 9.0068e-05,\n -3.1263e-03, 8.1591e-04, 1.6104e-03, -1.2286e-03, 3.9313e-04,\n 1.5166e-03, -4.2407e-03, 5.4970e-03, -8.6086e-04, 1.4035e-03,\n 1.6473e-03, -1.4528e-03, -1.2074e-03, -2.6919e-03, 3.5437e-03,\n 5.5252e-03, 1.1348e-03, -1.0780e-03, -4.8202e-04, 7.3003e-04,\n -4.4295e-03, 4.8883e-04, 2.4423e-03, 4.3824e-03, 2.7424e-03,\n -1.3995e-03, 3.9310e-03, -6.9689e-04, 1.2608e-03, -4.9229e-03,\n -4.4741e-04, 2.1044e-03, -2.1957e-03, 2.1230e-03, -1.8988e-03,\n -1.0193e-03, 6.5515e-04, 3.2797e-03, 2.8146e-03, 2.7059e-03,\n -2.8812e-03, 1.3665e-03, -2.4552e-03, 3.2136e-03, -5.8814e-04,\n -5.2502e-03, 3.1479e-03, -8.1437e-04, 2.9589e-03, -3.2859e-03,\n 1.3519e-03, 2.4060e-03, -2.2047e-03, 1.9354e-03, -2.4126e-03,\n -9.7110e-04, 1.3599e-03, -5.3001e-03, -1.0159e-03, -1.6077e-04,\n -1.7083e-03, 3.3028e-03, -8.3022e-04, 4.9776e-03, -3.3761e-04,\n 2.2941e-03, 1.8788e-03, -7.8100e-04, 3.2007e-03, 1.2217e-03,\n 5.2424e-04, -6.4373e-05, -6.1439e-04, -2.7753e-03, 4.9249e-03,\n -2.9407e-04, 4.9853e-03, 2.0746e-04, -3.1863e-04, 1.1353e-03,\n 2.8091e-04, -1.4641e-03, 7.4903e-04, -3.0541e-03, -1.6581e-04,\n 5.6052e-45, -1.7783e-03, 8.4872e-04, -2.0866e-03, -2.0469e-03,\n 1.4684e-03, -8.4836e-04, -6.0307e-03, -2.7740e-03, 9.5375e-04,\n 1.0728e-03, -3.7240e-03, -2.3736e-03, 4.4435e-04, -8.5016e-04,\n -2.1711e-03, -3.4538e-04, 1.1102e-03, 2.3476e-03, 2.5777e-03,\n -1.6695e-04, -8.1422e-04, 1.4443e-03, -8.8846e-05, -4.5043e-04,\n 2.5622e-03, 2.5779e-03, 3.5473e-04, -6.9471e-04, -1.9283e-03,\n 5.2829e-03, -3.9996e-03, -1.8147e-03, -1.9545e-03, 6.5435e-05,\n -3.6777e-03, -1.1546e-03, 2.1745e-04, -2.8743e-03, -2.8699e-03,\n -3.2061e-03, -3.1855e-03, 1.1345e-03, -2.0892e-03, -2.5790e-03,\n -1.0556e-03, 2.0828e-03, -8.7022e-04, -9.6309e-04, 3.9508e-04,\n -2.0394e-03, -4.4556e-04, 1.9703e-03, -6.2184e-03, 4.1692e-04,\n -2.6931e-03, -1.2035e-03, 2.7629e-03, 2.4077e-03, 1.6545e-03,\n 4.5016e-04, -4.6031e-03, 1.2937e-03, 2.5505e-03, 2.8668e-03,\n -1.4081e-03, -1.0094e-03, -2.6134e-03, 2.3079e-04, -4.6385e-04,\n -2.0837e-03, 7.2006e-05, 2.5522e-03, 7.3583e-04, -1.2809e-03,\n 3.9570e-03, 1.2861e-03, 3.4730e-04, 1.0228e-03, -1.4135e-04,\n 1.8332e-03, 2.8918e-03, 8.3717e-04, 5.3763e-03, 2.5829e-03,\n -2.2430e-03, -1.9978e-04, -4.1596e-03, -2.2217e-03, -1.8505e-03,\n 2.6270e-04, 2.4674e-03, 1.3075e-03, 4.8762e-03, 1.7366e-03,\n -2.3887e-03, -8.7320e-04, -2.8373e-03, 5.6052e-45, -2.5562e-03,\n -2.2960e-03, -2.0838e-03, 1.3430e-03, -8.2534e-03, -4.1162e-03,\n 5.6052e-45, -1.3834e-03, -2.5375e-03, 1.5765e-03, 3.0625e-03,\n -2.7101e-04, -1.0648e-03, 5.9041e-03, -1.1249e-03, 1.4901e-03,\n -1.6423e-03, -4.1269e-03, 1.1387e-03, 4.3750e-03, 2.4157e-03,\n 7.4619e-04, -1.8672e-03, 6.7478e-03, -1.8169e-03, 4.1135e-04,\n -5.2512e-03, 1.5688e-04, -4.8559e-04, -3.3454e-04, 2.3253e-04,\n 3.0261e-03, 3.6480e-03, 8.5329e-05, -4.9043e-04, 2.1303e-03,\n 4.4436e-03, -1.4250e-03, -8.2785e-04, -1.9278e-03, 1.9101e-03,\n -7.9261e-03, -2.3358e-03, -1.0480e-03, -3.2782e-03, -1.0322e-03,\n -5.8902e-04, 1.7745e-04, 1.6598e-03, 2.4342e-03, -9.4046e-04,\n 2.2420e-03, 1.6116e-03, -1.6253e-03, -4.6738e-04, -4.6604e-04,\n -2.7357e-03, -1.3722e-03, -3.0602e-03, -1.6071e-03, 5.5343e-03,\n 7.7591e-04, 6.0489e-03, -5.5862e-04, -2.6750e-03, -2.3573e-03,\n 2.5619e-03, 1.0446e-03, 3.3763e-03, -5.9085e-04, -9.9668e-05,\n -2.6292e-03, -2.9041e-04, -1.7974e-03, -1.1073e-03, -1.6743e-03,\n 3.9334e-04, 7.5936e-04, -2.8909e-03, -4.3737e-04, 5.4307e-03,\n -1.9262e-04, 5.6052e-45, 3.6470e-03, 2.2420e-03, -4.0790e-03,\n 1.3551e-03, 4.1557e-03, -3.1829e-03, -5.6093e-03, 1.2332e-03,\n -7.4608e-04, 1.4587e-03, -3.6455e-03, 4.1160e-03, -1.2696e-05,\n -1.2116e-03, 8.6034e-04, 1.5580e-03, 1.9281e-03, 1.9906e-04,\n -5.0905e-04, 4.6784e-04, -3.9072e-03, -3.1836e-03, 7.2371e-04,\n 1.0740e-03, -1.7979e-04, 1.2356e-03, 1.0614e-03, -6.9764e-04,\n 3.8429e-03, -8.2964e-05, -3.5188e-03, 3.3972e-03, -3.3785e-04,\n 1.7693e-04, 1.6713e-03, -4.3198e-03, -2.8298e-03, -1.1019e-04,\n 1.2355e-03, 2.1965e-03, -1.0825e-03, 7.5386e-03, 4.0051e-03,\n -9.2732e-04, 4.5783e-03, -1.0131e-02, -2.3425e-03, -1.1934e-04,\n -4.4398e-04, 1.0778e-03, -8.6026e-04, 5.9838e-03, 4.0759e-03,\n 1.4047e-04, 2.0907e-04, 3.5586e-03, 8.5612e-04, 1.7151e-03,\n 4.8944e-03, -2.7225e-03, 1.3257e-03, -3.8746e-03, 7.3946e-04,\n -2.7321e-03, -2.0201e-03, 3.1209e-03, -2.6261e-03, 2.9186e-03,\n -1.6500e-03, -1.7772e-03, 1.8437e-03, 7.9544e-04, 2.4970e-03,\n -2.0139e-03, 3.2267e-03, 2.0073e-03, -3.8101e-03, -3.6501e-03,\n -2.4028e-04, 3.3831e-03, 2.2648e-03, 2.7405e-03, 4.5067e-03,\n 2.0703e-03, -8.1311e-04, -1.7403e-03, 3.5808e-03, 1.9969e-04,\n 2.6830e-03, -3.3711e-03, 1.5773e-03, 3.3146e-03, -1.5794e-03,\n 1.5516e-03, 9.1557e-04, 3.4845e-03, 2.2805e-03, 2.8849e-03,\n -3.6246e-03, 6.6412e-04, 2.3678e-03, 2.3242e-03, -3.2997e-03,\n -2.4895e-03, 1.9849e-03, -3.8141e-04, -1.8463e-03, 3.7688e-03,\n 3.7272e-03, 8.3952e-04, 3.0438e-03, -2.0169e-03, -6.5363e-04,\n -1.2259e-03, 4.3743e-04, 4.2691e-03, -7.0541e-04, -2.0336e-03,\n -1.0687e-03, -4.5970e-04, -1.8963e-03, 1.1797e-03, 1.8163e-03,\n -6.2940e-04, 2.2735e-03, 6.9200e-04, -2.5795e-03, -7.6817e-04,\n -3.5508e-04, 6.1781e-03, -1.7976e-03, 3.8765e-04, 2.6190e-03,\n 7.2999e-04, -4.6608e-04, 2.9396e-03, 2.1914e-04, 2.0620e-03,\n 5.5144e-04, 1.5873e-03], device='cuda:0')",
|
| 23 |
+
"exp_avg_sq": "tensor([8.4526e-05, 9.4968e-05, 1.1246e-04, 1.7074e-04, 7.8907e-05, 1.8562e-04,\n 1.4154e-04, 8.9039e-05, 8.1945e-05, 6.2410e-05, 1.4909e-04, 9.6598e-05,\n 8.4483e-05, 1.5609e-04, 1.6470e-04, 1.4487e-04, 8.5185e-05, 2.2216e-04,\n 2.8329e-05, 1.1270e-04, 8.4821e-05, 1.2602e-04, 1.3069e-04, 1.0353e-04,\n 8.9726e-05, 6.3094e-05, 1.8391e-04, 1.0506e-04, 9.1274e-05, 8.1586e-05,\n 1.2672e-04, 1.9996e-04, 6.0112e-05, 2.2832e-04, 1.0189e-04, 9.6113e-05,\n 8.6392e-05, 1.4125e-04, 1.5654e-04, 1.6540e-04, 1.1875e-04, 2.8105e-04,\n 8.9243e-05, 9.1852e-05, 1.9139e-04, 1.8893e-04, 1.4134e-04, 6.7404e-05,\n 9.2886e-05, 9.0435e-05, 7.3663e-05, 1.9223e-04, 2.7151e-04, 1.0690e-04,\n 1.6762e-04, 8.4969e-05, 1.0404e-04, 5.9445e-05, 1.2014e-04, 1.1260e-04,\n 8.7679e-05, 8.0868e-05, 8.6205e-05, 1.5578e-04, 2.4353e-04, 2.0151e-04,\n 1.6810e-04, 1.7369e-04, 9.8318e-05, 1.2807e-04, 9.7104e-05, 1.0738e-04,\n 9.5275e-05, 3.1136e-04, 1.1529e-04, 7.2335e-05, 2.8086e-04, 1.5123e-04,\n 1.0119e-04, 1.2932e-04, 9.4381e-05, 5.3877e-05, 8.0580e-05, 1.9800e-04,\n 1.2408e-04, 1.5956e-04, 1.7116e-04, 4.9617e-05, 1.7172e-04, 1.1823e-04,\n 1.1229e-04, 1.2878e-04, 3.4074e-04, 2.7761e-04, 8.9412e-05, 1.2039e-04,\n 9.9980e-05, 1.7934e-04, 1.2924e-04, 1.2308e-04, 1.1973e-04, 9.6882e-05,\n 7.3844e-05, 1.2062e-04, 6.1400e-05, 8.0367e-05, 9.0754e-05, 5.1516e-05,\n 1.0126e-04, 5.8364e-05, 2.6865e-04, 4.2554e-05, 4.1349e-04, 7.1785e-05,\n 8.2460e-05, 4.9938e-05, 1.3851e-04, 2.0803e-04, 9.5981e-05, 1.3075e-04,\n 8.8668e-05, 9.5146e-05, 1.5750e-04, 1.6462e-04, 1.4224e-04, 8.8923e-05,\n 8.0677e-05, 1.6826e-04, 1.6163e-04, 1.1647e-04, 1.2168e-04, 2.2326e-04,\n 7.2240e-05, 7.1551e-05, 1.9339e-04, 1.1940e-04, 8.0811e-05, 8.0731e-05,\n 2.9704e-04, 9.4285e-05, 7.9087e-05, 7.6410e-05, 1.2247e-04, 3.1184e-04,\n 1.3450e-04, 1.5390e-04, 9.1750e-05, 7.1261e-05, 2.6665e-04, 2.2611e-04,\n 1.0259e-04, 1.6759e-04, 9.2322e-05, 1.2807e-04, 1.1054e-04, 1.2181e-04,\n 1.0429e-04, 8.8117e-05, 2.4222e-04, 9.5842e-05, 1.1816e-04, 9.9075e-05,\n 2.1370e-04, 6.9452e-05, 1.2502e-04, 1.5935e-04, 2.5986e-04, 7.9765e-05,\n 1.2756e-04, 4.7654e-04, 1.6598e-04, 1.6754e-04, 6.3986e-05, 7.8483e-05,\n 9.8474e-05, 4.5026e-05, 6.8159e-05, 8.6956e-05, 7.9337e-05, 2.0365e-04,\n 6.9249e-05, 1.3776e-04, 2.6128e-04, 9.4608e-05, 3.6307e-05, 1.9238e-04,\n 1.4955e-04, 1.1333e-04, 1.8328e-04, 1.0595e-04, 4.6941e-08, 7.2921e-05,\n 6.3288e-05, 4.8467e-05, 1.1511e-04, 1.1804e-04, 4.9474e-05, 7.4816e-05,\n 7.4368e-05, 7.7989e-05, 8.6042e-05, 1.2439e-04, 1.1728e-04, 7.5357e-05,\n 7.3759e-05, 3.1661e-04, 8.3804e-05, 8.3737e-05, 7.6134e-05, 9.0163e-05,\n 2.1835e-04, 9.9631e-05, 1.1898e-04, 8.5587e-05, 4.2777e-05, 2.3773e-04,\n 8.4178e-05, 5.9977e-05, 1.3406e-04, 1.5014e-04, 1.0799e-04, 2.5214e-04,\n 7.0063e-05, 7.8776e-05, 6.9800e-05, 9.0508e-05, 1.0073e-04, 6.1838e-05,\n 1.7145e-04, 1.4894e-04, 2.1687e-04, 1.3024e-04, 1.9582e-04, 1.4525e-04,\n 9.6612e-05, 1.4915e-04, 1.1410e-04, 1.0707e-04, 1.0332e-04, 9.5010e-05,\n 7.1159e-05, 8.1217e-05, 1.0112e-04, 1.9574e-04, 4.0412e-05, 1.5587e-04,\n 1.3742e-04, 1.6723e-04, 5.7296e-05, 1.0253e-04, 1.9168e-04, 9.4558e-05,\n 7.8007e-05, 8.5043e-05, 6.7840e-05, 1.1090e-04, 5.5687e-05, 1.2751e-04,\n 1.3473e-04, 1.8749e-04, 1.7250e-04, 5.7877e-05, 9.0110e-05, 5.9375e-05,\n 1.3892e-04, 1.7490e-04, 3.2453e-04, 9.1160e-05, 7.0178e-05, 6.1118e-05,\n 6.8023e-05, 1.3700e-04, 6.7800e-05, 1.4852e-04, 6.9708e-05, 1.4108e-04,\n 1.9984e-04, 5.6832e-05, 1.2261e-04, 1.1611e-04, 8.0792e-05, 8.1197e-05,\n 1.3033e-04, 8.7025e-05, 1.1119e-04, 1.0298e-04, 1.5504e-04, 1.4261e-04,\n 5.6402e-09, 1.8286e-04, 1.2734e-04, 7.0210e-05, 1.6038e-04, 1.2515e-04,\n 1.6223e-04, 2.9697e-08, 9.5148e-05, 1.9240e-04, 8.1936e-05, 2.6838e-04,\n 1.2434e-04, 1.5324e-04, 1.0736e-04, 8.3778e-05, 1.2368e-04, 1.1190e-04,\n 1.2272e-04, 1.0020e-04, 1.9783e-04, 7.8247e-05, 9.6772e-05, 1.9567e-04,\n 2.4026e-04, 1.1143e-04, 9.6709e-05, 1.0585e-04, 7.8437e-05, 1.2565e-04,\n 2.9574e-05, 7.4239e-05, 1.2680e-04, 2.5739e-04, 4.8173e-05, 1.3025e-04,\n 1.3792e-04, 2.0185e-04, 8.9446e-05, 9.7352e-05, 7.7508e-05, 9.6571e-05,\n 1.4400e-04, 1.0685e-04, 5.7434e-05, 4.7493e-05, 2.1712e-04, 1.4194e-04,\n 3.8614e-05, 4.0363e-05, 1.0800e-04, 2.2588e-04, 6.4061e-05, 9.3524e-05,\n 1.2355e-04, 9.2450e-05, 1.4561e-04, 1.8225e-04, 7.0141e-05, 5.3814e-05,\n 1.0906e-04, 1.1936e-04, 5.2350e-05, 3.2865e-04, 8.2265e-05, 7.6953e-05,\n 3.4470e-04, 1.7038e-04, 6.7790e-05, 1.8183e-04, 7.4215e-05, 6.3299e-05,\n 2.1811e-04, 7.0269e-05, 1.3248e-04, 5.1856e-05, 8.6612e-05, 1.2126e-04,\n 6.3729e-05, 1.0199e-04, 1.8063e-04, 8.3362e-05, 1.2707e-04, 6.7584e-09,\n 9.5126e-05, 4.8935e-05, 6.8092e-05, 7.2251e-05, 2.2689e-04, 8.2944e-05,\n 1.3524e-04, 7.5918e-05, 2.0147e-04, 1.7539e-04, 1.6049e-04, 1.4293e-04,\n 1.0030e-04, 2.1333e-04, 1.0243e-04, 4.1092e-05, 1.3971e-04, 5.0064e-05,\n 1.0491e-04, 4.7243e-05, 1.0970e-04, 1.8989e-04, 1.3897e-04, 1.2485e-04,\n 6.3488e-05, 2.0232e-04, 6.5636e-05, 1.7737e-04, 6.4209e-05, 1.1270e-04,\n 8.6702e-05, 6.0120e-05, 1.2829e-04, 4.3944e-05, 4.5820e-05, 1.4196e-04,\n 1.5715e-04, 9.4127e-05, 6.7665e-05, 1.2729e-04, 9.2558e-05, 2.1453e-04,\n 7.5185e-05, 1.2207e-04, 1.6441e-04, 1.7494e-04, 2.3256e-04, 7.3616e-05,\n 8.2722e-05, 7.1556e-05, 1.9205e-04, 1.4512e-04, 9.2028e-05, 1.0342e-04,\n 8.5436e-05, 1.4824e-04, 1.1809e-04, 7.4067e-05, 1.0118e-04, 2.0760e-04,\n 1.2322e-04, 1.5317e-04, 1.3657e-04, 1.6585e-04, 2.5061e-04, 1.6177e-04,\n 1.4598e-04, 1.4948e-04, 1.9004e-04, 1.9357e-04, 1.3926e-04, 1.7507e-04,\n 8.3482e-05, 1.2622e-04, 1.2728e-04, 1.0041e-04, 1.6275e-04, 1.4754e-04,\n 6.9349e-05, 1.4090e-04, 1.0951e-04, 2.9828e-04, 2.9030e-04, 8.2622e-05,\n 1.2628e-04, 5.5209e-05, 8.4126e-05, 1.1350e-04, 1.4139e-04, 8.9254e-05,\n 9.5837e-05, 1.4251e-04, 6.2942e-05, 5.1139e-05, 1.5833e-04, 1.1514e-04,\n 1.0094e-04, 1.6230e-04, 1.6003e-04, 6.2776e-05, 7.6806e-05, 1.2830e-04,\n 1.7430e-04, 7.5935e-05, 1.2215e-04, 1.8977e-04, 9.5249e-05, 1.2666e-04,\n 1.4260e-04, 1.2463e-04, 2.1811e-04, 9.1969e-05, 1.6584e-04, 5.9845e-05,\n 3.5894e-05, 1.1771e-04, 1.5046e-04, 6.8187e-05, 1.5759e-04, 7.9617e-05,\n 2.9375e-04, 8.4793e-05, 8.1968e-05, 9.6651e-05, 2.3801e-04, 1.0257e-04,\n 1.3704e-04, 7.7867e-05, 1.4846e-04, 1.0880e-04, 1.1172e-04, 4.7839e-05,\n 1.8285e-04, 1.2452e-04, 1.2852e-04, 1.1938e-04, 1.0536e-04, 1.2419e-04,\n 9.0202e-05, 1.2162e-04], device='cuda:0')"
|
| 24 |
+
},
|
| 25 |
+
"4": {
|
| 26 |
+
"step": "tensor(3754.)",
|
| 27 |
+
"exp_avg": "tensor([[ 2.7610e-06, -3.0991e-06, 6.3974e-06, ..., -1.9299e-05,\n -1.2429e-05, -6.9119e-06],\n [-4.2825e-06, -8.2667e-06, 1.5510e-05, ..., -3.7479e-05,\n 1.5474e-05, 1.0309e-05],\n [-9.6398e-06, 1.6488e-05, -1.5959e-06, ..., 2.8412e-05,\n 2.2724e-05, 1.1339e-05],\n ...,\n [-1.7461e-05, 1.1830e-05, 1.6869e-05, ..., -9.9018e-06,\n -2.3360e-05, -2.5972e-05],\n [-1.4841e-07, 1.1891e-05, 6.0819e-06, ..., 1.1481e-05,\n -1.7926e-05, -6.4788e-06],\n [-1.5181e-05, 3.0772e-05, -3.9366e-06, ..., -6.6838e-07,\n -5.9380e-05, 3.9363e-05]], device='cuda:0')",
|
| 28 |
+
"exp_avg_sq": "tensor([[1.1171e-08, 1.5896e-08, 6.3454e-09, ..., 1.1861e-08, 5.3828e-09,\n 1.1674e-08],\n [1.4818e-08, 1.0515e-08, 2.4266e-08, ..., 1.2844e-08, 1.6427e-08,\n 1.9707e-08],\n [1.6949e-08, 1.5330e-08, 1.9968e-08, ..., 1.2787e-08, 1.4563e-08,\n 1.8445e-08],\n ...,\n [2.0124e-08, 2.7654e-08, 1.7124e-08, ..., 1.7096e-08, 2.3663e-08,\n 2.1295e-08],\n [1.4818e-08, 3.4242e-08, 2.2169e-08, ..., 2.0589e-08, 1.2672e-08,\n 1.9314e-08],\n [3.3833e-08, 1.8542e-08, 2.0268e-08, ..., 3.3469e-08, 1.4236e-08,\n 1.9808e-08]], device='cuda:0')"
|
| 29 |
+
},
|
| 30 |
+
"5": {
|
| 31 |
+
"step": "tensor(3754.)",
|
| 32 |
+
"exp_avg": "tensor([[ 1.2277e-05, -1.4826e-05, 3.3425e-06, ..., 3.1183e-06,\n -1.6021e-05, 4.2359e-07],\n [ 6.8809e-07, -1.7505e-05, 6.7236e-06, ..., -7.5324e-06,\n 2.1106e-05, 5.6284e-07],\n [-1.3361e-05, 3.1436e-07, -1.6124e-06, ..., 6.6658e-06,\n 8.3651e-06, 6.6790e-06],\n ...,\n [-1.6621e-05, -1.4623e-05, 1.4233e-05, ..., -1.5308e-06,\n -6.1490e-05, -4.1109e-05],\n [ 9.0778e-06, -4.2508e-06, 2.9068e-05, ..., -2.5405e-06,\n 6.9023e-06, -7.1386e-07],\n [-7.0550e-06, -5.1197e-06, 6.6825e-07, ..., -1.9130e-05,\n -1.5163e-05, 2.3716e-06]], device='cuda:0')",
|
| 33 |
+
"exp_avg_sq": "tensor([[6.3317e-09, 6.4573e-09, 4.3752e-09, ..., 6.5068e-09, 2.6730e-09,\n 6.8512e-09],\n [1.4284e-08, 1.7319e-08, 8.1033e-09, ..., 9.1079e-09, 1.2504e-08,\n 1.2213e-08],\n [9.4481e-09, 7.3479e-09, 1.0830e-08, ..., 9.0653e-09, 9.4131e-09,\n 1.2229e-08],\n ...,\n [1.2009e-08, 6.0517e-09, 1.7581e-08, ..., 6.2767e-09, 2.1376e-08,\n 1.0515e-08],\n [1.3590e-08, 1.2427e-08, 2.2941e-08, ..., 1.3772e-08, 4.7376e-09,\n 9.3126e-09],\n [7.9292e-09, 1.9435e-08, 9.6371e-09, ..., 7.3311e-09, 4.7287e-09,\n 1.4261e-08]], device='cuda:0')"
|
| 34 |
+
},
|
| 35 |
+
"6": {
|
| 36 |
+
"step": "tensor(3754.)",
|
| 37 |
+
"exp_avg": "tensor([-0.0008, 0.0008], device='cuda:0')",
|
| 38 |
+
"exp_avg_sq": "tensor([5.7445e-06, 5.7445e-06], device='cuda:0')"
|
| 39 |
+
}
|
| 40 |
+
},
|
| 41 |
+
"param_groups": [
|
| 42 |
+
{
|
| 43 |
+
"lr": 0.00975530705321762,
|
| 44 |
+
"name": "shared",
|
| 45 |
+
"betas": [
|
| 46 |
+
0.9,
|
| 47 |
+
0.999
|
| 48 |
+
],
|
| 49 |
+
"eps": 1e-08,
|
| 50 |
+
"weight_decay": 1e-05,
|
| 51 |
+
"amsgrad": false,
|
| 52 |
+
"maximize": false,
|
| 53 |
+
"foreach": null,
|
| 54 |
+
"capturable": false,
|
| 55 |
+
"differentiable": false,
|
| 56 |
+
"fused": null,
|
| 57 |
+
"decoupled_weight_decay": true,
|
| 58 |
+
"initial_lr": 0.01,
|
| 59 |
+
"params": [
|
| 60 |
+
0,
|
| 61 |
+
1,
|
| 62 |
+
2,
|
| 63 |
+
3
|
| 64 |
+
]
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"lr": 0.00975530705321762,
|
| 68 |
+
"name": "scale_256",
|
| 69 |
+
"betas": [
|
| 70 |
+
0.9,
|
| 71 |
+
0.999
|
| 72 |
+
],
|
| 73 |
+
"eps": 1e-08,
|
| 74 |
+
"weight_decay": 1e-05,
|
| 75 |
+
"amsgrad": false,
|
| 76 |
+
"maximize": false,
|
| 77 |
+
"foreach": null,
|
| 78 |
+
"capturable": false,
|
| 79 |
+
"differentiable": false,
|
| 80 |
+
"fused": null,
|
| 81 |
+
"decoupled_weight_decay": true,
|
| 82 |
+
"initial_lr": 0.01,
|
| 83 |
+
"params": [
|
| 84 |
+
4
|
| 85 |
+
]
|
| 86 |
+
},
|
| 87 |
+
{
|
| 88 |
+
"lr": 0.00975530705321762,
|
| 89 |
+
"name": "scale_512",
|
| 90 |
+
"betas": [
|
| 91 |
+
0.9,
|
| 92 |
+
0.999
|
| 93 |
+
],
|
| 94 |
+
"eps": 1e-08,
|
| 95 |
+
"weight_decay": 1e-05,
|
| 96 |
+
"amsgrad": false,
|
| 97 |
+
"maximize": false,
|
| 98 |
+
"foreach": null,
|
| 99 |
+
"capturable": false,
|
| 100 |
+
"differentiable": false,
|
| 101 |
+
"fused": null,
|
| 102 |
+
"decoupled_weight_decay": true,
|
| 103 |
+
"initial_lr": 0.01,
|
| 104 |
+
"params": [
|
| 105 |
+
5
|
| 106 |
+
]
|
| 107 |
+
},
|
| 108 |
+
{
|
| 109 |
+
"lr": 0.004877665762479736,
|
| 110 |
+
"name": "fusion",
|
| 111 |
+
"betas": [
|
| 112 |
+
0.9,
|
| 113 |
+
0.999
|
| 114 |
+
],
|
| 115 |
+
"eps": 1e-08,
|
| 116 |
+
"weight_decay": 1e-05,
|
| 117 |
+
"amsgrad": false,
|
| 118 |
+
"maximize": false,
|
| 119 |
+
"foreach": null,
|
| 120 |
+
"capturable": false,
|
| 121 |
+
"differentiable": false,
|
| 122 |
+
"fused": null,
|
| 123 |
+
"decoupled_weight_decay": true,
|
| 124 |
+
"initial_lr": 0.005,
|
| 125 |
+
"params": [
|
| 126 |
+
6
|
| 127 |
+
]
|
| 128 |
+
}
|
| 129 |
+
]
|
| 130 |
+
},
|
| 131 |
+
"scheduler_state_dict": {
|
| 132 |
+
"T_0": 10,
|
| 133 |
+
"T_i": 10,
|
| 134 |
+
"T_mult": 2,
|
| 135 |
+
"eta_min": 1e-06,
|
| 136 |
+
"T_cur": 1,
|
| 137 |
+
"base_lrs": [
|
| 138 |
+
0.01,
|
| 139 |
+
0.01,
|
| 140 |
+
0.01,
|
| 141 |
+
0.005
|
| 142 |
+
],
|
| 143 |
+
"last_epoch": 1,
|
| 144 |
+
"_step_count": 0,
|
| 145 |
+
"_is_initial": false,
|
| 146 |
+
"_get_lr_called_within_step": false,
|
| 147 |
+
"_last_lr": [
|
| 148 |
+
0.00975530705321762,
|
| 149 |
+
0.00975530705321762,
|
| 150 |
+
0.00975530705321762,
|
| 151 |
+
0.004877665762479736
|
| 152 |
+
]
|
| 153 |
+
},
|
| 154 |
+
"metrics": {
|
| 155 |
+
"best_val_acc": 63.041333333333334,
|
| 156 |
+
"best_epoch": 0,
|
| 157 |
+
"scale_accuracies": {
|
| 158 |
+
"256": 62.11666666666667,
|
| 159 |
+
"512": 62.967333333333336
|
| 160 |
+
},
|
| 161 |
+
"training_history": {
|
| 162 |
+
"epochs": [
|
| 163 |
+
1
|
| 164 |
+
],
|
| 165 |
+
"train_loss": [
|
| 166 |
+
5.311051666323785
|
| 167 |
+
],
|
| 168 |
+
"train_acc": [
|
| 169 |
+
54.91727464101089
|
| 170 |
+
],
|
| 171 |
+
"val_acc": [
|
| 172 |
+
63.041333333333334
|
| 173 |
+
],
|
| 174 |
+
"scale_accs": {
|
| 175 |
+
"256": [
|
| 176 |
+
62.11666666666667
|
| 177 |
+
],
|
| 178 |
+
"512": [
|
| 179 |
+
62.967333333333336
|
| 180 |
+
]
|
| 181 |
+
},
|
| 182 |
+
"lr": [
|
| 183 |
+
0.00975530705321762
|
| 184 |
+
]
|
| 185 |
+
}
|
| 186 |
+
},
|
| 187 |
+
"train_config": {
|
| 188 |
+
"name": "david_training",
|
| 189 |
+
"run_id": "20251012_231445",
|
| 190 |
+
"dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
|
| 191 |
+
"model_variant": [
|
| 192 |
+
"clip_vit_b16",
|
| 193 |
+
"clip_vit_laion_b32",
|
| 194 |
+
"clip_vit_b32"
|
| 195 |
+
],
|
| 196 |
+
"num_classes": 1000,
|
| 197 |
+
"preset": "small_fast",
|
| 198 |
+
"custom_config_path": null,
|
| 199 |
+
"num_classes_override": null,
|
| 200 |
+
"use_belly_override": null,
|
| 201 |
+
"belly_expand_override": null,
|
| 202 |
+
"progressive_training_override": true,
|
| 203 |
+
"scale_warmup_epochs_override": {
|
| 204 |
+
"256": 0,
|
| 205 |
+
"512": 0
|
| 206 |
+
},
|
| 207 |
+
"num_epochs": 10,
|
| 208 |
+
"batch_size": 1024,
|
| 209 |
+
"learning_rate": 0.01,
|
| 210 |
+
"weight_decay": 1e-05,
|
| 211 |
+
"warmup_epochs": 3,
|
| 212 |
+
"use_rose_loss": true,
|
| 213 |
+
"rose_initial_weight": 0.2,
|
| 214 |
+
"rose_max_weight": 0.6,
|
| 215 |
+
"rose_weight_schedule": "adaptive",
|
| 216 |
+
"use_cayley_loss": false,
|
| 217 |
+
"cayley_weight": 0.01,
|
| 218 |
+
"scale_loss_balance": null,
|
| 219 |
+
"use_mixed_precision": false,
|
| 220 |
+
"gradient_clip": 5.0,
|
| 221 |
+
"scheduler_type": "cosine_restarts",
|
| 222 |
+
"min_lr": 1e-06,
|
| 223 |
+
"freeze_strategy": "never",
|
| 224 |
+
"freeze_threshold": 90.0,
|
| 225 |
+
"unfreeze_on_plateau": true,
|
| 226 |
+
"patience": 10,
|
| 227 |
+
"track_gradients": true,
|
| 228 |
+
"gradient_scale_threshold": 1e-05,
|
| 229 |
+
"gradient_scale_multiplier": 10.0,
|
| 230 |
+
"log_interval": 50,
|
| 231 |
+
"val_interval": 1,
|
| 232 |
+
"save_interval": 5,
|
| 233 |
+
"log_fusion_weights": true,
|
| 234 |
+
"log_loss_components": true,
|
| 235 |
+
"save_format": "safetensors",
|
| 236 |
+
"hf_repo": "AbstractPhil/david-shared-space",
|
| 237 |
+
"upload_to_hub": true,
|
| 238 |
+
"base_dir": "./david_training",
|
| 239 |
+
"num_workers": 10,
|
| 240 |
+
"pin_memory": true,
|
| 241 |
+
"prefetch_factor": 4,
|
| 242 |
+
"persistent_workers": true
|
| 243 |
+
}
|
| 244 |
+
}
|