Update best_model_acc64.17_metadata.json - Run 20251012_231445
Browse files
weights/David-fully_shared-weighted_sum/20251012_231445/best_model_acc64.17_metadata.json
ADDED
|
@@ -0,0 +1,251 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 1,
|
| 3 |
+
"optimizer_state_dict": {
|
| 4 |
+
"state": {
|
| 5 |
+
"0": {
|
| 6 |
+
"step": "tensor(7508.)",
|
| 7 |
+
"exp_avg": "tensor([[-7.1688e-05, -4.7786e-05, 7.0711e-05, ..., 3.8309e-05,\n 7.5121e-05, -4.0671e-05],\n [ 5.4131e-05, -1.4784e-05, -8.7094e-05, ..., 1.2703e-04,\n -7.8899e-05, 4.8001e-05],\n [ 3.2606e-05, 3.1010e-05, 5.1503e-05, ..., 1.0095e-04,\n 2.0523e-05, -1.8714e-05],\n ...,\n [ 3.8394e-05, 1.2549e-04, 1.3070e-06, ..., -3.1328e-06,\n -2.9469e-05, -9.6065e-06],\n [-9.8062e-05, 3.8962e-04, 6.2708e-05, ..., 2.6928e-05,\n 1.2245e-04, -1.1132e-04],\n [-1.0667e-04, -1.4823e-07, 2.9675e-05, ..., 1.1026e-04,\n 4.4891e-05, 2.5805e-05]], device='cuda:0')",
|
| 8 |
+
"exp_avg_sq": "tensor([[2.9297e-08, 7.4951e-08, 4.3808e-08, ..., 4.7854e-08, 3.7865e-08,\n 1.8867e-08],\n [1.1543e-07, 1.9111e-07, 7.9278e-08, ..., 7.0996e-08, 9.2427e-08,\n 3.7357e-08],\n [2.4715e-07, 3.2266e-07, 1.8295e-07, ..., 1.4543e-07, 6.6435e-08,\n 1.2030e-07],\n ...,\n [4.1648e-08, 1.6622e-07, 3.7174e-08, ..., 2.9813e-08, 2.1489e-08,\n 1.7902e-08],\n [5.9380e-08, 7.4123e-07, 7.3112e-08, ..., 1.5661e-07, 3.7447e-08,\n 5.4455e-08],\n [1.3549e-07, 2.0205e-07, 6.1573e-08, ..., 2.2370e-07, 5.4969e-08,\n 6.0029e-08]], device='cuda:0')"
|
| 9 |
+
},
|
| 10 |
+
"1": {
|
| 11 |
+
"step": "tensor(7508.)",
|
| 12 |
+
"exp_avg": "tensor([-6.3950e-04, 2.8239e-03, 2.5466e-03, -2.0240e-03, -3.4597e-04,\n 1.0518e-03, 2.5952e-03, 4.1588e-04, -9.9868e-04, 1.2644e-03,\n 1.9178e-03, 3.0522e-04, 1.0971e-03, 2.2692e-03, 1.9848e-03,\n 3.1955e-03, 2.5345e-03, -2.5218e-03, -1.1562e-03, 7.7872e-04,\n 1.2674e-03, -6.9693e-03, -5.5044e-04, -3.9938e-03, 1.7554e-03,\n -8.3403e-04, -2.4381e-03, -1.4236e-03, -1.0275e-03, -2.5308e-03,\n -5.3019e-04, -4.1895e-04, 1.0302e-03, 8.4982e-03, 2.5866e-03,\n -9.1308e-04, 9.4125e-04, -9.9469e-04, -5.0495e-04, -1.1851e-03,\n 2.0542e-05, 1.5726e-03, 4.9952e-03, 1.4271e-03, -1.0964e-03,\n 2.2574e-03, 2.1183e-03, -3.4459e-03, -2.2534e-03, 9.8475e-04,\n -1.3288e-03, -2.0437e-03, 4.2133e-03, 1.8581e-04, -2.1505e-03,\n 1.2147e-06, -3.1481e-03, 1.4813e-03, -4.4420e-03, -7.4932e-04,\n -3.8289e-05, -1.8562e-04, 2.5198e-04, -1.2576e-03, -1.3415e-03,\n -7.4842e-04, -1.6772e-04, -1.2223e-04, 7.6657e-04, 4.9337e-04,\n -9.4572e-04, 3.8271e-03, -1.2421e-03, -2.0611e-03, -7.0015e-04,\n -2.1440e-04, -1.1427e-03, 6.9820e-04, 2.0004e-03, 8.9130e-04,\n -6.1107e-04, 2.9927e-04, -1.8534e-03, -5.6728e-03, -2.8846e-03,\n -2.5339e-04, 3.1258e-03, -1.5899e-03, -2.4849e-03, 2.9084e-04,\n 2.4871e-03, -1.3931e-03, 3.5594e-03, 3.3680e-04, -9.7455e-04,\n -2.2025e-03, 1.1995e-03, 1.2322e-03, 2.2897e-03, -6.1314e-04,\n -8.9649e-04, 3.8585e-04, 6.2811e-04, -2.8575e-03, -1.6565e-03,\n 1.9131e-03, 2.4518e-04, -1.2361e-03, 3.1904e-03, -1.4807e-03,\n -2.3673e-03, 6.2881e-04, -1.5105e-03, -2.1472e-03, 1.8426e-03,\n 1.0631e-03, 3.8578e-04, 1.6001e-03, -9.2722e-04, 7.2584e-04,\n -1.7007e-04, 1.6404e-03, 4.5675e-03, -1.4132e-03, 1.5143e-03,\n 1.9506e-04, 6.2788e-04, 3.2843e-03, 4.3841e-04, -2.4172e-05,\n 1.9103e-04, 4.7428e-03, 1.7411e-03, 6.1956e-04, 2.8773e-03,\n -2.8431e-04, -1.1726e-03, 1.4547e-03, -1.1445e-03, 1.2729e-03,\n 3.6050e-03, -2.4063e-03, 8.7809e-04, 3.3346e-03, -3.1013e-03,\n 1.0865e-03, -9.2568e-04, 2.3289e-03, -2.7384e-03, -2.4082e-04,\n -4.2748e-05, 3.2298e-03, -1.6415e-03, -2.0240e-03, 4.9859e-05,\n -7.7042e-04, -2.2329e-04, -2.2734e-03, -6.9446e-03, -2.7060e-03,\n -1.2269e-03, 4.2716e-04, -2.7252e-03, 3.0065e-03, 2.4778e-03,\n -1.9062e-03, -1.1563e-03, -1.1820e-03, 3.8148e-03, -2.2931e-03,\n 1.8053e-03, 8.4496e-04, -1.0810e-03, -2.6714e-03, -4.5335e-04,\n 2.0077e-03, 8.6047e-04, 1.2356e-03, 3.8504e-03, 5.4237e-04,\n 2.6156e-04, 2.7798e-03, 2.9324e-03, -7.4610e-04, -8.7613e-04,\n -6.5070e-03, 2.6212e-03, 6.9550e-04, 1.5705e-03, -3.8470e-03,\n -6.3511e-04, -1.5883e-03, -4.7812e-03, -4.5336e-04, -5.7333e-04,\n -1.5179e-03, -1.3630e-03, -4.8082e-06, 3.4747e-03, 6.8888e-04,\n 2.3172e-03, -3.0447e-03, -1.0883e-03, 8.9313e-04, -2.5540e-04,\n -3.9339e-03, 3.2732e-03, 2.2664e-03, -1.4457e-03, 3.7363e-03,\n -1.6019e-04, 9.2800e-04, -8.4248e-04, 3.9889e-04, -4.4407e-03,\n -5.0770e-03, 1.6698e-03, 2.2762e-03, 4.9873e-03, -5.1460e-03,\n -4.6428e-06, 1.4057e-03, -2.9699e-04, -2.0126e-03, 3.0502e-03,\n -3.2547e-04, 3.2291e-03, -6.3639e-04, -2.5475e-03, 3.0769e-03,\n 3.0093e-03, -1.3944e-03, 2.1179e-03, 1.9317e-03, 7.0449e-04,\n -1.8693e-03, 9.3768e-04, 7.3287e-04, 5.1850e-04, 1.9385e-03,\n -1.2989e-03, 4.0274e-03, 2.3088e-04, 2.1443e-04, 7.3289e-04,\n 1.4056e-03, 3.7083e-03, -2.0819e-03, 1.8736e-03, 2.6386e-04,\n -9.2692e-04, 2.2004e-03, 1.0707e-04, -2.9268e-03, 1.0520e-03,\n -3.7012e-03, 1.4750e-04, 1.6878e-03, -3.3798e-03, 2.6367e-03,\n 1.9660e-03, -1.8188e-03, -7.3229e-04, 1.2437e-03, 2.2821e-03,\n 3.2798e-03, 5.3315e-03, 6.4443e-04, -9.1769e-04, 6.4554e-04,\n -6.3156e-05, -9.1509e-04, -3.7692e-04, 1.4386e-03, 1.6059e-03,\n -3.7542e-03, 1.5186e-03, -1.1140e-03, -4.0096e-03, -1.7266e-03,\n 9.8427e-04, 1.9816e-03, 3.1475e-03, 2.6980e-04, -2.7991e-04,\n 5.0791e-04, 5.7730e-06, 2.4713e-03, -2.8231e-04, 2.0824e-03,\n 2.0839e-03, -8.4814e-05, 8.2555e-05, 1.5397e-03, 9.9082e-04,\n -1.3008e-03, 4.4567e-04, 6.2813e-03, 1.5251e-03, -2.0627e-03,\n 4.4984e-05, -1.3637e-03, -3.2350e-03, -4.2698e-04, 3.7197e-04,\n 2.0334e-03, -2.1955e-03, -4.9526e-04, 2.6317e-04, -2.6572e-03,\n 2.0886e-03, -1.7885e-04, -2.4214e-04, 3.4571e-04, -1.3023e-03,\n 5.4575e-04, -6.0087e-04, -6.4804e-03, -1.4121e-03, 1.3413e-03,\n 1.1904e-04, 6.8676e-04, -1.1914e-03, -6.0426e-04, -1.8520e-03,\n -4.4205e-03, 2.7774e-03, 2.1329e-03, -5.1768e-04, -4.3118e-04,\n -2.0556e-03, -4.1444e-03, -8.6387e-04, 2.0691e-04, 2.9861e-03,\n 1.4557e-03, 1.5458e-03, -3.4890e-03, 2.5402e-03, -2.7554e-04,\n 6.7073e-04, 3.2912e-04, 1.1880e-03, -5.5560e-04, -8.2186e-04,\n -1.0537e-03, -8.0236e-04, -4.3435e-04, 8.7699e-04, -1.9533e-03,\n 7.8583e-05, 9.5235e-04, 7.0407e-05, 1.6368e-03, 3.2281e-04,\n -4.6724e-03, -3.0863e-03, 3.0731e-04, -2.3011e-03, -1.4133e-03,\n 1.6281e-03, -4.2087e-03, 2.3872e-04, 1.1984e-03, 1.6487e-04,\n 1.8345e-03, 1.7945e-04, 2.8276e-03, -2.6227e-03, -1.4168e-04,\n 1.2914e-04, -5.7308e-04, 5.2837e-04, 2.5075e-04, -3.3909e-03,\n -4.9053e-03, -3.6451e-03, -1.2652e-03, -1.5908e-03, 2.6821e-03,\n 1.8722e-03, 4.0784e-04, -1.7199e-04, -4.0198e-04, 6.7351e-04,\n 4.1669e-04, 7.1714e-04, -1.5535e-04, -1.1831e-03, -1.1686e-03,\n 1.9231e-03, -1.7296e-03, 2.6236e-03, 6.0802e-04, 1.9874e-04,\n 3.4142e-03, 6.4037e-04, 3.1006e-03, 2.1912e-03, 8.1297e-04,\n -1.6210e-03, -1.5145e-04, -1.1321e-03, 1.1233e-03, 8.7370e-03,\n -2.3595e-04, -5.8766e-04, 3.4537e-03, 3.9879e-03, -1.6207e-03,\n -3.9287e-03, 1.6280e-03, -1.1555e-03, 5.4331e-04, -5.9670e-04,\n -2.9650e-03, -3.2739e-03, -2.2195e-03, -6.4000e-03, 2.5389e-03,\n 5.5098e-03, -5.2781e-04, 2.2517e-03, -8.6515e-04, 1.4814e-03,\n 1.4228e-03, 3.5365e-03, 2.4920e-03, 3.4686e-03, -1.5156e-03,\n -4.4241e-05, -3.5168e-03, 2.1760e-03, 1.2831e-03, -2.6773e-03,\n 9.4788e-04, 1.2484e-03, 1.2680e-03, -3.9073e-03, -2.9715e-03,\n 9.0569e-04, 2.5212e-05, -8.7963e-04, -2.5082e-04, -6.9949e-04,\n 3.9075e-04, -1.5739e-03, -4.2084e-04, -7.2138e-04, 1.0292e-03,\n -4.3913e-05, -2.9256e-03, -2.5364e-03, -9.9094e-04, 2.5372e-03,\n -2.6183e-03, -9.3385e-05, -1.7683e-03, 2.3235e-04, 3.2995e-03,\n 8.4909e-04, -1.7897e-03, 7.0278e-04, -1.2043e-03, 7.1679e-04,\n -1.3046e-03, -2.5971e-03, -3.3778e-04, -1.3416e-03, -5.4581e-03,\n 2.2482e-03, 3.3215e-04, 9.7428e-04, 6.8748e-04, -8.2029e-04,\n -1.3990e-03, 8.0906e-04, -3.0686e-04, -4.7146e-04, -2.4574e-03,\n -3.4890e-03, 1.3586e-03, -1.7936e-03, -1.5284e-03, 2.5289e-03,\n 2.3081e-03, -1.5509e-03, 1.4251e-03, 3.5347e-03, 5.2434e-04,\n 4.3755e-03, -2.1578e-03, -3.3980e-03, 1.0392e-03, 3.2431e-03,\n 2.4939e-04, -1.3769e-03, -2.8522e-03, 1.7031e-03, 9.7516e-04,\n -2.8960e-03, 3.5150e-04, -2.0671e-03, -6.6207e-04, -3.4830e-03,\n -4.5600e-04, 1.0725e-03, -4.1516e-03, 1.6419e-03, 5.8248e-04,\n -4.6141e-03, -3.9163e-04], device='cuda:0')",
|
| 13 |
+
"exp_avg_sq": "tensor([2.9793e-05, 7.1129e-05, 1.1272e-04, 9.8080e-05, 6.0149e-05, 1.5192e-04,\n 1.4958e-04, 5.2104e-05, 7.8060e-05, 9.1937e-05, 1.1501e-04, 7.5867e-05,\n 4.9325e-05, 7.2272e-05, 1.8586e-04, 4.9015e-05, 9.9982e-05, 1.5913e-04,\n 3.5236e-05, 7.8583e-05, 6.2196e-05, 9.2304e-05, 1.3040e-04, 1.0277e-04,\n 4.4721e-05, 5.8595e-05, 1.4002e-04, 1.1759e-04, 9.2292e-05, 9.0444e-05,\n 8.1867e-05, 7.5616e-05, 3.9273e-05, 2.1553e-04, 1.3539e-04, 7.1465e-05,\n 6.2871e-05, 1.1292e-04, 1.1796e-04, 1.2471e-04, 6.2257e-05, 4.3960e-05,\n 9.6188e-05, 5.3854e-05, 8.8315e-05, 8.1057e-05, 6.6152e-05, 4.1506e-05,\n 4.8931e-05, 6.1398e-05, 7.5216e-05, 7.4368e-05, 1.7185e-04, 7.6786e-05,\n 1.1677e-04, 8.5898e-05, 7.7663e-05, 6.4809e-05, 8.5358e-05, 7.3526e-05,\n 4.0557e-05, 8.2392e-05, 4.6670e-05, 1.7642e-04, 1.3997e-04, 1.3907e-04,\n 9.6167e-05, 1.3603e-04, 6.0623e-05, 1.2452e-04, 6.4873e-05, 1.3726e-04,\n 1.0840e-04, 1.4876e-04, 8.6387e-05, 9.7959e-05, 1.5307e-04, 6.4192e-05,\n 7.4394e-05, 1.3134e-04, 7.8535e-05, 6.9540e-05, 5.7971e-05, 1.2286e-04,\n 1.0729e-04, 8.5242e-05, 1.6644e-04, 4.7969e-05, 8.9036e-05, 8.3464e-05,\n 8.1546e-05, 5.2638e-05, 1.1644e-04, 3.2766e-04, 1.0526e-04, 1.1579e-04,\n 1.0367e-04, 1.4577e-04, 1.0129e-04, 7.8200e-05, 6.9863e-05, 7.8359e-05,\n 5.8105e-05, 5.9960e-05, 6.0210e-05, 5.6675e-05, 8.1389e-05, 4.7188e-05,\n 9.5313e-05, 3.8635e-05, 1.8746e-04, 3.6906e-05, 1.1451e-04, 6.3842e-05,\n 9.1863e-05, 4.4504e-05, 6.3040e-05, 1.6159e-04, 8.7349e-05, 7.6218e-05,\n 8.2129e-05, 9.8686e-05, 1.5013e-04, 1.0568e-04, 1.0390e-04, 9.9059e-05,\n 6.3345e-05, 1.3313e-04, 1.2924e-04, 1.0375e-04, 1.0563e-04, 2.1427e-04,\n 1.0244e-04, 9.8254e-05, 1.7002e-04, 5.5039e-05, 4.5224e-05, 4.9659e-05,\n 1.2488e-04, 5.7444e-05, 7.4460e-05, 1.2877e-04, 9.7697e-05, 1.6989e-04,\n 8.1191e-05, 1.2121e-04, 6.8485e-05, 7.0214e-05, 2.1355e-04, 6.3903e-05,\n 7.5422e-05, 1.4158e-04, 1.1273e-04, 1.0897e-04, 6.9726e-05, 1.1955e-04,\n 4.9581e-05, 4.6449e-05, 1.1073e-04, 6.2959e-05, 1.2135e-04, 1.0434e-04,\n 9.9083e-05, 7.4581e-05, 1.0156e-04, 5.2537e-05, 1.1839e-04, 8.3124e-05,\n 1.3252e-04, 1.6251e-04, 5.7469e-05, 1.2431e-04, 7.0338e-05, 7.2973e-05,\n 5.6274e-05, 4.7706e-05, 8.1388e-05, 9.6447e-05, 7.9830e-05, 8.3329e-05,\n 9.1907e-05, 1.1067e-04, 2.4461e-04, 7.7096e-05, 3.6031e-05, 1.1070e-04,\n 1.2237e-04, 1.1300e-04, 1.3368e-04, 1.0697e-04, 8.2787e-06, 7.5318e-05,\n 3.8699e-05, 4.3571e-05, 6.6801e-05, 9.0988e-05, 5.1421e-05, 5.8867e-05,\n 8.0477e-05, 5.9772e-05, 1.0526e-04, 6.3604e-05, 8.1074e-05, 4.1917e-05,\n 5.2363e-05, 1.6133e-04, 7.9865e-05, 6.4500e-05, 7.0744e-05, 7.5919e-05,\n 8.1160e-05, 6.6215e-05, 7.4326e-05, 1.0289e-04, 8.9042e-05, 9.6007e-05,\n 9.3952e-05, 7.0109e-05, 8.7390e-05, 8.9840e-05, 9.4114e-05, 1.5390e-04,\n 5.6927e-05, 1.0056e-04, 6.8733e-05, 8.7869e-05, 7.4484e-05, 8.1577e-05,\n 1.0350e-04, 1.5173e-04, 1.1066e-04, 8.8370e-05, 7.9708e-05, 6.0096e-05,\n 6.2524e-05, 6.7462e-05, 3.9631e-05, 7.3002e-05, 9.8155e-05, 1.0244e-04,\n 5.3758e-05, 8.8906e-05, 4.9825e-05, 1.0237e-04, 2.7519e-05, 8.1512e-05,\n 1.5297e-04, 7.5363e-05, 7.1620e-05, 8.7702e-05, 1.3336e-04, 1.2787e-04,\n 4.9248e-05, 8.4026e-05, 9.0500e-05, 7.2547e-05, 5.8574e-05, 8.3347e-05,\n 8.0541e-05, 1.3688e-04, 6.9763e-05, 5.1034e-05, 8.2033e-05, 5.0216e-05,\n 1.0147e-04, 1.1467e-04, 8.7059e-05, 7.6366e-05, 5.2132e-05, 3.8576e-05,\n 7.0163e-05, 9.0227e-05, 6.3789e-05, 1.3210e-04, 5.0377e-05, 1.1120e-04,\n 1.4178e-04, 5.8297e-05, 1.0531e-04, 5.8670e-05, 7.0793e-05, 9.3117e-05,\n 8.0090e-05, 8.7991e-05, 8.7601e-05, 9.1076e-05, 1.2136e-04, 8.6188e-05,\n 6.5748e-06, 1.3877e-04, 1.1037e-04, 4.8427e-05, 1.1013e-04, 1.5995e-04,\n 1.0229e-04, 1.0132e-05, 7.9741e-05, 1.6877e-04, 8.1430e-05, 2.0680e-04,\n 1.1513e-04, 1.1545e-04, 8.9645e-05, 6.3805e-05, 1.1422e-04, 9.1466e-05,\n 5.1289e-05, 5.8356e-05, 1.0042e-04, 6.8906e-05, 1.3497e-04, 1.2686e-04,\n 1.6522e-04, 6.2045e-05, 8.9355e-05, 9.3940e-05, 8.5774e-05, 9.4936e-05,\n 4.2944e-05, 7.0943e-05, 1.4699e-04, 1.1233e-04, 6.7215e-05, 1.1309e-04,\n 1.2639e-04, 1.5920e-04, 7.0888e-05, 8.5204e-05, 4.0729e-05, 7.3749e-05,\n 8.5833e-05, 1.4536e-04, 6.3086e-05, 5.1756e-05, 1.6545e-04, 1.1003e-04,\n 2.8715e-05, 5.9747e-05, 8.0917e-05, 1.8634e-04, 3.7693e-05, 1.0355e-04,\n 1.1865e-04, 5.2749e-05, 9.4409e-05, 8.8099e-05, 6.9907e-05, 8.8722e-05,\n 9.7673e-05, 1.4172e-04, 4.8764e-05, 1.3872e-04, 5.2194e-05, 5.8800e-05,\n 1.6670e-04, 1.0815e-04, 8.5456e-05, 1.0120e-04, 6.1300e-05, 8.7846e-05,\n 1.3828e-04, 7.9928e-05, 5.8165e-05, 4.4629e-05, 7.4200e-05, 7.7981e-05,\n 4.5885e-05, 5.1466e-05, 1.4939e-04, 6.5272e-05, 8.7964e-05, 6.6868e-06,\n 5.6365e-05, 5.8686e-05, 8.7116e-05, 8.6260e-05, 1.8629e-04, 1.1583e-04,\n 1.0804e-04, 8.3739e-05, 1.4228e-04, 1.1483e-04, 7.2465e-05, 6.5417e-05,\n 8.4583e-05, 1.1100e-04, 7.3107e-05, 5.2659e-05, 7.5352e-05, 4.1841e-05,\n 4.8062e-05, 5.5621e-05, 6.0021e-05, 7.8856e-05, 8.3648e-05, 1.1824e-04,\n 7.3488e-05, 9.6768e-05, 5.9898e-05, 1.2480e-04, 6.8825e-05, 7.7295e-05,\n 7.5738e-05, 6.2505e-05, 7.9009e-05, 3.2197e-05, 3.8476e-05, 1.1074e-04,\n 1.0893e-04, 1.0761e-04, 6.9475e-05, 1.0316e-04, 1.1926e-04, 6.0836e-05,\n 9.6265e-05, 8.8090e-05, 8.5561e-05, 1.0300e-04, 1.0685e-04, 8.0972e-05,\n 7.6026e-05, 9.2212e-05, 8.2186e-05, 1.8595e-04, 6.8740e-05, 1.0420e-04,\n 5.9539e-05, 7.6991e-05, 1.0750e-04, 6.0409e-05, 1.0233e-04, 1.6092e-04,\n 1.0058e-04, 9.1214e-05, 8.9020e-05, 9.6137e-05, 9.4375e-05, 9.6717e-05,\n 1.0541e-04, 1.3265e-04, 3.7225e-05, 1.7545e-04, 1.0993e-04, 1.2971e-04,\n 5.5907e-05, 8.7923e-05, 1.0195e-04, 6.5811e-05, 1.1496e-04, 1.3815e-04,\n 8.9410e-05, 1.4125e-04, 6.9692e-05, 1.0768e-04, 2.9818e-04, 5.4294e-05,\n 9.9713e-05, 7.5581e-05, 7.8120e-05, 8.8658e-05, 1.1801e-04, 1.0600e-04,\n 8.2331e-05, 9.5936e-05, 4.1954e-05, 4.5244e-05, 7.9170e-05, 1.1652e-04,\n 6.9285e-05, 1.3654e-04, 1.2435e-04, 4.1816e-05, 5.3615e-05, 7.3944e-05,\n 1.3818e-04, 5.4304e-05, 8.1682e-05, 1.2380e-04, 7.3816e-05, 1.3465e-04,\n 1.3021e-04, 6.8347e-05, 3.8117e-05, 8.7257e-05, 1.4370e-04, 6.0621e-05,\n 3.8060e-05, 6.2529e-05, 1.2300e-04, 6.1011e-05, 7.4393e-05, 1.0439e-04,\n 2.4649e-04, 8.4213e-05, 9.6125e-05, 4.8064e-05, 1.2714e-04, 8.0562e-05,\n 8.9435e-05, 7.7483e-05, 8.7802e-05, 1.0123e-04, 8.4863e-05, 3.4485e-05,\n 1.2567e-04, 9.0284e-05, 1.2874e-04, 6.8714e-05, 1.3646e-04, 3.0542e-05,\n 7.1811e-05, 1.1293e-04], device='cuda:0')"
|
| 14 |
+
},
|
| 15 |
+
"2": {
|
| 16 |
+
"step": "tensor(7508.)",
|
| 17 |
+
"exp_avg": "tensor([-1.3745e-03, 3.9575e-03, 2.9612e-03, -3.5732e-03, -5.6288e-04,\n 6.6000e-04, 3.0511e-03, -7.3772e-05, -1.4084e-03, 5.4788e-04,\n 3.0972e-03, 8.9126e-04, 1.2762e-03, 4.5427e-03, 2.2170e-03,\n 1.2177e-02, 3.5995e-03, -6.8017e-03, -1.0536e-03, 9.8957e-04,\n 2.5061e-03, -5.7269e-03, 1.3327e-04, -5.0996e-03, 4.0456e-03,\n -1.5009e-03, -3.8074e-03, -1.1125e-03, -1.1330e-03, -3.1948e-03,\n -1.3441e-03, 4.7304e-04, 1.9833e-03, 1.0889e-02, 2.3963e-03,\n -1.8069e-03, 1.4726e-03, -2.0101e-03, -1.2270e-03, -8.7568e-04,\n -2.2516e-04, 1.4524e-02, 6.0032e-03, 3.1180e-03, -2.5953e-03,\n 5.5801e-03, 5.6152e-03, -4.7284e-03, -6.4363e-03, 1.6369e-03,\n -2.3498e-03, -7.8328e-03, 4.7669e-03, 1.2944e-03, -1.4633e-03,\n 4.0496e-05, -4.9588e-03, 1.7609e-03, -7.3305e-03, -1.6781e-03,\n -3.0350e-04, -4.7240e-05, 1.8092e-03, -1.9640e-03, -2.8413e-03,\n -1.4075e-03, 9.0849e-04, 1.1030e-03, 1.0807e-03, 2.3594e-04,\n -8.3085e-04, 3.9752e-03, -2.0644e-03, -4.5605e-03, -2.0335e-03,\n -2.2097e-04, -1.2897e-03, 8.9736e-04, 2.7418e-03, 1.5998e-03,\n -1.0800e-03, 2.1007e-03, -2.8420e-03, -1.1080e-02, -4.5074e-03,\n -7.8640e-04, 3.3700e-03, -2.6834e-03, -3.1470e-03, 5.4386e-05,\n 4.0457e-03, -3.9886e-03, 6.7484e-03, 4.5385e-04, -1.7758e-03,\n -4.6396e-03, -8.1230e-04, 1.0268e-03, 3.5871e-03, -4.5932e-05,\n -2.4948e-03, 1.8799e-04, 8.5708e-04, -5.9589e-03, -2.3917e-03,\n 3.0817e-03, 2.8090e-04, -2.2533e-03, 3.2808e-03, -2.1841e-03,\n -1.4561e-03, 1.1015e-03, -3.6220e-03, -3.0754e-03, 2.5134e-03,\n 1.4287e-03, 1.6496e-03, 1.9408e-03, -3.6734e-04, 1.1651e-03,\n -1.1378e-03, 1.2991e-03, 6.0956e-03, -3.9754e-03, 2.9599e-03,\n -2.1453e-04, 6.6192e-04, 4.5539e-03, -1.9065e-05, -1.5698e-05,\n 5.5232e-04, 7.4666e-03, 2.4305e-03, 7.0172e-04, 5.5427e-03,\n -7.6074e-04, -1.4348e-03, 2.0351e-03, -2.0435e-03, 3.0816e-03,\n 5.3665e-03, -2.7117e-03, 2.6853e-03, 6.0333e-03, -8.2310e-03,\n 2.1660e-03, -1.3727e-03, 2.7364e-03, -1.3490e-03, -1.3649e-03,\n -9.8331e-04, 4.1342e-03, -1.1290e-03, -2.2551e-03, 1.2662e-03,\n 1.7611e-06, -6.2734e-04, -3.6048e-03, -1.3790e-02, -6.6668e-03,\n -1.2158e-03, -3.1558e-04, -8.3940e-03, 3.5470e-03, 4.1435e-03,\n -5.7544e-03, -2.3384e-03, -1.2802e-03, 5.3496e-03, -6.9659e-03,\n 6.0230e-03, 9.5941e-04, -1.5794e-03, -3.1081e-03, -1.6099e-03,\n 3.1885e-03, 1.4962e-03, 3.0210e-03, 6.1894e-03, -8.1370e-05,\n 1.5442e-04, 5.7202e-03, 2.5029e-03, -1.7369e-03, -7.1043e-04,\n -8.1552e-03, 4.7669e-03, 1.9156e-03, 1.1807e-03, -4.6865e-03,\n 5.6052e-45, -1.3995e-03, -9.4809e-03, -9.9756e-04, -9.9653e-04,\n -1.6672e-03, -2.5166e-03, 5.2553e-04, 4.3392e-03, 1.9494e-03,\n 2.6116e-03, -3.9867e-03, -3.8939e-03, 3.0628e-03, 5.4020e-04,\n -4.6596e-03, 6.2235e-03, 2.8811e-03, -1.3621e-03, 5.1837e-03,\n -2.2512e-04, 2.1060e-03, -9.7727e-04, 3.7206e-04, -2.9819e-03,\n -1.1523e-02, 9.0898e-04, 4.4894e-03, 9.5612e-03, -7.4520e-03,\n 4.9725e-04, 1.1145e-03, -1.0651e-03, -1.5937e-03, 3.6711e-03,\n -1.8973e-04, 6.3387e-03, -8.9241e-04, -5.9115e-03, 4.1221e-03,\n 4.7484e-03, -1.6023e-03, 6.1709e-03, 7.5893e-03, 1.4701e-03,\n -5.1824e-03, 1.8810e-03, 1.6923e-03, 1.5201e-03, 1.6875e-03,\n -3.0848e-03, 5.4491e-03, -6.3855e-05, 1.2351e-03, 1.1743e-03,\n 2.4642e-03, 4.7074e-03, -2.8556e-03, 3.5806e-03, 1.0536e-03,\n 4.5802e-04, 3.6734e-03, 4.2627e-06, -2.9711e-03, 1.0232e-03,\n -6.7090e-03, 4.8238e-04, 4.1660e-03, -5.7912e-03, 3.3107e-03,\n 5.3770e-03, -1.4847e-03, -3.7293e-04, 1.2484e-03, 4.5361e-03,\n 4.0005e-03, 1.8092e-02, 1.2195e-03, -1.2427e-03, 5.7334e-04,\n -1.4886e-03, -3.2225e-03, -1.3018e-03, -2.7202e-04, 4.3787e-03,\n -4.6912e-03, 2.0900e-03, -1.5161e-03, -4.7686e-03, -2.0052e-03,\n 1.2159e-03, 3.6972e-03, 2.9864e-03, 9.3612e-04, -1.0223e-03,\n 2.1261e-03, 1.9159e-03, 4.5868e-03, 5.6052e-45, 4.0523e-03,\n 3.0509e-03, -4.4399e-05, 3.4395e-04, 2.5836e-03, 8.1719e-04,\n 5.6052e-45, -7.0347e-05, 7.7945e-03, 2.4364e-03, -3.8649e-03,\n 9.3214e-04, -1.8082e-03, -5.6869e-03, -3.2340e-04, 1.5338e-03,\n 3.3149e-03, -6.1623e-03, -2.3751e-03, 1.2097e-03, -3.9368e-03,\n 1.8974e-03, -6.7806e-04, -1.8527e-03, 1.8813e-04, -2.3109e-04,\n -3.2739e-04, -1.2510e-03, -1.3831e-02, -7.6878e-04, 1.7322e-03,\n 4.2288e-04, 4.2199e-04, -1.1693e-03, 2.5011e-04, -3.9081e-03,\n -5.8838e-03, 4.1962e-03, 1.5822e-03, -1.7634e-03, 7.4793e-04,\n -2.7454e-03, -4.5052e-03, -1.0286e-03, 5.0659e-04, 5.2118e-03,\n 1.3245e-03, 4.4968e-03, -5.0084e-03, 5.7190e-03, 1.9970e-03,\n 1.1279e-03, 1.0361e-03, 2.4346e-03, -2.8616e-03, -1.4886e-03,\n -4.0472e-04, -1.5916e-03, -5.9818e-04, 2.3640e-03, -2.2467e-03,\n -3.6346e-04, 7.3425e-04, 8.0356e-04, 2.1310e-03, -9.2581e-04,\n -9.4645e-03, -4.8967e-03, -1.3588e-03, -4.5018e-03, -1.9186e-03,\n 5.1636e-04, -4.0741e-03, 6.8320e-04, 2.7224e-03, 3.6773e-04,\n 3.8487e-03, 1.7868e-03, 6.5120e-03, -5.4164e-03, -9.6510e-05,\n 1.2425e-04, 5.6052e-45, 1.6780e-03, 6.2322e-04, -4.1359e-03,\n -6.2223e-03, -5.1948e-03, -1.4516e-03, -2.9233e-03, 3.6182e-03,\n 2.2522e-03, -8.7818e-05, -4.2836e-04, 2.1496e-03, 3.5874e-05,\n 6.1739e-04, 6.4698e-05, -1.0447e-03, -1.6752e-03, -2.1155e-03,\n 4.0780e-03, -1.6586e-03, 4.7947e-03, 6.0226e-04, 9.7666e-04,\n 5.8420e-03, 7.8715e-04, 6.1406e-03, 4.0105e-03, 9.8981e-04,\n -2.1462e-03, 1.8330e-04, -1.6304e-03, 1.7555e-03, 1.9053e-02,\n -6.0381e-04, -1.2101e-03, 7.2629e-03, 6.7675e-03, -2.5932e-03,\n -6.3856e-03, 1.0747e-03, -5.3246e-04, 6.2111e-03, -1.5673e-03,\n -4.6031e-03, -6.7129e-03, -5.2892e-03, -1.0336e-02, 5.1901e-03,\n 1.0765e-02, -1.3983e-03, 5.7514e-03, -8.4786e-05, 3.1264e-03,\n 8.8268e-04, 6.2965e-03, 4.1016e-03, 5.2453e-03, -2.9163e-03,\n 1.0029e-04, -6.1351e-03, 2.7428e-03, 1.3904e-03, -7.0387e-03,\n 2.5254e-03, 4.1305e-03, 1.7671e-03, -7.9259e-03, -3.8700e-03,\n 1.1989e-03, -1.9531e-03, -6.1024e-04, 3.4733e-04, -1.4969e-03,\n 1.9258e-03, -2.9765e-03, -1.2184e-03, -2.1738e-03, 1.1281e-03,\n 8.7778e-04, -3.7974e-03, -5.6836e-03, -1.5539e-03, 3.8580e-03,\n -4.8385e-03, -6.8975e-04, -1.9625e-03, 6.8320e-04, 4.6165e-03,\n 1.0653e-03, -3.4893e-03, 2.4983e-03, -1.5693e-03, 7.7922e-04,\n -7.3550e-04, -5.5209e-03, -4.8941e-04, -2.5460e-03, -7.6358e-03,\n 4.6787e-03, 4.9408e-04, 2.0994e-03, 1.5123e-03, -6.8336e-04,\n -2.8951e-03, 1.1074e-03, -3.4417e-04, -1.6947e-03, -3.7598e-03,\n -6.9463e-03, 2.2626e-03, -1.1949e-02, -1.8725e-03, 4.2500e-03,\n 3.9203e-03, -1.8139e-03, 3.3018e-03, 5.4220e-03, 1.4792e-04,\n 1.0282e-02, -1.5612e-03, -5.1316e-03, 1.0343e-03, 5.5134e-03,\n 5.1086e-04, -1.8032e-03, -4.4526e-03, 2.3782e-03, 1.0703e-03,\n -6.8723e-03, 5.3489e-04, -3.7095e-03, -2.4438e-03, -8.0263e-03,\n -1.0808e-04, 5.9704e-04, -5.5757e-03, 8.4410e-04, 5.8805e-03,\n -6.5776e-03, -6.5442e-04], device='cuda:0')",
|
| 18 |
+
"exp_avg_sq": "tensor([6.4392e-04, 1.6061e-04, 1.7688e-04, 2.5167e-04, 1.6629e-04, 2.0745e-04,\n 2.1170e-04, 2.0296e-04, 1.1336e-04, 1.4042e-04, 3.2910e-04, 2.2967e-04,\n 1.3590e-04, 2.3639e-04, 1.8637e-04, 6.8723e-04, 1.2771e-04, 4.6418e-04,\n 4.8564e-05, 2.0939e-04, 2.4164e-04, 1.9444e-04, 1.5906e-04, 1.2955e-04,\n 2.8082e-04, 1.2704e-04, 5.6666e-04, 1.0551e-04, 1.9160e-04, 1.0575e-04,\n 2.0932e-04, 2.2285e-04, 1.3328e-04, 4.1077e-04, 1.2870e-04, 1.6188e-04,\n 1.6265e-04, 2.1259e-04, 2.4418e-04, 3.2657e-04, 3.4068e-04, 2.1638e-03,\n 1.0912e-04, 2.8804e-04, 2.9597e-04, 4.6893e-04, 3.5487e-04, 1.2478e-04,\n 2.8398e-04, 2.1244e-04, 1.3997e-04, 9.3206e-04, 2.7564e-04, 2.1321e-04,\n 3.3866e-04, 1.4008e-04, 2.1767e-04, 7.7498e-05, 1.8556e-04, 1.9182e-04,\n 2.1606e-04, 1.1030e-04, 2.5485e-04, 1.8590e-04, 3.3047e-04, 2.4696e-04,\n 3.2929e-04, 2.6954e-04, 1.5280e-04, 1.8295e-04, 1.2944e-04, 1.3344e-04,\n 1.2925e-04, 6.1395e-04, 1.4114e-04, 1.2525e-04, 6.2422e-04, 5.2309e-04,\n 2.7791e-04, 1.8671e-04, 1.9258e-04, 9.2056e-05, 1.2825e-04, 4.9671e-04,\n 1.6340e-04, 3.6462e-04, 2.2669e-04, 7.5057e-05, 3.1257e-04, 3.6908e-04,\n 1.6536e-04, 8.6408e-04, 6.0514e-04, 2.4155e-04, 1.2607e-04, 2.9271e-04,\n 1.3528e-04, 2.6613e-04, 2.2051e-04, 2.8181e-04, 3.4347e-04, 1.5848e-04,\n 1.3197e-04, 3.0966e-04, 6.8993e-05, 2.0665e-04, 1.4942e-04, 1.3827e-04,\n 9.7298e-05, 1.2870e-04, 3.2429e-04, 1.0735e-04, 1.7317e-03, 2.0484e-04,\n 7.9116e-05, 6.7023e-05, 3.4931e-04, 2.3787e-04, 1.2733e-04, 2.7438e-04,\n 1.6843e-04, 8.3796e-05, 2.6118e-04, 4.3835e-04, 2.2544e-04, 1.9898e-04,\n 1.5120e-04, 2.5240e-04, 3.4484e-04, 1.6105e-04, 1.7780e-04, 3.7949e-04,\n 2.1745e-04, 1.2289e-04, 3.8334e-04, 5.7301e-04, 1.5612e-04, 9.5500e-05,\n 6.2062e-04, 1.6317e-04, 1.4582e-04, 5.8955e-05, 1.7648e-04, 7.4981e-04,\n 3.0869e-04, 2.1148e-04, 2.6401e-04, 9.7427e-05, 2.6623e-04, 1.1966e-03,\n 1.5952e-04, 2.0914e-04, 6.8267e-05, 1.0996e-04, 3.4267e-04, 4.3285e-04,\n 2.2685e-04, 1.5036e-04, 6.0719e-04, 2.8295e-04, 1.7382e-04, 2.6263e-04,\n 4.9623e-04, 1.2462e-04, 2.4486e-04, 2.7778e-04, 4.6882e-04, 1.0011e-04,\n 2.2156e-04, 9.8315e-04, 3.5009e-04, 2.3235e-04, 1.2047e-04, 1.3815e-04,\n 4.2552e-04, 1.3722e-04, 1.3185e-04, 1.3614e-04, 1.4255e-04, 4.6953e-04,\n 1.4723e-04, 3.1209e-04, 2.2445e-04, 2.1147e-04, 1.1548e-04, 1.9490e-04,\n 3.0114e-04, 2.0774e-04, 1.9755e-04, 1.7299e-04, 8.2584e-08, 1.4862e-04,\n 1.8715e-04, 9.3837e-05, 1.8170e-04, 1.5633e-04, 1.2026e-04, 1.1135e-04,\n 1.0128e-04, 1.7720e-04, 1.0559e-04, 2.0918e-04, 2.7078e-04, 3.2388e-04,\n 1.1761e-04, 2.3722e-04, 1.8905e-04, 1.3925e-04, 1.5406e-04, 1.5559e-04,\n 5.2207e-04, 2.2673e-04, 3.1032e-04, 1.2576e-04, 3.2421e-05, 4.9407e-04,\n 1.0050e-04, 1.3984e-04, 2.5486e-04, 2.5826e-04, 2.1747e-04, 3.9725e-04,\n 1.9189e-04, 1.4640e-04, 1.3041e-04, 1.2444e-04, 2.1127e-04, 7.8077e-05,\n 6.4594e-04, 1.6712e-04, 3.9499e-04, 1.9072e-04, 3.6542e-04, 5.3042e-04,\n 2.8640e-04, 4.1145e-04, 2.4758e-04, 2.5826e-04, 1.8936e-04, 1.9270e-04,\n 1.7718e-04, 1.5754e-04, 2.1981e-04, 3.1252e-04, 9.8626e-05, 3.3410e-04,\n 1.5987e-04, 1.6580e-04, 1.1863e-04, 2.0222e-04, 3.0648e-04, 1.8800e-04,\n 1.4378e-04, 9.8822e-05, 7.2708e-05, 2.0866e-04, 9.8850e-05, 2.0763e-04,\n 2.0231e-04, 3.9551e-04, 3.9505e-04, 1.1634e-04, 2.1396e-04, 9.7395e-05,\n 3.6707e-04, 4.7545e-04, 1.4618e-03, 1.9418e-04, 1.4078e-04, 8.7543e-05,\n 1.6918e-04, 2.1351e-04, 1.3390e-04, 2.2401e-04, 1.6262e-04, 2.4432e-04,\n 4.0880e-04, 7.4738e-05, 1.5371e-04, 1.3263e-04, 1.6940e-04, 1.5915e-04,\n 2.6339e-04, 1.5917e-04, 1.7904e-04, 2.3869e-04, 1.3742e-04, 2.1224e-04,\n 1.5828e-09, 2.5153e-04, 2.3600e-04, 1.3410e-04, 2.2937e-04, 1.6148e-04,\n 1.9833e-04, 1.5517e-08, 2.0898e-04, 1.9834e-04, 3.1196e-04, 3.8241e-04,\n 4.4333e-04, 3.1589e-04, 1.3102e-04, 1.3446e-04, 1.4946e-04, 2.7106e-04,\n 2.5833e-04, 3.8060e-04, 4.5031e-04, 1.3653e-04, 1.5056e-04, 2.0608e-04,\n 4.1260e-04, 5.3883e-05, 9.1376e-05, 2.1082e-04, 1.1239e-04, 2.6659e-04,\n 4.0352e-05, 1.6761e-04, 1.3810e-04, 4.9156e-04, 7.8433e-05, 2.2964e-04,\n 2.8387e-04, 4.1924e-04, 1.2346e-04, 1.7501e-04, 2.1277e-04, 2.7757e-04,\n 2.4865e-04, 1.1180e-04, 1.0486e-04, 1.0069e-04, 3.5952e-04, 1.6434e-04,\n 2.0591e-04, 7.5536e-05, 2.8160e-04, 3.7028e-04, 1.4838e-04, 1.3641e-04,\n 1.9630e-04, 4.3095e-04, 2.5721e-04, 3.4009e-04, 8.6754e-05, 8.1725e-05,\n 1.8070e-04, 2.4039e-04, 1.2630e-04, 8.2876e-04, 1.5278e-04, 1.1553e-04,\n 1.2666e-03, 3.0253e-04, 1.5892e-04, 3.2937e-04, 1.6200e-04, 9.2654e-05,\n 4.6625e-04, 7.4149e-05, 2.7899e-04, 1.8139e-04, 1.3272e-04, 1.7515e-04,\n 2.3600e-04, 1.8445e-04, 2.1888e-04, 1.2476e-04, 1.8182e-04, 3.2628e-09,\n 4.8592e-04, 9.1574e-05, 1.3228e-04, 1.5901e-04, 3.0118e-04, 1.1727e-04,\n 1.9826e-04, 1.6680e-04, 1.5934e-04, 3.4319e-04, 3.3156e-04, 3.6518e-04,\n 1.0823e-04, 3.7303e-04, 2.3901e-04, 1.3046e-04, 2.6909e-04, 1.1613e-04,\n 1.8630e-04, 6.9864e-05, 1.7309e-04, 3.5181e-04, 2.3946e-04, 2.9067e-04,\n 1.2755e-04, 3.8484e-04, 1.4930e-04, 2.5280e-04, 1.0949e-04, 2.9357e-04,\n 1.1550e-04, 1.3079e-04, 4.1300e-04, 7.6162e-05, 7.9868e-05, 2.8619e-04,\n 2.4967e-04, 2.0551e-04, 1.4335e-04, 1.5771e-04, 2.1893e-04, 1.2970e-03,\n 1.0913e-04, 2.4283e-04, 3.0232e-04, 2.6871e-04, 3.0934e-04, 1.9466e-04,\n 2.0621e-04, 7.3046e-05, 6.9705e-04, 1.5053e-04, 2.1511e-04, 1.4414e-04,\n 1.3761e-04, 2.4833e-04, 1.6438e-04, 1.2614e-04, 1.4610e-04, 2.5744e-04,\n 2.0939e-04, 2.3669e-04, 3.1166e-04, 5.9126e-04, 5.2846e-04, 3.2048e-04,\n 2.4548e-04, 2.5910e-04, 2.7408e-04, 3.3211e-04, 1.6087e-04, 1.5921e-04,\n 2.5797e-04, 2.0059e-04, 1.9579e-04, 2.4389e-04, 2.3119e-04, 1.8610e-04,\n 9.6271e-05, 1.9885e-04, 2.9656e-04, 5.3682e-04, 3.1450e-04, 1.1102e-04,\n 4.8719e-04, 1.1260e-04, 1.3154e-04, 2.2687e-04, 1.5902e-04, 1.7254e-04,\n 2.0863e-04, 3.5227e-04, 1.7951e-04, 8.4699e-05, 3.5984e-04, 2.4542e-04,\n 2.5958e-04, 2.4908e-04, 2.8151e-04, 9.2134e-05, 1.6024e-04, 2.5250e-04,\n 1.8896e-04, 1.7332e-04, 2.9080e-04, 4.3097e-04, 1.4306e-04, 1.7202e-04,\n 3.3890e-04, 2.5728e-04, 1.9794e-03, 1.8035e-04, 2.7308e-04, 7.5604e-05,\n 7.8666e-05, 2.5206e-04, 2.8085e-04, 2.1894e-04, 3.9598e-04, 8.0594e-05,\n 3.8256e-04, 1.2095e-04, 1.1596e-04, 1.6079e-04, 3.9165e-04, 2.3507e-04,\n 2.0439e-04, 1.3514e-04, 3.7895e-04, 1.4157e-04, 3.6875e-04, 1.9040e-04,\n 4.5268e-04, 2.4962e-04, 2.0549e-04, 2.0187e-04, 1.4532e-04, 1.8263e-03,\n 1.7987e-04, 1.6398e-04], device='cuda:0')"
|
| 19 |
+
},
|
| 20 |
+
"3": {
|
| 21 |
+
"step": "tensor(7508.)",
|
| 22 |
+
"exp_avg": "tensor([-8.6003e-04, 3.5411e-03, 2.4133e-03, -1.9745e-03, -2.9653e-04,\n 7.7292e-04, 2.8375e-03, 1.5105e-04, -7.7927e-04, 1.1339e-03,\n 1.5925e-03, 4.3149e-04, 1.1540e-03, 2.8373e-03, 1.8631e-03,\n 4.9444e-03, 2.4967e-03, -3.5689e-03, -1.0322e-03, 9.2444e-04,\n 1.9374e-03, -4.7530e-03, -1.6188e-05, -3.0096e-03, 2.5272e-03,\n -8.2643e-04, -2.1181e-03, -8.2939e-04, -6.6420e-04, -2.3978e-03,\n -6.1504e-04, -2.2088e-04, 1.1780e-03, 7.6672e-03, 2.1023e-03,\n -8.9867e-04, 1.0809e-03, -1.1073e-03, -5.3961e-04, -8.1173e-04,\n 4.8449e-04, 5.2075e-03, 4.1215e-03, 1.8275e-03, -9.0428e-04,\n 3.0627e-03, 2.4985e-03, -3.7047e-03, -2.9241e-03, 1.0274e-03,\n -1.4186e-03, -2.9596e-03, 5.6043e-03, 3.5407e-04, -2.4932e-03,\n -1.4177e-04, -2.5165e-03, 1.3592e-03, -4.2591e-03, -7.2813e-04,\n 1.3928e-04, 2.3082e-04, 6.3677e-04, -9.5985e-04, -1.8842e-03,\n -3.4779e-04, 1.6182e-04, 1.5949e-04, 1.1102e-03, 3.9518e-04,\n -7.3869e-04, 2.5480e-03, -1.0524e-03, -2.3211e-03, -9.0850e-04,\n -9.9515e-05, -1.0845e-03, 5.4774e-04, 2.7917e-03, 1.0863e-03,\n -1.0002e-03, 1.1382e-04, -1.6252e-03, -6.1535e-03, -2.8857e-03,\n -6.6516e-05, 2.7285e-03, -1.8610e-03, -2.9089e-03, 3.6746e-04,\n 2.5388e-03, -1.4440e-03, 4.9699e-03, 4.3852e-04, -8.0742e-04,\n -1.9344e-03, 6.3309e-04, 1.0155e-03, 2.6557e-03, -1.5064e-04,\n -1.2316e-03, 2.8706e-04, 9.8282e-04, -3.2035e-03, -1.2116e-03,\n 1.9362e-03, 6.2851e-04, -1.0865e-03, 2.8242e-03, -1.6332e-03,\n -9.1902e-04, 9.2764e-04, -1.9608e-03, -1.7878e-03, 1.7249e-03,\n 1.1977e-03, 1.0538e-03, 1.7854e-03, -6.9891e-04, 5.8862e-04,\n -8.3765e-04, 1.7269e-03, 3.8769e-03, -1.8329e-03, 1.6607e-03,\n 1.4524e-04, 4.7418e-04, 3.8413e-03, 1.3303e-04, -1.6378e-04,\n 3.8160e-04, 4.6202e-03, 1.6788e-03, 6.5655e-04, 3.2300e-03,\n -5.5393e-05, -9.1881e-04, 1.5279e-03, -8.5952e-04, 1.7443e-03,\n 3.8549e-03, -1.7496e-03, 1.3414e-03, 4.1355e-03, -3.8427e-03,\n 1.2805e-03, -7.6621e-04, 2.0046e-03, -1.9721e-03, -4.1759e-04,\n -1.7286e-04, 2.5675e-03, -1.2654e-03, -1.8360e-03, 5.2909e-04,\n -3.6341e-04, -2.4571e-04, -2.1916e-03, -7.9444e-03, -3.5326e-03,\n -1.3342e-03, 1.4100e-04, -3.9239e-03, 2.4750e-03, 2.6773e-03,\n -2.9993e-03, -1.8886e-03, -9.1224e-04, 3.4465e-03, -3.6170e-03,\n 2.9705e-03, 1.2104e-03, -9.1048e-04, -2.4262e-03, -3.7480e-04,\n 2.4559e-03, 1.1190e-03, 1.6652e-03, 4.0381e-03, 7.2564e-04,\n 4.8578e-04, 3.1362e-03, 2.4110e-03, -4.8879e-04, -4.9337e-04,\n -6.9581e-03, 3.0772e-03, 5.7591e-04, 1.3076e-03, -3.0171e-03,\n 5.6052e-45, -1.1225e-03, -5.3301e-03, -5.2185e-04, -9.0506e-04,\n -1.3879e-03, -1.1934e-03, 6.6678e-04, 2.7684e-03, 8.5377e-04,\n 1.8281e-03, -2.9491e-03, -9.6935e-04, 1.2966e-03, -9.3287e-05,\n -4.1107e-03, 3.4553e-03, 2.2053e-03, -6.3041e-04, 3.9115e-03,\n 4.3580e-05, 1.0421e-03, -7.0176e-04, 1.1736e-04, -2.8150e-03,\n -6.5524e-03, 1.0912e-03, 2.4786e-03, 7.5455e-03, -5.7530e-03,\n 2.8537e-04, 1.2451e-03, -2.5562e-04, -1.4074e-03, 2.6705e-03,\n -4.2031e-04, 3.7166e-03, -7.1595e-04, -3.3181e-03, 2.3880e-03,\n 3.5636e-03, -8.2537e-04, 3.5500e-03, 3.0329e-03, 1.2543e-03,\n -2.2314e-03, 1.3368e-03, 1.0044e-03, 5.8841e-04, 1.7283e-03,\n -1.3104e-03, 3.8890e-03, 9.1448e-05, 9.8843e-04, 7.3201e-04,\n 1.9438e-03, 3.6368e-03, -2.2155e-03, 1.9349e-03, 5.9589e-04,\n -1.7104e-04, 2.5938e-03, 2.8475e-04, -2.0754e-03, 1.0425e-03,\n -3.6684e-03, 2.7347e-04, 1.9706e-03, -3.8468e-03, 2.5455e-03,\n 2.6019e-03, -1.2743e-03, -1.5201e-04, 1.2586e-03, 2.1549e-03,\n 3.1967e-03, 8.2944e-03, 7.9749e-04, -9.4234e-04, 8.2516e-04,\n -5.2163e-04, -9.6984e-04, -6.0214e-04, 1.1002e-03, 2.0959e-03,\n -4.1089e-03, 1.6970e-03, -1.1255e-03, -3.5012e-03, -2.2491e-03,\n 9.1376e-04, 1.8468e-03, 3.0977e-03, 2.1041e-04, -3.1445e-04,\n 7.5291e-04, 7.9053e-04, 3.2216e-03, 5.6052e-45, 2.9218e-03,\n 2.0854e-03, 1.2447e-04, -1.5941e-05, 1.5689e-03, 1.1332e-03,\n 5.6052e-45, 4.1946e-04, 5.7413e-03, 1.3567e-03, -2.1125e-03,\n 3.7481e-04, -1.0426e-03, -3.3668e-03, -2.6660e-04, 1.0122e-03,\n 2.1578e-03, -2.9802e-03, -8.4469e-04, 8.5495e-04, -2.6215e-03,\n 1.6641e-03, -4.7349e-04, 3.7535e-04, 2.7739e-04, -9.4402e-04,\n 3.6461e-04, -5.7526e-04, -9.3438e-03, -8.4745e-04, 1.1488e-03,\n 3.3197e-04, 2.0590e-04, -1.0574e-03, -6.4726e-04, -1.8167e-03,\n -4.2726e-03, 2.8572e-03, 1.9809e-03, -9.3122e-04, 6.9437e-05,\n -2.1124e-03, -3.5934e-03, -3.5126e-04, 3.3538e-04, 2.9168e-03,\n 1.7734e-03, 1.7221e-03, -3.4743e-03, 2.8918e-03, 5.5340e-04,\n 1.1116e-03, 4.9373e-04, 1.3137e-03, -6.4143e-04, -7.2743e-04,\n -9.4011e-04, -8.4969e-04, -9.4806e-05, 1.1837e-03, -1.1652e-03,\n 5.0688e-05, 9.9657e-04, -7.8477e-05, 1.5256e-03, 9.3055e-05,\n -5.8676e-03, -2.4727e-03, 3.2354e-04, -2.2861e-03, -1.0891e-03,\n 9.8596e-04, -3.2176e-03, 3.3699e-04, 9.1242e-04, -1.0214e-04,\n 2.1085e-03, 3.3424e-04, 3.8388e-03, -4.1021e-03, -4.3199e-04,\n 8.0011e-05, 5.6052e-45, 8.3694e-04, 3.8156e-04, -2.5876e-03,\n -4.3533e-03, -3.6770e-03, -8.0231e-04, -1.2116e-03, 2.1565e-03,\n 1.3410e-03, 6.9447e-04, -6.8760e-05, 3.0785e-04, 5.9825e-04,\n 1.1640e-03, 5.6189e-04, -9.3096e-05, -1.3761e-03, -1.3037e-03,\n 2.6754e-03, -1.6120e-03, 3.2670e-03, 6.7936e-04, 4.7283e-04,\n 3.6581e-03, 5.1630e-04, 4.0033e-03, 2.2180e-03, 9.2958e-04,\n -1.8706e-03, 8.0642e-05, -8.5846e-04, 1.2067e-03, 1.0261e-02,\n -1.5249e-04, -5.9399e-04, 3.4405e-03, 4.4834e-03, -1.0593e-03,\n -3.7747e-03, 1.3568e-03, -6.6646e-04, 1.6686e-03, -5.8544e-04,\n -3.2001e-03, -3.7002e-03, -2.9380e-03, -8.3243e-03, 2.7737e-03,\n 5.6662e-03, -3.9775e-04, 3.0536e-03, -7.1727e-04, 1.8619e-03,\n 1.5579e-03, 4.0476e-03, 2.5070e-03, 3.7815e-03, -1.4202e-03,\n 1.1778e-04, -3.8669e-03, 2.0842e-03, 1.5034e-03, -3.0906e-03,\n 1.4722e-03, 1.7141e-03, 1.3484e-03, -4.3194e-03, -2.4889e-03,\n 1.5724e-03, -5.0437e-04, -6.8783e-04, 1.7271e-07, -7.1642e-04,\n 7.2223e-04, -1.1387e-03, -5.0196e-04, -9.9090e-04, 8.4262e-04,\n -1.4748e-04, -2.4147e-03, -2.7535e-03, -6.8441e-04, 2.9981e-03,\n -3.0645e-03, 2.5358e-04, -1.0643e-03, 3.4255e-04, 3.2072e-03,\n 1.1740e-03, -1.3245e-03, 7.5514e-04, -7.5197e-04, 9.6377e-04,\n -8.5812e-04, -2.9789e-03, -2.9463e-04, -1.6654e-03, -5.5644e-03,\n 2.7813e-03, 3.0560e-04, 1.4897e-03, 6.6015e-04, -1.0991e-03,\n -1.4017e-03, 9.4922e-04, -9.0859e-05, -5.3276e-04, -2.3116e-03,\n -3.5102e-03, 1.1826e-03, -3.1689e-03, -1.3322e-03, 2.8452e-03,\n 2.2078e-03, -8.7526e-04, 1.7830e-03, 3.5420e-03, 3.0954e-04,\n 5.6018e-03, -1.1569e-03, -2.8887e-03, 9.6424e-04, 3.0773e-03,\n 6.5082e-04, -1.4591e-03, -2.4097e-03, 1.7329e-03, 6.9053e-04,\n -3.3040e-03, 2.6072e-04, -1.8624e-03, -5.4302e-04, -3.6082e-03,\n 5.7326e-05, 7.1439e-04, -3.5626e-03, 1.1443e-03, 1.4654e-03,\n -4.1634e-03, -5.0379e-04], device='cuda:0')",
|
| 23 |
+
"exp_avg_sq": "tensor([7.3763e-05, 8.2699e-05, 1.0170e-04, 1.1970e-04, 6.3520e-05, 1.3488e-04,\n 1.1635e-04, 6.4528e-05, 5.9936e-05, 6.0973e-05, 1.1854e-04, 9.0957e-05,\n 5.8567e-05, 1.1363e-04, 1.2656e-04, 1.0589e-04, 7.0502e-05, 1.8417e-04,\n 2.2944e-05, 9.7138e-05, 6.4593e-05, 8.6058e-05, 9.5206e-05, 8.8165e-05,\n 6.9717e-05, 5.2249e-05, 1.5519e-04, 7.8586e-05, 7.9564e-05, 5.9580e-05,\n 8.7972e-05, 1.0233e-04, 5.1426e-05, 1.8496e-04, 9.2213e-05, 6.7449e-05,\n 6.4826e-05, 1.0971e-04, 1.3259e-04, 1.1210e-04, 1.0253e-04, 2.2970e-04,\n 6.2240e-05, 6.5961e-05, 1.3224e-04, 1.3458e-04, 1.1349e-04, 4.5040e-05,\n 6.6741e-05, 6.5647e-05, 6.1867e-05, 1.3073e-04, 1.7788e-04, 8.0371e-05,\n 1.2397e-04, 6.4585e-05, 8.1263e-05, 4.8534e-05, 8.5921e-05, 7.3085e-05,\n 5.6790e-05, 5.5973e-05, 6.7249e-05, 1.1332e-04, 1.7314e-04, 1.4217e-04,\n 1.1654e-04, 1.3434e-04, 7.7451e-05, 9.2446e-05, 6.8903e-05, 7.9948e-05,\n 7.0969e-05, 2.2109e-04, 8.6839e-05, 5.7383e-05, 2.2947e-04, 9.7663e-05,\n 7.4618e-05, 9.3272e-05, 7.4601e-05, 4.5085e-05, 5.4432e-05, 1.4788e-04,\n 9.4883e-05, 1.1245e-04, 1.3700e-04, 4.0903e-05, 1.2578e-04, 9.0807e-05,\n 9.3736e-05, 1.0389e-04, 2.2944e-04, 2.0310e-04, 7.6415e-05, 8.8018e-05,\n 6.9192e-05, 1.2566e-04, 1.0624e-04, 8.7469e-05, 9.2383e-05, 7.4889e-05,\n 5.6741e-05, 1.0069e-04, 4.4557e-05, 6.9100e-05, 6.4269e-05, 4.3626e-05,\n 7.0491e-05, 5.2030e-05, 1.7922e-04, 4.5323e-05, 2.7008e-04, 5.5649e-05,\n 6.0630e-05, 3.3253e-05, 1.1129e-04, 1.6643e-04, 7.5288e-05, 1.0045e-04,\n 7.4033e-05, 5.8054e-05, 1.3459e-04, 1.3457e-04, 1.0321e-04, 7.7834e-05,\n 5.6709e-05, 1.1809e-04, 1.2711e-04, 9.7805e-05, 1.0260e-04, 1.8979e-04,\n 8.3116e-05, 5.5274e-05, 1.6166e-04, 9.1658e-05, 5.6112e-05, 5.4216e-05,\n 2.2784e-04, 7.2093e-05, 6.4624e-05, 5.6295e-05, 8.5010e-05, 2.4932e-04,\n 9.4928e-05, 1.2011e-04, 6.9330e-05, 5.0851e-05, 1.8218e-04, 1.7236e-04,\n 7.1535e-05, 1.1685e-04, 6.0838e-05, 9.3157e-05, 7.5850e-05, 1.0720e-04,\n 7.5408e-05, 6.0105e-05, 1.6757e-04, 7.8007e-05, 9.3020e-05, 7.7094e-05,\n 1.4287e-04, 5.9578e-05, 9.9023e-05, 9.9002e-05, 1.9544e-04, 5.6414e-05,\n 9.9341e-05, 2.9448e-04, 1.0699e-04, 1.4111e-04, 5.1896e-05, 6.6039e-05,\n 7.6976e-05, 5.3170e-05, 5.6869e-05, 6.7273e-05, 6.9760e-05, 1.5303e-04,\n 6.3251e-05, 1.1673e-04, 1.7707e-04, 6.9399e-05, 2.8954e-05, 1.1849e-04,\n 1.1826e-04, 8.0066e-05, 1.2240e-04, 7.6918e-05, 1.0975e-09, 6.6119e-05,\n 5.4040e-05, 3.4690e-05, 8.5053e-05, 8.0423e-05, 4.2581e-05, 5.0434e-05,\n 5.3843e-05, 6.3293e-05, 6.9047e-05, 8.9313e-05, 8.5783e-05, 5.7037e-05,\n 5.2985e-05, 1.9851e-04, 6.9355e-05, 6.1497e-05, 6.1276e-05, 7.3831e-05,\n 1.4130e-04, 7.8095e-05, 9.5315e-05, 7.6799e-05, 3.4004e-05, 1.6362e-04,\n 6.4519e-05, 4.7889e-05, 1.0274e-04, 1.1027e-04, 9.3683e-05, 1.7894e-04,\n 5.3952e-05, 6.5581e-05, 5.8199e-05, 7.6500e-05, 7.9849e-05, 4.7836e-05,\n 1.5080e-04, 1.1794e-04, 1.6512e-04, 9.0283e-05, 1.3239e-04, 9.5672e-05,\n 8.1850e-05, 1.2116e-04, 6.6361e-05, 8.5153e-05, 8.9979e-05, 8.5680e-05,\n 5.2082e-05, 6.7284e-05, 6.7007e-05, 1.2982e-04, 3.3209e-05, 1.1066e-04,\n 1.0372e-04, 9.2951e-05, 5.3790e-05, 7.9444e-05, 1.4233e-04, 9.8845e-05,\n 5.6631e-05, 6.1442e-05, 5.2256e-05, 9.0757e-05, 4.9847e-05, 9.8173e-05,\n 9.5824e-05, 1.5074e-04, 1.0533e-04, 5.3576e-05, 8.2670e-05, 4.3948e-05,\n 1.0613e-04, 1.3177e-04, 2.4743e-04, 6.7587e-05, 5.5100e-05, 4.2922e-05,\n 6.5187e-05, 1.0283e-04, 6.0832e-05, 1.2333e-04, 5.6247e-05, 1.1363e-04,\n 1.5689e-04, 4.9376e-05, 9.0024e-05, 8.5476e-05, 6.3472e-05, 7.1934e-05,\n 9.6718e-05, 7.2250e-05, 8.2911e-05, 9.7185e-05, 8.1754e-05, 1.1026e-04,\n 1.3199e-10, 1.5154e-04, 8.9887e-05, 5.4754e-05, 1.2605e-04, 1.0053e-04,\n 1.0505e-04, 6.9434e-10, 8.0422e-05, 1.5701e-04, 8.0862e-05, 2.1360e-04,\n 1.0276e-04, 1.0849e-04, 7.9092e-05, 5.3943e-05, 9.8070e-05, 9.9631e-05,\n 7.7796e-05, 8.3327e-05, 1.4691e-04, 6.6332e-05, 8.3843e-05, 1.3987e-04,\n 1.8493e-04, 4.9252e-05, 6.6583e-05, 9.3864e-05, 5.8501e-05, 9.7960e-05,\n 2.3276e-05, 6.1066e-05, 1.0406e-04, 1.5931e-04, 4.5405e-05, 1.0479e-04,\n 1.0825e-04, 1.9819e-04, 6.5068e-05, 8.0889e-05, 5.9977e-05, 7.7020e-05,\n 1.0998e-04, 9.5132e-05, 4.7093e-05, 3.8037e-05, 1.6289e-04, 1.0186e-04,\n 3.2884e-05, 4.0843e-05, 8.7115e-05, 1.8792e-04, 4.9867e-05, 6.4200e-05,\n 9.8602e-05, 7.7923e-05, 9.8478e-05, 1.3556e-04, 4.6691e-05, 5.3308e-05,\n 8.1267e-05, 9.8556e-05, 4.0970e-05, 2.1102e-04, 5.8164e-05, 6.1151e-05,\n 2.5564e-04, 1.1976e-04, 5.7240e-05, 1.3170e-04, 6.2435e-05, 4.9247e-05,\n 1.9566e-04, 4.9678e-05, 7.3142e-05, 4.8879e-05, 6.2714e-05, 8.7928e-05,\n 5.1852e-05, 7.6848e-05, 1.5083e-04, 6.2275e-05, 8.6464e-05, 1.5802e-10,\n 8.4491e-05, 3.4511e-05, 5.6803e-05, 6.5384e-05, 1.7862e-04, 7.1413e-05,\n 1.0673e-04, 6.0445e-05, 8.9544e-05, 1.2587e-04, 1.2410e-04, 1.1306e-04,\n 7.5528e-05, 1.6512e-04, 7.9373e-05, 4.1954e-05, 9.8784e-05, 4.3077e-05,\n 7.5609e-05, 3.8150e-05, 7.7077e-05, 1.3037e-04, 1.1148e-04, 1.0426e-04,\n 5.4452e-05, 1.3890e-04, 5.8617e-05, 1.1814e-04, 5.9166e-05, 8.7718e-05,\n 5.7094e-05, 5.5589e-05, 1.0795e-04, 2.6787e-05, 3.2913e-05, 1.0920e-04,\n 1.0995e-04, 8.4904e-05, 5.1923e-05, 9.3825e-05, 8.4348e-05, 1.7601e-04,\n 6.1067e-05, 7.3388e-05, 1.2330e-04, 1.2254e-04, 2.0470e-04, 6.4784e-05,\n 6.7124e-05, 5.0862e-05, 1.6833e-04, 1.1367e-04, 8.0222e-05, 8.2046e-05,\n 6.8883e-05, 1.0346e-04, 9.6034e-05, 5.5654e-05, 7.3800e-05, 1.4989e-04,\n 1.0402e-04, 1.0623e-04, 9.2099e-05, 1.3436e-04, 1.6985e-04, 1.2915e-04,\n 1.0686e-04, 1.2038e-04, 9.8543e-05, 1.5481e-04, 9.8265e-05, 1.1534e-04,\n 5.6011e-05, 8.7929e-05, 9.3049e-05, 6.8765e-05, 1.1508e-04, 1.1568e-04,\n 5.7410e-05, 1.1656e-04, 8.5102e-05, 1.9860e-04, 2.3213e-04, 6.0532e-05,\n 1.1108e-04, 4.3738e-05, 7.1727e-05, 8.5274e-05, 9.2222e-05, 7.4706e-05,\n 8.1668e-05, 1.0664e-04, 4.7629e-05, 4.3241e-05, 1.0199e-04, 8.9274e-05,\n 7.8799e-05, 1.2269e-04, 1.2477e-04, 4.6224e-05, 5.6239e-05, 8.8057e-05,\n 1.1621e-04, 6.4719e-05, 9.3450e-05, 1.5076e-04, 6.5739e-05, 1.0486e-04,\n 1.1107e-04, 9.5553e-05, 1.4342e-04, 7.4932e-05, 1.3323e-04, 4.6815e-05,\n 2.8932e-05, 8.8075e-05, 1.0545e-04, 5.7631e-05, 1.1331e-04, 4.8372e-05,\n 2.0207e-04, 6.1300e-05, 7.1251e-05, 6.5078e-05, 1.4785e-04, 8.3591e-05,\n 8.8532e-05, 6.2021e-05, 1.1431e-04, 8.5037e-05, 9.7043e-05, 4.4059e-05,\n 1.4502e-04, 8.9652e-05, 1.0380e-04, 7.0840e-05, 8.6917e-05, 1.0337e-04,\n 6.9631e-05, 9.3824e-05], device='cuda:0')"
|
| 24 |
+
},
|
| 25 |
+
"4": {
|
| 26 |
+
"step": "tensor(7508.)",
|
| 27 |
+
"exp_avg": "tensor([[-4.1846e-06, 4.9860e-06, -1.3992e-05, ..., 6.5100e-06,\n -8.5815e-06, -1.6667e-05],\n [-1.4252e-06, 2.0124e-05, 4.0215e-06, ..., -1.8155e-05,\n 6.5681e-07, 8.2556e-06],\n [ 2.2303e-05, 9.7771e-06, -2.3295e-05, ..., 1.9560e-05,\n 2.5665e-05, 2.5339e-06],\n ...,\n [ 2.5463e-05, 1.8846e-05, 4.1772e-06, ..., 1.1109e-05,\n -6.5720e-05, -1.9681e-05],\n [ 6.8523e-06, -1.0548e-05, 3.0981e-06, ..., 1.1080e-05,\n 3.1087e-05, 4.0587e-06],\n [ 7.9782e-06, -6.7154e-06, 6.3290e-08, ..., -1.7903e-05,\n 7.0733e-06, -1.5689e-05]], device='cuda:0')",
|
| 28 |
+
"exp_avg_sq": "tensor([[2.4661e-09, 6.1902e-09, 1.8035e-09, ..., 1.7398e-09, 2.0681e-09,\n 4.5105e-09],\n [3.9861e-09, 3.7205e-09, 8.4015e-09, ..., 3.0236e-09, 6.0442e-09,\n 6.7968e-09],\n [3.9286e-09, 5.4220e-09, 6.6817e-09, ..., 2.9001e-09, 5.3161e-09,\n 6.9982e-09],\n ...,\n [5.7335e-09, 1.0033e-08, 5.8935e-09, ..., 2.1393e-09, 1.0282e-08,\n 8.1572e-09],\n [2.9456e-09, 1.2137e-08, 7.1871e-09, ..., 3.2631e-09, 4.6854e-09,\n 7.2113e-09],\n [9.4137e-09, 7.2130e-09, 6.0237e-09, ..., 3.1191e-09, 4.6962e-09,\n 7.8844e-09]], device='cuda:0')"
|
| 29 |
+
},
|
| 30 |
+
"5": {
|
| 31 |
+
"step": "tensor(7508.)",
|
| 32 |
+
"exp_avg": "tensor([[ 4.8274e-06, 9.6597e-06, -1.1387e-05, ..., 5.5125e-06,\n -2.6488e-06, -7.4846e-06],\n [-1.1600e-05, 6.7700e-06, -8.7018e-06, ..., -1.9049e-05,\n -4.1614e-06, -5.1973e-06],\n [ 8.1131e-06, 1.4135e-06, -1.7709e-05, ..., 9.1534e-06,\n 2.2766e-05, 9.2675e-06],\n ...,\n [-7.9178e-06, -8.7916e-06, -1.7359e-05, ..., -7.9485e-07,\n -2.5506e-05, -9.5410e-06],\n [ 2.8882e-06, -1.0656e-06, -2.4007e-06, ..., -2.8522e-05,\n 5.2818e-06, 9.7514e-06],\n [-2.1509e-05, -6.0701e-06, 6.6829e-06, ..., -2.4879e-05,\n -9.6000e-06, 3.9246e-06]], device='cuda:0')",
|
| 33 |
+
"exp_avg_sq": "tensor([[1.3154e-09, 2.9263e-09, 1.4614e-09, ..., 7.8770e-10, 1.0038e-09,\n 2.9811e-09],\n [3.8434e-09, 7.2723e-09, 2.7210e-09, ..., 1.9425e-09, 5.3179e-09,\n 4.6627e-09],\n [2.9254e-09, 2.9633e-09, 3.7109e-09, ..., 2.1516e-09, 3.8091e-09,\n 5.5001e-09],\n ...,\n [3.4109e-09, 2.8117e-09, 6.3888e-09, ..., 1.2703e-09, 9.7005e-09,\n 4.5777e-09],\n [4.1349e-09, 5.6845e-09, 7.8539e-09, ..., 2.9701e-09, 1.9637e-09,\n 3.8217e-09],\n [2.1725e-09, 8.2607e-09, 3.4255e-09, ..., 1.7227e-09, 1.7276e-09,\n 5.2702e-09]], device='cuda:0')"
|
| 34 |
+
},
|
| 35 |
+
"6": {
|
| 36 |
+
"step": "tensor(7508.)",
|
| 37 |
+
"exp_avg": "tensor([-0.0003, 0.0003], device='cuda:0')",
|
| 38 |
+
"exp_avg_sq": "tensor([4.4702e-06, 4.4702e-06], device='cuda:0')"
|
| 39 |
+
}
|
| 40 |
+
},
|
| 41 |
+
"param_groups": [
|
| 42 |
+
{
|
| 43 |
+
"lr": 0.00904518046337755,
|
| 44 |
+
"name": "shared",
|
| 45 |
+
"betas": [
|
| 46 |
+
0.9,
|
| 47 |
+
0.999
|
| 48 |
+
],
|
| 49 |
+
"eps": 1e-08,
|
| 50 |
+
"weight_decay": 1e-05,
|
| 51 |
+
"amsgrad": false,
|
| 52 |
+
"maximize": false,
|
| 53 |
+
"foreach": null,
|
| 54 |
+
"capturable": false,
|
| 55 |
+
"differentiable": false,
|
| 56 |
+
"fused": null,
|
| 57 |
+
"decoupled_weight_decay": true,
|
| 58 |
+
"initial_lr": 0.01,
|
| 59 |
+
"params": [
|
| 60 |
+
0,
|
| 61 |
+
1,
|
| 62 |
+
2,
|
| 63 |
+
3
|
| 64 |
+
]
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"lr": 0.00904518046337755,
|
| 68 |
+
"name": "scale_256",
|
| 69 |
+
"betas": [
|
| 70 |
+
0.9,
|
| 71 |
+
0.999
|
| 72 |
+
],
|
| 73 |
+
"eps": 1e-08,
|
| 74 |
+
"weight_decay": 1e-05,
|
| 75 |
+
"amsgrad": false,
|
| 76 |
+
"maximize": false,
|
| 77 |
+
"foreach": null,
|
| 78 |
+
"capturable": false,
|
| 79 |
+
"differentiable": false,
|
| 80 |
+
"fused": null,
|
| 81 |
+
"decoupled_weight_decay": true,
|
| 82 |
+
"initial_lr": 0.01,
|
| 83 |
+
"params": [
|
| 84 |
+
4
|
| 85 |
+
]
|
| 86 |
+
},
|
| 87 |
+
{
|
| 88 |
+
"lr": 0.00904518046337755,
|
| 89 |
+
"name": "scale_512",
|
| 90 |
+
"betas": [
|
| 91 |
+
0.9,
|
| 92 |
+
0.999
|
| 93 |
+
],
|
| 94 |
+
"eps": 1e-08,
|
| 95 |
+
"weight_decay": 1e-05,
|
| 96 |
+
"amsgrad": false,
|
| 97 |
+
"maximize": false,
|
| 98 |
+
"foreach": null,
|
| 99 |
+
"capturable": false,
|
| 100 |
+
"differentiable": false,
|
| 101 |
+
"fused": null,
|
| 102 |
+
"decoupled_weight_decay": true,
|
| 103 |
+
"initial_lr": 0.01,
|
| 104 |
+
"params": [
|
| 105 |
+
5
|
| 106 |
+
]
|
| 107 |
+
},
|
| 108 |
+
{
|
| 109 |
+
"lr": 0.004522637977440181,
|
| 110 |
+
"name": "fusion",
|
| 111 |
+
"betas": [
|
| 112 |
+
0.9,
|
| 113 |
+
0.999
|
| 114 |
+
],
|
| 115 |
+
"eps": 1e-08,
|
| 116 |
+
"weight_decay": 1e-05,
|
| 117 |
+
"amsgrad": false,
|
| 118 |
+
"maximize": false,
|
| 119 |
+
"foreach": null,
|
| 120 |
+
"capturable": false,
|
| 121 |
+
"differentiable": false,
|
| 122 |
+
"fused": null,
|
| 123 |
+
"decoupled_weight_decay": true,
|
| 124 |
+
"initial_lr": 0.005,
|
| 125 |
+
"params": [
|
| 126 |
+
6
|
| 127 |
+
]
|
| 128 |
+
}
|
| 129 |
+
]
|
| 130 |
+
},
|
| 131 |
+
"scheduler_state_dict": {
|
| 132 |
+
"T_0": 10,
|
| 133 |
+
"T_i": 10,
|
| 134 |
+
"T_mult": 2,
|
| 135 |
+
"eta_min": 1e-06,
|
| 136 |
+
"T_cur": 2,
|
| 137 |
+
"base_lrs": [
|
| 138 |
+
0.01,
|
| 139 |
+
0.01,
|
| 140 |
+
0.01,
|
| 141 |
+
0.005
|
| 142 |
+
],
|
| 143 |
+
"last_epoch": 2,
|
| 144 |
+
"_step_count": 0,
|
| 145 |
+
"_is_initial": false,
|
| 146 |
+
"_get_lr_called_within_step": false,
|
| 147 |
+
"_last_lr": [
|
| 148 |
+
0.00904518046337755,
|
| 149 |
+
0.00904518046337755,
|
| 150 |
+
0.00904518046337755,
|
| 151 |
+
0.004522637977440181
|
| 152 |
+
]
|
| 153 |
+
},
|
| 154 |
+
"metrics": {
|
| 155 |
+
"best_val_acc": 64.17333333333333,
|
| 156 |
+
"best_epoch": 1,
|
| 157 |
+
"scale_accuracies": {
|
| 158 |
+
"256": 63.38733333333333,
|
| 159 |
+
"512": 64.19266666666667
|
| 160 |
+
},
|
| 161 |
+
"training_history": {
|
| 162 |
+
"epochs": [
|
| 163 |
+
1,
|
| 164 |
+
2
|
| 165 |
+
],
|
| 166 |
+
"train_loss": [
|
| 167 |
+
5.311051666323785,
|
| 168 |
+
4.462767010682684
|
| 169 |
+
],
|
| 170 |
+
"train_acc": [
|
| 171 |
+
54.91727464101089,
|
| 172 |
+
60.04988680892759
|
| 173 |
+
],
|
| 174 |
+
"val_acc": [
|
| 175 |
+
63.041333333333334,
|
| 176 |
+
64.17333333333333
|
| 177 |
+
],
|
| 178 |
+
"scale_accs": {
|
| 179 |
+
"256": [
|
| 180 |
+
62.11666666666667,
|
| 181 |
+
63.38733333333333
|
| 182 |
+
],
|
| 183 |
+
"512": [
|
| 184 |
+
62.967333333333336,
|
| 185 |
+
64.19266666666667
|
| 186 |
+
]
|
| 187 |
+
},
|
| 188 |
+
"lr": [
|
| 189 |
+
0.00975530705321762,
|
| 190 |
+
0.00904518046337755
|
| 191 |
+
]
|
| 192 |
+
}
|
| 193 |
+
},
|
| 194 |
+
"train_config": {
|
| 195 |
+
"name": "david_training",
|
| 196 |
+
"run_id": "20251012_231445",
|
| 197 |
+
"dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
|
| 198 |
+
"model_variant": [
|
| 199 |
+
"clip_vit_b16",
|
| 200 |
+
"clip_vit_laion_b32",
|
| 201 |
+
"clip_vit_b32"
|
| 202 |
+
],
|
| 203 |
+
"num_classes": 1000,
|
| 204 |
+
"preset": "small_fast",
|
| 205 |
+
"custom_config_path": null,
|
| 206 |
+
"num_classes_override": null,
|
| 207 |
+
"use_belly_override": null,
|
| 208 |
+
"belly_expand_override": null,
|
| 209 |
+
"progressive_training_override": true,
|
| 210 |
+
"scale_warmup_epochs_override": {
|
| 211 |
+
"256": 0,
|
| 212 |
+
"512": 0
|
| 213 |
+
},
|
| 214 |
+
"num_epochs": 10,
|
| 215 |
+
"batch_size": 1024,
|
| 216 |
+
"learning_rate": 0.01,
|
| 217 |
+
"weight_decay": 1e-05,
|
| 218 |
+
"warmup_epochs": 3,
|
| 219 |
+
"use_rose_loss": true,
|
| 220 |
+
"rose_initial_weight": 0.2,
|
| 221 |
+
"rose_max_weight": 0.6,
|
| 222 |
+
"rose_weight_schedule": "adaptive",
|
| 223 |
+
"use_cayley_loss": false,
|
| 224 |
+
"cayley_weight": 0.01,
|
| 225 |
+
"scale_loss_balance": null,
|
| 226 |
+
"use_mixed_precision": false,
|
| 227 |
+
"gradient_clip": 5.0,
|
| 228 |
+
"scheduler_type": "cosine_restarts",
|
| 229 |
+
"min_lr": 1e-06,
|
| 230 |
+
"freeze_strategy": "never",
|
| 231 |
+
"freeze_threshold": 90.0,
|
| 232 |
+
"unfreeze_on_plateau": true,
|
| 233 |
+
"patience": 10,
|
| 234 |
+
"track_gradients": true,
|
| 235 |
+
"gradient_scale_threshold": 1e-05,
|
| 236 |
+
"gradient_scale_multiplier": 10.0,
|
| 237 |
+
"log_interval": 50,
|
| 238 |
+
"val_interval": 1,
|
| 239 |
+
"save_interval": 5,
|
| 240 |
+
"log_fusion_weights": true,
|
| 241 |
+
"log_loss_components": true,
|
| 242 |
+
"save_format": "safetensors",
|
| 243 |
+
"hf_repo": "AbstractPhil/david-shared-space",
|
| 244 |
+
"upload_to_hub": true,
|
| 245 |
+
"base_dir": "./david_training",
|
| 246 |
+
"num_workers": 10,
|
| 247 |
+
"pin_memory": true,
|
| 248 |
+
"prefetch_factor": 4,
|
| 249 |
+
"persistent_workers": true
|
| 250 |
+
}
|
| 251 |
+
}
|