AbstractPhil commited on
Commit
2a9e46d
·
verified ·
1 Parent(s): 144eedf

Update best_model_acc65.36_metadata.json - Run 20251012_231445

Browse files
weights/David-fully_shared-weighted_sum/20251012_231445/best_model_acc65.36_metadata.json ADDED
@@ -0,0 +1,265 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3,
3
+ "optimizer_state_dict": {
4
+ "state": {
5
+ "0": {
6
+ "step": "tensor(15016.)",
7
+ "exp_avg": "tensor([[-2.6426e-05, 6.5358e-05, -8.5242e-06, ..., 2.0718e-05,\n 5.9025e-05, -8.4665e-06],\n [ 1.4521e-04, -2.5984e-04, -1.1355e-06, ..., 2.0086e-05,\n 4.0224e-05, 9.7844e-05],\n [ 7.4279e-05, 4.6700e-05, -5.4785e-05, ..., -8.2962e-05,\n -3.0775e-05, -6.3975e-05],\n ...,\n [-9.8967e-05, -4.7576e-05, 4.0865e-05, ..., -6.1476e-05,\n 1.4692e-05, -1.6173e-05],\n [ 2.8843e-05, -2.2011e-04, 1.6153e-05, ..., 2.8808e-05,\n 1.8750e-05, 2.7499e-05],\n [-6.7829e-05, -7.9524e-05, -1.0919e-05, ..., -8.8679e-05,\n -5.9165e-06, -5.2902e-05]], device='cuda:0')",
8
+ "exp_avg_sq": "tensor([[2.2140e-08, 5.3730e-08, 3.1195e-08, ..., 3.8133e-08, 2.7658e-08,\n 1.4060e-08],\n [7.0879e-08, 1.2098e-07, 4.9290e-08, ..., 4.2548e-08, 6.1702e-08,\n 2.3670e-08],\n [1.4997e-07, 1.7918e-07, 9.4298e-08, ..., 1.1922e-07, 4.4758e-08,\n 6.5953e-08],\n ...,\n [3.1142e-08, 1.2784e-07, 3.2312e-08, ..., 2.2353e-08, 1.6573e-08,\n 1.2738e-08],\n [3.4036e-08, 4.6071e-07, 4.3969e-08, ..., 9.5695e-08, 2.3129e-08,\n 2.9897e-08],\n [5.4561e-08, 8.1462e-08, 2.6461e-08, ..., 1.0241e-07, 2.5688e-08,\n 2.4697e-08]], device='cuda:0')"
9
+ },
10
+ "1": {
11
+ "step": "tensor(15016.)",
12
+ "exp_avg": "tensor([-8.2621e-04, 3.8871e-03, -1.0143e-03, 5.3546e-04, -1.6335e-03,\n -4.1559e-03, 4.9453e-04, 2.2339e-05, 7.4485e-04, 1.8977e-03,\n -1.6329e-03, 7.3371e-04, -3.3497e-04, 2.9334e-03, 2.3149e-05,\n -1.1989e-04, 2.8457e-03, -6.7021e-03, -3.9347e-04, 1.5533e-03,\n 1.3523e-03, 1.4604e-04, 3.7721e-03, -2.6597e-03, -6.5898e-05,\n 2.6314e-04, 5.3633e-04, -1.7203e-03, -9.0886e-04, 1.8042e-04,\n 1.0639e-03, 1.0517e-03, 1.8396e-05, -1.9976e-03, 2.1024e-04,\n -1.6119e-03, -1.0748e-03, 2.2985e-03, -2.9075e-03, 3.8597e-04,\n -2.7622e-03, 3.5419e-04, -9.5602e-04, -6.5947e-04, -1.5772e-03,\n -6.3960e-04, 1.0204e-03, -8.1520e-04, -2.6796e-05, 1.5977e-04,\n 1.3011e-03, -1.2722e-03, 1.0824e-03, 1.0979e-03, -1.3045e-03,\n -1.3744e-03, -1.0405e-03, -2.0771e-03, -1.0218e-04, 5.5831e-04,\n -1.2254e-03, -6.6426e-04, 8.1934e-04, 3.5197e-03, 1.6646e-03,\n 9.0564e-04, 1.6414e-05, -6.0159e-04, -4.4342e-04, -3.2209e-03,\n -1.6995e-03, 8.2081e-04, 2.4346e-03, -1.3421e-03, 4.1070e-04,\n 1.1141e-03, 2.5147e-03, -4.2647e-04, -1.3793e-03, -3.1127e-04,\n 1.5430e-03, 9.4999e-04, 6.6026e-04, 1.2674e-03, 5.8343e-04,\n -3.4741e-03, 2.0347e-03, 9.6947e-04, 4.2092e-04, 2.2740e-03,\n -5.7371e-05, 1.3517e-03, 3.4349e-03, -1.5500e-03, 2.0301e-03,\n 9.3708e-04, -2.4377e-03, 1.0089e-03, 1.7100e-03, -1.6489e-04,\n 1.3164e-05, 1.2520e-03, 1.0835e-03, 2.2743e-03, 3.2888e-04,\n -1.5903e-03, -2.3544e-03, 2.0111e-03, -3.7285e-04, -3.3443e-03,\n -2.8103e-03, 2.0364e-03, 4.2548e-04, -5.0407e-03, -1.2749e-04,\n 2.5258e-04, -1.7049e-03, 1.8683e-03, 3.7306e-05, -1.0084e-03,\n 1.7632e-03, 2.3984e-03, -7.2422e-04, 1.6886e-03, 4.6595e-04,\n 6.3143e-04, -1.8089e-03, 3.2457e-03, 1.1944e-03, -3.0526e-03,\n -2.0207e-03, 6.2565e-04, -5.2000e-03, 1.5301e-03, 1.9449e-03,\n 5.7999e-04, -1.7723e-03, 5.7608e-04, -6.3638e-04, 4.6382e-05,\n -7.1112e-06, -5.2816e-03, 3.9068e-04, 1.4394e-03, 5.8731e-04,\n 1.2587e-03, 2.8311e-03, 3.0437e-04, 1.1522e-04, 2.3503e-03,\n 1.1776e-03, -5.7889e-04, -9.3746e-04, -1.9174e-03, 1.7252e-03,\n -1.7966e-03, 1.2897e-03, -8.8424e-05, 1.8126e-03, 6.4632e-05,\n -3.0238e-03, -1.8097e-04, -2.0486e-03, 9.7324e-05, 3.2794e-03,\n 2.1510e-03, -1.0116e-03, -7.1880e-04, -1.3606e-03, 3.6777e-03,\n 4.9174e-04, 1.9436e-03, 1.7946e-03, -7.2798e-04, -4.7731e-04,\n -5.2875e-04, 1.0192e-03, -2.9293e-04, -2.1666e-04, -1.2659e-03,\n 2.1870e-03, -8.7046e-05, -4.7281e-03, 5.7298e-04, 4.4607e-04,\n 4.2207e-04, 4.3718e-04, 3.0498e-03, 1.6277e-03, 1.8717e-03,\n -5.7908e-04, -2.8924e-04, 7.9994e-04, 1.1075e-03, 9.4409e-05,\n 8.2227e-04, 1.7696e-03, -5.1261e-04, 1.7512e-03, 8.7163e-04,\n -6.3388e-04, -9.5506e-04, 6.4060e-04, -1.1232e-03, 5.4075e-04,\n 2.9963e-03, 6.3466e-04, -4.9655e-05, 2.8639e-03, -1.5646e-04,\n 6.4097e-05, -6.5201e-04, -2.1194e-03, 1.2149e-03, 3.9113e-04,\n 3.0444e-03, 3.0463e-04, -8.5384e-04, -7.5138e-04, 1.5914e-03,\n -1.9243e-03, -2.0464e-03, 3.2532e-04, -2.1998e-03, -5.6722e-05,\n -1.6099e-03, -1.0763e-03, -1.0687e-03, -3.0899e-04, 3.1006e-03,\n 2.4634e-04, -2.1039e-04, 9.8053e-04, 3.7706e-03, 1.0010e-03,\n -1.0720e-03, 2.9036e-03, 1.7135e-03, -1.3123e-03, -7.0468e-04,\n 6.3109e-04, 4.4041e-04, -1.2252e-03, 3.3697e-04, -5.0897e-04,\n 9.8265e-04, 4.5375e-04, -1.8202e-03, 1.4339e-03, 2.0313e-03,\n -7.4937e-04, -9.6072e-04, -1.5985e-03, 1.7529e-03, -1.0793e-03,\n 3.0268e-04, 1.1810e-03, -5.4559e-05, 1.7833e-03, -3.5488e-04,\n -1.4856e-03, 1.1960e-03, -3.2185e-04, 1.1832e-03, -1.3187e-03,\n -5.8803e-04, -1.3059e-03, -1.5326e-03, -1.1879e-03, 1.8613e-04,\n 7.4507e-04, 1.5801e-03, -3.0611e-04, 1.3128e-04, -1.1779e-03,\n 8.8187e-04, 2.1413e-04, 4.8016e-04, -1.4755e-04, 8.3460e-04,\n 1.4744e-04, 2.2531e-03, -1.7230e-03, -1.5587e-03, 1.3243e-03,\n 2.9322e-03, -1.7245e-03, 1.1438e-03, -3.6279e-04, 2.2172e-03,\n 4.6644e-04, 4.4335e-04, -7.3337e-04, 1.0767e-03, 6.7741e-04,\n -6.1776e-05, -2.0276e-03, 6.9733e-04, -8.1157e-04, -2.2156e-03,\n -4.5008e-04, 5.9098e-04, -2.2889e-04, -1.0241e-03, 2.5948e-03,\n -9.0921e-04, -1.1621e-03, -1.0900e-03, -1.0408e-03, 1.0558e-03,\n 9.6534e-04, -4.3620e-03, 2.7264e-03, 1.7164e-03, -4.9265e-04,\n -1.3369e-03, -1.2264e-03, -1.4695e-03, 1.1820e-03, 2.2499e-03,\n 3.1979e-04, -1.2122e-03, -1.9190e-03, 9.7255e-05, 1.6300e-03,\n -3.7836e-04, -1.8528e-04, -1.5137e-03, -9.9197e-04, -4.0131e-04,\n 2.6141e-04, 1.6145e-03, 9.8357e-04, -4.4743e-04, -1.3037e-03,\n 2.3257e-03, 4.7882e-05, 2.1019e-03, -5.4020e-04, 6.0271e-04,\n -1.4119e-03, 1.2773e-03, 5.5148e-04, 5.0657e-04, -1.2004e-03,\n 6.2272e-04, 3.4211e-03, 1.5316e-03, -9.5764e-04, 1.4213e-03,\n -1.5390e-04, -1.9653e-03, -1.4458e-03, -4.2702e-04, 3.5199e-03,\n 7.0922e-04, 2.0246e-03, 1.7031e-03, -4.3670e-04, -9.9814e-04,\n 3.7440e-03, -4.2003e-04, 1.1244e-03, 4.5240e-05, -1.5180e-03,\n -7.7093e-04, -8.7621e-04, 1.7612e-04, -2.5859e-03, -9.7167e-04,\n 1.0013e-03, 1.9277e-04, -3.5660e-04, 1.3742e-03, -1.0455e-03,\n -7.9442e-04, -2.7098e-03, 1.5105e-03, -2.7352e-03, 8.9338e-04,\n -2.7570e-03, -4.8111e-04, -8.8532e-04, 2.0965e-03, 1.9863e-03,\n 6.3355e-04, -2.2156e-04, 1.6868e-03, 5.4901e-04, -3.4683e-03,\n 9.8863e-04, 1.8258e-04, -1.2340e-03, 6.0619e-04, -2.8586e-04,\n 8.2402e-04, 1.0839e-03, 1.4065e-03, -1.5060e-03, -1.8238e-03,\n -1.1662e-03, 1.1105e-05, -5.2706e-04, 6.2159e-04, -2.1046e-03,\n 4.9964e-04, 1.3864e-03, 2.5517e-04, -2.7332e-03, 1.7948e-04,\n -4.3292e-04, -1.1813e-03, 3.5352e-04, -1.4346e-03, 5.2274e-05,\n -1.3579e-03, 7.6732e-04, 2.1696e-03, -4.7727e-04, -8.4843e-04,\n -4.0168e-03, 9.4967e-04, -5.3188e-05, -2.8030e-03, -1.6853e-03,\n 1.3852e-05, 8.1313e-04, 2.1392e-04, 3.6333e-03, -1.2370e-03,\n 2.1502e-04, 7.2908e-05, -3.6343e-04, 6.9125e-04, -2.3120e-03,\n -1.4791e-05, 1.8803e-03, -1.0803e-03, -2.1939e-03, 1.9465e-03,\n 4.4758e-04, 2.3184e-03, -3.1599e-03, 2.3189e-03, 8.7711e-04,\n -6.5490e-04, 2.8074e-04, 6.8799e-04, -4.4972e-03, 3.6285e-04,\n 1.2209e-03, 1.5929e-03, -2.4335e-03, 1.1650e-03, 4.4735e-03,\n 2.5811e-03, -2.2045e-03, -9.5362e-05, 7.3133e-04, -3.5249e-03,\n -3.9454e-04, 1.2088e-03, 1.2589e-03, 1.2254e-03, 3.3265e-04,\n -4.9732e-04, -1.8292e-03, -1.1289e-03, 1.1521e-03, 3.3372e-03,\n -1.9418e-04, -2.1097e-04, -1.1358e-03, -3.5100e-05, -3.8606e-04,\n -2.7731e-04, -4.3714e-04, -1.7402e-03, 2.2119e-03, 7.8482e-04,\n 2.7003e-03, 1.2914e-03, 1.1837e-03, -1.2648e-03, -4.8686e-03,\n -1.5658e-03, -1.7102e-03, -2.5484e-03, -2.9177e-03, -2.4651e-04,\n -2.1633e-03, -3.7538e-03, -6.3400e-03, -1.1953e-03, -7.6144e-05,\n -2.0399e-03, 1.8687e-03, 2.4278e-03, -2.0108e-03, 4.6628e-03,\n 1.7531e-03, 5.8896e-04, -2.6292e-03, -1.2582e-04, 2.3237e-04,\n -1.1524e-03, -3.7761e-03, 1.0260e-03, -3.1550e-03, -1.2678e-03,\n 2.5172e-03, -1.0890e-03], device='cuda:0')",
13
+ "exp_avg_sq": "tensor([2.5208e-05, 4.3048e-05, 6.8125e-05, 5.0659e-05, 3.1726e-05, 9.8857e-05,\n 8.3588e-05, 2.7929e-05, 4.0135e-05, 4.7464e-05, 6.2960e-05, 3.7373e-05,\n 2.9540e-05, 4.4520e-05, 1.0051e-04, 3.0614e-05, 5.8780e-05, 1.0841e-04,\n 1.4963e-05, 4.3364e-05, 3.5116e-05, 5.2967e-05, 7.9182e-05, 4.5042e-05,\n 2.4890e-05, 2.9124e-05, 8.2306e-05, 6.0688e-05, 5.7086e-05, 4.2516e-05,\n 4.3770e-05, 5.0700e-05, 1.9922e-05, 1.2521e-04, 5.8378e-05, 3.4299e-05,\n 3.1456e-05, 6.6037e-05, 6.7935e-05, 5.9436e-05, 4.0717e-05, 3.3576e-05,\n 4.9132e-05, 3.1275e-05, 4.5858e-05, 5.1545e-05, 3.1764e-05, 2.2810e-05,\n 2.6865e-05, 3.4414e-05, 3.5798e-05, 4.2480e-05, 1.0672e-04, 4.3870e-05,\n 6.7244e-05, 4.4554e-05, 4.2397e-05, 3.7165e-05, 4.0754e-05, 4.9007e-05,\n 2.4467e-05, 4.0743e-05, 2.2586e-05, 1.0075e-04, 8.0242e-05, 7.8375e-05,\n 4.7429e-05, 7.3778e-05, 3.3168e-05, 6.1207e-05, 3.4888e-05, 7.1380e-05,\n 5.2833e-05, 8.7133e-05, 4.4652e-05, 5.9090e-05, 7.4476e-05, 3.4342e-05,\n 5.0224e-05, 6.1165e-05, 4.3068e-05, 3.8488e-05, 3.4675e-05, 6.4060e-05,\n 7.1503e-05, 4.5075e-05, 1.2529e-04, 2.7931e-05, 4.9219e-05, 5.3643e-05,\n 4.4996e-05, 2.8994e-05, 8.1295e-05, 1.7035e-04, 5.6705e-05, 6.8453e-05,\n 5.6964e-05, 7.1214e-05, 5.8747e-05, 5.2106e-05, 4.0955e-05, 4.2508e-05,\n 2.9060e-05, 3.9003e-05, 3.2027e-05, 3.2178e-05, 4.2150e-05, 2.4771e-05,\n 5.6065e-05, 2.1864e-05, 1.0905e-04, 2.0465e-05, 7.6134e-05, 4.5301e-05,\n 3.8197e-05, 2.2980e-05, 4.0343e-05, 7.5300e-05, 4.1470e-05, 4.7829e-05,\n 4.6643e-05, 5.4640e-05, 7.3571e-05, 6.0624e-05, 4.4311e-05, 6.4393e-05,\n 4.2759e-05, 7.8250e-05, 7.4457e-05, 4.7935e-05, 4.8192e-05, 1.1767e-04,\n 6.5944e-05, 5.3269e-05, 8.8253e-05, 3.2434e-05, 3.2194e-05, 2.8268e-05,\n 6.8922e-05, 5.1744e-05, 3.7880e-05, 6.9254e-05, 5.0963e-05, 1.1328e-04,\n 4.1519e-05, 6.5305e-05, 5.3376e-05, 4.1770e-05, 1.1758e-04, 4.7569e-05,\n 4.1685e-05, 7.1825e-05, 6.2265e-05, 6.2515e-05, 4.1481e-05, 6.9484e-05,\n 2.4392e-05, 2.3699e-05, 5.9859e-05, 3.0855e-05, 5.5011e-05, 6.2776e-05,\n 6.5175e-05, 4.2386e-05, 6.2481e-05, 3.3518e-05, 6.3155e-05, 3.7823e-05,\n 7.7133e-05, 1.1850e-04, 3.3304e-05, 5.4507e-05, 3.2794e-05, 4.1050e-05,\n 3.3208e-05, 2.4545e-05, 4.2458e-05, 5.0147e-05, 3.8244e-05, 4.7766e-05,\n 5.4218e-05, 6.5964e-05, 1.4953e-04, 4.2594e-05, 2.0114e-05, 6.3600e-05,\n 6.9138e-05, 6.3456e-05, 7.4940e-05, 5.8154e-05, 4.8567e-06, 4.0521e-05,\n 1.7058e-05, 2.4082e-05, 3.4314e-05, 4.3258e-05, 3.5666e-05, 4.4394e-05,\n 4.3625e-05, 3.6521e-05, 5.4837e-05, 4.0725e-05, 4.5518e-05, 2.5448e-05,\n 3.1785e-05, 9.3778e-05, 4.2185e-05, 3.0683e-05, 3.8614e-05, 4.0336e-05,\n 4.9190e-05, 4.0582e-05, 4.4596e-05, 5.0142e-05, 4.5884e-05, 4.8246e-05,\n 5.5682e-05, 4.3811e-05, 4.0491e-05, 4.3698e-05, 6.3175e-05, 8.5033e-05,\n 3.5749e-05, 5.4327e-05, 3.4426e-05, 4.3986e-05, 4.7309e-05, 4.1057e-05,\n 6.1187e-05, 8.6136e-05, 6.0297e-05, 4.1584e-05, 4.0491e-05, 4.0570e-05,\n 2.8928e-05, 3.8766e-05, 2.3020e-05, 4.3462e-05, 5.3156e-05, 4.9061e-05,\n 3.0936e-05, 4.3273e-05, 2.6345e-05, 5.0982e-05, 1.5553e-05, 4.0269e-05,\n 8.9108e-05, 5.0842e-05, 3.7880e-05, 4.8965e-05, 7.3280e-05, 6.9829e-05,\n 2.5622e-05, 4.4041e-05, 4.1540e-05, 3.6444e-05, 3.6461e-05, 4.3442e-05,\n 4.9122e-05, 9.5750e-05, 4.1533e-05, 3.4835e-05, 4.2309e-05, 2.6769e-05,\n 5.4102e-05, 6.1377e-05, 7.1370e-05, 3.7135e-05, 2.7025e-05, 2.0543e-05,\n 3.5843e-05, 4.6112e-05, 3.4066e-05, 6.8640e-05, 3.0059e-05, 5.6592e-05,\n 8.6184e-05, 3.2174e-05, 5.6206e-05, 3.1215e-05, 4.2616e-05, 5.2401e-05,\n 4.2915e-05, 4.5811e-05, 4.4291e-05, 5.1105e-05, 8.1443e-05, 5.2842e-05,\n 3.4688e-06, 8.0801e-05, 6.0245e-05, 2.8066e-05, 5.7924e-05, 6.7126e-05,\n 5.1118e-05, 7.4370e-06, 4.3255e-05, 7.9929e-05, 5.4089e-05, 1.1813e-04,\n 6.9240e-05, 6.2893e-05, 5.2638e-05, 3.4353e-05, 6.6003e-05, 5.1798e-05,\n 3.4668e-05, 3.7586e-05, 5.8193e-05, 3.3967e-05, 6.2202e-05, 6.9326e-05,\n 9.2513e-05, 3.0954e-05, 4.1157e-05, 5.5855e-05, 4.1896e-05, 4.6287e-05,\n 2.0898e-05, 4.6561e-05, 6.0377e-05, 5.7268e-05, 3.6605e-05, 6.6869e-05,\n 9.4830e-05, 9.5596e-05, 3.7102e-05, 5.1816e-05, 2.0197e-05, 4.2165e-05,\n 4.0688e-05, 7.6253e-05, 4.2071e-05, 2.4463e-05, 9.5054e-05, 6.3021e-05,\n 1.3910e-05, 3.5458e-05, 3.7855e-05, 9.3071e-05, 2.5838e-05, 6.1739e-05,\n 5.9295e-05, 3.3929e-05, 5.0962e-05, 4.5253e-05, 3.7440e-05, 4.5178e-05,\n 5.3629e-05, 7.8915e-05, 2.5530e-05, 9.3274e-05, 2.7565e-05, 5.4167e-05,\n 1.0474e-04, 5.7461e-05, 4.8365e-05, 5.6212e-05, 3.3120e-05, 4.2137e-05,\n 8.9904e-05, 4.1258e-05, 3.6309e-05, 2.6505e-05, 3.7753e-05, 4.5356e-05,\n 2.4117e-05, 3.1483e-05, 7.4927e-05, 3.1225e-05, 4.9854e-05, 4.8619e-06,\n 3.1233e-05, 3.7449e-05, 5.6073e-05, 4.3828e-05, 1.0120e-04, 6.6706e-05,\n 5.6276e-05, 5.1115e-05, 6.1656e-05, 6.3458e-05, 3.9510e-05, 3.4248e-05,\n 4.4036e-05, 6.2132e-05, 3.4690e-05, 2.6391e-05, 4.3506e-05, 2.8733e-05,\n 2.4241e-05, 3.2312e-05, 3.8347e-05, 4.3864e-05, 4.5512e-05, 6.0654e-05,\n 4.6976e-05, 4.9690e-05, 3.1671e-05, 7.1527e-05, 4.0402e-05, 4.2452e-05,\n 3.9124e-05, 3.6659e-05, 4.2806e-05, 1.9421e-05, 2.0866e-05, 5.8171e-05,\n 6.7224e-05, 6.2067e-05, 3.8497e-05, 6.2495e-05, 6.0801e-05, 3.2083e-05,\n 4.8556e-05, 5.2754e-05, 5.9011e-05, 5.5333e-05, 5.4365e-05, 4.9309e-05,\n 5.2434e-05, 4.7557e-05, 4.8142e-05, 1.0767e-04, 3.7275e-05, 5.8749e-05,\n 3.0557e-05, 4.5306e-05, 4.9825e-05, 3.0522e-05, 5.2174e-05, 8.5316e-05,\n 5.2085e-05, 5.5878e-05, 5.0828e-05, 5.3812e-05, 4.8662e-05, 6.3729e-05,\n 5.8446e-05, 6.8818e-05, 2.3102e-05, 9.8759e-05, 5.7135e-05, 6.1727e-05,\n 3.3229e-05, 5.0070e-05, 5.3050e-05, 3.5331e-05, 6.5934e-05, 6.3725e-05,\n 5.9303e-05, 7.4594e-05, 3.6674e-05, 5.4540e-05, 1.4871e-04, 3.2240e-05,\n 5.7428e-05, 4.3128e-05, 4.2560e-05, 5.3241e-05, 6.3976e-05, 4.9772e-05,\n 5.0344e-05, 5.0193e-05, 2.4261e-05, 2.7050e-05, 3.9093e-05, 6.6150e-05,\n 3.2666e-05, 7.1853e-05, 7.1701e-05, 2.4125e-05, 3.2175e-05, 4.6092e-05,\n 6.4944e-05, 2.9729e-05, 5.1446e-05, 6.4766e-05, 3.6500e-05, 7.1774e-05,\n 7.1394e-05, 3.4659e-05, 2.6506e-05, 4.7851e-05, 8.2797e-05, 3.2050e-05,\n 2.3252e-05, 3.7500e-05, 6.8043e-05, 3.4712e-05, 4.1839e-05, 5.3208e-05,\n 1.4102e-04, 4.8144e-05, 4.7655e-05, 2.6788e-05, 7.4753e-05, 4.0446e-05,\n 5.0200e-05, 4.9052e-05, 4.2478e-05, 4.5568e-05, 5.5461e-05, 1.9350e-05,\n 5.9844e-05, 4.6112e-05, 6.7993e-05, 3.6993e-05, 7.7101e-05, 2.1684e-05,\n 4.2379e-05, 4.9267e-05], device='cuda:0')"
14
+ },
15
+ "2": {
16
+ "step": "tensor(15016.)",
17
+ "exp_avg": "tensor([-4.5432e-03, 8.1411e-03, -8.0405e-04, 1.5246e-03, -4.4131e-03,\n -6.8076e-03, -2.0476e-04, 1.2899e-03, 1.3951e-03, 3.3849e-03,\n -3.7711e-03, 9.4948e-04, -1.0806e-03, 5.0323e-03, 5.3645e-04,\n 1.4987e-04, 3.9957e-03, -1.4956e-02, -7.1166e-04, 2.5754e-03,\n 4.0500e-03, 2.4308e-04, 6.3063e-03, -5.7589e-03, -2.3686e-05,\n 6.5021e-04, 7.7866e-04, -1.5012e-03, 5.8542e-07, 6.4402e-04,\n 3.4263e-03, 4.5335e-03, 1.0103e-03, -3.0121e-03, -7.2958e-04,\n -3.7585e-03, -1.2366e-03, 3.3771e-03, -8.6910e-03, 2.4370e-04,\n -7.9976e-03, 2.5375e-04, -1.1234e-03, -1.1307e-03, -2.7190e-03,\n -4.8355e-04, 2.3049e-03, -2.4585e-03, 8.0755e-04, 3.7524e-04,\n 2.9734e-03, -3.9601e-03, 1.2405e-03, 2.8104e-03, -2.7732e-03,\n -3.7027e-03, -4.8138e-03, -3.9511e-03, -1.4495e-03, 1.2526e-03,\n -3.7977e-03, -6.6553e-04, 1.9599e-03, 4.4725e-03, 2.4870e-03,\n 7.2198e-04, 2.0807e-03, -2.6001e-03, -1.2493e-03, -6.3046e-03,\n -2.5620e-03, 6.0314e-04, 4.3101e-03, -3.8061e-03, 5.0641e-04,\n 1.3861e-03, 6.7980e-03, 2.7938e-04, -3.1941e-03, -2.0431e-03,\n 3.1654e-03, 2.1434e-03, 9.4930e-04, 2.4616e-03, 1.3281e-04,\n -9.0121e-03, 4.0296e-03, 1.9772e-03, 2.5847e-03, 5.8388e-03,\n 1.4321e-03, 4.5035e-03, 1.0695e-02, -2.5877e-03, 3.5377e-03,\n 3.2735e-03, -4.6867e-03, 2.1187e-03, 4.5887e-03, -2.3421e-04,\n 3.6826e-04, 1.3404e-03, 2.6110e-03, 4.7517e-03, 3.9129e-04,\n -4.4444e-03, -3.9198e-03, 4.8974e-03, -1.1114e-03, -8.6166e-03,\n -5.4296e-03, 5.6804e-03, 1.7974e-03, -1.1779e-02, 1.3590e-04,\n 5.0304e-04, -4.2772e-03, 3.3305e-03, -4.9746e-04, -1.6736e-03,\n 4.0060e-03, 2.8531e-03, -1.4783e-03, 4.0141e-03, 2.9378e-04,\n 1.0101e-03, -3.3444e-03, 5.4281e-03, 3.8661e-03, -5.8089e-03,\n -2.7914e-03, 1.5514e-03, -7.8142e-03, 2.2969e-03, 3.3058e-03,\n 2.4334e-03, -3.8000e-03, 1.1620e-03, -1.9577e-03, -1.1061e-03,\n 1.8000e-04, -4.4815e-03, 9.3402e-04, 1.6317e-03, 7.5157e-04,\n 3.1608e-03, 6.2078e-03, 1.4604e-03, 1.3147e-03, 8.3236e-03,\n 2.2001e-03, -1.2373e-03, -7.7556e-04, -2.1347e-03, 4.0654e-03,\n -2.7874e-03, 4.2870e-03, -7.4318e-05, 5.0429e-03, 5.5480e-06,\n -3.7716e-03, -4.2399e-04, -4.1805e-03, 3.6535e-04, 6.1129e-03,\n 6.2802e-03, -9.8047e-04, -1.5414e-03, -3.1036e-03, 8.8169e-03,\n 1.9434e-03, 4.1409e-03, 3.9275e-03, -3.4187e-04, -1.2728e-03,\n -1.4529e-03, 2.3495e-03, -2.5426e-04, -1.0257e-03, -4.5321e-03,\n 4.6350e-03, -2.5555e-04, -8.7975e-03, -3.9875e-04, 1.3101e-03,\n 8.3622e-04, 1.8507e-04, 5.6333e-03, 3.2206e-03, 2.3755e-03,\n 5.6052e-45, -4.0704e-04, 2.7885e-03, 3.3908e-03, 8.0420e-04,\n 2.2819e-03, 3.7793e-03, -1.1131e-03, 2.7553e-03, 1.7888e-03,\n -8.5545e-04, -1.6677e-03, 1.0014e-03, -2.5363e-03, 1.5033e-03,\n 5.1148e-03, 1.7311e-03, -4.1332e-04, 4.6312e-03, -1.1087e-05,\n 9.4218e-04, -2.8917e-03, -5.7540e-03, 1.6894e-03, 6.7947e-07,\n 9.4579e-03, 1.1714e-04, -2.0941e-03, -1.7973e-03, 5.1341e-03,\n -2.8110e-03, -3.9564e-03, 9.6250e-04, -3.7238e-03, 5.1530e-04,\n -2.4186e-03, -2.8498e-03, -6.5916e-04, -1.3998e-03, 6.0768e-03,\n -7.1378e-05, -4.2633e-04, 2.7745e-03, 1.1646e-02, 2.8624e-03,\n -3.2510e-03, 8.8870e-03, 4.3640e-03, -2.8348e-03, -1.6382e-03,\n 1.5756e-03, 3.5639e-04, -3.2924e-03, 9.7178e-04, -1.2517e-03,\n 2.4696e-03, 9.2340e-05, 5.4437e-04, 3.0862e-03, 4.2767e-03,\n -2.7286e-03, -2.6859e-03, -3.6215e-03, 2.4614e-03, -1.0617e-03,\n 9.7640e-04, 1.9813e-03, -1.2073e-03, 4.2642e-03, -1.1689e-03,\n -3.1190e-03, 2.0781e-03, -1.3008e-03, 3.1173e-03, -1.4897e-03,\n 4.4913e-04, -4.7575e-03, -2.4091e-03, -5.0305e-03, 1.4017e-03,\n 1.4018e-03, 3.2187e-03, -1.6180e-03, -1.0292e-05, -1.9227e-03,\n 2.9686e-03, -1.1150e-03, 5.4328e-04, 7.5330e-04, 3.0374e-03,\n -3.6629e-04, 3.9863e-03, -3.2100e-03, -2.1477e-03, 3.8152e-03,\n 6.6510e-03, -3.2845e-03, 2.8144e-03, 5.6052e-45, 4.6499e-03,\n 2.0376e-03, 1.5399e-03, -1.5579e-03, 1.0046e-04, 1.5432e-03,\n 5.6052e-45, -3.5915e-03, 1.6395e-03, -3.0727e-03, -5.5306e-03,\n 2.1317e-04, 9.5609e-04, -1.7249e-04, -6.3676e-04, 4.9804e-03,\n -3.0103e-03, -2.7724e-03, -2.3880e-03, -2.3074e-03, 2.2778e-03,\n 1.5445e-03, -6.9964e-03, 7.0951e-03, 2.0210e-03, 3.2542e-04,\n -3.2269e-03, -1.8221e-03, -3.5901e-03, 2.1425e-03, 5.1642e-03,\n 1.1309e-03, -5.7521e-03, -2.9071e-03, 3.6747e-04, 3.0325e-03,\n 1.9397e-03, -1.9838e-04, -2.1590e-03, -3.5690e-03, -1.6329e-03,\n 7.2032e-04, 1.7071e-03, 1.5798e-03, -1.1345e-03, -2.7319e-03,\n 4.0827e-03, -7.3702e-05, 3.8562e-03, -4.9987e-05, 2.0775e-03,\n -3.6073e-03, 1.0085e-03, -4.4557e-04, 2.0687e-03, -3.5996e-03,\n 1.9795e-03, 4.7783e-03, 2.4455e-03, -2.1788e-03, 2.1397e-03,\n 4.8257e-04, -4.3599e-03, -1.9571e-03, -1.0691e-03, 7.9147e-03,\n 1.4699e-03, 3.3755e-03, 3.1384e-03, -5.7290e-04, -2.1173e-03,\n 6.5948e-03, -1.1059e-03, 1.9995e-03, 1.0148e-03, -1.0426e-03,\n -1.3082e-03, -2.5570e-03, 7.1014e-04, -4.9824e-03, -9.2829e-04,\n 2.5632e-03, 5.6052e-45, -1.5168e-03, 1.5792e-03, -2.7492e-03,\n -2.1995e-03, -4.5024e-03, 2.3441e-03, -6.0281e-03, 8.7715e-04,\n -3.7212e-03, -1.1090e-03, -4.9387e-04, 4.9262e-03, 3.3294e-03,\n -8.7512e-04, -3.7399e-05, 4.2960e-03, 6.2289e-04, -7.1532e-03,\n 2.4143e-03, 1.0463e-04, -2.6476e-03, 1.7529e-03, -9.5736e-04,\n 1.2034e-03, 2.1088e-03, 3.4136e-03, -3.9779e-03, -3.3553e-03,\n -1.1336e-03, -4.0809e-04, -1.4695e-03, 6.3852e-04, -6.0210e-03,\n 2.8296e-04, 2.9653e-03, -1.8395e-04, -4.0285e-03, 5.3985e-04,\n -1.0959e-04, -2.3931e-03, -4.5228e-04, -3.7253e-03, 2.9034e-04,\n -2.3864e-03, 1.3140e-03, 6.7604e-03, -2.0179e-03, -1.9811e-03,\n -7.5538e-03, 8.6299e-04, 5.7014e-04, -4.0145e-03, -3.3931e-03,\n -6.3104e-04, 1.3846e-03, 5.0571e-04, 6.2165e-03, -2.5833e-03,\n 4.6031e-05, 1.7326e-04, -1.6486e-03, 7.8355e-04, -6.7694e-03,\n -4.4733e-04, 4.3198e-03, -1.7782e-03, -5.6310e-03, 3.9793e-03,\n 2.0055e-03, 5.0424e-03, -5.2304e-03, 5.2491e-03, 1.6069e-03,\n -1.8342e-03, 7.4019e-04, 2.1652e-03, -9.1621e-03, -4.4922e-04,\n 1.6103e-03, 3.5035e-03, -5.5498e-03, 4.2346e-03, 7.2325e-03,\n 4.6747e-03, -5.8645e-03, 9.6731e-04, 1.7358e-03, -5.3082e-03,\n -3.9120e-04, 1.8421e-03, 1.8716e-03, 2.8777e-03, 2.4465e-04,\n -1.7657e-03, -6.2660e-03, -5.6338e-04, 2.2148e-03, 7.2164e-03,\n 2.4370e-04, -2.9639e-04, -2.4441e-03, -1.0838e-03, -2.1383e-03,\n -1.0267e-03, -2.5150e-03, -5.5685e-03, 3.9882e-03, 1.4962e-03,\n 5.4546e-03, 4.9599e-03, 6.2584e-03, -2.2049e-03, -1.0412e-02,\n -2.8595e-03, -3.2498e-03, -7.1781e-03, -5.2834e-03, -1.8755e-04,\n -5.5788e-03, -5.2966e-03, -1.1739e-02, -3.1659e-03, 3.0148e-04,\n -4.7120e-03, 5.6474e-03, 6.2045e-03, -1.5667e-03, 8.8163e-03,\n 5.7996e-03, 6.9041e-04, -6.5991e-03, -6.7462e-04, 1.0281e-03,\n -2.3332e-03, -6.0090e-03, 2.2603e-03, -4.4258e-03, -9.2695e-03,\n 3.9277e-03, -2.0649e-03], device='cuda:0')",
18
+ "exp_avg_sq": "tensor([5.6389e-04, 1.9662e-04, 2.0562e-04, 2.4552e-04, 1.4776e-04, 2.6680e-04,\n 2.1036e-04, 1.9540e-04, 1.2013e-04, 1.3276e-04, 2.8694e-04, 2.5023e-04,\n 1.6113e-04, 2.3352e-04, 2.1884e-04, 4.2263e-04, 1.1707e-04, 4.4463e-04,\n 4.3459e-05, 1.9494e-04, 2.2663e-04, 1.9301e-04, 2.0467e-04, 1.4937e-04,\n 2.6372e-04, 1.3923e-04, 4.9081e-04, 1.1969e-04, 2.1782e-04, 1.1108e-04,\n 2.3037e-04, 2.5783e-04, 1.0413e-04, 4.2583e-04, 1.2964e-04, 1.3650e-04,\n 1.4695e-04, 1.8062e-04, 2.5551e-04, 2.4692e-04, 3.4404e-04, 1.4583e-03,\n 1.1069e-04, 1.9295e-04, 2.4260e-04, 4.3106e-04, 2.6320e-04, 1.2326e-04,\n 2.0936e-04, 2.1733e-04, 1.4619e-04, 5.8401e-04, 2.8010e-04, 2.2848e-04,\n 3.9345e-04, 1.6026e-04, 2.0729e-04, 9.9591e-05, 1.5983e-04, 1.9670e-04,\n 1.8761e-04, 1.0062e-04, 2.4888e-04, 2.1213e-04, 2.4109e-04, 2.4627e-04,\n 2.8196e-04, 2.7581e-04, 1.4802e-04, 2.1486e-04, 1.2432e-04, 1.5441e-04,\n 1.4455e-04, 4.7325e-04, 1.5137e-04, 1.3300e-04, 5.1764e-04, 3.0616e-04,\n 2.3977e-04, 1.7968e-04, 1.7600e-04, 1.1810e-04, 1.3275e-04, 3.7900e-04,\n 1.6928e-04, 3.0685e-04, 2.4800e-04, 8.6197e-05, 2.8335e-04, 3.4782e-04,\n 1.4248e-04, 3.8035e-04, 5.7957e-04, 3.1363e-04, 1.5104e-04, 2.8132e-04,\n 1.1879e-04, 2.0095e-04, 2.7561e-04, 2.9882e-04, 2.5861e-04, 1.2420e-04,\n 1.3324e-04, 2.7515e-04, 7.2887e-05, 1.9598e-04, 1.5629e-04, 1.4841e-04,\n 1.0312e-04, 1.3529e-04, 4.0295e-04, 1.2625e-04, 9.7501e-04, 2.1432e-04,\n 8.0687e-05, 5.8799e-05, 3.2825e-04, 2.4583e-04, 1.2667e-04, 2.2138e-04,\n 1.4584e-04, 9.3686e-05, 2.6510e-04, 3.9758e-04, 2.0016e-04, 1.9148e-04,\n 1.1710e-04, 2.2789e-04, 3.9444e-04, 1.7536e-04, 1.5387e-04, 3.0896e-04,\n 2.4320e-04, 1.3151e-04, 3.5763e-04, 3.3641e-04, 1.3054e-04, 9.8033e-05,\n 5.1808e-04, 2.5987e-04, 1.2376e-04, 6.8762e-05, 1.6604e-04, 6.6562e-04,\n 2.5952e-04, 2.0814e-04, 2.5017e-04, 1.0559e-04, 3.0278e-04, 7.6476e-04,\n 1.2838e-04, 1.6065e-04, 6.0946e-05, 1.2006e-04, 2.5859e-04, 3.0535e-04,\n 1.6632e-04, 1.5966e-04, 3.7822e-04, 2.1125e-04, 1.3650e-04, 2.6015e-04,\n 5.1311e-04, 1.4175e-04, 2.3504e-04, 2.3261e-04, 4.0341e-04, 9.2877e-05,\n 2.0529e-04, 6.4199e-04, 2.5322e-04, 1.9650e-04, 1.2232e-04, 1.4830e-04,\n 2.9627e-04, 1.7093e-04, 1.2946e-04, 1.0201e-04, 1.0434e-04, 3.4218e-04,\n 1.7334e-04, 2.8355e-04, 2.7300e-04, 2.0852e-04, 1.2118e-04, 2.0944e-04,\n 3.4169e-04, 1.9870e-04, 2.2791e-04, 1.5037e-04, 4.5147e-11, 1.6803e-04,\n 1.1918e-04, 9.5252e-05, 1.7266e-04, 1.4053e-04, 1.4989e-04, 1.1911e-04,\n 1.2088e-04, 1.8417e-04, 1.1509e-04, 1.8767e-04, 2.6350e-04, 2.3094e-04,\n 1.0923e-04, 2.5364e-04, 1.9158e-04, 9.8596e-05, 1.5639e-04, 1.3646e-04,\n 3.4136e-04, 2.5302e-04, 2.5625e-04, 1.5135e-04, 2.7185e-05, 3.3972e-04,\n 1.2755e-04, 1.4084e-04, 2.1854e-04, 2.4840e-04, 2.9203e-04, 3.2050e-04,\n 1.8375e-04, 1.1328e-04, 9.6698e-05, 1.2041e-04, 1.6749e-04, 7.8909e-05,\n 4.6708e-04, 1.8926e-04, 2.9175e-04, 1.8771e-04, 2.3762e-04, 4.0565e-04,\n 2.1584e-04, 2.2601e-04, 2.1331e-04, 2.4031e-04, 1.6964e-04, 1.8429e-04,\n 1.2743e-04, 1.4600e-04, 2.1165e-04, 3.1468e-04, 1.2578e-04, 2.4797e-04,\n 1.7577e-04, 1.2145e-04, 1.3328e-04, 2.0579e-04, 2.2370e-04, 1.9352e-04,\n 1.2582e-04, 9.0982e-05, 7.6509e-05, 1.7320e-04, 1.3545e-04, 2.4532e-04,\n 1.9796e-04, 3.5271e-04, 3.8802e-04, 1.3752e-04, 2.0386e-04, 1.1763e-04,\n 2.9170e-04, 4.0138e-04, 1.0088e-03, 1.6413e-04, 1.6712e-04, 1.0972e-04,\n 1.6070e-04, 1.9418e-04, 1.2359e-04, 2.4232e-04, 1.7598e-04, 2.3145e-04,\n 3.1496e-04, 7.5278e-05, 1.4908e-04, 1.5356e-04, 1.3518e-04, 1.5689e-04,\n 2.1475e-04, 1.6704e-04, 1.6865e-04, 2.1879e-04, 1.5694e-04, 2.1047e-04,\n 8.6527e-13, 2.4810e-04, 1.6622e-04, 1.3067e-04, 2.5719e-04, 1.8638e-04,\n 2.2302e-04, 8.4829e-12, 2.3459e-04, 1.8530e-04, 2.7319e-04, 4.2265e-04,\n 4.2667e-04, 3.0021e-04, 1.6159e-04, 1.0815e-04, 1.9660e-04, 2.2453e-04,\n 2.2702e-04, 3.1781e-04, 3.8273e-04, 1.5613e-04, 1.4448e-04, 2.6135e-04,\n 3.7489e-04, 5.4510e-05, 7.9640e-05, 2.4603e-04, 1.0764e-04, 1.9489e-04,\n 4.6127e-05, 1.8760e-04, 1.3052e-04, 4.3387e-04, 8.6499e-05, 2.3810e-04,\n 2.6968e-04, 4.3209e-04, 1.2899e-04, 1.9904e-04, 1.6635e-04, 2.5500e-04,\n 2.4468e-04, 1.2340e-04, 1.2694e-04, 1.0227e-04, 3.1934e-04, 2.2521e-04,\n 1.3237e-04, 9.0374e-05, 2.2166e-04, 4.1351e-04, 1.6254e-04, 1.4397e-04,\n 1.8612e-04, 3.5612e-04, 1.9992e-04, 2.6755e-04, 8.4180e-05, 9.6025e-05,\n 1.6726e-04, 2.0844e-04, 1.1694e-04, 6.0386e-04, 1.0965e-04, 1.6375e-04,\n 8.4266e-04, 2.4859e-04, 1.8338e-04, 2.7037e-04, 1.5215e-04, 9.0196e-05,\n 4.3848e-04, 7.6490e-05, 2.0673e-04, 2.1363e-04, 1.2369e-04, 1.2755e-04,\n 1.9349e-04, 1.7045e-04, 2.2274e-04, 1.4524e-04, 1.7052e-04, 1.7837e-12,\n 3.1540e-04, 1.1320e-04, 1.5871e-04, 1.3652e-04, 2.6807e-04, 1.6524e-04,\n 2.2215e-04, 1.7251e-04, 1.6223e-04, 2.5436e-04, 2.4438e-04, 2.6437e-04,\n 1.2282e-04, 3.6152e-04, 2.1406e-04, 1.2881e-04, 2.2767e-04, 1.3161e-04,\n 1.6191e-04, 8.7524e-05, 1.6307e-04, 3.4284e-04, 2.2395e-04, 2.8082e-04,\n 1.3242e-04, 3.4513e-04, 1.4323e-04, 2.4637e-04, 1.1675e-04, 2.0250e-04,\n 1.1985e-04, 1.5198e-04, 3.3339e-04, 6.5739e-05, 7.5741e-05, 2.7248e-04,\n 2.1449e-04, 2.0729e-04, 1.3496e-04, 1.5824e-04, 2.4780e-04, 7.5930e-04,\n 1.0848e-04, 2.0311e-04, 2.6879e-04, 3.3062e-04, 3.8329e-04, 1.8398e-04,\n 2.1944e-04, 9.1058e-05, 5.2433e-04, 1.7652e-04, 1.7648e-04, 1.2990e-04,\n 1.3221e-04, 2.0950e-04, 1.6690e-04, 1.2491e-04, 1.5026e-04, 2.9147e-04,\n 2.4115e-04, 2.2357e-04, 2.4590e-04, 3.8986e-04, 4.0759e-04, 3.2401e-04,\n 2.7144e-04, 2.3720e-04, 2.1685e-04, 3.3787e-04, 1.5758e-04, 1.8307e-04,\n 2.0524e-04, 1.8648e-04, 2.3048e-04, 1.8651e-04, 2.5377e-04, 1.6627e-04,\n 9.8461e-05, 2.5407e-04, 2.4075e-04, 3.9791e-04, 3.5638e-04, 1.2924e-04,\n 4.7465e-04, 1.2442e-04, 1.2662e-04, 1.8869e-04, 1.6091e-04, 1.5986e-04,\n 1.7227e-04, 2.8865e-04, 1.5234e-04, 1.0205e-04, 2.8727e-04, 2.3032e-04,\n 1.5295e-04, 2.2697e-04, 2.8190e-04, 1.2295e-04, 1.4996e-04, 2.1297e-04,\n 1.7159e-04, 1.5769e-04, 2.4895e-04, 3.6075e-04, 1.1518e-04, 1.6353e-04,\n 3.2040e-04, 2.2529e-04, 9.3733e-04, 1.9063e-04, 3.2551e-04, 8.5204e-05,\n 9.0879e-05, 2.2672e-04, 2.5120e-04, 2.1515e-04, 3.3668e-04, 8.5712e-05,\n 3.8133e-04, 1.2548e-04, 1.3319e-04, 1.4244e-04, 3.6951e-04, 2.2199e-04,\n 2.0578e-04, 1.5390e-04, 3.3680e-04, 1.4193e-04, 3.2062e-04, 1.7442e-04,\n 3.5250e-04, 2.0103e-04, 2.4076e-04, 2.1246e-04, 1.2342e-04, 9.7030e-04,\n 1.7178e-04, 1.6077e-04], device='cuda:0')"
19
+ },
20
+ "3": {
21
+ "step": "tensor(15016.)",
22
+ "exp_avg": "tensor([-1.3842e-03, 5.2339e-03, -6.4603e-04, 8.4349e-04, -2.2664e-03,\n -4.0977e-03, 5.1200e-04, 3.3721e-04, 8.3647e-04, 1.9674e-03,\n -1.6567e-03, 1.0404e-03, -7.4051e-04, 3.8217e-03, 4.0712e-04,\n -5.6094e-05, 2.6028e-03, -8.5378e-03, -2.2530e-04, 1.7838e-03,\n 1.8343e-03, 1.7603e-04, 3.9035e-03, -3.0062e-03, 8.0840e-05,\n 3.3921e-04, 8.0222e-04, -1.3782e-03, -4.7932e-04, 4.9174e-04,\n 1.5014e-03, 1.8829e-03, 3.5787e-04, -1.3658e-03, 1.9989e-04,\n -1.8127e-03, -8.1482e-04, 2.0615e-03, -4.4473e-03, 3.4426e-04,\n -3.8633e-03, 6.1083e-04, -7.8065e-04, -6.9089e-04, -1.4594e-03,\n -1.7866e-04, 1.4555e-03, -1.2238e-03, 1.5165e-04, 3.0660e-04,\n 1.4565e-03, -1.6327e-03, 1.1816e-03, 1.2548e-03, -9.5234e-04,\n -1.5243e-03, -1.2116e-03, -1.8840e-03, -2.3281e-04, 6.7335e-04,\n -1.7829e-03, -4.8201e-04, 1.3719e-03, 3.2380e-03, 1.7388e-03,\n 9.7822e-04, 6.6854e-04, -7.4797e-04, -3.6158e-04, -2.8655e-03,\n -1.6460e-03, 2.7333e-04, 2.2516e-03, -1.9445e-03, 5.7533e-04,\n 9.2894e-04, 3.8840e-03, -2.8692e-04, -1.3784e-03, -8.6704e-04,\n 1.4094e-03, 1.0315e-03, 1.0351e-03, 1.2726e-03, 4.4329e-04,\n -4.3345e-03, 2.2778e-03, 9.6559e-04, 1.0721e-03, 2.9361e-03,\n 1.7155e-04, 1.7884e-03, 4.9345e-03, -1.2662e-03, 1.9836e-03,\n 1.1104e-03, -2.1088e-03, 1.2648e-03, 2.6601e-03, 6.4639e-05,\n 2.8661e-06, 1.2180e-03, 1.4195e-03, 2.4781e-03, 3.4563e-04,\n -2.0602e-03, -2.2861e-03, 2.2814e-03, -6.4872e-04, -4.3725e-03,\n -3.5879e-03, 2.7280e-03, 6.9794e-04, -4.9071e-03, -1.6305e-05,\n 4.2615e-04, -2.6105e-03, 2.3418e-03, 4.5158e-05, -8.8613e-04,\n 2.1834e-03, 2.0848e-03, -7.2735e-04, 2.2085e-03, 4.2569e-04,\n 6.2133e-04, -1.8377e-03, 3.6580e-03, 1.3618e-03, -4.3782e-03,\n -1.9672e-03, 1.0259e-03, -4.9599e-03, 1.0586e-03, 2.2759e-03,\n 4.4133e-04, -1.9257e-03, 6.5947e-04, -6.0144e-04, -4.3457e-04,\n 2.3910e-04, -3.8337e-03, 6.3373e-04, 1.6973e-03, 3.7093e-04,\n 1.7271e-03, 2.8249e-03, 4.6743e-04, 2.8227e-04, 3.1225e-03,\n 1.3393e-03, -6.4129e-04, -6.5379e-04, -1.6465e-03, 1.8169e-03,\n -1.6627e-03, 1.9419e-03, 4.2255e-05, 2.3897e-03, 1.8385e-04,\n -2.4643e-03, -1.5019e-04, -2.3877e-03, 2.4786e-04, 3.3766e-03,\n 3.2882e-03, -1.9175e-03, -6.9372e-04, -1.5513e-03, 4.9244e-03,\n 4.9763e-04, 2.6959e-03, 1.8130e-03, -2.8548e-04, -3.6011e-04,\n -6.1941e-04, 1.3829e-03, -8.3560e-04, -3.1164e-04, -1.6784e-03,\n 2.1027e-03, -3.1528e-04, -5.5110e-03, 5.1563e-04, 4.2597e-04,\n 6.4126e-04, 2.9491e-05, 2.5305e-03, 1.9312e-03, 1.9934e-03,\n 5.6052e-45, -2.7296e-04, 1.0226e-03, 1.2434e-03, 7.9211e-05,\n 8.7262e-04, 1.9170e-03, -4.4621e-04, 1.4483e-03, 9.4650e-04,\n -5.2594e-04, -1.2563e-03, 6.8991e-04, -8.3353e-04, 7.2274e-04,\n 3.7234e-03, 5.7788e-04, -4.2835e-05, 2.5913e-03, -1.7705e-04,\n 2.3643e-04, -3.7984e-04, -2.7149e-03, 1.1768e-03, 2.5690e-04,\n 4.4035e-03, 3.2623e-04, -9.2700e-04, -8.0272e-04, 2.4247e-03,\n -1.6850e-03, -1.9615e-03, 4.8026e-04, -1.9758e-03, 6.3422e-06,\n -1.6483e-03, -9.7342e-04, -9.3505e-04, -4.1538e-04, 3.2010e-03,\n 5.2586e-04, -1.5963e-04, 1.6015e-03, 4.8604e-03, 1.3845e-03,\n -1.4225e-03, 3.9763e-03, 2.3034e-03, -1.4789e-03, -8.5608e-04,\n 7.9152e-04, 2.3692e-04, -1.4323e-03, 6.3693e-04, -4.9287e-04,\n 1.1742e-03, 3.1775e-04, -1.1853e-03, 1.6307e-03, 2.2564e-03,\n -1.1041e-03, -1.0540e-03, -1.9874e-03, 1.6063e-03, -7.6993e-04,\n 6.1834e-04, 1.0986e-03, -6.6535e-04, 2.4525e-03, -6.5707e-04,\n -1.5908e-03, 1.3316e-03, -4.5453e-04, 9.9126e-04, -9.8242e-04,\n 2.9738e-05, -1.9820e-03, -1.5470e-03, -2.0303e-03, 6.3513e-04,\n 9.2091e-04, 2.1382e-03, -3.7025e-04, 2.7574e-05, -8.5313e-04,\n 1.2977e-03, -1.3714e-04, 6.9363e-04, 1.1944e-04, 1.4651e-03,\n 4.0632e-05, 2.1550e-03, -1.8693e-03, -1.3029e-03, 1.7130e-03,\n 4.0341e-03, -1.7268e-03, 1.2465e-03, 5.6052e-45, 2.9410e-03,\n 7.3718e-04, 7.4622e-04, -7.7939e-04, 1.0749e-03, 8.6885e-04,\n 5.6052e-45, -2.0288e-03, 1.1649e-03, -1.0000e-03, -2.9087e-03,\n 5.0080e-05, 6.7365e-04, -1.0882e-04, -7.2523e-04, 2.9067e-03,\n -1.1769e-03, -1.3658e-03, -7.5960e-04, -1.1380e-03, 1.1990e-03,\n 1.0998e-03, -4.2717e-03, 3.9029e-03, 1.9198e-03, -8.2030e-05,\n -1.6166e-03, -9.2355e-04, -1.8666e-03, 1.1627e-03, 2.5583e-03,\n 7.2312e-04, -2.1706e-03, -1.5058e-03, 1.5072e-04, 1.7645e-03,\n -9.7046e-05, -2.5969e-04, -1.4779e-03, -1.2326e-03, -7.6496e-04,\n 3.3726e-04, 1.4938e-03, 1.0459e-03, -6.1789e-04, -8.5258e-04,\n 2.6921e-03, 4.5451e-05, 2.3245e-03, 2.3954e-04, 1.3804e-03,\n -1.5034e-03, 6.4962e-04, 1.0109e-04, 9.2869e-04, -1.5085e-03,\n 9.0325e-04, 3.0478e-03, 1.6856e-03, -6.0678e-04, 1.1523e-03,\n 1.6330e-04, -2.2864e-03, -1.2501e-03, -5.5622e-04, 3.6053e-03,\n 7.1801e-04, 1.8178e-03, 1.7434e-03, -5.8238e-04, -7.8669e-04,\n 4.2241e-03, -8.4302e-04, 1.3908e-03, 1.7898e-04, -1.4357e-03,\n -7.4350e-04, -9.5724e-04, 4.6979e-04, -2.6110e-03, -6.4149e-04,\n 1.1667e-03, 5.6052e-45, -5.7019e-04, 1.0931e-03, -9.4257e-04,\n -4.9912e-04, -3.3666e-03, 1.4911e-03, -3.0677e-03, 6.7796e-04,\n -2.3650e-03, -2.4732e-05, -8.1303e-04, 3.3042e-03, 2.3210e-03,\n 3.3989e-04, -2.8565e-04, 1.8123e-03, 5.8905e-04, -3.7022e-03,\n 1.4477e-03, 2.6512e-04, -1.5418e-03, 8.1903e-04, -2.2376e-04,\n 1.2563e-03, 1.5198e-03, 1.8963e-03, -1.7162e-03, -1.7095e-03,\n -5.8910e-04, 2.7515e-05, -4.2787e-04, 6.2595e-04, -2.4380e-03,\n 4.8709e-04, 1.4355e-03, 9.5464e-05, -2.6625e-03, 2.3744e-04,\n -3.1551e-04, -1.1976e-03, 3.6729e-04, -1.9146e-03, 1.1737e-04,\n -1.2938e-03, 9.6905e-04, 3.2151e-03, -6.8568e-04, -6.9936e-04,\n -3.9036e-03, 7.0182e-04, 1.7738e-04, -2.6797e-03, -1.7557e-03,\n -1.2326e-04, 1.1277e-03, 1.4611e-04, 3.8033e-03, -1.6826e-03,\n 2.7849e-04, 2.9576e-04, -7.1241e-04, 7.0869e-04, -2.4351e-03,\n -8.9348e-05, 2.3802e-03, -1.0220e-03, -2.7496e-03, 2.3106e-03,\n 8.0733e-04, 2.6093e-03, -3.6683e-03, 2.3155e-03, 6.7825e-04,\n -6.0035e-04, 6.6225e-04, 7.2437e-04, -5.5274e-03, 3.0452e-04,\n 1.2437e-03, 1.8798e-03, -2.8250e-03, 1.5884e-03, 5.0511e-03,\n 3.1411e-03, -2.8133e-03, 7.4741e-05, 9.8433e-04, -3.2445e-03,\n -3.4481e-04, 1.0210e-03, 1.1443e-03, 1.7895e-03, 3.4859e-04,\n -7.4333e-04, -2.2691e-03, -7.2154e-04, 1.3064e-03, 4.0352e-03,\n -6.8797e-05, -1.9907e-04, -1.0235e-03, -2.9253e-04, -9.9811e-04,\n -4.6711e-04, -9.4997e-04, -2.0266e-03, 2.2063e-03, 8.3265e-04,\n 2.6729e-03, 2.1180e-03, 2.2715e-03, -1.2978e-03, -5.8355e-03,\n -1.5375e-03, -1.7475e-03, -2.8569e-03, -2.4352e-03, 6.0104e-05,\n -2.4235e-03, -2.8434e-03, -5.9192e-03, -1.5127e-03, 2.3776e-05,\n -2.5441e-03, 2.5167e-03, 3.1237e-03, -2.2277e-03, 4.8340e-03,\n 2.5190e-03, 4.6184e-04, -3.1215e-03, -2.6896e-04, 4.5924e-04,\n -1.2732e-03, -3.2560e-03, 1.1363e-03, -2.6944e-03, -2.3029e-03,\n 2.6983e-03, -1.0356e-03], device='cuda:0')",
23
+ "exp_avg_sq": "tensor([6.8950e-05, 7.4093e-05, 8.5677e-05, 6.8162e-05, 4.1877e-05, 1.1001e-04,\n 8.2627e-05, 4.3713e-05, 4.1735e-05, 4.2316e-05, 8.5128e-05, 5.6934e-05,\n 4.4690e-05, 8.0652e-05, 9.2888e-05, 6.4936e-05, 5.1325e-05, 1.5138e-04,\n 1.3662e-05, 6.6230e-05, 4.7863e-05, 5.9463e-05, 7.3776e-05, 5.5494e-05,\n 4.5431e-05, 3.9597e-05, 1.1397e-04, 5.6462e-05, 6.8573e-05, 4.0490e-05,\n 5.7231e-05, 7.7674e-05, 3.0540e-05, 1.3161e-04, 5.4079e-05, 3.9451e-05,\n 3.9372e-05, 7.2507e-05, 1.0126e-04, 6.4722e-05, 8.0123e-05, 1.6362e-04,\n 4.2051e-05, 4.1632e-05, 7.9475e-05, 9.4680e-05, 6.6985e-05, 2.9437e-05,\n 4.1911e-05, 4.8295e-05, 4.0786e-05, 7.6366e-05, 1.3271e-04, 5.8614e-05,\n 8.5972e-05, 4.6044e-05, 5.8969e-05, 3.8446e-05, 5.2811e-05, 5.9079e-05,\n 3.8310e-05, 3.4812e-05, 4.7538e-05, 8.3337e-05, 9.1122e-05, 9.8288e-05,\n 7.1718e-05, 8.1780e-05, 5.2430e-05, 6.1745e-05, 4.0948e-05, 5.8961e-05,\n 4.9907e-05, 1.3615e-04, 5.6789e-05, 4.4387e-05, 1.6460e-04, 5.1214e-05,\n 5.9952e-05, 5.6194e-05, 5.5567e-05, 3.5557e-05, 4.1499e-05, 8.1587e-05,\n 7.5293e-05, 6.6968e-05, 1.0839e-04, 3.1431e-05, 8.6783e-05, 6.9676e-05,\n 5.9008e-05, 5.1508e-05, 1.5777e-04, 1.5379e-04, 5.1423e-05, 6.2167e-05,\n 4.2032e-05, 6.8516e-05, 8.9734e-05, 7.6814e-05, 6.0018e-05, 4.8460e-05,\n 3.6790e-05, 7.3225e-05, 3.4189e-05, 5.1718e-05, 4.3355e-05, 3.2096e-05,\n 5.2142e-05, 3.8602e-05, 1.4192e-04, 3.6400e-05, 1.7429e-04, 4.8007e-05,\n 3.4443e-05, 2.1486e-05, 8.8243e-05, 1.1005e-04, 4.8657e-05, 6.9711e-05,\n 5.0673e-05, 3.9608e-05, 8.4909e-05, 9.1187e-05, 5.8761e-05, 6.3647e-05,\n 3.8178e-05, 7.9160e-05, 9.1213e-05, 6.9133e-05, 5.6579e-05, 1.2472e-04,\n 6.7083e-05, 4.0067e-05, 1.1527e-04, 5.5928e-05, 3.9762e-05, 3.7119e-05,\n 1.5310e-04, 7.7307e-05, 4.1790e-05, 4.1182e-05, 5.4299e-05, 1.8597e-04,\n 5.4535e-05, 8.0939e-05, 5.7456e-05, 3.4918e-05, 1.2708e-04, 1.2640e-04,\n 4.7598e-05, 6.2373e-05, 4.2439e-05, 6.5951e-05, 4.8817e-05, 7.3013e-05,\n 4.2726e-05, 3.8265e-05, 9.0291e-05, 4.7174e-05, 5.2758e-05, 5.7085e-05,\n 1.0513e-04, 4.5781e-05, 7.3985e-05, 6.6660e-05, 1.2613e-04, 3.5195e-05,\n 7.0161e-05, 2.0481e-04, 6.5785e-05, 7.8440e-05, 3.3346e-05, 4.7845e-05,\n 4.7918e-05, 4.0067e-05, 3.7957e-05, 4.3815e-05, 4.0187e-05, 9.2056e-05,\n 4.8892e-05, 7.9699e-05, 1.3781e-04, 4.5846e-05, 2.1574e-05, 8.2166e-05,\n 8.7860e-05, 5.1177e-05, 8.9626e-05, 5.5168e-05, 5.9999e-13, 5.2114e-05,\n 2.7413e-05, 2.5164e-05, 6.0571e-05, 4.9733e-05, 3.9268e-05, 4.5273e-05,\n 3.9565e-05, 5.0486e-05, 4.5751e-05, 5.5903e-05, 5.7426e-05, 3.7296e-05,\n 3.7934e-05, 1.3533e-04, 4.9246e-05, 3.4200e-05, 4.3419e-05, 5.0062e-05,\n 8.7088e-05, 6.1460e-05, 6.6249e-05, 5.3966e-05, 2.3921e-05, 9.3201e-05,\n 5.2737e-05, 3.6547e-05, 6.1556e-05, 6.9322e-05, 7.6921e-05, 1.0832e-04,\n 4.2204e-05, 4.4417e-05, 3.3752e-05, 4.7610e-05, 5.5700e-05, 3.1397e-05,\n 1.0396e-04, 9.2471e-05, 9.6654e-05, 6.0738e-05, 6.9905e-05, 6.8078e-05,\n 4.6212e-05, 6.7470e-05, 4.1425e-05, 5.6356e-05, 6.0180e-05, 5.5090e-05,\n 3.2145e-05, 4.7286e-05, 4.4696e-05, 8.1989e-05, 2.5016e-05, 6.8688e-05,\n 7.3935e-05, 5.8223e-05, 3.9719e-05, 5.5533e-05, 8.8350e-05, 7.5205e-05,\n 3.5094e-05, 3.6740e-05, 3.3839e-05, 5.3118e-05, 4.5822e-05, 6.9106e-05,\n 6.7792e-05, 1.1583e-04, 7.4059e-05, 4.8369e-05, 5.7069e-05, 3.5183e-05,\n 6.8723e-05, 8.3140e-05, 1.7387e-04, 4.3060e-05, 3.8421e-05, 3.1795e-05,\n 3.9681e-05, 6.4739e-05, 3.9475e-05, 8.9495e-05, 4.2212e-05, 7.4059e-05,\n 1.0788e-04, 3.6328e-05, 6.2810e-05, 5.4878e-05, 4.2507e-05, 5.2230e-05,\n 6.4377e-05, 4.9937e-05, 5.5652e-05, 7.4445e-05, 6.0970e-05, 8.3172e-05,\n 7.2158e-14, 1.0524e-04, 5.5696e-05, 3.5765e-05, 9.0137e-05, 6.3516e-05,\n 7.3014e-05, 3.7958e-13, 6.2190e-05, 9.6369e-05, 6.2220e-05, 1.6280e-04,\n 8.2893e-05, 7.2072e-05, 6.3960e-05, 3.1200e-05, 7.8657e-05, 6.5261e-05,\n 5.4484e-05, 5.6196e-05, 9.3929e-05, 4.3295e-05, 5.0822e-05, 1.0097e-04,\n 1.2142e-04, 3.0666e-05, 4.0913e-05, 7.4181e-05, 3.5018e-05, 5.7323e-05,\n 1.6371e-05, 5.0996e-05, 5.8855e-05, 1.0413e-04, 3.4833e-05, 8.4235e-05,\n 9.9273e-05, 1.5247e-04, 4.8662e-05, 6.1798e-05, 3.5163e-05, 5.4133e-05,\n 7.0375e-05, 6.8472e-05, 3.8527e-05, 2.3876e-05, 1.1097e-04, 7.9260e-05,\n 1.8167e-05, 3.4490e-05, 5.1192e-05, 1.4517e-04, 3.7876e-05, 4.9329e-05,\n 6.6672e-05, 5.6571e-05, 5.9879e-05, 8.0334e-05, 3.2334e-05, 4.0520e-05,\n 5.5483e-05, 6.7022e-05, 2.8761e-05, 1.5282e-04, 3.3956e-05, 5.9253e-05,\n 1.6226e-04, 7.3603e-05, 4.5530e-05, 9.3708e-05, 4.2957e-05, 3.1782e-05,\n 1.5600e-04, 3.4264e-05, 4.8734e-05, 4.1203e-05, 4.1951e-05, 5.6949e-05,\n 3.3829e-05, 5.1336e-05, 9.0306e-05, 4.3058e-05, 5.9640e-05, 8.6384e-14,\n 5.2668e-05, 2.7448e-05, 4.9410e-05, 4.4070e-05, 1.1333e-04, 5.8115e-05,\n 7.2222e-05, 4.8350e-05, 5.3469e-05, 7.5633e-05, 7.7273e-05, 7.6891e-05,\n 5.1872e-05, 1.1162e-04, 4.9766e-05, 2.8074e-05, 6.7338e-05, 3.6924e-05,\n 4.5869e-05, 3.0970e-05, 5.5828e-05, 9.5016e-05, 7.3524e-05, 7.0445e-05,\n 4.5174e-05, 9.1897e-05, 4.3202e-05, 7.6306e-05, 4.1621e-05, 5.1133e-05,\n 3.9113e-05, 4.4384e-05, 6.4731e-05, 1.7433e-05, 2.1672e-05, 7.0440e-05,\n 7.3901e-05, 6.4066e-05, 3.8250e-05, 7.0227e-05, 5.9671e-05, 9.7624e-05,\n 3.8855e-05, 4.9339e-05, 8.0474e-05, 8.8838e-05, 1.5418e-04, 5.3072e-05,\n 5.4815e-05, 3.8034e-05, 1.0717e-04, 8.4085e-05, 4.9755e-05, 6.1208e-05,\n 4.4344e-05, 6.9750e-05, 5.8980e-05, 3.6077e-05, 5.0280e-05, 1.0850e-04,\n 7.6264e-05, 7.4880e-05, 6.2601e-05, 7.9891e-05, 1.0247e-04, 9.5253e-05,\n 7.9787e-05, 7.9666e-05, 5.5976e-05, 1.0383e-04, 6.6845e-05, 7.2038e-05,\n 3.7731e-05, 5.6736e-05, 6.7689e-05, 4.0992e-05, 8.9971e-05, 6.7894e-05,\n 4.6144e-05, 8.6623e-05, 5.3803e-05, 1.1014e-04, 1.5750e-04, 4.6294e-05,\n 7.9874e-05, 3.1980e-05, 4.6379e-05, 5.6767e-05, 6.5349e-05, 4.4910e-05,\n 5.7807e-05, 6.7781e-05, 2.9691e-05, 3.3101e-05, 6.0577e-05, 6.4194e-05,\n 4.4452e-05, 7.9827e-05, 8.7813e-05, 3.7508e-05, 4.1852e-05, 6.1819e-05,\n 7.5593e-05, 4.3617e-05, 6.7976e-05, 9.0404e-05, 3.7601e-05, 6.9821e-05,\n 7.1177e-05, 5.3998e-05, 8.3663e-05, 5.6788e-05, 1.1254e-04, 3.6273e-05,\n 2.3265e-05, 6.0413e-05, 6.5331e-05, 3.9739e-05, 7.5976e-05, 3.2569e-05,\n 1.3511e-04, 4.4409e-05, 5.1010e-05, 4.2991e-05, 1.0432e-04, 5.4889e-05,\n 5.9396e-05, 5.1642e-05, 7.2564e-05, 5.0808e-05, 6.5912e-05, 3.0754e-05,\n 8.3894e-05, 5.4467e-05, 7.0208e-05, 4.6521e-05, 5.9053e-05, 6.5065e-05,\n 5.2320e-05, 5.3952e-05], device='cuda:0')"
24
+ },
25
+ "4": {
26
+ "step": "tensor(15016.)",
27
+ "exp_avg": "tensor([[ 1.2102e-05, 4.0470e-08, 4.9984e-06, ..., -3.4788e-06,\n 2.2236e-06, 3.6376e-06],\n [ 1.9927e-06, -8.3223e-06, -8.0621e-06, ..., -3.5620e-06,\n 4.9296e-06, -1.0206e-05],\n [ 7.9660e-06, 7.0537e-06, 2.3305e-05, ..., -4.7090e-07,\n -5.6781e-06, 1.2158e-06],\n ...,\n [ 4.5080e-06, -1.9294e-05, -5.5104e-06, ..., -1.7315e-06,\n 1.8579e-05, 9.1626e-06],\n [ 9.9849e-06, 1.7051e-05, 2.6366e-06, ..., -1.3908e-06,\n -1.4335e-05, 2.6285e-06],\n [-4.1077e-06, 1.4655e-05, 1.3212e-05, ..., -5.6920e-06,\n -1.9093e-05, -1.9200e-05]], device='cuda:0')",
28
+ "exp_avg_sq": "tensor([[9.7664e-10, 2.5124e-09, 7.1777e-10, ..., 6.1189e-10, 9.9324e-10,\n 1.5440e-09],\n [1.4340e-09, 1.8783e-09, 3.3400e-09, ..., 1.2933e-09, 2.7404e-09,\n 2.7061e-09],\n [1.6572e-09, 2.4026e-09, 2.9114e-09, ..., 1.0841e-09, 2.5007e-09,\n 2.2660e-09],\n ...,\n [2.0344e-09, 3.9133e-09, 2.2411e-09, ..., 8.0091e-10, 4.5485e-09,\n 2.8786e-09],\n [1.1937e-09, 4.7097e-09, 3.0841e-09, ..., 1.1314e-09, 2.1942e-09,\n 2.7221e-09],\n [4.0446e-09, 3.4758e-09, 2.3321e-09, ..., 1.0728e-09, 2.2549e-09,\n 3.2773e-09]], device='cuda:0')"
29
+ },
30
+ "5": {
31
+ "step": "tensor(15016.)",
32
+ "exp_avg": "tensor([[ 4.0352e-06, -1.4166e-05, 6.6065e-07, ..., -1.5637e-07,\n -7.5068e-07, -6.2223e-06],\n [ 1.2860e-06, -1.6731e-06, -4.1210e-06, ..., -3.6199e-06,\n 5.1734e-06, -1.0755e-05],\n [ 1.1144e-05, 6.1736e-06, 2.3937e-05, ..., 3.7241e-06,\n -6.8510e-07, -1.5336e-06],\n ...,\n [-5.1586e-06, -7.6704e-06, 5.5119e-06, ..., 1.3015e-06,\n 2.8246e-05, -1.7804e-05],\n [-3.3984e-06, -1.5647e-05, 1.4332e-05, ..., 1.0286e-06,\n -1.8631e-05, -4.0085e-06],\n [ 5.7704e-07, -6.4737e-06, -6.7770e-06, ..., -5.1286e-06,\n -3.4149e-06, 4.4798e-06]], device='cuda:0')",
33
+ "exp_avg_sq": "tensor([[5.6556e-10, 1.3809e-09, 6.4931e-10, ..., 2.8085e-10, 5.3639e-10,\n 1.0878e-09],\n [1.4081e-09, 3.4819e-09, 1.1509e-09, ..., 8.7816e-10, 2.5815e-09,\n 1.8933e-09],\n [1.1710e-09, 1.5974e-09, 1.7273e-09, ..., 9.4830e-10, 1.9694e-09,\n 1.9863e-09],\n ...,\n [1.3904e-09, 1.3812e-09, 2.8856e-09, ..., 5.6982e-10, 4.5890e-09,\n 1.7967e-09],\n [1.8491e-09, 2.8663e-09, 3.5055e-09, ..., 1.1993e-09, 9.9627e-10,\n 1.5619e-09],\n [1.0073e-09, 3.4483e-09, 1.4301e-09, ..., 6.8026e-10, 9.1361e-10,\n 2.1108e-09]], device='cuda:0')"
34
+ },
35
+ "6": {
36
+ "step": "tensor(15016.)",
37
+ "exp_avg": "tensor([-7.6212e-05, 7.6218e-05], device='cuda:0')",
38
+ "exp_avg_sq": "tensor([4.0051e-06, 4.0051e-06], device='cuda:0')"
39
+ }
40
+ },
41
+ "param_groups": [
42
+ {
43
+ "lr": 0.00654543046337755,
44
+ "name": "shared",
45
+ "betas": [
46
+ 0.9,
47
+ 0.999
48
+ ],
49
+ "eps": 1e-08,
50
+ "weight_decay": 1e-05,
51
+ "amsgrad": false,
52
+ "maximize": false,
53
+ "foreach": null,
54
+ "capturable": false,
55
+ "differentiable": false,
56
+ "fused": null,
57
+ "decoupled_weight_decay": true,
58
+ "initial_lr": 0.01,
59
+ "params": [
60
+ 0,
61
+ 1,
62
+ 2,
63
+ 3
64
+ ]
65
+ },
66
+ {
67
+ "lr": 0.00654543046337755,
68
+ "name": "scale_256",
69
+ "betas": [
70
+ 0.9,
71
+ 0.999
72
+ ],
73
+ "eps": 1e-08,
74
+ "weight_decay": 1e-05,
75
+ "amsgrad": false,
76
+ "maximize": false,
77
+ "foreach": null,
78
+ "capturable": false,
79
+ "differentiable": false,
80
+ "fused": null,
81
+ "decoupled_weight_decay": true,
82
+ "initial_lr": 0.01,
83
+ "params": [
84
+ 4
85
+ ]
86
+ },
87
+ {
88
+ "lr": 0.00654543046337755,
89
+ "name": "scale_512",
90
+ "betas": [
91
+ 0.9,
92
+ 0.999
93
+ ],
94
+ "eps": 1e-08,
95
+ "weight_decay": 1e-05,
96
+ "amsgrad": false,
97
+ "maximize": false,
98
+ "foreach": null,
99
+ "capturable": false,
100
+ "differentiable": false,
101
+ "fused": null,
102
+ "decoupled_weight_decay": true,
103
+ "initial_lr": 0.01,
104
+ "params": [
105
+ 5
106
+ ]
107
+ },
108
+ {
109
+ "lr": 0.0032728879774401812,
110
+ "name": "fusion",
111
+ "betas": [
112
+ 0.9,
113
+ 0.999
114
+ ],
115
+ "eps": 1e-08,
116
+ "weight_decay": 1e-05,
117
+ "amsgrad": false,
118
+ "maximize": false,
119
+ "foreach": null,
120
+ "capturable": false,
121
+ "differentiable": false,
122
+ "fused": null,
123
+ "decoupled_weight_decay": true,
124
+ "initial_lr": 0.005,
125
+ "params": [
126
+ 6
127
+ ]
128
+ }
129
+ ]
130
+ },
131
+ "scheduler_state_dict": {
132
+ "T_0": 10,
133
+ "T_i": 10,
134
+ "T_mult": 2,
135
+ "eta_min": 1e-06,
136
+ "T_cur": 4,
137
+ "base_lrs": [
138
+ 0.01,
139
+ 0.01,
140
+ 0.01,
141
+ 0.005
142
+ ],
143
+ "last_epoch": 4,
144
+ "_step_count": 0,
145
+ "_is_initial": false,
146
+ "_get_lr_called_within_step": false,
147
+ "_last_lr": [
148
+ 0.00654543046337755,
149
+ 0.00654543046337755,
150
+ 0.00654543046337755,
151
+ 0.0032728879774401812
152
+ ]
153
+ },
154
+ "metrics": {
155
+ "best_val_acc": 65.36133333333333,
156
+ "best_epoch": 3,
157
+ "scale_accuracies": {
158
+ "256": 64.614,
159
+ "512": 65.34666666666666
160
+ },
161
+ "training_history": {
162
+ "epochs": [
163
+ 1,
164
+ 2,
165
+ 3,
166
+ 4
167
+ ],
168
+ "train_loss": [
169
+ 5.311051666323785,
170
+ 4.462767010682684,
171
+ 4.340839946911445,
172
+ 4.262519323832187
173
+ ],
174
+ "train_acc": [
175
+ 54.91727464101089,
176
+ 60.04988680892759,
177
+ 61.02839572566782,
178
+ 61.696614622970046
179
+ ],
180
+ "val_acc": [
181
+ 63.041333333333334,
182
+ 64.17333333333333,
183
+ 64.75866666666667,
184
+ 65.36133333333333
185
+ ],
186
+ "scale_accs": {
187
+ "256": [
188
+ 62.11666666666667,
189
+ 63.38733333333333,
190
+ 63.992666666666665,
191
+ 64.614
192
+ ],
193
+ "512": [
194
+ 62.967333333333336,
195
+ 64.19266666666667,
196
+ 64.73066666666666,
197
+ 65.34666666666666
198
+ ]
199
+ },
200
+ "lr": [
201
+ 0.00975530705321762,
202
+ 0.00904518046337755,
203
+ 0.00793913236883622,
204
+ 0.00654543046337755
205
+ ]
206
+ }
207
+ },
208
+ "train_config": {
209
+ "name": "david_training",
210
+ "run_id": "20251012_231445",
211
+ "dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
212
+ "model_variant": [
213
+ "clip_vit_b16",
214
+ "clip_vit_laion_b32",
215
+ "clip_vit_b32"
216
+ ],
217
+ "num_classes": 1000,
218
+ "preset": "small_fast",
219
+ "custom_config_path": null,
220
+ "num_classes_override": null,
221
+ "use_belly_override": null,
222
+ "belly_expand_override": null,
223
+ "progressive_training_override": true,
224
+ "scale_warmup_epochs_override": {
225
+ "256": 0,
226
+ "512": 0
227
+ },
228
+ "num_epochs": 10,
229
+ "batch_size": 1024,
230
+ "learning_rate": 0.01,
231
+ "weight_decay": 1e-05,
232
+ "warmup_epochs": 3,
233
+ "use_rose_loss": true,
234
+ "rose_initial_weight": 0.2,
235
+ "rose_max_weight": 0.6,
236
+ "rose_weight_schedule": "adaptive",
237
+ "use_cayley_loss": false,
238
+ "cayley_weight": 0.01,
239
+ "scale_loss_balance": null,
240
+ "use_mixed_precision": false,
241
+ "gradient_clip": 5.0,
242
+ "scheduler_type": "cosine_restarts",
243
+ "min_lr": 1e-06,
244
+ "freeze_strategy": "never",
245
+ "freeze_threshold": 90.0,
246
+ "unfreeze_on_plateau": true,
247
+ "patience": 10,
248
+ "track_gradients": true,
249
+ "gradient_scale_threshold": 1e-05,
250
+ "gradient_scale_multiplier": 10.0,
251
+ "log_interval": 50,
252
+ "val_interval": 1,
253
+ "save_interval": 5,
254
+ "log_fusion_weights": true,
255
+ "log_loss_components": true,
256
+ "save_format": "safetensors",
257
+ "hf_repo": "AbstractPhil/david-shared-space",
258
+ "upload_to_hub": true,
259
+ "base_dir": "./david_training",
260
+ "num_workers": 10,
261
+ "pin_memory": true,
262
+ "prefetch_factor": 4,
263
+ "persistent_workers": true
264
+ }
265
+ }