AbstractPhil commited on
Commit
31cd484
·
verified ·
1 Parent(s): ddb7bac

Update best_model_acc71.67_metadata.json - Run 20251012_141246

Browse files
weights/David-fully_shared-weighted_sum/20251012_141246/best_model_acc71.67_metadata.json ADDED
@@ -0,0 +1,286 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 7,
3
+ "optimizer_state_dict": {
4
+ "state": {
5
+ "0": {
6
+ "step": "tensor(10016.)",
7
+ "exp_avg": "tensor([[-2.4223e-04, 5.1925e-04, 5.2717e-04, ..., -2.4494e-04,\n 1.2532e-04, -3.4761e-04],\n [ 4.6919e-04, 7.1536e-04, 1.0966e-04, ..., 3.0807e-04,\n -3.7684e-04, -9.8171e-04],\n [-4.0141e-04, -4.6843e-03, 2.1972e-03, ..., -2.6662e-04,\n 1.6477e-04, -4.2244e-04],\n ...,\n [ 3.1615e-04, -4.3424e-03, -5.5447e-03, ..., 6.7308e-04,\n -5.8223e-04, 1.3937e-03],\n [-4.8674e-04, -1.7077e-04, 5.4414e-04, ..., -5.5291e-04,\n -2.6563e-04, 2.4129e-04],\n [-4.2120e-04, 1.9915e-03, -1.3416e-03, ..., 4.4661e-06,\n -4.5849e-04, -1.5839e-04]], device='cuda:0')",
8
+ "exp_avg_sq": "tensor([[4.5597e-06, 2.8351e-05, 1.2590e-05, ..., 2.9732e-06, 2.2122e-06,\n 2.4077e-06],\n [8.7381e-06, 4.6441e-05, 1.7898e-05, ..., 4.4779e-06, 3.9790e-06,\n 3.6031e-06],\n [3.6976e-06, 2.4248e-05, 1.2551e-05, ..., 2.4998e-06, 2.0492e-06,\n 2.6432e-06],\n ...,\n [8.8164e-06, 4.2792e-05, 2.8044e-05, ..., 9.0187e-06, 3.4320e-06,\n 8.6275e-06],\n [3.4135e-06, 2.4848e-05, 1.4228e-05, ..., 2.8843e-06, 1.9514e-06,\n 2.2857e-06],\n [8.9014e-06, 4.9011e-05, 2.0988e-05, ..., 4.7481e-06, 3.1148e-06,\n 3.6411e-06]], device='cuda:0')"
9
+ },
10
+ "1": {
11
+ "step": "tensor(10016.)",
12
+ "exp_avg": "tensor([ 7.9784e-03, 1.7617e-02, 1.3049e-02, -5.5260e-02, -3.0634e-02,\n -3.7295e-02, 1.1757e-02, 9.5989e-03, -3.0527e-02, 9.9979e-03,\n 1.1812e-02, 1.0251e-03, 2.5976e-02, -2.6878e-03, -9.3533e-03,\n -4.9985e-02, 3.6460e-02, 1.4422e-02, 4.4387e-02, -1.7126e-02,\n 5.0476e-02, -7.3376e-02, -8.3123e-02, -1.7632e-02, 1.5029e-02,\n 1.7179e-02, -3.1529e-02, 1.2729e-02, -7.6851e-03, 2.0332e-03,\n -7.5530e-03, 8.1233e-04, 5.7511e-04, -1.1759e-02, -1.1855e-02,\n -1.2539e-02, -5.1526e-02, 2.1568e-02, 1.9665e-02, 1.2097e-02,\n -3.0566e-02, 7.1514e-04, -3.8878e-03, 1.1694e-02, -1.7316e-02,\n -7.9533e-03, 1.8823e-04, 2.0768e-02, 3.3135e-02, -2.8038e-02,\n -2.6061e-02, -1.2654e-02, 2.3375e-02, -5.0861e-03, -1.2514e-03,\n 2.8419e-02, 9.9225e-03, -2.9250e-02, 3.6053e-03, -2.5366e-02,\n 2.7364e-02, -2.4980e-05, -1.3248e-02, -7.2098e-03, 1.0928e-02,\n 2.6949e-02, -1.3561e-02, -1.4683e-02, 2.2532e-02, 2.2013e-02,\n -8.1983e-03, -8.9692e-04, 2.8243e-02, 2.3909e-02, 1.1034e-02,\n -1.3862e-02, 2.1205e-02, -3.9808e-03, -2.2417e-03, -1.8534e-02,\n 1.8742e-02, -1.7668e-02, 7.7479e-03, 4.1255e-02, -8.7052e-03,\n 2.4445e-02, -1.7175e-02, 4.9946e-03, 3.9897e-02, 5.7419e-03,\n 1.7820e-02, 3.5302e-02, -1.3216e-03, 2.8782e-02, -2.1295e-02,\n -1.0785e-02, 3.9312e-03, 2.7061e-03, -9.7154e-03, 2.1368e-02,\n 1.2157e-02, -9.7691e-03, -1.7071e-02, 2.4439e-02, 1.4486e-02,\n -4.0168e-02, 3.4434e-02, 8.0325e-03, -2.3924e-04, 2.8631e-03,\n 1.5029e-02, -2.6702e-03, 6.8956e-03, 3.2131e-03, -3.3150e-03,\n 1.8477e-02, -4.2256e-03, 4.5423e-03, 2.8327e-02, 9.9076e-03,\n 8.2998e-03, 1.6780e-02, -2.0422e-03, -3.7794e-03, -6.3030e-02,\n 3.0392e-02, 1.7600e-02, -6.9212e-03, -8.9287e-03, 1.2202e-02,\n -9.4757e-03, -5.1524e-02, 1.9095e-02, 1.0699e-03, -4.5809e-03,\n -1.1730e-02, -1.4205e-02, -5.0602e-02, -6.6210e-05, -3.5646e-02,\n 1.2642e-02, -1.4325e-02, -3.9063e-02, -9.4662e-04, 2.6547e-02,\n -2.6006e-02, -3.7031e-03, -4.1625e-03, 1.0318e-02, -1.0172e-02,\n 5.5440e-03, -1.0537e-02, -5.0770e-02, -2.0848e-03, -2.0999e-02,\n -1.3435e-02, -3.0220e-02, 1.9455e-02, -4.6738e-02, -1.4640e-02,\n -6.7401e-03, 3.0971e-02, 1.5008e-02, -4.2914e-03, -2.3150e-02,\n 3.2511e-03, 9.8770e-04, 1.2749e-02, 1.5781e-02, 1.3899e-02,\n -2.2590e-02, -2.0266e-02, -5.6155e-03, 2.2191e-02, 1.7704e-03,\n 7.0781e-03, 1.8759e-02, 2.6165e-02, -2.0300e-02, 1.4442e-02,\n -1.6931e-02, -4.0786e-02, -5.8045e-03, 2.7391e-02, -2.5103e-02,\n 6.9993e-03, 1.5779e-02, -3.6714e-02, -1.1309e-02, -2.3160e-02,\n 1.9229e-02, 5.8452e-03, 1.6521e-02, -7.7569e-03, 2.3838e-02,\n 2.8977e-02, -7.6561e-03, -1.4972e-02, 4.0257e-03, 1.5166e-02,\n 4.1126e-03, -3.1176e-02, 1.0397e-02, -1.3520e-02, -2.9296e-03,\n 1.5832e-03, -4.2154e-03, 6.5406e-03, 1.2383e-02, -2.3578e-02,\n -1.7982e-02, 3.6483e-03, 1.6712e-03, 3.1193e-02, 1.9099e-04,\n 1.1481e-03, 2.2189e-02, -8.7299e-03, -1.0215e-02, -1.8593e-02,\n 2.2231e-02, 3.6835e-02, 1.1688e-02, -7.1432e-03, 3.5264e-02,\n -4.3532e-02, 1.6690e-02, -7.5373e-03, 2.6767e-02, 1.8856e-04,\n 1.1976e-02, 2.2269e-02, -8.9825e-03, 1.2224e-02, 3.6106e-02,\n -3.2009e-02, 6.9119e-03, 1.3607e-02, 2.5894e-02, -2.6278e-03,\n -1.4596e-02, -1.9066e-02, 4.9402e-03, 3.6568e-04, 6.4631e-03,\n 8.2937e-03, 3.0041e-02, -1.5280e-02, -6.9264e-03, 1.3711e-02,\n 1.2640e-02, 6.1456e-03, 1.5308e-03, 2.5851e-02, 1.8396e-02,\n -6.1790e-02, 7.5163e-03, 1.9284e-02, -3.2642e-03, -1.1023e-04,\n 1.5983e-02, 9.0094e-03, -1.0684e-02, -1.1016e-02, 2.1640e-02,\n -2.4278e-02, -2.0739e-02, 5.4379e-03, 1.2627e-03, -1.5793e-02,\n 1.4356e-02, -1.9775e-03, 3.1215e-02, 1.4293e-02, 4.1504e-02,\n 7.0107e-03, -9.7582e-03, 6.7147e-03, -3.7609e-03, -9.5202e-05,\n 3.1897e-02, -6.5587e-03, 2.0660e-02, 9.4450e-03, -7.0822e-04,\n 2.9779e-02, 2.5103e-02, -1.6344e-03, -1.8840e-03, 1.2620e-02,\n 1.4417e-02, -1.3423e-02, -9.9817e-03, 1.5125e-02, -3.0788e-04,\n 3.0722e-02, 2.1605e-02, -2.7199e-02, 2.5235e-02, 2.5491e-02,\n 7.1449e-03, 2.7640e-02, 6.1503e-03, 8.5047e-03, 2.4008e-02,\n -1.0834e-02, -1.0323e-02, -1.2523e-01, -3.2518e-02, 2.9439e-02,\n 3.0332e-03, 7.3469e-03, -2.1852e-02, 2.4644e-02, 9.9222e-03,\n 2.2561e-02, -1.1855e-02, 1.6618e-02, -2.5206e-02, 1.2125e-02,\n -9.1740e-03, -6.1367e-02, -8.6690e-03, 1.3034e-03, 3.4961e-02,\n 2.4001e-02, 4.9764e-03, -3.7871e-03, -1.3193e-02, -4.6883e-02,\n -1.6713e-03, 1.7505e-02, 1.6466e-02, -2.8907e-02, 3.7464e-03,\n 1.6053e-03, -2.5081e-02, -1.1144e-02, 8.8160e-03, 6.8128e-03,\n -5.8108e-03, 4.3550e-02, -6.2555e-02, 1.2219e-02, 4.4231e-03,\n 2.5546e-02, 1.0957e-02, 2.0474e-02, 4.7548e-02, 4.2932e-06,\n 7.8006e-04, 3.0648e-03, -1.9028e-02, 7.3765e-03, 4.5592e-03,\n -9.5583e-03, 8.3584e-03, 1.5759e-03, -1.5597e-02, 4.5906e-03,\n -7.5302e-03, -9.2624e-03, 3.4622e-03, -1.5331e-03, -4.3392e-03,\n -3.2649e-02, 2.7978e-03, -2.4562e-02, -7.4578e-03, 5.2274e-04,\n 8.6895e-03, -3.4841e-03, 3.1125e-02, -2.3851e-02, -6.4935e-03,\n -5.6430e-03, 3.2395e-03, 3.1886e-02, 2.7574e-02, 1.5418e-02,\n -8.0175e-03, 1.6883e-02, 7.7349e-03, -3.0282e-03, 1.0806e-02,\n 1.6181e-02, 2.9195e-02, -1.0564e-02, -1.2009e-02, 2.7202e-02,\n 3.2981e-02, -1.3981e-02, -2.9734e-03, 1.7715e-02, 1.7492e-02,\n 1.5030e-02, -7.6771e-03, -2.1224e-02, -5.5355e-02, -2.8047e-02,\n 3.1445e-03, -1.1505e-02, -2.0433e-02, -1.5639e-02, -1.1784e-02,\n 1.6073e-02, 1.6451e-03, -1.8214e-02, 2.7843e-02, -1.2388e-02,\n -1.7215e-02, 9.6425e-03, -4.2229e-03, -2.2726e-03, -1.1701e-01,\n -2.9945e-03, 5.8857e-03, 2.4358e-02, -6.3381e-03, 2.4691e-02,\n 3.1278e-03, -5.4392e-02, -5.0162e-02, 8.5155e-03, -3.2807e-02,\n 2.6067e-02, -3.8676e-03, 2.1570e-03, -6.9983e-03, 4.0533e-03,\n -7.2209e-04, 8.3473e-02, -3.8606e-02, 9.1476e-03, -5.4207e-02,\n -7.1239e-03, 4.9711e-03, 4.5675e-02, 2.4708e-02, 3.0559e-02,\n 8.1010e-03, -4.7047e-02, -7.8140e-03, -1.9736e-02, -1.5950e-02,\n 5.0801e-02, 1.7157e-02, 2.7126e-02, 1.6330e-02, 1.3770e-02,\n -7.2588e-03, 1.7184e-02, -1.8236e-02, 5.9736e-03, -1.7531e-02,\n -5.2641e-03, 3.8283e-02, 2.5991e-02, -3.6908e-02, -3.1987e-02,\n 1.0755e-03, 4.5443e-03, 3.4956e-02, 3.1658e-02, 2.1064e-02,\n -1.2253e-02, 1.5228e-02, -1.3398e-02, -4.2415e-03, -3.0445e-02,\n 5.0017e-03, -1.1082e-02, -3.7809e-03, -1.8966e-02, -1.4736e-02,\n 8.5015e-03, -5.2303e-02, -1.0084e-02, 1.4925e-02, 2.9302e-02,\n -2.6232e-02, -4.4798e-02, -1.4278e-04, -3.7879e-03, 8.4012e-03,\n 4.5905e-02, 4.2445e-03, -6.2572e-03, 1.4894e-02, -4.7649e-03,\n -9.9057e-03, -1.0883e-02, -4.4593e-03, 4.2619e-02, -5.7698e-02,\n -1.5005e-03, -1.3097e-02, -8.8706e-03, 2.3506e-03, 1.6113e-03,\n -2.2254e-02, -1.0422e-02, -7.3676e-03, -3.9174e-02, 8.3310e-03,\n 1.2812e-02, 1.0414e-02, -1.2139e-02, -3.5256e-03, 2.2413e-02,\n 4.5231e-03, -3.8353e-03], device='cuda:0')",
13
+ "exp_avg_sq": "tensor([0.0032, 0.0055, 0.0026, 0.0025, 0.0035, 0.0055, 0.0021, 0.0035, 0.0042,\n 0.0041, 0.0026, 0.0030, 0.0063, 0.0024, 0.0054, 0.0063, 0.0040, 0.0039,\n 0.0034, 0.0046, 0.0036, 0.0076, 0.0047, 0.0033, 0.0018, 0.0063, 0.0038,\n 0.0049, 0.0052, 0.0040, 0.0038, 0.0087, 0.0029, 0.0098, 0.0029, 0.0059,\n 0.0039, 0.0032, 0.0038, 0.0051, 0.0038, 0.0041, 0.0037, 0.0036, 0.0064,\n 0.0039, 0.0029, 0.0032, 0.0041, 0.0043, 0.0051, 0.0032, 0.0030, 0.0035,\n 0.0036, 0.0032, 0.0033, 0.0019, 0.0026, 0.0035, 0.0048, 0.0028, 0.0066,\n 0.0041, 0.0059, 0.0063, 0.0031, 0.0036, 0.0038, 0.0032, 0.0044, 0.0024,\n 0.0035, 0.0079, 0.0050, 0.0022, 0.0061, 0.0033, 0.0038, 0.0053, 0.0046,\n 0.0067, 0.0023, 0.0058, 0.0041, 0.0052, 0.0034, 0.0064, 0.0051, 0.0030,\n 0.0078, 0.0050, 0.0032, 0.0036, 0.0048, 0.0021, 0.0027, 0.0040, 0.0022,\n 0.0032, 0.0037, 0.0046, 0.0038, 0.0032, 0.0027, 0.0059, 0.0061, 0.0025,\n 0.0041, 0.0053, 0.0038, 0.0042, 0.0056, 0.0027, 0.0039, 0.0026, 0.0020,\n 0.0029, 0.0066, 0.0046, 0.0045, 0.0097, 0.0062, 0.0049, 0.0068, 0.0052,\n 0.0026, 0.0018, 0.0055, 0.0057, 0.0062, 0.0034, 0.0047, 0.0036, 0.0025,\n 0.0013, 0.0029, 0.0043, 0.0015, 0.0057, 0.0045, 0.0035, 0.0048, 0.0035,\n 0.0045, 0.0038, 0.0033, 0.0027, 0.0021, 0.0041, 0.0042, 0.0027, 0.0039,\n 0.0034, 0.0032, 0.0025, 0.0039, 0.0037, 0.0051, 0.0021, 0.0033, 0.0025,\n 0.0029, 0.0037, 0.0038, 0.0032, 0.0027, 0.0031, 0.0054, 0.0060, 0.0059,\n 0.0049, 0.0057, 0.0039, 0.0038, 0.0024, 0.0054, 0.0037, 0.0043, 0.0040,\n 0.0053, 0.0028, 0.0022, 0.0041, 0.0039, 0.0054, 0.0055, 0.0051, 0.0047,\n 0.0043, 0.0046, 0.0028, 0.0033, 0.0040, 0.0037, 0.0069, 0.0024, 0.0031,\n 0.0026, 0.0035, 0.0024, 0.0023, 0.0039, 0.0041, 0.0047, 0.0044, 0.0093,\n 0.0039, 0.0016, 0.0036, 0.0033, 0.0021, 0.0063, 0.0027, 0.0036, 0.0050,\n 0.0037, 0.0042, 0.0028, 0.0045, 0.0031, 0.0045, 0.0048, 0.0040, 0.0047,\n 0.0027, 0.0016, 0.0036, 0.0061, 0.0027, 0.0029, 0.0032, 0.0039, 0.0059,\n 0.0043, 0.0017, 0.0028, 0.0031, 0.0042, 0.0041, 0.0013, 0.0033, 0.0042,\n 0.0048, 0.0045, 0.0030, 0.0032, 0.0037, 0.0041, 0.0053, 0.0015, 0.0028,\n 0.0047, 0.0044, 0.0040, 0.0067, 0.0031, 0.0044, 0.0030, 0.0066, 0.0040,\n 0.0050, 0.0046, 0.0050, 0.0033, 0.0023, 0.0034, 0.0024, 0.0030, 0.0068,\n 0.0043, 0.0035, 0.0037, 0.0054, 0.0046, 0.0020, 0.0031, 0.0038, 0.0056,\n 0.0043, 0.0028, 0.0060, 0.0057, 0.0018, 0.0013, 0.0035, 0.0032, 0.0046,\n 0.0025, 0.0042, 0.0025, 0.0020, 0.0041, 0.0037, 0.0035, 0.0043, 0.0030,\n 0.0017, 0.0048, 0.0078, 0.0034, 0.0024, 0.0029, 0.0038, 0.0047, 0.0052,\n 0.0044, 0.0050, 0.0058, 0.0034, 0.0064, 0.0038, 0.0053, 0.0046, 0.0044,\n 0.0040, 0.0038, 0.0049, 0.0035, 0.0043, 0.0033, 0.0043, 0.0028, 0.0041,\n 0.0031, 0.0056, 0.0039, 0.0032, 0.0036, 0.0037, 0.0033, 0.0041, 0.0030,\n 0.0037, 0.0056, 0.0042, 0.0025, 0.0019, 0.0040, 0.0046, 0.0054, 0.0075,\n 0.0026, 0.0032, 0.0038, 0.0056, 0.0031, 0.0025, 0.0045, 0.0028, 0.0022,\n 0.0047, 0.0029, 0.0029, 0.0027, 0.0028, 0.0022, 0.0040, 0.0043, 0.0025,\n 0.0032, 0.0085, 0.0024, 0.0102, 0.0042, 0.0046, 0.0015, 0.0028, 0.0029,\n 0.0029, 0.0043, 0.0041, 0.0039, 0.0028, 0.0023, 0.0056, 0.0049, 0.0042,\n 0.0062, 0.0038, 0.0031, 0.0015, 0.0025, 0.0020, 0.0044, 0.0070, 0.0046,\n 0.0029, 0.0031, 0.0078, 0.0028, 0.0029, 0.0023, 0.0036, 0.0048, 0.0037,\n 0.0035, 0.0038, 0.0060, 0.0099, 0.0024, 0.0028, 0.0027, 0.0030, 0.0035,\n 0.0064, 0.0032, 0.0022, 0.0036, 0.0035, 0.0030, 0.0028, 0.0019, 0.0047,\n 0.0060, 0.0055, 0.0036, 0.0083, 0.0035, 0.0066, 0.0051, 0.0036, 0.0070,\n 0.0028, 0.0050, 0.0026, 0.0028, 0.0043, 0.0025, 0.0031, 0.0035, 0.0091,\n 0.0062, 0.0033, 0.0039, 0.0026, 0.0065, 0.0082, 0.0027, 0.0041, 0.0066,\n 0.0041, 0.0023, 0.0039, 0.0068, 0.0029, 0.0020, 0.0028, 0.0054, 0.0029,\n 0.0038, 0.0050, 0.0036, 0.0035, 0.0039, 0.0059, 0.0051, 0.0023, 0.0040,\n 0.0038, 0.0023, 0.0038, 0.0038, 0.0041, 0.0060, 0.0021, 0.0031, 0.0068,\n 0.0041, 0.0066, 0.0030, 0.0035, 0.0030, 0.0033, 0.0055, 0.0018, 0.0070,\n 0.0021, 0.0030, 0.0041, 0.0045, 0.0043, 0.0040, 0.0083, 0.0055, 0.0038,\n 0.0032, 0.0070, 0.0044, 0.0042, 0.0029, 0.0041, 0.0030, 0.0033, 0.0042,\n 0.0066, 0.0032, 0.0065, 0.0034, 0.0038, 0.0038, 0.0018, 0.0023, 0.0034,\n 0.0021, 0.0025, 0.0039, 0.0007, 0.0045, 0.0071, 0.0030, 0.0043],\n device='cuda:0')"
14
+ },
15
+ "2": {
16
+ "step": "tensor(10016.)",
17
+ "exp_avg": "tensor([ 1.7852e-03, 3.1211e-03, 3.0576e-03, -1.0802e-02, -5.5361e-03,\n -7.1377e-03, 2.4634e-03, 1.7267e-03, -2.9238e-03, 2.6623e-03,\n 5.9245e-03, 2.4572e-04, 3.0481e-03, 1.7709e-04, -1.0914e-03,\n -5.1022e-03, 5.8639e-03, 2.0875e-03, 7.1091e-03, -2.2633e-03,\n 6.2070e-03, -6.8822e-03, -1.1940e-02, -5.3673e-03, 5.9812e-03,\n 1.6669e-03, -8.1395e-03, 1.9151e-03, -5.3823e-04, 1.3388e-03,\n -1.9944e-03, -2.4451e-04, -8.0545e-04, -1.3220e-03, -4.3320e-03,\n -1.8678e-03, -1.1477e-02, 4.6031e-03, 5.2075e-03, 4.0108e-03,\n -7.4182e-03, 7.6446e-04, 7.5012e-04, 2.4127e-03, -3.8204e-03,\n 1.2208e-03, 6.4187e-04, 4.2285e-03, 4.2188e-03, -4.7491e-03,\n -3.8493e-03, -4.4727e-03, 5.2800e-03, -1.6675e-03, 2.5534e-05,\n 7.2626e-03, 1.9978e-03, -1.3079e-02, 7.5395e-05, -6.3227e-03,\n 3.2970e-03, 1.2672e-03, -3.4584e-03, -1.5102e-03, 4.2986e-03,\n 3.5431e-03, -2.8196e-03, -2.3170e-03, 3.1287e-03, 3.9468e-03,\n -1.2070e-03, -2.6329e-04, 3.6917e-03, 2.5862e-03, 1.5102e-03,\n -2.8912e-03, 2.4101e-03, 5.7800e-04, 2.7567e-04, -1.9099e-03,\n 2.6354e-03, -4.2057e-03, 2.3433e-03, 3.8550e-03, -3.9451e-03,\n 3.4918e-03, -2.4416e-03, 1.1558e-03, 2.9267e-03, 1.4421e-03,\n 1.8760e-03, 7.9391e-03, -3.2000e-04, 9.4505e-03, -4.1718e-03,\n -4.6488e-03, 1.4617e-03, 1.8825e-04, -4.3217e-03, 1.4322e-03,\n 2.0419e-03, -1.0981e-03, -2.0851e-03, 6.2896e-03, 5.1936e-03,\n -7.7799e-03, 4.8259e-03, 2.3193e-03, -8.5114e-04, 1.6988e-04,\n 6.7404e-03, -9.0676e-04, -4.4378e-05, -3.8663e-04, 1.0996e-03,\n 7.0533e-03, -3.2675e-04, 1.0309e-03, 3.3615e-03, 1.8085e-03,\n 6.5756e-04, 1.3014e-03, 2.5189e-04, -3.3471e-04, -9.0521e-03,\n 5.2328e-03, 3.9693e-03, -2.5871e-03, -1.3278e-03, 1.1238e-03,\n -9.2190e-04, -1.1028e-02, 3.4942e-03, 1.4891e-03, -1.3146e-03,\n -1.1087e-02, -3.2225e-03, -5.3452e-03, -9.8195e-04, -6.2731e-03,\n 2.5006e-03, -1.5167e-03, -4.4597e-03, -7.3752e-04, 5.7120e-03,\n -6.2428e-03, -7.7309e-04, -1.6555e-03, 7.6723e-03, 2.7499e-04,\n -3.4658e-04, -4.5019e-03, -1.0995e-02, -8.6122e-04, -2.6582e-03,\n -5.7947e-03, -3.0820e-03, 3.6880e-03, -6.2862e-03, -3.1388e-03,\n -7.5973e-04, 9.8629e-03, 4.5088e-03, -2.7591e-03, -4.4479e-03,\n 1.6373e-03, -3.6782e-04, 2.7410e-03, 6.9803e-04, 1.4338e-03,\n -3.0636e-03, -2.0644e-03, -1.0385e-03, 5.4941e-03, -6.6944e-04,\n 3.5528e-03, 2.5248e-03, 4.9414e-03, -2.0341e-03, 3.1655e-03,\n -4.1819e-03, -1.2157e-02, -1.6810e-03, 3.7924e-03, -5.3066e-03,\n 3.2706e-03, 2.8773e-03, -3.9727e-03, -2.3477e-03, -2.7324e-03,\n 4.9901e-03, 1.3297e-03, 3.7786e-03, -1.9335e-03, 5.4534e-03,\n 2.4182e-03, -3.2103e-03, -2.3067e-03, 2.9271e-03, 3.9070e-03,\n 1.8039e-03, -7.3062e-03, 3.4808e-03, -1.7350e-03, -6.2221e-04,\n -6.4355e-04, -3.0094e-04, 3.8947e-04, 4.4719e-03, -3.0352e-03,\n -7.0925e-03, 2.3367e-03, -3.1001e-04, 1.0640e-02, -2.1539e-05,\n 3.8488e-04, 2.7064e-03, -1.5773e-03, -4.6876e-03, -2.9865e-03,\n 4.1528e-03, 9.5635e-03, 1.8398e-03, -1.2907e-03, 5.6170e-03,\n -1.9070e-02, 1.3114e-02, -9.1106e-04, 3.6948e-03, 3.4482e-04,\n 2.8142e-03, 6.4031e-03, -2.1184e-03, 2.1041e-03, 5.5030e-03,\n -1.1661e-02, 2.4988e-03, 7.0173e-03, 2.5281e-03, -1.1816e-03,\n -1.3776e-02, -3.1865e-03, 1.3331e-03, -3.0834e-03, 2.0966e-03,\n 4.0906e-03, 8.6797e-03, -4.4026e-03, -9.2114e-04, 2.6145e-03,\n 1.1516e-02, 1.9909e-03, -6.1536e-04, 3.0942e-03, 4.0202e-03,\n -7.2309e-03, 4.2354e-03, 4.1458e-03, -6.8333e-04, 1.2162e-04,\n 3.2906e-03, 1.3639e-03, -1.0260e-03, -2.0043e-03, 5.8700e-03,\n -8.1328e-03, -7.4507e-03, 2.9274e-03, 1.0193e-03, -1.0208e-03,\n 3.8230e-03, 3.2685e-05, 9.1122e-03, 2.4731e-03, 6.0597e-03,\n 2.4155e-03, -2.3259e-03, 2.3043e-03, -1.5891e-03, -1.9807e-03,\n 6.1271e-03, -1.4964e-03, 3.5171e-03, 7.7792e-03, 1.0925e-04,\n 3.9933e-03, 3.3252e-03, 8.4593e-04, -3.0830e-04, 4.2663e-03,\n 4.3743e-03, -5.9924e-03, -8.8675e-04, 2.1773e-03, -6.7039e-04,\n 6.7685e-03, 4.7706e-03, -1.6916e-02, 5.6848e-03, 3.4385e-03,\n 4.4993e-04, 7.6635e-03, 1.5199e-03, 2.3621e-03, 6.3722e-03,\n -1.0517e-03, -2.6127e-03, -2.1996e-02, -5.2905e-03, 5.8359e-03,\n 1.1746e-03, 2.1047e-03, -3.3391e-03, 7.7638e-03, 7.5436e-04,\n 4.2364e-03, -2.0896e-03, 4.4647e-03, -5.9173e-03, 2.4806e-03,\n -1.4361e-03, -1.0431e-02, -1.3892e-03, 1.1402e-03, 8.3113e-03,\n 3.5927e-03, 4.5064e-04, -4.0534e-04, -2.4619e-03, -1.0905e-02,\n 4.0226e-04, 3.5902e-03, 2.6361e-03, -7.3587e-03, 7.8692e-04,\n 2.0617e-04, -1.0349e-02, -5.7417e-03, 2.0145e-03, 1.1033e-03,\n -9.8922e-04, 6.7843e-03, -3.1289e-02, 2.0958e-03, 2.8625e-04,\n 3.0840e-03, 1.0440e-03, 6.0768e-03, 8.6609e-03, 4.0496e-04,\n 1.4288e-03, 1.4066e-03, -3.9976e-03, 9.1584e-04, 1.2039e-03,\n -3.4511e-03, -2.8757e-04, -9.1710e-04, -1.7791e-03, 7.2256e-04,\n -1.7579e-03, 2.4886e-04, 1.7816e-03, 2.6367e-04, -6.0156e-04,\n -4.0217e-03, 6.9579e-03, -2.8981e-03, -1.5744e-03, -3.2596e-04,\n 1.2704e-03, -6.3521e-04, 3.2689e-03, -4.3258e-03, -6.4795e-04,\n -6.2779e-04, 4.3000e-04, 6.2237e-03, 4.1555e-03, 3.4757e-03,\n -1.0686e-03, 1.0833e-02, 2.5381e-03, 8.8610e-04, 2.9632e-03,\n 4.0719e-03, 4.0153e-03, -8.3665e-05, -9.5102e-04, 5.0035e-03,\n 8.6628e-03, -2.0118e-03, -1.5434e-03, 2.1903e-03, 3.0767e-03,\n 3.2766e-03, -2.3577e-03, -3.4769e-03, -3.6919e-03, -3.4722e-03,\n 2.5087e-03, -3.4197e-03, -7.4781e-03, -3.9665e-03, -4.1171e-03,\n 1.1194e-03, 2.1756e-03, -5.7964e-03, 6.3798e-03, -2.0853e-03,\n -4.0546e-03, 1.3200e-03, 3.3980e-04, -4.9518e-04, -1.2453e-02,\n -1.9042e-03, 2.6648e-03, 3.4314e-03, -1.8304e-03, 3.3627e-03,\n 5.9887e-04, -1.2419e-02, -6.6134e-03, 4.3274e-03, -5.3047e-03,\n 8.2349e-03, -1.0247e-03, 3.8936e-04, -2.0461e-03, 2.7831e-03,\n 2.1013e-05, 8.2277e-03, -6.8602e-03, 1.7722e-03, -1.1082e-02,\n -4.5614e-04, 3.3836e-05, 6.2614e-03, 5.5343e-03, 5.8085e-03,\n 2.8407e-03, -1.0788e-02, -3.1252e-03, -5.3851e-04, -2.3826e-03,\n 1.0109e-02, 6.1350e-03, 7.7391e-03, 2.3746e-03, 4.5760e-03,\n -9.9079e-04, 2.5315e-03, -6.3237e-03, 5.7367e-04, -2.8088e-03,\n -1.5058e-03, 7.0347e-03, 9.2792e-03, -7.4030e-03, -1.1045e-02,\n 5.8563e-04, 9.6871e-04, 9.6280e-03, 5.6997e-03, 3.1855e-03,\n -2.1639e-04, 3.6134e-03, -1.9038e-03, -2.0638e-03, -2.5208e-03,\n 1.8576e-03, -1.8517e-03, -7.4490e-04, -4.1233e-03, -2.1749e-03,\n 7.6545e-03, -5.7078e-03, -2.1271e-03, 2.0840e-03, 7.4474e-03,\n -4.5410e-03, -8.0975e-03, 2.7728e-04, -2.4818e-03, 1.5814e-03,\n 1.3655e-02, 1.4408e-03, -1.3459e-03, 1.2913e-03, -7.5726e-04,\n -1.5302e-03, -1.6291e-03, -1.5803e-03, 8.3150e-03, -1.0398e-02,\n -1.7043e-04, -3.7445e-03, -5.1762e-04, 1.1195e-03, 3.9528e-04,\n -9.5867e-04, -5.4700e-03, -1.5749e-03, -8.7905e-03, 2.9511e-03,\n 4.1391e-03, 3.3896e-03, 2.5061e-01, -1.5121e-03, 2.4166e-03,\n 1.0317e-03, -7.2631e-04], device='cuda:0')",
18
+ "exp_avg_sq": "tensor([2.0876e-04, 9.4380e-05, 1.8335e-04, 1.6551e-04, 2.1584e-04, 1.2429e-04,\n 4.1989e-04, 5.8001e-05, 6.5823e-05, 9.9308e-05, 3.4048e-04, 7.6229e-05,\n 9.6966e-05, 1.8480e-04, 5.9312e-05, 1.1249e-04, 1.4876e-04, 1.0643e-04,\n 1.0694e-04, 1.6508e-04, 6.1610e-05, 9.4324e-05, 9.2570e-05, 2.9336e-04,\n 2.8365e-04, 1.0856e-04, 1.6818e-04, 1.3389e-04, 1.1846e-04, 1.3333e-04,\n 1.4567e-04, 1.2691e-04, 1.6539e-04, 1.2630e-04, 1.7674e-04, 6.5381e-05,\n 1.3153e-04, 1.1455e-04, 2.0313e-04, 2.5368e-04, 2.2676e-04, 1.9018e-04,\n 2.9231e-04, 1.3501e-04, 7.0723e-05, 2.4581e-04, 2.3420e-04, 1.7381e-04,\n 7.6135e-05, 1.1935e-04, 1.2923e-04, 2.1628e-04, 2.0010e-04, 8.1877e-05,\n 2.0809e-04, 2.3649e-04, 5.4354e-05, 2.6546e-04, 4.4548e-04, 2.3164e-04,\n 1.0814e-04, 2.2634e-04, 2.2025e-04, 1.1694e-04, 2.6021e-04, 2.0101e-04,\n 1.1293e-04, 1.2143e-04, 8.6711e-05, 1.1818e-04, 1.0496e-04, 1.8950e-04,\n 9.8865e-05, 8.4472e-05, 1.5213e-04, 3.5406e-04, 8.6973e-05, 3.5542e-04,\n 7.4748e-05, 6.3827e-05, 1.2674e-04, 2.1752e-04, 2.0380e-04, 5.3842e-05,\n 2.5624e-04, 1.4299e-04, 2.0071e-04, 8.9355e-05, 5.1581e-05, 1.8135e-04,\n 8.2992e-05, 2.0735e-04, 6.8483e-05, 1.7676e-04, 1.3007e-04, 4.0481e-04,\n 1.2230e-04, 8.7040e-05, 2.3491e-04, 5.6819e-05, 5.6015e-05, 1.2745e-04,\n 7.4021e-05, 1.9670e-04, 1.3825e-04, 8.6340e-05, 1.6578e-04, 1.1578e-04,\n 1.3872e-04, 9.7552e-05, 1.6531e-04, 6.1043e-05, 1.0676e-04, 7.3112e-05,\n 1.0760e-04, 3.6406e-04, 2.8709e-04, 1.1431e-04, 1.7789e-04, 1.7382e-04,\n 8.9050e-05, 8.6821e-05, 1.8321e-04, 2.9249e-04, 1.0910e-04, 1.3376e-04,\n 8.2478e-05, 2.2467e-04, 8.2994e-05, 1.3832e-04, 9.8880e-05, 1.3678e-04,\n 1.2266e-04, 2.1591e-04, 2.5889e-04, 9.7051e-04, 1.2047e-04, 9.4213e-05,\n 5.9177e-04, 2.0970e-04, 1.3501e-04, 6.6300e-05, 6.3526e-05, 1.4451e-04,\n 2.1887e-04, 2.2369e-04, 1.2685e-04, 2.3427e-04, 5.4820e-04, 1.9039e-04,\n 1.1841e-04, 9.8137e-04, 1.9491e-04, 2.5672e-04, 9.8873e-05, 5.7488e-04,\n 1.0791e-04, 3.3524e-04, 1.2572e-04, 1.0422e-04, 9.8822e-05, 4.1337e-04,\n 2.3641e-04, 2.0512e-04, 1.3148e-04, 1.3631e-04, 4.3175e-04, 1.6998e-04,\n 7.9858e-05, 9.6413e-05, 9.6526e-05, 7.8393e-05, 1.1139e-04, 2.0447e-04,\n 9.7544e-05, 2.0376e-04, 1.1483e-04, 7.6276e-05, 1.0645e-04, 1.0307e-04,\n 2.8481e-04, 2.4188e-04, 4.1087e-04, 9.7926e-05, 1.6887e-04, 6.9852e-05,\n 1.1963e-04, 7.2540e-05, 1.7072e-04, 1.1633e-04, 1.9441e-04, 7.5399e-05,\n 9.0130e-05, 2.4428e-04, 1.5766e-04, 6.4957e-05, 5.0507e-04, 7.8613e-05,\n 4.8942e-04, 1.4283e-04, 2.2450e-04, 1.7965e-04, 2.3366e-04, 8.8950e-05,\n 9.4863e-05, 1.0093e-04, 1.0727e-04, 7.9012e-05, 3.1978e-04, 7.8013e-05,\n 3.6512e-04, 4.2011e-04, 1.2316e-04, 2.2945e-04, 2.0116e-04, 1.8828e-04,\n 7.9316e-05, 2.4754e-04, 3.6777e-04, 6.0574e-05, 2.5558e-04, 2.4301e-04,\n 6.9247e-05, 6.0874e-05, 1.6604e-04, 5.5742e-04, 6.0114e-04, 9.9479e-05,\n 8.9035e-05, 1.3476e-04, 1.0949e-04, 1.2855e-04, 9.8418e-05, 7.7128e-05,\n 1.0855e-04, 2.9864e-04, 3.3026e-04, 2.0385e-04, 6.6766e-05, 1.5235e-04,\n 1.3709e-03, 1.4972e-04, 1.6881e-04, 2.5438e-04, 2.2431e-04, 2.0128e-04,\n 2.1678e-04, 2.3641e-04, 1.4725e-04, 1.2971e-04, 7.2789e-04, 3.4410e-04,\n 7.9025e-05, 9.8474e-05, 1.8660e-04, 1.1017e-04, 2.8214e-04, 2.8194e-04,\n 2.9039e-04, 1.6929e-04, 1.9197e-04, 1.5807e-04, 8.0998e-05, 2.1435e-04,\n 2.1776e-04, 2.5594e-04, 3.2509e-04, 3.2719e-04, 2.7712e-04, 6.6624e-05,\n 1.2998e-04, 2.2417e-04, 2.6456e-04, 5.2756e-05, 1.0519e-04, 2.1520e-04,\n 2.3834e-04, 1.1464e-04, 8.2129e-05, 3.9689e-04, 1.1701e-04, 1.1577e-04,\n 1.0268e-04, 7.2523e-04, 5.0100e-04, 8.3686e-05, 9.6966e-05, 9.7696e-05,\n 2.3297e-04, 3.8408e-04, 2.9064e-04, 5.3296e-04, 1.2188e-04, 1.1431e-04,\n 1.0830e-04, 1.4278e-04, 1.1014e-04, 7.0887e-04, 2.1166e-04, 1.2249e-04,\n 1.1701e-04, 1.9958e-04, 1.7748e-04, 1.4060e-04, 2.2359e-04, 6.7980e-05,\n 1.6225e-04, 1.5819e-04, 1.4044e-04, 1.1504e-04, 1.6722e-04, 2.1855e-04,\n 1.0594e-04, 3.0968e-04, 7.5912e-05, 1.9707e-04, 1.2811e-04, 1.2621e-04,\n 1.8465e-04, 6.7501e-05, 1.2755e-04, 1.7281e-04, 1.2445e-04, 1.7874e-04,\n 1.2612e-04, 1.0293e-04, 3.4760e-04, 3.3532e-04, 2.2806e-04, 2.4931e-04,\n 1.0030e-04, 1.2299e-04, 8.2513e-05, 1.4538e-04, 1.5282e-04, 4.6280e-05,\n 1.4798e-04, 3.7410e-04, 7.8217e-05, 1.2964e-04, 8.6548e-05, 9.6633e-05,\n 8.2609e-04, 1.1571e-04, 6.0569e-05, 8.5914e-05, 1.2250e-04, 1.4428e-04,\n 1.1805e-04, 4.6985e-04, 2.8467e-04, 9.8006e-05, 1.0587e-04, 1.3788e-04,\n 1.1474e-04, 2.1780e-04, 2.7446e-04, 1.8389e-04, 5.7860e-05, 2.7026e-04,\n 2.6469e-04, 1.4736e-04, 1.8571e-04, 1.1555e-04, 9.9432e-05, 8.0844e-05,\n 1.4007e-03, 6.6971e-05, 1.6839e-04, 2.9648e-04, 8.9538e-05, 7.4976e-05,\n 6.6898e-05, 2.2561e-04, 2.1153e-04, 1.0232e-04, 7.6556e-05, 1.6681e-04,\n 1.1408e-04, 1.0220e-04, 6.2161e-05, 6.0680e-04, 2.9313e-04, 1.8142e-04,\n 1.1775e-04, 7.1000e-05, 6.7741e-05, 3.0584e-04, 2.5623e-04, 1.3414e-04,\n 2.1424e-04, 1.1501e-04, 2.5064e-04, 1.2042e-04, 1.0218e-04, 1.3931e-04,\n 9.7901e-05, 1.3174e-04, 1.0349e-04, 1.8495e-04, 2.6731e-04, 1.0091e-04,\n 2.8715e-04, 7.1684e-05, 2.5904e-04, 9.2880e-05, 1.2607e-04, 2.9841e-04,\n 1.3610e-04, 1.4281e-04, 1.3412e-04, 1.3584e-04, 2.0359e-04, 8.7812e-05,\n 1.0064e-04, 6.3425e-05, 1.7137e-04, 9.9454e-05, 1.6269e-04, 1.1222e-04,\n 6.6629e-05, 1.3027e-04, 8.9020e-05, 4.1602e-04, 1.2044e-04, 2.9066e-04,\n 2.1789e-04, 1.8778e-04, 6.6310e-05, 3.3011e-04, 1.7282e-04, 8.0369e-05,\n 1.8043e-04, 1.6030e-04, 1.2863e-04, 9.3627e-05, 9.1209e-05, 1.6459e-04,\n 1.8510e-04, 1.3358e-04, 2.0011e-04, 1.6362e-04, 5.1137e-04, 1.5669e-04,\n 1.0316e-04, 1.2055e-04, 1.8425e-04, 1.4736e-04, 5.2887e-05, 2.6595e-04,\n 1.0632e-04, 8.9105e-05, 1.4673e-04, 1.8124e-04, 2.2100e-04, 5.7702e-05,\n 2.0742e-04, 2.4241e-04, 1.5109e-04, 2.4639e-04, 3.0999e-04, 2.8575e-04,\n 2.7941e-04, 9.3645e-05, 1.3695e-04, 9.3745e-05, 1.0506e-04, 8.3428e-05,\n 3.1674e-04, 1.0812e-04, 1.5229e-04, 8.5313e-05, 2.1748e-04, 1.7515e-04,\n 1.1103e-04, 5.7565e-04, 1.2126e-04, 1.1690e-04, 2.0746e-04, 2.2280e-04,\n 1.0262e-04, 9.2259e-05, 2.6066e-04, 2.8716e-04, 1.0439e-04, 3.9211e-04,\n 2.3232e-04, 1.3652e-04, 7.3281e-05, 1.4089e-04, 1.5639e-04, 7.8608e-05,\n 2.7493e-04, 1.5180e-04, 1.4385e-04, 2.1646e-04, 3.2443e-04, 6.6157e-05,\n 3.1148e-04, 6.9207e-05, 7.6211e-05, 4.1581e-04, 1.6624e-04, 2.0300e-04,\n 1.6920e-04, 2.7138e-04, 2.8777e-04, 3.3334e-01, 1.5671e-04, 1.1741e-04,\n 3.0472e-04, 1.0353e-04], device='cuda:0')"
19
+ },
20
+ "3": {
21
+ "step": "tensor(10016.)",
22
+ "exp_avg": "tensor([ 2.2343e-03, 2.9648e-03, 2.3136e-03, -1.2029e-02, -6.8101e-03,\n -5.0743e-03, 3.0855e-03, 2.0534e-03, -4.7125e-03, 1.8780e-03,\n 4.3007e-03, 1.4204e-05, 4.4408e-03, -8.7674e-05, -1.6956e-03,\n -8.6901e-03, 7.4716e-03, 2.2880e-03, 8.9471e-03, -4.4506e-03,\n 8.5033e-03, -1.3727e-02, -1.4098e-02, -5.1476e-03, 4.2040e-03,\n 3.0455e-03, -6.6591e-03, 3.1945e-03, -1.1155e-03, 3.0527e-04,\n -1.5431e-03, -4.0676e-04, 3.4025e-04, -2.0233e-03, -2.7749e-03,\n -1.7976e-03, -9.6173e-03, 4.9957e-03, 4.7707e-03, 3.5157e-03,\n -6.3425e-03, 6.5959e-04, -5.9545e-04, 2.5299e-03, -2.8100e-03,\n -1.9600e-04, 8.4590e-04, 4.1958e-03, 5.4795e-03, -6.0368e-03,\n -4.6910e-03, -3.0229e-03, 5.5020e-03, -1.0803e-03, -4.0281e-04,\n 6.6709e-03, 1.6221e-03, -6.6036e-03, 6.0849e-04, -5.8969e-03,\n 4.0422e-03, 2.8431e-04, -3.1338e-03, -1.3937e-03, 3.9006e-03,\n 5.4319e-03, -1.9179e-03, -2.4110e-03, 3.8069e-03, 4.6746e-03,\n -1.6502e-03, -3.1291e-04, 6.4668e-03, 2.1613e-03, 2.1594e-03,\n -2.3114e-03, 3.6070e-03, -7.7532e-04, -6.5108e-04, -2.7819e-03,\n 3.0938e-03, -3.5281e-03, 2.6690e-03, 6.7375e-03, -2.7094e-03,\n 4.3832e-03, -2.0254e-03, 1.1912e-03, 6.1399e-03, 9.3371e-04,\n 3.0112e-03, 7.5379e-03, -2.3503e-04, 7.9615e-03, -4.7393e-03,\n -2.7205e-03, 1.0010e-03, 1.8975e-04, -1.9497e-03, 3.5043e-03,\n 2.3917e-03, -1.9760e-03, -2.5322e-03, 5.1994e-03, 3.5288e-03,\n -6.9291e-03, 6.2918e-03, 2.3425e-03, -1.8210e-04, 7.2289e-04,\n 4.5938e-03, 2.5275e-04, 1.1123e-03, 2.4106e-04, -2.1422e-04,\n 4.8342e-03, -1.1172e-03, 1.3028e-03, 5.7473e-03, 1.8439e-03,\n 1.8120e-03, 2.1264e-03, -3.5271e-04, -3.7502e-04, -1.1107e-02,\n 5.6660e-03, 3.6888e-03, -1.7682e-03, -2.0655e-03, 2.0232e-03,\n -1.3289e-03, -1.1837e-02, 3.9142e-03, 8.0482e-04, -7.0730e-04,\n -4.0453e-03, -3.3461e-03, -9.2799e-03, -3.4326e-04, -7.8086e-03,\n 2.9951e-03, -2.2976e-03, -6.2480e-03, -5.5985e-04, 5.2672e-03,\n -5.1053e-03, -6.8566e-04, -2.4248e-03, 4.6377e-03, -1.8438e-03,\n 5.8076e-04, -3.2811e-03, -1.0459e-02, -1.7582e-04, -3.8483e-03,\n -3.3203e-03, -4.2108e-03, 4.1738e-03, -7.9138e-03, -2.3451e-03,\n -6.9127e-04, 7.3557e-03, 2.9754e-03, -1.9989e-04, -6.0420e-03,\n 7.9525e-04, 1.9735e-04, 2.7386e-03, 1.7816e-03, 2.9070e-03,\n -4.3518e-03, -3.5982e-03, -8.2148e-04, 5.5825e-03, 1.4812e-03,\n 2.4916e-03, 3.0065e-03, 4.9127e-03, -3.3485e-03, 2.9601e-03,\n -3.9668e-03, -9.1989e-03, -1.1101e-03, 4.8644e-03, -6.1561e-03,\n 1.2268e-03, 3.0635e-03, -6.2264e-03, -2.4909e-03, -4.3818e-03,\n 4.7117e-03, 1.8048e-03, 3.5282e-03, -1.7461e-03, 5.9275e-03,\n 4.1136e-03, -2.7045e-03, -1.3852e-03, 1.7367e-03, 3.4480e-03,\n 1.2907e-03, -7.3187e-03, 2.7814e-03, -2.2735e-03, -5.4299e-04,\n 5.0674e-04, 1.6185e-04, 1.2828e-03, 3.8466e-03, -4.3199e-03,\n -5.2889e-03, 1.3178e-03, 2.6778e-04, 7.6381e-03, 4.3479e-04,\n 1.8193e-04, 4.0857e-03, -3.0023e-04, -3.2355e-03, -2.9138e-03,\n 4.4785e-03, 8.9910e-03, 2.1476e-03, -9.9287e-04, 7.5865e-03,\n -9.7689e-03, 6.7345e-03, -1.4507e-03, 4.9670e-03, 4.3101e-04,\n 2.5780e-03, 5.2890e-03, -1.8299e-03, 2.2588e-03, 7.0923e-03,\n -6.4301e-03, 1.9967e-03, 4.2070e-03, 4.4025e-03, -7.4346e-04,\n -5.7926e-03, -3.5324e-03, 9.8615e-04, 1.1966e-04, 2.0349e-03,\n 2.1912e-03, 7.2792e-03, -3.6345e-03, -1.5901e-03, 2.8734e-03,\n 5.5629e-03, 1.4856e-03, 2.9099e-04, 4.9176e-03, 3.8930e-03,\n -1.0383e-02, 2.8639e-03, 4.4455e-03, -5.4779e-04, 2.0140e-04,\n 3.4286e-03, 2.1559e-03, -1.5102e-03, -2.4368e-03, 5.7675e-03,\n -6.1446e-03, -5.4093e-03, 1.1886e-03, 1.6290e-03, -3.3095e-03,\n 2.8563e-03, 1.2244e-04, 7.3054e-03, 2.1065e-03, 7.8855e-03,\n 2.1908e-03, -2.3079e-03, 1.8839e-03, -7.7964e-04, -4.2033e-04,\n 4.7613e-03, -1.2770e-03, 3.7686e-03, 4.4739e-03, 4.5911e-04,\n 5.8082e-03, 5.2877e-03, -3.1642e-04, -5.4214e-04, 3.0599e-03,\n 3.8425e-03, -2.9150e-03, -1.9297e-03, 2.8148e-03, 4.2700e-04,\n 6.3727e-03, 4.6701e-03, -9.0194e-03, 6.4385e-03, 4.3695e-03,\n 1.7908e-03, 7.0783e-03, 1.6218e-03, 2.3589e-03, 5.5157e-03,\n -1.7000e-03, -2.1735e-03, -2.4940e-02, -7.1424e-03, 5.4571e-03,\n 8.9585e-04, 1.8634e-03, -3.8960e-03, 6.7149e-03, 1.9155e-03,\n 4.9626e-03, -2.1690e-03, 4.1487e-03, -5.8654e-03, 1.9429e-03,\n -1.8484e-03, -9.9403e-03, 2.3375e-04, 2.6402e-04, 7.8511e-03,\n 4.8038e-03, 4.9830e-04, -2.1007e-04, -2.5316e-03, -9.3044e-03,\n -2.2994e-04, 3.0943e-03, 3.5758e-03, -5.6337e-03, 5.0022e-04,\n 5.0607e-05, -6.2003e-03, -2.9319e-03, 2.1250e-03, 1.4555e-03,\n -1.2173e-03, 6.8447e-03, -1.7994e-02, 2.8860e-03, 7.4199e-04,\n 3.7063e-03, 1.5736e-03, 5.2650e-03, 8.6541e-03, -6.7081e-05,\n 1.1621e-03, 1.4348e-03, -3.6009e-03, 1.3925e-03, 6.7595e-04,\n -1.2141e-03, 1.5981e-03, 1.8083e-04, -1.9401e-03, 1.2613e-03,\n -1.1765e-03, -2.3795e-04, 1.1086e-03, 9.0569e-04, -5.7504e-04,\n -5.3191e-03, 2.3930e-03, -4.0840e-03, -3.7080e-04, 1.8876e-04,\n 1.4494e-03, -8.2883e-04, 5.6719e-03, -5.0917e-03, -6.0110e-04,\n -6.4335e-04, 8.9498e-04, 6.9070e-03, 6.0881e-03, 3.5399e-03,\n -9.8796e-04, 6.9395e-03, 1.8927e-03, -3.9252e-04, 2.8111e-03,\n 2.9601e-03, 4.7562e-03, 2.6584e-04, -2.6951e-03, 5.3063e-03,\n 8.1210e-03, -2.9369e-03, 2.9377e-05, 3.3956e-03, 3.9245e-03,\n 3.5222e-03, -1.3347e-03, -4.4184e-03, -7.8812e-03, -4.5048e-03,\n 1.4448e-03, -1.8739e-03, -5.9781e-03, -2.5950e-03, -3.0431e-03,\n 2.9734e-03, 1.3288e-03, -4.1254e-03, 5.2756e-03, -2.1367e-03,\n -3.6423e-03, 2.0072e-03, -8.8297e-04, -4.1694e-04, -2.0814e-02,\n 1.4755e-04, 1.5693e-03, 4.4171e-03, -7.2721e-04, 4.4566e-03,\n 8.6885e-04, -1.1095e-02, -7.9510e-03, 2.9101e-03, -6.5852e-03,\n 6.3894e-03, -3.5018e-04, -8.4567e-04, -1.1049e-03, 2.4588e-03,\n 1.3189e-04, 1.3470e-02, -6.8804e-03, 2.1526e-03, -9.3119e-03,\n -5.8653e-04, 1.1418e-03, 8.4844e-03, 5.7694e-03, 6.2388e-03,\n 2.8920e-03, -8.4639e-03, -3.0648e-03, -2.9600e-03, -3.5338e-03,\n 9.6215e-03, 4.5924e-03, 6.4164e-03, 3.1124e-03, 2.4935e-03,\n -1.0152e-03, 2.9732e-03, -3.8660e-03, 1.7370e-03, -3.3074e-03,\n -1.0539e-03, 8.1356e-03, 7.0475e-03, -7.6105e-03, -6.6368e-03,\n 6.5422e-04, 1.1708e-03, 8.7724e-03, 6.2590e-03, 4.0336e-03,\n -1.1962e-03, 3.3759e-03, -2.0346e-03, -1.1481e-03, -5.2331e-03,\n 1.3798e-03, -2.0501e-03, 2.8123e-04, -4.6198e-03, -3.0643e-03,\n 3.6644e-03, -1.0167e-02, -1.5491e-03, 3.4962e-03, 7.0967e-03,\n -5.3655e-03, -9.4554e-03, 2.2844e-04, -1.5834e-03, 5.9350e-04,\n 1.0843e-02, 1.1880e-03, -1.1803e-03, 2.3009e-03, -5.8615e-04,\n -1.8276e-03, -1.8342e-03, -1.5911e-03, 9.5384e-03, -1.4258e-02,\n -4.7428e-04, -2.9326e-03, -7.2962e-04, 2.6976e-04, 7.4247e-04,\n -4.1898e-03, -3.2035e-03, -1.2785e-03, -8.6013e-03, 2.5878e-03,\n 3.3088e-03, 2.0808e-03, 1.3706e-02, -3.9166e-05, 4.9333e-03,\n 1.2107e-03, -5.5478e-05], device='cuda:0')",
23
+ "exp_avg_sq": "tensor([1.4641e-04, 1.3761e-04, 1.3727e-04, 1.3585e-04, 1.9105e-04, 1.6445e-04,\n 1.7384e-04, 1.0801e-04, 1.1237e-04, 1.2136e-04, 2.0517e-04, 1.0009e-04,\n 1.8804e-04, 1.3068e-04, 1.3120e-04, 1.9615e-04, 1.5708e-04, 1.1981e-04,\n 1.1580e-04, 1.8630e-04, 1.0436e-04, 1.9222e-04, 1.3172e-04, 1.9819e-04,\n 1.3187e-04, 1.9587e-04, 1.6053e-04, 2.0027e-04, 1.6981e-04, 1.7757e-04,\n 1.5285e-04, 2.7389e-04, 1.3821e-04, 2.7879e-04, 1.5548e-04, 1.3733e-04,\n 1.5730e-04, 1.2634e-04, 1.6651e-04, 2.7871e-04, 1.9113e-04, 2.0522e-04,\n 2.3197e-04, 1.3895e-04, 1.6514e-04, 1.9183e-04, 1.6282e-04, 1.4446e-04,\n 1.2709e-04, 1.8236e-04, 1.8630e-04, 1.6784e-04, 1.5256e-04, 1.3112e-04,\n 1.5506e-04, 1.7276e-04, 8.6129e-05, 1.3182e-04, 2.2028e-04, 1.6478e-04,\n 1.5545e-04, 1.4444e-04, 2.7434e-04, 1.4479e-04, 2.6968e-04, 2.7214e-04,\n 1.2544e-04, 1.2996e-04, 1.2820e-04, 1.2910e-04, 1.7967e-04, 1.4446e-04,\n 1.3565e-04, 1.9815e-04, 1.7469e-04, 1.7763e-04, 1.6200e-04, 1.6557e-04,\n 1.2387e-04, 1.2059e-04, 1.4497e-04, 2.4345e-04, 1.3582e-04, 1.3913e-04,\n 2.1815e-04, 1.7766e-04, 2.0470e-04, 1.6095e-04, 1.1624e-04, 1.7703e-04,\n 2.0397e-04, 2.1774e-04, 9.3148e-05, 1.7964e-04, 1.9310e-04, 1.6607e-04,\n 1.1713e-04, 1.4894e-04, 1.3727e-04, 8.5529e-05, 1.0138e-04, 1.5970e-04,\n 1.1503e-04, 1.5387e-04, 1.1839e-04, 1.4608e-04, 1.9999e-04, 1.1384e-04,\n 1.6850e-04, 1.6841e-04, 1.8147e-04, 9.4110e-05, 2.0166e-04, 1.0002e-04,\n 1.2433e-04, 1.9806e-04, 1.4669e-04, 1.2615e-04, 2.4717e-04, 2.0393e-04,\n 1.3898e-04, 1.6943e-04, 2.3117e-04, 2.0378e-04, 2.1391e-04, 1.9218e-04,\n 9.2523e-05, 1.1253e-04, 1.5122e-04, 2.2117e-04, 1.5968e-04, 1.6003e-04,\n 1.6016e-04, 1.6090e-04, 1.3846e-04, 1.7811e-04, 1.1874e-04, 1.3976e-04,\n 1.6057e-04, 2.7515e-04, 1.8734e-04, 9.8338e-05, 1.3244e-04, 1.5291e-04,\n 2.0244e-04, 1.7217e-04, 1.3295e-04, 1.5346e-04, 2.1746e-04, 1.8123e-04,\n 1.4605e-04, 2.8362e-04, 1.7257e-04, 2.2621e-04, 1.1509e-04, 2.4152e-04,\n 1.2708e-04, 2.2545e-04, 1.6728e-04, 8.7226e-05, 1.1911e-04, 1.9011e-04,\n 1.6818e-04, 1.6958e-04, 1.5393e-04, 1.2734e-04, 1.9937e-04, 1.2852e-04,\n 1.2843e-04, 1.7726e-04, 1.8152e-04, 1.4732e-04, 1.6818e-04, 1.7821e-04,\n 1.3851e-04, 1.2804e-04, 1.7446e-04, 1.1224e-04, 1.6934e-04, 1.3436e-04,\n 2.8745e-04, 1.7571e-04, 1.7619e-04, 1.3465e-04, 1.8298e-04, 1.3384e-04,\n 1.5198e-04, 1.2905e-04, 1.9151e-04, 1.5958e-04, 1.9758e-04, 9.6969e-05,\n 1.1326e-04, 1.7624e-04, 1.6070e-04, 1.4948e-04, 2.0894e-04, 1.0095e-04,\n 1.7920e-04, 1.4138e-04, 1.4412e-04, 1.2294e-04, 1.6999e-04, 1.2789e-04,\n 1.6475e-04, 1.6107e-04, 2.4898e-04, 1.1229e-04, 1.3250e-04, 1.2416e-04,\n 2.2792e-04, 1.7984e-04, 2.1316e-04, 1.3656e-04, 1.5058e-04, 1.8962e-04,\n 1.3185e-04, 2.1374e-04, 1.9033e-04, 9.8617e-05, 1.7655e-04, 1.9114e-04,\n 1.2029e-04, 1.0211e-04, 2.0577e-04, 2.0868e-04, 1.7025e-04, 1.2621e-04,\n 1.5964e-04, 1.2694e-04, 1.1250e-04, 1.2244e-04, 1.3894e-04, 1.4699e-04,\n 1.6354e-04, 1.3164e-04, 2.0177e-04, 1.7045e-04, 1.2230e-04, 1.6317e-04,\n 2.4035e-04, 1.4615e-04, 1.7514e-04, 2.2364e-04, 2.2690e-04, 1.7846e-04,\n 1.6013e-04, 1.7557e-04, 1.5730e-04, 1.9023e-04, 1.9117e-04, 2.2414e-04,\n 1.2208e-04, 1.5512e-04, 1.6680e-04, 1.8010e-04, 1.8153e-04, 2.2028e-04,\n 1.8873e-04, 2.2223e-04, 2.0318e-04, 1.9425e-04, 1.1791e-04, 2.6656e-04,\n 1.6449e-04, 1.3471e-04, 2.1637e-04, 1.7952e-04, 1.8269e-04, 1.5147e-04,\n 1.7275e-04, 1.5511e-04, 1.8314e-04, 1.0591e-04, 1.4949e-04, 1.2092e-04,\n 1.5163e-04, 1.3215e-04, 1.5295e-04, 2.7235e-04, 1.1748e-04, 2.0847e-04,\n 1.7528e-04, 1.8873e-04, 1.3722e-04, 1.2508e-04, 1.2137e-04, 1.6103e-04,\n 1.5172e-04, 2.6339e-04, 1.7433e-04, 1.6571e-04, 1.5108e-04, 1.4255e-04,\n 1.2829e-04, 1.7702e-04, 1.2879e-04, 1.8916e-04, 2.3756e-04, 2.2017e-04,\n 1.1435e-04, 1.3940e-04, 1.4342e-04, 1.5668e-04, 2.2496e-04, 1.3188e-04,\n 1.5656e-04, 1.9522e-04, 2.0819e-04, 1.3168e-04, 2.3949e-04, 1.9560e-04,\n 1.8175e-04, 2.4673e-04, 1.2972e-04, 1.8135e-04, 1.4730e-04, 1.8667e-04,\n 1.5208e-04, 1.1672e-04, 1.4734e-04, 1.5589e-04, 1.2469e-04, 1.6775e-04,\n 1.2113e-04, 1.8457e-04, 2.1150e-04, 2.0880e-04, 2.0337e-04, 2.0121e-04,\n 1.1122e-04, 1.4457e-04, 1.0590e-04, 1.4740e-04, 2.0994e-04, 9.3246e-05,\n 1.1340e-04, 1.3408e-04, 1.2203e-04, 1.5763e-04, 1.4647e-04, 1.6795e-04,\n 2.6117e-04, 1.3561e-04, 1.1042e-04, 1.4569e-04, 1.1147e-04, 1.2726e-04,\n 1.5252e-04, 2.0957e-04, 1.5521e-04, 1.6689e-04, 1.0954e-04, 1.1632e-04,\n 1.1102e-04, 1.7254e-04, 1.4534e-04, 1.6942e-04, 1.0397e-04, 1.5797e-04,\n 1.7075e-04, 2.8457e-04, 1.3978e-04, 2.6567e-04, 1.4001e-04, 1.5883e-04,\n 2.3945e-04, 9.5864e-05, 1.4315e-04, 1.8031e-04, 1.0347e-04, 1.2796e-04,\n 1.1092e-04, 1.6475e-04, 1.4112e-04, 1.8480e-04, 1.1621e-04, 1.7462e-04,\n 2.3703e-04, 1.5776e-04, 9.2371e-05, 1.6071e-04, 1.4849e-04, 1.2550e-04,\n 1.4420e-04, 1.6036e-04, 1.4187e-04, 1.9495e-04, 1.5033e-04, 2.6768e-04,\n 1.4079e-04, 1.2749e-04, 1.4502e-04, 1.2419e-04, 1.6334e-04, 1.6139e-04,\n 1.2262e-04, 1.3804e-04, 1.9047e-04, 3.1826e-04, 1.6093e-04, 1.1162e-04,\n 2.0040e-04, 8.2314e-05, 2.0105e-04, 1.7374e-04, 1.2949e-04, 1.4603e-04,\n 1.4475e-04, 1.7155e-04, 1.2615e-04, 1.2722e-04, 1.3101e-04, 1.6064e-04,\n 1.7145e-04, 1.3395e-04, 1.3814e-04, 2.1991e-04, 1.7057e-04, 2.2979e-04,\n 1.1569e-04, 1.3157e-04, 1.7923e-04, 2.1499e-04, 1.8193e-04, 1.5136e-04,\n 1.5919e-04, 1.6450e-04, 8.8687e-05, 1.8271e-04, 1.5671e-04, 1.9656e-04,\n 2.5104e-04, 1.4778e-04, 1.3413e-04, 1.0380e-04, 1.6108e-04, 2.6103e-04,\n 1.4323e-04, 1.3599e-04, 2.9462e-04, 1.6252e-04, 2.1777e-04, 1.8527e-04,\n 2.1072e-04, 1.1824e-04, 1.0841e-04, 1.2249e-04, 1.2919e-04, 1.5707e-04,\n 1.3447e-04, 1.1818e-04, 1.4796e-04, 1.6545e-04, 1.9059e-04, 1.1297e-04,\n 2.4913e-04, 1.3776e-04, 1.5456e-04, 1.8664e-04, 1.4906e-04, 2.1633e-04,\n 2.1337e-04, 1.3139e-04, 2.2049e-04, 9.0765e-05, 1.1807e-04, 1.5806e-04,\n 2.4066e-04, 1.8354e-04, 1.3344e-04, 1.1386e-04, 1.5806e-04, 1.6252e-04,\n 1.8802e-04, 1.8010e-04, 2.0699e-04, 9.8091e-05, 1.6680e-04, 2.1276e-04,\n 1.5978e-04, 1.5721e-04, 1.9126e-04, 3.6074e-04, 1.4494e-04, 2.3075e-04,\n 1.5427e-04, 2.1582e-04, 1.1421e-04, 1.5037e-04, 1.3689e-04, 1.0764e-04,\n 1.8222e-04, 1.4673e-04, 1.7729e-04, 2.2892e-04, 1.7091e-04, 1.4808e-04,\n 1.9978e-04, 1.1424e-04, 1.2003e-04, 1.4425e-04, 1.1207e-04, 1.6074e-04,\n 1.1773e-04, 1.7170e-04, 2.1541e-04, 8.8129e-04, 1.9252e-04, 2.3297e-04,\n 1.9567e-04, 1.4372e-04], device='cuda:0')"
24
+ },
25
+ "4": {
26
+ "step": "tensor(10016.)",
27
+ "exp_avg": "tensor([[ 1.7407e-04, 3.4289e-05, -3.1187e-05, ..., 7.6575e-05,\n -1.2312e-04, -9.9653e-05],\n [-1.6279e-04, -2.0368e-05, -3.9963e-05, ..., 4.5775e-05,\n 4.0355e-04, 4.7058e-05],\n [-2.0378e-04, -8.2900e-05, -5.0687e-05, ..., -1.8993e-04,\n 3.6464e-05, -2.4459e-04],\n ...,\n [ 1.9108e-04, -2.3998e-04, -5.0111e-04, ..., 3.0691e-04,\n -1.3810e-05, 4.0414e-05],\n [-4.6076e-05, 8.0314e-05, 5.1396e-05, ..., 5.2625e-06,\n -1.8147e-07, -4.1366e-05],\n [-2.3686e-04, 2.2101e-05, -3.1860e-04, ..., -1.9213e-05,\n -3.3608e-04, 9.5465e-05]], device='cuda:0')",
28
+ "exp_avg_sq": "tensor([[2.8062e-07, 1.3586e-07, 1.7940e-07, ..., 1.9810e-07, 2.8173e-07,\n 2.0325e-07],\n [4.6857e-07, 2.4735e-07, 2.5250e-07, ..., 5.6012e-07, 5.1566e-07,\n 7.3924e-07],\n [4.1114e-07, 3.4571e-07, 3.0135e-07, ..., 5.2743e-07, 4.8036e-07,\n 5.9528e-07],\n ...,\n [4.3943e-07, 6.0442e-07, 3.5237e-07, ..., 4.5987e-07, 6.3401e-07,\n 5.3771e-07],\n [2.7390e-07, 4.1264e-07, 3.4077e-07, ..., 6.3731e-07, 6.4171e-07,\n 5.7520e-07],\n [5.0612e-07, 4.6748e-07, 3.2607e-07, ..., 5.0182e-07, 5.8638e-07,\n 5.4558e-07]], device='cuda:0')"
29
+ },
30
+ "5": {
31
+ "step": "tensor(10016.)",
32
+ "exp_avg": "tensor([[-2.7842e-05, -2.9783e-06, 7.9586e-05, ..., -9.2905e-05,\n -2.3171e-05, 7.7654e-05],\n [-8.8189e-05, 1.5809e-05, -6.4336e-06, ..., 1.3025e-04,\n 9.6084e-05, 7.7989e-05],\n [-1.6093e-04, -7.4145e-05, -1.6655e-06, ..., -9.1468e-05,\n 1.2023e-05, -1.7912e-04],\n ...,\n [-6.7943e-05, 3.0939e-05, -2.5339e-04, ..., 7.9578e-05,\n -2.0732e-05, 1.4584e-04],\n [ 7.8988e-05, 3.4734e-05, 8.0552e-05, ..., -8.1892e-05,\n 1.6680e-04, 1.3395e-04],\n [ 9.5477e-06, 4.3196e-06, 2.7438e-04, ..., 5.7052e-05,\n 9.1633e-05, -6.4724e-05]], device='cuda:0')",
33
+ "exp_avg_sq": "tensor([[7.2073e-08, 4.4434e-08, 6.4748e-08, ..., 4.9981e-08, 1.0894e-07,\n 7.0026e-08],\n [1.4621e-07, 8.1970e-08, 1.1477e-07, ..., 1.6493e-07, 1.7621e-07,\n 2.4164e-07],\n [1.4042e-07, 1.2253e-07, 1.1564e-07, ..., 2.2682e-07, 1.7833e-07,\n 2.4486e-07],\n ...,\n [1.3787e-07, 2.2425e-07, 2.3411e-07, ..., 1.7992e-07, 2.6487e-07,\n 1.5554e-07],\n [1.4361e-07, 1.3411e-07, 1.1772e-07, ..., 2.1580e-07, 2.5956e-07,\n 2.1208e-07],\n [1.3165e-07, 1.3197e-07, 1.6200e-07, ..., 1.7715e-07, 2.5392e-07,\n 2.8700e-07]], device='cuda:0')"
34
+ },
35
+ "6": {
36
+ "step": "tensor(10016.)",
37
+ "exp_avg": "tensor([ 0.0009, -0.0009], device='cuda:0')",
38
+ "exp_avg_sq": "tensor([7.8672e-06, 7.8672e-06], device='cuda:0')"
39
+ }
40
+ },
41
+ "param_groups": [
42
+ {
43
+ "lr": 9.639601130971382e-05,
44
+ "name": "shared",
45
+ "betas": [
46
+ 0.9,
47
+ 0.999
48
+ ],
49
+ "eps": 1e-08,
50
+ "weight_decay": 1e-05,
51
+ "amsgrad": false,
52
+ "maximize": false,
53
+ "foreach": null,
54
+ "capturable": false,
55
+ "differentiable": false,
56
+ "fused": null,
57
+ "decoupled_weight_decay": true,
58
+ "initial_lr": 0.001,
59
+ "params": [
60
+ 0,
61
+ 1,
62
+ 2,
63
+ 3
64
+ ]
65
+ },
66
+ {
67
+ "lr": 9.639601130971382e-05,
68
+ "name": "scale_256",
69
+ "betas": [
70
+ 0.9,
71
+ 0.999
72
+ ],
73
+ "eps": 1e-08,
74
+ "weight_decay": 1e-05,
75
+ "amsgrad": false,
76
+ "maximize": false,
77
+ "foreach": null,
78
+ "capturable": false,
79
+ "differentiable": false,
80
+ "fused": null,
81
+ "decoupled_weight_decay": true,
82
+ "initial_lr": 0.001,
83
+ "params": [
84
+ 4
85
+ ]
86
+ },
87
+ {
88
+ "lr": 9.639601130971382e-05,
89
+ "name": "scale_512",
90
+ "betas": [
91
+ 0.9,
92
+ 0.999
93
+ ],
94
+ "eps": 1e-08,
95
+ "weight_decay": 1e-05,
96
+ "amsgrad": false,
97
+ "maximize": false,
98
+ "foreach": null,
99
+ "capturable": false,
100
+ "differentiable": false,
101
+ "fused": null,
102
+ "decoupled_weight_decay": true,
103
+ "initial_lr": 0.001,
104
+ "params": [
105
+ 5
106
+ ]
107
+ },
108
+ {
109
+ "lr": 4.865025990345063e-05,
110
+ "name": "fusion",
111
+ "betas": [
112
+ 0.9,
113
+ 0.999
114
+ ],
115
+ "eps": 1e-08,
116
+ "weight_decay": 1e-05,
117
+ "amsgrad": false,
118
+ "maximize": false,
119
+ "foreach": null,
120
+ "capturable": false,
121
+ "differentiable": false,
122
+ "fused": null,
123
+ "decoupled_weight_decay": true,
124
+ "initial_lr": 0.0005,
125
+ "params": [
126
+ 6
127
+ ]
128
+ }
129
+ ]
130
+ },
131
+ "scheduler_state_dict": {
132
+ "T_0": 10,
133
+ "T_i": 10,
134
+ "T_mult": 2,
135
+ "eta_min": 1e-06,
136
+ "T_cur": 8,
137
+ "base_lrs": [
138
+ 0.001,
139
+ 0.001,
140
+ 0.001,
141
+ 0.0005
142
+ ],
143
+ "last_epoch": 8,
144
+ "_step_count": 0,
145
+ "_is_initial": false,
146
+ "_get_lr_called_within_step": false,
147
+ "_last_lr": [
148
+ 9.639601130971382e-05,
149
+ 9.639601130971382e-05,
150
+ 9.639601130971382e-05,
151
+ 4.865025990345063e-05
152
+ ]
153
+ },
154
+ "metrics": {
155
+ "best_val_acc": 71.674,
156
+ "best_epoch": 7,
157
+ "scale_accuracies": {
158
+ "256": 71.166,
159
+ "512": 71.628
160
+ },
161
+ "training_history": {
162
+ "epochs": [
163
+ 1,
164
+ 2,
165
+ 3,
166
+ 4,
167
+ 5,
168
+ 6,
169
+ 7,
170
+ 8
171
+ ],
172
+ "train_loss": [
173
+ 5.60248446921571,
174
+ 4.156974341351384,
175
+ 3.7702821485531595,
176
+ 3.570641661223512,
177
+ 3.4472002215659656,
178
+ 3.3609565016560663,
179
+ 3.300025675433893,
180
+ 3.2499928289709
181
+ ],
182
+ "train_acc": [
183
+ 63.38018384800733,
184
+ 69.48813074329888,
185
+ 70.23393515443342,
186
+ 70.76774534467404,
187
+ 71.32536195515495,
188
+ 71.77877669343653,
189
+ 72.23593801588707,
190
+ 72.61926040867428
191
+ ],
192
+ "val_acc": [
193
+ 67.966,
194
+ 69.586,
195
+ 69.866,
196
+ 70.47,
197
+ 70.854,
198
+ 71.1,
199
+ 71.388,
200
+ 71.674
201
+ ],
202
+ "scale_accs": {
203
+ "256": [
204
+ 66.908,
205
+ 68.868,
206
+ 69.194,
207
+ 69.78,
208
+ 70.214,
209
+ 70.592,
210
+ 70.794,
211
+ 71.166
212
+ ],
213
+ "512": [
214
+ 67.774,
215
+ 69.268,
216
+ 69.844,
217
+ 70.366,
218
+ 70.82,
219
+ 71.088,
220
+ 71.292,
221
+ 71.628
222
+ ]
223
+ },
224
+ "lr": [
225
+ 0.0009755527298894294,
226
+ 0.0009046039886902864,
227
+ 0.0007940987335200904,
228
+ 0.0006548539886902864,
229
+ 0.0005005000000000001,
230
+ 0.0003461460113097139,
231
+ 0.00020690126647990973,
232
+ 9.639601130971382e-05
233
+ ]
234
+ }
235
+ },
236
+ "train_config": {
237
+ "name": "david_training",
238
+ "run_id": "20251012_141246",
239
+ "dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
240
+ "model_variant": "clip_vit_laion_b32",
241
+ "num_classes": 1000,
242
+ "preset": "small_fast",
243
+ "custom_config_path": null,
244
+ "num_classes_override": null,
245
+ "use_belly_override": null,
246
+ "belly_expand_override": null,
247
+ "progressive_training_override": false,
248
+ "scale_warmup_epochs_override": null,
249
+ "num_epochs": 10,
250
+ "batch_size": 1024,
251
+ "learning_rate": 0.001,
252
+ "weight_decay": 1e-05,
253
+ "warmup_epochs": 3,
254
+ "use_rose_loss": true,
255
+ "rose_initial_weight": 0.1,
256
+ "rose_max_weight": 0.5,
257
+ "rose_weight_schedule": "adaptive",
258
+ "use_cayley_loss": false,
259
+ "cayley_weight": 0.001,
260
+ "scale_loss_balance": null,
261
+ "use_mixed_precision": true,
262
+ "gradient_clip": 10.0,
263
+ "scheduler_type": "cosine_restarts",
264
+ "min_lr": 1e-06,
265
+ "freeze_strategy": "never",
266
+ "freeze_threshold": 90.0,
267
+ "unfreeze_on_plateau": true,
268
+ "patience": 10,
269
+ "track_gradients": true,
270
+ "gradient_scale_threshold": 1e-05,
271
+ "gradient_scale_multiplier": 10.0,
272
+ "log_interval": 50,
273
+ "val_interval": 1,
274
+ "save_interval": 5,
275
+ "log_fusion_weights": true,
276
+ "log_loss_components": true,
277
+ "save_format": "safetensors",
278
+ "hf_repo": "AbstractPhil/gated-david",
279
+ "upload_to_hub": true,
280
+ "base_dir": "./david_training",
281
+ "num_workers": 10,
282
+ "pin_memory": true,
283
+ "prefetch_factor": 4,
284
+ "persistent_workers": true
285
+ }
286
+ }