Update best_model_acc64.76_metadata.json - Run 20251012_231445
Browse files
weights/David-fully_shared-weighted_sum/20251012_231445/best_model_acc64.76_metadata.json
ADDED
|
@@ -0,0 +1,258 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 2,
|
| 3 |
+
"optimizer_state_dict": {
|
| 4 |
+
"state": {
|
| 5 |
+
"0": {
|
| 6 |
+
"step": "tensor(11262.)",
|
| 7 |
+
"exp_avg": "tensor([[ 1.8333e-05, -6.7874e-05, -2.9129e-05, ..., -4.4069e-05,\n -4.4142e-06, 2.2160e-05],\n [-1.7258e-04, 3.3545e-04, 1.4173e-04, ..., 7.7682e-05,\n -1.1221e-04, -1.7556e-04],\n [ 8.7062e-06, 1.2141e-05, 6.6850e-05, ..., 1.0181e-04,\n 4.0209e-05, 3.9069e-05],\n ...,\n [-2.1505e-05, 2.2237e-05, -1.2414e-06, ..., 4.5088e-05,\n -9.2726e-06, 3.3886e-05],\n [-3.3952e-05, 8.7274e-05, 6.4996e-05, ..., 4.9540e-06,\n -2.3076e-05, -4.8487e-05],\n [-9.7660e-05, 2.4707e-05, 3.6618e-05, ..., -1.0187e-04,\n -7.8792e-05, 2.7381e-07]], device='cuda:0')",
|
| 8 |
+
"exp_avg_sq": "tensor([[2.4238e-08, 6.3620e-08, 3.6030e-08, ..., 4.1387e-08, 3.0139e-08,\n 1.6520e-08],\n [7.8705e-08, 1.2609e-07, 6.0073e-08, ..., 4.6475e-08, 6.5801e-08,\n 2.9230e-08],\n [1.6652e-07, 2.2544e-07, 1.2458e-07, ..., 9.6627e-08, 4.8529e-08,\n 7.7994e-08],\n ...,\n [3.1778e-08, 1.3462e-07, 3.2063e-08, ..., 2.4434e-08, 1.7453e-08,\n 1.4653e-08],\n [4.2665e-08, 6.1700e-07, 5.1992e-08, ..., 1.1810e-07, 2.7608e-08,\n 3.9445e-08],\n [7.3612e-08, 1.1030e-07, 3.8194e-08, ..., 1.2868e-07, 3.5792e-08,\n 3.5715e-08]], device='cuda:0')"
|
| 9 |
+
},
|
| 10 |
+
"1": {
|
| 11 |
+
"step": "tensor(11262.)",
|
| 12 |
+
"exp_avg": "tensor([ 5.2190e-04, -5.3244e-03, -7.2162e-04, -4.4188e-04, -2.8402e-04,\n -1.1323e-03, -1.5161e-03, 1.9657e-03, -3.6978e-04, 1.2581e-03,\n 1.8668e-03, -2.8657e-03, 1.1186e-03, -1.1675e-03, 1.7468e-03,\n 7.0451e-04, 1.9712e-03, 3.8992e-03, 1.9166e-03, 3.7829e-03,\n 1.5301e-03, -3.9328e-04, 4.3162e-03, -9.7622e-04, -5.1182e-05,\n 3.9106e-04, -3.3839e-03, -1.7580e-03, -1.0579e-03, 4.6107e-04,\n -3.1185e-04, 3.6134e-04, 2.6319e-05, 1.5673e-03, -2.8647e-03,\n -3.8440e-03, 3.3054e-03, 2.1810e-03, -3.6950e-03, -1.6934e-03,\n -1.4479e-03, -6.6224e-04, -1.1566e-06, -1.5537e-03, 3.2961e-04,\n -2.0103e-04, -6.9785e-04, -3.1431e-04, -6.1658e-04, 1.0373e-03,\n 2.0098e-04, 1.8853e-03, 8.4466e-03, 4.2555e-04, -4.3001e-04,\n -2.0311e-04, -1.5237e-03, 1.1850e-04, 4.6682e-04, 1.6130e-03,\n 2.3578e-04, -1.2601e-03, -2.2867e-03, -4.4512e-03, 9.4797e-05,\n -2.6233e-03, 8.4706e-04, -1.3981e-03, -2.4683e-03, 9.7103e-04,\n -9.5464e-04, 1.8775e-03, 2.1358e-03, 5.7291e-04, -2.2054e-03,\n 1.2448e-03, -5.6505e-04, 1.2288e-03, -1.5009e-03, 2.1695e-03,\n -2.3113e-03, -2.1906e-04, 1.0543e-03, 3.2646e-03, -7.7267e-04,\n -6.6507e-04, 2.5271e-03, 4.2252e-05, -2.2760e-03, 1.1174e-03,\n 7.2311e-04, 3.0848e-03, -1.6448e-03, 3.0094e-03, 4.1130e-04,\n -5.4281e-03, 1.4495e-03, -2.8003e-03, 1.8764e-03, 1.6831e-03,\n 8.3601e-04, 3.6257e-03, -1.7786e-03, -1.7281e-04, 2.7749e-03,\n 7.0219e-04, -9.1261e-04, -1.1039e-03, 7.9247e-04, 1.5190e-04,\n -1.0789e-03, -1.2405e-03, -1.8675e-03, 4.9006e-04, 1.7491e-03,\n -1.0499e-03, 4.0036e-04, 6.8533e-04, 2.2103e-03, -3.6094e-03,\n 7.8523e-04, -4.1126e-04, 3.7811e-03, -1.0181e-03, 1.7698e-03,\n -2.2238e-03, 3.5108e-04, -3.2600e-03, 3.2644e-03, 1.1498e-03,\n -6.4146e-04, -6.3976e-04, -8.0477e-04, 1.0446e-03, 5.8788e-03,\n -1.2535e-03, -2.2350e-03, -1.2982e-03, -3.5407e-03, -3.1151e-03,\n 7.3988e-04, 2.4888e-03, -3.7454e-04, -1.7383e-03, -1.8798e-03,\n -1.2256e-04, 8.4496e-04, 1.1558e-03, 1.4876e-03, 9.7790e-04,\n -3.7379e-03, -5.2512e-04, 5.4256e-04, 1.5047e-04, -7.0406e-04,\n 2.1967e-03, -2.6779e-04, -1.2155e-03, -1.4446e-03, 6.1398e-04,\n 1.0713e-03, 1.8314e-04, 1.7152e-03, 4.6829e-04, -1.7911e-03,\n -1.4155e-03, 3.2979e-03, 2.8972e-04, -2.1944e-03, -1.3407e-03,\n 5.2122e-04, -7.8490e-04, -4.6088e-04, -1.6104e-03, -2.3636e-05,\n -1.8738e-03, -2.4248e-04, -4.3436e-03, 2.7452e-03, 2.0438e-03,\n -2.7489e-04, 2.9571e-03, 1.1464e-03, 3.7472e-03, 1.0478e-03,\n 1.4594e-03, -4.2053e-03, -2.4082e-04, -1.1105e-03, 9.9773e-04,\n 2.3277e-04, -2.4956e-03, -4.1228e-03, -9.4385e-05, 2.9007e-03,\n -5.3146e-04, 7.9139e-04, 1.2355e-03, -4.6767e-04, -1.7094e-03,\n -4.2796e-04, -2.4211e-04, -1.5982e-03, -6.0366e-04, 8.7818e-04,\n 6.2747e-04, 4.6862e-04, -6.3708e-04, 2.9362e-03, 1.7580e-03,\n 5.6002e-05, -4.1483e-05, 2.1351e-03, -1.2169e-03, -8.3304e-04,\n 7.9630e-05, 1.8380e-03, 4.5831e-04, 3.2158e-03, -1.1282e-03,\n 8.7204e-04, -8.2735e-04, -1.6538e-03, -7.1311e-04, 3.8588e-04,\n -4.5219e-04, 1.5573e-03, -1.5326e-04, -1.5632e-03, -6.2297e-04,\n -1.7408e-03, -3.4432e-03, 7.4831e-04, 2.8713e-04, 1.3746e-03,\n 4.5151e-05, -1.5198e-03, 9.2112e-04, -7.5759e-04, -3.8610e-04,\n 3.2199e-04, 8.9084e-04, 7.8131e-05, 8.9003e-04, -4.5481e-04,\n 9.4776e-04, 1.4699e-04, -8.8403e-04, 1.2068e-03, 1.7969e-03,\n 1.2351e-04, 1.0462e-03, 6.7216e-04, 9.6253e-06, 3.9198e-03,\n 2.0757e-05, 1.5445e-03, 1.9747e-03, -2.1779e-04, 1.8782e-03,\n 4.3209e-04, -6.8981e-04, 8.4074e-04, 1.6067e-03, -1.0440e-03,\n -3.3442e-03, -2.2272e-03, -1.3032e-03, 3.3825e-04, -5.1197e-04,\n -8.5718e-04, -1.1806e-03, -5.1018e-05, -5.1569e-03, 4.0823e-05,\n -1.2165e-03, 1.0631e-03, 2.0035e-03, 1.8267e-03, 1.6037e-03,\n 1.8316e-03, -2.1904e-04, -4.2742e-04, 6.1593e-04, 2.1665e-03,\n -2.5866e-03, 1.9806e-03, 2.6417e-04, 2.7428e-04, -2.8684e-03,\n -8.6166e-04, -1.6130e-03, 1.9033e-03, -1.1501e-03, -1.0057e-03,\n 6.5489e-05, -7.8112e-04, 4.4770e-04, -6.8524e-04, -1.0147e-03,\n -1.1446e-03, -6.9740e-04, 2.4894e-03, -2.4050e-03, -5.6829e-04,\n -2.8669e-03, -1.3868e-03, -3.9096e-05, -1.8960e-04, -5.5426e-04,\n -2.0941e-03, 1.7166e-04, -2.4639e-03, 1.8480e-03, 7.6943e-04,\n 2.0214e-03, 2.4737e-04, -9.9383e-04, 1.0853e-03, -9.9431e-04,\n -6.8990e-04, -2.7968e-04, -4.8945e-03, 1.5787e-03, 1.2513e-03,\n 2.3810e-03, -2.1637e-03, 3.7987e-03, 6.0793e-04, 6.0898e-04,\n 3.1019e-03, -5.0784e-04, -2.9265e-04, -3.7938e-04, -4.7911e-03,\n 2.3872e-03, 1.8891e-03, -2.0527e-03, 1.0407e-03, 2.0690e-03,\n -1.5728e-04, 7.6200e-04, -5.1581e-04, -2.2631e-03, 1.5460e-03,\n -2.4737e-04, 1.6864e-03, 4.3548e-04, 5.3505e-03, 5.6891e-04,\n 1.3413e-03, 1.5591e-03, -2.6518e-03, 1.1964e-03, -1.6969e-06,\n -2.9345e-03, 8.4884e-04, 1.8812e-03, -3.8445e-04, -6.5502e-04,\n -1.5773e-03, -7.5561e-04, 2.3128e-04, -1.0736e-03, 1.7738e-03,\n 3.7070e-03, -3.8224e-04, -2.3014e-03, -3.7663e-03, -4.2910e-04,\n 2.0216e-04, 1.0220e-04, 3.3564e-04, 1.9287e-03, -2.4800e-03,\n -2.0838e-05, -5.4369e-03, -2.5082e-03, 1.1622e-03, 8.1948e-04,\n 2.1299e-03, -5.9799e-04, 2.4286e-03, -3.8580e-04, 9.5885e-04,\n -2.5418e-03, 1.2915e-03, -4.7334e-04, 1.5905e-03, 7.0246e-05,\n 2.0905e-05, -3.2322e-04, 9.8621e-05, -1.8794e-03, 2.6644e-03,\n 1.2414e-03, -3.1454e-03, -1.3164e-03, -7.4357e-04, 1.2676e-03,\n -3.0153e-03, -8.0881e-05, -6.9705e-05, -2.9417e-04, 7.0138e-04,\n 4.0233e-04, 2.5004e-05, 1.3242e-03, 1.7695e-04, -9.4227e-04,\n 1.1759e-03, 1.1781e-03, 2.4891e-03, -1.2155e-04, -1.4410e-04,\n -1.9248e-04, -1.5107e-03, -1.2530e-04, -6.0916e-04, -6.2150e-04,\n -2.6290e-03, -6.4208e-04, 6.7992e-04, 3.2526e-04, 3.7004e-03,\n 2.7126e-05, 2.4195e-03, 8.0302e-04, 1.1666e-03, 2.0978e-04,\n -5.8804e-04, -1.9603e-03, -7.5481e-04, -4.3217e-04, -1.7654e-03,\n -2.9582e-05, -7.9961e-04, 9.8833e-04, -1.9530e-03, -4.0793e-03,\n -1.9652e-03, -4.5670e-04, 7.1033e-04, -8.3502e-04, 1.6896e-03,\n 1.0255e-04, 5.9480e-04, 4.3354e-03, 1.6881e-03, 4.7484e-04,\n -8.6273e-04, 9.4854e-04, -6.7752e-04, -9.2156e-04, 5.4961e-03,\n 2.5724e-04, -4.9138e-03, 1.0255e-03, -1.5474e-04, 1.5950e-03,\n 6.0311e-04, -1.0858e-03, 2.3201e-03, 2.9673e-03, -1.5107e-03,\n -8.4309e-04, -1.1541e-03, -3.8697e-03, -1.5296e-03, 4.2926e-03,\n 1.5535e-03, -4.8109e-04, 6.0541e-04, 1.7465e-03, 4.7826e-05,\n -1.0409e-03, -2.4406e-04, 1.4233e-03, 2.5605e-03, -4.0718e-03,\n -4.6288e-03, 1.9230e-04, -1.3702e-03, 6.0577e-04, 9.8547e-04,\n -4.6948e-04, 1.1336e-03, 6.7509e-04, 1.9319e-03, -7.6528e-04,\n 5.9857e-04, 2.0084e-03, -4.6678e-03, 3.6797e-04, 1.2359e-03,\n -3.5799e-03, 2.7494e-03, 1.3989e-03, -8.2323e-05, -4.3591e-03,\n -1.3793e-03, 4.2007e-04, 2.2344e-03, -1.4183e-03, 7.8642e-04,\n 1.3034e-03, 1.0545e-03, 2.1168e-04, 9.0568e-04, -3.5864e-04,\n -8.3726e-04, -1.9821e-03], device='cuda:0')",
|
| 13 |
+
"exp_avg_sq": "tensor([2.6603e-05, 4.9810e-05, 7.2678e-05, 6.2618e-05, 4.2502e-05, 1.0668e-04,\n 9.9435e-05, 3.4689e-05, 4.7502e-05, 6.2777e-05, 8.2373e-05, 5.4698e-05,\n 3.5914e-05, 5.2248e-05, 1.2332e-04, 3.7784e-05, 7.4618e-05, 1.3352e-04,\n 2.0405e-05, 5.2518e-05, 4.4399e-05, 6.9594e-05, 9.4462e-05, 5.9877e-05,\n 3.1333e-05, 3.6277e-05, 9.9369e-05, 7.2474e-05, 6.1342e-05, 5.6799e-05,\n 5.1881e-05, 6.0035e-05, 2.6502e-05, 1.4447e-04, 7.9616e-05, 4.9848e-05,\n 3.8462e-05, 7.6141e-05, 8.2664e-05, 8.1689e-05, 4.5812e-05, 3.5566e-05,\n 6.5911e-05, 4.0176e-05, 5.9224e-05, 6.5519e-05, 4.3087e-05, 3.1554e-05,\n 3.2949e-05, 3.7648e-05, 4.8227e-05, 5.0373e-05, 1.2692e-04, 5.3949e-05,\n 8.2585e-05, 5.7705e-05, 5.1810e-05, 4.6244e-05, 5.5856e-05, 5.6316e-05,\n 2.7739e-05, 5.4657e-05, 3.0132e-05, 1.1736e-04, 9.2635e-05, 9.7157e-05,\n 6.5458e-05, 8.9860e-05, 4.3557e-05, 7.9761e-05, 4.7472e-05, 9.4237e-05,\n 6.4591e-05, 1.1577e-04, 5.7729e-05, 7.2267e-05, 9.9171e-05, 4.0304e-05,\n 5.5667e-05, 8.7764e-05, 5.3278e-05, 5.1237e-05, 4.0164e-05, 8.4700e-05,\n 6.2570e-05, 5.5360e-05, 1.3731e-04, 3.2163e-05, 6.0491e-05, 6.7750e-05,\n 5.2896e-05, 3.5575e-05, 9.7942e-05, 2.2361e-04, 7.0369e-05, 8.1908e-05,\n 7.2052e-05, 8.5971e-05, 6.7255e-05, 5.5054e-05, 4.9795e-05, 5.5780e-05,\n 3.9436e-05, 4.3348e-05, 4.1290e-05, 4.1670e-05, 5.9393e-05, 3.2777e-05,\n 7.0571e-05, 2.3665e-05, 1.3182e-04, 2.4894e-05, 8.5752e-05, 5.2834e-05,\n 5.9751e-05, 4.4317e-05, 5.8218e-05, 9.8982e-05, 5.5480e-05, 5.8955e-05,\n 5.5877e-05, 7.2009e-05, 9.3503e-05, 8.2305e-05, 6.1228e-05, 7.5699e-05,\n 5.1022e-05, 9.2785e-05, 8.8689e-05, 6.4705e-05, 5.8841e-05, 1.4150e-04,\n 6.7963e-05, 6.9605e-05, 1.1496e-04, 4.3953e-05, 3.6287e-05, 3.2733e-05,\n 8.7476e-05, 5.1300e-05, 5.0776e-05, 9.1479e-05, 6.7362e-05, 1.3432e-04,\n 5.1431e-05, 7.6767e-05, 6.3713e-05, 5.2843e-05, 1.4071e-04, 4.8708e-05,\n 5.2936e-05, 8.7290e-05, 7.8106e-05, 7.7120e-05, 4.9626e-05, 7.9870e-05,\n 3.1096e-05, 3.0037e-05, 7.2620e-05, 3.8394e-05, 7.1780e-05, 7.8795e-05,\n 7.6424e-05, 5.6750e-05, 7.6679e-05, 4.1440e-05, 7.9626e-05, 5.1587e-05,\n 9.9898e-05, 1.1553e-04, 3.9954e-05, 7.6014e-05, 4.3147e-05, 4.9443e-05,\n 4.3164e-05, 3.0827e-05, 5.1071e-05, 6.7084e-05, 4.9153e-05, 6.3969e-05,\n 6.2738e-05, 7.5049e-05, 1.7915e-04, 5.5136e-05, 2.4784e-05, 8.2784e-05,\n 8.3287e-05, 8.3677e-05, 8.2652e-05, 7.5833e-05, 6.2194e-06, 4.8407e-05,\n 2.1594e-05, 3.0626e-05, 4.6495e-05, 6.2540e-05, 3.7896e-05, 4.9823e-05,\n 5.4779e-05, 4.2594e-05, 7.5837e-05, 4.6378e-05, 5.3575e-05, 3.0689e-05,\n 3.6914e-05, 1.1229e-04, 5.9229e-05, 4.0119e-05, 4.6722e-05, 4.9164e-05,\n 6.3782e-05, 4.9087e-05, 4.8311e-05, 6.1609e-05, 6.1348e-05, 6.1151e-05,\n 6.8973e-05, 4.9947e-05, 5.4081e-05, 5.6638e-05, 7.1837e-05, 1.1235e-04,\n 4.4465e-05, 7.0242e-05, 4.2472e-05, 5.4768e-05, 6.2913e-05, 4.9730e-05,\n 7.2059e-05, 9.5865e-05, 7.6971e-05, 5.2845e-05, 4.9531e-05, 4.7823e-05,\n 4.0583e-05, 4.8451e-05, 2.9313e-05, 5.0282e-05, 6.5039e-05, 6.5432e-05,\n 3.5888e-05, 5.2076e-05, 3.4553e-05, 6.4623e-05, 1.9894e-05, 4.9360e-05,\n 1.1123e-04, 5.8295e-05, 4.6889e-05, 6.2423e-05, 8.9643e-05, 8.6358e-05,\n 3.6429e-05, 5.2097e-05, 5.2798e-05, 4.3433e-05, 3.8493e-05, 5.5137e-05,\n 5.5794e-05, 1.0296e-04, 4.8927e-05, 4.0079e-05, 5.3311e-05, 3.3467e-05,\n 6.8091e-05, 7.7789e-05, 7.0287e-05, 5.0470e-05, 3.5551e-05, 2.6914e-05,\n 4.2176e-05, 5.7856e-05, 4.0189e-05, 9.2557e-05, 3.7777e-05, 7.3373e-05,\n 1.0616e-04, 3.9935e-05, 7.5450e-05, 4.1562e-05, 5.2323e-05, 6.5467e-05,\n 5.3182e-05, 5.6628e-05, 6.0278e-05, 5.7422e-05, 9.4944e-05, 6.1179e-05,\n 4.4279e-06, 1.0528e-04, 7.0412e-05, 3.5418e-05, 7.2437e-05, 1.0092e-04,\n 7.2649e-05, 8.9786e-06, 5.5091e-05, 1.0018e-04, 6.2620e-05, 1.5198e-04,\n 8.3748e-05, 7.9826e-05, 6.4825e-05, 4.5126e-05, 7.9754e-05, 6.1899e-05,\n 4.0718e-05, 4.5704e-05, 6.7584e-05, 4.1337e-05, 8.0352e-05, 7.9559e-05,\n 1.1988e-04, 3.9998e-05, 5.3728e-05, 6.6014e-05, 5.3232e-05, 6.4005e-05,\n 2.6615e-05, 4.8340e-05, 8.2979e-05, 7.3562e-05, 4.6813e-05, 8.1837e-05,\n 8.8334e-05, 1.1253e-04, 4.8857e-05, 6.3643e-05, 2.5967e-05, 4.7593e-05,\n 5.2522e-05, 9.2031e-05, 4.6518e-05, 3.3434e-05, 1.3296e-04, 7.3008e-05,\n 1.9370e-05, 4.2171e-05, 5.0339e-05, 1.2617e-04, 2.7803e-05, 7.2430e-05,\n 7.3672e-05, 3.9915e-05, 6.7664e-05, 5.8545e-05, 4.4257e-05, 5.3513e-05,\n 6.5331e-05, 1.0533e-04, 3.0861e-05, 9.9890e-05, 3.6134e-05, 5.3644e-05,\n 1.3465e-04, 7.0499e-05, 5.9223e-05, 7.3153e-05, 4.1252e-05, 5.1977e-05,\n 1.0750e-04, 5.7681e-05, 4.3721e-05, 3.1256e-05, 4.5509e-05, 6.0793e-05,\n 2.8403e-05, 3.8345e-05, 1.0048e-04, 4.0025e-05, 6.1663e-05, 5.4099e-06,\n 4.0134e-05, 4.5618e-05, 6.4420e-05, 5.9457e-05, 1.2517e-04, 8.0936e-05,\n 7.2748e-05, 6.1634e-05, 9.1102e-05, 8.6748e-05, 4.7351e-05, 4.3764e-05,\n 5.4358e-05, 8.1168e-05, 4.5493e-05, 3.4367e-05, 4.6975e-05, 3.3471e-05,\n 3.4150e-05, 3.5960e-05, 4.7444e-05, 5.0248e-05, 5.7005e-05, 8.0142e-05,\n 5.6707e-05, 5.8981e-05, 4.0937e-05, 9.1392e-05, 4.8528e-05, 5.2050e-05,\n 5.0687e-05, 4.3165e-05, 5.5230e-05, 2.2684e-05, 2.7183e-05, 7.9457e-05,\n 7.7044e-05, 8.4987e-05, 4.9892e-05, 7.4377e-05, 7.6228e-05, 3.8425e-05,\n 6.1747e-05, 6.8691e-05, 6.9774e-05, 6.4425e-05, 7.3587e-05, 5.5578e-05,\n 5.0899e-05, 6.4617e-05, 5.6011e-05, 1.2796e-04, 4.5863e-05, 7.6875e-05,\n 4.1358e-05, 4.8229e-05, 6.4537e-05, 3.7779e-05, 7.3190e-05, 1.0379e-04,\n 6.9343e-05, 7.0195e-05, 6.8953e-05, 7.0499e-05, 6.3640e-05, 7.7989e-05,\n 7.4889e-05, 8.1400e-05, 2.9110e-05, 1.2513e-04, 7.1215e-05, 8.0113e-05,\n 3.8027e-05, 6.4519e-05, 6.9410e-05, 5.4018e-05, 7.8453e-05, 8.0371e-05,\n 7.1641e-05, 9.6530e-05, 4.5158e-05, 6.6989e-05, 2.0771e-04, 3.8584e-05,\n 7.1657e-05, 5.6005e-05, 5.5983e-05, 6.7093e-05, 8.0702e-05, 6.6174e-05,\n 6.0013e-05, 6.6942e-05, 2.9921e-05, 3.3401e-05, 5.8135e-05, 8.4844e-05,\n 4.5688e-05, 8.7362e-05, 8.4053e-05, 3.2432e-05, 3.8262e-05, 5.5620e-05,\n 8.6786e-05, 3.7568e-05, 5.8627e-05, 8.0805e-05, 4.7541e-05, 8.8202e-05,\n 9.2106e-05, 4.4647e-05, 3.2593e-05, 5.5143e-05, 1.0066e-04, 3.8270e-05,\n 2.8417e-05, 4.7744e-05, 7.5396e-05, 4.1792e-05, 5.2663e-05, 6.9218e-05,\n 1.6818e-04, 6.0646e-05, 7.1147e-05, 3.4418e-05, 9.6129e-05, 5.5701e-05,\n 6.4616e-05, 6.7430e-05, 5.9783e-05, 6.0705e-05, 5.7902e-05, 2.3589e-05,\n 8.1379e-05, 6.0300e-05, 7.8885e-05, 4.6577e-05, 9.1981e-05, 2.4110e-05,\n 5.3999e-05, 6.1208e-05], device='cuda:0')"
|
| 14 |
+
},
|
| 15 |
+
"2": {
|
| 16 |
+
"step": "tensor(11262.)",
|
| 17 |
+
"exp_avg": "tensor([ 3.2829e-03, -9.9377e-03, -9.4456e-04, -2.0101e-03, -7.3105e-05,\n -2.3362e-03, -2.2901e-03, 5.7176e-03, -1.2926e-03, 1.7821e-03,\n 4.8055e-03, -6.3120e-03, 2.2399e-03, -2.4223e-03, 2.0971e-03,\n 3.1058e-03, 1.9829e-03, 8.1614e-03, 2.4751e-03, 7.3238e-03,\n 4.5171e-03, -3.5026e-05, 6.9885e-03, -1.1706e-03, -1.4636e-04,\n 5.4102e-04, -7.0435e-03, -2.1786e-03, -2.8514e-03, 1.6339e-03,\n -1.8874e-03, 8.0064e-04, 9.2817e-04, 2.0471e-03, -3.5591e-03,\n -8.4444e-03, 5.8211e-03, 2.5654e-03, -6.6237e-03, -3.8819e-03,\n -3.5483e-03, -4.7154e-03, 7.0405e-04, -4.2534e-03, 2.3283e-05,\n -6.4074e-04, -3.0112e-03, -1.0728e-03, -1.4716e-03, 2.1479e-03,\n 3.5744e-04, 5.7730e-03, 1.2744e-02, 8.7230e-04, -4.6209e-04,\n 1.3772e-04, -2.6074e-03, 3.9484e-04, 7.6685e-05, 2.8273e-03,\n 6.1390e-04, -1.5430e-03, -7.9502e-03, -7.1455e-03, 1.6328e-03,\n -4.3656e-03, 1.2895e-03, -1.4687e-03, -6.6311e-03, 2.1164e-03,\n -1.5184e-03, 2.2646e-03, 3.8156e-03, 3.2398e-03, -5.6795e-03,\n 1.0188e-03, -1.1075e-03, 2.7131e-03, -4.7034e-03, 2.8748e-03,\n -5.4864e-03, 6.5969e-04, 2.1680e-03, 7.7087e-03, -1.8491e-03,\n -8.0344e-04, 2.6735e-03, 9.8188e-04, -4.5637e-03, 1.9974e-03,\n 1.4621e-03, 1.2156e-02, -1.7110e-03, 2.7379e-03, 1.5880e-03,\n -9.6199e-03, 1.5535e-03, -5.3460e-03, 4.5867e-03, 2.7834e-03,\n 1.1109e-03, 7.2796e-03, -3.2086e-03, -8.4824e-04, 2.7491e-03,\n 3.1475e-03, -1.4755e-03, -2.9130e-03, 5.8637e-04, 1.2047e-04,\n -3.2369e-03, -2.7733e-03, -7.1992e-03, 6.8975e-04, 2.9653e-03,\n -2.1591e-03, 1.5367e-03, 2.0013e-03, 4.2624e-03, -7.8326e-03,\n 1.1319e-03, 1.7344e-03, 6.7522e-03, -1.7741e-03, 3.7887e-03,\n -4.1168e-03, 1.2853e-03, -3.8097e-03, 6.2846e-03, 2.6396e-03,\n -1.0742e-03, -7.5688e-04, -6.6932e-04, 1.9611e-03, 1.1822e-02,\n -5.5404e-03, -4.7726e-03, -2.0292e-03, -9.5139e-03, -7.7356e-03,\n 2.4687e-04, 2.3629e-03, -1.7110e-03, -2.8372e-03, -4.7479e-03,\n -5.8424e-04, 9.9359e-04, 1.6202e-03, 2.0407e-03, 5.1268e-03,\n -5.4706e-03, 6.1510e-05, 7.5762e-04, 3.1026e-04, -2.8395e-03,\n 3.2791e-03, 2.9684e-04, -1.9646e-03, -4.6062e-03, 7.0646e-04,\n 2.8984e-03, -3.4314e-04, 6.2673e-03, 1.4288e-03, -2.4920e-03,\n -2.8552e-03, 8.5288e-03, 6.9532e-04, -3.3935e-03, -4.5264e-03,\n 1.4829e-03, -1.6266e-04, -9.6346e-04, -1.4285e-03, -1.7695e-04,\n -5.1032e-03, 2.6840e-04, -4.6888e-03, 3.6636e-03, 5.9662e-03,\n -4.7344e-04, 5.8649e-03, 3.2353e-04, 7.6932e-03, 2.1244e-03,\n 3.6035e-03, -9.1780e-03, -1.3688e-03, -2.1423e-03, 8.8668e-04,\n 5.6052e-45, -4.3080e-03, -9.1364e-03, 2.7414e-05, 7.2862e-03,\n -1.5343e-03, 1.9955e-03, 2.4056e-03, -3.0202e-04, -4.2550e-03,\n -5.4694e-04, 1.7828e-04, -5.0977e-03, -1.2037e-03, 2.0400e-03,\n 7.6736e-04, 1.7193e-03, -1.5183e-03, 5.1296e-03, 3.6133e-03,\n 1.3759e-04, 2.7384e-04, 4.6324e-03, -2.8912e-03, -5.1370e-04,\n 9.8167e-04, 1.8638e-03, 2.1882e-04, 7.7787e-03, -2.8165e-03,\n 1.5068e-03, -2.3008e-03, -1.5790e-03, -9.5725e-04, 2.8685e-04,\n -4.7422e-04, 1.7882e-03, 3.3194e-04, -3.1480e-03, -1.1108e-03,\n -4.8905e-03, -7.5772e-03, 6.4869e-04, -3.3150e-04, 3.5524e-03,\n -4.6056e-04, -4.9889e-03, 2.4858e-03, -1.3477e-03, -1.4591e-03,\n 2.9834e-04, 1.7551e-03, -2.4902e-04, 4.0893e-03, -1.5651e-03,\n 2.4842e-03, -5.9886e-04, -1.3543e-03, 1.1497e-03, 3.2058e-03,\n 3.0927e-04, 2.5713e-03, 8.2813e-04, 2.5920e-04, 4.3447e-03,\n -4.8262e-04, 2.8940e-03, 3.2312e-03, -6.5706e-04, 4.8140e-03,\n 2.3716e-03, -1.4788e-03, 1.9399e-03, 2.6344e-03, -1.9287e-03,\n -7.5937e-03, -8.5598e-03, -1.6250e-03, 1.1517e-03, -9.1375e-04,\n -2.3414e-03, -2.8057e-03, -1.1678e-04, -8.8254e-03, 1.5815e-03,\n -2.3441e-03, 6.9853e-04, 2.1307e-03, 3.0979e-03, 2.4532e-03,\n 2.6404e-03, 4.8425e-04, 1.1734e-04, 2.9527e-03, 3.2359e-03,\n -4.8208e-03, 2.6736e-03, 1.5134e-04, 5.6052e-45, -5.6950e-03,\n -9.5136e-04, -2.5066e-03, 3.5105e-03, -2.7724e-03, -1.4014e-03,\n 5.6052e-45, -2.1819e-03, 1.1284e-03, -1.0390e-03, -6.6283e-04,\n -2.1812e-03, -1.0500e-03, 3.7312e-03, -4.4248e-03, -8.2403e-04,\n -5.5813e-03, -3.6329e-03, 3.3063e-05, -3.5425e-04, -2.0223e-03,\n -3.7911e-03, -8.5936e-04, -5.8504e-03, 1.4773e-03, 5.3007e-04,\n 3.8649e-03, 3.5672e-04, -1.5122e-03, 1.7250e-03, -9.2010e-04,\n -1.7974e-03, -1.6282e-03, -7.6680e-03, 2.4278e-03, 1.8063e-03,\n 5.4334e-03, -2.8506e-03, 8.4822e-03, 1.1310e-03, 1.3467e-03,\n 5.6655e-03, -1.0026e-04, -4.0207e-04, -1.4454e-04, -9.4024e-03,\n 4.4798e-03, 4.0657e-03, -2.6092e-03, 6.3693e-04, 4.4180e-03,\n -3.9620e-04, 9.6108e-04, -6.0667e-04, -8.5132e-03, 2.9003e-03,\n -7.4281e-04, 1.9058e-03, 5.4132e-04, 9.2934e-03, 3.5570e-04,\n 2.9281e-03, 2.6104e-03, -4.7839e-03, 2.2106e-03, 5.0328e-04,\n -6.8777e-03, 1.1651e-03, 2.8707e-03, -3.8522e-04, -2.1749e-03,\n -3.9343e-03, -1.0723e-03, 2.9562e-04, -2.3825e-03, 2.8396e-03,\n 5.6983e-03, -1.2648e-03, -5.9655e-03, -7.5085e-03, -1.5689e-03,\n 6.9179e-04, 5.6052e-45, -7.6648e-04, 2.7806e-03, -4.3444e-03,\n 3.2695e-04, -9.3513e-03, -4.2976e-03, 1.5723e-03, 9.9691e-04,\n 2.5556e-03, -9.1944e-04, 5.4794e-03, 4.0169e-04, 1.9347e-03,\n -6.2291e-03, 3.2766e-03, -9.2511e-04, 3.4209e-03, -9.2325e-05,\n -6.8684e-04, -8.0204e-05, -6.4876e-04, -3.3816e-03, 7.1251e-03,\n 2.8269e-03, -4.5069e-03, -2.6952e-03, -1.1105e-03, 2.3878e-03,\n -3.7870e-03, -5.4191e-04, -4.6597e-04, -3.2027e-04, 2.0218e-03,\n 8.8617e-04, -4.9914e-04, 1.4473e-03, -2.1935e-04, -1.1411e-03,\n 1.5850e-03, 1.7686e-03, 4.6007e-03, -5.0889e-04, 8.2751e-04,\n -1.2681e-03, -3.1909e-03, -9.7124e-04, -6.6975e-04, -1.8710e-03,\n -5.3579e-03, -6.7778e-04, 2.3504e-03, 1.1413e-04, 7.6441e-03,\n 9.8636e-05, 4.3347e-03, 5.8864e-04, 1.7671e-03, 3.9776e-04,\n 3.2657e-04, -2.7976e-03, -3.0897e-03, 5.2893e-04, -2.9012e-03,\n 1.1489e-04, -2.2698e-03, 1.4205e-03, -3.6992e-03, -6.2912e-03,\n -7.0110e-03, -1.3096e-04, 1.1896e-03, -8.7837e-04, 4.4810e-03,\n -4.3676e-04, 2.1095e-04, 9.1930e-03, 2.9702e-03, 9.9897e-04,\n -1.3973e-03, -2.9112e-04, -2.3020e-03, -1.4575e-03, 8.8424e-03,\n 4.2704e-04, -1.2696e-02, 1.2026e-03, -1.0812e-03, 2.6356e-03,\n 9.9349e-04, -9.3917e-04, 4.2255e-03, 8.4142e-03, -4.0609e-03,\n -1.3939e-03, -2.2439e-03, -7.2632e-03, -4.1042e-03, 7.5547e-03,\n 3.5668e-03, -1.2042e-03, 1.3649e-03, 3.3929e-03, -1.7792e-03,\n -1.8653e-03, -1.2591e-03, 4.0414e-03, 4.4361e-03, -4.8243e-03,\n -9.9566e-03, 1.2358e-03, -9.2498e-03, 2.1045e-03, 6.7835e-04,\n -6.6395e-04, 2.3814e-03, 1.4465e-03, 4.0426e-03, -2.3420e-03,\n 3.2111e-03, 2.7940e-03, -6.2906e-03, -4.2112e-04, 1.7936e-03,\n -8.2948e-03, 5.2606e-03, 4.0569e-03, 2.2058e-04, -5.2927e-03,\n -3.7255e-03, 1.8574e-03, 4.6499e-03, -3.6850e-03, 3.1472e-03,\n 3.3539e-03, 3.5389e-03, 7.0728e-04, 1.7613e-03, -1.1236e-03,\n -1.2015e-03, -1.8796e-03], device='cuda:0')",
|
| 18 |
+
"exp_avg_sq": "tensor([5.9637e-04, 1.8878e-04, 1.7834e-04, 2.5927e-04, 1.6715e-04, 2.2160e-04,\n 1.9224e-04, 1.8905e-04, 1.0707e-04, 1.4141e-04, 3.1899e-04, 2.7248e-04,\n 1.5801e-04, 2.4354e-04, 1.9738e-04, 4.9989e-04, 1.2227e-04, 4.5416e-04,\n 4.5564e-05, 1.9936e-04, 2.4022e-04, 1.9599e-04, 1.8085e-04, 1.2934e-04,\n 2.7356e-04, 1.2833e-04, 5.0772e-04, 1.0665e-04, 2.0097e-04, 1.0603e-04,\n 2.1336e-04, 2.4766e-04, 1.2048e-04, 3.8097e-04, 1.2104e-04, 1.6228e-04,\n 1.4351e-04, 1.8911e-04, 2.5069e-04, 2.9277e-04, 3.3002e-04, 1.7460e-03,\n 1.1498e-04, 2.3885e-04, 2.5916e-04, 4.5738e-04, 3.1865e-04, 1.4962e-04,\n 2.3382e-04, 1.9295e-04, 1.4172e-04, 6.9104e-04, 2.9508e-04, 2.2430e-04,\n 3.6586e-04, 1.5040e-04, 1.8623e-04, 9.0777e-05, 1.7927e-04, 1.8826e-04,\n 1.8636e-04, 1.0652e-04, 2.7307e-04, 1.8627e-04, 2.6453e-04, 2.3674e-04,\n 3.2325e-04, 2.5332e-04, 1.6167e-04, 1.9510e-04, 1.4558e-04, 1.4761e-04,\n 1.2883e-04, 5.7901e-04, 1.5380e-04, 1.3587e-04, 5.7775e-04, 3.4124e-04,\n 2.5059e-04, 1.9794e-04, 1.8375e-04, 1.1588e-04, 1.3101e-04, 4.2118e-04,\n 1.4439e-04, 3.3225e-04, 2.3517e-04, 8.0274e-05, 2.7888e-04, 3.9647e-04,\n 1.4836e-04, 5.1724e-04, 5.9325e-04, 2.8385e-04, 1.2835e-04, 2.7440e-04,\n 1.1818e-04, 2.1154e-04, 2.5788e-04, 2.8506e-04, 2.9475e-04, 1.4362e-04,\n 1.3961e-04, 2.6809e-04, 7.3947e-05, 2.1038e-04, 1.6352e-04, 1.5474e-04,\n 1.0422e-04, 1.2452e-04, 3.7126e-04, 1.2013e-04, 1.1844e-03, 2.1881e-04,\n 8.2332e-05, 9.0248e-05, 4.1676e-04, 2.2829e-04, 1.2350e-04, 2.5573e-04,\n 1.5498e-04, 1.0405e-04, 2.4970e-04, 4.6381e-04, 2.0655e-04, 1.9949e-04,\n 1.2803e-04, 2.2447e-04, 3.6646e-04, 1.7673e-04, 1.5196e-04, 3.0380e-04,\n 2.0563e-04, 1.3114e-04, 3.7481e-04, 4.7319e-04, 1.4792e-04, 9.2242e-05,\n 5.6111e-04, 2.2229e-04, 1.5335e-04, 6.7202e-05, 1.8038e-04, 7.2715e-04,\n 2.7643e-04, 1.9260e-04, 2.7212e-04, 1.0494e-04, 2.6808e-04, 9.1366e-04,\n 1.3742e-04, 1.7243e-04, 6.0056e-05, 1.1847e-04, 2.9174e-04, 3.3342e-04,\n 1.8332e-04, 1.5175e-04, 4.2399e-04, 2.4054e-04, 1.4728e-04, 2.8375e-04,\n 5.1895e-04, 1.5301e-04, 2.4505e-04, 2.6737e-04, 4.2475e-04, 1.0082e-04,\n 2.1312e-04, 6.8502e-04, 2.8136e-04, 1.9953e-04, 1.2006e-04, 1.3488e-04,\n 3.4715e-04, 1.5933e-04, 1.2547e-04, 1.1841e-04, 1.2066e-04, 4.0332e-04,\n 1.6029e-04, 2.7894e-04, 2.4094e-04, 2.2211e-04, 1.1705e-04, 2.1498e-04,\n 3.3258e-04, 2.0744e-04, 2.0920e-04, 1.6359e-04, 1.9309e-09, 1.5157e-04,\n 1.3883e-04, 9.5595e-05, 1.8486e-04, 1.6085e-04, 1.3599e-04, 1.1491e-04,\n 1.1555e-04, 1.8352e-04, 1.2103e-04, 1.7815e-04, 2.4841e-04, 2.6935e-04,\n 1.0718e-04, 2.4158e-04, 2.0929e-04, 1.1349e-04, 1.5075e-04, 1.4033e-04,\n 4.2119e-04, 2.3692e-04, 2.5975e-04, 1.2995e-04, 3.2159e-05, 3.8567e-04,\n 1.1675e-04, 1.3668e-04, 2.3731e-04, 2.5131e-04, 2.5313e-04, 3.5335e-04,\n 1.9465e-04, 1.4171e-04, 1.1004e-04, 1.1884e-04, 2.0008e-04, 7.5753e-05,\n 5.0793e-04, 1.6575e-04, 3.3456e-04, 1.7988e-04, 2.6628e-04, 4.4126e-04,\n 2.5292e-04, 2.8681e-04, 2.4386e-04, 2.4089e-04, 1.7733e-04, 1.8733e-04,\n 1.3107e-04, 1.4081e-04, 2.2507e-04, 2.8629e-04, 1.2274e-04, 2.7658e-04,\n 1.7002e-04, 1.3075e-04, 1.2432e-04, 2.1403e-04, 2.3416e-04, 1.9510e-04,\n 1.5120e-04, 9.1175e-05, 7.3060e-05, 1.8517e-04, 1.0984e-04, 2.3451e-04,\n 1.8647e-04, 3.4896e-04, 3.8132e-04, 1.2536e-04, 2.0926e-04, 1.0711e-04,\n 3.1538e-04, 4.1794e-04, 1.0444e-03, 1.7220e-04, 1.5557e-04, 1.0794e-04,\n 1.4695e-04, 1.9638e-04, 1.1707e-04, 2.3337e-04, 1.7315e-04, 2.3846e-04,\n 3.5369e-04, 7.5890e-05, 1.5514e-04, 1.5404e-04, 1.4391e-04, 1.6858e-04,\n 2.2752e-04, 1.5917e-04, 1.8149e-04, 2.2427e-04, 1.4193e-04, 2.0679e-04,\n 3.7007e-11, 2.6647e-04, 1.7825e-04, 1.3090e-04, 2.4058e-04, 1.9446e-04,\n 2.2939e-04, 3.6281e-10, 2.3209e-04, 1.7642e-04, 2.8167e-04, 4.1101e-04,\n 4.6026e-04, 2.9728e-04, 1.4823e-04, 1.2048e-04, 1.7507e-04, 2.2554e-04,\n 2.5073e-04, 3.3895e-04, 4.0475e-04, 1.3558e-04, 1.3968e-04, 2.1707e-04,\n 3.9710e-04, 5.3242e-05, 8.5097e-05, 2.4010e-04, 1.0752e-04, 2.3233e-04,\n 4.2705e-05, 1.5948e-04, 1.3531e-04, 4.4931e-04, 8.7488e-05, 2.3573e-04,\n 2.3542e-04, 4.0174e-04, 1.3108e-04, 2.0138e-04, 1.8139e-04, 2.4626e-04,\n 2.5381e-04, 1.0832e-04, 1.1171e-04, 1.0126e-04, 3.5771e-04, 1.8079e-04,\n 1.6386e-04, 8.4290e-05, 2.4864e-04, 4.1355e-04, 1.5264e-04, 1.4106e-04,\n 1.8682e-04, 3.9145e-04, 2.3984e-04, 2.9326e-04, 8.0519e-05, 8.3019e-05,\n 1.7269e-04, 2.4029e-04, 1.1774e-04, 6.5352e-04, 1.3533e-04, 1.3872e-04,\n 1.0243e-03, 2.9004e-04, 1.7059e-04, 3.1312e-04, 1.4903e-04, 8.7588e-05,\n 4.4706e-04, 8.1231e-05, 2.3371e-04, 2.0299e-04, 1.2207e-04, 1.5431e-04,\n 2.0037e-04, 1.8517e-04, 2.2524e-04, 1.3435e-04, 1.7525e-04, 7.6287e-11,\n 3.8930e-04, 1.0312e-04, 1.4467e-04, 1.4778e-04, 2.7199e-04, 1.4760e-04,\n 2.1936e-04, 1.6446e-04, 1.7067e-04, 3.1249e-04, 2.5731e-04, 3.0760e-04,\n 1.1370e-04, 3.8845e-04, 2.2042e-04, 1.3922e-04, 2.2289e-04, 1.2496e-04,\n 1.8547e-04, 7.5372e-05, 1.7798e-04, 3.1721e-04, 2.4019e-04, 2.8254e-04,\n 1.3633e-04, 3.4786e-04, 1.4827e-04, 2.5554e-04, 1.0567e-04, 2.1857e-04,\n 1.1993e-04, 1.3216e-04, 3.6979e-04, 6.8960e-05, 8.3895e-05, 2.9224e-04,\n 2.1939e-04, 2.2708e-04, 1.4128e-04, 1.5230e-04, 2.2214e-04, 9.2646e-04,\n 1.0769e-04, 2.3107e-04, 3.0010e-04, 2.7619e-04, 3.5706e-04, 1.7170e-04,\n 1.9069e-04, 8.5032e-05, 5.5895e-04, 1.6936e-04, 1.8982e-04, 1.4298e-04,\n 1.3988e-04, 2.0121e-04, 1.7012e-04, 1.2192e-04, 1.6650e-04, 2.7600e-04,\n 2.4319e-04, 2.3840e-04, 2.9982e-04, 4.9122e-04, 4.4989e-04, 3.3738e-04,\n 2.7229e-04, 2.3303e-04, 2.4333e-04, 3.2015e-04, 1.5960e-04, 1.7427e-04,\n 2.1524e-04, 2.0779e-04, 2.1430e-04, 2.3921e-04, 2.2627e-04, 1.7487e-04,\n 9.9062e-05, 2.3472e-04, 2.5099e-04, 4.1194e-04, 3.3732e-04, 1.1877e-04,\n 5.1237e-04, 1.2155e-04, 1.3337e-04, 2.0242e-04, 1.6373e-04, 1.5919e-04,\n 1.9243e-04, 3.2155e-04, 1.5987e-04, 9.3122e-05, 3.4393e-04, 2.4509e-04,\n 2.0229e-04, 2.2966e-04, 2.6920e-04, 1.2425e-04, 1.5762e-04, 2.3124e-04,\n 1.8941e-04, 1.7115e-04, 2.6417e-04, 3.7408e-04, 1.3537e-04, 1.5700e-04,\n 3.2348e-04, 2.2908e-04, 1.3656e-03, 1.7451e-04, 2.9295e-04, 7.8227e-05,\n 9.2908e-05, 2.3016e-04, 2.4504e-04, 2.2113e-04, 3.5915e-04, 8.5179e-05,\n 3.6401e-04, 1.2555e-04, 1.4473e-04, 1.5699e-04, 3.9134e-04, 2.4654e-04,\n 2.0111e-04, 1.6235e-04, 3.8037e-04, 1.4619e-04, 3.1870e-04, 1.6907e-04,\n 3.9815e-04, 2.1547e-04, 2.2220e-04, 2.0640e-04, 1.2263e-04, 1.2069e-03,\n 1.9226e-04, 1.4990e-04], device='cuda:0')"
|
| 19 |
+
},
|
| 20 |
+
"3": {
|
| 21 |
+
"step": "tensor(11262.)",
|
| 22 |
+
"exp_avg": "tensor([ 1.0032e-03, -6.3115e-03, -1.2172e-03, -8.3873e-04, 1.8252e-04,\n -1.5509e-03, -1.4817e-03, 2.6408e-03, -3.6466e-04, 1.4716e-03,\n 2.2037e-03, -3.1646e-03, 1.3079e-03, -1.8574e-03, 1.9033e-03,\n 1.0339e-03, 1.5488e-03, 4.5836e-03, 1.7574e-03, 4.7479e-03,\n 2.0248e-03, -5.0183e-04, 4.7744e-03, -1.0055e-03, 7.6139e-05,\n 3.4213e-04, -3.5617e-03, -1.2713e-03, -1.1024e-03, 4.6916e-04,\n -5.5982e-04, 5.8956e-04, 4.1682e-04, 1.2501e-03, -2.5210e-03,\n -3.6715e-03, 3.4073e-03, 1.8711e-03, -4.4700e-03, -1.4700e-03,\n -1.5722e-03, -1.7062e-03, 1.1298e-04, -1.9145e-03, 2.6233e-04,\n -2.1180e-05, -1.1078e-03, -4.6109e-04, -8.8034e-04, 1.0078e-03,\n 3.5140e-04, 2.2363e-03, 9.2395e-03, 5.7513e-04, -2.8616e-04,\n -1.8897e-04, -1.1674e-03, 3.2154e-04, 2.8627e-05, 1.8284e-03,\n 2.9496e-04, -8.2136e-04, -3.5777e-03, -4.4469e-03, 4.8187e-04,\n -2.8574e-03, 9.7466e-04, -1.0279e-03, -3.9316e-03, 9.0827e-04,\n -7.1248e-04, 1.5262e-03, 1.7366e-03, 1.5398e-03, -2.7578e-03,\n 1.0503e-03, -6.3364e-04, 1.3911e-03, -1.9136e-03, 2.2554e-03,\n -3.0131e-03, 1.0361e-04, 9.7210e-04, 3.9271e-03, -8.5762e-04,\n -4.3561e-04, 2.2426e-03, 4.8591e-04, -2.7522e-03, 1.6295e-03,\n 4.7307e-04, 4.5434e-03, -1.5812e-03, 2.3581e-03, 4.7088e-04,\n -4.8604e-03, 1.1225e-03, -2.6489e-03, 2.4130e-03, 1.9900e-03,\n 7.6318e-04, 3.8414e-03, -1.7077e-03, -6.8422e-04, 2.3823e-03,\n 1.2537e-03, -1.2453e-03, -9.5289e-04, 7.8745e-04, 1.2308e-04,\n -1.3412e-03, -1.6768e-03, -2.9925e-03, 6.8595e-04, 1.8047e-03,\n -1.0139e-03, 5.6515e-04, 1.2410e-03, 2.5314e-03, -4.3193e-03,\n 8.8547e-04, 4.0785e-04, 3.9399e-03, -7.8771e-04, 1.8092e-03,\n -2.4158e-03, 6.4128e-04, -2.3814e-03, 3.3769e-03, 1.5373e-03,\n -4.2552e-04, -4.6623e-04, -3.4483e-04, 9.8256e-04, 7.1944e-03,\n -1.8615e-03, -2.2597e-03, -1.6612e-03, -5.2424e-03, -4.2978e-03,\n 3.6208e-04, 1.7645e-03, -7.4804e-04, -1.4077e-03, -1.9791e-03,\n -5.5494e-05, 8.7165e-04, 1.2951e-03, 9.8044e-04, 1.9513e-03,\n -3.2150e-03, 3.0960e-05, 5.2045e-04, 3.5727e-04, -6.6772e-04,\n 2.1900e-03, -1.9117e-04, -1.2830e-03, -1.7223e-03, 7.4901e-04,\n 1.3947e-03, 1.0359e-04, 2.1194e-03, 5.8211e-04, -1.9391e-03,\n -1.9479e-03, 4.3915e-03, 1.0889e-04, -2.4059e-03, -1.8707e-03,\n 9.0491e-04, -4.0279e-04, -2.3030e-04, -1.3982e-03, -3.3292e-04,\n -2.3330e-03, -2.6292e-04, -3.4794e-03, 2.8366e-03, 2.9565e-03,\n -5.8248e-06, 3.1588e-03, 9.8926e-04, 3.8098e-03, 9.2118e-04,\n 1.6586e-03, -4.6387e-03, -1.9172e-04, -9.9668e-04, 7.4597e-04,\n 5.6052e-45, -2.6208e-03, -4.9640e-03, 2.1536e-04, 3.8738e-03,\n -5.7098e-04, 8.5981e-04, 1.2660e-03, -3.8969e-04, -2.2208e-03,\n -1.2138e-04, -3.1231e-04, -1.8860e-03, -7.2477e-04, 9.8915e-04,\n 9.2096e-04, 7.6844e-04, -6.4108e-04, 3.0023e-03, 1.9681e-03,\n 1.1332e-04, 1.7629e-04, 2.4118e-03, -1.4162e-03, -6.3999e-04,\n 4.3928e-04, 1.5527e-03, 5.0852e-04, 4.3644e-03, -1.1580e-03,\n 8.1382e-04, -1.1922e-03, -1.3726e-03, -6.1256e-04, 6.1258e-04,\n -1.2901e-04, 1.3723e-03, -6.0307e-05, -1.8516e-03, -5.7213e-04,\n -2.9393e-03, -4.0987e-03, 7.0923e-04, 5.0830e-05, 1.6580e-03,\n -8.5755e-05, -2.0251e-03, 1.4247e-03, -5.7705e-04, -4.2388e-04,\n 4.2760e-04, 1.1637e-03, -1.6108e-05, 1.6052e-03, -5.0644e-04,\n 1.4167e-03, -5.5006e-05, -1.2773e-03, 1.1831e-03, 1.6813e-03,\n 1.0046e-04, 8.2467e-04, 7.9378e-04, -2.5382e-05, 3.4800e-03,\n -2.2852e-04, 1.3816e-03, 2.5775e-03, -7.9611e-05, 2.6479e-03,\n 8.2837e-04, -6.9984e-04, 1.0879e-03, 1.3731e-03, -1.3416e-03,\n -3.5351e-03, -3.3271e-03, -1.2659e-03, 3.1665e-04, -4.8258e-04,\n -9.3276e-04, -1.6878e-03, -3.2785e-05, -5.7829e-03, 5.6818e-04,\n -1.0597e-03, 1.1080e-03, 1.7876e-03, 2.1609e-03, 2.0158e-03,\n 1.7027e-03, -8.8990e-05, -9.8578e-05, 8.7725e-04, 2.2874e-03,\n -3.0531e-03, 1.7306e-03, 2.0766e-04, 5.6052e-45, -3.7700e-03,\n -7.1133e-04, -1.6732e-03, 2.1796e-03, -8.9762e-04, -9.2197e-04,\n 5.6052e-45, -1.1889e-03, 8.8571e-04, -2.7811e-04, -3.3474e-04,\n -9.2819e-04, -3.6412e-04, 2.7231e-03, -2.2163e-03, -6.0346e-04,\n -3.4705e-03, -1.5054e-03, 1.2225e-04, 6.0827e-05, -6.3013e-04,\n -2.0312e-03, 6.3461e-04, -3.0529e-03, 1.5847e-03, 4.8143e-04,\n 2.3814e-03, 2.9467e-04, -3.5372e-04, 1.0324e-03, -4.1449e-04,\n -1.2842e-03, -7.0856e-04, -4.5153e-03, 1.9284e-03, 1.2378e-03,\n 3.3275e-03, -2.2935e-03, 4.4600e-03, 5.2152e-04, 7.6427e-04,\n 3.6408e-03, -4.7041e-04, -2.7695e-04, -4.6669e-04, -5.2096e-03,\n 2.6744e-03, 2.2578e-03, -1.8834e-03, 1.3057e-03, 2.1695e-03,\n -1.0344e-04, 8.2644e-04, -1.8918e-04, -2.9826e-03, 1.8357e-03,\n -1.1806e-04, 1.5955e-03, 5.5419e-04, 5.3285e-03, 4.8947e-04,\n 1.4112e-03, 1.6379e-03, -2.9084e-03, 1.5181e-03, 4.0065e-04,\n -3.3097e-03, 8.8463e-04, 1.9077e-03, -1.9315e-04, -4.8585e-04,\n -2.0623e-03, -5.5407e-04, 2.3700e-04, -1.2813e-03, 1.8211e-03,\n 4.3145e-03, -3.3479e-04, -3.2439e-03, -4.6818e-03, -1.0483e-03,\n 3.9357e-04, 5.6052e-45, 9.8418e-07, 1.4986e-03, -2.4017e-03,\n 2.6609e-04, -5.9087e-03, -2.0540e-03, 9.9525e-04, 8.3917e-04,\n 1.6979e-03, -2.8595e-05, 3.2255e-03, -2.3934e-04, 1.4371e-03,\n -3.4408e-03, 1.5473e-03, -2.4491e-04, 1.9816e-03, 1.9305e-05,\n -1.5249e-05, -1.7359e-04, -2.3330e-04, -1.9434e-03, 3.8007e-03,\n 8.4786e-04, -2.6784e-03, -2.3033e-03, -5.2495e-04, 1.2723e-03,\n -3.0782e-03, -1.7800e-06, -2.7363e-05, -4.8501e-04, 1.0285e-03,\n 5.0462e-04, 7.6560e-05, 1.1954e-03, -6.1916e-06, -7.5919e-04,\n 1.1721e-03, 1.3342e-03, 2.2862e-03, -9.6567e-05, 2.3296e-04,\n -1.7942e-04, -1.7423e-03, -5.6596e-04, -6.6755e-04, -6.7602e-04,\n -2.5907e-03, -4.9541e-04, 9.6501e-04, 2.2884e-04, 4.1427e-03,\n -9.4429e-05, 2.6278e-03, 1.0234e-03, 1.1110e-03, 4.3900e-04,\n -5.5472e-04, -1.7460e-03, -1.2313e-03, 1.9431e-04, -1.5918e-03,\n -4.6606e-05, -1.1093e-03, 1.1996e-03, -2.2305e-03, -4.0170e-03,\n -3.4481e-03, -9.2548e-05, 8.7586e-04, -5.3599e-04, 1.7001e-03,\n 2.3433e-04, 7.4455e-04, 4.6729e-03, 1.9697e-03, 1.0716e-03,\n -8.3904e-04, 5.1277e-04, -8.8896e-04, -1.2293e-03, 5.8790e-03,\n 6.0928e-04, -5.2333e-03, 9.8838e-04, -4.5194e-04, 1.5891e-03,\n 4.1999e-04, -7.7945e-04, 2.6267e-03, 3.7225e-03, -1.8162e-03,\n -9.7152e-04, -8.9990e-04, -3.6418e-03, -2.1324e-03, 4.8432e-03,\n 1.9997e-03, -7.7598e-04, 9.6255e-04, 1.7438e-03, -5.3906e-04,\n -1.0196e-03, -6.8381e-04, 1.9196e-03, 2.8211e-03, -4.1365e-03,\n -4.7058e-03, 6.6007e-04, -2.1814e-03, 7.4486e-04, 1.2551e-03,\n -3.8859e-04, 9.7836e-04, 1.2473e-03, 1.7782e-03, -9.8624e-04,\n 1.0430e-03, 1.4251e-03, -4.0165e-03, 9.8472e-05, 1.4204e-03,\n -5.0643e-03, 3.2829e-03, 2.0517e-03, -2.5227e-05, -3.5654e-03,\n -1.8817e-03, 1.1656e-03, 2.0558e-03, -1.5252e-03, 1.3633e-03,\n 1.4007e-03, 1.8437e-03, 2.3217e-04, 9.5622e-04, -2.4924e-04,\n -1.1076e-03, -1.6864e-03], device='cuda:0')",
|
| 23 |
+
"exp_avg_sq": "tensor([6.9415e-05, 7.5523e-05, 8.2419e-05, 8.7155e-05, 5.2702e-05, 1.0542e-04,\n 8.7420e-05, 5.0457e-05, 4.4209e-05, 5.3792e-05, 1.0102e-04, 7.6522e-05,\n 5.1057e-05, 9.2143e-05, 1.0273e-04, 7.8282e-05, 6.0231e-05, 1.6801e-04,\n 1.6395e-05, 7.6875e-05, 5.4279e-05, 7.1883e-05, 8.0547e-05, 6.2803e-05,\n 5.3945e-05, 4.1984e-05, 1.2665e-04, 5.9166e-05, 6.6305e-05, 4.6570e-05,\n 6.3776e-05, 9.0241e-05, 3.8614e-05, 1.3914e-04, 6.3935e-05, 5.3148e-05,\n 4.4473e-05, 8.2550e-05, 1.1097e-04, 8.4762e-05, 8.3273e-05, 1.8715e-04,\n 5.0652e-05, 5.2085e-05, 9.6047e-05, 1.1408e-04, 8.5800e-05, 4.0314e-05,\n 4.9537e-05, 4.8067e-05, 4.7900e-05, 9.1935e-05, 1.5180e-04, 6.5359e-05,\n 9.8582e-05, 5.2251e-05, 6.0946e-05, 4.2279e-05, 6.8664e-05, 6.1159e-05,\n 4.2065e-05, 4.2543e-05, 5.8546e-05, 8.6908e-05, 1.1097e-04, 1.1105e-04,\n 9.2935e-05, 9.4006e-05, 6.4537e-05, 7.1283e-05, 5.6875e-05, 7.1604e-05,\n 5.2613e-05, 1.8159e-04, 6.8595e-05, 5.1277e-05, 1.9185e-04, 6.0164e-05,\n 6.3337e-05, 7.4607e-05, 6.0410e-05, 4.2171e-05, 4.4327e-05, 1.0337e-04,\n 6.5526e-05, 8.0182e-05, 1.1485e-04, 3.3164e-05, 9.6595e-05, 8.4184e-05,\n 6.7202e-05, 6.6646e-05, 1.8307e-04, 1.7381e-04, 5.8073e-05, 6.8725e-05,\n 4.9967e-05, 8.0523e-05, 9.4269e-05, 7.2671e-05, 7.0855e-05, 5.9363e-05,\n 4.5710e-05, 7.6268e-05, 3.8752e-05, 6.0241e-05, 5.5162e-05, 3.7832e-05,\n 5.9788e-05, 3.9610e-05, 1.5550e-04, 3.8947e-05, 1.9913e-04, 5.2435e-05,\n 4.7674e-05, 3.9153e-05, 1.3266e-04, 1.2506e-04, 5.7363e-05, 8.5655e-05,\n 5.7903e-05, 5.0660e-05, 9.7090e-05, 1.1876e-04, 7.2645e-05, 6.9188e-05,\n 4.3781e-05, 9.1339e-05, 1.0083e-04, 8.1613e-05, 6.6118e-05, 1.3829e-04,\n 6.4529e-05, 4.6757e-05, 1.3400e-04, 7.5668e-05, 4.5331e-05, 4.0431e-05,\n 1.7817e-04, 7.4725e-05, 5.4851e-05, 4.8501e-05, 6.8313e-05, 2.1334e-04,\n 6.4742e-05, 8.7431e-05, 6.8203e-05, 4.1591e-05, 1.3928e-04, 1.3771e-04,\n 5.4246e-05, 7.6747e-05, 4.7809e-05, 7.6543e-05, 5.7922e-05, 8.0150e-05,\n 5.2142e-05, 4.6115e-05, 1.0881e-04, 5.5474e-05, 6.4633e-05, 6.8486e-05,\n 1.1973e-04, 5.8402e-05, 8.5707e-05, 8.0868e-05, 1.4532e-04, 4.4436e-05,\n 8.2823e-05, 2.0813e-04, 7.7602e-05, 9.4648e-05, 3.9742e-05, 4.9954e-05,\n 6.0440e-05, 4.4351e-05, 4.3112e-05, 5.4062e-05, 4.9470e-05, 1.2026e-04,\n 5.2017e-05, 8.8810e-05, 1.5146e-04, 5.6797e-05, 2.3789e-05, 9.8442e-05,\n 9.9552e-05, 6.4251e-05, 9.5009e-05, 6.3705e-05, 2.5661e-11, 5.4736e-05,\n 3.4273e-05, 2.9495e-05, 7.2133e-05, 6.4414e-05, 3.9439e-05, 4.4116e-05,\n 4.4883e-05, 5.4927e-05, 5.7500e-05, 6.2681e-05, 6.2569e-05, 4.4572e-05,\n 4.1651e-05, 1.5607e-04, 6.1905e-05, 4.2679e-05, 4.8771e-05, 5.5488e-05,\n 1.0923e-04, 6.7098e-05, 6.9332e-05, 5.6224e-05, 2.8331e-05, 1.1264e-04,\n 5.7341e-05, 3.8392e-05, 7.4203e-05, 8.3326e-05, 8.0333e-05, 1.3736e-04,\n 4.8527e-05, 5.4950e-05, 4.0843e-05, 5.5775e-05, 7.2088e-05, 3.5638e-05,\n 1.1630e-04, 9.1634e-05, 1.2079e-04, 6.7146e-05, 8.2713e-05, 7.8071e-05,\n 6.0631e-05, 8.5804e-05, 5.1336e-05, 6.3220e-05, 7.0144e-05, 6.4475e-05,\n 3.5484e-05, 5.0166e-05, 5.3962e-05, 9.2523e-05, 2.8876e-05, 8.0127e-05,\n 8.5822e-05, 6.6189e-05, 4.4990e-05, 6.6520e-05, 1.0090e-04, 8.5711e-05,\n 4.7479e-05, 4.2667e-05, 3.8660e-05, 6.3203e-05, 4.2040e-05, 8.0413e-05,\n 7.3104e-05, 1.2092e-04, 8.2782e-05, 4.7757e-05, 6.6068e-05, 3.7204e-05,\n 8.0958e-05, 9.9529e-05, 1.7912e-04, 5.1286e-05, 4.4308e-05, 3.6443e-05,\n 4.3174e-05, 7.5054e-05, 4.3024e-05, 1.0478e-04, 4.7614e-05, 8.8918e-05,\n 1.2796e-04, 4.0727e-05, 7.6598e-05, 6.8438e-05, 4.8707e-05, 6.1141e-05,\n 7.4439e-05, 5.5358e-05, 6.7711e-05, 7.8446e-05, 6.7603e-05, 8.8930e-05,\n 3.0862e-12, 1.3015e-04, 6.2181e-05, 4.2467e-05, 1.0186e-04, 8.3867e-05,\n 9.0996e-05, 1.6234e-11, 7.1147e-05, 1.0951e-04, 6.6464e-05, 1.8501e-04,\n 9.2777e-05, 8.2704e-05, 6.8747e-05, 3.9891e-05, 8.5630e-05, 7.3228e-05,\n 6.4133e-05, 6.7464e-05, 1.0823e-04, 4.7994e-05, 6.0615e-05, 1.0517e-04,\n 1.4796e-04, 3.6019e-05, 4.8519e-05, 8.1925e-05, 4.1293e-05, 7.4261e-05,\n 1.8091e-05, 4.8667e-05, 7.2799e-05, 1.2393e-04, 3.9269e-05, 9.1794e-05,\n 8.2510e-05, 1.6053e-04, 5.6217e-05, 7.2177e-05, 4.3204e-05, 5.7163e-05,\n 8.5176e-05, 7.0302e-05, 3.8932e-05, 2.9515e-05, 1.3852e-04, 8.2208e-05,\n 2.4514e-05, 3.6808e-05, 6.3508e-05, 1.6592e-04, 4.0699e-05, 5.3624e-05,\n 7.3510e-05, 6.4083e-05, 7.7816e-05, 9.9531e-05, 3.4750e-05, 4.1353e-05,\n 6.3348e-05, 8.2266e-05, 3.1616e-05, 1.6403e-04, 4.4639e-05, 6.0601e-05,\n 2.0623e-04, 9.1914e-05, 4.8369e-05, 1.1183e-04, 4.8899e-05, 3.4729e-05,\n 1.7169e-04, 4.3123e-05, 5.7743e-05, 4.6961e-05, 4.6421e-05, 7.2788e-05,\n 3.7635e-05, 6.3464e-05, 1.1733e-04, 4.8593e-05, 6.9556e-05, 3.6946e-12,\n 6.5702e-05, 3.0675e-05, 5.2076e-05, 5.3565e-05, 1.3164e-04, 6.4532e-05,\n 8.6042e-05, 5.3010e-05, 7.2200e-05, 1.0201e-04, 8.6986e-05, 9.1930e-05,\n 5.7486e-05, 1.3610e-04, 5.8664e-05, 3.4811e-05, 7.0917e-05, 3.9127e-05,\n 6.2643e-05, 3.1092e-05, 6.6761e-05, 9.9256e-05, 8.7231e-05, 8.3213e-05,\n 5.0430e-05, 1.0113e-04, 4.8895e-05, 9.1413e-05, 4.6177e-05, 6.1083e-05,\n 4.5383e-05, 4.5173e-05, 8.4153e-05, 2.0151e-05, 2.6911e-05, 8.8429e-05,\n 8.1397e-05, 7.8883e-05, 4.3475e-05, 7.7017e-05, 6.5337e-05, 1.2084e-04,\n 4.6131e-05, 6.2636e-05, 9.6656e-05, 9.1307e-05, 1.7358e-04, 5.3125e-05,\n 5.0229e-05, 4.4266e-05, 1.2146e-04, 9.4122e-05, 5.8405e-05, 7.2163e-05,\n 5.4328e-05, 7.1820e-05, 7.1140e-05, 4.1144e-05, 6.4480e-05, 1.1997e-04,\n 8.9596e-05, 8.8774e-05, 8.0423e-05, 1.0227e-04, 1.2706e-04, 1.1198e-04,\n 9.1242e-05, 8.6571e-05, 7.0329e-05, 1.2042e-04, 7.7961e-05, 8.4930e-05,\n 4.0963e-05, 6.9671e-05, 7.7277e-05, 6.0337e-05, 9.3133e-05, 8.2834e-05,\n 5.1513e-05, 1.0087e-04, 6.2431e-05, 1.2842e-04, 1.9195e-04, 5.0454e-05,\n 9.4492e-05, 3.7545e-05, 5.6222e-05, 6.8288e-05, 7.5115e-05, 5.3397e-05,\n 6.7486e-05, 8.2730e-05, 3.5872e-05, 3.6977e-05, 8.2695e-05, 7.5487e-05,\n 5.8917e-05, 9.1718e-05, 9.5747e-05, 4.6332e-05, 4.7346e-05, 7.1389e-05,\n 9.1655e-05, 5.2184e-05, 7.4297e-05, 1.0808e-04, 4.9082e-05, 7.6852e-05,\n 8.6435e-05, 6.4986e-05, 1.1152e-04, 5.8435e-05, 1.1716e-04, 3.8337e-05,\n 2.6146e-05, 6.8020e-05, 7.1261e-05, 4.5920e-05, 8.8334e-05, 3.9210e-05,\n 1.5470e-04, 4.9905e-05, 6.5962e-05, 5.2456e-05, 1.2495e-04, 7.0370e-05,\n 6.9666e-05, 6.3452e-05, 9.3691e-05, 6.2480e-05, 7.0552e-05, 3.4141e-05,\n 1.0845e-04, 6.6155e-05, 7.8404e-05, 5.4094e-05, 6.6116e-05, 7.4640e-05,\n 6.3361e-05, 6.1618e-05], device='cuda:0')"
|
| 24 |
+
},
|
| 25 |
+
"4": {
|
| 26 |
+
"step": "tensor(11262.)",
|
| 27 |
+
"exp_avg": "tensor([[ 4.0784e-06, 1.0664e-05, 6.6661e-06, ..., -5.0421e-06,\n -6.1632e-07, -1.3012e-05],\n [-1.2381e-05, 1.7363e-05, -6.4040e-06, ..., -1.0820e-05,\n -4.1939e-06, 1.4221e-05],\n [ 1.5264e-06, -2.9778e-05, 3.9364e-06, ..., -4.9208e-06,\n -2.3895e-05, -1.8241e-05],\n ...,\n [-1.3589e-06, -8.0389e-06, 6.1607e-06, ..., -1.4583e-06,\n 1.0957e-05, 7.6506e-06],\n [ 5.6774e-06, -3.9664e-05, -2.4191e-05, ..., -1.0670e-05,\n -2.0851e-05, -3.8883e-06],\n [ 1.6506e-05, -2.7380e-05, -1.6841e-05, ..., 1.7452e-05,\n 3.2552e-07, 3.3396e-05]], device='cuda:0')",
|
| 28 |
+
"exp_avg_sq": "tensor([[1.2880e-09, 3.5146e-09, 9.7494e-10, ..., 8.6417e-10, 1.4439e-09,\n 2.3512e-09],\n [2.0042e-09, 2.3692e-09, 4.5102e-09, ..., 1.5711e-09, 4.0671e-09,\n 4.0303e-09],\n [2.0290e-09, 3.0896e-09, 3.6358e-09, ..., 1.3265e-09, 3.2824e-09,\n 3.4043e-09],\n ...,\n [2.8867e-09, 5.3695e-09, 2.9890e-09, ..., 1.0797e-09, 6.5653e-09,\n 4.1349e-09],\n [1.5814e-09, 6.8555e-09, 4.2318e-09, ..., 1.5647e-09, 3.0159e-09,\n 4.3318e-09],\n [5.1745e-09, 4.1875e-09, 3.0390e-09, ..., 1.3796e-09, 2.8651e-09,\n 4.4191e-09]], device='cuda:0')"
|
| 29 |
+
},
|
| 30 |
+
"5": {
|
| 31 |
+
"step": "tensor(11262.)",
|
| 32 |
+
"exp_avg": "tensor([[ 9.1620e-06, 1.0120e-05, 3.6055e-06, ..., -2.4512e-06,\n -3.6551e-06, -2.5438e-06],\n [-2.9822e-06, -6.2664e-06, -2.8657e-06, ..., -6.9986e-06,\n -7.9950e-06, 2.3799e-05],\n [-1.6042e-07, -1.1583e-05, 1.1710e-05, ..., 2.6288e-06,\n -1.1508e-05, -7.7784e-06],\n ...,\n [ 1.8677e-05, 2.8084e-05, 2.7015e-05, ..., -1.7635e-05,\n -9.5141e-07, -8.3338e-06],\n [-1.1144e-05, 2.3143e-07, -2.9230e-06, ..., 1.0506e-05,\n -9.8173e-06, 6.2561e-06],\n [-2.0907e-05, -1.2192e-05, -4.3931e-06, ..., 2.4223e-05,\n -5.8555e-06, 1.4247e-05]], device='cuda:0')",
|
| 33 |
+
"exp_avg_sq": "tensor([[7.2498e-10, 1.7150e-09, 8.3896e-10, ..., 3.6504e-10, 6.5196e-10,\n 1.6045e-09],\n [2.1033e-09, 4.3628e-09, 1.4927e-09, ..., 1.0909e-09, 3.6317e-09,\n 2.6363e-09],\n [1.5924e-09, 1.9145e-09, 2.0938e-09, ..., 1.1378e-09, 2.5085e-09,\n 3.0089e-09],\n ...,\n [1.8495e-09, 1.7111e-09, 3.5571e-09, ..., 7.1487e-10, 6.1680e-09,\n 2.5394e-09],\n [2.1959e-09, 3.2535e-09, 4.6413e-09, ..., 1.5088e-09, 1.2254e-09,\n 2.0426e-09],\n [1.2402e-09, 4.6724e-09, 1.9405e-09, ..., 9.0682e-10, 1.0870e-09,\n 2.9952e-09]], device='cuda:0')"
|
| 34 |
+
},
|
| 35 |
+
"6": {
|
| 36 |
+
"step": "tensor(11262.)",
|
| 37 |
+
"exp_avg": "tensor([-2.4104e-05, 2.4127e-05], device='cuda:0')",
|
| 38 |
+
"exp_avg_sq": "tensor([4.0345e-06, 4.0345e-06], device='cuda:0')"
|
| 39 |
+
}
|
| 40 |
+
},
|
| 41 |
+
"param_groups": [
|
| 42 |
+
{
|
| 43 |
+
"lr": 0.00793913236883622,
|
| 44 |
+
"name": "shared",
|
| 45 |
+
"betas": [
|
| 46 |
+
0.9,
|
| 47 |
+
0.999
|
| 48 |
+
],
|
| 49 |
+
"eps": 1e-08,
|
| 50 |
+
"weight_decay": 1e-05,
|
| 51 |
+
"amsgrad": false,
|
| 52 |
+
"maximize": false,
|
| 53 |
+
"foreach": null,
|
| 54 |
+
"capturable": false,
|
| 55 |
+
"differentiable": false,
|
| 56 |
+
"fused": null,
|
| 57 |
+
"decoupled_weight_decay": true,
|
| 58 |
+
"initial_lr": 0.01,
|
| 59 |
+
"params": [
|
| 60 |
+
0,
|
| 61 |
+
1,
|
| 62 |
+
2,
|
| 63 |
+
3
|
| 64 |
+
]
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"lr": 0.00793913236883622,
|
| 68 |
+
"name": "scale_256",
|
| 69 |
+
"betas": [
|
| 70 |
+
0.9,
|
| 71 |
+
0.999
|
| 72 |
+
],
|
| 73 |
+
"eps": 1e-08,
|
| 74 |
+
"weight_decay": 1e-05,
|
| 75 |
+
"amsgrad": false,
|
| 76 |
+
"maximize": false,
|
| 77 |
+
"foreach": null,
|
| 78 |
+
"capturable": false,
|
| 79 |
+
"differentiable": false,
|
| 80 |
+
"fused": null,
|
| 81 |
+
"decoupled_weight_decay": true,
|
| 82 |
+
"initial_lr": 0.01,
|
| 83 |
+
"params": [
|
| 84 |
+
4
|
| 85 |
+
]
|
| 86 |
+
},
|
| 87 |
+
{
|
| 88 |
+
"lr": 0.00793913236883622,
|
| 89 |
+
"name": "scale_512",
|
| 90 |
+
"betas": [
|
| 91 |
+
0.9,
|
| 92 |
+
0.999
|
| 93 |
+
],
|
| 94 |
+
"eps": 1e-08,
|
| 95 |
+
"weight_decay": 1e-05,
|
| 96 |
+
"amsgrad": false,
|
| 97 |
+
"maximize": false,
|
| 98 |
+
"foreach": null,
|
| 99 |
+
"capturable": false,
|
| 100 |
+
"differentiable": false,
|
| 101 |
+
"fused": null,
|
| 102 |
+
"decoupled_weight_decay": true,
|
| 103 |
+
"initial_lr": 0.01,
|
| 104 |
+
"params": [
|
| 105 |
+
5
|
| 106 |
+
]
|
| 107 |
+
},
|
| 108 |
+
{
|
| 109 |
+
"lr": 0.003969669238105037,
|
| 110 |
+
"name": "fusion",
|
| 111 |
+
"betas": [
|
| 112 |
+
0.9,
|
| 113 |
+
0.999
|
| 114 |
+
],
|
| 115 |
+
"eps": 1e-08,
|
| 116 |
+
"weight_decay": 1e-05,
|
| 117 |
+
"amsgrad": false,
|
| 118 |
+
"maximize": false,
|
| 119 |
+
"foreach": null,
|
| 120 |
+
"capturable": false,
|
| 121 |
+
"differentiable": false,
|
| 122 |
+
"fused": null,
|
| 123 |
+
"decoupled_weight_decay": true,
|
| 124 |
+
"initial_lr": 0.005,
|
| 125 |
+
"params": [
|
| 126 |
+
6
|
| 127 |
+
]
|
| 128 |
+
}
|
| 129 |
+
]
|
| 130 |
+
},
|
| 131 |
+
"scheduler_state_dict": {
|
| 132 |
+
"T_0": 10,
|
| 133 |
+
"T_i": 10,
|
| 134 |
+
"T_mult": 2,
|
| 135 |
+
"eta_min": 1e-06,
|
| 136 |
+
"T_cur": 3,
|
| 137 |
+
"base_lrs": [
|
| 138 |
+
0.01,
|
| 139 |
+
0.01,
|
| 140 |
+
0.01,
|
| 141 |
+
0.005
|
| 142 |
+
],
|
| 143 |
+
"last_epoch": 3,
|
| 144 |
+
"_step_count": 0,
|
| 145 |
+
"_is_initial": false,
|
| 146 |
+
"_get_lr_called_within_step": false,
|
| 147 |
+
"_last_lr": [
|
| 148 |
+
0.00793913236883622,
|
| 149 |
+
0.00793913236883622,
|
| 150 |
+
0.00793913236883622,
|
| 151 |
+
0.003969669238105037
|
| 152 |
+
]
|
| 153 |
+
},
|
| 154 |
+
"metrics": {
|
| 155 |
+
"best_val_acc": 64.75866666666667,
|
| 156 |
+
"best_epoch": 2,
|
| 157 |
+
"scale_accuracies": {
|
| 158 |
+
"256": 63.992666666666665,
|
| 159 |
+
"512": 64.73066666666666
|
| 160 |
+
},
|
| 161 |
+
"training_history": {
|
| 162 |
+
"epochs": [
|
| 163 |
+
1,
|
| 164 |
+
2,
|
| 165 |
+
3
|
| 166 |
+
],
|
| 167 |
+
"train_loss": [
|
| 168 |
+
5.311051666323785,
|
| 169 |
+
4.462767010682684,
|
| 170 |
+
4.340839946911445
|
| 171 |
+
],
|
| 172 |
+
"train_acc": [
|
| 173 |
+
54.91727464101089,
|
| 174 |
+
60.04988680892759,
|
| 175 |
+
61.02839572566782
|
| 176 |
+
],
|
| 177 |
+
"val_acc": [
|
| 178 |
+
63.041333333333334,
|
| 179 |
+
64.17333333333333,
|
| 180 |
+
64.75866666666667
|
| 181 |
+
],
|
| 182 |
+
"scale_accs": {
|
| 183 |
+
"256": [
|
| 184 |
+
62.11666666666667,
|
| 185 |
+
63.38733333333333,
|
| 186 |
+
63.992666666666665
|
| 187 |
+
],
|
| 188 |
+
"512": [
|
| 189 |
+
62.967333333333336,
|
| 190 |
+
64.19266666666667,
|
| 191 |
+
64.73066666666666
|
| 192 |
+
]
|
| 193 |
+
},
|
| 194 |
+
"lr": [
|
| 195 |
+
0.00975530705321762,
|
| 196 |
+
0.00904518046337755,
|
| 197 |
+
0.00793913236883622
|
| 198 |
+
]
|
| 199 |
+
}
|
| 200 |
+
},
|
| 201 |
+
"train_config": {
|
| 202 |
+
"name": "david_training",
|
| 203 |
+
"run_id": "20251012_231445",
|
| 204 |
+
"dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
|
| 205 |
+
"model_variant": [
|
| 206 |
+
"clip_vit_b16",
|
| 207 |
+
"clip_vit_laion_b32",
|
| 208 |
+
"clip_vit_b32"
|
| 209 |
+
],
|
| 210 |
+
"num_classes": 1000,
|
| 211 |
+
"preset": "small_fast",
|
| 212 |
+
"custom_config_path": null,
|
| 213 |
+
"num_classes_override": null,
|
| 214 |
+
"use_belly_override": null,
|
| 215 |
+
"belly_expand_override": null,
|
| 216 |
+
"progressive_training_override": true,
|
| 217 |
+
"scale_warmup_epochs_override": {
|
| 218 |
+
"256": 0,
|
| 219 |
+
"512": 0
|
| 220 |
+
},
|
| 221 |
+
"num_epochs": 10,
|
| 222 |
+
"batch_size": 1024,
|
| 223 |
+
"learning_rate": 0.01,
|
| 224 |
+
"weight_decay": 1e-05,
|
| 225 |
+
"warmup_epochs": 3,
|
| 226 |
+
"use_rose_loss": true,
|
| 227 |
+
"rose_initial_weight": 0.2,
|
| 228 |
+
"rose_max_weight": 0.6,
|
| 229 |
+
"rose_weight_schedule": "adaptive",
|
| 230 |
+
"use_cayley_loss": false,
|
| 231 |
+
"cayley_weight": 0.01,
|
| 232 |
+
"scale_loss_balance": null,
|
| 233 |
+
"use_mixed_precision": false,
|
| 234 |
+
"gradient_clip": 5.0,
|
| 235 |
+
"scheduler_type": "cosine_restarts",
|
| 236 |
+
"min_lr": 1e-06,
|
| 237 |
+
"freeze_strategy": "never",
|
| 238 |
+
"freeze_threshold": 90.0,
|
| 239 |
+
"unfreeze_on_plateau": true,
|
| 240 |
+
"patience": 10,
|
| 241 |
+
"track_gradients": true,
|
| 242 |
+
"gradient_scale_threshold": 1e-05,
|
| 243 |
+
"gradient_scale_multiplier": 10.0,
|
| 244 |
+
"log_interval": 50,
|
| 245 |
+
"val_interval": 1,
|
| 246 |
+
"save_interval": 5,
|
| 247 |
+
"log_fusion_weights": true,
|
| 248 |
+
"log_loss_components": true,
|
| 249 |
+
"save_format": "safetensors",
|
| 250 |
+
"hf_repo": "AbstractPhil/david-shared-space",
|
| 251 |
+
"upload_to_hub": true,
|
| 252 |
+
"base_dir": "./david_training",
|
| 253 |
+
"num_workers": 10,
|
| 254 |
+
"pin_memory": true,
|
| 255 |
+
"prefetch_factor": 4,
|
| 256 |
+
"persistent_workers": true
|
| 257 |
+
}
|
| 258 |
+
}
|