AbstractPhil commited on
Commit
1b5e9eb
·
verified ·
1 Parent(s): 211a349

Update best_model_acc62.52_metadata.json - Run 20251012_221046

Browse files
weights/David-decoupled-deep_efficiency/20251012_221046/best_model_acc62.52_metadata.json ADDED
@@ -0,0 +1,309 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 0,
3
+ "optimizer_state_dict": {
4
+ "state": {
5
+ "0": {
6
+ "step": "tensor(3754.)",
7
+ "exp_avg": "tensor([[-4.4749e-05, 1.4725e-04, 4.0209e-05, ..., -5.3002e-06,\n -1.4165e-05, 2.8785e-05],\n [ 5.5528e-05, 1.3407e-04, -7.4307e-05, ..., 3.2654e-05,\n 4.3700e-05, 7.5850e-05],\n [-7.6965e-05, 9.2139e-05, 1.8860e-05, ..., 2.1867e-05,\n -1.3094e-05, 3.8954e-06],\n ...,\n [ 7.6618e-05, 8.5101e-05, 1.0000e-04, ..., 2.8659e-05,\n -1.3026e-05, 1.7126e-05],\n [-1.2214e-05, 6.4223e-05, -2.3335e-05, ..., 1.3158e-04,\n 5.7863e-06, 1.3439e-05],\n [-1.0400e-04, -2.0184e-04, 9.1752e-05, ..., 2.5940e-06,\n 4.6468e-05, -1.0303e-04]], device='cuda:0')",
8
+ "exp_avg_sq": "tensor([[2.7359e-08, 2.3284e-07, 7.0138e-08, ..., 2.2038e-08, 1.2536e-08,\n 1.4616e-08],\n [9.6217e-08, 3.8915e-07, 9.2744e-08, ..., 1.5339e-07, 4.1522e-08,\n 4.9984e-08],\n [1.7404e-07, 1.1525e-07, 2.7388e-08, ..., 4.0486e-08, 2.5376e-08,\n 2.6671e-08],\n ...,\n [4.9063e-08, 1.7711e-07, 8.7920e-08, ..., 4.4341e-08, 1.5060e-08,\n 5.5431e-08],\n [1.5269e-07, 2.0826e-07, 8.2987e-08, ..., 1.4950e-07, 3.8942e-08,\n 4.1262e-08],\n [2.1312e-08, 1.2214e-07, 9.2706e-08, ..., 1.6309e-08, 7.3698e-09,\n 2.1436e-08]], device='cuda:0')"
9
+ },
10
+ "1": {
11
+ "step": "tensor(3754.)",
12
+ "exp_avg": "tensor([-8.1367e-04, -4.9727e-04, -1.9642e-04, 4.1624e-04, -7.8312e-09,\n 1.8762e-03, -5.8328e-04, 8.3392e-05, 3.3990e-04, -4.0941e-04,\n 1.2410e-03, 5.6052e-45, -5.2167e-03, 1.9487e-04, 3.2701e-03,\n 9.8769e-04, 1.1269e-04, 4.4264e-04, -1.4434e-03, 5.7081e-04,\n -1.4201e-03, -1.2336e-03, 1.8030e-03, 2.4552e-04, -4.9408e-04,\n -8.1328e-04, -1.7902e-03, -7.4724e-04, 1.9403e-03, 4.9805e-04,\n -5.4805e-04, -1.9834e-03, -1.7212e-03, -1.6541e-03, 1.3718e-03,\n 1.3493e-04, -6.3605e-05, 1.1535e-03, 3.2156e-04, -1.3517e-03,\n 1.2956e-03, -1.2730e-03, -3.8118e-04, -8.4489e-04, -8.9233e-04,\n 3.3987e-04, -8.0669e-04, 3.0355e-04, -4.2855e-04, 9.5174e-04,\n 9.5905e-04, -4.4701e-04, -2.0572e-04, -2.7839e-03, 6.7109e-04,\n -2.6372e-03, 1.3612e-03, -4.3315e-05, 2.4029e-03, 1.0660e-04,\n 2.0569e-03, 2.1287e-03, 4.0652e-04, -3.2699e-11, -3.5535e-03,\n 2.0943e-03, 6.8498e-04, -4.5304e-12, 3.5901e-03, 8.4772e-04,\n -3.3510e-05, -8.0424e-06, -3.8541e-03, 1.8668e-04, -6.0599e-04,\n -2.6157e-03, 3.8743e-04, -1.4900e-03, 1.7907e-03, 2.5035e-03,\n 1.9545e-03, 3.1201e-03, -2.0438e-03, -2.4916e-03, -3.7299e-03,\n -3.4377e-03, 1.0809e-04, 1.9242e-03, -3.6899e-03, 5.6052e-45,\n -1.0445e-03, 2.5261e-04, -2.3670e-03, -1.0501e-03, -1.1143e-03,\n -1.0533e-03, -2.4643e-04, 3.8127e-04, -6.9685e-04, 5.5370e-04,\n 4.9100e-04, 3.7193e-04, 3.0040e-03, 6.6864e-04, 2.1311e-04,\n -1.3562e-04, -2.0445e-03, 1.0353e-03, 1.4375e-03, 2.4195e-03,\n 5.0358e-04, 6.5302e-04, 2.6690e-03, 1.2135e-03, 1.1476e-03,\n -1.1151e-03, -1.1266e-03, -1.3055e-04, 2.5802e-03, -3.9729e-04,\n 1.6163e-04, 7.6585e-04, -8.7372e-05, 1.0230e-03, -8.6288e-04,\n -1.3835e-03, -1.1270e-03, 1.0610e-03, 1.4084e-03, -1.9976e-03,\n 2.2596e-03, -6.2884e-04, -2.5123e-04, -2.1554e-03, -1.6450e-04,\n -1.2985e-04, 5.2091e-03, 2.3489e-04, 4.8430e-04, 4.6869e-04,\n -4.3170e-04, -2.5162e-04, -6.8161e-05, -2.3293e-04, -9.6389e-04,\n 7.9047e-04, 3.1049e-04, -2.0034e-04, 2.4829e-04, -9.2371e-04,\n 3.5966e-03, -5.6832e-04, -5.0496e-04, 7.4216e-04, 1.1504e-03,\n 1.3737e-03, 6.6952e-04, -1.8899e-04, 1.6868e-03, 1.6146e-03,\n 2.8243e-04, 2.5944e-03, -6.6431e-04, 3.5935e-04, 6.3384e-05,\n -9.9373e-04, -6.2823e-04, 3.2152e-04, 5.4877e-04, 4.7079e-04,\n -2.3184e-03, 5.3538e-04, 1.6515e-03, -1.0744e-03, 7.8039e-05,\n 7.9547e-04, -5.0773e-05, 3.6923e-03, 1.7267e-03, 4.3681e-04,\n -2.1966e-04, -4.6968e-04, -1.0880e-04, -2.0722e-03, 6.3926e-04,\n 4.7019e-04, 1.7456e-04, -5.4411e-04, 7.6683e-04, 9.2771e-04,\n -3.6417e-03, -4.1295e-05, -9.5306e-04, 7.0555e-04, 1.8279e-03,\n 8.8248e-04, 2.0808e-03, -1.5755e-03, -5.0833e-05, 2.6237e-03,\n -2.1856e-03, 1.7919e-03, -2.5416e-03, -1.7525e-03, -2.0636e-03,\n -1.5252e-04, 2.3727e-03, 5.6052e-45, -2.1118e-03, -7.5673e-04,\n -1.4113e-03, 2.8449e-03, 1.1457e-03, 1.2253e-03, -1.4174e-03,\n 2.6625e-03, -4.7899e-04, -1.4394e-03, 8.7591e-04, 1.4092e-04,\n -9.5176e-05, 9.3961e-04, -1.0062e-03, 1.7913e-03, -1.7141e-03,\n -4.7101e-05, 5.8291e-04, -2.2839e-04, 3.1676e-03, 6.5609e-04,\n -6.4759e-04, -2.9385e-04, 1.9144e-03, 2.9592e-03, 1.7372e-03,\n 9.8980e-04, -1.3639e-03, 1.5438e-04, -1.2169e-03, -1.9853e-03,\n 3.5970e-03, -4.1487e-04, 4.2076e-04, 1.1652e-03, 9.7023e-04,\n 2.1867e-03, 1.4974e-04, 4.0961e-03, -1.3125e-04, 5.8265e-04,\n -2.2352e-03, -7.6680e-04, -1.5706e-04, 8.5366e-04, 4.2754e-04,\n -1.2114e-04, 9.5753e-05, -1.7046e-03, 2.6724e-04, 8.3027e-04,\n -1.7633e-03, 8.0649e-04, 6.4147e-04, -6.0678e-04, 9.5589e-04,\n -8.6376e-04, 9.5832e-04, 1.5135e-03, -1.2679e-04, 2.3524e-04,\n -2.7402e-03, 1.9261e-03, -1.5786e-03, 3.2450e-03, 1.1703e-03,\n -6.5746e-05, 3.6863e-05, 4.8643e-05, -1.3448e-04, 5.5564e-04,\n 8.9180e-04, -2.1294e-03, -9.7765e-04, -1.2002e-03, 5.6416e-04,\n 6.2707e-04, 8.6123e-04, 7.9437e-04, -8.4291e-04, -4.0692e-03,\n -2.9456e-05, -1.9278e-03, 1.5858e-03, -2.0333e-03, -1.1549e-04,\n -1.3178e-04, 1.1006e-03, 1.2868e-03, 3.3223e-04, -8.9190e-04,\n 7.7729e-04, -1.9359e-03, 5.6172e-05, -3.3262e-03, 1.0916e-03,\n -1.6061e-04, 5.5256e-04, -1.3285e-03, -4.9060e-04, 1.8463e-03,\n 2.0704e-04, -4.8362e-03, -2.7467e-05, 2.4457e-03, 2.3183e-04,\n -2.0011e-03, 2.5882e-03, 5.4843e-05, 3.4988e-03, -1.0774e-03,\n -2.8791e-03, -2.1049e-03, -1.1404e-03, -1.0179e-03, 2.0403e-04,\n -1.1020e-03, 1.9153e-03, -5.0879e-03, 8.7456e-04, 7.7336e-04,\n -8.6910e-04, 1.5670e-03, -1.0073e-03, -5.3445e-04, -2.7277e-03,\n 3.1433e-03, 5.5835e-04, 1.9155e-03, 2.3089e-04, -2.7299e-03,\n 1.1266e-03, -1.6356e-04, 3.0958e-04, -4.0072e-03, -1.7838e-03,\n 2.3742e-03, 1.2951e-04, -3.0488e-03, 2.5931e-04, -7.9527e-04,\n 2.4933e-03, 7.0156e-04, -1.0500e-03, -5.1042e-04, -3.4115e-03,\n -2.8097e-04, 2.9949e-03, -1.8634e-05, 8.0695e-04, 2.2839e-03,\n 2.2516e-04, -6.1616e-03, -2.0914e-03, -1.2352e-03, 3.8066e-03,\n 7.8859e-04, -2.9602e-04, 9.6411e-04, -4.0930e-04, 3.4579e-03,\n 7.0849e-04, 3.3980e-04, -4.7161e-04, -3.3371e-04, -2.6211e-03,\n -1.2608e-03, -1.3335e-03, -8.5335e-05, -3.4740e-03, 4.8508e-04,\n -1.1350e-03, 2.0014e-03, 4.6344e-04, -1.0450e-03, 1.1318e-03,\n -1.6881e-03, 2.5003e-03, -9.1399e-04, -1.2158e-03, -4.9908e-05,\n -1.9117e-03, 1.9589e-03, 2.3057e-03, 1.8100e-03, -8.3746e-04,\n 3.8921e-04, -3.7045e-04, 6.4259e-04, -1.3137e-05, -2.2742e-03,\n -1.6005e-03, 1.8838e-04, -1.6177e-03, 2.8799e-03, 3.3492e-03,\n -1.8542e-03, -2.4921e-04, -1.5754e-03, -5.2379e-04, 1.3332e-03,\n -1.5188e-04, 4.4111e-04, -1.4944e-03, -1.2012e-04, -4.7634e-04,\n -4.9768e-04, 2.8108e-04, -7.7396e-04, -1.7094e-03, 2.9795e-04,\n -1.0855e-03, -1.1367e-03, 1.0956e-03, 3.5483e-06, -3.0103e-04,\n 1.3493e-03, -3.6295e-03, 2.1419e-03, -1.4176e-03, 3.3886e-04,\n 8.0136e-04, 3.2464e-03, 5.6052e-45, 1.7250e-03, -1.0690e-03,\n -8.1598e-06, 5.0956e-03, -4.4878e-04, -1.4565e-03, -3.6110e-03,\n 2.9139e-04, 8.7195e-04, 2.5014e-03, -1.4494e-03, -9.7636e-04,\n -2.6361e-04, 3.3479e-03, 1.0007e-03, 5.9731e-04, -7.6476e-04,\n 1.1284e-04, -4.6298e-04, -1.0221e-03, -1.1098e-03, 1.1489e-04,\n 2.2521e-03, 2.2030e-03, -1.1021e-03, 1.6674e-03, 1.5575e-03,\n 5.6052e-45, -5.6660e-04, -1.0405e-03, -2.5626e-03, 1.0611e-03,\n 1.0676e-03, -2.4274e-03, 2.1316e-03, 9.1119e-04, 1.3404e-03,\n 7.2306e-04, -2.5358e-03, -1.1410e-03, -1.9789e-03, -5.0153e-04,\n -4.5727e-03, -1.2091e-04, 8.4857e-04, -1.3097e-03, 2.1005e-03,\n 6.9048e-04, 1.1576e-03, 4.2954e-04, -3.2762e-03, 3.4072e-03,\n -2.2647e-03, -1.4537e-03, 3.3851e-04, 6.7889e-04, -2.3878e-03,\n -1.3313e-03, 9.3333e-04, 1.5462e-03, 1.9939e-04, 2.7098e-03,\n -2.2441e-03, 1.8803e-03, -1.2065e-03, -5.0322e-04, -3.5094e-03,\n 9.3891e-05, 2.4084e-03, -2.4659e-03, 2.5327e-03, 1.3607e-03,\n -1.7305e-03, -1.1718e-03, -4.6362e-04, -1.9883e-04, -5.2899e-04,\n 1.2901e-03, 5.7018e-04, -2.9603e-03, 4.4879e-04, -2.8203e-03,\n 1.2655e-03, -1.1335e-03, 2.9538e-03, 2.5082e-03, -9.8844e-04,\n -1.0733e-03, 3.5881e-03, -1.0556e-03, -3.7764e-03, -4.2339e-04,\n 1.6562e-03, 3.0200e-03, 1.2011e-03, 2.3737e-04, -1.4873e-04,\n 4.0332e-04, -3.8080e-03, 6.8526e-04, -1.8363e-03, 7.9228e-04,\n -4.1291e-03, 9.3338e-05, 5.1121e-04, 2.8580e-03, -3.2570e-03,\n 4.1448e-04, 1.4502e-03, 2.0674e-04, 7.0930e-04, -8.6215e-04,\n 2.9365e-03, 8.3438e-04, 1.8299e-03, 1.8106e-03, -1.6737e-03,\n 2.8248e-04, 5.2771e-04, 2.9061e-04, -1.0569e-03, 1.1242e-03,\n 2.1906e-04, 3.1560e-03, -7.6455e-05, 6.2387e-05, 1.2929e-04,\n 6.6199e-04, 1.7683e-03, -6.2497e-04, -1.9280e-03, 1.1437e-03,\n 3.6147e-04, -6.9456e-04, -1.2706e-03, 7.3737e-04, -9.6973e-04,\n 1.5202e-03, -1.0515e-04, 2.2749e-03, -2.3923e-04, 1.6866e-03,\n -3.1259e-03, -6.7030e-04, 9.1899e-04, -2.3185e-03, 1.7068e-03,\n -1.0078e-03, -7.0923e-04, 1.4189e-03, 1.1298e-03, -7.7039e-04,\n -4.0293e-04, 3.6223e-04, 2.1090e-04, 1.4420e-03, 1.8279e-04,\n -1.0262e-03, 1.0139e-03, -7.4034e-04, 1.6338e-03, 2.8892e-03,\n 1.2747e-03, 1.6528e-03, 3.6505e-04, 9.2641e-04, -2.3019e-04,\n 5.0369e-04, -4.1351e-05, 2.4120e-04, -2.1856e-03, 5.4232e-04,\n 1.0482e-03, -1.0749e-03, 3.3277e-03, 1.0134e-04, 1.1085e-03,\n -7.7962e-04, 1.2008e-03, -9.3034e-04, -3.0980e-03, -1.5012e-03,\n -5.2173e-05, 1.7408e-04, 1.9751e-03, 3.1282e-04, 1.6241e-03,\n -7.6673e-04, 3.4086e-04, -2.6384e-03, -4.8475e-03, -2.4161e-03,\n 1.1833e-03, -1.5770e-03, 1.5403e-04, -9.1986e-04, 1.0524e-03,\n -1.9944e-03, -7.0337e-04, 1.8231e-03, -2.6331e-04, 6.2168e-04,\n -2.2057e-05, -1.4172e-04, 9.7819e-04, 2.1654e-03, -2.1981e-03],\n device='cuda:0')",
13
+ "exp_avg_sq": "tensor([2.0154e-05, 8.2802e-05, 4.1377e-05, 5.8201e-05, 2.7666e-07, 1.3371e-04,\n 6.3215e-05, 7.5467e-05, 6.0311e-05, 2.5713e-05, 6.4536e-05, 9.8062e-08,\n 1.2011e-04, 2.5607e-05, 9.6979e-05, 2.6946e-05, 7.4975e-05, 6.6907e-05,\n 7.2662e-05, 9.0838e-05, 3.8164e-05, 2.2901e-05, 5.5431e-05, 7.3976e-05,\n 6.0370e-05, 2.5836e-05, 5.7701e-05, 2.7076e-05, 7.4471e-05, 5.1125e-05,\n 5.3557e-05, 9.6772e-05, 7.3849e-05, 3.0324e-05, 5.3788e-05, 3.3623e-05,\n 6.3435e-05, 1.4947e-05, 1.0229e-04, 5.6720e-05, 5.7996e-05, 3.1101e-05,\n 3.6252e-05, 2.1665e-05, 9.5999e-05, 1.2943e-04, 4.6959e-05, 1.4945e-05,\n 4.3585e-05, 8.5697e-05, 6.8489e-05, 3.6036e-05, 1.7792e-05, 9.1431e-05,\n 8.5387e-05, 1.4333e-04, 5.6405e-05, 7.0258e-05, 3.6230e-05, 4.2322e-05,\n 5.9743e-05, 9.8703e-05, 2.8072e-05, 2.3357e-07, 8.3855e-05, 1.1213e-04,\n 2.0748e-05, 4.1405e-08, 5.7863e-05, 8.0638e-05, 2.3171e-05, 7.0682e-05,\n 1.2494e-04, 3.5240e-05, 4.0836e-05, 4.4009e-05, 1.0840e-05, 6.2528e-05,\n 7.3188e-05, 5.3101e-05, 2.8500e-05, 4.8022e-05, 1.3270e-04, 7.1345e-05,\n 9.8039e-05, 4.9880e-05, 7.4288e-05, 7.5197e-05, 1.2188e-04, 6.2601e-09,\n 4.7078e-05, 4.5902e-05, 8.0037e-05, 4.5587e-05, 6.4892e-05, 5.7550e-05,\n 3.9802e-05, 8.4359e-05, 4.4066e-05, 6.1808e-05, 1.3663e-05, 1.2267e-04,\n 1.5534e-04, 2.7016e-05, 2.4059e-05, 5.5049e-05, 5.8961e-05, 5.2122e-05,\n 4.9386e-05, 6.1050e-05, 5.1711e-05, 7.4730e-05, 6.7354e-05, 3.2563e-05,\n 1.8646e-05, 6.6369e-05, 8.4351e-06, 2.6946e-05, 6.5055e-05, 2.6992e-05,\n 7.2243e-05, 4.6647e-05, 2.3662e-05, 2.3980e-05, 4.0668e-05, 3.4866e-05,\n 9.6333e-05, 8.3457e-05, 6.5208e-05, 7.9298e-05, 3.9412e-05, 3.7890e-05,\n 1.7838e-05, 6.4214e-05, 5.1830e-05, 2.0711e-05, 8.2491e-05, 3.8297e-05,\n 7.6388e-05, 6.5668e-05, 5.8247e-05, 2.6719e-05, 3.0070e-05, 4.6429e-05,\n 2.0942e-05, 5.2430e-05, 3.9129e-05, 3.4937e-05, 2.9151e-05, 8.2376e-05,\n 1.2240e-04, 2.2993e-05, 5.8033e-06, 2.3480e-05, 7.5797e-05, 5.0620e-05,\n 3.0101e-05, 1.5111e-05, 5.9141e-05, 1.1922e-04, 4.3985e-05, 7.0778e-05,\n 2.1452e-05, 2.9974e-05, 5.9602e-05, 7.0787e-05, 1.0572e-05, 5.9085e-05,\n 4.8838e-05, 3.3771e-05, 1.4795e-03, 2.8343e-05, 6.7596e-05, 2.5200e-05,\n 1.1581e-04, 7.6340e-05, 1.8768e-05, 7.8852e-05, 4.3319e-05, 4.3013e-05,\n 6.0303e-05, 1.0362e-04, 3.6780e-05, 6.6169e-05, 4.8046e-05, 8.7605e-05,\n 8.5679e-05, 2.4497e-05, 3.8737e-05, 5.7655e-05, 1.1327e-04, 2.0221e-05,\n 2.5678e-05, 3.9036e-05, 5.9405e-05, 7.4863e-05, 1.3981e-04, 4.9663e-05,\n 6.7746e-05, 9.4101e-05, 2.5562e-05, 5.0306e-05, 5.7220e-05, 5.8151e-05,\n 9.1796e-05, 2.5666e-05, 8.7783e-05, 1.1497e-07, 6.5675e-05, 5.7744e-05,\n 3.5179e-05, 6.6260e-05, 1.0486e-04, 2.4144e-05, 2.4743e-05, 8.4151e-05,\n 4.0905e-05, 7.6226e-06, 4.5574e-05, 1.8996e-05, 3.8531e-05, 4.9739e-05,\n 1.6613e-05, 4.3763e-05, 8.9930e-05, 3.9020e-05, 2.0102e-05, 3.1018e-05,\n 4.9381e-05, 2.1246e-05, 1.3685e-05, 3.9440e-05, 5.4019e-05, 4.4976e-05,\n 2.5335e-05, 3.5664e-05, 6.5777e-05, 1.5524e-05, 3.3335e-05, 7.4262e-05,\n 6.0234e-05, 3.4098e-05, 7.6233e-05, 7.9724e-05, 4.2272e-05, 3.9750e-05,\n 6.4428e-05, 4.7021e-05, 2.2019e-05, 9.3759e-05, 1.0903e-04, 6.2591e-05,\n 4.2635e-05, 7.1636e-05, 3.0634e-05, 9.3350e-05, 9.6588e-05, 3.3782e-05,\n 2.4793e-05, 1.7948e-05, 5.6969e-05, 3.0138e-05, 1.7028e-04, 2.0664e-05,\n 5.7346e-05, 9.7025e-05, 7.4213e-05, 4.9465e-05, 4.2274e-05, 2.0708e-05,\n 9.8778e-05, 4.6467e-05, 1.0464e-04, 8.2468e-05, 1.1371e-04, 1.9350e-05,\n 3.8852e-05, 6.6679e-05, 2.2131e-05, 4.3382e-05, 8.4914e-05, 5.0501e-05,\n 5.8088e-05, 6.3481e-05, 7.3401e-05, 3.2443e-05, 2.5234e-05, 1.5137e-05,\n 1.3448e-05, 5.4638e-05, 2.2688e-05, 7.5122e-05, 1.4985e-05, 4.4604e-05,\n 3.6136e-05, 2.1323e-05, 5.6948e-05, 1.3312e-04, 5.7456e-05, 1.9938e-05,\n 6.3773e-05, 5.8517e-05, 4.4364e-05, 7.9548e-05, 3.1828e-05, 2.7609e-05,\n 1.3751e-05, 8.3910e-05, 7.5579e-05, 1.6606e-04, 2.7323e-05, 9.2713e-05,\n 1.8283e-05, 4.1909e-05, 5.8471e-05, 9.0405e-05, 5.5889e-05, 3.7126e-05,\n 6.3142e-05, 1.3276e-04, 1.3477e-04, 3.4052e-05, 3.9605e-05, 3.9283e-05,\n 5.4168e-05, 1.5002e-05, 9.8671e-05, 8.7662e-05, 3.8084e-05, 8.2566e-05,\n 4.6938e-05, 3.6747e-05, 3.4818e-05, 5.3940e-05, 9.5945e-05, 5.8190e-05,\n 9.2539e-05, 6.1749e-05, 7.7528e-05, 9.4096e-05, 9.1491e-05, 1.8077e-05,\n 7.2332e-05, 6.9511e-05, 8.0511e-05, 6.8578e-05, 9.9081e-05, 1.0740e-04,\n 1.4787e-05, 4.2240e-05, 6.8638e-05, 7.0029e-05, 5.8209e-05, 7.0207e-05,\n 1.2905e-04, 5.1791e-05, 1.3230e-04, 3.1328e-05, 3.7703e-05, 8.7016e-05,\n 2.7134e-05, 5.1551e-05, 3.4037e-05, 9.6508e-05, 5.0870e-05, 1.5455e-05,\n 1.1583e-05, 3.4642e-05, 4.9443e-05, 1.3413e-04, 8.9876e-05, 7.4853e-05,\n 3.3722e-05, 5.2104e-05, 3.0012e-05, 6.1226e-05, 4.5965e-05, 2.0755e-05,\n 6.0192e-05, 1.6045e-04, 4.2866e-05, 7.2082e-05, 4.9185e-05, 2.7262e-05,\n 2.3920e-05, 4.6540e-05, 9.5091e-05, 4.8232e-05, 5.8458e-05, 9.3518e-06,\n 2.1562e-05, 4.6629e-05, 2.5563e-05, 3.2983e-05, 4.1256e-05, 5.3837e-05,\n 1.9471e-05, 2.6884e-05, 2.1591e-05, 4.7164e-05, 1.1849e-04, 1.2639e-04,\n 5.9991e-05, 6.1959e-05, 5.2603e-05, 8.3766e-05, 1.7410e-04, 1.1270e-04,\n 4.3404e-05, 4.2885e-05, 7.8638e-05, 3.2916e-05, 9.5563e-05, 3.0930e-05,\n 7.1301e-06, 6.1772e-05, 7.1582e-05, 6.4861e-05, 1.5175e-05, 4.7819e-05,\n 8.0532e-05, 4.7885e-05, 4.6217e-05, 5.2978e-05, 1.5163e-05, 5.7309e-05,\n 5.0127e-05, 5.6288e-05, 7.5206e-05, 3.0704e-05, 5.6026e-05, 7.0860e-05,\n 9.4935e-08, 3.8167e-05, 1.7378e-05, 1.1109e-04, 8.9561e-05, 1.0129e-04,\n 5.2488e-05, 1.9742e-04, 4.7105e-05, 4.1449e-05, 8.4381e-05, 8.5691e-05,\n 5.8117e-05, 7.7153e-05, 8.3289e-05, 9.4483e-05, 6.0720e-05, 4.3580e-05,\n 4.8150e-05, 7.2913e-05, 3.3791e-05, 3.9347e-05, 7.4294e-05, 7.9486e-05,\n 3.8494e-05, 5.4305e-05, 3.4612e-05, 2.2176e-05, 3.2542e-08, 4.3827e-05,\n 4.6214e-05, 2.2613e-05, 8.6411e-05, 7.4949e-05, 1.0844e-04, 4.8134e-05,\n 2.5165e-05, 5.1375e-05, 4.4508e-05, 6.4718e-05, 2.3473e-05, 6.8435e-05,\n 3.0449e-05, 1.6608e-04, 4.7180e-05, 4.0033e-05, 4.8861e-05, 7.8811e-05,\n 1.1130e-04, 5.9936e-05, 3.7847e-05, 1.2143e-04, 1.0472e-04, 3.4603e-05,\n 6.8928e-05, 3.9774e-05, 5.2932e-05, 8.9824e-05, 7.6342e-05, 3.7765e-05,\n 5.9049e-05, 4.7249e-05, 4.2584e-05, 6.6110e-05, 3.2549e-05, 4.6470e-05,\n 1.6718e-05, 9.8433e-05, 3.9022e-05, 4.3184e-05, 2.5180e-05, 1.0893e-04,\n 6.1968e-05, 1.0441e-04, 6.4247e-05, 6.1753e-05, 2.6640e-05, 4.1650e-05,\n 4.2180e-05, 5.0685e-05, 6.3948e-05, 5.8474e-05, 1.8964e-04, 8.8770e-05,\n 4.5382e-05, 6.4377e-05, 9.1724e-05, 2.1386e-05, 1.3245e-04, 7.1046e-05,\n 9.4482e-05, 3.4258e-05, 1.6250e-04, 2.5424e-05, 9.6437e-05, 2.9211e-05,\n 5.8509e-05, 1.1030e-05, 8.0235e-05, 7.5738e-05, 3.1076e-05, 3.6277e-05,\n 3.8421e-05, 9.6752e-05, 8.3197e-05, 9.7976e-05, 1.7442e-04, 1.2302e-04,\n 8.3615e-05, 7.3013e-05, 4.0972e-05, 8.6529e-05, 4.4829e-05, 1.0420e-04,\n 1.5958e-05, 3.2191e-05, 4.7364e-05, 4.5041e-05, 2.6920e-05, 1.0833e-04,\n 3.3251e-05, 7.6877e-05, 9.5259e-05, 4.3558e-05, 4.5480e-05, 1.0422e-04,\n 9.5622e-05, 7.3834e-05, 2.5551e-05, 8.4178e-05, 8.3270e-05, 1.3172e-04,\n 1.5012e-05, 2.9435e-05, 5.2833e-05, 7.8708e-05, 2.8242e-05, 1.4015e-05,\n 3.4560e-05, 2.4896e-05, 7.3177e-05, 4.1156e-05, 4.3540e-05, 5.5396e-05,\n 5.9097e-05, 2.8384e-05, 5.0072e-05, 3.1305e-05, 2.2703e-05, 1.3928e-05,\n 4.4112e-05, 6.3030e-05, 5.6137e-05, 7.4179e-05, 2.9836e-05, 6.9072e-05,\n 9.2407e-05, 1.2298e-04, 3.2462e-05, 2.0662e-05, 2.2276e-05, 3.6505e-05,\n 6.7992e-05, 8.7684e-05, 7.4997e-05, 7.6316e-05, 2.8529e-05, 4.6959e-05,\n 3.7555e-05, 8.9426e-05, 4.5815e-05, 2.5308e-05, 2.4237e-05, 2.2442e-05,\n 3.2402e-05, 1.5918e-04, 4.5283e-05, 6.3682e-05, 7.3752e-05, 4.3061e-05,\n 5.2020e-05, 1.4130e-04, 6.8139e-05, 4.7804e-05, 2.5954e-05, 2.1221e-05,\n 2.9560e-05, 3.9971e-05, 2.7097e-05, 6.2107e-05, 1.0919e-04, 7.1817e-05,\n 7.7619e-05, 4.3607e-05, 1.0661e-04, 5.4150e-05, 2.6929e-05, 3.0819e-05,\n 6.0811e-05, 5.6102e-05, 6.6251e-05, 6.3621e-05, 4.6197e-05, 3.9022e-05,\n 6.5501e-05, 3.2605e-05, 7.9737e-05, 1.5649e-05], device='cuda:0')"
14
+ },
15
+ "2": {
16
+ "step": "tensor(3754.)",
17
+ "exp_avg": "tensor([[-5.7145e-06, 1.1063e-05, 2.0852e-05, ..., -3.4128e-05,\n -1.9831e-05, 8.1359e-06],\n [-2.8538e-05, 3.3111e-05, 1.1532e-05, ..., 2.4668e-06,\n -6.9036e-05, 2.2730e-05],\n [ 1.2580e-05, -3.5312e-05, 2.8274e-05, ..., -1.0950e-05,\n -7.7000e-06, 1.3544e-06],\n ...,\n [ 1.4112e-05, 5.2714e-05, -3.8939e-06, ..., -2.0392e-05,\n 4.5299e-05, 4.7248e-06],\n [-4.3884e-05, -1.8464e-05, 2.4305e-05, ..., -2.0412e-05,\n -1.4639e-06, -2.3918e-05],\n [-3.3159e-05, -2.7091e-05, -4.7088e-05, ..., 4.6097e-05,\n 1.4003e-05, -1.4675e-06]], device='cuda:0')",
18
+ "exp_avg_sq": "tensor([[7.9731e-09, 1.5778e-08, 8.4568e-09, ..., 8.5942e-09, 1.9911e-08,\n 8.4072e-09],\n [2.4406e-08, 3.0611e-08, 2.3650e-08, ..., 2.4743e-08, 2.6419e-08,\n 2.0760e-08],\n [2.9345e-08, 2.1444e-08, 2.9055e-08, ..., 1.7540e-08, 2.5749e-08,\n 1.3699e-08],\n ...,\n [1.3579e-08, 3.5188e-08, 6.4344e-08, ..., 1.5934e-08, 3.1071e-08,\n 1.6421e-08],\n [2.4944e-08, 3.0704e-08, 4.8105e-08, ..., 1.7044e-08, 2.0681e-08,\n 2.2106e-08],\n [1.7889e-08, 2.6658e-08, 2.3520e-08, ..., 2.2739e-08, 2.7569e-08,\n 8.4465e-09]], device='cuda:0')"
19
+ }
20
+ },
21
+ "param_groups": [
22
+ {
23
+ "lr": 0.00975530705321762,
24
+ "name": "scale_256",
25
+ "betas": [
26
+ 0.9,
27
+ 0.999
28
+ ],
29
+ "eps": 1e-08,
30
+ "weight_decay": 1e-05,
31
+ "amsgrad": false,
32
+ "maximize": false,
33
+ "foreach": null,
34
+ "capturable": false,
35
+ "differentiable": false,
36
+ "fused": null,
37
+ "decoupled_weight_decay": true,
38
+ "initial_lr": 0.01,
39
+ "params": [
40
+ 0,
41
+ 1,
42
+ 2
43
+ ]
44
+ },
45
+ {
46
+ "lr": 0.00975530705321762,
47
+ "name": "scale_512",
48
+ "betas": [
49
+ 0.9,
50
+ 0.999
51
+ ],
52
+ "eps": 1e-08,
53
+ "weight_decay": 1e-05,
54
+ "amsgrad": false,
55
+ "maximize": false,
56
+ "foreach": null,
57
+ "capturable": false,
58
+ "differentiable": false,
59
+ "fused": null,
60
+ "decoupled_weight_decay": true,
61
+ "initial_lr": 0.01,
62
+ "params": [
63
+ 3,
64
+ 4,
65
+ 5
66
+ ]
67
+ },
68
+ {
69
+ "lr": 0.00975530705321762,
70
+ "name": "scale_768",
71
+ "betas": [
72
+ 0.9,
73
+ 0.999
74
+ ],
75
+ "eps": 1e-08,
76
+ "weight_decay": 1e-05,
77
+ "amsgrad": false,
78
+ "maximize": false,
79
+ "foreach": null,
80
+ "capturable": false,
81
+ "differentiable": false,
82
+ "fused": null,
83
+ "decoupled_weight_decay": true,
84
+ "initial_lr": 0.01,
85
+ "params": [
86
+ 6,
87
+ 7,
88
+ 8
89
+ ]
90
+ },
91
+ {
92
+ "lr": 0.00975530705321762,
93
+ "name": "scale_1024",
94
+ "betas": [
95
+ 0.9,
96
+ 0.999
97
+ ],
98
+ "eps": 1e-08,
99
+ "weight_decay": 1e-05,
100
+ "amsgrad": false,
101
+ "maximize": false,
102
+ "foreach": null,
103
+ "capturable": false,
104
+ "differentiable": false,
105
+ "fused": null,
106
+ "decoupled_weight_decay": true,
107
+ "initial_lr": 0.01,
108
+ "params": [
109
+ 9,
110
+ 10,
111
+ 11
112
+ ]
113
+ },
114
+ {
115
+ "lr": 0.00975530705321762,
116
+ "name": "scale_1280",
117
+ "betas": [
118
+ 0.9,
119
+ 0.999
120
+ ],
121
+ "eps": 1e-08,
122
+ "weight_decay": 1e-05,
123
+ "amsgrad": false,
124
+ "maximize": false,
125
+ "foreach": null,
126
+ "capturable": false,
127
+ "differentiable": false,
128
+ "fused": null,
129
+ "decoupled_weight_decay": true,
130
+ "initial_lr": 0.01,
131
+ "params": [
132
+ 12,
133
+ 13,
134
+ 14
135
+ ]
136
+ },
137
+ {
138
+ "lr": 0.004877665762479736,
139
+ "name": "fusion",
140
+ "betas": [
141
+ 0.9,
142
+ 0.999
143
+ ],
144
+ "eps": 1e-08,
145
+ "weight_decay": 1e-05,
146
+ "amsgrad": false,
147
+ "maximize": false,
148
+ "foreach": null,
149
+ "capturable": false,
150
+ "differentiable": false,
151
+ "fused": null,
152
+ "decoupled_weight_decay": true,
153
+ "initial_lr": 0.005,
154
+ "params": [
155
+ 15,
156
+ 16,
157
+ 17,
158
+ 18,
159
+ 19,
160
+ 20,
161
+ 21,
162
+ 22,
163
+ 23,
164
+ 24,
165
+ 25,
166
+ 26,
167
+ 27,
168
+ 28,
169
+ 29,
170
+ 30,
171
+ 31,
172
+ 32,
173
+ 33,
174
+ 34,
175
+ 35,
176
+ 36,
177
+ 37,
178
+ 38,
179
+ 39,
180
+ 40,
181
+ 41,
182
+ 42,
183
+ 43,
184
+ 44,
185
+ 45,
186
+ 46,
187
+ 47,
188
+ 48
189
+ ]
190
+ }
191
+ ]
192
+ },
193
+ "scheduler_state_dict": {
194
+ "T_0": 10,
195
+ "T_i": 10,
196
+ "T_mult": 2,
197
+ "eta_min": 1e-06,
198
+ "T_cur": 1,
199
+ "base_lrs": [
200
+ 0.01,
201
+ 0.01,
202
+ 0.01,
203
+ 0.01,
204
+ 0.01,
205
+ 0.005
206
+ ],
207
+ "last_epoch": 1,
208
+ "_step_count": 0,
209
+ "_is_initial": false,
210
+ "_get_lr_called_within_step": false,
211
+ "_last_lr": [
212
+ 0.00975530705321762,
213
+ 0.00975530705321762,
214
+ 0.00975530705321762,
215
+ 0.00975530705321762,
216
+ 0.00975530705321762,
217
+ 0.004877665762479736
218
+ ]
219
+ },
220
+ "metrics": {
221
+ "best_val_acc": 62.524,
222
+ "best_epoch": 0,
223
+ "scale_accuracies": {
224
+ "256": 62.524
225
+ },
226
+ "training_history": {
227
+ "epochs": [
228
+ 1
229
+ ],
230
+ "train_loss": [
231
+ 2.9751985085156605
232
+ ],
233
+ "train_acc": [
234
+ 56.42811072509152
235
+ ],
236
+ "val_acc": [
237
+ 62.524
238
+ ],
239
+ "scale_accs": {
240
+ "256": [
241
+ 62.524
242
+ ]
243
+ },
244
+ "lr": [
245
+ 0.00975530705321762
246
+ ]
247
+ }
248
+ },
249
+ "train_config": {
250
+ "name": "david_training",
251
+ "run_id": "20251012_221046",
252
+ "dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
253
+ "model_variant": [
254
+ "clip_vit_b16",
255
+ "clip_vit_laion_b32",
256
+ "clip_vit_b32"
257
+ ],
258
+ "num_classes": 1000,
259
+ "preset": "high_accuracy",
260
+ "custom_config_path": null,
261
+ "num_classes_override": null,
262
+ "use_belly_override": null,
263
+ "belly_expand_override": null,
264
+ "progressive_training_override": true,
265
+ "scale_warmup_epochs_override": {
266
+ "256": 0,
267
+ "512": 1,
268
+ "768": 2,
269
+ "1024": 3,
270
+ "1280": 4
271
+ },
272
+ "num_epochs": 10,
273
+ "batch_size": 1024,
274
+ "learning_rate": 0.01,
275
+ "weight_decay": 1e-05,
276
+ "warmup_epochs": 3,
277
+ "use_rose_loss": true,
278
+ "rose_initial_weight": 0.2,
279
+ "rose_max_weight": 0.8,
280
+ "rose_weight_schedule": "adaptive",
281
+ "use_cayley_loss": false,
282
+ "cayley_weight": 0.01,
283
+ "scale_loss_balance": null,
284
+ "use_mixed_precision": false,
285
+ "gradient_clip": 10.0,
286
+ "scheduler_type": "cosine_restarts",
287
+ "min_lr": 1e-06,
288
+ "freeze_strategy": "never",
289
+ "freeze_threshold": 90.0,
290
+ "unfreeze_on_plateau": true,
291
+ "patience": 10,
292
+ "track_gradients": true,
293
+ "gradient_scale_threshold": 1e-05,
294
+ "gradient_scale_multiplier": 10.0,
295
+ "log_interval": 50,
296
+ "val_interval": 1,
297
+ "save_interval": 5,
298
+ "log_fusion_weights": true,
299
+ "log_loss_components": true,
300
+ "save_format": "safetensors",
301
+ "hf_repo": "AbstractPhil/david-shared-space",
302
+ "upload_to_hub": true,
303
+ "base_dir": "./david_training",
304
+ "num_workers": 10,
305
+ "pin_memory": true,
306
+ "prefetch_factor": 4,
307
+ "persistent_workers": true
308
+ }
309
+ }