AbstractPhil commited on
Commit
19316e9
·
verified ·
1 Parent(s): 55f54f5

Update best_model_acc71.40_metadata.json - Run 20251012_194945

Browse files
weights/David-partial_shared-hierarchical_tree/20251012_194945/best_model_acc71.40_metadata.json ADDED
@@ -0,0 +1,339 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1,
3
+ "optimizer_state_dict": {
4
+ "state": {
5
+ "0": {
6
+ "step": "tensor(5006.)",
7
+ "exp_avg": "tensor([[-1.1102e-05, -2.8265e-05, 1.8736e-05, ..., 1.3455e-05,\n 1.4656e-05, -6.3916e-06],\n [-4.7670e-05, 7.8663e-05, 7.9609e-05, ..., -1.6858e-05,\n 1.9270e-05, 2.8278e-06],\n [-9.1060e-06, 1.6025e-05, -1.0247e-05, ..., 4.2748e-05,\n 2.1269e-05, 1.8774e-05],\n ...,\n [-1.0568e-04, 3.6977e-05, -1.3958e-05, ..., -2.1965e-05,\n -1.0222e-05, 2.1823e-06],\n [ 5.5064e-05, -1.0379e-04, 5.1612e-05, ..., 1.3015e-05,\n 1.1095e-05, -3.9431e-05],\n [ 9.9883e-05, 3.3452e-05, -2.5955e-05, ..., 1.1633e-05,\n -2.2970e-05, -7.4984e-06]], device='cuda:0')",
8
+ "exp_avg_sq": "tensor([[1.5540e-08, 1.0690e-07, 4.4545e-08, ..., 1.1972e-08, 7.2417e-09,\n 1.2149e-08],\n [3.4215e-08, 7.5094e-08, 3.7728e-08, ..., 1.6000e-08, 1.4266e-08,\n 1.3483e-08],\n [1.3132e-08, 1.1349e-07, 3.4704e-08, ..., 9.3921e-09, 6.2301e-09,\n 7.0934e-09],\n ...,\n [6.8438e-08, 7.5882e-08, 1.0577e-08, ..., 9.1364e-09, 1.6166e-08,\n 9.8856e-09],\n [3.1686e-08, 8.5556e-08, 3.9887e-08, ..., 1.1797e-08, 1.1072e-08,\n 1.0028e-08],\n [9.4575e-08, 7.8050e-08, 1.5122e-08, ..., 1.7934e-08, 2.4583e-08,\n 1.4512e-08]], device='cuda:0')"
9
+ },
10
+ "1": {
11
+ "step": "tensor(5006.)",
12
+ "exp_avg": "tensor([-9.3524e-05, -2.6483e-04, -2.1244e-04, -1.0133e-03, 1.0737e-03,\n -1.5093e-04, 1.5592e-41, -1.4023e-04, -6.7457e-04, 1.6105e-04,\n -1.1014e-03, 1.0991e-03, 1.4101e-04, 8.5025e-04, -8.7688e-04,\n -1.5157e-03, 1.1988e-03, 1.1991e-03, 2.4170e-04, -6.8603e-05,\n 6.9218e-04, 1.2967e-03, -1.2505e-03, 9.1271e-04, 3.1903e-04,\n 2.7904e-04, 1.0776e-03, -4.7376e-04, -7.7515e-04, 1.0674e-04,\n -1.4992e-03, 1.2823e-04, 4.5403e-04, -1.2191e-03, 4.2839e-04,\n 1.6730e-04, 5.4630e-04, 4.8297e-05, -9.2445e-04, -9.6718e-04,\n 4.1407e-06, -3.2857e-04, 1.2298e-03, 2.0634e-03, 1.6388e-04,\n 2.2568e-04, -1.8765e-03, 1.1906e-04, 7.1247e-04, -1.4133e-04,\n -1.3901e-03, 1.2738e-03, 1.1137e-03, -4.3189e-04, 4.7465e-04,\n 4.5918e-05, 1.1704e-03, -2.7464e-04, -2.9545e-05, 1.5069e-03,\n -7.1889e-04, 3.3825e-04, 5.6839e-04, -2.1567e-03, 3.2270e-04,\n 4.2724e-04, 6.8517e-04, 1.0898e-04, 1.7854e-03, -1.0207e-03,\n -3.3881e-05, -7.0363e-04, 5.4566e-04, -9.1466e-04, -1.4692e-03,\n 2.3179e-04, -4.0871e-04, 1.5503e-03, -9.3588e-04, 1.0607e-03,\n 4.7690e-04, -4.8967e-04, -9.7092e-05, 1.1148e-03, 2.8300e-03,\n 5.0687e-04, 8.2986e-04, -1.4374e-05, -1.1525e-03, 9.6437e-04,\n 2.4263e-03, 2.2900e-03, 5.7129e-04, 2.9572e-03, 1.9315e-03,\n -4.9835e-04, 4.3576e-04, -1.2021e-03, -2.9494e-04, 7.4510e-04,\n -1.4596e-03, 5.7152e-04, 1.3333e-03, 7.3233e-04, 8.0770e-04,\n 2.2750e-04, 5.3537e-04, 7.6498e-08, -6.8664e-04, 4.4054e-08,\n 1.6692e-03, -4.7351e-04, -3.1561e-03, 4.5608e-04, -4.9548e-04,\n -6.5677e-04, 1.2063e-03, -6.6815e-04, -1.2202e-03, -1.9695e-04,\n -6.8018e-04, -3.4610e-06, 3.3352e-04, 1.2191e-03, -9.9071e-05,\n 1.7011e-04, 8.0534e-04, 2.8993e-04, 5.7242e-04, -1.3677e-03,\n -1.4883e-03, -8.9086e-04, 1.6929e-04, -5.9474e-04, 2.2288e-03,\n 2.6574e-04, -7.0283e-05, 5.8385e-04, -4.9332e-04, -1.4012e-03,\n 8.5539e-04, 1.0820e-03, -1.9567e-03, 3.1956e-03, -6.2668e-05,\n -2.8605e-03, -1.3539e-04, -7.3320e-04, -9.0857e-04, -1.1768e-03,\n -4.1844e-04, -6.8175e-04, 1.3981e-03, -9.4421e-04, 4.4282e-04,\n 5.5268e-04, 1.6512e-03, -3.5402e-04, 5.6761e-04, -4.8969e-05,\n 2.3212e-04, 3.1427e-04, 3.6959e-03, -7.1391e-04, 7.5045e-04,\n 1.1198e-03, -1.0150e-03, -6.4672e-06, 2.0132e-03, 1.0064e-04,\n -5.5783e-04, -1.9791e-04, -2.9053e-04, 3.7409e-04, 2.8362e-04,\n 1.2628e-04, -4.8566e-04, -9.5112e-04, -1.2906e-03, 5.5137e-04,\n -1.3240e-03, -1.1369e-03, 1.2190e-03, -6.5407e-04, 1.1998e-03,\n 9.1056e-04, 1.0337e-03, 9.2695e-04, 3.1581e-05, -1.0108e-03,\n 1.7594e-04, 1.4502e-03, 1.1499e-03, 1.0647e-03, -7.1037e-04,\n -2.1165e-03, -1.1385e-04, 9.9460e-04, -4.1340e-04, 5.0121e-04,\n -1.2022e-03, -4.6241e-04, -3.0070e-04, 5.4502e-04, 1.3211e-03,\n 8.6204e-04, 1.9384e-03, 1.3470e-04, -3.9757e-04, -1.8404e-04,\n 1.7827e-03, 4.8036e-04, -9.7442e-05, 2.9662e-03, 3.0802e-04,\n -6.4396e-04, -1.1620e-03, 2.1621e-03, 2.4947e-03, -4.4618e-04,\n -2.9082e-04, 7.3692e-05, 1.4302e-04, 3.2486e-03, 1.2034e-04,\n 6.1575e-04, 7.2309e-04, -6.9734e-04, -2.0413e-04, -1.2615e-04,\n -2.1571e-04, 1.3975e-35, -4.6614e-04, -3.9080e-04, 1.1222e-03,\n -9.1296e-04, 1.3953e-04, -5.5466e-04, -1.2303e-03, -5.1265e-04,\n 4.2969e-04, 1.2047e-04, 2.5238e-04, 3.8175e-04, 1.0690e-04,\n -1.7357e-03, 1.1170e-03, -3.2641e-05, -1.3580e-04, -1.7312e-05,\n 1.9707e-03, 8.4386e-04, -4.9565e-05, -5.1754e-04, -1.3603e-03,\n -4.3827e-05, 1.2523e-04, -2.7215e-04, 4.2006e-05, -3.4127e-05,\n 7.7192e-04, 3.1890e-04, -2.7214e-03, 1.2670e-03, 2.0340e-04,\n -1.2634e-03, -3.2981e-04, 6.4828e-04, -1.4563e-04, -1.1572e-03,\n -3.5993e-04, 1.6197e-03, -1.6184e-03, 6.5343e-04, -1.4284e-03,\n 5.0537e-04, 8.6351e-04, -4.5828e-04, 3.2333e-04, -9.4557e-05,\n -1.1097e-03, -4.4240e-04, 1.9383e-04, 1.3318e-03, -2.4555e-04,\n -7.0974e-05, 1.5471e-04, -3.4326e-04, 2.2553e-04, 5.6052e-45,\n -1.1286e-03, -1.1769e-03, 8.3278e-04, 7.9517e-15, -1.8602e-03,\n 1.1592e-03, 3.7793e-04, 7.0635e-04, -2.4250e-04, -6.1304e-04,\n -4.0441e-04, -1.6141e-04, -1.1990e-03, -5.6851e-04, -2.8730e-04,\n 3.4397e-03, 2.9832e-04, 3.3728e-05, 1.4627e-03, 2.0340e-03,\n 1.9875e-04, -2.4680e-03, -3.1710e-04, 9.9026e-04, 1.1578e-03,\n 5.5384e-04, -2.9804e-04, -8.7622e-04, -1.0593e-03, 3.5163e-03,\n 2.7079e-04, -1.7558e-03, 9.9492e-04, -4.1512e-04, -5.9920e-04,\n -5.5398e-04, -1.3962e-03, -1.4051e-03, 1.1045e-05, 2.0840e-04,\n -9.3341e-04, -2.5469e-04, -2.4514e-04, 1.6494e-03, -1.4498e-03,\n -2.2662e-03, 1.2080e-03, 3.4170e-04, 5.0330e-04, -1.1068e-03,\n -7.3262e-04, -5.5628e-04, 7.9664e-04, 1.0669e-03, 1.9202e-03,\n -9.5093e-05, 9.6795e-04, 9.9232e-04, 6.0781e-05, 8.3829e-04,\n -6.1608e-04, -1.4615e-03, 2.9712e-04, -2.0834e-03, -7.9299e-04,\n -6.8853e-04, 1.2447e-03, -8.3668e-04, 1.6432e-03, -2.5935e-04,\n 7.0321e-05, 5.3620e-04, 1.7841e-06, 1.7136e-03, -9.9736e-04,\n -2.3537e-03, -6.4037e-04, 6.0884e-04, 1.1158e-03, -8.5832e-04,\n 6.9547e-04, -4.7576e-04, 8.7080e-04, 4.8085e-05, -7.8019e-04,\n -7.0610e-04, 1.2104e-03, 8.7728e-04, -4.9241e-04, 6.2915e-04,\n -1.1334e-04, -6.9035e-04, -1.1615e-03, 1.2706e-03, 2.7374e-04,\n 4.8304e-04, -7.1188e-04, 1.5892e-03, -1.1769e-03, 1.8924e-03,\n 5.5354e-04, 2.4457e-03, -8.0773e-04, -7.8396e-05, 6.6923e-04,\n -3.0883e-04, 1.0479e-04, 3.7889e-04, -9.5098e-05, -2.1080e-04,\n 3.9098e-05, 1.4860e-03, 1.4287e-04, 7.9616e-04, 1.1744e-03,\n 1.2949e-03, -1.3613e-03, -6.9435e-04, -1.4339e-04, -1.3784e-03,\n -5.9404e-04, 9.8915e-05, 5.0154e-04, -1.1022e-03, -3.0018e-04,\n -8.2800e-04, -5.8926e-04, 1.2660e-03, 1.0326e-03, -6.0273e-04,\n -1.2980e-03, 1.2370e-04, -4.6096e-06, -1.0685e-03, -9.3465e-04,\n 2.3984e-03, 1.4107e-03, -1.6447e-04, -1.2127e-03, 6.9694e-04,\n -8.8384e-05, 6.9089e-04, 1.2754e-03, 4.7109e-04, 5.3703e-04,\n 2.4302e-03, -1.1328e-03, 3.4651e-04, 1.1090e-03, -7.7728e-04,\n 3.5762e-04, 9.0707e-04, 6.2826e-05, 3.3642e-04, -1.1808e-03,\n -1.6733e-06, -1.1486e-03, 8.7257e-05, 2.0762e-03, -1.8001e-03,\n -5.9867e-04, 6.0385e-04, -1.6382e-03, -3.0561e-03, -2.7935e-04,\n 1.5777e-04, -1.7651e-03, -6.7492e-04, -1.0848e-03, -5.9083e-04,\n 5.5051e-04, -4.7253e-04, -5.4657e-04, -8.4448e-05, 4.7663e-04,\n 1.3784e-03, -1.4494e-03, 6.2173e-04, 1.9006e-04, -3.3795e-04,\n -2.0649e-04, 4.5699e-04, -1.4130e-03, 8.6535e-04, -6.7460e-04,\n 9.0744e-04, 2.1150e-08, -7.7920e-04, 5.6052e-45, -2.0617e-04,\n 2.4040e-04, 1.6306e-03, -2.0740e-03, 1.0308e-03, 6.0395e-04,\n 1.6011e-03, -1.5117e-03, -1.2945e-03, 2.9267e-04, -1.5200e-03,\n 1.5595e-03, 7.1129e-04, 3.5071e-04, 1.5805e-04, -5.0273e-04,\n -8.1936e-04, 3.9015e-04, 2.3356e-03, 2.3479e-04, 2.9584e-03,\n -6.2896e-04, -1.1220e-03, -4.4995e-04, 2.3176e-03, 4.8723e-04,\n -6.6979e-04, -4.7862e-04, -9.1846e-04, -1.4077e-03, 8.8805e-04,\n -1.3252e-03, -3.4766e-04, -1.5267e-03, -3.6444e-10, -5.2575e-04,\n 3.6681e-04, 1.1974e-04, 8.2155e-04, 1.5932e-03, -4.4865e-04,\n -1.9749e-03, -1.3785e-03, -1.6971e-03, 1.7132e-04, 5.6052e-45,\n 1.5017e-05, -4.4825e-04, 3.4240e-04, 2.4665e-06, 1.0678e-03,\n -2.6263e-04, -1.8748e-03, 6.6138e-04, -2.8781e-03, -3.7668e-04,\n 7.9462e-04, 1.1491e-03, 3.2378e-04, -3.9735e-04, 4.0062e-04,\n -4.9844e-04, -2.2753e-05, -3.6486e-04, -1.8395e-04, 2.6429e-03,\n 1.6314e-03, -9.2828e-04, -1.4716e-03, -1.2666e-03, -4.9216e-04,\n -1.1428e-03, -2.6159e-04, 1.1843e-03, -2.6419e-03, -3.0965e-04,\n -2.1326e-05, -8.0841e-04, 1.8770e-03, 1.0900e-03, -1.9023e-04,\n -1.8945e-04, -6.7407e-04, -1.0025e-03, -8.0724e-04, 4.8419e-04,\n 1.1439e-03, 1.7990e-03, 4.2971e-04, -2.6087e-04, -5.8484e-04,\n 6.4674e-04, 2.7973e-04, 3.1806e-04, 4.7927e-04, 5.2978e-04,\n 2.0682e-04, 4.0024e-04, 6.0253e-04, 2.0087e-04, -4.6925e-04,\n -1.7529e-03, 8.8559e-04, -3.4968e-04, -2.8153e-05, -3.8088e-04,\n 5.1644e-05, -1.8571e-03, 2.7964e-04, -3.6047e-04, -1.2661e-03,\n -9.4009e-04, -1.9039e-03, -1.8043e-03, -2.7859e-05, 9.7856e-05,\n -2.9953e-04, 2.9123e-04, -1.1022e-03, -4.2280e-04, 1.2675e-03,\n -3.3538e-04, 6.9308e-04, 3.1160e-04, 5.9763e-04, 1.1270e-03,\n -1.1930e-03, -1.5519e-05, -1.8704e-04, 1.3827e-04, 1.0142e-34,\n -4.8374e-05, -1.0582e-03, -1.1587e-03, -1.1439e-03, 1.1192e-04,\n -1.6054e-03, 1.4309e-03, 7.6318e-04, 1.2332e-03, -4.9970e-04,\n -1.3140e-03, -1.9052e-03, 1.4506e-03, -5.4092e-04, 9.7031e-04,\n -5.0384e-04, -5.5273e-04, 1.9748e-04, -1.4853e-03, 2.1295e-04,\n 2.6515e-04, 4.4450e-04, 1.9087e-03, -4.1520e-04, 2.2447e-03,\n -9.8173e-04, 4.7065e-04, 1.0959e-04, 6.7012e-04, -6.9223e-04,\n 2.2352e-03, -1.2468e-03, -1.3782e-03, 4.0307e-04, -1.0415e-03,\n 7.9789e-04, 4.3228e-04, 6.5449e-04, -1.3047e-03, -1.3158e-03,\n -9.6849e-04, 1.9917e-03, -1.5109e-04, 1.6693e-04, -9.5253e-04,\n -1.1295e-04, 7.7944e-04, -9.2438e-04, -1.3467e-04, -1.8998e-03,\n 4.7793e-04, 8.0715e-04, -1.3535e-04, -5.8384e-17, 1.3438e-03,\n -4.3074e-04, 4.3083e-04, -1.1026e-03, 1.3162e-03, -5.5436e-05,\n 3.1309e-04, -6.5870e-04, 7.5111e-04, -6.6877e-05, -2.5341e-04,\n 8.3558e-04, 5.0146e-04, 3.4608e-04, 1.5283e-04, -3.3549e-04,\n 2.1428e-04, -9.4416e-04, -1.4293e-03, 7.4642e-04, 5.3609e-04,\n 3.0499e-03, -1.9903e-04, -1.0596e-03, 2.0855e-03, 9.8147e-04,\n -9.5699e-04, 1.8998e-04, -1.2876e-03, 1.1373e-03, 3.1583e-04,\n 2.4793e-04, -2.0866e-04, 1.9586e-03, 3.8283e-04, -3.3608e-04,\n 7.9588e-04, -8.1775e-04, 7.9821e-04, 9.9319e-04, 9.9642e-04,\n 5.5505e-04, -1.4112e-03, 3.0018e-04, 2.1346e-03, -2.1958e-04,\n -8.7263e-05, -1.5472e-04, 2.9593e-04, 6.4011e-04, -2.4097e-03,\n -3.5024e-05, -6.8720e-05, -4.5057e-04, 9.8936e-04, 1.0805e-04,\n -1.3959e-04, -1.2739e-03, 7.0088e-04, 7.8589e-04, -2.1943e-04,\n -2.9158e-04, 7.2064e-04, 4.2117e-05, -1.2402e-03, -5.5763e-04,\n -5.0005e-04, -1.3645e-03, -6.4022e-04, 1.7814e-03, 1.3935e-03,\n 9.9486e-05, 8.0507e-04, 4.2268e-04, -4.7554e-04, -8.5764e-04,\n -4.3196e-04, -1.0086e-04, 2.7705e-03, -1.2398e-04, -8.4119e-04,\n 6.5615e-04, -1.4566e-04, -5.4842e-06, 1.0385e-03, -3.1963e-04,\n -2.1410e-04, 1.0374e-03, 1.9612e-03, 4.3915e-04, -1.6074e-04,\n -3.0733e-04, 7.9461e-04, 1.5130e-03, -1.5792e-04, 1.8014e-03,\n -9.3692e-04, -1.6475e-03, 8.8721e-04, 1.3705e-03, 5.3955e-04,\n -7.7483e-04, -3.5056e-04, 6.1213e-05], device='cuda:0')",
13
+ "exp_avg_sq": "tensor([1.1874e-05, 1.7575e-05, 9.9427e-06, 2.1487e-05, 2.2286e-05, 1.0912e-05,\n 4.3787e-09, 4.0702e-06, 2.4209e-05, 1.2851e-05, 3.2095e-05, 1.8971e-05,\n 9.1357e-06, 1.6158e-05, 1.9294e-05, 7.0072e-06, 1.8233e-05, 3.0150e-05,\n 8.9884e-06, 1.7313e-05, 1.6427e-05, 2.4518e-05, 1.6423e-05, 1.4999e-05,\n 1.8147e-05, 2.9541e-05, 3.1209e-05, 1.9342e-05, 2.3942e-05, 1.5662e-05,\n 2.2840e-05, 2.5998e-05, 1.7813e-05, 2.2978e-05, 1.2867e-05, 1.0726e-05,\n 2.0717e-05, 1.3117e-05, 2.2389e-05, 9.9147e-06, 5.9563e-06, 8.3289e-06,\n 1.3960e-05, 1.6238e-05, 4.0761e-06, 2.8501e-05, 2.1945e-05, 8.7604e-06,\n 1.7509e-05, 2.2875e-05, 9.2391e-06, 1.5486e-05, 1.5938e-05, 3.3007e-05,\n 2.5840e-05, 2.1962e-05, 1.4869e-05, 8.0597e-06, 1.1696e-05, 1.9724e-05,\n 2.2725e-05, 1.9472e-05, 1.3699e-05, 2.3473e-05, 1.1774e-05, 1.4602e-05,\n 5.2196e-06, 1.6749e-05, 1.9808e-05, 2.5284e-05, 9.1351e-06, 4.3082e-05,\n 1.8739e-05, 2.0986e-05, 2.3858e-05, 6.4750e-06, 1.0408e-05, 2.4028e-05,\n 1.3639e-05, 6.7025e-06, 2.4197e-06, 1.9558e-05, 1.3511e-05, 1.8706e-05,\n 3.6145e-05, 1.5887e-05, 1.7950e-05, 6.7844e-06, 1.9759e-05, 9.2361e-06,\n 2.3253e-05, 2.0936e-05, 1.6261e-05, 3.2197e-05, 2.4419e-05, 1.2322e-05,\n 6.7583e-06, 2.6068e-05, 6.2825e-06, 1.1546e-05, 2.1161e-05, 6.9492e-06,\n 9.3802e-06, 2.1892e-05, 1.7436e-05, 1.9706e-05, 1.8638e-05, 2.1076e-08,\n 6.0636e-06, 1.1331e-08, 2.9888e-05, 9.5933e-06, 4.1292e-05, 1.4549e-05,\n 1.2296e-05, 1.6373e-05, 1.9280e-05, 1.8099e-05, 2.4833e-05, 1.5105e-05,\n 2.6184e-05, 1.0099e-05, 3.3881e-05, 1.2606e-05, 1.0782e-05, 1.6811e-05,\n 2.0676e-05, 2.7389e-06, 1.0964e-05, 2.7551e-05, 8.0876e-06, 5.1826e-06,\n 2.3211e-05, 3.0179e-05, 1.3051e-05, 1.6604e-05, 9.8253e-06, 2.5336e-05,\n 1.5023e-05, 1.1916e-05, 2.1227e-05, 1.7268e-05, 3.1200e-05, 2.6024e-05,\n 3.9795e-06, 2.3104e-05, 1.2850e-05, 1.7123e-05, 1.6869e-05, 2.0807e-05,\n 3.0358e-05, 2.5428e-05, 1.6987e-05, 2.9526e-05, 2.4965e-05, 2.7295e-05,\n 1.4605e-05, 3.4563e-05, 2.1062e-05, 1.3550e-05, 9.5256e-06, 1.3008e-05,\n 3.0996e-05, 8.1133e-06, 7.6888e-06, 2.3371e-05, 1.8251e-05, 2.3150e-05,\n 2.8604e-05, 8.4538e-06, 2.8296e-05, 2.5490e-05, 2.4604e-05, 1.2381e-05,\n 1.3748e-05, 1.3519e-05, 1.6806e-05, 2.3238e-05, 1.9516e-05, 8.7612e-06,\n 2.4264e-05, 2.2917e-05, 1.2306e-05, 1.5101e-05, 2.2519e-05, 2.1413e-05,\n 1.6562e-05, 1.9602e-05, 1.3649e-05, 1.1528e-05, 8.0020e-06, 1.4142e-05,\n 1.9817e-05, 9.3227e-06, 8.8881e-06, 2.8140e-05, 1.7764e-05, 1.9819e-05,\n 1.3397e-05, 8.8208e-06, 1.9484e-05, 1.6725e-05, 1.6862e-05, 3.4892e-05,\n 1.3616e-05, 2.5356e-05, 2.5626e-05, 1.2610e-05, 5.4982e-06, 9.0222e-06,\n 2.0402e-05, 2.3291e-05, 2.5272e-05, 3.7572e-05, 1.5603e-05, 1.2657e-05,\n 2.0885e-05, 2.0549e-05, 2.4305e-05, 1.1999e-05, 1.0330e-05, 2.1247e-05,\n 2.6133e-05, 4.1221e-05, 6.9837e-06, 1.4677e-05, 1.7511e-05, 1.5549e-05,\n 6.8446e-06, 8.2766e-06, 2.3729e-05, 1.2852e-07, 3.1859e-05, 1.6103e-05,\n 1.0592e-05, 2.3203e-05, 1.1925e-05, 1.7151e-05, 2.5208e-05, 8.4224e-06,\n 1.0261e-05, 9.8818e-06, 1.8143e-05, 1.2198e-05, 9.6197e-06, 1.5742e-05,\n 1.7862e-05, 2.2663e-05, 2.1859e-05, 1.2187e-06, 3.2630e-05, 1.7635e-05,\n 4.0601e-05, 1.3247e-05, 1.7464e-05, 2.4803e-05, 2.3689e-05, 2.2997e-05,\n 1.4797e-05, 4.3459e-06, 6.2467e-06, 8.2389e-06, 2.2528e-05, 3.2221e-05,\n 2.0953e-05, 1.2284e-05, 1.2776e-05, 1.1891e-05, 1.3819e-05, 2.6613e-05,\n 1.5905e-05, 1.6538e-05, 1.7738e-05, 3.6972e-05, 1.4625e-05, 1.6239e-05,\n 1.5473e-05, 1.0549e-05, 7.5876e-06, 7.1862e-06, 2.1748e-05, 1.5505e-05,\n 1.1146e-05, 2.8515e-05, 1.0188e-05, 6.5933e-06, 1.5258e-05, 1.1982e-05,\n 1.5025e-05, 6.4988e-10, 2.0007e-05, 1.4990e-05, 9.3176e-06, 8.8751e-10,\n 3.2605e-05, 2.2074e-05, 1.9330e-05, 1.2219e-05, 9.1855e-06, 7.2646e-06,\n 1.6114e-05, 1.3294e-05, 2.2385e-05, 1.4390e-05, 1.1697e-05, 3.7030e-05,\n 9.9261e-06, 2.4018e-05, 1.4693e-05, 3.2197e-05, 2.9577e-05, 1.6818e-05,\n 7.6699e-06, 1.3096e-05, 2.2360e-05, 1.8392e-06, 7.4989e-06, 1.1232e-05,\n 3.4648e-05, 3.5731e-05, 6.8116e-06, 2.5502e-05, 1.0039e-05, 1.0072e-05,\n 1.2892e-05, 1.9289e-05, 2.5698e-05, 4.2053e-05, 8.3491e-06, 2.6551e-05,\n 1.0497e-05, 1.0826e-05, 7.9840e-06, 2.9429e-05, 2.0521e-05, 1.6837e-05,\n 1.8059e-05, 1.8947e-05, 2.5517e-05, 2.4694e-06, 1.4885e-05, 2.2466e-05,\n 1.6857e-05, 9.3328e-06, 1.4936e-05, 1.7430e-05, 1.7121e-05, 1.6885e-05,\n 1.5235e-05, 1.0666e-05, 1.3089e-05, 2.0231e-05, 1.9294e-05, 2.9469e-05,\n 9.5741e-06, 1.0016e-05, 2.1985e-05, 1.0464e-05, 1.8800e-05, 1.6561e-05,\n 7.6691e-06, 2.3952e-05, 4.5127e-09, 2.1780e-05, 1.5893e-05, 1.4011e-05,\n 2.4637e-05, 2.1274e-05, 1.1475e-05, 2.3385e-05, 2.3985e-05, 1.4468e-05,\n 2.6090e-05, 2.4821e-05, 1.1464e-05, 1.6419e-05, 2.4077e-05, 1.9227e-05,\n 1.3713e-05, 1.5054e-05, 1.0981e-05, 2.2408e-06, 1.9236e-05, 1.5254e-05,\n 3.2508e-06, 1.4408e-05, 4.5553e-06, 6.0716e-06, 2.1863e-05, 2.1516e-05,\n 2.4641e-05, 1.6832e-05, 2.4645e-05, 2.7974e-05, 1.2261e-05, 1.3853e-05,\n 2.0873e-05, 1.9195e-05, 1.5211e-05, 1.8551e-05, 3.5166e-05, 1.1762e-05,\n 5.7018e-06, 2.9213e-05, 1.4847e-05, 2.8578e-05, 3.3220e-05, 6.2299e-06,\n 9.3954e-06, 2.4919e-05, 7.5144e-06, 1.2046e-05, 8.2737e-06, 1.8668e-05,\n 2.5841e-05, 5.4846e-06, 2.4054e-05, 2.9074e-05, 2.1027e-05, 6.0805e-06,\n 2.3654e-05, 2.8768e-05, 4.9239e-08, 1.5683e-05, 9.0213e-06, 1.5884e-05,\n 7.3513e-06, 1.8204e-05, 3.1633e-05, 7.8388e-06, 1.6585e-05, 1.7458e-05,\n 1.3219e-05, 1.8277e-05, 1.0946e-05, 3.6361e-05, 1.9726e-05, 9.3728e-06,\n 1.2185e-05, 1.4195e-05, 2.4588e-05, 2.2978e-05, 7.2627e-06, 1.7323e-05,\n 2.9174e-05, 2.2917e-05, 3.0319e-05, 3.1371e-05, 1.8119e-05, 2.8173e-05,\n 2.2848e-05, 3.0372e-05, 2.4743e-05, 2.0176e-05, 1.6234e-05, 1.0461e-05,\n 2.7461e-05, 8.9972e-06, 2.1583e-05, 7.4996e-06, 1.1280e-05, 1.6986e-05,\n 1.1546e-05, 1.0584e-05, 1.1025e-05, 1.9278e-05, 1.6285e-05, 2.8406e-06,\n 4.4928e-06, 9.3362e-06, 1.0606e-05, 1.3969e-05, 1.1189e-05, 1.2537e-05,\n 1.1150e-05, 2.2025e-05, 2.0166e-09, 9.5606e-06, 7.4709e-11, 9.4819e-06,\n 5.2372e-06, 1.5150e-05, 2.6355e-05, 1.0586e-05, 6.7608e-06, 2.3856e-05,\n 2.0998e-05, 1.3328e-05, 2.0031e-05, 2.3063e-05, 1.7127e-05, 6.0330e-06,\n 7.9519e-06, 2.5325e-05, 3.3775e-06, 1.0970e-05, 3.3781e-07, 3.7038e-05,\n 4.8309e-05, 2.9731e-05, 1.3201e-05, 2.4203e-05, 4.7049e-06, 2.9893e-05,\n 2.4161e-05, 9.1261e-06, 1.7273e-05, 1.8857e-05, 2.9297e-05, 7.7461e-06,\n 1.4151e-05, 1.7877e-05, 2.3207e-05, 4.2886e-09, 2.2379e-05, 1.6183e-05,\n 1.9621e-05, 1.2358e-05, 1.4290e-05, 2.9682e-05, 2.4003e-05, 1.0405e-05,\n 1.7709e-05, 2.8511e-05, 7.2089e-11, 2.2240e-05, 3.9867e-06, 1.4095e-05,\n 2.3114e-05, 2.7100e-05, 1.2523e-05, 2.1592e-05, 5.9664e-06, 1.9056e-05,\n 2.1439e-05, 2.8135e-05, 1.4330e-05, 1.1105e-05, 1.5490e-05, 6.2024e-06,\n 2.5409e-05, 1.1097e-05, 9.6690e-06, 1.4932e-05, 2.6350e-05, 1.5336e-05,\n 1.6269e-05, 3.2577e-05, 1.7235e-05, 2.0026e-05, 2.1763e-05, 2.1639e-05,\n 2.4361e-05, 2.1507e-05, 2.7363e-05, 9.7842e-06, 2.3400e-05, 1.5504e-05,\n 2.3613e-05, 2.1337e-05, 1.0622e-05, 1.4883e-05, 1.6259e-05, 2.1495e-05,\n 3.2031e-05, 1.8716e-05, 2.2747e-05, 1.0966e-05, 1.3202e-05, 3.2273e-05,\n 1.1176e-05, 1.6749e-05, 2.1799e-05, 2.6023e-05, 2.6529e-05, 2.9301e-05,\n 1.3559e-05, 1.1013e-05, 1.5382e-05, 2.5669e-05, 1.1280e-05, 1.2316e-05,\n 2.3692e-05, 3.2214e-05, 6.0962e-06, 1.3739e-05, 3.1214e-05, 2.6604e-05,\n 6.2331e-06, 1.5943e-05, 2.1680e-05, 1.9065e-05, 2.5435e-05, 2.9522e-05,\n 1.6733e-05, 2.1371e-05, 1.7177e-05, 2.9101e-05, 1.1532e-05, 1.4714e-05,\n 8.2486e-06, 2.0646e-05, 2.6334e-05, 1.6523e-05, 2.0416e-05, 1.4772e-05,\n 4.6939e-06, 1.2742e-05, 2.1731e-05, 9.8793e-10, 2.2968e-05, 1.7680e-05,\n 1.7961e-05, 1.3608e-05, 1.1621e-05, 2.1563e-05, 1.5588e-05, 1.1067e-05,\n 1.0225e-05, 6.3691e-06, 2.4517e-05, 1.8124e-05, 1.3821e-05, 1.8601e-05,\n 1.4481e-05, 1.7691e-05, 2.9109e-05, 1.1632e-05, 1.7370e-05, 1.9490e-05,\n 2.6547e-05, 1.7417e-05, 2.0594e-05, 8.8935e-06, 1.9653e-05, 1.7549e-05,\n 1.1998e-05, 3.4976e-05, 1.1731e-05, 4.4733e-05, 2.4645e-05, 1.5566e-05,\n 2.2690e-05, 6.7860e-06, 1.1783e-05, 1.5204e-05, 1.2338e-05, 1.9668e-05,\n 1.9263e-05, 1.9518e-05, 1.4164e-05, 1.6230e-05, 2.3533e-05, 1.3893e-05,\n 1.9684e-05, 9.2814e-06, 1.7110e-05, 1.4549e-05, 7.4949e-06, 1.0393e-05,\n 1.0572e-05, 2.1247e-05, 6.9828e-06, 5.0273e-09, 1.3797e-05, 2.8076e-05,\n 2.1414e-05, 1.3704e-05, 1.4242e-05, 8.0207e-06, 1.8334e-05, 1.5423e-05,\n 1.2659e-05, 8.8168e-06, 1.4138e-05, 2.6424e-05, 1.9833e-05, 8.3057e-06,\n 2.4195e-05, 1.7277e-05, 2.6132e-05, 1.5263e-05, 1.9307e-05, 1.8898e-05,\n 1.8537e-05, 2.9254e-05, 1.4254e-05, 1.6567e-05, 1.3436e-05, 1.0012e-05,\n 2.1050e-05, 1.0207e-05, 2.4325e-05, 1.5581e-05, 1.1037e-05, 1.1280e-05,\n 1.4493e-05, 2.8952e-05, 1.9329e-05, 1.7974e-05, 3.1207e-05, 4.7100e-06,\n 2.3778e-05, 2.1742e-05, 1.2663e-05, 1.2974e-05, 2.4433e-05, 2.1049e-05,\n 2.3968e-05, 1.7096e-05, 2.3433e-06, 2.1859e-05, 2.1632e-05, 2.0333e-05,\n 2.5397e-05, 1.3827e-05, 1.2915e-06, 1.1808e-05, 9.7985e-06, 3.3136e-05,\n 1.1169e-05, 2.5888e-05, 2.4733e-05, 1.8007e-05, 7.4363e-06, 2.0548e-05,\n 8.6092e-06, 1.7209e-05, 1.9614e-05, 4.5280e-06, 9.9468e-06, 2.3980e-05,\n 2.0841e-05, 2.3021e-05, 2.4843e-05, 1.0418e-05, 1.5016e-05, 1.5675e-05,\n 1.3920e-05, 3.4649e-05, 1.3842e-05, 3.7749e-05, 2.6459e-05, 2.1213e-05,\n 1.3256e-05, 1.4680e-05, 2.0424e-05, 1.6231e-05, 2.6082e-05, 5.4940e-06,\n 1.1405e-05, 1.0867e-05, 8.2931e-06, 2.4117e-05, 2.2394e-05, 2.4754e-05,\n 2.3403e-05, 1.5960e-05, 1.1202e-05, 2.3229e-05, 1.7608e-05, 2.6702e-05,\n 2.8554e-05, 1.6292e-05, 1.0495e-05, 1.6017e-05, 1.2584e-05, 2.7411e-05],\n device='cuda:0')"
14
+ },
15
+ "2": {
16
+ "step": "tensor(5006.)",
17
+ "exp_avg": "tensor([[ 4.1192e-06, 2.6239e-05, -2.4192e-07, ..., 2.1788e-06,\n 5.4424e-06, 5.4998e-05],\n [ 3.3242e-06, -1.0849e-05, 9.4152e-06, ..., 2.6490e-06,\n 1.2981e-06, 4.0970e-06],\n [-3.0909e-05, 1.4179e-05, 3.3726e-05, ..., 2.5638e-06,\n 6.5632e-07, 1.9418e-06],\n ...,\n [ 3.9036e-05, -9.3464e-06, -7.2339e-06, ..., -6.5664e-05,\n 6.3489e-06, -1.8406e-04],\n [-3.3070e-05, -1.2893e-05, -1.5872e-05, ..., 2.3847e-06,\n -1.3001e-06, 2.2773e-05],\n [-9.3321e-07, -3.1515e-06, 4.2756e-06, ..., 7.1951e-06,\n 5.8932e-06, -5.6239e-05]], device='cuda:0')",
18
+ "exp_avg_sq": "tensor([[6.8554e-09, 1.3538e-08, 1.8369e-09, ..., 6.9194e-10, 1.5915e-07,\n 1.5037e-08],\n [9.3643e-09, 5.7243e-09, 8.2666e-09, ..., 1.9552e-09, 4.5198e-10,\n 2.5314e-08],\n [2.9498e-08, 1.4530e-08, 6.8042e-09, ..., 1.7992e-09, 4.3271e-10,\n 1.3080e-09],\n ...,\n [2.3993e-08, 7.5581e-10, 2.4168e-09, ..., 7.0278e-09, 5.0233e-10,\n 2.3479e-08],\n [1.8787e-08, 1.0700e-08, 3.3709e-08, ..., 1.2548e-08, 4.0536e-09,\n 1.7791e-08],\n [1.7146e-09, 4.6978e-10, 4.5948e-09, ..., 6.5415e-09, 1.6625e-08,\n 9.2231e-09]], device='cuda:0')"
19
+ },
20
+ "3": {
21
+ "step": "tensor(5006.)",
22
+ "exp_avg": "tensor([ 1.2977e-04, 4.5593e-06, 1.2792e-04, -1.8651e-05, 1.7488e-04,\n 5.6052e-45, -1.9865e-04, -4.9595e-04, 5.6052e-45, -2.9323e-04,\n -2.7281e-04, -1.9030e-04, 2.8972e-04, -3.4295e-04, -2.2397e-04,\n -2.9199e-04, 5.9578e-05, -1.6102e-04, 2.7234e-04, -3.0663e-04,\n 2.1002e-04, -4.1070e-05, 4.3322e-05, -8.3534e-05, 1.1470e-04,\n 7.2038e-05, -2.1653e-05, -3.6391e-04, 1.0984e-04, -6.8948e-05,\n 6.5089e-05, 2.0420e-04, -1.1621e-04, 4.3784e-04, 5.6052e-45,\n 5.6052e-45, 2.6671e-04, 1.2247e-04, -7.1568e-05, -2.1409e-05,\n -1.6883e-04, 3.9966e-05, 2.6843e-05, -1.4241e-04, 5.6052e-45,\n 1.8344e-05, -9.1519e-05, -4.4001e-04, 4.2671e-04, 2.0753e-04,\n -1.8035e-04, -1.4810e-04, 8.5736e-04, -3.8880e-04, -5.6137e-05,\n 8.9865e-05, 2.9478e-04, 4.5244e-22, -2.8025e-04, -2.3378e-04,\n 1.6368e-04, 1.8937e-04, 3.4538e-04, -3.1189e-06, -7.2922e-05,\n 1.8528e-04, 5.6052e-45, 4.6887e-05, -2.8865e-04, 5.6052e-45,\n 3.3681e-05, 4.8403e-04, -2.3824e-04, 2.8092e-05, -3.0983e-04,\n 1.8171e-04, -1.7446e-04, -1.0978e-04, 1.1575e-04, -2.6390e-04,\n -2.1042e-04, -3.7020e-04, -1.3973e-06, -2.0133e-17, 4.8809e-04,\n 9.6185e-05, 1.4245e-04, 1.2672e-04, 4.8182e-05, 4.0072e-04,\n -3.1191e-04, -1.0660e-04, -1.0152e-04, 2.3352e-04, 2.2328e-04,\n 1.8841e-04, 2.8225e-04, -1.8528e-05, -1.2143e-04, 1.6394e-04,\n -1.9529e-04, 6.3931e-05, 8.5216e-05, 5.6052e-45, -4.9964e-04,\n -4.6957e-04, -2.8986e-04, 6.5790e-05, 4.8662e-04, -3.6413e-04,\n -3.0261e-04, -9.9957e-05, 1.7418e-04, -1.3327e-04, 5.6052e-45,\n -9.1896e-05, -1.5365e-04, -4.0071e-04, 7.3556e-05, 4.4600e-04,\n 5.6052e-45, 8.2772e-05, -2.3712e-04, -2.4732e-06, -1.2258e-04,\n -1.8734e-04, 2.9284e-04, 5.6052e-45, -9.6856e-05, 1.1522e-04,\n -1.6518e-04, -1.8478e-05, -5.0603e-05, -1.4551e-04, -3.9853e-04,\n -2.1775e-04, 5.6052e-45, 2.1218e-04, 4.6548e-05, 6.3197e-05,\n -1.2168e-04, -1.9976e-05, -3.1376e-04, -4.6543e-04, 1.0562e-04,\n 5.6052e-45, -5.1083e-04, 8.6268e-25, -6.9860e-05, 1.2261e-05,\n 4.8243e-05, 5.6052e-45, -4.6617e-05, 5.6052e-45, 5.1767e-05,\n 1.1776e-05, 1.4007e-04, -2.3069e-05, 5.6052e-45, -8.5204e-05,\n 7.7896e-05, 6.8904e-05, 7.5845e-05, 3.3840e-05, 5.6052e-45,\n -3.3095e-04, -4.4317e-05, -2.9219e-04, -4.7811e-04, 5.6052e-45,\n 5.6052e-45, -1.5827e-04, -8.0561e-05, 5.6817e-05, -1.9737e-05,\n 6.5695e-04, 6.9641e-05, 3.2568e-04, 2.5603e-04, 5.6052e-45,\n 5.5778e-05, 2.6395e-04, -2.4373e-04, -7.3377e-05, 5.6052e-45,\n 1.1297e-04, 5.6052e-45, 5.6052e-45, -2.8043e-04, -7.6324e-05,\n 5.1052e-05, -1.1885e-04, 1.8403e-04, 4.2219e-05, 1.7548e-04,\n -1.1203e-05, 5.6052e-45, -1.2318e-04, 5.6052e-45, 5.6052e-45,\n 1.0280e-04, -1.5895e-04, 5.4264e-06, 3.3851e-04, 7.6222e-05,\n -1.0767e-05, 4.1888e-04, -2.4792e-04, 2.1624e-04, 7.6212e-05,\n -2.2858e-04, -1.6966e-04, -9.4406e-05, -2.7208e-04, -3.2607e-04,\n -1.2322e-04, -7.4909e-06, 7.4629e-05, 5.6052e-45, -1.3915e-04,\n -2.8239e-04, 4.1038e-06, 5.6052e-45, 1.3510e-04, 1.9662e-04,\n -5.9011e-04, 5.6052e-45, -1.0706e-04, -2.2340e-04, -7.1245e-05,\n 1.2257e-04, -6.4971e-05, 6.2675e-04, 5.6052e-45, -2.4236e-04,\n -1.0674e-04, -1.8252e-04, 1.3268e-04, 3.0928e-04, -2.6312e-04,\n 1.5955e-05, 7.5928e-05, 4.7168e-04, 4.1604e-05, -2.3137e-10,\n -6.6143e-11, 2.8564e-05, 1.5133e-21, 2.8772e-05, 5.6052e-45,\n 1.0088e-04, -4.1686e-05, -2.1231e-05, 5.6052e-45, -7.7674e-06,\n -4.3077e-04, 1.9712e-04, -4.5610e-04, 5.6052e-45, -9.7388e-05,\n -1.5661e-04, 2.2372e-04, 4.4653e-05, 2.1837e-38, 2.4809e-05,\n 1.2348e-04, 2.7609e-05, 5.6052e-45, 5.1219e-05, 2.4081e-05,\n -3.0383e-04, 9.8191e-05, 5.6052e-45, 2.1767e-04, 6.8673e-05,\n 7.3287e-05, -8.9373e-05, -6.3134e-05, 5.2839e-13, -9.2081e-04,\n -2.5172e-05, 9.8533e-05, 2.0831e-04, 9.0081e-05, 5.6052e-45,\n -2.1087e-04, -2.9123e-05, 5.6052e-45, 8.6570e-05, -4.6774e-04,\n 2.3096e-07, 5.8872e-05, -1.4254e-04, -1.7002e-06, -1.5441e-04,\n 1.4136e-04, 6.2845e-04, -9.3941e-05, 3.0448e-11, 1.7809e-04,\n 2.2570e-05, -4.1103e-05, 6.0253e-05, -1.0966e-04, 5.6052e-45,\n 2.6022e-04, 5.6052e-45, -2.3764e-05, 2.8141e-04, 1.7809e-04,\n 3.3439e-04, -1.4369e-04, 4.5577e-04, 3.0087e-04, -7.9119e-05,\n 4.7274e-04, 1.0287e-04, -8.2324e-04, 2.3294e-04, -8.4086e-04,\n 6.3667e-06, -2.3662e-04, 1.3296e-04, 2.8007e-04, 1.8307e-04,\n 5.7838e-05, 5.6052e-45, 3.0220e-04, 4.7522e-04, 1.1832e-09,\n 1.3683e-04, 6.5725e-13, -5.6854e-04, -9.5318e-05, 1.0948e-04,\n 7.6156e-05, -9.8886e-06, -3.7060e-04, 4.2034e-08, 5.6052e-45,\n 4.2548e-06, -3.5678e-05, 1.7353e-04, 2.4215e-04, 9.1319e-05,\n -2.6496e-04, -3.4311e-04, 3.8167e-05, -6.3478e-05, 3.1667e-04,\n 8.4050e-05, 1.1637e-04, 3.6883e-05, 4.6385e-04, 1.3621e-19,\n -1.5921e-04, 7.0044e-15, 1.2584e-04, 5.6052e-45, -1.8233e-04,\n -3.7744e-04, 7.0859e-05, -1.4728e-04, 5.6052e-45, -2.7409e-05,\n 2.5088e-04, -3.3676e-05, -1.8882e-04, -1.9095e-04, -2.0269e-04,\n 1.2413e-04, -1.0011e-04, -1.6923e-04, -3.6745e-05, -3.2480e-04,\n 1.2748e-04, -2.0446e-04, -3.0736e-05, -2.3007e-04, 1.0398e-04,\n -1.5992e-04, 3.0735e-05, 2.6205e-04, 2.8844e-04, 1.6401e-11,\n 3.9480e-04, 5.6052e-45, -3.1303e-04, -7.1616e-05, 3.5599e-05,\n 1.9159e-04, 3.1648e-04, -4.0716e-05, 5.6052e-45, 9.7396e-05,\n -1.2797e-04, 5.6052e-45, 5.6174e-04, -2.1254e-05, 7.3769e-05,\n -1.9949e-04, -2.8138e-04, 9.9244e-05, -2.2607e-04, 5.6052e-45,\n -8.9750e-05, 3.9805e-06, 3.5968e-05, 3.0847e-04, 1.0185e-04,\n 1.3887e-34, -5.3551e-05, 6.9674e-09, -1.8633e-04, 9.8200e-06,\n -1.7553e-04, 4.6625e-04, -4.5231e-04, 2.4073e-04, -1.1454e-04,\n -1.3068e-04, 1.5809e-04, 1.4722e-04, -5.8401e-04, 5.6052e-45,\n -1.1040e-04, 2.6952e-05, 1.3574e-04, 5.6052e-45, -2.5597e-04,\n 2.7121e-04, 1.5639e-05, -2.4147e-04, 1.2207e-04, 3.5683e-06,\n -9.9534e-05, -8.2326e-05, -9.6096e-05, -3.0317e-05, -2.4143e-04,\n -1.6356e-04, 1.3234e-04, 1.2675e-04, 5.6052e-45, 2.6399e-04,\n 1.3448e-04, 5.6052e-45, 5.5714e-05, 1.7824e-04, -2.8463e-04,\n -1.3073e-04, 5.6052e-45, 3.2108e-39, -8.4172e-05, 1.0733e-04,\n 1.1872e-04, -2.4368e-04, -7.6059e-04, -2.0618e-04, 2.0249e-05,\n 2.0545e-04, 2.3158e-04, -2.9074e-04, 2.1748e-04, 8.9350e-06,\n -2.3601e-04, -6.8135e-05, 1.9951e-04, 4.3421e-05, -2.0018e-04,\n 3.0658e-04, -5.3273e-14, 5.6052e-45, -1.1692e-04, 5.6052e-45,\n -6.5520e-06, -3.4355e-05, -5.4988e-04, 1.1066e-04, 1.5424e-04,\n -1.2642e-04, 5.6052e-45, -1.1051e-04, 4.3786e-04, 3.9910e-06,\n -1.3295e-04, -1.5891e-04, 8.7591e-05, -1.5542e-04, 4.8239e-31,\n 2.8728e-04, -1.2816e-04, -3.5802e-04, -1.0728e-04, 1.7937e-05,\n -3.3295e-04, 4.3197e-05, -1.8574e-04, 4.4832e-05, -2.2943e-05,\n 5.7458e-05, 2.1127e-04, -5.1600e-05, -2.5911e-04, 3.1793e-04,\n 5.9347e-05, 8.6573e-05, 2.1832e-04, 2.0966e-04, -7.5499e-04,\n 8.6027e-05, -1.1353e-04], device='cuda:0')",
23
+ "exp_avg_sq": "tensor([3.7378e-07, 7.0857e-07, 5.7033e-07, 1.4020e-06, 1.4722e-06, 1.9752e-07,\n 1.8050e-06, 1.6781e-06, 9.1389e-14, 1.2363e-06, 1.3280e-06, 2.1484e-06,\n 2.4532e-06, 1.1912e-06, 1.3535e-06, 1.2439e-06, 1.3582e-06, 1.1972e-06,\n 1.0027e-06, 1.6220e-06, 1.0697e-06, 1.5644e-06, 6.5689e-07, 1.7102e-06,\n 1.3235e-06, 6.5309e-07, 1.0607e-07, 1.0400e-06, 5.4028e-07, 1.7608e-06,\n 1.1849e-06, 1.0856e-06, 3.6310e-07, 1.7382e-06, 1.6470e-15, 2.4762e-07,\n 1.4530e-06, 1.7655e-06, 1.1823e-06, 8.2243e-07, 1.2140e-06, 1.0869e-06,\n 1.3378e-06, 1.8785e-06, 7.9833e-07, 7.2450e-07, 1.5692e-06, 1.1932e-06,\n 1.1544e-06, 1.4265e-06, 6.5509e-07, 1.2513e-06, 1.8558e-06, 6.8288e-07,\n 1.3053e-06, 1.1166e-06, 1.8666e-06, 8.6502e-08, 1.1108e-06, 1.3738e-06,\n 9.8376e-07, 1.7002e-06, 1.3824e-06, 1.4087e-06, 1.8604e-06, 2.3007e-06,\n 4.4628e-07, 9.0524e-07, 1.3521e-06, 6.5863e-07, 8.2287e-07, 1.0313e-06,\n 1.3074e-06, 1.3628e-08, 1.7522e-06, 1.2047e-06, 1.7766e-06, 5.8730e-07,\n 1.4320e-06, 1.1950e-06, 9.3816e-07, 1.5804e-06, 1.2501e-06, 1.7462e-08,\n 2.0626e-06, 1.5946e-06, 9.5773e-07, 1.5150e-06, 1.9684e-06, 2.2045e-06,\n 1.2313e-06, 1.1282e-06, 1.6682e-06, 5.7160e-07, 1.4849e-06, 1.8179e-06,\n 1.4703e-06, 2.2516e-07, 1.0961e-06, 1.0221e-06, 2.0708e-06, 6.0499e-07,\n 3.2570e-07, 1.5140e-07, 4.5049e-07, 1.9869e-06, 1.2549e-06, 6.4827e-07,\n 1.4814e-06, 8.1277e-07, 1.4667e-06, 1.1788e-06, 2.0397e-06, 7.1710e-07,\n 3.6080e-08, 1.4434e-06, 1.7491e-06, 1.1452e-06, 5.5788e-07, 1.8555e-06,\n 9.0319e-07, 1.1667e-06, 1.0801e-06, 1.7322e-06, 9.1232e-07, 1.6705e-06,\n 1.2537e-06, 4.5814e-07, 8.5783e-07, 1.8885e-06, 1.5759e-06, 7.8239e-07,\n 1.9368e-06, 7.8276e-07, 9.1612e-07, 1.5356e-06, 1.6667e-07, 2.0678e-06,\n 4.4876e-07, 1.1810e-06, 1.0922e-06, 1.1994e-06, 1.5480e-06, 1.6785e-06,\n 3.6454e-07, 4.8644e-10, 2.2299e-06, 8.1474e-08, 1.3317e-06, 6.4977e-07,\n 1.4948e-06, 1.2100e-10, 1.1951e-06, 1.6583e-07, 1.3189e-06, 9.9062e-07,\n 1.3152e-06, 1.4948e-06, 3.7339e-06, 6.9755e-07, 5.5839e-07, 9.2977e-07,\n 2.1965e-06, 7.1628e-07, 1.3699e-12, 1.5618e-06, 5.4800e-07, 9.1532e-07,\n 1.9782e-06, 2.0399e-09, 7.3228e-07, 9.4417e-07, 1.0315e-06, 8.2315e-07,\n 7.4786e-07, 1.3880e-06, 4.7913e-07, 6.3037e-07, 7.7588e-07, 2.7405e-14,\n 1.5680e-06, 4.6456e-07, 6.5898e-07, 7.9498e-07, 1.2865e-06, 9.4154e-07,\n 5.2411e-07, 1.5087e-07, 1.7044e-06, 1.2353e-06, 1.2572e-06, 1.2283e-06,\n 4.4891e-07, 2.2700e-07, 1.3788e-06, 9.9401e-07, 1.1613e-08, 3.2154e-08,\n 1.8216e-06, 2.2723e-07, 7.3130e-07, 2.0307e-06, 2.4700e-07, 1.3179e-06,\n 7.8003e-07, 3.0011e-07, 1.4208e-06, 8.5523e-07, 4.4839e-07, 6.9461e-07,\n 7.6572e-07, 2.4659e-06, 9.5830e-07, 1.5610e-06, 1.6604e-06, 8.1944e-07,\n 1.5279e-06, 7.8177e-07, 2.2900e-07, 8.3769e-07, 1.1680e-06, 1.8239e-06,\n 1.0417e-06, 1.5205e-06, 2.3005e-06, 1.7863e-06, 1.0027e-10, 1.6401e-06,\n 1.6204e-06, 4.6957e-07, 1.1684e-06, 1.2836e-06, 1.5046e-06, 2.5657e-08,\n 1.7091e-06, 4.5877e-07, 2.0731e-06, 2.6013e-07, 1.5782e-06, 3.1280e-06,\n 7.4390e-07, 1.2012e-06, 2.0084e-06, 9.4892e-07, 9.9049e-08, 1.6006e-06,\n 1.4306e-06, 8.9641e-07, 1.2172e-06, 1.7014e-12, 1.0273e-06, 1.2759e-06,\n 1.4125e-07, 2.3271e-11, 1.9013e-06, 5.6303e-07, 2.3455e-06, 7.4170e-07,\n 2.3638e-11, 6.4904e-07, 9.1145e-07, 1.3519e-06, 1.0006e-06, 2.3695e-07,\n 3.0508e-08, 8.1557e-07, 7.1065e-07, 2.2266e-07, 9.4493e-07, 1.1151e-06,\n 1.7278e-06, 2.8851e-07, 6.2310e-08, 1.1170e-06, 2.8336e-07, 7.2722e-07,\n 7.9903e-07, 1.2952e-06, 3.6902e-07, 1.7412e-06, 1.8536e-06, 8.2117e-07,\n 2.3289e-06, 2.0081e-06, 1.2291e-06, 1.2703e-06, 8.1501e-07, 2.8933e-08,\n 1.1589e-06, 1.9147e-06, 1.1083e-07, 1.4685e-06, 1.7228e-06, 4.3384e-07,\n 1.5937e-06, 8.3734e-07, 2.0411e-06, 8.5667e-07, 2.8904e-07, 8.9090e-07,\n 1.0504e-06, 1.5866e-06, 7.7314e-07, 7.7604e-07, 1.5566e-08, 1.3829e-06,\n 3.0692e-09, 1.7680e-06, 1.3098e-06, 1.0985e-06, 1.6099e-06, 1.8847e-06,\n 1.0405e-06, 1.4416e-06, 8.8494e-07, 1.6602e-06, 2.3593e-07, 1.6669e-06,\n 1.0158e-06, 2.3636e-06, 9.6529e-07, 1.0808e-06, 1.3055e-06, 1.9548e-06,\n 6.2688e-07, 1.1828e-06, 4.1611e-07, 1.3097e-06, 8.8238e-07, 1.4558e-07,\n 1.4688e-06, 4.0252e-08, 1.3177e-06, 4.3773e-07, 9.1018e-07, 2.6108e-06,\n 1.9576e-06, 1.9067e-06, 7.8495e-07, 1.0779e-15, 2.7256e-07, 1.4795e-06,\n 1.0278e-06, 9.5247e-07, 5.2902e-07, 2.2997e-06, 4.9839e-07, 5.7968e-07,\n 1.1504e-06, 1.8293e-06, 6.8660e-07, 4.8872e-07, 1.2336e-06, 1.9238e-06,\n 6.3906e-08, 9.5122e-07, 4.1389e-09, 1.0719e-06, 2.4253e-08, 2.7476e-06,\n 1.1286e-06, 7.4568e-07, 9.7340e-07, 1.3720e-07, 9.8040e-07, 1.1805e-07,\n 4.2269e-07, 1.6190e-06, 1.1857e-06, 1.4939e-06, 8.8615e-08, 1.1064e-06,\n 2.4285e-06, 1.5485e-06, 2.3306e-06, 7.9662e-07, 7.8884e-07, 8.5233e-07,\n 1.9533e-06, 2.4788e-06, 8.9746e-07, 8.8335e-07, 1.8722e-06, 1.1300e-06,\n 5.5503e-08, 1.5013e-06, 9.0453e-07, 1.1314e-06, 6.5896e-07, 1.1934e-06,\n 1.1980e-06, 1.3556e-06, 3.3423e-06, 5.1934e-08, 1.4145e-06, 1.5536e-06,\n 7.0146e-13, 1.9856e-06, 1.5487e-06, 1.5959e-07, 3.5037e-07, 1.3052e-06,\n 8.6983e-07, 2.6128e-07, 2.3468e-07, 1.2733e-06, 1.0645e-06, 5.5782e-07,\n 1.1697e-06, 1.5696e-06, 4.6873e-07, 8.5742e-07, 5.7663e-08, 7.5282e-07,\n 1.1017e-06, 1.3030e-06, 9.4016e-07, 1.5400e-06, 4.3723e-07, 1.0963e-06,\n 8.6724e-07, 1.1098e-06, 1.1904e-06, 1.4716e-06, 1.6985e-08, 1.0713e-06,\n 7.2891e-07, 6.3296e-07, 1.2629e-10, 1.1165e-06, 2.5659e-07, 1.8929e-06,\n 7.4323e-07, 2.0276e-06, 1.0716e-06, 1.9492e-07, 1.2614e-06, 8.8400e-07,\n 1.5395e-06, 1.9499e-06, 9.8512e-07, 7.2791e-07, 1.4509e-06, 1.7029e-07,\n 1.6165e-06, 9.9503e-07, 1.9396e-08, 2.3871e-06, 4.4686e-07, 9.2826e-07,\n 9.2918e-07, 4.3016e-08, 4.7869e-09, 1.1088e-06, 1.5759e-06, 9.3767e-07,\n 2.0275e-06, 9.9010e-07, 7.0089e-07, 1.2142e-06, 1.4103e-06, 7.2908e-07,\n 7.9399e-07, 1.1902e-06, 4.9924e-07, 5.9824e-07, 6.5295e-07, 1.3251e-06,\n 3.3853e-07, 1.6866e-06, 1.2552e-06, 1.4743e-07, 9.8895e-10, 1.2349e-06,\n 6.3533e-08, 5.1422e-07, 1.1374e-06, 2.0286e-06, 1.2427e-06, 1.8446e-06,\n 1.8622e-06, 1.3381e-07, 6.3014e-07, 1.7181e-06, 2.3490e-06, 4.6386e-07,\n 1.4112e-06, 7.3790e-07, 1.5747e-06, 5.7570e-14, 1.3808e-06, 1.3610e-06,\n 1.7011e-06, 1.3038e-06, 1.7791e-06, 1.4476e-06, 9.3598e-07, 8.6080e-07,\n 5.8852e-07, 3.3521e-07, 1.1457e-06, 1.2474e-06, 1.7570e-06, 1.1003e-06,\n 2.1049e-06, 1.0962e-06, 7.0089e-07, 1.1957e-06, 8.6901e-07, 2.0300e-06,\n 7.4951e-07, 1.7843e-07], device='cuda:0')"
24
+ },
25
+ "4": {
26
+ "step": "tensor(5006.)",
27
+ "exp_avg": "tensor([[ 1.7684e-05, -1.1887e-05, 3.2143e-06, ..., -3.4804e-06,\n 4.4078e-05, 3.0542e-05],\n [ 1.0641e-05, -2.4548e-05, -1.1624e-05, ..., -5.7144e-05,\n -2.0362e-05, -4.7978e-05],\n [ 1.3563e-05, 8.3540e-06, 1.0552e-05, ..., 1.3136e-06,\n -1.7861e-05, 6.7835e-06],\n ...,\n [-1.7536e-05, 4.1417e-05, 7.2489e-06, ..., 3.8233e-05,\n 2.2424e-05, 5.0596e-06],\n [ 1.1550e-05, 1.6547e-05, 1.4881e-05, ..., -4.2815e-05,\n -2.5277e-06, 3.0398e-05],\n [ 1.2734e-05, 9.5913e-05, -9.7876e-06, ..., -9.0472e-06,\n 3.4182e-05, 5.9467e-05]], device='cuda:0')",
28
+ "exp_avg_sq": "tensor([[8.7735e-10, 1.9407e-08, 4.4892e-09, ..., 3.7345e-08, 4.7874e-09,\n 8.7428e-10],\n [2.3694e-09, 1.0836e-08, 1.0490e-08, ..., 7.7805e-08, 2.3367e-08,\n 4.0536e-09],\n [2.2902e-09, 8.3055e-09, 2.9233e-08, ..., 2.9546e-08, 2.9886e-08,\n 1.3995e-09],\n ...,\n [3.2342e-09, 1.0375e-08, 8.2181e-09, ..., 9.2905e-08, 1.5262e-08,\n 2.0588e-09],\n [1.8099e-09, 1.2683e-08, 1.0617e-08, ..., 7.0927e-08, 1.3819e-08,\n 4.5343e-09],\n [1.6789e-09, 3.5772e-08, 1.5535e-08, ..., 5.1097e-08, 1.1847e-08,\n 8.9299e-09]], device='cuda:0')"
29
+ }
30
+ },
31
+ "param_groups": [
32
+ {
33
+ "lr": 0.00904518046337755,
34
+ "name": "shared",
35
+ "betas": [
36
+ 0.9,
37
+ 0.999
38
+ ],
39
+ "eps": 1e-08,
40
+ "weight_decay": 1e-05,
41
+ "amsgrad": false,
42
+ "maximize": false,
43
+ "foreach": null,
44
+ "capturable": false,
45
+ "differentiable": false,
46
+ "fused": null,
47
+ "decoupled_weight_decay": true,
48
+ "initial_lr": 0.01,
49
+ "params": [
50
+ 0,
51
+ 1
52
+ ]
53
+ },
54
+ {
55
+ "lr": 0.00904518046337755,
56
+ "name": "scale_256",
57
+ "betas": [
58
+ 0.9,
59
+ 0.999
60
+ ],
61
+ "eps": 1e-08,
62
+ "weight_decay": 1e-05,
63
+ "amsgrad": false,
64
+ "maximize": false,
65
+ "foreach": null,
66
+ "capturable": false,
67
+ "differentiable": false,
68
+ "fused": null,
69
+ "decoupled_weight_decay": true,
70
+ "initial_lr": 0.01,
71
+ "params": [
72
+ 2,
73
+ 3,
74
+ 4
75
+ ]
76
+ },
77
+ {
78
+ "lr": 0.00904518046337755,
79
+ "name": "scale_512",
80
+ "betas": [
81
+ 0.9,
82
+ 0.999
83
+ ],
84
+ "eps": 1e-08,
85
+ "weight_decay": 1e-05,
86
+ "amsgrad": false,
87
+ "maximize": false,
88
+ "foreach": null,
89
+ "capturable": false,
90
+ "differentiable": false,
91
+ "fused": null,
92
+ "decoupled_weight_decay": true,
93
+ "initial_lr": 0.01,
94
+ "params": [
95
+ 5,
96
+ 6,
97
+ 7
98
+ ]
99
+ },
100
+ {
101
+ "lr": 0.00904518046337755,
102
+ "name": "scale_768",
103
+ "betas": [
104
+ 0.9,
105
+ 0.999
106
+ ],
107
+ "eps": 1e-08,
108
+ "weight_decay": 1e-05,
109
+ "amsgrad": false,
110
+ "maximize": false,
111
+ "foreach": null,
112
+ "capturable": false,
113
+ "differentiable": false,
114
+ "fused": null,
115
+ "decoupled_weight_decay": true,
116
+ "initial_lr": 0.01,
117
+ "params": [
118
+ 8,
119
+ 9,
120
+ 10
121
+ ]
122
+ },
123
+ {
124
+ "lr": 0.00904518046337755,
125
+ "name": "scale_1024",
126
+ "betas": [
127
+ 0.9,
128
+ 0.999
129
+ ],
130
+ "eps": 1e-08,
131
+ "weight_decay": 1e-05,
132
+ "amsgrad": false,
133
+ "maximize": false,
134
+ "foreach": null,
135
+ "capturable": false,
136
+ "differentiable": false,
137
+ "fused": null,
138
+ "decoupled_weight_decay": true,
139
+ "initial_lr": 0.01,
140
+ "params": [
141
+ 11,
142
+ 12,
143
+ 13
144
+ ]
145
+ },
146
+ {
147
+ "lr": 0.004522637977440181,
148
+ "name": "fusion",
149
+ "betas": [
150
+ 0.9,
151
+ 0.999
152
+ ],
153
+ "eps": 1e-08,
154
+ "weight_decay": 1e-05,
155
+ "amsgrad": false,
156
+ "maximize": false,
157
+ "foreach": null,
158
+ "capturable": false,
159
+ "differentiable": false,
160
+ "fused": null,
161
+ "decoupled_weight_decay": true,
162
+ "initial_lr": 0.005,
163
+ "params": [
164
+ 14,
165
+ 15,
166
+ 16,
167
+ 17,
168
+ 18,
169
+ 19,
170
+ 20,
171
+ 21,
172
+ 22,
173
+ 23,
174
+ 24,
175
+ 25,
176
+ 26,
177
+ 27,
178
+ 28,
179
+ 29,
180
+ 30,
181
+ 31,
182
+ 32,
183
+ 33,
184
+ 34,
185
+ 35,
186
+ 36,
187
+ 37,
188
+ 38,
189
+ 39,
190
+ 40,
191
+ 41,
192
+ 42,
193
+ 43,
194
+ 44,
195
+ 45,
196
+ 46,
197
+ 47,
198
+ 48,
199
+ 49,
200
+ 50,
201
+ 51,
202
+ 52,
203
+ 53,
204
+ 54,
205
+ 55,
206
+ 56,
207
+ 57,
208
+ 58,
209
+ 59,
210
+ 60,
211
+ 61,
212
+ 62,
213
+ 63,
214
+ 64
215
+ ]
216
+ }
217
+ ]
218
+ },
219
+ "scheduler_state_dict": {
220
+ "T_0": 10,
221
+ "T_i": 10,
222
+ "T_mult": 2,
223
+ "eta_min": 1e-06,
224
+ "T_cur": 2,
225
+ "base_lrs": [
226
+ 0.01,
227
+ 0.01,
228
+ 0.01,
229
+ 0.01,
230
+ 0.01,
231
+ 0.005
232
+ ],
233
+ "last_epoch": 2,
234
+ "_step_count": 0,
235
+ "_is_initial": false,
236
+ "_get_lr_called_within_step": false,
237
+ "_last_lr": [
238
+ 0.00904518046337755,
239
+ 0.00904518046337755,
240
+ 0.00904518046337755,
241
+ 0.00904518046337755,
242
+ 0.00904518046337755,
243
+ 0.004522637977440181
244
+ ]
245
+ },
246
+ "metrics": {
247
+ "best_val_acc": 71.403,
248
+ "best_epoch": 1,
249
+ "scale_accuracies": {
250
+ "256": 71.403
251
+ },
252
+ "training_history": {
253
+ "epochs": [
254
+ 1,
255
+ 2
256
+ ],
257
+ "train_loss": [
258
+ 3.078377773987499,
259
+ 1.970284724826104
260
+ ],
261
+ "train_acc": [
262
+ 58.631622575355124,
263
+ 71.63597719891318
264
+ ],
265
+ "val_acc": [
266
+ 69.481,
267
+ 71.403
268
+ ],
269
+ "scale_accs": {
270
+ "256": [
271
+ 69.481,
272
+ 71.403
273
+ ]
274
+ },
275
+ "lr": [
276
+ 0.00975530705321762,
277
+ 0.00904518046337755
278
+ ]
279
+ }
280
+ },
281
+ "train_config": {
282
+ "name": "david_training",
283
+ "run_id": "20251012_194945",
284
+ "dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
285
+ "model_variant": [
286
+ "clip_vit_b16",
287
+ "clip_vit_laion_b32"
288
+ ],
289
+ "num_classes": 1000,
290
+ "preset": "balanced",
291
+ "custom_config_path": null,
292
+ "num_classes_override": null,
293
+ "use_belly_override": null,
294
+ "belly_expand_override": null,
295
+ "progressive_training_override": true,
296
+ "scale_warmup_epochs_override": {
297
+ "256": 0,
298
+ "512": 2,
299
+ "768": 5,
300
+ "1024": 8
301
+ },
302
+ "num_epochs": 10,
303
+ "batch_size": 1024,
304
+ "learning_rate": 0.01,
305
+ "weight_decay": 1e-05,
306
+ "warmup_epochs": 3,
307
+ "use_rose_loss": true,
308
+ "rose_initial_weight": 0.2,
309
+ "rose_max_weight": 0.8,
310
+ "rose_weight_schedule": "adaptive",
311
+ "use_cayley_loss": true,
312
+ "cayley_weight": 0.01,
313
+ "scale_loss_balance": null,
314
+ "use_mixed_precision": false,
315
+ "gradient_clip": 10.0,
316
+ "scheduler_type": "cosine_restarts",
317
+ "min_lr": 1e-06,
318
+ "freeze_strategy": "never",
319
+ "freeze_threshold": 90.0,
320
+ "unfreeze_on_plateau": true,
321
+ "patience": 10,
322
+ "track_gradients": true,
323
+ "gradient_scale_threshold": 1e-05,
324
+ "gradient_scale_multiplier": 10.0,
325
+ "log_interval": 50,
326
+ "val_interval": 1,
327
+ "save_interval": 5,
328
+ "log_fusion_weights": true,
329
+ "log_loss_components": true,
330
+ "save_format": "safetensors",
331
+ "hf_repo": "AbstractPhil/david-shared-space",
332
+ "upload_to_hub": true,
333
+ "base_dir": "./david_training",
334
+ "num_workers": 10,
335
+ "pin_memory": true,
336
+ "prefetch_factor": 4,
337
+ "persistent_workers": true
338
+ }
339
+ }