AbstractPhil commited on
Commit
0978134
·
verified ·
1 Parent(s): d14cece

Update best_model_acc68.56_metadata.json - Run 20251012_210041

Browse files
weights/David-partial_shared-hierarchical_tree/20251012_210041/best_model_acc68.56_metadata.json ADDED
@@ -0,0 +1,340 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1,
3
+ "optimizer_state_dict": {
4
+ "state": {
5
+ "0": {
6
+ "step": "tensor(7508.)",
7
+ "exp_avg": "tensor([[ 1.9258e-05, -5.5453e-05, 5.3597e-06, ..., -3.3262e-05,\n 1.5173e-05, -1.8343e-07],\n [ 1.2527e-05, 1.3770e-05, 2.7555e-05, ..., 6.5963e-05,\n 1.3010e-05, -2.3416e-06],\n [-2.0192e-07, -7.8963e-06, -4.4261e-06, ..., 1.8589e-05,\n 2.2583e-05, 4.7641e-05],\n ...,\n [ 2.6663e-05, 1.9958e-05, -2.9462e-07, ..., -5.5919e-06,\n 1.1830e-05, 5.6557e-06],\n [ 8.0384e-05, 5.3253e-06, 1.8500e-05, ..., -2.3051e-05,\n 2.4302e-05, 9.4266e-05],\n [ 1.3651e-05, -1.5683e-04, -3.0977e-05, ..., -1.8889e-05,\n -2.0185e-05, -1.1899e-05]], device='cuda:0')",
8
+ "exp_avg_sq": "tensor([[8.0885e-09, 1.1671e-08, 6.6052e-09, ..., 3.7021e-08, 5.8507e-09,\n 8.5512e-09],\n [1.3249e-08, 1.1549e-08, 8.1267e-09, ..., 6.4415e-08, 9.5274e-09,\n 1.1536e-08],\n [5.7524e-08, 5.0332e-08, 9.4126e-09, ..., 9.1495e-09, 1.0797e-08,\n 8.9686e-09],\n ...,\n [1.2521e-08, 1.1383e-08, 9.4750e-09, ..., 5.3489e-08, 8.1194e-09,\n 9.3634e-09],\n [4.1991e-08, 8.6983e-08, 3.3428e-08, ..., 1.2160e-08, 1.2064e-08,\n 1.0715e-08],\n [4.5222e-08, 5.2184e-08, 2.5476e-08, ..., 2.3512e-08, 1.3373e-08,\n 1.1059e-08]], device='cuda:0')"
9
+ },
10
+ "1": {
11
+ "step": "tensor(7508.)",
12
+ "exp_avg": "tensor([-2.9188e-04, 1.3547e-03, 6.3713e-04, 1.5906e-03, 6.0282e-04,\n 3.4800e-04, -1.0357e-03, -1.1072e-03, -3.1407e-04, -9.7096e-05,\n 2.4048e-04, -1.3038e-06, -9.3829e-05, -5.2598e-04, -3.4067e-04,\n -1.8290e-03, -4.9520e-04, 3.8292e-04, 2.4689e-04, -3.4816e-04,\n 1.2101e-03, 6.7822e-05, -5.1079e-04, 1.7768e-04, 5.9010e-04,\n 7.6382e-04, 1.7403e-04, -1.2408e-03, 6.6678e-05, 8.0166e-04,\n 2.2350e-04, -1.0048e-03, -7.9483e-04, 4.0695e-05, 2.8010e-04,\n -8.1421e-04, -7.5669e-04, -5.7476e-04, -7.9907e-04, -2.7060e-04,\n -5.8033e-04, -1.2815e-04, 6.3594e-05, -1.6508e-04, -7.7013e-04,\n -2.5530e-04, 7.8865e-05, 9.8901e-04, 2.9785e-04, -1.6961e-04,\n 3.0045e-04, 5.2573e-04, 1.0039e-03, 6.5757e-04, 5.9115e-04,\n 6.8695e-05, 7.9055e-04, 7.8091e-04, -3.7420e-05, -2.7502e-04,\n 8.0985e-04, 3.2180e-05, 6.9553e-04, -4.8303e-04, 8.8255e-04,\n 4.8842e-04, -1.1250e-04, -9.4193e-04, -2.2918e-03, 1.5705e-04,\n -6.2228e-05, 5.6042e-04, 1.9984e-04, -2.2279e-04, 7.7347e-04,\n -2.3626e-05, 1.5039e-03, 3.8137e-04, -3.8838e-04, -2.5292e-04,\n -1.8209e-04, 7.4693e-04, -9.4125e-04, 7.2097e-04, 3.4583e-04,\n -1.2108e-03, -1.4049e-05, 4.8142e-04, 1.2041e-05, -2.1984e-04,\n 5.9659e-04, -1.1509e-03, 1.5339e-04, -2.4800e-03, 8.3144e-05,\n 4.7372e-04, -9.0775e-04, 1.0980e-03, -7.1834e-06, -5.7614e-04,\n -1.6976e-04, -1.3913e-04, -9.2188e-04, -5.6519e-04, -1.4039e-04,\n -1.8555e-04, -5.1074e-04, 4.7975e-04, -1.7344e-05, -1.0384e-03,\n -4.8137e-04, -2.5832e-04, 3.8509e-06, 1.1066e-03, 2.8575e-04,\n -3.6889e-04, 8.2144e-04, -1.5098e-04, -3.9037e-04, -3.8084e-04,\n 1.3591e-04, -4.6168e-04, 8.3484e-04, -2.1212e-04, -1.0388e-03,\n -1.0011e-03, -8.4956e-05, 3.7343e-04, 1.1910e-03, -9.5119e-04,\n 6.8425e-05, 5.8408e-04, 8.1326e-04, -2.1853e-04, -1.0320e-03,\n 1.1183e-03, -6.1023e-04, -8.3218e-04, -7.4586e-04, -6.0124e-05,\n 3.1975e-04, -1.7440e-04, -1.0932e-03, -4.6078e-04, -1.5217e-04,\n -7.7477e-04, -3.8553e-04, 2.7571e-05, 9.8656e-04, -1.0501e-03,\n 3.6108e-04, -8.7069e-05, -1.6781e-04, 3.2158e-04, -2.5359e-04,\n -1.3901e-04, 1.2155e-03, -3.2713e-04, -3.1253e-04, 6.9283e-04,\n -8.0328e-04, 5.4665e-05, 6.7718e-04, 5.3893e-04, 8.4138e-04,\n 5.3436e-04, 6.6027e-04, 5.7255e-04, -4.3032e-05, -1.7519e-03,\n 9.7838e-04, -1.9268e-03, 3.2681e-04, 1.3453e-03, 2.5801e-04,\n -7.7763e-04, 1.3612e-03, 7.1981e-05, 5.6511e-04, -6.0113e-05,\n -5.9594e-04, -6.2211e-04, -1.4123e-04, -6.4041e-04, 1.0965e-04,\n -5.3451e-04, -5.0356e-04, 5.9457e-04, 3.9678e-04, 1.1950e-04,\n -2.3830e-04, -3.5594e-04, -4.7904e-04, -9.6250e-04, 1.2370e-04,\n 1.1841e-03, -1.6781e-04, 5.0236e-04, -1.4849e-04, -1.4680e-03,\n 2.1114e-04, 7.4081e-04, -4.6400e-04, 4.9427e-04, 1.0379e-03,\n -1.5630e-04, 1.5486e-03, -1.6416e-04, -2.8248e-04, 1.1508e-03,\n -5.1933e-05, 3.7888e-04, 5.7236e-04, -3.9108e-04, 5.4053e-04,\n -3.5717e-04, 2.1491e-04, 1.6768e-04, -7.7221e-04, 6.4016e-04,\n 7.0755e-04, -2.5758e-04, -1.8483e-03, 1.5032e-03, -1.1110e-03,\n 5.6482e-04, 1.2571e-03, 2.9092e-04, -5.5920e-04, 3.0554e-04,\n 6.1218e-04, 2.3708e-04, -3.0358e-05, 6.3160e-04, 1.5505e-04,\n -8.9993e-05, 2.8612e-04, -3.6682e-04, -5.8538e-04, 5.2906e-04,\n -4.3650e-04, 2.3654e-04, 5.0882e-04, -7.3566e-04, -1.3068e-03,\n -1.4294e-03, -4.5417e-04, -5.8746e-04, 8.2751e-04, 4.5595e-04,\n -7.9933e-04, 6.4912e-04, 5.2190e-04, -1.4033e-04, 8.4427e-04,\n 6.4671e-04, -5.7261e-04, 1.0648e-03, 2.1930e-04, -1.3710e-03,\n -9.0997e-04, 7.0449e-04, 7.3761e-05, -2.9778e-05, -8.2909e-05,\n -3.2936e-04, 7.4964e-04, 6.1509e-04, -7.9118e-05, -4.5949e-04,\n -6.6937e-04, -5.8553e-05, -8.0199e-04, -2.0085e-04, -1.0319e-03,\n -2.5895e-04, 4.1503e-04, 9.8141e-04, 7.4408e-04, -1.9732e-04,\n -1.9926e-04, 1.1910e-03, 2.7664e-04, 2.5469e-04, 9.4579e-04,\n -1.1001e-03, 7.0821e-05, -7.0905e-05, -1.8849e-05, -5.1692e-04,\n -1.0981e-04, 1.3996e-03, 1.5029e-03, -4.0671e-04, 1.3786e-04,\n 1.3411e-03, -2.2505e-03, -3.9675e-04, -5.1719e-05, 1.3381e-04,\n 4.2739e-04, 8.2229e-05, -6.0141e-04, -7.3391e-06, 2.0612e-04,\n -6.5625e-04, -1.0679e-03, -3.8153e-04, -2.4124e-04, 1.7480e-04,\n -3.6409e-04, 2.7674e-04, 1.9325e-03, 7.7582e-04, -6.6420e-04,\n 1.4088e-03, -1.9761e-04, -1.5925e-04, -4.9099e-04, 1.4770e-03,\n 3.0950e-04, -3.0455e-04, -8.6607e-04, -2.1822e-05, -1.0428e-03,\n 4.9884e-04, 3.9876e-04, 9.7224e-04, 6.5203e-04, 1.5500e-03,\n 1.5530e-03, -4.1683e-05, 7.4969e-04, 5.3811e-04, -2.4412e-03,\n 7.3063e-07, 4.2381e-04, 8.3474e-04, -4.8433e-04, 7.4409e-04,\n -2.0279e-03, 2.2447e-04, -6.1023e-04, 2.7073e-04, 8.6038e-04,\n -2.5841e-06, -7.4819e-04, -1.3071e-05, 2.5956e-04, 2.2912e-04,\n -4.0350e-04, 7.2968e-04, -3.0014e-04, -6.4240e-04, -3.4733e-04,\n 2.4231e-04, 1.1862e-03, -2.6171e-04, -1.1810e-03, -2.6911e-04,\n -2.9698e-04, -4.7871e-04, -8.2692e-04, -3.6638e-04, 2.1629e-03,\n 2.1541e-03, -2.5978e-04, 1.5013e-04, -1.9250e-04, -7.6906e-04,\n 3.6165e-04, -8.0019e-04, 1.9492e-03, -2.0102e-04, 1.5614e-04,\n 6.6919e-04, 2.0943e-04, 4.7034e-04, 9.4502e-04, -5.3102e-04,\n 2.8658e-04, 3.9756e-04, -1.3818e-03, 3.9020e-04, -6.9791e-04,\n -1.6492e-03, 4.4889e-04, -3.3112e-05, -4.8403e-04, 1.9069e-03,\n 1.7454e-03, 6.4522e-04, -6.9987e-04, 9.8502e-04, 3.3683e-04,\n 4.3444e-04, 8.6534e-04, 3.9975e-05, -1.3401e-03, -1.5063e-04,\n 3.7311e-04, -1.0473e-03, -1.1871e-03, -1.6486e-03, -5.6669e-04,\n 1.2258e-03, -8.5051e-04, 1.6638e-03, -1.1536e-03, -5.7317e-04,\n 1.0968e-03, -7.3357e-04, -1.1562e-03, 1.5714e-03, 1.6061e-04,\n 4.1769e-04, -1.4285e-04, 1.2986e-05, 2.6280e-04, 2.1556e-03,\n -1.4475e-03, -7.4197e-04, -5.7673e-04, 2.9239e-03, 9.8857e-04,\n -1.7673e-04, 9.8178e-06, -1.2364e-03, -1.4088e-03, -3.0965e-04,\n -8.5188e-04, 3.5296e-04, -5.6539e-04, 9.1236e-04, 1.3912e-03,\n 1.6544e-03, 3.7750e-04, -2.9361e-04, 1.7388e-05, -6.0813e-04,\n 2.0862e-04, -3.9661e-04, 6.1964e-04, 9.0592e-04, 1.2188e-03,\n -5.9365e-04, 2.2502e-03, -1.2632e-03, -3.7777e-04, 9.2972e-05,\n -6.2089e-04, 9.2812e-04, -2.8615e-04, -4.6767e-05, -3.8998e-04,\n 1.2704e-03, 7.5910e-05, -4.4039e-05, -1.6599e-03, 2.7483e-04,\n 2.7817e-03, -1.5589e-04, 6.0593e-04, -6.5128e-04, 4.3618e-04,\n -7.1131e-04, -3.7160e-04, 9.6249e-04, 1.2242e-03, 1.5227e-04,\n -7.9156e-04, 9.0771e-04, 1.5182e-03, -4.0087e-04, 3.4700e-04,\n 1.0199e-03, -5.3496e-04, -6.3091e-04, 8.9449e-04, 4.8076e-04,\n -2.0257e-03, -8.2708e-04, -5.8419e-04, -6.7143e-04, -2.4253e-04,\n -1.8477e-04, -1.3258e-05, 8.7099e-04, -1.1980e-03, -2.6353e-04,\n 1.2820e-03, -5.2959e-04, 5.0364e-04, -5.6231e-05, -3.9937e-04,\n 7.4327e-05, -1.3764e-03, -5.8776e-04, -7.5745e-04, -9.0174e-04,\n 4.0639e-04, 1.7778e-05, 1.0378e-03, 4.0372e-04, -1.1756e-03,\n 9.0878e-04, -7.7144e-04, 8.2292e-04, -5.2961e-05, 9.4302e-04,\n 2.9483e-04, 6.4523e-04, 3.8701e-04, -1.1381e-03, -2.1124e-04,\n 1.8170e-04, 6.7851e-04, 1.4148e-03, -8.2511e-04, -1.2494e-03,\n 4.3251e-06, -9.4341e-05, 2.2473e-04, 2.3646e-04, 4.0208e-04,\n 5.9751e-04, 1.0549e-03, -4.0326e-04, 2.3538e-05, -9.0783e-05,\n 8.6513e-04, 4.3728e-05, -1.2984e-03, 7.0331e-04, 1.1773e-03,\n 4.5995e-04, 6.3292e-04, 4.3414e-04, 8.4390e-05, -7.0847e-04,\n 8.9718e-04, 6.7453e-04, 2.5578e-04, -8.2384e-05, 7.9017e-04,\n -2.8432e-04, -8.3189e-04, 1.0867e-03, 1.8817e-03, -7.5241e-04,\n -2.6468e-04, -4.5546e-04, 9.9847e-04, -3.4868e-04, 1.3192e-03,\n 9.3998e-04, -5.2970e-04, -5.3026e-05, -1.8368e-03, -1.4976e-03,\n 5.9842e-04, 5.3932e-04, -2.9992e-04, 9.0178e-04, -7.8338e-04,\n 5.4364e-05, 1.1448e-04, -2.1142e-03, -6.1474e-04, 1.7305e-05,\n -5.2993e-04, 5.9894e-04, 9.6531e-05, -3.7892e-04, 1.8767e-04,\n 4.8045e-04, -1.4013e-03, -9.4113e-04, -1.7669e-04, 7.8308e-04,\n -6.5611e-04, -2.4542e-04, 4.9408e-04, 7.1060e-04, 3.3822e-04,\n 6.2537e-04, -5.3297e-04, -5.2184e-04, -2.3369e-04, 2.4095e-04,\n 2.5755e-04, 1.8892e-05, -5.8866e-04, 1.3538e-03, -2.3254e-04,\n 6.4019e-04, 3.3652e-04, 1.2295e-03, -1.1056e-06, 9.1273e-04,\n -5.8692e-04, 8.1194e-04, 1.0936e-03, -4.6434e-04, 1.0179e-03,\n -6.8525e-04, 1.4916e-03, 5.4092e-04, 8.6372e-04, 2.1832e-04,\n -8.0921e-05, 1.0538e-03, -6.6769e-04, 6.0029e-04, -9.2134e-04,\n -1.6242e-03, 9.6945e-04, 6.5965e-04, -7.4092e-05, -5.2783e-04,\n 1.1310e-04, 4.0312e-05, 4.3205e-04, 9.5885e-05, -4.2719e-04,\n -8.8025e-04, -5.6920e-04, 1.6409e-03, -7.0120e-04, -7.0868e-04,\n -5.2120e-04, 5.1693e-04, 1.1323e-03, 5.2384e-05, 1.6000e-03,\n 2.0574e-04, -5.3421e-04, 4.8846e-04, -9.4358e-04, -1.3677e-04,\n 1.3407e-04, -5.0291e-04, 5.6267e-04, -1.2508e-03, 1.9506e-04,\n 1.0458e-03, -3.5259e-04, 6.5594e-04, 1.8080e-03, 2.8681e-04,\n -8.2221e-04, -6.0601e-04, 2.6523e-04, -2.7249e-04, 1.2009e-03,\n -1.3362e-03, -5.8133e-04, 1.2108e-03, 3.1431e-03, -3.7350e-05,\n -9.8317e-04, -1.0514e-03, -2.8124e-04, 7.9589e-05, 7.2151e-04,\n 5.6542e-04, 2.2791e-04, -1.7770e-04, 3.3713e-05, -2.2484e-03,\n 4.8486e-04, -6.3381e-06, 2.1711e-04, -1.0407e-03, 1.3978e-03,\n 1.7664e-04, 1.7722e-04, -8.5444e-05, 3.9211e-04, 1.4031e-03,\n -1.8068e-04, 2.8655e-05, 5.9134e-04, 2.8519e-03, 1.4076e-03,\n 8.0842e-05, 2.8437e-04, -1.0571e-03, -6.1528e-04, -4.3490e-04,\n -2.4222e-05, -1.0304e-03, -1.1066e-04, -1.1081e-03, -1.0867e-04,\n 9.9430e-04, -2.7254e-04, -1.2047e-03, -3.5937e-04, -7.7535e-05,\n 3.2651e-04, -5.2555e-04, 1.1106e-06, 1.1102e-03, -1.9872e-04,\n -1.7087e-03, 7.6501e-04, 6.9294e-04, -1.6622e-03, 1.5489e-04,\n -2.5784e-04, 1.6709e-03, 4.7291e-05, -7.6176e-04, 7.5434e-04,\n -9.8905e-04, -1.7836e-05, 2.9228e-04, -8.1060e-04, 8.3022e-06,\n 1.6217e-03, -1.1967e-03, 3.2870e-04, -2.6988e-04, -1.2788e-03,\n 5.5821e-04, 7.9114e-04, 1.0013e-04, 2.5154e-03, 3.0371e-04,\n 3.7376e-04, 2.3465e-04, 1.1493e-03, 2.0935e-03, -1.2240e-03,\n -9.5428e-04, 1.0278e-03, 2.9564e-04, 3.5360e-04, -2.9482e-04,\n -7.3262e-04, 7.3042e-05, -4.1482e-04, -2.6279e-04, 1.2840e-03,\n -1.9814e-03, 1.0678e-03, -6.5410e-04, -8.1253e-04, 7.8036e-39,\n -8.3800e-04, 5.6012e-04, 1.1379e-04, 1.7485e-05, -1.0083e-03,\n 5.7487e-04, 7.2195e-04, 3.0965e-04, -1.9486e-03, -3.9202e-04,\n -1.5899e-03, 3.7391e-04, -1.1448e-03, 1.7762e-04, 1.2542e-03,\n 6.5424e-04, 1.1417e-05, 2.4031e-04], device='cuda:0')",
13
+ "exp_avg_sq": "tensor([1.2764e-05, 1.8477e-05, 1.4345e-05, 1.0521e-05, 9.4743e-06, 8.7672e-06,\n 1.8576e-05, 1.7104e-05, 1.3123e-05, 6.4969e-06, 2.8215e-06, 5.3649e-06,\n 1.1419e-05, 9.2627e-06, 7.4592e-06, 3.7535e-05, 1.2518e-05, 1.6824e-05,\n 1.3926e-05, 1.9391e-05, 1.4920e-05, 1.5336e-05, 2.9018e-05, 8.6969e-06,\n 2.6978e-05, 8.1793e-06, 1.2336e-05, 1.0469e-05, 1.9187e-05, 9.2304e-06,\n 1.6392e-05, 1.0754e-05, 8.0279e-06, 6.2738e-06, 9.1422e-06, 7.6309e-06,\n 8.4286e-06, 1.1842e-05, 2.2284e-05, 1.2351e-05, 2.1943e-05, 6.0401e-06,\n 1.8671e-05, 1.5675e-05, 1.1420e-05, 1.6412e-05, 8.2982e-06, 1.8312e-05,\n 2.6657e-05, 2.2955e-05, 1.9984e-05, 1.4920e-05, 1.8747e-05, 1.5516e-05,\n 9.4472e-06, 1.6257e-05, 1.5705e-05, 1.9235e-05, 1.5567e-05, 6.6497e-06,\n 1.1670e-05, 7.7761e-06, 1.4911e-05, 1.7074e-05, 1.6063e-05, 1.8112e-05,\n 9.9975e-06, 2.0761e-05, 1.5937e-05, 1.5703e-05, 1.2338e-05, 1.3191e-05,\n 1.1960e-05, 1.5055e-05, 1.7458e-05, 1.0971e-05, 1.1859e-05, 1.5336e-05,\n 1.2601e-05, 1.1026e-05, 1.8018e-05, 1.7845e-05, 1.1690e-05, 8.3050e-06,\n 1.3413e-05, 9.9617e-06, 2.4789e-05, 2.0173e-05, 1.0625e-05, 2.6938e-06,\n 6.7568e-06, 1.6162e-05, 1.4445e-05, 2.3489e-05, 1.3409e-05, 1.3717e-05,\n 1.0455e-05, 8.4311e-06, 1.0949e-05, 8.0115e-06, 1.3215e-05, 1.8352e-05,\n 8.8940e-06, 8.3323e-06, 4.1268e-06, 5.9558e-06, 1.3767e-05, 2.2363e-05,\n 4.5534e-06, 1.1708e-05, 2.3235e-05, 2.9159e-05, 1.4946e-05, 1.6431e-05,\n 8.6755e-06, 2.3046e-05, 1.3923e-05, 1.3341e-05, 1.4622e-05, 1.3787e-05,\n 9.0721e-06, 1.1294e-05, 5.1727e-06, 1.6874e-05, 2.4050e-05, 1.8453e-05,\n 1.3044e-07, 1.3867e-05, 8.9345e-06, 6.8767e-06, 7.9126e-06, 1.9920e-05,\n 1.5479e-05, 7.3443e-06, 7.8019e-06, 1.8045e-05, 1.0036e-05, 9.6249e-06,\n 2.1013e-05, 7.8672e-06, 1.4908e-05, 1.6364e-05, 1.2489e-05, 1.5137e-05,\n 5.6465e-06, 1.5390e-05, 8.6744e-06, 6.2604e-06, 1.6557e-05, 1.6011e-05,\n 1.6377e-05, 1.3850e-05, 1.6355e-05, 1.8712e-05, 1.2759e-05, 3.3261e-05,\n 2.2886e-05, 2.2272e-05, 2.0688e-05, 7.2818e-06, 1.5768e-05, 1.2865e-05,\n 1.0303e-05, 1.7082e-05, 2.7548e-05, 9.9059e-06, 1.6889e-05, 1.9681e-05,\n 1.3950e-05, 2.3781e-05, 1.5970e-05, 1.8121e-05, 6.0326e-06, 1.0060e-05,\n 9.1804e-06, 2.0136e-05, 1.8478e-05, 1.1246e-05, 5.6694e-06, 1.4990e-05,\n 1.6395e-05, 1.1192e-05, 7.0845e-06, 8.7534e-06, 1.3396e-05, 1.8395e-05,\n 1.3421e-05, 7.2901e-06, 1.4254e-05, 1.6133e-05, 7.7305e-06, 5.1291e-06,\n 8.0367e-06, 1.7324e-05, 1.9594e-05, 1.3117e-05, 9.8610e-06, 1.9595e-05,\n 2.5076e-05, 1.2809e-05, 8.7907e-06, 3.1610e-06, 8.9788e-06, 1.5776e-05,\n 1.2367e-05, 1.3474e-05, 1.7627e-05, 1.1368e-05, 1.3993e-05, 1.5422e-05,\n 1.0845e-05, 2.0622e-05, 1.0583e-05, 1.3519e-05, 1.4919e-05, 9.5614e-06,\n 1.4545e-05, 1.7342e-05, 2.1007e-05, 6.7365e-06, 1.7298e-05, 1.7492e-05,\n 2.1002e-05, 1.4926e-05, 1.8035e-05, 1.7905e-05, 1.3301e-05, 7.9620e-06,\n 9.0996e-06, 5.9695e-06, 9.5745e-06, 6.2749e-06, 1.1379e-05, 9.8811e-06,\n 1.7054e-05, 2.3306e-05, 1.4291e-05, 1.4551e-05, 1.1296e-05, 1.4006e-05,\n 1.6749e-05, 3.5766e-06, 9.9468e-06, 1.6476e-05, 9.7492e-06, 7.8953e-06,\n 7.0322e-06, 8.0371e-06, 1.3497e-05, 6.4544e-06, 9.6091e-06, 4.8994e-06,\n 1.3694e-05, 1.1358e-05, 1.2709e-05, 1.7610e-05, 1.1749e-05, 2.0738e-05,\n 1.8814e-05, 9.6859e-06, 1.4661e-05, 1.2833e-05, 9.6204e-06, 9.9148e-07,\n 1.4877e-05, 1.1962e-05, 1.3668e-05, 1.1828e-05, 1.1080e-05, 7.1062e-06,\n 2.0325e-05, 1.8767e-05, 1.5729e-05, 7.3756e-06, 9.5756e-06, 9.7018e-06,\n 9.2670e-06, 2.0787e-05, 1.2156e-05, 1.8015e-05, 1.5323e-05, 6.5185e-06,\n 1.6457e-05, 1.7169e-05, 1.8286e-05, 1.4628e-05, 1.3801e-05, 7.4992e-06,\n 1.2779e-05, 2.1662e-05, 3.5822e-06, 1.8322e-05, 1.6071e-05, 1.3056e-05,\n 2.0116e-05, 1.8284e-05, 1.3637e-05, 8.9731e-06, 1.0787e-05, 1.7947e-05,\n 3.5430e-06, 1.1624e-05, 2.3360e-05, 1.2050e-05, 2.2200e-05, 5.3436e-06,\n 8.0774e-06, 8.3959e-06, 9.5905e-06, 9.5850e-06, 1.4033e-05, 1.4792e-05,\n 1.2103e-05, 1.5008e-05, 8.4658e-06, 1.6972e-05, 1.7182e-05, 8.8229e-06,\n 1.4177e-05, 1.2267e-05, 2.3714e-05, 1.7193e-05, 8.3598e-06, 1.7724e-05,\n 1.3896e-05, 2.5815e-05, 2.0340e-05, 1.1724e-05, 7.6579e-06, 1.6946e-05,\n 9.0413e-06, 1.6940e-05, 1.9358e-05, 1.3666e-05, 2.2182e-05, 1.2374e-08,\n 1.4891e-05, 1.5755e-05, 7.8171e-06, 1.9489e-05, 2.2247e-05, 9.3583e-06,\n 6.4501e-06, 9.1279e-06, 2.3672e-05, 1.2875e-07, 1.2452e-05, 1.1108e-05,\n 1.8915e-05, 8.6068e-06, 8.6544e-06, 6.2805e-06, 2.3932e-05, 1.6767e-05,\n 1.8119e-05, 1.5032e-05, 2.5069e-05, 1.3409e-05, 1.8856e-05, 1.7881e-05,\n 7.5441e-06, 1.1867e-05, 2.3108e-05, 1.2088e-05, 1.6034e-05, 1.7181e-05,\n 7.5536e-06, 1.4430e-05, 1.0528e-05, 1.3438e-05, 1.6294e-05, 1.3540e-05,\n 2.4623e-05, 1.4424e-05, 1.4365e-05, 1.5132e-05, 1.0294e-05, 1.1521e-05,\n 1.3062e-05, 1.7066e-05, 6.9418e-06, 1.7611e-05, 5.2546e-06, 1.3924e-05,\n 9.9329e-06, 3.6056e-05, 8.4073e-06, 1.5613e-05, 1.4842e-05, 1.5454e-05,\n 1.8634e-05, 1.1082e-05, 1.2567e-05, 1.5613e-05, 1.9892e-05, 1.3438e-05,\n 4.7781e-06, 2.2481e-05, 1.2155e-05, 1.8293e-05, 2.5169e-05, 5.2857e-06,\n 1.8737e-05, 1.9758e-05, 8.4869e-06, 1.7710e-05, 1.3177e-05, 2.0309e-05,\n 1.6941e-05, 8.0899e-06, 1.2952e-05, 1.1301e-05, 1.4383e-05, 1.3373e-05,\n 6.5560e-06, 2.1982e-05, 1.6969e-05, 1.9507e-05, 1.8218e-05, 6.5796e-06,\n 1.2440e-05, 1.3380e-05, 1.4644e-05, 2.4431e-05, 1.6294e-05, 1.2522e-05,\n 1.5920e-05, 1.7470e-05, 1.4703e-05, 1.2608e-05, 6.9901e-06, 3.4990e-06,\n 8.9700e-06, 9.3940e-06, 1.5964e-05, 1.3623e-05, 1.9786e-06, 7.5659e-06,\n 1.7035e-05, 1.0914e-05, 1.4667e-05, 1.7917e-05, 6.0939e-06, 1.3580e-05,\n 1.9434e-05, 7.3742e-06, 2.3377e-05, 1.6401e-05, 8.1055e-06, 7.8027e-06,\n 1.0017e-05, 1.3460e-05, 1.8137e-05, 2.4474e-05, 1.5211e-05, 1.0916e-05,\n 7.3318e-06, 1.6494e-05, 1.1088e-05, 5.0212e-06, 1.6702e-05, 1.4351e-05,\n 1.2223e-05, 1.5173e-05, 1.5592e-05, 8.1230e-06, 1.8440e-05, 1.7433e-05,\n 1.6475e-05, 1.2959e-05, 1.3742e-05, 1.0527e-05, 9.8001e-06, 1.3985e-05,\n 1.3460e-05, 9.2773e-06, 2.1029e-05, 1.1043e-05, 1.4603e-05, 5.2153e-06,\n 1.3842e-05, 1.6292e-05, 1.8581e-05, 1.7293e-05, 1.8536e-05, 1.0574e-05,\n 1.6289e-05, 1.4934e-05, 1.5867e-05, 1.8607e-05, 1.3275e-05, 1.7563e-05,\n 3.5740e-06, 7.7058e-06, 1.3950e-05, 1.1108e-05, 2.2269e-05, 1.6980e-05,\n 2.0291e-05, 8.3315e-06, 1.9871e-05, 1.9144e-05, 1.3777e-05, 1.5985e-05,\n 1.3703e-05, 7.7642e-06, 5.8474e-06, 1.9852e-05, 1.2343e-05, 1.0345e-05,\n 1.4800e-05, 5.0925e-06, 4.0782e-06, 2.0144e-05, 1.1197e-05, 6.2929e-06,\n 1.6884e-05, 1.7490e-05, 1.0374e-05, 1.9681e-05, 1.9613e-05, 1.3036e-05,\n 1.3712e-05, 1.3536e-05, 1.8790e-05, 9.5395e-06, 8.3583e-06, 1.4146e-05,\n 1.4040e-05, 1.5749e-05, 1.3914e-05, 9.7474e-06, 6.5225e-06, 1.8177e-05,\n 2.1148e-05, 1.7063e-05, 1.4682e-05, 2.7384e-05, 6.1363e-06, 1.4183e-05,\n 2.1412e-05, 1.8771e-05, 1.3552e-05, 7.8462e-06, 1.1646e-05, 1.4228e-05,\n 9.1110e-06, 1.7250e-05, 1.8878e-05, 1.4838e-05, 1.6414e-05, 1.9819e-05,\n 1.2848e-05, 7.5440e-06, 1.5671e-05, 1.0660e-05, 1.5065e-05, 1.8753e-05,\n 2.6864e-05, 2.4163e-05, 1.3501e-05, 6.4894e-06, 9.7936e-06, 2.0941e-05,\n 1.4427e-05, 7.6370e-06, 1.3987e-05, 2.3006e-05, 8.4320e-06, 9.4453e-06,\n 1.3531e-05, 9.3762e-06, 1.5435e-07, 1.5848e-05, 1.6629e-05, 9.1558e-06,\n 1.0200e-05, 1.2112e-05, 6.4585e-06, 1.3203e-05, 7.7648e-06, 1.0226e-05,\n 1.7464e-05, 1.4702e-05, 1.1262e-05, 1.8855e-06, 1.6480e-05, 1.2685e-05,\n 1.9155e-05, 5.2134e-06, 7.9934e-06, 9.6623e-06, 7.8758e-06, 1.6139e-05,\n 1.2202e-05, 1.7655e-05, 7.2711e-06, 1.4871e-05, 9.6478e-06, 1.7357e-05,\n 2.0892e-05, 1.8899e-05, 9.7521e-06, 1.6541e-05, 1.7305e-05, 1.0176e-05,\n 1.5983e-05, 2.0169e-05, 1.6354e-05, 6.6553e-06, 1.7282e-05, 1.5678e-05,\n 7.3218e-06, 1.5266e-05, 2.3784e-05, 1.6783e-05, 9.7114e-06, 1.1834e-05,\n 1.3696e-05, 1.9128e-05, 1.3893e-05, 1.0542e-05, 1.1852e-05, 7.9272e-06,\n 1.6619e-05, 7.8633e-06, 5.9261e-06, 7.6096e-06, 1.3789e-05, 1.4143e-05,\n 9.2947e-06, 1.0572e-05, 1.9570e-05, 1.1497e-05, 2.3273e-05, 1.4583e-05,\n 1.7238e-05, 1.3268e-05, 1.0583e-05, 1.7329e-05, 1.4748e-05, 1.8787e-05,\n 1.9857e-05, 8.9648e-06, 9.0161e-06, 2.7240e-05, 1.9648e-05, 1.5905e-05,\n 2.2032e-05, 1.3959e-05, 1.0672e-05, 2.1937e-05, 5.8185e-06, 1.6647e-05,\n 2.4160e-05, 1.2309e-05, 8.5937e-06, 1.6457e-05, 2.7360e-05, 6.6777e-06,\n 1.6849e-05, 1.3613e-05, 1.9003e-05, 1.7306e-05, 9.4128e-06, 2.3602e-05,\n 1.2917e-05, 1.8897e-05, 1.6195e-05, 1.7967e-05, 1.5510e-05, 1.3196e-05,\n 4.2830e-06, 1.0420e-05, 3.1283e-05, 1.5390e-05, 1.4401e-05, 1.5304e-05,\n 1.1718e-05, 1.3981e-05, 5.8972e-06, 6.9792e-06, 1.9126e-05, 1.6436e-05,\n 1.0994e-05, 7.6629e-06, 1.3978e-05, 1.6482e-05, 1.3773e-05, 1.0305e-05,\n 1.9492e-05, 1.3390e-05, 9.6950e-06, 8.8627e-06, 1.5267e-05, 1.0732e-05,\n 8.6257e-06, 9.8280e-06, 1.3291e-05, 1.1644e-05, 9.2442e-06, 1.3444e-05,\n 1.7419e-05, 1.2303e-05, 9.1955e-06, 1.5041e-05, 9.4775e-06, 1.3770e-05,\n 7.0643e-06, 9.0284e-06, 1.8779e-05, 1.6672e-05, 7.5505e-06, 2.5854e-05,\n 8.6435e-06, 1.0169e-05, 1.5999e-05, 6.9199e-06, 1.0854e-05, 7.5112e-06,\n 1.1304e-05, 1.7214e-05, 1.3659e-05, 1.1107e-05, 7.9862e-06, 1.5609e-05,\n 8.6370e-06, 2.4930e-05, 1.2505e-05, 1.0032e-05, 1.6345e-05, 9.7866e-06,\n 1.7588e-05, 1.1339e-05, 1.4857e-05, 1.8047e-05, 1.1552e-05, 6.5783e-06,\n 1.6376e-05, 8.5012e-06, 9.0784e-06, 1.4048e-05, 1.0008e-05, 7.0845e-06,\n 1.3109e-05, 1.2622e-05, 1.4988e-05, 1.3331e-05, 1.4150e-05, 3.5591e-10,\n 1.0200e-05, 2.4263e-05, 2.2971e-05, 1.1757e-05, 1.6873e-05, 1.6255e-05,\n 1.5715e-05, 7.4529e-06, 1.1621e-05, 1.0848e-05, 1.4682e-05, 1.3537e-05,\n 1.7388e-05, 1.3816e-05, 1.6372e-05, 1.7529e-05, 1.5847e-05, 1.6624e-05],\n device='cuda:0')"
14
+ },
15
+ "2": {
16
+ "step": "tensor(7508.)",
17
+ "exp_avg": "tensor([[-4.5259e-06, -1.5647e-05, -1.1717e-08, ..., -2.1334e-05,\n 7.5175e-07, -6.0642e-07],\n [ 1.4238e-05, 6.3289e-05, -8.5907e-06, ..., 2.1653e-04,\n -8.0629e-06, 6.7867e-07],\n [-5.3978e-05, -7.5982e-05, -3.5889e-06, ..., -2.8026e-05,\n -1.1446e-05, 1.4358e-05],\n ...,\n [-1.4725e-06, 2.7038e-06, -6.9644e-05, ..., 3.9392e-05,\n 1.7441e-05, 3.7764e-05],\n [ 1.7311e-05, 5.6579e-05, -2.3671e-05, ..., 7.2830e-06,\n -6.7597e-06, -1.1687e-04],\n [ 2.1513e-07, 1.8281e-06, 4.6479e-05, ..., 1.7764e-05,\n 4.7428e-06, -1.2307e-04]], device='cuda:0')",
18
+ "exp_avg_sq": "tensor([[9.0945e-09, 2.4544e-08, 4.1298e-09, ..., 3.9658e-09, 3.1240e-09,\n 2.1059e-09],\n [1.0183e-08, 3.7699e-08, 1.6068e-08, ..., 9.9429e-07, 3.2798e-09,\n 6.0409e-10],\n [8.9439e-09, 3.1616e-08, 6.2091e-08, ..., 3.2841e-08, 8.2836e-09,\n 2.6426e-09],\n ...,\n [1.1700e-09, 1.1083e-08, 9.0797e-09, ..., 6.1321e-09, 8.0530e-09,\n 2.4391e-08],\n [2.4424e-09, 1.0429e-08, 3.3199e-08, ..., 9.2736e-10, 5.1868e-09,\n 2.7540e-07],\n [2.5705e-09, 5.2274e-09, 3.7639e-08, ..., 4.6385e-09, 2.2005e-08,\n 5.7822e-07]], device='cuda:0')"
19
+ },
20
+ "3": {
21
+ "step": "tensor(7508.)",
22
+ "exp_avg": "tensor([-1.6508e-04, 1.8053e-04, -1.1532e-04, 2.3541e-07, 5.6052e-45,\n 6.1585e-05, 5.6052e-45, 1.9189e-04, 2.0200e-05, 2.0373e-04,\n 2.4177e-05, -3.3207e-04, -5.5630e-05, 8.8049e-06, 3.3914e-05,\n 9.6633e-05, 5.6052e-45, -1.7360e-04, -2.6709e-06, -6.3075e-05,\n 5.6052e-45, 5.6052e-45, 5.6052e-45, -1.0032e-04, -2.6039e-04,\n -1.6923e-04, 5.7898e-05, 6.0785e-06, 7.9942e-05, 5.6052e-45,\n -8.9747e-05, 5.6052e-45, 3.4118e-05, -2.5305e-06, -1.2177e-04,\n -7.5763e-05, 4.4636e-31, 5.6052e-45, 5.6052e-45, 1.9315e-04,\n 1.5419e-04, 5.6052e-45, -1.0187e-04, 5.6052e-45, 5.6052e-45,\n 2.2290e-04, -2.8689e-05, -7.0275e-05, -1.1514e-04, 2.5204e-04,\n 5.6052e-45, 1.2984e-04, 5.6052e-45, -4.7268e-05, -2.3324e-05,\n -1.2736e-04, 2.8440e-05, 5.6052e-45, -2.1979e-04, 2.1320e-05,\n -2.1496e-05, 2.0068e-04, 1.5408e-04, 2.4613e-04, -1.9839e-04,\n -8.7921e-06, 1.5129e-04, 1.8157e-06, 1.6523e-05, -1.1995e-05,\n -6.8061e-06, 1.9974e-04, -2.6681e-05, -1.4036e-04, 9.1058e-05,\n -9.2803e-05, -1.9215e-04, 5.6052e-45, 5.6052e-45, -3.7189e-05,\n 5.7948e-05, -1.8475e-04, -3.1559e-05, 1.0398e-04, 2.6702e-05,\n 1.2827e-04, -6.0632e-05, -5.7436e-05, 8.3283e-05, 5.6052e-45,\n 5.8204e-05, 6.0541e-05, 2.4946e-05, -6.3652e-05, 1.3549e-04,\n 5.6052e-45, -1.8558e-04, -1.4316e-04, -3.5980e-05, 1.5341e-05,\n -2.8016e-05, 5.6052e-45, -1.1583e-04, 4.6182e-05, 1.1594e-04,\n -6.4912e-05, -2.7751e-04, 2.9455e-04, -3.6532e-05, 1.2909e-04,\n -3.8969e-04, -4.9221e-05, -2.9352e-05, -1.1714e-05, 2.3369e-04,\n -1.7662e-05, -3.2708e-07, -3.9678e-05, 5.6052e-45, 1.2784e-05,\n 8.0707e-05, 6.8648e-05, 4.2315e-05, 3.9851e-04, -4.2072e-05,\n -1.9583e-07, 1.1626e-04, -2.5578e-04, 3.4011e-05, -6.3134e-06,\n 2.2689e-05, 5.6052e-45, -7.7050e-05, 9.8078e-05, 4.7545e-18,\n 1.9704e-04, 4.1383e-05, 5.9384e-05, -4.3333e-05, -6.7427e-05,\n -5.1606e-05, -9.8125e-05, 5.6052e-45, 3.0668e-06, 5.6052e-45,\n 5.6052e-45, 5.6052e-45, -8.3734e-05, 5.6052e-45, -2.7152e-04,\n 9.6525e-06, -3.5045e-07, 1.8702e-04, 5.6052e-45, 9.5843e-05,\n 5.6052e-45, -1.4542e-04, 1.9691e-04, 4.0157e-05, -8.6291e-05,\n 1.0797e-04, -4.4193e-05, 1.6502e-04, -3.7908e-05, 2.4358e-05,\n 1.0322e-05, 5.6052e-45, 5.6052e-45, -3.5352e-05, 5.6052e-45,\n 1.6321e-04, -8.6995e-05, 5.6052e-45, -8.2218e-06, 6.8470e-05,\n -3.5386e-04, -1.2224e-04, 5.6052e-45, 5.2812e-05, 1.3312e-04,\n 5.6052e-45, 2.5513e-04, -3.3911e-05, -1.7354e-04, 1.7532e-04,\n -6.5447e-05, 5.6052e-45, 5.0447e-44, 1.6588e-04, 2.1246e-04,\n -6.3220e-05, 5.5230e-06, 7.2838e-05, 5.6052e-45, 9.8657e-05,\n 1.2526e-04, 8.3620e-05, -1.0592e-04, 1.0740e-04, 5.6052e-45,\n 5.6052e-45, -2.5575e-05, 1.8520e-04, -8.5502e-05, -1.5756e-04,\n -6.5591e-05, 5.6052e-45, 5.6052e-45, 7.9165e-05, 6.1014e-05,\n 1.1286e-04, -3.5744e-04, -1.7194e-05, -7.8100e-05, 5.1070e-05,\n 8.1805e-05, -4.3803e-05, 4.5956e-05, 5.6052e-45, 5.6052e-45,\n 2.8675e-04, -9.1088e-05, 4.1233e-17, -1.6762e-04, -2.7000e-05,\n 2.6455e-05, 1.7842e-04, 4.6447e-05, 7.0917e-05, 3.1414e-04,\n 6.6623e-05, 2.3830e-04, 5.6052e-45, -2.4703e-04, -3.5667e-05,\n 2.3304e-04, 5.6052e-45, 1.5095e-04, 5.6052e-45, -2.3781e-05,\n 5.6052e-45, 1.7679e-04, 1.1482e-04, -8.7360e-05, -5.2061e-05,\n 5.6052e-45, -1.4405e-04, 1.7258e-04, 6.9953e-05, -7.9862e-05,\n 5.7113e-05, -2.8222e-05, -6.3930e-05, -1.3060e-04, 1.8094e-05,\n -1.8658e-05, 2.0120e-04, 5.6052e-45, 6.8660e-05, 4.9508e-05,\n 1.7602e-04, 1.4410e-04, -1.2065e-04, -1.6836e-05, -4.0512e-05,\n -1.2154e-04, 1.1790e-06, -1.9638e-05, 5.6052e-45, -1.5797e-04,\n 1.6893e-06, 5.6052e-45, -3.9947e-05, -2.1589e-04, -1.6877e-04,\n 5.4543e-05, -2.2506e-04, -1.8659e-05, -2.7988e-04, -1.0204e-04,\n -1.7266e-06, -3.7595e-04, 5.6052e-45, 2.9067e-38, 1.6259e-04,\n -1.4764e-04, -3.5186e-04, -3.1876e-04, 2.4057e-04, 3.8142e-05,\n -1.7640e-04, 7.2290e-05, 1.8320e-05, -2.7089e-04, 1.1097e-04,\n 5.6052e-45, -3.0757e-04, 1.0548e-04, -1.2897e-05, -1.0249e-04,\n 6.6737e-05, -9.9560e-06, -4.7809e-05, -2.2668e-04, 5.6052e-45,\n -1.2415e-05, 2.4577e-05, 5.6052e-45, 2.1218e-04, 9.5567e-05,\n 5.2389e-05, -1.1694e-04, -1.8852e-04, 9.4969e-05, 5.6052e-45,\n 1.1395e-04, -1.9571e-04, -1.3431e-04, 5.6052e-45, 2.3248e-04,\n -1.6531e-04, 1.5653e-04, 4.4158e-06, -3.3431e-05, 5.6052e-45,\n -7.5095e-05, -3.1739e-04, 3.8762e-05, -5.9789e-04, 1.8955e-04,\n -1.6724e-04, -1.0351e-05, 1.1951e-04, -7.1127e-05, -1.8099e-04,\n -1.5896e-04, 1.4604e-04, 5.6052e-45, -9.5430e-05, 1.3757e-04,\n -2.4392e-05, 8.0499e-05, -6.9408e-05, -2.8591e-04, 5.6052e-45,\n -1.1269e-04, 2.0331e-04, 5.6052e-45, -6.4151e-05, 4.6601e-22,\n 1.0851e-04, 2.1298e-04, 5.6052e-45, 5.6052e-45, -1.7219e-04,\n 1.5578e-04, -1.7786e-04, 1.9785e-04, 1.4506e-04, -3.2119e-04,\n 8.4059e-06, 5.6052e-45, 3.9045e-05, 1.9216e-04, -4.0136e-05,\n 1.3297e-04, 2.4836e-04, 1.7421e-04, 5.6052e-45, 5.6052e-45,\n 5.6052e-45, 1.5110e-04, 4.2422e-05, 5.6052e-45, -2.0882e-05,\n -8.6237e-05, -1.3106e-04, -1.1062e-05, 1.7950e-04, -9.1653e-05,\n -2.5108e-04, 5.6052e-45, -6.5619e-05, 5.6052e-45, -3.3992e-05,\n -2.7829e-04, 1.6437e-04, -1.4437e-05, 2.8147e-04, 4.5149e-05,\n -2.9547e-05, 8.0850e-06, 1.2178e-04, 5.6052e-45, 1.0660e-04,\n 1.0978e-04, -1.7577e-04, -2.0178e-05, 5.6052e-45, 5.6052e-45,\n 2.0705e-04, -2.8388e-05, -2.1606e-05, -1.9174e-04, -3.1283e-05,\n 1.7254e-04, -1.9267e-05, -7.0006e-05, -9.1704e-05, 7.7505e-05,\n 1.5613e-04, -2.6790e-05, 8.7655e-05, 5.6052e-45, -7.0084e-05,\n -3.1935e-05, -1.4106e-04, 1.9441e-05, -1.1807e-04, -6.9797e-05,\n -1.7819e-04, 6.2370e-05, 1.9031e-04, -1.7595e-04, 5.6052e-45,\n 1.6391e-04, -3.1167e-04, 2.0189e-35, -1.2336e-05, 1.5648e-04,\n -5.4012e-05, -1.0706e-04, 1.1349e-04, 5.6052e-45, -1.7095e-04,\n -3.6391e-05, 1.2762e-04, 1.0319e-04, 9.2303e-06, -5.6678e-05,\n -6.0437e-05, -1.8584e-04, 1.5105e-04, -4.0843e-05, 5.6052e-45,\n 1.8711e-04, -7.6472e-05, -3.5523e-05, 9.0769e-05, 2.3357e-04,\n -7.1382e-05, 5.6052e-45, 2.5976e-05, 1.9990e-04, 1.0411e-04,\n 1.7328e-05, 5.6052e-45, -7.9901e-05, 3.5516e-05, -1.0213e-04,\n -2.4992e-04, 1.3348e-04, 5.6052e-45, -3.3571e-05, 5.6052e-45,\n 1.3557e-04, -2.4574e-04, -2.2142e-19, -6.8516e-05, -1.3706e-04,\n -1.2523e-04, 9.3312e-05, 9.5337e-05, 1.2351e-05, -1.5749e-04,\n 1.6693e-05, 4.2700e-05, -4.9888e-05, 1.9073e-04, 5.6052e-45,\n -2.2172e-05, -2.1549e-05, 1.8529e-04, 1.1455e-05, -2.2805e-05,\n -3.7302e-04, -7.0822e-05, 8.7923e-05, 1.3980e-04, -2.8533e-05,\n -6.9635e-05, 5.6052e-45, -7.4307e-05, 1.3329e-04, 5.6052e-45,\n -1.4578e-04, 1.7583e-04, 5.6052e-45, -6.5838e-05, -3.5392e-05,\n 5.6052e-45, -1.1051e-04, 1.6772e-04, 1.3826e-04, -2.9475e-04,\n -5.2917e-05, -2.0315e-05, 5.6052e-45, -1.9992e-04, -1.5398e-04,\n -1.9565e-04, -9.6845e-05], device='cuda:0')",
23
+ "exp_avg_sq": "tensor([4.4163e-07, 1.1356e-06, 7.1210e-07, 8.2343e-07, 6.6057e-08, 4.7957e-07,\n 4.0221e-09, 6.5994e-07, 2.7498e-07, 6.9736e-07, 8.2684e-07, 4.9636e-07,\n 2.3576e-07, 1.0521e-06, 8.3886e-07, 6.5498e-07, 5.1368e-08, 6.5543e-07,\n 3.5165e-07, 4.2019e-07, 5.9618e-08, 6.9199e-10, 2.4856e-08, 7.0340e-07,\n 7.4572e-07, 4.7067e-07, 1.4863e-07, 7.6130e-07, 4.2416e-07, 5.2307e-10,\n 7.0671e-07, 6.9572e-08, 4.0319e-07, 1.0389e-06, 6.4432e-07, 4.4746e-07,\n 1.2325e-08, 1.1840e-08, 6.6520e-10, 7.4583e-07, 2.6781e-07, 2.7501e-11,\n 7.5224e-07, 4.3041e-09, 9.4148e-10, 4.3634e-07, 6.5703e-07, 2.5414e-07,\n 5.4926e-07, 6.0703e-07, 2.9563e-09, 6.7343e-07, 9.3529e-09, 6.2477e-07,\n 4.4835e-07, 5.4136e-07, 4.3083e-07, 2.0401e-12, 4.8838e-07, 5.4429e-09,\n 4.8361e-07, 4.5096e-07, 8.8157e-07, 5.7115e-07, 5.3318e-07, 3.6277e-07,\n 3.8676e-07, 4.3916e-07, 5.7979e-07, 7.3605e-07, 4.1439e-07, 5.2336e-07,\n 2.4671e-07, 8.0232e-07, 8.1804e-07, 8.4328e-07, 7.3968e-07, 2.9838e-13,\n 3.7643e-09, 4.9951e-07, 4.6739e-07, 8.5897e-07, 5.5824e-07, 4.8456e-07,\n 6.3872e-07, 1.9575e-07, 1.8851e-07, 6.1956e-07, 3.3528e-07, 7.6918e-09,\n 4.3295e-07, 3.5615e-07, 3.2591e-07, 6.3765e-07, 7.7525e-07, 5.2148e-10,\n 4.6283e-07, 3.9023e-07, 2.0086e-07, 3.9787e-07, 4.5684e-07, 3.2488e-10,\n 6.4199e-07, 6.1850e-07, 2.5208e-07, 5.0790e-07, 5.9250e-07, 5.5304e-07,\n 2.8871e-07, 6.8658e-07, 8.3283e-07, 8.0918e-07, 6.8772e-07, 3.6097e-07,\n 1.1155e-06, 3.9538e-07, 1.5094e-08, 5.1418e-07, 5.7128e-09, 4.4700e-07,\n 2.7724e-07, 5.5502e-07, 3.8257e-07, 6.1223e-07, 6.4449e-07, 5.1632e-10,\n 4.6253e-07, 9.0706e-07, 2.9204e-07, 6.8439e-07, 3.4476e-07, 1.7953e-10,\n 6.2842e-07, 5.9450e-07, 5.7136e-08, 5.1057e-07, 6.2821e-08, 5.5297e-07,\n 5.5918e-07, 6.1221e-07, 4.8893e-07, 3.3861e-07, 4.2004e-09, 1.9802e-07,\n 6.5077e-09, 3.2018e-08, 9.9649e-11, 2.4608e-07, 4.2169e-10, 7.3679e-07,\n 3.6265e-07, 7.4677e-07, 4.3953e-07, 1.8814e-08, 4.4842e-07, 1.7393e-10,\n 4.8057e-07, 2.7111e-07, 2.8201e-07, 4.1883e-07, 2.1658e-07, 6.0628e-07,\n 6.0222e-07, 1.1785e-07, 5.7850e-07, 2.2809e-07, 1.2036e-07, 1.2399e-09,\n 4.7198e-07, 7.9023e-09, 5.5323e-07, 4.4516e-07, 9.7336e-09, 8.6720e-08,\n 3.9544e-07, 4.4136e-07, 2.3799e-07, 5.4345e-08, 1.6005e-07, 4.8177e-07,\n 2.4113e-08, 5.6013e-07, 6.6214e-07, 8.6315e-07, 7.7098e-07, 9.6422e-07,\n 2.4332e-10, 1.8076e-09, 6.4699e-07, 4.2942e-07, 3.7425e-07, 6.7946e-07,\n 6.8386e-07, 3.2355e-08, 5.9725e-07, 7.4077e-07, 2.9320e-07, 5.0111e-07,\n 9.1384e-07, 4.8003e-08, 1.2127e-08, 3.7473e-07, 6.2571e-07, 5.7175e-07,\n 6.2018e-07, 3.0393e-07, 3.0051e-08, 5.0563e-09, 5.5074e-07, 5.7699e-07,\n 8.5837e-07, 5.8580e-07, 4.7985e-07, 6.8530e-07, 3.6369e-07, 4.0570e-07,\n 2.9025e-07, 3.9339e-07, 5.8291e-09, 8.9261e-08, 7.0963e-07, 3.8059e-07,\n 1.0079e-07, 5.1535e-07, 5.4701e-07, 3.4858e-07, 5.9775e-07, 7.1174e-07,\n 2.7987e-07, 5.3547e-07, 6.2810e-07, 6.5387e-07, 1.6469e-08, 7.0579e-07,\n 4.2052e-07, 5.7849e-07, 3.8133e-09, 6.6725e-07, 1.3273e-08, 6.0252e-07,\n 2.2145e-08, 4.1518e-07, 4.3204e-07, 3.7031e-07, 2.6107e-07, 4.5734e-08,\n 3.6708e-07, 7.5238e-07, 3.4923e-07, 3.7465e-07, 8.7547e-07, 8.3143e-07,\n 7.7318e-07, 7.4792e-07, 7.1253e-07, 8.6251e-07, 5.7584e-07, 1.2010e-08,\n 6.4618e-07, 3.6975e-07, 5.0936e-07, 7.1028e-07, 4.6454e-07, 3.5768e-07,\n 3.9482e-07, 6.9307e-07, 8.1228e-07, 8.2493e-09, 1.1108e-09, 5.5193e-07,\n 4.3051e-07, 1.6464e-10, 6.3556e-07, 1.7012e-07, 2.9778e-07, 8.2316e-07,\n 4.2347e-07, 7.9374e-07, 7.8054e-07, 1.1409e-06, 1.8682e-08, 4.4288e-07,\n 3.8114e-08, 4.1325e-10, 7.8151e-07, 8.6333e-07, 1.0779e-06, 9.6114e-07,\n 8.0258e-07, 3.0205e-07, 4.9567e-07, 5.5537e-07, 2.6281e-07, 6.3604e-07,\n 6.5029e-07, 1.4394e-08, 6.6437e-07, 4.9637e-08, 3.3453e-08, 7.6939e-07,\n 6.0887e-07, 6.3207e-07, 3.5647e-07, 7.1045e-07, 9.7462e-11, 5.5279e-07,\n 5.1846e-07, 2.2841e-09, 4.9099e-07, 1.9583e-07, 4.8791e-07, 9.1948e-07,\n 6.4251e-07, 6.3165e-07, 1.1799e-09, 4.4388e-07, 4.8200e-07, 5.3087e-08,\n 3.0450e-08, 8.0550e-07, 3.7219e-07, 5.5511e-07, 5.6846e-07, 6.9394e-07,\n 3.8773e-08, 5.8914e-07, 5.8997e-07, 5.4665e-07, 8.6423e-07, 5.4833e-07,\n 5.7107e-07, 1.6188e-07, 3.5607e-07, 4.9038e-07, 2.2007e-07, 6.2014e-07,\n 4.3343e-07, 3.9536e-09, 2.2606e-07, 6.9900e-07, 2.5604e-07, 3.9441e-07,\n 3.3225e-07, 8.8041e-07, 6.8817e-09, 6.2516e-07, 4.8601e-07, 3.2217e-08,\n 6.9775e-07, 4.9025e-09, 2.2910e-07, 1.0051e-06, 9.5007e-10, 2.4238e-09,\n 4.0140e-07, 7.9697e-07, 4.8419e-07, 4.9353e-07, 6.3713e-07, 9.5151e-07,\n 3.0497e-07, 3.0056e-09, 6.0563e-07, 4.1456e-07, 2.8047e-07, 2.6607e-07,\n 3.2685e-07, 6.5367e-07, 3.8721e-09, 2.5938e-08, 4.7670e-08, 6.2130e-07,\n 2.4105e-07, 1.9998e-08, 2.4261e-07, 5.7974e-07, 3.2508e-07, 6.7589e-07,\n 5.8430e-07, 5.8885e-07, 6.3847e-07, 4.1930e-10, 7.4048e-07, 3.8863e-08,\n 7.4215e-07, 4.8298e-07, 1.6251e-07, 1.0982e-07, 8.0751e-07, 3.3804e-07,\n 4.8979e-07, 5.7803e-07, 7.9452e-07, 5.4357e-09, 1.0222e-06, 4.2733e-07,\n 6.2643e-07, 2.3172e-07, 2.0472e-09, 5.1029e-08, 6.2117e-07, 3.8252e-07,\n 6.7592e-07, 5.8412e-07, 4.0229e-07, 5.0954e-07, 4.5800e-07, 9.1744e-07,\n 4.4701e-07, 5.8063e-07, 4.8116e-07, 5.2634e-07, 9.9270e-07, 1.4264e-07,\n 2.8763e-07, 8.2048e-07, 6.5107e-07, 3.7915e-07, 6.3097e-07, 5.1033e-07,\n 7.3832e-07, 3.3717e-07, 7.4499e-07, 3.6060e-07, 2.4786e-11, 7.6798e-07,\n 7.0159e-07, 7.2792e-09, 3.3624e-07, 5.5706e-07, 2.9045e-07, 4.5904e-07,\n 2.6068e-07, 5.4903e-09, 7.0075e-07, 5.8999e-07, 6.8871e-07, 4.9732e-07,\n 8.3912e-07, 9.9522e-07, 3.4097e-07, 8.0558e-07, 4.8093e-07, 6.6599e-07,\n 1.7544e-07, 3.8130e-07, 7.1516e-07, 7.5153e-07, 5.5631e-07, 3.9589e-07,\n 4.9038e-07, 8.5419e-09, 6.6895e-07, 6.8508e-07, 6.5815e-07, 4.3599e-07,\n 6.9067e-09, 8.1211e-07, 6.8599e-07, 4.1171e-07, 7.6717e-07, 6.6376e-07,\n 5.1694e-10, 5.1575e-07, 3.7588e-08, 6.1456e-07, 5.4616e-07, 1.5408e-08,\n 3.2658e-07, 6.9813e-07, 3.4411e-07, 2.4982e-07, 6.9923e-07, 4.7604e-07,\n 3.5797e-07, 3.8505e-07, 6.6258e-07, 6.7331e-07, 4.0657e-07, 1.8532e-08,\n 3.4432e-07, 5.3718e-07, 7.2597e-07, 4.9430e-07, 2.8839e-07, 7.1350e-07,\n 6.0661e-07, 2.2360e-07, 4.2657e-07, 6.4728e-07, 5.6864e-07, 1.3825e-07,\n 4.3852e-07, 4.9143e-07, 7.1165e-09, 5.9623e-07, 4.3848e-07, 2.4796e-08,\n 6.3322e-07, 1.5947e-07, 7.8301e-08, 8.8580e-07, 6.7210e-07, 9.0130e-07,\n 7.2274e-07, 4.8843e-07, 5.3537e-07, 4.5197e-09, 7.1842e-07, 7.1316e-07,\n 4.6498e-07, 6.6325e-07], device='cuda:0')"
24
+ },
25
+ "4": {
26
+ "step": "tensor(7508.)",
27
+ "exp_avg": "tensor([[-1.4500e-05, 7.4914e-06, -7.9246e-07, ..., 5.3356e-07,\n -6.2456e-06, -1.0201e-05],\n [-1.3146e-05, 1.2174e-05, 2.1376e-05, ..., 1.2758e-05,\n -1.1739e-05, 1.1155e-05],\n [ 1.2148e-06, -4.7449e-05, -2.9570e-05, ..., -9.8354e-06,\n -1.0421e-05, 3.1890e-06],\n ...,\n [ 6.8725e-06, 4.2402e-05, -2.3562e-05, ..., -1.1762e-05,\n 1.5443e-05, -3.8750e-05],\n [-5.3226e-06, -2.5794e-05, 3.2360e-05, ..., 3.6089e-05,\n -4.3098e-06, 1.8161e-05],\n [-7.8413e-06, 4.2279e-05, -4.3822e-05, ..., -2.9792e-05,\n 1.4919e-05, -1.0558e-05]], device='cuda:0')",
28
+ "exp_avg_sq": "tensor([[2.7302e-09, 1.3736e-08, 1.7940e-08, ..., 3.8869e-09, 8.6940e-10,\n 6.5510e-09],\n [4.9543e-09, 3.0929e-08, 1.5839e-08, ..., 8.8572e-09, 3.3401e-09,\n 7.1319e-09],\n [4.0614e-09, 3.8230e-08, 1.3167e-08, ..., 1.2257e-08, 1.7331e-09,\n 5.7317e-09],\n ...,\n [4.6266e-09, 4.6846e-08, 2.2250e-08, ..., 2.8383e-08, 4.5865e-09,\n 2.1145e-08],\n [5.2251e-09, 3.7854e-08, 3.0074e-08, ..., 2.0192e-08, 3.0133e-09,\n 1.4212e-08],\n [6.6693e-09, 3.8959e-08, 1.5822e-08, ..., 1.0703e-08, 2.4368e-09,\n 1.7607e-08]], device='cuda:0')"
29
+ }
30
+ },
31
+ "param_groups": [
32
+ {
33
+ "lr": 0.00904518046337755,
34
+ "name": "shared",
35
+ "betas": [
36
+ 0.9,
37
+ 0.999
38
+ ],
39
+ "eps": 1e-08,
40
+ "weight_decay": 1e-05,
41
+ "amsgrad": false,
42
+ "maximize": false,
43
+ "foreach": null,
44
+ "capturable": false,
45
+ "differentiable": false,
46
+ "fused": null,
47
+ "decoupled_weight_decay": true,
48
+ "initial_lr": 0.01,
49
+ "params": [
50
+ 0,
51
+ 1
52
+ ]
53
+ },
54
+ {
55
+ "lr": 0.00904518046337755,
56
+ "name": "scale_256",
57
+ "betas": [
58
+ 0.9,
59
+ 0.999
60
+ ],
61
+ "eps": 1e-08,
62
+ "weight_decay": 1e-05,
63
+ "amsgrad": false,
64
+ "maximize": false,
65
+ "foreach": null,
66
+ "capturable": false,
67
+ "differentiable": false,
68
+ "fused": null,
69
+ "decoupled_weight_decay": true,
70
+ "initial_lr": 0.01,
71
+ "params": [
72
+ 2,
73
+ 3,
74
+ 4
75
+ ]
76
+ },
77
+ {
78
+ "lr": 0.00904518046337755,
79
+ "name": "scale_512",
80
+ "betas": [
81
+ 0.9,
82
+ 0.999
83
+ ],
84
+ "eps": 1e-08,
85
+ "weight_decay": 1e-05,
86
+ "amsgrad": false,
87
+ "maximize": false,
88
+ "foreach": null,
89
+ "capturable": false,
90
+ "differentiable": false,
91
+ "fused": null,
92
+ "decoupled_weight_decay": true,
93
+ "initial_lr": 0.01,
94
+ "params": [
95
+ 5,
96
+ 6,
97
+ 7
98
+ ]
99
+ },
100
+ {
101
+ "lr": 0.00904518046337755,
102
+ "name": "scale_768",
103
+ "betas": [
104
+ 0.9,
105
+ 0.999
106
+ ],
107
+ "eps": 1e-08,
108
+ "weight_decay": 1e-05,
109
+ "amsgrad": false,
110
+ "maximize": false,
111
+ "foreach": null,
112
+ "capturable": false,
113
+ "differentiable": false,
114
+ "fused": null,
115
+ "decoupled_weight_decay": true,
116
+ "initial_lr": 0.01,
117
+ "params": [
118
+ 8,
119
+ 9,
120
+ 10
121
+ ]
122
+ },
123
+ {
124
+ "lr": 0.00904518046337755,
125
+ "name": "scale_1024",
126
+ "betas": [
127
+ 0.9,
128
+ 0.999
129
+ ],
130
+ "eps": 1e-08,
131
+ "weight_decay": 1e-05,
132
+ "amsgrad": false,
133
+ "maximize": false,
134
+ "foreach": null,
135
+ "capturable": false,
136
+ "differentiable": false,
137
+ "fused": null,
138
+ "decoupled_weight_decay": true,
139
+ "initial_lr": 0.01,
140
+ "params": [
141
+ 11,
142
+ 12,
143
+ 13
144
+ ]
145
+ },
146
+ {
147
+ "lr": 0.004522637977440181,
148
+ "name": "fusion",
149
+ "betas": [
150
+ 0.9,
151
+ 0.999
152
+ ],
153
+ "eps": 1e-08,
154
+ "weight_decay": 1e-05,
155
+ "amsgrad": false,
156
+ "maximize": false,
157
+ "foreach": null,
158
+ "capturable": false,
159
+ "differentiable": false,
160
+ "fused": null,
161
+ "decoupled_weight_decay": true,
162
+ "initial_lr": 0.005,
163
+ "params": [
164
+ 14,
165
+ 15,
166
+ 16,
167
+ 17,
168
+ 18,
169
+ 19,
170
+ 20,
171
+ 21,
172
+ 22,
173
+ 23,
174
+ 24,
175
+ 25,
176
+ 26,
177
+ 27,
178
+ 28,
179
+ 29,
180
+ 30,
181
+ 31,
182
+ 32,
183
+ 33,
184
+ 34,
185
+ 35,
186
+ 36,
187
+ 37,
188
+ 38,
189
+ 39,
190
+ 40,
191
+ 41,
192
+ 42,
193
+ 43,
194
+ 44,
195
+ 45,
196
+ 46,
197
+ 47,
198
+ 48,
199
+ 49,
200
+ 50,
201
+ 51,
202
+ 52,
203
+ 53,
204
+ 54,
205
+ 55,
206
+ 56,
207
+ 57,
208
+ 58,
209
+ 59,
210
+ 60,
211
+ 61,
212
+ 62,
213
+ 63,
214
+ 64
215
+ ]
216
+ }
217
+ ]
218
+ },
219
+ "scheduler_state_dict": {
220
+ "T_0": 10,
221
+ "T_i": 10,
222
+ "T_mult": 2,
223
+ "eta_min": 1e-06,
224
+ "T_cur": 2,
225
+ "base_lrs": [
226
+ 0.01,
227
+ 0.01,
228
+ 0.01,
229
+ 0.01,
230
+ 0.01,
231
+ 0.005
232
+ ],
233
+ "last_epoch": 2,
234
+ "_step_count": 0,
235
+ "_is_initial": false,
236
+ "_get_lr_called_within_step": false,
237
+ "_last_lr": [
238
+ 0.00904518046337755,
239
+ 0.00904518046337755,
240
+ 0.00904518046337755,
241
+ 0.00904518046337755,
242
+ 0.00904518046337755,
243
+ 0.004522637977440181
244
+ ]
245
+ },
246
+ "metrics": {
247
+ "best_val_acc": 68.56466666666667,
248
+ "best_epoch": 1,
249
+ "scale_accuracies": {
250
+ "256": 68.56466666666667
251
+ },
252
+ "training_history": {
253
+ "epochs": [
254
+ 1,
255
+ 2
256
+ ],
257
+ "train_loss": [
258
+ 3.2117288923670015,
259
+ 2.2284906192478755
260
+ ],
261
+ "train_acc": [
262
+ 56.118471154293964,
263
+ 67.94222246852544
264
+ ],
265
+ "val_acc": [
266
+ 66.428,
267
+ 68.56466666666667
268
+ ],
269
+ "scale_accs": {
270
+ "256": [
271
+ 66.428,
272
+ 68.56466666666667
273
+ ]
274
+ },
275
+ "lr": [
276
+ 0.00975530705321762,
277
+ 0.00904518046337755
278
+ ]
279
+ }
280
+ },
281
+ "train_config": {
282
+ "name": "david_training",
283
+ "run_id": "20251012_210041",
284
+ "dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
285
+ "model_variant": [
286
+ "clip_vit_b16",
287
+ "clip_vit_laion_b32",
288
+ "clip_vit_b32"
289
+ ],
290
+ "num_classes": 1000,
291
+ "preset": "balanced",
292
+ "custom_config_path": null,
293
+ "num_classes_override": null,
294
+ "use_belly_override": null,
295
+ "belly_expand_override": null,
296
+ "progressive_training_override": true,
297
+ "scale_warmup_epochs_override": {
298
+ "256": 0,
299
+ "512": 2,
300
+ "768": 5,
301
+ "1024": 8
302
+ },
303
+ "num_epochs": 10,
304
+ "batch_size": 1024,
305
+ "learning_rate": 0.01,
306
+ "weight_decay": 1e-05,
307
+ "warmup_epochs": 3,
308
+ "use_rose_loss": true,
309
+ "rose_initial_weight": 0.2,
310
+ "rose_max_weight": 0.8,
311
+ "rose_weight_schedule": "adaptive",
312
+ "use_cayley_loss": false,
313
+ "cayley_weight": 0.01,
314
+ "scale_loss_balance": null,
315
+ "use_mixed_precision": false,
316
+ "gradient_clip": 10.0,
317
+ "scheduler_type": "cosine_restarts",
318
+ "min_lr": 1e-06,
319
+ "freeze_strategy": "never",
320
+ "freeze_threshold": 90.0,
321
+ "unfreeze_on_plateau": true,
322
+ "patience": 10,
323
+ "track_gradients": true,
324
+ "gradient_scale_threshold": 1e-05,
325
+ "gradient_scale_multiplier": 10.0,
326
+ "log_interval": 50,
327
+ "val_interval": 1,
328
+ "save_interval": 5,
329
+ "log_fusion_weights": true,
330
+ "log_loss_components": true,
331
+ "save_format": "safetensors",
332
+ "hf_repo": "AbstractPhil/david-shared-space",
333
+ "upload_to_hub": true,
334
+ "base_dir": "./david_training",
335
+ "num_workers": 10,
336
+ "pin_memory": true,
337
+ "prefetch_factor": 4,
338
+ "persistent_workers": true
339
+ }
340
+ }