AbstractPhil commited on
Commit
827fe0c
·
verified ·
1 Parent(s): b4881f3

Update best_model_acc64.12_metadata.json - Run 20251012_235237

Browse files
weights/David-fully_shared-weighted_sum/20251012_235237/best_model_acc64.12_metadata.json ADDED
@@ -0,0 +1,256 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2,
3
+ "optimizer_state_dict": {
4
+ "state": {
5
+ "0": {
6
+ "step": "tensor(11262.)",
7
+ "exp_avg": "tensor([[-1.0912e-04, 1.7710e-04, -5.1581e-05, ..., 8.2750e-05,\n 1.5077e-05, -2.8984e-06],\n [-6.3662e-06, -1.4232e-04, 8.9995e-05, ..., -9.2837e-05,\n 7.7396e-05, -4.0614e-05],\n [-8.3108e-05, -2.1778e-05, 9.1490e-07, ..., -4.9511e-05,\n 7.7988e-06, 2.0816e-06],\n ...,\n [-5.6232e-05, -5.1216e-05, -1.7388e-08, ..., 1.1630e-05,\n -2.3462e-05, -4.6679e-06],\n [-1.6300e-05, 7.5157e-05, -2.2505e-05, ..., 2.9254e-05,\n 1.7576e-05, -6.2404e-05],\n [-3.3755e-05, -2.5914e-05, 1.6203e-05, ..., -8.8669e-06,\n 3.0353e-06, -8.0826e-06]], device='cuda:0')",
8
+ "exp_avg_sq": "tensor([[1.5719e-07, 1.6665e-07, 6.7613e-08, ..., 7.7754e-08, 4.0946e-08,\n 3.7556e-08],\n [7.0600e-08, 2.4836e-07, 1.1595e-07, ..., 8.7239e-08, 3.5817e-08,\n 3.6428e-08],\n [2.9269e-08, 3.4777e-08, 3.0109e-08, ..., 8.0372e-08, 1.6255e-08,\n 2.0124e-08],\n ...,\n [5.5477e-08, 4.7951e-07, 6.6263e-08, ..., 1.1796e-07, 3.2124e-08,\n 5.1725e-08],\n [1.1701e-07, 2.2806e-07, 8.8380e-08, ..., 8.8289e-08, 3.9845e-08,\n 4.9812e-08],\n [1.1484e-08, 3.2477e-08, 1.2295e-08, ..., 1.1267e-08, 4.4124e-09,\n 5.9155e-09]], device='cuda:0')"
9
+ },
10
+ "1": {
11
+ "step": "tensor(11262.)",
12
+ "exp_avg": "tensor([-9.7561e-04, -2.9223e-03, -2.1954e-03, 1.0857e-03, -7.5199e-04,\n 1.4749e-03, 2.3345e-03, -2.2778e-03, -1.7197e-03, 2.3745e-03,\n 3.4425e-03, 2.9312e-03, -3.5188e-03, -6.3150e-04, 8.1537e-04,\n -2.6149e-03, -1.7568e-03, 3.1963e-04, -3.5566e-03, -8.9193e-04,\n 1.1261e-03, 3.1425e-04, 1.4634e-03, -1.1202e-03, 4.2987e-03,\n -5.9736e-04, -7.3047e-04, -3.1093e-03, 1.7836e-03, 2.2185e-03,\n 1.6668e-03, -1.9346e-03, -5.1995e-05, -1.5656e-03, -9.9192e-04,\n 2.3517e-04, 2.0873e-04, -4.5518e-04, -1.6752e-03, 7.8052e-04,\n 1.0085e-03, -4.0698e-04, -3.3468e-04, 1.6418e-03, 1.6571e-03,\n 2.2843e-03, 1.5842e-03, 1.0527e-03, 1.4150e-03, 6.3812e-03,\n -2.6086e-03, -1.4296e-03, 2.1464e-03, -3.6336e-03, -1.1823e-03,\n 1.5671e-03, -1.7816e-03, 1.4344e-03, -1.2197e-03, 5.3770e-04,\n -6.4832e-04, 6.9076e-04, -1.0139e-03, 6.9532e-04, -4.8257e-03,\n -1.1314e-03, 3.7039e-04, 2.4140e-04, -4.6276e-04, -1.8202e-03,\n -5.4913e-03, -1.8812e-03, 1.1106e-03, 1.8219e-03, -9.3386e-04,\n 2.6704e-03, 1.0571e-03, 6.7814e-04, 1.7650e-04, 2.8034e-03,\n -2.1344e-03, 8.9061e-04, 9.8272e-04, 1.1384e-03, -3.0990e-03,\n 1.4348e-03, 1.5937e-03, -1.1270e-03, -1.2893e-03, 1.5836e-03,\n -1.3288e-03, -9.9934e-04, -1.6123e-03, -1.5538e-03, -2.3817e-04,\n -9.7634e-04, -2.3572e-03, -7.4427e-04, 2.9383e-03, 3.0116e-04,\n -3.0375e-03, -1.1569e-03, 5.1621e-04, -4.2828e-04, 2.3128e-03,\n 8.7616e-04, -5.3757e-04, 2.4319e-03, -3.4286e-04, 1.9898e-03,\n -9.3582e-04, -3.2799e-03, -3.9485e-04, 8.8668e-04, -4.2284e-03,\n -6.0062e-05, 5.9364e-04, 8.8135e-04, 2.8938e-03, 1.6767e-03,\n 7.3339e-04, -1.0405e-03, -2.5950e-03, -1.3751e-03, 9.6298e-05,\n -8.3190e-04, 2.5935e-03, 4.2066e-03, -1.1894e-03, -1.8857e-03,\n -5.1084e-04, 2.3401e-03, 6.0154e-04, -2.6360e-04, 4.7422e-04,\n -2.6120e-04, -1.8787e-04, 1.4509e-03, 3.2442e-04, 2.0326e-03,\n 1.3311e-03, -5.8338e-05, -2.3893e-05, 1.0154e-03, 1.1787e-03,\n -1.5723e-03, 2.4558e-03, 1.1732e-03, 5.3568e-05, 1.9018e-03,\n -9.0179e-06, 7.9822e-04, -2.7942e-03, -1.7042e-03, -1.4375e-03,\n 3.9795e-03, -2.0245e-03, -6.5536e-04, 9.0931e-04, -1.1015e-03,\n 6.0151e-04, 8.5853e-04, -1.1473e-03, 1.2115e-03, 1.2035e-03,\n -1.7982e-03, -3.2100e-04, 2.2376e-03, -1.7019e-04, 1.7969e-03,\n 6.2854e-04, 1.0448e-03, -2.1108e-03, -1.6993e-03, 1.2114e-03,\n 1.1602e-04, -8.0140e-07, -2.4814e-04, 3.2875e-03, 4.4480e-04,\n 1.4795e-03, 1.0779e-03, -8.8007e-04, 8.1684e-05, 7.0753e-05,\n 1.0177e-03, -1.2418e-03, -1.0981e-07, 1.7772e-03, -1.3615e-04,\n 1.9272e-03, -1.0292e-03, 1.4597e-03, -1.1560e-03, 9.8944e-04,\n -8.2585e-05, 1.2876e-03, 5.3097e-04, 6.4702e-04, -7.0670e-04,\n -2.9858e-03, -9.5315e-04, 1.2816e-03, 1.7454e-03, 1.9446e-03,\n -3.0264e-03, -4.3523e-04, 1.5789e-03, -5.8569e-04, -1.5138e-03,\n 1.6928e-03, -1.2887e-04, 9.6426e-04, 1.9781e-03, -2.5778e-04,\n -1.9230e-03, 5.3544e-04, -3.0825e-04, 4.1377e-03, -1.3446e-03,\n 4.3234e-04, 2.0698e-03, 1.1025e-03, -6.6271e-04, 1.1723e-03,\n 1.0806e-03, -5.2396e-04, 9.3079e-04, -9.5873e-04, 2.0225e-03,\n 1.5008e-03, 2.0853e-04, 3.1768e-04, 6.2001e-04, -3.1956e-04,\n -1.5737e-03, -3.0661e-03, 7.6443e-04, -1.3155e-03, -1.1751e-03,\n 6.6068e-04, -6.1229e-05, -1.0177e-03, 3.2870e-04, -2.8642e-03,\n 2.1700e-03, -1.4414e-03, -1.2404e-03, -1.4582e-03, -1.7603e-03,\n 2.6646e-03, 6.3877e-04, -6.8365e-04, 2.9977e-03, 1.2099e-05,\n -4.9220e-04, 7.6980e-04, -4.3470e-04, 5.1872e-04, -1.6180e-04,\n 4.2571e-03, 1.7957e-03, 6.2088e-04, 5.0587e-04, -2.8645e-03,\n -1.4121e-03, -1.8489e-03, 6.2157e-05, 3.2055e-03, -1.5167e-03,\n 3.7295e-04, 2.0427e-03, 1.1712e-03, 4.5663e-05, -1.0289e-03,\n 1.8450e-03, 1.6603e-04, 3.6912e-03, -3.3014e-05, -1.6395e-03,\n 2.8291e-04, 2.6634e-03, 1.5493e-04, 7.5401e-04, 1.2295e-03,\n -1.2600e-03, -2.2986e-03, -1.8711e-03, -1.7401e-03, 1.1025e-03,\n -9.9652e-04, -5.4204e-04, 4.5636e-03, -5.7286e-04, -1.1353e-03,\n -8.3940e-04, -3.8116e-03, -1.4432e-04, 5.9920e-03, 6.2135e-04,\n 8.3453e-05, -6.3219e-04, 1.4938e-03, 2.6616e-04, 3.6185e-04,\n 1.4106e-03, -5.6713e-04, -6.4478e-04, 7.4550e-04, 1.0287e-04,\n -1.6442e-03, -2.2371e-03, 8.6978e-04, -9.3575e-04, -8.6471e-04,\n -1.1625e-03, -2.3668e-03, 2.8170e-03, -1.8375e-03, 1.2227e-03,\n 6.8663e-04, 1.0498e-03, -3.2694e-03, 5.5477e-05, 3.2989e-03,\n -5.2962e-03, -1.9337e-03, 2.1202e-03, -1.0353e-03, 7.0940e-04,\n -2.4488e-03, -1.9739e-03, -4.9664e-05, 3.3599e-03, -3.0859e-03,\n 3.5400e-04, -2.2120e-03, 2.7310e-03, 2.3631e-03, -1.9754e-03,\n 8.1100e-04, 2.2838e-03, 2.8099e-03, 7.7455e-04, 2.9582e-05,\n 1.9236e-03, -8.2741e-04, 8.3141e-05, 1.0739e-03, 6.8710e-04,\n 8.9286e-04, -1.7695e-03, 4.4429e-04, -1.1367e-03, -1.7087e-03,\n -1.8113e-03, -4.2389e-03, 1.8198e-03, -2.5335e-03, -1.3040e-03,\n 2.1068e-03, 4.3284e-04, -3.3706e-03, 2.4968e-03, 2.1916e-03,\n 1.6861e-03, 3.4699e-03, -2.6641e-03, -2.8532e-03, -2.0975e-03,\n 1.5167e-05, 3.9983e-03, 6.6399e-04, -2.2758e-03, 2.1884e-03,\n 1.4212e-03, -1.9144e-03, -1.1839e-03, -1.7388e-03, -1.4871e-03,\n -3.6986e-03, -1.6055e-03, -2.9223e-03, 2.7541e-04, -7.1125e-04,\n 2.4385e-03, 6.5734e-05, 8.8680e-04, 1.0729e-03, 2.1518e-03,\n -1.1011e-03, 4.0585e-03, -2.7121e-03, -2.2381e-03, 1.4832e-03,\n 9.0172e-04, -4.4358e-04, -9.6249e-04, -1.4330e-03, 1.2729e-05,\n -4.4942e-03, -1.2662e-03, 1.6950e-03, 8.2689e-04, -2.8605e-03,\n 1.8629e-03, -2.2159e-03, -5.9918e-04, -1.4415e-04, 1.7575e-03,\n -1.7720e-03, 1.1895e-03, 2.3498e-04, -8.8975e-04, -1.9059e-04,\n -3.1834e-03, 2.2669e-03, 3.2523e-03, 3.6065e-03, 2.1941e-03,\n -2.7851e-03, -1.7813e-03, -2.2238e-03, -1.6512e-03, -1.8645e-03,\n 9.5244e-04, -3.6425e-04, -1.6561e-03, -3.6655e-04, 2.7698e-04,\n 1.6247e-03, 1.4979e-03, 1.0678e-03, 1.3471e-03, -7.5115e-04,\n 2.8534e-04, 2.2090e-03, -2.5342e-03, -1.2900e-03, -4.0674e-03,\n -1.3167e-03, -3.4882e-03, -1.4406e-03, -2.0141e-03, -9.3591e-04,\n 3.9122e-04, -1.5177e-04, 9.4264e-04, 1.8727e-03, -3.8648e-04,\n 3.2071e-03, -9.9253e-04, 1.2346e-03, -3.0654e-03, 3.6269e-04,\n 4.7850e-04, -2.9567e-03, -2.5282e-03, -9.3257e-04, 1.5724e-04,\n 8.9919e-04, -9.5886e-04, 1.6499e-03, 1.0015e-03, -2.6889e-06,\n -2.8737e-03, 7.7518e-04, -1.0712e-04, 1.4219e-04, -7.7231e-04,\n 2.3293e-04, -1.4006e-03, 2.7590e-04, 1.1924e-05, 3.8488e-05,\n -8.9089e-04, 1.4239e-03, 3.3431e-04, -1.3620e-03, -9.0403e-04,\n -3.5629e-04, -3.5205e-04, 2.0466e-03, -1.6933e-03, 2.7038e-03,\n 1.7508e-03, -1.3856e-03, 1.6535e-03, -6.6903e-03, -1.5825e-03,\n 1.1371e-03, 1.7900e-03, 2.1926e-03, 3.7010e-03, -4.9971e-04,\n -3.2059e-04, -3.3764e-04, 1.9115e-04, 2.3980e-03, -1.2414e-03,\n -3.4831e-04, 1.3686e-03, 1.9347e-03, 3.7829e-04, 1.2737e-04,\n 3.8034e-04, -2.5405e-03, 2.7701e-03, 6.1209e-05, 9.0948e-04,\n 6.5168e-04, -4.1386e-05], device='cuda:0')",
13
+ "exp_avg_sq": "tensor([5.7052e-05, 5.7719e-05, 3.4529e-05, 5.0794e-05, 5.8009e-05, 1.0639e-04,\n 4.4471e-05, 8.8108e-05, 5.8732e-05, 6.7099e-05, 8.4264e-05, 9.5816e-05,\n 1.2307e-04, 3.4090e-05, 9.3219e-05, 6.9379e-05, 5.1789e-05, 5.6476e-05,\n 7.2159e-05, 8.4651e-05, 5.7269e-05, 4.7419e-05, 4.6425e-05, 9.1345e-05,\n 1.3058e-04, 5.4081e-05, 4.4955e-05, 7.9315e-05, 7.6084e-05, 5.4577e-05,\n 2.7385e-05, 4.7483e-05, 7.4752e-05, 5.6689e-05, 2.9226e-05, 6.8754e-05,\n 4.5041e-05, 4.4168e-05, 4.4237e-05, 3.2044e-05, 5.2937e-05, 3.5231e-05,\n 7.8244e-06, 9.1331e-05, 1.5530e-04, 5.2834e-05, 4.8323e-05, 3.8394e-05,\n 6.5874e-05, 1.2391e-04, 1.2181e-04, 3.6602e-05, 5.2449e-05, 6.5079e-05,\n 8.1471e-05, 4.7673e-05, 5.2794e-05, 1.3732e-04, 3.9620e-05, 4.9191e-05,\n 1.2911e-04, 3.7274e-05, 4.8934e-05, 3.8693e-05, 4.8830e-05, 3.7467e-05,\n 4.8224e-05, 4.3914e-05, 8.4942e-05, 5.4510e-05, 5.7772e-05, 4.9101e-05,\n 4.3331e-05, 4.6668e-05, 4.8934e-05, 4.9698e-05, 5.8778e-05, 5.7731e-05,\n 8.2022e-05, 4.6404e-05, 8.2606e-05, 4.4246e-05, 4.9142e-05, 2.9968e-05,\n 4.4788e-05, 5.1732e-05, 7.3969e-05, 4.3433e-05, 3.9040e-05, 5.6511e-05,\n 4.3844e-05, 8.4531e-05, 5.7211e-05, 5.9326e-05, 4.9808e-05, 7.4306e-05,\n 8.3561e-05, 5.7282e-05, 5.7239e-05, 2.6371e-05, 6.8554e-05, 9.6172e-05,\n 8.8626e-05, 4.8312e-05, 8.9585e-05, 8.4861e-05, 5.8973e-05, 1.3507e-04,\n 4.7467e-05, 6.2455e-05, 7.0972e-05, 7.0929e-05, 5.4218e-05, 7.7551e-05,\n 6.1466e-05, 4.6904e-05, 8.7566e-05, 5.8981e-05, 5.2109e-05, 5.7253e-05,\n 2.3394e-05, 8.3456e-05, 6.7353e-05, 8.3342e-05, 4.0105e-05, 6.3596e-05,\n 5.0921e-05, 7.6806e-05, 6.5474e-05, 1.0007e-04, 6.3934e-05, 5.5727e-05,\n 6.5643e-05, 5.6804e-05, 1.5146e-04, 5.0583e-05, 3.7845e-05, 6.4812e-05,\n 5.0028e-05, 6.8916e-05, 2.2519e-05, 4.5613e-05, 7.0413e-05, 1.5130e-04,\n 6.5253e-05, 5.0007e-05, 3.8139e-05, 7.4509e-05, 6.5441e-05, 1.2404e-04,\n 3.1234e-05, 7.2506e-05, 3.0902e-05, 1.0056e-04, 3.4533e-05, 6.0797e-05,\n 7.3751e-05, 6.4565e-05, 4.0149e-05, 7.2062e-05, 6.4298e-05, 2.1080e-05,\n 5.2286e-05, 9.1859e-05, 2.3228e-05, 8.9993e-05, 5.3283e-05, 3.0994e-05,\n 2.0332e-05, 4.9120e-05, 4.5643e-05, 6.8081e-05, 6.1100e-05, 4.7197e-05,\n 5.6950e-05, 3.6728e-05, 1.2594e-04, 3.6854e-05, 2.9321e-05, 6.1584e-05,\n 4.3987e-05, 4.3956e-05, 1.1104e-04, 5.2574e-05, 4.7700e-05, 6.0813e-05,\n 1.3040e-04, 2.6602e-05, 3.2875e-05, 2.2837e-05, 5.8955e-05, 8.0172e-05,\n 8.3606e-05, 8.8376e-05, 3.1634e-05, 2.5699e-05, 6.0203e-05, 4.1966e-05,\n 2.6605e-05, 2.8327e-05, 3.3592e-05, 6.7654e-05, 4.7923e-05, 3.3395e-05,\n 6.0648e-05, 5.9565e-05, 4.7552e-05, 3.7074e-05, 7.7749e-05, 4.5406e-05,\n 4.4488e-05, 7.5826e-05, 5.8192e-05, 3.8553e-05, 8.9115e-05, 2.9895e-05,\n 3.8195e-05, 3.3960e-05, 7.5431e-05, 7.5163e-05, 3.4928e-05, 2.8597e-05,\n 4.9426e-05, 2.5028e-05, 3.8677e-05, 7.1520e-05, 4.1035e-05, 2.8499e-05,\n 3.1881e-05, 7.1580e-05, 5.5877e-05, 3.0581e-05, 6.6692e-05, 4.8929e-05,\n 5.1532e-05, 4.9202e-05, 5.1486e-05, 7.7529e-06, 6.0230e-05, 1.6619e-04,\n 4.6168e-05, 9.6414e-05, 4.1207e-05, 6.7024e-05, 1.1938e-04, 6.5222e-05,\n 6.8466e-05, 4.0211e-05, 7.5619e-05, 4.8348e-05, 7.7700e-05, 3.8453e-05,\n 5.1964e-05, 6.9053e-05, 4.9041e-05, 5.1501e-05, 3.7940e-05, 7.6122e-05,\n 8.1325e-05, 3.6696e-05, 4.2306e-05, 9.6854e-05, 4.1899e-05, 7.5102e-05,\n 5.1362e-05, 3.6049e-05, 3.3131e-05, 5.2470e-05, 3.8097e-05, 5.9783e-05,\n 4.4980e-05, 6.5524e-05, 4.2473e-05, 3.7881e-05, 3.7133e-05, 7.4746e-05,\n 4.7454e-05, 9.3598e-05, 5.5075e-05, 3.2126e-05, 3.7307e-05, 4.0628e-05,\n 5.8148e-05, 7.0551e-05, 3.4578e-05, 8.2077e-05, 5.6246e-05, 5.0898e-05,\n 8.9830e-05, 7.2967e-05, 3.9321e-05, 4.7290e-05, 8.2390e-05, 4.4032e-05,\n 7.0549e-05, 3.5243e-05, 4.0645e-05, 8.1745e-05, 1.6108e-04, 5.5979e-05,\n 4.0908e-05, 4.7354e-05, 5.2856e-05, 4.8829e-05, 9.3484e-05, 3.1299e-05,\n 5.9867e-05, 5.3580e-05, 9.6073e-05, 5.8990e-05, 6.9116e-05, 6.4181e-05,\n 3.7414e-05, 3.8067e-05, 7.8242e-05, 4.9574e-05, 4.8079e-05, 2.3716e-05,\n 5.5826e-05, 7.4520e-05, 1.2847e-04, 5.9199e-05, 6.0881e-05, 6.4638e-05,\n 8.8811e-05, 1.4658e-04, 3.3760e-05, 6.5799e-05, 5.8892e-05, 5.4393e-05,\n 5.3478e-05, 7.9466e-05, 2.3933e-05, 3.5141e-05, 4.0306e-05, 4.9266e-05,\n 8.1640e-05, 3.4246e-05, 8.3838e-05, 6.6685e-05, 5.1288e-05, 4.9742e-05,\n 6.0262e-05, 4.2136e-05, 7.5761e-05, 8.2803e-05, 5.1966e-05, 4.1517e-05,\n 6.2024e-05, 5.8651e-05, 4.5846e-05, 5.2948e-05, 5.0824e-05, 4.4946e-05,\n 5.0352e-05, 3.8667e-05, 4.0771e-05, 4.6540e-05, 3.6237e-05, 3.5681e-05,\n 3.1937e-05, 1.1132e-04, 1.7325e-04, 4.6391e-05, 5.3180e-05, 4.3565e-05,\n 5.1896e-05, 7.6209e-05, 3.3631e-05, 4.1142e-05, 3.1365e-05, 5.6484e-05,\n 3.0048e-05, 5.5595e-05, 1.0255e-04, 5.5477e-05, 2.8172e-05, 2.3594e-05,\n 4.6189e-05, 4.5540e-05, 5.3573e-05, 6.4307e-05, 6.2107e-05, 4.6014e-05,\n 5.2082e-05, 1.1523e-04, 4.9673e-05, 8.3738e-05, 5.6916e-05, 6.2360e-05,\n 7.0428e-05, 6.4362e-05, 4.7518e-05, 5.6156e-05, 6.0816e-05, 3.6471e-05,\n 2.5742e-05, 9.5092e-05, 5.6686e-05, 9.7241e-05, 5.8721e-05, 3.8199e-05,\n 7.0910e-05, 5.3322e-05, 6.3692e-05, 5.6863e-05, 1.0383e-04, 3.1616e-05,\n 3.9051e-05, 6.9078e-05, 7.1211e-05, 7.1741e-05, 6.2854e-05, 6.3060e-05,\n 4.2210e-05, 3.6826e-05, 8.2882e-05, 3.1029e-05, 8.1777e-05, 1.1878e-04,\n 5.7773e-05, 4.3620e-05, 7.3776e-05, 6.9115e-05, 5.2363e-05, 5.3682e-05,\n 1.0003e-04, 5.2792e-05, 4.3825e-05, 5.1304e-05, 3.9633e-05, 2.8615e-05,\n 8.6796e-05, 6.6872e-05, 4.6451e-05, 4.7797e-05, 8.9823e-05, 5.9885e-05,\n 4.6415e-05, 6.5572e-05, 7.6754e-05, 9.1032e-05, 4.2874e-05, 1.7804e-04,\n 6.2092e-05, 5.5679e-05, 3.9646e-05, 5.9869e-05, 4.8876e-05, 7.1112e-05,\n 5.2213e-05, 6.2063e-05, 6.1783e-05, 1.0993e-04, 5.2396e-05, 2.7798e-05,\n 1.4002e-04, 5.9579e-05, 3.4151e-05, 8.2561e-05, 6.3050e-05, 3.6455e-05,\n 5.2519e-05, 4.2660e-05, 1.0658e-04, 4.7467e-05, 4.5055e-05, 5.6356e-05,\n 8.2555e-05, 7.6966e-05, 6.7380e-05, 1.1343e-04, 8.1185e-05, 6.9085e-05,\n 6.4420e-05, 6.7182e-05, 5.5866e-05, 5.2950e-05, 4.9296e-05, 3.9322e-05,\n 3.0705e-05, 4.2921e-05, 7.9903e-05, 5.4642e-05, 4.7852e-05, 5.4451e-05,\n 8.4876e-05, 5.4206e-05, 9.3537e-05, 5.8872e-05, 5.4731e-05, 6.6546e-05,\n 8.4839e-05, 4.1191e-05, 3.2842e-05, 5.2010e-05, 6.0087e-05, 2.9815e-05,\n 3.9280e-05, 4.2476e-05, 6.1757e-05, 3.6976e-05, 6.4386e-05, 1.5556e-04,\n 4.9412e-05, 2.7862e-05, 4.7921e-05, 6.3107e-05, 5.3832e-05, 5.9705e-05,\n 6.6724e-05, 6.9316e-06], device='cuda:0')"
14
+ },
15
+ "2": {
16
+ "step": "tensor(11262.)",
17
+ "exp_avg": "tensor([-3.8383e-03, -6.1540e-03, -4.9859e-03, 1.6268e-03, -9.6207e-04,\n 3.2293e-03, 5.8433e-03, -3.0914e-03, -1.8241e-03, 7.1491e-03,\n 7.3362e-03, 4.9434e-03, -6.8015e-03, -2.6870e-03, 1.4911e-03,\n -5.1057e-03, -3.4573e-03, -3.4191e-04, -5.1467e-03, -2.0905e-03,\n 3.3353e-03, 2.4734e-04, 2.2019e-03, -1.4093e-03, 8.7804e-03,\n -7.0550e-04, -1.8054e-03, -5.4451e-03, 3.7141e-03, 3.2821e-03,\n 5.1603e-03, -2.1174e-03, 7.4943e-04, -3.0005e-03, -2.3556e-03,\n -2.6629e-04, 5.8814e-04, -8.8162e-04, -2.9146e-03, 2.7102e-03,\n 1.1066e-03, 2.9203e-04, 5.6052e-45, 2.7272e-03, 4.0726e-03,\n 4.6830e-03, 5.4500e-03, 8.0301e-04, 5.4482e-03, 1.0378e-02,\n -4.9910e-03, -2.9325e-03, 2.9640e-03, -7.1033e-03, -3.0583e-03,\n 3.7900e-03, -5.3817e-03, 2.1803e-03, -2.9717e-03, 5.2983e-04,\n -3.1303e-04, 1.9063e-03, -3.6238e-03, 1.3275e-03, -6.5993e-03,\n -2.7344e-03, 2.4064e-03, 8.4100e-04, -6.8965e-04, -2.7053e-03,\n -9.0746e-03, -3.6717e-03, 3.3957e-03, 4.8726e-03, -1.8968e-03,\n 5.0618e-03, 2.9925e-03, 1.5269e-03, 5.3274e-04, 6.5530e-03,\n -3.1110e-03, 2.3113e-03, 1.0009e-03, 2.6082e-03, -5.0260e-03,\n 4.7674e-03, 5.1641e-03, -2.6618e-03, -2.2491e-03, 5.0335e-03,\n -4.8282e-03, -5.7694e-04, -3.2199e-03, -3.5665e-03, 1.1884e-03,\n -2.8579e-03, -5.1019e-03, -2.0734e-03, 5.3558e-03, -1.6676e-04,\n -5.1350e-03, -3.1447e-04, -2.5460e-04, -1.8348e-03, 3.2955e-03,\n 6.6166e-04, -9.0602e-04, 4.6632e-03, -4.5501e-04, 4.2801e-03,\n -1.3581e-03, -8.1584e-03, -2.2669e-03, 6.3209e-04, -7.5083e-03,\n -9.7099e-04, 1.5286e-03, 1.4918e-03, 5.1197e-03, 3.6367e-03,\n 1.6927e-03, -1.9124e-03, -1.0237e-02, -3.2932e-03, -4.5239e-05,\n -2.3912e-03, 4.5199e-03, 6.8376e-03, 5.2471e-04, -3.5988e-03,\n -1.1442e-03, 6.2491e-03, 6.2998e-04, -7.5451e-04, 9.0272e-04,\n -1.9743e-03, 2.6464e-05, 3.2798e-03, 8.7822e-04, 2.5203e-03,\n 2.4691e-03, 4.4215e-04, 7.0937e-04, 7.6202e-04, 2.1921e-03,\n -2.5469e-03, 4.7661e-03, 2.1954e-03, 1.8249e-04, 2.8702e-03,\n 7.0412e-04, 2.1205e-03, -5.6942e-03, -2.4098e-03, -3.5073e-03,\n 4.8298e-03, -5.1933e-03, -3.0608e-03, 1.9692e-03, -2.8164e-03,\n 6.2478e-04, 2.4682e-03, -2.9482e-03, 2.1280e-03, 3.3360e-03,\n -2.7242e-03, -1.7645e-03, 4.4480e-03, 2.6899e-04, 3.6357e-03,\n 1.5217e-03, 3.1348e-03, -2.4103e-03, -6.1548e-03, 2.6197e-03,\n -1.7184e-04, -9.3577e-04, -6.9830e-04, 6.9767e-03, 1.1188e-03,\n 3.5387e-03, 1.9613e-03, -1.9916e-03, 9.4317e-04, -4.5180e-04,\n 9.2688e-04, -1.8631e-03, -1.9084e-04, 5.0588e-03, 5.1302e-04,\n 3.7390e-03, -3.0840e-03, 1.5781e-03, -1.2785e-03, 1.6394e-03,\n -3.9158e-04, 1.9236e-03, 1.2788e-03, 7.2939e-04, -1.9368e-03,\n -6.6013e-03, -1.9154e-03, 2.8794e-03, 4.2982e-03, 1.9816e-03,\n -3.9091e-03, -1.7601e-03, 3.2678e-03, -8.2386e-04, -3.2358e-03,\n 3.5300e-03, -1.4616e-03, 1.0730e-03, 3.1331e-03, 6.6204e-04,\n -6.0869e-03, 7.5554e-04, -1.3409e-03, 6.9194e-03, -4.1688e-03,\n 1.6246e-03, 4.2545e-03, 1.5633e-03, -1.3903e-03, 3.6372e-03,\n 2.1016e-03, -7.5213e-04, 2.5188e-03, -1.2297e-03, 2.4748e-03,\n 2.5435e-03, 8.5568e-04, 4.6438e-04, 2.4583e-03, -4.8155e-04,\n -3.1478e-03, -4.9136e-03, 5.6052e-45, -3.0112e-03, -3.2181e-03,\n 4.1552e-05, 1.3250e-03, -4.4352e-03, 5.5115e-04, -2.7802e-03,\n 3.8695e-03, -2.8108e-03, -1.3023e-03, -4.2449e-03, -3.7119e-03,\n 6.7012e-03, 1.4846e-03, -2.0396e-03, 5.5260e-03, -8.3984e-04,\n -1.3734e-05, 2.1679e-03, -9.2703e-04, 2.0443e-03, -6.2581e-04,\n 8.3091e-03, 2.5725e-03, 1.7831e-03, 7.3104e-04, -5.4419e-03,\n -3.5658e-03, -4.8227e-03, 4.6624e-04, 7.8256e-03, -5.3535e-03,\n 5.0343e-04, 6.0325e-03, 1.3370e-03, -9.3955e-04, -1.0423e-03,\n 2.6165e-03, 1.4198e-03, 6.6146e-03, -8.9428e-04, -4.6854e-03,\n 7.6434e-04, 3.7235e-03, 1.0475e-03, 2.4975e-05, 3.8366e-03,\n -2.0097e-03, -4.6517e-03, -3.0200e-03, -4.2114e-03, 2.3499e-03,\n -2.8575e-03, -6.8444e-04, 5.0862e-03, -1.0766e-04, -3.2563e-03,\n -3.0784e-03, -9.0239e-03, 5.6075e-04, 9.6844e-03, 6.5048e-04,\n 5.0841e-05, -9.4888e-04, 4.3470e-03, 6.6175e-04, 1.0592e-03,\n 3.9196e-03, -1.6150e-03, -1.4223e-03, 2.3456e-03, 2.3221e-03,\n -2.4171e-03, -4.5641e-03, 2.2232e-03, -2.3503e-03, -1.1534e-03,\n -3.5676e-03, -3.0082e-03, 6.5892e-03, -3.4087e-03, 1.5383e-03,\n 2.5886e-03, 5.9313e-03, -5.9984e-03, 3.8277e-05, 6.9814e-03,\n -8.0377e-03, -4.3299e-03, 4.8806e-03, -1.9938e-03, 1.5620e-03,\n -6.5722e-03, -1.2233e-03, 1.8883e-04, 7.1410e-03, -7.6674e-03,\n 9.7486e-04, -1.9581e-03, 6.4242e-03, 5.6908e-03, -3.2994e-03,\n 1.8105e-03, 4.5154e-03, 6.1850e-03, 1.5853e-03, -1.3961e-04,\n 3.6194e-03, -2.2353e-03, -8.8953e-05, 1.8399e-03, 1.5913e-03,\n 2.0818e-03, -5.1399e-03, 7.9397e-04, -2.3443e-03, -4.2101e-03,\n -2.7164e-03, -9.6635e-03, 2.6094e-03, -4.0191e-03, -1.8879e-03,\n 4.5960e-03, 1.3642e-03, -4.5807e-03, 7.2668e-03, 5.0224e-03,\n 4.4326e-03, 3.6920e-03, -6.0768e-03, -5.1083e-03, -4.8419e-03,\n 1.8232e-04, 7.8318e-03, 1.2457e-03, -4.3599e-03, 1.4948e-03,\n 2.7510e-03, -3.7681e-03, -2.2270e-03, -4.3651e-03, -2.7356e-03,\n -8.6997e-03, -1.8590e-03, -6.6807e-03, -1.3146e-04, -2.2101e-03,\n 5.6526e-03, 1.2870e-03, 8.8364e-04, 2.0324e-03, 3.7312e-03,\n -1.3215e-03, 5.5745e-03, -6.9768e-03, -9.2513e-03, 1.9801e-03,\n 1.2436e-03, -5.4656e-04, -2.2998e-03, -3.9335e-03, 4.9747e-04,\n -8.6186e-03, -3.4053e-03, 3.6873e-03, 9.9483e-04, -5.9396e-03,\n 2.4168e-03, -6.1539e-03, -8.7726e-04, -1.1908e-03, 3.3853e-03,\n -4.1435e-03, 7.1441e-04, 9.4301e-04, -1.5704e-03, 5.7504e-04,\n -1.3018e-02, 3.8240e-03, 9.7931e-03, 5.8120e-03, 2.4471e-03,\n -5.2663e-03, -4.2605e-03, -2.8997e-03, -4.3395e-03, -8.0664e-03,\n 1.9071e-03, 6.8833e-05, -3.0935e-03, 7.9450e-04, 6.2631e-05,\n 3.4754e-03, 3.9824e-03, 1.7273e-03, 2.8056e-03, -2.3946e-03,\n 1.3439e-03, 4.0816e-03, -7.9949e-03, -1.6629e-03, -4.5662e-03,\n -2.8262e-03, -5.9526e-03, -3.4456e-03, -4.6602e-03, -1.5970e-03,\n 2.8261e-04, -7.3492e-04, 2.4684e-03, 5.2321e-03, 1.1108e-03,\n 8.3453e-03, -2.0697e-03, 1.1179e-03, -3.5575e-03, 5.0359e-04,\n 5.3528e-04, -5.3279e-03, -4.3438e-03, -2.5293e-03, -3.1078e-04,\n 1.9746e-03, -3.0322e-03, 2.9032e-03, 2.1372e-03, 9.4457e-04,\n -4.4942e-03, 1.2107e-03, -8.7905e-04, 1.3938e-03, -3.3560e-03,\n 1.1607e-04, -1.1018e-03, 2.4275e-04, -1.0553e-03, 4.6367e-04,\n -2.1866e-03, 2.9766e-03, 7.9458e-04, -4.1799e-03, -2.8476e-03,\n -6.6801e-04, -5.6503e-04, 3.3399e-03, -3.1190e-03, 5.5225e-03,\n 2.9456e-03, -1.6319e-03, 3.7037e-03, -1.1580e-02, -2.0138e-03,\n 2.9754e-03, 3.2874e-03, 4.1344e-03, 8.5079e-03, -4.9945e-04,\n -1.1265e-03, -3.5271e-04, 1.2119e-04, 4.3079e-03, -2.3751e-03,\n 2.3602e-04, 2.6625e-03, 4.8589e-03, 3.6760e-04, 3.5576e-04,\n 2.0360e-03, -3.2840e-03, 5.2749e-03, -2.8866e-04, 7.5838e-04,\n -4.1207e-04, 5.6052e-45], device='cuda:0')",
18
+ "exp_avg_sq": "tensor([3.4645e-04, 1.8538e-04, 1.0919e-04, 1.6379e-04, 1.0339e-04, 2.9148e-04,\n 1.8903e-04, 2.5144e-04, 2.4956e-04, 3.1498e-04, 3.8780e-04, 3.6085e-04,\n 3.0262e-04, 2.0441e-04, 1.9537e-04, 2.1153e-04, 1.5261e-04, 2.2734e-04,\n 1.9484e-04, 3.4151e-04, 3.0035e-04, 1.1416e-04, 1.4468e-04, 2.4493e-04,\n 4.7429e-04, 2.2267e-04, 1.9041e-04, 3.2836e-04, 2.4175e-04, 2.0296e-04,\n 2.1676e-04, 3.2639e-04, 2.4758e-04, 1.6298e-04, 1.2642e-04, 1.0908e-04,\n 9.2851e-05, 1.4156e-04, 1.0738e-04, 1.7084e-04, 1.0239e-04, 8.4717e-05,\n 5.0342e-09, 2.0478e-04, 4.3598e-04, 1.6133e-04, 1.7428e-04, 5.7915e-05,\n 6.5617e-04, 2.5571e-04, 3.4747e-04, 1.0109e-04, 1.4643e-04, 2.5993e-04,\n 2.1811e-04, 1.6734e-04, 3.3546e-04, 1.2987e-04, 1.6127e-04, 8.4983e-05,\n 1.8590e-04, 1.9458e-04, 2.6462e-04, 1.2986e-04, 1.2104e-04, 1.5628e-04,\n 2.3736e-04, 1.3289e-04, 2.0128e-04, 1.2478e-04, 2.5921e-04, 1.5543e-04,\n 3.1332e-04, 2.6528e-04, 1.4109e-04, 2.2974e-04, 2.5120e-04, 1.4809e-04,\n 2.2003e-04, 2.3857e-04, 2.3064e-04, 1.6503e-04, 1.3922e-04, 1.0394e-04,\n 1.4860e-04, 3.9225e-04, 3.1138e-04, 2.2582e-04, 9.1337e-05, 6.5899e-04,\n 2.2678e-04, 1.7558e-04, 2.4293e-04, 2.6396e-04, 4.2002e-04, 2.2882e-04,\n 2.7334e-04, 2.5348e-04, 2.2958e-04, 5.4271e-04, 2.3043e-04, 2.8752e-04,\n 2.1935e-04, 1.9002e-04, 2.4694e-04, 4.0163e-04, 1.5262e-04, 4.6743e-04,\n 1.8029e-04, 2.4324e-04, 2.4924e-04, 3.2553e-04, 2.5950e-04, 1.6002e-04,\n 2.0306e-04, 1.0342e-04, 1.8190e-04, 2.4937e-04, 1.5075e-04, 3.3851e-04,\n 1.8646e-04, 2.8951e-04, 4.9521e-04, 4.0678e-04, 3.8927e-04, 5.9921e-04,\n 2.0666e-04, 2.4853e-04, 4.8197e-04, 4.5726e-04, 1.9299e-04, 2.4434e-04,\n 3.5836e-04, 1.7591e-04, 3.4120e-04, 1.5422e-04, 2.4543e-04, 3.2701e-04,\n 1.5798e-04, 1.0539e-04, 1.6610e-04, 2.0235e-04, 2.7505e-04, 1.9240e-04,\n 2.1869e-04, 1.7155e-04, 1.0007e-04, 3.4244e-04, 2.1689e-04, 2.7502e-04,\n 9.7171e-05, 3.1437e-04, 1.3214e-04, 2.3058e-04, 9.6839e-05, 1.0418e-04,\n 3.5103e-04, 3.4837e-04, 2.9733e-04, 3.2148e-04, 3.3225e-04, 1.2529e-04,\n 4.2702e-04, 1.3237e-04, 1.2800e-04, 4.3447e-04, 2.4389e-04, 1.0960e-04,\n 1.1660e-04, 1.8527e-04, 8.5047e-05, 2.1953e-04, 2.0334e-04, 3.6732e-04,\n 2.9196e-04, 1.3129e-04, 2.1837e-04, 1.2133e-04, 1.3543e-04, 2.2047e-04,\n 1.7469e-04, 2.2291e-04, 5.1745e-04, 2.1099e-04, 1.0539e-03, 1.4955e-04,\n 4.4444e-04, 1.7713e-04, 1.9554e-04, 8.9782e-05, 1.5540e-04, 2.7097e-04,\n 1.3307e-04, 1.1057e-04, 1.6956e-04, 9.9253e-05, 2.5603e-04, 1.4224e-04,\n 8.4534e-05, 1.1404e-04, 1.3456e-04, 1.8831e-04, 1.5662e-04, 1.0634e-04,\n 4.9993e-05, 1.8743e-04, 1.6331e-04, 1.5691e-04, 2.6667e-04, 1.1398e-04,\n 2.6188e-04, 2.2513e-04, 3.6568e-05, 1.1744e-04, 2.3619e-04, 2.6153e-04,\n 9.9621e-05, 8.4062e-04, 2.5118e-04, 4.9116e-04, 1.3685e-04, 1.7223e-04,\n 1.9979e-04, 1.8497e-04, 2.1166e-04, 2.9724e-04, 1.1470e-04, 1.1646e-04,\n 7.5156e-05, 1.4492e-04, 2.0138e-04, 1.1632e-04, 6.1845e-04, 2.8370e-04,\n 1.0583e-04, 1.5957e-04, 2.2859e-04, 1.3707e-09, 1.3931e-04, 3.4898e-04,\n 1.2292e-04, 3.2260e-04, 5.1706e-04, 1.9693e-04, 2.1296e-04, 2.9837e-04,\n 1.3494e-04, 1.2751e-04, 2.9674e-04, 1.5472e-04, 3.2296e-04, 1.5250e-04,\n 3.2038e-04, 2.4248e-04, 1.3693e-04, 2.2018e-04, 2.0124e-04, 1.5068e-04,\n 1.0746e-04, 1.9961e-04, 2.2751e-04, 2.5767e-04, 1.9738e-04, 1.6575e-04,\n 2.1103e-04, 3.3630e-04, 2.3649e-04, 2.1140e-04, 2.1562e-04, 3.7970e-04,\n 3.8372e-04, 3.2390e-04, 9.8330e-05, 2.0977e-04, 1.0183e-04, 1.4657e-04,\n 2.6684e-04, 2.2827e-04, 2.1658e-04, 2.0837e-04, 2.1404e-04, 1.1695e-04,\n 1.4280e-04, 1.7050e-04, 1.7871e-04, 2.5609e-04, 2.6697e-04, 1.0665e-04,\n 4.9082e-04, 2.2943e-04, 9.0256e-05, 7.5829e-05, 1.5764e-04, 9.1415e-05,\n 2.3180e-04, 1.6711e-04, 1.6639e-04, 1.4911e-04, 3.6989e-04, 2.7207e-04,\n 1.3729e-04, 1.2868e-04, 3.6406e-04, 1.4793e-04, 1.7778e-04, 2.2485e-04,\n 1.5697e-04, 1.7988e-04, 5.6074e-04, 4.5800e-04, 3.8158e-04, 2.2825e-04,\n 1.9153e-04, 2.5237e-04, 1.4238e-04, 3.7005e-04, 2.0609e-04, 1.1899e-04,\n 2.0153e-04, 2.3342e-04, 5.0608e-04, 3.7491e-04, 1.7803e-04, 3.1476e-04,\n 4.1593e-04, 3.2778e-04, 1.9930e-04, 2.9670e-04, 1.4346e-04, 2.7940e-04,\n 3.2241e-04, 2.7470e-04, 6.6464e-05, 1.5764e-04, 1.9159e-04, 1.7604e-04,\n 7.6527e-05, 1.5052e-04, 2.7529e-04, 2.3824e-04, 1.8140e-04, 1.2213e-04,\n 2.1225e-04, 3.5921e-04, 1.4082e-04, 2.7669e-04, 2.1180e-04, 1.4551e-04,\n 1.5996e-04, 2.3903e-04, 2.2410e-04, 2.1935e-04, 1.9661e-04, 2.0632e-04,\n 2.5160e-04, 1.7761e-04, 2.7282e-04, 1.2139e-04, 9.8291e-05, 1.3392e-04,\n 1.1625e-04, 5.3406e-04, 3.1025e-04, 5.0269e-04, 1.8106e-04, 2.1198e-04,\n 6.4041e-05, 3.9356e-04, 1.0794e-04, 1.4122e-04, 1.5898e-04, 2.1411e-04,\n 3.7174e-04, 1.6141e-04, 1.6048e-04, 2.2685e-04, 1.3599e-04, 1.3592e-04,\n 3.0076e-04, 1.0859e-04, 2.2861e-04, 1.6198e-04, 2.3352e-04, 1.9115e-04,\n 2.2450e-04, 2.9334e-04, 1.3659e-04, 1.5479e-04, 1.9806e-04, 1.6236e-04,\n 1.2167e-04, 1.2241e-04, 2.3649e-04, 5.9021e-04, 1.4529e-04, 8.3922e-05,\n 1.5866e-04, 5.0911e-04, 1.6184e-04, 3.1629e-04, 2.1776e-04, 1.1951e-04,\n 3.6609e-04, 1.5643e-04, 1.7705e-04, 1.4376e-04, 4.8178e-04, 4.7578e-05,\n 2.6379e-04, 2.5982e-04, 2.5753e-04, 5.2925e-05, 3.8073e-04, 1.9564e-04,\n 1.9923e-04, 9.9014e-04, 4.7281e-04, 3.6127e-04, 2.4566e-04, 1.4836e-04,\n 2.1190e-04, 1.7642e-04, 2.8058e-04, 2.7598e-04, 6.5383e-04, 2.5623e-04,\n 1.6205e-04, 1.7929e-04, 2.8630e-04, 1.6216e-04, 1.9880e-04, 1.2744e-04,\n 1.5023e-04, 2.3484e-04, 4.4529e-04, 2.4379e-04, 2.6895e-04, 4.1788e-04,\n 1.9983e-04, 1.0177e-04, 3.5036e-04, 1.8450e-04, 2.1038e-04, 5.8356e-04,\n 1.8593e-04, 2.6055e-04, 3.2602e-04, 1.7075e-04, 2.2287e-04, 2.5922e-04,\n 3.3419e-04, 1.3153e-04, 1.8253e-04, 4.4392e-04, 2.8395e-04, 1.4203e-04,\n 3.0093e-04, 1.8613e-04, 1.4878e-04, 1.6851e-04, 2.4667e-04, 1.6634e-04,\n 2.2706e-04, 1.8400e-04, 1.5805e-04, 1.6442e-04, 1.4213e-04, 2.3850e-04,\n 3.2546e-04, 3.4333e-04, 2.0448e-04, 3.4677e-04, 2.4907e-04, 1.3603e-04,\n 2.6762e-04, 2.6344e-04, 2.3707e-04, 2.6883e-04, 5.0835e-04, 1.3848e-04,\n 8.9981e-05, 8.6502e-04, 1.8899e-04, 1.1267e-04, 1.9643e-04, 9.6222e-05,\n 1.0926e-04, 2.3241e-04, 3.4771e-04, 1.5404e-04, 1.3191e-04, 1.3667e-04,\n 3.4614e-04, 2.0409e-04, 1.7049e-04, 3.7157e-04, 1.9776e-04, 2.2904e-04,\n 1.4937e-04, 2.2724e-04, 1.9668e-04, 1.4805e-04, 1.5789e-04, 4.2007e-04,\n 8.9806e-05, 2.6460e-04, 9.9957e-05, 1.6949e-04, 8.5408e-05, 1.1057e-04,\n 1.2282e-04, 3.1482e-10], device='cuda:0')"
19
+ },
20
+ "3": {
21
+ "step": "tensor(11262.)",
22
+ "exp_avg": "tensor([-1.5601e-03, -2.7137e-03, -2.5694e-03, 1.3687e-03, -7.2784e-04,\n 1.3940e-03, 2.5005e-03, -2.4382e-03, -1.7829e-03, 3.3241e-03,\n 3.8000e-03, 2.9948e-03, -3.8303e-03, -1.0549e-03, 7.5320e-04,\n -2.6816e-03, -1.4596e-03, 3.1982e-05, -2.8980e-03, -1.0070e-03,\n 1.8870e-03, 2.8147e-04, 1.5104e-03, -1.3933e-03, 4.7815e-03,\n -3.3008e-04, -8.5010e-04, -2.9928e-03, 2.2006e-03, 2.2226e-03,\n 2.0358e-03, -1.6764e-03, 2.3680e-04, -1.3496e-03, -1.3734e-03,\n 2.9866e-04, 5.0790e-04, -3.9938e-04, -1.5516e-03, 1.0692e-03,\n 8.9063e-04, 2.1665e-04, 5.6052e-45, 1.7924e-03, 2.5745e-03,\n 2.1365e-03, 1.7422e-03, 5.6950e-04, 1.7737e-03, 6.9404e-03,\n -2.7409e-03, -1.4628e-03, 1.8771e-03, -3.7301e-03, -1.6481e-03,\n 1.8146e-03, -2.2376e-03, 1.1888e-03, -1.6700e-03, 3.8630e-04,\n -3.7689e-04, 8.6349e-04, -1.2764e-03, 7.7559e-04, -4.0782e-03,\n -1.3337e-03, 6.3696e-04, 1.8349e-04, -4.8381e-04, -1.5061e-03,\n -5.4320e-03, -2.3399e-03, 1.4935e-03, 2.1549e-03, -1.0798e-03,\n 2.6417e-03, 1.2792e-03, 3.2554e-04, 6.9259e-06, 3.4585e-03,\n -1.6721e-03, 1.1080e-03, 1.0163e-03, 1.3483e-03, -3.0600e-03,\n 1.8635e-03, 1.9516e-03, -1.3552e-03, -1.1381e-03, 2.1241e-03,\n -1.7032e-03, -5.4721e-04, -2.1044e-03, -1.2401e-03, 2.6145e-04,\n -1.0219e-03, -2.5484e-03, -8.1730e-04, 3.1231e-03, 1.9533e-04,\n -2.4566e-03, -9.3648e-04, 1.2388e-04, -5.0295e-04, 2.1907e-03,\n 1.5544e-03, -2.1028e-04, 3.1905e-03, -3.5852e-04, 1.9314e-03,\n -7.3255e-04, -3.4666e-03, -6.6665e-04, 9.1960e-04, -5.0591e-03,\n 2.2181e-05, 6.9340e-04, 1.3409e-03, 2.6542e-03, 1.9558e-03,\n 8.3761e-04, -9.8218e-04, -3.5041e-03, -1.8421e-03, -1.0492e-04,\n -1.0396e-03, 2.8493e-03, 4.3439e-03, -1.2487e-04, -1.7223e-03,\n -6.0166e-04, 2.2691e-03, 4.4085e-04, 5.4240e-06, 8.1918e-04,\n -2.3547e-04, -3.7652e-04, 1.7535e-03, 3.9360e-04, 1.7165e-03,\n 1.5850e-03, 2.5528e-04, 4.5262e-04, 9.4983e-04, 1.1653e-03,\n -1.1241e-03, 2.4784e-03, 1.3750e-03, 2.8264e-04, 1.9409e-03,\n -1.7383e-05, 8.7834e-04, -2.9346e-03, -1.2514e-03, -1.7227e-03,\n 3.5488e-03, -2.7287e-03, -6.7608e-04, 9.9781e-04, -1.3983e-03,\n 5.3979e-04, 1.1045e-03, -1.5328e-03, 1.1158e-03, 1.2648e-03,\n -2.2307e-03, -6.6756e-04, 2.3250e-03, 1.0553e-05, 2.2732e-03,\n 9.3956e-04, 1.5667e-03, -1.7041e-03, -2.3324e-03, 1.7202e-03,\n 7.3663e-05, -5.0600e-05, -3.2178e-04, 3.3367e-03, 8.8608e-04,\n 1.7552e-03, 9.9632e-04, -1.2399e-03, 1.4765e-04, -1.9797e-04,\n 7.2090e-04, -1.2034e-03, -4.2972e-05, 2.4201e-03, -3.7081e-05,\n 2.1351e-03, -1.1978e-03, 1.0786e-03, -7.2501e-04, 1.0409e-03,\n -4.1492e-05, 1.3271e-03, 1.1469e-03, 5.3198e-04, -9.5302e-04,\n -3.6357e-03, -9.0129e-04, 1.6499e-03, 1.7868e-03, 1.5258e-03,\n -2.2020e-03, -5.6184e-04, 1.7095e-03, -2.8686e-04, -1.6996e-03,\n 2.2172e-03, -4.9734e-04, 1.0310e-03, 1.9417e-03, -1.1164e-04,\n -2.4582e-03, 5.6460e-04, -5.7146e-04, 3.6416e-03, -1.6317e-03,\n 7.2642e-04, 2.7691e-03, 1.0570e-03, -6.3976e-04, 1.6407e-03,\n 1.2322e-03, -2.9007e-04, 1.2182e-03, -5.4158e-04, 1.6365e-03,\n 1.4352e-03, 3.7827e-04, 5.2828e-04, 8.3237e-04, 2.0402e-04,\n -1.6188e-03, -2.7719e-03, 5.6052e-45, -1.1652e-03, -7.0972e-04,\n 5.4487e-04, 8.9659e-04, -1.7703e-03, 1.7259e-04, -2.4445e-03,\n 2.3177e-03, -1.3090e-03, -9.9171e-04, -1.9136e-03, -1.7041e-03,\n 3.4053e-03, 8.1076e-04, -1.0398e-03, 3.4696e-03, -7.4722e-05,\n -4.3249e-04, 1.1213e-03, -4.0610e-04, 1.0740e-03, -1.4486e-04,\n 4.5937e-03, 1.6120e-03, 7.2189e-04, 4.4445e-04, -2.7097e-03,\n -1.7412e-03, -1.9128e-03, 1.7867e-04, 3.7019e-03, -1.7019e-03,\n 5.2836e-04, 2.5443e-03, 1.0072e-03, -1.2997e-04, -5.4547e-04,\n 1.2637e-03, 6.0989e-04, 3.9975e-03, 2.7822e-05, -1.8465e-03,\n 6.2542e-04, 2.4942e-03, 2.9591e-04, 7.7028e-04, 1.5481e-03,\n -8.7528e-04, -2.2298e-03, -1.7591e-03, -1.7360e-03, 1.1714e-03,\n -9.6943e-04, -5.4546e-04, 3.8049e-03, -1.7561e-04, -9.3024e-04,\n -1.2161e-03, -4.4666e-03, -1.6031e-04, 6.3971e-03, 6.2885e-04,\n 1.2545e-04, -3.7994e-04, 1.8660e-03, 5.4930e-04, 7.1931e-04,\n 2.2108e-03, -6.8960e-04, -8.1760e-04, 1.1610e-03, 4.9232e-04,\n -1.9407e-03, -1.9141e-03, 1.1418e-03, -1.0351e-03, -9.1975e-04,\n -1.1962e-03, -1.5443e-03, 3.2180e-03, -1.8554e-03, 1.6122e-03,\n 1.5148e-03, 1.5953e-03, -2.8476e-03, -1.0108e-04, 3.7558e-03,\n -5.0126e-03, -2.1199e-03, 2.6691e-03, -1.1858e-03, 1.2075e-03,\n -3.3251e-03, -1.7822e-03, 1.0029e-04, 3.9815e-03, -3.5435e-03,\n 4.8926e-04, -1.5415e-03, 2.6215e-03, 2.8570e-03, -1.6738e-03,\n 1.1884e-03, 2.3238e-03, 2.6745e-03, 1.1452e-03, 2.6080e-04,\n 2.1027e-03, -9.6783e-04, 3.5810e-05, 1.0721e-03, 7.9256e-04,\n 1.2837e-03, -2.0167e-03, 4.2944e-04, -9.3541e-04, -1.9617e-03,\n -2.0754e-03, -5.3060e-03, 1.4522e-03, -2.3678e-03, -1.0589e-03,\n 2.1809e-03, 9.2100e-04, -2.2307e-03, 3.3136e-03, 2.3962e-03,\n 2.2174e-03, 2.3272e-03, -3.3886e-03, -2.9256e-03, -1.7585e-03,\n 8.8359e-05, 3.9552e-03, 1.0281e-03, -3.0283e-03, 1.6900e-03,\n 1.5578e-03, -2.2375e-03, -1.3610e-03, -1.8819e-03, -1.4817e-03,\n -3.8170e-03, -1.3253e-03, -3.1024e-03, 3.0680e-04, -9.1195e-04,\n 3.0943e-03, 4.4853e-04, 7.2554e-04, 1.0450e-03, 2.1890e-03,\n -1.0322e-03, 3.5637e-03, -3.5161e-03, -3.3515e-03, 1.3343e-03,\n 8.7015e-04, -3.3140e-04, -1.2069e-03, -1.6019e-03, 2.6258e-04,\n -4.4531e-03, -1.1428e-03, 1.7805e-03, 9.8714e-04, -2.6945e-03,\n 1.7787e-03, -2.4282e-03, -3.5153e-04, -7.3938e-05, 2.2864e-03,\n -2.0880e-03, 8.3916e-04, 5.1187e-05, -8.4900e-04, 2.4691e-04,\n -4.4556e-03, 1.9020e-03, 6.0159e-03, 3.6901e-03, 1.5354e-03,\n -2.8476e-03, -2.1203e-03, -1.9970e-03, -1.9098e-03, -2.9456e-03,\n 1.1029e-03, -1.8288e-04, -1.5313e-03, -3.5359e-04, 4.0771e-04,\n 1.8256e-03, 1.9200e-03, 1.0851e-03, 1.6194e-03, -1.0850e-03,\n 7.3538e-04, 2.0108e-03, -3.2045e-03, -1.3253e-03, -3.1734e-03,\n -1.9056e-03, -3.0256e-03, -1.7194e-03, -2.3430e-03, -9.7684e-04,\n 2.7238e-04, -4.1829e-04, 1.4042e-03, 2.3650e-03, -1.8151e-04,\n 3.9374e-03, -9.2523e-04, 9.8843e-04, -2.9046e-03, 4.2293e-04,\n 5.5936e-04, -3.1151e-03, -2.1916e-03, -1.0851e-03, 3.1207e-04,\n 9.5898e-04, -1.2187e-03, 1.7448e-03, 1.2587e-03, -1.0304e-04,\n -2.5067e-03, 9.6123e-04, -1.8083e-04, -7.5167e-05, -6.9875e-04,\n 3.4013e-04, -9.7343e-04, 4.0843e-06, -1.6400e-04, 5.3867e-05,\n -1.2476e-03, 2.0023e-03, 3.7016e-04, -1.7069e-03, -1.1234e-03,\n -2.9806e-04, -3.8543e-04, 2.0532e-03, -1.3676e-03, 3.1693e-03,\n 2.0815e-03, -1.2164e-03, 1.8292e-03, -5.7838e-03, -1.4371e-03,\n 1.4224e-03, 1.7215e-03, 2.1543e-03, 5.0128e-03, -1.8667e-04,\n -4.5204e-04, -9.6684e-05, 2.2823e-04, 2.5150e-03, -1.2292e-03,\n -7.4802e-05, 1.9751e-03, 1.9624e-03, 4.9433e-04, 3.6077e-04,\n 8.7611e-04, -1.9920e-03, 2.9205e-03, 1.1169e-04, 1.1486e-03,\n 3.3898e-04, 5.6052e-45], device='cuda:0')",
23
+ "exp_avg_sq": "tensor([7.9966e-05, 5.1980e-05, 3.8042e-05, 4.8337e-05, 3.6115e-05, 9.5957e-05,\n 5.0984e-05, 1.0257e-04, 7.0302e-05, 8.5363e-05, 1.0258e-04, 1.0088e-04,\n 1.1925e-04, 5.1618e-05, 8.0283e-05, 6.9749e-05, 4.5397e-05, 7.1996e-05,\n 5.9935e-05, 8.8604e-05, 8.2509e-05, 3.9996e-05, 4.3334e-05, 8.1743e-05,\n 1.6655e-04, 7.9729e-05, 5.1822e-05, 9.4460e-05, 7.3695e-05, 6.0947e-05,\n 4.5611e-05, 5.6501e-05, 8.6006e-05, 5.5835e-05, 3.5858e-05, 5.3059e-05,\n 3.5657e-05, 3.9793e-05, 4.0024e-05, 3.8295e-05, 3.7145e-05, 3.3925e-05,\n 3.6228e-11, 9.2380e-05, 1.5393e-04, 4.3784e-05, 6.8786e-05, 1.9368e-05,\n 1.0337e-04, 1.4451e-04, 1.4335e-04, 3.0841e-05, 4.6206e-05, 6.6669e-05,\n 1.0087e-04, 4.2221e-05, 6.8714e-05, 6.6785e-05, 5.0580e-05, 3.1118e-05,\n 9.3857e-05, 4.6432e-05, 7.0247e-05, 4.2039e-05, 3.7877e-05, 4.1359e-05,\n 5.8162e-05, 3.8843e-05, 6.6200e-05, 4.7174e-05, 6.8910e-05, 6.5559e-05,\n 5.1703e-05, 5.5014e-05, 5.0538e-05, 5.3091e-05, 4.9452e-05, 4.8519e-05,\n 7.3095e-05, 7.9937e-05, 7.1481e-05, 5.5988e-05, 4.8349e-05, 2.9897e-05,\n 4.8736e-05, 7.9313e-05, 8.1117e-05, 5.6987e-05, 3.0978e-05, 9.2762e-05,\n 5.3137e-05, 6.0984e-05, 8.8243e-05, 6.8840e-05, 8.3279e-05, 6.0717e-05,\n 9.2867e-05, 6.5442e-05, 7.2012e-05, 6.2794e-05, 5.4280e-05, 7.9182e-05,\n 8.3567e-05, 5.6467e-05, 8.1203e-05, 1.2959e-04, 5.2975e-05, 1.7900e-04,\n 4.9857e-05, 5.2999e-05, 8.2467e-05, 8.5463e-05, 6.7539e-05, 5.9590e-05,\n 7.4968e-05, 3.8871e-05, 7.3273e-05, 8.4842e-05, 4.2607e-05, 7.8237e-05,\n 3.9115e-05, 8.3781e-05, 9.1513e-05, 1.4315e-04, 7.7045e-05, 1.0398e-04,\n 5.4936e-05, 8.1529e-05, 1.1231e-04, 1.5336e-04, 7.0591e-05, 5.1141e-05,\n 9.3448e-05, 5.1588e-05, 1.8498e-04, 4.6713e-05, 8.6761e-05, 7.4301e-05,\n 4.8846e-05, 3.7574e-05, 3.8659e-05, 5.6510e-05, 6.8367e-05, 1.0322e-04,\n 5.8965e-05, 5.6823e-05, 3.3379e-05, 9.5127e-05, 6.0573e-05, 1.2573e-04,\n 3.0080e-05, 9.3865e-05, 3.6539e-05, 9.0331e-05, 3.3739e-05, 5.2206e-05,\n 1.0396e-04, 8.0620e-05, 5.1209e-05, 1.1287e-04, 7.9050e-05, 2.5938e-05,\n 9.3988e-05, 5.0930e-05, 2.7018e-05, 1.2249e-04, 5.8501e-05, 3.2652e-05,\n 2.2514e-05, 5.6627e-05, 3.6670e-05, 7.6181e-05, 5.3770e-05, 1.0389e-04,\n 7.4609e-05, 3.0506e-05, 1.1778e-04, 3.9417e-05, 3.3096e-05, 8.7546e-05,\n 5.3750e-05, 5.5404e-05, 1.5661e-04, 6.0822e-05, 1.3851e-04, 4.9893e-05,\n 1.6334e-04, 5.3550e-05, 6.0228e-05, 2.2818e-05, 5.3953e-05, 7.2502e-05,\n 5.8032e-05, 4.9767e-05, 3.7612e-05, 2.5448e-05, 9.1077e-05, 4.9190e-05,\n 2.0898e-05, 3.0701e-05, 4.3411e-05, 6.6282e-05, 5.3608e-05, 3.2653e-05,\n 2.8011e-05, 5.3844e-05, 4.1083e-05, 3.8519e-05, 7.9605e-05, 4.5361e-05,\n 6.4505e-05, 7.6475e-05, 2.8771e-05, 4.0909e-05, 7.3418e-05, 4.6857e-05,\n 3.3863e-05, 8.2200e-05, 5.9866e-05, 1.0005e-04, 3.7599e-05, 4.8621e-05,\n 4.9376e-05, 3.7425e-05, 5.1560e-05, 8.9818e-05, 3.4917e-05, 3.2484e-05,\n 2.6248e-05, 4.8144e-05, 4.3685e-05, 3.7472e-05, 1.2169e-04, 7.1397e-05,\n 4.0846e-05, 5.1589e-05, 5.6053e-05, 4.7995e-11, 4.8544e-05, 1.4573e-04,\n 4.3496e-05, 1.1011e-04, 7.9539e-05, 7.1574e-05, 1.1118e-04, 7.9551e-05,\n 5.9564e-05, 4.1096e-05, 8.0599e-05, 4.6039e-05, 1.1533e-04, 3.8230e-05,\n 7.8031e-05, 8.3880e-05, 3.9568e-05, 6.3194e-05, 6.8169e-05, 6.8249e-05,\n 4.8256e-05, 6.0112e-05, 5.2866e-05, 8.4468e-05, 5.2304e-05, 7.3406e-05,\n 5.7010e-05, 7.3886e-05, 4.1158e-05, 6.7199e-05, 5.1107e-05, 8.4861e-05,\n 8.2142e-05, 8.1444e-05, 3.5788e-05, 6.0043e-05, 3.6736e-05, 4.6916e-05,\n 9.6615e-05, 9.1477e-05, 5.7444e-05, 5.0243e-05, 6.1542e-05, 3.7778e-05,\n 4.2799e-05, 6.6532e-05, 4.7839e-05, 7.6029e-05, 6.7062e-05, 3.4029e-05,\n 1.1604e-04, 6.4860e-05, 3.0789e-05, 3.0622e-05, 6.0767e-05, 3.4957e-05,\n 5.6079e-05, 3.9751e-05, 5.4436e-05, 6.8501e-05, 1.9235e-04, 7.1055e-05,\n 4.1584e-05, 3.8645e-05, 6.7652e-05, 4.7022e-05, 7.6654e-05, 4.6120e-05,\n 4.6853e-05, 6.1196e-05, 1.3920e-04, 8.4065e-05, 9.1199e-05, 5.8681e-05,\n 4.3028e-05, 4.4889e-05, 5.9704e-05, 6.8785e-05, 5.9507e-05, 3.1233e-05,\n 5.0933e-05, 8.9957e-05, 1.7530e-04, 1.0938e-04, 6.2248e-05, 8.4178e-05,\n 1.1189e-04, 1.3230e-04, 6.7090e-05, 9.5267e-05, 5.6797e-05, 6.2527e-05,\n 1.0772e-04, 7.3321e-05, 1.9043e-05, 4.8008e-05, 5.1021e-05, 5.3215e-05,\n 3.6645e-05, 3.1625e-05, 9.0825e-05, 6.0499e-05, 5.9290e-05, 4.0658e-05,\n 5.7553e-05, 1.0517e-04, 6.8352e-05, 1.1587e-04, 7.9529e-05, 4.9449e-05,\n 6.2019e-05, 8.6822e-05, 6.2308e-05, 5.6156e-05, 6.8126e-05, 4.2428e-05,\n 6.3114e-05, 5.1567e-05, 6.6745e-05, 3.3371e-05, 3.7000e-05, 3.1840e-05,\n 3.0272e-05, 1.5271e-04, 1.2770e-04, 8.9714e-05, 5.5806e-05, 6.3108e-05,\n 2.5437e-05, 9.4332e-05, 3.1335e-05, 3.5143e-05, 4.1560e-05, 6.4386e-05,\n 7.2389e-05, 6.0609e-05, 6.8877e-05, 5.4411e-05, 4.0655e-05, 3.3098e-05,\n 6.6132e-05, 3.7969e-05, 5.9341e-05, 5.6685e-05, 7.3559e-05, 4.1820e-05,\n 6.5648e-05, 1.3357e-04, 4.7597e-05, 6.2148e-05, 5.3487e-05, 5.8185e-05,\n 5.2054e-05, 4.3228e-05, 7.2328e-05, 1.1026e-04, 5.1908e-05, 2.6989e-05,\n 3.3073e-05, 1.1134e-04, 5.9867e-05, 1.0653e-04, 5.5609e-05, 3.5182e-05,\n 9.5932e-05, 4.2643e-05, 5.6175e-05, 5.1553e-05, 1.1728e-04, 1.8956e-05,\n 6.9493e-05, 9.1726e-05, 6.9291e-05, 3.6632e-05, 1.2708e-04, 7.7432e-05,\n 4.6496e-05, 8.2611e-05, 1.2031e-04, 9.9610e-05, 7.9219e-05, 6.8129e-05,\n 6.4793e-05, 4.6599e-05, 7.8830e-05, 7.4470e-05, 1.0422e-04, 5.7375e-05,\n 7.4938e-05, 4.9964e-05, 6.6354e-05, 4.7042e-05, 4.8135e-05, 3.9953e-05,\n 6.2767e-05, 6.8906e-05, 7.5842e-05, 6.3660e-05, 7.8614e-05, 8.9747e-05,\n 5.3038e-05, 3.9621e-05, 9.6984e-05, 6.5548e-05, 5.7007e-05, 2.4710e-04,\n 7.2322e-05, 6.1321e-05, 8.0947e-05, 6.4385e-05, 7.4309e-05, 7.1873e-05,\n 7.1314e-05, 4.6217e-05, 6.1453e-05, 1.6699e-04, 8.3100e-05, 3.2438e-05,\n 1.3535e-04, 5.5716e-05, 3.9852e-05, 6.2000e-05, 8.0328e-05, 5.0720e-05,\n 5.3831e-05, 5.8944e-05, 5.9366e-05, 4.3835e-05, 4.2451e-05, 6.9133e-05,\n 7.6440e-05, 8.7080e-05, 6.3170e-05, 1.8459e-04, 8.1594e-05, 7.2582e-05,\n 9.5871e-05, 6.9516e-05, 7.1750e-05, 6.7385e-05, 8.6309e-05, 3.7863e-05,\n 3.4631e-05, 1.1066e-04, 6.9036e-05, 3.5644e-05, 5.3421e-05, 5.4410e-05,\n 5.3269e-05, 6.1536e-05, 9.6380e-05, 5.7837e-05, 4.8601e-05, 5.0161e-05,\n 8.0323e-05, 7.2128e-05, 4.4709e-05, 7.1943e-05, 6.1025e-05, 5.9979e-05,\n 4.5788e-05, 5.6831e-05, 5.4524e-05, 4.1336e-05, 5.0011e-05, 1.8770e-04,\n 3.4429e-05, 6.2511e-05, 3.8308e-05, 5.9126e-05, 4.7405e-05, 5.7981e-05,\n 4.4551e-05, 8.4544e-12], device='cuda:0')"
24
+ },
25
+ "4": {
26
+ "step": "tensor(11262.)",
27
+ "exp_avg": "tensor([[-4.8298e-06, -1.5772e-05, 1.1096e-05, ..., -5.6297e-06,\n 1.6031e-06, 5.6052e-45],\n [ 7.2220e-06, -8.9088e-06, 2.7805e-06, ..., -2.0629e-06,\n -2.3938e-05, 5.6052e-45],\n [ 1.0664e-05, 2.5490e-06, 6.9964e-06, ..., -2.4278e-05,\n 1.0064e-06, 5.6052e-45],\n ...,\n [ 3.4498e-06, -7.4226e-06, -2.3703e-05, ..., -4.6634e-05,\n -1.9466e-05, -5.6052e-45],\n [ 5.1472e-06, 4.9359e-05, 1.8200e-05, ..., -3.3457e-05,\n -6.3454e-06, -5.6052e-45],\n [ 1.5525e-05, -4.0262e-06, 1.9213e-05, ..., -4.2439e-05,\n -1.8978e-05, -5.6052e-45]], device='cuda:0')",
28
+ "exp_avg_sq": "tensor([[1.8577e-09, 4.1241e-09, 1.7344e-09, ..., 3.1889e-09, 1.1422e-09,\n 3.3041e-12],\n [3.1972e-09, 6.4718e-09, 3.7467e-09, ..., 9.3186e-09, 5.3947e-09,\n 1.0792e-12],\n [2.8820e-09, 4.2344e-09, 4.0620e-09, ..., 7.9349e-09, 1.8628e-09,\n 1.0557e-12],\n ...,\n [3.9377e-09, 8.8852e-09, 3.4207e-09, ..., 1.0908e-08, 2.6426e-09,\n 2.4442e-12],\n [4.0449e-09, 1.2772e-08, 3.6352e-09, ..., 6.9773e-09, 5.0601e-09,\n 3.2608e-12],\n [4.9892e-09, 5.7934e-09, 2.9977e-09, ..., 8.0983e-09, 4.1906e-09,\n 1.6918e-12]], device='cuda:0')"
29
+ },
30
+ "5": {
31
+ "step": "tensor(3754.)",
32
+ "exp_avg": "tensor([[-6.3968e-06, -3.6907e-05, -3.8804e-06, ..., 1.5208e-05,\n 1.0177e-06, 0.0000e+00],\n [ 8.6372e-06, -2.5307e-05, -1.6623e-05, ..., -6.7706e-06,\n -2.8432e-05, 0.0000e+00],\n [ 1.1897e-05, 3.7288e-05, 8.6056e-06, ..., -5.2612e-05,\n -1.4793e-05, 0.0000e+00],\n ...,\n [-5.0890e-06, 6.1809e-06, -3.3292e-05, ..., -3.9815e-05,\n 1.6332e-05, 0.0000e+00],\n [ 2.0148e-05, 2.3317e-05, 1.1576e-05, ..., 2.3579e-05,\n -7.0544e-07, 0.0000e+00],\n [ 6.1205e-06, 4.0548e-05, 1.9448e-05, ..., -1.5858e-05,\n -3.4978e-05, 0.0000e+00]], device='cuda:0')",
33
+ "exp_avg_sq": "tensor([[5.5686e-09, 6.2557e-09, 2.5712e-09, ..., 4.9927e-09, 3.7908e-09,\n 0.0000e+00],\n [4.8561e-09, 1.4821e-08, 7.8047e-09, ..., 2.3532e-08, 9.9939e-09,\n 0.0000e+00],\n [6.2160e-09, 1.0547e-08, 7.0683e-09, ..., 2.0253e-08, 5.7153e-09,\n 0.0000e+00],\n ...,\n [1.2963e-08, 1.0113e-08, 1.1602e-08, ..., 3.4012e-08, 6.3986e-09,\n 0.0000e+00],\n [5.6864e-09, 1.3215e-08, 5.9178e-09, ..., 1.1426e-08, 6.0170e-09,\n 0.0000e+00],\n [1.0390e-08, 9.5131e-09, 5.5627e-09, ..., 1.2499e-08, 1.0134e-08,\n 0.0000e+00]], device='cuda:0')"
34
+ },
35
+ "6": {
36
+ "step": "tensor(3754.)",
37
+ "exp_avg": "tensor([-0.0007, 0.0007], device='cuda:0')",
38
+ "exp_avg_sq": "tensor([0.0001, 0.0001], device='cuda:0')"
39
+ }
40
+ },
41
+ "param_groups": [
42
+ {
43
+ "lr": 0.00793913236883622,
44
+ "name": "shared",
45
+ "betas": [
46
+ 0.9,
47
+ 0.999
48
+ ],
49
+ "eps": 1e-08,
50
+ "weight_decay": 1e-05,
51
+ "amsgrad": false,
52
+ "maximize": false,
53
+ "foreach": null,
54
+ "capturable": false,
55
+ "differentiable": false,
56
+ "fused": null,
57
+ "decoupled_weight_decay": true,
58
+ "initial_lr": 0.01,
59
+ "params": [
60
+ 0,
61
+ 1,
62
+ 2,
63
+ 3
64
+ ]
65
+ },
66
+ {
67
+ "lr": 0.00793913236883622,
68
+ "name": "scale_256",
69
+ "betas": [
70
+ 0.9,
71
+ 0.999
72
+ ],
73
+ "eps": 1e-08,
74
+ "weight_decay": 1e-05,
75
+ "amsgrad": false,
76
+ "maximize": false,
77
+ "foreach": null,
78
+ "capturable": false,
79
+ "differentiable": false,
80
+ "fused": null,
81
+ "decoupled_weight_decay": true,
82
+ "initial_lr": 0.01,
83
+ "params": [
84
+ 4
85
+ ]
86
+ },
87
+ {
88
+ "lr": 0.00793913236883622,
89
+ "name": "scale_512",
90
+ "betas": [
91
+ 0.9,
92
+ 0.999
93
+ ],
94
+ "eps": 1e-08,
95
+ "weight_decay": 1e-05,
96
+ "amsgrad": false,
97
+ "maximize": false,
98
+ "foreach": null,
99
+ "capturable": false,
100
+ "differentiable": false,
101
+ "fused": null,
102
+ "decoupled_weight_decay": true,
103
+ "initial_lr": 0.01,
104
+ "params": [
105
+ 5
106
+ ]
107
+ },
108
+ {
109
+ "lr": 0.003969669238105037,
110
+ "name": "fusion",
111
+ "betas": [
112
+ 0.9,
113
+ 0.999
114
+ ],
115
+ "eps": 1e-08,
116
+ "weight_decay": 1e-05,
117
+ "amsgrad": false,
118
+ "maximize": false,
119
+ "foreach": null,
120
+ "capturable": false,
121
+ "differentiable": false,
122
+ "fused": null,
123
+ "decoupled_weight_decay": true,
124
+ "initial_lr": 0.005,
125
+ "params": [
126
+ 6
127
+ ]
128
+ }
129
+ ]
130
+ },
131
+ "scheduler_state_dict": {
132
+ "T_0": 10,
133
+ "T_i": 10,
134
+ "T_mult": 2,
135
+ "eta_min": 1e-06,
136
+ "T_cur": 3,
137
+ "base_lrs": [
138
+ 0.01,
139
+ 0.01,
140
+ 0.01,
141
+ 0.005
142
+ ],
143
+ "last_epoch": 3,
144
+ "_step_count": 0,
145
+ "_is_initial": false,
146
+ "_get_lr_called_within_step": false,
147
+ "_last_lr": [
148
+ 0.00793913236883622,
149
+ 0.00793913236883622,
150
+ 0.00793913236883622,
151
+ 0.003969669238105037
152
+ ]
153
+ },
154
+ "metrics": {
155
+ "best_val_acc": 64.12,
156
+ "best_epoch": 2,
157
+ "scale_accuracies": {
158
+ "256": 63.782,
159
+ "512": 63.839333333333336
160
+ },
161
+ "training_history": {
162
+ "epochs": [
163
+ 1,
164
+ 2,
165
+ 3
166
+ ],
167
+ "train_loss": [
168
+ 3.9435249049420933,
169
+ 3.3040703793567867,
170
+ 4.3101251841734625
171
+ ],
172
+ "train_acc": [
173
+ 54.38726307083047,
174
+ 59.31631083223343,
175
+ 60.291879721118846
176
+ ],
177
+ "val_acc": [
178
+ 61.635333333333335,
179
+ 62.978,
180
+ 64.12
181
+ ],
182
+ "scale_accs": {
183
+ "256": [
184
+ 61.635333333333335,
185
+ 62.978,
186
+ 63.782
187
+ ],
188
+ "512": [
189
+ 63.839333333333336
190
+ ]
191
+ },
192
+ "lr": [
193
+ 0.00975530705321762,
194
+ 0.00904518046337755,
195
+ 0.00793913236883622
196
+ ]
197
+ }
198
+ },
199
+ "train_config": {
200
+ "name": "david_training",
201
+ "run_id": "20251012_235237",
202
+ "dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
203
+ "model_variant": [
204
+ "clip_vit_b16",
205
+ "clip_vit_laion_b32",
206
+ "clip_vit_b32"
207
+ ],
208
+ "num_classes": 1000,
209
+ "preset": "small_fast",
210
+ "custom_config_path": null,
211
+ "num_classes_override": null,
212
+ "use_belly_override": null,
213
+ "belly_expand_override": null,
214
+ "progressive_training_override": true,
215
+ "scale_warmup_epochs_override": {
216
+ "256": 0,
217
+ "512": 2
218
+ },
219
+ "num_epochs": 10,
220
+ "batch_size": 1024,
221
+ "learning_rate": 0.01,
222
+ "weight_decay": 1e-05,
223
+ "warmup_epochs": 3,
224
+ "use_rose_loss": true,
225
+ "rose_initial_weight": 0.1,
226
+ "rose_max_weight": 0.8,
227
+ "rose_weight_schedule": "adaptive",
228
+ "use_cayley_loss": false,
229
+ "cayley_weight": 0.01,
230
+ "scale_loss_balance": null,
231
+ "use_mixed_precision": false,
232
+ "gradient_clip": 15.0,
233
+ "scheduler_type": "cosine_restarts",
234
+ "min_lr": 1e-06,
235
+ "freeze_strategy": "never",
236
+ "freeze_threshold": 90.0,
237
+ "unfreeze_on_plateau": true,
238
+ "patience": 10,
239
+ "track_gradients": true,
240
+ "gradient_scale_threshold": 1e-05,
241
+ "gradient_scale_multiplier": 10.0,
242
+ "log_interval": 50,
243
+ "val_interval": 1,
244
+ "save_interval": 5,
245
+ "log_fusion_weights": true,
246
+ "log_loss_components": true,
247
+ "save_format": "safetensors",
248
+ "hf_repo": "AbstractPhil/david-shared-space",
249
+ "upload_to_hub": true,
250
+ "base_dir": "./david_training",
251
+ "num_workers": 10,
252
+ "pin_memory": true,
253
+ "prefetch_factor": 4,
254
+ "persistent_workers": true
255
+ }
256
+ }