Upload weights and configs - David-fully_shared-weighted_sum - Run 20251012_135249
Browse files- weights/David-fully_shared-weighted_sum/20251012_135249/MODEL_SUMMARY.txt +10 -9
- weights/David-fully_shared-weighted_sum/20251012_135249/best_model_acc71.91.safetensors +3 -0
- weights/David-fully_shared-weighted_sum/20251012_135249/best_model_acc71.91_metadata.json +293 -0
- weights/David-fully_shared-weighted_sum/20251012_135249/training_history.json +14 -7
weights/David-fully_shared-weighted_sum/20251012_135249/MODEL_SUMMARY.txt
CHANGED
|
@@ -3,24 +3,24 @@
|
|
| 3 |
β DAVID MODEL SUMMARY β
|
| 4 |
β βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ£
|
| 5 |
β β
|
| 6 |
-
β π― VALIDATION ACCURACY: 71.
|
| 7 |
β β
|
| 8 |
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 9 |
|
| 10 |
MODEL: David-fully_shared-weighted_sum
|
| 11 |
RUN ID: 20251012_135249
|
| 12 |
-
BEST EPOCH:
|
| 13 |
|
| 14 |
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 15 |
|
| 16 |
π PERFORMANCE BREAKDOWN
|
| 17 |
|
| 18 |
-
Final Training Accuracy:
|
| 19 |
-
Best Validation Accuracy: 71.
|
| 20 |
|
| 21 |
Per-Scale Accuracies:
|
| 22 |
-
β’ Scale 256: 71.
|
| 23 |
-
β’ Scale 512: 71.
|
| 24 |
|
| 25 |
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 26 |
|
|
@@ -46,17 +46,18 @@ Epoch | Train Acc | Val Acc | Learning Rate
|
|
| 46 |
5 | 71.36% | 70.97% | 5.01e-04
|
| 47 |
6 | 71.84% | 71.46% | 3.46e-04
|
| 48 |
7 | 72.29% | 71.77% | 2.07e-04
|
| 49 |
-
8 | 72.75% | 71.78%
|
|
|
|
| 50 |
|
| 51 |
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 52 |
|
| 53 |
π FILES
|
| 54 |
|
| 55 |
-
Best Model: best_model_acc71.
|
| 56 |
Config: david_config.json
|
| 57 |
Training Cfg: train_config.json
|
| 58 |
History: training_history.json
|
| 59 |
|
| 60 |
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 61 |
|
| 62 |
-
Generated: 2025-10-12 14:
|
|
|
|
| 3 |
β DAVID MODEL SUMMARY β
|
| 4 |
β βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ£
|
| 5 |
β β
|
| 6 |
+
β π― VALIDATION ACCURACY: 71.91% β
|
| 7 |
β β
|
| 8 |
ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 9 |
|
| 10 |
MODEL: David-fully_shared-weighted_sum
|
| 11 |
RUN ID: 20251012_135249
|
| 12 |
+
BEST EPOCH: 9/10
|
| 13 |
|
| 14 |
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 15 |
|
| 16 |
π PERFORMANCE BREAKDOWN
|
| 17 |
|
| 18 |
+
Final Training Accuracy: 73.02%
|
| 19 |
+
Best Validation Accuracy: 71.91%
|
| 20 |
|
| 21 |
Per-Scale Accuracies:
|
| 22 |
+
β’ Scale 256: 71.57%
|
| 23 |
+
β’ Scale 512: 71.71%
|
| 24 |
|
| 25 |
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 26 |
|
|
|
|
| 46 |
5 | 71.36% | 70.97% | 5.01e-04
|
| 47 |
6 | 71.84% | 71.46% | 3.46e-04
|
| 48 |
7 | 72.29% | 71.77% | 2.07e-04
|
| 49 |
+
8 | 72.75% | 71.78% | 9.64e-05
|
| 50 |
+
9 | 73.02% | 71.91% π | 2.54e-05
|
| 51 |
|
| 52 |
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 53 |
|
| 54 |
π FILES
|
| 55 |
|
| 56 |
+
Best Model: best_model_acc71.91.safetensors
|
| 57 |
Config: david_config.json
|
| 58 |
Training Cfg: train_config.json
|
| 59 |
History: training_history.json
|
| 60 |
|
| 61 |
βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 62 |
|
| 63 |
+
Generated: 2025-10-12 14:08:03
|
weights/David-fully_shared-weighted_sum/20251012_135249/best_model_acc71.91.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e5cd333eab56f48ed4340554d416262c27ef15daf83daa939444f82a0a6b6b31
|
| 3 |
+
size 2628344
|
weights/David-fully_shared-weighted_sum/20251012_135249/best_model_acc71.91_metadata.json
ADDED
|
@@ -0,0 +1,293 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"epoch": 8,
|
| 3 |
+
"optimizer_state_dict": {
|
| 4 |
+
"state": {
|
| 5 |
+
"0": {
|
| 6 |
+
"step": "tensor(11268.)",
|
| 7 |
+
"exp_avg": "tensor([[ 1.6730e-04, -3.5845e-03, -2.9463e-04, ..., 5.4469e-05,\n 1.8532e-04, 1.8314e-03],\n [ 1.9663e-04, -1.2293e-04, -5.9021e-04, ..., 1.4957e-03,\n 1.7826e-04, 6.0047e-04],\n [-2.0832e-03, 7.9586e-04, -4.6102e-04, ..., 1.6200e-03,\n -7.5037e-05, -7.4932e-04],\n ...,\n [-1.0317e-03, 1.3865e-04, 2.0382e-03, ..., -4.5965e-05,\n -7.8645e-04, 1.7134e-03],\n [-3.6024e-04, -2.3257e-03, 1.8227e-03, ..., 3.0624e-04,\n -2.0507e-06, 1.2756e-04],\n [ 3.4405e-04, -4.4155e-03, -9.5354e-04, ..., -1.4266e-03,\n -4.3076e-04, -1.9174e-04]], device='cuda:0')",
|
| 8 |
+
"exp_avg_sq": "tensor([[6.0221e-06, 3.2733e-05, 1.6837e-05, ..., 6.1610e-06, 3.1043e-06,\n 8.1782e-06],\n [5.5283e-06, 2.4316e-05, 1.8268e-05, ..., 8.3223e-06, 2.2850e-06,\n 9.1605e-06],\n [1.1135e-05, 6.4293e-05, 2.1208e-05, ..., 6.1934e-06, 3.2528e-06,\n 4.0694e-06],\n ...,\n [6.3581e-06, 2.4659e-05, 1.4418e-05, ..., 3.4166e-06, 1.8933e-06,\n 2.8011e-06],\n [7.2142e-06, 4.4596e-05, 1.5713e-05, ..., 7.1391e-06, 3.7922e-06,\n 6.0627e-06],\n [3.9601e-06, 4.7474e-05, 1.1590e-05, ..., 3.0785e-06, 1.8115e-06,\n 2.1169e-06]], device='cuda:0')"
|
| 9 |
+
},
|
| 10 |
+
"1": {
|
| 11 |
+
"step": "tensor(11268.)",
|
| 12 |
+
"exp_avg": "tensor([ 2.3417e-02, 2.4568e-02, -2.4948e-02, -3.8981e-03, 1.4236e-02,\n -1.3251e-03, 2.5327e-02, -3.8716e-02, 4.5635e-02, 4.1084e-02,\n 6.2685e-03, 1.1092e-02, 1.4601e-02, 4.6408e-03, 2.3364e-02,\n -1.9685e-02, -1.0079e-03, 4.3966e-02, -4.2904e-02, -2.5925e-02,\n 2.3419e-02, 8.4815e-03, -1.7112e-02, 8.1780e-03, 8.9684e-03,\n -2.0205e-03, -3.1504e-02, -4.6504e-03, 8.0569e-03, -3.9166e-03,\n 2.4622e-02, -1.9405e-02, -8.1325e-03, -1.3363e-02, -8.4387e-04,\n 1.4350e-02, 1.1040e-02, -1.6993e-02, 1.6805e-02, -6.3212e-03,\n 4.9554e-02, 1.8274e-02, 7.2142e-03, -1.3180e-02, -1.9856e-02,\n -6.7373e-03, -1.3154e-02, 1.9755e-02, 6.3563e-03, -2.5880e-02,\n -2.6455e-02, 8.4570e-03, 1.2293e-02, -6.4048e-03, -5.3977e-03,\n 1.5325e-02, 1.1222e-03, -1.8461e-02, 6.1249e-03, -1.3892e-02,\n 2.1076e-02, 2.6032e-02, 1.8742e-02, 3.5452e-02, -5.4550e-03,\n -3.3830e-02, 1.6358e-02, -7.3318e-03, 7.1393e-03, 7.1724e-03,\n 2.2005e-03, 1.7771e-03, 1.5879e-02, 7.7530e-03, 2.9091e-02,\n 1.1877e-04, -6.9003e-03, -1.3296e-02, -4.6233e-03, 1.9011e-02,\n 1.0496e-03, 1.5354e-02, 1.7245e-02, 2.3618e-02, 2.6573e-02,\n -9.7724e-03, -7.0901e-04, 1.7073e-02, 1.6063e-02, 2.7890e-04,\n 5.9407e-03, 2.1000e-02, 1.7279e-02, 1.0560e-02, 5.1744e-03,\n 4.9364e-03, 2.7571e-03, 4.0429e-03, -7.3446e-04, -1.0896e-02,\n 1.4041e-02, -1.2464e-02, 4.3924e-04, 1.1170e-02, 2.3301e-02,\n 2.9854e-03, 2.0234e-02, 1.2097e-02, -4.2271e-03, -3.6744e-02,\n -1.1477e-02, -9.5774e-02, 2.4953e-02, 1.5827e-02, 1.2379e-02,\n 2.9007e-02, 5.0457e-02, -3.5164e-02, -7.8931e-04, -4.6327e-02,\n -9.0779e-03, 2.0608e-03, 1.3435e-02, 1.2624e-02, -4.2322e-02,\n 1.1696e-02, 7.3104e-03, -7.5815e-03, 1.7814e-02, 2.0646e-03,\n -9.3445e-03, 2.6580e-02, 1.8488e-02, 3.1770e-02, -9.2026e-04,\n 1.2714e-02, -3.1506e-03, 9.2296e-03, 9.4015e-03, -3.5044e-02,\n -3.0579e-02, 8.5404e-03, 1.6727e-02, -1.5936e-02, 2.4565e-02,\n 5.9315e-03, 8.9090e-03, 5.1917e-04, 1.4244e-02, -2.7509e-02,\n -8.0235e-04, 1.0796e-02, -5.9597e-03, 3.7340e-02, -2.6956e-03,\n 1.0407e-02, 1.3774e-02, 1.8758e-02, -1.0442e-02, 7.4692e-03,\n 1.5819e-04, -1.3350e-03, -1.5845e-03, -9.8105e-03, 6.5078e-04,\n -2.6122e-02, 5.4580e-03, -1.1448e-02, 4.4871e-02, 2.2683e-02,\n -1.0429e-02, 2.2779e-02, -2.9393e-02, 3.2069e-04, -3.4460e-02,\n -8.5228e-04, 1.4744e-03, 1.0110e-02, -3.2068e-02, -3.3942e-02,\n 1.0565e-02, 1.9476e-02, 2.5715e-03, 1.1491e-02, 3.0778e-03,\n -1.2419e-02, 9.9467e-03, 2.5693e-03, 4.3628e-03, 1.1604e-02,\n -1.4779e-02, 8.8604e-03, -5.5464e-03, 1.6012e-02, 9.4487e-03,\n 2.3047e-02, -1.9399e-02, -9.4726e-03, 2.8240e-03, 1.4161e-03,\n 2.8132e-02, -9.2503e-03, 3.6927e-04, -1.8092e-02, 2.0011e-02,\n 2.4054e-02, -1.0017e-03, 4.1355e-05, 3.0499e-03, 2.6769e-02,\n 2.0352e-02, 1.8197e-02, -5.2026e-03, -1.7787e-02, 1.0246e-02,\n -2.2973e-02, -1.0475e-02, -9.3069e-03, 3.9987e-04, 1.7289e-02,\n 4.5149e-03, -4.7103e-02, 2.8241e-02, -1.5305e-02, 8.8425e-03,\n -1.0419e-02, 2.6158e-03, 3.0699e-02, 8.3658e-03, 2.5466e-03,\n -1.3101e-02, 1.4214e-02, -3.7761e-04, 2.7105e-02, -3.5094e-02,\n -2.9760e-03, -2.2416e-02, 1.6855e-02, 3.0014e-02, -8.5159e-03,\n -1.4484e-02, -1.0927e-02, 3.7028e-03, 5.7203e-03, 8.7947e-03,\n -1.1759e-02, -1.2740e-02, -2.8305e-02, 7.0330e-03, -2.7081e-02,\n -8.6393e-02, -2.5057e-02, 8.0040e-03, -1.3091e-02, 1.3749e-02,\n 1.5344e-02, 1.4968e-02, 1.3072e-03, 1.2245e-02, -4.1840e-03,\n -3.6183e-03, -9.9598e-03, -1.7933e-02, -1.7962e-02, 1.1602e-02,\n -1.1091e-02, -3.6709e-02, -2.5325e-02, -1.3609e-02, 8.8257e-03,\n 9.8987e-03, -6.5019e-03, -2.3896e-02, 1.2553e-02, 7.6524e-03,\n -2.9426e-02, -2.3711e-02, -1.3833e-02, 7.3366e-03, -2.3608e-02,\n -2.4407e-02, -2.0078e-02, -1.5352e-02, 1.1377e-02, 1.5055e-02,\n -1.9883e-02, -2.1071e-02, -9.3366e-03, -3.4556e-02, 1.7653e-02,\n -8.6176e-03, -3.0723e-02, -4.1446e-02, -4.3035e-02, 4.9087e-03,\n -1.0463e-02, -8.5456e-03, -1.8274e-02, -2.0428e-02, 3.2974e-02,\n 4.2646e-03, 3.0269e-02, -3.3250e-02, 3.3279e-02, 3.1356e-02,\n 5.0759e-03, 4.2344e-03, 2.2810e-02, -3.0778e-03, 6.0539e-03,\n 3.0022e-02, 2.8016e-02, 2.3003e-02, 5.7731e-02, 1.9192e-02,\n -5.8274e-03, -1.4113e-02, 5.4328e-03, -7.0662e-03, -5.9251e-03,\n -2.0554e-02, 1.0130e-02, 9.7318e-03, -5.5134e-03, 1.3090e-02,\n 2.2694e-02, 1.5230e-02, -9.0117e-03, -4.6628e-03, 1.1780e-02,\n -4.4631e-02, -2.6486e-02, 6.7319e-03, 9.0405e-03, 1.4729e-02,\n 1.2131e-02, -6.5690e-03, -1.0547e-03, -1.0715e-02, 6.9890e-04,\n 1.6102e-02, -8.1128e-03, -1.6691e-02, 1.3611e-02, -3.8144e-02,\n -1.4594e-02, -1.1065e-02, 8.0046e-03, -7.6482e-03, -3.8973e-02,\n -9.7575e-03, 9.7779e-03, -5.7173e-03, 9.5195e-03, -3.2671e-03,\n 7.2250e-03, 1.7442e-02, -2.9992e-02, -1.0860e-03, -1.6720e-02,\n 1.7207e-02, 5.6576e-03, -4.3277e-03, -1.3699e-02, -1.5863e-02,\n 4.2251e-03, 2.8606e-02, -1.8297e-02, 5.9028e-03, 5.2723e-03,\n 7.0744e-03, 2.6067e-02, 2.8844e-04, 2.0400e-03, -1.0353e-02,\n 5.6203e-03, 1.6595e-02, -2.3032e-02, -3.6138e-03, 2.9388e-02,\n -1.1867e-04, 1.3212e-02, 4.1837e-03, 8.2909e-03, 3.7230e-02,\n 1.7210e-02, -3.3278e-02, 2.9877e-02, 1.1760e-02, -6.2272e-03,\n -5.2627e-02, 4.2746e-03, -9.4480e-03, -5.8680e-02, -2.3676e-02,\n 1.7897e-02, -5.3967e-03, 4.4075e-02, -6.4443e-03, 1.6696e-02,\n -2.2963e-03, 6.6957e-03, 9.1096e-03, -8.4349e-02, 1.7080e-02,\n 2.2908e-02, 1.9319e-02, 8.5321e-03, 2.0333e-03, 9.6435e-03,\n 1.9194e-03, 5.9562e-03, 2.8823e-02, 1.4122e-02, -7.3116e-03,\n 1.7836e-02, 4.7562e-02, -3.7067e-02, -1.7976e-02, -4.2345e-05,\n -9.0524e-04, 1.2399e-02, -8.2412e-03, -2.7185e-02, 1.4940e-03,\n -5.2218e-03, -2.5443e-02, -8.2259e-03, 8.3559e-04, 2.3527e-02,\n 7.1882e-04, -1.6138e-02, 6.0001e-03, 2.7678e-02, -1.3036e-03,\n -1.2300e-02, -6.0173e-03, 2.2566e-03, -1.6208e-02, 5.2931e-03,\n 1.5479e-02, -5.8386e-02, 2.4551e-03, 2.8743e-02, 2.9157e-03,\n 9.7359e-03, 1.4621e-03, -1.2678e-02, -1.5211e-02, -2.3293e-02,\n -2.7286e-05, 1.8583e-02, 1.7581e-02, -3.0441e-03, -3.9659e-02,\n -1.3483e-03, 1.5743e-03, -2.2535e-02, 5.8813e-03, -7.8293e-05,\n -3.1185e-03, -1.0948e-02, 1.8998e-02, 3.3024e-03, -2.0726e-02,\n -7.8829e-03, -9.6723e-03, -1.0459e-02, 2.1232e-02, 3.1223e-02,\n 1.2489e-02, -7.0425e-03, 1.9629e-03, 1.1051e-02, -4.6408e-02,\n 2.7816e-03, 7.1215e-03, -2.5369e-02, 1.9043e-02, -1.4511e-02,\n 1.7568e-02, 8.1064e-03, -1.0444e-02, -1.0373e-02, -4.4138e-02,\n -7.6848e-03, -1.6807e-02, -5.3552e-03, 3.5353e-03, 1.7877e-02,\n -1.3080e-03, 2.1461e-02, 1.9499e-03, -4.2106e-03, -3.6306e-02,\n -1.5678e-02, 8.5211e-03, 9.1395e-03, -1.0793e-02, -8.1219e-04,\n 8.2307e-04, 3.4270e-03, -2.7472e-02, 1.2197e-02, -1.1212e-02,\n 2.2751e-02, 1.8279e-03, -1.0927e-02, -2.3278e-03, -2.2223e-02,\n -3.2218e-02, -3.3266e-04], device='cuda:0')",
|
| 13 |
+
"exp_avg_sq": "tensor([0.0048, 0.0050, 0.0053, 0.0029, 0.0065, 0.0031, 0.0034, 0.0032, 0.0059,\n 0.0040, 0.0027, 0.0039, 0.0046, 0.0032, 0.0040, 0.0036, 0.0012, 0.0021,\n 0.0027, 0.0029, 0.0033, 0.0037, 0.0042, 0.0045, 0.0052, 0.0038, 0.0041,\n 0.0035, 0.0051, 0.0016, 0.0034, 0.0047, 0.0048, 0.0031, 0.0037, 0.0032,\n 0.0024, 0.0039, 0.0033, 0.0031, 0.0060, 0.0034, 0.0036, 0.0047, 0.0037,\n 0.0051, 0.0079, 0.0045, 0.0034, 0.0048, 0.0029, 0.0038, 0.0044, 0.0021,\n 0.0065, 0.0045, 0.0025, 0.0049, 0.0031, 0.0034, 0.0034, 0.0067, 0.0053,\n 0.0027, 0.0024, 0.0025, 0.0033, 0.0049, 0.0086, 0.0060, 0.0051, 0.0032,\n 0.0068, 0.0024, 0.0040, 0.0070, 0.0040, 0.0039, 0.0040, 0.0083, 0.0062,\n 0.0031, 0.0055, 0.0046, 0.0032, 0.0035, 0.0013, 0.0080, 0.0046, 0.0042,\n 0.0025, 0.0034, 0.0026, 0.0043, 0.0059, 0.0038, 0.0051, 0.0050, 0.0040,\n 0.0046, 0.0030, 0.0032, 0.0047, 0.0018, 0.0031, 0.0031, 0.0063, 0.0028,\n 0.0046, 0.0035, 0.0017, 0.0058, 0.0036, 0.0023, 0.0025, 0.0025, 0.0042,\n 0.0056, 0.0024, 0.0038, 0.0027, 0.0026, 0.0038, 0.0049, 0.0054, 0.0044,\n 0.0049, 0.0034, 0.0050, 0.0029, 0.0045, 0.0034, 0.0035, 0.0032, 0.0045,\n 0.0026, 0.0029, 0.0018, 0.0047, 0.0074, 0.0051, 0.0027, 0.0032, 0.0051,\n 0.0050, 0.0031, 0.0039, 0.0042, 0.0046, 0.0049, 0.0031, 0.0042, 0.0028,\n 0.0026, 0.0019, 0.0024, 0.0045, 0.0042, 0.0025, 0.0038, 0.0031, 0.0035,\n 0.0034, 0.0035, 0.0046, 0.0044, 0.0018, 0.0028, 0.0043, 0.0037, 0.0044,\n 0.0033, 0.0022, 0.0035, 0.0046, 0.0026, 0.0035, 0.0036, 0.0021, 0.0037,\n 0.0051, 0.0080, 0.0020, 0.0043, 0.0065, 0.0042, 0.0016, 0.0022, 0.0039,\n 0.0034, 0.0022, 0.0024, 0.0061, 0.0039, 0.0042, 0.0037, 0.0028, 0.0031,\n 0.0034, 0.0092, 0.0022, 0.0038, 0.0025, 0.0051, 0.0033, 0.0031, 0.0022,\n 0.0026, 0.0033, 0.0049, 0.0023, 0.0047, 0.0051, 0.0077, 0.0041, 0.0024,\n 0.0038, 0.0024, 0.0034, 0.0047, 0.0023, 0.0043, 0.0060, 0.0052, 0.0018,\n 0.0036, 0.0039, 0.0046, 0.0038, 0.0062, 0.0039, 0.0030, 0.0047, 0.0025,\n 0.0037, 0.0045, 0.0044, 0.0041, 0.0036, 0.0083, 0.0056, 0.0031, 0.0037,\n 0.0030, 0.0027, 0.0050, 0.0032, 0.0039, 0.0024, 0.0063, 0.0028, 0.0032,\n 0.0017, 0.0087, 0.0043, 0.0043, 0.0077, 0.0030, 0.0041, 0.0039, 0.0041,\n 0.0070, 0.0029, 0.0028, 0.0018, 0.0042, 0.0029, 0.0050, 0.0019, 0.0031,\n 0.0024, 0.0024, 0.0049, 0.0037, 0.0031, 0.0065, 0.0016, 0.0029, 0.0079,\n 0.0037, 0.0033, 0.0046, 0.0038, 0.0027, 0.0063, 0.0047, 0.0036, 0.0042,\n 0.0057, 0.0030, 0.0035, 0.0057, 0.0039, 0.0042, 0.0061, 0.0049, 0.0051,\n 0.0044, 0.0063, 0.0061, 0.0045, 0.0028, 0.0035, 0.0026, 0.0030, 0.0033,\n 0.0042, 0.0028, 0.0055, 0.0039, 0.0034, 0.0045, 0.0027, 0.0038, 0.0028,\n 0.0050, 0.0021, 0.0052, 0.0036, 0.0028, 0.0048, 0.0073, 0.0035, 0.0027,\n 0.0042, 0.0029, 0.0034, 0.0036, 0.0023, 0.0039, 0.0057, 0.0035, 0.0034,\n 0.0029, 0.0027, 0.0029, 0.0036, 0.0040, 0.0051, 0.0049, 0.0035, 0.0035,\n 0.0031, 0.0034, 0.0045, 0.0047, 0.0028, 0.0032, 0.0031, 0.0036, 0.0042,\n 0.0067, 0.0031, 0.0046, 0.0023, 0.0039, 0.0051, 0.0041, 0.0025, 0.0038,\n 0.0038, 0.0042, 0.0039, 0.0022, 0.0031, 0.0031, 0.0027, 0.0073, 0.0027,\n 0.0026, 0.0028, 0.0079, 0.0049, 0.0043, 0.0037, 0.0026, 0.0039, 0.0030,\n 0.0042, 0.0031, 0.0024, 0.0041, 0.0034, 0.0043, 0.0052, 0.0031, 0.0025,\n 0.0041, 0.0051, 0.0017, 0.0042, 0.0073, 0.0033, 0.0038, 0.0027, 0.0053,\n 0.0024, 0.0031, 0.0074, 0.0020, 0.0021, 0.0033, 0.0037, 0.0050, 0.0047,\n 0.0067, 0.0035, 0.0030, 0.0046, 0.0024, 0.0049, 0.0049, 0.0055, 0.0028,\n 0.0061, 0.0029, 0.0037, 0.0028, 0.0054, 0.0028, 0.0042, 0.0035, 0.0046,\n 0.0056, 0.0062, 0.0036, 0.0072, 0.0056, 0.0028, 0.0024, 0.0043, 0.0027,\n 0.0030, 0.0105, 0.0024, 0.0044, 0.0044, 0.0050, 0.0052, 0.0048, 0.0052,\n 0.0068, 0.0037, 0.0033, 0.0026, 0.0032, 0.0044, 0.0027, 0.0030, 0.0030,\n 0.0017, 0.0065, 0.0037, 0.0044, 0.0032, 0.0057, 0.0039, 0.0027, 0.0041,\n 0.0043, 0.0045, 0.0055, 0.0032, 0.0052, 0.0013, 0.0041, 0.0028, 0.0038,\n 0.0037, 0.0052, 0.0046, 0.0031, 0.0055, 0.0035, 0.0035, 0.0026, 0.0032,\n 0.0040, 0.0035, 0.0030, 0.0031, 0.0037, 0.0028, 0.0034, 0.0032, 0.0039,\n 0.0027, 0.0030, 0.0073, 0.0046, 0.0006, 0.0043, 0.0042, 0.0032, 0.0050,\n 0.0045, 0.0045, 0.0046, 0.0050, 0.0041, 0.0027, 0.0035, 0.0045, 0.0030,\n 0.0046, 0.0054, 0.0039, 0.0029, 0.0041, 0.0038, 0.0054, 0.0028],\n device='cuda:0')"
|
| 14 |
+
},
|
| 15 |
+
"2": {
|
| 16 |
+
"step": "tensor(11268.)",
|
| 17 |
+
"exp_avg": "tensor([ 4.6031e-03, 3.8414e-03, -3.2292e-03, -3.1939e-03, 9.1023e-04,\n -2.3513e-04, 4.5959e-03, -7.6311e-03, 4.4207e-03, 8.1163e-03,\n 3.5375e-04, 3.0676e-03, 1.8497e-03, 1.0620e-03, 2.1356e-03,\n -3.0428e-03, -1.8022e-03, 1.3218e-02, -9.5368e-03, -6.0689e-03,\n 6.2057e-03, 3.6467e-03, -3.6197e-03, 1.4236e-04, 2.5245e-03,\n 7.4866e-04, -4.5217e-03, -3.5070e-03, 5.5272e-04, -1.6563e-03,\n 3.6899e-03, -1.9559e-03, -1.5528e-03, -2.0160e-03, 6.1850e-05,\n 1.8337e-03, 3.0109e-03, -4.2826e-03, 1.9704e-03, -2.6499e-03,\n 7.2248e-03, 4.4385e-03, 4.9552e-04, -2.9510e-03, -5.2398e-03,\n -7.9122e-04, -1.9608e-03, 3.1324e-03, 1.4843e-03, -4.8122e-03,\n -9.7687e-03, 3.7534e-03, 1.6650e-03, -2.3824e-03, -1.1616e-03,\n 3.7344e-03, -1.9324e-03, -3.6173e-03, 1.0841e-03, -1.8348e-03,\n 5.7453e-03, 1.4949e-03, 5.0766e-03, 1.0625e-02, 3.0771e-04,\n -9.9718e-03, 4.4277e-03, -2.2248e-03, -4.5703e-05, 1.0593e-03,\n -1.2005e-03, 1.4690e-03, 4.8255e-04, 1.4839e-05, 5.1279e-03,\n 3.4938e-04, -1.6224e-03, -2.3893e-03, -1.3471e-03, 3.0477e-03,\n 1.8052e-05, 1.6305e-03, 1.9233e-03, 3.5647e-03, 5.4279e-03,\n -3.2336e-03, 7.8011e-06, 2.0587e-03, 1.1144e-03, 9.1978e-05,\n 6.7845e-04, 8.6417e-03, 6.0411e-03, 1.5114e-03, 5.0670e-05,\n 1.5084e-03, 4.2364e-04, 1.1850e-04, -2.2306e-04, -9.8223e-04,\n 2.2291e-03, -2.3884e-03, 4.8528e-04, 2.6830e-03, 6.6701e-03,\n 7.5323e-04, 1.6600e-03, 4.9449e-03, -1.0605e-03, -6.5662e-03,\n -5.4704e-03, -8.5780e-03, 4.0161e-03, 3.7944e-03, 4.3299e-03,\n 5.4635e-03, 5.8981e-03, -3.3703e-03, 8.9837e-04, -9.8344e-03,\n -4.0125e-03, 1.0754e-03, 1.4701e-03, 2.0397e-03, -6.1496e-03,\n 2.1729e-03, 8.1073e-04, -2.0290e-03, 8.7814e-04, 5.9113e-05,\n -1.9073e-03, 5.2275e-03, 5.3627e-03, 8.2674e-03, -8.2283e-04,\n 4.1513e-03, -8.4229e-04, 2.3005e-03, 7.0055e-04, -4.3464e-03,\n -9.7800e-03, 1.9055e-03, 2.4787e-03, -1.6393e-03, 2.3803e-03,\n 1.7909e-03, 2.9819e-03, -9.9314e-04, 1.6808e-03, -2.5562e-03,\n -1.0654e-03, 2.1034e-03, -2.9216e-03, 9.0053e-03, -8.4874e-04,\n 4.9724e-03, 2.5405e-03, 3.7508e-03, -4.6365e-03, 8.0582e-05,\n 1.1193e-03, -1.0205e-03, -4.9311e-04, -7.5387e-04, -7.4048e-04,\n -1.9900e-03, 1.6978e-04, -3.7926e-03, 7.7339e-03, 3.8856e-03,\n -2.5349e-03, 4.8172e-03, -1.2799e-02, 1.9656e-04, -4.9526e-03,\n 1.5026e-04, -9.5008e-04, 3.9295e-03, -6.6921e-03, -5.6970e-03,\n 2.5982e-04, 3.2726e-03, 3.0716e-03, 2.0146e-03, -5.1987e-05,\n -2.5175e-03, 4.3365e-03, -8.7169e-04, 1.5504e-04, 2.1605e-03,\n -7.1467e-03, 3.3468e-03, 4.1751e-04, 1.4666e-03, 1.4645e-03,\n 2.7804e-03, -4.2210e-03, -1.2208e-03, 1.0280e-03, 4.9141e-05,\n 1.1818e-02, -3.1794e-03, -1.8580e-04, -1.0133e-03, 5.1498e-03,\n 7.2491e-03, 9.0675e-04, -2.2548e-04, -4.4867e-04, 4.7646e-03,\n 4.1554e-03, 2.9549e-03, 9.9204e-04, -1.5838e-03, 2.7689e-03,\n -5.4287e-03, -1.7290e-03, -7.5518e-04, -1.6319e-04, 3.2908e-03,\n -1.0611e-04, -4.8991e-03, 2.4602e-03, -1.6210e-03, 2.3874e-03,\n -3.6197e-03, -1.3555e-03, 3.1904e-03, 1.9771e-03, -7.4417e-04,\n -1.1668e-03, 3.8476e-03, 3.4952e-04, 7.5131e-03, -9.5561e-03,\n -8.8550e-04, -2.6342e-03, 3.2067e-03, 7.3222e-03, 1.8044e-04,\n -1.9948e-03, -2.6955e-03, 1.3186e-03, -5.6526e-05, 3.1656e-03,\n -1.0785e-03, -1.3548e-03, -2.5453e-03, 7.8193e-04, -2.9689e-03,\n -1.9654e-02, -5.2267e-03, 6.0136e-03, -1.4276e-03, 1.5119e-03,\n 2.5791e-03, 3.8273e-03, -3.5307e-05, 1.8323e-03, -2.3449e-03,\n -1.7462e-03, -1.2740e-03, -3.3914e-03, -8.2116e-03, 5.4500e-03,\n -3.0439e-03, -6.7236e-03, -3.3678e-03, -6.2045e-03, 2.1275e-03,\n 4.6509e-03, -1.9296e-03, -2.3216e-03, 3.0205e-03, 2.2999e-03,\n -8.8400e-03, -1.3444e-02, -3.1557e-03, 1.0561e-03, -3.9836e-03,\n -5.4254e-03, -5.4340e-03, -1.6242e-03, 2.4975e-03, 2.7010e-03,\n -1.7493e-03, -1.3494e-02, -6.8247e-04, -4.1344e-03, 4.7789e-03,\n -1.3290e-03, -4.2390e-03, -8.3718e-03, -7.3298e-03, 2.0847e-03,\n -2.0121e-03, -1.9439e-03, -3.8365e-03, -2.0957e-03, 5.8303e-03,\n 1.1020e-03, 7.0487e-03, -6.2595e-03, 1.0694e-02, 5.7238e-03,\n 3.9116e-04, 1.4367e-03, 5.8885e-03, -6.0230e-04, 1.5902e-03,\n 4.9000e-03, 3.8069e-03, 6.0272e-03, 1.0566e-02, 7.0155e-03,\n 4.6006e-04, -2.7682e-03, -5.5391e-05, -6.6375e-04, -2.5775e-03,\n -2.7087e-03, 2.5814e-03, 1.6651e-03, -2.6612e-04, 2.6778e-03,\n 3.4593e-03, 2.8917e-03, -3.0320e-03, -1.7876e-03, 2.1569e-03,\n -6.2723e-03, -6.1374e-03, 3.9420e-04, 4.2423e-03, 8.1284e-03,\n 3.9472e-03, -1.3562e-03, 2.5492e-04, -1.5315e-03, 9.1608e-06,\n 4.4920e-03, -1.6295e-03, -3.2572e-03, 2.3427e-03, -5.6907e-03,\n -2.5304e-04, -2.0085e-03, 1.8788e-03, -1.3715e-03, -3.4304e-03,\n -2.3353e-03, 4.2155e-04, -1.3139e-03, 1.3409e-03, -8.9320e-04,\n 1.2753e-03, 5.4212e-04, -4.7313e-03, 8.5150e-04, -3.3219e-03,\n 2.9567e-03, 1.5426e-04, -1.4802e-03, -3.8006e-03, -5.4012e-04,\n 1.6278e-03, 7.9429e-03, -1.1580e-03, 7.8475e-04, 9.1835e-04,\n 3.3377e-03, 4.7380e-03, 7.4097e-04, 2.1094e-04, -2.3892e-03,\n 2.7122e-03, 2.8022e-03, -3.1145e-03, -6.5356e-04, 6.7523e-03,\n -1.3943e-04, 2.1075e-03, 3.1959e-04, 2.1102e-03, 5.0989e-03,\n 3.0644e-03, -1.0549e-02, 4.1461e-03, 1.8647e-03, -4.5991e-03,\n -8.2869e-03, -2.9202e-04, -1.6917e-03, -8.6949e-03, -8.3796e-03,\n 4.0026e-03, -4.0050e-04, 1.3245e-02, -9.2023e-04, 6.3701e-03,\n -2.1077e-03, 1.2066e-03, 1.7811e-03, -7.9896e-03, 3.1914e-03,\n 2.4350e-03, 3.7398e-03, 1.5302e-03, 3.8779e-04, 1.7003e-03,\n 1.0034e-03, 9.1192e-05, 5.4516e-03, 6.5580e-03, -9.6933e-04,\n 6.8581e-03, 9.7070e-03, -6.5832e-03, -2.9274e-03, 5.9390e-04,\n -5.4972e-04, 1.7227e-03, -9.3981e-04, -4.2824e-03, 2.1858e-04,\n -1.1587e-03, -3.1856e-03, -3.3232e-03, 1.4906e-03, 7.5799e-03,\n 1.4455e-04, -3.3660e-03, 1.5075e-03, 3.6470e-03, 1.8677e-04,\n -1.2670e-04, -7.3287e-04, -1.2467e-03, -3.1353e-03, 9.2901e-04,\n 3.1089e-03, -1.1255e-02, 9.3294e-04, 9.0421e-03, 8.4580e-04,\n 2.0753e-03, 4.0902e-04, -4.7472e-03, -2.5565e-03, -7.2146e-03,\n 1.0558e-03, 2.4445e-03, 2.6439e-03, -2.7508e-04, -6.1490e-03,\n -1.8190e-03, 7.7839e-04, -1.1356e-02, -2.3217e-04, 2.5387e-04,\n -3.7586e-03, -6.1103e-04, 4.6926e-03, 2.1626e-04, -2.0647e-02,\n -2.8967e-03, -2.1483e-03, -1.8751e-03, 5.7021e-03, 4.4498e-03,\n 2.0648e-03, -8.7055e-04, -5.4207e-05, 2.6909e-03, -1.1282e-02,\n 2.7386e-03, 1.8365e-03, -7.0980e-03, 2.4907e-03, -1.9638e-03,\n 3.0068e-03, 1.0957e-03, -1.3711e-03, -1.8079e-03, -6.4161e-03,\n -2.1170e-03, -2.8213e-03, -1.6690e-03, 4.9022e-04, 2.9877e-03,\n 9.0554e-02, 3.2485e-03, -3.7267e-04, -1.2323e-03, -5.9728e-03,\n -3.5801e-03, 1.4613e-03, 1.4700e-03, -2.9388e-03, 7.3458e-04,\n 2.0715e-04, 1.2102e-03, -3.3482e-03, 1.0972e-03, -1.9859e-03,\n 4.5918e-03, 3.0779e-04, -2.8896e-03, 2.3806e-03, -4.2459e-03,\n -6.8443e-03, 3.3495e-04], device='cuda:0')",
|
| 18 |
+
"exp_avg_sq": "tensor([2.0510e-04, 8.3806e-05, 1.9209e-04, 2.9625e-04, 7.9022e-05, 1.4478e-04,\n 1.2275e-04, 1.6409e-04, 6.9286e-05, 1.7391e-04, 1.5150e-04, 2.1322e-04,\n 7.2641e-05, 1.4084e-04, 5.8408e-05, 8.2451e-05, 2.5407e-03, 2.0018e-04,\n 1.5281e-04, 2.1373e-04, 3.2352e-04, 3.0012e-04, 2.0695e-04, 1.1661e-04,\n 1.5065e-04, 3.5781e-04, 2.3552e-04, 1.6728e-04, 5.6722e-05, 4.1652e-04,\n 2.1092e-04, 9.4834e-05, 2.9447e-04, 7.2024e-05, 4.1247e-04, 3.3946e-04,\n 2.5006e-04, 2.9448e-04, 6.9191e-05, 1.2797e-04, 1.2808e-04, 1.3473e-04,\n 5.9471e-05, 6.2346e-05, 1.2532e-04, 8.0942e-05, 2.8777e-04, 2.1116e-04,\n 3.9965e-04, 1.3694e-04, 2.4569e-04, 3.1462e-04, 9.2454e-05, 2.6199e-04,\n 1.4459e-04, 2.1700e-04, 4.3294e-04, 1.7464e-04, 1.7391e-04, 1.9326e-04,\n 2.7840e-04, 5.8868e-05, 1.6706e-04, 3.2890e-04, 6.0247e-04, 1.7157e-04,\n 2.1882e-04, 1.6768e-04, 6.2217e-05, 2.0336e-04, 5.7408e-05, 9.9185e-05,\n 7.3481e-05, 1.0987e-04, 1.4915e-04, 1.0311e-04, 1.5816e-04, 1.3629e-04,\n 2.4542e-04, 1.7058e-04, 1.0789e-04, 7.2277e-05, 1.2309e-04, 1.5573e-04,\n 1.4836e-04, 2.5393e-04, 2.5395e-03, 1.0730e-04, 7.5708e-05, 1.9017e-04,\n 1.9706e-04, 2.8357e-04, 2.3382e-04, 9.3134e-05, 1.0376e-04, 1.0126e-04,\n 1.3854e-04, 1.1510e-04, 1.2616e-04, 7.8228e-05, 7.8773e-05, 7.7489e-05,\n 1.0683e-04, 2.8510e-04, 3.0522e-04, 1.9644e-04, 6.6259e-05, 2.8032e-04,\n 1.0643e-04, 7.1027e-05, 4.6332e-04, 6.9168e-05, 1.4722e-04, 7.3366e-05,\n 4.1206e-04, 9.3134e-05, 7.9513e-05, 1.1088e-04, 2.1105e-04, 1.3084e-04,\n 1.0466e-04, 2.0747e-04, 1.0347e-04, 1.1184e-04, 1.2189e-04, 1.0310e-04,\n 7.8701e-05, 1.9283e-04, 8.8784e-05, 3.3490e-04, 8.1582e-05, 1.4150e-04,\n 3.5524e-04, 2.8823e-04, 1.4981e-04, 2.2581e-04, 8.9535e-05, 2.4395e-04,\n 1.1347e-04, 1.0471e-04, 2.3610e-04, 2.0896e-04, 1.3018e-04, 1.4678e-04,\n 1.0351e-04, 2.0261e-04, 1.4695e-04, 1.8243e-04, 1.2525e-04, 1.2664e-04,\n 6.6111e-05, 2.0982e-04, 1.4609e-04, 2.6815e-04, 1.1765e-04, 2.4027e-04,\n 1.4334e-04, 1.3940e-04, 6.2795e-04, 5.1902e-05, 2.8186e-04, 1.3725e-04,\n 1.7962e-04, 1.2724e-04, 1.6694e-04, 9.9382e-05, 4.1418e-04, 2.0692e-04,\n 7.4849e-05, 1.1559e-04, 3.6700e-04, 1.3524e-04, 4.5050e-04, 1.1478e-04,\n 1.2134e-04, 1.5553e-04, 1.3849e-04, 1.9560e-04, 1.1046e-04, 7.5733e-05,\n 1.2253e-04, 1.3896e-04, 6.0924e-04, 1.6641e-04, 1.2331e-04, 1.7360e-04,\n 7.1555e-04, 4.6004e-04, 8.7272e-05, 2.4146e-04, 4.2352e-04, 4.3940e-04,\n 1.8470e-04, 1.2587e-04, 1.3193e-04, 1.3209e-04, 6.7299e-05, 1.3073e-04,\n 6.1991e-05, 1.1477e-04, 3.3397e-04, 2.2649e-04, 7.7903e-05, 8.7117e-05,\n 1.2864e-04, 2.8865e-04, 4.2018e-04, 2.0918e-04, 1.7651e-04, 1.1490e-04,\n 1.7025e-04, 1.3341e-04, 1.4298e-04, 1.9854e-04, 3.3073e-04, 1.5521e-04,\n 2.1915e-04, 8.1017e-05, 1.3977e-04, 1.4285e-04, 1.6545e-04, 8.3147e-05,\n 6.7920e-05, 1.2364e-04, 2.7555e-04, 1.2925e-04, 2.2466e-04, 7.6891e-05,\n 4.1015e-04, 2.0095e-04, 9.1202e-05, 1.4946e-04, 7.6665e-05, 1.4785e-04,\n 1.4158e-04, 1.2890e-04, 1.7002e-04, 1.6829e-04, 1.8605e-04, 1.3331e-04,\n 7.7996e-05, 1.6107e-04, 1.4757e-04, 1.3541e-04, 3.0117e-04, 8.8065e-05,\n 1.2691e-04, 6.0219e-05, 2.0511e-04, 1.6842e-04, 1.7660e-04, 1.8221e-04,\n 3.9772e-04, 1.2730e-04, 1.2165e-04, 8.0453e-05, 3.3735e-04, 1.2393e-04,\n 1.8029e-04, 1.6901e-04, 1.1408e-04, 1.0025e-04, 2.4972e-04, 3.6130e-04,\n 6.6500e-04, 2.5331e-04, 1.4062e-04, 1.2015e-04, 1.9486e-04, 1.0878e-04,\n 2.6774e-04, 1.6132e-04, 7.7448e-05, 6.4137e-05, 8.6694e-05, 3.5789e-04,\n 7.8061e-04, 2.1915e-04, 1.4181e-04, 9.3310e-05, 4.1607e-05, 1.6389e-04,\n 1.1918e-04, 2.0278e-04, 1.7800e-04, 9.1940e-05, 8.4070e-04, 8.9354e-05,\n 1.1730e-04, 2.5887e-04, 2.3837e-04, 1.2827e-04, 1.7326e-04, 1.1625e-04,\n 1.2378e-04, 1.3956e-04, 5.5600e-05, 1.7812e-04, 7.0813e-05, 1.1460e-04,\n 3.1849e-04, 1.2253e-04, 1.2244e-04, 3.2214e-04, 1.1476e-04, 1.6164e-04,\n 1.5737e-04, 1.3198e-04, 1.4015e-04, 1.1040e-04, 1.1715e-04, 7.9030e-05,\n 1.3946e-04, 9.7418e-05, 4.0768e-04, 2.0917e-04, 8.5965e-05, 2.3098e-04,\n 8.6741e-05, 3.2856e-04, 6.8589e-05, 2.9064e-04, 1.0368e-04, 1.3455e-04,\n 9.7613e-05, 8.8056e-05, 1.9199e-04, 1.2762e-04, 6.9496e-05, 1.4687e-04,\n 1.3379e-04, 2.2946e-04, 1.0574e-04, 1.4873e-04, 5.4321e-04, 3.0484e-04,\n 1.4350e-04, 3.6584e-04, 1.3862e-04, 9.3844e-05, 1.9260e-04, 2.5086e-04,\n 1.1899e-04, 1.3677e-04, 1.1516e-04, 9.8172e-05, 1.3084e-04, 8.7194e-05,\n 3.2213e-04, 6.1555e-05, 2.0787e-04, 9.5455e-05, 6.0082e-05, 1.0063e-04,\n 1.6786e-04, 1.0279e-04, 4.3376e-05, 8.4842e-05, 4.9939e-04, 9.6926e-05,\n 1.0319e-04, 5.5986e-05, 1.1522e-04, 2.0123e-04, 7.5428e-05, 1.1699e-04,\n 1.2990e-04, 1.1132e-04, 1.5856e-04, 2.0114e-04, 2.9910e-04, 1.1756e-04,\n 1.0341e-04, 1.4699e-04, 1.0161e-04, 2.0837e-04, 7.8338e-05, 1.4184e-04,\n 1.0956e-04, 2.1608e-04, 6.2273e-05, 1.1399e-04, 2.0266e-04, 1.7670e-04,\n 8.7753e-05, 9.2387e-05, 2.2438e-04, 6.5876e-05, 5.5013e-05, 8.0055e-04,\n 1.2898e-04, 8.3065e-05, 2.4892e-04, 1.3466e-04, 2.7972e-04, 2.2396e-04,\n 2.8546e-04, 2.7858e-04, 1.6484e-04, 2.7479e-04, 2.5179e-04, 1.0049e-04,\n 7.1356e-05, 6.6756e-05, 8.2939e-05, 8.7599e-05, 1.3917e-04, 1.5429e-04,\n 1.0062e-04, 4.6287e-04, 1.2996e-04, 6.1615e-05, 1.4490e-04, 4.6947e-04,\n 7.1587e-05, 2.7800e-04, 1.0798e-04, 1.3965e-04, 9.2970e-05, 3.1080e-04,\n 2.2700e-04, 1.2227e-04, 1.0361e-04, 1.4243e-04, 6.6576e-05, 1.1744e-04,\n 1.1754e-04, 1.0450e-04, 2.4434e-04, 2.3092e-04, 6.3726e-05, 4.4086e-04,\n 2.2675e-04, 9.1918e-05, 1.7193e-04, 1.5522e-04, 6.2440e-05, 1.5073e-04,\n 1.4978e-04, 8.4504e-05, 1.3663e-04, 1.6371e-04, 1.2061e-04, 3.3514e-04,\n 1.7109e-04, 7.1919e-05, 7.8839e-05, 2.6688e-04, 2.4804e-04, 2.6480e-04,\n 5.4605e-04, 1.2750e-04, 6.7659e-05, 2.6809e-04, 2.4718e-04, 1.0915e-04,\n 5.9791e-05, 6.9328e-04, 1.4067e-04, 1.7131e-04, 1.9421e-04, 9.9171e-05,\n 2.4729e-04, 6.6727e-05, 1.0412e-03, 2.2198e-04, 1.1562e-04, 6.9808e-05,\n 1.7009e-04, 8.0866e-05, 1.1611e-04, 2.8457e-04, 9.2861e-05, 1.6671e-04,\n 2.8571e-04, 2.2840e-04, 1.4715e-04, 2.3671e-04, 3.8296e-05, 1.2920e-04,\n 9.0639e-05, 4.5274e-05, 1.9487e-04, 1.6205e-04, 1.1172e-04, 2.9944e-04,\n 8.6509e-05, 1.7850e-04, 1.1396e-04, 1.2684e-04, 2.2122e-01, 8.6306e-05,\n 1.9528e-04, 1.2665e-04, 1.2955e-04, 2.6791e-04, 2.5287e-04, 1.3545e-04,\n 1.1063e-04, 2.0016e-04, 1.0104e-04, 1.2813e-04, 9.0627e-05, 1.2793e-04,\n 1.6235e-04, 1.8382e-04, 2.3865e-04, 1.9682e-04, 1.9711e-04, 2.1052e-04,\n 2.1417e-04, 1.4999e-04], device='cuda:0')"
|
| 19 |
+
},
|
| 20 |
+
"3": {
|
| 21 |
+
"step": "tensor(11268.)",
|
| 22 |
+
"exp_avg": "tensor([ 5.4963e-03, 4.7430e-03, -4.0248e-03, -1.4083e-03, 1.5550e-03,\n 3.3828e-05, 5.4506e-03, -7.7725e-03, 7.1548e-03, 9.5693e-03,\n 1.0954e-03, 2.6229e-03, 2.5646e-03, 6.4475e-04, 3.6525e-03,\n -3.7067e-03, -5.6845e-04, 1.0483e-02, -9.3560e-03, -4.7041e-03,\n 5.2376e-03, 2.0278e-03, -3.5902e-03, 1.2566e-03, 1.4355e-03,\n -4.7914e-04, -4.4044e-03, -8.3288e-04, 9.5268e-04, -7.7418e-04,\n 4.4206e-03, -3.2964e-03, -1.8292e-03, -2.2963e-03, 8.2590e-05,\n 2.2826e-03, 2.7676e-03, -3.0790e-03, 2.6787e-03, -1.8307e-03,\n 7.4212e-03, 3.5147e-03, 1.2242e-03, -1.8914e-03, -4.3575e-03,\n -6.7957e-04, -2.3989e-03, 4.1398e-03, 1.8731e-03, -4.4378e-03,\n -7.2047e-03, 2.2175e-03, 2.2836e-03, -2.1728e-03, -9.1427e-04,\n 3.5847e-03, -3.8941e-04, -3.9869e-03, 1.0360e-03, -1.8686e-03,\n 5.4004e-03, 2.8413e-03, 4.4163e-03, 8.9700e-03, -1.5906e-03,\n -7.0883e-03, 4.5340e-03, -1.2726e-03, 2.2709e-03, 1.4186e-03,\n 3.6788e-04, 6.8046e-04, 2.3485e-03, 1.1449e-03, 5.8374e-03,\n -2.8092e-05, -1.1297e-03, -3.5184e-03, -1.4009e-03, 3.5660e-03,\n 4.8421e-05, 2.7241e-03, 2.6430e-03, 4.8653e-03, 5.4275e-03,\n -1.8734e-03, -1.3438e-05, 2.4738e-03, 2.2906e-03, -3.6561e-04,\n 9.0090e-04, 6.0102e-03, 4.5734e-03, 2.0248e-03, 4.3238e-04,\n 1.0754e-03, 9.4883e-04, 5.3191e-04, -8.8331e-05, -1.3887e-03,\n 2.3399e-03, -2.5943e-03, -5.7812e-04, 3.0454e-03, 5.3244e-03,\n 1.3874e-03, 3.2678e-03, 3.1789e-03, -7.2287e-04, -6.4219e-03,\n -3.4301e-03, -1.4170e-02, 5.2553e-03, 3.6569e-03, 3.1406e-03,\n 6.1391e-03, 7.9604e-03, -6.0706e-03, -9.2640e-05, -8.0841e-03,\n -1.9609e-03, 1.2715e-04, 2.2329e-03, 2.2241e-03, -8.4700e-03,\n 1.8917e-03, 1.0053e-03, -1.2007e-03, 2.5422e-03, 5.5125e-04,\n -1.7878e-03, 5.0625e-03, 5.0555e-03, 6.8077e-03, -7.2427e-04,\n 3.0490e-03, -4.5750e-04, 2.5926e-03, 1.2036e-03, -5.4331e-03,\n -8.2960e-03, 2.0474e-03, 3.7691e-03, -2.9137e-03, 4.3434e-03,\n 5.7788e-04, 2.0554e-03, 2.5797e-04, 2.9001e-03, -4.8123e-03,\n 7.4253e-05, 1.8946e-03, -1.6701e-03, 8.3658e-03, -1.1522e-04,\n 3.5299e-03, 3.3998e-03, 3.9038e-03, -3.2444e-03, 1.2970e-03,\n 3.9325e-04, -1.2674e-03, -6.4865e-04, -1.9806e-03, -5.8817e-04,\n -3.7530e-03, 8.3653e-04, -2.9218e-03, 6.4118e-03, 4.9168e-03,\n -3.0033e-03, 4.9653e-03, -8.5044e-03, -2.4650e-04, -6.1642e-03,\n -2.6806e-04, 2.9728e-04, 2.4437e-03, -6.4031e-03, -5.6365e-03,\n 1.8616e-03, 3.7438e-03, 1.5763e-03, 1.9528e-03, 7.6535e-05,\n -2.6236e-03, 3.3895e-03, 5.1176e-04, 8.9902e-04, 2.9665e-03,\n -4.5563e-03, 2.8176e-03, -1.0759e-03, 2.3099e-03, 2.2629e-03,\n 4.1539e-03, -3.6871e-03, -1.0980e-03, 8.7271e-04, -1.8348e-04,\n 8.6846e-03, -1.9454e-03, -9.2581e-05, -2.7996e-03, 4.7113e-03,\n 5.1785e-03, -5.5440e-04, 1.3341e-04, 9.2039e-05, 5.0359e-03,\n 4.1494e-03, 3.1558e-03, 9.9468e-05, -2.4139e-03, 2.4921e-03,\n -5.0111e-03, -1.9165e-03, -1.3927e-03, 2.2155e-04, 2.9905e-03,\n 1.6916e-04, -8.7488e-03, 4.3917e-03, -2.5904e-03, 2.0570e-03,\n -9.6949e-04, 7.6255e-04, 4.7390e-03, 1.7449e-03, 7.4699e-04,\n -2.0459e-03, 3.3566e-03, 2.1540e-04, 5.6627e-03, -8.0548e-03,\n -2.0072e-04, -3.7390e-03, 4.0138e-03, 6.7495e-03, -1.3847e-03,\n -1.7496e-03, -2.3447e-03, 1.1748e-03, 5.6168e-04, 2.8790e-03,\n -2.1559e-03, -2.2870e-03, -3.5533e-03, 1.4087e-03, -4.2750e-03,\n -1.9081e-02, -4.9395e-03, 2.3475e-03, -2.0001e-03, 2.0495e-03,\n 2.6339e-03, 3.5503e-03, -2.7912e-04, 3.1838e-03, -1.3184e-03,\n -8.5373e-04, -1.6712e-03, -3.8906e-03, -4.4631e-03, 4.0177e-03,\n -2.6813e-03, -6.2399e-03, -4.1464e-03, -3.7556e-03, 1.5627e-03,\n 3.2060e-03, -1.1087e-03, -3.9357e-03, 2.8580e-03, 1.6715e-03,\n -6.9963e-03, -7.5373e-03, -2.9721e-03, 8.9668e-04, -4.5823e-03,\n -3.5330e-03, -4.6048e-03, -2.5657e-03, 2.3766e-03, 3.0524e-03,\n -3.2261e-03, -7.1544e-03, -1.8399e-03, -5.5553e-03, 4.1268e-03,\n -1.2135e-03, -5.1752e-03, -9.5901e-03, -8.6232e-03, 1.0752e-04,\n -1.6234e-03, -1.7640e-03, -3.9187e-03, -3.2315e-03, 6.0223e-03,\n 1.0689e-03, 6.5009e-03, -6.6701e-03, 8.9099e-03, 6.3557e-03,\n 7.2312e-04, 1.1623e-03, 5.0886e-03, -1.1016e-03, 1.2345e-03,\n 5.8901e-03, 4.6557e-03, 5.2362e-03, 1.0575e-02, 4.4074e-03,\n -9.2620e-04, -3.0635e-03, 1.3678e-03, -1.0071e-03, -1.4765e-03,\n -3.3488e-03, 2.6837e-03, 1.9822e-03, -1.2483e-03, 2.0844e-03,\n 4.6727e-03, 2.5145e-03, -2.5944e-03, -7.6292e-04, 2.1418e-03,\n -8.0467e-03, -5.7154e-03, 9.0032e-04, 1.7313e-03, 4.6672e-03,\n 3.4067e-03, -1.8186e-03, -8.9134e-04, -2.1030e-03, 4.6482e-04,\n 3.8873e-03, -1.6425e-03, -3.5835e-03, 2.5693e-03, -5.9273e-03,\n -2.1550e-03, -1.8777e-03, 1.8784e-03, -1.7701e-03, -5.7671e-03,\n -2.4007e-03, 1.7828e-03, -1.2788e-03, 2.1335e-03, -7.0581e-04,\n 1.3535e-03, 2.2416e-03, -5.6262e-03, 3.9882e-04, -3.0642e-03,\n 3.1964e-03, 6.8428e-04, -1.1841e-03, -3.2968e-03, -3.5521e-03,\n 5.8385e-04, 6.2244e-03, -3.0622e-03, 1.1526e-03, 3.8646e-05,\n 1.7998e-03, 3.8293e-03, 5.8043e-04, 6.8281e-04, -2.4062e-03,\n 2.1559e-03, 2.9930e-03, -5.1939e-03, -7.4762e-04, 6.1697e-03,\n 4.4007e-05, 2.7683e-03, 2.0966e-03, 1.8860e-03, 6.0972e-03,\n 3.0357e-03, -8.5546e-03, 5.1571e-03, 1.3850e-03, -3.1298e-03,\n -9.4185e-03, 5.4805e-04, -1.7740e-03, -1.0145e-02, -6.8361e-03,\n 4.1886e-03, -1.9090e-04, 1.0550e-02, -1.9185e-03, 4.7273e-03,\n -1.1728e-03, 1.3070e-03, 2.0972e-03, -1.4157e-02, 2.7003e-03,\n 2.3464e-03, 4.4675e-03, 1.5610e-03, -3.3328e-04, 8.6911e-04,\n 7.7954e-04, 8.4528e-04, 5.4616e-03, 4.6245e-03, -1.8661e-03,\n 4.4304e-03, 7.8746e-03, -7.2112e-03, -3.6785e-03, 4.9863e-04,\n 2.2939e-04, 2.8821e-03, -1.3124e-03, -4.8173e-03, -1.2533e-05,\n -5.7352e-04, -3.5639e-03, -1.9690e-03, 1.4324e-04, 6.3701e-03,\n 2.3421e-04, -3.4341e-03, 1.4396e-03, 5.0155e-03, -5.7275e-04,\n -1.5474e-03, -9.3677e-04, -7.4700e-04, -3.7526e-03, 1.1583e-03,\n 2.8651e-03, -1.1101e-02, 1.4088e-03, 6.9871e-03, 6.6163e-04,\n 1.8055e-03, 5.0432e-04, -2.8536e-03, -3.1850e-03, -6.1666e-03,\n 1.2365e-03, 3.3870e-03, 3.2168e-03, -6.4184e-04, -7.9035e-03,\n -6.5664e-04, 4.0548e-04, -7.4354e-03, 1.1238e-03, 3.8202e-05,\n -8.3827e-04, -1.6698e-03, 4.2179e-03, 8.6767e-04, -9.0839e-03,\n -1.3061e-03, -1.9247e-03, -2.0405e-03, 5.5290e-03, 5.2795e-03,\n 1.8482e-03, -1.6244e-03, 2.4344e-04, 2.2126e-03, -1.0107e-02,\n 1.1335e-03, 1.4815e-03, -4.4475e-03, 2.9604e-03, -2.3368e-03,\n 3.1147e-03, 1.5343e-03, -2.1815e-03, -2.1478e-03, -7.9553e-03,\n -2.8086e-03, -3.3085e-03, -1.1421e-03, 7.6244e-04, 3.7148e-03,\n 4.7414e-03, 3.5980e-03, 5.7304e-04, -4.4697e-04, -7.5879e-03,\n -4.0099e-03, 1.7911e-03, 1.5537e-03, -2.1261e-03, 6.9491e-04,\n -1.4616e-04, 6.5810e-04, -4.1163e-03, 2.2687e-03, -1.9653e-03,\n 5.1586e-03, -4.0530e-05, -2.5792e-03, 1.9678e-04, -3.6682e-03,\n -6.5094e-03, 8.3818e-04], device='cuda:0')",
|
| 23 |
+
"exp_avg_sq": "tensor([2.1573e-04, 1.7040e-04, 1.7038e-04, 1.8062e-04, 1.4541e-04, 1.4436e-04,\n 1.3958e-04, 1.4443e-04, 1.4733e-04, 2.0846e-04, 1.2593e-04, 2.0160e-04,\n 1.1078e-04, 1.3399e-04, 1.0320e-04, 1.1724e-04, 3.0041e-04, 1.2290e-04,\n 1.3017e-04, 1.4862e-04, 1.8797e-04, 2.2170e-04, 1.8244e-04, 1.3957e-04,\n 2.2272e-04, 2.4817e-04, 1.9321e-04, 1.4417e-04, 1.0489e-04, 1.5551e-04,\n 1.6759e-04, 1.6332e-04, 2.7213e-04, 1.0508e-04, 2.2510e-04, 1.9691e-04,\n 1.5635e-04, 2.0118e-04, 9.8800e-05, 1.2709e-04, 1.7152e-04, 1.3141e-04,\n 1.0887e-04, 1.1577e-04, 1.4897e-04, 1.3871e-04, 3.4403e-04, 2.1911e-04,\n 2.1445e-04, 1.6440e-04, 1.4479e-04, 2.1446e-04, 1.3322e-04, 1.4894e-04,\n 2.3685e-04, 2.1748e-04, 2.0487e-04, 1.8971e-04, 1.5455e-04, 1.5976e-04,\n 2.2389e-04, 1.2586e-04, 2.2205e-04, 1.8355e-04, 2.0974e-04, 1.3205e-04,\n 1.7986e-04, 1.7977e-04, 1.7327e-04, 2.3673e-04, 1.3874e-04, 1.0916e-04,\n 1.7896e-04, 1.1262e-04, 1.6446e-04, 2.1029e-04, 1.6416e-04, 1.5940e-04,\n 2.0283e-04, 2.6305e-04, 2.0815e-04, 1.0759e-04, 1.9866e-04, 1.8840e-04,\n 1.4441e-04, 1.9160e-04, 2.6832e-04, 2.1348e-04, 1.0715e-04, 2.0264e-04,\n 1.6970e-04, 2.0024e-04, 1.6187e-04, 1.3800e-04, 1.7192e-04, 1.4157e-04,\n 2.0431e-04, 1.7878e-04, 1.5533e-04, 1.2926e-04, 9.8874e-05, 1.1021e-04,\n 1.5996e-04, 1.4446e-04, 2.0938e-04, 1.7107e-04, 1.5716e-04, 1.6736e-04,\n 1.6487e-04, 1.1305e-04, 1.6413e-04, 1.3798e-04, 1.6575e-04, 9.5472e-05,\n 2.0494e-04, 1.0427e-04, 1.0303e-04, 2.0808e-04, 1.4484e-04, 1.3261e-04,\n 9.8404e-05, 1.2040e-04, 1.3890e-04, 1.6777e-04, 1.7964e-04, 1.3609e-04,\n 1.3363e-04, 1.5826e-04, 1.3496e-04, 1.7776e-04, 1.2649e-04, 1.3881e-04,\n 2.1103e-04, 1.6071e-04, 1.6235e-04, 1.5001e-04, 1.1593e-04, 1.2461e-04,\n 1.6712e-04, 1.6665e-04, 2.4513e-04, 1.4922e-04, 1.1304e-04, 1.9934e-04,\n 1.4718e-04, 1.6173e-04, 1.6534e-04, 1.8727e-04, 1.7129e-04, 1.8882e-04,\n 9.4472e-05, 1.8466e-04, 1.2529e-04, 1.8097e-04, 9.7822e-05, 1.6977e-04,\n 1.7882e-04, 1.6294e-04, 2.4500e-04, 9.3801e-05, 1.7841e-04, 1.3550e-04,\n 1.8013e-04, 1.2506e-04, 1.9515e-04, 1.3114e-04, 1.6211e-04, 1.4212e-04,\n 1.2176e-04, 1.6249e-04, 2.8794e-04, 1.3669e-04, 1.8918e-04, 1.2198e-04,\n 1.5207e-04, 1.1973e-04, 1.4848e-04, 1.6911e-04, 9.1584e-05, 1.1319e-04,\n 1.6474e-04, 2.6720e-04, 1.9567e-04, 1.7426e-04, 2.0934e-04, 1.5328e-04,\n 1.8462e-04, 1.9925e-04, 1.2162e-04, 1.7499e-04, 1.7929e-04, 1.8170e-04,\n 2.5418e-04, 1.4203e-04, 1.4312e-04, 1.4255e-04, 9.5125e-05, 1.5373e-04,\n 1.0727e-04, 2.6027e-04, 1.7458e-04, 2.1639e-04, 9.1851e-05, 1.4744e-04,\n 1.4546e-04, 1.7229e-04, 1.8147e-04, 1.2985e-04, 1.7165e-04, 1.6421e-04,\n 1.1665e-04, 1.9152e-04, 2.0037e-04, 2.7069e-04, 2.4448e-04, 1.2516e-04,\n 1.8587e-04, 8.5424e-05, 1.3037e-04, 1.5884e-04, 1.4222e-04, 1.4875e-04,\n 1.4851e-04, 1.8273e-04, 1.3884e-04, 1.5180e-04, 1.5798e-04, 1.1732e-04,\n 2.5763e-04, 2.5122e-04, 1.4329e-04, 1.3350e-04, 1.3681e-04, 1.2451e-04,\n 1.5612e-04, 1.9751e-04, 1.8420e-04, 2.1707e-04, 1.7809e-04, 2.2998e-04,\n 1.4099e-04, 1.4122e-04, 1.7062e-04, 1.3771e-04, 1.7579e-04, 1.5282e-04,\n 1.2823e-04, 9.8942e-05, 1.4787e-04, 2.3973e-04, 1.4117e-04, 1.5558e-04,\n 1.4325e-04, 2.8170e-04, 1.5787e-04, 1.3163e-04, 3.8614e-04, 1.5408e-04,\n 1.8014e-04, 1.8856e-04, 1.4984e-04, 1.9578e-04, 1.5336e-04, 1.8215e-04,\n 1.9563e-04, 2.1130e-04, 1.1604e-04, 1.6738e-04, 1.1553e-04, 1.2225e-04,\n 1.5040e-04, 1.1279e-04, 1.3362e-04, 1.0925e-04, 1.2758e-04, 3.3495e-04,\n 1.7492e-04, 1.7874e-04, 2.5960e-04, 1.5510e-04, 8.3055e-05, 2.0366e-04,\n 1.5411e-04, 1.4091e-04, 2.4547e-04, 1.3766e-04, 2.8228e-04, 1.2292e-04,\n 1.6547e-04, 1.8751e-04, 2.1033e-04, 1.9490e-04, 2.0179e-04, 1.5419e-04,\n 2.1953e-04, 1.7697e-04, 1.2468e-04, 2.1175e-04, 1.5018e-04, 2.2734e-04,\n 2.5052e-04, 1.2400e-04, 1.4513e-04, 1.9984e-04, 1.2274e-04, 1.4489e-04,\n 1.6986e-04, 1.2330e-04, 1.8691e-04, 1.4393e-04, 1.3128e-04, 1.2291e-04,\n 1.1759e-04, 1.1409e-04, 1.9033e-04, 2.0421e-04, 8.9738e-05, 2.2919e-04,\n 1.3080e-04, 1.8510e-04, 1.2140e-04, 2.9983e-04, 1.5255e-04, 1.2502e-04,\n 1.2516e-04, 1.2181e-04, 1.5399e-04, 1.6615e-04, 7.4472e-05, 1.5488e-04,\n 1.8880e-04, 1.8616e-04, 1.2604e-04, 1.2078e-04, 2.1757e-04, 1.8014e-04,\n 1.3905e-04, 2.4975e-04, 1.8555e-04, 1.5663e-04, 1.6906e-04, 2.0859e-04,\n 1.2963e-04, 1.2522e-04, 1.4723e-04, 1.6353e-04, 1.1451e-04, 1.2347e-04,\n 1.9992e-04, 7.8648e-05, 1.9374e-04, 1.7710e-04, 8.7534e-05, 1.5056e-04,\n 1.3313e-04, 1.3718e-04, 1.0416e-04, 1.2937e-04, 2.3780e-04, 1.1484e-04,\n 1.2601e-04, 9.2594e-05, 1.3477e-04, 1.3041e-04, 1.0112e-04, 1.3617e-04,\n 1.1182e-04, 2.3110e-04, 1.2497e-04, 1.3350e-04, 1.7417e-04, 2.1045e-04,\n 1.6508e-04, 1.7232e-04, 1.2326e-04, 1.4890e-04, 1.2446e-04, 1.3582e-04,\n 1.4434e-04, 1.4024e-04, 8.4816e-05, 1.5148e-04, 1.8303e-04, 1.9110e-04,\n 1.4682e-04, 9.2828e-05, 1.4868e-04, 1.1589e-04, 1.3229e-04, 2.2120e-04,\n 1.5804e-04, 1.6090e-04, 1.8498e-04, 1.2864e-04, 1.7418e-04, 2.0941e-04,\n 1.6102e-04, 1.7146e-04, 2.5199e-04, 1.4477e-04, 1.2744e-04, 1.2812e-04,\n 1.2062e-04, 1.3431e-04, 1.3019e-04, 1.5634e-04, 1.4164e-04, 1.2436e-04,\n 1.5376e-04, 2.0460e-04, 1.5971e-04, 1.1762e-04, 1.9444e-04, 2.4276e-04,\n 1.4207e-04, 1.6886e-04, 1.2454e-04, 1.1528e-04, 1.7686e-04, 1.7112e-04,\n 1.8805e-04, 1.2942e-04, 1.6003e-04, 1.8195e-04, 1.5850e-04, 1.3352e-04,\n 2.0141e-04, 1.4256e-04, 1.5630e-04, 1.4623e-04, 1.2325e-04, 2.0054e-04,\n 1.7317e-04, 2.7049e-04, 1.2283e-04, 1.9596e-04, 1.0796e-04, 2.1750e-04,\n 1.9793e-04, 1.3829e-04, 1.7652e-04, 2.5102e-04, 1.5220e-04, 1.8967e-04,\n 1.3502e-04, 1.0457e-04, 1.3219e-04, 1.6260e-04, 1.7112e-04, 1.8851e-04,\n 1.9126e-04, 1.6657e-04, 1.0153e-04, 2.2633e-04, 1.9465e-04, 1.8915e-04,\n 1.1831e-04, 2.4155e-04, 1.5764e-04, 2.0595e-04, 1.9779e-04, 1.7722e-04,\n 1.7701e-04, 1.3326e-04, 1.8280e-04, 1.8936e-04, 1.2065e-04, 1.1169e-04,\n 1.4183e-04, 1.5032e-04, 1.4535e-04, 1.8723e-04, 1.6531e-04, 1.7936e-04,\n 1.7036e-04, 1.5361e-04, 1.4928e-04, 1.9462e-04, 7.8423e-05, 1.2686e-04,\n 1.1321e-04, 9.4586e-05, 1.4341e-04, 1.3840e-04, 1.2228e-04, 2.0378e-04,\n 9.4703e-05, 1.4659e-04, 1.9398e-04, 1.7071e-04, 5.8368e-04, 1.4062e-04,\n 1.9799e-04, 1.2984e-04, 1.9854e-04, 2.2106e-04, 2.3662e-04, 1.8237e-04,\n 1.5583e-04, 1.8674e-04, 1.2502e-04, 1.3130e-04, 1.4528e-04, 1.2179e-04,\n 1.9350e-04, 2.2034e-04, 1.8312e-04, 1.4296e-04, 1.6866e-04, 1.6247e-04,\n 2.2814e-04, 1.3198e-04], device='cuda:0')"
|
| 24 |
+
},
|
| 25 |
+
"4": {
|
| 26 |
+
"step": "tensor(11268.)",
|
| 27 |
+
"exp_avg": "tensor([[-6.6093e-05, -9.0981e-05, 2.8383e-04, ..., 1.0390e-04,\n -4.2067e-05, 2.7262e-04],\n [-1.7330e-04, -1.8083e-04, -8.3192e-04, ..., 8.0123e-05,\n 1.4059e-04, 1.4396e-04],\n [ 2.8891e-04, -2.9809e-05, -4.9655e-04, ..., 9.5496e-05,\n -2.8312e-06, -5.1555e-04],\n ...,\n [-1.5571e-04, 3.6740e-04, 4.8044e-04, ..., -2.4438e-04,\n -1.2069e-04, -1.7865e-04],\n [ 1.4714e-04, 1.8102e-05, -1.0890e-04, ..., 6.8962e-05,\n -5.0974e-05, -3.0989e-04],\n [ 3.2825e-04, 1.7737e-04, 3.8763e-06, ..., 2.1211e-04,\n 9.8837e-05, 1.9245e-04]], device='cuda:0')",
|
| 28 |
+
"exp_avg_sq": "tensor([[2.9485e-07, 4.8727e-07, 1.0663e-07, ..., 2.0237e-07, 2.3695e-07,\n 3.4306e-07],\n [6.0073e-07, 1.1080e-06, 3.9923e-07, ..., 7.0824e-07, 6.4851e-07,\n 8.9577e-07],\n [7.0705e-07, 3.8627e-07, 3.3574e-07, ..., 7.2503e-07, 3.8702e-07,\n 7.2226e-07],\n ...,\n [4.1669e-07, 1.0834e-06, 4.4769e-07, ..., 5.7935e-07, 8.3852e-07,\n 7.1874e-07],\n [4.0662e-07, 8.5070e-07, 2.4924e-07, ..., 8.1077e-07, 4.6570e-07,\n 5.6462e-07],\n [3.6075e-07, 5.4725e-07, 3.3877e-07, ..., 1.1538e-06, 4.3584e-07,\n 7.2429e-07]], device='cuda:0')"
|
| 29 |
+
},
|
| 30 |
+
"5": {
|
| 31 |
+
"step": "tensor(11268.)",
|
| 32 |
+
"exp_avg": "tensor([[-9.5391e-05, -6.8253e-05, -2.2773e-06, ..., 7.4789e-05,\n -1.2514e-04, 2.6546e-05],\n [-3.3656e-05, -8.8968e-05, -7.8372e-04, ..., 7.7203e-05,\n 2.7927e-06, 1.8347e-04],\n [ 2.2377e-04, -2.0569e-05, 1.1707e-04, ..., 2.6661e-05,\n 2.8227e-05, -2.3756e-04],\n ...,\n [ 6.4193e-06, -1.1272e-06, -2.3610e-04, ..., -1.9651e-04,\n -1.2410e-04, -2.3277e-05],\n [ 9.0196e-05, -9.5642e-05, 4.4119e-04, ..., -3.6351e-06,\n -9.4623e-05, 1.4011e-05],\n [-1.4057e-04, -5.8231e-05, 2.8134e-04, ..., -2.2363e-04,\n -1.8599e-05, 2.1270e-04]], device='cuda:0')",
|
| 33 |
+
"exp_avg_sq": "tensor([[1.0448e-07, 8.3303e-08, 8.7546e-08, ..., 1.1312e-07, 8.1904e-08,\n 1.2972e-07],\n [1.9757e-07, 4.2987e-07, 2.3422e-07, ..., 1.6800e-07, 2.3281e-07,\n 2.6276e-07],\n [2.5370e-07, 1.5869e-07, 1.8138e-07, ..., 3.5818e-07, 2.9619e-07,\n 2.3982e-07],\n ...,\n [2.1249e-07, 5.5684e-07, 8.6314e-08, ..., 4.4893e-07, 2.8963e-07,\n 2.5907e-07],\n [2.2515e-07, 3.0465e-07, 9.0081e-08, ..., 3.8294e-07, 1.9086e-07,\n 2.3638e-07],\n [2.1392e-07, 1.5179e-07, 3.9882e-07, ..., 2.0801e-07, 1.5775e-07,\n 2.4518e-07]], device='cuda:0')"
|
| 34 |
+
},
|
| 35 |
+
"6": {
|
| 36 |
+
"step": "tensor(11268.)",
|
| 37 |
+
"exp_avg": "tensor([-0.0015, 0.0015], device='cuda:0')",
|
| 38 |
+
"exp_avg_sq": "tensor([7.9509e-06, 7.9509e-06], device='cuda:0')"
|
| 39 |
+
}
|
| 40 |
+
},
|
| 41 |
+
"param_groups": [
|
| 42 |
+
{
|
| 43 |
+
"lr": 2.5447270110570814e-05,
|
| 44 |
+
"name": "shared",
|
| 45 |
+
"betas": [
|
| 46 |
+
0.9,
|
| 47 |
+
0.999
|
| 48 |
+
],
|
| 49 |
+
"eps": 1e-08,
|
| 50 |
+
"weight_decay": 1e-05,
|
| 51 |
+
"amsgrad": false,
|
| 52 |
+
"maximize": false,
|
| 53 |
+
"foreach": null,
|
| 54 |
+
"capturable": false,
|
| 55 |
+
"differentiable": false,
|
| 56 |
+
"fused": null,
|
| 57 |
+
"decoupled_weight_decay": true,
|
| 58 |
+
"initial_lr": 0.001,
|
| 59 |
+
"params": [
|
| 60 |
+
0,
|
| 61 |
+
1,
|
| 62 |
+
2,
|
| 63 |
+
3
|
| 64 |
+
]
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"lr": 2.5447270110570814e-05,
|
| 68 |
+
"name": "scale_256",
|
| 69 |
+
"betas": [
|
| 70 |
+
0.9,
|
| 71 |
+
0.999
|
| 72 |
+
],
|
| 73 |
+
"eps": 1e-08,
|
| 74 |
+
"weight_decay": 1e-05,
|
| 75 |
+
"amsgrad": false,
|
| 76 |
+
"maximize": false,
|
| 77 |
+
"foreach": null,
|
| 78 |
+
"capturable": false,
|
| 79 |
+
"differentiable": false,
|
| 80 |
+
"fused": null,
|
| 81 |
+
"decoupled_weight_decay": true,
|
| 82 |
+
"initial_lr": 0.001,
|
| 83 |
+
"params": [
|
| 84 |
+
4
|
| 85 |
+
]
|
| 86 |
+
},
|
| 87 |
+
{
|
| 88 |
+
"lr": 2.5447270110570814e-05,
|
| 89 |
+
"name": "scale_512",
|
| 90 |
+
"betas": [
|
| 91 |
+
0.9,
|
| 92 |
+
0.999
|
| 93 |
+
],
|
| 94 |
+
"eps": 1e-08,
|
| 95 |
+
"weight_decay": 1e-05,
|
| 96 |
+
"amsgrad": false,
|
| 97 |
+
"maximize": false,
|
| 98 |
+
"foreach": null,
|
| 99 |
+
"capturable": false,
|
| 100 |
+
"differentiable": false,
|
| 101 |
+
"fused": null,
|
| 102 |
+
"decoupled_weight_decay": true,
|
| 103 |
+
"initial_lr": 0.001,
|
| 104 |
+
"params": [
|
| 105 |
+
5
|
| 106 |
+
]
|
| 107 |
+
},
|
| 108 |
+
{
|
| 109 |
+
"lr": 1.3211399184359193e-05,
|
| 110 |
+
"name": "fusion",
|
| 111 |
+
"betas": [
|
| 112 |
+
0.9,
|
| 113 |
+
0.999
|
| 114 |
+
],
|
| 115 |
+
"eps": 1e-08,
|
| 116 |
+
"weight_decay": 1e-05,
|
| 117 |
+
"amsgrad": false,
|
| 118 |
+
"maximize": false,
|
| 119 |
+
"foreach": null,
|
| 120 |
+
"capturable": false,
|
| 121 |
+
"differentiable": false,
|
| 122 |
+
"fused": null,
|
| 123 |
+
"decoupled_weight_decay": true,
|
| 124 |
+
"initial_lr": 0.0005,
|
| 125 |
+
"params": [
|
| 126 |
+
6
|
| 127 |
+
]
|
| 128 |
+
}
|
| 129 |
+
]
|
| 130 |
+
},
|
| 131 |
+
"scheduler_state_dict": {
|
| 132 |
+
"T_0": 10,
|
| 133 |
+
"T_i": 10,
|
| 134 |
+
"T_mult": 2,
|
| 135 |
+
"eta_min": 1e-06,
|
| 136 |
+
"T_cur": 9,
|
| 137 |
+
"base_lrs": [
|
| 138 |
+
0.001,
|
| 139 |
+
0.001,
|
| 140 |
+
0.001,
|
| 141 |
+
0.0005
|
| 142 |
+
],
|
| 143 |
+
"last_epoch": 9,
|
| 144 |
+
"_step_count": 0,
|
| 145 |
+
"_is_initial": false,
|
| 146 |
+
"_get_lr_called_within_step": false,
|
| 147 |
+
"_last_lr": [
|
| 148 |
+
2.5447270110570814e-05,
|
| 149 |
+
2.5447270110570814e-05,
|
| 150 |
+
2.5447270110570814e-05,
|
| 151 |
+
1.3211399184359193e-05
|
| 152 |
+
]
|
| 153 |
+
},
|
| 154 |
+
"metrics": {
|
| 155 |
+
"best_val_acc": 71.914,
|
| 156 |
+
"best_epoch": 8,
|
| 157 |
+
"scale_accuracies": {
|
| 158 |
+
"256": 71.568,
|
| 159 |
+
"512": 71.708
|
| 160 |
+
},
|
| 161 |
+
"training_history": {
|
| 162 |
+
"epochs": [
|
| 163 |
+
1,
|
| 164 |
+
2,
|
| 165 |
+
3,
|
| 166 |
+
4,
|
| 167 |
+
5,
|
| 168 |
+
6,
|
| 169 |
+
7,
|
| 170 |
+
8,
|
| 171 |
+
9
|
| 172 |
+
],
|
| 173 |
+
"train_loss": [
|
| 174 |
+
5.603825211905824,
|
| 175 |
+
4.145695094292918,
|
| 176 |
+
3.767625377201044,
|
| 177 |
+
3.572572173021091,
|
| 178 |
+
3.45402966711087,
|
| 179 |
+
3.3684399169854844,
|
| 180 |
+
3.3046513376906277,
|
| 181 |
+
3.255301171407913,
|
| 182 |
+
3.219453362611155
|
| 183 |
+
],
|
| 184 |
+
"train_acc": [
|
| 185 |
+
63.39829233815732,
|
| 186 |
+
69.48282308239285,
|
| 187 |
+
70.34266414917025,
|
| 188 |
+
70.91120829681064,
|
| 189 |
+
71.35642738222262,
|
| 190 |
+
71.84090754757186,
|
| 191 |
+
72.29198067074785,
|
| 192 |
+
72.7457856782137,
|
| 193 |
+
73.02162793765372
|
| 194 |
+
],
|
| 195 |
+
"val_acc": [
|
| 196 |
+
68.056,
|
| 197 |
+
69.624,
|
| 198 |
+
70.142,
|
| 199 |
+
70.622,
|
| 200 |
+
70.966,
|
| 201 |
+
71.462,
|
| 202 |
+
71.766,
|
| 203 |
+
71.776,
|
| 204 |
+
71.914
|
| 205 |
+
],
|
| 206 |
+
"scale_accs": {
|
| 207 |
+
"256": [
|
| 208 |
+
66.924,
|
| 209 |
+
68.828,
|
| 210 |
+
69.54,
|
| 211 |
+
69.996,
|
| 212 |
+
70.468,
|
| 213 |
+
71.106,
|
| 214 |
+
71.436,
|
| 215 |
+
71.486,
|
| 216 |
+
71.568
|
| 217 |
+
],
|
| 218 |
+
"512": [
|
| 219 |
+
67.882,
|
| 220 |
+
69.572,
|
| 221 |
+
70.028,
|
| 222 |
+
70.482,
|
| 223 |
+
70.854,
|
| 224 |
+
71.28,
|
| 225 |
+
71.578,
|
| 226 |
+
71.694,
|
| 227 |
+
71.708
|
| 228 |
+
]
|
| 229 |
+
},
|
| 230 |
+
"lr": [
|
| 231 |
+
0.0009755527298894294,
|
| 232 |
+
0.0009046039886902864,
|
| 233 |
+
0.0007940987335200904,
|
| 234 |
+
0.0006548539886902864,
|
| 235 |
+
0.0005005000000000001,
|
| 236 |
+
0.0003461460113097139,
|
| 237 |
+
0.00020690126647990973,
|
| 238 |
+
9.639601130971382e-05,
|
| 239 |
+
2.5447270110570814e-05
|
| 240 |
+
]
|
| 241 |
+
}
|
| 242 |
+
},
|
| 243 |
+
"train_config": {
|
| 244 |
+
"name": "david_training",
|
| 245 |
+
"run_id": "20251012_135249",
|
| 246 |
+
"dataset_name": "AbstractPhil/imagenet-clip-features-orderly",
|
| 247 |
+
"model_variant": "clip_vit_laion_b32",
|
| 248 |
+
"num_classes": 1000,
|
| 249 |
+
"preset": "small_fast",
|
| 250 |
+
"custom_config_path": null,
|
| 251 |
+
"num_classes_override": null,
|
| 252 |
+
"use_belly_override": null,
|
| 253 |
+
"belly_expand_override": null,
|
| 254 |
+
"progressive_training_override": false,
|
| 255 |
+
"scale_warmup_epochs_override": null,
|
| 256 |
+
"num_epochs": 10,
|
| 257 |
+
"batch_size": 1024,
|
| 258 |
+
"learning_rate": 0.001,
|
| 259 |
+
"weight_decay": 1e-05,
|
| 260 |
+
"warmup_epochs": 3,
|
| 261 |
+
"use_rose_loss": true,
|
| 262 |
+
"rose_initial_weight": 0.1,
|
| 263 |
+
"rose_max_weight": 0.5,
|
| 264 |
+
"rose_weight_schedule": "adaptive",
|
| 265 |
+
"use_cayley_loss": false,
|
| 266 |
+
"cayley_weight": 0.001,
|
| 267 |
+
"scale_loss_balance": null,
|
| 268 |
+
"use_mixed_precision": false,
|
| 269 |
+
"gradient_clip": 10.0,
|
| 270 |
+
"scheduler_type": "cosine_restarts",
|
| 271 |
+
"min_lr": 1e-06,
|
| 272 |
+
"freeze_strategy": "never",
|
| 273 |
+
"freeze_threshold": 90.0,
|
| 274 |
+
"unfreeze_on_plateau": true,
|
| 275 |
+
"patience": 10,
|
| 276 |
+
"track_gradients": true,
|
| 277 |
+
"gradient_scale_threshold": 1e-05,
|
| 278 |
+
"gradient_scale_multiplier": 10.0,
|
| 279 |
+
"log_interval": 50,
|
| 280 |
+
"val_interval": 1,
|
| 281 |
+
"save_interval": 5,
|
| 282 |
+
"log_fusion_weights": true,
|
| 283 |
+
"log_loss_components": true,
|
| 284 |
+
"save_format": "safetensors",
|
| 285 |
+
"hf_repo": "AbstractPhil/gated-david",
|
| 286 |
+
"upload_to_hub": true,
|
| 287 |
+
"base_dir": "./david_training",
|
| 288 |
+
"num_workers": 10,
|
| 289 |
+
"pin_memory": true,
|
| 290 |
+
"prefetch_factor": 4,
|
| 291 |
+
"persistent_workers": true
|
| 292 |
+
}
|
| 293 |
+
}
|
weights/David-fully_shared-weighted_sum/20251012_135249/training_history.json
CHANGED
|
@@ -7,7 +7,8 @@
|
|
| 7 |
5,
|
| 8 |
6,
|
| 9 |
7,
|
| 10 |
-
8
|
|
|
|
| 11 |
],
|
| 12 |
"train_loss": [
|
| 13 |
5.603825211905824,
|
|
@@ -17,7 +18,8 @@
|
|
| 17 |
3.45402966711087,
|
| 18 |
3.3684399169854844,
|
| 19 |
3.3046513376906277,
|
| 20 |
-
3.255301171407913
|
|
|
|
| 21 |
],
|
| 22 |
"train_acc": [
|
| 23 |
63.39829233815732,
|
|
@@ -27,7 +29,8 @@
|
|
| 27 |
71.35642738222262,
|
| 28 |
71.84090754757186,
|
| 29 |
72.29198067074785,
|
| 30 |
-
72.7457856782137
|
|
|
|
| 31 |
],
|
| 32 |
"val_acc": [
|
| 33 |
68.056,
|
|
@@ -37,7 +40,8 @@
|
|
| 37 |
70.966,
|
| 38 |
71.462,
|
| 39 |
71.766,
|
| 40 |
-
71.776
|
|
|
|
| 41 |
],
|
| 42 |
"scale_accs": {
|
| 43 |
"256": [
|
|
@@ -48,7 +52,8 @@
|
|
| 48 |
70.468,
|
| 49 |
71.106,
|
| 50 |
71.436,
|
| 51 |
-
71.486
|
|
|
|
| 52 |
],
|
| 53 |
"512": [
|
| 54 |
67.882,
|
|
@@ -58,7 +63,8 @@
|
|
| 58 |
70.854,
|
| 59 |
71.28,
|
| 60 |
71.578,
|
| 61 |
-
71.694
|
|
|
|
| 62 |
]
|
| 63 |
},
|
| 64 |
"lr": [
|
|
@@ -69,6 +75,7 @@
|
|
| 69 |
0.0005005000000000001,
|
| 70 |
0.0003461460113097139,
|
| 71 |
0.00020690126647990973,
|
| 72 |
-
9.639601130971382e-05
|
|
|
|
| 73 |
]
|
| 74 |
}
|
|
|
|
| 7 |
5,
|
| 8 |
6,
|
| 9 |
7,
|
| 10 |
+
8,
|
| 11 |
+
9
|
| 12 |
],
|
| 13 |
"train_loss": [
|
| 14 |
5.603825211905824,
|
|
|
|
| 18 |
3.45402966711087,
|
| 19 |
3.3684399169854844,
|
| 20 |
3.3046513376906277,
|
| 21 |
+
3.255301171407913,
|
| 22 |
+
3.219453362611155
|
| 23 |
],
|
| 24 |
"train_acc": [
|
| 25 |
63.39829233815732,
|
|
|
|
| 29 |
71.35642738222262,
|
| 30 |
71.84090754757186,
|
| 31 |
72.29198067074785,
|
| 32 |
+
72.7457856782137,
|
| 33 |
+
73.02162793765372
|
| 34 |
],
|
| 35 |
"val_acc": [
|
| 36 |
68.056,
|
|
|
|
| 40 |
70.966,
|
| 41 |
71.462,
|
| 42 |
71.766,
|
| 43 |
+
71.776,
|
| 44 |
+
71.914
|
| 45 |
],
|
| 46 |
"scale_accs": {
|
| 47 |
"256": [
|
|
|
|
| 52 |
70.468,
|
| 53 |
71.106,
|
| 54 |
71.436,
|
| 55 |
+
71.486,
|
| 56 |
+
71.568
|
| 57 |
],
|
| 58 |
"512": [
|
| 59 |
67.882,
|
|
|
|
| 63 |
70.854,
|
| 64 |
71.28,
|
| 65 |
71.578,
|
| 66 |
+
71.694,
|
| 67 |
+
71.708
|
| 68 |
]
|
| 69 |
},
|
| 70 |
"lr": [
|
|
|
|
| 75 |
0.0005005000000000001,
|
| 76 |
0.0003461460113097139,
|
| 77 |
0.00020690126647990973,
|
| 78 |
+
9.639601130971382e-05,
|
| 79 |
+
2.5447270110570814e-05
|
| 80 |
]
|
| 81 |
}
|