AbstractPhil commited on
Commit
b8fd2be
·
verified ·
1 Parent(s): 5e70454

Ablation M-LOW-M3_sgd_high_momentum

Browse files
M/LOW/M3_sgd_high_momentum/seed0/final_report.json ADDED
@@ -0,0 +1,205 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config": {
3
+ "group": "M",
4
+ "variant": "M3_sgd_high_momentum",
5
+ "band": "LOW",
6
+ "seed": 0,
7
+ "phase": 1,
8
+ "overrides": {
9
+ "optimizer": "sgd",
10
+ "lr": 0.003,
11
+ "momentum": 0.99
12
+ },
13
+ "description": "M-LOW-M3_sgd_high_momentum"
14
+ },
15
+ "run_config": {
16
+ "matrix_v": 64,
17
+ "D": 16,
18
+ "patch_size": 16,
19
+ "hidden": 64,
20
+ "depth": 1,
21
+ "n_cross_layers": 1,
22
+ "n_heads": 4,
23
+ "max_alpha": 0.2,
24
+ "alpha_init": -2.0,
25
+ "img_size": 64,
26
+ "batch_size": 128,
27
+ "lr": 0.003,
28
+ "epochs": 1,
29
+ "weight_decay": 0.0,
30
+ "use_cv_ema": true,
31
+ "cv_ema_alpha": 0.01,
32
+ "cv_alignment_epochs": 0,
33
+ "cv_measure_every": 50,
34
+ "cv_sigma_scale": 0.3,
35
+ "boost": 0.5,
36
+ "cross_attn_clip": 0.5,
37
+ "allowed_types": [
38
+ 0,
39
+ 1,
40
+ 2,
41
+ 3,
42
+ 4,
43
+ 5,
44
+ 6,
45
+ 7,
46
+ 8,
47
+ 9,
48
+ 10,
49
+ 11,
50
+ 12,
51
+ 13,
52
+ 14,
53
+ 15
54
+ ],
55
+ "train_size": 1000000,
56
+ "val_size": 10000,
57
+ "num_workers": 2,
58
+ "report_every": 100,
59
+ "major_report_every": 10,
60
+ "save_every": 5,
61
+ "seed": 0,
62
+ "hf_repo": "AbstractPhil/geolip-svae-batteries",
63
+ "upload": false
64
+ },
65
+ "cv_ema_final": 0.2039467897293445,
66
+ "cv_last": 0.2048882260410599,
67
+ "predicted_band": "LOW",
68
+ "expected_band": "LOW",
69
+ "band_match": true,
70
+ "test_mse": 0.9959005117416382,
71
+ "recon_ema": 1.0036648829000523,
72
+ "S0": 2.7821898460388184,
73
+ "SD": 1.1573772430419922,
74
+ "ratio": 2.4038746560175177,
75
+ "erank": 15.523387908935547,
76
+ "observed_sphere_cv": 0.20477168730807327,
77
+ "uniform_sphere_cv_prediction": 0.19902003258265127,
78
+ "band_deviation": 0.005751654725421995,
79
+ "params_count": 250211,
80
+ "wallclock_seconds": 14.191136360168457,
81
+ "batches_completed": 1000,
82
+ "batch_limit": 1000,
83
+ "cv_trajectory": [
84
+ {
85
+ "batch": 0,
86
+ "cv": 0.20395987159654982,
87
+ "cv_ema": 0.20395987159654982,
88
+ "recon": 1.6308250427246094
89
+ },
90
+ {
91
+ "batch": 50,
92
+ "cv": 0.1937983926667847,
93
+ "cv_ema": 0.20385825680725217,
94
+ "recon": 1.6899583339691162
95
+ },
96
+ {
97
+ "batch": 100,
98
+ "cv": 0.21203549714999062,
99
+ "cv_ema": 0.20394002921067955,
100
+ "recon": 1.5099332332611084
101
+ },
102
+ {
103
+ "batch": 150,
104
+ "cv": 0.19226184860796794,
105
+ "cv_ema": 0.20382324740465244,
106
+ "recon": 1.7411473989486694
107
+ },
108
+ {
109
+ "batch": 200,
110
+ "cv": 0.2140056891706862,
111
+ "cv_ema": 0.20392507182231276,
112
+ "recon": 1.3361775875091553
113
+ },
114
+ {
115
+ "batch": 250,
116
+ "cv": 0.20131880115500828,
117
+ "cv_ema": 0.2038990091156397,
118
+ "recon": 1.2594659328460693
119
+ },
120
+ {
121
+ "batch": 300,
122
+ "cv": 0.207123242704634,
123
+ "cv_ema": 0.20393125145152965,
124
+ "recon": 1.082836627960205
125
+ },
126
+ {
127
+ "batch": 350,
128
+ "cv": 0.22263688421487332,
129
+ "cv_ema": 0.2041183077791631,
130
+ "recon": 0.988299548625946
131
+ },
132
+ {
133
+ "batch": 400,
134
+ "cv": 0.19616606667486416,
135
+ "cv_ema": 0.2040387853681201,
136
+ "recon": 0.9831360578536987
137
+ },
138
+ {
139
+ "batch": 450,
140
+ "cv": 0.18965845895022088,
141
+ "cv_ema": 0.20389498210394108,
142
+ "recon": 1.1402337551116943
143
+ },
144
+ {
145
+ "batch": 500,
146
+ "cv": 0.19633968460988524,
147
+ "cv_ema": 0.20381942912900053,
148
+ "recon": 1.0152266025543213
149
+ },
150
+ {
151
+ "batch": 550,
152
+ "cv": 0.1988263929477364,
153
+ "cv_ema": 0.2037694987671879,
154
+ "recon": 1.0515997409820557
155
+ },
156
+ {
157
+ "batch": 600,
158
+ "cv": 0.19222247160716088,
159
+ "cv_ema": 0.20365402849558764,
160
+ "recon": 1.0844422578811646
161
+ },
162
+ {
163
+ "batch": 650,
164
+ "cv": 0.21738385279469116,
165
+ "cv_ema": 0.2037913267385787,
166
+ "recon": 1.0286909341812134
167
+ },
168
+ {
169
+ "batch": 700,
170
+ "cv": 0.2033766001803901,
171
+ "cv_ema": 0.20378717947299682,
172
+ "recon": 0.9552921652793884
173
+ },
174
+ {
175
+ "batch": 750,
176
+ "cv": 0.20174503906703276,
177
+ "cv_ema": 0.20376675806893718,
178
+ "recon": 1.0608594417572021
179
+ },
180
+ {
181
+ "batch": 800,
182
+ "cv": 0.2182513945573627,
183
+ "cv_ema": 0.20391160443382142,
184
+ "recon": 0.9039809107780457
185
+ },
186
+ {
187
+ "batch": 850,
188
+ "cv": 0.20077052796306824,
189
+ "cv_ema": 0.2038801936691139,
190
+ "recon": 0.8119789361953735
191
+ },
192
+ {
193
+ "batch": 900,
194
+ "cv": 0.20958885392276652,
195
+ "cv_ema": 0.2039372802716504,
196
+ "recon": 0.868818461894989
197
+ },
198
+ {
199
+ "batch": 950,
200
+ "cv": 0.2048882260410599,
201
+ "cv_ema": 0.2039467897293445,
202
+ "recon": 0.9619293212890625
203
+ }
204
+ ]
205
+ }