AbstractPhil commited on
Commit
b65ab5c
·
verified ·
1 Parent(s): 1894e7c

Ablation M-LOW-M2_sgd_huge_lr

Browse files
M/LOW/M2_sgd_huge_lr/seed0/final_report.json ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config": {
3
+ "group": "M",
4
+ "variant": "M2_sgd_huge_lr",
5
+ "band": "LOW",
6
+ "seed": 0,
7
+ "phase": 1,
8
+ "overrides": {
9
+ "optimizer": "sgd",
10
+ "lr": 1.0,
11
+ "momentum": 0.0,
12
+ "grad_clip": 1.0
13
+ },
14
+ "description": "M-LOW-M2_sgd_huge_lr"
15
+ },
16
+ "run_config": {
17
+ "matrix_v": 64,
18
+ "D": 16,
19
+ "patch_size": 16,
20
+ "hidden": 64,
21
+ "depth": 1,
22
+ "n_cross_layers": 1,
23
+ "n_heads": 4,
24
+ "max_alpha": 0.2,
25
+ "alpha_init": -2.0,
26
+ "img_size": 64,
27
+ "batch_size": 128,
28
+ "lr": 1.0,
29
+ "epochs": 1,
30
+ "weight_decay": 0.0,
31
+ "use_cv_ema": true,
32
+ "cv_ema_alpha": 0.01,
33
+ "cv_alignment_epochs": 0,
34
+ "cv_measure_every": 50,
35
+ "cv_sigma_scale": 0.3,
36
+ "boost": 0.5,
37
+ "cross_attn_clip": 0.5,
38
+ "allowed_types": [
39
+ 0,
40
+ 1,
41
+ 2,
42
+ 3,
43
+ 4,
44
+ 5,
45
+ 6,
46
+ 7,
47
+ 8,
48
+ 9,
49
+ 10,
50
+ 11,
51
+ 12,
52
+ 13,
53
+ 14,
54
+ 15
55
+ ],
56
+ "train_size": 1000000,
57
+ "val_size": 10000,
58
+ "num_workers": 2,
59
+ "report_every": 100,
60
+ "major_report_every": 10,
61
+ "save_every": 5,
62
+ "seed": 0,
63
+ "hf_repo": "AbstractPhil/geolip-svae-batteries",
64
+ "upload": false
65
+ },
66
+ "cv_ema_final": 0.20372004278652345,
67
+ "cv_last": 0.20254710977068147,
68
+ "predicted_band": "LOW",
69
+ "expected_band": "LOW",
70
+ "band_match": true,
71
+ "test_mse": 0.9774055480957031,
72
+ "recon_ema": 0.9852911975015156,
73
+ "S0": 2.7724547386169434,
74
+ "SD": 1.1687604188919067,
75
+ "ratio": 2.3721326202372266,
76
+ "erank": 15.530475616455078,
77
+ "observed_sphere_cv": 0.20287510285438146,
78
+ "uniform_sphere_cv_prediction": 0.19902003258265127,
79
+ "band_deviation": 0.003855070271730182,
80
+ "params_count": 250211,
81
+ "wallclock_seconds": 14.788687229156494,
82
+ "batches_completed": 1000,
83
+ "batch_limit": 1000,
84
+ "cv_trajectory": [
85
+ {
86
+ "batch": 0,
87
+ "cv": 0.20395987159654982,
88
+ "cv_ema": 0.20395987159654982,
89
+ "recon": 1.6308250427246094
90
+ },
91
+ {
92
+ "batch": 50,
93
+ "cv": 0.20033868626428966,
94
+ "cv_ema": 0.2039236597432272,
95
+ "recon": 1.3191968202590942
96
+ },
97
+ {
98
+ "batch": 100,
99
+ "cv": 0.18865724891054972,
100
+ "cv_ema": 0.20377099563490045,
101
+ "recon": 1.184766411781311
102
+ },
103
+ {
104
+ "batch": 150,
105
+ "cv": 0.20462534176289368,
106
+ "cv_ema": 0.20377953909618038,
107
+ "recon": 1.200871229171753
108
+ },
109
+ {
110
+ "batch": 200,
111
+ "cv": 0.19723662108363305,
112
+ "cv_ema": 0.2037141099160549,
113
+ "recon": 1.0721882581710815
114
+ },
115
+ {
116
+ "batch": 250,
117
+ "cv": 0.19743303327555795,
118
+ "cv_ema": 0.2036512991496499,
119
+ "recon": 1.3552755117416382
120
+ },
121
+ {
122
+ "batch": 300,
123
+ "cv": 0.21774443998271242,
124
+ "cv_ema": 0.2037922305579805,
125
+ "recon": 0.958040714263916
126
+ },
127
+ {
128
+ "batch": 350,
129
+ "cv": 0.21922485166654446,
130
+ "cv_ema": 0.20394655676906615,
131
+ "recon": 0.9064235687255859
132
+ },
133
+ {
134
+ "batch": 400,
135
+ "cv": 0.20211189196803692,
136
+ "cv_ema": 0.20392821012105586,
137
+ "recon": 0.8825730085372925
138
+ },
139
+ {
140
+ "batch": 450,
141
+ "cv": 0.19947792723971702,
142
+ "cv_ema": 0.20388370729224245,
143
+ "recon": 1.1056574583053589
144
+ },
145
+ {
146
+ "batch": 500,
147
+ "cv": 0.2059875457830873,
148
+ "cv_ema": 0.2039047456771509,
149
+ "recon": 0.9898160696029663
150
+ },
151
+ {
152
+ "batch": 550,
153
+ "cv": 0.208760565925814,
154
+ "cv_ema": 0.20395330387963753,
155
+ "recon": 1.0330350399017334
156
+ },
157
+ {
158
+ "batch": 600,
159
+ "cv": 0.188966789338058,
160
+ "cv_ema": 0.20380343873422174,
161
+ "recon": 1.0664596557617188
162
+ },
163
+ {
164
+ "batch": 650,
165
+ "cv": 0.20901556546623792,
166
+ "cv_ema": 0.20385556000154192,
167
+ "recon": 1.0025177001953125
168
+ },
169
+ {
170
+ "batch": 700,
171
+ "cv": 0.20120761512632368,
172
+ "cv_ema": 0.20382908055278973,
173
+ "recon": 0.9299343824386597
174
+ },
175
+ {
176
+ "batch": 750,
177
+ "cv": 0.19840689106072115,
178
+ "cv_ema": 0.20377485865786904,
179
+ "recon": 1.0383415222167969
180
+ },
181
+ {
182
+ "batch": 800,
183
+ "cv": 0.2131973850362997,
184
+ "cv_ema": 0.20386908392165334,
185
+ "recon": 0.8916809558868408
186
+ },
187
+ {
188
+ "batch": 850,
189
+ "cv": 0.1991026616840748,
190
+ "cv_ema": 0.20382141969927756,
191
+ "recon": 0.8004721403121948
192
+ },
193
+ {
194
+ "batch": 900,
195
+ "cv": 0.1948685092479475,
196
+ "cv_ema": 0.20373189059476426,
197
+ "recon": 0.8379831314086914
198
+ },
199
+ {
200
+ "batch": 950,
201
+ "cv": 0.20254710977068147,
202
+ "cv_ema": 0.20372004278652345,
203
+ "recon": 0.9481887817382812
204
+ }
205
+ ]
206
+ }