AbstractPhil commited on
Commit
d42557f
·
verified ·
1 Parent(s): 74f58bd

Ablation H-HIGH-H3_linear_unmatched-s2

Browse files
H/HIGH/H3_linear_unmatched/seed2/epoch_1_checkpoint.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f13706a247589c4415544d7d29e38e8457ed681c223ac3935bb54ad82dd72b60
3
+ size 542895
H/HIGH/H3_linear_unmatched/seed2/final_report.json ADDED
@@ -0,0 +1,577 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config": {
3
+ "group": "H",
4
+ "variant": "H3_linear_unmatched",
5
+ "band": "HIGH",
6
+ "seed": 2,
7
+ "phase": 2,
8
+ "num_epochs": 1,
9
+ "batch_size": 256,
10
+ "overrides": {
11
+ "svd": "none",
12
+ "linear_readout": true,
13
+ "match_params": false
14
+ },
15
+ "description": "H-HIGH-H3_linear_unmatched-s2"
16
+ },
17
+ "run_config": {
18
+ "matrix_v": 32,
19
+ "D": 4,
20
+ "patch_size": 4,
21
+ "hidden": 64,
22
+ "depth": 1,
23
+ "n_cross_layers": 1,
24
+ "n_heads": 4,
25
+ "max_alpha": 0.2,
26
+ "alpha_init": -2.0,
27
+ "img_size": 64,
28
+ "batch_size": 128,
29
+ "lr": 0.0001,
30
+ "epochs": 1,
31
+ "weight_decay": 0.0,
32
+ "use_cv_ema": true,
33
+ "cv_ema_alpha": 0.01,
34
+ "cv_alignment_epochs": 0,
35
+ "cv_measure_every": 50,
36
+ "cv_sigma_scale": 0.3,
37
+ "boost": 0.5,
38
+ "cross_attn_clip": 0.5,
39
+ "allowed_types": [
40
+ 0,
41
+ 1,
42
+ 2,
43
+ 3,
44
+ 4,
45
+ 5,
46
+ 6,
47
+ 7,
48
+ 8,
49
+ 9,
50
+ 10,
51
+ 11,
52
+ 12,
53
+ 13,
54
+ 14,
55
+ 15
56
+ ],
57
+ "train_size": 1000000,
58
+ "val_size": 10000,
59
+ "num_workers": 2,
60
+ "report_every": 100,
61
+ "major_report_every": 10,
62
+ "save_every": 5,
63
+ "seed": 2,
64
+ "hf_repo": "AbstractPhil/geolip-svae-batteries",
65
+ "upload": false
66
+ },
67
+ "cv_ema_final": 0.8828428741312521,
68
+ "cv_last": 0.8273320063132668,
69
+ "predicted_band": "HIGH",
70
+ "expected_band": "HIGH",
71
+ "band_match": true,
72
+ "test_mse": 0.08074139803647995,
73
+ "recon_ema": 0.0807779443500184,
74
+ "S0": 2.9515204429626465,
75
+ "SD": 2.8531153202056885,
76
+ "ratio": 1.0344904083319562,
77
+ "erank": 3.9937214851379395,
78
+ "observed_sphere_cv": 0.8532852509353274,
79
+ "uniform_sphere_cv_prediction": 0.9230044578859847,
80
+ "band_deviation": -0.06971920695065736,
81
+ "params_finite": true,
82
+ "num_epochs_run": 1,
83
+ "start_epoch": 0,
84
+ "per_epoch_metrics": [
85
+ {
86
+ "epoch": 1,
87
+ "test_mse": 0.08074139803647995,
88
+ "cv_ema": 0.8828428741312521,
89
+ "observed_sphere_cv": 0.8532852509353274,
90
+ "band_deviation": -0.06971920695065736,
91
+ "erank": 3.9937214851379395,
92
+ "params_finite": true,
93
+ "wallclock_seconds": 33.53608584403992,
94
+ "checkpoint_path": "/content/ablations_phase2/H/HIGH/H3_linear_unmatched_s2/epoch_1_checkpoint.pt"
95
+ }
96
+ ],
97
+ "params_count": 40703,
98
+ "wallclock_seconds": 33.53733944892883,
99
+ "batches_completed": 3906,
100
+ "batch_limit": 3906,
101
+ "cv_trajectory": [
102
+ {
103
+ "batch": 0,
104
+ "cv": 0.8707637758885347,
105
+ "cv_ema": 0.8707637758885347,
106
+ "recon": 1.8225572109222412
107
+ },
108
+ {
109
+ "batch": 50,
110
+ "cv": 0.8339478538459713,
111
+ "cv_ema": 0.870395616668109,
112
+ "recon": 1.507354736328125
113
+ },
114
+ {
115
+ "batch": 100,
116
+ "cv": 0.959816686834879,
117
+ "cv_ema": 0.8712898273697768,
118
+ "recon": 1.4688609838485718
119
+ },
120
+ {
121
+ "batch": 150,
122
+ "cv": 0.9661816391586417,
123
+ "cv_ema": 0.8722387454876653,
124
+ "recon": 1.1263841390609741
125
+ },
126
+ {
127
+ "batch": 200,
128
+ "cv": 0.871838642256397,
129
+ "cv_ema": 0.8722347444553527,
130
+ "recon": 0.974519670009613
131
+ },
132
+ {
133
+ "batch": 250,
134
+ "cv": 0.8335924636922253,
135
+ "cv_ema": 0.8718483216477213,
136
+ "recon": 0.8641786575317383
137
+ },
138
+ {
139
+ "batch": 300,
140
+ "cv": 0.9389950753384311,
141
+ "cv_ema": 0.8725197891846285,
142
+ "recon": 0.6831504106521606
143
+ },
144
+ {
145
+ "batch": 350,
146
+ "cv": 0.9662350138057544,
147
+ "cv_ema": 0.8734569414308396,
148
+ "recon": 0.830031156539917
149
+ },
150
+ {
151
+ "batch": 400,
152
+ "cv": 0.8482784370189711,
153
+ "cv_ema": 0.8732051563867209,
154
+ "recon": 0.6995317339897156
155
+ },
156
+ {
157
+ "batch": 450,
158
+ "cv": 1.0507735316528162,
159
+ "cv_ema": 0.8749808401393818,
160
+ "recon": 0.6755117177963257
161
+ },
162
+ {
163
+ "batch": 500,
164
+ "cv": 0.8716058773574181,
165
+ "cv_ema": 0.8749470905115622,
166
+ "recon": 0.6005268096923828
167
+ },
168
+ {
169
+ "batch": 550,
170
+ "cv": 0.9393284524065831,
171
+ "cv_ema": 0.8755909041305124,
172
+ "recon": 0.6062716245651245
173
+ },
174
+ {
175
+ "batch": 600,
176
+ "cv": 0.916262768588547,
177
+ "cv_ema": 0.8759976227750927,
178
+ "recon": 0.529315173625946
179
+ },
180
+ {
181
+ "batch": 650,
182
+ "cv": 0.884715654424633,
183
+ "cv_ema": 0.8760848030915881,
184
+ "recon": 0.4516713619232178
185
+ },
186
+ {
187
+ "batch": 700,
188
+ "cv": 0.9067286019590894,
189
+ "cv_ema": 0.8763912410802631,
190
+ "recon": 0.46101951599121094
191
+ },
192
+ {
193
+ "batch": 750,
194
+ "cv": 0.9579664253821331,
195
+ "cv_ema": 0.8772069929232817,
196
+ "recon": 0.5414658784866333
197
+ },
198
+ {
199
+ "batch": 800,
200
+ "cv": 0.9714651911875615,
201
+ "cv_ema": 0.8781495749059245,
202
+ "recon": 0.3862035274505615
203
+ },
204
+ {
205
+ "batch": 850,
206
+ "cv": 0.9343966285830553,
207
+ "cv_ema": 0.8787120454426958,
208
+ "recon": 0.3523392081260681
209
+ },
210
+ {
211
+ "batch": 900,
212
+ "cv": 0.898794576262841,
213
+ "cv_ema": 0.8789128707508972,
214
+ "recon": 0.4363919198513031
215
+ },
216
+ {
217
+ "batch": 950,
218
+ "cv": 0.8083526901592777,
219
+ "cv_ema": 0.8782072689449809,
220
+ "recon": 0.2984492778778076
221
+ },
222
+ {
223
+ "batch": 1000,
224
+ "cv": 0.9025639166728266,
225
+ "cv_ema": 0.8784508354222593,
226
+ "recon": 0.3876347541809082
227
+ },
228
+ {
229
+ "batch": 1050,
230
+ "cv": 0.9467997252040327,
231
+ "cv_ema": 0.879134324320077,
232
+ "recon": 0.37546491622924805
233
+ },
234
+ {
235
+ "batch": 1100,
236
+ "cv": 0.9436044572090887,
237
+ "cv_ema": 0.8797790256489672,
238
+ "recon": 0.38061660528182983
239
+ },
240
+ {
241
+ "batch": 1150,
242
+ "cv": 0.9683396583616632,
243
+ "cv_ema": 0.8806646319760941,
244
+ "recon": 0.3002871870994568
245
+ },
246
+ {
247
+ "batch": 1200,
248
+ "cv": 0.972728773252805,
249
+ "cv_ema": 0.8815852733888613,
250
+ "recon": 0.3106653392314911
251
+ },
252
+ {
253
+ "batch": 1250,
254
+ "cv": 0.9688714220848722,
255
+ "cv_ema": 0.8824581348758214,
256
+ "recon": 0.3207021653652191
257
+ },
258
+ {
259
+ "batch": 1300,
260
+ "cv": 0.9413535082490181,
261
+ "cv_ema": 0.8830470886095534,
262
+ "recon": 0.3181518316268921
263
+ },
264
+ {
265
+ "batch": 1350,
266
+ "cv": 0.8191617673140967,
267
+ "cv_ema": 0.8824082353965987,
268
+ "recon": 0.3136539161205292
269
+ },
270
+ {
271
+ "batch": 1400,
272
+ "cv": 0.8800896807275669,
273
+ "cv_ema": 0.8823850498499084,
274
+ "recon": 0.3095695376396179
275
+ },
276
+ {
277
+ "batch": 1450,
278
+ "cv": 0.9324761528760076,
279
+ "cv_ema": 0.8828859608801694,
280
+ "recon": 0.30465933680534363
281
+ },
282
+ {
283
+ "batch": 1500,
284
+ "cv": 0.9564965786074313,
285
+ "cv_ema": 0.883622067057442,
286
+ "recon": 0.2137455940246582
287
+ },
288
+ {
289
+ "batch": 1550,
290
+ "cv": 0.8509796437111574,
291
+ "cv_ema": 0.8832956428239791,
292
+ "recon": 0.20447149872779846
293
+ },
294
+ {
295
+ "batch": 1600,
296
+ "cv": 0.9003838826846507,
297
+ "cv_ema": 0.8834665252225858,
298
+ "recon": 0.2683619558811188
299
+ },
300
+ {
301
+ "batch": 1650,
302
+ "cv": 0.9299265954038628,
303
+ "cv_ema": 0.8839311259243986,
304
+ "recon": 0.23209743201732635
305
+ },
306
+ {
307
+ "batch": 1700,
308
+ "cv": 0.8747726700664652,
309
+ "cv_ema": 0.8838395413658193,
310
+ "recon": 0.2058553397655487
311
+ },
312
+ {
313
+ "batch": 1750,
314
+ "cv": 0.909394680980643,
315
+ "cv_ema": 0.8840950927619676,
316
+ "recon": 0.23852041363716125
317
+ },
318
+ {
319
+ "batch": 1800,
320
+ "cv": 1.0216632850762584,
321
+ "cv_ema": 0.8854707746851105,
322
+ "recon": 0.18722489476203918
323
+ },
324
+ {
325
+ "batch": 1850,
326
+ "cv": 0.9258188810887357,
327
+ "cv_ema": 0.8858742557491467,
328
+ "recon": 0.19339041411876678
329
+ },
330
+ {
331
+ "batch": 1900,
332
+ "cv": 0.8591142811135695,
333
+ "cv_ema": 0.8856066560027909,
334
+ "recon": 0.19477799534797668
335
+ },
336
+ {
337
+ "batch": 1950,
338
+ "cv": 0.8811149769681691,
339
+ "cv_ema": 0.8855617392124446,
340
+ "recon": 0.1877443790435791
341
+ },
342
+ {
343
+ "batch": 2000,
344
+ "cv": 0.8723606644097379,
345
+ "cv_ema": 0.8854297284644175,
346
+ "recon": 0.16187813878059387
347
+ },
348
+ {
349
+ "batch": 2050,
350
+ "cv": 1.0402973247355518,
351
+ "cv_ema": 0.8869784044271288,
352
+ "recon": 0.16019025444984436
353
+ },
354
+ {
355
+ "batch": 2100,
356
+ "cv": 0.8065514065880853,
357
+ "cv_ema": 0.8861741344487384,
358
+ "recon": 0.10909396409988403
359
+ },
360
+ {
361
+ "batch": 2150,
362
+ "cv": 0.827994180728534,
363
+ "cv_ema": 0.8855923349115363,
364
+ "recon": 0.17265453934669495
365
+ },
366
+ {
367
+ "batch": 2200,
368
+ "cv": 0.8764408180567053,
369
+ "cv_ema": 0.885500819742988,
370
+ "recon": 0.1739843338727951
371
+ },
372
+ {
373
+ "batch": 2250,
374
+ "cv": 0.8593808193766155,
375
+ "cv_ema": 0.8852396197393243,
376
+ "recon": 0.1329735964536667
377
+ },
378
+ {
379
+ "batch": 2300,
380
+ "cv": 0.8411323697661999,
381
+ "cv_ema": 0.8847985472395931,
382
+ "recon": 0.1558782458305359
383
+ },
384
+ {
385
+ "batch": 2350,
386
+ "cv": 0.83428217778791,
387
+ "cv_ema": 0.8842933835450763,
388
+ "recon": 0.15291714668273926
389
+ },
390
+ {
391
+ "batch": 2400,
392
+ "cv": 0.8368278951783545,
393
+ "cv_ema": 0.883818728661409,
394
+ "recon": 0.13459140062332153
395
+ },
396
+ {
397
+ "batch": 2450,
398
+ "cv": 0.8185860144959429,
399
+ "cv_ema": 0.8831664015197543,
400
+ "recon": 0.14178141951560974
401
+ },
402
+ {
403
+ "batch": 2500,
404
+ "cv": 0.8875947397550186,
405
+ "cv_ema": 0.883210684902107,
406
+ "recon": 0.16588595509529114
407
+ },
408
+ {
409
+ "batch": 2550,
410
+ "cv": 0.9583973621971065,
411
+ "cv_ema": 0.883962551675057,
412
+ "recon": 0.13084453344345093
413
+ },
414
+ {
415
+ "batch": 2600,
416
+ "cv": 0.8793670030674524,
417
+ "cv_ema": 0.883916596188981,
418
+ "recon": 0.1109180897474289
419
+ },
420
+ {
421
+ "batch": 2650,
422
+ "cv": 0.795852409134397,
423
+ "cv_ema": 0.8830359543184352,
424
+ "recon": 0.1071411594748497
425
+ },
426
+ {
427
+ "batch": 2700,
428
+ "cv": 0.7266542823497347,
429
+ "cv_ema": 0.8814721375987482,
430
+ "recon": 0.10526465624570847
431
+ },
432
+ {
433
+ "batch": 2750,
434
+ "cv": 0.9387637956530872,
435
+ "cv_ema": 0.8820450541792916,
436
+ "recon": 0.09570842981338501
437
+ },
438
+ {
439
+ "batch": 2800,
440
+ "cv": 0.9027206110140432,
441
+ "cv_ema": 0.8822518097476391,
442
+ "recon": 0.08312828093767166
443
+ },
444
+ {
445
+ "batch": 2850,
446
+ "cv": 0.8194057380270919,
447
+ "cv_ema": 0.8816233490304336,
448
+ "recon": 0.07921471446752548
449
+ },
450
+ {
451
+ "batch": 2900,
452
+ "cv": 0.748521256070462,
453
+ "cv_ema": 0.8802923281008339,
454
+ "recon": 0.10324668139219284
455
+ },
456
+ {
457
+ "batch": 2950,
458
+ "cv": 0.9457012149832915,
459
+ "cv_ema": 0.8809464169696585,
460
+ "recon": 0.13486932218074799
461
+ },
462
+ {
463
+ "batch": 3000,
464
+ "cv": 0.9175880393427727,
465
+ "cv_ema": 0.8813128331933896,
466
+ "recon": 0.08009283244609833
467
+ },
468
+ {
469
+ "batch": 3050,
470
+ "cv": 0.8758652946749842,
471
+ "cv_ema": 0.8812583578082055,
472
+ "recon": 0.07816428691148758
473
+ },
474
+ {
475
+ "batch": 3100,
476
+ "cv": 0.9018820814533909,
477
+ "cv_ema": 0.8814645950446574,
478
+ "recon": 0.09347352385520935
479
+ },
480
+ {
481
+ "batch": 3150,
482
+ "cv": 0.8764107052613918,
483
+ "cv_ema": 0.8814140561468247,
484
+ "recon": 0.08519117534160614
485
+ },
486
+ {
487
+ "batch": 3200,
488
+ "cv": 0.9351910368885222,
489
+ "cv_ema": 0.8819518259542417,
490
+ "recon": 0.10691525787115097
491
+ },
492
+ {
493
+ "batch": 3250,
494
+ "cv": 0.8393018578224803,
495
+ "cv_ema": 0.8815253262729241,
496
+ "recon": 0.09321480989456177
497
+ },
498
+ {
499
+ "batch": 3300,
500
+ "cv": 0.8891557995843952,
501
+ "cv_ema": 0.8816016310060388,
502
+ "recon": 0.08325229585170746
503
+ },
504
+ {
505
+ "batch": 3350,
506
+ "cv": 0.9289252449360912,
507
+ "cv_ema": 0.8820748671453394,
508
+ "recon": 0.08234390616416931
509
+ },
510
+ {
511
+ "batch": 3400,
512
+ "cv": 0.8359662445791983,
513
+ "cv_ema": 0.881613780919678,
514
+ "recon": 0.10322006046772003
515
+ },
516
+ {
517
+ "batch": 3450,
518
+ "cv": 0.8475944318068342,
519
+ "cv_ema": 0.8812735874285496,
520
+ "recon": 0.0799802914261818
521
+ },
522
+ {
523
+ "batch": 3500,
524
+ "cv": 0.9709689370498706,
525
+ "cv_ema": 0.8821705409247627,
526
+ "recon": 0.07991938292980194
527
+ },
528
+ {
529
+ "batch": 3550,
530
+ "cv": 0.9130327593200289,
531
+ "cv_ema": 0.8824791631087153,
532
+ "recon": 0.08104370534420013
533
+ },
534
+ {
535
+ "batch": 3600,
536
+ "cv": 0.8709947599026943,
537
+ "cv_ema": 0.8823643190766551,
538
+ "recon": 0.1056627556681633
539
+ },
540
+ {
541
+ "batch": 3650,
542
+ "cv": 0.942440727776526,
543
+ "cv_ema": 0.8829650831636539,
544
+ "recon": 0.06789380311965942
545
+ },
546
+ {
547
+ "batch": 3700,
548
+ "cv": 0.9663605800063942,
549
+ "cv_ema": 0.8837990381320813,
550
+ "recon": 0.0675361156463623
551
+ },
552
+ {
553
+ "batch": 3750,
554
+ "cv": 0.8545756821273168,
555
+ "cv_ema": 0.8835068045720336,
556
+ "recon": 0.07059990614652634
557
+ },
558
+ {
559
+ "batch": 3800,
560
+ "cv": 0.9045237432339848,
561
+ "cv_ema": 0.8837169739586531,
562
+ "recon": 0.05011414736509323
563
+ },
564
+ {
565
+ "batch": 3850,
566
+ "cv": 0.8523785748730878,
567
+ "cv_ema": 0.8834035899677974,
568
+ "recon": 0.07202216982841492
569
+ },
570
+ {
571
+ "batch": 3900,
572
+ "cv": 0.8273320063132668,
573
+ "cv_ema": 0.8828428741312521,
574
+ "recon": 0.08422261476516724
575
+ }
576
+ ]
577
+ }
H/HIGH/H3_linear_unmatched/seed2/tensorboard/events.out.tfevents.1776797575.52b79b2c7612.4109.62 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d846dc1a3eaf6ed1e6bfdf38918054d63ba64f4d57acd16901abf2af45763043
3
+ size 612125