AbstractPhil commited on
Commit
cc5b5be
·
verified ·
1 Parent(s): ecfca69

Ablation H-MID-H2_linear_matched-s2

Browse files
H/MID/H2_linear_matched/seed2/epoch_1_checkpoint.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e437c3b0cd011d94adeaf90e995c91e7fcaf90e446c675b2ae91870bd2c78af5
3
+ size 5408631
H/MID/H2_linear_matched/seed2/final_report.json ADDED
@@ -0,0 +1,577 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config": {
3
+ "group": "H",
4
+ "variant": "H2_linear_matched",
5
+ "band": "MID",
6
+ "seed": 2,
7
+ "phase": 2,
8
+ "num_epochs": 1,
9
+ "batch_size": 256,
10
+ "overrides": {
11
+ "svd": "none",
12
+ "linear_readout": true,
13
+ "match_params": true
14
+ },
15
+ "description": "H-MID-H2_linear_matched-s2"
16
+ },
17
+ "run_config": {
18
+ "matrix_v": 64,
19
+ "D": 8,
20
+ "patch_size": 16,
21
+ "hidden": 64,
22
+ "depth": 1,
23
+ "n_cross_layers": 1,
24
+ "n_heads": 4,
25
+ "max_alpha": 0.2,
26
+ "alpha_init": -2.0,
27
+ "img_size": 64,
28
+ "batch_size": 128,
29
+ "lr": 0.0001,
30
+ "epochs": 1,
31
+ "weight_decay": 0.0,
32
+ "use_cv_ema": true,
33
+ "cv_ema_alpha": 0.01,
34
+ "cv_alignment_epochs": 0,
35
+ "cv_measure_every": 50,
36
+ "cv_sigma_scale": 0.3,
37
+ "boost": 0.5,
38
+ "cross_attn_clip": 0.5,
39
+ "allowed_types": [
40
+ 0,
41
+ 1,
42
+ 2,
43
+ 3,
44
+ 4,
45
+ 5,
46
+ 6,
47
+ 7,
48
+ 8,
49
+ 9,
50
+ 10,
51
+ 11,
52
+ 12,
53
+ 13,
54
+ 14,
55
+ 15
56
+ ],
57
+ "train_size": 1000000,
58
+ "val_size": 10000,
59
+ "num_workers": 2,
60
+ "report_every": 100,
61
+ "major_report_every": 10,
62
+ "save_every": 5,
63
+ "seed": 2,
64
+ "hf_repo": "AbstractPhil/geolip-svae-batteries",
65
+ "upload": false
66
+ },
67
+ "cv_ema_final": 0.3651393675164828,
68
+ "cv_last": 0.34896547641876635,
69
+ "predicted_band": "MID",
70
+ "expected_band": "MID",
71
+ "band_match": true,
72
+ "test_mse": 0.9220280051231384,
73
+ "recon_ema": 0.9279508530324391,
74
+ "S0": 2.665696620941162,
75
+ "SD": 1.6292178630828857,
76
+ "ratio": 1.6361817931060385,
77
+ "erank": 7.169526100158691,
78
+ "observed_sphere_cv": 0.3674532584137462,
79
+ "uniform_sphere_cv_prediction": 0.35418994230276496,
80
+ "band_deviation": 0.01326331611098125,
81
+ "params_finite": true,
82
+ "num_epochs_run": 1,
83
+ "start_epoch": 0,
84
+ "per_epoch_metrics": [
85
+ {
86
+ "epoch": 1,
87
+ "test_mse": 0.9220280051231384,
88
+ "cv_ema": 0.3651393675164828,
89
+ "observed_sphere_cv": 0.3674532584137462,
90
+ "band_deviation": 0.01326331611098125,
91
+ "erank": 7.169526100158691,
92
+ "params_finite": true,
93
+ "wallclock_seconds": 33.291502952575684,
94
+ "checkpoint_path": "/content/ablations_phase2/H/MID/H2_linear_matched_s2/epoch_1_checkpoint.pt"
95
+ }
96
+ ],
97
+ "params_count": 445995,
98
+ "wallclock_seconds": 33.29287314414978,
99
+ "batches_completed": 3906,
100
+ "batch_limit": 3906,
101
+ "cv_trajectory": [
102
+ {
103
+ "batch": 0,
104
+ "cv": 0.36314239615396,
105
+ "cv_ema": 0.36314239615396,
106
+ "recon": 1.7020829916000366
107
+ },
108
+ {
109
+ "batch": 50,
110
+ "cv": 0.3660811464656042,
111
+ "cv_ema": 0.3631717836570764,
112
+ "recon": 1.5348536968231201
113
+ },
114
+ {
115
+ "batch": 100,
116
+ "cv": 0.39834169903848127,
117
+ "cv_ema": 0.3635234828108904,
118
+ "recon": 1.3454840183258057
119
+ },
120
+ {
121
+ "batch": 150,
122
+ "cv": 0.35393369941043223,
123
+ "cv_ema": 0.3634275849768858,
124
+ "recon": 1.2261344194412231
125
+ },
126
+ {
127
+ "batch": 200,
128
+ "cv": 0.37126527803012443,
129
+ "cv_ema": 0.3635059619074182,
130
+ "recon": 1.2563807964324951
131
+ },
132
+ {
133
+ "batch": 250,
134
+ "cv": 0.38860848533534426,
135
+ "cv_ema": 0.36375698714169746,
136
+ "recon": 1.1211161613464355
137
+ },
138
+ {
139
+ "batch": 300,
140
+ "cv": 0.36465757242217994,
141
+ "cv_ema": 0.36376599299450224,
142
+ "recon": 1.0642451047897339
143
+ },
144
+ {
145
+ "batch": 350,
146
+ "cv": 0.34536348858414767,
147
+ "cv_ema": 0.3635819679503987,
148
+ "recon": 1.1546896696090698
149
+ },
150
+ {
151
+ "batch": 400,
152
+ "cv": 0.3571158487478453,
153
+ "cv_ema": 0.3635173067583732,
154
+ "recon": 0.9471598863601685
155
+ },
156
+ {
157
+ "batch": 450,
158
+ "cv": 0.349233450310491,
159
+ "cv_ema": 0.3633744681938944,
160
+ "recon": 1.181800127029419
161
+ },
162
+ {
163
+ "batch": 500,
164
+ "cv": 0.32063256308584737,
165
+ "cv_ema": 0.36294704914281395,
166
+ "recon": 1.0603301525115967
167
+ },
168
+ {
169
+ "batch": 550,
170
+ "cv": 0.3670962077575048,
171
+ "cv_ema": 0.36298854072896086,
172
+ "recon": 0.9539375305175781
173
+ },
174
+ {
175
+ "batch": 600,
176
+ "cv": 0.39215969246427684,
177
+ "cv_ema": 0.36328025224631405,
178
+ "recon": 0.9011001586914062
179
+ },
180
+ {
181
+ "batch": 650,
182
+ "cv": 0.39583800779217526,
183
+ "cv_ema": 0.3636058298017727,
184
+ "recon": 0.8944052457809448
185
+ },
186
+ {
187
+ "batch": 700,
188
+ "cv": 0.33251458417787033,
189
+ "cv_ema": 0.36329491734553365,
190
+ "recon": 0.7591871023178101
191
+ },
192
+ {
193
+ "batch": 750,
194
+ "cv": 0.35806075014381483,
195
+ "cv_ema": 0.3632425756735164,
196
+ "recon": 0.9665374159812927
197
+ },
198
+ {
199
+ "batch": 800,
200
+ "cv": 0.3755652128518091,
201
+ "cv_ema": 0.36336580204529934,
202
+ "recon": 1.0322152376174927
203
+ },
204
+ {
205
+ "batch": 850,
206
+ "cv": 0.34569367785970645,
207
+ "cv_ema": 0.36318908080344336,
208
+ "recon": 0.8947685956954956
209
+ },
210
+ {
211
+ "batch": 900,
212
+ "cv": 0.3819913748252224,
213
+ "cv_ema": 0.36337710374366117,
214
+ "recon": 1.0027801990509033
215
+ },
216
+ {
217
+ "batch": 950,
218
+ "cv": 0.36230336885958975,
219
+ "cv_ema": 0.36336636639482045,
220
+ "recon": 0.9741179943084717
221
+ },
222
+ {
223
+ "batch": 1000,
224
+ "cv": 0.3639048202233513,
225
+ "cv_ema": 0.36337175093310575,
226
+ "recon": 0.8325948715209961
227
+ },
228
+ {
229
+ "batch": 1050,
230
+ "cv": 0.35624666060466,
231
+ "cv_ema": 0.3633005000298213,
232
+ "recon": 1.1810990571975708
233
+ },
234
+ {
235
+ "batch": 1100,
236
+ "cv": 0.3603283672682557,
237
+ "cv_ema": 0.3632707787022056,
238
+ "recon": 1.02189302444458
239
+ },
240
+ {
241
+ "batch": 1150,
242
+ "cv": 0.37925602815015075,
243
+ "cv_ema": 0.363430631196685,
244
+ "recon": 1.007506251335144
245
+ },
246
+ {
247
+ "batch": 1200,
248
+ "cv": 0.349472850463165,
249
+ "cv_ema": 0.3632910533893498,
250
+ "recon": 1.0724741220474243
251
+ },
252
+ {
253
+ "batch": 1250,
254
+ "cv": 0.380909427656155,
255
+ "cv_ema": 0.3634672371320179,
256
+ "recon": 1.1108335256576538
257
+ },
258
+ {
259
+ "batch": 1300,
260
+ "cv": 0.3851107449158549,
261
+ "cv_ema": 0.36368367220985626,
262
+ "recon": 1.0437992811203003
263
+ },
264
+ {
265
+ "batch": 1350,
266
+ "cv": 0.3695019765630425,
267
+ "cv_ema": 0.3637418552533881,
268
+ "recon": 1.007309913635254
269
+ },
270
+ {
271
+ "batch": 1400,
272
+ "cv": 0.3424854495860667,
273
+ "cv_ema": 0.36352929119671484,
274
+ "recon": 0.8552716970443726
275
+ },
276
+ {
277
+ "batch": 1450,
278
+ "cv": 0.4143840920191249,
279
+ "cv_ema": 0.364037839204939,
280
+ "recon": 0.8670822978019714
281
+ },
282
+ {
283
+ "batch": 1500,
284
+ "cv": 0.35959668026522557,
285
+ "cv_ema": 0.3639934276155418,
286
+ "recon": 0.9179059267044067
287
+ },
288
+ {
289
+ "batch": 1550,
290
+ "cv": 0.3792132958673496,
291
+ "cv_ema": 0.36414562629805985,
292
+ "recon": 0.8212594985961914
293
+ },
294
+ {
295
+ "batch": 1600,
296
+ "cv": 0.3514081147081333,
297
+ "cv_ema": 0.3640182511821606,
298
+ "recon": 1.14520263671875
299
+ },
300
+ {
301
+ "batch": 1650,
302
+ "cv": 0.36386731049728405,
303
+ "cv_ema": 0.3640167417753118,
304
+ "recon": 0.914709746837616
305
+ },
306
+ {
307
+ "batch": 1700,
308
+ "cv": 0.39473811561408895,
309
+ "cv_ema": 0.36432395551369956,
310
+ "recon": 0.7908166646957397
311
+ },
312
+ {
313
+ "batch": 1750,
314
+ "cv": 0.3940420648509566,
315
+ "cv_ema": 0.3646211366070721,
316
+ "recon": 0.8748637437820435
317
+ },
318
+ {
319
+ "batch": 1800,
320
+ "cv": 0.37968272635197015,
321
+ "cv_ema": 0.3647717525045211,
322
+ "recon": 0.9712508916854858
323
+ },
324
+ {
325
+ "batch": 1850,
326
+ "cv": 0.3688338752786374,
327
+ "cv_ema": 0.3648123737322623,
328
+ "recon": 0.9489051103591919
329
+ },
330
+ {
331
+ "batch": 1900,
332
+ "cv": 0.3797620388895012,
333
+ "cv_ema": 0.36496187038383465,
334
+ "recon": 1.0155394077301025
335
+ },
336
+ {
337
+ "batch": 1950,
338
+ "cv": 0.35055758617625254,
339
+ "cv_ema": 0.36481782754175884,
340
+ "recon": 0.9859466552734375
341
+ },
342
+ {
343
+ "batch": 2000,
344
+ "cv": 0.35848175667966675,
345
+ "cv_ema": 0.36475446683313795,
346
+ "recon": 0.8321199417114258
347
+ },
348
+ {
349
+ "batch": 2050,
350
+ "cv": 0.3688803727516826,
351
+ "cv_ema": 0.3647957258923234,
352
+ "recon": 0.7407306432723999
353
+ },
354
+ {
355
+ "batch": 2100,
356
+ "cv": 0.33949492898749145,
357
+ "cv_ema": 0.3645427179232751,
358
+ "recon": 0.9544631242752075
359
+ },
360
+ {
361
+ "batch": 2150,
362
+ "cv": 0.3653026959285691,
363
+ "cv_ema": 0.364550317703328,
364
+ "recon": 0.8809036016464233
365
+ },
366
+ {
367
+ "batch": 2200,
368
+ "cv": 0.36159961825596526,
369
+ "cv_ema": 0.3645208107088544,
370
+ "recon": 1.023290753364563
371
+ },
372
+ {
373
+ "batch": 2250,
374
+ "cv": 0.35181887753646846,
375
+ "cv_ema": 0.3643937913771305,
376
+ "recon": 1.0038083791732788
377
+ },
378
+ {
379
+ "batch": 2300,
380
+ "cv": 0.38330351021343984,
381
+ "cv_ema": 0.3645828885654936,
382
+ "recon": 1.1119976043701172
383
+ },
384
+ {
385
+ "batch": 2350,
386
+ "cv": 0.3525992945597004,
387
+ "cv_ema": 0.36446305262543566,
388
+ "recon": 0.81243896484375
389
+ },
390
+ {
391
+ "batch": 2400,
392
+ "cv": 0.3840627974347993,
393
+ "cv_ema": 0.3646590500735293,
394
+ "recon": 1.0367182493209839
395
+ },
396
+ {
397
+ "batch": 2450,
398
+ "cv": 0.3578032614553835,
399
+ "cv_ema": 0.36459049218734785,
400
+ "recon": 0.9908058047294617
401
+ },
402
+ {
403
+ "batch": 2500,
404
+ "cv": 0.3623221488186694,
405
+ "cv_ema": 0.364567808753661,
406
+ "recon": 0.9225941896438599
407
+ },
408
+ {
409
+ "batch": 2550,
410
+ "cv": 0.35291871081928333,
411
+ "cv_ema": 0.3644513177743172,
412
+ "recon": 0.9174164533615112
413
+ },
414
+ {
415
+ "batch": 2600,
416
+ "cv": 0.383764852961136,
417
+ "cv_ema": 0.36464445312618543,
418
+ "recon": 0.9298324584960938
419
+ },
420
+ {
421
+ "batch": 2650,
422
+ "cv": 0.3289928798858076,
423
+ "cv_ema": 0.3642879373937817,
424
+ "recon": 0.8829406499862671
425
+ },
426
+ {
427
+ "batch": 2700,
428
+ "cv": 0.39237527201045885,
429
+ "cv_ema": 0.36456881073994846,
430
+ "recon": 0.8058251738548279
431
+ },
432
+ {
433
+ "batch": 2750,
434
+ "cv": 0.34781883736489905,
435
+ "cv_ema": 0.36440131100619794,
436
+ "recon": 0.9852740168571472
437
+ },
438
+ {
439
+ "batch": 2800,
440
+ "cv": 0.34693851550564575,
441
+ "cv_ema": 0.36422668305119243,
442
+ "recon": 0.8391839861869812
443
+ },
444
+ {
445
+ "batch": 2850,
446
+ "cv": 0.3693932711454146,
447
+ "cv_ema": 0.3642783489321346,
448
+ "recon": 0.9568033218383789
449
+ },
450
+ {
451
+ "batch": 2900,
452
+ "cv": 0.3512776442322587,
453
+ "cv_ema": 0.36414834188513584,
454
+ "recon": 0.804368257522583
455
+ },
456
+ {
457
+ "batch": 2950,
458
+ "cv": 0.3761463691844898,
459
+ "cv_ema": 0.3642683221581294,
460
+ "recon": 0.9543739557266235
461
+ },
462
+ {
463
+ "batch": 3000,
464
+ "cv": 0.3677699435626354,
465
+ "cv_ema": 0.36430333837217443,
466
+ "recon": 1.0472074747085571
467
+ },
468
+ {
469
+ "batch": 3050,
470
+ "cv": 0.38624292278957534,
471
+ "cv_ema": 0.36452273421634845,
472
+ "recon": 0.9402758479118347
473
+ },
474
+ {
475
+ "batch": 3100,
476
+ "cv": 0.371040698881504,
477
+ "cv_ema": 0.364587913863,
478
+ "recon": 0.9393660426139832
479
+ },
480
+ {
481
+ "batch": 3150,
482
+ "cv": 0.3665235200449661,
483
+ "cv_ema": 0.3646072699248196,
484
+ "recon": 1.024072527885437
485
+ },
486
+ {
487
+ "batch": 3200,
488
+ "cv": 0.36479161645469205,
489
+ "cv_ema": 0.3646091133901183,
490
+ "recon": 1.0041594505310059
491
+ },
492
+ {
493
+ "batch": 3250,
494
+ "cv": 0.3747372897369722,
495
+ "cv_ema": 0.36471039515358683,
496
+ "recon": 0.9670217037200928
497
+ },
498
+ {
499
+ "batch": 3300,
500
+ "cv": 0.39338452260008466,
501
+ "cv_ema": 0.36499713642805176,
502
+ "recon": 1.006697177886963
503
+ },
504
+ {
505
+ "batch": 3350,
506
+ "cv": 0.3945355377504124,
507
+ "cv_ema": 0.36529252044127536,
508
+ "recon": 0.8629468679428101
509
+ },
510
+ {
511
+ "batch": 3400,
512
+ "cv": 0.3317328978447411,
513
+ "cv_ema": 0.36495692421531,
514
+ "recon": 0.9281606674194336
515
+ },
516
+ {
517
+ "batch": 3450,
518
+ "cv": 0.38188422227431645,
519
+ "cv_ema": 0.36512619719590006,
520
+ "recon": 0.9411990642547607
521
+ },
522
+ {
523
+ "batch": 3500,
524
+ "cv": 0.3595978600216644,
525
+ "cv_ema": 0.3650709138241577,
526
+ "recon": 1.0228345394134521
527
+ },
528
+ {
529
+ "batch": 3550,
530
+ "cv": 0.3481832232676977,
531
+ "cv_ema": 0.3649020369185931,
532
+ "recon": 1.037837266921997
533
+ },
534
+ {
535
+ "batch": 3600,
536
+ "cv": 0.352538650315001,
537
+ "cv_ema": 0.36477840305255715,
538
+ "recon": 0.892354428768158
539
+ },
540
+ {
541
+ "batch": 3650,
542
+ "cv": 0.3941376565896027,
543
+ "cv_ema": 0.36507199558792763,
544
+ "recon": 0.8482953906059265
545
+ },
546
+ {
547
+ "batch": 3700,
548
+ "cv": 0.388144123042866,
549
+ "cv_ema": 0.365302716862477,
550
+ "recon": 0.9098829030990601
551
+ },
552
+ {
553
+ "batch": 3750,
554
+ "cv": 0.36607821874493635,
555
+ "cv_ema": 0.3653104718813016,
556
+ "recon": 0.9439947009086609
557
+ },
558
+ {
559
+ "batch": 3800,
560
+ "cv": 0.3662845785467337,
561
+ "cv_ema": 0.3653202129479559,
562
+ "recon": 1.074286699295044
563
+ },
564
+ {
565
+ "batch": 3850,
566
+ "cv": 0.3635729335357102,
567
+ "cv_ema": 0.36530274015383346,
568
+ "recon": 0.8830778002738953
569
+ },
570
+ {
571
+ "batch": 3900,
572
+ "cv": 0.34896547641876635,
573
+ "cv_ema": 0.3651393675164828,
574
+ "recon": 1.0607995986938477
575
+ }
576
+ ]
577
+ }
H/MID/H2_linear_matched/seed2/tensorboard/events.out.tfevents.1776797143.52b79b2c7612.4109.50 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03d6adece16ed012bde9590a6e0ddbf4768e96e12014413002525d6528842719
3
+ size 612125