AbstractPhil commited on
Commit
818f944
·
verified ·
1 Parent(s): c06694f

Ablation E-MID-E1_full_softhand-s2

Browse files
E/MID/E1_full_softhand/seed2/epoch_1_checkpoint.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d8a48ee4ad576086073efa51b520cba4d1dc898100ef3d26fbe2afb4335eae38
3
+ size 2254383
E/MID/E1_full_softhand/seed2/final_report.json ADDED
@@ -0,0 +1,577 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config": {
3
+ "group": "E",
4
+ "variant": "E1_full_softhand",
5
+ "band": "MID",
6
+ "seed": 2,
7
+ "phase": 2,
8
+ "num_epochs": 1,
9
+ "batch_size": 256,
10
+ "overrides": {
11
+ "soft_hand": true,
12
+ "boost": 0.5,
13
+ "cv_penalty": 0.3
14
+ },
15
+ "description": "E-MID-E1_full_softhand-s2"
16
+ },
17
+ "run_config": {
18
+ "matrix_v": 64,
19
+ "D": 8,
20
+ "patch_size": 16,
21
+ "hidden": 64,
22
+ "depth": 1,
23
+ "n_cross_layers": 1,
24
+ "n_heads": 4,
25
+ "max_alpha": 0.2,
26
+ "alpha_init": -2.0,
27
+ "img_size": 64,
28
+ "batch_size": 128,
29
+ "lr": 0.0001,
30
+ "epochs": 1,
31
+ "weight_decay": 0.0,
32
+ "use_cv_ema": true,
33
+ "cv_ema_alpha": 0.01,
34
+ "cv_alignment_epochs": 0,
35
+ "cv_measure_every": 50,
36
+ "cv_sigma_scale": 0.3,
37
+ "boost": 0.5,
38
+ "cross_attn_clip": 0.5,
39
+ "allowed_types": [
40
+ 0,
41
+ 1,
42
+ 2,
43
+ 3,
44
+ 4,
45
+ 5,
46
+ 6,
47
+ 7,
48
+ 8,
49
+ 9,
50
+ 10,
51
+ 11,
52
+ 12,
53
+ 13,
54
+ 14,
55
+ 15
56
+ ],
57
+ "train_size": 1000000,
58
+ "val_size": 10000,
59
+ "num_workers": 2,
60
+ "report_every": 100,
61
+ "major_report_every": 10,
62
+ "save_every": 5,
63
+ "seed": 2,
64
+ "hf_repo": "AbstractPhil/geolip-svae-batteries",
65
+ "upload": false
66
+ },
67
+ "cv_ema_final": 0.3480703952097737,
68
+ "cv_last": 0.3447459086455023,
69
+ "predicted_band": "MID",
70
+ "expected_band": "MID",
71
+ "band_match": true,
72
+ "test_mse": 0.9399646520614624,
73
+ "recon_ema": 0.9422848500100067,
74
+ "S0": 3.489184856414795,
75
+ "SD": 1.9874465465545654,
76
+ "ratio": 1.755611915655051,
77
+ "erank": 7.878285884857178,
78
+ "observed_sphere_cv": 0.3458565145565521,
79
+ "uniform_sphere_cv_prediction": 0.35418994230276496,
80
+ "band_deviation": -0.008333427746212874,
81
+ "params_finite": true,
82
+ "num_epochs_run": 1,
83
+ "start_epoch": 0,
84
+ "per_epoch_metrics": [
85
+ {
86
+ "epoch": 1,
87
+ "test_mse": 0.9399646520614624,
88
+ "cv_ema": 0.3480703952097737,
89
+ "observed_sphere_cv": 0.3458565145565521,
90
+ "band_deviation": -0.008333427746212874,
91
+ "erank": 7.878285884857178,
92
+ "params_finite": true,
93
+ "wallclock_seconds": 76.96068954467773,
94
+ "checkpoint_path": "/content/ablations_phase2/E/MID/E1_full_softhand_s2/epoch_1_checkpoint.pt"
95
+ }
96
+ ],
97
+ "params_count": 183339,
98
+ "wallclock_seconds": 76.96198296546936,
99
+ "batches_completed": 3906,
100
+ "batch_limit": 3906,
101
+ "cv_trajectory": [
102
+ {
103
+ "batch": 0,
104
+ "cv": 0.3262890855598136,
105
+ "cv_ema": 0.3262890855598136,
106
+ "recon": 1.7023042440414429
107
+ },
108
+ {
109
+ "batch": 50,
110
+ "cv": 0.35420124526300195,
111
+ "cv_ema": 0.3265682071568455,
112
+ "recon": 1.7544864416122437
113
+ },
114
+ {
115
+ "batch": 100,
116
+ "cv": 0.3771013579566092,
117
+ "cv_ema": 0.3270735386648431,
118
+ "recon": 1.5822032690048218
119
+ },
120
+ {
121
+ "batch": 150,
122
+ "cv": 0.3601170716015174,
123
+ "cv_ema": 0.32740397399420984,
124
+ "recon": 1.2908776998519897
125
+ },
126
+ {
127
+ "batch": 200,
128
+ "cv": 0.3563539538750632,
129
+ "cv_ema": 0.3276934737930184,
130
+ "recon": 1.192936897277832
131
+ },
132
+ {
133
+ "batch": 250,
134
+ "cv": 0.36288384358713255,
135
+ "cv_ema": 0.3280453774909595,
136
+ "recon": 1.1890082359313965
137
+ },
138
+ {
139
+ "batch": 300,
140
+ "cv": 0.36308259348411515,
141
+ "cv_ema": 0.3283957496508911,
142
+ "recon": 1.1174583435058594
143
+ },
144
+ {
145
+ "batch": 350,
146
+ "cv": 0.34839258912238397,
147
+ "cv_ema": 0.328595718045606,
148
+ "recon": 1.089463233947754
149
+ },
150
+ {
151
+ "batch": 400,
152
+ "cv": 0.35545692302531184,
153
+ "cv_ema": 0.32886433009540306,
154
+ "recon": 1.2116528749465942
155
+ },
156
+ {
157
+ "batch": 450,
158
+ "cv": 0.38005769275218515,
159
+ "cv_ema": 0.3293762637219709,
160
+ "recon": 1.0988701581954956
161
+ },
162
+ {
163
+ "batch": 500,
164
+ "cv": 0.35120833191677603,
165
+ "cv_ema": 0.3295945844039189,
166
+ "recon": 0.92216557264328
167
+ },
168
+ {
169
+ "batch": 550,
170
+ "cv": 0.3495670338764207,
171
+ "cv_ema": 0.3297943088986439,
172
+ "recon": 0.9793009757995605
173
+ },
174
+ {
175
+ "batch": 600,
176
+ "cv": 0.35906572919073326,
177
+ "cv_ema": 0.3300870231015648,
178
+ "recon": 1.0837210416793823
179
+ },
180
+ {
181
+ "batch": 650,
182
+ "cv": 0.3529607397156278,
183
+ "cv_ema": 0.3303157602677054,
184
+ "recon": 1.0782544612884521
185
+ },
186
+ {
187
+ "batch": 700,
188
+ "cv": 0.3639000817240141,
189
+ "cv_ema": 0.3306516034822685,
190
+ "recon": 1.0257539749145508
191
+ },
192
+ {
193
+ "batch": 750,
194
+ "cv": 0.33643578108291694,
195
+ "cv_ema": 0.330709445258275,
196
+ "recon": 1.165489912033081
197
+ },
198
+ {
199
+ "batch": 800,
200
+ "cv": 0.34578710999106915,
201
+ "cv_ema": 0.33086022190560294,
202
+ "recon": 1.0324760675430298
203
+ },
204
+ {
205
+ "batch": 850,
206
+ "cv": 0.37876860235255655,
207
+ "cv_ema": 0.3313393057100725,
208
+ "recon": 1.1925572156906128
209
+ },
210
+ {
211
+ "batch": 900,
212
+ "cv": 0.3556402646479365,
213
+ "cv_ema": 0.33158231529945115,
214
+ "recon": 0.9723278880119324
215
+ },
216
+ {
217
+ "batch": 950,
218
+ "cv": 0.3302559285322808,
219
+ "cv_ema": 0.33156905143177945,
220
+ "recon": 0.9787582159042358
221
+ },
222
+ {
223
+ "batch": 1000,
224
+ "cv": 0.34527670767352703,
225
+ "cv_ema": 0.3317061279941969,
226
+ "recon": 0.9641728401184082
227
+ },
228
+ {
229
+ "batch": 1050,
230
+ "cv": 0.35757910457676606,
231
+ "cv_ema": 0.3319648577600226,
232
+ "recon": 1.0182446241378784
233
+ },
234
+ {
235
+ "batch": 1100,
236
+ "cv": 0.3503497764586378,
237
+ "cv_ema": 0.3321487069470087,
238
+ "recon": 1.039778709411621
239
+ },
240
+ {
241
+ "batch": 1150,
242
+ "cv": 0.358617288888518,
243
+ "cv_ema": 0.3324133927664238,
244
+ "recon": 0.962005615234375
245
+ },
246
+ {
247
+ "batch": 1200,
248
+ "cv": 0.33819880409674,
249
+ "cv_ema": 0.33247124687972696,
250
+ "recon": 1.0247961282730103
251
+ },
252
+ {
253
+ "batch": 1250,
254
+ "cv": 0.371364188013269,
255
+ "cv_ema": 0.33286017629106235,
256
+ "recon": 0.8481518626213074
257
+ },
258
+ {
259
+ "batch": 1300,
260
+ "cv": 0.3825887153106035,
261
+ "cv_ema": 0.3333574616812578,
262
+ "recon": 1.0951452255249023
263
+ },
264
+ {
265
+ "batch": 1350,
266
+ "cv": 0.3864992694746269,
267
+ "cv_ema": 0.33388887975919146,
268
+ "recon": 0.8396276235580444
269
+ },
270
+ {
271
+ "batch": 1400,
272
+ "cv": 0.3641736793284047,
273
+ "cv_ema": 0.3341917277548836,
274
+ "recon": 1.0711067914962769
275
+ },
276
+ {
277
+ "batch": 1450,
278
+ "cv": 0.36231571739127005,
279
+ "cv_ema": 0.33447296765124745,
280
+ "recon": 0.9473068118095398
281
+ },
282
+ {
283
+ "batch": 1500,
284
+ "cv": 0.40735128745692256,
285
+ "cv_ema": 0.3352017508493042,
286
+ "recon": 0.8713144063949585
287
+ },
288
+ {
289
+ "batch": 1550,
290
+ "cv": 0.38596349346626546,
291
+ "cv_ema": 0.3357093682754738,
292
+ "recon": 1.0868606567382812
293
+ },
294
+ {
295
+ "batch": 1600,
296
+ "cv": 0.3725137940352693,
297
+ "cv_ema": 0.3360774125330718,
298
+ "recon": 0.9734262228012085
299
+ },
300
+ {
301
+ "batch": 1650,
302
+ "cv": 0.38136895432314044,
303
+ "cv_ema": 0.3365303279509725,
304
+ "recon": 1.0778909921646118
305
+ },
306
+ {
307
+ "batch": 1700,
308
+ "cv": 0.3859139768753493,
309
+ "cv_ema": 0.33702416444021627,
310
+ "recon": 1.0689404010772705
311
+ },
312
+ {
313
+ "batch": 1750,
314
+ "cv": 0.3792825754933931,
315
+ "cv_ema": 0.3374467485507481,
316
+ "recon": 0.9017398357391357
317
+ },
318
+ {
319
+ "batch": 1800,
320
+ "cv": 0.39472380676548374,
321
+ "cv_ema": 0.33801951913289546,
322
+ "recon": 0.8950915336608887
323
+ },
324
+ {
325
+ "batch": 1850,
326
+ "cv": 0.3874323960152432,
327
+ "cv_ema": 0.3385136479017189,
328
+ "recon": 0.9699146747589111
329
+ },
330
+ {
331
+ "batch": 1900,
332
+ "cv": 0.38665402019817235,
333
+ "cv_ema": 0.33899505162468346,
334
+ "recon": 1.0061593055725098
335
+ },
336
+ {
337
+ "batch": 1950,
338
+ "cv": 0.3383747914196531,
339
+ "cv_ema": 0.33898884902263315,
340
+ "recon": 0.9713768362998962
341
+ },
342
+ {
343
+ "batch": 2000,
344
+ "cv": 0.3690088737818903,
345
+ "cv_ema": 0.33928904927022574,
346
+ "recon": 0.9764444231987
347
+ },
348
+ {
349
+ "batch": 2050,
350
+ "cv": 0.36064619412955606,
351
+ "cv_ema": 0.33950262071881904,
352
+ "recon": 1.093001127243042
353
+ },
354
+ {
355
+ "batch": 2100,
356
+ "cv": 0.376390717013402,
357
+ "cv_ema": 0.3398715016817649,
358
+ "recon": 0.8027586936950684
359
+ },
360
+ {
361
+ "batch": 2150,
362
+ "cv": 0.3662955161675649,
363
+ "cv_ema": 0.3401357418266229,
364
+ "recon": 0.8901537656784058
365
+ },
366
+ {
367
+ "batch": 2200,
368
+ "cv": 0.3835754644617178,
369
+ "cv_ema": 0.3405701390529738,
370
+ "recon": 0.8826779127120972
371
+ },
372
+ {
373
+ "batch": 2250,
374
+ "cv": 0.3736330996388438,
375
+ "cv_ema": 0.34090076865883245,
376
+ "recon": 0.8642450571060181
377
+ },
378
+ {
379
+ "batch": 2300,
380
+ "cv": 0.3460144308999767,
381
+ "cv_ema": 0.3409519052812439,
382
+ "recon": 0.9384070634841919
383
+ },
384
+ {
385
+ "batch": 2350,
386
+ "cv": 0.3627402366441504,
387
+ "cv_ema": 0.34116978859487296,
388
+ "recon": 0.9572822451591492
389
+ },
390
+ {
391
+ "batch": 2400,
392
+ "cv": 0.35234590080180606,
393
+ "cv_ema": 0.34128154971694225,
394
+ "recon": 1.0572890043258667
395
+ },
396
+ {
397
+ "batch": 2450,
398
+ "cv": 0.3462620084471221,
399
+ "cv_ema": 0.34133135430424405,
400
+ "recon": 0.7617369890213013
401
+ },
402
+ {
403
+ "batch": 2500,
404
+ "cv": 0.3431133919561225,
405
+ "cv_ema": 0.34134917468076287,
406
+ "recon": 0.9448237419128418
407
+ },
408
+ {
409
+ "batch": 2550,
410
+ "cv": 0.32826922107734885,
411
+ "cv_ema": 0.34121837514472875,
412
+ "recon": 0.9084067344665527
413
+ },
414
+ {
415
+ "batch": 2600,
416
+ "cv": 0.376660529242341,
417
+ "cv_ema": 0.34157279668570484,
418
+ "recon": 0.8209115862846375
419
+ },
420
+ {
421
+ "batch": 2650,
422
+ "cv": 0.35852545721051093,
423
+ "cv_ema": 0.3417423232909529,
424
+ "recon": 0.90264892578125
425
+ },
426
+ {
427
+ "batch": 2700,
428
+ "cv": 0.3649905889123999,
429
+ "cv_ema": 0.34197480594716734,
430
+ "recon": 1.0998560190200806
431
+ },
432
+ {
433
+ "batch": 2750,
434
+ "cv": 0.376912584265846,
435
+ "cv_ema": 0.34232418373035417,
436
+ "recon": 0.9626335501670837
437
+ },
438
+ {
439
+ "batch": 2800,
440
+ "cv": 0.3670247890322585,
441
+ "cv_ema": 0.34257118978337325,
442
+ "recon": 0.9161946773529053
443
+ },
444
+ {
445
+ "batch": 2850,
446
+ "cv": 0.3519428296637673,
447
+ "cv_ema": 0.34266490618217715,
448
+ "recon": 1.0545369386672974
449
+ },
450
+ {
451
+ "batch": 2900,
452
+ "cv": 0.3499493025029879,
453
+ "cv_ema": 0.34273775014538527,
454
+ "recon": 0.9505127668380737
455
+ },
456
+ {
457
+ "batch": 2950,
458
+ "cv": 0.37609966132096184,
459
+ "cv_ema": 0.34307136925714105,
460
+ "recon": 0.9866455793380737
461
+ },
462
+ {
463
+ "batch": 3000,
464
+ "cv": 0.35332601323477253,
465
+ "cv_ema": 0.34317391569691735,
466
+ "recon": 1.1191105842590332
467
+ },
468
+ {
469
+ "batch": 3050,
470
+ "cv": 0.37775898502298294,
471
+ "cv_ema": 0.34351976639017806,
472
+ "recon": 0.9974138736724854
473
+ },
474
+ {
475
+ "batch": 3100,
476
+ "cv": 0.3566859568152597,
477
+ "cv_ema": 0.3436514282944289,
478
+ "recon": 0.9225505590438843
479
+ },
480
+ {
481
+ "batch": 3150,
482
+ "cv": 0.3580856064054484,
483
+ "cv_ema": 0.34379577007553913,
484
+ "recon": 0.7871040105819702
485
+ },
486
+ {
487
+ "batch": 3200,
488
+ "cv": 0.35921556928602016,
489
+ "cv_ema": 0.34394996806764394,
490
+ "recon": 0.9113373756408691
491
+ },
492
+ {
493
+ "batch": 3250,
494
+ "cv": 0.3850812704331315,
495
+ "cv_ema": 0.3443612810912988,
496
+ "recon": 0.8876476287841797
497
+ },
498
+ {
499
+ "batch": 3300,
500
+ "cv": 0.39109260587121863,
501
+ "cv_ema": 0.344828594339098,
502
+ "recon": 0.8876112699508667
503
+ },
504
+ {
505
+ "batch": 3350,
506
+ "cv": 0.37187065249713747,
507
+ "cv_ema": 0.3450990149206784,
508
+ "recon": 0.9611567258834839
509
+ },
510
+ {
511
+ "batch": 3400,
512
+ "cv": 0.3682274775478585,
513
+ "cv_ema": 0.3453302995469502,
514
+ "recon": 1.0167291164398193
515
+ },
516
+ {
517
+ "batch": 3450,
518
+ "cv": 0.3893340080640225,
519
+ "cv_ema": 0.3457703366321209,
520
+ "recon": 0.9901139736175537
521
+ },
522
+ {
523
+ "batch": 3500,
524
+ "cv": 0.3690285401562733,
525
+ "cv_ema": 0.34600291866736244,
526
+ "recon": 1.1799705028533936
527
+ },
528
+ {
529
+ "batch": 3550,
530
+ "cv": 0.36429238834180416,
531
+ "cv_ema": 0.3461858133641068,
532
+ "recon": 0.941440761089325
533
+ },
534
+ {
535
+ "batch": 3600,
536
+ "cv": 0.41327815120862016,
537
+ "cv_ema": 0.3468567367425519,
538
+ "recon": 0.9302716255187988
539
+ },
540
+ {
541
+ "batch": 3650,
542
+ "cv": 0.35642114686436877,
543
+ "cv_ema": 0.34695238084377006,
544
+ "recon": 0.9333240389823914
545
+ },
546
+ {
547
+ "batch": 3700,
548
+ "cv": 0.36079900345752214,
549
+ "cv_ema": 0.3470908470699076,
550
+ "recon": 0.7871812582015991
551
+ },
552
+ {
553
+ "batch": 3750,
554
+ "cv": 0.3871005363923662,
555
+ "cv_ema": 0.3474909439631322,
556
+ "recon": 0.9142307043075562
557
+ },
558
+ {
559
+ "batch": 3800,
560
+ "cv": 0.3744958407268366,
561
+ "cv_ema": 0.34776099293076923,
562
+ "recon": 0.8225603103637695
563
+ },
564
+ {
565
+ "batch": 3850,
566
+ "cv": 0.38205928806785244,
567
+ "cv_ema": 0.3481039758821401,
568
+ "recon": 0.9965871572494507
569
+ },
570
+ {
571
+ "batch": 3900,
572
+ "cv": 0.3447459086455023,
573
+ "cv_ema": 0.3480703952097737,
574
+ "recon": 1.0565013885498047
575
+ }
576
+ ]
577
+ }
E/MID/E1_full_softhand/seed2/tensorboard/events.out.tfevents.1776794168.52b79b2c7612.4109.5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5f801ad421500428663dcb425684ca97fae54e479bfa7b77583f735dd6e4df6f
3
+ size 612125