AbstractPhil commited on
Commit
c147d46
·
verified ·
1 Parent(s): fed6abb

Ablation F-HIGH-F4_tanh

Browse files
F/HIGH/F4_tanh/seed0/final_report.json ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "config": {
3
+ "group": "F",
4
+ "variant": "F4_tanh",
5
+ "band": "HIGH",
6
+ "seed": 0,
7
+ "phase": 1,
8
+ "overrides": {
9
+ "activation": "tanh"
10
+ },
11
+ "description": "F-HIGH-F4_tanh"
12
+ },
13
+ "run_config": {
14
+ "matrix_v": 32,
15
+ "D": 4,
16
+ "patch_size": 4,
17
+ "hidden": 64,
18
+ "depth": 1,
19
+ "n_cross_layers": 1,
20
+ "n_heads": 4,
21
+ "max_alpha": 0.2,
22
+ "alpha_init": -2.0,
23
+ "img_size": 64,
24
+ "batch_size": 128,
25
+ "lr": 0.0001,
26
+ "epochs": 1,
27
+ "weight_decay": 0.0,
28
+ "use_cv_ema": true,
29
+ "cv_ema_alpha": 0.01,
30
+ "cv_alignment_epochs": 0,
31
+ "cv_measure_every": 50,
32
+ "cv_sigma_scale": 0.3,
33
+ "boost": 0.5,
34
+ "cross_attn_clip": 0.5,
35
+ "allowed_types": [
36
+ 0,
37
+ 1,
38
+ 2,
39
+ 3,
40
+ 4,
41
+ 5,
42
+ 6,
43
+ 7,
44
+ 8,
45
+ 9,
46
+ 10,
47
+ 11,
48
+ 12,
49
+ 13,
50
+ 14,
51
+ 15
52
+ ],
53
+ "train_size": 1000000,
54
+ "val_size": 10000,
55
+ "num_workers": 2,
56
+ "report_every": 100,
57
+ "major_report_every": 10,
58
+ "save_every": 5,
59
+ "seed": 0,
60
+ "hf_repo": "AbstractPhil/geolip-svae-batteries",
61
+ "upload": false
62
+ },
63
+ "cv_ema_final": 0.8061596932240116,
64
+ "cv_last": 0.952047175548824,
65
+ "predicted_band": "HIGH",
66
+ "expected_band": "HIGH",
67
+ "band_match": true,
68
+ "test_mse": 1.4550015926361084,
69
+ "recon_ema": 1.5872148255095602,
70
+ "S0": 3.410879611968994,
71
+ "SD": 2.194139003753662,
72
+ "ratio": 1.5545412531240541,
73
+ "erank": 3.9437694549560547,
74
+ "observed_sphere_cv": 0.9501394117796056,
75
+ "uniform_sphere_cv_prediction": 0.922915479249404,
76
+ "band_deviation": 0.02722393253020161,
77
+ "params_count": 40703,
78
+ "wallclock_seconds": 2.294936180114746,
79
+ "batches_completed": 100,
80
+ "batch_limit": 100,
81
+ "cv_trajectory": [
82
+ {
83
+ "batch": 0,
84
+ "cv": 0.8046860822914378,
85
+ "cv_ema": 0.8046860822914378,
86
+ "recon": 1.646982192993164
87
+ },
88
+ {
89
+ "batch": 50,
90
+ "cv": 0.952047175548824,
91
+ "cv_ema": 0.8061596932240116,
92
+ "recon": 1.5937037467956543
93
+ }
94
+ ]
95
+ }