AbstractPhil committed on
Commit
016cb5d
·
verified ·
1 Parent(s): cd899b9

Epoch 10 - 25.63% acc

Browse files
weights/run_006_v2_1spine_8redundantscale_4x4patch_d768_4layer_topo_dropout_cutmix_20251202_175036/README.md ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Run: run_006_v2_1spine_8redundantscale_4x4patch_d768_4layer_topo_dropout_cutmix_20251202_175036
2
+
3
+ ## Results
4
+ - **Best Accuracy**: 25.63%
5
+ - **Dataset**: cifar100
6
+ - **Epochs**: 120 (configured total; results above are as of epoch 10)
7
+ - **Model Version**: V2
8
+
9
+ ## Model Config
10
+ | Parameter | Value |
11
+ |-----------|-------|
12
+ | Dim | 768 |
13
+ | Layers | 4 |
14
+ | Heads | 16 |
15
+ | Patch Size | 4 |
16
+
17
+ ## Wormhole Routing (V2)
18
+ | Parameter | Value |
19
+ |-----------|-------|
20
+ | Mode | hybrid |
21
+ | Wormholes/Position | 8 |
22
+ | Temperature | 0.1 |
23
+ | Tiles | 16 |
24
+ | Tile Wormholes | 4 |
25
+
26
+ ## Crystal Head
27
+ | Parameter | Value |
28
+ |-----------|-------|
29
+ | Scales | [64, 128, 256, 512, 768] |
30
+ | Scale Copies | [16, 8, 4, 2, 1] |
31
+ | Weighting Mode | learned |
32
+ | Belly Layers | 2 |
33
+ | Belly Residual | False |
34
+ | Use Spine | True |
35
+ | Use Collective | False |
36
+
37
+
38
+ ## Training Config
39
+ | Parameter | Value |
40
+ |-----------|-------|
41
+ | Learning Rate | 0.0003 |
42
+ | Weight Decay | 0.05 |
43
+ | Batch Size | 128 |
44
+ | CE Weight | 1.0 |
45
+ | Contrast Weight | 0.5 |
46
+
47
+ ## Augmentation
48
+ | Parameter | Value |
49
+ |-----------|-------|
50
+ | Normalization | standard |
51
+ | Mixup Alpha | 0.2 |
52
+ | CutMix Alpha | 1.0 |
53
+ | AlphaMix | False |
54
+ | Label Smoothing | 0.1 |
55
+
56
+
57
+ ## Key Findings Applied
58
+ - Routing learns from task pressure (no auxiliary routing losses)
59
+ - Gradients verified to flow through router
60
+ - Cross-contrastive aligns patch↔scale features
weights/run_006_v2_1spine_8redundantscale_4x4patch_d768_4layer_topo_dropout_cutmix_20251202_175036/best.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb0f396245d69e8ac84d6f323863b622c0f6c3458767e34ec48848336786391f
3
+ size 256621812
weights/run_006_v2_1spine_8redundantscale_4x4patch_d768_4layer_topo_dropout_cutmix_20251202_175036/config.json ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architecture": "DavidBeans_V2",
3
+ "model_type": "david_beans_v2",
4
+ "image_size": 32,
5
+ "patch_size": 4,
6
+ "in_channels": 3,
7
+ "dim": 768,
8
+ "num_layers": 4,
9
+ "num_heads": 16,
10
+ "mlp_ratio": 4.0,
11
+ "num_wormholes": 8,
12
+ "wormhole_temperature": 0.1,
13
+ "wormhole_mode": "hybrid",
14
+ "cantor_weight": 0.3,
15
+ "num_tiles": 16,
16
+ "tile_wormholes": 4,
17
+ "scales": [
18
+ 64,
19
+ 128,
20
+ 256,
21
+ 512,
22
+ 768
23
+ ],
24
+ "num_classes": 100,
25
+ "use_belly": true,
26
+ "belly_expand": 2.0,
27
+ "belly_layers": 2,
28
+ "belly_residual": false,
29
+ "weighting_mode": "learned",
30
+ "scale_weight_floor": 0.1,
31
+ "scale_copies": [
32
+ 16,
33
+ 8,
34
+ 4,
35
+ 2,
36
+ 1
37
+ ],
38
+ "copy_theta_step": 0.101,
39
+ "use_collective": false,
40
+ "collective_temperature": 0.07,
41
+ "use_spine": true,
42
+ "spine_channels": [
43
+ 1024
44
+ ],
45
+ "spine_cross_attn": true,
46
+ "spine_gate_init": 0.0,
47
+ "contrast_temperature": 0.07,
48
+ "contrast_weight": 0.5,
49
+ "routing_weight": 0.0,
50
+ "dropout": 0.1,
51
+ "use_topo_dropout": true,
52
+ "topo_drop_prob": 0.15,
53
+ "topo_warmup_epochs": 35,
54
+ "topo_min_routes_keep": 2,
55
+ "topo_steps_per_epoch": 391,
56
+ "use_spatial_dropout": true,
57
+ "spatial_drop_prob": 0.1,
58
+ "spatial_patch_size": 4,
59
+ "route_dim_attention": -2,
60
+ "route_dim_tiles": 2,
61
+ "pooling": "cls"
62
+ }
weights/run_006_v2_1spine_8redundantscale_4x4patch_d768_4layer_topo_dropout_cutmix_20251202_175036/tensorboard/events.out.tfevents.1764697839.7ccc73cc3f03.55035.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84c7855aceacc36577612b3e2dc705fb3850398f670744a75c99c321008d2c6d
3
+ size 44262
weights/run_006_v2_1spine_8redundantscale_4x4patch_d768_4layer_topo_dropout_cutmix_20251202_175036/training_config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "run_name": "1spine_8redundantscale_4x4patch_d768_4layer_topo_dropout_cutmix",
3
+ "run_number": null,
4
+ "model_version": 2,
5
+ "dataset": "cifar100",
6
+ "image_size": 32,
7
+ "batch_size": 128,
8
+ "num_workers": 4,
9
+ "normalization": "standard",
10
+ "epochs": 120,
11
+ "warmup_epochs": 10,
12
+ "learning_rate": 0.0003,
13
+ "weight_decay": 0.05,
14
+ "betas": [
15
+ 0.9,
16
+ 0.999
17
+ ],
18
+ "scheduler": "cosine",
19
+ "min_lr": 1e-06,
20
+ "ce_weight": 1.0,
21
+ "contrast_weight": 0.5,
22
+ "gradient_clip": 1.0,
23
+ "label_smoothing": 0.1,
24
+ "use_augmentation": true,
25
+ "mixup_alpha": 0.2,
26
+ "cutmix_alpha": 1.0,
27
+ "use_alphamix": false,
28
+ "alphamix_alpha_range": [
29
+ 0.3,
30
+ 0.7
31
+ ],
32
+ "alphamix_spatial_ratio": 0.25,
33
+ "save_interval": 10,
34
+ "output_dir": "./checkpoints/cifar100_v2",
35
+ "resume_from": null,
36
+ "use_tensorboard": true,
37
+ "log_interval": 50,
38
+ "log_routing": true,
39
+ "push_to_hub": true,
40
+ "hub_repo_id": "AbstractPhil/geovit-david-beans",
41
+ "hub_private": false,
42
+ "device": "cuda"
43
+ }
weights/run_006_v2_1spine_8redundantscale_4x4patch_d768_4layer_topo_dropout_cutmix_20251202_175036/training_history.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "loss": [
3
+ 21.2852105262952,
4
+ 19.912000132829714,
5
+ 19.229191677386943,
6
+ 18.828460888984875,
7
+ 18.32927703368358,
8
+ 18.09517431992751,
9
+ 17.983504742842456,
10
+ 18.02426992563101,
11
+ 17.59568115625626
12
+ ],
13
+ "ce": [
14
+ 4.465974866426908,
15
+ 4.214199514878102,
16
+ 4.055962468416263,
17
+ 3.9652959634096194,
18
+ 3.8591288285377696,
19
+ 3.814647142092387,
20
+ 3.779529088582748,
21
+ 3.7895033976970574,
22
+ 3.7330856402715047
23
+ ],
24
+ "contrast": [
25
+ 4.5800990055768915,
26
+ 4.317624877049373,
27
+ 4.183825146234953,
28
+ 4.1069326437436615,
29
+ 3.999085405545357,
30
+ 3.9506665718861114,
31
+ 3.932388677352514,
32
+ 3.9445555087847586,
33
+ 3.8392515818277997
34
+ ],
35
+ "lr": [
36
+ 2.9999999999999997e-05,
37
+ 5.9999999999999995e-05,
38
+ 8.999999999999999e-05,
39
+ 0.00011999999999999999,
40
+ 0.00015,
41
+ 0.00017999999999999998,
42
+ 0.00020999999999999995,
43
+ 0.00023999999999999998,
44
+ 0.00026999999999999995
45
+ ]
46
+ }