AbstractPhil committed on
Commit
cf0786a
·
verified ·
1 Parent(s): b366e79

Epoch 10 - 24.20% acc

Browse files
weights/run_002_v2_16patch_4tilewormholes_d768_4layer_20251130_045437/README.md ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Run: run_002_v2_16patch_4tilewormholes_d768_4layer_20251130_045437
2
+
3
+ ## Results
4
+ - **Best Accuracy**: 24.20%
5
+ - **Dataset**: cifar100
6
+ - **Epochs**: 200 (configured)
7
+ - **Model Version**: V2
8
+
9
+ ## Model Config
10
+ | Parameter | Value |
11
+ |-----------|-------|
12
+ | Dim | 768 |
13
+ | Layers | 4 |
14
+ | Heads | 12 |
15
+ | Scales | [64, 128, 192, 256, 320, 384, 448, 512] |
16
+
17
+ ## Wormhole Routing (V2)
18
+ | Parameter | Value |
19
+ |-----------|-------|
20
+ | Mode | hybrid |
21
+ | Wormholes/Position | 16 |
22
+ | Temperature | 0.05 |
23
+ | Tiles | 24 |
24
+ | Tile Wormholes | 6 |
25
+
26
+
27
+ ## Training Config
28
+ | Parameter | Value |
29
+ |-----------|-------|
30
+ | Learning Rate | 0.0005 |
31
+ | Weight Decay | 0.1 |
32
+ | Batch Size | 128 |
33
+ | CE Weight | 1.0 |
34
+ | Contrast Weight | 0.5 |
35
+
36
+ ## Key Findings Applied
37
+ - Routing learns from task pressure (no auxiliary routing losses)
38
+ - Gradients verified to flow through router
39
+ - Cross-contrastive loss aligns patch↔scale features
weights/run_002_v2_16patch_4tilewormholes_d768_4layer_20251130_045437/best.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c9771cfb701fc5f39b9ca349a2feb3caafce4cebfa158435d659229e1275607
3
+ size 156046704
weights/run_002_v2_16patch_4tilewormholes_d768_4layer_20251130_045437/config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architecture": "DavidBeans_V2",
3
+ "model_type": "david_beans_v2",
4
+ "image_size": 32,
5
+ "patch_size": 4,
6
+ "in_channels": 3,
7
+ "dim": 768,
8
+ "num_layers": 4,
9
+ "num_heads": 12,
10
+ "mlp_ratio": 4.0,
11
+ "num_wormholes": 16,
12
+ "wormhole_temperature": 0.05,
13
+ "wormhole_mode": "hybrid",
14
+ "cantor_weight": 0.3,
15
+ "num_tiles": 24,
16
+ "tile_wormholes": 6,
17
+ "scales": [
18
+ 64,
19
+ 128,
20
+ 192,
21
+ 256,
22
+ 320,
23
+ 384,
24
+ 448,
25
+ 512
26
+ ],
27
+ "num_classes": 100,
28
+ "use_belly": true,
29
+ "belly_expand": 2.0,
30
+ "contrast_temperature": 0.07,
31
+ "contrast_weight": 0.5,
32
+ "routing_weight": 0.0,
33
+ "dropout": 0.15,
34
+ "pooling": "cls"
35
+ }
weights/run_002_v2_16patch_4tilewormholes_d768_4layer_20251130_045437/tensorboard/events.out.tfevents.1764478481.c46210e51d85.42116.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad7f75a3b1839c38ef620074e1c157e012829646e5c8b1b9699e3099afbdd71a
3
+ size 38393
weights/run_002_v2_16patch_4tilewormholes_d768_4layer_20251130_045437/training_config.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "run_name": "16patch_4tilewormholes_d768_4layer",
3
+ "run_number": null,
4
+ "model_version": 2,
5
+ "dataset": "cifar100",
6
+ "image_size": 32,
7
+ "batch_size": 128,
8
+ "num_workers": 4,
9
+ "epochs": 200,
10
+ "warmup_epochs": 15,
11
+ "learning_rate": 0.0005,
12
+ "weight_decay": 0.1,
13
+ "betas": [
14
+ 0.9,
15
+ 0.999
16
+ ],
17
+ "scheduler": "cosine",
18
+ "min_lr": 1e-06,
19
+ "ce_weight": 1.0,
20
+ "contrast_weight": 0.5,
21
+ "gradient_clip": 1.0,
22
+ "label_smoothing": 0.15,
23
+ "use_augmentation": true,
24
+ "mixup_alpha": 0.3,
25
+ "cutmix_alpha": 1.0,
26
+ "save_interval": 10,
27
+ "output_dir": "./checkpoints/cifar100_v2",
28
+ "resume_from": null,
29
+ "use_tensorboard": true,
30
+ "log_interval": 50,
31
+ "log_routing": true,
32
+ "push_to_hub": true,
33
+ "hub_repo_id": "AbstractPhil/geovit-david-beans",
34
+ "hub_private": false,
35
+ "device": "cuda"
36
+ }
weights/run_002_v2_16patch_4tilewormholes_d768_4layer_20251130_045437/training_history.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "loss": [
3
+ 10.382154626112717,
4
+ 9.826913677117764,
5
+ 9.627644357925806,
6
+ 9.398329890079987,
7
+ 9.207778887870985,
8
+ 9.057374726808987,
9
+ 9.029630415256207,
10
+ 8.938092751380724,
11
+ 8.874607647382296
12
+ ],
13
+ "ce": [
14
+ 4.441570006884062,
15
+ 4.226625170463171,
16
+ 4.139200558417882,
17
+ 4.05009743861663,
18
+ 3.9672310199493017,
19
+ 3.8850555597207483,
20
+ 3.8780411903674787,
21
+ 3.8434145303872915,
22
+ 3.817438381146162
23
+ ],
24
+ "contrast": [
25
+ 4.56968045601478,
26
+ 4.307914226483076,
27
+ 4.221879853346409,
28
+ 4.114024965579693,
29
+ 4.031190662506299,
30
+ 3.9787070555564683,
31
+ 3.9627609515801456,
32
+ 3.9189832241107254,
33
+ 3.8901301915829
34
+ ],
35
+ "lr": [
36
+ 3.3333333333333335e-05,
37
+ 6.666666666666667e-05,
38
+ 0.0001,
39
+ 0.00013333333333333334,
40
+ 0.00016666666666666666,
41
+ 0.0002,
42
+ 0.00023333333333333333,
43
+ 0.0002666666666666667,
44
+ 0.00030000000000000003
45
+ ]
46
+ }