Checkpoint epoch 10 - 30.86% acc

Browse files

Files changed (7) hide show

README.md +153 -0
config.json +30 -0
model.safetensors +3 -0
pytorch_model.bin +3 -0
tensorboard/events.out.tfevents.1764436353.86289bf9c07c.408448.0 +3 -0
training_config.json +37 -0
training_history.json +90 -0

README.md ADDED Viewed

	@@ -0,0 +1,153 @@

+---
+library_name: pytorch
+license: apache-2.0
+tags:
+  - vision
+  - image-classification
+  - geometric-deep-learning
+  - vit
+  - cantor-routing
+  - pentachoron
+  - multi-scale
+datasets:
+  - cifar100
+metrics:
+  - accuracy
+model-index:
+  - name: DavidBeans
+    results:
+      - task:
+          type: image-classification
+          name: Image Classification
+        dataset:
+          name: CIFAR-100
+          type: cifar100
+        metrics:
+          - type: accuracy
+            value: 30.86
+            name: Top-1 Accuracy
+---
+# 🫘💎 DavidBeans: Unified Vision-to-Crystal Architecture
+DavidBeans combines **ViT-Beans** (Cantor-routed sparse attention) with **David** (multi-scale crystal classification) into a unified geometric deep learning architecture.
+## Model Description
+This model implements several novel techniques:
+- **Hybrid Cantor Routing**: Combines fractal Cantor set distances with positional proximity for sparse attention patterns
+- **Pentachoron Experts**: 5-vertex simplex structure with Cayley-Menger geometric regularization
+- **Multi-Scale Crystal Projection**: Projects features to multiple representation scales with learned fusion
+- **Cross-Contrastive Learning**: Aligns patch-level features with crystal anchors
+## Architecture
+```
+Image [B, 3, 32, 32]
+       │
+       ▼
+┌────────────────────────────────────────────────────────┐
+│  BEANS BACKBONE                                        │
+│  ├─ Patch Embed → [64 patches, 512d]                   │
+│  ├─ Hybrid Cantor Router (α=0.3)                       │
+│  ├─ 4 × Attention Blocks (16 heads)                    │
+│  └─ 4 × Pentachoron Expert Layers                      │
+└────────────────────────────────────────────────────────┘
+       │
+       ▼
+┌────────────────────────────────────────────────────────┐
+│  DAVID HEAD                                            │
+│  ├─ Multi-scale projection: [256, 384, 512, 640, 768]  │
+│  ├─ Per-scale Crystal Heads                            │
+│  └─ Geometric Fusion (learned weights)                 │
+└────────────────────────────────────────────────────────┘
+       │
+       ▼
+    [100 classes]
+```
+## Training Details
+| Parameter | Value |
+|-----------|-------|
+| Dataset | CIFAR-100 |
+| Classes | 100 |
+| Image Size | 32×32 |
+| Patch Size | 4×4 |
+| Embedding Dim | 512 |
+| Layers | 4 |
+| Attention Heads | 16 |
+| Experts | 5 (pentachoron) |
+| Sparse Neighbors | k=32 |
+| Scales | [256, 384, 512, 640, 768] |
+| Epochs | 200 (planned; this checkpoint is from epoch 10) |
+| Batch Size | 128 |
+| Learning Rate | 0.0005 |
+| Weight Decay | 0.1 |
+| Mixup α | 0.3 |
+| CutMix α | 1.0 |
+| Label Smoothing | 0.1 |
+## Results
+| Metric | Value |
+|--------|-------|
+| **Top-1 Accuracy** | **30.86%** |
+| Checkpoint | Epoch 10 |
+## TensorBoard Logs
+Training logs are included in the `tensorboard/` directory. To view:
+```bash
+tensorboard --logdir tensorboard/
+```
+## Usage
+```python
+import torch
+from safetensors.torch import load_file
+from david_beans import DavidBeans, DavidBeansConfig
+# Load config
+config = DavidBeansConfig(
+    image_size=32,
+    patch_size=4,
+    dim=512,
+    num_layers=4,
+    num_heads=16,
+    num_experts=5,
+    k_neighbors=32,
+    cantor_weight=0.3,
+    scales=[256, 384, 512, 640, 768],
+    num_classes=100
+)
+# Create model and load weights
+model = DavidBeans(config)
+state_dict = load_file("model.safetensors")
+model.load_state_dict(state_dict)
+# Inference (images: a batch of input images shaped [B, 3, 32, 32])
+model.eval()
+with torch.no_grad():
+    output = model(images)
+    predictions = output['logits'].argmax(dim=-1)
+```
+## Citation
+```bibtex
+@misc{davidbeans2025,
+  author = {AbstractPhil},
+  title = {DavidBeans: Unified Vision-to-Crystal Architecture},
+  year = {2025},
+  publisher = {Hugging Face},
+  url = {https://huggingface.co/AbstractPhil/geovit-david-beans}
+}
+```
+## License
+Apache 2.0

config.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "architecture": "DavidBeans",
+  "model_type": "david_beans",
+  "image_size": 32,
+  "patch_size": 4,
+  "in_channels": 3,
+  "dim": 512,
+  "num_layers": 4,
+  "num_heads": 16,
+  "num_experts": 5,
+  "k_neighbors": 32,
+  "cantor_weight": 0.3,
+  "mlp_ratio": 4.0,
+  "scales": [
+    256,
+    384,
+    512,
+    640,
+    768
+  ],
+  "num_classes": 100,
+  "use_belly": true,
+  "belly_expand": 2.0,
+  "contrast_temperature": 0.07,
+  "contrast_weight": 0.5,
+  "cayley_weight": 0.01,
+  "volume_floor": 0.0001,
+  "dropout": 0.15,
+  "pooling": "cls"
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9289e3b29f245c20d06db0ca2540cebe3e00994b885667ded9be9c184864a051
+size 75434940

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4a8f26949e19b83369f733a0e95895a131ee7759ef55baeba8ae21cd4b9ba98e
+size 75464207

tensorboard/events.out.tfevents.1764436353.86289bf9c07c.408448.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8dbcd87978d749ce5a74554a36e4f4828130ea2e43cee84e17e84e2f3e87ef30
+size 47755

training_config.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+  "run_name": "5expert_5scale",
+  "run_number": null,
+  "dataset": "cifar100",
+  "image_size": 32,
+  "batch_size": 128,
+  "num_workers": 4,
+  "epochs": 200,
+  "warmup_epochs": 20,
+  "learning_rate": 0.0005,
+  "weight_decay": 0.1,
+  "betas": [
+    0.9,
+    0.999
+  ],
+  "scheduler": "cosine",
+  "min_lr": 1e-06,
+  "ce_weight": 1.0,
+  "cayley_weight": 0.01,
+  "contrast_weight": 0.5,
+  "scale_ce_weight": 0.1,
+  "gradient_clip": 1.0,
+  "label_smoothing": 0.1,
+  "use_augmentation": true,
+  "mixup_alpha": 0.3,
+  "cutmix_alpha": 1.0,
+  "save_interval": 10,
+  "output_dir": "./checkpoints/cifar100",
+  "resume_from": null,
+  "use_tensorboard": true,
+  "log_interval": 50,
+  "push_to_hub": true,
+  "hub_repo_id": "AbstractPhil/geovit-david-beans",
+  "hub_private": false,
+  "hub_append_run": true,
+  "device": "cuda"
+}

training_history.json ADDED Viewed

	@@ -0,0 +1,90 @@

+{
+  "loss": [
+    9.14811771343916,
+    8.643415237084414,
+    8.412463459601769,
+    8.140621100939237,
+    7.911913275107359,
+    7.716725044984084,
+    7.681668006456816,
+    7.530832956998776,
+    7.425180815427732
+  ],
+  "ce": [
+    4.456742456631782,
+    4.246037974724403,
+    4.117695839588459,
+    3.994500890144935,
+    3.877463692273849,
+    3.7859254867602616,
+    3.7558192497644667,
+    3.655832992455898,
+    3.6306867299935757
+  ],
+  "geo": [
+    0.003279420401593551,
+    0.00022009814308078673,
+    0.00026651134823437014,
+    0.00033277957815862,
+    0.0003850076214010374,
+    0.0004454412114006491,
+    0.0005292222191854261,
+    0.0005809825301044979,
+    0.0006250078629445619
+  ],
+  "contrast": [
+    4.855305493183625,
+    4.490192336302537,
+    4.363886697475727,
+    4.2106811761856076,
+    4.097067264410166,
+    3.9898384222617516,
+    3.977252336648794,
+    3.9234435209861167,
+    3.84050482297555
+  ],
+  "expert_vol": [
+    9.612834081706704e-06,
+    9.694315777335596e-06,
+    9.655668553676104e-06,
+    9.613265119230327e-06,
+    9.574969063135377e-06,
+    9.548803586184452e-06,
+    9.560007872959175e-06,
+    9.609137787964964e-06,
+    9.700101159418968e-06
+  ],
+  "expert_collapse": [
+    9.03871649568781e-05,
+    9.030568329856181e-05,
+    9.034433044293202e-05,
+    9.038673395899913e-05,
+    9.042502984252328e-05,
+    9.045119536145089e-05,
+    9.043999131954012e-05,
+    9.03908609567831e-05,
+    9.029989781153676e-05
+  ],
+  "expert_edge": [
+    0.00637806647577329,
+    0.0002595849193778868,
+    0.00035233403524706286,
+    0.0004847856876903023,
+    0.000589165182184213,
+    0.0007099800328992737,
+    0.0008775644565526492,
+    0.0009811833403849354,
+    0.0010694159361987541
+  ],
+  "lr": [
+    2.5e-05,
+    5e-05,
+    7.500000000000001e-05,
+    0.0001,
+    0.000125,
+    0.00015000000000000001,
+    0.00017500000000000003,
+    0.0002,
+    0.00022500000000000002
+  ]
+}