| { | |
| "num_classes": 100, | |
| "img_size": 32, | |
| "patch_size": 4, | |
| "visual_dim": 512, | |
| "geom_dim": 256, | |
| "k_simplex": 4, | |
| "depth": 8, | |
| "num_heads": 8, | |
| "mlp_ratio": 4.0, | |
| "dropout": 0.0, | |
| "num_geom_tokens": 8, | |
| "pe_levels": 12, | |
| "pe_features_per_level": 2, | |
| "pe_smooth_tau": 0.25, | |
| "simplex_init_method": "regular", | |
| "simplex_init_scale": 1.0, | |
| "batch_size": 512, | |
| "num_epochs": 150, | |
| "learning_rate": 0.0001, | |
| "weight_decay": 0.005, | |
| "warmup_epochs": 10, | |
| "task_loss_weight": 0.5, | |
| "flow_loss_weight": 1.5, | |
| "coherence_loss_weight": 0.5, | |
| "multiscale_loss_weight": 0.3, | |
| "use_adaptive_augmentation": false, | |
| "overfit_threshold": 0.05, | |
| "augmentation_cooldown_epochs": 5, | |
| "min_accuracy_for_augmentation": 0.45, | |
| "mixup_alpha": 0.2, | |
| "cutmix_alpha": 1.0, | |
| "use_cutmix_schedule": true, | |
| "cutmix_schedule": [ | |
| [ | |
| 0, | |
| 0.2 | |
| ], | |
| [ | |
| 20, | |
| 0.5 | |
| ], | |
| [ | |
| 40, | |
| 1.0 | |
| ], | |
| [ | |
| 60, | |
| 1.2 | |
| ], | |
| [ | |
| 80, | |
| 1.5 | |
| ], | |
| [ | |
| 100, | |
| 1.8 | |
| ], | |
| [ | |
| 120, | |
| 2.0 | |
| ] | |
| ], | |
| "device": "cuda", | |
| "num_workers": 4, | |
| "pin_memory": true, | |
| "save_dir": "./checkpoints_dualstream", | |
| "save_every": 10, | |
| "use_safetensors": true, | |
| "timestamp_dirs": true, | |
| "push_to_hub": true, | |
| "hub_model_id": "AbstractPhil/vit-beatrix-dualstream", | |
| "hub_model_name": "beatrix-trainC-chaos-native", | |
| "hub_upload_best_only": true, | |
| "hub_upload_every_n_epochs": 10, | |
| "use_tensorboard": true, | |
| "log_dir": "./logs_dualstream", | |
| "log_every": 50, | |
| "monitor_stream_health": true, | |
| "log_stream_norms": true | |
| } |