from dataclasses import dataclass @dataclass class ModelConfig: # Backbone vocab_size: int = 8192 d_model: int = 256 n_heads: int = 4 n_layers: int = 6 d_ff: int = 512 max_seq_len: int = 512 dropout: float = 0.1 # Attention Residuals use_attn_res: bool = True # Branching use_branches: bool = True n_branches: int = 2 diversity_weight: float = 0.1 branch_diversity_target: float = 0.08 # Verifier use_verifier: bool = True verifier_entropy_weight: float = 0.4 verifier_agreement_weight: float = 0.4 verifier_consistency_weight: float = 0.2 verifier_temperature: float = 4.0 # Plastic Layer use_plastic: bool = True plastic_lr: float = 1e-4 plastic_decay: float = 0.99 plastic_l2_weight: float = 0.01 plastic_hidden: int = 64 plastic_noise_scale: float = 0.05 plastic_mask_ratio: float = 0.15 plastic_train_updates: int = 1 # Equilibrium / Routing (Phase 0) eq_momentum: float = 0.1 eq_warmup_steps: int = 50 router_lr: float = 3e-5 router_warmup_steps: int = 500 router_entropy_weight: float = 0.01 route_temperature: float = 8.0 route_threshold_momentum: float = 0.2 route_threshold_offset_scale: float = 0.2 route_forward_target: float = 0.55 route_branch_target: float = 0.25 route_backward_target: float = 0.15 route_plastic_target: float = 0.05 # Anchor V1 use_fog_flow: bool = False fog_task_profile: str = "auto" fog_compare_ratio: float = 0.25 fog_memory_ratio: float = 0.75 fog_expand_ratio: float = 2.0 fog_gate_ratio: float = 0.125 anchor_prior_weight: float = 1.0 anchor_runtime_weight: float = 1.0 anchor_threshold: float = 0.65 anchor_domain_mode: str = "auto" anchor_max_candidates: int = 6 anchor_ttl_init: float = 4.0 anchor_support_decay: float = 0.9 anchor_candidate_promote_threshold: float = 0.55 anchor_confirm_threshold: float = 0.7 anchor_revision_threshold: float = 0.35 anchor_contradiction_threshold: float = 0.65 anchor_dead_end_threshold: float = 0.85 anchor_arbiter_beta: float = 8.0 anchor_arbiter_revise_threshold: float = 0.45 anchor_revision_temperature: float = 1.0 anchor_viability_alpha: float = 2.0 anchor_viability_beta: float = 2.5 anchor_age_gamma: float = 0.25 anchor_descendant_mass_delta: float = 0.75 anchor_descendant_coherence_eta: float = 0.75 anchor_detector_alignment_weight: float = 0.05 anchor_context_stability_weight: float = 0.01 anchor_dependency_threshold: float = 0.55 anchor_dependency_confirm_slope: float = 0.10 anchor_dependency_temporal_window: float = 16.0 anchor_dependency_similarity_weight: float = 0.55 anchor_dependency_temporal_weight: float = 0.20 anchor_dependency_support_weight: float = 0.15 anchor_dependency_viability_weight: float = 0.10 anchor_dependency_max_predecessors: int = 4 anchor_dependency_counterfactual_top_edges: int = 0 anchor_dependency_future_window: int = 16 anchor_context_min_viability: float = 0.30 anchor_use_future_proposal_head: bool = True anchor_future_proposal_trigger: float = 0.35 anchor_future_proposal_hidden: int = 64 anchor_future_proposal_threshold: float = 0.58 anchor_future_proposal_temperature: float = 0.75 anchor_future_proposal_horizon_scale: float = 4.0 anchor_future_proposal_span_scale: float = 4.0 anchor_future_proposal_max_horizon: int = 32 anchor_future_proposal_max_windows: int = 48 anchor_future_proposal_topk: int = 4 anchor_future_proposal_residual_scale: float = 0.10 anchor_proposal_score_weight: float = 0.05 anchor_proposal_margin_weight: float = 0.05 anchor_proposal_alignment_weight: float = 0.02 anchor_proposal_counterfactual_weight: float = 0.05 anchor_proposal_margin_target: float = 0.05 anchor_proposal_target_temperature: float = 0.15 anchor_proposal_counterfactual_margin: float = 0.02 anchor_proposal_counterfactual_window: int = 4 anchor_use_proposal_rollout: bool = True anchor_proposal_rollout_steps: int = 4 anchor_proposal_rollout_hidden: int = 64 anchor_proposal_rollout_weight: float = 0.05 anchor_proposal_rollout_margin: float = 0.02 anchor_proposal_rollout_residual_scale: float = 0.15 anchor_proposal_rollout_pressure_trigger: float = 0.45 anchor_proposal_rollout_score_trigger: float = 0.90 # Training learning_rate: float = 3e-4 weight_decay: float = 0.01 warmup_steps: int = 100 max_steps: int = 5000 batch_size: int = 32 eval_interval: int = 100 gradient_clip: float = 1.0 # Ablation presets BASELINE_0 = ModelConfig( use_attn_res=False, use_branches=False, use_verifier=False, use_plastic=False, ) BASELINE_1_ATTNRES = ModelConfig( use_attn_res=True, use_branches=False, use_verifier=False, use_plastic=False, ) BASELINE_2_BRANCHES = ModelConfig( use_attn_res=True, use_branches=True, use_verifier=True, use_plastic=False, ) BASELINE_3_PLASTIC = ModelConfig( use_attn_res=True, use_branches=False, use_verifier=False, use_plastic=True, ) FULL_MODEL = ModelConfig( use_attn_res=True, use_branches=True, use_verifier=True, use_plastic=True, ) TOY_CONFIG = ModelConfig( vocab_size=512, d_model=64, n_heads=2, n_layers=3, d_ff=128, max_seq_len=128, plastic_hidden=16, ) SCALEUP_CONFIG = ModelConfig( vocab_size=512, d_model=512, n_heads=8, n_layers=4, d_ff=1024, max_seq_len=128, plastic_hidden=128, anchor_threshold=0.2, anchor_ttl_init=4.0, anchor_dead_end_threshold=0.5 ) PRESETS = { "baseline-0": BASELINE_0, "baseline-1-attnres": BASELINE_1_ATTNRES, "baseline-2-branches": BASELINE_2_BRANCHES, "baseline-3-plastic": BASELINE_3_PLASTIC, "full": FULL_MODEL, "toy": TOY_CONFIG, "scaleup": SCALEUP_CONFIG, }