# ARAVALLI-1 Sovereign Model Configuration
# Architecture: Decoder-Only Transformer (Scratch Build)

model_params:
  vocab_size: 50257              # Size of the custom Indic-BPE tokenizer vocabulary
  n_positions: 4096              # Context window (approx. 15-20 Annexures)
  n_embd: 2048                   # Embedding dimension (Width of the brain)
  n_layer: 24                    # Number of transformer blocks (Depth of logic)
  n_head: 16                     # Attention heads for parallel feature processing
  resid_pdrop: 0.1               # Dropout rate on residual connections
  embd_pdrop: 0.1                # Dropout rate on embeddings
  attn_pdrop: 0.1                # Dropout rate on attention weights
  layer_norm_epsilon: 1.0e-5     # Stability constant (dotted form so YAML 1.1 parsers read a float)
  initializer_range: 0.02        # Std dev of the normal weight initialization

architecture_features:
  activation_function: "swiglu"  # Gated non-linearity for smoother gradients
  positional_encoding: "rope"    # Rotary positional embeddings, applied inside attention
  normalization: "rmsnorm"       # Cheaper than standard LayerNorm (no mean subtraction, no bias)
  bias: false                    # No bias terms in linear layers, for scaling efficiency

sovereign_metadata:
  model_name: "ARAVALLI-1-MVP"
  version: "0.1.0-ALPHA"
  alignment: "CATEGORY-1-SN"     # Hardcoded survival priority
  authority: "GOEC-SECRETARIAT"
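
A minimal sketch of loading and sanity-checking this config with PyYAML. The file name `aravalli1.yaml`, the `ModelParams` dataclass, and the divisibility check are illustrative assumptions, not part of the spec:

```python
from dataclasses import dataclass

import yaml  # PyYAML, assumed installed


@dataclass
class ModelParams:
    # Field names mirror the model_params keys above.
    vocab_size: int
    n_positions: int
    n_embd: int
    n_layer: int
    n_head: int
    resid_pdrop: float
    embd_pdrop: float
    attn_pdrop: float
    layer_norm_epsilon: float
    initializer_range: float


# Hypothetical file name; adjust to wherever the config lives.
with open("aravalli1.yaml") as f:
    cfg = yaml.safe_load(f)

params = ModelParams(**cfg["model_params"])

# The embedding width must split evenly across attention heads.
assert params.n_embd % params.n_head == 0, "n_embd must be divisible by n_head"
print(params.n_embd // params.n_head)  # head_dim = 2048 / 16 = 128
```

Note that under YAML 1.1 (which PyYAML implements), `1e-5` without a decimal point parses as a string, which is why the config spells the epsilon as `1.0e-5`.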
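
To make the `architecture_features` concrete, here is a minimal PyTorch sketch of one decoder block wiring those choices together: pre-norm RMSNorm, causal attention with rotary embeddings, a SwiGLU MLP, and bias-free linears. The 4x MLP expansion and the split-half RoPE rotation variant are assumptions (the config does not pin either down), and the block is a sketch, not the project's implementation:

```python
import torch
import torch.nn as nn
import torch.nn.functional as F


class RMSNorm(nn.Module):
    """Root-mean-square normalization: no mean subtraction, no bias."""
    def __init__(self, dim: int, eps: float = 1e-5):
        super().__init__()
        self.eps = eps
        self.weight = nn.Parameter(torch.ones(dim))

    def forward(self, x):
        rms = x.pow(2).mean(dim=-1, keepdim=True).add(self.eps).rsqrt()
        return self.weight * (x * rms)


def rope(x, base: float = 10000.0):
    """Rotary embeddings on (batch, heads, seq, head_dim), split-half variant."""
    _, _, t, d = x.shape
    half = d // 2
    freqs = base ** (-torch.arange(half, device=x.device).float() / half)
    angles = torch.arange(t, device=x.device).float()[:, None] * freqs[None, :]
    cos, sin = angles.cos(), angles.sin()  # each (t, half), broadcasts over batch/heads
    x1, x2 = x[..., :half], x[..., half:]
    return torch.cat([x1 * cos - x2 * sin, x1 * sin + x2 * cos], dim=-1)


class DecoderBlock(nn.Module):
    """Pre-norm block: RMSNorm -> causal attention (RoPE) -> RMSNorm -> SwiGLU MLP."""
    def __init__(self, n_embd=2048, n_head=16, eps=1e-5,
                 resid_pdrop=0.1, attn_pdrop=0.1):
        super().__init__()
        self.n_head = n_head
        self.head_dim = n_embd // n_head
        self.attn_pdrop = attn_pdrop
        self.norm1 = RMSNorm(n_embd, eps)
        self.norm2 = RMSNorm(n_embd, eps)
        self.qkv = nn.Linear(n_embd, 3 * n_embd, bias=False)  # bias: false
        self.proj = nn.Linear(n_embd, n_embd, bias=False)
        # SwiGLU: silu(gate) * up, then project back down.
        # The 4x hidden width is an assumed choice, not from the config.
        hidden = 4 * n_embd
        self.w_gate = nn.Linear(n_embd, hidden, bias=False)
        self.w_up = nn.Linear(n_embd, hidden, bias=False)
        self.w_down = nn.Linear(hidden, n_embd, bias=False)
        self.drop = nn.Dropout(resid_pdrop)

    def forward(self, x):
        b, t, c = x.shape
        q, k, v = self.qkv(self.norm1(x)).chunk(3, dim=-1)
        q = rope(q.view(b, t, self.n_head, self.head_dim).transpose(1, 2))
        k = rope(k.view(b, t, self.n_head, self.head_dim).transpose(1, 2))
        v = v.view(b, t, self.n_head, self.head_dim).transpose(1, 2)
        out = F.scaled_dot_product_attention(
            q, k, v, is_causal=True,
            dropout_p=self.attn_pdrop if self.training else 0.0,
        )
        x = x + self.drop(self.proj(out.transpose(1, 2).reshape(b, t, c)))
        h = self.norm2(x)
        x = x + self.drop(self.w_down(F.silu(self.w_gate(h)) * self.w_up(h)))
        return x


# Usage: one forward pass at the configured width.
block = DecoderBlock()
y = block(torch.randn(2, 128, 2048))  # (batch, seq, n_embd)
```

The full model would stack `n_layer: 24` of these blocks between a `vocab_size x n_embd` token embedding and an output head; RoPE makes a separate learned positional embedding table unnecessary.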