---
# ARAVALLI-1 Sovereign Model Configuration
# Architecture: Decoder-Only Transformer (Scratch Build)
model_params:
  vocab_size: 50257  # Custom Indic-BPE Tokenizer range
  n_positions: 4096  # Context window (approx 15-20 Annexures)
  n_embd: 2048  # Embedding dimension (width of the model)
  n_layer: 24  # Number of transformer blocks (depth of the stack)
  n_head: 16  # Attention heads for parallel feature processing
  resid_pdrop: 0.1  # Dropout rate for regularization
  embd_pdrop: 0.1
  attn_pdrop: 0.1
  # Written as 1.0e-5 (not 1e-5) so YAML 1.1 parsers resolve it as a float,
  # not a string.
  layer_norm_epsilon: 1.0e-5  # Stability constant
  initializer_range: 0.02  # Xavier/Kaiming initialization scale
architecture_features:
  activation_function: "swiglu"  # Advanced non-linearity for smoother gradients
  positional_encoding: "rope"  # Rotary Positional Embeddings (SOTA)
  normalization: "rmsnorm"  # Faster, more stable than standard LayerNorm
  bias: false  # Bias terms removed for better scaling efficiency
sovereign_metadata:
  model_name: "ARAVALLI-1-MVP"
  version: "0.1.0-ALPHA"
  alignment: "CATEGORY-1-SN"  # Hardcoded survival priority
  authority: "GOEC-SECRETARIAT"