Create config/model_config.yaml
Browse files- config/model_config.yaml +26 -0
config/model_config.yaml
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
---
# ARAVALLI-1 Sovereign Model Configuration
# Architecture: Decoder-Only Transformer (Scratch Build)

model_params:
  vocab_size: 50257  # Custom Indic-BPE Tokenizer range
  n_positions: 4096  # Context window (approx 15-20 Annexures)
  n_embd: 2048  # Embedding dimension (Width of the brain)
  n_layer: 24  # Number of transformer blocks (Depth of logic)
  n_head: 16  # Attention heads for parallel feature processing
  resid_pdrop: 0.1  # Dropout rate for regularization
  embd_pdrop: 0.1
  attn_pdrop: 0.1
  # NOTE: must be written 1.0e-5, not 1e-5 — YAML 1.1 loaders (PyYAML)
  # resolve "1e-5" as a string because the float pattern requires a dot.
  layer_norm_epsilon: 1.0e-5  # Stability constant
  initializer_range: 0.02  # Xavier/Kaiming initialization scale

architecture_features:
  activation_function: "swiglu"  # Advanced non-linearity for smoother gradients
  positional_encoding: "rope"  # Rotary Positional Embeddings (SOTA)
  normalization: "rmsnorm"  # Faster, more stable than standard LayerNorm
  bias: false  # Removing bias for better scaling efficiency

sovereign_metadata:
  model_name: "ARAVALLI-1-MVP"
  version: "0.1.0-ALPHA"  # quoted: bare 0.1.0-ALPHA is safe, but keep as string
  alignment: "CATEGORY-1-SN"  # Hardcoded survival priority
  authority: "GOEC-SECRETARIAT"