# ARAVALLI-1 Sovereign Pre-training Pipeline
# Status: CATEGORY 1-SN | Authority: GOEC-Secretariat
#
# Runs four stages strictly in order: data ingestion, corpus refinement,
# tokenizer training, and model pre-training. Every script path below is
# relative, so invoke this from the directory that contains data/, src/
# and config/.
#
# Environment:
#   CUDA_VISIBLE_DEVICES  GPU indices exposed to the trainer. Defaults to
#                         0,1,2,3 when unset or empty; a value supplied by
#                         the caller (or a job scheduler) is left untouched.
#
# Exit status: 0 when every stage succeeds; otherwise the exit status of the
# first stage that failed.

# Stop at the first failing stage (-e) and on any unset variable (-u). Without
# this, a failed ingestion/cleaning step would still fall through to training,
# and the completion banner would be printed even after a crash.
set -eu

echo "----------------------------------------------------------------"
echo "INITIATING ARAVALLI-1 BIRTH CYCLE: GOEC SOVEREIGN AI"
echo "----------------------------------------------------------------"

# 1. Ingestion Phase
echo "[STEP 1/4] Ingesting Global & Indigenous Data..."
python3 data/scripts/scraper.py

# 2. Refinement Phase
echo "[STEP 2/4] Refining & Hashing Sovereign Corpus..."
python3 data/scripts/cleaner.py

# 3. Linguistic Evolution
echo "[STEP 3/4] Training Sovereign Tokenizer (Indic-BPE)..."
python3 data/tokenizer_train.py

# 4. Neural Forging
echo "[STEP 4/4] Commencing Scratch Pre-training (1.2B Parameters)..."
# Expose GPUs 0-3 by default for multi-GPU training, but only as a fallback:
# an existing CUDA_VISIBLE_DEVICES (e.g. a scheduler's allocation) wins.
export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-0,1,2,3}"
python3 src/training/trainer.py --config config/model_config.yaml

# Reached only when all four stages above exited with status 0.
echo "----------------------------------------------------------------"
echo "SOVEREIGN BIRTH CYCLE COMPLETE. MODEL SEALING INITIATED."
echo "----------------------------------------------------------------"