#!/bin/bash
# ============================================================================
# VIVEKANANDA AI - AUTOMATED SETUP SCRIPT
# Sets up entire project structure and dependencies
#
# All paths are relative to the current working directory, so run this from
# the directory that contains requirements.txt.
#
# Steps:
#   1. Create the data/, vectorstore/, models/, scripts/, notebooks/ and
#      outputs/ directory tree.
#   2. Create (if missing) and activate a Python virtual environment in ./venv.
#   3. Upgrade pip, setuptools and wheel.
#   4. Install requirements.txt (aborts if the file is missing).
#   5. Download the spaCy model en_core_web_sm.
#   6. Download the NLTK 'punkt' and 'stopwords' data packages.
#   7. Import the key libraries and print their versions and the torch device.
# ============================================================================

# Exit on error, on use of an unset variable, and on failure of any pipeline
# stage.
set -euo pipefail

# Colors for output.
# ANSI-C quoting ($'...') stores the real ESC byte in the variable, so a plain
# `echo` renders the colour; a single-quoted '\033[...' would be printed
# literally because bash's echo does not interpret backslashes without -e.
# Colours are disabled when stdout is not a terminal to keep logs clean.
if [[ -t 1 ]]; then
  RED=$'\033[0;31m'
  GREEN=$'\033[0;32m'
  YELLOW=$'\033[1;33m'
  NC=$'\033[0m' # No Color
else
  RED=''
  GREEN=''
  YELLOW=''
  NC=''
fi
readonly RED GREEN YELLOW NC

echo "========================================================================"
echo "🕉️ VIVEKANANDA AI - PROJECT SETUP"
echo "========================================================================"

# ============================================================================
# STEP 1: CREATE DIRECTORY STRUCTURE
# ============================================================================

echo ""
echo "${GREEN}[STEP 1/7] Creating directory structure...${NC}"

mkdir -p \
  data/raw/complete_works \
  data/raw/individual_books \
  data/processed \
  data/extracted_text \
  vectorstore \
  models/base \
  models/fine_tuned \
  scripts \
  notebooks \
  outputs/logs \
  outputs/results

echo "✓ Directory structure created"

# ============================================================================
# STEP 2: CREATE PYTHON VIRTUAL ENVIRONMENT
# ============================================================================

echo ""
echo "${GREEN}[STEP 2/7] Creating Python virtual environment...${NC}"

# Test for the activate script rather than just the directory, so that a
# partially created venv/ is rebuilt instead of failing at `source` below.
if [[ ! -f "venv/bin/activate" ]]; then
  if ! command -v python3 >/dev/null 2>&1; then
    echo "${RED}✗ python3 not found on PATH!${NC}" >&2
    exit 1
  fi
  python3 -m venv venv
  echo "✓ Virtual environment created"
else
  echo "✓ Virtual environment already exists"
fi

# Activate virtual environment.
# nounset is relaxed while sourcing as a precaution, in case the activate
# script reads variables that are not set in this shell.
set +u
# shellcheck disable=SC1091 — file is generated at runtime by `python3 -m venv`
source venv/bin/activate
set -u

# ============================================================================
# STEP 3: UPGRADE PIP
# ============================================================================

echo ""
echo "${GREEN}[STEP 3/7] Upgrading pip...${NC}"

pip install --upgrade pip setuptools wheel

echo "✓ Pip upgraded"

# ============================================================================
# STEP 4: INSTALL DEPENDENCIES
# ============================================================================

echo ""
echo "${GREEN}[STEP 4/7] Installing dependencies...${NC}"

if [[ -f "requirements.txt" ]]; then
  pip install -r requirements.txt
  echo "✓ Dependencies installed"
else
  echo "${RED}✗ requirements.txt not found!${NC}" >&2
  exit 1
fi

# ============================================================================
# STEP 5: DOWNLOAD SPACY MODEL
# ============================================================================

echo ""
echo "${GREEN}[STEP 5/7] Downloading spaCy model...${NC}"

python -m spacy download en_core_web_sm

echo "✓ spaCy model downloaded"

# ============================================================================
# STEP 6: DOWNLOAD NLTK DATA
# ============================================================================

echo ""
echo "${GREEN}[STEP 6/7] Downloading NLTK data...${NC}"

# nltk.download() signals failure through a false return value, so the result
# is checked explicitly; otherwise success would be reported even when nothing
# was downloaded. The quoted heredoc delimiter stops the shell from expanding
# anything inside the Python source.
python - <<'PY'
import sys

import nltk

failed = [pkg for pkg in ('punkt', 'stopwords')
          if not nltk.download(pkg, quiet=True)]
if failed:
    sys.exit('✗ Failed to download NLTK data: ' + ', '.join(failed))
print('✓ NLTK data downloaded')
PY

# ============================================================================
# STEP 7: VERIFY INSTALLATION
# ============================================================================

echo ""
echo "${GREEN}[STEP 7/7] Verifying installation...${NC}"

# Any failed import makes python exit non-zero, which aborts the script via
# `set -e` before "Installation verified" is printed.
python - <<'PY'
import sys

import torch
import spacy
import nltk
from langchain import __version__ as lc_version

print(f'Python: {sys.version.split()[0]}')
print(f'PyTorch: {torch.__version__}')
print(f'spaCy: {spacy.__version__}')
print(f'NLTK: {nltk.__version__}')
print(f'LangChain: {lc_version}')

# Check device.
# torch.backends.mps is looked up with getattr so that PyTorch builds without
# that submodule fall through to the CUDA/CPU checks instead of raising
# AttributeError.
mps = getattr(torch.backends, 'mps', None)
if mps is not None and mps.is_available():
    print('Device: MPS (Apple Silicon) ✓')
elif torch.cuda.is_available():
    print('Device: CUDA ✓')
else:
    print('Device: CPU')
PY

echo ""
echo "✓ Installation verified"

# ============================================================================
# FINAL INSTRUCTIONS
# ============================================================================

# Unquoted heredoc delimiter: the colour variables are expanded; the text
# contains no other shell metacharacters.
cat <<EOF

========================================================================
${GREEN}🎉 SETUP COMPLETE!${NC}
========================================================================

Next steps:

1. ${YELLOW}Add your data:${NC}
 - PDFs → data/raw/
 - JSON dataset → data/processed/
 - Mistral model → models/base/

2. ${YELLOW}Configure:${NC}
 - Edit config.yaml to customize settings

3. ${YELLOW}Create embeddings:${NC}
 source venv/bin/activate
 python scripts/01_embed_data.py

4. ${YELLOW}Test RAG:${NC}
 python scripts/02_query_rag.py

5. ${YELLOW}Run Streamlit:${NC}
 streamlit run app.py

========================================================================
EOF