#!/bin/bash
# Source: VivekanandaAI / setup.sh (uploaded by jyotirmoy05, "Upload 8 files", commit e889148)
# ============================================================================
# VIVEKANANDA AI - AUTOMATED SETUP SCRIPT
# Sets up entire project structure and dependencies
#
# All paths used by this script are relative, so run it from the directory
# that should become the project root:  bash setup.sh
# ============================================================================
set -e # Exit on error

echo "========================================================================"
echo "🕉️ VIVEKANANDA AI - PROJECT SETUP"
echo "========================================================================"

# Colors for output.
# ANSI-C quoting ($'...') stores the real ESC byte in each variable, so a plain
# `echo "${GREEN}text${NC}"` emits a color code instead of a literal "\033[...".
RED=$'\033[0;31m'
GREEN=$'\033[0;32m'
YELLOW=$'\033[1;33m'
NC=$'\033[0m' # No Color
# ============================================================================
# STEP 1: CREATE DIRECTORY STRUCTURE
# ============================================================================
echo ""
# printf %b interprets backslash escapes such as "\033" inside the color
# variables (plain echo would print them literally); bytes that are already
# real ESC characters pass through unchanged.
printf '%b\n' "${GREEN}[STEP 1/7] Creating directory structure...${NC}"

# -p creates missing parents and succeeds for directories that already exist,
# so the script can be re-run safely.
mkdir -p \
  data/raw/complete_works \
  data/raw/individual_books \
  data/processed \
  data/extracted_text \
  vectorstore \
  models/base \
  models/fine_tuned \
  scripts \
  notebooks \
  outputs/logs \
  outputs/results

echo "✓ Directory structure created"
# ============================================================================
# STEP 2: CREATE PYTHON VIRTUAL ENVIRONMENT
# ============================================================================
echo ""
# printf %b interprets backslash escapes such as "\033" inside the color
# variables; plain echo would print them literally.
printf '%b\n' "${GREEN}[STEP 2/7] Creating Python virtual environment...${NC}"

# Key on the activate script rather than the bare directory: an interrupted
# earlier run can leave a venv/ directory without it, and the `source` below
# would then fail. `python3 -m venv` can populate an existing directory.
if [[ -f "venv/bin/activate" ]]; then
  echo "✓ Virtual environment already exists"
else
  python3 -m venv venv
  echo "✓ Virtual environment created"
fi

# Activate virtual environment
# shellcheck disable=SC1091 # file is generated at runtime by the step above
source venv/bin/activate
# ============================================================================
# STEP 3: UPGRADE PIP
# ============================================================================
echo ""
# printf %b interprets backslash escapes such as "\033" inside the color
# variables; plain echo would print them literally.
printf '%b\n' "${GREEN}[STEP 3/7] Upgrading pip...${NC}"

# Run pip as a module of whichever `python` is first on PATH (the venv's, once
# it has been activated) so the upgrade cannot hit a different pip executable.
python -m pip install --upgrade pip setuptools wheel
echo "✓ Pip upgraded"
# ============================================================================
# STEP 4: INSTALL DEPENDENCIES
# ============================================================================
echo ""
# printf %b interprets backslash escapes such as "\033" inside the color
# variables; plain echo would print them literally.
printf '%b\n' "${GREEN}[STEP 4/7] Installing dependencies...${NC}"

# Guard clause: without a requirements file there is nothing to install, so
# report the problem on stderr and stop with a non-zero status.
if [[ ! -f "requirements.txt" ]]; then
  printf '%b\n' "${RED}✗ requirements.txt not found!${NC}" >&2
  exit 1
fi

python -m pip install -r requirements.txt
echo "✓ Dependencies installed"
# ============================================================================
# STEP 5: DOWNLOAD SPACY MODEL
# ============================================================================
echo ""
# printf %b interprets backslash escapes such as "\033" inside the color
# variables; plain echo would print them literally.
printf '%b\n' "${GREEN}[STEP 5/7] Downloading spaCy model...${NC}"

# Fetches the small English pipeline; a non-zero exit aborts the script when
# `set -e` is in effect.
python -m spacy download en_core_web_sm
echo "✓ spaCy model downloaded"
# ============================================================================
# STEP 6: DOWNLOAD NLTK DATA
# ============================================================================
echo ""
# printf %b interprets backslash escapes such as "\033" inside the color
# variables; plain echo would print them literally.
printf '%b\n' "${GREEN}[STEP 6/7] Downloading NLTK data...${NC}"

# The quoted heredoc delimiter keeps the shell from expanding anything inside
# the Python program.
python - <<'PY'
import sys

import nltk

# nltk.download() signals failure by returning False instead of raising, so
# the result must be checked explicitly or a failed download looks like success.
failed = [pkg for pkg in ('punkt', 'stopwords') if not nltk.download(pkg, quiet=True)]
if failed:
    # sys.exit(str) prints the message to stderr and exits with status 1.
    sys.exit('✗ Failed to download NLTK data: ' + ', '.join(failed))
print('✓ NLTK data downloaded')
PY
# ============================================================================
# STEP 7: VERIFY INSTALLATION
# ============================================================================
echo ""
# printf %b interprets backslash escapes such as "\033" inside the color
# variables; plain echo would print them literally.
printf '%b\n' "${GREEN}[STEP 7/7] Verifying installation...${NC}"

# Import every key package and print its version; any ImportError makes Python
# exit non-zero. The quoted heredoc delimiter keeps the shell from touching the
# braces and quotes in the f-strings.
python - <<'PY'
import sys

import torch
import spacy
import nltk
from langchain import __version__ as lc_version

print(f'Python: {sys.version.split()[0]}')
print(f'PyTorch: {torch.__version__}')
print(f'spaCy: {spacy.__version__}')
print(f'NLTK: {nltk.__version__}')
print(f'LangChain: {lc_version}')

# Check device
# torch.backends.mps is absent on older PyTorch builds; look it up defensively
# so the check falls through to CUDA/CPU instead of raising AttributeError.
mps_backend = getattr(torch.backends, 'mps', None)
if mps_backend is not None and mps_backend.is_available():
    print('Device: MPS (Apple Silicon) ✓')
elif torch.cuda.is_available():
    print('Device: CUDA ✓')
else:
    print('Device: CPU')
PY

echo ""
echo "✓ Installation verified"
# ============================================================================
# FINAL INSTRUCTIONS
# ============================================================================

#######################################
# Print the completion banner and the follow-up steps.
# Globals:   GREEN, YELLOW, NC (read) - color codes, either as real ESC bytes
#            or as backslash escapes such as "\033[0;32m"
# Arguments: none
# Outputs:   writes 26 lines to stdout
#######################################
print_next_steps() {
  # printf reuses the format for every argument, so each string below becomes
  # one output line. %b interprets backslash escapes in the color variables;
  # plain echo would print "\033[...]" literally.
  printf '%b\n' \
    "" \
    "========================================================================" \
    "${GREEN}🎉 SETUP COMPLETE!${NC}" \
    "========================================================================" \
    "" \
    "Next steps:" \
    "" \
    "1. ${YELLOW}Add your data:${NC}" \
    " - PDFs → data/raw/" \
    " - JSON dataset → data/processed/" \
    " - Mistral model → models/base/" \
    "" \
    "2. ${YELLOW}Configure:${NC}" \
    " - Edit config.yaml to customize settings" \
    "" \
    "3. ${YELLOW}Create embeddings:${NC}" \
    " source venv/bin/activate" \
    " python scripts/01_embed_data.py" \
    "" \
    "4. ${YELLOW}Test RAG:${NC}" \
    " python scripts/02_query_rag.py" \
    "" \
    "5. ${YELLOW}Run Streamlit:${NC}" \
    " streamlit run app.py" \
    "" \
    "========================================================================"
}

print_next_steps