#!/bin/bash
#
# Vivekananda AI - project setup.
#
# Bootstraps a local development environment in the current directory:
#   1. creates the data / model / output directory tree
#   2. creates (or reuses) a Python virtual environment in ./venv
#   3. upgrades pip, setuptools and wheel inside that environment
#   4. installs the packages listed in requirements.txt
#   5. downloads the spaCy en_core_web_sm model
#   6. downloads the NLTK punkt and stopwords data
#   7. prints the installed versions and the torch device that is available
#
# Usage: run from the directory that contains requirements.txt.
# Exits non-zero as soon as any step fails.

set -e

# ANSI-C quoting ($'...') stores the real ESC byte in each variable. With
# ordinary single quotes, bash's echo (no -e) prints "\033[..." literally
# instead of colouring the output.
readonly RED=$'\033[0;31m'
readonly GREEN=$'\033[0;32m'
readonly YELLOW=$'\033[1;33m'
readonly NC=$'\033[0m'
readonly RULE="========================================================================"

#######################################
# Print a blank line followed by a green step banner.
# Arguments: $1 - step number (out of 7)
#            $2 - step description
# Outputs:   writes the banner to stdout
#######################################
step() {
  printf '\n%s[STEP %s/7] %s%s\n' "${GREEN}" "$1" "$2" "${NC}"
}

#######################################
# Print a success line prefixed with a check mark.
# Arguments: $1 - message
# Outputs:   writes the message to stdout
#######################################
ok() {
  printf '✓ %s\n' "$1"
}

echo "${RULE}"
echo "🕉️ VIVEKANANDA AI - PROJECT SETUP"
echo "${RULE}"

# ---------------------------------------------------------------------------
# Step 1: directory structure
# ---------------------------------------------------------------------------
step 1 "Creating directory structure..."

project_dirs=(
  data/raw/complete_works
  data/raw/individual_books
  data/processed
  data/extracted_text
  vectorstore
  models/base
  models/fine_tuned
  scripts
  notebooks
  outputs/logs
  outputs/results
)
mkdir -p -- "${project_dirs[@]}"

ok "Directory structure created"

# ---------------------------------------------------------------------------
# Step 2: virtual environment
# ---------------------------------------------------------------------------
step 2 "Creating Python virtual environment..."

if [[ -d venv ]]; then
  ok "Virtual environment already exists"
else
  python3 -m venv venv
  ok "Virtual environment created"
fi

# Every python/pip call below runs inside the virtual environment.
# shellcheck disable=SC1091
source venv/bin/activate

# ---------------------------------------------------------------------------
# Step 3: packaging tools
# ---------------------------------------------------------------------------
step 3 "Upgrading pip..."

# "python -m pip" guarantees the venv's interpreter runs pip, including
# while pip is replacing itself.
python -m pip install --upgrade pip setuptools wheel
ok "Pip upgraded"

# ---------------------------------------------------------------------------
# Step 4: project dependencies
# ---------------------------------------------------------------------------
step 4 "Installing dependencies..."

if [[ ! -f requirements.txt ]]; then
  echo "${RED}✗ requirements.txt not found!${NC}" >&2
  exit 1
fi
python -m pip install -r requirements.txt
ok "Dependencies installed"

# ---------------------------------------------------------------------------
# Step 5: spaCy model
# ---------------------------------------------------------------------------
step 5 "Downloading spaCy model..."

python -m spacy download en_core_web_sm
ok "spaCy model downloaded"

# ---------------------------------------------------------------------------
# Step 6: NLTK data
# ---------------------------------------------------------------------------
step 6 "Downloading NLTK data..."

# The quoted heredoc delimiter keeps the shell from expanding anything in
# the Python source.
python - <<'PY'
import sys

import nltk

# nltk.download() signals failure by returning False rather than raising,
# so check every result instead of announcing success unconditionally.
failed = [name for name in ("punkt", "stopwords")
          if not nltk.download(name, quiet=True)]
if failed:
    sys.exit("✗ NLTK download failed: " + ", ".join(failed))
print("✓ NLTK data downloaded")
PY

# ---------------------------------------------------------------------------
# Step 7: verification
# ---------------------------------------------------------------------------
step 7 "Verifying installation..."

python - <<'PY'
import sys
import torch
import spacy
import nltk
from langchain import __version__ as lc_version

print(f"Python: {sys.version.split()[0]}")
print(f"PyTorch: {torch.__version__}")
print(f"spaCy: {spacy.__version__}")
print(f"NLTK: {nltk.__version__}")
print(f"LangChain: {lc_version}")

# Check device. torch builds that predate the MPS backend have no
# torch.backends.mps attribute, so look it up defensively.
mps = getattr(torch.backends, "mps", None)
if mps is not None and mps.is_available():
    print("Device: MPS (Apple Silicon) ✓")
elif torch.cuda.is_available():
    print("Device: CUDA ✓")
else:
    print("Device: CPU")
PY

echo ""
ok "Installation verified"

# ---------------------------------------------------------------------------
# Summary and next steps
# ---------------------------------------------------------------------------
cat <<EOF

${RULE}
${GREEN}🎉 SETUP COMPLETE!${NC}
${RULE}

Next steps:

1. ${YELLOW}Add your data:${NC}
 - PDFs → data/raw/
 - JSON dataset → data/processed/
 - Mistral model → models/base/

2. ${YELLOW}Configure:${NC}
 - Edit config.yaml to customize settings

3. ${YELLOW}Create embeddings:${NC}
 source venv/bin/activate
 python scripts/01_embed_data.py

4. ${YELLOW}Test RAG:${NC}
 python scripts/02_query_rag.py

5. ${YELLOW}Run Streamlit:${NC}
 streamlit run app.py

${RULE}
EOF