time-aware-rag / scripts /colab_setup.sh
manojarulmurugan's picture
Add full pipeline code + precomputed demo_data
46b9b58 verified
#!/bin/bash
# Colab-Specific Setup for Time-Aware RAG
# This script is optimized for Google Colab environment
echo "=========================================="
echo "TIME-AWARE RAG SETUP FOR GOOGLE COLAB"
echo "=========================================="
# Set working directory
cd /content
# Clone or ensure we have the project
if [ ! -d "TimeAwareRAG_Final" ]; then
echo "Project directory not found. Please ensure the files are uploaded to /content/TimeAwareRAG_Final"
exit 1
fi
cd TimeAwareRAG_Final
# Install dependencies directly with pip3
echo "Installing required packages for Colab..."
pip3 install --upgrade pip
# Install core ML packages
pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
# Install transformers and related packages
pip3 install transformers==4.36.0
pip3 install datasets==2.14.6
pip3 install accelerate==0.24.1
pip3 install sentence-transformers==2.2.2
# Install data processing packages
pip3 install pandas numpy tqdm jsonlines
# Install evaluation packages
pip3 install scikit-learn nltk rouge-score
# Install retrieval packages
pip3 install faiss-cpu rank-bm25
# Install visualization packages
pip3 install matplotlib seaborn
# Install utilities
pip3 install click pyyaml
# Download NLTK data
python3 -c "
import nltk
try:
nltk.download('punkt', quiet=True)
nltk.download('stopwords', quiet=True)
print('βœ“ NLTK data downloaded')
except:
print('⚠ NLTK download failed')
"
# Check GPU availability
python3 -c "
import torch
print(f'PyTorch version: {torch.__version__}')
if torch.cuda.is_available():
print(f'βœ“ CUDA available: {torch.cuda.get_device_name(0)}')
print(f'βœ“ CUDA version: {torch.version.cuda}')
print(f'βœ“ GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB')
else:
print('⚠ CUDA not available, using CPU')
"
# Create necessary directories
echo "Creating project directories..."
mkdir -p data/{chroniclingqa,temprageval,generated_questions,atlas_2021}
mkdir -p models/{cache,time_aware_contriever}
mkdir -p outputs/{chroniclingqa,temprageval,mrag}
mkdir -p logs
# Set permissions
chmod +x scripts/*.sh
echo ""
echo "βœ“ Colab setup completed!"
echo "βœ“ Ready to run: !bash scripts/run_complete_pipeline.sh"
echo ""
echo "πŸš€ To start the pipeline, run:"
echo " !bash /content/TimeAwareRAG_Final/scripts/run_complete_pipeline.sh"