# Spaces:
# Running
# Running
# Step 1: Environment Setup and Dependency Installation
#
# Resolves the project root as the parent of the directory that contains this
# script and makes it the working directory, so the relative paths used further
# down (requirements.txt, data/, src/ ...) do not depend on the caller's cwd.
echo "Setting up Python environment and installing dependencies..."

# Abort on the first command that exits non-zero.
set -e

# BASH_SOURCE[0] is the path of this script; "<script dir>/.." is the root.
# The `if !` form lets us print a diagnostic instead of dying silently.
if ! PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"; then
  echo "Error: could not resolve the project root from ${BASH_SOURCE[0]}" >&2
  exit 1
fi
echo "Environment setup - Project root: $PROJECT_ROOT"

cd "$PROJECT_ROOT"
echo "Changed to project directory: $(pwd)"
# Check Python version
# The interpreter's own version banner is captured (stderr merged in, since
# some Python releases print it there) and echoed for the log.
# With `set -e` active, a missing python3 makes this assignment fail and stops
# the script here.
python_version=$(python3 --version 2>&1)
echo "Using Python: $python_version"

# Skip virtual environment creation for now
# Nothing is created or activated here; the messages only record that the
# installs that follow go to the global environment.
echo "Skipping virtual environment creation (will be created later if needed)"
echo "Installing packages globally for now..."
# Install the Python packages the project needs.
# pip is invoked as `python3 -m pip` so that packages are installed into the
# same interpreter the script reports via `python3 --version`, rather than
# into whichever `pip` executable happens to be first on PATH.

# Upgrade pip
echo "Upgrading pip..."
python3 -m pip install --upgrade pip

# Install dependencies
# requirements.txt is resolved relative to the current working directory.
echo "Installing dependencies from requirements.txt..."
python3 -m pip install -r requirements.txt

# Install additional development dependencies
echo "Installing additional development tools..."
python3 -m pip install jupyter ipykernel

# Set up pre-commit hooks (optional)
# NOTE(review): this only installs the tools; `pre-commit install` is not run
# here, so no git hook is actually registered - confirm that is intended.
echo "Setting up development tools..."
python3 -m pip install pre-commit black flake8
# Download NLTK data (required for some evaluations)
# Best-effort: a failed download is reported as a note and does not stop the
# script. `nltk.download` signals most failures by returning False rather than
# raising, so the return value is checked as well as exceptions; otherwise a
# failed download would still be reported as a success.
# `import nltk` stays outside the guarded section, so a missing nltk package
# still exits non-zero.
# The quoted heredoc delimiter keeps the shell from touching the Python text.
echo "Downloading NLTK data..."
python3 - <<'PY'
import nltk

problems = []
for package in ('punkt', 'stopwords'):
    try:
        if not nltk.download(package, quiet=True):
            problems.append(f'{package} (download returned False)')
    except Exception as e:
        problems.append(f'{package} ({e})')

if problems:
    print('Note: NLTK download failed: ' + ', '.join(problems))
else:
    print('NLTK data downloaded successfully')
PY
# Check if CUDA is available
# Prints the name of device 0 and torch's CUDA version when torch reports a
# usable GPU, otherwise a CPU-fallback message. `import torch` must succeed:
# if it does not, the interpreter exits non-zero.
# The quoted heredoc delimiter keeps the shell from touching the Python text.
echo "Checking CUDA availability..."
python3 - <<'PY'
import torch

if torch.cuda.is_available():
    print(f'CUDA available: {torch.cuda.get_device_name(0)}')
    print(f'CUDA version: {torch.version.cuda}')
else:
    print('CUDA not available, will use CPU')
PY
# Set up Weights & Biases (optional)
# Installs wandb only when no `wandb` command is found on PATH. Logging in is
# left to the user; this step never asks for or stores an API key.
echo "Setting up Weights & Biases (optional)..."
if command -v wandb >/dev/null 2>&1; then
  echo "wandb is already installed"
else
  # Same interpreter-bound pip invocation as recommended by pip's docs.
  python3 -m pip install wandb
fi
echo "Note: To use wandb logging, run 'wandb login' and set your API key"
# Create necessary directories
# Paths are relative to the current working directory. `-p` creates missing
# parents and is a no-op for directories that already exist, so re-running is
# safe. Brace expansion (a Bash feature) fans each line out into one path per
# listed name; `--` ends option parsing before the paths.
echo "Creating project directories..."
mkdir -p -- data/{chroniclingqa,temprageval,generated_questions,atlas_2021,fineweb}
mkdir -p -- models/{cache,time_aware_contriever}
mkdir -p -- outputs/{chroniclingqa,temprageval,mrag}
mkdir -p -- logs
# Download FineWeb dataset (matches notebook exactly)
# Runs the project's loader script, resolved relative to the current working
# directory, with python3 - the interpreter this script reports elsewhere.
echo ""
echo "Downloading and processing FineWeb dataset..."
echo "This will collect 500,000 temporal passages from FineWeb-edu (this may take some time)..."
python3 src/fineweb_loader.py

# Closing summary and pointer to the next pipeline script.
echo "Environment setup completed successfully!"
# `command -v` is the shell builtin way to resolve a command's path; it does
# not depend on an external `which` being installed.
echo "Using Python at: $(command -v python3)"
echo ""
echo "Note: Packages installed globally. You can create a virtual environment later if needed."
echo ""
echo "Next step: Run ./scripts/02_generate_questions.sh"