#!/usr/bin/env bash
# SparseVLM — Vast AI setup script
# Run once on a fresh instance (A100 40GB or RTX 4090 24GB recommended):
#   bash setup_vastai.sh
set -euo pipefail

# Startup banner: report which GPU (name, total memory) this instance exposes.
# When the nvidia-smi query fails, its stderr is discarded and a fixed
# placeholder is reported instead.
gpu_summary="$(nvidia-smi --query-gpu=name,memory.total --format=csv,noheader 2>/dev/null || echo 'no GPU detected')"
printf '%s\n' \
  "=== SparseVLM Vast AI Setup ===" \
  "GPU: ${gpu_summary}"

# --- system deps ---------------------------------------------------------
# Refresh the package index and install the command-line tools as two
# separate statements. In an `a && b` list, `set -e` does NOT abort when `a`
# fails, so a failed `apt-get update` would silently skip the install and the
# script would carry on without git/wget/unzip.
apt-get update -qq
# noninteractive frontend: never block waiting on a package configuration
# prompt during unattended provisioning.
DEBIAN_FRONTEND=noninteractive apt-get install -y -qq git wget unzip

# --- Python deps ---------------------------------------------------------
# pip is always invoked as `python -m pip` so that packages are installed
# into the environment of the same `python` interpreter this script later
# uses to verify the install; a bare `pip` found on PATH can belong to a
# different interpreter.
python -m pip install --quiet --upgrade pip

# Requirement specifiers are kept in an array so each one is passed to pip as
# exactly one argument (the `>=` must never be seen by the shell unquoted).
python_deps=(
  "torch>=2.1.0"
  "torchvision"
  "transformers>=4.40.0"
  "triton>=2.1.0"
  "numpy>=1.24.0"
  "accelerate"
  "Pillow"
  "huggingface_hub"
  "pytest"
  "requests"
)
python -m pip install --quiet "${python_deps[@]}"

# --- install SparseVLM from local source ---------------------------------
# Absolute path of the directory containing this script. BASH_SOURCE[0] still
# points at this file when the script is sourced; fall back to $0 when it is
# unset (e.g. the script is read from stdin).
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]:-$0}")" && pwd)"
# Editable install of the project rooted at the script's directory.
python -m pip install --quiet -e "$SCRIPT_DIR"

printf '\n%s\n' "=== Verifying install ==="

# The Python checker lives in a quoted heredoc so the shell performs no
# expansion on it; it is then handed to the interpreter via -c. Any import
# failure makes python exit non-zero, which aborts the script under `set -e`.
verify_snippet="$(cat <<'PY'
# Import the required modules, print package versions, then report CUDA status.
import torch, triton, transformers, sparsevlm, kernels
print(f'torch      {torch.__version__}')
print(f'triton     {triton.__version__}')
print(f'transformers {transformers.__version__}')
print(f'sparsevlm  {sparsevlm.__version__}')
print(f'CUDA avail {torch.cuda.is_available()}')
if torch.cuda.is_available():
    print(f'GPU        {torch.cuda.get_device_name(0)}')
    print(f'VRAM       {torch.cuda.get_device_properties(0).total_memory/1e9:.1f} GB')
PY
)"
python -c "$verify_snippet"

# Closing summary: a blank separator line, a heading, and the suggested
# follow-up commands, emitted one argument per output line.
printf '%s\n' \
  "" \
  "=== Setup complete. Next steps ===" \
  "  Layer-1 kernel benchmark (no model download):  python benchmark/bench_layer1.py" \
  "  Unit tests:                                     pytest tests/" \
  "  Full e2e + benchmark:                           python test_e2e.py"