#!/bin/bash # setup.sh — SimQuantum MI300X one-time setup # Run this ONCE on a fresh droplet. # After this, use start.sh every time. set -e CONDA_ENV="qdots" MODEL="Qwen/Qwen2.5-1.5B-Instruct" REPO_URL="https://huggingface.co/spaces/lablab-ai-amd-developer-hackathon/simquantum-tuning-lab" REPO_DIR="/root/simquantum-tuning-lab" echo "" echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" echo " SimQuantum — One-Time Droplet Setup" echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" echo "" # ── 1. Clone repo ───────────────────────────────────────────────────────────── if [ -d "$REPO_DIR/.git" ]; then echo "► Repo already exists — pulling latest..." cd "$REPO_DIR" git pull origin main else echo "► Cloning SimQuantum from HuggingFace..." git clone "$REPO_URL" "$REPO_DIR" cd "$REPO_DIR" fi echo " ✓ Repo ready at $REPO_DIR" # ── 2. Conda init ───────────────────────────────────────────────────────────── echo "► Initializing conda..." source /root/miniconda3/etc/profile.d/conda.sh # ── 3. Create env if it doesn't exist ──────────────────────────────────────── if conda env list | grep -q "^${CONDA_ENV} "; then echo " ✓ Conda env '$CONDA_ENV' already exists" else echo "► Creating conda env '$CONDA_ENV' (Python 3.11)..." conda create -y -n "$CONDA_ENV" python=3.11 echo " ✓ Created" fi conda activate "$CONDA_ENV" echo " ✓ Python: $(python --version)" # ── 4. ROCm PyTorch ─────────────────────────────────────────────────────────── if python -c "import torch; assert torch.cuda.is_available()" 2>/dev/null; then echo " ✓ ROCm PyTorch already installed" else echo "► Installing ROCm PyTorch (this takes a few minutes)..." pip install torch torchvision \ --index-url https://download.pytorch.org/whl/rocm6.2 \ --quiet echo " ✓ Done" fi # ── 5. vLLM ─────────────────────────────────────────────────────────────────── if python -c "import vllm" 2>/dev/null; then echo " ✓ vLLM already installed" else echo "► Installing vLLM..." pip install vllm --quiet echo " ✓ Done" fi # ── 6. App dependencies ─────────────────────────────────────────────────────── echo "► Installing app dependencies..." pip install streamlit==1.57.0 plotly openai numpy scipy scikit-learn tqdm --quiet pip install -e . --quiet echo " ✓ Done" # ── 7. Pre-download the model weights ──────────────────────────────────────── # Do this now so start.sh doesn't spend credits downloading later echo "► Pre-downloading $MODEL weights (one-time, ~3GB)..." python -c " from huggingface_hub import snapshot_download snapshot_download('$MODEL') print(' ✓ Model weights cached') " # ── 8. Write start.sh into the repo dir so it's always there ───────────────── cat > "$REPO_DIR/start.sh" << 'STARTSCRIPT' #!/bin/bash # start.sh — run this every time you boot the droplet CONDA_ENV="qdots" MODEL="Qwen/Qwen2.5-1.5B-Instruct" VLLM_PORT=8000 STREAMLIT_PORT=8501 REPO_DIR="/root/simquantum-tuning-lab" echo "" echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" echo " SimQuantum — Starting Up" echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" cd "$REPO_DIR" # Pull latest code (cheap, always do it) git pull origin main --quiet && echo "► Code up to date ✓" || echo "► (git pull skipped)" # Init conda source /root/miniconda3/etc/profile.d/conda.sh conda activate "$CONDA_ENV" PYTHON="$(which python)" VLLM_BIN="$(which vllm)" # ── Start vLLM if not running ───────────────────────────────────────────────── if curl -s http://localhost:$VLLM_PORT/v1/models > /dev/null 2>&1; then echo "► vLLM already running ✓" else echo "► Starting vLLM (Qwen2.5-1.5B on MI300X)..." export HIP_VISIBLE_DEVICES=0 export ROCR_VISIBLE_DEVICES=0 export VLLM_TARGET_DEVICE=rocm export HSA_OVERRIDE_GFX_VERSION=9.4.2 nohup "$VLLM_BIN" serve "$MODEL" \ --host 0.0.0.0 \ --port $VLLM_PORT \ --gpu-memory-utilization 0.45 \ --max-model-len 4096 \ > /tmp/vllm.log 2>&1 & VLLM_PID=$! echo -n " Waiting for vLLM" for i in $(seq 1 120); do curl -s http://localhost:$VLLM_PORT/v1/models > /dev/null 2>&1 && echo " ✓" && break if ! kill -0 $VLLM_PID 2>/dev/null; then echo "" echo " ✗ vLLM crashed. Last 20 lines:" tail -20 /tmp/vllm.log exit 1 fi printf "."; sleep 1 done fi # ── Kill old Streamlit only ─────────────────────────────────────────────────── pkill -f "streamlit run" 2>/dev/null || true sleep 1 # ── Start Streamlit ─────────────────────────────────────────────────────────── echo "► Starting Streamlit..." export QDOT_LLM_BASE_URL="http://localhost:${VLLM_PORT}/v1" export QDOT_LLM_MODEL="$MODEL" nohup "$PYTHON" -m streamlit run app.py \ --server.port "$STREAMLIT_PORT" \ --server.address 0.0.0.0 \ --server.headless true \ > /tmp/streamlit.log 2>&1 & echo -n " Waiting for Streamlit" for i in $(seq 1 30); do curl -s http://localhost:$STREAMLIT_PORT > /dev/null 2>&1 && echo " ✓" && break printf "."; sleep 1 done PUBLIC_IP=$(curl -s ifconfig.me 2>/dev/null || echo "YOUR_IP") echo "" echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" echo " ✓ SimQuantum is live!" echo "" echo " Open this in your browser:" echo " http://${PUBLIC_IP}:${STREAMLIT_PORT}" echo "" echo " If something looks wrong:" echo " tail -f /tmp/vllm.log" echo " tail -f /tmp/streamlit.log" echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" STARTSCRIPT chmod +x "$REPO_DIR/start.sh" echo "" echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━" echo " ✓ Setup complete!" echo "" echo " From now on, just run:" echo " cd /root/simquantum-tuning-lab" echo " bash start.sh" echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"