#!/usr/bin/env bash
# Runs ON the GCE instance — installs Docker, NVIDIA toolkit, builds stack
#
# Steps, in order:
#   1. Install Docker via the get.docker.com convenience script.
#   2. Install and configure the NVIDIA Container Toolkit for Docker.
#   3. Unpack ~/arcisvlm-deploy.tar.gz into ~/arcisvlm.
#   4. Best-effort download of the model checkpoint from HuggingFace.
#   5. Build and start the Docker Compose stack.
#   6. Poll the local health endpoint, then print the stack status.
#
# Takes no arguments. Requires sudo and outbound network access.
set -euo pipefail

readonly APP_DIR="${HOME}/arcisvlm"
readonly ARCHIVE="${HOME}/arcisvlm-deploy.tar.gz"
readonly COMPOSE_FILE="deploy/docker-compose.yml"
readonly NVIDIA_REPO="https://nvidia.github.io/libnvidia-container"
readonly NVIDIA_KEYRING="/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg"
readonly NVIDIA_APT_LIST="/etc/apt/sources.list.d/nvidia-container-toolkit.list"
readonly HEALTH_URL="http://localhost/health"
readonly HEALTH_TIMEOUT_S=30
readonly HEALTH_INTERVAL_S=5

# Print a non-fatal warning to stderr.
warn() {
  printf 'WARN: %s\n' "$*" >&2
}

# Print an error to stderr and abort the script.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# Install Docker and add the invoking user to the docker group.
install_docker() {
  echo "=== Installing Docker ==="
  curl -fsSL https://get.docker.com | sh
  # USER can be unset in non-login sessions; under `set -u` that would abort.
  sudo usermod -aG docker "${USER:-$(id -un)}"
}

# Register the NVIDIA apt repository, install the toolkit and wire it
# into Docker as a runtime.
install_nvidia_toolkit() {
  echo "=== Installing NVIDIA Container Toolkit ==="
  # --yes: overwrite an existing keyring so re-running the script does not
  # fail on gpg's interactive "overwrite?" prompt (stdin is the pipe).
  curl -fsSL "${NVIDIA_REPO}/gpgkey" \
    | sudo gpg --dearmor --yes -o "${NVIDIA_KEYRING}"
  # -f: with pipefail, an HTTP error aborts here instead of writing an
  # error page into the apt sources directory.
  curl -fsSL "${NVIDIA_REPO}/stable/deb/nvidia-container-toolkit.list" \
    | sed "s#deb https://#deb [signed-by=${NVIDIA_KEYRING}] https://#g" \
    | sudo tee "${NVIDIA_APT_LIST}"
  sudo apt-get update
  sudo apt-get install -y nvidia-container-toolkit
  sudo nvidia-ctk runtime configure --runtime=docker
  sudo systemctl restart docker
}

# Unpack the deployment archive into the application directory.
extract_code() {
  echo "=== Extracting code ==="
  [[ -f "${ARCHIVE}" ]] || die "deployment archive not found: ${ARCHIVE}"
  mkdir -p "${APP_DIR}"
  cd "${APP_DIR}"
  tar xzf "${ARCHIVE}"
}

# Best-effort checkpoint download into ${APP_DIR}/checkpoints.
# Any failure (pip or download) only warns: the checkpoint can be copied
# onto the instance manually, so it must not abort the deployment.
download_checkpoint() {
  local -r failure_msg="HF download failed — will need to copy checkpoint manually"

  echo "=== Downloading checkpoint from HuggingFace ==="
  cd "${APP_DIR}"
  if ! pip3 install --quiet huggingface_hub; then
    warn "${failure_msg}"
    return 0
  fi
  if ! python3 - <<'PY'
from huggingface_hub import hf_hub_download
import os
os.makedirs('checkpoints', exist_ok=True)
hf_hub_download('hardiksa/arcisvlm', 'v3_stage1_final.pt', local_dir='checkpoints/')
print('Checkpoint downloaded successfully')
PY
  then
    warn "${failure_msg}"
  fi
}

# Build the images and start the stack in the background.
start_stack() {
  echo "=== Building and starting Docker Compose ==="
  cd "${APP_DIR}"
  sudo docker compose -f "${COMPOSE_FILE}" up -d --build
}

# Poll the health endpoint for up to HEALTH_TIMEOUT_S seconds.
# Prints the response body on the first successful probe; warns (without
# failing the script) if the service never answers with a 2xx status.
wait_for_health() {
  local waited=0

  printf '=== Waiting for services (%ss) ===\n' "${HEALTH_TIMEOUT_S}"
  while (( waited < HEALTH_TIMEOUT_S )); do
    sleep "${HEALTH_INTERVAL_S}"
    waited=$(( waited + HEALTH_INTERVAL_S ))
    # -f: treat HTTP errors as "not healthy"; --max-time: never hang a probe.
    if curl -fs --max-time "${HEALTH_INTERVAL_S}" "${HEALTH_URL}"; then
      echo ""
      return 0
    fi
  done
  warn "Health check not yet responding"
}

# Run every provisioning step in order and finish with a status listing.
main() {
  install_docker
  install_nvidia_toolkit
  extract_code
  download_checkpoint
  start_stack
  wait_for_health

  echo "=== Setup complete ==="
  cd "${APP_DIR}"
  sudo docker compose -f "${COMPOSE_FILE}" ps
}

main "$@"