divhanimajokweni-ctrl's picture
feat: submit ProofBridge Liner to LabLab AI AMD Developer Hackathon
93c7565
#!/usr/bin/env bash
# scripts/deploy-droplet.sh
# ──────────────────────────────────────────────────────────────────
# Deploy ProofBridge Liner to the AMD MI300X DigitalOcean droplet.
#
# Target : 165.245.140.197
# Droplet : FX-AM7-gpu-mi300x1-192gb-devcloud-atl1
# GPU : AMD Instinct MI300X β€” 192 GB HBM3
# Image : OpenAI GPT OSS (ROCm 7) on Ubuntu 24.04
# User : root (DigitalOcean marketplace default)
#
# Usage:
# chmod +x scripts/deploy-droplet.sh
# ./scripts/deploy-droplet.sh [flags]
#
# Flags:
# --setup First-time: apt update/upgrade + reboot (required by DO
# security notice). Waits for reboot then continues deploy.
# --rocm Mount host ROCm 7 into container for GPU acceleration
# --restart-only Skip build; just restart the running service
# --dry-run Print every SSH command without executing
# ──────────────────────────────────────────────────────────────────
set -euo pipefail
# ── Config ────────────────────────────────────────────────────────
DROPLET_IP="165.245.140.197"
DROPLET_USER="root"
REPO_DIR="/root/proofbridge-liner"
GITHUB_REPO="https://github.com/divhanimajokweni-ctrl/proofbridge-liner.git"
SERVICE_NAME="proofbridge"
CONTAINER_NAME="proofbridge-liner"
PORT=7860
# ── Flags ─────────────────────────────────────────────────────────
DO_SETUP=false
USE_ROCM=false
RESTART_ONLY=false
DRY_RUN=false
for arg in "$@"; do
case "$arg" in
--setup) DO_SETUP=true ;;
--rocm) USE_ROCM=true ;;
--restart-only) RESTART_ONLY=true ;;
--dry-run) DRY_RUN=true ;;
*) echo "Unknown flag: $arg (valid: --setup --rocm --restart-only --dry-run)"; exit 1 ;;
esac
done
DOCKERFILE="Dockerfile"
IMAGE_TAG="proofbridge-liner:latest"
if $USE_ROCM; then
DOCKERFILE="Dockerfile.rocm"
IMAGE_TAG="proofbridge-liner:rocm"
fi
# ── Helpers ───────────────────────────────────────────────────────
bold() { printf '\033[1m%s\033[0m\n' "$*"; }
ok() { printf ' \033[32mβœ”\033[0m %s\n' "$*"; }
info() { printf ' \033[34mΒ·\033[0m %s\n' "$*"; }
warn() { printf ' \033[33m⚠\033[0m %s\n' "$*"; }
die() { printf '\033[31mERROR: %s\033[0m\n' "$*" >&2; exit 1; }
SSH_OPTS="-o StrictHostKeyChecking=accept-new -o ConnectTimeout=15 -o ServerAliveInterval=30"
SSH="ssh $SSH_OPTS ${DROPLET_USER}@${DROPLET_IP}"
ssh_run() {
local label="$1"; shift
info "$label"
if $DRY_RUN; then
printf ' \033[2m[dry-run] ssh %s@%s << HEREDOC\n' "$DROPLET_USER" "$DROPLET_IP"
printf ' %s\033[0m\n' "$*"
else
$SSH "$@"
fi
}
wait_for_ssh() {
info "Waiting for droplet to come back online after reboot..."
local attempts=0
until $SSH "echo ok" &>/dev/null || [ $attempts -ge 30 ]; do
sleep 5
attempts=$((attempts+1))
printf '.'
done
echo ""
[ $attempts -lt 30 ] && ok "Droplet is back online" || die "Timed out waiting for reboot"
}
# ── Banner ────────────────────────────────────────────────────────
echo ""
bold "╔══════════════════════════════════════════════════════════╗"
bold "β•‘ ProofBridge Liner β€” MI300X Droplet Deploy β•‘"
bold "β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•"
echo ""
info "Target : ${DROPLET_USER}@${DROPLET_IP}:${PORT}"
info "GPU : AMD Instinct MI300X β€” 192 GB HBM3"
info "Image : ${IMAGE_TAG} (${DOCKERFILE})"
info "Mode : $(if $DRY_RUN; then echo DRY-RUN; elif $RESTART_ONLY; then echo RESTART-ONLY; elif $DO_SETUP; then echo SETUP+DEPLOY; else echo FULL-DEPLOY; fi)"
echo ""
# ── SSH connectivity check ────────────────────────────────────────
if ! $DRY_RUN; then
info "Testing SSH connectivity to ${DROPLET_IP}..."
if ! $SSH "echo ok" &>/dev/null; then
die "Cannot SSH to ${DROPLET_IP}. Check your key is loaded (ssh-add) and the droplet is active."
fi
ok "SSH connection established"
fi
# ── SETUP: apt update/upgrade + reboot (run once after provisioning) ──
if $DO_SETUP; then
bold ""
bold " ── Phase 0: System update (required by DigitalOcean security notice)"
echo ""
ssh_run "Updating system packages (this takes a few minutes)..." bash << 'ENDSSH'
export DEBIAN_FRONTEND=noninteractive
apt-get update -qq
apt-get upgrade -y -o Dpkg::Options::="--force-confdef" -o Dpkg::Options::="--force-confold"
apt-get autoremove -y
echo " System updated."
ENDSSH
ok "System packages updated"
ssh_run "Rebooting droplet..." bash << 'ENDSSH'
nohup bash -c 'sleep 2 && reboot' &>/dev/null &
ENDSSH
ok "Reboot initiated"
if ! $DRY_RUN; then
sleep 8
wait_for_ssh
fi
fi
# ── Clone or pull repo ────────────────────────────────────────────
if ! $RESTART_ONLY; then
bold ""
bold " ── Phase 1: Sync repository"
echo ""
ssh_run "Cloning / pulling repo on droplet" bash << ENDSSH
set -e
if [ -d "${REPO_DIR}/.git" ]; then
echo " Pulling latest..."
cd "${REPO_DIR}" && git pull --ff-only
else
echo " Cloning from GitHub..."
git clone "${GITHUB_REPO}" "${REPO_DIR}"
fi
cd "${REPO_DIR}"
echo " HEAD: \$(git log --oneline -1)"
ENDSSH
ok "Repo synced at ${REPO_DIR}"
# ── Ensure Docker is installed ──────────────────────────────────
ssh_run "Checking Docker installation" bash << 'ENDSSH'
set -e
if ! command -v docker &>/dev/null; then
echo " Docker not found β€” installing..."
curl -fsSL https://get.docker.com | sh
else
echo " Docker $(docker --version | cut -d' ' -f3) already installed"
fi
ENDSSH
ok "Docker ready"
# ── Build Docker image ──────────────────────────────────────────
bold ""
bold " ── Phase 2: Build Docker image"
echo ""
ssh_run "Building ${IMAGE_TAG} from ${DOCKERFILE}" bash << ENDSSH
set -e
cd "${REPO_DIR}"
echo " Building ${IMAGE_TAG}..."
docker build -f ${DOCKERFILE} -t ${IMAGE_TAG} .
echo " Image size: \$(docker image inspect ${IMAGE_TAG} --format '{{.Size}}' | numfmt --to=iec 2>/dev/null || echo 'N/A')"
ENDSSH
ok "Docker image built: ${IMAGE_TAG}"
# ── Install systemd service ─────────────────────────────────────
bold ""
bold " ── Phase 3: Install systemd service"
echo ""
# ROCm device flags (host ROCm mounted at /opt/rocm)
ROCM_FLAGS=""
if $USE_ROCM; then
ROCM_FLAGS=" --device /dev/kfd \\\\\n --device /dev/dri \\\\\n --group-add video \\\\\n --group-add render \\\\\n -v /opt/rocm:/opt/rocm:ro \\\\\n -e ROCM_PATH=/opt/rocm \\\\\n -e LD_LIBRARY_PATH=/opt/rocm/lib:\$LD_LIBRARY_PATH \\\\"
fi
ssh_run "Writing /etc/systemd/system/${SERVICE_NAME}.service" bash << ENDSSH
set -e
cat > /etc/systemd/system/${SERVICE_NAME}.service << 'EOF'
[Unit]
Description=ProofBridge Liner β€” AMD MI300X Safety Kernel
After=docker.service network-online.target
Wants=network-online.target
Requires=docker.service
[Service]
Restart=always
RestartSec=10
EnvironmentFile=-/root/proofbridge-liner/.env.deployed
ExecStartPre=-/usr/bin/docker rm -f ${CONTAINER_NAME}
ExecStart=/usr/bin/docker run --rm \\
--name ${CONTAINER_NAME} \\
--network host \\
$(if $USE_ROCM; then printf '%b' "${ROCM_FLAGS}"; fi)
-e NODE_ENV=production \\
-e DASHBOARD_PORT=${PORT} \\
-e DASHBOARD_HOST=0.0.0.0 \\
-e HF_TOKEN=\${HF_TOKEN:-} \\
-e POLYGON_AMOY_RPC_URL=\${POLYGON_AMOY_RPC_URL:-} \\
-e ORACLE_ADDRESS=\${ORACLE_ADDRESS:-} \\
-e CIRCUIT_BREAKER_ADDRESS=\${CIRCUIT_BREAKER_ADDRESS:-} \\
-e ASSET_REGISTRY_ADDRESS=\${ASSET_REGISTRY_ADDRESS:-} \\
-e TEE_VERIFIER_ADDRESS=\${TEE_VERIFIER_ADDRESS:-} \\
-e ENCLAVE_ADDRESS=\${ENCLAVE_ADDRESS:-} \\
${IMAGE_TAG}
ExecStop=/usr/bin/docker stop ${CONTAINER_NAME}
TimeoutStopSec=30
[Install]
WantedBy=multi-user.target
EOF
systemctl daemon-reload
systemctl enable ${SERVICE_NAME}
echo " Service file written and enabled."
ENDSSH
ok "systemd service installed"
fi
# ── Start / restart service ───────────────────────────────────────
bold ""
bold " ── Phase 4: Start service"
echo ""
ssh_run "Starting ${SERVICE_NAME}" bash << ENDSSH
set -e
systemctl restart ${SERVICE_NAME}
sleep 4
systemctl is-active ${SERVICE_NAME} && echo " Service is active" || echo " WARNING: service not active"
ENDSSH
ok "Service started"
# ── Health check ─────────────────────────────────────────────────
if ! $DRY_RUN; then
info "Waiting for health check on :${PORT}/health ..."
sleep 6
HTTP_CODE=$(curl -sf -o /dev/null -w "%{http_code}" \
"http://${DROPLET_IP}:${PORT}/health" 2>/dev/null || echo "000")
if [ "$HTTP_CODE" = "200" ]; then
PAYLOAD=$(curl -sf "http://${DROPLET_IP}:${PORT}/health" 2>/dev/null || echo "{}")
ok "Health check passed β€” ${PAYLOAD}"
else
warn "Health returned HTTP ${HTTP_CODE} β€” check logs:"
echo " ssh root@${DROPLET_IP} 'journalctl -u ${SERVICE_NAME} -n 50 --no-pager'"
fi
fi
# ── GPU status (ROCm only) ────────────────────────────────────────
if $USE_ROCM && ! $DRY_RUN; then
echo ""
info "AMD GPU status:"
$SSH "rocm-smi --showproductname --showmeminfo vram 2>/dev/null || echo ' rocm-smi not in PATH on host β€” check inside container'" \
| sed 's/^/ /'
fi
# ── Summary ───────────────────────────────────────────────────────
echo ""
bold "╔══════════════════════════════════════════════════════════╗"
bold "β•‘ DEPLOY COMPLETE β•‘"
bold "β•šβ•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•β•"
echo ""
printf ' %-12s %s\n' "Dashboard" "http://${DROPLET_IP}:${PORT}"
printf ' %-12s %s\n' "Health" "http://${DROPLET_IP}:${PORT}/health"
printf ' %-12s %s\n' "API" "http://${DROPLET_IP}:${PORT}/api/status"
echo ""
printf ' %-12s %s\n' "Logs" "ssh root@${DROPLET_IP} 'journalctl -u ${SERVICE_NAME} -f'"
printf ' %-12s %s\n' "Shell" "ssh root@${DROPLET_IP}"
if $USE_ROCM; then
printf ' %-12s %s\n' "GPU info" "ssh root@${DROPLET_IP} 'rocm-smi'"
printf ' %-12s %s\n' "VRAM" "192 GB HBM3 β€” AMD Instinct MI300X"
fi
echo ""