#!/bin/bash
# Strict mode: -E propagates the ERR trap into functions and subshells, -e
# aborts on the first unhandled failure, -u rejects unset variables, and
# pipefail makes a pipeline fail when any stage fails.
set -Eeuo pipefail
# Trace every command so the log shows exactly what was executed.
set -x
# Send stdout and stderr to the console and append both to a persistent log.
exec > >(tee -a /var/log/setup.log) 2>&1
trap 'echo "ERROR: setup failed at line $LINENO"' ERR

# apt-get invocation stored as an array so every option stays its own word
# without depending on unquoted word splitting. The lock timeout makes apt
# wait up to 300 seconds for the dpkg lock instead of failing immediately.
APT_GET=(apt-get -o DPkg::Lock::Timeout=300)
PYTHON_BIN="/root/comfyui-venv/bin/python"
# The next three settings can be overridden from the environment; the defaults
# apply when the variable is unset or empty.
APP_REPO_URL="${APP_REPO_URL:-https://github.com/ortegarod/nemoflix.git}"
APP_DIR="${APP_DIR:-/root/nemoflix}"
COMFY_URL="${COMFY_URL:-http://127.0.0.1:8188}"

export DEBIAN_FRONTEND=noninteractive
# DigitalOcean/Ubuntu images can auto-restart services during apt operations.
# Keep restarts list-only so SSH/network services do not bounce mid-bootstrap.
export NEEDRESTART_MODE=l

echo "=== AMD MI300X ROCm 7.2 ComfyUI Worker Setup Starting ==="

# Refresh package metadata before installing dependencies.
"${APT_GET[@]}" update -y

# Base utilities and Python tooling. python3-venv (rather than a
# version-pinned python3.X-venv) follows the distribution's default python3,
# which is the interpreter used later to create the virtual environment.
"${APT_GET[@]}" install -y git git-lfs python3-pip python3-venv wget htop curl ca-certificates
# Best-effort: a failure to register the git-lfs hooks system-wide is ignored.
git lfs install --system || true
# Verify host GPU and ROCm visibility. Either tool failing aborts the script.
echo "=== Host GPU Check ==="
/opt/rocm/bin/rocm-smi
# NOTE(review): rocminfo is written to a file and then trimmed, presumably so
# that `head` closing its input early cannot fail a pipeline under pipefail.
/opt/rocm/bin/rocminfo > /tmp/rocminfo.txt
head -20 /tmp/rocminfo.txt

# Create virtual environment on the host.
echo "=== Creating Python venv ==="
# Probe for a working pip instead of testing only for the directory: an
# interrupted earlier run can leave /root/comfyui-venv behind without a usable
# interpreter or pip, and skipping creation would break every later pip call.
if ! "$PYTHON_BIN" -m pip --version > /dev/null 2>&1; then
  python3 -m venv /root/comfyui-venv
fi
"$PYTHON_BIN" -m pip install --upgrade pip setuptools wheel

# Install PyTorch for ROCm inside venv.
# This is explicit and avoids DigitalOcean's Jupyter/Docker appliance behavior.
# The wheel indexes are tried from newest to oldest; the script only aborts if
# all three installs fail.
echo "=== Installing PyTorch for ROCm ==="
"$PYTHON_BIN" -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm7.2 || \
"$PYTHON_BIN" -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm7.0 || \
"$PYTHON_BIN" -m pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.2

# Report whether PyTorch sees the GPU. This only prints the result; a missing
# GPU does not stop the script (only a failure to import torch would).
echo "=== PyTorch GPU Check ==="
"$PYTHON_BIN" -c "import torch; print('PyTorch:', torch.__version__); print('ROCm available:', torch.cuda.is_available()); print('GPU:', torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'None')"
# Clone or update the project repo. The droplet uses this repo only for worker
# install scripts/workflow assets. The durable API, database, Studio UI, and
# control plane live on the VPS.
echo "=== Cloning/updating Nemoflix repo ==="
if [ -d "$APP_DIR/.git" ]; then
  # Existing checkout: fetch the tip of main as a shallow commit and discard
  # any local changes in favor of it.
  git -C "$APP_DIR" fetch --depth 1 origin main
  # Reset to FETCH_HEAD, which always names the commit just fetched. The
  # origin/main tracking ref is only refreshed when the checkout's configured
  # fetch refspec covers main, which a shallow single-branch clone of a
  # different default branch would not.
  git -C "$APP_DIR" reset --hard FETCH_HEAD
else
  # Fresh shallow clone of the remote's default branch.
  git clone --depth 1 "$APP_REPO_URL" "$APP_DIR"
fi
# NOTE: Studio frontend and Nemoflix AMD API are hosted on the VPS, not on the
# droplet. This droplet is disposable and runs ComfyUI only.
# Install ComfyUI: update an existing checkout in place, otherwise clone it
# into /root.
echo "=== Installing ComfyUI ==="
cd /root
if [ -d /root/ComfyUI/.git ]; then
  # Best-effort update: a failed fast-forward keeps the current checkout.
  git -C /root/ComfyUI pull --ff-only || true
else
  git clone https://github.com/comfyanonymous/ComfyUI.git
fi

# Install ComfyUI-Manager. It is cloned once; an existing checkout is left
# untouched.
[ -d /root/ComfyUI/custom_nodes/ComfyUI-Manager/.git ] ||
  git clone https://github.com/ltdrdata/ComfyUI-Manager.git /root/ComfyUI/custom_nodes/ComfyUI-Manager

cd /root/ComfyUI

# Install ComfyUI and ComfyUI-Manager requirements inside the venv, one
# requirements file per pip invocation and in this order.
for requirements_file in requirements.txt /root/ComfyUI/custom_nodes/ComfyUI-Manager/requirements.txt; do
  "$PYTHON_BIN" -m pip install -r "$requirements_file"
done

# Copy official example for testing (source path is relative to /root/ComfyUI).
cp script_examples/basic_api_example.py /root/test_comfyui.py
# Create ComfyUI systemd service on the host.
# The heredoc delimiter is deliberately unquoted so that $PYTHON_BIN is
# expanded into the ExecStart line when the unit file is written.
# NOTE(review): the service listens on 0.0.0.0:8188, i.e. on every interface;
# confirm that access is restricted at the network/firewall level.
cat > /etc/systemd/system/comfyui.service << EOF
[Unit]
Description=ComfyUI
After=network-online.target
Wants=network-online.target
[Service]
Type=simple
User=root
WorkingDirectory=/root/ComfyUI
Environment="PATH=/root/comfyui-venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
ExecStart=$PYTHON_BIN /root/ComfyUI/main.py --listen 0.0.0.0 --port 8188
Restart=always
RestartSec=5
[Install]
WantedBy=multi-user.target
EOF

# Pick up the new unit file, start the service at boot, and (re)start it now
# so a re-run of this script applies any changes.
systemctl daemon-reload
systemctl enable comfyui.service
systemctl restart comfyui.service

# Show service status in log. This is informational only: `systemctl status`
# exits non-zero whenever the unit is not active (for example while it is
# between automatic restarts), and under `set -e` that would abort the script
# before the API readiness check has a chance to wait for the service.
systemctl --no-pager --full status comfyui.service || true
# Verify API from the host.
# Poll the system_stats endpoint up to 60 times, 5 seconds apart. The base URL
# comes from COMFY_URL (set at the top of the script, default
# http://127.0.0.1:8188) instead of being repeated here. -f makes curl return
# a failure for HTTP error responses, so an error page is not mistaken for a
# ready API.
echo "=== Waiting for ComfyUI API ==="
for i in {1..60}; do
  if curl -fsS --max-time 5 "$COMFY_URL/system_stats"; then
    break
  fi
  echo "Waiting for ComfyUI API... ($i/60)"
  sleep 5
done
# Final probe outside the loop: if the API never came up, this request fails
# and `set -e` stops the script here rather than continuing with the installs.
curl -fsS --max-time 5 "$COMFY_URL/system_stats"
# Install model stacks. Both installers come from the project checkout in
# APP_DIR and run as child bash processes; a non-zero exit from either one
# aborts the setup.
echo "=== Installing FLUX.2 image stack ==="
bash "$APP_DIR/scripts/install-image-stack.sh"
echo "=== Installing Wan 2.2 video stack ==="
bash "$APP_DIR/scripts/install-video-stack.sh"

# Closing summary and manual follow-up steps for the operator. The
# <placeholders> are printed literally and are meant to be filled in by hand.
echo "=== Setup Complete ==="
echo "ComfyUI worker: http://<droplet-ip>:8188"
echo "Studio UI and Nemoflix AMD API are hosted on the VPS."
echo "On the VPS, set COMFY_URL=http://<droplet-ip>:8188 in nemoflix-amd-api.service and restart it."
echo ""
echo "!!! REMINDER !!! Transfer any custom LoRA models to the droplet:"
echo " scp -i <ssh-key> <your-lora.safetensors> root@<droplet-ip>:/root/ComfyUI/models/loras/nemoflix-amd/"
echo ""
echo "!!! REMINDER !!! For LoRA training, create the ai-toolkit env file with your HF token:"
echo " echo 'HF_TOKEN=hf_...' > /root/ai-toolkit/.env"
echo " (FLUX.2-dev will be downloaded automatically on the first training job)"
echo "Then restart ComfyUI: systemctl restart comfyui.service"