File size: 5,309 Bytes
2456a67
ee9aa39
2456a67
 
 
ee9aa39
2456a67
ee9aa39
 
a5324ea
 
19911c4
a5324ea
2456a67
19911c4
a5324ea
 
 
 
 
 
ee9aa39
 
 
 
a5324ea
 
ee9aa39
a5324ea
2456a67
ee9aa39
 
 
 
 
 
 
2456a67
a5324ea
 
 
ee9aa39
2456a67
ee9aa39
a5324ea
2456a67
ee9aa39
 
 
 
 
 
 
2456a67
a5324ea
 
 
782c98f
 
 
 
 
 
 
2456a67
a5324ea
 
ee9aa39
 
2456a67
 
a5324ea
 
 
 
 
2456a67
ee9aa39
a5324ea
 
 
2456a67
 
ee9aa39
 
 
2456a67
ee9aa39
2456a67
 
ee9aa39
 
2456a67
 
 
 
 
 
 
 
 
 
ee9aa39
2456a67
 
 
 
 
 
 
 
 
a5324ea
4ba5b45
 
 
ee9aa39
 
 
 
 
 
 
 
 
 
 
 
 
2456a67
96a43a6
 
 
 
 
 
 
2456a67
a5324ea
 
 
96a43a6
19911c4
 
 
 
 
 
96a43a6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#!/bin/bash
# Bootstrap prologue: strict shell options, logging, error reporting, and the
# configuration values the remaining provisioning steps read.

# -E: ERR trap is inherited by functions/subshells; -e: abort on unhandled
# failure; -u: unset variables are errors; pipefail: a pipeline fails if any
# stage fails.
set -Eeuo pipefail
# Trace every command so the log shows exactly what ran.
set -x
# Mirror stdout and stderr to the console and append them to the setup log.
exec > >(tee -a /var/log/setup.log) 2>&1

# Report the line of the failing command before errexit terminates the script.
trap 'echo "ERROR: setup failed at line $LINENO"' ERR

# apt-get invocation with a dpkg lock timeout option. Stored as a plain string
# and expanded unquoted by its users so it word-splits into command + option.
APT_GET="apt-get -o DPkg::Lock::Timeout=300"
# Interpreter inside the ComfyUI virtual environment; used for all pip/python calls.
PYTHON_BIN="/root/comfyui-venv/bin/python"
# Project repository and checkout location; both may be overridden from the environment.
APP_REPO_URL="${APP_REPO_URL:-https://github.com/ortegarod/nemoflix.git}"
APP_DIR="${APP_DIR:-/root/nemoflix}"

# Base URL of the ComfyUI HTTP API; overridable, defaults to the local port 8188.
COMFY_URL="${COMFY_URL:-http://127.0.0.1:8188}"


# Never let apt prompt for input during unattended provisioning.
export DEBIAN_FRONTEND=noninteractive
# DigitalOcean/Ubuntu images can auto-restart services during apt operations.
# Keep restarts list-only so SSH/network services do not bounce mid-bootstrap.
export NEEDRESTART_MODE=l

echo "=== AMD MI300X ROCm 7.2 ComfyUI Worker Setup Starting ==="

# Host packages needed by the rest of the bootstrap: version control (with
# LFS), Python packaging and venv support, download tools, and TLS roots.
base_packages=(
    git
    git-lfs
    python3-pip
    python3.12-venv
    wget
    htop
    curl
    ca-certificates
)

# $APT_GET is intentionally unquoted: it carries "apt-get" plus an option and
# has to word-split into separate arguments.
# shellcheck disable=SC2086
$APT_GET update -y
# shellcheck disable=SC2086
$APT_GET install -y "${base_packages[@]}"

# Registering the LFS filters system-wide is best-effort; a failure here must
# not abort provisioning.
git lfs install --system || :

# Confirm the ROCm tooling on the host can see the GPU before installing anything.
echo "=== Host GPU Check ==="
rocm_bin_dir=/opt/rocm/bin
rocminfo_dump=/tmp/rocminfo.txt

"${rocm_bin_dir}/rocm-smi"

# The full rocminfo report is written to a file and only its first 20 lines
# are echoed into the log; the complete dump stays on disk for inspection.
"${rocm_bin_dir}/rocminfo" > "${rocminfo_dump}"
head -n 20 "${rocminfo_dump}"

# Create the virtual environment on the host (idempotent).
#
# The guard checks for an executable interpreter rather than just the
# directory: if an earlier run was interrupted while the venv was being
# created, the directory can exist without a usable bin/python, and a
# directory-only check would skip creation forever. Running `python3 -m venv`
# against an existing directory is safe; it fills in whatever is missing.
echo "=== Creating Python venv ==="
if [ ! -x "$PYTHON_BIN" ]; then
    python3 -m venv /root/comfyui-venv
fi
# Bring the packaging toolchain up to date before installing large wheels.
"$PYTHON_BIN" -m pip install --upgrade pip setuptools wheel

# Install the ROCm build of PyTorch into the venv.
# Installing explicitly avoids DigitalOcean's Jupyter/Docker appliance behavior.
# Wheel indexes are tried from newest to oldest; the first one that installs wins.
echo "=== Installing PyTorch for ROCm ==="
torch_status=1
for rocm_channel in rocm7.2 rocm7.0 rocm6.2; do
    torch_status=0
    "$PYTHON_BIN" -m pip install torch torchvision torchaudio \
        --index-url "https://download.pytorch.org/whl/${rocm_channel}" \
        || torch_status=$?
    if [ "$torch_status" -eq 0 ]; then
        break
    fi
done
# Re-raise the status of the last attempt so errexit and the ERR trap fire
# when every index failed (a no-op when an install succeeded).
( exit "$torch_status" )

# Log the installed PyTorch version and whether it can see the GPU.
echo "=== PyTorch GPU Check ==="
torch_probe="import torch; print('PyTorch:', torch.__version__); print('ROCm available:', torch.cuda.is_available()); print('GPU:', torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'None')"
"$PYTHON_BIN" -c "$torch_probe"

# Bring the project checkout to the tip of origin/main. On this droplet the
# repo only supplies worker install scripts and workflow assets; the durable
# API, database, Studio UI, and control plane live on the VPS.
echo "=== Cloning/updating Nemoflix repo ==="
if [[ ! -d "${APP_DIR}/.git" ]]; then
    # First run: shallow clone is enough, history is never needed here.
    git clone --depth 1 "${APP_REPO_URL}" "${APP_DIR}"
else
    # Re-run: fetch the latest commit and discard any local drift.
    git -C "${APP_DIR}" fetch --depth 1 origin main
    git -C "${APP_DIR}" reset --hard origin/main
fi

# NOTE: Studio frontend and Nemoflix AMD API are hosted on the VPS, not on the
# droplet. This droplet is disposable and runs ComfyUI only.

# Fetch ComfyUI and ComfyUI-Manager, then install their Python requirements
# into the venv. The working directory is /root/ComfyUI when this section ends.
echo "=== Installing ComfyUI ==="
comfy_root=/root/ComfyUI
manager_root="${comfy_root}/custom_nodes/ComfyUI-Manager"

if [[ -d "${comfy_root}/.git" ]]; then
    # Existing checkout: fast-forward if possible, otherwise keep what is there.
    git -C "${comfy_root}" pull --ff-only || true
else
    git clone https://github.com/comfyanonymous/ComfyUI.git "${comfy_root}"
fi

# ComfyUI-Manager is cloned once and left as-is on later runs.
if [[ ! -d "${manager_root}/.git" ]]; then
    git clone https://github.com/ltdrdata/ComfyUI-Manager.git "${manager_root}"
fi

cd "${comfy_root}"

# ComfyUI's own requirements first (relative to the checkout), then the Manager's.
for requirements_file in requirements.txt "${manager_root}/requirements.txt"; do
    "$PYTHON_BIN" -m pip install -r "${requirements_file}"
done

# Keep a copy of the upstream API example next to the install for manual testing.
cp "${comfy_root}/script_examples/basic_api_example.py" /root/test_comfyui.py

# Create the ComfyUI systemd service on the host.
# The heredoc delimiter is unquoted on purpose: $PYTHON_BIN is expanded now,
# so the unit file contains the absolute interpreter path.
cat > /etc/systemd/system/comfyui.service << EOF
[Unit]
Description=ComfyUI
After=network-online.target
Wants=network-online.target

[Service]
Type=simple
User=root
WorkingDirectory=/root/ComfyUI
Environment="PATH=/root/comfyui-venv/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"
ExecStart=$PYTHON_BIN /root/ComfyUI/main.py --listen 0.0.0.0 --port 8188
Restart=always
RestartSec=5

[Install]
WantedBy=multi-user.target
EOF

# One reload is enough for systemd to pick up the (re)written unit file.
systemctl daemon-reload
systemctl enable comfyui.service
# restart (not start) so a re-run of this script applies a changed unit.
systemctl restart comfyui.service

# Show service status in the log. This is informational only: `systemctl
# status` exits non-zero whenever the unit is not (yet) active, e.g. while it
# is between automatic restarts, and that must not abort the bootstrap before
# the API readiness check gets a chance to wait for the service.
systemctl --no-pager --full status comfyui.service || true

# Verify the API from the host: poll /system_stats until ComfyUI answers.
# Up to 60 attempts, 5 seconds apart (about 5 minutes plus request time).
echo "=== Waiting for ComfyUI API ==="
# Honour the configured base URL, falling back to the local default, and
# tolerate a trailing slash in the configured value.
comfy_base_url="${COMFY_URL:-http://127.0.0.1:8188}"
stats_endpoint="${comfy_base_url%/}/system_stats"
api_ready=0
for attempt in {1..60}; do
    # --fail makes HTTP error responses (>= 400) count as "not ready" instead
    # of being mistaken for a healthy API.
    if curl --fail -sS --max-time 5 "$stats_endpoint"; then
        api_ready=1
        break
    fi
    echo "Waiting for ComfyUI API... ($attempt/60)"
    sleep 5
done

if [ "$api_ready" -ne 1 ]; then
    # Give the operator something actionable before stopping the bootstrap.
    echo "ERROR: ComfyUI API did not become ready at $stats_endpoint" >&2
    systemctl --no-pager --full status comfyui.service || true
    exit 1
fi

# Run the model stack installers shipped in the project repo, in order.
# Each entry is "<label>:<script file>"; the label only feeds the log banner.
stack_jobs=(
    "FLUX.2 image:install-image-stack.sh"
    "Wan 2.2 video:install-video-stack.sh"
)
for stack_job in "${stack_jobs[@]}"; do
    echo "=== Installing ${stack_job%%:*} stack ==="
    bash "${APP_DIR}/scripts/${stack_job#*:}"
done

# Closing summary and operator reminders. The heredoc delimiter is quoted so
# the text is emitted literally, with no expansion.
cat <<'SUMMARY'
=== Setup Complete ===
ComfyUI worker: http://<droplet-ip>:8188
Studio UI and Nemoflix AMD API are hosted on the VPS.
On the VPS, set COMFY_URL=http://<droplet-ip>:8188 in nemoflix-amd-api.service and restart it.

!!! REMINDER !!! Transfer any custom LoRA models to the droplet:
  scp -i <ssh-key> <your-lora.safetensors> root@<droplet-ip>:/root/ComfyUI/models/loras/nemoflix-amd/

!!! REMINDER !!! For LoRA training, create the ai-toolkit env file with your HF token:
  echo 'HF_TOKEN=hf_...' > /root/ai-toolkit/.env
  (FLUX.2-dev will be downloaded automatically on the first training job)
Then restart ComfyUI: systemctl restart comfyui.service
SUMMARY