Spaces:

dippoo
/

content-engine

Running

App Files Files

content-engine / src /content_engine /api /routes_pod.py

dippoo

Sync all local changes: video routes, pod management, wavespeed, UI updates

e808ae1 5 days ago

raw

history blame

80.2 kB

	"""RunPod Pod management routes — start/stop GPU pods for generation.

	Starts a persistent ComfyUI pod with network volume access.
	Models and LoRAs are loaded from the shared network volume.
	"""

	from __future__ import annotations

	import asyncio
	import json
	import logging
	import os
	import time
	import uuid
	from pathlib import Path
	from typing import Any

	import runpod
	from fastapi import APIRouter, File, HTTPException, UploadFile
	from pydantic import BaseModel

	# Module-level logger, named after this module.
	logger = logging.getLogger(__name__)

	# Every route declared on this router is served under the /api/pod prefix.
	router = APIRouter(prefix="/api/pod", tags=["pod"])

	# Persist pod state to disk so it survives server restarts.
	# The path is resolved by walking four `.parent` steps up from this file.
	_POD_STATE_FILE = Path(__file__).parent.parent.parent.parent / "pod_state.json"


	def _save_pod_state():
	    """Write the current pod state to the JSON state file.

	    The transient ``setup_status`` message is left out of the snapshot.
	    Any failure is logged as a warning and otherwise ignored.
	    """
	    try:
	        snapshot = dict(_pod_state)
	        snapshot.pop("setup_status", None)
	        _POD_STATE_FILE.write_text(json.dumps(snapshot))
	    except Exception as e:
	        logger.warning("Failed to save pod state: %s", e)


	def _load_pod_state():
	    """Restore pod state from the JSON state file, if one exists.

	    Only keys already present in ``_pod_state`` are copied over; unknown
	    keys in the file are ignored. Any failure is logged as a warning.
	    """
	    try:
	        if not _POD_STATE_FILE.exists():
	            return
	        saved = json.loads(_POD_STATE_FILE.read_text())
	        for key, value in saved.items():
	            if key not in _pod_state:
	                continue
	            _pod_state[key] = value
	        logger.info("Restored pod state: pod_id=%s status=%s", _pod_state.get("pod_id"), _pod_state.get("status"))
	    except Exception as e:
	        logger.warning("Failed to load pod state: %s", e)

	def _get_volume_config() -> tuple[str, str]:
	"""Get network volume config at runtime (after dotenv loads)."""
	return (
	os.environ.get("RUNPOD_VOLUME_ID", ""),
	os.environ.get("RUNPOD_VOLUME_DC", ""),
	)

	# Docker image — PyTorch base with CUDA, we install ComfyUI ourselves
	DOCKER_IMAGE = "runpod/pytorch:2.4.0-py3.11-cuda12.4.1-devel-ubuntu22.04"

	# Pod state (single in-process record; persisted via _save_pod_state).
	# Every key that should survive a restart must be declared here, because
	# _load_pod_state only restores keys that already exist in this dict.
	_pod_state = {
	    "pod_id": None,
	    "status": "stopped",  # stopped, starting, setting_up, running, stopping
	    "ip": None,  # address shown to the user (SSH IP, else ComfyUI IP)
	    "ssh_ip": None,  # public IP mapped to the pod's port 22
	    "ssh_port": None,  # public port mapped to the pod's port 22
	    "comfyui_ip": None,  # public IP mapped to the pod's port 8188
	    "comfyui_port": None,  # public port mapped to the pod's port 8188
	    "gpu_type": "NVIDIA RTX A6000",
	    "model_type": "flux2",
	    "started_at": None,  # time.time() at pod creation
	    "cost_per_hour": 0.76,
	    "setup_status": None,  # transient progress message; never persisted
	}

	_load_pod_state()

	# GPU options (same as training)
	# Keys are the GPU type strings accepted by the start route and passed to
	# runpod.create_pod. "vram" matches the GB figure in each display name, and
	# "cost" is copied into _pod_state["cost_per_hour"] when a pod is started.
	# NOTE(review): currency of "cost" is presumably USD — confirm.
	GPU_OPTIONS = {
	    "NVIDIA A40": {"name": "A40 48GB", "vram": 48, "cost": 0.64},
	    "NVIDIA RTX A6000": {"name": "RTX A6000 48GB", "vram": 48, "cost": 0.76},
	    "NVIDIA L40": {"name": "L40 48GB", "vram": 48, "cost": 0.89},
	    "NVIDIA L40S": {"name": "L40S 48GB", "vram": 48, "cost": 1.09},
	    "NVIDIA A100-SXM4-80GB": {"name": "A100 SXM 80GB", "vram": 80, "cost": 1.64},
	    "NVIDIA A100 80GB PCIe": {"name": "A100 PCIe 80GB", "vram": 80, "cost": 1.89},
	    "NVIDIA H100 80GB HBM3": {"name": "H100 80GB", "vram": 80, "cost": 3.89},
	    "NVIDIA GeForce RTX 5090": {"name": "RTX 5090 32GB", "vram": 32, "cost": 0.69},
	    "NVIDIA GeForce RTX 4090": {"name": "RTX 4090 24GB", "vram": 24, "cost": 0.44},
	    "NVIDIA GeForce RTX 3090": {"name": "RTX 3090 24GB", "vram": 24, "cost": 0.22},
	}


	def _get_comfyui_url() -> str | None:
	    """Get the ComfyUI URL via RunPod's HTTPS proxy.

	    RunPod HTTP ports are only accessible through their proxy at
	    https://{pod_id}-{private_port}.proxy.runpod.net
	    The raw IP:port from the API is an internal address, not publicly routable.

	    Returns:
	        The proxy URL for private port 8188 of the current pod, or ``None``
	        when no pod id is recorded in ``_pod_state``.
	    """
	    pod_id = _pod_state.get("pod_id")
	    if pod_id:
	        return f"https://{pod_id}-8188.proxy.runpod.net"
	    return None


	def _get_api_key() -> str:
	    """Return the RunPod API key and install it on the ``runpod`` module.

	    Raises:
	        HTTPException: 503 when ``RUNPOD_API_KEY`` is unset or empty.
	    """
	    api_key = os.environ.get("RUNPOD_API_KEY")
	    if api_key:
	        runpod.api_key = api_key
	        return api_key
	    raise HTTPException(503, "RUNPOD_API_KEY not configured")


	class StartPodRequest(BaseModel):
	    """Request body for the pod start route."""

	    # Must be a key of GPU_OPTIONS; the start route rejects anything else with a 400.
	    gpu_type: str = "NVIDIA RTX A6000"
	    # Selects which model files the setup task links into ComfyUI.
	    model_type: str = "flux2"


	class PodStatus(BaseModel):
	    """Response model for the pod status route.

	    Only ``status`` is required; every other field defaults to ``None``.
	    """

	    status: str  # stopped, starting, setting_up, running, stopping
	    pod_id: str | None = None
	    ip: str | None = None
	    port: int | None = None  # filled from the ComfyUI public port
	    gpu_type: str | None = None
	    model_type: str | None = None
	    cost_per_hour: float | None = None
	    setup_status: str | None = None  # human-readable setup progress message
	    uptime_minutes: float | None = None
	    comfyui_url: str | None = None


	@router.get("/status", response_model=PodStatus)
	async def get_pod_status():
	    """Get current pod status.

	    When a pod id is recorded, RunPod is queried (10 s timeout) to refresh
	    the SSH / ComfyUI port mappings. If RunPod no longer knows the pod, or
	    reports it as EXITED, the local state is reset to "stopped" and the reset
	    is persisted so a stale pod id is not restored after a server restart.
	    RunPod API errors and timeouts are logged and the cached state is returned.

	    Returns:
	        PodStatus built from the in-memory pod state.

	    Raises:
	        HTTPException: 503 when the RunPod API key is not configured.
	    """
	    _get_api_key()

	    if _pod_state["pod_id"]:
	        try:
	            pod = await asyncio.wait_for(
	                asyncio.to_thread(runpod.get_pod, _pod_state["pod_id"]),
	                timeout=10,
	            )
	            desired = pod.get("desiredStatus", "") if pod else None
	            if desired == "RUNNING":
	                runtime = pod.get("runtime") or {}
	                for p in runtime.get("ports") or []:
	                    if p.get("privatePort") == 22:
	                        _pod_state["ssh_ip"] = p.get("ip")
	                        _pod_state["ssh_port"] = p.get("publicPort")
	                    if p.get("privatePort") == 8188:
	                        _pod_state["comfyui_ip"] = p.get("ip")
	                        _pod_state["comfyui_port"] = p.get("publicPort")
	                # Use SSH IP as the main IP for display
	                _pod_state["ip"] = _pod_state.get("ssh_ip") or _pod_state.get("comfyui_ip")
	            elif not pod or desired == "EXITED":
	                # Pod is gone or has exited: forget it and persist that fact.
	                _pod_state["status"] = "stopped"
	                _pod_state["pod_id"] = None
	                _save_pod_state()
	        except asyncio.TimeoutError:
	            logger.warning("RunPod API timeout checking pod status")
	        except Exception as e:
	            logger.warning("Failed to check pod: %s", e)

	    # Uptime is only reported while the pod is usable or being set up.
	    uptime = None
	    if _pod_state["started_at"] and _pod_state["status"] in ("running", "setting_up"):
	        uptime = (time.time() - _pod_state["started_at"]) / 60

	    comfyui_url = _get_comfyui_url()

	    return PodStatus(
	        status=_pod_state["status"],
	        pod_id=_pod_state["pod_id"],
	        ip=_pod_state["ip"],
	        port=_pod_state.get("comfyui_port"),
	        gpu_type=_pod_state["gpu_type"],
	        model_type=_pod_state.get("model_type", "flux2"),
	        cost_per_hour=_pod_state["cost_per_hour"],
	        setup_status=_pod_state.get("setup_status"),
	        uptime_minutes=uptime,
	        comfyui_url=comfyui_url,
	    )


	@router.get("/gpu-options")
	async def list_gpu_options():
	    """Return the GPU catalogue under the ``gpus`` key."""
	    payload = {"gpus": GPU_OPTIONS}
	    return payload


	@router.get("/model-options")
	async def list_model_options():
	    """Return the model types a pod can be started with, under ``models``."""
	    # (key, display name, description, use case) — order is preserved in the response.
	    catalog = (
	        ("flux2", "FLUX.2 Dev", "Best for realistic txt2img (requires 48GB+ VRAM)", "txt2img"),
	        ("flux1", "FLUX.1 Dev", "Previous gen FLUX txt2img", "txt2img"),
	        ("wan22", "WAN 2.2 Remix", "Realistic generation — dual-DiT MoE split-step (NSFW OK)", "txt2img"),
	        ("wan22_i2v", "WAN 2.2 I2V", "Image-to-video generation", "img2video"),
	        ("wan22_animate", "WAN 2.2 Animate", "Dance/motion transfer — animate a character from a driving video", "animate"),
	    )
	    models = {
	        key: {"name": display, "description": summary, "use_case": use_case}
	        for key, display, summary, use_case in catalog
	    }
	    return {"models": models}


	# Strong references to in-flight setup tasks. The event loop itself only keeps
	# weak references to tasks, so a task whose result is discarded may be
	# garbage-collected before it finishes.
	_background_tasks: set = set()


	@router.post("/start")
	async def start_pod(request: StartPodRequest):
	    """Start a GPU pod with ComfyUI for generation.

	    Creates the pod through the RunPod API, records it in ``_pod_state``,
	    persists the state, and schedules ``_wait_and_setup_pod`` in the
	    background to provision ComfyUI.

	    Args:
	        request: GPU type (must be a key of ``GPU_OPTIONS``) and model type.

	    Returns:
	        A dict with ``status`` ("starting" or "already_running") plus the
	        pod id and/or a message.

	    Raises:
	        HTTPException: 503 if the API key is missing, 400 for an unknown
	            GPU type, 500 if pod creation fails.
	    """
	    _get_api_key()

	    if _pod_state["status"] in ("running", "setting_up"):
	        return {"status": "already_running", "pod_id": _pod_state["pod_id"]}

	    if _pod_state["status"] == "starting":
	        return {"status": "starting", "message": "Pod is already starting"}

	    gpu_info = GPU_OPTIONS.get(request.gpu_type)
	    if not gpu_info:
	        raise HTTPException(400, f"Unknown GPU type: {request.gpu_type}")

	    _pod_state["status"] = "starting"
	    _pod_state["gpu_type"] = request.gpu_type
	    _pod_state["cost_per_hour"] = gpu_info["cost"]
	    _pod_state["model_type"] = request.model_type
	    _pod_state["setup_status"] = "Creating pod..."

	    try:
	        logger.info("Starting RunPod with %s for %s...", request.gpu_type, request.model_type)

	        # SECURITY NOTE(review): the container enables root SSH login with a
	        # fixed, well-known password on a publicly mapped port. The setup task
	        # logs in with the same credentials, so changing it requires updating both.
	        pod_kwargs = {
	            "container_disk_in_gb": 30,
	            "ports": "22/tcp,8188/http",
	            "docker_args": "bash -c 'apt-get update && apt-get install -y openssh-server && mkdir -p /run/sshd && echo root:runpod | chpasswd && /usr/sbin/sshd -o PermitRootLogin=yes && sleep infinity'",
	        }

	        volume_id, volume_dc = _get_volume_config()
	        if volume_id:
	            pod_kwargs["network_volume_id"] = volume_id
	            if volume_dc:
	                pod_kwargs["data_center_id"] = volume_dc
	            logger.info("Using network volume: %s (DC: %s)", volume_id, volume_dc)
	        else:
	            pod_kwargs["volume_in_gb"] = 75
	            logger.warning("No network volume configured — using temporary volume")

	        # runpod.create_pod is blocking; run it off the event loop.
	        pod = await asyncio.to_thread(
	            runpod.create_pod,
	            f"comfyui-gen-{request.model_type}",
	            DOCKER_IMAGE,
	            request.gpu_type,
	            **pod_kwargs,
	        )

	        _pod_state["pod_id"] = pod["id"]
	        _pod_state["started_at"] = time.time()
	        _save_pod_state()

	        logger.info("Pod created: %s", pod["id"])

	        # Keep a reference until the task completes so it cannot be collected early.
	        setup_task = asyncio.create_task(_wait_and_setup_pod(pod["id"], request.model_type))
	        _background_tasks.add(setup_task)
	        setup_task.add_done_callback(_background_tasks.discard)

	        return {
	            "status": "starting",
	            "pod_id": pod["id"],
	            "message": f"Starting {gpu_info['name']} pod (~5-8 min for setup)",
	        }

	    except Exception as e:
	        _pod_state["status"] = "stopped"
	        _pod_state["setup_status"] = None
	        logger.error("Failed to start pod: %s", e)
	        raise HTTPException(500, f"Failed to start pod: {e}") from e


	async def _wait_and_setup_pod(pod_id: str, model_type: str, timeout: int = 600):
	    """Wait for pod to be ready, then install ComfyUI and link models via SSH.

	    Runs as a background task. Progress and failures are reported only
	    through ``_pod_state["status"]`` and ``_pod_state["setup_status"]``;
	    nothing is returned, and setup errors are caught and recorded rather
	    than raised.

	    Phases:
	      1. Poll RunPod until the pod is RUNNING and exposes its SSH port.
	      2. SSH in, install or restore ComfyUI, download and symlink the model
	         files for ``model_type``, and install required custom nodes.
	      3. Launch ComfyUI and poll its ``/system_stats`` endpoint (up to
	         120 attempts, 5 s apart) until it answers with HTTP 200.

	    Args:
	        pod_id: RunPod pod identifier to poll.
	        model_type: "flux2", "flux1", "z_image", "wan22", "wan22_i2v" or
	            "wan22_animate". Any other value skips the model-specific step.
	        timeout: Seconds to wait in phase 1 for the SSH port to appear.
	    """
	    start = time.time()
	    ssh_host = None
	    ssh_port = None

	    # Phase 1: Wait for SSH to be available
	    _pod_state["setup_status"] = "Waiting for pod to start..."
	    while time.time() - start < timeout:
	        try:
	            pod = await asyncio.to_thread(runpod.get_pod, pod_id)
	            if pod and pod.get("desiredStatus") == "RUNNING":
	                runtime = pod.get("runtime") or {}
	                ports = runtime.get("ports") or []
	                for p in ports:
	                    if p.get("privatePort") == 22:
	                        ssh_host = p.get("ip")
	                        ssh_port = p.get("publicPort")
	                        _pod_state["ssh_ip"] = ssh_host
	                        _pod_state["ssh_port"] = ssh_port
	                        _pod_state["ip"] = ssh_host
	                    if p.get("privatePort") == 8188:
	                        _pod_state["comfyui_ip"] = p.get("ip")
	                        _pod_state["comfyui_port"] = p.get("publicPort")
	                if ssh_host and ssh_port:
	                    break
	        except Exception as e:
	            logger.debug("Waiting for pod: %s", e)
	        await asyncio.sleep(5)

	    if not ssh_host or not ssh_port:
	        logger.error("Pod did not become ready within %ds", timeout)
	        _pod_state["status"] = "stopped"
	        _pod_state["setup_status"] = "Failed: pod did not start"
	        return

	    # Phase 2: SSH in and set up ComfyUI
	    _pod_state["status"] = "setting_up"
	    _pod_state["setup_status"] = "Connecting via SSH..."

	    import paramiko

	    async def _ssh_connect_new() -> "paramiko.SSHClient":
	        """Create a fresh SSH connection to the pod (up to 10 attempts, 5 s apart)."""
	        client = paramiko.SSHClient()
	        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
	        for attempt in range(10):
	            try:
	                await asyncio.to_thread(
	                    client.connect, ssh_host, port=int(ssh_port),
	                    username="root", password="runpod", timeout=15,
	                    banner_timeout=30,
	                )
	                client.get_transport().set_keepalive(30)
	                return client
	            except Exception:
	                if attempt == 9:
	                    raise
	                await asyncio.sleep(5)
	        raise RuntimeError("SSH connection failed after retries")

	    async def _ssh_exec_r(cmd: str, timeout: int = 120) -> str:
	        """Execute SSH command, reconnecting once if the session dropped."""
	        nonlocal ssh
	        try:
	            t = ssh.get_transport()
	            if t is None or not t.is_active():
	                logger.info("SSH session dropped, reconnecting...")
	                ssh = await _ssh_connect_new()
	            return await _ssh_exec_async(ssh, cmd, timeout)
	        except Exception as e:
	            # Only session-level failures are retried; anything else propagates.
	            if "not active" in str(e).lower() or "session" in str(e).lower():
	                logger.info("SSH error '%s', reconnecting and retrying...", e)
	                ssh = await _ssh_connect_new()
	                return await _ssh_exec_async(ssh, cmd, timeout)
	            raise

	    for attempt in range(30):
	        try:
	            ssh = await _ssh_connect_new()
	            break
	        except Exception:
	            if attempt == 29:
	                _pod_state["setup_status"] = "Failed: SSH connection error"
	                _pod_state["status"] = "stopped"
	                return
	            await asyncio.sleep(5)

	    try:
	        # Symlink network volume
	        volume_id, _ = _get_volume_config()
	        if volume_id:
	            await _ssh_exec_async(ssh, "mkdir -p /runpod-volume/models /runpod-volume/loras")
	            await _ssh_exec_async(ssh, "rm -rf /workspace/models 2>/dev/null; ln -sf /runpod-volume/models /workspace/models")

	        # Install ComfyUI (cache on volume for reuse)
	        comfy_dir = "/workspace/ComfyUI"
	        _pod_state["setup_status"] = "Installing ComfyUI..."

	        comfy_exists = (await _ssh_exec_async(ssh, f"test -f {comfy_dir}/main.py && echo EXISTS || echo MISSING")).strip()
	        if comfy_exists == "EXISTS":
	            logger.info("ComfyUI already installed")
	            _pod_state["setup_status"] = "ComfyUI found, updating..."
	            await _ssh_exec_async(ssh, f"cd {comfy_dir} && git pull 2>&1 | tail -3", timeout=120)
	        else:
	            # Check volume cache
	            vol_comfy = (await _ssh_exec_async(ssh, "test -f /runpod-volume/ComfyUI/main.py && echo EXISTS || echo MISSING")).strip()
	            if vol_comfy == "EXISTS":
	                _pod_state["setup_status"] = "Restoring ComfyUI from volume..."
	                await _ssh_exec_async(ssh, f"cp -r /runpod-volume/ComfyUI {comfy_dir}", timeout=300)
	            else:
	                _pod_state["setup_status"] = "Cloning ComfyUI (first time, ~2 min)..."
	                await _ssh_exec_async(ssh, "cd /workspace && git clone --depth 1 https://github.com/comfyanonymous/ComfyUI.git", timeout=300)
	                await _ssh_exec_async(ssh, f"cd {comfy_dir} && pip install -r requirements.txt 2>&1 | tail -5", timeout=600)
	                # Cache to volume
	                volume_id, _ = _get_volume_config()
	                if volume_id:
	                    await _ssh_exec_async(ssh, f"cp -r {comfy_dir} /runpod-volume/ComfyUI", timeout=300)

	        # Install pip deps that aren't in ComfyUI requirements
	        _pod_state["setup_status"] = "Installing dependencies..."
	        await _ssh_exec_async(ssh, f"cd {comfy_dir} && pip install -r requirements.txt 2>&1 | tail -5", timeout=600)
	        await _ssh_exec_async(ssh, "pip install aiohttp einops sqlalchemy 2>&1 | tail -3", timeout=120)

	        # Symlink models into ComfyUI directories
	        _pod_state["setup_status"] = "Linking models..."
	        await _ssh_exec_async(ssh, f"mkdir -p {comfy_dir}/models/checkpoints {comfy_dir}/models/vae {comfy_dir}/models/loras {comfy_dir}/models/text_encoders")

	        if model_type == "flux2":
	            # FLUX.2 Dev — separate UNet, text encoder, and VAE
	            await _ssh_exec_async(ssh, f"mkdir -p {comfy_dir}/models/diffusion_models")
	            await _ssh_exec_async(ssh, f"ln -sf /workspace/models/FLUX.2-dev/flux2-dev.safetensors {comfy_dir}/models/diffusion_models/flux2-dev.safetensors")
	            await _ssh_exec_async(ssh, f"ln -sf /workspace/models/FLUX.2-dev/ae.safetensors {comfy_dir}/models/vae/ae.safetensors")

	            # Text encoder — use Comfy-Org's pre-converted single-file version
	            # (HF sharded format is incompatible with ComfyUI's CLIPLoader)
	            te_file = "/runpod-volume/models/mistral_3_small_flux2_fp8.safetensors"
	            te_exists = (await _ssh_exec_async(ssh, f"test -f {te_file} && echo EXISTS || echo MISSING")).strip()
	            if te_exists != "EXISTS":
	                _pod_state["setup_status"] = "Downloading FLUX.2 text encoder (~12GB, first time only)..."
	                await _ssh_exec_async(ssh, "pip install huggingface_hub 2>&1 | tail -1", timeout=60)
	                # The embedded script must start at column 0: it is passed verbatim to `python -c`.
	                await _ssh_exec_async(ssh, f"""python -c "
	from huggingface_hub import hf_hub_download
	hf_hub_download(
	    repo_id='Comfy-Org/flux2-dev',
	    filename='split_files/text_encoders/mistral_3_small_flux2_fp8.safetensors',
	    local_dir='/tmp/flux2_te',
	)
	import shutil
	shutil.move('/tmp/flux2_te/split_files/text_encoders/mistral_3_small_flux2_fp8.safetensors', '{te_file}')
	print('Text encoder downloaded')
	" 2>&1 | tail -5""", timeout=1800)
	            await _ssh_exec_async(ssh, f"ln -sf {te_file} {comfy_dir}/models/text_encoders/mistral_3_small_flux2_fp8.safetensors")
	            # Remove old sharded loader patch if present
	            await _ssh_exec_async(ssh, f"rm -f {comfy_dir}/custom_nodes/sharded_loader.py")
	        elif model_type == "flux1":
	            await _ssh_exec_async(ssh, f"ln -sf /workspace/models/flux1-dev.safetensors {comfy_dir}/models/checkpoints/flux1-dev.safetensors")
	            await _ssh_exec_async(ssh, f"ln -sf /workspace/models/ae.safetensors {comfy_dir}/models/vae/ae.safetensors")
	            await _ssh_exec_async(ssh, f"ln -sf /workspace/models/clip_l.safetensors {comfy_dir}/models/text_encoders/clip_l.safetensors")
	            await _ssh_exec_async(ssh, f"ln -sf /workspace/models/t5xxl_fp16.safetensors {comfy_dir}/models/text_encoders/t5xxl_fp16.safetensors")
	        elif model_type == "z_image":
	            # Z-Image Turbo — 6B param model by Tongyi-MAI, runs in 16GB VRAM
	            z_dir = "/runpod-volume/models/z_image"
	            await _ssh_exec_async(ssh, f"mkdir -p {z_dir}")
	            await _ssh_exec_async(ssh, "pip install huggingface_hub 2>&1 | tail -1", timeout=60)

	            # Delete FLUX.2 from volume to free space
	            _pod_state["setup_status"] = "Cleaning up FLUX.2 from volume..."
	            await _ssh_exec_async(ssh, "rm -rf /runpod-volume/models/FLUX.2-dev /runpod-volume/models/mistral_3_small_flux2_fp8.safetensors 2>/dev/null; echo done")

	            # Download diffusion model (~12GB)
	            diff_model = f"{z_dir}/z_image_turbo_bf16.safetensors"
	            exists = (await _ssh_exec_async(ssh, f"test -f {diff_model} && echo EXISTS || echo MISSING")).strip()
	            if exists != "EXISTS":
	                _pod_state["setup_status"] = "Downloading Z-Image Turbo diffusion model (~12GB)..."
	                await _ssh_exec_async(ssh, f"""python -c "
	from huggingface_hub import hf_hub_download
	import shutil, os
	p = hf_hub_download('Comfy-Org/z_image_turbo', 'split_files/diffusion_models/z_image_turbo_bf16.safetensors', local_dir='/tmp/z_image')
	shutil.move(p, '{diff_model}')
	print('Diffusion model downloaded')
	" 2>&1 | tail -5""", timeout=3600)

	            # Download text encoder (~8GB Qwen 3 4B)
	            te_model = f"{z_dir}/qwen_3_4b.safetensors"
	            exists = (await _ssh_exec_async(ssh, f"test -f {te_model} && echo EXISTS || echo MISSING")).strip()
	            if exists != "EXISTS":
	                _pod_state["setup_status"] = "Downloading Z-Image text encoder (~8GB)..."
	                await _ssh_exec_async(ssh, f"""python -c "
	from huggingface_hub import hf_hub_download
	import shutil
	p = hf_hub_download('Comfy-Org/z_image_turbo', 'split_files/text_encoders/qwen_3_4b.safetensors', local_dir='/tmp/z_image')
	shutil.move(p, '{te_model}')
	print('Text encoder downloaded')
	" 2>&1 | tail -5""", timeout=3600)

	            # Download VAE (~335MB)
	            vae_model = f"{z_dir}/ae.safetensors"
	            exists = (await _ssh_exec_async(ssh, f"test -f {vae_model} && echo EXISTS || echo MISSING")).strip()
	            if exists != "EXISTS":
	                _pod_state["setup_status"] = "Downloading Z-Image VAE..."
	                await _ssh_exec_async(ssh, f"""python -c "
	from huggingface_hub import hf_hub_download
	import shutil
	p = hf_hub_download('Comfy-Org/z_image_turbo', 'split_files/vae/ae.safetensors', local_dir='/tmp/z_image')
	shutil.move(p, '{vae_model}')
	print('VAE downloaded')
	" 2>&1 | tail -5""", timeout=600)

	            # Symlink into ComfyUI directories
	            await _ssh_exec_async(ssh, f"mkdir -p {comfy_dir}/models/diffusion_models {comfy_dir}/models/text_encoders {comfy_dir}/models/vae")
	            await _ssh_exec_async(ssh, f"ln -sf {diff_model} {comfy_dir}/models/diffusion_models/z_image_turbo_bf16.safetensors")
	            await _ssh_exec_async(ssh, f"ln -sf {te_model} {comfy_dir}/models/text_encoders/qwen_3_4b.safetensors")
	            await _ssh_exec_async(ssh, f"ln -sf {vae_model} {comfy_dir}/models/vae/ae_z_image.safetensors")

	        elif model_type == "wan22":
	            # WAN 2.2 Remix NSFW — dual-DiT MoE split-step for realistic generation
	            wan_dir = "/workspace/models/WAN2.2"
	            await _ssh_exec_async(ssh, f"mkdir -p {wan_dir}")

	            civitai_token = os.environ.get("CIVITAI_API_TOKEN", "")
	            token_param = f"&token={civitai_token}" if civitai_token else ""

	            # CivitAI Remix models (fp8 ~14GB each)
	            civitai_models = {
	                "Remix T2V High-noise": {
	                    "path": f"{wan_dir}/wan22_remix_t2v_high_fp8.safetensors",
	                    "url": f"https://civitai.com/api/download/models/2424167?type=Model&format=SafeTensor&size=pruned{token_param}",
	                },
	                "Remix T2V Low-noise": {
	                    "path": f"{wan_dir}/wan22_remix_t2v_low_fp8.safetensors",
	                    "url": f"https://civitai.com/api/download/models/2424912?type=Model&format=SafeTensor&size=pruned{token_param}",
	                },
	            }

	            # HuggingFace models (T5 fp8 ~7GB, VAE ~1GB)
	            hf_models = {
	                "T5 text encoder (fp8)": {
	                    "path": f"{wan_dir}/umt5_xxl_fp8_e4m3fn_scaled.safetensors",
	                    "repo": "Comfy-Org/Wan_2.2_ComfyUI_Repackaged",
	                    "filename": "split_files/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors",
	                },
	                "VAE": {
	                    "path": f"{wan_dir}/wan_2.1_vae.safetensors",
	                    "repo": "Comfy-Org/Wan_2.2_ComfyUI_Repackaged",
	                    "filename": "split_files/vae/wan_2.1_vae.safetensors",
	                },
	            }

	            # Download CivitAI Remix models
	            for label, info in civitai_models.items():
	                exists = (await _ssh_exec_async(ssh, f"test -f {info['path']} && echo EXISTS || echo MISSING")).strip()
	                if exists == "EXISTS":
	                    logger.info("WAN 2.2 %s already cached", label)
	                else:
	                    _pod_state["setup_status"] = f"Downloading {label} (~14GB)..."
	                    await _ssh_exec_async(ssh, f"wget -q -O '{info['path']}' '{info['url']}'", timeout=1800)
	                    # Verify download: anything under ~1MB is treated as a failed download.
	                    check = (await _ssh_exec_async(ssh, f"test -f {info['path']} && stat -c%s {info['path']} || echo 0")).strip()
	                    if check == "0" or int(check) < 1000000:
	                        logger.error("Failed to download %s (size: %s). CivitAI API token may be required.", label, check)
	                        _pod_state["setup_status"] = f"Failed: {label} download failed. Set CIVITAI_API_TOKEN env var for NSFW models."
	                        return

	            # Download HuggingFace models
	            await _ssh_exec_async(ssh, "pip install huggingface_hub 2>&1 | tail -1", timeout=60)
	            for label, info in hf_models.items():
	                exists = (await _ssh_exec_async(ssh, f"test -f {info['path']} && echo EXISTS || echo MISSING")).strip()
	                if exists == "EXISTS":
	                    logger.info("WAN 2.2 %s already cached", label)
	                else:
	                    _pod_state["setup_status"] = f"Downloading {label}..."
	                    await _ssh_exec_async(ssh, f"""python -c "
	from huggingface_hub import hf_hub_download
	import os, shutil
	hf_hub_download('{info['repo']}', '{info['filename']}', local_dir='{wan_dir}')
	downloaded = os.path.join('{wan_dir}', '{info['filename']}')
	target = '{info['path']}'
	if os.path.exists(downloaded) and downloaded != target:
	    os.makedirs(os.path.dirname(target), exist_ok=True)
	    shutil.move(downloaded, target)
	print('Downloaded {label}')
	" 2>&1 | tail -5""", timeout=1800)

	            # Symlink models into ComfyUI
	            await _ssh_exec_async(ssh, f"mkdir -p {comfy_dir}/models/diffusion_models {comfy_dir}/models/text_encoders")
	            await _ssh_exec_async(ssh, f"ln -sf {wan_dir}/wan22_remix_t2v_high_fp8.safetensors {comfy_dir}/models/diffusion_models/")
	            await _ssh_exec_async(ssh, f"ln -sf {wan_dir}/wan22_remix_t2v_low_fp8.safetensors {comfy_dir}/models/diffusion_models/")
	            await _ssh_exec_async(ssh, f"ln -sf {wan_dir}/wan_2.1_vae.safetensors {comfy_dir}/models/vae/")
	            await _ssh_exec_async(ssh, f"ln -sf {wan_dir}/umt5_xxl_fp8_e4m3fn_scaled.safetensors {comfy_dir}/models/text_encoders/")

	            # Install wanBlockSwap custom node (VRAM optimization for dual-DiT on 24GB GPUs)
	            _pod_state["setup_status"] = "Installing WAN 2.2 custom nodes..."
	            blockswap_dir = f"{comfy_dir}/custom_nodes/ComfyUI-wanBlockswap"
	            blockswap_exists = (await _ssh_exec_async(ssh, f"test -d {blockswap_dir} && echo EXISTS || echo MISSING")).strip()
	            if blockswap_exists != "EXISTS":
	                await _ssh_exec_async(ssh, f"cd {comfy_dir}/custom_nodes && git clone --depth 1 https://github.com/orssorbit/ComfyUI-wanBlockswap.git", timeout=120)

	        elif model_type == "wan22_i2v":
	            # WAN 2.2 Image-to-Video (14B params) — full model snapshot
	            wan_dir = "/workspace/models/Wan2.2-I2V-A14B"
	            wan_exists = (await _ssh_exec_async(ssh, f"test -d {wan_dir} && echo EXISTS || echo MISSING")).strip()
	            if wan_exists != "EXISTS":
	                _pod_state["setup_status"] = "Downloading WAN 2.2 I2V model (~28GB, first time only)..."
	                await _ssh_exec_async(ssh, "pip install huggingface_hub 2>&1 | tail -1", timeout=60)
	                # NOTE(review): '.md' / '.txt' only match files with exactly those names;
	                # '*.md' / '*.txt' were presumably intended — confirm before changing.
	                await _ssh_exec_async(ssh, f"""python -c "
	from huggingface_hub import snapshot_download
	snapshot_download('Wan-AI/Wan2.2-I2V-A14B', local_dir='{wan_dir}', ignore_patterns=['.md', '.txt'])
	print('WAN 2.2 I2V downloaded')
	" 2>&1 | tail -10""", timeout=3600)
	            await _ssh_exec_async(ssh, f"mkdir -p {comfy_dir}/models/diffusion_models")
	            await _ssh_exec_async(ssh, f"ln -sf {wan_dir} {comfy_dir}/models/diffusion_models/Wan2.2-I2V-A14B")
	            await _ssh_exec_async(ssh, f"ln -sf {wan_dir} {comfy_dir}/models/checkpoints/Wan2.2-I2V-A14B")

	            # Install ComfyUI-WanVideoWrapper custom nodes
	            _pod_state["setup_status"] = "Installing WAN 2.2 ComfyUI nodes..."
	            wan_nodes_dir = f"{comfy_dir}/custom_nodes/ComfyUI-WanVideoWrapper"
	            wan_nodes_exist = (await _ssh_exec_async(ssh, f"test -d {wan_nodes_dir} && echo EXISTS || echo MISSING")).strip()
	            if wan_nodes_exist != "EXISTS":
	                await _ssh_exec_async(ssh, f"cd {comfy_dir}/custom_nodes && git clone --depth 1 https://github.com/kijai/ComfyUI-WanVideoWrapper.git", timeout=120)
	                await _ssh_exec_async(ssh, f"cd {wan_nodes_dir} && pip install -r requirements.txt 2>&1 | tail -5", timeout=300)

	        elif model_type == "wan22_animate":
	            # WAN 2.2 Animate (14B fp8) — dance/motion transfer via pose skeleton
	            animate_dir = "/workspace/models/WAN2.2-Animate"
	            wan22_dir = "/workspace/models/WAN2.2"
	            await _ssh_exec_async(ssh, f"mkdir -p {animate_dir}")
	            await _ssh_exec_async(ssh, "pip install huggingface_hub 2>&1 | tail -1", timeout=60)

	            # Download main Animate model (~28GB bf16 — only version available)
	            animate_model = f"{animate_dir}/wan2.2_animate_14B_bf16.safetensors"
	            exists = (await _ssh_exec_async(ssh, f"test -f {animate_model} && echo EXISTS || echo MISSING")).strip()
	            if exists != "EXISTS":
	                _pod_state["setup_status"] = "Downloading WAN 2.2 Animate model (~28GB, first time only)..."
	                await _ssh_exec_async(ssh, f"""python -c "
	from huggingface_hub import hf_hub_download
	import os, shutil
	hf_hub_download('Comfy-Org/Wan_2.2_ComfyUI_Repackaged', 'split_files/diffusion_models/wan2.2_animate_14B_bf16.safetensors', local_dir='{animate_dir}')
	src = os.path.join('{animate_dir}', 'split_files', 'diffusion_models', 'wan2.2_animate_14B_bf16.safetensors')
	if os.path.exists(src):
	    shutil.move(src, '{animate_model}')
	print('Animate model downloaded')
	" 2>&1 | tail -5""", timeout=7200)

	            # CLIP Vision H (~2.5GB) — ViT-H vision encoder
	            clip_vision_target = f"{animate_dir}/clip_vision_h.safetensors"
	            exists = (await _ssh_exec_async(ssh, f"test -f {clip_vision_target} && echo EXISTS || echo MISSING")).strip()
	            if exists != "EXISTS":
	                _pod_state["setup_status"] = "Downloading CLIP Vision H (~2.5GB)..."
	                await _ssh_exec_async(ssh, f"""python -c "
	from huggingface_hub import hf_hub_download
	import os, shutil
	result = hf_hub_download('h94/IP-Adapter', 'models/image_encoder/model.safetensors', local_dir='{animate_dir}/tmp_clip')
	shutil.move(result, '{clip_vision_target}')
	shutil.rmtree('{animate_dir}/tmp_clip', ignore_errors=True)
	print('CLIP Vision H downloaded')
	" 2>&1 | tail -5""", timeout=1800)

	            # VAE — reuse from WAN2.2 dir if available, else download (~1GB)
	            vae_target = f"{animate_dir}/wan_2.1_vae.safetensors"
	            exists = (await _ssh_exec_async(ssh, f"test -f {vae_target} && echo EXISTS || echo MISSING")).strip()
	            if exists != "EXISTS":
	                vae_from_wan22 = (await _ssh_exec_async(ssh, f"test -f {wan22_dir}/wan_2.1_vae.safetensors && echo EXISTS || echo MISSING")).strip()
	                if vae_from_wan22 == "EXISTS":
	                    await _ssh_exec_async(ssh, f"ln -sf {wan22_dir}/wan_2.1_vae.safetensors {vae_target}")
	                else:
	                    _pod_state["setup_status"] = "Downloading VAE (~1GB)..."
	                    await _ssh_exec_async(ssh, f"""python -c "
	from huggingface_hub import hf_hub_download
	import os, shutil
	hf_hub_download('Comfy-Org/Wan_2.2_ComfyUI_Repackaged', 'split_files/vae/wan_2.1_vae.safetensors', local_dir='{animate_dir}')
	src = os.path.join('{animate_dir}', 'split_files', 'vae', 'wan_2.1_vae.safetensors')
	if os.path.exists(src):
	    shutil.move(src, '{vae_target}')
	print('VAE downloaded')
	" 2>&1 | tail -5""", timeout=600)

	            # UMT5 T5 encoder fp8 (non-scaled) — use Kijai/WanVideo_comfy version
	            # which is compatible with LoadWanVideoT5TextEncoder (scaled_fp8 is not supported)
	            t5_filename = "umt5-xxl-enc-fp8_e4m3fn.safetensors"
	            t5_target = f"{animate_dir}/{t5_filename}"
	            t5_comfy_path = f"{comfy_dir}/models/text_encoders/{t5_filename}"
	            t5_in_comfy = (await _ssh_exec_async(ssh, f"test -f {t5_comfy_path} && echo EXISTS || echo MISSING")).strip()
	            t5_in_vol = (await _ssh_exec_async(ssh, f"test -f {t5_target} && echo EXISTS || echo MISSING")).strip()
	            if t5_in_comfy != "EXISTS" and t5_in_vol != "EXISTS":
	                _pod_state["setup_status"] = "Downloading UMT5 text encoder (~6.3GB, first time only)..."
	                await _ssh_exec_async(ssh, f"""python -c "
	from huggingface_hub import hf_hub_download
	hf_hub_download('Kijai/WanVideo_comfy', '{t5_filename}', local_dir='{animate_dir}')
	print('UMT5 text encoder downloaded')
	" 2>&1 | tail -5""", timeout=1800)
	                t5_in_vol = "EXISTS"

	            # Symlink models into ComfyUI directories
	            await _ssh_exec_async(ssh, f"mkdir -p {comfy_dir}/models/diffusion_models {comfy_dir}/models/vae {comfy_dir}/models/clip_vision {comfy_dir}/models/text_encoders")
	            await _ssh_exec_async(ssh, f"ln -sf {animate_model} {comfy_dir}/models/diffusion_models/")
	            await _ssh_exec_async(ssh, f"ln -sf {vae_target} {comfy_dir}/models/vae/")
	            await _ssh_exec_async(ssh, f"ln -sf {clip_vision_target} {comfy_dir}/models/clip_vision/")
	            if t5_in_vol == "EXISTS" and t5_in_comfy != "EXISTS":
	                await _ssh_exec_async(ssh, f"ln -sf {t5_target} {t5_comfy_path}")

	            # Reconnect SSH before custom node setup — connection may have dropped during long downloads.
	            # Close the previous client first so it is not leaked when `ssh` is rebound.
	            try:
	                ssh.close()
	            except Exception:
	                pass
	            ssh = await _ssh_connect_new()

	            # Install required custom nodes
	            _pod_state["setup_status"] = "Installing WAN Animate custom nodes..."

	            # ComfyUI-WanVideoWrapper (WanVideoAnimateEmbeds, WanVideoSampler, etc.)
	            wan_nodes_dir = f"{comfy_dir}/custom_nodes/ComfyUI-WanVideoWrapper"
	            exists = (await _ssh_exec_r(f"test -d {wan_nodes_dir} && echo EXISTS || echo MISSING")).strip()
	            if exists != "EXISTS":
	                await _ssh_exec_r(f"cd {comfy_dir}/custom_nodes && git clone --depth 1 https://github.com/kijai/ComfyUI-WanVideoWrapper.git", timeout=120)
	                await _ssh_exec_r(f"cd {wan_nodes_dir} && pip install -r requirements.txt 2>&1 | tail -5", timeout=300)

	            # ComfyUI-VideoHelperSuite (VHS_LoadVideo, VHS_VideoCombine)
	            vhs_dir = f"{comfy_dir}/custom_nodes/ComfyUI-VideoHelperSuite"
	            exists = (await _ssh_exec_r(f"test -d {vhs_dir} && echo EXISTS || echo MISSING")).strip()
	            if exists != "EXISTS":
	                await _ssh_exec_r(f"cd {comfy_dir}/custom_nodes && git clone --depth 1 https://github.com/Kosinkadink/ComfyUI-VideoHelperSuite.git", timeout=120)
	                await _ssh_exec_r(f"cd {vhs_dir} && pip install -r requirements.txt 2>&1 | tail -5", timeout=300)

	            # comfyui_controlnet_aux (DWPreprocessor for pose extraction)
	            aux_dir = f"{comfy_dir}/custom_nodes/comfyui_controlnet_aux"
	            exists = (await _ssh_exec_r(f"test -d {aux_dir} && echo EXISTS || echo MISSING")).strip()
	            if exists != "EXISTS":
	                await _ssh_exec_r(f"cd {comfy_dir}/custom_nodes && git clone --depth 1 https://github.com/Fannovel16/comfyui_controlnet_aux.git", timeout=120)
	                await _ssh_exec_r(f"cd {aux_dir} && pip install -r requirements.txt 2>&1 | tail -5", timeout=300)

	            # ComfyUI-KJNodes (ImageResizeKJv2 used in animate workflow)
	            kj_dir = f"{comfy_dir}/custom_nodes/ComfyUI-KJNodes"
	            exists = (await _ssh_exec_r(f"test -d {kj_dir} && echo EXISTS || echo MISSING")).strip()
	            if exists != "EXISTS":
	                await _ssh_exec_r(f"cd {comfy_dir}/custom_nodes && git clone --depth 1 https://github.com/kijai/ComfyUI-KJNodes.git", timeout=120)
	                await _ssh_exec_r(f"cd {kj_dir} && pip install -r requirements.txt 2>&1 | tail -5", timeout=300)

	        # Symlink all LoRAs from volume
	        await _ssh_exec_r(f"ls /runpod-volume/loras/*.safetensors 2>/dev/null | while read f; do ln -sf \"$f\" {comfy_dir}/models/loras/; done")

	        # Start ComfyUI in background (fire-and-forget — don't wait for output)
	        _pod_state["setup_status"] = "Starting ComfyUI..."
	        await asyncio.to_thread(
	            _ssh_exec_fire_and_forget,
	            ssh,
	            f"cd {comfy_dir} && python main.py --listen 0.0.0.0 --port 8188 --fp8_e4m3fn-unet > /tmp/comfyui.log 2>&1",
	        )
	        await asyncio.sleep(2)  # Give it a moment to start

	        # Wait for ComfyUI HTTP to respond
	        _pod_state["setup_status"] = "Waiting for ComfyUI to load model..."
	        import httpx
	        comfyui_url = _get_comfyui_url()
	        for attempt in range(120):  # Up to 10 minutes
	            try:
	                async with httpx.AsyncClient(timeout=5) as client:
	                    resp = await client.get(f"{comfyui_url}/system_stats")
	                    if resp.status_code == 200:
	                        _pod_state["status"] = "running"
	                        _pod_state["setup_status"] = "Ready"
	                        _save_pod_state()
	                        logger.info("ComfyUI ready at %s", comfyui_url)
	                        return
	            except Exception:
	                # Not reachable yet; keep polling.
	                pass
	            await asyncio.sleep(5)

	        # If we get here, ComfyUI didn't start
	        # Check the log for errors. The session has been idle for up to ten minutes,
	        # so use the reconnecting helper rather than assuming it is still alive.
	        log_tail = await _ssh_exec_r("tail -20 /tmp/comfyui.log")
	        logger.error("ComfyUI didn't start. Log: %s", log_tail)
	        _pod_state["setup_status"] = "ComfyUI failed to start. Check logs."
	        _pod_state["status"] = "setting_up"  # Keep pod running so user can debug

	    except Exception as e:
	        import traceback
	        err_msg = f"{type(e).__name__}: {e}"
	        logger.error("Pod setup failed: %s\n%s", err_msg, traceback.format_exc())
	        _pod_state["setup_status"] = f"Setup failed: {err_msg}"
	        _pod_state["status"] = "setting_up"  # Keep pod running so user can debug
	    finally:
	        try:
	            ssh.close()
	        except Exception:
	            pass


	def _ssh_exec(ssh, cmd: str, timeout: int = 120) -> str:
	    """Run *cmd* on the remote host and return its stdout, whitespace-stripped.

	    Blocking: the call waits until the remote stdout stream is exhausted, so
	    async callers should go through ``asyncio.to_thread`` or a background task.
	    Bytes that are not valid UTF-8 are replaced instead of raising. The
	    command's stderr stream is not read.
	    """
	    _stdin, remote_out, _remote_err = ssh.exec_command(cmd, timeout=timeout)
	    return remote_out.read().decode("utf-8", errors="replace").strip()


	async def _ssh_exec_async(ssh, cmd: str, timeout: int = 120) -> str:
	    """Run ``_ssh_exec`` on a worker thread so the event loop is not blocked.

	    Returns whatever ``_ssh_exec`` returns for the same arguments.
	    """
	    remote_stdout = await asyncio.to_thread(_ssh_exec, ssh, cmd, timeout)
	    return remote_stdout


	def _ssh_exec_fire_and_forget(ssh, cmd: str):
	    """Launch *cmd* on a fresh SSH session channel and return immediately.

	    Meant for background processes: nothing is read back from the channel, so
	    the caller gets neither output nor exit status.
	    """
	    session = ssh.get_transport().open_session()
	    # Deliberately no stdout/stderr reads — the command is left running.
	    session.exec_command(cmd)


	# --- Pre-download models to network volume (saves money during training) ---

	# Progress record shared between the download endpoints and the background
	# download task in this module.
	_download_state = dict(
	    status="idle",  # one of: idle, downloading, completed, failed
	    pod_id=None,  # id of the temporary download pod while one exists
	    progress="",  # human-readable description of the current step
	    error=None,  # error text of the last failed run
	)


	class DownloadModelsRequest(BaseModel):
	    """Request body for ``POST /download-models``."""

	    # Model family to pre-download; the background task in this module has
	    # branches for "wan22" and "wan22_animate".
	    model_type: str = "wan22"
	    # GPU type tried first for the temporary download pod.
	    gpu_type: str = "NVIDIA GeForce RTX 3090" # Cheapest GPU, just for downloading


	# Strong references to in-flight download tasks. The event loop only keeps weak
	# references to tasks, so a task nobody references may be garbage-collected
	# before it finishes.
	_download_tasks: set[asyncio.Task] = set()


	@router.post("/download-models")
	async def download_models_to_volume(request: DownloadModelsRequest):
	    """Pre-download model files to network volume using a cheap pod.

	    This saves expensive GPU time during training — models are cached on the
	    shared volume and reused across all future training/generation pods.

	    The download itself runs as a background task; poll
	    ``GET /download-models/status`` for progress.

	    Returns:
	        ``{"status": "started", ...}`` when a download was kicked off, or
	        ``{"status": "already_downloading", ...}`` when one is in progress.

	    Raises:
	        HTTPException: 400 when no network volume is configured.
	    """
	    _get_api_key()  # NOTE(review): presumably validates/configures the RunPod API key — defined outside this section

	    volume_id, volume_dc = _get_volume_config()
	    if not volume_id:
	        raise HTTPException(400, "No network volume configured (set RUNPOD_VOLUME_ID)")

	    if _download_state["status"] == "downloading":
	        return {"status": "already_downloading", "progress": _download_state["progress"]}

	    _download_state["status"] = "downloading"
	    _download_state["progress"] = "Creating cheap download pod..."
	    _download_state["error"] = None

	    task = asyncio.create_task(
	        _download_models_task(request.model_type, request.gpu_type, volume_id, volume_dc)
	    )
	    # Keep the task alive until it completes, then drop the reference.
	    _download_tasks.add(task)
	    task.add_done_callback(_download_tasks.discard)

	    return {"status": "started", "message": f"Downloading {request.model_type} models to volume (using {request.gpu_type})"}


	@router.get("/download-models/status")
	async def download_models_status():
	    """Report the current state of the model pre-download (status, pod id, progress, error)."""
	    current_state = _download_state
	    return current_state


	async def _download_models_task(model_type: str, gpu_type: str, volume_id: str, volume_dc: str):
	    """Background task: spin up cheap pod, download models, terminate.

	    Progress and the final outcome are published through the module-level
	    ``_download_state`` dict. Ordinary failures are recorded there as
	    ``status="failed"`` instead of being raised. The pod, once created, is
	    always terminated in the ``finally`` block.

	    Args:
	        model_type: "wan22" or "wan22_animate". Any other value fails before a
	            pod is created.
	        gpu_type: GPU type tried first; a fixed fallback list follows when the
	            requested type is unavailable.
	        volume_id: Network volume attached to the pod.
	        volume_dc: Data center of the volume; sent as ``data_center_id`` when
	            non-empty.
	    """
	    import paramiko

	    ssh = None
	    pod_id = None

	    async def remote_size(path: str) -> int:
	        """Size in bytes of *path* on the pod; 0 when missing or unreadable."""
	        out = (await _ssh_exec_async(ssh, f"stat -c%s {path} 2>/dev/null || echo 0")).strip()
	        return int(out) if out.isdigit() else 0

	    async def remote_file_exists(path: str) -> bool:
	        """True when *path* is an existing regular file on the pod."""
	        out = await _ssh_exec_async(ssh, f"test -f {path} && echo EXISTS || echo MISSING")
	        return out.strip() == "EXISTS"

	    try:
	        # Reject unknown model types up front so we never pay for a pod that
	        # would download nothing and then report success.
	        if model_type not in ("wan22", "wan22_animate"):
	            raise ValueError(f"Unsupported model_type {model_type!r}; expected 'wan22' or 'wan22_animate'")

	        # Create cheap pod with network volume — try multiple GPU types if first unavailable
	        pod_kwargs = {
	            "container_disk_in_gb": 10,
	            "ports": "22/tcp",
	            "network_volume_id": volume_id,
	            "docker_args": "bash -c 'apt-get update && apt-get install -y openssh-server && mkdir -p /run/sshd && echo root:runpod | chpasswd && /usr/sbin/sshd -o PermitRootLogin=yes && sleep infinity'",
	        }
	        if volume_dc:
	            pod_kwargs["data_center_id"] = volume_dc

	        gpu_fallbacks = [
	            gpu_type,
	            "NVIDIA RTX A4000",
	            "NVIDIA RTX A5000",
	            "NVIDIA GeForce RTX 4090",
	            "NVIDIA GeForce RTX 4080",
	            "NVIDIA A100-SXM4-80GB",
	        ]
	        pod = None
	        used_gpu = gpu_type
	        for try_gpu in gpu_fallbacks:
	            try:
	                pod = await asyncio.to_thread(
	                    runpod.create_pod,
	                    f"model-download-{model_type}",
	                    DOCKER_IMAGE,
	                    try_gpu,
	                    **pod_kwargs,
	                )
	                used_gpu = try_gpu
	                logger.info("Download pod created with %s", try_gpu)
	                break
	            except Exception as e:
	                # Only capacity errors move on to the next GPU type.
	                if "SUPPLY_CONSTRAINT" in str(e) or "no longer any instances" in str(e).lower():
	                    logger.info("GPU %s unavailable, trying next...", try_gpu)
	                    continue
	                raise
	        if pod is None:
	            raise RuntimeError("No GPU available for download pod. Try again later.")
	        pod_id = pod["id"]
	        _download_state["pod_id"] = pod_id
	        _download_state["progress"] = f"Pod created with {used_gpu} ({pod_id}), waiting for SSH..."

	        # Wait for SSH: poll the pod until it exposes a public mapping for port 22
	        ssh_host = ssh_port = None
	        start = time.time()
	        while time.time() - start < 300:
	            try:
	                p = await asyncio.to_thread(runpod.get_pod, pod_id)
	                if p and p.get("desiredStatus") == "RUNNING":
	                    for port in (p.get("runtime") or {}).get("ports") or []:
	                        if port.get("privatePort") == 22:
	                            ssh_host = port.get("ip")
	                            ssh_port = port.get("publicPort")
	                    if ssh_host and ssh_port:
	                        break
	            except Exception as e:
	                # Transient API errors are expected while the pod boots.
	                logger.debug("Polling download pod %s failed: %s", pod_id, e)
	            await asyncio.sleep(5)

	        # Both values are needed to connect; a host without a port is not usable.
	        if not (ssh_host and ssh_port):
	            raise RuntimeError("Pod SSH not available after 5 min")

	        # Connect SSH (sshd may still be installing, so retry for a while)
	        ssh = paramiko.SSHClient()
	        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
	        for attempt in range(20):
	            try:
	                await asyncio.to_thread(ssh.connect, ssh_host, port=int(ssh_port), username="root", password="runpod", timeout=10)
	                break
	            except Exception as e:
	                if attempt == 19:
	                    raise RuntimeError("SSH connection failed after 20 attempts") from e
	                await asyncio.sleep(5)

	        ssh.get_transport().set_keepalive(30)
	        _download_state["progress"] = "SSH connected, setting up tools..."

	        # Symlink volume
	        await _ssh_exec_async(ssh, "mkdir -p /runpod-volume/models && rm -rf /workspace/models 2>/dev/null; ln -sf /runpod-volume/models /workspace/models")
	        await _ssh_exec_async(ssh, "pip install huggingface_hub 2>&1 | tail -1", timeout=120)
	        await _ssh_exec_async(ssh, "which aria2c || apt-get install -y aria2 2>&1 | tail -1", timeout=120)

	        if model_type == "wan22":
	            wan_dir = "/workspace/models/WAN2.2"
	            await _ssh_exec_async(ssh, f"mkdir -p {wan_dir}")

	            civitai_token = os.environ.get("CIVITAI_API_TOKEN", "")
	            token_param = f"&token={civitai_token}" if civitai_token else ""

	            # CivitAI Remix models (fp8)
	            civitai_files = [
	                ("Remix T2V High-noise", f"https://civitai.com/api/download/models/2424167?type=Model&format=SafeTensor&size=pruned{token_param}", f"{wan_dir}/wan22_remix_t2v_high_fp8.safetensors"),
	                ("Remix T2V Low-noise", f"https://civitai.com/api/download/models/2424912?type=Model&format=SafeTensor&size=pruned{token_param}", f"{wan_dir}/wan22_remix_t2v_low_fp8.safetensors"),
	            ]

	            # HuggingFace models
	            hf_files = [
	                ("T5 text encoder (fp8)", "Comfy-Org/Wan_2.2_ComfyUI_Repackaged", "split_files/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors", f"{wan_dir}/umt5_xxl_fp8_e4m3fn_scaled.safetensors"),
	                ("VAE", "Comfy-Org/Wan_2.2_ComfyUI_Repackaged", "split_files/vae/wan_2.1_vae.safetensors", f"{wan_dir}/wan_2.1_vae.safetensors"),
	            ]

	            total = len(civitai_files) + len(hf_files)
	            idx = 0

	            for label, url, target in civitai_files:
	                idx += 1
	                if await remote_file_exists(target):
	                    _download_state["progress"] = f"[{idx}/{total}] {label} already cached"
	                    logger.info("WAN 2.2 %s already on volume", label)
	                else:
	                    _download_state["progress"] = f"[{idx}/{total}] Downloading {label} (~14GB)..."
	                    await _ssh_exec_async(ssh, f"wget -q -O '{target}' '{url}'", timeout=1800)
	                    # Anything under ~1MB is treated as a failed download.
	                    if await remote_size(target) < 1000000:
	                        raise RuntimeError(f"Failed to download {label}. Set CIVITAI_API_TOKEN for NSFW models.")
	                    _download_state["progress"] = f"[{idx}/{total}] {label} downloaded"

	            for label, repo, filename, target in hf_files:
	                idx += 1
	                if await remote_file_exists(target):
	                    _download_state["progress"] = f"[{idx}/{total}] {label} already cached"
	                    logger.info("WAN 2.2 %s already on volume", label)
	                else:
	                    _download_state["progress"] = f"[{idx}/{total}] Downloading {label}..."
	                    hf_url = f"https://huggingface.co/{repo}/resolve/main/{filename}"
	                    fname = target.split("/")[-1]
	                    tdir = "/".join(target.split("/")[:-1])
	                    await _ssh_exec_async(ssh, f"aria2c -x 16 -s 16 -c -o '{fname}' --dir='{tdir}' '{hf_url}' 2>&1 | tail -3", timeout=1800)
	                    if not await remote_file_exists(target):
	                        raise RuntimeError(f"Failed to download {label}")
	                    _download_state["progress"] = f"[{idx}/{total}] {label} downloaded"

	            # Also pre-clone musubi-tuner to volume (for training)
	            _download_state["progress"] = "Caching musubi-tuner to volume..."
	            if not await remote_file_exists("/runpod-volume/musubi-tuner/pyproject.toml"):
	                await _ssh_exec_async(ssh, "cd /workspace && git clone --depth 1 https://github.com/kohya-ss/musubi-tuner.git && cp -r /workspace/musubi-tuner /runpod-volume/musubi-tuner", timeout=300)
	                _download_state["progress"] = "musubi-tuner cached"
	            else:
	                _download_state["progress"] = "musubi-tuner already cached"

	        elif model_type == "wan22_animate":
	            animate_dir = "/workspace/models/WAN2.2-Animate"
	            wan22_dir = "/workspace/models/WAN2.2"
	            hf_base = "https://huggingface.co"
	            await _ssh_exec_async(ssh, f"mkdir -p {animate_dir}")

	            # Files to download:
	            # (label, url, target, timeout_s, min_bytes, reuse_from_wan22)
	            # The reuse flag replaces matching on the label text, which had
	            # drifted out of sync for the VAE entry and so never matched.
	            wget_files = [
	                (
	                    "WAN 2.2 Animate model (~32GB)",
	                    f"{hf_base}/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/diffusion_models/wan2.2_animate_14B_bf16.safetensors",
	                    f"{animate_dir}/wan2.2_animate_14B_bf16.safetensors",
	                    7200,
	                    30_000_000_000,  # 30GB min — partial downloads get resumed
	                    False,
	                ),
	                (
	                    "UMT5 text encoder fp8 (~6.3GB)",
	                    f"{hf_base}/Kijai/WanVideo_comfy/resolve/main/umt5-xxl-enc-fp8_e4m3fn.safetensors",
	                    f"{animate_dir}/umt5-xxl-enc-fp8_e4m3fn.safetensors",
	                    1800,
	                    6_000_000_000,
	                    # NOTE(review): the wan22 branch stores its T5 under a
	                    # different filename, so this only hits if a file with
	                    # this exact name is already in the WAN2.2 dir.
	                    True,
	                ),
	                (
	                    "VAE (~242MB)",
	                    f"{hf_base}/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/vae/wan_2.1_vae.safetensors",
	                    f"{animate_dir}/wan_2.1_vae.safetensors",
	                    300,
	                    200_000_000,
	                    True,
	                ),
	                (
	                    "CLIP Vision H (~2.4GB)",
	                    f"{hf_base}/h94/IP-Adapter/resolve/main/models/image_encoder/model.safetensors",
	                    f"{animate_dir}/clip_vision_h.safetensors",
	                    900,
	                    2_000_000_000,
	                    False,
	                ),
	            ]

	            total = len(wget_files)

	            for idx, (label, url, target, dl_timeout, min_bytes, reuse_from_wan22) in enumerate(wget_files, 1):
	                filename = target.split("/")[-1]
	                target_dir = "/".join(target.split("/")[:-1])

	                # Reuse a complete copy from the WAN2.2 dir when one exists.
	                if reuse_from_wan22:
	                    wan22_candidate = f"{wan22_dir}/{filename}"
	                    if await remote_size(wan22_candidate) >= min_bytes:
	                        _download_state["progress"] = f"[{idx}/{total}] {label} — reusing from WAN2.2 dir"
	                        await _ssh_exec_async(ssh, f"ln -sf {wan22_candidate} {target} 2>/dev/null || cp {wan22_candidate} {target}")
	                        continue

	                if await remote_size(target) >= min_bytes:
	                    _download_state["progress"] = f"[{idx}/{total}] {label} already cached"
	                    continue

	                _download_state["progress"] = f"[{idx}/{total}] Downloading {label}..."
	                # Remove stale symlinks before downloading (can't resume through a symlink)
	                await _ssh_exec_async(ssh, f"test -L '{target}' && rm -f '{target}'; true")
	                await _ssh_exec_async(
	                    ssh,
	                    f"aria2c -x 16 -s 16 -c -o '{filename}' --dir='{target_dir}' '{url}' 2>&1 | tail -3",
	                    timeout=dl_timeout,
	                )
	                final_size = await remote_size(target)
	                if final_size < min_bytes:
	                    raise RuntimeError(f"Failed to download {label} (size {final_size} < {min_bytes})")
	                _download_state["progress"] = f"[{idx}/{total}] {label} downloaded"

	        _download_state["status"] = "completed"
	        _download_state["progress"] = "All models downloaded to volume! Ready for training."
	        logger.info("Model pre-download complete for %s", model_type)

	    except Exception as e:
	        _download_state["status"] = "failed"
	        _download_state["error"] = str(e)
	        _download_state["progress"] = f"Failed: {e}"
	        logger.error("Model download failed: %s", e)

	    finally:
	        if ssh:
	            try:
	                ssh.close()
	            except Exception:
	                pass
	        # Always terminate the pod so it stops accruing cost, even on failure.
	        if pod_id:
	            try:
	                await asyncio.to_thread(runpod.terminate_pod, pod_id)
	                logger.info("Download pod terminated: %s", pod_id)
	            except Exception as e:
	                logger.warning("Failed to terminate download pod: %s", e)
	        _download_state["pod_id"] = None


	@router.post("/stop")
	async def stop_pod():
	    """Stop the GPU pod.

	    Terminates the tracked pod through the RunPod API, clears the in-memory
	    pod state and persists it.

	    Returns:
	        ``{"status": "already_stopped"}`` when no pod is tracked,
	        ``{"status": "stopping", ...}`` when a stop is already in progress,
	        otherwise ``{"status": "stopped", ...}``.

	    Raises:
	        HTTPException: 500 when termination fails; the pod status is put back
	            to what it was before the attempt.
	    """
	    _get_api_key()  # NOTE(review): presumably validates/configures the RunPod API key — defined outside this section

	    if not _pod_state["pod_id"]:
	        return {"status": "already_stopped"}

	    if _pod_state["status"] == "stopping":
	        return {"status": "stopping", "message": "Pod is already stopping"}

	    # Remember the real status so a failed termination does not falsely
	    # promote e.g. a "setting_up" pod to "running".
	    previous_status = _pod_state["status"]
	    _pod_state["status"] = "stopping"

	    try:
	        pod_id = _pod_state["pod_id"]
	        logger.info("Stopping pod: %s", pod_id)

	        await asyncio.to_thread(runpod.terminate_pod, pod_id)

	        _pod_state["pod_id"] = None
	        _pod_state["ip"] = None
	        _pod_state["ssh_port"] = None
	        _pod_state["comfyui_port"] = None
	        _pod_state["status"] = "stopped"
	        _pod_state["started_at"] = None
	        _pod_state["setup_status"] = None
	        _save_pod_state()

	        logger.info("Pod stopped")
	        return {"status": "stopped", "message": "Pod terminated"}

	    except Exception as e:
	        logger.error("Failed to stop pod: %s", e)
	        _pod_state["status"] = previous_status
	        raise HTTPException(500, f"Failed to stop pod: {e}") from e


	@router.get("/loras")
	async def list_pod_loras():
	    """Return the LoRA names ComfyUI on the pod offers for its ``LoraLoader`` node.

	    Yields an empty list (with a message) when the pod is not running, and an
	    empty list plus the ComfyUI URL when the lookup fails or returns non-200.
	    """
	    pod_reachable = _pod_state["status"] == "running" and _pod_state["ip"]
	    if not pod_reachable:
	        return {"loras": [], "message": "Pod not running"}

	    comfyui_url = _get_comfyui_url()
	    try:
	        import httpx

	        async with httpx.AsyncClient(timeout=30) as http:
	            reply = await http.get(f"{comfyui_url}/object_info/LoraLoader")
	            if reply.status_code == 200:
	                # Walk LoraLoader -> input -> required, tolerating missing levels.
	                section = reply.json()
	                for level in ("LoraLoader", "input", "required"):
	                    section = section.get(level, {})
	                names = section.get("lora_name", [[]])[0]
	                if isinstance(names, list):
	                    return {"loras": names}
	                return {"loras": []}
	    except Exception as e:
	        logger.warning("Failed to list pod LoRAs: %s", e)

	    return {"loras": [], "comfyui_url": comfyui_url}


	@router.post("/upload-lora")
	async def upload_lora_to_pod(file: UploadFile = File(...)):
	    """Upload a LoRA file directly to /runpod-volume/loras/ via SFTP so it persists.

	    After the upload the file is symlinked into ComfyUI's ``models/loras``
	    directory on the pod.

	    Returns:
	        ``{"status": "uploaded", "filename": ..., "path": ...}``.

	    Raises:
	        HTTPException: 400 when the pod is not running or the file is not a
	            ``.safetensors`` file; 500 when no SSH IP is known or the upload
	            fails.
	    """
	    import io
	    import posixpath
	    import shlex

	    import paramiko

	    if _pod_state["status"] != "running":
	        raise HTTPException(400, "Pod not running - start it first")

	    # The client controls the filename: keep only its last path component so
	    # it cannot point outside the loras directory. A missing name becomes "".
	    safe_name = posixpath.basename((file.filename or "").replace("\\", "/"))
	    if not safe_name.endswith(".safetensors"):
	        raise HTTPException(400, "Only .safetensors files supported")

	    ip = _pod_state.get("ip")
	    port = _pod_state.get("ssh_port") or 22
	    if not ip:
	        raise HTTPException(500, "No SSH IP available")

	    content = await file.read()
	    dest_path = f"/runpod-volume/loras/{safe_name}"
	    comfy_link = f"/workspace/ComfyUI/models/loras/{safe_name}"

	    def _sftp_upload():
	        """Blocking upload + symlink; always releases the SSH connection."""
	        client = paramiko.SSHClient()
	        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
	        try:
	            # NOTE(review): no password is passed here — presumably key/agent auth; confirm.
	            client.connect(ip, port=port, username="root", timeout=30)
	            # Ensure dir exists (reading stdout waits for the command to finish)
	            client.exec_command("mkdir -p /runpod-volume/loras")[1].read()
	            sftp = client.open_sftp()
	            try:
	                sftp.putfo(io.BytesIO(content), dest_path)
	            finally:
	                sftp.close()
	            # Symlink into ComfyUI; quote both paths because the name is user input.
	            client.exec_command(f"ln -sf {shlex.quote(dest_path)} {shlex.quote(comfy_link)}")[1].read()
	        finally:
	            client.close()

	    try:
	        await asyncio.to_thread(_sftp_upload)
	        logger.info("LoRA uploaded to volume: %s (%d bytes)", safe_name, len(content))
	        return {"status": "uploaded", "filename": safe_name, "path": dest_path}
	    except Exception as e:
	        logger.error("LoRA upload failed: %s", e)
	        raise HTTPException(500, f"Upload failed: {e}") from e


	@router.post("/upload-lora-local")
	async def upload_lora_from_local(local_path: str, filename: str | None = None):
	    """Upload a LoRA from a local server path directly to the volume via SFTP.

	    Args:
	        local_path: Path of the file on the machine running this server.
	        filename: Name to store the file under on the volume; defaults to the
	            source file's name.

	    Returns:
	        ``{"status": "uploaded", "filename": ..., "path": ..., "size_mb": ...}``.

	    Raises:
	        HTTPException: 400 when the pod is not running or the destination name
	            is not ``.safetensors``; 404 when the source is not an existing
	            file; 500 when no SSH IP is known or the upload fails.
	    """
	    # NOTE(review): local_path lets a caller read any file the server can
	    # access — make sure this route is not exposed to untrusted clients.
	    import posixpath
	    import shlex

	    import paramiko

	    if _pod_state["status"] != "running":
	        raise HTTPException(400, "Pod not running - start it first")

	    src = Path(local_path)
	    # is_file() also rejects directories, which SFTP could not upload anyway.
	    if not src.is_file():
	        raise HTTPException(404, f"Local file not found: {local_path}")

	    # Keep only the last path component so the name cannot leave the loras dir.
	    dest_name = posixpath.basename((filename or src.name).replace("\\", "/"))
	    if not dest_name.endswith(".safetensors"):
	        raise HTTPException(400, "Only .safetensors files supported")

	    ip = _pod_state.get("ip")
	    port = _pod_state.get("ssh_port") or 22
	    if not ip:
	        raise HTTPException(500, "No SSH IP available")

	    dest_path = f"/runpod-volume/loras/{dest_name}"
	    comfy_link = f"/workspace/ComfyUI/models/loras/{dest_name}"

	    def _sftp_upload():
	        """Blocking upload + symlink; always releases the SSH connection."""
	        client = paramiko.SSHClient()
	        client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
	        try:
	            # NOTE(review): no password is passed here — presumably key/agent auth; confirm.
	            client.connect(ip, port=port, username="root", timeout=30)
	            client.exec_command("mkdir -p /runpod-volume/loras")[1].read()
	            sftp = client.open_sftp()
	            try:
	                sftp.put(str(src), dest_path)
	            finally:
	                sftp.close()
	            # Quote both paths: the destination name comes from the request.
	            client.exec_command(f"ln -sf {shlex.quote(dest_path)} {shlex.quote(comfy_link)}")[1].read()
	        finally:
	            client.close()

	    try:
	        await asyncio.to_thread(_sftp_upload)
	        size_mb = src.stat().st_size / 1024 / 1024
	        logger.info("LoRA uploaded from local: %s (%.1f MB)", dest_name, size_mb)
	        return {"status": "uploaded", "filename": dest_name, "path": dest_path, "size_mb": round(size_mb, 1)}
	    except Exception as e:
	        logger.error("Local LoRA upload failed: %s", e)
	        raise HTTPException(500, f"Upload failed: {e}") from e


	class PodGenerateRequest(BaseModel):
	    """Request body for ``POST /generate`` (image generation on the running pod)."""

	    prompt: str
	    negative_prompt: str = ""
	    width: int = 1024
	    height: int = 1024
	    steps: int = 28
	    cfg: float = 3.5
	    # A negative seed makes the endpoint pick a random one.
	    seed: int = -1
	    lora_name: str | None = None
	    lora_strength: float = 0.85
	    # Second LoRA; only forwarded to the WAN 2.2 workflow builder.
	    lora_name_2: str | None = None
	    lora_strength_2: float = 0.85
	    # NOTE(review): character_id / template_id are not read by the generate
	    # endpoint in this part of the file — confirm whether they are used elsewhere.
	    character_id: str | None = None
	    template_id: str | None = None
	    # Used as a path segment of the output directory and passed to the catalog.
	    content_rating: str = "sfw"


	# In-memory job tracking for pod generation: job_id -> job record (prompt id,
	# status, seed, prompts, sampler settings, timestamps, output path).
	# NOTE(review): nothing in this part of the file persists or prunes it.
	_pod_jobs: dict[str, dict] = {}


	# Strong references to running poll tasks. The event loop only keeps weak
	# references, so an unreferenced task may be garbage-collected mid-poll.
	_pod_poll_tasks: set[asyncio.Task] = set()


	@router.post("/generate")
	async def generate_on_pod(request: PodGenerateRequest):
	    """Generate an image using the running pod's ComfyUI.

	    Builds a workflow for the pod's model type, queues it on ComfyUI, records
	    the job in ``_pod_jobs`` and starts a background task that polls for the
	    result.

	    Returns:
	        ``{"job_id": ..., "status": "running", "seed": ...}``.

	    Raises:
	        HTTPException: 400 when the pod is not running; 500 when queueing the
	            prompt fails.
	    """
	    import httpx
	    import random

	    if _pod_state["status"] != "running":
	        raise HTTPException(400, "Pod not running - start it first")

	    job_id = str(uuid.uuid4())[:8]
	    # Negative seed means "pick one for me".
	    seed = request.seed if request.seed >= 0 else random.randint(0, 2**32 - 1)

	    model_type = _pod_state.get("model_type", "flux2")
	    if model_type == "wan22":
	        workflow = _build_wan_t2i_workflow(
	            prompt=request.prompt,
	            negative_prompt=request.negative_prompt,
	            width=request.width,
	            height=request.height,
	            steps=request.steps,
	            cfg=request.cfg,
	            seed=seed,
	            lora_name=request.lora_name,
	            lora_strength=request.lora_strength,
	            lora_name_2=request.lora_name_2,
	            lora_strength_2=request.lora_strength_2,
	        )
	    else:
	        workflow = _build_flux_workflow(
	            prompt=request.prompt,
	            negative_prompt=request.negative_prompt,
	            width=request.width,
	            height=request.height,
	            steps=request.steps,
	            cfg=request.cfg,
	            seed=seed,
	            lora_name=request.lora_name,
	            lora_strength=request.lora_strength,
	            model_type=model_type,
	        )

	    comfyui_url = _get_comfyui_url()

	    try:
	        async with httpx.AsyncClient(timeout=30) as client:
	            resp = await client.post(f"{comfyui_url}/prompt", json={"prompt": workflow})
	            resp.raise_for_status()

	            data = resp.json()
	            prompt_id = data["prompt_id"]

	            now = time.time()
	            _pod_jobs[job_id] = {
	                "prompt_id": prompt_id,
	                "status": "running",
	                "seed": seed,
	                "created_at": now,
	                "started_at": now,
	                "positive_prompt": request.prompt,
	                "negative_prompt": request.negative_prompt,
	                "steps": request.steps,
	                "cfg": request.cfg,
	                "width": request.width,
	                "height": request.height,
	            }

	            logger.info("Pod generation started: %s -> %s", job_id, prompt_id)
	            poll_task = asyncio.create_task(_poll_pod_job(job_id, prompt_id, request.content_rating))
	            # Keep the task alive until it finishes, then drop the reference.
	            _pod_poll_tasks.add(poll_task)
	            poll_task.add_done_callback(_pod_poll_tasks.discard)

	            return {"job_id": job_id, "status": "running", "seed": seed}

	    except Exception as e:
	        logger.error("Pod generation failed: %s", e)
	        raise HTTPException(500, f"Generation failed: {e}") from e


	async def _record_pod_job_progress(client, comfyui_url: str, job_id: str, elapsed: float) -> None:
	    """Best-effort: read the ComfyUI queue and store a progress message on the job."""
	    try:
	        q_resp = await client.get(f"{comfyui_url}/queue")
	        if q_resp.status_code != 200:
	            return
	        q_data = q_resp.json()
	        running = q_data.get("queue_running", [])
	        pending = len(q_data.get("queue_pending", []))
	        status_msg = f"{int(elapsed)}s elapsed"
	        if running:
	            # Try to get node execution progress
	            try:
	                p_resp = await client.get(f"{comfyui_url}/prompt")
	                if p_resp.status_code == 200:
	                    exec_info = p_resp.json().get("exec_info", {})
	                    if exec_info:
	                        status_msg += f" | nodes: {exec_info}"
	            except Exception:
	                pass
	            status_msg += " | generating..."
	        elif pending:
	            status_msg += " | loading models..."
	        else:
	            status_msg += " | waiting..."
	        _pod_jobs[job_id]["progress_msg"] = status_msg
	        logger.info("Pod gen %s: %s", job_id, status_msg)
	    except Exception as e:
	        # Progress is informational only; never let it break polling.
	        logger.debug("Pod gen %s: progress check failed: %s", job_id, e)


	async def _poll_pod_job(job_id: str, prompt_id: str, content_rating: str):
	    """Poll ComfyUI for job completion and save the result.

	    Every 2 seconds the prompt's history entry is fetched. When it contains an
	    image, the first image of the first image-producing node is downloaded,
	    written to ``<output_dir>/pod/<content_rating>/raw/pod_<job_id>.png``,
	    recorded on the job and (best-effort) inserted into the catalog. A prompt
	    that ComfyUI reports as failed marks the job failed immediately; otherwise
	    the job fails after a 15 minute timeout.
	    """
	    import httpx

	    start = time.time()
	    timeout = 900  # 15 min — first gen loads model (~12GB) + samples
	    comfyui_url = _get_comfyui_url()
	    last_log_time = 0

	    async with httpx.AsyncClient(timeout=60) as client:
	        while time.time() - start < timeout:
	            try:
	                # Log queue progress every 15 seconds and store in job
	                elapsed = time.time() - start
	                if elapsed - last_log_time >= 15:
	                    last_log_time = elapsed
	                    await _record_pod_job_progress(client, comfyui_url, job_id, elapsed)

	                resp = await client.get(f"{comfyui_url}/history/{prompt_id}")
	                entry = resp.json().get(prompt_id) if resp.status_code == 200 else None

	                if entry is not None:
	                    # A failed prompt never produces outputs; stop right away
	                    # instead of polling until the timeout.
	                    status_info = entry.get("status") or {}
	                    if status_info.get("status_str") == "error":
	                        error_text = "ComfyUI reported an execution error"
	                        for message in status_info.get("messages") or []:
	                            if (
	                                isinstance(message, (list, tuple))
	                                and len(message) == 2
	                                and message[0] == "execution_error"
	                                and isinstance(message[1], dict)
	                                and message[1].get("exception_message")
	                            ):
	                                error_text = f"{error_text}: {message[1]['exception_message']}"
	                                break
	                        _pod_jobs[job_id]["status"] = "failed"
	                        _pod_jobs[job_id]["error"] = error_text
	                        logger.error("Pod generation failed: %s: %s", job_id, error_text)
	                        return

	                    for node_output in entry.get("outputs", {}).values():
	                        images = node_output.get("images")
	                        if not images:
	                            continue
	                        image_info = images[0]
	                        filename = image_info["filename"]
	                        subfolder = image_info.get("subfolder", "")

	                        params = {"filename": filename}
	                        if subfolder:
	                            params["subfolder"] = subfolder

	                        img_resp = await client.get(f"{comfyui_url}/view", params=params)
	                        if img_resp.status_code == 200:
	                            from content_engine.config import settings
	                            output_dir = settings.paths.output_dir / "pod" / content_rating / "raw"
	                            output_dir.mkdir(parents=True, exist_ok=True)

	                            local_path = output_dir / f"pod_{job_id}.png"
	                            local_path.write_bytes(img_resp.content)

	                            _pod_jobs[job_id]["status"] = "completed"
	                            _pod_jobs[job_id]["output_path"] = str(local_path)
	                            _pod_jobs[job_id]["completed_at"] = time.time()

	                            logger.info("Pod generation completed: %s -> %s", job_id, local_path)

	                            # Cataloging is best-effort: the image is already saved.
	                            try:
	                                from content_engine.services.catalog import CatalogService
	                                catalog = CatalogService()
	                                job_info = _pod_jobs[job_id]
	                                await catalog.insert_image(
	                                    file_path=str(local_path),
	                                    image_bytes=img_resp.content,
	                                    content_rating=content_rating,
	                                    positive_prompt=job_info.get("positive_prompt"),
	                                    negative_prompt=job_info.get("negative_prompt"),
	                                    seed=job_info.get("seed"),
	                                    steps=job_info.get("steps"),
	                                    cfg=job_info.get("cfg"),
	                                    width=job_info.get("width"),
	                                    height=job_info.get("height"),
	                                    generation_backend="runpod-pod",
	                                    generation_time_seconds=time.time() - job_info.get("created_at", time.time()),
	                                )
	                                logger.info("Pod image cataloged: %s", job_id)
	                            except Exception as e:
	                                logger.warning("Failed to catalog pod image: %s", e)

	                            return

	            except Exception as e:
	                # Transient errors (pod busy, connection reset) just mean "try again".
	                logger.debug("Polling pod job: %s", e)

	            await asyncio.sleep(2)

	    _pod_jobs[job_id]["status"] = "failed"
	    _pod_jobs[job_id]["error"] = "Timeout waiting for generation"
	    logger.error("Pod generation timed out: %s", job_id)


	@router.get("/jobs/{job_id}")
	async def get_pod_job(job_id: str):
	    """Return the tracked record of a pod generation job, or 404 if there is none."""
	    record = _pod_jobs.get(job_id)
	    if record:
	        return record
	    raise HTTPException(404, "Job not found")


	@router.get("/jobs/{job_id}/image")
	async def get_pod_job_image(job_id: str):
	    """Send back the PNG saved for a finished pod job.

	    Responds 404 when the job is unknown, has no output path yet, or the
	    recorded file no longer exists on disk.
	    """
	    from fastapi.responses import FileResponse

	    record = _pod_jobs.get(job_id)
	    if not record:
	        raise HTTPException(404, "Job not found")

	    saved_to = record.get("output_path")
	    if not saved_to:
	        raise HTTPException(404, "No image yet")

	    image_file = Path(saved_to)
	    if image_file.exists():
	        return FileResponse(image_file, media_type="image/png")
	    raise HTTPException(404, "Image file not found")


	def _build_flux_workflow(
	prompt: str,
	negative_prompt: str,
	width: int,
	height: int,
	steps: int,
	cfg: float,
	seed: int,
	lora_name: str \| None,
	lora_strength: float,
	model_type: str = "flux2",
	) -> dict:
	"""Build a ComfyUI workflow for FLUX generation.

	FLUX.2 Dev uses separate model components (not a single checkpoint):
	- UNETLoader for the diffusion model
	- CLIPLoader (type=flux2) for the Mistral text encoder
	- VAELoader for the autoencoder
	"""

	if model_type == "flux2":
	unet_name = "flux2-dev.safetensors"
	clip_type = "flux2"
	clip_name = "mistral_3_small_flux2_fp8.safetensors"
	else:
	unet_name = "flux1-dev.safetensors"
	clip_type = "flux"
	clip_name = "t5xxl_fp16.safetensors"

	# Model node ID references
	model_out = ["1", 0] # UNETLoader -> MODEL
	clip_out = ["2", 0] # CLIPLoader -> CLIP
	vae_out = ["3", 0] # VAELoader -> VAE

	workflow = {
	# Load diffusion model (UNet)
	"1": {
	"class_type": "UNETLoader",
	"inputs": {
	"unet_name": unet_name,
	"weight_dtype": "fp8_e4m3fn",
	},
	},
	# Load text encoder
	"2": {
	"class_type": "CLIPLoader",
	"inputs": {
	"clip_name": clip_name,
	"type": clip_type,
	},
	},
	# Load VAE
	"3": {
	"class_type": "VAELoader",
	"inputs": {"vae_name": "ae.safetensors"},
	},
	# Positive prompt
	"6": {
	"class_type": "CLIPTextEncode",
	"inputs": {
	"text": prompt,
	"clip": clip_out,
	},
	},
	# Negative prompt
	"7": {
	"class_type": "CLIPTextEncode",
	"inputs": {
	"text": negative_prompt or "",
	"clip": clip_out,
	},
	},
	# Empty latent
	"5": {
	"class_type": "EmptyLatentImage",
	"inputs": {
	"width": width,
	"height": height,
	"batch_size": 1,
	},
	},
	# Sampler
	"10": {
	"class_type": "KSampler",
	"inputs": {
	"seed": seed,
	"steps": steps,
	"cfg": cfg,
	"sampler_name": "euler",
	"scheduler": "simple",
	"denoise": 1.0,
	"model": model_out,
	"positive": ["6", 0],
	"negative": ["7", 0],
	"latent_image": ["5", 0],
	},
	},
	# Decode
	"8": {
	"class_type": "VAEDecode",
	"inputs": {
	"samples": ["10", 0],
	"vae": vae_out,
	},
	},
	# Save
	"9": {
	"class_type": "SaveImage",
	"inputs": {
	"filename_prefix": "flux_pod",
	"images": ["8", 0],
	},
	},
	}

	# Add LoRA if specified
	if lora_name:
	workflow["20"] = {
	"class_type": "LoraLoader",
	"inputs": {
	"lora_name": lora_name,
	"strength_model": lora_strength,
	"strength_clip": lora_strength,
	"model": model_out,
	"clip": clip_out,
	},
	}
	# Rewire sampler and text encoders to use LoRA output
	workflow["10"]["inputs"]["model"] = ["20", 0]
	workflow["6"]["inputs"]["clip"] = ["20", 1]
	workflow["7"]["inputs"]["clip"] = ["20", 1]

	return workflow


	def _build_wan_t2i_workflow(
	prompt: str,
	negative_prompt: str,
	width: int,
	height: int,
	steps: int,
	cfg: float,
	seed: int,
	lora_name: str \| None,
	lora_strength: float,
	lora_name_2: str \| None = None,
	lora_strength_2: float = 0.85,
	) -> dict:
	"""Build a ComfyUI workflow for WAN 2.2 Remix — dual-DiT MoE split-step.

	Based on the WAN 2.2 Remix workflow from CivitAI:
	- Two UNETLoaders: high-noise + low-noise Remix models (fp8)
	- wanBlockSwap on both (offloads blocks to CPU for 24GB GPUs)
	- ModelSamplingSD3 with shift=5 on both
	- Dual KSamplerAdvanced: high-noise runs first half, low-noise finishes
	- CLIPLoader (type=wan) + CLIPTextEncode for T5 text encoding
	- Standard VAELoader + VAEDecode
	- EmptyHunyuanLatentVideo for latent (1 frame = image, 81+ = video)
	"""
	high_dit = "wan22_remix_t2v_high_fp8.safetensors"
	low_dit = "wan22_remix_t2v_low_fp8.safetensors"
	t5_name = "umt5_xxl_fp8_e4m3fn_scaled.safetensors"
	vae_name = "wan_2.1_vae.safetensors"

	total_steps = steps # default 8
	split_step = total_steps // 2 # high-noise does first half, low-noise does rest
	shift = 5.0
	block_swap = 20 # blocks offloaded to CPU (0-40, higher = less VRAM)

	workflow = {
	# ── Load high-noise DiT ──
	"1": {
	"class_type": "UNETLoader",
	"inputs": {
	"unet_name": high_dit,
	"weight_dtype": "fp8_e4m3fn",
	},
	},
	# ── Load low-noise DiT ──
	"2": {
	"class_type": "UNETLoader",
	"inputs": {
	"unet_name": low_dit,
	"weight_dtype": "fp8_e4m3fn",
	},
	},
	# ── wanBlockSwap on high-noise (VRAM optimization) ──
	"11": {
	"class_type": "wanBlockSwap",
	"inputs": {
	"model": ["1", 0],
	"blocks_to_swap": block_swap,
	"offload_img_emb": False,
	"offload_txt_emb": False,
	},
	},
	# ── wanBlockSwap on low-noise ──
	"12": {
	"class_type": "wanBlockSwap",
	"inputs": {
	"model": ["2", 0],
	"blocks_to_swap": block_swap,
	"offload_img_emb": False,
	"offload_txt_emb": False,
	},
	},
	# ── ModelSamplingSD3 shift on high-noise ──
	"13": {
	"class_type": "ModelSamplingSD3",
	"inputs": {
	"model": ["11", 0],
	"shift": shift,
	},
	},
	# ── ModelSamplingSD3 shift on low-noise ──
	"14": {
	"class_type": "ModelSamplingSD3",
	"inputs": {
	"model": ["12", 0],
	"shift": shift,
	},
	},
	# ── Load T5 text encoder ──
	"3": {
	"class_type": "CLIPLoader",
	"inputs": {
	"clip_name": t5_name,
	"type": "wan",
	},
	},
	# ── Positive prompt ──
	"6": {
	"class_type": "CLIPTextEncode",
	"inputs": {
	"text": prompt,
	"clip": ["3", 0],
	},
	},
	# ── Negative prompt ──
	"7": {
	"class_type": "CLIPTextEncode",
	"inputs": {
	"text": negative_prompt or "",
	"clip": ["3", 0],
	},
	},
	# ── VAE ──
	"4": {
	"class_type": "VAELoader",
	"inputs": {"vae_name": vae_name},
	},
	# ── Empty latent (1 frame = single image) ──
	"5": {
	"class_type": "EmptyHunyuanLatentVideo",
	"inputs": {
	"width": width,
	"height": height,
	"length": 1,
	"batch_size": 1,
	},
	},
	# ── KSamplerAdvanced #1: High-noise model (first half of steps) ──
	"15": {
	"class_type": "KSamplerAdvanced",
	"inputs": {
	"model": ["13", 0],
	"positive": ["6", 0],
	"negative": ["7", 0],
	"latent_image": ["5", 0],
	"add_noise": "enable",
	"noise_seed": seed,
	"steps": total_steps,
	"cfg": cfg,
	"sampler_name": "euler",
	"scheduler": "simple",
	"start_at_step": 0,
	"end_at_step": split_step,
	"return_with_leftover_noise": "enable",
	},
	},
	# ── KSamplerAdvanced #2: Low-noise model (second half of steps) ──
	"16": {
	"class_type": "KSamplerAdvanced",
	"inputs": {
	"model": ["14", 0],
	"positive": ["6", 0],
	"negative": ["7", 0],
	"latent_image": ["15", 0],
	"add_noise": "disable",
	"noise_seed": seed,
	"steps": total_steps,
	"cfg": cfg,
	"sampler_name": "euler",
	"scheduler": "simple",
	"start_at_step": split_step,
	"end_at_step": 10000,
	"return_with_leftover_noise": "disable",
	},
	},
	# ── VAE Decode ──
	"8": {
	"class_type": "VAEDecode",
	"inputs": {
	"samples": ["16", 0],
	"vae": ["4", 0],
	},
	},
	# ── Save Image ──
	"9": {
	"class_type": "SaveImage",
	"inputs": {
	"filename_prefix": "wan_remix_pod",
	"images": ["8", 0],
	},
	},
	}

	# Add LoRA(s) to both models if specified — chained: DiT → LoRA1 → LoRA2 → Sampler
	if lora_name:
	# LoRA 1 (body) on high-noise and low-noise DiT
	workflow["20"] = {
	"class_type": "LoraLoader",
	"inputs": {
	"lora_name": lora_name,
	"strength_model": lora_strength,
	"strength_clip": 1.0,
	"model": ["13", 0],
	"clip": ["3", 0],
	},
	}
	workflow["21"] = {
	"class_type": "LoraLoader",
	"inputs": {
	"lora_name": lora_name,
	"strength_model": lora_strength,
	"strength_clip": 1.0,
	"model": ["14", 0],
	"clip": ["3", 0],
	},
	}

	# Determine what the samplers and CLIP read from (LoRA2 if present, else LoRA1)
	high_model_out = ["20", 0]
	low_model_out = ["21", 0]
	clip_out = ["20", 1]

	if lora_name_2:
	# LoRA 2 (face) chained after LoRA 1 on both models
	workflow["22"] = {
	"class_type": "LoraLoader",
	"inputs": {
	"lora_name": lora_name_2,
	"strength_model": lora_strength_2,
	"strength_clip": 1.0,
	"model": ["20", 0],
	"clip": ["20", 1],
	},
	}
	workflow["23"] = {
	"class_type": "LoraLoader",
	"inputs": {
	"lora_name": lora_name_2,
	"strength_model": lora_strength_2,
	"strength_clip": 1.0,
	"model": ["21", 0],
	"clip": ["21", 1],
	},
	}
	high_model_out = ["22", 0]
	low_model_out = ["23", 0]
	clip_out = ["22", 1]

	# Rewire samplers and CLIP encoding
	workflow["15"]["inputs"]["model"] = high_model_out
	workflow["16"]["inputs"]["model"] = low_model_out
	workflow["6"]["inputs"]["clip"] = clip_out
	workflow["7"]["inputs"]["clip"] = clip_out

	return workflow