dippoo Claude Sonnet 4.6 committed on
Commit
e808ae1
·
1 Parent(s): 4340a68

Sync all local changes: video routes, pod management, wavespeed, UI updates

Browse files
config/models.yaml CHANGED
@@ -52,6 +52,35 @@ training_models:
52
  recommended_images: "15-30 high quality photos"
53
  training_script: "flux_train_network.py"
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  # SD 1.5 Realistic Vision - Good balance of quality and speed
56
  sd15_realistic:
57
  name: "Realistic Vision V5.1"
 
52
  recommended_images: "15-30 high quality photos"
53
  training_script: "flux_train_network.py"
54
 
55
+ # WAN 2.2 - Text-to-Video LoRA training (14B params, uses musubi-tuner)
56
+ wan22_t2v:
57
+ name: "WAN 2.2 T2V (14B)"
58
+ description: "WAN 2.2 text-to-video model. Trains natural-looking video LoRAs. Requires A100 80GB."
59
+ model_type: "wan22"
60
+ training_framework: "musubi-tuner"
61
+ training_script: "wan_train_network.py"
62
+ network_module: "networks.lora_wan"
63
+ resolution: 512
64
+ learning_rate: 2e-4
65
+ network_rank: 64
66
+ network_alpha: 32
67
+ optimizer: "adamw8bit"
68
+ lr_scheduler: "constant"
69
+ timestep_sampling: "shift"
70
+ discrete_flow_shift: 5.0
71
+ gradient_checkpointing: true
72
+ max_train_steps: 2000
73
+ save_every_n_steps: 500
74
+ use_case: "images+video"
75
+ vram_required_gb: 48
76
+ recommended_gpu: "NVIDIA A100 80GB"
77
+ recommended_images: "20-50 high quality photos with detailed captions"
78
+ # Model paths on network volume:
79
+ # DiT low-noise: /workspace/models/WAN2.2/wan2.2_t2v_low_noise_14B_fp16.safetensors
80
+ # DiT high-noise: /workspace/models/WAN2.2/wan2.2_t2v_high_noise_14B_fp16.safetensors
81
+ # VAE: /workspace/models/WAN2.2/Wan2.1_VAE.pth
82
+ # T5: /workspace/models/WAN2.2/models_t5_umt5-xxl-enc-bf16.pth
83
+
84
  # SD 1.5 Realistic Vision - Good balance of quality and speed
85
  sd15_realistic:
86
  name: "Realistic Vision V5.1"
src/content_engine/api/routes_catalog.py CHANGED
@@ -117,9 +117,11 @@ async def serve_image_file(image_id: str):
117
  if not file_path.exists():
118
  raise HTTPException(404, f"Image file not found on disk")
119
 
 
 
120
  return FileResponse(
121
  file_path,
122
- media_type="image/png",
123
  headers={"Cache-Control": "public, max-age=3600"},
124
  )
125
 
 
117
  if not file_path.exists():
118
  raise HTTPException(404, f"Image file not found on disk")
119
 
120
+ ext = file_path.suffix.lower()
121
+ media_type = "video/mp4" if ext == ".mp4" else "video/webm" if ext == ".webm" else "image/png"
122
  return FileResponse(
123
  file_path,
124
+ media_type=media_type,
125
  headers={"Cache-Control": "public, max-age=3600"},
126
  )
127
 
src/content_engine/api/routes_generation.py CHANGED
@@ -169,12 +169,15 @@ async def generate_cloud(request: GenerationRequest):
169
 
170
  job_id = str(uuid.uuid4())
171
 
 
 
 
172
  asyncio.create_task(
173
  _run_cloud_generation(
174
  job_id=job_id,
175
  positive_prompt=request.positive_prompt or "",
176
  negative_prompt=request.negative_prompt or "",
177
- model=request.checkpoint, # Use checkpoint field for model selection
178
  width=request.width or 1024,
179
  height=request.height or 1024,
180
  seed=request.seed or -1,
@@ -182,6 +185,8 @@ async def generate_cloud(request: GenerationRequest):
182
  character_id=request.character_id,
183
  template_id=request.template_id,
184
  variables=request.variables,
 
 
185
  )
186
  )
187
 
@@ -399,6 +404,8 @@ async def _run_cloud_generation(
399
  character_id: str | None,
400
  template_id: str | None,
401
  variables: dict | None,
 
 
402
  ):
403
  """Background task to run a WaveSpeed cloud generation."""
404
  import time
@@ -451,6 +458,8 @@ async def _run_cloud_generation(
451
  width=width,
452
  height=height,
453
  seed=seed,
 
 
454
  )
455
 
456
  # Check if cancelled after API call
 
169
 
170
  job_id = str(uuid.uuid4())
171
 
172
+ lora_path = request.loras[0].name if request.loras else None
173
+ lora_strength = request.loras[0].strength_model if request.loras else 0.85
174
+
175
  asyncio.create_task(
176
  _run_cloud_generation(
177
  job_id=job_id,
178
  positive_prompt=request.positive_prompt or "",
179
  negative_prompt=request.negative_prompt or "",
180
+ model=request.checkpoint,
181
  width=request.width or 1024,
182
  height=request.height or 1024,
183
  seed=request.seed or -1,
 
185
  character_id=request.character_id,
186
  template_id=request.template_id,
187
  variables=request.variables,
188
+ lora_path=lora_path,
189
+ lora_strength=lora_strength,
190
  )
191
  )
192
 
 
404
  character_id: str | None,
405
  template_id: str | None,
406
  variables: dict | None,
407
+ lora_path: str | None = None,
408
+ lora_strength: float = 0.85,
409
  ):
410
  """Background task to run a WaveSpeed cloud generation."""
411
  import time
 
458
  width=width,
459
  height=height,
460
  seed=seed,
461
+ lora_name=lora_path,
462
+ lora_strength=lora_strength,
463
  )
464
 
465
  # Check if cancelled after API call
src/content_engine/api/routes_pod.py CHANGED
@@ -197,7 +197,9 @@ async def list_model_options():
197
  "models": {
198
  "flux2": {"name": "FLUX.2 Dev", "description": "Best for realistic txt2img (requires 48GB+ VRAM)", "use_case": "txt2img"},
199
  "flux1": {"name": "FLUX.1 Dev", "description": "Previous gen FLUX txt2img", "use_case": "txt2img"},
200
- "wan22": {"name": "WAN 2.2", "description": "Image-to-video and general generation", "use_case": "img2video"},
 
 
201
  }
202
  }
203
 
@@ -312,15 +314,45 @@ async def _wait_and_setup_pod(pod_id: str, model_type: str, timeout: int = 600):
312
  _pod_state["setup_status"] = "Connecting via SSH..."
313
 
314
  import paramiko
315
- ssh = paramiko.SSHClient()
316
- ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
317
 
318
  for attempt in range(30):
319
  try:
320
- await asyncio.to_thread(
321
- ssh.connect, ssh_host, port=int(ssh_port),
322
- username="root", password="runpod", timeout=10,
323
- )
324
  break
325
  except Exception:
326
  if attempt == 29:
@@ -329,9 +361,6 @@ async def _wait_and_setup_pod(pod_id: str, model_type: str, timeout: int = 600):
329
  return
330
  await asyncio.sleep(5)
331
 
332
- transport = ssh.get_transport()
333
- transport.set_keepalive(30)
334
-
335
  try:
336
  # Symlink network volume
337
  volume_id, _ = _get_volume_config()
@@ -404,22 +433,158 @@ print('Text encoder downloaded')
404
  await _ssh_exec_async(ssh, f"ln -sf /workspace/models/ae.safetensors {comfy_dir}/models/vae/ae.safetensors")
405
  await _ssh_exec_async(ssh, f"ln -sf /workspace/models/clip_l.safetensors {comfy_dir}/models/text_encoders/clip_l.safetensors")
406
  await _ssh_exec_async(ssh, f"ln -sf /workspace/models/t5xxl_fp16.safetensors {comfy_dir}/models/text_encoders/t5xxl_fp16.safetensors")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
407
  elif model_type == "wan22":
408
- # WAN 2.2 Image-to-Video (14B params)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
409
  wan_dir = "/workspace/models/Wan2.2-I2V-A14B"
410
  wan_exists = (await _ssh_exec_async(ssh, f"test -d {wan_dir} && echo EXISTS || echo MISSING")).strip()
411
  if wan_exists != "EXISTS":
412
- _pod_state["setup_status"] = "Downloading WAN 2.2 model (~28GB, first time only)..."
413
  await _ssh_exec_async(ssh, f"pip install huggingface_hub 2>&1 | tail -1", timeout=60)
414
  await _ssh_exec_async(ssh, f"""python -c "
415
  from huggingface_hub import snapshot_download
416
  snapshot_download('Wan-AI/Wan2.2-I2V-A14B', local_dir='{wan_dir}', ignore_patterns=['*.md', '*.txt'])
417
- print('WAN 2.2 downloaded')
418
  " 2>&1 | tail -10""", timeout=3600)
419
- # Symlink WAN model to ComfyUI diffusion_models dir
420
  await _ssh_exec_async(ssh, f"mkdir -p {comfy_dir}/models/diffusion_models")
421
  await _ssh_exec_async(ssh, f"ln -sf {wan_dir} {comfy_dir}/models/diffusion_models/Wan2.2-I2V-A14B")
422
- # Also need a VAE and text encoder for WAN — they use their own
423
  await _ssh_exec_async(ssh, f"ln -sf {wan_dir} {comfy_dir}/models/checkpoints/Wan2.2-I2V-A14B")
424
 
425
  # Install ComfyUI-WanVideoWrapper custom nodes
@@ -430,8 +595,121 @@ print('WAN 2.2 downloaded')
430
  await _ssh_exec_async(ssh, f"cd {comfy_dir}/custom_nodes && git clone --depth 1 https://github.com/kijai/ComfyUI-WanVideoWrapper.git", timeout=120)
431
  await _ssh_exec_async(ssh, f"cd {wan_nodes_dir} && pip install -r requirements.txt 2>&1 | tail -5", timeout=300)
432
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
433
  # Symlink all LoRAs from volume
434
- await _ssh_exec_async(ssh, f"ls /runpod-volume/loras/*.safetensors 2>/dev/null | while read f; do ln -sf \"$f\" {comfy_dir}/models/loras/; done")
435
 
436
  # Start ComfyUI in background (fire-and-forget — don't wait for output)
437
  _pod_state["setup_status"] = "Starting ComfyUI..."
@@ -500,6 +778,300 @@ def _ssh_exec_fire_and_forget(ssh, cmd: str):
500
  # Don't read stdout/stderr — just let it run
501
 
502
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
503
  @router.post("/stop")
504
  async def stop_pod():
505
  """Stop the GPU pod."""
@@ -561,8 +1133,8 @@ async def list_pod_loras():
561
 
562
  @router.post("/upload-lora")
563
  async def upload_lora_to_pod(file: UploadFile = File(...)):
564
- """Upload a LoRA file to the running pod."""
565
- import httpx
566
 
567
  if _pod_state["status"] != "running":
568
  raise HTTPException(400, "Pod not running - start it first")
@@ -570,24 +1142,77 @@ async def upload_lora_to_pod(file: UploadFile = File(...)):
570
  if not file.filename.endswith(".safetensors"):
571
  raise HTTPException(400, "Only .safetensors files supported")
572
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
573
  try:
574
- content = await file.read()
575
- comfyui_url = _get_comfyui_url()
 
 
 
 
576
 
577
- async with httpx.AsyncClient(timeout=120) as client:
578
- url = f"{comfyui_url}/upload/image"
579
- files = {"image": (file.filename, content, "application/octet-stream")}
580
- data = {"subfolder": "loras", "type": "input"}
581
- resp = await client.post(url, files=files, data=data)
582
 
583
- if resp.status_code == 200:
584
- return {"status": "uploaded", "filename": file.filename}
585
- else:
586
- raise HTTPException(500, f"Upload failed: {resp.text}")
 
587
 
588
- except httpx.TimeoutException:
589
- raise HTTPException(504, "Upload timed out")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
590
  except Exception as e:
 
591
  raise HTTPException(500, f"Upload failed: {e}")
592
 
593
 
@@ -601,6 +1226,8 @@ class PodGenerateRequest(BaseModel):
601
  seed: int = -1
602
  lora_name: str | None = None
603
  lora_strength: float = 0.85
 
 
604
  character_id: str | None = None
605
  template_id: str | None = None
606
  content_rating: str = "sfw"
@@ -623,18 +1250,33 @@ async def generate_on_pod(request: PodGenerateRequest):
623
  seed = request.seed if request.seed >= 0 else random.randint(0, 2**32 - 1)
624
 
625
  model_type = _pod_state.get("model_type", "flux2")
626
- workflow = _build_flux_workflow(
627
- prompt=request.prompt,
628
- negative_prompt=request.negative_prompt,
629
- width=request.width,
630
- height=request.height,
631
- steps=request.steps,
632
- cfg=request.cfg,
633
- seed=seed,
634
- lora_name=request.lora_name,
635
- lora_strength=request.lora_strength,
636
- model_type=model_type,
637
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
638
 
639
  comfyui_url = _get_comfyui_url()
640
 
@@ -939,3 +1581,249 @@ def _build_flux_workflow(
939
  workflow["7"]["inputs"]["clip"] = ["20", 1]
940
 
941
  return workflow
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
197
  "models": {
198
  "flux2": {"name": "FLUX.2 Dev", "description": "Best for realistic txt2img (requires 48GB+ VRAM)", "use_case": "txt2img"},
199
  "flux1": {"name": "FLUX.1 Dev", "description": "Previous gen FLUX txt2img", "use_case": "txt2img"},
200
+ "wan22": {"name": "WAN 2.2 Remix", "description": "Realistic generation — dual-DiT MoE split-step (NSFW OK)", "use_case": "txt2img"},
201
+ "wan22_i2v": {"name": "WAN 2.2 I2V", "description": "Image-to-video generation", "use_case": "img2video"},
202
+ "wan22_animate": {"name": "WAN 2.2 Animate", "description": "Dance/motion transfer — animate a character from a driving video", "use_case": "animate"},
203
  }
204
  }
205
 
 
314
  _pod_state["setup_status"] = "Connecting via SSH..."
315
 
316
  import paramiko
317
+
318
+ async def _ssh_connect_new() -> "paramiko.SSHClient":
319
+ """Create a fresh SSH connection to the pod."""
320
+ client = paramiko.SSHClient()
321
+ client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
322
+ for attempt in range(10):
323
+ try:
324
+ await asyncio.to_thread(
325
+ client.connect, ssh_host, port=int(ssh_port),
326
+ username="root", password="runpod", timeout=15,
327
+ banner_timeout=30,
328
+ )
329
+ client.get_transport().set_keepalive(30)
330
+ return client
331
+ except Exception:
332
+ if attempt == 9:
333
+ raise
334
+ await asyncio.sleep(5)
335
+ raise RuntimeError("SSH connection failed after retries")
336
+
337
+ async def _ssh_exec_r(cmd: str, timeout: int = 120) -> str:
338
+ """Execute SSH command, reconnecting once if the session dropped."""
339
+ nonlocal ssh
340
+ try:
341
+ t = ssh.get_transport()
342
+ if t is None or not t.is_active():
343
+ logger.info("SSH session dropped, reconnecting...")
344
+ ssh = await _ssh_connect_new()
345
+ return await _ssh_exec_async(ssh, cmd, timeout)
346
+ except Exception as e:
347
+ if "not active" in str(e).lower() or "session" in str(e).lower():
348
+ logger.info("SSH error '%s', reconnecting and retrying...", e)
349
+ ssh = await _ssh_connect_new()
350
+ return await _ssh_exec_async(ssh, cmd, timeout)
351
+ raise
352
 
353
  for attempt in range(30):
354
  try:
355
+ ssh = await _ssh_connect_new()
 
 
 
356
  break
357
  except Exception:
358
  if attempt == 29:
 
361
  return
362
  await asyncio.sleep(5)
363
 
 
 
 
364
  try:
365
  # Symlink network volume
366
  volume_id, _ = _get_volume_config()
 
433
  await _ssh_exec_async(ssh, f"ln -sf /workspace/models/ae.safetensors {comfy_dir}/models/vae/ae.safetensors")
434
  await _ssh_exec_async(ssh, f"ln -sf /workspace/models/clip_l.safetensors {comfy_dir}/models/text_encoders/clip_l.safetensors")
435
  await _ssh_exec_async(ssh, f"ln -sf /workspace/models/t5xxl_fp16.safetensors {comfy_dir}/models/text_encoders/t5xxl_fp16.safetensors")
436
+ elif model_type == "z_image":
437
+ # Z-Image Turbo — 6B param model by Tongyi-MAI, runs in 16GB VRAM
438
+ z_dir = "/runpod-volume/models/z_image"
439
+ await _ssh_exec_async(ssh, f"mkdir -p {z_dir}")
440
+ await _ssh_exec_async(ssh, "pip install huggingface_hub 2>&1 | tail -1", timeout=60)
441
+
442
+ # Delete FLUX.2 from volume to free space
443
+ _pod_state["setup_status"] = "Cleaning up FLUX.2 from volume..."
444
+ await _ssh_exec_async(ssh, "rm -rf /runpod-volume/models/FLUX.2-dev /runpod-volume/models/mistral_3_small_flux2_fp8.safetensors 2>/dev/null; echo done")
445
+
446
+ # Download diffusion model (~12GB)
447
+ diff_model = f"{z_dir}/z_image_turbo_bf16.safetensors"
448
+ exists = (await _ssh_exec_async(ssh, f"test -f {diff_model} && echo EXISTS || echo MISSING")).strip()
449
+ if exists != "EXISTS":
450
+ _pod_state["setup_status"] = "Downloading Z-Image Turbo diffusion model (~12GB)..."
451
+ await _ssh_exec_async(ssh, f"""python -c "
452
+ from huggingface_hub import hf_hub_download
453
+ import shutil, os
454
+ p = hf_hub_download('Comfy-Org/z_image_turbo', 'split_files/diffusion_models/z_image_turbo_bf16.safetensors', local_dir='/tmp/z_image')
455
+ shutil.move(p, '{diff_model}')
456
+ print('Diffusion model downloaded')
457
+ " 2>&1 | tail -5""", timeout=3600)
458
+
459
+ # Download text encoder (~8GB Qwen 3 4B)
460
+ te_model = f"{z_dir}/qwen_3_4b.safetensors"
461
+ exists = (await _ssh_exec_async(ssh, f"test -f {te_model} && echo EXISTS || echo MISSING")).strip()
462
+ if exists != "EXISTS":
463
+ _pod_state["setup_status"] = "Downloading Z-Image text encoder (~8GB)..."
464
+ await _ssh_exec_async(ssh, f"""python -c "
465
+ from huggingface_hub import hf_hub_download
466
+ import shutil
467
+ p = hf_hub_download('Comfy-Org/z_image_turbo', 'split_files/text_encoders/qwen_3_4b.safetensors', local_dir='/tmp/z_image')
468
+ shutil.move(p, '{te_model}')
469
+ print('Text encoder downloaded')
470
+ " 2>&1 | tail -5""", timeout=3600)
471
+
472
+ # Download VAE (~335MB)
473
+ vae_model = f"{z_dir}/ae.safetensors"
474
+ exists = (await _ssh_exec_async(ssh, f"test -f {vae_model} && echo EXISTS || echo MISSING")).strip()
475
+ if exists != "EXISTS":
476
+ _pod_state["setup_status"] = "Downloading Z-Image VAE..."
477
+ await _ssh_exec_async(ssh, f"""python -c "
478
+ from huggingface_hub import hf_hub_download
479
+ import shutil
480
+ p = hf_hub_download('Comfy-Org/z_image_turbo', 'split_files/vae/ae.safetensors', local_dir='/tmp/z_image')
481
+ shutil.move(p, '{vae_model}')
482
+ print('VAE downloaded')
483
+ " 2>&1 | tail -5""", timeout=600)
484
+
485
+ # Symlink into ComfyUI directories
486
+ await _ssh_exec_async(ssh, f"mkdir -p {comfy_dir}/models/diffusion_models {comfy_dir}/models/text_encoders {comfy_dir}/models/vae")
487
+ await _ssh_exec_async(ssh, f"ln -sf {diff_model} {comfy_dir}/models/diffusion_models/z_image_turbo_bf16.safetensors")
488
+ await _ssh_exec_async(ssh, f"ln -sf {te_model} {comfy_dir}/models/text_encoders/qwen_3_4b.safetensors")
489
+ await _ssh_exec_async(ssh, f"ln -sf {vae_model} {comfy_dir}/models/vae/ae_z_image.safetensors")
490
+
491
  elif model_type == "wan22":
492
+ # WAN 2.2 Remix NSFW — dual-DiT MoE split-step for realistic generation
493
+ wan_dir = "/workspace/models/WAN2.2"
494
+ await _ssh_exec_async(ssh, f"mkdir -p {wan_dir}")
495
+
496
+ civitai_token = os.environ.get("CIVITAI_API_TOKEN", "")
497
+ token_param = f"&token={civitai_token}" if civitai_token else ""
498
+
499
+ # CivitAI Remix models (fp8 ~14GB each)
500
+ civitai_models = {
501
+ "Remix T2V High-noise": {
502
+ "path": f"{wan_dir}/wan22_remix_t2v_high_fp8.safetensors",
503
+ "url": f"https://civitai.com/api/download/models/2424167?type=Model&format=SafeTensor&size=pruned{token_param}",
504
+ },
505
+ "Remix T2V Low-noise": {
506
+ "path": f"{wan_dir}/wan22_remix_t2v_low_fp8.safetensors",
507
+ "url": f"https://civitai.com/api/download/models/2424912?type=Model&format=SafeTensor&size=pruned{token_param}",
508
+ },
509
+ }
510
+
511
+ # HuggingFace models (T5 fp8 ~7GB, VAE ~1GB)
512
+ hf_models = {
513
+ "T5 text encoder (fp8)": {
514
+ "path": f"{wan_dir}/umt5_xxl_fp8_e4m3fn_scaled.safetensors",
515
+ "repo": "Comfy-Org/Wan_2.2_ComfyUI_Repackaged",
516
+ "filename": "split_files/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors",
517
+ },
518
+ "VAE": {
519
+ "path": f"{wan_dir}/wan_2.1_vae.safetensors",
520
+ "repo": "Comfy-Org/Wan_2.2_ComfyUI_Repackaged",
521
+ "filename": "split_files/vae/wan_2.1_vae.safetensors",
522
+ },
523
+ }
524
+
525
+ # Download CivitAI Remix models
526
+ for label, info in civitai_models.items():
527
+ exists = (await _ssh_exec_async(ssh, f"test -f {info['path']} && echo EXISTS || echo MISSING")).strip()
528
+ if exists == "EXISTS":
529
+ logger.info("WAN 2.2 %s already cached", label)
530
+ else:
531
+ _pod_state["setup_status"] = f"Downloading {label} (~14GB)..."
532
+ await _ssh_exec_async(ssh, f"wget -q -O '{info['path']}' '{info['url']}'", timeout=1800)
533
+ # Verify download
534
+ check = (await _ssh_exec_async(ssh, f"test -f {info['path']} && stat -c%s {info['path']} || echo 0")).strip()
535
+ if check == "0" or int(check) < 1000000:
536
+ logger.error("Failed to download %s (size: %s). CivitAI API token may be required.", label, check)
537
+ _pod_state["setup_status"] = f"Failed: {label} download failed. Set CIVITAI_API_TOKEN env var for NSFW models."
538
+ return
539
+
540
+ # Download HuggingFace models
541
+ await _ssh_exec_async(ssh, "pip install huggingface_hub 2>&1 | tail -1", timeout=60)
542
+ for label, info in hf_models.items():
543
+ exists = (await _ssh_exec_async(ssh, f"test -f {info['path']} && echo EXISTS || echo MISSING")).strip()
544
+ if exists == "EXISTS":
545
+ logger.info("WAN 2.2 %s already cached", label)
546
+ else:
547
+ _pod_state["setup_status"] = f"Downloading {label}..."
548
+ await _ssh_exec_async(ssh, f"""python -c "
549
+ from huggingface_hub import hf_hub_download
550
+ import os, shutil
551
+ hf_hub_download('{info['repo']}', '{info['filename']}', local_dir='{wan_dir}')
552
+ downloaded = os.path.join('{wan_dir}', '{info['filename']}')
553
+ target = '{info['path']}'
554
+ if os.path.exists(downloaded) and downloaded != target:
555
+ os.makedirs(os.path.dirname(target), exist_ok=True)
556
+ shutil.move(downloaded, target)
557
+ print('Downloaded {label}')
558
+ " 2>&1 | tail -5""", timeout=1800)
559
+
560
+ # Symlink models into ComfyUI
561
+ await _ssh_exec_async(ssh, f"mkdir -p {comfy_dir}/models/diffusion_models {comfy_dir}/models/text_encoders")
562
+ await _ssh_exec_async(ssh, f"ln -sf {wan_dir}/wan22_remix_t2v_high_fp8.safetensors {comfy_dir}/models/diffusion_models/")
563
+ await _ssh_exec_async(ssh, f"ln -sf {wan_dir}/wan22_remix_t2v_low_fp8.safetensors {comfy_dir}/models/diffusion_models/")
564
+ await _ssh_exec_async(ssh, f"ln -sf {wan_dir}/wan_2.1_vae.safetensors {comfy_dir}/models/vae/")
565
+ await _ssh_exec_async(ssh, f"ln -sf {wan_dir}/umt5_xxl_fp8_e4m3fn_scaled.safetensors {comfy_dir}/models/text_encoders/")
566
+
567
+ # Install wanBlockSwap custom node (VRAM optimization for dual-DiT on 24GB GPUs)
568
+ _pod_state["setup_status"] = "Installing WAN 2.2 custom nodes..."
569
+ blockswap_dir = f"{comfy_dir}/custom_nodes/ComfyUI-wanBlockswap"
570
+ blockswap_exists = (await _ssh_exec_async(ssh, f"test -d {blockswap_dir} && echo EXISTS || echo MISSING")).strip()
571
+ if blockswap_exists != "EXISTS":
572
+ await _ssh_exec_async(ssh, f"cd {comfy_dir}/custom_nodes && git clone --depth 1 https://github.com/orssorbit/ComfyUI-wanBlockswap.git", timeout=120)
573
+
574
+ elif model_type == "wan22_i2v":
575
+ # WAN 2.2 Image-to-Video (14B params) — full model snapshot
576
  wan_dir = "/workspace/models/Wan2.2-I2V-A14B"
577
  wan_exists = (await _ssh_exec_async(ssh, f"test -d {wan_dir} && echo EXISTS || echo MISSING")).strip()
578
  if wan_exists != "EXISTS":
579
+ _pod_state["setup_status"] = "Downloading WAN 2.2 I2V model (~28GB, first time only)..."
580
  await _ssh_exec_async(ssh, f"pip install huggingface_hub 2>&1 | tail -1", timeout=60)
581
  await _ssh_exec_async(ssh, f"""python -c "
582
  from huggingface_hub import snapshot_download
583
  snapshot_download('Wan-AI/Wan2.2-I2V-A14B', local_dir='{wan_dir}', ignore_patterns=['*.md', '*.txt'])
584
+ print('WAN 2.2 I2V downloaded')
585
  " 2>&1 | tail -10""", timeout=3600)
 
586
  await _ssh_exec_async(ssh, f"mkdir -p {comfy_dir}/models/diffusion_models")
587
  await _ssh_exec_async(ssh, f"ln -sf {wan_dir} {comfy_dir}/models/diffusion_models/Wan2.2-I2V-A14B")
 
588
  await _ssh_exec_async(ssh, f"ln -sf {wan_dir} {comfy_dir}/models/checkpoints/Wan2.2-I2V-A14B")
589
 
590
  # Install ComfyUI-WanVideoWrapper custom nodes
 
595
  await _ssh_exec_async(ssh, f"cd {comfy_dir}/custom_nodes && git clone --depth 1 https://github.com/kijai/ComfyUI-WanVideoWrapper.git", timeout=120)
596
  await _ssh_exec_async(ssh, f"cd {wan_nodes_dir} && pip install -r requirements.txt 2>&1 | tail -5", timeout=300)
597
 
598
+ elif model_type == "wan22_animate":
599
+ # WAN 2.2 Animate (14B fp8) — dance/motion transfer via pose skeleton
600
+ animate_dir = "/workspace/models/WAN2.2-Animate"
601
+ wan22_dir = "/workspace/models/WAN2.2"
602
+ await _ssh_exec_async(ssh, f"mkdir -p {animate_dir}")
603
+ await _ssh_exec_async(ssh, "pip install huggingface_hub 2>&1 | tail -1", timeout=60)
604
+
605
+ # Download main Animate model (~28GB bf16 — only version available)
606
+ animate_model = f"{animate_dir}/wan2.2_animate_14B_bf16.safetensors"
607
+ exists = (await _ssh_exec_async(ssh, f"test -f {animate_model} && echo EXISTS || echo MISSING")).strip()
608
+ if exists != "EXISTS":
609
+ _pod_state["setup_status"] = "Downloading WAN 2.2 Animate model (~28GB, first time only)..."
610
+ await _ssh_exec_async(ssh, f"""python -c "
611
+ from huggingface_hub import hf_hub_download
612
+ import os, shutil
613
+ hf_hub_download('Comfy-Org/Wan_2.2_ComfyUI_Repackaged', 'split_files/diffusion_models/wan2.2_animate_14B_bf16.safetensors', local_dir='{animate_dir}')
614
+ src = os.path.join('{animate_dir}', 'split_files', 'diffusion_models', 'wan2.2_animate_14B_bf16.safetensors')
615
+ if os.path.exists(src):
616
+ shutil.move(src, '{animate_model}')
617
+ print('Animate model downloaded')
618
+ " 2>&1 | tail -5""", timeout=7200)
619
+
620
+ # CLIP Vision H (~2.5GB) — ViT-H vision encoder
621
+ clip_vision_target = f"{animate_dir}/clip_vision_h.safetensors"
622
+ exists = (await _ssh_exec_async(ssh, f"test -f {clip_vision_target} && echo EXISTS || echo MISSING")).strip()
623
+ if exists != "EXISTS":
624
+ _pod_state["setup_status"] = "Downloading CLIP Vision H (~2.5GB)..."
625
+ await _ssh_exec_async(ssh, f"""python -c "
626
+ from huggingface_hub import hf_hub_download
627
+ import os, shutil
628
+ result = hf_hub_download('h94/IP-Adapter', 'models/image_encoder/model.safetensors', local_dir='{animate_dir}/tmp_clip')
629
+ shutil.move(result, '{clip_vision_target}')
630
+ shutil.rmtree('{animate_dir}/tmp_clip', ignore_errors=True)
631
+ print('CLIP Vision H downloaded')
632
+ " 2>&1 | tail -5""", timeout=1800)
633
+
634
+ # VAE — reuse from WAN2.2 dir if available, else download (~1GB)
635
+ vae_target = f"{animate_dir}/wan_2.1_vae.safetensors"
636
+ exists = (await _ssh_exec_async(ssh, f"test -f {vae_target} && echo EXISTS || echo MISSING")).strip()
637
+ if exists != "EXISTS":
638
+ vae_from_wan22 = (await _ssh_exec_async(ssh, f"test -f {wan22_dir}/wan_2.1_vae.safetensors && echo EXISTS || echo MISSING")).strip()
639
+ if vae_from_wan22 == "EXISTS":
640
+ await _ssh_exec_async(ssh, f"ln -sf {wan22_dir}/wan_2.1_vae.safetensors {vae_target}")
641
+ else:
642
+ _pod_state["setup_status"] = "Downloading VAE (~1GB)..."
643
+ await _ssh_exec_async(ssh, f"""python -c "
644
+ from huggingface_hub import hf_hub_download
645
+ import os, shutil
646
+ hf_hub_download('Comfy-Org/Wan_2.2_ComfyUI_Repackaged', 'split_files/vae/wan_2.1_vae.safetensors', local_dir='{animate_dir}')
647
+ src = os.path.join('{animate_dir}', 'split_files', 'vae', 'wan_2.1_vae.safetensors')
648
+ if os.path.exists(src):
649
+ shutil.move(src, '{vae_target}')
650
+ print('VAE downloaded')
651
+ " 2>&1 | tail -5""", timeout=600)
652
+
653
+ # UMT5 T5 encoder fp8 (non-scaled) — use Kijai/WanVideo_comfy version
654
+ # which is compatible with LoadWanVideoT5TextEncoder (scaled_fp8 is not supported)
655
+ t5_filename = "umt5-xxl-enc-fp8_e4m3fn.safetensors"
656
+ t5_target = f"{animate_dir}/{t5_filename}"
657
+ t5_comfy_path = f"{comfy_dir}/models/text_encoders/{t5_filename}"
658
+ t5_in_comfy = (await _ssh_exec_async(ssh, f"test -f {t5_comfy_path} && echo EXISTS || echo MISSING")).strip()
659
+ t5_in_vol = (await _ssh_exec_async(ssh, f"test -f {t5_target} && echo EXISTS || echo MISSING")).strip()
660
+ if t5_in_comfy != "EXISTS" and t5_in_vol != "EXISTS":
661
+ _pod_state["setup_status"] = "Downloading UMT5 text encoder (~6.3GB, first time only)..."
662
+ await _ssh_exec_async(ssh, f"""python -c "
663
+ from huggingface_hub import hf_hub_download
664
+ hf_hub_download('Kijai/WanVideo_comfy', '{t5_filename}', local_dir='{animate_dir}')
665
+ print('UMT5 text encoder downloaded')
666
+ " 2>&1 | tail -5""", timeout=1800)
667
+ t5_in_vol = "EXISTS"
668
+
669
+ # Symlink models into ComfyUI directories
670
+ await _ssh_exec_async(ssh, f"mkdir -p {comfy_dir}/models/diffusion_models {comfy_dir}/models/vae {comfy_dir}/models/clip_vision {comfy_dir}/models/text_encoders")
671
+ await _ssh_exec_async(ssh, f"ln -sf {animate_model} {comfy_dir}/models/diffusion_models/")
672
+ await _ssh_exec_async(ssh, f"ln -sf {vae_target} {comfy_dir}/models/vae/")
673
+ await _ssh_exec_async(ssh, f"ln -sf {clip_vision_target} {comfy_dir}/models/clip_vision/")
674
+ if t5_in_vol == "EXISTS" and t5_in_comfy != "EXISTS":
675
+ await _ssh_exec_async(ssh, f"ln -sf {t5_target} {t5_comfy_path}")
676
+
677
+ # Reconnect SSH before custom node setup — connection may have dropped during long downloads
678
+ ssh = await _ssh_connect_new()
679
+
680
+ # Install required custom nodes
681
+ _pod_state["setup_status"] = "Installing WAN Animate custom nodes..."
682
+
683
+ # ComfyUI-WanVideoWrapper (WanVideoAnimateEmbeds, WanVideoSampler, etc.)
684
+ wan_nodes_dir = f"{comfy_dir}/custom_nodes/ComfyUI-WanVideoWrapper"
685
+ exists = (await _ssh_exec_r(f"test -d {wan_nodes_dir} && echo EXISTS || echo MISSING")).strip()
686
+ if exists != "EXISTS":
687
+ await _ssh_exec_r(f"cd {comfy_dir}/custom_nodes && git clone --depth 1 https://github.com/kijai/ComfyUI-WanVideoWrapper.git", timeout=120)
688
+ await _ssh_exec_r(f"cd {wan_nodes_dir} && pip install -r requirements.txt 2>&1 | tail -5", timeout=300)
689
+
690
+ # ComfyUI-VideoHelperSuite (VHS_LoadVideo, VHS_VideoCombine)
691
+ vhs_dir = f"{comfy_dir}/custom_nodes/ComfyUI-VideoHelperSuite"
692
+ exists = (await _ssh_exec_r(f"test -d {vhs_dir} && echo EXISTS || echo MISSING")).strip()
693
+ if exists != "EXISTS":
694
+ await _ssh_exec_r(f"cd {comfy_dir}/custom_nodes && git clone --depth 1 https://github.com/Kosinkadink/ComfyUI-VideoHelperSuite.git", timeout=120)
695
+ await _ssh_exec_r(f"cd {vhs_dir} && pip install -r requirements.txt 2>&1 | tail -5", timeout=300)
696
+
697
+ # comfyui_controlnet_aux (DWPreprocessor for pose extraction)
698
+ aux_dir = f"{comfy_dir}/custom_nodes/comfyui_controlnet_aux"
699
+ exists = (await _ssh_exec_r(f"test -d {aux_dir} && echo EXISTS || echo MISSING")).strip()
700
+ if exists != "EXISTS":
701
+ await _ssh_exec_r(f"cd {comfy_dir}/custom_nodes && git clone --depth 1 https://github.com/Fannovel16/comfyui_controlnet_aux.git", timeout=120)
702
+ await _ssh_exec_r(f"cd {aux_dir} && pip install -r requirements.txt 2>&1 | tail -5", timeout=300)
703
+
704
+ # ComfyUI-KJNodes (ImageResizeKJv2 used in animate workflow)
705
+ kj_dir = f"{comfy_dir}/custom_nodes/ComfyUI-KJNodes"
706
+ exists = (await _ssh_exec_r(f"test -d {kj_dir} && echo EXISTS || echo MISSING")).strip()
707
+ if exists != "EXISTS":
708
+ await _ssh_exec_r(f"cd {comfy_dir}/custom_nodes && git clone --depth 1 https://github.com/kijai/ComfyUI-KJNodes.git", timeout=120)
709
+ await _ssh_exec_r(f"cd {kj_dir} && pip install -r requirements.txt 2>&1 | tail -5", timeout=300)
710
+
711
  # Symlink all LoRAs from volume
712
+ await _ssh_exec_r(f"ls /runpod-volume/loras/*.safetensors 2>/dev/null | while read f; do ln -sf \"$f\" {comfy_dir}/models/loras/; done")
713
 
714
  # Start ComfyUI in background (fire-and-forget — don't wait for output)
715
  _pod_state["setup_status"] = "Starting ComfyUI..."
 
778
  # Don't read stdout/stderr — just let it run
779
 
780
 
781
# --- Pre-download models to network volume (saves money during training) ---

# Module-level progress state for the background download task.
# Written by _download_models_task; read by the /download-models endpoints.
_download_state = {
    "status": "idle",  # idle, downloading, completed, failed
    "pod_id": None,    # RunPod id of the temporary download pod, if any
    "progress": "",    # human-readable progress message for UI polling
    "error": None,     # last error string when status == "failed"
}
789
+
790
+
791
class DownloadModelsRequest(BaseModel):
    """Request body for POST /download-models."""

    # Which model bundle to fetch — "wan22" or "wan22_animate" (see _download_models_task).
    # NOTE(review): "model_type" sits in pydantic v2's protected "model_" namespace and
    # may emit a warning — confirm against the project's pydantic version.
    model_type: str = "wan22"
    gpu_type: str = "NVIDIA GeForce RTX 3090"  # Cheapest GPU, just for downloading
794
+
795
+
796
+ @router.post("/download-models")
797
+ async def download_models_to_volume(request: DownloadModelsRequest):
798
+ """Pre-download model files to network volume using a cheap pod.
799
+
800
+ This saves expensive GPU time during training — models are cached on the
801
+ shared volume and reused across all future training/generation pods.
802
+ """
803
+ _get_api_key()
804
+
805
+ volume_id, volume_dc = _get_volume_config()
806
+ if not volume_id:
807
+ raise HTTPException(400, "No network volume configured (set RUNPOD_VOLUME_ID)")
808
+
809
+ if _download_state["status"] == "downloading":
810
+ return {"status": "already_downloading", "progress": _download_state["progress"]}
811
+
812
+ _download_state["status"] = "downloading"
813
+ _download_state["progress"] = "Creating cheap download pod..."
814
+ _download_state["error"] = None
815
+
816
+ asyncio.create_task(_download_models_task(request.model_type, request.gpu_type, volume_id, volume_dc))
817
+
818
+ return {"status": "started", "message": f"Downloading {request.model_type} models to volume (using {request.gpu_type})"}
819
+
820
+
821
+ @router.get("/download-models/status")
822
+ async def download_models_status():
823
+ """Check model download progress."""
824
+ return _download_state
825
+
826
+
827
async def _download_models_task(model_type: str, gpu_type: str, volume_id: str, volume_dc: str):
    """Background task: spin up cheap pod, download models, terminate.

    Flow:
      1. Create the cheapest available pod (walking a GPU fallback list) with
         the shared network volume attached.
      2. SSH in (root:runpod, set up by the container's docker_args), symlink
         the volume and install download tools.
      3. Download the model set for ``model_type`` ("wan22" or "wan22_animate"),
         skipping files already cached on the volume.
      4. Always terminate the pod in ``finally`` so it never keeps billing.

    Progress and errors are reported through the module-level ``_download_state``.
    """
    import paramiko
    ssh = None
    pod_id = None

    try:
        # Create cheap pod with network volume — try multiple GPU types if first unavailable
        pod_kwargs = {
            "container_disk_in_gb": 10,
            "ports": "22/tcp",
            "network_volume_id": volume_id,
            # Minimal container: just an SSH server with password login (root:runpod)
            "docker_args": "bash -c 'apt-get update && apt-get install -y openssh-server && mkdir -p /run/sshd && echo root:runpod | chpasswd && /usr/sbin/sshd -o PermitRootLogin=yes && sleep infinity'",
        }
        if volume_dc:
            pod_kwargs["data_center_id"] = volume_dc

        gpu_fallbacks = [
            gpu_type,
            "NVIDIA RTX A4000",
            "NVIDIA RTX A5000",
            "NVIDIA GeForce RTX 4090",
            "NVIDIA GeForce RTX 4080",
            "NVIDIA A100-SXM4-80GB",
        ]
        pod = None
        used_gpu = gpu_type
        for try_gpu in gpu_fallbacks:
            try:
                pod = await asyncio.to_thread(
                    runpod.create_pod,
                    f"model-download-{model_type}",
                    DOCKER_IMAGE,
                    try_gpu,
                    **pod_kwargs,
                )
                used_gpu = try_gpu
                logger.info("Download pod created with %s", try_gpu)
                break
            except Exception as e:
                # Supply-constrained GPU types are expected — fall through to the next one
                if "SUPPLY_CONSTRAINT" in str(e) or "no longer any instances" in str(e).lower():
                    logger.info("GPU %s unavailable, trying next...", try_gpu)
                    continue
                raise
        if pod is None:
            raise RuntimeError("No GPU available for download pod. Try again later.")
        pod_id = pod["id"]
        _download_state["pod_id"] = pod_id
        _download_state["progress"] = f"Pod created with {used_gpu} ({pod_id}), waiting for SSH..."

        # Wait up to 5 minutes for the pod to reach RUNNING and expose port 22
        ssh_host = ssh_port = None
        start = time.time()
        while time.time() - start < 300:
            try:
                p = await asyncio.to_thread(runpod.get_pod, pod_id)
                if p and p.get("desiredStatus") == "RUNNING":
                    for port in (p.get("runtime") or {}).get("ports") or []:
                        if port.get("privatePort") == 22:
                            ssh_host = port.get("ip")
                            ssh_port = port.get("publicPort")
                if ssh_host and ssh_port:
                    break
            except Exception:
                pass
            await asyncio.sleep(5)

        if not ssh_host:
            raise RuntimeError("Pod SSH not available after 5 min")

        # Connect SSH — sshd inside the pod can take a while to accept logins
        ssh = paramiko.SSHClient()
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        for attempt in range(20):
            try:
                await asyncio.to_thread(ssh.connect, ssh_host, port=int(ssh_port), username="root", password="runpod", timeout=10)
                break
            except Exception:
                if attempt == 19:
                    raise RuntimeError("SSH connection failed after 20 attempts")
                await asyncio.sleep(5)

        ssh.get_transport().set_keepalive(30)
        _download_state["progress"] = "SSH connected, setting up tools..."

        # Symlink volume so /workspace/models persists across pods
        await _ssh_exec_async(ssh, "mkdir -p /runpod-volume/models && rm -rf /workspace/models 2>/dev/null; ln -sf /runpod-volume/models /workspace/models")
        await _ssh_exec_async(ssh, "pip install huggingface_hub 2>&1 | tail -1", timeout=120)
        await _ssh_exec_async(ssh, "which aria2c || apt-get install -y aria2 2>&1 | tail -1", timeout=120)

        if model_type == "wan22":
            wan_dir = "/workspace/models/WAN2.2"
            await _ssh_exec_async(ssh, f"mkdir -p {wan_dir}")

            civitai_token = os.environ.get("CIVITAI_API_TOKEN", "")
            token_param = f"&token={civitai_token}" if civitai_token else ""

            # CivitAI Remix models (fp8)
            civitai_files = [
                ("Remix T2V High-noise", f"https://civitai.com/api/download/models/2424167?type=Model&format=SafeTensor&size=pruned{token_param}", f"{wan_dir}/wan22_remix_t2v_high_fp8.safetensors"),
                ("Remix T2V Low-noise", f"https://civitai.com/api/download/models/2424912?type=Model&format=SafeTensor&size=pruned{token_param}", f"{wan_dir}/wan22_remix_t2v_low_fp8.safetensors"),
            ]

            # HuggingFace models: (label, repo, repo-relative path, volume target)
            hf_files = [
                ("T5 text encoder (fp8)", "Comfy-Org/Wan_2.2_ComfyUI_Repackaged", "split_files/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors", f"{wan_dir}/umt5_xxl_fp8_e4m3fn_scaled.safetensors"),
                ("VAE", "Comfy-Org/Wan_2.2_ComfyUI_Repackaged", "split_files/vae/wan_2.1_vae.safetensors", f"{wan_dir}/wan_2.1_vae.safetensors"),
            ]

            total = len(civitai_files) + len(hf_files)
            idx = 0

            for label, url, target in civitai_files:
                idx += 1
                exists = (await _ssh_exec_async(ssh, f"test -f {target} && echo EXISTS || echo MISSING")).strip()
                if exists == "EXISTS":
                    _download_state["progress"] = f"[{idx}/{total}] {label} already cached"
                    logger.info("WAN 2.2 %s already on volume", label)
                else:
                    _download_state["progress"] = f"[{idx}/{total}] Downloading {label} (~14GB)..."
                    await _ssh_exec_async(ssh, f"wget -q -O '{target}' '{url}'", timeout=1800)
                    # Sanity-check size: CivitAI returns a tiny HTML error page on auth failure
                    check = (await _ssh_exec_async(ssh, f"test -f {target} && stat -c%s {target} || echo 0")).strip()
                    if check == "0" or int(check) < 1000000:
                        raise RuntimeError(f"Failed to download {label}. Set CIVITAI_API_TOKEN for NSFW models.")
                    _download_state["progress"] = f"[{idx}/{total}] {label} downloaded"

            for label, repo, filename, target in hf_files:
                idx += 1
                exists = (await _ssh_exec_async(ssh, f"test -f {target} && echo EXISTS || echo MISSING")).strip()
                if exists == "EXISTS":
                    _download_state["progress"] = f"[{idx}/{total}] {label} already cached"
                    logger.info("WAN 2.2 %s already on volume", label)
                else:
                    _download_state["progress"] = f"[{idx}/{total}] Downloading {label}..."
                    # BUG FIX: interpolate the repo-relative filename into the resolve URL
                    # (previously a literal placeholder, so every HF download 404'd)
                    hf_url = f"https://huggingface.co/{repo}/resolve/main/{filename}"
                    fname = target.split("/")[-1]
                    tdir = "/".join(target.split("/")[:-1])
                    await _ssh_exec_async(ssh, f"aria2c -x 16 -s 16 -c -o '{fname}' --dir='{tdir}' '{hf_url}' 2>&1 | tail -3", timeout=1800)
                    check = (await _ssh_exec_async(ssh, f"test -f {target} && echo EXISTS || echo MISSING")).strip()
                    if check != "EXISTS":
                        raise RuntimeError(f"Failed to download {label}")
                    _download_state["progress"] = f"[{idx}/{total}] {label} downloaded"

            # Also pre-clone musubi-tuner to volume (for training)
            _download_state["progress"] = "Caching musubi-tuner to volume..."
            tuner_exists = (await _ssh_exec_async(ssh, "test -f /runpod-volume/musubi-tuner/pyproject.toml && echo EXISTS || echo MISSING")).strip()
            if tuner_exists != "EXISTS":
                await _ssh_exec_async(ssh, "cd /workspace && git clone --depth 1 https://github.com/kohya-ss/musubi-tuner.git && cp -r /workspace/musubi-tuner /runpod-volume/musubi-tuner", timeout=300)
                _download_state["progress"] = "musubi-tuner cached"
            else:
                _download_state["progress"] = "musubi-tuner already cached"

        elif model_type == "wan22_animate":
            animate_dir = "/workspace/models/WAN2.2-Animate"
            wan22_dir = "/workspace/models/WAN2.2"
            hf_base = "https://huggingface.co"
            await _ssh_exec_async(ssh, f"mkdir -p {animate_dir}")

            # Files to download: (label, url, target, timeout_s, min_bytes)
            wget_files = [
                (
                    "WAN 2.2 Animate model (~32GB)",
                    f"{hf_base}/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/diffusion_models/wan2.2_animate_14B_bf16.safetensors",
                    f"{animate_dir}/wan2.2_animate_14B_bf16.safetensors",
                    7200,
                    30_000_000_000,  # 30GB min — partial downloads get resumed
                ),
                (
                    "UMT5 text encoder fp8 (~6.3GB)",
                    f"{hf_base}/Kijai/WanVideo_comfy/resolve/main/umt5-xxl-enc-fp8_e4m3fn.safetensors",
                    f"{animate_dir}/umt5-xxl-enc-fp8_e4m3fn.safetensors",
                    1800,
                    6_000_000_000,
                ),
                (
                    "VAE (~242MB)",
                    f"{hf_base}/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/vae/wan_2.1_vae.safetensors",
                    f"{animate_dir}/wan_2.1_vae.safetensors",
                    300,
                    200_000_000,
                ),
                (
                    "CLIP Vision H (~2.4GB)",
                    f"{hf_base}/h94/IP-Adapter/resolve/main/models/image_encoder/model.safetensors",
                    f"{animate_dir}/clip_vision_h.safetensors",
                    900,
                    2_000_000_000,
                ),
            ]

            total = len(wget_files)

            for idx, (label, url, target, dl_timeout, min_bytes) in enumerate(wget_files, 1):
                # For T5 and VAE, reuse from wan22 dir if already present (and complete)
                wan22_candidate = f"{wan22_dir}/{target.split('/')[-1]}"
                reused = False
                # BUG FIX: label must match the one declared above
                # ("VAE (~242MB)", not "VAE (~1GB)") or VAE reuse never triggers
                if label in ("UMT5 text encoder fp8 (~6.3GB)", "VAE (~242MB)"):
                    wan22_size = (await _ssh_exec_async(ssh, f"stat -c%s {wan22_candidate} 2>/dev/null || echo 0")).strip()
                    if int(wan22_size) >= min_bytes:
                        _download_state["progress"] = f"[{idx}/{total}] {label} — reusing from WAN2.2 dir"
                        await _ssh_exec_async(ssh, f"ln -sf {wan22_candidate} {target} 2>/dev/null || cp {wan22_candidate} {target}")
                        reused = True

                if not reused:
                    size_str = (await _ssh_exec_async(ssh, f"stat -c%s {target} 2>/dev/null || echo 0")).strip()
                    if int(size_str) >= min_bytes:
                        _download_state["progress"] = f"[{idx}/{total}] {label} already cached"
                    else:
                        _download_state["progress"] = f"[{idx}/{total}] Downloading {label}..."
                        filename = target.split("/")[-1]
                        target_dir = "/".join(target.split("/")[:-1])
                        # Remove stale symlinks before downloading (can't resume through a symlink)
                        await _ssh_exec_async(ssh, f"test -L '{target}' && rm -f '{target}'; true")
                        # BUG FIX: pass the real output filename to aria2c
                        # (previously a literal placeholder, so the size check below always failed)
                        await _ssh_exec_async(
                            ssh,
                            f"aria2c -x 16 -s 16 -c -o '{filename}' --dir='{target_dir}' '{url}' 2>&1 | tail -3",
                            timeout=dl_timeout,
                        )
                        size_str = (await _ssh_exec_async(ssh, f"stat -c%s {target} 2>/dev/null || echo 0")).strip()
                        if int(size_str) < min_bytes:
                            raise RuntimeError(f"Failed to download {label} (size {size_str} < {min_bytes})")
                        _download_state["progress"] = f"[{idx}/{total}] {label} downloaded"

        _download_state["status"] = "completed"
        _download_state["progress"] = "All models downloaded to volume! Ready for training."
        logger.info("Model pre-download complete for %s", model_type)

    except Exception as e:
        _download_state["status"] = "failed"
        _download_state["error"] = str(e)
        _download_state["progress"] = f"Failed: {e}"
        logger.error("Model download failed: %s", e)

    finally:
        if ssh:
            try:
                ssh.close()
            except Exception:
                pass
        # Always terminate the download pod so it stops billing
        if pod_id:
            try:
                await asyncio.to_thread(runpod.terminate_pod, pod_id)
                logger.info("Download pod terminated: %s", pod_id)
            except Exception as e:
                logger.warning("Failed to terminate download pod: %s", e)
        _download_state["pod_id"] = None
1073
+
1074
+
1075
  @router.post("/stop")
1076
  async def stop_pod():
1077
  """Stop the GPU pod."""
 
1133
 
1134
  @router.post("/upload-lora")
1135
  async def upload_lora_to_pod(file: UploadFile = File(...)):
1136
+ """Upload a LoRA file directly to /runpod-volume/loras/ via SFTP so it persists."""
1137
+ import paramiko, io
1138
 
1139
  if _pod_state["status"] != "running":
1140
  raise HTTPException(400, "Pod not running - start it first")
 
1142
  if not file.filename.endswith(".safetensors"):
1143
  raise HTTPException(400, "Only .safetensors files supported")
1144
 
1145
+ ip = _pod_state.get("ip")
1146
+ port = _pod_state.get("ssh_port") or 22
1147
+ if not ip:
1148
+ raise HTTPException(500, "No SSH IP available")
1149
+
1150
+ content = await file.read()
1151
+ dest_path = f"/runpod-volume/loras/{file.filename}"
1152
+ comfy_link = f"/workspace/ComfyUI/models/loras/{file.filename}"
1153
+
1154
+ def _sftp_upload():
1155
+ client = paramiko.SSHClient()
1156
+ client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
1157
+ client.connect(ip, port=port, username="root", timeout=30)
1158
+ # Ensure dir exists
1159
+ client.exec_command("mkdir -p /runpod-volume/loras")[1].read()
1160
+ sftp = client.open_sftp()
1161
+ sftp.putfo(io.BytesIO(content), dest_path)
1162
+ sftp.close()
1163
+ # Symlink into ComfyUI
1164
+ client.exec_command(f"ln -sf {dest_path} {comfy_link}")[1].read()
1165
+ client.close()
1166
+
1167
  try:
1168
+ await asyncio.to_thread(_sftp_upload)
1169
+ logger.info("LoRA uploaded to volume: %s (%d bytes)", file.filename, len(content))
1170
+ return {"status": "uploaded", "filename": file.filename, "path": dest_path}
1171
+ except Exception as e:
1172
+ logger.error("LoRA upload failed: %s", e)
1173
+ raise HTTPException(500, f"Upload failed: {e}")
1174
 
 
 
 
 
 
1175
 
1176
+ @router.post("/upload-lora-local")
1177
+ async def upload_lora_from_local(local_path: str, filename: str | None = None):
1178
+ """Upload a LoRA from a local server path directly to the volume via SFTP."""
1179
+ import paramiko, io
1180
+ from pathlib import Path
1181
 
1182
+ if _pod_state["status"] != "running":
1183
+ raise HTTPException(400, "Pod not running - start it first")
1184
+
1185
+ src = Path(local_path)
1186
+ if not src.exists():
1187
+ raise HTTPException(404, f"Local file not found: {local_path}")
1188
+
1189
+ dest_name = filename or src.name
1190
+ if not dest_name.endswith(".safetensors"):
1191
+ raise HTTPException(400, "Only .safetensors files supported")
1192
+
1193
+ ip = _pod_state.get("ip")
1194
+ port = _pod_state.get("ssh_port") or 22
1195
+ dest_path = f"/runpod-volume/loras/{dest_name}"
1196
+ comfy_link = f"/workspace/ComfyUI/models/loras/{dest_name}"
1197
+
1198
+ def _sftp_upload():
1199
+ client = paramiko.SSHClient()
1200
+ client.set_missing_host_key_policy(paramiko.AutoAddPolicy())
1201
+ client.connect(ip, port=port, username="root", timeout=30)
1202
+ client.exec_command("mkdir -p /runpod-volume/loras")[1].read()
1203
+ sftp = client.open_sftp()
1204
+ sftp.put(str(src), dest_path)
1205
+ sftp.close()
1206
+ client.exec_command(f"ln -sf {dest_path} {comfy_link}")[1].read()
1207
+ client.close()
1208
+
1209
+ try:
1210
+ await asyncio.to_thread(_sftp_upload)
1211
+ size_mb = src.stat().st_size / 1024 / 1024
1212
+ logger.info("LoRA uploaded from local: %s (%.1f MB)", dest_name, size_mb)
1213
+ return {"status": "uploaded", "filename": dest_name, "path": dest_path, "size_mb": round(size_mb, 1)}
1214
  except Exception as e:
1215
+ logger.error("Local LoRA upload failed: %s", e)
1216
  raise HTTPException(500, f"Upload failed: {e}")
1217
 
1218
 
 
1226
  seed: int = -1
1227
  lora_name: str | None = None
1228
  lora_strength: float = 0.85
1229
+ lora_name_2: str | None = None
1230
+ lora_strength_2: float = 0.85
1231
  character_id: str | None = None
1232
  template_id: str | None = None
1233
  content_rating: str = "sfw"
 
1250
  seed = request.seed if request.seed >= 0 else random.randint(0, 2**32 - 1)
1251
 
1252
  model_type = _pod_state.get("model_type", "flux2")
1253
+ if model_type == "wan22":
1254
+ workflow = _build_wan_t2i_workflow(
1255
+ prompt=request.prompt,
1256
+ negative_prompt=request.negative_prompt,
1257
+ width=request.width,
1258
+ height=request.height,
1259
+ steps=request.steps,
1260
+ cfg=request.cfg,
1261
+ seed=seed,
1262
+ lora_name=request.lora_name,
1263
+ lora_strength=request.lora_strength,
1264
+ lora_name_2=request.lora_name_2,
1265
+ lora_strength_2=request.lora_strength_2,
1266
+ )
1267
+ else:
1268
+ workflow = _build_flux_workflow(
1269
+ prompt=request.prompt,
1270
+ negative_prompt=request.negative_prompt,
1271
+ width=request.width,
1272
+ height=request.height,
1273
+ steps=request.steps,
1274
+ cfg=request.cfg,
1275
+ seed=seed,
1276
+ lora_name=request.lora_name,
1277
+ lora_strength=request.lora_strength,
1278
+ model_type=model_type,
1279
+ )
1280
 
1281
  comfyui_url = _get_comfyui_url()
1282
 
 
1581
  workflow["7"]["inputs"]["clip"] = ["20", 1]
1582
 
1583
  return workflow
1584
+
1585
+
1586
+ def _build_wan_t2i_workflow(
1587
+ prompt: str,
1588
+ negative_prompt: str,
1589
+ width: int,
1590
+ height: int,
1591
+ steps: int,
1592
+ cfg: float,
1593
+ seed: int,
1594
+ lora_name: str | None,
1595
+ lora_strength: float,
1596
+ lora_name_2: str | None = None,
1597
+ lora_strength_2: float = 0.85,
1598
+ ) -> dict:
1599
+ """Build a ComfyUI workflow for WAN 2.2 Remix — dual-DiT MoE split-step.
1600
+
1601
+ Based on the WAN 2.2 Remix workflow from CivitAI:
1602
+ - Two UNETLoaders: high-noise + low-noise Remix models (fp8)
1603
+ - wanBlockSwap on both (offloads blocks to CPU for 24GB GPUs)
1604
+ - ModelSamplingSD3 with shift=5 on both
1605
+ - Dual KSamplerAdvanced: high-noise runs first half, low-noise finishes
1606
+ - CLIPLoader (type=wan) + CLIPTextEncode for T5 text encoding
1607
+ - Standard VAELoader + VAEDecode
1608
+ - EmptyHunyuanLatentVideo for latent (1 frame = image, 81+ = video)
1609
+ """
1610
+ high_dit = "wan22_remix_t2v_high_fp8.safetensors"
1611
+ low_dit = "wan22_remix_t2v_low_fp8.safetensors"
1612
+ t5_name = "umt5_xxl_fp8_e4m3fn_scaled.safetensors"
1613
+ vae_name = "wan_2.1_vae.safetensors"
1614
+
1615
+ total_steps = steps # default 8
1616
+ split_step = total_steps // 2 # high-noise does first half, low-noise does rest
1617
+ shift = 5.0
1618
+ block_swap = 20 # blocks offloaded to CPU (0-40, higher = less VRAM)
1619
+
1620
+ workflow = {
1621
+ # ── Load high-noise DiT ──
1622
+ "1": {
1623
+ "class_type": "UNETLoader",
1624
+ "inputs": {
1625
+ "unet_name": high_dit,
1626
+ "weight_dtype": "fp8_e4m3fn",
1627
+ },
1628
+ },
1629
+ # ── Load low-noise DiT ──
1630
+ "2": {
1631
+ "class_type": "UNETLoader",
1632
+ "inputs": {
1633
+ "unet_name": low_dit,
1634
+ "weight_dtype": "fp8_e4m3fn",
1635
+ },
1636
+ },
1637
+ # ── wanBlockSwap on high-noise (VRAM optimization) ──
1638
+ "11": {
1639
+ "class_type": "wanBlockSwap",
1640
+ "inputs": {
1641
+ "model": ["1", 0],
1642
+ "blocks_to_swap": block_swap,
1643
+ "offload_img_emb": False,
1644
+ "offload_txt_emb": False,
1645
+ },
1646
+ },
1647
+ # ── wanBlockSwap on low-noise ──
1648
+ "12": {
1649
+ "class_type": "wanBlockSwap",
1650
+ "inputs": {
1651
+ "model": ["2", 0],
1652
+ "blocks_to_swap": block_swap,
1653
+ "offload_img_emb": False,
1654
+ "offload_txt_emb": False,
1655
+ },
1656
+ },
1657
+ # ── ModelSamplingSD3 shift on high-noise ──
1658
+ "13": {
1659
+ "class_type": "ModelSamplingSD3",
1660
+ "inputs": {
1661
+ "model": ["11", 0],
1662
+ "shift": shift,
1663
+ },
1664
+ },
1665
+ # ── ModelSamplingSD3 shift on low-noise ──
1666
+ "14": {
1667
+ "class_type": "ModelSamplingSD3",
1668
+ "inputs": {
1669
+ "model": ["12", 0],
1670
+ "shift": shift,
1671
+ },
1672
+ },
1673
+ # ── Load T5 text encoder ──
1674
+ "3": {
1675
+ "class_type": "CLIPLoader",
1676
+ "inputs": {
1677
+ "clip_name": t5_name,
1678
+ "type": "wan",
1679
+ },
1680
+ },
1681
+ # ── Positive prompt ──
1682
+ "6": {
1683
+ "class_type": "CLIPTextEncode",
1684
+ "inputs": {
1685
+ "text": prompt,
1686
+ "clip": ["3", 0],
1687
+ },
1688
+ },
1689
+ # ── Negative prompt ──
1690
+ "7": {
1691
+ "class_type": "CLIPTextEncode",
1692
+ "inputs": {
1693
+ "text": negative_prompt or "",
1694
+ "clip": ["3", 0],
1695
+ },
1696
+ },
1697
+ # ── VAE ──
1698
+ "4": {
1699
+ "class_type": "VAELoader",
1700
+ "inputs": {"vae_name": vae_name},
1701
+ },
1702
+ # ── Empty latent (1 frame = single image) ──
1703
+ "5": {
1704
+ "class_type": "EmptyHunyuanLatentVideo",
1705
+ "inputs": {
1706
+ "width": width,
1707
+ "height": height,
1708
+ "length": 1,
1709
+ "batch_size": 1,
1710
+ },
1711
+ },
1712
+ # ── KSamplerAdvanced #1: High-noise model (first half of steps) ──
1713
+ "15": {
1714
+ "class_type": "KSamplerAdvanced",
1715
+ "inputs": {
1716
+ "model": ["13", 0],
1717
+ "positive": ["6", 0],
1718
+ "negative": ["7", 0],
1719
+ "latent_image": ["5", 0],
1720
+ "add_noise": "enable",
1721
+ "noise_seed": seed,
1722
+ "steps": total_steps,
1723
+ "cfg": cfg,
1724
+ "sampler_name": "euler",
1725
+ "scheduler": "simple",
1726
+ "start_at_step": 0,
1727
+ "end_at_step": split_step,
1728
+ "return_with_leftover_noise": "enable",
1729
+ },
1730
+ },
1731
+ # ── KSamplerAdvanced #2: Low-noise model (second half of steps) ──
1732
+ "16": {
1733
+ "class_type": "KSamplerAdvanced",
1734
+ "inputs": {
1735
+ "model": ["14", 0],
1736
+ "positive": ["6", 0],
1737
+ "negative": ["7", 0],
1738
+ "latent_image": ["15", 0],
1739
+ "add_noise": "disable",
1740
+ "noise_seed": seed,
1741
+ "steps": total_steps,
1742
+ "cfg": cfg,
1743
+ "sampler_name": "euler",
1744
+ "scheduler": "simple",
1745
+ "start_at_step": split_step,
1746
+ "end_at_step": 10000,
1747
+ "return_with_leftover_noise": "disable",
1748
+ },
1749
+ },
1750
+ # ── VAE Decode ──
1751
+ "8": {
1752
+ "class_type": "VAEDecode",
1753
+ "inputs": {
1754
+ "samples": ["16", 0],
1755
+ "vae": ["4", 0],
1756
+ },
1757
+ },
1758
+ # ── Save Image ──
1759
+ "9": {
1760
+ "class_type": "SaveImage",
1761
+ "inputs": {
1762
+ "filename_prefix": "wan_remix_pod",
1763
+ "images": ["8", 0],
1764
+ },
1765
+ },
1766
+ }
1767
+
1768
+ # Add LoRA(s) to both models if specified — chained: DiT → LoRA1 → LoRA2 → Sampler
1769
+ if lora_name:
1770
+ # LoRA 1 (body) on high-noise and low-noise DiT
1771
+ workflow["20"] = {
1772
+ "class_type": "LoraLoader",
1773
+ "inputs": {
1774
+ "lora_name": lora_name,
1775
+ "strength_model": lora_strength,
1776
+ "strength_clip": 1.0,
1777
+ "model": ["13", 0],
1778
+ "clip": ["3", 0],
1779
+ },
1780
+ }
1781
+ workflow["21"] = {
1782
+ "class_type": "LoraLoader",
1783
+ "inputs": {
1784
+ "lora_name": lora_name,
1785
+ "strength_model": lora_strength,
1786
+ "strength_clip": 1.0,
1787
+ "model": ["14", 0],
1788
+ "clip": ["3", 0],
1789
+ },
1790
+ }
1791
+
1792
+ # Determine what the samplers and CLIP read from (LoRA2 if present, else LoRA1)
1793
+ high_model_out = ["20", 0]
1794
+ low_model_out = ["21", 0]
1795
+ clip_out = ["20", 1]
1796
+
1797
+ if lora_name_2:
1798
+ # LoRA 2 (face) chained after LoRA 1 on both models
1799
+ workflow["22"] = {
1800
+ "class_type": "LoraLoader",
1801
+ "inputs": {
1802
+ "lora_name": lora_name_2,
1803
+ "strength_model": lora_strength_2,
1804
+ "strength_clip": 1.0,
1805
+ "model": ["20", 0],
1806
+ "clip": ["20", 1],
1807
+ },
1808
+ }
1809
+ workflow["23"] = {
1810
+ "class_type": "LoraLoader",
1811
+ "inputs": {
1812
+ "lora_name": lora_name_2,
1813
+ "strength_model": lora_strength_2,
1814
+ "strength_clip": 1.0,
1815
+ "model": ["21", 0],
1816
+ "clip": ["21", 1],
1817
+ },
1818
+ }
1819
+ high_model_out = ["22", 0]
1820
+ low_model_out = ["23", 0]
1821
+ clip_out = ["22", 1]
1822
+
1823
+ # Rewire samplers and CLIP encoding
1824
+ workflow["15"]["inputs"]["model"] = high_model_out
1825
+ workflow["16"]["inputs"]["model"] = low_model_out
1826
+ workflow["6"]["inputs"]["clip"] = clip_out
1827
+ workflow["7"]["inputs"]["clip"] = clip_out
1828
+
1829
+ return workflow
src/content_engine/api/routes_ui.py CHANGED
@@ -1,172 +1,23 @@
1
- """Web UI route — serves the single-page dashboard with password protection."""
2
 
3
  from __future__ import annotations
4
 
5
- import hashlib
6
- import os
7
- import secrets
8
  from pathlib import Path
9
 
10
- from fastapi import APIRouter, Request, Form, HTTPException
11
- from fastapi.responses import HTMLResponse, Response, RedirectResponse
12
 
13
  router = APIRouter(tags=["ui"])
14
 
15
  UI_HTML_PATH = Path(__file__).parent / "ui.html"
16
 
17
- # Simple session storage (in-memory, resets on restart)
18
- _valid_sessions: set[str] = set()
19
-
20
- # Get password from environment variable
21
- APP_PASSWORD = os.environ.get("APP_PASSWORD", "")
22
-
23
-
24
- def _check_session(request: Request) -> bool:
25
- """Check if request has valid session."""
26
- if not APP_PASSWORD:
27
- return True # No password set, allow access
28
- session_token = request.cookies.get("session")
29
- return session_token in _valid_sessions
30
-
31
-
32
- LOGIN_HTML = """
33
- <!DOCTYPE html>
34
- <html lang="en">
35
- <head>
36
- <meta charset="UTF-8">
37
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
38
- <title>Login - Content Engine</title>
39
- <style>
40
- *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
41
- body {
42
- font-family: 'Segoe UI', -apple-system, system-ui, sans-serif;
43
- background: linear-gradient(135deg, #0a0a0f 0%, #1a1a2e 100%);
44
- color: #eee;
45
- min-height: 100vh;
46
- display: flex;
47
- align-items: center;
48
- justify-content: center;
49
- }
50
- .login-box {
51
- background: #1a1a28;
52
- border: 1px solid #2a2a3a;
53
- border-radius: 16px;
54
- padding: 40px;
55
- width: 100%;
56
- max-width: 400px;
57
- box-shadow: 0 20px 60px rgba(0,0,0,0.5);
58
- }
59
- h1 {
60
- font-size: 24px;
61
- margin-bottom: 8px;
62
- background: linear-gradient(135deg, #7c3aed, #ec4899);
63
- -webkit-background-clip: text;
64
- -webkit-text-fill-color: transparent;
65
- }
66
- .subtitle { color: #888; font-size: 14px; margin-bottom: 30px; }
67
- label { display: block; font-size: 13px; color: #888; margin-bottom: 6px; }
68
- input[type="password"] {
69
- width: 100%;
70
- padding: 12px 16px;
71
- border-radius: 8px;
72
- border: 1px solid #2a2a3a;
73
- background: #0a0a0f;
74
- color: #eee;
75
- font-size: 16px;
76
- margin-bottom: 20px;
77
- }
78
- input[type="password"]:focus { outline: none; border-color: #7c3aed; }
79
- button {
80
- width: 100%;
81
- padding: 14px;
82
- border-radius: 8px;
83
- border: none;
84
- background: linear-gradient(135deg, #7c3aed, #6d28d9);
85
- color: white;
86
- font-size: 16px;
87
- font-weight: 600;
88
- cursor: pointer;
89
- transition: transform 0.1s, box-shadow 0.2s;
90
- }
91
- button:hover { transform: translateY(-1px); box-shadow: 0 4px 20px rgba(124, 58, 237, 0.4); }
92
- .error { color: #ef4444; font-size: 13px; margin-bottom: 16px; }
93
- </style>
94
- </head>
95
- <body>
96
- <div class="login-box">
97
- <h1>Content Engine</h1>
98
- <p class="subtitle">Enter password to access</p>
99
- {{ERROR}}
100
- <form method="POST" action="/login">
101
- <label>Password</label>
102
- <input type="password" name="password" placeholder="Enter password" autofocus required>
103
- <button type="submit">Login</button>
104
- </form>
105
- </div>
106
- </body>
107
- </html>
108
- """
109
-
110
 
111
  @router.get("/", response_class=HTMLResponse)
112
- async def dashboard(request: Request):
113
  """Serve the main dashboard UI."""
114
- if not _check_session(request):
115
- return RedirectResponse(url="/login", status_code=302)
116
-
117
  content = UI_HTML_PATH.read_text(encoding="utf-8")
118
  return Response(
119
  content=content,
120
  media_type="text/html",
121
  headers={"Cache-Control": "no-cache, no-store, must-revalidate"},
122
  )
123
-
124
-
125
- @router.get("/login", response_class=HTMLResponse)
126
- async def login_page(request: Request, error: str = ""):
127
- """Show login page."""
128
- if not APP_PASSWORD:
129
- return RedirectResponse(url="/", status_code=302)
130
-
131
- if _check_session(request):
132
- return RedirectResponse(url="/", status_code=302)
133
-
134
- error_html = f'<p class="error">{error}</p>' if error else ""
135
- html = LOGIN_HTML.replace("{{ERROR}}", error_html)
136
- return Response(content=html, media_type="text/html")
137
-
138
-
139
- @router.post("/login")
140
- async def login_submit(password: str = Form(...)):
141
- """Handle login form submission."""
142
- if not APP_PASSWORD:
143
- return RedirectResponse(url="/", status_code=302)
144
-
145
- if password == APP_PASSWORD:
146
- # Create session token
147
- session_token = secrets.token_hex(32)
148
- _valid_sessions.add(session_token)
149
-
150
- response = RedirectResponse(url="/", status_code=302)
151
- response.set_cookie(
152
- key="session",
153
- value=session_token,
154
- httponly=True,
155
- max_age=86400 * 7, # 7 days
156
- samesite="lax",
157
- )
158
- return response
159
- else:
160
- return RedirectResponse(url="/login?error=Invalid+password", status_code=302)
161
-
162
-
163
- @router.get("/logout")
164
- async def logout(request: Request):
165
- """Log out and clear session."""
166
- session_token = request.cookies.get("session")
167
- if session_token in _valid_sessions:
168
- _valid_sessions.discard(session_token)
169
-
170
- response = RedirectResponse(url="/login", status_code=302)
171
- response.delete_cookie("session")
172
- return response
 
1
+ """Web UI route — serves the single-page dashboard."""
2
 
3
  from __future__ import annotations
4
 
 
 
 
5
  from pathlib import Path
6
 
7
+ from fastapi import APIRouter
8
+ from fastapi.responses import HTMLResponse, Response
9
 
10
  router = APIRouter(tags=["ui"])
11
 
12
  UI_HTML_PATH = Path(__file__).parent / "ui.html"
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
  @router.get("/", response_class=HTMLResponse)
16
+ async def dashboard():
17
  """Serve the main dashboard UI."""
 
 
 
18
  content = UI_HTML_PATH.read_text(encoding="utf-8")
19
  return Response(
20
  content=content,
21
  media_type="text/html",
22
  headers={"Cache-Control": "no-cache, no-store, must-revalidate"},
23
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/content_engine/api/routes_video.py CHANGED
@@ -10,17 +10,10 @@ import time
10
  import uuid
11
  from pathlib import Path
12
 
 
13
  from fastapi import APIRouter, File, Form, HTTPException, UploadFile
14
  from pydantic import BaseModel
15
 
16
- # Optional RunPod import
17
- try:
18
- import runpod
19
- RUNPOD_AVAILABLE = True
20
- except ImportError:
21
- runpod = None
22
- RUNPOD_AVAILABLE = False
23
-
24
  logger = logging.getLogger(__name__)
25
 
26
  router = APIRouter(prefix="/api/video", tags=["video"])
@@ -47,6 +40,10 @@ def _get_pod_state():
47
  from content_engine.api.routes_pod import _pod_state
48
  return _pod_state
49
 
 
 
 
 
50
 
51
  class VideoGenerateRequest(BaseModel):
52
  prompt: str
@@ -93,9 +90,10 @@ async def generate_video(
93
  )
94
 
95
  try:
 
96
  async with httpx.AsyncClient(timeout=30) as client:
97
  # First upload the image to ComfyUI
98
- upload_url = f"http://{pod_state['ip']}:{pod_state['port']}/upload/image"
99
  files = {"image": (f"input_{job_id}.png", image_bytes, "image/png")}
100
  upload_resp = await client.post(upload_url, files=files)
101
 
@@ -116,7 +114,7 @@ async def generate_video(
116
  )
117
 
118
  # Submit workflow
119
- url = f"http://{pod_state['ip']}:{pod_state['port']}/prompt"
120
  resp = await client.post(url, json={"prompt": workflow})
121
  resp.raise_for_status()
122
 
@@ -364,6 +362,8 @@ async def _poll_wavespeed_video(poll_url: str, api_key: str, job_id: str, max_at
364
  if status == "failed":
365
  error_msg = data.get("error", "Unknown error")
366
  logger.error("WaveSpeed video job failed: %s", error_msg)
 
 
367
  return None
368
 
369
  outputs = data.get("outputs", [])
@@ -430,6 +430,11 @@ async def _generate_cloud_video(
430
  if negative_prompt:
431
  payload["negative_prompt"] = negative_prompt
432
 
 
 
 
 
 
433
  _video_jobs[job_id]["message"] = f"Calling WaveSpeed API ({wavespeed_model})..."
434
  logger.info("Calling WaveSpeed video API: %s", endpoint)
435
 
@@ -527,14 +532,14 @@ async def _poll_video_job(job_id: str, prompt_id: str):
527
  """Poll ComfyUI for video job completion."""
528
  import httpx
529
 
530
- pod_state = _get_pod_state()
531
  start = time.time()
532
- timeout = 600 # 10 minutes for video
 
533
 
534
  async with httpx.AsyncClient(timeout=60) as client:
535
  while time.time() - start < timeout:
536
  try:
537
- url = f"http://{pod_state['ip']}:{pod_state['port']}/history/{prompt_id}"
538
  resp = await client.get(url)
539
 
540
  if resp.status_code == 200:
@@ -573,7 +578,7 @@ async def _download_video(client, job_id: str, video_info: dict, pod_state: dict
573
  file_type = video_info.get("type", "output")
574
 
575
  # Download video
576
- view_url = f"http://{pod_state['ip']}:{pod_state['port']}/view"
577
  params = {"filename": filename, "type": file_type}
578
  if subfolder:
579
  params["subfolder"] = subfolder
@@ -627,10 +632,339 @@ async def get_video_file(filename: str):
627
  if not video_path.exists():
628
  raise HTTPException(404, "Video not found")
629
 
630
- media_type = "video/webm" if filename.endswith(".webm") else "image/webp"
 
 
 
 
 
631
  return FileResponse(video_path, media_type=media_type)
632
 
633
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
634
  def _build_wan_i2v_workflow(
635
  uploaded_filename: str = None,
636
  image_b64: str = None,
 
10
  import uuid
11
  from pathlib import Path
12
 
13
+ import runpod
14
  from fastapi import APIRouter, File, Form, HTTPException, UploadFile
15
  from pydantic import BaseModel
16
 
 
 
 
 
 
 
 
 
17
  logger = logging.getLogger(__name__)
18
 
19
  router = APIRouter(prefix="/api/video", tags=["video"])
 
40
  from content_engine.api.routes_pod import _pod_state
41
  return _pod_state
42
 
43
+ def _get_comfyui_url():
44
+ from content_engine.api.routes_pod import _get_comfyui_url as _gcurl
45
+ return _gcurl()
46
+
47
 
48
  class VideoGenerateRequest(BaseModel):
49
  prompt: str
 
90
  )
91
 
92
  try:
93
+ comfyui_url = _get_comfyui_url()
94
  async with httpx.AsyncClient(timeout=30) as client:
95
  # First upload the image to ComfyUI
96
+ upload_url = f"{comfyui_url}/upload/image"
97
  files = {"image": (f"input_{job_id}.png", image_bytes, "image/png")}
98
  upload_resp = await client.post(upload_url, files=files)
99
 
 
114
  )
115
 
116
  # Submit workflow
117
+ url = f"{comfyui_url}/prompt"
118
  resp = await client.post(url, json={"prompt": workflow})
119
  resp.raise_for_status()
120
 
 
362
  if status == "failed":
363
  error_msg = data.get("error", "Unknown error")
364
  logger.error("WaveSpeed video job failed: %s", error_msg)
365
+ _video_jobs[job_id]["status"] = "failed"
366
+ _video_jobs[job_id]["error"] = error_msg
367
  return None
368
 
369
  outputs = data.get("outputs", [])
 
430
  if negative_prompt:
431
  payload["negative_prompt"] = negative_prompt
432
 
433
+ # Grok Imagine Video uses duration (6 or 10s) instead of frame counts
434
+ if model == "grok-imagine-i2v":
435
+ num_frames = _video_jobs[job_id].get("num_frames", 81)
436
+ payload["duration"] = 10 if num_frames > 150 else 6
437
+
438
  _video_jobs[job_id]["message"] = f"Calling WaveSpeed API ({wavespeed_model})..."
439
  logger.info("Calling WaveSpeed video API: %s", endpoint)
440
 
 
532
  """Poll ComfyUI for video job completion."""
533
  import httpx
534
 
 
535
  start = time.time()
536
+ timeout = 1800 # 30 minutes for video (WAN 2.2 needs time to load 14B model first run)
537
+ comfyui_url = _get_comfyui_url()
538
 
539
  async with httpx.AsyncClient(timeout=60) as client:
540
  while time.time() - start < timeout:
541
  try:
542
+ url = f"{comfyui_url}/history/{prompt_id}"
543
  resp = await client.get(url)
544
 
545
  if resp.status_code == 200:
 
578
  file_type = video_info.get("type", "output")
579
 
580
  # Download video
581
+ view_url = f"{_get_comfyui_url()}/view"
582
  params = {"filename": filename, "type": file_type}
583
  if subfolder:
584
  params["subfolder"] = subfolder
 
632
  if not video_path.exists():
633
  raise HTTPException(404, "Video not found")
634
 
635
+ if filename.endswith(".webm"):
636
+ media_type = "video/webm"
637
+ elif filename.endswith(".mp4"):
638
+ media_type = "video/mp4"
639
+ else:
640
+ media_type = "image/webp"
641
  return FileResponse(video_path, media_type=media_type)
642
 
643
 
644
+ @router.post("/animate")
645
+ async def generate_video_animate(
646
+ image: UploadFile = File(...),
647
+ driving_video: UploadFile = File(...),
648
+ prompt: str = Form("a person dancing, smooth motion, high quality"),
649
+ negative_prompt: str = Form(""),
650
+ width: int = Form(832),
651
+ height: int = Form(480),
652
+ num_frames: int = Form(81),
653
+ fps: int = Form(16),
654
+ seed: int = Form(-1),
655
+ steps: int = Form(20),
656
+ cfg: float = Form(6.0),
657
+ bg_mode: str = Form("keep"), # keep | driving_video | auto
658
+ ):
659
+ """Generate a dance animation via WAN 2.2 Animate on RunPod ComfyUI pod.
660
+
661
+ Requires on the pod:
662
+ - models/diffusion_models/Wan2_2-Animate-14B_fp8_e4m3fn_scaled_KJ.safetensors
663
+ - models/vae/wan_2.1_vae.safetensors
664
+ - models/clip_vision/clip_vision_h.safetensors
665
+ - models/text_encoders/umt5-xxl-enc-bf16.safetensors
666
+ - Custom nodes: ComfyUI-WanVideoWrapper, ComfyUI-VideoHelperSuite, comfyui_controlnet_aux
667
+ """
668
+ import httpx
669
+ import random
670
+
671
+ pod_state = _get_pod_state()
672
+ if pod_state["status"] != "running":
673
+ raise HTTPException(400, "Pod not running — start it first in Status page")
674
+
675
+ job_id = str(uuid.uuid4())[:8]
676
+ seed = seed if seed >= 0 else random.randint(0, 2**32 - 1)
677
+
678
+ image_bytes = await image.read()
679
+ video_bytes = await driving_video.read()
680
+
681
+ try:
682
+ base_url = _get_comfyui_url()
683
+ async with httpx.AsyncClient(timeout=60) as client:
684
+
685
+ # Upload character reference image
686
+ img_resp = await client.post(
687
+ f"{base_url}/upload/image",
688
+ files={"image": (f"ref_{job_id}.png", image_bytes, "image/png")},
689
+ )
690
+ if img_resp.status_code != 200:
691
+ raise HTTPException(500, f"Failed to upload character image: {img_resp.text[:200]}")
692
+ img_filename = img_resp.json().get("name", f"ref_{job_id}.png")
693
+ logger.info("Uploaded character image: %s", img_filename)
694
+
695
+ # Upload driving video
696
+ vid_ext = "mp4"
697
+ if driving_video.filename and "." in driving_video.filename:
698
+ vid_ext = driving_video.filename.rsplit(".", 1)[-1].lower()
699
+ vid_resp = await client.post(
700
+ f"{base_url}/upload/image",
701
+ files={"image": (f"drive_{job_id}.{vid_ext}", video_bytes, "video/mp4")},
702
+ )
703
+ if vid_resp.status_code != 200:
704
+ raise HTTPException(500, f"Failed to upload driving video: {vid_resp.text[:200]}")
705
+ vid_filename = vid_resp.json().get("name", f"drive_{job_id}.{vid_ext}")
706
+ logger.info("Uploaded driving video: %s", vid_filename)
707
+
708
+ workflow = _build_wan_animate_workflow(
709
+ ref_image_filename=img_filename,
710
+ driving_video_filename=vid_filename,
711
+ prompt=prompt,
712
+ negative_prompt=negative_prompt,
713
+ width=width,
714
+ height=height,
715
+ num_frames=num_frames,
716
+ fps=fps,
717
+ seed=seed,
718
+ steps=steps,
719
+ cfg=cfg,
720
+ bg_mode=bg_mode,
721
+ )
722
+
723
+ resp = await client.post(f"{base_url}/prompt", json={"prompt": workflow})
724
+ if resp.status_code != 200:
725
+ logger.error("ComfyUI /prompt rejected workflow: %s", resp.text[:2000])
726
+ resp.raise_for_status()
727
+ prompt_id = resp.json()["prompt_id"]
728
+
729
+ _video_jobs[job_id] = {
730
+ "prompt_id": prompt_id,
731
+ "status": "running",
732
+ "seed": seed,
733
+ "started_at": time.time(),
734
+ "num_frames": num_frames,
735
+ "fps": fps,
736
+ "mode": "animate",
737
+ "message": "WAN 2.2 Animate submitted...",
738
+ }
739
+
740
+ logger.info("WAN Animate job started: %s -> %s", job_id, prompt_id)
741
+ asyncio.create_task(_poll_video_job(job_id, prompt_id))
742
+
743
+ return {
744
+ "job_id": job_id,
745
+ "status": "running",
746
+ "seed": seed,
747
+ "estimated_time": f"~{num_frames * 3} seconds",
748
+ }
749
+
750
+ except httpx.HTTPError as e:
751
+ logger.error("WAN Animate generation failed: %s", e)
752
+ raise HTTPException(500, f"Generation failed: {e}")
753
+
754
+
755
+ def _build_wan_animate_workflow(
756
+ ref_image_filename: str,
757
+ driving_video_filename: str,
758
+ prompt: str = "a person dancing, smooth motion",
759
+ negative_prompt: str = "",
760
+ width: int = 832,
761
+ height: int = 480,
762
+ num_frames: int = 81,
763
+ fps: int = 16,
764
+ seed: int = 42,
765
+ steps: int = 20,
766
+ cfg: float = 6.0,
767
+ bg_mode: str = "auto",
768
+ ) -> dict:
769
+ """Build ComfyUI API workflow for WAN 2.2 Animate (motion transfer from driving video).
770
+
771
+ Pipeline:
772
+ reference image -> CLIP encode + resize
773
+ driving video -> DWPreprocessor (pose skeleton)
774
+ both -> WanVideoAnimateEmbeds -> WanVideoSampler -> decode -> MP4
775
+
776
+ bg_mode options:
777
+ "keep" - use reference image as background (character's original background)
778
+ "driving_video" - use driving video frames as background
779
+ "auto" - no bg hint, model generates its own background
780
+ """
781
+ neg = negative_prompt or "blurry, static, low quality, watermark, text"
782
+
783
+ workflow = {
784
+ # VAE
785
+ "1": {
786
+ "class_type": "WanVideoVAELoader",
787
+ "inputs": {
788
+ "model_name": "wan_2.1_vae.safetensors",
789
+ "precision": "bf16",
790
+ },
791
+ },
792
+ # CLIP Vision
793
+ "2": {
794
+ "class_type": "CLIPVisionLoader",
795
+ "inputs": {"clip_name": "clip_vision_h.safetensors"},
796
+ },
797
+ # Diffusion model
798
+ "3": {
799
+ "class_type": "WanVideoModelLoader",
800
+ "inputs": {
801
+ "model": "wan2.2_animate_14B_bf16.safetensors",
802
+ "base_precision": "bf16",
803
+ "quantization": "fp8_e4m3fn",
804
+ "load_device": "offload_device",
805
+ "attention_mode": "sdpa",
806
+ },
807
+ },
808
+ # Load T5 text encoder
809
+ "4": {
810
+ "class_type": "LoadWanVideoT5TextEncoder",
811
+ "inputs": {
812
+ "model_name": "umt5-xxl-enc-fp8_e4m3fn.safetensors",
813
+ "precision": "bf16",
814
+ },
815
+ },
816
+ # Encode text prompts
817
+ "16": {
818
+ "class_type": "WanVideoTextEncode",
819
+ "inputs": {
820
+ "positive_prompt": prompt,
821
+ "negative_prompt": neg,
822
+ "t5": ["4", 0],
823
+ "force_offload": True,
824
+ },
825
+ },
826
+ # Load reference character image
827
+ "5": {
828
+ "class_type": "LoadImage",
829
+ "inputs": {"image": ref_image_filename},
830
+ },
831
+ # Resize to target resolution
832
+ "6": {
833
+ "class_type": "ImageResizeKJv2",
834
+ "inputs": {
835
+ "image": ["5", 0],
836
+ "width": width,
837
+ "height": height,
838
+ "upscale_method": "lanczos",
839
+ "keep_proportion": "pad_edge_pixel",
840
+ "pad_color": "0, 0, 0",
841
+ "crop_position": "top",
842
+ "divisible_by": 16,
843
+ },
844
+ },
845
+ # CLIP Vision encode reference
846
+ "7": {
847
+ "class_type": "WanVideoClipVisionEncode",
848
+ "inputs": {
849
+ "clip_vision": ["2", 0],
850
+ "image_1": ["6", 0],
851
+ "strength_1": 1.0,
852
+ "strength_2": 1.0,
853
+ "crop": "center",
854
+ "combine_embeds": "average",
855
+ "force_offload": True,
856
+ },
857
+ },
858
+ # Load driving video (dance moves)
859
+ "8": {
860
+ "class_type": "VHS_LoadVideo",
861
+ "inputs": {
862
+ "video": driving_video_filename,
863
+ "force_rate": fps,
864
+ "custom_width": 0,
865
+ "custom_height": 0,
866
+ "frame_load_cap": num_frames if num_frames > 0 else 0,
867
+ "skip_first_frames": 0,
868
+ "select_every_nth": 1,
869
+ },
870
+ },
871
+ # Extract pose skeleton from driving video
872
+ "9": {
873
+ "class_type": "DWPreprocessor",
874
+ "inputs": {
875
+ "image": ["8", 0],
876
+ "detect_hand": "disable",
877
+ "detect_body": "enable",
878
+ "detect_face": "disable",
879
+ "resolution": max(width, height),
880
+ "bbox_detector": "yolox_l.torchscript.pt",
881
+ "pose_estimator": "dw-ll_ucoco_384_bs5.torchscript.pt",
882
+ "scale_stick_for_xinsr_cn": "disable",
883
+ },
884
+ },
885
+ # Animate embeddings: combine ref image + pose + optional background
886
+ "10": {
887
+ "class_type": "WanVideoAnimateEmbeds",
888
+ "inputs": {
889
+ "vae": ["1", 0],
890
+ "clip_embeds": ["7", 0],
891
+ "ref_images": ["6", 0],
892
+ "pose_images": ["9", 0],
893
+ # bg_mode: "keep" = ref image bg, "driving_video" = video frames bg, "auto" = model decides
894
+ **({} if bg_mode == "auto" else {
895
+ "bg_images": ["6", 0] if bg_mode == "keep" else ["8", 0],
896
+ }),
897
+ "width": width,
898
+ "height": height,
899
+ # When num_frames==0 ("Match video"), link to GetImageSizeAndCount output slot 3
900
+ "num_frames": ["15", 3] if num_frames == 0 else num_frames,
901
+ "force_offload": True,
902
+ "frame_window_size": 77,
903
+ "colormatch": "disabled",
904
+ "pose_strength": 1.0,
905
+ "face_strength": 1.0,
906
+ },
907
+ },
908
+ # Diffusion sampler (no context_options — WanAnim handles looping internally)
909
+ "12": {
910
+ "class_type": "WanVideoSampler",
911
+ "inputs": {
912
+ "model": ["3", 0],
913
+ "image_embeds": ["10", 0],
914
+ "text_embeds": ["16", 0],
915
+ "steps": steps,
916
+ "cfg": cfg,
917
+ "shift": 5.0,
918
+ "seed": seed,
919
+ "force_offload": True,
920
+ "scheduler": "dpm++_sde",
921
+ "riflex_freq_index": 0,
922
+ "denoise_strength": 1.0,
923
+ },
924
+ },
925
+ # Decode latents to frames
926
+ "13": {
927
+ "class_type": "WanVideoDecode",
928
+ "inputs": {
929
+ "vae": ["1", 0],
930
+ "samples": ["12", 0],
931
+ "enable_vae_tiling": True,
932
+ "tile_x": 272,
933
+ "tile_y": 272,
934
+ "tile_stride_x": 144,
935
+ "tile_stride_y": 128,
936
+ },
937
+ },
938
+ # Combine frames into MP4
939
+ "14": {
940
+ "class_type": "VHS_VideoCombine",
941
+ "inputs": {
942
+ "images": ["13", 0],
943
+ "frame_rate": fps,
944
+ "loop_count": 0,
945
+ "filename_prefix": "WanAnimate",
946
+ "format": "video/h264-mp4",
947
+ "pix_fmt": "yuv420p",
948
+ "crf": 19,
949
+ "save_metadata": True,
950
+ "trim_to_audio": False,
951
+ "pingpong": False,
952
+ "save_output": True,
953
+ },
954
+ },
955
+ }
956
+
957
+ # "Match video" mode (num_frames=0): detect actual frame count from posed video
958
+ # GetImageSizeAndCount outputs: (IMAGE, width, height, count) — slot 3 = frame count
959
+ if num_frames == 0:
960
+ workflow["15"] = {
961
+ "class_type": "GetImageSizeAndCount",
962
+ "inputs": {"image": ["9", 0]},
963
+ }
964
+
965
+ return workflow
966
+
967
+
968
  def _build_wan_i2v_workflow(
969
  uploaded_filename: str = None,
970
  image_b64: str = None,
src/content_engine/api/ui.html CHANGED
@@ -898,7 +898,7 @@ select { cursor: pointer; }
898
 
899
  <div id="cloud-model-select" style="display:none">
900
  <label>Model</label>
901
- <select id="gen-cloud-model">
902
  <optgroup label="Recommended">
903
  <option value="seedream-4.5" selected>SeeDream v4.5 (Best)</option>
904
  <option value="gpt-image-1.5">GPT Image 1.5</option>
@@ -909,9 +909,14 @@ select { cursor: pointer; }
909
  <option value="seedream-3.1">SeeDream v3.1</option>
910
  </optgroup>
911
  <optgroup label="Fast">
 
 
912
  <option value="gpt-image-1-mini">GPT Image Mini</option>
913
  <option value="nano-banana">NanoBanana</option>
914
  </optgroup>
 
 
 
915
  <optgroup label="Other">
916
  <option value="kling-image-o3">Kling Image O3</option>
917
  <option value="wan-2.6">WAN 2.6</option>
@@ -922,6 +927,19 @@ select { cursor: pointer; }
922
  </select>
923
  </div>
924
 
 
 
 
 
 
 
 
 
 
 
 
 
 
925
  <div id="cloud-edit-model-select" style="display:none">
926
  <label>Model</label>
927
  <select id="gen-cloud-edit-model">
@@ -933,6 +951,7 @@ select { cursor: pointer; }
933
  <optgroup label="Multi-Reference (2+ images)">
934
  <option value="seedream-4.5-multi">SeeDream v4.5 Sequential (up to 3)</option>
935
  <option value="seedream-4-multi">SeeDream v4 Sequential (up to 3)</option>
 
936
  <option value="kling-o1-multi">Kling O1 (up to 10 refs)</option>
937
  <option value="qwen-multi-angle">Qwen Multi-Angle</option>
938
  </optgroup>
@@ -948,7 +967,6 @@ select { cursor: pointer; }
948
  <option value="wan-2.5-edit">WAN 2.5 Edit</option>
949
  <option value="wan-2.2-edit">WAN 2.2 Edit</option>
950
  <option value="qwen-edit-lora">Qwen Edit + LoRA</option>
951
- <option value="nano-banana-pro-edit">NanoBanana Pro Edit</option>
952
  <option value="kling-o3-edit">Kling O3 Edit</option>
953
  <option value="dreamina-3-edit">Dreamina v3 Edit</option>
954
  </optgroup>
@@ -964,16 +982,23 @@ select { cursor: pointer; }
964
  <span id="pod-status-indicator">Checking pod status...</span>
965
  </div>
966
  <label>Base Model</label>
967
- <select id="pod-model-select">
 
968
  <option value="flux">FLUX.2 Dev (Realistic)</option>
969
- <option value="wan-t2i">WAN 2.2 (Stylized/Anime)</option>
970
  </select>
971
- <label style="margin-top:8px">Your LoRA</label>
972
  <select id="pod-lora-select">
973
  <option value="">None (Base model only)</option>
974
  </select>
975
- <label style="margin-top:8px">LoRA Strength</label>
976
  <input type="number" id="pod-lora-strength" value="0.85" min="0" max="1.5" step="0.05" style="width:80px">
 
 
 
 
 
 
977
  <div style="font-size:11px;color:var(--text-secondary);margin-top:4px">
978
  Start the pod in Status page first.
979
  </div>
@@ -981,55 +1006,121 @@ select { cursor: pointer; }
981
 
982
  <!-- Image to Video settings -->
983
  <div id="img2video-section" style="display:none">
984
- <div class="section-title">Source Image</div>
985
- <div class="drop-zone" id="video-drop-zone" onclick="document.getElementById('video-file-input').click()">
986
- <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5"><path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/><polyline points="17 8 12 3 7 8"/><line x1="12" y1="3" x2="12" y2="15"/></svg>
987
- <div>Drop or click to upload</div>
 
988
  </div>
989
- <input type="file" id="video-file-input" accept="image/*" style="display:none" onchange="handleVideoImage(this)">
990
- <div id="video-preview" style="display:none; margin-top:6px">
991
- <img id="video-preview-img" style="max-width:100%; max-height:100px; border-radius:6px">
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
992
  </div>
993
 
994
- <label style="margin-top:12px">Video Model</label>
995
- <select id="video-cloud-model">
996
- <optgroup label="Recommended">
997
- <option value="wan-2.6-i2v-pro" selected>WAN 2.6 Pro ($0.05/s)</option>
998
- <option value="wan-2.6-i2v-flash">WAN 2.6 Flash (Fast)</option>
999
- <option value="kling-o3-pro">Kling O3 Pro</option>
1000
- </optgroup>
1001
- <optgroup label="Premium (Higgsfield - requires API key)">
1002
- <option value="kling-3.0-pro">Kling 3.0 Pro (15s + Audio)</option>
1003
- <option value="kling-3.0">Kling 3.0</option>
1004
- <option value="sora-2-hf">Sora 2</option>
1005
- <option value="veo-3.1-hf">Veo 3.1</option>
1006
- </optgroup>
1007
- <optgroup label="Budget Friendly">
1008
- <option value="wan-2.2-i2v-720p">WAN 2.2 720p ($0.01/s)</option>
1009
- <option value="wan-2.2-i2v-1080p">WAN 2.2 1080p</option>
1010
- <option value="wan-2.5-i2v">WAN 2.5</option>
1011
- </optgroup>
1012
- <optgroup label="Cinematic">
1013
- <option value="higgsfield-dop">Higgsfield DoP (5s)</option>
1014
- <option value="seedance-1.5-pro">Seedance Pro</option>
1015
- <option value="dreamina-i2v-1080p">Dreamina 1080p</option>
1016
- </optgroup>
1017
- <optgroup label="Other">
1018
- <option value="kling-o3">Kling O3</option>
1019
- <option value="veo-3.1">Veo 3.1 (WaveSpeed)</option>
1020
- <option value="sora-2">Sora 2 (WaveSpeed)</option>
1021
- <option value="vidu-q3">Vidu Q3</option>
1022
- </optgroup>
1023
- </select>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1024
 
1025
- <label>Duration</label>
1026
- <select id="video-duration">
1027
- <option value="41">2s</option>
1028
- <option value="81" selected>3s</option>
1029
- <option value="121">5s</option>
1030
- <option value="241">10s</option>
1031
- <option value="361">15s</option>
1032
- </select>
1033
  </div>
1034
 
1035
  <!-- Reference image upload for img2img -->
@@ -1284,6 +1375,10 @@ select { cursor: pointer; }
1284
  </optgroup>
1285
  </select>
1286
  </div>
 
 
 
 
1287
  </div>
1288
  <div id="runpod-not-configured" style="display:none;margin-top:8px;padding:12px;background:rgba(239,68,68,0.08);border:1px solid var(--red);border-radius:8px;font-size:12px;color:var(--text-secondary)">
1289
  <div style="font-weight:600;color:var(--red);margin-bottom:4px">RunPod Not Configured</div>
@@ -1303,7 +1398,7 @@ select { cursor: pointer; }
1303
  <div>Drop images here or click to browse</div>
1304
  <div style="font-size:11px;margin-top:4px">Upload 20-50 images of the subject (min 5)</div>
1305
  </div>
1306
- <input type="file" id="train-file-input" accept="image/*" multiple style="display:none" onchange="handleTrainImages(this)">
1307
  <div id="train-image-count" style="font-size:12px;color:var(--text-secondary);margin-top:6px"></div>
1308
 
1309
  <!-- Caption editor: shown after images are uploaded -->
@@ -1422,9 +1517,12 @@ select { cursor: pointer; }
1422
  </div>
1423
  <div id="pod-controls" style="display:flex; gap:8px; align-items:center; flex-wrap:wrap">
1424
  <select id="pod-model-type" style="padding:8px 12px; border-radius:6px; background:var(--bg-primary); border:1px solid var(--border); color:var(--text-primary)">
 
1425
  <option value="flux2">FLUX.2 Dev (Realistic txt2img)</option>
1426
  <option value="flux1">FLUX.1 Dev (txt2img)</option>
1427
- <option value="wan22">WAN 2.2 (img2video)</option>
 
 
1428
  </select>
1429
  <select id="pod-gpu-select" style="padding:8px 12px; border-radius:6px; background:var(--bg-primary); border:1px solid var(--border); color:var(--text-primary)">
1430
  <optgroup label="48GB+ (FLUX.2 / Large models)">
@@ -1583,6 +1681,9 @@ let currentPage = 'generate';
1583
  let selectedRating = 'sfw';
1584
  let selectedBackend = 'pod';
1585
  let selectedVideoBackend = 'cloud';
 
 
 
1586
  let selectedMode = 'txt2img';
1587
  let templatesData = [];
1588
  let charactersData = [];
@@ -1832,6 +1933,43 @@ function clearPoseImage() {
1832
  document.getElementById('pose-file-input').value = '';
1833
  }
1834
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1835
  function handleVideoImage(input) {
1836
  if (input.files[0]) {
1837
  videoImageFile = input.files[0];
@@ -1866,15 +2004,50 @@ function clearVideoImage() {
1866
  }
1867
 
1868
  function handleTrainImages(input) {
1869
- trainImageFiles = Array.from(input.files);
1870
- updateTrainCount();
1871
- buildCaptionEditor();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1872
  }
1873
 
1874
  function handleTrainDrop(files) {
1875
- trainImageFiles = Array.from(files).filter(f => f.type.startsWith('image/'));
1876
- updateTrainCount();
1877
- buildCaptionEditor();
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1878
  }
1879
 
1880
  function updateTrainCount() {
@@ -2144,6 +2317,12 @@ function selectBackend(chip, backend) {
2144
  updateCloudModelVisibility();
2145
  }
2146
 
 
 
 
 
 
 
2147
  function updateDimensions() {
2148
  const aspect = document.getElementById('gen-aspect').value;
2149
  const dimensions = {
@@ -2194,13 +2373,21 @@ function updateCloudModelVisibility() {
2194
  document.getElementById('cloud-model-select').style.display = (isCloud && !isImg2img) ? '' : 'none';
2195
  // Show edit cloud models when cloud + img2img
2196
  document.getElementById('cloud-edit-model-select').style.display = (isCloud && isImg2img) ? '' : 'none';
 
 
2197
  // Show pod settings when pod backend selected (not in video mode)
2198
  document.getElementById('pod-settings-section').style.display = isPod ? '' : 'none';
2199
  if (isPod) {
2200
  loadPodLorasForGeneration();
2201
- // Set FLUX.2 defaults (low CFG, 28 steps)
2202
- document.getElementById('gen-cfg').value = '2';
2203
- document.getElementById('gen-steps').value = '28';
 
 
 
 
 
 
2204
  // Auto-open Advanced section so CFG/steps are visible
2205
  const adv = document.querySelector('#local-settings-section details');
2206
  if (adv) adv.open = true;
@@ -2241,13 +2428,14 @@ async function loadPodLorasForGeneration() {
2241
  const loraRes = await fetch(API + '/api/pod/loras');
2242
  const loraData = await loraRes.json();
2243
 
2244
- loraSelect.innerHTML = '<option value="">None - Base FLUX model</option>';
 
 
2245
  if (loraData.loras && loraData.loras.length > 0) {
2246
  loraData.loras.forEach(lora => {
2247
- const opt = document.createElement('option');
2248
- opt.value = lora;
2249
- opt.text = lora.replace('.safetensors', '');
2250
- loraSelect.appendChild(opt);
2251
  });
2252
  }
2253
 
@@ -2280,6 +2468,31 @@ async function doGenerate() {
2280
  try {
2281
  // img2video mode — video generation
2282
  if (selectedMode === 'img2video') {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2283
  if (!videoImageFile) {
2284
  throw new Error('Please upload an image to animate');
2285
  }
@@ -2288,7 +2501,7 @@ async function doGenerate() {
2288
  formData.append('prompt', document.getElementById('gen-positive').value || 'smooth motion, high quality video');
2289
  formData.append('negative_prompt', document.getElementById('gen-negative').value || 'blurry, low quality, static');
2290
  formData.append('num_frames', document.getElementById('video-duration').value || '81');
2291
- formData.append('fps', document.getElementById('video-fps').value || '24');
2292
  formData.append('seed', document.getElementById('gen-seed').value || '-1');
2293
  formData.append('backend', selectedVideoBackend);
2294
 
@@ -2388,6 +2601,8 @@ async function doGenerate() {
2388
  height: parseInt(document.getElementById('gen-height').value) || 1024,
2389
  lora_name: document.getElementById('pod-lora-select')?.value || null,
2390
  lora_strength: parseFloat(document.getElementById('pod-lora-strength')?.value) || 0.85,
 
 
2391
  character_id: document.getElementById('gen-character').value || null,
2392
  template_id: document.getElementById('gen-template').value || null,
2393
  };
@@ -2406,6 +2621,8 @@ async function doGenerate() {
2406
  return;
2407
  }
2408
 
 
 
2409
  const body = {
2410
  character_id: document.getElementById('gen-character').value || null,
2411
  template_id: document.getElementById('gen-template').value || null,
@@ -2419,6 +2636,7 @@ async function doGenerate() {
2419
  width: parseInt(document.getElementById('gen-width').value) || 832,
2420
  height: parseInt(document.getElementById('gen-height').value) || 1216,
2421
  variables: variables,
 
2422
  };
2423
 
2424
  const endpoint = selectedBackend === 'cloud' ? '/api/generate/cloud' : '/api/generate';
@@ -2536,7 +2754,7 @@ async function pollForVideo(jobId) {
2536
  const preview = document.getElementById('preview-body');
2537
  const startTime = Date.now();
2538
 
2539
- for (let i = 0; i < 120; i++) { // Up to 6 minutes
2540
  await new Promise(r => setTimeout(r, 3000));
2541
 
2542
  try {
@@ -2573,19 +2791,28 @@ function showPreviewVideo(job) {
2573
  const preview = document.getElementById('preview-body');
2574
  preview.innerHTML = `
2575
  <div style="text-align:center;width:100%">
2576
- <video src="/api/video/${job.filename}" autoplay loop muted playsinline
2577
  style="max-width:100%;max-height:70vh;border-radius:8px;margin-bottom:12px"></video>
2578
- <div style="display:flex;gap:8px;justify-content:center;flex-wrap:wrap">
2579
  <span class="tag" style="background:var(--accent);color:white">Video</span>
2580
  <span class="tag" style="background:var(--bg-hover)">${job.num_frames} frames</span>
2581
  <span class="tag" style="background:var(--bg-hover)">${job.fps} fps</span>
 
2582
  </div>
2583
- <p style="color:var(--text-secondary);margin-top:8px;font-size:12px">Seed: ${job.seed || 'N/A'}</p>
2584
  <a href="/api/video/${job.filename}" download class="btn btn-secondary" style="margin-top:12px">Download Video</a>
2585
  </div>
2586
  `;
2587
  }
2588
 
 
 
 
 
 
 
 
 
2589
  // --- Batch ---
2590
  async function doBatch() {
2591
  const btn = document.getElementById('batch-btn');
@@ -2933,6 +3160,69 @@ function updateModelDefaults() {
2933
  break;
2934
  }
2935
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2936
  }
2937
 
2938
  function selectTrainBackend(chip, backend) {
 
898
 
899
  <div id="cloud-model-select" style="display:none">
900
  <label>Model</label>
901
+ <select id="gen-cloud-model" onchange="updateCloudLoraVisibility()">
902
  <optgroup label="Recommended">
903
  <option value="seedream-4.5" selected>SeeDream v4.5 (Best)</option>
904
  <option value="gpt-image-1.5">GPT Image 1.5</option>
 
909
  <option value="seedream-3.1">SeeDream v3.1</option>
910
  </optgroup>
911
  <optgroup label="Fast">
912
+ <option value="z-image-turbo">Z-Image Turbo (Fastest)</option>
913
+ <option value="z-image-turbo-lora">Z-Image Turbo + LoRA</option>
914
  <option value="gpt-image-1-mini">GPT Image Mini</option>
915
  <option value="nano-banana">NanoBanana</option>
916
  </optgroup>
917
+ <optgroup label="LoRA Support">
918
+ <option value="z-image-base-lora">Z-Image Base + LoRA ($0.012)</option>
919
+ </optgroup>
920
  <optgroup label="Other">
921
  <option value="kling-image-o3">Kling Image O3</option>
922
  <option value="wan-2.6">WAN 2.6</option>
 
927
  </select>
928
  </div>
929
 
930
+ <div id="cloud-lora-input" style="display:none">
931
+ <label>LoRA Path <span style="color:var(--text-secondary);font-weight:400">(HuggingFace repo or URL)</span></label>
932
+ <input type="text" id="cloud-lora-path" placeholder="e.g. username/my-character-lora"
933
+ style="width:100%;padding:8px;border-radius:6px;border:1px solid var(--border);background:var(--bg-primary);color:var(--text-primary);font-size:13px;box-sizing:border-box">
934
+ <div style="display:flex;align-items:center;gap:8px;margin-top:6px">
935
+ <label style="margin:0;flex-shrink:0">Strength</label>
936
+ <input type="range" id="cloud-lora-strength" min="0" max="2" step="0.05" value="1"
937
+ oninput="this.nextElementSibling.textContent=this.value"
938
+ style="flex:1">
939
+ <span style="font-size:12px;min-width:28px">1</span>
940
+ </div>
941
+ </div>
942
+
943
  <div id="cloud-edit-model-select" style="display:none">
944
  <label>Model</label>
945
  <select id="gen-cloud-edit-model">
 
951
  <optgroup label="Multi-Reference (2+ images)">
952
  <option value="seedream-4.5-multi">SeeDream v4.5 Sequential (up to 3)</option>
953
  <option value="seedream-4-multi">SeeDream v4 Sequential (up to 3)</option>
954
+ <option value="nano-banana-pro-multi">NanoBanana Pro (2 refs)</option>
955
  <option value="kling-o1-multi">Kling O1 (up to 10 refs)</option>
956
  <option value="qwen-multi-angle">Qwen Multi-Angle</option>
957
  </optgroup>
 
967
  <option value="wan-2.5-edit">WAN 2.5 Edit</option>
968
  <option value="wan-2.2-edit">WAN 2.2 Edit</option>
969
  <option value="qwen-edit-lora">Qwen Edit + LoRA</option>
 
970
  <option value="kling-o3-edit">Kling O3 Edit</option>
971
  <option value="dreamina-3-edit">Dreamina v3 Edit</option>
972
  </optgroup>
 
982
  <span id="pod-status-indicator">Checking pod status...</span>
983
  </div>
984
  <label>Base Model</label>
985
+ <select id="pod-model-select" onchange="updateVisibility()">
986
+ <option value="z_image">Z-Image Turbo (+ LoRA)</option>
987
  <option value="flux">FLUX.2 Dev (Realistic)</option>
988
+ <option value="wan22">WAN 2.2 T2V (txt2img + LoRA)</option>
989
  </select>
990
+ <label style="margin-top:8px">LoRA 1 <span style="color:var(--text-secondary);font-weight:400">(body)</span></label>
991
  <select id="pod-lora-select">
992
  <option value="">None (Base model only)</option>
993
  </select>
994
+ <label style="margin-top:6px">Strength</label>
995
  <input type="number" id="pod-lora-strength" value="0.85" min="0" max="1.5" step="0.05" style="width:80px">
996
+ <label style="margin-top:8px">LoRA 2 <span style="color:var(--text-secondary);font-weight:400">(face)</span></label>
997
+ <select id="pod-lora-select-2">
998
+ <option value="">None</option>
999
+ </select>
1000
+ <label style="margin-top:6px">Strength</label>
1001
+ <input type="number" id="pod-lora-strength-2" value="0.85" min="0" max="1.5" step="0.05" style="width:80px">
1002
  <div style="font-size:11px;color:var(--text-secondary);margin-top:4px">
1003
  Start the pod in Status page first.
1004
  </div>
 
1006
 
1007
  <!-- Image to Video settings -->
1008
  <div id="img2video-section" style="display:none">
1009
+
1010
+ <!-- Sub-mode: Image to Video vs Animate -->
1011
+ <div class="chips" id="video-submode-chips" style="margin-bottom:10px">
1012
+ <div class="chip selected" onclick="selectVideoSubMode(this,'i2v')">Image to Video</div>
1013
+ <div class="chip" onclick="selectVideoSubMode(this,'animate')">Animate (Dance)</div>
1014
  </div>
1015
+
1016
+ <!-- Standard Image-to-Video -->
1017
+ <div id="i2v-sub-section">
1018
+ <div class="section-title">Source Image</div>
1019
+ <div class="drop-zone" id="video-drop-zone" onclick="document.getElementById('video-file-input').click()">
1020
+ <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5"><path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/><polyline points="17 8 12 3 7 8"/><line x1="12" y1="3" x2="12" y2="15"/></svg>
1021
+ <div>Drop or click to upload</div>
1022
+ </div>
1023
+ <input type="file" id="video-file-input" accept="image/*" style="display:none" onchange="handleVideoImage(this)">
1024
+ <div id="video-preview" style="display:none; margin-top:6px">
1025
+ <img id="video-preview-img" style="max-width:100%; max-height:100px; border-radius:6px">
1026
+ </div>
1027
+
1028
+ <label style="margin-top:12px">Video Model</label>
1029
+ <select id="video-cloud-model">
1030
+ <optgroup label="Recommended">
1031
+ <option value="wan-2.6-i2v-pro" selected>WAN 2.6 Pro ($0.05/s)</option>
1032
+ <option value="wan-2.6-i2v-flash">WAN 2.6 Flash (Fast)</option>
1033
+ <option value="kling-o3-pro">Kling O3 Pro</option>
1034
+ </optgroup>
1035
+ <optgroup label="Premium (Higgsfield - requires API key)">
1036
+ <option value="kling-3.0-pro">Kling 3.0 Pro (15s + Audio)</option>
1037
+ <option value="kling-3.0">Kling 3.0</option>
1038
+ <option value="sora-2-hf">Sora 2</option>
1039
+ <option value="veo-3.1-hf">Veo 3.1</option>
1040
+ </optgroup>
1041
+ <optgroup label="Budget Friendly">
1042
+ <option value="wan-2.2-i2v-720p">WAN 2.2 720p ($0.01/s)</option>
1043
+ <option value="wan-2.2-i2v-1080p">WAN 2.2 1080p</option>
1044
+ <option value="wan-2.5-i2v">WAN 2.5</option>
1045
+ </optgroup>
1046
+ <optgroup label="Cinematic">
1047
+ <option value="higgsfield-dop">Higgsfield DoP (5s)</option>
1048
+ <option value="seedance-1.5-pro">Seedance Pro</option>
1049
+ <option value="dreamina-i2v-1080p">Dreamina 1080p</option>
1050
+ </optgroup>
1051
+ <optgroup label="Other">
1052
+ <option value="kling-o3">Kling O3</option>
1053
+ <option value="grok-imagine-i2v">Grok Imagine Video (xAI)</option>
1054
+ <option value="veo-3.1">Veo 3.1 (WaveSpeed)</option>
1055
+ <option value="sora-2">Sora 2 (WaveSpeed)</option>
1056
+ <option value="vidu-q3">Vidu Q3</option>
1057
+ </optgroup>
1058
+ </select>
1059
+
1060
+ <label>Duration</label>
1061
+ <select id="video-duration">
1062
+ <option value="41">2s</option>
1063
+ <option value="81" selected>3s</option>
1064
+ <option value="121">5s</option>
1065
+ <option value="241">10s</option>
1066
+ <option value="361">15s</option>
1067
+ </select>
1068
  </div>
1069
 
1070
+ <!-- Animate (Dance) sub-section -->
1071
+ <div id="animate-sub-section" style="display:none">
1072
+ <div class="section-title">Character Image</div>
1073
+ <div class="drop-zone" id="animate-char-zone" onclick="document.getElementById('animate-char-input').click()">
1074
+ <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5"><path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/><polyline points="17 8 12 3 7 8"/><line x1="12" y1="3" x2="12" y2="15"/></svg>
1075
+ <div>Character photo</div>
1076
+ </div>
1077
+ <input type="file" id="animate-char-input" accept="image/*" style="display:none" onchange="handleAnimateChar(this)">
1078
+
1079
+ <div class="section-title" style="margin-top:10px">Driving Video</div>
1080
+ <div class="drop-zone" id="animate-video-zone" onclick="document.getElementById('animate-video-input').click()">
1081
+ <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5"><rect x="2" y="2" width="20" height="20" rx="2"/><polygon points="10,8 16,12 10,16"/></svg>
1082
+ <div>Dance video (mp4)</div>
1083
+ </div>
1084
+ <input type="file" id="animate-video-input" accept="video/*" style="display:none" onchange="handleAnimateVideo(this)">
1085
+
1086
+ <label style="margin-top:10px">Resolution</label>
1087
+ <select id="animate-resolution">
1088
+ <option value="480x832">480×832 (portrait)</option>
1089
+ <option value="720x1280" selected>720×1280 (HD portrait)</option>
1090
+ <option value="1080x1920">1080×1920 (TikTok full HD ⚡ high VRAM)</option>
1091
+ <option value="832x480">832×480 (landscape)</option>
1092
+ <option value="1280x720">1280×720 (HD landscape)</option>
1093
+ <option value="512x512">512×512 (square)</option>
1094
+ </select>
1095
+
1096
+ <label>Background</label>
1097
+ <select id="animate-bg-mode">
1098
+ <option value="auto" selected>Auto (model decides)</option>
1099
+ <option value="driving_video">From driving video</option>
1100
+ <option value="keep">Keep (character image bg)</option>
1101
+ </select>
1102
+
1103
+ <label>Frames</label>
1104
+ <select id="animate-frames">
1105
+ <option value="0">Match video (auto)</option>
1106
+ <option value="25">25 (~1.5s)</option>
1107
+ <option value="49">49 (~3s)</option>
1108
+ <option value="81" selected>81 (~5s)</option>
1109
+ <option value="121">121 (~7.5s)</option>
1110
+ <option value="161">161 (~10s)</option>
1111
+ <option value="201">201 (~12.5s)</option>
1112
+ <option value="241">241 (~15s)</option>
1113
+ <option value="289">289 (~18s)</option>
1114
+ <option value="321">321 (~20s)</option>
1115
+ <option value="385">385 (~24s)</option>
1116
+ <option value="481">481 (~30s)</option>
1117
+ </select>
1118
+
1119
+ <div style="font-size:11px;color:var(--text-secondary);margin-top:6px">
1120
+ Runs on RunPod pod via WAN 2.2 Animate. Pod must be running with models installed.
1121
+ </div>
1122
+ </div>
1123
 
 
 
 
 
 
 
 
 
1124
  </div>
1125
 
1126
  <!-- Reference image upload for img2img -->
 
1375
  </optgroup>
1376
  </select>
1377
  </div>
1378
+ <div style="margin-top:8px;padding-top:8px;border-top:1px solid rgba(59,130,246,0.2)">
1379
+ <button class="btn btn-secondary btn-small" onclick="preDownloadModels()" id="btn-predownload">Pre-download models to volume</button>
1380
+ <span id="predownload-status" style="font-size:11px;margin-left:8px;color:var(--text-secondary)"></span>
1381
+ </div>
1382
  </div>
1383
  <div id="runpod-not-configured" style="display:none;margin-top:8px;padding:12px;background:rgba(239,68,68,0.08);border:1px solid var(--red);border-radius:8px;font-size:12px;color:var(--text-secondary)">
1384
  <div style="font-weight:600;color:var(--red);margin-bottom:4px">RunPod Not Configured</div>
 
1398
  <div>Drop images here or click to browse</div>
1399
  <div style="font-size:11px;margin-top:4px">Upload 20-50 images of the subject (min 5)</div>
1400
  </div>
1401
+ <input type="file" id="train-file-input" accept="image/*,.txt" multiple style="display:none" onchange="handleTrainImages(this)">
1402
  <div id="train-image-count" style="font-size:12px;color:var(--text-secondary);margin-top:6px"></div>
1403
 
1404
  <!-- Caption editor: shown after images are uploaded -->
 
1517
  </div>
1518
  <div id="pod-controls" style="display:flex; gap:8px; align-items:center; flex-wrap:wrap">
1519
  <select id="pod-model-type" style="padding:8px 12px; border-radius:6px; background:var(--bg-primary); border:1px solid var(--border); color:var(--text-primary)">
1520
+ <option value="z_image">Z-Image Turbo (txt2img + LoRA)</option>
1521
  <option value="flux2">FLUX.2 Dev (Realistic txt2img)</option>
1522
  <option value="flux1">FLUX.1 Dev (txt2img)</option>
1523
+ <option value="wan22">WAN 2.2 T2V (txt2img + LoRA)</option>
1524
+ <option value="wan22_i2v">WAN 2.2 I2V (img2video)</option>
1525
+ <option value="wan22_animate">WAN 2.2 Animate (Dance/Motion transfer)</option>
1526
  </select>
1527
  <select id="pod-gpu-select" style="padding:8px 12px; border-radius:6px; background:var(--bg-primary); border:1px solid var(--border); color:var(--text-primary)">
1528
  <optgroup label="48GB+ (FLUX.2 / Large models)">
 
1681
  let selectedRating = 'sfw';
1682
  let selectedBackend = 'pod';
1683
  let selectedVideoBackend = 'cloud';
1684
+ let videoSubMode = 'i2v';
1685
+ let animateCharFile = null;
1686
+ let animateDrivingVideoFile = null;
1687
  let selectedMode = 'txt2img';
1688
  let templatesData = [];
1689
  let charactersData = [];
 
1933
  document.getElementById('pose-file-input').value = '';
1934
  }
1935
 
1936
+ function selectVideoSubMode(chip, mode) {
1937
+ chip.parentElement.querySelectorAll('.chip').forEach(c => c.classList.remove('selected'));
1938
+ chip.classList.add('selected');
1939
+ videoSubMode = mode;
1940
+ document.getElementById('i2v-sub-section').style.display = mode === 'i2v' ? '' : 'none';
1941
+ document.getElementById('animate-sub-section').style.display = mode === 'animate' ? '' : 'none';
1942
+ }
1943
+
1944
+ function handleAnimateChar(input) {
1945
+ if (!input.files[0]) return;
1946
+ animateCharFile = input.files[0];
1947
+ const zone = document.getElementById('animate-char-zone');
1948
+ zone.classList.add('has-file');
1949
+ const reader = new FileReader();
1950
+ reader.onload = e => {
1951
+ zone.innerHTML = `
1952
+ <img src="${e.target.result}" style="max-height:120px;border-radius:6px">
1953
+ <div style="margin-top:4px;font-size:11px">${input.files[0].name}</div>
1954
+ <button class="btn btn-secondary btn-small" onclick="event.stopPropagation();animateCharFile=null;this.closest('.drop-zone').classList.remove('has-file');this.closest('.drop-zone').innerHTML='<svg viewBox=\\'0 0 24 24\\' fill=\\'none\\' stroke=\\'currentColor\\' stroke-width=\\'1.5\\'><path d=\\'M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4\\'/><polyline points=\\'17 8 12 3 7 8\\'/><line x1=\\'12\\' y1=\\'3\\' x2=\\'12\\' y2=\\'15\\'/></svg><div>Character photo</div>'" style="margin-top:6px">Remove</button>
1955
+ `;
1956
+ };
1957
+ reader.readAsDataURL(input.files[0]);
1958
+ }
1959
+
1960
+ function handleAnimateVideo(input) {
1961
+ if (!input.files[0]) return;
1962
+ animateDrivingVideoFile = input.files[0];
1963
+ const zone = document.getElementById('animate-video-zone');
1964
+ zone.classList.add('has-file');
1965
+ zone.innerHTML = `
1966
+ <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5"><rect x="2" y="2" width="20" height="20" rx="2"/><polygon points="10,8 16,12 10,16"/></svg>
1967
+ <div style="font-size:12px;margin-top:4px">${input.files[0].name}</div>
1968
+ <div style="font-size:11px;color:var(--text-secondary)">${(input.files[0].size/1024/1024).toFixed(1)} MB</div>
1969
+ <button class="btn btn-secondary btn-small" onclick="event.stopPropagation();animateDrivingVideoFile=null;this.closest('.drop-zone').classList.remove('has-file');this.closest('.drop-zone').innerHTML='<svg viewBox=\\'0 0 24 24\\' fill=\\'none\\' stroke=\\'currentColor\\' stroke-width=\\'1.5\\'><rect x=\\'2\\' y=\\'2\\' width=\\'20\\' height=\\'20\\' rx=\\'2\\'/><polygon points=\\'10,8 16,12 10,16\\'/></svg><div>Dance video (mp4)</div>'" style="margin-top:6px">Remove</button>
1970
+ `;
1971
+ }
1972
+
1973
  function handleVideoImage(input) {
1974
  if (input.files[0]) {
1975
  videoImageFile = input.files[0];
 
2004
  }
2005
 
2006
  function handleTrainImages(input) {
2007
+ const allFiles = Array.from(input.files);
2008
+ const imageFiles = allFiles.filter(f => f.type.startsWith('image/'));
2009
+ const txtFiles = allFiles.filter(f => f.name.endsWith('.txt'));
2010
+ trainImageFiles = imageFiles;
2011
+ // Auto-load captions from .txt files matching image filenames (e.g. 1.txt -> 1.png)
2012
+ if (txtFiles.length > 0) {
2013
+ const pending = txtFiles.map(tf => tf.text().then(text => {
2014
+ const baseName = tf.name.replace(/\.txt$/, '');
2015
+ const matchImg = imageFiles.find(img => img.name.replace(/\.[^.]+$/, '') === baseName);
2016
+ if (matchImg) trainCaptions[matchImg.name] = text.trim();
2017
+ }));
2018
+ Promise.all(pending).then(() => {
2019
+ updateTrainCount();
2020
+ buildCaptionEditor();
2021
+ const loaded = Object.keys(trainCaptions).length;
2022
+ if (loaded > 0) toast(`Loaded ${loaded} captions from .txt files`, 'success');
2023
+ });
2024
+ } else {
2025
+ updateTrainCount();
2026
+ buildCaptionEditor();
2027
+ }
2028
  }
2029
 
2030
  function handleTrainDrop(files) {
2031
+ const allFiles = Array.from(files);
2032
+ const imageFiles = allFiles.filter(f => f.type.startsWith('image/'));
2033
+ const txtFiles = allFiles.filter(f => f.name.endsWith('.txt'));
2034
+ trainImageFiles = imageFiles;
2035
+ if (txtFiles.length > 0) {
2036
+ const pending = txtFiles.map(tf => tf.text().then(text => {
2037
+ const baseName = tf.name.replace(/\.txt$/, '');
2038
+ const matchImg = imageFiles.find(img => img.name.replace(/\.[^.]+$/, '') === baseName);
2039
+ if (matchImg) trainCaptions[matchImg.name] = text.trim();
2040
+ }));
2041
+ Promise.all(pending).then(() => {
2042
+ updateTrainCount();
2043
+ buildCaptionEditor();
2044
+ const loaded = Object.keys(trainCaptions).length;
2045
+ if (loaded > 0) toast(`Loaded ${loaded} captions from .txt files`, 'success');
2046
+ });
2047
+ } else {
2048
+ updateTrainCount();
2049
+ buildCaptionEditor();
2050
+ }
2051
  }
2052
 
2053
  function updateTrainCount() {
 
2317
  updateCloudModelVisibility();
2318
  }
2319
 
2320
+ function updateCloudLoraVisibility() {
2321
+ const model = document.getElementById('gen-cloud-model')?.value || '';
2322
+ const loraInput = document.getElementById('cloud-lora-input');
2323
+ if (loraInput) loraInput.style.display = model.includes('-lora') ? '' : 'none';
2324
+ }
2325
+
2326
  function updateDimensions() {
2327
  const aspect = document.getElementById('gen-aspect').value;
2328
  const dimensions = {
 
2373
  document.getElementById('cloud-model-select').style.display = (isCloud && !isImg2img) ? '' : 'none';
2374
  // Show edit cloud models when cloud + img2img
2375
  document.getElementById('cloud-edit-model-select').style.display = (isCloud && isImg2img) ? '' : 'none';
2376
+ // Show LoRA input for z-image lora models
2377
+ updateCloudLoraVisibility();
2378
  // Show pod settings when pod backend selected (not in video mode)
2379
  document.getElementById('pod-settings-section').style.display = isPod ? '' : 'none';
2380
  if (isPod) {
2381
  loadPodLorasForGeneration();
2382
+ // Set defaults based on pod model type
2383
+ const podModel = document.getElementById('pod-model-select')?.value || '';
2384
+ if (podModel.startsWith('wan22')) {
2385
+ document.getElementById('gen-cfg').value = '1';
2386
+ document.getElementById('gen-steps').value = '8';
2387
+ } else {
2388
+ document.getElementById('gen-cfg').value = '2';
2389
+ document.getElementById('gen-steps').value = '28';
2390
+ }
2391
  // Auto-open Advanced section so CFG/steps are visible
2392
  const adv = document.querySelector('#local-settings-section details');
2393
  if (adv) adv.open = true;
 
2428
  const loraRes = await fetch(API + '/api/pod/loras');
2429
  const loraData = await loraRes.json();
2430
 
2431
+ const loraSelect2 = document.getElementById('pod-lora-select-2');
2432
+ loraSelect.innerHTML = '<option value="">None - Base model</option>';
2433
+ if (loraSelect2) loraSelect2.innerHTML = '<option value="">None</option>';
2434
  if (loraData.loras && loraData.loras.length > 0) {
2435
  loraData.loras.forEach(lora => {
2436
+ const label = lora.replace('.safetensors', '');
2437
+ loraSelect.appendChild(Object.assign(document.createElement('option'), { value: lora, text: label }));
2438
+ if (loraSelect2) loraSelect2.appendChild(Object.assign(document.createElement('option'), { value: lora, text: label }));
 
2439
  });
2440
  }
2441
 
 
2468
  try {
2469
  // img2video mode — video generation
2470
  if (selectedMode === 'img2video') {
2471
+
2472
+ // Animate (Dance) sub-mode — WAN 2.2 Animate on RunPod
2473
+ if (videoSubMode === 'animate') {
2474
+ if (!animateCharFile) throw new Error('Please upload a character image');
2475
+ if (!animateDrivingVideoFile) throw new Error('Please upload a driving dance video');
2476
+ const resParts = document.getElementById('animate-resolution').value.split('x');
2477
+ const formData = new FormData();
2478
+ formData.append('image', animateCharFile);
2479
+ formData.append('driving_video', animateDrivingVideoFile);
2480
+ formData.append('prompt', document.getElementById('gen-positive').value || 'a person dancing, smooth motion, high quality');
2481
+ formData.append('negative_prompt', document.getElementById('gen-negative').value || '');
2482
+ formData.append('width', resParts[0] || '832');
2483
+ formData.append('height', resParts[1] || '480');
2484
+ formData.append('num_frames', document.getElementById('animate-frames').value || '81');
2485
+ formData.append('bg_mode', document.getElementById('animate-bg-mode').value || 'keep');
2486
+ formData.append('seed', document.getElementById('gen-seed').value || '-1');
2487
+ const res = await fetch(API + '/api/video/animate', { method: 'POST', body: formData });
2488
+ const data = await res.json();
2489
+ if (!res.ok) throw new Error(data.detail || 'Animate generation failed');
2490
+ toast('Animation generating on RunPod (WAN 2.2 Animate)...', 'info');
2491
+ await pollForVideo(data.job_id);
2492
+ return;
2493
+ }
2494
+
2495
+ // Standard Image-to-Video
2496
  if (!videoImageFile) {
2497
  throw new Error('Please upload an image to animate');
2498
  }
 
2501
  formData.append('prompt', document.getElementById('gen-positive').value || 'smooth motion, high quality video');
2502
  formData.append('negative_prompt', document.getElementById('gen-negative').value || 'blurry, low quality, static');
2503
  formData.append('num_frames', document.getElementById('video-duration').value || '81');
2504
+ formData.append('fps', document.getElementById('video-fps')?.value || '24');
2505
  formData.append('seed', document.getElementById('gen-seed').value || '-1');
2506
  formData.append('backend', selectedVideoBackend);
2507
 
 
2601
  height: parseInt(document.getElementById('gen-height').value) || 1024,
2602
  lora_name: document.getElementById('pod-lora-select')?.value || null,
2603
  lora_strength: parseFloat(document.getElementById('pod-lora-strength')?.value) || 0.85,
2604
+ lora_name_2: document.getElementById('pod-lora-select-2')?.value || null,
2605
+ lora_strength_2: parseFloat(document.getElementById('pod-lora-strength-2')?.value) || 0.85,
2606
  character_id: document.getElementById('gen-character').value || null,
2607
  template_id: document.getElementById('gen-template').value || null,
2608
  };
 
2621
  return;
2622
  }
2623
 
2624
+ const cloudLoraPath = document.getElementById('cloud-lora-path')?.value?.trim();
2625
+ const cloudLoraStrength = parseFloat(document.getElementById('cloud-lora-strength')?.value) || 1.0;
2626
  const body = {
2627
  character_id: document.getElementById('gen-character').value || null,
2628
  template_id: document.getElementById('gen-template').value || null,
 
2636
  width: parseInt(document.getElementById('gen-width').value) || 832,
2637
  height: parseInt(document.getElementById('gen-height').value) || 1216,
2638
  variables: variables,
2639
+ loras: cloudLoraPath ? [{ name: cloudLoraPath, strength_model: cloudLoraStrength, strength_clip: cloudLoraStrength }] : [],
2640
  };
2641
 
2642
  const endpoint = selectedBackend === 'cloud' ? '/api/generate/cloud' : '/api/generate';
 
2754
  const preview = document.getElementById('preview-body');
2755
  const startTime = Date.now();
2756
 
2757
+ for (let i = 0; i < 600; i++) { // Up to 30 minutes
2758
  await new Promise(r => setTimeout(r, 3000));
2759
 
2760
  try {
 
2791
  const preview = document.getElementById('preview-body');
2792
  preview.innerHTML = `
2793
  <div style="text-align:center;width:100%">
2794
+ <video id="preview-video" src="/api/video/${job.filename}" autoplay loop controls playsinline
2795
  style="max-width:100%;max-height:70vh;border-radius:8px;margin-bottom:12px"></video>
2796
+ <div style="display:flex;gap:8px;justify-content:center;flex-wrap:wrap;margin-bottom:8px">
2797
  <span class="tag" style="background:var(--accent);color:white">Video</span>
2798
  <span class="tag" style="background:var(--bg-hover)">${job.num_frames} frames</span>
2799
  <span class="tag" style="background:var(--bg-hover)">${job.fps} fps</span>
2800
+ <button id="audio-toggle-btn" onclick="toggleVideoAudio()" style="padding:4px 12px;border-radius:6px;border:1px solid var(--border);background:var(--bg-secondary);color:var(--text-primary);cursor:pointer;font-size:12px">🔇 Unmute</button>
2801
  </div>
2802
+ <p style="color:var(--text-secondary);margin-top:4px;font-size:12px">Seed: ${job.seed || 'N/A'}</p>
2803
  <a href="/api/video/${job.filename}" download class="btn btn-secondary" style="margin-top:12px">Download Video</a>
2804
  </div>
2805
  `;
2806
  }
2807
 
2808
+ function toggleVideoAudio() {
2809
+ const video = document.getElementById('preview-video');
2810
+ const btn = document.getElementById('audio-toggle-btn');
2811
+ if (!video) return;
2812
+ video.muted = !video.muted;
2813
+ btn.textContent = video.muted ? '🔇 Unmute' : '🔊 Mute';
2814
+ }
2815
+
2816
  // --- Batch ---
2817
  async function doBatch() {
2818
  const btn = document.getElementById('batch-btn');
 
3160
  break;
3161
  }
3162
  }
3163
+
3164
+ // Auto-select GPU for models that need specific hardware
3165
+ const gpuSelect = document.getElementById('train-gpu-type');
3166
+ if (gpuSelect) {
3167
+ const modelType = model.model_type || '';
3168
+ if (modelType === 'wan22') {
3169
+ // WAN 2.2 needs A100 80GB
3170
+ for (let opt of gpuSelect.options) {
3171
+ if (opt.value.includes('A100-SXM4')) { opt.selected = true; break; }
3172
+ }
3173
+ } else if (modelType === 'flux2') {
3174
+ // FLUX.2 needs 48GB+ — default to A6000
3175
+ for (let opt of gpuSelect.options) {
3176
+ if (opt.value.includes('A6000')) { opt.selected = true; break; }
3177
+ }
3178
+ }
3179
+ }
3180
+ }
3181
+
3182
+ async function preDownloadModels() {
3183
+ const btn = document.getElementById('btn-predownload');
3184
+ const status = document.getElementById('predownload-status');
3185
+ const modelKey = document.getElementById('train-base-model').value;
3186
+ const model = trainingModels[modelKey];
3187
+ const modelType = model?.model_type || 'wan22';
3188
+
3189
+ btn.disabled = true;
3190
+ status.textContent = 'Starting download pod...';
3191
+ status.style.color = 'var(--blue)';
3192
+
3193
+ try {
3194
+ const res = await fetch(API + '/api/pod/download-models', {
3195
+ method: 'POST',
3196
+ headers: {'Content-Type': 'application/json'},
3197
+ body: JSON.stringify({model_type: modelType, gpu_type: 'NVIDIA GeForce RTX 3090'})
3198
+ });
3199
+ const data = await res.json();
3200
+ if (!res.ok) { throw new Error(data.detail || 'Failed'); }
3201
+
3202
+ // Poll for progress
3203
+ const poll = setInterval(async () => {
3204
+ try {
3205
+ const r = await fetch(API + '/api/pod/download-models/status');
3206
+ const d = await r.json();
3207
+ status.textContent = d.progress || d.status;
3208
+ if (d.status === 'completed') {
3209
+ clearInterval(poll);
3210
+ status.style.color = 'var(--green)';
3211
+ btn.disabled = false;
3212
+ btn.textContent = 'Models downloaded!';
3213
+ } else if (d.status === 'failed') {
3214
+ clearInterval(poll);
3215
+ status.style.color = 'var(--red)';
3216
+ status.textContent = 'Failed: ' + (d.error || 'unknown');
3217
+ btn.disabled = false;
3218
+ }
3219
+ } catch(e) { /* ignore poll errors */ }
3220
+ }, 5000);
3221
+ } catch(e) {
3222
+ status.textContent = 'Error: ' + e.message;
3223
+ status.style.color = 'var(--red)';
3224
+ btn.disabled = false;
3225
+ }
3226
  }
3227
 
3228
  function selectTrainBackend(chip, backend) {
src/content_engine/services/cloud_providers/wavespeed_provider.py CHANGED
@@ -27,17 +27,10 @@ import uuid
27
  from typing import Any
28
 
29
  import httpx
 
30
 
31
  from content_engine.services.cloud_providers.base import CloudGenerationResult, CloudProvider
32
 
33
- # Optional wavespeed SDK import
34
- try:
35
- from wavespeed import Client as WaveSpeedClient
36
- WAVESPEED_SDK_AVAILABLE = True
37
- except ImportError:
38
- WaveSpeedClient = None
39
- WAVESPEED_SDK_AVAILABLE = False
40
-
41
  logger = logging.getLogger(__name__)
42
 
43
  # Map friendly names to WaveSpeed model IDs (text-to-image)
@@ -53,6 +46,10 @@ MODEL_MAP = {
53
  # WAN (Alibaba)
54
  "wan-2.6": "alibaba/wan-2.6/text-to-image",
55
  "wan-2.5": "alibaba/wan-2.5/text-to-image",
 
 
 
 
56
  # Qwen (WaveSpeed)
57
  "qwen-image": "wavespeed-ai/qwen-image/text-to-image",
58
  # GPT Image (OpenAI)
@@ -96,6 +93,8 @@ VIDEO_MODEL_MAP = {
96
  "dreamina-i2v-720p": "bytedance/dreamina-v3.0/image-to-video-720p",
97
  # Sora (OpenAI)
98
  "sora-2": "openai/sora-2/image-to-video",
 
 
99
  # Vidu
100
  "vidu-q3": "vidu/q3-turbo/image-to-video",
101
  # Default
@@ -141,6 +140,8 @@ MULTI_REF_MODELS = {
141
  # SeeDream Sequential (up to 3 images for character consistency)
142
  "seedream-4.5-multi": "bytedance/seedream-v4.5/edit-sequential",
143
  "seedream-4-multi": "bytedance/seedream-v4/edit-sequential",
 
 
144
  # Kling O1 (up to 10 reference images)
145
  "kling-o1-multi": "kwaivgi/kling-o1/image-to-image",
146
  # Qwen Multi-Angle (multiple angles of same subject)
@@ -165,12 +166,7 @@ class WaveSpeedProvider(CloudProvider):
165
 
166
  def __init__(self, api_key: str):
167
  self._api_key = api_key
168
- self._client = None
169
- if WAVESPEED_SDK_AVAILABLE and WaveSpeedClient:
170
- try:
171
- self._client = WaveSpeedClient(api_key=api_key)
172
- except Exception as e:
173
- logger.warning("Failed to initialize WaveSpeed SDK: %s", e)
174
  self._http_client = httpx.AsyncClient(timeout=300)
175
 
176
  @property
@@ -390,29 +386,12 @@ class WaveSpeedProvider(CloudProvider):
390
  logger.info("Submitting to WaveSpeed model=%s", wavespeed_model)
391
 
392
  try:
393
- if self._client:
394
- # Use SDK if available
395
- output = self._client.run(
396
- wavespeed_model,
397
- payload,
398
- timeout=300.0,
399
- poll_interval=2.0,
400
- )
401
- else:
402
- # Fall back to direct HTTP API
403
- endpoint = f"{WAVESPEED_API_BASE}/{wavespeed_model}"
404
- payload["enable_sync_mode"] = True
405
- resp = await self._http_client.post(
406
- endpoint,
407
- json=payload,
408
- headers={
409
- "Authorization": f"Bearer {self._api_key}",
410
- "Content-Type": "application/json",
411
- },
412
- )
413
- resp.raise_for_status()
414
- output = resp.json()
415
-
416
  job_id = str(uuid.uuid4())
417
  self._last_result = {
418
  "job_id": job_id,
@@ -613,26 +592,20 @@ class WaveSpeedProvider(CloudProvider):
613
 
614
  async def is_available(self) -> bool:
615
  """Check if WaveSpeed API is reachable with valid credentials."""
616
- # Try SDK first if available
617
- if self._client:
618
- try:
619
- self._client.run(
620
- "wavespeed-ai/z-image/turbo",
621
- {"prompt": "test"},
622
- enable_sync_mode=True,
623
- timeout=10.0,
624
- )
625
- return True
626
- except Exception:
627
- pass
628
-
629
- # Fall back to HTTP health check
630
  try:
631
- resp = await self._http_client.get(
632
- "https://api.wavespeed.ai/api/v3/health",
633
- headers={"Authorization": f"Bearer {self._api_key}"},
 
634
  timeout=10.0,
635
  )
636
- return resp.status_code < 500
637
  except Exception:
638
- return False
 
 
 
 
 
 
 
 
27
  from typing import Any
28
 
29
  import httpx
30
+ from wavespeed import Client as WaveSpeedClient
31
 
32
  from content_engine.services.cloud_providers.base import CloudGenerationResult, CloudProvider
33
 
 
 
 
 
 
 
 
 
34
  logger = logging.getLogger(__name__)
35
 
36
  # Map friendly names to WaveSpeed model IDs (text-to-image)
 
46
  # WAN (Alibaba)
47
  "wan-2.6": "alibaba/wan-2.6/text-to-image",
48
  "wan-2.5": "alibaba/wan-2.5/text-to-image",
49
+ # Z-Image (WaveSpeed) — supports LoRA, ultra fast
50
+ "z-image-turbo": "wavespeed-ai/z-image/turbo",
51
+ "z-image-turbo-lora": "wavespeed-ai/z-image/turbo-lora",
52
+ "z-image-base-lora": "wavespeed-ai/z-image/base-lora",
53
  # Qwen (WaveSpeed)
54
  "qwen-image": "wavespeed-ai/qwen-image/text-to-image",
55
  # GPT Image (OpenAI)
 
93
  "dreamina-i2v-720p": "bytedance/dreamina-v3.0/image-to-video-720p",
94
  # Sora (OpenAI)
95
  "sora-2": "openai/sora-2/image-to-video",
96
+ # Grok (xAI)
97
+ "grok-imagine-i2v": "x-ai/grok-imagine-video/image-to-video",
98
  # Vidu
99
  "vidu-q3": "vidu/q3-turbo/image-to-video",
100
  # Default
 
140
  # SeeDream Sequential (up to 3 images for character consistency)
141
  "seedream-4.5-multi": "bytedance/seedream-v4.5/edit-sequential",
142
  "seedream-4-multi": "bytedance/seedream-v4/edit-sequential",
143
+ # NanoBanana Pro (Google) - multi-reference edit
144
+ "nano-banana-pro-multi": "google/nano-banana-pro/edit",
145
  # Kling O1 (up to 10 reference images)
146
  "kling-o1-multi": "kwaivgi/kling-o1/image-to-image",
147
  # Qwen Multi-Angle (multiple angles of same subject)
 
166
 
167
  def __init__(self, api_key: str):
168
  self._api_key = api_key
169
+ self._client = WaveSpeedClient(api_key=api_key)
 
 
 
 
 
170
  self._http_client = httpx.AsyncClient(timeout=300)
171
 
172
  @property
 
386
  logger.info("Submitting to WaveSpeed model=%s", wavespeed_model)
387
 
388
  try:
389
+ output = self._client.run(
390
+ wavespeed_model,
391
+ payload,
392
+ timeout=300.0,
393
+ poll_interval=2.0,
394
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
395
  job_id = str(uuid.uuid4())
396
  self._last_result = {
397
  "job_id": job_id,
 
592
 
593
  async def is_available(self) -> bool:
594
  """Check if WaveSpeed API is reachable with valid credentials."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
595
  try:
596
+ test = self._client.run(
597
+ "wavespeed-ai/z-image/turbo",
598
+ {"prompt": "test"},
599
+ enable_sync_mode=True,
600
  timeout=10.0,
601
  )
602
+ return True
603
  except Exception:
604
+ try:
605
+ resp = await self._http_client.get(
606
+ "https://api.wavespeed.ai/api/v3/health",
607
+ headers={"Authorization": f"Bearer {self._api_key}"},
608
+ )
609
+ return resp.status_code < 500
610
+ except Exception:
611
+ return False
src/content_engine/services/runpod_trainer.py CHANGED
@@ -472,6 +472,58 @@ print('Downloaded ae.safetensors')
472
 
473
  job._log("FLUX.2 Dev models ready")
474
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
475
  else:
476
  # SD 1.5 / SDXL / FLUX.1 — download single model file
477
  model_exists = (await self._ssh_exec(ssh, f"test -f /workspace/models/{hf_filename} && echo EXISTS || echo MISSING")).strip()
@@ -512,6 +564,8 @@ hf_hub_download('black-forest-labs/FLUX.1-dev', 'ae.safetensors', local_dir='/wo
512
 
513
  if model_type == "flux2":
514
  model_path = f"/workspace/models/FLUX.2-dev/flux2-dev.safetensors"
 
 
515
  else:
516
  model_path = f"/workspace/models/{hf_filename}"
517
 
@@ -529,47 +583,87 @@ resolution = [{resolution}, {resolution}]
529
  job._log("Created dataset.toml config")
530
 
531
  # musubi-tuner requires pre-caching latents and text encoder outputs
532
- flux2_dir = "/workspace/models/FLUX.2-dev"
533
- vae_path = f"{flux2_dir}/ae.safetensors"
534
- te_path = f"{flux2_dir}/text_encoder/model-00001-of-00010.safetensors"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
535
 
536
- job._log("Caching latents (VAE encoding)...")
537
- job.progress = 0.15
538
- self._schedule_db_save(job)
539
- cache_latents_cmd = (
540
- f"cd /workspace/musubi-tuner && PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True python src/musubi_tuner/flux_2_cache_latents.py"
541
- f" --dataset_config /workspace/dataset.toml"
542
- f" --vae {vae_path}"
543
- f" --model_version dev"
544
- f" --vae_dtype bfloat16"
545
- f" 2>&1 | tee /tmp/cache_latents.log; echo EXIT_CODE=${{PIPESTATUS[0]}}"
546
- )
547
- out = await self._ssh_exec(ssh, cache_latents_cmd, timeout=600)
548
- # Get last lines which have the real error
549
- last_lines = out.split('\n')[-30:]
550
- job._log('\n'.join(last_lines))
551
- if "EXIT_CODE=0" not in out:
552
- # Fetch the full error log
553
- err_log = await self._ssh_exec(ssh, "grep -i 'error\\|exception\\|traceback\\|failed' /tmp/cache_latents.log | tail -10")
554
- job._log(f"Cache error details: {err_log}")
555
- raise RuntimeError(f"Latent caching failed")
556
-
557
- job._log("Caching text encoder outputs (bf16)...")
558
- job.progress = 0.25
559
- self._schedule_db_save(job)
560
- cache_te_cmd = (
561
- f"cd /workspace/musubi-tuner && PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True"
562
- f" python src/musubi_tuner/flux_2_cache_text_encoder_outputs.py"
563
- f" --dataset_config /workspace/dataset.toml"
564
- f" --text_encoder {te_path}"
565
- f" --model_version dev"
566
- f" --batch_size 1"
567
- f" 2>&1; echo EXIT_CODE=$?"
568
- )
569
- out = await self._ssh_exec(ssh, cache_te_cmd, timeout=600)
570
- job._log(out[-500:] if out else "done")
571
- if "EXIT_CODE=0" not in out:
572
- raise RuntimeError(f"Text encoder caching failed: {out[-200:]}")
 
 
 
 
573
 
574
  # Build training command based on model type
575
  train_cmd = self._build_training_command(
@@ -689,6 +783,16 @@ resolution = [{resolution}, {resolution}]
689
  await self._ssh_exec(ssh, f"cp {remote_output} /runpod-volume/loras/{name}.safetensors")
690
  job._log(f"LoRA saved to volume: /runpod-volume/loras/{name}.safetensors")
691
 
 
 
 
 
 
 
 
 
 
 
692
  # Download locally (skip on HF Spaces — limited storage)
693
  if IS_HF_SPACES:
694
  job.output_path = f"/runpod-volume/loras/{name}.safetensors"
@@ -1098,6 +1202,48 @@ resolution = [{resolution}, {resolution}]
1098
 
1099
  return " ".join(args) + " 2>&1"
1100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1101
  elif model_type == "flux":
1102
  # FLUX.1 training via sd-scripts
1103
  script = "flux_train_network.py"
 
472
 
473
  job._log("FLUX.2 Dev models ready")
474
 
475
+ elif model_type == "wan22":
476
+ # WAN 2.2 T2V — 4 model files stored in /workspace/models/WAN2.2/
477
+ wan_dir = "/workspace/models/WAN2.2"
478
+ await self._ssh_exec(ssh, f"mkdir -p {wan_dir}")
479
+
480
+ wan_files = {
481
+ "DiT low-noise": {
482
+ "path": f"{wan_dir}/wan2.2_t2v_low_noise_14B_fp16.safetensors",
483
+ "repo": "Comfy-Org/Wan_2.2_ComfyUI_Repackaged",
484
+ "filename": "split_files/diffusion_models/wan2.2_t2v_low_noise_14B_fp16.safetensors",
485
+ },
486
+ "DiT high-noise": {
487
+ "path": f"{wan_dir}/wan2.2_t2v_high_noise_14B_fp16.safetensors",
488
+ "repo": "Comfy-Org/Wan_2.2_ComfyUI_Repackaged",
489
+ "filename": "split_files/diffusion_models/wan2.2_t2v_high_noise_14B_fp16.safetensors",
490
+ },
491
+ "VAE": {
492
+ "path": f"{wan_dir}/Wan2.1_VAE.pth",
493
+ "repo": "Wan-AI/Wan2.1-I2V-14B-720P",
494
+ "filename": "Wan2.1_VAE.pth",
495
+ },
496
+ "T5 text encoder": {
497
+ "path": f"{wan_dir}/models_t5_umt5-xxl-enc-bf16.pth",
498
+ "repo": "Wan-AI/Wan2.1-I2V-14B-720P",
499
+ "filename": "models_t5_umt5-xxl-enc-bf16.pth",
500
+ },
501
+ }
502
+
503
+ for label, info in wan_files.items():
504
+ exists = (await self._ssh_exec(ssh, f"test -f {info['path']} && echo EXISTS || echo MISSING")).strip()
505
+ if exists == "EXISTS":
506
+ job._log(f"WAN 2.2 {label} already cached")
507
+ else:
508
+ job._log(f"Downloading WAN 2.2 {label}...")
509
+ await self._ssh_exec(ssh, f"""python -c "
510
+ from huggingface_hub import hf_hub_download
511
+ hf_hub_download('{info['repo']}', '{info['filename']}', local_dir='{wan_dir}')
512
+ # hf_hub_download puts files in subdirs matching the filename path — move to root
513
+ import os, shutil
514
+ downloaded = os.path.join('{wan_dir}', '{info['filename']}')
515
+ target = '{info['path']}'
516
+ if os.path.exists(downloaded) and downloaded != target:
517
+ shutil.move(downloaded, target)
518
+ print('Downloaded {label}')
519
+ " 2>&1 | tail -5""", timeout=1800)
520
+ # Verify
521
+ check = (await self._ssh_exec(ssh, f"test -f {info['path']} && echo EXISTS || echo MISSING")).strip()
522
+ if check != "EXISTS":
523
+ raise RuntimeError(f"Failed to download WAN 2.2 {label}")
524
+
525
+ job._log("WAN 2.2 models ready")
526
+
527
  else:
528
  # SD 1.5 / SDXL / FLUX.1 — download single model file
529
  model_exists = (await self._ssh_exec(ssh, f"test -f /workspace/models/{hf_filename} && echo EXISTS || echo MISSING")).strip()
 
564
 
565
  if model_type == "flux2":
566
  model_path = f"/workspace/models/FLUX.2-dev/flux2-dev.safetensors"
567
+ elif model_type == "wan22":
568
+ model_path = "/workspace/models/WAN2.2/wan2.2_t2v_low_noise_14B_fp16.safetensors"
569
  else:
570
  model_path = f"/workspace/models/{hf_filename}"
571
 
 
583
  job._log("Created dataset.toml config")
584
 
585
  # musubi-tuner requires pre-caching latents and text encoder outputs
586
+ if model_type == "wan22":
587
+ wan_dir = "/workspace/models/WAN2.2"
588
+ vae_path = f"{wan_dir}/Wan2.1_VAE.pth"
589
+ te_path = f"{wan_dir}/models_t5_umt5-xxl-enc-bf16.pth"
590
+
591
+ job._log("Caching WAN 2.2 latents (VAE encoding)...")
592
+ job.progress = 0.15
593
+ self._schedule_db_save(job)
594
+ cache_latents_cmd = (
595
+ f"cd /workspace/musubi-tuner && PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True"
596
+ f" python src/musubi_tuner/wan_cache_latents.py"
597
+ f" --dataset_config /workspace/dataset.toml"
598
+ f" --vae {vae_path}"
599
+ f" --vae_dtype bfloat16"
600
+ f" 2>&1 | tee /tmp/cache_latents.log; echo EXIT_CODE=${{PIPESTATUS[0]}}"
601
+ )
602
+ out = await self._ssh_exec(ssh, cache_latents_cmd, timeout=600)
603
+ last_lines = out.split('\n')[-30:]
604
+ job._log('\n'.join(last_lines))
605
+ if "EXIT_CODE=0" not in out:
606
+ err_log = await self._ssh_exec(ssh, "grep -i 'error\\|exception\\|traceback\\|failed' /tmp/cache_latents.log | tail -10")
607
+ job._log(f"Cache error details: {err_log}")
608
+ raise RuntimeError(f"WAN latent caching failed")
609
+
610
+ job._log("Caching WAN 2.2 text encoder outputs (T5)...")
611
+ job.progress = 0.25
612
+ self._schedule_db_save(job)
613
+ cache_te_cmd = (
614
+ f"cd /workspace/musubi-tuner && PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True"
615
+ f" python src/musubi_tuner/wan_cache_text_encoder_outputs.py"
616
+ f" --dataset_config /workspace/dataset.toml"
617
+ f" --t5 {te_path}"
618
+ f" --batch_size 16"
619
+ f" 2>&1; echo EXIT_CODE=$?"
620
+ )
621
+ out = await self._ssh_exec(ssh, cache_te_cmd, timeout=600)
622
+ job._log(out[-500:] if out else "done")
623
+ if "EXIT_CODE=0" not in out:
624
+ raise RuntimeError(f"WAN text encoder caching failed: {out[-200:]}")
625
 
626
+ else:
627
+ # FLUX.2 caching
628
+ flux2_dir = "/workspace/models/FLUX.2-dev"
629
+ vae_path = f"{flux2_dir}/ae.safetensors"
630
+ te_path = f"{flux2_dir}/text_encoder/model-00001-of-00010.safetensors"
631
+
632
+ job._log("Caching latents (VAE encoding)...")
633
+ job.progress = 0.15
634
+ self._schedule_db_save(job)
635
+ cache_latents_cmd = (
636
+ f"cd /workspace/musubi-tuner && PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True python src/musubi_tuner/flux_2_cache_latents.py"
637
+ f" --dataset_config /workspace/dataset.toml"
638
+ f" --vae {vae_path}"
639
+ f" --model_version dev"
640
+ f" --vae_dtype bfloat16"
641
+ f" 2>&1 | tee /tmp/cache_latents.log; echo EXIT_CODE=${{PIPESTATUS[0]}}"
642
+ )
643
+ out = await self._ssh_exec(ssh, cache_latents_cmd, timeout=600)
644
+ last_lines = out.split('\n')[-30:]
645
+ job._log('\n'.join(last_lines))
646
+ if "EXIT_CODE=0" not in out:
647
+ err_log = await self._ssh_exec(ssh, "grep -i 'error\\|exception\\|traceback\\|failed' /tmp/cache_latents.log | tail -10")
648
+ job._log(f"Cache error details: {err_log}")
649
+ raise RuntimeError(f"Latent caching failed")
650
+
651
+ job._log("Caching text encoder outputs (bf16)...")
652
+ job.progress = 0.25
653
+ self._schedule_db_save(job)
654
+ cache_te_cmd = (
655
+ f"cd /workspace/musubi-tuner && PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True"
656
+ f" python src/musubi_tuner/flux_2_cache_text_encoder_outputs.py"
657
+ f" --dataset_config /workspace/dataset.toml"
658
+ f" --text_encoder {te_path}"
659
+ f" --model_version dev"
660
+ f" --batch_size 1"
661
+ f" 2>&1; echo EXIT_CODE=$?"
662
+ )
663
+ out = await self._ssh_exec(ssh, cache_te_cmd, timeout=600)
664
+ job._log(out[-500:] if out else "done")
665
+ if "EXIT_CODE=0" not in out:
666
+ raise RuntimeError(f"Text encoder caching failed: {out[-200:]}")
667
 
668
  # Build training command based on model type
669
  train_cmd = self._build_training_command(
 
783
  await self._ssh_exec(ssh, f"cp {remote_output} /runpod-volume/loras/{name}.safetensors")
784
  job._log(f"LoRA saved to volume: /runpod-volume/loras/{name}.safetensors")
785
 
786
+ # Also save intermediate checkpoints (step 500, 1000, 1500, etc.)
787
+ checkpoint_files = (await self._ssh_exec(ssh, f"ls /workspace/output/{name}-step*.safetensors 2>/dev/null")).strip()
788
+ if checkpoint_files:
789
+ for ckpt in checkpoint_files.split("\n"):
790
+ ckpt = ckpt.strip()
791
+ if ckpt:
792
+ ckpt_name = ckpt.split("/")[-1]
793
+ await self._ssh_exec(ssh, f"cp {ckpt} /runpod-volume/loras/{ckpt_name}")
794
+ job._log(f"Checkpoint saved: /runpod-volume/loras/{ckpt_name}")
795
+
796
  # Download locally (skip on HF Spaces — limited storage)
797
  if IS_HF_SPACES:
798
  job.output_path = f"/runpod-volume/loras/{name}.safetensors"
 
1202
 
1203
  return " ".join(args) + " 2>&1"
1204
 
1205
+ elif model_type == "wan22":
1206
+ # WAN 2.2 T2V LoRA training via musubi-tuner
1207
+ wan_dir = "/workspace/models/WAN2.2"
1208
+ dit_low = f"{wan_dir}/wan2.2_t2v_low_noise_14B_fp16.safetensors"
1209
+ dit_high = f"{wan_dir}/wan2.2_t2v_high_noise_14B_fp16.safetensors"
1210
+
1211
+ network_mod = model_cfg.get("network_module", "networks.lora_wan")
1212
+ ts_sampling = model_cfg.get("timestep_sampling", "shift")
1213
+ discrete_shift = model_cfg.get("discrete_flow_shift", 5.0)
1214
+
1215
+ args = [
1216
+ "cd /workspace/musubi-tuner && PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True",
1217
+ "accelerate launch --num_cpu_threads_per_process 1 --mixed_precision fp16",
1218
+ "src/musubi_tuner/wan_train_network.py",
1219
+ "--task t2v-A14B",
1220
+ f"--dit {dit_low}",
1221
+ f"--dit_high_noise {dit_high}",
1222
+ "--dataset_config /workspace/dataset.toml",
1223
+ "--sdpa --mixed_precision fp16",
1224
+ "--gradient_checkpointing",
1225
+ f"--timestep_sampling {ts_sampling}",
1226
+ f"--discrete_flow_shift {discrete_shift}",
1227
+ f"--network_module {network_mod}",
1228
+ f"--network_dim={network_rank}",
1229
+ f"--network_alpha={network_alpha}",
1230
+ f"--optimizer_type={optimizer}",
1231
+ f"--learning_rate={learning_rate}",
1232
+ "--seed=42",
1233
+ "--output_dir=/workspace/output",
1234
+ f"--output_name={name}",
1235
+ ]
1236
+
1237
+ if max_train_steps:
1238
+ args.append(f"--max_train_steps={max_train_steps}")
1239
+ if save_every_n_steps:
1240
+ args.append(f"--save_every_n_steps={save_every_n_steps}")
1241
+ else:
1242
+ args.append(f"--max_train_epochs={num_epochs}")
1243
+ args.append(f"--save_every_n_epochs={save_every_n_epochs}")
1244
+
1245
+ return " ".join(args) + " 2>&1"
1246
+
1247
  elif model_type == "flux":
1248
  # FLUX.1 training via sd-scripts
1249
  script = "flux_train_network.py"