Spaces:

dippoo
/

content-engine

Running

dippoo Claude Opus 4.5 committed on Feb 17

Commit

7b5f7c4

1 Parent(s): 4b21f3a

Add video model selector with cloud/pod backend support

- Added video model dropdown for img2video mode (WAN I2V, Kling, Veo, Seedance, Sora)
- Added backend selector for video generation (Cloud API vs RunPod)
- Added cloud video generation endpoint using WaveSpeed API
- Added more text-to-image models (FLUX Pro, WAN 2.6, Dreamina, Qwen)
- Added more video models (Kling O1/O3/Pro, Veo 3/3.1, Sora 2, Seedance)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

Files changed (4) hide show

src/content_engine/api/routes_video.py +172 -1
src/content_engine/api/ui.html +99 -18
src/content_engine/main.py +4 -0
src/content_engine/services/cloud_providers/wavespeed_provider.py +42 -9

src/content_engine/api/routes_video.py CHANGED Viewed

@@ -1,8 +1,9 @@
-"""Video generation routes — WAN 2.2 img2video on RunPod pod."""
 from __future__ import annotations
 import asyncio
 import logging
 import os
 import time
@@ -20,6 +21,14 @@ router = APIRouter(prefix="/api/video", tags=["video"])
 # Video jobs tracking
 _video_jobs: dict[str, dict] = {}
 # Pod state is shared from routes_pod
 def _get_pod_state():
     from content_engine.api.routes_pod import _pod_state
@@ -127,6 +136,168 @@ async def generate_video(
         raise HTTPException(500, f"Generation failed: {e}")
 async def _poll_video_job(job_id: str, prompt_id: str):
     """Poll ComfyUI for video job completion."""
     import httpx

+"""Video generation routes — WAN 2.2 img2video on RunPod pod or WaveSpeed cloud."""
 from __future__ import annotations
 import asyncio
+import base64
 import logging
 import os
 import time
 # Video jobs tracking
 _video_jobs: dict[str, dict] = {}
+# WaveSpeed provider used by the cloud video endpoint; stays None until init_wavespeed() is called (main.py does this at startup)
+_wavespeed_provider = None
+def init_wavespeed(provider) -> None:
+    """Initialize WaveSpeed provider for cloud video generation.
+
+    Stores ``provider`` in the module-level ``_wavespeed_provider`` so the
+    cloud video route can reach it. While it is unset (None), the
+    ``/generate/cloud`` route answers with HTTP 500 "WaveSpeed API not configured".
+
+    Args:
+        provider: The WaveSpeed provider instance. The cloud video task reads
+            its ``_api_key`` attribute and awaits its ``_upload_temp_image``
+            method.
+    """
+    global _wavespeed_provider
+    _wavespeed_provider = provider
 # Pod state is shared from routes_pod
 def _get_pod_state():
     from content_engine.api.routes_pod import _pod_state
         raise HTTPException(500, f"Generation failed: {e}")
+@router.post("/generate/cloud")
+async def generate_video_cloud(
+    image: UploadFile = File(...),
+    prompt: str = Form("smooth motion, high quality video"),
+    negative_prompt: str = Form(""),
+    model: str = Form("wan-2.6-i2v"),
+    num_frames: int = Form(81),
+    fps: int = Form(24),
+    seed: int = Form(-1),
+):
+    """Generate a video using WaveSpeed cloud API (Kling, WAN I2V, Veo, etc)."""
+    import random
+    import httpx
+    if not _wavespeed_provider:
+        raise HTTPException(500, "WaveSpeed API not configured")
+    job_id = str(uuid.uuid4())[:8]
+    seed = seed if seed >= 0 else random.randint(0, 2**32 - 1)
+    # Read the image
+    image_bytes = await image.read()
+    image_b64 = base64.b64encode(image_bytes).decode("utf-8")
+    # Create job entry
+    _video_jobs[job_id] = {
+        "status": "running",
+        "seed": seed,
+        "started_at": time.time(),
+        "num_frames": num_frames,
+        "fps": fps,
+        "model": model,
+        "backend": "cloud",
+    }
+    logger.info("Cloud video generation started: %s (model=%s)", job_id, model)
+    # Start background task for cloud video generation
+    asyncio.create_task(_generate_cloud_video(job_id, image_bytes, prompt, negative_prompt, model, seed))
+    return {
+        "job_id": job_id,
+        "status": "running",
+        "seed": seed,
+        "model": model,
+        "estimated_time": "~30-120 seconds",
+    }
+async def _generate_cloud_video(
+    job_id: str,
+    image_bytes: bytes,
+    prompt: str,
+    negative_prompt: str,
+    model: str,
+    seed: int,
+):
+    """Background task to generate video via WaveSpeed cloud API."""
+    import httpx
+    import aiohttp
+    try:
+        # Upload image to temporary hosting (WaveSpeed needs URL)
+        image_url = await _wavespeed_provider._upload_temp_image(image_bytes)
+        # Resolve model to WaveSpeed model ID
+        from content_engine.services.cloud_providers.wavespeed_provider import VIDEO_MODEL_MAP
+        wavespeed_model = VIDEO_MODEL_MAP.get(model, VIDEO_MODEL_MAP.get("default", "alibaba/wan-2.6-i2v-720p"))
+        # Call WaveSpeed video API
+        api_key = _wavespeed_provider._api_key
+        endpoint = f"https://api.wavespeed.ai/api/v3/{wavespeed_model}"
+        payload = {
+            "image": image_url,
+            "prompt": prompt,
+            "enable_sync_mode": True,
+        }
+        if negative_prompt:
+            payload["negative_prompt"] = negative_prompt
+        async with httpx.AsyncClient(timeout=300) as client:
+            resp = await client.post(
+                endpoint,
+                json=payload,
+                headers={
+                    "Authorization": f"Bearer {api_key}",
+                    "Content-Type": "application/json",
+                },
+            )
+            if resp.status_code != 200:
+                error_text = resp.text[:500]
+                logger.error("WaveSpeed video API error: %s", error_text)
+                _video_jobs[job_id]["status"] = "failed"
+                _video_jobs[job_id]["error"] = f"API error: {error_text[:200]}"
+                return
+            result = resp.json()
+            data = result.get("data", result)
+            # Check for failed status
+            if data.get("status") == "failed":
+                error_msg = data.get("error", "Unknown error")
+                _video_jobs[job_id]["status"] = "failed"
+                _video_jobs[job_id]["error"] = error_msg
+                return
+            # Extract video URL
+            video_url = None
+            outputs = data.get("outputs", [])
+            if outputs:
+                video_url = outputs[0]
+            elif "output" in data:
+                out = data["output"]
+                if isinstance(out, list) and out:
+                    video_url = out[0]
+                elif isinstance(out, str):
+                    video_url = out
+            if not video_url:
+                _video_jobs[job_id]["status"] = "failed"
+                _video_jobs[job_id]["error"] = "No video URL in response"
+                return
+            # Download the video
+            logger.info("Downloading cloud video: %s", video_url[:80])
+            video_resp = await client.get(video_url)
+            if video_resp.status_code != 200:
+                _video_jobs[job_id]["status"] = "failed"
+                _video_jobs[job_id]["error"] = "Failed to download video"
+                return
+            # Save to local output directory
+            from content_engine.config import settings
+            output_dir = settings.paths.output_dir / "videos"
+            output_dir.mkdir(parents=True, exist_ok=True)
+            # Determine extension from URL or default to mp4
+            ext = ".mp4"
+            if video_url.endswith(".webm"):
+                ext = ".webm"
+            elif video_url.endswith(".webp"):
+                ext = ".webp"
+            local_path = output_dir / f"video_{job_id}{ext}"
+            local_path.write_bytes(video_resp.content)
+            _video_jobs[job_id]["status"] = "completed"
+            _video_jobs[job_id]["output_path"] = str(local_path)
+            _video_jobs[job_id]["completed_at"] = time.time()
+            _video_jobs[job_id]["filename"] = local_path.name
+            logger.info("Cloud video saved: %s", local_path)
+    except Exception as e:
+        logger.error("Cloud video generation failed: %s", e)
+        _video_jobs[job_id]["status"] = "failed"
+        _video_jobs[job_id]["error"] = str(e)
 async def _poll_video_job(job_id: str, prompt_id: str):
     """Poll ComfyUI for video job completion."""
     import httpx

src/content_engine/api/ui.html CHANGED Viewed

@@ -838,11 +838,28 @@ select { cursor: pointer; }
           <div id="cloud-model-select" style="display:none">
             <label>Cloud Model (Text-to-Image)</label>
             <select id="gen-cloud-model">
-              <option value="seedream-4.5">SeeDream v4.5 (Best Quality)</option>
-              <option value="nano-banana-pro">NanoBanana Pro</option>
-              <option value="nano-banana">NanoBanana</option>
-              <option value="seedream-4">SeeDream v4</option>
-              <option value="seedream-3.1">SeeDream v3.1</option>
             </select>
           </div>
@@ -891,7 +908,39 @@ select { cursor: pointer; }
               <img id="video-preview-img" style="max-width:100%; border-radius:6px">
             </div>
             <div class="section-title" style="margin-top:12px">Video Settings</div>
-            <div style="display:grid; grid-template-columns:1fr 1fr; gap:8px">
               <div>
                 <label>Duration</label>
                 <select id="video-duration">
@@ -908,8 +957,8 @@ select { cursor: pointer; }
                 </select>
               </div>
             </div>
-            <div style="font-size:11px;color:var(--text-secondary);margin-top:8px">
-              Uses WAN 2.2 I2V on RunPod. Longer videos take more time (~2 sec per frame).
             </div>
           </div>
@@ -1244,7 +1293,11 @@ select { cursor: pointer; }
               <span style="color:var(--text-secondary)">Checking...</span>
             </div>
           </div>
-          <div id="pod-controls" style="display:flex; gap:8px; align-items:center">
             <select id="pod-gpu-select" style="padding:8px 12px; border-radius:6px; background:var(--bg-primary); border:1px solid var(--border); color:var(--text-primary)">
               <option value="NVIDIA GeForce RTX 4090">RTX 4090 - $0.44/hr (24GB)</option>
               <option value="NVIDIA RTX A6000">RTX A6000 - $0.76/hr (48GB)</option>
@@ -1388,6 +1441,7 @@ const API = '';  // same origin
 let currentPage = 'generate';
 let selectedRating = 'sfw';
 let selectedBackend = 'pod';
 let selectedMode = 'txt2img';
 let templatesData = [];
 let charactersData = [];
@@ -1633,11 +1687,10 @@ function selectMode(chip, mode) {
   selectedMode = mode;
   document.getElementById('img2img-section').style.display = mode === 'img2img' ? '' : 'none';
   document.getElementById('img2video-section').style.display = mode === 'img2video' ? '' : 'none';
-  // For video mode, force pod backend
-  if (mode === 'img2video') {
-    document.querySelectorAll('#backend-chips .chip').forEach(c => c.classList.remove('selected'));
-    document.querySelector('#backend-chips .chip:nth-child(2)').classList.add('selected');
-    selectedBackend = 'pod';
   }
   // Update generate button text
   const btn = document.getElementById('generate-btn');
@@ -1753,6 +1806,24 @@ function selectBackend(chip, backend) {
   updateCloudModelVisibility();
 }
 function updateCloudModelVisibility() {
   const isCloud = selectedBackend === 'cloud';
   const isPod = selectedBackend === 'pod';
@@ -1844,12 +1915,20 @@ async function doGenerate() {
       formData.append('num_frames', document.getElementById('video-duration').value || '81');
       formData.append('fps', document.getElementById('video-fps').value || '24');
       formData.append('seed', document.getElementById('gen-seed').value || '-1');
-      const res = await fetch(API + '/api/video/generate', { method: 'POST', body: formData });
       const data = await res.json();
       if (!res.ok) throw new Error(data.detail || 'Video generation failed');
-      toast('Video generating on RunPod... This takes 1-3 minutes', 'info');
       await pollForVideo(data.job_id);
       return;
     }
@@ -2734,6 +2813,7 @@ function updatePodUI(pod) {
 async function startPod() {
   const gpuType = document.getElementById('pod-gpu-select').value;
   const btn = document.getElementById('pod-start-btn');
   btn.disabled = true;
   btn.textContent = 'Starting...';
@@ -2742,7 +2822,7 @@ async function startPod() {
     const res = await fetch(API + '/api/pod/start', {
       method: 'POST',
       headers: {'Content-Type': 'application/json'},
-      body: JSON.stringify({gpu_type: gpuType})
     });
     const data = await res.json();
@@ -2750,7 +2830,8 @@ async function startPod() {
       throw new Error(data.detail || 'Failed to start pod');
     }
-    toast('Starting GPU pod... This takes 2-3 minutes', 'info');
     loadPodStatus();
   } catch(e) {
     toast('Failed to start pod: ' + e.message, 'error');

           <div id="cloud-model-select" style="display:none">
             <label>Cloud Model (Text-to-Image)</label>
             <select id="gen-cloud-model">
+              <optgroup label="SeeDream (ByteDance)">
+                <option value="seedream-4.5">SeeDream v4.5 (Best Quality)</option>
+                <option value="seedream-4">SeeDream v4</option>
+                <option value="seedream-3.1">SeeDream v3.1</option>
+              </optgroup>
+              <optgroup label="NanoBanana (Google)">
+                <option value="nano-banana-pro">NanoBanana Pro</option>
+                <option value="nano-banana">NanoBanana</option>
+              </optgroup>
+              <optgroup label="FLUX (Black Forest Labs)">
+                <option value="flux-pro">FLUX Pro (Highest Quality)</option>
+                <option value="flux-dev">FLUX Dev</option>
+                <option value="flux-schnell">FLUX Schnell (Fast)</option>
+              </optgroup>
+              <optgroup label="WAN (Alibaba)">
+                <option value="wan-2.6">WAN 2.6 (Latest)</option>
+                <option value="wan-2.2">WAN 2.2</option>
+              </optgroup>
+              <optgroup label="Other">
+                <option value="dreamina-3.1">Dreamina v3.1</option>
+                <option value="qwen-image">Qwen Image</option>
+              </optgroup>
             </select>
           </div>
               <img id="video-preview-img" style="max-width:100%; border-radius:6px">
             </div>
             <div class="section-title" style="margin-top:12px">Video Settings</div>
+            <div style="margin-bottom:8px">
+              <label>Backend</label>
+              <div id="video-backend-chips" class="chips" style="margin-top:4px">
+                <div class="chip selected" onclick="selectVideoBackend(this, 'cloud')">Cloud API (WaveSpeed)</div>
+                <div class="chip" onclick="selectVideoBackend(this, 'pod')">RunPod (WAN 2.2)</div>
+              </div>
+            </div>
+            <div id="video-cloud-model-select">
+              <label>Video Model</label>
+              <select id="video-cloud-model">
+                <optgroup label="WAN I2V (Alibaba)">
+                  <option value="wan-2.6-i2v" selected>WAN 2.6 I2V 720p (Best)</option>
+                  <option value="wan-2.2-i2v">WAN 2.2 I2V 480p</option>
+                </optgroup>
+                <optgroup label="Kling (Kuaishou)">
+                  <option value="kling-o3-pro">Kling O3 Pro (Highest Quality)</option>
+                  <option value="kling-o3">Kling O3</option>
+                  <option value="kling-o1">Kling O1</option>
+                </optgroup>
+                <optgroup label="Seedance (ByteDance)">
+                  <option value="seedance-1.5-pro">Seedance 1.5 Pro</option>
+                  <option value="seedance-1.5">Seedance 1.5</option>
+                </optgroup>
+                <optgroup label="Veo (Google)">
+                  <option value="veo-3.1">Veo 3.1 (Latest)</option>
+                  <option value="veo-3">Veo 3</option>
+                </optgroup>
+                <optgroup label="Sora (OpenAI)">
+                  <option value="sora-2">Sora 2</option>
+                </optgroup>
+              </select>
+            </div>
+            <div style="display:grid; grid-template-columns:1fr 1fr; gap:8px; margin-top:8px">
               <div>
                 <label>Duration</label>
                 <select id="video-duration">
                 </select>
               </div>
             </div>
+            <div id="video-note" style="font-size:11px;color:var(--text-secondary);margin-top:8px">
+              Cloud API: Fast generation via WaveSpeed. RunPod: Uses WAN 2.2 I2V (~2 sec per frame).
             </div>
           </div>
               <span style="color:var(--text-secondary)">Checking...</span>
             </div>
           </div>
+          <div id="pod-controls" style="display:flex; gap:8px; align-items:center; flex-wrap:wrap">
+            <select id="pod-model-type" style="padding:8px 12px; border-radius:6px; background:var(--bg-primary); border:1px solid var(--border); color:var(--text-primary)">
+              <option value="flux">FLUX.2 (Realistic)</option>
+              <option value="wan">WAN 2.2 (General/Anime)</option>
+            </select>
             <select id="pod-gpu-select" style="padding:8px 12px; border-radius:6px; background:var(--bg-primary); border:1px solid var(--border); color:var(--text-primary)">
               <option value="NVIDIA GeForce RTX 4090">RTX 4090 - $0.44/hr (24GB)</option>
               <option value="NVIDIA RTX A6000">RTX A6000 - $0.76/hr (48GB)</option>
 let currentPage = 'generate';
 let selectedRating = 'sfw';
 let selectedBackend = 'pod';
+let selectedVideoBackend = 'cloud';
 let selectedMode = 'txt2img';
 let templatesData = [];
 let charactersData = [];
   selectedMode = mode;
   document.getElementById('img2img-section').style.display = mode === 'img2img' ? '' : 'none';
   document.getElementById('img2video-section').style.display = mode === 'img2video' ? '' : 'none';
+  // Hide regular backend chips for video mode (video has its own backend selector)
+  const backendSection = document.getElementById('backend-chips').parentElement;
+  if (backendSection) {
+    backendSection.style.display = mode === 'img2video' ? 'none' : '';
   }
   // Update generate button text
   const btn = document.getElementById('generate-btn');
   updateCloudModelVisibility();
 }
+function selectVideoBackend(chip, backend) {
+  chip.parentElement.querySelectorAll('.chip').forEach(c => c.classList.remove('selected'));
+  chip.classList.add('selected');
+  selectedVideoBackend = backend;
+  // Show/hide video model dropdown based on backend
+  const videoModelSelect = document.getElementById('video-cloud-model-select');
+  if (videoModelSelect) {
+    videoModelSelect.style.display = backend === 'cloud' ? '' : 'none';
+  }
+  // Update note
+  const videoNote = document.getElementById('video-note');
+  if (videoNote) {
+    videoNote.textContent = backend === 'cloud'
+      ? 'Cloud API: Fast generation via WaveSpeed. Pay per video.'
+      : 'RunPod: Uses WAN 2.2 I2V on your pod (~2 sec per frame).';
+  }
+}
 function updateCloudModelVisibility() {
   const isCloud = selectedBackend === 'cloud';
   const isPod = selectedBackend === 'pod';
       formData.append('num_frames', document.getElementById('video-duration').value || '81');
       formData.append('fps', document.getElementById('video-fps').value || '24');
       formData.append('seed', document.getElementById('gen-seed').value || '-1');
+      formData.append('backend', selectedVideoBackend);
+      // Add video model for cloud backend
+      if (selectedVideoBackend === 'cloud') {
+        formData.append('model', document.getElementById('video-cloud-model').value);
+      }
+      const endpoint = selectedVideoBackend === 'cloud' ? '/api/video/generate/cloud' : '/api/video/generate';
+      const res = await fetch(API + endpoint, { method: 'POST', body: formData });
       const data = await res.json();
       if (!res.ok) throw new Error(data.detail || 'Video generation failed');
+      const backendLabel = selectedVideoBackend === 'cloud' ? 'Cloud API' : 'RunPod';
+      toast(`Video generating via ${backendLabel}...`, 'info');
       await pollForVideo(data.job_id);
       return;
     }
 async function startPod() {
   const gpuType = document.getElementById('pod-gpu-select').value;
+  const modelType = document.getElementById('pod-model-type').value;
   const btn = document.getElementById('pod-start-btn');
   btn.disabled = true;
   btn.textContent = 'Starting...';
     const res = await fetch(API + '/api/pod/start', {
       method: 'POST',
       headers: {'Content-Type': 'application/json'},
+      body: JSON.stringify({gpu_type: gpuType, model_type: modelType})
     });
     const data = await res.json();
       throw new Error(data.detail || 'Failed to start pod');
     }
+    const modelName = modelType === 'wan' ? 'WAN 2.2' : 'FLUX.2';
+    toast(`Starting ${modelName} pod... This takes 3-5 minutes`, 'info');
     loadPodStatus();
   } catch(e) {
     toast('Failed to start pod: ' + e.message, 'error');

src/content_engine/main.py CHANGED Viewed

@@ -138,6 +138,10 @@ async def lifespan(app: FastAPI):
     routes_catalog.init_routes(catalog)
     routes_system.init_routes(comfyui_client, catalog, template_engine, character_profiles)
     # Initialize LoRA trainer (local)
     from content_engine.services.lora_trainer import LoRATrainer
     lora_trainer = LoRATrainer()

     routes_catalog.init_routes(catalog)
     routes_system.init_routes(comfyui_client, catalog, template_engine, character_profiles)
+    # Initialize video routes with WaveSpeed provider for cloud video generation
+    if wavespeed_provider:
+        routes_video.init_wavespeed(wavespeed_provider)
     # Initialize LoRA trainer (local)
     from content_engine.services.lora_trainer import LoRATrainer
     lora_trainer = LoRATrainer()

src/content_engine/services/cloud_providers/wavespeed_provider.py CHANGED Viewed

@@ -35,16 +35,49 @@ logger = logging.getLogger(__name__)
 # Map friendly names to WaveSpeed model IDs (text-to-image)
 MODEL_MAP = {
-    # NanoBanana
-    "nano-banana": "google-nano-banana-text-to-image",
-    "nano-banana-pro": "google-nano-banana-pro-text-to-image",
-    # SeeDream
-    "seedream-3": "bytedance-seedream-v3",
-    "seedream-3.1": "bytedance-seedream-v3.1",
-    "seedream-4": "bytedance-seedream-v4",
-    "seedream-4.5": "bytedance-seedream-v4.5",
     # Default
-    "default": "bytedance-seedream-v4.5",
 }
 # Map friendly names to WaveSpeed edit model API paths

 # Map friendly names to WaveSpeed model IDs (text-to-image)
 MODEL_MAP = {
+    # NanoBanana (Google)
+    "nano-banana": "google/nano-banana",
+    "nano-banana-pro": "google/nano-banana-pro",
+    # SeeDream (ByteDance)
+    "seedream-3": "bytedance/seedream-v3",
+    "seedream-3.1": "bytedance/seedream-v3.1",
+    "seedream-4": "bytedance/seedream-v4",
+    "seedream-4.5": "bytedance/seedream-v4.5",
+    # WAN (Alibaba)
+    "wan-2.2": "alibaba/wan-2.2",
+    "wan-2.6": "alibaba/wan-2.6",
+    # FLUX (Black Forest Labs)
+    "flux-dev": "black-forest-labs/flux-dev",
+    "flux-schnell": "black-forest-labs/flux-schnell",
+    "flux-pro": "black-forest-labs/flux-pro",
+    # Dreamina (ByteDance)
+    "dreamina-3": "bytedance/dreamina-v3",
+    "dreamina-3.1": "bytedance/dreamina-v3.1",
+    # Qwen (WaveSpeed optimized)
+    "qwen-image": "wavespeedai/qwen-image",
+    # Default (presumably the fallback for names missing from this map; the lookup code is not shown here, so confirm)
+    "default": "bytedance/seedream-v4.5",
+}
+# Image-to-Video models: friendly names (as sent by the UI's "Video Model" dropdown) mapped to WaveSpeed model paths
+VIDEO_MODEL_MAP = {
+    # Kling (Kuaishou)
+    "kling-o1": "kuaishou/kling-o1",
+    "kling-o3": "kuaishou/kling-o3",
+    "kling-o3-pro": "kuaishou/kling-o3-pro",
+    # Veo (Google)
+    "veo-3": "google/veo-3",
+    "veo-3.1": "google/veo-3.1",
+    # WAN I2V (Alibaba)
+    "wan-2.2-i2v": "alibaba/wan-2.2-i2v-480p",
+    "wan-2.6-i2v": "alibaba/wan-2.6-i2v-720p",
+    # Seedance (ByteDance)
+    "seedance-1.5": "bytedance/seedance-1.5",
+    "seedance-1.5-pro": "bytedance/seedance-1.5-pro",
+    # Sora (OpenAI)
+    "sora-2": "openai/sora-2",
+    # Default
+    "default": "alibaba/wan-2.6-i2v-720p",  # fallback used by the cloud video route when the requested name is not a key
 }
 # Map friendly names to WaveSpeed edit model API paths