dippoo committed on
Commit
b02f80a
·
1 Parent(s): 4edd939

Add Kling Motion Control: character image + driving video via WaveSpeed

Browse files
src/content_engine/api/routes_video.py CHANGED
@@ -261,6 +261,147 @@ async def generate_video_higgsfield(
261
  }
262
 
263
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
264
  async def _generate_higgsfield_video(
265
  job_id: str,
266
  image_bytes: bytes,
 
261
  }
262
 
263
 
264
# Strong references to in-flight background tasks. The event loop itself only
# keeps weak references, so a task whose handle is dropped may be
# garbage-collected before it finishes.
_kling_motion_tasks: set = set()


@router.post("/api/video/generate/kling-motion")
async def generate_kling_motion(
    image: UploadFile = File(...),
    driving_video: UploadFile = File(...),
    prompt: str = Form("smooth motion, high quality video"),
    duration: int = Form(5),
    seed: int = Form(-1),
):
    """Generate video using Kling Motion Control (character image + driving video).

    Reads both uploads into memory, registers a job in ``_video_jobs`` and
    schedules ``_generate_kling_motion_video`` as a background task, then
    returns immediately so the caller can poll the job.

    Args:
        image: Character image upload.
        driving_video: Motion reference video upload.
        prompt: Text prompt forwarded to the background task.
        duration: Requested duration, forwarded to the background task.
        seed: Seed recorded on the job; a negative value selects a random one.
            NOTE: the seed is stored in the job record only, it is not passed
            to the background task.

    Returns:
        A dict with ``job_id``, ``status`` ("running") and ``estimated_time``.

    Raises:
        HTTPException: 500 when the WaveSpeed provider is not configured,
            400 when either upload is empty.
    """
    import random

    if not _wavespeed_provider:
        raise HTTPException(500, "WaveSpeed API not configured")

    image_bytes = await image.read()
    video_bytes = await driving_video.read()

    # Reject empty uploads up front instead of failing later in the background.
    if not image_bytes:
        raise HTTPException(400, "Character image is empty")
    if not video_bytes:
        raise HTTPException(400, "Driving video is empty")

    job_id = str(uuid.uuid4())[:8]
    seed = seed if seed >= 0 else random.randint(0, 2**32 - 1)

    _video_jobs[job_id] = {
        "status": "running",
        "seed": seed,
        "started_at": time.time(),
        "model": "kling-motion",
        "backend": "cloud",
        "message": "Uploading files...",
    }

    task = asyncio.create_task(
        _generate_kling_motion_video(job_id, image_bytes, video_bytes, prompt, duration)
    )
    # Hold a reference until the task completes, then let it go.
    _kling_motion_tasks.add(task)
    task.add_done_callback(_kling_motion_tasks.discard)

    return {"job_id": job_id, "status": "running", "estimated_time": "~60-120 seconds"}
296
+
297
+
298
async def _generate_kling_motion_video(
    job_id: str,
    image_bytes: bytes,
    video_bytes: bytes,
    prompt: str,
    duration: int,
):
    """Background task: upload image + driving video, call Kling Motion Control API.

    Progress and outcome are reported by mutating ``_video_jobs[job_id]``
    (keys ``message``, ``status``, ``error``, ``filename``). Nothing is
    returned, and any exception is logged and recorded on the job rather than
    propagated.

    Args:
        job_id: Key of the job record in ``_video_jobs``.
        image_bytes: Raw bytes of the character image.
        video_bytes: Raw bytes of the driving video.
        prompt: Text prompt sent in the API payload.
        duration: Duration value sent in the API payload.
    """
    import httpx

    try:
        _video_jobs[job_id]["message"] = "Uploading character image..."
        image_url = await _wavespeed_provider._upload_temp_image(image_bytes)
        logger.info("Kling motion: character image uploaded: %s", image_url[:80])

        _video_jobs[job_id]["message"] = "Uploading driving video..."
        video_url = await _upload_temp_video(video_bytes)
        logger.info("Kling motion: driving video uploaded: %s", video_url[:80])

        api_key = _wavespeed_provider._api_key
        endpoint = "https://api.wavespeed.ai/api/v3/kwaivgi/kling-v2.6-pro/motion-control"

        payload = {
            "image": image_url,
            "video": video_url,
            "prompt": prompt,
            "duration": duration,
            "enable_sync_mode": False,
        }

        _video_jobs[job_id]["message"] = "Calling Kling Motion Control API..."
        logger.info("Calling Kling Motion Control: %s", endpoint)

        async with httpx.AsyncClient(timeout=300) as client:
            resp = await client.post(
                endpoint,
                json=payload,
                headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
            )

        if resp.status_code != 200:
            error_text = resp.text[:500]
            logger.error("Kling Motion API error: %s", error_text)
            _video_jobs[job_id]["status"] = "failed"
            _video_jobs[job_id]["error"] = f"API error: {error_text[:200]}"
            return

        result = resp.json()
        # Some responses wrap the payload in "data"; fall back to the top level.
        data = result.get("data", result)
        logger.info("Kling Motion API response: %s", str(result)[:300])

        # Poll if async
        outputs = data.get("outputs", [])
        urls_data = data.get("urls", {})
        if not outputs and urls_data.get("get"):
            _video_jobs[job_id]["message"] = "Waiting for Kling Motion to complete..."
            video_url_out = await _poll_wavespeed_video(urls_data["get"], api_key, job_id)
        elif outputs:
            first_output = outputs[0]
            video_url_out = first_output if isinstance(first_output, str) else first_output.get("url")
            if not video_url_out:
                # Without this the job would stay "running" forever: nothing
                # else marks it failed on the direct-output path.
                _video_jobs[job_id]["status"] = "failed"
                _video_jobs[job_id]["error"] = "No output URL in response"
                return
        else:
            _video_jobs[job_id]["status"] = "failed"
            _video_jobs[job_id]["error"] = "No output URL in response"
            return

        if not video_url_out:
            return  # poll already set status

        # Download and save
        _video_jobs[job_id]["message"] = "Downloading video..."
        output_dir = Path(os.environ.get("OUTPUT_DIR", "outputs")) / "videos"
        output_dir.mkdir(parents=True, exist_ok=True)
        filename = f"kling_motion_{job_id}.mp4"
        output_path = output_dir / filename

        async with httpx.AsyncClient(timeout=120) as dl_client:
            r = await dl_client.get(video_url_out)
            # Fail the job instead of saving an HTTP error body as an .mp4.
            r.raise_for_status()
            output_path.write_bytes(r.content)

        _video_jobs[job_id]["status"] = "done"
        _video_jobs[job_id]["filename"] = filename
        _video_jobs[job_id]["message"] = "Done"
        logger.info("Kling Motion video saved: %s", filename)

    except Exception as e:
        # Top-level boundary of a fire-and-forget task: record the failure on
        # the job so the poller can surface it.
        logger.exception("Kling Motion generation failed: %s", e)
        _video_jobs[job_id]["status"] = "failed"
        _video_jobs[job_id]["error"] = str(e)
386
+
387
+
388
async def _upload_temp_video(video_bytes: bytes) -> str:
    """Upload a video file to litterbox.catbox.moe and return the URL.

    The bytes are posted as a multipart form under the file name
    ``driving.mp4`` with the ``time`` field set to ``"1h"``.

    Args:
        video_bytes: Raw bytes of the video to upload.

    Returns:
        The stripped response text, when the host answers 200 with a body
        starting with ``http``.

    Raises:
        RuntimeError: On any other status or response body.
    """
    import aiohttp

    upload_endpoint = "https://litterbox.catbox.moe/resources/internals/api.php"

    form = aiohttp.FormData()
    form.add_field("reqtype", "fileupload")
    form.add_field("time", "1h")
    form.add_field("fileToUpload", video_bytes, filename="driving.mp4", content_type="video/mp4")

    hosted_url = ""
    async with aiohttp.ClientSession() as http:
        async with http.post(upload_endpoint, data=form) as reply:
            # The body is only read on a 200, as a non-200 is a failure anyway.
            if reply.status == 200:
                hosted_url = (await reply.text()).strip()

    if hosted_url.startswith("http"):
        return hosted_url
    raise RuntimeError("Failed to upload driving video to litterbox.catbox.moe")
403
+
404
+
405
  async def _generate_higgsfield_video(
406
  job_id: str,
407
  image_bytes: bytes,
src/content_engine/api/ui.html CHANGED
@@ -1011,6 +1011,7 @@ select { cursor: pointer; }
1011
  <div class="chips" id="video-submode-chips" style="margin-bottom:10px">
1012
  <div class="chip selected" onclick="selectVideoSubMode(this,'i2v')">Image to Video</div>
1013
  <div class="chip" onclick="selectVideoSubMode(this,'animate')">Animate (Dance)</div>
 
1014
  </div>
1015
 
1016
  <!-- Standard Image-to-Video -->
@@ -1121,6 +1122,33 @@ select { cursor: pointer; }
1121
  </div>
1122
  </div>
1123
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1124
  </div>
1125
 
1126
  <!-- Reference image upload for img2img -->
@@ -1684,6 +1712,8 @@ let selectedVideoBackend = 'cloud';
1684
  let videoSubMode = 'i2v';
1685
  let animateCharFile = null;
1686
  let animateDrivingVideoFile = null;
 
 
1687
  let selectedMode = 'txt2img';
1688
  let templatesData = [];
1689
  let charactersData = [];
@@ -1939,6 +1969,7 @@ function selectVideoSubMode(chip, mode) {
1939
  videoSubMode = mode;
1940
  document.getElementById('i2v-sub-section').style.display = mode === 'i2v' ? '' : 'none';
1941
  document.getElementById('animate-sub-section').style.display = mode === 'animate' ? '' : 'none';
 
1942
  }
1943
 
1944
  function handleAnimateChar(input) {
@@ -1970,6 +2001,35 @@ function handleAnimateVideo(input) {
1970
  `;
1971
  }
1972
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1973
  function handleVideoImage(input) {
1974
  if (input.files[0]) {
1975
  videoImageFile = input.files[0];
@@ -2469,6 +2529,24 @@ async function doGenerate() {
2469
  // img2video mode — video generation
2470
  if (selectedMode === 'img2video') {
2471
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2472
  // Animate (Dance) sub-mode — WAN 2.2 Animate on RunPod
2473
  if (videoSubMode === 'animate') {
2474
  if (!animateCharFile) throw new Error('Please upload a character image');
 
1011
  <div class="chips" id="video-submode-chips" style="margin-bottom:10px">
1012
  <div class="chip selected" onclick="selectVideoSubMode(this,'i2v')">Image to Video</div>
1013
  <div class="chip" onclick="selectVideoSubMode(this,'animate')">Animate (Dance)</div>
1014
+ <div class="chip" onclick="selectVideoSubMode(this,'kling-motion')">Kling Motion</div>
1015
  </div>
1016
 
1017
  <!-- Standard Image-to-Video -->
 
1122
  </div>
1123
  </div>
1124
 
1125
+ <!-- Kling Motion Control sub-section -->
1126
+ <div id="kling-motion-sub-section" style="display:none">
1127
+ <div class="section-title">Character Image</div>
1128
+ <div class="drop-zone" id="kling-motion-char-zone" onclick="document.getElementById('kling-motion-char-input').click()">
1129
+ <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5"><path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/><polyline points="17 8 12 3 7 8"/><line x1="12" y1="3" x2="12" y2="15"/></svg>
1130
+ <div>Character photo</div>
1131
+ </div>
1132
+ <input type="file" id="kling-motion-char-input" accept="image/*" style="display:none" onchange="handleKlingMotionChar(this)">
1133
+
1134
+ <div class="section-title" style="margin-top:10px">Driving Video</div>
1135
+ <div class="drop-zone" id="kling-motion-video-zone" onclick="document.getElementById('kling-motion-video-input').click()">
1136
+ <svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5"><rect x="2" y="2" width="20" height="20" rx="2"/><polygon points="10,8 16,12 10,16"/></svg>
1137
+ <div>Motion reference video (mp4)</div>
1138
+ </div>
1139
+ <input type="file" id="kling-motion-video-input" accept="video/*" style="display:none" onchange="handleKlingMotionVideo(this)">
1140
+
1141
+ <label style="margin-top:10px">Duration</label>
1142
+ <select id="kling-motion-duration">
1143
+ <option value="5" selected>5s (~$0.56)</option>
1144
+ <option value="10">10s (~$1.12)</option>
1145
+ </select>
1146
+
1147
+ <div style="font-size:11px;color:var(--text-secondary);margin-top:6px">
1148
+ Kling Motion Control via WaveSpeed. ~1 min generation. Requires WAVESPEED_API_KEY.
1149
+ </div>
1150
+ </div>
1151
+
1152
  </div>
1153
 
1154
  <!-- Reference image upload for img2img -->
 
1712
  let videoSubMode = 'i2v';
1713
  let animateCharFile = null;
1714
  let animateDrivingVideoFile = null;
1715
+ let klingMotionCharFile = null;
1716
+ let klingMotionVideoFile = null;
1717
  let selectedMode = 'txt2img';
1718
  let templatesData = [];
1719
  let charactersData = [];
 
1969
  videoSubMode = mode;
1970
  document.getElementById('i2v-sub-section').style.display = mode === 'i2v' ? '' : 'none';
1971
  document.getElementById('animate-sub-section').style.display = mode === 'animate' ? '' : 'none';
1972
+ document.getElementById('kling-motion-sub-section').style.display = mode === 'kling-motion' ? '' : 'none';
1973
  }
1974
 
1975
  function handleAnimateChar(input) {
 
2001
  `;
2002
  }
2003
 
2004
/**
 * Handle selection of the Kling Motion character image.
 *
 * Stores the chosen file in `klingMotionCharFile` and replaces the drop zone
 * contents with a thumbnail, the file name and a Remove button.
 *
 * @param {HTMLInputElement} input - The file input whose `change` event fired.
 */
function handleKlingMotionChar(input) {
  const file = input.files[0];
  if (!file) return;
  klingMotionCharFile = file;

  const zone = document.getElementById('kling-motion-char-zone');
  // Static placeholder restored by Remove (same markup as the initial drop zone).
  const placeholder =
    '<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5">' +
    '<path d="M21 15v4a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2v-4"/>' +
    '<polyline points="17 8 12 3 7 8"/><line x1="12" y1="3" x2="12" y2="15"/></svg>' +
    '<div>Character photo</div>';
  zone.classList.add('has-file');

  const reader = new FileReader();
  reader.onload = e => {
    // Ignore a stale read if another file was chosen in the meantime.
    if (klingMotionCharFile !== file) return;

    const img = document.createElement('img');
    img.src = e.target.result;
    img.style.maxHeight = '120px';
    img.style.borderRadius = '6px';

    // textContent, not innerHTML: the file name is user-controlled text.
    const label = document.createElement('div');
    label.style.marginTop = '4px';
    label.style.fontSize = '11px';
    label.textContent = file.name;

    const removeBtn = document.createElement('button');
    removeBtn.className = 'btn btn-secondary btn-small';
    removeBtn.style.marginTop = '6px';
    removeBtn.textContent = 'Remove';
    removeBtn.onclick = ev => {
      ev.stopPropagation(); // do not re-open the file picker via the zone's onclick
      klingMotionCharFile = null;
      // Clear the input so picking the same file again fires `change`.
      input.value = '';
      zone.classList.remove('has-file');
      zone.innerHTML = placeholder;
    };

    zone.innerHTML = '';
    zone.append(img, label, removeBtn);
  };
  reader.readAsDataURL(file);
}
2019
+
2020
/**
 * Handle selection of the Kling Motion driving video.
 *
 * Stores the chosen file in `klingMotionVideoFile` and replaces the drop zone
 * contents with an icon, the file name, its size in MB and a Remove button.
 *
 * @param {HTMLInputElement} input - The file input whose `change` event fired.
 */
function handleKlingMotionVideo(input) {
  const file = input.files[0];
  if (!file) return;
  klingMotionVideoFile = file;

  const zone = document.getElementById('kling-motion-video-zone');
  const iconMarkup =
    '<svg viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1.5">' +
    '<rect x="2" y="2" width="20" height="20" rx="2"/><polygon points="10,8 16,12 10,16"/></svg>';
  // Static placeholder restored by Remove (same markup as the initial drop zone).
  const placeholder = iconMarkup + '<div>Motion reference video (mp4)</div>';
  zone.classList.add('has-file');

  // Only static markup goes through innerHTML; the file name is set as text.
  zone.innerHTML = iconMarkup;

  const nameLabel = document.createElement('div');
  nameLabel.style.fontSize = '12px';
  nameLabel.style.marginTop = '4px';
  nameLabel.textContent = file.name;

  const sizeLabel = document.createElement('div');
  sizeLabel.style.fontSize = '11px';
  sizeLabel.style.color = 'var(--text-secondary)';
  sizeLabel.textContent = `${(file.size / 1024 / 1024).toFixed(1)} MB`;

  const removeBtn = document.createElement('button');
  removeBtn.className = 'btn btn-secondary btn-small';
  removeBtn.style.marginTop = '6px';
  removeBtn.textContent = 'Remove';
  removeBtn.onclick = ev => {
    ev.stopPropagation(); // do not re-open the file picker via the zone's onclick
    klingMotionVideoFile = null;
    // Clear the input so picking the same file again fires `change`.
    input.value = '';
    zone.classList.remove('has-file');
    zone.innerHTML = placeholder;
  };

  zone.append(nameLabel, sizeLabel, removeBtn);
}
2032
+
2033
  function handleVideoImage(input) {
2034
  if (input.files[0]) {
2035
  videoImageFile = input.files[0];
 
2529
  // img2video mode — video generation
2530
  if (selectedMode === 'img2video') {
2531
 
2532
+ // Kling Motion Control
2533
+ if (videoSubMode === 'kling-motion') {
2534
+ if (!klingMotionCharFile) throw new Error('Please upload a character image');
2535
+ if (!klingMotionVideoFile) throw new Error('Please upload a driving video');
2536
+ const formData = new FormData();
2537
+ formData.append('image', klingMotionCharFile);
2538
+ formData.append('driving_video', klingMotionVideoFile);
2539
+ formData.append('prompt', document.getElementById('gen-positive').value || 'smooth motion, high quality video');
2540
+ formData.append('duration', document.getElementById('kling-motion-duration').value || '5');
2541
+ formData.append('seed', document.getElementById('gen-seed').value || '-1');
2542
+ const res = await fetch(API + '/api/video/generate/kling-motion', { method: 'POST', body: formData });
2543
+ const data = await res.json();
2544
+ if (!res.ok) throw new Error(data.detail || 'Kling Motion generation failed');
2545
+ toast('Kling Motion generating via WaveSpeed (~1 min)...', 'info');
2546
+ await pollForVideo(data.job_id);
2547
+ return;
2548
+ }
2549
+
2550
  // Animate (Dance) sub-mode — WAN 2.2 Animate on RunPod
2551
  if (videoSubMode === 'animate') {
2552
  if (!animateCharFile) throw new Error('Please upload a character image');
src/content_engine/services/cloud_providers/wavespeed_provider.py CHANGED
@@ -90,6 +90,7 @@ VIDEO_MODEL_MAP = {
90
  # Kling (Kuaishou)
91
  "kling-o3-pro": "kwaivgi/kling-video-o3-pro/image-to-video",
92
  "kling-o3": "kwaivgi/kling-video-o3-std/image-to-video",
 
93
  # Veo (Google)
94
  "veo-3.1": "google/veo-3.1",
95
  # Seedance (ByteDance)
 
90
  # Kling (Kuaishou)
91
  "kling-o3-pro": "kwaivgi/kling-video-o3-pro/image-to-video",
92
  "kling-o3": "kwaivgi/kling-video-o3-std/image-to-video",
93
+ "kling-motion": "kwaivgi/kling-v2.6-pro/motion-control",
94
  # Veo (Google)
95
  "veo-3.1": "google/veo-3.1",
96
  # Seedance (ByteDance)