daVinci-MagiHuman

Runtime error

jiadisu Claude Opus 4.6 committed on Mar 22

Commit

8de8ab2

1 Parent(s): e6066e8

Switch to lightweight frontend-only Space

HF Space is now a pure Gradio frontend (no GPU, no model).
Requests are forwarded to a remote router via HTTP.

- app.py: Gradio UI, calls api_client
- api_client.py: sends requests to ROUTER_URL
- requirements.txt: minimal (requests, Pillow)
- README.md: sdk: gradio

Set ROUTER_URL as a Space secret pointing to your public router.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (4) hide show

README.md +2 -1
api_client.py +90 -0
app.py +48 -180
requirements.txt +2 -28

README.md CHANGED Viewed

@@ -3,7 +3,8 @@ title: daVinci-MagiHuman
 emoji: 🎬
 colorFrom: blue
 colorTo: purple
-sdk: docker
 app_port: 7860
 ---

 emoji: 🎬
 colorFrom: blue
 colorTo: purple
+sdk: gradio
+sdk_version: 5.23.0
 app_port: 7860
 ---

api_client.py ADDED Viewed

	@@ -0,0 +1,90 @@

+"""API client for daVinci-MagiHuman WebUI.
+Sends generation requests to the remote Router, which load-balances
+across multiple backend inference servers.
+Configure via environment variables:
+    ROUTER_URL       Router endpoint (e.g. http://your-server:7860)
+    ROUTER_TIMEOUT   Request timeout in seconds (default 660)
+"""
+import base64
+import io
+import os
+import random
+import uuid
+import requests
+from PIL import Image
+ROUTER_URL = os.environ.get("ROUTER_URL", "http://localhost:7860").rstrip("/")
+ROUTER_TIMEOUT = int(os.environ.get("ROUTER_TIMEOUT", "660"))
+def _pil_to_base64(image: Image.Image) -> str:
+    """Serialize *image* as PNG and return the bytes as base64 text."""
+    with io.BytesIO() as png_buffer:
+        image.save(png_buffer, format="PNG")
+        encoded = base64.b64encode(png_buffer.getvalue())
+    return encoded.decode("utf-8")
+def generate(
+    image: Image.Image,
+    video_prompt: str,
+    seed: int = -1,
+    output_dir: str = "./outputs",
+    seconds: int = 5,
+) -> dict:
+    """
+    Send a generation request to the router and download the video.
+    Args:
+        image: reference image, sent as a base64-encoded PNG
+        video_prompt: text prompt, sent as the payload's "prompt"
+        seed: generation seed; -1 draws a random one from [0, 2**31 - 1]
+        output_dir: local directory the mp4 is written to (created if missing)
+        seconds: requested duration, sent as the payload's "seconds"
+    Returns:
+        dict with keys: video_path (local, "" on failure), seed (the value
+        actually sent), error (None on success, otherwise a message).
+        Failures of the HTTP request or of writing the file are reported
+        through "error" instead of being raised.
+    """
+    if seed == -1:
+        seed = random.randint(0, 2**31 - 1)
+    os.makedirs(output_dir, exist_ok=True)
+    image_base64 = _pil_to_base64(image)
+    payload = {
+        "task": "ti2av",
+        "prompt": video_prompt,
+        "image_base64": image_base64,
+        "seed": seed,
+        # NOTE(review): presumably a path on the router/backend side,
+        # unrelated to the local output_dir argument -- confirm.
+        "output_dir": "/tmp/magihuman_outputs",
+        "seconds": seconds,
+        "sr_resolution": "540p",
+    }
+    result = {
+        "video_path": "",
+        "seed": seed,
+        "error": None,
+    }
+    try:
+        resp = requests.post(
+            f"{ROUTER_URL}/generate/file",
+            json=payload,
+            timeout=ROUTER_TIMEOUT,
+        )
+        resp.raise_for_status()
+        # Save mp4 bytes to local file
+        local_path = os.path.join(output_dir, f"magihuman_{uuid.uuid4().hex[:8]}.mp4")
+        with open(local_path, "wb") as f:
+            f.write(resp.content)
+        result["video_path"] = local_path
+    except requests.HTTPError as e:
+        # A requests.Response is falsy for 4xx/5xx statuses (bool(resp) is
+        # resp.ok), so the response must be compared against None; a plain
+        # truthiness test would discard the status code and detail here.
+        response = e.response
+        if response is None:
+            result["error"] = str(e)
+        else:
+            try:
+                detail = response.json().get("detail", "")
+            except (ValueError, AttributeError):
+                # Body is not JSON, or the JSON is not an object: fall back
+                # to a short excerpt of the raw text.
+                detail = response.text[:200]
+            result["error"] = f"HTTP {response.status_code}: {detail}"
+    except Exception as e:
+        result["error"] = str(e)
+    return result

app.py CHANGED Viewed

@@ -1,175 +1,60 @@
-#!/usr/bin/env python3
-"""
-Gradio frontend for daVinci-MagiHuman distilled model.
-Designed for Hugging Face Spaces (Docker SDK, A100-80GB GPU).
-Accepts an image + text prompt + duration, generates audio-video output.
 """
-import json
 import os
-import sys
-import tempfile
-import uuid
-# ---------------------------------------------------------------------------
-# 1. Download all model weights from HF Hub (runs on CPU, cached)
-# ---------------------------------------------------------------------------
-MODEL_ROOT = os.environ.get("MODEL_ROOT", "/data/models")
-os.makedirs(MODEL_ROOT, exist_ok=True)
-HF_REPOS = {
-    "GAIR-NLP/daVinci-MagiHuman": {
-        "subdir": ".",
-        "allow_patterns": ["distill/**", "turbo_vae/**"],
-    },
-    "stabilityai/stable-audio-open-1.0": {
-        "subdir": "audio",
-    },
-    "google/t5gemma-9b-9b-ul2": {
-        "subdir": "t5/t5gemma-9b-9b-ul2",
-    },
-    "Wan-AI/Wan2.2-TI2V-5B": {
-        "subdir": "wan_vae/Wan2.2-TI2V-5B",
-    },
-}
-def download_models():
-    """Download all required model weights from HF Hub."""
-    from huggingface_hub import snapshot_download
-    hf_token = os.environ.get("HF_TOKEN")
-    for repo_id, spec in HF_REPOS.items():
-        local_dir = os.path.join(MODEL_ROOT, spec["subdir"])
-        if os.path.isdir(local_dir) and os.listdir(local_dir):
-            print(f"[download] {repo_id} → {local_dir}  (cached, skipping)")
-            continue
-        print(f"[download] {repo_id} → {local_dir}  (downloading …)")
-        os.makedirs(local_dir, exist_ok=True)
-        kwargs = {
-            "repo_id": repo_id,
-            "local_dir": local_dir,
-            "token": hf_token,
-        }
-        if "allow_patterns" in spec:
-            kwargs["allow_patterns"] = spec["allow_patterns"]
-        snapshot_download(**kwargs)
-        print(f"[download] {repo_id} done.")
-    print("[download] All models ready.")
-print("[app] Checking / downloading model weights …")
-download_models()
-# ---------------------------------------------------------------------------
-# 2. Environment bootstrap
-# ---------------------------------------------------------------------------
-os.environ.setdefault("MASTER_ADDR", "localhost")
-os.environ.setdefault("MASTER_PORT", "29500")
-os.environ.setdefault("RANK", "0")
-os.environ.setdefault("WORLD_SIZE", "1")
-os.environ.setdefault("LOCAL_RANK", "0")
-os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True")
-PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
-if PROJECT_ROOT not in sys.path:
-    sys.path.insert(0, PROJECT_ROOT)
-CONFIG_OVERRIDES = {
-    "engine_config": {
-        "load": os.path.join(MODEL_ROOT, "distill"),
-        "distill": True,
-        "cp_size": 1,
-    },
-    "evaluation_config": {
-        "cfg_number": 1,
-        "num_inference_steps": 8,
-        "audio_model_path": os.path.join(MODEL_ROOT, "audio"),
-        "txt_model_path": os.path.join(MODEL_ROOT, "t5/t5gemma-9b-9b-ul2"),
-        "vae_model_path": os.path.join(MODEL_ROOT, "wan_vae/Wan2.2-TI2V-5B"),
-        "use_turbo_vae": True,
-        "student_config_path": os.path.join(MODEL_ROOT, "turbo_vae/TurboV3-Wan22-TinyShallow_7_7.json"),
-        "student_ckpt_path": os.path.join(MODEL_ROOT, "turbo_vae/checkpoint-340000.ckpt"),
-    },
-}
-_tmp_config = os.path.join(tempfile.gettempdir(), "magihuman_config.json")
-with open(_tmp_config, "w") as f:
-    json.dump(CONFIG_OVERRIDES, f)
-sys.argv = [sys.argv[0], "--config-load-path", _tmp_config]
-# ---------------------------------------------------------------------------
-# 3. Initialize infrastructure & build pipeline (on CPU at startup)
-# ---------------------------------------------------------------------------
 import gradio as gr
-import torch
-from inference.infra import initialize_infra
-from inference.common import parse_config
-from inference.model.dit import get_dit
-from inference.pipeline.pipeline import MagiPipeline
-print("[app] Initializing infrastructure …")
-initialize_infra()
-print("[app] Loading model …")
-config = parse_config()
-model = get_dit(config.arch_config, config.engine_config)
-pipeline = MagiPipeline(model, config.evaluation_config)
-print("[app] Pipeline ready.")
-# ---------------------------------------------------------------------------
-# 4. Inference wrapper — @spaces.GPU requests a ZeroGPU allocation
-#    duration= sets the max GPU time in seconds (default 60, max 300)
-# ---------------------------------------------------------------------------
-def generate_video(
-    image,
-    prompt: str,
-    seconds: int,
-    seed: int,
-):
-    """Called by Gradio – returns path to the output .mp4 file."""
     if image is None:
         raise gr.Error("Please upload a reference image.")
     if not prompt or not prompt.strip():
         raise gr.Error("Please enter a text prompt.")
-    image_path = image
-    output_dir = tempfile.mkdtemp(prefix="magihuman_")
-    save_prefix = os.path.join(output_dir, f"output_{uuid.uuid4().hex[:8]}")
-    result_path = pipeline.run_offline(
-        prompt=prompt,
-        image=image_path,
-        audio=None,
-        save_path_prefix=save_prefix,
         seed=int(seed),
         seconds=int(seconds),
-        br_width=448,
-        br_height=256,
     )
-    return result_path
-# ---------------------------------------------------------------------------
-# 5. Gradio UI
-# ---------------------------------------------------------------------------
-TITLE = "daVinci-MagiHuman – Audio-Video Generation"
 DESCRIPTION = (
     "Upload a reference image, enter a descriptive prompt, choose the video "
     "duration (4–10 s), and click **Generate**. The model produces a video "
     "with synchronized audio.\n\n"
     "**Model**: 15B single-stream Transformer (distilled, 8-step inference) "
-    "| **Resolution**: 448×256 | **FPS**: 25"
 )
 with gr.Blocks(title=TITLE, theme=gr.themes.Soft()) as demo:
@@ -180,7 +65,7 @@ with gr.Blocks(title=TITLE, theme=gr.themes.Soft()) as demo:
         with gr.Column(scale=1):
             image_input = gr.Image(
                 label="Reference Image",
-                type="filepath",
                 height=300,
             )
             prompt_input = gr.Textbox(
@@ -189,46 +74,29 @@ with gr.Blocks(title=TITLE, theme=gr.themes.Soft()) as demo:
                 lines=4,
             )
             with gr.Row():
                 seconds_slider = gr.Slider(
                     minimum=4,
                     maximum=10,
                     step=1,
-                    value=4,
                     label="Duration (seconds)",
                 )
-                seed_input = gr.Number(
-                    value=42,
-                    label="Seed",
-                    precision=0,
-                )
             generate_btn = gr.Button("Generate", variant="primary")
         with gr.Column(scale=1):
             video_output = gr.Video(label="Generated Video")
     generate_btn.click(
-        fn=generate_video,
-        inputs=[image_input, prompt_input, seconds_slider, seed_input],
-        outputs=[video_output],
     )
-    example_prompt_path = os.path.join(PROJECT_ROOT, "example/assets/prompt.txt")
-    example_prompt = "A person talking in a living room."
-    if os.path.exists(example_prompt_path):
-        with open(example_prompt_path) as f:
-            example_prompt = f.read().strip()
-    example_image_path = os.path.join(PROJECT_ROOT, "example/assets/image.png")
-    if os.path.exists(example_image_path):
-        gr.Examples(
-            examples=[
-                [example_image_path, example_prompt, 10, 42],
-            ],
-            inputs=[image_input, prompt_input, seconds_slider, seed_input],
-            outputs=[video_output],
-            cache_examples=False,
-        )
 if __name__ == "__main__":
-    demo.queue(max_size=2).launch(server_name="0.0.0.0", server_port=7860)

+"""daVinci-MagiHuman WebUI — Gradio frontend for HF Spaces.
+A lightweight frontend that sends generation requests to a remote router,
+which load-balances across multiple backend inference servers.
+Architecture:
+    HF Space (this app) ──HTTP──▶ Router (public IP) ──▶ 4x inference servers
+Configure via HF Space secrets:
+    ROUTER_URL      e.g. http://your-server:7860
+    ROUTER_TIMEOUT  request timeout in seconds (default 660)
 """
 import os
 import gradio as gr
+from api_client import generate
+OUTPUT_DIR = "/tmp/magihuman_webui_outputs"
+def run_generation(image, prompt, seed, seconds):
+    """Validate inputs and send a generation request to the router.
+    Raises gr.Error when the image or prompt is missing, when the client
+    reports an error, or when the returned video file does not exist.
+    Returns:
+        (video_path, status): the local video path and a status string
+        containing the seed reported by the client.
+    """
     if image is None:
         raise gr.Error("Please upload a reference image.")
     if not prompt or not prompt.strip():
         raise gr.Error("Please enter a text prompt.")
+    result = generate(
+        image=image,
+        video_prompt=prompt.strip(),
         seed=int(seed),
+        output_dir=OUTPUT_DIR,
         seconds=int(seconds),
     )
+    # The result dict carries failures in its "error" entry; surface them
+    # to the UI as a gr.Error.
+    if result["error"]:
+        raise gr.Error(result["error"])
+    video_path = result["video_path"]
+    if not video_path or not os.path.isfile(video_path):
+        raise gr.Error("Video file not found.")
+    status = f"Done. seed={result['seed']}"
+    return video_path, status
+# ── Gradio UI ────────────────────────────────────────────────────────
+TITLE = "daVinci-MagiHuman — Audio-Video Generation"
 DESCRIPTION = (
     "Upload a reference image, enter a descriptive prompt, choose the video "
     "duration (4–10 s), and click **Generate**. The model produces a video "
     "with synchronized audio.\n\n"
     "**Model**: 15B single-stream Transformer (distilled, 8-step inference) "
+    "| **Resolution**: 448×256 → 540p | **FPS**: 25"
 )
 with gr.Blocks(title=TITLE, theme=gr.themes.Soft()) as demo:
         with gr.Column(scale=1):
             image_input = gr.Image(
                 label="Reference Image",
+                type="pil",
                 height=300,
             )
             prompt_input = gr.Textbox(
                 lines=4,
             )
             with gr.Row():
+                seed_input = gr.Number(
+                    label="Seed (-1 = random)",
+                    value=-1,
+                    precision=0,
+                )
                 seconds_slider = gr.Slider(
                     minimum=4,
                     maximum=10,
                     step=1,
+                    value=5,
                     label="Duration (seconds)",
                 )
             generate_btn = gr.Button("Generate", variant="primary")
         with gr.Column(scale=1):
             video_output = gr.Video(label="Generated Video")
+            status_box = gr.Textbox(label="Status", interactive=False, lines=2)
     generate_btn.click(
+        fn=run_generation,
+        inputs=[image_input, prompt_input, seed_input, seconds_slider],
+        outputs=[video_output, status_box],
     )
 if __name__ == "__main__":
+    demo.queue(max_size=8).launch(server_name="0.0.0.0", server_port=7860)

requirements.txt CHANGED Viewed

@@ -1,28 +1,2 @@
-accelerate==1.10.1
-av==15.1.0
-beautifulsoup4
-boto3
-debugpy
-depyf
-diffusers
-ffmpeg-python==0.2.0
-ftfy
-graphviz
-imageio[ffmpeg]
-loguru==0.7.3
-mosaicml_streaming==0.8.0
-packaging>=24.2
-pandas
-psycopg2-binary
-pydantic
-pydantic-settings
-redis
-redislite
-rich
-sentencepiece
-setuptools>=78.1.1
-timm==1.0.20
-torchao
-transformers==4.56.0
-unfoldNd
-versioningit


1	+ requests
2	+ Pillow