jiadisu Claude Opus 4.6 committed on
Commit
8de8ab2
·
1 Parent(s): e6066e8

Switch to lightweight frontend-only Space

Browse files

HF Space is now a pure Gradio frontend (no GPU, no model).
Requests are forwarded to a remote router via HTTP.

- app.py: Gradio UI, calls api_client
- api_client.py: sends requests to ROUTER_URL
- requirements.txt: minimal (requests, Pillow)
- README.md: sdk: gradio

Set ROUTER_URL as a Space secret pointing to your public router.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (4) hide show
  1. README.md +2 -1
  2. api_client.py +90 -0
  3. app.py +48 -180
  4. requirements.txt +2 -28
README.md CHANGED
@@ -3,7 +3,8 @@ title: daVinci-MagiHuman
3
  emoji: 🎬
4
  colorFrom: blue
5
  colorTo: purple
6
- sdk: docker
 
7
  app_port: 7860
8
  ---
9
 
 
3
  emoji: 🎬
4
  colorFrom: blue
5
  colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 5.23.0
8
  app_port: 7860
9
  ---
10
 
api_client.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """API client for daVinci-MagiHuman WebUI.
2
+
3
+ Sends generation requests to the remote Router, which load-balances
4
+ across multiple backend inference servers.
5
+
6
+ Configure via environment variables:
7
+ ROUTER_URL Router endpoint (e.g. http://your-server:7860)
8
+ ROUTER_TIMEOUT Request timeout in seconds (default 660)
9
+ """
10
+
11
+ import base64
12
+ import io
13
+ import os
14
+ import random
15
+ import uuid
16
+
17
+ import requests
18
+ from PIL import Image
19
+
20
+ ROUTER_URL = os.environ.get("ROUTER_URL", "http://localhost:7860").rstrip("/")
21
+ ROUTER_TIMEOUT = int(os.environ.get("ROUTER_TIMEOUT", "660"))
22
+
23
+
24
def _pil_to_base64(image: Image.Image) -> str:
    """Serialize ``image`` as PNG and return those bytes as base64 text.

    The image is written into an in-memory buffer (nothing touches disk),
    and the resulting base64 bytes are decoded as UTF-8 so the value can be
    embedded directly in a JSON payload.
    """
    png_buffer = io.BytesIO()
    image.save(png_buffer, format="PNG")
    encoded_bytes = base64.b64encode(png_buffer.getvalue())
    return encoded_bytes.decode("utf-8")
29
+
30
+
31
def generate(
    image: Image.Image,
    video_prompt: str,
    seed: int = -1,
    output_dir: str = "./outputs",
    seconds: int = 5,
) -> dict:
    """Send a generation request to the router and save the returned video.

    Args:
        image: Reference image; sent to the router as a base64-encoded PNG.
        video_prompt: Text prompt forwarded in the ``prompt`` field.
        seed: Seed to send. ``-1`` means "pick a random seed" in
            ``[0, 2**31 - 1]``; the chosen value is reported in the result.
        output_dir: Local directory in which the downloaded .mp4 is stored.
            It is created if it does not exist.
        seconds: Requested video duration, forwarded unchanged.

    Returns:
        dict with keys:
            ``video_path``: local path of the saved video ("" on failure),
            ``seed``: the seed actually sent to the router,
            ``error``: ``None`` on success, otherwise a message string.
    """
    if seed == -1:
        seed = random.randint(0, 2**31 - 1)

    os.makedirs(output_dir, exist_ok=True)
    image_base64 = _pil_to_base64(image)

    payload = {
        "task": "ti2av",
        "prompt": video_prompt,
        "image_base64": image_base64,
        "seed": seed,
        # Server-side output location; unrelated to the local ``output_dir``.
        "output_dir": "/tmp/magihuman_outputs",
        "seconds": seconds,
        "sr_resolution": "540p",
    }

    result = {
        "video_path": "",
        "seed": seed,
        "error": None,
    }

    try:
        resp = requests.post(
            f"{ROUTER_URL}/generate/file",
            json=payload,
            timeout=ROUTER_TIMEOUT,
        )
        resp.raise_for_status()

        if not resp.content:
            # A 2xx reply without a body would otherwise be saved as a
            # zero-byte .mp4 that passes the caller's "file exists" check.
            result["error"] = "Router returned an empty response."
            return result

        # Save mp4 bytes to local file
        local_path = os.path.join(output_dir, f"magihuman_{uuid.uuid4().hex[:8]}.mp4")
        with open(local_path, "wb") as f:
            f.write(resp.content)
        result["video_path"] = local_path

    except requests.HTTPError as e:
        # NOTE: requests.Response.__bool__ returns ``.ok``, which is False for
        # every 4xx/5xx reply, so the response must be compared against None;
        # a plain truthiness test would always discard the error detail.
        response = e.response
        if response is None:
            result["error"] = str(e)
        else:
            try:
                detail = response.json().get("detail", "")
            except (ValueError, AttributeError):
                # Body is not JSON, or is JSON but not an object: fall back
                # to a short excerpt of the raw text.
                detail = response.text[:200]
            result["error"] = f"HTTP {response.status_code}: {detail}"
    except Exception as e:
        # Best-effort boundary: connection errors, timeouts and local write
        # failures are reported through the result dict instead of raising.
        result["error"] = str(e) or type(e).__name__

    return result
app.py CHANGED
@@ -1,175 +1,60 @@
1
- #!/usr/bin/env python3
2
- """
3
- Gradio frontend for daVinci-MagiHuman distilled model.
 
 
 
 
4
 
5
- Designed for Hugging Face Spaces (Docker SDK, A100-80GB GPU).
6
- Accepts an image + text prompt + duration, generates audio-video output.
 
7
  """
8
 
9
- import json
10
  import os
11
- import sys
12
- import tempfile
13
- import uuid
14
-
15
- # ---------------------------------------------------------------------------
16
- # 1. Download all model weights from HF Hub (runs on CPU, cached)
17
- # ---------------------------------------------------------------------------
18
- MODEL_ROOT = os.environ.get("MODEL_ROOT", "/data/models")
19
- os.makedirs(MODEL_ROOT, exist_ok=True)
20
-
21
- HF_REPOS = {
22
- "GAIR-NLP/daVinci-MagiHuman": {
23
- "subdir": ".",
24
- "allow_patterns": ["distill/**", "turbo_vae/**"],
25
- },
26
- "stabilityai/stable-audio-open-1.0": {
27
- "subdir": "audio",
28
- },
29
- "google/t5gemma-9b-9b-ul2": {
30
- "subdir": "t5/t5gemma-9b-9b-ul2",
31
- },
32
- "Wan-AI/Wan2.2-TI2V-5B": {
33
- "subdir": "wan_vae/Wan2.2-TI2V-5B",
34
- },
35
- }
36
-
37
-
38
- def download_models():
39
- """Download all required model weights from HF Hub."""
40
- from huggingface_hub import snapshot_download
41
-
42
- hf_token = os.environ.get("HF_TOKEN")
43
-
44
- for repo_id, spec in HF_REPOS.items():
45
- local_dir = os.path.join(MODEL_ROOT, spec["subdir"])
46
- if os.path.isdir(local_dir) and os.listdir(local_dir):
47
- print(f"[download] {repo_id} β†’ {local_dir} (cached, skipping)")
48
- continue
49
-
50
- print(f"[download] {repo_id} β†’ {local_dir} (downloading …)")
51
- os.makedirs(local_dir, exist_ok=True)
52
-
53
- kwargs = {
54
- "repo_id": repo_id,
55
- "local_dir": local_dir,
56
- "token": hf_token,
57
- }
58
- if "allow_patterns" in spec:
59
- kwargs["allow_patterns"] = spec["allow_patterns"]
60
-
61
- snapshot_download(**kwargs)
62
- print(f"[download] {repo_id} done.")
63
-
64
- print("[download] All models ready.")
65
-
66
-
67
- print("[app] Checking / downloading model weights …")
68
- download_models()
69
-
70
- # ---------------------------------------------------------------------------
71
- # 2. Environment bootstrap
72
- # ---------------------------------------------------------------------------
73
- os.environ.setdefault("MASTER_ADDR", "localhost")
74
- os.environ.setdefault("MASTER_PORT", "29500")
75
- os.environ.setdefault("RANK", "0")
76
- os.environ.setdefault("WORLD_SIZE", "1")
77
- os.environ.setdefault("LOCAL_RANK", "0")
78
- os.environ.setdefault("PYTORCH_CUDA_ALLOC_CONF", "expandable_segments:True")
79
-
80
- PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__))
81
- if PROJECT_ROOT not in sys.path:
82
- sys.path.insert(0, PROJECT_ROOT)
83
-
84
- CONFIG_OVERRIDES = {
85
- "engine_config": {
86
- "load": os.path.join(MODEL_ROOT, "distill"),
87
- "distill": True,
88
- "cp_size": 1,
89
- },
90
- "evaluation_config": {
91
- "cfg_number": 1,
92
- "num_inference_steps": 8,
93
- "audio_model_path": os.path.join(MODEL_ROOT, "audio"),
94
- "txt_model_path": os.path.join(MODEL_ROOT, "t5/t5gemma-9b-9b-ul2"),
95
- "vae_model_path": os.path.join(MODEL_ROOT, "wan_vae/Wan2.2-TI2V-5B"),
96
- "use_turbo_vae": True,
97
- "student_config_path": os.path.join(MODEL_ROOT, "turbo_vae/TurboV3-Wan22-TinyShallow_7_7.json"),
98
- "student_ckpt_path": os.path.join(MODEL_ROOT, "turbo_vae/checkpoint-340000.ckpt"),
99
- },
100
- }
101
-
102
- _tmp_config = os.path.join(tempfile.gettempdir(), "magihuman_config.json")
103
- with open(_tmp_config, "w") as f:
104
- json.dump(CONFIG_OVERRIDES, f)
105
-
106
- sys.argv = [sys.argv[0], "--config-load-path", _tmp_config]
107
-
108
- # ---------------------------------------------------------------------------
109
- # 3. Initialize infrastructure & build pipeline (on CPU at startup)
110
- # ---------------------------------------------------------------------------
111
  import gradio as gr
112
- import torch
113
-
114
- from inference.infra import initialize_infra
115
- from inference.common import parse_config
116
- from inference.model.dit import get_dit
117
- from inference.pipeline.pipeline import MagiPipeline
118
-
119
- print("[app] Initializing infrastructure …")
120
- initialize_infra()
121
-
122
- print("[app] Loading model …")
123
- config = parse_config()
124
- model = get_dit(config.arch_config, config.engine_config)
125
- pipeline = MagiPipeline(model, config.evaluation_config)
126
- print("[app] Pipeline ready.")
127
-
128
-
129
- # ---------------------------------------------------------------------------
130
- # 4. Inference wrapper β€” @spaces.GPU requests a ZeroGPU allocation
131
- # duration= sets the max GPU time in seconds (default 60, max 300)
132
- # ---------------------------------------------------------------------------
133
- def generate_video(
134
- image,
135
- prompt: str,
136
- seconds: int,
137
- seed: int,
138
- ):
139
- """Called by Gradio – returns path to the output .mp4 file."""
140
  if image is None:
141
  raise gr.Error("Please upload a reference image.")
142
  if not prompt or not prompt.strip():
143
  raise gr.Error("Please enter a text prompt.")
144
 
145
- image_path = image
146
- output_dir = tempfile.mkdtemp(prefix="magihuman_")
147
- save_prefix = os.path.join(output_dir, f"output_{uuid.uuid4().hex[:8]}")
148
-
149
- result_path = pipeline.run_offline(
150
- prompt=prompt,
151
- image=image_path,
152
- audio=None,
153
- save_path_prefix=save_prefix,
154
  seed=int(seed),
 
155
  seconds=int(seconds),
156
- br_width=448,
157
- br_height=256,
158
  )
159
 
160
- return result_path
 
 
 
 
 
 
 
 
161
 
162
 
163
- # ---------------------------------------------------------------------------
164
- # 5. Gradio UI
165
- # ---------------------------------------------------------------------------
166
- TITLE = "daVinci-MagiHuman – Audio-Video Generation"
167
  DESCRIPTION = (
168
  "Upload a reference image, enter a descriptive prompt, choose the video "
169
  "duration (4–10 s), and click **Generate**. The model produces a video "
170
  "with synchronized audio.\n\n"
171
  "**Model**: 15B single-stream Transformer (distilled, 8-step inference) "
172
- "| **Resolution**: 448Γ—256 | **FPS**: 25"
173
  )
174
 
175
  with gr.Blocks(title=TITLE, theme=gr.themes.Soft()) as demo:
@@ -180,7 +65,7 @@ with gr.Blocks(title=TITLE, theme=gr.themes.Soft()) as demo:
180
  with gr.Column(scale=1):
181
  image_input = gr.Image(
182
  label="Reference Image",
183
- type="filepath",
184
  height=300,
185
  )
186
  prompt_input = gr.Textbox(
@@ -189,46 +74,29 @@ with gr.Blocks(title=TITLE, theme=gr.themes.Soft()) as demo:
189
  lines=4,
190
  )
191
  with gr.Row():
 
 
 
 
 
192
  seconds_slider = gr.Slider(
193
  minimum=4,
194
  maximum=10,
195
  step=1,
196
- value=4,
197
  label="Duration (seconds)",
198
  )
199
- seed_input = gr.Number(
200
- value=42,
201
- label="Seed",
202
- precision=0,
203
- )
204
  generate_btn = gr.Button("Generate", variant="primary")
205
 
206
  with gr.Column(scale=1):
207
  video_output = gr.Video(label="Generated Video")
 
208
 
209
  generate_btn.click(
210
- fn=generate_video,
211
- inputs=[image_input, prompt_input, seconds_slider, seed_input],
212
- outputs=[video_output],
213
  )
214
 
215
- example_prompt_path = os.path.join(PROJECT_ROOT, "example/assets/prompt.txt")
216
- example_prompt = "A person talking in a living room."
217
- if os.path.exists(example_prompt_path):
218
- with open(example_prompt_path) as f:
219
- example_prompt = f.read().strip()
220
-
221
- example_image_path = os.path.join(PROJECT_ROOT, "example/assets/image.png")
222
- if os.path.exists(example_image_path):
223
- gr.Examples(
224
- examples=[
225
- [example_image_path, example_prompt, 10, 42],
226
- ],
227
- inputs=[image_input, prompt_input, seconds_slider, seed_input],
228
- outputs=[video_output],
229
- cache_examples=False,
230
- )
231
-
232
-
233
  if __name__ == "__main__":
234
- demo.queue(max_size=2).launch(server_name="0.0.0.0", server_port=7860)
 
1
+ """daVinci-MagiHuman WebUI β€” Gradio frontend for HF Spaces.
2
+
3
+ A lightweight frontend that sends generation requests to a remote router,
4
+ which load-balances across multiple backend inference servers.
5
+
6
+ Architecture:
7
+ HF Space (this app) ──HTTP──▶ Router (public IP) ──▶ 4x inference servers
8
 
9
+ Configure via HF Space secrets:
10
+ ROUTER_URL e.g. http://your-server:7860
11
+ ROUTER_TIMEOUT request timeout in seconds (default 660)
12
  """
13
 
 
14
  import os
15
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16
  import gradio as gr
17
+
18
+ from api_client import generate
19
+
20
+ OUTPUT_DIR = "/tmp/magihuman_webui_outputs"
21
+
22
+
23
def run_generation(image, prompt, seed, seconds):
    """Validate inputs and send a generation request to the router.

    Args:
        image: Reference image from the UI; ``None`` when nothing is uploaded.
        prompt: Text prompt; must contain non-whitespace characters.
        seed: Seed value from the UI. ``None`` (cleared field) is treated
            as -1, i.e. "random seed".
        seconds: Requested duration; converted to ``int`` before sending.

    Returns:
        Tuple ``(video_path, status)`` where ``status`` reports the seed
        that was actually used.

    Raises:
        gr.Error: on missing inputs, when the router call reports an error,
            or when the resulting video file does not exist locally.
    """
    if image is None:
        raise gr.Error("Please upload a reference image.")
    if not prompt or not prompt.strip():
        raise gr.Error("Please enter a text prompt.")

    # An emptied number field is delivered as None; int(None) would raise a
    # TypeError, so fall back to the "random seed" sentinel instead.
    seed_value = -1 if seed is None else int(seed)

    result = generate(
        image=image,
        video_prompt=prompt.strip(),
        seed=seed_value,
        output_dir=OUTPUT_DIR,
        seconds=int(seconds),
    )

    if result["error"]:
        raise gr.Error(result["error"])

    video_path = result["video_path"]
    if not video_path or not os.path.isfile(video_path):
        raise gr.Error("Video file not found.")

    status = f"Done. seed={result['seed']}"
    return video_path, status
47
 
48
 
49
+ # ── Gradio UI ────────────────────────────────────────────────────────
50
+
51
+ TITLE = "daVinci-MagiHuman β€” Audio-Video Generation"
 
52
  DESCRIPTION = (
53
  "Upload a reference image, enter a descriptive prompt, choose the video "
54
  "duration (4–10 s), and click **Generate**. The model produces a video "
55
  "with synchronized audio.\n\n"
56
  "**Model**: 15B single-stream Transformer (distilled, 8-step inference) "
57
+ "| **Resolution**: 448Γ—256 β†’ 540p | **FPS**: 25"
58
  )
59
 
60
  with gr.Blocks(title=TITLE, theme=gr.themes.Soft()) as demo:
 
65
  with gr.Column(scale=1):
66
  image_input = gr.Image(
67
  label="Reference Image",
68
+ type="pil",
69
  height=300,
70
  )
71
  prompt_input = gr.Textbox(
 
74
  lines=4,
75
  )
76
  with gr.Row():
77
+ seed_input = gr.Number(
78
+ label="Seed (-1 = random)",
79
+ value=-1,
80
+ precision=0,
81
+ )
82
  seconds_slider = gr.Slider(
83
  minimum=4,
84
  maximum=10,
85
  step=1,
86
+ value=5,
87
  label="Duration (seconds)",
88
  )
 
 
 
 
 
89
  generate_btn = gr.Button("Generate", variant="primary")
90
 
91
  with gr.Column(scale=1):
92
  video_output = gr.Video(label="Generated Video")
93
+ status_box = gr.Textbox(label="Status", interactive=False, lines=2)
94
 
95
  generate_btn.click(
96
+ fn=run_generation,
97
+ inputs=[image_input, prompt_input, seed_input, seconds_slider],
98
+ outputs=[video_output, status_box],
99
  )
100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  if __name__ == "__main__":
102
+ demo.queue(max_size=8).launch(server_name="0.0.0.0", server_port=7860)
requirements.txt CHANGED
@@ -1,28 +1,2 @@
1
- accelerate==1.10.1
2
- av==15.1.0
3
- beautifulsoup4
4
- boto3
5
- debugpy
6
- depyf
7
- diffusers
8
- ffmpeg-python==0.2.0
9
- ftfy
10
- graphviz
11
- imageio[ffmpeg]
12
- loguru==0.7.3
13
- mosaicml_streaming==0.8.0
14
- packaging>=24.2
15
- pandas
16
- psycopg2-binary
17
- pydantic
18
- pydantic-settings
19
- redis
20
- redislite
21
- rich
22
- sentencepiece
23
- setuptools>=78.1.1
24
- timm==1.0.20
25
- torchao
26
- transformers==4.56.0
27
- unfoldNd
28
- versioningit
 
1
+ requests
2
+ Pillow