jiadisu
update port
f4ca1dd
"""daVinci-MagiHuman WebUI — Gradio frontend for HF Spaces.
Pure frontend, no queuing, no load management.
All requests are sent immediately to the router.
If the router rejects (503), the error is shown directly to the user.
Architecture:
User prompt ──LLM rewrite──▶ refined prompt
HF Space (this app) ──HTTP──▶ Router (public IP) ──▶ 4x inference servers
"""
import os
import gradio as gr
from openai import OpenAI
from api_client import generate
# Directory passed to api_client.generate() as ``output_dir``.
OUTPUT_DIR = "/tmp/magihuman_webui_outputs"

# ── Prompt rewrite via LLM ───────────────────────────────────────────
# OpenAI-compatible client used for the prompt-rewrite step. The API key
# falls back to an empty string when REWRITE_API_KEY is not set.
_enhance_client = OpenAI(
    base_url="https://apicz.boyuerichdata.com/v1/",
    api_key=os.environ.get("REWRITE_API_KEY", ""),
)
_ENHANCE_MODEL = "gemini-3-flash-preview"

# The system prompt is read once, at import time, from prompt.txt located
# next to this file.
_PROMPT_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "prompt.txt")
# Decode explicitly as UTF-8 so the result does not depend on the host's
# locale default encoding.
with open(_PROMPT_FILE, "r", encoding="utf-8") as f:
    _ENHANCE_SYSTEM_PROMPT = f.read()
print(f"[Enhance] Loaded system prompt from {os.path.basename(_PROMPT_FILE)}, length={len(_ENHANCE_SYSTEM_PROMPT)} chars")
print(f"[Enhance] System prompt preview: {_ENHANCE_SYSTEM_PROMPT[:200]}...")
def _pil_to_base64_url(image) -> str:
    """Encode an image as a PNG ``data:`` URL for the vision API.

    The image is written to an in-memory buffer through ``image.save`` with
    ``format="PNG"``; the resulting bytes are base64-encoded and prefixed
    with ``data:image/png;base64,``.
    """
    from base64 import b64encode
    from io import BytesIO

    png_buffer = BytesIO()
    image.save(png_buffer, format="PNG")
    encoded = b64encode(png_buffer.getvalue()).decode("utf-8")
    return "data:image/png;base64," + encoded
class ContentBlockedError(Exception):
    """Signals that the LLM safety filter refused the rewrite request."""
def enhance_prompt(user_prompt: str, image=None) -> str:
    """Rewrite the user's prompt into the model's required format via the LLM API.

    The text prompt and, when provided, the reference image are sent together
    so the LLM can describe the character and scene accurately.

    Args:
        user_prompt: Raw prompt text entered by the user.
        image: Optional reference image. When not ``None`` it is converted
            with ``_pil_to_base64_url`` and attached as an ``image_url`` part.

    Returns:
        The stripped rewritten prompt. If the API call fails for a reason that
        is not classified as a safety block, ``user_prompt`` is returned
        unchanged (best-effort fallback).

    Raises:
        ContentBlockedError: If the response has ``finish_reason ==
            "content_filter"``, if the returned content is empty, or if the
            API error text contains "block", "safety" or "content".
    """
    blocked_message = (
        "Your request was blocked by the content safety filter. "
        "Please modify your prompt or image and try again."
    )
    print(f"[Enhance] Starting rewrite, input length={len(user_prompt)} chars, has_image={image is not None}")
    print(f"[Enhance] User prompt: {user_prompt[:100]}...")
    try:
        # Build the user message: optional image part first, then the text.
        user_content = []
        if image is not None:
            user_content.append({
                "type": "image_url",
                "image_url": {"url": _pil_to_base64_url(image)},
            })
        user_content.append({
            "type": "text",
            "text": user_prompt,
        })
        resp = _enhance_client.chat.completions.create(
            model=_ENHANCE_MODEL,
            messages=[
                {"role": "system", "content": _ENHANCE_SYSTEM_PROMPT},
                {"role": "user", "content": user_content},
            ],
            temperature=0.3,
            max_tokens=2048,
        )
        choice = resp.choices[0]
        raw_content = choice.message.content
        finish_reason = choice.finish_reason
        print(f"[Enhance] API returned: finish_reason={finish_reason}, "
              f"content_length={len(raw_content) if raw_content else 0}")
        # An explicit content_filter finish, or an empty/whitespace-only
        # answer, is treated as a safety block.
        if finish_reason == "content_filter" or not (raw_content or "").strip():
            print(f"[Enhance] Content blocked by safety filter (finish_reason={finish_reason})")
            raise ContentBlockedError(blocked_message)
        rewritten = raw_content.strip()
        print(f"[Enhance] Done, output length={len(rewritten)} chars")
        print(f"[Enhance] Rewritten: {rewritten[:150]}...")
        return rewritten
    except ContentBlockedError:
        # Must not be swallowed by the generic handler below.
        raise
    except Exception as e:
        err_str = str(e).lower()
        # NOTE(review): the "content" substring is broad and may also match
        # unrelated API errors (e.g. ones mentioning a content type) — confirm
        # this classification is intended.
        if "block" in err_str or "safety" in err_str or "content" in err_str:
            print(f"[Enhance] Content blocked by API error: {e}")
            # Chain the API error so the root cause stays in the traceback.
            raise ContentBlockedError(blocked_message) from e
        print(f"[Enhance] FAILED: {e}, using original prompt")
        return user_prompt
# ── Generation ───────────────────────────────────────────────────────
def step1_enhance(image, prompt, seed, seconds):
    """Step 1: validate the inputs and enhance the prompt via the LLM.

    This is a plain (non-generator) function; per the original author's note
    this keeps the processing animation confined to ``enhanced_prompt_box``.

    Args:
        image: Reference image from the UI; must not be ``None``.
        prompt: Raw prompt text; must contain non-whitespace characters.
        seed: Seed value from the UI (only logged here).
        seconds: Requested duration from the UI (only logged here).

    Returns:
        The enhanced prompt returned by ``enhance_prompt``.

    Raises:
        gr.Error: If the image or prompt is missing, or if the rewrite was
            blocked by the content filter.
    """
    if image is None:
        raise gr.Error("Please upload a reference image.")
    if not prompt or not prompt.strip():
        raise gr.Error("Please enter a text prompt.")
    print(f"[Generate] Request received: seed={seed} seconds={seconds} prompt={prompt[:50]!r}")
    try:
        return enhance_prompt(prompt.strip(), image=image)
    except ContentBlockedError as e:
        print("[Generate] Blocked by content filter, aborting generation")
        # Surface the filter message in the UI, keeping the original
        # exception as the explicit cause.
        raise gr.Error(str(e)) from e
def step2_generate(image, enhanced, seed, seconds):
    """Step 2: send the generation request to the router.

    Regular (non-generator) function — per the original author's note,
    Gradio's queue mode keeps the connection alive on its own, so the
    processing animation on ``video_output`` stays active for the whole
    request (not verifiable from this file).

    Args:
        image: Reference image forwarded to ``generate``.
        enhanced: Enhanced prompt produced by step 1.
        seed: Seed from the UI; ``None`` is treated as -1.
        seconds: Requested duration, converted with ``int``.

    Returns:
        A ``(video_path, status)`` tuple. ``video_path`` is ``None`` and
        ``status`` starts with ``"Error: "`` when there is no enhanced
        prompt, the router reports an error, or the video file is missing.
    """
    if not enhanced or not enhanced.strip():
        return None, "Error: No enhanced prompt."
    # The seed comes from a gr.Number field; it is assumed to arrive as None
    # when the user clears it. Fall back to -1, which the UI labels as
    # "random", instead of failing inside int().
    seed_value = -1 if seed is None else int(seed)
    print("[Generate] Sending to router ...")
    result = generate(
        image=image,
        video_prompt=enhanced,
        seed=seed_value,
        output_dir=OUTPUT_DIR,
        seconds=int(seconds),
    )
    error = result["error"]
    if error:
        print(f"[Generate] Error from router: {error}")
        return None, f"Error: {error}"
    video_path = result["video_path"]
    # Guard against a success response whose file is not on disk.
    if not video_path or not os.path.isfile(video_path):
        return None, "Error: Video file not found."
    status = f"Done. seed={result['seed']}"
    print(f"[Generate] Success: {video_path}")
    return video_path, status
# ── Gradio UI ────────────────────────────────────────────────────────
# Page title; also rendered as the top-level Markdown heading of the UI.
TITLE = "daVinci-MagiHuman — Audio-Video Generation"
# Introductory Markdown shown under the heading.
DESCRIPTION = (
    "Upload a reference image, describe what you want in the video, choose the "
    "duration (4–10 s), and click **Generate**. Your prompt will be automatically "
    "enhanced into the optimal format before generation.\n\n"
    "**Model**: 15B single-stream Transformer (distilled, 8-step inference) "
    "| **Resolution**: 448×256 → 540p | **FPS**: 25"
)
with gr.Blocks(title=TITLE, theme=gr.themes.Soft()) as demo:
    gr.Markdown(f"# {TITLE}")
    gr.Markdown(DESCRIPTION)
    with gr.Row():
        # Left column: inputs.
        with gr.Column(scale=1):
            image_input = gr.Image(
                label="Reference Image",
                type="pil",
                height=300,
            )
            prompt_input = gr.Textbox(
                label="Video Description (will be auto-enhanced)",
                placeholder="Describe the scene, character actions, dialogue, etc. Your prompt will be automatically enhanced for optimal generation.",
                lines=6,
            )
            with gr.Row():
                seed_input = gr.Number(
                    label="Seed (-1 = random)",
                    value=-1,
                    precision=0,
                )
                seconds_slider = gr.Slider(
                    minimum=4,
                    maximum=10,
                    step=1,
                    value=5,
                    label="Duration (seconds)",
                )
            generate_btn = gr.Button("Generate", variant="primary")
        # Right column: outputs.
        with gr.Column(scale=1):
            video_output = gr.Video(label="Generated Video")
            enhanced_prompt_box = gr.Textbox(
                label="Enhanced Prompt (sent to model)",
                interactive=False,
                lines=8,
            )
            status_box = gr.Textbox(label="Status", interactive=False, lines=2)
    # Two-step pipeline: enhance the prompt, then generate the video.
    # .success() (rather than .then()) runs step 2 only when step 1 finished
    # without raising; otherwise a blocked or invalid request would still
    # trigger generation using whatever enhanced prompt was left in the box
    # from an earlier run.
    generate_btn.click(
        fn=step1_enhance,
        inputs=[image_input, prompt_input, seed_input, seconds_slider],
        outputs=[enhanced_prompt_box],
    ).success(
        fn=step2_generate,
        inputs=[image_input, enhanced_prompt_box, seed_input, seconds_slider],
        outputs=[video_output, status_box],
    )
if __name__ == "__main__":
    # Gradio does NOT queue or throttle — every click goes straight to the router.
    # default_concurrency_limit=None removes Gradio's concurrency gate.
    app = demo.queue(default_concurrency_limit=None)
    app.launch(server_name="0.0.0.0", server_port=7860)