daVinci-MagiHuman

Runtime error

jiadisu

update port

f4ca1dd 2 months ago

8.98 kB

	"""daVinci-MagiHuman WebUI — Gradio frontend for HF Spaces.

	Pure frontend, no queuing, no load management.
	All requests are sent immediately to the router.
	If the router rejects (503), the error is shown directly to the user.

	Architecture:
	User prompt ──LLM rewrite──▶ refined prompt
	HF Space (this app) ──HTTP──▶ Router (public IP) ──▶ 4x inference servers
	"""

	import os

	import gradio as gr
	from openai import OpenAI

	from api_client import generate

	# Destination directory passed as output_dir to generate() for produced videos.
	OUTPUT_DIR = "/tmp/magihuman_webui_outputs"

	# ── Prompt rewrite via LLM ───────────────────────────────────────────

	# OpenAI-compatible client used for the prompt-rewrite step. The API key is
	# read from the REWRITE_API_KEY environment variable (empty string if unset).
	_enhance_client = OpenAI(
	    base_url="https://apicz.boyuerichdata.com/v1/",
	    api_key=os.environ.get("REWRITE_API_KEY", ""),
	)
	_ENHANCE_MODEL = "gemini-3-flash-preview"

	# The rewrite system prompt is stored in prompt.txt next to this file and is
	# read once, at import time.
	_PROMPT_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)), "prompt.txt")
	# Decode explicitly as UTF-8 so loading does not depend on the host's locale
	# default encoding.
	# NOTE(review): assumes prompt.txt is saved as UTF-8 — confirm.
	with open(_PROMPT_FILE, "r", encoding="utf-8") as f:
	    _ENHANCE_SYSTEM_PROMPT = f.read()
	print(f"[Enhance] Loaded system prompt from {os.path.basename(_PROMPT_FILE)}, length={len(_ENHANCE_SYSTEM_PROMPT)} chars")
	print(f"[Enhance] System prompt preview: {_ENHANCE_SYSTEM_PROMPT[:200]}...")

	def _pil_to_base64_url(image) -> str:
	"""Convert a PIL Image to a base64 data URL for the vision API."""
	import base64
	import io
	buf = io.BytesIO()
	image.save(buf, format="PNG")
	b64 = base64.b64encode(buf.getvalue()).decode("utf-8")
	return f"data:image/png;base64,{b64}"


	class ContentBlockedError(Exception):
	    """Signals that the LLM safety filter rejected the request."""


	# Message shown to the user whenever the rewrite request is treated as blocked.
	_CONTENT_BLOCKED_MESSAGE = (
	    "Your request was blocked by the content safety filter. "
	    "Please modify your prompt or image and try again."
	)


	def _build_user_content(user_prompt: str, image=None) -> list:
	    """Build the user message parts: the image (when given) first, then the text."""
	    parts = []
	    if image is not None:
	        parts.append({
	            "type": "image_url",
	            "image_url": {"url": _pil_to_base64_url(image)},
	        })
	    parts.append({
	        "type": "text",
	        "text": user_prompt,
	    })
	    return parts


	def enhance_prompt(user_prompt: str, image=None) -> str:
	    """Rewrite the user prompt into the model's required format via the LLM API.

	    The text prompt and, when provided, the reference image are sent together
	    so the LLM can describe the character and scene.

	    Args:
	        user_prompt: The prompt text entered by the user.
	        image: Optional reference image; it is converted to a base64 data URL.

	    Returns:
	        The rewritten prompt with surrounding whitespace stripped. If the
	        request fails for a reason that does not look like a safety block,
	        the original ``user_prompt`` is returned unchanged (best effort).

	    Raises:
	        ContentBlockedError: If the response has ``finish_reason ==
	            "content_filter"``, if the returned content is empty, or if the
	            API raised an error whose message mentions "block", "safety" or
	            "content".
	    """
	    print(f"[Enhance] Starting rewrite, input length={len(user_prompt)} chars, has_image={image is not None}")
	    print(f"[Enhance] User prompt: {user_prompt[:100]}...")
	    try:
	        # Building the payload stays inside the try so that an image that
	        # cannot be encoded falls back to the original prompt as well.
	        user_content = _build_user_content(user_prompt, image)

	        resp = _enhance_client.chat.completions.create(
	            model=_ENHANCE_MODEL,
	            messages=[
	                {"role": "system", "content": _ENHANCE_SYSTEM_PROMPT},
	                {"role": "user", "content": user_content},
	            ],
	            temperature=0.3,
	            max_tokens=2048,
	        )
	        choice = resp.choices[0]
	        raw_content = choice.message.content
	        finish_reason = choice.finish_reason
	        print(f"[Enhance] API returned: finish_reason={finish_reason}, "
	              f"content_length={len(raw_content) if raw_content else 0}")

	        # An explicit content_filter finish or an empty/whitespace-only reply
	        # is treated as a safety block.
	        if finish_reason == "content_filter" or not (raw_content or "").strip():
	            print(f"[Enhance] Content blocked by safety filter (finish_reason={finish_reason})")
	            raise ContentBlockedError(_CONTENT_BLOCKED_MESSAGE)

	        rewritten = raw_content.strip()
	        print(f"[Enhance] Done, output length={len(rewritten)} chars")
	        print(f"[Enhance] Rewritten: {rewritten[:150]}...")
	        return rewritten
	    except ContentBlockedError:
	        # Raised just above; let it through instead of handling it as a failure.
	        raise
	    except Exception as e:
	        err_str = str(e).lower()
	        # NOTE(review): "content" is a broad keyword and may also match
	        # unrelated API errors — confirm this heuristic is intended.
	        if "block" in err_str or "safety" in err_str or "content" in err_str:
	            print(f"[Enhance] Content blocked by API error: {e}")
	            # Chain the original error so the real cause stays in the traceback.
	            raise ContentBlockedError(_CONTENT_BLOCKED_MESSAGE) from e
	        print(f"[Enhance] FAILED: {e}, using original prompt")
	        return user_prompt


	# ── Generation ───────────────────────────────────────────────────────


	def step1_enhance(image, prompt, seed, seconds):
	    """Step 1: check the inputs, then have the LLM rewrite the prompt.

	    ``seed`` and ``seconds`` are only logged here. The return value is the
	    enhanced prompt text, which is the sole output of this step.

	    Raises:
	        gr.Error: If no image was uploaded, if the prompt is empty or only
	            whitespace, or if the rewrite was blocked by the content filter.
	    """
	    if image is None:
	        raise gr.Error("Please upload a reference image.")
	    cleaned = prompt.strip() if prompt else ""
	    if not cleaned:
	        raise gr.Error("Please enter a text prompt.")

	    print(f"[Generate] Request received: seed={seed} seconds={seconds} prompt={prompt[:50]!r}")
	    try:
	        return enhance_prompt(cleaned, image=image)
	    except ContentBlockedError as blocked:
	        print("[Generate] Blocked by content filter, aborting generation")
	        raise gr.Error(str(blocked))


	def step2_generate(image, enhanced, seed, seconds):
	    """Step 2: send the generation request to the router.

	    This is a plain (non-generator) function. The original author's note says
	    Gradio's queue mode keeps the connection alive with its own heartbeat so
	    the processing animation on the video output stays active (not verified
	    here).

	    Args:
	        image: Reference image forwarded to ``generate``.
	        enhanced: Enhanced prompt text produced by step 1.
	        seed: Seed from the UI; ``None`` (an emptied field) is treated as -1,
	            which the UI labels as "random".
	        seconds: Requested duration, converted with ``int()``.

	    Returns:
	        A ``(video_path, status)`` tuple. ``video_path`` is ``None`` whenever
	        the status text starts with ``"Error:"``.
	    """
	    if not enhanced or not enhanced.strip():
	        return None, "Error: No enhanced prompt."

	    # An emptied number field arrives as None; int(None) would raise, so fall
	    # back to the UI's documented "random" value.
	    seed_value = -1 if seed is None else int(seed)

	    print("[Generate] Sending to router ...")
	    result = generate(
	        image=image,
	        video_prompt=enhanced,
	        seed=seed_value,
	        output_dir=OUTPUT_DIR,
	        seconds=int(seconds),
	    )

	    if result["error"]:
	        print(f"[Generate] Error from router: {result['error']}")
	        return None, f"Error: {result['error']}"

	    video_path = result["video_path"]
	    # Only report success if the reported path is an actual file on disk.
	    if not video_path or not os.path.isfile(video_path):
	        return None, "Error: Video file not found."

	    status = f"Done. seed={result['seed']}"
	    print(f"[Generate] Success: {video_path}")
	    return video_path, status


	# ── Gradio UI ────────────────────────────────────────────────────────

	# Page title, used both for the Blocks title and the top-level heading.
	TITLE = "daVinci-MagiHuman — Audio-Video Generation"
	# Intro text rendered as Markdown below the heading. The pipes are written as
	# "\\|" (an escaped backslash followed by a pipe): the runtime text is the
	# same as with "\|", but without relying on an invalid escape sequence.
	DESCRIPTION = (
	    "Upload a reference image, describe what you want in the video, choose the "
	    "duration (4–10 s), and click Generate. Your prompt will be automatically "
	    "enhanced into the optimal format before generation.\n\n"
	    "Model: 15B single-stream Transformer (distilled, 8-step inference) "
	    "\\| Resolution: 448×256 → 540p \\| FPS: 25"
	)

	# Two-column page: inputs on the left, results on the right.
	# NOTE(review): the nesting below is reconstructed from statement order —
	# confirm that the Generate button belongs to the input column rather than
	# to the seed/duration row.
	with gr.Blocks(title=TITLE, theme=gr.themes.Soft()) as demo:
	    gr.Markdown(f"# {TITLE}")
	    gr.Markdown(DESCRIPTION)

	    with gr.Row():
	        # Input column: reference image, prompt, seed and duration.
	        with gr.Column(scale=1):
	            image_input = gr.Image(
	                label="Reference Image",
	                type="pil",
	                height=300,
	            )
	            prompt_input = gr.Textbox(
	                label="Video Description (will be auto-enhanced)",
	                placeholder="Describe the scene, character actions, dialogue, etc. Your prompt will be automatically enhanced for optimal generation.",
	                lines=6,
	            )
	            with gr.Row():
	                seed_input = gr.Number(
	                    label="Seed (-1 = random)",
	                    value=-1,
	                    precision=0,
	                )
	                seconds_slider = gr.Slider(
	                    minimum=4,
	                    maximum=10,
	                    step=1,
	                    value=5,
	                    label="Duration (seconds)",
	                )
	            generate_btn = gr.Button("Generate", variant="primary")

	        # Output column: video, the prompt actually sent, and a status line.
	        with gr.Column(scale=1):
	            video_output = gr.Video(label="Generated Video")
	            enhanced_prompt_box = gr.Textbox(
	                label="Enhanced Prompt (sent to model)",
	                interactive=False,
	                lines=8,
	            )
	            status_box = gr.Textbox(label="Status", interactive=False, lines=2)

	    # Step 1 fills the enhanced prompt box; step 2 reads it back. Step 2 is
	    # chained with .success() rather than .then() so it is skipped when step 1
	    # raises (missing image, empty prompt, blocked content). Otherwise a
	    # rejected request would still generate from whatever enhanced prompt was
	    # left in the box by an earlier run.
	    generate_btn.click(
	        fn=step1_enhance,
	        inputs=[image_input, prompt_input, seed_input, seconds_slider],
	        outputs=[enhanced_prompt_box],
	    ).success(
	        fn=step2_generate,
	        inputs=[image_input, enhanced_prompt_box, seed_input, seconds_slider],
	        outputs=[video_output, status_box],
	    )

	if __name__ == "__main__":
	    # The queue is enabled with default_concurrency_limit=None, which removes
	    # Gradio's concurrency gate: every click is forwarded to the router
	    # straight away and no throttling happens in this app.
	    queued_app = demo.queue(default_concurrency_limit=None)
	    queued_app.launch(server_name="0.0.0.0", server_port=7860)