# llaa33219's picture
# Update app.py
# 919bd60 verified
"""
SteadyDancer-14B - ZeroGPU ์ตœ์ ํ™” ๋ฒ„์ „
=====================================
์ฃผ์š” ๋ณ€๊ฒฝ์‚ฌํ•ญ:
1. subprocess ์ œ๊ฑฐ โ†’ ์ง์ ‘ Python import ์‚ฌ์šฉ (ZeroGPU ํ˜ธํ™˜์„ฑ)
2. ๋ชจ๋ธ ๋กœ๋”ฉ ์ตœ์ ํ™” (์ „์—ญ ์บ์‹ฑ + GPU ํ•จ์ˆ˜ ๋‚ด ์ด๋™)
3. duration ์กฐ์ • (300์ดˆ = ZeroGPU ์ตœ๋Œ€๊ฐ’)
4. ํ”„๋ ˆ์ž„ ์ˆ˜ ์ œํ•œ์œผ๋กœ ํƒ€์ž„์•„์›ƒ ๋ฐฉ์ง€
5. ํฌ์ฆˆ ์ถ”์ถœ์„ GPU ํ•จ์ˆ˜ ๋ฐ–์œผ๋กœ ๋ถ„๋ฆฌ
6. ๋ฉ”๋ชจ๋ฆฌ ์ตœ์ ํ™” (torch.cuda.empty_cache, gc)
"""
import gradio as gr
import spaces
import torch
import os
import gc
import tempfile
import sys
import shutil
from pathlib import Path
from PIL import Image
import cv2
import numpy as np
from huggingface_hub import snapshot_download
# ========== ์ƒ์ˆ˜ ์ •์˜ ==========
REPO_DIR = Path("SteadyDancer")
MODEL_DIR = Path("SteadyDancer-14B")
MAX_FRAMES = 49 # ZeroGPU ํ† ํฐ ๋งŒ๋ฃŒ ๋ฐฉ์ง€๋ฅผ ์œ„ํ•ด ์ œํ•œ
MAX_DURATION_SECONDS = 300 # ZeroGPU ์ตœ๋Œ€ ํ—ˆ์šฉ ์‹œ๊ฐ„
# ========== ์ „์—ญ ์บ์‹œ (CPU์— ์œ ์ง€) ==========
_pipe = None
_pose_detector = None
_repo_ready = False
def ensure_repo():
"""SteadyDancer ๋ ˆํฌ์ง€ํ† ๋ฆฌ ํด๋ก  ๋ฐ ์˜์กด์„ฑ ์„ค์น˜ (1ํšŒ๋งŒ)"""
global _repo_ready
if _repo_ready:
return
if not REPO_DIR.exists():
print("๐Ÿ“ฅ Cloning SteadyDancer repository...")
import git
git.Repo.clone_from(
"https://github.com/MCG-NJU/SteadyDancer.git",
str(REPO_DIR),
depth=1
)
# ๋ ˆํฌ์˜ requirements.txt ์„ค์น˜
repo_requirements = REPO_DIR / "requirements.txt"
if repo_requirements.exists():
print("๐Ÿ“ฆ Installing SteadyDancer requirements...")
import subprocess
subprocess.run([
sys.executable, "-m", "pip", "install", "-q",
"-r", str(repo_requirements)
], check=False)
if str(REPO_DIR) not in sys.path:
sys.path.insert(0, str(REPO_DIR))
_repo_ready = True
print("โœ… Repository ready")
def ensure_model():
"""๋ชจ๋ธ ๊ฐ€์ค‘์น˜ ๋‹ค์šด๋กœ๋“œ (1ํšŒ๋งŒ)"""
if not MODEL_DIR.exists():
print("๐Ÿ“ฅ Downloading SteadyDancer-14B model weights...")
snapshot_download(
repo_id="MCG-NJU/SteadyDancer-14B",
local_dir=str(MODEL_DIR),
resume_download=True
)
print("โœ… Model weights downloaded")
def get_pose_detector():
"""ํฌ์ฆˆ ๋””ํ…ํ„ฐ ๋กœ๋“œ (CPU์— ์œ ์ง€, ํ•„์š”์‹œ GPU๋กœ ์ด๋™)"""
global _pose_detector
if _pose_detector is None:
print("๐Ÿ“ฅ Loading DWPose detector...")
try:
from controlnet_aux import DWposeDetector
_pose_detector = DWposeDetector.from_pretrained("lllyasviel/Annotators")
except Exception as e:
print(f"โš ๏ธ DWPose ๋กœ๋“œ ์‹คํŒจ, OpenPose๋กœ ๋Œ€์ฒด: {e}")
from controlnet_aux import OpenposeDetector
_pose_detector = OpenposeDetector.from_pretrained("lllyasviel/Annotators")
print("โœ… Pose detector loaded")
return _pose_detector
def extract_poses_from_video(video_path, output_dir, max_frames=MAX_FRAMES, progress_callback=None):
"""
๋“œ๋ผ์ด๋น™ ๋น„๋””์˜ค์—์„œ ํฌ์ฆˆ ์ถ”์ถœ (CPU์—์„œ ์‹คํ–‰)
- GPU ์‹œ๊ฐ„ ์ ˆ์•ฝ์„ ์œ„ํ•ด @spaces.GPU ๋ฐ–์—์„œ ์‹คํ–‰
- ํ”„๋ ˆ์ž„ ์ˆ˜ ์ œํ•œ์œผ๋กœ ํƒ€์ž„์•„์›ƒ ๋ฐฉ์ง€
"""
pose_detector = get_pose_detector()
# GPU๊ฐ€ ์žˆ์œผ๋ฉด ์ด๋™ (ZeroGPU๊ฐ€ ์•„๋‹Œ ํ™˜๊ฒฝ์—์„œ)
device = "cuda" if torch.cuda.is_available() else "cpu"
try:
pose_detector = pose_detector.to(device)
except:
pass # ZeroGPU์—์„œ๋Š” ์‹คํŒจํ•  ์ˆ˜ ์žˆ์Œ
cap = cv2.VideoCapture(str(video_path))
fps = cap.get(cv2.CAP_PROP_FPS) or 24
total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
# ํ”„๋ ˆ์ž„ ์ˆ˜ ์ œํ•œ
target_frames = min(total_frames, max_frames)
# ํ”„๋ ˆ์ž„ ์ƒ˜ํ”Œ๋ง (์›๋ณธ ํ”„๋ ˆ์ž„์ด ๋งŽ์œผ๋ฉด ๊ท ๋“ฑ ์ƒ˜ํ”Œ๋ง)
if total_frames > max_frames:
frame_indices = np.linspace(0, total_frames - 1, max_frames, dtype=int)
else:
frame_indices = list(range(total_frames))
pos_dir = Path(output_dir) / "positive"
neg_dir = Path(output_dir) / "negative"
pos_dir.mkdir(parents=True, exist_ok=True)
neg_dir.mkdir(parents=True, exist_ok=True)
extracted_count = 0
for idx, frame_idx in enumerate(frame_indices):
cap.set(cv2.CAP_PROP_POS_FRAMES, frame_idx)
ret, frame = cap.read()
if not ret:
continue
frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
pil_image = Image.fromarray(frame_rgb)
try:
with torch.inference_mode():
pose_image = pose_detector(pil_image)
except Exception as e:
print(f"โš ๏ธ Frame {idx} pose extraction failed: {e}")
pose_image = Image.new('RGB', pil_image.size, (0, 0, 0))
pose_image.save(pos_dir / f"{idx:04d}.jpg")
pose_image.save(neg_dir / f"{idx:04d}.jpg")
extracted_count += 1
if progress_callback:
progress_callback(idx / len(frame_indices))
cap.release()
# ๋ฉ”๋ชจ๋ฆฌ ์ •๋ฆฌ
if torch.cuda.is_available():
torch.cuda.empty_cache()
gc.collect()
return str(pos_dir), str(neg_dir), fps, extracted_count
def load_steadydancer_components():
"""
SteadyDancer ์ปดํฌ๋„ŒํŠธ ๋กœ๋“œ (generate.py ๋ฐฉ์‹ ์ฐธ์กฐ)
ZeroGPU์—์„œ๋Š” ๋งค ํ˜ธ์ถœ๋งˆ๋‹ค ์ƒˆ๋กœ ๋กœ๋“œํ•ด์•ผ ํ•จ
"""
ensure_repo()
ensure_model()
print("๐Ÿ“ฅ Loading SteadyDancer components...")
# SteadyDancer ๋‚ด๋ถ€ ๋ชจ๋“ˆ import
from wan.configs import WAN_CONFIGS
from wan.modules.vae import WanVAE
from wan.modules.t5 import T5EncoderModel
from wan.modules.clip import CLIPModel
from wan.modules.model import WanModel
cfg = WAN_CONFIGS["i2v-14B"]
# ๋ชจ๋ธ ๊ฒฝ๋กœ ์„ค์ •
ckpt_dir = str(MODEL_DIR)
# T5 ํ…์ŠคํŠธ ์ธ์ฝ”๋”
t5_encoder = T5EncoderModel(
text_len=cfg.text_len,
dtype=cfg.t5_dtype,
device="cuda",
checkpoint_path=f"{ckpt_dir}/models_t5_umt5-xxl-enc-bf16.pth",
tokenizer_path=f"{ckpt_dir}/google_umt5-xxl",
spiece_path=f"{ckpt_dir}/google_umt5-xxl/spiece.model",
)
# CLIP ๋น„์ „ ์ธ์ฝ”๋”
clip_encoder = CLIPModel(
dtype=cfg.clip_dtype,
device="cuda",
checkpoint_path=f"{ckpt_dir}/models_clip_open-clip-xlm-roberta-large-vit-huge-14.pth",
)
# VAE
vae = WanVAE(
vae_pth=f"{ckpt_dir}/Wan2.1_VAE.pth",
device="cuda",
dtype=cfg.vae_dtype,
)
# Main Model (DiT)
model = WanModel.from_pretrained(ckpt_dir, torch_dtype=torch.bfloat16)
model = model.to("cuda")
model.eval()
print("โœ… All components loaded")
return cfg, t5_encoder, clip_encoder, vae, model
@spaces.GPU(duration=MAX_DURATION_SECONDS)
def generate_video_gpu(
ref_image_path: str,
pos_folder: str,
neg_folder: str,
prompt: str,
cfg_scale: float,
condition_guide_scale: float,
seed: int,
width: int,
height: int,
output_path: str,
num_frames: int = 49,
):
"""
GPU์—์„œ ๋น„๋””์˜ค ์ƒ์„ฑ (SteadyDancer ๋‚ด๋ถ€ API ์ง์ ‘ ์‚ฌ์šฉ)
"""
import random
import subprocess
from PIL import Image
print(f"๐ŸŽฌ Starting generation: {width}x{height}, seed={seed}, frames={num_frames}")
# ์‹œ๋“œ ์„ค์ •
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
torch.cuda.manual_seed_all(seed)
try:
# SteadyDancer ๋‚ด๋ถ€ ๋ชจ๋“ˆ import
if str(REPO_DIR) not in sys.path:
sys.path.insert(0, str(REPO_DIR))
from wan.pipelines.pipeline_dancer import DancerPipeline
from wan.configs import WAN_CONFIGS
cfg = WAN_CONFIGS["i2v-14B"]
# ํŒŒ์ดํ”„๋ผ์ธ ์ƒ์„ฑ
print("๐Ÿ“ฆ Creating DancerPipeline...")
pipe = DancerPipeline(
config=cfg,
checkpoint_dir=str(MODEL_DIR),
device_id=0,
dtype=torch.bfloat16,
)
# ์ฐธ์กฐ ์ด๋ฏธ์ง€ ๋กœ๋“œ
ref_image = Image.open(ref_image_path).convert("RGB")
# ์ƒ์„ฑ ์‹คํ–‰
print("๐ŸŽจ Running inference...")
output = pipe.generate(
image=ref_image,
prompt=prompt,
cond_pos_folder=pos_folder,
cond_neg_folder=neg_folder,
size=f"{width}*{height}",
num_frames=num_frames,
sample_guide_scale=cfg_scale,
condition_guide_scale=condition_guide_scale,
seed=seed,
save_path=output_path,
)
print(f"โœ… Generation complete!")
except Exception as e:
print(f"โš ๏ธ Direct API failed: {e}")
print("โš ๏ธ Trying CLI fallback...")
import traceback
traceback.print_exc()
# Fallback: CLI ์‹คํ–‰
cmd = [
sys.executable, str(REPO_DIR / "generate_dancer.py"),
"--task", "i2v-14B",
"--size", f"{width}*{height}",
"--image", ref_image_path,
"--cond_pos_folder", pos_folder,
"--cond_neg_folder", neg_folder,
"--prompt", prompt,
"--save_file", output_path,
"--sample_guide_scale", str(cfg_scale),
"--condition_guide_scale", str(condition_guide_scale),
"--base_seed", str(seed),
"--ckpt_dir", str(MODEL_DIR),
]
result = subprocess.run(cmd, capture_output=True, text=True, timeout=250)
if result.returncode != 0:
error_msg = result.stderr or result.stdout or str(e)
raise gr.Error(f"์ƒ์„ฑ ์‹คํŒจ: {error_msg[:300]}")
finally:
# ๋ฉ”๋ชจ๋ฆฌ ์ •๋ฆฌ
if torch.cuda.is_available():
torch.cuda.empty_cache()
gc.collect()
return output_path
def generate_video(
reference_image,
driving_video,
prompt,
cfg_scale,
condition_guide_scale,
seed,
resolution,
max_frames,
progress=gr.Progress()
):
"""
๋ฉ”์ธ ์ƒ์„ฑ ํ•จ์ˆ˜ (Gradio ์ธํ„ฐํŽ˜์ด์Šค)
- ํฌ์ฆˆ ์ถ”์ถœ: CPU (GPU ์‹œ๊ฐ„ ์ ˆ์•ฝ)
- ๋น„๋””์˜ค ์ƒ์„ฑ: GPU (@spaces.GPU)
์ค‘์š”: ZeroGPU ํ† ํฐ ๋งŒ๋ฃŒ ๋ฐฉ์ง€๋ฅผ ์œ„ํ•ด ์ตœ๋Œ€ํ•œ ๋นจ๋ฆฌ GPU ํ•จ์ˆ˜ ํ˜ธ์ถœ
"""
if reference_image is None:
raise gr.Error("โŒ ์ฐธ์กฐ ์ด๋ฏธ์ง€๋ฅผ ์—…๋กœ๋“œํ•ด์ฃผ์„ธ์š”.")
if driving_video is None:
raise gr.Error("โŒ ๋“œ๋ผ์ด๋น™ ๋น„๋””์˜ค๋ฅผ ์—…๋กœ๋“œํ•ด์ฃผ์„ธ์š”.")
# ๋ชจ๋ธ์ด ์ค€๋น„๋˜์ง€ ์•Š์•˜์œผ๋ฉด ์—๋Ÿฌ (warmup์—์„œ ๋ฏธ๋ฆฌ ๋˜์–ด์•ผ ํ•จ)
if not MODEL_DIR.exists():
progress(0.1, desc="โณ ๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ ์ค‘ (์ฒซ ์‹คํ–‰ ์‹œ ์˜ค๋ž˜ ๊ฑธ๋ฆผ)...")
ensure_model()
progress(0.05, desc="๐Ÿ”ง ํ™˜๊ฒฝ ์„ค์ • ์ค‘...")
with tempfile.TemporaryDirectory() as tmpdir:
tmpdir = Path(tmpdir)
# 1. ์ฐธ์กฐ ์ด๋ฏธ์ง€ ์ €์žฅ (๋น ๋ฆ„)
progress(0.08, desc="๐Ÿ“ธ ์ด๋ฏธ์ง€ ์ฒ˜๋ฆฌ ์ค‘...")
ref_image_path = tmpdir / "reference.png"
if isinstance(reference_image, str):
shutil.copy(reference_image, ref_image_path)
elif isinstance(reference_image, np.ndarray):
Image.fromarray(reference_image).save(ref_image_path)
else:
reference_image.save(ref_image_path)
# 2. ํฌ์ฆˆ ์ถ”์ถœ (CPU - ์ตœ๋Œ€ํ•œ ๋นจ๋ฆฌ!)
progress(0.1, desc="๐Ÿ•บ ํฌ์ฆˆ ์ถ”์ถœ ์ค‘...")
pose_dir = tmpdir / "poses"
pose_dir.mkdir(exist_ok=True)
# ํ”„๋ ˆ์ž„ ์ˆ˜ ๋” ์ œํ•œ (ํ† ํฐ ๋งŒ๋ฃŒ ๋ฐฉ์ง€)
actual_max_frames = min(int(max_frames), 49) # ์ตœ๋Œ€ 49ํ”„๋ ˆ์ž„์œผ๋กœ ์ œํ•œ
def pose_progress(p):
progress(0.1 + 0.25 * p, desc=f"๐Ÿ•บ ํฌ์ฆˆ ์ถ”์ถœ ์ค‘... {int(p*100)}%")
pos_folder, neg_folder, fps, frame_count = extract_poses_from_video(
driving_video,
pose_dir,
max_frames=actual_max_frames,
progress_callback=pose_progress
)
if frame_count == 0:
raise gr.Error("โŒ ๋น„๋””์˜ค์—์„œ ํ”„๋ ˆ์ž„์„ ์ถ”์ถœํ•  ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
progress(0.35, desc=f"โœ… {frame_count}๊ฐœ ํ”„๋ ˆ์ž„ ์ถ”์ถœ ์™„๋ฃŒ")
# 3. ํ•ด์ƒ๋„ ํŒŒ์‹ฑ
width, height = map(int, resolution.split("x"))
# 4. ์ถœ๋ ฅ ๊ฒฝ๋กœ
output_path = str(tmpdir / "output.mp4")
# 5. GPU ์ƒ์„ฑ ์‹คํ–‰ (์ตœ๋Œ€ํ•œ ๋นจ๋ฆฌ ํ˜ธ์ถœ!)
progress(0.4, desc="๐ŸŽฌ ๋น„๋””์˜ค ์ƒ์„ฑ ์ค‘ (GPU)...")
final_prompt = prompt.strip() if prompt and prompt.strip() else "A person dancing gracefully"
try:
generate_video_gpu(
ref_image_path=str(ref_image_path),
pos_folder=pos_folder,
neg_folder=neg_folder,
prompt=final_prompt,
cfg_scale=cfg_scale,
condition_guide_scale=condition_guide_scale,
seed=int(seed),
width=width,
height=height,
output_path=output_path,
num_frames=frame_count,
)
except Exception as e:
error_msg = str(e)
if "Expired ZeroGPU proxy token" in error_msg:
raise gr.Error(
"โŒ ZeroGPU ํ† ํฐ ๋งŒ๋ฃŒ๋จ. ํŽ˜์ด์ง€๋ฅผ ์ƒˆ๋กœ๊ณ ์นจํ•˜๊ณ  ๋‹ค์‹œ ์‹œ๋„ํ•ด์ฃผ์„ธ์š”. "
"ํŒ: ํ”„๋ ˆ์ž„ ์ˆ˜๋ฅผ 30 ์ดํ•˜๋กœ ์ค„์—ฌ๋ณด์„ธ์š”."
)
raise gr.Error(f"โŒ ์ƒ์„ฑ ์‹คํŒจ: {error_msg[:300]}")
progress(0.95, desc="๐Ÿ“ผ ๋น„๋””์˜ค ์ €์žฅ ์ค‘...")
# 6. ์ตœ์ข… ์ถœ๋ ฅ ๋ณต์‚ฌ
final_output = Path(tempfile.gettempdir()) / f"steadydancer_output_{seed}.mp4"
if Path(output_path).exists():
shutil.copy(output_path, final_output)
else:
raise gr.Error("โŒ ์ถœ๋ ฅ ํŒŒ์ผ์„ ์ฐพ์„ ์ˆ˜ ์—†์Šต๋‹ˆ๋‹ค.")
progress(1.0, desc="โœ… ์™„๋ฃŒ!")
return str(final_output)
# ========== Gradio UI ==========
with gr.Blocks(
title="SteadyDancer-14B - ZeroGPU Optimized",
theme=gr.themes.Soft(),
css="""
.main-title { text-align: center; margin-bottom: 1rem; }
.warning-box {
background: linear-gradient(135deg, #fff3cd 0%, #ffeeba 100%);
border: 1px solid #ffc107;
border-radius: 8px;
padding: 1rem;
margin: 1rem 0;
}
.tip-box {
background: linear-gradient(135deg, #d4edda 0%, #c3e6cb 100%);
border: 1px solid #28a745;
border-radius: 8px;
padding: 1rem;
margin-top: 1rem;
}
"""
) as demo:
gr.Markdown("""
# ๐Ÿ•บ SteadyDancer-14B (ZeroGPU ์ตœ์ ํ™”)
## Pose-Guided Human Image Animation
**๋“œ๋ผ์ด๋น™ ๋น„๋””์˜ค์˜ ๋™์ž‘์„ ์ฐธ์กฐ ์ด๋ฏธ์ง€์— ์ „์†กํ•ฉ๋‹ˆ๋‹ค!**
๐Ÿ“ [Paper](https://arxiv.org/abs/2412.12534) |
๐Ÿ”— [GitHub](https://github.com/MCG-NJU/SteadyDancer) |
๐Ÿค— [Model](https://huggingface.co/MCG-NJU/SteadyDancer-14B)
""", elem_classes=["main-title"])
gr.Markdown("""
### โš ๏ธ ZeroGPU ์ œํ•œ์‚ฌํ•ญ
- **์ตœ๋Œ€ ์‹คํ–‰ ์‹œ๊ฐ„**: 5๋ถ„ (300์ดˆ)
- **๊ถŒ์žฅ ํ”„๋ ˆ์ž„ ์ˆ˜**: 20-30 ํ”„๋ ˆ์ž„ (ํƒ€์ž„์•„์›ƒ/ํ† ํฐ ๋งŒ๋ฃŒ ๋ฐฉ์ง€)
- **๊ถŒ์žฅ ํ•ด์ƒ๋„**: 480x832 ๋˜๋Š” ๋” ๋‚ฎ์€ ํ•ด์ƒ๋„
- **์ฒซ ์‹คํ–‰**: ๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ๋กœ ์‹œ๊ฐ„์ด ๊ฑธ๋ฆด ์ˆ˜ ์žˆ์Œ โ†’ ํŽ˜์ด์ง€ ์ƒˆ๋กœ๊ณ ์นจ ํ›„ ์žฌ์‹œ๋„
""", elem_classes=["warning-box"])
with gr.Row():
with gr.Column(scale=1):
gr.Markdown("### ๐Ÿ“ธ ์ž…๋ ฅ")
reference_image = gr.Image(
label="์ฐธ์กฐ ์ด๋ฏธ์ง€ (์• ๋‹ˆ๋ฉ”์ด์…˜ํ•  ์ธ๋ฌผ)",
type="numpy",
sources=["upload", "clipboard"],
height=280
)
driving_video = gr.Video(
label="๋“œ๋ผ์ด๋น™ ๋น„๋””์˜ค (๋™์ž‘ ์†Œ์Šค)",
sources=["upload"],
height=280
)
prompt = gr.Textbox(
label="ํ”„๋กฌํ”„ํŠธ (์„ ํƒ์‚ฌํ•ญ)",
placeholder="์˜ˆ: A person dancing gracefully in a studio",
value=""
)
with gr.Accordion("โš™๏ธ ๊ณ ๊ธ‰ ์„ค์ •", open=True):
resolution = gr.Dropdown(
label="์ถœ๋ ฅ ํ•ด์ƒ๋„",
choices=[
"480x832", # ์„ธ๋กœ (๊ถŒ์žฅ)
"832x480", # ๊ฐ€๋กœ (๊ถŒ์žฅ)
"576x1024", # ์„ธ๋กœ HD
"1024x576", # ๊ฐ€๋กœ HD
"720x1280", # ์„ธ๋กœ HD+
"1280x720", # ๊ฐ€๋กœ HD+
],
value="480x832",
info="โšก ๋‚ฎ์€ ํ•ด์ƒ๋„ = ๋น ๋ฅธ ์ƒ์„ฑ + ํƒ€์ž„์•„์›ƒ ๋ฐฉ์ง€"
)
max_frames = gr.Slider(
label="์ตœ๋Œ€ ํ”„๋ ˆ์ž„ ์ˆ˜",
minimum=10,
maximum=49,
value=30, # ๊ธฐ๋ณธ๊ฐ’ ๋‚ฎ์ถค
step=1,
info="โšก ์ ์€ ํ”„๋ ˆ์ž„ = ๋น ๋ฅธ ์ƒ์„ฑ + ํ† ํฐ ๋งŒ๋ฃŒ ๋ฐฉ์ง€ (30 ๊ถŒ์žฅ)"
)
cfg_scale = gr.Slider(
label="CFG Scale",
minimum=1.0,
maximum=10.0,
value=5.0,
step=0.5
)
condition_guide_scale = gr.Slider(
label="Condition Guide Scale",
minimum=0.0,
maximum=2.0,
value=1.0,
step=0.1
)
seed = gr.Slider(
label="์‹œ๋“œ",
minimum=0,
maximum=999999,
value=42,
step=1
)
generate_btn = gr.Button(
"๐ŸŽฌ ๋น„๋””์˜ค ์ƒ์„ฑ",
variant="primary",
size="lg"
)
with gr.Column(scale=1):
gr.Markdown("### ๐ŸŽฅ ์ถœ๋ ฅ")
output_video = gr.Video(
label="์ƒ์„ฑ๋œ ๋น„๋””์˜ค",
height=450,
autoplay=True
)
gr.Markdown("""
### ๐Ÿ’ก ํŒ
- **์ฐธ์กฐ ์ด๋ฏธ์ง€**: ์ „์‹ ์ด ๋ณด์ด๊ณ  ๋ฐฐ๊ฒฝ์ด ๋‹จ์ˆœํ•œ ์ด๋ฏธ์ง€๊ฐ€ ์ข‹์Šต๋‹ˆ๋‹ค
- **๋“œ๋ผ์ด๋น™ ๋น„๋””์˜ค**: 3-5์ดˆ ์ •๋„์˜ ์งง์€ ๋น„๋””์˜ค๊ฐ€ ์ข‹์Šต๋‹ˆ๋‹ค
- **ํƒ€์ž„์•„์›ƒ ๋ฐœ์ƒ ์‹œ**: ํ”„๋ ˆ์ž„ ์ˆ˜์™€ ํ•ด์ƒ๋„๋ฅผ ๋‚ฎ์ถฐ๋ณด์„ธ์š”
- **์ฒซ ์‹คํ–‰**: ๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ๋กœ ์‹œ๊ฐ„์ด ๊ฑธ๋ฆด ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค
""", elem_classes=["tip-box"])
generate_btn.click(
fn=generate_video,
inputs=[
reference_image,
driving_video,
prompt,
cfg_scale,
condition_guide_scale,
seed,
resolution,
max_frames,
],
outputs=output_video
)
def warmup():
"""
Space ์‹œ์ž‘ ์‹œ ๋ชจ๋“  ์ค€๋น„ ์ž‘์—… ์ˆ˜ํ–‰
- ๋ ˆํฌ ํด๋ก 
- ์˜์กด์„ฑ ์„ค์น˜
- ๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ (๊ฐ€์žฅ ์˜ค๋ž˜ ๊ฑธ๋ฆผ!)
- ํฌ์ฆˆ ๋””ํ…ํ„ฐ ๋กœ๋“œ
์ด๋ ‡๊ฒŒ ํ•˜๋ฉด ์‚ฌ์šฉ์ž ์š”์ฒญ ์‹œ ZeroGPU ํ† ํฐ ๋งŒ๋ฃŒ ๋ฐฉ์ง€
"""
import subprocess
print("๐Ÿš€ Warming up SteadyDancer-14B...")
# 1. ํ•„์ˆ˜ ์˜์กด์„ฑ ๋จผ์ € ์„ค์น˜
print("๐Ÿ“ฆ Checking dependencies...")
deps_to_install = []
try:
import easydict
except ImportError:
deps_to_install.append("easydict")
try:
import einops
except ImportError:
deps_to_install.append("einops")
try:
import ftfy
except ImportError:
deps_to_install.append("ftfy")
try:
import decord
except ImportError:
deps_to_install.append("decord")
if deps_to_install:
print(f"๐Ÿ“ฆ Installing missing dependencies: {deps_to_install}")
subprocess.run(
[sys.executable, "-m", "pip", "install", "-q"] + deps_to_install,
check=False
)
# 2. ๋ ˆํฌ ํด๋ก 
ensure_repo()
print("โœ… Repository ready")
# 3. ๋ชจ๋ธ ๋‹ค์šด๋กœ๋“œ (๊ฐ€์žฅ ์ค‘์š”!)
ensure_model()
print("โœ… Model weights ready")
# 4. ํฌ์ฆˆ ๋””ํ…ํ„ฐ ๋ฏธ๋ฆฌ ๋กœ๋“œ (์„ ํƒ์ )
try:
get_pose_detector()
print("โœ… Pose detector ready")
except Exception as e:
print(f"โš ๏ธ Pose detector will be loaded on first use: {e}")
# 5. SteadyDancer ๋ชจ๋“ˆ import ํ…Œ์ŠคํŠธ
try:
sys.path.insert(0, str(REPO_DIR))
from wan.configs import WAN_CONFIGS
print("โœ… SteadyDancer modules importable")
except Exception as e:
print(f"โš ๏ธ SteadyDancer import test failed: {e}")
print(" (Will try again during generation)")
print("๐ŸŽ‰ Warmup complete! Ready for requests.")
if __name__ == "__main__":
# Space ์‹œ์ž‘ ์‹œ ๋ชจ๋“  ์ค€๋น„ ์ž‘์—… ๋ฏธ๋ฆฌ ์ˆ˜ํ–‰
warmup()
demo.launch()