Spaces:

naicoi
/

lipsync-docker

Runtime error

App Files Files Community

lipsync-docker / lipsync.py

naicoi

model-dirs (#2)

f5651ba 4 months ago

raw

history blame contribute delete

4.88 kB

	import gc
	import os
	import time
	import traceback

	import torch

	from DeepCache import DeepCacheSDHelper
	from latentsync.pipelines.lipsync_pipeline import LipsyncPipeline
	from shared.model_manager import ModelManager
	from config import MODELS_DIR

	# cuDNN tuning for inference: do not restrict cuDNN to deterministic
	# algorithms, and let it benchmark the available convolution algorithms
	# so it can pick the fastest one for the shapes it sees.
	torch.backends.cudnn.deterministic = False
	torch.backends.cudnn.benchmark = True

	# Make sure the directory named by config.MODELS_DIR exists at import time;
	# exist_ok=True makes this a no-op when it is already there.
	os.makedirs(MODELS_DIR, exist_ok=True)


	def get_quality_params(level: str) -> tuple:
	    """Translate a named quality preset into lipsync inference settings.

	    Args:
	        level: Preset name. Recognised values are "Fast", "Normal",
	            "Medium", "Best" and "Super Best". The match is exact and
	            case-sensitive.

	    Returns:
	        A ``(num_frames, num_inference_steps, guidance_scale)`` tuple.
	        Any unrecognised name yields ``(12, 20, 1.0)``.
	    """
	    # NOTE: the fallback is not the same tuple as the "Normal" preset
	    # (its guidance_scale is 1.0 rather than 1.5).
	    fallback = (12, 20, 1.0)

	    presets = {
	        "Fast": (12, 15, 1.0),
	        "Normal": (12, 20, 1.5),
	        "Medium": (16, 30, 1.5),
	        "Best": (20, 40, 1.5),
	        "Super Best": (24, 50, 1.5),
	    }

	    if level in presets:
	        return presets[level]
	    return fallback


	def _mask_path_for(video_out_path):
	    """Return the companion mask-video path for ``video_out_path``.

	    ``_mask`` is inserted in front of the file extension, so ``out.mp4``
	    becomes ``out_mask.mp4``. A path without an extension gets ``.mp4``
	    appended, so the mask path can never be identical to the output path.
	    """
	    stem, ext = os.path.splitext(video_out_path)
	    return f"{stem}_mask{ext or '.mp4'}"


	def apply_lipsync(
	    video_input_path, audio_path, video_out_path, crop_size=256, quality_level="Normal"
	):
	    """Run the LatentSync lipsync pipeline on a video/audio pair.

	    The VAE, whisper audio encoder, UNet and scheduler are obtained from the
	    ``ModelManager`` singleton, assembled into a ``LipsyncPipeline`` on the
	    GPU, and run under ``torch.no_grad()`` with float16 weights and DeepCache
	    enabled (cache_interval=3, cache_branch_id=0). Progress is reported with
	    ``print``.

	    Args:
	        video_input_path: Path of the source video handed to the pipeline.
	        audio_path: Path of the driving audio handed to the pipeline.
	        video_out_path: Path the pipeline should write the result to. A
	            companion mask video path is derived from it by inserting
	            ``_mask`` before the extension.
	        crop_size: Value passed as both ``width`` and ``height``.
	        quality_level: Preset name resolved by ``get_quality_params``.

	    Returns:
	        ``video_out_path`` once the file has been confirmed to exist.

	    Raises:
	        RuntimeError: If CUDA is unavailable, if the GPU runs out of memory
	            during inference, or if the pipeline returns without having
	            created ``video_out_path``.
	        Exception: Any other failure is logged and re-raised unchanged.
	    """
	    banner = "=" * 60
	    print(f"\n{banner}")
	    print("LIPSYNC START")
	    print(f"Input video: {video_input_path}")
	    print(f"Input audio: {audio_path}")
	    print(f"Output video: {video_out_path}")
	    print(f"Crop size: {crop_size}x{crop_size}")
	    print(f"{banner}\n")

	    try:
	        # Fail fast with a clear message *before* anything is moved to the
	        # GPU; checking after `.to("cuda")` can never trigger usefully.
	        if not torch.cuda.is_available():
	            raise RuntimeError("CUDA not available - GPU required for lipsync")

	        manager = ModelManager.get_instance()

	        config = manager.get_latentsync_config()
	        vae = manager.load_vae()
	        audio_encoder = manager.load_whisper_encoder(
	            manager.get_whisper_model_path(config.model.cross_attention_dim),
	            "cuda",
	            config.data.num_frames,
	        )
	        unet = manager.load_latentsync_unet()
	        scheduler = manager.get_scheduler()

	        pipeline = LipsyncPipeline(
	            vae=vae,
	            audio_encoder=audio_encoder,
	            unet=unet,
	            scheduler=scheduler,
	        ).to("cuda")

	        num_frames, num_inference_steps, guidance_scale = get_quality_params(
	            quality_level
	        )

	        print(f"\nQuality level: {quality_level}")
	        print("Parameters:")
	        print(f" num_frames: {num_frames}")
	        print(f" num_inference_steps: {num_inference_steps}")
	        print(f" guidance_scale: {guidance_scale}")
	        print(f" resolution: {config.data.resolution}")

	        print(f"Initial seed: {torch.initial_seed()}")

	        print("Enabling DeepCache (cache_interval=3, cache_branch_id=0)...")
	        deepcache_helper = DeepCacheSDHelper(pipe=pipeline)
	        deepcache_helper.set_params(cache_interval=3, cache_branch_id=0)
	        # enable() is immediately followed by the try/finally that undoes it,
	        # so no failure can leave the helper's hooks installed.
	        deepcache_helper.enable()

	        try:
	            print("\nStarting pipeline inference...")
	            print(
	                f"Parameters: num_frames={num_frames}, num_inference_steps={num_inference_steps}, "
	                f"guidance_scale={guidance_scale}, size={crop_size}x{crop_size}"
	            )

	            with torch.no_grad():
	                pipeline(
	                    video_path=video_input_path,
	                    audio_path=audio_path,
	                    video_out_path=video_out_path,
	                    video_mask_path=_mask_path_for(video_out_path),
	                    num_frames=num_frames,
	                    num_inference_steps=num_inference_steps,
	                    guidance_scale=guidance_scale,
	                    weight_dtype=torch.float16,
	                    width=crop_size,
	                    height=crop_size,
	                )
	            print("Pipeline completed successfully")

	        except RuntimeError as e:
	            print(f"RuntimeError in pipeline: {e}")
	            if "out of memory" in str(e).lower():
	                print("GPU OOM DETECTED!")
	                torch.cuda.empty_cache()
	                # Re-raise with actionable advice, keeping the original
	                # error attached as the cause.
	                raise RuntimeError(
	                    "GPU out of memory during lipsync. Try: 1) Shorter video 2) Lower resolution 3) Close other GPU apps"
	                ) from e
	            raise
	        except Exception as e:
	            print(f"Unexpected error in pipeline: {e}")
	            print(f"Error type: {type(e).__name__}")
	            traceback.print_exc()
	            raise
	        finally:
	            try:
	                # Undo the DeepCache hooks. NOTE(review): presumably the
	                # manager hands out the same UNet on later calls, in which
	                # case leaving them installed would stack wrappers - confirm.
	                deepcache_helper.disable()
	            finally:
	                print("Clearing GPU cache...")
	                # Collect first so tensors freed by the GC go back to the
	                # caching allocator before the cache is emptied.
	                gc.collect()
	                torch.cuda.empty_cache()

	        # Brief pause before checking for the output file (kept from the
	        # original; presumably lets the writer finish flushing).
	        time.sleep(1)
	        if not os.path.exists(video_out_path):
	            raise RuntimeError(
	                f"Pipeline succeeded but output file not created: {video_out_path}"
	            )

	        # Only report success once the output has been verified.
	        print(f"\n{banner}")
	        print(f"LIPSYNC SUCCESS - Output: {video_out_path}")
	        print(f"{banner}\n")

	        return video_out_path

	    except Exception as e:
	        print(f"\n{banner}")
	        print(f"LIPSYNC FAILED: {type(e).__name__}")
	        print(f"Error: {e}")
	        print(f"{banner}\n")
	        traceback.print_exc()
	        raise