"""Modal smoke test for the OpenLipSync / LatentSync container image.

Source: openlipsync/scripts/tests/modal_lipsync_test.py
(originally uploaded by miguelamendez, commit 75da08b).
"""
import modal
# Container image for LatentSync inference: NVIDIA CUDA 12.8 devel base with
# Python 3.11, the ML/audio/video dependency stack, and the local `latentsync`
# package copied into the image at /latentsync.
lipsync_image = (
    modal.Image.from_registry("nvidia/cuda:12.8.0-devel-ubuntu22.04", add_python="3.11")
    .uv_pip_install(
        [
            "torch",
            "torchvision",
            "xformers",
            "triton",
            "diffusers",
            "transformers",
            "huggingface-hub",
            "imageio==2.27.0",
            "decord==0.6.0",
            "accelerate",
            "einops==0.7.0",
            "omegaconf==2.3.0",
            "safetensors>=0.4.3",
            "opencv-python==4.9.0.80",
            "mediapipe==0.10.11",
            "av==11.0.0",
            "torch-fidelity==0.3.0",
            "torchmetrics==1.3.1",
            "python_speech_features==0.6",
            "librosa==0.10.1",
            "scenedetect==0.6.1",
            "ffmpeg-python==0.2.0",
            "lpips==0.1.4",
            "face-alignment==1.4.1",
            "ninja==1.11.1.1",
            "pandas==2.0.3",
            "numpy<2",
            "pydub==0.25.1",
            "moviepy==1.0.3",
            "hf-xet==1.1.8",
        ]
    )
    .apt_install(
        [
            "libgl1",
            "curl",
            "git",
            "wget",
            "ffmpeg",
        ]
    )
    # Enable the faster hf-transfer download backend for Hugging Face Hub.
    .env({"HF_HUB_ENABLE_HF_TRANSFER": "1"})
    # Remove the NVIDIA base container's entrypoint so Modal controls startup.
    .entrypoint([])
    # BUG FIX: the original chained `.add.add_local_dir(...)`, which raises
    # AttributeError at import time; the correct Image method is
    # `.add_local_dir(...)`.
    .add_local_dir(
        "/home/misha/OpenLipSync/latentsync",
        remote_path="/latentsync",
    )
)
# Create the Modal app that hosts the remote inference function below.
# (Removed a commented-out `with lipsync_image.imports():` block — dead code;
# the imports are done inside `inference` instead.)
app = modal.App("lipsync-dummy")
@app.function(
    image=lipsync_image,
    timeout=300,
)
def inference(
    video_url="https://huggingface.co/miguelamendez/openlipsync/resolve/main/assets/demo1_video.mp4",
    audio_url="https://huggingface.co/miguelamendez/openlipsync/resolve/main/assets/demo2_audio.wav",
):
    """Smoke-test the lipsync container image.

    Despite the original docstring claiming it "generates a lipsynced video",
    this currently only verifies that the heavy dependencies import cleanly
    inside the image and returns a placeholder string; no inference is run yet.

    Args:
        video_url: URL of the source video (currently unused).
        audio_url: URL of the driving audio (currently unused).

    Returns:
        The placeholder string "a test".
    """
    # Imports are intentionally inside the function body so they resolve
    # inside the container image (these packages need not exist locally).
    # NOTE: removed a duplicate `import torch` present in the original.
    from omegaconf import OmegaConf
    import torch
    from diffusers import AutoencoderKL, DDIMScheduler
    from latentsync.models.unet import UNet3DConditionModel
    from latentsync.pipelines.lipsync_pipeline import LipsyncPipeline
    from accelerate.utils import set_seed
    from latentsync.whisper.audio2feature import Audio2Feature
    return "a test"
@app.local_entrypoint()
def main():
    """Entry point for `modal run`: exercise inference locally and remotely."""
    local_result = inference.local()  # executes in this process
    print(local_result)
    remote_result = inference.remote()  # executes inside the Modal container
    print(remote_result)