"""Modal smoke test for the OpenLipSync / LatentSync container image.

Source: openlipsync/scripts/tests/modal_lipsync_test.py
(originally uploaded by miguelamendez, commit 75da08b).
"""
import modal
# Container image for LatentSync inference: NVIDIA CUDA 12.8 devel base with
# Python 3.11, the ML/audio/video dependency stack, and the local `latentsync`
# package copied into the image at /latentsync.
lipsync_image = (
    modal.Image.from_registry("nvidia/cuda:12.8.0-devel-ubuntu22.04", add_python="3.11")
    .uv_pip_install(
        [
            "torch",
            "torchvision",
            "xformers",
            "triton",
            "diffusers",
            "transformers",
            "huggingface-hub",
            "imageio==2.27.0",
            "decord==0.6.0",
            "accelerate",
            "einops==0.7.0",
            "omegaconf==2.3.0",
            "safetensors>=0.4.3",
            "opencv-python==4.9.0.80",
            "mediapipe==0.10.11",
            "av==11.0.0",
            "torch-fidelity==0.3.0",
            "torchmetrics==1.3.1",
            "python_speech_features==0.6",
            "librosa==0.10.1",
            "scenedetect==0.6.1",
            "ffmpeg-python==0.2.0",
            "lpips==0.1.4",
            "face-alignment==1.4.1",
            "ninja==1.11.1.1",
            "pandas==2.0.3",
            "numpy<2",
            "pydub==0.25.1",
            "moviepy==1.0.3",
            "hf-xet==1.1.8",
        ]
    )
    .apt_install(
        [
            "libgl1",
            "curl",
            "git",
            "wget",
            "ffmpeg",
        ]
    )
    # Enable the faster hf-transfer download backend for Hugging Face Hub.
    .env({"HF_HUB_ENABLE_HF_TRANSFER": "1"})
    # Remove the NVIDIA base container's entrypoint so Modal controls startup.
    .entrypoint([])
    # BUG FIX: the original chained `.add.add_local_dir(...)`, which raises
    # AttributeError at import time; the correct Image method is
    # `.add_local_dir(...)`.
    .add_local_dir(
        "/home/misha/OpenLipSync/latentsync",
        remote_path="/latentsync",
    )
)
# Create the Modal app that hosts the remote inference function below.
# (Removed a commented-out `with lipsync_image.imports():` block — dead code;
# the imports are done inside `inference` instead.)
app = modal.App("lipsync-dummy")
@app.function(
    image=lipsync_image,
    timeout=300,
)
def inference(
    video_url="https://huggingface.co/miguelamendez/openlipsync/resolve/main/assets/demo1_video.mp4",
    audio_url="https://huggingface.co/miguelamendez/openlipsync/resolve/main/assets/demo2_audio.wav",
):
    """Smoke-test the lipsync container image.

    Despite the original docstring claiming it "generates a lipsynced video",
    this currently only verifies that the heavy dependencies import cleanly
    inside the image and returns a placeholder string; no inference is run yet.

    Args:
        video_url: URL of the source video (currently unused).
        audio_url: URL of the driving audio (currently unused).

    Returns:
        The placeholder string "a test".
    """
    # Imports are intentionally inside the function body so they resolve
    # inside the container image (these packages need not exist locally).
    # NOTE: removed a duplicate `import torch` present in the original.
    from omegaconf import OmegaConf
    import torch
    from diffusers import AutoencoderKL, DDIMScheduler
    from latentsync.models.unet import UNet3DConditionModel
    from latentsync.pipelines.lipsync_pipeline import LipsyncPipeline
    from accelerate.utils import set_seed
    from latentsync.whisper.audio2feature import Audio2Feature
    return "a test"
@app.local_entrypoint()
def main():
    """Entry point for `modal run`: exercise inference locally and remotely."""
    local_result = inference.local()  # executes in this process
    print(local_result)
    remote_result = inference.remote()  # executes inside the Modal container
    print(remote_result)