Initial upload of directory

Browse files

Files changed (6) hide show

README.md +1 -0
modal_lipsync_inference.py +5 -5
modal_lipsync_serve.py +98 -90
scripts/tests/modal_jobs.py +44 -0
scripts/tests/modal_jobsv2.py +79 -0
temp/video.mp4 +2 -2

README.md CHANGED Viewed

@@ -35,3 +35,4 @@ uv run modal run modal_lipsync_serve.py
 ## TODO:
 - Add MuseTalk checkpoints
 - Add LatentSync16 checkpoints

 ## TODO:
 - Add MuseTalk checkpoints
 - Add LatentSync16 checkpoints

modal_lipsync_inference.py CHANGED Viewed

@@ -14,6 +14,7 @@ lipsync_image = (
     modal.Image.from_registry("nvidia/cuda:12.8.0-devel-ubuntu22.04", add_python="3.11")
     .uv_pip_install(
         [
             "torch",
             "torchvision",
             "xformers",
@@ -75,13 +76,12 @@ lipsync_image = (
     )
     .add_local_python_source("latentsync")# remove NVIDIA base container entrypoint
 )
-#with lipsync_image.imports():
-#    import torch
-#    import time
 # Create the Modal app
-app = modal.App("lipsync-dummy")
 @app.function(
     image=lipsync_image,
     #gpu="A100",
@@ -200,7 +200,6 @@ def main():
     audio_uri = "https://huggingface.co/miguelamendez/openlipsync/resolve/main/assets/demo1_audio.wav"
     # Call the inference function
     #print(volume_search.remote())
-    """
     print("Local inference")
     try:
         video_bytes,exec_time = inference.local(
@@ -244,3 +243,4 @@ def main():
         print(f"Video saved successfully as {output_filename}")
     except Exception as e:
         print(f"Error during inference: {e}")

     modal.Image.from_registry("nvidia/cuda:12.8.0-devel-ubuntu22.04", add_python="3.11")
     .uv_pip_install(
         [
+            "fastapi[standard]",
             "torch",
             "torchvision",
             "xformers",
     )
     .add_local_python_source("latentsync")# remove NVIDIA base container entrypoint
 )
+with lipsync_image.imports():
+    import time
 # Create the Modal app
+app = modal.App("lipsync-dummy",image=lipsync_image)
 @app.function(
     image=lipsync_image,
     #gpu="A100",
     audio_uri = "https://huggingface.co/miguelamendez/openlipsync/resolve/main/assets/demo1_audio.wav"
     # Call the inference function
     #print(volume_search.remote())
     print("Local inference")
     try:
         video_bytes,exec_time = inference.local(
         print(f"Video saved successfully as {output_filename}")
     except Exception as e:
         print(f"Error during inference: {e}")
+    """

modal_lipsync_serve.py CHANGED Viewed

@@ -1,19 +1,22 @@
-"""
-This script runs inference of LatentSync using Modal.
-To run you must first install modal.
-Then you should run the download of the
-"""
 import modal
 #Shared volume with models
 volume = modal.Volume.from_name("openlipsync-volume", create_if_missing=True)
 model_volume = modal.Volume.from_name("hf-hub-cache", create_if_missing=True)
-MODEL_PATH = "/models"  # where the Volume will appear on our Functions' filesystems
 #Lipsync image
 lipsync_image = (
     modal.Image.from_registry("nvidia/cuda:12.8.0-devel-ubuntu22.04", add_python="3.11")
     .uv_pip_install(
         [
             "torch",
             "torchvision",
             "xformers",
@@ -75,57 +78,31 @@ lipsync_image = (
     )
     .add_local_python_source("latentsync")# remove NVIDIA base container entrypoint
 )
-#with lipsync_image.imports():
-#    import torch
-#    import time
-# Create the Modal app
-app = modal.App("lipsync-dummy")
-@app.function(
-    image=lipsync_image,
-    #gpu="A100",
-    volumes={"/data": volume,MODEL_PATH:model_volume},
-    timeout=300
-)
-def volume_search(some_path="/data"):
-    """Generates a lipsynced video"""
-    import os
-    print("Files in volume:")
-    def list_directory(path):
-        try:
-            for item in os.listdir(path):
-                item_path = os.path.join(path, item)
-                abs_path = os.path.abspath(item_path)
-                if os.path.isdir(item_path):
-                    print(f"  {abs_path}/")
-                    list_directory(item_path)
-                else:
-                    print(f"  {abs_path}")
-        except Exception as e:
-            print(f"Error accessing {path}: {e}")
-    # List files in the volume
-    list_directory(some_path)
 @app.function(
     image=lipsync_image,
     gpu="A100",
     volumes={"/data": volume,MODEL_PATH:model_volume},
     timeout=300
 )
-def inference(video_uri, audio_uri, unet_ckpt_path="./checkpoints/latentsync/latentsync_unet.pt", vae_path="./checkpoints/sd-vae-ft-mse", unet_config_path="configs/unet/second_stage.yaml", scheduler_path="configs/scheduler_config.json",whisper_model_path="./checkpoints/whisper",guidance_scale=1.0, seed=1247):
     """Generates a lipsynced video"""
     from omegaconf import OmegaConf
     import torch
     from diffusers import AutoencoderKL, DDIMScheduler
     from latentsync.models.unet import UNet3DConditionModel
     from latentsync.pipelines.lipsync_pipeline import LipsyncPipeline
     from accelerate.utils import set_seed
     from latentsync.whisper.audio2feature import Audio2Feature
     import torch
     import requests
     from PIL import Image
     import io
     # Download video and audio files
     video_response = requests.get(video_uri)
     audio_response = requests.get(audio_uri)
@@ -136,7 +113,10 @@ def inference(video_uri, audio_uri, unet_ckpt_path="./checkpoints/latentsync/lat
         video_file.write(video_response.content)
     with open(audio_path, "wb") as audio_file:
         audio_file.write(audio_response.content)
-    video_out_path = "./outvideo.mp4"
     config = OmegaConf.load(unet_config_path)
     scheduler = DDIMScheduler.from_pretrained(scheduler_path)
     if config.model.cross_attention_dim == 768:
@@ -156,7 +136,6 @@ def inference(video_uri, audio_uri, unet_ckpt_path="./checkpoints/latentsync/lat
     )
     unet = unet.to(dtype=torch.float16)
     pipeline = LipsyncPipeline(
         vae=vae,
         audio_encoder=audio_encoder,
@@ -169,7 +148,8 @@ def inference(video_uri, audio_uri, unet_ckpt_path="./checkpoints/latentsync/lat
     else:
         torch.seed()
     print(f"Initial seed: {torch.initial_seed()}")
     pipeline(
         video_path=video_path,
         audio_path=audio_path,
@@ -182,58 +162,86 @@ def inference(video_uri, audio_uri, unet_ckpt_path="./checkpoints/latentsync/lat
         width=config.data.resolution,
         height=config.data.resolution,
     )
     # Read the processed video as bytes and return it
-    with open(video_out_path, "rb") as video_file:
-        video_bytes = video_file.read()
-    return video_bytes
-@app.local_entrypoint()
-def main():
-    #run the function locally
-        # Example video and audio URIs (replace with actual URLs)
-    video_uri = "https://huggingface.co/miguelamendez/openlipsync/resolve/main/assets/demo3_video.mp4"
-    audio_uri = "https://huggingface.co/miguelamendez/openlipsync/resolve/main/assets/demo1_audio.wav"
-    # Call the inference function
-    #print(volume_search.remote())
-    """"
-    print("Local inference")
     try:
-        video_bytes = inference.local(
-            video_uri=video_uri,
-            audio_uri=audio_uri,
-            unet_ckpt_path="./checkpoints/latentsync/latentsync_unet.pt",
-            vae_path="./checkpoints/sd-vae-ft-mse",
-            unet_config_path="./configs/unet/second_stage.yaml",
-            whisper_model_path="./checkpoints/whisper",
-            scheduler_path="./configs/scheduler_config.json",
-            guidance_scale=1.0,
-            seed=1247
-        )
-        # Save the video bytes to a file in the current path
-        output_filename = "local_video.mp4"
-        with open(output_filename, "wb") as output_file:
-            output_file.write(video_bytes)
-        print(f"Video saved successfully as {output_filename}")
     except Exception as e:
-        print(f"Error during inference: {e}")
-    """
-    print("remote inference")
     try:
-        video_bytes,time = inference.remote(
-            video_uri=video_uri,
-            audio_uri=audio_uri,
-            unet_ckpt_path="/data/data/checkpoints/latentsync/latentsync_unet.pt",
-            vae_path="/data/data/checkpoints/sd-vae-ft-mse",
-            unet_config_path="/data/data/configs/unet/second_stage.yaml",
-            whisper_model_path="/data/data/checkpoints/whisper",
-            scheduler_path="/data/data/configs/scheduler_config.json",
-            guidance_scale=1.0,
-            seed=1247
-        )
-        # Save the video bytes to a file in the current path
-        output_filename = "remote_video.mp4"
-        with open(output_filename, "wb") as output_file:
-            output_file.write(video_bytes)
-        print(f"Video saved successfully as {output_filename}")
     except Exception as e:
-        print(f"Error during inference: {e}")

+# my_job_queue_endpoint.py
+import fastapi
+import modal
+import random
+import time
+from fastapi.responses import FileResponse
+web_app = fastapi.FastAPI()
 import modal
 #Shared volume with models
 volume = modal.Volume.from_name("openlipsync-volume", create_if_missing=True)
 model_volume = modal.Volume.from_name("hf-hub-cache", create_if_missing=True)
+MODEL_PATH = "/outputs"  # where the Volume will appear on our Functions' filesystems
 #Lipsync image
 lipsync_image = (
     modal.Image.from_registry("nvidia/cuda:12.8.0-devel-ubuntu22.04", add_python="3.11")
     .uv_pip_install(
         [
+            "fastapi[standard]",
             "torch",
             "torchvision",
             "xformers",
     )
     .add_local_python_source("latentsync")# remove NVIDIA base container entrypoint
 )
+app = modal.App("fastapi-lipsync",image=lipsync_image)
 @app.function(
     image=lipsync_image,
     gpu="A100",
     volumes={"/data": volume,MODEL_PATH:model_volume},
     timeout=300
 )
+def process_job(video_uri, audio_uri, unet_ckpt_path="/data/data/checkpoints/latentsync/latentsync_unet.pt", vae_path="/data/data/checkpoints/sd-vae-ft-mse", unet_config_path="/data/data/configs/unet/second_stage.yaml", scheduler_path="/data/data/configs/scheduler_config.json",whisper_model_path="/data/data/checkpoints/whisper",guidance_scale=1.0, seed=1247):
     """Generates a lipsynced video"""
     from omegaconf import OmegaConf
     import torch
+    import time
     from diffusers import AutoencoderKL, DDIMScheduler
     from latentsync.models.unet import UNet3DConditionModel
     from latentsync.pipelines.lipsync_pipeline import LipsyncPipeline
     from accelerate.utils import set_seed
     from latentsync.whisper.audio2feature import Audio2Feature
     import torch
+    from fastapi.responses import FileResponse
+    from fastapi import Response
     import requests
     from PIL import Image
     import io
+    import os
     # Download video and audio files
     video_response = requests.get(video_uri)
     audio_response = requests.get(audio_uri)
         video_file.write(video_response.content)
     with open(audio_path, "wb") as audio_file:
         audio_file.write(audio_response.content)
+    import uuid
+    # Generate a random UUID
+    unique_id = str(uuid.uuid4())
+    video_out_path = f"/data/{unique_id}.mp4"
     config = OmegaConf.load(unet_config_path)
     scheduler = DDIMScheduler.from_pretrained(scheduler_path)
     if config.model.cross_attention_dim == 768:
     )
     unet = unet.to(dtype=torch.float16)
     pipeline = LipsyncPipeline(
         vae=vae,
         audio_encoder=audio_encoder,
     else:
         torch.seed()
     print(f"Initial seed: {torch.initial_seed()}")
+    # Start timing
+    start_time = time.time()
     pipeline(
         video_path=video_path,
         audio_path=audio_path,
         width=config.data.resolution,
         height=config.data.resolution,
     )
+    # Calculate execution time
+    end_time = time.time()
+    execution_time = end_time - start_time
     # Read the processed video as bytes and return it
+    #with open(video_out_path, "rb") as video_file:
+    #    video_bytes = video_file.read()
+    #video=FileResponse(
+    #    path=video_out_path,
+    #    media_type="video/mp4",  # Adjust based on your video format
+    #    filename=os.path.basename(video_out_path)
+    #)
+    return {"result":os.path.abspath(video_out_path),"processing_time":execution_time}
+    #return Response(
+    #    content=video_bytes,
+    #    media_type="video/mp4",
+    #    headers={
+    #        "Content-Disposition": f"attachment; filename='{os.path.basename(video_out_path)}'",
+    #        "X-File-Size": str(len(video_bytes))
+    #    }
+    #)
+@app.function(volumes={"/data": volume,MODEL_PATH:model_volume})  # same volume mounts as process_job, so files written under /data are the ones /video looks up
+@modal.asgi_app()  # serve the returned ASGI application through Modal
+def fastapi_app():
+    return web_app  # module-level FastAPI instance that the route handlers below are registered on
+@web_app.post("/submit")
+async def submit_job_endpoint(video_uri:str,audio_uri:str,guidance_scale:float=1.0,seed:int=1024):  # NOTE(review): seed defaults to 1024 here but process_job defaults to 1247 — confirm which is intended
+    call = process_job.spawn(video_uri=video_uri,audio_uri=audio_uri,guidance_scale=guidance_scale,seed=seed)  # spawn() starts the job and hands back a call handle instead of waiting for the result
+    return {"call_id": call.object_id, "status": "queued"}  # call_id is the identifier the /status and /result routes accept
+@web_app.get("/status/{call_id}")
+async def get_job_status_endpoint(call_id: str):
+    function_call = modal.FunctionCall.from_id(call_id)
     try:
+        result = function_call.get(timeout=0)
+        return {"call_id": call_id, "status": "completed", "result": result}
+    except modal.exception.OutputExpiredError:
+        return fastapi.responses.JSONResponse(content={"call_id": call_id, "status": "error"}, status_code=404)
+    except TimeoutError:
+        # Check if function is still running or queued
+        try:
+            # Try to get the function state by checking if it's running
+            # This is a simplified approach - in practice, you might want more sophisticated state tracking
+            function_call.get(timeout=1)  # This will raise TimeoutError if still running
+            return {"call_id": call_id, "status": "completed"}
+        except TimeoutError:
+            # Function is still running or queued
+            return {"call_id": call_id, "status": "processing"}
+        except Exception:
+            return {"call_id": call_id, "status": "error"}
     except Exception as e:
+        return {"call_id": call_id, "status": "error", "error": str(e)}
+@web_app.get("/result/{call_id}")
+async def get_job_result_endpoint(call_id: str):  # returns the job output when ready, 202 while pending, 404 once the output has expired
+    function_call = modal.FunctionCall.from_id(call_id)
     try:
+        result = function_call.get(timeout=0)  # timeout=0: poll once rather than wait for the job to finish
+        return {"call_id": call_id, "status": "completed", "result": result}
+    except modal.exception.OutputExpiredError:
+        return fastapi.responses.JSONResponse(content={"call_id": call_id, "status": "error"}, status_code=404)
+    except TimeoutError:
+        return fastapi.responses.JSONResponse(content={"call_id": call_id, "status": "processing"}, status_code=202)
     except Exception as e:
+        return {"call_id": call_id, "status": "error", "error": str(e)}  # any other failure is returned as a plain dict with the error text (no explicit status code is set here)
+@web_app.post("/video")
+async def get_video_endpoint(video_path: str):
+    import os
+    from fastapi.responses import FileResponse
+    if not os.path.exists(video_path):
+        raise fastapi.HTTPException(status_code=404, detail="Video not found")
+    return FileResponse(video_path, media_type="video/mp4", filename=f"output_video.mp4")
+# Optional local entrypoint: it only prints a confirmation message and does not call any remote function.
+@app.local_entrypoint()
+def main():
+    print("FastAPI app deployed as Modal ASGI app. for lipsync")

scripts/tests/modal_jobs.py ADDED Viewed

	@@ -0,0 +1,44 @@

+# my_job_queue_endpoint.py
+import fastapi
+import modal
+image = modal.Image.debian_slim().pip_install("fastapi[standard]")
+app = modal.App("fastapi-modal", image=image)
+web_app = fastapi.FastAPI()
+@app.function()
+def process_job(data):  # placeholder worker: returns its input unchanged under the "result" key
+    # Perform the job processing here
+    return {"result": data}
+@app.function()
+@modal.asgi_app()  # serve the returned ASGI application through Modal
+def fastapi_app():
+    return web_app  # module-level FastAPI instance that the route handlers below are registered on
+@web_app.post("/submit")
+async def submit_job_endpoint(data: str):
+    call = process_job.spawn(data)  # spawn() starts the job and hands back a call handle instead of waiting for the result
+    return {"call_id": call.object_id}  # call_id is the identifier the /result route accepts
+@web_app.get("/result/{call_id}")
+async def get_job_result_endpoint(call_id: str):  # returns the job output when ready, an empty 202 while pending, an empty 404 once the output has expired
+    function_call = modal.FunctionCall.from_id(call_id)
+    try:
+        result = function_call.get(timeout=0)  # timeout=0: poll once rather than wait for the job to finish
+    except modal.exception.OutputExpiredError:
+        return fastapi.responses.JSONResponse(content="", status_code=404)
+    except TimeoutError:
+        return fastapi.responses.JSONResponse(content="", status_code=202)
+    return result  # other exceptions are not caught here and propagate to the framework
+# Optional local entrypoint: it only prints usage hints and does not call any remote function.
+@app.local_entrypoint()
+def main():
+    print("FastAPI app deployed as Modal ASGI app.")
+    print("Use `modal serve my_job_queue_endpoint.py` to serve the web app.")  # NOTE(review): the hint names my_job_queue_endpoint.py — confirm it matches this file's actual name

scripts/tests/modal_jobsv2.py ADDED Viewed

	@@ -0,0 +1,79 @@

+# my_job_queue_endpoint.py
+import fastapi
+import modal
+import random
+import time
+image = modal.Image.debian_slim().pip_install("fastapi[standard]")
+app = modal.App("fastapi-modal", image=image)
+web_app = fastapi.FastAPI()
+@app.function()
+def process_job(data):  # simulated worker: sleeps, occasionally fails, otherwise echoes its input with the sleep duration
+    # Simulate long workload with random sleep between 5-10 seconds
+    sleep_time = random.randint(5, 10)  # whole seconds; randint includes both endpoints
+    time.sleep(sleep_time)
+    # Simulate potential error
+    if random.random() < 0.1:  # 10% chance of error
+        raise Exception("Random processing error occurred")
+    return {"result": data, "processing_time": sleep_time}
+@app.function()
+@modal.asgi_app()  # serve the returned ASGI application through Modal
+def fastapi_app():
+    return web_app  # module-level FastAPI instance that the route handlers below are registered on
+@web_app.post("/submit")
+async def submit_job_endpoint(data: str):
+    call = process_job.spawn(data)  # spawn() starts the job and hands back a call handle instead of waiting for the result
+    return {"call_id": call.object_id, "status": "queued"}  # call_id is the identifier the /status and /result routes accept
+@web_app.get("/status/{call_id}")
+async def get_job_status_endpoint(call_id: str):
+    function_call = modal.FunctionCall.from_id(call_id)
+    try:
+        result = function_call.get(timeout=0)
+        return {"call_id": call_id, "status": "completed", "result": result}
+    except modal.exception.OutputExpiredError:
+        return fastapi.responses.JSONResponse(content={"call_id": call_id, "status": "error"}, status_code=404)
+    except TimeoutError:
+        # Check if function is still running or queued
+        try:
+            # Try to get the function state by checking if it's running
+            # This is a simplified approach - in practice, you might want more sophisticated state tracking
+            function_call.get(timeout=1)  # This will raise TimeoutError if still running
+            return {"call_id": call_id, "status": "completed"}
+        except TimeoutError:
+            # Function is still running or queued
+            return {"call_id": call_id, "status": "processing"}
+        except Exception:
+            return {"call_id": call_id, "status": "error"}
+    except Exception as e:
+        return {"call_id": call_id, "status": "error", "error": str(e)}
+@web_app.get("/result/{call_id}")
+async def get_job_result_endpoint(call_id: str):  # returns the job output when ready, 202 while pending, 404 once the output has expired
+    function_call = modal.FunctionCall.from_id(call_id)
+    try:
+        result = function_call.get(timeout=0)  # timeout=0: poll once rather than wait for the job to finish
+        return {"call_id": call_id, "status": "completed", "result": result}
+    except modal.exception.OutputExpiredError:
+        return fastapi.responses.JSONResponse(content={"call_id": call_id, "status": "error"}, status_code=404)
+    except TimeoutError:
+        return fastapi.responses.JSONResponse(content={"call_id": call_id, "status": "processing"}, status_code=202)
+    except Exception as e:
+        return {"call_id": call_id, "status": "error", "error": str(e)}  # any other failure is returned as a plain dict with the error text (no explicit status code is set here)
+# Optional local entrypoint: it only prints usage hints and does not call any remote function.
+@app.local_entrypoint()
+def main():
+    print("FastAPI app deployed as Modal ASGI app.")
+    print("Use `modal serve my_job_queue_endpoint.py` to serve the web app.")  # NOTE(review): the hint names my_job_queue_endpoint.py — confirm it matches this file's actual name

temp/video.mp4 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ab1859b75dd977214c3d7443d00d8aff818463f8e7c40e11fcaf6473e5d0e835
-size 4904259

 version https://git-lfs.github.com/spec/v1
+oid sha256:5909279c3131345d7c5f9c30bf70393ee334a30657317b226ae6a291eebccaf9
+size 8402419