miguelamendez commited on
Commit
2795099
·
verified ·
1 Parent(s): 0fb90ca

Initial upload of directory

Browse files
README.md CHANGED
@@ -35,3 +35,4 @@ uv run modal run modal_lipsync_serve.py
35
  ## TODO:
36
  - Add MuseTalk checkpoints
37
  - Add LatentSync16 checkpoints
 
 
35
  ## TODO:
36
  - Add MuseTalk checkpoints
37
  - Add LatentSync16 checkpoints
38
+
modal_lipsync_inference.py CHANGED
@@ -14,6 +14,7 @@ lipsync_image = (
14
  modal.Image.from_registry("nvidia/cuda:12.8.0-devel-ubuntu22.04", add_python="3.11")
15
  .uv_pip_install(
16
  [
 
17
  "torch",
18
  "torchvision",
19
  "xformers",
@@ -75,13 +76,12 @@ lipsync_image = (
75
  )
76
  .add_local_python_source("latentsync")# remove NVIDIA base container entrypoint
77
  )
78
- #with lipsync_image.imports():
79
- # import torch
80
- # import time
81
 
82
 
83
  # Create the Modal app
84
- app = modal.App("lipsync-dummy")
85
  @app.function(
86
  image=lipsync_image,
87
  #gpu="A100",
@@ -200,7 +200,6 @@ def main():
200
  audio_uri = "https://huggingface.co/miguelamendez/openlipsync/resolve/main/assets/demo1_audio.wav"
201
  # Call the inference function
202
  #print(volume_search.remote())
203
- """
204
  print("Local inference")
205
  try:
206
  video_bytes,exec_time = inference.local(
@@ -244,3 +243,4 @@ def main():
244
  print(f"Video saved successfully as {output_filename}")
245
  except Exception as e:
246
  print(f"Error during inference: {e}")
 
 
14
  modal.Image.from_registry("nvidia/cuda:12.8.0-devel-ubuntu22.04", add_python="3.11")
15
  .uv_pip_install(
16
  [
17
+ "fastapi[standard]",
18
  "torch",
19
  "torchvision",
20
  "xformers",
 
76
  )
77
  .add_local_python_source("latentsync")# remove NVIDIA base container entrypoint
78
  )
79
+ with lipsync_image.imports():
80
+ import time
 
81
 
82
 
83
  # Create the Modal app
84
+ app = modal.App("lipsync-dummy",image=lipsync_image)
85
  @app.function(
86
  image=lipsync_image,
87
  #gpu="A100",
 
200
  audio_uri = "https://huggingface.co/miguelamendez/openlipsync/resolve/main/assets/demo1_audio.wav"
201
  # Call the inference function
202
  #print(volume_search.remote())
 
203
  print("Local inference")
204
  try:
205
  video_bytes,exec_time = inference.local(
 
243
  print(f"Video saved successfully as {output_filename}")
244
  except Exception as e:
245
  print(f"Error during inference: {e}")
246
+ """
modal_lipsync_serve.py CHANGED
@@ -1,19 +1,22 @@
1
- """
2
- This script runs inference of LatentSync using Modal.
3
- To run you must first install modal.
4
- Then you should run the download of the
5
- """
6
 
 
 
7
  import modal
8
  #Shared volume with models
9
  volume = modal.Volume.from_name("openlipsync-volume", create_if_missing=True)
10
  model_volume = modal.Volume.from_name("hf-hub-cache", create_if_missing=True)
11
- MODEL_PATH = "/models" # where the Volume will appear on our Functions' filesystems
12
  #Lipsync image
13
  lipsync_image = (
14
  modal.Image.from_registry("nvidia/cuda:12.8.0-devel-ubuntu22.04", add_python="3.11")
15
  .uv_pip_install(
16
  [
 
17
  "torch",
18
  "torchvision",
19
  "xformers",
@@ -75,57 +78,31 @@ lipsync_image = (
75
  )
76
  .add_local_python_source("latentsync")# remove NVIDIA base container entrypoint
77
  )
78
- #with lipsync_image.imports():
79
- # import torch
80
- # import time
81
-
82
-
83
- # Create the Modal app
84
- app = modal.App("lipsync-dummy")
85
- @app.function(
86
- image=lipsync_image,
87
- #gpu="A100",
88
- volumes={"/data": volume,MODEL_PATH:model_volume},
89
- timeout=300
90
- )
91
- def volume_search(some_path="/data"):
92
- """Generates a lipsynced video"""
93
- import os
94
- print("Files in volume:")
95
- def list_directory(path):
96
- try:
97
- for item in os.listdir(path):
98
- item_path = os.path.join(path, item)
99
- abs_path = os.path.abspath(item_path)
100
- if os.path.isdir(item_path):
101
- print(f" {abs_path}/")
102
- list_directory(item_path)
103
- else:
104
- print(f" {abs_path}")
105
- except Exception as e:
106
- print(f"Error accessing {path}: {e}")
107
- # List files in the volume
108
- list_directory(some_path)
109
 
 
110
  @app.function(
111
  image=lipsync_image,
112
  gpu="A100",
113
  volumes={"/data": volume,MODEL_PATH:model_volume},
114
  timeout=300
115
  )
116
- def inference(video_uri, audio_uri, unet_ckpt_path="./checkpoints/latentsync/latentsync_unet.pt", vae_path="./checkpoints/sd-vae-ft-mse", unet_config_path="configs/unet/second_stage.yaml", scheduler_path="configs/scheduler_config.json",whisper_model_path="./checkpoints/whisper",guidance_scale=1.0, seed=1247):
117
  """Generates a lipsynced video"""
118
  from omegaconf import OmegaConf
119
  import torch
 
120
  from diffusers import AutoencoderKL, DDIMScheduler
121
  from latentsync.models.unet import UNet3DConditionModel
122
  from latentsync.pipelines.lipsync_pipeline import LipsyncPipeline
123
  from accelerate.utils import set_seed
124
  from latentsync.whisper.audio2feature import Audio2Feature
125
  import torch
 
 
126
  import requests
127
  from PIL import Image
128
  import io
 
129
  # Download video and audio files
130
  video_response = requests.get(video_uri)
131
  audio_response = requests.get(audio_uri)
@@ -136,7 +113,10 @@ def inference(video_uri, audio_uri, unet_ckpt_path="./checkpoints/latentsync/lat
136
  video_file.write(video_response.content)
137
  with open(audio_path, "wb") as audio_file:
138
  audio_file.write(audio_response.content)
139
- video_out_path = "./outvideo.mp4"
 
 
 
140
  config = OmegaConf.load(unet_config_path)
141
  scheduler = DDIMScheduler.from_pretrained(scheduler_path)
142
  if config.model.cross_attention_dim == 768:
@@ -156,7 +136,6 @@ def inference(video_uri, audio_uri, unet_ckpt_path="./checkpoints/latentsync/lat
156
  )
157
 
158
  unet = unet.to(dtype=torch.float16)
159
-
160
  pipeline = LipsyncPipeline(
161
  vae=vae,
162
  audio_encoder=audio_encoder,
@@ -169,7 +148,8 @@ def inference(video_uri, audio_uri, unet_ckpt_path="./checkpoints/latentsync/lat
169
  else:
170
  torch.seed()
171
  print(f"Initial seed: {torch.initial_seed()}")
172
-
 
173
  pipeline(
174
  video_path=video_path,
175
  audio_path=audio_path,
@@ -182,58 +162,86 @@ def inference(video_uri, audio_uri, unet_ckpt_path="./checkpoints/latentsync/lat
182
  width=config.data.resolution,
183
  height=config.data.resolution,
184
  )
 
 
 
185
  # Read the processed video as bytes and return it
186
- with open(video_out_path, "rb") as video_file:
187
- video_bytes = video_file.read()
188
- return video_bytes
 
 
 
 
 
 
 
 
 
 
 
 
 
189
 
190
- @app.local_entrypoint()
191
- def main():
192
- #run the function locally
193
- # Example video and audio URIs (replace with actual URLs)
194
- video_uri = "https://huggingface.co/miguelamendez/openlipsync/resolve/main/assets/demo3_video.mp4"
195
- audio_uri = "https://huggingface.co/miguelamendez/openlipsync/resolve/main/assets/demo1_audio.wav"
196
- # Call the inference function
197
- #print(volume_search.remote())
198
- """"
199
- print("Local inference")
 
 
 
 
 
200
  try:
201
- video_bytes = inference.local(
202
- video_uri=video_uri,
203
- audio_uri=audio_uri,
204
- unet_ckpt_path="./checkpoints/latentsync/latentsync_unet.pt",
205
- vae_path="./checkpoints/sd-vae-ft-mse",
206
- unet_config_path="./configs/unet/second_stage.yaml",
207
- whisper_model_path="./checkpoints/whisper",
208
- scheduler_path="./configs/scheduler_config.json",
209
- guidance_scale=1.0,
210
- seed=1247
211
- )
212
- # Save the video bytes to a file in the current path
213
- output_filename = "local_video.mp4"
214
- with open(output_filename, "wb") as output_file:
215
- output_file.write(video_bytes)
216
- print(f"Video saved successfully as {output_filename}")
217
  except Exception as e:
218
- print(f"Error during inference: {e}")
219
- """
220
- print("remote inference")
 
 
 
221
  try:
222
- video_bytes,time = inference.remote(
223
- video_uri=video_uri,
224
- audio_uri=audio_uri,
225
- unet_ckpt_path="/data/data/checkpoints/latentsync/latentsync_unet.pt",
226
- vae_path="/data/data/checkpoints/sd-vae-ft-mse",
227
- unet_config_path="/data/data/configs/unet/second_stage.yaml",
228
- whisper_model_path="/data/data/checkpoints/whisper",
229
- scheduler_path="/data/data/configs/scheduler_config.json",
230
- guidance_scale=1.0,
231
- seed=1247
232
- )
233
- # Save the video bytes to a file in the current path
234
- output_filename = "remote_video.mp4"
235
- with open(output_filename, "wb") as output_file:
236
- output_file.write(video_bytes)
237
- print(f"Video saved successfully as {output_filename}")
238
  except Exception as e:
239
- print(f"Error during inference: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # my_job_queue_endpoint.py
2
+ import fastapi
3
+ import modal
4
+ import random
5
+ import time
6
 
7
+ from fastapi.responses import FileResponse
8
+ web_app = fastapi.FastAPI()
9
  import modal
10
  #Shared volume with models
11
  volume = modal.Volume.from_name("openlipsync-volume", create_if_missing=True)
12
  model_volume = modal.Volume.from_name("hf-hub-cache", create_if_missing=True)
13
+ MODEL_PATH = "/outputs" # where the Volume will appear on our Functions' filesystems
14
  #Lipsync image
15
  lipsync_image = (
16
  modal.Image.from_registry("nvidia/cuda:12.8.0-devel-ubuntu22.04", add_python="3.11")
17
  .uv_pip_install(
18
  [
19
+ "fastapi[standard]",
20
  "torch",
21
  "torchvision",
22
  "xformers",
 
78
  )
79
  .add_local_python_source("latentsync")# remove NVIDIA base container entrypoint
80
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
 
82
+ app = modal.App("fastapi-lipsync",image=lipsync_image)
83
  @app.function(
84
  image=lipsync_image,
85
  gpu="A100",
86
  volumes={"/data": volume,MODEL_PATH:model_volume},
87
  timeout=300
88
  )
89
+ def process_job(video_uri, audio_uri, unet_ckpt_path="/data/data/checkpoints/latentsync/latentsync_unet.pt", vae_path="/data/data/checkpoints/sd-vae-ft-mse", unet_config_path="/data/data/configs/unet/second_stage.yaml", scheduler_path="/data/data/configs/scheduler_config.json",whisper_model_path="/data/data/checkpoints/whisper",guidance_scale=1.0, seed=1247):
90
  """Generates a lipsynced video"""
91
  from omegaconf import OmegaConf
92
  import torch
93
+ import time
94
  from diffusers import AutoencoderKL, DDIMScheduler
95
  from latentsync.models.unet import UNet3DConditionModel
96
  from latentsync.pipelines.lipsync_pipeline import LipsyncPipeline
97
  from accelerate.utils import set_seed
98
  from latentsync.whisper.audio2feature import Audio2Feature
99
  import torch
100
+ from fastapi.responses import FileResponse
101
+ from fastapi import Response
102
  import requests
103
  from PIL import Image
104
  import io
105
+ import os
106
  # Download video and audio files
107
  video_response = requests.get(video_uri)
108
  audio_response = requests.get(audio_uri)
 
113
  video_file.write(video_response.content)
114
  with open(audio_path, "wb") as audio_file:
115
  audio_file.write(audio_response.content)
116
+ import uuid
117
+ # Generate a random UUID
118
+ unique_id = str(uuid.uuid4())
119
+ video_out_path = f"/data/{unique_id}.mp4"
120
  config = OmegaConf.load(unet_config_path)
121
  scheduler = DDIMScheduler.from_pretrained(scheduler_path)
122
  if config.model.cross_attention_dim == 768:
 
136
  )
137
 
138
  unet = unet.to(dtype=torch.float16)
 
139
  pipeline = LipsyncPipeline(
140
  vae=vae,
141
  audio_encoder=audio_encoder,
 
148
  else:
149
  torch.seed()
150
  print(f"Initial seed: {torch.initial_seed()}")
151
+ # Start timing
152
+ start_time = time.time()
153
  pipeline(
154
  video_path=video_path,
155
  audio_path=audio_path,
 
162
  width=config.data.resolution,
163
  height=config.data.resolution,
164
  )
165
+ # Calculate execution time
166
+ end_time = time.time()
167
+ execution_time = end_time - start_time
168
  # Read the processed video as bytes and return it
169
+ #with open(video_out_path, "rb") as video_file:
170
+ # video_bytes = video_file.read()
171
+ #video=FileResponse(
172
+ # path=video_out_path,
173
+ # media_type="video/mp4", # Adjust based on your video format
174
+ # filename=os.path.basename(video_out_path)
175
+ #)
176
+ return {"result":os.path.abspath(video_out_path),"processing_time":execution_time}
177
+ #return Response(
178
+ # content=video_bytes,
179
+ # media_type="video/mp4",
180
+ # headers={
181
+ # "Content-Disposition": f"attachment; filename='{os.path.basename(video_out_path)}'",
182
+ # "X-File-Size": str(len(video_bytes))
183
+ # }
184
+ #)
185
 
186
+ @app.function(volumes={"/data": volume,MODEL_PATH:model_volume})
187
+ @modal.asgi_app()
188
+ def fastapi_app():
189
+ return web_app
190
+
191
+
192
+ @web_app.post("/submit")
193
+ async def submit_job_endpoint(video_uri:str,audio_uri:str,guidance_scale:float=1.0,seed:int=1024):
194
+ call = process_job.spawn(video_uri=video_uri,audio_uri=audio_uri,guidance_scale=guidance_scale,seed=seed)
195
+ return {"call_id": call.object_id, "status": "queued"}
196
+
197
+
198
+ @web_app.get("/status/{call_id}")
199
+ async def get_job_status_endpoint(call_id: str):
200
+ function_call = modal.FunctionCall.from_id(call_id)
201
  try:
202
+ result = function_call.get(timeout=0)
203
+ return {"call_id": call_id, "status": "completed", "result": result}
204
+ except modal.exception.OutputExpiredError:
205
+ return fastapi.responses.JSONResponse(content={"call_id": call_id, "status": "error"}, status_code=404)
206
+ except TimeoutError:
207
+ # Check if function is still running or queued
208
+ try:
209
+ # Try to get the function state by checking if it's running
210
+ # This is a simplified approach - in practice, you might want more sophisticated state tracking
211
+ function_call.get(timeout=1) # This will raise TimeoutError if still running
212
+ return {"call_id": call_id, "status": "completed"}
213
+ except TimeoutError:
214
+ # Function is still running or queued
215
+ return {"call_id": call_id, "status": "processing"}
216
+ except Exception:
217
+ return {"call_id": call_id, "status": "error"}
218
  except Exception as e:
219
+ return {"call_id": call_id, "status": "error", "error": str(e)}
220
+
221
+
222
+ @web_app.get("/result/{call_id}")
223
+ async def get_job_result_endpoint(call_id: str):
224
+ function_call = modal.FunctionCall.from_id(call_id)
225
  try:
226
+ result = function_call.get(timeout=0)
227
+ return {"call_id": call_id, "status": "completed", "result": result}
228
+ except modal.exception.OutputExpiredError:
229
+ return fastapi.responses.JSONResponse(content={"call_id": call_id, "status": "error"}, status_code=404)
230
+ except TimeoutError:
231
+ return fastapi.responses.JSONResponse(content={"call_id": call_id, "status": "processing"}, status_code=202)
 
 
 
 
 
 
 
 
 
 
232
  except Exception as e:
233
+ return {"call_id": call_id, "status": "error", "error": str(e)}
234
+
235
+ @web_app.post("/video")
236
+ async def get_video_endpoint(video_path: str):
237
+ import os
238
+ from fastapi.responses import FileResponse
239
+ if not os.path.exists(video_path):
240
+ raise fastapi.HTTPException(status_code=404, detail="Video not found")
241
+ return FileResponse(video_path, media_type="video/mp4", filename=f"output_video.mp4")
242
+
243
+
244
+ # Add a local entrypoint to test locally (optional)
245
+ @app.local_entrypoint()
246
+ def main():
247
+ print("FastAPI app deployed as Modal ASGI app. for lipsync")
scripts/tests/modal_jobs.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # my_job_queue_endpoint.py
2
+ import fastapi
3
+ import modal
4
+
5
+ image = modal.Image.debian_slim().pip_install("fastapi[standard]")
6
+ app = modal.App("fastapi-modal", image=image)
7
+ web_app = fastapi.FastAPI()
8
+
9
+ @app.function()
10
+ def process_job(data):
11
+ # Perform the job processing here
12
+ return {"result": data}
13
+
14
+
15
+ @app.function()
16
+ @modal.asgi_app()
17
+ def fastapi_app():
18
+ return web_app
19
+
20
+
21
+ @web_app.post("/submit")
22
+ async def submit_job_endpoint(data: str):
23
+ call = process_job.spawn(data)
24
+ return {"call_id": call.object_id}
25
+
26
+
27
+ @web_app.get("/result/{call_id}")
28
+ async def get_job_result_endpoint(call_id: str):
29
+ function_call = modal.FunctionCall.from_id(call_id)
30
+ try:
31
+ result = function_call.get(timeout=0)
32
+ except modal.exception.OutputExpiredError:
33
+ return fastapi.responses.JSONResponse(content="", status_code=404)
34
+ except TimeoutError:
35
+ return fastapi.responses.JSONResponse(content="", status_code=202)
36
+ return result
37
+
38
+
39
+ # Add a local entrypoint to test locally (optional)
40
+ @app.local_entrypoint()
41
+ def main():
42
+ print("FastAPI app deployed as Modal ASGI app.")
43
+ print("Use `modal serve my_job_queue_endpoint.py` to serve the web app.")
44
+
scripts/tests/modal_jobsv2.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # my_job_queue_endpoint.py
2
+ import fastapi
3
+ import modal
4
+ import random
5
+ import time
6
+
7
+ image = modal.Image.debian_slim().pip_install("fastapi[standard]")
8
+ app = modal.App("fastapi-modal", image=image)
9
+ web_app = fastapi.FastAPI()
10
+
11
+ @app.function()
12
+ def process_job(data):
13
+ # Simulate long workload with random sleep between 5-10 seconds
14
+ sleep_time = random.randint(5, 10)
15
+ time.sleep(sleep_time)
16
+
17
+ # Simulate potential error
18
+ if random.random() < 0.1: # 10% chance of error
19
+ raise Exception("Random processing error occurred")
20
+
21
+ return {"result": data, "processing_time": sleep_time}
22
+
23
+
24
+ @app.function()
25
+ @modal.asgi_app()
26
+ def fastapi_app():
27
+ return web_app
28
+
29
+
30
+ @web_app.post("/submit")
31
+ async def submit_job_endpoint(data: str):
32
+ call = process_job.spawn(data)
33
+ return {"call_id": call.object_id, "status": "queued"}
34
+
35
+
36
+ @web_app.get("/status/{call_id}")
37
+ async def get_job_status_endpoint(call_id: str):
38
+ function_call = modal.FunctionCall.from_id(call_id)
39
+ try:
40
+ result = function_call.get(timeout=0)
41
+ return {"call_id": call_id, "status": "completed", "result": result}
42
+ except modal.exception.OutputExpiredError:
43
+ return fastapi.responses.JSONResponse(content={"call_id": call_id, "status": "error"}, status_code=404)
44
+ except TimeoutError:
45
+ # Check if function is still running or queued
46
+ try:
47
+ # Try to get the function state by checking if it's running
48
+ # This is a simplified approach - in practice, you might want more sophisticated state tracking
49
+ function_call.get(timeout=1) # This will raise TimeoutError if still running
50
+ return {"call_id": call_id, "status": "completed"}
51
+ except TimeoutError:
52
+ # Function is still running or queued
53
+ return {"call_id": call_id, "status": "processing"}
54
+ except Exception:
55
+ return {"call_id": call_id, "status": "error"}
56
+ except Exception as e:
57
+ return {"call_id": call_id, "status": "error", "error": str(e)}
58
+
59
+
60
+ @web_app.get("/result/{call_id}")
61
+ async def get_job_result_endpoint(call_id: str):
62
+ function_call = modal.FunctionCall.from_id(call_id)
63
+ try:
64
+ result = function_call.get(timeout=0)
65
+ return {"call_id": call_id, "status": "completed", "result": result}
66
+ except modal.exception.OutputExpiredError:
67
+ return fastapi.responses.JSONResponse(content={"call_id": call_id, "status": "error"}, status_code=404)
68
+ except TimeoutError:
69
+ return fastapi.responses.JSONResponse(content={"call_id": call_id, "status": "processing"}, status_code=202)
70
+ except Exception as e:
71
+ return {"call_id": call_id, "status": "error", "error": str(e)}
72
+
73
+
74
+ # Add a local entrypoint to test locally (optional)
75
+ @app.local_entrypoint()
76
+ def main():
77
+ print("FastAPI app deployed as Modal ASGI app.")
78
+ print("Use `modal serve my_job_queue_endpoint.py` to serve the web app.")
79
+
temp/video.mp4 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ab1859b75dd977214c3d7443d00d8aff818463f8e7c40e11fcaf6473e5d0e835
3
- size 4904259
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5909279c3131345d7c5f9c30bf70393ee334a30657317b226ae6a291eebccaf9
3
+ size 8402419