Spaces:

ginipick
/

Dokdo-multimodal

Paused

App Files Community

aiqcamp committed on Dec 22, 2024

Commit

f6c9d00

verified ·

1 Parent(s): 46cfad8

Update app.py

Browse files

Files changed (1) hide show

app.py +41 -13

app.py CHANGED Viewed

@@ -8,7 +8,17 @@ import os
 import time
 from datetime import datetime
 import gradio as gr
 import torch
 import requests
 from pathlib import Path
 import cv2
@@ -37,25 +47,34 @@ translator = pipeline("translation", model="Helsinki-NLP/opus-mt-ko-en")
 CATBOX_USER_HASH = "30f52c895fd9d9cb387eee489"
 REPLICATE_API_TOKEN = os.getenv("API_KEY")
 # 4. 오디오 모델 설정
-device = 'cuda'
-dtype = torch.bfloat16
 # 5. get_model 함수 정의
 def get_model() -> tuple[MMAudio, FeaturesUtils, SequenceConfig]:
     seq_cfg = model.seq_cfg
-    net: MMAudio = get_my_mmaudio(model.model_name).to(device, dtype).eval()
     net.load_weights(torch.load(model.model_path, map_location=device, weights_only=True))
     logger.info(f'Loaded weights from {model.model_path}')
-    feature_utils = FeaturesUtils(tod_vae_ckpt=model.vae_path,
-                                  synchformer_ckpt=model.synchformer_ckpt,
-                                  enable_conditions=True,
-                                  mode=model.mode,
-                                  bigvgan_vocoder_ckpt=model.bigvgan_16k_path,
-                                  need_vae_encoder=False)
-    feature_utils = feature_utils.to(device, dtype).eval()
     return net, feature_utils, seq_cfg
@@ -67,13 +86,16 @@ output_dir = Path('./output/gradio')
 setup_eval_logging()
 net, feature_utils, seq_cfg = get_model()
 def video_to_audio(video_path: str, prompt: str, negative_prompt: str = "music",
                    seed: int = -1, num_steps: int = 15,
-                   cfg_strength: float = 4.0, target_duration: float = None):  # target_duration을 선택적으로 변경
     try:
         logger.info("Starting audio generation process")
-        torch.cuda.empty_cache()
         # 비디오 길이 확인
         cap = cv2.VideoCapture(video_path)
@@ -493,4 +515,10 @@ with gr.Blocks(theme="Yntec/HaleyCH_Theme_Orange", css=css) as demo:
     )
 if __name__ == "__main__":
     demo.launch(server_name="0.0.0.0", server_port=7860, share=False)

 import time
 from datetime import datetime
 import gradio as gr
+# GPU 초기화 설정
 import torch
+if torch.cuda.is_available():
+    torch.cuda.init()
+    device = torch.device('cuda')
+    logger.info(f"Using GPU: {torch.cuda.get_device_name(0)}")
+else:
+    device = torch.device('cpu')
+    logger.warning("GPU not available, using CPU")
 import requests
 from pathlib import Path
 import cv2
 CATBOX_USER_HASH = "30f52c895fd9d9cb387eee489"
 REPLICATE_API_TOKEN = os.getenv("API_KEY")
 # 4. 오디오 모델 설정
+dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
 # 5. get_model 함수 정의
 def get_model() -> tuple[MMAudio, FeaturesUtils, SequenceConfig]:
     seq_cfg = model.seq_cfg
+    net: MMAudio = get_my_mmaudio(model.model_name).to(device)
+    if torch.cuda.is_available():
+        net = net.to(dtype)
+    net.eval()
     net.load_weights(torch.load(model.model_path, map_location=device, weights_only=True))
     logger.info(f'Loaded weights from {model.model_path}')
+    feature_utils = FeaturesUtils(
+        tod_vae_ckpt=model.vae_path,
+        synchformer_ckpt=model.synchformer_ckpt,
+        enable_conditions=True,
+        mode=model.mode,
+        bigvgan_vocoder_ckpt=model.bigvgan_16k_path,
+        need_vae_encoder=False
+    ).to(device)
+    if torch.cuda.is_available():
+        feature_utils = feature_utils.to(dtype)
+    feature_utils.eval()
     return net, feature_utils, seq_cfg
 setup_eval_logging()
 net, feature_utils, seq_cfg = get_model()
+@spaces.GPU(duration=30)
+@torch.inference_mode()
 def video_to_audio(video_path: str, prompt: str, negative_prompt: str = "music",
                    seed: int = -1, num_steps: int = 15,
+                   cfg_strength: float = 4.0, target_duration: float = None):
     try:
         logger.info("Starting audio generation process")
+        if torch.cuda.is_available():
+            torch.cuda.empty_cache()
         # 비디오 길이 확인
         cap = cv2.VideoCapture(video_path)
     )
 if __name__ == "__main__":
+    # GPU 초기화 확인
+    if torch.cuda.is_available():
+        logger.info(f"Using GPU: {torch.cuda.get_device_name(0)}")
+    else:
+        logger.warning("GPU not available, using CPU")
     demo.launch(server_name="0.0.0.0", server_port=7860, share=False)