Spaces:

ginipick
/

Dokdo-multimodal

Paused

App Files Files Community

aiqcamp committed on Dec 22, 2024

Commit

0c5effb

verified ·

1 Parent(s): 9b8d878

Update app.py

Browse files

Files changed (1) hide show

app.py +8 -8

app.py CHANGED Viewed

@@ -72,8 +72,6 @@ def video_to_audio(video_path: str, prompt: str, negative_prompt: str = "music",
                    cfg_strength: float = 4.5, target_duration: float = 6.0):
     try:
         logger.info("Starting audio generation process")
-        # GPU 메모리 최적화
         torch.cuda.empty_cache()
         rng = torch.Generator(device=device)
@@ -85,12 +83,16 @@ def video_to_audio(video_path: str, prompt: str, negative_prompt: str = "music",
         fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=num_steps)
         # load_video 함수 호출 수정
-        video_info = load_video(video_path, duration=target_duration)  # static_duration을 duration으로 변경
         if video_info is None:
             logger.error("Failed to load video")
             return video_path
         clip_frames = video_info.clip_frames
         sync_frames = video_info.sync_frames
         actual_duration = video_info.duration_sec
@@ -99,7 +101,6 @@ def video_to_audio(video_path: str, prompt: str, negative_prompt: str = "music",
             logger.error("Failed to extract frames from video")
             return video_path
-        # 메모리 효율을 위해 배치 크기 조정
         clip_frames = clip_frames.unsqueeze(0).to(device, dtype=torch.float16)
         sync_frames = sync_frames.unsqueeze(0).to(device, dtype=torch.float16)
@@ -126,13 +127,12 @@ def video_to_audio(video_path: str, prompt: str, negative_prompt: str = "music",
         output_path = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4').name
         logger.info(f"Creating final video with audio at {output_path}")
-        success = make_video(video_info, output_path, audio, sampling_rate=seq_cfg.sampling_rate)
-        # GPU 메모리 정리
         torch.cuda.empty_cache()
-        if not success:
-            logger.error("Failed to create video with audio")
             return video_path
         logger.info(f'Successfully saved video with audio to {output_path}')

                    cfg_strength: float = 4.5, target_duration: float = 6.0):
     try:
         logger.info("Starting audio generation process")
         torch.cuda.empty_cache()
         rng = torch.Generator(device=device)
         fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=num_steps)
         # load_video 함수 호출 수정
+        video_info = load_video(video_path)  # duration 파라미터 제거
         if video_info is None:
             logger.error("Failed to load video")
             return video_path
+        # 비디오 길이 조정이 필요한 경우 여기서 처리
+        if hasattr(video_info, 'set_duration'):
+            video_info.set_duration(target_duration)
         clip_frames = video_info.clip_frames
         sync_frames = video_info.sync_frames
         actual_duration = video_info.duration_sec
             logger.error("Failed to extract frames from video")
             return video_path
         clip_frames = clip_frames.unsqueeze(0).to(device, dtype=torch.float16)
         sync_frames = sync_frames.unsqueeze(0).to(device, dtype=torch.float16)
         output_path = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4').name
         logger.info(f"Creating final video with audio at {output_path}")
+        make_video(video_info, output_path, audio, sampling_rate=seq_cfg.sampling_rate)
         torch.cuda.empty_cache()
+        if not os.path.exists(output_path):
+            logger.error("Failed to create output video")
             return video_path
         logger.info(f'Successfully saved video with audio to {output_path}')