Spaces:

ginipick
/

Dokdo-multimodal

Paused

App Files Files Community

aiqcamp committed on Dec 22, 2024

Commit

8a61a4d

verified ·

1 Parent(s): 1cfe547

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -10

app.py CHANGED Viewed

@@ -65,22 +65,25 @@ output_dir = Path('./output/gradio')
 setup_eval_logging()
 net, feature_utils, seq_cfg = get_model()
 @spaces.GPU(duration=120)
 @torch.inference_mode()
 def video_to_audio(video_path: str, prompt: str, negative_prompt: str = "music",
                    seed: int = -1, num_steps: int = 25,
-                   cfg_strength: float = 4.5, duration: float = 8):
     try:
         rng = torch.Generator(device=device)
         if seed >= 0:
             rng.manual_seed(seed)
         else:
             rng.seed()
         fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=num_steps)
         # video_info = load_video(video_path, duration) 대신:
-        video_info = load_video(video_path, static_duration=duration)
         if video_info is None:
             logger.error("Failed to load video")
@@ -94,14 +97,15 @@ def video_to_audio(video_path: str, prompt: str, negative_prompt: str = "music",
             logger.error("Failed to extract frames from video")
             return video_path
-        clip_frames = clip_frames.unsqueeze(0)
-        sync_frames = sync_frames.unsqueeze(0)
         # 시퀀스 길이 업데이트
         seq_cfg.duration = actual_duration
         net.update_seq_lengths(seq_cfg.latent_seq_len, seq_cfg.clip_seq_len, seq_cfg.sync_seq_len)
         # 오디오 생성
         audios = generate(clip_frames,
                          sync_frames,
                          [prompt],
@@ -120,12 +124,14 @@ def video_to_audio(video_path: str, prompt: str, negative_prompt: str = "music",
         # 결과 비디오 생성
         output_path = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4').name
-        success = make_video(video_info, output_path, audio, sampling_rate=seq_cfg.sampling_rate)
-        if not success:
-            logger.error("Failed to create video with audio")
             return video_path
         logger.info(f'Successfully saved video with audio to {output_path}')
         return output_path
@@ -353,7 +359,7 @@ def generate_video(image, prompt):
                             seed=-1,
                             num_steps=25,
                             cfg_strength=4.5,
-                            duration=8.0  # float 타입으로 명시
                         )
                         if final_path_with_audio != final_path:

 setup_eval_logging()
 net, feature_utils, seq_cfg = get_model()
 @spaces.GPU(duration=120)
 @torch.inference_mode()
 def video_to_audio(video_path: str, prompt: str, negative_prompt: str = "music",
                    seed: int = -1, num_steps: int = 25,
+                   cfg_strength: float = 4.5, target_duration: float = 8.0):
     try:
+        logger.info("Starting audio generation process")
         rng = torch.Generator(device=device)
         if seed >= 0:
             rng.manual_seed(seed)
         else:
             rng.seed()
         fm = FlowMatching(min_sigma=0, inference_mode='euler', num_steps=num_steps)
         # video_info = load_video(video_path, duration) 대신:
+        kwargs = {'static_duration': target_duration}
+        video_info = load_video(video_path, **kwargs)
         if video_info is None:
             logger.error("Failed to load video")
             logger.error("Failed to extract frames from video")
             return video_path
+        clip_frames = clip_frames.unsqueeze(0).to(device)
+        sync_frames = sync_frames.unsqueeze(0).to(device)
         # 시퀀스 길이 업데이트
         seq_cfg.duration = actual_duration
         net.update_seq_lengths(seq_cfg.latent_seq_len, seq_cfg.clip_seq_len, seq_cfg.sync_seq_len)
         # 오디오 생성
+        logger.info("Generating audio...")
         audios = generate(clip_frames,
                          sync_frames,
                          [prompt],
         # 결과 비디오 생성
         output_path = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4').name
+        logger.info(f"Creating final video with audio at {output_path}")
+        make_video(video_info, output_path, audio, sampling_rate=seq_cfg.sampling_rate)
+        if not os.path.exists(output_path):
+            logger.error("Failed to create output video")
             return video_path
         logger.info(f'Successfully saved video with audio to {output_path}')
         return output_path
                             seed=-1,
                             num_steps=25,
                             cfg_strength=4.5,
+                            target_duration=8.0  # duration을 target_duration으로 변경
                         )
                         if final_path_with_audio != final_path: