Spaces:

JackIsNotInTheBox
/

Generate_Audio_for_Video

Sleeping

BoxOfColors committed 18 days ago

Commit

15ab81a

1 Parent(s): 1044cda

Fix MMAudio inference_mode error (torch.no_grad); fix Hunyuan weights subdir path

Files changed (1) hide show

app.py CHANGED Viewed

@@ -418,17 +418,18 @@ def generate_mmaudio(video_file, prompt, negative_prompt, seed_val,
         print(f"[MMAudio] Sample {sample_idx+1} | duration={actual_dur:.2f}s | prompt='{prompt}'")
-        audios = generate(
-            clip_frames,
-            sync_frames,
-            [prompt],
-            negative_text=[negative_prompt] if negative_prompt else None,
-            feature_utils=feature_utils,
-            net=net,
-            fm=fm,
-            rng=rng,
-            cfg_strength=float(cfg_strength),
-        )
         audio = audios.float().cpu()[0]   # (C, T)
         audio_path = os.path.join(tmp_dir, f"mmaudio_{sample_idx}.flac")
@@ -485,12 +486,12 @@ def generate_hunyuan(video_file, prompt, negative_prompt, seed_val,
     }
     config_path = config_map.get(model_size, config_map["xxl"])
-    print(f"[HunyuanFoley] Loading {model_size.upper()} model from {HUNYUAN_MODEL_DIR}")
-    # load_model() handles: HunyuanVideoFoley main model, DAC-VAE, SigLIP2, CLAP, Synchformer
-    # CLAP (laion/larger_clap_general) and SigLIP2 (google/siglip2-base-patch16-512) are
-    # downloaded from HuggingFace Hub automatically by load_model().
     model_dict, cfg = load_model(
-        str(HUNYUAN_MODEL_DIR),
         config_path,
         device,
         enable_offload=False,

         print(f"[MMAudio] Sample {sample_idx+1} | duration={actual_dur:.2f}s | prompt='{prompt}'")
+        with torch.no_grad():
+            audios = generate(
+                clip_frames,
+                sync_frames,
+                [prompt],
+                negative_text=[negative_prompt] if negative_prompt else None,
+                feature_utils=feature_utils,
+                net=net,
+                fm=fm,
+                rng=rng,
+                cfg_strength=float(cfg_strength),
+            )
         audio = audios.float().cpu()[0]   # (C, T)
         audio_path = os.path.join(tmp_dir, f"mmaudio_{sample_idx}.flac")
     }
     config_path = config_map.get(model_size, config_map["xxl"])
+    # hf_hub_download preserves the repo subfolder, so weights land in
+    # HUNYUAN_MODEL_DIR/HunyuanVideo-Foley/  — pass that as the weights dir.
+    hunyuan_weights_dir = str(HUNYUAN_MODEL_DIR / "HunyuanVideo-Foley")
+    print(f"[HunyuanFoley] Loading {model_size.upper()} model from {hunyuan_weights_dir}")
     model_dict, cfg = load_model(
+        hunyuan_weights_dir,
         config_path,
         device,
         enable_offload=False,