VibeVoice-demo-dev

Paused

App Files Community

broadfield-dev committed on Aug 26

Commit

d082ce1

verified ·

1 Parent(s): b37fbdb

Update app.py

Browse files

Files changed (1) hide show

app.py +40 -41

app.py CHANGED Viewed

@@ -4,8 +4,7 @@ import sys
 from pathlib import Path
 # --- 0. Hardcoded Toggle for Execution Environment ---
-# Set this to True to use Hugging Face ZeroGPU (recommended)
-# Set this to False to use the slower, pure CPU environment
 USE_ZEROGPU = True
 # --- 1. Clone the VibeVoice Repository ---
@@ -26,8 +25,7 @@ if not os.path.exists(repo_dir):
 else:
     print("Repository already exists. Skipping clone.")
-# --- 2. Install the VibeVoice Package ---
-# Note: Other dependencies are installed via requirements.txt
 os.chdir(repo_dir)
 print(f"Changed directory to: {os.getcwd()}")
@@ -51,64 +49,74 @@ print(f"Reading {demo_script_path} to apply environment-specific modifications..
 try:
     modified_content = demo_script_path.read_text()
     if USE_ZEROGPU:
-        print("Configuring for ZeroGPU execution while keeping Flash Attention...")
         # Add 'import spaces' if it's not already there.
         if "import spaces" not in modified_content:
             modified_content = "import spaces\n" + modified_content
-        # Define the generation method signature to add the decorator to.
-        # We target only the first line for robustness.
-        original_method_signature = "    def generate_podcast_streaming(self,"
-        # Define the replacement with the correctly indented decorator.
         replacement_method_signature_gpu = "    @spaces.GPU(duration=120)\n" + original_method_signature
         # --- Apply Patches for GPU ---
-        # The only change needed is to add the decorator. We will NOT modify the
-        # from_pretrained call, leaving attn_implementation="flash_attention_2" in place.
         if original_method_signature in modified_content:
             modified_content = modified_content.replace(original_method_signature, replacement_method_signature_gpu)
             print("Successfully applied GPU decorator to the generation method.")
-            print("Model loading block remains unchanged to explicitly use Flash Attention.")
         else:
-            print("\033[91mError: Could not find the generation method signature to apply the GPU decorator.\033[0m")
             sys.exit(1)
     else: # Pure CPU execution
         print("Modifying for pure CPU execution...")
-        # For the CPU path, we still need to replace the entire CUDA-specific block.
-        original_model_lines = [
-            '        self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(',
-            '            self.model_path,',
-            '            torch_dtype=torch.bfloat16,',
-            "            device_map='cuda',",
-            '            attn_implementation="flash_attention_2",',
-            '        )'
-        ]
-        original_model_block = "\n".join(original_model_lines)
-        # New block for CPU: Use float32 and map to CPU.
         replacement_model_lines_cpu = [
             '        self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(',
             '            self.model_path,',
-            '            torch_dtype=torch.float32,  # Use float32 for CPU',
             '            device_map="cpu",',
             '        )'
         ]
         replacement_model_block_cpu = "\n".join(replacement_model_lines_cpu)
-        # Apply patch for CPU
         if original_model_block in modified_content:
             modified_content = modified_content.replace(original_model_block, replacement_model_block_cpu)
-            print("Script modified for CPU successfully.")
         else:
             print("\033[91mError: The original model loading block was not found for CPU patching.\033[0m")
             sys.exit(1)
-    # Write the dynamically modified content back to the demo file
     demo_script_path.write_text(modified_content)
 except Exception as e:
@@ -117,15 +125,6 @@ except Exception as e:
 # --- 4. Launch the Gradio Demo ---
 model_id = "microsoft/VibeVoice-1.5B"
-# Construct the command to run the modified demo script
-command = [
-    "python",
-    str(demo_script_path),
-    "--model_path",
-    model_id,
-    "--share"
-]
 print(f"Launching Gradio demo with command: {' '.join(command)}")
 subprocess.run(command)

 from pathlib import Path
 # --- 0. Hardcoded Toggle for Execution Environment ---
+# Ensure this is set to True to use the GPU
 USE_ZEROGPU = True
 # --- 1. Clone the VibeVoice Repository ---
 else:
     print("Repository already exists. Skipping clone.")
+# --- 2. Install Dependencies ---
 os.chdir(repo_dir)
 print(f"Changed directory to: {os.getcwd()}")
 try:
     modified_content = demo_script_path.read_text()
+    # Define the original model loading block to be replaced.
+    original_model_lines = [
+        '        self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(',
+        '            self.model_path,',
+        '            torch_dtype=torch.bfloat16,',
+        "            device_map='cuda',",
+        '            attn_implementation="flash_attention_2",',
+        '        )'
+    ]
+    original_model_block = "\n".join(original_model_lines)
+    # Define the generation method signature to add the decorator to.
+    original_method_signature = "    def generate_podcast_streaming(self,"
     if USE_ZEROGPU:
+        print("Optimizing for ZeroGPU execution with robust attention...")
         # Add 'import spaces' if it's not already there.
         if "import spaces" not in modified_content:
             modified_content = "import spaces\n" + modified_content
+        # New block for ZeroGPU model loading: remove `attn_implementation` for auto-detection.
+        replacement_model_lines_gpu = [
+            '        self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(',
+            '            self.model_path,',
+            '            torch_dtype=torch.bfloat16,',
+            "            device_map='cuda',",
+            '        )'
+        ]
+        replacement_model_block_gpu = "\n".join(replacement_model_lines_gpu)
+        # Add the @spaces.GPU decorator with correct indentation.
         replacement_method_signature_gpu = "    @spaces.GPU(duration=120)\n" + original_method_signature
         # --- Apply Patches for GPU ---
+        # Patch 1: Decorate the generation method
         if original_method_signature in modified_content:
             modified_content = modified_content.replace(original_method_signature, replacement_method_signature_gpu)
             print("Successfully applied GPU decorator to the generation method.")
         else:
+            print("\033[91mError: Could not find the generation method signature to patch.\033[0m")
+            sys.exit(1)
+        # Patch 2: Modify the model loading to allow auto-detection of attention
+        if original_model_block in modified_content:
+            modified_content = modified_content.replace(original_model_block, replacement_model_block_gpu)
+            print("Successfully patched model loading to remove hardcoded Flash Attention.")
+        else:
+            print("\033[91mError: The original model loading block was not found.\033[0m")
             sys.exit(1)
     else: # Pure CPU execution
         print("Modifying for pure CPU execution...")
         replacement_model_lines_cpu = [
             '        self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(',
             '            self.model_path,',
+            '            torch_dtype=torch.float32,',
             '            device_map="cpu",',
             '        )'
         ]
         replacement_model_block_cpu = "\n".join(replacement_model_lines_cpu)
         if original_model_block in modified_content:
             modified_content = modified_content.replace(original_model_block, replacement_model_block_cpu)
         else:
             print("\033[91mError: The original model loading block was not found for CPU patching.\033[0m")
             sys.exit(1)
     demo_script_path.write_text(modified_content)
 except Exception as e:
 # --- 4. Launch the Gradio Demo ---
 model_id = "microsoft/VibeVoice-1.5B"
+command = ["python", str(demo_script_path), "--model_path", model_id, "--share"]
 print(f"Launching Gradio demo with command: {' '.join(command)}")
 subprocess.run(command)