broadfield-dev committed on
Commit
e59066e
·
verified ·
1 Parent(s): a549db3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +25 -11
app.py CHANGED
@@ -4,8 +4,8 @@ import sys
4
  from pathlib import Path
5
 
6
  # --- 0. Hardcoded Toggle for Execution Environment ---
7
- # Ensure this is set to True to use the GPU
8
- USE_ZEROGPU = False
9
 
10
  # --- 1. Clone the VibeVoice Repository ---
11
  repo_dir = "VibeVoice"
@@ -29,6 +29,18 @@ else:
29
  os.chdir(repo_dir)
30
  print(f"Changed directory to: {os.getcwd()}")
31
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  print("Installing the VibeVoice package in editable mode...")
33
  try:
34
  subprocess.run(
@@ -42,7 +54,7 @@ except subprocess.CalledProcessError as e:
42
  print(f"Error installing package: {e.stderr}")
43
  sys.exit(1)
44
 
45
- # --- 3. Modify the demo script to be environment-aware ---
46
  demo_script_path = Path("demo/gradio_demo.py")
47
  print(f"Reading {demo_script_path} to apply environment-specific modifications...")
48
 
@@ -64,18 +76,19 @@ try:
64
  original_method_signature = " def generate_podcast_streaming(self,"
65
 
66
  if USE_ZEROGPU:
67
- print("Optimizing for ZeroGPU execution with robust attention...")
68
 
69
- # Add 'import spaces' if it's not already there.
70
  if "import spaces" not in modified_content:
71
  modified_content = "import spaces\n" + modified_content
72
 
73
- # New block for ZeroGPU model loading: remove `attn_implementation` for auto-detection.
 
74
  replacement_model_lines_gpu = [
75
  ' self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(',
76
  ' self.model_path,',
77
- ' torch_dtype=torch.bfloat16,',
78
- " device_map='cuda',",
79
  ' )'
80
  ]
81
  replacement_model_block_gpu = "\n".join(replacement_model_lines_gpu)
@@ -93,15 +106,16 @@ try:
93
  print("\033[91mError: Could not find the generation method signature to patch.\033[0m")
94
  sys.exit(1)
95
 
96
- # Patch 2: Modify the model loading to allow auto-detection of attention
97
  if original_model_block in modified_content:
98
  modified_content = modified_content.replace(original_model_block, replacement_model_block_gpu)
99
- print("Successfully patched model loading to remove hardcoded Flash Attention.")
100
  else:
101
  print("\033[91mError: The original model loading block was not found.\033[0m")
102
  sys.exit(1)
103
 
104
- else: # Pure CPU execution
 
105
  print("Modifying for pure CPU execution...")
106
  replacement_model_lines_cpu = [
107
  ' self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(',
 
4
  from pathlib import Path
5
 
6
  # --- 0. Hardcoded Toggle for Execution Environment ---
7
+ # Ensure this is set to True to use the GPU with quantization
8
+ USE_ZEROGPU = True
9
 
10
  # --- 1. Clone the VibeVoice Repository ---
11
  repo_dir = "VibeVoice"
 
29
  os.chdir(repo_dir)
30
  print(f"Changed directory to: {os.getcwd()}")
31
 
32
+ # Install bitsandbytes for quantization to reduce memory usage
33
+ print("Installing bitsandbytes for quantization...")
34
+ try:
35
+ subprocess.run(
36
+ [sys.executable, "-m", "pip", "install", "bitsandbytes"],
37
+ check=True, capture_output=True, text=True
38
+ )
39
+ print("bitsandbytes installed successfully.")
40
+ except subprocess.CalledProcessError as e:
41
+ print(f"Error installing bitsandbytes: {e.stderr}")
42
+ sys.exit(1)
43
+
44
  print("Installing the VibeVoice package in editable mode...")
45
  try:
46
  subprocess.run(
 
54
  print(f"Error installing package: {e.stderr}")
55
  sys.exit(1)
56
 
57
+ # --- 3. Modify the demo script for a memory-constrained environment ---
58
  demo_script_path = Path("demo/gradio_demo.py")
59
  print(f"Reading {demo_script_path} to apply environment-specific modifications...")
60
 
 
76
  original_method_signature = " def generate_podcast_streaming(self,"
77
 
78
  if USE_ZEROGPU:
79
+ print("Optimizing for ZeroGPU with 8-bit quantization...")
80
 
81
+ # Add necessary imports if they are not already there.
82
  if "import spaces" not in modified_content:
83
  modified_content = "import spaces\n" + modified_content
84
 
85
+ # New block for ZeroGPU with 8-bit quantization.
86
+ # This is the key change to solve the memory issue.
87
  replacement_model_lines_gpu = [
88
  ' self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(',
89
  ' self.model_path,',
90
+ ' load_in_8bit=True,',
91
+ ' device_map="auto",',
92
  ' )'
93
  ]
94
  replacement_model_block_gpu = "\n".join(replacement_model_lines_gpu)
 
106
  print("\033[91mError: Could not find the generation method signature to patch.\033[0m")
107
  sys.exit(1)
108
 
109
+ # Patch 2: Modify the model loading to use 8-bit quantization
110
  if original_model_block in modified_content:
111
  modified_content = modified_content.replace(original_model_block, replacement_model_block_gpu)
112
+ print("Successfully patched model loading for 8-bit quantization.")
113
  else:
114
  print("\033[91mError: The original model loading block was not found.\033[0m")
115
  sys.exit(1)
116
 
117
+ else: # Pure CPU execution (not recommended on ZeroGPU hardware)
118
+ # This block is unlikely to be used but kept for completeness
119
  print("Modifying for pure CPU execution...")
120
  replacement_model_lines_cpu = [
121
  ' self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(',