VibeVoice-demo-dev

Paused

App Files Community

broadfield-dev committed on Aug 26

Commit

2877f43

verified ·

1 Parent(s): 138e306

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -44

app.py CHANGED Viewed

@@ -34,35 +34,15 @@ except subprocess.CalledProcessError as e:
     print(f"Error installing package: {e.stderr}")
     sys.exit(1)
-# --- 3. Refactor the demo script for ZeroGPU compatibility ---
 demo_script_path = Path("demo/gradio_demo.py")
 print(f"Refactoring {demo_script_path} for ZeroGPU lazy loading...")
 try:
-    modified_content = demo_script_path.read_text()
-    # --- Add necessary imports ---
-    if "import spaces" not in modified_content:
-        modified_content = "import spaces\n" + modified_content
-    # --- Patch 1: Prevent model loading at startup ---
-    # Comment out self.load_model() in __init__ to avoid loading on the main CPU process.
-    original_init_line = "        self.load_model()"
-    replacement_init_line = "        # self.load_model() # Patched: Defer model loading\n        self.model = None\n        self.processor = None"
-    if original_init_line in modified_content:
-        modified_content = modified_content.replace(original_init_line, replacement_init_line)
-        print("Successfully patched __init__ to prevent model loading on startup.")
-    else:
-        print(f"\033[91mError: Could not find '{original_init_line}' to patch.\033[0m")
-        sys.exit(1)
-    # --- Patch 2: Move model loading inside the generation function and add decorator ---
-    # This ensures the model is loaded "just-in-time" on the GPU worker with proper precision.
-    original_method_signature = "    def generate_podcast_streaming(self,"
-    # Define the model loading code to be inserted.
-    # We use torch.bfloat16 for a balance of performance and quality.
     lazy_load_code = """
         # Patched: Lazy-load model and processor on the GPU worker
         if self.model is None or self.processor is None:
@@ -83,28 +63,42 @@ try:
             print("Model and processor loaded successfully on GPU worker.")
 """
-    # We need to find the full method signature to insert code into it.
-    full_method_signature_line = None
-    for line in modified_content.splitlines():
-        if "def generate_podcast_streaming" in line:
-            full_method_signature_line = line.strip()
-            break
-    if full_method_signature_line:
-        # We find the end of the method signature to insert our code block.
-        target_to_replace = full_method_signature_line + "\n"
-        replacement_block = (
-            "    @spaces.GPU(duration=120)\n" +
-            "    " + full_method_signature_line + "\n" +
-            lazy_load_code
-        )
-        modified_content = modified_content.replace(target_to_replace, replacement_block, 1)
-        print("Successfully refactored generation method for lazy loading on GPU.")
-    else:
-        print(f"\033[91mError: Could not find full method signature for 'generate_podcast_streaming' to patch.\033[0m")
         sys.exit(1)
-    demo_script_path.write_text(modified_content)
     print("Script patching complete.")
 except Exception as e:

     print(f"Error installing package: {e.stderr}")
     sys.exit(1)
+# --- 3. Refactor the demo script using a robust line-by-line patch ---
 demo_script_path = Path("demo/gradio_demo.py")
 print(f"Refactoring {demo_script_path} for ZeroGPU lazy loading...")
 try:
+    with open(demo_script_path, 'r') as f:
+        lines = f.readlines()
+    # --- Prepare the code blocks to be inserted ---
     lazy_load_code = """
         # Patched: Lazy-load model and processor on the GPU worker
         if self.model is None or self.processor is None:
             print("Model and processor loaded successfully on GPU worker.")
 """
+    # --- Perform the line-by-line modifications ---
+    new_lines = []
+    # Add 'import spaces' at the top if it doesn't exist
+    if not any("import spaces" in line for line in lines):
+        new_lines.append("import spaces\n")
+    patched = False
+    for line in lines:
+        # Defer the initial model loading to prevent PicklingError
+        if "self.load_model()" in line and "def __init__" in "".join(lines[lines.index(line)-2:lines.index(line)]):
+            new_lines.append("        # self.load_model() # Patched: Defer model loading\n")
+            new_lines.append("        self.model = None\n")
+            new_lines.append("        self.processor = None\n")
+            print("Successfully patched __init__ to prevent startup model load.")
+        # Find the generation method to add the decorator and lazy-loading logic
+        elif "def generate_podcast_streaming(self," in line:
+            new_lines.append("    @spaces.GPU(duration=120)\n")
+            new_lines.append(line)
+        elif "-> Iterator[tuple]:" in line and "generate_podcast_streaming" in new_lines[-1]:
+            new_lines.append(line)
+            # Indent the lazy load code correctly
+            for code_line in lazy_load_code.strip().split('\n'):
+                new_lines.append(' ' * 8 + code_line + '\n')
+            patched = True
+            print("Successfully patched generation method for lazy loading.")
+        else:
+            new_lines.append(line)
+    if not patched:
+        print("\033[91mError: Failed to apply the lazy-loading patch. The target method signature may have changed.\033[0m")
         sys.exit(1)
+    # --- Write the modified content back to the file ---
+    with open(demo_script_path, 'w') as f:
+        f.writelines(new_lines)
     print("Script patching complete.")
 except Exception as e: