broadfield-dev committed on
Commit
d082ce1
·
verified ·
1 Parent(s): b37fbdb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -41
app.py CHANGED
@@ -4,8 +4,7 @@ import sys
4
  from pathlib import Path
5
 
6
  # --- 0. Hardcoded Toggle for Execution Environment ---
7
- # Set this to True to use Hugging Face ZeroGPU (recommended)
8
- # Set this to False to use the slower, pure CPU environment
9
  USE_ZEROGPU = True
10
 
11
  # --- 1. Clone the VibeVoice Repository ---
@@ -26,8 +25,7 @@ if not os.path.exists(repo_dir):
26
  else:
27
  print("Repository already exists. Skipping clone.")
28
 
29
- # --- 2. Install the VibeVoice Package ---
30
- # Note: Other dependencies are installed via requirements.txt
31
  os.chdir(repo_dir)
32
  print(f"Changed directory to: {os.getcwd()}")
33
 
@@ -51,64 +49,74 @@ print(f"Reading {demo_script_path} to apply environment-specific modifications..
51
  try:
52
  modified_content = demo_script_path.read_text()
53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  if USE_ZEROGPU:
55
- print("Configuring for ZeroGPU execution while keeping Flash Attention...")
56
 
57
  # Add 'import spaces' if it's not already there.
58
  if "import spaces" not in modified_content:
59
  modified_content = "import spaces\n" + modified_content
60
 
61
- # Define the generation method signature to add the decorator to.
62
- # We target only the first line for robustness.
63
- original_method_signature = " def generate_podcast_streaming(self,"
 
 
 
 
 
 
64
 
65
- # Define the replacement with the correctly indented decorator.
66
  replacement_method_signature_gpu = " @spaces.GPU(duration=120)\n" + original_method_signature
67
 
68
  # --- Apply Patches for GPU ---
69
- # The only change needed is to add the decorator. We will NOT modify the
70
- # from_pretrained call, leaving attn_implementation="flash_attention_2" in place.
71
  if original_method_signature in modified_content:
72
  modified_content = modified_content.replace(original_method_signature, replacement_method_signature_gpu)
73
  print("Successfully applied GPU decorator to the generation method.")
74
- print("Model loading block remains unchanged to explicitly use Flash Attention.")
75
  else:
76
- print("\033[91mError: Could not find the generation method signature to apply the GPU decorator.\033[0m")
 
 
 
 
 
 
 
 
77
  sys.exit(1)
78
 
79
  else: # Pure CPU execution
80
  print("Modifying for pure CPU execution...")
81
-
82
- # For the CPU path, we still need to replace the entire CUDA-specific block.
83
- original_model_lines = [
84
- ' self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(',
85
- ' self.model_path,',
86
- ' torch_dtype=torch.bfloat16,',
87
- " device_map='cuda',",
88
- ' attn_implementation="flash_attention_2",',
89
- ' )'
90
- ]
91
- original_model_block = "\n".join(original_model_lines)
92
-
93
- # New block for CPU: Use float32 and map to CPU.
94
  replacement_model_lines_cpu = [
95
  ' self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(',
96
  ' self.model_path,',
97
- ' torch_dtype=torch.float32, # Use float32 for CPU',
98
  ' device_map="cpu",',
99
  ' )'
100
  ]
101
  replacement_model_block_cpu = "\n".join(replacement_model_lines_cpu)
102
-
103
- # Apply patch for CPU
104
  if original_model_block in modified_content:
105
  modified_content = modified_content.replace(original_model_block, replacement_model_block_cpu)
106
- print("Script modified for CPU successfully.")
107
  else:
108
  print("\033[91mError: The original model loading block was not found for CPU patching.\033[0m")
109
  sys.exit(1)
110
 
111
- # Write the dynamically modified content back to the demo file
112
  demo_script_path.write_text(modified_content)
113
 
114
  except Exception as e:
@@ -117,15 +125,6 @@ except Exception as e:
117
 
118
  # --- 4. Launch the Gradio Demo ---
119
  model_id = "microsoft/VibeVoice-1.5B"
120
-
121
- # Construct the command to run the modified demo script
122
- command = [
123
- "python",
124
- str(demo_script_path),
125
- "--model_path",
126
- model_id,
127
- "--share"
128
- ]
129
-
130
  print(f"Launching Gradio demo with command: {' '.join(command)}")
131
  subprocess.run(command)
 
4
  from pathlib import Path
5
 
6
  # --- 0. Hardcoded Toggle for Execution Environment ---
7
+ # Ensure this is set to True to use the GPU
 
8
  USE_ZEROGPU = True
9
 
10
  # --- 1. Clone the VibeVoice Repository ---
 
25
  else:
26
  print("Repository already exists. Skipping clone.")
27
 
28
+ # --- 2. Install Dependencies ---
 
29
  os.chdir(repo_dir)
30
  print(f"Changed directory to: {os.getcwd()}")
31
 
 
49
  try:
50
  modified_content = demo_script_path.read_text()
51
 
52
+ # Define the original model loading block to be replaced.
53
+ original_model_lines = [
54
+ ' self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(',
55
+ ' self.model_path,',
56
+ ' torch_dtype=torch.bfloat16,',
57
+ " device_map='cuda',",
58
+ ' attn_implementation="flash_attention_2",',
59
+ ' )'
60
+ ]
61
+ original_model_block = "\n".join(original_model_lines)
62
+
63
+ # Define the generation method signature to add the decorator to.
64
+ original_method_signature = " def generate_podcast_streaming(self,"
65
+
66
  if USE_ZEROGPU:
67
+ print("Optimizing for ZeroGPU execution with robust attention...")
68
 
69
  # Add 'import spaces' if it's not already there.
70
  if "import spaces" not in modified_content:
71
  modified_content = "import spaces\n" + modified_content
72
 
73
+ # New block for ZeroGPU model loading: remove `attn_implementation` for auto-detection.
74
+ replacement_model_lines_gpu = [
75
+ ' self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(',
76
+ ' self.model_path,',
77
+ ' torch_dtype=torch.bfloat16,',
78
+ " device_map='cuda',",
79
+ ' )'
80
+ ]
81
+ replacement_model_block_gpu = "\n".join(replacement_model_lines_gpu)
82
 
83
+ # Add the @spaces.GPU decorator with correct indentation.
84
  replacement_method_signature_gpu = " @spaces.GPU(duration=120)\n" + original_method_signature
85
 
86
  # --- Apply Patches for GPU ---
87
+
88
+ # Patch 1: Decorate the generation method
89
  if original_method_signature in modified_content:
90
  modified_content = modified_content.replace(original_method_signature, replacement_method_signature_gpu)
91
  print("Successfully applied GPU decorator to the generation method.")
 
92
  else:
93
+ print("\033[91mError: Could not find the generation method signature to patch.\033[0m")
94
+ sys.exit(1)
95
+
96
+ # Patch 2: Modify the model loading to allow auto-detection of attention
97
+ if original_model_block in modified_content:
98
+ modified_content = modified_content.replace(original_model_block, replacement_model_block_gpu)
99
+ print("Successfully patched model loading to remove hardcoded Flash Attention.")
100
+ else:
101
+ print("\033[91mError: The original model loading block was not found.\033[0m")
102
  sys.exit(1)
103
 
104
  else: # Pure CPU execution
105
  print("Modifying for pure CPU execution...")
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  replacement_model_lines_cpu = [
107
  ' self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(',
108
  ' self.model_path,',
109
+ ' torch_dtype=torch.float32,',
110
  ' device_map="cpu",',
111
  ' )'
112
  ]
113
  replacement_model_block_cpu = "\n".join(replacement_model_lines_cpu)
 
 
114
  if original_model_block in modified_content:
115
  modified_content = modified_content.replace(original_model_block, replacement_model_block_cpu)
 
116
  else:
117
  print("\033[91mError: The original model loading block was not found for CPU patching.\033[0m")
118
  sys.exit(1)
119
 
 
120
  demo_script_path.write_text(modified_content)
121
 
122
  except Exception as e:
 
125
 
126
  # --- 4. Launch the Gradio Demo ---
127
  model_id = "microsoft/VibeVoice-1.5B"
128
+ command = ["python", str(demo_script_path), "--model_path", model_id, "--share"]
 
 
 
 
 
 
 
 
 
129
  print(f"Launching Gradio demo with command: {' '.join(command)}")
130
  subprocess.run(command)