"""Clone, install, patch, and launch the VibeVoice Gradio demo.

Steps, in order:

1. Clone the VibeVoice repository unless ``repo_dir`` already exists.
2. ``pip install -e .`` the package (plus ``spaces`` when ``USE_ZEROGPU``).
3. Rewrite ``demo/gradio_demo.py`` so the model loads without
   ``flash_attention_2`` (ZeroGPU) or in float32 on the CPU.
4. Launch the patched demo with the interpreter running this script.

Any failing step prints a message and exits with status 1; otherwise the
script exits with the demo's own return code.
"""

import os
import subprocess
import sys
from pathlib import Path

# Set to True to use ZeroGPU, False to run on CPU only.
USE_ZEROGPU = True

REPO_URL = "https://github.com/microsoft/VibeVoice.git"
repo_dir = "VibeVoice"

# Relative to ``repo_dir``; only valid after the ``os.chdir`` in ``main``.
demo_script_path = Path("demo/gradio_demo.py")
model_id = "microsoft/VibeVoice-1.5B"

# Model-loading call as it appears in the upstream demo script.
# NOTE(review): ``str.replace`` needs a byte-exact match, including the
# 8/12-space indentation used below -- confirm against the current upstream
# demo/gradio_demo.py.
ORIGINAL_MODEL_BLOCK = (
    "        self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(\n"
    "            self.model_path,\n"
    "            torch_dtype=torch.bfloat16,\n"
    "            device_map='cuda',\n"
    '            attn_implementation="flash_attention_2",\n'
    "        )"
)

# ZeroGPU variant: same call with the flash-attention argument dropped.
ZEROGPU_MODEL_BLOCK = (
    "        self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(\n"
    "            self.model_path,\n"
    "            torch_dtype=torch.bfloat16,\n"
    "            device_map='cuda',\n"
    "        )"
)

# CPU variant: float32 weights on the CPU, no flash-attention argument.
CPU_MODEL_BLOCK = (
    "        self.model = VibeVoiceForConditionalGenerationInference.from_pretrained(\n"
    "            self.model_path,\n"
    "            torch_dtype=torch.float32,  # Use float32 for CPU\n"
    '            device_map="cpu",\n'
    "        )"
)

SPACES_IMPORT = "import spaces\n"
INTERFACE_CLASS_HEADER = "class VibeVoiceGradioInterface:"
# NOTE(review): the decorator is applied to the class, as the original script
# did. Confirm that `spaces.GPU` is meant to wrap a class rather than the
# GPU-bound function(s).
DECORATED_CLASS_HEADER = "@spaces.GPU(duration=120)\n" + INTERFACE_CLASS_HEADER


def patch_demo_source(source: str, use_zerogpu: bool) -> str:
    """Return ``source`` rewritten for ZeroGPU or CPU execution.

    The rewrite is idempotent: feeding the result back in returns it
    unchanged, so re-running the script against an already patched checkout
    does not stack a second ``import spaces`` line or decorator.

    Args:
        source: Full text of the demo script.
        use_zerogpu: True for the ZeroGPU rewrite, False for the CPU rewrite.

    Returns:
        The patched text. Pieces that cannot be found are left as they are.
    """
    if not use_zerogpu:
        return source.replace(ORIGINAL_MODEL_BLOCK, CPU_MODEL_BLOCK)

    patched = source
    # `spaces` goes on the very first line so it is imported before
    # everything else in the demo.
    if not patched.startswith(SPACES_IMPORT):
        patched = SPACES_IMPORT + patched
    if DECORATED_CLASS_HEADER not in patched:
        patched = patched.replace(INTERFACE_CLASS_HEADER, DECORATED_CLASS_HEADER)
    return patched.replace(ORIGINAL_MODEL_BLOCK, ZEROGPU_MODEL_BLOCK)


def _run_or_exit(command: list, failure_label: str) -> None:
    """Run ``command`` quietly; on failure print ``failure_label`` and exit 1.

    Output is captured, so it is only shown (stderr) when the command fails.
    A missing executable (e.g. ``git`` not installed) is reported the same
    way instead of surfacing as an unhandled traceback.
    """
    try:
        subprocess.run(command, check=True, capture_output=True, text=True)
    except subprocess.CalledProcessError as err:
        print(f"{failure_label}: {err.stderr}")
        sys.exit(1)
    except OSError as err:
        print(f"{failure_label}: {err}")
        sys.exit(1)


def _patch_demo_script() -> None:
    """Patch ``demo_script_path`` in place, exiting with status 1 on I/O errors."""
    print(f"Reading {demo_script_path}...")
    if USE_ZEROGPU:
        print("Optimizing for ZeroGPU execution...")
        target_block, success = ZEROGPU_MODEL_BLOCK, "Script modified for ZeroGPU successfully."
    else:
        print("Modifying for pure CPU execution...")
        target_block, success = CPU_MODEL_BLOCK, "Script modified for CPU successfully."

    try:
        # Explicit encoding: do not depend on the platform's locale default.
        source = demo_script_path.read_text(encoding="utf-8")
        patched = patch_demo_source(source, USE_ZEROGPU)
        if patched != source:
            demo_script_path.write_text(patched, encoding="utf-8")
    except (OSError, UnicodeError) as err:
        print(f"An error occurred while modifying the script: {err}")
        sys.exit(1)

    if target_block in patched:
        print(success)
    else:
        # The exact-match replace found nothing; say so instead of claiming
        # success while the demo still requests flash attention on CUDA.
        print(
            f"Warning: model-loading block not found in {demo_script_path}; "
            "device settings were left unchanged."
        )


def main() -> None:
    """Run the clone -> install -> patch -> launch pipeline."""
    if not os.path.exists(repo_dir):
        print("Cloning the VibeVoice repository...")
        # Pass the target directory explicitly so it always matches repo_dir.
        _run_or_exit(["git", "clone", REPO_URL, repo_dir], "Error cloning repository")
        print("Repository cloned successfully.")
    else:
        print("Repository already exists. Skipping clone.")

    os.chdir(repo_dir)
    print(f"Changed directory to: {os.getcwd()}")

    print("Installing the VibeVoice package...")
    _run_or_exit(
        [sys.executable, "-m", "pip", "install", "-e", "."],
        "Error installing package",
    )
    print("Package installed successfully.")

    if USE_ZEROGPU:
        print("Installing the 'spaces' library for ZeroGPU...")
        _run_or_exit(
            [sys.executable, "-m", "pip", "install", "spaces"],
            "Error installing 'spaces' library",
        )
        print("'spaces' library installed successfully.")

    _patch_demo_script()

    # Launch with the same interpreter pip installed into; a bare "python"
    # may resolve to a different environment without the package.
    command = [
        sys.executable,
        str(demo_script_path),
        "--model_path",
        model_id,
        "--share",
    ]
    print(f"Launching Gradio demo with command: {' '.join(command)}")
    completed = subprocess.run(command, check=False)
    # Propagate the demo's exit status instead of always exiting 0.
    sys.exit(completed.returncode)


if __name__ == "__main__":
    main()