#!/usr/bin/env python3
"""
VibeVoice vLLM ASR Server Launcher
One-click deployment script that handles:
1. Installing system dependencies (FFmpeg, etc.)
2. Installing VibeVoice Python package
3. Downloading model from HuggingFace
4. Generating tokenizer files
5. Starting vLLM server
Usage:
python3 start_server.py [--model MODEL_ID] [--port PORT] [--skip-deps] [--skip-tokenizer]
"""
import argparse
import os
import subprocess
import sys
def run_command(cmd: list[str], description: str, shell: bool = False) -> None:
    """Print a banner for ``description`` and run ``cmd`` to completion.

    Args:
        cmd: Command and its arguments, one list item per argument.
        description: Human-readable label shown in the banner.
        shell: When true, execute the command through the system shell.

    Raises:
        subprocess.CalledProcessError: If the command exits with a
            non-zero status (``check=True`` is always used).
    """
    import shlex  # local import: only the shell=True path needs it

    rule = "=" * 60
    # Flush so the banner is emitted before the child's output even when
    # stdout is block-buffered (pipe or file redirect).
    print(f"\n{rule}\n {description}\n{rule}\n", flush=True)

    if shell and os.name == "posix" and not isinstance(cmd, str):
        # On POSIX, shell=True with a sequence runs only the first item as
        # the command string; the remaining items become arguments to the
        # shell itself. Join into one correctly quoted command line so the
        # whole command is executed.
        cmd = shlex.join(cmd)

    subprocess.run(cmd, shell=shell, check=True)
def install_system_deps() -> None:
    """Refresh the apt package index, then install ffmpeg and libsndfile1.

    Each step goes through ``run_command``; the steps run in order.
    """
    apt_steps = (
        (["apt-get", "update"], "Updating package list"),
        (
            ["apt-get", "install", "-y", "ffmpeg", "libsndfile1"],
            "Installing FFmpeg and audio libraries",
        ),
    )
    for argv, label in apt_steps:
        run_command(argv, label)
def install_vibevoice() -> None:
    """Pip-install ``/app`` in editable mode with its ``vllm`` extra.

    Uses the interpreter running this script (``sys.executable -m pip``).
    """
    pip_argv = [sys.executable, "-m", "pip", "install"]
    pip_argv += ["-e", "/app[vllm]"]
    run_command(pip_argv, "Installing VibeVoice with vLLM support")
def download_model(model_id: str) -> str:
    """Fetch ``model_id`` with ``huggingface_hub.snapshot_download``.

    Args:
        model_id: Model identifier passed straight to ``snapshot_download``.

    Returns:
        The path returned by ``snapshot_download``.
    """
    rule = "=" * 60
    print("\n" + rule, f" Downloading model: {model_id}", rule + "\n", sep="\n")

    # Imported after the banner so the banner prints even if an import fails.
    import warnings
    from huggingface_hub import snapshot_download

    # All warnings raised during the download are silenced, not only
    # deprecation warnings.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        model_path = snapshot_download(model_id)

    print(
        "\n" + rule,
        " ✅ Model downloaded successfully!",
        f" 📁 Path: {model_path}",
        rule + "\n",
        sep="\n",
    )
    return model_path
def generate_tokenizer(model_path: str) -> None:
    """Run the ``vllm_plugin.tools.generate_tokenizer_files`` module.

    Args:
        model_path: Value passed to the tool's ``--output`` option.
    """
    tool_module = "vllm_plugin.tools.generate_tokenizer_files"
    argv = [sys.executable, "-m", tool_module, "--output", model_path]
    run_command(argv, "Generating tokenizer files")
def start_vllm_server(model_path: str, port: int) -> None:
    """Replace the current process with ``vllm serve`` for ``model_path``.

    This function does not return on success: ``os.execvp`` swaps the
    running Python process for the ``vllm`` executable found on ``PATH``.

    Args:
        model_path: Model location passed as the positional argument to
            ``vllm serve``.
        port: Port number passed to ``--port``.

    Raises:
        OSError: If the ``vllm`` executable cannot be executed (for
            example, it is not on ``PATH``).
    """
    rule = "=" * 60
    print(f"\n{rule}")
    print(f" Starting vLLM server on port {port}")
    print(f"{rule}\n")

    vllm_cmd = [
        "vllm", "serve", model_path,
        "--served-model-name", "vibevoice",
        "--trust-remote-code",
        "--dtype", "bfloat16",
        "--max-num-seqs", "64",
        "--max-model-len", "65536",
        # Optional flag, left disabled:
        # "--max-num-batched-tokens", "32768",
        "--gpu-memory-utilization", "0.8",
        # Optional flag, left disabled:
        # "--enforce-eager",
        "--no-enable-prefix-caching",
        "--enable-chunked-prefill",
        "--chat-template-content-format", "openai",
        "--tensor-parallel-size", "1",
        "--allowed-local-media-path", "/app",
        "--port", str(port),
    ]

    # exec* replaces the process image immediately and does not flush open
    # file objects, so anything still buffered (the banner above, earlier
    # log lines) would be lost when stdout is a pipe or file.
    sys.stdout.flush()
    sys.stderr.flush()

    os.execvp(vllm_cmd[0], vllm_cmd)
def main():
    """Parse CLI options and run the deployment steps in order.

    Steps: optionally install system packages, install VibeVoice, download
    the model, optionally generate tokenizer files, then start the vLLM
    server.
    """
    usage_examples = (
        "\n"
        "Examples:\n"
        "# Start with default settings\n"
        "python3 start_server.py\n"
        "# Use custom port\n"
        "python3 start_server.py --port 8080\n"
        "# Skip dependency installation (if already installed)\n"
        "python3 start_server.py --skip-deps\n"
    )
    cli = argparse.ArgumentParser(
        description="VibeVoice vLLM ASR Server - One-Click Deployment",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )
    cli.add_argument(
        "--model", "-m",
        default="microsoft/VibeVoice-ASR",
        help="HuggingFace model ID (default: microsoft/VibeVoice-ASR)",
    )
    cli.add_argument(
        "--port", "-p",
        type=int,
        default=8000,
        help="Server port (default: 8000)",
    )
    # The two opt-out switches share the same shape.
    for flag, help_text in (
        ("--skip-deps", "Skip installing system dependencies"),
        ("--skip-tokenizer", "Skip generating tokenizer files"),
    ):
        cli.add_argument(flag, action="store_true", help=help_text)
    opts = cli.parse_args()

    rule = "=" * 60
    print("\n" + rule)
    print(" VibeVoice vLLM ASR Server - One-Click Deployment")
    print(rule)

    # Step 1: system packages (opt-out via --skip-deps).
    if not opts.skip_deps:
        install_system_deps()

    # Step 2: the VibeVoice package is always installed.
    install_vibevoice()

    # Step 3: fetch the model.
    local_model = download_model(args_model := opts.model) if False else download_model(opts.model)

    # Step 4: tokenizer files (opt-out via --skip-tokenizer).
    if not opts.skip_tokenizer:
        generate_tokenizer(local_model)

    # Step 5: hand the process over to the vLLM server.
    start_vllm_server(local_model, opts.port)
# Run the deployment pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|