Spaces:

borquez1
/

mesafe-voicebox

Sleeping

mesafe-voicebox / backend /build_binary.py

Bot

Sadece backend birakildi, tum medya ve arayuzler silindi

0dfbd72 9 days ago

20.7 kB

	"""
	PyInstaller build script for creating standalone Python server binary.

	Usage:
	python build_binary.py # Build default (CPU) server binary
	python build_binary.py --cuda # Build CUDA-enabled server binary
	"""

	import PyInstaller.__main__
	import argparse
	import logging
	import os
	import platform
	import sys
	from pathlib import Path

	logger = logging.getLogger(__name__)


	def is_apple_silicon():
	    """Return True when the host reports macOS ("Darwin") on an arm64 CPU."""
	    # Guard clause: anything that is not macOS can never be Apple Silicon,
	    # so the machine architecture is only consulted on Darwin.
	    if platform.system() != "Darwin":
	        return False
	    return platform.machine() == "arm64"


	def _flatten_options(pairs):
	    """Expand (flag, value) pairs into the flat argv list PyInstaller expects."""
	    return [token for pair in pairs for token in pair]


	def _installed_torch_cuda_version():
	    """Return ``torch.version.cuda`` of the installed torch, or "" if there is none.

	    The probe runs in a child interpreter (``sys.executable``). An empty
	    string is returned both for CPU-only torch and when the probe prints
	    nothing (for example because torch cannot be imported).
	    """
	    import subprocess

	    probe = subprocess.run(
	        [sys.executable, "-c", "import torch; print(torch.version.cuda or '')"],
	        capture_output=True,
	        text=True,
	    )
	    return probe.stdout.strip()


	def _cuda_wheel_tag(cuda_version, default="cu128"):
	    """Map a ``torch.version.cuda`` string such as "12.8" to a wheel tag ("cu128").

	    Falls back to ``default`` when the string is not of the form
	    ``<major>.<minor>``.
	    """
	    major, dot, minor = cuda_version.partition(".")
	    if dot and major.isdigit() and minor.isdigit():
	        return f"cu{major}{minor}"
	    return default


	def _reinstall_torch(wheel_tag):
	    """Force-reinstall torch, torchvision and torchaudio from a PyTorch wheel index.

	    Args:
	        wheel_tag: Last path component of the index URL, e.g. "cpu" or "cu128".

	    Raises:
	        subprocess.CalledProcessError: If pip exits with a non-zero status.
	    """
	    import subprocess

	    subprocess.run(
	        [
	            sys.executable,
	            "-m",
	            "pip",
	            "install",
	            "torch",
	            "torchvision",
	            "torchaudio",
	            "--index-url",
	            f"https://download.pytorch.org/whl/{wheel_tag}",
	            "--force-reinstall",
	            "-q",
	        ],
	        check=True,
	    )


	def build_server(cuda=False):
	    """Build Python server as standalone binary.

	    Changes the process working directory to the backend directory and
	    invokes PyInstaller. For CPU builds on Windows, a CUDA-enabled torch is
	    temporarily replaced by CPU torch and reinstalled afterwards from the
	    wheel index matching the CUDA version that was detected.

	    Args:
	        cuda: If True, build with CUDA support and name the binary
	            voicebox-server-cuda instead of voicebox-server.

	    Raises:
	        subprocess.CalledProcessError: If swapping or restoring torch via
	            pip fails.
	    """
	    backend_dir = Path(__file__).parent

	    binary_name = "voicebox-server-cuda" if cuda else "voicebox-server"

	    # PyInstaller arguments
	    # CUDA builds use --onedir so we can split the output into two archives:
	    # 1. Server core (~200-400MB) — versioned with the app
	    # 2. CUDA libs (~2GB) — versioned independently (only redownloaded on
	    #    CUDA toolkit / torch major version changes)
	    # CPU builds remain --onefile for simplicity.
	    pack_mode = "--onedir" if cuda else "--onefile"
	    args = [
	        "server.py",  # Use server.py as entry point instead of main.py
	        pack_mode,
	        "--name",
	        binary_name,
	    ]

	    # Hide console window on Windows only. On macOS/Linux the sidecar needs
	    # stdout/stderr for Tauri to capture logs.
	    if platform.system() == "Windows":
	        args.append("--noconsole")

	    # numpy 2.x / torch ABI mismatch fix: install memmove fallback for
	    # torch.from_numpy() before the app starts. Runtime hooks run after
	    # FrozenImporter is registered so frozen torch/numpy are importable.
	    # Paths are passed relative to backend_dir because os.chdir(backend_dir)
	    # runs before PyInstaller. Absolute paths would get baked into the
	    # generated .spec, breaking reproducible builds on other machines / CI.
	    args.extend(
	        _flatten_options(
	            [
	                ("--runtime-hook", "pyi_rth_numpy_compat.py"),
	                # Stub torch.compiler.disable before transformers imports
	                # flex_attention, which otherwise triggers torch._dynamo →
	                # torch._numpy._ufuncs and crashes at module load under
	                # PyInstaller. See pyi_rth_torch_compiler_disable.py.
	                ("--runtime-hook", "pyi_rth_torch_compiler_disable.py"),
	                # Per-module collection overrides (e.g. forcing
	                # scipy.stats._distn_infrastructure to bundle .py source
	                # alongside .pyc so the runtime hook can source-patch it).
	                ("--additional-hooks-dir", "pyi_hooks"),
	            ]
	        )
	    )

	    # Add local qwen_tts path if specified (for editable installs)
	    qwen_tts_path = os.getenv("QWEN_TTS_PATH")
	    if qwen_tts_path and Path(qwen_tts_path).exists():
	        args.extend(["--paths", str(qwen_tts_path)])
	        logger.info("Using local qwen_tts source from: %s", qwen_tts_path)

	    # Add common hidden imports. Each tuple is one "<flag> <value>" option;
	    # the order matches the argv PyInstaller has always been given.
	    args.extend(
	        _flatten_options(
	            [
	                ("--hidden-import", "backend"),
	                ("--hidden-import", "backend.main"),
	                ("--hidden-import", "backend.config"),
	                ("--hidden-import", "backend.database"),
	                ("--hidden-import", "backend.models"),
	                ("--hidden-import", "backend.services.profiles"),
	                ("--hidden-import", "backend.services.history"),
	                ("--hidden-import", "backend.services.tts"),
	                ("--hidden-import", "backend.services.transcribe"),
	                ("--hidden-import", "backend.utils.platform_detect"),
	                ("--hidden-import", "backend.backends"),
	                ("--hidden-import", "backend.backends.pytorch_backend"),
	                ("--hidden-import", "backend.backends.qwen_custom_voice_backend"),
	                ("--hidden-import", "backend.utils.audio"),
	                ("--hidden-import", "backend.utils.cache"),
	                ("--hidden-import", "backend.utils.progress"),
	                ("--hidden-import", "backend.utils.hf_progress"),
	                ("--hidden-import", "backend.services.cuda"),
	                ("--hidden-import", "backend.services.effects"),
	                ("--hidden-import", "backend.utils.effects"),
	                ("--hidden-import", "backend.services.versions"),
	                ("--hidden-import", "pedalboard"),
	                ("--hidden-import", "chatterbox"),
	                ("--hidden-import", "chatterbox.tts_turbo"),
	                ("--hidden-import", "chatterbox.mtl_tts"),
	                ("--hidden-import", "backend.backends.chatterbox_backend"),
	                ("--hidden-import", "backend.backends.chatterbox_turbo_backend"),
	                # chatterbox multilingual uses spacy_pkuseg for Chinese word
	                # segmentation, which ships pickled dict files (dicts/default.pkl)
	                # and native .so extensions that --hidden-import alone won't bundle.
	                ("--collect-all", "spacy_pkuseg"),
	                ("--hidden-import", "backend.backends.luxtts_backend"),
	                ("--hidden-import", "zipvoice"),
	                ("--hidden-import", "zipvoice.luxvoice"),
	                ("--collect-all", "zipvoice"),
	                ("--collect-all", "linacodec"),
	                ("--hidden-import", "torch"),
	                ("--hidden-import", "transformers"),
	                ("--hidden-import", "fastapi"),
	                ("--hidden-import", "uvicorn"),
	                ("--hidden-import", "sqlalchemy"),
	                # librosa uses lazy_loader which generates .pyi stub files at
	                # install time and reads them at runtime to discover submodules.
	                # --hidden-import alone doesn't bundle the stubs, causing
	                # "Cannot load imports from non-existent stub" at runtime.
	                ("--collect-all", "lazy_loader"),
	                ("--collect-all", "librosa"),
	                ("--hidden-import", "soundfile"),
	                ("--hidden-import", "qwen_tts"),
	                ("--hidden-import", "qwen_tts.inference"),
	                ("--hidden-import", "qwen_tts.inference.qwen3_tts_model"),
	                ("--hidden-import", "qwen_tts.inference.qwen3_tts_tokenizer"),
	                ("--hidden-import", "qwen_tts.core"),
	                ("--hidden-import", "qwen_tts.cli"),
	                ("--copy-metadata", "qwen-tts"),
	                ("--copy-metadata", "requests"),
	                ("--copy-metadata", "transformers"),
	                ("--copy-metadata", "huggingface-hub"),
	                ("--copy-metadata", "tokenizers"),
	                ("--copy-metadata", "safetensors"),
	                ("--copy-metadata", "tqdm"),
	                ("--hidden-import", "requests"),
	                # qwen_tts uses inspect.getsource() at runtime to locate
	                # modeling_qwen3_tts.py — needs physical .py source files bundled
	                ("--collect-all", "qwen_tts"),
	                # Fix for pkg_resources and jaraco namespace packages
	                ("--hidden-import", "pkg_resources.extern"),
	                ("--collect-submodules", "jaraco"),
	                # inflect uses typeguard @typechecked which calls inspect.getsource()
	                # at import time — needs .py source files, not just .pyc bytecode
	                ("--collect-all", "inflect"),
	                # perth ships pretrained watermark model files (hparams.yaml, .pth.tar)
	                # in perth/perth_net/pretrained/ — needed by chatterbox at runtime
	                ("--collect-all", "perth"),
	                # piper_phonemize ships espeak-ng-data/ (phoneme tables, language dicts)
	                # needed by LuxTTS for text-to-phoneme conversion
	                ("--collect-all", "piper_phonemize"),
	                # HumeAI TADA — speech-language model using Llama + flow matching
	                ("--hidden-import", "backend.backends.hume_backend"),
	                ("--hidden-import", "tada"),
	                ("--hidden-import", "tada.modules"),
	                ("--hidden-import", "tada.modules.tada"),
	                ("--hidden-import", "tada.modules.encoder"),
	                ("--hidden-import", "tada.modules.decoder"),
	                ("--hidden-import", "tada.modules.aligner"),
	                ("--hidden-import", "tada.modules.acoustic_spkr_verf"),
	                ("--hidden-import", "tada.nn"),
	                ("--hidden-import", "tada.nn.vibevoice"),
	                ("--hidden-import", "tada.utils"),
	                ("--hidden-import", "tada.utils.gray_code"),
	                ("--hidden-import", "tada.utils.text"),
	                # DAC shim — provides dac.nn.layers.Snake1d without the real
	                # descript-audio-codec package (which pulls onnx/tensorboard via
	                # descript-audiotools). The shim is in backend/utils/dac_shim.py.
	                ("--hidden-import", "backend.utils.dac_shim"),
	                ("--hidden-import", "torchaudio"),
	                ("--collect-submodules", "tada"),
	                # Kokoro 82M — lightweight TTS engine using misaki G2P
	                # collect-all is required because transformers introspects .py source
	                # files at runtime (e.g. _can_set_attn_implementation opens the class
	                # file); hidden-import alone only bundles bytecode.
	                ("--hidden-import", "backend.backends.kokoro_backend"),
	                ("--collect-all", "kokoro"),
	                # misaki ships G2P data files (dictionaries, phoneme tables)
	                # that must be bundled for espeak/en/ja/zh G2P to work
	                ("--collect-all", "misaki"),
	                # language_tags ships JSON data files (index.json etc.) loaded at
	                # runtime via: misaki → phonemizer → segments → csvw → language_tags
	                ("--collect-all", "language_tags"),
	                # espeakng_loader ships the entire espeak-ng-data directory (369 files)
	                # loaded at import time by misaki.espeak via get_data_path()
	                ("--collect-all", "espeakng_loader"),
	                # spacy en_core_web_sm model — misaki.en tries to spacy.cli.download()
	                # at runtime if not found, which calls pip as a subprocess and crashes
	                # the frozen binary. Bundle the model so spacy.util.is_package() passes.
	                ("--collect-all", "en_core_web_sm"),
	                ("--copy-metadata", "en_core_web_sm"),
	                ("--hidden-import", "en_core_web_sm"),
	                # unidic-lite ships the MeCab dictionary used by fugashi (pulled in
	                # by misaki[ja]). The dict lives in unidic_lite/dicdir/ and is
	                # discovered via the package's DICDIR constant, so the data files
	                # must be collected or Japanese Kokoro voices crash at runtime.
	                ("--collect-all", "unidic_lite"),
	                ("--hidden-import", "loguru"),
	                # MCP server — Streamable-HTTP endpoint and the 4 voicebox.* tools.
	                # FastMCP pulls in a chain of deps (mcp, cyclopts, openapi-pydantic,
	                # etc.) that don't auto-discover cleanly under PyInstaller, so we
	                # collect them whole. Small compared to torch.
	                ("--hidden-import", "backend.mcp_server"),
	                ("--hidden-import", "backend.mcp_server.server"),
	                ("--hidden-import", "backend.mcp_server.tools"),
	                ("--hidden-import", "backend.mcp_server.context"),
	                ("--hidden-import", "backend.mcp_server.resolve"),
	                ("--hidden-import", "backend.mcp_server.events"),
	                ("--collect-all", "fastmcp"),
	                ("--collect-all", "mcp"),
	                ("--hidden-import", "sse_starlette"),
	            ]
	        )
	    )

	    # Add CUDA-specific hidden imports
	    if cuda:
	        logger.info("Building with CUDA support")
	        args.extend(
	            _flatten_options(
	                [
	                    ("--hidden-import", "torch.cuda"),
	                    ("--hidden-import", "torch.backends.cudnn"),
	                ]
	            )
	        )
	    else:
	        # Exclude NVIDIA CUDA packages from CPU-only builds to keep binary small.
	        # When building from a venv with CUDA torch installed, PyInstaller would
	        # bundle ~3GB of NVIDIA shared libraries. We exclude both the Python
	        # modules and the binary DLLs.
	        nvidia_packages = [
	            "nvidia",
	            "nvidia.cublas",
	            "nvidia.cuda_cupti",
	            "nvidia.cuda_nvrtc",
	            "nvidia.cuda_runtime",
	            "nvidia.cudnn",
	            "nvidia.cufft",
	            "nvidia.curand",
	            "nvidia.cusolver",
	            "nvidia.cusparse",
	            "nvidia.nccl",
	            "nvidia.nvjitlink",
	            "nvidia.nvtx",
	        ]
	        args.extend(_flatten_options(("--exclude-module", pkg) for pkg in nvidia_packages))

	    # Add MLX-specific imports if building on Apple Silicon (never for CUDA builds)
	    if is_apple_silicon() and not cuda:
	        logger.info("Building for Apple Silicon - including MLX dependencies")
	        args.extend(
	            _flatten_options(
	                [
	                    ("--hidden-import", "backend.backends.mlx_backend"),
	                    ("--hidden-import", "mlx"),
	                    ("--hidden-import", "mlx.core"),
	                    ("--hidden-import", "mlx.nn"),
	                    ("--hidden-import", "mlx_audio"),
	                    ("--hidden-import", "mlx_audio.tts"),
	                    ("--hidden-import", "mlx_audio.stt"),
	                    ("--hidden-import", "mlx_lm"),
	                    ("--hidden-import", "backend.backends.qwen_llm_backend"),
	                    ("--collect-submodules", "mlx"),
	                    ("--collect-submodules", "mlx_audio"),
	                    ("--collect-submodules", "mlx_lm"),
	                    # Use --collect-all so PyInstaller bundles both data files AND
	                    # native shared libraries (.dylib, .metallib) for MLX.
	                    # Previously only --collect-data was used, which caused MLX to
	                    # raise OSError at runtime inside the bundled binary because
	                    # the Metal shader libraries were missing.
	                    ("--collect-all", "mlx"),
	                    ("--collect-all", "mlx_audio"),
	                    # mlx_lm ships chat_templates/ JSON files and loads tool_parsers
	                    # submodules dynamically via importlib at tokenizer load time,
	                    # which --hidden-import alone can't resolve.
	                    ("--collect-all", "mlx_lm"),
	                ]
	            )
	        )
	    elif not cuda:
	        logger.info("Building for non-Apple Silicon platform - PyTorch only")

	    dist_dir = str(backend_dir / "dist")
	    build_dir = str(backend_dir / "build")

	    args.extend(
	        [
	            "--distpath",
	            dist_dir,
	            "--workpath",
	            build_dir,
	            "--noconfirm",
	            "--clean",
	        ]
	    )

	    # Change to backend directory
	    os.chdir(backend_dir)

	    # For CPU builds on Windows, ensure we're using CPU-only torch.
	    # If CUDA torch is installed (local dev), swap to CPU torch before building,
	    # then restore CUDA torch after. This prevents PyInstaller from bundling
	    # ~3GB of CUDA DLLs into the CPU binary.
	    #
	    # The swap happens inside the try block and the restore tag is recorded
	    # *before* pip runs, so an interrupted or failed --force-reinstall still
	    # triggers the restore in the finally clause.
	    restore_tag = None
	    try:
	        if not cuda and platform.system() == "Windows":
	            cuda_version = _installed_torch_cuda_version()
	            if cuda_version:
	                logger.info("CUDA torch detected — installing CPU torch for CPU build...")
	                # Restore from the index matching what was installed (e.g.
	                # "12.1" -> cu121) instead of unconditionally using cu128.
	                restore_tag = _cuda_wheel_tag(cuda_version)
	                _reinstall_torch("cpu")

	        # Run PyInstaller
	        PyInstaller.__main__.run(args)
	    finally:
	        # Restore CUDA torch if we swapped it out (even on build failure)
	        if restore_tag is not None:
	            logger.info("Restoring CUDA torch...")
	            _reinstall_torch(restore_tag)

	    logger.info("Binary built in %s", backend_dir / "dist" / binary_name)


	def build_shim():
	    """Package the voicebox-mcp stdio shim as a small single-file binary.

	    The shim serves MCP clients that only speak stdio by proxying JSON-RPC
	    to the main voicebox-server's /mcp endpoint. It is meant to stay tiny:
	    no torch, no ML deps, just httpx + asyncio.

	    Changes the process working directory to the backend directory before
	    invoking PyInstaller.
	    """
	    here = Path(__file__).parent

	    # Modules forced into the bundle via --hidden-import.
	    bundled_modules = (
	        "backend.mcp_shim",
	        "backend.mcp_shim.__main__",
	        "httpx",
	        "httpx._transports.default",
	        "anyio",
	    )

	    # Exclude everything heavy that httpx/asyncio don't actually need so
	    # the binary stays tiny (~15 MB instead of ~400 MB).
	    excluded_modules = (
	        "torch",
	        "transformers",
	        "mlx",
	        "mlx_audio",
	        "mlx_lm",
	        "qwen_tts",
	        "chatterbox",
	        "zipvoice",
	        "tada",
	        "kokoro",
	        "misaki",
	        "spacy",
	        "librosa",
	        "numba",
	        "numpy",
	        "pedalboard",
	        "fastapi",
	        "uvicorn",
	        "sqlalchemy",
	        "fastmcp",
	        "mcp",
	    )

	    # Stdio-only — no console hiding needed on Windows since the parent
	    # MCP client is spawning this as a child process and wants stdio.
	    args = ["mcp_shim/__main__.py", "--onefile", "--name", "voicebox-mcp"]
	    for module in bundled_modules:
	        args += ["--hidden-import", module]
	    for module in excluded_modules:
	        args += ["--exclude-module", module]
	    args += [
	        "--distpath",
	        str(here / "dist"),
	        "--workpath",
	        str(here / "build"),
	        "--noconfirm",
	        "--clean",
	    ]

	    # The entry point above is relative, so PyInstaller must run from the
	    # backend directory.
	    os.chdir(here)
	    PyInstaller.__main__.run(args)
	    logger.info("Shim built: %s", here / "dist" / "voicebox-mcp")


	if __name__ == "__main__":
	    # Command-line entry point: both options are plain on/off switches.
	    parser = argparse.ArgumentParser(description="Build voicebox binaries")
	    for flag, help_text in (
	        ("--cuda", "Build CUDA-enabled binary (voicebox-server-cuda)"),
	        ("--shim", "Build the voicebox-mcp stdio shim binary instead of the server"),
	    ):
	        parser.add_argument(flag, action="store_true", help=help_text)
	    cli_args = parser.parse_args()

	    # --shim takes precedence; --cuda only affects the server build.
	    if not cli_args.shim:
	        build_server(cuda=cli_args.cuda)
	    else:
	        build_shim()