Spaces:

borquez1
/

mesafe-voicebox

Running

mesafe-voicebox / scripts /package_cuda.py

Bot

Sadece backend birakildi, tum medya ve arayuzler silindi

0dfbd72 2 days ago

8.09 kB

	"""
	Package the PyInstaller --onedir CUDA build into two archives plus a manifest.

	Takes the PyInstaller --onedir output directory and splits it into:
	1. voicebox-server-cuda.tar.gz — server core (exe + non-NVIDIA deps)
	2. cuda-libs-<version>.tar.gz — NVIDIA runtime libraries only
	   (<version> is the --cuda-libs-version value, default cu128-v1)
	3. cuda-libs.json — version manifest for the CUDA libs

	A .sha256 checksum file is written next to each archive.

	Usage:
	python scripts/package_cuda.py backend/dist/voicebox-server-cuda/
	python scripts/package_cuda.py backend/dist/voicebox-server-cuda/ --output release-assets/
	python scripts/package_cuda.py backend/dist/voicebox-server-cuda/ --cuda-libs-version cu128-v1
	"""

	import argparse
	import hashlib
	import json
	import sys
	import tarfile
	from pathlib import Path

	# Library name prefixes that identify NVIDIA CUDA runtime libraries.
	# These libraries may appear in different locations depending on the torch
	# and PyInstaller version:
	# - nvidia/ subdirectories (older torch with separate nvidia-* packages)
	# - _internal/torch/lib/ (torch 2.10+ bundles NVIDIA DLLs directly)
	# - Top-level directory (some PyInstaller versions)
	# Must stay a tuple: it is passed directly to str.startswith().
	NVIDIA_DLL_PREFIXES = (
	    "cublas",
	    "cublaslt",
	    "cudart",
	    "cudnn",
	    "cufft",
	    "cufftw",
	    "curand",
	    "cusolver",
	    "cusolvermg",
	    "cusparse",
	    "nvjitlink",
	    "nvrtc",
	    "nccl",
	    "caffe2_nvrtc",
	)

	# Files to keep in the server core even if they match NVIDIA prefixes.
	# These are small Python modules or stubs, not the large runtime DLLs.
	# Entries are lower-case, "/"-separated path suffixes.
	NVIDIA_KEEP_IN_CORE = {
	    "torch/cuda/nccl.py",
	    "torch/_inductor/codegen/cuda/cutlass_lib_extensions/cutlass_mock_imports/cuda/cudart.py",
	}


	def _shared_library_stem(file_name: str) -> str:
	    """Return the library name of a shared-library file, or "" if it is not one.

	    Handles Windows DLLs (``cublas64_12.dll`` -> ``cublas64_12``) and Linux
	    shared objects, including versioned ones and the conventional ``lib``
	    prefix (``libcublas.so.12`` -> ``cublas``).

	    Args:
	        file_name: Lower-cased file name without any directory part.
	    """
	    if file_name.endswith(".dll"):
	        return file_name[: -len(".dll")]

	    parts = file_name.split(".")
	    # Drop trailing numeric version components:
	    # libcudart.so.12.8.90 -> libcudart.so
	    while len(parts) > 2 and parts[-1].isdigit():
	        parts.pop()
	    if len(parts) < 2 or parts[-1] != "so":
	        return ""

	    stem = ".".join(parts[:-1])
	    # Linux libraries carry a "lib" prefix that the Windows DLL names lack.
	    return stem[len("lib"):] if stem.startswith("lib") else stem


	def is_nvidia_file(rel_path: str) -> bool:
	    """Check if a relative path belongs to the NVIDIA CUDA libs.

	    A path is classified as NVIDIA when either:

	    * any of its parent directories is named ``nvidia`` (the whole
	      ``nvidia/`` namespace package tree is split out), or
	    * its file name is a shared library (``.dll``, ``.so`` or versioned
	      ``.so.N``) whose name, ignoring a leading ``lib``, starts with one of
	      ``NVIDIA_DLL_PREFIXES``.

	    Paths listed in ``NVIDIA_KEEP_IN_CORE`` are never classified as NVIDIA;
	    they are matched as path suffixes so that a leading directory such as
	    ``_internal/`` does not defeat the lookup.

	    Args:
	        rel_path: Path relative to the onedir root; ``\\`` and ``/``
	            separators are both accepted and matching is case-insensitive.

	    Returns:
	        True if the file belongs in the CUDA libs archive, False if it
	        belongs in the server core archive.
	    """
	    rel_lower = rel_path.lower().replace("\\", "/")

	    # Never split out the Python stubs that merely share NVIDIA-like names.
	    if any(
	        rel_lower == keep or rel_lower.endswith("/" + keep)
	        for keep in NVIDIA_KEEP_IN_CORE
	    ):
	        return False

	    *parent_dirs, file_name = rel_lower.split("/")

	    # Everything below an nvidia/ directory (older torch layout).
	    if "nvidia" in parent_dirs:
	        return True

	    # NVIDIA libraries anywhere else in the tree
	    # (e.g. _internal/torch/lib/cublas64_12.dll).
	    return _shared_library_stem(file_name).startswith(NVIDIA_DLL_PREFIXES)


	def sha256_file(path: Path) -> str:
	    """Return the hexadecimal SHA-256 digest of the file at *path*.

	    The file is opened in binary mode and consumed in 1 MiB blocks, so the
	    whole file is never held in memory at once.
	    """
	    block_size = 1024 * 1024
	    digest = hashlib.sha256()
	    with open(path, "rb") as stream:
	        # iter() with a sentinel stops at the first empty read (EOF).
	        for block in iter(lambda: stream.read(block_size), b""):
	            digest.update(block)
	    return digest.hexdigest()


	def _write_archive(output_dir: Path, archive_name: str, members) -> str:
	    """Write a gzip tar archive plus its ``.sha256`` file and return the digest.

	    Args:
	        output_dir: Directory that receives the archive and checksum file.
	        archive_name: File name of the archive inside *output_dir*.
	        members: Iterable of ``(arcname, source_path)`` pairs; each source
	            file is stored under ``arcname`` (no parent directory prefix).

	    Returns:
	        The SHA-256 hex digest of the finished archive.
	    """
	    archive_path = output_dir / archive_name
	    with tarfile.open(archive_path, "w:gz") as tar:
	        for arcname, source in members:
	            tar.add(source, arcname=arcname)

	    digest = sha256_file(archive_path)
	    checksum_path = output_dir / f"{archive_name}.sha256"
	    checksum_path.write_text(f"{digest} {archive_name}\n")

	    print(f" Size: {archive_path.stat().st_size / (1024**2):.1f} MB")
	    print(f" SHA-256: {digest[:16]}...")
	    return digest


	def package(
	    onedir_path: Path,
	    output_dir: Path,
	    cuda_libs_version: str,
	    torch_compat: str,
	):
	    """Split a PyInstaller --onedir tree into server-core and CUDA-libs archives.

	    Every regular file below *onedir_path* is classified with
	    ``is_nvidia_file`` and written to one of two ``.tar.gz`` archives in
	    *output_dir* (created if missing). A ``.sha256`` file is written next
	    to each archive and a ``cuda-libs.json`` manifest describes the CUDA
	    libs archive. Progress and a size summary are printed to stdout.

	    Args:
	        onedir_path: Root of the PyInstaller --onedir output.
	        output_dir: Directory that receives all generated files.
	        cuda_libs_version: Version string used in the CUDA archive name
	            and stored as ``version`` in the manifest.
	        torch_compat: Stored verbatim as ``torch_compat`` in the manifest.

	    Exits the process with status 1 (via ``sys.exit``) when no NVIDIA files
	    are found, so an empty CUDA libs archive is never produced.
	    """
	    mib = 1024**2
	    gib = 1024**3

	    output_dir.mkdir(parents=True, exist_ok=True)

	    # Walk the tree in sorted order and bucket each file as core or NVIDIA.
	    core_files = []
	    nvidia_files = []
	    for path in sorted(onedir_path.rglob("*")):
	        if path.is_dir():
	            continue
	        rel_str = str(path.relative_to(onedir_path))
	        bucket = nvidia_files if is_nvidia_file(rel_str) else core_files
	        bucket.append((rel_str, path))

	    core_size = sum(src.stat().st_size for _, src in core_files)
	    nvidia_size = sum(src.stat().st_size for _, src in nvidia_files)

	    print(f"Input directory: {onedir_path}")
	    print(f"Core files: {len(core_files)} ({core_size / mib:.1f} MB)")
	    print(f"NVIDIA files: {len(nvidia_files)} ({nvidia_size / mib:.1f} MB)")

	    if not nvidia_files:
	        print(
	            f"ERROR: No NVIDIA files found in {onedir_path}. "
	            "Refusing to create an empty CUDA libs archive.",
	            file=sys.stderr,
	        )
	        print(
	            "Make sure you built with --cuda and the NVIDIA packages are present.",
	            file=sys.stderr,
	        )
	        sys.exit(1)

	    # Server core archive. Members are stored relative to the archive root
	    # so extracting to backends/cuda/ puts everything at the right level.
	    server_name = "voicebox-server-cuda.tar.gz"
	    server_archive = output_dir / server_name
	    print(f"\nCreating server core archive: {server_archive.name}")
	    _write_archive(output_dir, server_name, core_files)

	    # CUDA libs archive.
	    cuda_name = f"cuda-libs-{cuda_libs_version}.tar.gz"
	    cuda_libs_archive = output_dir / cuda_name
	    print(f"\nCreating CUDA libs archive: {cuda_libs_archive.name}")
	    cuda_sha = _write_archive(output_dir, cuda_name, nvidia_files)

	    # cuda-libs.json manifest describing the CUDA libs archive.
	    manifest = {
	        "version": cuda_libs_version,
	        "torch_compat": torch_compat,
	        "archive": cuda_libs_archive.name,
	        "sha256": cuda_sha,
	    }
	    manifest_text = json.dumps(manifest, indent=2)
	    manifest_path = output_dir / "cuda-libs.json"
	    manifest_path.write_text(manifest_text + "\n")
	    print(f"\nManifest: {manifest_path.name}")
	    print(manifest_text)

	    # Summary of uncompressed input versus compressed output.
	    server_bytes = server_archive.stat().st_size
	    cuda_bytes = cuda_libs_archive.stat().st_size
	    total_input = core_size + nvidia_size
	    total_output = server_bytes + cuda_bytes
	    print(f"\nTotal input: {total_input / gib:.2f} GB")
	    print(f"Total output: {total_output / gib:.2f} GB (compressed)")
	    print(
	        f"Server core: {server_bytes / mib:.1f} MB (redownloaded on app update)"
	    )
	    print(
	        f"CUDA libs: {cuda_bytes / mib:.1f} MB (cached until CUDA toolkit bump)"
	    )


	def main():
	    """Command-line entry point: parse arguments and run ``package``.

	    Arguments:
	        input: PyInstaller --onedir output directory (must exist).
	        --output: Destination directory; defaults to the parent of the
	            resolved input directory.
	        --cuda-libs-version: Version string for the CUDA libs archive.
	        --torch-compat: Torch compatibility range stored in the manifest.

	    Exits with status 1 (via ``sys.exit``) when ``input`` is not a directory.
	    """
	    parser = argparse.ArgumentParser(
	        description="Package PyInstaller --onedir CUDA build into server + CUDA libs archives"
	    )
	    parser.add_argument(
	        "input",
	        type=Path,
	        help="Path to PyInstaller --onedir output directory (e.g. backend/dist/voicebox-server-cuda/)",
	    )
	    parser.add_argument(
	        "--output",
	        type=Path,
	        default=None,
	        help="Output directory for archives (default: same as input parent)",
	    )
	    # %(default)s is expanded by argparse, so the help text can never drift
	    # away from the real default value.
	    parser.add_argument(
	        "--cuda-libs-version",
	        type=str,
	        default="cu128-v1",
	        help="Version string for the CUDA libs archive (default: %(default)s)",
	    )
	    parser.add_argument(
	        "--torch-compat",
	        type=str,
	        default=">=2.7.0,<2.11.0",
	        help="Torch version compatibility range (default: %(default)s)",
	    )
	    args = parser.parse_args()

	    if not args.input.is_dir():
	        print(f"Error: {args.input} is not a directory", file=sys.stderr)
	        print("Expected a PyInstaller --onedir output directory.", file=sys.stderr)
	        sys.exit(1)

	    if args.output is not None:
	        output_dir = args.output
	    else:
	        # Resolve first: Path(".").parent is "." itself, which would write
	        # the archives into the very directory that is being packaged.
	        output_dir = args.input.resolve().parent
	    package(args.input, output_dir, args.cuda_libs_version, args.torch_compat)


	if __name__ == "__main__":
	    main()