Spaces:
Running
Running
| """ | |
| Package the PyInstaller --onedir CUDA build into two archives. | |
| Takes the PyInstaller --onedir output directory and splits it into: | |
| 1. voicebox-server-cuda.tar.gz — server core (exe + non-NVIDIA deps) | |
| 2. cuda-libs-cu128.tar.gz — NVIDIA runtime libraries only | |
| 3. cuda-libs.json — version manifest for the CUDA libs | |
| Usage: | |
| python scripts/package_cuda.py backend/dist/voicebox-server-cuda/ | |
| python scripts/package_cuda.py backend/dist/voicebox-server-cuda/ --output release-assets/ | |
| python scripts/package_cuda.py backend/dist/voicebox-server-cuda/ --cuda-libs-version cu128-v1 | |
| """ | |
| import argparse | |
| import hashlib | |
| import json | |
| import sys | |
| import tarfile | |
| from pathlib import Path | |
# Library-name prefixes that identify NVIDIA CUDA runtime libraries.
# These libraries may appear in different locations depending on the torch
# and PyInstaller version:
#   - nvidia/ subdirectories (older torch with separate nvidia-* packages)
#   - _internal/torch/lib/ (torch 2.10+ bundles NVIDIA DLLs directly)
#   - Top-level directory (some PyInstaller versions)
# Matching is by prefix, so e.g. "cublas" also covers "cublaslt"; the longer
# entries are kept for readability.
NVIDIA_DLL_PREFIXES = (
    "cublas",
    "cublaslt",
    "cudart",
    "cudnn",
    "cufft",
    "cufftw",
    "curand",
    "cusolver",
    "cusolvermg",
    "cusparse",
    "nvjitlink",
    "nvrtc",
    "nccl",
    "caffe2_nvrtc",
)

# Files to keep in the server core even if they match NVIDIA prefixes.
# These are small Python modules or stubs, not the large runtime DLLs.
# Entries are lowercase, "/"-separated path suffixes.
NVIDIA_KEEP_IN_CORE = {
    "torch/cuda/nccl.py",
    "torch/_inductor/codegen/cuda/cutlass_lib_extensions/cutlass_mock_imports/cuda/cudart.py",
}


def _shared_library_stem(file_name: str):
    """Return the library base name of *file_name*, or None if it is not a shared library.

    *file_name* must already be lowercase. Recognised forms:
      - ``name.dll``                      -> ``name``
      - ``name.so`` / ``name.so.1.2.3``   -> ``name`` (a leading ``lib`` is dropped)
    """
    if file_name.endswith(".dll"):
        return file_name[: -len(".dll")]

    stem, marker, version = file_name.rpartition(".so")
    if not marker:
        return None
    # Accept a bare ".so" or a numeric soname version such as ".12" / ".12.8.90";
    # anything else (e.g. "foo.song") is not a shared object.
    if version and not (
        version.startswith(".") and version[1:].replace(".", "").isdigit()
    ):
        return None
    # ELF libraries are conventionally named lib<name>.so; the prefixes in
    # NVIDIA_DLL_PREFIXES are written without that "lib".
    if stem.startswith("lib"):
        stem = stem[len("lib"):]
    return stem


def is_nvidia_file(rel_path: str) -> bool:
    """Check if a relative path belongs to the NVIDIA CUDA libs.

    A path is classified as NVIDIA when either:
      * it lies inside a directory named ``nvidia`` (the whole namespace
        package tree is split out, not just its binaries), or
      * its file name is a shared library (``.dll``, ``.so`` or a versioned
        ``.so.N``) whose base name starts with one of NVIDIA_DLL_PREFIXES,
        regardless of where PyInstaller placed it.

    Paths listed in NVIDIA_KEEP_IN_CORE are never split out; they are matched
    as path suffixes so they apply with or without a leading ``_internal/``.

    Args:
        rel_path: Path relative to the onedir root; ``/`` or ``\\`` separators
            and any letter case are accepted.

    Returns:
        True if the file belongs in the CUDA libs archive, False if it
        belongs in the server core archive.
    """
    rel_lower = rel_path.lower().replace("\\", "/")

    # Never split out the small Python stubs that merely share NVIDIA names.
    for keep in NVIDIA_KEEP_IN_CORE:
        if rel_lower == keep or rel_lower.endswith("/" + keep):
            return False

    parts = rel_lower.split("/")

    # Anything below a directory called "nvidia" (older torch layout).
    if "nvidia" in parts[:-1]:
        return True

    # NVIDIA libraries anywhere in the tree
    # (e.g. _internal/torch/lib/cublas64_12.dll, torch/lib/libcublas.so.12).
    stem = _shared_library_stem(parts[-1])
    return stem is not None and stem.startswith(NVIDIA_DLL_PREFIXES)
def sha256_file(path: Path) -> str:
    """Return the SHA-256 hex digest of the file at *path*.

    The file is read in 1 MiB blocks so large archives are never held in
    memory in full.
    """
    block_size = 1024 * 1024
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        # iter() with a sentinel stops as soon as read() returns b"" (EOF).
        for block in iter(lambda: stream.read(block_size), b""):
            digest.update(block)
    return digest.hexdigest()
def _write_archive_with_checksum(archive_path: Path, files, label: str) -> str:
    """Write *files* into a gzip tarball plus a ``.sha256`` sidecar; return the hex digest.

    Args:
        archive_path: Destination ``.tar.gz`` path.
        files: Iterable of ``(archive_name, source_path)`` pairs.
        label: Human-readable archive description used in progress output.
    """
    print(f"\nCreating {label} archive: {archive_path.name}")
    with tarfile.open(archive_path, "w:gz") as tar:
        for arcname, full_path in files:
            tar.add(full_path, arcname=arcname)

    digest = sha256_file(archive_path)
    # Canonical `sha256sum` line: digest, two spaces, file name.
    checksum_path = archive_path.parent / (archive_path.name + ".sha256")
    checksum_path.write_text(f"{digest}  {archive_path.name}\n")

    print(f" Size: {archive_path.stat().st_size / (1024**2):.1f} MB")
    print(f" SHA-256: {digest[:16]}...")
    return digest


def package(
    onedir_path: Path,
    output_dir: Path,
    cuda_libs_version: str,
    torch_compat: str,
):
    """Split a PyInstaller --onedir build into server-core and CUDA-libs archives.

    Writes the following into *output_dir* (created if missing):
      - ``voicebox-server-cuda.tar.gz`` and its ``.sha256``
      - ``cuda-libs-{cuda_libs_version}.tar.gz`` and its ``.sha256``
      - ``cuda-libs.json`` manifest describing the CUDA libs archive

    Args:
        onedir_path: PyInstaller --onedir output directory to package.
        output_dir: Directory that receives the archives and manifest.
        cuda_libs_version: Version string embedded in the CUDA libs archive
            name and in the manifest.
        torch_compat: Torch version range recorded in the manifest.

    Exits the process with status 1 if no NVIDIA files are found, so an
    empty CUDA libs archive is never published.
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    # Collect all files in the onedir output, split into core vs nvidia.
    # Sorted traversal keeps the member order of the archives stable.
    core_files = []
    nvidia_files = []
    for item in sorted(onedir_path.rglob("*")):
        if item.is_dir():
            continue
        # Archive member names always use "/" regardless of host OS.
        rel_str = item.relative_to(onedir_path).as_posix()
        bucket = nvidia_files if is_nvidia_file(rel_str) else core_files
        bucket.append((rel_str, item))

    core_size = sum(f.stat().st_size for _, f in core_files)
    nvidia_size = sum(f.stat().st_size for _, f in nvidia_files)
    print(f"Input directory: {onedir_path}")
    print(f"Core files: {len(core_files)} ({core_size / (1024**2):.1f} MB)")
    print(f"NVIDIA files: {len(nvidia_files)} ({nvidia_size / (1024**2):.1f} MB)")

    if not nvidia_files:
        print(
            f"ERROR: No NVIDIA files found in {onedir_path}. "
            "Refusing to create an empty CUDA libs archive.",
            file=sys.stderr,
        )
        print(
            "Make sure you built with --cuda and the NVIDIA packages are present.",
            file=sys.stderr,
        )
        sys.exit(1)

    # Files are stored relative to the archive root (no parent directory prefix)
    # so extracting to backends/cuda/ puts everything at the right level.
    server_archive = output_dir / "voicebox-server-cuda.tar.gz"
    _write_archive_with_checksum(server_archive, core_files, "server core")

    cuda_libs_archive = output_dir / f"cuda-libs-{cuda_libs_version}.tar.gz"
    cuda_sha = _write_archive_with_checksum(cuda_libs_archive, nvidia_files, "CUDA libs")

    # Write cuda-libs.json manifest
    manifest = {
        "version": cuda_libs_version,
        "torch_compat": torch_compat,
        "archive": cuda_libs_archive.name,
        "sha256": cuda_sha,
    }
    manifest_text = json.dumps(manifest, indent=2)
    manifest_path = output_dir / "cuda-libs.json"
    manifest_path.write_text(manifest_text + "\n")
    print(f"\nManifest: {manifest_path.name}")
    print(manifest_text)

    # Summary
    server_bytes = server_archive.stat().st_size
    cuda_bytes = cuda_libs_archive.stat().st_size
    total_input = core_size + nvidia_size
    total_output = server_bytes + cuda_bytes
    print(f"\nTotal input: {total_input / (1024**3):.2f} GB")
    print(f"Total output: {total_output / (1024**3):.2f} GB (compressed)")
    print(
        f"Server core: {server_bytes / (1024**2):.1f} MB (redownloaded on app update)"
    )
    print(
        f"CUDA libs: {cuda_bytes / (1024**2):.1f} MB (cached until CUDA toolkit bump)"
    )
def main():
    """Parse command-line arguments and run the packaging step.

    Exits with status 1 if the input path is not a directory. When --output
    is omitted, archives are written next to the input directory.
    """
    parser = argparse.ArgumentParser(
        description="Package PyInstaller --onedir CUDA build into server + CUDA libs archives"
    )
    parser.add_argument(
        "input",
        type=Path,
        help="Path to PyInstaller --onedir output directory (e.g. backend/dist/voicebox-server-cuda/)",
    )
    parser.add_argument(
        "--output",
        type=Path,
        default=None,
        help="Output directory for archives (default: same as input parent)",
    )
    # %(default)s lets argparse print the real default, so the help text
    # cannot drift away from the value actually used.
    parser.add_argument(
        "--cuda-libs-version",
        type=str,
        default="cu128-v1",
        help="Version string for the CUDA libs archive (default: %(default)s)",
    )
    parser.add_argument(
        "--torch-compat",
        type=str,
        default=">=2.7.0,<2.11.0",
        help="Torch version compatibility range (default: %(default)s)",
    )
    args = parser.parse_args()

    if not args.input.is_dir():
        print(f"Error: {args.input} is not a directory", file=sys.stderr)
        print("Expected a PyInstaller --onedir output directory.", file=sys.stderr)
        sys.exit(1)

    if args.output is not None:
        output_dir = args.output
    else:
        # Resolve first: Path(".").parent is ".", which would place the
        # archives inside the very directory being packaged.
        output_dir = args.input.resolve().parent

    package(args.input, output_dir, args.cuda_libs_version, args.torch_compat)


if __name__ == "__main__":
    main()