"""
Package the PyInstaller --onedir CUDA build into two archives plus a manifest.
Takes the PyInstaller --onedir output directory and splits it into:
1. voicebox-server-cuda.tar.gz — server core (exe + non-NVIDIA deps)
2. cuda-libs-<version>.tar.gz — NVIDIA runtime libraries only
   (<version> comes from --cuda-libs-version, default: cu128-v1)
3. cuda-libs.json — version manifest for the CUDA libs
A .sha256 checksum file is written next to each archive.
Usage:
python scripts/package_cuda.py backend/dist/voicebox-server-cuda/
python scripts/package_cuda.py backend/dist/voicebox-server-cuda/ --output release-assets/
python scripts/package_cuda.py backend/dist/voicebox-server-cuda/ --cuda-libs-version cu128-v1
"""
import argparse
import hashlib
import json
import sys
import tarfile
from pathlib import Path
# DLL name prefixes that identify NVIDIA CUDA runtime libraries.
# These DLLs may appear in different locations depending on the torch
# and PyInstaller version:
# - nvidia/ subdirectories (older torch with separate nvidia-* packages)
# - _internal/torch/lib/ (torch 2.10+ bundles NVIDIA DLLs directly)
# - Top-level directory (some PyInstaller versions)
NVIDIA_DLL_PREFIXES = (
    "cublas",
    "cublaslt",
    "cudart",
    "cudnn",
    "cufft",
    "cufftw",
    "curand",
    "cusolver",
    "cusolvermg",
    "cusparse",
    "nvjitlink",
    "nvrtc",
    "nccl",
    "caffe2_nvrtc",
)

# Files to keep in the server core even if they match NVIDIA prefixes.
# These are small Python modules or stubs, not the large runtime DLLs.
# Entries are lowercase, "/"-separated paths; they are matched as path
# suffixes so they still apply when the tree sits under e.g. _internal/.
NVIDIA_KEEP_IN_CORE = {
    "torch/cuda/nccl.py",
    "torch/_inductor/codegen/cuda/cutlass_lib_extensions/cutlass_mock_imports/cuda/cudart.py",
}


def _shared_library_stem(file_name: str) -> str:
    """Return the bare library name of a shared-library file name.

    Strips the ``.dll`` / ``.so`` extension, a numeric version suffix after
    ``.so`` (``libcudart.so.12``) and a leading ``lib``, so Windows and
    Linux names can be compared against the same prefix list.

    Args:
        file_name: Lower-cased file name without any directory part.

    Returns:
        The stripped library name, or ``""`` if *file_name* does not look
        like a shared library.
    """
    if file_name.endswith((".dll", ".so")):
        stem = file_name.rsplit(".", 1)[0]
    else:
        # Versioned Linux libraries: <name>.so.<digits>[.<digits>...]
        stem, marker, version = file_name.rpartition(".so.")
        if not marker or not all(part.isdigit() for part in version.split(".")):
            return ""
    if stem.startswith("lib"):
        stem = stem[len("lib"):]
    return stem


def is_nvidia_file(rel_path: str) -> bool:
    """Check if a relative path belongs to the NVIDIA CUDA libs.

    A path is classified as NVIDIA when either

    * one of its directory components is ``nvidia`` (every file below such
      a directory is included, whatever its extension), or
    * its file name is a shared library (``.dll``, ``.so`` or a versioned
      ``.so.N``) whose name, ignoring a leading ``lib``, starts with one of
      ``NVIDIA_DLL_PREFIXES``.

    Paths equal to, or ending in, an entry of ``NVIDIA_KEEP_IN_CORE`` are
    never classified as NVIDIA.

    Args:
        rel_path: Path relative to the PyInstaller output directory; both
            ``/`` and ``\\`` separators are accepted and case is ignored.

    Returns:
        True if the file belongs in the CUDA libs archive, False if it
        belongs in the server core archive.
    """
    rel_lower = rel_path.lower().replace("\\", "/")

    # Never split out the known Python source files / small stubs.
    if any(
        rel_lower == keep or rel_lower.endswith("/" + keep)
        for keep in NVIDIA_KEEP_IN_CORE
    ):
        return False

    *parent_dirs, file_name = rel_lower.split("/")

    # Anything inside an nvidia/ directory (older torch layout with separate
    # nvidia-* packages) goes to the CUDA libs archive.
    if "nvidia" in parent_dirs:
        return True

    # NVIDIA libraries anywhere else in the tree
    # (e.g. _internal/torch/lib/cublas64_12.dll).
    stem = _shared_library_stem(file_name)
    return bool(stem) and stem.startswith(NVIDIA_DLL_PREFIXES)
def sha256_file(path: Path) -> str:
    """Return the hex-encoded SHA-256 digest of the file at *path*.

    The file is read in 1 MiB blocks, so it is never loaded into memory
    as a whole.
    """
    block_size = 1024 * 1024
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        # iter() with a sentinel stops at the first empty read (end of file).
        for block in iter(lambda: stream.read(block_size), b""):
            digest.update(block)
    return digest.hexdigest()
def _write_archive_with_checksum(output_dir, archive_name, label, members):
    """Create one gzip tarball plus its ``.sha256`` companion file.

    Args:
        output_dir: Directory receiving both files.
        archive_name: File name of the tarball inside *output_dir*.
        label: Human-readable archive kind used in the progress message.
        members: Iterable of ``(archive_name_of_member, source_path)`` pairs.

    Returns:
        ``(archive_path, sha256_hex_digest)`` of the written tarball.
    """
    archive_path = output_dir / archive_name
    print(f"\nCreating {label} archive: {archive_path.name}")
    with tarfile.open(archive_path, "w:gz") as tar:
        for member_name, source in members:
            tar.add(source, arcname=member_name)

    digest = sha256_file(archive_path)
    checksum_path = output_dir / f"{archive_name}.sha256"
    checksum_path.write_text(f"{digest} {archive_name}\n")

    print(f" Size: {archive_path.stat().st_size / (1024**2):.1f} MB")
    print(f" SHA-256: {digest[:16]}...")
    return archive_path, digest


def package(
    onedir_path: Path,
    output_dir: Path,
    cuda_libs_version: str,
    torch_compat: str,
):
    """Split a PyInstaller --onedir tree into core and CUDA-libs archives.

    Every regular file below *onedir_path* is routed by ``is_nvidia_file``
    into either the server core tarball or the CUDA libs tarball. Members
    are stored under their path relative to *onedir_path* (no parent
    directory prefix). A ``.sha256`` file is written per archive and a
    ``cuda-libs.json`` manifest describes the CUDA libs archive.

    Args:
        onedir_path: PyInstaller --onedir output directory to package.
        output_dir: Destination directory; created if missing.
        cuda_libs_version: Version string embedded in the CUDA archive
            file name and in the manifest.
        torch_compat: Torch compatibility range recorded in the manifest.

    Exits the process with status 1 (via ``sys.exit``) when no NVIDIA
    files are found, before any archive is written.
    """
    mib = 1024**2
    gib = 1024**3
    output_dir.mkdir(parents=True, exist_ok=True)

    # Route every file into the NVIDIA (True) or core (False) bucket.
    buckets = {True: [], False: []}
    for path in sorted(onedir_path.rglob("*")):
        if path.is_dir():
            continue
        rel_name = str(path.relative_to(onedir_path))
        buckets[is_nvidia_file(rel_name)].append((rel_name, path))
    core_files = buckets[False]
    nvidia_files = buckets[True]

    core_size = sum(path.stat().st_size for _, path in core_files)
    nvidia_size = sum(path.stat().st_size for _, path in nvidia_files)

    print(f"Input directory: {onedir_path}")
    print(f"Core files: {len(core_files)} ({core_size / mib:.1f} MB)")
    print(f"NVIDIA files: {len(nvidia_files)} ({nvidia_size / mib:.1f} MB)")

    if not nvidia_files:
        print(
            f"ERROR: No NVIDIA files found in {onedir_path}. Refusing to create an empty CUDA libs archive.",
            file=sys.stderr,
        )
        print(
            "Make sure you built with --cuda and the NVIDIA packages are present.",
            file=sys.stderr,
        )
        sys.exit(1)

    # Server core archive first, then the CUDA libs archive.
    server_archive, _server_sha = _write_archive_with_checksum(
        output_dir, "voicebox-server-cuda.tar.gz", "server core", core_files
    )
    cuda_libs_archive, cuda_sha = _write_archive_with_checksum(
        output_dir, f"cuda-libs-{cuda_libs_version}.tar.gz", "CUDA libs", nvidia_files
    )

    # cuda-libs.json manifest describing the CUDA libs archive.
    manifest = {
        "version": cuda_libs_version,
        "torch_compat": torch_compat,
        "archive": cuda_libs_archive.name,
        "sha256": cuda_sha,
    }
    manifest_text = json.dumps(manifest, indent=2)
    manifest_path = output_dir / "cuda-libs.json"
    manifest_path.write_text(manifest_text + "\n")
    print(f"\nManifest: {manifest_path.name}")
    print(manifest_text)

    # Summary
    server_bytes = server_archive.stat().st_size
    cuda_bytes = cuda_libs_archive.stat().st_size
    total_input = core_size + nvidia_size
    total_output = server_bytes + cuda_bytes
    print(f"\nTotal input: {total_input / gib:.2f} GB")
    print(f"Total output: {total_output / gib:.2f} GB (compressed)")
    print(f"Server core: {server_bytes / mib:.1f} MB (redownloaded on app update)")
    print(f"CUDA libs: {cuda_bytes / mib:.1f} MB (cached until CUDA toolkit bump)")
def main():
    """Parse the command line and run ``package``.

    Positional ``input`` is the PyInstaller --onedir output directory.
    ``--output`` defaults to the parent of ``input``. Exits with status 1
    when ``input`` is not a directory.
    """
    parser = argparse.ArgumentParser(
        description="Package PyInstaller --onedir CUDA build into server + CUDA libs archives"
    )
    parser.add_argument(
        "input",
        type=Path,
        help="Path to PyInstaller --onedir output directory (e.g. backend/dist/voicebox-server-cuda/)",
    )
    parser.add_argument(
        "--output",
        type=Path,
        default=None,
        help="Output directory for archives (default: same as input parent)",
    )
    # %(default)s lets argparse print the real default, so the help text
    # cannot drift from the value actually used.
    parser.add_argument(
        "--cuda-libs-version",
        type=str,
        default="cu128-v1",
        help="Version string for the CUDA libs archive (default: %(default)s)",
    )
    parser.add_argument(
        "--torch-compat",
        type=str,
        default=">=2.7.0,<2.11.0",
        help="Torch version compatibility range (default: %(default)s)",
    )
    args = parser.parse_args()

    if not args.input.is_dir():
        print(f"Error: {args.input} is not a directory", file=sys.stderr)
        print("Expected a PyInstaller --onedir output directory.", file=sys.stderr)
        sys.exit(1)

    output_dir = args.output or args.input.parent
    package(args.input, output_dir, args.cuda_libs_version, args.torch_compat)


if __name__ == "__main__":
    main()
|