# StyleForge / compile_kernels.py
# github-actions[bot]
# Deploy from GitHub - 2026-01-19 04:19:46
# 20cfecf
#!/usr/bin/env python3
"""
Compile CUDA kernels locally for deployment to Hugging Face Spaces.
"""
import sys
import os
import torch
from pathlib import Path
# Silence warning noise so the console output stays readable.
import warnings
warnings.filterwarnings('ignore')

banner = "=" * 60
print(banner)
print("StyleForge CUDA Kernel Compiler")
print(banner)
print()

# A CUDA-capable GPU is mandatory; bail out early otherwise.
if not torch.cuda.is_available():
    print("ERROR: CUDA is not available on this system.")
    print("This script requires a CUDA-capable GPU.")
    sys.exit(1)

print(f"CUDA Version: {torch.version.cuda}")
print(f"PyTorch Version: {torch.__version__}")
print(f"GPU: {torch.cuda.get_device_name(0)}")

# Report the device's compute capability (e.g. "7.5" for a T4).
major, minor = torch.cuda.get_device_capability(0)
compute_capability = f"{major}.{minor}"
print(f"Compute Capability: {compute_capability}")
print()

# Directory that will receive the compiled shared libraries.
prebuilt_dir = Path("kernels/prebuilt")
prebuilt_dir.mkdir(exist_ok=True, parents=True)

print("Compiling CUDA kernels...")
print("-" * 60)
try:
    # load_inline JIT-compiles the CUDA source into an importable extension.
    from torch.utils.cpp_extension import load_inline, CUDA_HOME

    if CUDA_HOME is None:
        print("ERROR: CUDA_HOME is not set. CUDA toolkit may not be installed.")
        sys.exit(1)
    print(f"CUDA Home: {CUDA_HOME}")

    # Read the CUDA kernel source from the repo.
    kernel_path = Path("kernels/instance_norm.cu")
    if not kernel_path.exists():
        print(f"ERROR: Kernel source not found at {kernel_path}")
        sys.exit(1)
    cuda_source = kernel_path.read_text()
    print(f"Loaded CUDA source: {len(cuda_source)} bytes")

    # Architecture-specific flags covering the GPUs offered on HF Spaces.
    extra_cuda_cflags = [
        '-O3',
        '--use_fast_math',
        '-gencode=arch=compute_70,code=sm_70',  # V100
        '-gencode=arch=compute_75,code=sm_75',  # T4
        '-gencode=arch=compute_80,code=sm_80',  # A100
    ]
    print("Build flags:", ' '.join(extra_cuda_cflags))
    print()
    print("Compiling... (this may take 1-2 minutes)")

    # Compile the kernel.
    # Note: PyTorch 2.x requires cpp_sources even if empty (bindings are in CUDA)
    module = load_inline(
        name='fused_instance_norm',
        cpp_sources=[],  # Empty since bindings are in the .cu file
        cuda_sources=[cuda_source],
        extra_cuda_cflags=extra_cuda_cflags,
        verbose=False
    )
    print()
    print("-" * 60)
    print("Compilation successful!")
    print()

    # NOTE(review): _get_build_directory is a private PyTorch API; it has been
    # stable across 1.x/2.x, but could change without notice — confirm on upgrade.
    import torch.utils.cpp_extension
    build_dir = Path(torch.utils.cpp_extension._get_build_directory('fused_instance_norm', False))
    print(f"Build directory: {build_dir}")

    # Collect the compiled artifacts (.so on Linux, .pyd on Windows).
    so_files = list(build_dir.rglob("*.so")) + list(build_dir.rglob("*.pyd"))
    if not so_files:
        print("ERROR: No compiled .so/.pyd file found")
        sys.exit(1)

    # Copy every artifact into the repo's prebuilt directory.
    import shutil
    for src_file in so_files:
        dst_file = prebuilt_dir / src_file.name
        shutil.copy2(src_file, dst_file)
        size_kb = dst_file.stat().st_size / 1024
        print(f"Copied: {dst_file.name} ({size_kb:.1f} KB)")

    print()
    print("=" * 60)
    print("Kernel compilation complete!")
    print(f"Pre-compiled kernels saved to: {prebuilt_dir}")
    print()
    # BUG FIX: the original globbed prebuilt_dir twice and relied on the
    # conditional expression binding looser than `+`, printing the header plus
    # a blank line even when no .so existed (e.g. only a .pyd was produced).
    # Glob once and only print the download hint when a .so is actually there.
    prebuilt_so = sorted(prebuilt_dir.glob("*.so"))
    if prebuilt_so:
        print("Download the .so file and add it to your local repo:")
        print(" kernels/prebuilt/" + prebuilt_so[0].name)
    print("=" * 60)
except Exception as e:
    # sys.exit() raises SystemExit (a BaseException), so the early exits above
    # are NOT swallowed here — only genuine compilation/runtime failures are.
    print()
    print("-" * 60)
    print("ERROR: Compilation failed!")
    print(f"Details: {e}")
    import traceback
    traceback.print_exc()
    sys.exit(1)