Spaces:

oliau
/

StyleForge

Running on Zero

Olivia Claude Opus 4.5 committed on Jan 19

Commit

a62d768

1 Parent(s): 1282ba1

Add CUDA kernel compilation script for Colab

- Add compile_kernels.py for compiling kernels locally
- Add compile_kernels_colab.ipynb for Google Colab
- Update .gitignore to allow prebuilt/*.so files

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

Files changed (2) hide show

compile_kernels.py +44 -53
compile_kernels_colab.ipynb +21 -21

compile_kernels.py CHANGED Viewed

@@ -1,17 +1,16 @@
 #!/usr/bin/env python3
 """
 Compile CUDA kernels locally for deployment to Hugging Face Spaces.
-This script compiles the custom CUDA kernels and saves them to the prebuilt/
-directory for use on ZeroGPU where JIT compilation is too slow.
 """
 import sys
 import torch
 from pathlib import Path
-# Add kernels directory to path
-sys.path.insert(0, str(Path(__file__).parent))
 print("=" * 60)
 print("StyleForge CUDA Kernel Compiler")
@@ -35,47 +34,51 @@ print(f"Compute Capability: {compute_capability}")
 print()
 # Create prebuilt directory
-prebuilt_dir = Path(__file__).parent / "kernels" / "prebuilt"
 prebuilt_dir.mkdir(exist_ok=True, parents=True)
 print("Compiling CUDA kernels...")
 print("-" * 60)
 try:
-    from kernels.cuda_build import compile_inline, get_cuda_info
     # Read CUDA source
-    kernel_path = Path(__file__).parent / "kernels" / "instance_norm.cu"
     if not kernel_path.exists():
         print(f"ERROR: Kernel source not found at {kernel_path}")
         sys.exit(1)
     cuda_source = kernel_path.read_text()
-    # Get CUDA build info
-    cuda_info = get_cuda_info()
-    extra_flags = cuda_info.get('extra_cuda_cflags', [])
-    # Add architecture-specific flags for Hugging Face GPUs
-    # Common GPUs on HF: T4 (7.5), A100 (8.0), V100 (7.0)
     hf_arch_flags = [
         '-gencode=arch=compute_70,code=sm_70',  # V100
         '-gencode=arch=compute_75,code=sm_75',  # T4
         '-gencode=arch=compute_80,code=sm_80',  # A100
     ]
-    extra_flags.extend(hf_arch_flags)
-    print("Build flags:", ' '.join(extra_flags))
     print()
     # Compile the kernel
-    module = compile_inline(
         name='fused_instance_norm',
-        cuda_source=cuda_source,
-        functions=['forward'],
-        build_directory=Path(__file__).parent / "build",
-        extra_cuda_cflags=extra_flags,
-        verbose=True
     )
     print()
@@ -83,44 +86,32 @@ try:
     print("Compilation successful!")
     print()
-    # Find and copy the compiled library
-    build_dir = Path(__file__).parent / "build"
-    so_files = list(build_dir.rglob("fused_instance_norm*.so"))
-    pyd_files = list(build_dir.rglob("fused_instance_norm*.pyd"))
-    compiled_files = so_files + pyd_files
-    if not compiled_files:
-        print("WARNING: No compiled .so/.pyd file found in build/")
-        print("The module may be in torch's cache. Trying to locate...")
-        # Try to find in torch extension cache
-        import torch.utils.cpp_extension
-        cache_dir = Path(torch.utils.cpp_extension._get_build_directory('fused_instance_norm', False))
-        if cache_dir.exists():
-            compiled_files = list(cache_dir.rglob("*.so")) + list(cache_dir.rglob("*.pyd"))
-    if compiled_files:
-        # Copy each compiled file to prebuilt/
-        for src_file in compiled_files:
-            dst_file = prebuilt_dir / src_file.name
-            import shutil
-            shutil.copy2(src_file, dst_file)
-            print(f"Copied: {dst_file}")
-            print(f"  Size: {dst_file.stat().st_size / 1024:.1f} KB")
-    else:
-        print("ERROR: Could not locate compiled kernel file.")
-        print("The kernel was compiled but the output file location is unknown.")
         sys.exit(1)
     print()
     print("=" * 60)
     print("Kernel compilation complete!")
     print(f"Pre-compiled kernels saved to: {prebuilt_dir}")
     print()
-    print("Next steps:")
-    print("1. Test the kernels locally: python -c 'from kernels import compile_kernels; print(compile_kernels())'")
-    print("2. Commit and push to deploy")
     print("=" * 60)
 except Exception as e:

 #!/usr/bin/env python3
 """
 Compile CUDA kernels locally for deployment to Hugging Face Spaces.
 """
 import sys
+import os
 import torch
 from pathlib import Path
+# Suppress warnings for cleaner output
+import warnings
+warnings.filterwarnings('ignore')
 print("=" * 60)
 print("StyleForge CUDA Kernel Compiler")
 print()
 # Create prebuilt directory
+prebuilt_dir = Path("kernels/prebuilt")
 prebuilt_dir.mkdir(exist_ok=True, parents=True)
 print("Compiling CUDA kernels...")
 print("-" * 60)
 try:
+    # Import PyTorch CUDA extension utilities
+    from torch.utils.cpp_extension import load_inline, CUDA_HOME
+    if CUDA_HOME is None:
+        print("ERROR: CUDA_HOME is not set. CUDA toolkit may not be installed.")
+        sys.exit(1)
+    print(f"CUDA Home: {CUDA_HOME}")
     # Read CUDA source
+    kernel_path = Path("kernels/instance_norm.cu")
     if not kernel_path.exists():
         print(f"ERROR: Kernel source not found at {kernel_path}")
         sys.exit(1)
     cuda_source = kernel_path.read_text()
+    print(f"Loaded CUDA source: {len(cuda_source)} bytes")
+    # Architecture-specific flags for Hugging Face GPUs
+    extra_cuda_cflags = ['-O3', '--use_fast_math']
     hf_arch_flags = [
         '-gencode=arch=compute_70,code=sm_70',  # V100
         '-gencode=arch=compute_75,code=sm_75',  # T4
         '-gencode=arch=compute_80,code=sm_80',  # A100
     ]
+    extra_cuda_cflags.extend(hf_arch_flags)
+    print("Build flags:", ' '.join(extra_cuda_cflags))
     print()
+    print("Compiling... (this may take 1-2 minutes)")
     # Compile the kernel
+    module = load_inline(
         name='fused_instance_norm',
+        cuda_sources=[cuda_source],
+        extra_cuda_cflags=extra_cuda_cflags,
+        with_pybind11=True,
+        verbose=False
     )
     print()
     print("Compilation successful!")
     print()
+    # Find the compiled library
+    import torch.utils.cpp_extension
+    build_dir = Path(torch.utils.cpp_extension._get_build_directory('fused_instance_norm', False))
+    print(f"Build directory: {build_dir}")
+    so_files = list(build_dir.rglob("*.so")) + list(build_dir.rglob("*.pyd"))
+    if not so_files:
+        print("ERROR: No compiled .so/.pyd file found")
         sys.exit(1)
+    # Copy to prebuilt directory
+    import shutil
+    for src_file in so_files:
+        dst_file = prebuilt_dir / src_file.name
+        shutil.copy2(src_file, dst_file)
+        size_kb = dst_file.stat().st_size / 1024
+        print(f"Copied: {dst_file.name} ({size_kb:.1f} KB)")
     print()
     print("=" * 60)
     print("Kernel compilation complete!")
     print(f"Pre-compiled kernels saved to: {prebuilt_dir}")
     print()
+    print("Download the .so file and add it to your local repo:")
+    print("  kernels/prebuilt/" + list(prebuilt_dir.glob("*.so"))[0].name if list(prebuilt_dir.glob("*.so")) else "")
     print("=" * 60)
 except Exception as e:

compile_kernels_colab.ipynb CHANGED Viewed

@@ -36,9 +36,23 @@
       "metadata": {},
       "outputs": [],
       "source": [
-        "# Install dependencies\n",
-        "!pip install torch torchvision --index-url https://download.pytorch.org/whl/cu121\n",
-        "!pip install ninja"
       ]
     },
     {
@@ -82,24 +96,6 @@
         "    print('No kernel files found!')\n",
         "    print('Check the compilation output above for errors.')"
       ]
-    },
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "## Next Steps\n",
-        "\n",
-        "1. Download the `.so` file(s)\n",
-        "2. Copy them to your local `kernels/prebuilt/` directory\n",
-        "3. Commit and push to GitHub:\n",
-        "   ```\n",
-        "   git add kernels/prebuilt/\n",
-        "   git commit -m \"Add pre-compiled CUDA kernels\"\n",
-        "   git push\n",
-        "   ```\n",
-        "\n",
-        "The kernels will be automatically loaded on Hugging Face Spaces!"
-      ]
     }
   ],
   "metadata": {
@@ -107,6 +103,10 @@
       "display_name": "Python 3",
       "language": "python",
       "name": "python3"
     }
   },
   "nbformat": 4,

       "metadata": {},
       "outputs": [],
       "source": [
+        "# PyTorch with CUDA should already be installed in Colab\n",
+        "import torch\n",
+        "print(f'PyTorch: {torch.__version__}')\n",
+        "print(f'CUDA available: {torch.cuda.is_available()}')\n",
+        "if torch.cuda.is_available():\n",
+        "    print(f'CUDA version: {torch.version.cuda}')\n",
+        "    print(f'GPU: {torch.cuda.get_device_name(0)}')"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {},
+      "outputs": [],
+      "source": [
+        "# Install ninja for compilation\n",
+        "!pip install ninja -q"
       ]
     },
     {
         "    print('No kernel files found!')\n",
         "    print('Check the compilation output above for errors.')"
       ]
     }
   ],
   "metadata": {
       "display_name": "Python 3",
       "language": "python",
       "name": "python3"
+    },
+    "language_info": {
+      "name": "python",
+      "version": "3.10.0"
     }
   },
   "nbformat": 4,