Olivia Claude Opus 4.5 committed on
Commit
a62d768
·
1 Parent(s): 1282ba1

Add CUDA kernel compilation script for Colab

Browse files

- Add compile_kernels.py for compiling kernels locally
- Add compile_kernels_colab.ipynb for Google Colab
- Update .gitignore to allow prebuilt/*.so files

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

Files changed (2) hide show
  1. compile_kernels.py +44 -53
  2. compile_kernels_colab.ipynb +21 -21
compile_kernels.py CHANGED
@@ -1,17 +1,16 @@
1
  #!/usr/bin/env python3
2
  """
3
  Compile CUDA kernels locally for deployment to Hugging Face Spaces.
4
-
5
- This script compiles the custom CUDA kernels and saves them to the prebuilt/
6
- directory for use on ZeroGPU where JIT compilation is too slow.
7
  """
8
 
9
  import sys
 
10
  import torch
11
  from pathlib import Path
12
 
13
- # Add kernels directory to path
14
- sys.path.insert(0, str(Path(__file__).parent))
 
15
 
16
  print("=" * 60)
17
  print("StyleForge CUDA Kernel Compiler")
@@ -35,47 +34,51 @@ print(f"Compute Capability: {compute_capability}")
35
  print()
36
 
37
  # Create prebuilt directory
38
- prebuilt_dir = Path(__file__).parent / "kernels" / "prebuilt"
39
  prebuilt_dir.mkdir(exist_ok=True, parents=True)
40
 
41
  print("Compiling CUDA kernels...")
42
  print("-" * 60)
43
 
44
  try:
45
- from kernels.cuda_build import compile_inline, get_cuda_info
 
 
 
 
 
 
 
46
 
47
  # Read CUDA source
48
- kernel_path = Path(__file__).parent / "kernels" / "instance_norm.cu"
49
  if not kernel_path.exists():
50
  print(f"ERROR: Kernel source not found at {kernel_path}")
51
  sys.exit(1)
52
 
53
  cuda_source = kernel_path.read_text()
 
54
 
55
- # Get CUDA build info
56
- cuda_info = get_cuda_info()
57
- extra_flags = cuda_info.get('extra_cuda_cflags', [])
58
-
59
- # Add architecture-specific flags for Hugging Face GPUs
60
- # Common GPUs on HF: T4 (7.5), A100 (8.0), V100 (7.0)
61
  hf_arch_flags = [
62
  '-gencode=arch=compute_70,code=sm_70', # V100
63
  '-gencode=arch=compute_75,code=sm_75', # T4
64
  '-gencode=arch=compute_80,code=sm_80', # A100
65
  ]
66
- extra_flags.extend(hf_arch_flags)
67
 
68
- print("Build flags:", ' '.join(extra_flags))
69
  print()
 
70
 
71
  # Compile the kernel
72
- module = compile_inline(
73
  name='fused_instance_norm',
74
- cuda_source=cuda_source,
75
- functions=['forward'],
76
- build_directory=Path(__file__).parent / "build",
77
- extra_cuda_cflags=extra_flags,
78
- verbose=True
79
  )
80
 
81
  print()
@@ -83,44 +86,32 @@ try:
83
  print("Compilation successful!")
84
  print()
85
 
86
- # Find and copy the compiled library
87
- build_dir = Path(__file__).parent / "build"
88
- so_files = list(build_dir.rglob("fused_instance_norm*.so"))
89
- pyd_files = list(build_dir.rglob("fused_instance_norm*.pyd"))
90
-
91
- compiled_files = so_files + pyd_files
92
-
93
- if not compiled_files:
94
- print("WARNING: No compiled .so/.pyd file found in build/")
95
- print("The module may be in torch's cache. Trying to locate...")
96
-
97
- # Try to find in torch extension cache
98
- import torch.utils.cpp_extension
99
- cache_dir = Path(torch.utils.cpp_extension._get_build_directory('fused_instance_norm', False))
100
- if cache_dir.exists():
101
- compiled_files = list(cache_dir.rglob("*.so")) + list(cache_dir.rglob("*.pyd"))
102
-
103
- if compiled_files:
104
- # Copy each compiled file to prebuilt/
105
- for src_file in compiled_files:
106
- dst_file = prebuilt_dir / src_file.name
107
- import shutil
108
- shutil.copy2(src_file, dst_file)
109
- print(f"Copied: {dst_file}")
110
- print(f" Size: {dst_file.stat().st_size / 1024:.1f} KB")
111
- else:
112
- print("ERROR: Could not locate compiled kernel file.")
113
- print("The kernel was compiled but the output file location is unknown.")
114
  sys.exit(1)
115
 
 
 
 
 
 
 
 
 
116
  print()
117
  print("=" * 60)
118
  print("Kernel compilation complete!")
119
  print(f"Pre-compiled kernels saved to: {prebuilt_dir}")
120
  print()
121
- print("Next steps:")
122
- print("1. Test the kernels locally: python -c 'from kernels import compile_kernels; print(compile_kernels())'")
123
- print("2. Commit and push to deploy")
124
  print("=" * 60)
125
 
126
  except Exception as e:
 
1
  #!/usr/bin/env python3
2
  """
3
  Compile CUDA kernels locally for deployment to Hugging Face Spaces.
 
 
 
4
  """
5
 
6
  import sys
7
+ import os
8
  import torch
9
  from pathlib import Path
10
 
11
+ # Suppress warnings for cleaner output
12
+ import warnings
13
+ warnings.filterwarnings('ignore')
14
 
15
  print("=" * 60)
16
  print("StyleForge CUDA Kernel Compiler")
 
34
  print()
35
 
36
  # Create prebuilt directory
37
+ prebuilt_dir = Path("kernels/prebuilt")
38
  prebuilt_dir.mkdir(exist_ok=True, parents=True)
39
 
40
  print("Compiling CUDA kernels...")
41
  print("-" * 60)
42
 
43
  try:
44
+ # Import PyTorch CUDA extension utilities
45
+ from torch.utils.cpp_extension import load_inline, CUDA_HOME
46
+
47
+ if CUDA_HOME is None:
48
+ print("ERROR: CUDA_HOME is not set. CUDA toolkit may not be installed.")
49
+ sys.exit(1)
50
+
51
+ print(f"CUDA Home: {CUDA_HOME}")
52
 
53
  # Read CUDA source
54
+ kernel_path = Path("kernels/instance_norm.cu")
55
  if not kernel_path.exists():
56
  print(f"ERROR: Kernel source not found at {kernel_path}")
57
  sys.exit(1)
58
 
59
  cuda_source = kernel_path.read_text()
60
+ print(f"Loaded CUDA source: {len(cuda_source)} bytes")
61
 
62
+ # Architecture-specific flags for Hugging Face GPUs
63
+ extra_cuda_cflags = ['-O3', '--use_fast_math']
 
 
 
 
64
  hf_arch_flags = [
65
  '-gencode=arch=compute_70,code=sm_70', # V100
66
  '-gencode=arch=compute_75,code=sm_75', # T4
67
  '-gencode=arch=compute_80,code=sm_80', # A100
68
  ]
69
+ extra_cuda_cflags.extend(hf_arch_flags)
70
 
71
+ print("Build flags:", ' '.join(extra_cuda_cflags))
72
  print()
73
+ print("Compiling... (this may take 1-2 minutes)")
74
 
75
  # Compile the kernel
76
+ module = load_inline(
77
  name='fused_instance_norm',
78
+ cuda_sources=[cuda_source],
79
+ extra_cuda_cflags=extra_cuda_cflags,
80
+ with_pybind11=True,
81
+ verbose=False
 
82
  )
83
 
84
  print()
 
86
  print("Compilation successful!")
87
  print()
88
 
89
+ # Find the compiled library
90
+ import torch.utils.cpp_extension
91
+ build_dir = Path(torch.utils.cpp_extension._get_build_directory('fused_instance_norm', False))
92
+ print(f"Build directory: {build_dir}")
93
+
94
+ so_files = list(build_dir.rglob("*.so")) + list(build_dir.rglob("*.pyd"))
95
+
96
+ if not so_files:
97
+ print("ERROR: No compiled .so/.pyd file found")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  sys.exit(1)
99
 
100
+ # Copy to prebuilt directory
101
+ import shutil
102
+ for src_file in so_files:
103
+ dst_file = prebuilt_dir / src_file.name
104
+ shutil.copy2(src_file, dst_file)
105
+ size_kb = dst_file.stat().st_size / 1024
106
+ print(f"Copied: {dst_file.name} ({size_kb:.1f} KB)")
107
+
108
  print()
109
  print("=" * 60)
110
  print("Kernel compilation complete!")
111
  print(f"Pre-compiled kernels saved to: {prebuilt_dir}")
112
  print()
113
+ print("Download the .so file and add it to your local repo:")
114
+ print(" kernels/prebuilt/" + list(prebuilt_dir.glob("*.so"))[0].name if list(prebuilt_dir.glob("*.so")) else "")
 
115
  print("=" * 60)
116
 
117
  except Exception as e:
compile_kernels_colab.ipynb CHANGED
@@ -36,9 +36,23 @@
36
  "metadata": {},
37
  "outputs": [],
38
  "source": [
39
- "# Install dependencies\n",
40
- "!pip install torch torchvision --index-url https://download.pytorch.org/whl/cu121\n",
41
- "!pip install ninja"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  ]
43
  },
44
  {
@@ -82,24 +96,6 @@
82
  " print('No kernel files found!')\n",
83
  " print('Check the compilation output above for errors.')"
84
  ]
85
- },
86
- {
87
- "cell_type": "markdown",
88
- "metadata": {},
89
- "source": [
90
- "## Next Steps\n",
91
- "\n",
92
- "1. Download the `.so` file(s)\n",
93
- "2. Copy them to your local `kernels/prebuilt/` directory\n",
94
- "3. Commit and push to GitHub:\n",
95
- " ```\n",
96
- " git add kernels/prebuilt/\n",
97
- " git commit -m \"Add pre-compiled CUDA kernels\"\n",
98
- " git push\n",
99
- " ```\n",
100
- "\n",
101
- "The kernels will be automatically loaded on Hugging Face Spaces!"
102
- ]
103
  }
104
  ],
105
  "metadata": {
@@ -107,6 +103,10 @@
107
  "display_name": "Python 3",
108
  "language": "python",
109
  "name": "python3"
 
 
 
 
110
  }
111
  },
112
  "nbformat": 4,
 
36
  "metadata": {},
37
  "outputs": [],
38
  "source": [
39
+ "# The PyTorch with CUDA should already be installed in Colab\n",
40
+ "import torch\n",
41
+ "print(f'PyTorch: {torch.__version__}')\n",
42
+ "print(f'CUDA available: {torch.cuda.is_available()}')\n",
43
+ "if torch.cuda.is_available():\n",
44
+ " print(f'CUDA version: {torch.version.cuda}')\n",
45
+ " print(f'GPU: {torch.cuda.get_device_name(0)}')"
46
+ ]
47
+ },
48
+ {
49
+ "cell_type": "code",
50
+ "execution_count": null,
51
+ "metadata": {},
52
+ "outputs": [],
53
+ "source": [
54
+ "# Install ninja for compilation\n",
55
+ "!pip install ninja -q"
56
  ]
57
  },
58
  {
 
96
  " print('No kernel files found!')\n",
97
  " print('Check the compilation output above for errors.')"
98
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  }
100
  ],
101
  "metadata": {
 
103
  "display_name": "Python 3",
104
  "language": "python",
105
  "name": "python3"
106
+ },
107
+ "language_info": {
108
+ "name": "python",
109
+ "version": "3.10.0"
110
  }
111
  },
112
  "nbformat": 4,