Spaces:

oliau
/

StyleForge

Sleeping

App Files Files Community

Olivia committed 14 days ago

Commit

4aca758

1 Parent(s): 0122045

info endpoint

Browse files

Files changed (2) hide show

app.py +16 -3
kernels/__init__.py +37 -4

app.py CHANGED Viewed

@@ -73,9 +73,10 @@ if SPACES_AVAILABLE:
 # Check CUDA kernels availability
 try:
-    from kernels import check_cuda_kernels, get_fused_instance_norm
     CUDA_KERNELS_AVAILABLE = check_cuda_kernels()
-    print(f"CUDA Kernels: {'Available' if CUDA_KERNELS_AVAILABLE else 'Not Available'}")
 except Exception:
     CUDA_KERNELS_AVAILABLE = False
     print("CUDA Kernels: Not Available (using PyTorch fallback)")
@@ -528,7 +529,7 @@ print("=" * 50)
 print("StyleForge - Initializing...")
 print("=" * 50)
 print(f"Device: {DEVICE.type.upper()}")
-print(f"CUDA Kernels: {'Available' if CUDA_KERNELS_AVAILABLE else 'Not Available'}")
 print("Preloading models...")
 for style in STYLES.keys():
     try:
@@ -1307,10 +1308,22 @@ def stylize_image_impl(
     add_watermark: bool
 ) -> Tuple[Optional[Image.Image], str, Optional[str]]:
     """Main stylization function for Gradio."""
     if input_image is None:
         return None, "Please upload an image first.", None
     try:
         # Convert to RGB if needed
         if input_image.mode != 'RGB':
             input_image = input_image.convert('RGB')

 # Check CUDA kernels availability
 try:
+    from kernels import check_cuda_kernels, get_fused_instance_norm, compile_kernels
+    # On ZeroGPU, kernels will be compiled on-demand within GPU tasks
     CUDA_KERNELS_AVAILABLE = check_cuda_kernels()
+    print(f"CUDA Kernels: {'Available (lazy-loaded)' if not CUDA_KERNELS_AVAILABLE and SPACES_AVAILABLE else 'Available' if CUDA_KERNELS_AVAILABLE else 'Not Available'}")
 except Exception:
     CUDA_KERNELS_AVAILABLE = False
     print("CUDA Kernels: Not Available (using PyTorch fallback)")
 print("StyleForge - Initializing...")
 print("=" * 50)
 print(f"Device: {DEVICE.type.upper()}")
+print(f"CUDA Kernels: {'Available' if CUDA_KERNELS_AVAILABLE else 'Not Available (will compile on first GPU task)'}")
 print("Preloading models...")
 for style in STYLES.keys():
     try:
     add_watermark: bool
 ) -> Tuple[Optional[Image.Image], str, Optional[str]]:
     """Main stylization function for Gradio."""
+    global CUDA_KERNELS_AVAILABLE
     if input_image is None:
         return None, "Please upload an image first.", None
     try:
+        # On ZeroGPU, compile CUDA kernels within the GPU task on first use
+        if SPACES_AVAILABLE and not CUDA_KERNELS_AVAILABLE:
+            try:
+                from kernels import compile_kernels
+                CUDA_KERNELS_AVAILABLE = compile_kernels()
+                if CUDA_KERNELS_AVAILABLE:
+                    print("CUDA kernels compiled successfully within GPU task!")
+            except Exception as e:
+                print(f"Failed to compile CUDA kernels: {e}")
         # Convert to RGB if needed
         if input_image.mode != 'RGB':
             input_image = input_image.convert('RGB')

kernels/__init__.py CHANGED Viewed

@@ -1,13 +1,20 @@
 """
 StyleForge CUDA Kernels Package
 Custom CUDA kernels for accelerated neural style transfer.
 """
 import torch
 # Try to import CUDA kernels, fall back gracefully
 _CUDA_KERNELS_AVAILABLE = False
 _FusedInstanceNorm2d = None
 def check_cuda_kernels():
@@ -26,18 +33,44 @@ def get_fused_instance_norm(num_features, **kwargs):
     return torch.nn.InstanceNorm2d(num_features, affine=kwargs.get('affine', True))
-# Try to import CUDA kernels on load
-if torch.cuda.is_available():
     try:
         from .instance_norm_wrapper import FusedInstanceNorm2d
         _FusedInstanceNorm2d = FusedInstanceNorm2d
         _CUDA_KERNELS_AVAILABLE = True
-    except Exception:
-        _CUDA_KERNELS_AVAILABLE = False
 __all__ = [
     'check_cuda_kernels',
     'get_fused_instance_norm',
     'FusedInstanceNorm2d',
 ]

 """
 StyleForge CUDA Kernels Package
 Custom CUDA kernels for accelerated neural style transfer.
+For ZeroGPU: Kernels are compiled on-demand within GPU task context.
 """
 import torch
+import os
 # Try to import CUDA kernels, fall back gracefully
 _CUDA_KERNELS_AVAILABLE = False
 _FusedInstanceNorm2d = None
+_KERNELS_COMPILED = False
+# Check if running on ZeroGPU
+_ZERO_GPU = os.environ.get('SPACE_ID', '').startswith('hf.co') or os.environ.get('ZERO_GPU') == '1'
 def check_cuda_kernels():
     return torch.nn.InstanceNorm2d(num_features, affine=kwargs.get('affine', True))
+def compile_kernels():
+    """
+    Compile CUDA kernels on-demand.
+    This function is called within a GPU task on ZeroGPU to ensure
+    compilation happens within the task's timeout budget.
+    """
+    global _CUDA_KERNELS_AVAILABLE, _FusedInstanceNorm2d, _KERNELS_COMPILED
+    if _KERNELS_COMPILED:
+        return _CUDA_KERNELS_AVAILABLE
+    if not torch.cuda.is_available():
+        _KERNELS_COMPILED = True
+        return False
     try:
         from .instance_norm_wrapper import FusedInstanceNorm2d
         _FusedInstanceNorm2d = FusedInstanceNorm2d
         _CUDA_KERNELS_AVAILABLE = True
+        _KERNELS_COMPILED = True
+        print("CUDA kernels compiled successfully!")
+        return True
+    except Exception as e:
+        print(f"Failed to compile CUDA kernels: {e}")
+        print("Using PyTorch InstanceNorm2d fallback")
+        _KERNELS_COMPILED = True
+        return False
+# Auto-compile on import for non-ZeroGPU environments
+if torch.cuda.is_available() and not _ZERO_GPU:
+    compile_kernels()
 __all__ = [
     'check_cuda_kernels',
     'get_fused_instance_norm',
     'FusedInstanceNorm2d',
+    'compile_kernels',
 ]