Spaces: Running on Zero
fixing issues
app.py CHANGED
@@ -255,21 +255,10 @@ def log_progress(message, gr_progress=None, progress_value=None):
 
 
 # Device-specific optimizations
-if torch.cuda.is_available():
-
-
-
-    # Check if bfloat16 is supported (requires compute capability >= 8.0, e.g., A100, H100)
-    if torch.cuda.is_bf16_supported():
-        print(" ✓ Using bfloat16 precision for optimal performance")
-        print(" ✓ Memory optimizations enabled")
-        # Note: bfloat16 is handled automatically by model_management on CUDA
-        # No dtype forcing needed - ComfyUI uses optimal dtypes by default
-    else:
-        print(" ⚠️ bfloat16 not supported on this GPU, using default precision")
-        print(" ℹ️ For best performance, use GPU with compute capability >= 8.0")
-
-elif torch.backends.mps.is_available():
+# Note: On ZeroGPU, torch.cuda.is_available() is False at module load time
+# CUDA only becomes available inside @spaces.GPU decorated functions
+# So we only check for MPS (local development) and apply those workarounds
+if torch.backends.mps.is_available():
     # MPS device (Apple Silicon) - force fp32 to avoid black image bug
     print(f"MPS device detected (PyTorch {torch.__version__})")
     os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = (
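The timing issue those new comments describe is easy to reproduce. A minimal sketch, assuming a hypothetical generate() function; the @spaces.GPU decorator and its duration argument are the same ones this file already uses:

import spaces
import torch

# At module import time on ZeroGPU no GPU is attached yet, so any
# CUDA-only branch taken here is silently skipped.
print(torch.cuda.is_available())  # False at import time on ZeroGPU

@spaces.GPU(duration=30)
def generate(prompt: str):
    # A GPU is attached only for the duration of this call, so CUDA
    # checks and dtype decisions belong here, not at module scope.
    if torch.cuda.is_available() and torch.cuda.is_bf16_supported():
        dtype = torch.bfloat16  # compute capability >= 8.0 (A100, H100)
    else:
        dtype = torch.float32
    return dtype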
@@ -328,6 +317,11 @@ elif torch.backends.mps.is_available():
     print(" ✓ Enabled global fp32 dtype enforcement (monkey-patched)")
     print(" ✓ Enabled MPS fallback mode")
     print(f" ✓ lowvram: {lowvram_status}, split-cross-attention: {split_attn_status}")
+else:
+    # Not MPS - likely ZeroGPU or other CUDA environment
+    # CUDA optimizations (bfloat16) are handled automatically by ComfyUI's model_management
+    print(f"PyTorch {torch.__version__} loaded")
+    print(" ℹ️ CUDA optimizations will be applied when GPU becomes available")
 
 # Add all the models that load a safetensors file
 model_loaders = [checkpointloadersimple_4, checkpointloadersimple_artistic]
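Stripped of this app's specifics, the MPS branch reduces to a small pattern. A sketch under the assumption that fp32 is forced through a single dtype variable (the real file monkey-patches dtype selection instead, and unet_dtype here is a hypothetical name; PYTORCH_ENABLE_MPS_FALLBACK is a documented PyTorch environment variable):

import os
import torch

if torch.backends.mps.is_available():
    # Documented PyTorch escape hatch: fall back to CPU for ops that
    # MPS does not implement. Must be set before those ops dispatch.
    os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
    # Half precision on MPS is a known cause of black/NaN images with
    # diffusion models, so pin everything to fp32.
    unet_dtype = torch.float32
else:
    # ZeroGPU / CUDA: leave dtype selection to ComfyUI's
    # model_management, which picks bfloat16 on capable GPUs.
    unet_dtype = None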
@@ -340,7 +334,10 @@ valid_models = [
     and not isinstance(getattr(loader[0], "patcher", None), dict)
 ]
 
-model_management.load_models_gpu(valid_models)
+# Note: Commenting out pre-loading to GPU for ZeroGPU compatibility
+# On ZeroGPU, CUDA is not available until inside @spaces.GPU decorator
+# Models will be automatically loaded to GPU when first used
+# model_management.load_models_gpu(valid_models)
 
 
 # Apply torch.compile to diffusion models for 1.5-1.7× speedup
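Deferring the load means the first GPU call pays the transfer cost. If eager placement is ever wanted back, the commented-out call has to move inside a decorated function; a sketch with a hypothetical run_workflow(), reusing the valid_models list and the ComfyUI model_management module from this file's module scope:

import spaces

@spaces.GPU(duration=30)
def run_workflow(prompt: str):
    # CUDA is initialized here, so GPU placement is legal again.
    # valid_models and model_management come from module scope above.
    model_management.load_models_gpu(valid_models)
    # ... run the sampler and decode as usual ...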
@@ -387,14 +384,10 @@ def _apply_torch_compile_optimizations():
     # This is a known PyTorch limitation - torch.compile can't handle torch.device in graph
     # Uncomment when PyTorch/ComfyUI fixes ConstantVariable handling for torch.device
     #
-    #
-    #
-
-
-
-    if torch.cuda.is_available():
-        print("ℹ️ torch.compile disabled (compatibility issues with ComfyUI)")
-        print(" App uses bfloat16 + VAE tiling + cache clearing for optimization")
+    # Note: Can't check torch.cuda.is_available() here on ZeroGPU (CUDA not yet initialized)
+    # torch.compile would need to be applied inside @spaces.GPU decorator
+    print("ℹ️ torch.compile disabled (compatibility issues with ComfyUI)")
+    print(" App uses bfloat16 + VAE tiling + cache clearing for optimization")
 
 
 @spaces.GPU(duration=30)
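If torch.compile is ever re-enabled, the new comments imply it has to run where CUDA is live. A sketch of that shape, with hypothetical names and a small cache so repeat calls don't recompile:

import spaces
import torch

_compiled = {}  # compile each model at most once across calls

@spaces.GPU(duration=60)  # first call absorbs the compile warm-up
def sample(model, latents):
    key = id(model)
    if key not in _compiled:
        # Safe here: CUDA is initialized inside the decorated function.
        _compiled[key] = torch.compile(model, mode="reduce-overhead")
    return _compiled[key](latents)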