Oysiyl committed
Commit 1f16a19 · 1 Parent(s): 8eb6a33

fixing issues

Files changed (1):
  app.py +17 -24
app.py CHANGED
@@ -255,21 +255,10 @@ def log_progress(message, gr_progress=None, progress_value=None):
 
 
 # Device-specific optimizations
-if torch.cuda.is_available() and not torch.backends.mps.is_available():
-    # CUDA device - check bfloat16 support
-    print(f"CUDA device detected (PyTorch {torch.__version__})")
-
-    # Check if bfloat16 is supported (requires compute capability >= 8.0, e.g., A100, H100)
-    if torch.cuda.is_bf16_supported():
-        print(" ✓ Using bfloat16 precision for optimal performance")
-        print(" ✓ Memory optimizations enabled")
-        # Note: bfloat16 is handled automatically by model_management on CUDA
-        # No dtype forcing needed - ComfyUI uses optimal dtypes by default
-    else:
-        print(" ⚠️ bfloat16 not supported on this GPU, using default precision")
-        print(" ℹ️ For best performance, use GPU with compute capability >= 8.0")
-
-elif torch.backends.mps.is_available():
+# Note: On ZeroGPU, torch.cuda.is_available() is False at module load time
+# CUDA only becomes available inside @spaces.GPU decorated functions
+# So we only check for MPS (local development) and apply those workarounds
+if torch.backends.mps.is_available():
     # MPS device (Apple Silicon) - force fp32 to avoid black image bug
     print(f"MPS device detected (PyTorch {torch.__version__})")
     os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = (
@@ -328,6 +317,11 @@ elif torch.backends.mps.is_available():
     print(" ✓ Enabled global fp32 dtype enforcement (monkey-patched)")
     print(" ✓ Enabled MPS fallback mode")
     print(f" ✓ lowvram: {lowvram_status}, split-cross-attention: {split_attn_status}")
+else:
+    # Not MPS - likely ZeroGPU or other CUDA environment
+    # CUDA optimizations (bfloat16) are handled automatically by ComfyUI's model_management
+    print(f"PyTorch {torch.__version__} loaded")
+    print(" ℹ️ CUDA optimizations will be applied when GPU becomes available")
 
 # Add all the models that load a safetensors file
 model_loaders = [checkpointloadersimple_4, checkpointloadersimple_artistic]
@@ -340,7 +334,10 @@ valid_models = [
     and not isinstance(getattr(loader[0], "patcher", None), dict)
 ]
 
-model_management.load_models_gpu(valid_models)
+# Note: Commenting out pre-loading to GPU for ZeroGPU compatibility
+# On ZeroGPU, CUDA is not available until inside @spaces.GPU decorator
+# Models will be automatically loaded to GPU when first used
+# model_management.load_models_gpu(valid_models)
 
 
 # Apply torch.compile to diffusion models for 1.5-1.7× speedup
@@ -387,14 +384,10 @@ def _apply_torch_compile_optimizations():
 # This is a known PyTorch limitation - torch.compile can't handle torch.device in graph
 # Uncomment when PyTorch/ComfyUI fixes ConstantVariable handling for torch.device
 #
-# if torch.cuda.is_available():
-#     _apply_torch_compile_optimizations()
-# else:
-#     print("ℹ️ Skipping torch.compile (not on CUDA)")
-
-if torch.cuda.is_available():
-    print("ℹ️ torch.compile disabled (compatibility issues with ComfyUI)")
-    print(" App uses bfloat16 + VAE tiling + cache clearing for optimization")
+# Note: Can't check torch.cuda.is_available() here on ZeroGPU (CUDA not yet initialized)
+# torch.compile would need to be applied inside @spaces.GPU decorator
+print("ℹ️ torch.compile disabled (compatibility issues with ComfyUI)")
+print(" App uses bfloat16 + VAE tiling + cache clearing for optimization")
 
 
 @spaces.GPU(duration=30)
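The heart of the fix: on ZeroGPU, torch.cuda.is_available() returns False while app.py is being imported, and a real GPU is attached only while a @spaces.GPU-decorated function runs. A minimal sketch of that pattern, assuming the Hugging Face spaces package; the function name, duration, and tensor math below are illustrative, not taken from app.py.

import spaces
import torch

# At import time on ZeroGPU this prints False: no GPU is attached yet.
print(f"CUDA at import: {torch.cuda.is_available()}")

@spaces.GPU(duration=30)  # a GPU is attached only while this call runs
def generate(seed: int) -> torch.Tensor:
    # CUDA is initialized here, so device-specific setup is safe.
    torch.manual_seed(seed)
    x = torch.randn(4, 4, device="cuda")
    return (x @ x).cpu()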
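The MPS branch the diff keeps forces fp32 and enables CPU fallback to dodge the known MPS half-precision black-image bug. A standalone sketch of those two knobs (app.py's monkey-patched global dtype enforcement is more involved than this):

import os

# Ops without an MPS kernel fall back to CPU instead of raising.
os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"

import torch

if torch.backends.mps.is_available():
    # fp32 instead of fp16 avoids the black-image outputs that some
    # diffusion models produce on Apple Silicon.
    x = torch.randn(1, 4, 64, 64, device="mps", dtype=torch.float32)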
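If eager GPU placement is ever wanted again, the commented-out model_management.load_models_gpu(valid_models) call can move inside a @spaces.GPU function, where CUDA exists. A sketch assuming app.py's module scope (spaces, model_management, valid_models); the run_workflow name and one-time guard are illustrative, and relying on ComfyUI's lazy loading, as the commit does, remains the simpler option.

_models_on_gpu = False  # illustrative one-time guard

@spaces.GPU(duration=30)
def run_workflow(prompt: str):
    global _models_on_gpu
    if not _models_on_gpu:
        # Safe here: a GPU is attached for the duration of this call.
        model_management.load_models_gpu(valid_models)
        _models_on_gpu = True
    # ... rest of the ComfyUI graph execution ...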
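Similarly, the new comment notes that torch.compile would have to run inside the decorator, since Inductor's CUDA backend needs an initialized device. A sketch of that shape; the Linear stand-in and cache dict are placeholders, and the commit leaves compilation disabled anyway because of the ComfyUI torch.device graph issue cited above.

import spaces
import torch

_compiled = {}  # reuse the compiled module across calls

@spaces.GPU(duration=60)  # illustrative duration
def fast_forward(x: torch.Tensor) -> torch.Tensor:
    # Compile only here, once CUDA is up, and cache the result
    # so later calls skip recompilation.
    if "model" not in _compiled:
        model = torch.nn.Linear(64, 64).cuda()
        _compiled["model"] = torch.compile(model, mode="reduce-overhead")
    return _compiled["model"](x.cuda()).cpu()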