Spaces:

Raxephion
/

CipherCore-SD1.5-Image-Generator

Runtime error

App Files Files Community

Raxephion committed on May 24, 2025

Commit

ce1167c

verified ·

1 Parent(s): 049d1c0

Update app.py

Browse files

Files changed (1) hide show

app.py +74 -10

app.py CHANGED Viewed

@@ -4,7 +4,7 @@ Author: @Raxephion 2025
 """
 import gradio as gr
-import numpy as np # <-- Needed for np.iinfo
 import random
 import torch
 from diffusers import StableDiffusionPipeline
@@ -95,12 +95,21 @@ if INITIAL_MODEL_ID:
     print(f"\nLoading initial model '{INITIAL_MODEL_ID}' on startup...")
     try:
         # Load the pipeline onto the initial device and dtype
-        current_pipeline = StableDiffusionPipeline.from_pretrained(
             INITIAL_MODEL_ID,
             torch_dtype=initial_dtype_to_use,
             safety_checker=None, # <<< SAFETY CHECKER DISABLED <<<
         )
-        current_pipeline = current_pipeline.to(initial_device_to_use)
         current_model_id = INITIAL_MODEL_ID
         current_device_loaded = torch.device(initial_device_to_use)
         print(f"Initial model loaded successfully on {current_device_loaded}.")
@@ -146,10 +155,11 @@ def infer(
     size,                     # From size_dropdown
     seed,                     # From seed_input (now a Slider)
     randomize_seed,           # From randomize_seed_checkbox
     progress=gr.Progress(track_tqdm=True), # Added progress argument from template
 ):
     """Generates an image using the selected model and parameters on the chosen device."""
-    global current_pipeline, current_model_id, current_device_loaded, SCHEDULER_MAP, MAX_SEED # MAX_SEED is global
     # This check is done before parameter parsing so we can determine device/dtype for loading
     # Need to redo some parameter parsing here to get device_to_use early
@@ -165,7 +175,6 @@ def infer(
     # 1. Load/Switch Model if necessary
     # Check if the requested model identifier OR the requested device has changed
-    # Use string comparison for current_device_loaded as it's a torch.device object
     if current_pipeline is None or current_model_id != model_identifier or (current_device_loaded is not None and str(current_device_loaded) != temp_device_to_use):
          print(f"Loading model: {model_identifier} onto {temp_device_to_use} with dtype {temp_dtype_to_use}...")
@@ -180,6 +189,7 @@ def infer(
                        print(f"Warning: Failed to move previous pipeline to CPU: {move_e}")
               del current_pipeline
               current_pipeline = None # Set to None immediately
               if str(current_device_loaded) == "cuda":
                   try:
                       torch.cuda.empty_cache()
@@ -190,7 +200,7 @@ def infer(
          # Ensure the device is actually available if not CPU (redundant with earlier check but safe)
          if temp_device_to_use == "cuda":
               if not torch.cuda.is_available():
-                   raise gr.Error("CUDA selected but not available to PyTorch on this Space. Please select CPU or ensure the Space is configured with a GPU and the CUDA version of PyTorch is installed.")
          try:
              pipeline = StableDiffusionPipeline.from_pretrained(
@@ -198,6 +208,24 @@ def infer(
                  torch_dtype=temp_dtype_to_use, # Use the determined dtype for loading
                  safety_checker=None, # DISABLED
              )
              pipeline = pipeline.to(temp_device_to_use) # Use the determined device
              current_pipeline = pipeline
@@ -244,6 +272,8 @@ def infer(
     # Re-determine device_to_use and dtype_to_use *after* ensuring pipeline is loaded
     # They should match current_device_loaded and the pipeline's dtype
     device_to_use = str(current_pipeline.device) if current_pipeline else ("cuda" if selected_device_str == "GPU" and "GPU" in AVAILABLE_DEVICES else "cpu")
     dtype_to_use = current_pipeline.dtype if current_pipeline else torch.float32 # Fallback if somehow pipeline is still None
@@ -253,6 +283,30 @@ def infer(
          raise gr.Error("Model failed to load during setup or switching. Cannot generate image.")
     # 2. Configure Scheduler
     selected_scheduler_class = SCHEDULER_MAP.get(scheduler_name)
     if selected_scheduler_class is None:
@@ -348,7 +402,7 @@ def infer(
     if width <= 0 or height <= 0:
          raise ValueError("Image width and height must be positive.")
-    print(f"Generating: Prompt='{prompt[:80]}{'...' if len(prompt) > 80 else ''}', NegPrompt='{negative_prompt[:80]}{'...' if len(negative_prompt) > 80 else ''}', Steps={num_inference_steps_int}, CFG={guidance_scale_float}, Size={width}x{height}, Scheduler={scheduler_name}, Seed={seed_int if generator else 'System Random'}, Device={device_to_use}, Dtype={dtype_to_use}")
     start_time = time.time()
     try:
@@ -367,8 +421,6 @@ def infer(
             # Add VAE usage here if needed for specific models that require it
             # vae=...
-            # Potentially add attention slicing/xformers/etc. for memory efficiency
-            # enable_attention_slicing="auto", # Can help with VRAM on smaller GPUs
             # enable_xformers_memory_efficient_attention() # Needs xformers installed & compatible GPU
         )
         end_time = time.time()
@@ -488,6 +540,17 @@ with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo: # Added Soft theme from
                      seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0, interactive=True) # Use 0 as default, interactive initially
                      randomize_seed_checkbox = gr.Checkbox(label="Randomize seed", value=True) # Simplified label
             generate_button = gr.Button("✨ Generate Image ✨", variant="primary", scale=1) # Added emojis
@@ -520,7 +583,8 @@ with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo: # Added Soft theme from
             scheduler_dropdown,
             size_dropdown,
             seed_input,
-            randomize_seed_checkbox, # Pass the checkbox value
         ],
         outputs=[output_image, actual_seed_output], # Return image and the actual seed used
         api_name="generate" # Optional: For API access

 """
 import gradio as gr
+import numpy as np
 import random
 import torch
 from diffusers import StableDiffusionPipeline
     print(f"\nLoading initial model '{INITIAL_MODEL_ID}' on startup...")
     try:
         # Load the pipeline onto the initial device and dtype
+        pipeline = StableDiffusionPipeline.from_pretrained(
             INITIAL_MODEL_ID,
             torch_dtype=initial_dtype_to_use,
             safety_checker=None, # <<< SAFETY CHECKER DISABLED <<<
         )
+        # --- Optimizations during initial load ---
+        # Attention slicing is NOT enabled at startup: the call below is left
+        # commented out. infer() applies or removes slicing on each request
+        # according to the UI toggle, so the startup state is overridden there.
+        # pipeline.enable_attention_slicing() # Uncomment to enable by default on initial load
+        pipeline = pipeline.to(initial_device_to_use) # Move to the initial device
+        current_pipeline = pipeline
         current_model_id = INITIAL_MODEL_ID
         current_device_loaded = torch.device(initial_device_to_use)
         print(f"Initial model loaded successfully on {current_device_loaded}.")
     size,                     # From size_dropdown
     seed,                     # From seed_input (now a Slider)
     randomize_seed,           # From randomize_seed_checkbox
+    enable_attention_slicing, # <-- New input for the optimization toggle
     progress=gr.Progress(track_tqdm=True), # Added progress argument from template
 ):
     """Generates an image using the selected model and parameters on the chosen device."""
+    global current_pipeline, current_model_id, current_device_loaded, SCHEDULER_MAP, MAX_SEED
     # This check is done before parameter parsing so we can determine device/dtype for loading
     # Need to redo some parameter parsing here to get device_to_use early
     # 1. Load/Switch Model if necessary
     # Check if the requested model identifier OR the requested device has changed
     if current_pipeline is None or current_model_id != model_identifier or (current_device_loaded is not None and str(current_device_loaded) != temp_device_to_use):
          print(f"Loading model: {model_identifier} onto {temp_device_to_use} with dtype {temp_dtype_to_use}...")
                        print(f"Warning: Failed to move previous pipeline to CPU: {move_e}")
               del current_pipeline
               current_pipeline = None # Set to None immediately
+              # Attempt to clear CUDA cache if using GPU (from the previous device)
               if str(current_device_loaded) == "cuda":
                   try:
                       torch.cuda.empty_cache()
          # Ensure the device is actually available if not CPU (redundant with earlier check but safe)
          if temp_device_to_use == "cuda":
               if not torch.cuda.is_available():
+                   raise gr.Error("GPU selected but CUDA is not available to PyTorch on this Space. Please select CPU or ensure the Space is configured with a GPU and the CUDA version of PyTorch is installed.")
          try:
              pipeline = StableDiffusionPipeline.from_pretrained(
                  torch_dtype=temp_dtype_to_use, # Use the determined dtype for loading
                  safety_checker=None, # DISABLED
              )
+             # Apply optimizations based on UI input during load
+             if enable_attention_slicing and temp_device_to_use == "cuda": # Only apply on GPU
+                 try:
+                     pipeline.enable_attention_slicing()
+                     print("Attention Slicing enabled.")
+                 except Exception as e:
+                     print(f"Warning: Failed to enable Attention Slicing: {e}")
+                     gr.Warning(f"Failed to enable Attention Slicing. Error: {e}")
+             else:
+                 try:
+                     pipeline.disable_attention_slicing() # Ensure it's off if toggle is off or on CPU
+                     print("Attention Slicing disabled.")
+                 except Exception as e:
+                      # May fail if it wasn't enabled, ignore
+                      pass
              pipeline = pipeline.to(temp_device_to_use) # Use the determined device
              current_pipeline = pipeline
     # Re-determine device_to_use and dtype_to_use *after* ensuring pipeline is loaded
     # They should match current_device_loaded and the pipeline's dtype
+    # This is crucial because current_pipeline.device and dtype are the definitive source
+    # after a potentially successful load or switch.
     device_to_use = str(current_pipeline.device) if current_pipeline else ("cuda" if selected_device_str == "GPU" and "GPU" in AVAILABLE_DEVICES else "cpu")
     dtype_to_use = current_pipeline.dtype if current_pipeline else torch.float32 # Fallback if somehow pipeline is still None
          raise gr.Error("Model failed to load during setup or switching. Cannot generate image.")
+    # --- Apply Optimizations *before* generation if model was already loaded ---
+    # If the model didn't need reloading, we need to apply/remove slicing here
+    if str(current_pipeline.device) == "cuda": # Only attempt on GPU
+        if enable_attention_slicing:
+             try:
+                 current_pipeline.enable_attention_slicing()
+                 # print("Attention Slicing enabled for generation.") # Too verbose
+             except Exception as e:
+                  print(f"Warning: Failed to enable Attention Slicing before generation: {e}")
+                  gr.Warning(f"Failed to enable Attention Slicing. Error: {e}")
+        else:
+             try:
+                 current_pipeline.disable_attention_slicing()
+                 # print("Attention Slicing disabled for generation.") # Too verbose
+             except Exception as e:
+                  # May fail if it wasn't enabled, ignore
+                  pass
+    else: # Ensure slicing is off on CPU
+         try:
+              current_pipeline.disable_attention_slicing()
+         except Exception as e:
+              pass # Ignore
     # 2. Configure Scheduler
     selected_scheduler_class = SCHEDULER_MAP.get(scheduler_name)
     if selected_scheduler_class is None:
     if width <= 0 or height <= 0:
          raise ValueError("Image width and height must be positive.")
+    print(f"Generating: Prompt='{prompt[:80]}{'...' if len(prompt) > 80 else ''}', NegPrompt='{negative_prompt[:80]}{'...' if len(negative_prompt) > 80 else ''}', Steps={num_inference_steps_int}, CFG={guidance_scale_float}, Size={width}x{height}, Scheduler={scheduler_name}, Seed={seed_int if generator else 'System Random'}, Device={device_to_use}, Dtype={dtype_to_use}, Slicing Enabled={enable_attention_slicing and device_to_use == 'cuda'}")
     start_time = time.time()
     try:
             # Add VAE usage here if needed for specific models that require it
             # vae=...
             # enable_xformers_memory_efficient_attention() # Needs xformers installed & compatible GPU
         )
         end_time = time.time()
                      seed_input = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0, interactive=True) # Use 0 as default, interactive initially
                      randomize_seed_checkbox = gr.Checkbox(label="Randomize seed", value=True) # Simplified label
+                # --- New: Memory Optimization Toggle ---
+                with gr.Row():
+                     # Default to enabled if GPU is available, otherwise off
+                     default_slicing = True if "GPU" in AVAILABLE_DEVICES else False
+                     enable_attention_slicing_checkbox = gr.Checkbox(
+                         label="Enable Attention Slicing (Memory Optimization - GPU only)",
+                         value=default_slicing,
+                         interactive="GPU" in AVAILABLE_DEVICES # Only interactive if GPU is an option
+                     )
+                     gr.Markdown("*(Helps reduce VRAM usage, may slightly affect speed/quality)*")
             generate_button = gr.Button("✨ Generate Image ✨", variant="primary", scale=1) # Added emojis
             scheduler_dropdown,
             size_dropdown,
             seed_input,
+            randomize_seed_checkbox,
+            enable_attention_slicing_checkbox, # <-- Pass the new checkbox value
         ],
         outputs=[output_image, actual_seed_output], # Return image and the actual seed used
         api_name="generate" # Optional: For API access