Spaces:

rokmr
/

flux2.0

Running on Zero

App Files Files Community

rokmr committed 19 days ago

Commit

894f265

verified ·

1 Parent(s): 0b524ac

Updating with lazy loading

Browse files

Files changed (1) hide show

app.py +38 -24

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-import spaces
 import torch
 from diffusers import Flux2Pipeline
 from huggingface_hub import get_token
@@ -10,11 +10,33 @@ import os
 # Configuration
 repo_id = "diffusers/FLUX.2-dev-bnb-4bit"
-device = "cuda" if torch.cuda.is_available() else "cpu"
-torch_dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
-print(f"Using device: {device}")
-print(f"Using dtype: {torch_dtype}")
 def remote_text_encoder(prompts):
     """Encode prompts using remote text encoder API."""
@@ -34,26 +56,12 @@ def remote_text_encoder(prompts):
         )
         response.raise_for_status()
         prompt_embeds = torch.load(io.BytesIO(response.content))
         return prompt_embeds.to(device)
     except Exception as e:
         raise Exception(f"Failed to encode prompt: {str(e)}")
-# Load the pipeline
-print("Loading Flux2 pipeline...")
-try:
-    pipe = Flux2Pipeline.from_pretrained(
-        repo_id,
-        text_encoder=None,
-        torch_dtype=torch_dtype,
-        device_map="cuda"
-    )
-    if not torch.cuda.is_available():
-        pipe = pipe.to(device)
-    print("Pipeline loaded successfully!")
-except Exception as e:
-    print(f"Error loading pipeline: {e}")
-    raise
 def get_duration(num_inference_steps: int, input_image: Image.Image = None):
     """Calculate dynamic GPU duration based on inference steps and input image."""
     num_images = 0 if input_image is None else 1
@@ -82,9 +90,14 @@ def generate_image(
     if not prompt or prompt.strip() == "":
         raise gr.Error("Please enter a prompt!")
-    progress(0, desc="Encoding prompt...")
     try:
         # Get prompt embeddings from remote encoder
         prompt_embeds = remote_text_encoder(prompt)
@@ -113,7 +126,7 @@ def generate_image(
         # Generate image
         with torch.inference_mode():
-            image = pipe(**pipe_kwargs).images[0]
         progress(1.0, desc="Done!")
@@ -129,6 +142,7 @@ def generate_image(
 # Create Gradio interface
 with gr.Blocks(
     title="Flux2 Image Generator",
 ) as demo:
     gr.Markdown(
         """
@@ -191,7 +205,6 @@ with gr.Blocks(
                 "🚀 Generate Image",
                 variant="primary",
                 size="lg",
-                elem_classes="generate-btn"
             )
             gr.Markdown(
@@ -267,4 +280,5 @@ with gr.Blocks(
     )
 if __name__ == "__main__":
     demo.queue(max_size=20).launch()

+import spaces  # Import spaces FIRST, before any CUDA-related packages
 import torch
 from diffusers import Flux2Pipeline
 from huggingface_hub import get_token
 # Configuration
 repo_id = "diffusers/FLUX.2-dev-bnb-4bit"
+torch_dtype = torch.bfloat16
+print("Starting Flux2 Image Generator...")
+# Global variable to hold the pipeline
+pipe = None
+def load_pipeline():
+    """Return the shared Flux2 pipeline, creating it on the first call.
+
+    The pipeline is cached in the module-level ``pipe`` variable, so
+    ``Flux2Pipeline.from_pretrained`` runs only while ``pipe`` is still None;
+    later calls return the cached object.
+
+    Returns:
+        The object produced by ``Flux2Pipeline.from_pretrained``.
+
+    Raises:
+        Exception: Any error from ``from_pretrained`` is printed and re-raised.
+            ``pipe`` is left as None in that case, so the next call retries.
+    """
+    global pipe
+    if pipe is None:
+        print("Loading Flux2 pipeline...")
+        # `device` is only reported in the log line below; it is not passed to
+        # from_pretrained, where placement is left to device_map.
+        device = "cuda" if torch.cuda.is_available() else "cpu"
+        print(f"Using device: {device}")
+        try:
+            pipe = Flux2Pipeline.from_pretrained(
+                repo_id,
+                # No local text encoder is loaded (presumably because prompts
+                # are embedded via remote_text_encoder - confirm).
+                text_encoder=None,
+                torch_dtype=torch_dtype,
+                # NOTE(review): confirm the installed diffusers release accepts
+                # "auto" here for pipelines; some releases only accept
+                # "balanced" or an explicit device string.
+                device_map="auto"
+            )
+            print("Pipeline loaded successfully!")
+        except Exception as e:
+            # Log for the Space's console, then propagate to the caller.
+            print(f"Error loading pipeline: {e}")
+            raise
+    return pipe
 def remote_text_encoder(prompts):
     """Encode prompts using remote text encoder API."""
         )
         response.raise_for_status()
         prompt_embeds = torch.load(io.BytesIO(response.content))
+        device = "cuda" if torch.cuda.is_available() else "cpu"
         return prompt_embeds.to(device)
     except Exception as e:
         raise Exception(f"Failed to encode prompt: {str(e)}")
 def get_duration(num_inference_steps: int, input_image: Image.Image = None):
     """Calculate dynamic GPU duration based on inference steps and input image."""
     num_images = 0 if input_image is None else 1
     if not prompt or prompt.strip() == "":
         raise gr.Error("Please enter a prompt!")
+    progress(0, desc="Loading model...")
     try:
+        # Load pipeline (lazy loading)
+        pipeline = load_pipeline()
+        progress(0.1, desc="Encoding prompt...")
         # Get prompt embeddings from remote encoder
         prompt_embeds = remote_text_encoder(prompt)
         # Generate image
         with torch.inference_mode():
+            image = pipeline(**pipe_kwargs).images[0]
         progress(1.0, desc="Done!")
 # Create Gradio interface
 with gr.Blocks(
     title="Flux2 Image Generator",
+    theme=gr.themes.Soft(),
 ) as demo:
     gr.Markdown(
         """
                 "🚀 Generate Image",
                 variant="primary",
                 size="lg",
             )
             gr.Markdown(
     )
 if __name__ == "__main__":
+    print("Launching Gradio interface...")
     demo.queue(max_size=20).launch()