Optimizing for GPU time
app.py CHANGED
@@ -14,29 +14,21 @@ torch_dtype = torch.bfloat16
 
 print("Starting Flux2 Image Generator...")
 
-#
+# Load the pipeline at startup (NOT inside GPU decorator)
+print("Loading Flux2 pipeline...")
 pipe = None
 
-    ... (removed lines 20-30, truncated in the diff view)
-            text_encoder=None,
-            torch_dtype=torch_dtype,
-            device_map="cuda"
-        )
-        print("Pipeline loaded successfully!")
-    except Exception as e:
-        print(f"Error loading pipeline: {e}")
-        raise
-    return pipe
+try:
+    pipe = Flux2Pipeline.from_pretrained(
+        repo_id,
+        text_encoder=None,
+        torch_dtype=torch_dtype,
+        device_map="balanced"  # Use balanced for CPU during startup
+    )
+    print("Pipeline loaded successfully!")
+except Exception as e:
+    print(f"Error loading pipeline: {e}")
+    # Don't raise - will try to load later if needed
 
 def remote_text_encoder(prompts):
     """Encode prompts using remote text encoder API."""
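The rewritten startup block follows the usual ZeroGPU pattern: run the slow `from_pretrained` once at import time on CPU, and attach the GPU only inside the `@spaces.GPU`-decorated handler, so model download and deserialization never consume GPU seconds. A minimal sketch of that pattern, assuming the `spaces` (ZeroGPU) package is available and using a placeholder repo id rather than the app's actual `repo_id`:

```python
# Sketch of the load-at-startup / move-on-demand pattern (assumes ZeroGPU's
# `spaces` package; the repo id below is a placeholder, not the app's value).
import spaces
import torch
from diffusers import DiffusionPipeline

# Runs once at import time, on CPU: slow, but costs no GPU quota.
pipe = DiffusionPipeline.from_pretrained(
    "some-org/some-model",
    torch_dtype=torch.bfloat16,
)

@spaces.GPU  # GPU is attached only while this function runs
def generate(prompt: str):
    pipe.to("cuda")  # fast relative to from_pretrained
    return pipe(prompt).images[0]
```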
@@ -46,25 +38,39 @@ def remote_text_encoder(prompts):
 
     # Method 1: From huggingface_hub
     try:
-        ... (removed line 49, truncated in the diff view)
+        from huggingface_hub import HfFolder
+        token = HfFolder.get_token()
     except:
         pass
 
-    # Method 2:
+    # Method 2: get_token from huggingface_hub
+    if not token:
+        try:
+            token = get_token()
+        except:
+            pass
+
+    # Method 3: From environment variable (Spaces sets this automatically)
     if not token:
         token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
 
-    # Method
+    # Method 4: From Spaces secrets
     if not token:
-        token = os.environ.get("SPACE_TOKEN")
+        token = os.environ.get("SPACE_TOKEN") or os.environ.get("SPACES_TOKEN")
 
     if not token:
         raise ValueError(
-            "HuggingFace token not found
-            "
-            "
+            "❌ HuggingFace token not found!\n\n"
+            "📝 To fix this:\n"
+            "1. Go to https://huggingface.co/settings/tokens\n"
+            "2. Create a token with 'read' access\n"
+            "3. In your Space settings, add a secret named 'HF_TOKEN' with your token value\n"
+            "4. Restart your Space\n\n"
+            "If running locally, use: huggingface-cli login"
         )
 
+    print(f"Token found: {token[:10]}... (length: {len(token)})")
+
     response = requests.post(
         "https://remote-text-encoder-flux-2.huggingface.co/predict",
         json={"prompt": prompts},
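A possible future simplification, not part of this commit: `huggingface_hub.get_token()` already checks the `HF_TOKEN` environment variable and the token saved by `huggingface-cli login`, and `HfFolder` is deprecated in recent `huggingface_hub` releases, so Methods 1 through 3 could collapse into one call. A sketch, assuming only the documented `huggingface_hub` API (`resolve_token` is a hypothetical helper name):

```python
# Consolidated token lookup (assumes huggingface_hub >= 0.19, where
# get_token() covers both HF_TOKEN and the locally saved login token).
import os
from huggingface_hub import get_token

def resolve_token() -> str:
    # Hypothetical helper: get_token() first, then the Spaces-specific fallbacks.
    token = get_token() or os.environ.get("SPACE_TOKEN") or os.environ.get("SPACES_TOKEN")
    if not token:
        raise ValueError("HuggingFace token not found; set HF_TOKEN or run `huggingface-cli login`.")
    return token
```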
@@ -82,23 +88,31 @@ def remote_text_encoder(prompts):
     except requests.HTTPError as e:
         if e.response.status_code == 401:
             raise Exception(
-                "Authentication failed (401)
+                "❌ Authentication failed (401).\n\n"
+                "Your HuggingFace token may not have access to this model.\n"
                 "Please ensure your token has permission to access FLUX.2 models."
             )
         elif e.response.status_code == 403:
             raise Exception(
-                "Access forbidden (403)
+                "❌ Access forbidden (403).\n\n"
+                "You may need to accept the model's license agreement on HuggingFace:\n"
+                "Visit: https://huggingface.co/black-forest-labs/FLUX.1-dev"
             )
         else:
             raise Exception(f"HTTP error {e.response.status_code}: {str(e)}")
     except Exception as e:
+        if "token" in str(e).lower():
+            raise  # Re-raise token errors as-is
         raise Exception(f"Failed to encode prompt: {str(e)}")
 
 def get_duration(prompt: str, input_image: Image.Image = None, num_inference_steps: int = 28, guidance_scale: float = 4.0, seed: int = 42, progress=None):
     """Calculate dynamic GPU duration based on inference steps and input image."""
     num_images = 0 if input_image is None else 1
-    step_duration = 1 + 0.7 * num_images
-    ... (removed line 101, truncated in the diff view)
+    step_duration = 1.3 + 0.7 * num_images  # Increased from 1 to 1.3
+    # Add extra time for model transfer to GPU + generation
+    base_time = 30  # Time for moving model to GPU
+    generation_time = num_inference_steps * step_duration
+    return int(base_time + generation_time + 15)  # Extra 15s buffer
 
 @spaces.GPU(duration=get_duration)  # Dynamic GPU allocation
 def generate_image(
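For the default text-to-image call (28 steps, no input image), the new formula reserves int(30 + 28 × 1.3 + 15) = 81 GPU seconds instead of a fixed window, and supplying an input image adds 0.7 s per step. ZeroGPU accepts a callable for `duration`: it is invoked with the same arguments as the decorated function, which is why `get_duration` mirrors `generate_image`'s signature. A stripped-down sketch of the mechanism, assuming the `spaces` package:

```python
# Sketch: dynamic ZeroGPU duration. The callable passed to `duration` receives
# the same arguments as the decorated function and returns seconds to reserve.
import spaces

def get_duration(prompt: str, num_inference_steps: int = 28, **_) -> int:
    base_time = 30  # budget for moving weights to the GPU
    return int(base_time + num_inference_steps * 1.3 + 15)

@spaces.GPU(duration=get_duration)
def generate_image(prompt: str, num_inference_steps: int = 28):
    ...  # GPU is reserved for roughly get_duration(...) seconds
```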
@@ -119,6 +133,8 @@ def generate_image(
         guidance_scale: How closely to follow the prompt (higher = more strict)
         seed: Random seed for reproducibility (-1 for random)
     """
+    global pipe
+
     print(f"=== Starting generation ===")
     print(f"Prompt: {prompt[:100]}...")
     print(f"CUDA available: {torch.cuda.is_available()}")
@@ -126,13 +142,15 @@ def generate_image(
     if not prompt or prompt.strip() == "":
         raise gr.Error("Please enter a prompt!")
 
-    progress(0, desc="
+    progress(0, desc="Moving model to GPU...")
 
     try:
-        #
-        ... (removed lines 133-135, truncated in the diff view)
+        # Move pipeline to GPU
+        if pipe is None:
+            raise gr.Error("Pipeline not loaded. Please refresh the page.")
+
+        print("Moving pipeline to CUDA...")
+        pipe = pipe.to("cuda")
 
         progress(0.1, desc="Encoding prompt...")
         print("Encoding prompt...")
@@ -145,7 +163,7 @@ def generate_image(
             print(f"Error encoding prompt: {str(e)}")
             raise gr.Error(f"Failed to encode prompt. Please check your HuggingFace token. Error: {str(e)}")
 
-        progress(0.
+        progress(0.2, desc="Generating image...")
 
         # Set up generator
         generator_device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -169,14 +187,14 @@ def generate_image(
         # Add input image if provided
         if input_image is not None:
             pipe_kwargs["image"] = input_image
-            progress(0.
+            progress(0.25, desc="Processing input image...")
             print("Processing with input image")
 
         print(f"Starting generation with {num_inference_steps} steps...")
 
         # Generate image
         with torch.inference_mode():
-            result =
+            result = pipe(**pipe_kwargs)
             image = result.images[0]
 
         print("Generation complete!")
@@ -193,8 +211,8 @@ def generate_image(
         print(error_msg)
 
         # Provide more helpful error messages
-        if "CUDA" in str(e):
-            raise gr.Error(f"GPU Error: {str(e)}.
+        if "CUDA" in str(e) or "out of memory" in str(e).lower():
+            raise gr.Error(f"GPU Error: {str(e)}. Try reducing inference steps.")
         elif "token" in str(e).lower() or "401" in str(e):
             raise gr.Error("Authentication failed. Please ensure your HuggingFace token is set correctly.")
         elif "timeout" in str(e).lower():