import spaces  # Import spaces FIRST, before any CUDA-related packages
import torch
from diffusers import Flux2Pipeline
from huggingface_hub import get_token
import requests
import io
import gradio as gr
from PIL import Image
import os

# Configuration
repo_id = "diffusers/FLUX.2-dev-bnb-4bit"
torch_dtype = torch.bfloat16

print("Starting Flux2 Image Generator...")

# Load the pipeline at startup
print("Loading Flux2 pipeline...")
pipe = None


def load_pipeline_startup():
    """Load the pipeline at startup without touching CUDA."""
    global pipe
    try:
        print("Loading pipeline components...")
        pipe = Flux2Pipeline.from_pretrained(
            repo_id,
            text_encoder=None,  # Prompts are encoded by the remote text encoder
            torch_dtype=torch_dtype,
        )
        # Keep on CPU initially - moved to CUDA when a generation request arrives
        print("Pipeline loaded successfully on CPU!")
    except Exception as e:
        print(f"Warning: Could not load pipeline at startup: {e}")
        print("Pipeline will be loaded on first use.")


# Try to load at startup
load_pipeline_startup()


def remote_text_encoder(prompts):
    """Encode prompts using the remote text encoder API."""
    try:
        # Try multiple methods to get the token
        token = None

        # Method 1: from the huggingface_hub token cache
        try:
            from huggingface_hub import HfFolder
            token = HfFolder.get_token()
        except Exception:
            pass

        # Method 2: get_token from huggingface_hub
        if not token:
            try:
                token = get_token()
            except Exception:
                pass

        # Method 3: from environment variables (Spaces sets these automatically)
        if not token:
            token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")

        # Method 4: from Spaces secrets
        if not token:
            token = os.environ.get("SPACE_TOKEN") or os.environ.get("SPACES_TOKEN")

        if not token:
            raise ValueError(
                "❌ HuggingFace token not found!\n\n"
                "📝 To fix this:\n"
                "1. Go to https://huggingface.co/settings/tokens\n"
                "2. Create a token with 'read' access\n"
                "3. In your Space settings, add a secret named 'HF_TOKEN' with your token value\n"
                "4. Restart your Space\n\n"
                "If running locally, use: huggingface-cli login"
            )

        print(f"Token found: {token[:10]}... (length: {len(token)})")

        response = requests.post(
            "https://remote-text-encoder-flux-2.huggingface.co/predict",
            json={"prompt": prompts},
            headers={
                "Authorization": f"Bearer {token}",
                "Content-Type": "application/json",
            },
            timeout=60,
        )
        response.raise_for_status()

        prompt_embeds = torch.load(io.BytesIO(response.content))
        device = "cuda" if torch.cuda.is_available() else "cpu"
        return prompt_embeds.to(device)

    except requests.HTTPError as e:
        if e.response.status_code == 401:
            raise Exception(
                "❌ Authentication failed (401).\n\n"
                "Your HuggingFace token may not have access to this model.\n"
                "Please ensure your token has permission to access FLUX.2 models."
            )
        elif e.response.status_code == 403:
            raise Exception(
                "❌ Access forbidden (403).\n\n"
                "You may need to accept the model's license agreement on HuggingFace:\n"
                "Visit: https://huggingface.co/black-forest-labs/FLUX.2-dev"
            )
        else:
            raise Exception(f"HTTP error {e.response.status_code}: {str(e)}")
    except Exception as e:
        if "token" in str(e).lower():
            raise  # Re-raise token errors as-is
        raise Exception(f"Failed to encode prompt: {str(e)}")
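# Optional smoke test for the remote encoder (a sketch, not part of the app
# flow; DEBUG_ENCODER is a hypothetical opt-in flag introduced here). Assumes
# a valid HF token is available and the endpoint above is reachable.
if os.environ.get("DEBUG_ENCODER") == "1":
    _embeds = remote_text_encoder("a red bicycle leaning against a brick wall")
    print(f"Encoder smoke test: shape={tuple(_embeds.shape)}, dtype={_embeds.dtype}")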
def get_duration(prompt: str, input_image: Image.Image = None,
                 num_inference_steps: int = 28, guidance_scale: float = 4.0,
                 seed: int = 42, progress=None):
    """Estimate the GPU duration to request, based on step count and input image."""
    num_images = 0 if input_image is None else 1
    step_duration = 1.3 + 0.7 * num_images  # Seconds per step; an input image adds overhead
    base_time = 30  # Time for moving the model to GPU
    generation_time = num_inference_steps * step_duration
    return int(base_time + generation_time + 15)  # Extra 15 s buffer
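# Worked example for the heuristic above (illustrative numbers only):
#   text-to-image, 28 steps:  int(30 + 28 * 1.3 + 15) = 81 s of requested GPU time
#   image-to-image, 28 steps: int(30 + 28 * 2.0 + 15) = 101 s (the input image adds 0.7 s/step)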
@spaces.GPU(duration=get_duration)  # Dynamic GPU allocation per request
def generate_image(
    prompt: str,
    input_image: Image.Image = None,
    num_inference_steps: int = 28,
    guidance_scale: float = 4.0,
    seed: int = 42,
    progress=gr.Progress(),
):
    """
    Generate an image using Flux2 from a text prompt and an optional input image.

    Args:
        prompt: Text description of the desired image
        input_image: Optional input image for image-to-image generation
        num_inference_steps: Number of denoising steps (higher = better quality but slower)
        guidance_scale: How closely to follow the prompt (higher = more strict)
        seed: Random seed for reproducibility (-1 for random)
    """
    global pipe

    print("=== Starting generation ===")
    print(f"Prompt: {prompt[:100]}...")
    print(f"CUDA available: {torch.cuda.is_available()}")

    if not prompt or prompt.strip() == "":
        raise gr.Error("Please enter a prompt!")

    progress(0, desc="Moving model to GPU...")

    try:
        # Load the pipeline now if startup loading failed
        if pipe is None:
            print("Pipeline not loaded at startup, loading now...")
            load_pipeline_startup()
            if pipe is None:
                raise gr.Error("Failed to load pipeline. Please try again or contact support.")

        print("Moving pipeline to CUDA...")
        pipeline = pipe.to("cuda")
        torch.cuda.empty_cache()  # Clear cache before generation

        progress(0.1, desc="Encoding prompt...")
        print("Encoding prompt...")

        # Get prompt embeddings from the remote encoder
        try:
            prompt_embeds = remote_text_encoder(prompt)
            print(f"Prompt embeds shape: {prompt_embeds.shape}")
        except Exception as e:
            print(f"Error encoding prompt: {str(e)}")
            raise gr.Error(
                f"Failed to encode prompt. Please check your HuggingFace token.\nError: {str(e)}"
            )

        progress(0.2, desc="Generating image...")

        # Set up the generator
        generator_device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Generator device: {generator_device}")

        if seed == -1:
            import random
            seed = random.randint(0, 2**32 - 1)
        print(f"Using seed: {seed}")
        generator = torch.Generator(device=generator_device).manual_seed(int(seed))

        # Prepare pipeline arguments
        pipe_kwargs = {
            "prompt_embeds": prompt_embeds,
            "generator": generator,
            "num_inference_steps": int(num_inference_steps),
            "guidance_scale": float(guidance_scale),
        }

        # Add the input image if provided
        if input_image is not None:
            pipe_kwargs["image"] = input_image
            progress(0.25, desc="Processing input image...")
            print("Processing with input image")

        print(f"Starting generation with {num_inference_steps} steps...")

        # Custom callback for progress updates during denoising
        def progress_callback(pipe, step, timestep, callback_kwargs):
            progress(
                0.2 + (step / num_inference_steps) * 0.75,
                desc=f"Generating... Step {step}/{num_inference_steps}",
            )
            return callback_kwargs

        # Generate the image
        with torch.inference_mode():
            result = pipeline(
                **pipe_kwargs,
                callback_on_step_end=progress_callback,
            )

        image = result.images[0]
        print("Generation complete!")
        progress(1.0, desc="Done!")

        # Move the pipeline back to CPU to free GPU memory
        print("Moving pipeline back to CPU...")
        pipe.to("cpu")
        torch.cuda.empty_cache()

        return image

    except gr.Error:
        raise  # Re-raise Gradio errors as-is
    except Exception as e:
        import traceback
        error_msg = f"Error generating image: {str(e)}\n{traceback.format_exc()}"
        print(error_msg)

        # Provide more helpful error messages
        if "CUDA" in str(e) or "out of memory" in str(e).lower():
            raise gr.Error(f"GPU Error: {str(e)}. Try reducing inference steps.")
        elif "token" in str(e).lower() or "401" in str(e):
            raise gr.Error("Authentication failed. Please ensure your HuggingFace token is set correctly.")
        elif "timeout" in str(e).lower():
            raise gr.Error("Request timed out. Please try again.")
        else:
            raise gr.Error(f"Error: {str(e)}")
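# Direct-invocation sketch (hypothetical; bypasses the UI below). Since
# generate_image is a plain function, it can be called from a notebook on the
# Space for quick checks. Note the gr.Progress() default may not update
# anything (or may raise) outside a Gradio event context.
#
#   img = generate_image(
#       "a watercolor fox in a snowy forest",
#       num_inference_steps=28,
#       guidance_scale=4.0,
#       seed=7,
#   )
#   img.save("fox.png")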
""" ) with gr.Row(): with gr.Column(scale=1): gr.Markdown("### 📝 Input") prompt_input = gr.Textbox( label="Prompt", placeholder="Describe the image you want to generate...", lines=4, value="A cozy coffee shop scene on a rainy afternoon, warm lighting streaming through large windows with raindrops, a steaming cup of coffee on a wooden table with latte art, blurred background showing bookshelves and soft bokeh lights, photorealistic, cinematic composition, shallow depth of field" ) image_input = gr.Image( label="Input Image (Optional)", type="pil", sources=["upload", "clipboard"], height=300 ) gr.Markdown("### ⚙️ Parameters") with gr.Row(): num_steps = gr.Slider( minimum=1, maximum=100, value=28, step=1, label="Inference Steps", info="More steps = better quality but slower" ) guidance = gr.Slider( minimum=1.0, maximum=15.0, value=4.0, step=0.5, label="Guidance Scale", info="How closely to follow the prompt" ) seed_input = gr.Number( label="Seed", value=42, precision=0, info="Use -1 for random seed" ) generate_btn = gr.Button( "🚀 Generate Image", variant="primary", size="lg", ) gr.Markdown( """ ### 💡 Tips - **Text-to-Image**: Just enter a prompt and click generate - **Image-to-Image**: Upload an image and describe the changes - Start with 28 steps for a good balance of quality and speed - Higher guidance scale follows your prompt more strictly - Use the same seed to reproduce results - First generation may take longer as the model loads """ ) with gr.Column(scale=1): gr.Markdown("### 🖼️ Output") output_image = gr.Image( label="Generated Image", type="pil", height=600 ) gr.Markdown( """ ### 📊 Examples Try these prompts for inspiration! """ ) # Examples gr.Examples( examples=[ [ "A serene landscape with mountains at sunset, vibrant orange and pink sky, reflected in a calm lake, photorealistic", None, 28, 4.0, 42 ], [ "A futuristic cityscape at night, neon lights, flying cars, cyberpunk style, highly detailed", None, 28, 4.0, 123 ], [ "A cute robot reading a book in a cozy library, warm lighting, digital art style", None, 28, 4.0, 456 ], [ "Macro photography of a dew drop on a leaf, morning light, sharp focus, bokeh background", None, 28, 4.0, 789 ], ], inputs=[prompt_input, image_input, num_steps, guidance, seed_input], outputs=output_image, cache_examples=False, ) # Connect the generate button generate_btn.click( fn=generate_image, inputs=[prompt_input, image_input, num_steps, guidance, seed_input], outputs=output_image, ) if __name__ == "__main__": print("Launching Gradio interface...") demo.queue(max_size=20).launch()