# flux2.0 / app.py
import spaces # Import spaces FIRST, before any CUDA-related packages
import torch
from diffusers import Flux2Pipeline
from huggingface_hub import get_token
import requests
import io
import gradio as gr
from PIL import Image
import os
# Configuration
repo_id = "diffusers/FLUX.2-dev-bnb-4bit"
torch_dtype = torch.bfloat16
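# NOTE: "diffusers/FLUX.2-dev-bnb-4bit" is (per its name) a bitsandbytes 4-bit
# quantization of FLUX.2-dev; bfloat16 is used for the modules that stay unquantized.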
print("Starting Flux2 Image Generator...")
# Load the pipeline at startup
print("Loading Flux2 pipeline...")
pipe = None
def load_pipeline_startup():
    """Load pipeline at startup without CUDA."""
    global pipe
    try:
        print("Loading pipeline components...")
        pipe = Flux2Pipeline.from_pretrained(
            repo_id,
            text_encoder=None,  # prompts are encoded by the remote text encoder API below
            torch_dtype=torch_dtype,
        )
        # Keep on CPU initially - will move to CUDA when needed
        print("Pipeline loaded successfully on CPU!")
    except Exception as e:
        print(f"Warning: Could not load pipeline at startup: {e}")
        print("Pipeline will be loaded on first use.")

# Try to load at startup
load_pipeline_startup()
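# On ZeroGPU Spaces, CUDA is only usable inside functions decorated with
# @spaces.GPU, so the weights stay on CPU until generate_image() runs.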
def remote_text_encoder(prompts):
    """Encode prompts using remote text encoder API."""
    try:
        # Try multiple methods to get the token
        token = None
        # Method 1: From huggingface_hub
        try:
            from huggingface_hub import HfFolder
            token = HfFolder.get_token()
        except Exception:
            pass
        # Method 2: get_token from huggingface_hub
        if not token:
            try:
                token = get_token()
            except Exception:
                pass
        # Method 3: From environment variable (Spaces sets this automatically)
        if not token:
            token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
        # Method 4: From Spaces secrets
        if not token:
            token = os.environ.get("SPACE_TOKEN") or os.environ.get("SPACES_TOKEN")
        if not token:
            raise ValueError(
                "❌ HuggingFace token not found!\n\n"
                "📝 To fix this:\n"
                "1. Go to https://huggingface.co/settings/tokens\n"
                "2. Create a token with 'read' access\n"
                "3. In your Space settings, add a secret named 'HF_TOKEN' with your token value\n"
                "4. Restart your Space\n\n"
                "If running locally, use: huggingface-cli login"
            )
        print(f"Token found: {token[:10]}... (length: {len(token)})")
        response = requests.post(
            "https://remote-text-encoder-flux-2.huggingface.co/predict",
            json={"prompt": prompts},
            headers={
                "Authorization": f"Bearer {token}",
                "Content-Type": "application/json"
            },
            timeout=60
        )
        response.raise_for_status()
        prompt_embeds = torch.load(io.BytesIO(response.content))
        device = "cuda" if torch.cuda.is_available() else "cpu"
        return prompt_embeds.to(device)
    except requests.HTTPError as e:
        if e.response.status_code == 401:
            raise Exception(
                "❌ Authentication failed (401).\n\n"
                "Your HuggingFace token may not have access to this model.\n"
                "Please ensure your token has permission to access FLUX.2 models."
            )
        elif e.response.status_code == 403:
            raise Exception(
                "❌ Access forbidden (403).\n\n"
                "You may need to accept the model's license agreement on HuggingFace:\n"
                "Visit: https://huggingface.co/black-forest-labs/FLUX.2-dev"
            )
        else:
            raise Exception(f"HTTP error {e.response.status_code}: {str(e)}")
    except Exception as e:
        if "token" in str(e).lower():
            raise  # Re-raise token errors as-is
        raise Exception(f"Failed to encode prompt: {str(e)}")
def get_duration(prompt: str, input_image: Image.Image = None, num_inference_steps: int = 28, guidance_scale: float = 4.0, seed: int = 42, progress=None):
    """Calculate dynamic GPU duration based on inference steps and input image."""
    num_images = 0 if input_image is None else 1
    step_duration = 1.3 + 0.7 * num_images  # Increased from 1 to 1.3
    # Add extra time for model transfer to GPU + generation
    base_time = 30  # Time for moving model to GPU
    generation_time = num_inference_steps * step_duration
    return int(base_time + generation_time + 15)  # Extra 15s buffer
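# Passing a callable as `duration` lets ZeroGPU size the allocation per request:
# it receives the same arguments as generate_image (matching signature above)
# and returns the number of seconds of GPU time to reserve.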
@spaces.GPU(duration=get_duration)  # Dynamic GPU allocation
def generate_image(
    prompt: str,
    input_image: Image.Image = None,
    num_inference_steps: int = 28,
    guidance_scale: float = 4.0,
    seed: int = 42,
    progress=gr.Progress()
):
    """
    Generate an image using Flux2 based on text prompt and optional input image.

    Args:
        prompt: Text description of the desired image
        input_image: Optional input image for image-to-image generation
        num_inference_steps: Number of denoising steps (higher = better quality but slower)
        guidance_scale: How closely to follow the prompt (higher = more strict)
        seed: Random seed for reproducibility (-1 for random)
    """
    global pipe
    print("=== Starting generation ===")
    print(f"Prompt: {prompt[:100]}...")
    print(f"CUDA available: {torch.cuda.is_available()}")

    if not prompt or prompt.strip() == "":
        raise gr.Error("Please enter a prompt!")

    progress(0, desc="Moving model to GPU...")
    try:
        # Load or get pipeline
        if pipe is None:
            print("Pipeline not loaded at startup, loading now...")
            load_pipeline_startup()
            if pipe is None:
                raise gr.Error("Failed to load pipeline. Please try again or contact support.")

        print("Moving pipeline to CUDA...")
        pipeline = pipe.to("cuda")
        torch.cuda.empty_cache()  # Clear cache before generation

        progress(0.1, desc="Encoding prompt...")
        print("Encoding prompt...")
        # Get prompt embeddings from remote encoder
        try:
            prompt_embeds = remote_text_encoder(prompt)
            print(f"Prompt embeds shape: {prompt_embeds.shape}")
        except Exception as e:
            print(f"Error encoding prompt: {str(e)}")
            raise gr.Error(f"Failed to encode prompt. Please check your HuggingFace token. Error: {str(e)}")

        progress(0.2, desc="Generating image...")
        # Set up generator
        generator_device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Generator device: {generator_device}")
        if seed == -1:
            import random
            seed = random.randint(0, 2**32 - 1)
        print(f"Using seed: {seed}")
        generator = torch.Generator(device=generator_device).manual_seed(int(seed))

        # Prepare pipeline arguments
        pipe_kwargs = {
            "prompt_embeds": prompt_embeds,
            "generator": generator,
            "num_inference_steps": int(num_inference_steps),
            "guidance_scale": float(guidance_scale),
        }
        # Add input image if provided
        if input_image is not None:
            pipe_kwargs["image"] = input_image
            progress(0.25, desc="Processing input image...")
            print("Processing with input image")

        print(f"Starting generation with {num_inference_steps} steps...")

        # Custom callback for progress updates
        def progress_callback(pipe, step, timestep, callback_kwargs):
            progress((0.2 + (step / num_inference_steps) * 0.75),
                     desc=f"Generating... Step {step}/{num_inference_steps}")
            return callback_kwargs

        # Generate image
        with torch.inference_mode():
            result = pipeline(
                **pipe_kwargs,
                callback_on_step_end=progress_callback,
            )
        image = result.images[0]

        print("Generation complete!")
        progress(1.0, desc="Done!")

        # Move pipeline back to CPU to free GPU memory
        print("Moving pipeline back to CPU...")
        pipe.to("cpu")
        torch.cuda.empty_cache()

        return image
    except gr.Error:
        # Re-raise Gradio errors as-is
        raise
    except Exception as e:
        import traceback
        error_msg = f"Error generating image: {str(e)}\n{traceback.format_exc()}"
        print(error_msg)
        # Provide more helpful error messages
        if "CUDA" in str(e) or "out of memory" in str(e).lower():
            raise gr.Error(f"GPU Error: {str(e)}. Try reducing inference steps.")
        elif "token" in str(e).lower() or "401" in str(e):
            raise gr.Error("Authentication failed. Please ensure your HuggingFace token is set correctly.")
        elif "timeout" in str(e).lower():
            raise gr.Error("Request timed out. Please try again.")
        else:
            raise gr.Error(f"Error: {str(e)}")
# Create Gradio interface
with gr.Blocks(
    title="Flux2 Image Generator",
) as demo:
    gr.Markdown(
        """
        # 🎨 Flux2 Image Generator

        Generate stunning images using **FLUX.2-dev** with 4-bit quantization for efficient inference.
        Supports both **text-to-image** and **image-to-image** generation.

        ⚡ **Powered by Hugging Face Zero GPU** - Automatic GPU allocation on demand!
        """
    )
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 📝 Input")
            prompt_input = gr.Textbox(
                label="Prompt",
                placeholder="Describe the image you want to generate...",
                lines=4,
                value="A cozy coffee shop scene on a rainy afternoon, warm lighting streaming through large windows with raindrops, a steaming cup of coffee on a wooden table with latte art, blurred background showing bookshelves and soft bokeh lights, photorealistic, cinematic composition, shallow depth of field"
            )
            image_input = gr.Image(
                label="Input Image (Optional)",
                type="pil",
                sources=["upload", "clipboard"],
                height=300
            )
gr.Markdown("### ⚙️ Parameters")
with gr.Row():
num_steps = gr.Slider(
minimum=1,
maximum=100,
value=28,
step=1,
label="Inference Steps",
info="More steps = better quality but slower"
)
guidance = gr.Slider(
minimum=1.0,
maximum=15.0,
value=4.0,
step=0.5,
label="Guidance Scale",
info="How closely to follow the prompt"
)
seed_input = gr.Number(
label="Seed",
value=42,
precision=0,
info="Use -1 for random seed"
)
generate_btn = gr.Button(
"🚀 Generate Image",
variant="primary",
size="lg",
)
            gr.Markdown(
                """
                ### 💡 Tips
                - **Text-to-Image**: Just enter a prompt and click generate
                - **Image-to-Image**: Upload an image and describe the changes
                - Start with 28 steps for a good balance of quality and speed
                - Higher guidance scale follows your prompt more strictly
                - Use the same seed to reproduce results
                - First generation may take longer as the model loads
                """
            )
        with gr.Column(scale=1):
            gr.Markdown("### 🖼️ Output")
            output_image = gr.Image(
                label="Generated Image",
                type="pil",
                height=600
            )
            gr.Markdown(
                """
                ### 📊 Examples
                Try these prompts for inspiration!
                """
            )
            # Examples
            gr.Examples(
                examples=[
                    [
                        "A serene landscape with mountains at sunset, vibrant orange and pink sky, reflected in a calm lake, photorealistic",
                        None,
                        28,
                        4.0,
                        42
                    ],
                    [
                        "A futuristic cityscape at night, neon lights, flying cars, cyberpunk style, highly detailed",
                        None,
                        28,
                        4.0,
                        123
                    ],
                    [
                        "A cute robot reading a book in a cozy library, warm lighting, digital art style",
                        None,
                        28,
                        4.0,
                        456
                    ],
                    [
                        "Macro photography of a dew drop on a leaf, morning light, sharp focus, bokeh background",
                        None,
                        28,
                        4.0,
                        789
                    ],
                ],
                inputs=[prompt_input, image_input, num_steps, guidance, seed_input],
                outputs=output_image,
                cache_examples=False,
            )
    # Connect the generate button
    generate_btn.click(
        fn=generate_image,
        inputs=[prompt_input, image_input, num_steps, guidance, seed_input],
        outputs=output_image,
    )

if __name__ == "__main__":
    print("Launching Gradio interface...")
    demo.queue(max_size=20).launch()