# flux2.0 / app.py
import spaces # Import spaces FIRST, before any CUDA-related packages
import torch
from diffusers import Flux2Pipeline
from huggingface_hub import get_token
import requests
import io
import gradio as gr
from PIL import Image
import os
# Configuration
repo_id = "diffusers/FLUX.2-dev-bnb-4bit"
torch_dtype = torch.bfloat16
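# NOTE: "diffusers/FLUX.2-dev-bnb-4bit" is (per its name) a bitsandbytes 4-bit
# quantization of FLUX.2-dev; bfloat16 is used for the modules that stay unquantized.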
print("Starting Flux2 Image Generator...")
# Load the pipeline at startup
print("Loading Flux2 pipeline...")
pipe = None
def load_pipeline_startup():
    """Load pipeline at startup without CUDA."""
    global pipe
    try:
        print("Loading pipeline components...")
        pipe = Flux2Pipeline.from_pretrained(
            repo_id,
            text_encoder=None,  # prompts are encoded by the remote text encoder API below
            torch_dtype=torch_dtype,
        )
        # Keep on CPU initially - will move to CUDA when needed
        print("Pipeline loaded successfully on CPU!")
    except Exception as e:
        print(f"Warning: Could not load pipeline at startup: {e}")
        print("Pipeline will be loaded on first use.")

# Try to load at startup
load_pipeline_startup()
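# On ZeroGPU Spaces, CUDA is only usable inside functions decorated with
# @spaces.GPU, so the weights stay on CPU until generate_image() runs.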
def remote_text_encoder(prompts):
    """Encode prompts using remote text encoder API."""
    try:
        # Try multiple methods to get the token
        token = None
        # Method 1: From huggingface_hub
        try:
            from huggingface_hub import HfFolder
            token = HfFolder.get_token()
        except Exception:
            pass
        # Method 2: get_token from huggingface_hub
        if not token:
            try:
                token = get_token()
            except Exception:
                pass
        # Method 3: From environment variable (Spaces sets this automatically)
        if not token:
            token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
        # Method 4: From Spaces secrets
        if not token:
            token = os.environ.get("SPACE_TOKEN") or os.environ.get("SPACES_TOKEN")
        if not token:
            raise ValueError(
                "❌ HuggingFace token not found!\n\n"
                "📝 To fix this:\n"
                "1. Go to https://huggingface.co/settings/tokens\n"
                "2. Create a token with 'read' access\n"
                "3. In your Space settings, add a secret named 'HF_TOKEN' with your token value\n"
                "4. Restart your Space\n\n"
                "If running locally, use: huggingface-cli login"
            )
        print(f"Token found: {token[:10]}... (length: {len(token)})")
        response = requests.post(
            "https://remote-text-encoder-flux-2.huggingface.co/predict",
            json={"prompt": prompts},
            headers={
                "Authorization": f"Bearer {token}",
                "Content-Type": "application/json"
            },
            timeout=60
        )
        response.raise_for_status()
        prompt_embeds = torch.load(io.BytesIO(response.content))
        device = "cuda" if torch.cuda.is_available() else "cpu"
        return prompt_embeds.to(device)
    except requests.HTTPError as e:
        if e.response.status_code == 401:
            raise Exception(
                "❌ Authentication failed (401).\n\n"
                "Your HuggingFace token may not have access to this model.\n"
                "Please ensure your token has permission to access FLUX.2 models."
            )
        elif e.response.status_code == 403:
            raise Exception(
                "❌ Access forbidden (403).\n\n"
                "You may need to accept the model's license agreement on HuggingFace:\n"
                "Visit: https://huggingface.co/black-forest-labs/FLUX.2-dev"
            )
        else:
            raise Exception(f"HTTP error {e.response.status_code}: {str(e)}")
    except Exception as e:
        if "token" in str(e).lower():
            raise  # Re-raise token errors as-is
        raise Exception(f"Failed to encode prompt: {str(e)}")
def get_duration(prompt: str, input_image: Image.Image = None, num_inference_steps: int = 28, guidance_scale: float = 4.0, seed: int = 42, progress=None):
    """Calculate dynamic GPU duration based on inference steps and input image."""
    num_images = 0 if input_image is None else 1
    step_duration = 1.3 + 0.7 * num_images  # Increased from 1 to 1.3
    # Add extra time for model transfer to GPU + generation
    base_time = 30  # Time for moving model to GPU
    generation_time = num_inference_steps * step_duration
    return int(base_time + generation_time + 15)  # Extra 15s buffer
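# Passing a callable as `duration` lets ZeroGPU size the allocation per request:
# it receives the same arguments as generate_image (matching signature above)
# and returns the number of seconds of GPU time to reserve.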
@spaces.GPU(duration=get_duration)  # Dynamic GPU allocation
def generate_image(
    prompt: str,
    input_image: Image.Image = None,
    num_inference_steps: int = 28,
    guidance_scale: float = 4.0,
    seed: int = 42,
    progress=gr.Progress()
):
    """
    Generate an image using Flux2 based on text prompt and optional input image.

    Args:
        prompt: Text description of the desired image
        input_image: Optional input image for image-to-image generation
        num_inference_steps: Number of denoising steps (higher = better quality but slower)
        guidance_scale: How closely to follow the prompt (higher = more strict)
        seed: Random seed for reproducibility (-1 for random)
    """
    global pipe
    print("=== Starting generation ===")
    print(f"Prompt: {prompt[:100]}...")
    print(f"CUDA available: {torch.cuda.is_available()}")

    if not prompt or prompt.strip() == "":
        raise gr.Error("Please enter a prompt!")

    progress(0, desc="Moving model to GPU...")
    try:
        # Load or get pipeline
        if pipe is None:
            print("Pipeline not loaded at startup, loading now...")
            load_pipeline_startup()
            if pipe is None:
                raise gr.Error("Failed to load pipeline. Please try again or contact support.")

        print("Moving pipeline to CUDA...")
        pipeline = pipe.to("cuda")
        torch.cuda.empty_cache()  # Clear cache before generation

        progress(0.1, desc="Encoding prompt...")
        print("Encoding prompt...")
        # Get prompt embeddings from remote encoder
        try:
            prompt_embeds = remote_text_encoder(prompt)
            print(f"Prompt embeds shape: {prompt_embeds.shape}")
        except Exception as e:
            print(f"Error encoding prompt: {str(e)}")
            raise gr.Error(f"Failed to encode prompt. Please check your HuggingFace token. Error: {str(e)}")

        progress(0.2, desc="Generating image...")
        # Set up generator
        generator_device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Generator device: {generator_device}")
        if seed == -1:
            import random
            seed = random.randint(0, 2**32 - 1)
        print(f"Using seed: {seed}")
        generator = torch.Generator(device=generator_device).manual_seed(int(seed))

        # Prepare pipeline arguments
        pipe_kwargs = {
            "prompt_embeds": prompt_embeds,
            "generator": generator,
            "num_inference_steps": int(num_inference_steps),
            "guidance_scale": float(guidance_scale),
        }
        # Add input image if provided
        if input_image is not None:
            pipe_kwargs["image"] = input_image
            progress(0.25, desc="Processing input image...")
            print("Processing with input image")

        print(f"Starting generation with {num_inference_steps} steps...")

        # Custom callback for progress updates
        def progress_callback(pipe, step, timestep, callback_kwargs):
            progress((0.2 + (step / num_inference_steps) * 0.75),
                     desc=f"Generating... Step {step}/{num_inference_steps}")
            return callback_kwargs

        # Generate image
        with torch.inference_mode():
            result = pipeline(
                **pipe_kwargs,
                callback_on_step_end=progress_callback,
            )
        image = result.images[0]

        print("Generation complete!")
        progress(1.0, desc="Done!")

        # Move pipeline back to CPU to free GPU memory
        print("Moving pipeline back to CPU...")
        pipe.to("cpu")
        torch.cuda.empty_cache()

        return image
    except gr.Error:
        # Re-raise Gradio errors as-is
        raise
    except Exception as e:
        import traceback
        error_msg = f"Error generating image: {str(e)}\n{traceback.format_exc()}"
        print(error_msg)
        # Provide more helpful error messages
        if "CUDA" in str(e) or "out of memory" in str(e).lower():
            raise gr.Error(f"GPU Error: {str(e)}. Try reducing inference steps.")
        elif "token" in str(e).lower() or "401" in str(e):
            raise gr.Error("Authentication failed. Please ensure your HuggingFace token is set correctly.")
        elif "timeout" in str(e).lower():
            raise gr.Error("Request timed out. Please try again.")
        else:
            raise gr.Error(f"Error: {str(e)}")
# Create Gradio interface
with gr.Blocks(
    title="Flux2 Image Generator",
) as demo:
    gr.Markdown(
        """
        # 🎨 Flux2 Image Generator

        Generate stunning images using **FLUX.2-dev** with 4-bit quantization for efficient inference.
        Supports both **text-to-image** and **image-to-image** generation.

        ⚡ **Powered by Hugging Face Zero GPU** - Automatic GPU allocation on demand!
        """
    )
    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 📝 Input")
            prompt_input = gr.Textbox(
                label="Prompt",
                placeholder="Describe the image you want to generate...",
                lines=4,
                value="A cozy coffee shop scene on a rainy afternoon, warm lighting streaming through large windows with raindrops, a steaming cup of coffee on a wooden table with latte art, blurred background showing bookshelves and soft bokeh lights, photorealistic, cinematic composition, shallow depth of field"
            )
            image_input = gr.Image(
                label="Input Image (Optional)",
                type="pil",
                sources=["upload", "clipboard"],
                height=300
            )
gr.Markdown("### ⚙️ Parameters")
with gr.Row():
num_steps = gr.Slider(
minimum=1,
maximum=100,
value=28,
step=1,
label="Inference Steps",
info="More steps = better quality but slower"
)
guidance = gr.Slider(
minimum=1.0,
maximum=15.0,
value=4.0,
step=0.5,
label="Guidance Scale",
info="How closely to follow the prompt"
)
seed_input = gr.Number(
label="Seed",
value=42,
precision=0,
info="Use -1 for random seed"
)
generate_btn = gr.Button(
"🚀 Generate Image",
variant="primary",
size="lg",
)
            gr.Markdown(
                """
                ### 💡 Tips
                - **Text-to-Image**: Just enter a prompt and click generate
                - **Image-to-Image**: Upload an image and describe the changes
                - Start with 28 steps for a good balance of quality and speed
                - Higher guidance scale follows your prompt more strictly
                - Use the same seed to reproduce results
                - First generation may take longer as the model loads
                """
            )
        with gr.Column(scale=1):
            gr.Markdown("### 🖼️ Output")
            output_image = gr.Image(
                label="Generated Image",
                type="pil",
                height=600
            )
            gr.Markdown(
                """
                ### 📊 Examples
                Try these prompts for inspiration!
                """
            )
            # Examples
            gr.Examples(
                examples=[
                    [
                        "A serene landscape with mountains at sunset, vibrant orange and pink sky, reflected in a calm lake, photorealistic",
                        None,
                        28,
                        4.0,
                        42
                    ],
                    [
                        "A futuristic cityscape at night, neon lights, flying cars, cyberpunk style, highly detailed",
                        None,
                        28,
                        4.0,
                        123
                    ],
                    [
                        "A cute robot reading a book in a cozy library, warm lighting, digital art style",
                        None,
                        28,
                        4.0,
                        456
                    ],
                    [
                        "Macro photography of a dew drop on a leaf, morning light, sharp focus, bokeh background",
                        None,
                        28,
                        4.0,
                        789
                    ],
                ],
                inputs=[prompt_input, image_input, num_steps, guidance, seed_input],
                outputs=output_image,
                cache_examples=False,
            )
    # Connect the generate button
    generate_btn.click(
        fn=generate_image,
        inputs=[prompt_input, image_input, num_steps, guidance, seed_input],
        outputs=output_image,
    )

if __name__ == "__main__":
    print("Launching Gradio interface...")
    demo.queue(max_size=20).launch()