# ZeroGPU requirement: `spaces` must be imported before torch so it can patch
# CUDA initialization.
import spaces

import io
import os
import random
import traceback

import gradio as gr
import requests
import torch
from diffusers import Flux2Pipeline
from huggingface_hub import get_token
from PIL import Image

repo_id = "diffusers/FLUX.2-dev-bnb-4bit"
torch_dtype = torch.bfloat16
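
# Note: "diffusers/FLUX.2-dev-bnb-4bit" is a pre-quantized 4-bit (bitsandbytes)
# checkpoint of FLUX.2-dev; together with text_encoder=None below, it keeps the
# model within a ZeroGPU Space's memory budget.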
print("Starting Flux2 Image Generator...") |
|
|
|
|
|
|
|
|
print("Loading Flux2 pipeline...") |
|
|
pipe = None |
|
|
|
|
|


def load_pipeline_startup():
    """Load pipeline at startup without CUDA."""
    global pipe
    try:
        print("Loading pipeline components...")
        # text_encoder=None: prompt embeddings come from the remote encoding
        # service, so the full text encoder is never loaded in this process.
        pipe = Flux2Pipeline.from_pretrained(
            repo_id,
            text_encoder=None,
            torch_dtype=torch_dtype,
        )
        print("Pipeline loaded successfully on CPU!")
    except Exception as e:
        print(f"Warning: Could not load pipeline at startup: {e}")
        print("Pipeline will be loaded on first use.")


load_pipeline_startup()
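

# FLUX.2's text encoder is a large language model, so this Space never loads
# it locally. Prompts go to a hosted endpoint that returns the prompt
# embeddings as a serialized tensor, consumed below via `prompt_embeds`.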
def remote_text_encoder(prompts):
    """Encode prompts using the remote text-encoder API."""
    try:
        # get_token() already covers the HF_TOKEN env var and locally stored
        # credentials (the old HfFolder.get_token() is a deprecated alias).
        token = get_token()
        if not token:
            token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")

        if not token:
            raise ValueError(
                "❌ HuggingFace token not found!\n\n"
                "📝 To fix this:\n"
                "1. Go to https://huggingface.co/settings/tokens\n"
                "2. Create a token with 'read' access\n"
                "3. In your Space settings, add a secret named 'HF_TOKEN' with your token value\n"
                "4. Restart your Space\n\n"
                "If running locally, use: huggingface-cli login"
            )

        # Do not print any part of the token itself.
        print(f"Token found (length: {len(token)})")
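
        # POST the prompt to the hosted encoder; the response body is a
        # torch-serialized tensor of prompt embeddings.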
        response = requests.post(
            "https://remote-text-encoder-flux-2.huggingface.co/predict",
            json={"prompt": prompts},
            headers={
                "Authorization": f"Bearer {token}",
                "Content-Type": "application/json",
            },
            timeout=60,
        )
        response.raise_for_status()
        # map_location="cpu" keeps deserialization off the GPU; the embeddings
        # are moved to the right device just below.
        prompt_embeds = torch.load(io.BytesIO(response.content), map_location="cpu")

        device = "cuda" if torch.cuda.is_available() else "cpu"
        return prompt_embeds.to(device)
    except requests.HTTPError as e:
        status = e.response.status_code if e.response is not None else None
        if status == 401:
            raise Exception(
                "❌ Authentication failed (401).\n\n"
                "Your HuggingFace token may not have access to this model.\n"
                "Please ensure your token has permission to access FLUX.2 models."
            )
        elif status == 403:
            raise Exception(
                "❌ Access forbidden (403).\n\n"
                "You may need to accept the model's license agreement on HuggingFace:\n"
                "Visit: https://huggingface.co/black-forest-labs/FLUX.2-dev"
            )
        else:
            raise Exception(f"HTTP error {status}: {str(e)}")
    except Exception as e:
        # Token errors already carry actionable instructions; re-raise as-is.
        if "token" in str(e).lower():
            raise
        raise Exception(f"Failed to encode prompt: {str(e)}")
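

# ZeroGPU accepts a callable for `duration`: it is invoked with the same
# arguments as the decorated function and returns the number of seconds of GPU
# time to request. The per-step constants below are rough empirical estimates.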
def get_duration(prompt: str, input_image: Image.Image = None, num_inference_steps: int = 28, guidance_scale: float = 4.0, seed: int = 42, progress=None):
    """Calculate dynamic GPU duration based on inference steps and input image."""
    # Image-conditioned steps are slower than pure text-to-image steps.
    num_images = 0 if input_image is None else 1
    step_duration = 1.3 + 0.7 * num_images

    base_time = 30  # model transfer to GPU + prompt encoding
    generation_time = num_inference_steps * step_duration
    return int(base_time + generation_time + 15)  # 15 s safety margin
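

# @spaces.GPU allocates a GPU only for the duration of each call; the pipeline
# is moved to CUDA on entry and back to CPU before returning.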
@spaces.GPU(duration=get_duration)
def generate_image(
    prompt: str,
    input_image: Image.Image = None,
    num_inference_steps: int = 28,
    guidance_scale: float = 4.0,
    seed: int = 42,
    progress=gr.Progress(),
):
    """
    Generate an image using Flux2 from a text prompt and an optional input image.

    Args:
        prompt: Text description of the desired image
        input_image: Optional input image for image-to-image generation
        num_inference_steps: Number of denoising steps (higher = better quality but slower)
        guidance_scale: How closely to follow the prompt (higher = more strict)
        seed: Random seed for reproducibility (-1 for random)
    """
    global pipe

    print("=== Starting generation ===")
    print(f"Prompt: {prompt[:100]}...")
    print(f"CUDA available: {torch.cuda.is_available()}")

    if not prompt or prompt.strip() == "":
        raise gr.Error("Please enter a prompt!")

    progress(0, desc="Moving model to GPU...")
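
    # Everything below runs inside the ZeroGPU allocation: keep GPU work tight
    # and release the GPU (move to CPU, empty cache) before returning.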
    try:
        # Fallback: load lazily if the startup load failed.
        if pipe is None:
            print("Pipeline not loaded at startup, loading now...")
            load_pipeline_startup()
            if pipe is None:
                raise gr.Error("Failed to load pipeline. Please try again or contact support.")

        print("Moving pipeline to CUDA...")
        pipeline = pipe.to("cuda")
        torch.cuda.empty_cache()

        progress(0.1, desc="Encoding prompt...")
        print("Encoding prompt...")

        try:
            prompt_embeds = remote_text_encoder(prompt)
            print(f"Prompt embeds shape: {prompt_embeds.shape}")
        except Exception as e:
            print(f"Error encoding prompt: {str(e)}")
            raise gr.Error(f"Failed to encode prompt. Please check your HuggingFace token. Error: {str(e)}")

        progress(0.2, desc="Generating image...")

        generator_device = "cuda" if torch.cuda.is_available() else "cpu"
        print(f"Generator device: {generator_device}")

        # -1 requests a random seed; any other value is used verbatim so
        # results are reproducible.
        if seed == -1:
            seed = random.randint(0, 2**32 - 1)

        print(f"Using seed: {seed}")
        generator = torch.Generator(device=generator_device).manual_seed(int(seed))
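
        # The pipeline takes pre-computed `prompt_embeds` instead of a raw
        # prompt string because it was loaded with text_encoder=None.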
        pipe_kwargs = {
            "prompt_embeds": prompt_embeds,
            "generator": generator,
            "num_inference_steps": int(num_inference_steps),
            "guidance_scale": float(guidance_scale),
        }

        if input_image is not None:
            pipe_kwargs["image"] = input_image
            progress(0.25, desc="Processing input image...")
            print("Processing with input image")

        print(f"Starting generation with {num_inference_steps} steps...")

        # Map denoising progress onto the 0.2 to 0.95 range of the progress bar.
        def progress_callback(pipe, step, timestep, callback_kwargs):
            progress(
                0.2 + (step / num_inference_steps) * 0.75,
                desc=f"Generating... Step {step}/{num_inference_steps}",
            )
            return callback_kwargs
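
        # inference_mode() disables autograd tracking, cutting memory use
        # during denoising.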
        with torch.inference_mode():
            result = pipeline(
                **pipe_kwargs,
                callback_on_step_end=progress_callback,
            )
        image = result.images[0]

        print("Generation complete!")
        progress(1.0, desc="Done!")

        # Release the GPU for the next ZeroGPU allocation.
        print("Moving pipeline back to CPU...")
        pipe.to("cpu")
        torch.cuda.empty_cache()

        return image

    except gr.Error:
        # Already user-facing; re-raise unchanged.
        raise
    except Exception as e:
        error_msg = f"Error generating image: {str(e)}\n{traceback.format_exc()}"
        print(error_msg)

        # Map common failure modes to friendlier messages.
        if "CUDA" in str(e) or "out of memory" in str(e).lower():
            raise gr.Error(f"GPU Error: {str(e)}. Try reducing inference steps.")
        elif "token" in str(e).lower() or "401" in str(e):
            raise gr.Error("Authentication failed. Please ensure your HuggingFace token is set correctly.")
        elif "timeout" in str(e).lower():
            raise gr.Error("Request timed out. Please try again.")
        else:
            raise gr.Error(f"Error: {str(e)}")
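

# Gradio UI: prompt and settings on the left, generated image on the right.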
with gr.Blocks(
    title="Flux2 Image Generator",
) as demo:
    gr.Markdown(
        """
        # 🎨 Flux2 Image Generator

        Generate stunning images using **FLUX.2-dev** with 4-bit quantization for efficient inference.

        Supports both **text-to-image** and **image-to-image** generation.

        ⚡ **Powered by Hugging Face Zero GPU** - Automatic GPU allocation on demand!
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### 📝 Input")

            prompt_input = gr.Textbox(
                label="Prompt",
                placeholder="Describe the image you want to generate...",
                lines=4,
                value="A cozy coffee shop scene on a rainy afternoon, warm lighting streaming through large windows with raindrops, a steaming cup of coffee on a wooden table with latte art, blurred background showing bookshelves and soft bokeh lights, photorealistic, cinematic composition, shallow depth of field",
            )

            image_input = gr.Image(
                label="Input Image (Optional)",
                type="pil",
                sources=["upload", "clipboard"],
                height=300,
            )

            gr.Markdown("### ⚙️ Parameters")

            with gr.Row():
                num_steps = gr.Slider(
                    minimum=1,
                    maximum=100,
                    value=28,
                    step=1,
                    label="Inference Steps",
                    info="More steps = better quality but slower",
                )

                guidance = gr.Slider(
                    minimum=1.0,
                    maximum=15.0,
                    value=4.0,
                    step=0.5,
                    label="Guidance Scale",
                    info="How closely to follow the prompt",
                )

            seed_input = gr.Number(
                label="Seed",
                value=42,
                precision=0,
                info="Use -1 for random seed",
            )

            generate_btn = gr.Button(
                "🚀 Generate Image",
                variant="primary",
                size="lg",
            )

            gr.Markdown(
                """
                ### 💡 Tips
                - **Text-to-Image**: Just enter a prompt and click generate
                - **Image-to-Image**: Upload an image and describe the changes
                - Start with 28 steps for a good balance of quality and speed
                - Higher guidance scale follows your prompt more strictly
                - Use the same seed to reproduce results
                - First generation may take longer as the model loads
                """
            )

        with gr.Column(scale=1):
            gr.Markdown("### 🖼️ Output")

            output_image = gr.Image(
                label="Generated Image",
                type="pil",
                height=600,
            )

            gr.Markdown(
                """
                ### 📊 Examples
                Try these prompts for inspiration!
                """
            )

            gr.Examples(
                examples=[
                    [
                        "A serene landscape with mountains at sunset, vibrant orange and pink sky, reflected in a calm lake, photorealistic",
                        None,
                        28,
                        4.0,
                        42,
                    ],
                    [
                        "A futuristic cityscape at night, neon lights, flying cars, cyberpunk style, highly detailed",
                        None,
                        28,
                        4.0,
                        123,
                    ],
                    [
                        "A cute robot reading a book in a cozy library, warm lighting, digital art style",
                        None,
                        28,
                        4.0,
                        456,
                    ],
                    [
                        "Macro photography of a dew drop on a leaf, morning light, sharp focus, bokeh background",
                        None,
                        28,
                        4.0,
                        789,
                    ],
                ],
                inputs=[prompt_input, image_input, num_steps, guidance, seed_input],
                outputs=output_image,
                cache_examples=False,
            )
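
    # Clicking "Generate" runs generate_image under a ZeroGPU allocation.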
    generate_btn.click(
        fn=generate_image,
        inputs=[prompt_input, image_input, num_steps, guidance, seed_input],
        outputs=output_image,
    )


if __name__ == "__main__":
    print("Launching Gradio interface...")
    demo.queue(max_size=20).launch()