Spaces:
Running
on
Zero
Running
on
Zero
| import os | |
| import subprocess | |
| import sys | |
| import io | |
| import gradio as gr | |
| import numpy as np | |
| import random | |
| import spaces | |
| import torch | |
| from diffusers import Flux2KleinPipeline | |
| import requests | |
| from PIL import Image | |
| import json | |
| import base64 | |
| from huggingface_hub import InferenceClient | |
# ---------------------------------------------------------------------------
# Module-level configuration, remote prompt-enhancement client and model load.
# Everything below runs once at import time.
# ---------------------------------------------------------------------------

# Precision used when loading the pipeline weights, and the device tensors and
# the random generator are placed on (falls back to CPU when CUDA is absent).
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"

# Upper bounds used by the seed and width/height sliders in the UI.
MAX_SEED = np.iinfo(np.int32).max
MAX_IMAGE_SIZE = 1024

# Client for the hosted chat model that rewrites prompts. The token is read
# from the HF_TOKEN environment variable and is None when that is unset.
hf_client = InferenceClient(
    api_key=os.environ.get("HF_TOKEN"),
)
VLM_MODEL = "baidu/ERNIE-4.5-VL-424B-A47B-Base-PT"

# System prompt used when the request carries text only.
SYSTEM_PROMPT_TEXT_ONLY = """You are an expert prompt engineer for FLUX.2 by Black Forest Labs. Rewrite user prompts to be more descriptive while strictly preserving their core subject and intent.
Guidelines:
1. Structure: Keep structured inputs structured (enhance within fields). Convert natural language to detailed paragraphs.
2. Details: Add concrete visual specifics - form, scale, textures, materials, lighting (quality, direction, color), shadows, spatial relationships, and environmental context.
3. Text in Images: Put ALL text in quotation marks, matching the prompt's language. Always provide explicit quoted text for objects that would contain text in reality (signs, labels, screens, etc.) - without it, the model generates gibberish.
Output only the revised prompt and nothing else."""

# System prompt used when reference images accompany the request.
# The "→" arrows below were mis-encoded in the previous revision and are restored here.
SYSTEM_PROMPT_WITH_IMAGES = """You are FLUX.2 by Black Forest Labs, an image-editing expert. You convert editing requests into one concise instruction (50-80 words, ~30 for brief requests).
Rules:
- Single instruction only, no commentary
- Use clear, analytical language (avoid "whimsical," "cascading," etc.)
- Specify what changes AND what stays the same (face, lighting, composition)
- Reference actual image elements
- Turn negatives into positives ("don't change X" → "keep X")
- Make abstractions concrete ("futuristic" → "glowing cyan neon, metallic panels")
- Keep content PG-13
Output only the final instruction in plain text and nothing else."""

# Model repository ID for 9B
REPO_ID = "black-forest-labs/FLUX.2-klein-base-9B"

# Load 9B model
print("Loading 9B Base model...")
pipe = Flux2KleinPipeline.from_pretrained(REPO_ID, torch_dtype=dtype)
# Use the detected device instead of a hard-coded "cuda" so the pipeline and
# the generator created in infer() always agree, and CPU-only hosts still start.
pipe.to(device)

# Default settings for Base model
DEFAULT_STEPS = 50
DEFAULT_CFG = 4.0
def image_to_data_uri(img):
    """Encode an image as a base64 PNG ``data:`` URI.

    Args:
        img: Any object exposing ``save(fp, format=...)``; it is asked to
            write itself as PNG into an in-memory buffer.

    Returns:
        A string of the form ``data:image/png;base64,<payload>``.
    """
    with io.BytesIO() as png_buffer:
        img.save(png_buffer, format="PNG")
        payload = base64.b64encode(png_buffer.getvalue())
    return "data:image/png;base64," + payload.decode("utf-8")
def upsample_prompt_logic(prompt, image_list):
    """Ask the remote chat model to rewrite *prompt* into a richer one.

    With reference images the editing system prompt is used and every image
    is attached to the user message as a base64 data URI; otherwise the
    text-only system prompt is used.

    This is a best-effort step: any failure (encoding an image, the network
    call, an unexpected response shape) is logged and the original prompt is
    returned unchanged so generation can still proceed.

    Args:
        prompt: The user's original prompt text.
        image_list: Optional list of images; ``None`` or empty selects the
            text-only mode.

    Returns:
        The rewritten prompt, or *prompt* itself when enhancement fails or
        the model returns no usable text.
    """
    try:
        if image_list:
            # Image + Text Editing Mode: one text part followed by one
            # image_url part per reference image.
            system_content = SYSTEM_PROMPT_WITH_IMAGES
            user_content = [{"type": "text", "text": prompt}]
            user_content.extend(
                {"type": "image_url", "image_url": {"url": image_to_data_uri(img)}}
                for img in image_list
            )
        else:
            # Text Only Mode
            system_content = SYSTEM_PROMPT_TEXT_ONLY
            user_content = prompt

        messages = [
            {"role": "system", "content": system_content},
            {"role": "user", "content": user_content},
        ]
        completion = hf_client.chat.completions.create(
            model=VLM_MODEL,
            messages=messages,
            max_tokens=1024,
        )
        enhanced = completion.choices[0].message.content
    except Exception as e:  # deliberate catch-all: enhancement must never block generation
        print(f"Upsampling failed: {e}")
        return prompt

    # The message content can be None or blank; passing that on would hand
    # the diffusion pipeline an unusable prompt, so keep the original instead.
    if not isinstance(enhanced, str) or not enhanced.strip():
        print("Upsampling failed: model returned no text")
        return prompt
    return enhanced.strip()
def update_dimensions_from_image(image_list):
    """Derive width/height slider values from the first uploaded image.

    The longer side is pinned to 1024 and the shorter side is scaled by the
    image's aspect ratio; both values are then snapped to a multiple of 8 and
    clamped to the 256..1024 range.

    Args:
        image_list: Gallery value, a list of ``(image, caption)`` tuples, or
            ``None``/empty when nothing is uploaded.

    Returns:
        A ``(width, height)`` tuple; ``(1024, 1024)`` when there is no image.
    """
    if not image_list:
        return 1024, 1024  # Default dimensions

    # Only the first gallery entry decides the size; entry[0] is the image.
    reference = image_list[0][0]
    src_width, src_height = reference.size
    ratio = src_width / src_height

    if ratio >= 1:
        # Landscape or square: width is the long side.
        raw_dims = (1024, int(1024 / ratio))
    else:
        # Portrait: height is the long side.
        raw_dims = (int(1024 * ratio), 1024)

    def _snap(value):
        # Nearest multiple of 8, then clamp to the slider range.
        return max(256, min(1024, round(value / 8) * 8))

    snapped_width, snapped_height = (_snap(side) for side in raw_dims)
    return snapped_width, snapped_height
# Request a GPU for the duration of each call when running on ZeroGPU
# hardware; the decorator has no effect in other environments.
# NOTE(review): the default allocation window applies here. If long runs
# (many steps, large images) time out, pass duration=<seconds> - confirm
# against observed latency.
@spaces.GPU
def infer(prompt, input_images=None, seed=42, randomize_seed=False, width=1024, height=1024, num_inference_steps=50, guidance_scale=4.0, prompt_upsampling=False, progress=gr.Progress(track_tqdm=True)):
    """Generate (or edit) an image with the loaded pipeline.

    Args:
        prompt: Text description or editing instruction.
        input_images: Optional gallery value, a list of ``(image, caption)``
            tuples. When present the images are passed to the pipeline as
            references.
        seed: Seed for the random generator; ignored when *randomize_seed*.
        randomize_seed: Draw a fresh seed in ``[0, MAX_SEED]`` for this call.
        width: Output width in pixels.
        height: Output height in pixels.
        num_inference_steps: Number of denoising steps.
        guidance_scale: Guidance strength passed to the pipeline.
        prompt_upsampling: Rewrite the prompt with the remote model first.
        progress: Gradio progress tracker (also mirrors tqdm bars).

    Returns:
        ``(image, seed)``: the first generated image and the seed actually
        used, so the UI can display it.
    """
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)
    # UI sliders may deliver floats; the generator needs a real int.
    seed = int(seed)

    # Prepare image list (convert None or empty gallery to None); each
    # gallery item is an (image, caption) tuple.
    image_list = [item[0] for item in input_images] if input_images else None

    # 1. Upsampling (Network bound)
    final_prompt = prompt
    if prompt_upsampling:
        progress(0.1, desc="✨ Enhancing your prompt with AI...")
        final_prompt = upsample_prompt_logic(prompt, image_list)
        print(f"Original Prompt: {prompt}")
        print(f"Upsampled Prompt: {final_prompt}")

    # 2. Image Generation
    progress(0.2, desc="🎨 Creating your masterpiece with 9B model...")
    generator = torch.Generator(device=device).manual_seed(seed)
    pipe_kwargs = {
        "prompt": final_prompt,
        # Coerce numeric UI values to the types the pipeline expects.
        "height": int(height),
        "width": int(width),
        "num_inference_steps": int(num_inference_steps),
        "guidance_scale": float(guidance_scale),
        "generator": generator,
    }
    # Add images if provided
    if image_list is not None:
        pipe_kwargs["image"] = image_list

    image = pipe(**pipe_kwargs).images[0]
    return image, seed
# Text-to-image example prompts shown in the UI. Each row holds a single
# value because the matching gr.Examples component is wired to [prompt] only.
examples = [
    ["Create a vase on a table in living room, the color of the vase is a gradient of color, starting with #02eb3c color and finishing with #edfa3c. The flowers inside the vase have the color #ff0088"],
    ["Photorealistic infographic showing the complete Berlin TV Tower (Fernsehturm) from ground base to antenna tip, full vertical view with entire structure visible including concrete shaft, metallic sphere, and antenna spire. Slight upward perspective angle looking up toward the iconic sphere, perfectly centered on clean white background. Left side labels with thin horizontal connector lines: the text '368m' in extra large bold dark grey numerals (#2D3748) positioned at exactly the antenna tip with 'TOTAL HEIGHT' in small caps below. The text '207m' in extra large bold with 'TELECAFΓ' in small caps below, with connector line touching the sphere precisely at the window level. Right side label with horizontal connector line touching the sphere's equator: the text '32m' in extra large bold dark grey numerals with 'SPHERE DIAMETER' in small caps below. Bottom section arranged in three balanced columns: Left - Large text '986' in extra bold dark grey with 'STEPS' in caps below. Center - 'BERLIN TV TOWER' in bold caps with 'FERNSEHTURM' in lighter weight below. Right - 'INAUGURATED' in bold caps with 'OCTOBER 3, 1969' below. All typography in modern sans-serif font (such as Inter or Helvetica), color #2D3748, clean minimal technical diagram style. Horizontal connector lines are thin, precise, and clearly visible, touching the tower structure at exact corresponding measurement points. Professional architectural elevation drawing aesthetic with dynamic low angle perspective creating sense of height and grandeur, poster-ready infographic design with perfect visual hierarchy."],
    ["Soaking wet capybara taking shelter under a banana leaf in the rainy jungle, close up photo"],
    ["A kawaii die-cut sticker of a chubby orange cat, featuring big sparkly eyes and a happy smile with paws raised in greeting and a heart-shaped pink nose. The design should have smooth rounded lines with black outlines and soft gradient shading with pink cheeks."],
]
# Image-editing example: a prompt plus the list of reference image files fed
# to the gallery input.
# NOTE(review): the .webp files are presumably shipped next to this script - confirm.
examples_images = [
    ["The person from image 1 is petting the cat from image 2, the bird from image 3 is next to them", ["woman1.webp", "cat_window.webp", "bird.webp"]]
]
# ---------------------------------------------------------------------------
# CLAYMORPHISM STYLE - Warm, playful, 3D clay aesthetic
# ---------------------------------------------------------------------------
# Custom stylesheet handed to gr.Blocks(css=...). The whole value is runtime
# text, so its content is kept exactly as-is.
# NOTE(review): many selectors target `.gr-*` class names; verify that the
# installed Gradio version still emits those classes.
css = """
/* π¨ Core Color Palette */
:root {
--clay-bg-start: #ffecd2;
--clay-bg-end: #fcb69f;
--clay-surface: #f8b595;
--clay-surface-light: #ffd4b8;
--clay-shadow: rgba(180, 100, 60, 0.4);
--clay-shadow-dark: rgba(150, 80, 40, 0.5);
--clay-highlight: rgba(255, 255, 255, 0.7);
--clay-text: #7f3300;
--clay-text-light: #a85d2a;
--clay-accent: #ff9a6c;
--clay-accent-hover: #e07b4c;
}
/* π¨ Gradient Background */
.gradio-container {
background: linear-gradient(145deg, var(--clay-bg-start) 0%, var(--clay-bg-end) 100%) !important;
min-height: 100vh;
}
#col-container {
margin: 0 auto;
max-width: 1200px;
padding: 20px;
}
/* π·οΈ Main Title */
#col-container h1 {
color: var(--clay-text) !important;
text-shadow: 2px 2px 4px rgba(255, 255, 255, 0.5), -1px -1px 2px rgba(180, 100, 60, 0.2);
font-weight: 700 !important;
}
/* π² Clay Card Effect for Panels */
.gr-panel, .gr-box, .gr-form, .gr-group {
background: var(--clay-surface-light) !important;
border: none !important;
border-radius: 25px !important;
box-shadow:
10px 10px 20px var(--clay-shadow),
-6px -6px 15px var(--clay-highlight),
inset 2px 2px 5px rgba(255, 255, 255, 0.5),
inset -2px -2px 5px rgba(180, 100, 60, 0.15) !important;
padding: 20px !important;
margin: 10px 0 !important;
}
/* π Text Input / Textbox */
.gr-textbox textarea, .gr-textbox input, textarea, input[type="text"] {
background: linear-gradient(145deg, #fff5eb, #ffe8d6) !important;
border: none !important;
border-radius: 20px !important;
box-shadow:
inset 4px 4px 8px rgba(180, 100, 60, 0.2),
inset -3px -3px 6px rgba(255, 255, 255, 0.8) !important;
color: var(--clay-text) !important;
font-size: 16px !important;
padding: 15px 20px !important;
transition: all 0.3s ease !important;
}
.gr-textbox textarea:focus, .gr-textbox input:focus, textarea:focus, input[type="text"]:focus {
box-shadow:
inset 5px 5px 10px rgba(180, 100, 60, 0.25),
inset -4px -4px 8px rgba(255, 255, 255, 0.9),
0 0 0 3px rgba(255, 154, 108, 0.3) !important;
outline: none !important;
}
/* π Primary Button (Run) */
.gr-button.primary, button.primary {
background: linear-gradient(145deg, #ff9a6c, #e07b4c) !important;
border: none !important;
border-radius: 50px !important;
padding: 15px 40px !important;
color: #fff !important;
font-weight: 600 !important;
font-size: 16px !important;
box-shadow:
8px 8px 16px var(--clay-shadow),
-4px -4px 10px var(--clay-highlight),
inset 2px 2px 4px rgba(255, 255, 255, 0.3) !important;
transition: all 0.2s ease !important;
text-shadow: 1px 1px 2px rgba(0, 0, 0, 0.2) !important;
}
.gr-button.primary:hover, button.primary:hover {
transform: translateY(-3px) scale(1.02) !important;
box-shadow:
12px 12px 24px var(--clay-shadow),
-6px -6px 15px var(--clay-highlight),
inset 2px 2px 4px rgba(255, 255, 255, 0.4) !important;
}
.gr-button.primary:active, button.primary:active {
transform: translateY(1px) scale(0.98) !important;
box-shadow:
4px 4px 8px var(--clay-shadow),
-2px -2px 5px var(--clay-highlight),
inset 3px 3px 6px rgba(180, 100, 60, 0.3) !important;
}
/* π Secondary Buttons */
.gr-button, button {
background: linear-gradient(145deg, var(--clay-surface-light), var(--clay-surface)) !important;
border: none !important;
border-radius: 20px !important;
color: var(--clay-text) !important;
font-weight: 500 !important;
box-shadow:
6px 6px 12px var(--clay-shadow),
-3px -3px 8px var(--clay-highlight),
inset 1px 1px 3px rgba(255, 255, 255, 0.4) !important;
transition: all 0.2s ease !important;
}
.gr-button:hover, button:hover {
transform: translateY(-2px) !important;
box-shadow:
8px 8px 16px var(--clay-shadow),
-4px -4px 10px var(--clay-highlight) !important;
}
/* π» Radio Buttons */
.gr-radio label, .gr-checkbox label {
background: linear-gradient(145deg, #fff5eb, #ffe0cc) !important;
border: none !important;
border-radius: 15px !important;
padding: 12px 20px !important;
margin: 5px !important;
box-shadow:
5px 5px 10px var(--clay-shadow),
-3px -3px 8px var(--clay-highlight),
inset 1px 1px 2px rgba(255, 255, 255, 0.5) !important;
color: var(--clay-text) !important;
font-weight: 500 !important;
transition: all 0.2s ease !important;
cursor: pointer !important;
}
.gr-radio label:hover, .gr-checkbox label:hover {
transform: scale(1.02) !important;
}
.gr-radio input:checked + label, .gr-checkbox input:checked + label {
background: linear-gradient(145deg, #ff9a6c, #e07b4c) !important;
color: #fff !important;
box-shadow:
inset 3px 3px 6px rgba(180, 100, 60, 0.3),
inset -2px -2px 4px rgba(255, 255, 255, 0.2) !important;
}
/* ποΈ Sliders */
.gr-slider input[type="range"] {
background: linear-gradient(145deg, #ffe0cc, #ffd4b8) !important;
border-radius: 10px !important;
height: 10px !important;
box-shadow:
inset 3px 3px 6px var(--clay-shadow),
inset -2px -2px 4px var(--clay-highlight) !important;
}
.gr-slider input[type="range"]::-webkit-slider-thumb {
background: linear-gradient(145deg, #ff9a6c, #e07b4c) !important;
border: none !important;
border-radius: 50% !important;
width: 24px !important;
height: 24px !important;
box-shadow:
4px 4px 8px var(--clay-shadow),
-2px -2px 5px var(--clay-highlight) !important;
cursor: pointer !important;
transition: all 0.2s ease !important;
}
.gr-slider input[type="range"]::-webkit-slider-thumb:hover {
transform: scale(1.1) !important;
}
/* πΌοΈ Image Gallery */
.gr-gallery, .gallery-container {
background: linear-gradient(145deg, #fff5eb, #ffe8d6) !important;
border: none !important;
border-radius: 25px !important;
box-shadow:
inset 5px 5px 10px rgba(180, 100, 60, 0.2),
inset -4px -4px 8px rgba(255, 255, 255, 0.7) !important;
padding: 15px !important;
}
.gallery-container img {
object-fit: contain;
border-radius: 15px !important;
}
/* πΌοΈ Result Image */
.gr-image, .gr-image img {
border-radius: 25px !important;
box-shadow:
10px 10px 20px var(--clay-shadow),
-6px -6px 15px var(--clay-highlight) !important;
}
/* π Accordion */
.gr-accordion {
background: var(--clay-surface-light) !important;
border: none !important;
border-radius: 20px !important;
box-shadow:
8px 8px 16px var(--clay-shadow),
-4px -4px 10px var(--clay-highlight),
inset 1px 1px 3px rgba(255, 255, 255, 0.4) !important;
margin: 15px 0 !important;
overflow: hidden !important;
}
.gr-accordion summary, .gr-accordion .label-wrap {
background: linear-gradient(145deg, var(--clay-surface-light), var(--clay-surface)) !important;
color: var(--clay-text) !important;
font-weight: 600 !important;
padding: 15px 20px !important;
border-radius: 20px !important;
cursor: pointer !important;
transition: all 0.2s ease !important;
}
.gr-accordion summary:hover, .gr-accordion .label-wrap:hover {
background: linear-gradient(145deg, #ffd4b8, var(--clay-surface-light)) !important;
}
/* βοΈ Checkbox */
.gr-checkbox input[type="checkbox"] {
width: 24px !important;
height: 24px !important;
border-radius: 8px !important;
background: linear-gradient(145deg, #fff5eb, #ffe0cc) !important;
box-shadow:
inset 2px 2px 4px rgba(180, 100, 60, 0.2),
inset -1px -1px 3px rgba(255, 255, 255, 0.6) !important;
border: none !important;
cursor: pointer !important;
}
.gr-checkbox input[type="checkbox"]:checked {
background: linear-gradient(145deg, #ff9a6c, #e07b4c) !important;
}
/* π Labels */
label, .gr-label {
color: var(--clay-text) !important;
font-weight: 600 !important;
font-size: 14px !important;
text-shadow: 1px 1px 2px rgba(255, 255, 255, 0.5) !important;
}
/* π‘ Info Text */
.gr-info, .info, span.desc {
color: var(--clay-text-light) !important;
font-size: 13px !important;
font-style: italic !important;
}
/* π Examples */
.gr-examples {
background: linear-gradient(145deg, #fff5eb, #ffe8d6) !important;
border-radius: 25px !important;
padding: 20px !important;
box-shadow:
8px 8px 16px var(--clay-shadow),
-4px -4px 10px var(--clay-highlight),
inset 2px 2px 5px rgba(255, 255, 255, 0.4) !important;
}
.gr-examples .gr-sample {
background: var(--clay-surface-light) !important;
border-radius: 15px !important;
padding: 10px 15px !important;
margin: 5px !important;
box-shadow:
4px 4px 8px var(--clay-shadow),
-2px -2px 5px var(--clay-highlight) !important;
transition: all 0.2s ease !important;
cursor: pointer !important;
}
.gr-examples .gr-sample:hover {
transform: translateY(-2px) scale(1.01) !important;
}
/* π Progress Bar */
.progress-bar {
background: linear-gradient(145deg, #ffe0cc, #ffd4b8) !important;
border-radius: 10px !important;
box-shadow: inset 3px 3px 6px var(--clay-shadow) !important;
}
.progress-bar .progress {
background: linear-gradient(90deg, #ff9a6c, #e07b4c) !important;
border-radius: 10px !important;
}
/* π Markdown */
.gr-markdown, .markdown-body {
color: var(--clay-text) !important;
}
.gr-markdown a {
color: var(--clay-accent-hover) !important;
text-decoration: underline !important;
}
/* π― Row and Column Spacing */
.gr-row {
gap: 20px !important;
}
.gr-column {
gap: 15px !important;
}
/* π± Responsive Adjustments */
@media (max-width: 768px) {
.gr-button.primary, button.primary {
padding: 12px 30px !important;
font-size: 14px !important;
}
.gr-panel, .gr-box {
padding: 15px !important;
border-radius: 20px !important;
}
}
/* β¨ Floating Animation for Title */
@keyframes float {
0%, 100% { transform: translateY(0px); }
50% { transform: translateY(-5px); }
}
#col-container h1 {
animation: float 3s ease-in-out infinite;
}
/* π¨ Custom Scrollbar */
::-webkit-scrollbar {
width: 12px;
height: 12px;
}
::-webkit-scrollbar-track {
background: var(--clay-bg-start);
border-radius: 10px;
}
::-webkit-scrollbar-thumb {
background: linear-gradient(145deg, var(--clay-surface), var(--clay-accent));
border-radius: 10px;
box-shadow: inset 2px 2px 4px rgba(255, 255, 255, 0.3);
}
::-webkit-scrollbar-thumb:hover {
background: linear-gradient(145deg, var(--clay-accent), var(--clay-accent-hover));
}
"""
# ---------------------------------------------------------------------------
# GRADIO INTERFACE
# ---------------------------------------------------------------------------
# Builds the page: a header, a left column with the prompt, optional
# reference images and advanced settings, a right column with the result,
# two example galleries, and the event wiring that calls infer().
# NOTE(review): the nesting below was reconstructed from a flattened listing;
# confirm the layout against the running app.
with gr.Blocks(css=css, theme=gr.themes.Soft()) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("""
# π¨ FLUX.2 [Klein] - 9B Base Image Generator
**Create stunning AI images with just a text description!** FLUX.2 Klein 9B Base is a high-quality model for both image generation and editing.
π **How to use:** Type what you want to see β Click Run β Get your image!
[[Model Card](https://huggingface.co/black-forest-labs/FLUX.2-klein-base-9B)] | [[Blog Post](https://bfl.ai/blog/flux-2)] | License: FLUX Non-Commercial
""")
        with gr.Row():
            with gr.Column():
                # --- PROMPT INPUT ---
                with gr.Row():
                    prompt = gr.Text(
                        label="βοΈ Your Image Description",
                        show_label=True,
                        max_lines=3,
                        placeholder="Describe what you want to create... (e.g., 'A cute robot reading a book in a cozy library')",
                        container=True,
                        scale=3,
                        info="Be specific! Include details about style, lighting, colors, and composition for best results."
                    )
                    run_button = gr.Button(
                        "π Generate",
                        scale=1,
                        variant="primary"
                    )
                # --- IMAGE INPUT ---
                with gr.Accordion("πΌοΈ Reference Images (Optional) - Upload images to edit or combine", open=False):
                    gr.Markdown("""
**Image Editing Mode:** Upload 1+ images and describe how you want to modify them.
π‘ **Examples:**
- Upload a photo β "Make it look like a watercolor painting"
- Upload multiple images β "Combine the person from image 1 with the background from image 2"
""")
                    # type="pil": handlers receive PIL images (as (image, caption) tuples).
                    input_images = gr.Gallery(
                        label="Drop your images here",
                        type="pil",
                        columns=3,
                        rows=1,
                    )
                # --- ADVANCED SETTINGS ---
                with gr.Accordion("π§ Advanced Settings - Fine-tune your generation", open=False):
                    gr.Markdown("""
**ποΈ Customize your image generation parameters below:**
""")
                    # Prompt Upsampling
                    prompt_upsampling = gr.Checkbox(
                        label="β¨ AI Prompt Enhancement",
                        value=False,
                        info="Let AI automatically expand your simple prompt into a detailed, optimized description. Great for beginners!"
                    )
                    gr.Markdown("---")
                    gr.Markdown("**π² Randomness Control**")
                    seed = gr.Slider(
                        label="Seed Number",
                        minimum=0,
                        maximum=MAX_SEED,
                        step=1,
                        value=0,
                        info="Same seed + same prompt = same image. Useful for recreating or tweaking results."
                    )
                    randomize_seed = gr.Checkbox(
                        label="π² Randomize Seed",
                        value=True,
                        info="Generate a unique image every time (recommended for exploration)"
                    )
                    gr.Markdown("---")
                    gr.Markdown("**π Image Dimensions**")
                    with gr.Row():
                        width = gr.Slider(
                            label="Width (px)",
                            minimum=256,
                            maximum=MAX_IMAGE_SIZE,
                            step=8,
                            value=1024,
                            info="Image width in pixels. Must be multiple of 8."
                        )
                        height = gr.Slider(
                            label="Height (px)",
                            minimum=256,
                            maximum=MAX_IMAGE_SIZE,
                            step=8,
                            value=1024,
                            info="Image height in pixels. Must be multiple of 8."
                        )
                    gr.Markdown("""
π‘ **Common Sizes:** 1024Γ1024 (Square) | 1024Γ768 (Landscape) | 768Γ1024 (Portrait)
""")
                    gr.Markdown("---")
                    gr.Markdown("**π¨ Quality Settings**")
                    with gr.Row():
                        num_inference_steps = gr.Slider(
                            label="Inference Steps",
                            minimum=1,
                            maximum=100,
                            step=1,
                            value=50,
                            info="More steps = better quality but slower. Recommended: 30-50 steps."
                        )
                        guidance_scale = gr.Slider(
                            label="Guidance Scale (CFG)",
                            minimum=0.0,
                            maximum=10.0,
                            step=0.1,
                            value=4.0,
                            info="How closely to follow your prompt. Recommended: 3.5-4.0"
                        )
                    gr.Markdown("""
π‘ **Tips for 9B Base Model:**
- **Steps:** 50 recommended for best quality, 30 for faster results
- **CFG:** 4.0 is optimal, lower values give more creative freedom
""")
            # --- OUTPUT ---
            with gr.Column():
                gr.Markdown("### πΌοΈ Generated Image")
                result = gr.Image(
                    label="Your Creation",
                    show_label=False,
                    type="pil"
                )
                gr.Markdown("""
πΎ **Right-click** the image to save it to your device.
""")
        # --- EXAMPLES ---
        gr.Markdown("""
---
### π‘ Example Prompts - Click to try!
""")
        # Only the prompt is supplied, so infer() runs with its own defaults
        # for every other parameter.
        gr.Examples(
            examples=examples,
            fn=infer,
            inputs=[prompt],
            outputs=[result, seed],
            cache_examples=True,
            cache_mode="lazy"
        )
        gr.Markdown("""
---
### πΌοΈ Image Editing Examples - Click to try!
""")
        gr.Examples(
            examples=examples_images,
            fn=infer,
            inputs=[prompt, input_images],
            outputs=[result, seed],
            cache_examples=True,
            cache_mode="lazy"
        )
    # --- EVENT HANDLERS ---
    # Auto-update dimensions when images are uploaded
    input_images.upload(
        fn=update_dimensions_from_image,
        inputs=[input_images],
        outputs=[width, height]
    )
    # Generate image on button click or Enter key; the seed slider is also an
    # output so it shows the seed that infer() returned.
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[prompt, input_images, seed, randomize_seed, width, height, num_inference_steps, guidance_scale, prompt_upsampling],
        outputs=[result, seed]
    )
# Start the web server at import time (no __main__ guard in this script).
demo.launch()