|
|
""" |
|
|
GLM-Image to Image Editing App |
|
|
A Gradio 6 application for image-to-image editing using the GLM-Image model. |
|
|
|
|
|
This app allows users to upload an image and provide a prompt to transform |
|
|
the image using the GLM-Image diffusion model. |
|
|
Features ZeroGPU support for dynamic GPU allocation on Hugging Face Spaces. |
|
|
""" |
|
|
|
|
|
|
|
|
import spaces |
|
|
import gradio as gr |
|
|
import torch |
|
|
from diffusers.pipelines.glm_image import GlmImagePipeline |
|
|
from PIL import Image |
|
|
import time |
|
|
import random |
|
|
import os |
|
|
import tempfile |
|
|
|
|
|
|
|
|
# Scratch directory for generated images: process_image() saves each result
# here and hands the file path to the download button.
# NOTE(review): nothing in this file removes the directory or its files.
TEMP_DIR = tempfile.mkdtemp(prefix="glm_image_")

# Load the pipeline once at import time so every request reuses the same
# weights (bfloat16, placed on the "cuda" device map).
print("Loading GLM-Image model... This may take a few minutes.")
pipe = GlmImagePipeline.from_pretrained(
    "zai-org/GLM-Image",
    torch_dtype=torch.bfloat16,
    device_map="cuda"
)
print("Model loaded successfully!")
|
|
|
|
|
def calculate_duration(num_inference_steps: int) -> int:
    """
    Calculate the estimated duration in seconds based on inference steps.
    ZeroGPU uses this to prioritize shorter tasks in the queue.

    The estimate is a fixed overhead plus a per-step cost. Because the
    per-step cost is fractional (3.75 s), the raw sum can be a float such as
    202.5; it is rounded up so the function always returns a whole number of
    seconds, as the return annotation promises.

    Args:
        num_inference_steps: Number of diffusion steps

    Returns:
        Estimated duration in whole seconds (rounded up)
    """
    import math  # local import keeps this block self-contained

    step_duration = 3.75  # seconds budgeted per diffusion step
    base_time = 15  # fixed overhead in seconds
    return math.ceil(base_time + num_inference_steps * step_duration)
|
|
|
|
|
def estimate_display_time(num_inference_steps: int) -> str:
    """
    Format the estimated processing time for display.

    Durations under a minute are rendered as "~Ns"; longer ones as
    "~Mm Ss". Fractions of a second are truncated.
    """
    total = calculate_duration(num_inference_steps)

    if total >= 60:
        whole_minutes, leftover = divmod(total, 60)
        return f"~{int(whole_minutes)}m {int(leftover)}s"

    return f"~{int(total)}s"
|
|
|
|
|
def validate_dimensions(height: int, width: int) -> tuple:
    """
    Round height and width up to the nearest multiple of 32.

    GLM-Image requires height and width to be multiples of 32. Values that
    already are multiples come back unchanged. Non-integer numbers (for
    example a float delivered by a numeric input field) are accepted, and
    the result is always a pair of ``int`` values.

    Args:
        height: Requested height in pixels
        width: Requested width in pixels

    Returns:
        Tuple of (adjusted_height, adjusted_width)
    """
    multiple = 32

    def _round_up(value):
        # Ceiling division via negated floor division; int() normalizes
        # float inputs so callers never receive e.g. 1024.0.
        return int(-(-value // multiple)) * multiple

    return _round_up(height), _round_up(width)
|
|
|
|
|
def get_image_dimensions(image: Image.Image) -> tuple:
    """Return (height, width) for an image, i.e. its ``size`` pair reversed."""
    img_width, img_height = image.size
    return img_height, img_width
|
|
|
|
|
def get_duration(
    image: Image.Image,
    prompt: str,
    height: int,
    width: int,
    num_inference_steps: int,
    guidance_scale: float,
    seed: int,
    progress: gr.Progress = None
) -> int:
    """
    Duration callback handed to ``spaces.GPU`` for ``process_image``.

    The signature mirrors ``process_image`` so it can receive the same
    arguments; only ``num_inference_steps`` influences the result.

    Args:
        image: Input PIL Image (unused here)
        prompt: Text prompt (unused here)
        height: Output height (unused here)
        width: Output width (unused here)
        num_inference_steps: Number of diffusion steps
        guidance_scale: Guidance scale (unused here)
        seed: Random seed (unused here)
        progress: Progress tracker (unused here)

    Returns:
        Estimated duration in seconds, as computed by ``calculate_duration``
    """
    estimated_seconds = calculate_duration(num_inference_steps)
    return estimated_seconds
|
|
|
|
|
@spaces.GPU(duration=get_duration)
def process_image(
    image: Image.Image,
    prompt: str,
    height: int,
    width: int,
    num_inference_steps: int,
    guidance_scale: float,
    seed: int,
    progress: gr.Progress = gr.Progress()
) -> tuple:
    """
    Process the image through the GLM-Image pipeline.
    Uses ZeroGPU for dynamic GPU allocation.

    Any failure (missing input, pipeline error, ...) is caught and reported
    through the status message instead of being raised, so the UI always
    receives a well-formed triple.

    Args:
        image: Input PIL Image
        prompt: Text prompt describing the desired transformation
        height: Output height (rounded up to a multiple of 32)
        width: Output width (rounded up to a multiple of 32)
        num_inference_steps: Number of diffusion steps
        guidance_scale: Guidance scale for diffusion
        seed: Random seed for reproducibility
        progress: Gradio progress tracker. Gradio only injects a tracker when
            the parameter's default is a ``gr.Progress`` instance, hence the
            default; pass ``None`` to disable progress reporting.

    Returns:
        Tuple of (output_image, status_message, file_path)
        file_path: Path to the saved image file for download (or None if failed)
    """

    def _report(fraction, message):
        # Explicit None check: presence of the tracker, not its truthiness,
        # decides whether progress is reported.
        if progress is not None:
            progress(fraction, desc=message)

    try:
        if image is None:
            raise ValueError("Please upload an image first.")

        if not prompt or not prompt.strip():
            raise ValueError("Please enter a prompt describing the image transformation.")

        # Numeric UI fields may deliver floats; the pipeline sizes, the step
        # count and torch's manual_seed() all need real integers.
        height, width = validate_dimensions(int(height), int(width))
        num_inference_steps = int(num_inference_steps)
        seed = int(seed)

        _report(0.2, "Preparing image...")
        input_image = image.convert("RGB")

        generator = torch.Generator(device="cuda").manual_seed(seed)

        _report(0.4, "Generating image...")
        result = pipe(
            prompt=prompt,
            image=[input_image],
            height=height,
            width=width,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            generator=generator
        )

        output_image = result.images[0]

        # Persist the result so the download button has a file to serve.
        timestamp = int(time.time() * 1000)
        temp_path = os.path.join(TEMP_DIR, f"glm_output_{timestamp}.png")
        output_image.save(temp_path, format="PNG")

        _report(1.0, "Complete!")

        status = f"Successfully generated! ({height}x{width}, {num_inference_steps} steps)"
        return output_image, status, temp_path

    except Exception as e:
        # UI boundary: surface the problem in the status box rather than raising.
        error_msg = f"Error: {str(e)}"
        return None, error_msg, None
|
|
|
|
|
def update_dimensions_from_image(image: Image.Image) -> tuple:
    """
    Derive the height/width fields from an uploaded image.

    Args:
        image: Uploaded PIL image, or None when the input was cleared

    Returns:
        Tuple of (height, width), each rounded up to a multiple of 32;
        (1024, 1024) when no image is present
    """
    if image is None:
        return 1024, 1024

    # Reuse the shared rounding helper instead of repeating its arithmetic.
    return validate_dimensions(*get_image_dimensions(image))
|
|
|
|
|
def generate_random_seed() -> int:
    """Draw a random seed in the inclusive range 0 .. 2**32 - 1."""
    seed_space = 1 << 32
    return random.randrange(seed_space)
|
|
|
|
|
def update_time_estimate(num_steps: int) -> str:
    """Build the Markdown line that shows the estimated processing time."""
    estimate = estimate_display_time(num_steps)
    return "**Estimated time:** " + estimate
|
|
|
|
|
|
|
|
# Custom look for the app: the Soft base theme with blue/gray hues, the Inter
# font and large text/spacing/radius, followed by per-token overrides.
apple_theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="gray",
    neutral_hue="gray",
    font=gr.themes.GoogleFont("Inter"),
    text_size="lg",
    spacing_size="lg",
    radius_size="lg"
).set(
    # Page and block surfaces
    body_background_fill="*neutral_50",
    body_background_fill_dark="*neutral_950",
    block_background_fill="*background_fill_primary",
    block_border_width="0px",
    block_shadow="*shadow_sm",

    # Text weights and colors
    block_title_text_weight="600",
    block_title_text_color="*neutral_900",
    body_text_color="*neutral_800",
    body_text_color_subdued="*neutral_500",

    # Primary and secondary buttons
    button_primary_background_fill="*primary_500",
    button_primary_background_fill_hover="*primary_600",
    button_primary_text_color="white",
    button_primary_border_color="*primary_500",
    button_secondary_background_fill="*neutral_200",
    button_secondary_background_fill_hover="*neutral_300",
    button_secondary_text_color="*neutral_800",

    # Input fields
    input_background_fill="*neutral_50",
    input_background_fill_dark="*neutral_800",
    input_border_color="*neutral_300",
    input_border_color_dark="*neutral_700",
    input_shadow="none",

    # Drop shadows
    shadow_drop="rgba(0,0,0,0.05) 0px 1px 2px 0px",
    shadow_drop_lg="rgba(0,0,0,0.1) 0px 10px 15px -3px, rgba(0,0,0,0.05) 0px 4px 6px -2px",
)
|
|
|
|
|
# UI definition: an input column (image, prompt, advanced settings), an output
# column (result, status, download) and the event wiring between them.
# NOTE(review): the nesting of time_estimate, clear_btn and the Tips/Examples
# accordions is a best-effort reconstruction — confirm against the intended layout.
with gr.Blocks(fill_height=True) as demo:

    gr.Markdown(
        """
        # GLM-Image Editor

        Transform your images with AI-powered editing. Upload an image and describe how you want to modify it.

        <div align="center">
        <a href="https://huggingface.co/spaces/akhaliq/anycoder" style="color: #007AFF; text-decoration: none;">Built with anycoder</a>
        </div>
        """,
        elem_classes=["apple-header"]
    )

    with gr.Row(equal_height=True):
        # ---- Input column ----
        with gr.Column(scale=1, min_width=350):
            gr.Markdown("### Input", elem_classes=["section-title"])

            input_image = gr.Image(
                label="Upload Image",
                type="pil",
                sources=["upload", "clipboard"],
                elem_id="input-image",
                height=300,
                show_label=False,
                buttons=[]
            )

            prompt = gr.Textbox(
                label="Prompt",
                placeholder="Describe how you want to transform the image...",
                lines=4,
                max_lines=6,
                show_label=False,
                container=False
            )

            with gr.Accordion("Advanced Settings", open=False):
                with gr.Row():
                    height = gr.Number(
                        label="Height",
                        value=1024,
                        minimum=64,
                        maximum=2048,
                        step=32,
                        info="Adjusted to multiple of 32"
                    )
                    width = gr.Number(
                        label="Width",
                        value=1024,
                        minimum=64,
                        maximum=2048,
                        step=32,
                        info="Adjusted to multiple of 32"
                    )

                with gr.Row():
                    num_inference_steps = gr.Slider(
                        label="Inference Steps",
                        minimum=10,
                        maximum=100,
                        value=50,
                        step=5,
                        info="More steps = higher quality"
                    )
                    guidance_scale = gr.Slider(
                        label="Guidance Scale",
                        minimum=0.5,
                        maximum=3.0,
                        value=1.5,
                        step=0.1,
                        info="Prompt adherence"
                    )

                with gr.Row():
                    seed = gr.Number(
                        label="Seed",
                        value=42,
                        minimum=0,
                        maximum=2**32 - 1,
                        step=1,
                        info="For reproducibility"
                    )
                    random_seed_btn = gr.Button(
                        "Random",
                        size="sm"
                    )

            # Initial text matches the slider's default of 50 steps.
            time_estimate = gr.Markdown(
                value=update_time_estimate(50),
                elem_classes=["time-estimate"]
            )

            with gr.Row():
                generate_btn = gr.Button(
                    "Generate Image",
                    variant="primary",
                    size="lg"
                )

                clear_btn = gr.Button(
                    "Clear",
                    variant="secondary",
                    size="sm"
                )

        # ---- Output column ----
        with gr.Column(scale=1, min_width=350):
            gr.Markdown("### Output", elem_classes=["section-title"])

            output_image = gr.Image(
                label="Generated Image",
                type="pil",
                elem_id="output-image",
                height=400,
                interactive=False,
                show_label=False
            )

            status = gr.Textbox(
                label="Status",
                value="Ready to generate.",
                interactive=False,
                show_label=True,
                container=True
            )

            # Starts disabled; enabled by enable_download() once a file exists.
            download_btn = gr.DownloadButton(
                "Download Image",
                value=None,
                variant="secondary",
                interactive=False,
                size="lg"
            )

    with gr.Accordion("Tips", open=False):
        gr.Markdown(
            """
            - **Be specific** about colors and style
            - **Background changes**: "Replace the background with..."
            - **Style transfer**: "In the style of..."
            - **Lighting**: "Soft natural lighting"
            """
        )

    with gr.Accordion("Example Prompts", open=False):
        gr.Examples(
            examples=[
                ["Replace the background with a futuristic city skyline at sunset"],
                ["Transform this into an oil painting in the style of Van Gogh"],
                ["Change the environment to an underwater coral reef"],
                ["Add a red sports car parked in the foreground"],
            ],
            inputs=prompt
        )

    # ---- Event handlers (defined before the wiring that references them) ----

    def enable_download(img, file_path):
        """Enable the download button when a generated file exists, else disable it.

        Args:
            img: Current output image (not inspected; kept for the existing signature)
            file_path: Path currently held by the download button, or None

        Returns:
            Dict mapping ``download_btn`` to its updated configuration
        """
        if file_path is not None and os.path.exists(file_path):
            return {
                download_btn: gr.DownloadButton(
                    value=file_path,
                    interactive=True,
                    variant="primary"
                )
            }
        return {
            download_btn: gr.DownloadButton(
                value=None,
                interactive=False,
                variant="secondary"
            )
        }

    def clear_all():
        """Reset inputs, outputs, status text and the download button."""
        return {
            input_image: None,
            prompt: "",
            output_image: None,
            status: "Ready to generate.",
            download_btn: gr.DownloadButton(
                value=None,
                interactive=False,
                variant="secondary"
            )
        }

    # ---- Event wiring ----

    # Keep the height/width fields in sync with the uploaded image.
    input_image.change(
        fn=update_dimensions_from_image,
        inputs=input_image,
        outputs=[height, width],
        api_visibility="private"
    )

    random_seed_btn.click(
        fn=generate_random_seed,
        outputs=seed,
        api_visibility="private"
    )

    num_inference_steps.change(
        fn=update_time_estimate,
        inputs=num_inference_steps,
        outputs=time_estimate,
        api_visibility="private"
    )

    # process_image only sets the button's file; the chained step is what
    # flips the button to interactive. Without it the button stays disabled
    # and the result can never be downloaded.
    generate_btn.click(
        fn=process_image,
        inputs=[
            input_image,
            prompt,
            height,
            width,
            num_inference_steps,
            guidance_scale,
            seed
        ],
        outputs=[output_image, status, download_btn]
    ).then(
        fn=enable_download,
        inputs=[output_image, download_btn],
        outputs=download_btn,
        api_visibility="private"
    )

    clear_btn.click(
        fn=clear_all,
        outputs=[input_image, prompt, output_image, status, download_btn],
        api_visibility="private"
    )
|
|
|
|
|
|
|
|
# Extra CSS passed to demo.launch(); the selectors target the elem_classes /
# elem_id values assigned to components in the Blocks layout
# (.apple-header, .section-title, .time-estimate, #input-image, #output-image).
apple_css = """
/* Apple System Font Stack */
.gradio-container {
font-family: -apple-system, BlinkMacSystemFont, "Inter", "Segoe UI", Roboto, Helvetica, Arial, sans-serif !important;
}

/* Header Styling */
.apple-header {
background-color: white;
padding: 2rem 1rem;
border-radius: 18px;
box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.05), 0 2px 4px -1px rgba(0, 0, 0, 0.03);
margin-bottom: 1.5rem;
text-align: center;
}
.apple-header h1 {
color: #1d1d1f;
font-weight: 600;
margin-bottom: 0.5rem;
}
.apple-header p {
color: #86868b;
font-size: 1.1em;
margin: 0;
}

/* Section Titles */
.section-title {
color: #1d1d1f;
font-weight: 600;
margin-bottom: 1rem;
font-size: 1.2rem;
}

/* Buttons - Pill Shape */
button {
border-radius: 9999px !important;
transition: all 0.2s ease;
font-weight: 500;
}

/* Time Estimate */
.time-estimate {
font-size: 0.9em;
color: #86868b;
padding: 0.75rem;
background-color: #f5f5f7;
border-radius: 12px;
text-align: center;
margin-top: 1rem;
}

/* Input Areas */
#input-image, #output-image {
border-radius: 18px !important;
border: 1px solid #d2d2d7 !important;
overflow: hidden;
background-color: white;
}

/* Accordion */
.accordion {
border-radius: 12px !important;
border: 1px solid #d2d2d7 !important;
}

/* Markdown Tables (in Examples) */
table {
border-collapse: collapse;
width: 100%;
border-radius: 12px;
overflow: hidden;
}
th, td {
padding: 12px;
text-align: left;
border-bottom: 1px solid #e5e5ea;
}
th {
background-color: #f5f5f7;
font-weight: 600;
}
"""
|
|
|
|
|
# Start the app. The theme, custom CSS and footer links are supplied here at
# launch time; the server binds to all interfaces (0.0.0.0) on port 7860.
# NOTE(review): runs unconditionally at import — there is no __main__ guard.
demo.launch(
    theme=apple_theme,
    css=apple_css,
    footer_links=[
        {"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"},
        {"label": "GLM-Image Model", "url": "https://huggingface.co/zai-org/GLM-Image"},
        {"label": "ZeroGPU", "url": "https://huggingface.co/docs/spaces/spaces-sdks/gradio-zerogpu"}
    ],
    server_name="0.0.0.0",
    server_port=7860
)