"""
GLM-Image Image-to-Image Editing App
A Gradio 6 application for image-to-image editing using the GLM-Image model.
This app allows users to upload an image and provide a prompt to transform
the image using the GLM-Image diffusion model.
Features ZeroGPU support for dynamic GPU allocation on Hugging Face Spaces.
"""
# Import spaces FIRST - before any CUDA-related packages!
import spaces
import gradio as gr
import torch
from diffusers.pipelines.glm_image import GlmImagePipeline
from PIL import Image
import time
import random
import os
import tempfile
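# Assumed runtime dependencies (typically pinned in the Space's requirements.txt):
# spaces, gradio (6.x), torch, diffusers, Pillow.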
# Create a temp directory for saving images
TEMP_DIR = tempfile.mkdtemp(prefix="glm_image_")
# Load the GLM-Image model directly with bfloat16 precision
print("Loading GLM-Image model... This may take a few minutes.")
pipe = GlmImagePipeline.from_pretrained(
"zai-org/GLM-Image",
torch_dtype=torch.bfloat16,
device_map="cuda"
)
print("Model loaded successfully!")
def calculate_duration(num_inference_steps: int) -> int:
"""
Calculate the estimated duration in seconds based on inference steps.
ZeroGPU uses this to prioritize shorter tasks in the queue.
Args:
num_inference_steps: Number of diffusion steps
Returns:
Estimated duration in seconds
"""
    step_duration = 3.75  # rough estimated cost per diffusion step, in seconds
    base_time = 15  # fixed overhead per request, in seconds
    return int(base_time + num_inference_steps * step_duration)
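# Example: calculate_duration(50) is roughly 15 + 50 * 3.75 ≈ 202 seconds (about 3m 22s).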
def estimate_display_time(num_inference_steps: int) -> str:
"""
Estimate the processing duration for display purposes.
Returns a human-readable time estimate.
"""
estimated_seconds = calculate_duration(num_inference_steps)
if estimated_seconds < 60:
return f"~{int(estimated_seconds)}s"
else:
minutes = estimated_seconds // 60
seconds = estimated_seconds % 60
return f"~{int(minutes)}m {int(seconds)}s"
def validate_dimensions(height: int, width: int) -> tuple:
"""
Validate and adjust dimensions to be multiples of 32.
GLM-Image requires height and width to be multiples of 32.
"""
    adjusted_height = ((height + 31) // 32) * 32
    adjusted_width = ((width + 31) // 32) * 32
return adjusted_height, adjusted_width
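# Example: validate_dimensions(1000, 750) -> (1024, 768), rounding each dimension up to the next multiple of 32.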
def get_image_dimensions(image: Image.Image) -> tuple:
"""Get the dimensions of an uploaded PIL image."""
return image.size[1], image.size[0] # height, width
def get_duration(
image: Image.Image,
prompt: str,
height: int,
width: int,
num_inference_steps: int,
guidance_scale: float,
seed: int,
progress: gr.Progress = None
) -> int:
"""
Dynamic duration function for ZeroGPU.
Calculates estimated runtime based on inference steps.
Args:
image: Input PIL Image
prompt: Text prompt describing the desired transformation
height: Output height (must be multiple of 32)
width: Output width (must be multiple of 32)
num_inference_steps: Number of diffusion steps
guidance_scale: Guidance scale for diffusion
seed: Random seed for reproducibility
progress: Gradio progress tracker (passed automatically by Gradio 6)
Returns:
Estimated duration in seconds
"""
return calculate_duration(num_inference_steps)
@spaces.GPU(duration=get_duration)
def process_image(
image: Image.Image,
prompt: str,
height: int,
width: int,
num_inference_steps: int,
guidance_scale: float,
seed: int,
    progress: gr.Progress = gr.Progress()
) -> tuple:
"""
Process the image through the GLM-Image pipeline.
Uses ZeroGPU for dynamic GPU allocation.
Args:
image: Input PIL Image
prompt: Text prompt describing the desired transformation
height: Output height (must be multiple of 32)
width: Output width (must be multiple of 32)
num_inference_steps: Number of diffusion steps
guidance_scale: Guidance scale for diffusion
seed: Random seed for reproducibility
progress: Gradio progress tracker (handled automatically by Gradio 6)
Returns:
Tuple of (output_image, status_message, file_path)
file_path: Path to the saved image file for download (or None if failed)
"""
try:
if image is None:
raise ValueError("Please upload an image first.")
if not prompt or not prompt.strip():
raise ValueError("Please enter a prompt describing the image transformation.")
        height, width = validate_dimensions(height, width)
        if progress:
            progress(0.2, desc="Preparing image...")
input_image = image.convert("RGB")
generator = torch.Generator(device="cuda").manual_seed(seed)
if progress:
progress(0.4, desc="Generating image...", visible=True)
result = pipe(
prompt=prompt,
image=[input_image],
height=height,
width=width,
num_inference_steps=num_inference_steps,
guidance_scale=guidance_scale,
generator=generator
)
output_image = result.images[0]
# Save image to temp file for download button
timestamp = int(time.time() * 1000)
temp_path = os.path.join(TEMP_DIR, f"glm_output_{timestamp}.png")
output_image.save(temp_path, format="PNG")
if progress:
progress(1.0, desc="Complete!")
status = f"Successfully generated! ({height}x{width}, {num_inference_steps} steps)"
return output_image, status, temp_path
except Exception as e:
error_msg = f"Error: {str(e)}"
return None, error_msg, None
def update_dimensions_from_image(image: Image.Image) -> tuple:
"""Update height and width based on uploaded image dimensions."""
if image is None:
return 1024, 1024
    h, w = get_image_dimensions(image)
    return validate_dimensions(h, w)
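# Example: update_dimensions_from_image on a 1000x750 (width x height) upload returns (768, 1024), i.e. height=768, width=1024.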
def generate_random_seed() -> int:
"""Generate a random seed for the diffusion process."""
return random.randint(0, 2**32 - 1)
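# Note: the 2**32 - 1 upper bound matches the maximum allowed by the Seed input defined in the UI below.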
def update_time_estimate(num_steps: int) -> str:
"""Update the estimated processing time display."""
return f"**Estimated time:** {estimate_display_time(num_steps)}"
# Apple-style Theme: Clean, minimal, rounded corners, soft shadows
apple_theme = gr.themes.Soft(
primary_hue="blue",
secondary_hue="gray",
neutral_hue="gray",
font=gr.themes.GoogleFont("Inter"),
text_size="lg",
spacing_size="lg",
radius_size="lg" # Larger radius for rounded corners
).set(
# Apple-like Colors
body_background_fill="*neutral_50", # Classic Apple light gray background (#F5F5F7)
body_background_fill_dark="*neutral_950",
block_background_fill="*background_fill_primary", # White cards on gray background
block_border_width="0px", # Clean look, no borders
block_shadow="*shadow_sm", # Subtle shadow
# Typography
block_title_text_weight="600",
block_title_text_color="*neutral_900",
body_text_color="*neutral_800",
body_text_color_subdued="*neutral_500",
# Buttons
button_primary_background_fill="*primary_500", # Apple Blue
button_primary_background_fill_hover="*primary_600",
button_primary_text_color="white",
button_primary_border_color="*primary_500",
button_secondary_background_fill="*neutral_200",
button_secondary_background_fill_hover="*neutral_300",
button_secondary_text_color="*neutral_800",
# Inputs
input_background_fill="*neutral_50",
input_background_fill_dark="*neutral_800",
input_border_color="*neutral_300",
input_border_color_dark="*neutral_700",
input_shadow="none",
# Shadows
shadow_drop="rgba(0,0,0,0.05) 0px 1px 2px 0px",
shadow_drop_lg="rgba(0,0,0,0.1) 0px 10px 15px -3px, rgba(0,0,0,0.05) 0px 4px 6px -2px",
)
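# The theme above and the CSS string defined at the bottom of the file are applied in demo.launch().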
with gr.Blocks(fill_height=True) as demo:
gr.Markdown(
"""
# GLM-Image Editor
Transform your images with AI-powered editing. Upload an image and describe how you want to modify it.
<div align="center">
<a href="https://huggingface.co/spaces/akhaliq/anycoder" style="color: #007AFF; text-decoration: none;">Built with anycoder</a>
</div>
""",
elem_classes=["apple-header"]
)
with gr.Row(equal_height=True):
with gr.Column(scale=1, min_width=350):
gr.Markdown("### Input", elem_classes=["section-title"])
input_image = gr.Image(
label="Upload Image",
type="pil",
sources=["upload", "clipboard"],
elem_id="input-image",
height=300,
show_label=False,
buttons=[] # Gradio 6: Replaces show_download_button=False
)
prompt = gr.Textbox(
label="Prompt",
placeholder="Describe how you want to transform the image...",
lines=4,
max_lines=6,
show_label=False,
container=False
)
with gr.Accordion("Advanced Settings", open=False):
with gr.Row():
height = gr.Number(
label="Height",
value=1024,
minimum=64,
maximum=2048,
step=32,
info="Adjusted to multiple of 32"
)
width = gr.Number(
label="Width",
value=1024,
minimum=64,
maximum=2048,
step=32,
info="Adjusted to multiple of 32"
)
with gr.Row():
num_inference_steps = gr.Slider(
label="Inference Steps",
minimum=10,
maximum=100,
value=50,
step=5,
info="More steps = higher quality"
)
guidance_scale = gr.Slider(
label="Guidance Scale",
minimum=0.5,
maximum=3.0,
value=1.5,
step=0.1,
info="Prompt adherence"
)
with gr.Row():
seed = gr.Number(
label="Seed",
value=42,
minimum=0,
maximum=2**32 - 1,
step=1,
info="For reproducibility"
)
random_seed_btn = gr.Button(
"Random",
size="sm"
)
time_estimate = gr.Markdown(
value=update_time_estimate(50),
elem_classes=["time-estimate"]
)
with gr.Row():
generate_btn = gr.Button(
"Generate Image",
variant="primary",
size="lg"
)
clear_btn = gr.Button(
"Clear",
variant="secondary",
size="sm"
)
with gr.Column(scale=1, min_width=350):
gr.Markdown("### Output", elem_classes=["section-title"])
output_image = gr.Image(
label="Generated Image",
type="pil",
elem_id="output-image",
height=400,
interactive=False,
show_label=False
)
status = gr.Textbox(
label="Status",
value="Ready to generate.",
interactive=False,
show_label=True,
container=True
)
download_btn = gr.DownloadButton(
"Download Image",
value=None,
variant="secondary",
interactive=False,
size="lg"
)
with gr.Accordion("Tips", open=False):
gr.Markdown(
"""
- **Be specific** about colors and style
- **Background changes**: "Replace the background with..."
- **Style transfer**: "In the style of..."
- **Lighting**: "Soft natural lighting"
"""
)
with gr.Accordion("Example Prompts", open=False):
gr.Examples(
examples=[
["Replace the background with a futuristic city skyline at sunset"],
["Transform this into an oil painting in the style of Van Gogh"],
["Change the environment to an underwater coral reef"],
["Add a red sports car parked in the foreground"],
],
inputs=prompt
)
# Event Listeners
input_image.change(
fn=update_dimensions_from_image,
inputs=input_image,
outputs=[height, width],
api_visibility="private"
)
random_seed_btn.click(
fn=generate_random_seed,
outputs=seed,
api_visibility="private"
)
num_inference_steps.change(
fn=update_time_estimate,
inputs=num_inference_steps,
outputs=time_estimate,
api_visibility="private"
)
    gen_event = generate_btn.click(
fn=process_image,
inputs=[
input_image,
prompt,
height,
width,
num_inference_steps,
guidance_scale,
seed
],
outputs=[output_image, status, download_btn]
)
def enable_download(img, file_path):
if file_path is not None and os.path.exists(file_path):
return {
download_btn: gr.DownloadButton(
value=file_path,
interactive=True,
variant="primary"
)
}
return {
download_btn: gr.DownloadButton(
value=None,
interactive=False,
variant="secondary"
)
}
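    # Wire enable_download to run after generation so the download button becomes
    # clickable once a file exists. This is a minimal wiring sketch: it assumes the
    # DownloadButton's current value (the saved file path returned by process_image)
    # can be read back as an event input, and mirrors the api_visibility setting
    # used by the other listeners above.
    gen_event.then(
        fn=enable_download,
        inputs=[output_image, download_btn],
        outputs=download_btn,
        api_visibility="private"
    )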
def clear_all():
return {
input_image: None,
prompt: "",
output_image: None,
status: "Ready to generate.",
download_btn: gr.DownloadButton(
value=None,
interactive=False,
variant="secondary"
)
}
clear_btn.click(
fn=clear_all,
outputs=[input_image, prompt, output_image, status, download_btn],
api_visibility="private"
)
# CSS for Apple Styling
apple_css = """
/* Apple System Font Stack */
.gradio-container {
font-family: -apple-system, BlinkMacSystemFont, "Inter", "Segoe UI", Roboto, Helvetica, Arial, sans-serif !important;
}
/* Header Styling */
.apple-header {
background-color: white;
padding: 2rem 1rem;
border-radius: 18px;
box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.05), 0 2px 4px -1px rgba(0, 0, 0, 0.03);
margin-bottom: 1.5rem;
text-align: center;
}
.apple-header h1 {
color: #1d1d1f;
font-weight: 600;
margin-bottom: 0.5rem;
}
.apple-header p {
color: #86868b;
font-size: 1.1em;
margin: 0;
}
/* Section Titles */
.section-title {
color: #1d1d1f;
font-weight: 600;
margin-bottom: 1rem;
font-size: 1.2rem;
}
/* Buttons - Pill Shape */
button {
border-radius: 9999px !important;
transition: all 0.2s ease;
font-weight: 500;
}
/* Time Estimate */
.time-estimate {
font-size: 0.9em;
color: #86868b;
padding: 0.75rem;
background-color: #f5f5f7;
border-radius: 12px;
text-align: center;
margin-top: 1rem;
}
/* Input Areas */
#input-image, #output-image {
border-radius: 18px !important;
border: 1px solid #d2d2d7 !important;
overflow: hidden;
background-color: white;
}
/* Accordion */
.accordion {
border-radius: 12px !important;
border: 1px solid #d2d2d7 !important;
}
/* Markdown Tables (in Examples) */
table {
border-collapse: collapse;
width: 100%;
border-radius: 12px;
overflow: hidden;
}
th, td {
padding: 12px;
text-align: left;
border-bottom: 1px solid #e5e5ea;
}
th {
background-color: #f5f5f7;
font-weight: 600;
}
"""
demo.launch(
theme=apple_theme,
css=apple_css,
footer_links=[
{"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"},
{"label": "GLM-Image Model", "url": "https://huggingface.co/zai-org/GLM-Image"},
{"label": "ZeroGPU", "url": "https://huggingface.co/docs/spaces/spaces-sdks/gradio-zerogpu"}
],
server_name="0.0.0.0",
server_port=7860
)