""" GLM-Image to Image Editing App A Gradio 6 application for image-to-image editing using the GLM-Image model. This app allows users to upload an image and provide a prompt to transform the image using the GLM-Image diffusion model. Features ZeroGPU support for dynamic GPU allocation on Hugging Face Spaces. """ # Import spaces FIRST - before any CUDA-related packages! import spaces import gradio as gr import torch from diffusers.pipelines.glm_image import GlmImagePipeline from PIL import Image import time import random import os import tempfile # Create a temp directory for saving images TEMP_DIR = tempfile.mkdtemp(prefix="glm_image_") # Load the GLM-Image model directly with bfloat16 precision print("Loading GLM-Image model... This may take a few minutes.") pipe = GlmImagePipeline.from_pretrained( "zai-org/GLM-Image", torch_dtype=torch.bfloat16, device_map="cuda" ) print("Model loaded successfully!") def calculate_duration(num_inference_steps: int) -> int: """ Calculate the estimated duration in seconds based on inference steps. ZeroGPU uses this to prioritize shorter tasks in the queue. Args: num_inference_steps: Number of diffusion steps Returns: Estimated duration in seconds """ step_duration = 3.75 base_time = 15 return base_time + (num_inference_steps * step_duration) def estimate_display_time(num_inference_steps: int) -> str: """ Estimate the processing duration for display purposes. Returns a human-readable time estimate. """ estimated_seconds = calculate_duration(num_inference_steps) if estimated_seconds < 60: return f"~{int(estimated_seconds)}s" else: minutes = estimated_seconds // 60 seconds = estimated_seconds % 60 return f"~{int(minutes)}m {int(seconds)}s" def validate_dimensions(height: int, width: int) -> tuple: """ Validate and adjust dimensions to be multiples of 32. GLM-Image requires height and width to be multiples of 32. """ adjusted_height = (height // 32 + (1 if height % 32 != 0 else 0)) * 32 adjusted_width = (width // 32 + (1 if width % 32 != 0 else 0)) * 32 return adjusted_height, adjusted_width def get_image_dimensions(image: Image.Image) -> tuple: """Get the dimensions of an uploaded PIL image.""" return image.size[1], image.size[0] # height, width def get_duration( image: Image.Image, prompt: str, height: int, width: int, num_inference_steps: int, guidance_scale: float, seed: int, progress: gr.Progress = None ) -> int: """ Dynamic duration function for ZeroGPU. Calculates estimated runtime based on inference steps. Args: image: Input PIL Image prompt: Text prompt describing the desired transformation height: Output height (must be multiple of 32) width: Output width (must be multiple of 32) num_inference_steps: Number of diffusion steps guidance_scale: Guidance scale for diffusion seed: Random seed for reproducibility progress: Gradio progress tracker (passed automatically by Gradio 6) Returns: Estimated duration in seconds """ return calculate_duration(num_inference_steps) @spaces.GPU(duration=get_duration) def process_image( image: Image.Image, prompt: str, height: int, width: int, num_inference_steps: int, guidance_scale: float, seed: int, progress: gr.Progress = None ) -> tuple: """ Process the image through the GLM-Image pipeline. Uses ZeroGPU for dynamic GPU allocation. Args: image: Input PIL Image prompt: Text prompt describing the desired transformation height: Output height (must be multiple of 32) width: Output width (must be multiple of 32) num_inference_steps: Number of diffusion steps guidance_scale: Guidance scale for diffusion seed: Random seed for reproducibility progress: Gradio progress tracker (handled automatically by Gradio 6) Returns: Tuple of (output_image, status_message, file_path) file_path: Path to the saved image file for download (or None if failed) """ try: if image is None: raise ValueError("Please upload an image first.") if not prompt or not prompt.strip(): raise ValueError("Please enter a prompt describing the image transformation.") adjusted_height, adjusted_width = validate_dimensions(height, width) if adjusted_height != height or adjusted_width != width: height, width = adjusted_height, adjusted_width if progress: progress(0.1, desc="Loading model...") if progress: progress(0.2, desc="Preparing image...") input_image = image.convert("RGB") generator = torch.Generator(device="cuda").manual_seed(seed) if progress: progress(0.4, desc="Generating image...", visible=True) result = pipe( prompt=prompt, image=[input_image], height=height, width=width, num_inference_steps=num_inference_steps, guidance_scale=guidance_scale, generator=generator ) output_image = result.images[0] # Save image to temp file for download button timestamp = int(time.time() * 1000) temp_path = os.path.join(TEMP_DIR, f"glm_output_{timestamp}.png") output_image.save(temp_path, format="PNG") if progress: progress(1.0, desc="Complete!") status = f"Successfully generated! ({height}x{width}, {num_inference_steps} steps)" return output_image, status, temp_path except Exception as e: error_msg = f"Error: {str(e)}" return None, error_msg, None def update_dimensions_from_image(image: Image.Image) -> tuple: """Update height and width based on uploaded image dimensions.""" if image is None: return 1024, 1024 h, w = get_image_dimensions(image) adjusted_h = (h // 32 + (1 if h % 32 != 0 else 0)) * 32 adjusted_w = (w // 32 + (1 if w % 32 != 0 else 0)) * 32 return adjusted_h, adjusted_w def generate_random_seed() -> int: """Generate a random seed for the diffusion process.""" return random.randint(0, 2**32 - 1) def update_time_estimate(num_steps: int) -> str: """Update the estimated processing time display.""" return f"**Estimated time:** {estimate_display_time(num_steps)}" # Apple-style Theme: Clean, minimal, rounded corners, soft shadows apple_theme = gr.themes.Soft( primary_hue="blue", secondary_hue="gray", neutral_hue="gray", font=gr.themes.GoogleFont("Inter"), text_size="lg", spacing_size="lg", radius_size="lg" # Larger radius for rounded corners ).set( # Apple-like Colors body_background_fill="*neutral_50", # Classic Apple light gray background (#F5F5F7) body_background_fill_dark="*neutral_950", block_background_fill="*background_fill_primary", # White cards on gray background block_border_width="0px", # Clean look, no borders block_shadow="*shadow_sm", # Subtle shadow # Typography block_title_text_weight="600", block_title_text_color="*neutral_900", body_text_color="*neutral_800", body_text_color_subdued="*neutral_500", # Buttons button_primary_background_fill="*primary_500", # Apple Blue button_primary_background_fill_hover="*primary_600", button_primary_text_color="white", button_primary_border_color="*primary_500", button_secondary_background_fill="*neutral_200", button_secondary_background_fill_hover="*neutral_300", button_secondary_text_color="*neutral_800", # Inputs input_background_fill="*neutral_50", input_background_fill_dark="*neutral_800", input_border_color="*neutral_300", input_border_color_dark="*neutral_700", input_shadow="none", # Shadows shadow_drop="rgba(0,0,0,0.05) 0px 1px 2px 0px", shadow_drop_lg="rgba(0,0,0,0.1) 0px 10px 15px -3px, rgba(0,0,0,0.05) 0px 4px 6px -2px", ) with gr.Blocks(fill_height=True) as demo: gr.Markdown( """ # GLM-Image Editor Transform your images with AI-powered editing. Upload an image and describe how you want to modify it.
""", elem_classes=["apple-header"] ) with gr.Row(equal_height=True): with gr.Column(scale=1, min_width=350): gr.Markdown("### Input", elem_classes=["section-title"]) input_image = gr.Image( label="Upload Image", type="pil", sources=["upload", "clipboard"], elem_id="input-image", height=300, show_label=False, buttons=[] # Gradio 6: Replaces show_download_button=False ) prompt = gr.Textbox( label="Prompt", placeholder="Describe how you want to transform the image...", lines=4, max_lines=6, show_label=False, container=False ) with gr.Accordion("Advanced Settings", open=False): with gr.Row(): height = gr.Number( label="Height", value=1024, minimum=64, maximum=2048, step=32, info="Adjusted to multiple of 32" ) width = gr.Number( label="Width", value=1024, minimum=64, maximum=2048, step=32, info="Adjusted to multiple of 32" ) with gr.Row(): num_inference_steps = gr.Slider( label="Inference Steps", minimum=10, maximum=100, value=50, step=5, info="More steps = higher quality" ) guidance_scale = gr.Slider( label="Guidance Scale", minimum=0.5, maximum=3.0, value=1.5, step=0.1, info="Prompt adherence" ) with gr.Row(): seed = gr.Number( label="Seed", value=42, minimum=0, maximum=2**32 - 1, step=1, info="For reproducibility" ) random_seed_btn = gr.Button( "Random", size="sm" ) time_estimate = gr.Markdown( value=update_time_estimate(50), elem_classes=["time-estimate"] ) with gr.Row(): generate_btn = gr.Button( "Generate Image", variant="primary", size="lg" ) clear_btn = gr.Button( "Clear", variant="secondary", size="sm" ) with gr.Column(scale=1, min_width=350): gr.Markdown("### Output", elem_classes=["section-title"]) output_image = gr.Image( label="Generated Image", type="pil", elem_id="output-image", height=400, interactive=False, show_label=False ) status = gr.Textbox( label="Status", value="Ready to generate.", interactive=False, show_label=True, container=True ) download_btn = gr.DownloadButton( "Download Image", value=None, variant="secondary", interactive=False, size="lg" ) with gr.Accordion("Tips", open=False): gr.Markdown( """ - **Be specific** about colors and style - **Background changes**: "Replace the background with..." - **Style transfer**: "In the style of..." - **Lighting**: "Soft natural lighting" """ ) with gr.Accordion("Example Prompts", open=False): gr.Examples( examples=[ ["Replace the background with a futuristic city skyline at sunset"], ["Transform this into an oil painting in the style of Van Gogh"], ["Change the environment to an underwater coral reef"], ["Add a red sports car parked in the foreground"], ], inputs=prompt ) # Event Listeners input_image.change( fn=update_dimensions_from_image, inputs=input_image, outputs=[height, width], api_visibility="private" ) random_seed_btn.click( fn=generate_random_seed, outputs=seed, api_visibility="private" ) num_inference_steps.change( fn=update_time_estimate, inputs=num_inference_steps, outputs=time_estimate, api_visibility="private" ) generate_btn.click( fn=process_image, inputs=[ input_image, prompt, height, width, num_inference_steps, guidance_scale, seed ], outputs=[output_image, status, download_btn] ) def enable_download(img, file_path): if file_path is not None and os.path.exists(file_path): return { download_btn: gr.DownloadButton( value=file_path, interactive=True, variant="primary" ) } return { download_btn: gr.DownloadButton( value=None, interactive=False, variant="secondary" ) } def clear_all(): return { input_image: None, prompt: "", output_image: None, status: "Ready to generate.", download_btn: gr.DownloadButton( value=None, interactive=False, variant="secondary" ) } clear_btn.click( fn=clear_all, outputs=[input_image, prompt, output_image, status, download_btn], api_visibility="private" ) # CSS for Apple Styling apple_css = """ /* Apple System Font Stack */ .gradio-container { font-family: -apple-system, BlinkMacSystemFont, "Inter", "Segoe UI", Roboto, Helvetica, Arial, sans-serif !important; } /* Header Styling */ .apple-header { background-color: white; padding: 2rem 1rem; border-radius: 18px; box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.05), 0 2px 4px -1px rgba(0, 0, 0, 0.03); margin-bottom: 1.5rem; text-align: center; } .apple-header h1 { color: #1d1d1f; font-weight: 600; margin-bottom: 0.5rem; } .apple-header p { color: #86868b; font-size: 1.1em; margin: 0; } /* Section Titles */ .section-title { color: #1d1d1f; font-weight: 600; margin-bottom: 1rem; font-size: 1.2rem; } /* Buttons - Pill Shape */ button { border-radius: 9999px !important; transition: all 0.2s ease; font-weight: 500; } /* Time Estimate */ .time-estimate { font-size: 0.9em; color: #86868b; padding: 0.75rem; background-color: #f5f5f7; border-radius: 12px; text-align: center; margin-top: 1rem; } /* Input Areas */ #input-image, #output-image { border-radius: 18px !important; border: 1px solid #d2d2d7 !important; overflow: hidden; background-color: white; } /* Accordion */ .accordion { border-radius: 12px !important; border: 1px solid #d2d2d7 !important; } /* Markdown Tables (in Examples) */ table { border-collapse: collapse; width: 100%; border-radius: 12px; overflow: hidden; } th, td { padding: 12px; text-align: left; border-bottom: 1px solid #e5e5ea; } th { background-color: #f5f5f7; font-weight: 600; } """ demo.launch( theme=apple_theme, css=apple_css, footer_links=[ {"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"}, {"label": "GLM-Image Model", "url": "https://huggingface.co/zai-org/GLM-Image"}, {"label": "ZeroGPU", "url": "https://huggingface.co/docs/spaces/spaces-sdks/gradio-zerogpu"} ], server_name="0.0.0.0", server_port=7860 )