# Source: Hugging Face Space "akhaliq/GLM-Image" (Running on Zero).
# File size: 17,637 bytes.

"""
GLM-Image to Image Editing App
A Gradio 6 application for image-to-image editing using the GLM-Image model.

This app allows users to upload an image and provide a prompt to transform
the image using the GLM-Image diffusion model.
Features ZeroGPU support for dynamic GPU allocation on Hugging Face Spaces.
"""

# Import spaces FIRST - before any CUDA-related packages!
import spaces

import math
import os
import random
import tempfile
import time

import gradio as gr
import torch
from diffusers.pipelines.glm_image import GlmImagePipeline
from PIL import Image

# Create a temp directory for saving images.
# Generated PNGs are written here so the download button can serve them.
# NOTE(review): nothing in this file removes the directory or its files.
TEMP_DIR = tempfile.mkdtemp(prefix="glm_image_")

# Load the GLM-Image model directly with bfloat16 precision.
# This runs once at import time; `pipe` is then shared by every request
# handled by process_image().
print("Loading GLM-Image model... This may take a few minutes.")
pipe = GlmImagePipeline.from_pretrained(
    "zai-org/GLM-Image",
    torch_dtype=torch.bfloat16,
    device_map="cuda"
)
print("Model loaded successfully!")

def calculate_duration(num_inference_steps: int) -> int:
    """
    Calculate the estimated duration in seconds based on inference steps.
    ZeroGPU uses this to prioritize shorter tasks in the queue.

    The estimate is a fixed base time plus a fixed cost per step, rounded
    UP to a whole number of seconds so that the value really is an ``int``
    (as the annotation promises) and never under-reserves GPU time.

    Args:
        num_inference_steps: Number of diffusion steps

    Returns:
        Estimated duration in whole seconds
    """
    step_duration = 3.75  # estimated seconds per diffusion step
    base_time = 15  # fixed overhead in seconds, independent of step count
    return math.ceil(base_time + num_inference_steps * step_duration)

def estimate_display_time(num_inference_steps: int) -> str:
    """
    Format the estimated processing time for display.

    Durations under one minute are rendered as "~Ns"; anything longer is
    rendered as "~Mm Ns". Fractions of a second are truncated.
    """
    total = calculate_duration(num_inference_steps)

    if total >= 60:
        whole_minutes, leftover = divmod(total, 60)
        return f"~{int(whole_minutes)}m {int(leftover)}s"

    return f"~{int(total)}s"

def validate_dimensions(
    height: float,
    width: float,
    multiple: int = 32,
) -> tuple[int, int]:
    """
    Validate and adjust dimensions to be multiples of ``multiple``.
    GLM-Image requires height and width to be multiples of 32.

    Each value is rounded UP to the next multiple. The result is always a
    pair of positive ``int``s, even when the inputs are floats (a numeric
    UI field can deliver e.g. ``1000.5``) or are zero/negative, in which
    case a single ``multiple`` is returned for that side.

    Args:
        height: Requested height in pixels
        width: Requested width in pixels
        multiple: Granularity to round up to (defaults to 32)

    Returns:
        Tuple of (adjusted_height, adjusted_width)

    Raises:
        ValueError: If ``multiple`` is not positive
    """
    if multiple <= 0:
        raise ValueError("multiple must be a positive integer")

    def _round_up(value: float) -> int:
        # -(-v // m) is ceiling division; int() drops the float-ness that
        # floor division keeps when `value` is a float.
        blocks = int(-(-value // multiple))
        # Never return a zero or negative size.
        return max(multiple, blocks * multiple)

    return _round_up(height), _round_up(width)

def get_image_dimensions(image: Image.Image) -> tuple:
    """Return the size of a PIL image as (height, width).

    PIL reports ``image.size`` as (width, height); this helper swaps the
    order.
    """
    img_width, img_height = image.size
    return img_height, img_width

def get_duration(
    image: Image.Image,
    prompt: str,
    height: int,
    width: int,
    num_inference_steps: int,
    guidance_scale: float,
    seed: int,
    progress: gr.Progress = None
) -> int:
    """
    Duration callback handed to ``spaces.GPU`` for ``process_image``.

    The signature mirrors ``process_image`` so it can receive the same
    arguments; only ``num_inference_steps`` influences the estimate.

    Args:
        image: Input PIL Image (unused here)
        prompt: Text prompt describing the desired transformation (unused here)
        height: Output height (unused here)
        width: Output width (unused here)
        num_inference_steps: Number of diffusion steps
        guidance_scale: Guidance scale for diffusion (unused here)
        seed: Random seed for reproducibility (unused here)
        progress: Gradio progress tracker (unused here)

    Returns:
        Estimated duration in seconds, as computed by calculate_duration()
    """
    estimated_seconds = calculate_duration(num_inference_steps)
    return estimated_seconds

@spaces.GPU(duration=get_duration)
def process_image(
    image: Image.Image,
    prompt: str,
    height: int,
    width: int,
    num_inference_steps: int,
    guidance_scale: float,
    seed: int,
    progress: gr.Progress = None
) -> tuple:
    """
    Process the image through the GLM-Image pipeline.
    Uses ZeroGPU for dynamic GPU allocation.

    Any failure (missing image, empty prompt, pipeline error, ...) is caught
    and reported through the status message instead of being raised, so the
    function always returns a 3-tuple.

    Args:
        image: Input PIL Image
        prompt: Text prompt describing the desired transformation
        height: Requested output height; rounded up to a multiple of 32
        width: Requested output width; rounded up to a multiple of 32
        num_inference_steps: Number of diffusion steps
        guidance_scale: Guidance scale for diffusion
        seed: Random seed for reproducibility
        progress: Optional Gradio progress tracker. It is only used when a
            truthy tracker is passed in.
            NOTE(review): with a default of ``None`` Gradio presumably does
            not inject a tracker on its own - confirm against the Gradio
            ``Progress`` documentation.

    Returns:
        Tuple of (output_image, status_message, file_path)
        file_path: Path to the saved image file for download (or None if failed)
    """
    try:
        if image is None:
            raise ValueError("Please upload an image first.")

        if not prompt or not prompt.strip():
            raise ValueError("Please enter a prompt describing the image transformation.")

        # Numeric UI widgets can deliver floats (e.g. 1000.5 or 42.0); the
        # pipeline and torch.Generator.manual_seed need real integers.
        height, width = (int(side) for side in validate_dimensions(height, width))
        num_inference_steps = int(num_inference_steps)
        seed = int(seed)

        if progress:
            progress(0.2, desc="Preparing image...")
        input_image = image.convert("RGB")

        generator = torch.Generator(device="cuda").manual_seed(seed)

        if progress:
            # Only `desc` is passed: `visible` is not a keyword that
            # gr.Progress.__call__ accepts.
            progress(0.4, desc="Generating image...")
        result = pipe(
            prompt=prompt,
            image=[input_image],
            height=height,
            width=width,
            num_inference_steps=num_inference_steps,
            guidance_scale=guidance_scale,
            generator=generator
        )

        output_image = result.images[0]

        # Save image to temp file for download button
        timestamp = int(time.time() * 1000)
        temp_path = os.path.join(TEMP_DIR, f"glm_output_{timestamp}.png")
        output_image.save(temp_path, format="PNG")

        if progress:
            progress(1.0, desc="Complete!")

        status = f"Successfully generated! ({height}x{width}, {num_inference_steps} steps)"
        return output_image, status, temp_path

    except Exception as e:
        # UI boundary: surface the problem in the status box, but also leave
        # a trace in the server log so failures are not invisible.
        print(f"process_image failed: {e!r}")
        error_msg = f"Error: {str(e)}"
        return None, error_msg, None

def update_dimensions_from_image(
    image: Image.Image,
    min_size: int = 64,
    max_size: int = 2048,
) -> tuple:
    """
    Update height and width based on uploaded image dimensions.

    The image size is rounded up to multiples of 32 with
    validate_dimensions() (the same helper process_image uses) and kept
    inside ``[min_size, max_size]``. The defaults mirror the minimum/maximum
    configured on the Height/Width number inputs in this file, so the values
    pushed into those inputs are always ones they accept. Images whose
    longest side exceeds ``max_size`` are scaled down proportionally so the
    aspect ratio is roughly preserved.

    Args:
        image: Uploaded PIL image, or None when the input was cleared
        min_size: Smallest value to return for either side
        max_size: Largest value to return for either side

    Returns:
        Tuple of (height, width); (1024, 1024) when no image is present
    """
    if image is None:
        return 1024, 1024

    h, w = get_image_dimensions(image)

    # Integer arithmetic keeps the scaled sizes exact (no float drift that
    # could push a value over the next multiple of 32).
    longest = max(h, w)
    if longest > max_size:
        h = h * max_size // longest
        w = w * max_size // longest

    adjusted_h, adjusted_w = validate_dimensions(h, w)

    def _clamp(value):
        return int(min(max(value, min_size), max_size))

    return _clamp(adjusted_h), _clamp(adjusted_w)

def generate_random_seed() -> int:
    """Return a random seed in the inclusive range [0, 2**32 - 1]."""
    seed_space = 2**32  # number of distinct seeds: 0 .. 2**32 - 1
    return random.randrange(seed_space)

def update_time_estimate(num_steps: int) -> str:
    """Build the Markdown text shown in the estimated-time display."""
    readable = estimate_display_time(num_steps)
    return f"**Estimated time:** {readable}"

# Apple-style Theme: Clean, minimal, rounded corners, soft shadows.
# Built from Gradio's Soft theme and then customised via .set(); values that
# start with "*" refer to other theme variables rather than literal CSS.
# The theme object is passed to demo.launch() at the bottom of the file.
apple_theme = gr.themes.Soft(
    primary_hue="blue",
    secondary_hue="gray",
    neutral_hue="gray",
    font=gr.themes.GoogleFont("Inter"),
    text_size="lg",
    spacing_size="lg",
    radius_size="lg" # Larger radius for rounded corners
).set(
    # Apple-like Colors
    body_background_fill="*neutral_50", # Light gray page background (lightest neutral shade)
    body_background_fill_dark="*neutral_950",
    block_background_fill="*background_fill_primary", # Cards use the theme's primary background fill
    block_border_width="0px", # Clean look, no borders
    block_shadow="*shadow_sm", # Subtle shadow
    
    # Typography
    block_title_text_weight="600",
    block_title_text_color="*neutral_900",
    body_text_color="*neutral_800",
    body_text_color_subdued="*neutral_500",
    
    # Buttons
    button_primary_background_fill="*primary_500", # Blue, from primary_hue="blue"
    button_primary_background_fill_hover="*primary_600",
    button_primary_text_color="white",
    button_primary_border_color="*primary_500",
    button_secondary_background_fill="*neutral_200",
    button_secondary_background_fill_hover="*neutral_300",
    button_secondary_text_color="*neutral_800",
    
    # Inputs
    input_background_fill="*neutral_50",
    input_background_fill_dark="*neutral_800",
    input_border_color="*neutral_300",
    input_border_color_dark="*neutral_700",
    input_shadow="none",
    
    # Shadows (literal CSS box-shadow values)
    shadow_drop="rgba(0,0,0,0.05) 0px 1px 2px 0px",
    shadow_drop_lg="rgba(0,0,0,0.1) 0px 10px 15px -3px, rgba(0,0,0,0.05) 0px 4px 6px -2px",
)

# UI definition: a two-column layout (inputs on the left, results on the
# right), an examples accordion, and the event wiring that connects the
# widgets to the handler functions defined above.
with gr.Blocks(fill_height=True) as demo:

    # ---- Header ----
    gr.Markdown(
        """
        # GLM-Image Editor
        
        Transform your images with AI-powered editing. Upload an image and describe how you want to modify it.
        
        <div align="center">
        <a href="https://huggingface.co/spaces/akhaliq/anycoder" style="color: #007AFF; text-decoration: none;">Built with anycoder</a>
        </div>
        """,
        elem_classes=["apple-header"]
    )

    with gr.Row(equal_height=True):
        # ---- Left column: inputs ----
        with gr.Column(scale=1, min_width=350):
            gr.Markdown("### Input", elem_classes=["section-title"])

            input_image = gr.Image(
                label="Upload Image",
                type="pil",
                sources=["upload", "clipboard"],
                elem_id="input-image",
                height=300,
                show_label=False,
                buttons=[] # Gradio 6: Replaces show_download_button=False
            )

            prompt = gr.Textbox(
                label="Prompt",
                placeholder="Describe how you want to transform the image...",
                lines=4,
                max_lines=6,
                show_label=False,
                container=False
            )

            with gr.Accordion("Advanced Settings", open=False):
                with gr.Row():
                    # precision=0 makes the component hand integers to the
                    # handlers; the pipeline needs integer pixel sizes.
                    height = gr.Number(
                        label="Height",
                        value=1024,
                        minimum=64,
                        maximum=2048,
                        step=32,
                        precision=0,
                        info="Adjusted to multiple of 32"
                    )
                    width = gr.Number(
                        label="Width",
                        value=1024,
                        minimum=64,
                        maximum=2048,
                        step=32,
                        precision=0,
                        info="Adjusted to multiple of 32"
                    )

                with gr.Row():
                    num_inference_steps = gr.Slider(
                        label="Inference Steps",
                        minimum=10,
                        maximum=100,
                        value=50,
                        step=5,
                        info="More steps = higher quality"
                    )
                    guidance_scale = gr.Slider(
                        label="Guidance Scale",
                        minimum=0.5,
                        maximum=3.0,
                        value=1.5,
                        step=0.1,
                        info="Prompt adherence"
                    )

                with gr.Row():
                    # precision=0: the seed is passed to
                    # torch.Generator.manual_seed, which needs an integer.
                    seed = gr.Number(
                        label="Seed",
                        value=42,
                        minimum=0,
                        maximum=2**32 - 1,
                        step=1,
                        precision=0,
                        info="For reproducibility"
                    )
                    random_seed_btn = gr.Button(
                        "Random",
                        size="sm"
                    )

                # Initial text matches the slider's default of 50 steps.
                time_estimate = gr.Markdown(
                    value=update_time_estimate(50),
                    elem_classes=["time-estimate"]
                )

            with gr.Row():
                generate_btn = gr.Button(
                    "Generate Image",
                    variant="primary",
                    size="lg"
                )

            clear_btn = gr.Button(
                "Clear",
                variant="secondary",
                size="sm"
            )

        # ---- Right column: results ----
        with gr.Column(scale=1, min_width=350):
            gr.Markdown("### Output", elem_classes=["section-title"])

            output_image = gr.Image(
                label="Generated Image",
                type="pil",
                elem_id="output-image",
                height=400,
                interactive=False,
                show_label=False
            )

            status = gr.Textbox(
                label="Status",
                value="Ready to generate.",
                interactive=False,
                show_label=True,
                container=True
            )

            # Starts disabled; enable_download() switches it on once a file
            # has been generated.
            download_btn = gr.DownloadButton(
                "Download Image",
                value=None,
                variant="secondary",
                interactive=False,
                size="lg"
            )

            with gr.Accordion("Tips", open=False):
                gr.Markdown(
                    """
                    - **Be specific** about colors and style
                    - **Background changes**: "Replace the background with..."
                    - **Style transfer**: "In the style of..."
                    - **Lighting**: "Soft natural lighting"
                    """
                )

    with gr.Accordion("Example Prompts", open=False):
        gr.Examples(
            examples=[
                ["Replace the background with a futuristic city skyline at sunset"],
                ["Transform this into an oil painting in the style of Van Gogh"],
                ["Change the environment to an underwater coral reef"],
                ["Add a red sports car parked in the foreground"],
            ],
            inputs=prompt
        )

    # ---- UI-only handlers ----
    def enable_download(img, file_path):
        """Enable the download button when a generated file is available.

        Args:
            img: Current output image (unused; kept for the existing signature)
            file_path: Path currently held by the download button, or None

        Returns:
            Update dict for ``download_btn``: enabled and pointing at
            ``file_path`` when that file exists, otherwise disabled.
        """
        if file_path is not None and os.path.exists(file_path):
            return {
                download_btn: gr.DownloadButton(
                    value=file_path,
                    interactive=True,
                    variant="primary"
                )
            }
        return {
            download_btn: gr.DownloadButton(
                value=None,
                interactive=False,
                variant="secondary"
            )
        }

    def clear_all():
        """Reset the input, prompt, output, status and download button."""
        return {
            input_image: None,
            prompt: "",
            output_image: None,
            status: "Ready to generate.",
            download_btn: gr.DownloadButton(
                value=None,
                interactive=False,
                variant="secondary"
            )
        }

    # ---- Event Listeners ----
    input_image.change(
        fn=update_dimensions_from_image,
        inputs=input_image,
        outputs=[height, width],
        api_visibility="private"
    )

    random_seed_btn.click(
        fn=generate_random_seed,
        outputs=seed,
        api_visibility="private"
    )

    num_inference_steps.change(
        fn=update_time_estimate,
        inputs=num_inference_steps,
        outputs=time_estimate,
        api_visibility="private"
    )

    generate_event = generate_btn.click(
        fn=process_image,
        inputs=[
            input_image,
            prompt,
            height,
            width,
            num_inference_steps,
            guidance_scale,
            seed
        ],
        outputs=[output_image, status, download_btn]
    )

    # process_image only sets the button's file; without this follow-up step
    # the button would stay interactive=False and could never be clicked.
    generate_event.then(
        fn=enable_download,
        inputs=[output_image, download_btn],
        outputs=[download_btn],
        api_visibility="private"
    )

    clear_btn.click(
        fn=clear_all,
        outputs=[input_image, prompt, output_image, status, download_btn],
        api_visibility="private"
    )

# CSS for Apple Styling.
# Custom stylesheet passed to demo.launch(css=...). The class selectors
# (.apple-header, .section-title, .time-estimate) and the ids (#input-image,
# #output-image) match the elem_classes / elem_id values set on the
# components in the Blocks layout above.
apple_css = """
/* Apple System Font Stack */
.gradio-container {
    font-family: -apple-system, BlinkMacSystemFont, "Inter", "Segoe UI", Roboto, Helvetica, Arial, sans-serif !important;
}

/* Header Styling */
.apple-header {
    background-color: white;
    padding: 2rem 1rem;
    border-radius: 18px;
    box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.05), 0 2px 4px -1px rgba(0, 0, 0, 0.03);
    margin-bottom: 1.5rem;
    text-align: center;
}
.apple-header h1 {
    color: #1d1d1f;
    font-weight: 600;
    margin-bottom: 0.5rem;
}
.apple-header p {
    color: #86868b;
    font-size: 1.1em;
    margin: 0;
}

/* Section Titles */
.section-title {
    color: #1d1d1f;
    font-weight: 600;
    margin-bottom: 1rem;
    font-size: 1.2rem;
}

/* Buttons - Pill Shape */
button {
    border-radius: 9999px !important;
    transition: all 0.2s ease;
    font-weight: 500;
}

/* Time Estimate */
.time-estimate {
    font-size: 0.9em;
    color: #86868b;
    padding: 0.75rem;
    background-color: #f5f5f7;
    border-radius: 12px;
    text-align: center;
    margin-top: 1rem;
}

/* Input Areas */
#input-image, #output-image {
    border-radius: 18px !important;
    border: 1px solid #d2d2d7 !important;
    overflow: hidden;
    background-color: white;
}

/* Accordion */
.accordion {
    border-radius: 12px !important;
    border: 1px solid #d2d2d7 !important;
}

/* Markdown Tables (in Examples) */
table {
    border-collapse: collapse;
    width: 100%;
    border-radius: 12px;
    overflow: hidden;
}
th, td {
    padding: 12px;
    text-align: left;
    border-bottom: 1px solid #e5e5ea;
}
th {
    background-color: #f5f5f7;
    font-weight: 600;
}
"""

# Start the Gradio server with the custom theme, stylesheet and footer links.
# Binds to all network interfaces (0.0.0.0) on port 7860.
# NOTE: this runs at import time - there is no `if __name__ == "__main__"` guard.
demo.launch(
    theme=apple_theme,
    css=apple_css,
    footer_links=[
        {"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"},
        {"label": "GLM-Image Model", "url": "https://huggingface.co/zai-org/GLM-Image"},
        {"label": "ZeroGPU", "url": "https://huggingface.co/docs/spaces/spaces-sdks/gradio-zerogpu"}
    ],
    server_name="0.0.0.0",
    server_port=7860
)