Spaces:

multimodalart
/

GLM-Image

Running on Zero

File size: 6,972 Bytes

import os
import gradio as gr
import numpy as np
import random
import spaces
import torch
from diffusers.pipelines.glm_image import GlmImagePipeline
from PIL import Image

# Runtime configuration shared by model loading and inference.
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"

# Largest seed accepted by the UI (maximum signed 32-bit integer).
MAX_SEED = np.iinfo(np.int32).max
# Upper bound, in pixels, for the width/height sliders.
MAX_IMAGE_SIZE = 2048

# Load the model once at import time so every request reuses the same pipeline.
# The dtype/device constants above are the single source of truth instead of
# repeating hard-coded values here.
pipe = GlmImagePipeline.from_pretrained(
    "zai-org/GLM-Image",
    torch_dtype=dtype,
).to(device)


def _collect_input_images(input_images):
    """Normalize the gallery payload into a list of RGB PIL images.

    Args:
        input_images: ``None``, an empty sequence, or a sequence whose items
            are either an image or a tuple/list whose first element is the
            image (presumably ``(image, caption)`` pairs from the gallery).
            An image may be a file path or a ``PIL.Image.Image``; anything
            else is passed through unchanged.

    Returns:
        ``None`` when no images were supplied, otherwise the list of images.
    """
    if not input_images:
        return None

    images = []
    for item in input_images:
        img = item[0] if isinstance(item, (tuple, list)) else item
        if isinstance(img, str):
            # convert() returns an independent in-memory image, so the file
            # handle can be released as soon as the conversion is done.
            with Image.open(img) as opened:
                img = opened.convert("RGB")
        elif isinstance(img, Image.Image):
            img = img.convert("RGB")
        images.append(img)
    return images


@spaces.GPU(duration=120)
def infer(prompt, input_images=None, seed=42, randomize_seed=False, width=1024, height=1024,
          num_inference_steps=50, guidance_scale=1.5, progress=gr.Progress(track_tqdm=True)):
    """Generate one image from a prompt, optionally conditioned on input images.

    Args:
        prompt: Text prompt (or editing instructions when images are given).
        input_images: Optional gallery payload; see ``_collect_input_images``.
        seed: Seed for the random generator; ignored when ``randomize_seed``.
        randomize_seed: When true, draw a fresh seed in ``[0, MAX_SEED]``.
        width: Requested width; floored to a multiple of 32.
        height: Requested height; floored to a multiple of 32.
        num_inference_steps: Step count forwarded to the pipeline.
        guidance_scale: Guidance scale forwarded to the pipeline.
        progress: Gradio progress tracker. It is not referenced in the body;
            it is declared with ``track_tqdm=True`` as a default argument.

    Returns:
        A ``(image, seed)`` tuple: the first generated image and the integer
        seed that was actually used.
    """
    if randomize_seed:
        print("Randomizing seed")
        seed = random.randint(0, MAX_SEED)
    # Coerce numeric inputs in case they arrive as floats: manual_seed()
    # requires an int, and sizes/step counts are integral quantities.
    seed = int(seed)
    num_inference_steps = int(num_inference_steps)

    # Ensure dimensions are multiples of 32
    width = (int(width) // 32) * 32
    height = (int(height) // 32) * 32

    # Use the module-level device so the generator lives where the model does.
    generator = torch.Generator(device=device).manual_seed(seed)

    print("preparing images")
    # Prepare image list for image-to-image mode
    image_list = _collect_input_images(input_images)

    print("handling kwargs")
    pipe_kwargs = {
        "prompt": prompt,
        "height": height,
        "width": width,
        "num_inference_steps": num_inference_steps,
        "guidance_scale": guidance_scale,
        "generator": generator,
    }
    print("adding images")
    # Only pass "image" in image-to-image mode; text-to-image omits the key.
    if image_list is not None:
        pipe_kwargs["image"] = image_list
    print("running kwargs")
    image = pipe(**pipe_kwargs).images[0]

    return image, seed


def update_dimensions_from_image(image_list):
    """Suggest width/height slider values from the first uploaded image.

    The longer side is set to 1024 and the shorter side is scaled to keep the
    aspect ratio; both are then rounded to the nearest multiple of 32 and
    clamped to ``[256, MAX_IMAGE_SIZE]``.

    Args:
        image_list: ``None``, an empty sequence, or a sequence whose first
            item is an image or a tuple whose first element is the image.
            The image may be a file path or any object exposing ``.size`` as
            ``(width, height)``.

    Returns:
        ``(width, height)``. Falls back to ``(1024, 1024)`` when there is no
        image or the image reports a non-positive dimension.
    """
    default_dims = (1024, 1024)
    if not image_list:
        return default_dims

    # Only the first image determines the suggested dimensions.
    first = image_list[0]
    img = first[0] if isinstance(first, tuple) else first

    if isinstance(img, str):
        # Use a context manager so the file handle is released once the
        # size has been read.
        with Image.open(img) as opened:
            img_width, img_height = opened.size
    else:
        img_width, img_height = img.size

    # A degenerate image would otherwise cause a division by zero below.
    if img_width <= 0 or img_height <= 0:
        return default_dims

    aspect_ratio = img_width / img_height

    if aspect_ratio >= 1:  # Landscape or square
        new_width = 1024
        new_height = int(1024 / aspect_ratio)
    else:  # Portrait
        new_height = 1024
        new_width = int(1024 * aspect_ratio)

    # Round to nearest multiple of 32 (GLM-Image requirement)
    new_width = round(new_width / 32) * 32
    new_height = round(new_height / 32) * 32

    # Ensure within the range offered by the sliders
    new_width = max(256, min(MAX_IMAGE_SIZE, new_width))
    new_height = max(256, min(MAX_IMAGE_SIZE, new_height))

    return new_width, new_height

# Custom stylesheet handed to demo.launch(): centers the #col-container
# column with a 1200px maximum width, and makes images inside elements with
# the "gallery-container" class use object-fit: contain.
css = """
#col-container {
    margin: 0 auto;
    max-width: 1200px;
}
.gallery-container img {
    object-fit: contain;
}
"""

# Build the UI. Components are created inside nested context managers, so the
# order of the statements below defines the page layout.
with gr.Blocks() as demo:
    
    # elem_id matches the "#col-container" rule in the css string.
    with gr.Column(elem_id="col-container"):
        gr.Markdown("""# GLM-Image
GLM-Image is a hybrid auto-regressive + diffusion 9B parameters model by z.ai
[[Model](https://huggingface.co/zai-org/GLM-Image)]
        """)
        
        with gr.Row():
            # First column: prompt, run button, optional input images, settings.
            with gr.Column():
                prompt = gr.Text(
                    label="Prompt",
                    show_label=False,
                    max_lines=4,
                    placeholder="Enter your prompt (for text-to-image) or editing instructions (for image-to-image)",
                    container=False,
                    scale=3
                )
                
                run_button = gr.Button("🎨 Generate", variant="primary", scale=1)
                
                with gr.Accordion("📷 Input Image(s) (optional - for image-to-image mode)", open=True):
                    # elem_classes matches the ".gallery-container img" css rule.
                    input_images = gr.Gallery(
                        label="Input Image(s)",
                        type="pil",
                        columns=3,
                        rows=1,
                        elem_classes="gallery-container"
                    )
                    gr.Markdown("*Upload one or more images for image-to-image generation. Leave empty for text-to-image mode.*")
                
                with gr.Accordion("⚙️ Advanced Settings", open=False):
                    # This slider is also an output of infer (see gr.on below),
                    # so it is overwritten with the seed that was actually used.
                    seed = gr.Slider(
                        label="Seed",
                        minimum=0,
                        maximum=MAX_SEED,
                        step=1,
                        value=42,
                    )
                    
                    # Checked by default: infer then draws a random seed
                    # instead of using the slider value.
                    randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
                    
                    with gr.Row():
                        # Width/height step in multiples of 32, matching the
                        # flooring that infer applies to both dimensions.
                        width = gr.Slider(
                            label="Width",
                            minimum=256,
                            maximum=MAX_IMAGE_SIZE,
                            step=32,
                            value=1024,
                            info="Must be a multiple of 32"
                        )
                        
                        height = gr.Slider(
                            label="Height",
                            minimum=256,
                            maximum=MAX_IMAGE_SIZE,
                            step=32,
                            value=1024,
                            info="Must be a multiple of 32"
                        )
                    
                    with gr.Row():
                        # Defaults mirror the defaults in infer's signature.
                        num_inference_steps = gr.Slider(
                            label="Number of inference steps",
                            minimum=1,
                            maximum=100,
                            step=1,
                            value=50,
                        )
                        
                        guidance_scale = gr.Slider(
                            label="Guidance scale",
                            minimum=0.0,
                            maximum=10.0,
                            step=0.1,
                            value=1.5,
                        )
                
            # Second column: the generated image.
            with gr.Column():
                result = gr.Image(label="Result", show_label=False)

    # Auto-update dimensions when images are uploaded
    input_images.upload(
        fn=update_dimensions_from_image,
        inputs=[input_images],
        outputs=[width, height]
    )

    # Run inference on either a button click or submitting the prompt box.
    # The input order must match infer's positional parameters; the outputs
    # receive infer's (image, seed) return value.
    gr.on(
        triggers=[run_button.click, prompt.submit],
        fn=infer,
        inputs=[prompt, input_images, seed, randomize_seed, width, height, num_inference_steps, guidance_scale],
        outputs=[result, seed]
    )

# NOTE(review): theme and css are passed to launch() rather than gr.Blocks();
# presumably this targets a Gradio version that accepts them here — confirm
# against the pinned Gradio version.
demo.launch(theme=gr.themes.Citrus(), css=css)