Spaces:

akhaliq
/

GLM-Image

Running on Zero

App Files Files Community

GLM-Image / app.py

akhaliq HF Staff

Update app.py from anycoder

6320bc8 verified about 23 hours ago

raw

history blame contribute delete

17.6 kB

	"""
	GLM-Image Image-to-Image Editing App
	A Gradio 6 application for image-to-image editing using the GLM-Image model.

	This app allows users to upload an image and provide a prompt to transform
	the image using the GLM-Image diffusion model.
	Features ZeroGPU support for dynamic GPU allocation on Hugging Face Spaces.
	"""

	# Import spaces FIRST - before any CUDA-related packages!
	import spaces
	import gradio as gr
	import torch
	from diffusers.pipelines.glm_image import GlmImagePipeline
	from PIL import Image
	import time
	import random
	import os
	import tempfile

	# Scratch directory that receives the generated PNG files offered for download.
	TEMP_DIR = tempfile.mkdtemp(prefix="glm_image_")

	# Build the GLM-Image pipeline once, at import time, in bfloat16 precision;
	# every request handled by this process reuses the same `pipe` object.
	print("Loading GLM-Image model... This may take a few minutes.")
	pipe = GlmImagePipeline.from_pretrained(
	    "zai-org/GLM-Image",
	    device_map="cuda",
	    torch_dtype=torch.bfloat16,
	)
	print("Model loaded successfully!")

	def calculate_duration(num_inference_steps: int) -> int:
	    """
	    Calculate the estimated duration in seconds based on inference steps.
	    ZeroGPU uses this to prioritize shorter tasks in the queue.

	    The estimate is a fixed overhead plus a per-step cost. The per-step cost
	    is fractional, so the total is rounded up to a whole second: this keeps
	    the result an ``int`` (as the signature promises) and never under-reports
	    the time requested.

	    Args:
	        num_inference_steps: Number of diffusion steps

	    Returns:
	        Estimated duration in whole seconds
	    """
	    import math  # local import so the block does not depend on file-level imports

	    step_duration = 3.75  # seconds per diffusion step
	    base_time = 15  # fixed overhead in seconds
	    return base_time + math.ceil(num_inference_steps * step_duration)

	def estimate_display_time(num_inference_steps: int) -> str:
	    """
	    Format the estimated processing time as a short human-readable string.

	    Estimates below one minute are rendered as ``~<seconds>s``; anything
	    longer is rendered as ``~<minutes>m <seconds>s``.
	    """
	    total_seconds = calculate_duration(num_inference_steps)

	    # Short runs: seconds only.
	    if total_seconds < 60:
	        return f"~{int(total_seconds)}s"

	    # Longer runs: split into whole minutes and the remaining seconds.
	    whole_minutes, leftover_seconds = divmod(total_seconds, 60)
	    return f"~{int(whole_minutes)}m {int(leftover_seconds)}s"

	def validate_dimensions(height: int, width: int) -> tuple:
	    """
	    Validate and adjust dimensions to be multiples of 32.
	    GLM-Image requires height and width to be multiples of 32.

	    Each value is rounded *up* to the next multiple of 32 (values that are
	    already multiples are returned unchanged). The results are always
	    ``int``, even when the caller passes floats such as ``1000.5``.

	    Args:
	        height: Requested output height in pixels
	        width: Requested output width in pixels

	    Returns:
	        Tuple of (adjusted_height, adjusted_width) as integers
	    """
	    def _round_up_to_32(value) -> int:
	        # -(-v // 32) is ceiling division; it works for ints and floats alike,
	        # and int() guarantees an integer result for float input.
	        return int(-(-value // 32)) * 32

	    return _round_up_to_32(height), _round_up_to_32(width)

	def get_image_dimensions(image: Image.Image) -> tuple:
	    """Return the size of a PIL image as ``(height, width)``."""
	    # `image.size` is read as (width, height); swap the order for callers.
	    img_width, img_height = image.size
	    return img_height, img_width

	def get_duration(
	    image: Image.Image,
	    prompt: str,
	    height: int,
	    width: int,
	    num_inference_steps: int,
	    guidance_scale: float,
	    seed: int,
	    progress: gr.Progress = None
	) -> int:
	    """
	    Dynamic duration callback used by the ``spaces.GPU`` decorator.

	    The signature mirrors ``process_image`` parameter for parameter, but only
	    ``num_inference_steps`` influences the result; every other argument is
	    accepted and ignored.

	    Args:
	        image: Input PIL Image (unused)
	        prompt: Text prompt describing the desired transformation (unused)
	        height: Output height (unused)
	        width: Output width (unused)
	        num_inference_steps: Number of diffusion steps
	        guidance_scale: Guidance scale for diffusion (unused)
	        seed: Random seed for reproducibility (unused)
	        progress: Optional Gradio progress tracker (unused)

	    Returns:
	        Estimated duration in seconds, as computed by ``calculate_duration``
	    """
	    estimated_runtime = calculate_duration(num_inference_steps)
	    return estimated_runtime

	@spaces.GPU(duration=get_duration)
	def process_image(
	    image: Image.Image,
	    prompt: str,
	    height: int,
	    width: int,
	    num_inference_steps: int,
	    guidance_scale: float,
	    seed: int,
	    progress: gr.Progress = None
	) -> tuple:
	    """
	    Process the image through the GLM-Image pipeline.
	    Uses ZeroGPU for dynamic GPU allocation.

	    Any failure (missing image, empty prompt, pipeline error, ...) is caught
	    and reported through the status message instead of being raised.

	    Args:
	        image: Input PIL Image
	        prompt: Text prompt describing the desired transformation
	        height: Output height (rounded up to a multiple of 32)
	        width: Output width (rounded up to a multiple of 32)
	        num_inference_steps: Number of diffusion steps
	        guidance_scale: Guidance scale for diffusion
	        seed: Random seed for reproducibility
	        progress: Optional Gradio progress tracker; progress updates are only
	            emitted when a tracker is actually supplied

	    Returns:
	        Tuple of (output_image, status_message, file_path)
	        file_path: Path to the saved image file for download (or None if failed)
	    """
	    try:
	        if image is None:
	            raise ValueError("Please upload an image first.")

	        if not prompt or not prompt.strip():
	            raise ValueError("Please enter a prompt describing the image transformation.")

	        # Numeric UI widgets can deliver floats; the seed, the step count and
	        # the dimensions must be integers before they reach torch / the pipeline.
	        height, width = validate_dimensions(int(height), int(width))
	        num_inference_steps = int(num_inference_steps)
	        seed = int(seed)

	        # Compare against None explicitly rather than relying on the truthiness
	        # of the tracker object.
	        has_progress = progress is not None

	        # No "Loading model..." step: the pipeline is created at import time.
	        if has_progress:
	            progress(0.2, desc="Preparing image...")
	        input_image = image.convert("RGB")

	        generator = torch.Generator(device="cuda").manual_seed(seed)

	        if has_progress:
	            # Only `desc` is passed: the tracker call has no `visible` keyword.
	            progress(0.4, desc="Generating image...")
	        result = pipe(
	            prompt=prompt,
	            image=[input_image],
	            height=height,
	            width=width,
	            num_inference_steps=num_inference_steps,
	            guidance_scale=guidance_scale,
	            generator=generator
	        )

	        output_image = result.images[0]

	        # Save image to temp file for download button
	        timestamp = int(time.time() * 1000)
	        temp_path = os.path.join(TEMP_DIR, f"glm_output_{timestamp}.png")
	        output_image.save(temp_path, format="PNG")

	        if has_progress:
	            progress(1.0, desc="Complete!")

	        status = f"Successfully generated! ({height}x{width}, {num_inference_steps} steps)"
	        return output_image, status, temp_path

	    except Exception as e:
	        # UI boundary: surface the problem in the status box instead of raising.
	        error_msg = f"Error: {str(e)}"
	        return None, error_msg, None

	def update_dimensions_from_image(image: Image.Image) -> tuple:
	    """
	    Update height and width based on uploaded image dimensions.

	    Args:
	        image: The uploaded PIL image, or None when the input was cleared

	    Returns:
	        Tuple of (height, width), each rounded up to a multiple of 32;
	        (1024, 1024) when no image is present
	    """
	    if image is None:
	        return 1024, 1024
	    # Reuse the shared rounding helper instead of duplicating the arithmetic.
	    h, w = get_image_dimensions(image)
	    return validate_dimensions(h, w)

	def generate_random_seed() -> int:
	    """Draw a random seed in the inclusive range 0 .. 2**32 - 1 for the diffusion process."""
	    exclusive_upper_bound = 2**32
	    return random.randrange(exclusive_upper_bound)

	def update_time_estimate(num_steps: int) -> str:
	    """Build the text shown in the time-estimate label for the given step count."""
	    readable_estimate = estimate_display_time(num_steps)
	    return f"Estimated time: {readable_estimate}"

	# Apple-inspired theme: starts from Gradio's Soft theme with large text,
	# spacing and corner radii, then overrides individual style variables.
	apple_theme = gr.themes.Soft(
	    primary_hue="blue",
	    secondary_hue="gray",
	    neutral_hue="gray",
	    font=gr.themes.GoogleFont("Inter"),
	    text_size="lg",
	    spacing_size="lg",
	    radius_size="lg",
	).set(
	    # --- Page and card surfaces ---
	    body_background_fill="*neutral_50",
	    body_background_fill_dark="*neutral_950",
	    block_background_fill="*background_fill_primary",
	    block_border_width="0px",  # borderless cards
	    block_shadow="*shadow_sm",

	    # --- Text ---
	    block_title_text_weight="600",
	    block_title_text_color="*neutral_900",
	    body_text_color="*neutral_800",
	    body_text_color_subdued="*neutral_500",

	    # --- Primary / secondary buttons ---
	    button_primary_background_fill="*primary_500",
	    button_primary_background_fill_hover="*primary_600",
	    button_primary_text_color="white",
	    button_primary_border_color="*primary_500",
	    button_secondary_background_fill="*neutral_200",
	    button_secondary_background_fill_hover="*neutral_300",
	    button_secondary_text_color="*neutral_800",

	    # --- Form inputs ---
	    input_background_fill="*neutral_50",
	    input_background_fill_dark="*neutral_800",
	    input_border_color="*neutral_300",
	    input_border_color_dark="*neutral_700",
	    input_shadow="none",

	    # --- Drop shadows ---
	    shadow_drop="rgba(0,0,0,0.05) 0px 1px 2px 0px",
	    shadow_drop_lg="rgba(0,0,0,0.1) 0px 10px 15px -3px, rgba(0,0,0,0.05) 0px 4px 6px -2px",
	)

	with gr.Blocks(fill_height=True) as demo:
	    # Two-column editor UI: inputs and settings on the left, results on the
	    # right, followed by the event wiring.
	    #
	    # NOTE(review): the nesting of the layout containers was reconstructed from
	    # a copy of this file whose indentation had been flattened. The placement
	    # of `time_estimate`, `clear_btn` and the "Example Prompts" accordion in
	    # particular should be confirmed against the intended layout.

	    gr.Markdown(
	        """
	# GLM-Image Editor

	Transform your images with AI-powered editing. Upload an image and describe how you want to modify it.

	<div align="center">
	<a href="https://huggingface.co/spaces/akhaliq/anycoder" style="color: #007AFF; text-decoration: none;">Built with anycoder</a>
	</div>
	""",
	        elem_classes=["apple-header"]
	    )

	    with gr.Row(equal_height=True):
	        # ---------------- Left column: inputs ----------------
	        with gr.Column(scale=1, min_width=350):
	            gr.Markdown("### Input", elem_classes=["section-title"])

	            input_image = gr.Image(
	                label="Upload Image",
	                type="pil",
	                sources=["upload", "clipboard"],
	                elem_id="input-image",
	                height=300,
	                show_label=False,
	                buttons=[] # Gradio 6: Replaces show_download_button=False
	            )

	            prompt = gr.Textbox(
	                label="Prompt",
	                placeholder="Describe how you want to transform the image...",
	                lines=4,
	                max_lines=6,
	                show_label=False,
	                container=False
	            )

	            with gr.Accordion("Advanced Settings", open=False):
	                with gr.Row():
	                    # precision=0 makes the component deliver integers, which is
	                    # what the dimension/seed handling downstream expects.
	                    height = gr.Number(
	                        label="Height",
	                        value=1024,
	                        minimum=64,
	                        maximum=2048,
	                        step=32,
	                        precision=0,
	                        info="Adjusted to multiple of 32"
	                    )
	                    width = gr.Number(
	                        label="Width",
	                        value=1024,
	                        minimum=64,
	                        maximum=2048,
	                        step=32,
	                        precision=0,
	                        info="Adjusted to multiple of 32"
	                    )

	                with gr.Row():
	                    num_inference_steps = gr.Slider(
	                        label="Inference Steps",
	                        minimum=10,
	                        maximum=100,
	                        value=50,
	                        step=5,
	                        info="More steps = higher quality"
	                    )
	                    guidance_scale = gr.Slider(
	                        label="Guidance Scale",
	                        minimum=0.5,
	                        maximum=3.0,
	                        value=1.5,
	                        step=0.1,
	                        info="Prompt adherence"
	                    )

	                with gr.Row():
	                    seed = gr.Number(
	                        label="Seed",
	                        value=42,
	                        minimum=0,
	                        maximum=2**32 - 1,
	                        step=1,
	                        precision=0,
	                        info="For reproducibility"
	                    )
	                    random_seed_btn = gr.Button(
	                        "Random",
	                        size="sm"
	                    )

	            # Initial text matches the slider's default of 50 steps.
	            time_estimate = gr.Markdown(
	                value=update_time_estimate(50),
	                elem_classes=["time-estimate"]
	            )

	            with gr.Row():
	                generate_btn = gr.Button(
	                    "Generate Image",
	                    variant="primary",
	                    size="lg"
	                )

	            clear_btn = gr.Button(
	                "Clear",
	                variant="secondary",
	                size="sm"
	            )

	        # ---------------- Right column: outputs ----------------
	        with gr.Column(scale=1, min_width=350):
	            gr.Markdown("### Output", elem_classes=["section-title"])

	            output_image = gr.Image(
	                label="Generated Image",
	                type="pil",
	                elem_id="output-image",
	                height=400,
	                interactive=False,
	                show_label=False
	            )

	            status = gr.Textbox(
	                label="Status",
	                value="Ready to generate.",
	                interactive=False,
	                show_label=True,
	                container=True
	            )

	            # Starts disabled; enabled by `enable_download` after a successful run.
	            download_btn = gr.DownloadButton(
	                "Download Image",
	                value=None,
	                variant="secondary",
	                interactive=False,
	                size="lg"
	            )

	            with gr.Accordion("Tips", open=False):
	                gr.Markdown(
	                    """
	- Be specific about colors and style
	- Background changes: "Replace the background with..."
	- Style transfer: "In the style of..."
	- Lighting: "Soft natural lighting"
	"""
	                )

	            with gr.Accordion("Example Prompts", open=False):
	                gr.Examples(
	                    examples=[
	                        ["Replace the background with a futuristic city skyline at sunset"],
	                        ["Transform this into an oil painting in the style of Van Gogh"],
	                        ["Change the environment to an underwater coral reef"],
	                        ["Add a red sports car parked in the foreground"],
	                    ],
	                    inputs=prompt
	                )

	    # ---------------- Event listeners ----------------
	    # Uploading (or clearing) the input image resets the height/width fields.
	    input_image.change(
	        fn=update_dimensions_from_image,
	        inputs=input_image,
	        outputs=[height, width],
	        api_visibility="private"
	    )

	    random_seed_btn.click(
	        fn=generate_random_seed,
	        outputs=seed,
	        api_visibility="private"
	    )

	    # Keep the time estimate in sync with the step slider.
	    num_inference_steps.change(
	        fn=update_time_estimate,
	        inputs=num_inference_steps,
	        outputs=time_estimate,
	        api_visibility="private"
	    )

	    def enable_download(img, file_path):
	        """
	        Switch the download button on when a generated file exists, off otherwise.

	        Args:
	            img: Current output image (accepted for wiring purposes, not used)
	            file_path: Path currently held by the download button, or None

	        Returns:
	            Dict mapping `download_btn` to its updated configuration
	        """
	        if file_path is not None and os.path.exists(file_path):
	            return {
	                download_btn: gr.DownloadButton(
	                    value=file_path,
	                    interactive=True,
	                    variant="primary"
	                )
	            }
	        return {
	            download_btn: gr.DownloadButton(
	                value=None,
	                interactive=False,
	                variant="secondary"
	            )
	        }

	    # Run generation, then enable/disable the download button depending on
	    # whether a file was produced. Without the chained step the button, which
	    # is created with interactive=False, would never become clickable.
	    generate_btn.click(
	        fn=process_image,
	        inputs=[
	            input_image,
	            prompt,
	            height,
	            width,
	            num_inference_steps,
	            guidance_scale,
	            seed
	        ],
	        outputs=[output_image, status, download_btn]
	    ).then(
	        fn=enable_download,
	        inputs=[output_image, download_btn],
	        outputs=[download_btn],
	        api_visibility="private"
	    )

	    def clear_all():
	        """Reset the input image, prompt, output image, status text and download button."""
	        return {
	            input_image: None,
	            prompt: "",
	            output_image: None,
	            status: "Ready to generate.",
	            download_btn: gr.DownloadButton(
	                value=None,
	                interactive=False,
	                variant="secondary"
	            )
	        }

	    clear_btn.click(
	        fn=clear_all,
	        outputs=[input_image, prompt, output_image, status, download_btn],
	        api_visibility="private"
	    )

	# CSS for Apple Styling
	# Custom stylesheet handed to `demo.launch(css=...)`.
	# The class selectors `.apple-header`, `.section-title` and `.time-estimate`
	# correspond to the `elem_classes` assigned to the Markdown components in the
	# layout, and `#input-image` / `#output-image` correspond to the `elem_id`
	# values of the two image components. The `button`, `table`, `th` and `td`
	# rules are unscoped element selectors.
	apple_css = """
	/* Apple System Font Stack */
	.gradio-container {
	font-family: -apple-system, BlinkMacSystemFont, "Inter", "Segoe UI", Roboto, Helvetica, Arial, sans-serif !important;
	}

	/* Header Styling */
	.apple-header {
	background-color: white;
	padding: 2rem 1rem;
	border-radius: 18px;
	box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.05), 0 2px 4px -1px rgba(0, 0, 0, 0.03);
	margin-bottom: 1.5rem;
	text-align: center;
	}
	.apple-header h1 {
	color: #1d1d1f;
	font-weight: 600;
	margin-bottom: 0.5rem;
	}
	.apple-header p {
	color: #86868b;
	font-size: 1.1em;
	margin: 0;
	}

	/* Section Titles */
	.section-title {
	color: #1d1d1f;
	font-weight: 600;
	margin-bottom: 1rem;
	font-size: 1.2rem;
	}

	/* Buttons - Pill Shape */
	button {
	border-radius: 9999px !important;
	transition: all 0.2s ease;
	font-weight: 500;
	}

	/* Time Estimate */
	.time-estimate {
	font-size: 0.9em;
	color: #86868b;
	padding: 0.75rem;
	background-color: #f5f5f7;
	border-radius: 12px;
	text-align: center;
	margin-top: 1rem;
	}

	/* Input Areas */
	#input-image, #output-image {
	border-radius: 18px !important;
	border: 1px solid #d2d2d7 !important;
	overflow: hidden;
	background-color: white;
	}

	/* Accordion */
	.accordion {
	border-radius: 12px !important;
	border: 1px solid #d2d2d7 !important;
	}

	/* Markdown Tables (in Examples) */
	table {
	border-collapse: collapse;
	width: 100%;
	border-radius: 12px;
	overflow: hidden;
	}
	th, td {
	padding: 12px;
	text-align: left;
	border-bottom: 1px solid #e5e5ea;
	}
	th {
	background-color: #f5f5f7;
	font-weight: 600;
	}
	"""

	# Start the web server on all interfaces at port 7860, applying the custom
	# theme and stylesheet and adding three footer links.
	demo.launch(
	    server_name="0.0.0.0",
	    server_port=7860,
	    theme=apple_theme,
	    css=apple_css,
	    footer_links=[
	        {"label": "Built with anycoder", "url": "https://huggingface.co/spaces/akhaliq/anycoder"},
	        {"label": "GLM-Image Model", "url": "https://huggingface.co/zai-org/GLM-Image"},
	        {"label": "ZeroGPU", "url": "https://huggingface.co/docs/spaces/spaces-sdks/gradio-zerogpu"},
	    ],
	)