multimodalart HF Staff committed on
Commit
153fa23
·
verified ·
1 Parent(s): 806b8af

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +263 -0
app.py ADDED
@@ -0,0 +1,263 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import numpy as np
4
+ import random
5
+ import spaces
6
+ import torch
7
+ from diffusers.pipelines.glm_image import GlmImagePipeline
8
+ from PIL import Image
9
+
10
# Inference dtype and device, resolved once at import time. `device` falls
# back to CPU so the app can still start on machines without CUDA.
dtype = torch.bfloat16
device = "cuda" if torch.cuda.is_available() else "cpu"

MAX_SEED = np.iinfo(np.int32).max  # upper bound for the seed slider
MAX_IMAGE_SIZE = 2048              # upper bound for width/height sliders

# Load model once at startup. Reuse the module-level `dtype`/`device` instead
# of repeating torch.bfloat16 and hardcoding "cuda" (the hardcoded device_map
# would crash on CPU-only hosts even though a CPU fallback is detected above).
pipe = GlmImagePipeline.from_pretrained(
    "zai-org/GLM-Image",
    torch_dtype=dtype,
    device_map=device,
)
22
+
23
+
24
def get_duration(prompt, input_images, width, height, num_inference_steps, guidance_scale, seed, progress=gr.Progress(track_tqdm=True)):
    """Estimate the GPU allocation (in seconds) for one generation call.

    Each reference image makes a denoising step more expensive, so the
    per-step cost grows with the number of input images; a fixed overhead
    is added and the result is floored at 90 seconds.
    """
    image_count = len(input_images) if input_images is not None else 0
    per_step_cost = 1.2 + 0.5 * image_count
    estimate = num_inference_steps * per_step_cost + 15
    return max(90, estimate)
29
+
30
+
31
@spaces.GPU(duration=get_duration)
def generate_image(prompt, input_images, width, height, num_inference_steps, guidance_scale, seed, progress=gr.Progress(track_tqdm=True)):
    """Run the GLM-Image pipeline and return a single PIL image.

    Operates in two modes: text-to-image when `input_images` is None/empty,
    and image-to-image when gallery items are supplied.

    Args:
        prompt: Text prompt or editing instruction.
        input_images: Optional gallery items; each item is a PIL image,
            a file path, or an (image, caption) tuple as produced by gr.Gallery.
        width: Output width (expected to be a multiple of 32).
        height: Output height (expected to be a multiple of 32).
        num_inference_steps: Number of denoising steps.
        guidance_scale: Classifier-free guidance strength.
        seed: Seed for deterministic generation.
        progress: Gradio progress tracker.
    """
    # Seed on the module-level `device` so a CPU-only fallback does not crash
    # (previously hardcoded "cuda" even though `device` is detected at import).
    generator = torch.Generator(device=device).manual_seed(seed)

    # Normalize gallery items to a list of RGB PIL images for i2i mode.
    image_list = None
    if input_images is not None and len(input_images) > 0:
        image_list = []
        for item in input_images:
            # gr.Gallery may yield (image, caption) tuples; keep only the image.
            img = item[0] if isinstance(item, tuple) else item
            if isinstance(img, str):
                img = Image.open(img).convert("RGB")
            elif isinstance(img, Image.Image):
                img = img.convert("RGB")
            image_list.append(img)

    pipe_kwargs = {
        "prompt": prompt,
        "height": height,
        "width": width,
        "num_inference_steps": num_inference_steps,
        "guidance_scale": guidance_scale,
        "generator": generator,
    }

    # Only pass `image` in image-to-image mode.
    if image_list is not None:
        pipe_kwargs["image"] = image_list

    if progress:
        progress(0, desc="Starting generation...")

    image = pipe(**pipe_kwargs).images[0]
    return image
66
+
67
+
68
def infer(prompt, input_images=None, seed=42, randomize_seed=False, width=1024, height=1024,
          num_inference_steps=50, guidance_scale=1.5, progress=gr.Progress(track_tqdm=True)):
    """UI entry point: resolve the seed, snap dimensions, and dispatch to the
    GPU worker.

    Returns:
        (image, seed): the generated PIL image and the seed actually used,
        so a randomized seed can be shown back to the user.
    """
    if randomize_seed:
        seed = random.randint(0, MAX_SEED)

    # GLM-Image requires both sides to be multiples of 32; round down.
    width -= width % 32
    height -= height % 32

    progress(0.1, desc="Preparing generation...")

    image = generate_image(prompt, input_images, width, height,
                           num_inference_steps, guidance_scale, seed, progress)

    return image, seed
93
+
94
+
95
def update_dimensions_from_image(image_list):
    """Suggest width/height slider values matching the first uploaded image.

    The longer side is pinned to 1024 and the shorter side follows the
    aspect ratio; both are rounded to multiples of 32 (a GLM-Image
    requirement) and clamped to the slider range [256, MAX_IMAGE_SIZE].
    """
    if not image_list:
        return 1024, 1024  # nothing uploaded: keep the defaults

    # Only the first gallery item determines the aspect ratio.
    first = image_list[0]
    img = first[0] if isinstance(first, tuple) else first
    if isinstance(img, str):
        img = Image.open(img)

    img_w, img_h = img.size
    ratio = img_w / img_h

    # Pin the longer edge at 1024 and scale the other edge proportionally.
    if ratio >= 1:  # landscape or square
        new_w, new_h = 1024, int(1024 / ratio)
    else:           # portrait
        new_w, new_h = int(1024 * ratio), 1024

    def _snap(value):
        # Round to the nearest multiple of 32, then clamp to the slider range.
        value = round(value / 32) * 32
        return max(256, min(MAX_IMAGE_SIZE, value))

    return _snap(new_w), _snap(new_h)
127
+
128
+
129
# Text-to-Image examples. Each entry is a single-element list so it matches
# the `inputs=[prompt]` signature of the gr.Examples component below.
examples_t2i = [
    ["A beautifully designed modern food magazine style dessert recipe illustration, themed around a raspberry mousse cake. Clean and bright layout with a bold black title 'Raspberry Mousse Cake Recipe Guide', soft-lit close-up of the finished cake showing light pink layers with fresh raspberries and mint leaves."],
    ["A kawaii die-cut sticker of a chubby orange cat, featuring big sparkly eyes and a happy smile with paws raised in greeting and a heart-shaped pink nose. Smooth rounded lines with black outlines and soft gradient shading with pink cheeks."],
    ["Soaking wet capybara taking shelter under a banana leaf in the rainy jungle, close up photo, water droplets visible on fur, lush green background"],
    ["A photorealistic portrait of an astronaut in a detailed spacesuit, floating in space with Earth visible in the background, dramatic lighting from the sun"],
    ["An elegant art deco poster design for a jazz concert, featuring geometric patterns in gold and deep blue, stylized saxophone silhouette, vintage typography"],
]
137
+
138
# Image-to-Image example prompts. These are rendered as plain Markdown hints
# (not wired to gr.Examples), since i2i also needs an uploaded image.
examples_i2i = [
    ["Transform this scene into a cyberpunk cityscape at night with neon lights and flying cars"],
    ["Replace the background with an underwater coral reef scene with tropical fish"],
    ["Convert this to a watercolor painting style with soft pastel colors"],
]
144
+
145
# Custom CSS: center the main column and letterbox gallery thumbnails so
# images keep their aspect ratio inside the gallery cells.
css = """
#col-container {
    margin: 0 auto;
    max-width: 1200px;
}
.gallery-container img {
    object-fit: contain;
}
"""
154
+
155
# Build the UI. NOTE: `theme` and `css` are gr.Blocks constructor arguments;
# Blocks.launch() accepts neither (the original passed them to launch(),
# which raises a TypeError at startup and left the `css` string unused).
with gr.Blocks(theme=gr.themes.Citrus(), css=css) as demo:

    with gr.Column(elem_id="col-container"):
        gr.Markdown("""# GLM-Image
[[Model](https://huggingface.co/zai-org/GLM-Image)]
""")

        with gr.Row():
            with gr.Column():
                prompt = gr.Text(
                    label="Prompt",
                    show_label=False,
                    max_lines=4,
                    placeholder="Enter your prompt (for text-to-image) or editing instructions (for image-to-image)",
                    container=False,
                    scale=3
                )

                run_button = gr.Button("🎨 Generate", variant="primary", scale=1)

                with gr.Accordion("📷 Input Image(s) (optional - for image-to-image mode)", open=True):
                    input_images = gr.Gallery(
                        label="Input Image(s)",
                        type="pil",
                        columns=3,
                        rows=1,
                        elem_classes="gallery-container"
                    )
                    gr.Markdown("*Upload one or more images for image-to-image generation. Leave empty for text-to-image mode.*")

                with gr.Accordion("⚙️ Advanced Settings", open=False):
                    seed = gr.Slider(
                        label="Seed",
                        minimum=0,
                        maximum=MAX_SEED,
                        step=1,
                        value=42,
                    )

                    randomize_seed = gr.Checkbox(label="Randomize seed", value=True)

                    with gr.Row():
                        width = gr.Slider(
                            label="Width",
                            minimum=256,
                            maximum=MAX_IMAGE_SIZE,
                            step=32,
                            value=1024,
                            info="Must be a multiple of 32"
                        )

                        height = gr.Slider(
                            label="Height",
                            minimum=256,
                            maximum=MAX_IMAGE_SIZE,
                            step=32,
                            value=1024,
                            info="Must be a multiple of 32"
                        )

                    with gr.Row():
                        num_inference_steps = gr.Slider(
                            label="Number of inference steps",
                            minimum=1,
                            maximum=100,
                            step=1,
                            value=50,
                        )

                        guidance_scale = gr.Slider(
                            label="Guidance scale",
                            minimum=0.0,
                            maximum=10.0,
                            step=0.1,
                            value=1.5,
                        )

            with gr.Column():
                result = gr.Image(label="Result", show_label=False)

        gr.Markdown("### 📝 Text-to-Image Examples")
        gr.Examples(
            examples=examples_t2i,
            fn=infer,
            inputs=[prompt],
            outputs=[result, seed],
            cache_examples="lazy"
        )

        gr.Markdown("### 🖼️ Image-to-Image Example Prompts")
        gr.Markdown("*Upload an image above and use one of these prompts:*")
        for example in examples_i2i:
            gr.Markdown(f"- {example[0]}")

        # Auto-update the dimension sliders when images are uploaded, so the
        # output aspect ratio matches the first input image.
        input_images.upload(
            fn=update_dimensions_from_image,
            inputs=[input_images],
            outputs=[width, height]
        )

        # Single handler for both the button click and pressing Enter in the
        # prompt box.
        gr.on(
            triggers=[run_button.click, prompt.submit],
            fn=infer,
            inputs=[prompt, input_images, seed, randomize_seed, width, height, num_inference_steps, guidance_scale],
            outputs=[result, seed]
        )

demo.launch()