akhaliq HF Staff committed on
Commit
d0e03bd
·
verified ·
1 Parent(s): b8b0baf

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +313 -113
app.py CHANGED
@@ -4,38 +4,111 @@ import torch
4
  from PIL import Image
5
  from transformers import AutoProcessor
6
  from longcat_image.models import LongCatImageTransformer2DModel
7
- from longcat_image.pipelines import LongCatImageEditPipeline
8
  import numpy as np
9
 
10
- # Load model directly at startup
11
  device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
12
- model_id = 'meituan-longcat/LongCat-Image-Edit'
13
 
14
- print(f"🔄 Loading model from {model_id}...")
 
 
15
 
16
- # Load text processor
17
- text_processor = AutoProcessor.from_pretrained(
18
- model_id,
19
  subfolder='tokenizer'
20
  )
21
 
22
- # Load transformer
23
- transformer = LongCatImageTransformer2DModel.from_pretrained(
24
- model_id,
25
  subfolder='transformer',
26
  torch_dtype=torch.bfloat16,
27
  use_safetensors=True
28
  ).to(device)
29
 
30
- # Load pipeline
31
- pipe = LongCatImageEditPipeline.from_pretrained(
32
- model_id,
33
- transformer=transformer,
34
- text_processor=text_processor,
35
  )
36
- pipe.to(device, torch.bfloat16)
37
 
38
- print(f"✅ Model loaded successfully on {device}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
 
40
  @spaces.GPU(duration=120)
41
  def edit_image(
@@ -69,7 +142,7 @@ def edit_image(
69
 
70
  # Run the pipeline
71
  with torch.inference_mode():
72
- output = pipe(
73
  input_image,
74
  prompt,
75
  negative_prompt=negative_prompt,
@@ -88,124 +161,250 @@ def edit_image(
88
  except Exception as e:
89
  raise gr.Error(f"Error during image editing: {str(e)}")
90
 
91
- # Example with image
92
- example_image_url = "https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.png"
 
 
 
93
 
94
- example_data = [
95
- [example_image_url, "add a mustache", "", 4.5, 50, 42],
 
 
 
96
  ]
97
 
98
  # Build Gradio interface
99
  with gr.Blocks(fill_height=True) as demo:
100
  gr.HTML("""
101
  <div style="text-align: center; margin-bottom: 20px;">
102
- <h1>🎨 LongCat Image Edit</h1>
103
  <p style="font-size: 16px; color: #666;">
104
- Transform your images with AI-powered editing using natural language instructions
105
  </p>
106
  <p style="font-size: 14px; margin-top: 10px;">
107
  Built with <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #4A90E2; text-decoration: none;">anycoder</a>
108
  </p>
109
  <p style="font-size: 12px; color: #888; margin-top: 5px;">
110
- ⚡ Powered by Zero-GPU | 🤗 Model: <a href="https://huggingface.co/meituan-longcat/LongCat-Image-Edit" target="_blank" style="color: #4A90E2;">meituan-longcat/LongCat-Image-Edit</a>
 
 
111
  </p>
112
  </div>
113
  """)
114
 
115
- with gr.Row():
116
- with gr.Column(scale=1):
117
- gr.Markdown("### 📤 Input")
118
- input_image = gr.Image(
119
- label="Upload Image",
120
- type="pil",
121
- sources=["upload", "clipboard"],
122
- height=400
123
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
 
125
- prompt = gr.Textbox(
126
- label="Edit Instruction",
127
- placeholder="Describe how you want to edit the image (e.g., '将猫变成狗' or 'Change the cat to a dog')",
128
- lines=3
 
 
 
 
129
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
 
131
- with gr.Accordion("⚙️ Advanced Settings", open=False):
132
- negative_prompt = gr.Textbox(
133
- label="Negative Prompt (Optional)",
134
- placeholder="What you don't want in the image",
135
- lines=2
136
- )
137
-
138
- guidance_scale = gr.Slider(
139
- minimum=1.0,
140
- maximum=10.0,
141
- value=4.5,
142
- step=0.5,
143
- label="Guidance Scale",
144
- info="Higher values = stronger adherence to prompt"
145
- )
146
-
147
- num_inference_steps = gr.Slider(
148
- minimum=20,
149
- maximum=100,
150
- value=50,
151
- step=5,
152
- label="Inference Steps",
153
- info="More steps = higher quality but slower"
154
- )
155
-
156
- seed = gr.Slider(
157
- minimum=0,
158
- maximum=999999,
159
- value=42,
160
- step=1,
161
- label="Random Seed",
162
- info="Use same seed for reproducible results"
163
- )
164
-
165
- edit_btn = gr.Button("✨ Edit Image", variant="primary", size="lg")
166
-
167
- gr.Markdown("""
168
- <div style="padding: 10px; background-color: #f0f7ff; border-radius: 8px; margin-top: 10px;">
169
- <p style="margin: 0; font-size: 12px; color: #555;">
170
- ⏱️ <strong>Note:</strong> Zero-GPU provides 120 seconds of GPU time per request.
171
- Model is loaded at startup from Hugging Face Hub.
172
- Processing typically takes 30-60 seconds depending on settings.
173
- </p>
174
- </div>
175
- """)
176
-
177
- with gr.Column(scale=1):
178
- gr.Markdown("### 🎯 Output")
179
- output_image = gr.Image(
180
- label="Edited Image",
181
- type="pil",
182
- height=400,
183
- buttons=["download"]
184
  )
185
-
186
- gr.Markdown("### 💡 Tips")
187
- gr.Markdown("""
188
- - Upload a clear, well-lit image for best results
189
- - Be specific in your edit instructions
190
- - Supports both English and Chinese prompts
191
- - Try different guidance scales for varied results
192
- - Higher inference steps = better quality (but slower)
193
- - GPU time is limited - optimize your settings for speed
194
- - Model loads automatically from Hugging Face Hub
195
- """)
196
 
197
- # Examples section
198
- gr.Markdown("### 📝 Example")
199
- gr.Examples(
200
- examples=example_data,
201
- inputs=[input_image, prompt, negative_prompt, guidance_scale, num_inference_steps, seed],
202
- outputs=output_image,
203
- fn=edit_image,
204
- cache_examples=False,
205
- label="Click to try this example"
206
- )
207
 
208
  # Event handlers
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
209
  edit_btn.click(
210
  fn=edit_image,
211
  inputs=[
@@ -224,7 +423,8 @@ with gr.Blocks(fill_height=True) as demo:
224
  gr.HTML("""
225
  <div style="text-align: center; margin-top: 40px; padding: 20px; border-top: 1px solid #eee;">
226
  <p style="color: #666; font-size: 14px;">
227
- Powered by <a href="https://huggingface.co/meituan-longcat/LongCat-Image-Edit" target="_blank" style="color: #4A90E2;">LongCat Image Edit</a> with Zero-GPU |
 
228
  <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #4A90E2;">Built with anycoder</a>
229
  </p>
230
  </div>
 
4
  from PIL import Image
5
  from transformers import AutoProcessor
6
  from longcat_image.models import LongCatImageTransformer2DModel
7
+ from longcat_image.pipelines import LongCatImageEditPipeline, LongCatImagePipeline
8
  import numpy as np
9
 
10
# Load models directly at startup
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


def _load_longcat_pipeline(model_id, pipeline_cls):
    """Load the text processor, transformer and pipeline for one checkpoint.

    Both LongCat checkpoints are loaded with the same three steps, so the
    sequence lives here once instead of being copy-pasted per model.

    Args:
        model_id: Hub repository id passed to every ``from_pretrained`` call.
        pipeline_cls: Pipeline class whose ``from_pretrained`` builds the
            final pipeline (``LongCatImagePipeline`` or
            ``LongCatImageEditPipeline``).

    Returns:
        A ``(text_processor, transformer, pipe)`` tuple. The transformer and
        the pipeline have been moved to the module-level ``device``.
    """
    text_processor = AutoProcessor.from_pretrained(
        model_id,
        subfolder='tokenizer'
    )

    transformer = LongCatImageTransformer2DModel.from_pretrained(
        model_id,
        subfolder='transformer',
        torch_dtype=torch.bfloat16,
        use_safetensors=True
    ).to(device)

    pipe = pipeline_cls.from_pretrained(
        model_id,
        transformer=transformer,
        text_processor=text_processor,
    )
    pipe.to(device, torch.bfloat16)

    return text_processor, transformer, pipe


# Text-to-Image Model
t2i_model_id = 'meituan-longcat/LongCat-Image'
print(f"🔄 Loading Text-to-Image model from {t2i_model_id}...")

t2i_text_processor, t2i_transformer, t2i_pipe = _load_longcat_pipeline(
    t2i_model_id, LongCatImagePipeline
)

print("✅ Text-to-Image model loaded successfully")

# Image Edit Model
edit_model_id = 'meituan-longcat/LongCat-Image-Edit'
print(f"🔄 Loading Image Edit model from {edit_model_id}...")

edit_text_processor, edit_transformer, edit_pipe = _load_longcat_pipeline(
    edit_model_id, LongCatImageEditPipeline
)

print(f"✅ Image Edit model loaded successfully on {device}")
62
+
63
@spaces.GPU(duration=120)
def generate_image(
    prompt: str,
    negative_prompt: str,
    width: int,
    height: int,
    guidance_scale: float,
    num_inference_steps: int,
    seed: int,
    enable_cfg_renorm: bool,
    enable_prompt_rewrite: bool,
    progress=gr.Progress()
):
    """Generate one image from a text prompt with the text-to-image pipeline.

    Args:
        prompt: Text description of the image; must contain non-whitespace.
        negative_prompt: Forwarded to the pipeline as ``negative_prompt``.
        width: Output width, forwarded as ``width``.
        height: Output height, forwarded as ``height``.
        guidance_scale: Forwarded as ``guidance_scale``.
        num_inference_steps: Forwarded as ``num_inference_steps``.
        seed: Seed for the ``torch.Generator`` handed to the pipeline.
        enable_cfg_renorm: Forwarded as ``enable_cfg_renorm``.
        enable_prompt_rewrite: Forwarded as ``enable_prompt_rewrite``.
        progress: Gradio progress callback used to report status.

    Returns:
        The first image of the pipeline output (``output.images[0]``).

    Raises:
        gr.Error: If the prompt is empty or whitespace-only, or if anything
            raises while the pipeline runs (the original exception is
            chained as the cause).
    """
    if not prompt or not prompt.strip():
        raise gr.Error("Please enter a prompt")

    try:
        progress(0.1, desc="Preparing generation...")

        progress(0.2, desc="Generating image...")

        # UI sliders may hand numeric values over as floats (assumption about
        # the Gradio front end -- confirm); manual_seed() and the pixel/step
        # counts need real ints, so coerce them here.
        generator_device = "cuda" if torch.cuda.is_available() else "cpu"
        generator = torch.Generator(generator_device).manual_seed(int(seed))

        # Run the pipeline
        with torch.inference_mode():
            output = t2i_pipe(
                prompt,
                negative_prompt=negative_prompt,
                height=int(height),
                width=int(width),
                guidance_scale=guidance_scale,
                num_inference_steps=int(num_inference_steps),
                num_images_per_prompt=1,
                generator=generator,
                enable_cfg_renorm=enable_cfg_renorm,
                enable_prompt_rewrite=enable_prompt_rewrite
            )

        progress(1.0, desc="Done!")

        return output.images[0]

    except Exception as e:
        # Surface the failure in the UI while keeping the original traceback.
        raise gr.Error(f"Error during image generation: {str(e)}") from e
112
 
113
  @spaces.GPU(duration=120)
114
  def edit_image(
 
142
 
143
  # Run the pipeline
144
  with torch.inference_mode():
145
+ output = edit_pipe(
146
  input_image,
147
  prompt,
148
  negative_prompt=negative_prompt,
 
161
  except Exception as e:
162
  raise gr.Error(f"Error during image editing: {str(e)}")
163
 
164
# Example row for the image-editing tab. Column order follows the inputs list
# given to gr.Examples: image, prompt, negative prompt, guidance scale,
# inference steps, seed.
edit_example_image_url = "https://huggingface.co/datasets/huggingface/brand-assets/resolve/main/hf-logo.png"
edit_example_data = [
    [edit_example_image_url, "Add a mustache", "", 4.5, 50, 42],
]

# Example rows for the text-to-image tab. Only the prompt, size and seed vary;
# negative prompt, guidance scale, step count and both checkboxes are shared,
# so each row is expanded from a compact (prompt, width, height, seed) spec.
t2i_example_prompts = [
    [example_prompt, "", example_width, example_height, 4.5, 50, example_seed, True, True]
    for example_prompt, example_width, example_height, example_seed in (
        ("一个年轻的亚裔女性,身穿黄色针织衫,搭配白色项链。她的双手放在膝盖上,表情恬静。背景是一堵粗糙的砖墙,午后的阳光温暖地洒在她身上,营造出一种宁静而温馨的氛围。", 1344, 768, 43),
        ("A serene mountain landscape at sunset with golden clouds", 1344, 768, 42),
        ("A cute robot sitting at a desk, digital art style", 1024, 1024, 44),
    )
]
176
 
177
  # Build Gradio interface
178
  with gr.Blocks(fill_height=True) as demo:
179
  gr.HTML("""
180
  <div style="text-align: center; margin-bottom: 20px;">
181
+ <h1>🎨 LongCat Image Studio</h1>
182
  <p style="font-size: 16px; color: #666;">
183
+ Generate images from text or edit existing images with AI-powered tools
184
  </p>
185
  <p style="font-size: 14px; margin-top: 10px;">
186
  Built with <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #4A90E2; text-decoration: none;">anycoder</a>
187
  </p>
188
  <p style="font-size: 12px; color: #888; margin-top: 5px;">
189
+ ⚡ Powered by Zero-GPU | 🤗 Models:
190
+ <a href="https://huggingface.co/meituan-longcat/LongCat-Image" target="_blank" style="color: #4A90E2;">Text-to-Image</a> &
191
+ <a href="https://huggingface.co/meituan-longcat/LongCat-Image-Edit" target="_blank" style="color: #4A90E2;">Image Edit</a>
192
  </p>
193
  </div>
194
  """)
195
 
196
+ with gr.Tabs():
197
+ # Text-to-Image Tab
198
+ with gr.TabItem("🖼️ Text to Image"):
199
+ with gr.Row():
200
+ with gr.Column(scale=1):
201
+ gr.Markdown("### 📝 Prompt")
202
+ t2i_prompt = gr.Textbox(
203
+ label="Image Description",
204
+ placeholder="Describe the image you want to generate (supports English and Chinese)",
205
+ lines=5
206
+ )
207
+
208
+ with gr.Accordion("⚙️ Settings", open=True):
209
+ t2i_negative_prompt = gr.Textbox(
210
+ label="Negative Prompt (Optional)",
211
+ placeholder="What you don't want in the image",
212
+ lines=2
213
+ )
214
+
215
+ with gr.Row():
216
+ t2i_width = gr.Slider(
217
+ minimum=512,
218
+ maximum=2048,
219
+ value=1344,
220
+ step=64,
221
+ label="Width",
222
+ )
223
+
224
+ t2i_height = gr.Slider(
225
+ minimum=512,
226
+ maximum=2048,
227
+ value=768,
228
+ step=64,
229
+ label="Height",
230
+ )
231
+
232
+ t2i_guidance_scale = gr.Slider(
233
+ minimum=1.0,
234
+ maximum=10.0,
235
+ value=4.5,
236
+ step=0.5,
237
+ label="Guidance Scale",
238
+ info="Higher values = stronger adherence to prompt"
239
+ )
240
+
241
+ t2i_num_inference_steps = gr.Slider(
242
+ minimum=20,
243
+ maximum=100,
244
+ value=50,
245
+ step=5,
246
+ label="Inference Steps",
247
+ info="More steps = higher quality but slower"
248
+ )
249
+
250
+ t2i_seed = gr.Slider(
251
+ minimum=0,
252
+ maximum=999999,
253
+ value=42,
254
+ step=1,
255
+ label="Random Seed",
256
+ )
257
+
258
+ t2i_enable_cfg_renorm = gr.Checkbox(
259
+ label="Enable CFG Renormalization",
260
+ value=True,
261
+ info="Improves image quality"
262
+ )
263
+
264
+ t2i_enable_prompt_rewrite = gr.Checkbox(
265
+ label="Enable Prompt Rewrite",
266
+ value=True,
267
+ info="Uses text encoder as built-in prompt enhancer"
268
+ )
269
+
270
+ generate_btn = gr.Button("✨ Generate Image", variant="primary", size="lg")
271
+
272
+ with gr.Column(scale=1):
273
+ gr.Markdown("### 🎯 Generated Image")
274
+ t2i_output = gr.Image(
275
+ label="Output",
276
+ type="pil",
277
+ height=500,
278
+ buttons=["download"]
279
+ )
280
+
281
+ gr.Markdown("### 💡 Tips")
282
+ gr.Markdown("""
283
+ - Be detailed and specific in your descriptions
284
+ - Supports both English and Chinese prompts
285
+ - Try different aspect ratios for varied compositions
286
+ - Enable prompt rewrite for enhanced descriptions
287
+ - Higher inference steps = better quality (but slower)
288
+ """)
289
 
290
+ gr.Markdown("### 📝 Example Prompts")
291
+ gr.Examples(
292
+ examples=t2i_example_prompts,
293
+ inputs=[t2i_prompt, t2i_negative_prompt, t2i_width, t2i_height, t2i_guidance_scale, t2i_num_inference_steps, t2i_seed, t2i_enable_cfg_renorm, t2i_enable_prompt_rewrite],
294
+ outputs=t2i_output,
295
+ fn=generate_image,
296
+ cache_examples=False,
297
+ label="Click to try these examples"
298
  )
299
+
300
+ # Image Edit Tab
301
+ with gr.TabItem("✏️ Image Edit"):
302
+ with gr.Row():
303
+ with gr.Column(scale=1):
304
+ gr.Markdown("### 📤 Input")
305
+ input_image = gr.Image(
306
+ label="Upload Image",
307
+ type="pil",
308
+ sources=["upload", "clipboard"],
309
+ height=400
310
+ )
311
+
312
+ prompt = gr.Textbox(
313
+ label="Edit Instruction",
314
+ placeholder="Describe how you want to edit the image",
315
+ lines=3
316
+ )
317
+
318
+ with gr.Accordion("⚙️ Advanced Settings", open=False):
319
+ negative_prompt = gr.Textbox(
320
+ label="Negative Prompt (Optional)",
321
+ placeholder="What you don't want in the image",
322
+ lines=2
323
+ )
324
+
325
+ guidance_scale = gr.Slider(
326
+ minimum=1.0,
327
+ maximum=10.0,
328
+ value=4.5,
329
+ step=0.5,
330
+ label="Guidance Scale",
331
+ info="Higher values = stronger adherence to prompt"
332
+ )
333
+
334
+ num_inference_steps = gr.Slider(
335
+ minimum=20,
336
+ maximum=100,
337
+ value=50,
338
+ step=5,
339
+ label="Inference Steps",
340
+ info="More steps = higher quality but slower"
341
+ )
342
+
343
+ seed = gr.Slider(
344
+ minimum=0,
345
+ maximum=999999,
346
+ value=42,
347
+ step=1,
348
+ label="Random Seed",
349
+ )
350
+
351
+ edit_btn = gr.Button("✨ Edit Image", variant="primary", size="lg")
352
+
353
+ with gr.Column(scale=1):
354
+ gr.Markdown("### 🎯 Output")
355
+ output_image = gr.Image(
356
+ label="Edited Image",
357
+ type="pil",
358
+ height=400,
359
+ buttons=["download"]
360
+ )
361
+
362
+ gr.Markdown("### 💡 Tips")
363
+ gr.Markdown("""
364
+ - Upload a clear, well-lit image for best results
365
+ - Be specific in your edit instructions
366
+ - Supports both English and Chinese prompts
367
+ - Try different guidance scales for varied results
368
+ """)
369
 
370
+ gr.Markdown("### 📝 Example")
371
+ gr.Examples(
372
+ examples=edit_example_data,
373
+ inputs=[input_image, prompt, negative_prompt, guidance_scale, num_inference_steps, seed],
374
+ outputs=output_image,
375
+ fn=edit_image,
376
+ cache_examples=False,
377
+ label="Click to try this example"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
378
  )
 
 
 
 
 
 
 
 
 
 
 
379
 
380
+ gr.HTML("""
381
+ <div style="padding: 10px; background-color: #f0f7ff; border-radius: 8px; margin: 20px 0;">
382
+ <p style="margin: 0; font-size: 12px; color: #555;">
383
+ ⏱️ <strong>Note:</strong> Zero-GPU provides 120 seconds of GPU time per request.
384
+ Models are loaded at startup from Hugging Face Hub.
385
+ Processing typically takes 30-60 seconds depending on settings.
386
+ </p>
387
+ </div>
388
+ """)
 
389
 
390
  # Event handlers
391
+ generate_btn.click(
392
+ fn=generate_image,
393
+ inputs=[
394
+ t2i_prompt,
395
+ t2i_negative_prompt,
396
+ t2i_width,
397
+ t2i_height,
398
+ t2i_guidance_scale,
399
+ t2i_num_inference_steps,
400
+ t2i_seed,
401
+ t2i_enable_cfg_renorm,
402
+ t2i_enable_prompt_rewrite
403
+ ],
404
+ outputs=t2i_output,
405
+ api_visibility="public"
406
+ )
407
+
408
  edit_btn.click(
409
  fn=edit_image,
410
  inputs=[
 
423
  gr.HTML("""
424
  <div style="text-align: center; margin-top: 40px; padding: 20px; border-top: 1px solid #eee;">
425
  <p style="color: #666; font-size: 14px;">
426
+ Powered by <a href="https://huggingface.co/meituan-longcat/LongCat-Image" target="_blank" style="color: #4A90E2;">LongCat Image</a> &
427
+ <a href="https://huggingface.co/meituan-longcat/LongCat-Image-Edit" target="_blank" style="color: #4A90E2;">LongCat Image Edit</a> |
428
  <a href="https://huggingface.co/spaces/akhaliq/anycoder" target="_blank" style="color: #4A90E2;">Built with anycoder</a>
429
  </p>
430
  </div>