iitolstykh committed on
Commit
7203e96
·
verified ·
1 Parent(s): 3df8c38

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +134 -43
app.py CHANGED
@@ -1,54 +1,61 @@
1
  import spaces
2
  import gradio as gr
3
  import os
4
-
5
- from huggingface_hub import snapshot_download
6
  import random
7
  import torch
8
  import numpy as np
9
- import pathlib
10
 
11
  from vibe.editor import ImageEditor
12
 
13
  MAX_SEED = np.iinfo(np.int32).max
14
 
 
15
 
16
- def load_pipeline():
 
17
  model_path = snapshot_download(
18
  repo_id="iitolstykh/VIBE-Image-Edit",
19
  repo_type="model",
20
  )
21
-
22
- # Load model
23
- editor_pipeline = ImageEditor(
24
  checkpoint_path=model_path,
25
  image_guidance_scale=1.2,
26
  guidance_scale=4.5,
27
  num_inference_steps=20,
28
  device="cuda",
29
  )
 
 
30
 
31
- print(f"Model loaded. Model device: {editor_pipeline.pipe.device}")
32
-
33
- return editor_pipeline
34
-
35
-
36
- pipeline = load_pipeline()
37
-
38
-
39
- def set_env(seed=0):
40
- torch.manual_seed(seed)
41
- torch.set_grad_enabled(False)
 
 
 
 
 
42
 
 
 
 
43
 
44
- def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
45
- if randomize_seed:
46
- seed = random.randint(0, MAX_SEED)
47
- return seed
48
 
 
49
 
50
  @spaces.GPU(duration=180)
51
- def edit_img(
52
  pil_image,
53
  edit_prompt: str,
54
  sample_steps,
@@ -57,7 +64,11 @@ def edit_img(
57
  seed,
58
  progress=gr.Progress(track_tqdm=True),
59
  ):
60
- edited_image = pipeline.generate_edited_image(
 
 
 
 
61
  instruction=edit_prompt,
62
  conditioning_image=pil_image,
63
  num_images_per_prompt=1,
@@ -69,6 +80,31 @@ def edit_img(
69
  return edited_image[0]
70
 
71
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  @spaces.GPU(duration=180)
73
  def gen_img(
74
  prompt: str,
@@ -79,7 +115,8 @@ def gen_img(
79
  seed: int,
80
  progress=gr.Progress(track_tqdm=True),
81
  ):
82
- generated_images = pipeline.generate_edited_image(
 
83
  instruction=prompt,
84
  num_images_per_prompt=1,
85
  t2i_height=height,
@@ -91,51 +128,105 @@ def gen_img(
91
  return generated_images[0]
92
 
93
 
94
- if __name__ == "__main__":
95
-
96
- DESCRIPTION = """DEMO for VIBE-Image-Edit model: https://huggingface.co/iitolstykh/VIBE-Image-Edit"""
97
 
 
 
 
98
  image_dir = pathlib.Path('images')
99
- edit_examples = [[path.as_posix(), "let this case swim in the river", 20, 4.5, 1.2, 42] for path in sorted(image_dir.glob('*.png'))]
 
 
 
 
 
 
 
 
 
 
100
  gen_examples = [["View through the clouds at Earth from a plane", 512, 1024, 20, 6.5, 234]]
101
 
102
  with gr.Blocks() as demo:
103
- gr.Markdown(f"# {DESCRIPTION}")
104
 
105
  with gr.Tabs():
 
 
106
  with gr.Tab(label="Image Editing"):
 
 
107
  with gr.Row():
108
  with gr.Column():
109
  edit_input_image = gr.Image(label="Input", type="pil")
110
- edit_prompt = gr.Textbox(label="Prompt", placeholder="Please enter your prompt.\n")
111
- edit_sample_steps = gr.Slider(label="Sample Steps", minimum=1, maximum=100, value=20, step=1)
112
- edit_guidance_scale = gr.Slider(label="Guidance Scale", minimum=0.1, maximum=30.0, value=4.5, step=0.1)
113
- edit_image_guidance_scale = gr.Slider(label="Image Guidance Scale", minimum=0.1, maximum=30.0, value=1.2, step=0.1)
114
- edit_seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42)
 
 
 
115
  edit_btn = gr.Button("Edit Image", variant="primary")
116
  with gr.Column():
117
  edit_output = gr.Image(label="Result", type="pil")
118
 
119
  gr.Examples(
120
- examples=edit_examples,
121
  inputs=[edit_input_image, edit_prompt, edit_sample_steps, edit_guidance_scale, edit_image_guidance_scale, edit_seed],
122
  )
123
 
124
  edit_btn.click(
125
- fn=edit_img,
126
  inputs=[edit_input_image, edit_prompt, edit_sample_steps, edit_guidance_scale, edit_image_guidance_scale, edit_seed],
127
  outputs=[edit_output],
128
  )
129
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  with gr.Tab(label="Image Generation"):
 
 
131
  with gr.Row():
132
  with gr.Column():
133
- gen_prompt = gr.Textbox(label="Prompt", placeholder="Please enter your prompt.\n")
134
- gen_height = gr.Slider(label="Height", minimum=64, maximum=2048, value=1024, step=64)
135
- gen_width = gr.Slider(label="Width", minimum=64, maximum=2048, value=1024, step=64)
136
- gen_sample_steps = gr.Slider(label="Sample Steps", minimum=1, maximum=100, value=20, step=1)
137
- gen_guidance_scale = gr.Slider(label="Guidance Scale", minimum=0.1, maximum=30.0, value=6.5, step=0.1)
138
- gen_seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42)
 
 
 
139
  gen_btn = gr.Button("Generate Image", variant="primary")
140
  with gr.Column():
141
  gen_output = gr.Image(label="Result", type="pil")
 
1
  import spaces
2
  import gradio as gr
3
  import os
4
+ import pathlib
 
5
  import random
6
  import torch
7
  import numpy as np
8
+ from huggingface_hub import snapshot_download
9
 
10
  from vibe.editor import ImageEditor
11
 
12
  MAX_SEED = np.iinfo(np.int32).max
13
 
14
+ # --- Loading Pipelines ---
15
 
16
def load_original_pipeline():
    """Download and build the base VIBE image editor (CFG enabled).

    Returns the `ImageEditor` configured with the default guidance
    settings used by the non-distilled checkpoint.
    """
    print("Loading Original Model...")
    checkpoint_dir = snapshot_download(
        repo_id="iitolstykh/VIBE-Image-Edit",
        repo_type="model",
    )
    # Default guidance configuration for the original (non-distilled) model.
    editor = ImageEditor(
        checkpoint_path=checkpoint_dir,
        num_inference_steps=20,
        guidance_scale=4.5,
        image_guidance_scale=1.2,
        device="cuda",
    )
    print(f"Original Model loaded. Device: {editor.pipe.device}")
    return editor
 
33
def load_distilled_pipeline():
    """Download and build the CFG-distilled VIBE image editor.

    The distilled checkpoint bakes classifier-free guidance into the
    weights, so both guidance scales are set to 0.0 here.
    """
    print("Loading Distilled CFG Model...")
    checkpoint_dir = snapshot_download(
        repo_id="iitolstykh/VIBE-Image-Edit-DistilledCFG",
        repo_type="model",
    )
    # CFG is disabled for the distilled weights.
    editor = ImageEditor(
        checkpoint_path=checkpoint_dir,
        num_inference_steps=20,
        guidance_scale=0.0,
        image_guidance_scale=0.0,
        device="cuda",
    )
    print(f"Distilled Model loaded. Device: {editor.pipe.device}")
    return editor
49
 
50
# Build both editors once at import time so every GPU request reuses them.
# Tuple assignment evaluates left-to-right, preserving the original load order.
pipeline_original, pipeline_distilled = (
    load_original_pipeline(),
    load_distilled_pipeline(),
)
53
 
 
 
 
 
54
 
55
+ # --- Inference Functions ---
56
 
57
  @spaces.GPU(duration=180)
58
+ def edit_img_original(
59
  pil_image,
60
  edit_prompt: str,
61
  sample_steps,
 
64
  seed,
65
  progress=gr.Progress(track_tqdm=True),
66
  ):
67
+ """Inference for the original model with CFG."""
68
+ if pil_image is None:
69
+ raise gr.Error("Please upload an image.")
70
+
71
+ edited_image = pipeline_original.generate_edited_image(
72
  instruction=edit_prompt,
73
  conditioning_image=pil_image,
74
  num_images_per_prompt=1,
 
80
  return edited_image[0]
81
 
82
 
83
@spaces.GPU(duration=120)
def edit_img_distilled(
    pil_image,
    edit_prompt: str,
    sample_steps,
    seed,
    progress=gr.Progress(track_tqdm=True),
):
    """Apply *edit_prompt* to *pil_image* with the CFG-distilled model.

    Raises gr.Error when no image was uploaded; returns the first (and
    only) edited PIL image produced by the pipeline.
    """
    if pil_image is None:
        raise gr.Error("Please upload an image.")

    # The distilled checkpoint runs without CFG, so both scales stay at 0.0.
    results = pipeline_distilled.generate_edited_image(
        conditioning_image=pil_image,
        instruction=edit_prompt,
        num_images_per_prompt=1,
        num_inference_steps=sample_steps,
        guidance_scale=0.0,
        image_guidance_scale=0.0,
        seed=seed,
    )
    return results[0]
106
+
107
+
108
  @spaces.GPU(duration=180)
109
  def gen_img(
110
  prompt: str,
 
115
  seed: int,
116
  progress=gr.Progress(track_tqdm=True),
117
  ):
118
+ """Text-to-Image using the original model."""
119
+ generated_images = pipeline_original.generate_edited_image(
120
  instruction=prompt,
121
  num_images_per_prompt=1,
122
  t2i_height=height,
 
128
  return generated_images[0]
129
 
130
 
131
+ # --- UI Construction ---
 
 
132
 
133
+ if __name__ == "__main__":
134
+
135
+ # Pre-define examples
136
  image_dir = pathlib.Path('images')
137
+ if not image_dir.exists():
138
+ # Fallback if local images dir doesn't exist, though usually it should in the space
139
+ os.makedirs('images', exist_ok=True)
140
+
141
+ # [Image, Prompt, Steps, Guid_Scale, Img_Guid_Scale, Seed]
142
+ edit_examples_original = [[path.as_posix(), "let this case swim in the river", 20, 4.5, 1.2, 42] for path in sorted(image_dir.glob('*.png'))]
143
+
144
+ # [Image, Prompt, Steps, Seed] - Subset for distilled
145
+ edit_examples_distilled = [[path.as_posix(), "let this case swim in the river", 20, 42] for path in sorted(image_dir.glob('*.png'))]
146
+
147
+ # [Prompt, H, W, Steps, Scale, Seed]
148
  gen_examples = [["View through the clouds at Earth from a plane", 512, 1024, 20, 6.5, 234]]
149
 
150
  with gr.Blocks() as demo:
151
+ gr.Markdown("# VIBE: Visual Instruction Based Editor")
152
 
153
  with gr.Tabs():
154
+
155
+ # --- TAB 1: Original Image Editing ---
156
  with gr.Tab(label="Image Editing"):
157
+ gr.Markdown("### Image Editing demo for VIBE-Image-Edit model: https://huggingface.co/iitolstykh/VIBE-Image-Edit")
158
+
159
  with gr.Row():
160
  with gr.Column():
161
  edit_input_image = gr.Image(label="Input", type="pil")
162
+ edit_prompt = gr.Textbox(label="Prompt", placeholder="Please enter your prompt (e.g., 'Add a cat on the sofa')")
163
+
164
+ with gr.Accordion("Advanced Settings", open=True):
165
+ edit_sample_steps = gr.Slider(label="Sample Steps", minimum=1, maximum=100, value=20, step=1)
166
+ edit_guidance_scale = gr.Slider(label="Guidance Scale", minimum=0.1, maximum=30.0, value=4.5, step=0.1)
167
+ edit_image_guidance_scale = gr.Slider(label="Image Guidance Scale", minimum=0.1, maximum=30.0, value=1.2, step=0.1)
168
+ edit_seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42)
169
+
170
  edit_btn = gr.Button("Edit Image", variant="primary")
171
  with gr.Column():
172
  edit_output = gr.Image(label="Result", type="pil")
173
 
174
  gr.Examples(
175
+ examples=edit_examples_original,
176
  inputs=[edit_input_image, edit_prompt, edit_sample_steps, edit_guidance_scale, edit_image_guidance_scale, edit_seed],
177
  )
178
 
179
  edit_btn.click(
180
+ fn=edit_img_original,
181
  inputs=[edit_input_image, edit_prompt, edit_sample_steps, edit_guidance_scale, edit_image_guidance_scale, edit_seed],
182
  outputs=[edit_output],
183
  )
184
 
185
+ # --- TAB 2: Distilled Image Editing ---
186
+ with gr.Tab(label="Image Editing [CFG Distill]"):
187
+ gr.Markdown("### Image Editing demo for VIBE-Image-Edit model: https://huggingface.co/iitolstykh/VIBE-Image-Edit-DistilledCFG")
188
+ gr.Markdown("*This model runs without CFG, providing faster inference.*")
189
+
190
+ with gr.Row():
191
+ with gr.Column():
192
+ distill_input_image = gr.Image(label="Input", type="pil")
193
+ distill_prompt = gr.Textbox(label="Prompt", placeholder="Please enter your prompt")
194
+
195
+ with gr.Accordion("Advanced Settings", open=True):
196
+ distill_sample_steps = gr.Slider(label="Sample Steps", minimum=1, maximum=100, value=20, step=1)
197
+ # No Guidance Sliders here
198
+ distill_seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42)
199
+
200
+ distill_btn = gr.Button("Edit Image (Fast)", variant="primary")
201
+ with gr.Column():
202
+ distill_output = gr.Image(label="Result", type="pil")
203
+
204
+ gr.Examples(
205
+ examples=edit_examples_distilled,
206
+ inputs=[distill_input_image, distill_prompt, distill_sample_steps, distill_seed],
207
+ )
208
+
209
+ distill_btn.click(
210
+ fn=edit_img_distilled,
211
+ inputs=[distill_input_image, distill_prompt, distill_sample_steps, distill_seed],
212
+ outputs=[distill_output],
213
+ )
214
+
215
+ # --- TAB 3: Text to Image ---
216
  with gr.Tab(label="Image Generation"):
217
+ gr.Markdown("### Text-to-image demo for VIBE-Image-Edit model: https://huggingface.co/iitolstykh/VIBE-Image-Edit")
218
+
219
  with gr.Row():
220
  with gr.Column():
221
+ gen_prompt = gr.Textbox(label="Prompt", placeholder="Please enter your prompt")
222
+
223
+ with gr.Accordion("Advanced Settings", open=True):
224
+ gen_height = gr.Slider(label="Height", minimum=64, maximum=2048, value=1024, step=64)
225
+ gen_width = gr.Slider(label="Width", minimum=64, maximum=2048, value=1024, step=64)
226
+ gen_sample_steps = gr.Slider(label="Sample Steps", minimum=1, maximum=100, value=20, step=1)
227
+ gen_guidance_scale = gr.Slider(label="Guidance Scale", minimum=0.1, maximum=30.0, value=6.5, step=0.1)
228
+ gen_seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42)
229
+
230
  gen_btn = gr.Button("Generate Image", variant="primary")
231
  with gr.Column():
232
  gen_output = gr.Image(label="Result", type="pil")