Spaces:

iitolstykh
/

VIBE-Image-Edit-DEMO

Running on Zero

App Files Files Community

iitolstykh committed 20 days ago

Commit

7203e96

verified ·

1 Parent(s): 3df8c38

Update app.py

Browse files

Files changed (1) hide show

app.py +134 -43

app.py CHANGED Viewed

@@ -1,54 +1,61 @@
 import spaces
 import gradio as gr
 import os
-from huggingface_hub import snapshot_download
 import random
 import torch
 import numpy as np
-import pathlib
 from vibe.editor import ImageEditor
 MAX_SEED = np.iinfo(np.int32).max
-def load_pipeline():
     model_path = snapshot_download(
         repo_id="iitolstykh/VIBE-Image-Edit",
         repo_type="model",
     )
-    # Load model
-    editor_pipeline = ImageEditor(
         checkpoint_path=model_path,
         image_guidance_scale=1.2,
         guidance_scale=4.5,
         num_inference_steps=20,
         device="cuda",
     )
-    print(f"Model loaded. Model device: {editor_pipeline.pipe.device}")
-    return editor_pipeline
-pipeline = load_pipeline()
-def set_env(seed=0):
-    torch.manual_seed(seed)
-    torch.set_grad_enabled(False)
-def randomize_seed_fn(seed: int, randomize_seed: bool) -> int:
-    if randomize_seed:
-        seed = random.randint(0, MAX_SEED)
-    return seed
 @spaces.GPU(duration=180)
-def edit_img(
     pil_image,
     edit_prompt: str,
     sample_steps,
@@ -57,7 +64,11 @@ def edit_img(
     seed,
     progress=gr.Progress(track_tqdm=True),
 ):
-    edited_image = pipeline.generate_edited_image(
         instruction=edit_prompt,
         conditioning_image=pil_image,
         num_images_per_prompt=1,
@@ -69,6 +80,31 @@ def edit_img(
     return edited_image[0]
 @spaces.GPU(duration=180)
 def gen_img(
     prompt: str,
@@ -79,7 +115,8 @@ def gen_img(
     seed: int,
     progress=gr.Progress(track_tqdm=True),
 ):
-    generated_images = pipeline.generate_edited_image(
         instruction=prompt,
         num_images_per_prompt=1,
         t2i_height=height,
@@ -91,51 +128,105 @@ def gen_img(
     return generated_images[0]
-if __name__ == "__main__":
-    DESCRIPTION = """DEMO for VIBE-Image-Edit model: https://huggingface.co/iitolstykh/VIBE-Image-Edit"""
     image_dir = pathlib.Path('images')
-    edit_examples = [[path.as_posix(), "let this case swim in the river", 20, 4.5, 1.2, 42] for path in sorted(image_dir.glob('*.png'))]
     gen_examples = [["View through the clouds at Earth from a plane", 512, 1024, 20, 6.5, 234]]
     with gr.Blocks() as demo:
-        gr.Markdown(f"# {DESCRIPTION}")
         with gr.Tabs():
             with gr.Tab(label="Image Editing"):
                 with gr.Row():
                     with gr.Column():
                         edit_input_image = gr.Image(label="Input", type="pil")
-                        edit_prompt = gr.Textbox(label="Prompt", placeholder="Please enter your prompt.\n")
-                        edit_sample_steps = gr.Slider(label="Sample Steps", minimum=1, maximum=100, value=20, step=1)
-                        edit_guidance_scale = gr.Slider(label="Guidance Scale", minimum=0.1, maximum=30.0, value=4.5, step=0.1)
-                        edit_image_guidance_scale = gr.Slider(label="Image Guidance Scale", minimum=0.1, maximum=30.0, value=1.2, step=0.1)
-                        edit_seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42)
                         edit_btn = gr.Button("Edit Image", variant="primary")
                     with gr.Column():
                         edit_output = gr.Image(label="Result", type="pil")
                 gr.Examples(
-                    examples=edit_examples,
                     inputs=[edit_input_image, edit_prompt, edit_sample_steps, edit_guidance_scale, edit_image_guidance_scale, edit_seed],
                 )
                 edit_btn.click(
-                    fn=edit_img,
                     inputs=[edit_input_image, edit_prompt, edit_sample_steps, edit_guidance_scale, edit_image_guidance_scale, edit_seed],
                     outputs=[edit_output],
                 )
             with gr.Tab(label="Image Generation"):
                 with gr.Row():
                     with gr.Column():
-                        gen_prompt = gr.Textbox(label="Prompt", placeholder="Please enter your prompt.\n")
-                        gen_height = gr.Slider(label="Height", minimum=64, maximum=2048, value=1024, step=64)
-                        gen_width = gr.Slider(label="Width", minimum=64, maximum=2048, value=1024, step=64)
-                        gen_sample_steps = gr.Slider(label="Sample Steps", minimum=1, maximum=100, value=20, step=1)
-                        gen_guidance_scale = gr.Slider(label="Guidance Scale", minimum=0.1, maximum=30.0, value=6.5, step=0.1)
-                        gen_seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42)
                         gen_btn = gr.Button("Generate Image", variant="primary")
                     with gr.Column():
                         gen_output = gr.Image(label="Result", type="pil")

 import spaces
 import gradio as gr
 import os
+import pathlib
 import random
 import torch
 import numpy as np
+from huggingface_hub import snapshot_download
 from vibe.editor import ImageEditor
 MAX_SEED = np.iinfo(np.int32).max
+# --- Loading Pipelines ---
+def load_original_pipeline():
+    print("Loading Original Model...")
     model_path = snapshot_download(
         repo_id="iitolstykh/VIBE-Image-Edit",
         repo_type="model",
     )
+    # Load model with default guidance settings for the original
+    editor = ImageEditor(
         checkpoint_path=model_path,
         image_guidance_scale=1.2,
         guidance_scale=4.5,
         num_inference_steps=20,
         device="cuda",
     )
+    print(f"Original Model loaded. Device: {editor.pipe.device}")
+    return editor
+def load_distilled_pipeline():
+    print("Loading Distilled CFG Model...")
+    model_path = snapshot_download(
+        repo_id="iitolstykh/VIBE-Image-Edit-DistilledCFG",
+        repo_type="model",
+    )
+    # Load model with CFG disabled (both guidance scales set to 0.0).
+    editor = ImageEditor(
+        checkpoint_path=model_path,
+        num_inference_steps=20,
+        guidance_scale=0.0,
+        image_guidance_scale=0.0,
+        device="cuda",
+    )
+    print(f"Distilled Model loaded. Device: {editor.pipe.device}")
+    return editor
+# Initialize pipelines globally
+pipeline_original = load_original_pipeline()
+pipeline_distilled = load_distilled_pipeline()
+# --- Inference Functions ---
 @spaces.GPU(duration=180)
+def edit_img_original(
     pil_image,
     edit_prompt: str,
     sample_steps,
     seed,
     progress=gr.Progress(track_tqdm=True),
 ):
+    """Inference for the original model with CFG."""
+    if pil_image is None:
+        raise gr.Error("Please upload an image.")
+    edited_image = pipeline_original.generate_edited_image(
         instruction=edit_prompt,
         conditioning_image=pil_image,
         num_images_per_prompt=1,
     return edited_image[0]
+@spaces.GPU(duration=120)
+def edit_img_distilled(
+    pil_image,
+    edit_prompt: str,
+    sample_steps,
+    seed,
+    progress=gr.Progress(track_tqdm=True),
+):
+    """Inference for the distilled model (No CFG)."""
+    if pil_image is None:
+        raise gr.Error("Please upload an image.")
+    # Note: CFG is disabled here; guidance_scale and image_guidance_scale are fixed at 0.0
+    edited_image = pipeline_distilled.generate_edited_image(
+        instruction=edit_prompt,
+        conditioning_image=pil_image,
+        num_images_per_prompt=1,
+        num_inference_steps=sample_steps,
+        guidance_scale=0.0,
+        image_guidance_scale=0.0,
+        seed=seed,
+    )
+    return edited_image[0]
 @spaces.GPU(duration=180)
 def gen_img(
     prompt: str,
     seed: int,
     progress=gr.Progress(track_tqdm=True),
 ):
+    """Text-to-Image using the original model."""
+    generated_images = pipeline_original.generate_edited_image(
         instruction=prompt,
         num_images_per_prompt=1,
         t2i_height=height,
     return generated_images[0]
+# --- UI Construction ---
+if __name__ == "__main__":
+    # Pre-define examples
     image_dir = pathlib.Path('images')
+    if not image_dir.exists():
+        # Fallback: create an empty images dir if it is missing (it normally exists in the Space)
+        os.makedirs('images', exist_ok=True)
+    # [Image, Prompt, Steps, Guid_Scale, Img_Guid_Scale, Seed]
+    edit_examples_original = [[path.as_posix(), "let this case swim in the river", 20, 4.5, 1.2, 42] for path in sorted(image_dir.glob('*.png'))]
+    # [Image, Prompt, Steps, Seed] - Subset for distilled
+    edit_examples_distilled = [[path.as_posix(), "let this case swim in the river", 20, 42] for path in sorted(image_dir.glob('*.png'))]
+    # [Prompt, H, W, Steps, Scale, Seed]
     gen_examples = [["View through the clouds at Earth from a plane", 512, 1024, 20, 6.5, 234]]
     with gr.Blocks() as demo:
+        gr.Markdown("# VIBE: Visual Instruction Based Editor")
         with gr.Tabs():
+            # --- TAB 1: Original Image Editing ---
             with gr.Tab(label="Image Editing"):
+                gr.Markdown("### Image Editing demo for VIBE-Image-Edit model: https://huggingface.co/iitolstykh/VIBE-Image-Edit")
                 with gr.Row():
                     with gr.Column():
                         edit_input_image = gr.Image(label="Input", type="pil")
+                        edit_prompt = gr.Textbox(label="Prompt", placeholder="Please enter your prompt (e.g., 'Add a cat on the sofa')")
+                        with gr.Accordion("Advanced Settings", open=True):
+                            edit_sample_steps = gr.Slider(label="Sample Steps", minimum=1, maximum=100, value=20, step=1)
+                            edit_guidance_scale = gr.Slider(label="Guidance Scale", minimum=0.1, maximum=30.0, value=4.5, step=0.1)
+                            edit_image_guidance_scale = gr.Slider(label="Image Guidance Scale", minimum=0.1, maximum=30.0, value=1.2, step=0.1)
+                            edit_seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42)
                         edit_btn = gr.Button("Edit Image", variant="primary")
                     with gr.Column():
                         edit_output = gr.Image(label="Result", type="pil")
                 gr.Examples(
+                    examples=edit_examples_original,
                     inputs=[edit_input_image, edit_prompt, edit_sample_steps, edit_guidance_scale, edit_image_guidance_scale, edit_seed],
                 )
                 edit_btn.click(
+                    fn=edit_img_original,
                     inputs=[edit_input_image, edit_prompt, edit_sample_steps, edit_guidance_scale, edit_image_guidance_scale, edit_seed],
                     outputs=[edit_output],
                 )
+            # --- TAB 2: Distilled Image Editing ---
+            with gr.Tab(label="Image Editing [CFG Distill]"):
+                gr.Markdown("### Image Editing demo for VIBE-Image-Edit model: https://huggingface.co/iitolstykh/VIBE-Image-Edit-DistilledCFG")
+                gr.Markdown("*This model runs without CFG, providing faster inference.*")
+                with gr.Row():
+                    with gr.Column():
+                        distill_input_image = gr.Image(label="Input", type="pil")
+                        distill_prompt = gr.Textbox(label="Prompt", placeholder="Please enter your prompt")
+                        with gr.Accordion("Advanced Settings", open=True):
+                            distill_sample_steps = gr.Slider(label="Sample Steps", minimum=1, maximum=100, value=20, step=1)
+                            # No Guidance Sliders here
+                            distill_seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42)
+                        distill_btn = gr.Button("Edit Image (Fast)", variant="primary")
+                    with gr.Column():
+                        distill_output = gr.Image(label="Result", type="pil")
+                gr.Examples(
+                    examples=edit_examples_distilled,
+                    inputs=[distill_input_image, distill_prompt, distill_sample_steps, distill_seed],
+                )
+                distill_btn.click(
+                    fn=edit_img_distilled,
+                    inputs=[distill_input_image, distill_prompt, distill_sample_steps, distill_seed],
+                    outputs=[distill_output],
+                )
+            # --- TAB 3: Text to Image ---
             with gr.Tab(label="Image Generation"):
+                gr.Markdown("### Text-to-image demo for VIBE-Image-Edit model: https://huggingface.co/iitolstykh/VIBE-Image-Edit")
                 with gr.Row():
                     with gr.Column():
+                        gen_prompt = gr.Textbox(label="Prompt", placeholder="Please enter your prompt")
+                        with gr.Accordion("Advanced Settings", open=True):
+                            gen_height = gr.Slider(label="Height", minimum=64, maximum=2048, value=1024, step=64)
+                            gen_width = gr.Slider(label="Width", minimum=64, maximum=2048, value=1024, step=64)
+                            gen_sample_steps = gr.Slider(label="Sample Steps", minimum=1, maximum=100, value=20, step=1)
+                            gen_guidance_scale = gr.Slider(label="Guidance Scale", minimum=0.1, maximum=30.0, value=6.5, step=0.1)
+                            gen_seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=42)
                         gen_btn = gr.Button("Generate Image", variant="primary")
                     with gr.Column():
                         gen_output = gr.Image(label="Result", type="pil")