Spaces:

Reboot2004
/

Image-to-Image

Sleeping

App Files Files Community

Reboot2004 committed 25 days ago

Commit

2a0297b

verified ·

1 Parent(s): a521929

Update app.py

Browse files

Files changed (1) hide show

app.py +42 -35

app.py CHANGED Viewed

@@ -1,62 +1,69 @@
 import gradio as gr
 import torch
-from diffusers import StableDiffusionInstructPix2PixPipeline
 from PIL import Image
 # ---------------------------------------------------------------------------
-# Instruct-Pix2Pix
-# This model is specifically trained to edit images while preserving
-# their exact structure. It takes much longer on CPU (approx 2-3 mins),
-# but the text and layout will look significantly more accurate.
 # ---------------------------------------------------------------------------
-print("Loading Instruct-Pix2Pix Model... (This takes a moment)")
-pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(
-    "timbrooks/instruct-pix2pix",
-    torch_dtype=torch.float32,
-    safety_checker=None
-)
 def process_image(init_image, prompt, strength, steps):
-    if init_image is None:
-        return None
-    print(f"Received request: '{prompt}'")
-    init_image = init_image.convert("RGB")
-    # Resize keeping aspect ratio to max 512 for CPU memory limits
-    init_image.thumbnail((512, 512))
-    # InstructPix2Pix uses image_guidance_scale.
-    # 1.5 strictly preserves original image structure.
     image = pipe(
         prompt=prompt,
         image=init_image,
-        num_inference_steps=20, # Higher quality, but takes 2 mins on free CPU
-        image_guidance_scale=1.5,
-        guidance_scale=7.5
     ).images[0]
     return image
 with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
-    gr.Markdown("# 🪄 WiggleAgent // Instruct-Pix2Pix Backend")
-    gr.Markdown("Powered by Instruct-Pix2Pix (High Structural Preservation)")
     with gr.Row():
         with gr.Column():
-            input_image = gr.Image(type="pil", label="Input Image (Your Screenshot)")
-            prompt = gr.Textbox(label="Prompt", value="cyberpunk style, dark neon city")
-            # These sliders exist so the Gradio Client in main.py doesn't crash,
-            # but the actual function hardcodes the best values for Instruct-Pix2Pix.
-            strength = gr.Slider(minimum=0.1, maximum=1.0, value=0.6, label="Ignored by Backend")
-            steps = gr.Slider(minimum=1, maximum=4, value=2, label="Ignored by Backend")
-            btn = gr.Button("Generate", variant="primary")
         with gr.Column():
-            output_image = gr.Image(type="pil", label="Output Image")
     btn.click(
         fn=process_image,

 import gradio as gr
 import torch
+from diffusers import FluxImg2ImgPipeline
 from PIL import Image
+import os
 # ---------------------------------------------------------------------------
+# FLUX.1 Kontext [dev]
+# ⚠️ CRITICAL WARNING: This model is 12B parameters (~24GB).
+# 1. This model WILL NOT RUN on a free Hugging Face CPU space (16GB RAM limit).
+# 2. It requires a paid GPU instance (A10G, L4, or A100).
+# 3. This model is GATED. You must accept the license on Hugging Face
+#    and add your HF_TOKEN as a Secret in your Space settings.
 # ---------------------------------------------------------------------------
+HF_TOKEN = os.getenv("HF_TOKEN")
+print("Attempting to load FLUX.1 Kontext [dev]...")
+try:
+    # We use bfloat16 for memory efficiency, but this requires a GPU.
+    # On CPU, we must use float32, but it will almost certainly OOM.
+    pipe = FluxImg2ImgPipeline.from_pretrained(
+        "black-forest-labs/FLUX.1-Kontext-dev",
+        torch_dtype=torch.float32,
+        use_auth_token=HF_TOKEN
+    )
+    # pipe.to("cuda") # Uncomment if using a GPU Space
+except Exception as e:
+    print(f"FAILED TO LOAD MODEL: {e}")
+    pipe = None
 def process_image(init_image, prompt, strength, steps):
+    if pipe is None:
+        return Image.new("RGB", (512, 512), (50, 0, 0)) # Error indicator
+    init_image = init_image.convert("RGB").resize((512, 512))
+    # Generate
     image = pipe(
         prompt=prompt,
         image=init_image,
+        num_inference_steps=int(steps),
+        strength=float(strength),
+        guidance_scale=3.5
     ).images[0]
     return image
 with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
+    gr.Markdown("# 🪄 WiggleAgent // FLUX Kontext SOTA")
+    gr.Markdown("Using FLUX.1-Kontext-dev for high-fidelity in-context editing.")
+    if not HF_TOKEN:
+        gr.Markdown("## ⚠️ ERROR: HF_TOKEN Secret not found in Space Settings!")
     with gr.Row():
         with gr.Column():
+            input_image = gr.Image(type="pil", label="Input Screen")
+            prompt = gr.Textbox(label="Edit Prompt", value="redesign the UI with a cyberpunk aesthetic")
+            strength = gr.Slider(minimum=0.1, maximum=1.0, value=0.6, label="Edit Strength")
+            steps = gr.Slider(minimum=10, maximum=30, value=20, label="Steps")
+            btn = gr.Button("Transform", variant="primary")
         with gr.Column():
+            output_image = gr.Image(type="pil", label="Result")
     btn.click(
         fn=process_image,