Reboot2004 committed on
Commit
2a0297b
·
verified ·
1 Parent(s): a521929

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -35
app.py CHANGED
@@ -1,62 +1,69 @@
1
  import gradio as gr
2
  import torch
3
- from diffusers import StableDiffusionInstructPix2PixPipeline
4
  from PIL import Image
 
5
 
6
  # ---------------------------------------------------------------------------
7
- # Instruct-Pix2Pix
8
- # This model is specifically trained to edit images while preserving
9
- # their exact structure. It takes much longer on CPU (approx 2-3 mins),
10
- # but the text and layout will look significantly more accurate.
 
 
11
  # ---------------------------------------------------------------------------
12
 
13
- print("Loading Instruct-Pix2Pix Model... (This takes a moment)")
14
- pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(
15
- "timbrooks/instruct-pix2pix",
16
- torch_dtype=torch.float32,
17
- safety_checker=None
18
- )
 
 
 
 
 
 
 
 
 
 
19
 
20
  def process_image(init_image, prompt, strength, steps):
21
- if init_image is None:
22
- return None
23
 
24
- print(f"Received request: '{prompt}'")
25
- init_image = init_image.convert("RGB")
26
-
27
- # Resize keeping aspect ratio to max 512 for CPU memory limits
28
- init_image.thumbnail((512, 512))
29
 
30
- # InstructPix2Pix uses image_guidance_scale.
31
- # 1.5 strictly preserves original image structure.
32
  image = pipe(
33
  prompt=prompt,
34
  image=init_image,
35
- num_inference_steps=20, # Higher quality, but takes 2 mins on free CPU
36
- image_guidance_scale=1.5,
37
- guidance_scale=7.5
38
  ).images[0]
39
 
40
  return image
41
 
42
  with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
43
- gr.Markdown("# 🪄 WiggleAgent // Instruct-Pix2Pix Backend")
44
- gr.Markdown("Powered by Instruct-Pix2Pix (High Structural Preservation)")
 
 
 
45
 
46
  with gr.Row():
47
  with gr.Column():
48
- input_image = gr.Image(type="pil", label="Input Image (Your Screenshot)")
49
- prompt = gr.Textbox(label="Prompt", value="cyberpunk style, dark neon city")
50
-
51
- # These sliders exist so the Gradio Client in main.py doesn't crash,
52
- # but the actual function hardcodes the best values for Instruct-Pix2Pix.
53
- strength = gr.Slider(minimum=0.1, maximum=1.0, value=0.6, label="Ignored by Backend")
54
- steps = gr.Slider(minimum=1, maximum=4, value=2, label="Ignored by Backend")
55
-
56
- btn = gr.Button("Generate", variant="primary")
57
 
58
  with gr.Column():
59
- output_image = gr.Image(type="pil", label="Output Image")
60
 
61
  btn.click(
62
  fn=process_image,
 
1
  import gradio as gr
2
  import torch
3
+ from diffusers import FluxImg2ImgPipeline
4
  from PIL import Image
5
+ import os
6
 
7
  # ---------------------------------------------------------------------------
8
+ # FLUX.1 Kontext [dev]
9
+ # ⚠️ CRITICAL WARNING: This model is 12B parameters (~24GB).
10
+ # 1. This model WILL NOT RUN on a free Hugging Face CPU space (16GB RAM limit).
11
+ # 2. It requires a paid GPU instance (A10G, L4, or A100).
12
+ # 3. This model is GATED. You must accept the license on Hugging Face
13
+ # and add your HF_TOKEN as a Secret in your Space settings.
14
  # ---------------------------------------------------------------------------
15
 
16
+ HF_TOKEN = os.getenv("HF_TOKEN")
17
+
18
+ print("Attempting to load FLUX.1 Kontext [dev]...")
19
+
20
+ try:
21
+ # bfloat16 would be more memory-efficient, but it requires a GPU.
22
+ # On CPU, we must use float32, but it will almost certainly OOM.
23
+ pipe = FluxImg2ImgPipeline.from_pretrained(
24
+ "black-forest-labs/FLUX.1-Kontext-dev",
25
+ torch_dtype=torch.float32,
26
+ use_auth_token=HF_TOKEN
27
+ )
28
+ # pipe.to("cuda") # Uncomment if using a GPU Space
29
+ except Exception as e:
30
+ print(f"FAILED TO LOAD MODEL: {e}")
31
+ pipe = None
32
 
33
  def process_image(init_image, prompt, strength, steps):
34
+ if pipe is None:
35
+ return Image.new("RGB", (512, 512), (50, 0, 0)) # Error indicator
36
 
37
+ init_image = init_image.convert("RGB").resize((512, 512))
 
 
 
 
38
 
39
+ # Generate
 
40
  image = pipe(
41
  prompt=prompt,
42
  image=init_image,
43
+ num_inference_steps=int(steps),
44
+ strength=float(strength),
45
+ guidance_scale=3.5
46
  ).images[0]
47
 
48
  return image
49
 
50
  with gr.Blocks(theme=gr.themes.Monochrome()) as demo:
51
+ gr.Markdown("# 🪄 WiggleAgent // FLUX Kontext SOTA")
52
+ gr.Markdown("Using FLUX.1-Kontext-dev for high-fidelity in-context editing.")
53
+
54
+ if not HF_TOKEN:
55
+ gr.Markdown("## ⚠️ ERROR: HF_TOKEN Secret not found in Space Settings!")
56
 
57
  with gr.Row():
58
  with gr.Column():
59
+ input_image = gr.Image(type="pil", label="Input Screen")
60
+ prompt = gr.Textbox(label="Edit Prompt", value="redesign the UI with a cyberpunk aesthetic")
61
+ strength = gr.Slider(minimum=0.1, maximum=1.0, value=0.6, label="Edit Strength")
62
+ steps = gr.Slider(minimum=10, maximum=30, value=20, label="Steps")
63
+ btn = gr.Button("Transform", variant="primary")
 
 
 
 
64
 
65
  with gr.Column():
66
+ output_image = gr.Image(type="pil", label="Result")
67
 
68
  btn.click(
69
  fn=process_image,