File size: 8,067 Bytes
bc870ea
 
 
3416431
bc870ea
 
 
ed52ef5
db1e6d4
bc870ea
 
f43190d
bc870ea
 
 
 
 
 
 
668b8e4
bc870ea
 
 
 
 
 
 
 
668b8e4
 
bc870ea
 
668b8e4
f77dded
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bc870ea
e971244
 
 
 
bc870ea
e971244
bc870ea
 
 
 
 
668b8e4
 
bc870ea
 
 
 
 
 
668b8e4
 
 
 
 
 
bc870ea
 
 
 
 
 
d548d36
bc870ea
d548d36
 
e9fd080
 
d548d36
1f823d6
e9fd080
0bb39a0
d548d36
e9fd080
bc870ea
e9fd080
 
bc870ea
 
 
 
9e24507
 
 
 
 
bc870ea
ed52ef5
 
bc870ea
3dd110d
bc870ea
 
 
3dd110d
9e24507
 
 
3dd110d
bc870ea
 
 
 
668b8e4
 
3dd110d
668b8e4
 
 
bc870ea
 
 
668b8e4
bc870ea
 
0bb39a0
f43190d
bc870ea
3dd110d
9e24507
 
5711ff0
 
 
 
 
 
3dd110d
 
bc870ea
 
98ce228
 
 
 
4acf073
 
ed52ef5
b6e6a9c
5711ff0
0618cf9
b6e6a9c
 
2e47b99
ed52ef5
 
41a21ba
3dd110d
0bb39a0
 
 
 
 
bc870ea
 
668b8e4
0bb39a0
bc870ea
 
 
3dd110d
75a8eb7
090a80f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
# -*- coding: utf-8 -*-
# ZenCtrl Inpainting Playground (Baseten backend)

#import spaces
import os, json, base64, requests
from io import BytesIO
from PIL import Image, ImageDraw
import gradio as gr
import replicate

# ────────── Secrets & endpoints ──────────
# Every secret is read from the environment; a missing variable yields None.
BASETEN_MODEL_URL = os.environ.get("BASETEN_MODEL_URL")  # endpoint that process_image_and_text POSTs to
BASETEN_API_KEY = os.environ.get("BASETEN_API_KEY")  # sent as an "Api-Key" Authorization header when set
# NOTE(review): not referenced anywhere else in the visible code; presumably the
# replicate client picks REPLICATE_API_TOKEN up from the environment by itself.
REPLICATE_TOKEN = os.environ.get("REPLICATE_API_TOKEN")

# ────────── Globals ──────────
ADAPTER_SIZE = 1024  # default edge length (pixels) for images sent to the model
css = "#col-container {margin:0 auto; max-width:960px;}"  # passed to gr.Blocks(css=...)

# Background generation via Replicate
def _gen_bg(prompt: str):
    """Generate a background image through ``replicate.run``.

    The ``google/imagen-4-fast`` model is invoked with a 1:1 aspect ratio and
    the resulting image is downloaded over HTTP.

    Args:
        prompt: Text description of the background. A falsy prompt (empty
            string or ``None``) falls back to ``"cinematic background"``.

    Returns:
        The downloaded image converted to RGB (``PIL.Image.Image``).

    Raises:
        requests.HTTPError: If the download answers with an HTTP error status.
    """
    output = replicate.run(
        "google/imagen-4-fast",
        input={"prompt": prompt or "cinematic background", "aspect_ratio": "1:1"},
    )
    # The model output may be a single item or a list of items; use the first.
    image_url = output[0] if isinstance(output, list) else output
    response = requests.get(image_url, timeout=120)
    # Fail with a clear HTTP error instead of handing an error page to PIL,
    # which would only report that it cannot identify the image.
    response.raise_for_status()
    return Image.open(BytesIO(response.content)).convert("RGB")

# Main processing function
def _mask_to_green_box(image: "Image.Image", mask: "Image.Image") -> "Image.Image":
    """Return a copy of *image* with the mask's bounding box painted solid green."""
    # Binarise the mask: every non-zero pixel counts as painted.
    binary = mask.convert("L").point(lambda p: 255 if p else 0)
    bbox = binary.getbbox()
    if bbox:
        # Replace the free-form strokes with one filled rectangle around them.
        rect = Image.new("L", binary.size, 0)
        ImageDraw.Draw(rect).rectangle(bbox, fill=255)
        binary = rect
    # With an empty mask (no bbox) the composite is just a copy of *image*.
    green = Image.new("RGB", image.size, "#00FF00")
    return Image.composite(green, image, binary)


def _encode_png_b64(img: "Image.Image") -> str:
    """Serialise *img* as PNG and return it as a base64 text string."""
    buf = BytesIO()
    img.save(buf, format="PNG")
    return base64.b64encode(buf.getvalue()).decode()


def process_image_and_text(subject_image, adapter_dict, prompt, _unused1, _unused2, size=ADAPTER_SIZE, rank=10.0):
    """Send the subject and background images to the Baseten model and decode its reply.

    Args:
        subject_image: PIL image of the subject.
        adapter_dict: Either a PIL image, or a dict with ``"image"`` and
            ``"mask"`` entries (the sketch-tool payload). When a mask is
            present, its bounding box is painted green onto the image.
        prompt: Generation prompt forwarded to the model unchanged.
        _unused1: Ignored; kept for interface compatibility.
        _unused2: Ignored; kept for interface compatibility.
        size: Edge length both images are resized to (aspect ratio is not
            preserved); also sent as the requested ``height`` and ``width``.
        rank: Forwarded to the model as ``"rank"``.

    Returns:
        A 2-tuple holding the same decoded RGB image twice (one value per
        output wired to this function).

    Raises:
        gr.Error: If an input image is missing, the endpoint is not
            configured, or the response has no decodable image.
        requests.HTTPError: If the endpoint answers with an HTTP error status.
    """
    seed, guidance_scale, steps = 42, 2.5, 28

    if isinstance(adapter_dict, dict):  # Image/sketch input
        adapter_image = adapter_dict.get("image")
        adapter_mask = adapter_dict.get("mask")
    else:
        adapter_image, adapter_mask = adapter_dict, None

    # Surface missing inputs as UI errors rather than AttributeError on None.
    if subject_image is None:
        raise gr.Error("Please upload a subject image.")
    if adapter_image is None:
        raise gr.Error("Please upload a background / mask image.")
    if not BASETEN_MODEL_URL:
        raise gr.Error("BASETEN_MODEL_URL is not configured.")

    if adapter_mask is not None:
        adapter_image = _mask_to_green_box(adapter_image, adapter_mask)

    target = (size, size)
    payload = {
        "prompt": prompt,
        "subject_image": _encode_png_b64(subject_image.resize(target, Image.LANCZOS)),
        "adapter_image": _encode_png_b64(adapter_image.resize(target, Image.LANCZOS)),
        "height": size,
        "width": size,
        "steps": steps,
        "seed": seed,
        "guidance_scale": guidance_scale,
        "rank": rank,
    }

    headers = {"Content-Type": "application/json"}
    if BASETEN_API_KEY:
        headers["Authorization"] = f"Api-Key {BASETEN_API_KEY}"

    resp = requests.post(BASETEN_MODEL_URL, headers=headers, json=payload, timeout=180)
    resp.raise_for_status()
    data = resp.json()

    # Accept the image under either key so the presence check and the decoded
    # field can no longer disagree. "raw_result" wins when both are present.
    # NOTE(review): confirm with the Baseten model which key is the intended output.
    encoded = None
    if isinstance(data, dict):
        encoded = data.get("raw_result") or data.get("blended")
    if not encoded:
        raise gr.Error("Baseten response missing 'blended' image.")

    try:
        result = Image.open(BytesIO(base64.b64decode(encoded))).convert("RGB")
    except Exception as err:  # UI boundary: report any decode failure to the user
        raise gr.Error("Failed to decode 'blended' image from Baseten response.") from err
    return result, result



# ────────── Header HTML ──────────
# Static HTML rendered at the top of the page: title plus three badge links
# (Discord, landing page, X). Double quotes need no escaping inside a
# triple-quoted string, so the markup is written verbatim.
header_html = """
<h1>ZenCtrl Inpainting Beta</h1>
<div align="center" style="line-height: 1;">
  <a href="https://discord.com/invite/b9RuYQ3F8k" target="_blank" style="margin: 10px;" name="discord_link"><img src="https://img.shields.io/badge/Discord-Join-7289da.svg?logo=discord" alt="Discord" style="display: inline-block; vertical-align: middle;"></a>
  <a href="https://fotographer.ai/zen-control" target="_blank" style="margin: 10px;" name="lp_link"><img src="https://img.shields.io/badge/Website-Landing_Page-blue" alt="LP" style="display: inline-block; vertical-align: middle;"></a>
  <a href="https://x.com/FotographerAI" target="_blank" style="margin: 10px;" name="twitter_link"><img src="https://img.shields.io/twitter/follow/FotographerAI?style=social" alt="X" style="display: inline-block; vertical-align: middle;"></a>
</div>
"""

# ────────── Gradio UI ──────────
with gr.Blocks(css=css, title="ZenCtrl Inpainting") as demo:
    # Receives the second value returned by process_image_and_text.
    raw_state = gr.State()

    gr.HTML(header_html)
    gr.Markdown(
        "**Generate context-aware images of your subject with ZenCtrl’s inpainting playground.** Upload a subject + optional mask, write a prompt, and hit **Generate**.   \n"
        "Open *Advanced Settings* for an AI-generated background.  \n\n"
        "**Note:** The model was trained mainly on interior scenes and other *rigid* objects. Results on people or highly deformable items may contain visual distortions."
    )

    with gr.Row():
        # Left column: inputs and controls.
        with gr.Column(scale=2, elem_id="col-container"):
            subj_img = gr.Image(type="pil", label="Subject image")
            ref_img = gr.Image(type="pil", label="Background / Mask image", tool="sketch", brush_color="#00FF00")
            ref_img_ex = gr.Image(type="pil", visible=False)

            # Removed Florence-SAM
            promptbox = gr.Textbox(label="Generation prompt", value="furniture", lines=2)
            run_btn = gr.Button("Generate", variant="primary")

            with gr.Accordion("Advanced Settings", open=False):
                bgprompt = gr.Textbox(label="Background Prompt", value="Scandinavian living room …")
                bg_btn = gr.Button("Generate BG")

        # Right column: results.
        with gr.Column(scale=2):
            output_img = gr.Image(label="Output Image")
            bg_img = gr.Image(label="Background", visible=True)

    # ---------- Example wrapper ---------------------------------
    def _load_and_show(subj_path, bg_path, prompt_text):
        """Load one example row plus its pre-rendered output image.

        The output path is derived from the subject path by replacing
        ``".png"`` with ``"_out.png"``.

        Returns:
            ``(subject image, background image, prompt text, output image)``,
            matching the ``outputs`` list given to ``gr.Examples`` below.
        """
        # NOTE(review): assumes Gradio hands this function the example file
        # paths as strings; if it passes preprocessed values instead (e.g. PIL
        # images for type="pil" inputs), `.replace` / `Image.open` will fail —
        # confirm against the installed Gradio version.
        out_path = subj_path.replace(".png", "_out.png")
        return (
            Image.open(subj_path),
            Image.open(bg_path),
            prompt_text,
            Image.open(out_path),
        )

    # ---------- Examples ----------------------------------------
    # Each row is [subject, background, prompt]; the matching "*_out.png"
    # result is looked up by _load_and_show.
    gr.Examples(
        examples=[
            ["examples/sofa1.png", "examples/sofa1_bg.png", "add the sofa"],
            ["examples/sofa2.png", "examples/sofa2_bg.png", "add this sofa"],
            ["examples/chair1.png", "examples/chair1_bg.png", "add the chair"],
            ["examples/console_table.png", "examples/console_table_bg.png", "Scandinavian console table against a gallery-style wall filled with abstract framed art,"],
            ["examples/office_chair.png", "examples/office_chair_bg.png", "office chair"],
            ["examples/car.png", "examples/car_bg.png", "car on the road"],
        ],
        inputs=[subj_img, ref_img, promptbox],   # three inputs
        outputs=[subj_img, ref_img, promptbox, output_img],
        fn=_load_and_show,
        # Separator is a middle dot; the previous text was mis-encoded ("Β·").
        label="Visual Presets – Subject · Background · Prompt",
        examples_per_page="all",
        cache_examples="lazy",
    )

    # ---------- Buttons & interactions --------------------------
    # The two gr.State placeholders fill the function's unused parameters.
    run_btn.click(
        process_image_and_text,
        inputs=[subj_img, ref_img, promptbox, gr.State(False), gr.State("")],
        outputs=[output_img, raw_state]
    )
    bg_btn.click(_gen_bg, inputs=[bgprompt], outputs=[bg_img])

# ---------------- Launch ---------------------------------------
# Launch the Blocks app only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()