Spaces:
Running
Running
File size: 10,945 Bytes
bc870ea 3416431 bc870ea ed52ef5 db1e6d4 bc870ea f43190d bc870ea 668b8e4 bc870ea c861fd1 bc870ea 668b8e4 c861fd1 bc870ea 668b8e4 f77dded bc870ea e971244 bc870ea e971244 bc870ea 668b8e4 bc870ea 668b8e4 bc870ea d548d36 bc870ea d548d36 e9fd080 d548d36 1f823d6 e9fd080 0bb39a0 d548d36 e9fd080 bc870ea e9fd080 bc870ea 9e24507 bc870ea ed52ef5 bc870ea 3dd110d bc870ea 3dd110d c861fd1 9e24507 3dd110d bc870ea 668b8e4 3dd110d 668b8e4 bc870ea 668b8e4 bc870ea 0bb39a0 f43190d bc870ea 3dd110d 2f44119 b57663f 2f44119 c861fd1 2f44119 c861fd1 2f44119 b57663f 2f44119 6bb4fbf b57663f f1e6a3a b57663f 97a7c26 b57663f 7054a43 6bb4fbf b68b0af 6bb4fbf ed52ef5 b57663f 3dd110d 0bb39a0 bc870ea 668b8e4 0bb39a0 bc870ea 3dd110d 75a8eb7 c861fd1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 |
# -*- coding: utf-8 -*-
# ZenCtrl Inpainting Playground (Baseten backend)
#import spaces
import os, json, base64, requests
from io import BytesIO
from PIL import Image, ImageDraw
import gradio as gr
import replicate
# ---------- Secrets & endpoints ----------
# All credentials come from the environment; any of these may be None when
# running locally without the corresponding secret configured.
BASETEN_MODEL_URL = os.getenv("BASETEN_MODEL_URL")  # Baseten prediction endpoint (POSTed to in process_image_and_text)
BASETEN_API_KEY = os.getenv("BASETEN_API_KEY")  # sent as "Api-Key <key>" Authorization header when set
# NOTE(review): REPLICATE_TOKEN appears unused in this file — presumably the
# replicate client reads REPLICATE_API_TOKEN from the environment itself; confirm.
REPLICATE_TOKEN = os.getenv("REPLICATE_API_TOKEN")
# ---------- Globals ----------
ADAPTER_SIZE = 1024  # square working resolution for all images sent to the model
css = "#col-container {margin:0 auto; max-width:960px;}"  # layout CSS for the Blocks UI
# Background generation via Replicate
def _gen_bg(prompt: str):
    """
    Generate a square background image with Replicate's imagen-4-fast model.

    Args:
        prompt: Text description of the desired background; when empty/None,
            a generic fallback prompt is used.

    Returns:
        PIL.Image.Image: The downloaded background, converted to RGB.
    """
    fallback = "cinematic background"
    result = replicate.run(
        "google/imagen-4-fast",
        input={"prompt": prompt or fallback, "aspect_ratio": "1:1"},
    )
    # Some model versions return a list of URLs; use the first entry.
    if isinstance(result, list):
        result = result[0]
    response = requests.get(result, timeout=120)
    background = Image.open(BytesIO(response.content))
    return background.convert("RGB")
# Main processing function
def process_image_and_text(subject_image, adapter_dict, prompt, _unused1, _unused2, size=ADAPTER_SIZE, rank=10.0):
    """
    Send the subject + adapter (background/sketch) pair to the Baseten model
    and return the inpainted result.

    Args:
        subject_image: PIL.Image of the subject to be placed.
        adapter_dict: Either a PIL.Image, or a dict with "image" and "mask"
            keys as produced by the Gradio sketch component.
        prompt: Text description for the generation.
        _unused1: Unused placeholder (kept for interface compatibility).
        _unused2: Unused placeholder (kept for interface compatibility).
        size: Square working resolution sent to the model (default ADAPTER_SIZE).
        rank: Rank parameter forwarded to the model (default 10.0).

    Returns:
        tuple: (output_image, raw_image) — both the same PIL.Image result.

    Raises:
        gr.Error: If the Baseten response lacks a decodable image.
        requests.HTTPError: If the Baseten endpoint returns an error status.
    """
    # Fixed sampling settings for the demo.
    seed, guidance_scale, steps = 42, 2.5, 28

    if isinstance(adapter_dict, dict):  # sketch input: {"image": ..., "mask": ...}
        adapter_image = adapter_dict["image"]
        adapter_mask = adapter_dict["mask"]
        if adapter_mask is not None:
            # Binarize the painted mask, then replace it with its bounding-box
            # rectangle so the model sees one solid green placement region.
            m = adapter_mask.convert("L").point(lambda p: 255 if p else 0)
            bbox = m.getbbox()
            if bbox:
                rect = Image.new("L", m.size, 0)
                ImageDraw.Draw(rect).rectangle(bbox, fill=255)
                m = rect
            green = Image.new("RGB", adapter_image.size, "#00FF00")
            adapter_image = Image.composite(green, adapter_image, m)
    else:
        adapter_image = adapter_dict

    def prep(img):
        # Resize (no center-crop) to the square working resolution.
        return img.resize((size, size), Image.LANCZOS)

    def b64(img):
        # Encode a PIL image as base64 PNG for the JSON payload.
        buf = BytesIO()
        img.save(buf, format="PNG")
        return base64.b64encode(buf.getvalue()).decode()

    payload = {
        "prompt": prompt,
        "subject_image": b64(prep(subject_image)),
        "adapter_image": b64(prep(adapter_image)),
        "height": size,
        "width": size,
        "steps": steps,
        "seed": seed,
        "guidance_scale": guidance_scale,
        "rank": rank,
    }
    headers = {"Content-Type": "application/json"}
    if BASETEN_API_KEY:
        headers["Authorization"] = f"Api-Key {BASETEN_API_KEY}"

    resp = requests.post(BASETEN_MODEL_URL, headers=headers, json=payload, timeout=180)
    resp.raise_for_status()
    data = resp.json()

    # Extract base64 image from the response.
    if "blended" not in data:
        raise gr.Error("Baseten response missing 'blended' image.")
    try:
        # BUGFIX: the original guarded on "blended" but then decoded
        # data["raw_result"], raising KeyError (masked as a decode failure)
        # whenever only "blended" was returned. Prefer "raw_result" when
        # present (preserves old behavior), else fall back to "blended".
        encoded = data.get("raw_result", data["blended"])
        raw_img = Image.open(BytesIO(base64.b64decode(encoded))).convert("RGB")
    except Exception:
        raise gr.Error("Failed to decode 'blended' image from Baseten response.")
    return raw_img, raw_img
# ---------- Header HTML ----------
# Static banner (title + Discord / website / X badge links), rendered via gr.HTML.
header_html = """
<h1>ZenCtrl Inpainting Beta</h1>
<div align=\"center\" style=\"line-height: 1;\">
<a href=\"https://discord.com/invite/b9RuYQ3F8k\" target=\"_blank\" style=\"margin: 10px;\" name=\"discord_link\"><img src=\"https://img.shields.io/badge/Discord-Join-7289da.svg?logo=discord\" alt=\"Discord\" style=\"display: inline-block; vertical-align: middle;\"></a>
<a href=\"https://fotographer.ai/zen-control\" target=\"_blank\" style=\"margin: 10px;\" name=\"lp_link\"><img src=\"https://img.shields.io/badge/Website-Landing_Page-blue\" alt=\"LP\" style=\"display: inline-block; vertical-align: middle;\"></a>
<a href=\"https://x.com/FotographerAI\" target=\"_blank\" style=\"margin: 10px;\" name=\"twitter_link\"><img src=\"https://img.shields.io/twitter/follow/FotographerAI?style=social\" alt=\"X\" style=\"display: inline-block; vertical-align: middle;\"></a>
</div>
"""
# ---------- Gradio UI ----------
with gr.Blocks(css=css, title="ZenCtrl Inpainting") as demo:
    # Holds the raw backend result alongside the displayed output image.
    raw_state = gr.State()
    gr.HTML(header_html)
    gr.Markdown(
        "**Generate context-aware images of your subject with ZenCtrl's inpainting playground.** Upload a subject + optional mask, write a prompt, and hit **Generate**. \n"
        "Open *Advanced Settings* for an AI-generated background. \n\n"
        "**Note:** The model was trained mainly on interior scenes and other *rigid* objects. Results on people or highly deformable items may contain visual distortions."
    )
    with gr.Row():
        with gr.Column(scale=2, elem_id="col-container"):
            subj_img = gr.Image(type="pil", label="Subject image")
            # Sketch tool: the user paints the placement region; the brush color
            # matches the solid green used by process_image_and_text's compositing.
            ref_img = gr.Image(type="pil", label="Background / Mask image", tool="sketch", brush_color="#00FF00")
            # Hidden image widget — currently not wired to anything visible.
            ref_img_ex = gr.Image(type="pil", visible=False)
            # Removed Florence-SAM
            promptbox = gr.Textbox(label="Generation prompt", value="furniture", lines=2)
            run_btn = gr.Button("Generate", variant="primary")
            with gr.Accordion("Advanced Settings", open=False):
                # NOTE(review): "β¦" looks like a mojibake of an ellipsis ("…") — confirm intended placeholder text.
                bgprompt = gr.Textbox(label="Background Prompt", value="Scandinavian living room β¦")
                bg_btn = gr.Button("Generate BG")
        with gr.Column(scale=2):
            output_img = gr.Image(label="Output Image")
            bg_img = gr.Image(label="Background", visible=True)

    # ---------- Example wrapper ---------------------------------
    def _load_and_show(subj_path, bg_path, prompt_text):
        """
        Load example images and prompt for display in the interface.

        Takes the three values coming from an Examples row and returns FOUR
        objects — one for every output widget:
            1. subject PIL image          -> subj_img
            2. dict for the sketch input  -> ref_img
            3. prompt string              -> promptbox
            4. pre-rendered result PIL    -> output_img

        Args:
            subj_path: Path to subject image file.
            bg_path: Path to background image file.
            prompt_text: Example prompt text.

        Returns:
            tuple: (subject_image, sketch_dict, prompt, output_image)
        """
        out_path = subj_path.replace(".png", "_out.png")  # pre-rendered result saved next to the subject
        return (
            Image.open(subj_path),                         # 1) subject
            {"image": Image.open(bg_path), "mask": None},  # 2) sketch dict
            prompt_text,                                   # 3) prompt
            Image.open(out_path)                           # 4) output image
        )

    def ex(subj, bg, prompt):
        # NOTE(review): appears unused — gr.Examples below uses _load_and_show.
        return [
            Image.open(subj),
            {"image": Image.open(bg), "mask": None},
            prompt
        ]

    # ---------- Examples ----------------------------------------
    # NOTE(review): each example row carries 4 values while _load_and_show
    # accepts 3 parameters; with cache_examples=False Gradio fills the inputs
    # directly on click so fn is not invoked — confirm before enabling caching.
    gr.Examples(
        examples=[
            ["examples/sofa1_1.png", "examples/sofa1_bg.png", "add the sofa", "examples/sofa1_out.png"],
            ["examples/sofa2.png", "examples/sofa2_bg.png", "add this sofa", "examples/sofa2_out.png"],
            ["examples/chair1.png", "examples/chair1_bg.png", "add the chair", "examples/chair1_out.png"],
            ["examples/console_table.png", "examples/console_table_bg.png", "Scandinavian console table against a gallery-style wall filled with abstract framed art,", "examples/console_table_out.png"],
            ["examples/office_chair.png", "examples/office_chair_bg.png", "office chair", "examples/office_chair_out.png"],
            ["examples/office_chair1.png", "examples/office_chair1_bg.png", "Executive mesh chair in a modern home office, with matte black wall panels, built-in shelves, ", "examples/office_chair1_out.png"],
            ["examples/bed.png", "examples/bed_in.png", "Low platform bed in a Japandi-style bedroom, surrounded by floating nightstands", "examples/bed_out.png"],
            ["examples/car.png", "examples/car_bg.png", "car on the road", "examples/car_out.png"],
        ],
        inputs = [subj_img, ref_img, promptbox, output_img],
        outputs = [subj_img, ref_img, promptbox, output_img],
        fn = _load_and_show,
        #preprocess = False, # data already widget-ready
        cache_examples=False
    )

    # ---------- Buttons & interactions --------------------------
    # The two gr.State constants fill process_image_and_text's _unused1/_unused2
    # placeholder parameters.
    run_btn.click(
        process_image_and_text,
        inputs=[subj_img, ref_img, promptbox, gr.State(False), gr.State("")],
        outputs=[output_img, raw_state]
    )
    bg_btn.click(_gen_bg, inputs=[bgprompt], outputs=[bg_img])
# ---------------- Launch ---------------------------------------
if __name__ == "__main__":
    # NOTE(review): mcp_server=True requires a Gradio version with MCP support — confirm.
    demo.launch(mcp_server=True)