dee-Z-Image-Turbo

Sleeping

App Files Files Community

telcom committed on Dec 28, 2025

Commit

f762924

verified ·

1 Parent(s): e8d221e

Update app.py

Browse files

Files changed (1) hide show

app.py +123 -231

app.py CHANGED Viewed

@@ -1,280 +1,172 @@
-# app.py
-# ============================================================
-# SDXL Inpainting with ONE "paint-on-image" input (Gradio ImageEditor)
-# ============================================================
 import os
 import gc
 import random
-import warnings
-import logging
-# ---- Spaces GPU decorator (must be imported early) ----------
-try:
-    import spaces  # noqa: F401
-    SPACES_AVAILABLE = True
-except Exception:
-    SPACES_AVAILABLE = False
-import gradio as gr
 import numpy as np
-from PIL import Image, ImageChops
 import torch
-from huggingface_hub import login
-from diffusers import StableDiffusionXLInpaintPipeline
-# ============================================================
-# Config
-# ============================================================
-INPAINT_MODEL = os.environ.get(
-    "INPAINT_MODEL",
-    "diffusers/stable-diffusion-xl-1.0-inpainting-0.1",
-).strip()
-HF_TOKEN = os.getenv("HF_TOKEN", "").strip()
-if HF_TOKEN:
-    login(token=HF_TOKEN)
-os.environ["TOKENIZERS_PARALLELISM"] = "false"
-warnings.filterwarnings("ignore")
-logging.getLogger("transformers").setLevel(logging.ERROR)
-MAX_SEED = np.iinfo(np.int32).max
 # ============================================================
-# Device & dtype
 # ============================================================
-cuda_available = torch.cuda.is_available()
-device = torch.device("cuda" if cuda_available else "cpu")
-if cuda_available and hasattr(torch.cuda, "is_bf16_supported") and torch.cuda.is_bf16_supported():
-    dtype = torch.bfloat16
-elif cuda_available:
-    dtype = torch.float16
-else:
-    dtype = torch.float32
-MAX_IMAGE_SIZE = 1536 if cuda_available else 768
-fallback_msg = "" if cuda_available else "GPU unavailable. Running in CPU fallback mode (slow)."
 # ============================================================
-# Load pipeline
 # ============================================================
-pipe = None
-model_loaded = False
-load_error = None
-try:
-    fp_kwargs = {"torch_dtype": dtype, "use_safetensors": True}
-    if HF_TOKEN:
-        fp_kwargs["token"] = HF_TOKEN
-    pipe = StableDiffusionXLInpaintPipeline.from_pretrained(INPAINT_MODEL, **fp_kwargs).to(device)
-    try:
-        pipe.set_progress_bar_config(disable=True)
-    except Exception:
-        pass
-    model_loaded = True
-except Exception as e:
-    load_error = repr(e)
-    model_loaded = False
 # ============================================================
-# Helpers
 # ============================================================
-def make_error_image(w: int, h: int) -> Image.Image:
-    return Image.new("RGB", (int(w), int(h)), (18, 18, 22))
-def _resize(img: Image.Image, w: int, h: int, is_mask: bool = False) -> Image.Image:
-    if img.size == (w, h):
-        return img
-    return img.resize((w, h), Image.NEAREST if is_mask else Image.LANCZOS)
-def extract_image_and_mask(editor_value):
-    """
-    Gradio ImageEditor returns a dict-like object (varies a bit by version),
-    usually containing:
-      - "background": PIL image (original)
-      - "layers": list of PIL images (paint strokes etc.)
-      - "composite": PIL image (background + paint)
-    We build a binary-ish mask from the difference between composite and background.
-    White = edit, Black = keep.
-    """
-    if editor_value is None:
-        return None, None
-    background = editor_value.get("background", None)
-    composite = editor_value.get("composite", None)
-    if not isinstance(background, Image.Image) or not isinstance(composite, Image.Image):
-        return None, None
-    background = background.convert("RGB")
-    composite = composite.convert("RGB")
-    # Mask = difference between composite and background (where user painted)
-    diff = ImageChops.difference(composite, background).convert("L")
-    # Make it more binary (stronger mask)
-    # Pixels > threshold become white (edit region)
-    threshold = 10
-    mask = diff.point(lambda p: 255 if p > threshold else 0).convert("L")
-    return background, mask
-# ============================================================
-# Inference
-# ============================================================
-def _infer_impl(
-    prompt,
-    negative_prompt,
-    seed,
-    randomize_seed,
-    width,
-    height,
-    guidance_scale,
-    num_inference_steps,
-    painted,  # ImageEditor value
-):
-    width = int(width)
-    height = int(height)
-    seed = int(seed)
-    if not model_loaded:
-        return make_error_image(width, height), f"Model load failed: {load_error}"
-    prompt = (prompt or "").strip()
-    if not prompt:
-        return make_error_image(width, height), "Error: prompt is empty."
-    init_image, mask_image = extract_image_and_mask(painted)
-    if init_image is None or mask_image is None:
-        return make_error_image(width, height), "Error: upload an image and paint over the area you want to change."
-    if randomize_seed:
-        seed = random.randint(0, MAX_SEED)
-    generator = torch.Generator(device=device).manual_seed(seed)
-    neg = (negative_prompt or "").strip()
-    if not neg:
-        neg = None
-    init_image = _resize(init_image, width, height, is_mask=False)
-    mask_image = _resize(mask_image, width, height, is_mask=True)
-    status = f"Seed: {seed}"
-    if fallback_msg:
-        status += f" | {fallback_msg}"
-    try:
-        with torch.inference_mode():
-            if device.type == "cuda":
-                with torch.autocast("cuda", dtype=dtype):
-                    out = pipe(
-                        prompt=prompt,
-                        negative_prompt=neg,
-                        image=init_image,
-                        mask_image=mask_image,
-                        width=width,
-                        height=height,
-                        guidance_scale=float(guidance_scale),
-                        num_inference_steps=int(num_inference_steps),
-                        generator=generator,
-                    )
-            else:
-                out = pipe(
-                    prompt=prompt,
-                    negative_prompt=neg,
-                    image=init_image,
-                    mask_image=mask_image,
-                    width=width,
-                    height=height,
-                    guidance_scale=float(guidance_scale),
-                    num_inference_steps=int(num_inference_steps),
-                    generator=generator,
-                )
-        return out.images[0], status
-    except Exception as e:
-        return make_error_image(width, height), f"Error: {type(e).__name__}: {e}"
-    finally:
-        gc.collect()
-        if device.type == "cuda":
-            torch.cuda.empty_cache()
-if SPACES_AVAILABLE:
-    @spaces.GPU
-    def infer(*args, **kwargs):
-        return _infer_impl(*args, **kwargs)
-else:
-    def infer(*args, **kwargs):
-        return _infer_impl(*args, **kwargs)
 # ============================================================
-# UI
 # ============================================================
-CSS = "body { background: #000; color: #fff; }"
-with gr.Blocks(title="Inpainting (paint-to-edit)") as demo:
-    gr.HTML(f"<style>{CSS}</style>")
-    if fallback_msg:
-        gr.Markdown(f"**{fallback_msg}**")
-    if not model_loaded:
-        gr.Markdown(f"⚠️ Model failed to load:\n\n{load_error}")
-    gr.Markdown("## Inpainting (paint the area you want to change)")
-    gr.Markdown("Upload an image, then paint over the clothing area, then describe the new clothing in the prompt.")
-    prompt = gr.Textbox(label="Prompt (describe new clothing)", lines=2)
-    negative_prompt = gr.Textbox(label="Negative prompt (optional)", lines=2)
-    painted = gr.ImageEditor(
-        label="Image editor (paint where you want to edit)",
-        type="pil",
-    )
-    run_button = gr.Button("Inpaint")
-    result = gr.Image(label="Result")
-    status = gr.Markdown("")
-    with gr.Accordion("Advanced Settings", open=False):
-        seed = gr.Slider(0, MAX_SEED, step=1, value=0, label="Seed")
-        randomize_seed = gr.Checkbox(value=True, label="Randomize seed")
-        width = gr.Slider(256, MAX_IMAGE_SIZE, step=64, value=1024, label="Width")
-        height = gr.Slider(256, MAX_IMAGE_SIZE, step=64, value=1024, label="Height")
-        guidance_scale = gr.Slider(0.0, 15.0, step=0.1, value=7.0, label="Guidance scale")
-        num_inference_steps = gr.Slider(1, 80, step=1, value=30, label="Steps")
-    run_button.click(
-        fn=infer,
-        inputs=[
-            prompt,
-            negative_prompt,
-            seed,
-            randomize_seed,
-            width,
-            height,
-            guidance_scale,
-            num_inference_steps,
-            painted,
-        ],
-        outputs=[result, status],
     )
-if __name__ == "__main__":
-    demo.queue().launch(ssr_mode=False)

 import os
 import gc
 import random
 import numpy as np
 import torch
+import gradio as gr
+import cv2
+from PIL import Image
+from diffusers import StableDiffusionXLInpaintPipeline
+from huggingface_hub import login
+# --- GroundingDINO ---
+from groundingdino.util.inference import load_model, predict
+# --- SAM ---
+from segment_anything import sam_model_registry, SamPredictor
 # ============================================================
+# CONFIG
 # ============================================================
+# Optional Hugging Face access token, read from the environment. Surrounding
+# whitespace is stripped so a stray newline in the secret is not sent to login().
+HF_TOKEN = os.getenv("HF_TOKEN", "").strip()
+if HF_TOKEN:
+    login(token=HF_TOKEN)
+# Use CUDA when torch reports it as available, otherwise the CPU.
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+# float16 on CUDA, float32 on CPU.
+DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32
+# Model id passed to StableDiffusionXLInpaintPipeline.from_pretrained.
+INPAINT_MODEL = "diffusers/stable-diffusion-xl-1.0-inpainting-0.1"
+# Clothing keywords (you can tune this)
+CLOTHING_PROMPT = "shirt, jacket, coat, dress, hoodie, sweater, t-shirt"
 # ============================================================
+# LOAD MODELS
 # ============================================================
+# --- GroundingDINO ---
+# load_model takes the config path first and the checkpoint path second;
+# keyword arguments make the order explicit. The device is forwarded so the
+# model is not placed on CUDA when only a CPU is available.
+dino = load_model(
+    model_config_path="GroundingDINO/groundingdino_swint_ogc.cfg.py",
+    model_checkpoint_path="GroundingDINO/groundingdino_swint_ogc.pth",
+    device=DEVICE,
+)
+# --- SAM ---
+# ViT-H variant of Segment Anything, wrapped in a predictor used for
+# box-prompted segmentation.
+sam = sam_model_registry["vit_h"](
+    checkpoint="sam_vit_h_4b8939.pth"
+)
+sam.to(DEVICE)
+sam_predictor = SamPredictor(sam)
+# --- SDXL Inpaint ---
+# Inpainting pipeline loaded with DTYPE weights and moved to DEVICE.
+pipe = StableDiffusionXLInpaintPipeline.from_pretrained(
+    INPAINT_MODEL,
+    torch_dtype=DTYPE,
+    use_safetensors=True,
+).to(DEVICE)
+pipe.set_progress_bar_config(disable=True)
 # ============================================================
+# UTILS
 # ============================================================
+def pil_to_cv(img):
+    """Return ``img`` as a numpy array with its channels swapped RGB -> BGR."""
+    rgb_pixels = np.array(img)
+    bgr_pixels = cv2.cvtColor(rgb_pixels, cv2.COLOR_RGB2BGR)
+    return bgr_pixels
+def cv_to_pil(img):
+    """Return a PIL image built from ``img`` after swapping channels BGR -> RGB."""
+    rgb_pixels = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
+    pil_image = Image.fromarray(rgb_pixels)
+    return pil_image
+def detect_clothing_mask(image: Image.Image):
+    """Detect clothing in ``image`` and return a binary mask, or ``None``.
+
+    GroundingDINO proposes boxes for the phrases in ``CLOTHING_PROMPT``; each
+    box is then given to SAM as a prompt and the per-box masks are merged.
+
+    Args:
+        image: Input picture as a PIL image.
+
+    Returns:
+        A single-channel PIL image with the same height and width as
+        ``image`` in which 255 marks detected clothing and 0 everything
+        else, or ``None`` when GroundingDINO returns no boxes.
+    """
+    # Function-scope import: only needed for GroundingDINO preprocessing.
+    import groundingdino.datasets.transforms as T
+
+    rgb_image = image.convert("RGB")
+    img_rgb = np.array(rgb_image)
+    h, w, _ = img_rgb.shape
+    # predict() needs the resized, normalised tensor (the same preprocessing
+    # GroundingDINO's own load_image applies), not a raw pixel array.
+    dino_transform = T.Compose(
+        [
+            T.RandomResize([800], max_size=1333),
+            T.ToTensor(),
+            T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
+        ]
+    )
+    dino_input, _ = dino_transform(rgb_image, None)
+    boxes, _, _ = predict(
+        model=dino,
+        image=dino_input,
+        caption=CLOTHING_PROMPT,
+        box_threshold=0.35,
+        text_threshold=0.25,
+        device=DEVICE,
+    )
+    if len(boxes) == 0:
+        return None
+    # Convert normalized (cx, cy, width, height) boxes to pixel corner
+    # coordinates, clamped to the image so SAM never gets out-of-frame prompts.
+    boxes_px = []
+    for cx, cy, bw, bh in boxes.tolist():
+        x1 = max(int((cx - bw / 2) * w), 0)
+        y1 = max(int((cy - bh / 2) * h), 0)
+        x2 = min(int((cx + bw / 2) * w), w)
+        y2 = min(int((cy + bh / 2) * h), h)
+        boxes_px.append([x1, y1, x2, y2])
+    # SAM segmentation. set_image defaults to RGB input, so the RGB array is
+    # passed directly rather than a BGR (OpenCV-order) copy.
+    sam_predictor.set_image(img_rgb)
+    masks = []
+    for box in boxes_px:
+        mask, _, _ = sam_predictor.predict(
+            box=np.array(box),
+            multimask_output=False,
+        )
+        # multimask_output=False: keep the first (only) mask returned.
+        masks.append(mask[0])
+    # Merge all masks: a pixel is white if any box's mask covers it.
+    full_mask = np.zeros((h, w), dtype=np.uint8)
+    for m in masks:
+        full_mask[m] = 255
+    return Image.fromarray(full_mask)
 # ============================================================
+# INFERENCE
 # ============================================================
+def replace_clothing(image, prompt, seed):
+    """Inpaint the automatically detected clothing region of ``image``.
+
+    Args:
+        image: Input PIL image, or ``None`` when nothing was uploaded.
+        prompt: Text description of the new clothing.
+        seed: Seed for the torch random generator; coerced to ``int``
+            because UI sliders may deliver a float.
+
+    Returns:
+        A ``(result_image, status_message)`` tuple. ``result_image`` is
+        ``None`` when the inputs are incomplete and the unchanged input image
+        when no clothing is detected.
+    """
+    prompt = (prompt or "").strip()
+    if image is None or not prompt:
+        return None, "Upload an image and provide a prompt."
+    mask = detect_clothing_mask(image)
+    if mask is None:
+        return image, "No clothing detected."
+    # manual_seed only accepts integers.
+    generator = torch.Generator(device=DEVICE).manual_seed(int(seed))
+    try:
+        with torch.inference_mode():
+            out = pipe(
+                prompt=prompt,
+                image=image,
+                mask_image=mask,
+                guidance_scale=7.0,
+                num_inference_steps=30,
+                generator=generator,
+            )
+    finally:
+        # Release memory even when the pipeline raises (e.g. out of memory).
+        gc.collect()
+        if DEVICE == "cuda":
+            torch.cuda.empty_cache()
+    return out.images[0], "Clothing replaced automatically."
+# ============================================================
+# UI
+# ============================================================
+# Gradio UI: one image input, one prompt, one seed; the button runs
+# replace_clothing and shows the resulting image plus a status line.
+with gr.Blocks(title="Auto Clothing Replacement") as demo:
+    gr.Markdown("## Automatic Clothing Replacement (no mask, no painting)")
+    gr.Markdown("Upload a photo, describe the new outfit. Everything else is automatic.")
+    image = gr.Image(type="pil", label="Input image")
+    prompt = gr.Textbox(label="New clothing description")
+    # step=1 keeps the seed an integer; without an explicit step Gradio
+    # derives one from the slider range.
+    seed = gr.Slider(0, 999999, step=1, value=0, label="Seed")
+    run = gr.Button("Replace Clothing")
+    output = gr.Image(label="Result")
+    status = gr.Markdown()
+    run.click(
+        replace_clothing,
+        inputs=[image, prompt, seed],
+        outputs=[output, status],
     )
+demo.launch()