Spaces:

Merlimhhs
/

Lhamaluy

Sleeping

App Files Files Community

Merlimhhs committed on 27 days ago

Commit

fe45438

verified ·

1 Parent(s): de33349

Update app.py

Browse files

Files changed (1) hide show

app.py +266 -63

app.py CHANGED Viewed

@@ -1,95 +1,298 @@
-import gradio as gr
-import torch
-import numpy as np
-from PIL import Image, ImageFilter, ImageOps
-from transformers import pipeline
-from pathlib import Path
 import zipfile
 import shutil
-print("DEPTH AAA ENGINE (PURE MODE)")
-device = "cuda" if torch.cuda.is_available() else "cpu"
-pipe = pipeline(
-    task="depth-estimation",
-    model="depth-anything/Depth-Anything-V2-Base-hf",
-    device=device
-)
-# =========================
-# NORMALIZAÇÃO PROFISSIONAL
-# =========================
-def normalize_depth(depth):
-    d = np.array(depth).astype(np.float32)
-    # normalização estável
-    d = (d - d.min()) / (d.max() - d.min() + 1e-6)
-    return d
-# =========================
-# REFINO (SEM QUEBRAR O MAPA)
-# =========================
-def refine_depth(depth_np):
-    img = Image.fromarray((depth_np * 255).astype(np.uint8))
-    # 🔥 suavização leve (ANTI-BANDING)
-    img = img.filter(ImageFilter.GaussianBlur(1.0))
-    # 🔥 contraste leve (mantém detalhe)
-    img = ImageOps.autocontrast(img, cutoff=0.3)
-    return img
-# =========================
-# PIPELINE
-# =========================
-def process(files):
-    if not files:
         return None
-    out = Path("depth_clean_output")
-    if out.exists():
-        shutil.rmtree(out)
-    out.mkdir()
-    zip_path = "DEPTH_AAA_ONLY.zip"
-    with zipfile.ZipFile(zip_path, 'w') as zipf:
-        for file in files:
-            name = Path(file.name).stem
-            img = Image.open(file.name).convert("RGB")
-            # 💥 DEPTH DIRETO DO BG (VOCÊ JÁ VAI FAZER ISSO CERTO)
-            depth_raw = pipe(img)["depth"]
-            # normalização real
-            depth_np = normalize_depth(depth_raw)
-            # refinamento seguro
-            depth_final = refine_depth(depth_np)
-            # salvar
-            path = out / f"depth_{name}.png"
-            depth_final.save(path)
-            zipf.write(path, path.name)
-    print("✅ DEPTH PERFEITO GERADO")
     return zip_path
-# =========================
 # UI
-# =========================
 with gr.Blocks() as demo:
-    gr.Markdown("# 🧠 DEPTH AAA (PARALLAX READY)")
-    inp = gr.File(file_count="multiple")
-    out = gr.File()
-    btn = gr.Button("GERAR DEPTH")
     btn.click(fn=process, inputs=inp, outputs=out)
-demo.launch()

+import os
 import zipfile
 import shutil
+import urllib.request
+from pathlib import Path
+import gradio as gr
+import numpy as np
+import cv2
+import torch
+from PIL import Image
+from ultralytics import YOLO
+from segment_anything import sam_model_registry, SamPredictor
+from pymatting import estimate_alpha_cf, estimate_foreground_ml
+from scipy.ndimage import binary_erosion, binary_dilation
+# Start-up banner, printed when the module is loaded.
+# NOTE(review): the banner says "ONE PNG PER IMAGE", but process() further down
+# writes two PNGs per input (<stem>_FG.png and <stem>_BG.png) — confirm which
+# of the two is intended.
+print("CINEMA CHARACTER CUT (ONE PNG PER IMAGE)")
+# -------------------------
+# CONFIG
+# -------------------------
+# Device the SAM model is moved to: CUDA when torch reports it, otherwise CPU.
+DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+# Local file name of the SAM checkpoint and the URL it is downloaded from.
+SAM_CKPT = "sam_vit_b_01ec64.pth"
+SAM_URL = "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth"
+# COCO: person + common animals
+# Only detections whose class id is in this set are kept by detect_boxes().
+TARGET_CLASS_IDS = {
+    0,   # person
+    14,  # bird
+    15,  # cat
+    16,  # dog
+    17,  # horse
+    18,  # sheep
+    19,  # cow
+    20,  # elephant
+    21,  # bear
+    22,  # zebra
+    23,  # giraffe
+}
+# Minimum detector confidence for a box to be kept.
+CONF_THRES = 0.18
+BOX_PAD_RATIO = 0.08  # base padding relative to box size
+# Crops whose longer side exceeds this value are downscaled before matting.
+MAX_SIDE_FOR_MATTING = 1400  # keeps the crop manageable
+# -------------------------
+# DOWNLOAD SAM CHECKPOINT
+# -------------------------
+def ensure_sam_checkpoint():
+    """Download the SAM checkpoint to ``SAM_CKPT`` unless it already exists.
+
+    The file is fetched under a temporary ``.part`` name and renamed only once
+    the transfer has finished, so an interrupted download cannot leave a
+    truncated file that a later start-up would mistake for a valid checkpoint.
+
+    Raises:
+        urllib.error.URLError: propagated from ``urllib.request.urlretrieve``
+            when the download fails.
+    """
+    if os.path.exists(SAM_CKPT):
+        return
+    print("Downloading SAM checkpoint...")
+    partial = SAM_CKPT + ".part"
+    try:
+        urllib.request.urlretrieve(SAM_URL, partial)
+        os.replace(partial, SAM_CKPT)
+    finally:
+        # Remove leftovers of a failed transfer; after a successful rename the
+        # partial file no longer exists and this is a no-op.
+        if os.path.exists(partial):
+            os.remove(partial)
+    print("SAM checkpoint ready.")
+# Make sure the checkpoint file exists before the SAM model is built from it.
+ensure_sam_checkpoint()
+# -------------------------
+# MODELS
+# -------------------------
+# Build the "vit_b" SAM model from the checkpoint, move it to DEVICE and wrap
+# it in a predictor; predict_union_mask() prompts it with boxes.
+sam = sam_model_registry["vit_b"](checkpoint=SAM_CKPT)
+sam.to(DEVICE)
+predictor = SamPredictor(sam)
+# YOLO detector ("yolov8n.pt" weights) used by detect_boxes() to find subjects.
+yolo = YOLO("yolov8n.pt")
+# -------------------------
+# HELPERS
+# -------------------------
+def as_numpy_image(img_input):
+    """Return ``img_input`` as an RGB ``numpy`` array.
+
+    Args:
+        img_input: a file path (``str``), a ``PIL.Image.Image``, or any other
+            object whose ``name`` attribute is a file path.
+
+    Returns:
+        ``np.array`` of the image after conversion to RGB.
+    """
+    if isinstance(img_input, Image.Image):
+        return np.array(img_input.convert("RGB"))
+    path = img_input if isinstance(img_input, str) else img_input.name
+    # Use a context manager so the file handle is closed right away instead
+    # of staying open until the image object is garbage-collected.
+    with Image.open(path) as opened:
+        return np.array(opened.convert("RGB"))
+def clip_box(box, w, h):
+    """Clamp an ``[x1, y1, x2, y2]`` box to a ``w`` x ``h`` image.
+
+    The top-left corner is limited to ``[0, w - 1]`` / ``[0, h - 1]`` and the
+    bottom-right corner to ``[1, w]`` / ``[1, h]``.  An extent that would end
+    up at most one pixel wide or tall has its far edge moved to two pixels
+    past the near edge, capped at the image border.
+
+    Returns:
+        A new list ``[x1, y1, x2, y2]``.
+    """
+    left, top, right, bottom = box
+    left = max(0, min(w - 1, left))
+    top = max(0, min(h - 1, top))
+    right = max(1, min(w, right))
+    bottom = max(1, min(h, bottom))
+    # Degenerate extents are widened rather than rejected.
+    right = min(w, left + 2) if right <= left + 1 else right
+    bottom = min(h, top + 2) if bottom <= top + 1 else bottom
+    return [left, top, right, bottom]
+def pad_box(box, w, h, ratio=0.08):
+    """Grow a box on every side, then clip it to the ``w`` x ``h`` image.
+
+    The margin is ``ratio`` times the longer box edge, truncated to an int,
+    and is applied equally to all four sides before clipping with
+    ``clip_box``.
+    """
+    x1, y1, x2, y2 = box
+    margin = int(max(x2 - x1, y2 - y1) * ratio)
+    grown = [x1 - margin, y1 - margin, x2 + margin, y2 + margin]
+    return clip_box(grown, w, h)
+def detect_boxes(img):
+    """Propose subject boxes for ``img`` using the YOLO detector.
+
+    Detections whose class id is in ``TARGET_CLASS_IDS`` and whose confidence
+    is at least ``CONF_THRES`` are kept and padded by ``BOX_PAD_RATIO``.  When
+    nothing qualifies, one central box (20%-80% of the width, 10%-95% of the
+    height) is used instead.  Exact duplicate boxes are dropped, keeping the
+    first-seen order.
+
+    Returns:
+        A non-empty list of ``[x1, y1, x2, y2]`` boxes.
+    """
+    detections = yolo.predict(img, verbose=False)
+    h, w = img.shape[:2]
+    candidates = []
+    for result in detections:
+        for det in result.boxes:
+            class_id = int(det.cls.item())
+            score = float(det.conf.item())
+            if class_id in TARGET_CLASS_IDS and score >= CONF_THRES:
+                corners = [int(v) for v in det.xyxy[0].tolist()]
+                candidates.append(pad_box(corners, w, h, BOX_PAD_RATIO))
+    if not candidates:
+        # fallback only if the detector misses everything
+        central = [int(w * 0.20), int(h * 0.10), int(w * 0.80), int(h * 0.95)]
+        candidates = [clip_box(central, w, h)]
+    # drop exact repeats while preserving order
+    return [list(c) for c in dict.fromkeys(tuple(b) for b in candidates)]
+def predict_union_mask(img, boxes):
+    """Segment every box with the SAM predictor and merge the results.
+
+    For each box the predictor is asked for multiple candidate masks; the one
+    with the highest score is OR-ed into the running union.
+
+    Returns:
+        A ``uint8`` array of shape ``img.shape[:2]`` holding 255 inside the
+        union and 0 elsewhere.
+    """
+    predictor.set_image(img)
+    combined = np.zeros(img.shape[:2], dtype=bool)
+    for box in boxes:
+        candidates, scores, _ = predictor.predict(
+            box=np.array(box),
+            multimask_output=True,
+        )
+        winner = candidates[int(np.argmax(scores))]
+        combined |= winner.astype(bool)
+    return combined.astype(np.uint8) * 255
+def clean_mask(mask):
+    """Binarise and tidy a mask; the result is a ``uint8`` mask of 0 and 255.
+
+    Steps, in order: threshold at 127, 7x7 morphological close (fills small
+    holes), 3x3 open (removes tiny noise), one 3x3 dilation (restores thin
+    parts such as fingers or hair edges), 5x5 Gaussian blur, and a final
+    threshold at 110.
+    """
+    small_kernel = np.ones((3, 3), np.uint8)
+    large_kernel = np.ones((7, 7), np.uint8)
+    cleaned = (mask > 127).astype(np.uint8) * 255
+    cleaned = cv2.morphologyEx(cleaned, cv2.MORPH_CLOSE, large_kernel)
+    cleaned = cv2.morphologyEx(cleaned, cv2.MORPH_OPEN, small_kernel)
+    cleaned = cv2.dilate(cleaned, small_kernel, iterations=1)
+    # soften the edge a little, then snap back to a hard mask
+    cleaned = cv2.GaussianBlur(cleaned, (5, 5), 0)
+    return (cleaned > 110).astype(np.uint8) * 255
+def bbox_from_mask(mask):
+    """Return the tight box ``[x1, y1, x2, y2]`` around the non-zero pixels.
+
+    ``x2`` and ``y2`` are exclusive (last occupied index plus one).  Returns
+    ``None`` when the mask has no pixel greater than zero.
+    """
+    rows, cols = np.nonzero(mask > 0)
+    if cols.size == 0 or rows.size == 0:
         return None
+    return [int(cols.min()), int(rows.min()), int(cols.max()) + 1, int(rows.max()) + 1]
+def make_trimap(mask):
+    """Build a trimap (1.0 foreground, 0.0 background, 0.5 unknown) from a mask.
+
+    Pixels above 127 count as foreground.  The foreground eroded by 3
+    iterations is marked as certain foreground, everything outside the
+    foreground dilated by 10 iterations is marked as certain background, and
+    the band in between stays at 0.5 for the matting solver to resolve.  A
+    mask without any foreground pixel yields an all-0.5 trimap.
+
+    Returns:
+        A ``float64`` array with the same shape as ``mask``.
+    """
+    binmask = mask > 127
+    trimap = np.full(mask.shape, 0.5, dtype=np.float64)
+    if not binmask.any():
+        return trimap
+    sure_fg = binary_erosion(binmask, iterations=3)
+    # Certain background is whatever lies *outside* the grown foreground.
+    # Dilating the inverted mask instead would push "background" 10 px into
+    # the subject, overwrite the certain-foreground ring and leave no unknown
+    # band for the matting step to work on.
+    sure_bg = ~binary_dilation(binmask, iterations=10)
+    trimap[sure_fg] = 1.0
+    trimap[sure_bg] = 0.0
+    return trimap
+def alpha_matte_crop(img_crop, mask_crop):
+    """Estimate an alpha matte and foreground colours for one crop.
+
+    The crop is scaled to ``[0, 1]`` floats, a trimap is derived from
+    ``mask_crop`` with ``make_trimap``, alpha comes from
+    ``estimate_alpha_cf`` and the foreground from ``estimate_foreground_ml``.
+
+    Returns:
+        ``(rgba, alpha_u8)``: the ``uint8`` foreground stacked with alpha, and
+        the ``uint8`` alpha channel on its own.
+    """
+    rgb = img_crop.astype(np.float64) / 255.0
+    alpha = np.clip(estimate_alpha_cf(rgb, make_trimap(mask_crop)), 0.0, 1.0)
+    fg, _ = estimate_foreground_ml(rgb, alpha, return_background=True)
+    fg = np.clip(fg, 0.0, 1.0)
+    stacked = np.clip(np.dstack([fg, alpha]), 0.0, 1.0)
+    rgba = (stacked * 255.0).astype(np.uint8)
+    alpha_u8 = (np.clip(alpha, 0.0, 1.0) * 255.0).astype(np.uint8)
+    return rgba, alpha_u8
+def fallback_rgba(img_crop, mask_crop):
+    """Hard-mask fallback: use the cleaned mask directly as the alpha channel.
+
+    Returns:
+        ``(rgba, alpha)`` as ``uint8`` arrays, where ``rgba`` is the crop
+        stacked with the output of ``clean_mask(mask_crop)``.
+    """
+    hard_alpha = clean_mask(mask_crop)
+    stacked = np.dstack([img_crop, hard_alpha])
+    return stacked.astype(np.uint8), hard_alpha.astype(np.uint8)
+def process_one_image(img):
+    """Split one RGB image into a cut-out foreground and a background plate.
+
+    Pipeline: detect subject boxes, segment them into one mask, clean the
+    mask, crop around it with 12% padding, run alpha matting on the crop
+    (downscaled first when its longer side exceeds ``MAX_SIDE_FOR_MATTING``)
+    and paste the result back into full-size canvases.  If matting raises,
+    the error is reported and the cleaned hard mask is used as alpha instead.
+
+    Args:
+        img: ``(h, w, 3)`` image array.
+
+    Returns:
+        ``(fg_full, bg)``: an ``(h, w, 4)`` ``uint8`` RGBA foreground, and a
+        copy of ``img`` with every pixel whose alpha exceeds 8 set to 0.
+        When no mask is found the foreground is fully transparent and the
+        background is an unmodified copy of ``img``.
+    """
+    h, w = img.shape[:2]
+    boxes = detect_boxes(img)
+    raw_mask = clean_mask(predict_union_mask(img, boxes))
+    bbox = bbox_from_mask(raw_mask)
+    if bbox is None:
+        # no mask at all; return fully transparent FG and original BG
+        return np.zeros((h, w, 4), dtype=np.uint8), img.copy()
+    x1, y1, x2, y2 = bbox
+    # crop with padding for better matting
+    pad = int(max(x2 - x1, y2 - y1) * 0.12)
+    x1 = max(0, x1 - pad)
+    y1 = max(0, y1 - pad)
+    x2 = min(w, x2 + pad)
+    y2 = min(h, y2 + pad)
+    img_crop = img[y1:y2, x1:x2]
+    mask_crop = raw_mask[y1:y2, x1:x2]
+    # optional resize for stability/speed on very large crops
+    crop_h, crop_w = img_crop.shape[:2]
+    max_side = max(crop_h, crop_w)
+    downscaled = max_side > MAX_SIDE_FOR_MATTING
+    matting_img, matting_mask = img_crop, mask_crop
+    if downscaled:
+        scale = MAX_SIDE_FOR_MATTING / float(max_side)
+        new_w = max(2, int(crop_w * scale))
+        new_h = max(2, int(crop_h * scale))
+        matting_img = cv2.resize(img_crop, (new_w, new_h), interpolation=cv2.INTER_AREA)
+        matting_mask = cv2.resize(mask_crop, (new_w, new_h), interpolation=cv2.INTER_NEAREST)
+    try:
+        rgba, alpha = alpha_matte_crop(matting_img, matting_mask)
+        if downscaled:
+            # bring the matte back to the crop's original resolution
+            rgba = cv2.resize(rgba, (crop_w, crop_h), interpolation=cv2.INTER_LINEAR)
+            alpha = cv2.resize(alpha, (crop_w, crop_h), interpolation=cv2.INTER_LINEAR)
+    except Exception as exc:
+        # Best-effort: matting is allowed to fail, but say so instead of
+        # silently degrading to the hard mask.
+        print(f"Alpha matting failed ({exc!r}); falling back to hard mask.")
+        rgba, alpha = fallback_rgba(img_crop, mask_crop)
+    # place crop back into full-size canvases
+    fg_full = np.zeros((h, w, 4), dtype=np.uint8)
+    fg_full[y1:y2, x1:x2] = rgba
+    alpha_full = np.zeros((h, w), dtype=np.uint8)
+    alpha_full[y1:y2, x1:x2] = alpha
+    bg = img.copy()
+    bg[alpha_full > 8] = 0
+    return fg_full, bg
+def process(files):
+    """Cut out the subject of every uploaded image and bundle the results.
+
+    For each input two PNGs are written to ``cinema_cut_output`` (recreated on
+    every call) and added to the ZIP: ``<stem>_FG.png`` and ``<stem>_BG.png``.
+    When several inputs share a file stem, later ones get a ``_2``, ``_3``, …
+    suffix so they neither overwrite earlier outputs nor create duplicate
+    ZIP entries.
+
+    Args:
+        files: iterable of file paths, or of objects whose ``name`` attribute
+            is a file path.
+
+    Returns:
+        The ZIP file path, or ``None`` when ``files`` is empty.
+    """
+    if not files:
+        return None
+    out_dir = Path("cinema_cut_output")
+    if out_dir.exists():
+        shutil.rmtree(out_dir)
+    out_dir.mkdir(parents=True, exist_ok=True)
+    zip_path = "CINEMA_CHARACTER_CUT.zip"
+    used_stems = set()
+    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
+        for item in files:
+            path = item if isinstance(item, str) else getattr(item, "name", item)
+            base = Path(path).stem
+            # Uploads from different folders may share a file name.
+            stem = base
+            repeat = 1
+            while stem in used_stems:
+                repeat += 1
+                stem = f"{base}_{repeat}"
+            used_stems.add(stem)
+            img = as_numpy_image(path)
+            fg, bg = process_one_image(img)
+            fg_path = out_dir / f"{stem}_FG.png"
+            bg_path = out_dir / f"{stem}_BG.png"
+            Image.fromarray(fg).save(fg_path)
+            Image.fromarray(bg).save(bg_path)
+            zipf.write(fg_path, fg_path.name)
+            zipf.write(bg_path, bg_path.name)
     return zip_path
+# -------------------------
 # UI
+# -------------------------
+# Gradio front end: multi-file upload in, one ZIP file out.
 with gr.Blocks() as demo:
+    gr.Markdown("# 🎬 Cinema Character Cut")
+    # type="filepath": process() accepts plain path strings as well as objects
+    # that expose the path through a .name attribute.
+    inp = gr.File(file_count="multiple", type="filepath")
+    out = gr.File(label="Baixar ZIP")
+    btn = gr.Button("PROCESSAR")
+    # Clicking the button runs process() on the uploads and shows its result
+    # in the output file component.
     btn.click(fn=process, inputs=inp, outputs=out)
+# Launched at module level; there is no __main__ guard.
+demo.launch()