Merlimhhs committed on
Commit
fe45438
·
verified ·
1 Parent(s): de33349

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +266 -63
app.py CHANGED
@@ -1,95 +1,298 @@
1
- import gradio as gr
2
- import torch
3
- import numpy as np
4
- from PIL import Image, ImageFilter, ImageOps
5
- from transformers import pipeline
6
- from pathlib import Path
7
  import zipfile
8
  import shutil
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
print("DEPTH AAA ENGINE (PURE MODE)")

# Use the GPU when torch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Single depth-estimation pipeline, built once at import time and reused
# for every processed image.
pipe = pipeline(
    task="depth-estimation",
    model="depth-anything/Depth-Anything-V2-Base-hf",
    device=device,
)
19
 
20
- # =========================
21
- # NORMALIZAÇÃO PROFISSIONAL
22
- # =========================
23
def normalize_depth(depth):
    """Rescale a depth map to float32 values spanning roughly [0, 1].

    Args:
        depth: Anything ``np.array`` can convert (array, nested list,
            image object).

    Returns:
        A float32 array of the same shape, min-max normalized.
    """
    values = np.array(depth).astype(np.float32)
    lowest = values.min()
    span = values.max() - lowest
    # The small epsilon keeps a perfectly flat map from dividing by zero.
    return (values - lowest) / (span + 1e-6)
 
 
 
 
 
 
 
30
 
31
- # =========================
32
- # REFINO (SEM QUEBRAR O MAPA)
33
- # =========================
34
def refine_depth(depth_np):
    """Turn a normalized depth array into a smoothed, contrast-stretched image.

    Args:
        depth_np: Depth values scaled so that multiplying by 255 fits uint8
            (the output of ``normalize_depth``).

    Returns:
        A PIL image of the refined depth map.
    """
    eight_bit = (depth_np * 255).astype(np.uint8)

    # Light blur to hide banding introduced by the 8-bit quantization.
    smoothed = Image.fromarray(eight_bit).filter(ImageFilter.GaussianBlur(1.0))

    # Mild contrast stretch; the small cutoff keeps detail at both ends.
    return ImageOps.autocontrast(smoothed, cutoff=0.3)
 
 
 
 
 
 
 
 
44
 
45
- # =========================
46
- # PIPELINE
47
- # =========================
48
def process(files):
    """Generate one depth PNG per uploaded file and bundle them in a ZIP.

    Args:
        files: Uploaded file objects exposing the on-disk path as ``.name``.
            ``None`` or an empty list yields ``None``.

    Returns:
        The path of the ZIP archive, or ``None`` when nothing was uploaded.
    """
    if not files:
        return None

    # Every run starts from an empty output folder.
    out_dir = Path("depth_clean_output")
    if out_dir.exists():
        shutil.rmtree(out_dir)
    out_dir.mkdir()

    zip_path = "DEPTH_AAA_ONLY.zip"

    with zipfile.ZipFile(zip_path, 'w') as archive:
        for upload in files:
            stem = Path(upload.name).stem
            rgb = Image.open(upload.name).convert("RGB")

            # Raw depth prediction -> normalized array -> refined image.
            predicted = pipe(rgb)["depth"]
            refined = refine_depth(normalize_depth(predicted))

            # Save to disk, then store in the archive under the bare filename.
            target = out_dir / f"depth_{stem}.png"
            refined.save(target)
            archive.write(target, target.name)

    # NOTE(review): indentation was lost in this view; the message is assumed
    # to be printed once after all files are processed - confirm.
    print("✅ DEPTH PERFEITO GERADO")
    return zip_path
82
 
83
- # =========================
84
  # UI
85
- # =========================
86
# Minimal Gradio front end: multi-file upload in, ZIP download out.
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 DEPTH AAA (PARALLAX READY)")

    inp = gr.File(file_count="multiple")
    out = gr.File()

    btn = gr.Button("GERAR DEPTH")
    # Clicking the button runs the whole batch through `process`.
    btn.click(fn=process, inputs=inp, outputs=out)

demo.launch()
 
1
+ import os
 
 
 
 
 
2
  import zipfile
3
  import shutil
4
+ import urllib.request
5
+ from pathlib import Path
6
+
7
+ import gradio as gr
8
+ import numpy as np
9
+ import cv2
10
+ import torch
11
+ from PIL import Image
12
+ from ultralytics import YOLO
13
+ from segment_anything import sam_model_registry, SamPredictor
14
+ from pymatting import estimate_alpha_cf, estimate_foreground_ml
15
+ from scipy.ndimage import binary_erosion, binary_dilation
16
 
17
print("CINEMA CHARACTER CUT (ONE PNG PER IMAGE)")

# -------------------------
# CONFIG
# -------------------------
# Run on the GPU when torch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Local filename of the SAM ViT-B checkpoint and where to download it from.
SAM_CKPT = "sam_vit_b_01ec64.pth"
SAM_URL = "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth"

# COCO class ids kept from the detector:
#   0 person, 14 bird, 15 cat, 16 dog, 17 horse, 18 sheep,
#   19 cow, 20 elephant, 21 bear, 22 zebra, 23 giraffe
TARGET_CLASS_IDS = {0, *range(14, 24)}

# Minimum detection confidence for a box to be used.
CONF_THRES = 0.18
# Base padding added around each box, relative to its larger side.
BOX_PAD_RATIO = 0.08
# Crops whose longer side exceeds this are downscaled before matting.
MAX_SIDE_FOR_MATTING = 1400
45
 
46
+ # -------------------------
47
+ # DOWNLOAD SAM CHECKPOINT
48
+ # -------------------------
49
+ def ensure_sam_checkpoint():
50
+ if not os.path.exists(SAM_CKPT):
51
+ print("Downloading SAM checkpoint...")
52
+ urllib.request.urlretrieve(SAM_URL, SAM_CKPT)
53
+ print("SAM checkpoint ready.")
54
 
55
+ ensure_sam_checkpoint()
 
 
 
 
56
 
57
+ # -------------------------
58
+ # MODELS
59
+ # -------------------------
60
# Segment Anything (ViT-B) loaded from the local checkpoint and moved to the
# selected device; the predictor wraps it for box-prompted mask prediction.
sam = sam_model_registry["vit_b"](checkpoint=SAM_CKPT)
sam.to(DEVICE)
predictor = SamPredictor(sam)

# YOLOv8-nano detector used to find the subjects to cut out.
yolo = YOLO("yolov8n.pt")
 
65
 
66
+ # -------------------------
67
+ # HELPERS
68
+ # -------------------------
69
def as_numpy_image(img_input):
    """Return ``img_input`` as an RGB NumPy array.

    Args:
        img_input: One of
            * a filesystem path (``str`` or ``os.PathLike``),
            * a ``PIL.Image.Image``,
            * an upload-like object whose ``.name`` attribute is a path.

    Returns:
        The image converted to RGB, as a NumPy array.
    """
    if isinstance(img_input, Image.Image):
        return np.array(img_input.convert("RGB"))

    # Path objects are accepted directly; their ``.name`` is only the
    # basename, so they must not fall through to the upload-object branch.
    if isinstance(img_input, (str, os.PathLike)):
        source = img_input
    else:
        source = img_input.name

    # The context manager releases the file handle as soon as the pixels
    # have been decoded, instead of waiting for garbage collection.
    with Image.open(source) as handle:
        return np.array(handle.convert("RGB"))
75
 
76
def clip_box(box, w, h):
    """Clamp an ``[x1, y1, x2, y2]`` box to a ``w`` x ``h`` image.

    The start coordinate is limited to ``[0, size - 1]`` and the end
    coordinate to ``[1, size]``. A box that collapses on an axis is widened
    to up to two pixels, as far as the image allows.

    Returns:
        The clamped box as a list ``[x1, y1, x2, y2]``.
    """

    def _fit(lo, hi, size):
        # Same clamping rule for both axes.
        lo = max(0, min(size - 1, lo))
        hi = max(1, min(size, hi))
        if hi <= lo + 1:
            hi = min(size, lo + 2)
        return lo, hi

    x1, y1, x2, y2 = box
    left, right = _fit(x1, x2, w)
    top, bottom = _fit(y1, y2, h)
    return [left, top, right, bottom]
87
+
88
def pad_box(box, w, h, ratio=0.08):
    """Grow a box by ``ratio`` of its larger side, then clip it to the image.

    Args:
        box: ``[x1, y1, x2, y2]``.
        w, h: Image width and height used for clipping.
        ratio: Padding as a fraction of ``max(box width, box height)``.

    Returns:
        The padded box after ``clip_box``.
    """
    left, top, right, bottom = box
    margin = int(max(right - left, bottom - top) * ratio)
    grown = [left - margin, top - margin, right + margin, bottom + margin]
    return clip_box(grown, w, h)
94
+
95
def detect_boxes(img):
    """Detect people/animals and return padded boxes clipped to the image.

    Args:
        img: RGB image array (H x W x 3), as produced by ``as_numpy_image``.

    Returns:
        A list of ``[x1, y1, x2, y2]`` integer boxes without exact
        duplicates. When nothing is detected, a single central fallback box
        is returned so downstream segmentation always has a prompt.
    """
    h, w = img.shape[:2]

    # Ultralytics interprets NumPy input as BGR, while this file works in
    # RGB everywhere else, so flip the channel axis for the detector only.
    bgr = np.ascontiguousarray(img[..., ::-1])

    # The detector's own default confidence cut-off is higher than
    # CONF_THRES; without passing it explicitly the lower threshold would
    # never take effect.
    results = yolo.predict(bgr, conf=CONF_THRES, verbose=False)

    boxes = []
    for r in results:
        for b in r.boxes:
            cls = int(b.cls.item())
            conf = float(b.conf.item())
            if cls in TARGET_CLASS_IDS and conf >= CONF_THRES:
                x1, y1, x2, y2 = map(int, b.xyxy[0].tolist())
                boxes.append(pad_box([x1, y1, x2, y2], w, h, BOX_PAD_RATIO))

    # Fallback only if the detector misses everything: a central region
    # covering 20-80 % of the width and 10-95 % of the height.
    if not boxes:
        central = [int(w * 0.20), int(h * 0.10), int(w * 0.80), int(h * 0.95)]
        boxes = [clip_box(central, w, h)]

    # Drop exact duplicates while keeping first-seen order.
    seen = set()
    uniq = []
    for b in boxes:
        key = tuple(b)
        if key not in seen:
            seen.add(key)
            uniq.append(b)
    return uniq
122
+
123
def predict_union_mask(img, boxes):
    """Segment every box with SAM and merge the best masks into one.

    Args:
        img: Image array handed to ``predictor.set_image``.
        boxes: Iterable of ``[x1, y1, x2, y2]`` box prompts.

    Returns:
        A uint8 mask of the image's height and width: 255 where any selected
        mask is set, 0 elsewhere.
    """
    predictor.set_image(img)
    height, width = img.shape[:2]
    combined = np.zeros((height, width), dtype=bool)

    for prompt in boxes:
        candidates, quality, _ = predictor.predict(
            box=np.array(prompt),
            multimask_output=True,
        )
        # Of the candidate masks, keep the one with the highest score.
        winner = int(np.argmax(quality))
        np.logical_or(combined, candidates[winner].astype(bool), out=combined)

    return combined.astype(np.uint8) * 255
137
+
138
def clean_mask(mask):
    """Tidy a binary mask with morphology and return it as 0/255 uint8.

    Steps, in order: threshold at 127, 7x7 close, 3x3 open, one 3x3
    dilation, 5x5 Gaussian blur, re-threshold at 110.
    """
    cleaned = np.where(mask > 127, 255, 0).astype(np.uint8)

    # Close small holes first, then remove tiny specks of noise.
    cleaned = cv2.morphologyEx(cleaned, cv2.MORPH_CLOSE, np.ones((7, 7), np.uint8))
    cleaned = cv2.morphologyEx(cleaned, cv2.MORPH_OPEN, np.ones((3, 3), np.uint8))

    # One-pixel growth to bring back thin parts such as fingers or hair edges.
    cleaned = cv2.dilate(cleaned, np.ones((3, 3), np.uint8), iterations=1)

    # Blur and re-threshold to round off jagged edges before matting.
    softened = cv2.GaussianBlur(cleaned, (5, 5), 0)
    return np.where(softened > 110, 255, 0).astype(np.uint8)
155
+
156
def bbox_from_mask(mask):
    """Return the tight bounding box of all positive pixels in ``mask``.

    Returns:
        ``[x1, y1, x2, y2]`` with exclusive end coordinates, or ``None``
        when the mask has no positive pixel.
    """
    rows, cols = np.nonzero(mask > 0)
    if cols.size == 0 or rows.size == 0:
        return None
    return [
        int(cols.min()),
        int(rows.min()),
        int(cols.max()) + 1,
        int(rows.max()) + 1,
    ]
163
+
164
def make_trimap(mask, fg_iterations=3, bg_iterations=10):
    """Build a matting trimap (0 = background, 1 = foreground, 0.5 = unknown).

    Args:
        mask: 2-D mask; values above 127 count as foreground.
        fg_iterations: Erosion steps applied to the mask to obtain the
            definite-foreground core (must be >= 1).
        bg_iterations: Dilation steps applied to the mask; everything
            outside the dilated mask is definite background (must be >= 1).

    Returns:
        A float64 array shaped like ``mask``. An empty mask yields an
        all-unknown (0.5) trimap.
    """
    binmask = np.asarray(mask) > 127
    trimap = np.full(binmask.shape, 0.5, dtype=np.float64)
    if not binmask.any():
        return trimap

    sure_fg = binary_erosion(binmask, iterations=fg_iterations)

    # Definite background is what lies OUTSIDE the grown mask. Dilating the
    # background itself instead would push it into the foreground and
    # overwrite the whole unknown band, leaving the matting solver nothing
    # to estimate.
    sure_bg = ~binary_dilation(binmask, iterations=bg_iterations)

    trimap[sure_fg] = 1.0
    trimap[sure_bg] = 0.0
    return trimap
176
+
177
def alpha_matte_crop(img_crop, mask_crop):
    """Run closed-form alpha matting on a crop and return RGBA plus alpha.

    Args:
        img_crop: Image crop with 0-255 channel values.
        mask_crop: Matching mask crop used to derive the trimap.

    Returns:
        ``(rgba, alpha_u8)``: the estimated foreground colours stacked with
        the alpha channel as uint8, and the alpha channel alone as uint8.
    """

    def _to_u8(unit_values):
        # Clamp to [0, 1] and scale to the 8-bit range.
        return (np.clip(unit_values, 0.0, 1.0) * 255.0).astype(np.uint8)

    image = img_crop.astype(np.float64) / 255.0

    alpha = np.clip(estimate_alpha_cf(image, make_trimap(mask_crop)), 0.0, 1.0)

    # Only the foreground colours are kept; the estimated background is
    # requested but discarded.
    foreground, _background = estimate_foreground_ml(
        image, alpha, return_background=True
    )
    foreground = np.clip(foreground, 0.0, 1.0)

    return _to_u8(np.dstack([foreground, alpha])), _to_u8(alpha)
191
+
192
def fallback_rgba(img_crop, mask_crop):
    """Build an RGBA crop from the cleaned hard mask, without matting.

    Returns:
        ``(rgba, alpha)`` as uint8 arrays, where the alpha channel is
        ``clean_mask(mask_crop)``.
    """
    hard_alpha = clean_mask(mask_crop)
    stacked = np.dstack((img_crop, hard_alpha))
    return stacked.astype(np.uint8), hard_alpha.astype(np.uint8)
196
+
197
def process_one_image(img):
    """Split an image into a matted foreground cut-out and a background.

    Pipeline: detect boxes -> SAM union mask -> mask clean-up -> alpha
    matting on a padded crop (downscaled when large) -> paste back to full
    size.

    Args:
        img: RGB image array (H x W x 3).

    Returns:
        ``(fg, bg)``: ``fg`` is an H x W x 4 uint8 RGBA cut-out; ``bg`` is a
        copy of ``img`` with pixels zeroed wherever the alpha exceeds 8.
        When no mask is found, ``fg`` is fully transparent and ``bg`` is an
        unmodified copy of ``img``.
    """
    h, w = img.shape[:2]
    boxes = detect_boxes(img)
    raw_mask = clean_mask(predict_union_mask(img, boxes))

    bbox = bbox_from_mask(raw_mask)
    if bbox is None:
        # No mask at all: fully transparent foreground, original background.
        return np.zeros((h, w, 4), dtype=np.uint8), img.copy()

    x1, y1, x2, y2 = bbox

    # Pad the crop by 12 % of its larger side to give matting some context.
    pad = int(max(x2 - x1, y2 - y1) * 0.12)
    x1 = max(0, x1 - pad)
    y1 = max(0, y1 - pad)
    x2 = min(w, x2 + pad)
    y2 = min(h, y2 + pad)

    img_crop = img[y1:y2, x1:x2]
    mask_crop = raw_mask[y1:y2, x1:x2]

    # Downscale very large crops before matting for stability and speed.
    crop_h, crop_w = img_crop.shape[:2]
    max_side = max(crop_h, crop_w)
    downscaled = max_side > MAX_SIDE_FOR_MATTING
    if downscaled:
        scale = MAX_SIDE_FOR_MATTING / float(max_side)
        new_w = max(2, int(crop_w * scale))
        new_h = max(2, int(crop_h * scale))
        matting_img = cv2.resize(
            img_crop, (new_w, new_h), interpolation=cv2.INTER_AREA
        )
        matting_mask = cv2.resize(
            mask_crop, (new_w, new_h), interpolation=cv2.INTER_NEAREST
        )
    else:
        matting_img, matting_mask = img_crop, mask_crop

    try:
        rgba, alpha = alpha_matte_crop(matting_img, matting_mask)
        if downscaled:
            # Bring the matte back to the original crop resolution.
            rgba = cv2.resize(
                rgba, (crop_w, crop_h), interpolation=cv2.INTER_LINEAR
            )
            alpha = cv2.resize(
                alpha, (crop_w, crop_h), interpolation=cv2.INTER_LINEAR
            )
    except Exception as exc:
        # Matting is best-effort: report the failure instead of hiding it,
        # then fall back to the hard mask at full crop resolution.
        print(f"Alpha matting failed ({exc!r}); using hard-mask fallback.")
        rgba, alpha = fallback_rgba(img_crop, mask_crop)

    # Place the crop back into full-size canvases.
    fg_full = np.zeros((h, w, 4), dtype=np.uint8)
    fg_full[y1:y2, x1:x2] = rgba

    alpha_full = np.zeros((h, w), dtype=np.uint8)
    alpha_full[y1:y2, x1:x2] = alpha

    # Black out every background pixel the foreground covers.
    bg = img.copy()
    bg[alpha_full > 8] = 0

    return fg_full, bg
255
+
256
def process(files):
    """Cut out the subject of every uploaded image and return a ZIP path.

    For each input, ``<stem>_FG.png`` (RGBA cut-out) and ``<stem>_BG.png``
    (background) are written to a fresh output folder and added to the
    archive. Inputs sharing the same stem get a numeric suffix (``_2``,
    ``_3``, ...) so they neither overwrite each other on disk nor create
    duplicate archive entries.

    Args:
        files: List of file paths, or of upload objects exposing the path as
            ``.name``. ``None`` or an empty list yields ``None``.

    Returns:
        The path of the ZIP archive, or ``None`` when nothing was uploaded.
    """
    if not files:
        return None

    # Every run starts from an empty output folder.
    out_dir = Path("cinema_cut_output")
    if out_dir.exists():
        shutil.rmtree(out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    zip_path = "CINEMA_CHARACTER_CUT.zip"
    used_stems = set()

    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zipf:
        for item in files:
            path = item if isinstance(item, str) else getattr(item, "name", item)

            # The first occurrence keeps its plain stem; later ones get _N.
            base_stem = Path(path).stem
            stem = base_stem
            suffix = 1
            while stem in used_stems:
                suffix += 1
                stem = f"{base_stem}_{suffix}"
            used_stems.add(stem)

            img = as_numpy_image(path)
            fg, bg = process_one_image(img)

            fg_path = out_dir / f"{stem}_FG.png"
            bg_path = out_dir / f"{stem}_BG.png"

            Image.fromarray(fg).save(fg_path)
            Image.fromarray(bg).save(bg_path)

            # Store under the bare filename so the archive has no folders.
            zipf.write(fg_path, fg_path.name)
            zipf.write(bg_path, bg_path.name)

    return zip_path
285
 
286
+ # -------------------------
287
  # UI
288
+ # -------------------------
289
# Minimal Gradio front end: multi-file upload in, ZIP download out.
with gr.Blocks() as demo:
    gr.Markdown("# 🎬 Cinema Character Cut")

    # type="filepath" makes Gradio hand `process` plain path strings.
    inp = gr.File(file_count="multiple", type="filepath")
    out = gr.File(label="Baixar ZIP")

    btn = gr.Button("PROCESSAR")
    # Clicking the button runs the whole batch through `process`.
    btn.click(fn=process, inputs=inp, outputs=out)

demo.launch()