Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import numpy as np | |
| from PIL import Image | |
| import cv2 | |
| import zipfile, os, tempfile | |
| from typing import List, Tuple | |
| # ---------------- Utils ---------------- | |
| def _iou(a, b): | |
| """Calcula la intersección sobre la unión (IoU) para dos cajas.""" | |
| ax, ay, aw, ah = a | |
| bx, by, bw, bh = b | |
| inter_w = max(0, min(ax+aw, bx+bw) - max(ax, bx)) | |
| inter_h = max(0, min(ay+ah, by+bh) - max(ay, by)) | |
| inter = inter_w * inter_h | |
| if inter == 0: return 0.0 | |
| union = aw*ah + bw*bh - inter | |
| return inter / union | |
def _nms(boxes: List[Tuple[int, int, int, int]], thr=0.5):
    """Greedily drop boxes that overlap an already accepted box.

    Boxes are visited in input order. A box is accepted only if its IoU with
    every previously accepted box is below ``thr``. Returns the accepted boxes
    in the order they were accepted.
    """
    survivors = []
    for candidate in boxes:
        for accepted in survivors:
            if not _iou(candidate, accepted) < thr:
                # Overlaps an accepted box too much: discard this candidate.
                break
        else:
            survivors.append(candidate)
    return survivors
def _largest_component_bbox(mask):
    """Return the bounding rect of the largest external contour in ``mask``.

    Contours are ranked by ``cv2.contourArea``. Returns the ``(x, y, w, h)``
    tuple from ``cv2.boundingRect``, or ``None`` when no contour is found.
    """
    outlines, _hierarchy = cv2.findContours(
        mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    if outlines:
        biggest = max(outlines, key=cv2.contourArea)
        return cv2.boundingRect(biggest)
    return None
| def _background_from_corners(img_np): | |
| """Estima el color de fondo promediando las 4 esquinas.""" | |
| corners = np.array([ | |
| img_np[0,0,:3], img_np[0,-1,:3], | |
| img_np[-1,0,:3], img_np[-1,-1,:3] | |
| ], dtype=np.float32) | |
| return corners.mean(0).astype(np.uint8) | |
def _auto_diff_threshold(img_np, bg_rgb):
    """Pick a colour-difference threshold automatically with Otsu's method.

    The per-pixel Euclidean distance between the first three channels of
    ``img_np`` and ``bg_rgb`` is clipped to 0..255, converted to ``uint8`` and
    handed to ``cv2.threshold`` with the Otsu flag. The threshold value chosen
    by OpenCV is returned as a float.
    """
    pixels = img_np[:, :, :3].astype(np.int16)
    reference = bg_rgb.astype(np.int16)
    distance = np.linalg.norm(pixels - reference, axis=2)
    distance_u8 = np.clip(distance, 0, 255).astype(np.uint8)

    otsu_mode = cv2.THRESH_BINARY + cv2.THRESH_OTSU
    otsu_level = cv2.threshold(distance_u8, 0, 255, otsu_mode)[0]
    return float(otsu_level)
| # ---------------- Máscaras de primer plano ---------------- | |
def _foreground_mask_hybrid(img_np, bg_rgb, diff_thr):
    """Build a foreground mask from colour distance combined with HSV saturation.

    A pixel is foreground when either:
      * its Euclidean distance to ``bg_rgb`` exceeds ``diff_thr`` (clamped to
        the range 20..80), or
      * its saturation and value are at least the thresholds taken from the
        image's own 70th / 40th percentiles (clamped to 25..110 and 60..200).

    The combined mask is cleaned with a 7x7 elliptical close followed by an
    open. Returns a ``uint8`` mask with values 0 / 255.
    """
    # --- colour-distance criterion ---
    delta = img_np[:, :, :3].astype(np.int16) - bg_rgb.astype(np.int16)
    distance = np.linalg.norm(delta, axis=2)
    clamped_thr = max(20.0, min(80.0, diff_thr))
    by_distance = (distance > clamped_thr).astype(np.uint8) * 255

    # --- saturation / brightness criterion ---
    hsv = cv2.cvtColor(cv2.cvtColor(img_np, cv2.COLOR_RGBA2BGR), cv2.COLOR_BGR2HSV)
    _hue, sat, val = cv2.split(hsv)
    sat_floor = int(max(25, min(110, np.percentile(sat, 70))))
    val_floor = int(max(60, min(200, np.percentile(val, 40))))
    by_saturation = cv2.inRange(hsv, (0, sat_floor, val_floor), (179, 255, 255))

    # --- merge and tidy up ---
    combined = cv2.bitwise_or(by_distance, by_saturation)
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))
    for operation in (cv2.MORPH_CLOSE, cv2.MORPH_OPEN):
        combined = cv2.morphologyEx(combined, operation, kernel)
    return combined
def _foreground_mask_kmeans_lab(img_np):
    """Build a foreground mask by clustering pixel colours in LAB space.

    Up to 60000 evenly spaced pixels are clustered into 3 groups with
    ``cv2.kmeans``; every pixel is then assigned to its nearest centre. The
    cluster with the lowest ``-area + 0.5 * L-variance`` score (both terms
    normalised by their maximum) is treated as background, and everything else
    becomes foreground. The mask is cleaned with a 7x7 elliptical close
    followed by an open. Returns a ``uint8`` mask with values 0 / 255.
    """
    cluster_count = 3
    lab = cv2.cvtColor(cv2.cvtColor(img_np, cv2.COLOR_RGBA2BGR), cv2.COLOR_BGR2LAB)
    rows, cols = lab.shape[:2]
    pixels = lab.reshape(-1, 3).astype(np.float32)
    total = pixels.shape[0]

    # Cluster on an evenly spaced subsample to bound the k-means cost.
    picks = np.linspace(0, total - 1, min(60000, total), dtype=np.int32)
    subset = pixels[picks]
    stop_rule = (cv2.TERM_CRITERIA_EPS + cv2.TERM_CRITERIA_MAX_ITER, 20, 1.0)
    _compactness, _sample_labels, centers = cv2.kmeans(
        subset, cluster_count, None, stop_rule, 2, cv2.KMEANS_PP_CENTERS
    )

    # Label every pixel by its nearest cluster centre.
    gaps = np.linalg.norm(pixels[:, None, :] - centers[None, :, :], axis=2)
    flat_labels = np.argmin(gaps, axis=1)
    label_map = flat_labels.reshape(rows, cols)

    cluster_ids = range(cluster_count)
    areas = np.array(
        [(label_map == idx).sum() for idx in cluster_ids], dtype=np.float32
    )
    lightness_var = np.array(
        [np.var(pixels[flat_labels == idx, 0]) for idx in cluster_ids],
        dtype=np.float32,
    )
    area_norm = areas / (areas.max() + 1e-6)
    var_norm = lightness_var / (lightness_var.max() + 1e-6)

    # Large, uniform-lightness clusters get the lowest score.
    score = -area_norm + 0.5 * var_norm
    background_id = int(np.argmin(score))

    foreground = (label_map != background_id).astype(np.uint8) * 255
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (7, 7))
    for operation in (cv2.MORPH_CLOSE, cv2.MORPH_OPEN):
        foreground = cv2.morphologyEx(foreground, operation, kernel)
    return foreground
| # ---------------- Helpers ---------------- | |
def _fill_holes(mask):
    """Fill the enclosed holes of a binary (0 / 255) ``uint8`` mask.

    A hole is a zero-valued region that is not connected to the outside of
    the image. The mask is padded with a one-pixel zero border so that all
    outer background is connected to the seed at (0, 0), even when the
    foreground touches the image edges or corner. After flood-filling that
    outer background, any pixel still at zero is a hole.

    The previous version flood-filled the *inverted* mask, which never
    changed anything useful, so the function returned its input unmodified.

    Args:
        mask: 2-D ``uint8`` array with values 0 (background) / 255 (foreground).

    Returns:
        A new mask of the same shape with interior holes set to 255.
    """
    h, w = mask.shape

    # One-pixel zero frame guarantees the seed is background and reaches
    # every background region that touches the image border.
    padded = np.pad(mask, 1, mode="constant", constant_values=0)

    # floodFill requires an operation mask 2 pixels larger than the image.
    flood_scratch = np.zeros((h + 4, w + 4), np.uint8)
    cv2.floodFill(padded, flood_scratch, (0, 0), 255)

    # Whatever is still 0 was unreachable from the outside: those are holes.
    interior = np.ascontiguousarray(padded[1:-1, 1:-1])
    holes = cv2.bitwise_not(interior)
    return cv2.bitwise_or(mask, holes)
| def _split_touching_blocks(roi_mask, min_gap_ratio=0.04): | |
| """Separa bloques verticalmente si están pegados pero hay un ligero estrechamiento.""" | |
| m = roi_mask.copy() | |
| H, W = m.shape | |
| row_fill = (m > 0).sum(axis=1) / max(1, W) | |
| gap_rows = np.where(row_fill <= min_gap_ratio)[0] | |
| if gap_rows.size == 0: | |
| return [m] | |
| masks = [] | |
| start = 0 | |
| for gap in gap_rows: | |
| if gap - start > 5: | |
| submask = np.zeros_like(m) | |
| submask[start:gap, :] = m[start:gap, :] | |
| if submask.any(): | |
| masks.append(submask) | |
| start = gap+1 | |
| submask = np.zeros_like(m) | |
| submask[start:, :] = m[start:, :] | |
| if submask.any(): | |
| masks.append(submask) | |
| return masks if masks else [m] | |
| # ---------------- REFINAMIENTO DE MÁSCARA (CORREGIDO) ---------------- | |
def _refine_mask(dirty_mask):
    """Clean up a per-block mask before it is used as the alpha channel.

    Steps:
      1. Morphological close (5x5 ellipse) to bridge thin breaks in the
         block outline, e.g. light pixels along input borders or highlights.
      2. Keep only the largest external contour of the closed mask and draw
         it filled, so light/transparent-looking inner areas become solid.
      3. Erode once with a 3x3 kernel to trim the anti-aliased halo.

    Returns the refined mask, or ``dirty_mask`` itself when the closed mask
    has no contours.
    """
    # Step 1: glue together parts that almost touch.
    close_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    sealed = cv2.morphologyEx(dirty_mask, cv2.MORPH_CLOSE, close_kernel)

    # Step 2: solid silhouette of the main block, taken from the closed mask.
    outlines, _hierarchy = cv2.findContours(
        sealed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    if not outlines:
        return dirty_mask  # nothing to refine; hand back the input
    main_outline = max(outlines, key=cv2.contourArea)
    silhouette = np.zeros_like(dirty_mask)
    cv2.drawContours(silhouette, [main_outline], -1, 255, thickness=cv2.FILLED)

    # Step 3: shave the border to remove dirty anti-aliasing.
    erode_kernel = np.ones((3, 3), np.uint8)
    return cv2.erode(silhouette, erode_kernel, iterations=1)
| # ---------------- Detección ---------------- | |
def _detect_blocks(img_np, fg_mask):
    """Find candidate block boxes in a foreground mask.

    Each external contour of ``fg_mask`` is rasterised into its own ROI,
    passed through ``_fill_holes`` and split by ``_split_touching_blocks``.
    A piece is rejected when it is small (area < 1200 px or either side
    < 16 px), ragged (filled area / bounding-box area < 0.30) or nearly
    image-sized (bounding box > 92 % of the image, probably a mis-detected
    background). Surviving boxes are de-duplicated with ``_nms`` at IoU 0.30.

    Returns a list of ``(x, y, w, h)`` boxes in image coordinates.
    """
    contours, _hierarchy = cv2.findContours(
        fg_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    img_h, img_w = img_np.shape[:2]

    found = []
    for contour in contours:
        x0, y0, w0, h0 = cv2.boundingRect(contour)

        # Draw the contour, shifted to ROI-local coordinates, as a filled shape.
        canvas = np.zeros((h0, w0), np.uint8)
        cv2.drawContours(canvas, [contour - [x0, y0]], -1, 255, thickness=cv2.FILLED)
        canvas = _fill_holes(canvas)

        for piece in _split_touching_blocks(canvas, min_gap_ratio=0.04):
            if piece.sum() == 0:
                continue

            px, py, pw, ph = cv2.boundingRect(piece)
            filled_px = int(piece.astype(bool).sum())
            box_px = pw * ph
            fill_ratio = filled_px / max(1.0, box_px)

            # Size and shape filters to discard noise.
            too_small = filled_px < 1200 or pw < 16 or ph < 16
            too_ragged = fill_ratio < 0.30
            too_big = box_px > 0.92 * img_w * img_h
            if too_small or too_ragged or too_big:
                continue

            found.append((x0 + px, y0 + py, pw, ph))

    return _nms(found, thr=0.30)
| # ---------------- Anti-ghost ---------------- | |
def _bleed_colors(bgr, mask_255):
    """Spread block colours outward to avoid pale fringes after cut-out.

    Every pixel outside ``mask_255`` is repainted with ``cv2.inpaint``
    (Telea method, radius 3). Returns the inpainted BGR image.
    """
    region_to_repaint = cv2.bitwise_not(mask_255)
    bled = cv2.inpaint(bgr, region_to_repaint, 3, cv2.INPAINT_TELEA)
    return bled
| # ---------------- Mask Picker ---------------- | |
def _pick_best_mask(img_rgba):
    """Try several masking strategies and keep the one that finds most blocks.

    Candidates, in order: the hybrid mask at the automatic threshold, the
    hybrid mask at a threshold lowered by 12 (but not below 20), and the
    LAB k-means mask. On a tie the earliest candidate wins.

    Returns ``(blocks, mask, img_np, img_bgr)`` where ``img_np`` is the RGBA
    array of the input and ``img_bgr`` its BGR conversion.
    """
    img_np = np.array(img_rgba)
    img_bgr = cv2.cvtColor(img_np, cv2.COLOR_RGBA2BGR)

    bg_rgb = _background_from_corners(img_np)
    diff_thr = _auto_diff_threshold(img_np, bg_rgb)

    candidate_masks = (
        _foreground_mask_hybrid(img_np, bg_rgb, diff_thr),
        _foreground_mask_hybrid(img_np, bg_rgb, max(20.0, diff_thr - 12)),
        _foreground_mask_kmeans_lab(img_np),
    )
    detections = [
        (_detect_blocks(img_np, candidate), candidate)
        for candidate in candidate_masks
    ]

    # max() returns the first maximal entry, so ties favour earlier strategies.
    best_blocks, best_mask = max(detections, key=lambda pair: len(pair[0]))
    return best_blocks, best_mask, img_np, img_bgr
| # ---------------- Pipeline ---------------- | |
def extract_blocks(image: Image.Image):
    """Detect blocks in a screenshot and export each as a transparent PNG.

    The image is converted to RGBA, blocks are located with
    ``_pick_best_mask``, and every block is cropped, given a refined alpha
    mask, colour-bled and written as ``NN.png`` into a ZIP file inside a fresh
    temporary directory. A preview with green boxes and labels is also saved
    next to the ZIP.

    Args:
        image: The uploaded PIL image, or ``None`` when nothing was uploaded.

    Returns:
        ``(preview, zip_path)``: the annotated preview as a PIL image and the
        path of the ZIP archive.

    Raises:
        gr.Error: If no image was supplied or no blocks were detected.
    """
    if image is None:
        raise gr.Error("Sube una imagen.")

    # Reduce any path-like name to a bare file stem so the outputs cannot
    # land outside the temporary directory; tolerate a missing/None name.
    raw_name = getattr(image, "name", None) or "output"
    base = os.path.basename(str(raw_name)).rsplit(".", 1)[0] or "output"
    img_rgba = image.convert("RGBA")

    # Step 1: global detection.
    blocks, mask, _img_np, img_bgr = _pick_best_mask(img_rgba)
    if not blocks:
        raise gr.Error("No se detectaron bloques. Prueba con otra captura o mayor contraste.")

    tmpdir = tempfile.mkdtemp()
    zip_path = os.path.join(tmpdir, f"{base}.zip")
    preview_path = os.path.join(tmpdir, f"{base}_preview.png")
    annotated = img_bgr.copy()
    pad = 2  # margin kept around the tightened bounding box

    with zipfile.ZipFile(zip_path, "w") as zf:
        for i, (x, y, w, h) in enumerate(blocks, start=1):
            # Initial crop of the raw ("dirty") mask, holes and noise included.
            roi_mask_dirty = mask[y:y + h, x:x + w]

            # Tighten the box to the largest component actually present.
            bbox = _largest_component_bbox(roi_mask_dirty)
            if bbox is None:
                continue
            bx, by, bw, bh = bbox
            bx2 = max(0, bx - pad)
            by2 = max(0, by - pad)
            bw2 = min(w, bx + bw + pad) - bx2
            bh2 = min(h, by + bh + pad) - by2

            block_bgr = img_bgr[y + by2:y + by2 + bh2, x + bx2:x + bx2 + bw2].copy()
            block_mask_dirty = roi_mask_dirty[by2:by2 + bh2, bx2:bx2 + bw2].copy()

            # Close gaps, solidify and trim the halo, then bleed colours
            # outward so the alpha edge does not pick up background tint.
            block_mask_clean = _refine_mask(block_mask_dirty)
            block_bgr = _bleed_colors(block_bgr, block_mask_clean)

            # Final composition: RGB + alpha.
            rgba = np.dstack([cv2.cvtColor(block_bgr, cv2.COLOR_BGR2RGB), block_mask_clean])
            out_name = f"{i:02d}.png"
            ok, buf = cv2.imencode(".png", cv2.cvtColor(rgba, cv2.COLOR_RGBA2BGRA))
            if not ok:
                # Do not store a broken entry or annotate a block that was not exported.
                continue
            # writestr is documented for bytes/str, so pass real bytes.
            zf.writestr(out_name, buf.tobytes())

            # Draw on the preview; keep the label on-image for blocks at the top edge.
            cv2.rectangle(annotated, (x, y), (x + w, y + h), (0, 255, 0), 2)
            cv2.putText(annotated, out_name, (x, max(12, y - 5)),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 1)

    cv2.imwrite(preview_path, annotated)
    return Image.fromarray(cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB)), zip_path
| # ---------------- Interfaz ---------------- | |
# Gradio UI: one image input, a detection preview, and a ZIP download.
# NOTE(review): the source's indentation was lost; the layout nesting below
# (outputs stacked in a column beside the input, button under the row) is a
# reconstruction — confirm against the deployed app.
with gr.Blocks() as demo:
    gr.Markdown("## ✂️ Extractor de bloques tipo Scratch, makeCode, Code.org — Vista previa y descarga ZIP")
    with gr.Row():
        # The uploaded screenshot is handed to the callback as a PIL image.
        inp = gr.Image(type="pil", label="Subir captura")
        with gr.Column():
            out_prev = gr.Image(type="pil", label="Vista previa de detección")
            out_zip = gr.File(label="Descargar ZIP")
    btn = gr.Button("Procesar")
    # Clicking runs extract_blocks on the input and fills both outputs.
    btn.click(extract_blocks, inputs=inp, outputs=[out_prev, out_zip])

# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()