import os os.environ["FLAGS_use_mkldnn"] = "0" import cv2 import numpy as np import gradio as gr from paddleocr import PaddleOCR import tempfile import shutil import psutil import time # ---------------- OCR ---------------- ocr = PaddleOCR(use_angle_cls=True, lang='en') # ---------------- CPU CONTROL ---------------- def wait_for_cpu(threshold=90, interval=2, timeout=30): start = time.time() while psutil.cpu_percent(interval=1) > threshold: time.sleep(interval) if time.time() - start > timeout: break # ---------------- OCR DETECTION ---------------- def detect_text_boxes(image): results = ocr.ocr(image, cls=True) boxes = [] if not results or not results[0]: return boxes for line in results[0]: try: box = line[0] text = line[1][0] conf = line[1][1] if not text.strip(): continue xs = [int(p[0]) for p in box] ys = [int(p[1]) for p in box] boxes.append(((min(xs), min(ys), max(xs), max(ys)), text, conf)) except: continue return boxes # ---------------- FILTERS ---------------- def is_reasonable_shape(cnt): area = cv2.contourArea(cnt) x, y, w, h = cv2.boundingRect(cnt) aspect = w / (h + 1e-5) if area < 1500: return False if area > 80000: return False if aspect > 4 or aspect < 0.25: return False return True def has_valid_text(cnt_bbox, boxes): x, y, w, h = cnt_bbox for (bbox, text, conf) in boxes: if conf < 0.5: continue bx1, by1, bx2, by2 = bbox if bx1 >= x and bx2 <= x+w and by1 >= y and by2 <= y+h: # skip SFX if text.isupper() and len(text) <= 6: continue return True return False def is_bubble_region(image_rgb, mask): pixels = image_rgb[mask == 255] if len(pixels) == 0: return False avg = np.mean(pixels, axis=0) r, g, b = avg # white bubble if r > 200 and g > 200 and b > 200: return True # yellow narration if r > 180 and g > 180 and b < 150: return True return False # ---------------- MAIN LOGIC ---------------- def remove_text_dynamic_fill(img_path, output_path): image = cv2.imread(img_path) if image is None: return image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) gray = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2GRAY) # -------- EDGE DETECTION -------- edges = cv2.Canny(gray, 50, 150) kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5,5)) edges = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel) contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) boxes = detect_text_boxes(image_rgb) for cnt in contours: if not is_reasonable_shape(cnt): continue x, y, w, h = cv2.boundingRect(cnt) if not has_valid_text((x, y, w, h), boxes): continue mask = np.zeros(image.shape[:2], dtype=np.uint8) cv2.drawContours(mask, [cnt], -1, 255, -1) if not is_bubble_region(image_rgb, mask): continue # -------- COLOR SAMPLING -------- border = cv2.dilate(mask, np.ones((5,5), np.uint8)) - mask border_pixels = image_rgb[border == 255] if len(border_pixels) == 0: continue fill_color = tuple(np.median(border_pixels, axis=0).astype(int)) # -------- FILL -------- image[mask == 255] = fill_color cv2.imwrite(output_path, image) # ---------------- PROCESS ---------------- def process_folder(files): wait_for_cpu() out_dir = tempfile.mkdtemp() for f in files: path = f if isinstance(f, str) else f.name filename = os.path.basename(path) out_path = os.path.join(out_dir, filename) remove_text_dynamic_fill(path, out_path) zip_path = shutil.make_archive(out_dir, 'zip', out_dir) return zip_path # ---------------- UI ---------------- demo = gr.Interface( fn=process_folder, inputs=gr.File(file_types=[".jpg", ".jpeg", ".png"], file_count="multiple"), outputs=gr.File(), title="Comic Bubble Cleaner (Smart Detection)", description="Removes dialogue bubbles while preserving SFX and background." ) demo.launch()