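# Page header captured with this file (not part of the program):
#   author avatar: SohomToom's picture
#   commit message: Update app.py
#   commit: 96e3277 (verified)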
import os
os.environ["FLAGS_use_mkldnn"] = "0"
import cv2
import numpy as np
import gradio as gr
from paddleocr import PaddleOCR
import tempfile
import shutil
import psutil
import time
# ---------------- OCR ----------------
# Single shared OCR engine, constructed once when the module is imported
# so the rest of the file can reuse it instead of rebuilding it per call.
ocr = PaddleOCR(
    lang='en',
    use_angle_cls=True,
)
# ---------------- CPU CONTROL ----------------
def wait_for_cpu(threshold=90, interval=2, timeout=30):
    """Block while system CPU usage is above ``threshold`` percent.

    CPU usage is sampled with ``psutil.cpu_percent(interval=1)``. While the
    sample is above ``threshold`` the function sleeps and samples again,
    giving up once roughly ``timeout`` seconds have passed since the call.

    Args:
        threshold: CPU usage percentage above which the caller keeps waiting.
        interval: Seconds to sleep between two consecutive samples.
        timeout: Approximate upper bound, in seconds, on the total wait.

    Returns:
        None. The function returns whether the CPU calmed down or the
        timeout expired; callers cannot tell the two apart.
    """
    # A monotonic clock keeps the timeout correct even if the wall clock is
    # adjusted (NTP sync, manual change) while we are waiting.
    deadline = time.monotonic() + timeout
    while psutil.cpu_percent(interval=1) > threshold:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        # Never sleep past the deadline.
        time.sleep(min(interval, remaining))
# ---------------- OCR DETECTION ----------------
def detect_text_boxes(image):
    """Run OCR on ``image`` and return axis-aligned boxes for non-blank text.

    The module-level ``ocr`` engine is called with ``cls=True``. Only the
    first element of its result is inspected, and each entry in it is read
    as ``[points, (text, confidence)]`` where ``points`` is an iterable of
    ``(x, y)`` pairs.

    Args:
        image: Image passed straight through to ``ocr.ocr``.

    Returns:
        A list of ``((x_min, y_min, x_max, y_max), text, confidence)``
        tuples with integer box coordinates. Entries whose text is blank or
        whose structure cannot be parsed are left out. The list is empty
        when the OCR result is empty.
    """
    results = ocr.ocr(image, cls=True)
    if not results or not results[0]:
        return []

    boxes = []
    for line in results[0]:
        try:
            box = line[0]
            text, conf = line[1][0], line[1][1]
            if not text.strip():
                continue
            xs = [int(p[0]) for p in box]
            ys = [int(p[1]) for p in box]
            bounds = (min(xs), min(ys), max(xs), max(ys))
        except (AttributeError, IndexError, KeyError, TypeError, ValueError):
            # Malformed OCR entry: skip it, but let KeyboardInterrupt,
            # SystemExit and unrelated failures propagate instead of being
            # silently swallowed.
            continue
        boxes.append((bounds, text, conf))
    return boxes
# ---------------- FILTERS ----------------
def is_reasonable_shape(cnt):
    """Return True when contour ``cnt`` passes the size and proportion checks.

    A contour is accepted when its area lies in ``[1500, 80000]`` and the
    width/height ratio of its bounding rectangle lies in ``[0.25, 4]``.
    """
    contour_area = cv2.contourArea(cnt)
    _, _, width, height = cv2.boundingRect(cnt)
    # The small epsilon keeps the division defined for a zero-height box.
    ratio = width / (height + 1e-5)

    rejected = (
        contour_area < 1500
        or contour_area > 80000
        or ratio > 4
        or ratio < 0.25
    )
    return not rejected
def has_valid_text(cnt_bbox, boxes):
    """Return True if ``cnt_bbox`` fully contains at least one dialogue box.

    Args:
        cnt_bbox: ``(x, y, w, h)`` rectangle of the candidate region.
        boxes: Iterable of ``((x1, y1, x2, y2), text, confidence)`` tuples.

    A text box counts when its confidence is at least 0.5, it lies entirely
    inside ``cnt_bbox`` (edges may touch), and its text is not treated as a
    sound effect, i.e. it is not an all-uppercase string of 6 characters or
    fewer.
    """
    left, top, width, height = cnt_bbox

    def _is_dialogue(entry):
        region, label, score = entry
        if score < 0.5:
            return False
        x1, y1, x2, y2 = region
        enclosed = (
            x1 >= left
            and x2 <= left + width
            and y1 >= top
            and y2 <= top + height
        )
        if not enclosed:
            return False
        # Short all-caps strings are treated as SFX and do not count.
        return not (label.isupper() and len(label) <= 6)

    return any(_is_dialogue(entry) for entry in boxes)
def is_bubble_region(image_rgb, mask):
    """Return True when the masked area averages to a bubble-like colour.

    Args:
        image_rgb: Image array indexed by ``mask``; its last axis is
            unpacked as (red, green, blue).
        mask: Array in which the value 255 marks the pixels to average.

    The mean colour of the selected pixels must be either near-white (all
    three channels above 200) or yellowish (red and green above 180, blue
    below 150). An empty selection yields False.
    """
    selected = image_rgb[mask == 255]
    if not len(selected):
        return False

    red, green, blue = np.mean(selected, axis=0)

    # white bubble
    looks_white = red > 200 and green > 200 and blue > 200
    # yellow narration
    looks_yellow = red > 180 and green > 180 and blue < 150
    return bool(looks_white or looks_yellow)
# ---------------- MAIN LOGIC ----------------
def remove_text_dynamic_fill(img_path, output_path):
    """Paint over detected dialogue bubbles in an image and save the result.

    Candidate regions are the external contours of the closed Canny edge
    map. A contour is filled only when it passes ``is_reasonable_shape``,
    encloses accepted OCR text (``has_valid_text``) and has a bubble-like
    average colour (``is_bubble_region``). Each accepted contour is filled
    with the median colour of a thin ring just outside it.

    Args:
        img_path: Path of the image to read.
        output_path: Path the processed image is written to.

    Returns:
        None. If ``img_path`` cannot be read, nothing is written.
    """
    image = cv2.imread(img_path)
    if image is None:
        return

    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    gray = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2GRAY)

    # -------- EDGE DETECTION --------
    edges = cv2.Canny(gray, 50, 150)
    close_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    edges = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, close_kernel)
    contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    boxes = detect_text_boxes(image_rgb)

    # Loop-invariant kernel used to grow each mask into its border ring.
    ring_kernel = np.ones((5, 5), np.uint8)

    for cnt in contours:
        if not is_reasonable_shape(cnt):
            continue
        x, y, w, h = cv2.boundingRect(cnt)
        if not has_valid_text((x, y, w, h), boxes):
            continue

        mask = np.zeros(image.shape[:2], dtype=np.uint8)
        cv2.drawContours(mask, [cnt], -1, 255, -1)
        if not is_bubble_region(image_rgb, mask):
            continue

        # -------- COLOR SAMPLING --------
        # Dilated mask minus the mask leaves a ring just outside the contour.
        border = cv2.dilate(mask, ring_kernel) - mask
        border_pixels = image_rgb[border == 255]
        if len(border_pixels) == 0:
            continue
        fill_rgb = np.median(border_pixels, axis=0)

        # -------- FILL --------
        # The colour was sampled from the RGB copy but `image` is still in
        # the BGR order returned by cv2.imread, so reverse the channels
        # before painting; otherwise red and blue end up swapped.
        image[mask == 255] = fill_rgb[::-1].astype(image.dtype)

    cv2.imwrite(output_path, image)
# ---------------- PROCESS ----------------
def process_folder(files):
    """Clean every uploaded image and return the path of a zip of the results.

    Args:
        files: Iterable of uploads, each either a path string or an object
            whose ``name`` attribute is the path. ``None`` is treated as an
            empty upload.

    Returns:
        Path of the zip archive holding the processed images. Inputs that
        ``remove_text_dynamic_fill`` could not read are absent from it.
    """
    wait_for_cpu()
    out_dir = tempfile.mkdtemp()
    try:
        used_names = set()
        for f in files or []:
            path = f if isinstance(f, str) else f.name
            filename = os.path.basename(path)

            # Two uploads can share a basename; add a numeric suffix so the
            # later one does not silently overwrite the earlier one.
            stem, ext = os.path.splitext(filename)
            counter = 1
            while filename in used_names:
                filename = f"{stem}_{counter}{ext}"
                counter += 1
            used_names.add(filename)

            remove_text_dynamic_fill(path, os.path.join(out_dir, filename))

        # The archive is created next to out_dir (as out_dir + ".zip"), not
        # inside it, so it survives the cleanup below.
        return shutil.make_archive(out_dir, 'zip', out_dir)
    finally:
        # The loose processed images are no longer needed once zipped.
        shutil.rmtree(out_dir, ignore_errors=True)
# ---------------- UI ----------------
# Web front end: takes several .jpg/.jpeg/.png uploads, hands them to
# process_folder, and offers the file it returns as a download.
demo = gr.Interface(
    title="Comic Bubble Cleaner (Smart Detection)",
    description="Removes dialogue bubbles while preserving SFX and background.",
    fn=process_folder,
    inputs=gr.File(
        file_count="multiple",
        file_types=[".jpg", ".jpeg", ".png"],
    ),
    outputs=gr.File(),
)

# Start the app as soon as the module is executed.
demo.launch()