File size: 4,394 Bytes
0992883
96e3277
 
7d1906d
 
 
 
96e3277
 
f20aaf1
 
7d1906d
96e3277
f79c50a
699e60d
 
96e3277
699e60d
 
 
 
 
 
 
2502b39
f79c50a
2502b39
886968d
c6606e2
699e60d
886968d
c6606e2
 
 
 
96e3277
886968d
699e60d
 
886968d
96e3277
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
886968d
96e3277
886968d
 
96e3277
 
 
 
 
c6606e2
886968d
96e3277
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7d1906d
 
 
 
 
886968d
eb5fd0f
4db19f9
96e3277
eb5fd0f
4db19f9
eb5fd0f
 
886968d
eb5fd0f
7d1906d
eb5fd0f
 
 
397a60b
96e3277
397a60b
886968d
eb5fd0f
7d1906d
96e3277
eb5fd0f
886968d
eb5fd0f
 
886968d
96e3277
 
 
 
eb5fd0f
397a60b
 
 
 
886968d
397a60b
7d1906d
96e3277
eb5fd0f
7d1906d
397a60b
96e3277
 
699e60d
96e3277
699e60d
 
96e3277
ae90b2f
96e3277
 
 
699e60d
96e3277
 
b8ab83d
96e3277
0992883
b8ab83d
96e3277
699e60d
7d1906d
 
96e3277
 
 
 
7d1906d
 
2502b39
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
import os
# NOTE(review): this is set before paddleocr is imported further down;
# presumably it turns off Paddle's MKL-DNN (oneDNN) CPU backend and must stay
# ahead of that import to take effect — confirm against the Paddle flags docs.
os.environ["FLAGS_use_mkldnn"] = "0"

import cv2
import numpy as np
import gradio as gr
from paddleocr import PaddleOCR
import tempfile
import shutil
import psutil
import time

# ---------------- OCR ----------------
# Single PaddleOCR engine created at module level and reused by
# detect_text_boxes() for every image. Configured with lang='en' and
# use_angle_cls=True.
ocr = PaddleOCR(use_angle_cls=True, lang='en')

# ---------------- CPU CONTROL ----------------
def wait_for_cpu(threshold=90, interval=2, timeout=30):
    """Block while system CPU usage is above ``threshold`` percent.

    CPU usage is sampled with ``psutil.cpu_percent(interval=1)``. While the
    sample exceeds ``threshold`` the function sleeps ``interval`` seconds and
    samples again, giving up once more than ``timeout`` seconds have elapsed.

    The timeout is only checked after each sleep, so the total wait can
    overshoot ``timeout`` by up to one sample plus one sleep.

    Args:
        threshold: CPU usage percentage above which the function keeps waiting.
        interval: Seconds to sleep between samples.
        timeout: Approximate upper bound, in seconds, on the total wait.

    Returns:
        None.
    """
    # Use the monotonic clock for elapsed time: time.time() follows the wall
    # clock, which can jump (NTP sync, manual changes) and would make the
    # timeout fire too early or never.
    start = time.monotonic()
    while psutil.cpu_percent(interval=1) > threshold:
        time.sleep(interval)
        if time.monotonic() - start > timeout:
            break

# ---------------- OCR DETECTION ----------------
def detect_text_boxes(image):
    """Run OCR on ``image`` and return one axis-aligned box per text line.

    Args:
        image: Image handed unchanged to ``ocr.ocr(image, cls=True)``.

    Returns:
        A list of ``((x_min, y_min, x_max, y_max), text, conf)`` tuples, one
        for each OCR line whose text is not blank. The box is the integer
        bounding rectangle of the points reported for that line. The list is
        empty when OCR returns nothing.
    """
    results = ocr.ocr(image, cls=True)
    boxes = []

    # Only the first element of the OCR result is consumed.
    if not results or not results[0]:
        return boxes

    for line in results[0]:
        try:
            box = line[0]
            text = line[1][0]
            conf = line[1][1]

            if not text.strip():
                continue

            xs = [int(p[0]) for p in box]
            ys = [int(p[1]) for p in box]

            boxes.append(((min(xs), min(ys), max(xs), max(ys)), text, conf))
        except Exception:
            # Best effort: skip entries that do not have the expected
            # (points, (text, confidence)) layout. Catching Exception rather
            # than using a bare except keeps KeyboardInterrupt and SystemExit
            # from being swallowed here.
            continue

    return boxes

# ---------------- FILTERS ----------------
def is_reasonable_shape(cnt):
    """Return True when a contour's size and proportions pass the shape filter.

    A contour is rejected when its area is below 1500 or above 80000, or when
    the width/height ratio of its bounding rectangle is above 4 or below 0.25.

    Args:
        cnt: Contour accepted by ``cv2.contourArea`` and ``cv2.boundingRect``.

    Returns:
        bool: True if the contour passes both the area and the ratio checks.
    """
    contour_area = cv2.contourArea(cnt)
    _, _, width, height = cv2.boundingRect(cnt)

    # The small epsilon keeps the division defined for a zero-height rectangle.
    ratio = width / (height + 1e-5)

    area_rejected = contour_area < 1500 or contour_area > 80000
    ratio_rejected = ratio > 4 or ratio < 0.25
    return not (area_rejected or ratio_rejected)


def has_valid_text(cnt_bbox, boxes):
    """Tell whether any usable OCR text lies entirely inside a rectangle.

    An OCR entry counts when its confidence is at least 0.5, its box is fully
    contained in ``cnt_bbox`` (edges may touch), and its text is not treated
    as a sound effect. All-uppercase text of six characters or fewer is
    treated as a sound effect and ignored.

    Args:
        cnt_bbox: Rectangle as ``(x, y, w, h)``.
        boxes: Iterable of ``((x1, y1, x2, y2), text, conf)`` entries.

    Returns:
        bool: True as soon as one entry qualifies, otherwise False.
    """
    left, top, width, height = cnt_bbox
    right, bottom = left + width, top + height

    def _counts_as_dialogue(entry):
        # Checks run in the same order as a plain loop would: confidence,
        # containment, then the sound-effect heuristic.
        bbox, text, conf = entry
        if conf < 0.5:
            return False

        bx1, by1, bx2, by2 = bbox
        inside = bx1 >= left and bx2 <= right and by1 >= top and by2 <= bottom
        if not inside:
            return False

        return not (text.isupper() and len(text) <= 6)

    return any(_counts_as_dialogue(entry) for entry in boxes)


def is_bubble_region(image_rgb, mask):
    """Classify a masked region as bubble-coloured from its mean colour.

    The pixels of ``image_rgb`` where ``mask`` equals 255 are averaged per
    channel. The region qualifies when the mean is near-white (all three
    channels above 200) or yellowish (first two channels above 180 and the
    third below 150).

    Args:
        image_rgb: Image array indexed by ``mask``; its three channels are
            read in R, G, B order.
        mask: Array where the value 255 marks the pixels to inspect.

    Returns:
        bool: True for a white or yellow region, False otherwise. Also False
        when the mask selects no pixels.
    """
    region = image_rgb[mask == 255]
    if not len(region):
        return False

    red, green, blue = np.mean(region, axis=0)

    white_bubble = red > 200 and green > 200 and blue > 200
    yellow_narration = red > 180 and green > 180 and blue < 150
    return bool(white_bubble or yellow_narration)


# ---------------- MAIN LOGIC ----------------
def remove_text_dynamic_fill(img_path, output_path):
    """Paint over text-bearing bubble regions of an image and save the result.

    Contours are found on a Canny edge map. A contour is filled only when it
    passes ``is_reasonable_shape``, its bounding rectangle contains usable OCR
    text (``has_valid_text``) and its interior is bubble-coloured
    (``is_bubble_region``). The fill colour is the median colour of a thin
    ring of pixels just outside the contour.

    Args:
        img_path: Path of the image to read.
        output_path: Path the processed image is written to.

    Returns:
        None. If the image cannot be read, nothing is written.
    """
    image = cv2.imread(img_path)
    if image is None:
        return

    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    gray = cv2.cvtColor(image_rgb, cv2.COLOR_RGB2GRAY)

    # -------- EDGE DETECTION --------
    edges = cv2.Canny(gray, 50, 150)

    # Morphological closing joins small gaps in the edge map before the
    # contour search.
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5))
    edges = cv2.morphologyEx(edges, cv2.MORPH_CLOSE, kernel)

    contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    boxes = detect_text_boxes(image_rgb)

    # Loop-invariant kernel used to grow each mask into its outer ring.
    ring_kernel = np.ones((5, 5), np.uint8)

    for cnt in contours:
        if not is_reasonable_shape(cnt):
            continue

        x, y, w, h = cv2.boundingRect(cnt)
        if not has_valid_text((x, y, w, h), boxes):
            continue

        mask = np.zeros(image.shape[:2], dtype=np.uint8)
        cv2.drawContours(mask, [cnt], -1, 255, -1)

        if not is_bubble_region(image_rgb, mask):
            continue

        # -------- COLOR SAMPLING --------
        # The dilated mask minus the mask leaves only the ring just outside
        # the contour. Colours are read from the untouched RGB copy, so
        # earlier fills never influence later samples.
        border = cv2.dilate(mask, ring_kernel) - mask
        border_pixels = image_rgb[border == 255]

        if len(border_pixels) == 0:
            continue

        fill_rgb = np.median(border_pixels, axis=0).astype(int)

        # -------- FILL --------
        # `image` is in OpenCV's BGR order while the sample is RGB: reverse
        # the channels so coloured fills do not come out with red and blue
        # swapped.
        fill_bgr = tuple(int(channel) for channel in fill_rgb[::-1])
        image[mask == 255] = fill_bgr

    cv2.imwrite(output_path, image)


# ---------------- PROCESS ----------------
def process_folder(files):
    """Clean every uploaded image and bundle the results into a zip archive.

    Args:
        files: Iterable of uploads. Each item is either a path string or an
            object whose ``name`` attribute holds the path. ``None`` is
            treated as "no files".

    Returns:
        Path of the zip archive created by ``shutil.make_archive``. The
        archive is empty when there was nothing to process.
    """
    wait_for_cpu()

    out_dir = tempfile.mkdtemp()
    used_names = set()

    try:
        # `files or []` avoids a TypeError when the form is submitted with
        # no upload and `files` is None.
        for f in files or []:
            path = f if isinstance(f, str) else f.name
            filename = os.path.basename(path)

            # Uploads from different folders can share a basename; add a
            # numeric suffix so a later file does not overwrite an earlier one.
            stem, ext = os.path.splitext(filename)
            counter = 1
            while filename in used_names:
                filename = f"{stem}_{counter}{ext}"
                counter += 1
            used_names.add(filename)

            out_path = os.path.join(out_dir, filename)
            remove_text_dynamic_fill(path, out_path)

        # The archive is written to "<out_dir>.zip", next to the working
        # directory rather than inside it, so it survives the cleanup below.
        zip_path = shutil.make_archive(out_dir, 'zip', out_dir)
        return zip_path
    finally:
        # The working directory is no longer needed once it is archived (or
        # processing failed); remove it so repeated requests do not
        # accumulate temp folders.
        shutil.rmtree(out_dir, ignore_errors=True)


# ---------------- UI ----------------
# Gradio front end: accepts one or more .jpg/.jpeg/.png uploads, passes them
# to process_folder(), and exposes the path it returns through a gr.File
# output.
demo = gr.Interface(
    fn=process_folder,
    inputs=gr.File(file_types=[".jpg", ".jpeg", ".png"], file_count="multiple"),
    outputs=gr.File(),
    title="Comic Bubble Cleaner (Smart Detection)",
    description="Removes dialogue bubbles while preserving SFX and background."
)

# Called at module level (there is no __main__ guard), so the app is launched
# whenever this file is executed or imported.
demo.launch()