# segmentation / app.py
# arthur0leywin's picture
# Update app.py
# 938b203 verified
# Raw
# History Blame Contribute Delete
# 10.4 kB
"""
🔱 Emperor SEG Space

Receives a manga image and returns a black-and-white mask of its text.
Models: ogkalu/comic-text-segmenter-yolov8m + ogkalu/comic-speech-bubble-detector-yolov8m
Both are YOLOv8 models trained on 8k+ manga/webtoon/manhwa images.
"""
import io
import base64
import numpy as np
import gradio as gr
from PIL import Image
import torch
# ── Model loading ────────────────────────────────────────────
print("⏳ Loading YOLOv8 manga models...", flush=True)


def _load_yolo(repo_id: str, filename: str, label: str):
    """Download ``filename`` from the Hub repo ``repo_id`` and wrap it in YOLO.

    Loading is best-effort: any failure (missing package, download error,
    unreadable weights) is logged under ``label`` and ``None`` is returned
    instead of raising, so the rest of the module still loads.

    Args:
        repo_id: Hugging Face Hub repository that hosts the weights.
        filename: Name of the weights file inside that repository.
        label: Human-readable model name used in the log lines.

    Returns:
        The loaded YOLO model, or ``None`` if anything went wrong.
    """
    try:
        # Imported lazily so a missing dependency only disables this model.
        from ultralytics import YOLO
        from huggingface_hub import hf_hub_download

        weights_path = hf_hub_download(repo_id=repo_id, filename=filename)
        model = YOLO(weights_path)
    except Exception as e:
        print(f"⚠️ {label} failed: {e}", flush=True)
        return None
    print(f"✅ {label} loaded ({repo_id})", flush=True)
    return model


# First model: direct text detection (letters + SFX).
text_model = _load_yolo(
    "ogkalu/comic-text-segmenter-yolov8m",
    "comic-text-segmenter.pt",
    "Text segmenter",
)

# Second model: speech-bubble detection.
bubble_model = _load_yolo(
    "ogkalu/comic-speech-bubble-detector-yolov8m",
    "comic-speech-bubble-detector.pt",
    "Bubble detector",
)

if text_model is None and bubble_model is None:
    print("🔄 Both models failed — will use OpenCV fallback only", flush=True)
else:
    print("✅ Models ready!", flush=True)
# โ”€โ”€ ุฏุงู„ุฉ ุจู†ุงุก ุงู„ู€ mask ู…ู† ู†ุชุงุฆุฌ YOLO Segmentation โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
def _yolo_seg_to_mask(results, w: int, h: int) -> np.ndarray:
"""
ุชุญูˆู‘ู„ ู†ุชุงุฆุฌ YOLO (instance masks) ู„ู€ mask ูˆุงุญุฏ ุจุงู„ุญุฌู… ุงู„ูƒุงู…ู„.
"""
mask = np.zeros((h, w), dtype=np.uint8)
for r in results:
if r.masks is None:
continue
for seg_mask in r.masks.data:
# seg_mask: tensor (H', W') ู‚ูŠู… 0-1
arr = seg_mask.cpu().numpy()
arr = (arr * 255).astype(np.uint8)
# resize ู„ู„ุญุฌู… ุงู„ุฃุตู„ูŠ
from PIL import Image as _PIL
arr_resized = np.array(
_PIL.fromarray(arr).resize((w, h), _PIL.NEAREST)
)
mask = np.maximum(mask, arr_resized)
return mask
# โ”€โ”€ ุฏุงู„ุฉ ุจู†ุงุก ุงู„ู€ mask ู…ู† ู†ุชุงุฆุฌ YOLO Detection (boxes ูู‚ุท) โ”€
def _yolo_det_to_mask(results, w: int, h: int) -> np.ndarray:
"""
ู„ูˆ ุงู„ู…ูˆุฏูŠู„ detection ู…ุด seg โ€” ูŠู…ู„ุฃ ุงู„ู…ุณุชุทูŠู„ุงุช.
"""
import cv2
mask = np.zeros((h, w), dtype=np.uint8)
for r in results:
if r.boxes is None:
continue
for box in r.boxes.xyxy.cpu().numpy():
x1, y1, x2, y2 = map(int, box[:4])
pad = 6
x1 = max(0, x1 - pad); y1 = max(0, y1 - pad)
x2 = min(w, x2 + pad); y2 = min(h, y2 + pad)
cv2.rectangle(mask, (x1, y1), (x2, y2), 255, -1)
return mask
# ── OpenCV fallback (backup only) ────────────────────────────
def detect_text_opencv(img: Image.Image) -> np.ndarray:
    """Estimate text regions with classical OpenCV operations only.

    Two passes paint padded bounding rectangles into one mask:

    1. Bright regions (gray > 220) that are closed morphologically and pass
       size, aspect-ratio and dark-pixel-content filters.
    2. Adaptive-threshold foreground that is dilated and closed into
       clusters, then filtered by size and aspect ratio.

    Args:
        img: Input picture; it is converted to RGB, then to grayscale.

    Returns:
        ``uint8`` array with the grayscale image's shape, 255 inside every
        accepted rectangle and 0 elsewhere.
    """
    import cv2

    gray = cv2.cvtColor(np.array(img.convert("RGB")), cv2.COLOR_RGB2GRAY)
    h, w = gray.shape
    mask = np.zeros_like(gray)
    half_image_area = 0.5 * h * w

    def paint_box(x, y, box_w, box_h, pad):
        # Fill the bounding box, grown by `pad` and clamped to the image.
        top_left = (max(0, x - pad), max(0, y - pad))
        bottom_right = (min(w, x + box_w + pad), min(h, y + box_h + pad))
        cv2.rectangle(mask, top_left, bottom_right, 255, -1)

    # Pass 1: detect white bubbles.
    _, bright = cv2.threshold(gray, 220, 255, cv2.THRESH_BINARY)
    edge = 10
    # Blank a 10-pixel frame around the image before closing.
    bright[:edge, :] = 0
    bright[-edge:, :] = 0
    bright[:, :edge] = 0
    bright[:, -edge:] = 0
    ellipse = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15))
    bright_closed = cv2.morphologyEx(bright, cv2.MORPH_CLOSE, ellipse, iterations=3)
    bubble_contours, _ = cv2.findContours(
        bright_closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    for contour in bubble_contours:
        x, y, box_w, box_h = cv2.boundingRect(contour)
        box_area = box_w * box_h
        if not (800 <= box_area <= half_image_area):
            continue
        aspect = box_w / max(box_h, 1)
        if not (0.12 <= aspect <= 8):
            continue
        patch = gray[y:y + box_h, x:x + box_w]
        dark_fraction = np.sum(patch < 100) / max(patch.size, 1)
        # Require at least 2% dark pixels inside the bright region.
        if dark_fraction < 0.02:
            continue
        paint_box(x, y, box_w, box_h, 8)

    # Pass 2: detect text directly.
    ink = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 15, 10
    )
    speckle_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
    ink = cv2.morphologyEx(ink, cv2.MORPH_OPEN, speckle_kernel)
    # Dilate once with a wide kernel and once with a tall kernel, then merge
    # (presumably to join characters along rows and along columns).
    wide = cv2.dilate(
        ink, cv2.getStructuringElement(cv2.MORPH_RECT, (25, 4)), iterations=2
    )
    tall = cv2.dilate(
        ink, cv2.getStructuringElement(cv2.MORPH_RECT, (4, 12)), iterations=2
    )
    clusters = cv2.bitwise_or(wide, tall)
    clusters = cv2.morphologyEx(
        clusters,
        cv2.MORPH_CLOSE,
        cv2.getStructuringElement(cv2.MORPH_RECT, (35, 25)),
        iterations=2,
    )
    text_contours, _ = cv2.findContours(
        clusters, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )
    for contour in text_contours:
        x, y, box_w, box_h = cv2.boundingRect(contour)
        box_area = box_w * box_h
        aspect = box_w / max(box_h, 1)
        if box_area < 60 or box_area > half_image_area:
            continue
        if aspect < 0.05 or aspect > 20:
            continue
        paint_box(x, y, box_w, box_h, 6)

    return mask
# ── Core function ────────────────────────────────────────────
def _run_yolo_mask(model, img, w, h, label):
    """Run one YOLO model on ``img`` and return its ``(h, w)`` uint8 mask."""
    results = model(img, imgsz=1024, conf=0.25, verbose=False)
    # Try instance masks first; if there are none, fall back to filled boxes.
    mask = _yolo_seg_to_mask(results, w, h)
    if not mask.any():
        mask = _yolo_det_to_mask(results, w, h)
    print(f"✅ {label} model: {np.count_nonzero(mask)} pixels detected", flush=True)
    return mask


def segment(image_b64: str) -> str:
    """Segment text in a base64-encoded image and return the mask as a PNG data URL.

    Args:
        image_b64: The image as a data URL (``data:image/...;base64,...``)
            or as raw base64.

    Returns:
        ``data:image/png;base64,...`` holding a single-channel mask, or
        ``""`` when the input is blank or any step fails (the error is
        logged with a traceback, never raised).

    The image is shrunk to fit within 1024x1024 before inference, so the mask
    has the dimensions of that thumbnail rather than of the original input.
    """
    try:
        if not image_b64 or not image_b64.strip():
            return ""
        # Drop the "data:...;base64," prefix when the input is a data URL.
        raw = image_b64.split(",")[1] if "," in image_b64 else image_b64
        img = Image.open(io.BytesIO(base64.b64decode(raw))).convert("RGB")
        img.thumbnail((1024, 1024))
        w, h = img.size

        final_mask = np.zeros((h, w), dtype=np.uint8)
        # Text model first, then bubble model; their masks are unioned. A
        # failure in one model is logged and does not stop the other.
        for label, model in (("Text", text_model), ("Bubble", bubble_model)):
            if model is None:
                continue
            try:
                model_mask = _run_yolo_mask(model, img, w, h, label)
                final_mask = np.maximum(final_mask, model_mask)
            except Exception as e:
                print(f"⚠️ {label} model inference error: {e}", flush=True)

        # Fallback when both models failed or returned nothing.
        if not final_mask.any():
            print("🔄 YOLO masks empty — using OpenCV fallback", flush=True)
            final_mask = detect_text_opencv(img)

        # Final report.
        if not final_mask.any():
            print("⚠️ Final mask is empty — no text detected", flush=True)
        else:
            pct = 100 * np.count_nonzero(final_mask) / final_mask.size
            print(f"✅ Final mask: {pct:.1f}% coverage", flush=True)

        buf = io.BytesIO()
        Image.fromarray(final_mask).save(buf, format="PNG")
        return "data:image/png;base64," + base64.b64encode(buf.getvalue()).decode()
    except Exception as e:
        # API boundary: report the failure and return an empty result.
        print(f"❌ segment error: {e}", flush=True)
        import traceback

        traceback.print_exc()
        return ""
# ── Gradio UI + API ──────────────────────────────────────────
with gr.Blocks(title="Emperor SEG API") as demo:
    gr.Markdown("## 🔱 Emperor SEG Space\nText Segmentation API for Manga/Manhwa cleaning.")
    with gr.Row():
        with gr.Column():
            img_input = gr.Image(label="Input Image", type="pil")
            btn = gr.Button("Segment", variant="primary")
        with gr.Column():
            mask_output = gr.Image(label="Text Mask")

    def _demo_fn(img_pil):
        """Adapt the base64-based ``segment`` API to the PIL-based demo UI.

        Encodes the uploaded image as PNG/base64, calls ``segment``, and
        decodes the returned data URL back into a PIL image. Returns ``None``
        when no image was supplied or ``segment`` returned an empty string.
        """
        if img_pil is None:
            return None
        buf = io.BytesIO()
        img_pil.save(buf, format="PNG")
        mask_b64 = segment(base64.b64encode(buf.getvalue()).decode())
        if not mask_b64:
            return None
        # Strip the "data:image/png;base64," prefix before decoding.
        mask_bytes = base64.b64decode(mask_b64.split(",")[1])
        return Image.open(io.BytesIO(mask_bytes))

    btn.click(_demo_fn, inputs=[img_input], outputs=[mask_output])
    # Register `segment` itself as an API endpoint named "segment".
    gr.api(segment, api_name="segment")

# Listen on all interfaces, port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)