Spaces:

arthur0leywin
/

segmentation

Paused

App Files Files Community

segmentation / app.py

arthur0leywin

Update app.py

938b203 verified about 1 month ago

Raw

History Blame Contribute Delete

10.4 kB

	"""
	🔱 Emperor SEG Space
	يستقبل صورة مانجا → يرجع mask أبيض وأسود للنصوص
	النموذج: ogkalu/comic-text-segmenter-yolov8m + ogkalu/comic-speech-bubble-detector-yolov8m
	كلاهم YOLOv8 متدربين على 8k+ صورة مانجا/ويب-تون/مانهوا
	"""
	import io
	import base64
	import numpy as np
	import gradio as gr
	from PIL import Image
	import torch

	# ── Model loading ──────────────────────────────────────────
	print("⏳ Loading YOLOv8 manga models...", flush=True)

	text_model = None # كشف النص مباشرة (حروف + SFX)
	bubble_model = None # كشف فقاعات الكلام

	try:
	from ultralytics import YOLO
	from huggingface_hub import hf_hub_download

	# ── الموديل الأول: كاشف النص ──────────────────────────
	text_pt = hf_hub_download(
	repo_id="ogkalu/comic-text-segmenter-yolov8m",
	filename="comic-text-segmenter.pt",
	)
	text_model = YOLO(text_pt)
	print("✅ Text segmenter loaded (ogkalu/comic-text-segmenter-yolov8m)", flush=True)

	except Exception as e:
	print(f"⚠️ Text segmenter failed: {e}", flush=True)

	try:
	from ultralytics import YOLO
	from huggingface_hub import hf_hub_download

	# ── الموديل الثاني: كاشف الفقاعات ────────────────────
	bubble_pt = hf_hub_download(
	repo_id="ogkalu/comic-speech-bubble-detector-yolov8m",
	filename="comic-speech-bubble-detector.pt",
	)
	bubble_model = YOLO(bubble_pt)
	print("✅ Bubble detector loaded (ogkalu/comic-speech-bubble-detector-yolov8m)", flush=True)

	except Exception as e:
	print(f"⚠️ Bubble detector failed: {e}", flush=True)

	if text_model is None and bubble_model is None:
	print("🔄 Both models failed — will use OpenCV fallback only", flush=True)
	else:
	print("✅ Models ready!", flush=True)


	# ── Build the mask from YOLO segmentation results ─────────
	def _yolo_seg_to_mask(results, w: int, h: int) -> np.ndarray:
	"""
	تحوّل نتائج YOLO (instance masks) لـ mask واحد بالحجم الكامل.
	"""
	mask = np.zeros((h, w), dtype=np.uint8)
	for r in results:
	if r.masks is None:
	continue
	for seg_mask in r.masks.data:
	# seg_mask: tensor (H', W') قيم 0-1
	arr = seg_mask.cpu().numpy()
	arr = (arr * 255).astype(np.uint8)
	# resize للحجم الأصلي
	from PIL import Image as _PIL
	arr_resized = np.array(
	_PIL.fromarray(arr).resize((w, h), _PIL.NEAREST)
	)
	mask = np.maximum(mask, arr_resized)
	return mask


	# ── Build the mask from YOLO detection results (boxes only) ─
	def _yolo_det_to_mask(results, w: int, h: int) -> np.ndarray:
	"""
	لو الموديل detection مش seg — يملأ المستطيلات.
	"""
	import cv2
	mask = np.zeros((h, w), dtype=np.uint8)
	for r in results:
	if r.boxes is None:
	continue
	for box in r.boxes.xyxy.cpu().numpy():
	x1, y1, x2, y2 = map(int, box[:4])
	pad = 6
	x1 = max(0, x1 - pad); y1 = max(0, y1 - pad)
	x2 = min(w, x2 + pad); y2 = min(h, y2 + pad)
	cv2.rectangle(mask, (x1, y1), (x2, y2), 255, -1)
	return mask


	# ── OpenCV fallback (backup only) ─────────────────────────
	def detect_text_opencv(img: Image.Image) -> np.ndarray:
	    """Heuristic text mask built with classic OpenCV operations.

	    Two passes paint padded bounding rectangles into one mask:
	    bright closed regions that contain some dark pixels, then clusters
	    of adaptively thresholded strokes.

	    Args:
	        img: Input PIL image; it is converted to RGB, then grayscale.

	    Returns:
	        Array shaped like the grayscale image, 255 inside painted
	        rectangles and 0 elsewhere.
	    """
	    import cv2

	    gray = cv2.cvtColor(np.array(img.convert("RGB")), cv2.COLOR_RGB2GRAY)
	    h, w = gray.shape
	    mask = np.zeros_like(gray)
	    half_image = 0.5 * h * w

	    def paint(x, y, bw, bh, margin):
	        # Fill the bounding box grown by `margin`, clamped to the image.
	        top_left = (max(0, x - margin), max(0, y - margin))
	        bottom_right = (min(w, x + bw + margin), min(h, y + bh + margin))
	        cv2.rectangle(mask, top_left, bottom_right, 255, -1)

	    # Pass 1: bright regions (speech-bubble candidates).
	    _, bright = cv2.threshold(gray, 220, 255, cv2.THRESH_BINARY)
	    edge = 10
	    # Blank a frame around the image so the page margin is not a candidate.
	    bright[:edge, :] = 0
	    bright[-edge:, :] = 0
	    bright[:, :edge] = 0
	    bright[:, -edge:] = 0
	    fill_kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (15, 15))
	    bright_closed = cv2.morphologyEx(
	        bright, cv2.MORPH_CLOSE, fill_kernel, iterations=3
	    )
	    bubble_contours, _ = cv2.findContours(
	        bright_closed, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
	    )
	    for contour in bubble_contours:
	        x, y, bw, bh = cv2.boundingRect(contour)
	        area = bw * bh
	        if not (800 <= area <= half_image):
	            continue
	        aspect = bw / max(bh, 1)
	        if not (0.12 <= aspect <= 8):
	            continue
	        patch = gray[y:y + bh, x:x + bw]
	        dark_share = np.sum(patch < 100) / max(patch.size, 1)
	        # A bright region with almost no dark pixels holds no lettering.
	        if dark_share < 0.02:
	            continue
	        paint(x, y, bw, bh, 8)

	    # Pass 2: direct stroke detection.
	    strokes = cv2.adaptiveThreshold(
	        gray,
	        255,
	        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
	        cv2.THRESH_BINARY_INV,
	        15,
	        10,
	    )
	    noise_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
	    strokes = cv2.morphologyEx(strokes, cv2.MORPH_OPEN, noise_kernel)
	    wide_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (25, 4))
	    tall_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (4, 12))
	    grown_wide = cv2.dilate(strokes, wide_kernel, iterations=2)
	    grown_tall = cv2.dilate(strokes, tall_kernel, iterations=2)
	    merged = cv2.bitwise_or(grown_wide, grown_tall)
	    join_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (35, 25))
	    joined = cv2.morphologyEx(merged, cv2.MORPH_CLOSE, join_kernel, iterations=2)
	    text_contours, _ = cv2.findContours(
	        joined, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
	    )
	    for contour in text_contours:
	        x, y, bw, bh = cv2.boundingRect(contour)
	        area = bw * bh
	        aspect = bw / max(bh, 1)
	        if not (60 <= area <= half_image):
	            continue
	        if not (0.05 <= aspect <= 20):
	            continue
	        paint(x, y, bw, bh, 6)

	    return mask


	# ── Core function ──────────────────────────────────────────
	def _merge_model_mask(model, label: str, img, w: int, h: int, acc: np.ndarray) -> np.ndarray:
	    """Run one YOLO model on ``img`` and merge its mask into ``acc``.

	    Segmentation masks are preferred; when they come back empty the
	    detection boxes are rasterised instead. An inference failure is
	    logged and ``acc`` is returned as it was at that point.
	    """
	    try:
	        results = model(img, imgsz=1024, conf=0.25, verbose=False)
	        model_mask = _yolo_seg_to_mask(results, w, h)
	        if model_mask.max() == 0:
	            model_mask = _yolo_det_to_mask(results, w, h)
	        acc = np.maximum(acc, model_mask)
	        n = np.count_nonzero(model_mask)
	        print(f"✅ {label}: {n} pixels detected", flush=True)
	    except Exception as e:
	        # Best effort: one failing model must not block the other
	        # detector or the OpenCV fallback.
	        print(f"⚠️ {label} inference error: {e}", flush=True)
	    return acc


	def segment(image_b64: str) -> str:
	    """Return a text mask for a base64-encoded image.

	    Args:
	        image_b64: Image as a ``data:image/...;base64,...`` URL or as raw
	            base64.

	    Returns:
	        ``data:image/png;base64,...`` holding a grayscale mask, or ``""``
	        when the input is empty or any step fails. The mask has the size
	        of the image after it is shrunk to fit within 1024x1024, which
	        may be smaller than the input.
	    """
	    try:
	        if not image_b64 or not image_b64.strip():
	            return ""

	        # Drop the "data:...;base64," prefix when a data URL is supplied.
	        raw = image_b64.split(",")[1] if "," in image_b64 else image_b64
	        img_bytes = base64.b64decode(raw)
	        img = Image.open(io.BytesIO(img_bytes)).convert("RGB")
	        img.thumbnail((1024, 1024))
	        w, h = img.size

	        final_mask = np.zeros((h, w), dtype=np.uint8)

	        # Text detector first, then bubble detector; skip any that did
	        # not load.
	        for model, label in (
	            (text_model, "Text model"),
	            (bubble_model, "Bubble model"),
	        ):
	            if model is not None:
	                final_mask = _merge_model_mask(model, label, img, w, h, final_mask)

	        # Fallback when both models failed or found nothing.
	        if final_mask.max() == 0:
	            print("🔄 YOLO masks empty — using OpenCV fallback", flush=True)
	            final_mask = detect_text_opencv(img)

	        # Final report.
	        if final_mask.max() == 0:
	            print("⚠️ Final mask is empty — no text detected", flush=True)
	        else:
	            pct = 100 * np.count_nonzero(final_mask) / final_mask.size
	            print(f"✅ Final mask: {pct:.1f}% coverage", flush=True)

	        buf = io.BytesIO()
	        Image.fromarray(final_mask).save(buf, format="PNG")
	        return "data:image/png;base64," + base64.b64encode(buf.getvalue()).decode()

	    except Exception as e:
	        # API boundary: callers get an empty string instead of an exception.
	        print(f"❌ segment error: {e}", flush=True)
	        import traceback; traceback.print_exc()
	        return ""


	# ── Gradio UI + API ────────────────────────────────────────
	def _demo_fn(img_pil):
	    """UI adapter: PIL image in, PIL mask out (``None`` when there is no result)."""
	    if img_pil is None:
	        return None
	    # Round-trip through `segment` so the button exercises the same code
	    # path as the API endpoint.
	    png = io.BytesIO()
	    img_pil.save(png, format="PNG")
	    encoded = base64.b64encode(png.getvalue()).decode()
	    mask_b64 = segment(encoded)
	    if not mask_b64:
	        return None
	    payload = mask_b64.split(",")[1]
	    return Image.open(io.BytesIO(base64.b64decode(payload)))


	with gr.Blocks(title="Emperor SEG API") as demo:
	    gr.Markdown("## 🔱 Emperor SEG Space\nText Segmentation API for Manga/Manhwa cleaning.")

	    with gr.Row():
	        with gr.Column():
	            img_input = gr.Image(label="Input Image", type="pil")
	            btn = gr.Button("Segment", variant="primary")
	        with gr.Column():
	            mask_output = gr.Image(label="Text Mask")

	    btn.click(_demo_fn, inputs=[img_input], outputs=[mask_output])

	    # Expose `segment` as a named API endpoint.
	    gr.api(segment, api_name="segment")

	demo.launch(server_name="0.0.0.0", server_port=7860)