Spaces:

lazistar
/

toonslate-detector

Sleeping

App Files Community

lazistar committed on Jan 28

Commit

5874bb3

verified ·

1 Parent(s): 48d4586

Update app.py

Browse files

Files changed (1) hide show

app.py +9 -89

app.py CHANGED Viewed

@@ -7,9 +7,7 @@ from huggingface_hub import hf_hub_download
 from ultralytics import YOLO
 from PIL import Image
 import numpy as np
-import cv2
-# ===== Detection 모델 로드 =====
 bubble_model_path = hf_hub_download(
   repo_id="ogkalu/comic-speech-bubble-detector-yolov8m",
   filename="comic-speech-bubble-detector.pt"
@@ -18,43 +16,12 @@ text_model_path = hf_hub_download(
   repo_id="ogkalu/comic-text-segmenter-yolov8m",
   filename="comic-text-segmenter.pt"
 )
 bubble_model = YOLO(bubble_model_path)
 text_model = YOLO(text_model_path)
-# ===== LaMa 모델 직접 로드 (CPU) =====
-LAMA_MODEL_URL = "https://github.com/enesmsahin/simple-lama-inpainting/releases/download/v0.1.0/big-lama.pt"
-lama_model_path = torch.hub.download_url_to_file(LAMA_MODEL_URL, "/tmp/big-lama.pt", progress=True)
-lama_model = torch.jit.load("/tmp/big-lama.pt", map_location='cpu')
-lama_model.eval()
-def run_lama_inpaint(image: Image.Image, mask: Image.Image) -> Image.Image:
-  """LaMa inpainting 실행 (CPU)"""
-  # 이미지 전처리
-  img = np.array(image.convert('RGB'))
-  img = img.astype(np.float32) / 255.0
-  img = torch.from_numpy(img).permute(2, 0, 1).unsqueeze(0)  # [1, 3, H, W]
-  # 마스크 전처리
-  msk = np.array(mask.convert('L'))
-  msk = (msk > 128).astype(np.float32)  # 이진화
-  msk = torch.from_numpy(msk).unsqueeze(0).unsqueeze(0)  # [1, 1, H, W]
-  # 추론
-  with torch.no_grad():
-      result = lama_model(img, msk)
-  # 후처리
-  result = result[0].permute(1, 2, 0).numpy()
-  result = (result * 255).clip(0, 255).astype(np.uint8)
-  return Image.fromarray(result)
-# ===== API 함수 =====
 def detect(image: Image.Image) -> dict:
-  """말풍선/텍스트 영역 탐지"""
   img_array = np.array(image)
   height, width = img_array.shape[:2]
@@ -69,59 +36,12 @@ def detect(image: Image.Image) -> dict:
       "text_confs": text_results[0].boxes.conf.tolist(),
   }
-def inpaint(image: Image.Image, mask: Image.Image) -> Image.Image:
-  """마스크 영역 inpainting"""
-  return run_lama_inpaint(image, mask)
-def detect_and_clean(image: Image.Image) -> Image.Image:
-  """탐지 → 마스크 생성 → inpainting 통합"""
-  img_array = np.array(image)
-  height, width = img_array.shape[:2]
-  text_results = text_model.predict(img_array, imgsz=1024, conf=0.35)
-  text_boxes = text_results[0].boxes.xyxy.tolist()
-  mask = np.zeros((height, width), dtype=np.uint8)
-  for box in text_boxes:
-      x1, y1, x2, y2 = map(int, box)
-      cv2.rectangle(mask, (x1, y1), (x2, y2), 255, -1)
-  mask_pil = Image.fromarray(mask)
-  return run_lama_inpaint(image, mask_pil)
-# ===== Gradio 인터페이스 =====
-with gr.Blocks(title="Toonslate API") as demo:
-  gr.Markdown("# Toonslate API\n웹툰 탐지 + Inpainting")
-  with gr.Tab("Detect"):
-      gr.Interface(
-          fn=detect,
-          inputs=gr.Image(type="pil"),
-          outputs=gr.JSON(),
-          description="말풍선/텍스트 bbox 탐지"
-      )
-  with gr.Tab("Inpaint"):
-      gr.Interface(
-          fn=inpaint,
-          inputs=[
-              gr.Image(type="pil", label="원본 이미지"),
-              gr.Image(type="pil", label="마스크 (흰색=제거)")
-          ],
-          outputs=gr.Image(type="pil", label="결과"),
-          description="마스크 영역 inpainting"
-      )
-  with gr.Tab("Detect & Clean"):
-      gr.Interface(
-          fn=detect_and_clean,
-          inputs=gr.Image(type="pil", label="원본 이미지"),
-          outputs=gr.Image(type="pil", label="텍스트 제거된 이미지"),
-          description="텍스트 탐지 → 자동 제거 (통합)"
-      )
 demo.launch(show_error=True)

 from ultralytics import YOLO
 from PIL import Image
 import numpy as np
 bubble_model_path = hf_hub_download(
   repo_id="ogkalu/comic-speech-bubble-detector-yolov8m",
   filename="comic-speech-bubble-detector.pt"
   repo_id="ogkalu/comic-text-segmenter-yolov8m",
   filename="comic-text-segmenter.pt"
 )
 bubble_model = YOLO(bubble_model_path)
 text_model = YOLO(text_model_path)
 def detect(image: Image.Image) -> dict:
+  """말풍선과 텍스트 영역 탐지"""
   img_array = np.array(image)
   height, width = img_array.shape[:2]
       "text_confs": text_results[0].boxes.conf.tolist(),
   }
+demo = gr.Interface(
+  fn=detect,
+  inputs=gr.Image(type="pil"),
+  outputs=gr.JSON(),
+  title="Toonslate Detector",
+  description="웹툰 말풍선 및 텍스트 영역 탐지 API"
+)
 demo.launch(show_error=True)