lazistar committed on
Commit
5874bb3
·
verified ·
1 Parent(s): 48d4586

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -89
app.py CHANGED
@@ -7,9 +7,7 @@ from huggingface_hub import hf_hub_download
7
  from ultralytics import YOLO
8
  from PIL import Image
9
  import numpy as np
10
- import cv2
11
 
12
- # ===== Detection 모델 로드 =====
13
  bubble_model_path = hf_hub_download(
14
  repo_id="ogkalu/comic-speech-bubble-detector-yolov8m",
15
  filename="comic-speech-bubble-detector.pt"
@@ -18,43 +16,12 @@ text_model_path = hf_hub_download(
18
  repo_id="ogkalu/comic-text-segmenter-yolov8m",
19
  filename="comic-text-segmenter.pt"
20
  )
 
21
  bubble_model = YOLO(bubble_model_path)
22
  text_model = YOLO(text_model_path)
23
 
24
- # ===== LaMa 모델 직접 로드 (CPU) =====
25
- LAMA_MODEL_URL = "https://github.com/enesmsahin/simple-lama-inpainting/releases/download/v0.1.0/big-lama.pt"
26
- lama_model_path = torch.hub.download_url_to_file(LAMA_MODEL_URL, "/tmp/big-lama.pt", progress=True)
27
- lama_model = torch.jit.load("/tmp/big-lama.pt", map_location='cpu')
28
- lama_model.eval()
29
-
30
-
31
- def run_lama_inpaint(image: Image.Image, mask: Image.Image) -> Image.Image:
32
- """LaMa inpainting 실행 (CPU)"""
33
- # 이미지 전처리
34
- img = np.array(image.convert('RGB'))
35
- img = img.astype(np.float32) / 255.0
36
- img = torch.from_numpy(img).permute(2, 0, 1).unsqueeze(0) # [1, 3, H, W]
37
-
38
- # 마스크 전처리
39
- msk = np.array(mask.convert('L'))
40
- msk = (msk > 128).astype(np.float32) # 이진화
41
- msk = torch.from_numpy(msk).unsqueeze(0).unsqueeze(0) # [1, 1, H, W]
42
-
43
- # 추론
44
- with torch.no_grad():
45
- result = lama_model(img, msk)
46
-
47
- # 후처리
48
- result = result[0].permute(1, 2, 0).numpy()
49
- result = (result * 255).clip(0, 255).astype(np.uint8)
50
-
51
- return Image.fromarray(result)
52
-
53
-
54
- # ===== API 함수 =====
55
-
56
  def detect(image: Image.Image) -> dict:
57
- """말풍선/텍스트 영역 탐지"""
58
  img_array = np.array(image)
59
  height, width = img_array.shape[:2]
60
 
@@ -69,59 +36,12 @@ def detect(image: Image.Image) -> dict:
69
  "text_confs": text_results[0].boxes.conf.tolist(),
70
  }
71
 
72
-
73
- def inpaint(image: Image.Image, mask: Image.Image) -> Image.Image:
74
- """마스크 영역 inpainting"""
75
- return run_lama_inpaint(image, mask)
76
-
77
-
78
- def detect_and_clean(image: Image.Image) -> Image.Image:
79
- """탐지 → 마스크 생성 → inpainting 통합"""
80
- img_array = np.array(image)
81
- height, width = img_array.shape[:2]
82
-
83
- text_results = text_model.predict(img_array, imgsz=1024, conf=0.35)
84
- text_boxes = text_results[0].boxes.xyxy.tolist()
85
-
86
- mask = np.zeros((height, width), dtype=np.uint8)
87
- for box in text_boxes:
88
- x1, y1, x2, y2 = map(int, box)
89
- cv2.rectangle(mask, (x1, y1), (x2, y2), 255, -1)
90
-
91
- mask_pil = Image.fromarray(mask)
92
- return run_lama_inpaint(image, mask_pil)
93
-
94
-
95
- # ===== Gradio 인터페이스 =====
96
-
97
- with gr.Blocks(title="Toonslate API") as demo:
98
- gr.Markdown("# Toonslate API\n웹툰 탐지 + Inpainting")
99
-
100
- with gr.Tab("Detect"):
101
- gr.Interface(
102
- fn=detect,
103
- inputs=gr.Image(type="pil"),
104
- outputs=gr.JSON(),
105
- description="말풍선/텍스트 bbox 탐지"
106
- )
107
-
108
- with gr.Tab("Inpaint"):
109
- gr.Interface(
110
- fn=inpaint,
111
- inputs=[
112
- gr.Image(type="pil", label="원본 이미지"),
113
- gr.Image(type="pil", label="마스크 (흰색=제거)")
114
- ],
115
- outputs=gr.Image(type="pil", label="결과"),
116
- description="마스크 영역 inpainting"
117
- )
118
-
119
- with gr.Tab("Detect & Clean"):
120
- gr.Interface(
121
- fn=detect_and_clean,
122
- inputs=gr.Image(type="pil", label="원본 이미지"),
123
- outputs=gr.Image(type="pil", label="텍스트 제거된 이미지"),
124
- description="텍스트 탐지 → 자동 제거 (통합)"
125
- )
126
 
127
  demo.launch(show_error=True)
 
7
  from ultralytics import YOLO
8
  from PIL import Image
9
  import numpy as np
 
10
 
 
11
  bubble_model_path = hf_hub_download(
12
  repo_id="ogkalu/comic-speech-bubble-detector-yolov8m",
13
  filename="comic-speech-bubble-detector.pt"
 
16
  repo_id="ogkalu/comic-text-segmenter-yolov8m",
17
  filename="comic-text-segmenter.pt"
18
  )
19
+
20
  bubble_model = YOLO(bubble_model_path)
21
  text_model = YOLO(text_model_path)
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  def detect(image: Image.Image) -> dict:
24
+ """말풍선과 텍스트 영역 탐지"""
25
  img_array = np.array(image)
26
  height, width = img_array.shape[:2]
27
 
 
36
  "text_confs": text_results[0].boxes.conf.tolist(),
37
  }
38
 
39
+ demo = gr.Interface(
40
+ fn=detect,
41
+ inputs=gr.Image(type="pil"),
42
+ outputs=gr.JSON(),
43
+ title="Toonslate Detector",
44
+ description="웹툰 말풍선 및 텍스트 영역 탐지 API"
45
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
  demo.launch(show_error=True)