# Source: Hugging Face Space "lookalike-yolo", file app.py (13.7 kB)
# Uploaded by daniel0708 - commit d985224 "Update app.py" (verified)
import gradio as gr
from ultralytics import YOLO
import numpy as np
from PIL import Image
import torch
from transformers import CLIPProcessor, CLIPModel
import logging
import traceback
# Configure logging at INFO level so the load-progress messages below are emitted.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Load the YOLO detector once, at import time, from "best.pt"
# (a path relative to the working directory).
logger.info("YOLO ๋กœ๋”ฉ...")
yolo_model = YOLO("best.pt")
# Load the Fashion-CLIP model and its matching processor from the
# "patrickjohncyh/fashion-clip" checkpoint.
logger.info("Fashion-CLIP ๋กœ๋”ฉ...")
clip_model = CLIPModel.from_pretrained("patrickjohncyh/fashion-clip")
clip_processor = CLIPProcessor.from_pretrained("patrickjohncyh/fashion-clip")
# Switch the CLIP model to evaluation mode; this file only runs inference with it.
clip_model.eval()
logger.info("๋ชจ๋ธ ๋กœ๋”ฉ ์™„๋ฃŒ!")
# โ”€โ”€โ”€ ํƒ์ง€ ํ’ˆ์งˆ ํŒŒ๋ผ๋ฏธํ„ฐ โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
# ์‹ ๋ขฐ๋„ ์ž„๊ณ„๊ฐ’: ์ด๋ณด๋‹ค ๋‚ฎ์€ ํƒ์ง€ ๊ฒฐ๊ณผ๋Š” ๋…ธ์ด์ฆˆ๋กœ ๊ฐ„์ฃผํ•˜์—ฌ ์ œ๊ฑฐ
CONF_THRESHOLD = 0.35
# ์ตœ์†Œ ๋ฉด์  ๋น„์œจ: ์ด๋ฏธ์ง€ ์ „์ฒด ๋ฉด์  ๋Œ€๋น„ ํƒ์ง€ ๋ฐ•์Šค ๋ฉด์ ์ด ์ด ๋น„์œจ ๋ฏธ๋งŒ์ด๋ฉด ์ œ๊ฑฐ
# (์˜ˆ: 0.04 = ์ „์ฒด ์ด๋ฏธ์ง€์˜ 4% ๋ฏธ๋งŒ์ธ ๋ฐ•์Šค๋Š” ๋„ˆ๋ฌด ์ž‘์•„ ์‹ ๋ขฐํ•  ์ˆ˜ ์—†์Œ)
MIN_AREA_RATIO = 0.04
# IOU ์ž„๊ณ„๊ฐ’: YOLO ๋‚ด๋ถ€ NMS์—์„œ ์ค‘๋ณต ๋ฐ•์Šค ์ œ๊ฑฐ ๊ธฐ์ค€ (๋‚ฎ์„์ˆ˜๋ก ์—„๊ฒฉ)
IOU_THRESHOLD = 0.45
def _containment_ratio(inner: dict, outer: dict) -> float:
"""
inner ๋ฐ•์Šค๊ฐ€ outer ๋ฐ•์Šค ๋‚ด๋ถ€์— ํฌํ•จ๋œ ๋น„์œจ(0.0~1.0)์„ ๋ฐ˜ํ™˜.
1.0์ด๋ฉด inner๊ฐ€ outer์— ์™„์ „ํžˆ ํฌํ•จ๋จ.
inner ๋ฉด์  ๋Œ€๋น„ ๊ต์ง‘ํ•ฉ ๋ฉด์ ์˜ ๋น„์œจ๋กœ ๊ณ„์‚ฐ.
"""
ix1 = max(inner["x1"], outer["x1"])
iy1 = max(inner["y1"], outer["y1"])
ix2 = min(inner["x2"], outer["x2"])
iy2 = min(inner["y2"], outer["y2"])
inter_w = max(0.0, ix2 - ix1)
inter_h = max(0.0, iy2 - iy1)
inter_area = inter_w * inter_h
inner_area = max(1.0, (inner["x2"] - inner["x1"]) * (inner["y2"] - inner["y1"]))
return inter_area / inner_area
def _select_best_boxes(raw_boxes: list[dict], img_w: int, img_h: int) -> list[dict]:
    """
    Post-process raw YOLO boxes into at most one box per label.

    The pipeline has three stages:

    1. Noise filter - drop boxes whose confidence is below the per-label
       threshold (a lower one for bottoms) or whose area is below
       ``MIN_AREA_RATIO`` of the image area.
    2. Union merge - boxes sharing the same label are merged into their
       bounding union; the merged box keeps the highest confidence.
    3. Containment filter - a non-bottom box that lies at least
       ``CONTAINMENT_THRESHOLD`` inside a larger non-bottom box is removed
       (e.g. a Top box mostly inside an Outer box).

    Args:
        raw_boxes: Boxes as dicts with "x1", "y1", "x2", "y2" and, optionally,
            "confidence" and "label".
        img_w: Width of the source image in pixels.
        img_h: Height of the source image in pixels.

    Returns:
        The refined list of boxes. When the image area is not positive,
        ``raw_boxes`` is returned unchanged.
    """
    img_area = img_w * img_h
    if img_area <= 0:
        return raw_boxes

    # Labels treated as "bottom" wear, in lowercase form.
    bottom_labels = ("bottom", "ํ•˜์˜")

    def _box_area(b: dict) -> float:
        # Plain (unclamped) width * height, used only to compare two boxes.
        return (b["x2"] - b["x1"]) * (b["y2"] - b["y1"])

    # Stage 1: remove noisy boxes by confidence and minimum area.
    # Bottom-specific threshold (author's rationale): occluded legs or a
    # partially hidden trouser leg may be detected with low confidence, so 0.20
    # is used instead of the default so that both boxes survive until the union.
    BOTTOM_CONF_THRESHOLD = 0.20
    filtered = []
    for box in raw_boxes:
        label = box.get("label", "").lower()
        conf = box.get("confidence", 0.0)
        if label in bottom_labels:
            threshold = BOTTOM_CONF_THRESHOLD
        else:
            threshold = CONF_THRESHOLD
        if conf < threshold:
            logger.debug(f"์‹ ๋ขฐ๋„ ๋ฏธ๋‹ฌ ๋ฐ•์Šค ์ œ๊ฑฐ: label={label}, conf={conf:.3f} (๊ธฐ์ค€={threshold:.2f})")
            continue

        # Inverted boxes (x2 < x1 or y2 < y1) count as zero area.
        width = max(0.0, box["x2"] - box["x1"])
        height = max(0.0, box["y2"] - box["y1"])
        area_ratio = width * height / img_area
        if area_ratio < MIN_AREA_RATIO:
            logger.debug(
                f"๋ฉด์  ๋ฏธ๋‹ฌ ๋ฐ•์Šค ์ œ๊ฑฐ: label={label}, "
                f"area_ratio={area_ratio:.3f} (<{MIN_AREA_RATIO})"
            )
            continue
        filtered.append(box)

    # Stage 2: merge boxes of the same label into their bounding union, rather
    # than keeping only the most confident one. This covers e.g. trousers that
    # were detected as two separate boxes.
    union_by_label: dict[str, dict] = {}
    for box in filtered:
        label = box.get("label", "unknown")
        prev = union_by_label.get(label)
        if prev is None:
            # First box for this label: store a copy so the input is not mutated.
            union_by_label[label] = dict(box)
            continue
        prev.update(
            x1=min(prev["x1"], box["x1"]),
            y1=min(prev["y1"], box["y1"]),
            x2=max(prev["x2"], box["x2"]),
            y2=max(prev["y2"], box["y2"]),
            # The merged box keeps the highest confidence seen so far.
            confidence=max(prev.get("confidence", 0.0), box.get("confidence", 0.0)),
        )
        logger.info(
            f"๋ฐ•์Šค Union ๋ณ‘ํ•ฉ: '{label}' ๋ฐ•์Šค 2๊ฐœ ํ•ฉ์‚ฐ "
            f"โ†’ ({prev['x1']:.0f},{prev['y1']:.0f})-({prev['x2']:.0f},{prev['y2']:.0f})"
        )

    # Stage 3: containment filter. Box A is removed when at least this share of
    # its area lies inside a larger box B. Bottom boxes are excluded on both
    # sides (author's rationale: they sit below the outerwear as a separate item).
    CONTAINMENT_THRESHOLD = 0.75
    candidates = list(union_by_label.values())
    to_remove = set()
    for box_a in candidates:
        label_a = box_a.get("label", "").lower()
        if label_a in bottom_labels:
            continue
        for box_b in candidates:
            if box_b is box_a:
                continue
            label_b = box_b.get("label", "").lower()
            if label_b in bottom_labels:
                continue
            ratio = _containment_ratio(box_a, box_b)
            if ratio >= CONTAINMENT_THRESHOLD and _box_area(box_a) < _box_area(box_b):
                to_remove.add(label_a)
                logger.info(
                    f"ํฌํ•จ ๊ด€๊ณ„ ํ•„ํ„ฐ: '{label_a}' ๋ฐ•์Šค๊ฐ€ '{label_b}' ๋ฐ•์Šค์— "
                    f"{ratio:.0%} ํฌํ•จ โ†’ '{label_a}' ์ œ๊ฑฐ"
                )

    result = [b for b in candidates if b.get("label", "").lower() not in to_remove]
    logger.info(
        f"๋ฐ•์Šค ํ•„ํ„ฐ๋ง: ์›๋ณธ {len(raw_boxes)}๊ฐœ โ†’ "
        f"์‹ ๋ขฐ๋„/๋ฉด์  ํ•„ํ„ฐ ํ›„ {len(filtered)}๊ฐœ โ†’ "
        f"Union ๋ณ‘ํ•ฉ ํ›„ {len(union_by_label)}๊ฐœ โ†’ "
        f"ํฌํ•จ ๊ด€๊ณ„ ํ•„ํ„ฐ ํ›„ {len(result)}๊ฐœ"
    )
    return result
def _get_best_crop(pil_img: Image.Image, boxes: list[dict]) -> Image.Image:
    """
    Crop ``pil_img`` to the highest-confidence box in ``boxes``.

    Author's rationale: embedding the cropped item with CLIP is expected to give
    a vector more focused on the fashion item than embedding the whole image.

    Args:
        pil_img: Source image.
        boxes: Filtered boxes with "x1", "y1", "x2", "y2" and optional
            "confidence" / "label" keys.

    Returns:
        The cropped image, or ``pil_img`` itself when ``boxes`` is empty or the
        crop (or its log line) raises.
    """
    if not boxes:
        return pil_img

    # Pick the box with the highest confidence; a missing confidence counts as 0.0.
    best = max(boxes, key=lambda candidate: candidate.get("confidence", 0.0))
    x1, y1, x2, y2 = (int(best[key]) for key in ("x1", "y1", "x2", "y2"))

    # Clamp to the image bounds while guaranteeing a crop of at least 1x1 pixel.
    width, height = pil_img.size
    x1 = max(0, min(x1, width - 1))
    y1 = max(0, min(y1, height - 1))
    x2 = max(x1 + 1, min(x2, width))
    y2 = max(y1 + 1, min(y2, height))

    try:
        cropped = pil_img.crop((x1, y1, x2, y2))
        logger.info(
            f"CLIP ์ž„๋ฒ ๋”ฉ์šฉ ํฌ๋กญ ์ด๋ฏธ์ง€: "
            f"label={best.get('label')}, conf={best.get('confidence', 0):.3f}, "
            f"crop=({x1},{y1},{x2},{y2})"
        )
    except Exception as e:
        # Best effort: fall back to the uncropped image rather than failing the request.
        logger.warning(f"ํฌ๋กญ ์‹คํŒจ, ์›๋ณธ ์‚ฌ์šฉ: {e}")
        return pil_img
    return cropped
def _to_rgb_image(image) -> "Image.Image":
    """Convert a supported input (ndarray, PIL image, or file path) to an RGB PIL image."""
    if isinstance(image, np.ndarray):
        return Image.fromarray(image).convert("RGB")
    if isinstance(image, Image.Image):
        return image.convert("RGB")
    # Strings are used as-is; any other object is stringified and treated as a path.
    path = image if isinstance(image, str) else str(image)
    # convert() returns a fully loaded copy, so the file can be closed right
    # away instead of leaking the handle until garbage collection.
    with Image.open(path) as source:
        return source.convert("RGB")


def _collect_raw_boxes(results) -> list[dict]:
    """Flatten YOLO prediction results into a list of plain box dicts."""
    raw_boxes = []
    for result in results or []:
        if not result.boxes:
            continue
        for box in result.boxes:
            x1, y1, x2, y2 = box.xyxy[0].tolist()
            conf = float(box.conf[0]) if box.conf is not None else 0
            cls = int(box.cls[0]) if box.cls is not None else 0
            label = (
                result.names.get(cls, "unknown")
                if hasattr(result, "names")
                else "unknown"
            )
            raw_boxes.append(
                {
                    "x1": x1,
                    "y1": y1,
                    "x2": x2,
                    "y2": y2,
                    "confidence": conf,
                    "label": label,
                }
            )
    return raw_boxes


def predict(image):
    """
    Detect fashion items in ``image`` and return an embedding of the best crop.

    Args:
        image: A numpy array, a PIL image, a file path string, or any object
            whose ``str()`` is a file path. ``None`` yields an error response.

    Returns:
        A JSON-serialisable dict. On success: "status" ("success"),
        "embedding" (L2-normalised CLIP image embedding as a list of floats),
        "boxes" (post-processed detection boxes), "label" (label of the most
        confident box, or "full_image" when nothing usable was detected) and
        "category" (that same label or None). On failure: "status" ("error"),
        "error_message", empty "boxes", "embedding" None, "label" "unknown",
        "category" None, plus "traceback" when an exception was raised.

    Exceptions are never propagated; they are logged and reported in the
    returned dict.
    """
    try:
        if image is None:
            return {
                "status": "error",
                "error_message": "No image provided",
                "embedding": None,
                "boxes": [],
                "label": "unknown",
                "category": None,
            }

        # -- Image preprocessing --
        pil_img = _to_rgb_image(image)
        img_w, img_h = pil_img.size

        # -- YOLO detection --
        # conf=0.10: deliberately low so occluded / weakly detected boxes are
        #   still collected; the real confidence filtering happens per category
        #   in _select_best_boxes.
        # iou=0.80: loose NMS so that two overlapping boxes of the same
        #   category (e.g. left-leg and right-leg boxes overlapping by less
        #   than 0.80) both survive and can later be merged into one union box.
        results = yolo_model.predict(
            source=pil_img,
            conf=0.10,
            iou=0.80,
            save=False,
            verbose=False,
        )
        raw_boxes = _collect_raw_boxes(results)

        # -- Box post-processing: noise removal, per-label merge, containment filter --
        filtered_boxes = _select_best_boxes(raw_boxes, img_w, img_h)

        # Representative category: label of the most confident remaining box.
        detected_category = None
        if filtered_boxes:
            best_box = max(filtered_boxes, key=lambda b: b.get("confidence", 0.0))
            label = best_box.get("label", "")
            if label and label != "unknown":
                detected_category = label

        # -- CLIP embedding --
        # Embed the crop of the best box when there is one, otherwise the full image.
        embed_img = _get_best_crop(pil_img, filtered_boxes)
        inputs = clip_processor(images=embed_img, return_tensors="pt")
        with torch.no_grad():
            # Call vision_model and then visual_projection explicitly, in that order.
            vision_outputs = clip_model.vision_model(**inputs)
            features = clip_model.visual_projection(vision_outputs.pooler_output)
            # L2-normalise so that a dot product equals cosine similarity.
            embedding = torch.nn.functional.normalize(features, p=2, dim=1)
        embedding_list = embedding[0].cpu().tolist()

        logger.info(
            f"์ž„๋ฒ ๋”ฉ ์ƒ์„ฑ ์™„๋ฃŒ: dim={len(embedding_list)}, "
            f"filtered_boxes={len(filtered_boxes)}, "
            f"category={detected_category}"
        )
        return {
            "status": "success",
            "embedding": embedding_list,  # 512-d vector per the original author's note
            "boxes": filtered_boxes,
            "label": detected_category if detected_category else "full_image",
            "category": detected_category,
        }
    except Exception as e:
        # Top-level boundary: report the failure in the response instead of raising.
        err_msg = traceback.format_exc()
        logger.error(f"์ถ”๋ก  ์ค‘ ์˜ˆ์™ธ ๋ฐœ์ƒ: {err_msg}")
        # NOTE(review): the full traceback is returned to the caller, which
        # exposes internals - confirm this is intended outside of debugging.
        return {
            "status": "error",
            "error_message": str(e),
            "traceback": err_msg,
            "embedding": None,
            "boxes": [],
            "label": "unknown",
            "category": None,
        }
# Gradio wiring: the uploaded image is handed to predict() as a numpy array and
# the returned dict is rendered as JSON.
demo = gr.Interface(fn=predict, inputs=gr.Image(type="numpy"), outputs=gr.JSON())

# Start the app at import time with show_error enabled.
demo.launch(show_error=True)