# Shrimp / app.py
# (Hugging Face Space page residue, commented out so the file parses:
#  uploader "Be2Jay", commit "Fix RT-DETR module dependency issue", hash 27a8db0)
# -*- coding: utf-8 -*-
"""
๐Ÿฆ ์ƒˆ์šฐ ๊ฒ€์ถœ ํ†ตํ•ฉ ์‹œ์Šคํ…œ
3๊ฐœ์˜ ์•ฑ์„ ํ•˜๋‚˜๋กœ ํ†ตํ•ฉ: ์ž๋™ ๊ฒ€์ถœ, ๋ผ๋ฒจ๋ง ๋„๊ตฌ, ๋ฐ๋ชจ
RT-DETR ๋˜๋Š” VIDraft/Shrimp ํด๋ผ์šฐ๋“œ ๋ชจ๋ธ ์„ ํƒ ๊ฐ€๋Šฅ
"""
import sys
sys.stdout.reconfigure(encoding='utf-8')
import gradio as gr
from PIL import Image, ImageDraw, ImageFont
import numpy as np
import json
import os
import glob
from datetime import datetime
import torch
from transformers import RTDetrForObjectDetection, RTDetrImageProcessor
import requests
import base64
from io import BytesIO
from inference_sdk import InferenceHTTPClient
import tempfile
# ============================================================
# RT-DETR ๋ฐ ํ•„ํ„ฐ๋ง ํ•จ์ˆ˜๋“ค (์ด์ „ test_visual_validation์—์„œ ํ†ตํ•ฉ)
# ============================================================
import cv2
def load_rtdetr_model():
    """Instantiate the pretrained RT-DETR processor and model in eval mode."""
    print("🔄 RT-DETR 모델 로딩 중...")
    checkpoint = "PekingU/rtdetr_r50vd_coco_o365"
    image_processor = RTDetrImageProcessor.from_pretrained(checkpoint)
    detector = RTDetrForObjectDetection.from_pretrained(checkpoint)
    detector.eval()
    print("✅ RT-DETR 로딩 완료")
    return image_processor, detector
def detect_with_rtdetr(image, processor, model, confidence=0.3):
    """Run RT-DETR object detection on a PIL image.

    Returns a list of {'bbox': [x1, y1, x2, y2], 'confidence': float,
    'label': int} dicts for every detection above `confidence`.
    """
    encoded = processor(images=image, return_tensors="pt")
    with torch.no_grad():
        raw_outputs = model(**encoded)
    # post-processing expects (height, width); PIL .size is (width, height)
    sizes = torch.tensor([image.size[::-1]])
    parsed = processor.post_process_object_detection(
        raw_outputs, target_sizes=sizes, threshold=confidence
    )[0]
    return [
        {'bbox': box.tolist(), 'confidence': s.item(), 'label': lbl.item()}
        for s, lbl, box in zip(parsed["scores"], parsed["labels"], parsed["boxes"])
    ]
def calculate_morphological_features(bbox, image_size):
    """Compute shape descriptors of a bounding box.

    Returns aspect ratio (long/short side), area as a fraction of the
    image, compactness (4*pi*area / perimeter^2), and the raw width/height.
    Degenerate sides are guarded with max(..., 1) to avoid division by zero.
    """
    x1, y1, x2, y2 = bbox
    w = x2 - x1
    h = y2 - y1
    # Aspect ratio: longer side over shorter side (>= 1 for any box)
    elongation = max(w, h) / max(min(w, h), 1)
    # Fraction of the full image covered by the box
    img_w, img_h = image_size
    area = w * h
    coverage = area / (img_w * img_h)
    # Compactness: 1.0 would be a circle-like shape, smaller = elongated
    perim = 2 * (w + h)
    roundness = (4 * np.pi * area) / max(perim ** 2, 1)
    return {
        'aspect_ratio': elongation,
        'area_ratio': coverage,
        'compactness': roundness,
        'width': w,
        'height': h
    }
def calculate_visual_features(image_pil, bbox):
    """Compute color statistics (hue mean, saturation mean, hue std) of a box.

    Returns worst-case sentinel values when the crop is empty (box outside
    the image or zero-sized).
    """
    # PIL (RGB) → OpenCV (BGR)
    frame = cv2.cvtColor(np.array(image_pil), cv2.COLOR_RGB2BGR)
    left, top, right, bottom = (int(v) for v in bbox)
    # Crop the bounding-box region
    patch = frame[top:bottom, left:right]
    if patch.size == 0:
        return {'hue': 100, 'saturation': 255, 'color_std': 255}
    hsv = cv2.cvtColor(patch, cv2.COLOR_BGR2HSV)
    return {
        'hue': np.mean(hsv[:, :, 0]),          # mean hue
        'saturation': np.mean(hsv[:, :, 1]),   # mean saturation
        'color_std': np.std(hsv[:, :, 0]),     # hue consistency (std dev)
    }
def apply_universal_filter(detections, image, threshold=90):
    """Score each detection with heuristic shrimp-likeness features.

    Combines morphological cues (aspect ratio, compactness, absolute area)
    and visual cues (hue, saturation, hue consistency) plus the detector
    confidence into an additive 'filter_score', and records human-readable
    'filter_reasons' on every detection dict (mutated in place).

    Every detection is returned annotated; nothing is dropped here — callers
    apply their own cutoff on 'filter_score'.

    NOTE(review): the `threshold` parameter is currently unused; kept for
    interface compatibility with existing callers.
    """
    img_size = image.size
    filtered = []
    for det in detections:
        bbox = det['bbox']
        # 1. Morphological features
        morph = calculate_morphological_features(bbox, img_size)
        # 2. Visual features
        visual = calculate_visual_features(image, bbox)
        # 3. Accumulate the score with reasons for the UI
        score = 0
        reasons = []
        # Aspect ratio (4:1 ~ 9:1 ideal, 3~4 / 9~10 borderline)
        if 4.0 <= morph['aspect_ratio'] <= 9.0:
            score += 25
            reasons.append(f"✓ 종횡비 {morph['aspect_ratio']:.1f}")
        elif 3.0 <= morph['aspect_ratio'] < 4.0 or 9.0 < morph['aspect_ratio'] <= 10.0:
            score += 12
            reasons.append(f"△ 종횡비 {morph['aspect_ratio']:.1f}")
        else:
            score -= 5
            reasons.append(f"✗ 종횡비 {morph['aspect_ratio']:.1f}")
        # Compactness (< 0.50 means an elongated, shrimp-like shape)
        if morph['compactness'] < 0.40:
            score += 30
            reasons.append(f"✓ 세장도 {morph['compactness']:.2f}")
        elif 0.40 <= morph['compactness'] < 0.50:
            score += 15
            reasons.append(f"△ 세장도 {morph['compactness']:.2f}")
        else:
            reasons.append(f"✗ 세장도 {morph['compactness']:.2f}")
            score -= 20
        # Absolute pixel area (50K–500K ideal; larger/smaller penalized)
        abs_area = morph['width'] * morph['height']
        if 50000 <= abs_area <= 500000:
            score += 35
            reasons.append(f"✓ 면적 {abs_area/1000:.0f}K")
        elif 500000 < abs_area <= 800000:
            score -= 10
            reasons.append(f"△ 면적 {abs_area/1000:.0f}K")
        elif abs_area > 800000:
            score -= 30
            reasons.append(f"✗ 면적 {abs_area/1000:.0f}K (너무큼)")
        else:
            score -= 10
            reasons.append(f"✗ 면적 {abs_area/1000:.0f}K (너무작음)")
        # Hue: 90–130 is treated as background-like and penalized
        hue = visual['hue']
        if hue < 40 or hue > 130:
            score += 10
            reasons.append(f"✓ 색상 {hue:.0f}")
        elif 90 <= hue <= 130:
            score -= 5
            reasons.append(f"✗ 색상 {hue:.0f} (배경)")
        else:
            reasons.append(f"△ 색상 {hue:.0f}")
        # Saturation: low saturation favored
        if visual['saturation'] < 85:
            score += 20
            reasons.append(f"✓ 채도 {visual['saturation']:.0f}")
        elif 85 <= visual['saturation'] < 120:
            score += 5
            reasons.append(f"△ 채도 {visual['saturation']:.0f}")
        else:
            score -= 15
            reasons.append(f"✗ 채도 {visual['saturation']:.0f} (높음)")
        # Color consistency: low hue std-dev favored
        if visual['color_std'] < 50:
            score += 15
            reasons.append(f"✓ 색상일관성 {visual['color_std']:.1f}")
        elif 50 <= visual['color_std'] < 80:
            score += 5
            reasons.append(f"△ 색상일관성 {visual['color_std']:.1f}")
        else:
            score -= 10
            reasons.append(f"✗ 색상일관성 {visual['color_std']:.1f} (불일치)")
        # Detector confidence bonus (when present on the detection)
        if 'confidence' in det:
            if det['confidence'] >= 0.3:
                score += 15
                reasons.append(f"✓ 신뢰도 {det['confidence']:.0%}")
            elif det['confidence'] >= 0.1:
                score += 8
                reasons.append(f"△ 신뢰도 {det['confidence']:.0%}")
            else:
                reasons.append(f"✗ 신뢰도 {det['confidence']:.0%}")
        det['filter_score'] = score
        det['filter_reasons'] = reasons
        filtered.append(det)
    return filtered
# YOLOv8 import
# ============================================================
# YOLOv8 model setup (optional)
# ============================================================
# Path to the locally trained YOLOv8 checkpoint; the model itself is
# loaded lazily by load_yolo_model() on first use.
YOLO_MODEL_PATH = "runs/train/yolov8m_shrimp2/weights/best.pt"
yolo_model = None  # lazily-populated singleton (see load_yolo_model)
YOLO_AVAILABLE = False  # True only if ultralytics imports AND the weights exist
try:
    from ultralytics import YOLO
    import os  # NOTE(review): os is already imported at module top; harmless re-import
    if os.path.exists(YOLO_MODEL_PATH):
        YOLO_AVAILABLE = True
        print(f"✅ YOLOv8 사용 가능: {YOLO_MODEL_PATH}")
    else:
        print(f"⚠️ YOLOv8 모델 파일 없음: {YOLO_MODEL_PATH}")
except ImportError:
    print("⚠️ ultralytics 패키지 없음 - YOLOv8 비활성화")
def load_yolo_model():
    """Return the YOLOv8 model, loading it into the module global on first use.

    Raises Exception when ultralytics or the weights file is unavailable.
    """
    global yolo_model
    if not YOLO_AVAILABLE:
        raise Exception("YOLOv8 모델을 사용할 수 없습니다")
    if yolo_model is not None:
        return yolo_model
    print(f"🔄 YOLOv8 모델 로딩 중: {YOLO_MODEL_PATH}")
    yolo_model = YOLO(YOLO_MODEL_PATH)
    print("✅ YOLOv8 모델 로딩 완료")
    return yolo_model
def detect_with_yolo(image, confidence=0.1):
    """Detect objects with the custom-trained YOLOv8 model.

    Returns a list of {'bbox': [x1, y1, x2, y2], 'confidence': float}
    dicts; returns [] (after logging a traceback) on any failure.
    """
    try:
        net = load_yolo_model()
        # Run inference
        prediction_batches = net.predict(
            source=image,
            conf=confidence,
            verbose=False
        )
        found = []
        for batch in prediction_batches:
            for box in batch.boxes:
                found.append({
                    'bbox': box.xyxy[0].tolist(),
                    'confidence': box.conf[0].item()
                })
        print(f"✅ YOLOv8 검출 완료: {len(found)}개")
        return found
    except Exception as e:
        print(f"❌ YOLOv8 검출 오류: {str(e)}")
        import traceback
        traceback.print_exc()
        return []
# ============================================================
# Roboflow SDK setup (optimized approach)
# ============================================================
# Read the API key from the environment (Hugging Face Secrets).
ROBOFLOW_API_KEY = os.getenv("ROBOFLOW_API_KEY", "")
# Roboflow Inference SDK client (supports connection pooling).
# NOTE(review): detect_with_roboflow() below actually calls the REST
# endpoint via `requests`; this client object appears unused at runtime.
roboflow_client = InferenceHTTPClient(
    api_url="https://serverless.roboflow.com",
    api_key=ROBOFLOW_API_KEY
)
def detect_with_roboflow(image, confidence=0.065):
    """Detect shrimp via the Roboflow serverless workflow API.

    The image is downscaled to at most 640x640 for upload, sent as a
    base64-encoded JPEG, and the returned boxes are scaled back to the
    original image coordinates.  Only predictions of class 'shrimp' at or
    above `confidence` are kept.  Returns a list of
    {'bbox': [x1, y1, x2, y2], 'confidence': float} dicts, or [] on any
    API/parsing error.
    """
    try:
        # Keep the original image so coordinates can be rescaled back.
        image_original = image
        original_size = image_original.size
        # Resize for API upload (thumbnail preserves aspect ratio).
        image_resized = image_original.copy()
        image_resized.thumbnail((640, 640), Image.Resampling.LANCZOS)
        print(f"📐 이미지 리사이즈: {original_size} → {image_resized.size}")
        # Base64-encode the resized JPEG.
        buffered = BytesIO()
        image_resized.save(buffered, format="JPEG", quality=80)
        img_base64 = base64.b64encode(buffered.getvalue()).decode()
        print(f"📦 Base64 크기: {len(img_base64)} bytes")
        print(f"🔄 Roboflow API 추론 시작...")
        # Call the workflow endpoint with requests (more stable than the SDK).
        response = requests.post(
            'https://serverless.roboflow.com/vidraft/workflows/find-shrimp-6',
            headers={'Content-Type': 'application/json'},
            json={
                'api_key': ROBOFLOW_API_KEY,
                'inputs': {
                    'image': {'type': 'base64', 'value': img_base64}
                }
            },
            timeout=30
        )
        if response.status_code != 200:
            print(f"❌ Roboflow API 오류: {response.status_code}")
            print(f"응답: {response.text}")
            return []
        result = response.json()
        print(f"🔍 Roboflow 응답: {json.dumps(result, indent=2, ensure_ascii=False)[:500]}...")
        # Parse the workflow response structure (three known shapes).
        detections = []
        predictions = []
        # Shape 1: outputs[0].predictions.predictions (workflow form)
        if isinstance(result, dict) and 'outputs' in result and len(result['outputs']) > 0:
            output = result['outputs'][0]
            if isinstance(output, dict) and 'predictions' in output:
                pred_data = output['predictions']
                # predictions is a dict wrapping an inner predictions array
                if isinstance(pred_data, dict) and 'predictions' in pred_data:
                    predictions = pred_data['predictions']
                # predictions is already an array
                elif isinstance(pred_data, list):
                    predictions = pred_data
                else:
                    predictions = [pred_data]
        # Shape 2: top-level predictions
        elif isinstance(result, dict) and 'predictions' in result:
            predictions = result['predictions']
        # Shape 3: bare list
        elif isinstance(result, list):
            predictions = result
        print(f"📦 찾은 predictions: {len(predictions)}개")
        # Scale factors (resized coordinates → original coordinates).
        scale_x = original_size[0] / image_resized.size[0]
        scale_y = original_size[1] / image_resized.size[1]
        print(f"📏 스케일: x={scale_x:.2f}, y={scale_y:.2f}")
        for pred in predictions:
            # Class filter: keep only 'shrimp'.
            pred_class = pred.get('class', '')
            if pred_class != 'shrimp':
                continue
            # Confidence filter.
            pred_confidence = pred.get('confidence', 0)
            if pred_confidence < confidence:
                continue
            # Bounding box in resized coordinates (center x/y + size).
            x = pred.get('x', 0)
            y = pred.get('y', 0)
            width = pred.get('width', 0)
            height = pred.get('height', 0)
            # Scale back to the original image size.
            x_scaled = x * scale_x
            y_scaled = y * scale_y
            width_scaled = width * scale_x
            height_scaled = height * scale_y
            # Convert center coordinates to top-left / bottom-right corners.
            x1 = x_scaled - width_scaled / 2
            y1 = y_scaled - height_scaled / 2
            x2 = x_scaled + width_scaled / 2
            y2 = y_scaled + height_scaled / 2
            detections.append({
                'bbox': [x1, y1, x2, y2],
                'confidence': pred_confidence
            })
            print(f" ✓ 검출 (shrimp): conf={pred_confidence:.2%}, bbox=[{x1:.0f},{y1:.0f},{x2:.0f},{y2:.0f}]")
        print(f"✅ Roboflow 검출 완료: {len(detections)}개")
        return detections
    except Exception as e:
        print(f"❌ Roboflow SDK 오류: {str(e)}")
        import traceback
        traceback.print_exc()
        return []
# ============================================================
# Global model handles (lazy loading)
# ============================================================
processor = None
model = None
def load_rtdetr_on_demand():
    """Load RT-DETR into the module-level globals on first request.

    Returns a human-readable status string for the UI.
    """
    global processor, model
    if processor is not None and model is not None:
        return "ℹ️ RT-DETR 모델이 이미 로딩되어 있습니다"
    processor, model = load_rtdetr_model()
    return "✅ RT-DETR 모델 로딩 완료"
print("✅ VIDraft/Shrimp 클라우드 모델 사용 가능\n")
# ============================================================
# ๋ผ๋ฒจ๋ง ๋„๊ตฌ ์ „์—ญ ๋ณ€์ˆ˜
# ============================================================
current_data = {
'folder': None,
'images': [],
'current_idx': 0,
'detections': {},
'selections': {},
'confidence_threshold': 0.2,
'image_cache': {},
'model_type': 'RT-DETR' # ํ˜„์žฌ ์„ ํƒ๋œ ๋ชจ๋ธ
}
GROUND_TRUTH_FILE = "ground_truth.json"
DATA_BASE = "data/ํฐ๋‹ค๋ฆฌ์ƒˆ์šฐ ์‹ค์ธก ๋ฐ์ดํ„ฐ_์ตํˆฌ์Šค์—์ด์•„์ด(์ฃผ)"
# ============================================================
# Per-model detection dispatch
# ============================================================
def detect_with_selected_model(image, confidence, model_type):
    """Dispatch detection to the backend chosen in the UI.

    Raises ValueError when RT-DETR is requested before being loaded;
    unknown model names return an empty list.
    """
    if model_type == "RT-DETR":
        if processor is None or model is None:
            raise ValueError("⚠️ RT-DETR 모델이 로딩되지 않았습니다. '🔄 RT-DETR 로드' 버튼을 먼저 클릭하세요.")
        return detect_with_rtdetr(image, processor, model, confidence)
    if model_type == "VIDraft/Shrimp":
        return detect_with_roboflow(image, confidence)
    if model_type == "YOLOv8":
        return detect_with_yolo(image, confidence)
    return []
# ============================================================
# Tab 1: automatic detection (interactive validation)
# ============================================================
def interactive_detect(image, confidence, filter_threshold, show_all, model_type, use_filter):
    """Run detection with the selected model and visualize the result.

    Args:
        image: input PIL image (None yields a warning message).
        confidence: detector confidence threshold.
        filter_threshold: minimum 'filter_score' a detection must reach;
            only applied when `use_filter` is True.
        show_all: also draw otherwise-unclassified detections in gray.
        model_type: "RT-DETR", "VIDraft/Shrimp" or "YOLOv8".
        use_filter: enable filter-score thresholding.

    Returns:
        (annotated PIL image or None, markdown report string).
    """
    if image is None:
        return None, "⚠️ 이미지를 업로드하세요."
    try:
        # Detect with the selected model
        all_detections = detect_with_selected_model(image, confidence, model_type)
        if not use_filter:
            # Filter disabled: annotate every detection with its raw confidence.
            for det in all_detections:
                det['filter_score'] = det['confidence'] * 100
                det['filter_reasons'] = [f"신뢰도: {det['confidence']:.0%} (필터 미사용)"]
            all_detections_scored = all_detections
        else:
            if model_type in ["VIDraft/Shrimp", "YOLOv8"]:
                # Roboflow & YOLOv8: use the confidence as the filter score.
                for det in all_detections:
                    det['filter_score'] = det['confidence'] * 100
                    det['filter_reasons'] = [f"{model_type} 신뢰도: {det['confidence']:.0%}"]
                all_detections_scored = all_detections
            else:
                # RT-DETR: heuristic Universal Filter scoring.
                all_detections_scored = apply_universal_filter(all_detections, image, threshold=0)
        # BUGFIX: previously `filter_threshold` was applied even when
        # `use_filter` was off, silently dropping every detection whose
        # confidence*100 fell below the slider (default 90).  With the
        # filter disabled, everything now passes.
        effective_threshold = filter_threshold if use_filter else float("-inf")
        filtered_detections = [det for det in all_detections_scored if det['filter_score'] >= effective_threshold]
        # Visualization
        img = image.copy()
        draw = ImageDraw.Draw(img)
        try:
            font = ImageFont.truetype("arial.ttf", 14)
            font_large = ImageFont.truetype("arial.ttf", 18)
            font_small = ImageFont.truetype("arial.ttf", 10)
        except Exception:
            # arial.ttf is not available on every platform
            font = ImageFont.load_default()
            font_large = ImageFont.load_default()
            font_small = ImageFont.load_default()
        # Draw rejected objects first (red, back layer)
        rejected_detections = [det for det in all_detections_scored if det['filter_score'] < effective_threshold]
        for idx, det in enumerate(rejected_detections, 1):
            x1, y1, x2, y2 = det['bbox']
            score = det['filter_score']
            draw.rectangle([x1, y1, x2, y2], outline="red", width=8)
            # Small label
            label = f"✗{idx} {score:.0f}점"
            bbox = draw.textbbox((x1, y1 - 20), label, font=font_small)
            draw.rectangle(bbox, fill="red")
            draw.text((x1, y1 - 20), label, fill="white", font=font_small)
        # Optionally show every remaining detection in gray
        if show_all:
            for det in all_detections_scored:
                if det not in filtered_detections and det not in rejected_detections:
                    x1, y1, x2, y2 = det['bbox']
                    draw.rectangle([x1, y1, x2, y2], outline="gray", width=4)
        # Passing detections: green/yellow/orange by score
        for idx, det in enumerate(filtered_detections, 1):
            x1, y1, x2, y2 = det['bbox']
            score = det['filter_score']
            if score >= 75:
                color = "lime"
            elif score >= 50:
                color = "yellow"
            else:
                color = "orange"
            # Thick box
            draw.rectangle([x1, y1, x2, y2], outline=color, width=10)
            # Label
            label = f"✓#{idx} {score:.0f}점"
            bbox = draw.textbbox((x1, y1 - 25), label, font=font)
            draw.rectangle(bbox, fill=color)
            draw.text((x1, y1 - 25), label, fill="black", font=font)
            # Small per-box detail line
            details = f"{model_type}:{det['confidence']:.0%}"
            draw.text((x1, y2 + 5), details, fill=color, font=font_small)
        # Header banner
        header = f"[{model_type}] ✓ {len(filtered_detections)}개 / ✗ {len(rejected_detections)}개 (전체: {len(all_detections_scored)}개)"
        header_bbox = draw.textbbox((10, 10), header, font=font_large)
        draw.rectangle([5, 5, header_bbox[2]+10, header_bbox[3]+10],
                       fill="black", outline="lime", width=2)
        draw.text((10, 10), header, fill="lime", font=font_large)
        # Markdown report
        info = f"""
### 📊 검출 결과 (모델: {model_type})
- **전체 검출**: {len(all_detections_scored)}개
- **필터링 후**: {len(filtered_detections)}개
- **제거됨**: {len(rejected_detections)}개
---
### 🎯 검출된 객체 상세 (✅ 통과)
"""
        for idx, det in enumerate(filtered_detections, 1):
            info += f"""
**#{idx} - 점수: {det['filter_score']:.0f}점** ({model_type} 신뢰도: {det['confidence']:.0%})
"""
            # Only the top-5 feature notes per detection
            for reason in det['filter_reasons'][:5]:
                info += f"- {reason}\n"
        if not filtered_detections:
            info += """
⚠️ **검출된 객체가 없습니다.**
"""
        # Rejected-object summary
        if rejected_detections:
            info += f"""
---
### ❌ 제거된 객체 ({len(rejected_detections)}개)
"""
            for idx, det in enumerate(rejected_detections[:3], 1):  # show at most 3
                info += f"""
**제거 #{idx} - 점수: {det['filter_score']:.0f}점** (임계값 미달)
- {model_type} 신뢰도: {det['confidence']:.0%}
"""
                # Why it failed
                for reason in det['filter_reasons'][:3]:
                    info += f"- {reason}\n"
        return img, info
    except Exception:
        import traceback
        error_detail = traceback.format_exc()
        return None, f"❌ 오류 발생:\n\n```\n{error_detail}\n```"
# ============================================================
# Tab 2: labeling tool
# ============================================================
def detect_with_rtdetr_fast(image, confidence=0.3):
    """RT-DETR detection using the module-level processor/model globals.

    Like detect_with_rtdetr() but without the 'label' field in the output.
    """
    encoded = processor(images=image, return_tensors="pt")
    with torch.no_grad():
        raw_outputs = model(**encoded)
    # (height, width) expected by the post-processor
    sizes = torch.tensor([image.size[::-1]])
    parsed = processor.post_process_object_detection(
        raw_outputs, target_sizes=sizes, threshold=confidence
    )[0]
    return [
        {'bbox': box.tolist(), 'confidence': s.item()}
        for s, box in zip(parsed["scores"], parsed["boxes"])
    ]
def load_existing_ground_truth():
    """Return the saved ground-truth dict, or {} when no file exists yet."""
    if not os.path.exists(GROUND_TRUTH_FILE):
        return {}
    with open(GROUND_TRUTH_FILE, 'r', encoding='utf-8') as f:
        return json.load(f)
def save_ground_truth(data):
    """Write `data` to ground_truth.json, backing up any existing file first.

    Backups are timestamped copies placed under backups/.
    """
    backup_dir = "backups"
    os.makedirs(backup_dir, exist_ok=True)
    if os.path.exists(GROUND_TRUTH_FILE):
        import shutil
        stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        shutil.copy2(GROUND_TRUTH_FILE,
                     os.path.join(backup_dir, f"ground_truth_backup_{stamp}.json"))
    with open(GROUND_TRUTH_FILE, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)
    print(f"✅ Ground Truth 저장 완료: {len(data)}개 이미지")
def get_folders():
    """List subfolders of DATA_BASE whose names start with '2' (date folders)."""
    candidates = sorted(glob.glob(os.path.join(DATA_BASE, "2*")))
    return [os.path.basename(path) for path in candidates if os.path.isdir(path)]
def start_labeling(folder, conf_threshold, model_type):
    """Initialize a labeling session for `folder`.

    Filters out derivative files (e.g. 251017_01-1.jpg), pre-marks images
    already present in ground_truth.json as done, and jumps to the first
    unlabeled image.

    Returns (image, info_markdown, filename) for the Gradio outputs, or
    (None, message, "") when there is nothing to label.
    """
    if not folder:
        return None, "❌ 폴더를 선택하세요.", ""
    current_data['folder'] = folder
    current_data['confidence_threshold'] = conf_threshold
    current_data['model_type'] = model_type
    folder_path = os.path.join(DATA_BASE, folder)
    all_images = sorted(glob.glob(os.path.join(folder_path, "*.jpg")))
    # Exclude files with a -N suffix (e.g. drop 251017_01-1.jpg, keep 251017_01.jpg)
    import re
    images = [img for img in all_images if not re.search(r'-\d+\.jpg$', os.path.basename(img))]
    if not images:
        return None, "❌ 이미지 없음", ""
    print(f"📁 폴더: {folder}")
    print(f" 전체 이미지: {len(all_images)}개")
    print(f" 라벨링 대상: {len(images)}개 (-숫자 파일 제외)")
    current_data['images'] = images
    current_data['current_idx'] = 0
    current_data['detections'] = {}
    current_data['selections'] = {}
    # Load existing ground truth
    gt = load_existing_ground_truth()
    # Mark already-labeled images as selected so they are skipped
    for img_path in images:
        filename = os.path.basename(img_path)
        if filename in gt:
            current_data['selections'][filename] = list(range(len(gt[filename])))
            # BUGFIX: this log previously printed a literal "(unknown)"
            # instead of the actual file name.
            print(f"⏭️ 건너뛰기: {filename} (이미 라벨링됨)")
    # Advance to the first unlabeled image
    while current_data['current_idx'] < len(images):
        filename = os.path.basename(images[current_data['current_idx']])
        if filename not in current_data['selections']:
            break
        current_data['current_idx'] += 1
    if current_data['current_idx'] >= len(images):
        return None, "✅ 모든 이미지 라벨링 완료!", ""
    return show_current_image()
def show_current_image():
    """Render the current labeling image with its (cached) detections.

    Returns (annotated image, info markdown, filename); when the queue is
    exhausted, returns (None, done-message, "").
    """
    if current_data['current_idx'] >= len(current_data['images']):
        return None, "✅ 완료!", ""
    img_path = current_data['images'][current_data['current_idx']]
    filename = os.path.basename(img_path)
    # Image cache
    if filename in current_data['image_cache']:
        image = current_data['image_cache'][filename]
    else:
        image = Image.open(img_path)
        current_data['image_cache'][filename] = image
    # Run (and cache) detection with the selected model
    if filename not in current_data['detections']:
        if current_data['model_type'] == 'RT-DETR':
            detections = detect_with_rtdetr_fast(image, current_data['confidence_threshold'])
        elif current_data['model_type'] == 'YOLOv8':
            detections = detect_with_yolo(image, current_data['confidence_threshold'])
        else:  # VIDraft/Shrimp
            detections = detect_with_roboflow(image, current_data['confidence_threshold'])
        current_data['detections'][filename] = detections
    else:
        detections = current_data['detections'][filename]
    # Currently selected boxes
    selected_indices = current_data['selections'].get(filename, [])
    # Visualization
    vis_image = draw_detections(image, detections, selected_indices)
    # BUGFIX: the file-name line previously showed a literal "(unknown)".
    info = f"""
### 📁 {current_data['folder']} - 이미지 {current_data['current_idx']+1}/{len(current_data['images'])}
**파일**: {filename}
**모델**: {current_data['model_type']}
**검출**: {len(detections)}개
**선택**: {len(selected_indices)}개
---
### 🖱️ 사용 방법:
1. 이미지를 클릭하여 박스 선택/해제
2. "다음" 버튼으로 저장 후 이동
3. "건너뛰기"로 선택 없이 이동
"""
    return vis_image, info, filename
def draw_detections(image, detections, selected_indices):
    """Draw detection boxes and clickable circle buttons on a copy of `image`.

    Unselected boxes are lime, selected boxes blue; a numbered circle at
    each box center acts as the click target in the labeling UI (the circle
    radius here must match the hit test in labeling_click).
    """
    img = image.copy()
    draw = ImageDraw.Draw(img)
    try:
        font_tiny = ImageFont.truetype("arial.ttf", 10)
        font_large = ImageFont.truetype("arial.ttf", 40)
    except:
        # arial.ttf may not exist on this platform
        font_tiny = ImageFont.load_default()
        font_large = ImageFont.load_default()
    # Unselected boxes first (back layer)
    for idx, det in enumerate(detections):
        if idx not in selected_indices:
            x1, y1, x2, y2 = det['bbox']
            draw.rectangle([x1, y1, x2, y2], outline="lime", width=20)
            corner_label = f"#{idx+1}"
            draw.rectangle([x1-2, y1-24, x1+30, y1-2], fill="lime")
            draw.text((x1, y1 - 22), corner_label, fill="white", font=font_tiny)
    # Selected boxes afterwards (front layer)
    for idx, det in enumerate(detections):
        if idx in selected_indices:
            x1, y1, x2, y2 = det['bbox']
            draw.rectangle([x1, y1, x2, y2], outline="blue", width=28)
            corner_label = f"✓#{idx+1}"
            draw.rectangle([x1-2, y1-24, x1+40, y1-2], fill="blue")
            draw.text((x1, y1 - 22), corner_label, fill="white", font=font_tiny)
    # Circular toggle buttons at each box center
    for idx, det in enumerate(detections):
        x1, y1, x2, y2 = det['bbox']
        center_x = (x1 + x2) / 2
        center_y = (y1 + y2) / 2
        selected = idx in selected_indices
        btn_color = "blue" if selected else "lime"
        btn_text = f"✓{idx+1}" if selected else f"{idx+1}"
        box_width = x2 - x1
        box_height = y2 - y1
        # Radius capped at 55px and scaled to the box dimensions
        radius = min(55, box_width * 0.18, box_height * 0.35)
        # Circle button
        draw.ellipse(
            [center_x - radius, center_y - radius,
             center_x + radius, center_y + radius],
            fill=btn_color, outline="white", width=4
        )
        draw.text((center_x - radius*0.5, center_y - radius*0.6),
                  btn_text, fill="white", font=font_large)
    return img
def labeling_click(image, filename, evt: gr.SelectData):
    """Handle a click on the labeling image: toggle the box under the cursor.

    A click inside a center-button circle wins (nearest circle when several
    overlap); otherwise any click inside a box toggles that box.  Returns
    the redrawn image and a status message.
    """
    if not filename or filename not in current_data['detections']:
        return image, "⚠️ 이미지를 먼저 로드하세요."
    click_x, click_y = evt.index[0], evt.index[1]
    detections = current_data['detections'][filename]
    selected_indices = set(current_data['selections'].get(filename, []))
    # Find the clicked box
    clicked_idx = None
    button_candidates = []
    # Circle-button hit test (radius mirrors draw_detections)
    for idx, det in enumerate(detections):
        x1, y1, x2, y2 = det['bbox']
        center_x = (x1 + x2) / 2
        center_y = (y1 + y2) / 2
        box_width = x2 - x1
        box_height = y2 - y1
        radius = min(55, box_width * 0.18, box_height * 0.35)
        distance = ((click_x - center_x) ** 2 + (click_y - center_y) ** 2) ** 0.5
        if distance <= radius:
            button_candidates.append((idx, distance))
    # Prefer the closest button hit
    if button_candidates:
        button_candidates.sort(key=lambda x: x[1])
        clicked_idx = button_candidates[0][0]
    else:
        # Fall back to a plain box-area hit test (first match wins)
        for idx, det in enumerate(detections):
            x1, y1, x2, y2 = det['bbox']
            if x1 <= click_x <= x2 and y1 <= click_y <= y2:
                clicked_idx = idx
                break
    # Toggle the selection state
    if clicked_idx is not None:
        if clicked_idx in selected_indices:
            selected_indices.remove(clicked_idx)
            print(f"❌ 선택 해제: 박스 #{clicked_idx+1}")
        else:
            selected_indices.add(clicked_idx)
            print(f"✅ 선택: 박스 #{clicked_idx+1}")
        current_data['selections'][filename] = list(selected_indices)
        # Redraw on a fresh copy of the image
        img_path = current_data['images'][current_data['current_idx']]
        image = Image.open(img_path)
        vis_image = draw_detections(image, detections, list(selected_indices))
        info = f"✅ 박스 #{clicked_idx+1} {'선택' if clicked_idx in selected_indices else '해제'}"
        return vis_image, info
    return image, "❌ 박스를 찾을 수 없습니다."
def save_and_next():
    """Persist the current image's selected boxes to ground truth, then advance.

    Images with no selected boxes are skipped without writing.  Already-labeled
    images further down the queue are skipped as well.  Returns the usual
    (image, info, filename) triple for the Gradio outputs.
    """
    if current_data['current_idx'] >= len(current_data['images']):
        return None, "✅ 완료!", ""
    img_path = current_data['images'][current_data['current_idx']]
    filename = os.path.basename(img_path)
    # Save ground truth
    gt = load_existing_ground_truth()
    selected_indices = current_data['selections'].get(filename, [])
    if selected_indices:
        detections = current_data['detections'][filename]
        gt[filename] = [
            {
                'bbox': detections[i]['bbox'],
                'folder': current_data['folder']
            }
            for i in selected_indices
        ]
        save_ground_truth(gt)
        # BUGFIX: these log lines previously printed a literal "(unknown)"
        # instead of the file name.
        print(f"💾 저장: {filename} - {len(selected_indices)}개 박스")
    else:
        print(f"⏭️ 건너뛰기: {filename} - 선택 없음")
    # Next image
    current_data['current_idx'] += 1
    # Advance past already-labeled images
    while current_data['current_idx'] < len(current_data['images']):
        next_filename = os.path.basename(current_data['images'][current_data['current_idx']])
        if next_filename not in current_data['selections']:
            break
        current_data['current_idx'] += 1
    if current_data['current_idx'] >= len(current_data['images']):
        return None, "✅ 모든 이미지 라벨링 완료!", ""
    return show_current_image()
def skip_image():
    """Advance to the next image without saving any selections."""
    current_data['current_idx'] += 1
    if current_data['current_idx'] >= len(current_data['images']):
        return None, "✅ 완료!", ""
    return show_current_image()
# ============================================================
# Gradio interface — two tabs sharing one model selector
# ============================================================
with gr.Blocks(title="🦐 새우 검출 통합 시스템", theme=gr.themes.Soft()) as demo:
    gr.Markdown("""
# 🦐 새우 검출 통합 시스템
**2가지 탭으로 새우를 정확하게 검출하고 관리하세요**
---
""")
    # ==================== Top bar: model selection ====================
    with gr.Row():
        with gr.Column(scale=3):
            # Offer YOLOv8 (and make it the default) only when it is available
            model_choices = ["RT-DETR", "VIDraft/Shrimp"]
            default_model = "RT-DETR"
            if YOLO_AVAILABLE:
                model_choices.append("YOLOv8")
                default_model = "YOLOv8"
            model_selector = gr.Radio(
                choices=model_choices,
                value=default_model,
                label="🤖 검출 모델 선택",
                info="모든 탭에 적용됩니다"
            )
        with gr.Column(scale=1):
            load_rtdetr_btn = gr.Button("🔄 RT-DETR 로드", size="sm", variant="secondary")
            rtdetr_status = gr.Textbox(label="모델 상태", value="⏸️ RT-DETR 미로드 (VIDraft/Shrimp 클라우드 모델 사용 가능)", interactive=False, lines=1)
    # RT-DETR lazy-load button event
    load_rtdetr_btn.click(
        load_rtdetr_on_demand,
        inputs=[],
        outputs=[rtdetr_status]
    )
    gr.Markdown("---")
    with gr.Tabs():
        # ==================== Tab 1: automatic detection ====================
        with gr.TabItem("🤖 자동 검출 & 검증"):
            gr.Markdown("""
### 실시간으로 파라미터를 조정하며 검출 결과를 확인
최적화된 파라미터로 새우 검출을 테스트하세요.
""")
            with gr.Row():
                with gr.Column():
                    input_image_detect = gr.Image(label="입력 이미지", type="pil")
                    confidence_slider_detect = gr.Slider(
                        0.01, 1.0, 0.1,
                        step=0.01,
                        label="신뢰도 임계값",
                        info="RT-DETR: 0.065 | VIDraft/Shrimp: 0.3~0.5 | YOLOv8: 0.1~0.3 권장"
                    )
                    use_filter_check = gr.Checkbox(
                        label="🔍 필터 점수 임계값 사용",
                        value=False,
                        info="체크하면 필터 점수 기준으로 추가 필터링"
                    )
                    filter_slider_detect = gr.Slider(
                        0, 100, 90,
                        step=5,
                        label="필터 점수 임계값",
                        info="RT-DETR: Universal Filter | VIDraft/Shrimp: 신뢰도 기반",
                        visible=True
                    )
                    show_all_check = gr.Checkbox(
                        label="전체 검출 결과 표시 (회색)",
                        value=False
                    )
                    detect_btn = gr.Button("🚀 검출 실행", variant="primary", size="lg")
                    # Example images (result files excluded)
                    example_images = [
                        "examples/250818_03.jpg",
                        "examples/test_shrimp_tank.png",
                        "examples/250818_05.jpg",
                    ]
                    # Keep only the examples that actually exist on disk
                    example_images = [img for img in example_images if os.path.exists(img)]
                    if example_images:
                        gr.Examples(
                            examples=[[img] for img in example_images],
                            inputs=[input_image_detect],
                            label="📷 예제 이미지"
                        )
                with gr.Column():
                    output_image_detect = gr.Image(label="검출 결과")
                    output_info_detect = gr.Markdown()
            detect_btn.click(
                interactive_detect,
                [input_image_detect, confidence_slider_detect, filter_slider_detect, show_all_check, model_selector, use_filter_check],
                [output_image_detect, output_info_detect]
            )
            # Enable/disable the filter slider to follow the checkbox state
            def update_filter_interactivity(use_filter):
                return gr.update(interactive=use_filter)
            use_filter_check.change(
                update_filter_interactivity,
                inputs=[use_filter_check],
                outputs=[filter_slider_detect]
            )
            gr.Markdown("""
### 💡 사용 팁
- 모델을 선택하고 신뢰도를 조정하여 검출 결과를 확인하세요
- 검출이 적을 때는 신뢰도를 낮추고, 오검출이 많을 때는 높이세요
- 필터 기능을 사용하여 더 정확한 결과를 얻을 수 있습니다
**박스 색상:** 🟢 녹색(높은 확률) | 🟡 노란색(중간 확률) | 🟠 주황색(낮은 확률) | 🔴 빨간색(제거됨)
""")
        # ==================== Tab 2: labeling tool ====================
        with gr.TabItem("📝 Ground Truth 라벨링"):
            gr.Markdown("""
### 선택된 모델의 검출 결과에서 올바른 박스만 선택하여 라벨링
이미지를 클릭하여 새우 박스를 선택/해제하세요.
""")
            with gr.Row():
                with gr.Column(scale=1):
                    folder_dropdown = gr.Dropdown(
                        choices=get_folders(),
                        label="📁 폴더 선택",
                        info="라벨링할 폴더를 선택하세요"
                    )
                    conf_slider_label = gr.Slider(
                        0.01, 0.5, 0.2,
                        step=0.05,
                        label="신뢰도",
                        info="검출 민감도 조정"
                    )
                    start_btn = gr.Button("▶️ 라벨링 시작", variant="primary", size="lg")
                    gr.Markdown("---")
                    next_btn = gr.Button("⏭️ 저장 & 다음", variant="secondary", size="lg")
                    skip_btn = gr.Button("⏩ 건너뛰기", size="lg")
                    labeling_info = gr.Markdown("폴더를 선택하고 '라벨링 시작'을 클릭하세요.")
                with gr.Column(scale=2):
                    labeling_image = gr.Image(
                        label="🖱️ 클릭하여 박스 선택/해제",
                        type="pil",
                        interactive=True
                    )
                    labeling_filename = gr.Textbox(visible=False)
                    click_info = gr.Markdown()
            # Event handlers
            start_btn.click(
                start_labeling,
                [folder_dropdown, conf_slider_label, model_selector],
                [labeling_image, labeling_info, labeling_filename]
            )
            labeling_image.select(
                labeling_click,
                [labeling_image, labeling_filename],
                [labeling_image, click_info]
            )
            next_btn.click(
                save_and_next,
                [],
                [labeling_image, labeling_info, labeling_filename]
            )
            skip_btn.click(
                skip_image,
                [],
                [labeling_image, labeling_info, labeling_filename]
            )
            gr.Markdown("""
### 🖱️ 사용 방법
1. **모델 선택** (최상단에서 선택)
2. 폴더 선택 후 "라벨링 시작"
3. 이미지에서 **원형 버튼 클릭** 또는 **박스 영역 클릭**으로 선택/해제
4. "저장 & 다음"으로 다음 이미지로 이동 (자동 저장)
5. "건너뛰기"로 선택 없이 다음 이미지로
**💾 저장 위치:** `ground_truth.json` (자동 백업: `backups/`)
""")
    gr.Markdown("""
---
### 🤖 모델 설명
- **RT-DETR**: 로컬 모델, 빠른 추론 속도, 오프라인 사용 가능
- **VIDraft/Shrimp**: 클라우드 모델, 인터넷 연결 필요
- **YOLOv8**: 로컬 커스텀 학습 모델, 빠른 추론 속도
---
© 2025 VIDraft. All rights reserved.
""")
if __name__ == "__main__":
    # Startup banner listing the available backends.
    print("\n" + "="*60)
    print("🦐 새우 검출 통합 시스템 v2.2 시작")
    print("="*60)
    print("🤖 사용 가능한 모델:")
    print(" 1. RT-DETR (로컬)")
    print(" 2. VIDraft/Shrimp (클라우드)")
    print(" 3. YOLOv8 (로컬 학습) ⭐ 기본값")
    print(f"\n📦 YOLOv8 모델: {YOLO_MODEL_PATH}")
    print("="*60)
    # Bind to all interfaces so the app is reachable in containers/Spaces.
    demo.launch(
        server_name="0.0.0.0",
        server_port=None,  # pick a free port automatically
        share=False
    )