|
|
import io |
|
|
import os |
|
|
import zipfile |
|
|
import numpy as np |
|
|
from pathlib import Path |
|
|
from PIL import Image, ImageDraw, ImageFont, ImageEnhance |
|
|
import gradio as gr |
|
|
from ultralytics import YOLO |
|
|
import torch |
|
|
from typing import List, Dict, Tuple |
|
|
import json
import colorsys
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CLASSES_PATH = Path("model/classes.txt") |
|
|
MODEL_PATH = Path("model/best.pt") |
|
|
|
|
|
|
|
|
if not MODEL_PATH.exists():
    raise FileNotFoundError(
        f"❌ Model not found at {MODEL_PATH}.\n"
        f"Please ensure your directory structure is:\n"
        f" model/\n"
        f" ├── best.pt\n"
        f" └── classes.txt"
    )
|
|
if not CLASSES_PATH.exists():
    raise FileNotFoundError(
        f"❌ Classes file not found at {CLASSES_PATH}.\n"
        f"Please ensure 'classes.txt' exists in the model/ directory."
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def load_classes(path): |
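    """Read one class name per line from a text file."""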
|
|
with open(path, "r", encoding="utf-8") as f: |
|
|
        return [line.strip() for line in f if line.strip()]
|
|
|
|
|
|
|
|
CLASS_NAMES = load_classes(CLASSES_PATH) |
|
|
print(f"β
Loaded {len(CLASS_NAMES)} classes: {', '.join(CLASS_NAMES)}") |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_font(size=20): |
|
|
"""Try multiple font sources for cross-platform compatibility""" |
|
|
font_options = [ |
|
|
"arial.ttf", |
|
|
"Arial.ttf", |
|
|
"/usr/share/fonts/truetype/dejavu/DejaVuSans-Bold.ttf", |
|
|
"/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf", |
|
|
"/System/Library/Fonts/Helvetica.ttc", |
|
|
"C:\\Windows\\Fonts\\arial.ttf", |
|
|
"C:\\Windows\\Fonts\\arialbd.ttf", |
|
|
] |
|
|
|
|
|
for font_path in font_options: |
|
|
try: |
|
|
return ImageFont.truetype(font_path, size) |
|
|
        except Exception:
|
|
continue |
|
|
|
|
|
return ImageFont.load_default() |
|
|
|
|
|
|
|
|
FONT = get_font(24) |
|
|
FONT_SMALL = get_font(18) |
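
# Load the YOLO model once at startup and run it on the GPU when one is available.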
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
print(f"π Loading model from {MODEL_PATH}...") |
|
|
device = 'cuda' if torch.cuda.is_available() else 'cpu' |
|
|
print(f"π₯οΈ Using device: {device}") |
|
|
|
|
|
model = YOLO(str(MODEL_PATH)) |
|
|
model.model.eval() |
|
|
if device == 'cuda': |
|
|
model.model.half() |
|
|
print(f"β
Model loaded successfully!") |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_color_palette(num_classes): |
|
|
"""Generate distinct colors for each class""" |
|
|
np.random.seed(42) |
|
|
colors = [] |
|
|
for i in range(num_classes): |
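
        # Spread hues evenly around the color wheel and jitter saturation/value
        # so neighboring classes stay visually distinguishable.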
|
|
|
|
|
hue = int(360 * i / num_classes) |
|
|
saturation = 200 + np.random.randint(0, 55) |
|
|
value = 180 + np.random.randint(0, 75) |
|
|
|
|
|
|
|
|
|
|
r, g, b = colorsys.hsv_to_rgb(hue / 360, saturation / 255, value / 255) |
|
|
colors.append((int(r * 255), int(g * 255), int(b * 255))) |
|
|
|
|
|
return colors |
|
|
|
|
|
|
|
|
COLOR_PALETTE = get_color_palette(len(CLASS_NAMES)) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def preprocess_image(image: Image.Image, enhance: bool = True) -> Image.Image: |
|
|
""" |
|
|
Enhance image quality for better detection |
|
|
""" |
|
|
if not enhance: |
|
|
return image |
|
|
|
|
|
|
|
|
if image.mode != 'RGB': |
|
|
image = image.convert('RGB') |
|
|
|
|
|
|
|
|
enhancer = ImageEnhance.Contrast(image) |
|
|
image = enhancer.enhance(1.15) |
|
|
|
|
|
|
|
|
enhancer = ImageEnhance.Sharpness(image) |
|
|
image = enhancer.enhance(1.2) |
|
|
|
|
|
|
|
|
enhancer = ImageEnhance.Brightness(image) |
|
|
image = enhancer.enhance(1.05) |
|
|
|
|
|
return image |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def compute_iou(box1: List[int], box2: List[int]) -> float: |
|
|
"""Compute Intersection over Union between two boxes""" |
|
|
x1 = max(box1[0], box2[0]) |
|
|
y1 = max(box1[1], box2[1]) |
|
|
x2 = min(box1[2], box2[2]) |
|
|
y2 = min(box1[3], box2[3]) |
|
|
|
|
|
intersection = max(0, x2 - x1) * max(0, y2 - y1) |
|
|
|
|
|
area1 = (box1[2] - box1[0]) * (box1[3] - box1[1]) |
|
|
area2 = (box2[2] - box2[0]) * (box2[3] - box2[1]) |
|
|
union = area1 + area2 - intersection |
|
|
|
|
|
return intersection / union if union > 0 else 0 |
|
|
|
|
|
|
|
|
def compute_box_area(box: List[int]) -> int: |
|
|
"""Compute area of a box""" |
|
|
return (box[2] - box[0]) * (box[3] - box[1]) |
|
|
|
|
|
|
|
|
def is_box_inside(box1: List[int], box2: List[int], threshold: float = 0.95) -> bool: |
|
|
"""Check if box1 is inside box2""" |
|
|
x1, y1, x2, y2 = box1 |
|
|
bx1, by1, bx2, by2 = box2 |
|
|
|
|
|
|
|
|
if x1 >= bx1 and y1 >= by1 and x2 <= bx2 and y2 <= by2: |
|
|
area1 = compute_box_area(box1) |
|
|
area2 = compute_box_area(box2) |
|
|
|
|
|
|
|
|
if area1 < area2 * threshold: |
|
|
return True |
|
|
|
|
|
return False |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def remove_nested_boxes(boxes: List[Dict], containment_threshold: float = 0.85) -> List[Dict]: |
|
|
""" |
|
|
Remove boxes that are nested inside other boxes of different classes |
|
|
Keeps the higher confidence detection |
|
|
""" |
|
|
if len(boxes) <= 1: |
|
|
return boxes |
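
    # Sort by confidence so higher-confidence detections are kept and nested or duplicate ones are dropped.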
|
|
|
|
|
|
|
|
boxes = sorted(boxes, key=lambda x: x['conf'], reverse=True) |
|
|
keep = [] |
|
|
|
|
|
for box1 in boxes: |
|
|
is_nested = False |
|
|
|
|
|
|
|
|
for box2 in keep: |
|
|
|
|
|
if box1['cls'] != box2['cls']: |
|
|
|
|
|
if is_box_inside(box1['xyxy'], box2['xyxy'], containment_threshold): |
|
|
is_nested = True |
|
|
break |
|
|
|
|
|
|
|
|
iou = compute_iou(box1['xyxy'], box2['xyxy']) |
|
|
if iou > 0.85: |
|
|
is_nested = True |
|
|
break |
|
|
|
|
|
if not is_nested: |
|
|
keep.append(box1) |
|
|
|
|
|
return keep |
|
|
|
|
|
|
|
|
def non_max_suppression_custom(boxes: List[Dict], iou_threshold: float) -> List[Dict]: |
|
|
""" |
|
|
Improved NMS with better handling of overlapping detections |
|
|
""" |
|
|
if not boxes: |
|
|
return [] |
|
|
|
|
|
|
|
|
boxes = sorted(boxes, key=lambda x: x['conf'], reverse=True) |
|
|
keep = [] |
|
|
|
|
|
while boxes: |
|
|
best = boxes[0] |
|
|
keep.append(best) |
|
|
boxes = boxes[1:] |
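
        # Keep only boxes that do not overlap the current best too strongly;
        # same-class overlaps use the user IoU threshold, cross-class overlaps a fixed 0.80.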
|
|
|
|
|
filtered = [] |
|
|
for box in boxes: |
|
|
iou = compute_iou(best['xyxy'], box['xyxy']) |
|
|
|
|
|
|
|
|
if best['cls'] == box['cls']: |
|
|
if iou < iou_threshold: |
|
|
filtered.append(box) |
|
|
|
|
|
else: |
|
|
if iou < 0.80: |
|
|
filtered.append(box) |
|
|
|
|
|
boxes = filtered |
|
|
|
|
|
return keep |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def advanced_inference( |
|
|
img: Image.Image, |
|
|
conf: float, |
|
|
iou: float, |
|
|
img_size: int, |
|
|
use_tta: bool, |
|
|
use_ensemble: bool, |
|
|
enhance_img: bool |
|
|
) -> List[Dict]: |
|
|
""" |
|
|
Enhanced inference with better false positive suppression |
|
|
""" |
|
|
|
|
|
img = preprocess_image(img, enhance=enhance_img) |
|
|
|
|
|
all_predictions = [] |
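
    # Primary detection pass (optionally using Ultralytics' built-in test-time augmentation via augment=).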
|
|
|
|
|
|
|
|
results = model.predict( |
|
|
img, |
|
|
conf=conf, |
|
|
iou=iou, |
|
|
verbose=False, |
|
|
augment=use_tta, |
|
|
imgsz=img_size, |
|
|
half=(device == 'cuda'), |
|
|
device=device, |
|
|
max_det=150, |
|
|
agnostic_nms=True |
|
|
)[0] |
|
|
all_predictions.append(results) |
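
    # Optional multi-scale ensemble: run extra passes at neighboring resolutions and merge all predictions afterwards.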
|
|
|
|
|
|
|
|
if use_ensemble: |
|
|
        scales = [img_size - 64, img_size + 64] if img_size >= 704 else [img_size]
|
|
for scale in scales: |
|
|
results_scaled = model.predict( |
|
|
img, |
|
|
conf=conf * 1.1, |
|
|
iou=iou, |
|
|
verbose=False, |
|
|
augment=False, |
|
|
imgsz=scale, |
|
|
half=(device == 'cuda'), |
|
|
device=device, |
|
|
max_det=150, |
|
|
agnostic_nms=True |
|
|
)[0] |
|
|
all_predictions.append(results_scaled) |
|
|
|
|
|
|
|
|
merged_boxes = merge_predictions( |
|
|
all_predictions, iou_threshold=iou, conf_threshold=conf) |
|
|
|
|
|
|
|
|
merged_boxes = remove_nested_boxes( |
|
|
merged_boxes, containment_threshold=0.85) |
|
|
|
|
|
return merged_boxes |
|
|
|
|
|
|
|
|
def merge_predictions(predictions: List, iou_threshold: float, conf_threshold: float) -> List[Dict]: |
|
|
""" |
|
|
Merge multiple predictions using improved NMS |
|
|
""" |
|
|
if len(predictions) == 1: |
|
|
boxes = yolo_to_boxes(predictions[0]) |
|
|
return [b for b in boxes if b['conf'] >= conf_threshold] |
|
|
|
|
|
all_boxes = [] |
|
|
for pred in predictions: |
|
|
boxes = yolo_to_boxes(pred) |
|
|
all_boxes.extend(boxes) |
|
|
|
|
|
if not all_boxes: |
|
|
return [] |
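
    # Group detections by class so NMS only suppresses boxes within the same class.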
|
|
|
|
|
|
|
|
class_boxes = {} |
|
|
for box in all_boxes: |
|
|
cls = box['cls'] |
|
|
if cls not in class_boxes: |
|
|
class_boxes[cls] = [] |
|
|
class_boxes[cls].append(box) |
|
|
|
|
|
|
|
|
final_boxes = [] |
|
|
for cls, boxes in class_boxes.items(): |
|
|
nms_boxes = non_max_suppression_custom(boxes, iou_threshold) |
|
|
final_boxes.extend( |
|
|
[b for b in nms_boxes if b['conf'] >= conf_threshold]) |
|
|
|
|
|
return final_boxes |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def yolo_to_boxes(res): |
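    """Convert an Ultralytics Results object into a list of dicts with class id, confidence, and integer xyxy coords."""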
|
|
boxes = [] |
|
|
for r in res.boxes: |
|
|
x1, y1, x2, y2 = r.xyxy[0].tolist() |
|
|
boxes.append({ |
|
|
"cls": int(r.cls[0]), |
|
|
"conf": float(r.conf[0]), |
|
|
"xyxy": [int(x1), int(y1), int(x2), int(y2)] |
|
|
}) |
|
|
return boxes |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def draw_boxes(image, boxes, show_conf=True, box_thickness=3): |
|
|
"""Enhanced visualization with better styling""" |
|
|
img = image.convert("RGBA") |
|
|
overlay = Image.new("RGBA", img.size, (0, 0, 0, 0)) |
|
|
d = ImageDraw.Draw(overlay) |
|
|
|
|
|
for b in boxes: |
|
|
cls_idx = b["cls"] |
|
|
cls = CLASS_NAMES[cls_idx] |
|
|
conf = b['conf'] |
|
|
x1, y1, x2, y2 = b["xyxy"] |
|
|
color = COLOR_PALETTE[cls_idx] |
|
|
|
|
|
|
|
|
thickness = max(2, int(box_thickness * (0.6 + conf * 0.4))) |
|
|
|
|
|
|
|
|
d.rectangle([x1, y1, x2, y2], outline=color + (255,), width=thickness) |
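
        # Add short, slightly thicker tick marks at the two top corners for emphasis.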
|
|
|
|
|
|
|
|
corner_len = 20 |
|
|
d.line([x1, y1, x1 + corner_len, y1], |
|
|
fill=color + (255,), width=thickness + 1) |
|
|
d.line([x1, y1, x1, y1 + corner_len], |
|
|
fill=color + (255,), width=thickness + 1) |
|
|
d.line([x2, y1, x2 - corner_len, y1], |
|
|
fill=color + (255,), width=thickness + 1) |
|
|
d.line([x2, y1, x2, y1 + corner_len], |
|
|
fill=color + (255,), width=thickness + 1) |
|
|
|
|
|
|
|
|
if show_conf: |
|
|
label = f"{cls} {conf:.0%}" |
|
|
else: |
|
|
label = cls |
|
|
|
|
|
|
|
|
bbox = FONT.getbbox(label) |
|
|
text_w = bbox[2] - bbox[0] |
|
|
text_h = bbox[3] - bbox[1] |
|
|
|
|
|
|
|
|
padding = 8 |
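
        # Place the label above the box when it fits, otherwise below it.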
|
|
if y1 - text_h - padding * 2 >= 0: |
|
|
label_y = y1 - text_h - padding * 2 |
|
|
label_pos = "top" |
|
|
else: |
|
|
label_y = y2 |
|
|
label_pos = "bottom" |
|
|
|
|
|
|
|
|
bg_coords = [x1, label_y, x1 + text_w + |
|
|
padding * 2, label_y + text_h + padding * 2] |
|
|
d.rectangle(bg_coords, fill=color + (240,)) |
|
|
|
|
|
|
|
|
d.rectangle(bg_coords, outline=color + (255,), width=2) |
|
|
|
|
|
|
|
|
shadow_offset = 2 |
|
|
d.text( |
|
|
(x1 + padding + shadow_offset, label_y + padding + shadow_offset), |
|
|
label, |
|
|
fill=(0, 0, 0, 120), |
|
|
font=FONT |
|
|
) |
|
|
d.text( |
|
|
(x1 + padding, label_y + padding), |
|
|
label, |
|
|
fill="white", |
|
|
font=FONT |
|
|
) |
|
|
|
|
|
return Image.alpha_composite(img, overlay).convert("RGB") |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def predict_single( |
|
|
input_image, |
|
|
conf, |
|
|
iou, |
|
|
use_tta, |
|
|
img_size, |
|
|
use_ensemble, |
|
|
enhance_img, |
|
|
show_conf, |
|
|
box_thickness |
|
|
): |
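    """Run detection on a single uploaded image and return the annotated image, a detection table, and a Markdown summary."""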
|
|
if input_image is None: |
|
|
return None, [], "β οΈ Please upload an image first" |
|
|
|
|
|
img = Image.fromarray(input_image).convert("RGB") |
|
|
|
|
|
|
|
|
boxes = advanced_inference( |
|
|
img, |
|
|
conf=conf, |
|
|
iou=iou, |
|
|
img_size=img_size, |
|
|
use_tta=use_tta, |
|
|
use_ensemble=use_ensemble, |
|
|
enhance_img=enhance_img |
|
|
) |
|
|
|
|
|
if not boxes: |
|
|
return img, [], f"βΉοΈ No objects detected with confidence β₯ {conf:.0%}. Try lowering the confidence threshold or enabling TTA/Ensemble modes." |
|
|
|
|
|
out_img = draw_boxes(img, boxes, show_conf=show_conf, |
|
|
box_thickness=box_thickness) |
|
|
|
|
|
|
|
|
det_table = [ |
|
|
[ |
|
|
CLASS_NAMES[b["cls"]], |
|
|
f"{b['conf']:.2%}", |
|
|
f"({b['xyxy'][0]}, {b['xyxy'][1]})", |
|
|
f"({b['xyxy'][2]}, {b['xyxy'][3]})", |
|
|
f"{compute_box_area(b['xyxy']):,}" |
|
|
] |
|
|
for b in sorted(boxes, key=lambda x: x['conf'], reverse=True) |
|
|
] |
|
|
|
|
|
|
|
|
counts = {} |
|
|
conf_by_class = {} |
|
|
for b in boxes: |
|
|
cls = CLASS_NAMES[b["cls"]] |
|
|
counts[cls] = counts.get(cls, 0) + 1 |
|
|
if cls not in conf_by_class: |
|
|
conf_by_class[cls] = [] |
|
|
conf_by_class[cls].append(b['conf']) |
|
|
|
|
|
|
|
|
avg_conf = np.mean([b['conf'] for b in boxes]) |
|
|
summary = f"### π― Detection Summary\n\n" |
|
|
summary += f"**Total Objects Detected:** {len(boxes)}\n" |
|
|
summary += f"**Average Confidence:** {avg_conf:.1%}" |
|
|
|
|
|
|
|
|
    if avg_conf >= 0.70:
        summary += " ✅ (High Quality)\n"
    elif avg_conf >= 0.50:
        summary += " ⚠️ (Medium Quality - verify results)\n"
    else:
        summary += " ⚠️ (Low Quality - may contain false positives)\n"
|
|
|
|
|
summary += "\n**Breakdown by Class:**\n" |
|
|
for cls, count in sorted(counts.items(), key=lambda x: x[1], reverse=True): |
|
|
avg_cls_conf = np.mean(conf_by_class[cls]) |
|
|
quality_icon = "β
" if avg_cls_conf >= 0.60 else "β οΈ" if avg_cls_conf >= 0.40 else "β" |
|
|
summary += f"- {quality_icon} **{cls}**: {count} object{'s' if count > 1 else ''} (avg conf: {avg_cls_conf:.1%})\n" |
|
|
|
|
|
|
|
|
warnings = [] |
|
|
    if avg_conf < 0.35:
        warnings.append(
            f"⚠️ Very low average confidence. Consider increasing threshold to {min(0.50, conf + 0.15):.2f}")
    if len(boxes) > 15:
        warnings.append(
            "⚠️ Many detections found. Consider increasing confidence threshold to reduce false positives")

    low_conf_count = sum(1 for b in boxes if b['conf'] < 0.40)
    if low_conf_count > len(boxes) * 0.5 and len(set(b['cls'] for b in boxes)) > 3:
        warnings.append(
            "⚠️ Multiple low-confidence detections spread across different classes")
        warnings.append(
            f"💡 Recommended: Increase confidence to {min(0.50, conf + 0.20):.2f}")
|
|
|
|
|
    if warnings:
        summary += "\n**⚠️ Recommendations:**\n"
        for warning in warnings:
            summary += f"- {warning}\n"
|
|
|
|
|
|
|
|
summary += f"\n**Inference Configuration:**\n" |
|
|
summary += f"- Test-Time Augmentation: {'β
Enabled' if use_tta else 'β Disabled'}\n" |
|
|
summary += f"- Multi-Scale Ensemble: {'β
Enabled' if use_ensemble else 'β Disabled'}\n" |
|
|
summary += f"- Image Enhancement: {'β
Enabled' if enhance_img else 'β Disabled'}\n" |
|
|
summary += f"- Input Image Size: {img_size}px\n" |
|
|
summary += f"- Confidence Threshold: {conf:.1%}\n" |
|
|
summary += f"- IoU Threshold: {iou:.1%}\n" |
|
|
|
|
|
return out_img, det_table, summary |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def predict_batch(files, conf, iou, use_tta, img_size, use_ensemble, enhance_img): |
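    """Run detection on each uploaded file, save annotated copies, and return batch statistics plus a ZIP of the outputs."""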
|
|
if not files: |
|
|
return {"message": "β οΈ No files uploaded"}, None |
|
|
|
|
|
tmp = Path("pred_tmp") |
|
|
tmp.mkdir(exist_ok=True) |
|
|
|
|
|
meta = [] |
|
|
output_paths = [] |
|
|
total_detections = 0 |
|
|
all_class_counts = {} |
|
|
failed_images = [] |
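
    # Process each file independently so a single unreadable image does not abort the whole batch.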
|
|
|
|
|
for idx, f in enumerate(files, 1): |
|
|
try: |
|
|
img = Image.open(f).convert("RGB") |
|
|
|
|
|
boxes = advanced_inference( |
|
|
img, |
|
|
conf=conf, |
|
|
iou=iou, |
|
|
img_size=img_size, |
|
|
use_tta=use_tta, |
|
|
use_ensemble=use_ensemble, |
|
|
enhance_img=enhance_img |
|
|
) |
|
|
|
|
|
out_img = draw_boxes(img, boxes) |
|
|
|
|
|
out_path = tmp / f"pred_{Path(f).name}" |
|
|
out_img.save(out_path, quality=95, optimize=True) |
|
|
output_paths.append(out_path) |
|
|
|
|
|
counts = {} |
|
|
for b in boxes: |
|
|
cls = CLASS_NAMES[b["cls"]] |
|
|
counts[cls] = counts.get(cls, 0) + 1 |
|
|
all_class_counts[cls] = all_class_counts.get(cls, 0) + 1 |
|
|
|
|
|
total_detections += len(boxes) |
|
|
|
|
|
meta.append({ |
|
|
"image": Path(f).name, |
|
|
"detections": len(boxes), |
|
|
"avg_confidence": f"{np.mean([b['conf'] for b in boxes]):.1%}" if boxes else "N/A", |
|
|
"objects": counts, |
|
|
"status": "β
Success" |
|
|
}) |
|
|
|
|
|
            print(
                f"✅ [{idx}/{len(files)}] {Path(f).name} - {len(boxes)} objects detected")
|
|
|
|
|
except Exception as e: |
|
|
error_msg = str(e) |
|
|
print( |
|
|
f"β [{idx}/{len(files)}] Error processing {Path(f).name}: {error_msg}") |
|
|
failed_images.append(Path(f).name) |
|
|
meta.append({ |
|
|
"image": Path(f).name, |
|
|
"status": "β Failed", |
|
|
"error": error_msg |
|
|
}) |
|
|
|
|
|
|
|
|
zip_path = None |
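
    # Bundle all successfully annotated images into one ZIP archive for download.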
|
|
if output_paths: |
|
|
zip_path = tmp / "predictions.zip" |
|
|
with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED, compresslevel=6) as z: |
|
|
for p in output_paths: |
|
|
z.write(p, arcname=p.name) |
|
|
|
|
|
|
|
|
summary = { |
|
|
"π Processing Summary": { |
|
|
"Total Images": len(files), |
|
|
"β
Successful": len(output_paths), |
|
|
"β Failed": len(failed_images), |
|
|
"Success Rate": f"{(len(output_paths) / len(files) * 100):.1f}%" |
|
|
}, |
|
|
"π― Detection Summary": { |
|
|
"Total Detections": total_detections, |
|
|
"Avg Detections/Image": f"{total_detections / len(output_paths):.1f}" if output_paths else "0", |
|
|
"Images with Detections": sum(1 for m in meta if m.get('detections', 0) > 0) |
|
|
}, |
|
|
"π¦ Class Distribution": all_class_counts, |
|
|
"πΌοΈ Detailed Results": meta |
|
|
} |
|
|
|
|
|
if failed_images: |
|
|
summary["β Failed Images"] = failed_images |
|
|
|
|
|
return summary, str(zip_path) if zip_path else None |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
css = """ |
|
|
.gradio-container { |
|
|
font-family: 'Inter', -apple-system, BlinkMacSystemFont, sans-serif; |
|
|
max-width: 1600px; |
|
|
margin: auto; |
|
|
} |
|
|
.primary-btn { |
|
|
background: linear-gradient(135deg, #ff6b6b 0%, #ee5a6f 100%) !important; |
|
|
border: none !important; |
|
|
color: white !important; |
|
|
font-weight: 600 !important; |
|
|
transition: all 0.3s ease !important; |
|
|
padding: 12px 24px !important; |
|
|
} |
|
|
.primary-btn:hover { |
|
|
transform: translateY(-2px) !important; |
|
|
box-shadow: 0 12px 24px rgba(255, 107, 107, 0.4) !important; |
|
|
} |
|
|
.stats-box { |
|
|
background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%); |
|
|
padding: 20px; |
|
|
border-radius: 12px; |
|
|
margin: 10px 0; |
|
|
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); |
|
|
} |
|
|
.accuracy-badge { |
|
|
display: inline-block; |
|
|
background: linear-gradient(135deg, #10b981 0%, #059669 100%); |
|
|
color: white; |
|
|
padding: 6px 16px; |
|
|
border-radius: 20px; |
|
|
font-weight: bold; |
|
|
font-size: 14px; |
|
|
box-shadow: 0 2px 4px rgba(16, 185, 129, 0.3); |
|
|
} |
|
|
.hackathon-badge { |
|
|
display: inline-block; |
|
|
background: linear-gradient(135deg, #ff6b6b 0%, #ee5a6f 100%); |
|
|
color: white; |
|
|
padding: 8px 20px; |
|
|
border-radius: 25px; |
|
|
font-weight: bold; |
|
|
font-size: 16px; |
|
|
margin: 10px 5px; |
|
|
box-shadow: 0 4px 8px rgba(255, 107, 107, 0.3); |
|
|
} |
|
|
.header-gradient { |
|
|
background: linear-gradient(135deg, #ff6b6b 0%, #ee5a6f 100%); |
|
|
-webkit-background-clip: text; |
|
|
-webkit-text-fill-color: transparent; |
|
|
background-clip: text; |
|
|
} |
|
|
.paranox-banner { |
|
|
background: linear-gradient(135deg, #1a1a2e 0%, #16213e 50%, #0f3460 100%); |
|
|
color: white; |
|
|
padding: 25px; |
|
|
border-radius: 15px; |
|
|
text-align: center; |
|
|
margin-bottom: 20px; |
|
|
box-shadow: 0 8px 16px rgba(0, 0, 0, 0.3); |
|
|
} |
|
|
""" |
|
|
|
|
|
with gr.Blocks(theme=gr.themes.Soft(), css=css, title="TechXNinjas | PARANOX 2.0 - Safety Detector") as demo: |
|
|
|
|
|
gr.HTML(""" |
|
|
<div class="paranox-banner"> |
|
|
<h1 style="font-size: 3em; margin: 0; font-weight: bold; color: white;">⚡ TechXNinjas</h1>
|
|
<div style="margin: 15px 0;"> |
|
|
<span class="hackathon-badge">PARANOX 2.0</span> |
|
|
</div> |
|
|
<p style="font-size: 1.3em; margin: 10px 0; opacity: 0.9; color: #f0f0f0;"> |
|
|
24-Hour National Innovation Hackathon | 3-Month Journey: Build → Pitch → Prototype
|
|
</p> |
|
|
<p style="font-size: 1em; margin: 5px 0; opacity: 0.7; color: #f0f0f0;"> |
|
|
π Where Students Transform Ideas Into Reality |
|
|
</p> |
|
|
</div> |
|
|
""") |
|
|
|
|
|
gr.Markdown(""" |
|
|
<div style="text-align: center; padding: 30px 20px; background: linear-gradient(135deg, #ff6b6b15 0%, #ee5a6f15 100%); border-radius: 15px; margin-bottom: 20px;"> |
|
|
<h1 style="font-size: 2.5em; margin-bottom: 10px;">π‘οΈ AI Safety Object Detector</h1> |
|
|
<p style="font-size: 1.2em; color: #555; margin: 10px 0;"> |
|
|
<span class="accuracy-badge">MAXIMUM ACCURACY MODE</span><br> |
|
|
<span style="margin-top: 10px; display: inline-block;">Advanced YOLOv8 with Enhanced NMS & False Positive Suppression</span> |
|
|
</p> |
|
|
</div> |
|
|
""") |
|
|
|
|
|
with gr.Row(): |
|
|
|
|
|
with gr.Column(scale=2): |
|
|
gr.Markdown("### πΈ Single Image Detection") |
|
|
img_input = gr.Image( |
|
|
type="numpy", |
|
|
label="Upload Image for Detection", |
|
|
height=400, |
|
|
interactive=True |
|
|
) |
|
|
|
|
|
with gr.Accordion("βοΈ Detection Settings", open=True): |
|
|
gr.Markdown("**Core Parameters** - Adjust for optimal results") |
|
|
with gr.Row(): |
|
|
conf = gr.Slider( |
|
|
0.05, 0.95, 0.25, step=0.05, |
|
|
label="π― Confidence Threshold", |
|
|
info="Higher = fewer but more accurate detections (recommended: 0.25-0.45)" |
|
|
) |
|
|
iou = gr.Slider( |
|
|
0.10, 0.95, 0.45, step=0.05, |
|
|
label="π¦ IoU Threshold", |
|
|
info="Higher = less overlap filtering (recommended: 0.45-0.55)" |
|
|
) |
|
|
|
|
|
with gr.Accordion("π¬ Advanced Accuracy Boosters", open=True): |
|
|
gr.Markdown( |
|
|
"**Performance Enhancers** - Enable for maximum accuracy") |
|
|
with gr.Row(): |
|
|
use_tta = gr.Checkbox( |
|
|
value=True, |
|
|
label="β¨ Test-Time Augmentation (TTA)", |
|
|
info="Multiple augmented predictions (+3-7% mAP, slower)" |
|
|
) |
|
|
use_ensemble = gr.Checkbox( |
|
|
value=False, |
|
|
label="π Multi-Scale Ensemble", |
|
|
info="Multiple image sizes (+2-5% mAP, much slower)" |
|
|
) |
|
|
|
|
|
with gr.Row(): |
|
|
enhance_img = gr.Checkbox( |
|
|
value=True, |
|
|
label="π¨ Image Enhancement", |
|
|
info="Auto contrast, sharpness & brightness boost" |
|
|
) |
|
|
img_size = gr.Dropdown( |
|
|
choices=[640, 800, 1024, 1280], |
|
|
value=640, |
|
|
label="π Input Image Size", |
|
|
info="Higher = better for small objects (slower)" |
|
|
) |
|
|
|
|
|
with gr.Accordion("π¨ Visualization Options", open=False): |
|
|
with gr.Row(): |
|
|
show_conf = gr.Checkbox( |
|
|
value=True, |
|
|
label="π Show Confidence Scores", |
|
|
info="Display confidence percentages in labels" |
|
|
) |
|
|
box_thickness = gr.Slider( |
|
|
1, 8, 3, step=1, |
|
|
label="π Bounding Box Thickness", |
|
|
info="Visual thickness of detection boxes" |
|
|
) |
|
|
|
|
|
detect_btn = gr.Button( |
|
|
"π Detect Objects (High Accuracy Mode)", |
|
|
variant="primary", |
|
|
size="lg", |
|
|
elem_classes="primary-btn" |
|
|
) |
|
|
|
|
|
gr.Markdown("---") |
|
|
gr.Markdown("### π Batch Processing Mode") |
|
|
batch_input = gr.File( |
|
|
file_count="multiple", |
|
|
label="Upload Multiple Images (JPG, PNG)", |
|
|
file_types=["image"], |
|
|
height=120 |
|
|
) |
|
|
gr.Markdown( |
|
|
"*π‘ Tip: Upload multiple images to process them all at once and download as ZIP*") |
|
|
|
|
|
|
|
|
with gr.Column(scale=3): |
|
|
gr.Markdown("### π¨ Detection Results") |
|
|
out_img = gr.Image( |
|
|
type="pil", |
|
|
label="Annotated Image with Detections", |
|
|
height=400, |
|
|
show_label=True |
|
|
) |
|
|
|
|
|
with gr.Row(): |
|
|
out_counts = gr.Markdown( |
|
|
value="π€ Upload an image to start detecting objects", |
|
|
elem_classes="stats-box" |
|
|
) |
|
|
|
|
|
with gr.Accordion("π Detailed Detection Table", open=True): |
|
|
out_table = gr.Dataframe( |
|
|
headers=["Class", "Confidence", |
|
|
"Top-Left (x,y)", "Bottom-Right (x,y)", "Area (pxΒ²)"], |
|
|
label="All Detections Sorted by Confidence", |
|
|
row_count=10, |
|
|
wrap=True |
|
|
) |
|
|
|
|
|
gr.Markdown("---") |
|
|
gr.Markdown("### π¦ Batch Processing Results") |
|
|
with gr.Row(): |
|
|
with gr.Column(): |
|
|
batch_meta = gr.JSON( |
|
|
label="π Batch Statistics & Details", show_label=True) |
|
|
with gr.Column(): |
|
|
batch_zip = gr.File( |
|
|
label="π₯ Download All Predictions (ZIP)", show_label=True) |
|
|
|
|
|
|
|
|
with gr.Accordion("π‘ Configuration Guide - Get Best Results", open=False): |
|
|
gr.Markdown(""" |
|
|
## π― Recommended Settings by Use Case |
|
|
|
|
|
### π MAXIMUM ACCURACY (Best for Critical Applications) |
|
|
Perfect for: Safety inspections, compliance checks, detailed analysis |
|
|
|
|
|
| Parameter | Value | Why? |
|-----------|-------|------|
| Confidence | `0.35-0.45` | Filters out most false positives while keeping real objects |
| IoU | `0.45-0.55` | Good balance for overlapping objects |
| TTA | ✅ **Enabled** | +3-7% accuracy through augmentation |
| Ensemble | ✅ **Enabled** | +2-5% accuracy through multi-scale detection |
| Enhancement | ✅ **Enabled** | Improves detection on low-quality images |
| Image Size | `800-1024px` | Better for small and distant objects |
|
|
|
|
|
**Expected Performance:** Best accuracy, ~5-10 seconds per image |
|
|
|
|
|
--- |
|
|
|
|
|
### β‘ BALANCED MODE (Speed + Accuracy) |
|
|
Perfect for: General use, moderate batch processing |
|
|
|
|
|
| Parameter | Value | Why? |
|-----------|-------|------|
| Confidence | `0.30-0.40` | Good detection rate with acceptable false positives |
| IoU | `0.45-0.50` | Standard NMS threshold |
| TTA | ✅ **Enabled** | Worth the small speed cost |
| Ensemble | ❌ **Disabled** | Too slow for marginal gains |
| Enhancement | ✅ **Enabled** | Fast and helpful |
| Image Size | `640px` | Fast and sufficient for most cases |
|
|
|
|
|
**Expected Performance:** Good accuracy, ~2-3 seconds per image |
|
|
|
|
|
--- |
|
|
|
|
|
### π SPEED MODE (Real-time/Batch) |
|
|
Perfect for: Large batches, real-time monitoring, quick scans |
|
|
|
|
|
| Parameter | Value | Why? |
|-----------|-------|------|
| Confidence | `0.40-0.55` | Higher threshold = fewer detections but faster |
| IoU | `0.50-0.60` | Standard NMS, less computation |
| TTA | ❌ **Disabled** | Too slow for speed mode |
| Ensemble | ❌ **Disabled** | Significantly slower |
| Enhancement | ❌ **Disabled** | Save preprocessing time |
| Image Size | `640px` | Fastest inference size |
|
|
|
|
|
**Expected Performance:** Fast, ~0.5-1 second per image |
|
|
|
|
|
--- |
|
|
|
|
|
## π Understanding Each Parameter |
|
|
|
|
|
### Confidence Threshold (0.05-0.95) |
|
|
- **What it does:** Minimum probability score for a detection to be kept |
|
|
- **Lower (0.15-0.25):** More detections, more false positives |
|
|
- **Higher (0.40-0.60):** Fewer detections, fewer false positives |
|
|
- **Sweet spot:** 0.30-0.40 for most use cases |
|
|
|
|
|
### IoU Threshold (0.10-0.95) |
|
|
- **What it does:** Controls how much boxes can overlap before one is removed (Non-Maximum Suppression) |
|
|
- **Lower (0.30-0.40):** More aggressive overlap removal, fewer boxes kept |
|
|
- **Higher (0.50-0.70):** Keeps more overlapping boxes (good for crowded scenes) |
|
|
- **Sweet spot:** 0.45-0.55 for most use cases |
|
|
""") |
|
|
|
|
|
|
|
|
with gr.Accordion("π Model & System Information", open=False): |
|
|
gr.Markdown(f""" |
|
|
## π€ Model Details |
|
|
|
|
|
**Architecture:** YOLOv8s (Small) |
|
|
- Parameters: 11.2M |
|
|
- FLOPs: 28.6G |
|
|
- Size: ~22MB |
|
|
|
|
|
**Trained Classes ({len(CLASS_NAMES)}):** |
|
|
``` |
|
|
{' • '.join(CLASS_NAMES)}
|
|
``` |
|
|
|
|
|
## π₯οΈ Runtime Configuration |
|
|
|
|
|
**Device:** {device.upper()} |
|
|
**Precision:** {"FP16 (Half-precision)" if device == "cuda" else "FP32 (Full-precision)"} |
|
|
**CUDA Available:** {"✅ Yes" if torch.cuda.is_available() else "❌ No (using CPU)"}
|
|
|
|
|
## ✨ Advanced Features Enabled

✅ **Test-Time Augmentation (TTA)**
|
|
- Horizontal flips, brightness adjustments, scale variations |
|
|
- Predictions averaged across augmentations

✅ **Multi-Scale Ensemble Inference**
|
|
- Multiple input resolutions (±64px from base size)
|
|
- Per-class, confidence-ranked NMS merging of predictions across scales

✅ **Image Preprocessing & Enhancement**
|
|
- Contrast enhancement (+15%) |
|
|
- Sharpness boost (+20%) |
|
|
- Brightness normalization (+5%)

✅ **Improved Non-Maximum Suppression (NMS)**
|
|
- Class-agnostic NMS for better cross-class handling |
|
|
- Nested box removal algorithm |
|
|
- Confidence-ranked duplicate removal

✅ **False Positive Suppression**
|
|
- Containment-based filtering (boxes inside other boxes) |
|
|
- High-overlap cross-class suppression |
|
|
- Confidence-based quality assessment |
|
|
""") |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
detect_btn.click( |
|
|
fn=predict_single, |
|
|
inputs=[ |
|
|
img_input, |
|
|
conf, |
|
|
iou, |
|
|
use_tta, |
|
|
img_size, |
|
|
use_ensemble, |
|
|
enhance_img, |
|
|
show_conf, |
|
|
box_thickness |
|
|
], |
|
|
outputs=[out_img, out_table, out_counts] |
|
|
) |
|
|
|
|
|
|
|
|
batch_input.change( |
|
|
fn=predict_batch, |
|
|
inputs=[ |
|
|
batch_input, |
|
|
conf, |
|
|
iou, |
|
|
use_tta, |
|
|
img_size, |
|
|
use_ensemble, |
|
|
enhance_img |
|
|
], |
|
|
outputs=[batch_meta, batch_zip] |
|
|
) |
|
|
|
|
|
|
|
|
gr.Markdown("---") |
|
|
gr.Markdown("### π Quick Start Examples") |
|
|
gr.Markdown(""" |
|
|
**Try these configurations for common scenarios:** |
|
|
|
|
|
1. **Single clear object (like fire extinguisher):** |
|
|
- Confidence: 0.40, IoU: 0.50, TTA: ✅, Ensemble: ❌, Size: 640px
|
|
|
|
|
2. **Multiple small objects:** |
|
|
- Confidence: 0.25, IoU: 0.45, TTA: ✅, Ensemble: ✅, Size: 1024px
|
|
|
|
|
3. **Fast batch processing:** |
|
|
- Confidence: 0.45, IoU: 0.55, TTA: ❌, Ensemble: ❌, Size: 640px
|
|
|
|
|
4. **Low quality/dark images:** |
|
|
- Confidence: 0.30, IoU: 0.50, TTA: ✅, Enhancement: ✅, Size: 800px
|
|
""") |
|
|
|
|
|
|
|
|
gr.HTML(""" |
|
|
<div style="text-align: center; padding: 30px 20px; margin-top: 40px; background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%); border-radius: 15px; color: white;"> |
|
|
<h3 style="color: #ff6b6b; margin-bottom: 15px;">π Built with Innovation & Passion</h3> |
|
|
<p style="font-size: 1.1em; margin: 10px 0; color: white;"> |
|
|
Powered by TechXNinjas | PARANOX 2.0 Hackathon Project |
|
|
</p> |
|
|
<p style="opacity: 0.8; margin: 10px 0; color: white;"> |
|
|
24-Hour National Hackathon • 3-Month Innovation Journey • Student-Led Excellence
|
|
</p> |
|
|
<div style="margin-top: 20px; padding-top: 20px; border-top: 1px solid rgba(255,255,255,0.2);"> |
|
|
<p style="opacity: 0.7; font-size: 0.9em; color: white;"> |
|
|
β οΈ AI-Powered Tool β’ Always verify critical detections manually<br> |
|
|
Made with β€οΈ for safety and security applications |
|
|
</p> |
|
|
</div> |
|
|
</div> |
|
|
""") |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__": |
|
|
print("\n" + "="*60) |
|
|
print("β‘ TechXNinjas - PARANOX 2.0") |
|
|
print("π Starting AI Safety Object Detector") |
|
|
print("="*60) |
|
|
print(f"π¦ Model: {MODEL_PATH}") |
|
|
print(f"π·οΈ Classes: {len(CLASS_NAMES)}") |
|
|
print(f"π₯οΈ Device: {device.upper()}") |
|
|
print(f"β‘ Precision: {'FP16' if device == 'cuda' else 'FP32'}") |
|
|
print("="*60 + "\n") |
|
|
|
|
|
demo.launch( |
|
|
server_name="0.0.0.0", |
|
|
server_port=7860, |
|
|
show_error=True, |
|
|
share=False, |
|
|
show_api=False, |
|
|
favicon_path=None |
|
|
) |
|
|
|
|
|
print("\nβ
Application started successfully!") |
|
|
print("π Open your browser and navigate to the URL shown above") |
|
|
print("β οΈ Press Ctrl+C to stop the server\n") |
|
|
|