# Source: ObjectDetection_App/backend/yolo_enhanced.py (Hugging Face Space by Navada25)
# Commit 75f48fa (verified): Deploy NAVADA 2.0 Lite - Optimized for HF Spaces (no face recognition)
"""
Enhanced YOLO detection with improved accuracy, color detection, and detailed attributes
"""
from ultralytics import YOLO # type: ignore
import cv2 # type: ignore
import numpy as np # type: ignore
from collections import Counter
import webcolors # type: ignore
# from sklearn.cluster import KMeans # type: ignore # Temporarily disabled due to numpy compatibility
import torch # type: ignore
# Model selection. The weights are loaded once, at import time, and the
# resulting `model` object is shared by every detection call in this module.
# yolov8m.pt (medium) is used here; yolov8l.pt is a more accurate but heavier
# alternative and yolov8n.pt a lighter, less accurate one.
model_size = 'yolov8m.pt'  # Medium model for better accuracy vs speed balance
model = YOLO(model_size)
# Detection thresholds.
# CONFIDENCE_THRESHOLD is the default `confidence_threshold` of
# detect_objects_enhanced(); NMS_THRESHOLD is passed to the model as `iou`.
CONFIDENCE_THRESHOLD = 0.5  # Increase this for fewer but more accurate detections
NMS_THRESHOLD = 0.45  # Non-maximum suppression (IoU) threshold
def get_dominant_colors(image, n_colors=3):
    """
    Name the most representative colors of an image region.

    K-means clustering is disabled (numpy compatibility), so this is a cheap
    approximation: the mean color of the region is named first, then a few
    sampled pixels (top-left, top-right, bottom-left, center, in that order)
    are named and appended when they add a name not already listed. Only the
    first ``n_colors - 1`` samples are consulted.

    Args:
        image: Pixel array that can be reshaped to (-1, 3). Channels are
            handed to get_color_name in stored order (presumably RGB -
            TODO confirm against callers).
        n_colors: Maximum number of color names to return. Values below 1
            are treated as 1, so the mean color is always reported.

    Returns:
        A list with between 1 and n_colors entries. The mean color falls
        back to an "RGB(r,g,b)" string when it cannot be named.
        ["Unknown"] is returned when the region is empty or is not a usable
        pixel array.
    """
    # A zero-area crop (degenerate bounding box) has nothing to average;
    # return early instead of letting numpy warn about an empty mean.
    if image is None or getattr(image, "size", 0) == 0:
        return ["Unknown"]

    try:
        avg_color = [
            int(channel) for channel in np.mean(image.reshape(-1, 3), axis=0)
        ]
        h, w = image.shape[:2]
        samples = [
            image[0, 0],            # Top-left
            image[0, w - 1],        # Top-right
            image[h - 1, 0],        # Bottom-left
            image[h // 2, w // 2],  # Center
        ]
    except (AttributeError, IndexError, TypeError, ValueError):
        # Not an array, wrong number of dimensions, or not divisible into
        # 3-channel pixels.
        return ["Unknown"]

    max_names = max(n_colors, 1)
    color_names = []

    # Naming is best-effort (it depends on the color lookup succeeding);
    # keep the numeric fallback rather than failing the whole detection.
    try:
        color_names.append(get_color_name(avg_color))
    except Exception:
        color_names.append(f"RGB({avg_color[0]},{avg_color[1]},{avg_color[2]})")

    for sample in samples[:max_names - 1]:
        try:
            # Pass plain ints: raw uint8 pixel values can wrap around in the
            # squared-distance arithmetic under NumPy 2 promotion rules.
            name = get_color_name([int(channel) for channel in sample[:3]])
        except Exception:
            # A sample that cannot be named is simply skipped.
            continue
        if name not in color_names:
            color_names.append(name)

    return color_names
def get_color_name(rgb_color):
    """
    Convert RGB values to the closest CSS3 color name.

    "Closest" means the smallest squared Euclidean distance in RGB space.
    When several CSS3 colors are equally close, the one encountered last in
    the palette iteration wins.

    Args:
        rgb_color: Indexable holding at least three numeric channels
            (red, green, blue). Numpy scalars are accepted.

    Returns:
        The name of the nearest CSS3 color.
    """
    # Convert to plain Python numbers: numpy uint8 channels can wrap around
    # in the subtraction/squaring below under NumPy 2 promotion rules.
    red = float(rgb_color[0])
    green = float(rgb_color[1])
    blue = float(rgb_color[2])

    legacy_table = getattr(webcolors, "CSS3_HEX_TO_NAMES", None)
    if legacy_table is not None:
        palette = (
            (name, webcolors.hex_to_rgb(hex_code))
            for hex_code, name in legacy_table.items()
        )
    else:
        # Newer webcolors releases removed the mapping constants; the
        # names() function is their replacement.
        palette = (
            (name, webcolors.name_to_rgb(name, spec="css3"))
            for name in webcolors.names("css3")
        )

    best_name = None
    best_distance = None
    for name, (r_c, g_c, b_c) in palette:
        distance = (r_c - red) ** 2 + (g_c - green) ** 2 + (b_c - blue) ** 2
        # "<=" keeps the last-wins behavior on ties.
        if best_distance is None or distance <= best_distance:
            best_name, best_distance = name, distance
    return best_name
def analyze_object_attributes(image, box, label):
    """
    Analyze detailed attributes of a detected object.

    Args:
        image: Image array indexed as [row, column, ...]; the object crop is
            taken from it.
        box: Four values (x1, y1, x2, y2) giving the bounding-box corners in
            pixel coordinates.
        label: Class name of the detected object.

    Returns:
        A dict with keys 'label', 'position', 'size', 'colors',
        'confidence' (None here; the caller fills it in) and 'bbox' (the
        box coordinates as floats).
    """
    x1, y1, x2, y2 = box

    # Clamp the crop's lower bounds to the image: a negative index would make
    # numpy slice from the opposite edge and sample the wrong pixels. Upper
    # bounds need no clamp because slicing past the end is already safe.
    left, top = max(int(x1), 0), max(int(y1), 0)
    right, bottom = max(int(x2), 0), max(int(y2), 0)
    object_region = image[top:bottom, left:right]

    attributes = {
        'label': label,
        'position': get_position_description(x1, y1, x2, y2, image.shape),
        'size': get_size_description(x2 - x1, y2 - y1, image.shape),
        'colors': get_dominant_colors(object_region, n_colors=2),
        'confidence': None,  # Will be set from detection
        'bbox': [float(x1), float(y1), float(x2), float(y2)],
    }
    return attributes
def get_position_description(x1, y1, x2, y2, image_shape):
    """
    Describe where a box sits in the image, in human terms.

    The image is split into a 3x3 grid and the box center is located in it.
    The result is "center", a single edge name ("left", "right", "top",
    "bottom") or a corner such as "top-left".

    Args:
        x1, y1, x2, y2: Box corners in pixel coordinates.
        image_shape: Image shape whose first two entries are (height, width).
    """
    img_h, img_w = image_shape[:2]
    mid_x = (x1 + x2) / 2
    mid_y = (y1 + y2) / 2

    def band(value, extent, low_name, mid_name, high_name):
        # First third -> low, last third -> high, everything else -> middle.
        if value < extent / 3:
            return low_name
        if value > 2 * extent / 3:
            return high_name
        return mid_name

    horizontal = band(mid_x, img_w, "left", "center", "right")
    vertical = band(mid_y, img_h, "top", "middle", "bottom")

    if vertical == "middle":
        # Covers "center" as well as plain "left" / "right".
        return horizontal
    if horizontal == "center":
        return vertical
    return f"{vertical}-{horizontal}"
def get_size_description(width, height, image_shape):
    """
    Classify an object's size by the fraction of the image it covers.

    Args:
        width, height: Object extent in pixels.
        image_shape: Image shape whose first two entries are multiplied to
            obtain the image area.

    Returns:
        "very large" (> 50%), "large" (> 25%), "medium" (> 10%),
        "small" (> 5%) or "tiny" otherwise.
    """
    total_area = image_shape[0] * image_shape[1]
    coverage = (width * height) / total_area

    # Ordered from largest to smallest; the first limit exceeded wins.
    size_bands = (
        (0.5, "very large"),
        (0.25, "large"),
        (0.1, "medium"),
        (0.05, "small"),
    )
    for lower_limit, description in size_bands:
        if coverage > lower_limit:
            return description
    return "tiny"
def detect_objects_enhanced(image, confidence_threshold=CONFIDENCE_THRESHOLD):
    """
    Run YOLO detection and collect detailed attributes for each detection.

    Args:
        image: Input image. Numpy arrays that are grayscale (2-D, or with a
            single channel) or 4-channel are converted to 3 channels first;
            anything else is handed to the model unchanged.
        confidence_threshold: Minimum confidence for a detection to be kept.

    Returns:
        A tuple of:
        - annotated image with bounding boxes
        - list of detected object names
        - list of attribute dicts, one per detection, with 'confidence'
          formatted as a percentage string
    """
    # Handle different image formats
    if isinstance(image, np.ndarray):
        # Check dimensionality first: a 2-D grayscale array whose width
        # happens to be 4 must not be mistaken for an RGBA image.
        if image.ndim == 2 or image.shape[-1] == 1:
            image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
        elif image.ndim == 3 and image.shape[-1] == 4:
            image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)

    # Run YOLO with custom parameters for better accuracy
    results = model(
        image,
        conf=confidence_threshold,  # Confidence threshold
        iou=NMS_THRESHOLD,  # NMS IoU threshold
        imgsz=640,  # Image size (can increase for better accuracy)
        device='cuda' if torch.cuda.is_available() else 'cpu',
    )
    first_result = results[0]

    # Get annotated image
    annotated_img = first_result.plot(
        conf=True,  # Show confidence scores
        line_width=2,
        font_size=10,
    )

    # Extract detailed information
    detected_objects = []
    detailed_attributes = []
    for box in first_result.boxes:
        if box.conf[0] >= confidence_threshold:  # Double-check confidence
            cls_id = int(box.cls[0].item())
            label = first_result.names[cls_id]
            confidence = float(box.conf[0].item())

            # Box corners as [x1, y1, x2, y2]
            xyxy = box.xyxy[0].tolist()

            # Attributes are computed on the (possibly converted) input image
            attributes = analyze_object_attributes(image, xyxy, label)
            attributes['confidence'] = f"{confidence:.2%}"

            detected_objects.append(label)
            detailed_attributes.append(attributes)

    return annotated_img, detected_objects, detailed_attributes
def get_intelligence_report(detailed_attributes):
    """
    Build a plain-text report describing the detected objects.

    Args:
        detailed_attributes: List of attribute dicts with the keys 'colors',
            'size', 'label', 'position' and 'confidence'.

    Returns:
        A multi-line string: a header, one bullet per object and, when more
        than one object type is present, a per-type count summary. A fixed
        message is returned when the list is empty.
    """
    if not detailed_attributes:
        return "No objects detected in the image."

    def describe(item):
        # At most two color names are mentioned per object.
        palette = item['colors']
        color_text = " and ".join(palette[:2]) if palette else "unknown colors"
        return (
            f"- A {item['size']} {color_text} {item['label']} "
            f"in the {item['position']} of the image "
            f"(confidence: {item['confidence']})"
        )

    lines = [f"Detected {len(detailed_attributes)} object(s):"]
    lines.extend(describe(item) for item in detailed_attributes)

    # Summary statistics only make sense with more than one object type.
    tally = Counter(item['label'] for item in detailed_attributes)
    if len(tally) > 1:
        lines.append("\nSummary:")
        lines.extend(
            f" • {count} {obj_type}(s)" for obj_type, count in tally.most_common()
        )

    return "\n".join(lines)
# Backward compatibility wrapper
def detect_objects(image):
    """
    Legacy entry point kept for callers of the original function.

    Delegates to detect_objects_enhanced with its default confidence
    threshold and drops the detailed attributes.

    Returns:
        (annotated image, list of detected object names)
    """
    annotated, labels, _details = detect_objects_enhanced(image)
    return annotated, labels