Spaces:
Sleeping
Sleeping
File size: 7,847 Bytes
75f48fa f8a3c0e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 |
"""
Enhanced YOLO detection with improved accuracy, color detection, and detailed attributes
"""
from ultralytics import YOLO # type: ignore
import cv2 # type: ignore
import numpy as np # type: ignore
from collections import Counter
import webcolors # type: ignore
# from sklearn.cluster import KMeans # type: ignore # Temporarily disabled due to numpy compatibility
import torch # type: ignore
# Load a more accurate YOLO model at import time (downloads weights on first run).
# For better accuracy, use yolov8m.pt or yolov8l.pt instead of yolov8n.pt
model_size = 'yolov8m.pt' # Medium model for better accuracy vs speed balance
model = YOLO(model_size)
# Detection tuning constants shared by detect_objects_enhanced().
# Set higher confidence threshold for better accuracy
CONFIDENCE_THRESHOLD = 0.5 # Increase this for fewer but more accurate detections
NMS_THRESHOLD = 0.45 # Non-maximum suppression (IoU) threshold passed to YOLO
def get_dominant_colors(image, n_colors=3):
    """
    Extract representative color names from an image region.

    Uses the region's mean color plus a few fixed sample points (three
    corners and the center) instead of K-means clustering, which is
    temporarily disabled due to a numpy/sklearn compatibility issue.

    Args:
        image: H x W x 3 pixel array for the region (RGB channel order,
            matching the frames fed in by detect_objects_enhanced).
        n_colors: maximum number of distinct color names to return.

    Returns:
        List of color-name strings; ["Unknown"] when the region is
        empty or analysis fails entirely.
    """
    try:
        # An empty crop (zero-area bounding box) has no colors.
        if image is None or image.size == 0:
            return ["Unknown"]
        h, w = image.shape[:2]
        # Mean color over every pixel in the region.
        avg_color = np.mean(image.reshape(-1, 3), axis=0).astype(int)
        # Fixed sample points for variety (the 4th is the center, not a corner).
        samples = [
            image[0, 0],            # top-left
            image[0, w - 1],        # top-right
            image[h - 1, 0],        # bottom-left
            image[h // 2, w // 2],  # center
        ]
        color_names = []
        # Name the average color first; fall back to a raw RGB string
        # if the name lookup fails for any reason.
        try:
            color_names.append(get_color_name(avg_color))
        except Exception:
            color_names.append(f"RGB({avg_color[0]},{avg_color[1]},{avg_color[2]})")
        # Add sampled colors until we have n_colors distinct names.
        for sample in samples[:max(n_colors - 1, 0)]:
            try:
                name = get_color_name(sample)
            except Exception:
                continue  # skip unnameable samples rather than abort
            if name not in color_names:
                color_names.append(name)
                if len(color_names) >= n_colors:
                    break
        return color_names if color_names else ["Unknown"]
    except Exception:
        # Color description is best-effort; never let it break detection.
        return ["Unknown"]
def get_color_name(rgb_color):
    """
    Map an RGB triple to the closest CSS3 color name.

    Performs a nearest-neighbor search over the CSS3 palette using
    squared Euclidean distance in RGB space.

    Args:
        rgb_color: sequence of three numbers (R, G, B) in 0-255.

    Returns:
        The CSS3 color name closest to rgb_color.
    """
    # webcolors < 1.13 exposed the palette as CSS3_HEX_TO_NAMES; newer
    # releases removed it in favor of names()/name_to_hex, so support both.
    try:
        hex_to_name = webcolors.CSS3_HEX_TO_NAMES
    except AttributeError:
        hex_to_name = {
            webcolors.name_to_hex(name, spec="css3"): name
            for name in webcolors.names("css3")
        }
    r, g, b = rgb_color[0], rgb_color[1], rgb_color[2]
    best_name = None
    best_dist = None
    # Track the minimum directly instead of keying a dict by distance,
    # which silently collided when two palette entries tied.
    for hex_value, name in hex_to_name.items():
        r_c, g_c, b_c = webcolors.hex_to_rgb(hex_value)
        dist = (r_c - r) ** 2 + (g_c - g) ** 2 + (b_c - b) ** 2
        if best_dist is None or dist < best_dist:
            best_dist = dist
            best_name = name
    return best_name
def analyze_object_attributes(image, box, label):
    """
    Build a descriptive attribute dict for a single detection.

    Args:
        image: full frame the detection came from (H x W x 3 array).
        box: (x1, y1, x2, y2) bounding-box coordinates.
        label: class name of the detected object.

    Returns:
        Dict with the label, human-readable position and size, dominant
        colors of the cropped region, a confidence placeholder (filled
        in by the caller), and the raw bounding box as floats.
    """
    x1, y1, x2, y2 = box
    crop = image[int(y1):int(y2), int(x1):int(x2)]
    return {
        'label': label,
        'position': get_position_description(x1, y1, x2, y2, image.shape),
        'size': get_size_description(x2 - x1, y2 - y1, image.shape),
        'colors': get_dominant_colors(crop, n_colors=2),
        'confidence': None,  # set from the detection by the caller
        'bbox': [float(x1), float(y1), float(x2), float(y2)],
    }
def get_position_description(x1, y1, x2, y2, image_shape):
    """
    Describe where a bounding box sits within the frame.

    The frame is divided into a 3x3 grid and the box is classified by
    the grid cell containing its center point.

    Args:
        x1, y1, x2, y2: bounding-box coordinates.
        image_shape: frame shape tuple (height, width, ...).

    Returns:
        A short phrase such as "center", "left", "top", or "top-left".
    """
    height, width = image_shape[:2]
    cx = (x1 + x2) / 2
    cy = (y1 + y2) / 2
    # Classify the center into one of three columns and three rows.
    if cx < width / 3:
        col = "left"
    elif cx > 2 * width / 3:
        col = "right"
    else:
        col = "center"
    if cy < height / 3:
        row = "top"
    elif cy > 2 * height / 3:
        row = "bottom"
    else:
        row = "middle"
    # Collapse redundant words: "middle-center" -> "center", etc.
    if row == "middle":
        return "center" if col == "center" else col
    if col == "center":
        return row
    return f"{row}-{col}"
def get_size_description(width, height, image_shape):
    """
    Classify an object's size relative to the whole frame.

    Args:
        width, height: bounding-box dimensions in pixels.
        image_shape: frame shape tuple (height, width, ...).

    Returns:
        "very large" (>50% of frame area), "large" (>25%),
        "medium" (>10%), "small" (>5%), or "tiny" otherwise.
    """
    frame_area = image_shape[0] * image_shape[1]
    fraction = (width * height) / frame_area
    # Walk the thresholds from largest to smallest.
    for threshold, description in (
        (0.5, "very large"),
        (0.25, "large"),
        (0.1, "medium"),
        (0.05, "small"),
    ):
        if fraction > threshold:
            return description
    return "tiny"
def detect_objects_enhanced(image, confidence_threshold=CONFIDENCE_THRESHOLD):
    """
    Enhanced YOLO detection with improved accuracy and detailed attributes.

    Args:
        image: input frame (RGB, RGBA, or grayscale numpy array).
        confidence_threshold: minimum detection confidence to keep.

    Returns:
        Tuple of:
        - annotated image with bounding boxes drawn
        - list of detected object class names
        - list of per-detection attribute dicts (position, size,
          colors, confidence, bbox)
    """
    # Normalize the input to 3-channel RGB. Check the number of
    # dimensions BEFORE looking at the channel axis, otherwise a 2-D
    # grayscale image whose width happens to be 4 would be mistaken for
    # RGBA (shape[-1] is the width for a 2-D array).
    if isinstance(image, np.ndarray):
        if len(image.shape) == 2 or image.shape[-1] == 1:
            image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
        elif image.shape[-1] == 4:
            image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
    # Run YOLO with custom parameters for better accuracy.
    results = model(
        image,
        conf=confidence_threshold,  # confidence threshold
        iou=NMS_THRESHOLD,          # NMS IoU threshold
        imgsz=640,                  # image size (increase for better accuracy)
        device='cuda' if torch.cuda.is_available() else 'cpu'
    )
    # Render the annotated frame with confidence scores.
    annotated_img = results[0].plot(
        conf=True,
        line_width=2,
        font_size=10
    )
    # Extract per-detection details.
    detected_objects = []
    detailed_attributes = []
    for box in results[0].boxes:
        # Double-check confidence (YOLO already filtered at conf=...,
        # but this guards against a lower model-side default).
        if box.conf[0] >= confidence_threshold:
            cls_id = int(box.cls[0].item())
            label = results[0].names[cls_id]
            confidence = float(box.conf[0].item())
            xyxy = box.xyxy[0].tolist()
            attributes = analyze_object_attributes(image, xyxy, label)
            attributes['confidence'] = f"{confidence:.2%}"
            detected_objects.append(label)
            detailed_attributes.append(attributes)
    return annotated_img, detected_objects, detailed_attributes
def get_intelligence_report(detailed_attributes):
    """
    Turn a list of detection-attribute dicts into a readable report.

    Args:
        detailed_attributes: list of dicts as produced by
            analyze_object_attributes, each with 'label', 'size',
            'colors', 'position', and 'confidence' keys.

    Returns:
        A multi-line string with one bullet per detection, plus a
        per-class summary when more than one class was found.
    """
    if not detailed_attributes:
        return "No objects detected in the image."
    lines = [f"Detected {len(detailed_attributes)} object(s):"]
    for attrs in detailed_attributes:
        # Describe at most two colors, joined with "and".
        colors_str = " and ".join(attrs['colors'][:2]) if attrs['colors'] else "unknown colors"
        lines.append(
            f"- A {attrs['size']} {colors_str} {attrs['label']} "
            f"in the {attrs['position']} of the image "
            f"(confidence: {attrs['confidence']})"
        )
    # Per-class counts, only worth showing when classes differ.
    object_types = Counter(attrs['label'] for attrs in detailed_attributes)
    if len(object_types) > 1:
        lines.append("\nSummary:")
        lines.extend(
            f" • {count} {obj_type}(s)"
            for obj_type, count in object_types.most_common()
        )
    return "\n".join(lines)
# Backward compatibility wrapper
def detect_objects(image):
    """
    Wrapper for backward compatibility with the original function.

    Runs the enhanced detector and discards the detailed attributes,
    returning only (annotated_image, detected_object_names).
    """
    # NOTE(review): removed a stray trailing "|" artifact that made the
    # original return statement a syntax error.
    annotated_img, detected_objects, _ = detect_objects_enhanced(image)
    return annotated_img, detected_objects