File size: 7,847 Bytes
75f48fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f8a3c0e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
"""

Enhanced YOLO detection with improved accuracy, color detection, and detailed attributes

"""
from ultralytics import YOLO # type: ignore
import cv2 # type: ignore
import numpy as np # type: ignore
from collections import Counter
import webcolors # type: ignore
# from sklearn.cluster import KMeans # type: ignore  # Temporarily disabled due to numpy compatibility
import torch # type: ignore

# Load a more accurate YOLO model
# For better accuracy, use yolov8m.pt or yolov8l.pt instead of yolov8n.pt
model_size = 'yolov8m.pt'  # Medium model for better accuracy vs speed balance
model = YOLO(model_size)

# Set higher confidence threshold for better accuracy
CONFIDENCE_THRESHOLD = 0.5  # Increase this for fewer but more accurate detections
NMS_THRESHOLD = 0.45  # Non-maximum suppression threshold

def get_dominant_colors(image, n_colors=3):
    """Extract up to *n_colors* human-readable color names from an image region.

    Uses simple averaging plus corner/center sampling instead of K-means
    (sklearn temporarily disabled due to a numpy compatibility issue).

    Args:
        image: H x W x 3 array (a cropped object region). # assumes RGB order — TODO confirm caller
        n_colors: Maximum number of distinct color names to return.

    Returns:
        A non-empty list of color-name strings; ``["Unknown"]`` when the
        region is empty or analysis fails.
    """
    try:
        h, w = image.shape[:2]
        # Guard against degenerate (zero-area) crops up front: np.mean of an
        # empty array yields NaN and any pixel indexing raises IndexError.
        # (The original per-corner `if w > 0 else image[0, 0]` guards were
        # useless — the fallback indexed the empty array too.)
        if h == 0 or w == 0:
            return ["Unknown"]

        # Average color over the whole region.
        avg_color = np.mean(image.reshape(-1, 3), axis=0).astype(int)

        # Sample a few fixed points for variety: three corners plus center.
        samples = [
            image[0, 0],           # top-left
            image[0, w - 1],       # top-right
            image[h - 1, 0],       # bottom-left
            image[h // 2, w // 2]  # center
        ]

        color_names = []
        try:
            color_names.append(get_color_name(avg_color))
        except Exception:
            # Name lookup failed; fall back to a raw RGB string.
            color_names.append(f"RGB({avg_color[0]},{avg_color[1]},{avg_color[2]})")

        # Add sampled colors whose names are not already in the list.
        # Narrow except Exception (was a bare `except:`) so Ctrl-C and
        # SystemExit are no longer swallowed.
        for sample in samples[:n_colors - 1]:
            try:
                name = get_color_name(sample)
            except Exception:
                continue
            if name not in color_names:
                color_names.append(name)
                if len(color_names) >= n_colors:
                    break

        return color_names if color_names else ["Unknown"]
    except Exception:
        return ["Unknown"]

def get_color_name(rgb_color):
    """Map an (R, G, B) triple to the closest CSS3 color name.

    webcolors only names exact matches, so we run a nearest-neighbor search
    over the CSS3 palette using squared Euclidean distance in RGB space.

    Args:
        rgb_color: Sequence of three numbers (R, G, B), expected 0-255.

    Returns:
        The CSS3 color name whose RGB value is closest to *rgb_color*.
    """
    r, g, b = int(rgb_color[0]), int(rgb_color[1]), int(rgb_color[2])

    def _distance(item):
        # item is a (hex_value, name) pair from the CSS3 palette.
        r_c, g_c, b_c = webcolors.hex_to_rgb(item[0])
        return (r_c - r) ** 2 + (g_c - g) ** 2 + (b_c - b) ** 2

    # min(..., key=...) replaces the original distance-keyed dict, which
    # silently dropped colors with equal distances (key collision) and
    # materialized the whole mapping just to take the minimum.
    _, name = min(webcolors.CSS3_HEX_TO_NAMES.items(), key=_distance)
    return name

def analyze_object_attributes(image, box, label):
    """Build a dict of human-readable attributes for one detection.

    Args:
        image: Full frame the detection came from.
        box: (x1, y1, x2, y2) coordinates of the detection.
        label: Class name of the detected object.

    Returns:
        Dict with the label, position/size descriptions, dominant colors,
        a confidence placeholder (filled in by the caller), and the bbox
        as a list of floats.
    """
    x1, y1, x2, y2 = box
    crop = image[int(y1):int(y2), int(x1):int(x2)]

    return {
        'label': label,
        'position': get_position_description(x1, y1, x2, y2, image.shape),
        'size': get_size_description(x2 - x1, y2 - y1, image.shape),
        'colors': get_dominant_colors(crop, n_colors=2),
        'confidence': None,  # set later from the detection score
        'bbox': [float(x1), float(y1), float(x2), float(y2)],
    }

def get_position_description(x1, y1, x2, y2, image_shape):
    """Describe where a box sits in the frame (e.g. "top-left", "center").

    The frame is split into a 3x3 grid of thirds and the description is
    derived from which cell the box center falls into; the neutral axis
    is dropped so e.g. "middle-left" reads simply as "left".
    """
    h, w = image_shape[:2]
    cx = (x1 + x2) / 2
    cy = (y1 + y2) / 2

    # Horizontal third: left / center / right.
    if cx < w / 3:
        horizontal = "left"
    elif cx > 2 * w / 3:
        horizontal = "right"
    else:
        horizontal = "center"

    # Vertical third: top / middle / bottom.
    if cy < h / 3:
        vertical = "top"
    elif cy > 2 * h / 3:
        vertical = "bottom"
    else:
        vertical = "middle"

    if vertical == "middle":
        return "center" if horizontal == "center" else horizontal
    if horizontal == "center":
        return vertical
    return f"{vertical}-{horizontal}"

def get_size_description(width, height, image_shape):
    """Classify object size relative to the frame area.

    Buckets by the fraction of the frame the box covers, from "tiny"
    (<= 5%) up to "very large" (> 50%).
    """
    frame_area = image_shape[0] * image_shape[1]
    coverage = (width * height) / frame_area

    # Ordered thresholds, largest first; the first exceeded one wins.
    for threshold, description in (
        (0.5, "very large"),
        (0.25, "large"),
        (0.1, "medium"),
        (0.05, "small"),
    ):
        if coverage > threshold:
            return description
    return "tiny"

def detect_objects_enhanced(image, confidence_threshold=CONFIDENCE_THRESHOLD):
    """Run YOLO on *image* and describe every detection in detail.

    Args:
        image: Input frame; RGBA and grayscale numpy arrays are converted
            to 3-channel before inference.
        confidence_threshold: Minimum detection confidence to keep.

    Returns:
        Tuple of (annotated image with boxes drawn, list of detected class
        names, list of per-detection attribute dicts).
    """
    # Normalize the input to 3 channels so the model and the downstream
    # color analysis both see the same format.
    if isinstance(image, np.ndarray):
        if image.shape[-1] == 4:
            image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
        elif len(image.shape) == 2 or image.shape[-1] == 1:
            image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    results = model(
        image,
        conf=confidence_threshold,   # confidence threshold
        iou=NMS_THRESHOLD,           # NMS IoU threshold
        imgsz=640,                   # larger sizes trade speed for accuracy
        device=device,
    )
    result = results[0]

    # Image with boxes, labels, and confidence scores drawn on.
    annotated_img = result.plot(conf=True, line_width=2, font_size=10)

    labels = []
    attribute_dicts = []
    for box in result.boxes:
        # The model already filters by conf; double-check defensively.
        if box.conf[0] < confidence_threshold:
            continue
        label = result.names[int(box.cls[0].item())]
        coords = box.xyxy[0].tolist()
        attrs = analyze_object_attributes(image, coords, label)
        attrs['confidence'] = f"{float(box.conf[0].item()):.2%}"
        labels.append(label)
        attribute_dicts.append(attrs)

    return annotated_img, labels, attribute_dicts

def get_intelligence_report(detailed_attributes):
    """Format a list of detection-attribute dicts into a readable report.

    Args:
        detailed_attributes: Dicts as produced by analyze_object_attributes
            (with 'confidence' filled in).

    Returns:
        Multi-line report string; a fixed message when nothing was detected.
    """
    if not detailed_attributes:
        return "No objects detected in the image."

    lines = [f"Detected {len(detailed_attributes)} object(s):"]
    for attr in detailed_attributes:
        if attr['colors']:
            colors_str = " and ".join(attr['colors'][:2])
        else:
            colors_str = "unknown colors"
        lines.append(
            f"- A {attr['size']} {colors_str} {attr['label']} "
            f"in the {attr['position']} of the image "
            f"(confidence: {attr['confidence']})"
        )

    # Per-class counts are only worth printing when more than one class
    # was detected.
    counts = Counter(attr['label'] for attr in detailed_attributes)
    if len(counts) > 1:
        lines.append("\nSummary:")
        lines.extend(
            f"  • {count} {obj_type}(s)"
            for obj_type, count in counts.most_common()
        )

    return "\n".join(lines)

# Backward compatibility wrapper
def detect_objects(image):
    """

    Wrapper for backward compatibility with original function

    """
    annotated_img, detected_objects, _ = detect_objects_enhanced(image)
    return annotated_img, detected_objects