Spaces:
Sleeping
Sleeping
File size: 7,847 Bytes
75f48fa f8a3c0e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 |
"""
Enhanced YOLO detection with improved accuracy, color detection, and detailed attributes
"""
from ultralytics import YOLO # type: ignore
import cv2 # type: ignore
import numpy as np # type: ignore
from collections import Counter
import webcolors # type: ignore
# from sklearn.cluster import KMeans # type: ignore # Temporarily disabled due to numpy compatibility
import torch # type: ignore
# Load a more accurate YOLO model at import time (downloads weights on first run).
# For better accuracy, use yolov8m.pt or yolov8l.pt instead of yolov8n.pt
model_size = 'yolov8m.pt' # Medium model for better accuracy vs speed balance
model = YOLO(model_size)
# Detection tuning constants shared by detect_objects_enhanced().
# Set higher confidence threshold for better accuracy
CONFIDENCE_THRESHOLD = 0.5 # Increase this for fewer but more accurate detections
NMS_THRESHOLD = 0.45 # Non-maximum suppression (IoU) threshold passed to YOLO
def get_dominant_colors(image, n_colors=3):
    """
    Extract representative color names from an image region.

    Uses the region's mean color plus a few fixed sample points (three
    corners and the center) instead of K-means clustering, which is
    temporarily disabled due to a numpy/sklearn compatibility issue.

    Args:
        image: H x W x 3 pixel array for the region (RGB channel order,
            matching the frames fed in by detect_objects_enhanced).
        n_colors: maximum number of distinct color names to return.

    Returns:
        List of color-name strings; ["Unknown"] when the region is
        empty or analysis fails entirely.
    """
    try:
        # An empty crop (zero-area bounding box) has no colors.
        if image is None or image.size == 0:
            return ["Unknown"]
        h, w = image.shape[:2]
        # Mean color over every pixel in the region.
        avg_color = np.mean(image.reshape(-1, 3), axis=0).astype(int)
        # Fixed sample points for variety (the 4th is the center, not a corner).
        samples = [
            image[0, 0],            # top-left
            image[0, w - 1],        # top-right
            image[h - 1, 0],        # bottom-left
            image[h // 2, w // 2],  # center
        ]
        color_names = []
        # Name the average color first; fall back to a raw RGB string
        # if the name lookup fails for any reason.
        try:
            color_names.append(get_color_name(avg_color))
        except Exception:
            color_names.append(f"RGB({avg_color[0]},{avg_color[1]},{avg_color[2]})")
        # Add sampled colors until we have n_colors distinct names.
        for sample in samples[:max(n_colors - 1, 0)]:
            try:
                name = get_color_name(sample)
            except Exception:
                continue  # skip unnameable samples rather than abort
            if name not in color_names:
                color_names.append(name)
                if len(color_names) >= n_colors:
                    break
        return color_names if color_names else ["Unknown"]
    except Exception:
        # Color description is best-effort; never let it break detection.
        return ["Unknown"]
def get_color_name(rgb_color):
    """
    Map an RGB triple to the closest CSS3 color name.

    Performs a nearest-neighbor search over the CSS3 palette using
    squared Euclidean distance in RGB space.

    Args:
        rgb_color: sequence of three numbers (R, G, B) in 0-255.

    Returns:
        The CSS3 color name closest to rgb_color.
    """
    # webcolors < 1.13 exposed the palette as CSS3_HEX_TO_NAMES; newer
    # releases removed it in favor of names()/name_to_hex, so support both.
    try:
        hex_to_name = webcolors.CSS3_HEX_TO_NAMES
    except AttributeError:
        hex_to_name = {
            webcolors.name_to_hex(name, spec="css3"): name
            for name in webcolors.names("css3")
        }
    r, g, b = rgb_color[0], rgb_color[1], rgb_color[2]
    best_name = None
    best_dist = None
    # Track the minimum directly instead of keying a dict by distance,
    # which silently collided when two palette entries tied.
    for hex_value, name in hex_to_name.items():
        r_c, g_c, b_c = webcolors.hex_to_rgb(hex_value)
        dist = (r_c - r) ** 2 + (g_c - g) ** 2 + (b_c - b) ** 2
        if best_dist is None or dist < best_dist:
            best_dist = dist
            best_name = name
    return best_name
def analyze_object_attributes(image, box, label):
    """
    Build a descriptive attribute dict for a single detection.

    Args:
        image: full frame the detection came from (H x W x 3 array).
        box: (x1, y1, x2, y2) bounding-box coordinates.
        label: class name of the detected object.

    Returns:
        Dict with the label, human-readable position and size, dominant
        colors of the cropped region, a confidence placeholder (filled
        in by the caller), and the raw bounding box as floats.
    """
    x1, y1, x2, y2 = box
    crop = image[int(y1):int(y2), int(x1):int(x2)]
    return {
        'label': label,
        'position': get_position_description(x1, y1, x2, y2, image.shape),
        'size': get_size_description(x2 - x1, y2 - y1, image.shape),
        'colors': get_dominant_colors(crop, n_colors=2),
        'confidence': None,  # set from the detection by the caller
        'bbox': [float(x1), float(y1), float(x2), float(y2)],
    }
def get_position_description(x1, y1, x2, y2, image_shape):
    """
    Describe where a bounding box sits within the frame.

    The frame is divided into a 3x3 grid and the box is classified by
    the grid cell containing its center point.

    Args:
        x1, y1, x2, y2: bounding-box coordinates.
        image_shape: frame shape tuple (height, width, ...).

    Returns:
        A short phrase such as "center", "left", "top", or "top-left".
    """
    height, width = image_shape[:2]
    cx = (x1 + x2) / 2
    cy = (y1 + y2) / 2
    # Classify the center into one of three columns and three rows.
    if cx < width / 3:
        col = "left"
    elif cx > 2 * width / 3:
        col = "right"
    else:
        col = "center"
    if cy < height / 3:
        row = "top"
    elif cy > 2 * height / 3:
        row = "bottom"
    else:
        row = "middle"
    # Collapse redundant words: "middle-center" -> "center", etc.
    if row == "middle":
        return "center" if col == "center" else col
    if col == "center":
        return row
    return f"{row}-{col}"
def get_size_description(width, height, image_shape):
    """
    Classify an object's size relative to the whole frame.

    Args:
        width, height: bounding-box dimensions in pixels.
        image_shape: frame shape tuple (height, width, ...).

    Returns:
        "very large" (>50% of frame area), "large" (>25%),
        "medium" (>10%), "small" (>5%), or "tiny" otherwise.
    """
    frame_area = image_shape[0] * image_shape[1]
    fraction = (width * height) / frame_area
    # Walk the thresholds from largest to smallest.
    for threshold, description in (
        (0.5, "very large"),
        (0.25, "large"),
        (0.1, "medium"),
        (0.05, "small"),
    ):
        if fraction > threshold:
            return description
    return "tiny"
def detect_objects_enhanced(image, confidence_threshold=CONFIDENCE_THRESHOLD):
    """
    Enhanced YOLO detection with improved accuracy and detailed attributes.

    Args:
        image: input frame (RGB, RGBA, or grayscale numpy array).
        confidence_threshold: minimum detection confidence to keep.

    Returns:
        Tuple of:
        - annotated image with bounding boxes drawn
        - list of detected object class names
        - list of per-detection attribute dicts (position, size,
          colors, confidence, bbox)
    """
    # Normalize the input to 3-channel RGB. Check the number of
    # dimensions BEFORE looking at the channel axis, otherwise a 2-D
    # grayscale image whose width happens to be 4 would be mistaken for
    # RGBA (shape[-1] is the width for a 2-D array).
    if isinstance(image, np.ndarray):
        if len(image.shape) == 2 or image.shape[-1] == 1:
            image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
        elif image.shape[-1] == 4:
            image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
    # Run YOLO with custom parameters for better accuracy.
    results = model(
        image,
        conf=confidence_threshold,  # confidence threshold
        iou=NMS_THRESHOLD,          # NMS IoU threshold
        imgsz=640,                  # image size (increase for better accuracy)
        device='cuda' if torch.cuda.is_available() else 'cpu'
    )
    # Render the annotated frame with confidence scores.
    annotated_img = results[0].plot(
        conf=True,
        line_width=2,
        font_size=10
    )
    # Extract per-detection details.
    detected_objects = []
    detailed_attributes = []
    for box in results[0].boxes:
        # Double-check confidence (YOLO already filtered at conf=...,
        # but this guards against a lower model-side default).
        if box.conf[0] >= confidence_threshold:
            cls_id = int(box.cls[0].item())
            label = results[0].names[cls_id]
            confidence = float(box.conf[0].item())
            xyxy = box.xyxy[0].tolist()
            attributes = analyze_object_attributes(image, xyxy, label)
            attributes['confidence'] = f"{confidence:.2%}"
            detected_objects.append(label)
            detailed_attributes.append(attributes)
    return annotated_img, detected_objects, detailed_attributes
def get_intelligence_report(detailed_attributes):
    """
    Turn a list of detection-attribute dicts into a readable report.

    Args:
        detailed_attributes: list of dicts as produced by
            analyze_object_attributes, each with 'label', 'size',
            'colors', 'position', and 'confidence' keys.

    Returns:
        A multi-line string with one bullet per detection, plus a
        per-class summary when more than one class was found.
    """
    if not detailed_attributes:
        return "No objects detected in the image."
    lines = [f"Detected {len(detailed_attributes)} object(s):"]
    for attrs in detailed_attributes:
        # Describe at most two colors, joined with "and".
        colors_str = " and ".join(attrs['colors'][:2]) if attrs['colors'] else "unknown colors"
        lines.append(
            f"- A {attrs['size']} {colors_str} {attrs['label']} "
            f"in the {attrs['position']} of the image "
            f"(confidence: {attrs['confidence']})"
        )
    # Per-class counts, only worth showing when classes differ.
    object_types = Counter(attrs['label'] for attrs in detailed_attributes)
    if len(object_types) > 1:
        lines.append("\nSummary:")
        lines.extend(
            f" • {count} {obj_type}(s)"
            for obj_type, count in object_types.most_common()
        )
    return "\n".join(lines)
# Backward compatibility wrapper
def detect_objects(image):
    """
    Wrapper for backward compatibility with the original function.

    Runs the enhanced detector and discards the detailed attributes,
    returning only (annotated_image, detected_object_names).
    """
    # NOTE(review): removed a stray trailing "|" artifact that made the
    # original return statement a syntax error.
    annotated_img, detected_objects, _ = detect_objects_enhanced(image)
    return annotated_img, detected_objects