Spaces:
Sleeping
Sleeping
| """ | |
| Enhanced YOLO detection with improved accuracy, color detection, and detailed attributes | |
| """ | |
| from ultralytics import YOLO # type: ignore | |
| import cv2 # type: ignore | |
| import numpy as np # type: ignore | |
| from collections import Counter | |
| import webcolors # type: ignore | |
| # from sklearn.cluster import KMeans # type: ignore # Temporarily disabled due to numpy compatibility | |
| import torch # type: ignore | |
# Model selection: the medium checkpoint is used as an accuracy/speed compromise.
# yolov8l.pt can be substituted for more accuracy; yolov8n.pt is the smaller
# alternative this module deliberately moved away from.
model_size = 'yolov8m.pt'  # Medium model for better accuracy vs speed balance
# Instantiated once at import time; detect_objects_enhanced() reuses this object.
model = YOLO(model_size)
# Minimum confidence a detection must reach to be reported.
# Increase this for fewer but more accurate detections.
CONFIDENCE_THRESHOLD = 0.5
# IoU threshold handed to the model for non-maximum suppression.
NMS_THRESHOLD = 0.45
def get_dominant_colors(image, n_colors=3):
    """
    Approximate the main colours of an image region without clustering.

    The first entry names the mean colour of the region. Further entries name
    individual sample pixels, taken in the order top-left, top-right,
    bottom-left, centre; at most ``n_colors - 1`` samples are inspected and
    only names not already present are added.

    Channel values are handed to ``get_color_name`` in stored order, so the
    names are only meaningful for RGB data.
    NOTE(review): confirm the channel order of the arrays callers pass in.

    Args:
        image: Pixel region shaped (height, width, channels) with at least
            three channels; extra channels (e.g. alpha) are ignored.
        n_colors: Maximum number of colour names to return. At least one
            name (the mean colour) is always returned for a valid region.

    Returns:
        A list of colour-name strings, or ``["Unknown"]`` when the region is
        missing, empty, or not a multi-channel image.
    """
    if image is None:
        return ["Unknown"]
    try:
        pixels = np.asarray(image)
    except (TypeError, ValueError):
        return ["Unknown"]

    # Zero-area crops and non-colour inputs cannot be averaged or sampled.
    if pixels.ndim != 3 or pixels.shape[2] < 3 or pixels.size == 0:
        return ["Unknown"]
    pixels = pixels[..., :3]
    height, width = pixels.shape[:2]

    try:
        # int() truncates, matching the previous ``astype(int)`` behaviour.
        mean_rgb = tuple(int(c) for c in pixels.reshape(-1, 3).mean(axis=0))
    except (TypeError, ValueError):
        # Non-numeric dtype or NaN values: nothing sensible to report.
        return ["Unknown"]

    try:
        names = [get_color_name(mean_rgb)]
    except Exception:
        # Naming is best-effort; fall back to the raw triple rather than fail.
        names = [f"RGB({mean_rgb[0]},{mean_rgb[1]},{mean_rgb[2]})"]

    samples = [
        pixels[0, 0],                    # Top-left
        pixels[0, width - 1],            # Top-right
        pixels[height - 1, 0],           # Bottom-left
        pixels[height // 2, width // 2]  # Center
    ]
    extra_samples = max(n_colors - 1, 0)
    for sample in samples[:extra_samples]:
        try:
            # Plain Python ints avoid fixed-width integer wrap-around in the
            # distance arithmetic of the colour lookup.
            name = get_color_name(tuple(int(c) for c in sample))
        except Exception:
            # A sample that cannot be named is skipped; the mean colour
            # already guarantees a non-empty result.
            continue
        if name not in names:
            names.append(name)
    return names
# Cache of (name, (r, g, b)) pairs for the CSS3 named colours, filled on first use.
_CSS3_PALETTE = []


def _css3_palette():
    """
    Return the CSS3 named colours as (name, (r, g, b)) pairs, building the list once.
    """
    if not _CSS3_PALETTE:
        hex_to_name = getattr(webcolors, "CSS3_HEX_TO_NAMES", None)
        if hex_to_name is None:
            # Newer webcolors releases no longer expose the mapping constant;
            # rebuild an equivalent hex -> name table from the public API.
            hex_to_name = {
                webcolors.name_to_hex(name, spec="css3"): name
                for name in webcolors.names("css3")
            }
        _CSS3_PALETTE.extend(
            (name, tuple(webcolors.hex_to_rgb(hex_code)))
            for hex_code, name in hex_to_name.items()
        )
    return _CSS3_PALETTE


def get_color_name(rgb_color):
    """
    Convert RGB values to the closest CSS3 colour name.

    Closeness is the squared Euclidean distance in RGB space. When several
    named colours are equally close, the one listed last in the palette wins.

    Args:
        rgb_color: Indexable of at least three channel values (red, green,
            blue). NumPy scalars such as uint8 pixels are accepted.

    Returns:
        The name of the nearest CSS3 colour as a string.
    """
    # Convert to Python ints first: with fixed-width NumPy scalars (e.g. uint8
    # pixels) the subtraction and squaring below can wrap around and produce
    # a wrong nearest colour.
    red, green, blue = (int(channel) for channel in rgb_color[:3])

    best_name = None
    best_distance = None
    for name, (p_red, p_green, p_blue) in _css3_palette():
        distance = (
            (p_red - red) ** 2
            + (p_green - green) ** 2
            + (p_blue - blue) ** 2
        )
        # "<=" lets a later equally-distant entry replace an earlier one.
        if best_distance is None or distance <= best_distance:
            best_name = name
            best_distance = distance
    return best_name
def analyze_object_attributes(image, box, label):
    """
    Build the attribute record for a single detection.

    Args:
        image: Array the detection was made on; it is sliced with the box
            coordinates and its ``shape`` is used for relative measures.
        box: Four values ``(x1, y1, x2, y2)`` giving the box corners.
        label: Class name of the detection.

    Returns:
        Dict with keys ``label``, ``position``, ``size``, ``colors``,
        ``confidence`` (left as ``None`` for the caller to fill in) and
        ``bbox`` (the four coordinates as floats).
    """
    left, top, right, bottom = box

    # Crop the pixels covered by the box; slice indices must be integers.
    crop = image[int(top):int(bottom), int(left):int(right)]

    position = get_position_description(left, top, right, bottom, image.shape)
    size = get_size_description(right - left, bottom - top, image.shape)
    colors = get_dominant_colors(crop, n_colors=2)

    return {
        'label': label,
        'position': position,
        'size': size,
        'colors': colors,
        'confidence': None,  # Filled in by the caller from the detection score
        'bbox': [float(left), float(top), float(right), float(bottom)],
    }
def get_position_description(x1, y1, x2, y2, image_shape):
    """
    Name the region of the image that contains the centre of a box.

    The image is divided into a 3x3 grid. The result is ``"center"`` for the
    middle cell, a single word (``"left"``, ``"right"``, ``"top"``,
    ``"bottom"``) for the cells sharing the middle row or column, and
    ``"<vertical>-<horizontal>"`` (e.g. ``"top-left"``) for the corner cells.

    Args:
        x1, y1, x2, y2: Box corner coordinates.
        image_shape: Shape whose first two entries are (height, width).
    """
    def _third(value, extent, low, mid, high):
        # Classify a coordinate into the first, middle or last third of an axis.
        if value < extent / 3:
            return low
        if value > 2 * extent / 3:
            return high
        return mid

    img_height, img_width = image_shape[:2]
    horizontal = _third((x1 + x2) / 2, img_width, "left", "center", "right")
    vertical = _third((y1 + y2) / 2, img_height, "top", "middle", "bottom")

    in_middle_column = horizontal == "center"
    in_middle_row = vertical == "middle"

    if in_middle_column and in_middle_row:
        return "center"
    if in_middle_row:
        return horizontal
    if in_middle_column:
        return vertical
    return f"{vertical}-{horizontal}"
def get_size_description(width, height, image_shape):
    """
    Classify how much of the image a box covers.

    Args:
        width: Box width.
        height: Box height.
        image_shape: Shape whose first two entries are multiplied to get the
            image area.

    Returns:
        ``"very large"``, ``"large"``, ``"medium"``, ``"small"`` or ``"tiny"``
        depending on the box-area to image-area ratio.
    """
    coverage = (width * height) / (image_shape[0] * image_shape[1])

    # Ordered from largest to smallest; the first limit strictly exceeded wins.
    size_bands = (
        (0.5, "very large"),
        (0.25, "large"),
        (0.1, "medium"),
        (0.05, "small"),
    )
    for lower_limit, description in size_bands:
        if coverage > lower_limit:
            return description
    return "tiny"
def detect_objects_enhanced(image, confidence_threshold=CONFIDENCE_THRESHOLD):
    """
    Run YOLO detection and describe every detection in detail.

    NumPy inputs are normalised to three channels first: single-channel
    images are expanded and an alpha channel is dropped. Other input types
    are passed to the model unchanged.
    NOTE(review): the conversions treat NumPy input as RGB(A); confirm this
    matches the channel order the model expects for arrays.

    Args:
        image: Image to analyse (NumPy array, or any other input the model
            accepts).
        confidence_threshold: Minimum confidence for a detection to be kept.

    Returns:
        A tuple ``(annotated_img, detected_objects, detailed_attributes)``:
        - annotated image with bounding boxes and confidence scores drawn
        - list of detected object names
        - list of attribute dicts, one per detection, in the same order
    """
    if isinstance(image, np.ndarray):
        # Check grayscale first: a 2-D image that happens to be 4 pixels wide
        # would otherwise be mistaken for RGBA by the last-axis test.
        if image.ndim == 2 or (image.ndim == 3 and image.shape[-1] == 1):
            image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
        elif image.ndim == 3 and image.shape[-1] == 4:
            image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)

    # Run YOLO with custom parameters for better accuracy
    results = model(
        image,
        conf=confidence_threshold,  # Confidence threshold
        iou=NMS_THRESHOLD,          # NMS IoU threshold
        imgsz=640,                  # Image size (can increase for better accuracy)
        device='cuda' if torch.cuda.is_available() else 'cpu'
    )
    result = results[0]

    annotated_img = result.plot(
        conf=True,  # Show confidence scores
        line_width=2,
        font_size=10
    )

    # Attribute analysis slices pixels, so it needs an array. For non-array
    # inputs use the frame the model decoded.
    # NOTE(review): orig_img is presumed to be BGR (hence the channel flip so
    # colour names are computed on RGB) - confirm against the ultralytics docs.
    if isinstance(image, np.ndarray):
        pixels = image
    else:
        pixels = result.orig_img[..., ::-1]

    detected_objects = []
    detailed_attributes = []
    for box in result.boxes:
        confidence = float(box.conf[0].item())
        if confidence < confidence_threshold:  # Double-check confidence
            continue

        cls_id = int(box.cls[0].item())
        label = result.names[cls_id]
        xyxy = box.xyxy[0].tolist()

        attributes = analyze_object_attributes(pixels, xyxy, label)
        attributes['confidence'] = f"{confidence:.2%}"

        detected_objects.append(label)
        detailed_attributes.append(attributes)

    return annotated_img, detected_objects, detailed_attributes
def get_intelligence_report(detailed_attributes):
    """
    Render the detection attributes as a human-readable text report.

    Args:
        detailed_attributes: List of dicts with the keys ``size``, ``colors``,
            ``label``, ``position`` and ``confidence``.

    Returns:
        A multi-line string: a header with the detection count, one line per
        detection, and - when more than one distinct label is present - a
        per-label count summary ordered from most to least frequent.
    """
    if not detailed_attributes:
        return "No objects detected in the image."

    lines = [f"Detected {len(detailed_attributes)} object(s):"]

    for item in detailed_attributes:
        palette = item['colors']
        if palette:
            # Mention at most two colours per object.
            color_text = " and ".join(palette[:2])
        else:
            color_text = "unknown colors"
        lines.append(
            f"- A {item['size']} {color_text} {item['label']} "
            f"in the {item['position']} of the image "
            f"(confidence: {item['confidence']})"
        )

    # Summary statistics are only useful when several kinds of object appear.
    tally = Counter(item['label'] for item in detailed_attributes)
    if len(tally) > 1:
        lines.append("\nSummary:")
        lines.extend(
            f" • {count} {kind}(s)" for kind, count in tally.most_common()
        )

    return "\n".join(lines)
# Legacy entry point kept so existing callers continue to work
def detect_objects(image):
    """
    Run the enhanced detector with its default threshold and return only the
    annotated image and the list of detected labels, dropping the detailed
    attributes.
    """
    return detect_objects_enhanced(image)[:2]