#!/usr/bin/env python3 """ Automated Strawberry Ripeness Labeling System Uses color analysis to automatically label strawberry ripeness """ import os import sys import cv2 import numpy as np from pathlib import Path from PIL import Image import argparse import json from datetime import datetime class AutoRipenessLabeler: def __init__(self): """Initialize the automatic ripeness labeler""" print("✅ Initialized automatic ripeness labeler") def analyze_strawberry_color(self, image_path): """Analyze the color of strawberries to determine ripeness""" try: # Load image img = cv2.imread(str(image_path)) if img is None: return None # Convert BGR to RGB img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) # Convert to HSV for better color analysis hsv = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2HSV) # Define color ranges for different ripeness stages # Red range (ripe strawberries) red_lower1 = np.array([0, 50, 50]) red_upper1 = np.array([10, 255, 255]) red_lower2 = np.array([170, 50, 50]) red_upper2 = np.array([180, 255, 255]) # Green range (unripe strawberries) green_lower = np.array([40, 40, 40]) green_upper = np.array([80, 255, 255]) # Dark red range (overripe strawberries) dark_red_lower = np.array([0, 100, 0]) dark_red_upper = np.array([20, 255, 100]) # Create masks for each color range red_mask1 = cv2.inRange(hsv, red_lower1, red_upper1) red_mask2 = cv2.inRange(hsv, red_lower2, red_upper2) red_mask = cv2.bitwise_or(red_mask1, red_mask2) green_mask = cv2.inRange(hsv, green_lower, green_upper) dark_red_mask = cv2.inRange(hsv, dark_red_lower, dark_red_upper) # Calculate percentages total_pixels = hsv.shape[0] * hsv.shape[1] red_pixels = np.sum(red_mask > 0) green_pixels = np.sum(green_mask > 0) dark_red_pixels = np.sum(dark_red_mask > 0) red_percentage = red_pixels / total_pixels green_percentage = green_pixels / total_pixels dark_red_percentage = dark_red_pixels / total_pixels # Calculate brightness and saturation for fallback analysis gray = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2GRAY) avg_brightness = np.mean(gray) avg_saturation = np.mean(hsv[:, :, 1]) # Determine ripeness based on color percentages if green_percentage > 0.3: ripeness = "unripe" confidence = min(green_percentage * 2, 0.9) elif dark_red_percentage > 0.2: ripeness = "overripe" confidence = min(dark_red_percentage * 2, 0.9) elif red_percentage > 0.2: ripeness = "ripe" confidence = min(red_percentage * 2, 0.9) else: # Fallback: use brightness and saturation if avg_brightness < 80: ripeness = "overripe" confidence = 0.6 elif avg_brightness > 150: ripeness = "unripe" confidence = 0.6 else: ripeness = "ripe" confidence = 0.7 return { 'ripeness': ripeness, 'confidence': confidence, 'color_analysis': { 'red_percentage': red_percentage, 'green_percentage': green_percentage, 'dark_red_percentage': dark_red_percentage, 'avg_brightness': float(avg_brightness), 'avg_saturation': float(avg_saturation) } } except Exception as e: print(f"Error analyzing color in {image_path}: {e}") return None def batch_auto_label(self, image_files, output_dirs, confidence_threshold=0.6): """Automatically label a batch of images""" results = [] for i, image_path in enumerate(image_files): print(f"Processing {i+1}/{len(image_files)}: {image_path.name}") analysis = self.analyze_strawberry_color(image_path) if analysis and analysis['confidence'] >= confidence_threshold: ripeness = analysis['ripeness'] confidence = analysis['confidence'] # Copy image to appropriate directory dest_path = output_dirs[ripeness] / image_path.name try: import shutil shutil.copy2(image_path, dest_path) print(f" ✅ {ripeness} (confidence: {confidence:.2f})") results.append({ 'image': image_path.name, 'label': ripeness, 'confidence': confidence, 'analysis': analysis['color_analysis'] }) except Exception as e: print(f" ❌ Error copying file: {e}") else: print(f" ⚠️ Low confidence or analysis failed") results.append({ 'image': image_path.name, 'label': 'unknown', 'confidence': analysis['confidence'] if analysis else 0.0, 'analysis': analysis['color_analysis'] if analysis else None }) return results def main(): parser = argparse.ArgumentParser(description='Automatically label strawberry ripeness dataset') parser.add_argument('--dataset-path', type=str, default='model/ripeness_manual_dataset', help='Path to the ripeness dataset directory') parser.add_argument('--confidence-threshold', type=float, default=0.6, help='Minimum confidence for automatic labeling') parser.add_argument('--max-images', type=int, default=50, help='Maximum number of images to process') args = parser.parse_args() base_path = Path(args.dataset_path) to_label_path = base_path / 'to_label' if not to_label_path.exists(): print(f"Error: to_label directory not found at {to_label_path}") return # Create output directories output_dirs = {} for label in ['unripe', 'ripe', 'overripe']: dir_path = base_path / label dir_path.mkdir(exist_ok=True) output_dirs[label] = dir_path # Get image files image_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff'} image_files = [] for file_path in to_label_path.iterdir(): if file_path.suffix.lower() in image_extensions: image_files.append(file_path) image_files = sorted(image_files)[:args.max_images] print(f"Found {len(image_files)} images to process") print(f"Confidence threshold: {args.confidence_threshold}") if not image_files: print("No images found to process.") return # Initialize auto labeler labeler = AutoRipenessLabeler() # Process images results = labeler.batch_auto_label(image_files, output_dirs, args.confidence_threshold) # Save results results_file = base_path / f'auto_labeling_results_{datetime.now().strftime("%Y%m%d_%H%M%S")}.json' with open(results_file, 'w') as f: json.dump(results, f, indent=2) # Print summary label_counts = {'unripe': 0, 'ripe': 0, 'overripe': 0, 'unknown': 0} for result in results: label_counts[result['label']] += 1 print("\n=== AUTOMATIC LABELING RESULTS ===") for label, count in label_counts.items(): print(f"{label}: {count} images") print(f"\nResults saved to: {results_file}") if label_counts['unknown'] > 0: print(f"\n⚠️ {label_counts['unknown']} images need manual review") print("You can use the manual labeling tool for these:") print("python3 label_ripeness_dataset.py") if __name__ == '__main__': main()