File size: 6,923 Bytes
efb1801 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 |
#!/usr/bin/env python3
"""
Complete the final batch of ripeness labeling using conservative color analysis.
This script processes the remaining 46 images with higher confidence thresholds.
"""
import os
import json
import shutil
from pathlib import Path
from datetime import datetime
import numpy as np
from PIL import Image
import cv2
def _dominant_ripeness(red_fraction, green_fraction, yellow_fraction):
    """Pick the ripeness label whose colour covers the most pixels.

    Args:
        red_fraction: Fraction of image pixels inside the red bands.
        green_fraction: Fraction of image pixels inside the green band.
        yellow_fraction: Fraction of image pixels inside the yellow band.

    Returns:
        ``(label, fraction)`` for the dominant colour, or ``(None, 0.0)``
        when none of the three colours is present at all.
    """
    candidates = (
        ("ripe", red_fraction),
        ("unripe", green_fraction),
        ("overripe", yellow_fraction),
    )
    # max() returns the first of several equal maxima, so ties resolve in
    # the order ripe, unripe, overripe.
    label, fraction = max(candidates, key=lambda item: item[1])
    if fraction <= 0:
        # No red, green or yellow pixel matched: there is no evidence for
        # any label, so report "no result" instead of defaulting to "ripe".
        return None, 0.0
    return label, fraction


def analyze_ripeness_conservative(image_path, confidence_threshold=0.8):
    """Classify an image as ripe / unripe / overripe from its HSV colours.

    The image is converted to HSV, masked against fixed red, green and
    yellow/orange ranges, and labelled after whichever colour covers the
    largest share of the pixels.

    Args:
        image_path: Path (``str`` or ``Path``) of the image to analyse.
        confidence_threshold: Accepted for backward compatibility; it is
            not applied to the decision.

    Returns:
        ``(label, score)`` where ``label`` is ``"ripe"``, ``"unripe"`` or
        ``"overripe"`` and ``score`` is the fraction (0.0-1.0) of image
        pixels that fall in the winning colour range. Returns
        ``(None, 0.0)`` when the image cannot be read, is empty, contains
        none of the three colours, or when analysis raises.
    """
    try:
        img = cv2.imread(str(image_path))
        if img is None:
            # imread signals an unreadable or missing file with None.
            return None, 0.0

        total_pixels = img.shape[0] * img.shape[1]
        if total_pixels == 0:
            return None, 0.0

        # Convert to HSV for better color analysis
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

        # Red (ripe) sits at both ends of the hue axis, so it needs two
        # bands that are OR-ed together.
        red_mask = cv2.bitwise_or(
            cv2.inRange(hsv, np.array([0, 50, 50]), np.array([10, 255, 255])),
            cv2.inRange(hsv, np.array([170, 50, 50]), np.array([180, 255, 255])),
        )
        # Green (unripe)
        green_mask = cv2.inRange(
            hsv, np.array([40, 40, 40]), np.array([80, 255, 255])
        )
        # Yellow/orange (overripe)
        yellow_mask = cv2.inRange(
            hsv, np.array([15, 50, 50]), np.array([35, 255, 255])
        )

        # Fractions are plain Python floats (int / int).
        red_fraction = np.count_nonzero(red_mask) / total_pixels
        green_fraction = np.count_nonzero(green_mask) / total_pixels
        yellow_fraction = np.count_nonzero(yellow_mask) / total_pixels

        return _dominant_ripeness(red_fraction, green_fraction, yellow_fraction)
    except Exception as e:
        # Deliberately best-effort: one bad image must not abort a batch.
        print(f"Error analyzing {image_path}: {e}")
        return None, 0.0
def main():
    """Complete the final batch of labeling.

    Every ``*.jpg`` in the ``to_label`` folder is classified with
    ``analyze_ripeness_conservative`` and moved into the ``unripe``,
    ``ripe`` or ``overripe`` folder. Images that cannot be classified or
    moved stay in ``to_label`` and are counted as ``unknown``. A JSON
    report of the run is written next to the dataset folders and a
    summary of the run and of the whole dataset is printed.

    Returns:
        None. Returns early, without creating anything, when there are no
        images to label.
    """
    # Paths
    dataset_dir = Path("model/ripeness_manual_dataset")
    to_label_dir = dataset_dir / "to_label"
    label_dirs = {
        "unripe": dataset_dir / "unripe",
        "ripe": dataset_dir / "ripe",
        "overripe": dataset_dir / "overripe",
    }

    # Sorted so that processing order and the report are reproducible.
    remaining_files = sorted(to_label_dir.glob("*.jpg"))
    if not remaining_files:
        print("No images remaining to label!")
        return

    print("=== FINAL BATCH LABELING ===")
    print(f"Processing {len(remaining_files)} remaining images with conservative analysis...")

    # A move into a missing folder fails, so make sure all targets exist.
    for directory in label_dirs.values():
        directory.mkdir(parents=True, exist_ok=True)

    results = {
        "timestamp": datetime.now().isoformat(),
        "total_processed": len(remaining_files),
        "unripe": 0,
        "ripe": 0,
        "overripe": 0,
        "unknown": 0,
        "images": [],
    }

    for i, image_path in enumerate(remaining_files, 1):
        print(f"Processing {i}/{len(remaining_files)}: {image_path.name}")

        # Analyze with conservative threshold
        label, confidence = analyze_ripeness_conservative(image_path, confidence_threshold=0.8)
        if label not in label_dirs:
            print(f" ⚠️ Analysis failed")
            results["unknown"] += 1
            continue

        dest = label_dirs[label] / image_path.name
        try:
            shutil.move(str(image_path), str(dest))
        except OSError as e:
            print(f" ❌ Error moving file: {e}")
            results["unknown"] += 1
            continue

        # Count the label only once the file has really been moved, so a
        # failed move is not counted under both its label and "unknown".
        results[label] += 1
        print(f" ✅ {label} (confidence: {confidence:.2f})")
        results["images"].append({
            "filename": image_path.name,
            "label": label,
            # Plain float keeps the report serialisable whatever numeric
            # type the analyser returned.
            "confidence": float(confidence),
        })

    # Save results
    results_file = f"model/ripeness_manual_dataset/final_labeling_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
    with open(results_file, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)

    # Print final summary
    print("\n=== FINAL LABELING COMPLETE ===")
    print(f"unripe: {results['unripe']} images")
    print(f"ripe: {results['ripe']} images")
    print(f"overripe: {results['overripe']} images")
    print(f"unknown: {results['unknown']} images")
    print(f"Total processed: {results['total_processed']} images")

    # Calculate final dataset statistics
    total_unripe = len(list(label_dirs["unripe"].glob("*.jpg")))
    total_ripe = len(list(label_dirs["ripe"].glob("*.jpg")))
    total_overripe = len(list(label_dirs["overripe"].glob("*.jpg")))
    remaining = len(list(to_label_dir.glob("*.jpg")))
    total_dataset = total_unripe + total_ripe + total_overripe + remaining
    if total_dataset:
        completion_percentage = (total_dataset - remaining) / total_dataset * 100
    else:
        # Empty dataset: avoid ZeroDivisionError.
        completion_percentage = 0.0

    print("\n=== FINAL DATASET STATUS ===")
    print(f"unripe: {total_unripe} images")
    print(f"ripe: {total_ripe} images")
    print(f"overripe: {total_overripe} images")
    print(f"to_label: {remaining} images")
    print(f"TOTAL: {total_dataset} images")
    print(f"Completion: {completion_percentage:.1f}%")

    if remaining == 0:
        print("\n🎉 DATASET LABELING 100% COMPLETE! 🎉")
        print(f"Total labeled images: {total_dataset}")
    else:
        print(f"\n⚠️ {remaining} images still need manual review")

    print(f"Results saved to: {results_file}")
# Run the labeling pass only when executed as a script, not on import.
if __name__ == "__main__":
    main()