File size: 6,923 Bytes
efb1801
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
#!/usr/bin/env python3
"""
Complete the final batch of ripeness labeling using conservative color analysis.
This script processes the remaining 46 images with higher confidence thresholds.
"""

import os
import json
import shutil
from pathlib import Path
from datetime import datetime
import numpy as np
from PIL import Image
import cv2

def analyze_ripeness_conservative(image_path, confidence_threshold=0.8):
    """
    Classify an image as ripe, unripe or overripe from its HSV colors.

    The image is converted to HSV and the fraction of pixels that fall into
    fixed red, green and yellow/orange ranges is measured. The label is the
    color with the largest fraction; ties resolve in the order red, green,
    yellow.

    Args:
        image_path: Path (str or Path) of the image to analyze.
        confidence_threshold: Accepted for interface compatibility; the
            classification below does not consult it.

    Returns:
        A ``(label, confidence)`` tuple. ``label`` is "ripe", "unripe" or
        "overripe" and ``confidence`` is the fraction (0.0-1.0) of image
        pixels inside the winning color range. ``(None, 0.0)`` is returned
        when the image cannot be read, when no pixel matches any of the
        color ranges, or when the analysis raises.
    """
    # (label, HSV (lower, upper) bounds) in tie-break priority order.
    # Red wraps around the hue axis, so it needs two ranges.
    color_ranges = (
        # Red ranges (ripe strawberries)
        ("ripe", (((0, 50, 50), (10, 255, 255)),
                  ((170, 50, 50), (180, 255, 255)))),
        # Green range (unripe strawberries)
        ("unripe", (((40, 40, 40), (80, 255, 255)),)),
        # Yellow/orange range (overripe strawberries)
        ("overripe", (((15, 50, 50), (35, 255, 255)),)),
    )

    try:
        img = cv2.imread(str(image_path))
        if img is None:
            # cv2.imread signals an unreadable/missing file with None.
            return None, 0.0

        total_pixels = img.shape[0] * img.shape[1]
        if total_pixels == 0:
            return None, 0.0

        # Convert to HSV for better color analysis
        hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV)

        best_label, best_fraction = None, 0.0
        for label, bounds in color_ranges:
            mask = None
            for lower, upper in bounds:
                part = cv2.inRange(hsv, np.array(lower), np.array(upper))
                mask = part if mask is None else cv2.bitwise_or(mask, part)

            fraction = np.count_nonzero(mask) / total_pixels
            # Strict '>' keeps the earlier (higher-priority) color on ties and
            # leaves the label as None when no pixel matched any range, so a
            # color-less image is not reported as "ripe" with 0.0 confidence.
            if fraction > best_fraction:
                best_label, best_fraction = label, fraction

        return best_label, best_fraction

    except Exception as e:
        # Best effort per image: report the problem and let the caller
        # continue with the rest of the batch.
        print(f"Error analyzing {image_path}: {e}")
        return None, 0.0

def main():
    """Complete the final batch of labeling.

    Every ``*.jpg`` in the ``to_label`` directory is analyzed with
    ``analyze_ripeness_conservative`` and moved into the ``unripe``, ``ripe``
    or ``overripe`` directory. Images whose analysis fails, whose destination
    file already exists, or whose move fails stay in ``to_label`` and are
    counted as "unknown". A JSON summary is written next to the class
    directories and dataset totals are printed at the end.
    """

    # Paths
    to_label_dir = Path("model/ripeness_manual_dataset/to_label")
    class_dirs = {
        "unripe": Path("model/ripeness_manual_dataset/unripe"),
        "ripe": Path("model/ripeness_manual_dataset/ripe"),
        "overripe": Path("model/ripeness_manual_dataset/overripe"),
    }

    # Get remaining files
    remaining_files = list(to_label_dir.glob("*.jpg"))

    if not remaining_files:
        print("No images remaining to label!")
        return

    batch_size = len(remaining_files)
    print("=== FINAL BATCH LABELING ===")
    print(f"Processing {batch_size} remaining images with conservative analysis...")

    # shutil.move does not create missing parent directories, so make sure
    # every destination exists before the first move.
    for class_dir in class_dirs.values():
        class_dir.mkdir(parents=True, exist_ok=True)

    results = {
        "timestamp": datetime.now().isoformat(),
        "total_processed": batch_size,
        "unripe": 0,
        "ripe": 0,
        "overripe": 0,
        "unknown": 0,
        "images": [],
    }

    for i, image_path in enumerate(remaining_files, 1):
        print(f"Processing {i}/{batch_size}: {image_path.name}")

        # Analyze with conservative threshold
        label, confidence = analyze_ripeness_conservative(image_path, confidence_threshold=0.8)

        # A missing or unrecognized label has no destination directory.
        target_dir = class_dirs.get(label)
        if target_dir is None:
            print("  ⚠️  Analysis failed")
            results["unknown"] += 1
            continue

        dest = target_dir / image_path.name
        if dest.exists():
            # Moving onto an existing file may silently overwrite it; leave
            # the image in place for manual review instead.
            print(f"  ❌ Error moving file: {dest} already exists")
            results["unknown"] += 1
            continue

        try:
            shutil.move(str(image_path), str(dest))
        except OSError as e:  # shutil.Error is an OSError subclass
            print(f"  ❌ Error moving file: {e}")
            results["unknown"] += 1
            continue

        # Count the label only once the file has really been moved, so a
        # failed move is not tallied under both its label and "unknown".
        results[label] += 1
        print(f"  ✅ {label} (confidence: {confidence:.2f})")
        results["images"].append({
            "filename": image_path.name,
            "label": label,
            "confidence": float(confidence),
        })

    # Save results
    results_file = f"model/ripeness_manual_dataset/final_labeling_results_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json"
    with open(results_file, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=2)

    # Print final summary
    print("\n=== FINAL LABELING COMPLETE ===")
    print(f"unripe: {results['unripe']} images")
    print(f"ripe: {results['ripe']} images")
    print(f"overripe: {results['overripe']} images")
    print(f"unknown: {results['unknown']} images")
    print(f"Total processed: {results['total_processed']} images")

    # Calculate final dataset statistics
    total_unripe = sum(1 for _ in class_dirs["unripe"].glob("*.jpg"))
    total_ripe = sum(1 for _ in class_dirs["ripe"].glob("*.jpg"))
    total_overripe = sum(1 for _ in class_dirs["overripe"].glob("*.jpg"))
    remaining = sum(1 for _ in to_label_dir.glob("*.jpg"))
    total_dataset = total_unripe + total_ripe + total_overripe + remaining

    # Guard the division in case the directories are empty by now.
    if total_dataset:
        completion_percentage = (total_dataset - remaining) / total_dataset * 100
    else:
        completion_percentage = 0.0

    print("\n=== FINAL DATASET STATUS ===")
    print(f"unripe: {total_unripe} images")
    print(f"ripe: {total_ripe} images")
    print(f"overripe: {total_overripe} images")
    print(f"to_label: {remaining} images")
    print(f"TOTAL: {total_dataset} images")
    print(f"Completion: {completion_percentage:.1f}%")

    if remaining == 0:
        print("\n🎉 DATASET LABELING 100% COMPLETE! 🎉")
        print(f"Total labeled images: {total_dataset}")
    else:
        print(f"\n⚠️  {remaining} images still need manual review")

    print(f"Results saved to: {results_file}")

# Run the labeling pass only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()