"""
Coffee Leaf Rust Severity Estimation
====================================

This script calculates the severity of Coffee Leaf Rust (CLR) for individual leaf images.
It processes a directory of leaf images and their corresponding binary masks (representing diseased areas).

Methodology:
    1. Load the cropped leaf image and count its non-background pixels
       (grayscale value above a fixed threshold) as the total leaf area.
    2. Load the corresponding disease mask (generated by SAM or other segmentation models).
    3. Calculate Severity (%) = (Diseased Area / Total Leaf Area) * 100.
    4. Export results to CSV and Excel.

Filename Convention:
    The script expects a naming convention like: "set_<id>_<image_id>_obj<leaf_id>"
    Example: "set_16_123_obj1.jpg"
"""

import os
import cv2
import numpy as np
import pandas as pd
import re

# ================= Configuration =================
# Directory containing individual leaf images (cropped from original).
# calculate_severity() scans this folder and processes every image file whose
# name matches FILE_PATTERN.
LEAF_FOLDER = "./data/leaves"

# Directory containing disease masks (binary images: white=disease, black=background).
# Each mask is matched to its leaf image by file name, and every non-zero
# mask pixel is counted as diseased.
MASK_FOLDER = "./data/masks"

# Output file paths. OUTPUT_DIR is created on demand; the CSV and Excel files
# are written inside it.
OUTPUT_DIR = "./results"
OUTPUT_CSV_NAME = "severity_results.csv"
OUTPUT_EXCEL_NAME = "severity_results.xlsx"

# Regex pattern to parse filename metadata.
# It is applied with re.match, so it is anchored at the start of the file name
# only (any suffix/extension after "obj<leaf_id>" is ignored).
# Capture group 1 is the image id and group 2 is the leaf id; the set id is
# matched but NOT captured, so leaves from different sets that share the same
# image/leaf ids are indistinguishable in the output table.
# Adjust this pattern if your filenames follow a different structure, but keep
# exactly two capture groups (image id, leaf id).
FILE_PATTERN = r"set_\d+_(\d+)_obj(\d+)"
# =================================================

def _find_mask_path(mask_folder, filename, extensions):
    """Return the path of the disease mask for *filename*, or None if absent.

    A mask stored under exactly the same file name as the leaf image is
    preferred. If there is none, a file with the same stem and any of the
    given image *extensions* is accepted, so that e.g. a lossless ``.png``
    mask is still found for a ``.jpg`` leaf image.
    """
    exact_path = os.path.join(mask_folder, filename)
    if os.path.exists(exact_path):
        return exact_path

    stem = os.path.splitext(filename)[0]
    for ext in extensions:
        # Try both lower- and upper-case spellings for case-sensitive file systems.
        for variant in (ext, ext.upper()):
            candidate = os.path.join(mask_folder, stem + variant)
            if os.path.isfile(candidate):
                return candidate
    return None


def calculate_severity():
    """Compute rust severity for every leaf image and export the results.

    For each image file in LEAF_FOLDER whose name matches FILE_PATTERN:

    1. The leaf area is the number of pixels whose grayscale value is above
       10 (i.e. everything that is not near-black background).
    2. The diseased area is the number of non-zero pixels in the matching
       mask from MASK_FOLDER. A missing or unreadable mask counts as zero
       diseased pixels. A mask whose size differs from the leaf image is
       resized to the leaf image first, so both counts share the same scale.
    3. Severity (%) = diseased / leaf * 100, capped at 100. A leaf with no
       detected leaf pixels gets severity 0.

    The rows are sorted by image id and leaf id and written to
    OUTPUT_DIR/OUTPUT_CSV_NAME and OUTPUT_DIR/OUTPUT_EXCEL_NAME (the Excel
    export is skipped with a hint if the Excel writer is not installed).
    Progress and warnings are printed to stdout; nothing is returned.

    Raises:
        OSError: If LEAF_FOLDER cannot be listed or OUTPUT_DIR cannot be
            created (e.g. FileNotFoundError when LEAF_FOLDER is missing).
    """
    image_extensions = (".jpg", ".jpeg", ".png", ".tif", ".tiff")

    # Ensure output directory exists
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    output_csv = os.path.join(OUTPUT_DIR, OUTPUT_CSV_NAME)
    output_excel = os.path.join(OUTPUT_DIR, OUTPUT_EXCEL_NAME)

    # Compile once instead of re-resolving the pattern for every file.
    name_pattern = re.compile(FILE_PATTERN)
    results = []

    print(f"Processing images from: {LEAF_FOLDER}")

    # Sorted so that processing (and log) order is the same on every run;
    # os.listdir returns entries in arbitrary order.
    for filename in sorted(os.listdir(LEAF_FOLDER)):
        if not filename.lower().endswith(image_extensions):
            continue

        # Parse filename to extract Image ID and Leaf ID
        match = name_pattern.match(filename)
        if not match:
            print(f"⚠️ Skipping file with non-compliant name: {filename}")
            continue

        image_id, leaf_id = match.groups()

        # --- 1. Calculate Total Leaf Area ---
        leaf_path = os.path.join(LEAF_FOLDER, filename)
        leaf_img = cv2.imread(leaf_path)
        if leaf_img is None:
            print(f"Error reading image: {leaf_path}")
            continue

        gray_leaf = cv2.cvtColor(leaf_img, cv2.COLOR_BGR2GRAY)

        # Every pixel brighter than 10 is counted as leaf, so the crop must sit
        # on a (near-)black background; a bright background would be counted
        # as leaf area. Adjust the threshold if the background differs.
        _, leaf_binary = cv2.threshold(gray_leaf, 10, 255, cv2.THRESH_BINARY)
        total_leaf_pixels = cv2.countNonZero(leaf_binary)

        # --- 2. Calculate Diseased Area ---
        # No mask (or an unreadable one) is treated as a healthy leaf.
        diseased_pixels = 0
        mask_path = _find_mask_path(MASK_FOLDER, filename, image_extensions)
        if mask_path is not None:
            mask_img = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
            if mask_img is None:
                print(f"Warning: Mask found but unreadable for {filename}")
            else:
                if mask_img.shape != gray_leaf.shape:
                    # Pixel counts from images of different sizes are not
                    # comparable; bring the mask to the leaf resolution.
                    # Nearest-neighbour keeps the mask binary.
                    print(
                        f"Warning: Mask size {mask_img.shape} differs from leaf "
                        f"size {gray_leaf.shape} for {filename}; resizing mask"
                    )
                    height, width = gray_leaf.shape
                    mask_img = cv2.resize(
                        mask_img, (width, height), interpolation=cv2.INTER_NEAREST
                    )
                diseased_pixels = cv2.countNonZero(mask_img)

        # --- 3. Compute Severity ---
        if total_leaf_pixels > 0:
            severity = (diseased_pixels / total_leaf_pixels) * 100
            if severity > 100:
                # The mask covers more pixels than the detected leaf (mask
                # spills onto background, or dark leaf pixels fell below the
                # threshold). A share of leaf area cannot exceed 100 %.
                print(
                    f"Warning: Diseased area exceeds leaf area for {filename}; "
                    f"severity capped at 100%"
                )
                severity = 100.0
        else:
            print(f"Warning: No leaf pixels detected in {filename}; severity set to 0")
            severity = 0.0

        results.append({
            "Image_ID": int(image_id),
            "Leaf_ID": int(leaf_id),
            "Total_Pixels": total_leaf_pixels,
            "Diseased_Pixels": diseased_pixels,
            "Severity_Percent": round(severity, 2)
        })

    # --- 4. Save Outputs ---
    if not results:
        print("No valid images found to process.")
        return

    df = pd.DataFrame(results).sort_values(["Image_ID", "Leaf_ID"])

    df.to_csv(output_csv, index=False)
    print(f"CSV saved to: {output_csv}")

    try:
        df.to_excel(output_excel, index=False)
    except ImportError:
        # pandas needs an optional Excel engine for .xlsx; the CSV is already saved.
        print("To save as Excel, please install openpyxl: pip install openpyxl")
    else:
        print(f"Excel saved to: {output_excel}")

    print(f"✅ Processing complete. Computed severity for {len(results)} leaves.")

# Run the full severity computation only when this file is executed as a
# script; importing the module has no side effects.
if __name__ == "__main__":
    calculate_severity()