File size: 4,881 Bytes
d576da9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
import tensorflow as tf
from pathlib import Path
import mlflow
import mlflow.keras
from urllib.parse import urlparse
from cnnClassifier.entity.config_entity import EvaluationConfig
from cnnClassifier.utils.common import save_json

# --- NEW IMPORTS for advanced evaluation ---
from sklearn.metrics import confusion_matrix, classification_report
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
# -------------------------------------------

class Evaluation:
    """Evaluate a trained Keras image classifier on a held-out validation split.

    Responsibilities (in call order):
      1. ``evaluation()``   — load model, build validation generator, compute
         loss/accuracy and per-sample predictions, write ``scores.json``.
      2. ``log_into_mlflow()`` — log params, loss/accuracy, a per-class
         classification report, a confusion-matrix plot, and the model itself
         to the configured MLflow tracking server.
    """

    def __init__(self, config: EvaluationConfig):
        self.config = config
        self.model = None            # tf.keras.Model, set by evaluation()
        self.valid_generator = None  # Keras DirectoryIterator, set by _valid_generator()
        self.score = None            # [loss, accuracy] from model.evaluate()
        self.y_true = None           # ground-truth class indices (np.ndarray)
        self.y_pred = None           # argmax-predicted class indices (np.ndarray)

    def _valid_generator(self):
        """Build the validation data generator (30% split of training_data).

        ``shuffle=False`` is essential: it keeps ``generator.classes`` aligned
        with the order of ``model.predict()`` outputs in ``_get_predictions``.
        """
        datagenerator_kwargs = dict(
            rescale=1. / 255,
            validation_split=0.30,
        )

        dataflow_kwargs = dict(
            # params_image_size is (H, W, C); generators want (H, W) only.
            target_size=self.config.params_image_size[:-1],
            batch_size=self.config.params_batch_size,
            interpolation="bilinear",
        )

        valid_datagenerator = tf.keras.preprocessing.image.ImageDataGenerator(
            **datagenerator_kwargs
        )

        self.valid_generator = valid_datagenerator.flow_from_directory(
            directory=self.config.training_data,
            subset="validation",
            shuffle=False,
            **dataflow_kwargs,
        )

    @staticmethod
    def load_model(path: Path) -> tf.keras.Model:
        """Load and return a saved Keras model from ``path``."""
        return tf.keras.models.load_model(path)

    def _get_predictions(self):
        """Populate ``y_true``/``y_pred`` from the validation generator."""
        self.y_true = self.valid_generator.classes
        y_pred_probs = self.model.predict(self.valid_generator)
        self.y_pred = np.argmax(y_pred_probs, axis=1)

    def evaluation(self):
        """Load the model, evaluate basic metrics, collect predictions, save scores."""
        self.model = self.load_model(self.config.path_of_model)
        self._valid_generator()
        self.score = self.model.evaluate(self.valid_generator)
        self._get_predictions()
        self.save_score()

    def save_score(self):
        """Write loss/accuracy to ``scores.json``; always creates the file.

        Falls back to NaN placeholders when evaluation produced no score or a
        NaN score, so downstream stages never fail on a missing file.
        """
        if self.score is None or np.isnan(self.score).any():
            print("⚠️ Warning: Invalid scores detected (NaN). Saving default scores file.")
            scores = {"loss": float('nan'), "accuracy": float('nan')}
        else:
            # Cast to plain Python floats: model.evaluate can return numpy
            # scalars, which JSON serialization would reject.
            scores = {"loss": float(self.score[0]), "accuracy": float(self.score[1])}

        save_json(path=Path("scores.json"), data=scores)
        print(f"Scores saved to scores.json: {scores}")

    def log_confusion_matrix(self):
        """Generates, saves, and logs the confusion matrix plot to MLflow."""
        cm = confusion_matrix(self.y_true, self.y_pred)
        class_names = list(self.valid_generator.class_indices.keys())

        plt.figure(figsize=(8, 6))
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                    xticklabels=class_names, yticklabels=class_names)
        plt.title('Confusion Matrix')
        plt.ylabel('Actual')
        plt.xlabel('Predicted')

        matrix_path = Path("confusion_matrix.png")
        plt.savefig(matrix_path)
        # BUG FIX: close the figure after saving. Without this, every run
        # leaked an open matplotlib figure (memory growth + warnings).
        plt.close()

        # str() for compatibility with older MLflow versions that require
        # a string local_path rather than a pathlib.Path.
        mlflow.log_artifact(str(matrix_path), "plots")
        print("Confusion Matrix plot saved and logged to MLflow.")

    def log_into_mlflow(self):
        """Log params, metrics, per-class report, plot, and model to MLflow."""
        mlflow.set_tracking_uri(self.config.mlflow_uri)

        with mlflow.start_run():
            print("Logging basic parameters and metrics to MLflow...")
            mlflow.log_params(self.config.all_params)
            mlflow.log_metrics({"loss": self.score[0], "accuracy": self.score[1]})

            # --- Log detailed classification report metrics ---
            print("\n--- Classification Report ---")
            # Compute target names once; used for both the dict and the
            # human-readable printout below.
            class_names = list(self.valid_generator.class_indices.keys())
            report = classification_report(self.y_true, self.y_pred,
                                           target_names=class_names,
                                           output_dict=True)
            print(classification_report(self.y_true, self.y_pred,
                                        target_names=class_names))

            for class_name, metrics in report.items():
                # Skip scalar entries such as the top-level "accuracy" value;
                # only per-class / averaged dicts carry metric breakdowns.
                if isinstance(metrics, dict):
                    for metric_name, value in metrics.items():
                        # float() cast: "support" is an integer count and some
                        # values arrive as numpy scalars; MLflow expects float.
                        mlflow.log_metric(f"{class_name}_{metric_name}", float(value))

            # --- Log the confusion matrix plot ---
            self.log_confusion_matrix()

            # --- Log the model as an artifact ---
            print("Logging model as an artifact...")
            mlflow.keras.log_model(self.model, "model")

            print("MLflow logging complete.")