 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from src.predictor import ModelResolver
from src.entity import config_entity
from src.entity import artifact_entity
from src.logger import logging
from src.exception import CropException
from src.config import TARGET_COLUMN
from src.utils import load_object

from sklearn.metrics import f1_score
import pandas as pd
import numpy as np
import os
import sys


class ModelEvaluation:
    """Compare the freshly trained model against the latest saved model
    (resolved via ``ModelResolver``) on the ingested test set and decide
    whether the new model is accepted."""

    def __init__(
        self,
        model_eval_config: config_entity.ModelEvaluationConfig,
        data_ingesiton_artifact: artifact_entity.DataIngestionArtifact,
        data_transformation_artifact: artifact_entity.DataTransformationArtifact,
        model_trainer_artifact: artifact_entity.ModelTrainerArtifact,
    ):
        """Store pipeline config/artifacts and build a ``ModelResolver``.

        NOTE: the parameter name ``data_ingesiton_artifact`` is misspelled
        but public (keyword callers depend on it), so it is kept as-is.

        Raises:
            CropException: wrapping any error raised during setup.
        """
        try:
            logging.info(f"{'>'*20} Model Evaluation Initiated {'<'*20}")
            self.model_eval_config = model_eval_config
            self.data_ingesiton_artifact = data_ingesiton_artifact
            self.data_transformation_artifact = data_transformation_artifact
            self.model_trainer_artifact = model_trainer_artifact
            self.model_resolver = ModelResolver()

        except Exception as e:
            raise CropException(e, sys)

    def initiate_model_evaluation(self) -> artifact_entity.ModelEvaluationArtifact:
        """Score the previous (saved) and current (just-trained) models on the
        test set with weighted F1 and accept the current model only if it
        improves on the previous one.

        Returns:
            ModelEvaluationArtifact: ``is_model_accepted=True`` with
            ``improved_accuracy=None`` when no saved model exists, otherwise
            with the F1 improvement over the saved model.

        Raises:
            CropException: on any failure, including (by design) when the
            current model does not beat the saved model.
        """
        try:
            logging.info(
                f"If the saved model directory contains a model, we will compare which model is best trained: \
                            the model from the saved model folder or the new model."
            )

            latest_dir_path = self.model_resolver.get_latest_dir_path()
            if latest_dir_path is None:
                # No previously saved model: accept the new model unconditionally.
                model_eval_artifact = artifact_entity.ModelEvaluationArtifact(
                    is_model_accepted=True, improved_accuracy=None
                )
                logging.info(f"Model evaluation artifact: {model_eval_artifact}")
                return model_eval_artifact

            # Locations of the previously saved transformer, model and target encoder.
            logging.info(f"Finding location of transformer model and target encoder")
            transformer_path = self.model_resolver.get_latest_transformer_path()
            model_path = self.model_resolver.get_latest_model_path()
            target_encoder_path = self.model_resolver.get_latest_target_encoder_path()

            logging.info(
                f"Previous trained objects of transformer, model and target encoder"
            )
            # Previously trained objects.
            transformer = load_object(file_path=transformer_path)
            model = load_object(file_path=model_path)
            target_encoder = load_object(file_path=target_encoder_path)

            logging.info(f"Currently trained model objects")
            # Currently trained objects.
            current_transformer = load_object(
                file_path=self.data_transformation_artifact.transform_object_path
            )
            current_model = load_object(
                file_path=self.model_trainer_artifact.model_path
            )
            current_target_encoder = load_object(
                file_path=self.data_transformation_artifact.target_encoder_path
            )

            test_df = pd.read_csv(self.data_ingesiton_artifact.test_file_path)
            target_df = test_df[TARGET_COLUMN]

            # --- F1 of the PREVIOUS (saved) model on the test set ---
            # BUG FIX: this section previously scored current_model with
            # current_target_encoder, so "previous_model_score" never measured
            # the saved model and the acceptance comparison was meaningless.
            input_feature_name = list(transformer.feature_names_in_)
            input_arr = transformer.transform(test_df[input_feature_name])

            y_pred = model.predict(input_arr)
            y_true = target_encoder.transform(target_df)

            previous_model_score = f1_score(
                y_true=y_true, y_pred=y_pred, average="weighted"
            )
            logging.info(
                f"Accuracy using previous trained model: {previous_model_score}"
            )

            # --- F1 of the CURRENT (just-trained) model on the test set ---
            input_feature_name = list(current_transformer.feature_names_in_)
            input_arr = current_transformer.transform(test_df[input_feature_name])

            y_pred = current_model.predict(input_arr)
            y_true = current_target_encoder.transform(target_df)

            current_model_score = f1_score(
                y_true=y_true, y_pred=y_pred, average="weighted"
            )

            logging.info(f"Accuracy using current trained model: {current_model_score}")

            if current_model_score <= previous_model_score:
                # Deliberate control flow: a non-improving model aborts the
                # pipeline via the CropException wrapper below.
                logging.info(f"Current trained model is not better than previous model")
                raise Exception("Current trained model is not better than previous model")

            model_eval_artifact = artifact_entity.ModelEvaluationArtifact(
                is_model_accepted=True,
                improved_accuracy=current_model_score - previous_model_score,
            )
            logging.info(f"Model Eval artifacts: {model_eval_artifact}")

            return model_eval_artifact

        except Exception as e:
            raise CropException(e, sys)