Spaces:
Sleeping
Sleeping
| import os, sys | |
| from src.utils.ml_utils.model.estimator import NetworkSecurityModel | |
| from src.exception.exception import NetworkSecurityException | |
| from src.logging.logger import logging | |
| from src.entity.artifact_entity import DataTransformationArtifact, ModelTrainerArtifact | |
| from src.entity.config_entity import Model_trainer_config | |
| from src.utils.main_utils.utils import save_object, load_object | |
| from src.utils.main_utils.utils import load_numpy_array_data, evaluate_models | |
| from src.utils.ml_utils.metric.classification_metric import classification_score | |
| from sklearn.neighbors import KNeighborsClassifier | |
| from sklearn.tree import DecisionTreeClassifier | |
| from sklearn.ensemble import ( | |
| RandomForestClassifier, | |
| AdaBoostClassifier, | |
| GradientBoostingClassifier, | |
| ) | |
| from sklearn.linear_model import LogisticRegression | |
| from sklearn.metrics import r2_score | |
| import mlflow | |
| import dagshub | |
| import os | |
| from dotenv import load_dotenv | |
load_dotenv()

# Initialise the DagHub/MLflow tracking backend at most once per process.
# The DAGSHUB_INITIALIZED env var is the idempotence flag; failure here is
# non-fatal — training proceeds without experiment tracking.
try:
    already_initialized = os.getenv("DAGSHUB_INITIALIZED")
    if not already_initialized:
        dagshub.init(
            repo_owner='kshitijk146',
            repo_name='MLOPS_project_network_Security_system',
            mlflow=True,
        )
        os.environ["DAGSHUB_INITIALIZED"] = "1"
        logging.info("✅ DagHub/MLflow initialized in model_trainer")
except Exception as e:
    logging.warning(f"⚠️ DagHub initialization failed: {e}. Continuing without MLflow tracking.")
class ModelTrainer:
    """Grid-search several candidate classifiers on the transformed data,
    select the best one, log it to MLflow, and persist the bundled
    (preprocessor + model) artifact for inference."""

    def __init__(self, model_trainer_config: Model_trainer_config,
                 data_transformation_artifact: DataTransformationArtifact):
        """Store the trainer configuration and the upstream transformation artifact.

        Args:
            model_trainer_config: paths/settings for this training stage.
            data_transformation_artifact: locations of the transformed
                train/test arrays and the fitted preprocessing object.

        Raises:
            NetworkSecurityException: wraps any unexpected failure.
        """
        try:
            self.model_trainer_config = model_trainer_config
            self.data_transformation_artifact = data_transformation_artifact
        except Exception as e:
            raise NetworkSecurityException(e, sys) from e

    def track_mlflow(self, best_model, preprocessor, classificationmetric):
        """Log model, preprocessor, and metrics to MLflow.

        Returns:
            str: the MLflow run id, so the logged artifacts can be retrieved later.
        """
        with mlflow.start_run() as run:
            mlflow.log_metric("f1_score", classificationmetric.f1_score)
            mlflow.log_metric("precision_score", classificationmetric.precision_score)
            mlflow.log_metric("recall_score", classificationmetric.recall_score)
            # Log both the model and the preprocessor so inference can be
            # reconstructed from the MLflow run alone.
            mlflow.sklearn.log_model(best_model, "model")
            mlflow.sklearn.log_model(preprocessor, "preprocessor")
            logging.info(f"✅ Models logged to MLflow - Run ID: {run.info.run_id}")
            return run.info.run_id

    def train_model(self, x_train, y_train, x_test, y_test):
        """Tune each candidate classifier, pick the best by report score,
        log it, and persist the final artifact.

        Returns:
            ModelTrainerArtifact: trained-model path plus train/test metrics.
        """
        models = {
            "KNN": KNeighborsClassifier(),
            "Decision Tree": DecisionTreeClassifier(),
            "Random Forest": RandomForestClassifier(verbose=True),
            "AdaBoost": AdaBoostClassifier(),
            "Gradient Boosting": GradientBoostingClassifier(verbose=True),
            "logistic regression": LogisticRegression(verbose=True)
        }
        params = {
            "KNN": {
                'n_neighbors': [3, 5, 7],
                'weights': ['uniform', 'distance'],
                'metric': ['euclidean']
            },
            "Decision Tree": {
                'criterion': ['gini', 'entropy'],
                'max_depth': [None, 5, 10],
                'min_samples_split': [2, 5],
                'min_samples_leaf': [1, 2]
            },
            "Random Forest": {
                'n_estimators': [50, 100],
                'max_depth': [None, 5],
                'min_samples_split': [2, 5],
                'min_samples_leaf': [1, 2],
                'max_features': ['sqrt']
            },
            "AdaBoost": {
                'n_estimators': [50, 100],
                'learning_rate': [0.1, 1.0],
            },
            "Gradient Boosting": {
                'n_estimators': [50, 100],
                'learning_rate': [0.1],
                'max_depth': [3, 5],
                'min_samples_split': [2],
                'min_samples_leaf': [1],
                'max_features': ['sqrt']
            },
            "logistic regression": {
                'C': [1.0, 10.0],
                'penalty': ['l2'],
                'solver': ['liblinear']
            }
        }
        model_report: dict = evaluate_models(
            x_train=x_train, y_train=y_train, x_test=x_test, y_test=y_test,
            models=models, params=params)

        # Best score across all candidates. (FIX: was max(sorted(...)) — the
        # sorted() pass was redundant work; max() alone suffices.)
        best_model_score = max(model_report.values())
        best_model_name = list(model_report.keys())[
            list(model_report.values()).index(best_model_score)
        ]
        logging.info(f"best model name: {best_model_name}")
        # NOTE(review): assumes evaluate_models refits each entry of `models`
        # in place with its best hyper-parameters — confirm in utils;
        # otherwise this estimator is unfitted when predict() runs below.
        best_model = models[best_model_name]

        y_train_pred = best_model.predict(x_train)
        classification_train_metric = classification_score(y_true=y_train, y_pred=y_train_pred)
        y_test_pred = best_model.predict(x_test)
        classification_test_metric = classification_score(y_true=y_test, y_pred=y_test_pred)

        preprocessor = load_object(file_path=self.data_transformation_artifact.transformed_object_file_path)
        # Track to MLflow (logs model + preprocessor + train-split metrics).
        self.track_mlflow(best_model, preprocessor, classification_train_metric)

        model_dir_path = os.path.dirname(self.model_trainer_config.trained_model_file_path)
        os.makedirs(model_dir_path, exist_ok=True)
        network_model = NetworkSecurityModel(preprocessing_object=preprocessor, trained_model_object=best_model)
        save_object(self.model_trainer_config.trained_model_file_path, obj=network_model)
        # FIX: ensure the target directory exists before writing the copy
        # used for deployment; previously this could fail on a fresh checkout.
        os.makedirs("final_model", exist_ok=True)
        save_object("final_model/model.pkl", best_model)

        model_trainer_artifact = ModelTrainerArtifact(
            trained_model_file_path=self.model_trainer_config.trained_model_file_path,
            train_metric_artifact=classification_train_metric,
            test_metric_artifact=classification_test_metric,
        )
        logging.info(f"Model trainer artifact: {model_trainer_artifact}")
        return model_trainer_artifact

    def initiate_model_trainer(self) -> ModelTrainerArtifact:
        """Load the transformed train/test arrays, run training, and return
        the resulting artifact.

        Raises:
            NetworkSecurityException: wraps any failure in this pipeline step.
        """
        try:
            train_file_path = self.data_transformation_artifact.transformed_train_file_path
            test_file_path = self.data_transformation_artifact.transformed_test_file_path
            # Loading training array and testing array; the last column is
            # the label, everything before it is the feature matrix.
            train_array = load_numpy_array_data(train_file_path)
            test_array = load_numpy_array_data(test_file_path)
            x_train, y_train, x_test, y_test = (
                train_array[:, :-1],
                train_array[:, -1],
                test_array[:, :-1],
                test_array[:, -1],
            )
            # FIX: the artifact was previously assigned to a local and never
            # returned, so callers of this stage received None.
            return self.train_model(x_train, y_train, x_test=x_test, y_test=y_test)
        except Exception as e:
            raise NetworkSecurityException(e, sys) from e