Spaces:

SURESHBEEKHANI
/

Heart_Attack_Prediction

Build error

App Files Files Community

Heart_Attack_Prediction / src /components /model_tranier.py

SURESHBEEKHANI

Upload 29 files

46d8269 verified about 1 year ago

raw

history blame contribute delete

6.26 kB

	import os
	import sys
	from dataclasses import dataclass

	# Importing necessary libraries for machine learning models and evaluation
	from sklearn.linear_model import LogisticRegression # type: ignore
	from sklearn.tree import DecisionTreeClassifier # type: ignore
	from sklearn.ensemble import (RandomForestClassifier, AdaBoostClassifier, VotingClassifier) # type: ignore
	from sklearn.neighbors import KNeighborsClassifier # type: ignore
	from xgboost import XGBClassifier # type: ignore
	from catboost import CatBoostClassifier # type: ignore
	from sklearn.model_selection import (GridSearchCV, RandomizedSearchCV) # type: ignore
	import pandas as pd # type: ignore
	import matplotlib.pyplot as plt # type: ignore
	import numpy as np

	from ..exception import CustomException
	from src.logger import logging
	from src.utils import save_object, evaluate_models, model_metrics, print_evaluated_results

	@dataclass
	class ModelTrainerConfig:
	    """Settings container for the model-training step."""

	    # Path later handed to save_object() as the destination for the trained
	    # model; defaults to "artifacts/model.pkl" joined with the OS separator.
	    trained_model_file_path: str = os.path.join("artifacts", "model.pkl")

	class ModelTrainer:
	    """Train candidate classifiers, tune two of them, and save a voting ensemble.

	    The public entry point is :meth:`initiate_model_training`; the
	    underscore-prefixed methods are internal steps of that pipeline.
	    """

	    # Divider printed between sections of the console report.
	    _SEPARATOR = '\n====================================================================================\n'

	    def __init__(self):
	        self.model_trainer_config = ModelTrainerConfig()

	    def initiate_model_training(self, train_array: np.ndarray, test_array: np.ndarray):
	        """Run the full training pipeline and persist the final ensemble.

	        Steps, in order: split features/target, score the baseline models via
	        ``evaluate_models``, report the best one, tune CatBoost and KNN, fit a
	        weighted ``VotingClassifier``, save it, and compute test metrics.

	        Args:
	            train_array: 2-D array; every column but the last is a feature and
	                the last column is the target.
	            test_array: 2-D array laid out like ``train_array``.

	        Returns:
	            The three values unpacked from ``model_metrics(y_test, y_pred)``,
	            named ``(mae, rmse, r2)`` here.

	        Raises:
	            CustomException: wraps any exception raised during the pipeline;
	                the original exception is attached as ``__cause__``.
	        """
	        try:
	            logging.info('Splitting dependent and independent variables from train and test data')
	            x_train, y_train = train_array[:, :-1], train_array[:, -1]
	            x_test, y_test = test_array[:, :-1], test_array[:, -1]

	            models = {
	                "Logistic Regression": LogisticRegression(),
	                "K-Neighbors Classifier": KNeighborsClassifier(),
	                "Decision Tree": DecisionTreeClassifier(),
	                "Random Forest Classifier": RandomForestClassifier(),
	                "XGB Classifier": XGBClassifier(),
	                "CatBoost Classifier": CatBoostClassifier(verbose=False),
	                "AdaBoost Classifier": AdaBoostClassifier(),
	            }

	            model_report = evaluate_models(x_train, y_train, x_test, y_test, models)
	            logging.info(f'Model Report: {model_report}')

	            best_model_name, best_model_score = self.get_best_model(model_report)

	            # A low score is only logged; training continues regardless.
	            if best_model_score < 0.6:
	                logging.info('Best model has R2 Score less than 60%')

	            print(f'Best Model Found: Model Name: {best_model_name}, R2 Score: {best_model_score}')
	            print(self._SEPARATOR)
	            logging.info(f'Best Model Found: Model Name: {best_model_name}, R2 Score: {best_model_score}')

	            best_cbr = self._tune_catboost(x_train, y_train)
	            best_knn = self._tune_knn(x_train, y_train)
	            voting_classifier = self._fit_voting_classifier(
	                best_cbr, best_knn, x_train, y_train, x_test, y_test
	            )

	            # Save the trained model
	            save_object(
	                file_path=self.model_trainer_config.trained_model_file_path,
	                obj=voting_classifier,
	            )
	            logging.info('Model pickle file saved')

	            # Evaluate final model on test data
	            y_test_pred = voting_classifier.predict(x_test)
	            mae, rmse, r2 = model_metrics(y_test, y_test_pred)
	            logging.info(f'Test MAE: {mae}')
	            logging.info(f'Test RMSE: {rmse}')
	            logging.info(f'Test R2 Score: {r2}')
	            logging.info('Final Model Training Completed')

	            return mae, rmse, r2

	        except Exception as e:
	            logging.info('Exception occurred during model training')
	            # Chain explicitly so the root cause stays attached to the wrapper.
	            raise CustomException(e, sys) from e

	    def _tune_catboost(self, x_train, y_train):
	        """Randomized-search CatBoost hyperparameters; return the best estimator."""
	        logging.info('Hyperparameter tuning started for CatBoost')
	        param_dist = {
	            'depth': [4, 5, 6, 7, 8, 9, 10],
	            'learning_rate': [0.01, 0.02, 0.03, 0.04],
	            'iterations': [300, 400, 500, 600],
	        }
	        # NOTE(review): 'r2' is a regression metric being applied to a
	        # classifier; kept to match the R2-based reporting used elsewhere in
	        # this pipeline -- confirm whether 'accuracy' (or similar) is intended.
	        search = RandomizedSearchCV(
	            CatBoostClassifier(verbose=False),
	            param_dist,
	            scoring='r2',
	            cv=5,
	            n_jobs=-1,
	        )
	        search.fit(x_train, y_train)

	        # Print the tuned parameters and score
	        print(f'Best CatBoost Parameters: {search.best_params_}')
	        print(f'Best CatBoost Score: {search.best_score_}')
	        print(self._SEPARATOR)

	        best_estimator = search.best_estimator_
	        logging.info('Hyperparameter tuning complete for CatBoost')
	        return best_estimator

	    def _tune_knn(self, x_train, y_train):
	        """Grid-search ``n_neighbors`` in 2..30 for KNN; return the best estimator."""
	        logging.info('Hyperparameter tuning started for KNN')
	        param_grid = {'n_neighbors': list(range(2, 31))}
	        # NOTE(review): same 'r2'-on-classifier scoring question as CatBoost.
	        search = GridSearchCV(
	            KNeighborsClassifier(),
	            param_grid,
	            cv=5,
	            scoring='r2',
	            n_jobs=-1,
	        )
	        search.fit(x_train, y_train)

	        # Print the tuned parameters and score
	        print(f'Best KNN Parameters: {search.best_params_}')
	        print(f'Best KNN Score: {search.best_score_}')
	        print(self._SEPARATOR)

	        best_estimator = search.best_estimator_
	        logging.info('Hyperparameter tuning complete for KNN')
	        return best_estimator

	    def _fit_voting_classifier(self, best_cbr, best_knn, x_train, y_train, x_test, y_test):
	        """Fit the weighted CatBoost/XGB/KNN ensemble and print its evaluation."""
	        logging.info('Voting Classifier training started')
	        voting_classifier = VotingClassifier(
	            estimators=[('catboost', best_cbr), ('xgb', XGBClassifier()), ('knn', best_knn)],
	            weights=[3, 2, 1],
	        )
	        voting_classifier.fit(x_train, y_train)

	        print('Final Model Evaluation:\n')
	        print_evaluated_results(x_train, y_train, x_test, y_test, voting_classifier)
	        logging.info('Voting Classifier training completed')
	        return voting_classifier

	    def get_best_model(self, model_report: dict):
	        """Return ``(name, score)`` for the highest-scoring entry of the report.

	        Args:
	            model_report: mapping of model name to its score.

	        Returns:
	            Tuple of the best model's name and its score. On ties the entry
	            that comes first in the mapping's iteration order wins.

	        Raises:
	            ValueError: if ``model_report`` is empty.
	        """
	        if not model_report:
	            # max() would raise an opaque ValueError here; say what went wrong.
	            raise ValueError('model_report is empty; no models were evaluated')
	        best_model_name = max(model_report, key=model_report.get)
	        return best_model_name, model_report[best_model_name]